geo-intel-offline 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ """
2
+ geo_intel_offline - Production-ready offline geo-intelligence library.
3
+
4
+ Resolves latitude/longitude coordinates to:
5
+ - Country name
6
+ - ISO2/ISO3 codes
7
+ - Continent
8
+ - Timezone
9
+ - Confidence score
10
+ """
11
+
12
+ from .api import resolve, GeoIntelResult
13
+
14
+ __version__ = "1.0.0"
15
+ __all__ = ["resolve", "GeoIntelResult"]
@@ -0,0 +1,114 @@
1
+ """
2
+ Public API for geo_intel_offline library.
3
+
4
+ Clean, simple interface that hides implementation details.
5
+ """
6
+
7
+ from typing import Dict, Optional, List
8
+ from .resolver import resolve as _resolve, ResolutionResult
9
+
10
+
11
class GeoIntelResult:
    """
    Read-only view over a resolver ResolutionResult.

    Exposes the resolved fields as attributes and supports conversion to a
    plain dictionary via :meth:`to_dict`.
    """

    def __init__(self, result: ResolutionResult):
        # Hold the raw resolver result; every property delegates to it.
        self._result = result

    @property
    def country(self) -> Optional[str]:
        """Resolved country name."""
        return self._result.country_name

    @property
    def iso2(self) -> Optional[str]:
        """ISO 3166-1 alpha-2 country code."""
        return self._result.iso2

    @property
    def iso3(self) -> Optional[str]:
        """ISO 3166-1 alpha-3 country code."""
        return self._result.iso3

    @property
    def continent(self) -> Optional[str]:
        """Resolved continent name."""
        return self._result.continent

    @property
    def timezone(self) -> Optional[str]:
        """IANA timezone identifier."""
        return self._result.timezone

    @property
    def confidence(self) -> float:
        """Resolver confidence score in the range 0.0-1.0."""
        return self._result.confidence

    def to_dict(self) -> Dict:
        """Return the underlying result as a plain dictionary."""
        return self._result.to_dict()

    def __repr__(self) -> str:
        return (
            f"GeoIntelResult(country={self.country!r}, "
            f"iso2={self.iso2!r}, "
            f"iso3={self.iso3!r}, "
            f"confidence={self.confidence:.2f})"
        )
64
+
65
+
66
def resolve(
    lat: float,
    lon: float,
    data_dir: Optional[str] = None,
    countries: Optional[List[str]] = None,
    continents: Optional[List[str]] = None,
    exclude_countries: Optional[List[str]] = None
) -> GeoIntelResult:
    """
    Resolve a latitude/longitude pair to offline geo-intelligence.

    Main public entry point. Delegates to the internal resolver and wraps
    its result so that callers see country, ISO codes, continent, timezone
    and a confidence score.

    Args:
        lat: Latitude in degrees (-90.0 to 90.0)
        lon: Longitude in degrees (-180.0 to 180.0)
        data_dir: Optional custom data directory path
        countries: Optional list of ISO2 codes to load (modular format only)
        continents: Optional list of continent names to load (modular format only)
        exclude_countries: Optional list of ISO2 codes to exclude (modular format only)

    Returns:
        GeoIntelResult object with the resolved information.

    Raises:
        ValueError: If lat/lon are out of valid range.
        FileNotFoundError: If data files are missing.

    Example:
        >>> result = resolve(40.7128, -74.0060)
        >>> print(result.country)
        'United States'
        >>> print(result.confidence)
        0.98

        >>> # Load only specific countries
        >>> result = resolve(40.7128, -74.0060, countries=["US", "CA"])

        >>> # Load by continent
        >>> result = resolve(40.7128, -74.0060, continents=["North America"])
    """
    raw = _resolve(
        lat,
        lon,
        data_dir,
        countries=countries,
        continents=continents,
        exclude_countries=exclude_countries,
    )
    return GeoIntelResult(raw)
@@ -0,0 +1,238 @@
1
+ """
2
+ Data compression utilities for reducing dataset file size.
3
+
4
+ Supports multiple compression formats:
5
+ 1. gzip - Standard library, good compression ratio
6
+ 2. msgpack - Binary format, smaller than JSON + faster parsing
7
+
8
+ All compression is lossless - no data is modified, only encoded differently.
9
+ """
10
+
11
+ import gzip
12
+ import json
13
+ from pathlib import Path
14
+ from typing import Dict, Any, Optional
15
+
16
# msgpack is optional - only needed if using MessagePack format
HAS_MSGPACK = False
try:
    import msgpack  # type: ignore
    HAS_MSGPACK = True
except ImportError:
    # Leave HAS_MSGPACK False; the msgpack helpers below raise a lazy
    # ImportError with install instructions when called without it.
    pass
23
+
24
+
25
def compress_json_to_gzip(json_file: Path, output_file: Optional[Path] = None) -> Path:
    """
    Gzip-compress a JSON file at maximum compression level.

    Args:
        json_file: Path to the source JSON file.
        output_file: Destination path; defaults to the source name with
            '.gz' appended (data.json -> data.json.gz).

    Returns:
        Path to the compressed file.
    """
    target = output_file
    if target is None:
        # data.json -> data.json.gz
        target = json_file.with_suffix(json_file.suffix + '.gz')

    payload = json_file.read_bytes()
    with gzip.open(target, 'wb', compresslevel=9) as out:
        out.write(payload)

    return target
44
+
45
+
46
def decompress_gzip_to_json(gzip_file: Path, output_file: Optional[Path] = None) -> Path:
    """
    Decompress a gzip file back to plain JSON.

    Args:
        gzip_file: Path to the compressed file.
        output_file: Destination path; defaults to the input path with its
            final suffix stripped (data.json.gz -> data.json).

    Returns:
        Path to the decompressed JSON file.
    """
    target = output_file
    if target is None:
        # Drop the final suffix; drop once more if a '.gz' is still left
        # (covers names such as data.gz.gz).
        target = gzip_file.with_suffix('')
        if target.suffix == '.gz':
            target = target.with_suffix('')

    payload = gzip.decompress(gzip_file.read_bytes())
    target.write_bytes(payload)

    return target
67
+
68
+
69
def load_json_gzip(gzip_file: Path) -> Dict[str, Any]:
    """
    Parse JSON straight out of a gzip-compressed file.

    Args:
        gzip_file: Path to a compressed JSON file.

    Returns:
        The parsed JSON data.
    """
    raw = gzip.decompress(gzip_file.read_bytes())
    return json.loads(raw.decode('utf-8'))
81
+
82
+
83
def save_json_gzip(data: Dict[str, Any], output_file: Path, compresslevel: int = 9) -> Path:
    """
    Serialize data as compact JSON and write it gzip-compressed.

    Args:
        data: JSON-serializable data.
        output_file: Destination path (conventionally .json.gz).
        compresslevel: gzip compression level (1-9, 9 = maximum).

    Returns:
        Path to the written file.
    """
    # Compact separators keep the uncompressed payload as small as possible.
    text = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
    with gzip.open(output_file, 'wt', encoding='utf-8', compresslevel=compresslevel) as fh:
        fh.write(text)
    return output_file
99
+
100
+
101
def load_msgpack(msgpack_file: Path) -> Dict[str, Any]:
    """
    Load data from a MessagePack binary file.

    Args:
        msgpack_file: Path to a .msgpack file.

    Returns:
        The decoded data.

    Raises:
        ImportError: If the optional msgpack dependency is not installed.
    """
    if not HAS_MSGPACK:
        raise ImportError("msgpack library required. Install with: pip install msgpack")
    payload = msgpack_file.read_bytes()
    # raw=False decodes msgpack strings to str instead of bytes.
    return msgpack.unpackb(payload, raw=False)
115
+
116
+
117
def save_msgpack(data: Dict[str, Any], output_file: Path) -> Path:
    """
    Write data to a MessagePack binary file.

    Args:
        data: Data to serialize.
        output_file: Destination path (conventionally .msgpack).

    Returns:
        Path to the written file.

    Raises:
        ImportError: If the optional msgpack dependency is not installed.
    """
    if not HAS_MSGPACK:
        raise ImportError("msgpack library required. Install with: pip install msgpack")
    # use_bin_type=True keeps bytes and str distinguishable in the stream.
    output_file.write_bytes(msgpack.packb(data, use_bin_type=True))
    return output_file
133
+
134
+
135
def verify_data_integrity(original_file: Path, compressed_file: Path, format: str = 'gzip') -> bool:
    """
    Verify that a compressed file decodes to exactly the original data.

    Args:
        original_file: Original JSON file.
        compressed_file: Compressed counterpart.
        format: Compression format ('gzip' or 'msgpack').

    Returns:
        True if the decompressed data matches the original, False otherwise.

    Raises:
        ValueError: If `format` is not a recognized compression format.
    """
    # Load original
    with open(original_file, 'r', encoding='utf-8') as f:
        original_data = json.load(f)

    # Load compressed.
    # BUG FIX: the gzip branch previously tested compressed_file.suffix but
    # ran the identical loader in both arms — the suffix check was dead code.
    if format == 'gzip':
        decompressed_data = load_json_gzip(compressed_file)
    elif format == 'msgpack':
        decompressed_data = load_msgpack(compressed_file)
    else:
        raise ValueError(f"Unknown format: {format}")

    # Compare canonical JSON renderings so key ordering cannot cause a
    # spurious mismatch.
    original_json = json.dumps(original_data, sort_keys=True, separators=(',', ':'))
    decompressed_json = json.dumps(decompressed_data, sort_keys=True, separators=(',', ':'))

    return original_json == decompressed_json
168
+
169
+
170
def get_compression_ratio(original_file: Path, compressed_file: Path) -> float:
    """
    Compute the compression ratio of two files on disk.

    Returns:
        compressed_size / original_size (lower is better; 1.0 = no savings).
    """
    return compressed_file.stat().st_size / original_file.stat().st_size
180
+
181
+
182
def compress_all_data_files(data_dir: Path, use_msgpack: bool = False) -> Dict[str, Any]:
    """
    Compress every JSON data file in a directory and verify each result.

    Args:
        data_dir: Directory containing JSON files.
        use_msgpack: If True, use MessagePack format; otherwise use gzip.

    Returns:
        Dict with per-file and aggregate compression statistics.

    Raises:
        ValueError: If any compressed file fails the integrity check.
    """
    results = {
        'compressed_files': [],
        'total_original_size': 0,
        'total_compressed_size': 0,
        'compression_ratio': 0.0,
        'files': {}
    }

    for json_file in data_dir.glob('*.json'):
        original_size = json_file.stat().st_size
        results['total_original_size'] += original_size

        if use_msgpack:
            # BUG FIX: read via a context manager — the previous
            # json.load(open(...)) never closed the file handle.
            compressed_path = json_file.with_suffix('.msgpack')
            with open(json_file, 'r', encoding='utf-8') as f:
                save_msgpack(json.load(f), compressed_path)
            fmt = 'msgpack'
        else:
            compressed_path = compress_json_to_gzip(json_file)
            fmt = 'gzip'

        compressed_size = compressed_path.stat().st_size
        results['compressed_files'].append(str(compressed_path))
        results['total_compressed_size'] += compressed_size
        results['files'][json_file.name] = {
            'original_size': original_size,
            'compressed_size': compressed_size,
            # Guard against a zero-byte source file.
            'ratio': (compressed_size / original_size) if original_size else 0.0,
        }

        # Fail loudly if round-tripping the compressed file loses data.
        if not verify_data_integrity(json_file, compressed_path, fmt):
            raise ValueError(f"Data integrity check failed for {json_file.name}")

    # BUG FIX: an empty directory previously raised ZeroDivisionError here.
    if results['total_original_size']:
        results['compression_ratio'] = (
            results['total_compressed_size'] / results['total_original_size']
        )

    return results
@@ -0,0 +1,89 @@
1
+ """
2
+ Confidence scoring for geo-intelligence results.
3
+
4
+ Confidence is based on:
5
+ 1. Distance to polygon edge (closer = lower confidence)
6
+ 2. Geohash ambiguity (multiple candidates = lower confidence)
7
+ 3. Border proximity threshold
8
+
9
+ Design Decision: Use distance-based scoring with thresholds:
10
+ - > 0.1° from edge: 0.98-1.0 confidence (high)
11
+ - 0.01°-0.1° from edge: 0.85-0.98 confidence (medium)
12
+ - < 0.01° from edge: 0.70-0.85 confidence (low)
13
+ - Multiple candidates: Reduce by 0.1-0.2
14
+
15
+ This gives users actionable confidence metrics without over-promising accuracy.
16
+ """
17
+
18
+ from typing import List, Tuple
19
+ from .pip import distance_to_polygon_edge
20
+
21
+
22
def calculate_confidence(
    point: Tuple[float, float],
    polygon: List[Tuple[float, float]],
    # NOTE(review): default is None but the annotation says List — this
    # should be Optional[List[List[Tuple[float, float]]]] (Optional is not
    # currently imported in this module).
    holes: List[List[Tuple[float, float]]] = None,
    candidate_count: int = 1
) -> float:
    """
    Calculate confidence score for geo-intelligence result.

    Confidence falls as the point nears the polygon boundary (exterior
    ring or any hole) and when multiple candidate countries matched.

    Args:
        point: (lat, lon) tuple
        polygon: Exterior polygon ring
        holes: Interior rings (holes) if any
        candidate_count: Number of candidate countries found (ambiguity penalty)

    Returns:
        Confidence score clamped to the range [0.5, 1.0]
    """
    # Calculate distance to nearest edge (exterior or holes)
    dist_exterior = distance_to_polygon_edge(point, polygon)

    min_dist = dist_exterior

    # A point inside a country but near a hole boundary is also near a
    # border, so holes count toward the minimum edge distance.
    if holes:
        for hole in holes:
            dist_hole = distance_to_polygon_edge(point, hole)
            min_dist = min(min_dist, dist_hole)

    # Convert distance (degrees) to confidence
    # 0.1° ≈ 11km at equator, good threshold for "far from border"
    if min_dist >= 0.1:
        base_confidence = 0.98
    elif min_dist >= 0.05:
        # Linear interpolation over [0.05°, 0.1°): 0.88 -> 0.98
        base_confidence = 0.88 + (min_dist - 0.05) / 0.05 * 0.10
    elif min_dist >= 0.01:
        # Linear interpolation over [0.01°, 0.05°): 0.75 -> 0.88
        base_confidence = 0.75 + (min_dist - 0.01) / 0.04 * 0.13
    else:
        # Very close to border: interpolates 0.70 -> 0.75 over [0°, 0.01°)
        base_confidence = 0.70 + min_dist / 0.01 * 0.05

    # Apply ambiguity penalty
    if candidate_count > 1:
        # Each extra candidate costs 0.05, capped at a 0.2 total penalty
        penalty = min(0.2, (candidate_count - 1) * 0.05)
        base_confidence -= penalty

    # Clamp to valid range; never report below 0.5
    return max(0.5, min(1.0, base_confidence))
72
+
73
+
74
def get_confidence_label(confidence: float) -> str:
    """
    Map a numeric confidence score to a human-readable label.

    Args:
        confidence: Confidence score (0.0-1.0).

    Returns:
        "high" (>= 0.90), "medium" (>= 0.75), or "low" (otherwise).
    """
    if confidence >= 0.90:
        return "high"
    if confidence >= 0.75:
        return "medium"
    return "low"
Binary file
Binary file