geo-intel-offline 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ """
2
+ geo_intel_offline - Production-ready offline geo-intelligence library.
3
+
4
+ Resolves latitude/longitude coordinates to:
5
+ - Country name
6
+ - ISO2/ISO3 codes
7
+ - Continent
8
+ - Timezone
9
+ - Confidence score
10
+ """
11
+
12
+ from .api import resolve, GeoIntelResult
13
+
14
+ __version__ = "1.0.0"
15
+ __all__ = ["resolve", "GeoIntelResult"]
@@ -0,0 +1,114 @@
1
+ """
2
+ Public API for geo_intel_offline library.
3
+
4
+ Clean, simple interface that hides implementation details.
5
+ """
6
+
7
+ from typing import Dict, Optional, List
8
+ from .resolver import resolve as _resolve, ResolutionResult
9
+
10
+
11
class GeoIntelResult:
    """
    Read-only view over a resolver ResolutionResult.

    Exposes the resolved fields as attributes and supports conversion to a
    plain dictionary via :meth:`to_dict`.
    """

    def __init__(self, result: ResolutionResult):
        # Hold the raw resolver result; every property delegates to it.
        self._result = result

    @property
    def country(self) -> Optional[str]:
        """Resolved country name."""
        return self._result.country_name

    @property
    def iso2(self) -> Optional[str]:
        """ISO 3166-1 alpha-2 country code."""
        return self._result.iso2

    @property
    def iso3(self) -> Optional[str]:
        """ISO 3166-1 alpha-3 country code."""
        return self._result.iso3

    @property
    def continent(self) -> Optional[str]:
        """Resolved continent name."""
        return self._result.continent

    @property
    def timezone(self) -> Optional[str]:
        """IANA timezone identifier."""
        return self._result.timezone

    @property
    def confidence(self) -> float:
        """Resolver confidence score in the range 0.0-1.0."""
        return self._result.confidence

    def to_dict(self) -> Dict:
        """Return the underlying result as a plain dictionary."""
        return self._result.to_dict()

    def __repr__(self) -> str:
        return (
            f"GeoIntelResult(country={self.country!r}, "
            f"iso2={self.iso2!r}, "
            f"iso3={self.iso3!r}, "
            f"confidence={self.confidence:.2f})"
        )
64
+
65
+
66
def resolve(
    lat: float,
    lon: float,
    data_dir: Optional[str] = None,
    countries: Optional[List[str]] = None,
    continents: Optional[List[str]] = None,
    exclude_countries: Optional[List[str]] = None
) -> GeoIntelResult:
    """
    Resolve a latitude/longitude pair to offline geo-intelligence.

    Main public entry point. Delegates to the internal resolver and wraps
    its result so that callers see country, ISO codes, continent, timezone
    and a confidence score.

    Args:
        lat: Latitude in degrees (-90.0 to 90.0)
        lon: Longitude in degrees (-180.0 to 180.0)
        data_dir: Optional custom data directory path
        countries: Optional list of ISO2 codes to load (modular format only)
        continents: Optional list of continent names to load (modular format only)
        exclude_countries: Optional list of ISO2 codes to exclude (modular format only)

    Returns:
        GeoIntelResult object with the resolved information.

    Raises:
        ValueError: If lat/lon are out of valid range.
        FileNotFoundError: If data files are missing.

    Example:
        >>> result = resolve(40.7128, -74.0060)
        >>> print(result.country)
        'United States'
        >>> print(result.confidence)
        0.98

        >>> # Load only specific countries
        >>> result = resolve(40.7128, -74.0060, countries=["US", "CA"])

        >>> # Load by continent
        >>> result = resolve(40.7128, -74.0060, continents=["North America"])
    """
    raw = _resolve(
        lat,
        lon,
        data_dir,
        countries=countries,
        continents=continents,
        exclude_countries=exclude_countries,
    )
    return GeoIntelResult(raw)
@@ -0,0 +1,238 @@
1
+ """
2
+ Data compression utilities for reducing dataset file size.
3
+
4
+ Supports multiple compression formats:
5
+ 1. gzip - Standard library, good compression ratio
6
+ 2. msgpack - Binary format, smaller than JSON + faster parsing
7
+
8
+ All compression is lossless - no data is modified, only encoded differently.
9
+ """
10
+
11
+ import gzip
12
+ import json
13
+ from pathlib import Path
14
+ from typing import Dict, Any, Optional
15
+
16
# msgpack is optional - only needed if using MessagePack format
HAS_MSGPACK = False
try:
    import msgpack  # type: ignore
    HAS_MSGPACK = True
except ImportError:
    # Leave HAS_MSGPACK False; the msgpack helpers below raise a lazy
    # ImportError with install instructions when called without it.
    pass
23
+
24
+
25
def compress_json_to_gzip(json_file: Path, output_file: Optional[Path] = None) -> Path:
    """
    Gzip-compress a JSON file at maximum compression level.

    Args:
        json_file: Path to the source JSON file.
        output_file: Destination path; defaults to the source name with
            '.gz' appended (data.json -> data.json.gz).

    Returns:
        Path to the compressed file.
    """
    target = output_file
    if target is None:
        # data.json -> data.json.gz
        target = json_file.with_suffix(json_file.suffix + '.gz')

    payload = json_file.read_bytes()
    with gzip.open(target, 'wb', compresslevel=9) as out:
        out.write(payload)

    return target
44
+
45
+
46
def decompress_gzip_to_json(gzip_file: Path, output_file: Optional[Path] = None) -> Path:
    """
    Decompress a gzip file back to plain JSON.

    Args:
        gzip_file: Path to the compressed file.
        output_file: Destination path; defaults to the input path with its
            final suffix stripped (data.json.gz -> data.json).

    Returns:
        Path to the decompressed JSON file.
    """
    target = output_file
    if target is None:
        # Drop the final suffix; drop once more if a '.gz' is still left
        # (covers names such as data.gz.gz).
        target = gzip_file.with_suffix('')
        if target.suffix == '.gz':
            target = target.with_suffix('')

    payload = gzip.decompress(gzip_file.read_bytes())
    target.write_bytes(payload)

    return target
67
+
68
+
69
def load_json_gzip(gzip_file: Path) -> Dict[str, Any]:
    """
    Parse JSON straight out of a gzip-compressed file.

    Args:
        gzip_file: Path to a compressed JSON file.

    Returns:
        The parsed JSON data.
    """
    raw = gzip.decompress(gzip_file.read_bytes())
    return json.loads(raw.decode('utf-8'))
81
+
82
+
83
def save_json_gzip(data: Dict[str, Any], output_file: Path, compresslevel: int = 9) -> Path:
    """
    Serialize data as compact JSON and write it gzip-compressed.

    Args:
        data: JSON-serializable data.
        output_file: Destination path (conventionally .json.gz).
        compresslevel: gzip compression level (1-9, 9 = maximum).

    Returns:
        Path to the written file.
    """
    # Compact separators keep the uncompressed payload as small as possible.
    text = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
    with gzip.open(output_file, 'wt', encoding='utf-8', compresslevel=compresslevel) as fh:
        fh.write(text)
    return output_file
99
+
100
+
101
def load_msgpack(msgpack_file: Path) -> Dict[str, Any]:
    """
    Load data from a MessagePack binary file.

    Args:
        msgpack_file: Path to a .msgpack file.

    Returns:
        The decoded data.

    Raises:
        ImportError: If the optional msgpack dependency is not installed.
    """
    if not HAS_MSGPACK:
        raise ImportError("msgpack library required. Install with: pip install msgpack")
    payload = msgpack_file.read_bytes()
    # raw=False decodes msgpack strings to str instead of bytes.
    return msgpack.unpackb(payload, raw=False)
115
+
116
+
117
def save_msgpack(data: Dict[str, Any], output_file: Path) -> Path:
    """
    Write data to a MessagePack binary file.

    Args:
        data: Data to serialize.
        output_file: Destination path (conventionally .msgpack).

    Returns:
        Path to the written file.

    Raises:
        ImportError: If the optional msgpack dependency is not installed.
    """
    if not HAS_MSGPACK:
        raise ImportError("msgpack library required. Install with: pip install msgpack")
    # use_bin_type=True keeps bytes and str distinguishable in the stream.
    output_file.write_bytes(msgpack.packb(data, use_bin_type=True))
    return output_file
133
+
134
+
135
def verify_data_integrity(original_file: Path, compressed_file: Path, format: str = 'gzip') -> bool:
    """
    Verify that a compressed file decodes to exactly the original data.

    Args:
        original_file: Original JSON file.
        compressed_file: Compressed counterpart.
        format: Compression format ('gzip' or 'msgpack').

    Returns:
        True if the decompressed data matches the original, False otherwise.

    Raises:
        ValueError: If `format` is not a recognized compression format.
    """
    # Load original
    with open(original_file, 'r', encoding='utf-8') as f:
        original_data = json.load(f)

    # Load compressed.
    # BUG FIX: the gzip branch previously tested compressed_file.suffix but
    # ran the identical loader in both arms — the suffix check was dead code.
    if format == 'gzip':
        decompressed_data = load_json_gzip(compressed_file)
    elif format == 'msgpack':
        decompressed_data = load_msgpack(compressed_file)
    else:
        raise ValueError(f"Unknown format: {format}")

    # Compare canonical JSON renderings so key ordering cannot cause a
    # spurious mismatch.
    original_json = json.dumps(original_data, sort_keys=True, separators=(',', ':'))
    decompressed_json = json.dumps(decompressed_data, sort_keys=True, separators=(',', ':'))

    return original_json == decompressed_json
168
+
169
+
170
def get_compression_ratio(original_file: Path, compressed_file: Path) -> float:
    """
    Compute the compression ratio of two files on disk.

    Returns:
        compressed_size / original_size (lower is better; 1.0 = no savings).
    """
    return compressed_file.stat().st_size / original_file.stat().st_size
180
+
181
+
182
def compress_all_data_files(data_dir: Path, use_msgpack: bool = False) -> Dict[str, Any]:
    """
    Compress every JSON data file in a directory and verify each result.

    Args:
        data_dir: Directory containing JSON files.
        use_msgpack: If True, use MessagePack format; otherwise use gzip.

    Returns:
        Dict with per-file and aggregate compression statistics.

    Raises:
        ValueError: If any compressed file fails the integrity check.
    """
    results = {
        'compressed_files': [],
        'total_original_size': 0,
        'total_compressed_size': 0,
        'compression_ratio': 0.0,
        'files': {}
    }

    for json_file in data_dir.glob('*.json'):
        original_size = json_file.stat().st_size
        results['total_original_size'] += original_size

        if use_msgpack:
            # BUG FIX: read via a context manager — the previous
            # json.load(open(...)) never closed the file handle.
            compressed_path = json_file.with_suffix('.msgpack')
            with open(json_file, 'r', encoding='utf-8') as f:
                save_msgpack(json.load(f), compressed_path)
            fmt = 'msgpack'
        else:
            compressed_path = compress_json_to_gzip(json_file)
            fmt = 'gzip'

        compressed_size = compressed_path.stat().st_size
        results['compressed_files'].append(str(compressed_path))
        results['total_compressed_size'] += compressed_size
        results['files'][json_file.name] = {
            'original_size': original_size,
            'compressed_size': compressed_size,
            # Guard against a zero-byte source file.
            'ratio': (compressed_size / original_size) if original_size else 0.0,
        }

        # Fail loudly if round-tripping the compressed file loses data.
        if not verify_data_integrity(json_file, compressed_path, fmt):
            raise ValueError(f"Data integrity check failed for {json_file.name}")

    # BUG FIX: an empty directory previously raised ZeroDivisionError here.
    if results['total_original_size']:
        results['compression_ratio'] = (
            results['total_compressed_size'] / results['total_original_size']
        )

    return results
@@ -0,0 +1,89 @@
1
+ """
2
+ Confidence scoring for geo-intelligence results.
3
+
4
+ Confidence is based on:
5
+ 1. Distance to polygon edge (closer = lower confidence)
6
+ 2. Geohash ambiguity (multiple candidates = lower confidence)
7
+ 3. Border proximity threshold
8
+
9
+ Design Decision: Use distance-based scoring with thresholds:
10
+ - > 0.1° from edge: 0.98-1.0 confidence (high)
11
+ - 0.01°-0.1° from edge: 0.85-0.98 confidence (medium)
12
+ - < 0.01° from edge: 0.70-0.85 confidence (low)
13
+ - Multiple candidates: Reduce by 0.1-0.2
14
+
15
+ This gives users actionable confidence metrics without over-promising accuracy.
16
+ """
17
+
18
+ from typing import List, Tuple
19
+ from .pip import distance_to_polygon_edge
20
+
21
+
22
def calculate_confidence(
    point: Tuple[float, float],
    polygon: List[Tuple[float, float]],
    # NOTE(review): default is None but the annotation says List — this
    # should be Optional[List[List[Tuple[float, float]]]] (Optional is not
    # currently imported in this module).
    holes: List[List[Tuple[float, float]]] = None,
    candidate_count: int = 1
) -> float:
    """
    Calculate confidence score for geo-intelligence result.

    Confidence falls as the point nears the polygon boundary (exterior
    ring or any hole) and when multiple candidate countries matched.

    Args:
        point: (lat, lon) tuple
        polygon: Exterior polygon ring
        holes: Interior rings (holes) if any
        candidate_count: Number of candidate countries found (ambiguity penalty)

    Returns:
        Confidence score clamped to the range [0.5, 1.0]
    """
    # Calculate distance to nearest edge (exterior or holes)
    dist_exterior = distance_to_polygon_edge(point, polygon)

    min_dist = dist_exterior

    # A point inside a country but near a hole boundary is also near a
    # border, so holes count toward the minimum edge distance.
    if holes:
        for hole in holes:
            dist_hole = distance_to_polygon_edge(point, hole)
            min_dist = min(min_dist, dist_hole)

    # Convert distance (degrees) to confidence
    # 0.1° ≈ 11km at equator, good threshold for "far from border"
    if min_dist >= 0.1:
        base_confidence = 0.98
    elif min_dist >= 0.05:
        # Linear interpolation over [0.05°, 0.1°): 0.88 -> 0.98
        base_confidence = 0.88 + (min_dist - 0.05) / 0.05 * 0.10
    elif min_dist >= 0.01:
        # Linear interpolation over [0.01°, 0.05°): 0.75 -> 0.88
        base_confidence = 0.75 + (min_dist - 0.01) / 0.04 * 0.13
    else:
        # Very close to border: interpolates 0.70 -> 0.75 over [0°, 0.01°)
        base_confidence = 0.70 + min_dist / 0.01 * 0.05

    # Apply ambiguity penalty
    if candidate_count > 1:
        # Each extra candidate costs 0.05, capped at a 0.2 total penalty
        penalty = min(0.2, (candidate_count - 1) * 0.05)
        base_confidence -= penalty

    # Clamp to valid range; never report below 0.5
    return max(0.5, min(1.0, base_confidence))
72
+
73
+
74
def get_confidence_label(confidence: float) -> str:
    """
    Map a numeric confidence score to a human-readable label.

    Args:
        confidence: Confidence score (0.0-1.0).

    Returns:
        "high" (>= 0.90), "medium" (>= 0.75), or "low" (otherwise).
    """
    if confidence >= 0.90:
        return "high"
    if confidence >= 0.75:
        return "medium"
    return "low"
Binary file
Binary file