PyPI - quickbase-extract - Versions diffs - 0.1.0__py3-none-any.whl - Mend

quickbase-extract 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

quickbase_extract/__init__.py +98 -0
quickbase_extract/api_handlers.py +210 -0
quickbase_extract/cache_freshness.py +199 -0
quickbase_extract/cache_manager.py +234 -0
quickbase_extract/cache_sync.py +74 -0
quickbase_extract/client.py +61 -0
quickbase_extract/py.typed +0 -0
quickbase_extract/report_data.py +253 -0
quickbase_extract/report_metadata.py +316 -0
quickbase_extract/utils.py +42 -0
quickbase_extract-0.1.0.dist-info/METADATA +1735 -0
quickbase_extract-0.1.0.dist-info/RECORD +14 -0
quickbase_extract-0.1.0.dist-info/WHEEL +4 -0
quickbase_extract-0.1.0.dist-info/licenses/LICENSE.txt +21 -0

quickbase_extract/cache_manager.py ADDED Viewed

@@ -0,0 +1,234 @@
+"""Unified cache management for local dev and Lambda environments."""
+import logging
+import os
+from pathlib import Path
+import boto3
+from quickbase_extract.utils import normalize_name
+logger = logging.getLogger(__name__)
+class CacheManager:
+    """Manages cache reads/writes for both local and Lambda environments.
+    Supports local file-based caching and S3-backed caching on Lambda.
+    Cache root path is configurable via QUICKBASE_CACHE_ROOT environment variable.
+    """
+    def __init__(self, cache_root: Path | None = None):
+        """Initialize the cache manager.
+        Args:
+            cache_root: Path to cache root directory. If not provided, uses
+                QUICKBASE_CACHE_ROOT env var, or defaults based on environment.
+        """
+        self.is_lambda = bool(os.environ.get("AWS_LAMBDA_FUNCTION_NAME"))
+        self.environment = os.environ.get("ENV", "dev")
+        self.s3_bucket = os.environ.get("CACHE_BUCKET")
+        self.s3_client = boto3.client("s3") if self.is_lambda else None
+        # Determine cache root path
+        if cache_root:
+            # Explicitly provided
+            self.cache_root = Path(cache_root)
+        elif os.environ.get("QUICKBASE_CACHE_ROOT"):
+            # From environment variable
+            self.cache_root = Path(os.environ.get("QUICKBASE_CACHE_ROOT"))
+        else:
+            # Default based on environment
+            if self.is_lambda:
+                self.cache_root = Path("/tmp/quickbase-extract/data")
+            else:
+                # Local: use current working directory or home
+                self.cache_root = Path.cwd() / ".quickbase-cache" / self.environment
+        self.cache_root.mkdir(parents=True, exist_ok=True)
+        logger.debug(f"Cache root: {self.cache_root}")
+    def get_metadata_path(self, app_name: str, table_name: str, report_name: str) -> Path:
+        """Get path for report metadata file.
+        Args:
+            app_name: Application name.
+            table_name: Table name.
+            report_name: Report name.
+        Returns:
+            Path object for the metadata file.
+        Example:
+            >>> cache_mgr.get_metadata_path("Sales Tracker", "Opportunities", "Open Deals")
+            PosixPath('.quickbase-cache/dev/report_metadata/sales_tracker/opportunities_open_deals.json')
+        """
+        app_fmt = normalize_name(app_name)
+        table_fmt = normalize_name(table_name)
+        report_fmt = normalize_name(report_name)
+        path = self.cache_root / "report_metadata" / app_fmt / f"{table_fmt}_{report_fmt}.json"
+        path.parent.mkdir(parents=True, exist_ok=True)
+        return path
+    def get_data_path(self, app_name: str, table_name: str, report_name: str) -> Path:
+        """Get path for report data file.
+        Args:
+            app_name: Application name.
+            table_name: Table name.
+            report_name: Report name.
+        Returns:
+            Path object for the data file.
+        Example:
+            >>> cache_mgr.get_data_path("Sales Tracker", "Opportunities", "Open Deals")
+            PosixPath('.quickbase-cache/dev/report_data/sales_tracker/opportunities_open_deals_data.json')
+        """
+        app_fmt = normalize_name(app_name)
+        table_fmt = normalize_name(table_name)
+        report_fmt = normalize_name(report_name)
+        path = self.cache_root / "report_data" / app_fmt / f"{table_fmt}_{report_fmt}_data.json"
+        path.parent.mkdir(parents=True, exist_ok=True)
+        return path
+    def write_file(self, file_path: Path, content: str) -> None:
+        """Write cache file and sync to S3 if on Lambda.
+        Args:
+            file_path: Path where file should be written.
+            content: String content to write.
+        Raises:
+            Exception: If S3 sync fails on Lambda (required for operation success).
+        Example:
+            >>> cache_mgr.write_file(Path("metadata.json"), '{"key": "value"}')
+        """
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        file_path.write_text(content)
+        if self.is_lambda and self.s3_client:
+            self._sync_to_s3(file_path)
+    def read_file(self, file_path: Path) -> str:
+        """Read cache file.
+        Args:
+            file_path: Path to file to read.
+        Returns:
+            File contents as string.
+        Raises:
+            FileNotFoundError: If file does not exist.
+        Example:
+            >>> content = cache_mgr.read_file(Path("metadata.json"))
+        """
+        if not file_path.exists():
+            raise FileNotFoundError(f"Cache file not found: {file_path}")
+        return file_path.read_text()
+    def _sync_to_s3(self, file_path: Path) -> None:
+        """Upload file to S3 for persistence across Lambda invocations.
+        Args:
+            file_path: Path to file to upload.
+        Raises:
+            Exception: If upload fails. This is critical - Lambda /tmp is ephemeral.
+        """
+        try:
+            relative_path = file_path.relative_to(self.cache_root)
+            s3_key = f"{self.environment}/{relative_path}"
+            self.s3_client.upload_file(str(file_path), self.s3_bucket, s3_key)
+            logger.info(f"Synced {s3_key} to S3")
+        except Exception as e:
+            logger.error(f"Failed to sync {file_path} to S3: {e}")
+            raise
+    def sync_from_s3(self) -> None:
+        """Download all cache files from S3 to /tmp (Lambda only).
+        Restores cache from S3 at Lambda initialization. Only runs on Lambda.
+        Logs and continues if bucket not configured.
+        Raises:
+            Exception: If S3 operations fail.
+        Note:
+            Lambda /tmp has storage limits (default 512 MB, max 10 GB).
+            Current cache size (~32 MB) is well within limits.
+        """
+        if not self.is_lambda or not self.s3_client:
+            logger.debug("Not in Lambda or S3 client unavailable, skipping S3 sync")
+            return
+        if not self.s3_bucket:
+            logger.debug("CACHE_BUCKET not set, skipping S3 sync")
+            return
+        logger.info(f"Syncing cache from S3 for environment: {self.environment}")
+        try:
+            paginator = self.s3_client.get_paginator("list_objects_v2")
+            pages = paginator.paginate(Bucket=self.s3_bucket, Prefix=f"{self.environment}/")
+            file_count = 0
+            for page in pages:
+                for obj in page.get("Contents", []):
+                    s3_key = obj["Key"]
+                    if not s3_key or s3_key.endswith("/"):
+                        continue
+                    # Extract relative path (remove environment prefix)
+                    relative_key = s3_key.replace(f"{self.environment}/", "", 1)
+                    local_path = self.cache_root / relative_key
+                    local_path.parent.mkdir(parents=True, exist_ok=True)
+                    self.s3_client.download_file(self.s3_bucket, s3_key, str(local_path))
+                    file_count += 1
+            logger.info(f"Synced {file_count} files from S3")
+        except Exception as e:
+            logger.error(f"Failed to sync from S3: {e}")
+            raise
+# Process-wide CacheManager shared by get_cache_manager(). It is None until the
+# first get_cache_manager() call creates it, and _reset_cache_manager() sets it
+# back to None.
+_cache_manager: CacheManager | None = None
+def get_cache_manager(cache_root: Path | None = None) -> CacheManager:
+    """Get or create cache manager singleton instance.
+    Args:
+        cache_root: Optional path to cache root. Only used on first call.
+            Subsequent calls ignore this parameter and return the existing instance.
+    Returns:
+        Singleton CacheManager instance.
+    Warning:
+        The cache_root parameter is only respected on the first call. If you need
+        to change cache locations, use CacheManager directly instead of the singleton.
+    Example:
+        >>> cache_mgr = get_cache_manager(Path("/custom/cache"))
+        >>> # Later calls ignore cache_root
+        >>> same_mgr = get_cache_manager(Path("/different/path"))  # Returns first instance
+    """
+    global _cache_manager
+    if _cache_manager is None:
+        _cache_manager = CacheManager(cache_root=cache_root)
+    return _cache_manager
+def _reset_cache_manager() -> None:
+    """Reset the singleton cache manager. For testing only."""
+    global _cache_manager
+    _cache_manager = None
+    _cache_manager = None
+    _cache_manager = None

quickbase_extract/cache_sync.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""S3-backed cache sync for Lambda environments."""
+import logging
+from quickbase_extract.cache_manager import get_cache_manager
+logger = logging.getLogger(__name__)
+# Module-level flag recording whether sync_from_s3_once() has already run in
+# this process. It is set by sync_from_s3_once() and only cleared by
+# _reset_cache_sync(), so it stays True for as long as this module stays loaded.
+_CACHE_SYNCED = False
+def sync_from_s3_once(force: bool = False) -> None:
+    """Download cache from S3 to /tmp on Lambda cold start.
+    Only syncs if cache hasn't been synced in this invocation.
+    Subsequent calls are no-ops unless force=True.
+    On Lambda, the sync flag persists across warm invocations within the same
+    container, so warm starts skip the sync (Lambda /tmp persists). Only cold
+    starts trigger a sync.
+    On local environments, automatically detects if CACHE_BUCKET is configured.
+    If not configured, does nothing (local caching only).
+    Args:
+        force: If True, sync even if already synced in this invocation.
+            Defaults to False.
+    Raises:
+        Exception: If S3 operations fail.
+    Example:
+        >>> # In Lambda handler initialization
+        >>> sync_from_s3_once()  # Syncs on cold start
+        >>> sync_from_s3_once()  # No-op on same invocation
+        >>>
+        >>> # Force re-sync if needed
+        >>> sync_from_s3_once(force=True)
+    """
+    global _CACHE_SYNCED
+    if _CACHE_SYNCED and not force:
+        logger.debug("Cache already synced in this invocation, skipping")
+        return
+    cache_mgr = get_cache_manager()
+    cache_mgr.sync_from_s3()  # Handles Lambda detection internally
+    _CACHE_SYNCED = True
+    logger.info("Cache synced from S3")
+def is_cache_synced() -> bool:
+    """Report whether sync_from_s3_once() has completed in this process.
+    Returns:
+        The current value of the module-level _CACHE_SYNCED flag: True once
+        sync_from_s3_once() has finished without raising, False initially and
+        after _reset_cache_sync().
+    Example:
+        >>> if not is_cache_synced():
+        ...     print("Cache needs syncing")
+    """
+    return _CACHE_SYNCED
+def _reset_cache_sync() -> None:
+    """Reset the cache sync flag. For testing only.
+    Sets the module-level _CACHE_SYNCED flag back to False so the next
+    sync_from_s3_once() call performs a sync again.
+    Example:
+        >>> # In test teardown
+        >>> _reset_cache_sync()
+    """
+    global _CACHE_SYNCED
+    _CACHE_SYNCED = False

quickbase_extract/client.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""Quickbase API client factory."""
+import logging
+from typing import Any
+import quickbase_api
+logger = logging.getLogger(__name__)
+# Clients already created by get_qb_client(), keyed by (realm, user_token).
+# NOTE(review): the raw user token is part of the key, so it stays in memory
+# for as long as the entry is cached.
+_client_cache: dict[tuple[str, str], Any] = {}
+def get_qb_client(realm: str, user_token: str, cache: bool = True) -> Any:
+    """Create and return a Quickbase API client.
+    Clients are cached by (realm, token) combination to avoid recreating
+    connections. Use cache=False to force a new client instance.
+    Args:
+        realm: Quickbase realm (e.g., 'example.quickbase.com').
+        user_token: Quickbase user token (from environment or config).
+        cache: Whether to reuse cached client. Defaults to True.
+    Returns:
+        Quickbase API client instance.
+    Raises:
+        ValueError: If realm or user_token is empty.
+        Exception: If client creation fails.
+    """
+    # Input validation
+    if not realm or not realm.strip():
+        raise ValueError("Realm cannot be empty")
+    if not user_token or not user_token.strip():
+        raise ValueError("User token cannot be empty")
+    # Check cache
+    cache_key = (realm, user_token)
+    if cache and cache_key in _client_cache:
+        logger.debug(f"Returning cached Quickbase client for realm: {realm}")
+        return _client_cache[cache_key]
+    # Create new client
+    try:
+        client = quickbase_api.client(realm=realm, user_token=user_token)
+        logger.debug(f"Created Quickbase client for realm: {realm}")
+        if cache:
+            _client_cache[cache_key] = client
+        return client
+    except Exception as e:
+        logger.error(f"Failed to create Quickbase client for realm {realm}: {e}")
+        raise
+def _reset_client_cache() -> None:
+    """Clear the client cache. For testing only."""
+    global _client_cache
+    _client_cache = {}

quickbase_extract/py.typed ADDED Viewed

File without changes

quickbase_extract/report_data.py ADDED Viewed

@@ -0,0 +1,253 @@
+"""Quickbase data fetching, caching, and loading."""
+import json
+import logging
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from quickbase_extract.api_handlers import handle_query
+from quickbase_extract.cache_manager import get_cache_manager
+logger = logging.getLogger(__name__)
+def _flatten_and_relabel_records(records: list[dict], field_label: dict, fields: list[int]) -> list[dict]:
+    """Transform Quickbase records to flat dicts with field labels as keys.
+    Args:
+        records: List of records from Quickbase API (nested format).
+        field_label: Dict mapping field labels to IDs.
+        fields: List of field IDs in desired order.
+    Returns:
+        List of dicts with field labels as keys.
+    """
+    # Build reverse mapping: field ID -> label
+    id_to_label = {v: k for k, v in field_label.items()}
+    field_order = [str(f) for f in fields]
+    final_list = []
+    for record in records:
+        # Flatten: {field_id: {value: actual}} -> {field_id: actual}
+        flat = {fid: val["value"] for fid, val in record.items()}
+        # Re-order to match report field order
+        ordered = {fid: flat[fid] for fid in field_order if fid in flat}
+        # Swap field IDs with labels
+        labeled = {id_to_label[fid]: val for fid, val in ordered.items() if fid in id_to_label}
+        final_list.append(labeled)
+    return final_list
+def get_data(
+    client,
+    report_metadata: dict,
+    report_desc: str,
+    cache: bool = False,
+    cache_root=None,
+) -> list[dict]:
+    """Query a Quickbase table for data using cached report metadata.
+    Args:
+        client: Quickbase API client.
+        report_metadata: Full metadata dict (from load_report_metadata_batch).
+        report_desc: Unique description of a specific table report.
+        cache: Whether to cache the retrieved data. Defaults to False.
+        cache_root: Optional cache root path. If not provided, uses CacheManager default.
+    Returns:
+        List of dicts with field labels as keys.
+    Raises:
+        KeyError: If report_desc not found in report_metadata.
+        Exception: If Quickbase API query fails.
+    Example:
+        >>> metadata = load_report_metadata_batch(configs)
+        >>> data = get_data(client, metadata, "sales_open_deals", cache=True)
+        >>> print(f"Found {len(data)} records")
+    """
+    info = report_metadata[report_desc]
+    app_name = info["app_name"]
+    table_name = info["table_name"]
+    report_name = info["report_name"]
+    # Query Quickbase
+    query_data = handle_query(
+        client,
+        info["table_id"],
+        select=info["fields"],
+        where=info["filter"],
+        sort_by=info["report"]["query"]["sortBy"],
+    )
+    data = query_data["data"]
+    # Transform records
+    final_list = _flatten_and_relabel_records(data, info["field_label"], info["fields"])
+    # Cache if requested
+    if cache:
+        cache_mgr = get_cache_manager(cache_root=cache_root)
+        data_path = cache_mgr.get_data_path(app_name, table_name, report_name)
+        cache_mgr.write_file(data_path, json.dumps(final_list, indent=4))
+        logger.info(f"{report_desc} data cached ({len(final_list)} records)")
+    else:
+        logger.info(f"{report_desc} data fetched but not cached ({len(final_list)} records)")
+    return final_list
+def get_data_parallel(
+    client,
+    report_metadata: dict,
+    report_descriptions: list[str],
+    cache: bool = False,
+    cache_root=None,
+    max_workers: int = 8,
+) -> dict[str, list[dict]]:
+    """Fetch multiple reports in parallel using cached report metadata.
+    Executes data fetching for multiple reports concurrently to improve
+    performance. Uses a fail-fast approach: if any report fetch fails,
+    all remaining tasks are cancelled and the exception is raised immediately.
+    Args:
+        client: Quickbase API client. Should be thread-safe for concurrent use.
+        report_metadata: Full metadata dict (from load_report_metadata_batch).
+        report_descriptions: List of report descriptions to fetch.
+        cache: Whether to cache retrieved data. Defaults to False.
+        cache_root: Optional cache root path. If not provided, uses CacheManager default.
+        max_workers: Maximum number of concurrent threads. Default is 8.
+            Adjust based on API rate limits and system resources.
+    Returns:
+        Dict mapping report_description -> list of record dicts.
+    Raises:
+        KeyError: If any report_desc not found in report_metadata.
+        Exception: First exception encountered during parallel execution.
+            All pending tasks are cancelled when an error occurs.
+    Example:
+        >>> metadata = load_report_metadata_batch(configs)
+        >>> descriptions = ["sales_open_deals", "sales_contacts"]
+        >>> all_data = get_data_parallel(client, metadata, descriptions, cache=True)
+        >>> print(f"Fetched {len(all_data)} reports")
+    Note:
+        - Ensure the Quickbase client can handle concurrent requests
+        - Consider API rate limits when setting max_workers
+        - All tasks are cancelled on first failure (fail-fast behavior)
+    """
+    if not report_descriptions:
+        logger.warning("No report descriptions provided, nothing to fetch")
+        return {}
+    total_reports = len(report_descriptions)
+    logger.info(f"Starting parallel fetch for {total_reports} reports with {max_workers} workers")
+    results = {}
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Submit all tasks (fixed argument order)
+        future_to_report = {
+            executor.submit(
+                get_data,
+                client,
+                report_metadata,  # Fixed: was swapped with report_desc
+                report_desc,  # Fixed: was swapped with report_metadata
+                cache=cache,
+                cache_root=cache_root,
+            ): report_desc
+            for report_desc in report_descriptions
+        }
+        # Process as they complete, fail fast on first error
+        for future in as_completed(future_to_report):
+            report_desc = future_to_report[future]
+            try:
+                data = future.result()  # Individual fetches are logged in get_data
+                results[report_desc] = data
+            except Exception as e:
+                # Cancel all remaining tasks
+                executor.shutdown(wait=False, cancel_futures=True)
+                logger.error(f"Failed to fetch {report_desc}: {e}")
+                raise
+    logger.info(f"Successfully completed parallel fetch for all {total_reports} reports")
+    return results
+def load_data(report_metadata: dict, report_desc: str, cache_root=None) -> list[dict]:
+    """Load cached data for a Quickbase report.
+    Args:
+        report_metadata: Full metadata dict (from load_report_metadata_batch).
+        report_desc: Unique description of a specific table report.
+        cache_root: Optional cache root path. If not provided, uses CacheManager default.
+    Returns:
+        List of dicts with field labels as keys.
+    Raises:
+        KeyError: If report_desc not found in report_metadata.
+        FileNotFoundError: If cached data does not exist.
+    Example:
+        >>> metadata = load_report_metadata_batch(configs)
+        >>> data = load_data(metadata, "sales_open_deals")
+        >>> print(f"Loaded {len(data)} records from cache")
+    """
+    info = report_metadata[report_desc]
+    app_name = info["app_name"]
+    table_name = info["table_name"]
+    report_name = info["report_name"]
+    cache_mgr = get_cache_manager(cache_root=cache_root)
+    data_path = cache_mgr.get_data_path(app_name, table_name, report_name)
+    if not data_path.exists():
+        raise FileNotFoundError(f"Cached data not found for '{report_desc}'. Expected: {data_path}")
+    return json.loads(cache_mgr.read_file(data_path))
+def load_data_batch(
+    report_metadata: dict,
+    report_descriptions: list[str],
+    cache_root=None,
+) -> dict[str, list[dict]]:
+    """Load cached data for multiple reports.
+    Sequentially loads cached data for each report description.
+    This is a batch wrapper around load_data for convenience.
+    Args:
+        report_metadata: Full metadata dict (from load_report_metadata_batch).
+        report_descriptions: List of report descriptions to load.
+        cache_root: Optional cache root path. If not provided, uses
+            CacheManager default.
+    Returns:
+        Dict mapping report_description -> list of record dicts.
+    Raises:
+        KeyError: If any report_desc not found in report_metadata.
+        FileNotFoundError: If any cached data does not exist.
+    Example:
+        >>> metadata = load_report_metadata_batch(configs)
+        >>> descriptions = ["sales_open_deals", "sales_contacts"]
+        >>> all_data = load_data_batch(metadata, descriptions)
+        >>> print(f"Loaded {len(all_data)} reports from cache")
+    """
+    if not report_descriptions:
+        return {}
+    data = {}
+    for report_desc in report_descriptions:
+        data[report_desc] = load_data(report_metadata, report_desc, cache_root=cache_root)
+    return data