quickbase-extract 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,98 @@
1
+ """Quickbase Extract - Extract and cache Quickbase report data.
2
+
3
+ A Python package for efficiently retrieving, transforming, and caching data
4
+ from Quickbase reports with built-in error handling, retry logic, and S3 support
5
+ for Lambda environments.
6
+
7
+ Quick Start:
8
+ >>> from quickbase_extract import get_qb_client, refresh_all, load_report_metadata_batch
9
+ >>> from quickbase_extract import get_data_parallel
10
+ >>>
11
+ >>> # Initialize client
12
+ >>> client = get_qb_client(realm="example.quickbase.com", user_token="...")
13
+ >>>
14
+ >>> # Refresh metadata cache
15
+ >>> refresh_all(client, report_configs)
16
+ >>>
17
+ >>> # Load metadata and fetch data
18
+ >>> metadata = load_report_metadata_batch(report_configs)
19
+ >>> data = get_data_parallel(client, metadata, ["report1", "report2"], cache=True)
20
+ """
21
+
22
+ import logging
23
+
24
+ # API operations with error handling
25
+ from quickbase_extract.api_handlers import QuickbaseOperationError, handle_delete, handle_query, handle_upsert
26
+
27
+ # Cache monitoring
28
+ from quickbase_extract.cache_freshness import (
29
+ CacheFileInfo,
30
+ CacheSummary,
31
+ check_cache_freshness,
32
+ get_cache_files,
33
+ get_cache_summary,
34
+ )
35
+
36
+ # Cache management
37
+ from quickbase_extract.cache_manager import CacheManager, get_cache_manager
38
+ from quickbase_extract.cache_sync import is_cache_synced, sync_from_s3_once
39
+
40
+ # Client
41
+ from quickbase_extract.client import get_qb_client
42
+
43
+ # Report data retrieval
44
+ from quickbase_extract.report_data import get_data, get_data_parallel, load_data, load_data_batch
45
+
46
+ # Report metadata
47
+ from quickbase_extract.report_metadata import (
48
+ get_report_metadata,
49
+ get_report_metadata_parallel,
50
+ load_report_metadata,
51
+ load_report_metadata_batch,
52
+ refresh_all,
53
+ )
54
+
55
+ # Utilities
56
+ from quickbase_extract.utils import find_report, normalize_name
57
+
58
# Package version string; also exported through __all__ below.
__version__ = "0.1.0"

# Configure logging: attach a NullHandler to this package's logger so the
# library itself installs no output handler. Applications that want the log
# records configure logging themselves.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Public API. The groups mirror the commented import sections above.
__all__ = [
    # Version
    "__version__",
    # Client
    "get_qb_client",
    # Cache management
    "CacheManager",
    "get_cache_manager",
    "sync_from_s3_once",
    "is_cache_synced",
    # Cache monitoring
    "CacheFileInfo",
    "CacheSummary",
    "check_cache_freshness",
    "get_cache_files",
    "get_cache_summary",
    # API operations
    "QuickbaseOperationError",
    "handle_delete",
    "handle_query",
    "handle_upsert",
    # Report metadata
    "get_report_metadata",
    "get_report_metadata_parallel",
    "load_report_metadata",
    "load_report_metadata_batch",
    "refresh_all",
    # Report data
    "get_data",
    "get_data_parallel",
    "load_data",
    "load_data_batch",
    # Utilities
    "find_report",
    "normalize_name",
]
@@ -0,0 +1,210 @@
1
+ """Error handling utilities for Quickbase operations.
2
+
3
+ Provides retry logic for rate-limited requests, standardized error handling,
4
+ and logging for Quickbase API operations.
5
+ """
6
+
7
+ import logging
8
+ import random
9
+ import time
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class QuickbaseOperationError(Exception):
    """Raised when a Quickbase API operation fails.

    The exception message is ``"Quickbase {operation} failed: {details}"``.

    Attributes:
        operation: Short name of the failed operation (e.g. ``"upsert"``).
        details: Text describing the failure; may be an empty string.
    """

    def __init__(self, operation: str, details: str = "") -> None:
        self.operation = operation
        self.details = details
        super().__init__(f"Quickbase {operation} failed: {details}")

    def __reduce__(self):
        """Rebuild from ``(operation, details)`` when copied or pickled.

        The default ``BaseException.__reduce__`` re-creates the instance as
        ``cls(*self.args)``, and ``args`` holds only the already-formatted
        message. That message would be passed back in as ``operation`` and be
        formatted a second time, garbling ``str(error)`` on the copy.
        """
        # The instance __dict__ is passed as state so any extra attributes set
        # on the exception survive the round trip as well.
        return (type(self), (self.operation, self.details), self.__dict__.copy())
21
+
22
+
23
def handle_upsert(
    client,
    table_id: str,
    data: list[dict],
    description: str = "",
    max_retries: int = 3,
) -> dict:
    """Execute a Quickbase upsert with error handling, retry logic, and logging.

    Retries on rate limiting (429 errors) with exponential backoff and jitter.
    The exponential part of the wait is capped at 60 seconds; up to one extra
    second of random jitter is added on top of it.

    Args:
        client: Quickbase API client exposing ``upsert_records(table_id, data=...)``.
        table_id: Target table ID.
        data: List of record dicts to upsert.
        description: Human-readable description for logging. Defaults to empty string.
        max_retries: Maximum total number of attempts (the first call plus
            retries). Defaults to 3. Values below 1 are treated as 1 so the
            upsert is always attempted.

    Returns:
        API response dict containing metadata about created/updated/unchanged records.

    Raises:
        QuickbaseOperationError: If the call raises a non-rate-limit error, or
            is still rate limited on the last attempt.

    Example:
        >>> records = [{"6": {"value": "John"}, "7": {"value": "Doe"}}]
        >>> result = handle_upsert(client, "bq8xyx9z", records, "customer records")
    """
    label = f" {description}" if description else ""
    # A non-positive max_retries would skip the loop entirely and return None
    # without ever contacting Quickbase; always make at least one attempt.
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        try:
            # Only the API call is guarded. Summarising the response happens
            # below, outside the try, so a surprising response shape cannot
            # turn a write that already succeeded into a reported failure
            # (or, worse, a retried write).
            result = client.upsert_records(table_id, data=data)
        except Exception as e:  # noqa: BLE001 # Need to catch all exceptions for retry logic
            error_str = str(e)

            # Rate limiting is recognised by "429" appearing in the error text.
            if "429" in error_str and attempt < attempts - 1:
                wait_time = min(2**attempt, 60) + random.uniform(0, 1)
                logger.warning(
                    f"Rate limited on upsert{label} (attempt {attempt + 1}/{attempts}), "
                    f"retrying in {wait_time:.1f}s"
                )
                time.sleep(wait_time)
                continue

            logger.error(f"Upsert{label} failed: {error_str}")
            raise QuickbaseOperationError("upsert", error_str) from e

        # Tolerate a missing or null "metadata" section when building the log line.
        metadata = result.get("metadata") if isinstance(result, dict) else None
        if not isinstance(metadata, dict):
            metadata = {}
        created = metadata.get("createdRecordIds") or []
        updated = metadata.get("updatedRecordIds") or []
        unchanged = metadata.get("unchangedRecordIds") or []

        logger.info(
            f"Upsert{label}: {len(created)} created, {len(updated)} updated, "
            f"{len(unchanged)} unchanged"
        )
        return result
80
+
81
+
82
def handle_delete(
    client,
    table_id: str,
    where: str,
    description: str = "",
    max_retries: int = 3,
) -> int:
    """Execute a Quickbase delete with error handling, logging, and rate limit retry.

    Only retries on rate limiting (429 errors) with exponential backoff and jitter.
    Other errors fail immediately for safety. The exponential part of the wait is
    capped at 60 seconds; up to one extra second of random jitter is added on top.

    Args:
        client: Quickbase API client exposing ``delete_records(table_id, where=...)``.
        table_id: Target table ID.
        where: Quickbase filter string specifying records to delete.
        description: Human-readable description for logging. Defaults to empty string.
        max_retries: Maximum total number of attempts (the first call plus
            rate-limit retries). Defaults to 3. Values below 1 are treated as 1
            so the delete is always attempted.

    Returns:
        The value returned by ``client.delete_records`` (the number of records deleted).

    Raises:
        QuickbaseOperationError: If the delete fails.

    Example:
        >>> deleted = handle_delete(client, "bq8xyx9z", "{3.EX.'test'}", "test records")

    Note:
        For safety, only 429 (rate limit) errors are retried. All other errors
        fail immediately to prevent unintended deletions.
    """
    label = f" {description}" if description else ""
    # A non-positive max_retries would skip the loop entirely and return None
    # without issuing the delete; always make at least one attempt.
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        try:
            deleted = client.delete_records(table_id, where=where)
        except Exception as e:  # noqa: BLE001 # Need to catch all exceptions for retry logic
            error_str = str(e)

            # Only retry on 429 (rate limit) - other errors are too risky to retry.
            # Rate limiting is recognised by "429" appearing in the error text.
            if "429" in error_str and attempt < attempts - 1:
                wait_time = min(2**attempt, 60) + random.uniform(0, 1)
                logger.warning(
                    f"Rate limited on delete{label} (attempt {attempt + 1}/{attempts}), "
                    f"retrying in {wait_time:.1f}s"
                )
                time.sleep(wait_time)
                continue

            logger.error(f"Delete{label} failed: {error_str}")
            raise QuickbaseOperationError("delete", error_str) from e

        logger.info(f"Delete{label}: {deleted} records deleted")
        return deleted
134
+
135
+
136
def handle_query(
    client,
    table_id: str,
    *,
    select: list[int] | None = None,
    where: str | None = None,
    sort_by: list[dict] | None = None,
    group_by: list[dict] | None = None,
    options: dict | None = None,
    description: str = "",
    max_retries: int = 3,
) -> dict:
    """Execute a Quickbase query with error handling, retry logic, and logging.

    Retries on rate limiting (429 errors) with exponential backoff and jitter.
    The exponential part of the wait is capped at 60 seconds; up to one extra
    second of random jitter is added on top of it.

    Args:
        client: Quickbase API client exposing ``query_for_data``.
        table_id: Target table ID.
        select: List of field IDs to return. If omitted, returns fields from
            the default report.
        where: A Quickbase query string (e.g., "{12.EX.'VPF'}").
        sort_by: Sort order, e.g., [{"fieldId": 6, "order": "ASC"}].
        group_by: Grouping, e.g., [{"fieldId": 6, "grouping": "equal-values"}].
        options: Additional options, e.g.,
            {"skip": 0, "top": 100, "compareWithAppLocalTime": False}.
        description: Human-readable description for logging. Defaults to empty string.
        max_retries: Maximum total number of attempts (the first call plus
            retries). Defaults to 3. Values below 1 are treated as 1 so the
            query is always attempted.

    Returns:
        API response dict containing query results.

    Raises:
        QuickbaseOperationError: If the call raises a non-rate-limit error, or
            is still rate limited on the last attempt.

    Example:
        >>> result = handle_query(
        ...     client,
        ...     "bq8xyx9z",
        ...     select=[6, 7, 8],
        ...     where="{12.EX.'Active'}",
        ...     description="active customers"
        ... )
    """
    # A non-positive max_retries would skip the loop entirely and return None
    # without running the query; always make at least one attempt.
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        try:
            # Only the API call is guarded; counting rows for the log line
            # happens below so a response without a usable "data" list is not
            # misreported as a failed query.
            result = client.query_for_data(
                table_id,
                select=select,
                where=where,
                sort_by=sort_by,
                group_by=group_by,
                options=options,
            )
        except Exception as e:  # noqa: BLE001 # Need to catch all exceptions for retry logic
            error_str = str(e)

            # Rate limiting is recognised by "429" appearing in the error text.
            if "429" in error_str and attempt < attempts - 1:
                wait_time = min(2**attempt, 60) + random.uniform(0, 1)
                desc_str = f" {description}" if description else f" table {table_id}"
                logger.warning(
                    f"Rate limited on query{desc_str} (attempt {attempt + 1}/{attempts}), "
                    f"retrying in {wait_time:.1f}s"
                )
                time.sleep(wait_time)
                continue

            desc_str = f" {description}" if description else f" on table {table_id}"
            logger.error(f"Query{desc_str} failed: {error_str}")
            raise QuickbaseOperationError("query", error_str) from e

        rows = result.get("data") if isinstance(result, dict) else None
        record_count = len(rows) if rows else 0
        desc_str = f" {description}" if description else ""
        logger.info(f"Query{desc_str} returned {record_count} records")
        return result
@@ -0,0 +1,199 @@
1
+ """Cache monitoring and freshness detection.
2
+
3
+ Inspects cached JSON files, checks their age, and identifies stale cache entries.
4
+ Works with both local and Lambda environments via CacheManager.
5
+ """
6
+
7
+ import logging
8
+ import time
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import TypedDict
12
+
13
+ from quickbase_extract.cache_manager import get_cache_manager
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
# Staleness thresholds for cached files; every value is a number of hours.
DEFAULT_METADATA_STALE_HOURS = 7 * 24  # one week: metadata rarely changes
DEFAULT_DATA_STALE_HOURS = 24  # one day: data should be refreshed more often
DEFAULT_STALE_THRESHOLD_HOURS = 36  # general default when no cache type is given
24
+
25
+
26
class CacheFileInfo(TypedDict):
    """Per-file record describing one cached JSON file."""

    # Path of the file relative to the cache root, as a string.
    file: str
    # Path object for the file as found during the scan.
    path: Path
    # File size in bytes.
    size_bytes: int
    # File size in mebibytes (bytes / 1024 / 1024).
    size_mb: float
    # Last-modification time of the file.
    modified: datetime
    # Time elapsed since the last modification, in hours.
    age_hours: float
35
+
36
+
37
+ class CacheSummary(TypedDict):
38
+ """Summary statistics for cache directory."""
39
+
40
+ cache_dir: str
41
+ total_files: int
42
+ total_size_mb: float
43
+ oldest_file: str | None
44
+ oldest_age_hours: float
45
+ newest_file: str | None
46
+ newest_age_hours: float
47
+
48
+
49
def get_cache_files(cache_root: Path | None = None) -> list[CacheFileInfo]:
    """Get all cached JSON files with their metadata.

    Recursively scans ``cache_root`` for ``*.json`` files. Files that cannot be
    stat'ed (for example because they were removed during the scan) are logged
    and skipped rather than aborting the scan.

    Args:
        cache_root: Optional cache root path. If not provided, uses CacheManager default.

    Returns:
        List of dicts with relative file name, path, size, modification time
        and age. Sorted oldest first, ordered by exact modification time.

    Raises:
        FileNotFoundError: If cache directory doesn't exist.
        NotADirectoryError: If the cache path exists but is not a directory.
        PermissionError: If cache directory is not readable.

    Example:
        >>> files = get_cache_files()
        >>> print(f"Oldest file: {files[0]['file']}, age: {files[0]['age_hours']}h")
    """
    if cache_root is None:
        cache_root = get_cache_manager().cache_root
    else:
        cache_root = Path(cache_root)

    if not cache_root.exists():
        raise FileNotFoundError(f"Cache directory does not exist: {cache_root}")

    if not cache_root.is_dir():
        raise NotADirectoryError(f"Cache path is not a directory: {cache_root}")

    # One reference time for the whole scan keeps every age mutually consistent.
    now = time.time()
    # (mtime, info) pairs; the raw mtime is kept only to order the results.
    found = []

    try:
        # Scan all JSON files in cache directory
        for json_file in cache_root.rglob("*.json"):
            try:
                stat = json_file.stat()
                info = {
                    "file": str(json_file.relative_to(cache_root)),
                    "path": json_file,
                    "size_bytes": stat.st_size,
                    "size_mb": round(stat.st_size / (1024 * 1024), 2),
                    "modified": datetime.fromtimestamp(stat.st_mtime),
                    "age_hours": round((now - stat.st_mtime) / 3600, 1),
                }
            except (OSError, ValueError) as e:
                # File might have been deleted or become inaccessible
                logger.warning(f"Failed to stat file {json_file}: {e}")
                continue
            found.append((stat.st_mtime, info))
    except PermissionError as e:
        raise PermissionError(f"Cannot read cache directory {cache_root}: {e}") from e

    # Order by the raw modification time, not the rounded age: ages are rounded
    # to 0.1h, so files modified within a few minutes of each other share the
    # same rounded value and would otherwise come back in directory order.
    found.sort(key=lambda pair: pair[0])
    return [info for _, info in found]
110
+
111
+
112
def check_cache_freshness(
    threshold_hours: float = DEFAULT_STALE_THRESHOLD_HOURS,
    cache_root: Path | None = None,
) -> list[CacheFileInfo]:
    """Report which cached files are older than a staleness threshold.

    Logs a warning naming the oldest stale file when any are found, and an
    info message when every file is within the threshold.

    Args:
        threshold_hours: Age in hours above which a file counts as stale.
            Defaults to DEFAULT_STALE_THRESHOLD_HOURS (36 hours).
        cache_root: Optional cache root path. If not provided, uses CacheManager default.

    Returns:
        Info dicts for the stale files, in the order produced by
        ``get_cache_files`` (oldest first). Empty list if all files are fresh.

    Raises:
        FileNotFoundError: If cache directory doesn't exist.
        PermissionError: If cache directory is not readable.

    Example:
        >>> stale = check_cache_freshness(threshold_hours=24)
        >>> if stale:
        ...     print(f"Found {len(stale)} files older than 24 hours")
    """
    all_files = get_cache_files(cache_root=cache_root)

    stale_files = []
    for info in all_files:
        if info["age_hours"] > threshold_hours:
            stale_files.append(info)

    # Guard clause: nothing stale, so report the all-clear and stop.
    if not stale_files:
        logger.info(f"All {len(all_files)} cache files are fresh (within {threshold_hours}h)")
        return stale_files

    worst = stale_files[0]
    logger.warning(
        f"Found {len(stale_files)} stale cache files (older than {threshold_hours}h). "
        f"Oldest: {worst['file']} ({worst['age_hours']}h)"
    )
    return stale_files
148
+
149
+
150
def get_cache_summary(cache_root: Path | None = None) -> CacheSummary:
    """Summarise the cache directory: file count, total size, oldest and newest file.

    Args:
        cache_root: Optional cache root path. If not provided, uses CacheManager default.

    Returns:
        Dict with the cache directory, number of files, combined size in MB,
        and the relative name and age of the oldest and newest files. For an
        empty cache the counts and ages are zero and both file names are None.

    Raises:
        FileNotFoundError: If cache directory doesn't exist.
        PermissionError: If cache directory is not readable.

    Example:
        >>> summary = get_cache_summary()
        >>> print(f"Cache: {summary['total_files']} files, {summary['total_size_mb']} MB")
        >>> print(f"Oldest: {summary['oldest_file']} ({summary['oldest_age_hours']}h)")
    """
    root = get_cache_manager().cache_root if cache_root is None else Path(cache_root)
    entries = get_cache_files(cache_root=root)

    # Start from the empty-cache answer and fill it in only when files exist.
    summary = {
        "cache_dir": str(root),
        "total_files": 0,
        "total_size_mb": 0.0,
        "oldest_file": None,
        "oldest_age_hours": 0.0,
        "newest_file": None,
        "newest_age_hours": 0.0,
    }
    if not entries:
        return summary

    # Entries arrive oldest first, so the two ends are the extremes.
    oldest, newest = entries[0], entries[-1]
    byte_total = sum(entry["size_bytes"] for entry in entries)

    summary["total_files"] = len(entries)
    summary["total_size_mb"] = round(byte_total / (1024 * 1024), 1)
    summary["oldest_file"] = oldest["file"]
    summary["oldest_age_hours"] = oldest["age_hours"]
    summary["newest_file"] = newest["file"]
    summary["newest_age_hours"] = newest["age_hours"]
    return summary