quickbase-extract 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quickbase_extract/__init__.py +98 -0
- quickbase_extract/api_handlers.py +210 -0
- quickbase_extract/cache_freshness.py +199 -0
- quickbase_extract/cache_manager.py +234 -0
- quickbase_extract/cache_sync.py +74 -0
- quickbase_extract/client.py +61 -0
- quickbase_extract/py.typed +0 -0
- quickbase_extract/report_data.py +253 -0
- quickbase_extract/report_metadata.py +316 -0
- quickbase_extract/utils.py +42 -0
- quickbase_extract-0.1.0.dist-info/METADATA +1735 -0
- quickbase_extract-0.1.0.dist-info/RECORD +14 -0
- quickbase_extract-0.1.0.dist-info/WHEEL +4 -0
- quickbase_extract-0.1.0.dist-info/licenses/LICENSE.txt +21 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Quickbase Extract - Extract and cache Quickbase report data.
|
|
2
|
+
|
|
3
|
+
A Python package for efficiently retrieving, transforming, and caching data
|
|
4
|
+
from Quickbase reports with built-in error handling, retry logic, and S3 support
|
|
5
|
+
for Lambda environments.
|
|
6
|
+
|
|
7
|
+
Quick Start:
|
|
8
|
+
>>> from quickbase_extract import get_qb_client, refresh_all, load_report_metadata_batch
|
|
9
|
+
>>> from quickbase_extract import get_data_parallel
|
|
10
|
+
>>>
|
|
11
|
+
>>> # Initialize client
|
|
12
|
+
>>> client = get_qb_client(realm="example.quickbase.com", user_token="...")
|
|
13
|
+
>>>
|
|
14
|
+
>>> # Refresh metadata cache
|
|
15
|
+
>>> refresh_all(client, report_configs)
|
|
16
|
+
>>>
|
|
17
|
+
>>> # Load metadata and fetch data
|
|
18
|
+
>>> metadata = load_report_metadata_batch(report_configs)
|
|
19
|
+
>>> data = get_data_parallel(client, metadata, ["report1", "report2"], cache=True)
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
|
|
24
|
+
# API operations with error handling
|
|
25
|
+
from quickbase_extract.api_handlers import QuickbaseOperationError, handle_delete, handle_query, handle_upsert
|
|
26
|
+
|
|
27
|
+
# Cache monitoring
|
|
28
|
+
from quickbase_extract.cache_freshness import (
|
|
29
|
+
CacheFileInfo,
|
|
30
|
+
CacheSummary,
|
|
31
|
+
check_cache_freshness,
|
|
32
|
+
get_cache_files,
|
|
33
|
+
get_cache_summary,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# Cache management
|
|
37
|
+
from quickbase_extract.cache_manager import CacheManager, get_cache_manager
|
|
38
|
+
from quickbase_extract.cache_sync import is_cache_synced, sync_from_s3_once
|
|
39
|
+
|
|
40
|
+
# Client
|
|
41
|
+
from quickbase_extract.client import get_qb_client
|
|
42
|
+
|
|
43
|
+
# Report data retrieval
|
|
44
|
+
from quickbase_extract.report_data import get_data, get_data_parallel, load_data, load_data_batch
|
|
45
|
+
|
|
46
|
+
# Report metadata
|
|
47
|
+
from quickbase_extract.report_metadata import (
|
|
48
|
+
get_report_metadata,
|
|
49
|
+
get_report_metadata_parallel,
|
|
50
|
+
load_report_metadata,
|
|
51
|
+
load_report_metadata_batch,
|
|
52
|
+
refresh_all,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Utilities
|
|
56
|
+
from quickbase_extract.utils import find_report, normalize_name
|
|
57
|
+
|
|
58
|
+
__version__ = "0.1.0"

# Attach a NullHandler to the package logger. This is the standard setup for
# library code: the package itself adds no real handlers and leaves it to the
# application to decide where log records go.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Public API of the package: every name below is imported above from one of
# the submodules and re-exported here, grouped by area.
__all__ = [
    # Version
    "__version__",
    # Client
    "get_qb_client",
    # Cache management
    "CacheManager",
    "get_cache_manager",
    "sync_from_s3_once",
    "is_cache_synced",
    # Cache monitoring
    "CacheFileInfo",
    "CacheSummary",
    "check_cache_freshness",
    "get_cache_files",
    "get_cache_summary",
    # API operations
    "QuickbaseOperationError",
    "handle_delete",
    "handle_query",
    "handle_upsert",
    # Report metadata
    "get_report_metadata",
    "get_report_metadata_parallel",
    "load_report_metadata",
    "load_report_metadata_batch",
    "refresh_all",
    # Report data
    "get_data",
    "get_data_parallel",
    "load_data",
    "load_data_batch",
    # Utilities
    "find_report",
    "normalize_name",
]
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Error handling utilities for Quickbase operations.
|
|
2
|
+
|
|
3
|
+
Provides retry logic for rate-limited requests, standardized error handling,
|
|
4
|
+
and logging for Quickbase API operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import random
|
|
9
|
+
import time
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class QuickbaseOperationError(Exception):
    """Raised when a Quickbase API operation fails.

    The exception message has the form
    ``"Quickbase <operation> failed: <details>"``.

    Attributes:
        operation: Name of the operation that failed (e.g. ``"upsert"``).
        details: Error detail text; empty string when none was supplied.
    """

    def __init__(self, operation: str, details: str = ""):
        self.operation = operation
        self.details = details
        super().__init__(f"Quickbase {operation} failed: {details}")

    def __reduce__(self):
        """Rebuild the exception from its constructor arguments.

        The default ``BaseException`` reduction replays ``self.args`` (the
        already formatted message) into ``__init__``, which would treat that
        message as ``operation`` and wrap it a second time.  Returning the
        original arguments keeps ``str(exc)`` stable across ``copy`` and
        ``pickle`` round-trips; the instance ``__dict__`` is passed as state so
        any extra attributes set on the instance are carried over as well.
        """
        return (type(self), (self.operation, self.details), dict(vars(self)))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def handle_upsert(
    client,
    table_id: str,
    data: list[dict],
    description: str = "",
    max_retries: int = 3,
) -> dict:
    """Execute a Quickbase upsert with error handling, retry logic, and logging.

    Retries on rate limiting with exponential backoff and jitter.  An error is
    treated as a rate limit when the text of the raised exception contains
    ``"429"``.  The backoff is ``2**attempt`` seconds capped at 60, plus up to
    one second of random jitter.

    Args:
        client: Quickbase API client; must provide ``upsert_records``.
        table_id: Target table ID.
        data: List of record dicts to upsert.
        description: Human-readable description for logging. Defaults to empty string.
        max_retries: Maximum number of attempts in total. Defaults to 3.
            Values below 1 are treated as 1, so the upsert is always tried
            at least once.

    Returns:
        API response dict containing metadata about created/updated/unchanged records.

    Raises:
        QuickbaseOperationError: If the upsert fails with a non-rate-limit
            error, or is still rate limited on the last attempt.

    Example:
        >>> records = [{"6": {"value": "John"}, "7": {"value": "Doe"}}]
        >>> result = handle_upsert(client, "bq8xyx9z", records, "customer records")
    """
    # Always try at least once: with ``max_retries <= 0`` a bare
    # ``range(max_retries)`` loop would never call the API and the function
    # would silently return ``None``.
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        try:
            result = client.upsert_records(table_id, data=data)

            metadata = result.get("metadata", {})
            created = metadata.get("createdRecordIds", [])
            updated = metadata.get("updatedRecordIds", [])
            unchanged = metadata.get("unchangedRecordIds", [])

            logger.info(
                f"Upsert {description}: {len(created)} created, {len(updated)} updated, "
                f"{len(unchanged)} unchanged"
            )

            return result

        except Exception as e:  # noqa: BLE001  # Need to catch all exceptions for retry logic
            error_str = str(e)

            # Retry on 429 (rate limit) while attempts remain
            if "429" in error_str and attempt < attempts - 1:
                wait_time = min(2**attempt, 60) + random.uniform(0, 1)
                logger.warning(
                    f"Rate limited on upsert {description} (attempt {attempt + 1}/{attempts}), "
                    f"retrying in {wait_time:.1f}s"
                )
                time.sleep(wait_time)
            else:
                logger.error(f"Upsert {description} failed: {error_str}")
                raise QuickbaseOperationError("upsert", error_str) from e
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def handle_delete(
    client,
    table_id: str,
    where: str,
    description: str = "",
    max_retries: int = 3,
) -> int:
    """Execute a Quickbase delete with error handling, logging, and rate limit retry.

    Only retries on rate limiting, with exponential backoff and jitter.  An
    error is treated as a rate limit when the text of the raised exception
    contains ``"429"``.  The backoff is ``2**attempt`` seconds capped at 60,
    plus up to one second of random jitter.  Other errors fail immediately.

    Args:
        client: Quickbase API client; must provide ``delete_records``.
        table_id: Target table ID.
        where: Quickbase filter string specifying records to delete.
        description: Human-readable description for logging. Defaults to empty string.
        max_retries: Maximum number of attempts in total. Defaults to 3.
            Values below 1 are treated as 1, so the delete is always tried
            at least once.

    Returns:
        Number of records deleted, as reported by ``client.delete_records``.

    Raises:
        QuickbaseOperationError: If the delete fails with a non-rate-limit
            error, or is still rate limited on the last attempt.

    Example:
        >>> deleted = handle_delete(client, "bq8xyx9z", "{3.EX.'test'}", "test records")

    Note:
        For safety, only 429 (rate limit) errors are retried. All other errors
        fail immediately to prevent unintended deletions.
    """
    # Always try at least once: with ``max_retries <= 0`` a bare
    # ``range(max_retries)`` loop would never call the API and the function
    # would silently return ``None``.
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        try:
            deleted = client.delete_records(table_id, where=where)
            logger.info(f"Delete {description}: {deleted} records deleted")
            return deleted

        except Exception as e:  # noqa: BLE001  # Need to catch all exceptions for retry logic
            error_str = str(e)

            # Only retry on 429 (rate limit) - other errors are too risky to retry
            if "429" in error_str and attempt < attempts - 1:
                wait_time = min(2**attempt, 60) + random.uniform(0, 1)
                logger.warning(
                    f"Rate limited on delete {description} (attempt {attempt + 1}/{attempts}), "
                    f"retrying in {wait_time:.1f}s"
                )
                time.sleep(wait_time)
            else:
                logger.error(f"Delete {description} failed: {error_str}")
                raise QuickbaseOperationError("delete", error_str) from e
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def handle_query(
    client,
    table_id: str,
    *,
    select: list[int] | None = None,
    where: str | None = None,
    sort_by: list[dict] | None = None,
    group_by: list[dict] | None = None,
    options: dict | None = None,
    description: str = "",
    max_retries: int = 3,
) -> dict:
    """Execute a Quickbase query with error handling, retry logic, and logging.

    Retries on rate limiting with exponential backoff and jitter.  An error is
    treated as a rate limit when the text of the raised exception contains
    ``"429"``.  The backoff is ``2**attempt`` seconds capped at 60, plus up to
    one second of random jitter.

    Args:
        client: Quickbase API client; must provide ``query_for_data``.
        table_id: Target table ID.
        select: List of field IDs to return. If omitted, returns fields from
            the default report.
        where: A Quickbase query string (e.g., "{12.EX.'VPF'}").
        sort_by: Sort order, e.g., [{"fieldId": 6, "order": "ASC"}].
        group_by: Grouping, e.g., [{"fieldId": 6, "grouping": "equal-values"}].
        options: Additional options, e.g.,
            {"skip": 0, "top": 100, "compareWithAppLocalTime": False}.
        description: Human-readable description for logging. Defaults to empty string.
        max_retries: Maximum number of attempts in total. Defaults to 3.
            Values below 1 are treated as 1, so the query is always tried
            at least once.

    Returns:
        API response dict containing query results.

    Raises:
        QuickbaseOperationError: If the query fails with a non-rate-limit
            error, or is still rate limited on the last attempt.

    Example:
        >>> result = handle_query(
        ...     client,
        ...     "bq8xyx9z",
        ...     select=[6, 7, 8],
        ...     where="{12.EX.'Active'}",
        ...     description="active customers"
        ... )
    """
    # Always try at least once: with ``max_retries <= 0`` a bare
    # ``range(max_retries)`` loop would never call the API and the function
    # would silently return ``None``.
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        try:
            result = client.query_for_data(
                table_id,
                select=select,
                where=where,
                sort_by=sort_by,
                group_by=group_by,
                options=options,
            )
            record_count = len(result.get("data", []))
            desc_str = f" {description}" if description else ""
            logger.info(f"Query{desc_str} returned {record_count} records")
            return result

        except Exception as e:  # noqa: BLE001  # Need to catch all exceptions for retry logic
            error_str = str(e)

            # Retry on 429 (rate limit) while attempts remain
            if "429" in error_str and attempt < attempts - 1:
                wait_time = min(2**attempt, 60) + random.uniform(0, 1)
                # Fall back to the table ID when no description was given
                desc_str = f" {description}" if description else f" table {table_id}"
                logger.warning(
                    f"Rate limited on query{desc_str} (attempt {attempt + 1}/{attempts}), "
                    f"retrying in {wait_time:.1f}s"
                )
                time.sleep(wait_time)
            else:
                desc_str = f" {description}" if description else f" on table {table_id}"
                logger.error(f"Query{desc_str} failed: {error_str}")
                raise QuickbaseOperationError("query", error_str) from e
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""Cache monitoring and freshness detection.
|
|
2
|
+
|
|
3
|
+
Inspects cached JSON files, checks their age, and identifies stale cache entries.
|
|
4
|
+
Works with both local and Lambda environments via CacheManager.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import time
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import TypedDict
|
|
12
|
+
|
|
13
|
+
from quickbase_extract.cache_manager import get_cache_manager
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
# Cache freshness thresholds, all expressed in hours.

# Metadata rarely changes, so a longer threshold is acceptable.
DEFAULT_METADATA_STALE_HOURS = 168  # 7 days
# Data should be refreshed more frequently.
DEFAULT_DATA_STALE_HOURS = 24  # 1 day
# General default when the cache type isn't specified; used as the default
# ``threshold_hours`` of check_cache_freshness().
DEFAULT_STALE_THRESHOLD_HOURS = 36  # 1.5 days
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CacheFileInfo(TypedDict):
    """Information about a cached file, as produced by get_cache_files()."""

    # Path of the file relative to the cache root, as a string.
    file: str
    # Path of the file as yielded by the scan of the cache root.
    path: Path
    # File size in bytes (``st_size``).
    size_bytes: int
    # File size in MiB (bytes / 1024**2), rounded to 2 decimal places.
    size_mb: float
    # Last-modification time built with ``datetime.fromtimestamp`` (naive, local time).
    modified: datetime
    # Hours elapsed since the last modification, rounded to 1 decimal place.
    age_hours: float
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CacheSummary(TypedDict):
    """Summary statistics for cache directory, as produced by get_cache_summary()."""

    # Cache root directory, as a string.
    cache_dir: str
    # Number of cached JSON files found.
    total_files: int
    # Combined size of all files in MiB, rounded to 1 decimal place (0.0 when empty).
    total_size_mb: float
    # Relative path of the oldest file; None when the cache holds no files.
    oldest_file: str | None
    # Age of the oldest file in hours; 0.0 when the cache holds no files.
    oldest_age_hours: float
    # Relative path of the newest file; None when the cache holds no files.
    newest_file: str | None
    # Age of the newest file in hours; 0.0 when the cache holds no files.
    newest_age_hours: float
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_cache_files(cache_root: Path | None = None) -> list[CacheFileInfo]:
    """Get all cached JSON files with their metadata.

    Recursively scans the cache root for ``*.json`` files.  Files that cannot
    be stat'ed (for example because they vanished during the scan) are logged
    with a warning and skipped.

    Args:
        cache_root: Optional cache root path. If not provided, uses CacheManager default.

    Returns:
        List of dicts with file path, size, and modification time.
        Sorted by age in descending order (oldest files first).  Ordering
        uses the exact modification time, not the rounded ``age_hours``
        value, so files modified within minutes of each other still come
        back in true oldest-first order.

    Raises:
        FileNotFoundError: If cache directory doesn't exist.
        NotADirectoryError: If the cache path exists but is not a directory.
        PermissionError: If cache directory is not readable.

    Example:
        >>> files = get_cache_files()
        >>> print(f"Oldest file: {files[0]['file']}, age: {files[0]['age_hours']}h")
    """
    if cache_root is None:
        cache_mgr = get_cache_manager()
        cache_root = cache_mgr.cache_root
    else:
        cache_root = Path(cache_root)

    if not cache_root.exists():
        raise FileNotFoundError(f"Cache directory does not exist: {cache_root}")

    if not cache_root.is_dir():
        raise NotADirectoryError(f"Cache path is not a directory: {cache_root}")

    # One clock reading for the whole scan so every age is measured against
    # the same instant.
    now = time.time()

    try:
        # (mtime, info) pairs; the raw mtime is kept as the sort key because
        # the rounded ``age_hours`` is too coarse to order files reliably.
        entries = []

        # Scan all JSON files in cache directory
        for json_file in cache_root.rglob("*.json"):
            try:
                stat = json_file.stat()
                info = {
                    "file": str(json_file.relative_to(cache_root)),
                    "path": json_file,
                    "size_bytes": stat.st_size,
                    "size_mb": round(stat.st_size / (1024 * 1024), 2),
                    "modified": datetime.fromtimestamp(stat.st_mtime),
                    "age_hours": round((now - stat.st_mtime) / 3600, 1),
                }
            except (OSError, ValueError) as e:
                # File might have been deleted or become inaccessible
                logger.warning(f"Failed to stat file {json_file}: {e}")
                continue
            entries.append((stat.st_mtime, info))

    except PermissionError as e:
        raise PermissionError(f"Cannot read cache directory {cache_root}: {e}") from e

    # Smallest mtime first == oldest first (age descending)
    entries.sort(key=lambda entry: entry[0])
    return [info for _, info in entries]
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def check_cache_freshness(
    threshold_hours: float = DEFAULT_STALE_THRESHOLD_HOURS,
    cache_root: Path | None = None,
) -> list[CacheFileInfo]:
    """Report which cache files are older than a threshold.

    Logs a warning naming the oldest stale file when any are found, or an
    info message when every file is within the threshold.

    Args:
        threshold_hours: Age in hours above which a file counts as stale.
            Defaults to DEFAULT_STALE_THRESHOLD_HOURS (36 hours).
        cache_root: Optional cache root path. If not provided, uses CacheManager default.

    Returns:
        Info dicts for the stale files, in the order get_cache_files()
        returns them (oldest first). Empty list if all files are fresh.

    Raises:
        FileNotFoundError: If cache directory doesn't exist.
        PermissionError: If cache directory is not readable.

    Example:
        >>> stale = check_cache_freshness(threshold_hours=24)
        >>> if stale:
        ...     print(f"Found {len(stale)} files older than 24 hours")
    """
    all_files = get_cache_files(cache_root=cache_root)
    stale_files = list(filter(lambda info: info["age_hours"] > threshold_hours, all_files))

    # Guard clause: nothing stale, report and leave.
    if not stale_files:
        logger.info(f"All {len(all_files)} cache files are fresh (within {threshold_hours}h)")
        return stale_files

    worst = stale_files[0]
    logger.warning(
        f"Found {len(stale_files)} stale cache files (older than {threshold_hours}h). "
        f"Oldest: {worst['file']} ({worst['age_hours']}h)"
    )
    return stale_files
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def get_cache_summary(cache_root: Path | None = None) -> CacheSummary:
    """Summarize the contents of the cache directory.

    Args:
        cache_root: Optional cache root path. If not provided, uses CacheManager default.

    Returns:
        Dict with the cache directory, file count, total size in MB, and the
        name and age of the oldest and newest files. For an empty cache the
        counts and ages are zero and the file names are None.

    Raises:
        FileNotFoundError: If cache directory doesn't exist.
        PermissionError: If cache directory is not readable.

    Example:
        >>> summary = get_cache_summary()
        >>> print(f"Cache: {summary['total_files']} files, {summary['total_size_mb']} MB")
        >>> print(f"Oldest: {summary['oldest_file']} ({summary['oldest_age_hours']}h)")
    """
    root = get_cache_manager().cache_root if cache_root is None else Path(cache_root)
    entries = get_cache_files(cache_root=root)

    if entries:
        # get_cache_files() returns oldest first, so the ends of the list
        # are the oldest and newest files.
        first, last = entries[0], entries[-1]
        size_mb = round(sum(entry["size_bytes"] for entry in entries) / (1024 * 1024), 1)
        oldest_name, oldest_age = first["file"], first["age_hours"]
        newest_name, newest_age = last["file"], last["age_hours"]
    else:
        size_mb = 0.0
        oldest_name = newest_name = None
        oldest_age = newest_age = 0.0

    return {
        "cache_dir": str(root),
        "total_files": len(entries),
        "total_size_mb": size_mb,
        "oldest_file": oldest_name,
        "oldest_age_hours": oldest_age,
        "newest_file": newest_name,
        "newest_age_hours": newest_age,
    }
|