quickbase-extract 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/.editorconfig +0 -8
  2. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/.gitignore +0 -9
  3. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/CHANGELOG.md +11 -0
  4. quickbase_extract-0.2.0/README.md → quickbase_extract-0.2.1/PKG-INFO +27 -56
  5. quickbase_extract-0.2.0/PKG-INFO → quickbase_extract-0.2.1/README.md +0 -83
  6. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/pyproject.toml +1 -1
  7. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/src/quickbase_extract/__init__.py +12 -2
  8. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/src/quickbase_extract/api_handlers.py +5 -3
  9. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/src/quickbase_extract/cache_manager.py +6 -0
  10. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/src/quickbase_extract/cache_sync.py +5 -17
  11. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/tests/conftest.py +9 -0
  12. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/tests/test_api_handlers.py +2 -0
  13. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/tests/test_cache_manager.py +127 -32
  14. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/tests/test_cache_orchestration.py +2 -0
  15. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/tests/test_cache_sync.py +2 -0
  16. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/tests/test_report_data.py +2 -0
  17. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/tests/test_report_metadata.py +2 -0
  18. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/.pre-commit-config.yaml +0 -0
  19. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/.python-version +0 -0
  20. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/LICENSE.txt +0 -0
  21. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/TODO.md +0 -0
  22. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/src/quickbase_extract/cache_orchestration.py +0 -0
  23. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/src/quickbase_extract/config.py +0 -0
  24. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/src/quickbase_extract/py.typed +0 -0
  25. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/src/quickbase_extract/report_data.py +0 -0
  26. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/src/quickbase_extract/report_metadata.py +0 -0
  27. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/src/quickbase_extract/utils.py +0 -0
  28. {quickbase_extract-0.2.0 → quickbase_extract-0.2.1}/tests/test_utils.py +0 -0
@@ -20,14 +20,6 @@ indent_style = space
20
20
  indent_size = 4
21
21
  max_line_length = 120
22
22
 
23
- # ============================================================
24
- # JavaScript / TypeScript
25
- # ============================================================
26
- [*.{js,jsx,ts,tsx}]
27
- indent_style = space
28
- indent_size = 2
29
- max_line_length = 120
30
-
31
23
  # ============================================================
32
24
  # JSON
33
25
  # ============================================================
@@ -48,15 +48,6 @@ src/*.egg-info/
48
48
  *.log
49
49
  logs/
50
50
 
51
- # Database files
52
- *.db
53
- *.sqlite
54
- *.sqlite3
55
-
56
- # Node modules
57
- node_modules/
58
- npm-debug.log
59
-
60
51
  # IDE caches
61
52
  .idea/
62
53
  *.swp
@@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.2.1] - 2026-04-25
9
+
10
+ ### Fixed
11
+
12
+ - `sync_from_s3()` now preserves S3 `LastModified` timestamps via `os.utime()`, so `get_cache_age_hours()` returns accurate ages after S3 restore (previously always returned ~0 on Lambda cold start)
13
+ - Renamed misleading `should_sync` variable to `already_synced` in `sync_from_s3_once()` for clarity
14
+
15
+ ### Removed
16
+
17
+ - `FORCE_CACHE_REFRESH` environment variable support from `sync_from_s3_once()` — use `force=True` parameter instead
18
+
8
19
  ## [0.2.0] - 2026-04-22
9
20
 
10
21
  ### Added
@@ -1,3 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: quickbase-extract
3
+ Version: 0.2.1
4
+ Summary: Extract and cache Quickbase report data with built-in error handling and S3 support
5
+ Project-URL: Homepage, https://github.com/tbrezler/quickbase-extract
6
+ Project-URL: Repository, https://github.com/tbrezler/quickbase-extract.git
7
+ Project-URL: Documentation, https://github.com/tbrezler/quickbase-extract#readme
8
+ Author-email: Tyler Brezler <tbrezler@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE.txt
11
+ Keywords: caching,data-extraction,etl,quickbase
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.12
19
+ Requires-Dist: boto3>=1.26.0
20
+ Requires-Dist: quickbase-api>=0.3.1
21
+ Provides-Extra: dev
22
+ Requires-Dist: black>=23.0; extra == 'dev'
23
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
24
+ Requires-Dist: pytest>=7.0; extra == 'dev'
25
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
26
+ Description-Content-Type: text/markdown
27
+
1
28
  # Quickbase Extract
2
29
 
3
30
  A Python package for efficiently extracting, transforming, and caching data from Quickbase reports with built-in error handling, retry logic, and S3 support for AWS Lambda environments.
@@ -855,7 +882,6 @@ s3://mit-bio-quickbase/my_project/dev/cache/report_metadata/...
855
882
  | `CACHE_BUCKET` | S3 bucket for Lambda cache | - |
856
883
  | `METADATA_STALE_HOURS` | Threshold (hours) for metadata cache staleness | `168` (7 days) |
857
884
  | `DATA_STALE_HOURS` | Threshold (hours) for data cache staleness | `24` (1 day) |
858
- | `FORCE_CACHE_REFRESH` | If set to "true", forces cache refresh on next sync | - |
859
885
 
860
886
  ### Custom Cache Location
861
887
 
@@ -1238,7 +1264,6 @@ Checks metadata and data caches independently. Refreshes only the caches that ar
1238
1264
  **Environment Variables:**
1239
1265
  - `METADATA_STALE_HOURS`: Override default metadata staleness threshold (in hours)
1240
1266
  - `DATA_STALE_HOURS`: Override default data staleness threshold (in hours)
1241
- - `FORCE_ALL_CACHE_REFRESH`: Set to "true" to force refresh regardless of cache state
1242
1267
 
1243
1268
  **Returns:** None
1244
1269
 
@@ -2081,43 +2106,6 @@ fields = info["fields"]
2081
2106
 
2082
2107
  ---
2083
2108
 
2084
- ### Issue: "FORCE_CACHE_REFRESH not working"
2085
-
2086
- **Symptom:** Set `FORCE_CACHE_REFRESH=true` but cache still not refreshing.
2087
-
2088
- **Cause:** Environment variable case-sensitive or not set correctly.
2089
-
2090
- **Solution:**
2091
-
2092
- 1. **Verify env var is set (case-sensitive):**
2093
- ```bash
2094
- # Must be exactly this (uppercase)
2095
- export FORCE_ALL_CACHE_REFRESH=true
2096
-
2097
- # Not these (wrong):
2098
- export force_cache_refresh=true
2099
- export Force_Cache_Refresh=true
2100
- ```
2101
-
2102
- 2. **Verify it's being read:**
2103
- ```python
2104
- import os
2105
- print(os.environ.get("FORCE_ALL_CACHE_REFRESH")) # Should print "true"
2106
- ```
2107
-
2108
- 3. **Use programmatic force instead:**
2109
- ```python
2110
- ensure_cache_freshness(
2111
- client=client,
2112
- cache_manager=cache_mgr,
2113
- report_configs_all=get_all_reports(),
2114
- report_configs_to_cache=get_reports_to_cache(),
2115
- force_all=True # Programmatic force (always works)
2116
- )
2117
- ```
2118
-
2119
- ---
2120
-
2121
2109
  ### Issue: Data fetch returns empty or different results
2122
2110
 
2123
2111
  **Symptom:** `get_data()` returns empty list or fewer records than expected.
@@ -2426,23 +2414,6 @@ ensure_cache_freshness(
2426
2414
  )
2427
2415
  ```
2428
2416
 
2429
- #### Environment Variable Force
2430
-
2431
- Set `FORCE_ALL_CACHE_REFRESH=true` before invoking:
2432
-
2433
- ```bash
2434
- # In Lambda environment variables or shell
2435
- export FORCE_ALL_CACHE_REFRESH=true
2436
-
2437
- # Then call normally (will force refresh automatically)
2438
- ensure_cache_freshness(
2439
- client=client,
2440
- cache_manager=cache_mgr,
2441
- report_configs_all=get_all_reports(),
2442
- report_configs_to_cache=get_reports_to_cache()
2443
- )
2444
- ```
2445
-
2446
2417
  ### Cache-All-Data Mode
2447
2418
 
2448
2419
  For production, cache data for all reports instead of a subset:
@@ -1,30 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: quickbase-extract
3
- Version: 0.2.0
4
- Summary: Extract and cache Quickbase report data with built-in error handling and S3 support
5
- Project-URL: Homepage, https://github.com/tbrezler/quickbase-extract
6
- Project-URL: Repository, https://github.com/tbrezler/quickbase-extract.git
7
- Project-URL: Documentation, https://github.com/tbrezler/quickbase-extract#readme
8
- Author-email: Tyler Brezler <tbrezler@gmail.com>
9
- License: MIT
10
- License-File: LICENSE.txt
11
- Keywords: caching,data-extraction,etl,quickbase
12
- Classifier: Development Status :: 3 - Alpha
13
- Classifier: Intended Audience :: Developers
14
- Classifier: License :: OSI Approved :: MIT License
15
- Classifier: Programming Language :: Python :: 3
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
- Requires-Python: >=3.12
19
- Requires-Dist: boto3>=1.26.0
20
- Requires-Dist: quickbase-api>=0.3.1
21
- Provides-Extra: dev
22
- Requires-Dist: black>=23.0; extra == 'dev'
23
- Requires-Dist: pytest-cov>=4.0; extra == 'dev'
24
- Requires-Dist: pytest>=7.0; extra == 'dev'
25
- Requires-Dist: ruff>=0.1.0; extra == 'dev'
26
- Description-Content-Type: text/markdown
27
-
28
1
  # Quickbase Extract
29
2
 
30
3
  A Python package for efficiently extracting, transforming, and caching data from Quickbase reports with built-in error handling, retry logic, and S3 support for AWS Lambda environments.
@@ -882,7 +855,6 @@ s3://mit-bio-quickbase/my_project/dev/cache/report_metadata/...
882
855
  | `CACHE_BUCKET` | S3 bucket for Lambda cache | - |
883
856
  | `METADATA_STALE_HOURS` | Threshold (hours) for metadata cache staleness | `168` (7 days) |
884
857
  | `DATA_STALE_HOURS` | Threshold (hours) for data cache staleness | `24` (1 day) |
885
- | `FORCE_CACHE_REFRESH` | If set to "true", forces cache refresh on next sync | - |
886
858
 
887
859
  ### Custom Cache Location
888
860
 
@@ -1265,7 +1237,6 @@ Checks metadata and data caches independently. Refreshes only the caches that ar
1265
1237
  **Environment Variables:**
1266
1238
  - `METADATA_STALE_HOURS`: Override default metadata staleness threshold (in hours)
1267
1239
  - `DATA_STALE_HOURS`: Override default data staleness threshold (in hours)
1268
- - `FORCE_ALL_CACHE_REFRESH`: Set to "true" to force refresh regardless of cache state
1269
1240
 
1270
1241
  **Returns:** None
1271
1242
 
@@ -2108,43 +2079,6 @@ fields = info["fields"]
2108
2079
 
2109
2080
  ---
2110
2081
 
2111
- ### Issue: "FORCE_CACHE_REFRESH not working"
2112
-
2113
- **Symptom:** Set `FORCE_CACHE_REFRESH=true` but cache still not refreshing.
2114
-
2115
- **Cause:** Environment variable case-sensitive or not set correctly.
2116
-
2117
- **Solution:**
2118
-
2119
- 1. **Verify env var is set (case-sensitive):**
2120
- ```bash
2121
- # Must be exactly this (uppercase)
2122
- export FORCE_ALL_CACHE_REFRESH=true
2123
-
2124
- # Not these (wrong):
2125
- export force_cache_refresh=true
2126
- export Force_Cache_Refresh=true
2127
- ```
2128
-
2129
- 2. **Verify it's being read:**
2130
- ```python
2131
- import os
2132
- print(os.environ.get("FORCE_ALL_CACHE_REFRESH")) # Should print "true"
2133
- ```
2134
-
2135
- 3. **Use programmatic force instead:**
2136
- ```python
2137
- ensure_cache_freshness(
2138
- client=client,
2139
- cache_manager=cache_mgr,
2140
- report_configs_all=get_all_reports(),
2141
- report_configs_to_cache=get_reports_to_cache(),
2142
- force_all=True # Programmatic force (always works)
2143
- )
2144
- ```
2145
-
2146
- ---
2147
-
2148
2082
  ### Issue: Data fetch returns empty or different results
2149
2083
 
2150
2084
  **Symptom:** `get_data()` returns empty list or fewer records than expected.
@@ -2453,23 +2387,6 @@ ensure_cache_freshness(
2453
2387
  )
2454
2388
  ```
2455
2389
 
2456
- #### Environment Variable Force
2457
-
2458
- Set `FORCE_ALL_CACHE_REFRESH=true` before invoking:
2459
-
2460
- ```bash
2461
- # In Lambda environment variables or shell
2462
- export FORCE_ALL_CACHE_REFRESH=true
2463
-
2464
- # Then call normally (will force refresh automatically)
2465
- ensure_cache_freshness(
2466
- client=client,
2467
- cache_manager=cache_mgr,
2468
- report_configs_all=get_all_reports(),
2469
- report_configs_to_cache=get_reports_to_cache()
2470
- )
2471
- ```
2472
-
2473
2390
  ### Cache-All-Data Mode
2474
2391
 
2475
2392
  For production, cache data for all reports instead of a subset:
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "quickbase-extract"
7
- version = "0.2.0"
7
+ version = "0.2.1"
8
8
  description = "Extract and cache Quickbase report data with built-in error handling and S3 support"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.12"
@@ -28,7 +28,12 @@ import logging
28
28
  from importlib.metadata import version
29
29
 
30
30
  # API operations with error handling
31
- from quickbase_extract.api_handlers import QuickbaseOperationError, handle_delete, handle_query, handle_upsert
31
+ from quickbase_extract.api_handlers import (
32
+ QuickbaseOperationError,
33
+ handle_delete,
34
+ handle_query,
35
+ handle_upsert,
36
+ )
32
37
 
33
38
  # Cache management
34
39
  from quickbase_extract.cache_manager import CacheManager
@@ -43,7 +48,12 @@ from quickbase_extract.cache_sync import is_cache_synced, sync_from_s3_once
43
48
  from quickbase_extract.config import ReportConfig
44
49
 
45
50
  # Report data retrieval
46
- from quickbase_extract.report_data import get_data, get_data_parallel, load_data, load_data_batch
51
+ from quickbase_extract.report_data import (
52
+ get_data,
53
+ get_data_parallel,
54
+ load_data,
55
+ load_data_batch,
56
+ )
47
57
 
48
58
  # Report metadata
49
59
  from quickbase_extract.report_metadata import (
@@ -4,6 +4,8 @@ Provides retry logic for rate-limited requests, standardized error handling,
4
4
  and logging for Quickbase API operations.
5
5
  """
6
6
 
7
+ # ruff: noqa: BLE001
8
+
7
9
  import logging
8
10
  import random
9
11
  import time
@@ -63,7 +65,7 @@ def handle_upsert(
63
65
 
64
66
  return result
65
67
 
66
- except Exception as e: # noqa: BLE001 # Need to catch all exceptions for retry logic
68
+ except Exception as e: # Need to catch all exceptions for retry logic
67
69
  error_str = str(e)
68
70
 
69
71
  # Retry on 429 (rate limit)
@@ -117,7 +119,7 @@ def handle_delete(
117
119
  logger.info(f"Delete {description}: {deleted} records deleted")
118
120
  return deleted
119
121
 
120
- except Exception as e: # noqa: BLE001 # Need to catch all exceptions for retry logic
122
+ except Exception as e: # Need to catch all exceptions for retry logic
121
123
  error_str = str(e)
122
124
 
123
125
  # Only retry on 429 (rate limit) - other errors are too risky to retry
@@ -193,7 +195,7 @@ def handle_query(
193
195
  logger.info(f"Query{desc_str} returned {record_count} records")
194
196
  return result
195
197
 
196
- except Exception as e: # noqa: BLE001 # Need to catch all exceptions for retry logic
198
+ except Exception as e: # Need to catch all exceptions for retry logic
197
199
  error_str = str(e)
198
200
 
199
201
  if "429" in error_str and attempt < max_retries - 1:
@@ -227,6 +227,12 @@ class CacheManager:
227
227
 
228
228
  local_path.parent.mkdir(parents=True, exist_ok=True)
229
229
  self.s3_client.download_file(self.s3_bucket, s3_key, str(local_path))
230
+
231
+ # Preserve S3 LastModified timestamp so cache age checks remain accurate
232
+ # Without this, st_mtime is set to "now" and age always reads ~0 after sync
233
+ last_modified = obj["LastModified"].timestamp()
234
+ os.utime(str(local_path), (last_modified, last_modified))
235
+
230
236
  file_count += 1
231
237
 
232
238
  logger.info(f"Synced {file_count} files from S3")
@@ -1,7 +1,6 @@
1
1
  """S3-backed cache sync for Lambda environments."""
2
2
 
3
3
  import logging
4
- import os
5
4
 
6
5
  from quickbase_extract.cache_manager import CacheManager
7
6
 
@@ -15,7 +14,7 @@ def sync_from_s3_once(cache_manager: CacheManager, force: bool = False) -> None:
15
14
  """Download cache from S3 to /tmp on Lambda cold start.
16
15
 
17
16
  Only syncs if cache hasn't been synced in this invocation.
18
- Subsequent calls are no-ops unless force=True or FORCE_CACHE_REFRESH env var is set.
17
+ Subsequent calls are no-ops unless force=True.
19
18
 
20
19
  On Lambda, the sync flag persists across warm invocations within the same
21
20
  container, so warm starts skip the sync (Lambda /tmp persists). Only cold
@@ -27,17 +26,11 @@ def sync_from_s3_once(cache_manager: CacheManager, force: bool = False) -> None:
27
26
  Args:
28
27
  cache_manager: CacheManager instance for cache operations.
29
28
  force: If True, sync even if already synced in this invocation.
30
- Defaults to False. Can also be triggered via FORCE_CACHE_REFRESH
31
- environment variable.
29
+ Defaults to False.
32
30
 
33
31
  Raises:
34
32
  Exception: If S3 operations fail.
35
33
 
36
- Environment Variables:
37
- FORCE_CACHE_REFRESH: If set to "true" (case-insensitive), forces a
38
- cache sync even if already synced. Useful for triggering refreshes
39
- without code changes (e.g., from Lambda console or alerting system).
40
-
41
34
  Example:
42
35
  >>> # In Lambda handler initialization
43
36
  >>> cache_manager = CacheManager(
@@ -50,18 +43,13 @@ def sync_from_s3_once(cache_manager: CacheManager, force: bool = False) -> None:
50
43
  >>>
51
44
  >>> # Force re-sync if needed (programmatically)
52
45
  >>> sync_from_s3_once(cache_manager, force=True)
53
- >>>
54
- >>> # Or set environment variable before invocation
55
- >>> # FORCE_CACHE_REFRESH=true (then call normally)
56
- >>> sync_from_s3_once(cache_manager) # Will sync regardless of _CACHE_SYNCED flag
57
46
  """
58
47
  global _CACHE_SYNCED
59
48
 
60
- # Check for force refresh via environment variable
61
- force_env = os.environ.get("FORCE_CACHE_REFRESH", "").lower() == "true"
62
- should_sync = _CACHE_SYNCED and not force and not force_env
49
+ # Check for force refresh
50
+ already_synced = _CACHE_SYNCED and not force
63
51
 
64
- if should_sync:
52
+ if already_synced:
65
53
  logger.debug("Cache already synced in this invocation, skipping")
66
54
  return
67
55
 
@@ -3,6 +3,7 @@
3
3
  from unittest.mock import MagicMock
4
4
 
5
5
  import pytest
6
+
6
7
  from quickbase_extract.config import ReportConfig
7
8
 
8
9
 
@@ -201,3 +202,11 @@ def reset_singletons():
201
202
  yield
202
203
 
203
204
  _reset_cache_sync()
205
+
206
+
207
+ @pytest.fixture
208
+ def mock_s3_client():
209
+ """Create a mock S3 client for testing."""
210
+ from unittest.mock import MagicMock
211
+
212
+ return MagicMock()
@@ -3,6 +3,7 @@
3
3
  import time
4
4
 
5
5
  import pytest
6
+
6
7
  from quickbase_extract.api_handlers import QuickbaseOperationError, handle_delete, handle_query, handle_upsert
7
8
 
8
9
 
@@ -223,3 +224,4 @@ class TestHandleQuery:
223
224
  handle_query(mock_qb_api, "tblXYZ", description="customer records")
224
225
 
225
226
  assert "customer records" in caplog.text
227
+ assert "customer records" in caplog.text
@@ -1,10 +1,14 @@
1
1
  """Unit tests for cache_manager module."""
2
2
 
3
+ import logging
3
4
  import os
4
5
  import time
6
+ from datetime import UTC, datetime
7
+ from pathlib import Path
5
8
  from unittest.mock import MagicMock, patch
6
9
 
7
10
  import pytest
11
+
8
12
  from quickbase_extract.cache_manager import CacheManager
9
13
 
10
14
 
@@ -52,7 +56,6 @@ class TestCacheManagerInit:
52
56
  nested_dir = temp_cache_dir / "nested" / "cache"
53
57
  assert not nested_dir.exists()
54
58
 
55
- # Create CacheManager - this should create the directory
56
59
  CacheManager(cache_root=nested_dir)
57
60
 
58
61
  assert nested_dir.exists()
@@ -63,7 +66,6 @@ class TestCacheManagerInit:
63
66
  monkeypatch.setenv("CACHE_BUCKET", "my-bucket")
64
67
 
65
68
  with patch("quickbase_extract.cache_manager.boto3.client") as mock_boto:
66
- # Create CacheManager - this should call boto3.client
67
69
  CacheManager(
68
70
  cache_root=temp_cache_dir,
69
71
  s3_bucket="my-bucket",
@@ -83,12 +85,13 @@ class TestCacheManagerInit:
83
85
  """Test that s3_prefix is required when using S3 on Lambda."""
84
86
  monkeypatch.setenv("AWS_LAMBDA_FUNCTION_NAME", "test-function")
85
87
 
86
- with pytest.raises(ValueError, match="s3_prefix is required"):
87
- CacheManager(
88
- cache_root=temp_cache_dir,
89
- s3_bucket="my-bucket",
90
- s3_prefix=None,
91
- )
88
+ with patch("quickbase_extract.cache_manager.boto3.client"):
89
+ with pytest.raises(ValueError, match="s3_prefix is required"):
90
+ CacheManager(
91
+ cache_root=temp_cache_dir,
92
+ s3_bucket="my-bucket",
93
+ s3_prefix=None,
94
+ )
92
95
 
93
96
 
94
97
  class TestCacheManagerPaths:
@@ -102,7 +105,7 @@ class TestCacheManagerPaths:
102
105
 
103
106
  assert path.name == "my_table_python.json"
104
107
  assert "report_metadata" in str(path)
105
- assert "my_app" in str(path) # Now includes app subdirectory
108
+ assert "my_app" in str(path)
106
109
 
107
110
  def test_get_data_path(self, temp_cache_dir):
108
111
  """Test data path generation."""
@@ -112,7 +115,7 @@ class TestCacheManagerPaths:
112
115
 
113
116
  assert path.name == "my_table_python_data.json"
114
117
  assert "report_data" in str(path)
115
- assert "my_app" in str(path) # Now includes app subdirectory
118
+ assert "my_app" in str(path)
116
119
 
117
120
  def test_metadata_path_creates_parent_dirs(self, temp_cache_dir):
118
121
  """Test that metadata path creation makes parent directories."""
@@ -134,7 +137,7 @@ class TestCacheManagerPaths:
134
137
 
135
138
  path = mgr.get_metadata_path("Data Lake", "Employee Appointments", "Aureus")
136
139
 
137
- assert "data_lake" in str(path) # App subdirectory
140
+ assert "data_lake" in str(path)
138
141
  assert "employee_appointments" in str(path)
139
142
  assert "aureus" in str(path)
140
143
 
@@ -198,7 +201,6 @@ class TestCacheManagerFileOperations:
198
201
  test_file = temp_cache_dir / "test.json"
199
202
  mgr.write_file(test_file, "content")
200
203
 
201
- # S3 upload should be called
202
204
  mock_s3.upload_file.assert_called_once()
203
205
 
204
206
  def test_write_file_no_s3_sync_locally(self, temp_cache_dir, monkeypatch):
@@ -212,6 +214,29 @@ class TestCacheManagerFileOperations:
212
214
 
213
215
  mock_boto.assert_not_called()
214
216
 
217
+ def test_write_file_raises_on_s3_failure(self, temp_cache_dir, monkeypatch):
218
+ """Test that write_file propagates S3 upload errors."""
219
+ monkeypatch.setenv("AWS_LAMBDA_FUNCTION_NAME", "test-function")
220
+ monkeypatch.setenv("CACHE_BUCKET", "my-bucket")
221
+
222
+ with patch("quickbase_extract.cache_manager.boto3.client") as mock_boto:
223
+ mock_s3 = MagicMock()
224
+ mock_boto.return_value = mock_s3
225
+ mock_s3.upload_file.side_effect = Exception("S3 upload failed")
226
+
227
+ mgr = CacheManager(
228
+ cache_root=temp_cache_dir,
229
+ s3_bucket="my-bucket",
230
+ s3_prefix="project/dev/cache",
231
+ )
232
+ test_file = temp_cache_dir / "test.json"
233
+
234
+ with pytest.raises(Exception, match="S3 upload failed"):
235
+ mgr.write_file(test_file, "content")
236
+
237
+ # File should still be written locally
238
+ assert test_file.exists()
239
+
215
240
 
216
241
  class TestCacheManagerS3Sync:
217
242
  """Tests for S3 sync operations."""
@@ -224,7 +249,6 @@ class TestCacheManagerS3Sync:
224
249
  mgr = CacheManager(cache_root=temp_cache_dir)
225
250
  mgr.sync_from_s3()
226
251
 
227
- # Should not call boto3 if not Lambda
228
252
  mock_boto.assert_not_called()
229
253
 
230
254
  def test_sync_from_s3_requires_cache_bucket(self, temp_cache_dir, monkeypatch, caplog):
@@ -237,9 +261,10 @@ class TestCacheManagerS3Sync:
237
261
  mock_boto.return_value = mock_s3
238
262
 
239
263
  mgr = CacheManager(cache_root=temp_cache_dir)
240
- mgr.sync_from_s3()
241
264
 
242
- # S3 operations should not be called
265
+ with caplog.at_level(logging.DEBUG):
266
+ mgr.sync_from_s3()
267
+
243
268
  mock_s3.get_paginator.assert_not_called()
244
269
  assert "CACHE_BUCKET not set" in caplog.text
245
270
 
@@ -252,19 +277,31 @@ class TestCacheManagerS3Sync:
252
277
  mock_s3 = MagicMock()
253
278
  mock_boto.return_value = mock_s3
254
279
 
255
- # Mock paginator response
280
+ mock_last_modified = datetime(2025, 1, 1, tzinfo=UTC)
281
+
256
282
  mock_paginator = MagicMock()
257
283
  mock_s3.get_paginator.return_value = mock_paginator
258
- # The paginator returns a list of pages, each page has a "Contents" key
259
284
  mock_paginator.paginate.return_value = [
260
285
  {
261
286
  "Contents": [
262
- {"Key": "project/dev/cache/report_metadata/app/table_report.json"},
263
- {"Key": "project/dev/cache/report_data/app/table_data.json"},
287
+ {
288
+ "Key": "project/dev/cache/report_metadata/app/table_report.json",
289
+ "LastModified": mock_last_modified,
290
+ },
291
+ {
292
+ "Key": "project/dev/cache/report_data/app/table_data.json",
293
+ "LastModified": mock_last_modified,
294
+ },
264
295
  ]
265
296
  }
266
297
  ]
267
298
 
299
+ def create_file(bucket, key, local_path):
300
+ Path(local_path).parent.mkdir(parents=True, exist_ok=True)
301
+ Path(local_path).write_text("{}")
302
+
303
+ mock_s3.download_file.side_effect = create_file
304
+
268
305
  mgr = CacheManager(
269
306
  cache_root=temp_cache_dir,
270
307
  s3_bucket="my-bucket",
@@ -272,7 +309,6 @@ class TestCacheManagerS3Sync:
272
309
  )
273
310
  mgr.sync_from_s3()
274
311
 
275
- # Should download each file
276
312
  assert mock_s3.download_file.call_count == 2
277
313
 
278
314
  def test_sync_from_s3_creates_directories(self, temp_cache_dir, monkeypatch, caplog):
@@ -284,25 +320,61 @@ class TestCacheManagerS3Sync:
284
320
  mock_s3 = MagicMock()
285
321
  mock_boto.return_value = mock_s3
286
322
 
323
+ mock_last_modified = datetime(2025, 1, 1, tzinfo=UTC)
324
+
287
325
  mock_paginator = MagicMock()
288
326
  mock_s3.get_paginator.return_value = mock_paginator
289
327
  mock_paginator.paginate.return_value = [
290
328
  {
291
329
  "Contents": [
292
- {"Key": "project/dev/cache/report_metadata/app/table_report.json"},
330
+ {
331
+ "Key": "project/dev/cache/report_metadata/app/table_report.json",
332
+ "LastModified": mock_last_modified,
333
+ },
293
334
  ]
294
335
  }
295
336
  ]
296
337
 
338
+ def create_file(bucket, key, local_path):
339
+ Path(local_path).parent.mkdir(parents=True, exist_ok=True)
340
+ Path(local_path).write_text("{}")
341
+
342
+ mock_s3.download_file.side_effect = create_file
343
+
297
344
  mgr = CacheManager(
298
345
  cache_root=temp_cache_dir,
299
346
  s3_bucket="my-bucket",
300
347
  s3_prefix="project/dev/cache",
301
348
  )
302
- mgr.sync_from_s3()
349
+
350
+ with caplog.at_level(logging.INFO):
351
+ mgr.sync_from_s3()
303
352
 
304
353
  assert "Synced 1 files from S3" in caplog.text
305
354
 
355
+ def test_get_cache_age_hours_with_old_files(self, temp_cache_dir):
356
+ """Verify cache age is calculated correctly from file mtime."""
357
+
358
+ # Create cache directory structure
359
+ cache_dir = temp_cache_dir / "report_metadata" / "app"
360
+ cache_dir.mkdir(parents=True, exist_ok=True)
361
+
362
+ # Create a test file
363
+ test_file = cache_dir / "table_report.json"
364
+ test_file.write_text("{}")
365
+
366
+ # Set its mtime to 48 hours ago
367
+ past_time = time.time() - (48 * 3600)
368
+ os.utime(test_file, (past_time, past_time))
369
+
370
+ cache_manager = CacheManager(cache_root=temp_cache_dir)
371
+
372
+ # Get cache age
373
+ age = cache_manager.get_cache_age_hours("metadata")
374
+
375
+ # Should be approximately 48 hours (within 1 hour tolerance for test execution time)
376
+ assert 47 < age < 49
377
+
306
378
 
307
379
  class TestCacheManagerCacheChecks:
308
380
  """Tests for cache state checking methods."""
@@ -311,30 +383,24 @@ class TestCacheManagerCacheChecks:
311
383
  """Test is_cache_empty for metadata."""
312
384
  mgr = CacheManager(cache_root=temp_cache_dir)
313
385
 
314
- # Empty initially
315
386
  assert mgr.is_cache_empty("metadata") is True
316
387
 
317
- # Create a metadata file
318
388
  metadata_dir = temp_cache_dir / "report_metadata" / "app"
319
389
  metadata_dir.mkdir(parents=True)
320
390
  (metadata_dir / "table_report.json").write_text("{}")
321
391
 
322
- # Not empty now
323
392
  assert mgr.is_cache_empty("metadata") is False
324
393
 
325
394
  def test_is_cache_empty_data(self, temp_cache_dir):
326
395
  """Test is_cache_empty for data."""
327
396
  mgr = CacheManager(cache_root=temp_cache_dir)
328
397
 
329
- # Empty initially
330
398
  assert mgr.is_cache_empty("data") is True
331
399
 
332
- # Create a data file
333
400
  data_dir = temp_cache_dir / "report_data" / "app"
334
401
  data_dir.mkdir(parents=True)
335
402
  (data_dir / "table_data.json").write_text("[]")
336
403
 
337
- # Not empty now
338
404
  assert mgr.is_cache_empty("data") is False
339
405
 
340
406
  def test_is_cache_empty_invalid_type(self, temp_cache_dir):
@@ -348,18 +414,16 @@ class TestCacheManagerCacheChecks:
348
414
  """Test get_cache_age_hours returns correct age."""
349
415
  mgr = CacheManager(cache_root=temp_cache_dir)
350
416
 
351
- # Create a metadata file
352
417
  metadata_dir = temp_cache_dir / "report_metadata" / "app"
353
418
  metadata_dir.mkdir(parents=True)
354
419
  metadata_file = metadata_dir / "table_report.json"
355
420
  metadata_file.write_text("{}")
356
421
 
357
- # Set modification time to 2 hours ago
358
422
  old_time = time.time() - (2 * 3600)
359
423
  os.utime(metadata_file, (old_time, old_time))
360
424
 
361
425
  age = mgr.get_cache_age_hours("metadata")
362
- assert 1.9 < age < 2.1 # Allow small margin
426
+ assert 1.9 < age < 2.1
363
427
 
364
428
  def test_get_cache_age_hours_empty_cache(self, temp_cache_dir):
365
429
  """Test get_cache_age_hours returns 0 for empty cache."""
@@ -374,4 +438,35 @@ class TestCacheManagerCacheChecks:
374
438
 
375
439
  with pytest.raises(ValueError, match="cache_type must be"):
376
440
  mgr.get_cache_age_hours("invalid")
377
- mgr.get_cache_age_hours("invalid")
441
+
442
+
443
+ class TestCacheManagerReportChecks:
444
+ """Tests for report-level existence checks."""
445
+
446
+ def test_has_report_metadata_true(self, temp_cache_dir):
447
+ """Test has_report_metadata returns True when file exists."""
448
+ mgr = CacheManager(cache_root=temp_cache_dir)
449
+ path = mgr.get_metadata_path("My App", "My Table", "Python")
450
+ path.write_text("{}")
451
+
452
+ assert mgr.has_report_metadata("My App", "My Table", "Python") is True
453
+
454
+ def test_has_report_metadata_false(self, temp_cache_dir):
455
+ """Test has_report_metadata returns False when file missing."""
456
+ mgr = CacheManager(cache_root=temp_cache_dir)
457
+
458
+ assert mgr.has_report_metadata("My App", "My Table", "Python") is False
459
+
460
+ def test_has_report_data_true(self, temp_cache_dir):
461
+ """Test has_report_data returns True when file exists."""
462
+ mgr = CacheManager(cache_root=temp_cache_dir)
463
+ path = mgr.get_data_path("My App", "My Table", "Python")
464
+ path.write_text("[]")
465
+
466
+ assert mgr.has_report_data("My App", "My Table", "Python") is True
467
+
468
+ def test_has_report_data_false(self, temp_cache_dir):
469
+ """Test has_report_data returns False when file missing."""
470
+ mgr = CacheManager(cache_root=temp_cache_dir)
471
+
472
+ assert mgr.has_report_data("My App", "My Table", "Python") is False
@@ -6,6 +6,7 @@ import time
6
6
  from unittest.mock import MagicMock, patch
7
7
 
8
8
  import pytest
9
+
9
10
  from quickbase_extract.cache_manager import CacheManager
10
11
  from quickbase_extract.cache_orchestration import (
11
12
  CacheRefreshError,
@@ -945,3 +946,4 @@ class TestEnsureCacheFreshnessIntegration:
945
946
 
946
947
  # Should be fresh because parameter overrides env var
947
948
  assert "Cache is fresh" in caplog.text
949
+ assert "Cache is fresh" in caplog.text
@@ -3,6 +3,7 @@
3
3
  from unittest.mock import patch
4
4
 
5
5
  import pytest
6
+
6
7
  from quickbase_extract.cache_manager import CacheManager
7
8
  from quickbase_extract.cache_sync import _reset_cache_sync, is_cache_synced, sync_from_s3_once
8
9
 
@@ -115,3 +116,4 @@ class TestResetCacheSync:
115
116
  # Should be able to sync again
116
117
  sync_from_s3_once(cache_mgr)
117
118
  assert mock_sync.call_count == 2
119
+ assert mock_sync.call_count == 2
@@ -3,6 +3,7 @@
3
3
  import json
4
4
 
5
5
  import pytest
6
+
6
7
  from quickbase_extract.cache_manager import CacheManager
7
8
  from quickbase_extract.config import ReportConfig
8
9
  from quickbase_extract.report_data import (
@@ -611,3 +612,4 @@ class TestLoadDataBatch:
611
612
  assert all_data[sample_report_configs[0]][0]["Name"] == "Alice"
612
613
  assert all_data[sample_report_configs[0]][0]["Name"] == "Alice"
613
614
  assert all_data[sample_report_configs[0]][0]["Name"] == "Alice"
615
+ assert all_data[sample_report_configs[0]][0]["Name"] == "Alice"
@@ -3,6 +3,7 @@
3
3
  import json
4
4
 
5
5
  import pytest
6
+
6
7
  from quickbase_extract.cache_manager import CacheManager
7
8
  from quickbase_extract.config import ReportConfig
8
9
  from quickbase_extract.report_metadata import (
@@ -395,3 +396,4 @@ class TestLoadReportMetadataBatch:
395
396
  assert all_metadata[config1]["table_id"] == "tblXYZ123"
396
397
  assert all_metadata[config1]["table_id"] == "tblXYZ123"
397
398
  assert all_metadata[config1]["table_id"] == "tblXYZ123"
399
+ assert all_metadata[config1]["table_id"] == "tblXYZ123"