contextbase-shared-plugins 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextbase_shared_plugins-0.2.3.dist-info/METADATA +22 -0
- contextbase_shared_plugins-0.2.3.dist-info/RECORD +37 -0
- contextbase_shared_plugins-0.2.3.dist-info/WHEEL +4 -0
- shared_plugins/__init__.py +12 -0
- shared_plugins/automation.py +11 -0
- shared_plugins/bindings.py +253 -0
- shared_plugins/control_plane.py +208 -0
- shared_plugins/dlt.py +84 -0
- shared_plugins/env.py +102 -0
- shared_plugins/exceptions.py +10 -0
- shared_plugins/google_client/__init__.py +1 -0
- shared_plugins/google_client/auth.py +82 -0
- shared_plugins/google_client/batch_retry.py +308 -0
- shared_plugins/google_client/http_errors.py +27 -0
- shared_plugins/microsoft_dataverse/__init__.py +27 -0
- shared_plugins/microsoft_dataverse/annotations.py +38 -0
- shared_plugins/microsoft_dataverse/auth.py +26 -0
- shared_plugins/microsoft_dataverse/binding_config.py +35 -0
- shared_plugins/microsoft_dataverse/client.py +456 -0
- shared_plugins/microsoft_dataverse/ctx.py +21 -0
- shared_plugins/microsoft_dataverse/identifiers.py +62 -0
- shared_plugins/microsoft_dataverse/ingress.py +53 -0
- shared_plugins/microsoft_dataverse/metadata.py +106 -0
- shared_plugins/microsoft_dataverse/runtime_schema.py +332 -0
- shared_plugins/microsoft_dataverse/source.py +250 -0
- shared_plugins/microsoft_dataverse/tables.py +34 -0
- shared_plugins/microsoft_dataverse/translators.py +128 -0
- shared_plugins/microsoft_dataverse/types.py +346 -0
- shared_plugins/models.py +91 -0
- shared_plugins/naming.py +83 -0
- shared_plugins/pg_column_comments.py +59 -0
- shared_plugins/pyairbyte.py +399 -0
- shared_plugins/resources.py +179 -0
- shared_plugins/scratch.py +127 -0
- shared_plugins/sqlalchemy_types.py +225 -0
- shared_plugins/sqlite.py +123 -0
- shared_plugins/values.py +117 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from uuid import uuid4
|
|
7
|
+
|
|
8
|
+
from .env import CTXB_SCRATCH_DIR_ENV_VAR, load_shared_python_settings
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _validate_safe_segment(value: str, *, label: str) -> str:
|
|
12
|
+
normalized = value.strip()
|
|
13
|
+
if not normalized:
|
|
14
|
+
raise ValueError(f"{label} cannot be blank.")
|
|
15
|
+
if normalized in {".", ".."}:
|
|
16
|
+
raise ValueError(f"{label} cannot be '.' or '..'.")
|
|
17
|
+
if "/" in normalized or "\\" in normalized or "\x00" in normalized:
|
|
18
|
+
raise ValueError(f"{label} cannot contain path separators or null bytes.")
|
|
19
|
+
return normalized
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _resolve_scratch_root() -> Path:
    """Return the scratch directory carried by the shared Python settings.

    Raises:
        RuntimeError: If the loaded settings have no scratch directory.
    """
    configured_root = load_shared_python_settings().ctx_scratch_dir
    if configured_root is not None:
        return configured_root
    raise RuntimeError(
        f"{CTXB_SCRATCH_DIR_ENV_VAR} is not set; run via ctxb so scratch paths are configured."
    )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _parse_relative_dir(relative_dir: str) -> Path:
    """Turn ``relative_dir`` into a relative ``Path`` built from validated segments.

    Each part of the path is passed through ``_validate_safe_segment``, so the
    result is reassembled from the stripped, checked segments.

    Raises:
        ValueError: If the input is blank, is an absolute path, or any
            segment fails validation.
    """
    cleaned = relative_dir.strip()
    if cleaned == "":
        raise ValueError("relative_dir cannot be blank.")

    candidate = Path(cleaned)
    if candidate.is_absolute():
        raise ValueError("relative_dir must be relative, not absolute.")

    checked_parts: list[str] = []
    for segment in candidate.parts:
        checked_parts.append(
            _validate_safe_segment(segment, label="relative_dir segment")
        )
    return Path(*checked_parts)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _resolve_binding_target_paths(
    *,
    binding_id: str,
    relative_dir: str,
) -> tuple[Path, Path]:
    """Compute the absolute binding root and target directory for a binding.

    Returns:
        ``(binding_root, target_dir)`` where ``binding_root`` is the scratch
        root joined with the validated ``binding_id`` and ``target_dir`` is
        ``binding_root`` joined with the validated ``relative_dir``.

    Raises:
        ValueError: If either input fails validation or the target directory
            is not located under the binding root.
    """
    root = _resolve_scratch_root()
    binding_segment = _validate_safe_segment(binding_id, label="binding_id")
    relative_path = _parse_relative_dir(relative_dir)

    binding_root = root.joinpath(binding_segment).absolute()
    target_dir = binding_root.joinpath(relative_path).absolute()
    # Final containment check on top of the per-segment validation above.
    if target_dir.is_relative_to(binding_root):
        return binding_root, target_dir
    raise ValueError("relative_dir escapes the binding scratch root.")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _remove_path(path: Path) -> None:
|
|
64
|
+
if not path.exists():
|
|
65
|
+
return
|
|
66
|
+
if path.is_dir():
|
|
67
|
+
shutil.rmtree(path)
|
|
68
|
+
else:
|
|
69
|
+
path.unlink()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _build_temp_path(binding_root: Path, *, prefix: str) -> Path:
|
|
73
|
+
return binding_root / f".{prefix}-{uuid4().hex}"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def replace_scratch_dir_files(
    *,
    binding_id: str,
    relative_dir: str,
    files: Mapping[str, bytes],
) -> dict[str, str]:
    """Replace a binding's scratch directory with exactly the given files.

    The files are written into a hidden staging directory under the binding
    root, any existing target directory is moved aside to a hidden backup
    path, and the staging directory is then renamed into place. If the swap
    fails, the backup is renamed back when the target is missing. Staging
    and backup paths are removed before returning or re-raising.

    Args:
        binding_id: Single safe path segment naming the binding.
        relative_dir: Relative directory, under the binding root, to replace.
        files: Mapping of file name (single safe path segment) to content.

    Returns:
        Mapping from each input file name (as given) to the absolute path of
        the written file, as a string.

    Raises:
        ValueError: If a name or path fails validation, or two names collapse
            to the same stripped file name.
        TypeError: If any file content is not ``bytes``.
    """
    binding_root, target_dir = _resolve_binding_target_paths(
        binding_id=binding_id,
        relative_dir=relative_dir,
    )

    # Validate every entry before touching the filesystem, so a bad name or
    # payload cannot leave a half-populated staging directory behind.
    safe_name_by_input_name: dict[str, str] = {}
    content_by_safe_name: dict[str, bytes] = {}
    for file_name, content in files.items():
        safe_file_name = _validate_safe_segment(file_name, label="file name")
        if not isinstance(content, bytes):
            raise TypeError("files values must be bytes.")
        if safe_file_name in content_by_safe_name:
            raise ValueError(
                f"files contains duplicate normalized file names: '{safe_file_name}'."
            )
        content_by_safe_name[safe_file_name] = content
        safe_name_by_input_name[file_name] = safe_file_name

    binding_root.mkdir(parents=True, exist_ok=True)

    staging_dir = _build_temp_path(binding_root, prefix="ctx-staging")
    staging_dir.mkdir(parents=False, exist_ok=False)

    backup_path: Path | None = None
    try:
        # Writing happens inside the guarded region so a failed write (for
        # example a full disk) still triggers the staging cleanup below.
        for safe_file_name, content in content_by_safe_name.items():
            (staging_dir / safe_file_name).write_bytes(content)

        target_dir.parent.mkdir(parents=True, exist_ok=True)
        if target_dir.exists():
            backup_path = _build_temp_path(binding_root, prefix="ctx-backup")
            target_dir.rename(backup_path)
        staging_dir.rename(target_dir)
    except Exception:
        # Roll back: restore the previous directory if it was moved aside and
        # nothing has taken its place.
        if backup_path is not None and backup_path.exists() and not target_dir.exists():
            backup_path.rename(target_dir)
        raise
    finally:
        _remove_path(staging_dir)
        if backup_path is not None:
            _remove_path(backup_path)

    return {
        file_name: str((target_dir / safe_name).absolute())
        for file_name, safe_name in safe_name_by_input_name.items()
    }
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from datetime import datetime, timedelta, timezone
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from sqlalchemy.types import Float, Integer, TypeDecorator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _timedelta_total_microseconds(delta: timedelta) -> int:
|
|
11
|
+
return ((delta.days * 86_400) + delta.seconds) * 1_000_000 + delta.microseconds
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _timedelta_total_milliseconds(delta: timedelta) -> int:
|
|
15
|
+
total_microseconds = _timedelta_total_microseconds(delta)
|
|
16
|
+
if total_microseconds >= 0:
|
|
17
|
+
return total_microseconds // 1_000
|
|
18
|
+
return -((-total_microseconds) // 1_000)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Zero point (2001-01-01 00:00:00 UTC) that AppleCoreDataTimestamp and
# AppleMessageTimestamp measure stored values from.
APPLE_CORE_DATA_EPOCH = datetime(2001, 1, 1, tzinfo=timezone.utc)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AppleCoreDataTimestamp(TypeDecorator[datetime | None]):
    """Map aware datetimes to float seconds since ``APPLE_CORE_DATA_EPOCH``.

    Values bound to the database are float seconds relative to the epoch.
    Values read back become aware UTC datetimes. A database value equal to
    one of ``null_sentinels`` is read as ``None``.
    """

    impl = Float
    cache_ok = True

    def __init__(self, *, null_sentinels: Iterable[float] = ()) -> None:
        """Store ``null_sentinels`` as a de-duplicated, sorted tuple of floats."""
        super().__init__()
        # NOTE(review): presumably kept as a tuple so the attribute is hashable
        # for SQLAlchemy's statement cache key (cache_ok = True) - confirm.
        unique_sentinels = {float(item) for item in null_sentinels}
        self.null_sentinels = tuple(sorted(unique_sentinels))

    def process_bind_param(
        self,
        value: object,
        dialect: Any,
    ) -> float | None:
        """Convert an aware datetime to float seconds since the epoch.

        Raises:
            TypeError: If ``value`` is neither ``None`` nor a datetime, or is
                a naive datetime.
        """
        del dialect
        if value is None:
            return None
        if not isinstance(value, datetime):
            raise TypeError(
                "AppleCoreDataTimestamp values must be datetime instances or None."
            )
        is_naive = value.tzinfo is None or value.utcoffset() is None
        if is_naive:
            raise TypeError("AppleCoreDataTimestamp requires an aware datetime.")

        elapsed = value.astimezone(timezone.utc) - APPLE_CORE_DATA_EPOCH
        return elapsed.total_seconds()

    def process_result_value(
        self,
        value: object,
        dialect: Any,
    ) -> datetime | None:
        """Convert a numeric database value to an aware UTC datetime.

        Returns ``None`` for ``None`` and for any configured null sentinel.

        Raises:
            TypeError: If ``value`` is a bool or is not an int or float.
        """
        del dialect
        if value is None:
            return None
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if not is_number:
            raise TypeError(
                "AppleCoreDataTimestamp database values must be numeric or None."
            )

        seconds = float(value)
        if seconds in self.null_sentinels:
            return None

        return APPLE_CORE_DATA_EPOCH + timedelta(seconds=seconds)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class AppleMessageTimestamp(TypeDecorator[datetime | None]):
    """Map aware datetimes to the integer timestamps used by iMessage.

    ``chat.db`` holds **nanoseconds** since 2001-01-01 UTC from iOS 12 /
    macOS Mojave onward, while pre-iOS 11 databases held **seconds** since
    that same epoch. A restored backup can contain both encodings in one
    column, so reads decide per value by size: a value of at least 10**12 is
    taken as nanoseconds, anything smaller as seconds. Writes always emit
    nanoseconds. ``datetime`` has microsecond resolution, so anything finer
    is dropped on read.
    """

    impl = Integer
    cache_ok = True

    def __init__(self, *, null_sentinels: Iterable[int] = ()) -> None:
        """Store ``null_sentinels`` as a de-duplicated, sorted tuple of ints."""
        super().__init__()
        unique_sentinels = {int(item) for item in null_sentinels}
        self.null_sentinels = tuple(sorted(unique_sentinels))

    def process_bind_param(
        self,
        value: object,
        dialect: Any,
    ) -> int | None:
        """Convert an aware datetime to integer nanoseconds since the epoch.

        Raises:
            TypeError: If ``value`` is neither ``None`` nor a datetime, or is
                a naive datetime.
        """
        del dialect
        if value is None:
            return None
        if not isinstance(value, datetime):
            raise TypeError(
                "AppleMessageTimestamp values must be datetime instances or None."
            )
        is_naive = value.tzinfo is None or value.utcoffset() is None
        if is_naive:
            raise TypeError("AppleMessageTimestamp requires an aware datetime.")

        elapsed = value.astimezone(timezone.utc) - APPLE_CORE_DATA_EPOCH
        # Microseconds -> nanoseconds.
        return _timedelta_total_microseconds(elapsed) * 1_000

    def process_result_value(
        self,
        value: object,
        dialect: Any,
    ) -> datetime | None:
        """Convert an integer database value to an aware UTC datetime.

        Returns ``None`` for ``None`` and for any configured null sentinel.

        Raises:
            TypeError: If ``value`` is a bool or is not an int.
        """
        del dialect
        if value is None:
            return None
        is_integer = isinstance(value, int) and not isinstance(value, bool)
        if not is_integer:
            raise TypeError(
                "AppleMessageTimestamp database values must be int or None."
            )

        if value in self.null_sentinels:
            return None

        if value < 10**12:
            # Small values are the legacy seconds encoding.
            offset = timedelta(seconds=value)
        else:
            # Nanoseconds; integer-divide down to datetime's microsecond unit.
            offset = timedelta(microseconds=value // 1_000)
        return APPLE_CORE_DATA_EPOCH + offset
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Zero point (1601-01-01 00:00:00 UTC) that ChromiumTimestamp measures
# stored microsecond values from.
CHROMIUM_EPOCH = datetime(1601, 1, 1, tzinfo=timezone.utc)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class ChromiumTimestamp(TypeDecorator[datetime | None]):
    """Map aware datetimes to Chromium's integer timestamps.

    Chromium represents a timestamp as the number of microseconds elapsed
    since 1601-01-01 00:00:00 UTC. Reads produce aware UTC datetimes.
    """

    impl = Integer
    cache_ok = True

    def process_bind_param(
        self,
        value: object,
        dialect: Any,
    ) -> int | None:
        """Convert an aware datetime to integer microseconds since the epoch.

        Raises:
            TypeError: If ``value`` is neither ``None`` nor a datetime, or is
                a naive datetime.
        """
        del dialect
        if value is None:
            return None
        if not isinstance(value, datetime):
            raise TypeError(
                "ChromiumTimestamp values must be datetime instances or None."
            )
        is_naive = value.tzinfo is None or value.utcoffset() is None
        if is_naive:
            raise TypeError("ChromiumTimestamp requires an aware datetime.")

        elapsed = value.astimezone(timezone.utc) - CHROMIUM_EPOCH
        return _timedelta_total_microseconds(elapsed)

    def process_result_value(
        self,
        value: object,
        dialect: Any,
    ) -> datetime | None:
        """Convert a numeric database value to an aware UTC datetime.

        Float inputs are converted with ``int()`` before use.

        Raises:
            TypeError: If ``value`` is a bool or is not an int or float.
        """
        del dialect
        if value is None:
            return None
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if not is_number:
            raise TypeError(
                "ChromiumTimestamp database values must be numeric or None."
            )

        elapsed = timedelta(microseconds=int(value))
        return CHROMIUM_EPOCH + elapsed
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# Zero point (1970-01-01 00:00:00 UTC) that UnixMillisTimestamp measures
# stored millisecond values from.
UNIX_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class UnixMillisTimestamp(TypeDecorator[datetime | None]):
    """Map aware datetimes to integer milliseconds since ``UNIX_EPOCH``.

    Reads produce aware UTC datetimes.
    """

    impl = Integer
    cache_ok = True

    def process_bind_param(
        self,
        value: object,
        dialect: Any,
    ) -> int | None:
        """Convert an aware datetime to integer milliseconds since the epoch.

        The conversion is delegated to ``_timedelta_total_milliseconds``.

        Raises:
            TypeError: If ``value`` is neither ``None`` nor a datetime, or is
                a naive datetime.
        """
        del dialect
        if value is None:
            return None
        if not isinstance(value, datetime):
            raise TypeError(
                "UnixMillisTimestamp values must be datetime instances or None."
            )
        is_naive = value.tzinfo is None or value.utcoffset() is None
        if is_naive:
            raise TypeError("UnixMillisTimestamp requires an aware datetime.")

        elapsed = value.astimezone(timezone.utc) - UNIX_EPOCH
        return _timedelta_total_milliseconds(elapsed)

    def process_result_value(
        self,
        value: object,
        dialect: Any,
    ) -> datetime | None:
        """Convert a numeric database value to an aware UTC datetime.

        Float inputs are converted with ``int()`` before use.

        Raises:
            TypeError: If ``value`` is a bool or is not an int or float.
        """
        del dialect
        if value is None:
            return None
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if not is_number:
            raise TypeError(
                "UnixMillisTimestamp database values must be numeric or None."
            )

        elapsed = timedelta(milliseconds=int(value))
        return UNIX_EPOCH + elapsed
|
shared_plugins/sqlite.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import shutil
|
|
5
|
+
import sqlite3
|
|
6
|
+
import tempfile
|
|
7
|
+
import time
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Iterator
|
|
11
|
+
from urllib.parse import urlencode
|
|
12
|
+
|
|
13
|
+
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
# Default wall-clock budget, in seconds, for the backup-API attempt in
# sqlite_snapshot before it falls back to a plain file copy.
BACKUP_TIMEOUT_SECONDS = 5.0
|
|
16
|
+
# Value passed as `pages=` to sqlite3.Connection.backup (pages per backup step).
BACKUP_PAGES_PER_STEP = 100
|
|
17
|
+
# Suffixes appended to a database file name to locate its rollback-journal
# and WAL sibling files.
SQLITE_COPY_SIBLING_SUFFIXES = ("-journal", "-wal", "-shm")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class _BackupTimeout(Exception):
|
|
21
|
+
"""Internal sentinel raised from the backup progress callback."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@contextmanager
def sqlite_snapshot(
    db_path: Path,
    *,
    backup_timeout: float = BACKUP_TIMEOUT_SECONDS,
) -> Iterator[Path]:
    """Yield the path of a temporary snapshot of a possibly-locked SQLite database.

    The snapshot lives in a fresh temporary directory and has the same file
    name as ``db_path``. Callers may open it with any library. The directory
    and everything in it are removed when the context exits.

    How the snapshot is produced:

    1. The SQLite Online Backup API is tried first, bounded by
       ``backup_timeout`` seconds of wall-clock time. The deadline is checked
       from the backup's progress callback, and the source connection uses
       ``timeout=0`` so control comes back to Python between steps instead
       of SQLite retrying internally on a busy database.

    2. When that attempt times out, the database file and whichever
       rollback-journal / WAL sibling files exist are copied instead. This
       is best-effort and carries no transactional guarantee, but it does
       not depend on the journal mode in use.

    Errors other than the timeout (for example a missing or corrupt
    database) are not treated as a reason to fall back; they propagate.
    """
    with tempfile.TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        snapshot_file = workdir / db_path.name
        backed_up = _try_backup(db_path, snapshot_file, backup_timeout)
        if not backed_up:
            LOGGER.info(
                "sqlite_snapshot.backup_timeout path=%s timeout=%.1fs, falling back to file copy",
                db_path,
                backup_timeout,
            )
            _copy_with_siblings(db_path, workdir)
        else:
            LOGGER.debug("sqlite_snapshot.backup_ok path=%s", db_path)

        yield snapshot_file
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _try_backup(db_path: Path, dest_path: Path, timeout: float) -> bool:
    """Attempt a backup-API snapshot of ``db_path`` into ``dest_path``.

    Args:
        db_path: Source database, opened read-only.
        dest_path: File the backup is written to.
        timeout: Wall-clock budget in seconds for the whole backup.

    Returns:
        ``True`` when the backup completed, ``False`` when the deadline
        passed. On timeout, ``dest_path`` and its sibling artifacts are
        removed so the caller starts its fallback from a clean directory.

    Errors other than the internal timeout signal propagate to the caller.
    """
    deadline = time.monotonic() + timeout

    def abort_if_past_deadline(rc: int, remaining: int, total: int) -> None:
        # Progress callback: its arguments are unused; it only enforces the
        # wall-clock deadline.
        if time.monotonic() > deadline:
            raise _BackupTimeout()

    # timeout=0 disables SQLite's internal busy retry so the progress
    # callback fires on every BUSY/LOCKED step (otherwise SQLite's C
    # layer would loop on BUSY for up to busy_timeout before returning
    # to Python, and we'd lose our cooperative cancellation point).
    source = sqlite3.connect(_sqlite_readonly_uri(db_path), uri=True, timeout=0)
    try:
        dest = sqlite3.connect(dest_path)
        try:
            source.backup(
                dest,
                pages=BACKUP_PAGES_PER_STEP,
                progress=abort_if_past_deadline,
            )
            timed_out = False
        except _BackupTimeout:
            timed_out = True
        finally:
            dest.close()
    finally:
        source.close()

    if timed_out:
        # Clean up only after both connections are closed: removing a file
        # that is still open fails on Windows, which would turn the intended
        # fallback into an error.
        _cleanup_partial_backup(dest_path)
        return False
    return True
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _copy_with_siblings(db_path: Path, dest_dir: Path) -> None:
    """Copy the database file and its existing sibling artifacts into ``dest_dir``.

    Candidates come from ``_iter_sqlite_artifacts``. Any candidate that is
    not a regular file at the time it is checked is skipped. Copies keep
    their original file names.
    """
    present = (
        candidate
        for candidate in _iter_sqlite_artifacts(db_path)
        if candidate.is_file()
    )
    for source_file in present:
        shutil.copy2(source_file, dest_dir / source_file.name)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _cleanup_partial_backup(dest_path: Path) -> None:
    """Unlink ``dest_path`` and each of its sibling artifact paths that exists."""
    for candidate in _iter_sqlite_artifacts(dest_path):
        if not candidate.exists():
            continue
        candidate.unlink()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _iter_sqlite_artifacts(db_path: Path) -> Iterator[Path]:
    """Yield ``db_path`` and then one sibling path per configured suffix.

    Each sibling is in the same directory as ``db_path`` and is named by
    appending a suffix from ``SQLITE_COPY_SIBLING_SUFFIXES`` to the full file
    name. Paths are yielded whether or not they exist.
    """
    yield db_path
    directory, base_name = db_path.parent, db_path.name
    yield from (
        directory / (base_name + suffix) for suffix in SQLITE_COPY_SIBLING_SUFFIXES
    )
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _sqlite_readonly_uri(db_path: Path) -> str:
|
|
122
|
+
base_uri = db_path.resolve(strict=False).as_uri()
|
|
123
|
+
return f"{base_uri}?{urlencode({'mode': 'ro'})}"
|
shared_plugins/values.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def as_mapping(value: object) -> Mapping[str, Any] | None:
|
|
10
|
+
if not isinstance(value, Mapping):
|
|
11
|
+
return None
|
|
12
|
+
return value
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def as_string(value: object) -> str | None:
|
|
16
|
+
if isinstance(value, str):
|
|
17
|
+
return value
|
|
18
|
+
return None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def non_empty_string(value: object) -> str | None:
|
|
22
|
+
text = as_string(value)
|
|
23
|
+
if text is None:
|
|
24
|
+
return None
|
|
25
|
+
stripped = text.strip()
|
|
26
|
+
return stripped or None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def coerce_non_empty_string_mapping(value: object) -> dict[str, str]:
    """Copy a mapping of clean strings into a new ``dict``.

    ``None`` yields an empty dict. Every key and every value must be a
    ``str`` that is non-empty and carries no leading or trailing whitespace.

    Raises:
        ValueError: If ``value`` is not a mapping (and not ``None``), or any
            key or value breaks the rule above.
    """
    error_text = "Expected a mapping of non-empty string keys to non-empty string values."

    def is_clean(item: object) -> bool:
        # A str that is non-empty and unchanged by strip().
        return isinstance(item, str) and item != "" and item == item.strip()

    if value is None:
        return {}
    if not isinstance(value, Mapping):
        raise ValueError(error_text)

    result: dict[str, str] = {}
    for key, raw_value in value.items():
        if not is_clean(key):
            raise ValueError(error_text)
        if not is_clean(raw_value):
            raise ValueError(error_text)
        result[key] = raw_value
    return result
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def load_json_value(value: object) -> Any:
    """Return a JSON value from an already-decoded container or a JSON string.

    A ``str`` is parsed with ``json.loads`` (parse errors propagate). A
    ``dict`` or ``list`` is returned as-is. Anything else yields ``None``.
    """
    if isinstance(value, str):
        return json.loads(value)
    return value if isinstance(value, (dict, list)) else None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def require_non_empty_text(
    value: object,
    *,
    label: str,
    context: str = "payload",
) -> str:
    """Return ``value`` as stripped, non-empty text.

    Strings are stripped directly. Other non-``None`` values are converted
    with ``str()`` first.

    Args:
        value: The field value to check.
        label: Field name used in the error message.
        context: Description of the containing object, used in the message.

    Raises:
        RuntimeError: If ``value`` is ``None`` or strips to an empty string.
    """
    if value is None:
        candidate = ""
    elif isinstance(value, str):
        candidate = value
    else:
        candidate = str(value)

    cleaned = candidate.strip()
    if not cleaned:
        raise RuntimeError(f"{context} is missing required field '{label}'.")
    return cleaned
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def require_non_negative_int(
    value: object,
    *,
    message: str = "must be a non-negative integer",
) -> int:
    """Return ``value`` as an ``int`` that is zero or greater.

    Accepts an ``int`` (but not a ``bool``) or a ``str`` that ``int()`` can
    parse once surrounding whitespace is stripped.

    Raises:
        ValueError: With ``message`` for ``None``, booleans, negative
            numbers, blank or unparsable strings, and any other type.
    """
    if isinstance(value, bool) or value is None:
        raise ValueError(message)

    if isinstance(value, int):
        number = value
    elif isinstance(value, str):
        digits = value.strip()
        if not digits:
            raise ValueError(message)
        try:
            number = int(digits)
        except ValueError as exc:
            raise ValueError(message) from exc
    else:
        raise ValueError(message)

    if number < 0:
        raise ValueError(message)
    return number
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def parse_utc_datetime_from_str(value: str) -> datetime | None:
|
|
105
|
+
if not isinstance(value, str):
|
|
106
|
+
raise TypeError(f"Expected str, got {type(value).__name__}")
|
|
107
|
+
|
|
108
|
+
text = value.strip()
|
|
109
|
+
if not text:
|
|
110
|
+
return None
|
|
111
|
+
|
|
112
|
+
normalized = f"{text[:-1]}+00:00" if text.endswith("Z") else text
|
|
113
|
+
parsed = datetime.fromisoformat(normalized)
|
|
114
|
+
|
|
115
|
+
if parsed.tzinfo is None or parsed.utcoffset() is None:
|
|
116
|
+
return parsed.replace(tzinfo=timezone.utc)
|
|
117
|
+
return parsed.astimezone(timezone.utc)
|