morphdb 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- morphdb/__init__.py +7 -0
- morphdb/__main__.py +34 -0
- morphdb/apps.py +86 -0
- morphdb/associations.py +494 -0
- morphdb/db.py +168 -0
- morphdb/errors.py +36 -0
- morphdb/fieldtypes.py +351 -0
- morphdb/objects.py +395 -0
- morphdb/router.py +58 -0
- morphdb/routes.py +254 -0
- morphdb/schema.py +203 -0
- morphdb/server.py +194 -0
- morphdb/util.py +19 -0
- morphdb-0.1.0.dist-info/METADATA +324 -0
- morphdb-0.1.0.dist-info/RECORD +19 -0
- morphdb-0.1.0.dist-info/WHEEL +5 -0
- morphdb-0.1.0.dist-info/entry_points.txt +2 -0
- morphdb-0.1.0.dist-info/licenses/LICENSE +21 -0
- morphdb-0.1.0.dist-info/top_level.txt +1 -0
morphdb/db.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""SQLite storage layer.
|
|
2
|
+
|
|
3
|
+
A single connection guarded by a reentrant lock. MorphDB is a localhost-scale
|
|
4
|
+
tool; serializing access with one lock is simpler and plenty fast, and it keeps
|
|
5
|
+
the logical tables consistent without per-statement transaction juggling.
|
|
6
|
+
|
|
7
|
+
Multi-tenancy
|
|
8
|
+
-------------
|
|
9
|
+
One MorphDB process hosts many independent **apps** (one per website). Every
|
|
10
|
+
type and object belongs to exactly one app, identified by an app *key*. The
|
|
11
|
+
``apps`` table is the tenant root; every other table carries an ``app`` column
|
|
12
|
+
with a ``REFERENCES apps(key) ON DELETE CASCADE`` foreign key, so deleting an
|
|
13
|
+
app wipes all of its schemas, objects, relations, and edges in one statement.
|
|
14
|
+
``PRAGMA foreign_keys=ON`` makes that cascade (and the "app must exist" check)
|
|
15
|
+
real at the storage layer.
|
|
16
|
+
|
|
17
|
+
Tables
|
|
18
|
+
------
|
|
19
|
+
apps key PK, created_at
|
|
20
|
+
object_schemas (app, name) PK, fields JSON, timestamps
|
|
21
|
+
objects guid PK, app, object_type, data JSON blob, timestamps
|
|
22
|
+
association_schemas (app, name) PK, from/to type, forward/inverse label, ...
|
|
23
|
+
associations id PK, app, assoc_name, from_guid, to_guid (one row/edge)
|
|
24
|
+
|
|
25
|
+
Within an app, type names are unique (the composite primary key enforces it);
|
|
26
|
+
the same name may be reused freely in a different app.
|
|
27
|
+
|
|
28
|
+
Design note — associations are stored as a single canonical row per edge (not
|
|
29
|
+
two mirrored rows). Bidirectional traversal is achieved by querying both the
|
|
30
|
+
from_guid and to_guid columns (both indexed). This avoids the dual-write
|
|
31
|
+
consistency hazard of mirrored rows while still letting an object discover all
|
|
32
|
+
of its relationships in one query.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
import sqlite3
|
|
36
|
+
import threading
|
|
37
|
+
from contextlib import contextmanager
|
|
38
|
+
|
|
39
|
+
# Reentrant lock that serializes access to the shared connection; it is taken
# by init_db() and held for the whole body of transaction().
_LOCK = threading.RLock()
# The single shared sqlite3 connection. None until init_db() assigns it.
_CONN = None
|
|
41
|
+
|
|
42
|
+
SCHEMA_SQL = """
|
|
43
|
+
CREATE TABLE IF NOT EXISTS apps (
|
|
44
|
+
key TEXT PRIMARY KEY,
|
|
45
|
+
created_at TEXT NOT NULL
|
|
46
|
+
);
|
|
47
|
+
|
|
48
|
+
CREATE TABLE IF NOT EXISTS object_schemas (
|
|
49
|
+
app TEXT NOT NULL REFERENCES apps(key) ON DELETE CASCADE,
|
|
50
|
+
name TEXT NOT NULL,
|
|
51
|
+
fields TEXT NOT NULL,
|
|
52
|
+
created_at TEXT NOT NULL,
|
|
53
|
+
updated_at TEXT NOT NULL,
|
|
54
|
+
PRIMARY KEY (app, name)
|
|
55
|
+
);
|
|
56
|
+
|
|
57
|
+
CREATE TABLE IF NOT EXISTS objects (
|
|
58
|
+
guid TEXT PRIMARY KEY,
|
|
59
|
+
app TEXT NOT NULL REFERENCES apps(key) ON DELETE CASCADE,
|
|
60
|
+
object_type TEXT NOT NULL,
|
|
61
|
+
data TEXT NOT NULL,
|
|
62
|
+
created_at TEXT NOT NULL,
|
|
63
|
+
updated_at TEXT NOT NULL
|
|
64
|
+
);
|
|
65
|
+
CREATE INDEX IF NOT EXISTS idx_objects_app_type ON objects(app, object_type);
|
|
66
|
+
|
|
67
|
+
CREATE TABLE IF NOT EXISTS association_schemas (
|
|
68
|
+
app TEXT NOT NULL REFERENCES apps(key) ON DELETE CASCADE,
|
|
69
|
+
name TEXT NOT NULL,
|
|
70
|
+
from_type TEXT NOT NULL,
|
|
71
|
+
to_type TEXT NOT NULL,
|
|
72
|
+
forward_name TEXT NOT NULL,
|
|
73
|
+
inverse_name TEXT NOT NULL,
|
|
74
|
+
cardinality TEXT NOT NULL,
|
|
75
|
+
symmetric INTEGER NOT NULL DEFAULT 0,
|
|
76
|
+
forward_description TEXT,
|
|
77
|
+
inverse_description TEXT,
|
|
78
|
+
created_at TEXT NOT NULL,
|
|
79
|
+
updated_at TEXT NOT NULL,
|
|
80
|
+
PRIMARY KEY (app, name)
|
|
81
|
+
);
|
|
82
|
+
|
|
83
|
+
CREATE TABLE IF NOT EXISTS associations (
|
|
84
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
85
|
+
app TEXT NOT NULL REFERENCES apps(key) ON DELETE CASCADE,
|
|
86
|
+
assoc_name TEXT NOT NULL,
|
|
87
|
+
from_guid TEXT NOT NULL,
|
|
88
|
+
to_guid TEXT NOT NULL,
|
|
89
|
+
created_at TEXT NOT NULL,
|
|
90
|
+
UNIQUE(app, assoc_name, from_guid, to_guid)
|
|
91
|
+
);
|
|
92
|
+
CREATE INDEX IF NOT EXISTS idx_assoc_app ON associations(app);
|
|
93
|
+
CREATE INDEX IF NOT EXISTS idx_assoc_from ON associations(from_guid);
|
|
94
|
+
CREATE INDEX IF NOT EXISTS idx_assoc_to ON associations(to_guid);
|
|
95
|
+
CREATE INDEX IF NOT EXISTS idx_assoc_app_name ON associations(app, assoc_name);
|
|
96
|
+
"""
|
|
97
|
+
# The (app, name) primary keys on object_schemas/association_schemas index the
|
|
98
|
+
# app column as their leftmost prefix, so app-scoped lookups on those tables are
|
|
99
|
+
# already covered; objects/associations get explicit app indexes above.
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def init_db(path):
    """Open (or create) the database at ``path`` and ensure the schema exists.

    ``path`` may be ``":memory:"`` for ephemeral use (tests). Safe to call more
    than once; a later call closes the previous connection and replaces it.

    Returns the new shared connection.

    Raises ``RuntimeError`` (from ``_migrate``) when ``path`` holds a database
    from before the app model, and ``sqlite3.Error`` when the file cannot be
    opened or set up. On any failure the half-configured connection is closed
    and the module is left uninitialized rather than pointing at a dead handle.
    """
    global _CONN
    with _LOCK:
        # Drop the old connection first and mark the module uninitialized, so
        # a failure below cannot leave _CONN referring to a closed handle.
        previous, _CONN = _CONN, None
        if previous is not None:
            try:
                previous.close()
            except sqlite3.Error:
                pass  # best-effort: the old handle is being discarded anyway

        new_conn = sqlite3.connect(path, check_same_thread=False)
        try:
            new_conn.row_factory = sqlite3.Row
            # Run the legacy-database guard before any pragma or DDL: if an old
            # file has tables without an ``app`` column, index creation in
            # SCHEMA_SQL could fail with an opaque error (and the file would
            # already have been modified) before the clear refusal is raised.
            _migrate(new_conn)
            new_conn.execute("PRAGMA journal_mode=WAL;")
            new_conn.execute("PRAGMA synchronous=NORMAL;")
            new_conn.execute("PRAGMA busy_timeout=5000;")
            new_conn.execute("PRAGMA foreign_keys=ON;")  # enforce the app cascade + FKs
            new_conn.executescript(SCHEMA_SQL)
            new_conn.commit()
        except BaseException:
            new_conn.close()  # do not leak the handle when setup fails
            raise
        _CONN = new_conn
        return _CONN
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _migrate(conn):
|
|
129
|
+
"""Guard against opening a database from before the multi-tenant 'app' model.
|
|
130
|
+
|
|
131
|
+
Apps make ``(app, name)`` the identity of a type, which changes table primary
|
|
132
|
+
keys — that is not an additive ``ALTER`` we can apply in place. Rather than
|
|
133
|
+
silently rehome old rows under some magic app key (and risk reinterpreting
|
|
134
|
+
data), refuse with a clear message. Fresh databases (and ``:memory:``) are
|
|
135
|
+
created app-aware by ``SCHEMA_SQL`` above and pass straight through.
|
|
136
|
+
"""
|
|
137
|
+
info = conn.execute("PRAGMA table_info(object_schemas)").fetchall()
|
|
138
|
+
cols = {r["name"] for r in info}
|
|
139
|
+
if info and "app" not in cols:
|
|
140
|
+
raise RuntimeError(
|
|
141
|
+
"This database predates MorphDB's multi-tenant 'app' model and cannot "
|
|
142
|
+
"be opened. Point --db at a fresh file (or remove the old one); the "
|
|
143
|
+
"app model requires a clean schema."
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@contextmanager
def transaction():
    """Yield the shared connection inside an exclusive, committed transaction.

    All reads and writes funnel through here so that multi-statement operations
    (e.g. enforce cardinality then insert) are atomic with respect to each other.

    The module lock is held for the whole ``with`` body. On normal exit the
    connection is committed; if the body (or the commit) raises anything,
    including ``KeyboardInterrupt``/``SystemExit``, the connection is rolled
    back and the exception re-raised, so a later caller can never commit
    another caller's half-finished writes.

    Because the lock is reentrant, a nested ``transaction()`` on the same
    thread shares the same connection: the inner block's commit or rollback
    applies to the outer block's pending work as well.

    Raises ``RuntimeError`` if ``init_db()`` has not been called.
    """
    with _LOCK:
        # Read the global once, under the lock, so the check, the yield, and
        # the commit/rollback all act on the same connection object.
        connection = _CONN
        if connection is None:
            raise RuntimeError("Database not initialized. Call init_db() first.")
        try:
            yield connection
            connection.commit()
        except BaseException:
            connection.rollback()
            raise
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def conn():
    """Return the shared connection as-is (no lock taken, no transaction opened).

    Raises ``RuntimeError`` if ``init_db()`` has not been called yet.
    """
    shared = _CONN
    if shared is not None:
        return shared
    raise RuntimeError("Database not initialized. Call init_db() first.")
|
morphdb/errors.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Typed API errors. Handlers raise these; the server turns them into JSON."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ApiError(Exception):
    """Exception carrying an HTTP status and a machine-readable error code.

    ``status`` is the HTTP status code, ``code`` a short identifier such as
    ``"not_found"``, ``message`` the human-readable text (also used as the
    exception's own message), and any extra keyword arguments are kept in
    ``extra`` and merged into the rendered body.
    """

    def __init__(self, status, code, message, **extra):
        super().__init__(message)
        self.status, self.code = status, code
        self.message, self.extra = message, extra

    def to_dict(self):
        """Return the JSON-ready body ``{"error": {"code", "message", ...extra}}``."""
        return {"error": {"code": self.code, "message": self.message, **self.extra}}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Convenience constructors for the common cases ---------------------------------
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def bad_request(message, **extra):
    """Build (not raise) a 400 ``bad_request`` ApiError; ``extra`` is passed through."""
    error = ApiError(400, "bad_request", message, **extra)
    return error
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def not_found(message, **extra):
    """Build (not raise) a 404 ``not_found`` ApiError; ``extra`` is passed through."""
    error = ApiError(404, "not_found", message, **extra)
    return error
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def conflict(message, **extra):
    """Build (not raise) a 409 ``conflict`` ApiError; ``extra`` is passed through."""
    error = ApiError(409, "conflict", message, **extra)
    return error
|
morphdb/fieldtypes.py
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
"""Field type system for object schemas.
|
|
2
|
+
|
|
3
|
+
A schema field is normalized to ``{"type": <type>, "required": bool, "default": <any>}``.
|
|
4
|
+
Values are coerced/validated on write so that what comes back out of the store is
|
|
5
|
+
predictable for the frontend, even though the underlying storage is a JSON blob.
|
|
6
|
+
|
|
7
|
+
The type set is intentionally small and forgiving — coding agents generate messy
|
|
8
|
+
data and we would rather coerce than reject when the intent is unambiguous.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import math
|
|
12
|
+
import re
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
|
|
15
|
+
from .errors import bad_request
|
|
16
|
+
|
|
17
|
+
FIELD_TYPES = {"string", "number", "boolean", "json", "datetime"}
|
|
18
|
+
|
|
19
|
+
# Field names must be safe SQL/JSON identifiers: they are interpolated into
|
|
20
|
+
# json_extract paths (e.g. "$.title") and ORDER BY clauses, so restricting them
|
|
21
|
+
# to this charset closes any injection vector at the source. Anchor with \Z (not
|
|
22
|
+
# $, which also matches just before a trailing newline) so "city\n" is rejected.
|
|
23
|
+
_FIELD_NAME_RE = re.compile(r"\A[A-Za-z][A-Za-z0-9_]*\Z")
|
|
24
|
+
_NUM_STR_RE = re.compile(r"-?\d+(\.\d+)?([eE][+-]?\d+)?")
|
|
25
|
+
_INT_STR_RE = re.compile(r"-?\d+")
|
|
26
|
+
|
|
27
|
+
# A bare numeric datetime string is treated as epoch seconds only above this
|
|
28
|
+
# magnitude (~1973); smaller bare numbers like "2024" are ambiguous (a year?)
|
|
29
|
+
# and are rejected rather than silently parsed as a 1970-relative timestamp.
|
|
30
|
+
_EPOCH_MIN_ABS = 1e8
|
|
31
|
+
|
|
32
|
+
# Max nesting depth for a json value. Kept well under Python's recursion limit
|
|
33
|
+
# so a value that validates on write can always be re-parsed on read.
|
|
34
|
+
MAX_JSON_DEPTH = 100
|
|
35
|
+
|
|
36
|
+
# Canonical datetime form: fixed-width UTC ISO-8601 so lexical ordering equals
|
|
37
|
+
# chronological ordering and all equivalent representations collapse to one.
|
|
38
|
+
_DT_CANON = "%Y-%m-%dT%H:%M:%S.%fZ"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def normalize_field_def(name, raw):
    """Accept shorthand (``"string"``) or rich (``{"type": ...}``) field defs.

    ``name`` is used only in error messages. ``raw`` is either a type string or
    a dict with ``type`` and optional ``required`` / ``default`` keys.

    Returns the canonical ``{"type", "required", "default"}`` form, with the
    default already coerced to the field's type.

    Raises a 400 ``ApiError`` when ``raw`` has the wrong shape, the type is
    unknown, or the default does not fit the type.
    """
    if isinstance(raw, str):
        ftype, required, default = raw, False, None
    elif isinstance(raw, dict):
        ftype = raw.get("type")
        required = bool(raw.get("required", False))
        default = raw.get("default")
    else:
        raise bad_request(
            f"Field '{name}' must be a type string or an object, got {type(raw).__name__}."
        )

    # Check for str first: a non-hashable "type" (e.g. a list or object) would
    # make the set membership test itself raise TypeError instead of a 400.
    if not isinstance(ftype, str) or ftype not in FIELD_TYPES:
        raise bad_request(
            f"Field '{name}' has unknown type '{ftype}'. "
            f"Valid types: {sorted(FIELD_TYPES)}."
        )

    # Validate the default eagerly so a bad default is caught at schema-define time.
    if default is not None:
        default = coerce_value(name, default, ftype)

    return {"type": ftype, "required": required, "default": default}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def validate_member_name(name, kind="field"):
    """Check that ``name`` is a usable field/relation identifier and return it.

    Names end up inside SQL/JSON paths (json_extract '$.name', ORDER BY) and
    share the object's body namespace, so only safe identifiers are allowed.
    Checks run in this order, and the first failure raises a 400 ``ApiError``:
    must be a non-empty string; must not start with an underscore (reserved for
    system fields); must match the identifier pattern (fully anchored, so a
    trailing newline is rejected); must not contain '__' (the filter-operator
    separator, as in field__gt). ``kind`` only affects the message wording.
    """
    problem = None
    if not (isinstance(name, str) and name):
        problem = f"{kind.capitalize()} name must be a non-empty string, got {name!r}."
    elif name.startswith("_"):
        problem = (
            f"{kind.capitalize()} name '{name}' is reserved (leading underscore is "
            "for system fields like _guid/_type)."
        )
    elif _FIELD_NAME_RE.match(name) is None:
        problem = (
            f"Invalid {kind} name '{name}'. Use a letter followed by letters, "
            "digits, or underscores (e.g. 'title', 'due_date')."
        )
    elif "__" in name:
        problem = (
            f"{kind.capitalize()} name '{name}' may not contain '__' (reserved for "
            "filter operators like field__gt)."
        )

    if problem is not None:
        raise bad_request(problem)
    return name
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def normalize_fields(fields):
    """Return ``fields`` (a ``{name: def}`` mapping) with every def canonicalized.

    Each name is validated before its definition is normalized, in mapping
    order; a non-dict ``fields`` is rejected with a 400 ``ApiError``.
    """
    if not isinstance(fields, dict):
        raise bad_request("'fields' must be an object mapping field name -> type.")
    # validate_member_name returns the name it was given, so it can gate the
    # normalize call inline while keeping the validate-then-normalize order.
    return {
        name: normalize_field_def(validate_member_name(name, "field"), raw)
        for name, raw in fields.items()
    }
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _canonical_dt(dtobj):
|
|
111
|
+
"""Render a datetime as the canonical fixed-width UTC ISO string.
|
|
112
|
+
|
|
113
|
+
Naive datetimes are assumed to be UTC; aware ones are converted to UTC.
|
|
114
|
+
"""
|
|
115
|
+
if dtobj.tzinfo is None:
|
|
116
|
+
dtobj = dtobj.replace(tzinfo=timezone.utc)
|
|
117
|
+
return dtobj.astimezone(timezone.utc).strftime(_DT_CANON)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _epoch_to_canonical(field, value):
    """Convert epoch seconds (number or numeric string) to the canonical UTC form.

    ``field`` is used only in error messages. Raises a 400 ``ApiError`` when
    the magnitude is too small to be an unambiguous timestamp, or when the
    value cannot be represented as a datetime.
    """
    try:
        # float() itself can overflow on a very large integer, so it belongs
        # inside the guarded region rather than escaping as a server error.
        fval = float(value)
    except (OverflowError, ValueError):
        raise bad_request(
            f"Field '{field}': epoch value {value!r} is out of range."
        ) from None
    # Reject ambiguously-small magnitudes consistently for both JSON numbers and
    # numeric strings (a value like 0 or 2024 is more likely a mistake than a
    # 1970-relative timestamp).
    if abs(fval) < _EPOCH_MIN_ABS:
        raise bad_request(
            f"Field '{field}': ambiguous datetime {value!r}; use an ISO-8601 "
            f"string, or epoch seconds with magnitude >= {int(_EPOCH_MIN_ABS)}."
        )
    try:
        return _canonical_dt(datetime.fromtimestamp(fval, tz=timezone.utc))
    except (OverflowError, OSError, ValueError):
        raise bad_request(
            f"Field '{field}': epoch value {value!r} is out of range."
        ) from None
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _parse_datetime(field, value):
    """Validate a datetime string and return it in canonical UTC ISO form.

    Accepts ISO-8601 (with or without 'Z'/offset), a few common formats, and a
    bare numeric string treated as epoch seconds (so query values can match the
    epoch-seconds write path). ``field`` is used only in error messages.

    Raises a 400 ``ApiError`` for empty, malformed, unparseable, or
    out-of-range input.
    """
    s = value.strip()
    if not s:
        raise bad_request(f"Field '{field}': empty datetime string.")
    # Treat a bare number as epoch seconds only if it's large enough to be an
    # unambiguous timestamp; otherwise fall through (and likely 400) so a value
    # like "2024" isn't silently turned into a 1970 instant.
    if _NUM_STR_RE.fullmatch(s) and abs(float(s)) >= _EPOCH_MIN_ABS:
        return _epoch_to_canonical(field, s)
    # 'Z' (UTC) must be a single trailing marker and not coexist with another
    # offset — fromisoformat would silently ignore a misplaced/duplicate 'Z'.
    if "Z" in s:
        core = s[:-1]
        if s.count("Z") != 1 or not s.endswith("Z") or "+" in core \
                or core.count("-") > 2:
            raise bad_request(
                f"Field '{field}': malformed datetime '{value}'."
            )
    iso = s[:-1] + "+00:00" if s.endswith("Z") else s
    # Python 3.10 fromisoformat accepts only 3/6-digit fractional seconds.
    # Normalize every fraction to exactly 6 digits: truncate longer ones
    # (nanoseconds from Go/Java/JS) and zero-pad shorter ones (".5", ".12"),
    # which are equally valid ISO-8601.
    iso = re.sub(r"\.(\d+)", lambda m: "." + m.group(1)[:6].ljust(6, "0"), iso)
    try:
        return _canonical_dt(datetime.fromisoformat(iso))
    except (ValueError, OverflowError):
        # OverflowError: a boundary date whose UTC conversion leaves the
        # supported year range; treated like any other unparseable value.
        pass
    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ",
                "%Y-%m-%d %H:%M:%S", "%Y/%m/%d", "%m/%d/%Y"):
        try:
            return _canonical_dt(datetime.strptime(s, fmt))
        except (ValueError, OverflowError):
            continue
    raise bad_request(
        f"Field '{field}': '{value}' is not a valid date/datetime (use ISO-8601)."
    )
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _validate_json(field, value, _depth=0):
    """Walk a json value and reject anything that would poison later reads.

    Two things are refused with a 400 ``ApiError``: non-finite floats
    (NaN/Infinity, which json.dumps would write as invalid JSON) and nesting
    deeper than MAX_JSON_DEPTH (which could overflow the recursion limit when
    the blob is parsed again on read). ``_depth`` is the nesting level of
    ``value`` itself. Returns ``None`` when the value is acceptable.
    """
    # Explicit stack instead of recursion; children are pushed in reverse so
    # nodes are visited in the same parent-first, left-to-right order.
    pending = [(value, _depth)]
    while pending:
        node, level = pending.pop()
        if level > MAX_JSON_DEPTH:
            raise bad_request(
                f"Field '{field}': json nesting exceeds {MAX_JSON_DEPTH} levels."
            )
        if isinstance(node, float) and not math.isfinite(node):
            raise bad_request(
                f"Field '{field}': json values may not contain NaN or Infinity."
            )
        if isinstance(node, dict):
            children = list(node.values())
        elif isinstance(node, (list, tuple)):
            children = list(node)
        else:
            continue
        pending.extend((child, level + 1) for child in reversed(children))
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def coerce_value(field, value, ftype):
    """Coerce a single value to its declared type, or raise ApiError.

    ``field`` is the field name (used in messages), ``value`` the incoming
    value, ``ftype`` one of the declared field types.

    ``None`` always passes through (absence of a value is allowed unless the
    field is required, which is checked separately).

    Returns the coerced value: ``str`` for string, ``int``/``float`` for
    number, ``bool`` for boolean, a canonical UTC ISO string for datetime, and
    the value unchanged for json. Raises a 400 ``ApiError`` when the value
    cannot be interpreted as ``ftype``.
    """
    if value is None:
        return None

    if ftype == "string":
        if isinstance(value, (dict, list)):
            raise bad_request(f"Field '{field}' expects a string, got {type(value).__name__}.")
        if isinstance(value, bool):
            # JSON spelling rather than Python's "True"/"False".
            return "true" if value else "false"
        return str(value)

    if ftype == "number":
        # bool is a subclass of int in Python — exclude it explicitly.
        if isinstance(value, bool):
            raise bad_request(f"Field '{field}' expects a number, got boolean.")
        if isinstance(value, (int, float)):
            num = value
        elif isinstance(value, str):
            s = value.strip()
            if not _NUM_STR_RE.fullmatch(s):
                # Reject anything that isn't a plain decimal/exponent number,
                # including Python-only forms like underscore separators.
                raise bad_request(f"Field '{field}' expects a number, got {value!r}.")
            try:
                # Parse integer strings with int() (not float()) to preserve exact
                # value for magnitudes beyond float's 53-bit mantissa.
                num = int(s) if _INT_STR_RE.fullmatch(s) else float(s)
            except ValueError:
                # int() refuses digit strings beyond the interpreter's integer
                # string conversion limit; that is a client error, not a crash.
                raise bad_request(
                    f"Field '{field}' expects a number, got a {len(s)}-character "
                    "numeric string that is too long to parse."
                ) from None
        else:
            raise bad_request(
                f"Field '{field}' expects a number, got {type(value).__name__}."
            )
        # Reject non-finite values: json.dumps would emit bare NaN/Infinity
        # (invalid JSON) and SQLite's json_extract chokes on them.
        if isinstance(num, float) and not math.isfinite(num):
            raise bad_request(
                f"Field '{field}' must be a finite number (got {value!r})."
            )
        return num

    if ftype == "boolean":
        if isinstance(value, bool):
            return value
        if isinstance(value, (int, float)) and value in (0, 1):
            return bool(value)
        if isinstance(value, str):
            low = value.strip().lower()
            if low in ("true", "1", "yes", "y", "on"):
                return True
            if low in ("false", "0", "no", "n", "off"):
                return False
        raise bad_request(f"Field '{field}' expects a boolean, got {value!r}.")

    if ftype == "datetime":
        # Normalize to a canonical UTC ISO string. Accept epoch seconds or an ISO
        # string; reject values that are not real dates so the column stays
        # sortable/comparable.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return _epoch_to_canonical(field, value)
        if isinstance(value, str):
            return _parse_datetime(field, value)
        raise bad_request(
            f"Field '{field}' expects a datetime, got {type(value).__name__}."
        )

    if ftype == "json":
        # Any JSON-serializable value is fine, except non-finite floats (invalid
        # JSON) and excessively deep nesting (unreadable) — both poison reads.
        _validate_json(field, value)
        return value

    raise bad_request(f"Field '{field}' has unhandled type '{ftype}'.")
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def project_data(stored, fields):
    """Reinterpret a stored JSON blob through the *current* schema.

    The result has exactly the schema's fields, in schema order:

    * keys in ``stored`` that the schema no longer declares are dropped;
    * a json field present in the blob is returned as stored, null included;
    * any other field keeps its stored value only when that value is non-null
      and its Python type still matches the field's current type (a value left
      over from before a retype counts as "not set");
    * everything else gets the field's default (or ``None``).

    Stored rows are never rewritten by schema edits; they are only read through
    this projection.
    """
    projected = {}
    for name, fdef in fields.items():
        ftype = fdef["type"]
        keep_stored = name in stored and (
            ftype == "json"
            or (stored[name] is not None and _matches_type(stored[name], ftype))
        )
        projected[name] = stored[name] if keep_stored else fdef.get("default")
    return projected
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _matches_type(value, ftype):
|
|
311
|
+
"""True if a stored Python value's type matches a declared field type."""
|
|
312
|
+
if ftype == "number":
|
|
313
|
+
return isinstance(value, (int, float)) and not isinstance(value, bool)
|
|
314
|
+
if ftype == "boolean":
|
|
315
|
+
return isinstance(value, bool)
|
|
316
|
+
if ftype in ("string", "datetime"):
|
|
317
|
+
return isinstance(value, str)
|
|
318
|
+
return True # json
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def validate_against_schema(data, fields, partial=False):
    """Coerce/validate an incoming ``data`` dict against schema fields.

    ``fields`` is the normalized ``{name: {"type", "required", "default"}}``
    mapping. Returns a new dict holding only declared fields, with coerced
    values.

    Unknown fields (not in the schema) are rejected — this keeps the data
    honest and surfaces agent typos early; keys starting with an underscore are
    ignored rather than rejected. When ``partial`` is True (PATCH-like upsert
    of a subset), fields absent from ``data`` are left out of the result and
    required-field checks are skipped.

    An explicit ``null`` for a non-json field is treated as "not set": the
    default is stored if there is one, and on a full write a required field
    without a default is rejected. json fields keep ``null`` as a real value.

    Raises a 400 ``ApiError`` on any violation.
    """
    if not isinstance(data, dict):
        raise bad_request("Object data must be a JSON object.")

    unknown = [k for k in data if k not in fields and not k.startswith("_")]
    if unknown:
        raise bad_request(
            f"Unknown field(s) {unknown}. Declared fields: {sorted(fields)}. "
            "Update the schema first, or remove the stray field."
        )

    out = {}
    for name, fdef in fields.items():
        default = fdef.get("default")
        if name in data:
            value = coerce_value(name, data[name], fdef["type"])
            if value is not None or fdef["type"] == "json":
                out[name] = value
            elif default is not None:
                # Reads already map a stored null to the default for typed
                # fields; storing the default keeps blob-level queries in
                # agreement with what reads return.
                out[name] = default
            elif fdef.get("required") and not partial:
                raise bad_request(f"Required field '{name}' may not be null.")
            else:
                out[name] = None
        elif not partial:
            # On a full write, materialize the default into the stored blob so
            # that queries (which read the blob directly) agree with reads
            # (which project the blob). A missing required field with no default
            # is an error.
            if default is not None:
                out[name] = default
            elif fdef.get("required"):
                raise bad_request(f"Required field '{name}' is missing.")
    return out
|