nexo-brain 7.27.3 → 7.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +5 -1
- package/bin/windows-wsl-bridge.js +9 -0
- package/package.json +1 -1
- package/src/causal_graph.py +763 -0
- package/src/classifier_local.py +44 -0
- package/src/cognitive/_core.py +3 -0
- package/src/cognitive_control_observatory.py +2 -0
- package/src/db/__init__.py +8 -0
- package/src/db/_commitments.py +344 -0
- package/src/db/_entities.py +98 -11
- package/src/db/_memory_v2.py +130 -2
- package/src/db/_schema.py +565 -0
- package/src/desktop_bridge.py +1 -1
- package/src/doctor/providers/runtime.py +9 -3
- package/src/enforcement_engine.py +128 -2
- package/src/entity_live_profile.py +1073 -0
- package/src/failure_prevention.py +1052 -0
- package/src/hook_guardrails.py +104 -0
- package/src/knowledge_graph.py +46 -9
- package/src/local_context/api.py +54 -22
- package/src/local_context/usage_events.py +273 -8
- package/src/memory_executive.py +620 -0
- package/src/memory_utility.py +952 -0
- package/src/plugin_loader.py +9 -5
- package/src/plugins/entities.py +84 -7
- package/src/plugins/entity_live_profile.py +101 -0
- package/src/plugins/failure_prevention.py +162 -0
- package/src/plugins/memory_export.py +55 -18
- package/src/plugins/protocol.py +133 -0
- package/src/plugins/semantic_layers.py +138 -0
- package/src/pre_answer_router.py +622 -28
- package/src/pre_answer_runtime.py +463 -18
- package/src/r14_correction_learning.py +3 -3
- package/src/requirements.txt +5 -1
- package/src/runtime_versioning.py +11 -1
- package/src/saved_not_used_audit.py +44 -3
- package/src/scripts/nexo-followup-runner.py +194 -0
- package/src/semantic_layers.py +1153 -0
- package/src/semantic_reasoner.py +2 -2
- package/src/semantic_router.py +58 -11
- package/src/server.py +41 -3
- package/src/tools_sessions.py +88 -31
- package/src/tools_transcripts.py +38 -22
- package/src/user_state_model.py +971 -0
- package/tool-enforcement-map.json +230 -0
|
@@ -0,0 +1,1073 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""EntityLiveProfile facade.
|
|
4
|
+
|
|
5
|
+
This module composes existing authoritative stores into a redacted, cacheable
|
|
6
|
+
profile. It must never become the owner of identity, artifacts, paths, facts,
|
|
7
|
+
relations, commitments, or evidence.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
12
|
+
import re
|
|
13
|
+
import time
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import db
|
|
18
|
+
|
|
19
|
+
try:
    # Use the implementations from local_context.extractors when they import cleanly.
    from local_context.extractors import canonical_entity_key, normalize_entity_alias
except Exception:  # pragma: no cover - local_context may be unavailable in tiny runtimes
    # Minimal stand-ins, defined only when the import above fails.
    def normalize_entity_alias(value: str) -> str:
        """Lower-case ``value`` and collapse every whitespace run to one space."""
        return " ".join(str(value or "").lower().split())

    def canonical_entity_key(value: str) -> str:
        """Return ``alias:<normalized value>``, or ``""`` when nothing is left after normalizing."""
        clean = normalize_entity_alias(value)
        return f"alias:{clean}" if clean else ""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Version tag that is hashed into profile UIDs.
PROFILE_VERSION = "entity_live_profile.v1"
# Surfaces granted to data whose privacy level is "public" or "normal".
DEFAULT_SURFACES = ("pre_answer", "pre_action", "debug_local", "audit")
# Surfaces that _surfaces_for_privacy only adds when allow_public_release=True.
BLOCKED_PUBLIC_SURFACES = {"export", "release_public"}
# NOTE(review): not referenced in the visible part of this module; presumably
# the sources considered expensive to consult - confirm against the callers.
HEAVY_SOURCES = {"local_context", "entity_dossier", "memory", "transcripts", "cognitive", "remote_llm"}
# Budget tiers for which the local-context dossier lookup is skipped.
LIGHT_BUDGET_TIERS = {"instant", "quick"}
# Ordering of privacy labels; a higher number is more restrictive.
PRIVACY_RANK = {"public": 0, "normal": 1, "private": 2, "sensitive": 3, "secret": 4}
|
|
36
|
+
|
|
37
|
+
_SECRET_PATTERNS = (
|
|
38
|
+
re.compile(r"\bBearer\s+[A-Za-z0-9._\-~+/]{12,}\b", re.I),
|
|
39
|
+
re.compile(r"\bsk-[A-Za-z0-9_\-]{20,}\b"),
|
|
40
|
+
re.compile(r"\b(?:ghp|gho|ghu|ghs|github_pat|glpat|xoxb|xoxp|shpat)_[A-Za-z0-9_]{16,}\b", re.I),
|
|
41
|
+
re.compile(r"\b(AKIA|ASIA)[A-Z0-9]{16,}\b"),
|
|
42
|
+
re.compile(r"\b(?:password|passwd|pwd|token|secret|api[_-]?key)\s*[:=]\s*['\"]?[^'\"\s,;]{8,}", re.I),
|
|
43
|
+
)
|
|
44
|
+
# Paths rooted at "~" or one of a fixed set of well-known top-level directories.
_PATH_PATTERN = re.compile(
    r"(?<![\w])(?:~|/Users|/home|/var|/srv|/www|/etc|/opt|/tmp|/Volumes)"
    r"(?:/[^\s,;:'\")\]}]+)+"
)
# Any other absolute path with at least two segments; the look-behind rejects a
# preceding word character or ":" (so the "//host/..." part of a URL is skipped).
_GENERIC_ABS_PATH_PATTERN = re.compile(r"(?<![\w:])/[A-Za-z0-9._@+-]+(?:/[A-Za-z0-9._@+-]+)+")
# Dotted quads. Octet ranges are not validated, so any four dot-separated
# groups of 1-3 digits match.
_IP_PATTERN = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
# docroot / vhost style "key=value" or "key: value" assignments.
_DOCROOT_PATTERN = re.compile(r"\b(?:docroot|document_root|root_path|vhost)\s*[:=]\s*[^\s,;]+", re.I)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _conn():
    """Return the handle produced by ``db.get_db()``."""
    handle = db.get_db()
    return handle
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _now() -> float:
|
|
58
|
+
try:
|
|
59
|
+
return float(db.now_epoch())
|
|
60
|
+
except Exception:
|
|
61
|
+
return time.time()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _table_exists(conn, table: str) -> bool:
|
|
65
|
+
try:
|
|
66
|
+
return conn.execute(
|
|
67
|
+
"SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1",
|
|
68
|
+
(table,),
|
|
69
|
+
).fetchone() is not None
|
|
70
|
+
except Exception:
|
|
71
|
+
return False
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _ensure_table(conn, table: str) -> None:
    """Run the schema migrations on ``conn`` when ``table`` is not present yet."""
    if not _table_exists(conn, table):
        # Imported lazily, only on the path that actually needs the migrations.
        from db._schema import run_migrations

        run_migrations(conn)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _json(value: Any, default: Any) -> str:
|
|
83
|
+
if value in (None, ""):
|
|
84
|
+
value = default
|
|
85
|
+
try:
|
|
86
|
+
return json.dumps(value, ensure_ascii=False, sort_keys=True)
|
|
87
|
+
except Exception:
|
|
88
|
+
return json.dumps(default, ensure_ascii=False, sort_keys=True)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _parse_json(value: Any, default: Any) -> Any:
|
|
92
|
+
if value in (None, ""):
|
|
93
|
+
return default
|
|
94
|
+
if isinstance(value, (dict, list)):
|
|
95
|
+
return value
|
|
96
|
+
try:
|
|
97
|
+
parsed = json.loads(str(value))
|
|
98
|
+
return parsed if parsed is not None else default
|
|
99
|
+
except Exception:
|
|
100
|
+
return default
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _hash(value: Any, *, length: int = 24) -> str:
|
|
104
|
+
return hashlib.sha256(
|
|
105
|
+
json.dumps(value, ensure_ascii=True, sort_keys=True, separators=(",", ":")).encode(
|
|
106
|
+
"utf-8",
|
|
107
|
+
errors="ignore",
|
|
108
|
+
)
|
|
109
|
+
).hexdigest()[:length]
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _safe_event_uid(idempotency_key: str, fallback_parts: Any) -> str:
    """Return ``idempotency_key`` when it is safe to store, else a derived id.

    The key is kept only if it consists of 1-120 characters from
    ``[A-Za-z0-9_.:-]`` and redaction leaves it unchanged. Otherwise the
    result is ``ACU-`` followed by a 32-hex-digit hash of ``fallback_parts``.
    """
    candidate = str(idempotency_key or "").strip()
    looks_like_token = bool(candidate) and re.fullmatch(r"[A-Za-z0-9_.:-]{1,120}", candidate) is not None
    if looks_like_token and redact_entity_value(candidate) == candidate:
        return candidate
    return f"ACU-{_hash(fallback_parts, length=32)}"
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _unique(values: list[Any] | tuple[Any, ...]) -> list[str]:
|
|
122
|
+
seen: set[str] = set()
|
|
123
|
+
result: list[str] = []
|
|
124
|
+
for raw in values:
|
|
125
|
+
clean = str(raw or "").strip()
|
|
126
|
+
if not clean or clean in seen:
|
|
127
|
+
continue
|
|
128
|
+
seen.add(clean)
|
|
129
|
+
result.append(clean)
|
|
130
|
+
return result
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def redact_entity_value(value: Any) -> str:
    """Return a compact value safe for normal profile surfaces.

    Containers (``dict``/``list``/``tuple``) are rendered as JSON, everything
    else via ``str`` (falsy values become ``""``). Secrets, docroot
    assignments, filesystem paths and IP-like strings are then replaced by
    ``[REDACTED:...]`` placeholders, in that order, and the result is cut to
    1200 characters.
    """
    text = _json(value, {}) if isinstance(value, (dict, list, tuple)) else str(value or "")
    # Order matters: earlier replacements change what later patterns can see.
    scrubbers = [(pattern, "[REDACTED:secret]") for pattern in _SECRET_PATTERNS]
    scrubbers.append((_DOCROOT_PATTERN, "[REDACTED:docroot]"))
    scrubbers.append((_PATH_PATTERN, "[REDACTED:path]"))
    scrubbers.append((_GENERIC_ABS_PATH_PATTERN, "[REDACTED:path]"))
    scrubbers.append((_IP_PATTERN, "[REDACTED:ip]"))
    for pattern, placeholder in scrubbers:
        text = pattern.sub(placeholder, text)
    return text[:1200]
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def sanitize_refs(refs: Any) -> list[str]:
    """Redact and de-duplicate source references, returning at most 50.

    A single string counts as one reference; lists, tuples and sets are
    taken item by item; any other input yields an empty list.
    """
    if isinstance(refs, str):
        candidates = [refs]
    elif isinstance(refs, (list, tuple, set)):
        candidates = list(refs)
    else:
        candidates = []
    redacted = [redact_entity_value(candidate) for candidate in candidates]
    return _unique(redacted)[:50]
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def normalize_privacy_level(value: str | None) -> str:
    """Map ``value`` onto a known privacy label, defaulting to ``"normal"``.

    The input is stripped and lower-cased; empty or unrecognized labels
    become ``"normal"``.
    """
    label = str(value or "normal").strip().lower()
    if label not in PRIVACY_RANK:
        return "normal"
    return label
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _max_privacy(*levels: str) -> str:
    """Return the most restrictive of ``levels`` (``"public"`` when none are given).

    Each level is normalized first, so unknown labels count as ``"normal"``.
    """
    normalized = [normalize_privacy_level(level) for level in levels]
    # Ranks are unique per label, so the highest-ranked label is unambiguous.
    return max(normalized, key=PRIVACY_RANK.__getitem__, default="public")
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _surfaces_for_privacy(privacy_level: str, *, allow_public_release: bool = False) -> list[str]:
    """List the surfaces on which data of ``privacy_level`` may appear.

    ``secret`` data is audit-only; ``private``/``sensitive`` data is kept off
    ``pre_answer``; everything else gets the default surfaces, plus the
    public export surfaces when ``allow_public_release`` is true.
    """
    level = normalize_privacy_level(privacy_level)
    if level == "secret":
        return ["audit"]
    if level in ("private", "sensitive"):
        return ["pre_action", "debug_local", "audit"]
    public_extras = ["export", "release_public"] if allow_public_release else []
    return [*DEFAULT_SURFACES, *public_extras]
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _surface_allowed(surface: str, allowed: list[str] | tuple[str, ...]) -> bool:
|
|
185
|
+
clean = str(surface or "").strip()
|
|
186
|
+
if clean in BLOCKED_PUBLIC_SURFACES:
|
|
187
|
+
return clean in allowed
|
|
188
|
+
return clean in allowed
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _field(
    *,
    name: str,
    value: Any,
    owner_source: str,
    source_refs: list[str] | tuple[str, ...],
    privacy_level: str = "normal",
    write_policy: str = "owner_only",
    last_verified_at: float | None = None,
    expires_at: float | None = None,
    conflict_state: str = "none",
    allowed_surfaces: list[str] | None = None,
) -> dict[str, Any]:
    """Assemble one profile field record.

    The value is stored redacted, the references sanitized and the privacy
    level normalized. When ``allowed_surfaces`` is missing or empty, the
    surfaces are derived from the privacy level. ``stale_status`` is computed
    from the two timestamps at call time.
    """
    level = normalize_privacy_level(privacy_level)
    record: dict[str, Any] = {"name": name}
    record["value_redacted"] = redact_entity_value(value)
    record["owner_source"] = owner_source
    record["source_refs"] = sanitize_refs(source_refs)
    record["privacy_level"] = level
    record["allowed_surfaces"] = allowed_surfaces if allowed_surfaces else _surfaces_for_privacy(level)
    record["write_policy"] = write_policy
    record["last_verified_at"] = last_verified_at
    record["expires_at"] = expires_at
    record["stale_status"] = _stale_status(last_verified_at, expires_at)
    record["conflict_state"] = conflict_state
    return record
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _stale_status(last_verified_at: float | None, expires_at: float | None) -> str:
    """Classify freshness as ``"expired"``, ``"unknown"`` or ``"fresh"``.

    Expiry wins: a non-``None`` ``expires_at`` at or before the current time
    means ``"expired"``. Otherwise a missing (or zero) ``last_verified_at``
    means ``"unknown"``, and anything else is ``"fresh"``.
    """
    current = _now()
    has_expired = expires_at is not None and float(expires_at or 0) <= current
    if has_expired:
        return "expired"
    return "fresh" if last_verified_at else "unknown"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _ttl_seconds(kind: str, surface: str) -> int:
|
|
230
|
+
clean_kind = str(kind or "").lower()
|
|
231
|
+
clean_surface = str(surface or "").lower()
|
|
232
|
+
pre_action = clean_surface == "pre_action"
|
|
233
|
+
if clean_kind in {"person", "client", "contact", "host"}:
|
|
234
|
+
return 24 * 3600 if pre_action else 7 * 24 * 3600
|
|
235
|
+
if clean_kind in {"project", "repo", "artifact", "managed_asset", "service", "dashboard"}:
|
|
236
|
+
return 4 * 3600 if pre_action else 24 * 3600
|
|
237
|
+
if clean_kind in {"server", "domain", "release", "campaign"}:
|
|
238
|
+
return 3600 if pre_action else 6 * 3600
|
|
239
|
+
return 24 * 3600
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _entity_aliases(row: dict[str, Any]) -> list[str]:
    """Collect every known name for an entity row, de-duplicated in order.

    Sources, in order: the row's ``name``, its ``aliases`` column, then the
    ``aliases`` and ``alias`` entries of the ``metadata`` and ``value``
    columns when those decode to JSON objects.

    Stored alias data is not guaranteed to be a list. A scalar (for example
    a JSON string) is treated as a single alias; extending a list with it
    directly would split it into characters, and a number would raise.
    """

    def as_items(candidate: Any) -> list[Any]:
        # Normalize one stored alias value to a list of alias candidates.
        if candidate in (None, ""):
            return []
        if isinstance(candidate, (list, tuple, set, dict)):
            return list(candidate)
        return [candidate]

    collected: list[Any] = [row.get("name") or ""]
    collected.extend(as_items(_parse_json(row.get("aliases"), [])))
    for source in (_parse_json(row.get("metadata"), {}), _parse_json(row.get("value"), {})):
        if isinstance(source, dict):
            collected.extend(as_items(source.get("aliases")))
            collected.extend(as_items(source.get("alias")))
    return _unique(collected)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _row_access_mode(row: dict[str, Any]) -> str:
    """Return the row's access mode, lower-cased, or ``"unknown"``.

    The first non-blank value wins among: the ``access_mode`` column, then
    ``access_mode`` inside the decoded ``metadata`` and ``value`` columns.
    """
    meta = _parse_json(row.get("metadata"), {})
    payload = _parse_json(row.get("value"), {})
    candidates = [row.get("access_mode")]
    candidates.extend(
        source.get("access_mode") if isinstance(source, dict) else ""
        for source in (meta, payload)
    )
    cleaned = (str(candidate or "").strip().lower() for candidate in candidates)
    return next((mode for mode in cleaned if mode), "unknown")
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _row_privacy(row: dict[str, Any]) -> str:
    """Return the normalized ``privacy_level`` from the row's metadata.

    Falls back to ``"normal"`` when the metadata does not decode to an object.
    """
    meta = _parse_json(row.get("metadata"), {})
    if not isinstance(meta, dict):
        return "normal"
    return normalize_privacy_level(meta.get("privacy_level"))
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _entity_candidate_score(query: str, row: dict[str, Any]) -> float:
    """Score how well ``row`` matches ``query`` on a 0.0-1.0 scale.

    Tiers, from best to worst: 1.0 for an id (or ``entity:<id>``) match,
    0.99 for an exact alias match, 0.88 when the query is a substring of an
    alias, and otherwise a term-overlap score capped at 0.82. A blank query
    or no overlap at all scores 0.0.
    """
    needle = normalize_entity_alias(query)
    if not needle:
        return 0.0

    row_id = row.get("id")
    if needle in (str(row_id or ""), f"entity:{row_id}"):
        return 1.0

    names = [normalize_entity_alias(alias) for alias in _entity_aliases(row)]
    if needle in names:
        return 0.99
    # The needle is known to be non-empty here, so plain containment suffices.
    if any(needle in name for name in names):
        return 0.88

    query_terms = set(needle.split())
    if not query_terms:
        return 0.0

    partials = []
    for name in names:
        name_terms = set(name.split())
        shared = query_terms & name_terms
        if shared:
            # Share of the alias's terms covered by the query, mapped to 0.35-0.80.
            partials.append(0.35 + len(shared) / max(len(name_terms), 1) * 0.45)
    return min(0.82, max(partials, default=0.0))
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def resolve_entity(query: str, *, conn=None, limit: int = 8) -> dict[str, Any]:
    """Resolve a query against the canonical entities table.

    Every row of ``entities`` is scored against the query. The result's
    ``status`` is ``"not_found"`` (blank query, missing table or no positive
    score), ``"ambiguous"`` (the runner-up is within 0.08 of a top score
    below 1.0) or ``"resolved"`` (the best row is returned under ``entity``).
    At most ``limit`` candidates, and always at least one, are listed.
    """
    connection = conn or _conn()
    clean_query = str(query or "").strip()

    def not_found() -> dict[str, Any]:
        return {"ok": True, "status": "not_found", "query": clean_query, "candidates": [], "needs_disambiguation": False}

    if not clean_query or not _table_exists(connection, "entities"):
        return not_found()

    ranked = []
    for raw in connection.execute("SELECT * FROM entities").fetchall():
        row = dict(raw)
        score = _entity_candidate_score(clean_query, row)
        if score > 0:
            ranked.append((score, row))
    # Best score first; the row's own confidence breaks ties.
    ranked.sort(key=lambda pair: (pair[0], float(pair[1].get("confidence") or 0.0)), reverse=True)

    keep = max(1, int(limit))
    candidates = []
    for score, row in ranked[:keep]:
        ref = f"entity:{row.get('id')}"
        candidates.append(
            {
                "entity_key": ref,
                "entity_id": int(row.get("id") or 0),
                "display_name": row.get("name") or "",
                "canonical_kind": row.get("type") or "entity",
                "score": round(float(score), 4),
                "aliases": _entity_aliases(row)[:12],
                "source_ref": ref,
            }
        )
    if not candidates:
        return not_found()

    runner_up_is_close = (
        len(candidates) > 1
        and candidates[0]["score"] < 1.0
        and candidates[1]["score"] >= candidates[0]["score"] - 0.08
    )
    if runner_up_is_close:
        return {
            "ok": True,
            "status": "ambiguous",
            "query": clean_query,
            "candidates": candidates,
            "needs_disambiguation": True,
        }

    winner = ranked[0][1]
    return {
        "ok": True,
        "status": "resolved",
        "query": clean_query,
        "entity_key": f"entity:{winner.get('id')}",
        "entity": winner,
        "confidence": candidates[0]["score"],
        "candidates": candidates,
        "needs_disambiguation": False,
    }
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _project_atlas_path() -> Path:
|
|
354
|
+
return Path("~/.nexo/brain/project-atlas.json").expanduser()
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _atlas_projects(atlas: dict[str, Any]) -> dict[str, Any]:
|
|
358
|
+
if isinstance(atlas.get("projects"), dict):
|
|
359
|
+
return atlas["projects"]
|
|
360
|
+
return {k: v for k, v in atlas.items() if isinstance(v, dict) and not str(k).startswith("_")}
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def _load_atlas(atlas: dict[str, Any] | None = None, atlas_path: str | Path | None = None) -> dict[str, Any]:
|
|
364
|
+
if isinstance(atlas, dict):
|
|
365
|
+
return atlas
|
|
366
|
+
path = Path(atlas_path).expanduser() if atlas_path else _project_atlas_path()
|
|
367
|
+
try:
|
|
368
|
+
payload = json.loads(path.read_text(encoding="utf-8"))
|
|
369
|
+
return payload if isinstance(payload, dict) else {}
|
|
370
|
+
except Exception:
|
|
371
|
+
return {}
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def _match_atlas_projects(atlas: dict[str, Any], aliases: list[str]) -> list[dict[str, Any]]:
    """Return up to five atlas projects that match any of ``aliases``.

    A project matches when one of its normalized names (key, description or
    listed aliases) equals, contains, or is contained in a normalized alias.
    Each match is the project entry with a ``project_key`` added.

    Names that normalize to an empty string are dropped on both sides,
    because an empty string is a substring of every alias and would make
    the project match any query. A project's ``aliases`` given as a single
    string is treated as one alias rather than iterated per character.
    """
    wanted = {normalize_entity_alias(alias) for alias in aliases if alias}
    wanted.discard("")
    matches: list[dict[str, Any]] = []
    if not wanted:
        return matches

    for key, entry in _atlas_projects(atlas).items():
        if not isinstance(entry, dict):
            continue
        raw_aliases = entry.get("aliases") or []
        if isinstance(raw_aliases, str):
            raw_aliases = [raw_aliases]
        elif not isinstance(raw_aliases, (list, tuple, set, dict)):
            raw_aliases = []
        names = [str(key), str(entry.get("description") or "")]
        names.extend(str(alias) for alias in raw_aliases)
        haystack = {normalize_entity_alias(name) for name in names if name}
        haystack.discard("")
        # Exact equality is the a == h case of the containment test below.
        if any(a in h or h in a for a in wanted for h in haystack):
            matches.append({"project_key": str(key), **entry})
    return matches[:5]
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def _artifact_rows(conn, aliases: list[str], project_keys: list[str]) -> list[dict[str, Any]]:
    """Look up artifact registry rows that mention any alias or project key.

    Up to twelve distinct terms are tried as case-insensitive substring
    matches against the artifact's canonical name, domain and description,
    and against its alias phrases when the ``artifact_aliases`` table
    exists. Each term contributes at most eight rows (most recently touched
    first); at most twelve distinct rows are returned overall.

    Returns an empty list when ``artifact_registry`` does not exist. The
    alias table is optional: without it the lookup runs against the
    registry columns only instead of failing on the join.
    """
    if not _table_exists(conn, "artifact_registry"):
        return []

    if _table_exists(conn, "artifact_aliases"):
        sql = """
            SELECT DISTINCT r.*
            FROM artifact_registry r
            LEFT JOIN artifact_aliases a ON a.artifact_id = r.id
            WHERE LOWER(r.canonical_name) LIKE ?
               OR LOWER(r.domain) LIKE ?
               OR LOWER(r.description) LIKE ?
               OR LOWER(COALESCE(a.phrase, '')) LIKE ?
            ORDER BY r.last_touched_at DESC
            LIMIT 8
            """
        placeholder_count = 4
    else:
        sql = """
            SELECT DISTINCT r.*
            FROM artifact_registry r
            WHERE LOWER(r.canonical_name) LIKE ?
               OR LOWER(r.domain) LIKE ?
               OR LOWER(r.description) LIKE ?
            ORDER BY r.last_touched_at DESC
            LIMIT 8
            """
        placeholder_count = 3

    terms = _unique([*aliases, *project_keys])[:12]
    rows: list[dict[str, Any]] = []
    seen: set[int] = set()
    for term in terms:
        # NOTE: "%" and "_" inside a term act as LIKE wildcards.
        pattern = f"%{term.lower()}%"
        found = conn.execute(sql, (pattern,) * placeholder_count).fetchall()
        for row in found:
            artifact_id = int(row["id"])
            if artifact_id in seen:
                continue
            seen.add(artifact_id)
            rows.append(dict(row))
    return rows[:12]
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def _collect_open_items(conn, aliases: list[str]) -> list[dict[str, Any]]:
    """Collect open work items whose text mentions one of ``aliases``.

    Four tables are searched (commitments, followups, workflow_runs and
    protocol_tasks). Tables that do not exist, or whose query raises, are
    skipped. Up to five rows per table and twenty items overall are returned,
    each with its summary redacted.
    """
    items: list[dict[str, Any]] = []
    # Only the first eight non-empty aliases are used as search terms.
    terms = [alias for alias in aliases if alias][:8]
    if not terms:
        return items
    # (table, id column, text column, status column, SQL filter that keeps open rows)
    tables = [
        ("commitments", "id", "description", "status", "status NOT IN ('done','cancelled','failed','closed')"),
        ("followups", "id", "description", "status", "status NOT IN ('done','cancelled','completed','closed')"),
        ("workflow_runs", "run_id", "goal", "status", "status NOT IN ('done','cancelled','failed','closed','completed')"),
        ("protocol_tasks", "task_id", "goal", "status", "status NOT IN ('done','cancelled','failed','closed')"),
    ]
    for table, id_col, text_col, status_col, status_filter in tables:
        if not _table_exists(conn, table):
            continue
        # One LIKE clause per term; the terms themselves are bound as parameters.
        clauses = " OR ".join(f"LOWER({text_col}) LIKE ?" for _ in terms)
        params = [f"%{term.lower()}%" for term in terms]
        try:
            # Identifiers interpolated below come only from the constant list
            # above, never from caller input.
            rows = conn.execute(
                f"""
                SELECT {id_col} AS item_id, {text_col} AS summary, {status_col} AS status
                FROM {table}
                WHERE ({clauses}) AND {status_filter}
                ORDER BY rowid DESC
                LIMIT 5
                """,
                params,
            ).fetchall()
        except Exception:
            # Best effort: a failing query on one table must not hide the others.
            continue
        for row in rows:
            items.append(
                {
                    "item_ref": f"{table}:{row['item_id']}",
                    "owner_source": table,
                    "status": row["status"],
                    "summary_redacted": redact_entity_value(row["summary"]),
                    "source_refs": [f"{table}:{row['item_id']}"],
                    "privacy_level": "normal",
                    "allowed_surfaces": list(DEFAULT_SURFACES),
                    "write_policy": "owner_only",
                }
            )
    return items[:20]
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _atlas_artifact_conflicts(projects: list[dict[str, Any]], artifacts: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Report artifact paths that disagree with the Project Atlas locations.

    An artifact is compared with the atlas project whose key equals the
    artifact's ``domain``. Every artifact path absent from that project's
    non-blank ``locations`` values yields one conflict record naming the
    atlas as the winner. Projects without locations are ignored. At most ten
    conflicts are returned.
    """
    known_paths: dict[str, set[str]] = {}
    for project in projects:
        raw_locations = project.get("locations")
        locations = raw_locations if isinstance(raw_locations, dict) else {}
        project_key = str(project.get("project_key") or "")
        known_paths[project_key] = {
            str(location) for location in locations.values() if str(location or "").strip()
        }

    found: list[dict[str, Any]] = []
    for artifact in artifacts:
        domain = str(artifact.get("domain") or "")
        authoritative = known_paths.get(domain)
        if not authoritative:
            continue
        for raw_path in _parse_json(artifact.get("paths"), []):
            candidate = str(raw_path or "")
            if not candidate or candidate in authoritative:
                continue
            found.append(
                {
                    "conflict_type": "authority_conflict",
                    "field": "location",
                    "winner": "project_atlas",
                    "loser": "artifact_registry",
                    "reason": "Project Atlas is authoritative for project/action locations.",
                    "source_refs": [f"project_atlas:{domain}", f"artifact_registry:{artifact.get('id')}"],
                    "value_redacted": redact_entity_value(candidate),
                }
            )
    return found[:10]
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def _local_dossier_fields(query: str, *, budget_tier: str, surface: str, max_chars: int = 6000) -> tuple[list[dict[str, Any]], list[str], dict[str, Any]]:
    """Fetch local-context facts for ``query`` as candidate profile fields.

    Returns ``(fields, refs, meta)``. The lookup is skipped (empty lists and
    a ``skipped`` meta) for light budget tiers, for the ``pre_answer``
    surface, and when the local context module cannot be imported or the
    dossier call raises. When the dossier reports that disambiguation is
    needed, only the candidates are passed back in ``meta``. Otherwise up to
    40 facts with a non-empty value become private, candidate-only fields
    restricted to the ``debug_local`` and ``audit`` surfaces.
    """
    if budget_tier in LIGHT_BUDGET_TIERS or surface == "pre_answer":
        return [], [], {"skipped": True, "reason": "budget_or_surface"}

    try:
        from local_context import api as local_context_api
    except Exception as exc:
        return [], [], {"skipped": True, "reason": f"unavailable:{exc}"}

    try:
        payload = local_context_api.entity_dossier(query, max_assets=24, max_chunks=0, max_facts=80, max_chars=max_chars)
    except Exception as exc:
        return [], [], {"skipped": True, "reason": f"dossier_error:{exc}"}

    if payload.get("needs_disambiguation"):
        return [], [], {"needs_disambiguation": True, "candidates": payload.get("candidates") or []}

    refs: list[str] = sanitize_refs(payload.get("evidence_refs") or [])
    fields: list[dict[str, Any]] = [
        _field(
            name=f"local_fact:{fact.get('predicate') or 'fact'}",
            value=fact.get("value"),
            owner_source="local_context.entity_dossier",
            source_refs=[f"local_asset:{fact.get('source_asset_id')}#chunk:{fact.get('source_chunk_id')}"],
            privacy_level="private",
            write_policy="candidate_only",
            allowed_surfaces=["debug_local", "audit"],
        )
        for fact in (payload.get("facts") or [])[:40]
        if fact.get("value")
    ]
    return fields, refs, {"skipped": False, "facts_returned": len(fields)}
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def build_entity_profile(
    query: str,
    *,
    surface: str = "pre_answer",
    budget_tier: str = "standard",
    atlas: dict[str, Any] | None = None,
    atlas_path: str | Path | None = None,
    include_local_context: bool | None = None,
    cache: bool = False,
    conn=None,
) -> dict[str, Any]:
    """Build a redacted live profile from canonical stores.

    Steps: resolve ``query`` against the entities table, then enrich the
    profile from the Project Atlas, the artifact registry, open work items
    and, optionally, the local-context dossier.

    Args:
        query: Free-text name, alias or entity id to profile.
        surface: Surface the profile is built for; it drives field TTLs and
            the local-context default.
        budget_tier: Budget tier; light tiers skip the local-context lookup.
        atlas: Pre-loaded atlas payload; when given, no file is read.
        atlas_path: Atlas file to read instead of the default location.
        include_local_context: Force the local-context lookup on or off;
            ``None`` decides from ``budget_tier`` and ``surface``.
        cache: Forwarded to ``_maybe_cache`` together with the profile.
        conn: Database connection; ``_conn()`` is used when omitted.

    Returns:
        The profile built by ``_profile_base`` and passed through
        ``_maybe_cache``. ``action_blocked`` is set when resolution is
        ambiguous, the entity is read-only, or any conflict was recorded.
    """
    connection = conn or _conn()
    now = _now()
    resolution = resolve_entity(query, conn=connection)
    used_sources: list[str] = ["entities"]
    missing_required_sources: list[str] = []
    fields: list[dict[str, Any]] = []
    open_items: list[dict[str, Any]] = []
    # Never populated in this function; handed to _profile_base as an empty list.
    relations: list[dict[str, Any]] = []
    conflicts: list[dict[str, Any]] = []
    source_refs: list[str] = []
    privacy = "normal"
    aliases: list[str] = []
    canonical_kind = ""
    canonical_name = ""
    # Alias-derived key; replaced by the entity's own key once resolution succeeds.
    entity_key = canonical_entity_key(query)
    action_blocked = False

    if resolution["status"] == "ambiguous":
        # Several candidates: return a bare profile that only carries the
        # disambiguation conflict; no other store is consulted.
        action_blocked = True
        profile = _profile_base(
            query=query,
            entity_key=entity_key,
            canonical_kind="entity",
            canonical_name=str(query or ""),
            aliases=[],
            resolution=resolution,
            fields=[],
            relations=[],
            open_items=[],
            conflicts=[{
                "conflict_type": "needs_disambiguation",
                "reason": "Several entities match; choose one before acting or writing.",
                "source_refs": [candidate["source_ref"] for candidate in resolution.get("candidates") or []],
            }],
            source_refs=[],
            privacy_level=privacy,
            surface=surface,
            budget_tier=budget_tier,
            used_sources=used_sources,
            missing_required_sources=[],
            created_at=now,
            action_blocked=action_blocked,
        )
        return _maybe_cache(profile, cache=cache, conn=connection)

    if resolution["status"] == "resolved":
        row = resolution["entity"]
        entity_key = resolution["entity_key"]
        canonical_kind = row.get("type") or "entity"
        canonical_name = row.get("name") or ""
        aliases = _entity_aliases(row)
        source_refs.append(entity_key)
        # NOTE(review): assumes updated_at / created_at hold numeric epoch
        # values; a text timestamp would make float() raise - confirm the schema.
        last_verified = float(row.get("updated_at") or row.get("created_at") or now)
        expires = last_verified + _ttl_seconds(canonical_kind, surface)
        privacy = _row_privacy(row)
        access_mode = _row_access_mode(row)
        write_policy = "read_only" if access_mode == "read_only" else "owner_only"
        # canonical_kind and access_mode are always published as "normal";
        # the other entity fields inherit the row's privacy level.
        fields.extend(
            [
                _field(name="canonical_name", value=canonical_name, owner_source="entities", source_refs=[entity_key], privacy_level=privacy, last_verified_at=last_verified, expires_at=expires, write_policy=write_policy),
                _field(name="canonical_kind", value=canonical_kind, owner_source="entities", source_refs=[entity_key], privacy_level="normal", last_verified_at=last_verified, expires_at=expires, write_policy=write_policy),
                _field(name="aliases", value=aliases, owner_source="entities", source_refs=[entity_key], privacy_level=privacy, last_verified_at=last_verified, expires_at=expires, write_policy=write_policy),
                _field(name="access_mode", value=access_mode, owner_source="entities", source_refs=[entity_key], privacy_level="normal", last_verified_at=last_verified, expires_at=expires, write_policy=write_policy),
                _field(name="entity_value", value=row.get("value") or "", owner_source="entities", source_refs=[entity_key], privacy_level=privacy, last_verified_at=last_verified, expires_at=expires, write_policy=write_policy),
            ]
        )
        if access_mode == "read_only":
            action_blocked = True
    else:
        # Not found: continue with the raw query as the only alias so the
        # other stores can still contribute.
        missing_required_sources.append("entities")
        canonical_kind = "entity"
        canonical_name = str(query or "")
        aliases = [canonical_name] if canonical_name else []

    atlas_payload = _load_atlas(atlas=atlas, atlas_path=atlas_path)
    atlas_projects = _match_atlas_projects(atlas_payload, aliases or [query])
    if atlas_projects:
        used_sources.append("project_atlas")
        for project in atlas_projects:
            project_key = str(project.get("project_key") or "")
            source_refs.append(f"project_atlas:{project_key}")
            fields.append(
                _field(
                    name="project_key",
                    value=project_key,
                    owner_source="project_atlas",
                    source_refs=[f"project_atlas:{project_key}"],
                    privacy_level="normal",
                    write_policy="read_only",
                    last_verified_at=now,
                    expires_at=now + _ttl_seconds("project", surface),
                )
            )

    artifacts = _artifact_rows(connection, aliases or [query], [p.get("project_key", "") for p in atlas_projects])
    if artifacts:
        used_sources.append("artifact_registry")
        for artifact in artifacts[:8]:
            source_refs.append(f"artifact_registry:{artifact.get('id')}")
        # One summary field listing the references of the first eight artifacts.
        fields.append(
            _field(
                name="artifact_refs",
                value=[f"artifact_registry:{artifact.get('id')}" for artifact in artifacts[:8]],
                owner_source="artifact_registry",
                source_refs=[f"artifact_registry:{artifact.get('id')}" for artifact in artifacts[:8]],
                privacy_level="normal",
                write_policy="owner_only",
                last_verified_at=now,
                expires_at=now + _ttl_seconds("artifact", surface),
            )
        )
    conflicts.extend(_atlas_artifact_conflicts(atlas_projects, artifacts))
    if conflicts:
        # Any atlas/registry disagreement blocks actions on this profile.
        action_blocked = True

    open_items = _collect_open_items(connection, aliases or [query])
    if open_items:
        used_sources.extend(_unique([item["owner_source"] for item in open_items]))
        for item in open_items:
            source_refs.extend(item.get("source_refs") or [])

    local_requested = include_local_context
    if local_requested is None:
        # Default: consult local context only on non-light budgets and on
        # surfaces other than pre_answer.
        local_requested = budget_tier not in LIGHT_BUDGET_TIERS and surface in {"pre_action", "debug_local", "audit"}
    if local_requested:
        local_fields, local_refs, local_meta = _local_dossier_fields(query, budget_tier=budget_tier, surface=surface)
        if local_meta.get("needs_disambiguation"):
            action_blocked = True
            conflicts.append({
                "conflict_type": "needs_disambiguation",
                "reason": "Local entity dossier needs disambiguation before use.",
                "source_refs": [],
                "candidates": local_meta.get("candidates") or [],
            })
        if local_fields:
            used_sources.append("local_context")
            fields.extend(local_fields)
            source_refs.extend(local_refs)

    profile = _profile_base(
        query=query,
        entity_key=entity_key,
        canonical_kind=canonical_kind,
        canonical_name=canonical_name,
        aliases=aliases,
        resolution=resolution,
        fields=fields,
        relations=relations,
        open_items=open_items,
        conflicts=conflicts,
        source_refs=source_refs,
        # The profile is as private as its most private field.
        privacy_level=_max_privacy(privacy, *(field.get("privacy_level") or "normal" for field in fields)),
        surface=surface,
        budget_tier=budget_tier,
        used_sources=used_sources,
        missing_required_sources=missing_required_sources,
        created_at=now,
        action_blocked=action_blocked,
    )
    return _maybe_cache(profile, cache=cache, conn=connection)
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def _profile_base(
    *,
    query: str,
    entity_key: str,
    canonical_kind: str,
    canonical_name: str,
    aliases: list[str],
    resolution: dict[str, Any],
    fields: list[dict[str, Any]],
    relations: list[dict[str, Any]],
    open_items: list[dict[str, Any]],
    conflicts: list[dict[str, Any]],
    source_refs: list[str],
    privacy_level: str,
    surface: str,
    budget_tier: str,
    used_sources: list[str],
    missing_required_sources: list[str],
    created_at: float,
    action_blocked: bool,
) -> dict[str, Any]:
    """Assemble the entity profile dictionary from already-collected inputs.

    Fields and open items are kept only when ``_surface_allowed`` accepts
    them for ``surface``; source refs go through ``sanitize_refs``; the
    canonical name and the first 20 aliases go through
    ``redact_entity_value``.

    Side effect: when a light budget tier used a heavy source, a
    ``budget_violation`` entry is appended to the caller's ``conflicts``
    list in place and the action is marked blocked.

    Returns:
        The profile dictionary, including ``profile_uid``, ``resolution``
        (with ``action_blocked``), ``budget``, hashes and expiry data.
    """

    def _visible(entry: dict[str, Any]) -> bool:
        # An entry is shown only when the requested surface is permitted.
        return _surface_allowed(surface, entry.get("allowed_surfaces") or [])

    def _truthy_values(entries: list[dict[str, Any]], key: str) -> list[Any]:
        # Collect the non-empty values stored under ``key``.
        return [value for value in (entry.get(key) for entry in entries) if value]

    permitted_surfaces = _surfaces_for_privacy(privacy_level)
    resolution_view = _redacted_resolution(resolution)
    visible_fields = [entry for entry in fields if _visible(entry)]
    visible_open_items = [entry for entry in open_items if _visible(entry)]
    safe_refs = sanitize_refs(source_refs)

    request_digest = _hash({
        "query": query,
        "surface": surface,
        "budget_tier": budget_tier,
        "entity_key": entity_key,
    })
    refs_digest = _hash(safe_refs)

    # Earliest field expiry wins; otherwise fall back to a TTL from creation.
    expiries = _truthy_values(visible_fields, "expires_at")
    if expiries:
        expires_at = min(expiries)
    else:
        expires_at = created_at + _ttl_seconds(canonical_kind, surface)

    verifications = _truthy_values(visible_fields, "last_verified_at")
    last_verified_at = max(verifications) if verifications else None

    # Decided from the conflicts passed in, i.e. before any budget
    # violation is appended further down.
    if conflicts:
        stale_status = "conflict"
    else:
        stale_status = _stale_status(last_verified_at, expires_at)

    uid_digest = _hash([PROFILE_VERSION, entity_key, refs_digest, request_digest], length=32)
    profile_uid = f"ELP-{uid_digest}"

    heavy_used = sorted(HEAVY_SOURCES & set(used_sources))
    over_budget = budget_tier in LIGHT_BUDGET_TIERS and bool(heavy_used)
    budget = {
        "budget_tier": budget_tier,
        "used_sources": _unique(used_sources),
        "heavy_sources_used": heavy_used,
        "degraded": bool(missing_required_sources) or over_budget,
        "missing_required_sources": _unique(missing_required_sources),
    }

    blocked = action_blocked
    if over_budget:
        blocked = True
        conflicts.append({
            "conflict_type": "budget_violation",
            "reason": "Instant/quick profiles cannot use heavy sources.",
            "source_refs": heavy_used,
        })

    surface_name = str(surface or "")
    if surface_name in BLOCKED_PUBLIC_SURFACES and surface_name not in permitted_surfaces:
        blocked = True

    return {
        "profile_uid": profile_uid,
        "profile_version": PROFILE_VERSION,
        "entity_key": entity_key,
        "canonical_kind": canonical_kind,
        "canonical_name": redact_entity_value(canonical_name),
        "aliases": [redact_entity_value(name) for name in aliases[:20]],
        "resolution": dict(resolution_view, action_blocked=bool(blocked)),
        "authority": {
            "non_authoritative_cache": True,
            "identity_owner": "entities",
            "project_owner": "project_atlas",
            "artifact_owner": "artifact_registry",
            "local_fact_owner": "local_context.entity_dossier",
            "relation_owner": "kg_edges",
            "history_owner": "memory_events/evidence_ledger/change_log",
        },
        "fields": visible_fields,
        "relations": relations,
        "open_items": visible_open_items,
        "conflicts": conflicts,
        "stale_status": stale_status,
        "last_verified_at": last_verified_at,
        "expires_at": expires_at,
        "source_refs": safe_refs,
        "source_refs_hash": refs_digest,
        "input_hash": request_digest,
        "privacy_level": normalize_privacy_level(privacy_level),
        "allowed_surfaces": permitted_surfaces,
        "surface": surface,
        "budget": budget,
    }
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def _redacted_resolution(resolution: dict[str, Any]) -> dict[str, Any]:
|
|
799
|
+
clean = {
|
|
800
|
+
key: value
|
|
801
|
+
for key, value in (resolution or {}).items()
|
|
802
|
+
if key not in {"entity"}
|
|
803
|
+
}
|
|
804
|
+
candidates = []
|
|
805
|
+
for candidate in clean.get("candidates") or []:
|
|
806
|
+
if not isinstance(candidate, dict):
|
|
807
|
+
continue
|
|
808
|
+
candidates.append({
|
|
809
|
+
"entity_key": candidate.get("entity_key") or "",
|
|
810
|
+
"entity_id": candidate.get("entity_id") or 0,
|
|
811
|
+
"display_name": redact_entity_value(candidate.get("display_name") or ""),
|
|
812
|
+
"canonical_kind": redact_entity_value(candidate.get("canonical_kind") or ""),
|
|
813
|
+
"score": candidate.get("score") or 0.0,
|
|
814
|
+
"aliases": [redact_entity_value(alias) for alias in (candidate.get("aliases") or [])[:12]],
|
|
815
|
+
"source_ref": candidate.get("source_ref") or "",
|
|
816
|
+
})
|
|
817
|
+
clean["candidates"] = candidates
|
|
818
|
+
return clean
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
def _maybe_cache(profile: dict[str, Any], *, cache: bool, conn=None) -> dict[str, Any]:
|
|
822
|
+
if cache:
|
|
823
|
+
stored = store_entity_profile(profile, conn=conn)
|
|
824
|
+
profile["cache"] = stored
|
|
825
|
+
return profile
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def store_entity_profile(profile: dict[str, Any], *, conn=None) -> dict[str, Any]:
    """Insert or update ``profile`` in ``entity_profile_cache`` and commit.

    The profile is first round-tripped through ``_json``/``json.loads`` so
    the stored payload is plain JSON data. Rows are keyed by
    ``profile_uid``; on conflict the JSON payload, refs, staleness, privacy,
    surfaces, verification/expiry timestamps and ``updated_at`` are
    refreshed, while ``created_at`` is left alone.

    Args:
        profile: Profile dictionary; must contain ``"profile_uid"``.
        conn: Optional database connection; ``_conn()`` is used when falsy.

    Returns:
        ``{"ok": True, "profile_uid": <uid>}``.

    Raises:
        KeyError: If the profile has no ``"profile_uid"`` key.
    """
    connection = conn or _conn()
    _ensure_table(connection, "entity_profile_cache")
    now = _now()
    payload = json.loads(_json(profile, {}))
    profile_uid = payload["profile_uid"]

    upsert_sql = """
        INSERT INTO entity_profile_cache (
            profile_uid, profile_version, entity_key, canonical_kind, canonical_name,
            source_refs_hash, input_hash, profile_redacted_json, source_refs_json,
            stale_status, privacy_level, allowed_surfaces_json, last_verified_at,
            expires_at, created_at, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(profile_uid) DO UPDATE SET
            profile_redacted_json=excluded.profile_redacted_json,
            source_refs_json=excluded.source_refs_json,
            stale_status=excluded.stale_status,
            privacy_level=excluded.privacy_level,
            allowed_surfaces_json=excluded.allowed_surfaces_json,
            last_verified_at=excluded.last_verified_at,
            expires_at=excluded.expires_at,
            updated_at=excluded.updated_at
        """
    # Positional values, in the same order as the column list above.
    row_values = (
        profile_uid,
        payload.get("profile_version") or PROFILE_VERSION,
        payload.get("entity_key") or "",
        payload.get("canonical_kind") or "",
        payload.get("canonical_name") or "",
        payload.get("source_refs_hash") or "",
        payload.get("input_hash") or "",
        _json(payload, {}),
        _json(payload.get("source_refs") or [], []),
        payload.get("stale_status") or "unknown",
        payload.get("privacy_level") or "normal",
        _json(payload.get("allowed_surfaces") or [], []),
        payload.get("last_verified_at"),
        payload.get("expires_at"),
        now,  # created_at (only used on first insert)
        now,  # updated_at
    )
    connection.execute(upsert_sql, row_values)
    connection.commit()
    return {"ok": True, "profile_uid": profile_uid}
|
|
872
|
+
|
|
873
|
+
|
|
874
|
+
def load_cached_entity_profile(profile_uid: str, *, conn=None) -> dict[str, Any] | None:
    """Fetch a cached profile by its uid.

    Args:
        profile_uid: Value of the ``profile_uid`` column to look up.
        conn: Optional database connection; ``_conn()`` is used when falsy.

    Returns:
        The parsed ``profile_redacted_json`` payload when it is a dict, or
        ``None`` when the cache table is missing, no row matches, or the
        stored payload does not parse to a dict.
    """
    connection = conn or _conn()
    if _table_exists(connection, "entity_profile_cache"):
        cursor = connection.execute(
            "SELECT profile_redacted_json FROM entity_profile_cache WHERE profile_uid=?",
            (profile_uid,),
        )
        hit = cursor.fetchone()
        if hit:
            decoded = _parse_json(hit["profile_redacted_json"], {})
            if isinstance(decoded, dict):
                return decoded
    return None
|
|
886
|
+
|
|
887
|
+
|
|
888
|
+
def upsert_managed_asset(
    *,
    entity_key: str,
    artifact_id: int | None = None,
    project_key: str = "",
    asset_kind: str = "other",
    provider_ref: str = "",
    external_ref: str = "",
    status: str = "planned",
    source_refs: list[str] | tuple[str, ...] | None = None,
    privacy_level: str = "normal",
    metadata: dict[str, Any] | None = None,
    conn=None,
) -> dict[str, Any]:
    """Insert or refresh a row in ``nexo_managed_assets``; never creates artifacts.

    When ``artifact_id`` is given it must already exist in
    ``artifact_registry``; otherwise an error dictionary is returned and
    nothing is written. ``external_ref`` is stored only as a SHA-256 hex
    digest, ``provider_ref`` is stored both as given and in redacted form,
    and ``metadata`` passes through ``_sanitize_metadata``.

    Returns:
        ``{"ok": True, "asset_uid", "artifact_id", "external_ref_hash"}`` on
        success, or ``{"ok": False, "error": "artifact_id_not_found",
        "artifact_id"}`` when the referenced artifact is missing.
    """
    connection = conn or _conn()
    _ensure_table(connection, "nexo_managed_assets")

    has_artifact = artifact_id is not None
    if has_artifact:
        lookup = connection.execute(
            "SELECT id FROM artifact_registry WHERE id=?",
            (int(artifact_id),),
        ).fetchone()
        if not lookup:
            return {"ok": False, "error": "artifact_id_not_found", "artifact_id": int(artifact_id)}

    # Only a digest of the external reference is persisted.
    if external_ref:
        external_hash = hashlib.sha256(
            str(external_ref).encode("utf-8", errors="ignore")
        ).hexdigest()
    else:
        external_hash = ""

    asset_uid = _managed_asset_uid(
        entity_key=entity_key,
        artifact_id=artifact_id,
        provider_ref=provider_ref,
        external_ref_hash=external_hash,
        project_key=project_key,
        asset_kind=asset_kind,
    )
    now = _now()

    upsert_sql = """
        INSERT INTO nexo_managed_assets (
            asset_uid, artifact_id, entity_key, project_key, asset_kind,
            provider_ref, provider_redacted, external_ref_hash, status,
            source_refs_json, privacy_level, last_verified_at, created_at,
            updated_at, metadata_json
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(asset_uid) DO UPDATE SET
            artifact_id=excluded.artifact_id,
            project_key=excluded.project_key,
            asset_kind=excluded.asset_kind,
            provider_ref=excluded.provider_ref,
            provider_redacted=excluded.provider_redacted,
            external_ref_hash=excluded.external_ref_hash,
            status=excluded.status,
            source_refs_json=excluded.source_refs_json,
            privacy_level=excluded.privacy_level,
            last_verified_at=excluded.last_verified_at,
            updated_at=excluded.updated_at,
            metadata_json=excluded.metadata_json
        """
    # Positional values, in the same order as the column list above.
    row_values = (
        asset_uid,
        int(artifact_id) if has_artifact else None,
        entity_key,
        project_key or "",
        asset_kind or "other",
        provider_ref or "",
        redact_entity_value(provider_ref),
        external_hash,
        status or "planned",
        _json(sanitize_refs(source_refs or []), []),
        normalize_privacy_level(privacy_level),
        now,  # last_verified_at
        now,  # created_at (only used on first insert)
        now,  # updated_at
        _json(_sanitize_metadata(metadata or {}), {}),
    )
    connection.execute(upsert_sql, row_values)
    connection.commit()
    return {
        "ok": True,
        "asset_uid": asset_uid,
        "artifact_id": artifact_id,
        "external_ref_hash": external_hash,
    }
|
|
954
|
+
|
|
955
|
+
|
|
956
|
+
def _managed_asset_uid(
|
|
957
|
+
*,
|
|
958
|
+
entity_key: str,
|
|
959
|
+
artifact_id: int | None,
|
|
960
|
+
provider_ref: str,
|
|
961
|
+
external_ref_hash: str,
|
|
962
|
+
project_key: str,
|
|
963
|
+
asset_kind: str,
|
|
964
|
+
) -> str:
|
|
965
|
+
if artifact_id is not None:
|
|
966
|
+
return f"managed_asset:artifact:{int(artifact_id)}"
|
|
967
|
+
if provider_ref and external_ref_hash:
|
|
968
|
+
return f"managed_asset:provider:{_hash([provider_ref, external_ref_hash], length=24)}"
|
|
969
|
+
return f"managed_asset:{_hash([entity_key, project_key, asset_kind], length=24)}"
|
|
970
|
+
|
|
971
|
+
|
|
972
|
+
def _sanitize_metadata(value: dict[str, Any]) -> dict[str, Any]:
|
|
973
|
+
clean: dict[str, Any] = {}
|
|
974
|
+
for key, item in (value or {}).items():
|
|
975
|
+
lower = str(key).lower()
|
|
976
|
+
if lower in {"payload", "raw", "body", "content", "secret", "token", "password", "provider_payload"}:
|
|
977
|
+
clean[str(key)] = "[REDACTED]"
|
|
978
|
+
elif isinstance(item, dict):
|
|
979
|
+
clean[str(key)] = _sanitize_metadata(item)
|
|
980
|
+
elif isinstance(item, list):
|
|
981
|
+
clean[str(key)] = [redact_entity_value(part) for part in item[:20]]
|
|
982
|
+
else:
|
|
983
|
+
clean[str(key)] = redact_entity_value(item)
|
|
984
|
+
return clean
|
|
985
|
+
|
|
986
|
+
|
|
987
|
+
def record_asset_context_updated(
    *,
    entity_key: str,
    asset_uid: str,
    change_type: str,
    artifact_id: int | None = None,
    project_key: str = "",
    source_refs: list[str] | tuple[str, ...] | None = None,
    privacy_level: str = "normal",
    session_id: str = "",
    idempotency_key: str = "",
    metadata: dict[str, Any] | None = None,
    conn=None,
) -> dict[str, Any]:
    """Record an idempotent asset context update and mirror it to memory_events.

    The update is written to ``asset_context_updated`` with ``INSERT OR
    IGNORE``, then passed to ``db.record_memory_event`` using the same event
    uid as both ``event_uid`` and ``idempotency_key``. Finally the row's
    ``memory_event_uid`` is filled in if it is still empty.

    Args:
        entity_key: Entity the asset belongs to.
        asset_uid: Managed asset identifier; also used as the memory event's
            ``source_id``.
        change_type: Description of the change; stored after
            ``redact_entity_value``.
        artifact_id: Optional artifact id, stored as ``int`` when given.
        project_key: Optional project key.
        source_refs: Optional references; stored after ``sanitize_refs``.
        privacy_level: Stored after ``normalize_privacy_level``.
        session_id: Forwarded to the memory event.
        idempotency_key: Passed to ``_safe_event_uid`` when deriving the
            event uid.
        metadata: Extra memory-event metadata; passed through
            ``_sanitize_metadata``.
        conn: Optional database connection; ``_conn()`` is used when falsy.

    Returns:
        ``{"ok": True, "event_uid", "memory_event_uid", "inserted", "row"}``.
        ``inserted`` reflects the memory event's ``"inserted"`` flag, not the
        ``asset_context_updated`` insert. ``row`` is the stored row as a
        dict, or ``{}`` if it could not be read back.
    """
    connection = conn or _conn()
    _ensure_table(connection, "asset_context_updated")
    refs = sanitize_refs(source_refs or [])
    clean_change = redact_entity_value(change_type)
    # NOTE(review): presumably derives a stable uid from the idempotency key,
    # falling back to the listed values — confirm against _safe_event_uid.
    event_uid = _safe_event_uid(
        idempotency_key,
        [entity_key, asset_uid, artifact_id, project_key, clean_change, refs],
    )
    now = _now()
    # redaction_applied is always written as 1; memory_event_uid starts empty
    # and is filled in after the memory event has been recorded.
    connection.execute(
        """
        INSERT OR IGNORE INTO asset_context_updated (
            event_uid, entity_key, asset_uid, artifact_id, project_key,
            change_type, source_refs_json, privacy_level, redaction_applied,
            created_at, memory_event_uid
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, 1, ?, '')
        """,
        (
            event_uid,
            entity_key,
            asset_uid,
            int(artifact_id) if artifact_id is not None else None,
            project_key or "",
            clean_change,
            _json(refs, []),
            normalize_privacy_level(privacy_level),
            now,
        ),
    )
    # Commit the local row before mirroring it to memory events.
    connection.commit()
    memory_event = db.record_memory_event(
        event_type="asset_context_updated",
        source_type="asset_context_updated",
        source_id=asset_uid,
        session_id=session_id,
        project_key=project_key,
        actor="nexo",
        raw_ref=event_uid,
        privacy_level=normalize_privacy_level(privacy_level),
        metadata={
            "entity_key": entity_key,
            "asset_uid": asset_uid,
            "artifact_id": artifact_id,
            "project_key": project_key,
            "change_type": clean_change,
            "source_refs": refs,
            # Spread last: sanitized caller metadata overrides the keys above
            # when names collide.
            **_sanitize_metadata(metadata or {}),
        },
        event_uid=event_uid,
        idempotency_key=event_uid,
    )
    # Fall back to our own uid when the memory event reports none.
    memory_uid = str(memory_event.get("event_uid") or event_uid)
    # Only fill the link when it is still empty, so an existing link is kept.
    connection.execute(
        "UPDATE asset_context_updated SET memory_event_uid=? WHERE event_uid=? AND COALESCE(memory_event_uid, '')=''",
        (memory_uid, event_uid),
    )
    connection.commit()
    row = connection.execute("SELECT * FROM asset_context_updated WHERE event_uid=?", (event_uid,)).fetchone()
    return {"ok": True, "event_uid": event_uid, "memory_event_uid": memory_uid, "inserted": bool(memory_event.get("inserted")), "row": dict(row) if row else {}}
|
|
1061
|
+
|
|
1062
|
+
|
|
1063
|
+
# Public API: the names exported by ``from <this module> import *``.
__all__ = [
    "PROFILE_VERSION",
    "build_entity_profile",
    "load_cached_entity_profile",
    "record_asset_context_updated",
    "redact_entity_value",
    "resolve_entity",
    "sanitize_refs",
    "store_entity_profile",
    "upsert_managed_asset",
]
|