nexo-brain 7.31.13 → 7.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -50,6 +50,12 @@ INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
50
50
  PERFORMANCE_PROFILE_KEY = "performance_profile"
51
51
  DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
52
52
  VALID_CONTEXT_MODES = {"compact", "full"}
53
# FTS5 keyword recall over local_chunks. Additive, guarded, reversible.
# Backfill batch size; 0 disables the incremental backfill entirely.
# A malformed (non-integer) override falls back to the default rather than
# raising ValueError while the module is being imported.
try:
    FTS_BACKFILL_BATCH = int(os.environ.get("NEXO_LOCAL_FTS_BACKFILL_BATCH", "500") or "500")
except ValueError:
    FTS_BACKFILL_BATCH = 500
# Keys under which the backfill persists its progress (read and written via
# the state helpers used by the backfill and status functions).
FTS_MIGRATION_CURSOR_KEY = "fts_migration_cursor"
FTS_MIGRATION_DONE_KEY = "fts_migration_done"
FTS_BACKFILL_TOTAL_KEY = "fts_backfill_total"
53
59
  EMBEDDING_REFRESH_JOB = "embedding_refresh"
54
60
  ENTITY_FACTS_JOB = "entity_facts"
55
61
  BACKGROUND_INDEX_JOB_TYPES = {ENTITY_FACTS_JOB}
@@ -3541,6 +3547,14 @@ def run_once(
3541
3547
  }
3542
3548
  scan_result = scan_once(limit=effective_scan_limit)
3543
3549
  job_result = process_jobs(limit=effective_process_limit)
3550
+ # Incremental FTS backfill: bounded one-batch-per-tick, after the disk-budget
3551
+ # gate (above) and after process_jobs. Best-effort — never let it break the
3552
+ # cron tick. Skips itself when disabled (batch=0) or already done.
3553
+ if FTS_BACKFILL_BATCH > 0:
3554
+ try:
3555
+ _backfill_fts_rows(conn, batch_limit=FTS_BACKFILL_BATCH)
3556
+ except Exception:
3557
+ pass
3544
3558
  conn_after = _conn()
3545
3559
  initial_after = _initial_scan_status(conn_after, list_roots(readonly=False))
3546
3560
  blocking_active_after = _active_job_count(conn_after, blocking_only=True)
@@ -4109,6 +4123,32 @@ def _status_from_conn(conn, *, readonly: bool = False) -> dict:
4109
4123
  "permissions": [],
4110
4124
  "models": model_status()["models"],
4111
4125
  "support_log_available": True,
4126
+ "fts_recall": _fts_status(conn),
4127
+ }
4128
+
4129
+
4130
def _fts_status(conn) -> dict:
    """Operator-facing FTS5 backfill progress (so progress can be watched).

    Every persisted-state lookup is best-effort: a failing lookup degrades to
    a neutral value (``done=False``, ``cursor=0``, ``total=0``) instead of
    raising out of the status surface.

    Returns a dict with the keys ``enabled``, ``available``, ``done``,
    ``cursor``, ``total``, ``read_path`` and ``backfill_batch``.
    """

    def _state_int(key: str) -> int:
        # Missing, empty or non-numeric state values all collapse to 0.
        try:
            return int(_get_state_conn(conn, key, "0") or "0")
        except Exception:
            return 0

    try:
        done = _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1"
    except Exception:
        done = False
    enabled = _fts_enabled_env()
    available = _fts_available(conn)
    return {
        "enabled": enabled,
        "available": available,
        "done": done,
        "cursor": _state_int(FTS_MIGRATION_CURSOR_KEY),
        "total": _state_int(FTS_BACKFILL_TOTAL_KEY),
        # Same three conditions _fts_ready() checks (flag on, backfill done,
        # FTS5 usable), but reusing the guarded values above so a failing
        # state lookup reports "like" instead of raising, and the MATCH probe
        # is not executed a second time.
        "read_path": "fts" if (enabled and done and available) else "like",
        "backfill_batch": FTS_BACKFILL_BATCH,
    }
4113
4153
 
4114
4154
 
@@ -4433,6 +4473,136 @@ def _context_prefilter_limit(default: int = 1200) -> int:
4433
4473
  return max(100, min(value, 5000))
4434
4474
 
4435
4475
 
4476
+ def _fts_enabled_env() -> bool:
4477
+ """Feature flag for the FTS5 read path (default on; set 0/false to roll back)."""
4478
+ value = os.environ.get("NEXO_LOCAL_CONTEXT_FTS_ENABLED", "1").strip().lower()
4479
+ return value not in {"0", "false", "no", "off"}
4480
+
4481
+
4482
+ def _fts_available(conn) -> bool:
4483
+ """True if the local_chunks_fts FTS5 vtab exists and MATCH works.
4484
+
4485
+ Hosts without FTS5 support fall back to a plain shadow table that does NOT
4486
+ support MATCH, so the cheap probe runs a trivial MATCH and catches
4487
+ OperationalError. The probe (a sqlite_master lookup + MATCH LIMIT 0) is fast
4488
+ enough to run inline without caching, which avoids stale per-connection
4489
+ cache bugs across reconnects.
4490
+ """
4491
+ try:
4492
+ row = conn.execute(
4493
+ "SELECT 1 FROM sqlite_master WHERE type IN ('table','view') AND name='local_chunks_fts' LIMIT 1"
4494
+ ).fetchone()
4495
+ if not row:
4496
+ return False
4497
+ # Trivial MATCH proves this is a real FTS5 vtab (shadow fallback raises).
4498
+ conn.execute(
4499
+ "SELECT rowid FROM local_chunks_fts WHERE local_chunks_fts MATCH ? LIMIT 0",
4500
+ ("nexo_fts_probe",),
4501
+ ).fetchall()
4502
+ return True
4503
+ except sqlite3.OperationalError:
4504
+ return False
4505
+ except Exception:
4506
+ return False
4507
+
4508
+
4509
def _fts_ready(conn) -> bool:
    """Decide whether queries may use the FTS read path.

    All three conditions must hold, checked in this order so the cheaper
    checks short-circuit the database probe: the feature flag is on, the
    persisted migration-done flag equals "1", and ``_fts_available`` confirms
    a working FTS5 table on this connection.
    """
    if _fts_enabled_env():
        backfill_done = _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1"
        if backfill_done:
            return _fts_available(conn)
    return False
4517
+
4518
+
4519
+ def _fts_match_expr(terms: list[str]) -> str:
4520
+ """Build a safe FTS5 MATCH expression from query terms.
4521
+
4522
+ Each term is double-quoted (FTS5 phrase syntax) with embedded double quotes
4523
+ doubled, neutralizing FTS operators/special chars; terms are OR-joined.
4524
+ Returns '' when there is nothing safe to match.
4525
+ """
4526
+ quoted = []
4527
+ for term in terms:
4528
+ cleaned = str(term or "").strip()
4529
+ if not cleaned:
4530
+ continue
4531
+ quoted.append('"' + cleaned.replace('"', '""') + '"')
4532
+ return " OR ".join(quoted)
4533
+
4534
+
4535
def _backfill_fts_rows(conn, *, batch_limit: int | None = None) -> dict:
    """Incrementally mirror legacy local_chunks rows into local_chunks_fts.

    One call processes at most ``batch_limit`` rows whose rowid is greater
    than the persisted cursor, in ascending rowid order.

    Idempotent + resumable: the cursor (max processed rowid) is written to
    the state store and committed together with each batch, so an
    interrupted run resumes from the last committed rowid. For every row the
    existing FTS entry with that rowid is deleted and a fresh one inserted,
    which makes re-processing a row safe. When no rows remain past the
    cursor the done flag is set.

    NOTE (from the original author; the triggers are not visible here): new
    chunks written after the schema migration get their FTS rows via
    local_chunks_fts triggers, so this only handles pre-existing rows.

    Args:
        conn: SQLite connection; rows are accessed by column name.
        batch_limit: Maximum rows for this call. ``None`` uses
            ``FTS_BACKFILL_BATCH``; a value <= 0 disables the backfill.

    Returns:
        A small status dict. Skipped calls carry ``skipped`` (``disabled``,
        ``fts_unavailable`` or ``already_done``); executed calls carry
        ``processed`` and ``cursor``. ``ok`` and ``done`` are always present.
    """
    if batch_limit is None:
        batch_limit = FTS_BACKFILL_BATCH
    batch_limit = int(batch_limit)
    if batch_limit <= 0:
        return {"ok": True, "skipped": "disabled", "done": _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1"}
    if not _fts_available(conn):
        return {"ok": True, "skipped": "fts_unavailable", "done": False}
    if _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1":
        return {"ok": True, "skipped": "already_done", "done": True}

    def _run() -> dict:
        # An unreadable or non-numeric cursor restarts from rowid 0; safe
        # because each row is deleted and re-inserted below.
        try:
            cursor = int(_get_state_conn(conn, FTS_MIGRATION_CURSOR_KEY, "0") or "0")
        except Exception:
            cursor = 0
        # Snapshot the total once (first backfill tick) so the operator status
        # surface can show progress without a COUNT(*) on every tick.
        if _get_state_conn(conn, FTS_BACKFILL_TOTAL_KEY, "") == "":
            try:
                total_row = conn.execute("SELECT COUNT(*) AS total FROM local_chunks").fetchone()
                _set_state_conn(conn, FTS_BACKFILL_TOTAL_KEY, str(int(total_row["total"] or 0)))
            except Exception:
                # Deliberately best-effort: the total is progress reporting
                # only and must not stop the backfill itself.
                pass
        rows = conn.execute(
            """
            SELECT c.rowid AS rid, c.text AS text,
                   COALESCE(a.privacy_class, 'normal') AS privacy_class,
                   COALESCE(a.status, 'active') AS asset_status
            FROM local_chunks c
            LEFT JOIN local_assets a ON a.asset_id = c.asset_id
            WHERE c.rowid > ?
            ORDER BY c.rowid ASC
            LIMIT ?
            """,
            (cursor, batch_limit),
        ).fetchall()
        if not rows:
            _set_state_conn(conn, FTS_MIGRATION_DONE_KEY, "1")
            conn.commit()
            return {"ok": True, "done": True, "processed": 0, "cursor": cursor}
        payload = [
            (
                int(row["rid"]),
                str(row["text"] or ""),
                str(row["privacy_class"] or "normal"),
                str(row["asset_status"] or "active"),
            )
            for row in rows
        ]
        # Remove any existing FTS entry for these rowids first, then insert
        # the fresh copies; the rowids within a batch are distinct.
        conn.executemany(
            "DELETE FROM local_chunks_fts WHERE rowid = ?",
            [(entry[0],) for entry in payload],
        )
        conn.executemany(
            """
            INSERT INTO local_chunks_fts(rowid, text, privacy_class, asset_status)
            VALUES (?, ?, ?, ?)
            """,
            payload,
        )
        max_rid = max(cursor, *(entry[0] for entry in payload))
        # Cursor and batch are committed together so they cannot diverge.
        _set_state_conn(conn, FTS_MIGRATION_CURSOR_KEY, str(max_rid))
        conn.commit()
        return {"ok": True, "done": False, "processed": len(rows), "cursor": max_rid}

    return _with_sqlite_busy_retry(_run)
4604
+
4605
+
4436
4606
  def _context_candidate_rows(
4437
4607
  conn,
4438
4608
  entity_asset_ids: list[str],
@@ -4444,39 +4614,73 @@ def _context_candidate_rows(
4444
4614
  prefilter_limit = min(int(base_limit or 5000), _context_prefilter_limit())
4445
4615
  prefilter_rows = []
4446
4616
  if terms:
4447
- term_clauses = []
4448
- params: list[str] = []
4449
- for term in terms:
4450
- term_clauses.append("(lower(a.path) LIKE ? OR lower(COALESCE(v.summary, '')) LIKE ? OR lower(c.text) LIKE ?)")
4451
- like = f"%{term}%"
4452
- params.extend([like, like, like])
4453
- prefilter_rows = conn.execute(
4454
- f"""
4455
- SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
4456
- e.vector_json, e.model_id, e.model_revision, e.dimension
4457
- FROM local_chunks c
4458
- JOIN local_assets a ON a.asset_id = c.asset_id
4459
- LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
4460
- LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
4461
- WHERE a.status='active'
4462
- AND a.privacy_class='normal'
4463
- AND ({" OR ".join(term_clauses)})
4464
- ORDER BY
4465
- CASE
4466
- WHEN {" OR ".join("lower(a.path) LIKE ?" for _ in terms)} THEN 0
4467
- WHEN {" OR ".join("lower(COALESCE(v.summary, '')) LIKE ?" for _ in terms)} THEN 1
4468
- ELSE 2
4469
- END,
4470
- c.created_at DESC
4471
- LIMIT ?
4472
- """,
4473
- [
4474
- *params,
4475
- *(f"%{term}%" for term in terms),
4476
- *(f"%{term}%" for term in terms),
4477
- prefilter_limit,
4478
- ],
4479
- ).fetchall()
4617
+ used_fts = False
4618
+ # DUAL-READ: only take the FTS path once the backfill is done AND the
4619
+ # flag is on AND FTS5 is available. Until then (or on rollback) the
4620
+ # EXACT legacy LIKE path runs, so retrieval is unaffected mid-migration.
4621
+ if _fts_ready(conn):
4622
+ match_expr = _fts_match_expr(terms)
4623
+ if match_expr:
4624
+ try:
4625
+ prefilter_rows = conn.execute(
4626
+ """
4627
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
4628
+ e.vector_json, e.model_id, e.model_revision, e.dimension
4629
+ FROM local_chunks_fts f
4630
+ JOIN local_chunks c ON c.rowid = f.rowid
4631
+ JOIN local_assets a ON a.asset_id = c.asset_id
4632
+ LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
4633
+ LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
4634
+ WHERE local_chunks_fts MATCH ?
4635
+ AND f.privacy_class='normal'
4636
+ AND f.asset_status='active'
4637
+ AND a.status='active'
4638
+ AND a.privacy_class='normal'
4639
+ ORDER BY bm25(local_chunks_fts), c.created_at DESC
4640
+ LIMIT ?
4641
+ """,
4642
+ (match_expr, prefilter_limit),
4643
+ ).fetchall()
4644
+ used_fts = True
4645
+ except sqlite3.OperationalError:
4646
+ # Malformed FTS expression (odd user input) -> fall back to
4647
+ # the legacy LIKE path below instead of erroring the answer.
4648
+ prefilter_rows = []
4649
+ used_fts = False
4650
+ if not used_fts:
4651
+ term_clauses = []
4652
+ params: list[str] = []
4653
+ for term in terms:
4654
+ term_clauses.append("(lower(a.path) LIKE ? OR lower(COALESCE(v.summary, '')) LIKE ? OR lower(c.text) LIKE ?)")
4655
+ like = f"%{term}%"
4656
+ params.extend([like, like, like])
4657
+ prefilter_rows = conn.execute(
4658
+ f"""
4659
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
4660
+ e.vector_json, e.model_id, e.model_revision, e.dimension
4661
+ FROM local_chunks c
4662
+ JOIN local_assets a ON a.asset_id = c.asset_id
4663
+ LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
4664
+ LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
4665
+ WHERE a.status='active'
4666
+ AND a.privacy_class='normal'
4667
+ AND ({" OR ".join(term_clauses)})
4668
+ ORDER BY
4669
+ CASE
4670
+ WHEN {" OR ".join("lower(a.path) LIKE ?" for _ in terms)} THEN 0
4671
+ WHEN {" OR ".join("lower(COALESCE(v.summary, '')) LIKE ?" for _ in terms)} THEN 1
4672
+ ELSE 2
4673
+ END,
4674
+ c.created_at DESC
4675
+ LIMIT ?
4676
+ """,
4677
+ [
4678
+ *params,
4679
+ *(f"%{term}%" for term in terms),
4680
+ *(f"%{term}%" for term in terms),
4681
+ prefilter_limit,
4682
+ ],
4683
+ ).fetchall()
4480
4684
 
4481
4685
  fallback_limit = prefilter_limit if not terms else max(120, min(500, prefilter_limit // 3))
4482
4686
  base_rows = conn.execute(
@@ -8,7 +8,7 @@ from typing import Iterable
8
8
  from urllib.parse import quote
9
9
 
10
10
  import paths
11
- from db._schema import _m63_local_context_layer, _m64_local_context_live_dirs
11
+ from db._schema import _m63_local_context_layer, _m64_local_context_live_dirs, _m84_local_chunks_fts
12
12
 
13
13
  LOCAL_CONTEXT_DB_NAME = "local-context.db"
14
14
  MIGRATION_STATE_KEY = "local_context_db_migrated_from_main"
@@ -118,7 +118,8 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
118
118
  _m64_local_context_live_dirs(conn)
119
119
  _ensure_entity_dossier_schema(conn)
120
120
  _ensure_local_context_v2_schema(conn)
121
- conn.execute("PRAGMA user_version=65")
121
+ _m84_local_chunks_fts(conn)
122
+ conn.execute("PRAGMA user_version=84")
122
123
  conn.commit()
123
124
 
124
125
 
@@ -9,12 +9,24 @@ from typing import Any
9
9
 
10
10
  from db import (
11
11
  build_pre_action_context,
12
+ get_memory_observations_by_uids,
12
13
  list_memory_events,
13
14
  list_memory_observations,
14
15
  process_memory_observation_queue,
15
16
  search_memory_observations_fts,
17
+ vector_scan_observations,
16
18
  )
17
19
 
20
+ # Weight for the semantic (vector) signal when fused with the lexical/FTS score.
21
+ # A strong paraphrase match (high cosine) can carry an observation that the
22
+ # token-overlap score missed entirely, while still ranking below an exact
23
+ # lexical hit on the same query.
24
+ _VECTOR_FUSION_WEIGHT = 0.85
25
+ # Minimum cosine for a semantic-only candidate to survive the relaxed filter.
26
+ # Below this, a vector "match" is noise and must not resurrect an observation
27
+ # that the lexical path already rejected.
28
+ _VECTOR_MIN_SCORE = 0.30
29
+
18
30
 
19
31
  def _tokens(text: str) -> set[str]:
20
32
  return {
@@ -37,6 +49,41 @@ def _score(query: str, text: str, base: float = 0.0) -> float:
37
49
  return min(1.0, base + len(overlap) / max(1, len(query_tokens)))
38
50
 
39
51
 
52
+ def _model_is_warm() -> bool:
53
+ """True only when embedding the query will NOT trigger a cold model load."""
54
+ try:
55
+ import cognitive._core as cog
56
+ except Exception:
57
+ return False
58
+ try:
59
+ if cog._model_download_disabled():
60
+ return True
61
+ except Exception:
62
+ return False
63
+ return getattr(cog, "_model", None) is not None
64
+
65
+
66
+ def _maybe_query_embedding(query: str):
67
+ """Embed the query ONCE for semantic fusion, or return None.
68
+
69
+ CRITICAL latency guard: this never loads a cold model. It returns None
70
+ (degrading to the FTS/token path) unless the deterministic offline fallback
71
+ is active or the real model is already warm in-process. Any failure also
72
+ yields None.
73
+ """
74
+ clean = (query or "").strip()
75
+ if not clean:
76
+ return None
77
+ if not _model_is_warm():
78
+ return None
79
+ try:
80
+ import cognitive._core as cog
81
+
82
+ return cog.embed(clean)
83
+ except Exception:
84
+ return None
85
+
86
+
40
87
  def _project_hint_values(project_hint: str = "") -> set[str]:
41
88
  clean = (project_hint or "").strip()
42
89
  if not clean:
@@ -225,6 +272,30 @@ def memory_search(
225
272
  ):
226
273
  uid = item.get("observation_uid") or f"id:{item.get('id')}"
227
274
  observations_by_uid.setdefault(uid, item)
275
+ # Semantic fusion: embed the query ONCE (only when a model is already warm —
276
+ # never trigger a cold model load on this latency path) and run a bounded
277
+ # vector scan over precomputed observation embeddings. Paraphrases that the
278
+ # lexical/FTS path missed are pulled in here.
279
+ vector_scores: dict[str, float] = {}
280
+ if clean_query:
281
+ query_vector = _maybe_query_embedding(clean_query)
282
+ if query_vector is not None:
283
+ for hit in vector_scan_observations(
284
+ query_vector,
285
+ limit=max_items * 3,
286
+ start_ts=start,
287
+ end_ts=end,
288
+ min_score=_VECTOR_MIN_SCORE,
289
+ ):
290
+ uid = hit.get("observation_uid")
291
+ if uid:
292
+ vector_scores[uid] = float(hit.get("vector_score") or 0.0)
293
+ # Materialise semantic-only observations the lexical scan did not see.
294
+ missing_uids = [uid for uid in vector_scores if uid not in observations_by_uid]
295
+ if missing_uids:
296
+ for uid, item in get_memory_observations_by_uids(missing_uids).items():
297
+ observations_by_uid.setdefault(uid, item)
298
+
228
299
  observations = list(observations_by_uid.values())
229
300
  events = list_memory_events(
230
301
  query=clean_query,
@@ -234,12 +305,23 @@ def memory_search(
234
305
  end_ts=end,
235
306
  )
236
307
 
237
- candidates = [
238
- _observation_to_candidate(item, clean_query)
239
- for item in observations
240
- if _within_range(item.get("created_at"), start, end)
241
- and _project_matches(item.get("project_key") or "", project_hint)
242
- ]
308
+ candidates = []
309
+ for item in observations:
310
+ if not _within_range(item.get("created_at"), start, end):
311
+ continue
312
+ if not _project_matches(item.get("project_key") or "", project_hint):
313
+ continue
314
+ candidate = _observation_to_candidate(item, clean_query)
315
+ uid = item.get("observation_uid") or f"id:{item.get('id')}"
316
+ vector_score = vector_scores.get(uid, 0.0)
317
+ if vector_score > 0:
318
+ # Fuse: keep the higher of the lexical score and the weighted vector
319
+ # signal so a strong paraphrase survives while exact lexical hits
320
+ # still outrank weak semantic ones.
321
+ fused = max(float(candidate.get("score") or 0.0), _VECTOR_FUSION_WEIGHT * vector_score)
322
+ candidate["score"] = round(fused, 4)
323
+ candidate["vector_score"] = round(vector_score, 4)
324
+ candidates.append(candidate)
243
325
  candidates.extend(
244
326
  _event_to_candidate(item, clean_query)
245
327
  for item in events
@@ -248,7 +330,14 @@ def memory_search(
248
330
  )
249
331
 
250
332
  if clean_query:
251
- candidates = [item for item in candidates if item.get("score", 0) > 0]
333
+ # Relaxed filter: a candidate survives if it has a positive lexical score
334
+ # OR a qualifying semantic (vector) match. Previously the hard score>0
335
+ # filter dropped semantic-only paraphrase hits before they could rank.
336
+ candidates = [
337
+ item
338
+ for item in candidates
339
+ if item.get("score", 0) > 0 or item.get("vector_score", 0) > 0
340
+ ]
252
341
  candidates.sort(key=lambda item: (item.get("score", 0), item.get("created_at") or 0), reverse=True)
253
342
  candidates = candidates[:max_items]
254
343
 
@@ -1437,6 +1437,7 @@ def handle_confidence_check(
1437
1437
  unknowns: str = "[]",
1438
1438
  verification_step: str = "",
1439
1439
  stakes: str = "",
1440
+ sid: str = "",
1440
1441
  ) -> str:
1441
1442
  """Return the metacognitive response mode: answer, verify, ask, or defer."""
1442
1443
  clean_goal = (goal or "").strip()
@@ -1465,6 +1466,37 @@ def handle_confidence_check(
1465
1466
  verification_step=(verification_step or "").strip(),
1466
1467
  stakes=(stakes or "").strip(),
1467
1468
  )
1469
+ # Persist the check so the G1 answer-contract gate can detect fulfillment of
1470
+ # verify/ask/defer contracts (this table was previously never written, so
1471
+ # verify contracts were structurally unfulfillable). Best-effort: a failure
1472
+ # here must never break the metacognitive answer — g1 simply re-nudges, a
1473
+ # visible signal rather than a silent corruption.
1474
+ try:
1475
+ import hashlib
1476
+ from db import get_db
1477
+ from plugins.guard import _resolve_active_sid
1478
+ conn = get_db()
1479
+ resolved_sid = (sid or "").strip() or _resolve_active_sid(conn)
1480
+ if resolved_sid:
1481
+ conn.execute(
1482
+ """INSERT INTO confidence_checks
1483
+ (session_id, task_id, goal_hash, task_type, area,
1484
+ response_mode, confidence, high_stakes, created_at)
1485
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))""",
1486
+ (
1487
+ resolved_sid,
1488
+ "",
1489
+ hashlib.sha256(clean_goal.encode("utf-8")).hexdigest()[:16],
1490
+ clean_type,
1491
+ (area or "").strip(),
1492
+ str(result.get("mode") or ""),
1493
+ int(result.get("confidence") or 0),
1494
+ 1 if result.get("high_stakes") else 0,
1495
+ ),
1496
+ )
1497
+ conn.commit()
1498
+ except Exception:
1499
+ pass
1468
1500
  return json.dumps({"ok": True, **result}, ensure_ascii=False, indent=2)
1469
1501
 
1470
1502
 
@@ -2138,31 +2170,30 @@ def handle_task_close(
2138
2170
  limit=3,
2139
2171
  )
2140
2172
  if pending_corrections:
2141
- debt = _ensure_open_debt(
2142
- task["session_id"],
2143
- task_id,
2144
- "missing_learning_after_correction",
2145
- severity="error",
2146
- evidence=(
2147
- "User correction was detected for this session and has not "
2148
- "been resolved by nexo_learning_add. task_close is blocked "
2149
- "until a durable learning is persisted."
2150
- ),
2151
- debts=debts_created,
2152
- )
2153
- return json.dumps(
2154
- {
2155
- "ok": False,
2156
- "error": "Cannot close task while a detected user correction has no durable nexo_learning_add.",
2157
- "hint": "Call nexo_learning_add with the reusable rule learned from the correction, then retry nexo_task_close.",
2158
- "task_id": task_id,
2159
- "blocked_by": "d5_correction_learning_required",
2160
- "debt_id": debt.get("id"),
2161
- "pending_corrections": len(pending_corrections),
2162
- },
2163
- ensure_ascii=False,
2164
- indent=2,
2173
+ # SOFT enforcement (Ola 1): do NOT block the close. A detected user
2174
+ # correction without a durable nexo_learning_add opens/dedupes an
2175
+ # error-severity protocol_debt and the task still closes. The daily
2176
+ # self-audit + correction_requirement_summary surface the open debt, and
2177
+ # if THIS close supplies the learning, the `if correction:` block below
2178
+ # captures it and resolves both the requirement and the debt. A hard
2179
+ # block here interrupted the operator on every correction (friction);
2180
+ # the debt is the non-blocking signal instead.
2181
+ learning_in_this_close = bool(
2182
+ (learning_title or "").strip() and (learning_content or "").strip()
2165
2183
  )
2184
+ if not learning_in_this_close:
2185
+ _ensure_open_debt(
2186
+ task["session_id"],
2187
+ task_id,
2188
+ "missing_learning_after_correction",
2189
+ severity="error",
2190
+ evidence=(
2191
+ "User correction detected for this session without a durable "
2192
+ "nexo_learning_add; debt opened (soft enforcement) — task closed "
2193
+ "but a follow-up learning is required."
2194
+ ),
2195
+ debts=debts_created,
2196
+ )
2166
2197
 
2167
2198
  # ── Evidence enforcement: reject 'done' without proof ──
2168
2199
  # G1 hardening: "done" is no longer allowed to degrade into a debt-only
@@ -2350,6 +2381,22 @@ def handle_task_close(
2350
2381
  debt_types=["missing_change_log"],
2351
2382
  resolution="Change log created by nexo_task_close",
2352
2383
  )
2384
+ # Cognitive OS Ola 1 — materialize causal/provenance edges from the
2385
+ # closed task (task→change_log "ops:produced" + change_log→task
2386
+ # "causal:motivated_by"). record_task_close_edges had NO caller, so
2387
+ # the causal graph stayed empty (0 candidates) and could never feed
2388
+ # connect-the-dots at answer time. Best-effort: graph wiring must
2389
+ # never break a task close.
2390
+ try:
2391
+ import causal_graph
2392
+ causal_graph.record_task_close_edges(
2393
+ task_id=task_id,
2394
+ change_log_id=change_log_id,
2395
+ project_key=str(task.get("project_hint") or task.get("area") or ""),
2396
+ reason_public=(clean_change_summary or task.get("goal") or "")[:200],
2397
+ )
2398
+ except Exception:
2399
+ pass
2353
2400
  else:
2354
2401
  debt = _ensure_open_debt(
2355
2402
  task["session_id"],