deja-cli 0.2.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deja_cli-0.2.0 → deja_cli-0.3.1}/PKG-INFO +1 -1
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/cloud.py +241 -49
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/extractor.py +19 -9
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/reflection.py +86 -19
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/__init__.py +245 -6
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/_helpers.py +18 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/model.py +82 -3
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/policy.py +6 -3
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/repos/memories.py +17 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/services/ranking.py +32 -9
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/services/search.py +26 -5
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/ingest/watchers/codex_cli.py +24 -3
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/ingest/watchers/gemini_cli.py +18 -2
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/cli/_helpers.py +15 -4
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/cli/backfill.py +78 -14
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/cli/cloud.py +249 -19
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/cli/maintenance.py +42 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/cli/memory.py +5 -1
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/cli/session.py +37 -10
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/cli/setup.py +98 -14
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/mcp_server.py +34 -5
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/web.py +19 -3
- deja_cli-0.3.1/deja/llm/base.py +54 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/llm/embedding.py +10 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/llm/factory.py +38 -1
- deja_cli-0.3.1/deja/llm/providers/anthropic.py +47 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/hooks/deja-post-fail.sh +13 -1
- deja_cli-0.3.1/hooks/deja-precompact.sh +32 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/hooks/deja-recall.sh +19 -2
- {deja_cli-0.2.0 → deja_cli-0.3.1}/pyproject.toml +1 -1
- deja_cli-0.2.0/deja/llm/base.py +0 -34
- deja_cli-0.2.0/deja/llm/providers/anthropic.py +0 -21
- deja_cli-0.2.0/hooks/deja-precompact.sh +0 -20
- {deja_cli-0.2.0 → deja_cli-0.3.1}/.github/workflows/ci.yml +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/.gitignore +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/LICENSE +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/README.pypi.md +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/config/default.yaml +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/__init__.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/config.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/__init__.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/_schema.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/connection.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/queries.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/repos/__init__.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/repos/observations.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/repos/reflection.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/services/__init__.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/services/load.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/services/maintenance.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/core/store/services/save.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/ingest/__init__.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/ingest/watchers/__init__.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/ingest/watchers/base.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/ingest/watchers/claude_code.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/__init__.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/cli/__init__.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/cli/transfer.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/cli/watch.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/interfaces/web_ui/index.html +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/llm/__init__.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/llm/providers/__init__.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/llm/providers/ollama.py +0 -0
- {deja_cli-0.2.0 → deja_cli-0.3.1}/deja/main.py +0 -0
|
@@ -12,13 +12,90 @@ import threading
|
|
|
12
12
|
import webbrowser
|
|
13
13
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
14
14
|
from pathlib import Path
|
|
15
|
-
from typing import Callable, Iterator, Optional
|
|
15
|
+
from typing import Callable, Iterator, Optional, Union
|
|
16
16
|
from urllib.parse import parse_qs, urlparse
|
|
17
17
|
|
|
18
18
|
import httpx
|
|
19
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
19
20
|
|
|
20
21
|
logger = logging.getLogger(__name__)
|
|
21
22
|
|
|
23
|
+
|
|
24
|
+
# ── Typed payloads (R8, 2026-04-22 review) ──────────────────────────────────
|
|
25
|
+
# Replace the previous ``dict → dict`` boundary at auth + push with typed
|
|
26
|
+
# Pydantic models. The bugs the prior pass caught (N5 trigger→triggerCmds
|
|
27
|
+
# silent drop; N6 endpoint not persisted in auth; theoretical AuthState
|
|
28
|
+
# typo wedging every command) all lived in dict access patterns where a
|
|
29
|
+
# missing or misspelled key was indistinguishable from "field not set."
|
|
30
|
+
# Pydantic models make construction the single point that enforces the
|
|
31
|
+
# field contract.
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class AuthState(BaseModel):
    """The contents of ``~/.deja/auth.json``.

    ``access_token`` is the PAT; ``endpoint`` is the host the token was
    issued by (Bug N6, 2026-04-19) and travels with the credential — it
    overrides ``config.cloud.endpoint`` so a token issued by host A is
    never sent to host B. The two extra OAuth-flow fields (``token_type``,
    ``user_id``) are tolerated but not required so legacy auth files keep
    parsing.
    """

    model_config = ConfigDict(extra="allow")  # tolerate forward-compat fields

    # ``repr=False`` keeps the secret out of ``repr()`` / ``str()`` of the
    # model, so an accidental ``logger.debug("%r", auth)`` or a traceback
    # that prints locals does not leak the PAT. The field is still required
    # and still serialized by ``model_dump``.
    access_token: str = Field(repr=False)
    endpoint: Optional[str] = None
    token_type: Optional[str] = None
    user_id: Optional[str] = None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class CloudPushPayload(BaseModel):
    """Body shape accepted by ``POST /v1/memories`` and ``POST /v1/sync/push``.

    The cloud validates with ``forbidNonWhitelisted``: an unknown key is
    answered with a 400. The earlier string-allowlist filter lost fields
    whose cloud-side name differs from the local one — the well-known
    case being the local ``trigger`` comma-string versus the cloud's
    ``triggerCmds`` list (Bug N5). Serialization aliases on this model
    perform the snake_case → camelCase rename in a single place.

    The field set and aliases must mirror the cloud DTO at
    ``apps/api/src/memories/dto/create-memory.dto.ts``. When the cloud
    starts accepting a new field, declare it here with its alias and all
    save paths inherit it.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Identification — always present on the wire.
    local_id: str = Field(serialization_alias="localId")
    content: str
    type: str
    scope: str

    # Optional metadata the cloud accepts. ``domain`` / ``source`` /
    # ``entity_graph`` / ``embedding`` / the raw ``trigger`` comma-string /
    # timestamps other than lastConfirmed are intentionally absent: under
    # ``forbidNonWhitelisted`` the cloud rejects unknown keys with a 400.
    project: Optional[str] = None
    confidence: Optional[float] = None
    category: Optional[str] = None
    trigger_cmds: Optional[list[str]] = Field(None, serialization_alias="triggerCmds")
    last_confirmed: Optional[str] = Field(None, serialization_alias="lastConfirmed")
    archived: Optional[bool] = None
    archived_at: Optional[str] = Field(None, serialization_alias="archivedAt")

    def to_wire(self) -> dict:
        """Return the camelCase dict for the cloud DTO, omitting ``None`` fields."""
        wire = self.model_dump(by_alias=True, exclude_none=True)
        return wire
|
|
98
|
+
|
|
22
99
|
CLI_REDIRECT_PORT = 51234
|
|
23
100
|
|
|
24
101
|
AUTH_FILE = Path.home() / ".deja" / "auth.json"
|
|
@@ -46,8 +123,8 @@ def _get_endpoint(config=None) -> str:
|
|
|
46
123
|
"where these specific creds belong."
|
|
47
124
|
"""
|
|
48
125
|
auth = load_auth()
|
|
49
|
-
if auth and auth.
|
|
50
|
-
return str(auth
|
|
126
|
+
if auth and auth.endpoint:
|
|
127
|
+
return str(auth.endpoint).rstrip("/")
|
|
51
128
|
if config is None:
|
|
52
129
|
return DEFAULT_ENDPOINT
|
|
53
130
|
cloud = getattr(config, "cloud", None)
|
|
@@ -59,13 +136,22 @@ def _get_endpoint(config=None) -> str:
|
|
|
59
136
|
# ── Token storage ─────────────────────────────────────────────────────
|
|
60
137
|
|
|
61
138
|
|
|
62
|
-
def load_auth() -> Optional[
|
|
139
|
+
def load_auth() -> Optional[AuthState]:
    """Read ``~/.deja/auth.json`` into a typed AuthState, or None if absent.

    R8 (2026-04-22 review): previously returned ``Optional[dict]`` and
    every caller did ``auth.get("access_token")`` / ``auth.get("endpoint")``.
    A future caller passing ``{"token": ...}`` instead of
    ``{"access_token": ...}`` would silently wedge every subsequent
    command. Pydantic construction now rejects that at the boundary.

    Returns:
        The validated ``AuthState``, or ``None`` when the auth file does
        not exist.

    Raises:
        ValueError: the file exists but is not valid JSON
            (``json.JSONDecodeError``) or does not satisfy ``AuthState``
            (pydantic's ``ValidationError``); both subclass ``ValueError``.
    """
    # EAFP instead of ``exists()`` + ``read_text()``: the file can vanish
    # between the check and the read (e.g. a concurrent logout), which
    # would surface as an unexpected FileNotFoundError.
    try:
        text = AUTH_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        return None
    raw = json.loads(text)
    return AuthState.model_validate(raw)
|
|
66
152
|
|
|
67
153
|
|
|
68
|
-
def save_auth(data: dict) -> None:
|
|
154
|
+
def save_auth(data: Union[AuthState, dict]) -> None:
|
|
69
155
|
# Bug Q2 (2026-04-19 pass 3): atomic rewrite. ``Path.write_text``
|
|
70
156
|
# truncates then writes, so a crash mid-write (Ctrl-C, OOM, kernel
|
|
71
157
|
# panic) leaves ``auth.json`` empty or half-written — ``load_auth``
|
|
@@ -75,8 +161,14 @@ def save_auth(data: dict) -> None:
|
|
|
75
161
|
# same directory + ``os.replace`` + cleanup on failure.
|
|
76
162
|
# Bug Q3 (2026-04-19 pass 3): 0700 on the parent so new installs
|
|
77
163
|
# don't create a world-readable ~/.deja.
|
|
164
|
+
# Accept either a typed ``AuthState`` or a raw dict (for legacy callers
|
|
165
|
+
# / tests). Normalize via model construction so a dict missing
|
|
166
|
+
# ``access_token`` raises a validation error here, not later when a
|
|
167
|
+
# caller tries to read the field.
|
|
168
|
+
if not isinstance(data, AuthState):
|
|
169
|
+
data = AuthState.model_validate(data)
|
|
78
170
|
AUTH_FILE.parent.mkdir(exist_ok=True, mode=0o700)
|
|
79
|
-
payload = json.dumps(data, indent=2)
|
|
171
|
+
payload = json.dumps(data.model_dump(exclude_none=True), indent=2)
|
|
80
172
|
fd, tmp_name = tempfile.mkstemp(
|
|
81
173
|
prefix=".auth.", suffix=".tmp", dir=AUTH_FILE.parent,
|
|
82
174
|
)
|
|
@@ -104,7 +196,7 @@ def get_token(config=None) -> Optional[str]:
|
|
|
104
196
|
auth = load_auth()
|
|
105
197
|
if not auth:
|
|
106
198
|
return None
|
|
107
|
-
return auth.
|
|
199
|
+
return auth.access_token
|
|
108
200
|
|
|
109
201
|
|
|
110
202
|
# ── Browser login flow ────────────────────────────────────────────────
|
|
@@ -190,9 +282,6 @@ def whoami(config=None) -> Optional[dict]:
|
|
|
190
282
|
# ── Save to cloud ─────────────────────────────────────────────────────
|
|
191
283
|
|
|
192
284
|
|
|
193
|
-
_PUSH_FIELDS = {"content", "type", "project", "confidence", "triggerCmds", "category"}
|
|
194
|
-
|
|
195
|
-
|
|
196
285
|
def push_memory(memory: dict, config=None) -> tuple[bool, Optional[str]]:
|
|
197
286
|
"""Push a single memory to cloud. Best-effort, never raises.
|
|
198
287
|
|
|
@@ -411,47 +500,57 @@ def save_stuck_ids(endpoint: str, stuck: dict[str, str]) -> None:
|
|
|
411
500
|
def _sanitize_for_push(memory: dict) -> dict:
    """Convert a local memory dict to the shape the cloud API accepts.

    The cloud uses ``forbidNonWhitelisted`` validation, so any key not on
    the DTO causes ``HTTP 400 "property X should not exist"``. R8
    (2026-04-22 review) replaces the previous string-allowlist filter
    with a typed :class:`CloudPushPayload`. Construction picks the
    accepted fields, the model's serialization aliases handle the
    snake→camel rename in one place (``last_confirmed`` →
    ``lastConfirmed``, ``archived_at`` → ``archivedAt``,
    ``trigger`` → ``triggerCmds``), and ``to_wire`` drops fields that
    are ``None`` so we never send a key whose value the cloud would
    have to special-case.

    Bug N5 (2026-04-19): the local schema stores command-boundary
    triggers as a snake-case ``trigger`` comma-string (``"alembic
    upgrade, db migrate"``), while the cloud DTO expects camelCase
    ``triggerCmds: list[str]``. The pre-fix sanitizer's allowlist
    filter dropped ``trigger`` (wrong key) and never synthesized
    ``triggerCmds``, so the **batch** push path silently lost every
    trigger on every backlog flush. Translation now lives on the
    Pydantic model so eager-CLI, eager-MCP, and batch-sync share the
    one source of truth.

    Keep field set + aliases in sync with ``CreateMemoryDto`` in
    ``~/projects/deja_sh/apps/api/src/memories/dto/create-memory.dto.ts``.

    Args:
        memory: A local memory row as a dict (snake_case keys).

    Returns:
        The camelCase wire dict with ``None``-valued fields omitted.
    """
    trigger_str = memory.get("trigger")
    trigger_cmds: Optional[list[str]] = None
    if trigger_str:
        tokens = [t.strip() for t in trigger_str.split(",") if t.strip()]
        # A string of only commas/whitespace yields no tokens; send nothing
        # rather than an empty list.
        trigger_cmds = tokens or None

    # ``to_wire`` only drops ``None``, so an empty-string timestamp would
    # otherwise be sent as ``""``. The allowlist-based sanitizer omitted
    # falsy timestamps; normalise them to ``None`` to keep that behaviour.
    last_confirmed = memory.get("last_confirmed") or None
    archived_at = memory.get("archived_at") or None

    # NOTE(review): a row without ``id`` is sent with ``localId: ""`` because
    # the model requires ``local_id``; confirm callers always supply ``id``.
    payload = CloudPushPayload(
        local_id=memory.get("id", ""),
        content=memory.get("content", ""),
        type=memory.get("type", ""),
        # Cloud-side scope is flat — the local "global" / "project:<name>"
        # encoding doesn't apply (the cloud derives scope from its own
        # ``project`` column).
        scope="global",
        project=memory.get("project"),
        confidence=memory.get("confidence"),
        category=memory.get("category"),
        trigger_cmds=trigger_cmds,
        last_confirmed=last_confirmed,
        # Send both the boolean and the timestamp so the cloud uses the
        # original archive time for LWW conflict resolution rather than
        # auto-stamping NOW() on receipt.
        archived=True if archived_at else None,
        archived_at=archived_at,
    )
    return payload.to_wire()
|
|
455
554
|
|
|
456
555
|
|
|
457
556
|
_PULL_RENAME = {
|
|
@@ -532,6 +631,27 @@ def _sanitize_for_pull(memory: dict) -> dict:
|
|
|
532
631
|
return out
|
|
533
632
|
|
|
534
633
|
|
|
634
|
+
class SyncPushPartialError(RuntimeError):
    """Raised by :func:`sync_push` when a transport failure aborts the
    push mid-stream. Carries the ``partial`` dict (``accepted``,
    ``skipped``, ``conflicts``, ``serverTime`` aggregated across the
    batches that DID land) so callers can persist what landed before
    the failure rather than re-pushing everything blind.

    Bug N1 (2026-05-01 review): the previous shape raised plain
    ``RuntimeError`` and discarded ``aggregated["conflicts"]`` —
    earlier-batch quota / content-too-long rejections never reached
    the user, who saw "sync push failed" with no list of which rows
    were already permanently rejected. With LWW upserts the next
    sync re-pushes everything (safe), but the operator still has no
    signal about the rejected subset.

    Args:
        message: Human-readable failure description (becomes ``str(exc)``).
        partial: Aggregated results of the batches that succeeded before
            the failure. Stored by reference on ``self.partial``.
    """

    def __init__(self, message: str, *, partial: dict) -> None:
        super().__init__(message)
        self.partial = partial

    def __reduce__(self):
        """Support ``copy`` / ``pickle`` despite the keyword-only argument.

        The default exception reduction rebuilds the object as
        ``cls(*self.args)``, which raises ``TypeError`` here because
        ``partial`` is a required keyword-only parameter. Binding it
        with ``functools.partial`` lets the exception be copied or sent
        across process boundaries intact.
        """
        import functools  # local import keeps the block self-contained

        rebuild = functools.partial(type(self), partial=self.partial)
        # Third element restores any further instance attributes.
        return (rebuild, self.args, dict(vars(self)))
|
|
653
|
+
|
|
654
|
+
|
|
535
655
|
SYNC_PUSH_BATCH_SIZE = 50
|
|
536
656
|
"""Max rows per ``POST /v1/sync/push`` body.
|
|
537
657
|
|
|
@@ -593,9 +713,23 @@ def sync_push(memories: list[dict], config=None) -> dict:
|
|
|
593
713
|
chunk = sanitized[start : start + SYNC_PUSH_BATCH_SIZE]
|
|
594
714
|
resp = httpx.post(url, json={"memories": chunk}, headers=headers, timeout=60)
|
|
595
715
|
if not resp.is_success:
|
|
596
|
-
|
|
716
|
+
# N1 (2026-05-01 review): before raising, log any conflicts
|
|
717
|
+
# we accumulated from EARLIER successful batches so they're
|
|
718
|
+
# at least observable — and attach the full partial dict to
|
|
719
|
+
# the exception so callers that catch ``SyncPushPartialError``
|
|
720
|
+
# can persist it (e.g. into ``sync_state.json`` so the next
|
|
721
|
+
# sync knows which rows the cloud already rejected).
|
|
722
|
+
if aggregated["conflicts"]:
|
|
723
|
+
logger.warning(
|
|
724
|
+
"cloud sync push aborted with %d earlier-batch "
|
|
725
|
+
"rejection(s) before the transport error — see "
|
|
726
|
+
"exception.partial['conflicts']",
|
|
727
|
+
len(aggregated["conflicts"]),
|
|
728
|
+
)
|
|
729
|
+
raise SyncPushPartialError(
|
|
597
730
|
f"sync push failed ({resp.status_code}) after "
|
|
598
|
-
f"{aggregated['accepted']} accepted in earlier batches: {resp.text}"
|
|
731
|
+
f"{aggregated['accepted']} accepted in earlier batches: {resp.text}",
|
|
732
|
+
partial=aggregated,
|
|
599
733
|
)
|
|
600
734
|
body = resp.json()
|
|
601
735
|
aggregated["accepted"] += body.get("accepted", 0) or 0
|
|
@@ -642,3 +776,61 @@ def sync_pull(since: Optional[str] = None, config=None) -> dict:
|
|
|
642
776
|
)
|
|
643
777
|
resp.raise_for_status()
|
|
644
778
|
return resp.json()
|
|
779
|
+
|
|
780
|
+
|
|
781
|
+
def get_memory_by_local_id(
    local_id: str, config=None
) -> Optional[list[dict]]:
    """Fetch the cloud's view of a row by its local id (Layer 2 verify).

    Hits ``GET /v1/memories/by-local-id/<local_id>`` (shipped 2026-05-04 on
    deja_sh) and returns the body — an array of cloud rows matching
    ``(user_id, local_id)``, sorted ``updatedAt DESC``. Per the cloud
    contract:

    - ``200`` with an array → success. ``len == 0`` means no row matches.
      ``len == 1`` is the normal case. ``len > 1`` is an anomaly the
      verify path is expected to surface (no UNIQUE on
      ``(user_id, local_id)`` in Postgres yet).
    - ``404`` → no row matches; surfaced as an empty list so callers can
      handle "missing" and "anomaly" with one branch.
    - Anything else → returns ``None`` (best-effort: a transient
      verification failure must not be conflated with a divergence
      signal — that would spam ``_stuck`` on every flaky network). This
      includes a successful status whose body is not valid JSON or is
      not a list.

    The call is intentionally one localId at a time; the divergence
    surface is rare (only fires on push-archive verification today) and
    the cloud endpoint is single-id by design.

    Raises:
        RuntimeError: no token is available (user is not logged in).
    """
    from urllib.parse import quote  # local: only this function needs it

    token = get_token(config)
    if not token:
        raise RuntimeError("Not logged in. Run `deja login`.")
    endpoint = _get_endpoint(config)
    # Percent-encode the id so characters such as "/", "?" or "#" cannot
    # change which path (or query) is requested.
    encoded_id = quote(str(local_id), safe="")
    url = f"{endpoint}/v1/memories/by-local-id/{encoded_id}"
    try:
        resp = httpx.get(
            url,
            headers={"Authorization": f"Bearer {token}"},
            timeout=10,
        )
    except Exception as exc:  # deliberate best-effort: never raise on transport
        logger.warning("verify-by-local-id %s failed: %s", local_id, exc)
        return None
    if resp.status_code == 404:
        return []
    if not resp.is_success:
        logger.warning(
            "verify-by-local-id %s returned %d: %s",
            local_id, resp.status_code, resp.text[:120],
        )
        return None
    try:
        body = resp.json()
    except ValueError as exc:
        # A 2xx with a non-JSON body (e.g. a proxy's HTML page) is a
        # verification failure, not a reason to crash the caller.
        logger.warning(
            "verify-by-local-id %s returned invalid JSON: %s",
            local_id, exc,
        )
        return None
    if isinstance(body, list):
        return body
    # Unexpected shape (cloud contract change?). Treat as best-effort
    # failure rather than asserting; logging gives the operator signal.
    logger.warning(
        "verify-by-local-id %s returned non-list body: %r",
        local_id, body,
    )
    return None
|
|
@@ -97,15 +97,25 @@ async def extract_memories(
|
|
|
97
97
|
|
|
98
98
|
user_prompt = f"Session transcript/summary to extract memories from:\n\n{transcript}"
|
|
99
99
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
100
|
+
# Bug R2 (2026-04-22): let adapter / transport / JSON-parse errors
|
|
101
|
+
# propagate. The previous ``except Exception: return []`` made every
|
|
102
|
+
# LLM outage indistinguishable from "the model had nothing to
|
|
103
|
+
# extract." The watcher's ``_process`` then treated the empty list
|
|
104
|
+
# as a successful extraction and stamped ``_processed[path]`` —
|
|
105
|
+
# silently burning the transcript. Same bug class as H5 on the read
|
|
106
|
+
# side. Callers decide:
|
|
107
|
+
#
|
|
108
|
+
# - Watcher (``deja/ingest/watchers/base.py``) wraps this call in
|
|
109
|
+
# its own try/except and routes to ``_schedule_extraction_retry``
|
|
110
|
+
# (bounded N8/P2 backoff: 30s / 2min / 10min, then give up loudly).
|
|
111
|
+
# - CLI callers (``deja save-session``, ``deja ingest-skills``,
|
|
112
|
+
# ``deja backfill``) catch and either exit cleanly or log + skip
|
|
113
|
+
# the one file and continue.
|
|
114
|
+
result = await adapter.complete_structured(
|
|
115
|
+
system=EXTRACTION_SYSTEM,
|
|
116
|
+
user=user_prompt,
|
|
117
|
+
schema=EXTRACTION_SCHEMA,
|
|
118
|
+
)
|
|
109
119
|
|
|
110
120
|
memories = result.get("memories", [])
|
|
111
121
|
if not isinstance(memories, list):
|
|
@@ -336,6 +336,37 @@ class ReflectionEngine:
|
|
|
336
336
|
await self.store.set_reflection_meta(None, last_archive_at=_now_iso())
|
|
337
337
|
return count
|
|
338
338
|
|
|
339
|
+
async def run_dedup_fuzzy(self, project: Optional[str] = None) -> dict:
    """Bug R4 (2026-04-22): wire fuzzy dedup into scheduled reflection.

    ``MaintenanceService.dedup_fuzzy`` has shipped since the Phase 7
    restructure but was never invoked from ``run_full`` — while
    docstrings (MemoryStore, SaveService, MaintenanceService) and
    user-facing docs (code-reading-guide, plan.md, AGENTS.md,
    GEMINI.md, repo-strategic-assessment) all asserted it ran at
    reflection time. Save-side dedup stayed exact-match only, so
    two-character edits accumulated forever and the vault's
    top-ranked results filled with near-duplicates over time.
    Returns ``{"merged": N, "archived": N}``.

    When ``project is None``, fan out per-project + globals (mirrors
    ``run_reflector(None)``'s N2 pattern). The underlying service's
    ``dedup_fuzzy(project=None)`` touches only the global bucket
    because ``fetch_active(None)`` is global-only by convention —
    so a single call would silently leave every project-scoped
    near-duplicate unmerged. Fanning out keeps the scheduled pass
    honest.

    The global bucket (``project=None``) is always visited exactly
    once, whether or not ``list_memory_projects()`` reports it, and a
    project name reported more than once is processed only once.
    """
    if project is not None:
        return await self.store.dedup_fuzzy(project=project)

    # Seed with the global bucket so it is covered even when the store
    # lists only named projects; skip repeats to avoid double work.
    buckets: list[Optional[str]] = [None]
    for name in await self.store.list_memory_projects():
        if name not in buckets:
            buckets.append(name)

    merged = 0
    archived = 0
    for bucket in buckets:
        result = await self.store.dedup_fuzzy(project=bucket)
        merged += result.get("merged", 0)
        archived += result.get("archived", 0)
    return {"merged": merged, "archived": archived}
|
|
369
|
+
|
|
339
370
|
# ── Agent mode ─────────────────────────────────────────────────────────
|
|
340
371
|
|
|
341
372
|
async def agent_mode_prompt(self, project: Optional[str] = None) -> str:
|
|
@@ -354,26 +385,53 @@ class ReflectionEngine:
|
|
|
354
385
|
lines = [
|
|
355
386
|
f"You are acting as a memory reflector for project '{project_label}'.",
|
|
356
387
|
"",
|
|
357
|
-
f"
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
388
|
+
f"PROCESS EVERY ONE of the {len(memories)} active memories below — not just",
|
|
389
|
+
"whatever topic the user happened to ask about in this session. The default",
|
|
390
|
+
"mode of `deja reflect --agent-mode` is a complete sweep: comb every row,",
|
|
391
|
+
"build a punch list of all obvious issues, then execute the punch list in",
|
|
392
|
+
"one pass.",
|
|
393
|
+
"",
|
|
394
|
+
"Scan for, in roughly this priority order:",
|
|
395
|
+
" - Exact-content duplicates (same words across multiple IDs — pick the",
|
|
396
|
+
" one with highest reuse_count, archive the rest)",
|
|
397
|
+
" - Scope-leaks (same content saved at scope:global AND scope:project:X —",
|
|
398
|
+
" keep the correctly-scoped one, archive the other)",
|
|
399
|
+
" - Junk / save errors (single-word patterns, truncated content, malformed",
|
|
400
|
+
" entries)",
|
|
401
|
+
" - Stale stub TODO lists ('next items: 1. X, 2. Y') — typically archive",
|
|
402
|
+
" - Untriggered gotchas tied to a specific command boundary (add --trigger)",
|
|
403
|
+
" - Misclassified entries (pattern that is really a gotcha; gotcha that is",
|
|
404
|
+
" really a preference)",
|
|
405
|
+
" - Semantic duplicates (same rule, different wording — fuzzy threshold",
|
|
406
|
+
" won't catch these, you must)",
|
|
407
|
+
"",
|
|
408
|
+
"Actions:",
|
|
409
|
+
" 1. Archive (stale, no longer relevant):",
|
|
410
|
+
" deja archive <id>",
|
|
411
|
+
" 2. Invalidate (actively contradicted by newer information):",
|
|
412
|
+
" deja invalidate <id>",
|
|
413
|
+
" 3. Consolidate (two or more memories express the same thing):",
|
|
414
|
+
" deja archive <id1>",
|
|
415
|
+
" deja archive <id2>",
|
|
365
416
|
f' deja save "<condensed content>" --type <type>{project_flag}',
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
417
|
+
" (If one existing version is clearly better, just archive the lesser",
|
|
418
|
+
" and keep the better as-is — no new save needed.)",
|
|
419
|
+
" 4. Trigger-tag (gotcha clearly tied to a specific command, no trigger yet):",
|
|
420
|
+
' deja update <id> --trigger "cmd1, cmd2"',
|
|
421
|
+
" Use this for gotchas about what to do right before/after a specific",
|
|
422
|
+
" command. Example triggers: 'kubectl apply', 'alembic upgrade',",
|
|
423
|
+
" 'terraform apply'. Only tag gotchas — not preferences, decisions,",
|
|
424
|
+
" or progress.",
|
|
425
|
+
" 5. Reclassify (saved as the wrong type — e.g. pattern that is really a",
|
|
426
|
+
" gotcha):",
|
|
427
|
+
" deja update <id> --type gotcha",
|
|
373
428
|
"",
|
|
374
|
-
"Be conservative
|
|
375
|
-
"
|
|
376
|
-
"
|
|
429
|
+
"Be conservative on each ACTION (skip a memory when intent is unclear),",
|
|
430
|
+
"but exhaustive on COVERAGE (visit every memory, not just user-flagged ones).",
|
|
431
|
+
"For trigger tagging: if a gotcha is already tagged (shown as [trigger:...]),",
|
|
432
|
+
"skip it.",
|
|
433
|
+
f"If after sweeping all {len(memories)} you find nothing actionable, that's",
|
|
434
|
+
"fine — say so.",
|
|
377
435
|
"",
|
|
378
436
|
"--- MEMORIES ---",
|
|
379
437
|
"",
|
|
@@ -395,7 +453,15 @@ class ReflectionEngine:
|
|
|
395
453
|
# ── Full pass + auto-trigger ────────────────────────────────────────────
|
|
396
454
|
|
|
397
455
|
async def run_full(self, project: Optional[str] = None) -> dict:
|
|
398
|
-
"""Full reflection pass: observer → reflector → decay → promote → archive.
|
|
456
|
+
"""Full reflection pass: observer → reflector → decay → promote → dedup_fuzzy → archive.
|
|
457
|
+
|
|
458
|
+
Bug R4 (2026-04-22): ``dedup_fuzzy`` slots between ``promote``
|
|
459
|
+
and ``archive`` — after promotion (so promoted patterns are in
|
|
460
|
+
the active set for cross-project dedup) and before archival (so
|
|
461
|
+
archive sweeps low-confidence rows AFTER near-duplicates have
|
|
462
|
+
been merged, giving the survivor the full merged confidence/
|
|
463
|
+
reuse bump).
|
|
464
|
+
"""
|
|
399
465
|
results: dict = {}
|
|
400
466
|
if self.adapter:
|
|
401
467
|
results["observer"] = await self.run_observer(project)
|
|
@@ -405,6 +471,7 @@ class ReflectionEngine:
|
|
|
405
471
|
results["reflector"] = 0
|
|
406
472
|
results["decay"] = await self.run_decay()
|
|
407
473
|
results["promote"] = await self.run_promote()
|
|
474
|
+
results["dedup_fuzzy"] = await self.run_dedup_fuzzy(project)
|
|
408
475
|
results["archive"] = await self.run_archive()
|
|
409
476
|
return results
|
|
410
477
|
|