@neikyun/ciel 6.8.0 → 6.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/.claude/hooks/memory-bootstrap.sh +287 -0
- package/assets/.claude/hooks/memory-engine.py +718 -0
- package/assets/.claude/hooks/session-start.sh +99 -0
- package/assets/.claude/hooks/user-prompt-submit.sh +112 -0
- package/assets/commands/ciel-audit.md +77 -17
- package/assets/commands/ciel-memory-bootstrap.md +160 -0
- package/assets/commands/ciel-status.md +1 -1
- package/assets/platforms/opencode/.opencode/agents/ciel-explorer.md +1 -1
- package/assets/platforms/opencode/.opencode/agents/ciel-improver.md +2 -2
- package/assets/platforms/opencode/.opencode/agents/ciel-researcher.md +1 -1
- package/assets/platforms/opencode/.opencode/commands/ciel-audit.md +40 -22
- package/package.json +1 -1
- package/scripts/postinstall.cjs +4 -0
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Ciel cued-recall memory engine.
|
|
3
|
+
|
|
4
|
+
Subcommands:
|
|
5
|
+
query — given prompt + cwd, return top-K memories under token cap.
|
|
6
|
+
Updates trigger_count and last_triggered for matched memories.
|
|
7
|
+
Marks stale entries on the fly.
|
|
8
|
+
init — create empty .ciel/memory/{episodes,concepts,guards}/ + index.json
|
|
9
|
+
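rebuild-index — scan all *.md frontmatter, regenerate index.json from source
new-id — emit a fresh, collision-free memory id (mem_<unix_seconds>_<6 hex chars>)
capture — create a memory file from CLI arguments, then rebuild index.json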
|
|
10
|
+
|
|
11
|
+
Designed to be called from hooks/user-prompt-submit.sh and from
|
|
12
|
+
hooks/memory-bootstrap.sh. No external Python dependencies (stdlib only) —
|
|
13
|
+
must run wherever Ciel is installed without `pip install`.
|
|
14
|
+
|
|
15
|
+
See docs/adrs/0001-cued-recall-memory.md for design rationale.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import sys
|
|
19
|
+
import os
|
|
20
|
+
import json
|
|
21
|
+
import re
|
|
22
|
+
import fnmatch
|
|
23
|
+
import argparse
|
|
24
|
+
import secrets
|
|
25
|
+
from datetime import datetime, timezone
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
# ─── Constants ──────────────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
# Upper bound, in estimated tokens, on what a single query may print.
# Keys are the lowercase depth names accepted by the `query` subcommand.
TOKEN_CAPS = dict(
    trivial=1000,
    standard=3000,
    critical=5000,
)
|
|
35
|
+
|
|
36
|
+
# File extension (with leading dot) -> language tag. Drives language scoping:
# extensions mentioned in a prompt are translated to these tags.
LANG_BY_EXT = {
    ".ts": "typescript",
    ".tsx": "typescript",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".py": "python",
    ".kt": "kotlin",
    ".kts": "kotlin",
    ".go": "go",
    ".rs": "rust",
    ".sql": "sql",
    ".sh": "bash",
    ".bash": "bash",
    ".rb": "ruby",
    ".java": "java",
    ".cs": "csharp",
    ".php": "php",
    ".swift": "swift",
    ".c": "c",
    ".cpp": "cpp",
    ".cc": "cpp",
    ".h": "c",
    ".hpp": "cpp",
    ".md": "markdown",
}
|
|
54
|
+
|
|
55
|
+
# (keyword, intent label) pairs, grouped by label for readability and
# flattened back into one ordered list. Several keywords may share a label;
# extend a group (or add a new one) to teach the engine more intents.
INTENT_KEYWORDS = [
    (keyword, label)
    for label, keywords in (
        ("schema-change", ("migration", "schema", "alter table")),
        ("new-route", ("route", "endpoint", "controller")),
        ("new-component", ("component",)),
        ("testing", ("test", "vitest", "jest", "pytest")),
        ("deploy", ("deploy", "release", "ci/cd")),
        ("auth", ("auth", "login", "oauth", "jwt", "session")),
        ("payment", ("payment", "stripe")),
        ("webhook", ("webhook",)),
        ("hook", ("hook",)),
        ("refactor", ("refactor",)),
        ("rename", ("rename",)),
    )
    for keyword in keywords
]
|
|
83
|
+
|
|
84
|
+
# ─── Cue extraction ─────────────────────────────────────────────────────────
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def estimate_tokens(text: str) -> int:
    """Approximate the token count of ``text``.

    Uses the rule of thumb of four characters per token (English/code) and
    never reports fewer than one token, even for an empty string.
    """
    quarter_length = len(text) // 4
    if quarter_length < 1:
        return 1
    return quarter_length
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Host-shaped tokens that must never be treated as file paths: example.com/foo,
# github.com/x, api.service.io/...
#
# Two details matter:
#   * `(?:\.[\w-]+)*` lets the host carry subdomains (api.service.io); a single
#     `[\w-]+` label cannot span the dots.
#   * The TLD must END the host -- followed by `/` or the end of the token.
#     A plain word boundary also fires before a further `.ext`, which would
#     classify real filenames such as `docker-compose.dev.yml` as domains.
#     The trade-off is that multi-part suffixes outside the list
#     (example.co.uk) are no longer recognised as domains.
_URL_TLD_RE = re.compile(
    r'^[\w-]+(?:\.[\w-]+)*'
    r'\.(com|org|io|dev|sh|net|co|me|app|cloud|ai|gg|run|site|xyz|page|blog)'
    r'(?=/|$)',
    re.I,
)
|
|
95
|
+
|
|
96
|
+
# Names that may look like identifiers but are too generic to act as symbol
# cues: mentioning "Promise" in prose should not fire any memory.
_SYMBOL_STOPLIST = frozenset(
    # Generic built-in / standard-library type names
    (
        'Promise', 'Array', 'String', 'Number', 'Object', 'Map', 'Set', 'Date',
        'Error', 'Boolean', 'JSON', 'Math', 'RegExp', 'Symbol', 'Function',
    )
    # Python
    + ('List', 'Dict', 'Tuple', 'None', 'True', 'False', 'Any')
    # Capitalized English words that commonly start a sentence
    + ('When', 'Then', 'After', 'Before', 'While', 'If', 'Else', 'Otherwise')
    # Comment markers
    + ('TODO', 'FIXME', 'NOTE', 'XXX', 'HACK', 'NB')
)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def extract_path_cues(prompt: str):
    """Find file/path-like tokens in the prompt.

    A token qualifies when it contains a `/` between path characters, or when
    it looks like `name.ext` with a 1-5 letter lowercase extension.

    Filtered out: tokens of two characters or fewer, domains
    (`github.com/foo`), protocol-relative leftovers of URLs (`//host/x`), and
    purely numeric tokens (`1/2`, `2026-05-08`). Remaining false positives are
    harmless: at scoring time they simply match no memory's path_patterns.

    Returns the surviving tokens as a list, in prompt order (duplicates kept).
    """
    raw = re.findall(r'[\w./*\-]+/[\w./*\-]+|\b[\w-]+\.[a-z]{1,5}\b', prompt)
    cleaned = []
    for token in raw:
        # Strip trailing punctuation, and leading punctuation EXCEPT the dot:
        # paths may legitimately start with `.` (`.claude/settings.json`,
        # `.env`), and stripping it would make dotfile paths invisible.
        p = token.rstrip('.,)(\'"`').lstrip(',)(\'"`')
        if len(p) <= 2:
            continue
        # A slash-less token whose extension is a known source extension is a
        # filename, not a domain: `deploy.sh` must survive even though `sh` is
        # also a TLD.
        bare_source_file = (
            '/' not in p
            and os.path.splitext(p)[1].lower() in LANG_BY_EXT
        )
        # Drop URLs / domains (github.com/foo, example.com/bar).
        if _URL_TLD_RE.match(p) and not bare_source_file:
            continue
        # Drop what is left of a URL once its scheme is cut off. The token
        # regex cannot capture `:`, so `https://host/x` arrives here as
        # `//host/x`. Tokens merely starting with "http" (`http/server.go`,
        # `http_client.py`) are real paths and are intentionally kept.
        if p.startswith('//'):
            continue
        # Drop pure numeric ratios and dates like "1/2", "2026-05-08".
        if re.match(r'^[\d./-]+$', p):
            continue
        cleaned.append(p)
    return cleaned
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def extract_symbol_cues(prompt: str):
    """Collect identifier-looking words from the prompt.

    Three shapes are recognised:
      * camelCase   -- lowercase start followed by one or more capitalized parts
      * PascalCase  -- at least two capitalized parts, so ordinary capitalized
                       words ("When", "Then") are not picked up
      * snake_case  -- at least two underscores

    Anything listed in the symbol stoplist is discarded so that generic names
    mentioned in prose do not inflate scores. Returns a de-duplicated list in
    no particular order.
    """
    identifier_patterns = (
        r'\b[a-z]+(?:[A-Z][a-zA-Z0-9]+)+\b',          # camelCase
        r'\b[A-Z][a-z0-9]+(?:[A-Z][a-zA-Z0-9]+)+\b',  # PascalCase
        r'\b[a-z]+(?:_[a-z0-9]+){2,}\b',              # snake_case
    )
    found = set()
    for identifier_pattern in identifier_patterns:
        found.update(re.findall(identifier_pattern, prompt))
    return [name for name in found if name not in _SYMBOL_STOPLIST]
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def extract_intent_cues(prompt: str):
    """Return the intent labels whose keywords occur in the prompt.

    Matching is a plain substring test against the lowercased prompt. Each
    label appears at most once; the order of the returned list is unspecified.
    """
    lowered = prompt.lower()
    matched_labels = {
        label
        for keyword, label in INTENT_KEYWORDS
        if keyword in lowered
    }
    return list(matched_labels)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def extract_language_cues(prompt: str):
    """Infer language tags from file extensions mentioned in the prompt.

    An extension counts only when it directly follows at least one word
    character and ends at a word boundary (`foo.ts`, not `.tsx` for `.ts`).
    Returns the distinct language tags as a list in unspecified order.
    """
    detected = {
        language
        for extension, language in LANG_BY_EXT.items()
        if re.search(r'\b\w+' + re.escape(extension) + r'\b', prompt)
    }
    return list(detected)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# ─── Matching & scoring ─────────────────────────────────────────────────────
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# Cache for compiled glob-to-regex patterns. Patterns are read from the corpus
|
|
178
|
+
# and rarely change between calls within a single hook invocation.
|
|
179
|
+
_PATTERN_CACHE = {}


def _glob_to_regex(pattern: str):
    """Translate a gitignore-style glob (with `**`) to a compiled regex.

    Translation rules:
      * `**/` at the start of a path segment matches zero or more whole
        directories, so `src/**/foo.ts` matches `src/foo.ts` and
        `src/a/b/foo.ts` but NOT `src/xfoo.ts`.
      * any other `**` matches any sequence including slashes; a `/` directly
        after it is consumed.
      * `*` matches any sequence excluding slashes (single segment).
      * `?` matches a single non-slash character.
      * every other character matches itself literally.

    fnmatch.fnmatch's `*` also eats slashes, which silently produces false
    positives on patterns like `src/*.ts`; this translator is stricter.

    The pattern is anchored at both ends. Compiled patterns are memoised in
    _PATTERN_CACHE, so repeated calls return the same object.
    """
    cached = _PATTERN_CACHE.get(pattern)
    if cached is not None:
        return cached

    out = []
    i = 0
    n = len(pattern)
    while i < n:
        c = pattern[i]
        if c == '*':
            if pattern.startswith('**', i):
                at_segment_start = i == 0 or pattern[i - 1] == '/'
                i += 2
                if i < n and pattern[i] == '/':
                    if at_segment_start and i + 1 < n:
                        # `**/x`: optional run of complete directories. A bare
                        # `.*` here would let `**/foo` match `barfoo`.
                        out.append('(?:.*/)?')
                    else:
                        # Trailing `**/` or `**` glued to text: recursive
                        # match-anything.
                        out.append('.*')
                    i += 1
                else:
                    out.append('.*')
            else:
                # `*` — match anything except /
                out.append('[^/]*')
                i += 1
        elif c == '?':
            out.append('[^/]')
            i += 1
        else:
            out.append(re.escape(c))
            i += 1

    compiled = re.compile('^' + ''.join(out) + '$')
    _PATTERN_CACHE[pattern] = compiled
    return compiled
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def match_path_pattern(pattern: str, paths) -> bool:
    """Report whether the glob ``pattern`` (``**`` supported) matches any path.

    Returns False when ``paths`` is empty.
    """
    compiled = _glob_to_regex(pattern)
    return any(compiled.match(candidate) for candidate in paths)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def score_memory(mem, paths, symbols, intents, langs) -> int:
    """Score a memory's relevance to the cues extracted from a prompt.

    Returns 0 to exclude the memory; a positive score means include it, and
    higher scores rank first.

    Scoring:
      * language gate -- a memory that declares languages is excluded when the
        prompt has language cues and none overlap (avoids Kotlin memories
        firing on TS edits)
      * +10 for each of the memory's path_patterns matching any of ``paths``
      * +8 for each of the memory's symbols present in ``symbols``
      * +5 for each of the memory's intents present in ``intents``
      * no cue match at all -> 0 (cued recall, not free recall)
      * + trigger_count, capped at 10 (proven utility)
      * recency of last_triggered (falling back to captured_at): +5 if
        younger than 7 days, +2 if younger than 30, -3 if older than 180

    Timestamps without a UTC offset (including date-only values such as
    ``2026-05-08``) are interpreted as UTC. Subtracting such a naive value
    from an aware "now" raises TypeError, which would silently disable the
    recency factor. Unparseable timestamps are ignored; negative ages (clock
    skew, future-dated entries) are clamped to 0.
    """
    mem_langs = mem.get('languages') or []
    if mem_langs and langs and not (set(mem_langs) & set(langs)):
        return 0

    prompt_symbols = set(symbols)
    prompt_intents = set(intents)

    score = 0
    for pattern in mem.get('path_patterns') or []:
        if match_path_pattern(pattern, paths):
            score += 10
    for sym in mem.get('symbols') or []:
        if sym in prompt_symbols:
            score += 8
    for intent in mem.get('intents') or []:
        if intent in prompt_intents:
            score += 5

    # No cue match at all -> don't include.
    if score == 0:
        return 0

    # Boost for proven utility (frequent triggers).
    score += min(mem.get('trigger_count') or 0, 10)

    last = mem.get('last_triggered') or mem.get('captured_at')
    if last:
        try:
            # str() keeps a non-string value (hand-edited index) from raising
            # AttributeError on .replace().
            then = datetime.fromisoformat(str(last).replace('Z', '+00:00'))
        except ValueError:
            then = None
        if then is not None:
            if then.tzinfo is None:
                then = then.replace(tzinfo=timezone.utc)
            age_days = max(0, (datetime.now(timezone.utc) - then).days)
            if age_days < 7:
                score += 5
            elif age_days < 30:
                score += 2
            elif age_days > 180:
                score -= 3

    return score
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
# ─── Decay ──────────────────────────────────────────────────────────────────
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def mark_stale_inplace(memories: dict, now: datetime) -> int:
    """Flag stale=True on memories past their stale_after_days threshold.

    Mutates the entries of ``memories`` in place and returns how many were
    newly marked. The age anchor is last_triggered when set, otherwise
    captured_at; entries with neither are left alone, as are entries already
    stale. The threshold defaults to 90 days.

    Robustness rules:
      * Timestamps without a UTC offset (including date-only values) are
        interpreted as UTC. Subtracting a naive value from an aware ``now``
        raises TypeError, which would leave such memories immune to staling.
      * A numeric string threshold ("90") is accepted. A null or
        non-numeric threshold means the entry is never marked stale.
      * Unparseable timestamps are skipped.
      * Future-dated anchors (clock skew, manual edit) are clamped to an age
        of 0, so those memories cannot be stale.
    """
    if now.tzinfo is None:
        now = now.replace(tzinfo=timezone.utc)

    newly_stale = 0
    for m in memories.values():
        if m.get('stale'):
            continue
        anchor = m.get('last_triggered') or m.get('captured_at')
        if not anchor:
            continue
        try:
            threshold = float(m.get('stale_after_days', 90))
        except (TypeError, ValueError):
            continue
        try:
            then = datetime.fromisoformat(str(anchor).replace('Z', '+00:00'))
        except ValueError:
            continue
        if then.tzinfo is None:
            then = then.replace(tzinfo=timezone.utc)
        age_days = max(0, (now - then).days)
        if age_days > threshold:
            m['stale'] = True
            newly_stale += 1
    return newly_stale
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# ─── Subcommands ────────────────────────────────────────────────────────────
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def resolve_cwd(arg_cwd):
    """Pick the project directory to operate on, as a ``Path``.

    Precedence: the explicit ``arg_cwd`` if truthy, then the
    CLAUDE_PROJECT_DIR environment variable if set and non-empty, then the
    process's current working directory.
    """
    if arg_cwd:
        return Path(arg_cwd)
    project_dir = os.environ.get('CLAUDE_PROJECT_DIR')
    if project_dir:
        return Path(project_dir)
    return Path(os.getcwd())
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def atomic_write_json(path: Path, data) -> None:
    """Write JSON atomically via a per-process tmp file + rename.

    The tmp file sits next to ``path`` and carries the pid plus a random
    suffix, so two concurrent writers never share a tmp file. The rename is
    atomic on the same filesystem, so readers see either the old or the new
    content, never a partial write.

    If serialization or the rename fails, the tmp file is removed and the
    exception is re-raised; ``path`` keeps its previous content and no
    `*.tmp` litter accumulates next to it.

    NOTE: this prevents *partial writes*, not *lost updates*. If session A
    and session B both read index.json at the same time, increment, and
    write, the later writer wins. For lock-protected read-modify-write,
    use atomic_update_index().

    Raises:
        TypeError / ValueError: ``data`` is not JSON-serializable.
        OSError: the tmp file could not be written or renamed.
    """
    tmp = path.with_suffix(f'.{os.getpid()}.{secrets.token_hex(2)}.tmp')
    try:
        with open(tmp, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        tmp.replace(path)
    except BaseException:
        # Best-effort cleanup; the original failure is what the caller needs.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def atomic_update_index(path: Path, mutator):
    """Read-modify-write index.json under an fcntl advisory lock.

    ``mutator`` is a callable taking the parsed dict and mutating it in place
    (or returning a new dict). Returns the final dict written to disk.

    Concurrent sessions are serialized: each waits for the previous one to
    finish its read-modify-write, so trigger increments compose (8 -> 9 -> 10)
    instead of last-writer-wins.

    Details:
      * A missing file is created by the same open() call that is then
        locked, so there is no window between "check it exists" and "create
        it" in which another session's update could be replaced.
      * Empty or corrupt JSON is treated as ``{}`` on both code paths.
      * The new content is serialized BEFORE the file is truncated. If the
        mutator raises or its result is not JSON-serializable, the file on
        disk is left untouched.

    On platforms without fcntl (Windows), falls back to a plain atomic write
    without the lock -- lost updates are possible there.
    """
    try:
        import fcntl
    except ImportError:
        # Windows fallback — no advisory lock available in stdlib.
        data = {}
        if path.exists():
            with open(path, encoding='utf-8') as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError:
                    data = {}
        result = mutator(data)
        final = result if result is not None else data
        atomic_write_json(path, final)
        return final

    # POSIX: hold an exclusive lock on the index file for the whole cycle.
    # O_CREAT makes "create if missing" part of the open itself.
    fd = os.open(path, os.O_RDWR | os.O_CREAT, 0o666)
    with os.fdopen(fd, 'r+', encoding='utf-8') as f:
        fcntl.flock(f.fileno(), fcntl.LOCK_EX)
        try:
            f.seek(0)
            try:
                data = json.load(f)
            except json.JSONDecodeError:
                data = {}
            result = mutator(data)
            final = result if result is not None else data
            # Serialize first: a failure here must not leave a truncated file.
            payload = json.dumps(final, indent=2, ensure_ascii=False)
            f.seek(0)
            f.truncate()
            f.write(payload)
            f.flush()
            os.fsync(f.fileno())
        finally:
            fcntl.flock(f.fileno(), fcntl.LOCK_UN)
    return final
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def cmd_query(args):
    """`query` subcommand: print memories cued by the prompt, record the hits.

    Uses ``args.cwd``, ``args.prompt`` and ``args.depth``. Prints nothing when
    the project has no index.json, the index holds no memories, nothing
    scores above zero, or not even the best match fits the token budget.

    Selection and bookkeeping both happen inside the callback handed to
    atomic_update_index(): ranking is based on the state that callback
    receives, and the trigger_count / last_triggered updates are written by
    the same update call.
    """
    index_file = resolve_cwd(args.cwd) / '.ciel' / 'memory' / 'index.json'
    if not index_file.exists():
        return  # Silent: no memory corpus yet

    prompt = args.prompt or ''
    depth = (args.depth or 'standard').lower()
    cap = TOKEN_CAPS.get(depth, 3000)

    paths = extract_path_cues(prompt)
    symbols = extract_symbol_cues(prompt)
    intents = extract_intent_cues(prompt)
    langs = extract_language_cues(prompt)
    now = datetime.now(timezone.utc)
    iso_now = now.isoformat().replace('+00:00', 'Z')

    # Filled in by the mutator; read after the update has completed.
    report_lines = []
    tokens_used = 0

    def mutator(idx):
        nonlocal tokens_used

        mems = idx.get('memories', {})
        if not mems:
            return idx

        mark_stale_inplace(mems, now)

        candidates = []
        for mid, m in mems.items():
            if m.get('stale'):
                continue
            points = score_memory(m, paths, symbols, intents, langs)
            if points > 0:
                candidates.append((points, mid, m))
        if not candidates:
            return idx

        # Highest score first; ties keep index order (the sort is stable).
        candidates.sort(key=lambda entry: entry[0], reverse=True)

        # Reserve room for the footer, then admit lines until the cap is hit.
        footer_cost = estimate_tokens(
            "\nRead full content from .ciel/memory/{episodes,concepts,guards}/ when relevant."
        )
        budget = cap - footer_cost
        used = estimate_tokens("Cued-recall memory matches:\n")
        chosen = []
        for _, mid, m in candidates:
            line = f" [{mid}, fired {m.get('trigger_count', 0)}×] {m.get('title', '?')}"
            cost = estimate_tokens(line) + 1
            if used + cost > budget:
                break
            used += cost
            chosen.append((m, line))
        if not chosen:
            return idx

        # Record the hits. The printed line shows the count from before this
        # increment.
        for m, line in chosen:
            m['trigger_count'] = (m.get('trigger_count') or 0) + 1
            m['last_triggered'] = iso_now
            report_lines.append(line)
        tokens_used = used
        return idx

    atomic_update_index(index_file, mutator)

    if not report_lines:
        return

    footer = (
        "Read full content from .ciel/memory/{episodes,concepts,guards}/ when relevant. "
        f"({tokens_used}/{cap} tokens)"
    )
    print("\n".join(["Cued-recall memory matches:", *report_lines, footer]))
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def cmd_init(args):
    """`init` subcommand: create the empty memory layout under ``args.cwd``.

    Ensures `.ciel/memory/{episodes,concepts,guards}/` exist. Writes a fresh,
    empty version-2 index.json only when none is present; an existing index is
    never overwritten. Prints which of the two cases applied.
    """
    base = resolve_cwd(args.cwd) / '.ciel' / 'memory'
    for sub in ('episodes', 'concepts', 'guards'):
        (base / sub).mkdir(parents=True, exist_ok=True)

    index_file = base / 'index.json'
    if index_file.exists():
        print(f"Memory corpus already exists at {base}/")
        return

    empty_index = {
        "version": 2,
        "memories": {},
        "by_path": {},
        "by_symbol": {},
        "by_intent": {},
        "by_language": {},
    }
    atomic_write_json(index_file, empty_index)
    print(f"Initialized memory corpus at {base}/")
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
# Frontmatter keys that are always kept as strings, however numeric the value
# looks. An id such as "12345" must not become the int 12345: that would
# break the index keying and the JSON round-trip.
_STRING_FIELDS = frozenset((
    'id',
    'title',
    'last_triggered',
    'captured_at',
    'file',
    'source',
    'captured_from',
))
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def parse_yaml_frontmatter(text: str) -> dict:
    """Minimal YAML parser for the frontmatter dialect we use.

    Supports: scalar `key: value`, inline arrays `[a, b]`, block lists made
    of `- item` lines under a `key:` with no value, booleans (true/false),
    null (`null` or `~`) and integers. No anchors, no nested maps. Blank lines
    and `#` comment lines are skipped.

    Block-list items:
      * are recognised at ANY indentation (`- x`, `  - x`, `    - x`); an
        item at an unexpected indent is not silently dropped.
      * lose only the single `- ` marker; an item that itself starts with
        dashes (`- --flag`) keeps them.
      * are ignored when no list key is open.

    String-typed fields (id, title, timestamps, ...) are NEVER int-coerced,
    even if they look numeric. See _STRING_FIELDS.

    Returns a dict mapping each key to str, int, bool, None or list of str.
    """
    out = {}
    current_list_key = None
    for raw_line in text.split('\n'):
        stripped = raw_line.strip()
        if not stripped or stripped.startswith('#'):
            continue

        unindented = raw_line.lstrip(' \t')
        if unindented.startswith('- '):
            if current_list_key:
                # Drop exactly the "- " marker, then whitespace and quotes.
                val = unindented[2:].strip().strip('"\'')
                out.setdefault(current_list_key, []).append(val)
            continue

        if ':' in raw_line:
            key, _, val = raw_line.partition(':')
            key = key.strip()
            val = val.strip()
            current_list_key = None
            if not val:
                # `key:` with nothing after it opens a block list.
                current_list_key = key
                out[key] = []
            elif val.startswith('[') and val.endswith(']'):
                inner = val[1:-1].strip()
                out[key] = [
                    x.strip().strip('"\'') for x in inner.split(',') if x.strip()
                ]
            elif val.lower() == 'null' or val == '~':
                out[key] = None
            elif key in _STRING_FIELDS:
                # Hard-typed as string regardless of numeric appearance.
                # The null check above takes precedence so explicit nulls
                # survive.
                out[key] = val.strip('"\'')
            elif val.lower() == 'true':
                out[key] = True
            elif val.lower() == 'false':
                out[key] = False
            else:
                try:
                    out[key] = int(val)
                except ValueError:
                    out[key] = val.strip('"\'')
    return out
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def cmd_rebuild_index(args):
    """`rebuild-index` subcommand: regenerate index.json from *.md frontmatter.

    Scans every `*.md` below `.ciel/memory/` under ``args.cwd`` (skipping
    readme.md and review-queue.md), parses its frontmatter, and rebuilds the
    `memories` table plus the by_path / by_symbol / by_intent / by_language
    lookup tables. Files without frontmatter or without an `id` are skipped;
    unreadable files are skipped with a warning on stderr.

    Runtime counters survive the rebuild. `query` records trigger_count and
    last_triggered only in index.json, never in the markdown files, so a
    rebuild purely from source would reset them. For each id that already
    exists in the previous index:
      * trigger_count keeps the larger of the previous and the file's value;
      * last_triggered is carried over when the file does not set one.
    A missing, unreadable or corrupt previous index is ignored.

    Files are visited in sorted path order so that, if two files declare the
    same id, which one wins is deterministic (the later path).

    Exits with status 1 if the memory directory does not exist.
    """
    cwd = resolve_cwd(args.cwd)
    base = cwd / '.ciel' / 'memory'
    if not base.exists():
        print(f"No memory directory at {base}", file=sys.stderr)
        sys.exit(1)

    out = base / 'index.json'

    # Best-effort read of the previous index for the runtime counters.
    previous = {}
    try:
        with open(out, encoding='utf-8') as f:
            loaded = json.load(f)
    except (OSError, ValueError):
        loaded = None
    if isinstance(loaded, dict) and isinstance(loaded.get('memories'), dict):
        previous = loaded['memories']

    idx = {
        "version": 2,
        "memories": {},
        "by_path": {},
        "by_symbol": {},
        "by_intent": {},
        "by_language": {},
    }

    parsed = 0
    for mdfile in sorted(base.rglob('*.md')):
        if mdfile.name.lower() in ('readme.md', 'review-queue.md'):
            continue
        try:
            content = mdfile.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            print(f"Warning: skipping {mdfile}: {e}", file=sys.stderr)
            continue

        m = re.match(r'^---\n(.*?)\n---', content, re.DOTALL)
        if not m:
            continue
        fm = parse_yaml_frontmatter(m.group(1))
        mid = fm.get('id')
        if not mid:
            continue
        fm['file'] = str(mdfile.relative_to(base))

        prior = previous.get(mid)
        if isinstance(prior, dict):
            file_count = fm.get('trigger_count')
            if not isinstance(file_count, int):
                file_count = 0
            prior_count = prior.get('trigger_count')
            if isinstance(prior_count, int) and prior_count > file_count:
                fm['trigger_count'] = prior_count
            if prior.get('last_triggered') and not fm.get('last_triggered'):
                fm['last_triggered'] = prior['last_triggered']

        idx['memories'][mid] = fm
        for path in fm.get('path_patterns') or []:
            idx['by_path'].setdefault(path, []).append(mid)
        for sym in fm.get('symbols') or []:
            idx['by_symbol'].setdefault(sym, []).append(mid)
        for intent in fm.get('intents') or []:
            idx['by_intent'].setdefault(intent, []).append(mid)
        for lang in fm.get('languages') or []:
            idx['by_language'].setdefault(lang, []).append(mid)
        parsed += 1

    atomic_write_json(out, idx)
    print(f"Rebuilt index: {parsed} memories")
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def cmd_new_id(args):
    """`new-id` subcommand: print a fresh memory id.

    Format: mem_<unix_seconds>_<6 hex chars>. The random tail comes from
    `secrets`, so two sessions asking within the same second still get
    distinct ids in practice (~16M possibilities per second). ``args`` is not
    used.
    """
    epoch_seconds = int(datetime.now(timezone.utc).timestamp())
    random_tail = secrets.token_hex(3)
    print("mem_" + str(epoch_seconds) + "_" + random_tail)
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def cmd_capture(args):
    """`capture` subcommand: write a new memory file, then rebuild the index.

    Uses ``args.cwd``, ``args.type``, ``args.title``, ``args.content``,
    ``args.source`` and the comma-separated ``args.languages``,
    ``args.path_patterns``, ``args.symbols`` and ``args.intents``.

    The file lands in `.ciel/memory/<type>s/` as `<YYYY-MM-DD>-<slug>.md`,
    holding line-based frontmatter, a `# title` heading and the content
    (defaulting to the title).

    Safeguards:
      * An existing file is never overwritten: if the date+slug name is
        already taken, the new id's random suffix is appended to the name.
      * A title with no ASCII letters or digits yields an empty slug; the
        memory id is used as the slug instead.
      * Line breaks in the title are collapsed to single spaces, because the
        frontmatter is parsed line by line and an embedded newline would
        inject extra keys.
      * stale_after_days is emitted as an integer.
    """
    cwd = resolve_cwd(args.cwd)
    base = cwd / '.ciel' / 'memory'
    mem_type = args.type or 'episode'
    target_dir = base / (mem_type if mem_type.endswith('s') else mem_type + 's')
    target_dir.mkdir(parents=True, exist_ok=True)

    # One clock reading feeds the id, the timestamp and the filename date.
    now = datetime.now(timezone.utc)
    suffix = secrets.token_hex(3)
    mid = f"mem_{int(now.timestamp())}_{suffix}"
    iso_now = now.isoformat().replace('+00:00', 'Z')
    date_str = now.strftime('%Y-%m-%d')

    title = re.sub(r'\s*[\r\n]+\s*', ' ', args.title).strip()

    slug = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-')[:60].rstrip('-')
    if not slug:
        slug = mid
    filepath = target_dir / f"{date_str}-{slug}.md"
    if filepath.exists():
        filepath = target_dir / f"{date_str}-{slug}-{suffix}.md"

    def split_csv(raw):
        """Split a comma-separated option into trimmed, non-empty items."""
        return [part.strip() for part in (raw or '').split(',') if part.strip()]

    content = args.content or title

    frontmatter = {
        "id": mid,
        "title": title,
        "languages": split_csv(args.languages),
        "path_patterns": split_csv(args.path_patterns),
        "symbols": split_csv(args.symbols),
        "intents": split_csv(args.intents),
        "captured_at": iso_now,
        "captured_from": "runtime",
        "source": args.source or 'manual capture',
        "trigger_count": 0,
        "last_triggered": None,
        "stale_after_days": 90,
        "stale": False,
    }

    lines = ["---"]
    for key, val in frontmatter.items():
        if isinstance(val, list):
            lines.append(f"{key}:")
            if val:
                lines.extend(f" - \"{item}\"" for item in val)
            else:
                lines.append(" []")
        elif val is None:
            lines.append(f"{key}: null")
        elif isinstance(val, bool):
            # bool must be tested before falling through: bool is an int.
            lines.append(f"{key}: {'true' if val else 'false'}")
        else:
            lines.append(f"{key}: {val}")
    lines.extend(["---", "", f"# {title}", "", content, ""])

    filepath.write_text('\n'.join(lines), encoding='utf-8')
    print(f"Created: {filepath.relative_to(cwd)}")

    cmd_rebuild_index(args)
    print(f"Index rebuilt with memory: {mid}")
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
# ─── CLI ────────────────────────────────────────────────────────────────────
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def main():
    """Parse the command line and dispatch to the chosen subcommand handler.

    Subcommands: query, init, rebuild-index, new-id, capture. A subcommand is
    mandatory; each one stores its handler in ``func``, which is then called
    with the parsed arguments.
    """
    parser = argparse.ArgumentParser(description='Ciel cued-recall memory engine')
    subcommands = parser.add_subparsers(dest='cmd', required=True)

    query = subcommands.add_parser(
        'query', help='Match memories against prompt cues; update triggers')
    query.add_argument('--prompt', default='')
    query.add_argument('--cwd', default=None)
    query.add_argument(
        '--depth',
        default='standard',
        choices=['trivial', 'standard', 'critical', 'Trivial', 'Standard', 'Critical'],
    )
    query.set_defaults(func=cmd_query)

    init = subcommands.add_parser(
        'init', help='Initialize empty .ciel/memory/ structure')
    init.add_argument('--cwd', default=None)
    init.set_defaults(func=cmd_init)

    rebuild = subcommands.add_parser(
        'rebuild-index', help='Scan *.md frontmatter, regenerate index.json')
    rebuild.add_argument('--cwd', default=None)
    rebuild.set_defaults(func=cmd_rebuild_index)

    new_id = subcommands.add_parser(
        'new-id', help='Emit a collision-free memory id')
    new_id.set_defaults(func=cmd_new_id)

    capture = subcommands.add_parser(
        'capture', help='Create episode file and rebuild index in one call')
    capture.add_argument('--title', required=True, help='Memory title')
    # Free-text options that all default to None, in their original order.
    optional_text_flags = (
        ('--source', 'Source of the capture (e.g. hook name, PR URL)'),
        ('--intents', 'Comma-separated intent tags'),
        ('--path-patterns', 'Comma-separated glob patterns'),
        ('--symbols', 'Comma-separated symbol names'),
        ('--languages', 'Comma-separated language tags'),
        ('--content', 'Memory body text (defaults to title)'),
    )
    for flag, description in optional_text_flags:
        capture.add_argument(flag, default=None, help=description)
    capture.add_argument(
        '--type',
        default='episode',
        choices=['episode', 'concept', 'guard'],
        help='Memory type',
    )
    capture.add_argument('--cwd', default=None)
    capture.set_defaults(func=cmd_capture)

    parsed = parser.parse_args()
    parsed.func(parsed)
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|