@neikyun/ciel 6.11.0 → 6.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/.claude/hooks/memory-engine.py +29 -4
- package/assets/.claude/settings.json +8 -8
- package/assets/commands/ciel-create-skill.md +2 -2
- package/assets/commands/ciel-status.md +1 -1
- package/assets/platforms/opencode/.opencode/agents/ciel-improver.md +2 -2
- package/assets/platforms/opencode/.opencode/commands/ciel-create-skill.md +2 -2
- package/assets/platforms/opencode/.opencode/commands/ciel-memory-bootstrap.md +195 -0
- package/assets/skills/workflow/adr-auto/SKILL.md +88 -0
- package/assets/skills/workflow/ai-failure-modes-detector/SKILL.md +180 -0
- package/assets/skills/workflow/ask-window/SKILL.md +119 -0
- package/assets/skills/workflow/avec-quoi-versioner/SKILL.md +111 -0
- package/assets/skills/workflow/ci-watcher/SKILL.md +194 -0
- package/assets/skills/workflow/critiquer-auditor/SKILL.md +135 -0
- package/assets/skills/workflow/critiquer-auditor/reference.md +134 -0
- package/assets/skills/workflow/debug-reasoning-rca/SKILL.md +174 -0
- package/assets/skills/workflow/depth-classifier/SKILL.md +118 -0
- package/assets/skills/workflow/diverge/SKILL.md +91 -0
- package/assets/skills/workflow/doc-validator-official/SKILL.md +196 -0
- package/assets/skills/workflow/evaluer-sizer/SKILL.md +112 -0
- package/assets/skills/workflow/faire-gatekeeper/SKILL.md +99 -0
- package/assets/skills/workflow/flux-narrator/SKILL.md +93 -0
- package/assets/skills/workflow/memoire/SKILL.md +198 -0
- package/assets/skills/workflow/memoire-consolidator/SKILL.md +91 -0
- package/assets/skills/workflow/meta-critiquer/SKILL.md +112 -0
- package/assets/skills/workflow/modern-patterns-checker/SKILL.md +166 -0
- package/assets/skills/workflow/pattern-fitness-check/SKILL.md +108 -0
- package/assets/skills/workflow/playwright-visual-critic/SKILL.md +98 -0
- package/assets/skills/workflow/pr-review-responder/SKILL.md +214 -0
- package/assets/skills/workflow/prouver-verifier/SKILL.md +184 -0
- package/assets/skills/workflow/prouver-verifier/reference.md +152 -0
- package/assets/skills/workflow/quoi-framer/SKILL.md +91 -0
- package/assets/skills/workflow/relire-critic/SKILL.md +99 -0
- package/assets/skills/workflow/security-regression-check/SKILL.md +86 -0
- package/assets/skills/workflow/self-consistency-verifier/SKILL.md +85 -0
- package/assets/skills/workflow/spike-mode/SKILL.md +101 -0
- package/assets/skills/workflow/stride-analyzer/SKILL.md +96 -0
- package/assets/skills/workflow/stride-analyzer/reference.md +144 -0
- package/assets/skills/workflow/test-strategy-vitest-playwright/SKILL.md +119 -0
- package/package.json +1 -1
|
@@ -851,6 +851,17 @@ def cmd_analyze(args):
|
|
|
851
851
|
insights_json = base / 'insights.json'
|
|
852
852
|
atomic_write_json(insights_json, insights)
|
|
853
853
|
|
|
854
|
+
# Cap human-readable INSIGHTS.md sections when the corpus is large.
|
|
855
|
+
# insights.json (machine consumer) keeps everything; INSIGHTS.md is read
|
|
856
|
+
# by humans + by ciel-audit narration so token cost matters at scale.
|
|
857
|
+
LARGE_CORPUS_THRESHOLD = 150
|
|
858
|
+
TOP_N = 10
|
|
859
|
+
|
|
860
|
+
def maybe_cap(items):
|
|
861
|
+
if total > LARGE_CORPUS_THRESHOLD and len(items) > TOP_N:
|
|
862
|
+
return items[:TOP_N], len(items) - TOP_N
|
|
863
|
+
return items, 0
|
|
864
|
+
|
|
854
865
|
lines = [
|
|
855
866
|
"# Memory insights",
|
|
856
867
|
"",
|
|
@@ -866,50 +877,64 @@ def cmd_analyze(args):
|
|
|
866
877
|
lines.append("")
|
|
867
878
|
|
|
868
879
|
if promotion_candidates:
|
|
880
|
+
shown, omitted = maybe_cap(promotion_candidates)
|
|
869
881
|
lines += [
|
|
870
882
|
"## Promotion candidates",
|
|
871
883
|
"",
|
|
872
884
|
f"Episodes triggered >= {MIN_PROMOTION} times. Promote via skill `memoire-consolidator`.",
|
|
873
885
|
"",
|
|
874
886
|
]
|
|
875
|
-
for mid in
|
|
887
|
+
for mid in shown:
|
|
876
888
|
m = episodes[mid]
|
|
877
889
|
lines.append(f"- `{mid}` (trigger_count={m.get('trigger_count', 0)}) - {m.get('title', '?')}")
|
|
890
|
+
if omitted:
|
|
891
|
+
lines.append(f"- _+{omitted} more, see insights.json_")
|
|
878
892
|
lines.append("")
|
|
879
893
|
|
|
880
894
|
if dead_anchors:
|
|
895
|
+
shown, omitted = maybe_cap(dead_anchors)
|
|
881
896
|
lines += [
|
|
882
897
|
"## Dead anchors",
|
|
883
898
|
"",
|
|
884
899
|
"Memories whose every `path_patterns` entry resolves to no file. Triage in `.ciel/memory/review-queue.md`.",
|
|
885
900
|
"",
|
|
886
901
|
]
|
|
887
|
-
for mid in
|
|
902
|
+
for mid in shown:
|
|
888
903
|
m = memories[mid]
|
|
889
904
|
patterns = ", ".join(m.get('path_patterns') or [])
|
|
890
905
|
lines.append(f"- `{mid}` - {m.get('title', '?')} (patterns: {patterns})")
|
|
906
|
+
if omitted:
|
|
907
|
+
lines.append(f"- _+{omitted} more, see insights.json_")
|
|
891
908
|
lines.append("")
|
|
892
909
|
|
|
893
910
|
if intent_clusters:
|
|
911
|
+
ranked = sorted(intent_clusters.items(), key=lambda x: -len(x[1]))
|
|
912
|
+
shown, omitted = maybe_cap(ranked)
|
|
894
913
|
lines += [
|
|
895
914
|
"## Intent clusters",
|
|
896
915
|
"",
|
|
897
916
|
f"Intents shared by >= {MIN_SUPPORT} memories - recurring topics.",
|
|
898
917
|
"",
|
|
899
918
|
]
|
|
900
|
-
for intent, ids in
|
|
919
|
+
for intent, ids in shown:
|
|
901
920
|
lines.append(f"- `{intent}` ({len(ids)}): {', '.join(ids)}")
|
|
921
|
+
if omitted:
|
|
922
|
+
lines.append(f"- _+{omitted} more, see insights.json_")
|
|
902
923
|
lines.append("")
|
|
903
924
|
|
|
904
925
|
if path_clusters:
|
|
926
|
+
ranked = sorted(path_clusters.items(), key=lambda x: -len(x[1]))
|
|
927
|
+
shown, omitted = maybe_cap(ranked)
|
|
905
928
|
lines += [
|
|
906
929
|
"## Path clusters",
|
|
907
930
|
"",
|
|
908
931
|
f"Paths referenced by >= {MIN_SUPPORT} memories - high-traffic surface.",
|
|
909
932
|
"",
|
|
910
933
|
]
|
|
911
|
-
for path, ids in
|
|
934
|
+
for path, ids in shown:
|
|
912
935
|
lines.append(f"- `{path}` ({len(ids)}): {', '.join(ids)}")
|
|
936
|
+
if omitted:
|
|
937
|
+
lines.append(f"- _+{omitted} more, see insights.json_")
|
|
913
938
|
lines.append("")
|
|
914
939
|
|
|
915
940
|
insights_md = base / 'INSIGHTS.md'
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"hooks": [
|
|
21
21
|
{
|
|
22
22
|
"type": "command",
|
|
23
|
-
"command": "echo \"[CIEL v6] Session started — Pipeline: DOCS → QUOI → ASK → AVEC QUOI → DIVERGE → RECHERCHE → CODEBASE → EVALUER → ASK2 → FAIRE → RELIRE → PROUVER → MEMOIRE → META\" && \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/session-version-check.sh"
|
|
23
|
+
"command": "echo \"[CIEL v6] Session started — Pipeline: DOCS → QUOI → ASK → AVEC QUOI → DIVERGE → RECHERCHE → CODEBASE → EVALUER → ASK2 → FAIRE → RELIRE → PROUVER → MEMOIRE → META\" && { ! [ -x \"$CLAUDE_PROJECT_DIR/.claude/hooks/session-version-check.sh\" ] || \"$CLAUDE_PROJECT_DIR/.claude/hooks/session-version-check.sh\"; }"
|
|
24
24
|
}
|
|
25
25
|
]
|
|
26
26
|
}
|
|
@@ -31,11 +31,11 @@
|
|
|
31
31
|
"hooks": [
|
|
32
32
|
{
|
|
33
33
|
"type": "command",
|
|
34
|
-
"command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/check-test-first.sh"
|
|
34
|
+
"command": "! [ -x \"$CLAUDE_PROJECT_DIR/.claude/hooks/check-test-first.sh\" ] || \"$CLAUDE_PROJECT_DIR/.claude/hooks/check-test-first.sh\""
|
|
35
35
|
},
|
|
36
36
|
{
|
|
37
37
|
"type": "command",
|
|
38
|
-
"command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/pre-tool-write.sh"
|
|
38
|
+
"command": "! [ -x \"$CLAUDE_PROJECT_DIR/.claude/hooks/pre-tool-write.sh\" ] || \"$CLAUDE_PROJECT_DIR/.claude/hooks/pre-tool-write.sh\""
|
|
39
39
|
}
|
|
40
40
|
]
|
|
41
41
|
},
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
{
|
|
46
46
|
"type": "command",
|
|
47
47
|
"if": "Bash(rm *)",
|
|
48
|
-
"command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/block-destructive.sh"
|
|
48
|
+
"command": "! [ -x \"$CLAUDE_PROJECT_DIR/.claude/hooks/block-destructive.sh\" ] || \"$CLAUDE_PROJECT_DIR/.claude/hooks/block-destructive.sh\""
|
|
49
49
|
}
|
|
50
50
|
]
|
|
51
51
|
},
|
|
@@ -54,7 +54,7 @@
|
|
|
54
54
|
"hooks": [
|
|
55
55
|
{
|
|
56
56
|
"type": "command",
|
|
57
|
-
"command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/pre-agent-gate.sh"
|
|
57
|
+
"command": "! [ -x \"$CLAUDE_PROJECT_DIR/.claude/hooks/pre-agent-gate.sh\" ] || \"$CLAUDE_PROJECT_DIR/.claude/hooks/pre-agent-gate.sh\""
|
|
58
58
|
}
|
|
59
59
|
]
|
|
60
60
|
}
|
|
@@ -65,7 +65,7 @@
|
|
|
65
65
|
"hooks": [
|
|
66
66
|
{
|
|
67
67
|
"type": "command",
|
|
68
|
-
"command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/track-file.sh"
|
|
68
|
+
"command": "! [ -x \"$CLAUDE_PROJECT_DIR/.claude/hooks/track-file.sh\" ] || \"$CLAUDE_PROJECT_DIR/.claude/hooks/track-file.sh\""
|
|
69
69
|
},
|
|
70
70
|
{
|
|
71
71
|
"type": "command",
|
|
@@ -108,7 +108,7 @@
|
|
|
108
108
|
"hooks": [
|
|
109
109
|
{
|
|
110
110
|
"type": "command",
|
|
111
|
-
"command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/meta-critiquer.sh"
|
|
111
|
+
"command": "! [ -x \"$CLAUDE_PROJECT_DIR/.claude/hooks/meta-critiquer.sh\" ] || \"$CLAUDE_PROJECT_DIR/.claude/hooks/meta-critiquer.sh\""
|
|
112
112
|
}
|
|
113
113
|
]
|
|
114
114
|
}
|
|
@@ -118,7 +118,7 @@
|
|
|
118
118
|
"hooks": [
|
|
119
119
|
{
|
|
120
120
|
"type": "command",
|
|
121
|
-
"command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/meta-critiquer.sh"
|
|
121
|
+
"command": "! [ -x \"$CLAUDE_PROJECT_DIR/.claude/hooks/meta-critiquer.sh\" ] || \"$CLAUDE_PROJECT_DIR/.claude/hooks/meta-critiquer.sh\""
|
|
122
122
|
}
|
|
123
123
|
]
|
|
124
124
|
}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
---
|
|
2
|
-
description: Generates a valid Ciel SKILL.md scaffold following Anthropic Skills-first rules (kebab-case ≤64, YAML description ≤
|
|
2
|
+
description: Generates a valid Ciel SKILL.md scaffold following Anthropic Skills-first rules (kebab-case ≤64, YAML description ≤1536, body ≤500 lines).
|
|
3
3
|
---
|
|
4
4
|
|
|
5
5
|
# /ciel-create-skill — Create a new Ciel skill
|
|
6
6
|
|
|
7
|
-
*Generates a valid SKILL.md scaffold following Anthropic Skills-first rules (kebab-case name ≤64 chars, YAML frontmatter ≤
|
|
7
|
+
*Generates a valid SKILL.md scaffold following Anthropic Skills-first rules (kebab-case name ≤64 chars, YAML frontmatter ≤1536-char description, ≤500-line body, progressive disclosure to one reference.md).*
|
|
8
8
|
|
|
9
9
|
Usage: `/ciel-create-skill <name> <purpose>`
|
|
10
10
|
|
|
@@ -265,7 +265,7 @@ Generates a valid SKILL.md scaffold following Ciel's conventions. Returns a diff
|
|
|
265
265
|
- Third person: "Analyzes X" ✓ / "I analyze X" ✗
|
|
266
266
|
- Front-load use case + trigger keywords
|
|
267
267
|
- Include "Use when..." clause
|
|
268
|
-
- ≤
|
|
268
|
+
- ≤ 1536 chars, recommended 200-500
|
|
269
269
|
|
|
270
270
|
### 3. Scaffold SKILL.md
|
|
271
271
|
|
|
@@ -333,7 +333,7 @@ Problems: no trigger, no output, no specificity.
|
|
|
333
333
|
|
|
334
334
|
- [ ] Name valid kebab-case, ≤ 64 chars, unique?
|
|
335
335
|
- [ ] Category is one of the 5 valid categories?
|
|
336
|
-
- [ ] Description: third person, ≤
|
|
336
|
+
- [ ] Description: third person, ≤ 1536 chars, includes trigger?
|
|
337
337
|
- [ ] SKILL.md ≤ 500 lines?
|
|
338
338
|
- [ ] No overlap with existing skills (grep checked)?
|
|
339
339
|
- [ ] YAML frontmatter valid?
|
|
@@ -7,12 +7,12 @@ subtask: true
|
|
|
7
7
|
> **OpenCode note**: This command requires `claude --print` headless mode for full functionality (binary evals, skill scaffold generation). On OpenCode it runs in degraded mode — the improver agent returns proposals only. For the full harness, use Claude Code.
|
|
8
8
|
|
|
9
9
|
---
|
|
10
|
-
description: Generates a valid Ciel SKILL.md scaffold following Anthropic Skills-first rules (kebab-case ≤64, YAML description ≤
|
|
10
|
+
description: Generates a valid Ciel SKILL.md scaffold following Anthropic Skills-first rules (kebab-case ≤64, YAML description ≤1536, body ≤500 lines).
|
|
11
11
|
---
|
|
12
12
|
|
|
13
13
|
# /ciel-create-skill — Create a new Ciel skill
|
|
14
14
|
|
|
15
|
-
*Generates a valid SKILL.md scaffold following Anthropic Skills-first rules (kebab-case name ≤64 chars, YAML frontmatter ≤
|
|
15
|
+
*Generates a valid SKILL.md scaffold following Anthropic Skills-first rules (kebab-case name ≤64 chars, YAML frontmatter ≤1536-char description, ≤500-line body, progressive disclosure to one reference.md).*
|
|
16
16
|
|
|
17
17
|
Usage: `/ciel-create-skill <name> <purpose>`
|
|
18
18
|
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Scan project for ingestable tribal docs (lessons.md, ciel-overlay.md, .claude/rules/, Claude Code auto-memory at ~/.claude/projects/<slug>/memory/, etc.) and propose ingestion into the cued-recall memory under .ciel/memory/. Reports findings if no sources found. Always confirms each candidate with the user before writing.
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# /ciel-memory-bootstrap — Initialize Cued-Recall Memory
|
|
6
|
+
|
|
7
|
+
**Purpose:** First-run scan of an existing project to convert tribal knowledge already documented in `lessons.md`, `ciel-overlay.md`, `.claude/rules/`, Claude Code's per-project auto-memory (`~/.claude/projects/<slug>/memory/`), and similar files into the structured cued-recall memory at `.ciel/memory/`.
|
|
8
|
+
|
|
9
|
+
**Usage:** `/ciel-memory-bootstrap` (no args)
|
|
10
|
+
|
|
11
|
+
This is **deterministic**: no agent dispatch, no pipeline, no DIVERGE/EVALUER. Just scan, propose, write on user confirmation.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Instructions
|
|
16
|
+
|
|
17
|
+
You are bootstrapping the cued-recall memory for this project. Follow these steps in order.
|
|
18
|
+
|
|
19
|
+
### Step 1 — Scan
|
|
20
|
+
|
|
21
|
+
Run the bootstrap script in `scan` mode:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Try installed location first, fallback to dev location
|
|
25
|
+
script="$CLAUDE_PROJECT_DIR/.claude/hooks/memory-bootstrap.sh"
|
|
26
|
+
[ -f "$script" ] || script="$CLAUDE_PROJECT_DIR/hooks/memory-bootstrap.sh"
|
|
27
|
+
bash "$script" scan
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Or, if Ciel is installed globally (outside the project directory): `bash "$HOME/.ciel/hooks/memory-bootstrap.sh" scan`.
|
|
31
|
+
|
|
32
|
+
Report the output verbatim to the user.
|
|
33
|
+
|
|
34
|
+
### Step 2 — Decide path
|
|
35
|
+
|
|
36
|
+
Based on the scan output:
|
|
37
|
+
|
|
38
|
+
- **If 0 sources found** → tell the user clearly: "No tribal docs to bootstrap from. The cued-recall memory will populate organically as you intervene with me. Nothing more to do." End here.
|
|
39
|
+
- **If sources found** → proceed to Step 3.
|
|
40
|
+
|
|
41
|
+
### Step 3 — Initialize structure
|
|
42
|
+
|
|
43
|
+
Run:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
script="$CLAUDE_PROJECT_DIR/.claude/hooks/memory-bootstrap.sh"
|
|
47
|
+
[ -f "$script" ] || script="$CLAUDE_PROJECT_DIR/hooks/memory-bootstrap.sh"
|
|
48
|
+
bash "$script" ingest
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
This creates `.ciel/memory/{episodes,concepts,guards}/` and an empty `index.json`. It does NOT auto-write memories — auto-ingestion would create cargo-cult entries from possibly-stale docs (see ADR-0001).
|
|
52
|
+
|
|
53
|
+
### Step 4 — Read each source
|
|
54
|
+
|
|
55
|
+
For each source found in Step 1, `Read` the file fully. Identify candidate memories:
|
|
56
|
+
|
|
57
|
+
| Source format | What becomes a memory |
|
|
58
|
+
|---|---|
|
|
59
|
+
| `[YYYY-MM-DD] MISTAKE: X → RULE: Y` lines (lessons.md style) | One memory per line. Title = the rule. |
|
|
60
|
+
| `## Heading\n\n- rule\n- rule` (rules.md style) | One memory per rule. |
|
|
61
|
+
| Numbered lessons in `ciel-overlay.md` "Key Lessons" | One memory per lesson. |
|
|
62
|
+
| `## section` in CLAUDE.md/AGENTS.md describing a non-obvious convention | One memory per section. |
|
|
63
|
+
| **Claude Code auto-memory** entries (`~/.claude/projects/<slug>/memory/*.md`, excluding `MEMORY.md`) | One memory per file. Title = frontmatter `description`. Cues derived per "Auto-memory mapping" below. |
|
|
64
|
+
|
|
65
|
+
#### Auto-memory mapping (special parser)
|
|
66
|
+
|
|
67
|
+
Claude Code auto-memory uses a different frontmatter than Ciel's cued-recall. Each source file looks like:
|
|
68
|
+
|
|
69
|
+
```yaml
|
|
70
|
+
---
|
|
71
|
+
name: feedback-okhttp-cookiejar-override
|
|
72
|
+
description: Neiyomi shared PersistentCookieJar overrides manual Cookie headers via OkHttp BridgeInterceptor
|
|
73
|
+
metadata:
|
|
74
|
+
type: feedback
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
(body markdown — Context / Why / How to apply sections)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
When you encounter a file under `$AUTO_MEMORY_DIR`, map it to a Ciel episode as follows:
|
|
81
|
+
|
|
82
|
+
| Auto-memory field | Ciel frontmatter field | Notes |
|
|
83
|
+
|---|---|---|
|
|
84
|
+
| `description:` | `title:` | one-line summary |
|
|
85
|
+
| `name:` | base of slug for filename | already kebab-case |
|
|
86
|
+
| `metadata.type:` (`user`/`feedback`/`project`/`reference`) | `intents:` `[<type>]` plus topic-specific intents inferred from body | e.g. `feedback` + `okhttp` + `cookie` |
|
|
87
|
+
| body markdown | Ciel episode body, verbatim | preserve Context/Why/How to apply structure |
|
|
88
|
+
| paths cited in body (e.g. `src/`, `*.kt`, `Caddyfile`) | `path_patterns:` | infer from grep — narrow patterns preferred |
|
|
89
|
+
| symbols cited in body (class/function/table names) | `symbols:` | infer from grep |
|
|
90
|
+
| language hint (file extensions in body) | `languages:` | `kotlin`/`typescript`/`python`/`sql`/etc. |
|
|
91
|
+
| _(none — new field)_ | `captured_from: auto-memory-migration` | set on every migrated entry; distinguishes it from user-intervention captures |
|
|
92
|
+
|
|
93
|
+
**Skip `MEMORY.md`** — it's a table-of-contents index, not memory content. The scan already excludes it.
|
|
94
|
+
|
|
95
|
+
**Backup before delete.** After successfully writing an episode file for an auto-memory entry, MOVE (not delete) the source to `$AUTO_MEMORY_DIR/.migrated-to-ciel/<filename>` so the user can audit migration. The MEMORY.md index file itself stays in place — Claude Code may regenerate it on next session.
|
|
96
|
+
|
|
97
|
+
Skip:
|
|
98
|
+
- The pipeline / workflow descriptions (those belong in CLAUDE.md, not memory)
|
|
99
|
+
- General principles already in CLAUDE.md
|
|
100
|
+
- Anything that's just project description (READMEish)
|
|
101
|
+
- Code examples (those go in skills/, not memory)
|
|
102
|
+
|
|
103
|
+
### Step 5 — Propose batch capture
|
|
104
|
+
|
|
105
|
+
Once you have N candidate memories from the sources, present them to the user **as a batch**, not one by one (avoid 50 confirmation prompts). Use a single `AskUserQuestion` with the structure:
|
|
106
|
+
|
|
107
|
+
> "Found N candidates from your tribal docs. I'll list them; you tell me which to capture, which to skip, or 'all'."
|
|
108
|
+
|
|
109
|
+
For each candidate, show:
|
|
110
|
+
- **Title** (one line)
|
|
111
|
+
- **Source** (file:line)
|
|
112
|
+
- **Suggested tags** (paths, symbols, intents, language inferred from the lesson content)
|
|
113
|
+
|
|
114
|
+
The user replies with: "all", "1,3,5,8" (specific indices), or "skip".
|
|
115
|
+
|
|
116
|
+
### Step 6 — Write captured memories
|
|
117
|
+
|
|
118
|
+
For each captured candidate, create `.ciel/memory/episodes/<YYYY-MM-DD>-<slug>.md` with frontmatter:
|
|
119
|
+
|
|
120
|
+
```yaml
|
|
121
|
+
---
|
|
122
|
+
id: mem_<NNN>
|
|
123
|
+
title: <title>
|
|
124
|
+
languages: [<inferred>]
|
|
125
|
+
path_patterns:
|
|
126
|
+
- <pattern>
|
|
127
|
+
symbols: [<inferred>]
|
|
128
|
+
intents: [<inferred>]
|
|
129
|
+
captured_at: <ISO8601 now>
|
|
130
|
+
captured_from: bootstrap  # use `auto-memory-migration` for entries migrated from Claude Code auto-memory
|
|
131
|
+
source: <original-file:line>
|
|
132
|
+
trigger_count: 0
|
|
133
|
+
last_triggered: null
|
|
134
|
+
stale_after_days: 90
|
|
135
|
+
stale: false
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
# <title>
|
|
139
|
+
|
|
140
|
+
<content from source, lightly cleaned>
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
ID strategy: read existing `index.json` for max id, increment. Slug = first 5 words of title, kebab-cased.
|
|
144
|
+
|
|
145
|
+
### Step 7 — Rebuild index
|
|
146
|
+
|
|
147
|
+
After all writes, regenerate `.ciel/memory/index.json` by parsing every frontmatter under `.ciel/memory/{episodes,concepts,guards}/`:
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
# pseudo — use python3 -c '...' inline
|
|
151
|
+
for each *.md file:
|
|
152
|
+
parse frontmatter
|
|
153
|
+
add to memories dict by id
|
|
154
|
+
for each path_pattern, symbol, intent, language:
|
|
155
|
+
append id to corresponding by_* index
|
|
156
|
+
write back to index.json
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Step 8 — Confirm
|
|
160
|
+
|
|
161
|
+
Report:
|
|
162
|
+
|
|
163
|
+
- N memories captured
|
|
164
|
+
- Sources processed
|
|
165
|
+
- Index rebuilt with M total entries
|
|
166
|
+
- Suggest: "Cued-recall memory now active. Memories will auto-inject when their cues match in future tasks. Run `/ciel-memory-bootstrap` again anytime to re-scan for new tribal docs."
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Constraints
|
|
171
|
+
|
|
172
|
+
- **Never write a memory without user confirmation.** Even on bulk confirmation ("all"), display the list first.
|
|
173
|
+
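- **Do not delete the source files.** Bootstrap converts; the user keeps the originals as long as they want. The only exception is Claude Code auto-memory entries, which are moved (never deleted) to `$AUTO_MEMORY_DIR/.migrated-to-ciel/` after their episode file has been written successfully.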
|
|
174
|
+
- **Tag conservatively.** A memory tagged with `**/*` will fire on every task and pollute. If unsure, narrow the path pattern.
|
|
175
|
+
- **No agent dispatch.** This command is deterministic and runs inline.
|
|
176
|
+
- **Idempotent.** Re-running on an already-bootstrapped project should detect existing memories (by source field) and offer to skip duplicates.
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Failure modes
|
|
181
|
+
|
|
182
|
+
| Symptom | Cause | Fix |
|
|
183
|
+
|---|---|---|
|
|
184
|
+
| Script not found | `$CLAUDE_PROJECT_DIR` not set | Try `$HOME/.ciel/hooks/memory-bootstrap.sh` instead |
|
|
185
|
+
| Nothing scanned | No tribal docs in this project | Working as intended; report and end |
|
|
186
|
+
| Memories all tagged with broad paths | Source content didn't include path hints | Ask user to refine tags after listing |
|
|
187
|
+
| index.json malformed after rebuild | python3 parse error | Recreate empty index, re-run rebuild step |
|
|
188
|
+
| Auto-memory not detected | Slug derivation mismatch (cwd has unexpected characters) | Override via `CIEL_AUTO_MEMORY_DIR=<absolute-path> bash hooks/memory-bootstrap.sh scan` |
|
|
189
|
+
| Auto-memory file has `name:` but no `description:` | Older auto-memory format | Use first heading or filename as title; ask user to confirm before writing |
|
|
190
|
+
|
|
191
|
+
## See also
|
|
192
|
+
|
|
193
|
+
- `docs/adrs/0001-cued-recall-memory.md` — full design rationale
|
|
194
|
+
- `skills/workflow/memoire/SKILL.md` — capture/recall flow
|
|
195
|
+
- `skills/workflow/memoire-consolidator/SKILL.md` — periodic maintenance
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: adr-auto
|
|
3
|
+
description: How to document architectural decisions automatically in Ciel v5 (etape 12). After FAIRE but before RELIRE, if the task involved a significant architectural decision, write an ADR (Architecture Decision Record) to docs/adrs/. Prevents knowledge loss.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Automatic ADR — Document Decisions in Real Time (Ciel v5)
|
|
7
|
+
|
|
8
|
+
## What this covers
|
|
9
|
+
|
|
10
|
+
How to document architectural decisions during the Ciel v5 pipeline (etape 12: ADR). After FAIRE but before RELIRE, if the task involved a significant architectural decision, write an ADR. The decision is documented while fresh, not months later.
|
|
11
|
+
|
|
12
|
+
## Core principle
|
|
13
|
+
|
|
14
|
+
**If the decision was non-trivial, document WHY.** Code shows WHAT. ADRs show WHY. Without ADRs, future developers (or future you) will wonder why the code is the way it is.
|
|
15
|
+
|
|
16
|
+
## When to write an ADR
|
|
17
|
+
|
|
18
|
+
Write an ADR when the task involves:
|
|
19
|
+
- Adding a new dependency/library
|
|
20
|
+
- Choosing between two technologies
|
|
21
|
+
- Changing a database schema
|
|
22
|
+
- Adopting a design pattern
|
|
23
|
+
- Making a performance trade-off
|
|
24
|
+
- Changing the build/deploy pipeline
|
|
25
|
+
- Any decision with long-term consequences
|
|
26
|
+
|
|
27
|
+
Do NOT write an ADR for:
|
|
28
|
+
- Bug fixes (tests document the fix)
|
|
29
|
+
- Refactoring without semantic change
|
|
30
|
+
- Renames/reorganizations
|
|
31
|
+
- Dependency upgrades (changelog suffices)
|
|
32
|
+
|
|
33
|
+
## ADR format (based on Michael Nygard's template)
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
# ADR-<NNN>: <Title>
|
|
37
|
+
|
|
38
|
+
## Status
|
|
39
|
+
|
|
40
|
+
<proposed | accepted | deprecated | superseded by ADR-NNN>
|
|
41
|
+
|
|
42
|
+
## Context
|
|
43
|
+
|
|
44
|
+
<What is the issue that we're seeing that is motivating this decision or change? 2-3 sentences.>
|
|
45
|
+
|
|
46
|
+
## Decision
|
|
47
|
+
|
|
48
|
+
<What is the change that we're proposing and/or doing? 1-2 sentences.>
|
|
49
|
+
|
|
50
|
+
## Consequences
|
|
51
|
+
|
|
52
|
+
<What becomes easier or harder to do because of this change? 2-3 items.>
|
|
53
|
+
|
|
54
|
+
## References
|
|
55
|
+
|
|
56
|
+
<Link to relevant docs, tickets, or PRs>
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## File naming
|
|
60
|
+
|
|
61
|
+
`docs/adrs/<NNN>-<kebab-case-title>.md`
|
|
62
|
+
|
|
63
|
+
Start at 001 and increment.
|
|
64
|
+
|
|
65
|
+
## How to trigger (Ciel v5)
|
|
66
|
+
|
|
67
|
+
In the Ciel pipeline (etape 12), during ADR:
|
|
68
|
+
1. Check if the task involved a significant decision (see list above)
|
|
69
|
+
2. If yes -> write `docs/adrs/<NNN>-<title>.md`
|
|
70
|
+
3. Update `.ciel/map.json` to reference the new ADR
|
|
71
|
+
4. Reference the ADR in the RELIRE submission so the critic can check it
|
|
72
|
+
|
|
73
|
+
## Common rationalizations
|
|
74
|
+
|
|
75
|
+
| Rationalization | Reality |
|
|
76
|
+
|---|---|
|
|
77
|
+
| "The code is self-documenting" | Code shows WHAT. ADRs show WHY. Six months from now, "why did we choose this" is not visible in the code. |
|
|
78
|
+
| "I'll add it later" | Later is when the decision is forgotten and the context is lost. Write it now or it never gets written. |
|
|
79
|
+
| "This decision is too small for an ADR" | If you had to think about it for more than 30 seconds, it's big enough for an ADR. |
|
|
80
|
+
| "Nobody reads ADRs anyway" | Nobody reads them until they need to undo a decision and can't figure out why it was made. Then they're invaluable. |
|
|
81
|
+
|
|
82
|
+
## How to verify
|
|
83
|
+
|
|
84
|
+
- [ ] ADR written for every significant decision?
|
|
85
|
+
- [ ] No ADR written for trivial changes?
|
|
86
|
+
- [ ] ADR includes context, decision, consequences?
|
|
87
|
+
- [ ] Map updated with ADR reference?
|
|
88
|
+
- [ ] ADR committed with the code?
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ai-failure-modes-detector
|
|
3
|
+
description: Detects the six canonical failure modes of LLM-generated code — invented APIs, hallucinated dependencies, version drift, async/sync mismatch, confident-wrong logic, and extrinsic hallucination (plausible but unverifiable output). Runs self-consistency triple-generation checks, AST-based dependency audits, and uncertainty scoring. Triggers BEFORE merging agent-authored code, especially when the author is an LLM. Partners with doc-validator-official (API-level) and self-consistency-verifier (semantic-level).
|
|
4
|
+
allowed-tools: Read, Grep, Glob, Bash
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# ai-failure-modes-detector — Catch confident-wrong before it lands
|
|
8
|
+
|
|
9
|
+
LLM-generated code compiles more often than it's correct. Six failure modes account for >90% of post-merge incidents in agentic PRs (ISSTA 2025). This skill runs each check systematically.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Inputs (infer before asking — see orchestrator's Autonomy protocol)
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
CODE_UNDER_REVIEW: [file paths OR diff hunk]
|
|
17
|
+
AUTHOR: [human | LLM | mixed]
|
|
18
|
+
PROPOSED_DEPS: [new dependencies being added, if any]
|
|
19
|
+
TEST_COVERAGE: [files that have tests | files without]
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### Auto-inference sources (exhaust BEFORE asking the user)
|
|
23
|
+
|
|
24
|
+
- **CODE_UNDER_REVIEW** → `git diff HEAD~1` (last commit) or `git diff main...HEAD` (branch diff) — usually the intent. If user said "this file", extract from prompt.
|
|
25
|
+
- **AUTHOR** → check the last commit's message / co-author trailer. `Co-Authored-By: Claude` or `Generated with Claude Code` → LLM. Otherwise human. If unsure, assume `mixed` (safer default).
|
|
26
|
+
- **PROPOSED_DEPS** → `git diff HEAD~1 -- package.json go.mod requirements.txt` → list added entries. Zero added → skip dep-hallucination check.
|
|
27
|
+
- **TEST_COVERAGE** → for each changed file in CODE_UNDER_REVIEW, check if a corresponding `*.test.*` / `*_test.go` / `test_*.py` exists next to it.
|
|
28
|
+
|
|
29
|
+
Never ask the user for AUTHOR — always inferable from git. Never ask for TEST_COVERAGE — always checkable via filesystem.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## The six failure modes
|
|
34
|
+
|
|
35
|
+
### 1. Invented APIs
|
|
36
|
+
|
|
37
|
+
Function/class/method that doesn't exist in the library at the pinned version.
|
|
38
|
+
|
|
39
|
+
**Detection**:
|
|
40
|
+
- Grep every import and every method call on imported symbols
|
|
41
|
+
- Cross-reference with `node_modules/<pkg>/package.json` + type definitions
|
|
42
|
+
- For dynamic imports (`await import()`), inspect at runtime if possible
|
|
43
|
+
|
|
44
|
+
**Signal**: import resolves but `<symbol>` not in the `.d.ts` or `__init__.py`.
|
|
45
|
+
|
|
46
|
+
### 2. Hallucinated dependencies
|
|
47
|
+
|
|
48
|
+
An npm or pip package that doesn't exist on the registry (or is a typo-squat of a real one).
|
|
49
|
+
|
|
50
|
+
**Detection**:
|
|
51
|
+
- For each new dep in PROPOSED_DEPS: `npm view <pkg> --json` or `pip index versions <pkg>`
|
|
52
|
+
- Check publisher reputation (weekly downloads, last publish date, repo link present)
|
|
53
|
+
- Typo-squat check: Levenshtein distance ≤ 2 from a popular package name is SUSPICIOUS
|
|
54
|
+
|
|
55
|
+
**Signal**: registry returns 404, or package has < 100 downloads/week with no repo.
|
|
56
|
+
|
|
57
|
+
### 3. Version drift
|
|
58
|
+
|
|
59
|
+
Code uses an API that exists but at a different version than pinned.
|
|
60
|
+
|
|
61
|
+
**Detection**:
|
|
62
|
+
- For each external API call, check "Added in vX.Y" / "Deprecated in vX.Y" metadata
|
|
63
|
+
- Compare against pinned version in lockfile
|
|
64
|
+
|
|
65
|
+
**Signal**: API exists in v2, code pins v1 — silently broken.
|
|
66
|
+
|
|
67
|
+
### 4. Async/sync mismatch
|
|
68
|
+
|
|
69
|
+
Sync call in an async codebase or a Promise-returning function not awaited.
|
|
70
|
+
|
|
71
|
+
**Detection** (TS):
|
|
72
|
+
- `@typescript-eslint/no-floating-promises`
|
|
73
|
+
- Grep for `fetch(` calls that are not awaited, `fs.readFileSync` (sync in async), or unawaited `async` functions
|
|
74
|
+
- Any `Promise<T>` returned from a function whose callers don't `await`
|
|
75
|
+
|
|
76
|
+
**Detection** (Python):
|
|
77
|
+
- Sync `requests.get()` inside an `async def`
|
|
78
|
+
- `asyncio.run()` called inside an event loop
|
|
79
|
+
|
|
80
|
+
**Signal**: type checker emits "Promise returned but not awaited" OR sync call blocks in async context.
|
|
81
|
+
|
|
82
|
+
### 5. Confident-wrong logic
|
|
83
|
+
|
|
84
|
+
Code is syntactically valid, type-checks, and passes linting, but is semantically wrong:
|
|
85
|
+
- Off-by-one on pagination
|
|
86
|
+
- Wrong operator (`>=` where `>` needed)
|
|
87
|
+
- Negated boolean
|
|
88
|
+
- Swapped arguments of same type
|
|
89
|
+
|
|
90
|
+
**Detection**:
|
|
91
|
+
- Run existing tests (if present) — failing tests is the first signal
|
|
92
|
+
- Invariant check: can you state in 1 sentence what the code guarantees? Does it actually guarantee it?
|
|
93
|
+
- For any numerical boundary, ask: "off-by-one in either direction — which breaks?"
|
|
94
|
+
|
|
95
|
+
**Signal**: behavior divergence between stated goal and actual execution.
|
|
96
|
+
|
|
97
|
+
### 6. Extrinsic hallucination
|
|
98
|
+
|
|
99
|
+
Output is plausible but references facts outside the code that cannot be verified:
|
|
100
|
+
- Cites a spec section that doesn't exist
|
|
101
|
+
- Comments claim "per RFC 7231 §5.3" when section 5.3 doesn't cover that
|
|
102
|
+
- Error codes invented (`ERR_USER_QUOTA_EXCEEDED` — is that really thrown?)
|
|
103
|
+
|
|
104
|
+
**Detection**:
|
|
105
|
+
- Every code comment with a source claim → spot-check
|
|
106
|
+
- Every user-facing string (error codes, log messages) → grep for prior use in the codebase
|
|
107
|
+
|
|
108
|
+
**Signal**: claim cannot be corroborated.
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Report format
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
## AI-FAILURE-MODES VERDICT
|
|
116
|
+
|
|
117
|
+
### Author
|
|
118
|
+
LLM (auto-detected via commit message pattern | user-declared)
|
|
119
|
+
|
|
120
|
+
### Findings by mode
|
|
121
|
+
1. Invented APIs:
|
|
122
|
+
[BLOCK] src/auth.ts:42 — `jwt.verifyStrict()` not in jsonwebtoken@9.0.2 (use `verify()` with `algorithms` option)
|
|
123
|
+
|
|
124
|
+
2. Hallucinated deps:
|
|
125
|
+
(none — all 3 new deps exist on npm, >10k weekly downloads)
|
|
126
|
+
|
|
127
|
+
3. Version drift:
|
|
128
|
+
[WARN] src/db.ts:18 — `drizzle.innerJoin()` added in v0.30, pinned 0.29 — upgrade drizzle-orm
|
|
129
|
+
|
|
130
|
+
4. Async/sync mismatch:
|
|
131
|
+
[BLOCK] src/upload.ts:55 — `fs.writeFileSync()` inside async handler — blocks event loop
|
|
132
|
+
|
|
133
|
+
5. Confident-wrong:
|
|
134
|
+
[WARN] src/pagination.ts:22 — `offset = page * pageSize` — off by one page when `page` is 1-indexed (page=1 skips the first page; use `(page - 1) * pageSize`)
|
|
135
|
+
|
|
136
|
+
6. Extrinsic:
|
|
137
|
+
[INFO] src/rate-limit.ts:10 — comment cites "per RFC 6585 §3" for the 429 response — §3 defines 428 Precondition Required; 429 Too Many Requests is §4 (behavior is right, section number wrong)
|
|
138
|
+
|
|
139
|
+
### Summary
|
|
140
|
+
BLOCK: 2
|
|
141
|
+
WARN: 2
|
|
142
|
+
INFO: 1
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## Guardrails
|
|
148
|
+
|
|
149
|
+
- **BLOCK means don't merge** — invented APIs, hallucinated deps, and async/sync mismatches are production-breaking.
|
|
150
|
+
- **WARN means discuss in review** — not auto-blocking but requires human acknowledgment.
|
|
151
|
+
- **Run against diff, not whole repo** — old code isn't the subject; the new change is.
|
|
152
|
+
- **When tests are absent**, confidence in "confident-wrong" findings drops — request tests be added before clearing the review.
|
|
153
|
+
- **Don't false-positive on stubs** — intentional mocks in `__mocks__/` or `test-helpers/` may reference not-yet-implemented APIs; verify context.
|
|
154
|
+
- **Typo-squat false positives**: popular packages sometimes have legitimate close cousins (`react` vs `preact`) — check download count AND repo history before flagging.
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## How to verify
|
|
159
|
+
|
|
160
|
+
- [ ] All 6 failure modes checked (invented APIs, hallucinated deps, version drift, async/sync, confident-wrong, extrinsic)?
|
|
161
|
+
- [ ] Each finding has evidence (file:line or URL)?
|
|
162
|
+
- [ ] VERDICT issued (CLEAN / FINDINGS)?
|
|
163
|
+
- [ ] Author identified (LLM vs human)?
|
|
164
|
+
- [ ] External API calls validated against official docs?
|
|
165
|
+
|
|
166
|
+
## When triggered
|
|
167
|
+
|
|
168
|
+
- Post-write hook when AUTHOR=LLM and task is Standard/Critical
|
|
169
|
+
- Before any PR merge authored wholly or partially by an agent
|
|
170
|
+
- After `@ciel-explorer` completes CODEBASE review
|
|
171
|
+
- User command: "audit this code for AI mistakes"
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## References
|
|
176
|
+
|
|
177
|
+
- ISSTA 2025 — "LLM Hallucinations in Practical Code Generation: Phenomena, Mechanism, and Mitigation"
|
|
178
|
+
- arxiv 2601.19106 — "Detecting and Correcting Hallucinations in LLM-Generated Code"
|
|
179
|
+
- arxiv 2404.00971 — "Beyond Functional Correctness"
|
|
180
|
+
- Anthropic 2604.08906 — agentic framework failure taxonomy
|