lemmafit 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +93 -4
- package/blank-template/README.md +3 -0
- package/blank-template/SPEC.yaml +1 -0
- package/blank-template/index.html +12 -0
- package/blank-template/lemmafit/.vibe/config.json +5 -0
- package/blank-template/lemmafit/dafny/Domain.dfy +5 -0
- package/blank-template/lemmafit/dafny/Replay.dfy +147 -0
- package/blank-template/package.json +25 -0
- package/blank-template/src/App.css +3 -0
- package/blank-template/src/App.tsx +10 -0
- package/blank-template/src/dafny/.gitkeep +0 -0
- package/blank-template/src/index.css +29 -0
- package/blank-template/src/main.tsx +10 -0
- package/blank-template/src/vite-env.d.ts +6 -0
- package/blank-template/template.gitignore +3 -0
- package/blank-template/tsconfig.json +21 -0
- package/blank-template/tsconfig.node.json +11 -0
- package/blank-template/vite.config.js +9 -0
- package/cli/context-hook.js +103 -0
- package/cli/daemon.js +24 -0
- package/cli/download-dafny2js.js +136 -0
- package/cli/generate-guarantees-md.js +223 -0
- package/cli/lemmafit.js +385 -0
- package/cli/session-hook.js +74 -0
- package/cli/sync.js +168 -0
- package/cli/verify-hook.js +221 -0
- package/commands/guarantees.md +138 -0
- package/docs/CLAUDE_INSTRUCTIONS.md +137 -0
- package/kernels/Replay.dfy +147 -0
- package/lib/daemon-client.js +54 -0
- package/lib/daemon.js +990 -0
- package/lib/download-dafny.js +130 -0
- package/lib/log.js +32 -0
- package/lib/spawn-claude.js +51 -0
- package/package.json +49 -5
- package/skills/lemmafit-dafny/SKILL.md +101 -0
- package/skills/lemmafit-post-react-audit/SKILL.md +46 -0
- package/skills/lemmafit-pre-react-audits/SKILL.md +67 -0
- package/skills/lemmafit-proofs/SKILL.md +24 -0
- package/skills/lemmafit-react-pattern/SKILL.md +62 -0
- package/skills/lemmafit-spec/SKILL.md +71 -0
- package/index.js +0 -5
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Claude Code PostToolUse hook for lemmafit.
|
|
4
|
+
*
|
|
5
|
+
* When Claude writes or edits a .dfy file (Write or Edit tool), this hook:
|
|
6
|
+
* 1. Requests verification from the daemon via Unix socket
|
|
7
|
+
* 2. Outputs result for Claude to see
|
|
8
|
+
*
|
|
9
|
+
* Hook receives JSON on stdin:
|
|
10
|
+
* {
|
|
11
|
+
* "hook": "PostToolUse",
|
|
12
|
+
* "tool_name": "Write",
|
|
13
|
+
* "tool_input": { "file_path": "...", "content": "..." },
|
|
14
|
+
* "tool_output": "..."
|
|
15
|
+
* }
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
const path = require('path');
|
|
19
|
+
const fs = require('fs');
|
|
20
|
+
const { initLog, log } = require('../lib/log');
|
|
21
|
+
const { requestDaemon } = require('../lib/daemon-client');
|
|
22
|
+
|
|
23
|
+
/**
 * Collects everything piped to this process on stdin.
 *
 * @returns {Promise<string>} The complete stdin contents decoded as UTF-8.
 */
async function readStdin() {
  const received = [];
  for await (const piece of process.stdin) {
    received.push(piece);
  }
  // Decode once, after joining, so a multi-byte character split across two
  // chunks is still decoded as a single character.
  const combined = Buffer.concat(received);
  return combined.toString('utf8');
}
|
|
30
|
+
|
|
31
|
+
/**
 * Locates the lemmafit project that contains a file.
 *
 * Walks upward from the file's directory and returns the first directory
 * that has a `lemmafit` entry in it. The filesystem root is checked too.
 *
 * @param {string} filePath - Path of the file that was written. A relative
 *   path is resolved against the current working directory.
 * @returns {string|null} Absolute path of the project root, or null when no
 *   ancestor directory contains a `lemmafit` entry.
 */
function findProjectRoot(filePath) {
  // Resolve first: path.dirname() of a bare relative name is '.', and the
  // parent of '.' is '.' again, so an unresolved walk would end immediately.
  let dir = path.dirname(path.resolve(filePath));
  for (;;) {
    if (fs.existsSync(path.join(dir, 'lemmafit'))) {
      return dir;
    }
    const parent = path.dirname(dir);
    if (parent === dir) {
      // dir is the filesystem root and has just been checked.
      return null;
    }
    dir = parent;
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Loads the verification status recorded for a project.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {*} Parsed JSON contents of `lemmafit/.vibe/status.json`, or null
 *   when the file cannot be read or does not contain valid JSON.
 */
function readStatus(projectDir) {
  const segments = ['lemmafit', '.vibe', 'status.json'];
  const statusFile = path.join(projectDir, ...segments);
  try {
    const raw = fs.readFileSync(statusFile, 'utf8');
    return JSON.parse(raw);
  } catch {
    // A missing, unreadable, or malformed status file is reported as "no status".
    return null;
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Renders one line per diagnostic of a given kind, across all files in a
 * status object. Shared by formatErrors and formatWarnings, which differ only
 * in which per-file list they read.
 *
 * @param {object} status - Verification status; `status.files` maps a file
 *   name to an object that may carry `errors` and `warnings` lists.
 * @param {'errors'|'warnings'} kind - Which per-file list to render.
 * @returns {string} Lines of the form ` <file>:<line>: <message>` joined with
 *   newlines; the empty string when there are no diagnostics.
 */
function formatDiagnostics(status, kind) {
  return Object.entries(status.files || {})
    .flatMap(([file, fileStatus]) =>
      // Array.from accepts any iterable, matching the for...of the two
      // formatters previously used.
      Array.from(fileStatus[kind] || [], (item) => ` ${file}:${item.line}: ${item.message}`))
    .join('\n');
}

/**
 * Formats every verification error in a status object.
 *
 * @param {object} status - Verification status (see formatDiagnostics).
 * @returns {string} One ` <file>:<line>: <message>` line per error.
 */
function formatErrors(status) {
  return formatDiagnostics(status, 'errors');
}

/**
 * Formats every warning in a status object.
 *
 * @param {object} status - Verification status (see formatDiagnostics).
 * @returns {string} One ` <file>:<line>: <message>` line per warning.
 */
function formatWarnings(status) {
  return formatDiagnostics(status, 'warnings');
}
|
|
70
|
+
|
|
71
|
+
/**
 * Formats the axioms recorded in a status object as a titled list.
 *
 * @param {object} status - Verification status; `status.axioms` is an
 *   optional list of `{ file, line, content }` entries.
 * @returns {string} A blank line, the heading, then one line per axiom; the
 *   empty string when there are no axioms.
 */
function formatAxioms(status) {
  const entries = status.axioms || [];
  if (entries.length === 0) {
    return '';
  }

  const rendered = entries.map((axiom) => {
    return ` ${axiom.file}:${axiom.line}: ${axiom.content}`;
  });
  const body = rendered.join('\n');
  return `\nAxioms (unproven assumptions):\n${body}`;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Formats the spec queue of a status object: a summary heading followed by
 * one line per added or removed entry.
 *
 * @param {object} status - Verification status; `status.specQueue` is an
 *   optional list of entries with `type`, `text`, `line` and `verifiedAt`.
 * @returns {string} A blank line, a `Spec queue (...)` heading counting
 *   pending and verified entries, then the entry lines; the empty string when
 *   the queue is missing or empty.
 */
function formatSpecQueue(status) {
  const queue = status.specQueue;
  if (!queue || queue.length === 0) {
    return '';
  }

  // An entry counts as verified once it carries a verifiedAt stamp.
  const verifiedCount = queue.filter((change) => change.verifiedAt).length;
  const pendingCount = queue.length - verifiedCount;

  const summaryParts = [];
  if (pendingCount > 0) {
    summaryParts.push(`${pendingCount} pending`);
  }
  if (verifiedCount > 0) {
    summaryParts.push(`${verifiedCount} verified`);
  }

  const output = ['', `Spec queue (${summaryParts.join(', ')}):`];
  for (const change of queue) {
    const suffix = change.verifiedAt ? ' [verified]' : '';
    switch (change.type) {
      case 'added':
        output.push(` +${change.line}: ${change.text}${suffix}`);
        break;
      case 'removed':
        output.push(` - ${change.text}${suffix}`);
        break;
      default:
        // Entries of any other type are counted in the heading but not listed.
        break;
    }
  }
  return output.join('\n');
}
|
|
101
|
+
|
|
102
|
+
/**
 * Hook entry point.
 *
 * Reads the hook payload from stdin and does nothing (exit 0, no output)
 * unless it describes a Write or Edit of a `.dfy` file. Otherwise it asks the
 * project's daemon to verify over `lemmafit/.vibe/daemon.sock`, falling back
 * to running the daemon in-process when the socket request fails, and prints
 * a single JSON object (`systemMessage` + `hookSpecificOutput`) on stdout
 * describing the outcome.
 *
 * When verification succeeded, spec queue entries that have no `verifiedAt`
 * are stamped with the current time and the status file is rewritten.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const input = await readStdin();

  let hookData;
  try {
    hookData = JSON.parse(input);
  } catch {
    // Not a payload this hook understands; stay silent.
    process.exit(0);
  }

  // JSON.parse also accepts `null` and primitives, so the payload itself is
  // accessed optionally rather than assumed to be an object.
  const toolName = hookData?.tool_name;
  const filePath = hookData?.tool_input?.file_path;

  const isFileEdit = toolName === 'Write' || toolName === 'Edit';
  if (!isFileEdit || typeof filePath !== 'string' || !filePath.endsWith('.dfy')) {
    process.exit(0);
  }

  const projectDir = findProjectRoot(filePath);
  if (!projectDir) {
    console.log(JSON.stringify({
      systemMessage: '[lemmafit] verify-hook executed (no project found)',
      hookSpecificOutput: { hookEventName: 'PostToolUse', additionalContext: 'Note: Not in a lemmafit project (no lemmafit directory found)' }
    }));
    process.exit(0);
  }

  initLog(projectDir);
  // Name the tool that actually fired; this hook handles Edit as well as Write.
  log('verify', `${toolName} detected: ${filePath}`);

  const lines = ['Verifying...'];
  let status;

  try {
    const sockPath = path.join(projectDir, 'lemmafit', '.vibe', 'daemon.sock');
    status = await requestDaemon(sockPath, { action: 'verify' });
    log('verify', 'Used daemon (socket)');
  } catch (err) {
    // No usable daemon: run one verification pass in this process and read
    // the status file it leaves behind.
    log('verify', `Socket failed, falling back to direct: ${err.message}`);
    const { Daemon } = require('../lib/daemon');
    const daemon = new Daemon(projectDir);
    await daemon.runOnce();
    status = readStatus(projectDir);
  }

  if (!status) {
    lines.push('Warning: Could not read verification status');
    console.log(JSON.stringify({
      systemMessage: '[lemmafit] verify-hook executed',
      hookSpecificOutput: { hookEventName: 'PostToolUse', additionalContext: lines.join('\n') }
    }));
    process.exit(0);
  }

  if (status.state === 'verified') {
    const axiomNote = (status.axioms?.length > 0)
      ? ` (${status.axioms.length} axiom${status.axioms.length > 1 ? 's' : ''})`
      : '';
    log('verify', `Verified${axiomNote}`);
    lines.push(`✓ Verified and compiled${axiomNote}`);
    if (status.axioms?.length > 0) {
      lines.push(formatAxioms(status));
    }

    // Stamp unverified spec queue items so Claude knows code compiled with them in scope
    if (status.specQueue?.length > 0) {
      const now = new Date().toISOString();
      let stamped = 0;
      for (const item of status.specQueue) {
        if (!item.verifiedAt) {
          item.verifiedAt = now;
          stamped++;
        }
      }
      if (stamped > 0) {
        const statusPath = path.join(projectDir, 'lemmafit', '.vibe', 'status.json');
        status.timestamp = new Date().toISOString();
        try {
          fs.writeFileSync(statusPath, JSON.stringify(status, null, 2));
          log('verify', `Stamped ${stamped} spec queue item(s) as verified`);
        } catch (err) {
          // Persisting the stamps is best-effort: verification already
          // succeeded, so the result must still be reported below.
          log('verify', `Could not persist spec queue stamps: ${err.message}`);
        }
      }
    }
  } else if (status.state === 'error') {
    const errorCount = Object.values(status.files || {})
      .reduce((sum, f) => sum + (f.errors?.length || 0), 0);
    const warningCount = Object.values(status.files || {})
      .reduce((sum, f) => sum + (f.warnings?.length || 0), 0);
    if (errorCount === 0 && warningCount > 0) {
      // Error state with warnings only: reported as "verification passed,
      // compilation blocked" rather than as a verification failure.
      log('verify', `Verification passed, compilation blocked (${warningCount} warning(s))`);
      lines.push(`⚠ Verification passed, compilation blocked (${warningCount} warning${warningCount !== 1 ? 's' : ''}):`);
      lines.push(formatWarnings(status));
    } else {
      log('verify', `Failed with ${errorCount} error(s)`);
      lines.push(`✗ Verification failed (${errorCount} error${errorCount !== 1 ? 's' : ''}):`);
      lines.push(formatErrors(status));
      if (warningCount > 0) {
        lines.push(`\n⚠ Warnings (${warningCount}):`);
        lines.push(formatWarnings(status));
      }
    }
    if (status.compileError) {
      lines.push(`\nCompilation error: ${status.compileError}`);
    }
  } else {
    lines.push(`Status: ${status.state}`);
  }

  const specLine = formatSpecQueue(status);
  if (specLine) {
    lines.push(specLine);
  }

  console.log(JSON.stringify({
    systemMessage: '[lemmafit] verify-hook executed',
    hookSpecificOutput: { hookEventName: 'PostToolUse', additionalContext: lines.join('\n') }
  }));
}
|
|
217
|
+
|
|
218
|
+
// Run the hook. Any unhandled failure is reported on stderr with exit code 1.
main().catch((err) => {
  // A rejection value is not guaranteed to be an Error (or even an object),
  // so fall back to printing the value itself when it has no message.
  console.error('Hook error:', err?.message ?? err);
  process.exit(1);
});
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# Generate Guarantees and Run Claimcheck
|
|
2
|
+
|
|
3
|
+
You are generating human-readable guarantees from proven Dafny code and verifying them with claimcheck.
|
|
4
|
+
|
|
5
|
+
## Step 0: Make sure the state of the project is verified
|
|
6
|
+
|
|
7
|
+
Check `lemmafit/.vibe/status.json` for the current verification status.
|
|
8
|
+
|
|
9
|
+
## Step 1: Read project data
|
|
10
|
+
|
|
11
|
+
Read these files:
|
|
12
|
+
- `lemmafit/.vibe/claims.json` — extracted proof obligations from Dafny (predicates with conjuncts, lemmas with requires/ensures, functions with contracts, axioms)
|
|
13
|
+
- `SPEC.yaml` — natural language requirements with spec entries
|
|
14
|
+
- `lemmafit/dafny/Domain.dfy` — the Dafny source code
|
|
15
|
+
- `lemmafit/.vibe/config.json` — project config (need `appCore` field for claimcheck domain)
|
|
16
|
+
|
|
17
|
+
If `claims.json` doesn't exist, tell the user to run the daemon first (`npm run daemon` or `npm run dev`) so Dafny verification produces claims.
|
|
18
|
+
|
|
19
|
+
## Step 2: Map claims to spec entries
|
|
20
|
+
|
|
21
|
+
Analyze the claims from `claims.json` and map them to spec entries from `SPEC.yaml`.
|
|
22
|
+
|
|
23
|
+
**How claims work:**
|
|
24
|
+
- **Predicate conjuncts** (in `predicates[].conjuncts`) are invariant properties proven by the `StepPreservesInv` lemma. Each conjunct in the `Inv` predicate is a separate proven property.
|
|
25
|
+
- **Lemma ensures** (in `lemmas[].ensures`) are standalone proven properties. The lemma name is in `lemmas[].name`.
|
|
26
|
+
- **Function contracts** (in `functions[].requires` and `functions[].ensures`) are proven pre/postconditions.
|
|
27
|
+
- **Axioms** (in `axioms[]`) are assumed, NOT proven — they represent the trust surface.
|
|
28
|
+
|
|
29
|
+
**Mapping rules:**
|
|
30
|
+
- A claim "covers" a spec entry if the Dafny expression proves the property described by that spec entry
|
|
31
|
+
- One claim can cover multiple spec entries
|
|
32
|
+
- Spec entries with `status: trusted` don't need covering claims
|
|
33
|
+
- Spec entries with `verifiable: false` should be skipped
|
|
34
|
+
- Identify **gaps**: spec entries with `status: verified` that have NO covering claim
|
|
35
|
+
|
|
36
|
+
## Step 3: Write guarantees.json
|
|
37
|
+
|
|
38
|
+
Write `lemmafit/.vibe/guarantees.json` with this format:
|
|
39
|
+
|
|
40
|
+
```json
|
|
41
|
+
{
|
|
42
|
+
"generatedAt": "<ISO timestamp>",
|
|
43
|
+
"guarantees": [
|
|
44
|
+
{
|
|
45
|
+
"specId": "spec-001",
|
|
46
|
+
"requirement": "<title from SPEC.yaml>",
|
|
47
|
+
"status": "proven",
|
|
48
|
+
"coveredBy": [
|
|
49
|
+
{
|
|
50
|
+
"claimId": "inv:<Module>.<Predicate>:<conjunctIndex>",
|
|
51
|
+
"type": "invariant-conjunct",
|
|
52
|
+
"expression": "<the Dafny expression>",
|
|
53
|
+
"lemmaName": "StepPreservesInv"
|
|
54
|
+
}
|
|
55
|
+
],
|
|
56
|
+
"reasoning": "<why this claim covers this spec entry>"
|
|
57
|
+
}
|
|
58
|
+
],
|
|
59
|
+
"gaps": [
|
|
60
|
+
{
|
|
61
|
+
"specId": "spec-005",
|
|
62
|
+
"requirement": "<title>",
|
|
63
|
+
"reason": "<why no claim covers this>"
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
}
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
For `lemmaName`:
|
|
70
|
+
- Invariant conjuncts → `"StepPreservesInv"`
|
|
71
|
+
- Lemma ensures → the lemma's name (e.g. `"UndoReversesLast"`)
|
|
72
|
+
- Function contracts → the function's name
|
|
73
|
+
|
|
74
|
+
## Step 4: Handle multi-lemma requirements
|
|
75
|
+
|
|
76
|
+
If a single requirement is covered by **multiple** lemmas (e.g. an invariant conjunct AND a standalone lemma together prove one requirement), you must write a **new wrapper lemma** in the Dafny source that proves the full requirement in one place. The wrapper lemma should:
|
|
77
|
+
- Have a name that clearly describes the requirement (e.g. `Guarantee_UniqueUpvotes`)
|
|
78
|
+
- Call/use the individual lemmas as needed
|
|
79
|
+
- Have an `ensures` clause that directly expresses the full natural-language requirement
|
|
80
|
+
|
|
81
|
+
This is necessary because claimcheck needs exactly one lemma per requirement to verify faithfulness.
|
|
82
|
+
|
|
83
|
+
Wait for the daemon to re-verify after writing any new lemmas before proceeding.
|
|
84
|
+
|
|
85
|
+
## Step 5: Write claimcheck mapping
|
|
86
|
+
|
|
87
|
+
Write `lemmafit/.vibe/claimcheck-mapping.json` — an array of `{ requirement, lemmaName, file }` objects derived from the guarantees. The `file` field is a **path relative to the mapping file** (i.e. relative to `lemmafit/.vibe/`).
|
|
88
|
+
|
|
89
|
+
```json
|
|
90
|
+
[
|
|
91
|
+
{ "requirement": "Each user can only upvote once", "lemmaName": "Guarantee_UniqueUpvotes", "file": "../dafny/Domain.dfy" }
|
|
92
|
+
]
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Each requirement should map to exactly one lemma. If you wrote wrapper lemmas in step 4, use those.
|
|
96
|
+
|
|
97
|
+
## Step 6: Run claimcheck
|
|
98
|
+
|
|
99
|
+
Run `claimcheck-multi` to verify that each lemma faithfully expresses its requirement.
|
|
100
|
+
|
|
101
|
+
**CRITICAL: claimcheck-multi MUST run in the background with output redirected to files.** It spawns `claude -p` as a subprocess, which will hang indefinitely if run in the foreground from a Claude Code session. Always use `&` and redirect stdout/stderr:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
claimcheck-multi -m lemmafit/.vibe/claimcheck-mapping.json -d <appCore> --json --claude-code > lemmafit/.vibe/claimcheck.json 2> lemmafit/.vibe/claimcheck-err.log &
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Replace `<appCore>` with the value from `lemmafit/.vibe/config.json`.
|
|
108
|
+
|
|
109
|
+
No `--dfy` flag needed — `claimcheck-multi` resolves file paths from the `file` field in each mapping entry (relative to the mapping file).
|
|
110
|
+
|
|
111
|
+
Wait for it to complete (poll with `cat lemmafit/.vibe/claimcheck.json` until it contains valid JSON), then read the results.
|
|
112
|
+
|
|
113
|
+
## Step 7: Report and iterate
|
|
114
|
+
|
|
115
|
+
Parse the claimcheck results and report:
|
|
116
|
+
- **Confirmed** — the lemma faithfully expresses the requirement. No action needed.
|
|
117
|
+
- **Disputed** — a discrepancy was found. Show the `discrepancy` text and `weakeningType` (tautology, weakened-postcondition, narrowed-scope, wrong-property).
|
|
118
|
+
- **Error** — lemma not found in source. Check the lemmaName.
|
|
119
|
+
|
|
120
|
+
**If any claims are disputed:** Suggest specific fixes to the Dafny code or the requirement text. If the user agrees, make the fixes, wait for re-verification, and re-run the guarantees process.
|
|
121
|
+
|
|
122
|
+
## Step 8: Ensure all files up to date
|
|
123
|
+
|
|
124
|
+
Once iteration is complete, compare `claimcheck-mapping.json` with `guarantees.json` to ensure they contain equivalent information. If there's a discrepancy, trace back to the Dafny code to find which is most accurate. Adjust the relevant file accordingly and re-run `/guarantees` command. Once confirmed, report that the files are in sync.
|
|
125
|
+
|
|
126
|
+
## Step 9: Generate guarantees.md via the script
|
|
127
|
+
|
|
128
|
+
Do this only after Step 8 confirms that `claimcheck-mapping.json` and `guarantees.json` are in sync.
|
|
129
|
+
|
|
130
|
+
Run the deterministic report generator:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
npx lemmafit-generate-guarantees
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
This reads `lemmafit/.vibe/guarantees.json`, `lemmafit/.vibe/claimcheck.json`, and `SPEC.yaml` and writes `lemmafit/.vibe/guarantees.md`. Do NOT write this file manually — always use the script so the report matches the JSON exactly.
|
|
137
|
+
|
|
138
|
+
Report to the user: "A report of your app's guarantees has been generated in lemmafit/reports/guarantees.md"
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Lemmafit Project
|
|
2
|
+
|
|
3
|
+
This project uses lemmafit to formally verify ALL logic in the codebase. You help users build web apps with mathematically verified state machines, business logic, and any other logic-based code.
|
|
4
|
+
|
|
5
|
+
Before writing code, ask yourself: "Is any part of this code verifiable?" Verifiable means that it contains logic that CAN be written in Dafny. If the answer is "yes," write the code in Dafny. Do not write anything that can be verified directly in JavaScript or TypeScript.
|
|
6
|
+
|
|
7
|
+
## How It Works
|
|
8
|
+
|
|
9
|
+
1. Users describe an app or individual features in natural language
|
|
10
|
+
2. You write Dafny specifications in the `lemmafit/dafny/` directory
|
|
11
|
+
3. Verification and compilation happen automatically (you'll see the results)
|
|
12
|
+
4. The React app in `src/` uses the verified API
|
|
13
|
+
|
|
14
|
+
## Key Files
|
|
15
|
+
|
|
16
|
+
- `SPEC.yaml` - Natural language requirements (source of truth for all specs)
|
|
17
|
+
- `lemmafit/dafny/*.dfy` - Dafny specifications (all logic should be written in Dafny)
|
|
18
|
+
- `src/dafny/app.ts` - Auto-generated TypeScript API (do not edit directly)
|
|
19
|
+
- `src/App.tsx` - React app that uses the verified API
|
|
20
|
+
- `lemmafit/.vibe/status.json` - Current verification status
|
|
21
|
+
- `lemmafit/.vibe/logic-surface.json` - Logic interface/API
|
|
22
|
+
- `lemmafit/.vibe/modules.json` - Multi-module configuration (optional, see below)
|
|
23
|
+
|
|
24
|
+
## Multi-Module Projects
|
|
25
|
+
|
|
26
|
+
By default, lemmafit uses a single Dafny module with the Replay kernel pattern (Domain.dfy → app.ts). For projects that need multiple independent verified modules, create `lemmafit/.vibe/modules.json`:
|
|
27
|
+
|
|
28
|
+
```json
|
|
29
|
+
[
|
|
30
|
+
{
|
|
31
|
+
"entry": "lemmafit/dafny/Workflow.dfy",
|
|
32
|
+
"appCore": "Workflow",
|
|
33
|
+
"outputName": "Workflow",
|
|
34
|
+
"jsonApi": true
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"entry": "lemmafit/dafny/Validation.dfy",
|
|
38
|
+
"appCore": "Validation",
|
|
39
|
+
"outputName": "Validation",
|
|
40
|
+
"jsonApi": true,
|
|
41
|
+
"nullOptions": true,
|
|
42
|
+
"target": "node"
|
|
43
|
+
}
|
|
44
|
+
]
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
When `modules.json` exists:
|
|
48
|
+
- Each module is compiled independently to `src/dafny/{outputName}.cjs` and `src/dafny/{outputName}.ts`
|
|
49
|
+
- Each module is its own AppCore (no separate AppCore module needed)
|
|
50
|
+
- `jsonApi: true` enables full JSON marshalling (plain types in/out, no Dafny runtime types)
|
|
51
|
+
- `nullOptions: true` maps `Option<T>` to `T | null` at the boundary
|
|
52
|
+
- `target` sets the dafny2js compilation target (default: `"client"`). Valid values: `"client"` (browser/React), `"node"` (Node.js, uses `fs.readFileSync`), `"inline"` (universal, inlines .cjs code), `"deno"` (Deno adapter), `"cloudflare"` (Cloudflare Workers adapter)
|
|
53
|
+
- Modules don't know about each other — write a thin TypeScript glue file to connect them
|
|
54
|
+
- The glue file is unverified but should be minimal and auditable
|
|
55
|
+
- Prefer returning result types with verified error messages over boolean predicates — the UI can display them directly without duplicating logic
|
|
56
|
+
|
|
57
|
+
## Available Skills
|
|
58
|
+
|
|
59
|
+
- `lemmafit-dafny`: Load this skill before writing or editing .dfy files
|
|
60
|
+
- `lemmafit-proofs`: Load this skill before writing or editing lemmas
|
|
61
|
+
- `lemmafit-react-pattern`: Load this skill before writing React
|
|
62
|
+
- `lemmafit-spec`: Load this skill when the user asks to add or edit a feature, and before writing or editing the `SPEC.yaml` file
|
|
63
|
+
|
|
64
|
+
If you try to read any of these files and they are missing, alert the user.
|
|
65
|
+
|
|
66
|
+
## WORKFLOW
|
|
67
|
+
Follow these steps in order every time the user asks for a feature or change that involves any logic.
|
|
68
|
+
|
|
69
|
+
Step-by-step development workflow for building apps and features with lemmafit. Use when the user asks for a new feature, describes functionality, or when spec changes need to be addressed. Covers the full loop from spec.yaml to verified React code.
|
|
70
|
+
|
|
71
|
+
Report in the chat which step you are on as you move through the steps.
|
|
72
|
+
|
|
73
|
+
## Step 0: Check for pending spec changes
|
|
74
|
+
|
|
75
|
+
Read `lemmafit/.vibe/status.json`. If `specQueue` has items, address those first before doing anything else. Each item is a requirement that was added/changed/removed in SPEC.yaml but not yet reflected in Dafny code.
|
|
76
|
+
|
|
77
|
+
## Step 1: Write SPEC.yaml entries
|
|
78
|
+
Load lemmafit-spec skill before writing or editing the spec.yaml
|
|
79
|
+
|
|
80
|
+
Translate the user's request into structured entries in `SPEC.yaml`. A hook runs automatically after you write SPEC.yaml — it diffs your changes and creates a spec queue. You'll see the pending items in the output.
|
|
81
|
+
|
|
82
|
+
## Step 2: Write Dafny specifications
|
|
83
|
+
Load lemmafit-dafny skill before writing or editing any Dafny.
|
|
84
|
+
|
|
85
|
+
Write `.dfy` files in `lemmafit/dafny/` that formalize the verifiable spec entries.
|
|
86
|
+
|
|
87
|
+
A hook runs automatically after you write any `.dfy` file — it verifies and compiles immediately. You must wait for a response from the daemon before moving forward. The response will be one of two:
|
|
88
|
+
- `✓ Verified and compiled` — success, spec queue auto-cleared, wrappers regenerated (`src/dafny/app.ts` or per-module `src/dafny/{name}.ts`)
|
|
89
|
+
- `✗ Verification failed` — fix the errors shown and write the file again
|
|
90
|
+
|
|
91
|
+
Do not move to the next step until verification passes (verified and compiled).
|
|
92
|
+
|
|
93
|
+
## Step 3: Check Dafny against SPEC.yaml
|
|
94
|
+
Always keep SPEC.yaml and Dafny in sync — if you change one, update the other.
|
|
95
|
+
|
|
96
|
+
## Step 4: Write proofs
|
|
97
|
+
Load the lemmafit-proofs skill before writing any lemmas.
|
|
98
|
+
|
|
99
|
+
## Step 5: Run Pre-React Audits
|
|
100
|
+
Load the lemmafit-pre-react-audits skill before proceeding.
|
|
101
|
+
|
|
102
|
+
Run 2 audits:
|
|
103
|
+
**Proof Strength Audit**: Check the strength of the actual proofs against the specs. Any gaps? Any weak proofs?
|
|
104
|
+
**Logic-in-Js Audit**: Is there any logic that the app or feature will require (for this build phase) that is not being implemented in Dafny?
|
|
105
|
+
|
|
106
|
+
Label each finding as `minor`, `moderate`, or `critical`.
|
|
107
|
+
|
|
108
|
+
Iterate on Steps 4 and 5 until audit returns only minor findings.
|
|
109
|
+
|
|
110
|
+
## Step 6: Write React code
|
|
111
|
+
Load lemmafit-react-pattern skill before writing React code.
|
|
112
|
+
|
|
113
|
+
Only after verification passes. The auto-generated API is at `src/dafny/app.ts` (single-module) or `src/dafny/{name}.ts` (multi-module). Never edit generated files.
|
|
114
|
+
|
|
115
|
+
- Create hooks in `src/hooks/` that wrap `Api.Init`, `Api.Dispatch`, `Api.Present`
|
|
116
|
+
- Create components in `src/components/` that receive data/callbacks via props
|
|
117
|
+
- Keep `App.tsx` as a thin composition root
|
|
118
|
+
|
|
119
|
+
Never re-implement logic in React that already exists in the verified API.
|
|
120
|
+
|
|
121
|
+
## Step 7: Run Post-React Audit
|
|
122
|
+
Load the lemmafit-post-react-audit skill
|
|
123
|
+
|
|
124
|
+
Ensure that effect-free logic is implemented primarily in Dafny rather than directly in JavaScript/TypeScript.
|
|
125
|
+
|
|
126
|
+
## Step 8: Verify guarantees
|
|
127
|
+
|
|
128
|
+
After proofs are solid and React is wired, check that claims actually cover the spec requirements. Ask the user if they want to run `/guarantees` command to generate a report. If they say yes, run the command.
|
|
129
|
+
|
|
130
|
+
## Step 9: Iterate
|
|
131
|
+
|
|
132
|
+
If the user asks for changes, start by editing SPEC.yaml (Step 1), then move through each step again until the full loop is complete.
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
// Contract for an application domain: a state type, an action type, an
// invariant over states, and the two proof obligations that relate them.
// Everything here is declared without a body.
abstract module {:compile false} Domain {
  // Application state.
  type Model
  // A request to change the state.
  type Action

  // Property of a model; required by Apply and established by the lemmas below.
  ghost predicate Inv(m: Model)

  // The starting model.
  function Init(): Model
  // Applies an action to a model. May only be called on a model satisfying Inv.
  function Apply(m: Model, a: Action): Model
    requires Inv(m)
  // Post-processing step; StepPreservesInv speaks about Normalize(Apply(m, a)).
  function Normalize(m: Model): Model

  // Obligation: the starting model satisfies the invariant.
  lemma InitSatisfiesInv()
    ensures Inv(Init())

  // Obligation: applying an action and then normalizing keeps the invariant.
  lemma StepPreservesInv(m: Model, a: Action)
    requires Inv(m)
    ensures Inv(Normalize(Apply(m,a)))
}
|
|
19
|
+
|
|
20
|
+
// Undo/redo history built on top of a Domain: a current model plus the
// sequences of models before and after it, with lemmas showing that every
// operation keeps all stored models inside the domain invariant.
abstract module {:compile false} Kernel {
  // The domain this kernel is written against.
  import D : Domain

  // One transition: apply the action, then normalize the result.
  function Step(m: D.Model, a: D.Action): D.Model
    requires D.Inv(m)
  {
    D.Normalize(D.Apply(m, a))
  }

  // History holding only the domain's initial model: nothing to undo or redo.
  function InitHistory(): History {
    History([], D.Init(), [])
  }

  // past: models before the current one, oldest first (Undo takes the last).
  // present: the current model.
  // future: undone models, next-to-redo first (Redo takes element 0).
  datatype History =
    History(past: seq<D.Model>, present: D.Model, future: seq<D.Model>)

  // Performs an action: the current model is appended to past, the stepped
  // model becomes present, and future is emptied.
  function Do(h: History, a: D.Action): History
    requires D.Inv(h.present)
  {
    History(h.past + [h.present], Step(h.present, a), [])
  }

  // Apply action without recording to history (for live preview during drag)
  function Preview(h: History, a: D.Action): History
    requires D.Inv(h.present)
  {
    History(h.past, Step(h.present, a), h.future)
  }

  // Commit current state, recording baseline to history (for end of drag)
  function CommitFrom(h: History, baseline: D.Model): History {
    History(h.past + [baseline], h.present, [])
  }

  // Steps back one entry: the last past model becomes present and the old
  // present is put at the front of future. Returns h unchanged when past is empty.
  function Undo(h: History): History {
    if |h.past| == 0 then h
    else
      var i := |h.past| - 1;
      History(h.past[..i], h.past[i], [h.present] + h.future)
  }

  // Steps forward one entry: the first future model becomes present and the
  // old present is appended to past. Returns h unchanged when future is empty.
  function Redo(h: History): History {
    if |h.future| == 0 then h
    else
      History(h.past + [h.present], h.future[0], h.future[1..])
  }

  // Do keeps the invariant on the present model.
  lemma DoPreservesInv(h: History, a: D.Action)
    requires D.Inv(h.present)
    ensures D.Inv(Do(h, a).present)
  {
    D.StepPreservesInv(h.present, a);
  }

  // Every model stored in the history (past, present and future) satisfies
  // the domain invariant.
  ghost predicate HistInv(h: History) {
    (forall i | 0 <= i < |h.past| :: D.Inv(h.past[i])) &&
    D.Inv(h.present) &&
    (forall j | 0 <= j < |h.future| :: D.Inv(h.future[j]))
  }

  // The initial history satisfies HistInv.
  lemma InitHistorySatisfiesInv()
    ensures HistInv(InitHistory())
  {
    D.InitSatisfiesInv();
  }

  // Undo keeps HistInv.
  lemma UndoPreservesHistInv(h: History)
    requires HistInv(h)
    ensures HistInv(Undo(h))
  {
  }

  // Redo keeps HistInv.
  lemma RedoPreservesHistInv(h: History)
    requires HistInv(h)
    ensures HistInv(Redo(h))
  {
  }

  // Do keeps HistInv.
  lemma DoPreservesHistInv(h: History, a: D.Action)
    requires HistInv(h)
    ensures HistInv(Do(h, a))
  {
    D.StepPreservesInv(h.present, a);
  }

  // Preview keeps HistInv.
  lemma PreviewPreservesHistInv(h: History, a: D.Action)
    requires HistInv(h)
    ensures HistInv(Preview(h, a))
  {
    D.StepPreservesInv(h.present, a);
  }

  // CommitFrom keeps HistInv, provided the recorded baseline satisfies Inv.
  lemma CommitFromPreservesHistInv(h: History, baseline: D.Model)
    requires HistInv(h)
    requires D.Inv(baseline)
    ensures HistInv(CommitFrom(h, baseline))
  {
  }

  // proxy for linear undo: after a new action, there is no redo branch
  lemma DoHasNoRedoBranch(h: History, a: D.Action)
    requires HistInv(h)
    ensures Redo(Do(h, a)) == Do(h, a)
  {
  }
  // round-tripping properties
  // Undo followed by Redo restores the history when there was something to undo.
  lemma UndoRedoRoundTrip(h: History)
    requires |h.past| > 0
    ensures Redo(Undo(h)) == h
  {
  }
  // Redo followed by Undo restores the history when there was something to redo.
  lemma RedoUndoRoundTrip(h: History)
    requires |h.future| > 0
    ensures Undo(Redo(h)) == h
  {
  }
  // idempotence at boundaries
  // With an empty past, Undo returns the history unchanged.
  lemma UndoAtBeginningIsNoOp(h: History)
    requires |h.past| == 0
    ensures Undo(h) == h
  {
  }
  // With an empty future, Redo returns the history unchanged.
  lemma RedoAtEndIsNoOp(h: History)
    requires |h.future| == 0
    ensures Redo(h) == h
  {
  }
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unix domain socket client for communicating with the lemmafit daemon.
|
|
3
|
+
* Used by verify-hook and spec-hook to request verification/spec processing
|
|
4
|
+
* without polling status.json.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const net = require('net');
|
|
8
|
+
|
|
9
|
+
/**
 * Sends one JSON request to the lemmafit daemon over its socket and resolves
 * with the daemon's JSON reply.
 *
 * The request is written as a single newline-terminated JSON line once the
 * connection is established. The reply is everything received before the
 * daemon ends the connection, trimmed and parsed as JSON.
 *
 * @param {string} sockPath - Path of the daemon's socket.
 * @param {*} message - JSON-serializable request payload.
 * @param {number} [timeoutMs=60000] - Maximum time to wait for the full reply.
 * @returns {Promise<*>} The parsed reply. The promise rejects when the
 *   message cannot be serialized, the socket reports an error, the connection
 *   closes without a reply, the reply is not valid JSON, or the timeout
 *   elapses.
 */
function requestDaemon(sockPath, message, timeoutMs = 60000) {
  return new Promise((resolve, reject) => {
    // Serialize before connecting: a throw here becomes a rejection, whereas
    // a throw inside the 'connect' handler would be an uncaught exception.
    const payload = JSON.stringify(message) + '\n';

    const client = net.createConnection(sockPath);
    const chunks = [];
    let settled = false;
    let timeout;

    // Single exit path: whichever outcome arrives first wins, and it always
    // clears the timer and releases the socket.
    const settle = (finish, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      client.destroy();
      finish(value);
    };

    timeout = setTimeout(() => {
      settle(reject, new Error(`Timed out after ${timeoutMs}ms waiting for daemon response`));
    }, timeoutMs);
    // The timer alone must not keep the process alive.
    timeout.unref();

    client.on('connect', () => {
      client.write(payload);
    });

    client.on('data', (chunk) => {
      chunks.push(chunk);
    });

    client.on('end', () => {
      let reply;
      try {
        const data = Buffer.concat(chunks).toString('utf8').trim();
        reply = JSON.parse(data);
      } catch (err) {
        settle(reject, new Error(`Invalid response from daemon: ${err.message}`, { cause: err }));
        return;
      }
      settle(resolve, reply);
    });

    client.on('error', (err) => {
      settle(reject, new Error(`Cannot connect to daemon: ${err.message}`, { cause: err }));
    });

    // Safety net: because the timer is unref'd, a socket that closes without
    // 'end' or 'error' would otherwise leave this promise pending forever.
    client.on('close', () => {
      settle(reject, new Error('Daemon connection closed before a response was received'));
    });
  });
}
|
|
53
|
+
|
|
54
|
+
module.exports = { requestDaemon };
|