deepflow 0.1.103 → 0.1.104

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/bin/install-dynamic-hooks.test.js +461 -0
  2. package/bin/install.js +150 -250
  3. package/bin/lineage-ingest.js +70 -0
  4. package/hooks/df-check-update.js +1 -0
  5. package/hooks/df-command-usage.js +18 -0
  6. package/hooks/df-dashboard-push.js +1 -0
  7. package/hooks/df-execution-history.js +1 -0
  8. package/hooks/df-explore-protocol.js +83 -0
  9. package/hooks/df-explore-protocol.test.js +228 -0
  10. package/hooks/df-hook-event-tags.test.js +127 -0
  11. package/hooks/df-invariant-check.js +1 -0
  12. package/hooks/df-quota-logger.js +1 -0
  13. package/hooks/df-snapshot-guard.js +1 -0
  14. package/hooks/df-spec-lint.js +58 -1
  15. package/hooks/df-spec-lint.test.js +412 -0
  16. package/hooks/df-statusline.js +1 -0
  17. package/hooks/df-subagent-registry.js +1 -0
  18. package/hooks/df-tool-usage.js +13 -3
  19. package/hooks/df-worktree-guard.js +1 -0
  20. package/package.json +1 -1
  21. package/src/commands/df/debate.md +1 -1
  22. package/src/commands/df/eval.md +117 -0
  23. package/src/commands/df/execute.md +1 -1
  24. package/src/commands/df/fix.md +104 -0
  25. package/src/eval/git-memory.js +159 -0
  26. package/src/eval/git-memory.test.js +439 -0
  27. package/src/eval/hypothesis.js +80 -0
  28. package/src/eval/hypothesis.test.js +169 -0
  29. package/src/eval/loop.js +378 -0
  30. package/src/eval/loop.test.js +306 -0
  31. package/src/eval/metric-collector.js +163 -0
  32. package/src/eval/metric-collector.test.js +369 -0
  33. package/src/eval/metric-pivot.js +119 -0
  34. package/src/eval/metric-pivot.test.js +350 -0
  35. package/src/eval/mutator-prompt.js +106 -0
  36. package/src/eval/mutator-prompt.test.js +180 -0
  37. package/templates/config-template.yaml +5 -0
  38. package/templates/eval-fixture-template/config.yaml +39 -0
  39. package/templates/eval-fixture-template/fixture/.deepflow/decisions.md +5 -0
  40. package/templates/eval-fixture-template/fixture/hooks/invariant.js +28 -0
  41. package/templates/eval-fixture-template/fixture/package.json +12 -0
  42. package/templates/eval-fixture-template/fixture/specs/doing-example-task.md +18 -0
  43. package/templates/eval-fixture-template/fixture/src/commands/df/example.md +18 -0
  44. package/templates/eval-fixture-template/fixture/src/config.js +40 -0
  45. package/templates/eval-fixture-template/fixture/src/index.js +19 -0
  46. package/templates/eval-fixture-template/fixture/src/pipeline.js +40 -0
  47. package/templates/eval-fixture-template/fixture/src/skills/example-skill/SKILL.md +32 -0
  48. package/templates/eval-fixture-template/fixture/src/spec-loader.js +35 -0
  49. package/templates/eval-fixture-template/fixture/src/task-runner.js +32 -0
  50. package/templates/eval-fixture-template/fixture/src/verifier.js +37 -0
  51. package/templates/eval-fixture-template/hypotheses.md +14 -0
  52. package/templates/eval-fixture-template/spec.md +34 -0
  53. package/templates/eval-fixture-template/tests/behavior.test.js +69 -0
  54. package/templates/eval-fixture-template/tests/guard.test.js +108 -0
  55. package/templates/eval-fixture-template.test.js +318 -0
  56. package/templates/explore-agent.md +5 -74
  57. package/templates/explore-protocol.md +44 -0
  58. package/templates/spec-template.md +4 -0
@@ -0,0 +1,318 @@
1
+ const { describe, it } = require('node:test');
2
+ const assert = require('node:assert/strict');
3
+ const fs = require('node:fs');
4
+ const path = require('node:path');
5
+
6
+ const TEMPLATE_DIR = path.join(__dirname, 'eval-fixture-template');
7
+ const FIXTURE_DIR = path.join(TEMPLATE_DIR, 'fixture');
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // AC-4: Template directory exists with expected structure
11
+ // ---------------------------------------------------------------------------
12
+
13
+ describe('eval-fixture-template structure', () => {
14
+ it('template root directory exists', () => {
15
+ assert.ok(fs.existsSync(TEMPLATE_DIR));
16
+ assert.ok(fs.statSync(TEMPLATE_DIR).isDirectory());
17
+ });
18
+
19
+ it('has benchmark root files: spec.md, config.yaml, hypotheses.md', () => {
20
+ for (const file of ['spec.md', 'config.yaml', 'hypotheses.md']) {
21
+ const filePath = path.join(TEMPLATE_DIR, file);
22
+ assert.ok(fs.existsSync(filePath), `missing ${file}`);
23
+ }
24
+ });
25
+
26
+ it('has tests/ directory with test files', () => {
27
+ const testsDir = path.join(TEMPLATE_DIR, 'tests');
28
+ assert.ok(fs.existsSync(testsDir));
29
+ assert.ok(fs.statSync(testsDir).isDirectory());
30
+
31
+ const testFiles = fs.readdirSync(testsDir);
32
+ assert.ok(testFiles.includes('guard.test.js'), 'missing guard.test.js');
33
+ assert.ok(testFiles.includes('behavior.test.js'), 'missing behavior.test.js');
34
+ });
35
+
36
+ it('has fixture/ directory', () => {
37
+ assert.ok(fs.existsSync(FIXTURE_DIR));
38
+ assert.ok(fs.statSync(FIXTURE_DIR).isDirectory());
39
+ });
40
+ });
41
+
42
+ // ---------------------------------------------------------------------------
43
+ // fixture/ contains at least 10 skeleton files (10-15+ expected)
44
+ // ---------------------------------------------------------------------------
45
+
46
+ describe('fixture/ skeleton file count', () => {
47
+ it('contains at least 10 files', () => {
48
+ const files = [];
49
+ function walk(dir) {
50
+ for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
51
+ const full = path.join(dir, entry.name);
52
+ if (entry.isDirectory()) walk(full);
53
+ else files.push(full);
54
+ }
55
+ }
56
+ walk(FIXTURE_DIR);
57
+ assert.ok(
58
+ files.length >= 10,
59
+ `expected >= 10 fixture files, got ${files.length}`
60
+ );
61
+ });
62
+
63
+ it('contains expected subdirectories: src/, specs/, hooks/, .deepflow/', () => {
64
+ for (const sub of ['src', 'specs', 'hooks', '.deepflow']) {
65
+ const subPath = path.join(FIXTURE_DIR, sub);
66
+ assert.ok(fs.existsSync(subPath), `missing fixture/${sub}/`);
67
+ assert.ok(fs.statSync(subPath).isDirectory(), `fixture/${sub} is not a directory`);
68
+ }
69
+ });
70
+ });
71
+
72
+ // ---------------------------------------------------------------------------
73
+ // spec.md exists and has content
74
+ // ---------------------------------------------------------------------------
75
+
76
+ describe('spec.md', () => {
77
+ const specPath = path.join(TEMPLATE_DIR, 'spec.md');
78
+
79
+ it('exists and is non-empty', () => {
80
+ assert.ok(fs.existsSync(specPath));
81
+ const content = fs.readFileSync(specPath, 'utf8');
82
+ assert.ok(content.trim().length > 0, 'spec.md is empty');
83
+ });
84
+
85
+ it('contains an Objective section', () => {
86
+ const content = fs.readFileSync(specPath, 'utf8');
87
+ assert.ok(content.includes('## Objective'), 'spec.md missing ## Objective');
88
+ });
89
+
90
+ it('contains a Target Metric section', () => {
91
+ const content = fs.readFileSync(specPath, 'utf8');
92
+ assert.ok(content.includes('## Target Metric'), 'spec.md missing ## Target Metric');
93
+ });
94
+
95
+ it('contains Acceptance Criteria section', () => {
96
+ const content = fs.readFileSync(specPath, 'utf8');
97
+ assert.ok(content.includes('## Acceptance Criteria'), 'spec.md missing ## Acceptance Criteria');
98
+ });
99
+ });
100
+
101
+ // ---------------------------------------------------------------------------
102
+ // config.yaml exists and has valid structure
103
+ // ---------------------------------------------------------------------------
104
+
105
+ describe('config.yaml', () => {
106
+ const configPath = path.join(TEMPLATE_DIR, 'config.yaml');
107
+
108
+ it('exists and is non-empty', () => {
109
+ assert.ok(fs.existsSync(configPath));
110
+ const content = fs.readFileSync(configPath, 'utf8');
111
+ assert.ok(content.trim().length > 0, 'config.yaml is empty');
112
+ });
113
+
114
+ it('contains benchmark name field', () => {
115
+ const content = fs.readFileSync(configPath, 'utf8');
116
+ assert.ok(content.includes('name:'), 'config.yaml missing name field');
117
+ });
118
+
119
+ it('contains metrics section with target', () => {
120
+ const content = fs.readFileSync(configPath, 'utf8');
121
+ assert.ok(content.includes('metrics:'), 'config.yaml missing metrics section');
122
+ assert.ok(content.includes('target:'), 'config.yaml missing target metric');
123
+ });
124
+
125
+ it('contains guard_command', () => {
126
+ const content = fs.readFileSync(configPath, 'utf8');
127
+ assert.ok(content.includes('guard_command:'), 'config.yaml missing guard_command');
128
+ });
129
+
130
+ it('contains fixture section with run_command', () => {
131
+ const content = fs.readFileSync(configPath, 'utf8');
132
+ assert.ok(content.includes('fixture:'), 'config.yaml missing fixture section');
133
+ assert.ok(content.includes('run_command:'), 'config.yaml missing run_command');
134
+ });
135
+
136
+ it('contains loop section with default_iterations', () => {
137
+ const content = fs.readFileSync(configPath, 'utf8');
138
+ assert.ok(content.includes('loop:'), 'config.yaml missing loop section');
139
+ assert.ok(content.includes('default_iterations:'), 'config.yaml missing default_iterations');
140
+ });
141
+ });
142
+
143
+ // ---------------------------------------------------------------------------
144
+ // fixture/package.json has build/test scripts
145
+ // ---------------------------------------------------------------------------
146
+
147
+ describe('fixture/package.json', () => {
148
+ const pkgPath = path.join(FIXTURE_DIR, 'package.json');
149
+
150
+ it('exists and is valid JSON', () => {
151
+ assert.ok(fs.existsSync(pkgPath));
152
+ assert.doesNotThrow(() => JSON.parse(fs.readFileSync(pkgPath, 'utf8')));
153
+ });
154
+
155
+ it('has a test script', () => {
156
+ const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8'));
157
+ assert.ok(pkg.scripts, 'package.json missing scripts');
158
+ assert.equal(typeof pkg.scripts.test, 'string', 'missing test script');
159
+ assert.ok(pkg.scripts.test.length > 0, 'test script is empty');
160
+ });
161
+
162
+ it('has a build script', () => {
163
+ const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8'));
164
+ assert.equal(typeof pkg.scripts.build, 'string', 'missing build script');
165
+ assert.ok(pkg.scripts.build.length > 0, 'build script is empty');
166
+ });
167
+
168
+ it('has a name field', () => {
169
+ const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8'));
170
+ assert.equal(typeof pkg.name, 'string');
171
+ assert.ok(pkg.name.length > 0);
172
+ });
173
+ });
174
+
175
+ // ---------------------------------------------------------------------------
176
+ // Template files are non-empty
177
+ // ---------------------------------------------------------------------------
178
+
179
+ describe('all template files are non-empty', () => {
180
+ const allFiles = [];
181
+ function walk(dir) {
182
+ for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
183
+ const full = path.join(dir, entry.name);
184
+ if (entry.isDirectory()) walk(full);
185
+ else allFiles.push(full);
186
+ }
187
+ }
188
+ walk(TEMPLATE_DIR);
189
+
190
+ for (const filePath of allFiles) {
191
+ const rel = path.relative(TEMPLATE_DIR, filePath);
192
+ it(`${rel} is non-empty`, () => {
193
+ const stat = fs.statSync(filePath);
194
+ assert.ok(stat.size > 0, `${rel} is empty (0 bytes)`);
195
+ });
196
+ }
197
+ });
198
+
199
+ // ---------------------------------------------------------------------------
200
+ // hypotheses.md has seeded hypotheses
201
+ // ---------------------------------------------------------------------------
202
+
203
+ describe('hypotheses.md', () => {
204
+ const hypPath = path.join(TEMPLATE_DIR, 'hypotheses.md');
205
+
206
+ it('exists and is non-empty', () => {
207
+ assert.ok(fs.existsSync(hypPath));
208
+ const content = fs.readFileSync(hypPath, 'utf8');
209
+ assert.ok(content.trim().length > 0);
210
+ });
211
+
212
+ it('contains at least 3 hypothesis entries', () => {
213
+ const content = fs.readFileSync(hypPath, 'utf8');
214
+ // Hypotheses are the non-empty lines after the first --- separator
215
+ const afterSeparator = content.split('---').slice(1).join('---');
216
+ const hypotheses = afterSeparator
217
+ .split('\n')
218
+ .map((l) => l.trim())
219
+ .filter((l) => l.length > 0);
220
+ assert.ok(
221
+ hypotheses.length >= 3,
222
+ `expected >= 3 hypotheses, got ${hypotheses.length}`
223
+ );
224
+ });
225
+ });
226
+
227
+ // ---------------------------------------------------------------------------
228
+ // Fixture source files exist and have meaningful content
229
+ // ---------------------------------------------------------------------------
230
+
231
+ describe('fixture source files', () => {
232
+ const expectedSrcFiles = [
233
+ 'src/index.js',
234
+ 'src/config.js',
235
+ 'src/pipeline.js',
236
+ 'src/spec-loader.js',
237
+ 'src/task-runner.js',
238
+ 'src/verifier.js',
239
+ ];
240
+
241
+ for (const rel of expectedSrcFiles) {
242
+ it(`fixture/${rel} exists and is non-empty`, () => {
243
+ const filePath = path.join(FIXTURE_DIR, rel);
244
+ assert.ok(fs.existsSync(filePath), `missing fixture/${rel}`);
245
+ const content = fs.readFileSync(filePath, 'utf8');
246
+ assert.ok(content.trim().length > 0, `fixture/${rel} is empty`);
247
+ });
248
+ }
249
+
250
+ it('fixture/src/skills/example-skill/SKILL.md has YAML frontmatter', () => {
251
+ const skillPath = path.join(FIXTURE_DIR, 'src', 'skills', 'example-skill', 'SKILL.md');
252
+ assert.ok(fs.existsSync(skillPath));
253
+ const content = fs.readFileSync(skillPath, 'utf8');
254
+ assert.ok(content.startsWith('---'), 'SKILL.md should start with YAML frontmatter');
255
+ assert.ok(content.includes('allowed-tools'), 'SKILL.md missing allowed-tools');
256
+ });
257
+
258
+ it('fixture/src/commands/df/example.md exists with frontmatter', () => {
259
+ const cmdPath = path.join(FIXTURE_DIR, 'src', 'commands', 'df', 'example.md');
260
+ assert.ok(fs.existsSync(cmdPath));
261
+ const content = fs.readFileSync(cmdPath, 'utf8');
262
+ assert.ok(content.startsWith('---'), 'example.md should start with YAML frontmatter');
263
+ });
264
+
265
+ it('fixture/hooks/invariant.js exists and is non-empty', () => {
266
+ const hookPath = path.join(FIXTURE_DIR, 'hooks', 'invariant.js');
267
+ assert.ok(fs.existsSync(hookPath));
268
+ const content = fs.readFileSync(hookPath, 'utf8');
269
+ assert.ok(content.trim().length > 0);
270
+ });
271
+
272
+ it('fixture/specs/ contains a doing-*.md file', () => {
273
+ const specFiles = fs.readdirSync(path.join(FIXTURE_DIR, 'specs'));
274
+ const doingSpecs = specFiles.filter(
275
+ (f) => f.startsWith('doing-') && f.endsWith('.md')
276
+ );
277
+ assert.ok(doingSpecs.length >= 1, 'no doing-*.md spec found in fixture/specs/');
278
+ });
279
+
280
+ it('fixture/.deepflow/decisions.md exists', () => {
281
+ const decPath = path.join(FIXTURE_DIR, '.deepflow', 'decisions.md');
282
+ assert.ok(fs.existsSync(decPath));
283
+ const content = fs.readFileSync(decPath, 'utf8');
284
+ assert.ok(content.trim().length > 0);
285
+ });
286
+ });
287
+
288
+ // ---------------------------------------------------------------------------
289
+ // Template test files (guard + behavior) look like valid node scripts
290
+ // ---------------------------------------------------------------------------
291
+
292
+ describe('tests/ are valid node scripts', () => {
293
+ it('guard.test.js uses fs and path modules', () => {
294
+ const content = fs.readFileSync(
295
+ path.join(TEMPLATE_DIR, 'tests', 'guard.test.js'),
296
+ 'utf8'
297
+ );
298
+ assert.ok(content.includes("require('fs')") || content.includes("require('node:fs')"));
299
+ assert.ok(content.includes("require('path')") || content.includes("require('node:path')"));
300
+ });
301
+
302
+ it('behavior.test.js uses fs and path modules', () => {
303
+ const content = fs.readFileSync(
304
+ path.join(TEMPLATE_DIR, 'tests', 'behavior.test.js'),
305
+ 'utf8'
306
+ );
307
+ assert.ok(content.includes("require('fs')") || content.includes("require('node:fs')"));
308
+ assert.ok(content.includes("require('path')") || content.includes("require('node:path')"));
309
+ });
310
+
311
+ it('guard.test.js references fixture dir', () => {
312
+ const content = fs.readFileSync(
313
+ path.join(TEMPLATE_DIR, 'tests', 'guard.test.js'),
314
+ 'utf8'
315
+ );
316
+ assert.ok(content.includes('fixture'), 'guard.test.js should reference fixture directory');
317
+ });
318
+ });
@@ -1,4 +1,7 @@
1
- # Explore Agent Pattern
1
+ # Explore Agent — Orchestrator Rules
2
+
3
+ Instructions for commands that **spawn** Explore agents (plan, spec, debate).
4
+ The agent itself receives `explore-protocol.md` automatically via hook.
2
5
 
3
6
  ## Spawn Rules
4
7
 
@@ -12,85 +15,13 @@ Task(subagent_type="Explore", model="haiku", prompt="Find: ...")
12
15
  # Returns final message only; blocks until all complete; no late notifications
13
16
  ```
14
17
 
15
- ## Search Protocol
16
-
17
- Exploration follows three named phases:
18
-
19
- ### DIVERSIFY
20
- - **Goal**: Find ALL potential matches across the codebase quickly
21
- - **Method**: Launch 5–8 parallel tool calls in a single message
22
- - **Tools**: Glob (broad patterns), Grep (regex searches), Read (file content verification)
23
- - **Result**: Narrow down to 2–5 candidate files
24
-
25
- Example: Search for "config" + "settings" + "env" patterns in parallel, not sequentially.
26
-
27
- ### CONVERGE
28
- - **Goal**: Validate matches against the search criteria
29
- - **Method**: Read only the matched files; extract relevant line ranges
30
- - **Result**: Eliminate false positives, confirm relevance
31
-
32
- ### EARLY STOP
33
- - **Goal**: Avoid wasting tokens on exhaustive searches
34
- - **Rule**: Stop as soon as **>= 2 relevant files found** that answer the question
35
- - **Exception**: If searching for a single unique thing (e.g., "the entry point file"), find just 1
36
-
37
18
  ## Prompt Structure
38
19
 
39
20
  ```
40
21
  Find: [specific question]
41
-
42
- Return ONLY:
43
- - filepath:startLine-endLine -- why relevant
44
- - Integration points (if asked)
45
-
46
- DO NOT: read/summarize specs, make recommendations, propose solutions, generate tables, narrate your search process.
47
-
48
- Max response: 500 tokens (configurable via .deepflow/config.yaml explore.max_tokens)
49
22
  ```
50
23
 
51
- ## Examples
52
-
53
- ### GOOD: Parallel search (2 turns total)
54
-
55
- **Turn 1 (DIVERSIFY):**
56
- ```
57
- - Glob: "src/**/*.ts" pattern="config" (search in all TS files)
58
- - Glob: "src/**/*.js" pattern="config" (search in all JS files)
59
- - Grep: pattern="export.*config", type="ts" (find exports)
60
- - Grep: pattern="interface.*Config", type="ts" (find type definitions)
61
- - Grep: pattern="class.*Settings", type="ts" (alternative pattern)
62
- - Read: src/index.ts (verify entry point structure)
63
- ```
64
-
65
- **Turn 2 (CONVERGE):**
66
- Return only confirmed matches:
67
- ```
68
- src/config/app.ts:1-45 -- main config export with environment settings
69
- src/config/types.ts:10-30 -- Config interface definition
70
- src/utils/settings.ts:1-20 -- Settings helper functions
71
- ```
72
-
73
- ### DO NOT: Sequential search (antipattern, 5+ turns)
74
-
75
- ```
76
- Turn 1: Glob for config files
77
- Turn 2: Read the first file
78
- Turn 3: Grep for config patterns
79
- Turn 4: Read results
80
- Turn 5: Another Grep search
81
- ... (narrating each step)
82
- ```
83
-
84
- This pattern wastes tokens and breaks context efficiency.
85
-
86
- ## Fallback
87
-
88
- Search dependency directories **only when not found in app code**:
89
- - `node_modules/` — npm packages
90
- - `vendor/` — vendored dependencies
91
- - `site-packages/` — Python packages
92
-
93
- Fallback instruction: "Check node_modules/ only if target not found in src/ or lib/"
24
+ Do NOT include search instructions in the prompt — the `df-explore-protocol` hook injects `explore-protocol.md` automatically.
94
25
 
95
26
  ## Scope Restrictions
96
27
 
@@ -0,0 +1,44 @@
1
+ # Search Protocol — MANDATORY
2
+
3
+ ## STEP 1: Your first message MUST start with these calls (LSP plus Grep/Glob, all in parallel):
4
+
5
+ ```
6
+ LSP(operation="workspaceSymbol", filePath="{any_file}", line=1, character=1)
7
+ LSP(operation="documentSymbol", filePath="{most_likely_file}", line=1, character=1)
8
+ LSP(operation="findReferences", filePath="{known_symbol_file}", line={symbol_line}, character={symbol_char})
9
+ Grep(pattern="...", path="...")
10
+ Glob(pattern="**/*keyword*")
11
+ ```
12
+
13
+ Replace placeholders with values relevant to the search query. Launch ALL in parallel (one message, 5-8 calls).
14
+
15
+ If an LSP call returns an error or an empty result, ignore it and rely on the Grep/Glob results.
16
+
17
+ ## STEP 2: CONVERGE on matches
18
+
19
+ - `LSP(operation="findReferences", ...)` on key symbols to trace usage
20
+ - `LSP(operation="documentSymbol", ...)` on matched files for line ranges
21
+ - `Read(offset=N, limit=M)` for only the relevant range — NEVER read full files
22
+
23
+ ## STEP 3: EARLY STOP
24
+
25
+ Stop as soon as >= 2 relevant files answer the question.
26
+
27
+ ---
28
+
29
+ Antipattern — NEVER do this:
30
+ ```
31
+ Turn 1: Glob → Turn 2: Read full file → Turn 3: Grep → Turn 4: Read → Turn 5: Grep
32
+ ```
33
+
34
+ Fallback: search `node_modules/` or `vendor/` ONLY when the target is not found in app code.
35
+
36
+ ---
37
+
38
+ ## OUTPUT FORMAT — your ENTIRE response MUST be ONLY these lines:
39
+
40
+ ```
41
+ filepath:startLine-endLine -- why relevant
42
+ ```
43
+
44
+ Nothing else. No narration. No headers. No tables. No explanations. Max 500 tokens.
@@ -1,3 +1,7 @@
1
+ ---
2
+ # derives-from: done-{parent-spec-name} # optional: links this spec to a parent for lineage/rework tracking
3
+ ---
4
+
1
5
  # {Name}
2
6
 
3
7
  ## Objective