deepflow 0.1.103 → 0.1.105
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install-dynamic-hooks.test.js +461 -0
- package/bin/install.js +171 -250
- package/bin/install.test.js +205 -0
- package/bin/lineage-ingest.js +70 -0
- package/hooks/df-check-update.js +1 -0
- package/hooks/df-command-usage.js +18 -0
- package/hooks/df-dashboard-push.js +5 -4
- package/hooks/df-dashboard-push.test.js +256 -0
- package/hooks/df-execution-history.js +1 -0
- package/hooks/df-explore-protocol.js +83 -0
- package/hooks/df-explore-protocol.test.js +228 -0
- package/hooks/df-hook-event-tags.test.js +127 -0
- package/hooks/df-invariant-check.js +4 -3
- package/hooks/df-invariant-check.test.js +141 -0
- package/hooks/df-quota-logger.js +12 -23
- package/hooks/df-quota-logger.test.js +324 -0
- package/hooks/df-snapshot-guard.js +1 -0
- package/hooks/df-spec-lint.js +58 -1
- package/hooks/df-spec-lint.test.js +412 -0
- package/hooks/df-statusline.js +1 -0
- package/hooks/df-subagent-registry.js +1 -0
- package/hooks/df-tool-usage.js +13 -3
- package/hooks/df-worktree-guard.js +1 -0
- package/package.json +1 -1
- package/src/commands/df/debate.md +1 -1
- package/src/commands/df/eval.md +117 -0
- package/src/commands/df/execute.md +1 -1
- package/src/commands/df/fix.md +104 -0
- package/src/eval/git-memory.js +159 -0
- package/src/eval/git-memory.test.js +439 -0
- package/src/eval/hypothesis.js +80 -0
- package/src/eval/hypothesis.test.js +169 -0
- package/src/eval/loop.js +378 -0
- package/src/eval/loop.test.js +306 -0
- package/src/eval/metric-collector.js +163 -0
- package/src/eval/metric-collector.test.js +369 -0
- package/src/eval/metric-pivot.js +119 -0
- package/src/eval/metric-pivot.test.js +350 -0
- package/src/eval/mutator-prompt.js +106 -0
- package/src/eval/mutator-prompt.test.js +180 -0
- package/templates/config-template.yaml +5 -6
- package/templates/eval-fixture-template/config.yaml +39 -0
- package/templates/eval-fixture-template/fixture/.deepflow/decisions.md +5 -0
- package/templates/eval-fixture-template/fixture/hooks/invariant.js +28 -0
- package/templates/eval-fixture-template/fixture/package.json +12 -0
- package/templates/eval-fixture-template/fixture/specs/doing-example-task.md +18 -0
- package/templates/eval-fixture-template/fixture/src/commands/df/example.md +18 -0
- package/templates/eval-fixture-template/fixture/src/config.js +40 -0
- package/templates/eval-fixture-template/fixture/src/index.js +19 -0
- package/templates/eval-fixture-template/fixture/src/pipeline.js +40 -0
- package/templates/eval-fixture-template/fixture/src/skills/example-skill/SKILL.md +32 -0
- package/templates/eval-fixture-template/fixture/src/spec-loader.js +35 -0
- package/templates/eval-fixture-template/fixture/src/task-runner.js +32 -0
- package/templates/eval-fixture-template/fixture/src/verifier.js +37 -0
- package/templates/eval-fixture-template/hypotheses.md +14 -0
- package/templates/eval-fixture-template/spec.md +34 -0
- package/templates/eval-fixture-template/tests/behavior.test.js +69 -0
- package/templates/eval-fixture-template/tests/guard.test.js +108 -0
- package/templates/eval-fixture-template.test.js +318 -0
- package/templates/explore-agent.md +5 -74
- package/templates/explore-protocol.md +44 -0
- package/templates/spec-template.md +4 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: df:fix
|
|
3
|
+
description: Create a new spec derived from a completed spec to address issues, regressions, or unmet acceptance criteria
|
|
4
|
+
allowed-tools: [Read, Write, AskUserQuestion]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# /df:fix {done-spec-name} — Create Fix Spec from Completed Spec
|
|
8
|
+
|
|
9
|
+
## Purpose
|
|
10
|
+
|
|
11
|
+
Creates a new spec file pre-populated with lineage from a `done-*` spec. Use when a completed feature has regressions, unmet ACs, or needs targeted fixes without reopening the original spec.
|
|
12
|
+
|
|
13
|
+
## Behavior
|
|
14
|
+
|
|
15
|
+
### 1. VALIDATE ARGUMENT
|
|
16
|
+
|
|
17
|
+
The command receives one argument: `{done-spec-name}` — the name of a completed spec, including its `done-` prefix (e.g., `done-auth`).
|
|
18
|
+
|
|
19
|
+
If no argument is provided, ask:
|
|
20
|
+
```
|
|
21
|
+
Which completed spec needs a fix? (e.g., done-auth)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### 2. READ PARENT SPEC
|
|
25
|
+
|
|
26
|
+
Read `specs/{done-spec-name}.md`.
|
|
27
|
+
|
|
28
|
+
If the file does not exist, show:
|
|
29
|
+
```
|
|
30
|
+
Error: specs/{done-spec-name}.md not found.
|
|
31
|
+
Make sure the spec exists and uses the done-* prefix.
|
|
32
|
+
```
|
|
33
|
+
Then stop.
|
|
34
|
+
|
|
35
|
+
Extract from the parent spec:
|
|
36
|
+
- **Objective** (from `## Objective` section)
|
|
37
|
+
- **Acceptance Criteria** (all items from `## Acceptance Criteria` section)
|
|
38
|
+
|
|
39
|
+
### 3. ASK WHAT NEEDS FIXING
|
|
40
|
+
|
|
41
|
+
Use `AskUserQuestion` to ask (max 4 questions per call):
|
|
42
|
+
|
|
43
|
+
1. What is broken or not working as expected?
|
|
44
|
+
2. Which acceptance criteria from the original spec are failing, incomplete, or unverified? (show the extracted ACs as reference)
|
|
45
|
+
3. Any new constraints or scope boundaries for this fix?
|
|
46
|
+
|
|
47
|
+
### 4. CREATE FIX SPEC
|
|
48
|
+
|
|
49
|
+
Determine a short name for the fix spec. Default: `fix-{done-spec-name-without-done-prefix}` (e.g., `fix-auth`).
|
|
50
|
+
|
|
51
|
+
Create `specs/{fix-name}.md`:
|
|
52
|
+
|
|
53
|
+
```markdown
|
|
54
|
+
---
|
|
55
|
+
derives-from: {done-spec-name}
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
# Fix: {Title derived from done-spec-name}
|
|
59
|
+
|
|
60
|
+
## Objective
|
|
61
|
+
|
|
62
|
+
Fix issues in `{done-spec-name}`: {one sentence summarizing what needs to be fixed, from user input}
|
|
63
|
+
|
|
64
|
+
## Requirements
|
|
65
|
+
|
|
66
|
+
- **REQ-1**: [Requirement based on user-described issue]
|
|
67
|
+
|
|
68
|
+
## Constraints
|
|
69
|
+
|
|
70
|
+
- Scope limited to fixing regressions/gaps from `{done-spec-name}`
|
|
71
|
+
- Must not break passing ACs from the parent spec
|
|
72
|
+
|
|
73
|
+
## Acceptance Criteria
|
|
74
|
+
|
|
75
|
+
<!-- Failing ACs carried over from parent spec -->
|
|
76
|
+
{carried-over failing ACs as unchecked items}
|
|
77
|
+
|
|
78
|
+
<!-- New ACs for this fix -->
|
|
79
|
+
- [ ] {new criterion from user input, if any}
|
|
80
|
+
|
|
81
|
+
## Technical Notes
|
|
82
|
+
|
|
83
|
+
Parent spec: `specs/{done-spec-name}.md`
|
|
84
|
+
Parent objective: {parent objective text}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### 5. CONFIRM
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
Created specs/{fix-name}.md
|
|
91
|
+
|
|
92
|
+
derives-from: {done-spec-name}
|
|
93
|
+
Carried over {N} ACs from parent spec
|
|
94
|
+
|
|
95
|
+
Next: Run /df:plan {fix-name} to generate fix tasks
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Rules
|
|
99
|
+
|
|
100
|
+
- Always set `derives-from` in the frontmatter — this is the lineage anchor
|
|
101
|
+
- Carry over only ACs that are failing or unverified; do not duplicate passing ones
|
|
102
|
+
- Keep fix specs narrowly scoped — no scope creep beyond the stated issue
|
|
103
|
+
- Do not reopen or modify the parent `done-*` spec
|
|
104
|
+
- Fix spec name must start with `fix-` by default; user may override
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { execFileSync, execSync } = require('child_process');
|
|
4
|
+
|
|
5
|
+
/**
 * Formats the commit message for an experiment commit.
 * Format: experiment({skillName}): {hypothesis} | {target}={value} delta={delta}% {status} | {secondaries}
 *
 * The message is meant to be read back from the commit subject, so it has to
 * stay on one line. Line breaks inside the free-text fields (hypothesis and
 * secondaries) are folded, together with any whitespace around them, into a
 * single space.
 *
 * @param {object} fields
 * @param {string} fields.skillName - Skill being evaluated
 * @param {string} fields.hypothesis - Short hypothesis string
 * @param {string} fields.target - Primary metric name
 * @param {string|number} fields.value - Primary metric value
 * @param {string|number} fields.delta - Delta percentage (sign included)
 * @param {string} fields.status - Experiment status
 * @param {string} [fields.secondaries] - Secondary metrics; null/undefined becomes ''
 * @returns {string} The single-line commit message
 */
function formatCommitMessage({ skillName, hypothesis, target, value, delta, status, secondaries }) {
  // Fold each run of line breaks (plus adjacent whitespace) into one space.
  const toSingleLine = (text) => String(text).replace(/\s*[\r\n]+\s*/g, ' ');

  const hypothesisStr = toSingleLine(hypothesis);
  const secondariesStr = secondaries != null ? toSingleLine(secondaries) : '';
  return `experiment(${skillName}): ${hypothesisStr} | ${target}=${value} delta=${delta}% ${status} | ${secondariesStr}`;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Commits all staged/unstaged changes as an experiment commit.
 * AC-10, AC-13: Each experiment gets exactly one commit before verification.
 *
 * git is invoked with an argument array (no shell), so characters such as
 * `$`, backticks or quotes inside the hypothesis reach git verbatim instead
 * of being interpreted by a shell.
 *
 * @param {object} opts
 * @param {string} opts.cwd - Working directory (git repo root)
 * @param {string} opts.skillName - Skill being evaluated
 * @param {string} opts.hypothesis - Short hypothesis string
 * @param {string} opts.target - Primary metric name
 * @param {string|number} opts.value - Primary metric value
 * @param {string|number} opts.delta - Delta percentage (numeric, sign included)
 * @param {string} opts.status - "pass" | "fail" | "inconclusive"
 * @param {string} [opts.secondaries] - Secondary metrics string
 * @returns {string} The commit hash (short)
 * @throws {Error} If any of the git commands exits with a non-zero status
 */
function commitExperiment({ cwd, skillName, hypothesis, target, value, delta, status, secondaries }) {
  const message = formatCommitMessage({ skillName, hypothesis, target, value, delta, status, secondaries });

  // Stage all changes so the commit captures the experiment state
  execFileSync('git', ['add', '-A'], { cwd, stdio: 'pipe' });
  // The message is passed as its own argv entry: no shell quoting involved.
  execFileSync('git', ['commit', '-m', message], { cwd, stdio: 'pipe' });

  const hash = execFileSync('git', ['rev-parse', '--short', 'HEAD'], { cwd, stdio: 'pipe' })
    .toString()
    .trim();
  return hash;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Undoes the commit at HEAD by creating a revert commit
 * (`git revert HEAD --no-edit`).
 * AC-7: the failed experiment stays in history; nothing is reset or amended.
 *
 * @param {object} opts
 * @param {string} opts.cwd - Working directory (git repo root)
 * @returns {string} Short hash of the new revert commit
 */
function revertExperiment({ cwd }) {
  // Fresh options object per call, exactly as many as git invocations.
  const gitOptions = () => ({ cwd, stdio: 'pipe' });

  execSync('git revert HEAD --no-edit', gitOptions());

  const headOutput = execSync('git rev-parse --short HEAD', gitOptions());
  return headOutput.toString().trim();
}
|
|
53
|
+
|
|
54
|
+
/**
 * Parses a single commit subject line into a structured experiment record.
 * Returns null if the line does not match the experiment format.
 *
 * Expected shape:
 *   experiment({skillName}): {hypothesis} | {target}={value} delta={delta}% {status} | {secondaries}
 *
 * The metrics segment is located by its pattern rather than by position, so a
 * hypothesis (or secondaries string) that itself contains `|` still parses:
 * everything before the first metrics-shaped segment is the hypothesis and
 * everything after it is the secondaries.
 *
 * @param {string} hash - Commit hash, copied into the result unchanged
 * @param {string} subject - Commit subject line
 * @returns {{hash: string, skillName: string, hypothesis: string, target: string,
 *   value: string, delta: number, status: string, secondaries: string}|null}
 */
function parseExperimentLine(hash, subject) {
  if (typeof subject !== 'string') return null;

  // Header: experiment({skillName}): followed by the pipe-separated body.
  const headerMatch = subject.match(/^experiment\(([^)]+)\):\s*(.*)$/);
  if (!headerMatch) return null;

  const [, parsedSkillName, body] = headerMatch;
  const segments = body.split('|');

  // The metrics segment needs a pipe on both sides, hence the 1..length-2 range.
  for (let i = 1; i < segments.length - 1; i += 1) {
    // Parse metrics: {target}={value} delta={delta}% {status}
    const metricsMatch = segments[i].trim().match(/^(\S+)=(\S+)\s+delta=([-+]?[\d.]+)%\s+(\S+)$/);
    if (!metricsMatch) continue;

    const [, target, value, delta, status] = metricsMatch;
    return {
      hash,
      skillName: parsedSkillName,
      hypothesis: segments.slice(0, i).join('|').trim(),
      target,
      value,
      delta: parseFloat(delta),
      status,
      secondaries: segments.slice(i + 1).join('|').trim(),
    };
  }

  return null;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Queries git log for experiment commits matching a given skill.
 * AC-10: Uses `git log --grep` to retrieve complete experiment history.
 *
 * git is invoked with an argument array (no shell) and `--fixed-strings`, so
 * the skill name is matched literally: shell metacharacters are not expanded
 * and regex metacharacters such as `.` or `*` do not widen the match.
 * Because `--grep` searches the whole commit message, results are also
 * checked against the skill name parsed from the subject.
 *
 * @param {object} opts
 * @param {string} opts.cwd - Working directory (git repo root)
 * @param {string} [opts.skillName] - If provided, filters by experiment({skillName}):
 * @returns {Array<{hash, skillName, hypothesis, target, value, delta, status, secondaries}>}
 *   Parsed experiments in `git log` order; [] when git fails or nothing matches
 */
function queryExperiments({ cwd, skillName }) {
  const grepPattern = skillName
    ? `experiment(${skillName}):`
    : 'experiment(';

  let output;
  try {
    output = execFileSync(
      'git',
      ['log', '--fixed-strings', `--grep=${grepPattern}`, '--format=%H %s'],
      { cwd, stdio: 'pipe' }
    ).toString().trim();
  } catch {
    // Best effort: a failing `git log` (e.g. no commits yet) means "no history".
    return [];
  }

  if (!output) return [];

  const results = [];
  for (const line of output.split('\n')) {
    // Each line is "<full hash> <subject>"; split on the first space only.
    const spaceIdx = line.indexOf(' ');
    if (spaceIdx === -1) continue;

    const parsed = parseExperimentLine(line.slice(0, spaceIdx), line.slice(spaceIdx + 1));
    if (!parsed) continue;
    // Drop commits that only mention the requested skill in their body.
    if (skillName && parsed.skillName !== skillName) continue;

    results.push(parsed);
  }
  return results;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Renders experiment history as text, one line per experiment, for embedding
 * in a mutator prompt.
 * AC-10: Provides the complete experiment history for a skill.
 *
 * Line shape:
 *   [{hash}] {skillName}: {hypothesis} => {target}={value} delta={delta}% {status}[ | {secondaries}]
 *
 * @param {object} opts
 * @param {string} opts.cwd - Working directory (git repo root)
 * @param {string} [opts.skillName] - Filter by skill name
 * @param {number} [opts.maxEntries=20] - Maximum number of entries to return
 * @returns {string} Formatted history or "(no experiment history)" if empty
 */
function getExperimentHistory({ cwd, skillName, maxEntries = 20 }) {
  const recent = queryExperiments({ cwd, skillName }).slice(0, maxEntries);
  if (recent.length === 0) return '(no experiment history)';

  const rendered = [];
  for (const record of recent) {
    // The secondaries suffix is only appended when there is something to show.
    let suffix = '';
    if (record.secondaries) {
      suffix = ` | ${record.secondaries}`;
    }
    rendered.push(
      `[${record.hash}] ${record.skillName}: ${record.hypothesis} => ${record.target}=${record.value} delta=${record.delta}% ${record.status}${suffix}`
    );
  }
  return rendered.join('\n');
}
|
|
150
|
+
|
|
151
|
+
module.exports = {
|
|
152
|
+
commitExperiment,
|
|
153
|
+
revertExperiment,
|
|
154
|
+
queryExperiments,
|
|
155
|
+
getExperimentHistory,
|
|
156
|
+
// exported for testing
|
|
157
|
+
formatCommitMessage,
|
|
158
|
+
parseExperimentLine,
|
|
159
|
+
};
|
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { describe, it, before, after } = require('node:test');
|
|
4
|
+
const assert = require('node:assert');
|
|
5
|
+
const fs = require('fs');
|
|
6
|
+
const path = require('path');
|
|
7
|
+
const os = require('os');
|
|
8
|
+
const { execSync } = require('child_process');
|
|
9
|
+
|
|
10
|
+
const {
|
|
11
|
+
commitExperiment,
|
|
12
|
+
revertExperiment,
|
|
13
|
+
queryExperiments,
|
|
14
|
+
getExperimentHistory,
|
|
15
|
+
formatCommitMessage,
|
|
16
|
+
parseExperimentLine,
|
|
17
|
+
} = require('./git-memory.js');
|
|
18
|
+
|
|
19
|
+
/**
 * Creates a temporary git repo with an initial commit.
 * Returns the directory path.
 *
 * Identity and commit signing are configured locally so the fixture does not
 * depend on the developer's global git config (a global `commit.gpgsign=true`
 * would otherwise apply to every commit made in the test repo).
 *
 * @returns {string} Absolute path of the new repository
 */
function createTempRepo() {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-memory-test-'));
  execSync('git init', { cwd: dir, stdio: 'pipe' });
  execSync('git config user.email "test@test.com"', { cwd: dir, stdio: 'pipe' });
  execSync('git config user.name "Test"', { cwd: dir, stdio: 'pipe' });
  execSync('git config commit.gpgsign false', { cwd: dir, stdio: 'pipe' });
  // Initial commit so we have a HEAD
  fs.writeFileSync(path.join(dir, 'README.md'), '# test repo\n');
  execSync('git add -A && git commit -m "initial commit"', { cwd: dir, stdio: 'pipe' });
  return dir;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Deletes a fixture repository directory and everything inside it.
 * `force` makes an already-missing directory a no-op instead of an error.
 *
 * @param {string} dir - Directory to remove
 */
function cleanupRepo(dir) {
  const removalOptions = { recursive: true, force: true };
  fs.rmSync(dir, removalOptions);
}
|
|
37
|
+
|
|
38
|
+
// --- Unit tests for pure functions ---
|
|
39
|
+
|
|
40
|
+
describe('formatCommitMessage', () => {
  it('formats a complete experiment commit message', () => {
    const fields = {
      skillName: 'browse-fetch',
      hypothesis: 'reduce timeout to 5s',
      target: 'latency',
      value: '4.2s',
      delta: '-16',
      status: 'pass',
      secondaries: 'accuracy=98%',
    };
    const expected = 'experiment(browse-fetch): reduce timeout to 5s | latency=4.2s delta=-16% pass | accuracy=98%';

    assert.strictEqual(formatCommitMessage(fields), expected);
  });

  it('handles missing secondaries as empty string', () => {
    const fields = {
      skillName: 'skill-a',
      hypothesis: 'test hyp',
      target: 'speed',
      value: '10',
      delta: '+5',
      status: 'fail',
      secondaries: undefined,
    };
    const msg = formatCommitMessage(fields);

    // The trailing separator is kept even when there is nothing after it.
    assert.ok(msg.endsWith('| '), `Expected message to end with "| " but got: ${msg}`);
  });

  it('handles numeric value and delta', () => {
    const fields = {
      skillName: 'x',
      hypothesis: 'h',
      target: 't',
      value: 42,
      delta: -3.5,
      status: 'pass',
      secondaries: null,
    };
    const msg = formatCommitMessage(fields);

    for (const fragment of ['t=42', 'delta=-3.5%']) {
      assert.ok(msg.includes(fragment));
    }
  });
});
|
|
84
|
+
|
|
85
|
+
describe('parseExperimentLine', () => {
  it('parses a well-formed experiment subject', () => {
    const expected = {
      hash: 'abc123',
      skillName: 'browse-fetch',
      hypothesis: 'reduce timeout',
      target: 'latency',
      value: '4.2s',
      delta: -16,
      status: 'pass',
      secondaries: 'accuracy=98%',
    };
    const actual = parseExperimentLine(
      'abc123',
      'experiment(browse-fetch): reduce timeout | latency=4.2s delta=-16% pass | accuracy=98%'
    );

    assert.deepStrictEqual(actual, expected);
  });

  it('returns null for non-experiment commits', () => {
    for (const subject of ['feat(core): add feature', 'random text']) {
      assert.strictEqual(parseExperimentLine('abc', subject), null);
    }
  });

  it('returns null for malformed metrics section', () => {
    // Right outer shape, but the middle segment is not "{target}={value} delta=…".
    assert.strictEqual(parseExperimentLine('abc', 'experiment(x): hyp | bad-metrics | sec'), null);
  });

  it('parses positive delta', () => {
    const { delta } = parseExperimentLine('def', 'experiment(s): h | metric=100 delta=+12.5% pass | ');
    assert.strictEqual(delta, 12.5);
  });

  it('handles empty secondaries', () => {
    const { secondaries, status } = parseExperimentLine('ghi', 'experiment(s): h | m=1 delta=0% inconclusive | ');
    assert.strictEqual(secondaries, '');
    assert.strictEqual(status, 'inconclusive');
  });
});
|
|
124
|
+
|
|
125
|
+
// --- Integration tests with temp git repos ---
|
|
126
|
+
|
|
127
|
+
describe('commitExperiment', () => {
  let cwd;

  // Runs a git command inside the fixture repo and returns its trimmed stdout.
  const git = (command) => execSync(command, { cwd, stdio: 'pipe' }).toString().trim();

  before(() => {
    cwd = createTempRepo();
  });

  after(() => {
    cleanupRepo(cwd);
  });

  it('creates a commit with correctly formatted message (AC-10)', () => {
    // A working-tree change is needed so there is something to commit.
    fs.writeFileSync(path.join(cwd, 'experiment1.txt'), 'trial 1\n');

    const experiment = {
      skillName: 'browse-fetch',
      hypothesis: 'reduce timeout to 5s',
      target: 'latency',
      value: '4.2s',
      delta: '-16',
      status: 'pass',
      secondaries: 'accuracy=98%',
    };
    const hash = commitExperiment({ cwd, ...experiment });

    assert.ok(typeof hash === 'string');
    assert.ok(hash.length >= 7, `Hash should be at least 7 chars, got: ${hash}`);

    // The subject of the newest commit must be the formatted experiment line.
    assert.strictEqual(
      git('git log -1 --format=%s'),
      'experiment(browse-fetch): reduce timeout to 5s | latency=4.2s delta=-16% pass | accuracy=98%'
    );
  });

  it('stages all changes before committing', () => {
    // The file is never `git add`-ed here; commitExperiment has to stage it.
    fs.writeFileSync(path.join(cwd, 'untracked.txt'), 'new file\n');

    const experiment = {
      skillName: 'test-skill',
      hypothesis: 'auto-stage',
      target: 'coverage',
      value: '80',
      delta: '+2',
      status: 'pass',
      secondaries: '',
    };
    commitExperiment({ cwd, ...experiment });

    // Files touched by HEAD must include the previously untracked file.
    const committedFiles = git('git diff-tree --no-commit-id --name-only -r HEAD');
    assert.ok(committedFiles.includes('untracked.txt'));
  });
});
|
|
185
|
+
|
|
186
|
+
describe('revertExperiment (AC-7)', () => {
  let cwd;

  // Raw stdout of a git command run inside the fixture repo.
  const gitOutput = (command) => execSync(command, { cwd, stdio: 'pipe' }).toString();
  const headSubject = () => gitOutput('git log -1 --format=%s').trim();

  before(() => {
    cwd = createTempRepo();
  });

  after(() => {
    cleanupRepo(cwd);
  });

  it('uses git revert (not reset) — preserves experiment commit in history', () => {
    // Record an experiment that will then be reverted.
    fs.writeFileSync(path.join(cwd, 'exp.txt'), 'experiment data\n');
    const failedExperiment = {
      skillName: 'skill-a',
      hypothesis: 'bad idea',
      target: 'speed',
      value: '10',
      delta: '-5',
      status: 'fail',
      secondaries: '',
    };
    commitExperiment({ cwd, ...failedExperiment });

    const experimentSubject = headSubject();

    const revertHash = revertExperiment({ cwd });
    assert.ok(typeof revertHash === 'string');

    // HEAD is now the revert commit.
    const revertSubject = headSubject();
    assert.ok(revertSubject.startsWith('Revert'), `Expected revert commit to start with "Revert", got: ${revertSubject}`);

    // The experiment commit itself must still be reachable in the log.
    const fullLog = gitOutput('git log --format=%s');
    assert.ok(fullLog.includes(experimentSubject), 'Original experiment commit should remain in history after revert');
  });

  it('reverted file content matches pre-experiment state', () => {
    const statePath = path.join(cwd, 'state.txt');

    // Baseline content, committed outside the experiment flow.
    fs.writeFileSync(statePath, 'before\n');
    execSync('git add -A && git commit -m "baseline"', { cwd, stdio: 'pipe' });

    // The experiment overwrites the file, gets committed, then reverted.
    fs.writeFileSync(statePath, 'after experiment\n');
    const experiment = {
      skillName: 's',
      hypothesis: 'h',
      target: 't',
      value: '1',
      delta: '0',
      status: 'fail',
      secondaries: '',
    };
    commitExperiment({ cwd, ...experiment });

    revertExperiment({ cwd });

    assert.strictEqual(fs.readFileSync(statePath, 'utf-8'), 'before\n');
  });
});
|
|
249
|
+
|
|
250
|
+
describe('queryExperiments', () => {
  let cwd;

  before(() => {
    cwd = createTempRepo();

    // Three experiment commits across two skills, oldest first.
    const fixtures = [
      {
        file: 'a.txt',
        content: '1',
        experiment: {
          skillName: 'skill-a', hypothesis: 'hyp-a1',
          target: 'speed', value: '100', delta: '+10', status: 'pass', secondaries: 'mem=50MB',
        },
      },
      {
        file: 'b.txt',
        content: '2',
        experiment: {
          skillName: 'skill-b', hypothesis: 'hyp-b1',
          target: 'accuracy', value: '95', delta: '-2', status: 'fail', secondaries: '',
        },
      },
      {
        file: 'c.txt',
        content: '3',
        experiment: {
          skillName: 'skill-a', hypothesis: 'hyp-a2',
          target: 'speed', value: '120', delta: '+20', status: 'pass', secondaries: '',
        },
      },
    ];

    for (const { file, content, experiment } of fixtures) {
      fs.writeFileSync(path.join(cwd, file), content);
      commitExperiment({ cwd, ...experiment });
    }
  });

  after(() => {
    cleanupRepo(cwd);
  });

  it('returns all experiments when no skillName filter', () => {
    assert.strictEqual(queryExperiments({ cwd }).length, 3);
  });

  it('filters by skillName', () => {
    const results = queryExperiments({ cwd, skillName: 'skill-a' });
    assert.strictEqual(results.length, 2);
    results.forEach((record) => {
      assert.strictEqual(record.skillName, 'skill-a');
    });
  });

  it('returns empty array for unknown skill', () => {
    assert.deepStrictEqual(queryExperiments({ cwd, skillName: 'nonexistent' }), []);
  });

  it('parses commit messages into structured objects with correct fields', () => {
    // git log lists the most recent commit first.
    const [newest] = queryExperiments({ cwd, skillName: 'skill-a' });
    assert.strictEqual(newest.hypothesis, 'hyp-a2');
    assert.strictEqual(newest.target, 'speed');
    assert.strictEqual(newest.value, '120');
    assert.strictEqual(newest.delta, 20);
    assert.strictEqual(newest.status, 'pass');
  });

  it('returns empty array in repo with no experiment commits', () => {
    const emptyRepo = createTempRepo();
    try {
      assert.deepStrictEqual(queryExperiments({ cwd: emptyRepo, skillName: 'anything' }), []);
    } finally {
      cleanupRepo(emptyRepo);
    }
  });
});
|
|
319
|
+
|
|
320
|
+
describe('getExperimentHistory', () => {
  let cwd;

  // Splits the formatted history for a query into its individual lines.
  const historyLines = (query) => getExperimentHistory({ cwd, ...query }).split('\n');

  before(() => {
    cwd = createTempRepo();

    // Five "perf" experiments; even-numbered trials are recorded as failures.
    for (let trial = 1; trial <= 5; trial++) {
      fs.writeFileSync(path.join(cwd, `file${trial}.txt`), `${trial}`);
      const status = trial % 2 === 0 ? 'fail' : 'pass';
      commitExperiment({
        cwd,
        skillName: 'perf',
        hypothesis: `trial-${trial}`,
        target: 'throughput',
        value: `${trial * 100}`,
        delta: `+${trial}`,
        status,
        secondaries: '',
      });
    }
  });

  after(() => {
    cleanupRepo(cwd);
  });

  it('returns formatted multi-line string', () => {
    const lines = historyLines({ skillName: 'perf' });
    assert.strictEqual(lines.length, 5);
    // Every line carries the bracketed hash prefix and the skill name.
    lines.forEach((line) => {
      assert.ok(line.includes('perf:'), `Line should contain "perf:": ${line}`);
      assert.ok(line.startsWith('['), `Line should start with "[": ${line}`);
    });
  });

  it('respects maxEntries', () => {
    assert.strictEqual(historyLines({ skillName: 'perf', maxEntries: 2 }).length, 2);
  });

  it('defaults maxEntries to 20', () => {
    // Only 5 experiments exist, so all of them fit under the default limit.
    assert.strictEqual(historyLines({ skillName: 'perf' }).length, 5);
  });

  it('returns "(no experiment history)" when no experiments match', () => {
    assert.strictEqual(
      getExperimentHistory({ cwd, skillName: 'nonexistent' }),
      '(no experiment history)'
    );
  });

  it('returns "(no experiment history)" for empty repo', () => {
    const emptyRepo = createTempRepo();
    try {
      assert.strictEqual(getExperimentHistory({ cwd: emptyRepo }), '(no experiment history)');
    } finally {
      cleanupRepo(emptyRepo);
    }
  });
});
|
|
379
|
+
|
|
380
|
+
describe('round-trip: commit → query → verify', () => {
  let cwd;

  before(() => {
    cwd = createTempRepo();
  });

  after(() => {
    cleanupRepo(cwd);
  });

  it('parsed fields match original input', () => {
    const input = {
      skillName: 'round-trip-skill',
      hypothesis: 'cache improves latency',
      target: 'p99',
      value: '42ms',
      delta: '-33',
      status: 'pass',
      secondaries: 'p50=12ms cpu=65%',
    };

    fs.writeFileSync(path.join(cwd, 'rt.txt'), 'round-trip test\n');
    const commitHash = commitExperiment({ cwd, ...input });

    const experiments = queryExperiments({ cwd, skillName: 'round-trip-skill' });
    assert.strictEqual(experiments.length, 1);

    const [parsed] = experiments;
    // String fields must come back exactly as they went in.
    for (const field of ['skillName', 'hypothesis', 'target', 'value']) {
      assert.strictEqual(parsed[field], input[field]);
    }
    // delta is written as text and read back as a number.
    assert.strictEqual(parsed.delta, parseFloat(input.delta));
    assert.strictEqual(parsed.status, input.status);
    assert.strictEqual(parsed.secondaries, input.secondaries);

    // Hash from commit should match hash from query (full vs short may differ, but short should be prefix)
    const hashesRelated =
      parsed.hash.startsWith(commitHash) || commitHash.startsWith(parsed.hash.slice(0, 7));
    assert.ok(hashesRelated, `Hashes should be related: commit=${commitHash}, query=${parsed.hash}`);
  });

  it('round-trip with numeric delta preserves sign', () => {
    fs.writeFileSync(path.join(cwd, 'rt2.txt'), 'test\n');
    const experiment = {
      skillName: 'sign-test',
      hypothesis: 'negative delta',
      target: 'errors',
      value: '3',
      delta: '-50',
      status: 'pass',
      secondaries: '',
    };
    commitExperiment({ cwd, ...experiment });

    const [result] = queryExperiments({ cwd, skillName: 'sign-test' });
    assert.strictEqual(result.delta, -50);
  });
});
|