@dalzoubi/dev-agents-sync 1.0.12 → 1.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* tests/define-estimate-flow.test.mjs
|
|
3
|
+
*
|
|
4
|
+
* Slice: rewrite-define-estimate-flow
|
|
5
|
+
*
|
|
6
|
+
* Asserts that prompts/agents/define.md has been updated to replace the
|
|
7
|
+
* per-slice estimate Q&A flow (asking the user one question per slice) with a
|
|
8
|
+
* model-generated estimate flow that:
|
|
9
|
+
* - generates estimates from slice scope and success criteria
|
|
10
|
+
* - presents them with one-line rationales inline
|
|
11
|
+
* - accepts per-line user overrides validated against ^\d+(\.5)?$
|
|
12
|
+
* - refuses to finalize on low confidence and asks one targeted clarification
|
|
13
|
+
*
|
|
14
|
+
* These tests FAIL before the implementation change and PASS after.
|
|
15
|
+
*
|
|
16
|
+
* Path under test: prompts/agents/define.md (relative to repo root)
|
|
17
|
+
* Test runner: node --test (Node built-in, >= 20)
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { describe, it, before } from 'node:test';
|
|
21
|
+
import assert from 'node:assert/strict';
|
|
22
|
+
import { readFileSync } from 'node:fs';
|
|
23
|
+
import { fileURLToPath } from 'node:url';
|
|
24
|
+
import path from 'node:path';
|
|
25
|
+
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// Resolve the file under test relative to this file's location.
|
|
28
|
+
// This repo layout: packages/dev-agents-sync/tests/ -> repo root is 3 levels up.
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
// Absolute path of this test module, derived from its file:// URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Three directory levels above this file, then down to the prompt under test.
const REPO_ROOT = path.resolve(__dirname, '../../..');
const DEFINE_MD = path.join(REPO_ROOT, 'prompts', 'agents', 'define.md');

// Full text of define.md; assigned by the hook below and read by the tests.
let content;

before(function loadDefinePrompt() {
  content = readFileSync(DEFINE_MD, { encoding: 'utf8' });
});
|
|
40
|
+
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
// Criterion 1 — the OLD per-slice Q&A pattern must be gone
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
/**
 * Criterion 1: the legacy estimate flow, which asked the user one question per
 * slice, one at a time, must no longer be present in define.md.
 */
describe('define.md — per-slice estimate Q&A removed', () => {
  it('does NOT instruct the agent to ask the user for estimates one at a time', () => {
    // Words are joined with \s+ rather than literal spaces so the legacy phrase
    // is still detected when the Markdown hard-wraps it across lines or uses
    // repeated spaces; with literal spaces such text would slip past this
    // negative check. [^.]* keeps both halves of the phrase within one sentence.
    const oldPattern = /ask\s+one\s+question\s+per\s+slice[^.]*one\s+at\s+a\s+time/i;

    assert.doesNotMatch(
      content,
      oldPattern,
      'define.md must not contain "ask one question per slice … one at a time" for estimates',
    );
  });
});
|
|
56
|
+
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
// Criterion 2 — model GENERATES estimates from scope and success criteria
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
/**
 * Criterion 2: define.md tells the agent to produce estimates itself, from the
 * slice scope and/or success criteria.
 */
describe('define.md — model-generated estimates', () => {
  it('instructs the agent to generate estimates from slice scope and success criteria', () => {
    // Matching any single phrasing below is enough for the check to pass.
    const acceptedPhrasings = [
      /generat\w*\s+estimate/i,
      /estimate\w*\s+(?:from|based on|using)\s+(?:the\s+)?(?:slice\s+)?scope/i,
      /estimate\w*\s+(?:from|based on|using)\s+(?:the\s+)?success\s+criteri/i,
      /(?:scope|success\s+criteri)\w*.*\bto\s+(?:derive|compute|produce|generate)\s+estimate/i,
    ];
    const generatesFromScope = acceptedPhrasings.some((phrasing) => phrasing.test(content));

    assert.ok(
      generatesFromScope,
      'define.md must instruct the agent to generate estimates from slice scope and/or success criteria',
    );
  });
});
|
|
77
|
+
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
// Criterion 3 — exact calibration anchor sentence
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
/**
 * Criterion 3: define.md contains the calibration anchor sentence verbatim.
 */
describe('define.md — calibration anchor', () => {
  it('contains the exact calibration anchor sentence', () => {
    const anchor = '1 day = one focused engineer-day of implementation work, excluding review and deploy';

    // Exact, case-sensitive substring match — no regex flexibility here.
    const anchorFound = content.includes(anchor);

    assert.ok(anchorFound, `define.md must contain the exact calibration anchor: "${anchor}"`);
  });
});
|
|
91
|
+
|
|
92
|
+
// ---------------------------------------------------------------------------
|
|
93
|
+
// Criterion 4 — estimates presented with one-line rationales inline
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
/**
 * Criterion 4: define.md asks for a one-line rationale next to each estimate.
 */
describe('define.md — inline rationales', () => {
  it('instructs the agent to present estimates with one-line rationales inline', () => {
    // Alternative wordings of the rationale instruction; one match suffices.
    const acceptedPhrasings = [
      /one[- ]line\s+rationale/i,
      /rationale\s+(?:for\s+)?(?:each|every|per)[- ](?:slice\s+)?estimate/i,
      /(?:present|show|display|include)\s+estimates?\s+with\s+(?:a\s+)?(?:one[- ]line\s+)?rationale/i,
    ];
    const rationaleInstruction = acceptedPhrasings.some((phrasing) => phrasing.test(content));

    assert.ok(
      rationaleInstruction,
      'define.md must instruct the agent to present estimates with one-line rationales inline',
    );
  });
});
|
|
110
|
+
|
|
111
|
+
// ---------------------------------------------------------------------------
|
|
112
|
+
// Criterion 5 — per-line overrides validated against ^\d+(\.5)?$
|
|
113
|
+
// ---------------------------------------------------------------------------
|
|
114
|
+
|
|
115
|
+
/**
 * Criterion 5: define.md lets the user override individual estimates and still
 * carries the validation regex for them.
 */
describe('define.md — per-line overrides', () => {
  it('instructs the agent to accept per-line overrides', () => {
    // Alternative wordings of the override instruction; one match suffices.
    const acceptedPhrasings = [
      /per[- ]line\s+override/i,
      /override\w*\s+(?:per|each|any)\s+(?:line|slice|estimate)/i,
      /(?:accept|allow)\s+(?:per[- ]line|inline)\s+override/i,
    ];
    const overrideInstruction = acceptedPhrasings.some((phrasing) => phrasing.test(content));

    assert.ok(
      overrideInstruction,
      'define.md must instruct the agent to accept per-line overrides on the generated estimates',
    );
  });

  it('still validates overrides against ^\\d+(\\.5)?$', () => {
    // Literal substring check: the regex text itself must appear in the prompt.
    const regexTextPresent = content.includes('^\\d+(\\.5)?$');

    assert.ok(
      regexTextPresent,
      'define.md must still validate estimates against ^\\d+(\\.5)?$',
    );
  });
});
|
|
136
|
+
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
// Criterion 6 — refuse to finalize on low confidence; ask one targeted question
|
|
139
|
+
// ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
/**
 * Criterion 6: on low confidence, define.md has the agent hold off finalizing
 * and ask a single targeted clarification question.
 */
describe('define.md — low-confidence refusal', () => {
  it('instructs the agent to refuse to finalize on low confidence', () => {
    // One match suffices. Note that the first pattern on its own accepts any
    // mention of "low confidence", with or without refusal wording around it.
    const acceptedPhrasings = [
      /low\s+confidence/i,
      /(?:refuse|do\s+not|don'?t)\s+(?:finalize|proceed|write)\s+(?:the\s+spec\s+)?(?:on|when|if)\s+(?:low|insufficient)\s+confidence/i,
      /(?:insufficient|low)\s+(?:confidence|information).*(?:refuse|do\s+not|don'?t)\s+finalize/i,
    ];
    const lowConfidenceRefusal = acceptedPhrasings.some((phrasing) => phrasing.test(content));

    assert.ok(
      lowConfidenceRefusal,
      'define.md must instruct the agent to refuse to finalize when confidence is low',
    );
  });

  it('instructs the agent to ask one targeted clarification question on low confidence', () => {
    // Alternative wordings of the single-question instruction; one match suffices.
    const acceptedPhrasings = [
      /one\s+targeted\s+clarification/i,
      /ask\s+(?:a\s+|one\s+)?(?:single\s+)?targeted\s+(?:clarification\s+)?question/i,
      /one\s+(?:clarification|follow[- ]up)\s+question/i,
    ];
    const oneTargetedQuestion = acceptedPhrasings.some((phrasing) => phrasing.test(content));

    assert.ok(
      oneTargetedQuestion,
      'define.md must instruct the agent to ask exactly one targeted clarification question when confidence is low',
    );
  });
});
|
|
166
|
+
|
|
167
|
+
// ---------------------------------------------------------------------------
|
|
168
|
+
// Criterion 7 — half-day regex ^\d+(\.5)?$ still present (regression guard)
|
|
169
|
+
// ---------------------------------------------------------------------------
|
|
170
|
+
|
|
171
|
+
/**
 * Criterion 7: regression guard — the half-day granularity regex text is still
 * present in define.md.
 */
describe('define.md — half-day regex preserved', () => {
  it('still contains the half-day granularity regex ^\\d+(\\.5)?$', () => {
    // Literal substring check for the regex source text.
    const regexTextPresent = content.includes('^\\d+(\\.5)?$');

    assert.ok(
      regexTextPresent,
      'define.md must still contain the half-day regex ^\\d+(\\.5)?$',
    );
  });
});
|
|
179
|
+
|
|
180
|
+
// ---------------------------------------------------------------------------
|
|
181
|
+
// Criterion 8 — "refuse to write without all estimates" rule still present
|
|
182
|
+
// ---------------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
/**
 * Criterion 8: regression guard — define.md still forbids writing the spec
 * while any slice has no estimate.
 */
describe('define.md — refuse to finalize without all estimates preserved', () => {
  it('still contains the rule to refuse writing the spec when any slice lacks an estimate', () => {
    // Alternative wordings of the "no spec without every estimate" rule; one
    // match suffices.
    const acceptedPhrasings = [
      /(?:refuse|never\s+write|do\s+not\s+write)\s+(?:the\s+)?spec\s+(?:while|if|when)\s+any\s+slice\s+(?:is\s+)?(?:missing|lacks?)\s+(?:its\s+)?estimate/i,
      /(?:missing|lacks?)\s+(?:its\s+)?estimate.*(?:never\s+write|refuse)/i,
      /never\s+write\s+the\s+spec\s+while\s+any\s+slice\s+is\s+missing\s+its\s+estimate/i,
    ];
    const refuseRule = acceptedPhrasings.some((phrasing) => phrasing.test(content));

    assert.ok(
      refuseRule,
      'define.md must still instruct the agent to refuse to write the spec when any slice is missing its estimate',
    );
  });
});
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* tests/supervise-estimate-provenance.test.mjs
|
|
3
|
+
*
|
|
4
|
+
* Slice: note-supervise-estimate-source
|
|
5
|
+
*
|
|
6
|
+
* Asserts that prompts/agents/supervise.md contains a provenance note
|
|
7
|
+
* clarifying that per-slice estimates are model-generated by the Define agent
|
|
8
|
+
* (with optional user overrides) and are read at runtime as-is.
|
|
9
|
+
*
|
|
10
|
+
* These tests FAIL before the implementation change and PASS after.
|
|
11
|
+
*
|
|
12
|
+
* Path under test: prompts/agents/supervise.md (relative to repo root)
|
|
13
|
+
* Test runner: node --test (Node built-in, >= 20)
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { describe, it, before } from 'node:test';
|
|
17
|
+
import assert from 'node:assert/strict';
|
|
18
|
+
import { readFileSync } from 'node:fs';
|
|
19
|
+
import { fileURLToPath } from 'node:url';
|
|
20
|
+
import path from 'node:path';
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Resolve the file under test relative to this file's location.
|
|
24
|
+
// Repo layout: packages/dev-agents-sync/tests/ -> repo root is 3 levels up.
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
// Absolute path of this test module, derived from its file:// URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Three directory levels above this file, then down to the prompt under test.
const REPO_ROOT = path.resolve(__dirname, '../../..');
const SUPERVISE_MD = path.join(REPO_ROOT, 'prompts', 'agents', 'supervise.md');

// Full text of supervise.md; assigned by the hook below and read by the tests.
let content;

before(function loadSupervisePrompt() {
  content = readFileSync(SUPERVISE_MD, { encoding: 'utf8' });
});
|
|
36
|
+
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
// Criterion 1 — estimates are acknowledged as model-generated
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
/**
 * Criterion 1: supervise.md describes per-slice estimates as model-generated.
 */
describe('supervise.md — estimate provenance: model-generated', () => {
  it('contains language acknowledging that per-slice estimates are model-generated', () => {
    // Accepts "model-generated" or "model generated", in any letter case,
    // anywhere in the file.
    const mentionsModelGenerated = /model[- ]generated/i.test(content);

    assert.ok(
      mentionsModelGenerated,
      'supervise.md must contain "model-generated" to acknowledge that per-slice estimates are produced by the Define agent, not user-supplied',
    );
  });
});
|
|
54
|
+
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
// Criterion 2 — the Define agent is named as the source
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
/**
 * Criterion 2: supervise.md names the Define agent as where estimates come from.
 */
describe('supervise.md — estimate provenance: Define agent as source', () => {
  it('names the Define agent as the source of per-slice estimates', () => {
    // Each pattern pairs the two-word phrase "Define agent" with "estimate" or
    // "model-generated", in either order, at most 400 characters apart.
    // [^] matches any character, newlines included, so the pair may span lines.
    // Requiring the word "agent" keeps a bare "Define" next to "estimate" from
    // satisfying the check.
    const proximityPatterns = [
      /Define\s+agent[^]{0,400}estimate/i,
      /estimate[^]{0,400}Define\s+agent/i,
      /Define\s+agent[^]{0,400}model[- ]generated/i,
      /model[- ]generated[^]{0,400}Define\s+agent/i,
    ];
    const defineAgentNearEstimate = proximityPatterns.some((pattern) => pattern.test(content));

    assert.ok(
      defineAgentNearEstimate,
      'supervise.md must name the Define agent as the source of per-slice estimates (requires "Define agent" near "estimate" or "model-generated")',
    );
  });
});
|
|
79
|
+
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
// Criterion 3 — optional user overrides are acknowledged
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
/**
 * Criterion 3: supervise.md mentions that user overrides of the estimates are
 * optional.
 */
describe('supervise.md — estimate provenance: optional user overrides', () => {
  it('acknowledges that users may override model-generated estimates', () => {
    // "optional" adjacent to "override" (either order) is the minimal signal;
    // one match suffices.
    const acceptedPhrasings = [
      /optional\w*\s+(?:user\s+)?override/i,
      /(?:user\s+)?override\w*\s+(?:are\s+)?optional/i,
      /with\s+optional\s+(?:user\s+)?override/i,
    ];
    const optionalOverridePattern = acceptedPhrasings.some((phrasing) => phrasing.test(content));

    assert.ok(
      optionalOverridePattern,
      'supervise.md must acknowledge that user overrides of model-generated estimates are optional',
    );
  });
});
|