sneakoscope 0.7.49 → 0.7.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/package.json +1 -1
- package/src/cli/main.mjs +144 -14
- package/src/cli/maintenance-commands.mjs +19 -10
- package/src/core/fsx.mjs +1 -1
- package/src/core/image-ux-review.mjs +298 -0
- package/src/core/init.mjs +22 -9
- package/src/core/mission.mjs +14 -2
- package/src/core/pipeline.mjs +102 -12
- package/src/core/routes.mjs +39 -4
- package/src/core/team-live.mjs +7 -2
- package/src/core/team-review-policy.mjs +49 -0
- package/src/core/tmux-ui.mjs +38 -14
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import fsp from 'node:fs/promises';
|
|
3
|
+
import { nowIso, sha256, writeJsonAtomic } from './fsx.mjs';
|
|
4
|
+
import { CODEX_APP_IMAGE_GENERATION_DOC_URL } from './routes.mjs';
|
|
5
|
+
|
|
6
|
+
// File names of the JSON artifacts produced by the image UX review route.
// Each one is joined onto the mission directory when the artifacts are written.

/** Gate artifact: the overall pass/blocked checkpoint record. */
export const IMAGE_UX_REVIEW_GATE_ARTIFACT = 'image-ux-review-gate.json';

/** Policy artifact: thresholds, stop conditions, and review rules. */
export const IMAGE_UX_REVIEW_POLICY_ARTIFACT = 'image-ux-review-policy.json';

/** Screen inventory artifact: the source screenshots under review. */
export const IMAGE_UX_REVIEW_SCREEN_INVENTORY_ARTIFACT = 'image-ux-screen-inventory.json';

/** Generated review ledger artifact: generated annotated review images per source screen. */
export const IMAGE_UX_REVIEW_GENERATED_REVIEW_LEDGER_ARTIFACT = 'image-ux-generated-review-ledger.json';

/** Issue ledger artifact: issue rows extracted from the generated review images. */
export const IMAGE_UX_REVIEW_ISSUE_LEDGER_ARTIFACT = 'image-ux-issue-ledger.json';

/** Iteration report artifact: the bounded review/repair pass summary. */
export const IMAGE_UX_REVIEW_ITERATION_REPORT_ARTIFACT = 'image-ux-iteration-report.json';

/** Image generation API guide URL recorded in the policy and the generated review ledger. */
export const IMAGE_UX_REVIEW_API_DOC_URL = 'https://developers.openai.com/api/docs/guides/image-generation';
|
|
13
|
+
|
|
14
|
+
/**
 * Names of the checkpoint fields carried by the image UX review gate object.
 * The list is frozen so importers cannot mutate it.
 * NOTE(review): presumably every listed field must be `true` for the gate to
 * pass; the enforcement is not in this module, so confirm against the caller.
 */
export const IMAGE_UX_REVIEW_REQUIRED_GATE_FIELDS = Object.freeze([
  // Artifact creation checkpoints.
  'policy_created',
  'screen_inventory_created',
  // Evidence checkpoints: screenshots captured, review images generated and analyzed.
  'source_screenshots_captured',
  'imagegen_review_images_generated',
  'generated_review_images_analyzed',
  'issue_ledger_created',
  // Resolution and closeout checkpoints.
  'p0_p1_zero',
  'bounded_iteration_complete',
  'changed_screens_rechecked_or_not_applicable',
  'honest_mode_complete'
]);
|
|
26
|
+
|
|
27
|
+
/**
 * Normalize a value into single-line text.
 *
 * Nullish values become the empty string, everything else is stringified;
 * every whitespace run is collapsed to one space and the ends are trimmed.
 *
 * @param {*} value - Value to normalize.
 * @param {string} [fallback=''] - Returned when the normalized text is empty.
 * @returns {string} The normalized text, or `fallback` when it is empty.
 */
function cleanText(value, fallback = '') {
  const raw = String(value ?? '');
  const collapsed = raw.replace(/\s+/g, ' ').trim();
  if (collapsed) {
    return collapsed;
  }
  return fallback;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Flatten a contract into one normalized line of text: its prompt followed by
 * the JSON serialization of its answers.
 *
 * @param {object} [contract={}] - Contract whose `prompt` and `answers` are read.
 * @returns {string} Whitespace-normalized "prompt answers-JSON" text.
 */
function contractText(contract = {}) {
  const promptPart = contract.prompt || '';
  const answersPart = JSON.stringify(contract.answers || {});
  return cleanText(`${promptPart} ${answersPart}`);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Build a short identifier of the form `<prefix>-<10 hash chars>`.
 *
 * The hashed input is the whitespace-normalized `text`; when that is empty the
 * prefix itself is hashed instead.
 *
 * @param {string} prefix - Leading label of the identifier.
 * @param {*} text - Content the hash is derived from.
 * @returns {string} The prefixed identifier.
 */
function compactId(prefix, text) {
  const hashInput = cleanText(text, prefix);
  const shortHash = sha256(hashInput).slice(0, 10);
  return `${prefix}-${shortHash}`;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Build the policy object for the image-generation UI/UX review loop.
 *
 * Everything except `created_at` and `contract_hash` is static: thresholds,
 * stop conditions, source-capture rules, image-generation review rules,
 * extraction and remediation rules, and the list of evidence artifacts.
 *
 * @param {object} [contract={}] - Contract; only `sealed_hash` is read.
 * @returns {object} The policy object (schema_version 1).
 */
export function buildImageUxReviewPolicy(contract = {}) {
  // Conditions under which the review loop stops.
  const stopConditions = [
    'Every source screenshot has a matching generated annotated review image',
    'The generated review image has been analyzed back into structured issue rows',
    'P0/P1 issues are zero after allowed fixes',
    'overall_score >= 0.88',
    'improvement_delta < 0.03 after at least one repair pass',
    'max_full_surface_passes or max_screen_retries reached',
    'Codex App imagegen/gpt-image-2 evidence is unavailable'
  ];

  // Where source screenshots may come from.
  const sourceCapture = {
    required: true,
    evidence_policy: 'Use Codex Computer Use for live UI/browser capture when available, or user-provided screenshots for static review. Do not treat browser automation screenshots as Codex Computer Use evidence.',
    accepted_sources: ['codex_computer_use_screenshot', 'user_provided_screenshot', 'exported_static_artifact_image']
  };

  // Steps for turning a source screenshot into an annotated review image.
  const referenceImageFlow = [
    'Use each source UI screenshot as the reference image input',
    'Ask imagegen/gpt-image-2 to create a new annotated critique image, not just prose',
    'Draw numbered callouts directly on problem regions',
    'Show severity labels P0/P1/P2/P3 on the generated image',
    'Include visual hierarchy, contrast, alignment, density, affordance, and flow markers',
    'Add a small corrected mini-comp or before/after strip when useful'
  ];

  // Sentences of the review prompt; joined with single spaces below.
  const reviewPromptSentences = [
    'Review this UI screenshot as a senior product design lead.',
    'Output a new annotated review image, using the screenshot as reference.',
    'Overlay numbered callouts on concrete UI regions; label each with P0/P1/P2/P3.',
    'Mark eye-flow arrows, hierarchy/contrast/alignment/density problems, and ambiguous affordances.',
    'Include a compact corrected mini-comp or before/after strip for the highest-impact fix.',
    'Do not invent product requirements beyond what is visible or provided in the route context.'
  ];

  const imageGenerationReview = {
    required_for_gate: true,
    model: 'gpt-image-2',
    preferred_surface: 'Codex App built-in image generation via $imagegen',
    codex_app_imagegen_doc: CODEX_APP_IMAGE_GENERATION_DOC_URL,
    api_image_generation_doc: IMAGE_UX_REVIEW_API_DOC_URL,
    output_artifact: IMAGE_UX_REVIEW_GENERATED_REVIEW_LEDGER_ARTIFACT,
    anti_substitution_rule: 'A text-only visual review cannot satisfy this route. Missing generated annotated review images block the gate instead of being simulated.',
    reference_image_flow: referenceImageFlow,
    review_prompt_template: reviewPromptSentences.join(' ')
  };

  // How generated review images are converted into issue rows.
  const extractionPolicy = {
    input_artifact: IMAGE_UX_REVIEW_GENERATED_REVIEW_LEDGER_ARTIFACT,
    output_artifact: IMAGE_UX_REVIEW_ISSUE_LEDGER_ARTIFACT,
    rule: 'Analyze the generated annotated review image with vision/OCR, then convert each visible callout into an issue row with severity, region, evidence image id, likely cause, and specific fix action.',
    required_issue_fields: ['id', 'severity', 'screen_id', 'callout_id', 'region', 'evidence_image_id', 'title', 'detail', 'fix_action', 'status']
  };

  const remediationPolicy = {
    code_changes_allowed: 'only_when_user_or_route_contract_requests_fixing',
    priority_order: ['P0', 'P1', 'P2', 'P3'],
    patch_rule: 'Patch P0/P1 first, then cheap local P2. Re-run only changed, failed, or high-risk screens.',
    no_fallback: 'Do not replace the image-generation review with a hand-written fallback review.'
  };

  const evidenceArtifacts = [
    IMAGE_UX_REVIEW_SCREEN_INVENTORY_ARTIFACT,
    IMAGE_UX_REVIEW_GENERATED_REVIEW_LEDGER_ARTIFACT,
    IMAGE_UX_REVIEW_ISSUE_LEDGER_ARTIFACT,
    IMAGE_UX_REVIEW_ITERATION_REPORT_ARTIFACT,
    IMAGE_UX_REVIEW_GATE_ARTIFACT
  ];

  const notes = [
    'The central mechanism is generated visual critique: gpt-image-2/imagegen must produce a new review image from the source UI screenshot.',
    'The generated review image is then read back into text/JSON issues. This is intentionally different from direct text-only screenshot critique.'
  ];

  // Key order is kept stable because the object is serialized to JSON.
  return {
    schema_version: 1,
    created_at: nowIso(),
    contract_hash: contract.sealed_hash || null,
    policy: 'image_generation_ui_ux_review_loop',
    score_threshold: 0.88,
    minimum_delta_to_continue: 0.03,
    max_full_surface_passes: 2,
    max_screen_retries: 2,
    stop_conditions: stopConditions,
    source_capture: sourceCapture,
    image_generation_review: imageGenerationReview,
    extraction_policy: extractionPolicy,
    remediation_policy: remediationPolicy,
    evidence_artifacts: evidenceArtifacts,
    notes
  };
}
|
|
114
|
+
|
|
115
|
+
/**
 * Build the inventory of source screens to review from a contract.
 *
 * Source images are taken from `answers.IMAGE_UX_REVIEW_SOURCE_IMAGES` followed
 * by `answers.SOURCE_SCREENSHOTS` (each used only when it is an array). Entries
 * are normalized to text and empty ones are dropped. The inventory passes only
 * when at least one source image was supplied; otherwise capture is still
 * required and a blocker is recorded.
 *
 * @param {object} [contract={}] - Contract; `prompt`, `answers`, and `sealed_hash` are read.
 * @returns {object} The screen inventory object (schema_version 1).
 */
export function buildImageUxScreenInventory(contract = {}) {
  const signatureText = contractText(contract);
  const answers = contract.answers;
  const listOrEmpty = (candidate) => (Array.isArray(candidate) ? candidate : []);

  const providedSources = [];
  const rawSources = [
    ...listOrEmpty(answers?.IMAGE_UX_REVIEW_SOURCE_IMAGES),
    ...listOrEmpty(answers?.SOURCE_SCREENSHOTS)
  ];
  for (const rawSource of rawSources) {
    const normalized = cleanText(rawSource);
    if (normalized) {
      providedSources.push(normalized);
    }
  }

  const target = cleanText(answers?.TARGET_URL || answers?.TARGET_SURFACE || contract.prompt, 'UI surface to review');
  const hasSources = providedSources.length > 0;

  // Screen ids are 1-based and follow the order in which sources were supplied.
  const sourceScreens = providedSources.map((source, position) => {
    const isRemote = /^https?:\/\//i.test(source);
    return {
      id: `screen-${position + 1}`,
      source,
      source_type: isRemote ? 'url_or_remote_image' : 'local_or_named_image',
      status: 'provided_unverified'
    };
  });

  return {
    schema_version: 1,
    created_at: nowIso(),
    contract_hash: contract.sealed_hash || null,
    target,
    task_signature: compactId('image-ux-target', signatureText),
    capture_required: !hasSources,
    source_screens: sourceScreens,
    capture_policy: 'Capture actual UI screens with Codex Computer Use when the target is live. For static images, record the provided image path or attachment id. Each source screen must later map to a generated review image.',
    passed: hasSources,
    blockers: hasSources ? [] : ['source_screenshots_not_captured_yet']
  };
}
|
|
140
|
+
|
|
141
|
+
/**
 * Build the ledger that tracks generated annotated review images per source screen.
 *
 * A source screen counts as reviewed when some entry of
 * `existing.generated_review_images` has a `source_screen_id` equal to the
 * screen's `id`. The ledger passes only when there is at least one source
 * screen and every source screen is reviewed.
 *
 * Malformed persisted data is tolerated: `null`/`undefined` image entries are
 * ignored during matching instead of throwing, and a missing or non-array
 * `inventory.source_screens` is treated as "no source screens".
 *
 * @param {object} [contract={}] - Contract; only `sealed_hash` is read here.
 * @param {object} [inventory] - Screen inventory; defaults to one built from `contract`.
 * @param {object|null} [existing=null] - Previously persisted ledger; only its
 *   `generated_review_images` array is read and it is carried over unchanged.
 * @returns {object} The generated review ledger object (schema_version 1).
 */
export function buildImageUxGeneratedReviewLedger(contract = {}, inventory = buildImageUxScreenInventory(contract), existing = null) {
  const existingImages = Array.isArray(existing?.generated_review_images) ? existing.generated_review_images : [];
  const sourceScreens = Array.isArray(inventory?.source_screens) ? inventory.source_screens : [];

  // Index reviewed screen ids once instead of rescanning the image list per screen.
  const reviewedScreenIds = new Set();
  for (const image of existingImages) {
    // Skip null/undefined rows from hand-edited or partially written ledgers.
    if (image != null) reviewedScreenIds.add(image.source_screen_id);
  }
  const isReviewed = (screen) => reviewedScreenIds.has(screen.id);

  const missingScreens = sourceScreens.filter((screen) => !isReviewed(screen));
  const blockers = [];
  if (sourceScreens.length === 0) blockers.push('no_source_screenshots_for_imagegen_review');
  if (missingScreens.length > 0) blockers.push('missing_generated_annotated_review_images');

  return {
    schema_version: 1,
    created_at: nowIso(),
    contract_hash: contract.sealed_hash || null,
    provider: {
      model: 'gpt-image-2',
      preferred_surface: 'Codex App $imagegen',
      codex_app_imagegen_doc: CODEX_APP_IMAGE_GENERATION_DOC_URL,
      api_image_generation_doc: IMAGE_UX_REVIEW_API_DOC_URL
    },
    required: true,
    generated_review_images: existingImages,
    planned_reviews: sourceScreens.map((screen) => ({
      id: compactId('image-ux-review', `${screen.id}:${screen.source || screen.id}`),
      source_screen_id: screen.id,
      status: isReviewed(screen) ? 'generated' : 'pending_imagegen',
      required_output: 'annotated_review_image_with_numbered_callouts_and_optional_mini_comp'
    })),
    // Counts every recorded image entry, matched to a current screen or not.
    generated_count: existingImages.length,
    required_count: sourceScreens.length,
    blockers,
    passed: sourceScreens.length > 0 && blockers.length === 0,
    notes: [
      'This ledger records real generated review images. It must not be marked passed from prose-only critique.',
      'Route workers should attach generated image paths, Codex App output ids, or API output paths before passing the gate.'
    ]
  };
}
|
|
176
|
+
|
|
177
|
+
/**
 * Build the issue ledger derived from the generated review images.
 *
 * An issue is "blocking" when its severity is `P0` or `P1` and its status is
 * neither `fixed` nor `accepted_not_applicable`. The ledger passes only when
 * the generated review ledger passed (strictly `passed === true`) and there
 * are no blocking issues.
 *
 * The generated-review check is computed once with strict equality and reused
 * for `blockers`, `passed`, and the scorecard, so a truthy-but-not-`true`
 * value cannot produce a passing score on a blocked ledger. `null`/`undefined`
 * issue rows are ignored when counting blockers instead of throwing.
 *
 * @param {object} [contract={}] - Contract; only `sealed_hash` is read here.
 * @param {object} [generatedReviewLedger] - Generated review ledger; defaults
 *   to one built from `contract`. Only `passed` is read.
 * @param {object|null} [existing=null] - Previously persisted issue ledger;
 *   only its `issues` array is read and it is carried over unchanged.
 * @returns {object} The issue ledger object (schema_version 1).
 */
export function buildImageUxIssueLedger(contract = {}, generatedReviewLedger = buildImageUxGeneratedReviewLedger(contract), existing = null) {
  const issues = Array.isArray(existing?.issues) ? existing.issues : [];
  const reviewPassed = generatedReviewLedger?.passed === true;

  const blockingSeverities = ['P0', 'P1'];
  const resolvedStatuses = ['fixed', 'accepted_not_applicable'];
  const blockingIssues = issues.filter((issue) => (
    // Guard against null/undefined rows from malformed persisted ledgers.
    issue != null
    && blockingSeverities.includes(issue.severity)
    && !resolvedStatuses.includes(issue.status)
  ));
  const noBlockingIssues = blockingIssues.length === 0;

  const blockers = reviewPassed ? [] : ['generated_review_images_missing_or_incomplete'];
  if (!noBlockingIssues) blockers.push('p0_p1_issues_unresolved');

  return {
    schema_version: 1,
    created_at: nowIso(),
    contract_hash: contract.sealed_hash || null,
    extraction_source: IMAGE_UX_REVIEW_GENERATED_REVIEW_LEDGER_ARTIFACT,
    extraction_rule: 'Issues must be extracted from the generated annotated review image callouts, not invented from memory.',
    issues,
    blocking_issue_count: blockingIssues.length,
    p0_p1_zero: noBlockingIssues,
    blockers,
    passed: reviewPassed && noBlockingIssues,
    // Fixed heuristic scores keyed off the same conditions as `passed`.
    scorecard: {
      visual_review_completion: reviewPassed ? 0.92 : 0.25,
      issue_extraction_integrity: reviewPassed && issues.length > 0 ? 0.9 : 0.4,
      p0_p1_resolution: noBlockingIssues ? 0.9 : 0.4,
      overall_score: reviewPassed && noBlockingIssues ? 0.9 : 0.42
    }
  };
}
|
|
202
|
+
|
|
203
|
+
/**
 * Build the iteration report summarizing the single recorded review pass.
 *
 * The report passes when the generated review ledger passed, the issue ledger
 * passed, and the issue ledger's overall score reaches the policy threshold
 * (0.88 is used when the policy threshold is missing or falsy).
 *
 * @param {object} [contract={}] - Contract; only `sealed_hash` is read here.
 * @param {object} [policy] - Review policy; defaults to one built from `contract`.
 * @param {object} [generatedReviewLedger] - Defaults to one built from `contract`.
 * @param {object} [issueLedger] - Defaults to one built from `contract` and `generatedReviewLedger`.
 * @returns {object} The iteration report object (schema_version 1).
 */
export function buildImageUxIterationReport(contract = {}, policy = buildImageUxReviewPolicy(contract), generatedReviewLedger = buildImageUxGeneratedReviewLedger(contract), issueLedger = buildImageUxIssueLedger(contract, generatedReviewLedger)) {
  let passed = false;
  if (generatedReviewLedger.passed === true && issueLedger.passed === true) {
    // The score is only compared once both ledgers have passed.
    const score = Number(issueLedger.scorecard?.overall_score || 0);
    const threshold = Number(policy.score_threshold || 0.88);
    passed = score >= threshold;
  }

  const loopPolicy = {
    max_full_surface_passes: policy.max_full_surface_passes,
    max_screen_retries: policy.max_screen_retries,
    score_threshold: policy.score_threshold,
    minimum_delta_to_continue: policy.minimum_delta_to_continue
  };

  let passType = 'waiting_for_imagegen_generated_review_images';
  if (generatedReviewLedger.passed) {
    passType = 'imagegen_visual_review_extraction';
  }

  const firstPass = {
    pass: 1,
    type: passType,
    generated_review_images: generatedReviewLedger.generated_count || 0,
    blocking_issue_count: issueLedger.blocking_issue_count || 0,
    score: issueLedger.scorecard?.overall_score || 0,
    status: passed ? 'passed' : 'blocked'
  };

  let stopReason = 'imagegen_review_evidence_or_issue_resolution_required';
  if (passed) {
    stopReason = 'score_threshold_met_and_no_p0_p1_issues';
  }

  return {
    schema_version: 1,
    created_at: nowIso(),
    contract_hash: contract.sealed_hash || null,
    loop_policy: loopPolicy,
    passes: [firstPass],
    stopped: true,
    stop_reason: stopReason,
    passed
  };
}
|
|
232
|
+
|
|
233
|
+
/**
 * Build the default (not yet passed) gate object for the image UX review route.
 *
 * Any part missing from `parts` is built from the contract. `passed` and
 * `honest_mode_complete` are always emitted as `false` here; the remaining
 * checkpoint fields are derived from the parts. Blockers are the concatenation
 * of the inventory, generated review ledger, and issue ledger blockers.
 *
 * @param {object} [contract={}] - Contract; only `sealed_hash` is read here.
 * @param {object} [parts={}] - Optional prebuilt `policy`, `inventory`,
 *   `generatedReviewLedger`, `issueLedger`, and `iterationReport`.
 * @returns {object} The gate object (schema_version 1).
 */
export function defaultImageUxReviewGate(contract = {}, parts = {}) {
  // Build order matters: each later part defaults from the earlier ones.
  const policy = parts.policy || buildImageUxReviewPolicy(contract);
  const inventory = parts.inventory || buildImageUxScreenInventory(contract);
  const generatedReviewLedger = parts.generatedReviewLedger || buildImageUxGeneratedReviewLedger(contract, inventory);
  const issueLedger = parts.issueLedger || buildImageUxIssueLedger(contract, generatedReviewLedger);
  const iterationReport = parts.iterationReport || buildImageUxIterationReport(contract, policy, generatedReviewLedger, issueLedger);

  const reviewImagesReady = generatedReviewLedger.passed === true;
  const iterationPassed = iterationReport.passed === true;
  const extractedFromLedger = issueLedger.extraction_source === IMAGE_UX_REVIEW_GENERATED_REVIEW_LEDGER_ARTIFACT;

  const requiredArtifacts = [
    IMAGE_UX_REVIEW_POLICY_ARTIFACT,
    IMAGE_UX_REVIEW_SCREEN_INVENTORY_ARTIFACT,
    IMAGE_UX_REVIEW_GENERATED_REVIEW_LEDGER_ARTIFACT,
    IMAGE_UX_REVIEW_ISSUE_LEDGER_ARTIFACT,
    IMAGE_UX_REVIEW_ITERATION_REPORT_ARTIFACT
  ];

  const blockers = [
    ...(inventory.blockers || []),
    ...(generatedReviewLedger.blockers || []),
    ...(issueLedger.blockers || [])
  ];

  return {
    schema_version: 1,
    created_at: nowIso(),
    contract_hash: contract.sealed_hash || null,
    passed: false,
    policy_created: true,
    screen_inventory_created: true,
    source_screenshots_captured: inventory.passed === true,
    imagegen_review_images_generated: reviewImagesReady,
    generated_review_images_analyzed: extractedFromLedger && reviewImagesReady,
    issue_ledger_created: true,
    p0_p1_zero: issueLedger.p0_p1_zero === true && reviewImagesReady,
    bounded_iteration_complete: iterationPassed,
    changed_screens_rechecked_or_not_applicable: iterationPassed,
    honest_mode_complete: false,
    required_artifacts: requiredArtifacts,
    blockers,
    notes: [
      'Do not pass this gate from direct text-only screenshot critique.',
      'Pass only after source screenshots have real generated annotated review images and those generated images are extracted into issue rows.'
    ]
  };
}
|
|
272
|
+
|
|
273
|
+
/**
 * Build all image UX review artifacts for a contract and write them into `dir`.
 *
 * Any generated review ledger and issue ledger already present in `dir` are
 * read first so their recorded images and issues carry over into the rebuilt
 * ledgers. Files are written one after another, with the gate written last.
 *
 * @param {string} dir - Directory the artifact files are written into.
 * @param {object} [contract={}] - Contract the artifacts are built from.
 * @returns {Promise<object>} The built objects keyed as `policy`, `inventory`,
 *   `generated_review_ledger`, `issue_ledger`, `iteration_report`, and `gate`.
 */
export async function writeImageUxReviewRouteArtifacts(dir, contract = {}) {
  const policy = buildImageUxReviewPolicy(contract);
  const inventory = buildImageUxScreenInventory(contract);

  const priorGenerated = await readExistingJson(dir, IMAGE_UX_REVIEW_GENERATED_REVIEW_LEDGER_ARTIFACT);
  const priorIssues = await readExistingJson(dir, IMAGE_UX_REVIEW_ISSUE_LEDGER_ARTIFACT);

  const generatedReviewLedger = buildImageUxGeneratedReviewLedger(contract, inventory, priorGenerated);
  const issueLedger = buildImageUxIssueLedger(contract, generatedReviewLedger, priorIssues);
  const iterationReport = buildImageUxIterationReport(contract, policy, generatedReviewLedger, issueLedger);
  const gate = defaultImageUxReviewGate(contract, { policy, inventory, generatedReviewLedger, issueLedger, iterationReport });

  // Written strictly in this order, one at a time.
  const outputs = [
    [IMAGE_UX_REVIEW_POLICY_ARTIFACT, policy],
    [IMAGE_UX_REVIEW_SCREEN_INVENTORY_ARTIFACT, inventory],
    [IMAGE_UX_REVIEW_GENERATED_REVIEW_LEDGER_ARTIFACT, generatedReviewLedger],
    [IMAGE_UX_REVIEW_ISSUE_LEDGER_ARTIFACT, issueLedger],
    [IMAGE_UX_REVIEW_ITERATION_REPORT_ARTIFACT, iterationReport],
    [IMAGE_UX_REVIEW_GATE_ARTIFACT, gate]
  ];
  for (const [fileName, payload] of outputs) {
    await writeJsonAtomic(path.join(dir, fileName), payload);
  }

  return {
    policy,
    inventory,
    generated_review_ledger: generatedReviewLedger,
    issue_ledger: issueLedger,
    iteration_report: iterationReport,
    gate
  };
}
|
|
290
|
+
|
|
291
|
+
/**
 * Best-effort read of a JSON file in `dir`.
 *
 * @param {string} dir - Directory containing the file.
 * @param {string} file - File name inside `dir`.
 * @returns {Promise<*>} The parsed JSON value, or `null` when the path cannot
 *   be built, the file cannot be read, or its content is not valid JSON.
 */
async function readExistingJson(dir, file) {
  let parsed = null;
  try {
    const target = path.join(dir, file);
    const contents = await fsp.readFile(target, 'utf8');
    parsed = JSON.parse(contents);
  } catch {
    // Deliberately best-effort: any failure means "no usable prior artifact".
    parsed = null;
  }
  return parsed;
}
|
package/src/core/init.mjs
CHANGED
|
@@ -6,7 +6,8 @@ import { DEFAULT_DB_SAFETY_POLICY } from './db-safety.mjs';
|
|
|
6
6
|
import { isHarnessSourceProject, writeHarnessGuardPolicy } from './harness-guard.mjs';
|
|
7
7
|
import { repairSksGeneratedArtifacts } from './harness-conflicts.mjs';
|
|
8
8
|
import { installVersionGitHook } from './version-manager.mjs';
|
|
9
|
-
import {
|
|
9
|
+
import { MIN_TEAM_REVIEWER_LANES, MIN_TEAM_REVIEW_POLICY_TEXT } from './team-review-policy.mjs';
|
|
10
|
+
import { AWESOME_DESIGN_MD_REFERENCE, CODEX_APP_IMAGE_GENERATION_DOC_URL, CODEX_COMPUTER_USE_ONLY_POLICY, DESIGN_SYSTEM_SSOT, DOLLAR_COMMANDS, DOLLAR_COMMAND_ALIASES, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, GETDESIGN_REFERENCE, PPT_CONDITIONAL_SKILL_ALLOWLIST, PPT_PIPELINE_MCP_ALLOWLIST, PPT_PIPELINE_SKILL_ALLOWLIST, RECOMMENDED_DESIGN_REFERENCES, RECOMMENDED_MCP_SERVERS, RECOMMENDED_SKILLS, chatCaptureIntakeText, context7ConfigToml, getdesignReferencePolicyText, imageUxReviewPipelinePolicyText, outcomeRubricPolicyText, pptPipelineAllowlistPolicyText, speedLanePolicyText, stackCurrentDocsPolicyText, triwikiContextTracking, triwikiContextTrackingText, triwikiStagePolicyText } from './routes.mjs';
|
|
10
11
|
import { SKILL_DREAM_POLICY, skillDreamPolicyText } from './skill-forge.mjs';
|
|
11
12
|
|
|
12
13
|
const REFLECTION_MEMORY_PATH = '.sneakoscope/memory/q2_facts/post-route-reflection.md';
|
|
@@ -93,7 +94,13 @@ function isSksManagedHook(hook) {
|
|
|
93
94
|
return hook.type === 'command' && /\bhook\s+(?:user-prompt-submit|pre-tool|post-tool|permission-request|stop)\b/.test(command) && /\b(?:sks|sneakoscope|sks\.mjs)\b/.test(command);
|
|
94
95
|
}
|
|
95
96
|
|
|
96
|
-
const AGENTS_BLOCK = "\n# Sneakoscope Codex Managed Rules\n\nThis repository uses Sneakoscope Codex.\n\n## Core Rules\n\n- Codex native `/goal` workflows are the persisted continuation surface; Ralph is removed from the user-facing SKS surface.\n- Keep runtime state bounded: raw logs go to files, prompts get tails/summaries, and `sks gc` may prune stale artifacts.\n- Before substantive work, SKS checks npm for a newer package. If newer, ask update-now vs skip-for-this-conversation.\n- Versioning is managed by the SKS pre-commit hook; check `sks versioning status`. Bypass only with `SKS_DISABLE_VERSIONING=1`.\n- Installed harness files are immutable to LLM edits: `.codex/*`, `.agents/skills/`, `.codex/agents/`, `.sneakoscope/*policy*.json`, `AGENTS.md`, and `node_modules/sneakoscope`. The Sneakoscope engine source repo is the only automatic exception.\n- OMX/DCodex conflicts block setup/doctor. Show `sks conflicts prompt`; cleanup requires explicit human approval.\n- Do not stop at a plan when implementation was requested. Finish, verify, or report the hard blocker.\n- Do not create unrequested fallback implementation code. If the requested path is impossible, block with evidence instead of inventing substitute behavior.\n\n## Routes\n\n- General execution/code-changing prompts default to `$Team`: analysis scouts, TriWiki refresh/validate, read-only debate, consensus, concrete runtime task graph/inboxes, fresh executor team, review, integration, Honest Mode.\n- `$Computer-Use` / `$CU` is the maximum-speed Codex Computer Use lane for UI/browser/visual tasks: skip Team debate and upfront TriWiki loops, use Codex Computer Use directly, then refresh/validate TriWiki and run Honest Mode at final closeout.\n- `$Goal` is a fast bridge/overlay for Codex native `/goal` create/pause/resume/clear persistence controls; implementation continues through the selected SKS execution route.\n- TriWiki recall must stay bounded. 
Use `sks wiki sweep` to record demote, soft-forget, archive, delete, promote-to-skill, and promote-to-rule candidates instead of injecting every old claim.\n- Team missions must keep schema-backed evidence current: `work-order-ledger.json`, `effort-decision.json`, `team-dashboard-state.json`, and route-specific visual/dogfood artifacts where applicable. Use `sks validate-artifacts latest` before claiming those artifacts pass.\n- `$DFix` is only for tiny design/content edits and bypasses the main pipeline, Team, TriWiki/TriFix/reflection recording, and persistent route state; it still uses a one-line DFix-specific Honest check before final. `$PPT` is the restrained, information-first HTML/PDF presentation route and must seal delivery context, audience profile, STP, decision context, and 3+ pain-point/solution/aha mappings before design/render work. It must avoid over-designed visuals, carry detail through hierarchy, spacing, alignment, thin rules, source clarity, and subtle accents, preserve editable source HTML under `source-html/`, record `ppt-parallel-report.json`, and clean PPT-only temporary build files before completion. `$Answer`, `$Help`, and `$Wiki` stay lightweight.\n- For code work, surface route/guard/write scopes first, split independent worker scopes when available, and keep parent-owned integration and verification.\n- Design work reads `design.md` as the only design decision SSOT. If missing, create it through `design-system-builder` from `docs/Design-Sys-Prompt.md`; getdesign.md, getdesign-reference, and curated DESIGN.md examples from https://github.com/VoltAgent/awesome-design-md are source inputs to fuse into that SSOT or route-local style tokens, not parallel design authorities. 
Image/logo/raster assets use `imagegen`, which must prefer official Codex App built-in image generation via `$imagegen` / `gpt-image-2` before API generation.\n- Research, AutoResearch, performance, token, accuracy, SEO/GEO, or workflow-improvement claims need experiment/eval evidence. Do not claim live model accuracy without a scored dataset.\n- Treat handwritten files above 3000 lines as split-review risks. Run `sks code-structure scan` and prefer extraction before adding substantial logic.\n- Skill dreaming stays lightweight: route use records JSON counters in `.sneakoscope/skills/dream-state.json`, and full skill inventory/recommendation runs only after the configured count/cooldown threshold. Reports are recommendation-only; deleting or merging skills needs explicit user approval.\n\n## Evidence And Context\n\n- Context7 is required for external libraries, APIs, MCPs, package managers, SDKs, and generated docs: resolve-library-id then query-docs.\n- When tech stack, framework, package, runtime, or deployment-platform versions change, use Context7 or official vendor web docs, record current syntax/security/limit guidance as high-priority TriWiki claims, then refresh and validate before coding.\n- TriWiki is the context-tracking SSOT for long-running missions, Team handoffs, and context-pressure recovery. Read `.sneakoscope/wiki/context-pack.json` before each stage, use `attention.use_first` for compact high-trust recall, hydrate `attention.hydrate_first` from source before risky or lower-trust decisions, refresh after findings or artifact changes, and validate before handoffs/final claims.\n- Source priority: current code/tests/config, decision contract, vgraph, beta, GX render/snapshot metadata, LLM Wiki coordinate index, then model knowledge only if allowed.\n- Final response before stop: summarize what was done, what changed for the user/repo, what was verified, and what remains unverified or blocked; then run Honest Mode. 
Say what passed and what was not verified.\n- `$From-Chat-IMG` uses forensic visual effort, not ordinary Team effort. Completion is blocked until source inventory, visual mapping, work-order coverage, scoped dogfood/QA, and post-fix verification artifacts are present and valid.\n\n## Safety\n\n- Database access is high risk. Use read-only inspection by default; live data mutation is out of scope unless a sealed contract allows local or branch-only migration files.\n- MAD and MAD-SKS widen only explicit scoped permissions; they still do not authorize unrequested fallback implementation code.\n- Task completion requires relevant tests or justification, zero unsupported critical claims, accepted visual/wiki drift, and final evidence.\n\n## Codex App\n\nUse `.codex/SNEAKOSCOPE.md`, generated `.agents/skills`, `.codex/hooks.json`, and SKS dollar commands (`$sks`, `$team`, `$computer-use`, `$cu`, `$ppt`, `$goal`, `$dfix`, `$qa-loop`, etc.) as the app control surface.\n";
|
|
97
|
+
/**
 * Base text of the "Sneakoscope Codex Managed Rules" block (markdown, with
 * escaped newlines). agentsBlockText() reads this constant and applies two
 * literal substring replacements to it, so the sentence
 * "`$Answer`, `$Help`, and `$Wiki` stay lightweight." and the command list
 * "`$ppt`, `$goal`, `$dfix`, `$qa-loop`" act as anchors: rewording either one
 * here makes the corresponding replacement a silent no-op.
 */
const AGENTS_BLOCK = "\n# Sneakoscope Codex Managed Rules\n\nThis repository uses Sneakoscope Codex.\n\n## Core Rules\n\n- Codex native `/goal` workflows are the persisted continuation surface; Ralph is removed from the user-facing SKS surface.\n- Keep runtime state bounded: raw logs go to files, prompts get tails/summaries, and `sks gc` may prune stale artifacts.\n- Before substantive work, SKS checks npm for a newer package. If newer, ask update-now vs skip-for-this-conversation.\n- Versioning is managed by the SKS pre-commit hook; check `sks versioning status`. Bypass only with `SKS_DISABLE_VERSIONING=1`.\n- Installed harness files are immutable to LLM edits: `.codex/*`, `.agents/skills/`, `.codex/agents/`, `.sneakoscope/*policy*.json`, `AGENTS.md`, and `node_modules/sneakoscope`. The Sneakoscope engine source repo is the only automatic exception.\n- OMX/DCodex conflicts block setup/doctor. Show `sks conflicts prompt`; cleanup requires explicit human approval.\n- Do not stop at a plan when implementation was requested. Finish, verify, or report the hard blocker.\n- Do not create unrequested fallback implementation code. If the requested path is impossible, block with evidence instead of inventing substitute behavior.\n\n## Routes\n\n- General execution/code-changing prompts default to `$Team`: analysis scouts, TriWiki refresh/validate, read-only debate, consensus, concrete runtime task graph/inboxes, fresh executor team, minimum five-lane Team review, integration, Honest Mode.\n- `$Computer-Use` / `$CU` is the maximum-speed Codex Computer Use lane for UI/browser/visual tasks: skip Team debate and upfront TriWiki loops, use Codex Computer Use directly, then refresh/validate TriWiki and run Honest Mode at final closeout.\n- `$Goal` is a fast bridge/overlay for Codex native `/goal` create/pause/resume/clear persistence controls; implementation continues through the selected SKS execution route.\n- TriWiki recall must stay bounded. 
Use `sks wiki sweep` to record demote, soft-forget, archive, delete, promote-to-skill, and promote-to-rule candidates instead of injecting every old claim.\n- Team missions must keep schema-backed evidence current: `work-order-ledger.json`, `effort-decision.json`, `team-dashboard-state.json`, and route-specific visual/dogfood artifacts where applicable. Team completion requires at least five independent reviewer/QA validation lanes before integration or final, even when a prompt requests fewer reviewers. Use `sks validate-artifacts latest` before claiming those artifacts pass.\n- `$DFix` is only for tiny design/content edits and bypasses the main pipeline, Team, TriWiki/TriFix/reflection recording, and persistent route state; it still uses a one-line DFix-specific Honest check before final. `$PPT` is the restrained, information-first HTML/PDF presentation route and must seal delivery context, audience profile, STP, decision context, and 3+ pain-point/solution/aha mappings before design/render work. It must avoid over-designed visuals, carry detail through hierarchy, spacing, alignment, thin rules, source clarity, and subtle accents, preserve editable source HTML under `source-html/`, record `ppt-parallel-report.json`, and clean PPT-only temporary build files before completion. `$Answer`, `$Help`, and `$Wiki` stay lightweight.\n- For code work, surface route/guard/write scopes first, split independent worker scopes when available, and keep parent-owned integration and verification.\n- Design work reads `design.md` as the only design decision SSOT. If missing, create it through `design-system-builder` from `docs/Design-Sys-Prompt.md`; getdesign.md, getdesign-reference, and curated DESIGN.md examples from https://github.com/VoltAgent/awesome-design-md are source inputs to fuse into that SSOT or route-local style tokens, not parallel design authorities. 
Image/logo/raster assets use `imagegen`, which must prefer official Codex App built-in image generation via `$imagegen` / `gpt-image-2` before API generation.\n- Research, AutoResearch, performance, token, accuracy, SEO/GEO, or workflow-improvement claims need experiment/eval evidence. Do not claim live model accuracy without a scored dataset.\n- Treat handwritten files above 3000 lines as split-review risks. Run `sks code-structure scan` and prefer extraction before adding substantial logic.\n- Skill dreaming stays lightweight: route use records JSON counters in `.sneakoscope/skills/dream-state.json`, and full skill inventory/recommendation runs only after the configured count/cooldown threshold. Reports are recommendation-only; deleting or merging skills needs explicit user approval.\n\n## Evidence And Context\n\n- Context7 is required for external libraries, APIs, MCPs, package managers, SDKs, and generated docs: resolve-library-id then query-docs.\n- When tech stack, framework, package, runtime, or deployment-platform versions change, use Context7 or official vendor web docs, record current syntax/security/limit guidance as high-priority TriWiki claims, then refresh and validate before coding.\n- TriWiki is the context-tracking SSOT for long-running missions, Team handoffs, and context-pressure recovery. Read `.sneakoscope/wiki/context-pack.json` before each stage, use `attention.use_first` for compact high-trust recall, hydrate `attention.hydrate_first` from source before risky or lower-trust decisions, refresh after findings or artifact changes, and validate before handoffs/final claims.\n- Source priority: current code/tests/config, decision contract, vgraph, beta, GX render/snapshot metadata, LLM Wiki coordinate index, then model knowledge only if allowed.\n- Final response before stop: summarize what was done, what changed for the user/repo, what was verified, and what remains unverified or blocked; then run Honest Mode. 
Say what passed and what was not verified.\n- `$From-Chat-IMG` uses forensic visual effort, not ordinary Team effort. Completion is blocked until source inventory, visual mapping, work-order coverage, scoped dogfood/QA, and post-fix verification artifacts are present and valid.\n\n## Safety\n\n- Database access is high risk. Use read-only inspection by default; live data mutation is out of scope unless a sealed contract allows local or branch-only migration files.\n- MAD and MAD-SKS widen only explicit scoped permissions; they still do not authorize unrequested fallback implementation code.\n- Task completion requires relevant tests or justification, zero unsupported critical claims, accepted visual/wiki drift, and final evidence.\n\n## Codex App\n\nUse `.codex/SNEAKOSCOPE.md`, generated `.agents/skills`, `.codex/hooks.json`, and SKS dollar commands (`$sks`, `$team`, `$computer-use`, `$cu`, `$ppt`, `$goal`, `$dfix`, `$qa-loop`, etc.) as the app control surface.\n";
|
|
98
|
+
|
|
99
|
+
/**
 * Builds the managed-rules text from AGENTS_BLOCK with the Image-UX-Review
 * route spliced in.
 *
 * Two literal substitutions are applied, each to the first occurrence only:
 *   1. the `$Image-UX-Review` / `$UX-Review` route rule is inserted directly
 *      before the "`$Answer`, `$Help`, and `$Wiki` stay lightweight." sentence;
 *   2. `$image-ux-review` and `$ux-review` are added to the dollar-command list
 *      right after `$ppt`.
 *
 * If an anchor is not present in AGENTS_BLOCK the corresponding substitution
 * is a no-op and the text is returned without that addition.
 *
 * @returns {string} The rules text with both additions applied.
 */
function agentsBlockText() {
  const lightweightAnchor = '`$Answer`, `$Help`, and `$Wiki` stay lightweight.';
  const imageUxRouteRule = '`$Image-UX-Review` / `$UX-Review` is the imagegen/gpt-image-2 UI/UX review route: source screenshots must become generated annotated review images, those generated images must be extracted into issue ledgers, and text-only critique cannot pass the route gate.';
  const commandListAnchor = '`$ppt`, `$goal`, `$dfix`, `$qa-loop`';
  const commandListWithReview = '`$ppt`, `$image-ux-review`, `$ux-review`, `$goal`, `$dfix`, `$qa-loop`';

  // Replacer functions are used on purpose: a replacement *string* is scanned
  // for `$`-patterns ($&, $`, $', $$), and this text is full of `$` and
  // backticks. A function's return value is inserted verbatim.
  return AGENTS_BLOCK
    .replace(lightweightAnchor, () => `${imageUxRouteRule} ${lightweightAnchor}`)
    .replace(commandListAnchor, () => commandListWithReview);
}
|
|
97
104
|
|
|
98
105
|
export async function initProject(root, opts = {}) {
|
|
99
106
|
const created = [];
|
|
@@ -601,7 +608,7 @@ function upsertTomlTable(text, table, block) {
|
|
|
601
608
|
if (localOnly && existingAgents && !hasManagedAgentsBlock) {
|
|
602
609
|
created.push('AGENTS.md skipped (local-only existing file)');
|
|
603
610
|
} else {
|
|
604
|
-
await mergeManagedBlock(agentsMdPath, 'Sneakoscope Codex GX MANAGED BLOCK',
|
|
611
|
+
await mergeManagedBlock(agentsMdPath, 'Sneakoscope Codex GX MANAGED BLOCK', agentsBlockText());
|
|
605
612
|
created.push('AGENTS.md managed block');
|
|
606
613
|
}
|
|
607
614
|
|
|
@@ -723,6 +730,7 @@ function codexAppQuickReference(scope, commandPrefix) {
|
|
|
723
730
|
`Runtime root: ${commandPrefix} root shows whether SKS is using the nearest project root or the per-user global SKS runtime root; outside any project marker, runtime commands use the global root instead of writing .sneakoscope into the current random directory.`,
|
|
724
731
|
`Context Tracking: TriWiki SSOT. Before each route phase read only the latest coordinate+voxel overlay pack at .sneakoscope/wiki/context-pack.json; coordinate-only legacy packs are invalid. Use attention.use_first for compact high-trust recall and hydrate attention.hydrate_first from source before risky/lower-trust decisions. During every stage hydrate low-trust claims from source/hash/RGBA anchors; after changes run ${commandPrefix} wiki refresh or pack; before handoff/final run ${commandPrefix} wiki validate .sneakoscope/wiki/context-pack.json.`,
|
|
725
732
|
stackCurrentDocsPolicyText(commandPrefix),
|
|
733
|
+
`Team review: ${MIN_TEAM_REVIEW_POLICY_TEXT}`,
|
|
726
734
|
`Team tmux view: ${commandPrefix} team "task" prepares live watch/lane commands without opening a Team tmux view by default; add --open-tmux when you explicitly want a named Team tmux session with an overview watch pane plus color-coded split per-agent lanes; ${commandPrefix} team lane latest --agent analysis_scout_1 --follow shows one agent's status, assigned runtime tasks, recent agent events, direct messages, and fallback global tail; ${commandPrefix} team message latest --from analysis_scout_1 --to executor_1 --message "handoff note" mirrors bounded agent communication into transcript/lane panes; ${commandPrefix} team cleanup-tmux latest marks the SKS session record complete and asks follow panes to show a cleanup summary then stop.`,
|
|
727
735
|
`Runtime: open Codex App once, then run ${commandPrefix} bootstrap and ${commandPrefix} deps check. Bare ${commandPrefix} opens or reuses the default tmux/Codex CLI session; before launch it checks npm @openai/codex@latest and prompts Y/n when the installed Codex CLI is missing or outdated. ${commandPrefix} codex-app remote-control wraps the Codex CLI 0.130.0+ headless remote-control entrypoint. ${commandPrefix} tmux open is the explicit form for session/workspace flags.`,
|
|
728
736
|
`Guard: generated harness files are immutable outside the engine source repo; check ${commandPrefix} guard check; conflicts use ${commandPrefix} conflicts prompt with human approval.`
|
|
@@ -730,12 +738,13 @@ function codexAppQuickReference(scope, commandPrefix) {
|
|
|
730
738
|
}
|
|
731
739
|
|
|
732
740
|
export async function installSkills(root) {
|
|
741
|
+
const imageUxReviewSkill = (name) => `---\nname: ${name}\ndescription: $Image-UX-Review/$UX-Review imagegen/gpt-image-2 annotated UI/UX review loop.\n---\n\nUse only for $Image-UX-Review, $UX-Review, $visual-review, or $ui-ux-review UI/UX review requests. ${imageUxReviewPipelinePolicyText()} Core loop: capture or attach source UI screenshots, then invoke Codex App $imagegen with gpt-image-2 to create a new generated annotated review image from each source screenshot, then analyze the generated review image with vision/OCR into image-ux-issue-ledger.json, then apply only requested safe fixes and recheck changed screens. Text-only screenshot critique cannot satisfy this route; missing generated annotated review images must keep image-ux-review-gate.json blocked. Use Codex Computer Use for live UI/browser capture when available; browser automation screenshots are not a substitute for Codex Computer Use evidence. Required artifacts: image-ux-review-policy.json, image-ux-screen-inventory.json, image-ux-generated-review-ledger.json, image-ux-issue-ledger.json, image-ux-iteration-report.json, image-ux-review-gate.json. Finish with reflection and Honest Mode.\n`;
|
|
733
742
|
const skills = {
|
|
734
743
|
'dfix': `---\nname: dfix\ndescription: Ultralight fast design/content fix mode for $DFix or $dfix requests and inferred simple edits such as text color, copy, labels, spacing, or translation.\n---\n\nUse for tiny copy/color/label/spacing/translation edits. List exact micro-edits, inspect only needed files, apply only those edits, and run cheap verification. Bypass broad SKS routing, mission state, TriWiki/TriFix/reflection/state recording, Goal, Research, eval, redesign, and repeated full-route Honest Mode loops. Start the final answer with \`DFix 완료 요약:\` and include one \`DFix 솔직모드:\` line covering verified, not verified, and remaining issues. Read \`design.md\` for UI work when present; use imagegen for image/logo/raster assets.\n`,
|
|
735
744
|
'answer': `---\nname: answer\ndescription: Answer-only research route for ordinary questions that should not start implementation.\n---\n\nUse for explanations, comparisons, status, facts, source-backed research, or docs guidance. Use repo/TriWiki first for project-local facts; hydrate low-trust claims from source. Browse or use Context7 for current external package/API/framework/MCP docs. End with a concise answer summary plus Honest Mode; do not create missions, subagents, or file edits.\n`,
|
|
736
745
|
'sks': `---\nname: sks\ndescription: General Sneakoscope Codex command route for $SKS or $sks usage, setup, status, and workflow help.\n---\n\nUse local SKS commands: bootstrap, deps, commands, quickstart, codex-app, context7, guard, conflicts, reasoning, wiki, pipeline status, pipeline plan, skill-dream. Promote code-changing work to Team unless Answer/DFix/Help/Wiki/safety route fits. Surface route/guard/scope, use TriWiki, do not edit installed harness files outside this engine repo, and require human-approved conflict cleanup. ${skillDreamPolicyText()}\n`,
|
|
737
746
|
'wiki': `---\nname: wiki\ndescription: Dollar-command route for $Wiki TriWiki refresh, pack, validate, and prune commands.\n---\n\nUse for $Wiki or Korean wiki-refresh requests. Refresh/update/갱신: run sks wiki refresh, then validate .sneakoscope/wiki/context-pack.json. Pack: run sks wiki pack, then validate. Prune/clean/정리: use sks wiki refresh --prune, or sks wiki prune --dry-run for inspection. Report claims, anchors, trust, attention.use_first/hydrate_first, validation, and blockers. Do not start ambiguity-gated implementation, subagents, or unrelated work.\n`,
|
|
738
|
-
'team': `---\nname: team\ndescription: SKS Team orchestration for $Team/code work; $From-Chat-IMG is the explicit chat-image alias.\n---\n\nUse for $Team/code work. Ambiguity gate first, but score goal, constraints, success criteria, and codebase context before asking; ask only the lowest-clarity scope/safety/behavior/acceptance question(s), otherwise auto-seal inferred answers. Read pipeline-plan.json or run sks pipeline plan to see the runtime lane, kept/skipped stages, and verification before implementation. Write team-roster.json; team-gate.json needs team_roster_confirmed=true. executor:N means N scouts, N debate voices, then fresh N executors. After consensus, compile team-graph.json, team-runtime-tasks.json, team-decomposition-report.json, and team-inbox/ so worker handoff uses concrete runtime task ids with role/path/domain/lane hints. Refresh/validate TriWiki before debate, implementation, review, and final; consume attention.use_first and hydrate attention.hydrate_first before risky decisions. ${outcomeRubricPolicyText()} ${speedLanePolicyText()} ${skillDreamPolicyText()} Log events and use sks team message for bounded inter-agent communication in transcript/lane panes. Color-coded tmux lanes distinguish overview/scout/planning/execution/review/safety sessions in one tmux window using split panes when tmux is available. $Team/$team plus sks --mad uses the MAD-SKS permission gate module: live server work, normal DB writes, Supabase MCP writes, direct SQL, schema cleanup, and needed migrations are open for the active invocation; only catastrophic DB wipe/all-row/project-management guards remain. End with cleanup-tmux or a cleanup event so follow panes show cleanup and stop; pass team-session-cleanup.json, then reflection and Honest Mode. Parent integrates/verifies.\n\n${chatCaptureIntakeText()}\n`,
|
|
747
|
+
'team': `---\nname: team\ndescription: SKS Team orchestration for $Team/code work; $From-Chat-IMG is the explicit chat-image alias.\n---\n\nUse for $Team/code work. Ambiguity gate first, but score goal, constraints, success criteria, and codebase context before asking; ask only the lowest-clarity scope/safety/behavior/acceptance question(s), otherwise auto-seal inferred answers. Read pipeline-plan.json or run sks pipeline plan to see the runtime lane, kept/skipped stages, and verification before implementation. Write team-roster.json; team-gate.json needs team_roster_confirmed=true. executor:N means N scouts, N debate voices, then fresh N executors. ${MIN_TEAM_REVIEW_POLICY_TEXT} After consensus, compile team-graph.json, team-runtime-tasks.json, team-decomposition-report.json, and team-inbox/ so worker handoff uses concrete runtime task ids with role/path/domain/lane hints. Refresh/validate TriWiki before debate, implementation, review, and final; consume attention.use_first and hydrate attention.hydrate_first before risky decisions. ${outcomeRubricPolicyText()} ${speedLanePolicyText()} ${skillDreamPolicyText()} Log events and use sks team message for bounded inter-agent communication in transcript/lane panes. Color-coded tmux lanes distinguish overview/scout/planning/execution/review/safety sessions in one tmux window using split panes when tmux is available. $Team/$team plus sks --mad uses the MAD-SKS permission gate module: live server work, normal DB writes, Supabase MCP writes, direct SQL, schema cleanup, and needed migrations are open for the active invocation; only catastrophic DB wipe/all-row/project-management guards remain. End with cleanup-tmux or a cleanup event so follow panes show cleanup and stop; pass team-session-cleanup.json, then reflection and Honest Mode. Parent integrates/verifies.\n\n${chatCaptureIntakeText()}\n`,
|
|
739
748
|
'from-chat-img': `---\nname: from-chat-img\ndescription: Explicit $From-Chat-IMG Team alias for chat screenshot plus attachment analysis.\n---\n\nUse only for From-Chat-IMG/$From-Chat-IMG. It enters the normal Team pipeline. Treat uploads as chat screenshot plus originals. Use Codex Computer Use visual inspection when available, list requirements first, match regions to attachments with confidence, write ${FROM_CHAT_IMG_COVERAGE_ARTIFACT}, ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT}, ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT}, and ${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}, then continue Team gates, review, reflection, and Honest Mode. ${CODEX_COMPUTER_USE_ONLY_POLICY} The ledger must account for every visible customer request, screenshot image region, and separate attachment; ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT} must have a checked item for each request, image-region/attachment match, work item, scoped QA-LOOP, and verification step; ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT} stores temporary TriWiki-backed session context with expires_after_sessions=${FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS}. ${FROM_CHAT_IMG_QA_LOOP_ARTIFACT} must prove QA-LOOP ran over the exact customer-request work-order range after implementation, with every work item covered, post-fix verification complete, and zero unresolved findings. team-gate.json cannot pass From-Chat-IMG completion until unresolved_items is empty, every checklist box is checked, and scoped_qa_loop_completed=true.\n`,
|
|
740
749
|
'qa-loop': `---\nname: qa-loop\ndescription: $QA-LOOP dogfoods UI/API as human proxy with safety gates, Codex Computer Use-only UI evidence, safe fixes, rechecks, and a QA report.\n---\n\nUse only $QA-LOOP. Ask scope, target, mutation, login. Credentials are runtime-only; never save secrets. UI-level E2E needs Codex Computer Use evidence or must be marked unverified; Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, and other browser automation do not satisfy UI/browser verification. Deployed targets are read-only; destructive removal is forbidden. After answer/run, dogfood real flows, apply safe contract-allowed code/test/docs fixes, recheck, and do not pass qa-gate.json with unresolved findings or without post_fix_verification_complete. Finish qa-ledger, date/version report, gate, completion summary, and Honest Mode.\n`,
|
|
741
750
|
'ppt': `---\nname: ppt\ndescription: $PPT information-first HTML/PDF presentation pipeline with STP, audience, pain-point, format, research, design-system, and verification questions.\n---\n\nUse only when the user invokes $PPT or asks to create a presentation, deck, slides, pitch deck, proposal deck, HTML presentation, or PDF presentation artifact. Before artifact work, seal presentation-specific ambiguity answers: delivery context, target audience profile including role/average age/job/industry/topic familiarity/decision power, STP strategy, decision context and objections, and 3+ pain-point to solution mappings with expected aha moments. Presentation design must be simple, restrained, and information-first: avoid over-designed decoration, ornamental gradients, nested cards, and effects that compete with the message. Design detail should be embedded through typography hierarchy, spacing, alignment, thin rules, source clarity, and subtle accents. ${pptPipelineAllowlistPolicyText()} Use design.md as the only design decision SSOT. If design.md is missing, use docs/Design-Sys-Prompt.md plus getdesign-reference and curated DESIGN.md examples from ${AWESOME_DESIGN_MD_REFERENCE.url} only as source inputs, then fuse them into route-local PPT style tokens with a recorded design_ssot instead of treating references as parallel authorities. If generated image assets or slide visual critique are needed, use imagegen/gpt-image-2 only when that asset/review need is explicitly sealed in the $PPT contract; prefer Codex App built-in image generation (${CODEX_APP_IMAGE_GENERATION_DOC_URL}) and use the OpenAI Image API with OPENAI_API_KEY when CLI-side required image assets can be generated. 
Use web or Context7 evidence only when external facts/libraries/current docs are required by the PPT contract, record verified claims in ppt-fact-ledger.json, record generated image asset plans/results/blockers in ppt-image-asset-ledger.json, then create the PDF plus editable source HTML under source-html/, keep independent strategy/render/file-write phases parallel where inputs allow, record ppt-parallel-report.json, run the bounded ppt-review-policy/ppt-review-ledger/ppt-iteration-report loop, and verify readability, overlap, format fit, source coverage, export state, unsupported-claim status, image-asset completion, review-loop termination, and temporary build files cleanup. Finish with reflection and Honest Mode; do not skip STP/audience questions for presentation artifacts.\n`,
|
|
@@ -749,7 +758,7 @@ export async function installSkills(root) {
|
|
|
749
758
|
'mad-sks': `---\nname: mad-sks\ndescription: Explicit high-risk authorization modifier for $MAD-SKS scoped Supabase MCP DB permission widening.\n---\n\nUse only when the user explicitly invokes $MAD-SKS or top-level sks --mad. It can be combined with another route, such as $MAD-SKS $Team or $DB ... $MAD-SKS; in that case the other command remains the primary workflow and MAD-SKS is only the temporary permission grant. The widened permission applies only while the active mission gate is open, must be deactivated when the task ends, and opens live server work, Supabase MCP database writes, column/schema cleanup, direct execute SQL, migration application when required, and normal targeted DB writes. Keep only catastrophic safeguards: whole database/schema/table removal, truncate, all-row delete/update, reset, dangerous project/branch management, credential exfiltration, persistent security weakening, and unrequested fallback implementation remain blocked. Do not carry MAD-SKS permission into later prompts or routes. The permission profile is centralized in src/core/permission-gates.mjs so skill/hook/MCP-style gates share one decision function.\n`,
|
|
750
759
|
'gx': `---\nname: gx\ndescription: Dollar-command route for $GX or $gx deterministic GX visual context cartridges.\n---\n\nUse when the user invokes $GX/$gx or asks for architecture/context visualization through SKS. Prefer sks gx init, render, validate, drift, and snapshot. vgraph.json remains the source of truth.\n`,
|
|
751
760
|
'help': `---\nname: help\ndescription: Dollar-command route for $Help or $help explaining installed SKS commands and workflows.\n---\n\nUse when the user invokes $Help/$help or asks what commands exist. Prefer concise output from sks commands, sks usage <topic>, sks quickstart, sks aliases, and sks codex-app.\n`,
|
|
752
|
-
'prompt-pipeline': `---\nname: prompt-pipeline\ndescription: Default SKS prompt optimization pipeline for execution prompts; Answer and DFix bypass it.\n---\n\nClassify intent: Answer only for real questions; question-shaped implicit instructions, complaints, and mandatory-policy statements route to Team. DFix handles tiny design/content; code defaults to Team unless safety/research/GX route fits. Infer goal, target, constraints, acceptance, risk, and smallest safe route. Score ambiguity first using goal, constraints, success criteria, and codebase context; ask only the lowest-clarity scope/safety/behavior/acceptance-changing questions within a small question budget, otherwise seal inferred answers. Materialize pipeline-plan.json for the runtime lane, kept/skipped stages, no-fallback invariant, and verification; inspect with sks pipeline plan, adding --proof-field when changed files are known. Code work surfaces route/guard/scopes, materializes team-roster.json from default or explicit counts before implementation, compiles concrete Team runtime graph/inbox artifacts after consensus, and parent owns integration/tests/Context7/Honest Mode. ${outcomeRubricPolicyText()} ${speedLanePolicyText()} ${skillDreamPolicyText()}\n\n${chatCaptureIntakeText()}\n\nDesign: non-PPT UI/UX reads design.md; if missing use design-system-builder; use imagegen for image/logo/raster, and imagegen must prefer Codex App built-in image generation (${CODEX_APP_IMAGE_GENERATION_DOC_URL}) before API generation. For $PPT, ${pptPipelineAllowlistPolicyText()} ${getdesignReferencePolicyText()} TriWiki context-tracking SSOT: .sneakoscope/wiki/context-pack.json; read only the latest coordinate+voxel overlay pack before every route stage, run sks wiki refresh/pack after changes, validate before handoffs/final.\n`,
|
|
761
|
+
'prompt-pipeline': `---\nname: prompt-pipeline\ndescription: Default SKS prompt optimization pipeline for execution prompts; Answer and DFix bypass it.\n---\n\nClassify intent: Answer only for real questions; question-shaped implicit instructions, complaints, and mandatory-policy statements route to Team. DFix handles tiny design/content; code defaults to Team unless safety/research/GX route fits. Infer goal, target, constraints, acceptance, risk, and smallest safe route. Score ambiguity first using goal, constraints, success criteria, and codebase context; ask only the lowest-clarity scope/safety/behavior/acceptance-changing questions within a small question budget, otherwise seal inferred answers. Materialize pipeline-plan.json for the runtime lane, kept/skipped stages, no-fallback invariant, and verification; inspect with sks pipeline plan, adding --proof-field when changed files are known. Code work surfaces route/guard/scopes, materializes team-roster.json from default or explicit counts before implementation, compiles concrete Team runtime graph/inbox artifacts after consensus, and parent owns integration/tests/Context7/Honest Mode. ${outcomeRubricPolicyText()} ${speedLanePolicyText()} ${skillDreamPolicyText()}\n\n${chatCaptureIntakeText()}\n\nDesign: non-PPT UI/UX reads design.md; if missing use design-system-builder; use imagegen for image/logo/raster, and imagegen must prefer Codex App built-in image generation (${CODEX_APP_IMAGE_GENERATION_DOC_URL}) before API generation. For UI/UX review/audit requests that mention image generation, gpt-image-2, callouts, or annotated review images, route to $Image-UX-Review/$UX-Review and require generated annotated review image evidence before issue extraction; do not satisfy that route with text-only critique. 
For $PPT, ${pptPipelineAllowlistPolicyText()} ${getdesignReferencePolicyText()} TriWiki context-tracking SSOT: .sneakoscope/wiki/context-pack.json; read only the latest coordinate+voxel overlay pack before every route stage, run sks wiki refresh/pack after changes, validate before handoffs/final.\n`,
|
|
753
762
|
'reasoning-router': `---\nname: reasoning-router\ndescription: Temporary SKS reasoning-effort routing for every command and pipeline route.\n---\n\nmedium: simple copy/color/discovery/setup/mechanical edits. high: logic, safety, architecture, DB, orchestration, refactor, multi-file work. xhigh: research, AutoResearch, falsification, benchmarks, SEO/GEO, open-ended discovery, and From-Chat-IMG image work-order analysis. Routing is temporary; return to default after the gate. Inspect with sks reasoning and sks pipeline status.\n`,
|
|
754
763
|
'pipeline-runner': `---\nname: pipeline-runner\ndescription: Execute SKS dollar-command routes as stateful pipelines with mission artifacts, route gates, Context7 evidence, temporary reasoning routing, reflection, and Honest Mode.\n---\n\nEvery $ command is a route. Use current.json, mission artifacts, and pipeline-plan.json as the execution plan: it records the lane, skipped stages, kept stages, verification, and no-unrequested-fallback invariant. Use temporary reasoning, TriWiki before stages, source hydration, Context7 when required, Team cleanup before reflection, reflection for full routes, and completion summary plus Honest Mode before final. Surface guard/scopes, record evidence, refresh/pack/validate TriWiki, and check sks pipeline status/resume/plan. ${speedLanePolicyText()} ${skillDreamPolicyText()}\n`,
|
|
755
764
|
'context7-docs': `---\nname: context7-docs\ndescription: Enforce Context7 MCP documentation evidence for SKS routes that depend on external libraries, frameworks, APIs, MCPs, package managers, DB SDKs, or generated docs.\n---\n\nWhen required, resolve-library-id, then query-docs for the resolved id. Legacy get-library-docs evidence is accepted. Prefer sks context7 tools/resolve/docs/evidence and finish only after both evidence stages exist. Check setup with sks context7 check.\n`,
|
|
@@ -766,6 +775,10 @@ export async function installSkills(root) {
|
|
|
766
775
|
'turbo-context-pack': `---\nname: turbo-context-pack\ndescription: Build ultra-low-token context packet with Q4 bits, Q3 tags, top-K claims, and minimal evidence.\n---\n\nDefault to Q4/Q3 plus TriWiki RGBA anchors and attention.use_first. Add Q2/Q1 only when needed or when attention.hydrate_first says source hydration is required. Keep id, hash, path, and coordinate tuple for hydration.\n`,
|
|
767
776
|
'research-discovery': `---\nname: research-discovery\ndescription: Run SKS Research Mode for frontier-style research, hypotheses, novelty ledgers, falsification, and experiments.\n---\n\nFrame criteria, map assumptions, generate hypotheses, falsify, keep surviving insights, and record novelty/confidence/falsifiers/next experiments. Do not overclaim.\n`,
|
|
768
777
|
'performance-evaluator': `---\nname: performance-evaluator\ndescription: Evaluate SKS performance, token-saving, accuracy-proxy, context-compression, or workflow improvements.\n---\n\nUse sks eval run/compare before claims. Report token_savings_pct, accuracy_delta/proxy, required_recall, support, and meaningful_improvement.\n`,
|
|
778
|
+
'image-ux-review': imageUxReviewSkill('image-ux-review'),
|
|
779
|
+
'ux-review': imageUxReviewSkill('ux-review'),
|
|
780
|
+
'visual-review': imageUxReviewSkill('visual-review'),
|
|
781
|
+
'ui-ux-review': imageUxReviewSkill('ui-ux-review'),
|
|
769
782
|
'imagegen': `---\nname: imagegen\ndescription: Required bridge to Codex App built-in image generation for logos, image assets, raster visuals, and image edits.\n---\n\nUse for generated or edited image assets: logo, product image, illustration, sprite, mockup, texture, cutout, or bitmap. Prefer the official Codex App built-in image generation feature documented at ${CODEX_APP_IMAGE_GENERATION_DOC_URL}: ask naturally or invoke \`$imagegen\`; Codex uses built-in image generation with gpt-image-2 and counts it against Codex usage limits. For larger batches only, use the API path when OPENAI_API_KEY is explicitly available and the user or route contract allows API-priced generation. Do not substitute placeholder SVG/HTML/CSS for requested raster assets; follow design.md when relevant.\n`,
|
|
770
783
|
'getdesign-reference': `---\nname: getdesign-reference\ndescription: Use getdesign.md official design reference as an input to the design.md SSOT for UI/UX, presentation, and HTML/PDF systems.\n---\n\nUse when creating or improving design.md, UI/UX design systems, deck-like HTML artifacts, presentation PDFs, or brand-inspired visual systems. design.md is the only design decision SSOT; reference ${GETDESIGN_REFERENCE.url}, ${GETDESIGN_REFERENCE.docs_url}, and ${AWESOME_DESIGN_MD_REFERENCE.url} only as source inputs to synthesize or update that SSOT or a route-local style-token artifact. Prefer the official Codex skill if available with \`${GETDESIGN_REFERENCE.codex_skill_install}\`. If the skill CLI is unavailable, use this generated skill plus official docs/API/CLI/SDK references and curated DESIGN.md examples as inputs. Do not claim getdesign MCP is configured unless a current official MCP surface is actually installed.\n`,
|
|
771
784
|
'design-system-builder': `---\nname: design-system-builder\ndescription: Create the single design.md SSOT from docs/Design-Sys-Prompt.md when UI/UX work has no design system.\n---\n\nWhen \`design.md\` is missing, read docs/Design-Sys-Prompt.md as the builder prompt, inspect product/UI context, and use getdesign-reference, official getdesign.md docs, and curated DESIGN.md examples from ${AWESOME_DESIGN_MD_REFERENCE.url} only as source inputs. Fuse those inputs into one design.md SSOT with tokens, components, states, imagery, accessibility, and verification rules; do not leave multiple design files or references as competing authorities. Use the plan tool only for real ambiguity plus default font recommendation. Use imagegen for assets.\n`,
|
|
@@ -832,7 +845,7 @@ async function removeStaleGeneratedSkillsFromManifest(root, skillNames) {
|
|
|
832
845
|
}
|
|
833
846
|
|
|
834
847
|
function enrichSkillContent(name, content) {
|
|
835
|
-
if (!['sks', 'answer', 'wiki', 'team', 'qa-loop', 'ppt', 'computer-use', 'computer-use-fast', 'cu', 'goal', 'research', 'autoresearch', 'db', 'gx', 'reflection', 'prompt-pipeline', 'pipeline-runner', 'context7-docs', 'turbo-context-pack', 'hproof-evidence-bind'].includes(name)) return content;
|
|
848
|
+
if (!['sks', 'answer', 'wiki', 'team', 'qa-loop', 'ppt', 'image-ux-review', 'ux-review', 'visual-review', 'ui-ux-review', 'computer-use', 'computer-use-fast', 'cu', 'goal', 'research', 'autoresearch', 'db', 'gx', 'reflection', 'prompt-pipeline', 'pipeline-runner', 'context7-docs', 'turbo-context-pack', 'hproof-evidence-bind'].includes(name)) return content;
|
|
836
849
|
const text = String(content || '').trimEnd();
|
|
837
850
|
const activation = pipelineActivationText(name);
|
|
838
851
|
if (text.includes('TriWiki context-tracking SSOT')) {
|
|
@@ -850,7 +863,7 @@ Context tracking:
|
|
|
850
863
|
}
|
|
851
864
|
|
|
852
865
|
function pipelineActivationText(name) {
|
|
853
|
-
const stateful = new Set(['sks', 'team', 'qa-loop', 'ppt', 'computer-use', 'computer-use-fast', 'cu', 'goal', 'research', 'autoresearch', 'db', 'gx', 'prompt-pipeline', 'pipeline-runner']);
|
|
866
|
+
const stateful = new Set(['sks', 'team', 'qa-loop', 'ppt', 'image-ux-review', 'ux-review', 'visual-review', 'ui-ux-review', 'computer-use', 'computer-use-fast', 'cu', 'goal', 'research', 'autoresearch', 'db', 'gx', 'prompt-pipeline', 'pipeline-runner']);
|
|
854
867
|
if (!stateful.has(name)) return '';
|
|
855
868
|
return `Codex App pipeline activation:
|
|
856
869
|
- If the SKS UserPromptSubmit hook already injected route context, follow that context.
|
|
@@ -930,10 +943,10 @@ async function removeDirIfEmpty(dir) {
|
|
|
930
943
|
async function installCodexAgents(root) {
|
|
931
944
|
const agents = {
|
|
932
945
|
'analysis-scout.toml': `name = "analysis_scout"\ndescription = "Read-only Team analysis scout. Maps one independent repo/docs/tests/API/risk/user-friction slice and returns TriWiki-ready source-backed findings before debate starts."\nmodel = "gpt-5.5"\nmodel_reasoning_effort = "high"\nsandbox_mode = "read-only"\ndeveloper_instructions = """\nYou are an SKS Team analysis scout.\nDo not edit files.\nOwn exactly one investigation slice assigned by the parent orchestrator.\nMap relevant source files, docs, tests, APIs, DB or safety risks, UX friction, and likely implementation boundaries.\nReturn concise source-backed claims suitable for team-analysis.md and TriWiki ingestion: claim, source path, evidence hash or quoted anchor, risk, confidence, and recommended implementation slice.\nDo not debate the final plan and do not implement code.\nAlso return a concise LIVE_EVENT line that the parent can record with sks team event.\n"""\n`,
|
|
933
|
-
'team-consensus.toml': `name = "team_consensus"\ndescription = "Planning and debate specialist for SKS Team mode. Maps options, constraints, role-persona risks, and proposes the agreed objective before implementation starts."\nmodel = "gpt-5.5"\nmodel_reasoning_effort = "high"\nsandbox_mode = "read-only"\ndeveloper_instructions = """\nYou are the SKS Team consensus specialist.\nDo not edit files.\nMap the affected code paths, viable approaches, constraints, risks, and acceptance criteria.\nRun the debate as role-persona synthesis: final users are low-context, self-interested, stubborn, and inconvenience-averse; executors are capable developers; reviewers are strict.\nArgue for the smallest coherent objective that can be handed to a fresh executor_N development team.\nReturn: recommended objective, rejected alternatives, implementation slices, required reviewers, user-friction risks, and unresolved risks.\nAlso return a concise LIVE_EVENT line that the parent can record with sks team event.\n"""\n`,
|
|
946
|
+
'team-consensus.toml': `name = "team_consensus"\ndescription = "Planning and debate specialist for SKS Team mode. Maps options, constraints, role-persona risks, and proposes the agreed objective before implementation starts."\nmodel = "gpt-5.5"\nmodel_reasoning_effort = "high"\nsandbox_mode = "read-only"\ndeveloper_instructions = """\nYou are the SKS Team consensus specialist.\nDo not edit files.\nMap the affected code paths, viable approaches, constraints, risks, and acceptance criteria.\nRun the debate as role-persona synthesis: final users are low-context, self-interested, stubborn, and inconvenience-averse; executors are capable developers; reviewers are strict.\nArgue for the smallest coherent objective that can be handed to a fresh executor_N development team.\nPlan for at least ${MIN_TEAM_REVIEWER_LANES} independent reviewer/QA validation lanes before integration or final.\nReturn: recommended objective, rejected alternatives, implementation slices, required reviewers, user-friction risks, and unresolved risks.\nAlso return a concise LIVE_EVENT line that the parent can record with sks team event.\n"""\n`,
|
|
934
947
|
'implementation-worker.toml': `name = "implementation_worker"\ndescription = "Implementation specialist for SKS Team mode. Owns one bounded write set and coordinates with other executor_N workers."\nmodel = "gpt-5.5"\nmodel_reasoning_effort = "high"\nsandbox_mode = "workspace-write"\ndeveloper_instructions = """\nYou are an SKS Team executor/developer in the fresh development bundle.\nYou are not alone in the codebase. Other executor_N workers may be editing disjoint files.\nOnly edit the files or module slice assigned to you.\nDo not revert or overwrite edits made by others.\nRead local patterns first, make the smallest correct change, avoid adding user friction, run focused verification for your slice, and report changed paths plus evidence.\nDo not create fallback implementation code, substitute behavior, mock behavior, or compatibility shims unless the user or sealed decision contract explicitly requested them.\nRespect all SKS hooks, DB safety rules, no-question run rules, and H-Proof completion gates.\nAlso return concise LIVE_EVENT lines for started, blocked, changed files, verification, and final result so the parent can record them.\n"""\n`,
|
|
935
948
|
'db-safety-reviewer.toml': `name = "db_safety_reviewer"\ndescription = "Read-only database safety reviewer for SQL, migrations, Supabase, RLS, destructive-operation risk, and rollback safety."\nmodel = "gpt-5.5"\nmodel_reasoning_effort = "high"\nsandbox_mode = "read-only"\ndeveloper_instructions = """\nYou are a database safety reviewer.\nNever modify files or execute destructive commands.\nReview migrations, SQL, Supabase RLS, transaction boundaries, rollback safety, and MCP database tool usage.\nBlock DROP, TRUNCATE, mass DELETE/UPDATE, db reset, db push, project deletion, branch reset/merge/delete, RLS disabling, and live execute_sql writes.\nReturn concrete risks, exact file references, and required fixes.\nAlso return a concise LIVE_EVENT line that the parent can record with sks team event.\n"""\n`,
|
|
936
|
-
'qa-reviewer.toml': `name = "qa_reviewer"\ndescription = "Strict read-only verification reviewer for correctness, regressions, missing tests, user friction, and final evidence."\nmodel = "gpt-5.5"\nmodel_reasoning_effort = "high"\nsandbox_mode = "read-only"\ndeveloper_instructions = """\nYou are an SKS Team strict reviewer.\nDo not edit files.\nReview correctness, edge cases, regression risk, missing tests, unsupported claims, and whether the final evidence proves the claimed outcome.\nAlso evaluate practical friction from the viewpoint of a stubborn, low-context final user who dislikes inconvenience.\nPrioritize concrete findings with file references and focused verification suggestions.\nFlag any unrequested fallback implementation code, substitute behavior, mock behavior, or compatibility shim as a blocking finding unless the user or sealed decision contract explicitly requested it.\nReturn no findings if the implementation is sound, and clearly list residual test gaps.\nAlso return a concise LIVE_EVENT line that the parent can record with sks team event.\n"""\n`
|
|
949
|
+
'qa-reviewer.toml': `name = "qa_reviewer"\ndescription = "Strict read-only verification reviewer for correctness, regressions, missing tests, user friction, and final evidence."\nmodel = "gpt-5.5"\nmodel_reasoning_effort = "high"\nsandbox_mode = "read-only"\ndeveloper_instructions = """\nYou are an SKS Team strict reviewer.\nDo not edit files.\nReview correctness, edge cases, regression risk, missing tests, unsupported claims, and whether the final evidence proves the claimed outcome.\nTeam review must cover at least ${MIN_TEAM_REVIEWER_LANES} independent reviewer/QA validation lanes before integration or final; flag missing review lane evidence.\nAlso evaluate practical friction from the viewpoint of a stubborn, low-context final user who dislikes inconvenience.\nPrioritize concrete findings with file references and focused verification suggestions.\nFlag any unrequested fallback implementation code, substitute behavior, mock behavior, or compatibility shim as a blocking finding unless the user or sealed decision contract explicitly requested it.\nReturn no findings if the implementation is sound, and clearly list residual test gaps.\nAlso return a concise LIVE_EVENT line that the parent can record with sks team event.\n"""\n`
|
|
937
950
|
};
|
|
938
951
|
const dir = path.join(root, '.codex', 'agents');
|
|
939
952
|
await ensureDir(dir);
|
package/src/core/mission.mjs
CHANGED
|
@@ -46,8 +46,20 @@ export async function findLatestMission(root) {
|
|
|
46
46
|
if (!(await exists(dir))) return null;
|
|
47
47
|
const fs = await import('node:fs/promises');
|
|
48
48
|
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
49
|
-
const ids = entries.filter((e) => e.isDirectory() && e.name.startsWith('M-')).map((e) => e.name)
|
|
50
|
-
|
|
49
|
+
const ids = entries.filter((e) => e.isDirectory() && e.name.startsWith('M-')).map((e) => e.name);
|
|
50
|
+
const candidates = await Promise.all(ids.map(async (id) => {
|
|
51
|
+
const dirPath = missionDir(root, id);
|
|
52
|
+
const stat = await fs.stat(dirPath).catch(() => null);
|
|
53
|
+
const mission = await readJson(path.join(dirPath, 'mission.json'), {}).catch(() => ({}));
|
|
54
|
+
const createdMs = Date.parse(mission.created_at || mission.updated_at || '');
|
|
55
|
+
return {
|
|
56
|
+
id,
|
|
57
|
+
createdMs: Number.isFinite(createdMs) ? createdMs : 0,
|
|
58
|
+
mtimeMs: stat?.mtimeMs || 0
|
|
59
|
+
};
|
|
60
|
+
}));
|
|
61
|
+
candidates.sort((a, b) => (a.createdMs - b.createdMs) || (a.mtimeMs - b.mtimeMs) || a.id.localeCompare(b.id));
|
|
62
|
+
return candidates.at(-1)?.id || null;
|
|
51
63
|
}
|
|
52
64
|
|
|
53
65
|
export async function setCurrent(root, patch) {
|