sneakoscope 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -0
- package/crates/sks-core/Cargo.lock +1 -1
- package/crates/sks-core/Cargo.toml +1 -1
- package/crates/sks-core/src/main.rs +1 -1
- package/dist/bin/sks.js +1 -1
- package/dist/build-manifest.json +7 -1
- package/dist/commands/bench.d.ts +24 -0
- package/dist/commands/image-ux-review.d.ts +182 -0
- package/dist/commands/wiki.d.ts +1 -1
- package/dist/core/bench.d.ts +24 -0
- package/dist/core/bench.js +13 -0
- package/dist/core/codex-exec-output-schema.d.ts +25 -0
- package/dist/core/codex-exec-output-schema.js +76 -1
- package/dist/core/commands/bench-command.d.ts +24 -0
- package/dist/core/commands/image-ux-review-command.d.ts +182 -0
- package/dist/core/commands/image-ux-review-command.js +157 -20
- package/dist/core/commands/wiki-command.d.ts +2 -2
- package/dist/core/evidence/evidence-router.js +10 -0
- package/dist/core/evidence/evidence-schema.d.ts +1 -1
- package/dist/core/evidence/evidence-schema.js +5 -0
- package/dist/core/fsx.d.ts +1 -1
- package/dist/core/fsx.js +1 -1
- package/dist/core/image-ux-review/callout-extraction.d.ts +42 -0
- package/dist/core/image-ux-review/callout-extraction.js +23 -7
- package/dist/core/image-ux-review/fix-loop.d.ts +8 -0
- package/dist/core/image-ux-review/fix-loop.js +20 -0
- package/dist/core/image-ux-review/imagegen-adapter.d.ts +16 -1
- package/dist/core/image-ux-review/imagegen-adapter.js +194 -7
- package/dist/core/image-ux-review/patch-handoff.d.ts +87 -0
- package/dist/core/image-ux-review/patch-handoff.js +56 -0
- package/dist/core/image-ux-review/real-callout-extractor.d.ts +34 -0
- package/dist/core/image-ux-review/real-callout-extractor.js +84 -0
- package/dist/core/image-ux-review/recapture.d.ts +19 -0
- package/dist/core/image-ux-review/recapture.js +28 -3
- package/dist/core/image-ux-review.d.ts +62 -0
- package/dist/core/image-ux-review.js +54 -5
- package/dist/core/performance-budgets.json +12 -0
- package/dist/core/proof/evidence-collector.d.ts +1 -1
- package/dist/core/proof/proof-writer.d.ts +6 -0
- package/dist/core/proof/proof-writer.js +18 -0
- package/dist/core/scouts/scout-output-parser.d.ts +1 -1
- package/dist/core/scouts/scout-output-parser.js +21 -1
- package/dist/core/structured-output-adapter.d.ts +34 -0
- package/dist/core/structured-output-adapter.js +157 -0
- package/dist/core/triwiki-wrongness/wrongness-cli.d.ts +2 -2
- package/dist/core/triwiki-wrongness/wrongness-ledger.d.ts +10 -0
- package/dist/core/triwiki-wrongness/wrongness-ledger.js +18 -0
- package/dist/core/triwiki-wrongness/wrongness-proof-linker.d.ts +1 -1
- package/dist/core/triwiki-wrongness/wrongness-retrieval.d.ts +1 -1
- package/dist/core/triwiki-wrongness/wrongness-schema.d.ts +1 -1
- package/dist/core/triwiki-wrongness/wrongness-schema.js +22 -1
- package/dist/core/trust-kernel/trust-report.js +6 -0
- package/dist/core/version.d.ts +1 -1
- package/dist/core/version.js +1 -1
- package/dist/core/wiki-image/validation.js +21 -0
- package/package.json +3 -2
- package/schemas/codex/image-ux-issue-ledger.schema.json +24 -5
package/README.md
CHANGED
|
@@ -4,6 +4,8 @@ Fast legacy-free proof-first Codex trust layer with image-based Voxel TriWiki.
|
|
|
4
4
|
|
|
5
5
|
Sneakoscope Codex (`sks`) is a Codex CLI/App harness that makes repeatable Codex work auditable.
|
|
6
6
|
|
|
7
|
+
SKS **1.0.9** is the Official Docs Ultimate Kernel: Codex CLI `rust-v0.132.0` structured resume output is now an actual runner, `gpt-image-2` review generation uses Codex App `$imagegen` evidence or an optional OpenAI Images API fallback, Structured Outputs strict schemas are the extraction fallback, and `$UX-Review this screenshot with gpt-image-2 callouts, then fix the issues` blocks fake callouts until generated image pixels are schema-extracted, patched, recaptured, and re-reviewed.
|
|
8
|
+
|
|
7
9
|
SKS **1.0.8** is the Codex 0.132 UX-Review Seal: Codex CLI `rust-v0.132.0` compatibility is explicit, `codex exec resume --output-schema` is the preferred structured-output path, and `$UX-Review this screenshot with gpt-image-2 callouts, then fix the issues` is a real visual trust loop from source screenshot fidelity to generated callout ingestion, issue ledger extraction, bounded safe fixes, recapture/re-review, Image Voxel relations, Wrongness, Completion Proof, and Trust Report gates.
|
|
8
10
|
|
|
9
11
|
SKS **1.0.7** is the Ultimate Final Completion seal for the Codex trust harness: Computer Use live evidence is an opt-in, local-only macOS evidence path with explicit `probe_only`, `live_capture_attempted`, `live_capture_success`, and `live_capture_blocked` modes; `codex-lb setup` reports durable persistence versus `process_only_ephemeral` honestly; and docs/release readiness checks block mock/probe/live overclaims.
|
|
@@ -27,6 +29,22 @@ SKS does not try to clone every other harness. It focuses on one thing: making C
|
|
|
27
29
|

|
|
28
30
|
|
|
29
31
|
|
|
32
|
+
## 1.0.9 Official Docs Ultimate Kernel
|
|
33
|
+
|
|
34
|
+
1.0.9 closes the remaining policy-vs-run-path gap. Attached generated images are recorded with `callout_extraction_status: pending` and empty callouts until `codex exec resume --output-schema` or the OpenAI Structured Outputs fallback returns a schema-valid issue ledger. Real `gpt-image-2` generation records request/response artifacts, source SHA-256, high-fidelity automatic input metadata, output hashes, local-only privacy, and blockers instead of substituting prose or generic callouts.
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
sks ux-review run --image ./screenshot.png --generate-callouts --json
|
|
38
|
+
sks ux-review attach-generated latest --image ./generated-callouts.png --json
|
|
39
|
+
sks ux-review extract-issues --generated-image ./generated-callouts.png --json
|
|
40
|
+
sks ux-review attach-after latest --image ./after.png --json
|
|
41
|
+
sks ux-review proof latest --json
|
|
42
|
+
sks ux-review explain latest
|
|
43
|
+
npm run official-docs:compat
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Release checks now include `npm run official-docs:compat` and write `.sneakoscope/reports/official-docs-compat-1.0.9.json` plus `.sneakoscope/reports/release-readiness-1.0.9.json`.
|
|
47
|
+
|
|
30
48
|
## 1.0.8 Codex 0.132 UX-Review Seal
|
|
31
49
|
|
|
32
50
|
1.0.8 makes UX-Review the representative SKS visual trust harness rather than a policy-only fixture. The CLI/App route now records source screenshot original-resolution metadata, requires real `gpt-image-2` generated callout images before verified UX claims, extracts visible callouts into `schemas/codex/image-ux-issue-ledger.schema.json`, plans bounded P0/P1-first fixes, and requires recapture/re-review before visual fix verification.
|
|
@@ -4,7 +4,7 @@ use std::io::{self, Read, Seek, SeekFrom};
|
|
|
4
4
|
fn main() {
|
|
5
5
|
let mut args = std::env::args().skip(1);
|
|
6
6
|
match args.next().as_deref() {
|
|
7
|
-
Some("--version") => println!("sks-rs 1.0.8"),
|
|
7
|
+
Some("--version") => println!("sks-rs 1.0.9"),
|
|
8
8
|
Some("compact-info") => {
|
|
9
9
|
let mut input = String::new();
|
|
10
10
|
let _ = io::stdin().read_to_string(&mut input);
|
package/dist/bin/sks.js
CHANGED
package/dist/build-manifest.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schema": "sks.dist-build.v2",
|
|
3
|
-
"version": "1.0.8",
|
|
3
|
+
"version": "1.0.9",
|
|
4
4
|
"typescript": true,
|
|
5
5
|
"mjs_runtime_files": 0,
|
|
6
6
|
"files": [
|
|
@@ -382,6 +382,10 @@
|
|
|
382
382
|
"core/image-ux-review/fix-task-planner.js",
|
|
383
383
|
"core/image-ux-review/imagegen-adapter.d.ts",
|
|
384
384
|
"core/image-ux-review/imagegen-adapter.js",
|
|
385
|
+
"core/image-ux-review/patch-handoff.d.ts",
|
|
386
|
+
"core/image-ux-review/patch-handoff.js",
|
|
387
|
+
"core/image-ux-review/real-callout-extractor.d.ts",
|
|
388
|
+
"core/image-ux-review/real-callout-extractor.js",
|
|
385
389
|
"core/image-ux-review/recapture.d.ts",
|
|
386
390
|
"core/image-ux-review/recapture.js",
|
|
387
391
|
"core/init.d.ts",
|
|
@@ -567,6 +571,8 @@
|
|
|
567
571
|
"core/secret-redaction.js",
|
|
568
572
|
"core/skill-forge.d.ts",
|
|
569
573
|
"core/skill-forge.js",
|
|
574
|
+
"core/structured-output-adapter.d.ts",
|
|
575
|
+
"core/structured-output-adapter.js",
|
|
570
576
|
"core/team-dag.d.ts",
|
|
571
577
|
"core/team-dag.js",
|
|
572
578
|
"core/team-dashboard-renderer.d.ts",
|
package/dist/commands/bench.d.ts
CHANGED
|
@@ -60,6 +60,18 @@ export declare function run(_command: any, args?: any): Promise<void | {
|
|
|
60
60
|
'sks scouts engines --json': number;
|
|
61
61
|
};
|
|
62
62
|
}>;
|
|
63
|
+
ux_review_staged_latency_budgets: Readonly<{
|
|
64
|
+
source_screenshot_ingest: 500;
|
|
65
|
+
gpt_image_2_generation: 120000;
|
|
66
|
+
callout_extraction: 120000;
|
|
67
|
+
fix_task_planning: 500;
|
|
68
|
+
recapture_re_review: 120000;
|
|
69
|
+
image_voxel_relation_validation: 800;
|
|
70
|
+
codex_compat_probe_batch: 5000;
|
|
71
|
+
computer_use_status_probe_batch: 5000;
|
|
72
|
+
codex_lb_status_probe_batch: 5000;
|
|
73
|
+
scout_engine_probe_batch: 5000;
|
|
74
|
+
}>;
|
|
63
75
|
ok: boolean;
|
|
64
76
|
commands: any[];
|
|
65
77
|
} | {
|
|
@@ -127,6 +139,18 @@ export declare function run(_command: any, args?: any): Promise<void | {
|
|
|
127
139
|
'sks scouts engines --json': number;
|
|
128
140
|
};
|
|
129
141
|
}>;
|
|
142
|
+
ux_review_staged_latency_budgets: Readonly<{
|
|
143
|
+
source_screenshot_ingest: 500;
|
|
144
|
+
gpt_image_2_generation: 120000;
|
|
145
|
+
callout_extraction: 120000;
|
|
146
|
+
fix_task_planning: 500;
|
|
147
|
+
recapture_re_review: 120000;
|
|
148
|
+
image_voxel_relation_validation: 800;
|
|
149
|
+
codex_compat_probe_batch: 5000;
|
|
150
|
+
computer_use_status_probe_batch: 5000;
|
|
151
|
+
codex_lb_status_probe_batch: 5000;
|
|
152
|
+
scout_engine_probe_batch: 5000;
|
|
153
|
+
}>;
|
|
130
154
|
ok: boolean;
|
|
131
155
|
commands: any[];
|
|
132
156
|
};
|
|
@@ -47,6 +47,8 @@ export declare function run(command: any, args?: any): Promise<void | {
|
|
|
47
47
|
codex_app_imagegen_doc: string;
|
|
48
48
|
api_image_generation_doc: string;
|
|
49
49
|
gpt_image_2_model_doc: string;
|
|
50
|
+
image_input_fidelity_note: string;
|
|
51
|
+
unsupported_parameters_omitted: string[];
|
|
50
52
|
required_policy: string;
|
|
51
53
|
output_artifact: string;
|
|
52
54
|
anti_substitution_rule: string;
|
|
@@ -57,6 +59,8 @@ export declare function run(command: any, args?: any): Promise<void | {
|
|
|
57
59
|
input_artifact: string;
|
|
58
60
|
output_artifact: string;
|
|
59
61
|
preferred_path: string;
|
|
62
|
+
fallback_path: string;
|
|
63
|
+
structured_outputs_doc: string;
|
|
60
64
|
fallback_cap: string;
|
|
61
65
|
required_issue_fields: string[];
|
|
62
66
|
};
|
|
@@ -100,6 +104,8 @@ export declare function run(command: any, args?: any): Promise<void | {
|
|
|
100
104
|
created_at: string;
|
|
101
105
|
model: string;
|
|
102
106
|
surface: string;
|
|
107
|
+
endpoint: string;
|
|
108
|
+
api_docs: string;
|
|
103
109
|
privacy: string;
|
|
104
110
|
requests: any;
|
|
105
111
|
blocker_if_unavailable: {
|
|
@@ -111,6 +117,26 @@ export declare function run(command: any, args?: any): Promise<void | {
|
|
|
111
117
|
guidance: string;
|
|
112
118
|
};
|
|
113
119
|
};
|
|
120
|
+
imagegen_response: {
|
|
121
|
+
schema: string;
|
|
122
|
+
created_at: string;
|
|
123
|
+
provider: any;
|
|
124
|
+
model: string;
|
|
125
|
+
ok: boolean;
|
|
126
|
+
status: string;
|
|
127
|
+
output_image_path: any;
|
|
128
|
+
output_image_sha256: any;
|
|
129
|
+
output_id: any;
|
|
130
|
+
dimensions: {
|
|
131
|
+
width: any;
|
|
132
|
+
height: any;
|
|
133
|
+
format: any;
|
|
134
|
+
} | null;
|
|
135
|
+
latency_ms: any;
|
|
136
|
+
token_cost_metadata: any;
|
|
137
|
+
local_only: boolean;
|
|
138
|
+
blockers: any;
|
|
139
|
+
};
|
|
114
140
|
generated_review_ledger: {
|
|
115
141
|
schema: string;
|
|
116
142
|
schema_version: number;
|
|
@@ -155,6 +181,7 @@ export declare function run(command: any, args?: any): Promise<void | {
|
|
|
155
181
|
schema_version: number;
|
|
156
182
|
extraction_source: string;
|
|
157
183
|
extraction_method: string;
|
|
184
|
+
extraction_schema: string;
|
|
158
185
|
extracted_from_generated_callout: boolean;
|
|
159
186
|
issues: any;
|
|
160
187
|
blocking_issue_count: any;
|
|
@@ -182,6 +209,14 @@ export declare function run(command: any, args?: any): Promise<void | {
|
|
|
182
209
|
dirty_status_before_patch: any;
|
|
183
210
|
changed_files: any;
|
|
184
211
|
patch_commands: any;
|
|
212
|
+
patch_results: any;
|
|
213
|
+
counts: {
|
|
214
|
+
before: any;
|
|
215
|
+
open: any;
|
|
216
|
+
fixed: any;
|
|
217
|
+
remains_open: any;
|
|
218
|
+
regression: number;
|
|
219
|
+
};
|
|
185
220
|
patchable_tasks: any;
|
|
186
221
|
risky_tasks_blocked: any;
|
|
187
222
|
repeated_blocker: {
|
|
@@ -213,6 +248,17 @@ export declare function run(command: any, args?: any): Promise<void | {
|
|
|
213
248
|
before_after_relation_required: boolean;
|
|
214
249
|
gpt_image_2_re_review_required: boolean;
|
|
215
250
|
output_schema_recheck_required: boolean;
|
|
251
|
+
attach_after_command: string;
|
|
252
|
+
after_screenshot: {
|
|
253
|
+
path: any;
|
|
254
|
+
sha256: any;
|
|
255
|
+
dimensions: any;
|
|
256
|
+
privacy: string;
|
|
257
|
+
} | null;
|
|
258
|
+
before_after_relation_created: boolean;
|
|
259
|
+
re_review_required: boolean;
|
|
260
|
+
re_review_issue_ledger_required: boolean;
|
|
261
|
+
regression_blocker: string | null;
|
|
216
262
|
changed_screens_rechecked_or_not_applicable: boolean;
|
|
217
263
|
blockers: string[];
|
|
218
264
|
passed: boolean;
|
|
@@ -284,6 +330,7 @@ export declare function run(command: any, args?: any): Promise<void | {
|
|
|
284
330
|
} | {
|
|
285
331
|
schema: string;
|
|
286
332
|
ok: any;
|
|
333
|
+
status: string;
|
|
287
334
|
mission_id: any;
|
|
288
335
|
issue_ledger: {
|
|
289
336
|
contract_hash: any;
|
|
@@ -298,6 +345,7 @@ export declare function run(command: any, args?: any): Promise<void | {
|
|
|
298
345
|
schema_version: number;
|
|
299
346
|
extraction_source: string;
|
|
300
347
|
extraction_method: string;
|
|
348
|
+
extraction_schema: string;
|
|
301
349
|
extracted_from_generated_callout: boolean;
|
|
302
350
|
issues: any;
|
|
303
351
|
blocking_issue_count: any;
|
|
@@ -310,6 +358,140 @@ export declare function run(command: any, args?: any): Promise<void | {
|
|
|
310
358
|
};
|
|
311
359
|
};
|
|
312
360
|
proof: any;
|
|
361
|
+
} | {
|
|
362
|
+
schema: string;
|
|
363
|
+
ok: boolean;
|
|
364
|
+
mission_id: any;
|
|
365
|
+
generated_review_ledger: {
|
|
366
|
+
schema: string;
|
|
367
|
+
schema_version: number;
|
|
368
|
+
created_at: string;
|
|
369
|
+
provider: {
|
|
370
|
+
model: string;
|
|
371
|
+
preferred_surface: string;
|
|
372
|
+
};
|
|
373
|
+
generated_review_images: {
|
|
374
|
+
source_screen_id: string;
|
|
375
|
+
status: string;
|
|
376
|
+
image_voxel_relation: string;
|
|
377
|
+
callout_extraction_status: string;
|
|
378
|
+
callouts: {
|
|
379
|
+
id: string;
|
|
380
|
+
callout_id: string;
|
|
381
|
+
severity: string;
|
|
382
|
+
bbox: number[];
|
|
383
|
+
region: string;
|
|
384
|
+
title: string;
|
|
385
|
+
detail: string;
|
|
386
|
+
fix_action: string;
|
|
387
|
+
status: string;
|
|
388
|
+
source: string;
|
|
389
|
+
confidence: number;
|
|
390
|
+
extraction_provider: string;
|
|
391
|
+
extraction_schema: string;
|
|
392
|
+
generated_image_sha256: any;
|
|
393
|
+
bbox_coordinate_space: string;
|
|
394
|
+
bbox_confidence: number;
|
|
395
|
+
severity_visible: boolean;
|
|
396
|
+
callout_number_visible: boolean;
|
|
397
|
+
text_ocr_confidence: number;
|
|
398
|
+
fix_verification_status: string;
|
|
399
|
+
post_fix_recheck_issue_id: null;
|
|
400
|
+
}[];
|
|
401
|
+
id: any;
|
|
402
|
+
path: string;
|
|
403
|
+
sha256: any;
|
|
404
|
+
width: number | null;
|
|
405
|
+
height: number | null;
|
|
406
|
+
format: string;
|
|
407
|
+
provider_model: string;
|
|
408
|
+
provider_surface: any;
|
|
409
|
+
requested_fidelity: string;
|
|
410
|
+
image_input_fidelity_note: string;
|
|
411
|
+
privacy: string;
|
|
412
|
+
output_id: any;
|
|
413
|
+
created_at: any;
|
|
414
|
+
real_generated: boolean;
|
|
415
|
+
mock: boolean;
|
|
416
|
+
callout_extraction_required: boolean;
|
|
417
|
+
source: string;
|
|
418
|
+
}[];
|
|
419
|
+
generated_count: number;
|
|
420
|
+
required_count: number;
|
|
421
|
+
blockers: never[];
|
|
422
|
+
passed: boolean;
|
|
423
|
+
contract_hash: any;
|
|
424
|
+
};
|
|
425
|
+
gate: {
|
|
426
|
+
passed: boolean;
|
|
427
|
+
schema: string;
|
|
428
|
+
schema_version: number;
|
|
429
|
+
created_at: string;
|
|
430
|
+
contract_hash: any;
|
|
431
|
+
real_source_screenshot_present: boolean;
|
|
432
|
+
computer_use_or_user_screenshot_source: any;
|
|
433
|
+
gpt_image_2_callout_generated: boolean;
|
|
434
|
+
generated_image_ingested: boolean;
|
|
435
|
+
callout_extraction_schema_valid: boolean;
|
|
436
|
+
issue_ledger_from_generated_callout: boolean;
|
|
437
|
+
p0_p1_zero_after_fix: boolean;
|
|
438
|
+
fix_loop_executed_or_not_needed: boolean;
|
|
439
|
+
changed_screens_rechecked: boolean;
|
|
440
|
+
image_voxel_relations_created: boolean;
|
|
441
|
+
wrongness_checked: boolean;
|
|
442
|
+
honest_mode_complete: boolean;
|
|
443
|
+
required_artifacts: string[];
|
|
444
|
+
blockers: any[];
|
|
445
|
+
verification_caps: {
|
|
446
|
+
text_only_review: string;
|
|
447
|
+
mock_fixture: string;
|
|
448
|
+
codex_less_than_0_132_fallback: string;
|
|
449
|
+
};
|
|
450
|
+
notes: string[];
|
|
451
|
+
};
|
|
452
|
+
} | {
|
|
453
|
+
schema: string;
|
|
454
|
+
ok: boolean;
|
|
455
|
+
mission_id: any;
|
|
456
|
+
after_screenshot: {
|
|
457
|
+
path: string;
|
|
458
|
+
sha256: any;
|
|
459
|
+
dimensions: {
|
|
460
|
+
width: null;
|
|
461
|
+
height: null;
|
|
462
|
+
format: string;
|
|
463
|
+
} | {
|
|
464
|
+
width: number;
|
|
465
|
+
height: number;
|
|
466
|
+
format: string;
|
|
467
|
+
};
|
|
468
|
+
privacy: string;
|
|
469
|
+
};
|
|
470
|
+
recapture_plan: {
|
|
471
|
+
schema: string;
|
|
472
|
+
changed_screens_only: boolean;
|
|
473
|
+
recapture_required: boolean;
|
|
474
|
+
recapture_source: string;
|
|
475
|
+
recaptured_screenshot_sha256: any;
|
|
476
|
+
recaptured_screenshot_dimensions: any;
|
|
477
|
+
before_after_relation_required: boolean;
|
|
478
|
+
gpt_image_2_re_review_required: boolean;
|
|
479
|
+
output_schema_recheck_required: boolean;
|
|
480
|
+
attach_after_command: string;
|
|
481
|
+
after_screenshot: {
|
|
482
|
+
path: any;
|
|
483
|
+
sha256: any;
|
|
484
|
+
dimensions: any;
|
|
485
|
+
privacy: string;
|
|
486
|
+
} | null;
|
|
487
|
+
before_after_relation_created: boolean;
|
|
488
|
+
re_review_required: boolean;
|
|
489
|
+
re_review_issue_ledger_required: boolean;
|
|
490
|
+
regression_blocker: string | null;
|
|
491
|
+
changed_screens_rechecked_or_not_applicable: boolean;
|
|
492
|
+
blockers: string[];
|
|
493
|
+
passed: boolean;
|
|
494
|
+
};
|
|
313
495
|
} | {
|
|
314
496
|
schema: string;
|
|
315
497
|
ok: boolean;
|
package/dist/commands/wiki.d.ts
CHANGED
|
@@ -15,7 +15,7 @@ export declare function run(_command: any, args?: any): Promise<void | {
|
|
|
15
15
|
};
|
|
16
16
|
active_records: {
|
|
17
17
|
id: string;
|
|
18
|
-
kind: "
|
|
18
|
+
kind: "callout_extraction_schema_failed" | "missing_evidence" | "incorrect_claim" | "overconfident_claim" | "stale_evidence" | "test_failure" | "route_misclassification" | "scout_error" | "visual_anchor_error" | "image_bbox_error" | "db_safety_false_positive" | "db_safety_false_negative" | "hook_policy_mismatch" | "hook_semantic_mismatch" | "hook_strict_subset_misclassified" | "codex_lb_health_misread" | "codex_lb_missing_env_raw_message" | "codex_lb_setup_choice_drift" | "codex_lb_env_persistence_failure" | "computer_use_policy_misclassification" | "computer_use_live_smoke_mismatch" | "computer_use_external_block_overclaimed" | "mock_real_confusion" | "user_intent_misread" | "artifact_schema_error" | "trust_status_overclaim" | "ux_review_text_only_fallback" | "ux_generated_image_not_real" | "ux_fake_generic_callout_detected" | "ux_callout_ocr_uncertain" | "gpt_image_2_callout_generation_failed" | "callout_bbox_out_of_bounds" | "ux_patch_applied_without_recheck" | "ux_after_recheck_regression" | "ux_image_fidelity_mismatch" | "ux_output_schema_unavailable_fallback" | "fix_loop_noop_patch" | "visual_fix_not_rechecked" | "post_fix_regression_detected" | "repeated_blocker_stop";
|
|
19
19
|
severity: "high" | "low" | "medium" | "critical";
|
|
20
20
|
route: string | null;
|
|
21
21
|
claim: string;
|
package/dist/core/bench.d.ts
CHANGED
|
@@ -66,6 +66,18 @@ export declare const CORE_BENCH_BUDGETS: {
|
|
|
66
66
|
'sks features check --json': number;
|
|
67
67
|
'sks scouts engines --json': number;
|
|
68
68
|
};
|
|
69
|
+
export declare const UX_REVIEW_STAGED_LATENCY_BUDGETS: Readonly<{
|
|
70
|
+
source_screenshot_ingest: 500;
|
|
71
|
+
gpt_image_2_generation: 120000;
|
|
72
|
+
callout_extraction: 120000;
|
|
73
|
+
fix_task_planning: 500;
|
|
74
|
+
recapture_re_review: 120000;
|
|
75
|
+
image_voxel_relation_validation: 800;
|
|
76
|
+
codex_compat_probe_batch: 5000;
|
|
77
|
+
computer_use_status_probe_batch: 5000;
|
|
78
|
+
codex_lb_status_probe_batch: 5000;
|
|
79
|
+
scout_engine_probe_batch: 5000;
|
|
80
|
+
}>;
|
|
69
81
|
export declare function runCoreBench(root?: any, { iterations, tier }?: any): Promise<{
|
|
70
82
|
schema: string;
|
|
71
83
|
generated_at: string;
|
|
@@ -128,6 +140,18 @@ export declare function runCoreBench(root?: any, { iterations, tier }?: any): Pr
|
|
|
128
140
|
'sks scouts engines --json': number;
|
|
129
141
|
};
|
|
130
142
|
}>;
|
|
143
|
+
ux_review_staged_latency_budgets: Readonly<{
|
|
144
|
+
source_screenshot_ingest: 500;
|
|
145
|
+
gpt_image_2_generation: 120000;
|
|
146
|
+
callout_extraction: 120000;
|
|
147
|
+
fix_task_planning: 500;
|
|
148
|
+
recapture_re_review: 120000;
|
|
149
|
+
image_voxel_relation_validation: 800;
|
|
150
|
+
codex_compat_probe_batch: 5000;
|
|
151
|
+
computer_use_status_probe_batch: 5000;
|
|
152
|
+
codex_lb_status_probe_batch: 5000;
|
|
153
|
+
scout_engine_probe_batch: 5000;
|
|
154
|
+
}>;
|
|
131
155
|
ok: boolean;
|
|
132
156
|
commands: any[];
|
|
133
157
|
}>;
|
package/dist/core/bench.js
CHANGED
|
@@ -60,6 +60,18 @@ export const CORE_BENCH_BUDGET_TIERS = Object.freeze({
|
|
|
60
60
|
}
|
|
61
61
|
});
|
|
62
62
|
export const CORE_BENCH_BUDGETS = CORE_BENCH_BUDGET_TIERS['source-local'];
|
|
63
|
+
export const UX_REVIEW_STAGED_LATENCY_BUDGETS = Object.freeze({
|
|
64
|
+
source_screenshot_ingest: 500,
|
|
65
|
+
gpt_image_2_generation: 120_000,
|
|
66
|
+
callout_extraction: 120_000,
|
|
67
|
+
fix_task_planning: 500,
|
|
68
|
+
recapture_re_review: 120_000,
|
|
69
|
+
image_voxel_relation_validation: 800,
|
|
70
|
+
codex_compat_probe_batch: 5_000,
|
|
71
|
+
computer_use_status_probe_batch: 5_000,
|
|
72
|
+
codex_lb_status_probe_batch: 5_000,
|
|
73
|
+
scout_engine_probe_batch: 5_000
|
|
74
|
+
});
|
|
63
75
|
const CORE_COMMANDS = Object.freeze([
|
|
64
76
|
['sks --version', ['--version']],
|
|
65
77
|
['sks help', ['help']],
|
|
@@ -107,6 +119,7 @@ export async function runCoreBench(root = process.cwd(), { iterations = 3, tier
|
|
|
107
119
|
tier,
|
|
108
120
|
iterations: Math.max(1, Number(iterations) || 1),
|
|
109
121
|
budget_tiers: CORE_BENCH_BUDGET_TIERS,
|
|
122
|
+
ux_review_staged_latency_budgets: UX_REVIEW_STAGED_LATENCY_BUDGETS,
|
|
110
123
|
ok: rows.every((row) => row.ok),
|
|
111
124
|
commands: rows
|
|
112
125
|
};
|
|
@@ -16,6 +16,24 @@ export interface CodexResumeOutputSchemaCommandInput {
|
|
|
16
16
|
json?: boolean;
|
|
17
17
|
extraArgs?: readonly string[];
|
|
18
18
|
}
|
|
19
|
+
export interface CodexExecResumeOutputSchemaRunResult {
|
|
20
|
+
schema: 'sks.codex-exec-output-schema-run.v1';
|
|
21
|
+
ok: boolean;
|
|
22
|
+
status: 'parsed' | 'blocked' | 'integration_optional' | 'degraded_supported';
|
|
23
|
+
args: string[];
|
|
24
|
+
codex_bin: string | null;
|
|
25
|
+
output_file: string | null;
|
|
26
|
+
parsed_json: unknown | null;
|
|
27
|
+
blocker: ReturnType<typeof structuredOutputBlocker> | null;
|
|
28
|
+
validation: {
|
|
29
|
+
ok: boolean;
|
|
30
|
+
issues: string[];
|
|
31
|
+
};
|
|
32
|
+
stdout_tail: string;
|
|
33
|
+
stderr_tail: string;
|
|
34
|
+
timed_out: boolean;
|
|
35
|
+
exit_code: number | null;
|
|
36
|
+
}
|
|
19
37
|
export declare function detectCodexExecResumeOutputSchema(opts?: any): Promise<CodexExecResumeOutputSchemaAvailability>;
|
|
20
38
|
export declare function codexSchemaPath(name: string): Promise<string>;
|
|
21
39
|
export declare function assertCodexSchemaFile(schemaPath: string): Promise<{
|
|
@@ -25,6 +43,13 @@ export declare function assertCodexSchemaFile(schemaPath: string): Promise<{
|
|
|
25
43
|
issues: string[];
|
|
26
44
|
}>;
|
|
27
45
|
export declare function buildCodexExecResumeOutputSchemaArgs(input: CodexResumeOutputSchemaCommandInput): Promise<string[]>;
|
|
46
|
+
export declare function runCodexExecResumeWithOutputSchema(input: CodexResumeOutputSchemaCommandInput, opts?: {
|
|
47
|
+
codexBin?: string | null;
|
|
48
|
+
timeoutMs?: number;
|
|
49
|
+
maxOutputBytes?: number;
|
|
50
|
+
cwd?: string;
|
|
51
|
+
env?: NodeJS.ProcessEnv;
|
|
52
|
+
}): Promise<CodexExecResumeOutputSchemaRunResult>;
|
|
28
53
|
export declare function parseStructuredCodexOutput(text: unknown): {
|
|
29
54
|
ok: boolean;
|
|
30
55
|
value: unknown | null;
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
-
import
|
|
2
|
+
import fsp from 'node:fs/promises';
|
|
3
|
+
import { ensureDir, exists, packageRoot, readJson, runProcess, which } from './fsx.js';
|
|
3
4
|
import { codexVersionPolicy, compareSemverLike, parseCodexVersionText } from './codex-compat/codex-version-policy.js';
|
|
4
5
|
export async function detectCodexExecResumeOutputSchema(opts = {}) {
|
|
5
6
|
const codexBin = opts.codexBin || await which('codex').catch(() => null);
|
|
@@ -83,6 +84,64 @@ export async function buildCodexExecResumeOutputSchemaArgs(input) {
|
|
|
83
84
|
args.push(String(input.prompt));
|
|
84
85
|
return args;
|
|
85
86
|
}
|
|
87
|
+
export async function runCodexExecResumeWithOutputSchema(input, opts = {}) {
|
|
88
|
+
const availability = await detectCodexExecResumeOutputSchema({ codexBin: opts.codexBin || undefined });
|
|
89
|
+
if (!availability.codex_bin || availability.status !== 'available' || !availability.output_schema_supported) {
|
|
90
|
+
const status = availability.status === 'available' ? 'degraded_supported' : availability.status;
|
|
91
|
+
return {
|
|
92
|
+
schema: 'sks.codex-exec-output-schema-run.v1',
|
|
93
|
+
ok: false,
|
|
94
|
+
status,
|
|
95
|
+
args: [],
|
|
96
|
+
codex_bin: availability.codex_bin,
|
|
97
|
+
output_file: null,
|
|
98
|
+
parsed_json: null,
|
|
99
|
+
blocker: structuredOutputBlocker('output_schema_unavailable', availability.warnings.join('; ') || 'codex exec resume --output-schema unavailable'),
|
|
100
|
+
validation: { ok: false, issues: ['output_schema_unavailable'] },
|
|
101
|
+
stdout_tail: '',
|
|
102
|
+
stderr_tail: '',
|
|
103
|
+
timed_out: false,
|
|
104
|
+
exit_code: null
|
|
105
|
+
};
|
|
106
|
+
}
|
|
107
|
+
const outputFile = input.outputFile
|
|
108
|
+
? path.resolve(input.outputFile)
|
|
109
|
+
: path.join(packageRoot(), '.sneakoscope', 'tmp', `codex-output-schema-${Date.now()}.json`);
|
|
110
|
+
await ensureDir(path.dirname(outputFile));
|
|
111
|
+
const args = await buildCodexExecResumeOutputSchemaArgs({ ...input, outputFile });
|
|
112
|
+
const runOpts = {
|
|
113
|
+
cwd: opts.cwd || packageRoot(),
|
|
114
|
+
timeoutMs: opts.timeoutMs || 120_000,
|
|
115
|
+
maxOutputBytes: opts.maxOutputBytes || 256 * 1024
|
|
116
|
+
};
|
|
117
|
+
if (opts.env)
|
|
118
|
+
runOpts.env = opts.env;
|
|
119
|
+
const result = await runProcess(availability.codex_bin, args, runOpts);
|
|
120
|
+
const outputText = await readOutputText(outputFile, result.stdout);
|
|
121
|
+
const parsed = parseStructuredCodexOutput(outputText);
|
|
122
|
+
const schema = await readJson(path.resolve(input.outputSchemaPath), null);
|
|
123
|
+
const validation = parsed.ok ? validateStructuredOutput(parsed.value, schema) : { ok: false, issues: ['json_parse_failed'] };
|
|
124
|
+
const blocker = !parsed.ok
|
|
125
|
+
? parsed.blocker
|
|
126
|
+
: validation.ok
|
|
127
|
+
? null
|
|
128
|
+
: structuredOutputBlocker('schema_validation_failed', validation.issues.join(', '));
|
|
129
|
+
return {
|
|
130
|
+
schema: 'sks.codex-exec-output-schema-run.v1',
|
|
131
|
+
ok: result.code === 0 && parsed.ok && validation.ok,
|
|
132
|
+
status: result.code === 0 && parsed.ok && validation.ok ? 'parsed' : 'blocked',
|
|
133
|
+
args,
|
|
134
|
+
codex_bin: availability.codex_bin,
|
|
135
|
+
output_file: outputFile,
|
|
136
|
+
parsed_json: parsed.ok ? parsed.value : null,
|
|
137
|
+
blocker,
|
|
138
|
+
validation,
|
|
139
|
+
stdout_tail: redactCodexOutput(result.stdout).slice(-12_000),
|
|
140
|
+
stderr_tail: redactCodexOutput(result.stderr).slice(-12_000),
|
|
141
|
+
timed_out: result.timedOut,
|
|
142
|
+
exit_code: result.code
|
|
143
|
+
};
|
|
144
|
+
}
|
|
86
145
|
export function parseStructuredCodexOutput(text) {
|
|
87
146
|
const raw = String(text || '').trim();
|
|
88
147
|
if (!raw) {
|
|
@@ -105,6 +164,13 @@ export function validateStructuredOutput(value, schema) {
|
|
|
105
164
|
if (!row || !Object.hasOwn(row, key))
|
|
106
165
|
issues.push(`required:${key}`);
|
|
107
166
|
}
|
|
167
|
+
if (schema?.additionalProperties === false && row) {
|
|
168
|
+
const allowed = new Set(Object.keys(schema.properties || {}));
|
|
169
|
+
for (const key of Object.keys(row)) {
|
|
170
|
+
if (!allowed.has(key))
|
|
171
|
+
issues.push(`additional:${key}`);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
108
174
|
return { ok: issues.length === 0, issues };
|
|
109
175
|
}
|
|
110
176
|
export function structuredOutputBlocker(reason, detail) {
|
|
@@ -128,4 +194,13 @@ function sanitizeResumeId(value) {
|
|
|
128
194
|
throw new Error('Unsafe Codex resume session id');
|
|
129
195
|
return id;
|
|
130
196
|
}
|
|
197
|
+
async function readOutputText(outputFile, stdout) {
|
|
198
|
+
try {
|
|
199
|
+
const text = await fsp.readFile(outputFile, 'utf8');
|
|
200
|
+
if (text.trim())
|
|
201
|
+
return text;
|
|
202
|
+
}
|
|
203
|
+
catch { }
|
|
204
|
+
return stdout;
|
|
205
|
+
}
|
|
131
206
|
//# sourceMappingURL=codex-exec-output-schema.js.map
|
|
@@ -60,6 +60,18 @@ export declare function benchCommand(args?: any): Promise<void | {
|
|
|
60
60
|
'sks scouts engines --json': number;
|
|
61
61
|
};
|
|
62
62
|
}>;
|
|
63
|
+
ux_review_staged_latency_budgets: Readonly<{
|
|
64
|
+
source_screenshot_ingest: 500;
|
|
65
|
+
gpt_image_2_generation: 120000;
|
|
66
|
+
callout_extraction: 120000;
|
|
67
|
+
fix_task_planning: 500;
|
|
68
|
+
recapture_re_review: 120000;
|
|
69
|
+
image_voxel_relation_validation: 800;
|
|
70
|
+
codex_compat_probe_batch: 5000;
|
|
71
|
+
computer_use_status_probe_batch: 5000;
|
|
72
|
+
codex_lb_status_probe_batch: 5000;
|
|
73
|
+
scout_engine_probe_batch: 5000;
|
|
74
|
+
}>;
|
|
63
75
|
ok: boolean;
|
|
64
76
|
commands: any[];
|
|
65
77
|
} | {
|
|
@@ -127,6 +139,18 @@ export declare function benchCommand(args?: any): Promise<void | {
|
|
|
127
139
|
'sks scouts engines --json': number;
|
|
128
140
|
};
|
|
129
141
|
}>;
|
|
142
|
+
ux_review_staged_latency_budgets: Readonly<{
|
|
143
|
+
source_screenshot_ingest: 500;
|
|
144
|
+
gpt_image_2_generation: 120000;
|
|
145
|
+
callout_extraction: 120000;
|
|
146
|
+
fix_task_planning: 500;
|
|
147
|
+
recapture_re_review: 120000;
|
|
148
|
+
image_voxel_relation_validation: 800;
|
|
149
|
+
codex_compat_probe_batch: 5000;
|
|
150
|
+
computer_use_status_probe_batch: 5000;
|
|
151
|
+
codex_lb_status_probe_batch: 5000;
|
|
152
|
+
scout_engine_probe_batch: 5000;
|
|
153
|
+
}>;
|
|
130
154
|
ok: boolean;
|
|
131
155
|
commands: any[];
|
|
132
156
|
};
|