sneakoscope 1.0.8 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +30 -0
  2. package/crates/sks-core/Cargo.lock +1 -1
  3. package/crates/sks-core/Cargo.toml +1 -1
  4. package/crates/sks-core/src/main.rs +1 -1
  5. package/dist/bin/sks.js +1 -1
  6. package/dist/build-manifest.json +9 -1
  7. package/dist/commands/bench.d.ts +24 -0
  8. package/dist/commands/image-ux-review.d.ts +182 -0
  9. package/dist/commands/wiki.d.ts +1 -1
  10. package/dist/core/bench.d.ts +24 -0
  11. package/dist/core/bench.js +13 -0
  12. package/dist/core/codex-exec-output-schema.d.ts +25 -0
  13. package/dist/core/codex-exec-output-schema.js +76 -1
  14. package/dist/core/commands/basic-cli.js +4 -36
  15. package/dist/core/commands/bench-command.d.ts +24 -0
  16. package/dist/core/commands/image-ux-review-command.d.ts +182 -0
  17. package/dist/core/commands/image-ux-review-command.js +157 -20
  18. package/dist/core/commands/wiki-command.d.ts +2 -2
  19. package/dist/core/evidence/evidence-router.js +10 -0
  20. package/dist/core/evidence/evidence-schema.d.ts +1 -1
  21. package/dist/core/evidence/evidence-schema.js +5 -0
  22. package/dist/core/fsx.d.ts +1 -1
  23. package/dist/core/fsx.js +1 -1
  24. package/dist/core/hooks-runtime.js +3 -9
  25. package/dist/core/image-ux-review/callout-extraction.d.ts +42 -0
  26. package/dist/core/image-ux-review/callout-extraction.js +23 -7
  27. package/dist/core/image-ux-review/fix-loop.d.ts +8 -0
  28. package/dist/core/image-ux-review/fix-loop.js +20 -0
  29. package/dist/core/image-ux-review/imagegen-adapter.d.ts +16 -1
  30. package/dist/core/image-ux-review/imagegen-adapter.js +194 -7
  31. package/dist/core/image-ux-review/patch-handoff.d.ts +87 -0
  32. package/dist/core/image-ux-review/patch-handoff.js +56 -0
  33. package/dist/core/image-ux-review/real-callout-extractor.d.ts +34 -0
  34. package/dist/core/image-ux-review/real-callout-extractor.js +84 -0
  35. package/dist/core/image-ux-review/recapture.d.ts +19 -0
  36. package/dist/core/image-ux-review/recapture.js +28 -3
  37. package/dist/core/image-ux-review.d.ts +62 -0
  38. package/dist/core/image-ux-review.js +54 -5
  39. package/dist/core/performance-budgets.json +12 -0
  40. package/dist/core/proof/evidence-collector.d.ts +1 -1
  41. package/dist/core/proof/proof-writer.d.ts +6 -0
  42. package/dist/core/proof/proof-writer.js +18 -0
  43. package/dist/core/scouts/scout-output-parser.d.ts +1 -1
  44. package/dist/core/scouts/scout-output-parser.js +21 -1
  45. package/dist/core/structured-output-adapter.d.ts +34 -0
  46. package/dist/core/structured-output-adapter.js +157 -0
  47. package/dist/core/triwiki-wrongness/wrongness-cli.d.ts +2 -2
  48. package/dist/core/triwiki-wrongness/wrongness-ledger.d.ts +10 -0
  49. package/dist/core/triwiki-wrongness/wrongness-ledger.js +18 -0
  50. package/dist/core/triwiki-wrongness/wrongness-proof-linker.d.ts +1 -1
  51. package/dist/core/triwiki-wrongness/wrongness-retrieval.d.ts +1 -1
  52. package/dist/core/triwiki-wrongness/wrongness-schema.d.ts +1 -1
  53. package/dist/core/triwiki-wrongness/wrongness-schema.js +22 -1
  54. package/dist/core/trust-kernel/trust-report.js +6 -0
  55. package/dist/core/update-check.d.ts +29 -0
  56. package/dist/core/update-check.js +97 -0
  57. package/dist/core/version.d.ts +1 -1
  58. package/dist/core/version.js +1 -1
  59. package/dist/core/wiki-image/validation.js +21 -0
  60. package/package.json +3 -2
  61. package/schemas/codex/image-ux-issue-ledger.schema.json +24 -5
package/README.md CHANGED
@@ -4,6 +4,10 @@ Fast legacy-free proof-first Codex trust layer with image-based Voxel TriWiki.
4
4
 
5
5
  Sneakoscope Codex (`sks`) is a Codex CLI/App harness that makes repeatable Codex work auditable.
6
6
 
7
+ SKS **1.10.0** is the Function-Only Update Check release: `sks update-check` and the pre-work update gate now share a lightweight npm freshness function that reports `route_required: false` and `pipeline_required: false`, so checking for a newer SKS package never starts Team, setup, doctor, or any execution pipeline.
8
+
9
+ SKS **1.0.9** is the Official Docs Ultimate Kernel: Codex CLI `rust-v0.132.0` structured resume output is now an actual runner, `gpt-image-2` review generation uses Codex App `$imagegen` evidence or an optional OpenAI Images API fallback, Structured Outputs strict schemas are the extraction fallback, and `$UX-Review this screenshot with gpt-image-2 callouts, then fix the issues` blocks fake callouts until generated image pixels are schema-extracted, patched, recaptured, and re-reviewed.
10
+
7
11
  SKS **1.0.8** is the Codex 0.132 UX-Review Seal: Codex CLI `rust-v0.132.0` compatibility is explicit, `codex exec resume --output-schema` is the preferred structured-output path, and `$UX-Review this screenshot with gpt-image-2 callouts, then fix the issues` is a real visual trust loop from source screenshot fidelity to generated callout ingestion, issue ledger extraction, bounded safe fixes, recapture/re-review, Image Voxel relations, Wrongness, Completion Proof, and Trust Report gates.
8
12
 
9
13
  SKS **1.0.7** is the Ultimate Final Completion seal for the Codex trust harness: Computer Use live evidence is an opt-in, local-only macOS evidence path with explicit `probe_only`, `live_capture_attempted`, `live_capture_success`, and `live_capture_blocked` modes; `codex-lb setup` reports durable persistence versus `process_only_ephemeral` honestly; and docs/release readiness checks block mock/probe/live overclaims.
@@ -27,6 +31,32 @@ SKS does not try to clone every other harness. It focuses on one thing: making C
27
31
  ![Sneakoscope Codex Trust Layer](docs/assets/sneakoscope-architecture-pipeline.jpg)
28
32
 
29
33
 
34
+ ## 1.10.0 Function-Only Update Check
35
+
36
+ 1.10.0 keeps the update freshness check out of the SKS mission pipeline. The shared `runSksUpdateCheck` function performs only an npm `view sneakoscope version` lookup, honors `SKS_NPM_VIEW_SNEAKOSCOPE_VERSION` for hermetic tests, and returns explicit `mode: "function"`, `route_required: false`, and `pipeline_required: false` evidence for CLI JSON output and hook-gate reuse.
37
+
38
+ ```bash
39
+ sks update-check --json
40
+ ```
41
+
42
+ Release checks now write `.sneakoscope/reports/official-docs-compat-1.10.0.json` plus `.sneakoscope/reports/release-readiness-1.10.0.json`.
43
+
44
+ ## 1.0.9 Official Docs Ultimate Kernel
45
+
46
+ 1.0.9 closes the remaining policy-vs-run-path gap. Attached generated images are recorded with `callout_extraction_status: pending` and empty callouts until `codex exec resume --output-schema` or the OpenAI Structured Outputs fallback returns a schema-valid issue ledger. Real `gpt-image-2` generation records request/response artifacts, source SHA-256, high-fidelity automatic input metadata, output hashes, local-only privacy, and blockers instead of substituting prose or generic callouts.
47
+
48
+ ```bash
49
+ sks ux-review run --image ./screenshot.png --generate-callouts --json
50
+ sks ux-review attach-generated latest --image ./generated-callouts.png --json
51
+ sks ux-review extract-issues --generated-image ./generated-callouts.png --json
52
+ sks ux-review attach-after latest --image ./after.png --json
53
+ sks ux-review proof latest --json
54
+ sks ux-review explain latest
55
+ npm run official-docs:compat
56
+ ```
57
+
58
+ Release checks now include `npm run official-docs:compat` and write `.sneakoscope/reports/official-docs-compat-1.0.9.json` plus `.sneakoscope/reports/release-readiness-1.0.9.json`.
59
+
30
60
  ## 1.0.8 Codex 0.132 UX-Review Seal
31
61
 
32
62
  1.0.8 makes UX-Review the representative SKS visual trust harness rather than a policy-only fixture. The CLI/App route now records source screenshot original-resolution metadata, requires real `gpt-image-2` generated callout images before verified UX claims, extracts visible callouts into `schemas/codex/image-ux-issue-ledger.schema.json`, plans bounded P0/P1-first fixes, and requires recapture/re-review before visual fix verification.
@@ -76,7 +76,7 @@ dependencies = [
76
76
 
77
77
  [[package]]
78
78
  name = "sks-core"
79
- version = "1.0.8"
79
+ version = "1.10.0"
80
80
  dependencies = [
81
81
  "serde_json",
82
82
  ]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "sks-core"
3
- version = "1.0.8"
3
+ version = "1.10.0"
4
4
  edition = "2021"
5
5
 
6
6
  [dependencies]
@@ -4,7 +4,7 @@ use std::io::{self, Read, Seek, SeekFrom};
4
4
  fn main() {
5
5
  let mut args = std::env::args().skip(1);
6
6
  match args.next().as_deref() {
7
- Some("--version") => println!("sks-rs 1.0.8"),
7
+ Some("--version") => println!("sks-rs 1.10.0"),
8
8
  Some("compact-info") => {
9
9
  let mut input = String::new();
10
10
  let _ = io::stdin().read_to_string(&mut input);
package/dist/bin/sks.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- const FAST_PACKAGE_VERSION = '1.0.8';
2
+ const FAST_PACKAGE_VERSION = '1.10.0';
3
3
  const args = process.argv.slice(2);
4
4
  try {
5
5
  if (args[0] === '--version' || args[0] === '-v' || args[0] === 'version') {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "schema": "sks.dist-build.v2",
3
- "version": "1.0.8",
3
+ "version": "1.10.0",
4
4
  "typescript": true,
5
5
  "mjs_runtime_files": 0,
6
6
  "files": [
@@ -382,6 +382,10 @@
382
382
  "core/image-ux-review/fix-task-planner.js",
383
383
  "core/image-ux-review/imagegen-adapter.d.ts",
384
384
  "core/image-ux-review/imagegen-adapter.js",
385
+ "core/image-ux-review/patch-handoff.d.ts",
386
+ "core/image-ux-review/patch-handoff.js",
387
+ "core/image-ux-review/real-callout-extractor.d.ts",
388
+ "core/image-ux-review/real-callout-extractor.js",
385
389
  "core/image-ux-review/recapture.d.ts",
386
390
  "core/image-ux-review/recapture.js",
387
391
  "core/init.d.ts",
@@ -567,6 +571,8 @@
567
571
  "core/secret-redaction.js",
568
572
  "core/skill-forge.d.ts",
569
573
  "core/skill-forge.js",
574
+ "core/structured-output-adapter.d.ts",
575
+ "core/structured-output-adapter.js",
570
576
  "core/team-dag.d.ts",
571
577
  "core/team-dag.js",
572
578
  "core/team-dashboard-renderer.d.ts",
@@ -611,6 +617,8 @@
611
617
  "core/trust-kernel/trust-report.js",
612
618
  "core/trust-kernel/trust-status.d.ts",
613
619
  "core/trust-kernel/trust-status.js",
620
+ "core/update-check.d.ts",
621
+ "core/update-check.js",
614
622
  "core/validators/completion-proof-validator.d.ts",
615
623
  "core/validators/completion-proof-validator.js",
616
624
  "core/validators/evidence-validator.d.ts",
@@ -60,6 +60,18 @@ export declare function run(_command: any, args?: any): Promise<void | {
60
60
  'sks scouts engines --json': number;
61
61
  };
62
62
  }>;
63
+ ux_review_staged_latency_budgets: Readonly<{
64
+ source_screenshot_ingest: 500;
65
+ gpt_image_2_generation: 120000;
66
+ callout_extraction: 120000;
67
+ fix_task_planning: 500;
68
+ recapture_re_review: 120000;
69
+ image_voxel_relation_validation: 800;
70
+ codex_compat_probe_batch: 5000;
71
+ computer_use_status_probe_batch: 5000;
72
+ codex_lb_status_probe_batch: 5000;
73
+ scout_engine_probe_batch: 5000;
74
+ }>;
63
75
  ok: boolean;
64
76
  commands: any[];
65
77
  } | {
@@ -127,6 +139,18 @@ export declare function run(_command: any, args?: any): Promise<void | {
127
139
  'sks scouts engines --json': number;
128
140
  };
129
141
  }>;
142
+ ux_review_staged_latency_budgets: Readonly<{
143
+ source_screenshot_ingest: 500;
144
+ gpt_image_2_generation: 120000;
145
+ callout_extraction: 120000;
146
+ fix_task_planning: 500;
147
+ recapture_re_review: 120000;
148
+ image_voxel_relation_validation: 800;
149
+ codex_compat_probe_batch: 5000;
150
+ computer_use_status_probe_batch: 5000;
151
+ codex_lb_status_probe_batch: 5000;
152
+ scout_engine_probe_batch: 5000;
153
+ }>;
130
154
  ok: boolean;
131
155
  commands: any[];
132
156
  };
@@ -47,6 +47,8 @@ export declare function run(command: any, args?: any): Promise<void | {
47
47
  codex_app_imagegen_doc: string;
48
48
  api_image_generation_doc: string;
49
49
  gpt_image_2_model_doc: string;
50
+ image_input_fidelity_note: string;
51
+ unsupported_parameters_omitted: string[];
50
52
  required_policy: string;
51
53
  output_artifact: string;
52
54
  anti_substitution_rule: string;
@@ -57,6 +59,8 @@ export declare function run(command: any, args?: any): Promise<void | {
57
59
  input_artifact: string;
58
60
  output_artifact: string;
59
61
  preferred_path: string;
62
+ fallback_path: string;
63
+ structured_outputs_doc: string;
60
64
  fallback_cap: string;
61
65
  required_issue_fields: string[];
62
66
  };
@@ -100,6 +104,8 @@ export declare function run(command: any, args?: any): Promise<void | {
100
104
  created_at: string;
101
105
  model: string;
102
106
  surface: string;
107
+ endpoint: string;
108
+ api_docs: string;
103
109
  privacy: string;
104
110
  requests: any;
105
111
  blocker_if_unavailable: {
@@ -111,6 +117,26 @@ export declare function run(command: any, args?: any): Promise<void | {
111
117
  guidance: string;
112
118
  };
113
119
  };
120
+ imagegen_response: {
121
+ schema: string;
122
+ created_at: string;
123
+ provider: any;
124
+ model: string;
125
+ ok: boolean;
126
+ status: string;
127
+ output_image_path: any;
128
+ output_image_sha256: any;
129
+ output_id: any;
130
+ dimensions: {
131
+ width: any;
132
+ height: any;
133
+ format: any;
134
+ } | null;
135
+ latency_ms: any;
136
+ token_cost_metadata: any;
137
+ local_only: boolean;
138
+ blockers: any;
139
+ };
114
140
  generated_review_ledger: {
115
141
  schema: string;
116
142
  schema_version: number;
@@ -155,6 +181,7 @@ export declare function run(command: any, args?: any): Promise<void | {
155
181
  schema_version: number;
156
182
  extraction_source: string;
157
183
  extraction_method: string;
184
+ extraction_schema: string;
158
185
  extracted_from_generated_callout: boolean;
159
186
  issues: any;
160
187
  blocking_issue_count: any;
@@ -182,6 +209,14 @@ export declare function run(command: any, args?: any): Promise<void | {
182
209
  dirty_status_before_patch: any;
183
210
  changed_files: any;
184
211
  patch_commands: any;
212
+ patch_results: any;
213
+ counts: {
214
+ before: any;
215
+ open: any;
216
+ fixed: any;
217
+ remains_open: any;
218
+ regression: number;
219
+ };
185
220
  patchable_tasks: any;
186
221
  risky_tasks_blocked: any;
187
222
  repeated_blocker: {
@@ -213,6 +248,17 @@ export declare function run(command: any, args?: any): Promise<void | {
213
248
  before_after_relation_required: boolean;
214
249
  gpt_image_2_re_review_required: boolean;
215
250
  output_schema_recheck_required: boolean;
251
+ attach_after_command: string;
252
+ after_screenshot: {
253
+ path: any;
254
+ sha256: any;
255
+ dimensions: any;
256
+ privacy: string;
257
+ } | null;
258
+ before_after_relation_created: boolean;
259
+ re_review_required: boolean;
260
+ re_review_issue_ledger_required: boolean;
261
+ regression_blocker: string | null;
216
262
  changed_screens_rechecked_or_not_applicable: boolean;
217
263
  blockers: string[];
218
264
  passed: boolean;
@@ -284,6 +330,7 @@ export declare function run(command: any, args?: any): Promise<void | {
284
330
  } | {
285
331
  schema: string;
286
332
  ok: any;
333
+ status: string;
287
334
  mission_id: any;
288
335
  issue_ledger: {
289
336
  contract_hash: any;
@@ -298,6 +345,7 @@ export declare function run(command: any, args?: any): Promise<void | {
298
345
  schema_version: number;
299
346
  extraction_source: string;
300
347
  extraction_method: string;
348
+ extraction_schema: string;
301
349
  extracted_from_generated_callout: boolean;
302
350
  issues: any;
303
351
  blocking_issue_count: any;
@@ -310,6 +358,140 @@ export declare function run(command: any, args?: any): Promise<void | {
310
358
  };
311
359
  };
312
360
  proof: any;
361
+ } | {
362
+ schema: string;
363
+ ok: boolean;
364
+ mission_id: any;
365
+ generated_review_ledger: {
366
+ schema: string;
367
+ schema_version: number;
368
+ created_at: string;
369
+ provider: {
370
+ model: string;
371
+ preferred_surface: string;
372
+ };
373
+ generated_review_images: {
374
+ source_screen_id: string;
375
+ status: string;
376
+ image_voxel_relation: string;
377
+ callout_extraction_status: string;
378
+ callouts: {
379
+ id: string;
380
+ callout_id: string;
381
+ severity: string;
382
+ bbox: number[];
383
+ region: string;
384
+ title: string;
385
+ detail: string;
386
+ fix_action: string;
387
+ status: string;
388
+ source: string;
389
+ confidence: number;
390
+ extraction_provider: string;
391
+ extraction_schema: string;
392
+ generated_image_sha256: any;
393
+ bbox_coordinate_space: string;
394
+ bbox_confidence: number;
395
+ severity_visible: boolean;
396
+ callout_number_visible: boolean;
397
+ text_ocr_confidence: number;
398
+ fix_verification_status: string;
399
+ post_fix_recheck_issue_id: null;
400
+ }[];
401
+ id: any;
402
+ path: string;
403
+ sha256: any;
404
+ width: number | null;
405
+ height: number | null;
406
+ format: string;
407
+ provider_model: string;
408
+ provider_surface: any;
409
+ requested_fidelity: string;
410
+ image_input_fidelity_note: string;
411
+ privacy: string;
412
+ output_id: any;
413
+ created_at: any;
414
+ real_generated: boolean;
415
+ mock: boolean;
416
+ callout_extraction_required: boolean;
417
+ source: string;
418
+ }[];
419
+ generated_count: number;
420
+ required_count: number;
421
+ blockers: never[];
422
+ passed: boolean;
423
+ contract_hash: any;
424
+ };
425
+ gate: {
426
+ passed: boolean;
427
+ schema: string;
428
+ schema_version: number;
429
+ created_at: string;
430
+ contract_hash: any;
431
+ real_source_screenshot_present: boolean;
432
+ computer_use_or_user_screenshot_source: any;
433
+ gpt_image_2_callout_generated: boolean;
434
+ generated_image_ingested: boolean;
435
+ callout_extraction_schema_valid: boolean;
436
+ issue_ledger_from_generated_callout: boolean;
437
+ p0_p1_zero_after_fix: boolean;
438
+ fix_loop_executed_or_not_needed: boolean;
439
+ changed_screens_rechecked: boolean;
440
+ image_voxel_relations_created: boolean;
441
+ wrongness_checked: boolean;
442
+ honest_mode_complete: boolean;
443
+ required_artifacts: string[];
444
+ blockers: any[];
445
+ verification_caps: {
446
+ text_only_review: string;
447
+ mock_fixture: string;
448
+ codex_less_than_0_132_fallback: string;
449
+ };
450
+ notes: string[];
451
+ };
452
+ } | {
453
+ schema: string;
454
+ ok: boolean;
455
+ mission_id: any;
456
+ after_screenshot: {
457
+ path: string;
458
+ sha256: any;
459
+ dimensions: {
460
+ width: null;
461
+ height: null;
462
+ format: string;
463
+ } | {
464
+ width: number;
465
+ height: number;
466
+ format: string;
467
+ };
468
+ privacy: string;
469
+ };
470
+ recapture_plan: {
471
+ schema: string;
472
+ changed_screens_only: boolean;
473
+ recapture_required: boolean;
474
+ recapture_source: string;
475
+ recaptured_screenshot_sha256: any;
476
+ recaptured_screenshot_dimensions: any;
477
+ before_after_relation_required: boolean;
478
+ gpt_image_2_re_review_required: boolean;
479
+ output_schema_recheck_required: boolean;
480
+ attach_after_command: string;
481
+ after_screenshot: {
482
+ path: any;
483
+ sha256: any;
484
+ dimensions: any;
485
+ privacy: string;
486
+ } | null;
487
+ before_after_relation_created: boolean;
488
+ re_review_required: boolean;
489
+ re_review_issue_ledger_required: boolean;
490
+ regression_blocker: string | null;
491
+ changed_screens_rechecked_or_not_applicable: boolean;
492
+ blockers: string[];
493
+ passed: boolean;
494
+ };
313
495
  } | {
314
496
  schema: string;
315
497
  ok: boolean;
@@ -15,7 +15,7 @@ export declare function run(_command: any, args?: any): Promise<void | {
15
15
  };
16
16
  active_records: {
17
17
  id: string;
18
- kind: "incorrect_claim" | "overconfident_claim" | "stale_evidence" | "missing_evidence" | "test_failure" | "route_misclassification" | "scout_error" | "visual_anchor_error" | "image_bbox_error" | "db_safety_false_positive" | "db_safety_false_negative" | "hook_policy_mismatch" | "hook_semantic_mismatch" | "hook_strict_subset_misclassified" | "codex_lb_health_misread" | "codex_lb_missing_env_raw_message" | "codex_lb_setup_choice_drift" | "codex_lb_env_persistence_failure" | "computer_use_policy_misclassification" | "computer_use_live_smoke_mismatch" | "computer_use_external_block_overclaimed" | "mock_real_confusion" | "user_intent_misread" | "artifact_schema_error" | "trust_status_overclaim" | "ux_review_text_only_fallback" | "gpt_image_2_callout_generation_failed" | "callout_extraction_schema_failed" | "callout_bbox_out_of_bounds" | "fix_loop_noop_patch" | "visual_fix_not_rechecked" | "post_fix_regression_detected" | "repeated_blocker_stop";
18
+ kind: "callout_extraction_schema_failed" | "missing_evidence" | "incorrect_claim" | "overconfident_claim" | "stale_evidence" | "test_failure" | "route_misclassification" | "scout_error" | "visual_anchor_error" | "image_bbox_error" | "db_safety_false_positive" | "db_safety_false_negative" | "hook_policy_mismatch" | "hook_semantic_mismatch" | "hook_strict_subset_misclassified" | "codex_lb_health_misread" | "codex_lb_missing_env_raw_message" | "codex_lb_setup_choice_drift" | "codex_lb_env_persistence_failure" | "computer_use_policy_misclassification" | "computer_use_live_smoke_mismatch" | "computer_use_external_block_overclaimed" | "mock_real_confusion" | "user_intent_misread" | "artifact_schema_error" | "trust_status_overclaim" | "ux_review_text_only_fallback" | "ux_generated_image_not_real" | "ux_fake_generic_callout_detected" | "ux_callout_ocr_uncertain" | "gpt_image_2_callout_generation_failed" | "callout_bbox_out_of_bounds" | "ux_patch_applied_without_recheck" | "ux_after_recheck_regression" | "ux_image_fidelity_mismatch" | "ux_output_schema_unavailable_fallback" | "fix_loop_noop_patch" | "visual_fix_not_rechecked" | "post_fix_regression_detected" | "repeated_blocker_stop";
19
19
  severity: "high" | "low" | "medium" | "critical";
20
20
  route: string | null;
21
21
  claim: string;
@@ -66,6 +66,18 @@ export declare const CORE_BENCH_BUDGETS: {
66
66
  'sks features check --json': number;
67
67
  'sks scouts engines --json': number;
68
68
  };
69
+ export declare const UX_REVIEW_STAGED_LATENCY_BUDGETS: Readonly<{
70
+ source_screenshot_ingest: 500;
71
+ gpt_image_2_generation: 120000;
72
+ callout_extraction: 120000;
73
+ fix_task_planning: 500;
74
+ recapture_re_review: 120000;
75
+ image_voxel_relation_validation: 800;
76
+ codex_compat_probe_batch: 5000;
77
+ computer_use_status_probe_batch: 5000;
78
+ codex_lb_status_probe_batch: 5000;
79
+ scout_engine_probe_batch: 5000;
80
+ }>;
69
81
  export declare function runCoreBench(root?: any, { iterations, tier }?: any): Promise<{
70
82
  schema: string;
71
83
  generated_at: string;
@@ -128,6 +140,18 @@ export declare function runCoreBench(root?: any, { iterations, tier }?: any): Pr
128
140
  'sks scouts engines --json': number;
129
141
  };
130
142
  }>;
143
+ ux_review_staged_latency_budgets: Readonly<{
144
+ source_screenshot_ingest: 500;
145
+ gpt_image_2_generation: 120000;
146
+ callout_extraction: 120000;
147
+ fix_task_planning: 500;
148
+ recapture_re_review: 120000;
149
+ image_voxel_relation_validation: 800;
150
+ codex_compat_probe_batch: 5000;
151
+ computer_use_status_probe_batch: 5000;
152
+ codex_lb_status_probe_batch: 5000;
153
+ scout_engine_probe_batch: 5000;
154
+ }>;
131
155
  ok: boolean;
132
156
  commands: any[];
133
157
  }>;
@@ -60,6 +60,18 @@ export const CORE_BENCH_BUDGET_TIERS = Object.freeze({
60
60
  }
61
61
  });
62
62
  export const CORE_BENCH_BUDGETS = CORE_BENCH_BUDGET_TIERS['source-local'];
63
+ export const UX_REVIEW_STAGED_LATENCY_BUDGETS = Object.freeze({
64
+ source_screenshot_ingest: 500,
65
+ gpt_image_2_generation: 120_000,
66
+ callout_extraction: 120_000,
67
+ fix_task_planning: 500,
68
+ recapture_re_review: 120_000,
69
+ image_voxel_relation_validation: 800,
70
+ codex_compat_probe_batch: 5_000,
71
+ computer_use_status_probe_batch: 5_000,
72
+ codex_lb_status_probe_batch: 5_000,
73
+ scout_engine_probe_batch: 5_000
74
+ });
63
75
  const CORE_COMMANDS = Object.freeze([
64
76
  ['sks --version', ['--version']],
65
77
  ['sks help', ['help']],
@@ -107,6 +119,7 @@ export async function runCoreBench(root = process.cwd(), { iterations = 3, tier
107
119
  tier,
108
120
  iterations: Math.max(1, Number(iterations) || 1),
109
121
  budget_tiers: CORE_BENCH_BUDGET_TIERS,
122
+ ux_review_staged_latency_budgets: UX_REVIEW_STAGED_LATENCY_BUDGETS,
110
123
  ok: rows.every((row) => row.ok),
111
124
  commands: rows
112
125
  };
@@ -16,6 +16,24 @@ export interface CodexResumeOutputSchemaCommandInput {
16
16
  json?: boolean;
17
17
  extraArgs?: readonly string[];
18
18
  }
19
+ export interface CodexExecResumeOutputSchemaRunResult {
20
+ schema: 'sks.codex-exec-output-schema-run.v1';
21
+ ok: boolean;
22
+ status: 'parsed' | 'blocked' | 'integration_optional' | 'degraded_supported';
23
+ args: string[];
24
+ codex_bin: string | null;
25
+ output_file: string | null;
26
+ parsed_json: unknown | null;
27
+ blocker: ReturnType<typeof structuredOutputBlocker> | null;
28
+ validation: {
29
+ ok: boolean;
30
+ issues: string[];
31
+ };
32
+ stdout_tail: string;
33
+ stderr_tail: string;
34
+ timed_out: boolean;
35
+ exit_code: number | null;
36
+ }
19
37
  export declare function detectCodexExecResumeOutputSchema(opts?: any): Promise<CodexExecResumeOutputSchemaAvailability>;
20
38
  export declare function codexSchemaPath(name: string): Promise<string>;
21
39
  export declare function assertCodexSchemaFile(schemaPath: string): Promise<{
@@ -25,6 +43,13 @@ export declare function assertCodexSchemaFile(schemaPath: string): Promise<{
25
43
  issues: string[];
26
44
  }>;
27
45
  export declare function buildCodexExecResumeOutputSchemaArgs(input: CodexResumeOutputSchemaCommandInput): Promise<string[]>;
46
+ export declare function runCodexExecResumeWithOutputSchema(input: CodexResumeOutputSchemaCommandInput, opts?: {
47
+ codexBin?: string | null;
48
+ timeoutMs?: number;
49
+ maxOutputBytes?: number;
50
+ cwd?: string;
51
+ env?: NodeJS.ProcessEnv;
52
+ }): Promise<CodexExecResumeOutputSchemaRunResult>;
28
53
  export declare function parseStructuredCodexOutput(text: unknown): {
29
54
  ok: boolean;
30
55
  value: unknown | null;
@@ -1,5 +1,6 @@
1
1
  import path from 'node:path';
2
- import { exists, packageRoot, readJson, runProcess, which } from './fsx.js';
2
+ import fsp from 'node:fs/promises';
3
+ import { ensureDir, exists, packageRoot, readJson, runProcess, which } from './fsx.js';
3
4
  import { codexVersionPolicy, compareSemverLike, parseCodexVersionText } from './codex-compat/codex-version-policy.js';
4
5
  export async function detectCodexExecResumeOutputSchema(opts = {}) {
5
6
  const codexBin = opts.codexBin || await which('codex').catch(() => null);
@@ -83,6 +84,64 @@ export async function buildCodexExecResumeOutputSchemaArgs(input) {
83
84
  args.push(String(input.prompt));
84
85
  return args;
85
86
  }
87
+ export async function runCodexExecResumeWithOutputSchema(input, opts = {}) {
88
+ const availability = await detectCodexExecResumeOutputSchema({ codexBin: opts.codexBin || undefined });
89
+ if (!availability.codex_bin || availability.status !== 'available' || !availability.output_schema_supported) {
90
+ const status = availability.status === 'available' ? 'degraded_supported' : availability.status;
91
+ return {
92
+ schema: 'sks.codex-exec-output-schema-run.v1',
93
+ ok: false,
94
+ status,
95
+ args: [],
96
+ codex_bin: availability.codex_bin,
97
+ output_file: null,
98
+ parsed_json: null,
99
+ blocker: structuredOutputBlocker('output_schema_unavailable', availability.warnings.join('; ') || 'codex exec resume --output-schema unavailable'),
100
+ validation: { ok: false, issues: ['output_schema_unavailable'] },
101
+ stdout_tail: '',
102
+ stderr_tail: '',
103
+ timed_out: false,
104
+ exit_code: null
105
+ };
106
+ }
107
+ const outputFile = input.outputFile
108
+ ? path.resolve(input.outputFile)
109
+ : path.join(packageRoot(), '.sneakoscope', 'tmp', `codex-output-schema-${Date.now()}.json`);
110
+ await ensureDir(path.dirname(outputFile));
111
+ const args = await buildCodexExecResumeOutputSchemaArgs({ ...input, outputFile });
112
+ const runOpts = {
113
+ cwd: opts.cwd || packageRoot(),
114
+ timeoutMs: opts.timeoutMs || 120_000,
115
+ maxOutputBytes: opts.maxOutputBytes || 256 * 1024
116
+ };
117
+ if (opts.env)
118
+ runOpts.env = opts.env;
119
+ const result = await runProcess(availability.codex_bin, args, runOpts);
120
+ const outputText = await readOutputText(outputFile, result.stdout);
121
+ const parsed = parseStructuredCodexOutput(outputText);
122
+ const schema = await readJson(path.resolve(input.outputSchemaPath), null);
123
+ const validation = parsed.ok ? validateStructuredOutput(parsed.value, schema) : { ok: false, issues: ['json_parse_failed'] };
124
+ const blocker = !parsed.ok
125
+ ? parsed.blocker
126
+ : validation.ok
127
+ ? null
128
+ : structuredOutputBlocker('schema_validation_failed', validation.issues.join(', '));
129
+ return {
130
+ schema: 'sks.codex-exec-output-schema-run.v1',
131
+ ok: result.code === 0 && parsed.ok && validation.ok,
132
+ status: result.code === 0 && parsed.ok && validation.ok ? 'parsed' : 'blocked',
133
+ args,
134
+ codex_bin: availability.codex_bin,
135
+ output_file: outputFile,
136
+ parsed_json: parsed.ok ? parsed.value : null,
137
+ blocker,
138
+ validation,
139
+ stdout_tail: redactCodexOutput(result.stdout).slice(-12_000),
140
+ stderr_tail: redactCodexOutput(result.stderr).slice(-12_000),
141
+ timed_out: result.timedOut,
142
+ exit_code: result.code
143
+ };
144
+ }
86
145
  export function parseStructuredCodexOutput(text) {
87
146
  const raw = String(text || '').trim();
88
147
  if (!raw) {
@@ -105,6 +164,13 @@ export function validateStructuredOutput(value, schema) {
105
164
  if (!row || !Object.hasOwn(row, key))
106
165
  issues.push(`required:${key}`);
107
166
  }
167
+ if (schema?.additionalProperties === false && row) {
168
+ const allowed = new Set(Object.keys(schema.properties || {}));
169
+ for (const key of Object.keys(row)) {
170
+ if (!allowed.has(key))
171
+ issues.push(`additional:${key}`);
172
+ }
173
+ }
108
174
  return { ok: issues.length === 0, issues };
109
175
  }
110
176
  export function structuredOutputBlocker(reason, detail) {
@@ -128,4 +194,13 @@ function sanitizeResumeId(value) {
128
194
  throw new Error('Unsafe Codex resume session id');
129
195
  return id;
130
196
  }
197
+ async function readOutputText(outputFile, stdout) {
198
+ try {
199
+ const text = await fsp.readFile(outputFile, 'utf8');
200
+ if (text.trim())
201
+ return text;
202
+ }
203
+ catch { }
204
+ return stdout;
205
+ }
131
206
  //# sourceMappingURL=codex-exec-output-schema.js.map