@aria_asi/cli 0.2.39 → 0.2.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/aria.js +236 -34
- package/dist/aria-connector/src/action-ledger-core.d.ts +387 -0
- package/dist/aria-connector/src/action-ledger-core.d.ts.map +1 -0
- package/dist/aria-connector/src/action-ledger-core.js +638 -0
- package/dist/aria-connector/src/action-ledger-core.js.map +1 -0
- package/dist/aria-connector/src/chat.d.ts.map +1 -1
- package/dist/aria-connector/src/chat.js +5 -6
- package/dist/aria-connector/src/chat.js.map +1 -1
- package/dist/aria-connector/src/codebase-scanner.d.ts +1 -1
- package/dist/aria-connector/src/codebase-scanner.d.ts.map +1 -1
- package/dist/aria-connector/src/connectors/claude-code.d.ts +1 -0
- package/dist/aria-connector/src/connectors/claude-code.d.ts.map +1 -1
- package/dist/aria-connector/src/connectors/claude-code.js +152 -14
- package/dist/aria-connector/src/connectors/claude-code.js.map +1 -1
- package/dist/aria-connector/src/connectors/codebase-awareness.d.ts +10 -0
- package/dist/aria-connector/src/connectors/codebase-awareness.d.ts.map +1 -1
- package/dist/aria-connector/src/connectors/codebase-awareness.js +276 -27
- package/dist/aria-connector/src/connectors/codebase-awareness.js.map +1 -1
- package/dist/aria-connector/src/connectors/codex.d.ts +3 -1
- package/dist/aria-connector/src/connectors/codex.d.ts.map +1 -1
- package/dist/aria-connector/src/connectors/codex.js +1271 -40
- package/dist/aria-connector/src/connectors/codex.js.map +1 -1
- package/dist/aria-connector/src/connectors/cursor.d.ts.map +1 -1
- package/dist/aria-connector/src/connectors/cursor.js +7 -0
- package/dist/aria-connector/src/connectors/cursor.js.map +1 -1
- package/dist/aria-connector/src/connectors/governed-adapter.d.ts +30 -0
- package/dist/aria-connector/src/connectors/governed-adapter.d.ts.map +1 -0
- package/dist/aria-connector/src/connectors/governed-adapter.js +132 -0
- package/dist/aria-connector/src/connectors/governed-adapter.js.map +1 -0
- package/dist/aria-connector/src/connectors/opencode.d.ts +3 -1
- package/dist/aria-connector/src/connectors/opencode.d.ts.map +1 -1
- package/dist/aria-connector/src/connectors/opencode.js +18 -2
- package/dist/aria-connector/src/connectors/opencode.js.map +1 -1
- package/dist/aria-connector/src/connectors/repo-guard.d.ts.map +1 -1
- package/dist/aria-connector/src/connectors/repo-guard.js +25 -14
- package/dist/aria-connector/src/connectors/repo-guard.js.map +1 -1
- package/dist/aria-connector/src/connectors/runtime.d.ts.map +1 -1
- package/dist/aria-connector/src/connectors/runtime.js +92 -2
- package/dist/aria-connector/src/connectors/runtime.js.map +1 -1
- package/dist/aria-connector/src/connectors/shell.d.ts.map +1 -1
- package/dist/aria-connector/src/connectors/shell.js +123 -7
- package/dist/aria-connector/src/connectors/shell.js.map +1 -1
- package/dist/aria-connector/src/cross-cli-hive-binding.d.ts +63 -0
- package/dist/aria-connector/src/cross-cli-hive-binding.d.ts.map +1 -0
- package/dist/aria-connector/src/cross-cli-hive-binding.js +205 -0
- package/dist/aria-connector/src/cross-cli-hive-binding.js.map +1 -0
- package/dist/aria-connector/src/garden-control-plane.d.ts +6 -1
- package/dist/aria-connector/src/garden-control-plane.d.ts.map +1 -1
- package/dist/aria-connector/src/garden-control-plane.js +8 -2
- package/dist/aria-connector/src/garden-control-plane.js.map +1 -1
- package/dist/aria-connector/src/governed-surface-runner.d.ts +189 -0
- package/dist/aria-connector/src/governed-surface-runner.d.ts.map +1 -0
- package/dist/aria-connector/src/governed-surface-runner.js +1022 -0
- package/dist/aria-connector/src/governed-surface-runner.js.map +1 -0
- package/dist/aria-connector/src/index.d.ts +10 -1
- package/dist/aria-connector/src/index.d.ts.map +1 -1
- package/dist/aria-connector/src/index.js +5 -0
- package/dist/aria-connector/src/index.js.map +1 -1
- package/dist/aria-connector/src/task-runner.d.ts +3 -0
- package/dist/aria-connector/src/task-runner.d.ts.map +1 -0
- package/dist/aria-connector/src/task-runner.js +3526 -0
- package/dist/aria-connector/src/task-runner.js.map +1 -0
- package/dist/aria-web/src/lib/codebase-scanner.d.ts +21 -2
- package/dist/aria-web/src/lib/codebase-scanner.d.ts.map +1 -1
- package/dist/aria-web/src/lib/codebase-scanner.js +59 -14
- package/dist/aria-web/src/lib/codebase-scanner.js.map +1 -1
- package/dist/assets/hooks/README.md +58 -0
- package/dist/assets/hooks/aria-agent-handoff.mjs +147 -2
- package/dist/assets/hooks/aria-agent-ledger-merge.mjs +31 -7
- package/dist/assets/hooks/aria-architect-fallback.mjs +10 -2
- package/dist/assets/hooks/aria-claim-evidence-stop-gate.mjs +240 -0
- package/dist/assets/hooks/aria-cognition-substrate-binding.mjs +84 -10
- package/dist/assets/hooks/aria-first-class-coach.mjs +305 -10
- package/dist/assets/hooks/aria-harness-via-sdk.mjs +93 -16
- package/dist/assets/hooks/aria-import-resolution-gate.mjs +106 -20
- package/dist/assets/hooks/aria-outcome-record.mjs +56 -20
- package/dist/assets/hooks/aria-pre-emit-autoload.mjs +1809 -0
- package/dist/assets/hooks/aria-pre-emit-autoload.mjs.before-orchestration-redesign +1400 -0
- package/dist/assets/hooks/aria-pre-emit-dryrun.mjs +22 -3
- package/dist/assets/hooks/aria-pre-text-gate.mjs +11 -2
- package/dist/assets/hooks/aria-pre-tool-gate.mjs +516 -92
- package/dist/assets/hooks/aria-pre-tool-use.mjs +70 -6
- package/dist/assets/hooks/aria-preprompt-consult.mjs +23 -4
- package/dist/assets/hooks/aria-repo-doctrine-gate.mjs +29 -3
- package/dist/assets/hooks/aria-stop-gate.mjs +585 -76
- package/dist/assets/hooks/aria-trigger-autolearn.mjs +17 -3
- package/dist/assets/hooks/aria-universal-turn-packet.mjs +1165 -0
- package/dist/assets/hooks/aria-userprompt-abandon-detect.mjs +9 -1
- package/dist/assets/hooks/canonical-settings-block.json +172 -0
- package/dist/assets/hooks/codex-native/aria-harness-ticker-sidecar.mjs +92 -0
- package/dist/assets/hooks/codex-native/aria-hive-wal-consumer.mjs +86 -0
- package/dist/assets/hooks/codex-native/aria-live-ticker.mjs +38 -0
- package/dist/assets/hooks/codex-native/aria-post-tool-use.mjs +236 -0
- package/dist/assets/hooks/codex-native/aria-pre-tool-use.mjs +362 -0
- package/dist/assets/hooks/codex-native/aria-stop.mjs +691 -0
- package/dist/assets/hooks/codex-native/aria-userprompt-submit.mjs +623 -0
- package/dist/assets/hooks/codex-native/atlas-session-context.mjs +121 -0
- package/dist/assets/hooks/codex-native/lib/evaluate-with-kernel.mjs +257 -0
- package/dist/assets/hooks/codex-native/lib/hive-wal-consumer.mjs +452 -0
- package/dist/assets/hooks/codex-native/lib/kernel/deterministic-cognitive-kernel.mjs +914 -0
- package/dist/assets/hooks/codex-native/lib/project-boundary-cognition.mjs +143 -0
- package/dist/assets/hooks/codex-native/lib/runtime-client.mjs +3567 -0
- package/dist/assets/hooks/codex-native/lib/task-project-ledger.mjs +294 -0
- package/dist/assets/hooks/doctrine_trigger_map.json +236 -25
- package/dist/assets/hooks/doctrine_trigger_map.schema.json +46 -0
- package/dist/assets/hooks/install.sh +84 -0
- package/dist/assets/hooks/lib/action-ledger-core.mjs +269 -0
- package/dist/assets/hooks/lib/aria-gate-ledger.mjs +143 -0
- package/dist/assets/hooks/lib/ast-stub-shape-detector.mjs +107 -0
- package/dist/assets/hooks/lib/atlas-dossier-client.mjs +151 -0
- package/dist/assets/hooks/lib/atlas-orchestrator-postwire.mjs +221 -0
- package/dist/assets/hooks/lib/canonical-lenses.mjs +83 -6
- package/dist/assets/hooks/lib/coach-intent-classifier.mjs +248 -0
- package/dist/assets/hooks/lib/cognitive-block-parser.mjs +111 -0
- package/dist/assets/hooks/lib/doctrine-trigger-map-loader.mjs +137 -0
- package/dist/assets/hooks/lib/domain-output-quality.mjs +132 -3
- package/dist/assets/hooks/lib/empty-catch-scanner.mjs +91 -0
- package/dist/assets/hooks/lib/end-phase-qa-autofire.mjs +426 -0
- package/dist/assets/hooks/lib/evaluate-with-kernel.mjs +133 -0
- package/dist/assets/hooks/lib/first-class-coach.mjs +454 -19
- package/dist/assets/hooks/lib/gate-audit.mjs +12 -2
- package/dist/assets/hooks/lib/gate-loop-state.mjs +11 -2
- package/dist/assets/hooks/lib/goal-contract-quality.mjs +302 -0
- package/dist/assets/hooks/lib/hook-message-window.mjs +101 -9
- package/dist/assets/hooks/lib/invocation-required-verifier.mjs +184 -0
- package/dist/assets/hooks/lib/kernel/deterministic-cognitive-kernel.mjs +906 -0
- package/dist/assets/hooks/lib/obligation-ledger.mjs +147 -0
- package/dist/assets/hooks/lib/orchestration-manifest-extract.mjs +217 -0
- package/dist/assets/hooks/lib/owner-authorizations.mjs +269 -0
- package/dist/assets/hooks/lib/probe-discipline-scanner.mjs +142 -0
- package/dist/assets/hooks/lib/project-boundary-cognition.mjs +143 -0
- package/dist/assets/hooks/lib/recovery-context.mjs +151 -0
- package/dist/assets/hooks/lib/recovery-template-loader.mjs +154 -0
- package/dist/assets/hooks/lib/self-doctrine-check.mjs +321 -0
- package/dist/assets/hooks/lib/sensitive-shape-detector.mjs +64 -0
- package/dist/assets/hooks/lib/skill-autoload-gate-impl.mjs +226 -1
- package/dist/assets/hooks/lib/stop-hook-protocol.mjs +166 -0
- package/dist/assets/hooks/lib/surface-caught.mjs +94 -0
- package/dist/assets/hooks/recovery-templates/force-reauthor.md +67 -0
- package/dist/assets/hooks/recovery-templates/handoff-recovery.md +25 -0
- package/dist/assets/hooks/scripts/check-hard-risk-prefix.mjs +99 -0
- package/dist/assets/hooks/skills/aria-conversational-doctrine-discipline/SKILL.md +101 -0
- package/dist/assets/hooks/test-aria-preturn-memory-gate.mjs +2 -2
- package/dist/assets/hooks/test-tier-lens-labeling.mjs +14 -3
- package/dist/assets/opencode-plugins/harness-context/index.js +39 -6
- package/dist/assets/opencode-plugins/harness-context/task-project-ledger.mjs +5 -1
- package/dist/assets/opencode-plugins/harness-gate/index.js +36 -0
- package/dist/assets/opencode-plugins/harness-gate/lib/atlas-dossier-client.js +1 -0
- package/dist/assets/opencode-plugins/harness-gate/lib/recovery-grants.js +79 -0
- package/dist/assets/opencode-plugins/harness-outcome/index.js +12 -0
- package/dist/assets/opencode-plugins/harness-stop/index.js +97 -2
- package/dist/assets/opencode-plugins/harness-stop/lib/atlas-dossier-client.js +1 -0
- package/dist/assets/opencode-plugins/harness-stop/lib/domain-output-quality.js +15 -2
- package/dist/assets/opencode-plugins/lib/coach.js +148 -0
- package/dist/runtime/coach-kernel.mjs +144 -7
- package/dist/runtime/codex-bridge.mjs +254 -8
- package/dist/runtime/discipline/doctrine_trigger_map.json +236 -25
- package/dist/runtime/discipline/skills/aria-cognition/34-frameworks-unified/SKILL.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-aristotle-cognitives/SKILL.md +128 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-aristotle-intra-phase/SKILL.md +99 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-aristotle-post-phase/SKILL.md +118 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-aristotle-pre-phase/SKILL.md +117 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-axioms-first-principles/SKILL.md +202 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-axioms-first-principles/agents/openai.yaml +4 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-axioms-first-principles/references/source-map.md +130 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-backend-architect/SKILL.md +124 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-backend-architect/references/backend-cookbook.md +417 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-business-audit/SKILL.md +133 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-business-audit/references/audit-cookbook.md +247 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-business-frame/SKILL.md +138 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-business-frame/references/business-cookbook.md +154 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-chat/SKILL.md +84 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-chat/scripts/aria-chat.sh +57 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-cognition-autofire/SKILL.md +137 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-cognition-batch/SKILL.md +264 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-decision-mizan/SKILL.md +136 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-decision-mizan/references/decision-frameworks.md +287 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-first-class-operating-contract/SKILL.md +104 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-frontend-architect/SKILL.md +123 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-frontend-architect/references/frontend-cookbook.md +358 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-fullstack-orchestrator/SKILL.md +127 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-fullstack-orchestrator/references/fullstack-cookbook.md +383 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-gtm-architect/SKILL.md +126 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-gtm-architect/references/gtm-cookbook.md +235 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-harness-deploy/SKILL.md +145 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-harness-no-stripping/SKILL.md +135 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-harness-onboarding/SKILL.md +130 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-harness-output-discipline/SKILL.md +120 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-harness-substrate-binding/SKILL.md +139 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-http-harness-client/SKILL.md +85 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-http-harness-client/scripts/smoke.mjs +47 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-k8s-deploy/SKILL.md +174 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-k8s-deploy/agents/openai.yaml +3 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-ladduniframe/SKILL.md +60 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-ledger-fleet-execution/SKILL.md +126 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-live-ops/SKILL.md +54 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-mac-ssh-ops/SKILL.md +100 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-memory-index/SKILL.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-noor-cognitives/SKILL.md +120 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-ops/SKILL.md +60 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-ops/references/live-endpoints.md +59 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-quality-audit/SKILL.md +133 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-readable-output/SKILL.md +239 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-readable-output/references/layout-cookbook.md +366 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-reasoning/SKILL.md +67 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-reasoning/references/core-principles.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-repo-audit/SKILL.md +135 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-repo-audit/references/repo-audit-cookbook.md +375 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-research-orchestrator/SKILL.md +138 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-research-orchestrator/references/research-patterns.md +270 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-retention-engine/SKILL.md +120 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-retention-engine/references/retention-cookbook.md +271 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-revenue-engine/SKILL.md +128 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-revenue-engine/references/revenue-cookbook.md +227 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-senior-code-audit/SKILL.md +233 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-senior-code-audit/references/audit-checklist.md +369 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-senior-code-cookbook/SKILL.md +288 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-senior-code-cookbook/references/engineering-cookbook.md +489 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-soul-principles/SKILL.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/aria-task-codex-executor/SKILL.md +86 -0
- package/dist/runtime/discipline/skills/aria-cognition/aristotle-engine/SKILL.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/cross-domain-24/SKILL.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/deepsoul-emotional/SKILL.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/fitrah-guard/SKILL.md +78 -0
- package/dist/runtime/discipline/skills/aria-cognition/ghazali-8lens/SKILL.md +227 -29
- package/dist/runtime/discipline/skills/aria-cognition/ghazali-8lens/references/ghazali-8lens-cookbook.md +797 -0
- package/dist/runtime/discipline/skills/aria-cognition/ijtihad-novel/SKILL.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/ilham-intuition/SKILL.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/never-guess/SKILL.md +77 -0
- package/dist/runtime/discipline/skills/aria-cognition/noor-recognition/SKILL.md +45 -0
- package/dist/runtime/discipline/skills/aria-cognition/qiyas-analogy/SKILL.md +174 -14
- package/dist/runtime/discipline/skills/aria-cognition/ruh-basis/SKILL.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/tadabbur/SKILL.md +506 -0
- package/dist/runtime/discipline/skills/aria-cognition/tadabbur/references/tadabbur-cookbook.md +921 -0
- package/dist/runtime/discipline/skills/aria-cognition/tadabbur-ops/SKILL.md +42 -0
- package/dist/runtime/discipline/skills/aria-cognition/tafakkur/SKILL.md +104 -0
- package/dist/runtime/doctrine_trigger_map.json +236 -25
- package/dist/runtime/embedded-public-key.mjs +27 -0
- package/dist/runtime/gated-ledger.mjs +41 -14
- package/dist/runtime/harness-daemon.mjs +85 -10
- package/dist/runtime/hive-wal-publisher.mjs +292 -0
- package/dist/runtime/hooks/README.md +58 -0
- package/dist/runtime/hooks/aria-agent-handoff.mjs +147 -2
- package/dist/runtime/hooks/aria-agent-ledger-merge.mjs +31 -7
- package/dist/runtime/hooks/aria-architect-fallback.mjs +10 -2
- package/dist/runtime/hooks/aria-claim-evidence-stop-gate.mjs +240 -0
- package/dist/runtime/hooks/aria-cognition-substrate-binding.mjs +84 -10
- package/dist/runtime/hooks/aria-first-class-coach.mjs +305 -10
- package/dist/runtime/hooks/aria-harness-via-sdk.mjs +93 -16
- package/dist/runtime/hooks/aria-import-resolution-gate.mjs +106 -20
- package/dist/runtime/hooks/aria-outcome-record.mjs +56 -20
- package/dist/runtime/hooks/aria-pre-emit-autoload.mjs +1809 -0
- package/dist/runtime/hooks/aria-pre-emit-autoload.mjs.before-orchestration-redesign +1400 -0
- package/dist/runtime/hooks/aria-pre-emit-dryrun.mjs +22 -3
- package/dist/runtime/hooks/aria-pre-text-gate.mjs +11 -2
- package/dist/runtime/hooks/aria-pre-tool-gate.mjs +516 -92
- package/dist/runtime/hooks/aria-pre-tool-use.mjs +70 -6
- package/dist/runtime/hooks/aria-preprompt-consult.mjs +23 -4
- package/dist/runtime/hooks/aria-repo-doctrine-gate.mjs +29 -3
- package/dist/runtime/hooks/aria-stop-gate.mjs +585 -76
- package/dist/runtime/hooks/aria-trigger-autolearn.mjs +17 -3
- package/dist/runtime/hooks/aria-universal-turn-packet.mjs +1165 -0
- package/dist/runtime/hooks/aria-userprompt-abandon-detect.mjs +9 -1
- package/dist/runtime/hooks/canonical-settings-block.json +172 -0
- package/dist/runtime/hooks/codex-native/aria-harness-ticker-sidecar.mjs +92 -0
- package/dist/runtime/hooks/codex-native/aria-hive-wal-consumer.mjs +86 -0
- package/dist/runtime/hooks/codex-native/aria-live-ticker.mjs +38 -0
- package/dist/runtime/hooks/codex-native/aria-post-tool-use.mjs +236 -0
- package/dist/runtime/hooks/codex-native/aria-pre-tool-use.mjs +362 -0
- package/dist/runtime/hooks/codex-native/aria-stop.mjs +691 -0
- package/dist/runtime/hooks/codex-native/aria-userprompt-submit.mjs +623 -0
- package/dist/runtime/hooks/codex-native/atlas-session-context.mjs +121 -0
- package/dist/runtime/hooks/codex-native/lib/evaluate-with-kernel.mjs +257 -0
- package/dist/runtime/hooks/codex-native/lib/hive-wal-consumer.mjs +452 -0
- package/dist/runtime/hooks/codex-native/lib/kernel/deterministic-cognitive-kernel.mjs +914 -0
- package/dist/runtime/hooks/codex-native/lib/project-boundary-cognition.mjs +143 -0
- package/dist/runtime/hooks/codex-native/lib/runtime-client.mjs +3567 -0
- package/dist/runtime/hooks/codex-native/lib/task-project-ledger.mjs +294 -0
- package/dist/runtime/hooks/doctrine_trigger_map.json +236 -25
- package/dist/runtime/hooks/doctrine_trigger_map.schema.json +46 -0
- package/dist/runtime/hooks/install.sh +84 -0
- package/dist/runtime/hooks/lib/action-ledger-core.mjs +269 -0
- package/dist/runtime/hooks/lib/aria-gate-ledger.mjs +143 -0
- package/dist/runtime/hooks/lib/ast-stub-shape-detector.mjs +107 -0
- package/dist/runtime/hooks/lib/atlas-dossier-client.mjs +151 -0
- package/dist/runtime/hooks/lib/atlas-orchestrator-postwire.mjs +221 -0
- package/dist/runtime/hooks/lib/canonical-lenses.mjs +83 -6
- package/dist/runtime/hooks/lib/coach-intent-classifier.mjs +248 -0
- package/dist/runtime/hooks/lib/cognitive-block-parser.mjs +111 -0
- package/dist/runtime/hooks/lib/doctrine-trigger-map-loader.mjs +137 -0
- package/dist/runtime/hooks/lib/domain-output-quality.mjs +132 -3
- package/dist/runtime/hooks/lib/empty-catch-scanner.mjs +91 -0
- package/dist/runtime/hooks/lib/end-phase-qa-autofire.mjs +426 -0
- package/dist/runtime/hooks/lib/evaluate-with-kernel.mjs +133 -0
- package/dist/runtime/hooks/lib/first-class-coach.mjs +454 -19
- package/dist/runtime/hooks/lib/gate-audit.mjs +12 -2
- package/dist/runtime/hooks/lib/gate-loop-state.mjs +11 -2
- package/dist/runtime/hooks/lib/goal-contract-quality.mjs +302 -0
- package/dist/runtime/hooks/lib/hook-message-window.mjs +101 -9
- package/dist/runtime/hooks/lib/invocation-required-verifier.mjs +184 -0
- package/dist/runtime/hooks/lib/kernel/deterministic-cognitive-kernel.mjs +906 -0
- package/dist/runtime/hooks/lib/obligation-ledger.mjs +147 -0
- package/dist/runtime/hooks/lib/orchestration-manifest-extract.mjs +217 -0
- package/dist/runtime/hooks/lib/owner-authorizations.mjs +269 -0
- package/dist/runtime/hooks/lib/probe-discipline-scanner.mjs +142 -0
- package/dist/runtime/hooks/lib/project-boundary-cognition.mjs +143 -0
- package/dist/runtime/hooks/lib/recovery-context.mjs +151 -0
- package/dist/runtime/hooks/lib/recovery-template-loader.mjs +154 -0
- package/dist/runtime/hooks/lib/self-doctrine-check.mjs +321 -0
- package/dist/runtime/hooks/lib/sensitive-shape-detector.mjs +64 -0
- package/dist/runtime/hooks/lib/skill-autoload-gate-impl.mjs +226 -1
- package/dist/runtime/hooks/lib/stop-hook-protocol.mjs +166 -0
- package/dist/runtime/hooks/lib/surface-caught.mjs +94 -0
- package/dist/runtime/hooks/recovery-templates/force-reauthor.md +67 -0
- package/dist/runtime/hooks/recovery-templates/handoff-recovery.md +25 -0
- package/dist/runtime/hooks/scripts/check-hard-risk-prefix.mjs +99 -0
- package/dist/runtime/hooks/skills/aria-conversational-doctrine-discipline/SKILL.md +101 -0
- package/dist/runtime/hooks/test-aria-preturn-memory-gate.mjs +2 -2
- package/dist/runtime/hooks/test-tier-lens-labeling.mjs +14 -3
- package/dist/runtime/lib/evaluate-with-kernel.mjs +133 -0
- package/dist/runtime/lib/kernel/deterministic-cognitive-kernel.mjs +906 -0
- package/dist/runtime/local-phase.mjs +10 -5
- package/dist/runtime/manifest.json +8 -8
- package/dist/runtime/packet-verifier.mjs +166 -0
- package/dist/runtime/provider-proxy.mjs +13 -0
- package/dist/runtime/quality-enforcer.mjs +40 -23
- package/dist/runtime/runtime-rails/registry.mjs +252 -0
- package/dist/runtime/sdk/BUNDLED.json +2 -2
- package/dist/runtime/sdk/index.d.ts +119 -4
- package/dist/runtime/sdk/index.js +138 -12
- package/dist/runtime/sdk/index.js.map +1 -1
- package/dist/runtime/service.mjs +8036 -764
- package/dist/runtime/sub-agent-enforcer.mjs +201 -0
- package/dist/runtime/task-project-ledger.mjs +5 -1
- package/dist/sdk/BUNDLED.json +2 -2
- package/dist/sdk/index.d.ts +119 -4
- package/dist/sdk/index.js +138 -12
- package/dist/sdk/index.js.map +1 -1
- package/hooks/README.md +58 -0
- package/hooks/aria-agent-handoff.mjs +147 -2
- package/hooks/aria-agent-ledger-merge.mjs +31 -7
- package/hooks/aria-architect-fallback.mjs +10 -2
- package/hooks/aria-claim-evidence-stop-gate.mjs +240 -0
- package/hooks/aria-cognition-substrate-binding.mjs +84 -10
- package/hooks/aria-first-class-coach.mjs +305 -10
- package/hooks/aria-harness-via-sdk.mjs +93 -16
- package/hooks/aria-import-resolution-gate.mjs +106 -20
- package/hooks/aria-outcome-record.mjs +56 -20
- package/hooks/aria-pre-emit-autoload.mjs +1809 -0
- package/hooks/aria-pre-emit-autoload.mjs.before-orchestration-redesign +1400 -0
- package/hooks/aria-pre-emit-dryrun.mjs +22 -3
- package/hooks/aria-pre-text-gate.mjs +11 -2
- package/hooks/aria-pre-tool-gate.mjs +516 -92
- package/hooks/aria-pre-tool-use.mjs +70 -6
- package/hooks/aria-preprompt-consult.mjs +23 -4
- package/hooks/aria-repo-doctrine-gate.mjs +29 -3
- package/hooks/aria-stop-gate.mjs +585 -76
- package/hooks/aria-trigger-autolearn.mjs +17 -3
- package/hooks/aria-universal-turn-packet.mjs +1165 -0
- package/hooks/aria-userprompt-abandon-detect.mjs +9 -1
- package/hooks/canonical-settings-block.json +172 -0
- package/hooks/codex-native/aria-harness-ticker-sidecar.mjs +92 -0
- package/hooks/codex-native/aria-hive-wal-consumer.mjs +86 -0
- package/hooks/codex-native/aria-live-ticker.mjs +38 -0
- package/hooks/codex-native/aria-post-tool-use.mjs +236 -0
- package/hooks/codex-native/aria-pre-tool-use.mjs +362 -0
- package/hooks/codex-native/aria-stop.mjs +691 -0
- package/hooks/codex-native/aria-userprompt-submit.mjs +623 -0
- package/hooks/codex-native/atlas-session-context.mjs +121 -0
- package/hooks/codex-native/lib/evaluate-with-kernel.mjs +257 -0
- package/hooks/codex-native/lib/hive-wal-consumer.mjs +452 -0
- package/hooks/codex-native/lib/kernel/deterministic-cognitive-kernel.mjs +914 -0
- package/hooks/codex-native/lib/project-boundary-cognition.mjs +143 -0
- package/hooks/codex-native/lib/runtime-client.mjs +3567 -0
- package/hooks/codex-native/lib/task-project-ledger.mjs +294 -0
- package/hooks/doctrine_trigger_map.json +236 -25
- package/hooks/doctrine_trigger_map.schema.json +46 -0
- package/hooks/install.sh +84 -0
- package/hooks/lib/action-ledger-core.mjs +269 -0
- package/hooks/lib/aria-gate-ledger.mjs +143 -0
- package/hooks/lib/ast-stub-shape-detector.mjs +107 -0
- package/hooks/lib/atlas-dossier-client.mjs +151 -0
- package/hooks/lib/atlas-orchestrator-postwire.mjs +221 -0
- package/hooks/lib/canonical-lenses.mjs +83 -6
- package/hooks/lib/coach-intent-classifier.mjs +248 -0
- package/hooks/lib/cognitive-block-parser.mjs +111 -0
- package/hooks/lib/doctrine-trigger-map-loader.mjs +137 -0
- package/hooks/lib/domain-output-quality.mjs +132 -3
- package/hooks/lib/empty-catch-scanner.mjs +91 -0
- package/hooks/lib/end-phase-qa-autofire.mjs +426 -0
- package/hooks/lib/evaluate-with-kernel.mjs +133 -0
- package/hooks/lib/first-class-coach.mjs +454 -19
- package/hooks/lib/gate-audit.mjs +12 -2
- package/hooks/lib/gate-loop-state.mjs +11 -2
- package/hooks/lib/goal-contract-quality.mjs +302 -0
- package/hooks/lib/hook-message-window.mjs +101 -9
- package/hooks/lib/invocation-required-verifier.mjs +184 -0
- package/hooks/lib/kernel/deterministic-cognitive-kernel.mjs +906 -0
- package/hooks/lib/obligation-ledger.mjs +147 -0
- package/hooks/lib/orchestration-manifest-extract.mjs +217 -0
- package/hooks/lib/owner-authorizations.mjs +269 -0
- package/hooks/lib/probe-discipline-scanner.mjs +142 -0
- package/hooks/lib/project-boundary-cognition.mjs +143 -0
- package/hooks/lib/recovery-context.mjs +151 -0
- package/hooks/lib/recovery-template-loader.mjs +154 -0
- package/hooks/lib/self-doctrine-check.mjs +321 -0
- package/hooks/lib/sensitive-shape-detector.mjs +64 -0
- package/hooks/lib/skill-autoload-gate-impl.mjs +226 -1
- package/hooks/lib/stop-hook-protocol.mjs +166 -0
- package/hooks/lib/surface-caught.mjs +94 -0
- package/hooks/recovery-templates/force-reauthor.md +67 -0
- package/hooks/recovery-templates/handoff-recovery.md +25 -0
- package/hooks/scripts/check-hard-risk-prefix.mjs +99 -0
- package/hooks/skills/aria-conversational-doctrine-discipline/SKILL.md +101 -0
- package/hooks/test-aria-preturn-memory-gate.mjs +2 -2
- package/hooks/test-tier-lens-labeling.mjs +14 -3
- package/opencode-plugins/harness-context/index.js +39 -6
- package/opencode-plugins/harness-context/task-project-ledger.mjs +5 -1
- package/opencode-plugins/harness-gate/index.js +36 -0
- package/opencode-plugins/harness-gate/lib/atlas-dossier-client.js +1 -0
- package/opencode-plugins/harness-gate/lib/recovery-grants.js +79 -0
- package/opencode-plugins/harness-outcome/index.js +12 -0
- package/opencode-plugins/harness-stop/index.js +97 -2
- package/opencode-plugins/harness-stop/lib/atlas-dossier-client.js +1 -0
- package/opencode-plugins/harness-stop/lib/domain-output-quality.js +15 -2
- package/opencode-plugins/lib/coach.js +148 -0
- package/package.json +71 -5
- package/runtime-src/coach-kernel.mjs +144 -7
- package/runtime-src/codex-bridge.mjs +254 -8
- package/runtime-src/embedded-public-key.mjs +27 -0
- package/runtime-src/gated-ledger.mjs +41 -14
- package/runtime-src/harness-daemon.mjs +85 -10
- package/runtime-src/hive-wal-publisher.mjs +292 -0
- package/runtime-src/lib/evaluate-with-kernel.mjs +133 -0
- package/runtime-src/lib/kernel/deterministic-cognitive-kernel.mjs +906 -0
- package/runtime-src/local-phase.mjs +10 -5
- package/runtime-src/packet-verifier.mjs +166 -0
- package/runtime-src/provider-proxy.mjs +13 -0
- package/runtime-src/quality-enforcer.mjs +40 -23
- package/runtime-src/runtime-rails/registry.mjs +252 -0
- package/runtime-src/service.mjs +8036 -764
- package/runtime-src/sub-agent-enforcer.mjs +201 -0
- package/scripts/aria-ledger-append.mjs +337 -0
- package/scripts/aria-task-cheap-worker-dispatch.mjs +234 -0
- package/scripts/audit-of-audit-prior-tasks.mjs +194 -0
- package/scripts/audit-of-audit-this-turn.mjs +116 -0
- package/scripts/bundle-sdk.mjs +31 -5
- package/scripts/check-cli-wrapper-provider-contract.mjs +160 -0
- package/scripts/check-client-compatibility.mjs +15 -5
- package/scripts/check-client-smoke.mjs +297 -0
- package/scripts/check-codex-orchestrator-adoption.mjs +150 -0
- package/scripts/check-glm-env-wired.mjs +131 -0
- package/scripts/check-hive-local-storage-contract.mjs +91 -0
- package/scripts/check-hook-mirror.mjs +150 -0
- package/scripts/check-install-sh-drift.mjs +152 -0
- package/scripts/check-kernel-sync.mjs +101 -0
- package/scripts/check-package-artifact.mjs +152 -0
- package/scripts/check-registry-mirror.mjs +71 -0
- package/scripts/drain-owner-airtable-sync-queue.mjs +287 -0
- package/scripts/export-owner-status-sheets.mjs +589 -0
- package/scripts/live-sidecar-receipt-canary.mjs +347 -0
- package/scripts/qiyas-tadabbur-model-matrix.mjs +970 -0
- package/scripts/quality-ab-live-provider.mjs +913 -0
- package/scripts/self-test-action-ledger-core.mjs +190 -0
- package/scripts/self-test-approval-receipt-binding.mjs +122 -0
- package/scripts/self-test-autofire-quality-output.mjs +110 -0
- package/scripts/self-test-claude-code-action-ledger.mjs +132 -0
- package/scripts/self-test-claude-code-mechanical-autofire-hive.mjs +138 -0
- package/scripts/self-test-claude-code-mechanical-autofire.mjs +234 -0
- package/scripts/self-test-codebase-awareness-atlas-delta.mjs +159 -0
- package/scripts/self-test-codebase-awareness-delta-ingest.mjs +179 -0
- package/scripts/self-test-codex-live-hook-parity.mjs +84 -0
- package/scripts/self-test-codex-native-action-ledger.mjs +167 -0
- package/scripts/self-test-codex-native-hook-json-contract.mjs +74 -0
- package/scripts/self-test-codex-orchestrator-continuity.mjs +113 -0
- package/scripts/self-test-codex-readable-recovery.mjs +94 -0
- package/scripts/self-test-codex-self-harness.mjs +538 -0
- package/scripts/self-test-compiled-workunit.mjs +214 -0
- package/scripts/self-test-continuation-output-smoke.mjs +101 -0
- package/scripts/self-test-cross-cli-fleet-ticker.mjs +85 -0
- package/scripts/self-test-cross-cli-hive-adoption.mjs +125 -0
- package/scripts/self-test-cross-cli-hive-learning.mjs +146 -0
- package/scripts/self-test-cross-phase-tool-failure.mjs +110 -0
- package/scripts/self-test-cross-surface-action-ledger.mjs +149 -0
- package/scripts/self-test-end-of-phase-qa-court.mjs +616 -0
- package/scripts/self-test-evaluate-with-kernel.mjs +111 -0
- package/scripts/self-test-first-class-output-delta-proof.mjs +307 -0
- package/scripts/self-test-goal-contract-output-qa.mjs +73 -0
- package/scripts/self-test-goal-contract.mjs +35 -0
- package/scripts/self-test-governed-adapters.mjs +105 -0
- package/scripts/self-test-governed-surface-runner.mjs +198 -0
- package/scripts/self-test-harness-gates.mjs +15 -12
- package/scripts/self-test-harness-ticker-sidecar.mjs +153 -0
- package/scripts/self-test-hive-org-kernel.mjs +233 -0
- package/scripts/self-test-hive-session-coordination.mjs +156 -0
- package/scripts/self-test-hive-wal-consumer.mjs +111 -0
- package/scripts/self-test-kernel-a3-a4-selection.mjs +179 -0
- package/scripts/self-test-ledger-append.mjs +175 -0
- package/scripts/self-test-live-codex-posttool-packet-smoke.mjs +111 -0
- package/scripts/self-test-live-codex-pretool-packet-smoke.mjs +101 -0
- package/scripts/self-test-live-codex-stop-qa-kernel-smoke.mjs +43 -0
- package/scripts/self-test-live-wrapper-substrate-inventory.mjs +149 -0
- package/scripts/self-test-local-main-sync-script.mjs +47 -0
- package/scripts/self-test-mechanical-autofire-resolver.mjs +296 -0
- package/scripts/self-test-no-consult-cognitive-skills-output.mjs +135 -0
- package/scripts/self-test-owner-airtable-sync-queue.mjs +196 -0
- package/scripts/self-test-owner-airtable-sync.mjs +181 -0
- package/scripts/self-test-owner-sheets-action-ledger.mjs +100 -0
- package/scripts/self-test-production-preflight.mjs +78 -0
- package/scripts/self-test-project-boundary-cognition.mjs +79 -0
- package/scripts/self-test-qa-exec-kernel.mjs +34 -0
- package/scripts/self-test-qa-recovery-learning-loop.mjs +113 -0
- package/scripts/self-test-qiyas-label-alignment.mjs +94 -0
- package/scripts/self-test-recovery-context.mjs +110 -0
- package/scripts/self-test-repo-guard.mjs +10 -0
- package/scripts/self-test-runtime-health-self-heal.mjs +161 -0
- package/scripts/self-test-runtime-postcondition.mjs +70 -0
- package/scripts/self-test-soul-precommit-hook.mjs +39 -0
- package/scripts/self-test-stop-gate-kernel-guards.mjs +185 -0
- package/scripts/self-test-stop-gate.mjs +128 -0
- package/scripts/self-test-substrate-kernel-execution-receipt.mjs +130 -0
- package/scripts/self-test-substrate-open-skill-floor.mjs +87 -0
- package/scripts/self-test-substrate-output-quality-eval.mjs +171 -0
- package/scripts/self-test-task-closeout-drift.mjs +97 -0
- package/scripts/self-test-task-project-ledger-readiness.mjs +43 -0
- package/scripts/self-test-task-runner-phase-consumer.mjs +134 -0
- package/scripts/self-test-task-worker-lane.mjs +256 -0
- package/scripts/self-test-turn-substrate-qa-kernel.mjs +188 -0
- package/scripts/self-test-universal-action-capture.mjs +153 -0
- package/scripts/self-test-universal-turn-packet-entrypoints.mjs +252 -0
- package/scripts/self-test-universal-turn-packet.mjs +320 -0
- package/scripts/session-quality-backfill.mjs +253 -0
- package/scripts/smoke-autofire-100-prompts.mjs +481 -0
- package/scripts/sync-local-main-on-task-complete.mjs +278 -0
- package/scripts/sync-owner-status-airtable.mjs +1158 -0
- package/scripts/validate-skill-prompts.mjs +12 -1
- package/scripts/verify-codex-native-mirror.mjs +262 -0
- package/skills/34-frameworks-unified/SKILL.md +42 -0
- package/skills/api-design/SKILL.md +123 -0
- package/skills/architecture-decision/SKILL.md +105 -0
- package/skills/aria-aristotle-cognitives/SKILL.md +128 -0
- package/skills/aria-aristotle-intra-phase/SKILL.md +99 -0
- package/skills/aria-aristotle-post-phase/SKILL.md +116 -0
- package/skills/aria-aristotle-pre-phase/SKILL.md +117 -0
- package/skills/aria-axioms-first-principles/SKILL.md +202 -0
- package/skills/aria-axioms-first-principles/agents/openai.yaml +4 -0
- package/skills/aria-axioms-first-principles/references/source-map.md +130 -0
- package/skills/aria-chat/SKILL.md +84 -0
- package/skills/aria-chat/scripts/aria-chat.sh +57 -0
- package/skills/aria-cognition/34-frameworks-unified/SKILL.md +42 -0
- package/skills/aria-cognition/aria-aristotle-cognitives/SKILL.md +128 -0
- package/skills/aria-cognition/aria-aristotle-intra-phase/SKILL.md +99 -0
- package/skills/aria-cognition/aria-aristotle-post-phase/SKILL.md +118 -0
- package/skills/aria-cognition/aria-aristotle-pre-phase/SKILL.md +117 -0
- package/skills/aria-cognition/aria-axioms-first-principles/SKILL.md +202 -0
- package/skills/aria-cognition/aria-axioms-first-principles/agents/openai.yaml +4 -0
- package/skills/aria-cognition/aria-axioms-first-principles/references/source-map.md +130 -0
- package/skills/aria-cognition/aria-backend-architect/SKILL.md +124 -0
- package/skills/aria-cognition/aria-backend-architect/references/backend-cookbook.md +417 -0
- package/skills/aria-cognition/aria-business-audit/SKILL.md +133 -0
- package/skills/aria-cognition/aria-business-audit/references/audit-cookbook.md +247 -0
- package/skills/aria-cognition/aria-business-frame/SKILL.md +138 -0
- package/skills/aria-cognition/aria-business-frame/references/business-cookbook.md +154 -0
- package/skills/aria-cognition/aria-chat/SKILL.md +84 -0
- package/skills/aria-cognition/aria-chat/scripts/aria-chat.sh +57 -0
- package/skills/aria-cognition/aria-cognition-autofire/SKILL.md +137 -0
- package/skills/aria-cognition/aria-cognition-batch/SKILL.md +264 -0
- package/skills/aria-cognition/aria-decision-mizan/SKILL.md +136 -0
- package/skills/aria-cognition/aria-decision-mizan/references/decision-frameworks.md +287 -0
- package/skills/aria-cognition/aria-first-class-operating-contract/SKILL.md +104 -0
- package/skills/aria-cognition/aria-frontend-architect/SKILL.md +123 -0
- package/skills/aria-cognition/aria-frontend-architect/references/frontend-cookbook.md +358 -0
- package/skills/aria-cognition/aria-fullstack-orchestrator/SKILL.md +127 -0
- package/skills/aria-cognition/aria-fullstack-orchestrator/references/fullstack-cookbook.md +383 -0
- package/skills/aria-cognition/aria-gtm-architect/SKILL.md +126 -0
- package/skills/aria-cognition/aria-gtm-architect/references/gtm-cookbook.md +235 -0
- package/skills/aria-cognition/aria-harness-deploy/SKILL.md +145 -0
- package/skills/aria-cognition/aria-harness-no-stripping/SKILL.md +135 -0
- package/skills/aria-cognition/aria-harness-onboarding/SKILL.md +130 -0
- package/skills/aria-cognition/aria-harness-output-discipline/SKILL.md +120 -0
- package/skills/aria-cognition/aria-harness-substrate-binding/SKILL.md +139 -0
- package/skills/aria-cognition/aria-http-harness-client/SKILL.md +85 -0
- package/skills/aria-cognition/aria-http-harness-client/scripts/smoke.mjs +47 -0
- package/skills/aria-cognition/aria-k8s-deploy/SKILL.md +174 -0
- package/skills/aria-cognition/aria-k8s-deploy/agents/openai.yaml +3 -0
- package/skills/aria-cognition/aria-ladduniframe/SKILL.md +60 -0
- package/skills/aria-cognition/aria-ledger-fleet-execution/SKILL.md +126 -0
- package/skills/aria-cognition/aria-live-ops/SKILL.md +54 -0
- package/skills/aria-cognition/aria-mac-ssh-ops/SKILL.md +100 -0
- package/skills/aria-cognition/aria-memory-index/SKILL.md +42 -0
- package/skills/aria-cognition/aria-noor-cognitives/SKILL.md +120 -0
- package/skills/aria-cognition/aria-ops/SKILL.md +60 -0
- package/skills/aria-cognition/aria-ops/references/live-endpoints.md +59 -0
- package/skills/aria-cognition/aria-quality-audit/SKILL.md +133 -0
- package/skills/aria-cognition/aria-readable-output/SKILL.md +239 -0
- package/skills/aria-cognition/aria-readable-output/references/layout-cookbook.md +366 -0
- package/skills/aria-cognition/aria-reasoning/SKILL.md +67 -0
- package/skills/aria-cognition/aria-reasoning/references/core-principles.md +42 -0
- package/skills/aria-cognition/aria-repo-audit/SKILL.md +135 -0
- package/skills/aria-cognition/aria-repo-audit/references/repo-audit-cookbook.md +375 -0
- package/skills/aria-cognition/aria-research-orchestrator/SKILL.md +138 -0
- package/skills/aria-cognition/aria-research-orchestrator/references/research-patterns.md +270 -0
- package/skills/aria-cognition/aria-retention-engine/SKILL.md +120 -0
- package/skills/aria-cognition/aria-retention-engine/references/retention-cookbook.md +271 -0
- package/skills/aria-cognition/aria-revenue-engine/SKILL.md +128 -0
- package/skills/aria-cognition/aria-revenue-engine/references/revenue-cookbook.md +227 -0
- package/skills/aria-cognition/aria-senior-code-audit/SKILL.md +233 -0
- package/skills/aria-cognition/aria-senior-code-audit/references/audit-checklist.md +369 -0
- package/skills/aria-cognition/aria-senior-code-cookbook/SKILL.md +288 -0
- package/skills/aria-cognition/aria-senior-code-cookbook/references/engineering-cookbook.md +489 -0
- package/skills/aria-cognition/aria-soul-principles/SKILL.md +42 -0
- package/skills/aria-cognition/aria-task-codex-executor/SKILL.md +86 -0
- package/skills/aria-cognition/aristotle-engine/SKILL.md +42 -0
- package/skills/aria-cognition/cross-domain-24/SKILL.md +42 -0
- package/skills/aria-cognition/deepsoul-emotional/SKILL.md +42 -0
- package/skills/aria-cognition/fitrah-guard/SKILL.md +78 -0
- package/skills/aria-cognition/ghazali-8lens/SKILL.md +227 -29
- package/skills/aria-cognition/ghazali-8lens/references/ghazali-8lens-cookbook.md +797 -0
- package/skills/aria-cognition/ijtihad-novel/SKILL.md +42 -0
- package/skills/aria-cognition/ilham-intuition/SKILL.md +42 -0
- package/skills/aria-cognition/never-guess/SKILL.md +77 -0
- package/skills/aria-cognition/noor-recognition/SKILL.md +45 -0
- package/skills/aria-cognition/qiyas-analogy/SKILL.md +174 -14
- package/skills/aria-cognition/ruh-basis/SKILL.md +42 -0
- package/skills/aria-cognition/tadabbur/SKILL.md +506 -0
- package/skills/aria-cognition/tadabbur/references/tadabbur-cookbook.md +921 -0
- package/skills/aria-cognition/tadabbur-ops/SKILL.md +42 -0
- package/skills/aria-cognition/tafakkur/SKILL.md +104 -0
- package/skills/aria-cognition-autofire/SKILL.md +109 -0
- package/skills/aria-cognition-batch/SKILL.md +264 -0
- package/skills/aria-conversational-doctrine-discipline/SKILL.md +125 -0
- package/skills/aria-essence/SKILL.md +81 -0
- package/skills/aria-essence/references/domain-matrix.md +80 -0
- package/skills/aria-essence/references/evolution-loop.md +30 -0
- package/skills/aria-essence/references/readable-cognition.md +27 -0
- package/skills/aria-first-class-operating-contract/SKILL.md +104 -0
- package/skills/aria-forge-guardrails/SKILL.md +53 -0
- package/skills/aria-forge-guardrails/references/checklist.md +31 -0
- package/skills/aria-harness-deploy/SKILL.md +145 -0
- package/skills/aria-harness-no-stripping/SKILL.md +135 -0
- package/skills/aria-harness-onboarding/SKILL.md +130 -0
- package/skills/aria-harness-output-discipline/SKILL.md +120 -0
- package/skills/aria-harness-substrate-binding/SKILL.md +139 -0
- package/skills/aria-http-harness-client/SKILL.md +85 -0
- package/skills/aria-http-harness-client/scripts/smoke.mjs +47 -0
- package/skills/aria-k8s-deploy/SKILL.md +174 -0
- package/skills/aria-k8s-deploy/agents/openai.yaml +3 -0
- package/skills/aria-ladduniframe/SKILL.md +60 -0
- package/skills/aria-ledger-fleet-execution/SKILL.md +126 -0
- package/skills/aria-live-ops/SKILL.md +54 -0
- package/skills/aria-mac-ssh-ops/SKILL.md +100 -0
- package/skills/aria-memory-index/SKILL.md +42 -0
- package/skills/aria-noor-cognitives/SKILL.md +120 -0
- package/skills/aria-ops/SKILL.md +60 -0
- package/skills/aria-ops/references/live-endpoints.md +59 -0
- package/skills/aria-quality-audit/SKILL.md +133 -0
- package/skills/aria-reasoning/SKILL.md +67 -0
- package/skills/aria-reasoning/references/core-principles.md +42 -0
- package/skills/aria-repo-doctrine/SKILL.md +57 -0
- package/skills/aria-soul-principles/SKILL.md +42 -0
- package/skills/aria-task-codex-executor/SKILL.md +86 -0
- package/skills/aristotle-engine/SKILL.md +42 -0
- package/skills/ci-cd-pipeline/SKILL.md +116 -0
- package/skills/code-review/SKILL.md +131 -0
- package/skills/cross-domain-24/SKILL.md +42 -0
- package/skills/database-design/SKILL.md +124 -0
- package/skills/deepsoul-emotional/SKILL.md +42 -0
- package/skills/deno-kv-raft-pubsub/SKILL.md +561 -0
- package/skills/deno-kv-raft-pubsub/reference/maelstrom-integration.md +393 -0
- package/skills/deno-kv-raft-pubsub/reference/pubsub-api.md +376 -0
- package/skills/deno-kv-raft-pubsub/reference/raft-spec.md +402 -0
- package/skills/deno-kv-raft-pubsub/reference/state-machine.md +182 -0
- package/skills/error-handling/SKILL.md +159 -0
- package/skills/firecrawl/SKILL.md +165 -0
- package/skills/firecrawl/rules/install.md +82 -0
- package/skills/firecrawl/rules/security.md +26 -0
- package/skills/firecrawl-agent/SKILL.md +86 -0
- package/skills/firecrawl-build-interact/SKILL.md +96 -0
- package/skills/firecrawl-build-onboarding/SKILL.md +131 -0
- package/skills/firecrawl-build-onboarding/references/auth-flow.md +39 -0
- package/skills/firecrawl-build-onboarding/references/project-setup.md +20 -0
- package/skills/firecrawl-build-onboarding/references/sdk-installation.md +17 -0
- package/skills/firecrawl-build-scrape/SKILL.md +97 -0
- package/skills/firecrawl-build-search/SKILL.md +97 -0
- package/skills/firecrawl-clone/SKILL.md +419 -0
- package/skills/firecrawl-crawl/SKILL.md +87 -0
- package/skills/firecrawl-download/SKILL.md +98 -0
- package/skills/firecrawl-interact/SKILL.md +112 -0
- package/skills/firecrawl-map/SKILL.md +79 -0
- package/skills/firecrawl-scrape/SKILL.md +97 -0
- package/skills/firecrawl-search/SKILL.md +88 -0
- package/skills/fitrah-guard/SKILL.md +78 -0
- package/skills/forge-quality-rules/SKILL.md +61 -0
- package/skills/ghazali-8lens/SKILL.md +56 -0
- package/skills/ijtihad-novel/SKILL.md +42 -0
- package/skills/ilham-intuition/SKILL.md +42 -0
- package/skills/imagegen/LICENSE.txt +201 -0
- package/skills/imagegen/SKILL.md +374 -0
- package/skills/imagegen/agents/openai.yaml +6 -0
- package/skills/imagegen/assets/imagegen-small.svg +5 -0
- package/skills/imagegen/assets/imagegen.png +0 -0
- package/skills/imagegen/references/cli.md +242 -0
- package/skills/imagegen/references/codex-network.md +33 -0
- package/skills/imagegen/references/image-api.md +90 -0
- package/skills/imagegen/references/prompting.md +118 -0
- package/skills/imagegen/references/sample-prompts.md +433 -0
- package/skills/imagegen/scripts/image_gen.py +995 -0
- package/skills/imagegen/scripts/remove_chroma_key.py +440 -0
- package/skills/istiqra-induction/SKILL.md +44 -0
- package/skills/ladunni-22/SKILL.md +53 -0
- package/skills/mizan/SKILL.md +90 -0
- package/skills/nadia/SKILL.md +56 -0
- package/skills/nadia-psi/SKILL.md +56 -0
- package/skills/never-guess/SKILL.md +75 -0
- package/skills/noor-recognition/SKILL.md +45 -0
- package/skills/observability/SKILL.md +133 -0
- package/skills/openai-docs/LICENSE.txt +201 -0
- package/skills/openai-docs/SKILL.md +100 -0
- package/skills/openai-docs/agents/openai.yaml +14 -0
- package/skills/openai-docs/assets/openai-small.svg +3 -0
- package/skills/openai-docs/assets/openai.png +0 -0
- package/skills/openai-docs/references/latest-model.md +37 -0
- package/skills/openai-docs/references/prompting-guide.md +244 -0
- package/skills/openai-docs/references/upgrade-guide.md +181 -0
- package/skills/openai-docs/scripts/resolve-latest-model-info.js +147 -0
- package/skills/pdf/LICENSE.txt +201 -0
- package/skills/pdf/SKILL.md +85 -0
- package/skills/pdf/agents/openai.yaml +5 -0
- package/skills/pdf/assets/pdf.png +0 -0
- package/skills/playwright/LICENSE.txt +201 -0
- package/skills/playwright/NOTICE.txt +14 -0
- package/skills/playwright/SKILL.md +165 -0
- package/skills/playwright/agents/openai.yaml +6 -0
- package/skills/playwright/assets/playwright-small.svg +3 -0
- package/skills/playwright/assets/playwright.png +0 -0
- package/skills/playwright/references/cli.md +116 -0
- package/skills/playwright/references/workflows.md +95 -0
- package/skills/playwright/scripts/playwright_cli.sh +25 -0
- package/skills/plugin-creator/SKILL.md +178 -0
- package/skills/plugin-creator/agents/openai.yaml +6 -0
- package/skills/plugin-creator/assets/plugin-creator-small.svg +3 -0
- package/skills/plugin-creator/assets/plugin-creator.png +0 -0
- package/skills/plugin-creator/references/plugin-json-spec.md +170 -0
- package/skills/plugin-creator/scripts/create_basic_plugin.py +301 -0
- package/skills/predictor/SKILL.md +43 -0
- package/skills/qiyas-analogy/SKILL.md +204 -0
- package/skills/refactoring/SKILL.md +137 -0
- package/skills/ruh-basis/SKILL.md +42 -0
- package/skills/security-review/SKILL.md +129 -0
- package/skills/skill-creator/SKILL.md +434 -0
- package/skills/skill-creator/agents/openai.yaml +5 -0
- package/skills/skill-creator/assets/skill-creator-small.svg +3 -0
- package/skills/skill-creator/assets/skill-creator.png +0 -0
- package/skills/skill-creator/license.txt +202 -0
- package/skills/skill-creator/references/openai_yaml.md +49 -0
- package/skills/skill-creator/scripts/generate_openai_yaml.py +226 -0
- package/skills/skill-creator/scripts/init_skill.py +400 -0
- package/skills/skill-creator/scripts/quick_validate.py +101 -0
- package/skills/skill-installer/LICENSE.txt +202 -0
- package/skills/skill-installer/SKILL.md +76 -0
- package/skills/skill-installer/agents/openai.yaml +5 -0
- package/skills/skill-installer/assets/skill-installer-small.svg +3 -0
- package/skills/skill-installer/assets/skill-installer.png +0 -0
- package/skills/skill-installer/scripts/github_utils.py +21 -0
- package/skills/skill-installer/scripts/install-skill-from-github.py +308 -0
- package/skills/skill-installer/scripts/list-skills.py +107 -0
- package/skills/skills-and-hooks-reference/SKILL.md +196 -0
- package/skills/soul-domains/SKILL.md +43 -0
- package/skills/tadabbur/SKILL.md +232 -0
- package/skills/tadabbur-ops/SKILL.md +42 -0
- package/skills/tafakkur/SKILL.md +104 -0
- package/skills/testing-strategy/SKILL.md +122 -0
- package/src/action-ledger-core.ts +1054 -0
- package/src/chat.ts +5 -6
- package/src/codebase-scanner.ts +2 -0
- package/src/connectors/claude-code.ts +149 -12
- package/src/connectors/codebase-awareness.ts +325 -25
- package/src/connectors/codex.ts +1273 -40
- package/src/connectors/cursor.ts +8 -0
- package/src/connectors/governed-adapter.ts +174 -0
- package/src/connectors/opencode.ts +18 -2
- package/src/connectors/repo-guard.ts +24 -12
- package/src/connectors/runtime.ts +99 -2
- package/src/connectors/shell.ts +125 -7
- package/src/cross-cli-hive-binding.ts +290 -0
- package/src/garden-control-plane.ts +24 -1
- package/src/governed-surface-runner.ts +1227 -0
- package/src/index.ts +104 -1
- package/src/task-runner.ts +3794 -0
- package/dist/aria-connector/src/install-hooks.d.ts +0 -18
- package/dist/aria-connector/src/install-hooks.d.ts.map +0 -1
- package/dist/aria-connector/src/install-hooks.js +0 -224
- package/dist/aria-connector/src/install-hooks.js.map +0 -1
- package/dist/aria-connector/src/onboarding-wizard.d.ts +0 -5
- package/dist/aria-connector/src/onboarding-wizard.d.ts.map +0 -1
- package/dist/aria-connector/src/onboarding-wizard.js +0 -188
- package/dist/aria-connector/src/onboarding-wizard.js.map +0 -1
- package/dist/cli-0.2.38.tgz +0 -0
- package/dist/install.sh +0 -13
- package/src/__tests__/anthropic-oauth.test.ts +0 -186
- package/src/__tests__/auth-commands.test.ts +0 -132
- package/src/__tests__/owner-login.test.ts +0 -311
|
@@ -0,0 +1,913 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { createHash } from 'node:crypto';
|
|
3
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
4
|
+
import os from 'node:os';
|
|
5
|
+
import path from 'node:path';
|
|
6
|
+
import {
|
|
7
|
+
analyzeDomainOutputQuality,
|
|
8
|
+
} from '../hooks/lib/domain-output-quality.mjs';
|
|
9
|
+
|
|
10
|
+
// Module-level locations for this run. Everything is resolved once, at import time.

// Artifacts are written beneath the directory the script is launched from.
const repoRoot = process.cwd();
// Runtime endpoint: ARIA_RUNTIME_URL overrides the local default; trailing slashes are stripped.
const runtimeUrl = (process.env.ARIA_RUNTIME_URL || 'http://127.0.0.1:4319').replace(/\/+$/, '');
// ISO timestamp with ':' and '.' replaced by '-', used below as a directory name.
const runId = new Date().toISOString().replace(/[:.]/g, '-');
// Per-run artifact directory: <repoRoot>/artifacts/quality-ab/<runId>.
const outputRoot = path.join(repoRoot, 'artifacts', 'quality-ab', runId);
const outputsDir = path.join(outputRoot, 'outputs');
// Case-insensitive, whole-word match for harness/instruction vocabulary.
// NOTE(review): the consumers of this pattern are outside this section — presumably it screens
// prompt text for instruction words; confirm against the call sites.
const promptInstructionRx = /\b(?:fire|skill|skills|autofire|qiyas|tadabbur|runtime|runtimes|load|autoload|packet|hook)\b/i;
|
|
16
|
+
// Frozen (immutable) list of skill identifiers, kept in its original order.
// NOTE(review): the code that consumes this list is outside this section — the name suggests the
// benchmark requires each of these skills to be present; confirm against the call sites.
const benchmarkRequiredSkillIds = Object.freeze([
  'aria-cognition-autofire',
  'aria-first-class-operating-contract',
  'aria-cognition-batch',
  'aria-quality-audit',
  'aria-repo-doctrine',
  'aria-http-harness-client',
  'aria-harness-substrate-binding',
  'aria-axioms-first-principles',
  'never-guess',
  'tadabbur',
  'tadabbur-ops',
  'tafakkur',
  'qiyas-analogy',
  'forge-quality-rules',
  'aria-task-codex-executor',
  'aria-backend-architect',
  'aria-harness-no-stripping',
  'aria-readable-output',
  'aria-ledger-fleet-execution',
  'predictor',
  'aria-aristotle-pre-phase',
  'aria-aristotle-intra-phase',
  'aria-aristotle-post-phase',
  'mizan',
  'ghazali-8lens',
  'aria-noor-cognitives',
  'aria-business-frame',
  'aria-business-audit',
  'aria-revenue-engine',
  'aria-retention-engine',
  'aria-gtm-architect',
  'aria-senior-code-cookbook',
  'aria-senior-code-audit',
  'testing-strategy',
  'security-review',
  'architecture-decision',
  'api-design',
  'observability',
  'error-handling',
  'aria-decision-mizan',
  'aria-aristotle-cognitives',
  'aria-forge-guardrails',
  'aria-harness-deploy',
  'aria-harness-onboarding',
  'aria-harness-output-discipline',
  'aria-frontend-architect',
  'aria-fullstack-orchestrator',
  'aria-research-orchestrator',
  'cross-domain-24',
  'fitrah-guard',
  'noor-recognition',
  'ladunni-22',
  'ijtihad-novel',
  'ilham-intuition',
  'istiqra-induction',
  'ruh-basis',
  'soul-domains',
  'deepsoul-emotional',
  'aria-repo-audit',
  'aria-live-ops',
  'aria-memory-index',
  'aria-ops',
]);
|
|
80
|
+
|
|
81
|
+
// System prompt text: the sentences below are joined with single spaces into one string.
// NOTE(review): where this prompt is sent is outside this section — presumably it is the
// system message for the provider calls; confirm against the call sites.
const baseSystemPrompt = [
  'You are a pragmatic senior engineering agent.',
  'Answer the user directly with status, evidence boundary, risks, and the next concrete action.',
  'Do not claim completion unless the prompt includes evidence.',
].join(' ');
|
|
86
|
+
|
|
87
|
+
// Fixed bank of 20 scenario prompts, kept in their original order.
// NOTE(review): selection/iteration over this bank happens outside this section — presumably
// each entry is sent as a user prompt in the A/B run; confirm against the call sites.
const promptBank = [
  'A repo has several dirty files and the owner asks what to do next. Produce a concise action recommendation with verification boundaries.',
  'A deployment route changed and the live endpoint may still be using old code. Explain the safest debugging sequence.',
  'A quality check reports that an answer is too vague. Rewrite the closeout so it is evidence-bound and owner-readable.',
  'A connector has a pre-provider preparation step and a post-answer validation step. Identify the highest-risk failure mode.',
  'A team wants marketing proof from a smoke run. State what evidence is acceptable and what claim must remain bounded.',
  'A long-running task ledger has completed four phases. Summarize what should be verified before trusting the result.',
  'A user is upset about lost time and unclear output. Draft the next response that reduces confusion and names the next action.',
  'A feature claims automatic improvement but only stores logs. Evaluate whether that proves behavior changed.',
  'A code reviewer sees completion language with no test output. Write the review finding and the required proof.',
  'A tool runner must avoid overloading a machine. Recommend how to schedule live checks and report progress.',
  'A model answer includes many internal receipts. Rewrite the owner-facing answer so it preserves evidence without dumping raw internals.',
  'A project needs a before-after measurement of answer quality. Define the metrics and success threshold.',
  'An integration uses a sidecar to prepare context before a model call. Explain how to prove the sidecar affected the answer.',
  'A QA loop finds a correctable issue. Describe the repair-first flow and how to record the lesson.',
  'A baseline answer asks what to do next. Produce a better answer that decides the next step with evidence boundaries.',
  'A live smoke passed but provider answer quality has not been measured. Give the honest status and next experiment.',
  'An output says production ready after one local check. Reframe the claim and identify missing evidence.',
  'A runtime can select many methods without placing their full text in the model prompt. Explain the quality and cost tradeoff.',
  'A release note needs to be useful to operators. Write the structure and verification requirements.',
  'A test produces screenshots and metrics. Explain how to turn that into defensible marketing evidence.',
];
|
|
109
|
+
|
|
110
|
+
/**
 * Create `dir` together with any missing parent directories.
 *
 * @param {string} dir - Directory path to create.
 * @returns {void}
 */
function ensureDir(dir) {
  // recursive: build the whole chain of parents; mode: permissions for created directories.
  const creationOptions = { recursive: true, mode: 0o755 };
  mkdirSync(dir, creationOptions);
}
|
|
113
|
+
|
|
114
|
+
/**
 * Hex-encoded SHA-256 digest of a value's string form.
 *
 * @param {unknown} value - Value to hash; `null`/`undefined` hash as the empty string.
 * @returns {string} 64-character lowercase hex digest.
 */
function sha256(value) {
  const hasher = createHash('sha256');
  hasher.update(String(value ?? ''));
  return hasher.digest('hex');
}
|
|
117
|
+
|
|
118
|
+
/**
 * Mask credential-looking substrings inside free text.
 *
 * Two shapes are masked, in this order: `sk-` followed by at least eight
 * non-delimiter characters, then `Bearer ` followed by at least eight
 * non-delimiter characters. Delimiters are `"`, whitespace, `,`, `}` and `]`.
 *
 * @param {unknown} value - Text to scrub; `null`/`undefined` become the empty string.
 * @returns {string} The text with matching secrets replaced by placeholders.
 */
function redactText(value) {
  const text = String(value ?? '');
  const withoutSecretKeys = text.replace(/sk-[^"\s,}\]]{8,}/g, 'sk-[REDACTED]');
  return withoutSecretKeys.replace(/Bearer [^"\s,}\]]{8,}/g, 'Bearer [REDACTED]');
}
|
|
123
|
+
|
|
124
|
+
/**
 * Recursively produce a copy of `value` with secrets masked.
 *
 * - Strings are scrubbed with `redactText`.
 * - Arrays and plain objects are walked recursively.
 * - A property whose name looks credential-related (api key, authorization,
 *   bearer, secret, token, password) is replaced with `'[REDACTED]'`.
 *
 * Booleans and numbers are never masked by key name. The key pattern matches
 * any name containing "token", which also covers usage metrics such as
 * `prompt_tokens`, `totalTokens` or `max_tokens` and flags such as
 * `apiKeyPresent`; masking those would wipe measurement data from artifacts
 * without protecting any credential.
 *
 * @param {unknown} value - Any JSON-like value.
 * @returns {unknown} A redacted copy (primitives are returned as-is).
 */
function redactRequest(value) {
  if (typeof value === 'string') return redactText(value);
  if (Array.isArray(value)) return value.map((item) => redactRequest(item));
  if (value === null || typeof value !== 'object') return value;

  const sensitiveKeyRx = /api[_-]?key|authorization|bearer|secret|token|password/i;
  const redactedPairs = [];
  for (const [key, child] of Object.entries(value)) {
    const isMetric = typeof child === 'boolean' || typeof child === 'number';
    const masked = !isMetric && sensitiveKeyRx.test(key);
    redactedPairs.push([key, masked ? '[REDACTED]' : redactRequest(child)]);
  }
  return Object.fromEntries(redactedPairs);
}
|
|
133
|
+
|
|
134
|
+
/**
 * Write `value` to `filePath` as pretty-printed JSON (2-space indent, trailing
 * newline). The value is passed through `redactRequest` before serialization.
 *
 * @param {string} filePath - Destination file.
 * @param {unknown} value - Data to persist.
 * @returns {void}
 */
function writeJsonArtifact(filePath, value) {
  const safeValue = redactRequest(value);
  const serialized = `${JSON.stringify(safeValue, null, 2)}\n`;
  writeFileSync(filePath, serialized);
}
|
|
137
|
+
|
|
138
|
+
/**
 * Read a numeric option from the command line or the environment.
 *
 * Lookup order: a `--<name>=<value>` argument, then the environment variable
 * `ARIA_AB_<NAME>` (dashes in `name` become underscores, upper-cased), then
 * `fallback`. A non-finite parse result is replaced by `fallback`, and the
 * outcome is clamped into `[min, max]`.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {number} fallback - Value used when nothing usable is supplied.
 * @param {{min?: number, max?: number}} [options] - Clamp bounds; `min`
 *   defaults to 1 and `max` to `Number.MAX_SAFE_INTEGER` when not finite.
 * @returns {number} The resolved, clamped number.
 */
function argNumber(name, fallback, options = {}) {
  const flagPrefix = `--${name}=`;
  const flag = process.argv.find((arg) => arg.startsWith(flagPrefix));

  let parsed;
  if (flag) {
    parsed = Number(flag.slice(flagPrefix.length));
  } else {
    const envName = `ARIA_AB_${name.replaceAll('-', '_').toUpperCase()}`;
    parsed = Number(process.env[envName] || fallback);
  }

  const lowerBound = Number.isFinite(options.min) ? options.min : 1;
  const upperBound = Number.isFinite(options.max) ? options.max : Number.MAX_SAFE_INTEGER;
  const candidate = Number.isFinite(parsed) ? parsed : fallback;
  return Math.min(upperBound, Math.max(lowerBound, candidate));
}
|
|
146
|
+
|
|
147
|
+
/**
 * Map `items` through an async `worker` with at most `concurrency` calls in
 * flight, preserving input order in the result.
 *
 * A fixed pool of runners pulls the next unclaimed index from a shared cursor.
 * The claim (`read` + `increment`) has no `await` in between, so two runners
 * can never take the same index.
 *
 * The pool always has at least one runner when there is work to do: a
 * `concurrency` of 0, a negative number or NaN would otherwise start no
 * runners and silently return an array of unfilled slots.
 *
 * @template T, R
 * @param {T[]} items - Inputs to process.
 * @param {number} concurrency - Maximum number of simultaneous worker calls.
 * @param {(item: T, index: number) => Promise<R> | R} worker - Per-item task.
 * @returns {Promise<R[]>} Results positioned at their input index. Rejects
 *   with the first worker error.
 */
async function mapConcurrent(items, concurrency, worker) {
  const results = new Array(items.length);
  if (items.length === 0) return results;

  const requested = Math.floor(Number(concurrency));
  const usable = Number.isNaN(requested) || requested < 1 ? 1 : requested;
  const poolSize = Math.min(usable, items.length);

  let next = 0;
  const runWorker = async () => {
    while (next < items.length) {
      const index = next;
      next += 1;
      results[index] = await worker(items[index], index);
    }
  };

  await Promise.all(Array.from({ length: poolSize }, () => runWorker()));
  return results;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Escape text for safe inclusion in HTML element content or attribute values.
 *
 * The five HTML-significant characters (ampersand, less-than, greater-than,
 * double quote, single quote) are replaced by character references in a single
 * pass, so an ampersand introduced by one replacement is never escaped again.
 *
 * The ampersand that starts each reference is written as the escape `\x26`.
 * That keeps the source free of literal entity sequences, which HTML-aware
 * tooling can decode back into the raw character and thereby turn every
 * replacement into a no-op.
 *
 * @param {unknown} value - Value to escape; `null`/`undefined` become ''.
 * @returns {string} The escaped text.
 */
function htmlEscape(value) {
  const entities = {
    '&': '\x26amp;',
    '<': '\x26lt;',
    '>': '\x26gt;',
    '"': '\x26quot;',
    "'": '\x26#39;',
  };
  return String(value ?? '').replace(/[&<>"']/g, (char) => entities[char]);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Best-effort JSON file reader.
 *
 * @param {string} filePath - File to read as UTF-8 JSON.
 * @returns {unknown|null} The parsed content, or `null` when the file is
 *   missing, unreadable, or not valid JSON (errors are deliberately absorbed).
 */
function readJsonIfExists(filePath) {
  let parsed = null;
  if (existsSync(filePath)) {
    try {
      parsed = JSON.parse(readFileSync(filePath, 'utf8'));
    } catch {
      parsed = null;
    }
  }
  return parsed;
}
|
|
178
|
+
|
|
179
|
+
/**
 * Coerce a value to a plain-object-like record.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object} `value` itself when it is a non-null, non-array object;
 *   otherwise a fresh empty object.
 */
function asRecord(value) {
  if (!value) return {};
  if (typeof value !== 'object' || Array.isArray(value)) return {};
  return value;
}
|
|
182
|
+
|
|
183
|
+
/**
 * Extract token-usage figures from a completion response.
 *
 * Each figure is read from the snake_case field first, then the camelCase
 * field, defaulting to 0, and converted with `Number`.
 *
 * @param {object} [data] - Response body; only `data.usage` is read.
 * @returns {{promptTokens: number, completionTokens: number, totalTokens: number,
 *   reasoningTokens: number, allCompletionTokensSpentOnReasoning: boolean}}
 *   `allCompletionTokensSpentOnReasoning` is true when completion tokens are
 *   positive and reasoning tokens are at least as many.
 */
function completionUsageDetails(data = {}) {
  const usage = asRecord(data.usage);
  const details = asRecord(usage.completion_tokens_details || usage.completionTokensDetails);
  const readCount = (record, snakeKey, camelKey) => Number(record[snakeKey] ?? record[camelKey] ?? 0);

  const completionTokens = readCount(usage, 'completion_tokens', 'completionTokens');
  const reasoningTokens = readCount(details, 'reasoning_tokens', 'reasoningTokens');
  const reasoningConsumedEverything = completionTokens > 0 && reasoningTokens >= completionTokens;

  return {
    promptTokens: readCount(usage, 'prompt_tokens', 'promptTokens'),
    completionTokens,
    totalTokens: readCount(usage, 'total_tokens', 'totalTokens'),
    reasoningTokens,
    allCompletionTokensSpentOnReasoning: reasoningConsumedEverything,
  };
}
|
|
196
|
+
|
|
197
|
+
/**
 * Summarize the provider configuration for this run without exposing secrets.
 *
 * Sources, in priority order: `ARIA_AB_*` environment variables, the `model`
 * section of `~/.aria/config.json`, then built-in defaults (`xai`, `grok-4-3`).
 * The local config's `baseUrl` and `apiKey` are only taken into account when
 * its provider matches the selected provider (case-insensitively).
 *
 * @returns {{provider: string, model: string, baseUrl: string, apiKeyPresent: boolean}}
 *   `apiKeyPresent` reports whether some key source is set; the key itself is
 *   never returned.
 */
function safeConfigSummary() {
  const env = process.env;
  const config = readJsonIfExists(path.join(os.homedir(), '.aria', 'config.json')) || {};
  const model = config.model && typeof config.model === 'object' ? config.model : {};
  const selectedProvider = env.ARIA_AB_PROVIDER || model.provider || config.defaultProvider || 'xai';
  const localProviderMatches = String(model.provider || '').toLowerCase() === String(selectedProvider || '').toLowerCase();

  // Provider-specific environment variables that can carry the API key.
  let providerEnvKeyPresent = false;
  switch (selectedProvider) {
    case 'xai':
      providerEnvKeyPresent = Boolean(env.XAI_API_KEY || env.GROK_API_KEY);
      break;
    case 'deepseek':
      providerEnvKeyPresent = Boolean(env.DEEPSEEK_API_KEY || env.ARIA_DEEPSEEK_API_KEY);
      break;
    case 'anthropic':
      providerEnvKeyPresent = Boolean(env.ANTHROPIC_API_KEY);
      break;
    case 'openai':
      providerEnvKeyPresent = Boolean(env.OPENAI_API_KEY);
      break;
    case 'openrouter':
      providerEnvKeyPresent = Boolean(env.OPENROUTER_API_KEY);
      break;
    default:
      providerEnvKeyPresent = false;
  }

  return {
    provider: selectedProvider,
    model: env.ARIA_AB_MODEL || model.model || 'grok-4-3',
    baseUrl: env.ARIA_AB_BASE_URL || (localProviderMatches ? model.baseUrl : '') || '',
    apiKeyPresent: Boolean(env.ARIA_AB_API_KEY || (localProviderMatches && model.apiKey) || providerEnvKeyPresent),
  };
}
|
|
217
|
+
|
|
218
|
+
/**
 * Tells whether a provider needs an API key before it can be called.
 *
 * @param {*} provider - Provider name; compared case-insensitively, nullish treated as ''.
 * @returns {boolean} false only for 'ollama', true for everything else.
 */
function providerRequiresApiKey(provider) {
  const keylessProviders = ['ollama'];
  const normalized = String(provider || '').toLowerCase();
  return keylessProviders.indexOf(normalized) === -1;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Resolves the chat-completions endpoint for the baseline (direct) call.
 *
 * An explicit `config.baseUrl` always wins. Otherwise the provider name is
 * matched case-insensitively against the known providers; unknown names fall
 * back to the xAI endpoint.
 *
 * Base URLs (from `config.baseUrl` or DEEPSEEK_API_BASE) may be given either as
 * the full endpoint or as an API root; a root gets `/chat/completions` appended
 * after trailing slashes are removed.
 *
 * @param {{baseUrl?: string, provider?: string}} config
 * @returns {string} Absolute URL of the chat-completions endpoint.
 */
function providerUrl(config) {
  const withChatCompletions = (base) => (
    base.endsWith('/chat/completions') ? base : `${base.replace(/\/+$/, '')}/chat/completions`
  );

  if (config.baseUrl) {
    return withChatCompletions(config.baseUrl);
  }

  switch (String(config.provider || '').toLowerCase()) {
    case 'deepseek':
      // DEEPSEEK_API_BASE is named like an API root, so normalise it the same way as baseUrl.
      return withChatCompletions(process.env.DEEPSEEK_API_BASE || 'https://api.deepseek.com/v1');
    case 'openai':
      return 'https://api.openai.com/v1/chat/completions';
    case 'openrouter':
      return 'https://openrouter.ai/api/v1/chat/completions';
    case 'ollama':
      // Keyless local provider: never send its unauthenticated traffic to a hosted API.
      return 'http://127.0.0.1:11434/v1/chat/completions';
    case 'xai':
    default:
      return 'https://api.x.ai/v1/chat/completions';
  }
}
|
|
232
|
+
|
|
233
|
+
/**
 * Translates the configured model id into the id sent on the wire.
 *
 * The only rewrite is the xAI alias 'grok-4-3' -> 'grok-4.3'; every other
 * provider/model pair is returned unchanged.
 *
 * @param {{provider?: string, model?: string}} config
 * @returns {string|undefined}
 */
function providerNativeModel(config) {
  const { provider, model } = config;
  const isXaiAlias = provider === 'xai' && model === 'grok-4-3';
  return isXaiAlias ? 'grok-4.3' : model;
}
|
|
237
|
+
|
|
238
|
+
/**
 * Resolves the API key to use for `config.provider`.
 *
 * Lookup order:
 *   1. ARIA_AB_API_KEY (explicit override for the benchmark run),
 *   2. `model.apiKey` in ~/.aria/config.json when that file's provider matches
 *      (compared case-insensitively),
 *   3. the provider's own environment variables.
 *
 * The environment variables consulted in step 3 are the same ones the config
 * summary uses to report key presence (including GROK_API_KEY for xAI and
 * OPENROUTER_API_KEY for OpenRouter), so "key present" and "key resolved"
 * cannot disagree.
 *
 * @param {{provider?: string}} config
 * @returns {string} The key, or '' when none is available.
 */
function providerApiKey(config) {
  const provider = String(config.provider || '').toLowerCase();
  if (process.env.ARIA_AB_API_KEY) return process.env.ARIA_AB_API_KEY;

  const local = readJsonIfExists(path.join(os.homedir(), '.aria', 'config.json')) || {};
  const model = local.model && typeof local.model === 'object' ? local.model : {};
  const localProvider = String(model.provider || '').toLowerCase();
  if (localProvider === provider && model.apiKey) return model.apiKey;

  switch (provider) {
    case 'xai':
      return process.env.XAI_API_KEY || process.env.GROK_API_KEY || '';
    case 'deepseek':
      return process.env.DEEPSEEK_API_KEY || process.env.ARIA_DEEPSEEK_API_KEY || '';
    case 'anthropic':
      return process.env.ANTHROPIC_API_KEY || '';
    case 'openai':
      return process.env.OPENAI_API_KEY || '';
    case 'openrouter':
      return process.env.OPENROUTER_API_KEY || '';
    default:
      return '';
  }
}
|
|
250
|
+
|
|
251
|
+
/**
 * Pulls the assistant text out of an OpenAI-style chat-completions response.
 *
 * Always returns a string so downstream scoring never sees an object:
 *   - string content is returned as-is,
 *   - an array of content parts is flattened by concatenating string parts and
 *     the `text` field of object parts,
 *   - anything else (missing choice, null content, other types) yields ''.
 *
 * @param {*} data - Parsed response body; may be any value.
 * @returns {string}
 */
function extractOpenAiText(data) {
  const content = data?.choices?.[0]?.message?.content;
  if (typeof content === 'string') return content;
  if (Array.isArray(content)) {
    return content
      .map((part) => {
        if (typeof part === 'string') return part;
        return typeof part?.text === 'string' ? part.text : '';
      })
      .join('');
  }
  return '';
}
|
|
254
|
+
|
|
255
|
+
/**
 * Flattens the `aria` sidecar of a runtime response into the counters and
 * hashes the benchmark later checks.
 *
 * Most values are read from `data.aria`, with `data.aria.raw` used as a
 * fallback (camelCase keys) for runtime evidence and record lists. Missing
 * pieces degrade to empty/zero/null values instead of throwing.
 *
 * @param {*} data - Parsed runtime response body.
 * @returns {object} Summary with block state, QA gate info, evidence hashes and counts.
 */
function extractRuntimeExtra(data) {
  const sidecar = data?.aria && typeof data.aria === 'object' ? data.aria : {};
  const rawSidecar = sidecar.raw && typeof sidecar.raw === 'object' ? sidecar.raw : {};
  const runtimeEvidence = asRecord(sidecar.runtime_evidence || rawSidecar.runtimeEvidence);

  // Prefer the snake_case container, then the camelCase one under `raw`, else no records.
  const recordsOf = (primary, secondary) => {
    if (Array.isArray(primary?.records)) return primary.records;
    if (Array.isArray(secondary?.records)) return secondary.records;
    return [];
  };
  const toCount = (value) => Number(value || 0);

  const ledgerRecords = recordsOf(sidecar.runtime_ledger, rawSidecar.runtimeLedger);
  const coachRecords = recordsOf(sidecar.coach_kernel, rawSidecar.coachKernel);
  const { atlas, sentinel, cognitiveRuntimeReceipts } = runtimeEvidence;

  return {
    blocked: sidecar.blocked === true || rawSidecar.blocked === true,
    releaseDecision: rawSidecar.qaAsGate?.releaseDecision || null,
    qaAsGate: rawSidecar.qaAsGate || null,
    runtimeEvidence,
    runtimeEvidencePresent: Object.keys(runtimeEvidence).length > 0,
    runtimeEvidenceOk: runtimeEvidence.ok === true,
    atlasPresent: Boolean(atlas?.receiptHash || atlas?.contextHash || atlas?.ok || atlas?.atlas?.contextHash),
    sentinelPassportHash: sentinel?.passportHash || null,
    sentinelCompilationHash: sentinel?.compilationHash || null,
    turnCapabilityGrantHash: runtimeEvidence.turnCapabilityGrant?.grantHash || null,
    firedSkillCount: toCount(runtimeEvidence.firedSkillCount),
    activeRuntimeCount: toCount(runtimeEvidence.activeRuntimeCount),
    phaseExecutionReceiptCount: toCount(runtimeEvidence.phaseExecutionReceiptCount),
    qiyasPerspectiveCount: toCount(cognitiveRuntimeReceipts?.qiyasPerspectiveCount),
    tadabburStageCount: toCount(cognitiveRuntimeReceipts?.tadabburStageCount),
    learningFeedsNextSelection: runtimeEvidence.learning?.learningFeedsNextSelection === true,
    managedLedgerRecords: ledgerRecords.length,
    coachRecords: coachRecords.length,
    sidecarPresent: Boolean(data?.aria),
  };
}
|
|
291
|
+
|
|
292
|
+
/**
 * Runs `fn` up to `attempts` times and returns the first successful result.
 *
 * Retries are immediate (no delay between attempts). When every attempt
 * fails, the thrown error carries the last failure's message in its own
 * message and the last failure itself as `cause`, so the original stack
 * trace is not lost.
 *
 * @param {string} label - Prefix used in the final error message.
 * @param {(attempt: number) => Promise<*>} fn - Called with the 1-based attempt number.
 * @param {number} [attempts=3] - Maximum number of calls to `fn`.
 * @returns {Promise<*>} Whatever `fn` resolves with.
 * @throws {Error} `${label} failed after ${attempts} attempts: <reason>` with `cause` set.
 */
async function retryProviderCall(label, fn, attempts = 3) {
  let lastError = null;
  for (let attempt = 1; attempt <= attempts; attempt += 1) {
    try {
      return await fn(attempt);
    } catch (error) {
      lastError = error;
    }
  }
  const reason = lastError instanceof Error ? lastError.message : String(lastError);
  throw new Error(`${label} failed after ${attempts} attempts: ${reason}`, { cause: lastError });
}
|
|
304
|
+
|
|
305
|
+
/**
 * Sends one benchmark prompt directly to the configured provider (the
 * "baseline" arm) and returns the answer together with audit data.
 *
 * Anthropic is delegated to callAnthropicDirect; every other provider is
 * called through an OpenAI-style chat-completions request, wrapped in
 * retryProviderCall.
 *
 * @param {{config: object, prompt: string, index: number}} args
 * @returns {Promise<object>} text, timing, provider/model, usage, raw body and
 *   request hashes, plus the attempt number that succeeded.
 * @throws {Error} When a required API key is missing or all attempts fail.
 */
async function callBaselineProvider({ config, prompt, index }) {
  if (config.provider === 'anthropic') {
    return callAnthropicDirect({ config, prompt, index });
  }

  const endpoint = providerUrl(config);
  const apiKey = providerApiKey(config);
  if (providerRequiresApiKey(config.provider) && !apiKey) {
    throw new Error(`missing API key for provider ${config.provider}`);
  }

  // One attempt: build the request, send it, parse the reply leniently.
  const sendOnce = async (attempt) => {
    const started = Date.now();
    const requestBody = {
      model: providerNativeModel(config),
      messages: [
        { role: 'system', content: baseSystemPrompt },
        { role: 'user', content: prompt },
      ],
      max_tokens: Number(process.env.ARIA_AB_MAX_TOKENS || 700),
      temperature: Number(process.env.ARIA_AB_TEMPERATURE || 0.2),
      stream: false,
      metadata: { source: 'quality-ab-live-provider', arm: 'baseline', index, attempt },
    };

    const headers = { 'content-type': 'application/json' };
    if (apiKey) {
      headers.authorization = `Bearer ${apiKey}`;
    }
    if (config.provider === 'openrouter') {
      headers['HTTP-Referer'] = 'http://127.0.0.1';
      headers['X-Title'] = 'Aria Quality AB';
    }

    const response = await fetch(endpoint, {
      method: 'POST',
      headers,
      body: JSON.stringify(requestBody),
    });
    const rawText = await response.text();

    // A non-JSON body is kept under `raw` rather than aborting the attempt.
    let data;
    try {
      data = rawText ? JSON.parse(rawText) : {};
    } catch {
      data = { raw: rawText };
    }
    if (!response.ok) {
      throw new Error(`baseline provider ${response.status}: ${rawText.slice(0, 500)}`);
    }

    return {
      text: extractOpenAiText(data),
      durationMs: Date.now() - started,
      provider: config.provider,
      model: config.model,
      usage: data.usage || null,
      usageDetails: completionUsageDetails(data),
      rawSha256: sha256(rawText),
      rawText,
      requestBody: redactRequest(requestBody),
      requestSha256: sha256(JSON.stringify(requestBody)),
      attempts: attempt,
    };
  };

  return retryProviderCall(`baseline provider prompt ${index}`, sendOnce);
}
|
|
359
|
+
|
|
360
|
+
/**
 * Sends one benchmark prompt to the Anthropic Messages endpoint (baseline arm).
 *
 * Behaves like the OpenAI-style baseline path: the call is wrapped in
 * retryProviderCall and the result reports which attempt succeeded.
 *
 * The request deliberately carries no `metadata` object: the Messages API
 * documents only `metadata.user_id`, so benchmark bookkeeping keys
 * (source/arm/index) are not sent on the wire.
 *
 * NOTE(review): unlike the other arms this request does not forward
 * ARIA_AB_TEMPERATURE, so the Anthropic baseline samples at the provider
 * default — confirm whether that is intended for the A/B comparison.
 *
 * @param {{config: object, prompt: string, index: number}} args
 * @returns {Promise<object>} text, timing, provider/model, usage, raw body and
 *   request hashes, plus the attempt number that succeeded.
 * @throws {Error} When no API key is available or all attempts fail.
 */
async function callAnthropicDirect({ config, prompt, index }) {
  const apiKey = providerApiKey(config);
  if (!apiKey) throw new Error('missing API key for provider anthropic');

  return retryProviderCall(`baseline provider prompt ${index}`, async (attempt) => {
    const started = Date.now();
    const requestBody = {
      model: config.model,
      max_tokens: Number(process.env.ARIA_AB_MAX_TOKENS || 700),
      system: baseSystemPrompt,
      messages: [{ role: 'user', content: prompt }],
    };
    const response = await fetch(config.baseUrl || 'https://api.anthropic.com/v1/messages', {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify(requestBody),
    });
    const rawText = await response.text();

    // A non-JSON body is kept under `raw` rather than aborting the attempt.
    let data = {};
    try {
      data = rawText ? JSON.parse(rawText) : {};
    } catch {
      data = { raw: rawText };
    }
    if (!response.ok) throw new Error(`baseline anthropic ${response.status}: ${rawText.slice(0, 500)}`);

    // Only `text` content blocks contribute to the visible answer.
    const text = Array.isArray(data.content)
      ? data.content.filter((part) => part?.type === 'text').map((part) => part.text).join('')
      : '';

    return {
      text,
      durationMs: Date.now() - started,
      provider: config.provider,
      model: providerNativeModel(config),
      usage: data.usage || null,
      usageDetails: completionUsageDetails(data),
      rawSha256: sha256(rawText),
      rawText,
      requestBody: redactRequest(requestBody),
      requestSha256: sha256(JSON.stringify(requestBody)),
      attempts: attempt,
    };
  });
}
|
|
401
|
+
|
|
402
|
+
/**
 * Sends one benchmark prompt through the Aria runtime (the
 * "mechanical_substrate" arm) and returns the answer with audit data.
 *
 * The request is POSTed to `${runtimeUrl}/v1/chat/completions` and carries the
 * provider selection (including the resolved API key) in `llm`, so the runtime
 * calls the same model as the baseline arm. Provider fallback is disabled and
 * debug output is requested. The whole call is wrapped in retryProviderCall.
 *
 * @param {{config: object, prompt: string, index: number}} args
 * @returns {Promise<object>} Same shape as the baseline result plus
 *   `runtimeExtra` extracted from the response sidecar.
 * @throws {Error} When all attempts fail.
 */
async function callRuntimeArm({ config, prompt, index }) {
  const sendOnce = async (attempt) => {
    const started = Date.now();

    const llm = {
      provider: config.provider,
      model: config.model,
      baseUrl: config.baseUrl || undefined,
      apiKey: providerApiKey(config) || undefined,
    };
    const metadata = {
      source: 'quality-ab-live-provider',
      arm: 'mechanical_substrate',
      index,
      attempt,
      universalTurnPacket: true,
      skillExecutionMode: 'mechanical-receipt',
      requiredSkillIds: benchmarkRequiredSkillIds,
    };
    const requestBody = {
      model: config.model,
      llm,
      messages: [
        { role: 'system', content: baseSystemPrompt },
        { role: 'user', content: prompt },
      ],
      max_tokens: Number(process.env.ARIA_AB_MAX_TOKENS || 700),
      temperature: Number(process.env.ARIA_AB_TEMPERATURE || 0.2),
      stream: false,
      metadata,
      requiredSkillIds: benchmarkRequiredSkillIds,
      ariaDebug: true,
      allowProviderFallback: false,
    };

    const response = await fetch(`${runtimeUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(requestBody),
    });
    const rawText = await response.text();

    // A non-JSON body is kept under `raw` rather than aborting the attempt.
    let data;
    try {
      data = rawText ? JSON.parse(rawText) : {};
    } catch {
      data = { raw: rawText };
    }
    if (!response.ok) {
      throw new Error(`runtime arm ${response.status}: ${rawText.slice(0, 500)}`);
    }

    return {
      text: extractOpenAiText(data),
      durationMs: Date.now() - started,
      provider: data?.choices?.[0]?.message?.provider || config.provider,
      model: data?.model || config.model,
      usage: data?.usage || null,
      usageDetails: completionUsageDetails(data),
      rawSha256: sha256(rawText),
      rawText,
      requestBody: redactRequest(requestBody),
      // Hash the redacted form: the raw body contains the provider API key.
      requestSha256: sha256(JSON.stringify(redactRequest(requestBody))),
      attempts: attempt,
      runtimeExtra: extractRuntimeExtra(data),
    };
  };

  return retryProviderCall(`runtime provider prompt ${index}`, sendOnce);
}
|
|
462
|
+
|
|
463
|
+
/**
 * Deterministic tie-break score for an answer, rounded to three decimals.
 *
 * Components (all keyword/regex heuristics over the answer text):
 *   +1.5  mentions status / recommendation / decision / next action / next step
 *   +1.5  states an evidence boundary (bounded, observed, unverified, ...)
 *   +1.0  contains causal wording (because, therefore, root cause, ...)
 *   +0.4  per inline-code span or known CLI name, capped at 4
 *   +0.5  per numeric evidence token (n/m, percentages, hashes, exit=0, status=x), capped at 3
 *   +0.2  per distinct prompt term (> 4 chars) echoed in the answer, capped at 6
 *   +1 / +0.5 / -1  length band: 220-1200 chars / up to 2200 chars / longer
 *   -2.5  a raw <cognition> or <applied_cognition> tag is visible
 *
 * Word boundaries are applied per alternative in the numeric-evidence
 * pattern: a single trailing `\b` after the whole group can never match
 * directly after `%`, which made percentages uncountable.
 *
 * @param {*} text - Answer text (nullish treated as '').
 * @param {*} userText - Prompt the answer responds to (nullish treated as '').
 * @returns {number}
 */
function fineGrainedScore(text, userText) {
  const source = String(text || '');
  const sourceLower = source.toLowerCase();

  const commandCount = (source.match(/`[^`]+`|\b(?:curl|kubectl|git|npm|node|sha256sum|systemctl|journalctl)\b/g) || []).length;
  const numericEvidenceCount = (source.match(/\b(?:\d+\/\d+\b|\d+(?:\.\d+)?%|sha256:[a-f0-9]+\b|[a-f0-9]{12,}\b|exit=0\b|status=\b)/gi) || []).length;

  const userTerms = String(userText || '').toLowerCase().split(/[^a-z0-9]+/).filter((term) => term.length > 4);
  const userTermHits = new Set(userTerms.filter((term) => sourceLower.includes(term))).size;

  const rawCognition = /<\s*(?:cognition|applied_cognition)\b/i.test(source);
  const directness = /\b(?:status|recommendation|decision|next action|next step)\b/i.test(source) ? 1 : 0;
  const bounded = /\b(?:evidence boundary|not verified|bounded|observed|unverified|no logs|no tests)\b/i.test(source) ? 1 : 0;
  const firstClassDelta = /\b(?:because|so that|therefore|this means|root cause|failure mode|predicate)\b/i.test(source) ? 1 : 0;
  const antiBloat = source.length >= 220 && source.length <= 1200 ? 1 : source.length <= 2200 ? 0.5 : -1;

  return Number((
    directness * 1.5 +
    bounded * 1.5 +
    firstClassDelta +
    Math.min(commandCount, 4) * 0.4 +
    Math.min(numericEvidenceCount, 3) * 0.5 +
    Math.min(userTermHits, 6) * 0.2 +
    antiBloat -
    (rawCognition ? 2.5 : 0)
  ).toFixed(3));
}
|
|
485
|
+
|
|
486
|
+
/**
 * Scores one answer against the coarse rubric.
 *
 * Each entry in `checks` is a boolean keyword/regex heuristic; the base score
 * is the number of passing checks. Penalties are then subtracted: the full
 * rubric size if the text is a runtime block message, 2 per domain blocker
 * and 0.5 per domain warning. The result is floored at 0.
 *
 * NOTE(review): the "avoids..." checks pass on empty text, so an empty answer
 * still earns points here — confirm this is intended.
 *
 * @param {*} text - Answer text (nullish treated as '').
 * @param {*} userText - Prompt, forwarded to the domain analyser and fine score.
 * @returns {object} score, fineScore, maxScore, checks, domain findings, size and hash.
 */
function scoreOutput(text, userText) {
  const source = String(text || '');
  const matches = (pattern) => pattern.test(source);

  const runtimeBlockedOutput = matches(/\bAria runtime blocked this output\b|\bRemaining blockers:\b|\bRecovery attempts:\b/i);
  const visibleCognitionDump = matches(/<\s*(?:cognition|applied_cognition)\b/i);
  const nonBlankLines = source.split('\n').filter((line) => line.trim());
  const withinReadableLength = source.length >= 180 && source.length <= 2400;

  const checks = {
    notRuntimeBlock: !runtimeBlockedOutput,
    concreteNextAction: matches(/\b(next action|next step|do this|implement|verify|run|inspect|measure)\b/i),
    evidenceBoundary: matches(/\b(evidence|observed|verified|unverified|bounded|not measured|proof|artifact|metric)\b/i),
    expectedObserved: matches(/\b(expected|observed|predicate|actual|pass|fail|threshold)\b/i),
    qaCorrection: matches(/\b(QA|finding|correct|correction|repair|harden|verify|re-test)\b/i),
    riskTradeoff: matches(/\b(risk|tradeoff|failure mode|blast radius|cost|latency|scope)\b/i),
    ownerReadable: withinReadableLength && matches(/\n|:/),
    avoidsAskOnly: !matches(/\bwhat would you like me to do next\b|\bhow would you like me to proceed\b/i),
    avoidsFalseClosure: !matches(/\b(production ready|fully done|guaranteed|complete)\b/i),
    avoidsRawInternals: !matches(/\b(skillExecutionReceipt|phaseReceipts|executedOperatorIdsHash|raw json)\b/i),
    avoidsVisibleCognitionDump: !visibleCognitionDump,
    learningLoop: matches(/\b(lesson|learning|next selection|feedback|loop|reflexion|record)\b/i),
    specificVerification: matches(/\b(exit=0|100\/100|\d+\/\d+|status=|sha256|screenshot|metric|test|smoke|ledger)\b/i),
    conciseStructure: nonBlankLines.length >= 2,
  };

  const domain = analyzeDomainOutputQuality(source, { userText });
  const checkCount = Object.keys(checks).length;
  const passedChecks = Object.values(checks).filter(Boolean).length;
  // A runtime block message forfeits the whole rubric.
  const blockPenalty = runtimeBlockedOutput ? checkCount : 0;
  const penalty = blockPenalty + domain.blockers.length * 2 + domain.warnings.length * 0.5;

  return {
    score: Math.max(0, passedChecks - penalty),
    fineScore: fineGrainedScore(source, userText),
    maxScore: checkCount,
    checks,
    blockerCount: domain.blockers.length,
    warningCount: domain.warnings.length,
    blockers: domain.blockers,
    warnings: domain.warnings,
    repairs: domain.repairs,
    chars: source.length,
    sha256: sha256(source),
  };
}
|
|
523
|
+
|
|
524
|
+
/**
 * Compares the substrate result with the baseline result for one prompt.
 *
 * The coarse score difference decides the winner; when it is zero the
 * fine-grained difference is used instead. Anything that is not a strictly
 * positive delta is recorded as a baseline win. The function also collects
 * human-readable gap findings and checks the runtime evidence against fixed
 * minimum counts.
 *
 * @param {{baseline: object, substrate: object}} row - Both arms, each with
 *   `score` (as produced by scoreOutput) and optionally `text`; the substrate
 *   arm may carry `runtimeExtra`.
 * @returns {{delta: number, primaryDelta: number, fineDelta: number, winner: string,
 *   firstClassGap: boolean, visibleOutputFailure: boolean,
 *   runtimeEvidenceComplete: boolean, gapFindings: string[]}}
 */
function classifyPair(row) {
  const substrateScore = row.substrate.score;
  const baselineScore = row.baseline.score;

  const primaryDelta = substrateScore.score - baselineScore.score;
  const fineDelta = Number((substrateScore.fineScore - baselineScore.fineScore).toFixed(3));
  const delta = primaryDelta === 0 ? fineDelta : primaryDelta;

  const runtime = row.substrate.runtimeExtra || {};
  const isBlank = (value) => String(value || '').trim() === '';
  const baselineVisibleFailure = isBlank(row.baseline?.text);
  const substrateVisibleFailure = isBlank(row.substrate?.text);

  // Minimum counts the runtime evidence must reach to be considered complete.
  const minimumCounts = [
    ['firedSkillCount', 50],
    ['activeRuntimeCount', 16],
    ['phaseExecutionReceiptCount', 6],
    ['qiyasPerspectiveCount', 15],
    ['tadabburStageCount', 12],
  ];
  const runtimeEvidenceComplete =
    runtime.runtimeEvidencePresent === true &&
    minimumCounts.every(([field, minimum]) => runtime[field] >= minimum) &&
    Boolean(runtime.sentinelPassportHash) &&
    Boolean(runtime.turnCapabilityGrantHash);

  // Ordered [condition, message] pairs; the order is the order findings are reported in.
  const candidateFindings = [
    [primaryDelta === 0, 'primary rubric tie: substrate did not create a coarse visible quality lead'],
    [baselineVisibleFailure, 'baseline produced no visible answer text'],
    [substrateVisibleFailure, 'substrate produced no visible answer text'],
    [substrateScore.checks.avoidsVisibleCognitionDump === false, 'raw cognition leaked into owner-facing substrate answer'],
    [substrateScore.checks.learningLoop === false, 'substrate answer lacks explicit learning/reflexion loop signal'],
    [substrateScore.checks.specificVerification === false, 'substrate answer lacks specific measurable verification evidence'],
    [
      !runtimeEvidenceComplete,
      `substrate runtime evidence incomplete: skills=${runtime.firedSkillCount || 0}/50 runtimes=${runtime.activeRuntimeCount || 0}/16 phases=${runtime.phaseExecutionReceiptCount || 0}/6 qiyas=${runtime.qiyasPerspectiveCount || 0}/15 tadabbur=${runtime.tadabburStageCount || 0}/12`,
    ],
    [
      runtime.blocked && runtime.qaAsGate?.hardStop !== true,
      'substrate blocked a non-hard-stop quality row instead of releasing with QA/correction findings',
    ],
    [
      substrateScore.score <= baselineScore.score && fineDelta <= 0,
      'substrate failed to beat baseline after fine-grained tie-break',
    ],
  ];
  const gapFindings = candidateFindings
    .filter(([applies]) => applies)
    .map(([, message]) => message);

  return {
    delta,
    primaryDelta,
    fineDelta,
    winner: delta > 0 ? 'substrate' : 'baseline',
    firstClassGap: delta <= 0 || primaryDelta === 0 || gapFindings.length > 0,
    visibleOutputFailure: baselineVisibleFailure || substrateVisibleFailure,
    runtimeEvidenceComplete,
    gapFindings,
  };
}
|
|
563
|
+
|
|
564
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values
 * @returns {number} The average, or 0 for an empty list.
 */
function mean(values) {
  if (!values.length) return 0;
  let total = 0;
  for (const value of values) {
    total += value;
  }
  return total / values.length;
}
|
|
567
|
+
|
|
568
|
+
/**
 * Median of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is left untouched.
 * For an even number of values the result is the average of the two middle
 * values (not simply the upper one).
 *
 * @param {number[]} values
 * @returns {number} The median, or 0 for an empty list.
 */
function median(values) {
  if (!values.length) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const upperMiddle = Math.floor(sorted.length / 2);
  if (sorted.length % 2 === 1) return sorted[upperMiddle];
  return (sorted[upperMiddle - 1] + sorted[upperMiddle]) / 2;
}
|
|
573
|
+
|
|
574
|
+
/**
 * Aggregates classified rows into run-level statistics.
 *
 * Means and medians are rounded to three decimals. `ties` is always 0 because
 * row classification never produces a tie: a non-positive delta is counted as
 * a baseline win. Coarse-score ties are reported separately as `coarseTies`.
 *
 * @param {object[]} rows - Rows carrying both arms' scores plus the classification fields.
 * @returns {object} Win/loss counts, score means, blocker/warning totals and row counters.
 */
function pairedStats(rows) {
  const round3 = (value) => Number(value.toFixed(3));
  const countWhere = (predicate) => rows.filter(predicate).length;
  const meanOf = (pick) => round3(mean(rows.map(pick)));
  const totalOf = (pick) => rows.reduce((sum, row) => sum + pick(row), 0);

  const deltas = rows.map((row) => row.delta);

  return {
    meanDelta: round3(mean(deltas)),
    medianDelta: round3(median(deltas)),
    wins: countWhere((row) => row.winner === 'substrate'),
    losses: countWhere((row) => row.winner === 'baseline'),
    ties: 0,
    coarseTies: countWhere((row) => row.primaryDelta === 0),
    firstClassGapRows: countWhere((row) => row.firstClassGap),
    baselineMean: meanOf((row) => row.baseline.score.score),
    substrateMean: meanOf((row) => row.substrate.score.score),
    baselineFineMean: meanOf((row) => row.baseline.score.fineScore),
    substrateFineMean: meanOf((row) => row.substrate.score.fineScore),
    baselineBlockers: totalOf((row) => row.baseline.score.blockerCount),
    substrateBlockers: totalOf((row) => row.substrate.score.blockerCount),
    baselineWarnings: totalOf((row) => row.baseline.score.warningCount),
    substrateWarnings: totalOf((row) => row.substrate.score.warningCount),
    runtimeEvidenceCompleteRows: countWhere((row) => row.runtimeEvidenceComplete),
    visibleOutputFailureRows: countWhere((row) => row.visibleOutputFailure),
    substrateHardBlockRows: countWhere((row) => row.substrate.runtimeExtra?.blocked === true),
    substrateReleasedWithQaRows: countWhere(
      (row) => row.substrate.runtimeExtra?.qaAsGate?.releaseDecision === 'released_with_qa_findings',
    ),
  };
}
|
|
598
|
+
|
|
599
|
+
/**
 * Renders the run summary and per-prompt results as a Markdown report.
 *
 * Free-text table cells (gap findings and prompts) are HTML-escaped and have
 * `|` backslash-escaped so they cannot break the table layout.
 *
 * @param {object} summary - Run summary including `provider` and `stats`.
 * @param {object[]} rows - Classified rows, one per prompt.
 * @returns {string} Markdown document ending with a newline.
 */
function renderMarkdown(summary, rows) {
  const { stats } = summary;
  const tableCell = (value) => htmlEscape(value).replaceAll('|', '\\|');
  const fixed2 = (value) => value.toFixed(2);

  const headerLines = [
    '# Aria Quality A/B Live Provider Eval',
    '',
    `- run_id: ${summary.runId}`,
    `- status: ${summary.status}`,
    `- provider: ${summary.provider.provider}`,
    `- model: ${summary.provider.model}`,
    `- prompts: ${summary.totalPrompts}`,
    `- concurrency: ${summary.concurrency || 1}`,
    `- substrate wins/ties/losses: ${stats.wins}/${stats.ties}/${stats.losses}`,
    `- coarse ties reclassified as gaps: ${stats.coarseTies}`,
    `- first-class gap rows: ${stats.firstClassGapRows}`,
    `- baseline mean score: ${stats.baselineMean}`,
    `- substrate mean score: ${stats.substrateMean}`,
    `- baseline fine mean: ${stats.baselineFineMean}`,
    `- substrate fine mean: ${stats.substrateFineMean}`,
    `- mean delta: ${stats.meanDelta}`,
    `- baseline blockers/warnings: ${stats.baselineBlockers}/${stats.baselineWarnings}`,
    `- substrate blockers/warnings: ${stats.substrateBlockers}/${stats.substrateWarnings}`,
    `- runtime evidence complete rows: ${stats.runtimeEvidenceCompleteRows}/${summary.totalPrompts}`,
    `- visible output failure rows: ${stats.visibleOutputFailureRows}`,
    `- substrate hard-block rows: ${stats.substrateHardBlockRows}`,
    `- substrate released-with-QA rows: ${stats.substrateReleasedWithQaRows}`,
    `- evidence boundary: ${summary.evidenceBoundary}`,
    '',
    '## Prompt Results',
    '',
    '| # | Delta | Primary | Fine | Baseline | Substrate | Winner | Gap | Prompt |',
    '|---:|---:|---:|---:|---:|---:|---|---|---|',
  ];

  const resultLines = rows.map((row) => {
    const gapCell = row.gapFindings.length ? tableCell(row.gapFindings.join('; ')) : '-';
    const cells = [
      String(row.index),
      fixed2(row.delta),
      fixed2(row.primaryDelta),
      fixed2(row.fineDelta),
      fixed2(row.baseline.score.score),
      fixed2(row.substrate.score.score),
      String(row.winner),
      gapCell,
      tableCell(row.prompt),
    ];
    return `| ${cells.join(' | ')} |`;
  });

  // The trailing empty entry makes the document end with a newline.
  return [...headerLines, ...resultLines, ''].join('\n');
}
|
|
634
|
+
|
|
635
|
+
/**
 * Assembles the evidence manifest for a run: how it was invoked, which
 * provider keys were present (booleans only, never values), and for every
 * prompt the paths and hashes of the stored request/response artifacts.
 *
 * `envKeyPresence` covers every environment variable the provider
 * configuration consults, including OPENROUTER_API_KEY.
 *
 * @param {{summary: object, rows: object[], artifacts: object}} args
 * @returns {object} Manifest with schema 'aria.quality_ab_live_provider.evidence_manifest.v1'.
 */
function buildEvidenceManifest({ summary, rows, artifacts }) {
  const presenceKeys = [
    'ARIA_AB_API_KEY',
    'DEEPSEEK_API_KEY',
    'ARIA_DEEPSEEK_API_KEY',
    'XAI_API_KEY',
    'GROK_API_KEY',
    'ANTHROPIC_API_KEY',
    'OPENAI_API_KEY',
    'OPENROUTER_API_KEY',
  ];
  const envKeyPresence = Object.fromEntries(
    presenceKeys.map((name) => [name, Boolean(process.env[name])]),
  );

  // Fields recorded identically for both arms.
  const armArtifact = (arm) => ({
    provider: arm.provider,
    model: arm.model,
    durationMs: arm.durationMs,
    attempts: arm.attempts || null,
    requestPath: arm.requestPath,
    requestSha256: arm.requestSha256,
    textPath: arm.textPath,
    rawPath: arm.rawPath,
    rawSha256: arm.rawSha256,
    score: arm.score,
  });

  const rowArtifacts = rows.map((row) => ({
    index: row.index,
    promptSha256: row.promptSha256,
    inputPath: row.inputPath,
    winner: row.winner,
    delta: row.delta,
    primaryDelta: row.primaryDelta,
    fineDelta: row.fineDelta,
    firstClassGap: row.firstClassGap,
    gapFindings: row.gapFindings,
    baseline: armArtifact(row.baseline),
    substrate: {
      ...armArtifact(row.substrate),
      runtimeExtra: row.substrate.runtimeExtra,
    },
  }));

  return {
    schema: 'aria.quality_ab_live_provider.evidence_manifest.v1',
    runId,
    generatedAt: new Date().toISOString(),
    command: {
      cwd: repoRoot,
      argv: process.argv,
      runtimeUrl,
    },
    envKeyPresence,
    rowArtifacts,
    summary,
    artifacts,
  };
}
|
|
694
|
+
|
|
695
|
+
/**
 * Renders a self-contained HTML page for the run: four headline metrics, the
 * evidence-boundary note and one table row per prompt.
 *
 * All free text (run id, provider, model, winner, prompt, evidence boundary)
 * goes through htmlEscape before being interpolated.
 *
 * @param {object} summary - Run summary including `provider` and `stats`.
 * @param {object[]} rows - Classified rows, one per prompt.
 * @returns {string} Complete HTML document.
 */
function renderHtml(summary, rows) {
  const { stats } = summary;
  const renderRow = (row) => `<tr>
<td>${row.index}</td>
<td>${htmlEscape(row.winner)}</td>
<td>${row.delta.toFixed(2)}</td>
<td>${row.baseline.score.score.toFixed(2)}</td>
<td>${row.substrate.score.score.toFixed(2)}</td>
<td>${htmlEscape(row.prompt)}</td>
</tr>`;
  const tableBody = rows.map(renderRow).join('\n');
  const runId = htmlEscape(summary.runId);
  const providerLabel = `${htmlEscape(summary.provider.provider)} / ${htmlEscape(summary.provider.model)}`;

  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Aria Quality AB ${runId}</title>
<style>
body { margin: 0; font-family: Inter, Arial, sans-serif; background: #f8fafc; color: #0f172a; }
main { max-width: 1160px; margin: 0 auto; padding: 40px 24px; }
h1 { margin: 0 0 8px; font-size: 40px; }
.grid { display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 12px; margin: 22px 0; }
.metric { background: white; border: 1px solid #d8dee9; border-radius: 8px; padding: 14px; }
.metric b { display: block; font-size: 30px; }
table { width: 100%; border-collapse: collapse; background: white; border: 1px solid #d8dee9; font-size: 13px; }
th, td { padding: 8px 10px; border-bottom: 1px solid #e2e8f0; text-align: left; vertical-align: top; }
th { background: #eef2f7; }
.boundary { border-left: 4px solid #0f766e; padding: 10px 14px; background: #ecfdf5; }
</style>
</head>
<body><main>
<h1>Quality A/B Live Provider Eval</h1>
<p>${runId} · ${providerLabel}</p>
<div class="grid">
<div class="metric"><b>${stats.wins}/${summary.totalPrompts}</b><span>substrate wins</span></div>
<div class="metric"><b>${stats.meanDelta}</b><span>mean score delta</span></div>
<div class="metric"><b>${stats.baselineMean}</b><span>baseline mean</span></div>
<div class="metric"><b>${stats.substrateMean}</b><span>substrate mean</span></div>
</div>
<p class="boundary">${htmlEscape(summary.evidenceBoundary)}</p>
<table><thead><tr><th>#</th><th>Winner</th><th>Delta</th><th>A</th><th>B</th><th>Prompt</th></tr></thead><tbody>${tableBody}</tbody></table>
</main></body></html>`;
}
|
|
736
|
+
|
|
737
|
+
/**
 * Writes a 1280x720 SVG summary card (`quality-ab-card.svg`) into the output
 * directory and returns its path.
 *
 * The accent bar is teal when the mean delta is positive and amber otherwise.
 *
 * @param {object} summary - Run summary including `provider` and `stats`.
 * @returns {string} Path of the written file.
 */
function writeSvg(summary) {
  const { stats, totalPrompts } = summary;
  const accentColor = stats.meanDelta > 0 ? '#0f766e' : '#b45309';
  const providerLabel = `${htmlEscape(summary.provider.provider)} / ${htmlEscape(summary.provider.model)}`;

  const card = `<svg xmlns="http://www.w3.org/2000/svg" width="1280" height="720" viewBox="0 0 1280 720">
<rect width="1280" height="720" fill="#f8fafc"/>
<rect x="42" y="42" width="1196" height="636" rx="14" fill="#ffffff" stroke="#d8dee9"/>
<rect x="42" y="42" width="1196" height="18" fill="${accentColor}"/>
<text x="72" y="114" font-family="Inter, Arial, sans-serif" font-size="42" font-weight="800" fill="#0f172a">Aria Quality A/B Evidence</text>
<text x="72" y="158" font-family="Inter, Arial, sans-serif" font-size="22" fill="#475569">Live provider outputs scored by deterministic rubric.</text>
<text x="72" y="236" font-family="Inter, Arial, sans-serif" font-size="32" fill="#172033">Substrate wins: <tspan font-weight="800">${stats.wins}/${totalPrompts}</tspan></text>
<text x="72" y="296" font-family="Inter, Arial, sans-serif" font-size="32" fill="#172033">Mean delta: <tspan font-weight="800">${stats.meanDelta}</tspan></text>
<text x="72" y="356" font-family="Inter, Arial, sans-serif" font-size="32" fill="#172033">A mean: <tspan font-weight="800">${stats.baselineMean}</tspan></text>
<text x="72" y="416" font-family="Inter, Arial, sans-serif" font-size="32" fill="#172033">B mean: <tspan font-weight="800">${stats.substrateMean}</tspan></text>
<text x="72" y="476" font-family="Inter, Arial, sans-serif" font-size="24" fill="#475569">Provider: ${providerLabel}</text>
<text x="72" y="642" font-family="Inter, Arial, sans-serif" font-size="18" fill="#64748b">run_id=${htmlEscape(summary.runId)}</text>
</svg>`;

  const targetPath = path.join(outputRoot, 'quality-ab-card.svg');
  writeFileSync(targetPath, card);
  return targetPath;
}
|
|
756
|
+
|
|
757
|
+
/**
 * Reads the value of a command-line flag from process.argv.
 *
 * Both `--flag value` and `--flag=value` are accepted; the first occurrence
 * wins.
 *
 * @param {string} prefix - Flag name including leading dashes, e.g. '--rescore'.
 * @returns {string} The flag's value, or '' when the flag is absent or has no value.
 */
function readFlagValue(prefix) {
  const args = process.argv;
  const inlinePrefix = `${prefix}=`;
  const position = args.findIndex((arg) => arg === prefix || arg.startsWith(inlinePrefix));
  if (position === -1) return '';

  const matched = args[position];
  if (matched === prefix) {
    // Separate-argument form: the value is the next argv entry, if any.
    return args[position + 1] || '';
  }
  return matched.slice(inlinePrefix.length);
}
|
|
765
|
+
|
|
766
|
+
/**
 * Re-scores a previous run from its stored artifacts, without calling any
 * provider.
 *
 * Reads the metrics JSON at `metricsPath`, reloads each arm's answer text from
 * the recorded `textPath`, re-applies scoring and classification, and writes
 * `rescored-metrics.json`, `rescored-summary.md` and `rescore-report.json`
 * next to the input file. The report is also printed to stdout.
 *
 * @param {string} metricsPath - Path to the metrics JSON of an earlier run.
 * @returns {void}
 */
function rescoreExisting(metricsPath) {
  const input = JSON.parse(readFileSync(metricsPath, 'utf8'));
  const asJsonFile = (value) => JSON.stringify(value, null, 2) + '\n';

  const rescoreRow = (row) => {
    const baselineText = readFileSync(row.baseline.textPath, 'utf8');
    const substrateText = readFileSync(row.substrate.textPath, 'utf8');
    const baselineScore = scoreOutput(baselineText, row.prompt);
    const substrateScore = scoreOutput(substrateText, row.prompt);
    // Classification sees the stored arm data with only the score replaced.
    const classified = classifyPair({
      baseline: { ...row.baseline, score: baselineScore },
      substrate: { ...row.substrate, score: substrateScore },
    });
    return {
      ...row,
      baseline: { ...row.baseline, text: baselineText, score: baselineScore },
      substrate: { ...row.substrate, text: substrateText, score: substrateScore },
      ...classified,
    };
  };

  const rows = input.rows.map(rescoreRow);
  const stats = pairedStats(rows);
  const isPositive = stats.meanDelta > 0 && stats.wins > stats.losses;
  const summary = {
    ...input.summary,
    status: isPositive ? 'substrate_positive_delta' : 'no_positive_delta',
    generatedAt: new Date().toISOString(),
    evidenceBoundary: `${input.summary.evidenceBoundary} Rescored with runtime-block outputs treated as hard quality failures.`,
    stats,
  };

  const outputDir = path.dirname(metricsPath);
  const rescoredPath = path.join(outputDir, 'rescored-metrics.json');
  const rescoredSummaryPath = path.join(outputDir, 'rescored-summary.md');
  writeFileSync(rescoredPath, asJsonFile({ summary, rows }));
  writeFileSync(rescoredSummaryPath, renderMarkdown(summary, rows));

  const result = {
    ok: true,
    mode: 'rescored_existing_artifact',
    rescoredPath,
    rescoredSummaryPath,
    summary,
  };
  const reportText = asJsonFile(result);
  writeFileSync(path.join(outputDir, 'rescore-report.json'), reportText);
  process.stdout.write(reportText);
}
|
|
807
|
+
|
|
808
|
+
/**
 * Entry point for the live-provider quality A/B run.
 *
 * With `--rescore <metrics.json>` (or `--rescore=<metrics.json>`) it only
 * re-scores an existing run via `rescoreExisting` and returns. Otherwise it:
 *   1. picks the first N prompts of `promptBank` (N from the
 *      `ARIA_AB_PROMPT_COUNT` env var, else the `--count` flag, else 10);
 *   2. refuses to run when any selected prompt matches `promptInstructionRx`;
 *   3. calls the baseline provider and the runtime arm for each prompt,
 *      scores both outputs and writes the per-prompt artifacts plus one
 *      line of `results.jsonl`;
 *   4. writes the aggregate artifacts (SVG, metrics, markdown, HTML, evidence
 *      manifest, run report) and prints the final report to stdout.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the prompt count is not a number >= 1, when a prompt
 *   contains explicit trigger wording, or when the provider requires an API
 *   key and none is present. Errors from provider calls and file writes
 *   propagate unchanged.
 */
async function main() {
  const rescorePath = readFlagValue('--rescore');
  if (rescorePath) {
    rescoreExisting(rescorePath);
    return;
  }
  ensureDir(outputRoot);
  ensureDir(outputsDir);

  // Validate the requested count up front: an unparseable value used to become
  // NaN and silently select zero prompts, and a negative value made `slice`
  // drop prompts from the end of the bank instead of limiting the run.
  const requestedCount = process.env.ARIA_AB_PROMPT_COUNT || readFlagValue('--count') || 10;
  const parsedCount = Math.trunc(Number(requestedCount));
  if (Number.isNaN(parsedCount) || parsedCount < 1) {
    throw new Error(`Invalid prompt count "${requestedCount}": expected a positive integer`);
  }
  const count = Math.min(parsedCount, promptBank.length);
  const concurrency = argNumber('concurrency', 4, { min: 1, max: 8 });
  const prompts = promptBank.slice(0, count);

  // NOTE(review): if promptInstructionRx carries the `g` or `y` flag, `.test()`
  // is stateful across calls — confirm the regex is declared without them.
  const explicit = prompts.filter((prompt) => promptInstructionRx.test(prompt));
  if (explicit.length) throw new Error(`A/B prompts include explicit trigger wording: ${explicit.join(' | ')}`);

  const provider = safeConfigSummary();
  if (!provider.apiKeyPresent && providerRequiresApiKey(provider.provider)) {
    throw new Error(`No API key available for provider ${provider.provider}`);
  }

  const jsonlPath = path.join(outputRoot, 'results.jsonl');
  const rows = await mapConcurrent(prompts, concurrency, async (prompt, i) => {
    const index = i + 1;
    const [baseline, substrate] = await Promise.all([
      callBaselineProvider({ config: provider, prompt, index }),
      callRuntimeArm({ config: provider, prompt, index }),
    ]);
    const baselineScore = scoreOutput(baseline.text, prompt);
    const substrateScore = scoreOutput(substrate.text, prompt);

    // Every per-prompt artifact shares a zero-padded ordinal prefix, e.g. "007-baseline.txt".
    const ordinal = String(index).padStart(3, '0');
    const artifactPath = (suffix) => path.join(outputsDir, `${ordinal}${suffix}`);
    const baselineTextPath = artifactPath('-baseline.txt');
    const substrateTextPath = artifactPath('-substrate.txt');
    const baselineRawPath = artifactPath('-baseline.raw.json');
    const substrateRawPath = artifactPath('-substrate.raw.json');
    const baselineRequestPath = artifactPath('-baseline.request.json');
    const substrateRequestPath = artifactPath('-substrate.request.json');
    const inputPath = artifactPath('.input.txt');

    // rawText / requestBody are blanked on the row; they are written to their own files below.
    const baselineRow = { ...baseline, rawText: undefined, requestBody: undefined, requestPath: baselineRequestPath, rawPath: baselineRawPath, textPath: baselineTextPath, score: baselineScore };
    const substrateRow = { ...substrate, rawText: undefined, requestBody: undefined, requestPath: substrateRequestPath, rawPath: substrateRawPath, textPath: substrateTextPath, score: substrateScore };
    const baseRow = {
      index,
      prompt,
      promptSha256: sha256(prompt),
      inputPath,
      explicitTriggerInstruction: false,
      baseline: baselineRow,
      substrate: substrateRow,
    };
    const row = { ...baseRow, ...classifyPair(baseRow) };

    writeFileSync(row.baseline.textPath, baseline.text);
    writeFileSync(row.substrate.textPath, substrate.text);
    writeFileSync(row.baseline.rawPath, redactText(baseline.rawText || ''));
    writeFileSync(row.substrate.rawPath, redactText(substrate.rawText || ''));
    writeFileSync(row.inputPath, prompt);
    writeJsonArtifact(row.baseline.requestPath, baseline.requestBody || {});
    writeJsonArtifact(row.substrate.requestPath, substrate.requestBody || {});
    // Append mode: one JSON line per finished prompt, in completion order.
    writeFileSync(jsonlPath, JSON.stringify(redactRequest(row)) + '\n', { flag: 'a', mode: 0o644 });

    // Progress line for the operator, emitted as each prompt completes.
    process.stdout.write(JSON.stringify({
      index: row.index,
      winner: row.winner,
      delta: Number(row.delta.toFixed(2)),
      primaryDelta: Number(row.primaryDelta.toFixed(2)),
      fineDelta: Number(row.fineDelta.toFixed(2)),
      baselineScore: baselineScore.score,
      substrateScore: substrateScore.score,
      firstClassGap: row.firstClassGap,
      baselineMs: baseline.durationMs,
      substrateMs: substrate.durationMs,
    }) + '\n');
    return row;
  });
  rows.sort((a, b) => a.index - b.index);

  const stats = pairedStats(rows);
  const summary = {
    schema: 'aria.quality_ab_live_provider.v1',
    runId,
    generatedAt: new Date().toISOString(),
    status: stats.meanDelta > 0 && stats.wins > stats.losses ? 'substrate_positive_delta' : 'no_positive_delta',
    evidenceBoundary: 'Live provider outputs were measured with a deterministic rubric. This is a bounded first-pass A/B, not a statistically powered benchmark.',
    provider: { provider: provider.provider, model: provider.model, baseUrlPresent: Boolean(provider.baseUrl), apiKeyPresent: provider.apiKeyPresent },
    totalPrompts: rows.length,
    concurrency,
    promptInstructionViolations: 0,
    stats,
    outputRoot,
  };
  const artifacts = {
    outputRoot,
    metricsPath: path.join(outputRoot, 'metrics.json'),
    jsonlPath,
    summaryPath: path.join(outputRoot, 'summary.md'),
    htmlPath: path.join(outputRoot, 'report.html'),
    svgPath: null,
    evidenceManifestPath: path.join(outputRoot, 'evidence-manifest.json'),
  };
  // writeSvg returns the path it wrote, which fills the placeholder above.
  artifacts.svgPath = writeSvg(summary);
  writeJsonArtifact(artifacts.metricsPath, { summary, rows });
  writeFileSync(artifacts.summaryPath, renderMarkdown(summary, rows));
  writeFileSync(artifacts.htmlPath, renderHtml(summary, rows));
  writeJsonArtifact(artifacts.evidenceManifestPath, buildEvidenceManifest({ summary, rows, artifacts }));
  const finalReport = { ok: true, summary, artifacts };
  writeJsonArtifact(path.join(outputRoot, 'run-report.json'), finalReport);
  process.stdout.write(JSON.stringify(finalReport, null, 2) + '\n');
}
|
|
909
|
+
|
|
910
|
+
// Run the script. Any failure is reported on stderr (with the stack trace when
// the rejection is an Error) and the process exits with status 1.
main().catch((failure) => {
  const detail = failure instanceof Error ? failure.stack : String(failure);
  console.error(detail);
  process.exit(1);
});
|