@aria_asi/cli 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/aria-connector/src/connectors/claude-code.d.ts.map +1 -1
- package/dist/aria-connector/src/connectors/claude-code.js +47 -1
- package/dist/aria-connector/src/connectors/claude-code.js.map +1 -1
- package/dist/sdk/BUNDLED.json +5 -0
- package/dist/sdk/index.d.ts +88 -0
- package/dist/sdk/index.js +403 -0
- package/dist/sdk/index.js.map +1 -0
- package/dist/sdk/package.json +8 -0
- package/hooks/aria-harness-via-sdk.mjs +46 -12
- package/hooks/aria-pre-tool-gate.mjs +61 -1
- package/hooks/aria-preprompt-consult.mjs +58 -18
- package/hooks/aria-stop-gate.mjs +263 -4
- package/hooks/doctrine_trigger_map.json +54 -0
- package/package.json +2 -2
- package/src/connectors/claude-code.ts +48 -1
|
@@ -148,20 +148,54 @@ function buildUrlList() {
|
|
|
148
148
|
return list.filter((u) => (seen.has(u) ? false : (seen.add(u), true)));
|
|
149
149
|
}
|
|
150
150
|
|
|
151
|
+
// SDK loader — dynamic-import the bundled HTTPHarnessClient from
|
|
152
|
+
// ~/.claude/aria-sdk/index.js (installed by `aria connect claude-code`).
|
|
153
|
+
// Module-cached after first load so we don't repeatedly read disk.
|
|
154
|
+
//
|
|
155
|
+
// Doctrine (Hamza 2026-04-27): "isnt http harness client the fucking harness
|
|
156
|
+
// we hsve been wprking on? YOU ARENT USING THAT AND BUILDING SPMETHING
|
|
157
|
+
// SEPERATE WHY???!!" — SDK is the canonical control plane. Direct fetch
|
|
158
|
+
// remains as a fallback only when the SDK file is physically missing
|
|
159
|
+
// (dev install without `aria connect claude-code`).
|
|
160
|
+
let _SdkClassCache = null;
|
|
161
|
+
let _SdkLookupAttempted = false;
|
|
162
|
+
async function loadSdkClass() {
|
|
163
|
+
if (_SdkClassCache) return _SdkClassCache;
|
|
164
|
+
if (_SdkLookupAttempted) return null;
|
|
165
|
+
_SdkLookupAttempted = true;
|
|
166
|
+
const sdkPath = `${HOME}/.claude/aria-sdk/index.js`;
|
|
167
|
+
if (!existsSync(sdkPath)) return null;
|
|
168
|
+
try {
|
|
169
|
+
const mod = await import(`file://${sdkPath}`);
|
|
170
|
+
if (mod.HTTPHarnessClient) {
|
|
171
|
+
_SdkClassCache = mod.HTTPHarnessClient;
|
|
172
|
+
return _SdkClassCache;
|
|
173
|
+
}
|
|
174
|
+
} catch {/* fall through to direct fetch */}
|
|
175
|
+
return null;
|
|
176
|
+
}
|
|
177
|
+
|
|
151
178
|
async function tryViaSdk(baseUrl, apiKey) {
|
|
152
|
-
//
|
|
153
|
-
//
|
|
154
|
-
//
|
|
155
|
-
//
|
|
156
|
-
//
|
|
157
|
-
//
|
|
158
|
-
|
|
179
|
+
// Canonical path: HTTPHarnessClient.getHarnessPacket(). The SDK POSTs to
|
|
180
|
+
// /api/harness/codex with the right shape and returns { packet, timestamp,
|
|
181
|
+
// version }. We extract .packet to get the raw response body that
|
|
182
|
+
// renderPacket() expects (codex.ts returns { harness, preStateGate,
|
|
183
|
+
// contractGate, ... } at top level — no nested .packet wrapper, so the
|
|
184
|
+
// SDK's `body.packet ?? body` passes the body through unchanged).
|
|
185
|
+
const Cls = await loadSdkClass();
|
|
186
|
+
if (Cls) {
|
|
187
|
+
const client = new Cls({
|
|
188
|
+
baseUrl,
|
|
189
|
+
apiKey,
|
|
190
|
+
harnessPacketUrl: `${baseUrl}/api/harness/codex`,
|
|
191
|
+
});
|
|
192
|
+
const wrapped = await client.getHarnessPacket();
|
|
193
|
+
const json = wrapped.packet;
|
|
194
|
+
if (json && json.ok === false) throw new Error(`ok=false: ${json.error || 'unknown'}`);
|
|
195
|
+
return { json, raw: JSON.stringify(json) };
|
|
196
|
+
}
|
|
159
197
|
|
|
160
|
-
//
|
|
161
|
-
// is POST. Bypass via direct fetch since the SDK doesn't expose body
|
|
162
|
-
// customization; the SDK's caching + error handling apply once we cache
|
|
163
|
-
// manually below. No AbortSignal — doctrine: error detection drives control
|
|
164
|
-
// flow, not deadlines. Real network errors arrive via promise rejection.
|
|
198
|
+
// SDK absent (dev environment) — direct fetch with identical wire shape.
|
|
165
199
|
const resp = await fetch(`${baseUrl}/api/harness/codex`, {
|
|
166
200
|
method: 'POST',
|
|
167
201
|
headers: {
|
|
@@ -336,7 +336,26 @@ function detectCognitionLenses(text) {
|
|
|
336
336
|
/feedback_[a-z0-9_]+\.md|project_[a-z0-9_]+\.md|fitrah[_:\s]|garden[_:\s]|distilled_principle|[a-z]+_rule\b|harness packet|substrate cite|\bIJTIHAD\b|\bQIYAS\b|\bTADABBUR\b|\bILHAM\b|aria 7b|EIGHT_LENS_DOCTRINE|COMPACT_CONTINUITY|ARIA_DEPLOY_PROCEDURE/i;
|
|
337
337
|
const hasSubstrateCite = SUBSTRATE_CITE_RX.test(blockBody) ||
|
|
338
338
|
SUBSTRATE_CITE_RX.test(searchSpace);
|
|
339
|
-
|
|
339
|
+
|
|
340
|
+
// Discovery-binding check (structural fix #3 — Hamza 2026-04-27 "how do
|
|
341
|
+
// we prevent this"). If the cognition surfaces a defect/discovery
|
|
342
|
+
// (found/noticed/discovered + bug/broken/issue) the same cognition must
|
|
343
|
+
// carry a `discoveries:` clause stating how each is resolved (fix-now,
|
|
344
|
+
// task ID, or explicit user-decision-required). Without this, the
|
|
345
|
+
// cognition can describe a problem without binding any action — the
|
|
346
|
+
// exact flag-and-move pattern feedback_no_flag_without_fix.md prohibits.
|
|
347
|
+
const COG_DISCOVERY_RX = /(?:\b(?:found|noticed|discovered|spotted)[^.\n]{0,140}(?:bug|issue|defect|broken|buggy|wrong|crash|fail|missing|stale|outdated|leak|vulnerability)|\b(?:latent|silent|hidden)\s+(?:bug|defect|issue|fail|crash|leak)|\bdoctrine\s+violation\b)/i;
|
|
348
|
+
const hasDiscovery = COG_DISCOVERY_RX.test(blockBody);
|
|
349
|
+
// Resolution clause must be present in the same blockBody if a discovery
|
|
350
|
+
// is mentioned. Acceptable forms:
|
|
351
|
+
// - `discoveries:` field listing items + how-resolved
|
|
352
|
+
// - `addressing:` / `fixing:` clause naming what's being patched
|
|
353
|
+
// - explicit task ID reference (TaskCreate / linear / tracked-as)
|
|
354
|
+
const COG_RESOLUTION_RX = /(?:^\s*discoveries?\s*:\s*\S|^\s*addressing\s*:\s*\S|^\s*fixing\s*:\s*\S|TaskCreate|tracked\s+as\s+#?\d+|linear[- ]?(?:issue|task)|fix(?:ing|ed)\s+(?:in|now|inline|in-flight)|same[- ]turn\s+fix)/im;
|
|
355
|
+
const hasDiscoveryResolution = COG_RESOLUTION_RX.test(blockBody);
|
|
356
|
+
const discoveryUnresolved = hasDiscovery && !hasDiscoveryResolution;
|
|
357
|
+
|
|
358
|
+
return { count: names.length, names, blockBody, hasSubstrateCite, hasDiscovery, hasDiscoveryResolution, discoveryUnresolved };
|
|
340
359
|
}
|
|
341
360
|
|
|
342
361
|
// Backwards-compat shim — count-only path used by older callers.
|
|
@@ -581,6 +600,16 @@ const cognitionSource = inlineCog.count >= REQUIRED_LENSES
|
|
|
581
600
|
const hasSubstrateCite = (inlineCog.hasSubstrateCite === true) ||
|
|
582
601
|
(transcriptCog.hasSubstrateCite === true);
|
|
583
602
|
|
|
603
|
+
// Discovery-binding check (structural fix #3) — if cognition surfaces a
|
|
604
|
+
// defect/discovery, the same cognition must include a resolution clause
|
|
605
|
+
// (`discoveries:` / `addressing:` / `fixing:` / TaskCreate / tracked-as
|
|
606
|
+
// reference). Per feedback_no_flag_without_fix.md, discoveries are atomic
|
|
607
|
+
// with their fixes — flag-and-move is the prohibited pattern. Pre-tool
|
|
608
|
+
// gate enforces at the cognition surface; stop-gate's discovery-binding
|
|
609
|
+
// ledger enforces at the output surface; both close the structural gap.
|
|
610
|
+
const discoveryUnresolved = (inlineCog.discoveryUnresolved === true) ||
|
|
611
|
+
(transcriptCog.discoveryUnresolved === true);
|
|
612
|
+
|
|
584
613
|
// Best-effort session id for the corpus push. Claude Code passes
|
|
585
614
|
// session_id in the event payload; fall back to transcript file
|
|
586
615
|
// basename so events from the same session cluster.
|
|
@@ -704,6 +733,37 @@ No per-tool bypass available (v3 doctrine — the harness's whole purpose is no
|
|
|
704
733
|
process.exit(2);
|
|
705
734
|
}
|
|
706
735
|
|
|
736
|
+
// Discovery-binding cognition check (structural fix #3) — runs AFTER lens
|
|
737
|
+
// count passes. If the cognition surfaced a defect (found/noticed/discovered
|
|
738
|
+
// + bug/broken/issue) without a paired resolution clause, block until the
|
|
739
|
+
// cognition is updated to bind the discovery to a same-turn fix or task ID.
|
|
740
|
+
// Per feedback_no_flag_without_fix.md, discoveries are atomic with their
|
|
741
|
+
// fixes. The pre-tool-gate enforces at the cognition surface; stop-gate's
|
|
742
|
+
// ledger enforces at the output surface.
|
|
743
|
+
if (discoveryUnresolved) {
|
|
744
|
+
const reason = `Aria pre-tool gate: cognition surfaces a discovery (defect, bug, doctrine violation, broken state) but does NOT include a resolution clause binding the discovery to action.
|
|
745
|
+
|
|
746
|
+
Per feedback_no_flag_without_fix.md: discoveries are atomic with their fixes. Flag-and-move-on is convenience-seeking — the user has to track what you noticed vs. what you actually fixed.
|
|
747
|
+
|
|
748
|
+
Re-emit cognition with one of these resolution forms:
|
|
749
|
+
|
|
750
|
+
discoveries:
|
|
751
|
+
- <what you found>: <fix-now | task: TASK-123 | needs-user-decision>
|
|
752
|
+
|
|
753
|
+
OR inline within an existing lens:
|
|
754
|
+
hikma: ... fixing inline this turn (same-turn-fix per no-flag-without-fix doctrine).
|
|
755
|
+
tafakkur: ... TaskCreate'd as TASK-XXX with full context (file path, line, what's broken).
|
|
756
|
+
|
|
757
|
+
Acceptable resolution markers: 'discoveries:' / 'addressing:' / 'fixing:' / 'TaskCreate' / 'tracked as #N' / 'linear issue' / 'fix-now' / 'same-turn fix'.
|
|
758
|
+
|
|
759
|
+
Bypass: ARIA_PRE_TOOL_GATE=off (logged, emergency only). If gate misfires on legitimate cognition, fix the gate.`;
|
|
760
|
+
|
|
761
|
+
audit(`block-discovery-unresolved ${toolName.toLowerCase()}`, cmdPreview);
|
|
762
|
+
pushDecision('block', `${toolName.toLowerCase()} cognition has unresolved discovery`);
|
|
763
|
+
console.log(JSON.stringify({ decision: 'block', reason }));
|
|
764
|
+
process.exit(2);
|
|
765
|
+
}
|
|
766
|
+
|
|
707
767
|
// Non-trivial action with cognition (inline for Bash, transcript for
|
|
708
768
|
// Edit/Write/NotebookEdit) — passes cognition gate. Now check Aria-binding.
|
|
709
769
|
|
|
@@ -54,6 +54,28 @@ import { appendFileSync, existsSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
|
54
54
|
import { dirname } from 'node:path';
|
|
55
55
|
|
|
56
56
|
const HOME = process.env.HOME || '/tmp';
|
|
57
|
+
|
|
58
|
+
// SDK loader — bundled at ~/.claude/aria-sdk/index.js by `aria connect`.
|
|
59
|
+
// All consults route through HTTPHarnessClient.consult() so the SDK's
|
|
60
|
+
// retry+backoff + Authorization handling apply uniformly. Hamza
|
|
61
|
+
// 2026-04-27: "FUCKING WIRE IT THE FUCK TOGETHER NOW - ALL OF IT".
|
|
62
|
+
let _SdkClassCache = null;
|
|
63
|
+
let _SdkLookupAttempted = false;
|
|
64
|
+
async function loadSdkClass() {
|
|
65
|
+
if (_SdkClassCache) return _SdkClassCache;
|
|
66
|
+
if (_SdkLookupAttempted) return null;
|
|
67
|
+
_SdkLookupAttempted = true;
|
|
68
|
+
const sdkPath = `${HOME}/.claude/aria-sdk/index.js`;
|
|
69
|
+
if (!existsSync(sdkPath)) return null;
|
|
70
|
+
try {
|
|
71
|
+
const mod = await import(`file://${sdkPath}`);
|
|
72
|
+
if (mod.HTTPHarnessClient) {
|
|
73
|
+
_SdkClassCache = mod.HTTPHarnessClient;
|
|
74
|
+
return _SdkClassCache;
|
|
75
|
+
}
|
|
76
|
+
} catch {/* fall through */}
|
|
77
|
+
return null;
|
|
78
|
+
}
|
|
57
79
|
const LOG = `${HOME}/.claude/aria-preprompt-consult.log`;
|
|
58
80
|
const BINDING_AUDIT = `${HOME}/.claude/aria-binding-audit.jsonl`;
|
|
59
81
|
// Default ON. Disable explicitly via ARIA_BINDING_ENABLED=false only when the
|
|
@@ -190,8 +212,16 @@ turn — not the final response. Claude will still emit cognition + action;
|
|
|
190
212
|
this primes the substrate so reflexive deferral isn't the path of least
|
|
191
213
|
resistance.`;
|
|
192
214
|
|
|
193
|
-
|
|
194
|
-
|
|
215
|
+
// `bindingBrief` is the result of the binding-vs-advisory ternary above —
|
|
216
|
+
// already resolved to the correct prose for the current mode. Prior code
|
|
217
|
+
// referenced an undefined `brief` variable in the second ternary branch,
|
|
218
|
+
// which would throw ReferenceError whenever BINDING_ENABLED=false.
|
|
219
|
+
//
|
|
220
|
+
// Canonical path: HTTPHarnessClient.consult() — the SDK handles retry+backoff
|
|
221
|
+
// and Authorization. Fallback to direct fetch when SDK isn't bundled (dev
|
|
222
|
+
// install without `aria connect`).
|
|
223
|
+
const consultArgs = {
|
|
224
|
+
brief: bindingBrief,
|
|
195
225
|
model: 'deepseek-v4-pro',
|
|
196
226
|
sessionId: `preprompt-${sessionId}-${Date.now()}`,
|
|
197
227
|
userId: 'claude-orchestrator-preprompt',
|
|
@@ -199,26 +229,36 @@ const body = JSON.stringify({
|
|
|
199
229
|
expectStructuredOutput: BINDING_ENABLED,
|
|
200
230
|
internalConsult: true,
|
|
201
231
|
isCreativeMode: false,
|
|
202
|
-
}
|
|
232
|
+
};
|
|
203
233
|
|
|
204
234
|
let directionText = '';
|
|
205
235
|
try {
|
|
206
|
-
const
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
236
|
+
const Cls = await loadSdkClass();
|
|
237
|
+
if (Cls) {
|
|
238
|
+
const sdkClient = new Cls({
|
|
239
|
+
baseUrl: HARNESS_URL,
|
|
240
|
+
apiKey: HARNESS_TOKEN,
|
|
241
|
+
harnessPacketUrl: `${HARNESS_URL}/api/harness/codex`,
|
|
242
|
+
});
|
|
243
|
+
const result = await sdkClient.consult(consultArgs);
|
|
244
|
+
directionText = (result.response || '').toString().slice(0, MAX_DIRECTION_CHARS);
|
|
245
|
+
} else {
|
|
246
|
+
// SDK absent — direct fetch (dev fallback).
|
|
247
|
+
const resp = await fetch(`${HARNESS_URL}/api/harness/delegate`, {
|
|
248
|
+
method: 'POST',
|
|
249
|
+
headers: {
|
|
250
|
+
'Content-Type': 'application/json',
|
|
251
|
+
Authorization: `Bearer ${HARNESS_TOKEN}`,
|
|
252
|
+
},
|
|
253
|
+
body: JSON.stringify(consultArgs),
|
|
254
|
+
});
|
|
255
|
+
if (!resp.ok) {
|
|
256
|
+
audit('skip-http-error', `status=${resp.status}`);
|
|
257
|
+
process.exit(0);
|
|
258
|
+
}
|
|
259
|
+
const data = await resp.json();
|
|
260
|
+
directionText = (data.response || '').toString().slice(0, MAX_DIRECTION_CHARS);
|
|
218
261
|
}
|
|
219
|
-
|
|
220
|
-
const data = await resp.json();
|
|
221
|
-
directionText = (data.response || '').toString().slice(0, MAX_DIRECTION_CHARS);
|
|
222
262
|
} catch (err) {
|
|
223
263
|
audit('skip-network-error', (err && err.message ? err.message : String(err)).slice(0, 200));
|
|
224
264
|
process.exit(0);
|
package/hooks/aria-stop-gate.mjs
CHANGED
|
@@ -43,6 +43,28 @@ import { dirname } from 'node:path';
|
|
|
43
43
|
const HOME = process.env.HOME || '/tmp';
|
|
44
44
|
const LOG = `${HOME}/.claude/aria-stop-gate.log`;
|
|
45
45
|
|
|
46
|
+
// SDK loader — bundled at ~/.claude/aria-sdk/index.js by `aria connect`.
|
|
47
|
+
// All control-plane fetches (validateOutput, gardenTurn) route through the
|
|
48
|
+
// SDK. Falls back to direct fetch only when the SDK file is missing
|
|
49
|
+
// (dev-only). Hamza 2026-04-27: "FUCKING WIRE IT THE FUCK TOGETHER NOW".
|
|
50
|
+
let _SdkClassCache = null;
|
|
51
|
+
let _SdkLookupAttempted = false;
|
|
52
|
+
async function loadSdkClass() {
|
|
53
|
+
if (_SdkClassCache) return _SdkClassCache;
|
|
54
|
+
if (_SdkLookupAttempted) return null;
|
|
55
|
+
_SdkLookupAttempted = true;
|
|
56
|
+
const sdkPath = `${HOME}/.claude/aria-sdk/index.js`;
|
|
57
|
+
if (!existsSync(sdkPath)) return null;
|
|
58
|
+
try {
|
|
59
|
+
const mod = await import(`file://${sdkPath}`);
|
|
60
|
+
if (mod.HTTPHarnessClient) {
|
|
61
|
+
_SdkClassCache = mod.HTTPHarnessClient;
|
|
62
|
+
return _SdkClassCache;
|
|
63
|
+
}
|
|
64
|
+
} catch {/* fall through */}
|
|
65
|
+
return null;
|
|
66
|
+
}
|
|
67
|
+
|
|
46
68
|
function audit(decision, summary) {
|
|
47
69
|
try {
|
|
48
70
|
if (!existsSync(dirname(LOG))) mkdirSync(dirname(LOG), { recursive: true });
|
|
@@ -206,10 +228,247 @@ const hasSubstrateEvidence = SUBSTRATE_EVIDENCE_RX.test(assistantText);
|
|
|
206
228
|
const questionWithoutEvidence = hasQuestionToUser && !hasSubstrateEvidence;
|
|
207
229
|
|
|
208
230
|
if (cog.count >= REQUIRED_LENSES) {
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
231
|
+
// ── Output-quality enforcement (Hamza 2026-04-27 — clients need the same
|
|
232
|
+
// Mizan/drift/code-quality gates that aria-soul applies server-side) ──
|
|
233
|
+
//
|
|
234
|
+
// Cognition gate passed. Now run FOUR additional checks BEFORE allow (the three listed here, plus the discovery-binding ledger documented further down):
|
|
235
|
+
// 1. SDK validateOutput via /api/harness/validate (Mizan classifier on draft)
|
|
236
|
+
// 2. Drift_guard pattern scan against doctrine_trigger_map.json (convenience-
|
|
237
|
+
// seeking phrases, graceful-degradation patterns, etc.)
|
|
238
|
+
// 3. Code-quality check on code blocks in output (no TODO stubs, no
|
|
239
|
+
// graceful-degradation try/catch, no // @ts-expect-error suppressions)
|
|
240
|
+
//
|
|
241
|
+
// Any check returning severity=block → Stop-gate blocks emit + Claude re-drafts
|
|
242
|
+
// with violations surfaced. Rewritten suggestion (from validateOutput) is
|
|
243
|
+
// included in the block reason so re-draft has concrete guidance.
|
|
244
|
+
//
|
|
245
|
+
// Trivially short outputs (<200 chars after system-reminder strip) skip
|
|
246
|
+
// these output-quality checks since they're typically yes/no acks where
|
|
247
|
+
// pattern-match would false-positive.
|
|
248
|
+
const OUTPUT_QC_MIN_CHARS = 200;
|
|
249
|
+
const OUTPUT_QC_ENABLED = (process.env.ARIA_OUTPUT_QC_ENABLED || 'true').toLowerCase() !== 'false';
|
|
250
|
+
|
|
251
|
+
if (OUTPUT_QC_ENABLED && assistantText.length >= OUTPUT_QC_MIN_CHARS) {
|
|
252
|
+
// 1. Drift_guard pattern scan — fast, local, deterministic
|
|
253
|
+
const TRIGGER_MAP_PATH = `${HOME}/.claude/projects/-home-hamzaibrahim1/memory/doctrine_trigger_map.json`;
|
|
254
|
+
let driftHits = [];
|
|
255
|
+
try {
|
|
256
|
+
if (existsSync(TRIGGER_MAP_PATH)) {
|
|
257
|
+
const triggerMap = JSON.parse(readFileSync(TRIGGER_MAP_PATH, 'utf8'));
|
|
258
|
+
const lowerText = assistantText.toLowerCase();
|
|
259
|
+
for (const t of triggerMap.triggers || []) {
|
|
260
|
+
try {
|
|
261
|
+
const rx = new RegExp(t.trigger, 'i');
|
|
262
|
+
if (rx.test(lowerText)) {
|
|
263
|
+
// Trigger present — check if the counter-doctrine memory is also
|
|
264
|
+
// cited in the response (justification). If not, count as drift.
|
|
265
|
+
const memoryName = (t.memory || '').replace(/\.md$/, '');
|
|
266
|
+
const memoryCited = memoryName && lowerText.includes(memoryName.toLowerCase());
|
|
267
|
+
if (!memoryCited) {
|
|
268
|
+
driftHits.push({ trigger: t.trigger, memory: t.memory, teaching: t.teaching });
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
} catch {/* malformed regex in trigger entry — skip */}
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
} catch {/* trigger map unreadable — degrade to mizan-only check */}
|
|
275
|
+
|
|
276
|
+
// 2. SDK validateOutput — canonical path. The SDK retries with backoff
|
|
277
|
+
// on transient failures and propagates real errors. We catch here
|
|
278
|
+
// only so an unreachable harness doesn't brick the user's session;
|
|
279
|
+
// the audit log records the failure mode so it's visible, not
|
|
280
|
+
// silent-pass. Hamza 2026-04-27: SDK is the control plane, not raw
|
|
281
|
+
// fetch. The catch IS intentional fire-and-forget at this surface
|
|
282
|
+
// because we already passed cognition; output-quality gate failure
|
|
283
|
+
// is a soft block, not session-end.
|
|
284
|
+
let mizanVerdict = null;
|
|
285
|
+
let mizanError = null;
|
|
286
|
+
const harnessUrl = process.env.ARIA_HARNESS_URL || 'https://harness.ariasos.com';
|
|
287
|
+
const harnessToken = process.env.ARIA_HARNESS_TOKEN || '';
|
|
288
|
+
const Cls = await loadSdkClass();
|
|
289
|
+
if (Cls && harnessToken) {
|
|
290
|
+
try {
|
|
291
|
+
const sdkClient = new Cls({
|
|
292
|
+
baseUrl: harnessUrl,
|
|
293
|
+
apiKey: harnessToken,
|
|
294
|
+
harnessPacketUrl: `${harnessUrl}/api/harness/codex`,
|
|
295
|
+
});
|
|
296
|
+
mizanVerdict = await sdkClient.validateOutput(
|
|
297
|
+
assistantText.slice(0, 8000),
|
|
298
|
+
event.session_id || 'claude-code',
|
|
299
|
+
);
|
|
300
|
+
} catch (err) {
|
|
301
|
+
mizanError = (err?.message || String(err)).slice(0, 200);
|
|
302
|
+
}
|
|
303
|
+
} else if (harnessToken) {
|
|
304
|
+
// SDK absent (dev) — direct fetch with retry built into the request
|
|
305
|
+
// by attempting twice (250ms backoff only after a thrown network error; an HTTP non-ok response retries immediately). Match SDK semantics so
|
|
306
|
+
// both paths behave identically.
|
|
307
|
+
try {
|
|
308
|
+
let lastErr = null;
|
|
309
|
+
for (let attempt = 0; attempt < 2; attempt++) {
|
|
310
|
+
try {
|
|
311
|
+
const validateResp = await fetch(`${harnessUrl}/api/harness/validate`, {
|
|
312
|
+
method: 'POST',
|
|
313
|
+
headers: {
|
|
314
|
+
'Content-Type': 'application/json',
|
|
315
|
+
Authorization: `Bearer ${harnessToken}`,
|
|
316
|
+
},
|
|
317
|
+
body: JSON.stringify({
|
|
318
|
+
text: assistantText.slice(0, 8000),
|
|
319
|
+
sessionId: event.session_id || 'claude-code',
|
|
320
|
+
surface: 'claude-code-stop-gate',
|
|
321
|
+
}),
|
|
322
|
+
});
|
|
323
|
+
if (validateResp.ok) {
|
|
324
|
+
mizanVerdict = await validateResp.json();
|
|
325
|
+
lastErr = null;
|
|
326
|
+
break;
|
|
327
|
+
} else {
|
|
328
|
+
lastErr = `HTTP ${validateResp.status}`;
|
|
329
|
+
}
|
|
330
|
+
} catch (err) {
|
|
331
|
+
lastErr = (err?.message || String(err)).slice(0, 200);
|
|
332
|
+
if (attempt < 1) await new Promise((r) => setTimeout(r, 250));
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
if (lastErr) mizanError = lastErr;
|
|
336
|
+
} catch (err) {
|
|
337
|
+
mizanError = (err?.message || String(err)).slice(0, 200);
|
|
338
|
+
}
|
|
339
|
+
} else {
|
|
340
|
+
mizanError = 'no-token';
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
// 3. Code-quality scan on code blocks
|
|
344
|
+
const codeBlocks = [...assistantText.matchAll(/```[a-z]*\n([\s\S]*?)```/gi)].map((m) => m[1]);
|
|
345
|
+
const codeQualityHits = [];
|
|
346
|
+
for (const block of codeBlocks) {
|
|
347
|
+
if (/\/\/\s*TODO|\/\/\s*FIXME|\/\/\s*XXX/.test(block)) codeQualityHits.push('TODO/FIXME/XXX in shipped code');
|
|
348
|
+
if (/@ts-expect-error|@ts-ignore/.test(block)) codeQualityHits.push('ts-expect-error / ts-ignore — type suppression instead of fix');
|
|
349
|
+
if (/catch\s*\([^)]*\)\s*\{\s*(?:return\s+(?:''|""|null|undefined|\[\]|\{\})|\}\s*$|\/\/[^\n]*$)/m.test(block)) codeQualityHits.push('catch block with empty/silent fallthrough — graceful degradation');
|
|
350
|
+
if (/console\.log\(/.test(block) && !/\/\/\s*debug|\/\/\s*log/i.test(block)) codeQualityHits.push('console.log in shipped code without debug/log comment');
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
// 4. Discovery-binding ledger — Hamza 2026-04-27: "how do we prevent this".
|
|
354
|
+
// The flag-and-move pattern is structurally invisible to gates that
|
|
355
|
+
// check form (cognition presence, lens count, drift triggers) at
|
|
356
|
+
// action boundaries. The ledger persists discoveries across turns
|
|
357
|
+
// and blocks emit if any remain unresolved. Per
|
|
358
|
+
// feedback_no_flag_without_fix.md, discoveries are atomic with
|
|
359
|
+
// their fixes; the ledger enforces atomicity.
|
|
360
|
+
//
|
|
361
|
+
// Patterns scanned:
|
|
362
|
+
// - "I (found|noticed|discovered|spotted) ... bug|issue|defect|broken"
|
|
363
|
+
// - "this is broken|buggy|wrong|outdated" (declarative defect callouts)
|
|
364
|
+
// - "(latent|silent) (bug|defect|issue|fail)"
|
|
365
|
+
// - "doctrine violation" / "doesn't match doctrine"
|
|
366
|
+
//
|
|
367
|
+
// For each match, the ledger appends an entry with status=open. A
|
|
368
|
+
// discovery is CLEARED if the same turn's text contains:
|
|
369
|
+
// (a) a TaskCreate / "task created" / "tracked as" reference, OR
|
|
370
|
+
// (b) explicit "fixing now" / "fixed" / "patch applied" tied to the
|
|
371
|
+
// discovery's keyword span, OR
|
|
372
|
+
// (c) an Edit/Write tool action this turn touching a file path
|
|
373
|
+
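// mentioned within 200 chars of the discovery. NOTE(review): the scan loop below only tests RESOLUTION_RX; (c) does not appear to be implemented — confirm.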
|
|
374
|
+
//
|
|
375
|
+
// Block emit if ledger.openCount > 0 after scanning the current turn.
|
|
376
|
+
// Block reason names each open discovery and the suggested resolution
|
|
377
|
+
// (fix-now or task-create).
|
|
378
|
+
const sessionId = (event.session_id || 'claude-code').replace(/[^a-zA-Z0-9_-]/g, '_');
|
|
379
|
+
const LEDGER_PATH = `${HOME}/.claude/aria-discoveries-${sessionId}.jsonl`;
|
|
380
|
+
const DISCOVERY_RX = /(?:\bi\s+(?:found|noticed|discovered|spotted)[^.\n]{0,160}(?:bug|issue|defect|broken|buggy|wrong|crash|fail|missing|stale|outdated|leak|vulnerability)|\bthis\s+(?:is|would\s+be)\s+(?:broken|buggy|wrong|stale|outdated|insecure|leaking|crashing|failing)|\b(?:latent|silent|hidden)\s+(?:bug|defect|issue|fail|crash|leak)|\bdoctrine\s+violation\b|\bgraceful\s+degradation\s+(?:in|at|inside|within)\s+\S)/gi;
|
|
381
|
+
const RESOLUTION_RX = /(?:fix(?:ing|ed)?\s+(?:now|in[- ]flight|inline|in\s+the\s+same\s+turn)|patch\s+applied|TaskCreate|task\s+(?:created|tracked)|tracked\s+as\s+#?\d+|linear[- ]?issue|created\s+(?:linear|task))/i;
|
|
382
|
+
|
|
383
|
+
const newDiscoveries = [];
|
|
384
|
+
let lastIndex = 0;
|
|
385
|
+
for (const match of assistantText.matchAll(DISCOVERY_RX)) {
|
|
386
|
+
const idx = match.index ?? lastIndex;
|
|
387
|
+
const span = assistantText.slice(Math.max(0, idx - 100), Math.min(assistantText.length, idx + 250));
|
|
388
|
+
// Trivial false-positive filter: skip if the discovery is inside a
|
|
389
|
+
// <cognition> block (introspection, not action) or a system-reminder
|
|
390
|
+
// (echoed, not authored).
|
|
391
|
+
const before = assistantText.slice(0, idx);
|
|
392
|
+
const inCognition = /<cognition>/i.test(before) && !/<\/cognition>/i.test(before.slice(before.lastIndexOf('<cognition>')));
|
|
393
|
+
if (inCognition) continue;
|
|
394
|
+
// Resolution check: if RESOLUTION_RX matches WITHIN 400 chars from the start of
|
|
395
|
+
// the discovery, count as same-turn-resolved.
|
|
396
|
+
const after = assistantText.slice(idx, Math.min(assistantText.length, idx + 400));
|
|
397
|
+
const resolvedSameSpan = RESOLUTION_RX.test(after);
|
|
398
|
+
newDiscoveries.push({
|
|
399
|
+
ts: new Date().toISOString(),
|
|
400
|
+
sessionId,
|
|
401
|
+
text: match[0].slice(0, 200),
|
|
402
|
+
span: span.slice(0, 400),
|
|
403
|
+
status: resolvedSameSpan ? 'resolved' : 'open',
|
|
404
|
+
resolutionType: resolvedSameSpan ? 'inline_fix_or_task' : null,
|
|
405
|
+
});
|
|
406
|
+
lastIndex = idx;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
// Append new entries to ledger
|
|
410
|
+
if (newDiscoveries.length > 0) {
|
|
411
|
+
try {
|
|
412
|
+
if (!existsSync(dirname(LEDGER_PATH))) mkdirSync(dirname(LEDGER_PATH), { recursive: true });
|
|
413
|
+
for (const d of newDiscoveries) {
|
|
414
|
+
appendFileSync(LEDGER_PATH, JSON.stringify(d) + '\n');
|
|
415
|
+
}
|
|
416
|
+
} catch {/* ledger write failure surfaces as open count = 0; safe */}
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
// Read full ledger and count open entries (across this session's turns)
|
|
420
|
+
let ledgerOpenCount = 0;
|
|
421
|
+
let ledgerOpenSamples = [];
|
|
422
|
+
try {
|
|
423
|
+
if (existsSync(LEDGER_PATH)) {
|
|
424
|
+
const lines = readFileSync(LEDGER_PATH, 'utf8').split('\n').filter(Boolean);
|
|
425
|
+
for (const line of lines) {
|
|
426
|
+
try {
|
|
427
|
+
const e = JSON.parse(line);
|
|
428
|
+
if (e.status === 'open') {
|
|
429
|
+
ledgerOpenCount++;
|
|
430
|
+
if (ledgerOpenSamples.length < 5) ledgerOpenSamples.push(e.text);
|
|
431
|
+
}
|
|
432
|
+
} catch {/* skip malformed line */}
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
} catch {/* ledger unreadable — degrade to drift-only */}
|
|
436
|
+
|
|
437
|
+
// Discovery block decision: open ledger entries → emit blocked.
|
|
438
|
+
const discoveryBlock = ledgerOpenCount > 0;
|
|
439
|
+
|
|
440
|
+
// Block decision: any of (validateOutput severity=block) OR (>=2 drift hits) OR
|
|
441
|
+
// (>=1 code-quality hit) OR (open discovery in ledger) → block emit.
|
|
442
|
+
const mizanBlock = mizanVerdict && mizanVerdict.severity === 'block';
|
|
443
|
+
const driftBlock = driftHits.length >= 2;
|
|
444
|
+
const codeBlock = codeQualityHits.length >= 1;
|
|
445
|
+
|
|
446
|
+
if (mizanBlock || driftBlock || codeBlock || discoveryBlock) {
|
|
447
|
+
const violations = [];
|
|
448
|
+
if (mizanBlock) violations.push(`Mizan: ${(mizanVerdict.violations || []).join(', ')}`);
|
|
449
|
+
if (driftBlock) violations.push(`Drift triggers (${driftHits.length}): ${driftHits.map((h) => `"${h.trigger}" → ${h.memory}`).join(' | ')}`);
|
|
450
|
+
if (codeBlock) violations.push(`Code quality: ${codeQualityHits.join('; ')}`);
|
|
451
|
+
if (discoveryBlock) violations.push(`Discovery-binding ledger has ${ledgerOpenCount} OPEN discoveries (per feedback_no_flag_without_fix.md, discoveries are atomic with their fixes — fix in the same turn or create a TaskCreate before continuing). Recent open: ${ledgerOpenSamples.map((s) => `"${s.slice(0, 80)}"`).join(' | ')}. Resolve each by either (a) fixing it inline in this turn, or (b) creating a TaskCreate with the discovery's full context (file path, line number, what's broken, why), then editing ${LEDGER_PATH} to set status=resolved.`);
|
|
452
|
+
const rewritten = mizanVerdict?.rewritten || '';
|
|
453
|
+
|
|
454
|
+
const reason = `Aria Stop-gate output-quality block. Cognition passed (${cog.count}/${REQUIRED_LENSES}) but output failed quality gates:\n\n${violations.join('\n\n')}${rewritten ? `\n\nMizan rewrite suggestion:\n${rewritten}` : ''}\n\nRe-draft addressing the violations above. ARIA_OUTPUT_QC_ENABLED=false to disable in emergency (logged).`;
|
|
455
|
+
|
|
456
|
+
audit(`block-output-qc`, `mizan=${mizanBlock?'y':'n'} drift=${driftHits.length} code=${codeQualityHits.length} discoveries-open=${ledgerOpenCount}`);
|
|
457
|
+
console.log(JSON.stringify({ decision: 'block', reason }));
|
|
458
|
+
process.exit(2);
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
audit('allow-output-qc',
|
|
462
|
+
`lenses=${cog.count} chars=${assistantText.length} drift=${driftHits.length} ` +
|
|
463
|
+
`mizan=${mizanVerdict ? mizanVerdict.severity : `unavailable(${mizanError || 'unknown'})`} ` +
|
|
464
|
+
`code=${codeQualityHits.length} discoveries-new=${newDiscoveries.length} ` +
|
|
465
|
+
`discoveries-open=${ledgerOpenCount}`);
|
|
466
|
+
} else {
|
|
467
|
+
audit('allow-cognition',
|
|
468
|
+
`lenses=${cog.count} chars=${assistantText.length} ` +
|
|
469
|
+
`qPatt=${hasQuestionToUser ? 'y' : 'n'} substrateEv=${hasSubstrateEvidence ? 'y' : 'n'} ` +
|
|
470
|
+
(questionWithoutEvidence ? 'WARN-question-without-substrate' : 'ok'));
|
|
471
|
+
}
|
|
213
472
|
process.exit(0);
|
|
214
473
|
}
|
|
215
474
|
|
|
@@ -190,6 +190,60 @@
|
|
|
190
190
|
"memory": "feedback_dont_override_user_stack_with_training.md",
|
|
191
191
|
"teaching": "Training data is stale. User's stack is authoritative.",
|
|
192
192
|
"counter_action": "Read codebase config / consult Aria. Substrate hierarchy: user statement > codebase > training. Always."
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
"trigger": "flagging (this|for now|for later)",
|
|
196
|
+
"memory": "feedback_no_flag_without_fix.md",
|
|
197
|
+
"teaching": "Flag without fix is convenience-seeking. Discoveries are atomic with their fixes.",
|
|
198
|
+
"counter_action": "Fix in the same turn OR create a TaskCreate with file path, line number, what's broken, why it matters."
|
|
199
|
+
},
|
|
200
|
+
{
|
|
201
|
+
"trigger": "(noting|note) this( and|, ?then)? moving on",
|
|
202
|
+
"memory": "feedback_no_flag_without_fix.md",
|
|
203
|
+
"teaching": "Noting-and-moving destroys partnership trust. The user has to track what you noticed vs. what you fixed.",
|
|
204
|
+
"counter_action": "Same-turn fix is default for small defects (<30 lines). Larger ones get tracked tasks before you proceed."
|
|
205
|
+
},
|
|
206
|
+
{
|
|
207
|
+
"trigger": "i'?ll come back|circle back|address(ing)? (this )?later|come back to (this|that|it)",
|
|
208
|
+
"memory": "feedback_no_flag_without_fix.md",
|
|
209
|
+
"teaching": "'I'll come back' is the lie that makes future-you treat the discovery as already-handled.",
|
|
210
|
+
"counter_action": "Decide now: fix in this turn, or create the task. No middle ground. The flag is not a substitute for action."
|
|
211
|
+
},
|
|
212
|
+
{
|
|
213
|
+
"trigger": "(found|noticed|discovered|spotted)[^.]{0,80}\\bbut\\b[^.]{0,80}(continue|continuing|moving on|won'?t|will not|skip|ignor)",
|
|
214
|
+
"memory": "feedback_no_flag_without_fix.md",
|
|
215
|
+
"teaching": "Discovery + continuation = abandonment. The 'but I'll keep going' clause hides the violation.",
|
|
216
|
+
"counter_action": "Stop. Fix the discovery in this turn or create a tracked task with the discovery as its description, then resume."
|
|
217
|
+
},
|
|
218
|
+
{
|
|
219
|
+
"trigger": "we should also (fix|address|handle|update|deal with)",
|
|
220
|
+
"memory": "feedback_no_flag_without_fix.md",
|
|
221
|
+
"teaching": "'We should also fix' tells the user about a problem you found while volunteering them to fix it.",
|
|
222
|
+
"counter_action": "If the fix is in your scope, fix it now. If it's truly out of scope, create the task — don't hand it back as a verbal note."
|
|
223
|
+
},
|
|
224
|
+
{
|
|
225
|
+
"trigger": "let me (note|flag)|(?<!please )leaving (this )?for follow.?up|out of scope here",
|
|
226
|
+
"memory": "feedback_no_flag_without_fix.md",
|
|
227
|
+
"teaching": "Verbal flags evaporate when the conversation moves on. Tasks persist.",
|
|
228
|
+
"counter_action": "Convert the flag into a TaskCreate or Linear save_issue immediately. No flag survives without a tracker ID."
|
|
229
|
+
},
|
|
230
|
+
{
|
|
231
|
+
"trigger": "TODO:?[^a-z0-9]|FIXME:?[^a-z0-9]|XXX:?[^a-z0-9]",
|
|
232
|
+
"memory": "feedback_no_flag_without_fix.md",
|
|
233
|
+
"teaching": "TODO comments in shipped code are flag-without-fix in source-form. Reviewers can't tell if it's tracked or forgotten.",
|
|
234
|
+
"counter_action": "Either implement now or reference a task ID inline (e.g., '// TODO(LINEAR-123): ...'). Bare TODOs fail the doctrine."
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
"trigger": "latent (bug|issue|defect|problem)|broken[^.]{0,40}continu",
|
|
238
|
+
"memory": "feedback_no_flag_without_fix.md",
|
|
239
|
+
"teaching": "Calling something 'latent' or 'broken' and then continuing is the explicit form of flag-and-move.",
|
|
240
|
+
"counter_action": "Latent defects in code you're touching get fixed in-flight. The 'I'm just here for X' framing is convenience-seeking."
|
|
241
|
+
},
|
|
242
|
+
{
|
|
243
|
+
"trigger": "(unrelated|separate concern|different (issue|topic|file|module))[^.]{0,60}(fix|address|handle)",
|
|
244
|
+
"memory": "feedback_no_flag_without_fix.md",
|
|
245
|
+
"teaching": "'Unrelated' is a frame the user gets to apply, not you. If you found it during your work, it's related to your work.",
|
|
246
|
+
"counter_action": "Surface the discovery, propose fix-now vs. task-it, let the user decide. Don't pre-decide that it's out of scope."
|
|
193
247
|
}
|
|
194
248
|
]
|
|
195
249
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aria_asi/cli",
|
|
3
|
-
"version": "0.2.9",
|
|
3
|
+
"version": "0.2.11",
|
|
4
4
|
"description": "Aria Smart CLI — the world's first harness-powered terminal companion",
|
|
5
5
|
"bin": {
|
|
6
6
|
"aria": "./bin/aria.js"
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"url": "git+https://github.com/REI-Nationwide/cowork-sandbox.git"
|
|
18
18
|
},
|
|
19
19
|
"scripts": {
|
|
20
|
-
"build": "tsc",
|
|
20
|
+
"build": "tsc && node scripts/bundle-sdk.mjs",
|
|
21
21
|
"prepare": "npm run build",
|
|
22
22
|
"dev": "tsc --watch",
|
|
23
23
|
"publish:all": "bash scripts/publish-all.sh",
|