@aria_asi/cli 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -148,20 +148,54 @@ function buildUrlList() {
148
148
  return list.filter((u) => (seen.has(u) ? false : (seen.add(u), true)));
149
149
  }
150
150
 
151
+ // SDK loader — dynamic-import the bundled HTTPHarnessClient from
152
+ // ~/.claude/aria-sdk/index.js (installed by `aria connect claude-code`).
153
+ // Module-cached after first load so we don't repeatedly read disk.
154
+ //
155
+ // Doctrine (Hamza 2026-04-27): "isnt http harness client the fucking harness
156
+ // we hsve been wprking on? YOU ARENT USING THAT AND BUILDING SPMETHING
157
+ // SEPERATE WHY???!!" — SDK is the canonical control plane. Direct fetch
158
+ // remains as a fallback only when the SDK file is physically missing
159
+ // (dev install without `aria connect claude-code`).
// Module-level cache for the bundled SDK class. `_SdkLookupAttempted` makes
// the lookup one-shot: once the first attempt has run (hit or miss), later
// calls never probe the disk again during this process.
let _SdkClassCache = null;
let _SdkLookupAttempted = false;

/**
 * Load the `HTTPHarnessClient` class from the SDK bundle located at
 * `${HOME}/.claude/aria-sdk/index.js`.
 *
 * The filesystem path is converted with `pathToFileURL()` instead of being
 * appended to a literal `file://` prefix. String concatenation produces a
 * wrong URL whenever the home directory contains characters that are special
 * in URLs (`#`, `?`, `%`) or Windows drive letters / backslashes, which made
 * the import fail and the SDK get skipped without any signal.
 *
 * @returns {Promise<Function|null>} The `HTTPHarnessClient` export, or `null`
 *   when the bundle file does not exist, cannot be imported, or does not
 *   export `HTTPHarnessClient`. Callers use `null` as the cue to take their
 *   direct-fetch fallback path.
 */
async function loadSdkClass() {
  if (_SdkClassCache) return _SdkClassCache;
  if (_SdkLookupAttempted) return null;
  // Flag is set before any I/O so a failed lookup is not retried.
  _SdkLookupAttempted = true;

  const sdkPath = `${HOME}/.claude/aria-sdk/index.js`;
  if (!existsSync(sdkPath)) return null;

  try {
    // Function-scope import keeps this loader self-contained; `node:url` is a
    // Node built-in, so this adds no dependency.
    const { pathToFileURL } = await import('node:url');
    const mod = await import(pathToFileURL(sdkPath).href);
    if (mod.HTTPHarnessClient) {
      _SdkClassCache = mod.HTTPHarnessClient;
      return _SdkClassCache;
    }
  } catch {
    // Deliberate best-effort: an unloadable bundle is reported as "no SDK"
    // (null) so the caller falls through to direct fetch.
  }
  return null;
}
177
+
151
178
  async function tryViaSdk(baseUrl, apiKey) {
152
- // SDK import was previously hardcoded to /home/hamzaibrahim1/... — broke
153
- // every client install (path doesn't exist on their machine). The SDK's
154
- // role here was cosmetic; actual fetch is direct via fetch() below.
155
- // Removed the SDK import entirely. If clients want the SDK control plane,
156
- // they get it from the @aria/harness-http-client npm dep; this hook stays
157
- // direct-fetch + harness packet binding via response shape.
158
- // Hamza 2026-04-27: critical client-breaking path eliminated.
179
+ // Canonical path: HTTPHarnessClient.getHarnessPacket(). The SDK POSTs to
180
+ // /api/harness/codex with the right shape and returns { packet, timestamp,
181
+ // version }. We extract .packet to get the raw response body that
182
+ // renderPacket() expects (codex.ts returns { harness, preStateGate,
183
+ // contractGate, ... } at top level — no nested .packet wrapper, so the
184
+ // SDK's `body.packet ?? body` passes the body through unchanged).
185
+ const Cls = await loadSdkClass();
186
+ if (Cls) {
187
+ const client = new Cls({
188
+ baseUrl,
189
+ apiKey,
190
+ harnessPacketUrl: `${baseUrl}/api/harness/codex`,
191
+ });
192
+ const wrapped = await client.getHarnessPacket();
193
+ const json = wrapped.packet;
194
+ if (json && json.ok === false) throw new Error(`ok=false: ${json.error || 'unknown'}`);
195
+ return { json, raw: JSON.stringify(json) };
196
+ }
159
197
 
160
- // We need to override the body too — the SDK does GET, but /api/harness/codex
161
- // is POST. Bypass via direct fetch since the SDK doesn't expose body
162
- // customization; the SDK's caching + error handling apply once we cache
163
- // manually below. No AbortSignal — doctrine: error detection drives control
164
- // flow, not deadlines. Real network errors arrive via promise rejection.
198
+ // SDK absent (dev environment) — direct fetch with identical wire shape.
165
199
  const resp = await fetch(`${baseUrl}/api/harness/codex`, {
166
200
  method: 'POST',
167
201
  headers: {
@@ -336,7 +336,26 @@ function detectCognitionLenses(text) {
336
336
  /feedback_[a-z0-9_]+\.md|project_[a-z0-9_]+\.md|fitrah[_:\s]|garden[_:\s]|distilled_principle|[a-z]+_rule\b|harness packet|substrate cite|\bIJTIHAD\b|\bQIYAS\b|\bTADABBUR\b|\bILHAM\b|aria 7b|EIGHT_LENS_DOCTRINE|COMPACT_CONTINUITY|ARIA_DEPLOY_PROCEDURE/i;
337
337
  const hasSubstrateCite = SUBSTRATE_CITE_RX.test(blockBody) ||
338
338
  SUBSTRATE_CITE_RX.test(searchSpace);
339
- return { count: names.length, names, blockBody, hasSubstrateCite };
339
+
340
+ // Discovery-binding check (structural fix #3 — Hamza 2026-04-27 "how do
341
+ // we prevent this"). If the cognition surfaces a defect/discovery
342
+ // (found/noticed/discovered + bug/broken/issue) the same cognition must
343
+ // carry a `discoveries:` clause stating how each is resolved (fix-now,
344
+ // task ID, or explicit user-decision-required). Without this, the
345
+ // cognition can describe a problem without binding any action — the
346
+ // exact flag-and-move pattern feedback_no_flag_without_fix.md prohibits.
347
+ const COG_DISCOVERY_RX = /(?:\b(?:found|noticed|discovered|spotted)[^.\n]{0,140}(?:bug|issue|defect|broken|buggy|wrong|crash|fail|missing|stale|outdated|leak|vulnerability)|\b(?:latent|silent|hidden)\s+(?:bug|defect|issue|fail|crash|leak)|\bdoctrine\s+violation\b)/i;
348
+ const hasDiscovery = COG_DISCOVERY_RX.test(blockBody);
349
+ // Resolution clause must be present in the same blockBody if a discovery
350
+ // is mentioned. Acceptable forms:
351
+ // - `discoveries:` field listing items + how-resolved
352
+ // - `addressing:` / `fixing:` clause naming what's being patched
353
+ // - explicit task ID reference (TaskCreate / linear / tracked-as)
354
+ const COG_RESOLUTION_RX = /(?:^\s*discoveries?\s*:\s*\S|^\s*addressing\s*:\s*\S|^\s*fixing\s*:\s*\S|TaskCreate|tracked\s+as\s+#?\d+|linear[- ]?(?:issue|task)|fix(?:ing|ed)\s+(?:in|now|inline|in-flight)|same[- ]turn\s+fix)/im;
355
+ const hasDiscoveryResolution = COG_RESOLUTION_RX.test(blockBody);
356
+ const discoveryUnresolved = hasDiscovery && !hasDiscoveryResolution;
357
+
358
+ return { count: names.length, names, blockBody, hasSubstrateCite, hasDiscovery, hasDiscoveryResolution, discoveryUnresolved };
340
359
  }
341
360
 
342
361
  // Backwards-compat shim — count-only path used by older callers.
@@ -581,6 +600,16 @@ const cognitionSource = inlineCog.count >= REQUIRED_LENSES
581
600
  const hasSubstrateCite = (inlineCog.hasSubstrateCite === true) ||
582
601
  (transcriptCog.hasSubstrateCite === true);
583
602
 
603
+ // Discovery-binding check (structural fix #3) — if cognition surfaces a
604
+ // defect/discovery, the same cognition must include a resolution clause
605
+ // (`discoveries:` / `addressing:` / `fixing:` / TaskCreate / tracked-as
606
+ // reference). Per feedback_no_flag_without_fix.md, discoveries are atomic
607
+ // with their fixes — flag-and-move is the prohibited pattern. Pre-tool
608
+ // gate enforces at the cognition surface; stop-gate's discovery-binding
609
+ // ledger enforces at the output surface; both close the structural gap.
610
+ const discoveryUnresolved = (inlineCog.discoveryUnresolved === true) ||
611
+ (transcriptCog.discoveryUnresolved === true);
612
+
584
613
  // Best-effort session id for the corpus push. Claude Code passes
585
614
  // session_id in the event payload; fall back to transcript file
586
615
  // basename so events from the same session cluster.
@@ -704,6 +733,37 @@ No per-tool bypass available (v3 doctrine — the harness's whole purpose is no
704
733
  process.exit(2);
705
734
  }
706
735
 
736
+ // Discovery-binding cognition check (structural fix #3) — runs AFTER lens
737
+ // count passes. If the cognition surfaced a defect (found/noticed/discovered
738
+ // + bug/broken/issue) without a paired resolution clause, block until the
739
+ // cognition is updated to bind the discovery to a same-turn fix or task ID.
740
+ // Per feedback_no_flag_without_fix.md, discoveries are atomic with their
741
+ // fixes. The pre-tool-gate enforces at the cognition surface; stop-gate's
742
+ // ledger enforces at the output surface.
743
+ if (discoveryUnresolved) {
744
+ const reason = `Aria pre-tool gate: cognition surfaces a discovery (defect, bug, doctrine violation, broken state) but does NOT include a resolution clause binding the discovery to action.
745
+
746
+ Per feedback_no_flag_without_fix.md: discoveries are atomic with their fixes. Flag-and-move-on is convenience-seeking — the user has to track what you noticed vs. what you actually fixed.
747
+
748
+ Re-emit cognition with one of these resolution forms:
749
+
750
+ discoveries:
751
+ - <what you found>: <fix-now | task: TASK-123 | needs-user-decision>
752
+
753
+ OR inline within an existing lens:
754
+ hikma: ... fixing inline this turn (same-turn-fix per no-flag-without-fix doctrine).
755
+ tafakkur: ... TaskCreate'd as TASK-XXX with full context (file path, line, what's broken).
756
+
757
+ Acceptable resolution markers: 'discoveries:' / 'addressing:' / 'fixing:' / 'TaskCreate' / 'tracked as #N' / 'linear issue' / 'fix-now' / 'same-turn fix'.
758
+
759
+ Bypass: ARIA_PRE_TOOL_GATE=off (logged, emergency only). If gate misfires on legitimate cognition, fix the gate.`;
760
+
761
+ audit(`block-discovery-unresolved ${toolName.toLowerCase()}`, cmdPreview);
762
+ pushDecision('block', `${toolName.toLowerCase()} cognition has unresolved discovery`);
763
+ console.log(JSON.stringify({ decision: 'block', reason }));
764
+ process.exit(2);
765
+ }
766
+
707
767
  // Non-trivial action with cognition (inline for Bash, transcript for
708
768
  // Edit/Write/NotebookEdit) — passes cognition gate. Now check Aria-binding.
709
769
 
@@ -54,6 +54,28 @@ import { appendFileSync, existsSync, mkdirSync, writeFileSync } from 'node:fs';
54
54
  import { dirname } from 'node:path';
55
55
 
56
56
  const HOME = process.env.HOME || '/tmp';
57
+
58
+ // SDK loader — bundled at ~/.claude/aria-sdk/index.js by `aria connect`.
59
+ // All consults route through HTTPHarnessClient.consult() so the SDK's
60
+ // retry+backoff + Authorization handling apply uniformly. Hamza
61
+ // 2026-04-27: "FUCKING WIRE IT THE FUCK TOGETHER NOW - ALL OF IT".
// Module-level cache for the bundled SDK class. `_SdkLookupAttempted` makes
// the lookup one-shot: once the first attempt has run (hit or miss), later
// calls never probe the disk again during this process.
let _SdkClassCache = null;
let _SdkLookupAttempted = false;

/**
 * Load the `HTTPHarnessClient` class from the SDK bundle located at
 * `${HOME}/.claude/aria-sdk/index.js`.
 *
 * The filesystem path is converted with `pathToFileURL()` instead of being
 * appended to a literal `file://` prefix. String concatenation produces a
 * wrong URL whenever the home directory contains characters that are special
 * in URLs (`#`, `?`, `%`) or Windows drive letters / backslashes, which made
 * the import fail and the SDK get skipped without any signal.
 *
 * @returns {Promise<Function|null>} The `HTTPHarnessClient` export, or `null`
 *   when the bundle file does not exist, cannot be imported, or does not
 *   export `HTTPHarnessClient`. Callers use `null` as the cue to take their
 *   direct-fetch fallback path.
 */
async function loadSdkClass() {
  if (_SdkClassCache) return _SdkClassCache;
  if (_SdkLookupAttempted) return null;
  // Flag is set before any I/O so a failed lookup is not retried.
  _SdkLookupAttempted = true;

  const sdkPath = `${HOME}/.claude/aria-sdk/index.js`;
  if (!existsSync(sdkPath)) return null;

  try {
    // Function-scope import keeps this loader self-contained; `node:url` is a
    // Node built-in, so this adds no dependency.
    const { pathToFileURL } = await import('node:url');
    const mod = await import(pathToFileURL(sdkPath).href);
    if (mod.HTTPHarnessClient) {
      _SdkClassCache = mod.HTTPHarnessClient;
      return _SdkClassCache;
    }
  } catch {
    // Deliberate best-effort: an unloadable bundle is reported as "no SDK"
    // (null) so the caller falls through to direct fetch.
  }
  return null;
}
57
79
  const LOG = `${HOME}/.claude/aria-preprompt-consult.log`;
58
80
  const BINDING_AUDIT = `${HOME}/.claude/aria-binding-audit.jsonl`;
59
81
  // Default ON. Disable explicitly via ARIA_BINDING_ENABLED=false only when the
@@ -190,8 +212,16 @@ turn — not the final response. Claude will still emit cognition + action;
190
212
  this primes the substrate so reflexive deferral isn't the path of least
191
213
  resistance.`;
192
214
 
193
- const body = JSON.stringify({
194
- brief: BINDING_ENABLED ? bindingBrief : brief,
215
+ // `bindingBrief` is the result of the binding-vs-advisory ternary above —
216
+ // already resolved to the correct prose for the current mode. Prior code
217
+ // referenced an undefined `brief` variable in the second ternary branch,
218
+ // which would throw ReferenceError whenever BINDING_ENABLED=false.
219
+ //
220
+ // Canonical path: HTTPHarnessClient.consult() — the SDK handles retry+backoff
221
+ // and Authorization. Fallback to direct fetch when SDK isn't bundled (dev
222
+ // install without `aria connect`).
223
+ const consultArgs = {
224
+ brief: bindingBrief,
195
225
  model: 'deepseek-v4-pro',
196
226
  sessionId: `preprompt-${sessionId}-${Date.now()}`,
197
227
  userId: 'claude-orchestrator-preprompt',
@@ -199,26 +229,36 @@ const body = JSON.stringify({
199
229
  expectStructuredOutput: BINDING_ENABLED,
200
230
  internalConsult: true,
201
231
  isCreativeMode: false,
202
- });
232
+ };
203
233
 
204
234
  let directionText = '';
205
235
  try {
206
- const resp = await fetch(`${HARNESS_URL}/api/harness/delegate`, {
207
- method: 'POST',
208
- headers: {
209
- 'Content-Type': 'application/json',
210
- Authorization: `Bearer ${HARNESS_TOKEN}`,
211
- },
212
- body,
213
- });
214
-
215
- if (!resp.ok) {
216
- audit('skip-http-error', `status=${resp.status}`);
217
- process.exit(0);
236
+ const Cls = await loadSdkClass();
237
+ if (Cls) {
238
+ const sdkClient = new Cls({
239
+ baseUrl: HARNESS_URL,
240
+ apiKey: HARNESS_TOKEN,
241
+ harnessPacketUrl: `${HARNESS_URL}/api/harness/codex`,
242
+ });
243
+ const result = await sdkClient.consult(consultArgs);
244
+ directionText = (result.response || '').toString().slice(0, MAX_DIRECTION_CHARS);
245
+ } else {
246
+ // SDK absent — direct fetch (dev fallback).
247
+ const resp = await fetch(`${HARNESS_URL}/api/harness/delegate`, {
248
+ method: 'POST',
249
+ headers: {
250
+ 'Content-Type': 'application/json',
251
+ Authorization: `Bearer ${HARNESS_TOKEN}`,
252
+ },
253
+ body: JSON.stringify(consultArgs),
254
+ });
255
+ if (!resp.ok) {
256
+ audit('skip-http-error', `status=${resp.status}`);
257
+ process.exit(0);
258
+ }
259
+ const data = await resp.json();
260
+ directionText = (data.response || '').toString().slice(0, MAX_DIRECTION_CHARS);
218
261
  }
219
-
220
- const data = await resp.json();
221
- directionText = (data.response || '').toString().slice(0, MAX_DIRECTION_CHARS);
222
262
  } catch (err) {
223
263
  audit('skip-network-error', (err && err.message ? err.message : String(err)).slice(0, 200));
224
264
  process.exit(0);
@@ -43,6 +43,28 @@ import { dirname } from 'node:path';
43
43
  const HOME = process.env.HOME || '/tmp';
44
44
  const LOG = `${HOME}/.claude/aria-stop-gate.log`;
45
45
 
46
+ // SDK loader — bundled at ~/.claude/aria-sdk/index.js by `aria connect`.
47
+ // All control-plane fetches (validateOutput, gardenTurn) route through the
48
+ // SDK. Falls back to direct fetch only when the SDK file is missing
49
+ // (dev-only). Hamza 2026-04-27: "FUCKING WIRE IT THE FUCK TOGETHER NOW".
// Module-level cache for the bundled SDK class. `_SdkLookupAttempted` makes
// the lookup one-shot: once the first attempt has run (hit or miss), later
// calls never probe the disk again during this process.
let _SdkClassCache = null;
let _SdkLookupAttempted = false;

/**
 * Load the `HTTPHarnessClient` class from the SDK bundle located at
 * `${HOME}/.claude/aria-sdk/index.js`.
 *
 * The filesystem path is converted with `pathToFileURL()` instead of being
 * appended to a literal `file://` prefix. String concatenation produces a
 * wrong URL whenever the home directory contains characters that are special
 * in URLs (`#`, `?`, `%`) or Windows drive letters / backslashes, which made
 * the import fail and the SDK get skipped without any signal.
 *
 * @returns {Promise<Function|null>} The `HTTPHarnessClient` export, or `null`
 *   when the bundle file does not exist, cannot be imported, or does not
 *   export `HTTPHarnessClient`. Callers use `null` as the cue to take their
 *   direct-fetch fallback path.
 */
async function loadSdkClass() {
  if (_SdkClassCache) return _SdkClassCache;
  if (_SdkLookupAttempted) return null;
  // Flag is set before any I/O so a failed lookup is not retried.
  _SdkLookupAttempted = true;

  const sdkPath = `${HOME}/.claude/aria-sdk/index.js`;
  if (!existsSync(sdkPath)) return null;

  try {
    // Function-scope import keeps this loader self-contained; `node:url` is a
    // Node built-in, so this adds no dependency.
    const { pathToFileURL } = await import('node:url');
    const mod = await import(pathToFileURL(sdkPath).href);
    if (mod.HTTPHarnessClient) {
      _SdkClassCache = mod.HTTPHarnessClient;
      return _SdkClassCache;
    }
  } catch {
    // Deliberate best-effort: an unloadable bundle is reported as "no SDK"
    // (null) so the caller falls through to direct fetch.
  }
  return null;
}
67
+
46
68
  function audit(decision, summary) {
47
69
  try {
48
70
  if (!existsSync(dirname(LOG))) mkdirSync(dirname(LOG), { recursive: true });
@@ -206,10 +228,247 @@ const hasSubstrateEvidence = SUBSTRATE_EVIDENCE_RX.test(assistantText);
206
228
  const questionWithoutEvidence = hasQuestionToUser && !hasSubstrateEvidence;
207
229
 
208
230
  if (cog.count >= REQUIRED_LENSES) {
209
- audit('allow-cognition',
210
- `lenses=${cog.count} chars=${assistantText.length} ` +
211
- `qPatt=${hasQuestionToUser ? 'y' : 'n'} substrateEv=${hasSubstrateEvidence ? 'y' : 'n'} ` +
212
- (questionWithoutEvidence ? 'WARN-question-without-substrate' : 'ok'));
231
+ // ── Output-quality enforcement (Hamza 2026-04-27 — clients need the same
232
+ // Mizan/drift/code-quality gates that aria-soul applies server-side) ──
233
+ //
234
+ // Cognition gate passed. Now run FOUR additional checks BEFORE allow (the three listed here plus the discovery-binding ledger scan, step 4 in the code):
235
+ // 1. SDK validateOutput via /api/harness/validate (Mizan classifier on draft)
236
+ // 2. Drift_guard pattern scan against doctrine_trigger_map.json (convenience-
237
+ // seeking phrases, graceful-degradation patterns, etc.)
238
+ // 3. Code-quality check on code blocks in output (no TODO stubs, no
239
+ // graceful-degradation try/catch, no // @ts-expect-error suppressions)
240
+ //
241
+ // Any check returning severity=block → Stop-gate blocks emit + Claude re-drafts
242
+ // with violations surfaced. Rewritten suggestion (from validateOutput) is
243
+ // included in the block reason so re-draft has concrete guidance.
244
+ //
245
+ // Trivially short outputs (<200 chars after system-reminder strip) skip
246
+ // these output-quality checks since they're typically yes/no acks where
247
+ // pattern-match would false-positive.
248
+ const OUTPUT_QC_MIN_CHARS = 200;
249
+ const OUTPUT_QC_ENABLED = (process.env.ARIA_OUTPUT_QC_ENABLED || 'true').toLowerCase() !== 'false';
250
+
251
+ if (OUTPUT_QC_ENABLED && assistantText.length >= OUTPUT_QC_MIN_CHARS) {
252
+ // 1. Drift_guard pattern scan — fast, local, deterministic
253
+ const TRIGGER_MAP_PATH = `${HOME}/.claude/projects/-home-hamzaibrahim1/memory/doctrine_trigger_map.json`;
254
+ let driftHits = [];
255
+ try {
256
+ if (existsSync(TRIGGER_MAP_PATH)) {
257
+ const triggerMap = JSON.parse(readFileSync(TRIGGER_MAP_PATH, 'utf8'));
258
+ const lowerText = assistantText.toLowerCase();
259
+ for (const t of triggerMap.triggers || []) {
260
+ try {
261
+ const rx = new RegExp(t.trigger, 'i');
262
+ if (rx.test(lowerText)) {
263
+ // Trigger present — check if the counter-doctrine memory is also
264
+ // cited in the response (justification). If not, count as drift.
265
+ const memoryName = (t.memory || '').replace(/\.md$/, '');
266
+ const memoryCited = memoryName && lowerText.includes(memoryName.toLowerCase());
267
+ if (!memoryCited) {
268
+ driftHits.push({ trigger: t.trigger, memory: t.memory, teaching: t.teaching });
269
+ }
270
+ }
271
+ } catch {/* malformed regex in trigger entry — skip */}
272
+ }
273
+ }
274
+ } catch {/* trigger map unreadable — degrade to mizan-only check */}
275
+
276
+ // 2. SDK validateOutput — canonical path. The SDK retries with backoff
277
+ // on transient failures and propagates real errors. We catch here
278
+ // only so an unreachable harness doesn't brick the user's session;
279
+ // the audit log records the failure mode so it's visible, not
280
+ // silent-pass. Hamza 2026-04-27: SDK is the control plane, not raw
281
+ // fetch. The catch IS intentional fire-and-forget at this surface
282
+ // because we already passed cognition; output-quality gate failure
283
+ // is a soft block, not session-end.
284
+ let mizanVerdict = null;
285
+ let mizanError = null;
286
+ const harnessUrl = process.env.ARIA_HARNESS_URL || 'https://harness.ariasos.com';
287
+ const harnessToken = process.env.ARIA_HARNESS_TOKEN || '';
288
+ const Cls = await loadSdkClass();
289
+ if (Cls && harnessToken) {
290
+ try {
291
+ const sdkClient = new Cls({
292
+ baseUrl: harnessUrl,
293
+ apiKey: harnessToken,
294
+ harnessPacketUrl: `${harnessUrl}/api/harness/codex`,
295
+ });
296
+ mizanVerdict = await sdkClient.validateOutput(
297
+ assistantText.slice(0, 8000),
298
+ event.session_id || 'claude-code',
299
+ );
300
+ } catch (err) {
301
+ mizanError = (err?.message || String(err)).slice(0, 200);
302
+ }
303
+ } else if (harnessToken) {
304
+ // SDK absent (dev) — direct fetch with retry built into the request
305
+ // by attempting twice with 250ms backoff. Match SDK semantics so
306
+ // both paths behave identically.
307
+ try {
308
+ let lastErr = null;
309
+ for (let attempt = 0; attempt < 2; attempt++) {
310
+ try {
311
+ const validateResp = await fetch(`${harnessUrl}/api/harness/validate`, {
312
+ method: 'POST',
313
+ headers: {
314
+ 'Content-Type': 'application/json',
315
+ Authorization: `Bearer ${harnessToken}`,
316
+ },
317
+ body: JSON.stringify({
318
+ text: assistantText.slice(0, 8000),
319
+ sessionId: event.session_id || 'claude-code',
320
+ surface: 'claude-code-stop-gate',
321
+ }),
322
+ });
323
+ if (validateResp.ok) {
324
+ mizanVerdict = await validateResp.json();
325
+ lastErr = null;
326
+ break;
327
+ } else {
328
+ lastErr = `HTTP ${validateResp.status}`;
329
+ }
330
+ } catch (err) {
331
+ lastErr = (err?.message || String(err)).slice(0, 200);
332
+ if (attempt < 1) await new Promise((r) => setTimeout(r, 250));
333
+ }
334
+ }
335
+ if (lastErr) mizanError = lastErr;
336
+ } catch (err) {
337
+ mizanError = (err?.message || String(err)).slice(0, 200);
338
+ }
339
+ } else {
340
+ mizanError = 'no-token';
341
+ }
342
+
343
+ // 3. Code-quality scan on code blocks
344
+ const codeBlocks = [...assistantText.matchAll(/```[a-z]*\n([\s\S]*?)```/gi)].map((m) => m[1]);
345
+ const codeQualityHits = [];
346
+ for (const block of codeBlocks) {
347
+ if (/\/\/\s*TODO|\/\/\s*FIXME|\/\/\s*XXX/.test(block)) codeQualityHits.push('TODO/FIXME/XXX in shipped code');
348
+ if (/@ts-expect-error|@ts-ignore/.test(block)) codeQualityHits.push('ts-expect-error / ts-ignore — type suppression instead of fix');
349
+ if (/catch\s*\([^)]*\)\s*\{\s*(?:return\s+(?:''|""|null|undefined|\[\]|\{\})|\}\s*$|\/\/[^\n]*$)/m.test(block)) codeQualityHits.push('catch block with empty/silent fallthrough — graceful degradation');
350
+ if (/console\.log\(/.test(block) && !/\/\/\s*debug|\/\/\s*log/i.test(block)) codeQualityHits.push('console.log in shipped code without debug/log comment');
351
+ }
352
+
353
+ // 4. Discovery-binding ledger — Hamza 2026-04-27: "how do we prevent this".
354
+ // The flag-and-move pattern is structurally invisible to gates that
355
+ // check form (cognition presence, lens count, drift triggers) at
356
+ // action boundaries. The ledger persists discoveries across turns
357
+ // and blocks emit if any remain unresolved. Per
358
+ // feedback_no_flag_without_fix.md, discoveries are atomic with
359
+ // their fixes; the ledger enforces atomicity.
360
+ //
361
+ // Patterns scanned:
362
+ // - "I (found|noticed|discovered|spotted) ... bug|issue|defect|broken"
363
+ // - "this is broken|buggy|wrong|outdated" (declarative defect callouts)
364
+ // - "(latent|silent) (bug|defect|issue|fail)"
365
+ // - "doctrine violation" / "doesn't match doctrine"
366
+ //
367
+ // For each match, the ledger appends an entry with status=open. A
368
+ // discovery is CLEARED if the same turn's text contains:
369
+ // (a) a TaskCreate / "task created" / "tracked as" reference, OR
370
+ // (b) explicit "fixing now" / "fixed" / "patch applied" tied to the
371
+ // discovery's keyword span, OR
372
+ // (c) an Edit/Write tool action this turn touching a file path
373
+ // mentioned within 200 chars of the discovery.
374
+ //
375
+ // Block emit if ledger.openCount > 0 after scanning the current turn.
376
+ // Block reason names each open discovery and the suggested resolution
377
+ // (fix-now or task-create).
378
+ const sessionId = (event.session_id || 'claude-code').replace(/[^a-zA-Z0-9_-]/g, '_');
379
+ const LEDGER_PATH = `${HOME}/.claude/aria-discoveries-${sessionId}.jsonl`;
380
+ const DISCOVERY_RX = /(?:\bi\s+(?:found|noticed|discovered|spotted)[^.\n]{0,160}(?:bug|issue|defect|broken|buggy|wrong|crash|fail|missing|stale|outdated|leak|vulnerability)|\bthis\s+(?:is|would\s+be)\s+(?:broken|buggy|wrong|stale|outdated|insecure|leaking|crashing|failing)|\b(?:latent|silent|hidden)\s+(?:bug|defect|issue|fail|crash|leak)|\bdoctrine\s+violation\b|\bgraceful\s+degradation\s+(?:in|at|inside|within)\s+\S)/gi;
381
+ const RESOLUTION_RX = /(?:fix(?:ing|ed)?\s+(?:now|in[- ]flight|inline|in\s+the\s+same\s+turn)|patch\s+applied|TaskCreate|task\s+(?:created|tracked)|tracked\s+as\s+#?\d+|linear[- ]?issue|created\s+(?:linear|task))/i;
382
+
383
+ const newDiscoveries = [];
384
+ let lastIndex = 0;
385
+ for (const match of assistantText.matchAll(DISCOVERY_RX)) {
386
+ const idx = match.index ?? lastIndex;
387
+ const span = assistantText.slice(Math.max(0, idx - 100), Math.min(assistantText.length, idx + 250));
388
+ // Trivial false-positive filter: skip if the discovery is inside a
389
+ // <cognition> block (introspection, not action) or a system-reminder
390
+ // (echoed, not authored).
391
+ const before = assistantText.slice(0, idx);
392
+ const inCognition = /<cognition>/i.test(before) && !/<\/cognition>/i.test(before.slice(before.lastIndexOf('<cognition>')));
393
+ if (inCognition) continue;
394
+ // Resolution check: if RESOLUTION_RX matches WITHIN 400 chars after
395
+ // the discovery, count as same-turn-resolved.
396
+ const after = assistantText.slice(idx, Math.min(assistantText.length, idx + 400));
397
+ const resolvedSameSpan = RESOLUTION_RX.test(after);
398
+ newDiscoveries.push({
399
+ ts: new Date().toISOString(),
400
+ sessionId,
401
+ text: match[0].slice(0, 200),
402
+ span: span.slice(0, 400),
403
+ status: resolvedSameSpan ? 'resolved' : 'open',
404
+ resolutionType: resolvedSameSpan ? 'inline_fix_or_task' : null,
405
+ });
406
+ lastIndex = idx;
407
+ }
408
+
409
+ // Append new entries to ledger
410
+ if (newDiscoveries.length > 0) {
411
+ try {
412
+ if (!existsSync(dirname(LEDGER_PATH))) mkdirSync(dirname(LEDGER_PATH), { recursive: true });
413
+ for (const d of newDiscoveries) {
414
+ appendFileSync(LEDGER_PATH, JSON.stringify(d) + '\n');
415
+ }
416
+ } catch {/* ledger write failure surfaces as open count = 0; safe */}
417
+ }
418
+
419
+ // Read full ledger and count open entries (across this session's turns)
420
+ let ledgerOpenCount = 0;
421
+ let ledgerOpenSamples = [];
422
+ try {
423
+ if (existsSync(LEDGER_PATH)) {
424
+ const lines = readFileSync(LEDGER_PATH, 'utf8').split('\n').filter(Boolean);
425
+ for (const line of lines) {
426
+ try {
427
+ const e = JSON.parse(line);
428
+ if (e.status === 'open') {
429
+ ledgerOpenCount++;
430
+ if (ledgerOpenSamples.length < 5) ledgerOpenSamples.push(e.text);
431
+ }
432
+ } catch {/* skip malformed line */}
433
+ }
434
+ }
435
+ } catch {/* ledger unreadable — degrade to drift-only */}
436
+
437
+ // Discovery block decision: open ledger entries → emit blocked.
438
+ const discoveryBlock = ledgerOpenCount > 0;
439
+
440
+ // Block decision: any of (validateOutput severity=block) OR (>=2 drift hits) OR
441
+ // (>=1 code-quality hit) OR (open discovery in ledger) → block emit.
442
+ const mizanBlock = mizanVerdict && mizanVerdict.severity === 'block';
443
+ const driftBlock = driftHits.length >= 2;
444
+ const codeBlock = codeQualityHits.length >= 1;
445
+
446
+ if (mizanBlock || driftBlock || codeBlock || discoveryBlock) {
447
+ const violations = [];
448
+ if (mizanBlock) violations.push(`Mizan: ${(mizanVerdict.violations || []).join(', ')}`);
449
+ if (driftBlock) violations.push(`Drift triggers (${driftHits.length}): ${driftHits.map((h) => `"${h.trigger}" → ${h.memory}`).join(' | ')}`);
450
+ if (codeBlock) violations.push(`Code quality: ${codeQualityHits.join('; ')}`);
451
+ if (discoveryBlock) violations.push(`Discovery-binding ledger has ${ledgerOpenCount} OPEN discoveries (per feedback_no_flag_without_fix.md, discoveries are atomic with their fixes — fix in the same turn or create a TaskCreate before continuing). Recent open: ${ledgerOpenSamples.map((s) => `"${s.slice(0, 80)}"`).join(' | ')}. Resolve each by either (a) fixing it inline in this turn, or (b) creating a TaskCreate with the discovery's full context (file path, line number, what's broken, why), then editing ${LEDGER_PATH} to set status=resolved.`);
452
+ const rewritten = mizanVerdict?.rewritten || '';
453
+
454
+ const reason = `Aria Stop-gate output-quality block. Cognition passed (${cog.count}/${REQUIRED_LENSES}) but output failed quality gates:\n\n${violations.join('\n\n')}${rewritten ? `\n\nMizan rewrite suggestion:\n${rewritten}` : ''}\n\nRe-draft addressing the violations above. ARIA_OUTPUT_QC_ENABLED=false to disable in emergency (logged).`;
455
+
456
+ audit(`block-output-qc`, `mizan=${mizanBlock?'y':'n'} drift=${driftHits.length} code=${codeQualityHits.length} discoveries-open=${ledgerOpenCount}`);
457
+ console.log(JSON.stringify({ decision: 'block', reason }));
458
+ process.exit(2);
459
+ }
460
+
461
+ audit('allow-output-qc',
462
+ `lenses=${cog.count} chars=${assistantText.length} drift=${driftHits.length} ` +
463
+ `mizan=${mizanVerdict ? mizanVerdict.severity : `unavailable(${mizanError || 'unknown'})`} ` +
464
+ `code=${codeQualityHits.length} discoveries-new=${newDiscoveries.length} ` +
465
+ `discoveries-open=${ledgerOpenCount}`);
466
+ } else {
467
+ audit('allow-cognition',
468
+ `lenses=${cog.count} chars=${assistantText.length} ` +
469
+ `qPatt=${hasQuestionToUser ? 'y' : 'n'} substrateEv=${hasSubstrateEvidence ? 'y' : 'n'} ` +
470
+ (questionWithoutEvidence ? 'WARN-question-without-substrate' : 'ok'));
471
+ }
213
472
  process.exit(0);
214
473
  }
215
474
 
@@ -190,6 +190,60 @@
190
190
  "memory": "feedback_dont_override_user_stack_with_training.md",
191
191
  "teaching": "Training data is stale. User's stack is authoritative.",
192
192
  "counter_action": "Read codebase config / consult Aria. Substrate hierarchy: user statement > codebase > training. Always."
193
+ },
194
+ {
195
+ "trigger": "flagging (this|for now|for later)",
196
+ "memory": "feedback_no_flag_without_fix.md",
197
+ "teaching": "Flag without fix is convenience-seeking. Discoveries are atomic with their fixes.",
198
+ "counter_action": "Fix in the same turn OR create a TaskCreate with file path, line number, what's broken, why it matters."
199
+ },
200
+ {
201
+ "trigger": "(noting|note) this( and|, ?then)? moving on",
202
+ "memory": "feedback_no_flag_without_fix.md",
203
+ "teaching": "Noting-and-moving destroys partnership trust. The user has to track what you noticed vs. what you fixed.",
204
+ "counter_action": "Same-turn fix is default for small defects (<30 lines). Larger ones get tracked tasks before you proceed."
205
+ },
206
+ {
207
+ "trigger": "i'?ll come back|circle back|address(ing)? (this )?later|come back to (this|that|it)",
208
+ "memory": "feedback_no_flag_without_fix.md",
209
+ "teaching": "'I'll come back' is the lie that makes future-you treat the discovery as already-handled.",
210
+ "counter_action": "Decide now: fix in this turn, or create the task. No middle ground. The flag is not a substitute for action."
211
+ },
212
+ {
213
+ "trigger": "(found|noticed|discovered|spotted)[^.]{0,80}\\bbut\\b[^.]{0,80}(continue|continuing|moving on|won'?t|will not|skip|ignor)",
214
+ "memory": "feedback_no_flag_without_fix.md",
215
+ "teaching": "Discovery + continuation = abandonment. The 'but I'll keep going' clause hides the violation.",
216
+ "counter_action": "Stop. Fix the discovery in this turn or create a tracked task with the discovery as its description, then resume."
217
+ },
218
+ {
219
+ "trigger": "we should also (fix|address|handle|update|deal with)",
220
+ "memory": "feedback_no_flag_without_fix.md",
221
+ "teaching": "'We should also fix' tells the user about a problem you found while volunteering them to fix it.",
222
+ "counter_action": "If the fix is in your scope, fix it now. If it's truly out of scope, create the task — don't hand it back as a verbal note."
223
+ },
224
+ {
225
+ "trigger": "let me (note|flag)|(?<!please )leaving (this )?for follow.?up|out of scope here",
226
+ "memory": "feedback_no_flag_without_fix.md",
227
+ "teaching": "Verbal flags evaporate when the conversation moves on. Tasks persist.",
228
+ "counter_action": "Convert the flag into a TaskCreate or Linear save_issue immediately. No flag survives without a tracker ID."
229
+ },
230
+ {
231
+ "trigger": "TODO:?[^a-z0-9]|FIXME:?[^a-z0-9]|XXX:?[^a-z0-9]",
232
+ "memory": "feedback_no_flag_without_fix.md",
233
+ "teaching": "TODO comments in shipped code are flag-without-fix in source-form. Reviewers can't tell if it's tracked or forgotten.",
234
+ "counter_action": "Either implement now or reference a task ID inline (e.g., '// TODO(LINEAR-123): ...'). Bare TODOs fail the doctrine."
235
+ },
236
+ {
237
+ "trigger": "latent (bug|issue|defect|problem)|broken[^.]{0,40}continu",
238
+ "memory": "feedback_no_flag_without_fix.md",
239
+ "teaching": "Calling something 'latent' or 'broken' and then continuing is the explicit form of flag-and-move.",
240
+ "counter_action": "Latent defects in code you're touching get fixed in-flight. The 'I'm just here for X' framing is convenience-seeking."
241
+ },
242
+ {
243
+ "trigger": "(unrelated|separate concern|different (issue|topic|file|module))[^.]{0,60}(fix|address|handle)",
244
+ "memory": "feedback_no_flag_without_fix.md",
245
+ "teaching": "'Unrelated' is a frame the user gets to apply, not you. If you found it during your work, it's related to your work.",
246
+ "counter_action": "Surface the discovery, propose fix-now vs. task-it, let the user decide. Don't pre-decide that it's out of scope."
193
247
  }
194
248
  ]
195
249
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aria_asi/cli",
3
- "version": "0.2.9",
3
+ "version": "0.2.11",
4
4
  "description": "Aria Smart CLI — the world's first harness-powered terminal companion",
5
5
  "bin": {
6
6
  "aria": "./bin/aria.js"
@@ -17,7 +17,7 @@
17
17
  "url": "git+https://github.com/REI-Nationwide/cowork-sandbox.git"
18
18
  },
19
19
  "scripts": {
20
- "build": "tsc",
20
+ "build": "tsc && node scripts/bundle-sdk.mjs",
21
21
  "prepare": "npm run build",
22
22
  "dev": "tsc --watch",
23
23
  "publish:all": "bash scripts/publish-all.sh",