@maintainabilityai/research-runner 0.1.29 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -817,8 +817,21 @@ const AuditEmitInput = zod_1.z.object({
817
817
  phase: zod_1.z.enum(['why', 'how', 'what']),
818
818
  intentThreadUuid: zod_1.z.string().min(1),
819
819
  });
820
- const LOCK_RETRY_LIMIT = 3;
821
- const LOCK_RETRY_BASE_MS = 50;
820
+ /**
821
+ * Audit-JSONL file-lock retry budget. Sized for parallel auto-emission:
822
+ * the agent often fires 4 search skills concurrently, each completing in
823
+ * ~500ms–3s. When their handlers return at similar times, all 4 try to
824
+ * grab the JSONL lock simultaneously. Pre-B28a.v1.1 the budget was
825
+ * `3 × 50ms linear = 300ms max` which silently dropped 3 of 4 events on
826
+ * PR #108. New budget: 20 retries with exponential 2^n backoff capped at
827
+ * 500ms each (sequence: 100, 200, 400, 500, 500, 500, …) ≈ 9.2s total
828
+ * wait — comfortably tolerates 4–8 parallel skill invocations while
829
+ * staying well under the runner's overall step timeout. Total emission
830
+ * latency stays unchanged in the happy-path single-writer case.
831
+ */
832
+ const LOCK_RETRY_LIMIT = 20;
833
+ const LOCK_RETRY_BASE_MS = 100;
834
+ const LOCK_RETRY_MAX_MS = 500;
822
835
  /** Recursive key-sorted JSON stringify so the event hash is canonical. */
823
836
  function canonicalStringify(value) {
824
837
  if (value === null || typeof value !== 'object') {
@@ -948,7 +961,12 @@ const handleAuditEmitEvent = async (input) => {
948
961
  }
949
962
  catch (err) {
950
963
  if (err.code === 'EEXIST') {
951
- await sleep(LOCK_RETRY_BASE_MS * (attempt + 1));
964
+ // Exponential backoff capped at LOCK_RETRY_MAX_MS. With 20
965
+ // attempts the wait sequence is 100, 200, 400, 500, 500, … ≈
966
+ // 9.2s total — enough headroom for 4–8 parallel auto-emissions
967
+ // from skills firing concurrently (B28a.v1.1).
968
+ const wait = Math.min(LOCK_RETRY_BASE_MS * (2 ** attempt), LOCK_RETRY_MAX_MS);
969
+ await sleep(wait);
952
970
  continue;
953
971
  }
954
972
  return { ok: false, reason: `audit-lock-failed: ${err.message}` };
@@ -1160,10 +1178,14 @@ async function runSkill(name, input) {
1160
1178
  if (!result.ok) {
1161
1179
  payload.reason = result.reason;
1162
1180
  }
1163
- // Best-effort: an audit-write failure must not shadow the real skill
1164
- // result. The chain-verify CI gate is the catch-net for missed events.
1181
+ // Best-effort: an audit-write failure must not shadow the real
1182
+ // skill result. But we MUST surface the failure to stderr — pre-
1183
+ // B28a.v1.1 these were silently swallowed and PR #108 dropped 3
1184
+ // of 4 parallel-search events with no warning. The chain-verify
1185
+ // CI gate still catches gaps post-hoc; this stderr line catches
1186
+ // them at write time.
1165
1187
  try {
1166
- await handleAuditEmitEvent({
1188
+ const emit = await handleAuditEmitEvent({
1167
1189
  okrId: ctx.okrId,
1168
1190
  runId: ctx.runId,
1169
1191
  phase: ctx.phase,
@@ -1171,8 +1193,13 @@ async function runSkill(name, input) {
1171
1193
  eventKind: 'skill_call',
1172
1194
  payload,
1173
1195
  });
1196
+ if (!emit.ok) {
1197
+ process.stderr.write(`::warning::audit auto-emit failed for skill ${name}: ${emit.reason}\n`);
1198
+ }
1199
+ }
1200
+ catch (err) {
1201
+ process.stderr.write(`::warning::audit auto-emit threw for skill ${name}: ${err.message}\n`);
1174
1202
  }
1175
- catch { /* swallow — chain-verify catches gaps */ }
1176
1203
  }
1177
1204
  }
1178
1205
  return result;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@maintainabilityai/research-runner",
3
- "version": "0.1.29",
3
+ "version": "0.1.31",
4
4
  "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
5
5
  "license": "MIT",
6
6
  "author": "MaintainabilityAI",