npm - kushi-agents - Versions diffs - 5.5.1 → 5.6.1 - Mend

kushi-agents 5.5.1 → 5.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +4 -0
package/package.json +1 -1
package/plugin/instructions/learning-candidates.instructions.md +91 -0
package/plugin/instructions/llm-vs-runner.instructions.md +4 -0
package/plugin/runners/lib/learnings.mjs +203 -0
package/plugin/runners/pull-ado.mjs +2 -0
package/plugin/runners/pull-crm.mjs +2 -0
package/plugin/runners/pull-email.mjs +8 -0
package/plugin/runners/pull-meetings.mjs +2 -0
package/plugin/runners/pull-onenote.mjs +19 -0
package/plugin/runners/pull-sharepoint.mjs +2 -0
package/plugin/runners/pull-teams.mjs +2 -0
package/plugin/runners/refresh.mjs +4 -0
package/plugin/runners/test/fixtures/email-novel-error.json +9 -0
package/plugin/runners/test/integration/pull-email.integration.test.mjs +52 -0
package/plugin/runners/test/unit/learnings.test.mjs +124 -0
package/plugin/skills/self-check/run.ps1 +19 -1

package/README.md CHANGED Viewed

@@ -7,6 +7,10 @@
 [![host: VS Code](https://img.shields.io/badge/host-VS%20Code-007acc)](https://gim-home.github.io/kushi/)
 [![spec: agentskills.io](https://img.shields.io/badge/spec-agentskills.io-22c55e)](https://agentskills.io/skill-creation/best-practices)
+> **v5.6.0 — Learning candidates.** Runners now auto-capture novel field errors into `<project>/Evidence/_learnings-candidates/` for human review and upstream promotion. Local-only (no telemetry, no auto-PR), 7-day dedup, classifier filters out user-side and transient errors. Closes the field-bug loop — published kushi installs can now contribute doctrine back. New probe `D48`, new concept doc [Learning candidates](https://gim-home.github.io/kushi/concepts/learning-candidates/), new how-to [Promote a learning candidate](https://gim-home.github.io/kushi/how-to/promote-learning-candidate/). `kushi share-learnings` (opt-in redacted upstream submission) lands in v5.7.0.
+> **v5.5.0 — Deterministic runners.** Nine pull/orchestrator skills are now thin pointers to Node runners under `plugin/runners/`. The LLM picks scope; the runner does HTTP, file IO, week math, and writes evidence. New probes D44–D47 enforce the contract.
 > **v5.2.0 — Hooks + parallel pulls + OTel + teach + schema-evolve.** Pipeline events trigger configurable hooks (`.kushi/hooks/`); pull dispatch is parallel by default (4 workers); OpenTelemetry export is opt-in via `KUSHI_OTEL_ENDPOINT`; `kushi explain <topic>` teaches concepts; `kushi remember <rule>` persists conventions.
 > **v5.1.0 — Living wiki.** Build-state is now incremental: human edits outside `<!-- kushi:auto -->` fences are preserved, contradictions are flagged with Obsidian-compatible callouts (`> [!warning]`), and a new `lint-state` skill monitors wiki health. State/ is a valid [Obsidian](https://obsidian.md) vault — callout syntax, Dataview-compatible frontmatter, and `[[wikilinks]]` all work natively.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "kushi-agents",
-  "version": "5.5.1",
+  "version": "5.6.1",
   "description": "Install Kushi — multi-source project evidence agent with Comprehensive Structured Capture (CSC) into weekly-only files across Email, Teams, OneNote, Loop, SharePoint, Meetings, CRM, ADO. Meetings retain a sibling verbatim/ audit folder. WorkIQ-only for M365 sources (Graph / m365_* FORBIDDEN as fallbacks; user-paste is first-class). Host-agnostic.",
   "type": "module",
   "bin": {

package/plugin/instructions/learning-candidates.instructions.md ADDED Viewed

@@ -0,0 +1,91 @@
+---
+applyTo: "**/plugin/runners/**"
+description: "Doctrine for v5.6.0 learning candidates — when runners write local markdown files capturing novel errors for later human promotion to plugin/learnings/<source>.md."
+---
+# Learning candidates (v5.6.0)
+When a runner hits an error worth remembering, it writes a **learning candidate** markdown file to the project's local Evidence dir. No telemetry. No auto-PR. A maintainer reviews candidates later and promotes the real ones to upstream `plugin/learnings/<source>.md`.
+## Where
+```
+<engagement-root>/<project>/Evidence/_learnings-candidates/
+  YYYY-MM-DD-HHmm_<alias>_<source>_<short-sig>.md
+  _seen.json          (hidden dedup ledger — do not edit)
+```
+## When to emit (runner responsibility)
+The runner calls `emitLearningCandidate({ projectRoot, alias, source, entity, week, error, context })` from `plugin/runners/lib/learnings.mjs` in its catch path. The lib enforces the policy filter; callers always call, lib decides whether to write.
+EMIT for:
+- **Novel signatures** — anything not in the known taxonomy (user-side errors + transient HTTP). Most commonly: Graph/Dataverse returned an unexpected shape, a `$select` field came back null where it never had before, an entity-set name pluralization changed.
+- **`body-unavailable` on 2nd+ sighting** for the same `(source, entity)` across runs. The first sighting is noise (could be a moved page, racing index). The second sighting is a quirk worth capturing.
+DO NOT EMIT for:
+| Signature | Why not |
+|---|---|
+| `bad-args`, `config-missing`, `config-invalid` | User-side — fix the config, not the runner. |
+| `token-expired`, `auth-required`, `auth-failed` | User-side — re-auth. |
+| `folder-not-found`, `entity-not-found` | User-side — typo in `boundaries.yml`. |
+| `cross-tenant-blocked`, `permission-denied` | Tenant policy, not a kushi bug. |
+| `fetch-failed` + HTTP 429/502/503/504/408 | Transient — runner already retried. |
+## Dedup
+Same `<source>:<signature>:<fingerprint-8>` is not re-emitted within 7 days per project. Fingerprint is sha256 over `(source, signature, normalized-message)` where the message has hex blobs and long digit runs redacted. This means the same Graph 500 with a different correlation-id collapses to one candidate, but two genuinely different Graph 500s stay distinct.
+## Candidate file format
+The lib writes a markdown file matching the upstream register template, so promotion is copy-paste:
+```markdown
+### YYYY-MM-DD — <source>: <signature> (<fpr>)
+**Symptom**: <error message> (HTTP <status>) — entity `<entity>` — week <week>
+**Root cause**: _TO INVESTIGATE_
+**Fix / workaround**: _TO INVESTIGATE_
+**Doctrine impact**: register-only — TODO promote on next sighting
+**Discovered during**: alias `<alias>` running pull-<source>
+---
+<!-- machine-readable footer -->
+```yaml
+source: ...
+fingerprint: ...
+captured_at: ...
+```
+```
+A maintainer fills in Root cause + Fix + Doctrine impact before promoting.
+## Orchestrator reporting
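+`refresh.mjs` and `bootstrap.mjs` count the candidate files present at the end of a run and include the count in the stdout JSON (`refresh.mjs` emits it as `learning_candidates_total: N` — the total number of `.md` candidates in the directory, not only those written by this run). The run-report (`Evidence/<alias>/refresh-reports/...md`) gets a "Learning candidates this run" section pointing at the dir when N > 0.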
+## Promotion (manual, v5.6.0)
+1. Open `<project>/Evidence/_learnings-candidates/`.
+2. Pick a candidate. Investigate. Fill in Root cause + Fix.
+3. Copy the body (without the machine-readable footer) into the matching `<KUSHI_ROOT>/plugin/learnings/<source>.md` (newest on top).
+4. Open a PR against `gim-home/kushi`.
+5. Delete the candidate file locally once merged upstream.
+The v5.7.0 `kushi share-learnings` command will automate steps 3–5 with redaction + user confirmation. v5.6.0 ships emission only.
+## Privacy
+Candidates are written **locally** in your own project folder under OneDrive/SharePoint. They never leave your machine until you (or a future opt-in `share-learnings` command) explicitly send them upstream. The fingerprinting + redaction step in v5.7.0 will strip tenant ids, GUIDs, contributor aliases, and project names before any upstream submission.
+## Anti-patterns
+- ❌ **Calling `emitLearningCandidate` from the LLM/chat.** Only runners emit.
+- ❌ **Writing user data into the candidate body.** Symptom/Root cause/Fix should describe the *shape* of the bug, not the project content.
+- ❌ **Promoting after one sighting.** Wait for the second — the dedup window is 7 days specifically to prevent solo-noise promotion.
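+- ❌ **Editing `_seen.json` by hand.** Note that in v5.6.0 the dedup check in `emitLearningCandidate` consults only `_seen.json`, so deleting a candidate file does not by itself force re-emission inside the 7-day window; the same fingerprint is emitted again once its ledger entry is more than 7 days old.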

package/plugin/instructions/llm-vs-runner.instructions.md CHANGED Viewed

@@ -44,6 +44,10 @@ node plugin/runners/<source>.mjs --project <P> --alias <A> --entity <E> [--week
 - `deferred` — retry enqueued; runner will retry after `RETRY_MIN_AGE_MIN.<source>` minutes.
 - `failed` — non-retryable failure for this cell.
+## Runner side-effect: learning candidates (v5.6.0)
+Every `pull-*` runner imports `emitLearningCandidate` from `plugin/runners/lib/learnings.mjs` and calls it from its non-retryable error paths. The lib filters out user-side / transient errors and writes a markdown file to `<project>/Evidence/_learnings-candidates/` only when the signature is genuinely novel (or `body-unavailable` is on its 2nd+ sighting). No telemetry, no auto-PR — purely local capture for later human review. See `learning-candidates.instructions.md` and self-check probe D48.
 ## What the LLM still owns
 - Asking the user for `request_id`, `engagement_id`, folder names, chat ids, joinUrls, section URLs, site URLs, when missing.

package/plugin/runners/lib/learnings.mjs ADDED Viewed

@@ -0,0 +1,203 @@
+// plugin/runners/lib/learnings.mjs
+// v5.6.0 — local-only "learning candidates" emission.
+//
+// When a runner hits a truly novel error, or sees the same body-unavailable
+// twice for one entity, write one markdown candidate under
+//   <project>/Evidence/_learnings-candidates/YYYY-MM-DD-HHmm_<alias>_<source>_<sig>.md
+//
+// Doctrine: plugin/instructions/learning-candidates.instructions.md
+// No telemetry. No auto-PR. Reviewed by humans before being promoted to
+// plugin/learnings/<source>.md upstream (deferred to v5.7.0 share-learnings).
+import path from 'node:path';
+import { promises as fs } from 'node:fs';
+import crypto from 'node:crypto';
+import { evidenceRoot } from './layout.mjs';
+const DIR_NAME = '_learnings-candidates'; // candidate directory name, joined under evidenceRoot(projectRoot)
+const SEEN_FILE = '_seen.json'; // dedup ledger stored inside DIR_NAME, keyed by fingerprint
+const DEDUP_WINDOW_MS = 7 * 24 * 60 * 60 * 1000; // 7 days, in milliseconds
+// Signatures considered "user-side" or "already-handled" — never emit.
+// Keep this list narrow; anything NOT here is a candidate.
+const USER_SIDE_SIGNATURES = new Set([
+  'bad-args', 'config-missing', 'config-invalid',
+  'token-expired', 'auth-required', 'auth-failed',
+  'folder-not-found', 'entity-not-found',
+  'cross-tenant-blocked', 'permission-denied',
+  // NOTE: 'fetch-failed' is intentionally NOT user-side. The runner uses it
+  // for both retryable (transient HTTP, filtered below) and non-retryable
+  // (unexpected response shape — the novel case we want to capture).
+]);
+const TRANSIENT_HTTP_STATUSES = new Set([429, 502, 503, 504, 408]); // HTTP statuses shouldCapture() rejects with reason 'transient-http'
+/**
+ * Decide whether an error is worth capturing as a learning candidate.
+ * Returns { capture: boolean, reason: string }.
+ *
+ * Checks run in this order and the first match wins: non-object error
+ * ('no-error'), signature in USER_SIDE_SIGNATURES ('user-side'), status in
+ * TRANSIENT_HTTP_STATUSES ('transient-http'), the body-unavailable repeat
+ * rule, then capture as 'unclassified' (empty signature) or 'novel-signature'.
+ *
+ * @param {object} error  shape: { signature?, message?, status?, occurrences? }
+ *                        - signature: short kebab-case id (runner-assigned)
+ *                        - message: human-readable error message
+ *                        - status: HTTP status code if applicable
+ *                        - occurrences: cross-run count for repeat-only signatures (e.g. body-unavailable)
+ *
+ * NOTE(review): status is matched with Set#has, so a string such as '429'
+ * would not be treated as transient — confirm callers always pass a number.
+ */
+export function shouldCapture(error) {
+  if (!error || typeof error !== 'object') return { capture: false, reason: 'no-error' };
+  const sig = (error.signature || '').toLowerCase(); // signatures are compared case-insensitively
+  if (sig && USER_SIDE_SIGNATURES.has(sig)) return { capture: false, reason: 'user-side' };
+  if (error.status && TRANSIENT_HTTP_STATUSES.has(error.status)) return { capture: false, reason: 'transient-http' };
+  // body-unavailable: only emit on 2nd+ sighting for the same entity.
+  if (sig === 'body-unavailable') {
+    const n = Number(error.occurrences || 0); // a missing count is treated as 0, i.e. not captured
+    if (n < 2) return { capture: false, reason: 'body-unavailable-first-sighting' };
+    return { capture: true, reason: 'body-unavailable-repeat' };
+  }
+  if (!sig) return { capture: true, reason: 'unclassified' };
+  return { capture: true, reason: 'novel-signature' };
+}
+/**
+ * Stable 8-char fingerprint over (source, signature, redacted-message).
+ *
+ * The message is normalized before hashing: runs of 8+ hex characters become
+ * '<hex>', remaining runs of 4+ digits become '<n>', whitespace is collapsed,
+ * and the result is trimmed, lowercased and cut to 200 characters. The return
+ * value is the first 8 hex characters of the sha256 of
+ * `${source}|${signature || ''}|${norm}`.
+ *
+ * NOTE(review): the hex pattern has no separators in it, so a dashed GUID is
+ * only partly redacted (its 4-character groups that contain letters survive).
+ * Two messages that differ only in such a GUID may therefore get different
+ * fingerprints — confirm whether that is intended.
+ */
+export function fingerprint(source, signature, message) {
+  const norm = String(message || '')
+    .replace(/[a-f0-9]{8,}/gi, '<hex>') // runs first, so 8+ digit runs also land here rather than in '<n>'
+    .replace(/\d{4,}/g, '<n>')
+    .replace(/\s+/g, ' ')
+    .trim()
+    .toLowerCase()
+    .slice(0, 200);
+  const h = crypto.createHash('sha256').update(`${source}|${signature || ''}|${norm}`).digest('hex');
+  return h.slice(0, 8);
+}
+function safeSlug(s, max = 40) { // filename-safe slug of s, at most max characters; 'unknown' when nothing is left
+  return String(s || '')
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-') // every run of other characters becomes a single '-'
+    .replace(/^-+|-+$/g, '') // strip leading/trailing dashes
+    .slice(0, max) || 'unknown'; // truncation happens after the dash trim, so a cut slug can still end in '-'
+}
+function timestampPrefix(d = new Date()) { // formats d as 'YYYY-MM-DD-HHmm' for candidate file names
+  const y = d.getFullYear(); // all fields use the local-time getters, not the UTC ones
+  const mo = String(d.getMonth() + 1).padStart(2, '0'); // getMonth() is zero-based
+  const da = String(d.getDate()).padStart(2, '0');
+  const hh = String(d.getHours()).padStart(2, '0');
+  const mm = String(d.getMinutes()).padStart(2, '0');
+  return `${y}-${mo}-${da}-${hh}${mm}`;
+}
+async function loadSeen(dir) { // reads and parses the dedup ledger (SEEN_FILE) in dir
+  try {
+    const raw = await fs.readFile(path.join(dir, SEEN_FILE), 'utf8');
+    return JSON.parse(raw); // NOTE(review): parsed value is returned as-is; valid JSON that is not an object (e.g. null) is not rejected
+  } catch {
+    return {}; // any read or parse failure is treated as an empty ledger
+  }
+}
+async function saveSeen(dir, seen) { // overwrites the dedup ledger in dir with seen
+  await fs.writeFile(path.join(dir, SEEN_FILE), JSON.stringify(seen, null, 2)); // pretty-printed with 2-space indent, written straight to the target path
+}
+function renderMarkdown({ source, alias, entity, week, error, context, fpr, capturedAt }) { // builds the candidate file text: register-style entry followed by a yaml footer
+  const lines = [
+    `### ${capturedAt.slice(0, 10)} — ${source}: ${error.signature || 'unclassified'} (${fpr})`, // heading date is the first 10 characters of capturedAt
+    '',
+    `**Symptom**: ${oneLine(error.message) || '(no message)'} ` + // status, entity and week are appended only when truthy
+      (error.status ? `(HTTP ${error.status})` : '') +
+      (entity ? ` — entity \`${entity}\`` : '') +
+      (week ? ` — week ${week}` : ''),
+    '',
+    `**Root cause**: _TO INVESTIGATE_`,
+    '',
+    `**Fix / workaround**: _TO INVESTIGATE_`,
+    '',
+    `**Doctrine impact**: register-only — TODO promote on next sighting`,
+    '',
+    `**Discovered during**: alias \`${alias}\` running pull-${source}` +
+      (context && context.runner ? ` (runner ${context.runner})` : ''),
+    '',
+    '---',
+    '',
+    '<!-- machine-readable footer -->',
+    '```yaml',
+    `source: ${source}`,
+    `alias: ${alias}`,
+    `entity: ${entity || ''}`,
+    `week: ${week || ''}`,
+    `signature: ${error.signature || 'unclassified'}`,
+    `fingerprint: ${fpr}`,
+    `captured_at: ${capturedAt}`,
+    `error_status: ${error.status || ''}`,
+    `occurrences: ${error.occurrences || 1}`, // defaults to 1 when the caller supplied no count
+    '```',
+    '',
+  ];
+  return lines.join('\n');
+}
+function oneLine(s) { // collapses every whitespace run in s to one space and trims; falsy input yields ''
+  return String(s || '').replace(/\s+/g, ' ').trim();
+}
+/**
+ * Emit a learning candidate file under <project>/Evidence/_learnings-candidates/.
+ * Idempotent — same fingerprint within DEDUP_WINDOW_MS is skipped.
+ *
+ * Steps: validate required fields, apply shouldCapture(), compute the
+ * fingerprint, create the candidates directory, consult the _seen.json
+ * ledger, then write the markdown file and record the fingerprint.
+ * When the call is skipped before a fingerprint is computed
+ * ('missing-required' or a shouldCapture reason) the returned fingerprint
+ * is the empty string.
+ *
+ * NOTE(review): the file name has minute resolution and does not include the
+ * fingerprint, so two different errors with the same alias, source and
+ * signature in the same minute resolve to the same path and the second write
+ * replaces the first.
+ * NOTE(review): the ledger is read, modified and rewritten without any
+ * locking visible here — confirm whether runners can execute concurrently
+ * against one project.
+ *
+ * @returns {Promise<{written: boolean, path?: string, reason?: string, fingerprint: string}>}
+ */
+export async function emitLearningCandidate({
+  projectRoot, alias, source, entity, week, error, context = {}, now = new Date(),
+}) {
+  if (!projectRoot || !alias || !source) {
+    return { written: false, reason: 'missing-required', fingerprint: '' };
+  }
+  const decision = shouldCapture(error);
+  if (!decision.capture) {
+    return { written: false, reason: decision.reason, fingerprint: '' };
+  }
+  const fpr = fingerprint(source, error.signature, error.message);
+  const dir = path.join(evidenceRoot(projectRoot), DIR_NAME);
+  await fs.mkdir(dir, { recursive: true }); // the directory is created only after the policy filter has passed
+  const seen = await loadSeen(dir);
+  const prev = seen[fpr];
+  const nowMs = now.getTime();
+  if (prev && (nowMs - new Date(prev.at).getTime()) < DEDUP_WINDOW_MS) { // dedup relies on the ledger entry alone; the candidate file itself is not checked
+    return { written: false, reason: 'deduped', fingerprint: fpr };
+  }
+  const ts = timestampPrefix(now);
+  const sigSlug = safeSlug(error.signature || 'unclassified', 30);
+  const fileName = `${ts}_${safeSlug(alias, 20)}_${source}_${sigSlug}.md`; // source is interpolated as given, without safeSlug
+  const target = path.join(dir, fileName);
+  const capturedAt = now.toISOString(); // ISO timestamp, while the file-name prefix comes from timestampPrefix(now)
+  const md = renderMarkdown({ source, alias, entity, week, error, context, fpr, capturedAt });
+  await fs.writeFile(target, md);
+  seen[fpr] = { at: capturedAt, file: fileName, source, signature: error.signature || 'unclassified' };
+  await saveSeen(dir, seen); // ledger is updated after the candidate file has been written
+  return { written: true, path: target, reason: decision.reason, fingerprint: fpr };
+}
+/**
+ * Count the candidate files in <project>/Evidence/_learnings-candidates/.
+ * Returns the number of directory entries whose name ends in '.md'; the
+ * _seen.json ledger is not read. The count covers every candidate present in
+ * the directory, not only those written during the current run.
+ * Returns 0 when the directory cannot be read or any other error occurs.
+ */
+export async function readCandidateCount(projectRoot) {
+  try {
+    const dir = path.join(evidenceRoot(projectRoot), DIR_NAME);
+    const files = await fs.readdir(dir).catch(() => []); // unreadable or missing directory counts as empty
+    return files.filter(f => f.endsWith('.md')).length;
+  } catch {
+    return 0; // covers failures raised before readdir, e.g. from evidenceRoot()
+  }
+}
+export const __test__ = { USER_SIDE_SIGNATURES, TRANSIENT_HTTP_STATUSES, DEDUP_WINDOW_MS }; // exposes module-private policy constants — presumably for tests, going by the name

package/plugin/runners/pull-ado.mjs CHANGED Viewed

@@ -22,6 +22,7 @@ import { updateCell } from './lib/ledger.mjs';
 import { appendRunLog } from './lib/runlog.mjs';
 import { enqueue, clear } from './lib/deferred.mjs';
 import { currentIsoMonday, ymd } from './lib/weeks.mjs';
+import { emitLearningCandidate } from './lib/learnings.mjs';
 const SOURCE = 'ado';
@@ -174,6 +175,7 @@ async function main() {
     if (retryable && !args.dryRun) {
       await enqueue(projectRoot, args.alias, { source: SOURCE, entity: args.entity, weekStart, signature: 'fetch-failed', reason: e.message });
     }
+    if (!retryable && !args.dryRun) await emitLearningCandidate({ projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart, error: { signature: 'fetch-failed', message: e.message, status: e.status }, context: { runner: 'pull-ado' } });
     emit({ source: SOURCE, entity: args.entity, week: weekStart, status: retryable ? 'deferred' : 'failed', errors: [{ message: e.message, status: e.status }] });
     return retryable ? 1 : 0;
   }

package/plugin/runners/pull-crm.mjs CHANGED Viewed

@@ -21,6 +21,7 @@ import { updateCell } from './lib/ledger.mjs';
 import { appendRunLog } from './lib/runlog.mjs';
 import { enqueue, clear } from './lib/deferred.mjs';
 import { isoMondayString, currentIsoMonday, ymd } from './lib/weeks.mjs';
+import { emitLearningCandidate } from './lib/learnings.mjs';
 const SOURCE = 'crm';
@@ -155,6 +156,7 @@ async function main() {
         signature: 'fetch-failed', reason: e.message,
       });
     }
+    if (!retryable && !args.dryRun) await emitLearningCandidate({ projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart, error: { signature: 'fetch-failed', message: e.message, status: e.status }, context: { runner: 'pull-crm' } });
     emit({ source: SOURCE, entity: args.entity, week: weekStart, status: retryable ? 'deferred' : 'failed', errors: [{ message: e.message, status: e.status }] });
     return retryable ? 1 : 0;
   }

package/plugin/runners/pull-email.mjs CHANGED Viewed

@@ -19,6 +19,7 @@ import { updateCell } from './lib/ledger.mjs';
 import { appendRunLog } from './lib/runlog.mjs';
 import { enqueue, clear } from './lib/deferred.mjs';
 import { currentIsoMonday, ymd, parseYmd } from './lib/weeks.mjs';
+import { emitLearningCandidate } from './lib/learnings.mjs';
 const SOURCE = 'email';
@@ -81,6 +82,12 @@ function makeFixtureClient(data) {
   return {
     async findFolder(name) { return foldersByName.get(name) || null; },
     async listMessages(folderId, fromIso, toIso) {
+      if (data.throwOnListMessages) {
+        const t = data.throwOnListMessages;
+        const e = new Error(t.message || 'fixture-throw');
+        if (t.status) e.status = t.status;
+        throw e;
+      }
       const all = (data.messagesByFolder && data.messagesByFolder[folderId]) || [];
       return all.filter(m => m.receivedDateTime >= fromIso && m.receivedDateTime < toIso);
     },
@@ -126,6 +133,7 @@ async function main() {
     const retryable = !e.status || [429, 502, 503, 504].includes(e.status);
     await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { last_status: retryable ? 'deferred' : 'failed', last_error: e.message });
     if (retryable && !args.dryRun) await enqueue(projectRoot, args.alias, { source: SOURCE, entity: args.entity, weekStart, signature: 'fetch-failed', reason: e.message });
+    if (!retryable && !args.dryRun) await emitLearningCandidate({ projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart, error: { signature: 'fetch-failed', message: e.message, status: e.status }, context: { runner: 'pull-email' } });
     emit({ source: SOURCE, entity: args.entity, week: weekStart, status: retryable ? 'deferred' : 'failed', errors: [{ message: e.message, status: e.status }] });
     return retryable ? 1 : 0;
   }

package/plugin/runners/pull-meetings.mjs CHANGED Viewed

@@ -21,6 +21,7 @@ import { appendRunLog } from './lib/runlog.mjs';
 import { enqueue, clear } from './lib/deferred.mjs';
 import { shortHash } from './lib/dedup.mjs';
 import { currentIsoMonday, ymd } from './lib/weeks.mjs';
+import { emitLearningCandidate } from './lib/learnings.mjs';
 const SOURCE = 'meetings';
@@ -117,6 +118,7 @@ async function main() {
     const retryable = !e.status || [429, 502, 503, 504].includes(e.status);
     await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { last_status: retryable ? 'deferred' : 'failed', last_error: e.message });
     if (retryable && !args.dryRun) await enqueue(projectRoot, args.alias, { source: SOURCE, entity: args.entity, weekStart, signature: 'fetch-failed', reason: e.message });
+    if (!retryable && !args.dryRun) await emitLearningCandidate({ projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart, error: { signature: 'fetch-failed', message: e.message, status: e.status }, context: { runner: 'pull-meetings' } });
     emit({ source: SOURCE, entity: args.entity, week: weekStart, status: retryable ? 'deferred' : 'failed', errors: [{ message: e.message, status: e.status }] });
     return retryable ? 1 : 0;
   }

package/plugin/runners/pull-onenote.mjs CHANGED Viewed

@@ -20,6 +20,8 @@ import { updateCell } from './lib/ledger.mjs';
 import { appendRunLog } from './lib/runlog.mjs';
 import { clear } from './lib/deferred.mjs';
 import { currentIsoMonday, ymd, parseYmd } from './lib/weeks.mjs';
+import { emitLearningCandidate } from './lib/learnings.mjs';
+import { readLedger, cellKey } from './lib/ledger.mjs';
 const SOURCE = 'onenote';
@@ -132,6 +134,7 @@ async function main() {
   try { section = await client.getSection(args.entity); }
   catch (e) {
     await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { last_status: 'failed', last_error: e.message });
+    if (!args.dryRun) await emitLearningCandidate({ projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart, error: { signature: 'section-fetch-failed', message: e.message, status: e.status }, context: { runner: 'pull-onenote' } });
     emit({ source: SOURCE, entity: args.entity, week: weekStart, status: 'failed', errors: [{ message: e.message }] });
     return 0;
   }
@@ -177,6 +180,22 @@ async function main() {
   }
   const status = bodyUnavailable.length === 0 ? 'captured' : (captures.length === 0 ? 'body-unavailable' : 'partial');
+  if (status === 'body-unavailable' && !args.dryRun) {
+    const prior = (await readLedger(projectRoot, args.alias).catch(() => ({ cells: {} })))
+      .cells?.[cellKey(SOURCE, args.entity, weekStart)];
+    const priorOccurrences = Number(prior?.body_unavailable_runs || 0);
+    const occurrences = priorOccurrences + 1;
+    if (occurrences >= 2) {
+      await emitLearningCandidate({
+        projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart,
+        error: { signature: 'body-unavailable', message: `OneNote section ${section.id}: ${bodyUnavailable.length}/${pages.length} pages had no body across ${occurrences} runs`, occurrences },
+        context: { runner: 'pull-onenote' },
+      });
+    }
+    await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { body_unavailable_runs: occurrences });
+  }
   await clear(projectRoot, args.alias, SOURCE, args.entity).catch(() => {});
   await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, {
     last_status: status,

package/plugin/runners/pull-sharepoint.mjs CHANGED Viewed

@@ -22,6 +22,7 @@ import { appendRunLog } from './lib/runlog.mjs';
 import { enqueue, clear } from './lib/deferred.mjs';
 import { shortHash } from './lib/dedup.mjs';
 import { currentIsoMonday, ymd, parseYmd } from './lib/weeks.mjs';
+import { emitLearningCandidate } from './lib/learnings.mjs';
 const SOURCE = 'sharepoint';
@@ -129,6 +130,7 @@ async function main() {
     const retryable = !e.status || [429, 502, 503, 504].includes(e.status);
     await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { last_status: retryable ? 'deferred' : 'failed', last_error: e.message });
     if (retryable && !args.dryRun) await enqueue(projectRoot, args.alias, { source: SOURCE, entity: args.entity, weekStart, signature: 'fetch-failed', reason: e.message });
+    if (!retryable && !args.dryRun) await emitLearningCandidate({ projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart, error: { signature: 'fetch-failed', message: e.message, status: e.status }, context: { runner: 'pull-sharepoint' } });
     emit({ source: SOURCE, entity: args.entity, week: weekStart, status: retryable ? 'deferred' : 'failed', errors: [{ message: e.message, status: e.status }] });
     return retryable ? 1 : 0;
   }

package/plugin/runners/pull-teams.mjs CHANGED Viewed

@@ -20,6 +20,7 @@ import { appendRunLog } from './lib/runlog.mjs';
 import { enqueue, clear } from './lib/deferred.mjs';
 import { shortHash } from './lib/dedup.mjs';
 import { currentIsoMonday, ymd, parseYmd } from './lib/weeks.mjs';
+import { emitLearningCandidate } from './lib/learnings.mjs';
 const SOURCE = 'teams';
@@ -105,6 +106,7 @@ async function main() {
     const retryable = !e.status || [429, 502, 503, 504].includes(e.status);
     await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { last_status: retryable ? 'deferred' : 'failed', last_error: e.message });
     if (retryable && !args.dryRun) await enqueue(projectRoot, args.alias, { source: SOURCE, entity: args.entity, weekStart, signature: 'fetch-failed', reason: e.message });
+    if (!retryable && !args.dryRun) await emitLearningCandidate({ projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart, error: { signature: 'fetch-failed', message: e.message, status: e.status }, context: { runner: 'pull-teams' } });
     emit({ source: SOURCE, entity: args.entity, week: weekStart, status: retryable ? 'deferred' : 'failed', errors: [{ message: e.message, status: e.status }] });
     return retryable ? 1 : 0;
   }

package/plugin/runners/refresh.mjs CHANGED Viewed

@@ -23,6 +23,7 @@ import { fileURLToPath } from 'node:url';
 import { loadConfig, assertProject } from './lib/config.mjs';
 import { readLedger, needsPull } from './lib/ledger.mjs';
 import { currentIsoMonday, ymd } from './lib/weeks.mjs';
+import { readCandidateCount } from './lib/learnings.mjs';
 const HERE = path.dirname(fileURLToPath(import.meta.url));
@@ -223,6 +224,8 @@ async function main() {
     ? planned.map(t => ({ source: t.source, entity: t.entity, week: weekStart, dry_run: true, reason: t.reason }))
     : await pMap(planned, args.maxParallel, t => runOne(t, weekStart, args));
+  const learning_candidates_total = args.dryRun ? 0 : await readCandidateCount(args.project);
   emit({
     status: 'ok',
     project: args.project,
@@ -234,6 +237,7 @@ async function main() {
     skipped: skipped.length,
     results,
     skipped_targets: skipped,
+    learning_candidates_total,
   });
   return 0;
 }

package/plugin/runners/test/fixtures/email-novel-error.json ADDED Viewed

@@ -0,0 +1,9 @@
+{
+  "folders": [
+    { "id": "AAMkAGI=", "displayName": "23. ABN AMRO" }
+  ],
+  "throwOnListMessages": {
+    "status": 500,
+    "message": "Graph returned unexpected null for $select(receivedDateTime)"
+  }
+}

package/plugin/runners/test/integration/pull-email.integration.test.mjs CHANGED Viewed

@@ -95,3 +95,55 @@ test('missing --entity exits 2', () => {
   ], { encoding: 'utf8' });
   assert.equal(res.status, 2);
 });
+test('v5.6.0: non-retryable error emits a learning candidate file', async () => { // end-to-end: spawns the runner twice against a throwing fixture in a temp project
+  const projectRoot3 = await fs.mkdtemp(path.join(os.tmpdir(), 'kushi-email-novel-'));
+  await fs.mkdir(path.join(projectRoot3, 'Evidence', 'ushak'), { recursive: true });
+  await fs.writeFile(path.join(projectRoot3, 'integrations.yml'), YAML.stringify({}));
+  const NOVEL_FIXTURE = path.join(HERE, '..', 'fixtures', 'email-novel-error.json');
+  try {
+    const res = spawnSync(process.execPath, [RUNNER,
+      '--project', projectRoot3, '--alias', 'ushak',
+      '--entity', '23. ABN AMRO', '--week', '2026-05-25', '--fixture', NOVEL_FIXTURE,
+    ], { encoding: 'utf8' });
+    assert.equal(res.status, 0, `stderr: ${res.stderr}`); // exit status 0 is expected even though the reported status is 'failed'
+    const r = JSON.parse(res.stdout.trim().split('\n').pop()); // only the last stdout line is parsed as the result JSON
+    assert.equal(r.status, 'failed');
+    const candDir = path.join(projectRoot3, 'Evidence', '_learnings-candidates');
+    const entries = await fs.readdir(candDir);
+    const mdFiles = entries.filter(f => f.endsWith('.md')); // excludes the _seen.json ledger from the count
+    assert.equal(mdFiles.length, 1, `expected exactly one candidate, got: ${entries.join(', ')}`);
+    const body = await fs.readFile(path.join(candDir, mdFiles[0]), 'utf8');
+    assert.match(body, /fetch-failed/);
+    assert.match(body, /unexpected null/);
+    // Re-run within window — should not duplicate.
+    spawnSync(process.execPath, [RUNNER, // the second run's exit status and stdout are not asserted, only the file count below
+      '--project', projectRoot3, '--alias', 'ushak',
+      '--entity', '23. ABN AMRO', '--week', '2026-05-25', '--fixture', NOVEL_FIXTURE,
+    ], { encoding: 'utf8' });
+    const entries2 = await fs.readdir(candDir);
+    const mdFiles2 = entries2.filter(f => f.endsWith('.md'));
+    assert.equal(mdFiles2.length, 1, 'dedup should prevent a 2nd file within 7d');
+  } finally {
+    await fs.rm(projectRoot3, { recursive: true, force: true }); // always remove the temp project
+  }
+});
+test('v5.6.0: user-side error does NOT emit a learning candidate', async () => { // runs the runner with an entity name that is expected to be absent from FIXTURE
+  const projectRoot4 = await fs.mkdtemp(path.join(os.tmpdir(), 'kushi-email-userside-'));
+  await fs.mkdir(path.join(projectRoot4, 'Evidence', 'ushak'), { recursive: true });
+  await fs.writeFile(path.join(projectRoot4, 'integrations.yml'), YAML.stringify({}));
+  try {
+    const res = spawnSync(process.execPath, [RUNNER,
+      '--project', projectRoot4, '--alias', 'ushak',
+      '--entity', 'NoSuchFolder', '--week', '2026-05-25', '--fixture', FIXTURE,
+    ], { encoding: 'utf8' });
+    assert.equal(res.status, 0);
+    let exists = true;
+    try { await fs.access(path.join(projectRoot4, 'Evidence', '_learnings-candidates')); } // fs.access rejects when the path does not exist
+    catch { exists = false; }
+    assert.equal(exists, false, 'folder-not-found should never trigger candidate emission'); // checks that the candidates directory was never created
+  } finally {
+    await fs.rm(projectRoot4, { recursive: true, force: true });
+  }
+});

package/plugin/runners/test/unit/learnings.test.mjs ADDED Viewed

@@ -0,0 +1,124 @@
+// plugin/runners/test/unit/learnings.test.mjs
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { promises as fs } from 'node:fs';
+import path from 'node:path';
+import os from 'node:os';
+import {
+  shouldCapture,
+  fingerprint,
+  emitLearningCandidate,
+  readCandidateCount,
+} from '../../lib/learnings.mjs';
+async function tmpProject() {
+  const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'kushi-learnings-'));
+  await fs.mkdir(path.join(dir, 'Evidence', 'usha'), { recursive: true });
+  return dir;
+}
+test('shouldCapture: user-side signatures are skipped', () => {
+  for (const sig of ['bad-args','config-missing','token-expired','auth-required','folder-not-found','cross-tenant-blocked','permission-denied']) {
+    const d = shouldCapture({ signature: sig, message: 'x' });
+    assert.equal(d.capture, false, `${sig} should not capture`);
+    assert.equal(d.reason, 'user-side');
+  }
+});
+test('shouldCapture: transient HTTP is skipped', () => {
+  for (const status of [429, 502, 503, 504, 408]) {
+    const d = shouldCapture({ signature: 'fetch-failed', message: 'x', status });
+    assert.equal(d.capture, false);
+    assert.equal(d.reason, 'transient-http');
+  }
+});
+test('shouldCapture: fetch-failed with non-transient status captures', () => {
+  const d = shouldCapture({ signature: 'fetch-failed', message: 'weird shape', status: 500 });
+  assert.equal(d.capture, true);
+});
+test('shouldCapture: body-unavailable first sighting skipped, 2nd captures', () => {
+  const first = shouldCapture({ signature: 'body-unavailable', occurrences: 1 });
+  assert.equal(first.capture, false);
+  assert.equal(first.reason, 'body-unavailable-first-sighting');
+  const second = shouldCapture({ signature: 'body-unavailable', occurrences: 2 });
+  assert.equal(second.capture, true);
+  assert.equal(second.reason, 'body-unavailable-repeat');
+});
+test('shouldCapture: unclassified (no signature) captures', () => {
+  const d = shouldCapture({ message: 'something weird' });
+  assert.equal(d.capture, true);
+  assert.equal(d.reason, 'unclassified');
+});
+test('fingerprint normalizes hex blobs and digit runs', () => {
+  const a = fingerprint('email', 'fetch-failed', 'error abc12345def at row 9999');
+  const b = fingerprint('email', 'fetch-failed', 'error 5678ffaa9999 at row 1234');
+  assert.equal(a, b, 'redacted hex/digits should fingerprint the same');
+  assert.equal(a.length, 8);
+});
+test('fingerprint differs for different signatures', () => {
+  const a = fingerprint('email', 'fetch-failed', 'bad shape');
+  const b = fingerprint('email', 'other-sig', 'bad shape');
+  assert.notEqual(a, b);
+});
+test('emitLearningCandidate writes a file for a novel error', async () => {
+  const root = await tmpProject();
+  const res = await emitLearningCandidate({
+    projectRoot: root, alias: 'usha', source: 'email', entity: 'Inbox', week: '2026-05-25',
+    error: { signature: 'fetch-failed', message: 'unexpected null in $select', status: 500 },
+  });
+  assert.equal(res.written, true);
+  const md = await fs.readFile(res.path, 'utf8');
+  assert.match(md, /Symptom/);
+  assert.match(md, /unexpected null in/);
+  assert.match(md, /captured_at:/);
+});
+test('emitLearningCandidate is silent for user-side errors', async () => {
+  const root = await tmpProject();
+  const res = await emitLearningCandidate({
+    projectRoot: root, alias: 'usha', source: 'email', entity: 'Inbox', week: '2026-05-25',
+    error: { signature: 'folder-not-found', message: 'Inbox' },
+  });
+  assert.equal(res.written, false);
+  assert.equal(res.reason, 'user-side');
+  assert.equal(await readCandidateCount(root), 0);
+});
+test('emitLearningCandidate dedups within 7-day window', async () => {
+  const root = await tmpProject();
+  const err = { signature: 'fetch-failed', message: 'unexpected null', status: 500 };
+  const r1 = await emitLearningCandidate({ projectRoot: root, alias: 'usha', source: 'email', entity: 'Inbox', week: '2026-05-25', error: err });
+  assert.equal(r1.written, true);
+  const r2 = await emitLearningCandidate({ projectRoot: root, alias: 'usha', source: 'email', entity: 'Inbox', week: '2026-05-25', error: err });
+  assert.equal(r2.written, false);
+  assert.equal(r2.reason, 'deduped');
+  assert.equal(await readCandidateCount(root), 1);
+});
+test('emitLearningCandidate re-emits after 7-day window', async () => {
+  const root = await tmpProject();
+  const err = { signature: 'fetch-failed', message: 'unexpected null', status: 500 };
+  const eightDaysAgo = new Date(Date.now() - 8 * 24 * 60 * 60 * 1000);
+  const r1 = await emitLearningCandidate({ projectRoot: root, alias: 'usha', source: 'email', entity: 'Inbox', week: '2026-05-25', error: err, now: eightDaysAgo });
+  assert.equal(r1.written, true);
+  const r2 = await emitLearningCandidate({ projectRoot: root, alias: 'usha', source: 'email', entity: 'Inbox', week: '2026-05-25', error: err });
+  assert.equal(r2.written, true);
+  assert.equal(await readCandidateCount(root), 2);
+});
+test('emitLearningCandidate rejects missing required fields', async () => {
+  const r = await emitLearningCandidate({ alias: 'usha', source: 'email', error: { message: 'x' } });
+  assert.equal(r.written, false);
+  assert.equal(r.reason, 'missing-required');
+});
+test('readCandidateCount returns 0 when dir missing', async () => {
+  const root = await tmpProject();
+  assert.equal(await readCandidateCount(root), 0);
+});

package/plugin/skills/self-check/run.ps1 CHANGED Viewed

@@ -2485,7 +2485,8 @@ process.stdout.write(JSON.stringify(out));
     $v550Doctrines = @(
         'llm-vs-runner.instructions.md',
         'csc-rendering.instructions.md',
-        'discovery-prompts.instructions.md'
+        'discovery-prompts.instructions.md',
+        'learning-candidates.instructions.md'
     )
     foreach ($d in $v550Doctrines) {
         $df = Join-Path $instructionsDir $d
@@ -2494,6 +2495,23 @@ process.stdout.write(JSON.stringify(out));
         }
     }
+    # === D48.learning-candidates (v5.6.0) — every pull-* runner must import lib/learnings.mjs ===
+    # Reports an error when the shared library file is absent, and a warning for
+    # each listed runner that exists but does not import from it or never calls
+    # emitLearningCandidate(.
+    $learningsLib = Join-Path $Root 'plugin/runners/lib/learnings.mjs'
+    if (-not (Test-Path $learningsLib)) {
+        Add-Finding 'D48.learning-candidates' 'V560Learnings' 'error' "plugin/runners/lib/learnings.mjs is missing" "Create lib/learnings.mjs per v5.6.0 learning-candidates spec." $learningsLib 0
+    }
+    $pullRunners = @('pull-email.mjs','pull-teams.mjs','pull-meetings.mjs','pull-onenote.mjs','pull-sharepoint.mjs','pull-crm.mjs','pull-ado.mjs')
+    foreach ($r in $pullRunners) {
+        $rf = Join-Path $Root "plugin/runners/$r"
+        # Runners that are not present in this tree are skipped rather than flagged.
+        if (-not (Test-Path $rf)) { continue }
+        $rt = Get-Content -Raw $rf
+        # Accept either quote style around the module specifier so a double-quoted
+        # import is not falsely reported as missing ('' is a literal ' in a
+        # single-quoted PowerShell string).
+        if ($rt -notmatch 'from\s+[''"]\./lib/learnings\.mjs[''"]') {
+            Add-Finding 'D48.learning-candidates' 'V560Learnings' 'warning' "Runner $r does not import emitLearningCandidate from lib/learnings.mjs" "Add: import { emitLearningCandidate } from './lib/learnings.mjs'; and call it from non-retryable error paths per learning-candidates.instructions.md." $rf 0
+        } elseif ($rt -notmatch 'emitLearningCandidate\s*\(') {
+            Add-Finding 'D48.learning-candidates' 'V560Learnings' 'warning' "Runner $r imports emitLearningCandidate but never calls it" "Wire it into the non-retryable catch path so novel errors are captured as learning candidates." $rf 0
+        }
+    }
 # === Output ===
 if ($Targeted) {
     # Filter findings to those whose code, surface, file path, or message contain the substring.