npm - kushi-agents - Versions diffs - 6.0.1 → 6.1.1 - Mend

kushi-agents 6.0.1 → 6.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/package.json CHANGED Viewed

@@ -1,61 +1,61 @@
-{
-  "name": "kushi-agents",
-  "version": "6.0.1",
-  "description": "Install Kushi — multi-source project evidence agent with Comprehensive Structured Capture (CSC) into weekly-only files across Email, Teams, OneNote, Loop, SharePoint, Meetings, CRM, ADO. Meetings retain a sibling verbatim/ audit folder. WorkIQ-only for M365 sources (Graph / m365_* FORBIDDEN as fallbacks; user-paste is first-class). Host-agnostic.",
-  "type": "module",
-  "bin": {
-    "kushi": "./bin/kushi.mjs",
-    "kushi-agents": "./bin/kushi-agents.mjs"
-  },
-  "files": [
-    "bin/",
-    "src/",
-    "plugin/",
-    ".github/copilot-instructions.kushi.md"
-  ],
-  "engines": {
-    "node": ">=18.0.0"
-  },
-  "dependencies": {
-    "@azure/identity": "^4.5.0",
-    "@mozilla/readability": "^0.6.0",
-    "jsdom": "^29.1.1",
-    "jsonc-parser": "^3.3.1",
-    "yaml": "^2.6.0"
-  },
-  "keywords": [
-    "vscode",
-    "copilot",
-    "agents",
-    "kushi",
-    "project-evidence",
-    "workiq",
-    "m365",
-    "ai",
-    "cli"
-  ],
-  "repository": {
-    "type": "git",
-    "url": "git+https://github.com/gim-home/kushi.git"
-  },
-  "homepage": "https://gim-home.github.io/kushi/",
-  "bugs": {
-    "url": "https://github.com/gim-home/kushi/issues"
-  },
-  "license": "MIT",
-  "scripts": {
-    "test": "node --test src/check-workiq.test.mjs src/seed-config.test.mjs src/sanitize-workiq-input.test.mjs src/detect-vertex-repo.test.mjs src/vertex-validate.test.mjs src/emit-vertex.e2e.test.mjs src/config-root-resolve.test.mjs src/forbidden-workiq-phrasings.test.mjs src/multi-host-install.test.mjs src/eval-aggregator.test.mjs src/eval-runner.test.mjs src/hooks-dispatcher.test.mjs src/parallel-refresh.test.mjs src/otel-emit.test.mjs src/doctor.test.mjs src/setup-wizard.test.mjs src/cli-no-args.test.mjs src/cli-no-args-tty.test.mjs src/per-user-files.test.mjs src/layout-portable.test.mjs src/profile-coverage.test.mjs src/get-kushi-config.test.mjs src/seed-config-derived.test.mjs src/resolve-alias.test.mjs plugin/runners/test/unit/*.test.mjs",
-    "test:runners": "node --test plugin/runners/test/unit/*.test.mjs",
-    "test:runners:integration": "node --test plugin/runners/test/integration/*.test.mjs",
-    "test:integration:bootstrap": "node src/bootstrap-dryrun.integration.test.mjs",
-    "smoke": "node scripts/smoke.mjs",
-    "eval": "pwsh plugin/skills/eval/run-evals.ps1 -Skill",
-    "eval:all": "pwsh plugin/skills/eval/run-evals.ps1 -All",
-    "eval:canary": "pwsh plugin/skills/eval/run-evals.ps1 -Canary",
-    "eval:baseline": "pwsh plugin/skills/eval/run-evals.ps1 -All -UpdateBaseline",
-    "prepublishOnly": "npm test && npm run smoke"
-  },
-  "publishConfig": {
-    "access": "public"
-  }
-}
+{
+  "name": "kushi-agents",
+  "version": "6.1.1",
+  "description": "Install Kushi — multi-source project evidence agent with Comprehensive Structured Capture (CSC) into weekly-only files across Email, Teams, OneNote, Loop, SharePoint, Meetings, CRM, ADO. Meetings retain a sibling verbatim/ audit folder. WorkIQ-only for M365 sources (Graph / m365_* FORBIDDEN as fallbacks; user-paste is first-class). Host-agnostic.",
+  "type": "module",
+  "bin": {
+    "kushi": "./bin/kushi.mjs",
+    "kushi-agents": "./bin/kushi-agents.mjs"
+  },
+  "files": [
+    "bin/",
+    "src/",
+    "plugin/",
+    ".github/copilot-instructions.kushi.md"
+  ],
+  "engines": {
+    "node": ">=18.0.0"
+  },
+  "dependencies": {
+    "@azure/identity": "^4.5.0",
+    "@mozilla/readability": "^0.6.0",
+    "jsdom": "^29.1.1",
+    "jsonc-parser": "^3.3.1",
+    "yaml": "^2.6.0"
+  },
+  "keywords": [
+    "vscode",
+    "copilot",
+    "agents",
+    "kushi",
+    "project-evidence",
+    "workiq",
+    "m365",
+    "ai",
+    "cli"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/gim-home/kushi.git"
+  },
+  "homepage": "https://gim-home.github.io/kushi/",
+  "bugs": {
+    "url": "https://github.com/gim-home/kushi/issues"
+  },
+  "license": "MIT",
+  "scripts": {
+    "test": "node --test src/check-workiq.test.mjs src/seed-config.test.mjs src/sanitize-workiq-input.test.mjs src/detect-vertex-repo.test.mjs src/vertex-validate.test.mjs src/emit-vertex.e2e.test.mjs src/config-root-resolve.test.mjs src/forbidden-workiq-phrasings.test.mjs src/multi-host-install.test.mjs src/eval-aggregator.test.mjs src/eval-runner.test.mjs src/hooks-dispatcher.test.mjs src/parallel-refresh.test.mjs src/otel-emit.test.mjs src/doctor.test.mjs src/setup-wizard.test.mjs src/cli-no-args.test.mjs src/cli-no-args-tty.test.mjs src/per-user-files.test.mjs src/layout-portable.test.mjs src/profile-coverage.test.mjs src/get-kushi-config.test.mjs src/seed-config-derived.test.mjs src/resolve-alias.test.mjs plugin/runners/test/unit/*.test.mjs",
+    "test:runners": "node --test plugin/runners/test/unit/*.test.mjs",
+    "test:runners:integration": "node --test plugin/runners/test/integration/*.test.mjs",
+    "test:integration:bootstrap": "node src/bootstrap-dryrun.integration.test.mjs",
+    "smoke": "node scripts/smoke.mjs",
+    "eval": "pwsh plugin/skills/eval/run-evals.ps1 -Skill",
+    "eval:all": "pwsh plugin/skills/eval/run-evals.ps1 -All",
+    "eval:canary": "pwsh plugin/skills/eval/run-evals.ps1 -Canary",
+    "eval:baseline": "pwsh plugin/skills/eval/run-evals.ps1 -All -UpdateBaseline",
+    "prepublishOnly": "npm test && npm run smoke"
+  },
+  "publishConfig": {
+    "access": "public"
+  }
+}

package/plugin/runners/discover.mjs CHANGED Viewed

@@ -162,7 +162,7 @@ function isPlaceholder(v) {
   if (!s) return true;
   if (/^<.*>$/.test(s)) return true;              // <value>, <chat_id>, etc.
   if (/^turn\d+search\d+$/i.test(s)) return true; // WorkIQ web citation tokens
-  if (/^(unknown|n\/a|none|null|tbd|todo)$/i.test(s)) return true;
+  if (/^(unknown|n\/a|none|null|tbd|todo|not\s+explicitly\s+(available|provided|specified)|not\s+(available|provided|specified|applicable)|undisclosed)$/i.test(s)) return true;
   return false;
 }
@@ -178,19 +178,26 @@ function isValidValueFor(source, field, raw) {
     return true;
   }
   if (source === 'teams' && field === 'chat_id') {
-    // Graph chat IDs are long strings ending in '@thread.v2' or similar.
-    return v.includes('@thread') || v.length > 30;
+    // Accept Graph chat IDs ('@thread' / long opaque) OR human-readable
+    // chat topics — WorkIQ frequently returns the topic when the Graph ID
+    // is unavailable. Refresh stage will resolve to a real chat.
+    return v.length >= 2 && v.length <= 200;
   }
   if (source === 'meetings' && field === 'join_url') {
-    return /^https:\/\/teams\.microsoft\.com\/.+meetup-join/i.test(v);
+    // Prefer real Teams meetup URLs but also accept any https URL or
+    // a meeting subject — refresh stage will resolve.
+    if (v.startsWith('http')) return true;
+    return v.length >= 3 && v.length <= 200;
   }
   if (source === 'onenote' && field === 'section_file_id') {
-    // OneNote section file IDs are hex strings, typically prefixed `0-` and
-    // dozens of chars long. Reject short citation tokens.
-    return /^[0-9a-f][0-9a-f\-]{20,}$/i.test(v) || v.startsWith('0-');
+    // Accept hex section IDs OR section/page names. WorkIQ often only has
+    // the human-readable name; refresh resolves to the file ID.
+    return v.length >= 2 && v.length <= 200;
   }
   if (source === 'sharepoint' && field === 'site_url') {
-    return v.startsWith('https://') && v.includes('.sharepoint.com');
+    // Prefer real SharePoint URLs; also accept site names/relative paths.
+    if (v.startsWith('http')) return v.includes('.sharepoint.com') || v.includes('/sites/');
+    return v.length >= 2 && v.length <= 300;
   }
   if (source === 'crm' && (field === 'request_id' || field === 'incident_number')) {
     // CRM IDs vary: FE-2026-001458, REQ-12345, INC-9, plain numerics. Reject
@@ -228,7 +235,16 @@ function applyRows(source, rows, currentBounds, currentInteg) {
   }
   if (source === 'teams') {
     const existing = currentBounds.teams?.chats || [];
-    const incoming = rows.map(r => r.chat_id).filter(v => isValidValueFor('teams', 'chat_id', v));
+    // WorkIQ rarely has Graph chat IDs — citation tokens like "turn1search1"
+    // are common. Prefer chat_id; fall back to topic so refresh has a usable
+    // boundary descriptor instead of nothing.
+    const incoming = rows.map(r => {
+      const id = r.chat_id;
+      if (id && !isPlaceholder(id) && isValidValueFor('teams', 'chat_id', id)) return id;
+      const topic = r.topic;
+      if (topic && !isPlaceholder(topic)) return topic;
+      return null;
+    }).filter(Boolean);
     const merged = dedup([...existing, ...incoming]);
     const added = merged.filter(v => !existing.includes(v));
     if (added.length) accepted.push(...added);
@@ -236,7 +252,13 @@ function applyRows(source, rows, currentBounds, currentInteg) {
   }
   if (source === 'meetings') {
     const existing = currentBounds.meetings?.joinUrls || [];
-    const incoming = rows.map(r => r.join_url).filter(v => isValidValueFor('meetings', 'join_url', v));
+    const incoming = rows.map(r => {
+      const url = r.join_url;
+      if (url && !isPlaceholder(url) && isValidValueFor('meetings', 'join_url', url) && url.startsWith('http')) return url;
+      const subj = r.subject;
+      if (subj && !isPlaceholder(subj)) return subj;
+      return null;
+    }).filter(Boolean);
     const merged = dedup([...existing, ...incoming]);
     const added = merged.filter(v => !existing.includes(v));
     if (added.length) accepted.push(...added);
@@ -244,7 +266,16 @@ function applyRows(source, rows, currentBounds, currentInteg) {
   }
   if (source === 'onenote') {
     const existing = currentBounds.onenote?.section_file_ids || [];
-    const incoming = rows.map(r => r.section_file_id).filter(v => isValidValueFor('onenote', 'section_file_id', v));
+    // WorkIQ rarely has section_file_id (Graph property); usually returns
+    // citation tokens. Prefer real hex IDs; fall back to section_name so
+    // refresh has a usable boundary descriptor.
+    const incoming = rows.map(r => {
+      const id = r.section_file_id;
+      if (id && !isPlaceholder(id) && /^[0-9a-f][0-9a-f\-]{20,}$/i.test(String(id).trim())) return id;
+      const name = r.section_name;
+      if (name && !isPlaceholder(name)) return name;
+      return null;
+    }).filter(Boolean);
     const merged = dedup([...existing, ...incoming]);
     const added = merged.filter(v => !existing.includes(v));
     if (added.length) accepted.push(...added);
@@ -391,6 +422,16 @@ async function main() {
       rows = rowsFromBlocks(blocks, source);
       const elapsed = Date.now() - t0;
       const bytes = Buffer.byteLength(workiqStdout || '', 'utf8');
+      // Persist raw WorkIQ output for diagnosis. Lets users inspect why a
+      // source returned 0 accepted rows without re-running the query.
+      if (!args.dryRun) {
+        try {
+          const discoveryDir = path.join(aliasRoot(args.project, args.alias), '_discovery');
+          await fs.mkdir(discoveryDir, { recursive: true });
+          const header = `# discover ${source} @ ${new Date().toISOString()}\n# elapsed=${elapsed}ms bytes=${bytes} blocks=${blocks.length} rows=${rows.length}\n# prompt:\n${prompt.split('\n').map(l => '# ' + l).join('\n')}\n# --- workiq stdout ---\n`;
+          await fs.writeFile(path.join(discoveryDir, `${source}-raw.txt`), header + (workiqStdout || ''), 'utf8');
+        } catch { /* best-effort */ }
+      }
       if (rows.length === 0 && bytes < 8) {
         // Distinguish "workiq returned empty" from "timeout" — both used to look the same.
         skipReason = 'workiq-empty-response';

package/plugin/runners/lib/csc-pull.mjs ADDED Viewed

@@ -0,0 +1,415 @@
+// plugin/runners/lib/csc-pull.mjs
+// Shared deterministic pull pipeline for the 5 WorkIQ-only M365 sources
+// (email, teams, meetings, onenote, sharepoint).
+//
+// Per `workiq-only.instructions.md` (HARD RULE, kushi v3.11.0+):
+//   - WorkIQ is the ONLY path. Graph REST is FORBIDDEN.
+//   - On WorkIQ failure: write deferred-retry marker, continue.
+//
+// Doctrine references:
+//   - `comprehensive-structured-capture.instructions.md` (CSC block shape)
+//   - `weekly-csc.instructions.md` (weekly/<week>_<source>-csc.md + _index/)
+//   - `meetings-verbatim-required.instructions.md` (transcript.txt parallel)
+//
+// Output layout (per source per week):
+//   Evidence/<alias>/<source>/weekly/<YYYY-MM-DD>_<source>-csc.md
+//   Evidence/<alias>/<source>/_index/entities.yml
+//   Evidence/<alias>/<source>/_raw/<YYYY-MM-DD>_<entity>.txt   (raw WorkIQ stdout sidecar)
+//
+// Each puller is a thin wrapper that calls pullSource() with its source name
+// and a per-source prompt builder.
+import path from 'node:path';
+import { promises as fs } from 'node:fs';
+import YAML from 'yaml';
+import { sourceDir, aliasRoot } from './layout.mjs';
+import { writeAtomic, safeSegment, pathExists } from './evidence.mjs';
+import { ask as workiqAsk, resolveWorkiqBin } from './workiq.mjs';
+import { loadM365Auth, scopeForSource } from './m365-auth.mjs';
+import { updateCell } from './ledger.mjs';
+import { appendRunLog } from './runlog.mjs';
+import { enqueue, clear } from './deferred.mjs';
+import { emitLearningCandidate } from './learnings.mjs';
+import { currentIsoMonday, ymd, parseYmd } from './weeks.mjs';
+/** Compute Monday + Sunday-EOD ISO bounds for a week-start string. */
+export function weekBounds(weekStartYmd) {
+  const start = parseYmd(weekStartYmd);
+  const end = new Date(start);
+  end.setDate(end.getDate() + 7);
+  return {
+    fromIso: start.toISOString(),
+    toIso: end.toISOString(),
+    fromYmd: weekStartYmd,
+    toYmd: ymd(new Date(end.getTime() - 1)),
+  };
+}
+/**
+ * Build the canonical CSC prompt for a per-entity weekly pull.
+ * Mirrors the doctrine in workiq-only.instructions.md § "CSC canonical prompts".
+ */
+export function buildPullPrompt({ source, project, entity, weekStart, scope, opts = {} }) {
+  const { fromYmd, toYmd } = weekBounds(weekStart);
+  const lines = [];
+  if (source === 'email') {
+    lines.push(`Find all emails in Outlook folder "${entity}" related to project "${project}" between ${fromYmd} and ${toYmd}, inclusive.`);
+    if (scope?.includeSubfolders !== false) lines.push(`Include every nested subfolder beneath "${entity}".`);
+    if (scope?.dateFloor) lines.push(`Hard date floor: ${scope.dateFloor} (do not consider mail older than this).`);
+    lines.push('Group messages by conversationId. One CSC block per conversation touched in the week.');
+  } else if (source === 'teams') {
+    lines.push(`Find all Microsoft Teams messages in chat / channel "${entity}" between ${fromYmd} and ${toYmd}, inclusive.`);
+    if (scope?.dateFloor) lines.push(`Hard date floor: ${scope.dateFloor}.`);
+    lines.push('One CSC block for the chat thread (the whole entity), summarizing all messages in the week.');
+  } else if (source === 'meetings') {
+    lines.push(`Find the Teams meeting whose join URL is "${entity}" with occurrence between ${fromYmd} and ${toYmd}, inclusive.`);
+    lines.push('One CSC block per occurrence touched in the week.');
+  } else if (source === 'onenote') {
+    lines.push(`Find OneNote pages in section_file_id "${entity}" modified between ${fromYmd} and ${toYmd}, inclusive.`);
+    if (scope?.notebookName) lines.push(`Restrict to notebook "${scope.notebookName}".`);
+    lines.push('One CSC block per page touched in the week.');
+  } else if (source === 'sharepoint') {
+    lines.push(`Find SharePoint files within site "${entity}" modified between ${fromYmd} and ${toYmd}, inclusive.`);
+    lines.push('One CSC block per file touched in the week.');
+  } else {
+    throw new Error(`csc-pull: unsupported source "${source}"`);
+  }
+  lines.push('');
+  lines.push('Return ONLY structured-capture blocks of this exact shape:');
+  lines.push('> [block: csc]');
+  lines.push('> entity_id: <stable id>');
+  lines.push('> display_name: <human-readable title>');
+  lines.push('> last_touched: <ISO timestamp>');
+  lines.push('> participants: <comma-separated list>');
+  lines.push('> topics: <comma-separated short phrases>');
+  lines.push('> decisions: <comma-separated, or "_none_">');
+  lines.push('> action_items: <semicolon-separated "<owner> | <due> | <text>" tuples, or "_none_">');
+  lines.push('> open_questions: <comma-separated, or "_none_">');
+  lines.push('> risks: <comma-separated, or "_none_">');
+  lines.push('> next_steps: <comma-separated, or "_none_">');
+  lines.push('> summary: <one-sentence narrative>');
+  lines.push('');
+  lines.push('One block per entity touched in the week. No prose, no commentary.');
+  lines.push('If nothing was touched, return an empty response.');
+  lines.push('Skip web-search citation tokens like "turn1search5".');
+  return lines.join('\n');
+}
+/**
+ * Build the verbatim transcript prompt for meetings (alongside CSC block).
+ * Per meetings-verbatim-required.instructions.md.
+ */
+export function buildVerbatimTranscriptPrompt({ entity, weekStart }) {
+  const { fromYmd, toYmd } = weekBounds(weekStart);
+  return [
+    `For the Teams meeting with join URL "${entity}", occurrence between ${fromYmd} and ${toYmd}:`,
+    'Return the full verbatim transcript with timestamps. Do NOT summarize. Do NOT paraphrase.',
+    'If no transcript is available (meeting did not record, or transcription was off), return exactly:',
+    '> [block: transcript-unavailable]',
+    '> reason: <short explanation>',
+    '',
+    'If a transcript exists, return ONLY the verbatim text — no prose, no headers.',
+  ].join('\n');
+}
+/**
+ * Read --fixture file and return { stdout, stderr, exitCode } shape.
+ * Fixture is a JSON file with shape:
+ *   { "stdout": "...", "stderr": "", "exitCode": 0 }   // success
+ *   { "exitCode": 1, "stderr": "throttled" }            // failure
+ *   { "timeout": true }                                  // timeout simulation
+ */
+async function loadFixture(p) {
+  const txt = await fs.readFile(p, 'utf8');
+  return JSON.parse(txt);
+}
+/**
+ * Parse CSC blockquote blocks from WorkIQ stdout. Returns array of
+ *   { entity_id, display_name, last_touched, raw, fields }
+ * Blocks without entity_id are dropped (low-signal).
+ * Web-citation tokens like turn1search5 in entity_id are also dropped.
+ */
+export function parseEntityBlocks(text) {
+  if (!text || typeof text !== 'string') return [];
+  const out = [];
+  // Match `> [block: csc]` followed by lines starting with `> `.
+  const re = /(^|\n)>\s*\[block:\s*csc\]\s*\n((?:>\s*[^\n]*\n?)+)/g;
+  let m;
+  while ((m = re.exec(text)) !== null) {
+    const body = m[2].split('\n').map(l => l.replace(/^>\s?/, '')).filter(l => l.length).join('\n');
+    const fields = {};
+    for (const line of body.split('\n')) {
+      const mm = line.match(/^([a-zA-Z0-9_.-]+)\s*:\s*(.*)$/);
+      if (!mm) continue;
+      fields[mm[1].trim()] = mm[2].trim();
+    }
+    if (!fields.entity_id) continue;
+    if (/^turn\d+search\d+$/i.test(fields.entity_id)) continue;
+    if (/^<.*>$/.test(fields.entity_id)) continue;
+    out.push({
+      entity_id: fields.entity_id,
+      display_name: fields.display_name || fields.entity_id,
+      last_touched: fields.last_touched || null,
+      raw: body,
+      fields,
+    });
+  }
+  return out;
+}
+/** Slugify an entity_id into a safe markdown anchor / file-name segment. */
+export function entityAnchor(entityId) {
+  return String(entityId).toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '').slice(0, 80);
+}
+/** Format the CSC weekly markdown file from blocks + metadata. */
+export function formatWeeklyMarkdown({ source, weekStart, blocks, project, entity, pulledAt }) {
+  const lines = [
+    `# ${source.toUpperCase()} CSC — week ${weekStart}`,
+    '',
+    `- project: ${project}`,
+    `- source: ${source}`,
+    `- entity: ${entity}`,
+    `- week_start: ${weekStart}`,
+    `- pulled_at: ${pulledAt}`,
+    `- entities_touched: ${blocks.length}`,
+    '',
+  ];
+  if (blocks.length === 0) {
+    lines.push('_No activity this week._');
+    lines.push('');
+  } else {
+    for (const b of blocks) {
+      const anchor = entityAnchor(b.entity_id);
+      lines.push(`## ${b.display_name} {#${anchor}}`);
+      lines.push('');
+      lines.push(`- entity_id: \`${b.entity_id}\``);
+      if (b.last_touched) lines.push(`- last_touched: ${b.last_touched}`);
+      if (b.fields.participants) lines.push(`- participants: ${b.fields.participants}`);
+      if (b.fields.topics)       lines.push(`- topics: ${b.fields.topics}`);
+      if (b.fields.summary)      lines.push(`- summary: ${b.fields.summary}`);
+      const sections = [
+        ['Decisions',      b.fields.decisions],
+        ['Action Items',   b.fields.action_items],
+        ['Open Questions', b.fields.open_questions],
+        ['Risks',          b.fields.risks],
+        ['Next Steps',     b.fields.next_steps],
+      ];
+      for (const [label, val] of sections) {
+        if (!val || val === '_none_') continue;
+        lines.push('');
+        lines.push(`### ${label}`);
+        lines.push('');
+        for (const item of val.split(/[;,]/).map(s => s.trim()).filter(Boolean)) {
+          lines.push(`- ${item}`);
+        }
+      }
+      lines.push('');
+    }
+  }
+  return lines.join('\n');
+}
+/** Upsert one row per entity into _index/entities.yml. */
+export async function upsertEntitiesIndex(indexPath, blocks, { source, weekStart, latestCscFile }) {
+  let current = { entities: [] };
+  try {
+    const txt = await fs.readFile(indexPath, 'utf8');
+    current = YAML.parse(txt) ?? { entities: [] };
+    if (!Array.isArray(current.entities)) current.entities = [];
+  } catch (e) {
+    if (e.code !== 'ENOENT') throw e;
+  }
+  const byId = new Map(current.entities.map(e => [e.id, e]));
+  const now = new Date().toISOString();
+  for (const b of blocks) {
+    const id = `${source}://${b.entity_id}`;
+    const prev = byId.get(id);
+    const weeks = new Set([...(prev?.weeks_touched || []), weekStart]);
+    byId.set(id, {
+      id,
+      display_name: b.display_name,
+      entity_anchor: entityAnchor(b.entity_id),
+      latest_csc_file: latestCscFile,
+      last_touched: b.last_touched || now,
+      first_seen: prev?.first_seen || now,
+      weeks_touched: [...weeks].sort(),
+      status: 'captured',
+    });
+  }
+  const next = { entities: [...byId.values()].sort((a, b) => a.id.localeCompare(b.id)) };
+  await fs.mkdir(path.dirname(indexPath), { recursive: true });
+  return writeAtomic(indexPath, YAML.stringify(next), { skipIfUnchanged: false });
+}
+/** Classify a WorkIQ failure into deferred vs failed. */
+function classifyError(err) {
+  if (err.code === 'WORKIQ_TIMEOUT') return { status: 'deferred', signature: 'workiq-timeout', retryable: true };
+  if (err.code === 'WORKIQ_NOT_FOUND') return { status: 'failed', signature: 'workiq-not-found', retryable: false };
+  if (err.code === 'WORKIQ_EXIT_NONZERO') {
+    const stderr = String(err.stderr || '').toLowerCase();
+    if (/throttl|rate.?limit|429/.test(stderr)) return { status: 'deferred', signature: 'workiq-throttled', retryable: true };
+    if (/unauthor|forbidden|401|403/.test(stderr)) return { status: 'failed', signature: 'workiq-auth', retryable: false };
+    return { status: 'failed', signature: 'workiq-error', retryable: false };
+  }
+  return { status: 'failed', signature: 'workiq-unknown', retryable: false };
+}
+/**
+ * Full pipeline: prompt → WorkIQ → parse → write weekly + index → ledger + runlog.
+ * Returns { status, items_pulled, files_written, errors? }.
+ *
+ * Steps, in order:
+ *   1. Load m365 scope hints; when the source is disabled (`scope.enabled === false`)
+ *      return `not-applicable` without calling WorkIQ.
+ *   2. Build the prompt with buildPullPrompt(), using the basename of `project`.
+ *   3. Obtain stdout either from a fixture file or from a live workiqAsk() call
+ *      (300 s timeout). Fixture `timeout` / non-zero `exitCode` are turned into
+ *      WORKIQ_TIMEOUT / WORKIQ_EXIT_NONZERO errors.
+ *   4. On error: classifyError(), record ledger + runlog, then enqueue a deferred
+ *      retry (retryable) or emit a learning candidate (not retryable).
+ *   5-6. Parse blocks; zero blocks is reported as `no-activity`.
+ *   7. Write the weekly markdown, the entities index and the raw stdout sidecar,
+ *      then record `captured` in ledger + runlog.
+ *
+ * With `dryRun` set, no ledger, runlog, deferred-queue or file writes happen;
+ * the returned status is still computed (a dry run with blocks reports
+ * `captured` with an empty `files_written`).
+ *
+ * NOTE(review): the weekly file name is `<week>_<source>-csc.md` (no entity
+ * part) and its content is built from this call's blocks only, while the raw
+ * sidecar name does include the entity. It looks like pulling two entities of
+ * the same source for the same week would replace the first weekly file —
+ * confirm against writeAtomic() and the callers.
+ * NOTE(review): `mailbox` is accepted but not read in this function, and the
+ * local `stderr` is assigned but never read.
+ *
+ * @param {object} params
+ * @param {string} params.source - source name (see buildPullPrompt for the accepted values).
+ * @param {string} params.project - project root path.
+ * @param {string} params.alias - alias passed to ledger / layout helpers.
+ * @param {string} params.entity - entity descriptor for the prompt.
+ * @param {string} [params.week] - week-start date; defaults to ymd(currentIsoMonday()).
+ * @param {boolean} [params.dryRun=false] - skip every write.
+ * @param {string|null} [params.fixture=null] - path of a fixture JSON used instead of WorkIQ.
+ * @param {string|null} [params.mailbox=null] - currently unused here.
+ * @param {string} params.runner - runner label written to the run log.
+ * @returns {Promise<object>} result with source, entity, week, status,
+ *   items_pulled, files_written and, depending on the path, errors or ledger_key.
+ */
+export async function pullSource({ source, project, alias, entity, week, dryRun = false, fixture = null, mailbox = null, runner }) {
+  const weekStart = week || ymd(currentIsoMonday());
+  const startedAt = new Date().toISOString(); // later written as `pulled_at` in the weekly file
+  // 1. Load m365 scope hints (deterministic, optional): workspace-scoped first, then the default lookup, then an empty config.
+  const m365 = await loadM365Auth({ workspace: project }).catch(() => null)
+    || await loadM365Auth().catch(() => ({ config: {} }));
+  const scope = scopeForSource(m365.config, source);
+  if (scope && scope.enabled === false) {
+    const out = { source, entity, week: weekStart, status: 'not-applicable', items_pulled: 0, files_written: [] };
+    if (!dryRun) {
+      await updateCell(project, alias, source, entity, weekStart, { last_status: 'not-applicable' });
+      await appendRunLog(project, { runner, alias, entity, week: weekStart, status: 'not-applicable', reason: `${source} disabled in m365-auth.json` });
+    }
+    return out;
+  }
+  // 2. Build prompt (the project is named by its directory basename).
+  const prompt = buildPullPrompt({ source, project: path.basename(project), entity, weekStart, scope });
+  // 3. Call WorkIQ (or fixture). Failures are captured in workiqErr instead of being thrown.
+  let stdout = '', stderr = '', workiqErr = null;
+  if (fixture) {
+    const fx = await loadFixture(fixture);
+    if (fx.timeout) { workiqErr = Object.assign(new Error('fixture: timeout'), { code: 'WORKIQ_TIMEOUT' }); }
+    else if (fx.exitCode && fx.exitCode !== 0) {
+      workiqErr = Object.assign(new Error('fixture: nonzero'), { code: 'WORKIQ_EXIT_NONZERO', exitCode: fx.exitCode, stderr: fx.stderr || '', stdout: fx.stdout || '' });
+    } else {
+      stdout = fx.stdout || '';
+      stderr = fx.stderr || '';
+    }
+  } else {
+    const workiqBin = resolveWorkiqBin();
+    if (!await pathExists(workiqBin)) {
+      workiqErr = Object.assign(new Error(`workiq not found at ${workiqBin}`), { code: 'WORKIQ_NOT_FOUND' });
+    } else {
+      try {
+        const r = await workiqAsk(prompt, { bin: workiqBin, timeoutMs: 300_000 }); // 5-minute timeout
+        stdout = r.stdout;
+        stderr = r.stderr;
+      } catch (e) {
+        workiqErr = e;
+      }
+    }
+  }
+  // 4. Handle WorkIQ failure: record it, then either queue a retry or emit a learning candidate.
+  if (workiqErr) {
+    const { status, signature, retryable } = classifyError(workiqErr);
+    const errMsg = (workiqErr.message || '').slice(0, 1000); // cap the stored message at 1000 chars
+    if (!dryRun) {
+      await updateCell(project, alias, source, entity, weekStart, { last_status: status, last_error: `${signature}: ${errMsg}` });
+      if (retryable) await enqueue(project, alias, { source, entity, weekStart, signature, reason: errMsg });
+      else await emitLearningCandidate({ projectRoot: project, alias, source, entity, week: weekStart, error: { signature, message: errMsg }, context: { runner } });
+      await appendRunLog(project, { runner, alias, entity, week: weekStart, status, error: errMsg, signature });
+    }
+    return { source, entity, week: weekStart, status, items_pulled: 0, files_written: [], errors: [{ signature, message: errMsg }] };
+  }
+  // 5. Parse blocks.
+  const blocks = parseEntityBlocks(stdout);
+  // 6. No-activity case: nothing is written to disk; only ledger + runlog are updated.
+  if (blocks.length === 0) {
+    if (!dryRun) {
+      await updateCell(project, alias, source, entity, weekStart, { last_status: 'no-activity', items_pulled: 0 });
+      await appendRunLog(project, { runner, alias, entity, week: weekStart, status: 'no-activity', items_pulled: 0 });
+      await clear(project, alias, source, entity).catch(() => {}); // best-effort: a failing clear() does not fail the pull
+    }
+    return { source, entity, week: weekStart, status: 'no-activity', items_pulled: 0, files_written: [] };
+  }
+  // 7. Write weekly file + index + raw stdout sidecar.
+  const outDir = path.join(sourceDir(project, alias, source), 'weekly');
+  const indexDir = path.join(sourceDir(project, alias, source), '_index');
+  const fname = `${weekStart}_${source}-csc.md`; // NOTE(review): no entity in the name — see the function header
+  const filePath = path.join(outDir, fname);
+  const indexPath = path.join(indexDir, 'entities.yml');
+  const rawPath = path.join(sourceDir(project, alias, source), '_raw', `${weekStart}_${safeSegment(entity)}.txt`);
+  const filesWritten = [];
+  if (!dryRun) {
+    const md = formatWeeklyMarkdown({ source, weekStart, blocks, project: path.basename(project), entity, pulledAt: startedAt });
+    const r1 = await writeAtomic(filePath, md, { skipIfUnchanged: false });
+    if (r1.written !== false) filesWritten.push(path.relative(project, r1.path)); // reported relative to the project root
+    const r2 = await upsertEntitiesIndex(indexPath, blocks, { source, weekStart, latestCscFile: `weekly/${fname}` });
+    if (r2.written !== false) filesWritten.push(path.relative(project, r2.path));
+    const r3 = await writeAtomic(rawPath, stdout, { skipIfUnchanged: false });
+    if (r3.written !== false) filesWritten.push(path.relative(project, r3.path));
+    await updateCell(project, alias, source, entity, weekStart, { last_status: 'captured', items_pulled: blocks.length });
+    await appendRunLog(project, { runner, alias, entity, week: weekStart, status: 'captured', items_pulled: blocks.length });
+    await clear(project, alias, source, entity).catch(() => {}); // best-effort: a failing clear() does not fail the pull
+  }
+  return {
+    source,
+    entity,
+    week: weekStart,
+    status: 'captured',
+    items_pulled: blocks.length,
+    files_written: filesWritten,
+    ledger_key: `${source}::${entity}::${weekStart}`,
+  };
+}
+/**
+ * Standard CLI entrypoint shared across all 5 pullers.
+ * Each puller imports this and calls runCli(SOURCE_NAME).
+ */
+export async function runCli(source) {
+  const argv = process.argv.slice(2);
+  const args = { dryRun: false };
+  for (let i = 0; i < argv.length; i++) {
+    const a = argv[i];
+    if (a === '--project') args.project = argv[++i];
+    else if (a === '--alias') args.alias = argv[++i];
+    else if (a === '--entity') args.entity = argv[++i];
+    else if (a === '--mailbox') args.mailbox = argv[++i];
+    else if (a === '--week') args.week = argv[++i];
+    else if (a === '--dry-run') args.dryRun = true;
+    else if (a === '--force') args.force = true;
+    else if (a === '--fixture') args.fixture = argv[++i];
+    else if (a === '--help' || a === '-h') args.help = true;
+  }
+  if (args.help) {
+    console.log(`Usage: node pull-${source}.mjs --project <P> --alias <A> --entity <e> [--week YYYY-MM-DD] [--dry-run] [--fixture <path>]`);
+    return 0;
+  }
+  if (!args.project || !args.alias || !args.entity) {
+    console.error(`required: --project --alias --entity`);
+    process.stdout.write(JSON.stringify({ source, status: 'failed', errors: [{ signature: 'bad-args', message: 'required: --project --alias --entity' }] }) + '\n');
+    return 2;
+  }
+  try {
+    const result = await pullSource({
+      source,
+      project: path.resolve(args.project),
+      alias: args.alias,
+      entity: args.entity,
+      week: args.week,
+      dryRun: args.dryRun,
+      fixture: args.fixture,
+      mailbox: args.mailbox,
+      runner: `pull-${source}`,
+    });
+    process.stdout.write(JSON.stringify(result) + '\n');
+    return 0;
+  } catch (e) {
+    process.stdout.write(JSON.stringify({ source, status: 'failed', errors: [{ message: e.message }] }) + '\n');
+    return 1;
+  }
+}