npm - kushi-agents - Versions diffs - 5.8.3 → 5.9.0 - Mend

kushi-agents 5.8.3 → 5.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/bin/cli.mjs +84 -1
package/package.json +2 -1
package/plugin/runners/discover.mjs +24 -9
package/plugin/runners/lib/references.mjs +164 -0
package/plugin/runners/pull-references.mjs +209 -0
package/plugin/runners/pull-state.mjs +297 -0
package/plugin/runners/refresh.mjs +20 -0
package/plugin/templates/init/m365-auth.template.json +1 -0
package/src/main.mjs +4 -1

package/bin/cli.mjs CHANGED Viewed

@@ -252,17 +252,100 @@ if (args.includes('--help') || args.includes('-h')) {
   After install, talk to Kushi:
     bootstrap <project>     First-time setup
     refresh <project>       Incremental refresh + rebuild State/
-    state <project>         Re-render State/ from existing Evidence
+    state <project>         Re-render State/ from existing Evidence (deterministic
+                            inventory; LLM build-state skill does narrative synthesis)
+    references <project>    Scan Evidence for URLs and refresh the shared
+                            references pool (Evidence/_shared/references/)
     consolidate <project>   Merge per-user evidence
     status <project>        Show run-log
     ask <project> <q>       Cited Q&A over Evidence/ (auto-routes, --file-back to save)
     lint <project>          Run wiki-lint checks on State/
+  Workspace lifecycle (v5.9.0+):
+    uninstall [--keep-config]      Remove <cwd>/.kushi/ (preserves Evidence/, State/).
+                                   --keep-config preserves config/user/ identity files.
+    upgrade                        npm i -g kushi-agents@latest then re-seed assets
+                                   in cwd (config preserved).
   In VS Code Chat the prefix is "@Kushi". In Clawpilot just say "kushi <verb>".
 `);
   process.exit(0);
 }
// ── state / refresh / bootstrap verbs (v5.9.0+) ─────────────────────────────
// Each verb here is a thin shell: it resolves the matching deterministic runner
// under ../plugin/runners/ (relative to this bin script) and re-executes it with
// the current Node binary, forwarding any extra CLI arguments untouched.
if (args.length > 0 && new Set(['state', 'refresh-runner', 'bootstrap-runner', 'discover', 'references']).has(args[0])) {
  const [verb, project, ...passthrough] = args;
  if (!project) {
    console.error(`\n  Usage: kushi ${verb} <project> [options]\n`);
    process.exit(1);
  }
  const [{ spawnSync }, { dirname, resolve }, { fileURLToPath }] = await Promise.all([
    import('node:child_process'),
    import('node:path'),
    import('node:url'),
  ]);
  // Verb → runner script file name.
  const runnerFileByVerb = {
    state: 'pull-state.mjs',
    references: 'pull-references.mjs',
    discover: 'discover.mjs',
    'refresh-runner': 'refresh.mjs',
    'bootstrap-runner': 'bootstrap.mjs',
  };
  const binDir = dirname(fileURLToPath(import.meta.url));
  const runnerPath = resolve(binDir, '..', 'plugin', 'runners', runnerFileByVerb[verb]);
  const result = spawnSync(process.execPath, [runnerPath, '--project', project, ...passthrough], { stdio: 'inherit' });
  // A null status (child killed by a signal or never started) is reported as failure.
  process.exit(result.status ?? 1);
}
// ── workspace uninstall / upgrade verbs (v5.9.0+) ───────────────────────────
// Workspace uninstall: delete <cwd>/.kushi/ (Evidence/ and State/ are never
// touched). Skipped when a host flag is present so those invocations fall
// through to the handlers further down the file.
if (
  args.length > 0 &&
  args[0] === 'uninstall' &&
  !['--clawpilot', '--vscode', '--all-hosts'].some((flag) => args.includes(flag))
) {
  const { existsSync, rmSync } = await import('node:fs');
  const { join, resolve } = await import('node:path');
  const kushiDir = resolve(process.cwd(), '.kushi');
  if (!existsSync(kushiDir)) {
    console.error(`\n  No .kushi/ directory found at ${kushiDir}\n`);
    process.exit(1);
  }
  const rmOptions = { recursive: true, force: true };
  if (!args.includes('--keep-config')) {
    // Full removal of the workspace install.
    rmSync(kushiDir, rmOptions);
    console.log(`\n  Removed ${kushiDir}\n  Evidence/ and State/ left untouched.\n`);
    process.exit(0);
  }
  // --keep-config: remove only the seeded asset directories; anything else
  // under .kushi/ is left in place.
  const assetDirs = ['agents', 'instructions', 'prompts', 'skills', 'templates', 'reference-packs', 'lib', 'runners'];
  const present = assetDirs.map((name) => join(kushiDir, name)).filter((candidate) => existsSync(candidate));
  for (const target of present) rmSync(target, rmOptions);
  console.log(`\n  Removed ${present.length} asset dir(s) from ${kushiDir} (config/user/ preserved).\n`);
  process.exit(0);
}
// Workspace upgrade: reinstall the latest global package via npm, then re-seed
// the assets of the current directory when it already contains .kushi/.
if (args.length > 0 && args[0] === 'upgrade') {
  const { spawnSync } = await import('node:child_process');
  const { existsSync } = await import('node:fs');
  const onWindows = process.platform === 'win32';
  const npm = onWindows ? 'npm.cmd' : 'npm';
  // Current Node.js releases refuse to spawn .cmd/.bat files on Windows without
  // a shell (spawnSync reports EINVAL and a null status), so go through the
  // shell there. Every argument below is a fixed literal, so nothing
  // user-supplied is ever interpreted by that shell.
  const spawnOptions = { stdio: 'inherit', shell: onWindows };

  console.log('\n  Upgrading kushi-agents globally via npm...\n');
  const r1 = spawnSync(npm, ['install', '-g', 'kushi-agents@latest'], spawnOptions);
  if (r1.status !== 0) {
    // r1.error is set when the process could not be started at all.
    const reason = r1.error ? ` (${r1.error.message})` : '';
    console.error(`\n  npm install failed${reason}.\n`);
    process.exit(r1.status ?? 1);
  }

  console.log('\n  Refreshing assets in cwd (config preserved)...\n');
  if (!existsSync('.kushi')) {
    console.log('\n  No .kushi/ in cwd — global upgrade complete; cd into a project and run `kushi` to install.\n');
    process.exit(0);
  }
  const r2 = spawnSync(npm, ['exec', '--', 'kushi-agents', '--no-prompt', '--force'], spawnOptions);
  // A null status means the re-seed never ran or was killed: that is a failure,
  // not a success.
  process.exit(r2.status ?? 1);
}
 // ── multi-host mode (v5.0.2+) ───────────────────────────────────────────────
 // Trigger when the user passes any of: --vscode, --all-hosts, --uninstall.
 // --clawpilot ALONE continues to route through the legacy main.mjs path so

package/package.json CHANGED Viewed

@@ -1,9 +1,10 @@
 {
   "name": "kushi-agents",
-  "version": "5.8.3",
+  "version": "5.9.0",
   "description": "Install Kushi — multi-source project evidence agent with Comprehensive Structured Capture (CSC) into weekly-only files across Email, Teams, OneNote, Loop, SharePoint, Meetings, CRM, ADO. Meetings retain a sibling verbatim/ audit folder. WorkIQ-only for M365 sources (Graph / m365_* FORBIDDEN as fallbacks; user-paste is first-class). Host-agnostic.",
   "type": "module",
   "bin": {
+    "kushi": "./bin/cli.mjs",
     "kushi-agents": "./bin/cli.mjs"
   },
   "files": [

package/plugin/runners/discover.mjs CHANGED Viewed

@@ -89,16 +89,29 @@ function buildPrompt(source, projectName, scope = null) {
         ? scope.folders
         : ['Inbox']; // safe default — bounds the query so WorkIQ uses Graph filter, not mailbox-wide semantic search
       const isDefault = !(Array.isArray(scope.folders) && scope.folders.length > 0);
+      // matchingPolicy.mode drives whether we search by subfolder name, by mail content, or both.
+      // Recognized values: 'subfolder-only', 'keyword-only', 'hybrid' (default).
+      const mode = (scope.matchMode || 'hybrid').toLowerCase();
+      const doFolder = mode !== 'keyword-only';
+      const doKeyword = mode !== 'subfolder-only';
       lines.push('');
-      if (scope.fuzzy !== false) {
-        lines.push('Restrict your search to Outlook mail folders whose name CONTAINS any of these tokens (case-insensitive, fuzzy substring match — e.g. "FDE" matches "1. FDE", "01 FDE Active", "FDE-archive"):');
-      } else {
-        lines.push('Restrict your search to ONLY these Outlook mail folders (exact name match):');
-      }
+      lines.push('SCOPE BOUNDARY — search ONLY within these Outlook parent folders and ALL nested descendants:');
       for (const f of folders) {
-        lines.push(`  • "${f}"${scope.includeSubfolders ? ' (and all subfolders)' : ''}`);
+        lines.push(`  • "${f}"${scope.includeSubfolders !== false ? ' (recursively include every subfolder underneath)' : ' (this folder only)'}`);
+      }
+      lines.push('Parent folder name match: case-insensitive, fuzzy contains (so "1. FDE" matches "1. FDE", "01. FDE", "1 FDE").');
+      lines.push('Do NOT look outside this boundary.');
+      lines.push('');
+      lines.push(`SEARCH STRATEGY for project "${projectName}" (matchingPolicy.mode = "${mode}"):`);
+      if (doFolder && doKeyword) {
+        lines.push(`  1. Subfolder match: look for any subfolder whose name fuzzy-contains "${projectName}" (e.g. "${projectName}", "102. ${projectName}", "${projectName} - Engagement"). Emit each as a match.`);
+        lines.push(`  2. Mail-content match: also search email subjects, bodies, sender/recipient names within the boundary for "${projectName}" (case-insensitive). Group matching mail by folder and emit each folder.`);
+        lines.push(`  Run BOTH and merge results — do not stop at step 1.`);
+      } else if (doFolder) {
+        lines.push(`  Subfolder match only: emit subfolders whose name fuzzy-contains "${projectName}".`);
+      } else {
+        lines.push(`  Mail-content match only: search email subjects, bodies, sender/recipient names within the boundary for "${projectName}" (case-insensitive). Group matching mail by folder and emit each folder.`);
       }
-      lines.push('Do NOT scan any other mailbox folders.');
       if (isDefault) {
         lines.push('(Note: no project-specific folders configured — defaulting to Inbox+subfolders. For faster, more accurate results, populate emailContext.folders in m365-auth.json.)');
       }
@@ -188,12 +201,14 @@ function applyRows(source, rows, currentBounds, currentInteg) {
     return { boundariesPatch: added.length ? { onenote: { section_file_ids: merged } } : null, accepted };
   }
   if (source === 'sharepoint') {
-    const existing = currentBounds.sharepoint?.sites || [];
+    // v5.9.0: SP sites are project-wide, not per-alias. Write into integrations.yml.
+    const existing = currentInteg.sharepoint?.sites || [];
     const incoming = rows.map(r => r.site_url).filter(Boolean);
     const merged = dedup([...existing, ...incoming]);
     const added = merged.filter(v => !existing.includes(v));
     if (added.length) accepted.push(...added);
-    return { boundariesPatch: added.length ? { sharepoint: { sites: merged } } : null, accepted };
+    const cur = currentInteg.sharepoint || {};
+    return { integrationsPatch: added.length ? { sharepoint: { ...cur, sites: merged } } : null, accepted };
   }
   if (source === 'crm') {
     const cur = currentInteg.crm || {};

package/plugin/runners/lib/references.mjs ADDED Viewed

@@ -0,0 +1,164 @@
+// plugin/runners/lib/references.mjs
+// Deterministic URL extraction, classification, and lightweight HTTP snapshot
+// for the unified references pool. No LLM. v5.9.0.
+import { promises as fs } from 'node:fs';
+import path from 'node:path';
+import crypto from 'node:crypto';
/** Permissive URL regex. Captures http(s) URLs in markdown / yaml / plain text. */
const URL_RE = /\bhttps?:\/\/[^\s<>"'`)\]}|\\]+/gi;
/** Trailing punctuation that is almost never part of a URL. */
const TRAILING_TRIM = /[)\].,;:!?>'"]+$/;
/**
 * Collect the distinct http(s) URLs found in `text`, in first-seen order.
 * Trailing punctuation is trimmed from each hit, and hits of 8 characters or
 * fewer after trimming are discarded.
 * @param {string} text - Text to scan; any non-string or empty value yields [].
 * @returns {string[]} De-duplicated URLs.
 */
export function extractUrls(text) {
  if (typeof text !== 'string' || text === '') return [];
  const trimmed = Array.from(text.matchAll(URL_RE), ([hit]) => hit.replace(TRAILING_TRIM, ''));
  return [...new Set(trimmed.filter((candidate) => candidate.length > 8))];
}
/**
 * Short, stable identifier for a URL (used for filenames and index keys).
 * @param {string} url - URL to hash; it is normalized with normalizeUrl() first.
 * @returns {string} First 16 hex characters of the SHA-1 of the normalized URL.
 */
export function urlHash(url) {
  const canonical = normalizeUrl(url);
  const hexDigest = crypto.createHash('sha1').update(canonical).digest('hex');
  return hexDigest.slice(0, 16);
}
/**
 * Normalize a URL for de-duplication: drop the fragment and common tracking
 * query parameters, keeping the rest of the path and query.
 *
 * Tracking parameter names are matched case-insensitively, because
 * URLSearchParams.delete() is case-sensitive and such links commonly carry
 * `WT.mc_id` or upper-cased `utm_*` names that an exact lower-case match misses.
 *
 * @param {string} url - URL to normalize.
 * @returns {string} The normalized URL, or `url` unchanged when it cannot be parsed.
 */
export function normalizeUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return url;
  }
  parsed.hash = '';
  const tracking = ['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content', 'wt.mc_id'];
  // Exact-case pass, kept as in the original implementation.
  for (const name of tracking) parsed.searchParams.delete(name);
  // Case-insensitive pass; snapshot the keys first because delete() mutates
  // the live parameter list.
  for (const key of [...new Set(parsed.searchParams.keys())]) {
    if (tracking.includes(key.toLowerCase())) parsed.searchParams.delete(key);
  }
  return parsed.toString();
}
/**
 * Ordered host-classification table. The first rule whose `match` regex
 * accepts the lower-cased hostname wins; `host` is the canonical name reported
 * for that family.
 */
const HOST_RULES = [
  // Sources marked as requiring authentication.
  { match: /(^|\.)sharepoint\.com$/i, host: 'sharepoint.com', kind: 'sharepoint', authRequired: true },
  { match: /(^|\.)loop\.microsoft\.com$/i, host: 'loop.microsoft.com', kind: 'loop', authRequired: true },
  { match: /loop\.cloud\.microsoft$/i, host: 'loop.cloud.microsoft', kind: 'loop', authRequired: true },
  { match: /loop-api\.cloud\.microsoft$/i, host: 'loop-api.cloud.microsoft', kind: 'loop', authRequired: true },
  { match: /(^|\.)fluidpreview\.office\.net$/i, host: 'fluidpreview.office.net', kind: 'loop', authRequired: true },
  { match: /(^|\.)teams\.microsoft\.com$/i, host: 'teams.microsoft.com', kind: 'teams', authRequired: true },
  { match: /(^|\.)office\.com$/i, host: 'office.com', kind: 'office', authRequired: true },
  { match: /(^|\.)dev\.azure\.com$/i, host: 'dev.azure.com', kind: 'ado', authRequired: true },
  { match: /(^|\.)visualstudio\.com$/i, host: 'visualstudio.com', kind: 'ado', authRequired: true },
  { match: /(^|\.)dynamics\.com$/i, host: 'dynamics.com', kind: 'crm', authRequired: true },
  // Sources marked as not requiring authentication.
  { match: /(^|\.)learn\.microsoft\.com$/i, host: 'learn.microsoft.com', kind: 'docs', authRequired: false },
  { match: /(^|\.)docs\.microsoft\.com$/i, host: 'docs.microsoft.com', kind: 'docs', authRequired: false },
  { match: /(^|\.)github\.com$/i, host: 'github.com', kind: 'repo', authRequired: false },
];
/**
 * Classify a URL by its hostname.
 * @param {string} url - URL to classify.
 * @returns {{host: string, kind: string, authRequired: boolean}} The matching
 *   rule's values; `kind: 'external'` with the actual hostname when no rule
 *   matches; `{ host: 'unknown', kind: 'invalid' }` when the URL cannot be parsed.
 */
export function classify(url) {
  let hostname;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    return { host: 'unknown', kind: 'invalid', authRequired: false };
  }
  const rule = HOST_RULES.find(({ match }) => match.test(hostname));
  if (rule === undefined) {
    return { host: hostname, kind: 'external', authRequired: false };
  }
  const { host, kind, authRequired } = rule;
  return { host, kind, authRequired };
}
/**
 * Turn a host name into a safe filename segment: lower-cased, every character
 * outside [a-z0-9.-] replaced by "_", capped at 80 characters.
 * @param {string} host - Host name; a falsy value becomes "unknown".
 * @returns {string} Sanitized segment.
 */
export function safeHost(host) {
  const lowered = (host || 'unknown').toLowerCase();
  const sanitized = lowered.replace(/[^a-z0-9.-]/g, '_');
  return sanitized.slice(0, 80);
}
/**
 * Fetch a URL and extract a small content snapshot.
 *
 * The body is read chunk by chunk and reading stops once `maxBytes` have
 * arrived; the buffer is then cut to exactly `maxBytes`, so the final chunk can
 * no longer push the payload past the limit.
 *
 * @param {string} url - URL to fetch (redirects are followed).
 * @param {{timeoutMs?: number, maxBytes?: number}} [options] - `timeoutMs`
 *   aborts the request after that many milliseconds (default 15000);
 *   `maxBytes` bounds the retained payload (default 64KB).
 * @returns {Promise<object>} On a completed request:
 *   `{ ok, status, title, description, h1, contentType, bytes, snippet }`, where
 *   `bytes` is the size of the retained payload. On any failure:
 *   `{ ok: false, status: 0, error }`, with `error` set to 'timeout' for an abort.
 *   This function does not throw.
 */
export async function fetchSnapshot(url, { timeoutMs = 15000, maxBytes = 64 * 1024 } = {}) {
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), timeoutMs);
  try {
    const res = await fetch(url, {
      redirect: 'follow',
      signal: ctrl.signal,
      headers: { 'user-agent': 'kushi-references/1.0 (+https://github.com/ushakrishnan/kushi)' },
    });
    const contentType = res.headers.get('content-type') || '';
    const reader = res.body?.getReader();
    const chunks = [];
    let received = 0;
    if (reader) {
      try {
        while (received < maxBytes) {
          const { done, value } = await reader.read();
          if (done) break;
          chunks.push(value);
          received += value.byteLength;
        }
      } finally {
        // cancel() returns a promise; a plain try/catch around the call would
        // not see its rejection, so await it and ignore the outcome explicitly.
        await reader.cancel().catch(() => {});
      }
    }
    // Enforce the documented bound: the last chunk may have overshot maxBytes.
    const buf = Buffer.concat(chunks).subarray(0, maxBytes);
    const text = buf.toString('utf8');
    // Treat as markup when the content type says so or the payload looks like HTML.
    const html = /html|xml/i.test(contentType) || /^\s*<!doctype html|<html/i.test(text);
    const title = html ? extractTag(text, 'title') : '';
    const description = html ? extractMeta(text, 'description') : '';
    const h1 = html ? extractTag(text, 'h1') : '';
    const snippet = html ? stripHtml(text).slice(0, 600) : text.slice(0, 600);
    return {
      ok: res.ok,
      status: res.status,
      title: clean(title),
      description: clean(description),
      h1: clean(h1),
      contentType,
      bytes: buf.length,
      snippet: clean(snippet),
    };
  } catch (e) {
    return { ok: false, status: 0, error: e.name === 'AbortError' ? 'timeout' : (e.code || e.message) };
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Return the inner text of the first `<tag>…</tag>` element in `html`
 * (case-insensitive, may span lines), or '' when there is none.
 * @param {string} html - Markup to search.
 * @param {string} tag - Tag name; inserted into a regular expression as-is.
 * @returns {string} Raw inner content of the first match.
 */
function extractTag(html, tag) {
  const pattern = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`, 'i');
  const found = html.match(pattern);
  if (found === null) return '';
  return found[1];
}
/**
 * Return the `content` of the first `<meta name="…">` tag whose name equals
 * `name`, falling back to the first `<meta property="og:…">` tag.
 *
 * Attributes are parsed individually, so the value may contain the other kind
 * of quote (`content="It's …"`) and `content` may come before `name`.
 * Name matching is case-insensitive. Tags with an empty `content` are skipped.
 *
 * @param {string} html - Markup to search.
 * @param {string} name - Meta name, e.g. 'description'.
 * @returns {string} The content value, or '' when no matching tag exists.
 */
function extractMeta(html, name) {
  const wanted = String(name).toLowerCase();
  // Parse every <meta …> tag into a map of lower-cased attribute → value.
  const metas = [];
  for (const [tag] of html.matchAll(/<meta\b[^>]*>/gi)) {
    const attrs = new Map();
    for (const [, key, doubleQuoted, singleQuoted] of tag.matchAll(/([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g)) {
      const attrName = key.toLowerCase();
      // First occurrence of a repeated attribute wins.
      if (!attrs.has(attrName)) attrs.set(attrName, doubleQuoted ?? singleQuoted);
    }
    metas.push(attrs);
  }
  const contentFor = (attr, value) =>
    metas.find((attrs) => attrs.get(attr)?.toLowerCase() === value && attrs.get('content'))?.get('content');
  // A plain name= match anywhere in the document takes priority over og:.
  return contentFor('name', wanted) ?? contentFor('property', `og:${wanted}`) ?? '';
}
/**
 * Reduce markup to plain text: drop <script> and <style> elements with their
 * contents, replace every remaining tag with a space, collapse whitespace runs
 * and trim the result.
 * @param {string} s - Markup to strip.
 * @returns {string} Plain text on a single line.
 */
function stripHtml(s) {
  // Applied in order; each pass replaces its matches with a single space.
  const passes = [
    /<script[\s\S]*?<\/script>/gi,
    /<style[\s\S]*?<\/style>/gi,
    /<[^>]+>/g,
    /\s+/g,
  ];
  let text = s;
  for (const pattern of passes) {
    text = text.replace(pattern, ' ');
  }
  return text.trim();
}
/**
 * Normalize a text field: collapse whitespace runs to single spaces, trim, and
 * cap the result at 800 characters.
 * @param {string} s - Raw text; a falsy value is treated as ''.
 * @returns {string} Normalized text.
 */
function clean(s) {
  const raw = s || '';
  const singleSpaced = raw.replace(/\s+/g, ' ').trim();
  return singleSpaced.slice(0, 800);
}
/**
 * Walk a directory recursively, returning files matching `extensions`.
 *
 * `skipDirs` entries are matched against whole path segments (one or more
 * consecutive directory names), not raw substrings, so skipping `.git` does not
 * also skip `.github`, and `node_modules` does not skip `my_node_modules_notes`.
 * Unreadable directories are skipped silently.
 *
 * @param {string} dir - Directory to start from.
 * @param {{extensions?: string[], skipDirs?: string[]}} [options] - `extensions`
 *   are lower-case extensions including the dot; `skipDirs` are directory names
 *   or relative sub-paths (either slash style) to leave out.
 * @returns {Promise<string[]>} Paths of the matching files, built by joining onto `dir`.
 */
export async function walkFiles(dir, { extensions = ['.yml','.yaml','.md','.txt','.json'], skipDirs = ['_shared/references','node_modules','.git'] } = {}) {
  // Wrap each skip entry in slashes so it can only match on segment boundaries.
  const skipNeedles = skipDirs.map((s) => `/${s.replaceAll('\\', '/').replace(/^\/+|\/+$/g, '')}/`);
  const isSkipped = (full) => {
    const haystack = `/${full.replaceAll('\\', '/')}/`;
    return skipNeedles.some((needle) => haystack.includes(needle));
  };
  const out = [];
  async function walk(d) {
    let entries;
    try { entries = await fs.readdir(d, { withFileTypes: true }); } catch { return; }
    for (const e of entries) {
      const full = path.join(d, e.name);
      if (e.isDirectory()) {
        if (!isSkipped(full)) await walk(full);
      } else if (e.isFile()) {
        if (extensions.includes(path.extname(e.name).toLowerCase())) out.push(full);
      }
    }
  }
  await walk(dir);
  return out;
}

package/plugin/runners/pull-references.mjs ADDED Viewed

@@ -0,0 +1,209 @@
+#!/usr/bin/env node
+// plugin/runners/pull-references.mjs
+// Unified references pool. Scans Evidence/ for URLs, dedupes against an index,
+// classifies by host, snapshots external URLs via HTTP. Auth-protected URLs
+// (SP/Loop/Teams/ADO/CRM) are recorded with metadata only and marked
+// `pending-auth-fetch` for follow-up by source-specific pulls.
+//
+// Project-shared. Not dated. One snapshot per URL. Re-crawl with --refresh.
+//
+// Usage:
+//   node plugin/runners/pull-references.mjs --project <P> [--refresh] [--dry-run]
+//     [--timeout-ms N] [--max-fetch N] [--only-host <h>]
+import path from 'node:path';
+import { promises as fs } from 'node:fs';
+import YAML from 'yaml';
+import { evidenceRoot, sharedRoot, projectRoot } from './lib/layout.mjs';
+import { writeAtomic, pathExists } from './lib/evidence.mjs';
+import { extractUrls, urlHash, normalizeUrl, classify, safeHost, fetchSnapshot, walkFiles } from './lib/references.mjs';
/**
 * Parse the runner's command-line flags.
 *
 * Numeric flags are validated explicitly rather than with `Number(x) || default`,
 * so `--max-fetch 0` (index only, fetch nothing) is honoured, and missing,
 * non-numeric or out-of-range values fall back to the defaults.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{dryRun: boolean, refresh: boolean, timeoutMs: number, maxFetch: number,
 *   project?: string, onlyHost?: string, help?: boolean}} Parsed options.
 *   Unknown flags are ignored.
 */
function parseArgs(argv) {
  const args = { dryRun: false, refresh: false, timeoutMs: 15000, maxFetch: 50 };
  // Returns the numeric value of `raw` when `isValid` accepts it, else `fallback`.
  const numberOr = (raw, fallback, isValid) => {
    if (raw === undefined || String(raw).trim() === '') return fallback;
    const parsed = Number(raw);
    return Number.isFinite(parsed) && isValid(parsed) ? parsed : fallback;
  };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--project') args.project = argv[++i];
    else if (a === '--refresh') args.refresh = true;
    else if (a === '--dry-run') args.dryRun = true;
    // A timeout must be positive: zero would abort every request immediately.
    else if (a === '--timeout-ms') args.timeoutMs = numberOr(argv[++i], 15000, (n) => n > 0);
    // Zero is a legitimate cap; negatives and fractions are not.
    else if (a === '--max-fetch') args.maxFetch = numberOr(argv[++i], 50, (n) => Number.isInteger(n) && n >= 0);
    else if (a === '--only-host') args.onlyHost = argv[++i];
    else if (a === '--help' || a === '-h') args.help = true;
  }
  return args;
}
/**
 * One-line usage string for this runner.
 * @returns {string} Usage text listing every supported flag.
 */
function help() {
  const usage = `Usage: node pull-references.mjs --project <P> [--refresh] [--dry-run] [--timeout-ms N] [--max-fetch N] [--only-host <h>]`;
  return usage;
}
/**
 * Write `obj` to stdout as a single JSON line (the runner's result channel).
 * @param {object} obj - JSON-serializable value.
 */
function emit(obj) {
  const line = JSON.stringify(obj);
  process.stdout.write(line + '\n');
}
/**
 * Write a progress message to stderr, prefixed with "[references]".
 * @param {string} msg - Message text.
 */
function log(msg) {
  const line = `[references] ${msg}\n`;
  process.stderr.write(line);
}
/**
 * Directory of the references pool: `references/` under sharedRoot(project).
 * @param {string} project - Value passed through to sharedRoot().
 * @returns {string} Path of the references directory.
 */
function refsRoot(project) {
  const shared = sharedRoot(project);
  return path.join(shared, 'references');
}
/**
 * Location of the pool's index file (`index.yml` inside refsRoot()).
 * @param {string} project - Value passed through to refsRoot().
 * @returns {string} Path of the index file.
 */
function indexPath(project) {
  const poolDir = refsRoot(project);
  return path.join(poolDir, 'index.yml');
}
/**
 * Location of one URL's record: `by-host/<safe host>/<hash>.md` inside refsRoot().
 * @param {string} project - Value passed through to refsRoot().
 * @param {string} host - Host name; sanitized with safeHost().
 * @param {string} hash - URL hash used as the file stem.
 * @returns {string} Path of the record file.
 */
function recordPath(project, host, hash) {
  const hostDir = path.join(refsRoot(project), 'by-host', safeHost(host));
  return path.join(hostDir, `${hash}.md`);
}
/**
 * Load the references index from `p`.
 * A missing file, an unreadable or unparsable file, and a document that parses
 * to a falsy value all yield a fresh empty index.
 * @param {string} p - Path of the index YAML file.
 * @returns {Promise<object>} Parsed index, or `{ version: 1, entries: {} }`.
 */
async function loadIndex(p) {
  const emptyIndex = () => ({ version: 1, entries: {} });
  if (!(await pathExists(p))) return emptyIndex();
  try {
    const raw = await fs.readFile(p, 'utf8');
    const parsed = YAML.parse(raw);
    return parsed || emptyIndex();
  } catch {
    // Best effort: a corrupt index is treated as empty.
    return emptyIndex();
  }
}
/**
 * Render one reference record: YAML front matter followed by a Markdown body.
 *
 * The body is the snapshot snippet when there is one, a "fetch deferred" note
 * for auth-required URLs, or a "no snapshot" note otherwise. At most 20 source
 * files are listed in the front matter.
 *
 * @param {object} record
 * @param {string} record.url - URL as found in the evidence.
 * @param {string} record.host - Classified host.
 * @param {string} record.kind - Classified kind.
 * @param {boolean} record.authRequired - Whether the URL needs authentication.
 * @param {string} record.firstSeen - Date the URL was first recorded.
 * @param {string[]} record.sourceFiles - Files in which the URL was found.
 * @param {object|null} record.snapshot - Fetch result, or null when not fetched.
 * @returns {string} Complete file content.
 */
function recordTemplate({ url, host, kind, authRequired, firstSeen, sourceFiles, snapshot }) {
  const defaultStatus = authRequired ? 'pending-auth-fetch' : 'unfetched';
  const frontMatter = YAML.stringify({
    url,
    normalized_url: normalizeUrl(url),
    host,
    kind,
    auth_required: authRequired,
    first_seen: firstSeen,
    last_crawled: snapshot?.crawledAt || null,
    fetch_status: snapshot?.fetch_status || defaultStatus,
    http_status: snapshot?.status ?? null,
    title: snapshot?.title || '',
    description: snapshot?.description || '',
    source_files: sourceFiles.slice(0, 20),
  }).trimEnd();

  let body;
  if (snapshot?.snippet) {
    body = `\n## Snippet\n\n${snapshot.snippet}\n`;
  } else if (authRequired) {
    body = `\n_Content fetch deferred — this URL requires Microsoft 365 authentication. The matching source-specific pull (sharepoint/loop/teams/ado/crm) will populate richer evidence._\n`;
  } else {
    body = `\n_No snapshot captured._\n`;
  }
  return `---\n${frontMatter}\n---\n${body}`;
}
/**
 * Entry point: scan Evidence/ for URLs, reconcile them with the references
 * index, snapshot the fetchable ones and write records plus the index.
 *
 * Two behaviours differ from a plain "new or --refresh" pass:
 *  - Entries a previous run recorded as `unfetched` (for example because they
 *    fell beyond the --max-fetch cap) are processed again, so the cap spreads
 *    fetching over several runs instead of dropping those URLs for good.
 *  - A record that already holds a fetched snapshot is not overwritten by a
 *    snapshot-less stub when this run does not fetch it; only its index
 *    bookkeeping (source files, last_seen) is updated.
 *
 * @returns {Promise<number>} Exit code: 0 on success, 2 for usage or
 *   missing-project errors.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) { console.log(help()); return 0; }
  if (!args.project) { console.error(help()); emit({ status: 'failed', error: 'required: --project' }); return 2; }
  const root = projectRoot(args.project);
  if (!await pathExists(root)) { emit({ status: 'failed', error: `project-not-bootstrapped: ${root}` }); return 2; }
  const evRoot = evidenceRoot(root);
  if (!await pathExists(evRoot)) { emit({ status: 'failed', error: `evidence-missing: ${evRoot}` }); return 2; }

  log(`scanning ${evRoot} for URLs...`);
  const files = await walkFiles(evRoot);
  log(`scanning ${files.length} file(s)...`);

  /** url(normalized) → { url, sourceFiles:Set } */
  const found = new Map();
  for (const f of files) {
    let txt = '';
    try { txt = await fs.readFile(f, 'utf8'); } catch { continue; }
    const rel = path.relative(root, f);
    for (const u of extractUrls(txt)) {
      const k = normalizeUrl(u);
      if (!found.has(k)) found.set(k, { url: u, sourceFiles: new Set() });
      found.get(k).sourceFiles.add(rel);
    }
  }
  log(`found ${found.size} unique URL(s)`);

  const idxFile = indexPath(root);
  const index = await loadIndex(idxFile);
  index.entries ??= {};
  const today = new Date().toISOString().slice(0, 10);

  // Merge newly seen source files into an existing index entry (capped at 20).
  const touchExisting = (entry, sourceFiles) => {
    const merged = new Set([...(entry.source_files || []), ...sourceFiles]);
    entry.source_files = [...merged].slice(0, 20);
    entry.last_seen = today;
  };

  const tasks = [];
  let newCount = 0, refreshCount = 0, skippedCount = 0;
  for (const [normalized, { url, sourceFiles }] of found.entries()) {
    const cls = classify(url);
    if (args.onlyHost && cls.host !== args.onlyHost) { skippedCount++; continue; }
    const hash = urlHash(url);
    const existing = index.entries[hash];
    const isNew = !existing;
    // Indexed earlier but never fetched: give it another chance on this run.
    const neverFetched = !isNew
      && existing.fetch_status === 'unfetched'
      && !existing.auth_required
      && existing.kind !== 'invalid';
    const needsRefresh = !isNew && (args.refresh || neverFetched);
    if (!isNew && !needsRefresh) {
      // Update source_files only.
      touchExisting(existing, sourceFiles);
      skippedCount++;
      continue;
    }
    tasks.push({ url, normalized, hash, sourceFiles: [...sourceFiles], cls, isNew });
    if (isNew) newCount++; else refreshCount++;
  }

  // Cap fetches per run.
  const fetchable = tasks.filter(t => !t.cls.authRequired && t.cls.kind !== 'invalid').slice(0, args.maxFetch);
  const fetchKeys = new Set(fetchable.map(t => t.hash));
  log(`new: ${newCount}, refresh: ${refreshCount}, skip-existing: ${skippedCount}, will fetch: ${fetchable.length}`);

  let written = 0, fetched = 0, fetchOk = 0;
  for (const t of tasks) {
    const prior = index.entries[t.hash];
    let snap = null;
    if (fetchKeys.has(t.hash)) {
      log(`  fetch ${t.cls.host}: ${t.url.slice(0, 100)}`);
      snap = await fetchSnapshot(t.url, { timeoutMs: args.timeoutMs });
      snap.crawledAt = today;
      snap.fetch_status = snap.ok ? 'fetched' : `fetch-failed:${snap.error || snap.status}`;
      fetched++;
      if (snap.ok) fetchOk++;
    }
    if (!snap && prior?.fetch_status === 'fetched') {
      // Not fetched this run (over the cap): keep the existing snapshot record
      // instead of replacing it with an empty stub.
      touchExisting(prior, t.sourceFiles);
      continue;
    }
    const firstSeen = prior?.first_seen || today;
    const record = recordTemplate({
      url: t.url,
      host: t.cls.host,
      kind: t.cls.kind,
      authRequired: t.cls.authRequired,
      firstSeen,
      sourceFiles: t.sourceFiles,
      snapshot: snap,
    });
    const recPath = recordPath(root, t.cls.host, t.hash);
    if (!args.dryRun) {
      const r = await writeAtomic(recPath, record, { skipIfUnchanged: true });
      if (r.written) written++;
    }
    index.entries[t.hash] = {
      url: t.url,
      normalized_url: t.normalized,
      host: t.cls.host,
      kind: t.cls.kind,
      auth_required: t.cls.authRequired,
      first_seen: firstSeen,
      last_seen: today,
      last_crawled: snap?.crawledAt || prior?.last_crawled || null,
      fetch_status: snap?.fetch_status || prior?.fetch_status || (t.cls.authRequired ? 'pending-auth-fetch' : 'unfetched'),
      http_status: snap?.status ?? prior?.http_status ?? null,
      title: snap?.title || prior?.title || '',
      record_path: path.relative(root, recPath).replaceAll('\\', '/'),
      source_files: t.sourceFiles.slice(0, 20),
    };
  }

  if (!args.dryRun) {
    await writeAtomic(idxFile, YAML.stringify(index), { skipIfUnchanged: true });
  }
  emit({
    status: 'ok',
    project: root,
    dry_run: args.dryRun,
    scanned_files: files.length,
    urls_total: found.size,
    new: newCount,
    refresh: refreshCount,
    fetched,
    fetch_ok: fetchOk,
    written,
    index: path.relative(root, idxFile).replaceAll('\\', '/'),
  });
  return 0;
}
// Run, mapping the returned code (or any thrown error) to the process exit status.
main().then(code => process.exit(code || 0)).catch(e => {
  emit({ status: 'failed', error: e.message || String(e) });
  process.exit(1);
});

package/plugin/runners/pull-state.mjs ADDED Viewed

@@ -0,0 +1,297 @@
+#!/usr/bin/env node
+// plugin/runners/pull-state.mjs
+// Deterministic State/ generator. Inventories Evidence/ and produces:
+//   State/index.md     — TOC pointing at every evidence file by source/week
+//   State/log.md       — chronological run + evidence ledger
+//   State/CLAUDE.md    — host-agnostic project context (project name, sources, alias inventory)
+//   State/AGENTS.md    — alias of CLAUDE.md for OpenAI-flavored hosts
+//
+// This runner does NOT do narrative synthesis — that is the build-state LLM
+// skill's job. This produces the structural skeleton that makes the LLM
+// skill's work cheaper and reproducible. v5.9.0.
+//
+// Usage:
+//   node plugin/runners/pull-state.mjs --project <P> [--dry-run] [--no-legacy]
+import path from 'node:path';
+import { promises as fs } from 'node:fs';
+import YAML from 'yaml';
+import { evidenceRoot, projectRoot, sharedRoot } from './lib/layout.mjs';
+import { writeAtomic, pathExists } from './lib/evidence.mjs';
/**
 * Parse the runner's command-line flags.
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{dryRun: boolean, includeLegacy: boolean, project?: string, help?: boolean}}
 *   Parsed options; `includeLegacy` defaults to true and is cleared by
 *   `--no-legacy`. Unknown flags are ignored.
 */
function parseArgs(argv) {
  const parsed = { dryRun: false, includeLegacy: true };
  let idx = 0;
  while (idx < argv.length) {
    switch (argv[idx]) {
      case '--project':
        // The project name is the next argument; consume it as well.
        idx += 1;
        parsed.project = argv[idx];
        break;
      case '--dry-run':
        parsed.dryRun = true;
        break;
      case '--no-legacy':
        parsed.includeLegacy = false;
        break;
      case '--help':
      case '-h':
        parsed.help = true;
        break;
      default:
        break;
    }
    idx += 1;
  }
  return parsed;
}
/**
 * One-line usage string for this runner.
 * @returns {string} Usage text listing every supported flag.
 */
function help() {
  const usage = `Usage: node pull-state.mjs --project <P> [--dry-run] [--no-legacy]`;
  return usage;
}
/**
 * Write `obj` to stdout as a single JSON line (the runner's result channel).
 * @param {object} obj - JSON-serializable value.
 */
function emit(obj) {
  const line = JSON.stringify(obj);
  process.stdout.write(line + '\n');
}
/**
 * Write a progress message to stderr, prefixed with "[state]".
 * @param {string} msg - Message text.
 */
function log(msg) {
  const line = `[state] ${msg}\n`;
  process.stderr.write(line);
}
+const SOURCES = ['email', 'teams', 'meetings', 'onenote', 'sharepoint', 'crm', 'ado'];
/**
 * Names of the immediate sub-directories of `p`.
 * @param {string} p - Directory to list.
 * @returns {Promise<string[]>} Directory names; [] when `p` cannot be read.
 */
async function listDirs(p) {
  let entries;
  try {
    entries = await fs.readdir(p, { withFileTypes: true });
  } catch {
    return [];
  }
  const names = [];
  for (const entry of entries) {
    if (entry.isDirectory()) names.push(entry.name);
  }
  return names;
}
/**
 * Names of the regular files directly inside `p` whose extension is in `exts`.
 * @param {string} p - Directory to list (not recursive).
 * @param {string[]} [exts] - Accepted lower-case extensions including the dot.
 * @returns {Promise<string[]>} Matching file names; [] on any error.
 */
async function listFiles(p, exts = ['.md', '.yml']) {
  try {
    const entries = await fs.readdir(p, { withFileTypes: true });
    const names = [];
    for (const entry of entries) {
      if (!entry.isFile()) continue;
      const ext = path.extname(entry.name).toLowerCase();
      if (exts.includes(ext)) names.push(entry.name);
    }
    return names;
  } catch {
    return [];
  }
}
/**
 * Inventory one alias's source folder.
 *
 * Looks in the `weekly`, `snapshot`, `stream` and `_index` sub-folders (the
 * last is reported under the key `index`). Entries are relative paths of the
 * form `<layout>/<file>`, always with forward slashes so they can be embedded
 * in Markdown paths unchanged on every platform.
 *
 * @param {string} aliasSourceDir - Folder of one source for one alias.
 * @returns {Promise<{weekly: string[], snapshot: string[], stream: string[],
 *   index: string[], total: number}>} Sorted file lists and the overall file count.
 */
async function inventoryAliasSource(aliasSourceDir) {
  const result = { weekly: [], snapshot: [], stream: [], index: [], total: 0 };
  for (const layout of ['weekly', 'snapshot', 'stream', '_index']) {
    const files = await listFiles(path.join(aliasSourceDir, layout));
    const key = layout === '_index' ? 'index' : layout;
    // posix join: these are display paths, not filesystem paths.
    result[key] = files.map((f) => path.posix.join(layout, f)).sort();
    result.total += files.length;
  }
  return result;
}
/**
 * Inventory the shared sources (crm, ado, references).
 * Only sub-folders that exist under sharedRoot(project) appear in the result.
 * @param {string} project - Value passed through to sharedRoot().
 * @returns {Promise<Object<string, string[]>>} Source name → file paths
 *   relative to the shared root.
 */
async function inventoryShared(project) {
  const base = sharedRoot(project);
  const inventory = {};
  for (const name of ['crm', 'ado', 'references']) {
    const folder = path.join(base, name);
    if (await pathExists(folder)) {
      inventory[name] = await listFilesRecursive(folder, base);
    }
  }
  return inventory;
}
/**
 * Collect every `.md` / `.yml` file below `dir`.
 * @param {string} dir - Directory to search recursively.
 * @param {string} base - Directory the returned paths are made relative to.
 * @returns {Promise<string[]>} Sorted forward-slash paths relative to `base`.
 *   Unreadable directories are skipped.
 */
async function listFilesRecursive(dir, base) {
  const keptExtensions = new Set(['.md', '.yml']);
  const relPaths = [];
  // Explicit work list instead of recursion; order is irrelevant because the
  // result is sorted before returning.
  const pending = [dir];
  while (pending.length > 0) {
    const current = pending.pop();
    let entries;
    try {
      entries = await fs.readdir(current, { withFileTypes: true });
    } catch {
      continue;
    }
    for (const entry of entries) {
      const full = path.join(current, entry.name);
      if (entry.isDirectory()) {
        pending.push(full);
      } else if (entry.isFile() && keptExtensions.has(path.extname(entry.name).toLowerCase())) {
        relPaths.push(path.relative(base, full).replaceAll('\\', '/'));
      }
    }
  }
  return relPaths.sort();
}
/**
 * Read `integrations.yml` from projectRoot(project).
 * @param {string} project - Value passed through to projectRoot().
 * @returns {Promise<object>} Parsed document; {} when the file is missing,
 *   unreadable, unparsable, or parses to a falsy value.
 */
async function readIntegrations(project) {
  const file = path.join(projectRoot(project), 'integrations.yml');
  if (!(await pathExists(file))) return {};
  try {
    const raw = await fs.readFile(file, 'utf8');
    return YAML.parse(raw) || {};
  } catch {
    return {};
  }
}
/**
 * Read one alias's `boundaries.yml` from `<evidenceRoot(project)>/<alias>/`.
 * @param {string} project - Value passed through to evidenceRoot().
 * @param {string} alias - Alias folder name.
 * @returns {Promise<object>} Parsed document; {} when the file is missing,
 *   unreadable, unparsable, or parses to a falsy value.
 */
async function readBoundaries(project, alias) {
  const file = path.join(evidenceRoot(project), alias, 'boundaries.yml');
  if (!(await pathExists(file))) return {};
  try {
    const raw = await fs.readFile(file, 'utf8');
    return YAML.parse(raw) || {};
  } catch {
    return {};
  }
}
/**
 * Render a Markdown section: a level-2 heading, a blank line, the given lines,
 * and a trailing empty line.
 * @param {string} title - Heading text.
 * @param {string[]} lines - Body lines.
 * @returns {string} The section joined with newlines.
 */
function fmtSection(title, lines) {
  const heading = `## ${title}`;
  const parts = [heading, ''].concat(lines, '');
  return parts.join('\n');
}
/**
 * Build the Markdown for State/index.md: a mechanical inventory of
 * integrations, shared evidence and per-contributor evidence.
 *
 * Long lists are capped (10 SharePoint sites, 30 shared files per source,
 * 12 files per alias source). Every capped list, including the SharePoint
 * one, ends with an "… and N more" line so truncation is always visible.
 *
 * @param {object} input
 * @param {string} input.projectName - Name shown in the title.
 * @param {object} input.integrations - Parsed integrations (crm, ado, sharepoint).
 * @param {Array<{alias: string, boundaries?: object, sources?: object}>} input.aliases
 *   Per-contributor inventories; each `sources` value has the shape
 *   `{ weekly, snapshot, stream, index, total }`.
 * @param {Object<string, string[]>} input.shared - Shared source → file paths.
 * @param {string} input.generatedAt - Timestamp text shown in the header.
 * @returns {string} Markdown document with runs of 3+ newlines collapsed to 2.
 */
function buildIndex({ projectName, integrations, aliases, shared, generatedAt }) {
  const lines = [];
  lines.push(`# ${projectName} — State Index`);
  lines.push('');
  lines.push(`Generated by \`pull-state.mjs\` on ${generatedAt}.`);
  lines.push('');
  lines.push('Mechanical inventory only. For narrative synthesis, use the `build-state` skill.');
  lines.push('');

  // Project-wide integrations.
  const integLines = [];
  if (integrations.crm?.request_id) integLines.push(`- **CRM**: \`${integrations.crm.request_id}\``);
  if (integrations.ado?.engagement_id) integLines.push(`- **ADO**: \`${integrations.ado.engagement_id}\``);
  const spSites = integrations.sharepoint?.sites || [];
  if (spSites.length) {
    integLines.push(`- **SharePoint sites** (${spSites.length}):`);
    for (const s of spSites.slice(0, 10)) integLines.push(`  - ${s}`);
    if (spSites.length > 10) integLines.push(`  - _… and ${spSites.length - 10} more_`);
  }
  if (integLines.length) lines.push(fmtSection('Integrations (project-shared)', integLines));

  // Shared evidence
  const sharedLines = [];
  for (const [src, files] of Object.entries(shared)) {
    if (!files.length) continue;
    sharedLines.push(`### ${src} (${files.length} file${files.length === 1 ? '' : 's'})`);
    sharedLines.push('');
    for (const f of files.slice(0, 30)) sharedLines.push(`- \`Evidence/_shared/${f}\``);
    if (files.length > 30) sharedLines.push(`- _… and ${files.length - 30} more_`);
    sharedLines.push('');
  }
  if (sharedLines.length) lines.push(fmtSection('Shared Evidence', sharedLines));

  // Per-alias
  for (const a of aliases) {
    const aliasLines = [];
    aliasLines.push(`Boundaries:`);
    for (const [k, v] of Object.entries(a.boundaries || {})) {
      // A boundary is either a plain list or an object holding one of the known list keys.
      const arr = Array.isArray(v) ? v : (v?.folders || v?.chats || v?.joinUrls || v?.section_file_ids || v?.sites || []);
      if (Array.isArray(arr) && arr.length) aliasLines.push(`  - ${k}: ${arr.length} item(s)`);
    }
    aliasLines.push('');
    for (const [src, inv] of Object.entries(a.sources || {})) {
      if (inv.total === 0) continue;
      aliasLines.push(`**${src}** — ${inv.total} file(s) (weekly: ${inv.weekly.length}, snapshot: ${inv.snapshot.length}, stream: ${inv.stream.length}, index: ${inv.index.length})`);
      const all = [...inv.weekly, ...inv.snapshot, ...inv.stream, ...inv.index];
      for (const f of all.slice(0, 12)) aliasLines.push(`- \`Evidence/${a.alias}/${src}/${f}\``);
      if (all.length > 12) aliasLines.push(`- _… and ${all.length - 12} more_`);
      aliasLines.push('');
    }
    lines.push(fmtSection(`Contributor: ${a.alias}`, aliasLines));
  }
  return lines.join('\n').replace(/\n{3,}/g, '\n\n');
}
+function buildLog({ projectName, runLog, generatedAt }) {
+  const lines = [];
+  lines.push(`# ${projectName} — Run Log`);
+  lines.push('');
+  lines.push(`Generated by \`pull-state.mjs\` on ${generatedAt}. Reflects \`Evidence/run-log.yml\`.`);
+  lines.push('');
+  if (!runLog || !Array.isArray(runLog.entries) || runLog.entries.length === 0) {
+    lines.push('_No run-log entries yet._');
+    return lines.join('\n');
+  }
+  const entries = [...runLog.entries].sort((a, b) => String(b.timestamp || '').localeCompare(String(a.timestamp || '')));
+  for (const e of entries.slice(0, 100)) {
+    const ts = e.timestamp || '?';
+    const status = e.status || '?';
+    const src = e.source || '?';
+    const ent = e.entity ? ` \`${e.entity}\`` : '';
+    const wk = e.week ? ` (week ${e.week})` : '';
+    lines.push(`- **${ts}** — ${src}${ent}${wk} → \`${status}\``);
+  }
+  if (entries.length > 100) lines.push(`\n_Showing 100 of ${entries.length} entries._`);
+  return lines.join('\n');
+}
+function buildClaude({ projectName, integrations, aliases, shared }) {
+  const lines = [];
+  lines.push(`# ${projectName} — Project Context`);
+  lines.push('');
+  lines.push('Auto-generated by `pull-state.mjs`. This file gives any AI agent (Claude, Copilot, etc.) the minimal facts to be useful in this engagement.');
+  lines.push('');
+  lines.push('## Project');
+  lines.push(`- Name: \`${projectName}\``);
+  if (integrations.crm?.request_id) lines.push(`- CRM request: \`${integrations.crm.request_id}\``);
+  if (integrations.ado?.engagement_id) lines.push(`- ADO engagement: \`${integrations.ado.engagement_id}\``);
+  if (integrations.sharepoint?.sites?.length) {
+    lines.push(`- SharePoint sites: ${integrations.sharepoint.sites.length}`);
+  }
+  lines.push('');
+  lines.push('## Contributors');
+  for (const a of aliases) {
+    const totalFiles = Object.values(a.sources || {}).reduce((s, inv) => s + (inv.total || 0), 0);
+    lines.push(`- \`${a.alias}\`: ${totalFiles} evidence file(s)`);
+  }
+  lines.push('');
+  lines.push('## Where things live');
+  lines.push('- Per-contributor evidence: `Evidence/<alias>/<source>/...`');
+  lines.push('- Shared evidence: `Evidence/_shared/{crm,ado,references}/`');
+  lines.push('- Project-wide config: `integrations.yml`');
+  lines.push('- This index: `State/index.md`, `State/log.md`');
+  lines.push('');
+  lines.push('## Doctrine');
+  lines.push('- Cite every claim. Use the form `[source: <relative-path> · <iso-ts>]`.');
+  lines.push('- Read-only Q&A: see the `ask-project` skill.');
+  lines.push('- Refresh + state regen: `kushi refresh <project>` then `kushi state <project>`.');
+  return lines.join('\n');
+}
+async function main() {
+  const args = parseArgs(process.argv.slice(2));
+  if (args.help) { console.log(help()); return 0; }
+  if (!args.project) { console.error(help()); emit({ status: 'failed', error: 'required: --project' }); return 2; }
+  const root = projectRoot(args.project);
+  if (!await pathExists(root)) { emit({ status: 'failed', error: `project-not-bootstrapped: ${root}` }); return 2; }
+  const evRoot = evidenceRoot(root);
+  if (!await pathExists(evRoot)) { emit({ status: 'failed', error: `evidence-missing: ${evRoot}` }); return 2; }
+  const projectName = path.basename(root);
+  const generatedAt = new Date().toISOString();
+  log(`scanning ${evRoot}...`);
+  const integrations = await readIntegrations(root);
+  const shared = await inventoryShared(root);
+  // Per-alias inventory
+  const dirs = await listDirs(evRoot);
+  const aliasNames = dirs.filter(d => !d.startsWith('_'));
+  const aliases = [];
+  for (const alias of aliasNames) {
+    const aliasDir = path.join(evRoot, alias);
+    const sources = {};
+    for (const src of SOURCES) {
+      const srcDir = path.join(aliasDir, src);
+      if (!await pathExists(srcDir)) { sources[src] = { weekly:[], snapshot:[], stream:[], index:[], total:0 }; continue; }
+      sources[src] = await inventoryAliasSource(srcDir);
+    }
+    aliases.push({ alias, boundaries: await readBoundaries(root, alias), sources });
+  }
+  // run-log.yml
+  let runLog = {};
+  const runLogPath = path.join(evRoot, 'run-log.yml');
+  if (await pathExists(runLogPath)) {
+    try { runLog = YAML.parse(await fs.readFile(runLogPath, 'utf8')) || {}; } catch { runLog = {}; }
+  }
+  const stateDir = path.join(root, 'State');
+  const indexMd = buildIndex({ projectName, integrations, aliases, shared, generatedAt });
+  const logMd = buildLog({ projectName, runLog, generatedAt });
+  const claudeMd = buildClaude({ projectName, integrations, aliases, shared });
+  const writes = [];
+  if (!args.dryRun) {
+    await fs.mkdir(stateDir, { recursive: true });
+    const r1 = await writeAtomic(path.join(stateDir, 'index.md'), indexMd);
+    const r2 = await writeAtomic(path.join(stateDir, 'log.md'), logMd);
+    const r3 = await writeAtomic(path.join(stateDir, 'CLAUDE.md'), claudeMd);
+    const r4 = await writeAtomic(path.join(stateDir, 'AGENTS.md'), claudeMd);
+    writes.push(r1, r2, r3, r4);
+  }
+  log(`done: ${aliases.length} contributor(s), ${SOURCES.length} sources scanned`);
+  emit({
+    status: 'ok',
+    project: root,
+    project_name: projectName,
+    dry_run: args.dryRun,
+    contributors: aliases.length,
+    contributors_list: aliases.map(a => a.alias),
+    state_dir: path.relative(root, stateDir).replaceAll('\\', '/'),
+    files_written: writes.filter(w => w?.written).map(w => path.relative(root, w.path).replaceAll('\\','/')),
+    note: 'Mechanical inventory only. Run the build-state LLM skill for narrative synthesis.',
+  });
+  return 0;
+}
+main().then(c => process.exit(c || 0)).catch(e => {
+  emit({ status: 'failed', error: e.message || String(e) });
+  process.exit(1);
+});

package/plugin/runners/refresh.mjs CHANGED Viewed

@@ -224,6 +224,24 @@ async function main() {
     ? planned.map(t => ({ source: t.source, entity: t.entity, week: weekStart, dry_run: true, reason: t.reason }))
     : await pMap(planned, args.maxParallel, t => runOne(t, weekStart, args));
+  // v5.9.0: post-pass — unified references pool. Scans Evidence for URLs and
+  // builds a project-shared dedup index with HTTP snapshots for external links.
+  let referencesResult = null;
+  let stateResult = null;
+  if (!args.dryRun) {
+    const refsRunner = path.join(HERE, 'pull-references.mjs');
+    const refsArgv = ['--project', args.project];
+    if (args.force) refsArgv.push('--refresh');
+    const r = await spawnRunner(refsRunner, refsArgv);
+    referencesResult = { source: 'references', exit_code: r.code, stdout: r.stdout?.slice(0, 4000), stderr: r.stderr?.slice(0, 1000) };
+    // v5.9.0: post-pass — deterministic State/ generator. Inventory only;
+    // build-state LLM skill remains the synthesis layer.
+    const stateRunner = path.join(HERE, 'pull-state.mjs');
+    const s = await spawnRunner(stateRunner, ['--project', args.project]);
+    stateResult = { source: 'state', exit_code: s.code, stdout: s.stdout?.slice(0, 4000), stderr: s.stderr?.slice(0, 1000) };
+  }
   const learning_candidates_total = args.dryRun ? 0 : await readCandidateCount(args.project);
   emit({
@@ -237,6 +255,8 @@ async function main() {
     skipped: skipped.length,
     results,
     skipped_targets: skipped,
+    references: referencesResult,
+    state: stateResult,
     learning_candidates_total,
   });
   return 0;

package/plugin/templates/init/m365-auth.template.json CHANGED Viewed

@@ -41,6 +41,7 @@
       "sourceCoverageLabel": "",
       "matchingPolicy": {
         "mode": "hybrid",
+        "_mode_note": "How to find project-related mail within the SCOPE BOUNDARY (folders[] + dateFloor). One of: 'subfolder-only' (only match subfolder names containing the project name), 'keyword-only' (only search mail subjects/bodies for the project name), 'hybrid' (do both and merge — recommended default).",
         "rankingOrder": ["exact", "prefix", "contains"],
         "minConfidenceForFolderScopedSearch": "high",
         "fallbackToFullRootScanWhenAmbiguous": true,

package/src/main.mjs CHANGED Viewed

@@ -120,7 +120,10 @@ async function installVscode(options, resolved, version) {
   // Skips silently when --no-prompt is set, when not running in a TTY, or
   // when all 3 fields are already populated (re-install case).
   const noPrompt = options.noPrompt || process.env.KUSHI_NO_PROMPT === '1';
-  const qs = await runM365Quickstart({ destRoot: fullDest, noPrompt });
+  // --force re-prompts even if all 4 fields are already populated (gives users
+  // a way to change their answers on a reinstall). Without --force the quickstart
+  // skips silently when the file already looks complete.
+  const qs = await runM365Quickstart({ destRoot: fullDest, noPrompt, force: !!options.force });
   if (qs.ran === false && qs.reason && !['already-populated', 'no-prompt-flag'].includes(qs.reason)) {
     console.log(`  Quickstart skipped: ${qs.reason}`);
   }