kushi-agents 5.8.4 → 5.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cli.mjs CHANGED
@@ -252,17 +252,100 @@ if (args.includes('--help') || args.includes('-h')) {
252
252
  After install, talk to Kushi:
253
253
  bootstrap <project> First-time setup
254
254
  refresh <project> Incremental refresh + rebuild State/
255
- state <project> Re-render State/ from existing Evidence
255
+ state <project> Re-render State/ from existing Evidence (deterministic
256
+ inventory; LLM build-state skill does narrative synthesis)
257
+ references <project> Scan Evidence for URLs and refresh the shared
258
+ references pool (Evidence/_shared/references/)
256
259
  consolidate <project> Merge per-user evidence
257
260
  status <project> Show run-log
258
261
  ask <project> <q> Cited Q&A over Evidence/ (auto-routes, --file-back to save)
259
262
  lint <project> Run wiki-lint checks on State/
260
263
 
264
+ Workspace lifecycle (v5.9.0+):
265
+ uninstall [--keep-config] Remove <cwd>/.kushi/ (preserves Evidence/, State/).
266
+ --keep-config preserves config/user/ identity files.
267
+ upgrade npm i -g kushi-agents@latest then re-seed assets
268
+ in cwd (config preserved).
269
+
261
270
  In VS Code Chat the prefix is "@Kushi". In Clawpilot just say "kushi <verb>".
262
271
  `);
263
272
  process.exit(0);
264
273
  }
265
274
 
275
+ // ── state / refresh / bootstrap verbs (v5.9.0+) ─────────────────────────────
276
+ // Thin shells that exec the deterministic runners. Keeps `kushi state HCA` etc.
277
+ // runnable from the global bin without users having to know the runner paths.
278
if (args.length > 0 && ['state', 'refresh-runner', 'bootstrap-runner', 'discover', 'references'].includes(args[0])) {
  // Dispatch a runner verb: resolve the matching script under ../plugin/runners
  // and execute it with the current Node binary, forwarding remaining args.
  const verb = args[0];
  const project = args[1];
  // A leading '-' means a flag was typed where the project name belongs;
  // forwarding it would hand the runner `--project --some-flag`.
  if (!project || project.startsWith('-')) {
    console.error(`\n Usage: kushi ${verb} <project> [options]\n`);
    process.exit(1);
  }
  const { spawnSync } = await import('node:child_process');
  const pathMod = await import('node:path');
  const urlMod = await import('node:url');
  const here = pathMod.dirname(urlMod.fileURLToPath(import.meta.url));
  const runnerMap = {
    state: 'pull-state.mjs',
    references: 'pull-references.mjs',
    discover: 'discover.mjs',
    'refresh-runner': 'refresh.mjs',
    'bootstrap-runner': 'bootstrap.mjs',
  };
  const runner = pathMod.resolve(here, '..', 'plugin', 'runners', runnerMap[verb]);
  const passthrough = args.slice(2);
  const r = spawnSync(process.execPath, [runner, '--project', project, ...passthrough], { stdio: 'inherit' });
  // spawnSync reports launch failures via `error` (status is then null);
  // surface them instead of exiting 1 without any message.
  if (r.error) {
    console.error(`\n Failed to run ${runnerMap[verb]}: ${r.error.message}\n`);
  }
  process.exit(r.status ?? 1);
}
301
+
302
+ // ── workspace uninstall / upgrade verbs (v5.9.0+) ───────────────────────────
303
if (args[0] === 'uninstall' && !['--clawpilot', '--vscode', '--all-hosts'].some((flag) => args.includes(flag))) {
  // Workspace uninstall: delete <cwd>/.kushi (or only its asset folders when
  // --keep-config is given). Nothing outside .kushi/ is touched here.
  const { existsSync, rmSync } = await import('node:fs');
  const { resolve, join } = await import('node:path');
  const dest = resolve(process.cwd(), '.kushi');
  const rmOpts = { recursive: true, force: true };

  if (!existsSync(dest)) {
    console.error(`\n No .kushi/ directory found at ${dest}\n`);
    process.exit(1);
  }

  if (!args.includes('--keep-config')) {
    rmSync(dest, rmOpts);
    console.log(`\n Removed ${dest}\n Evidence/ and State/ left untouched.\n`);
  } else {
    // Remove only the seeded asset folders; anything else under .kushi/ stays.
    const present = ['agents', 'instructions', 'prompts', 'skills', 'templates', 'reference-packs', 'lib', 'runners']
      .map((name) => join(dest, name))
      .filter((candidate) => existsSync(candidate));
    for (const target of present) rmSync(target, rmOpts);
    const removed = present.length;
    console.log(`\n Removed ${removed} asset dir(s) from ${dest} (config/user/ preserved).\n`);
  }
  process.exit(0);
}
327
+
328
if (args.length > 0 && args[0] === 'upgrade') {
  // Upgrade: npm i -g @latest, then re-seed assets in cwd preserving config.
  const { spawnSync } = await import('node:child_process');
  const fsMod = await import('node:fs');
  const onWindows = process.platform === 'win32';

  // On Windows npm is a .cmd shim, and current Node releases refuse to spawn
  // .cmd/.bat files without a shell (spawnSync fails with EINVAL). Run it
  // through the shell there, as one command string; every argument is a fixed
  // literal, so no quoting or escaping is involved.
  const runNpm = (npmArgs) => (onWindows
    ? spawnSync(`npm ${npmArgs.join(' ')}`, { stdio: 'inherit', shell: true })
    : spawnSync('npm', npmArgs, { stdio: 'inherit' }));

  console.log('\n Upgrading kushi-agents globally via npm...\n');
  const r1 = runNpm(['install', '-g', 'kushi-agents@latest']);
  if (r1.status !== 0) {
    console.error('\n npm install failed.\n');
    if (r1.error) console.error(` ${r1.error.message}\n`);
    process.exit(r1.status ?? 1);
  }
  console.log('\n Refreshing assets in cwd (config preserved)...\n');
  if (fsMod.existsSync('.kushi')) {
    const r2 = runNpm(['exec', '--', 'kushi-agents', '--no-prompt', '--force']);
    if (r2.error) console.error(`\n Re-seed failed: ${r2.error.message}\n`);
    // A null status means the child never ran or was killed: that is a failure.
    process.exit(r2.status ?? 1);
  } else {
    console.log('\n No .kushi/ in cwd — global upgrade complete; cd into a project and run `kushi` to install.\n');
    process.exit(0);
  }
}
348
+
266
349
  // ── multi-host mode (v5.0.2+) ───────────────────────────────────────────────
267
350
  // Trigger when the user passes any of: --vscode, --all-hosts, --uninstall.
268
351
  // --clawpilot ALONE continues to route through the legacy main.mjs path so
package/package.json CHANGED
@@ -1,9 +1,10 @@
1
1
  {
2
2
  "name": "kushi-agents",
3
- "version": "5.8.4",
3
+ "version": "5.9.0",
4
4
  "description": "Install Kushi — multi-source project evidence agent with Comprehensive Structured Capture (CSC) into weekly-only files across Email, Teams, OneNote, Loop, SharePoint, Meetings, CRM, ADO. Meetings retain a sibling verbatim/ audit folder. WorkIQ-only for M365 sources (Graph / m365_* FORBIDDEN as fallbacks; user-paste is first-class). Host-agnostic.",
5
5
  "type": "module",
6
6
  "bin": {
7
+ "kushi": "./bin/cli.mjs",
7
8
  "kushi-agents": "./bin/cli.mjs"
8
9
  },
9
10
  "files": [
@@ -201,12 +201,14 @@ function applyRows(source, rows, currentBounds, currentInteg) {
201
201
  return { boundariesPatch: added.length ? { onenote: { section_file_ids: merged } } : null, accepted };
202
202
  }
203
203
  if (source === 'sharepoint') {
204
- const existing = currentBounds.sharepoint?.sites || [];
204
+ // v5.9.0: SP sites are project-wide, not per-alias. Write into integrations.yml.
205
+ const existing = currentInteg.sharepoint?.sites || [];
205
206
  const incoming = rows.map(r => r.site_url).filter(Boolean);
206
207
  const merged = dedup([...existing, ...incoming]);
207
208
  const added = merged.filter(v => !existing.includes(v));
208
209
  if (added.length) accepted.push(...added);
209
- return { boundariesPatch: added.length ? { sharepoint: { sites: merged } } : null, accepted };
210
+ const cur = currentInteg.sharepoint || {};
211
+ return { integrationsPatch: added.length ? { sharepoint: { ...cur, sites: merged } } : null, accepted };
210
212
  }
211
213
  if (source === 'crm') {
212
214
  const cur = currentInteg.crm || {};
@@ -0,0 +1,164 @@
1
+ // plugin/runners/lib/references.mjs
2
+ // Deterministic URL extraction, classification, and lightweight HTTP snapshot
3
+ // for the unified references pool. No LLM. v5.9.0.
4
+
5
+ import { promises as fs } from 'node:fs';
6
+ import path from 'node:path';
7
+ import crypto from 'node:crypto';
8
+
9
/** Loose matcher for http(s) URLs embedded in markdown, YAML or plain text. */
const URL_RE = /\bhttps?:\/\/[^\s<>"'`)\]}|\\]+/gi;

/** Punctuation at the end of a match that is treated as prose, not URL. */
const TRAILING_TRIM = /[)\].,;:!?>'"]+$/;

/**
 * Collect the distinct URLs found in a piece of text.
 *
 * @param {string} text - Text to scan; non-strings and empty input yield [].
 * @returns {string[]} Unique URLs in first-seen order, trailing punctuation
 *   removed. Candidates of 8 characters or fewer are dropped.
 */
export function extractUrls(text) {
  if (typeof text !== 'string' || text === '') return [];
  const unique = new Set();
  for (const [raw] of text.matchAll(URL_RE)) {
    const trimmed = raw.replace(TRAILING_TRIM, '');
    if (trimmed.length > 8) unique.add(trimmed);
  }
  return Array.from(unique);
}
25
+
26
/**
 * Short, stable identifier for a URL: the first 16 hex characters of the
 * SHA-1 digest of its normalized form.
 *
 * @param {string} url
 * @returns {string} 16-character lowercase hex string.
 */
export function urlHash(url) {
  const canonical = normalizeUrl(url);
  const digest = crypto.createHash('sha1');
  digest.update(canonical);
  return digest.digest('hex').slice(0, 16);
}
30
+
31
/**
 * Canonicalize a URL for de-duplication: drop the fragment and common
 * tracking query parameters, keep everything else.
 *
 * Tracking names are matched case-insensitively. URLSearchParams.delete() is
 * case-sensitive, so the lowercase 'wt.mc_id' alone never removed the
 * `WT.mc_id` spelling.
 *
 * @param {string} url
 * @returns {string} The normalized URL, or `url` unchanged when it cannot be
 *   parsed.
 */
export function normalizeUrl(url) {
  try {
    const u = new URL(url);
    u.hash = '';
    const drop = ['utm_source','utm_medium','utm_campaign','utm_term','utm_content','wt.mc_id'];
    // Snapshot the names first: deleting while iterating the live params is unsafe.
    const present = [...new Set(u.searchParams.keys())];
    // Keep the unconditional exact-name deletes from the previous
    // implementation so queries are serialized the same way as before
    // (NOTE(review): per the WHATWG URL spec each delete() re-serializes the
    // query — this keeps existing hashes stable).
    for (const k of drop) u.searchParams.delete(k);
    for (const name of present) {
      if (drop.includes(name.toLowerCase())) u.searchParams.delete(name);
    }
    return u.toString();
  } catch {
    return url;
  }
}
43
+
44
/**
 * Host classification table. Each rule matches a hostname exactly or as a
 * parent domain (`(^|\.)` anchor) and maps it to a canonical host label, a
 * source kind, and whether content needs authentication to fetch.
 */
const HOST_RULES = [
  { match: /(^|\.)sharepoint\.com$/i, host: 'sharepoint.com', kind: 'sharepoint', authRequired: true },
  { match: /(^|\.)loop\.microsoft\.com$/i, host: 'loop.microsoft.com', kind: 'loop', authRequired: true },
  // Anchored like every other rule so that e.g. "xloop.cloud.microsoft" is not
  // mistaken for the Loop host.
  { match: /(^|\.)loop\.cloud\.microsoft$/i, host: 'loop.cloud.microsoft', kind: 'loop', authRequired: true },
  { match: /(^|\.)loop-api\.cloud\.microsoft$/i, host: 'loop-api.cloud.microsoft', kind: 'loop', authRequired: true },
  { match: /(^|\.)fluidpreview\.office\.net$/i, host: 'fluidpreview.office.net', kind: 'loop', authRequired: true },
  { match: /(^|\.)teams\.microsoft\.com$/i, host: 'teams.microsoft.com', kind: 'teams', authRequired: true },
  { match: /(^|\.)office\.com$/i, host: 'office.com', kind: 'office', authRequired: true },
  { match: /(^|\.)dev\.azure\.com$/i, host: 'dev.azure.com', kind: 'ado', authRequired: true },
  { match: /(^|\.)visualstudio\.com$/i, host: 'visualstudio.com', kind: 'ado', authRequired: true },
  { match: /(^|\.)dynamics\.com$/i, host: 'dynamics.com', kind: 'crm', authRequired: true },
  { match: /(^|\.)learn\.microsoft\.com$/i, host: 'learn.microsoft.com', kind: 'docs', authRequired: false },
  { match: /(^|\.)docs\.microsoft\.com$/i, host: 'docs.microsoft.com', kind: 'docs', authRequired: false },
  { match: /(^|\.)github\.com$/i, host: 'github.com', kind: 'repo', authRequired: false },
];

/**
 * Classify a URL by hostname.
 *
 * @param {string} url
 * @returns {{host: string, kind: string, authRequired: boolean}} The first
 *   matching rule's values; `{ host, kind: 'external' }` with the literal
 *   lowercase hostname when no rule matches; `{ host: 'unknown',
 *   kind: 'invalid' }` when the URL cannot be parsed.
 */
export function classify(url) {
  let host = '';
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch {
    return { host: 'unknown', kind: 'invalid', authRequired: false };
  }
  const rule = HOST_RULES.find((r) => r.match.test(host));
  if (rule) return { host: rule.host, kind: rule.kind, authRequired: rule.authRequired };
  return { host, kind: 'external', authRequired: false };
}
69
+
70
/**
 * Turn a hostname into a safe single path segment.
 *
 * Lowercases, replaces every character outside `[a-z0-9.-]` with `_`, and
 * caps the length at 80. A result made only of dots ('.', '..', ...) would act
 * as directory navigation when used as a path segment, so its dots are
 * replaced with underscores as well.
 *
 * @param {string} [host] - Hostname; falsy values become 'unknown'.
 * @returns {string}
 */
export function safeHost(host) {
  const segment = (host || 'unknown').toLowerCase().replace(/[^a-z0-9.-]/g, '_').slice(0, 80);
  return /^\.+$/.test(segment) ? segment.replaceAll('.', '_') : segment;
}
74
+
75
/**
 * Fetch a URL and extract a small content snapshot.
 *
 * Reads at most `maxBytes` of the response body (the final chunk is trimmed
 * so the bound is exact), decodes it as UTF-8 and, for HTML/XML-looking
 * content, pulls out title, meta description and first h1.
 *
 * @param {string} url
 * @param {{timeoutMs?: number, maxBytes?: number}} [options] - Abort timeout
 *   (default 15000 ms) and body byte cap (default 64 KiB).
 * @returns {Promise<object>} On a completed request:
 *   `{ ok, status, title, description, h1, contentType, bytes, snippet }`.
 *   On any thrown error: `{ ok: false, status: 0, error }` where `error` is
 *   'timeout' for an abort, otherwise the error code or message. Never rejects.
 */
export async function fetchSnapshot(url, { timeoutMs = 15000, maxBytes = 64 * 1024 } = {}) {
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), timeoutMs);
  try {
    const res = await fetch(url, {
      redirect: 'follow',
      signal: ctrl.signal,
      headers: { 'user-agent': 'kushi-references/1.0 (+https://github.com/ushakrishnan/kushi)' },
    });
    const contentType = res.headers.get('content-type') || '';
    const reader = res.body?.getReader();
    let received = 0;
    const chunks = [];
    if (reader) {
      try {
        while (received < maxBytes) {
          const { done, value } = await reader.read();
          if (done) break;
          // Trim the chunk that crosses the cap so the payload never exceeds maxBytes.
          const room = maxBytes - received;
          const piece = value.byteLength > room ? value.subarray(0, room) : value;
          chunks.push(piece);
          received += piece.byteLength;
        }
      } finally {
        // cancel() returns a promise; attach a handler so a rejection cannot
        // surface as an unhandled rejection. Not awaited: the snapshot does
        // not depend on it.
        reader.cancel().catch(() => {});
      }
    }
    const text = Buffer.concat(chunks).toString('utf8');
    const html = /html|xml/i.test(contentType) || /^\s*<!doctype html|<html/i.test(text);
    const title = html ? extractTag(text, 'title') : '';
    const description = html ? extractMeta(text, 'description') : '';
    const h1 = html ? extractTag(text, 'h1') : '';
    const snippet = html ? stripHtml(text).slice(0, 600) : text.slice(0, 600);
    return {
      ok: res.ok,
      status: res.status,
      title: clean(title),
      description: clean(description),
      h1: clean(h1),
      contentType,
      bytes: received,
      snippet: clean(snippet),
    };
  } catch (e) {
    return { ok: false, status: 0, error: e.name === 'AbortError' ? 'timeout' : (e.code || e.message) };
  } finally {
    clearTimeout(timer);
  }
}
125
+
126
/**
 * Return the inner text of the first `<tag ...>...</tag>` element found
 * (case-insensitive, may span lines), or '' when there is none.
 *
 * @param {string} html
 * @param {string} tag - Tag name, inserted into the pattern as-is.
 * @returns {string}
 */
function extractTag(html, tag) {
  const pattern = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`, 'i');
  const hit = html.match(pattern);
  if (hit === null) return '';
  return hit[1];
}
130
/**
 * Read the `content` of a `<meta>` tag identified by `name="<name>"`, falling
 * back to `property="og:<name>"` when no name-based tag has content.
 *
 * Attributes are parsed individually, so `content` may appear before or after
 * `name`/`property`, and a value may contain the other kind of quote.
 *
 * @param {string} html
 * @param {string} name - Meta name, compared case-insensitively.
 * @returns {string} The first non-empty content found, or ''.
 */
function extractMeta(html, name) {
  const wantedName = String(name).toLowerCase();
  const wantedOg = `og:${wantedName}`;
  let ogContent = '';
  for (const [tag] of html.matchAll(/<meta\b[^>]*>/gi)) {
    let metaName = '';
    let metaProperty = '';
    let content = '';
    // Walk the attributes left to right so text inside one value is never
    // mistaken for another attribute.
    for (const [, key, dq, sq, bare] of tag.matchAll(/([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g)) {
      const value = dq ?? sq ?? bare ?? '';
      const attr = key.toLowerCase();
      if (attr === 'name') metaName = value.toLowerCase();
      else if (attr === 'property') metaProperty = value.toLowerCase();
      else if (attr === 'content') content = value;
    }
    if (!content) continue;
    if (metaName === wantedName) return content; // name= wins over og:
    if (!ogContent && metaProperty === wantedOg) ogContent = content;
  }
  return ogContent;
}
135
/**
 * Reduce markup to plain text: script and style elements are removed with
 * their contents, remaining tags are replaced by spaces, and whitespace is
 * collapsed and trimmed.
 *
 * @param {string} s
 * @returns {string}
 */
function stripHtml(s) {
  const passes = [
    [/<script[\s\S]*?<\/script>/gi, ' '],
    [/<style[\s\S]*?<\/style>/gi, ' '],
    [/<[^>]+>/g, ' '],
    [/\s+/g, ' '],
  ];
  let text = s;
  for (const [pattern, replacement] of passes) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
142
/**
 * Collapse whitespace runs to single spaces, trim, and cap at 800 characters.
 * Falsy input yields ''.
 *
 * @param {string} [s]
 * @returns {string}
 */
function clean(s) {
  const text = s || '';
  const collapsed = text.replace(/\s+/g, ' ').trim();
  return collapsed.slice(0, 800);
}
145
+
146
/**
 * Walk a directory recursively, returning files matching `extensions`.
 *
 * Skip patterns are compared against whole path segments of each directory's
 * path RELATIVE to `dir`. A plain substring test on the absolute path would
 * skip unrelated folders such as "my.github", or every subdirectory when an
 * ancestor of `dir` happens to contain ".git" or "node_modules".
 *
 * @param {string} dir - Root directory to scan.
 * @param {{extensions?: string[], skipDirs?: string[]}} [options]
 *   extensions: lowercase extensions (with dot) to include.
 *   skipDirs: directory names or '/'-separated segment sequences to prune.
 * @returns {Promise<string[]>} Paths (built from `dir`) of matching files.
 *   Unreadable directories are skipped silently.
 */
export async function walkFiles(dir, { extensions = ['.yml','.yaml','.md','.txt','.json'], skipDirs = ['_shared/references','node_modules','.git'] } = {}) {
  const out = [];
  // '/seg/seg/' needles let includes() match complete segments only.
  const needles = skipDirs.map(s => `/${String(s).replaceAll('\\', '/').replace(/^\/+|\/+$/g, '')}/`);
  const isSkipped = (full) => {
    const rel = `/${path.relative(dir, full).replaceAll('\\', '/')}/`;
    return needles.some(n => rel.includes(n));
  };
  async function walk(d) {
    let entries;
    try { entries = await fs.readdir(d, { withFileTypes: true }); } catch { return; }
    for (const e of entries) {
      const full = path.join(d, e.name);
      if (e.isDirectory()) {
        if (!isSkipped(full)) await walk(full);
      } else if (e.isFile()) {
        if (extensions.includes(path.extname(e.name).toLowerCase())) out.push(full);
      }
    }
  }
  await walk(dir);
  return out;
}
@@ -0,0 +1,209 @@
1
+ #!/usr/bin/env node
2
+ // plugin/runners/pull-references.mjs
3
+ // Unified references pool. Scans Evidence/ for URLs, dedupes against an index,
4
+ // classifies by host, snapshots external URLs via HTTP. Auth-protected URLs
5
+ // (SP/Loop/Teams/ADO/CRM) are recorded with metadata only and marked
6
+ // `pending-auth-fetch` for follow-up by source-specific pulls.
7
+ //
8
+ // Project-shared. Not dated. One snapshot per URL. Re-crawl with --refresh.
9
+ //
10
+ // Usage:
11
+ // node plugin/runners/pull-references.mjs --project <P> [--refresh] [--dry-run]
12
+ // [--timeout-ms N] [--max-fetch N] [--only-host <h>]
13
+
14
+ import path from 'node:path';
15
+ import { promises as fs } from 'node:fs';
16
+ import YAML from 'yaml';
17
+ import { evidenceRoot, sharedRoot, projectRoot } from './lib/layout.mjs';
18
+ import { writeAtomic, pathExists } from './lib/evidence.mjs';
19
+ import { extractUrls, urlHash, normalizeUrl, classify, safeHost, fetchSnapshot, walkFiles } from './lib/references.mjs';
20
+
21
/**
 * Parse pull-references command-line arguments.
 *
 * Numeric flags fall back to their default when the value is missing, not a
 * finite number, or below the allowed minimum. `--max-fetch 0` is honoured
 * (no fetching this run); `--timeout-ms` must be greater than zero.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{dryRun: boolean, refresh: boolean, timeoutMs: number,
 *   maxFetch: number, project?: string, onlyHost?: string, help?: boolean}}
 */
function parseArgs(argv) {
  const args = { dryRun: false, refresh: false, timeoutMs: 15000, maxFetch: 50 };
  const numberOr = (raw, fallback, isValid) => {
    if (raw === undefined || String(raw).trim() === '') return fallback;
    const n = Number(raw);
    return Number.isFinite(n) && isValid(n) ? n : fallback;
  };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--project') args.project = argv[++i];
    else if (a === '--refresh') args.refresh = true;
    else if (a === '--dry-run') args.dryRun = true;
    else if (a === '--timeout-ms') args.timeoutMs = numberOr(argv[++i], 15000, (n) => n > 0);
    else if (a === '--max-fetch') args.maxFetch = numberOr(argv[++i], 50, (n) => n >= 0);
    else if (a === '--only-host') args.onlyHost = argv[++i];
    else if (a === '--help' || a === '-h') args.help = true;
  }
  return args;
}
35
+
36
/** @returns {string} One-line usage text for pull-references.mjs. */
function help() {
  const usage = `Usage: node pull-references.mjs --project <P> [--refresh] [--dry-run] [--timeout-ms N] [--max-fetch N] [--only-host <h>]`;
  return usage;
}
39
+
40
/** Write `obj` to stdout as a single line of JSON. */
function emit(obj) {
  const line = `${JSON.stringify(obj)}\n`;
  process.stdout.write(line);
}
41
/** Write a "[references]"-prefixed progress line to stderr. */
function log(msg) {
  const line = `[references] ${msg}\n`;
  process.stderr.write(line);
}
42
+
43
/** Directory of the references pool: `<sharedRoot(project)>/references`. */
function refsRoot(project) {
  const shared = sharedRoot(project);
  return path.join(shared, 'references');
}
44
/** Path of the references index file (`index.yml` inside the references pool). */
function indexPath(project) {
  const poolDir = refsRoot(project);
  return path.join(poolDir, 'index.yml');
}
45
/**
 * Path of the markdown record for one URL:
 * `<references pool>/by-host/<safeHost(host)>/<hash>.md`.
 */
function recordPath(project, host, hash) {
  const hostDir = path.join(refsRoot(project), 'by-host', safeHost(host));
  const fileName = `${hash}.md`;
  return path.join(hostDir, fileName);
}
48
+
49
/**
 * Load the references index from `p`.
 *
 * @param {string} p - Path of the YAML index file.
 * @returns {Promise<object>} The parsed document, or a fresh
 *   `{ version: 1, entries: {} }` when the file is missing, empty/falsy after
 *   parsing, or cannot be read or parsed.
 */
async function loadIndex(p) {
  const blank = () => ({ version: 1, entries: {} });
  const exists = await pathExists(p);
  if (!exists) return blank();
  try {
    const raw = await fs.readFile(p, 'utf8');
    const parsed = YAML.parse(raw);
    return parsed || blank();
  } catch {
    return blank();
  }
}
54
+
55
/**
 * Render the markdown record for one URL: YAML front matter followed by a
 * body that is either the captured snippet or a placeholder note.
 *
 * @param {object} rec
 * @param {string} rec.url
 * @param {string} rec.host
 * @param {string} rec.kind
 * @param {boolean} rec.authRequired
 * @param {string} rec.firstSeen
 * @param {string[]} rec.sourceFiles - Only the first 20 are recorded.
 * @param {object|null} [rec.snapshot] - Fetch result, if any.
 * @returns {string} Full file contents.
 */
function recordTemplate({ url, host, kind, authRequired, firstSeen, sourceFiles, snapshot }) {
  const fallbackStatus = authRequired ? 'pending-auth-fetch' : 'unfetched';
  const frontMatter = YAML.stringify({
    url,
    normalized_url: normalizeUrl(url),
    host,
    kind,
    auth_required: authRequired,
    first_seen: firstSeen,
    last_crawled: snapshot?.crawledAt || null,
    fetch_status: snapshot?.fetch_status || fallbackStatus,
    http_status: snapshot?.status ?? null,
    title: snapshot?.title || '',
    description: snapshot?.description || '',
    source_files: sourceFiles.slice(0, 20),
  }).trimEnd();

  let body;
  if (snapshot?.snippet) {
    body = `\n## Snippet\n\n${snapshot.snippet}\n`;
  } else if (authRequired) {
    body = `\n_Content fetch deferred — this URL requires Microsoft 365 authentication. The matching source-specific pull (sharepoint/loop/teams/ado/crm) will populate richer evidence._\n`;
  } else {
    body = `\n_No snapshot captured._\n`;
  }
  return `---\n${frontMatter}\n---\n${body}`;
}
78
+
79
/**
 * Entry point: scan Evidence/ for URLs, reconcile them with the references
 * index, snapshot fetchable URLs over HTTP, then write one record per
 * processed URL plus the index (nothing is written with --dry-run).
 *
 * Differences from a naive pass:
 *  - Entries still marked `unfetched` (e.g. cut off by --max-fetch on an
 *    earlier run) are retried without --refresh; otherwise they would never
 *    get a snapshot.
 *  - When an already-indexed, fetchable URL is not fetched this run (over the
 *    --max-fetch cap), its existing record is left alone instead of being
 *    replaced by an empty "No snapshot captured" record.
 *
 * @returns {Promise<number>} Exit code: 0 on success or --help, 2 when
 *   --project is missing or the project / Evidence directory does not exist.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) { console.log(help()); return 0; }
  if (!args.project) { console.error(help()); emit({ status: 'failed', error: 'required: --project' }); return 2; }

  const root = projectRoot(args.project);
  if (!await pathExists(root)) { emit({ status: 'failed', error: `project-not-bootstrapped: ${root}` }); return 2; }

  const evRoot = evidenceRoot(root);
  if (!await pathExists(evRoot)) { emit({ status: 'failed', error: `evidence-missing: ${evRoot}` }); return 2; }

  log(`scanning ${evRoot} for URLs...`);
  const files = await walkFiles(evRoot);
  log(`scanning ${files.length} file(s)...`);

  /** url(normalized) → { url, sourceFiles:Set } */
  const found = new Map();
  for (const f of files) {
    let txt = '';
    // Unreadable files are skipped: the scan is best-effort.
    try { txt = await fs.readFile(f, 'utf8'); } catch { continue; }
    const rel = path.relative(root, f);
    for (const u of extractUrls(txt)) {
      const k = normalizeUrl(u);
      if (!found.has(k)) found.set(k, { url: u, sourceFiles: new Set() });
      found.get(k).sourceFiles.add(rel);
    }
  }
  log(`found ${found.size} unique URL(s)`);

  const idxFile = indexPath(root);
  const index = await loadIndex(idxFile);
  index.entries ??= {};

  const today = new Date().toISOString().slice(0, 10);
  const tasks = [];
  let newCount = 0, refreshCount = 0, skippedCount = 0;

  for (const [normalized, { url, sourceFiles }] of found.entries()) {
    const c = classify(url);
    if (args.onlyHost && c.host !== args.onlyHost) { skippedCount++; continue; }
    const fetchableKind = !c.authRequired && c.kind !== 'invalid';
    const hash = urlHash(url);
    const existing = index.entries[hash];
    const isNew = !existing;
    // Indexed but never fetched: retry it even without --refresh.
    const neverFetched = !isNew && fetchableKind && existing.fetch_status === 'unfetched';
    const needsRefresh = !isNew && (args.refresh || neverFetched);
    if (!isNew && !needsRefresh) {
      // Update source_files only.
      const merged = new Set([...(existing.source_files || []), ...sourceFiles]);
      existing.source_files = [...merged].slice(0, 20);
      existing.last_seen = today;
      skippedCount++;
      continue;
    }
    tasks.push({ url, normalized, hash, sourceFiles: [...sourceFiles], cls: c, isNew });
    if (isNew) newCount++; else refreshCount++;
  }

  // Cap fetches per run.
  const fetchable = tasks.filter(t => !t.cls.authRequired && t.cls.kind !== 'invalid').slice(0, args.maxFetch);
  const fetchKeys = new Set(fetchable.map(t => t.hash));
  log(`new: ${newCount}, refresh: ${refreshCount}, skip-existing: ${skippedCount}, will fetch: ${fetchable.length}`);

  let written = 0, fetched = 0, fetchOk = 0;
  for (const t of tasks) {
    let snap = null;
    if (fetchKeys.has(t.hash)) {
      log(` fetch ${t.cls.host}: ${t.url.slice(0, 100)}`);
      snap = await fetchSnapshot(t.url, { timeoutMs: args.timeoutMs });
      snap.crawledAt = today;
      snap.fetch_status = snap.ok ? 'fetched' : `fetch-failed:${snap.error || snap.status}`;
      fetched++;
      if (snap.ok) fetchOk++;
    }
    const previous = index.entries[t.hash];
    const firstSeen = previous?.first_seen || today;
    const recPath = recordPath(root, t.cls.host, t.hash);
    // A fetchable, already-indexed URL that was not fetched this run (over the
    // cap) keeps its record; rewriting it would discard the stored snapshot.
    const keepExistingRecord = !t.isNew && !snap && !t.cls.authRequired && t.cls.kind !== 'invalid';
    if (!args.dryRun && !keepExistingRecord) {
      const record = recordTemplate({
        url: t.url,
        host: t.cls.host,
        kind: t.cls.kind,
        authRequired: t.cls.authRequired,
        firstSeen,
        sourceFiles: t.sourceFiles,
        snapshot: snap,
      });
      const r = await writeAtomic(recPath, record, { skipIfUnchanged: true });
      if (r.written) written++;
    }
    index.entries[t.hash] = {
      url: t.url,
      normalized_url: t.normalized,
      host: t.cls.host,
      kind: t.cls.kind,
      auth_required: t.cls.authRequired,
      first_seen: firstSeen,
      last_seen: today,
      last_crawled: snap?.crawledAt || previous?.last_crawled || null,
      fetch_status: snap?.fetch_status || previous?.fetch_status || (t.cls.authRequired ? 'pending-auth-fetch' : 'unfetched'),
      http_status: snap?.status ?? previous?.http_status ?? null,
      title: snap?.title || previous?.title || '',
      record_path: path.relative(root, recPath).replaceAll('\\', '/'),
      source_files: t.sourceFiles.slice(0, 20),
    };
  }

  if (!args.dryRun) {
    await writeAtomic(idxFile, YAML.stringify(index), { skipIfUnchanged: true });
  }

  emit({
    status: 'ok',
    project: root,
    dry_run: args.dryRun,
    scanned_files: files.length,
    urls_total: found.size,
    new: newCount,
    refresh: refreshCount,
    fetched,
    fetch_ok: fetchOk,
    written,
    index: path.relative(root, idxFile).replaceAll('\\', '/'),
  });
  return 0;
}
205
+
206
// Run the CLI: exit with main()'s code, or report the failure as JSON and exit 1.
try {
  const code = await main();
  process.exit(code || 0);
} catch (e) {
  emit({ status: 'failed', error: e.message || String(e) });
  process.exit(1);
}
@@ -0,0 +1,297 @@
1
+ #!/usr/bin/env node
2
+ // plugin/runners/pull-state.mjs
3
+ // Deterministic State/ generator. Inventories Evidence/ and produces:
4
+ // State/index.md — TOC pointing at every evidence file by source/week
5
+ // State/log.md — chronological run + evidence ledger
6
+ // State/CLAUDE.md — host-agnostic project context (project name, sources, alias inventory)
7
+ // State/AGENTS.md — alias of CLAUDE.md for OpenAI-flavored hosts
8
+ //
9
+ // This runner does NOT do narrative synthesis — that is the build-state LLM
10
+ // skill's job. This produces the structural skeleton that makes the LLM
11
+ // skill's work cheaper and reproducible. v5.9.0.
12
+ //
13
+ // Usage:
14
+ // node plugin/runners/pull-state.mjs --project <P> [--dry-run] [--no-legacy]
15
+
16
+ import path from 'node:path';
17
+ import { promises as fs } from 'node:fs';
18
+ import YAML from 'yaml';
19
+ import { evidenceRoot, projectRoot, sharedRoot } from './lib/layout.mjs';
20
+ import { writeAtomic, pathExists } from './lib/evidence.mjs';
21
+
22
/**
 * Parse pull-state command-line arguments.
 *
 * Legacy inclusion is on by default. `--no-legacy` turns it off and
 * `--include-legacy` (the spelling used in the file's usage header) turns it
 * back on; when both are given the later one wins. Unknown arguments are
 * ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{dryRun: boolean, includeLegacy: boolean, project?: string, help?: boolean}}
 */
function parseArgs(argv) {
  const args = { dryRun: false, includeLegacy: true };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--project') args.project = argv[++i];
    else if (a === '--dry-run') args.dryRun = true;
    else if (a === '--no-legacy') args.includeLegacy = false;
    else if (a === '--include-legacy') args.includeLegacy = true;
    else if (a === '--help' || a === '-h') args.help = true;
  }
  return args;
}
33
+
34
/** @returns {string} One-line usage text for pull-state.mjs. */
function help() {
  const usage = `Usage: node pull-state.mjs --project <P> [--dry-run] [--no-legacy]`;
  return usage;
}
37
+
38
/** Write `obj` to stdout as a single line of JSON. */
function emit(obj) {
  const line = `${JSON.stringify(obj)}\n`;
  process.stdout.write(line);
}
39
/** Write a "[state]"-prefixed progress line to stderr. */
function log(msg) {
  const line = `[state] ${msg}\n`;
  process.stderr.write(line);
}
40
+
41
/** Evidence source names this runner knows about, in display order. */
const SOURCES = [
  'email',
  'teams',
  'meetings',
  'onenote',
  'sharepoint',
  'crm',
  'ado',
];
42
+
43
/**
 * Names of the immediate subdirectories of `p`.
 *
 * @param {string} p
 * @returns {Promise<string[]>} Directory names; [] when `p` cannot be read.
 */
async function listDirs(p) {
  try {
    const entries = await fs.readdir(p, { withFileTypes: true });
    const names = [];
    for (const entry of entries) {
      if (entry.isDirectory()) names.push(entry.name);
    }
    return names;
  } catch {
    return [];
  }
}
48
+
49
/**
 * Names of the regular files directly inside `p` whose extension
 * (lowercased) is listed in `exts`.
 *
 * @param {string} p
 * @param {string[]} [exts] - Extensions with leading dot.
 * @returns {Promise<string[]>} File names; [] when `p` cannot be read.
 */
async function listFiles(p, exts = ['.md', '.yml']) {
  try {
    const entries = await fs.readdir(p, { withFileTypes: true });
    const names = [];
    for (const entry of entries) {
      if (!entry.isFile()) continue;
      const ext = path.extname(entry.name).toLowerCase();
      if (exts.includes(ext)) names.push(entry.name);
    }
    return names;
  } catch {
    return [];
  }
}
56
+
57
/**
 * Inventory one alias's source folder.
 *
 * Looks in the `weekly`, `snapshot`, `stream` and `_index` subfolders and
 * lists their files via listFiles(). Paths are returned as
 * `<layout>/<file>` with forward slashes on every platform, because callers
 * embed them in markdown paths; path.join would produce backslashes on
 * Windows.
 *
 * @param {string} aliasSourceDir - Directory of one source for one alias.
 * @returns {Promise<{weekly: string[], snapshot: string[], stream: string[],
 *   index: string[], total: number}>} Sorted relative paths per layout
 *   (`_index` is reported under `index`) and the overall file count.
 */
async function inventoryAliasSource(aliasSourceDir) {
  const result = { weekly: [], snapshot: [], stream: [], index: [], total: 0 };
  for (const layout of ['weekly', 'snapshot', 'stream', '_index']) {
    const dir = path.join(aliasSourceDir, layout);
    const files = await listFiles(dir);
    const key = layout === '_index' ? 'index' : layout;
    result[key] = files.map(f => `${layout}/${f}`).sort();
    result.total += files.length;
  }
  return result;
}
69
+
70
/**
 * Inventory the shared evidence folders (crm, ado, references).
 *
 * @param {string} project - Passed to sharedRoot().
 * @returns {Promise<Object<string, string[]>>} One key per folder that
 *   exists, holding the files listed by listFilesRecursive() relative to the
 *   shared root.
 */
async function inventoryShared(project) {
  const sharedDir = sharedRoot(project);
  const inventory = {};
  for (const name of ['crm', 'ado', 'references']) {
    const target = path.join(sharedDir, name);
    if (await pathExists(target)) {
      inventory[name] = await listFilesRecursive(target, sharedDir);
    }
  }
  return inventory;
}
81
+
82
/**
 * Recursively list `.md` / `.yml` files under `dir`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} base - Directory the returned paths are relative to.
 * @returns {Promise<string[]>} Sorted, '/'-separated paths relative to
 *   `base`. Directories that cannot be read are skipped.
 */
async function listFilesRecursive(dir, base) {
  const wanted = ['.md', '.yml'];
  const found = [];
  // Explicit work list instead of recursion; visit order is irrelevant
  // because the result is sorted before returning.
  const pending = [dir];
  while (pending.length > 0) {
    const current = pending.pop();
    let entries;
    try {
      entries = await fs.readdir(current, { withFileTypes: true });
    } catch {
      continue;
    }
    for (const entry of entries) {
      const full = path.join(current, entry.name);
      if (entry.isDirectory()) {
        pending.push(full);
      } else if (entry.isFile() && wanted.includes(path.extname(entry.name).toLowerCase())) {
        found.push(path.relative(base, full).replaceAll('\\', '/'));
      }
    }
  }
  return found.sort();
}
98
+
99
/**
 * Read `integrations.yml` from the project root.
 *
 * @param {string} project - Passed to projectRoot().
 * @returns {Promise<object>} Parsed YAML, or {} when the file is missing,
 *   empty, or cannot be read or parsed.
 */
async function readIntegrations(project) {
  const file = path.join(projectRoot(project), 'integrations.yml');
  const exists = await pathExists(file);
  if (!exists) return {};
  try {
    const raw = await fs.readFile(file, 'utf8');
    return YAML.parse(raw) || {};
  } catch {
    return {};
  }
}
104
+
105
/**
 * Read `boundaries.yml` for one alias from under the evidence root.
 *
 * @param {string} project - Passed to evidenceRoot().
 * @param {string} alias - Alias folder name.
 * @returns {Promise<object>} Parsed YAML, or {} when the file is missing,
 *   empty, or cannot be read or parsed.
 */
async function readBoundaries(project, alias) {
  const file = path.join(evidenceRoot(project), alias, 'boundaries.yml');
  const exists = await pathExists(file);
  if (!exists) return {};
  try {
    const raw = await fs.readFile(file, 'utf8');
    return YAML.parse(raw) || {};
  } catch {
    return {};
  }
}
110
+
111
/**
 * Format a markdown section: an H2 heading, a blank line, the given lines,
 * and a trailing newline.
 *
 * @param {string} title
 * @param {string[]} lines
 * @returns {string}
 */
function fmtSection(title, lines) {
  const heading = `## ${title}`;
  const block = [heading, '', ...lines].join('\n');
  return `${block}\n`;
}
114
+
115
/**
 * Build the markdown for the State index: integrations, shared evidence and
 * a per-contributor file inventory.
 *
 * Every list is capped for readability, and each capped list says how many
 * items were left out. That now includes the SharePoint site list, which was
 * previously cut at 10 without any note.
 *
 * @param {object} ctx
 * @param {string} ctx.projectName
 * @param {object} ctx.integrations - Reads crm.request_id, ado.engagement_id,
 *   sharepoint.sites.
 * @param {Array<{alias: string, boundaries?: object, sources?: object}>} ctx.aliases
 * @param {Object<string, string[]>} ctx.shared - Shared file lists by source.
 * @param {string} ctx.generatedAt - Timestamp text for the header.
 * @returns {string} Markdown with runs of 3+ newlines collapsed to 2.
 */
function buildIndex({ projectName, integrations, aliases, shared, generatedAt }) {
  const lines = [];
  lines.push(`# ${projectName} — State Index`);
  lines.push('');
  lines.push(`Generated by \`pull-state.mjs\` on ${generatedAt}.`);
  lines.push('');
  lines.push('Mechanical inventory only. For narrative synthesis, use the `build-state` skill.');
  lines.push('');

  const integLines = [];
  if (integrations.crm?.request_id) integLines.push(`- **CRM**: \`${integrations.crm.request_id}\``);
  if (integrations.ado?.engagement_id) integLines.push(`- **ADO**: \`${integrations.ado.engagement_id}\``);
  const spSites = integrations.sharepoint?.sites || [];
  if (spSites.length) {
    integLines.push(`- **SharePoint sites** (${spSites.length}):`);
    for (const s of spSites.slice(0, 10)) integLines.push(` - ${s}`);
    if (spSites.length > 10) integLines.push(` - _… and ${spSites.length - 10} more_`);
  }
  if (integLines.length) lines.push(fmtSection('Integrations (project-shared)', integLines));

  // Shared evidence
  const sharedLines = [];
  for (const [src, files] of Object.entries(shared)) {
    if (!files.length) continue;
    sharedLines.push(`### ${src} (${files.length} file${files.length === 1 ? '' : 's'})`);
    sharedLines.push('');
    for (const f of files.slice(0, 30)) sharedLines.push(`- \`Evidence/_shared/${f}\``);
    if (files.length > 30) sharedLines.push(`- _… and ${files.length - 30} more_`);
    sharedLines.push('');
  }
  if (sharedLines.length) lines.push(fmtSection('Shared Evidence', sharedLines));

  // Per-alias
  for (const a of aliases) {
    const aliasLines = [];
    aliasLines.push(`Boundaries:`);
    for (const [k, v] of Object.entries(a.boundaries || {})) {
      // A boundary is either a plain array or an object holding its list
      // under one of these keys.
      const arr = Array.isArray(v) ? v : (v?.folders || v?.chats || v?.joinUrls || v?.section_file_ids || v?.sites || []);
      if (Array.isArray(arr) && arr.length) aliasLines.push(` - ${k}: ${arr.length} item(s)`);
    }
    aliasLines.push('');
    for (const [src, inv] of Object.entries(a.sources || {})) {
      if (inv.total === 0) continue;
      aliasLines.push(`**${src}** — ${inv.total} file(s) (weekly: ${inv.weekly.length}, snapshot: ${inv.snapshot.length}, stream: ${inv.stream.length}, index: ${inv.index.length})`);
      const all = [...inv.weekly, ...inv.snapshot, ...inv.stream, ...inv.index];
      for (const f of all.slice(0, 12)) aliasLines.push(`- \`Evidence/${a.alias}/${src}/${f}\``);
      if (all.length > 12) aliasLines.push(`- _… and ${all.length - 12} more_`);
      aliasLines.push('');
    }
    lines.push(fmtSection(`Contributor: ${a.alias}`, aliasLines));
  }

  return lines.join('\n').replace(/\n{3,}/g, '\n\n');
}
168
+
169
/**
 * Build the markdown run log: a header followed by up to 100 run-log entries,
 * newest first (ordered by timestamp compared as strings).
 *
 * @param {object} ctx
 * @param {string} ctx.projectName
 * @param {{entries?: object[]}|null} ctx.runLog - Each entry may carry
 *   timestamp, status, source, entity and week; missing fields print as '?'
 *   or are omitted.
 * @param {string} ctx.generatedAt - Timestamp text for the header.
 * @returns {string}
 */
function buildLog({ projectName, runLog, generatedAt }) {
  const header = [
    `# ${projectName} — Run Log`,
    '',
    `Generated by \`pull-state.mjs\` on ${generatedAt}. Reflects \`Evidence/run-log.yml\`.`,
    '',
  ];

  const hasEntries = Boolean(runLog) && Array.isArray(runLog.entries) && runLog.entries.length > 0;
  if (!hasEntries) {
    return [...header, '_No run-log entries yet._'].join('\n');
  }

  const newestFirst = [...runLog.entries].sort(
    (a, b) => String(b.timestamp || '').localeCompare(String(a.timestamp || '')),
  );
  const rows = newestFirst.slice(0, 100).map((entry) => {
    const ts = entry.timestamp || '?';
    const status = entry.status || '?';
    const src = entry.source || '?';
    const ent = entry.entity ? ` \`${entry.entity}\`` : '';
    const wk = entry.week ? ` (week ${entry.week})` : '';
    return `- **${ts}** — ${src}${ent}${wk} → \`${status}\``;
  });

  const out = [...header, ...rows];
  if (newestFirst.length > 100) out.push(`\n_Showing 100 of ${newestFirst.length} entries._`);
  return out.join('\n');
}
191
+
192
/**
 * Build the project-context markdown: project facts, contributors with their
 * evidence file counts, where things live, and doctrine.
 *
 * @param {object} ctx
 * @param {string} ctx.projectName
 * @param {object} ctx.integrations - Reads crm.request_id, ado.engagement_id,
 *   sharepoint.sites.
 * @param {Array<{alias: string, sources?: object}>} ctx.aliases
 * @param {object} ctx.shared - Accepted but not used here.
 * @returns {string}
 */
function buildClaude({ projectName, integrations, aliases, shared }) {
  const projectFacts = [`- Name: \`${projectName}\``];
  if (integrations.crm?.request_id) projectFacts.push(`- CRM request: \`${integrations.crm.request_id}\``);
  if (integrations.ado?.engagement_id) projectFacts.push(`- ADO engagement: \`${integrations.ado.engagement_id}\``);
  if (integrations.sharepoint?.sites?.length) {
    projectFacts.push(`- SharePoint sites: ${integrations.sharepoint.sites.length}`);
  }

  const contributors = aliases.map((a) => {
    let totalFiles = 0;
    for (const inv of Object.values(a.sources || {})) totalFiles += (inv.total || 0);
    return `- \`${a.alias}\`: ${totalFiles} evidence file(s)`;
  });

  return [
    `# ${projectName} — Project Context`,
    '',
    'Auto-generated by `pull-state.mjs`. This file gives any AI agent (Claude, Copilot, etc.) the minimal facts to be useful in this engagement.',
    '',
    '## Project',
    ...projectFacts,
    '',
    '## Contributors',
    ...contributors,
    '',
    '## Where things live',
    '- Per-contributor evidence: `Evidence/<alias>/<source>/...`',
    '- Shared evidence: `Evidence/_shared/{crm,ado,references}/`',
    '- Project-wide config: `integrations.yml`',
    '- This index: `State/index.md`, `State/log.md`',
    '',
    '## Doctrine',
    '- Cite every claim. Use the form `[source: <relative-path> · <iso-ts>]`.',
    '- Read-only Q&A: see the `ask-project` skill.',
    '- Refresh + state regen: `kushi refresh <project>` then `kushi state <project>`.',
  ].join('\n');
}
224
+
225
/**
 * Script entry point: inventory a project's Evidence/ tree and regenerate the
 * mechanical State/ pages (index.md, log.md, CLAUDE.md, AGENTS.md).
 *
 * Progress goes through `log`, and the final result (or failure reason) is
 * reported through `emit`; both helpers are defined elsewhere in this file.
 *
 * @returns {Promise<number>} Exit code: 0 on success or `--help`; 2 when
 *   `--project` is missing, the project root does not exist, or its evidence
 *   root does not exist.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    console.log(help());
    return 0;
  }
  if (!args.project) {
    console.error(help());
    emit({ status: 'failed', error: 'required: --project' });
    return 2;
  }

  const root = projectRoot(args.project);
  if (!await pathExists(root)) {
    emit({ status: 'failed', error: `project-not-bootstrapped: ${root}` });
    return 2;
  }
  const evRoot = evidenceRoot(root);
  if (!await pathExists(evRoot)) {
    emit({ status: 'failed', error: `evidence-missing: ${evRoot}` });
    return 2;
  }

  const projectName = path.basename(root);
  const generatedAt = new Date().toISOString();

  log(`scanning ${evRoot}...`);
  const integrations = await readIntegrations(root);
  const shared = await inventoryShared(root);

  // Per-alias inventory. Directories starting with '_' (e.g. `_shared`) are
  // not contributors and are excluded here.
  const dirs = await listDirs(evRoot);
  const aliasNames = dirs.filter(d => !d.startsWith('_'));
  const aliases = [];
  for (const alias of aliasNames) {
    const aliasDir = path.join(evRoot, alias);
    const sources = {};
    for (const src of SOURCES) {
      const srcDir = path.join(aliasDir, src);
      if (!await pathExists(srcDir)) {
        // Missing source folder: record an empty inventory so every alias
        // exposes the same set of source keys.
        sources[src] = { weekly: [], snapshot: [], stream: [], index: [], total: 0 };
        continue;
      }
      sources[src] = await inventoryAliasSource(srcDir);
    }
    aliases.push({ alias, boundaries: await readBoundaries(root, alias), sources });
  }

  // run-log.yml — best-effort: an unreadable or unparsable file falls back to
  // an empty run log, but the failure is reported instead of silently hidden
  // (otherwise log.md claims there are no entries with no hint as to why).
  let runLog = {};
  const runLogPath = path.join(evRoot, 'run-log.yml');
  if (await pathExists(runLogPath)) {
    try {
      runLog = YAML.parse(await fs.readFile(runLogPath, 'utf8')) || {};
    } catch (err) {
      log(`warning: could not read ${runLogPath} (${err?.message || String(err)}); rendering an empty run log`);
      runLog = {};
    }
  }

  const stateDir = path.join(root, 'State');
  const indexMd = buildIndex({ projectName, integrations, aliases, shared, generatedAt });
  const logMd = buildLog({ projectName, runLog, generatedAt });
  const claudeMd = buildClaude({ projectName, integrations, aliases, shared });

  // With --dry-run the pages are still built (so errors surface) but nothing
  // is written and `files_written` stays empty.
  const writes = [];
  if (!args.dryRun) {
    await fs.mkdir(stateDir, { recursive: true });
    const r1 = await writeAtomic(path.join(stateDir, 'index.md'), indexMd);
    const r2 = await writeAtomic(path.join(stateDir, 'log.md'), logMd);
    const r3 = await writeAtomic(path.join(stateDir, 'CLAUDE.md'), claudeMd);
    // AGENTS.md intentionally carries the same content as CLAUDE.md.
    const r4 = await writeAtomic(path.join(stateDir, 'AGENTS.md'), claudeMd);
    writes.push(r1, r2, r3, r4);
  }

  log(`done: ${aliases.length} contributor(s), ${SOURCES.length} sources scanned`);
  emit({
    status: 'ok',
    project: root,
    project_name: projectName,
    dry_run: args.dryRun,
    contributors: aliases.length,
    contributors_list: aliases.map(a => a.alias),
    // Paths are reported relative to the project root with forward slashes.
    state_dir: path.relative(root, stateDir).replaceAll('\\', '/'),
    files_written: writes.filter(w => w?.written).map(w => path.relative(root, w.path).replaceAll('\\', '/')),
    note: 'Mechanical inventory only. Run the build-state LLM skill for narrative synthesis.',
  });
  return 0;
}
293
+
294
// Run the script: exit with main()'s return code (0 when it is falsy); on an
// uncaught rejection, report the failure through emit() and exit with 1.
main()
  .then((exitCode) => {
    process.exit(exitCode || 0);
  })
  .catch((err) => {
    emit({ status: 'failed', error: err.message || String(err) });
    process.exit(1);
  });
@@ -224,6 +224,24 @@ async function main() {
224
224
  ? planned.map(t => ({ source: t.source, entity: t.entity, week: weekStart, dry_run: true, reason: t.reason }))
225
225
  : await pMap(planned, args.maxParallel, t => runOne(t, weekStart, args));
226
226
 
227
+ // v5.9.0: post-pass — unified references pool. Scans Evidence for URLs and
228
+ // builds a project-shared dedup index with HTTP snapshots for external links.
229
+ let referencesResult = null;
230
+ let stateResult = null;
231
+ if (!args.dryRun) {
232
+ const refsRunner = path.join(HERE, 'pull-references.mjs');
233
+ const refsArgv = ['--project', args.project];
234
+ if (args.force) refsArgv.push('--refresh');
235
+ const r = await spawnRunner(refsRunner, refsArgv);
236
+ referencesResult = { source: 'references', exit_code: r.code, stdout: r.stdout?.slice(0, 4000), stderr: r.stderr?.slice(0, 1000) };
237
+
238
+ // v5.9.0: post-pass — deterministic State/ generator. Inventory only;
239
+ // build-state LLM skill remains the synthesis layer.
240
+ const stateRunner = path.join(HERE, 'pull-state.mjs');
241
+ const s = await spawnRunner(stateRunner, ['--project', args.project]);
242
+ stateResult = { source: 'state', exit_code: s.code, stdout: s.stdout?.slice(0, 4000), stderr: s.stderr?.slice(0, 1000) };
243
+ }
244
+
227
245
  const learning_candidates_total = args.dryRun ? 0 : await readCandidateCount(args.project);
228
246
 
229
247
  emit({
@@ -237,6 +255,8 @@ async function main() {
237
255
  skipped: skipped.length,
238
256
  results,
239
257
  skipped_targets: skipped,
258
+ references: referencesResult,
259
+ state: stateResult,
240
260
  learning_candidates_total,
241
261
  });
242
262
  return 0;
package/src/main.mjs CHANGED
@@ -120,7 +120,10 @@ async function installVscode(options, resolved, version) {
120
120
  // Skips silently when --no-prompt is set, when not running in a TTY, or
121
121
  // when all 3 fields are already populated (re-install case).
122
122
  const noPrompt = options.noPrompt || process.env.KUSHI_NO_PROMPT === '1';
123
- const qs = await runM365Quickstart({ destRoot: fullDest, noPrompt });
123
+ // --force re-prompts even if all 4 fields are already populated (gives users
124
+ // a way to change their answers on a reinstall). Without --force the quickstart
125
+ // skips silently when the file already looks complete.
126
+ const qs = await runM365Quickstart({ destRoot: fullDest, noPrompt, force: !!options.force });
124
127
  if (qs.ran === false && qs.reason && !['already-populated', 'no-prompt-flag'].includes(qs.reason)) {
125
128
  console.log(` Quickstart skipped: ${qs.reason}`);
126
129
  }