kushi-agents 5.8.3 → 5.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.mjs +84 -1
- package/package.json +2 -1
- package/plugin/runners/discover.mjs +24 -9
- package/plugin/runners/lib/references.mjs +164 -0
- package/plugin/runners/pull-references.mjs +209 -0
- package/plugin/runners/pull-state.mjs +297 -0
- package/plugin/runners/refresh.mjs +20 -0
- package/plugin/templates/init/m365-auth.template.json +1 -0
- package/src/main.mjs +4 -1
package/bin/cli.mjs
CHANGED
|
@@ -252,17 +252,100 @@ if (args.includes('--help') || args.includes('-h')) {
|
|
|
252
252
|
After install, talk to Kushi:
|
|
253
253
|
bootstrap <project> First-time setup
|
|
254
254
|
refresh <project> Incremental refresh + rebuild State/
|
|
255
|
-
state <project> Re-render State/ from existing Evidence
|
|
255
|
+
state <project> Re-render State/ from existing Evidence (deterministic
|
|
256
|
+
inventory; LLM build-state skill does narrative synthesis)
|
|
257
|
+
references <project> Scan Evidence for URLs and refresh the shared
|
|
258
|
+
references pool (Evidence/_shared/references/)
|
|
256
259
|
consolidate <project> Merge per-user evidence
|
|
257
260
|
status <project> Show run-log
|
|
258
261
|
ask <project> <q> Cited Q&A over Evidence/ (auto-routes, --file-back to save)
|
|
259
262
|
lint <project> Run wiki-lint checks on State/
|
|
260
263
|
|
|
264
|
+
Workspace lifecycle (v5.9.0+):
|
|
265
|
+
uninstall [--keep-config] Remove <cwd>/.kushi/ (preserves Evidence/, State/).
|
|
266
|
+
--keep-config preserves config/user/ identity files.
|
|
267
|
+
upgrade npm i -g kushi-agents@latest then re-seed assets
|
|
268
|
+
in cwd (config preserved).
|
|
269
|
+
|
|
261
270
|
In VS Code Chat the prefix is "@Kushi". In Clawpilot just say "kushi <verb>".
|
|
262
271
|
`);
|
|
263
272
|
process.exit(0);
|
|
264
273
|
}
|
|
265
274
|
|
|
275
|
+
// ── state / refresh / bootstrap verbs (v5.9.0+) ─────────────────────────────
// Each verb listed here is backed by a runner script under plugin/runners/.
// The CLI resolves that script relative to this file and re-invokes Node on
// it, passing the project as `--project` and every later argument untouched,
// so `kushi state HCA` works from the global bin without knowing runner paths.
if (['state', 'refresh-runner', 'bootstrap-runner', 'discover', 'references'].includes(args[0])) {
  const [verb, project, ...forwarded] = args;
  if (!project) {
    console.error(`\n Usage: kushi ${verb} <project> [options]\n`);
    process.exit(1);
  }

  const [{ spawnSync }, { dirname, resolve }, { fileURLToPath }] = await Promise.all([
    import('node:child_process'),
    import('node:path'),
    import('node:url'),
  ]);

  // verb → runner file name inside plugin/runners/
  const scriptByVerb = {
    state: 'pull-state.mjs',
    references: 'pull-references.mjs',
    discover: 'discover.mjs',
    'refresh-runner': 'refresh.mjs',
    'bootstrap-runner': 'bootstrap.mjs',
  };
  const binDir = dirname(fileURLToPath(import.meta.url));
  const script = resolve(binDir, '..', 'plugin', 'runners', scriptByVerb[verb]);

  const { status } = spawnSync(
    process.execPath,
    [script, '--project', project, ...forwarded],
    { stdio: 'inherit' },
  );
  // A null status (child did not exit normally) is reported as failure.
  process.exit(status ?? 1);
}
|
|
301
|
+
|
|
302
|
+
// ── workspace uninstall / upgrade verbs (v5.9.0+) ───────────────────────────
// `kushi uninstall` with none of the host flags removes <cwd>/.kushi/.
// With --keep-config only the seeded asset directories are deleted, so
// whatever else lives under .kushi/ (e.g. config/) stays in place.
if (args[0] === 'uninstall' && !['--clawpilot', '--vscode', '--all-hosts'].some((flag) => args.includes(flag))) {
  const { existsSync, rmSync } = await import('node:fs');
  const { join, resolve } = await import('node:path');
  const dest = resolve(process.cwd(), '.kushi');

  if (!existsSync(dest)) {
    console.error(`\n No .kushi/ directory found at ${dest}\n`);
    process.exit(1);
  }

  if (!args.includes('--keep-config')) {
    // Full removal of the workspace install directory.
    rmSync(dest, { recursive: true, force: true });
    console.log(`\n Removed ${dest}\n Evidence/ and State/ left untouched.\n`);
    process.exit(0);
  }

  // Partial removal: only the asset directories that are actually present.
  const present = ['agents', 'instructions', 'prompts', 'skills', 'templates', 'reference-packs', 'lib', 'runners']
    .map((name) => join(dest, name))
    .filter((candidate) => existsSync(candidate));
  for (const target of present) {
    rmSync(target, { recursive: true, force: true });
  }
  console.log(`\n Removed ${present.length} asset dir(s) from ${dest} (config/user/ preserved).\n`);
  process.exit(0);
}
|
|
327
|
+
|
|
328
|
+
if (args.length > 0 && args[0] === 'upgrade') {
  // Upgrade: npm i -g @latest, then re-seed assets in cwd preserving config.
  const { spawnSync } = await import('node:child_process');
  const { existsSync } = await import('node:fs');

  // On Windows npm is a .cmd shim, and current Node releases refuse to spawn
  // .cmd/.bat files without a shell (spawn fails with EINVAL). Running the
  // command line through the platform shell works everywhere. Only fixed
  // literal arguments are ever passed here, so nothing needs escaping.
  const runNpm = (npmArgs) => spawnSync(`npm ${npmArgs.join(' ')}`, { stdio: 'inherit', shell: true });
  // status is null when the child never started or was killed by a signal;
  // that must be reported as a failure, not as success.
  const exitCodeOf = (result) => result.status ?? 1;

  console.log('\n Upgrading kushi-agents globally via npm...\n');
  const install = runNpm(['install', '-g', 'kushi-agents@latest']);
  if (install.status !== 0) {
    const why = install.error ? ` (${install.error.message})` : '';
    console.error(`\n npm install failed${why}.\n`);
    process.exit(exitCodeOf(install));
  }

  console.log('\n Refreshing assets in cwd (config preserved)...\n');
  if (!existsSync('.kushi')) {
    console.log('\n No .kushi/ in cwd — global upgrade complete; cd into a project and run `kushi` to install.\n');
    process.exit(0);
  }
  const reseed = runNpm(['exec', '--', 'kushi-agents', '--no-prompt', '--force']);
  process.exit(exitCodeOf(reseed));
}
|
|
348
|
+
|
|
266
349
|
// ── multi-host mode (v5.0.2+) ───────────────────────────────────────────────
|
|
267
350
|
// Trigger when the user passes any of: --vscode, --all-hosts, --uninstall.
|
|
268
351
|
// --clawpilot ALONE continues to route through the legacy main.mjs path so
|
package/package.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "kushi-agents",
|
|
3
|
-
"version": "5.8.3",
|
|
3
|
+
"version": "5.9.0",
|
|
4
4
|
"description": "Install Kushi — multi-source project evidence agent with Comprehensive Structured Capture (CSC) into weekly-only files across Email, Teams, OneNote, Loop, SharePoint, Meetings, CRM, ADO. Meetings retain a sibling verbatim/ audit folder. WorkIQ-only for M365 sources (Graph / m365_* FORBIDDEN as fallbacks; user-paste is first-class). Host-agnostic.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
|
+
"kushi": "./bin/cli.mjs",
|
|
7
8
|
"kushi-agents": "./bin/cli.mjs"
|
|
8
9
|
},
|
|
9
10
|
"files": [
|
|
@@ -89,16 +89,29 @@ function buildPrompt(source, projectName, scope = null) {
|
|
|
89
89
|
? scope.folders
|
|
90
90
|
: ['Inbox']; // safe default — bounds the query so WorkIQ uses Graph filter, not mailbox-wide semantic search
|
|
91
91
|
const isDefault = !(Array.isArray(scope.folders) && scope.folders.length > 0);
|
|
92
|
+
// matchingPolicy.mode drives whether we search by subfolder name, by mail content, or both.
|
|
93
|
+
// Recognized values: 'subfolder-only', 'keyword-only', 'hybrid' (default).
|
|
94
|
+
const mode = (scope.matchMode || 'hybrid').toLowerCase();
|
|
95
|
+
const doFolder = mode !== 'keyword-only';
|
|
96
|
+
const doKeyword = mode !== 'subfolder-only';
|
|
92
97
|
lines.push('');
|
|
93
|
-
|
|
94
|
-
lines.push('Restrict your search to Outlook mail folders whose name CONTAINS any of these tokens (case-insensitive, fuzzy substring match — e.g. "FDE" matches "1. FDE", "01 FDE Active", "FDE-archive"):');
|
|
95
|
-
} else {
|
|
96
|
-
lines.push('Restrict your search to ONLY these Outlook mail folders (exact name match):');
|
|
97
|
-
}
|
|
98
|
+
lines.push('SCOPE BOUNDARY — search ONLY within these Outlook parent folders and ALL nested descendants:');
|
|
98
99
|
for (const f of folders) {
|
|
99
|
-
lines.push(` • "${f}"${scope.includeSubfolders ? ' (
|
|
100
|
+
lines.push(` • "${f}"${scope.includeSubfolders !== false ? ' (recursively include every subfolder underneath)' : ' (this folder only)'}`);
|
|
101
|
+
}
|
|
102
|
+
lines.push('Parent folder name match: case-insensitive, fuzzy contains (so "1. FDE" matches "1. FDE", "01. FDE", "1 FDE").');
|
|
103
|
+
lines.push('Do NOT look outside this boundary.');
|
|
104
|
+
lines.push('');
|
|
105
|
+
lines.push(`SEARCH STRATEGY for project "${projectName}" (matchingPolicy.mode = "${mode}"):`);
|
|
106
|
+
if (doFolder && doKeyword) {
|
|
107
|
+
lines.push(` 1. Subfolder match: look for any subfolder whose name fuzzy-contains "${projectName}" (e.g. "${projectName}", "102. ${projectName}", "${projectName} - Engagement"). Emit each as a match.`);
|
|
108
|
+
lines.push(` 2. Mail-content match: also search email subjects, bodies, sender/recipient names within the boundary for "${projectName}" (case-insensitive). Group matching mail by folder and emit each folder.`);
|
|
109
|
+
lines.push(` Run BOTH and merge results — do not stop at step 1.`);
|
|
110
|
+
} else if (doFolder) {
|
|
111
|
+
lines.push(` Subfolder match only: emit subfolders whose name fuzzy-contains "${projectName}".`);
|
|
112
|
+
} else {
|
|
113
|
+
lines.push(` Mail-content match only: search email subjects, bodies, sender/recipient names within the boundary for "${projectName}" (case-insensitive). Group matching mail by folder and emit each folder.`);
|
|
100
114
|
}
|
|
101
|
-
lines.push('Do NOT scan any other mailbox folders.');
|
|
102
115
|
if (isDefault) {
|
|
103
116
|
lines.push('(Note: no project-specific folders configured — defaulting to Inbox+subfolders. For faster, more accurate results, populate emailContext.folders in m365-auth.json.)');
|
|
104
117
|
}
|
|
@@ -188,12 +201,14 @@ function applyRows(source, rows, currentBounds, currentInteg) {
|
|
|
188
201
|
return { boundariesPatch: added.length ? { onenote: { section_file_ids: merged } } : null, accepted };
|
|
189
202
|
}
|
|
190
203
|
if (source === 'sharepoint') {
|
|
191
|
-
|
|
204
|
+
// v5.9.0: SP sites are project-wide, not per-alias. Write into integrations.yml.
|
|
205
|
+
const existing = currentInteg.sharepoint?.sites || [];
|
|
192
206
|
const incoming = rows.map(r => r.site_url).filter(Boolean);
|
|
193
207
|
const merged = dedup([...existing, ...incoming]);
|
|
194
208
|
const added = merged.filter(v => !existing.includes(v));
|
|
195
209
|
if (added.length) accepted.push(...added);
|
|
196
|
-
|
|
210
|
+
const cur = currentInteg.sharepoint || {};
|
|
211
|
+
return { integrationsPatch: added.length ? { sharepoint: { ...cur, sites: merged } } : null, accepted };
|
|
197
212
|
}
|
|
198
213
|
if (source === 'crm') {
|
|
199
214
|
const cur = currentInteg.crm || {};
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
// plugin/runners/lib/references.mjs
|
|
2
|
+
// Deterministic URL extraction, classification, and lightweight HTTP snapshot
|
|
3
|
+
// for the unified references pool. No LLM. v5.9.0.
|
|
4
|
+
|
|
5
|
+
import { promises as fs } from 'node:fs';
|
|
6
|
+
import path from 'node:path';
|
|
7
|
+
import crypto from 'node:crypto';
|
|
8
|
+
|
|
9
|
+
/** Permissive URL regex. Captures http(s) URLs in markdown / yaml / plain text. */
const URL_RE = /\bhttps?:\/\/[^\s<>"'`)\]}|\\]+/gi;

/** Trailing punctuation that is almost never part of a URL. */
const TRAILING_TRIM = /[)\].,;:!?>'"]+$/;

/**
 * Collect the distinct http(s) URLs that appear in a piece of text.
 *
 * @param {string} text - Raw file content; anything falsy or non-string yields [].
 * @returns {string[]} URLs in first-seen order, trailing punctuation removed.
 */
export function extractUrls(text) {
  if (typeof text !== 'string' || text === '') return [];
  const unique = new Set();
  for (const [raw] of text.matchAll(URL_RE)) {
    const trimmed = raw.replace(TRAILING_TRIM, '');
    // "https://" alone is 8 characters; require at least one more.
    if (trimmed.length >= 9) unique.add(trimmed);
  }
  return Array.from(unique);
}
|
|
25
|
+
|
|
26
|
+
/**
 * Short stable identifier for a URL, used for filenames and index keys.
 *
 * @param {string} url - URL to identify; it is normalized before hashing.
 * @returns {string} First 16 hex characters of the SHA-1 of the normalized URL.
 */
export function urlHash(url) {
  const canonical = normalizeUrl(url);
  const digest = crypto.createHash('sha1').update(canonical).digest('hex');
  return digest.slice(0, 16);
}
|
|
30
|
+
|
|
31
|
+
/**
 * Strip the fragment and common tracking query parameters from a URL.
 * Path and all other query parameters are kept.
 *
 * Tracking parameter names are matched case-insensitively, so the usual
 * spellings such as `WT.mc_id` or `UTM_Source` are removed as well.
 *
 * @param {string} url - URL to normalize.
 * @returns {string} Normalized URL, or the input unchanged when it cannot be parsed.
 */
export function normalizeUrl(url) {
  try {
    const u = new URL(url);
    u.hash = '';
    const drop = ['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content', 'wt.mc_id'];
    const dropSet = new Set(drop);
    // searchParams.delete() is case-sensitive, so collect the actual spellings
    // present in this URL. Keys are snapshotted first because deleting while
    // iterating a live URLSearchParams skips entries.
    const variants = [...new Set(u.searchParams.keys())].filter((k) => dropSet.has(k.toLowerCase()));
    // The canonical names are still deleted unconditionally: delete() also
    // re-serializes the query string, and the hash of the normalized URL
    // depends on that canonical serialization.
    for (const k of [...drop, ...variants]) u.searchParams.delete(k);
    return u.toString();
  } catch {
    // Not a parseable absolute URL: hand it back untouched.
    return url;
  }
}
|
|
43
|
+
|
|
44
|
+
// Ordered host table: the first rule whose pattern matches the lower-cased
// hostname decides the canonical host label, the kind, and whether the URL
// is treated as requiring authentication.
const HOST_RULES = [
  { match: /(^|\.)sharepoint\.com$/i, host: 'sharepoint.com', kind: 'sharepoint', authRequired: true },
  { match: /(^|\.)loop\.microsoft\.com$/i, host: 'loop.microsoft.com', kind: 'loop', authRequired: true },
  { match: /loop\.cloud\.microsoft$/i, host: 'loop.cloud.microsoft', kind: 'loop', authRequired: true },
  { match: /loop-api\.cloud\.microsoft$/i, host: 'loop-api.cloud.microsoft', kind: 'loop', authRequired: true },
  { match: /(^|\.)fluidpreview\.office\.net$/i, host: 'fluidpreview.office.net', kind: 'loop', authRequired: true },
  { match: /(^|\.)teams\.microsoft\.com$/i, host: 'teams.microsoft.com', kind: 'teams', authRequired: true },
  { match: /(^|\.)office\.com$/i, host: 'office.com', kind: 'office', authRequired: true },
  { match: /(^|\.)dev\.azure\.com$/i, host: 'dev.azure.com', kind: 'ado', authRequired: true },
  { match: /(^|\.)visualstudio\.com$/i, host: 'visualstudio.com', kind: 'ado', authRequired: true },
  { match: /(^|\.)dynamics\.com$/i, host: 'dynamics.com', kind: 'crm', authRequired: true },
  { match: /(^|\.)learn\.microsoft\.com$/i, host: 'learn.microsoft.com', kind: 'docs', authRequired: false },
  { match: /(^|\.)docs\.microsoft\.com$/i, host: 'docs.microsoft.com', kind: 'docs', authRequired: false },
  { match: /(^|\.)github\.com$/i, host: 'github.com', kind: 'repo', authRequired: false },
];

/**
 * Classify a URL by its hostname.
 *
 * @param {string} url - URL to classify.
 * @returns {{host: string, kind: string, authRequired: boolean}} The matching
 *   rule's labels; `{ host: <hostname>, kind: 'external' }` when no rule
 *   matches; `{ host: 'unknown', kind: 'invalid' }` when the URL cannot be parsed.
 */
export function classify(url) {
  let hostname;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    return { host: 'unknown', kind: 'invalid', authRequired: false };
  }
  const rule = HOST_RULES.find(({ match }) => match.test(hostname));
  if (rule === undefined) {
    return { host: hostname, kind: 'external', authRequired: false };
  }
  const { host, kind, authRequired } = rule;
  return { host, kind, authRequired };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Turn a host label into a safe filename segment.
 *
 * @param {string} host - Host label; a falsy value becomes "unknown".
 * @returns {string} Lower-cased label with every character outside
 *   `a-z 0-9 . -` replaced by `_`, truncated to 80 characters.
 */
export function safeHost(host) {
  const lowered = (host || 'unknown').toLowerCase();
  const sanitized = lowered.replace(/[^a-z0-9.-]/g, '_');
  return sanitized.slice(0, 80);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Fetch a URL and extract a small content snapshot.
 *
 * On a completed request returns
 * `{ ok, status, title, description, h1, contentType, bytes, snippet }`.
 * On a network failure or timeout returns `{ ok: false, status: 0, error }`
 * and none of the content fields.
 *
 * At most `maxBytes` of the body are kept (default 64KB); the chunk that
 * crosses the limit is clipped, so `bytes` never exceeds `maxBytes`.
 *
 * @param {string} url - URL to fetch; redirects are followed.
 * @param {{timeoutMs?: number, maxBytes?: number}} [options]
 * @returns {Promise<object>} Snapshot object as described above; never rejects.
 */
export async function fetchSnapshot(url, { timeoutMs = 15000, maxBytes = 64 * 1024 } = {}) {
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), timeoutMs);
  try {
    const res = await fetch(url, {
      redirect: 'follow',
      signal: ctrl.signal,
      headers: { 'user-agent': 'kushi-references/1.0 (+https://github.com/ushakrishnan/kushi)' },
    });
    const contentType = res.headers.get('content-type') || '';
    const reader = res.body?.getReader();
    let received = 0;
    const chunks = [];
    if (reader) {
      try {
        while (received < maxBytes) {
          const { done, value } = await reader.read();
          if (done) break;
          // Clip the chunk that crosses the limit so the payload stays bounded.
          const room = maxBytes - received;
          const piece = value.byteLength > room ? value.subarray(0, room) : value;
          chunks.push(piece);
          received += piece.byteLength;
        }
      } finally {
        // cancel() returns a promise; await it and swallow its rejection so a
        // failed cancel cannot surface later as an unhandled rejection.
        await reader.cancel().catch(() => {});
      }
    }
    const text = Buffer.concat(chunks, received).toString('utf8');
    // Trust the header first, then sniff the body for an HTML preamble.
    const html = /html|xml/i.test(contentType) || /^\s*<!doctype html|<html/i.test(text);
    const title = html ? extractTag(text, 'title') : '';
    const description = html ? extractMeta(text, 'description') : '';
    const h1 = html ? extractTag(text, 'h1') : '';
    const snippet = html ? stripHtml(text).slice(0, 600) : text.slice(0, 600);
    return {
      ok: res.ok,
      status: res.status,
      title: clean(title),
      description: clean(description),
      h1: clean(h1),
      contentType,
      bytes: received,
      snippet: clean(snippet),
    };
  } catch (e) {
    // fetch() reports network failures as a generic "fetch failed" error whose
    // `cause` carries the underlying code (ENOTFOUND, ECONNREFUSED, ...);
    // prefer that code because it is what makes the failure diagnosable.
    const reason = e.name === 'AbortError' ? 'timeout' : (e.cause?.code || e.code || e.message);
    return { ok: false, status: 0, error: reason };
  } finally {
    clearTimeout(timer);
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Return the inner text of the first `<tag>…</tag>` element in `html`
 * (tag name matched case-insensitively), or '' when there is none.
 */
function extractTag(html, tag) {
  const pattern = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`, 'i');
  const found = pattern.exec(html);
  return found === null ? '' : found[1];
}
|
|
130
|
+
/**
 * Read the `content` of a `<meta>` tag.
 *
 * Looks first for `<meta name="NAME" …>`, then for
 * `<meta property="og:NAME" …>`. Attribute order inside the tag does not
 * matter, and a value may contain the quote character that does not delimit
 * it (e.g. `content="It's here"`).
 *
 * @param {string} html - Document text.
 * @param {string} name - Meta name, e.g. "description".
 * @returns {string} The first non-empty matching content value, or ''.
 */
function extractMeta(html, name) {
  // key="value" or key='value'; the value runs to its own closing quote.
  const attrRe = /([^\s"'=<>\/]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  const metas = (html.match(/<meta\b[^>]*>/gi) || []).map((tag) => {
    const attrs = new Map();
    for (const [, key, dq, sq] of tag.matchAll(attrRe)) {
      const k = key.toLowerCase();
      if (!attrs.has(k)) attrs.set(k, dq ?? sq ?? '');
    }
    return attrs;
  });
  // Plain name= wins over the Open Graph property=.
  const lookups = [['name', name], ['property', `og:${name}`]];
  for (const [attr, wanted] of lookups) {
    const target = wanted.toLowerCase();
    const hit = metas.find((attrs) => attrs.get(attr)?.toLowerCase() === target && attrs.get('content'));
    if (hit) return hit.get('content');
  }
  return '';
}
|
|
135
|
+
/**
 * Reduce an HTML string to plain text: script and style elements are
 * dropped whole, remaining tags are removed, and whitespace is collapsed.
 */
function stripHtml(s) {
  // Applied in order; every match is replaced by a single space.
  const passes = [
    /<script[\s\S]*?<\/script>/gi,
    /<style[\s\S]*?<\/style>/gi,
    /<[^>]+>/g,
    /\s+/g,
  ];
  let text = s;
  for (const pattern of passes) {
    text = text.replace(pattern, ' ');
  }
  return text.trim();
}
|
|
142
|
+
/**
 * Normalize a text field: falsy input becomes '', whitespace runs collapse
 * to one space, ends are trimmed, and the result is capped at 800 characters.
 */
function clean(s) {
  const text = s || '';
  const collapsed = text.replace(/\s+/g, ' ').trim();
  return collapsed.length > 800 ? collapsed.slice(0, 800) : collapsed;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Walk a directory recursively, returning files matching `extensions`.
 *
 * `skipDirs` entries are '/'-separated directory paths (one or more
 * segments). A directory is skipped when the trailing segments of its path
 * RELATIVE TO `dir` equal an entry. The location of `dir` itself therefore
 * never causes skipping, and `.git` does not match `.github`.
 *
 * @param {string} dir - Root directory to walk.
 * @param {{extensions?: string[], skipDirs?: string[]}} [options]
 * @returns {Promise<string[]>} Matching file paths, each prefixed with `dir`.
 */
export async function walkFiles(dir, { extensions = ['.yml','.yaml','.md','.txt','.json'], skipDirs = ['_shared/references','node_modules','.git'] } = {}) {
  const out = [];
  const skipPatterns = skipDirs
    .map((s) => s.replaceAll('\\', '/').split('/').filter(Boolean))
    .filter((segments) => segments.length > 0);

  // Ancestors were already checked on the way down, so comparing the tail of
  // the relative path is enough.
  const shouldSkip = (full) => {
    const segments = path.relative(dir, full).split(path.sep);
    return skipPatterns.some((pattern) => {
      const offset = segments.length - pattern.length;
      return offset >= 0 && pattern.every((part, i) => segments[offset + i] === part);
    });
  };

  async function walk(d) {
    let entries;
    // Unreadable or vanished directories are skipped: the walk is best-effort.
    try { entries = await fs.readdir(d, { withFileTypes: true }); } catch { return; }
    for (const e of entries) {
      const full = path.join(d, e.name);
      if (e.isDirectory()) {
        if (!shouldSkip(full)) await walk(full);
      } else if (e.isFile()) {
        if (extensions.includes(path.extname(e.name).toLowerCase())) out.push(full);
      }
    }
  }

  await walk(dir);
  return out;
}
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// plugin/runners/pull-references.mjs
|
|
3
|
+
// Unified references pool. Scans Evidence/ for URLs, dedupes against an index,
|
|
4
|
+
// classifies by host, snapshots external URLs via HTTP. Auth-protected URLs
|
|
5
|
+
// (SP/Loop/Teams/ADO/CRM) are recorded with metadata only and marked
|
|
6
|
+
// `pending-auth-fetch` for follow-up by source-specific pulls.
|
|
7
|
+
//
|
|
8
|
+
// Project-shared. Not dated. One snapshot per URL. Re-crawl with --refresh.
|
|
9
|
+
//
|
|
10
|
+
// Usage:
|
|
11
|
+
// node plugin/runners/pull-references.mjs --project <P> [--refresh] [--dry-run]
|
|
12
|
+
// [--timeout-ms N] [--max-fetch N] [--only-host <h>]
|
|
13
|
+
|
|
14
|
+
import path from 'node:path';
|
|
15
|
+
import { promises as fs } from 'node:fs';
|
|
16
|
+
import YAML from 'yaml';
|
|
17
|
+
import { evidenceRoot, sharedRoot, projectRoot } from './lib/layout.mjs';
|
|
18
|
+
import { writeAtomic, pathExists } from './lib/evidence.mjs';
|
|
19
|
+
import { extractUrls, urlHash, normalizeUrl, classify, safeHost, fetchSnapshot, walkFiles } from './lib/references.mjs';
|
|
20
|
+
|
|
21
|
+
/**
 * Parse pull-references command-line flags.
 *
 * Numeric flags fall back to their defaults when the value is missing or
 * invalid. `--max-fetch 0` is honoured (record URLs without fetching any);
 * `--timeout-ms` must be a finite number greater than zero.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{dryRun: boolean, refresh: boolean, timeoutMs: number, maxFetch: number,
 *   project?: string, onlyHost?: string, help?: boolean}}
 */
function parseArgs(argv) {
  const args = { dryRun: false, refresh: false, timeoutMs: 15000, maxFetch: 50 };
  // `Number(x) || fallback` would turn an explicit 0 into the fallback and let
  // negative values through, so validate explicitly instead.
  const numberOr = (raw, fallback, isValid) => {
    const n = raw === undefined || String(raw).trim() === '' ? Number.NaN : Number(raw);
    return isValid(n) ? n : fallback;
  };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--project') args.project = argv[++i];
    else if (a === '--refresh') args.refresh = true;
    else if (a === '--dry-run') args.dryRun = true;
    else if (a === '--timeout-ms') args.timeoutMs = numberOr(argv[++i], 15000, (n) => Number.isFinite(n) && n > 0);
    else if (a === '--max-fetch') args.maxFetch = numberOr(argv[++i], 50, (n) => n >= 0);
    else if (a === '--only-host') args.onlyHost = argv[++i];
    else if (a === '--help' || a === '-h') args.help = true;
  }
  return args;
}
|
|
35
|
+
|
|
36
|
+
/** One-line usage text for the pull-references runner. */
function help() {
  const usage = `Usage: node pull-references.mjs --project <P> [--refresh] [--dry-run] [--timeout-ms N] [--max-fetch N] [--only-host <h>]`;
  return usage;
}
|
|
39
|
+
|
|
40
|
+
/** Write one JSON document followed by a newline to stdout. */
function emit(obj) {
  const line = `${JSON.stringify(obj)}\n`;
  process.stdout.write(line);
}

/** Write a progress message, tagged with the runner name, to stderr. */
function log(msg) {
  const line = `[references] ${msg}\n`;
  process.stderr.write(line);
}
|
|
42
|
+
|
|
43
|
+
/** Directory holding the references pool: `<sharedRoot>/references`. */
function refsRoot(project) {
  const shared = sharedRoot(project);
  return path.join(shared, 'references');
}

/** Location of the pool's index file: `<refsRoot>/index.yml`. */
function indexPath(project) {
  const poolDir = refsRoot(project);
  return path.join(poolDir, 'index.yml');
}

/** Location of one URL record: `<refsRoot>/by-host/<safe host>/<hash>.md`. */
function recordPath(project, host, hash) {
  const hostDir = path.join(refsRoot(project), 'by-host', safeHost(host));
  return path.join(hostDir, `${hash}.md`);
}
|
|
48
|
+
|
|
49
|
+
/**
 * Load the references index from `p`.
 *
 * Always resolves to a plain object whose `entries` is a plain object. A
 * missing file, unparseable YAML, or a document that is not a mapping (a
 * scalar or a list) all yield a fresh `{ version: 1, entries: {} }`.
 *
 * @param {string} p - Path to index.yml.
 * @returns {Promise<{version?: number, entries: object}>}
 */
async function loadIndex(p) {
  const fresh = () => ({ version: 1, entries: {} });
  const isMapping = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);

  if (!await pathExists(p)) return fresh();

  let parsed;
  try {
    parsed = YAML.parse(await fs.readFile(p, 'utf8'));
  } catch (e) {
    // Best-effort: a broken index must not abort the run, but starting over
    // should be visible because it resets first_seen history.
    log(`index unreadable (${e.message || e}); starting with an empty index`);
    return fresh();
  }

  // A list would expose Array.prototype.entries as `.entries`, and a scalar
  // cannot carry properties at all; treat both as no index.
  if (!isMapping(parsed)) return fresh();
  if (!isMapping(parsed.entries)) parsed.entries = {};
  return parsed;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Render one reference record: YAML front matter followed by a short body.
 *
 * The body is the captured snippet when there is one; otherwise a note that
 * either the fetch is deferred (auth-required URL) or nothing was captured.
 * At most 20 source files are listed in the front matter.
 */
function recordTemplate({ url, host, kind, authRequired, firstSeen, sourceFiles, snapshot }) {
  const fallbackStatus = authRequired ? 'pending-auth-fetch' : 'unfetched';
  const frontMatter = YAML.stringify({
    url,
    normalized_url: normalizeUrl(url),
    host,
    kind,
    auth_required: authRequired,
    first_seen: firstSeen,
    last_crawled: snapshot?.crawledAt || null,
    fetch_status: snapshot?.fetch_status || fallbackStatus,
    http_status: snapshot?.status ?? null,
    title: snapshot?.title || '',
    description: snapshot?.description || '',
    source_files: sourceFiles.slice(0, 20),
  }).trimEnd();

  let body;
  if (snapshot?.snippet) {
    body = `\n## Snippet\n\n${snapshot.snippet}\n`;
  } else if (authRequired) {
    body = `\n_Content fetch deferred — this URL requires Microsoft 365 authentication. The matching source-specific pull (sharepoint/loop/teams/ado/crm) will populate richer evidence._\n`;
  } else {
    body = `\n_No snapshot captured._\n`;
  }

  return ['---', frontMatter, '---', body].join('\n');
}
|
|
78
|
+
|
|
79
|
+
/**
 * Entry point of the references runner.
 *
 * 1. Scans the project's evidence files for URLs, grouped by normalized URL.
 * 2. Decides per URL whether work is needed: new URLs, every URL under
 *    --refresh, and fetchable URLs that an earlier run indexed but never
 *    fetched (status "unfetched", e.g. because of the per-run fetch cap).
 * 3. Fetches up to --max-fetch non-auth URLs, never-crawled ones first, so
 *    repeated runs work through the backlog instead of re-fetching the same
 *    leading URLs every time.
 * 4. Writes one record per processed URL plus the index (unless --dry-run),
 *    and keeps an existing snapshot record when this run did not re-fetch it.
 *
 * Note: --dry-run suppresses file writes only; HTTP fetches still happen.
 *
 * @returns {Promise<number>} Process exit code: 0 on success, 2 on bad
 *   arguments or a missing project / evidence directory.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) { console.log(help()); return 0; }
  if (!args.project) { console.error(help()); emit({ status: 'failed', error: 'required: --project' }); return 2; }

  const root = projectRoot(args.project);
  if (!await pathExists(root)) { emit({ status: 'failed', error: `project-not-bootstrapped: ${root}` }); return 2; }

  const evRoot = evidenceRoot(root);
  if (!await pathExists(evRoot)) { emit({ status: 'failed', error: `evidence-missing: ${evRoot}` }); return 2; }

  log(`scanning ${evRoot} for URLs...`);
  const files = await walkFiles(evRoot);
  log(`scanning ${files.length} file(s)...`);

  /** url(normalized) → { url, sourceFiles:Set } */
  const found = new Map();
  for (const f of files) {
    let txt = '';
    // Unreadable files are skipped; the scan is best-effort.
    try { txt = await fs.readFile(f, 'utf8'); } catch { continue; }
    const rel = path.relative(root, f);
    for (const u of extractUrls(txt)) {
      const k = normalizeUrl(u);
      if (!found.has(k)) found.set(k, { url: u, sourceFiles: new Set() });
      found.get(k).sourceFiles.add(rel);
    }
  }
  log(`found ${found.size} unique URL(s)`);

  const idxFile = indexPath(root);
  const index = await loadIndex(idxFile);
  index.entries ??= {};

  const today = new Date().toISOString().slice(0, 10);
  const tasks = [];
  let newCount = 0;
  let refreshCount = 0;
  let skippedCount = 0;

  const isFetchable = (cls) => !cls.authRequired && cls.kind !== 'invalid';

  for (const [normalized, { url, sourceFiles }] of found.entries()) {
    const cls = classify(url);
    if (args.onlyHost && cls.host !== args.onlyHost) { skippedCount++; continue; }

    const hash = urlHash(url);
    const existing = index.entries[hash];
    const isNew = !existing;
    // Indexed earlier but never fetched: without this the entry would stay
    // "unfetched" forever, because only brand-new URLs were ever queued.
    const neverFetched = !isNew && isFetchable(cls) && existing.fetch_status === 'unfetched';
    if (!isNew && !args.refresh && !neverFetched) {
      // Already captured: update source_files only.
      const merged = new Set([...(existing.source_files || []), ...sourceFiles]);
      existing.source_files = [...merged].slice(0, 20);
      existing.last_seen = today;
      skippedCount++;
      continue;
    }
    tasks.push({ url, normalized, hash, sourceFiles: [...sourceFiles], cls, isNew });
    if (isNew) newCount++; else refreshCount++;
  }

  // Cap fetches per run. Never-crawled URLs go first so the cap cannot starve
  // them behind URLs that already have a snapshot.
  const candidates = tasks.filter((t) => isFetchable(t.cls));
  const wasCrawled = (t) => Boolean(index.entries[t.hash]?.last_crawled);
  const fetchable = [
    ...candidates.filter((t) => !wasCrawled(t)),
    ...candidates.filter((t) => wasCrawled(t)),
  ].slice(0, args.maxFetch);
  const fetchKeys = new Set(fetchable.map((t) => t.hash));
  log(`new: ${newCount}, refresh: ${refreshCount}, skip-existing: ${skippedCount}, will fetch: ${fetchable.length}`);

  let written = 0;
  let fetched = 0;
  let fetchOk = 0;
  for (const t of tasks) {
    const prev = index.entries[t.hash];
    let snap = null;
    if (fetchKeys.has(t.hash)) {
      log(` fetch ${t.cls.host}: ${t.url.slice(0, 100)}`);
      snap = await fetchSnapshot(t.url, { timeoutMs: args.timeoutMs });
      snap.crawledAt = today;
      snap.fetch_status = snap.ok ? 'fetched' : `fetch-failed:${snap.error || snap.status}`;
      fetched++;
      if (snap.ok) fetchOk++;
    }
    const firstSeen = prev?.first_seen || today;
    const recPath = recordPath(root, t.cls.host, t.hash);

    // A record crawled on an earlier run must not be replaced by an empty
    // "No snapshot captured" record just because this run did not fetch it.
    const keepExistingRecord = !snap && Boolean(prev?.last_crawled) && await pathExists(recPath);
    if (!args.dryRun && !keepExistingRecord) {
      const record = recordTemplate({
        url: t.url,
        host: t.cls.host,
        kind: t.cls.kind,
        authRequired: t.cls.authRequired,
        firstSeen,
        sourceFiles: t.sourceFiles,
        snapshot: snap,
      });
      const r = await writeAtomic(recPath, record, { skipIfUnchanged: true });
      if (r.written) written++;
    }

    // Crawl fields fall back to the previous entry when nothing was fetched.
    index.entries[t.hash] = {
      url: t.url,
      normalized_url: t.normalized,
      host: t.cls.host,
      kind: t.cls.kind,
      auth_required: t.cls.authRequired,
      first_seen: firstSeen,
      last_seen: today,
      last_crawled: snap?.crawledAt || prev?.last_crawled || null,
      fetch_status: snap?.fetch_status || prev?.fetch_status || (t.cls.authRequired ? 'pending-auth-fetch' : 'unfetched'),
      http_status: snap?.status ?? prev?.http_status ?? null,
      title: snap?.title || prev?.title || '',
      record_path: path.relative(root, recPath).replaceAll('\\', '/'),
      source_files: t.sourceFiles.slice(0, 20),
    };
  }

  if (!args.dryRun) {
    await writeAtomic(idxFile, YAML.stringify(index), { skipIfUnchanged: true });
  }

  emit({
    status: 'ok',
    project: root,
    dry_run: args.dryRun,
    scanned_files: files.length,
    urls_total: found.size,
    new: newCount,
    refresh: refreshCount,
    fetched,
    fetch_ok: fetchOk,
    written,
    index: path.relative(root, idxFile).replaceAll('\\', '/'),
  });
  return 0;
}

// Run, mapping the returned code (or an unexpected rejection) to the exit status.
main().then(code => process.exit(code || 0)).catch(e => {
  emit({ status: 'failed', error: e.message || String(e) });
  process.exit(1);
});
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// plugin/runners/pull-state.mjs
|
|
3
|
+
// Deterministic State/ generator. Inventories Evidence/ and produces:
|
|
4
|
+
// State/index.md — TOC pointing at every evidence file by source/week
|
|
5
|
+
// State/log.md — chronological run + evidence ledger
|
|
6
|
+
// State/CLAUDE.md — host-agnostic project context (project name, sources, alias inventory)
|
|
7
|
+
// State/AGENTS.md — alias of CLAUDE.md for OpenAI-flavored hosts
|
|
8
|
+
//
|
|
9
|
+
// This runner does NOT do narrative synthesis — that is the build-state LLM
|
|
10
|
+
// skill's job. This produces the structural skeleton that makes the LLM
|
|
11
|
+
// skill's work cheaper and reproducible. v5.9.0.
|
|
12
|
+
//
|
|
13
|
+
// Usage:
|
|
14
|
+
// node plugin/runners/pull-state.mjs --project <P> [--dry-run] [--no-legacy]
|
|
15
|
+
|
|
16
|
+
import path from 'node:path';
|
|
17
|
+
import { promises as fs } from 'node:fs';
|
|
18
|
+
import YAML from 'yaml';
|
|
19
|
+
import { evidenceRoot, projectRoot, sharedRoot } from './lib/layout.mjs';
|
|
20
|
+
import { writeAtomic, pathExists } from './lib/evidence.mjs';
|
|
21
|
+
|
|
22
|
+
/**
 * Parse pull-state command-line flags.
 *
 * Recognized: `--project <P>`, `--dry-run`, `--no-legacy`, `--help` / `-h`.
 * Anything else is ignored. Legacy content is included unless `--no-legacy`
 * is given.
 */
function parseArgs(argv) {
  const parsed = { dryRun: false, includeLegacy: true };
  let idx = 0;
  while (idx < argv.length) {
    switch (argv[idx]) {
      case '--project':
        // The flag consumes the following argument as its value.
        idx += 1;
        parsed.project = argv[idx];
        break;
      case '--dry-run':
        parsed.dryRun = true;
        break;
      case '--no-legacy':
        parsed.includeLegacy = false;
        break;
      case '--help':
      case '-h':
        parsed.help = true;
        break;
      default:
        break;
    }
    idx += 1;
  }
  return parsed;
}
|
|
33
|
+
|
|
34
|
+
/** One-line usage text for the pull-state runner. */
function help() {
  const usage = `Usage: node pull-state.mjs --project <P> [--dry-run] [--no-legacy]`;
  return usage;
}
|
|
37
|
+
|
|
38
|
+
/** Write one JSON document followed by a newline to stdout. */
function emit(obj) {
  const line = `${JSON.stringify(obj)}\n`;
  process.stdout.write(line);
}

/** Write a progress message, tagged with the runner name, to stderr. */
function log(msg) {
  const line = `[state] ${msg}\n`;
  process.stderr.write(line);
}
|
|
40
|
+
|
|
41
|
+
const SOURCES = ['email', 'teams', 'meetings', 'onenote', 'sharepoint', 'crm', 'ado'];
|
|
42
|
+
|
|
43
|
+
/**
 * Names of the immediate subdirectories of `p`.
 * Resolves to [] when `p` cannot be read (missing, not a directory, ...).
 */
async function listDirs(p) {
  try {
    const entries = await fs.readdir(p, { withFileTypes: true });
    const names = [];
    for (const entry of entries) {
      if (entry.isDirectory()) names.push(entry.name);
    }
    return names;
  } catch {
    return [];
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Names of the regular files directly inside `p` whose lower-cased extension
 * is listed in `exts`. Resolves to [] when `p` cannot be read.
 */
async function listFiles(p, exts = ['.md', '.yml']) {
  try {
    const entries = await fs.readdir(p, { withFileTypes: true });
    const wanted = (entry) => entry.isFile() && exts.includes(path.extname(entry.name).toLowerCase());
    const names = [];
    for (const entry of entries) {
      if (wanted(entry)) names.push(entry.name);
    }
    return names;
  } catch {
    return [];
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Inventory one alias's source folder.
 *
 * Looks in the `weekly`, `snapshot`, `stream` and `_index` subfolders and
 * returns `{ weekly, snapshot, stream, index, total }`: each list holds the
 * sorted `<subfolder>/<file>` paths found there (`_index` is reported under
 * the key `index`), and `total` is the combined file count.
 */
async function inventoryAliasSource(aliasSourceDir) {
  const result = { weekly: [], snapshot: [], stream: [], index: [], total: 0 };
  // [subfolder on disk, key in the result]
  const layouts = [
    ['weekly', 'weekly'],
    ['snapshot', 'snapshot'],
    ['stream', 'stream'],
    ['_index', 'index'],
  ];
  for (const [folder, key] of layouts) {
    const names = await listFiles(path.join(aliasSourceDir, folder));
    result[key] = names.map((name) => path.join(folder, name)).sort();
    result.total += names.length;
  }
  return result;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Inventory the shared evidence folders (`crm`, `ado`, `references`).
 *
 * @param {string} project - Value passed through to `sharedRoot`.
 * @returns {Promise<Object<string, string[]>>} One key per shared folder that
 *   exists, mapped to its recursive file listing relative to the shared root.
 */
async function inventoryShared(project) {
  const inventory = {};
  const sharedDir = sharedRoot(project);
  for (const name of ['crm', 'ado', 'references']) {
    const folder = path.join(sharedDir, name);
    if (await pathExists(folder)) {
      inventory[name] = await listFilesRecursive(folder, sharedDir);
    }
  }
  return inventory;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Collect every `.md` / `.yml` file (extension compared case-insensitively)
 * under `dir`, descending into subdirectories.
 *
 * Directories that cannot be read are skipped.
 *
 * @param {string} dir - Directory to walk.
 * @param {string} base - Directory the returned paths are made relative to.
 * @returns {Promise<string[]>} Sorted relative paths using `/` separators.
 */
async function listFilesRecursive(dir, base) {
  const keep = ['.md', '.yml'];
  const found = [];
  // Explicit work list instead of recursion; the final sort makes the
  // traversal order irrelevant to the result.
  const pending = [dir];
  while (pending.length > 0) {
    const current = pending.pop();
    let entries;
    try {
      entries = await fs.readdir(current, { withFileTypes: true });
    } catch {
      continue;
    }
    for (const entry of entries) {
      const full = path.join(current, entry.name);
      if (entry.isDirectory()) {
        pending.push(full);
        continue;
      }
      if (!entry.isFile()) continue;
      if (!keep.includes(path.extname(entry.name).toLowerCase())) continue;
      found.push(path.relative(base, full).replaceAll('\\', '/'));
    }
  }
  return found.sort();
}
|
|
98
|
+
|
|
99
|
+
/**
 * Load `integrations.yml` from the project root.
 *
 * @param {string} project - Value passed through to `projectRoot`.
 * @returns {Promise<object>} The parsed YAML, or `{}` when the file is
 *   missing, empty, or cannot be read/parsed.
 */
async function readIntegrations(project) {
  const file = path.join(projectRoot(project), 'integrations.yml');
  const present = await pathExists(file);
  if (!present) return {};
  try {
    const text = await fs.readFile(file, 'utf8');
    return YAML.parse(text) || {};
  } catch {
    return {};
  }
}
|
|
104
|
+
|
|
105
|
+
/**
 * Load `boundaries.yml` from one contributor's evidence folder.
 *
 * @param {string} project - Value passed through to `evidenceRoot`.
 * @param {string} alias - Contributor folder name under the evidence root.
 * @returns {Promise<object>} The parsed YAML, or `{}` when the file is
 *   missing, empty, or cannot be read/parsed.
 */
async function readBoundaries(project, alias) {
  const file = path.join(evidenceRoot(project), alias, 'boundaries.yml');
  const present = await pathExists(file);
  if (!present) return {};
  try {
    const text = await fs.readFile(file, 'utf8');
    return YAML.parse(text) || {};
  } catch {
    return {};
  }
}
|
|
110
|
+
|
|
111
|
+
/**
 * Format a level-2 Markdown section: heading, blank line, the body lines,
 * then a trailing blank line.
 *
 * @param {string} title - Section heading text.
 * @param {string[]} lines - Body lines.
 * @returns {string} The section as a newline-joined string.
 */
function fmtSection(title, lines) {
  const parts = [`## ${title}`, ''];
  parts.push(...lines);
  parts.push('');
  return parts.join('\n');
}
|
|
114
|
+
|
|
115
|
+
/**
 * Build the Markdown inventory that `main()` writes to `State/index.md`.
 *
 * Sections, in order: a header, project-shared integrations (when any are
 * configured), shared evidence (when any files exist), and one
 * "Contributor" section per alias. Long lists are truncated with an
 * "… and N more" line.
 *
 * @param {object} params
 * @param {string} params.projectName - Name shown in the title.
 * @param {object} params.integrations - Parsed `integrations.yml` (may be `{}`).
 * @param {Array<{ alias: string, boundaries?: object, sources?: object }>} params.aliases
 *   Per-contributor inventory; each `sources` value has `weekly`, `snapshot`,
 *   `stream`, `index` arrays and a `total` count.
 * @param {Object<string, string[]>} params.shared - Shared-folder name to file list.
 * @param {string} params.generatedAt - Timestamp text for the header.
 * @returns {string} Markdown with runs of 3+ newlines collapsed to one blank line.
 */
function buildIndex({ projectName, integrations, aliases, shared, generatedAt }) {
  const lines = [];
  lines.push(`# ${projectName} — State Index`);
  lines.push('');
  lines.push(`Generated by \`pull-state.mjs\` on ${generatedAt}.`);
  lines.push('');
  lines.push('Mechanical inventory only. For narrative synthesis, use the `build-state` skill.');
  lines.push('');

  const integLines = [];
  if (integrations.crm?.request_id) integLines.push(`- **CRM**: \`${integrations.crm.request_id}\``);
  if (integrations.ado?.engagement_id) integLines.push(`- **ADO**: \`${integrations.ado.engagement_id}\``);
  // Only treat `sites` as a list when it really is one; a scalar value in the
  // YAML would otherwise be counted and iterated character by character.
  const rawSites = integrations.sharepoint?.sites;
  const spSites = Array.isArray(rawSites) ? rawSites : [];
  if (spSites.length) {
    integLines.push(`- **SharePoint sites** (${spSites.length}):`);
    for (const s of spSites.slice(0, 10)) integLines.push(` - ${s}`);
    // Make the truncation explicit, as the shared and per-alias lists below do.
    if (spSites.length > 10) integLines.push(` - _… and ${spSites.length - 10} more_`);
  }
  if (integLines.length) lines.push(fmtSection('Integrations (project-shared)', integLines));

  // Shared evidence
  const sharedLines = [];
  for (const [src, files] of Object.entries(shared)) {
    if (!files.length) continue;
    sharedLines.push(`### ${src} (${files.length} file${files.length === 1 ? '' : 's'})`);
    sharedLines.push('');
    for (const f of files.slice(0, 30)) sharedLines.push(`- \`Evidence/_shared/${f}\``);
    if (files.length > 30) sharedLines.push(`- _… and ${files.length - 30} more_`);
    sharedLines.push('');
  }
  if (sharedLines.length) lines.push(fmtSection('Shared Evidence', sharedLines));

  // Per-alias
  for (const a of aliases) {
    const aliasLines = [];
    aliasLines.push(`Boundaries:`);
    for (const [k, v] of Object.entries(a.boundaries || {})) {
      // A boundary is either a plain list or an object holding its list under
      // one of these keys; only its size is reported.
      const arr = Array.isArray(v) ? v : (v?.folders || v?.chats || v?.joinUrls || v?.section_file_ids || v?.sites || []);
      if (Array.isArray(arr) && arr.length) aliasLines.push(` - ${k}: ${arr.length} item(s)`);
    }
    aliasLines.push('');
    for (const [src, inv] of Object.entries(a.sources || {})) {
      if (inv.total === 0) continue;
      aliasLines.push(`**${src}** — ${inv.total} file(s) (weekly: ${inv.weekly.length}, snapshot: ${inv.snapshot.length}, stream: ${inv.stream.length}, index: ${inv.index.length})`);
      const all = [...inv.weekly, ...inv.snapshot, ...inv.stream, ...inv.index];
      for (const f of all.slice(0, 12)) aliasLines.push(`- \`Evidence/${a.alias}/${src}/${f}\``);
      if (all.length > 12) aliasLines.push(`- _… and ${all.length - 12} more_`);
      aliasLines.push('');
    }
    lines.push(fmtSection(`Contributor: ${a.alias}`, aliasLines));
  }

  // Sections end with blank lines; collapse the resulting runs of newlines.
  return lines.join('\n').replace(/\n{3,}/g, '\n\n');
}
|
|
168
|
+
|
|
169
|
+
/**
 * Build the Markdown run log that `main()` writes to `State/log.md`.
 *
 * Entries are listed newest first (string comparison of `timestamp`), capped
 * at 100 with a trailing note when more exist. Missing fields render as `?`.
 *
 * @param {object} params
 * @param {string} params.projectName - Name shown in the title.
 * @param {{ entries?: Array<object> } | null | undefined} params.runLog - Parsed `run-log.yml`.
 * @param {string} params.generatedAt - Timestamp text for the header.
 * @returns {string} The Markdown document.
 */
function buildLog({ projectName, runLog, generatedAt }) {
  const lines = [];
  lines.push(`# ${projectName} — Run Log`);
  lines.push('');
  lines.push(`Generated by \`pull-state.mjs\` on ${generatedAt}. Reflects \`Evidence/run-log.yml\`.`);
  lines.push('');

  // Keep only object entries. An empty YAML list item parses to null, and
  // reading `.timestamp` on it would throw inside the sort comparator.
  // `filter` also returns a copy, so the sort below never mutates the input.
  const entries = Array.isArray(runLog?.entries)
    ? runLog.entries.filter((e) => e !== null && typeof e === 'object')
    : [];
  if (entries.length === 0) {
    lines.push('_No run-log entries yet._');
    return lines.join('\n');
  }

  entries.sort((a, b) => String(b.timestamp || '').localeCompare(String(a.timestamp || '')));
  for (const e of entries.slice(0, 100)) {
    const ts = e.timestamp || '?';
    const status = e.status || '?';
    const src = e.source || '?';
    const ent = e.entity ? ` \`${e.entity}\`` : '';
    const wk = e.week ? ` (week ${e.week})` : '';
    lines.push(`- **${ts}** — ${src}${ent}${wk} → \`${status}\``);
  }
  if (entries.length > 100) lines.push(`\n_Showing 100 of ${entries.length} entries._`);
  return lines.join('\n');
}
|
|
191
|
+
|
|
192
|
+
/**
 * Build the agent-facing project context document; `main()` writes the same
 * text to both `State/CLAUDE.md` and `State/AGENTS.md`.
 *
 * @param {object} params
 * @param {string} params.projectName - Name shown in the title and Project section.
 * @param {object} params.integrations - Parsed `integrations.yml` (may be `{}`).
 * @param {Array<{ alias: string, sources?: object }>} params.aliases - Per-contributor inventory.
 * @param {object} params.shared - Accepted for signature parity; not used in the output.
 * @returns {string} The Markdown document.
 */
function buildClaude({ projectName, integrations, aliases, shared }) {
  const projectFacts = [`- Name: \`${projectName}\``];
  if (integrations.crm?.request_id) {
    projectFacts.push(`- CRM request: \`${integrations.crm.request_id}\``);
  }
  if (integrations.ado?.engagement_id) {
    projectFacts.push(`- ADO engagement: \`${integrations.ado.engagement_id}\``);
  }
  if (integrations.sharepoint?.sites?.length) {
    projectFacts.push(`- SharePoint sites: ${integrations.sharepoint.sites.length}`);
  }

  const contributorLines = [];
  for (const a of aliases) {
    let totalFiles = 0;
    for (const inv of Object.values(a.sources || {})) {
      totalFiles += inv.total || 0;
    }
    contributorLines.push(`- \`${a.alias}\`: ${totalFiles} evidence file(s)`);
  }

  return [
    `# ${projectName} — Project Context`,
    '',
    'Auto-generated by `pull-state.mjs`. This file gives any AI agent (Claude, Copilot, etc.) the minimal facts to be useful in this engagement.',
    '',
    '## Project',
    ...projectFacts,
    '',
    '## Contributors',
    ...contributorLines,
    '',
    '## Where things live',
    '- Per-contributor evidence: `Evidence/<alias>/<source>/...`',
    '- Shared evidence: `Evidence/_shared/{crm,ado,references}/`',
    '- Project-wide config: `integrations.yml`',
    '- This index: `State/index.md`, `State/log.md`',
    '',
    '## Doctrine',
    '- Cite every claim. Use the form `[source: <relative-path> · <iso-ts>]`.',
    '- Read-only Q&A: see the `ask-project` skill.',
    '- Refresh + state regen: `kushi refresh <project>` then `kushi state <project>`.',
  ].join('\n');
}
|
|
224
|
+
|
|
225
|
+
/**
 * Entry point: inventory a project's Evidence/ tree and (unless `--dry-run`)
 * write `State/index.md`, `State/log.md`, `State/CLAUDE.md` and
 * `State/AGENTS.md`.
 *
 * Emits one JSON status line on stdout via `emit` and progress on stderr via `log`.
 *
 * @returns {Promise<number>} Exit code: 0 on success or `--help`; 2 when
 *   `--project` is missing, the project root does not exist, or the evidence
 *   root does not exist.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    console.log(help());
    return 0;
  }
  if (!args.project) {
    console.error(help());
    emit({ status: 'failed', error: 'required: --project' });
    return 2;
  }

  const root = projectRoot(args.project);
  if (!(await pathExists(root))) {
    emit({ status: 'failed', error: `project-not-bootstrapped: ${root}` });
    return 2;
  }
  const evRoot = evidenceRoot(root);
  if (!(await pathExists(evRoot))) {
    emit({ status: 'failed', error: `evidence-missing: ${evRoot}` });
    return 2;
  }

  const projectName = path.basename(root);
  const generatedAt = new Date().toISOString();
  const toPosix = (p) => p.replaceAll('\\', '/');

  log(`scanning ${evRoot}...`);
  const integrations = await readIntegrations(root);
  const shared = await inventoryShared(root);

  // Per-alias inventory: every evidence subfolder not starting with '_' is a contributor.
  const aliases = [];
  for (const alias of await listDirs(evRoot)) {
    if (alias.startsWith('_')) continue;
    const aliasDir = path.join(evRoot, alias);
    const sources = {};
    for (const src of SOURCES) {
      const srcDir = path.join(aliasDir, src);
      if (await pathExists(srcDir)) {
        sources[src] = await inventoryAliasSource(srcDir);
      } else {
        // Absent source folders still get an (empty) record.
        sources[src] = { weekly: [], snapshot: [], stream: [], index: [], total: 0 };
      }
    }
    const boundaries = await readBoundaries(root, alias);
    aliases.push({ alias, boundaries, sources });
  }

  // run-log.yml: a missing, empty or unparseable file is treated as no entries.
  const runLogPath = path.join(evRoot, 'run-log.yml');
  let runLog = {};
  if (await pathExists(runLogPath)) {
    try {
      runLog = YAML.parse(await fs.readFile(runLogPath, 'utf8')) || {};
    } catch {
      runLog = {};
    }
  }

  const stateDir = path.join(root, 'State');
  const indexMd = buildIndex({ projectName, integrations, aliases, shared, generatedAt });
  const logMd = buildLog({ projectName, runLog, generatedAt });
  const claudeMd = buildClaude({ projectName, integrations, aliases, shared });

  const writes = [];
  if (!args.dryRun) {
    await fs.mkdir(stateDir, { recursive: true });
    // CLAUDE.md and AGENTS.md intentionally carry the same content.
    const targets = [
      ['index.md', indexMd],
      ['log.md', logMd],
      ['CLAUDE.md', claudeMd],
      ['AGENTS.md', claudeMd],
    ];
    for (const [fileName, content] of targets) {
      writes.push(await writeAtomic(path.join(stateDir, fileName), content));
    }
  }

  log(`done: ${aliases.length} contributor(s), ${SOURCES.length} sources scanned`);
  const filesWritten = writes
    .filter((w) => w?.written)
    .map((w) => toPosix(path.relative(root, w.path)));
  emit({
    status: 'ok',
    project: root,
    project_name: projectName,
    dry_run: args.dryRun,
    contributors: aliases.length,
    contributors_list: aliases.map((a) => a.alias),
    state_dir: toPosix(path.relative(root, stateDir)),
    files_written: filesWritten,
    note: 'Mechanical inventory only. Run the build-state LLM skill for narrative synthesis.',
  });
  return 0;
}
|
|
293
|
+
|
|
294
|
+
// Run the CLI. The exit code is set via process.exitCode instead of calling
// process.exit(): the JSON status line has just been written to stdout, and
// the Node.js documentation warns that process.exit() can terminate the
// process before pending stdout writes have been flushed. Letting the process
// end on its own keeps that line intact for whoever is reading the pipe.
main()
  .then((code) => {
    process.exitCode = code || 0;
  })
  .catch((e) => {
    // `e` may be any thrown value, including null/undefined.
    emit({ status: 'failed', error: e?.message || String(e) });
    process.exitCode = 1;
  });
|
|
@@ -224,6 +224,24 @@ async function main() {
|
|
|
224
224
|
? planned.map(t => ({ source: t.source, entity: t.entity, week: weekStart, dry_run: true, reason: t.reason }))
|
|
225
225
|
: await pMap(planned, args.maxParallel, t => runOne(t, weekStart, args));
|
|
226
226
|
|
|
227
|
+
// v5.9.0: post-pass — unified references pool. Scans Evidence for URLs and
|
|
228
|
+
// builds a project-shared dedup index with HTTP snapshots for external links.
|
|
229
|
+
let referencesResult = null;
|
|
230
|
+
let stateResult = null;
|
|
231
|
+
if (!args.dryRun) {
|
|
232
|
+
const refsRunner = path.join(HERE, 'pull-references.mjs');
|
|
233
|
+
const refsArgv = ['--project', args.project];
|
|
234
|
+
if (args.force) refsArgv.push('--refresh');
|
|
235
|
+
const r = await spawnRunner(refsRunner, refsArgv);
|
|
236
|
+
referencesResult = { source: 'references', exit_code: r.code, stdout: r.stdout?.slice(0, 4000), stderr: r.stderr?.slice(0, 1000) };
|
|
237
|
+
|
|
238
|
+
// v5.9.0: post-pass — deterministic State/ generator. Inventory only;
|
|
239
|
+
// build-state LLM skill remains the synthesis layer.
|
|
240
|
+
const stateRunner = path.join(HERE, 'pull-state.mjs');
|
|
241
|
+
const s = await spawnRunner(stateRunner, ['--project', args.project]);
|
|
242
|
+
stateResult = { source: 'state', exit_code: s.code, stdout: s.stdout?.slice(0, 4000), stderr: s.stderr?.slice(0, 1000) };
|
|
243
|
+
}
|
|
244
|
+
|
|
227
245
|
const learning_candidates_total = args.dryRun ? 0 : await readCandidateCount(args.project);
|
|
228
246
|
|
|
229
247
|
emit({
|
|
@@ -237,6 +255,8 @@ async function main() {
|
|
|
237
255
|
skipped: skipped.length,
|
|
238
256
|
results,
|
|
239
257
|
skipped_targets: skipped,
|
|
258
|
+
references: referencesResult,
|
|
259
|
+
state: stateResult,
|
|
240
260
|
learning_candidates_total,
|
|
241
261
|
});
|
|
242
262
|
return 0;
|
|
@@ -41,6 +41,7 @@
|
|
|
41
41
|
"sourceCoverageLabel": "",
|
|
42
42
|
"matchingPolicy": {
|
|
43
43
|
"mode": "hybrid",
|
|
44
|
+
"_mode_note": "How to find project-related mail within the SCOPE BOUNDARY (folders[] + dateFloor). One of: 'subfolder-only' (only match subfolder names containing the project name), 'keyword-only' (only search mail subjects/bodies for the project name), 'hybrid' (do both and merge — recommended default).",
|
|
44
45
|
"rankingOrder": ["exact", "prefix", "contains"],
|
|
45
46
|
"minConfidenceForFolderScopedSearch": "high",
|
|
46
47
|
"fallbackToFullRootScanWhenAmbiguous": true,
|
package/src/main.mjs
CHANGED
|
@@ -120,7 +120,10 @@ async function installVscode(options, resolved, version) {
|
|
|
120
120
|
// Skips silently when --no-prompt is set, when not running in a TTY, or
|
|
121
121
|
// when all 3 fields are already populated (re-install case).
|
|
122
122
|
const noPrompt = options.noPrompt || process.env.KUSHI_NO_PROMPT === '1';
|
|
123
|
-
|
|
123
|
+
// --force re-prompts even if all 4 fields are already populated (gives users
|
|
124
|
+
// a way to change their answers on a reinstall). Without --force the quickstart
|
|
125
|
+
// skips silently when the file already looks complete.
|
|
126
|
+
const qs = await runM365Quickstart({ destRoot: fullDest, noPrompt, force: !!options.force });
|
|
124
127
|
if (qs.ran === false && qs.reason && !['already-populated', 'no-prompt-flag'].includes(qs.reason)) {
|
|
125
128
|
console.log(` Quickstart skipped: ${qs.reason}`);
|
|
126
129
|
}
|