kushi-agents 6.3.0 → 6.5.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kushi-agents",
3
- "version": "6.3.0",
3
+ "version": "6.5.0",
4
4
  "description": "Install Kushi — multi-source project evidence agent with Comprehensive Structured Capture (CSC) into weekly-only files across Email, Teams, OneNote, Loop, SharePoint, Meetings, CRM, ADO. Meetings retain a sibling verbatim/ audit folder. WorkIQ-only for M365 sources (Graph / m365_* FORBIDDEN as fallbacks; user-paste is first-class). Host-agnostic.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -29,7 +29,7 @@ import { writeAtomic, pathExists } from './lib/evidence.mjs';
29
29
  import { writeRefreshReport, writeBootstrapStatus, appendRunLog } from './lib/runlog.mjs';
30
30
 
31
31
  function parseArgs(argv) {
32
- const args = { force: false, dryRun: false, lookbackDays: null, interactive: false };
32
+ const args = { force: false, dryRun: false, lookbackDays: null, interactive: false, full: false, since: null };
33
33
  for (let i = 0; i < argv.length; i++) {
34
34
  const a = argv[i];
35
35
  if (a === '--project') args.project = argv[++i];
@@ -38,6 +38,8 @@ function parseArgs(argv) {
38
38
  else if (a === '--dry-run') args.dryRun = true;
39
39
  else if (a === '--lookback-days') args.lookbackDays = Number(argv[++i]);
40
40
  else if (a === '--interactive' || a === '-i') args.interactive = true;
41
+ else if (a === '--full') args.full = true;
42
+ else if (a === '--since') args.since = argv[++i];
41
43
  else if (a === '--help' || a === '-h') args.help = true;
42
44
  }
43
45
  return args;
@@ -48,6 +50,12 @@ function help() {
48
50
  'Usage: node bootstrap.mjs --project <P> --alias <A> [options]',
49
51
  '',
50
52
  'Options:',
53
+ ' --full After scaffolding, also run `discover` and `refresh',
54
+ ' --since <floor>` so that on first run you get a fully',
55
+ ' populated Evidence/ tree across all weeks back to the',
56
+ ' lookback floor (default: 2026-03-01 if --since not set).',
57
+ ' --since YYYY-MM-DD Used with --full. Lookback floor for refresh week loop.',
58
+ ' Defaults to 2026-03-01 (engagement start) if omitted.',
51
59
  ' --interactive Prompt for the 3 fields that most affect discover speed',
52
60
  ' (email folders, look-back days, OneNote notebook) and',
53
61
  ' stamp them into .kushi/config/user/m365-auth.json. Non-',
@@ -226,8 +234,8 @@ async function interactiveSetup({ workspace, dryRun }) {
226
234
  function emit(obj) { process.stdout.write(JSON.stringify(obj) + '\n'); }
227
235
 
228
236
  const INTEGRATIONS_TEMPLATE = {
229
- crm: { instance: 'https://iscrm.crm.dynamics.com', table: 'incidents', request_id: null, record_id: null },
230
- ado: { organization: 'IndustrySolutions', project: 'IS Engagements', apiVersion: '7.1', engagement_id: null },
237
+ crm: { instance: 'https://iscrm.crm.dynamics.com', table: 'incidents', request_id: '<__FILL_ME_IN__>', record_id: '<__FILL_ME_IN__>' },
238
+ ado: { organization: 'IndustrySolutions', project: 'IS Engagements', apiVersion: '7.1', engagement_id: '<__FILL_ME_IN__>' },
231
239
  sharepoint: { allowed_tenants: [] },
232
240
  };
233
241
 
@@ -417,6 +425,15 @@ async function main() {
417
425
  } catch { /* bootstrap-report is diagnostics-only, never block */ }
418
426
  }
419
427
 
428
+ // v6.4.0: --full flag — after scaffolding, also run discover + refresh
429
+ // --since <floor> so a first-run on a fresh project produces a fully
430
+ // populated Evidence/ tree without manual orchestration.
431
+ let chainResults = null;
432
+ if (args.full && !args.dryRun) {
433
+ const since = args.since || '2026-03-01';
434
+ chainResults = await runFullChain({ project: args.project, alias: args.alias, since });
435
+ }
436
+
420
437
  emit({
421
438
  status: 'ok',
422
439
  project: root,
@@ -428,10 +445,43 @@ async function main() {
428
445
  ...(statusPath ? { status_md: path.relative(root, statusPath) } : {}),
429
446
  ...(dateFloorReport ? { date_floor: dateFloorReport } : {}),
430
447
  ...(interactiveReport ? { interactive: interactiveReport } : {}),
448
+ ...(chainResults ? { full_chain: chainResults } : {}),
431
449
  });
432
450
  return 0;
433
451
  }
434
452
 
453
+ import { spawn } from 'node:child_process';
454
+ import { fileURLToPath } from 'node:url';
455
+
456
+ async function runFullChain({ project, alias, since }) {
457
+ const HERE = path.dirname(fileURLToPath(import.meta.url));
458
+ const out = { since, discover: null, refresh: null };
459
+
460
+ process.stderr.write(`\n[bootstrap --full] step 1/2: discover\n`);
461
+ out.discover = await spawnAndCapture(path.join(HERE, 'discover.mjs'), ['--project', project, '--alias', alias]);
462
+ process.stderr.write(`[bootstrap --full] discover exit=${out.discover.exit_code}\n`);
463
+
464
+ process.stderr.write(`[bootstrap --full] step 2/2: refresh --since ${since}\n`);
465
+ out.refresh = await spawnAndCapture(path.join(HERE, 'refresh.mjs'), ['--project', project, '--alias', alias, '--since', since]);
466
+ process.stderr.write(`[bootstrap --full] refresh exit=${out.refresh.exit_code}\n`);
467
+
468
+ return out;
469
+ }
470
+
471
+ function spawnAndCapture(runner, argv) {
472
+ return new Promise(resolve => {
473
+ const proc = spawn(process.execPath, [runner, ...argv], { stdio: ['ignore', 'pipe', 'inherit'] });
474
+ let stdout = '';
475
+ proc.stdout.on('data', d => { stdout += d.toString(); process.stderr.write(d); });
476
+ proc.on('close', code => {
477
+ let parsed = null;
478
+ const lastLine = stdout.trim().split('\n').filter(Boolean).pop();
479
+ try { parsed = lastLine ? JSON.parse(lastLine) : null; } catch { /* not JSON */ }
480
+ resolve({ exit_code: code, parsed });
481
+ });
482
+ });
483
+ }
484
+
435
485
  main().then(code => { process.exitCode = code; }).catch(e => {
436
486
  emit({ status: 'failed', errors: [{ message: e.message }] });
437
487
  process.exit(1);
@@ -252,17 +252,26 @@ function applyRows(source, rows, currentBounds, currentInteg) {
252
252
  }
253
253
  if (source === 'meetings') {
254
254
  const existing = currentBounds.meetings?.joinUrls || [];
255
+ // v6.5.0: meeting boundaries MUST be real http(s) join URLs — pull-meetings
256
+ // can't resolve a subject string into a meeting at the WorkIQ layer (unlike
257
+ // teams chat topics). Reject anything that isn't a URL; track rejected
258
+ // subjects in `accepted` log via reason field for discover-report visibility.
259
+ const rejectedSubjects = [];
255
260
  const incoming = rows.map(r => {
256
261
  const url = r.join_url;
257
- if (url && !isPlaceholder(url) && isValidValueFor('meetings', 'join_url', url) && url.startsWith('http')) return url;
262
+ if (url && !isPlaceholder(url) && isValidValueFor('meetings', 'join_url', url) && /^https?:\/\//.test(url)) return url;
258
263
  const subj = r.subject;
259
- if (subj && !isPlaceholder(subj)) return subj;
264
+ if (subj && !isPlaceholder(subj)) rejectedSubjects.push(subj);
260
265
  return null;
261
266
  }).filter(Boolean);
262
267
  const merged = dedup([...existing, ...incoming]);
263
268
  const added = merged.filter(v => !existing.includes(v));
264
269
  if (added.length) accepted.push(...added);
265
- return { boundariesPatch: added.length ? { meetings: { joinUrls: merged } } : null, accepted };
270
+ return {
271
+ boundariesPatch: added.length ? { meetings: { joinUrls: merged } } : null,
272
+ accepted,
273
+ rejected: rejectedSubjects.length ? rejectedSubjects.map(s => ({ subject: s, reason: 'no-join-url' })) : undefined,
274
+ };
266
275
  }
267
276
  if (source === 'onenote') {
268
277
  const existing = currentBounds.onenote?.section_file_ids || [];
@@ -303,7 +312,7 @@ function applyRows(source, rows, currentBounds, currentInteg) {
303
312
  isValidValueFor('crm', 'request_id', r.request_id) ||
304
313
  isValidValueFor('crm', 'incident_number', r.incident_number)
305
314
  );
306
- if (!top) return { integrationsPatch: null, accepted: [] };
315
+ if (!top) return { integrationsPatch: null, accepted: [], unresolved: 'crm.request_id' };
307
316
  const id = isValidValueFor('crm', 'request_id', top.request_id) ? top.request_id : top.incident_number;
308
317
  const patch = { crm: { ...cur, request_id: id } };
309
318
  accepted.push(id);
@@ -318,7 +327,7 @@ function applyRows(source, rows, currentBounds, currentInteg) {
318
327
  isValidValueFor('ado', 'engagement_id', r.engagement_id) ||
319
328
  isValidValueFor('ado', 'work_item_id', r.work_item_id)
320
329
  );
321
- if (!top) return { integrationsPatch: null, accepted: [] };
330
+ if (!top) return { integrationsPatch: null, accepted: [], unresolved: 'ado.engagement_id' };
322
331
  const id = isValidValueFor('ado', 'engagement_id', top.engagement_id) ? top.engagement_id : top.work_item_id;
323
332
  const patch = { ado: { ...cur, engagement_id: id } };
324
333
  accepted.push(id);
@@ -448,7 +457,7 @@ async function main() {
448
457
  : `${skipReason} after ${elapsed}ms: ${(e.message || '').split('\n')[0].slice(0, 200)}`;
449
458
  log(` ${source}: ✗ ${detail}`);
450
459
  }
451
- const { boundariesPatch, integrationsPatch, accepted } = applyRows(source, rows, bounds, integ);
460
+ const { boundariesPatch, integrationsPatch, accepted, rejected, unresolved } = applyRows(source, rows, bounds, integ);
452
461
  if (boundariesPatch) {
453
462
  Object.assign(bounds, mergeShallow(bounds, boundariesPatch));
454
463
  boundsDirty = true;
@@ -457,7 +466,7 @@ async function main() {
457
466
  Object.assign(integ, mergeShallow(integ, integrationsPatch));
458
467
  integDirty = true;
459
468
  }
460
- sourceResults.push({ source, asked, found: rows.length, accepted, skipped_reason: skipReason });
469
+ sourceResults.push({ source, asked, found: rows.length, accepted, rejected, unresolved, skipped_reason: skipReason });
461
470
  }
462
471
 
463
472
  log(`done: ${sourceResults.filter(r => r.found > 0).length}/${total} sources returned data`);
@@ -1,317 +1,362 @@
1
- #!/usr/bin/env node
2
- // plugin/runners/refresh.mjs
3
- // Deterministic orchestrator: reads boundaries.yml + integrations.yml,
4
- // expands into target cells (source, entity, week), invokes per-source
5
- // pull-*.mjs runners as subprocesses, aggregates JSON results.
6
- //
7
- // Usage:
8
- // node plugin/runners/refresh.mjs --project <P> --alias <A>
9
- // [--week YYYY-MM-DD] # default: current ISO Monday
10
- // [--source <src>] # only run one source
11
- // [--entity <e>] # only run one entity (requires --source)
12
- // [--mode bootstrap|refresh] # default: refresh
13
- // [--force] [--dry-run] [--fixture-dir <dir>]
14
- // [--max-parallel <n>] # default 1 (serial)
15
- //
16
- // Stdout: JSON object { project, alias, week, mode, results: [<per-runner JSON>...] }
17
- // Exit 0 always (per-runner failures surface in results[].status).
18
-
19
- import path from 'node:path';
20
- import { promises as fs } from 'node:fs';
21
- import { spawn } from 'node:child_process';
22
- import { fileURLToPath } from 'node:url';
23
- import { loadConfig, assertProject } from './lib/config.mjs';
24
- import { readLedger, needsPull } from './lib/ledger.mjs';
25
- import { currentIsoMonday, ymd } from './lib/weeks.mjs';
26
- import { readCandidateCount } from './lib/learnings.mjs';
27
- import { writeRefreshReport, appendRunLog } from './lib/runlog.mjs';
28
-
29
- const HERE = path.dirname(fileURLToPath(import.meta.url));
30
-
31
- const SOURCE_RUNNERS = {
32
- crm: 'pull-crm.mjs',
33
- ado: 'pull-ado.mjs',
34
- email: 'pull-email.mjs',
35
- teams: 'pull-teams.mjs',
36
- meetings: 'pull-meetings.mjs',
37
- onenote: 'pull-onenote.mjs',
38
- sharepoint: 'pull-sharepoint.mjs',
39
- };
40
-
41
- function parseArgs(argv) {
42
- const args = { force: false, dryRun: false, mode: 'refresh', maxParallel: 1 };
43
- for (let i = 0; i < argv.length; i++) {
44
- const a = argv[i];
45
- if (a === '--project') args.project = argv[++i];
46
- else if (a === '--alias') args.alias = argv[++i];
47
- else if (a === '--week') args.week = argv[++i];
48
- else if (a === '--source') args.source = argv[++i];
49
- else if (a === '--entity') args.entity = argv[++i];
50
- else if (a === '--mode') args.mode = argv[++i];
51
- else if (a === '--force') args.force = true;
52
- else if (a === '--dry-run') args.dryRun = true;
53
- else if (a === '--fixture-dir') args.fixtureDir = argv[++i];
54
- else if (a === '--max-parallel') args.maxParallel = Math.max(1, parseInt(argv[++i], 10) || 1);
55
- else if (a === '--help' || a === '-h') args.help = true;
56
- }
57
- return args;
58
- }
59
-
60
- function help() {
61
- return `Usage: node refresh.mjs --project <P> --alias <A> [--week YYYY-MM-DD]
62
- [--source <crm|ado|email|teams|meetings|onenote|sharepoint>] [--entity <e>]
63
- [--mode bootstrap|refresh] [--force] [--dry-run] [--max-parallel <n>]
64
- [--fixture-dir <dir>] # use <fixture-dir>/<source>.json for each runner`;
65
- }
66
-
67
- function emit(obj) { process.stdout.write(JSON.stringify(obj) + '\n'); }
68
-
69
- /**
70
- * Build the (source, entity) target list from integrations + boundaries config.
71
- * Returns: [{ source, entity }, ...]
72
- */
73
- export function buildTargets(merged) {
74
- const targets = [];
75
- // crm: from integrations
76
- const crm = merged.crm || {};
77
- const crmEntity = crm.request_id || crm.record_id;
78
- if (crmEntity) targets.push({ source: 'crm', entity: String(crmEntity) });
79
- // ado
80
- const ado = merged.ado || {};
81
- if (ado.engagement_id) targets.push({ source: 'ado', entity: String(ado.engagement_id) });
82
- // email: per-user mailbox folders
83
- const email = merged.email || {};
84
- for (const f of (email.folders || [])) {
85
- const entity = typeof f === 'string' ? f : (f.displayName || f.name || f.id);
86
- if (entity) targets.push({ source: 'email', entity: String(entity), mailbox: typeof f === 'object' ? f.mailbox : email.mailbox });
87
- }
88
- // teams: chat ids
89
- const teams = merged.teams || {};
90
- for (const c of (teams.chats || [])) {
91
- const entity = typeof c === 'string' ? c : (c.chat_id || c.id);
92
- if (entity) targets.push({ source: 'teams', entity: String(entity) });
93
- }
94
- // meetings: joinUrls
95
- const meetings = merged.meetings || {};
96
- const meetingList = meetings.joinUrls || meetings.meetings || [];
97
- for (const m of meetingList) {
98
- const entity = typeof m === 'string' ? m : (m.joinUrl || m.id);
99
- if (entity) targets.push({ source: 'meetings', entity: String(entity) });
100
- }
101
- // onenote: section_file_ids
102
- const on = merged.onenote || {};
103
- for (const s of (on.section_file_ids || [])) {
104
- const entity = typeof s === 'string' ? s : (s.id || s.section_file_id);
105
- if (entity) targets.push({ source: 'onenote', entity: String(entity) });
106
- }
107
- // sharepoint: site urls
108
- const sp = merged.sharepoint || {};
109
- for (const s of (sp.sites || [])) {
110
- const entity = typeof s === 'string' ? s : (s.url || s.site_url);
111
- if (entity) targets.push({ source: 'sharepoint', entity: String(entity) });
112
- }
113
- return targets;
114
- }
115
-
116
- function spawnRunner(runner, args) {
117
- return new Promise(resolve => {
118
- const proc = spawn(process.execPath, [runner, ...args], { stdio: ['ignore', 'pipe', 'pipe'] });
119
- let stdout = '', stderr = '';
120
- proc.stdout.on('data', d => stdout += d.toString());
121
- proc.stderr.on('data', d => stderr += d.toString());
122
- proc.on('close', code => {
123
- let parsed = null;
124
- const lastLine = stdout.trim().split('\n').filter(Boolean).pop();
125
- try { parsed = lastLine ? JSON.parse(lastLine) : null; } catch { /* not JSON */ }
126
- resolve({ exitCode: code, stdout, stderr, parsed });
127
- });
128
- });
129
- }
130
-
131
- async function runOne(target, weekStart, args) {
132
- const runner = path.join(HERE, SOURCE_RUNNERS[target.source]);
133
- const argv = ['--project', args.project, '--alias', args.alias, '--entity', target.entity, '--week', weekStart];
134
- if (args.force) argv.push('--force');
135
- // NOTE: orchestrator-level --dry-run is handled by skipping runOne entirely
136
- // (see main()). We deliberately do not propagate --dry-run to children.
137
- if (target.mailbox) { argv.push('--mailbox', target.mailbox); }
138
- // source-specific config from integrations
139
- if (target.source === 'crm' && target.instance) { argv.push('--instance', target.instance); }
140
- if (target.source === 'ado') {
141
- if (target.organization) argv.push('--organization', target.organization);
142
- if (target.adoProject) argv.push('--ado-project', target.adoProject);
143
- if (target.apiVersion) argv.push('--api-version', target.apiVersion);
144
- }
145
- if (target.source === 'sharepoint' && target.allowedTenants) argv.push('--allowed-tenants', target.allowedTenants);
146
- if (args.fixtureDir) {
147
- const fx = path.join(args.fixtureDir, `${target.source}.json`);
148
- argv.push('--fixture', fx);
149
- }
150
- const res = await spawnRunner(runner, argv);
151
- return {
152
- source: target.source,
153
- entity: target.entity,
154
- week: weekStart,
155
- exit_code: res.exitCode,
156
- parsed: res.parsed,
157
- stderr: res.stderr ? res.stderr.split('\n').slice(0, 5).join('\n') : '',
158
- };
159
- }
160
-
161
- async function pMap(items, limit, fn) {
162
- const results = new Array(items.length);
163
- let i = 0;
164
- const workers = Array.from({ length: Math.min(limit, items.length) }, async () => {
165
- while (true) {
166
- const idx = i++;
167
- if (idx >= items.length) return;
168
- results[idx] = await fn(items[idx], idx);
169
- }
170
- });
171
- await Promise.all(workers);
172
- return results;
173
- }
174
-
175
- async function main() {
176
- const args = parseArgs(process.argv.slice(2));
177
- if (args.help) { console.log(help()); return 0; }
178
- if (!args.project || !args.alias) {
179
- console.error(help());
180
- emit({ status: 'failed', errors: [{ signature: 'bad-args' }] });
181
- return 2;
182
- }
183
-
184
- await assertProject(args.project);
185
- const cfg = await loadConfig(args.project, args.alias);
186
- const weekStart = args.week || ymd(currentIsoMonday());
187
-
188
- let targets = buildTargets(cfg.merged);
189
-
190
- // Attach source-specific config
191
- for (const t of targets) {
192
- if (t.source === 'crm') t.instance = cfg.merged.crm && cfg.merged.crm.instance;
193
- if (t.source === 'ado') {
194
- t.organization = cfg.merged.ado && cfg.merged.ado.organization;
195
- t.adoProject = cfg.merged.ado && cfg.merged.ado.project;
196
- t.apiVersion = cfg.merged.ado && cfg.merged.ado.apiVersion;
197
- }
198
- if (t.source === 'sharepoint') {
199
- const allowed = cfg.merged.sharepoint && cfg.merged.sharepoint.allowed_tenants;
200
- if (Array.isArray(allowed) && allowed.length) t.allowedTenants = allowed.join(',');
201
- }
202
- }
203
-
204
- // Filter by --source / --entity
205
- if (args.source) targets = targets.filter(t => t.source === args.source);
206
- if (args.entity) targets = targets.filter(t => t.entity === args.entity);
207
-
208
- // Apply needsPull filter (unless --force)
209
- const ledger = await readLedger(args.project, args.alias);
210
- const planned = [];
211
- const skipped = [];
212
- for (const t of targets) {
213
- const cellKey = `${t.source}::${t.entity}::${weekStart}`;
214
- const cell = ledger.entries[cellKey];
215
- const decision = needsPull(cell, weekStart, { mode: args.mode, force: args.force });
216
- if (decision.pull) planned.push({ ...t, reason: decision.reason });
217
- else skipped.push({ source: t.source, entity: t.entity, reason: decision.reason });
218
- }
219
-
220
- // Run planned targets. At the orchestrator level, --dry-run means
221
- // "show the plan without spawning workers" — we do NOT delegate dry-run
222
- // to children, because per-runner dry-run semantics vary (some skip
223
- // writes but still HTTP). Refresh-level dry-run is plan-only.
224
- const results = args.dryRun
225
- ? planned.map(t => ({ source: t.source, entity: t.entity, week: weekStart, dry_run: true, reason: t.reason }))
226
- : await pMap(planned, args.maxParallel, t => runOne(t, weekStart, args));
227
-
228
- // v5.9.0: post-pass — unified references pool. Scans Evidence for URLs and
229
- // builds a project-shared dedup index with HTTP snapshots for external links.
230
- let referencesResult = null;
231
- let stateResult = null;
232
- if (!args.dryRun) {
233
- const refsRunner = path.join(HERE, 'pull-references.mjs');
234
- const refsArgv = ['--project', args.project];
235
- if (args.force) refsArgv.push('--refresh');
236
- const r = await spawnRunner(refsRunner, refsArgv);
237
- referencesResult = { source: 'references', exit_code: r.exitCode, stdout: r.stdout?.slice(0, 4000), stderr: r.stderr?.slice(0, 1000) };
238
-
239
- // v5.9.0 / v6.2.0: post-pass deterministic State/ generator. Inventory
240
- // only; build-state LLM skill remains the synthesis layer.
241
- const stateRunner = path.join(HERE, 'pull-state.mjs');
242
- const s = await spawnRunner(stateRunner, ['--project', args.project]);
243
- stateResult = { source: 'state', exit_code: s.exitCode, stdout: s.stdout?.slice(0, 4000), stderr: s.stderr?.slice(0, 1000) };
244
- }
245
-
246
- const learning_candidates_total = args.dryRun ? 0 : await readCandidateCount(args.project);
247
-
248
- // v6.0.1: orchestrator-level diagnostics — write a refresh report and append
249
- // run-log entries for EVERY result (captured / no-activity / partial /
250
- // deferred / failed). Per-runner appendRunLog calls only fired on success
251
- // before, so failures left no audit trail beyond the ephemeral stdout JSON.
252
- const counts = { captured: 0, 'no-activity': 0, partial: 0, deferred: 0, failed: 0, other: 0 };
253
- for (const r of results) {
254
- const status = r?.parsed?.status || (r?.dry_run ? 'dry-run' : 'unknown');
255
- if (counts[status] !== undefined) counts[status]++; else counts.other++;
256
- }
257
- if (!args.dryRun) {
258
- try {
259
- await writeRefreshReport(args.project, args.alias, {
260
- type: args.mode,
261
- summary: `${args.mode} ${weekStart}: planned=${planned.length} skipped=${skipped.length} captured=${counts.captured} no-activity=${counts['no-activity']} partial=${counts.partial} deferred=${counts.deferred} failed=${counts.failed}`,
262
- details: {
263
- week: weekStart,
264
- mode: args.mode,
265
- planned: planned.length,
266
- skipped: skipped.length,
267
- counts,
268
- results: results.map(r => ({
269
- source: r.source,
270
- entity: r.entity,
271
- status: r?.parsed?.status,
272
- exit_code: r.exit_code,
273
- errors: r?.parsed?.errors,
274
- })),
275
- },
276
- });
277
- } catch (e) { /* refresh-report is diagnostics-only, never block */ }
278
-
279
- for (const r of results) {
280
- const status = r?.parsed?.status;
281
- if (!status || status === 'captured') continue; // captured already logged by per-runner
282
- try {
283
- await appendRunLog(args.project, {
284
- runner: `pull-${r.source}`,
285
- alias: args.alias,
286
- entity: r.entity,
287
- week: weekStart,
288
- status,
289
- via: 'refresh-orchestrator',
290
- errors: r?.parsed?.errors,
291
- });
292
- } catch (e) { /* run-log is append-only diagnostics */ }
293
- }
294
- }
295
-
296
- emit({
297
- status: 'ok',
298
- project: args.project,
299
- alias: args.alias,
300
- week: weekStart,
301
- mode: args.mode,
302
- dry_run: args.dryRun,
303
- planned: planned.length,
304
- skipped: skipped.length,
305
- results,
306
- skipped_targets: skipped,
307
- references: referencesResult,
308
- state: stateResult,
309
- learning_candidates_total,
310
- });
311
- return 0;
312
- }
313
-
314
- main().then(code => { process.exitCode = code; }).catch(e => {
315
- emit({ status: 'failed', errors: [{ message: e.message }] });
316
- process.exit(1);
317
- });
1
+ #!/usr/bin/env node
2
+ // plugin/runners/refresh.mjs
3
+ // Deterministic orchestrator: reads boundaries.yml + integrations.yml,
4
+ // expands into target cells (source, entity, week), invokes per-source
5
+ // pull-*.mjs runners as subprocesses, aggregates JSON results.
6
+ //
7
+ // Usage:
8
+ // node plugin/runners/refresh.mjs --project <P> --alias <A>
9
+ // [--week YYYY-MM-DD] # default: current ISO Monday
10
+ // [--source <src>] # only run one source
11
+ // [--entity <e>] # only run one entity (requires --source)
12
+ // [--mode bootstrap|refresh] # default: refresh
13
+ // [--force] [--dry-run] [--fixture-dir <dir>]
14
+ // [--max-parallel <n>] # default 1 (serial)
15
+ //
16
+ // Stdout: JSON object { project, alias, week, mode, results: [<per-runner JSON>...] }
17
+ // Exit 0 always (per-runner failures surface in results[].status).
18
+
19
+ import path from 'node:path';
20
+ import { promises as fs } from 'node:fs';
21
+ import { spawn } from 'node:child_process';
22
+ import { fileURLToPath } from 'node:url';
23
+ import { loadConfig, assertProject } from './lib/config.mjs';
24
+ import { readLedger, needsPull } from './lib/ledger.mjs';
25
+ import { currentIsoMonday, ymd, isoWeeksBetween, isoMondayString } from './lib/weeks.mjs';
26
+ import { readCandidateCount } from './lib/learnings.mjs';
27
+ import { writeRefreshReport, appendRunLog } from './lib/runlog.mjs';
28
+
29
+ const HERE = path.dirname(fileURLToPath(import.meta.url));
30
+
31
+ const SOURCE_RUNNERS = {
32
+ crm: 'pull-crm.mjs',
33
+ ado: 'pull-ado.mjs',
34
+ email: 'pull-email.mjs',
35
+ teams: 'pull-teams.mjs',
36
+ meetings: 'pull-meetings.mjs',
37
+ onenote: 'pull-onenote.mjs',
38
+ sharepoint: 'pull-sharepoint.mjs',
39
+ };
40
+
41
+ function parseArgs(argv) {
42
+ const args = { force: false, dryRun: false, mode: 'refresh', maxParallel: 1 };
43
+ for (let i = 0; i < argv.length; i++) {
44
+ const a = argv[i];
45
+ if (a === '--project') args.project = argv[++i];
46
+ else if (a === '--alias') args.alias = argv[++i];
47
+ else if (a === '--week') args.week = argv[++i];
48
+ else if (a === '--since') args.since = argv[++i];
49
+ else if (a === '--source') args.source = argv[++i];
50
+ else if (a === '--entity') args.entity = argv[++i];
51
+ else if (a === '--mode') args.mode = argv[++i];
52
+ else if (a === '--force') args.force = true;
53
+ else if (a === '--dry-run') args.dryRun = true;
54
+ else if (a === '--fixture-dir') args.fixtureDir = argv[++i];
55
+ else if (a === '--max-parallel') args.maxParallel = Math.max(1, parseInt(argv[++i], 10) || 1);
56
+ else if (a === '--help' || a === '-h') args.help = true;
57
+ }
58
+ return args;
59
+ }
60
+
61
+ function help() {
62
+ return `Usage: node refresh.mjs --project <P> --alias <A> [--week YYYY-MM-DD]
63
+ [--source <crm|ado|email|teams|meetings|onenote|sharepoint>] [--entity <e>]
64
+ [--mode bootstrap|refresh] [--force] [--dry-run] [--max-parallel <n>]
65
+ [--fixture-dir <dir>] # use <fixture-dir>/<source>.json for each runner`;
66
+ }
67
+
68
+ function emit(obj) { process.stdout.write(JSON.stringify(obj) + '\n'); }
69
+
70
+ /**
71
+ * Build the (source, entity) target list from integrations + boundaries config.
72
+ * Returns: [{ source, entity }, ...]
73
+ */
74
+ export function buildTargets(merged) {
75
+ const targets = [];
76
+ const isPlaceholder = (v) => v == null || /^<.*>$/.test(String(v).trim()) || /^(unknown|n\/a|none|null|tbd|todo)$/i.test(String(v).trim());
77
+ // crm: from integrations
78
+ const crm = merged.crm || {};
79
+ const crmEntity = crm.request_id || crm.record_id;
80
+ if (crmEntity && !isPlaceholder(crmEntity)) targets.push({ source: 'crm', entity: String(crmEntity) });
81
+ // ado
82
+ const ado = merged.ado || {};
83
+ if (ado.engagement_id && !isPlaceholder(ado.engagement_id)) targets.push({ source: 'ado', entity: String(ado.engagement_id) });
84
+ // email: per-user mailbox folders
85
+ const email = merged.email || {};
86
+ for (const f of (email.folders || [])) {
87
+ const entity = typeof f === 'string' ? f : (f.displayName || f.name || f.id);
88
+ if (entity) targets.push({ source: 'email', entity: String(entity), mailbox: typeof f === 'object' ? f.mailbox : email.mailbox });
89
+ }
90
+ // teams: chat ids
91
+ const teams = merged.teams || {};
92
+ for (const c of (teams.chats || [])) {
93
+ const entity = typeof c === 'string' ? c : (c.chat_id || c.id);
94
+ if (entity) targets.push({ source: 'teams', entity: String(entity) });
95
+ }
96
+ // meetings: joinUrls
97
+ const meetings = merged.meetings || {};
98
+ const meetingList = meetings.joinUrls || meetings.meetings || [];
99
+ for (const m of meetingList) {
100
+ const entity = typeof m === 'string' ? m : (m.joinUrl || m.id);
101
+ if (entity) targets.push({ source: 'meetings', entity: String(entity) });
102
+ }
103
+ // onenote: section_file_ids
104
+ const on = merged.onenote || {};
105
+ for (const s of (on.section_file_ids || [])) {
106
+ const entity = typeof s === 'string' ? s : (s.id || s.section_file_id);
107
+ if (entity) targets.push({ source: 'onenote', entity: String(entity) });
108
+ }
109
+ // sharepoint: site urls boundaries first, integrations fallback (sharepoint
110
+ // is a SHARED source; sites usually live in integrations.yml since they
111
+ // don't vary per contributor)
112
+ const sp = merged.sharepoint || {};
113
+ let spSites = sp.sites || [];
114
+ if (!spSites.length && merged.__integrations_sharepoint_sites) {
115
+ spSites = merged.__integrations_sharepoint_sites;
116
+ }
117
+ for (const s of spSites) {
118
+ const entity = typeof s === 'string' ? s : (s.url || s.site_url);
119
+ if (entity) targets.push({ source: 'sharepoint', entity: String(entity) });
120
+ }
121
+ return targets;
122
+ }
123
+
124
+ function spawnRunner(runner, args) {
125
+ return new Promise(resolve => {
126
+ const proc = spawn(process.execPath, [runner, ...args], { stdio: ['ignore', 'pipe', 'pipe'] });
127
+ let stdout = '', stderr = '';
128
+ proc.stdout.on('data', d => stdout += d.toString());
129
+ proc.stderr.on('data', d => stderr += d.toString());
130
+ proc.on('close', code => {
131
+ let parsed = null;
132
+ const lastLine = stdout.trim().split('\n').filter(Boolean).pop();
133
+ try { parsed = lastLine ? JSON.parse(lastLine) : null; } catch { /* not JSON */ }
134
+ resolve({ exitCode: code, stdout, stderr, parsed });
135
+ });
136
+ });
137
+ }
138
+
139
/**
 * Run the child runner script for a single (source, entity, week) cell.
 *
 * Builds the child's command line from the orchestrator args plus any
 * source-specific settings carried on `target`, spawns the runner through
 * `spawnRunner`, and condenses the outcome into a result record.
 *
 * @param {object} target - Pull target; `source` and `entity` are always read,
 *   `mailbox`, `instance`, `organization`, `adoProject`, `apiVersion` and
 *   `allowedTenants` are forwarded only when truthy.
 * @param {string} weekStart - Week identifier passed to the child as `--week`.
 * @param {object} args - Orchestrator args (`project`, `alias`, `force`,
 *   `fixtureDir` are read here).
 * @returns {Promise<{source: *, entity: *, week: string, exit_code: *, parsed: *, stderr: string}>}
 *   Result record; `stderr` holds at most the first five lines of child stderr.
 */
async function runOne(target, weekStart, args) {
  const { source, entity } = target;
  const scriptPath = path.join(HERE, SOURCE_RUNNERS[source]);

  const childArgs = [
    '--project', args.project,
    '--alias', args.alias,
    '--entity', entity,
    '--week', weekStart,
  ];
  // Append `flag value` only when the value is truthy.
  const pushOption = (flag, value) => {
    if (value) childArgs.push(flag, value);
  };

  if (args.force) childArgs.push('--force');
  // NOTE: orchestrator-level --dry-run is handled by skipping runOne entirely
  // (see main()). We deliberately do not propagate --dry-run to children.
  pushOption('--mailbox', target.mailbox);

  // Source-specific config from integrations.
  switch (source) {
    case 'crm':
      pushOption('--instance', target.instance);
      break;
    case 'ado':
      pushOption('--organization', target.organization);
      pushOption('--ado-project', target.adoProject);
      pushOption('--api-version', target.apiVersion);
      break;
    case 'sharepoint':
      pushOption('--allowed-tenants', target.allowedTenants);
      break;
    default:
      break;
  }

  if (args.fixtureDir) {
    childArgs.push('--fixture', path.join(args.fixtureDir, `${source}.json`));
  }

  const outcome = await spawnRunner(scriptPath, childArgs);

  // Keep only the head of stderr so the aggregate report stays small.
  let stderrHead = '';
  if (outcome.stderr) {
    stderrHead = outcome.stderr.split('\n').slice(0, 5).join('\n');
  }

  return {
    source,
    entity,
    week: weekStart,
    exit_code: outcome.exitCode,
    parsed: outcome.parsed,
    stderr: stderrHead,
  };
}
168
+
169
/**
 * Map `items` through `fn` with at most `limit` calls in flight at once.
 *
 * Each result is stored at the index of the item that produced it, so the
 * returned array is in input order regardless of completion order. If `fn`
 * throws or rejects, the returned promise rejects (via `Promise.all`).
 *
 * @param {Array} items - Values to process.
 * @param {number} limit - Maximum number of concurrent calls. A missing,
 *   NaN, zero or negative limit is treated as 1 so that work is never
 *   silently skipped; `Infinity` means "one worker per item".
 * @param {(item: *, index: number) => *} fn - Worker callback; may be async.
 * @returns {Promise<Array>} Results in input order.
 */
async function pMap(items, limit, fn) {
  const results = new Array(items.length);

  // Math.min(NaN, n) is NaN and Array.from({ length: NaN }) is empty, so an
  // unset or invalid limit used to start zero workers and resolve with holes.
  // `NaN >= 1` is false, which routes every invalid value to the fallback.
  const requested = Number(limit);
  const concurrency = requested >= 1 ? Math.floor(requested) : 1;
  const workerCount = Math.min(concurrency, items.length);

  let next = 0;
  const worker = async () => {
    // Claiming an index is synchronous, so two workers never take the same item.
    for (let idx = next++; idx < items.length; idx = next++) {
      results[idx] = await fn(items[idx], idx);
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
182
+
183
/**
 * Orchestrator entry point.
 *
 * Parses CLI arguments, loads the project configuration, runs every selected
 * week through `runOneWeek`, then (unless `--dry-run`) runs the references and
 * state post-passes, writes the refresh report and run-log entries, and
 * finally emits one aggregate JSON summary.
 *
 * @returns {Promise<number>} Exit code: 0 on completion or `--help`, 2 when
 *   `--project` or `--alias` is missing.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    console.log(help());
    return 0;
  }
  if (!(args.project && args.alias)) {
    console.error(help());
    emit({ status: 'failed', errors: [{ signature: 'bad-args' }] });
    return 2;
  }

  await assertProject(args.project);
  const cfg = await loadConfig(args.project, args.alias);

  // v6.4.0: SharePoint sites fallback. Boundaries.yml almost never lists SP
  // sites (WorkIQ doesn't reliably resolve them); they live in the project's
  // integrations.yml. mergeConfigs uses arrayMode='replace' so an empty
  // boundaries.sharepoint.sites would otherwise wipe integrations sites.
  const integrationSites = cfg.integrations?.sharepoint?.sites || [];
  if (integrationSites.length) {
    cfg.merged.__integrations_sharepoint_sites = integrationSites;
  }

  // v6.4.0: --since iterates ISO Mondays from `since` through current week.
  // --week pins a single week (back-compat). Default = current week only.
  const currentMonday = ymd(currentIsoMonday());
  let weekList;
  if (args.since) {
    weekList = isoWeeksBetween(args.since, new Date());
  } else {
    weekList = [args.week || currentMonday];
  }

  // Cross-week accumulators.
  const allResults = [];
  const allCounts = { captured: 0, 'no-activity': 0, partial: 0, deferred: 0, failed: 0, other: 0 };
  const countKeys = Object.keys(allCounts);
  const skippedTargetsByWeek = [];
  let allPlanned = 0;
  let allSkipped = 0;

  // Weeks are processed one after another; parallelism lives inside a week.
  for (const weekStart of weekList) {
    const week = await runOneWeek({ args, cfg, weekStart });
    for (const entry of week.results) allResults.push(entry);
    allPlanned += week.planned;
    allSkipped += week.skipped;
    for (const key of countKeys) allCounts[key] += week.counts[key] || 0;
    skippedTargetsByWeek.push({ week: weekStart, skipped: week.skipped_targets });
  }

  // Shape a post-pass child's outcome for the summary, capping its output size.
  const trimChildOutput = (source, child) => ({
    source,
    exit_code: child.exitCode,
    stdout: child.stdout?.slice(0, 4000),
    stderr: child.stderr?.slice(0, 1000),
  });

  let referencesResult = null;
  let stateResult = null;
  let learning_candidates_total = 0;

  if (!args.dryRun) {
    // v5.9.0: post-pass — unified references pool. Scans Evidence for URLs and
    // builds a project-shared dedup index with HTTP snapshots for external links.
    const refsArgv = ['--project', args.project];
    if (args.force) refsArgv.push('--refresh');
    referencesResult = trimChildOutput(
      'references',
      await spawnRunner(path.join(HERE, 'pull-references.mjs'), refsArgv),
    );

    // v5.9.0 / v6.2.0: post-pass — deterministic State/ generator. Inventory
    // only; build-state LLM skill remains the synthesis layer.
    stateResult = trimChildOutput(
      'state',
      await spawnRunner(path.join(HERE, 'pull-state.mjs'), ['--project', args.project]),
    );

    learning_candidates_total = await readCandidateCount(args.project);

    // Aggregate report across all weeks.
    try {
      const firstWeek = weekList[0];
      const lastWeek = weekList[weekList.length - 1];
      const span = weekList.length === 1
        ? firstWeek
        : `${firstWeek}..${lastWeek} (${weekList.length} wk)`;
      const reportRows = allResults.map((r) => ({
        source: r.source,
        entity: r.entity,
        week: r.week,
        status: r?.parsed?.status,
        exit_code: r.exit_code,
        errors: r?.parsed?.errors,
      }));
      await writeRefreshReport(args.project, args.alias, {
        type: args.mode,
        summary: `${args.mode} ${span}: planned=${allPlanned} skipped=${allSkipped} captured=${allCounts.captured} no-activity=${allCounts['no-activity']} partial=${allCounts.partial} deferred=${allCounts.deferred} failed=${allCounts.failed}`,
        details: {
          weeks: weekList,
          mode: args.mode,
          planned: allPlanned,
          skipped: allSkipped,
          counts: allCounts,
          results: reportRows,
        },
      });
    } catch { /* refresh-report is diagnostics-only, never block */ }

    // Log every result that reported a status other than 'captured'.
    for (const r of allResults) {
      const status = r?.parsed?.status;
      if (status && status !== 'captured') {
        try {
          await appendRunLog(args.project, {
            runner: `pull-${r.source}`,
            alias: args.alias,
            entity: r.entity,
            week: r.week,
            status,
            via: 'refresh-orchestrator',
            errors: r?.parsed?.errors,
          });
        } catch { /* run-log is append-only diagnostics */ }
      }
    }
  }

  emit({
    status: 'ok',
    project: args.project,
    alias: args.alias,
    weeks: weekList,
    mode: args.mode,
    dry_run: args.dryRun,
    planned: allPlanned,
    skipped: allSkipped,
    counts: allCounts,
    results: allResults,
    skipped_targets_by_week: skippedTargetsByWeek,
    references: referencesResult,
    state: stateResult,
    learning_candidates_total,
  });
  return 0;
}
304
+
305
/**
 * Plan and execute (or, under `--dry-run`, only plan) all pulls for one week.
 *
 * Builds targets from the merged config, attaches source-specific settings,
 * applies the `--source` / `--entity` filters, asks `needsPull` which ledger
 * cells need work, runs the planned targets through `pMap`/`runOne`, and
 * tallies the reported statuses.
 *
 * @param {object} params
 * @param {object} params.args - Orchestrator args (`source`, `entity`,
 *   `project`, `alias`, `mode`, `force`, `dryRun`, `maxParallel` are read).
 * @param {object} params.cfg - Loaded config; only `cfg.merged` is read here.
 * @param {string} params.weekStart - Week identifier used in ledger keys.
 * @returns {Promise<{week: string, planned: number, skipped: number, skipped_targets: Array, counts: object, results: Array}>}
 */
async function runOneWeek({ args, cfg, weekStart }) {
  const allTargets = buildTargets(cfg.merged);

  // Attach source-specific config.
  for (const target of allTargets) {
    switch (target.source) {
      case 'crm': {
        const crm = cfg.merged.crm;
        target.instance = crm && crm.instance;
        break;
      }
      case 'ado': {
        target.organization = cfg.merged.ado && cfg.merged.ado.organization;
        target.adoProject = cfg.merged.ado && cfg.merged.ado.project;
        target.apiVersion = cfg.merged.ado && cfg.merged.ado.apiVersion;
        break;
      }
      case 'sharepoint': {
        const sp = cfg.merged.sharepoint;
        const tenants = sp && sp.allowed_tenants;
        if (Array.isArray(tenants) && tenants.length) {
          target.allowedTenants = tenants.join(',');
        }
        break;
      }
      default:
        break;
    }
  }

  // Optional CLI narrowing by source and/or entity.
  const selected = allTargets.filter((t) => {
    if (args.source && t.source !== args.source) return false;
    if (args.entity && t.entity !== args.entity) return false;
    return true;
  });

  // Split the selection into cells that need a pull and cells that do not.
  const ledger = await readLedger(args.project, args.alias);
  const planned = [];
  const skipped = [];
  for (const t of selected) {
    const cell = ledger.entries[`${t.source}::${t.entity}::${weekStart}`];
    const decision = needsPull(cell, weekStart, { mode: args.mode, force: args.force });
    if (decision.pull) {
      planned.push({ ...t, reason: decision.reason });
    } else {
      skipped.push({ source: t.source, entity: t.entity, reason: decision.reason });
    }
  }

  let results;
  if (args.dryRun) {
    // Dry run: describe what would be pulled without spawning any runner.
    results = planned.map((t) => ({
      source: t.source,
      entity: t.entity,
      week: weekStart,
      dry_run: true,
      reason: t.reason,
    }));
  } else {
    results = await pMap(planned, args.maxParallel, (t) => runOne(t, weekStart, args));
  }

  const counts = { captured: 0, 'no-activity': 0, partial: 0, deferred: 0, failed: 0, other: 0 };
  for (const r of results) {
    // Tag results with their week for aggregate reporting.
    if (!r.week) r.week = weekStart;

    // Statuses without a dedicated bucket (including dry-run) land in `other`.
    const status = r?.parsed?.status || (r?.dry_run ? 'dry-run' : 'unknown');
    if (counts[status] === undefined) {
      counts.other++;
    } else {
      counts[status]++;
    }
  }

  return {
    week: weekStart,
    planned: planned.length,
    skipped: skipped.length,
    skipped_targets: skipped,
    counts,
    results,
  };
}
358
+
359
// Script entry: run main(), publish its return value as the process exit code,
// and on any uncaught failure emit a 'failed' summary and exit with status 1.
(async () => {
  try {
    process.exitCode = await main();
  } catch (e) {
    emit({ status: 'failed', errors: [{ message: e.message }] });
    process.exit(1);
  }
})();