kushi-agents 6.2.1 → 6.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/plugin/runners/bootstrap.mjs +51 -1
- package/plugin/runners/pull-email.mjs +3 -194
- package/plugin/runners/pull-meetings.mjs +4 -220
- package/plugin/runners/pull-onenote.mjs +3 -253
- package/plugin/runners/pull-sharepoint.mjs +3 -284
- package/plugin/runners/pull-teams.mjs +3 -183
- package/plugin/runners/refresh.mjs +361 -317
- package/plugin/runners/test/fixtures/refresh-dir/email.json +4 -7
- package/plugin/runners/test/fixtures/refresh-dir/teams.json +4 -6
- package/plugin/runners/test/integration/csc-pull.integration.test.mjs +160 -0
- package/plugin/runners/test/fixtures/email-abn-amro.json +0 -13
- package/plugin/runners/test/fixtures/email-novel-error.json +0 -9
- package/plugin/runners/test/fixtures/meetings-abn-amro.json +0 -10
- package/plugin/runners/test/fixtures/meetings-body-unavailable.json +0 -10
- package/plugin/runners/test/fixtures/onenote-abn-amro.json +0 -30
- package/plugin/runners/test/fixtures/onenote-partial.json +0 -21
- package/plugin/runners/test/fixtures/sharepoint-abn-amro.json +0 -12
- package/plugin/runners/test/fixtures/teams-abn-amro.json +0 -11
- package/plugin/runners/test/integration/pull-email.integration.test.mjs +0 -149
- package/plugin/runners/test/integration/pull-meetings.integration.test.mjs +0 -92
- package/plugin/runners/test/integration/pull-onenote.integration.test.mjs +0 -86
- package/plugin/runners/test/integration/pull-sharepoint.integration.test.mjs +0 -93
- package/plugin/runners/test/integration/pull-teams.integration.test.mjs +0 -91
|
@@ -1,257 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// plugin/runners/pull-onenote.mjs
|
|
3
|
-
// Deterministic
|
|
4
|
-
// BODY_UNAVAILABLE does NOT defer — re-resolves the section via search, then retries.
|
|
5
|
-
// Writes to Evidence/<alias>/onenote/<section-file-id>/<week>/.
|
|
6
|
-
//
|
|
7
|
-
// Usage:
|
|
8
|
-
// node plugin/runners/pull-onenote.mjs --project <P> --alias <A> --entity <section-file-id>
|
|
9
|
-
// [--week YYYY-MM-DD] [--dry-run] [--force] [--fixture <path>]
|
|
3
|
+
// Deterministic onenote pull via WorkIQ (HARD RULE per workiq-only.instructions.md).
|
|
10
4
|
|
|
11
|
-
import
|
|
12
|
-
import { promises as fs } from 'node:fs';
|
|
13
|
-
import YAML from 'yaml';
|
|
14
|
-
import { assertProject, loadConfig } from './lib/config.mjs';
|
|
15
|
-
import { sourceDir } from './lib/layout.mjs';
|
|
16
|
-
import { writeAtomic } from './lib/evidence.mjs';
|
|
17
|
-
import { fetchAllPages, fetchWithRetry, encodeODataOp } from './lib/http.mjs';
|
|
18
|
-
import { getToken, SCOPES } from './lib/identity.mjs';
|
|
19
|
-
import { updateCell } from './lib/ledger.mjs';
|
|
20
|
-
import { appendRunLog } from './lib/runlog.mjs';
|
|
21
|
-
import { clear } from './lib/deferred.mjs';
|
|
22
|
-
import { currentIsoMonday, ymd, parseYmd } from './lib/weeks.mjs';
|
|
23
|
-
import { emitLearningCandidate } from './lib/learnings.mjs';
|
|
24
|
-
import { readLedger, cellKey } from './lib/ledger.mjs';
|
|
5
|
+
import { runCli } from './lib/csc-pull.mjs';
|
|
25
6
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
function parseArgs(argv) {
|
|
29
|
-
const args = { dryRun: false, force: false };
|
|
30
|
-
for (let i = 0; i < argv.length; i++) {
|
|
31
|
-
const a = argv[i];
|
|
32
|
-
if (a === '--project') args.project = argv[++i];
|
|
33
|
-
else if (a === '--alias') args.alias = argv[++i];
|
|
34
|
-
else if (a === '--entity') args.entity = argv[++i];
|
|
35
|
-
else if (a === '--week') args.week = argv[++i];
|
|
36
|
-
else if (a === '--dry-run') args.dryRun = true;
|
|
37
|
-
else if (a === '--force') args.force = true;
|
|
38
|
-
else if (a === '--fixture') args.fixture = argv[++i];
|
|
39
|
-
else if (a === '--help' || a === '-h') args.help = true;
|
|
40
|
-
}
|
|
41
|
-
return args;
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
function help() {
|
|
45
|
-
return `Usage: node pull-onenote.mjs --project <P> --alias <A> --entity <section-file-id>
|
|
46
|
-
[--week YYYY-MM-DD] [--dry-run] [--force] [--fixture <path>]`;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
function emit(obj) { process.stdout.write(JSON.stringify(obj) + '\n'); }
|
|
50
|
-
function configError(msg) { const e = new Error(msg); e.exitCode = 2; return e; }
|
|
51
|
-
function authError(msg) { const e = new Error(msg); e.exitCode = 3; return e; }
|
|
52
|
-
|
|
53
|
-
async function buildClient({ fixture }) {
|
|
54
|
-
if (fixture) {
|
|
55
|
-
const data = JSON.parse(await fs.readFile(fixture, 'utf8'));
|
|
56
|
-
return makeFixtureClient(data);
|
|
57
|
-
}
|
|
58
|
-
const token = await getToken(SCOPES.graph).catch(e => { throw authError(`token fetch failed: ${e.message}`); });
|
|
59
|
-
const headers = { Authorization: `Bearer ${token}`, Accept: 'application/json' };
|
|
60
|
-
return {
|
|
61
|
-
async getSection(sectionId) {
|
|
62
|
-
try {
|
|
63
|
-
const res = await fetchWithRetry(`https://graph.microsoft.com/v1.0/me/onenote/sections/${encodeURIComponent(sectionId)}`, { headers });
|
|
64
|
-
return await res.json();
|
|
65
|
-
} catch (e) { if (e.status === 404) return null; throw e; }
|
|
66
|
-
},
|
|
67
|
-
async resolveSection(sectionFileIdOrName) {
|
|
68
|
-
// Re-resolve via search across sections (used on body-unavailable)
|
|
69
|
-
const url = `https://graph.microsoft.com/v1.0/me/onenote/sections?${encodeODataOp('$filter')}=id eq '${sectionFileIdOrName.replace(/'/g, "''")}'`;
|
|
70
|
-
try {
|
|
71
|
-
const res = await fetchWithRetry(url, { headers });
|
|
72
|
-
const body = await res.json();
|
|
73
|
-
return (body.value && body.value[0]) || null;
|
|
74
|
-
} catch { return null; }
|
|
75
|
-
},
|
|
76
|
-
async listPages(sectionId, fromIso, toIso) {
|
|
77
|
-
const filter = `lastModifiedDateTime ge ${fromIso} and lastModifiedDateTime lt ${toIso}`;
|
|
78
|
-
const url = `https://graph.microsoft.com/v1.0/me/onenote/sections/${encodeURIComponent(sectionId)}/pages?${encodeODataOp('$filter')}=${encodeURIComponent(filter)}&${encodeODataOp('$orderby')}=lastModifiedDateTime desc&${encodeODataOp('$top')}=50`;
|
|
79
|
-
const { items } = await fetchAllPages(url, { headers });
|
|
80
|
-
return items;
|
|
81
|
-
},
|
|
82
|
-
async getPageContent(pageId) {
|
|
83
|
-
const url = `https://graph.microsoft.com/v1.0/me/onenote/pages/${encodeURIComponent(pageId)}/content`;
|
|
84
|
-
try {
|
|
85
|
-
const res = await fetchWithRetry(url, { headers: { ...headers, Accept: 'text/html' } });
|
|
86
|
-
return await res.text();
|
|
87
|
-
} catch (e) {
|
|
88
|
-
if (e.status === 404 || e.status === 410) return null; // body-unavailable signal
|
|
89
|
-
throw e;
|
|
90
|
-
}
|
|
91
|
-
},
|
|
92
|
-
};
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
function makeFixtureClient(data) {
|
|
96
|
-
return {
|
|
97
|
-
async getSection(id) { return (data.section && data.section.id === id) ? data.section : null; },
|
|
98
|
-
async resolveSection(id) { return (data.resolvedSection && data.resolvedSection.id === id) ? data.resolvedSection : null; },
|
|
99
|
-
async listPages(_sid, fromIso, toIso) {
|
|
100
|
-
return (data.pages || []).filter(p => p.lastModifiedDateTime >= fromIso && p.lastModifiedDateTime < toIso);
|
|
101
|
-
},
|
|
102
|
-
async getPageContent(pageId) {
|
|
103
|
-
const p = (data.pages || []).find(p => p.id === pageId);
|
|
104
|
-
return p ? p.content : null;
|
|
105
|
-
},
|
|
106
|
-
};
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
function weekBounds(weekStartYmd) {
|
|
110
|
-
const start = parseYmd(weekStartYmd);
|
|
111
|
-
const end = new Date(start);
|
|
112
|
-
end.setDate(end.getDate() + 7);
|
|
113
|
-
return { fromIso: start.toISOString(), toIso: end.toISOString() };
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
async function main() {
|
|
117
|
-
const args = parseArgs(process.argv.slice(2));
|
|
118
|
-
if (args.help) { console.log(help()); return 0; }
|
|
119
|
-
if (!args.project || !args.alias || !args.entity) {
|
|
120
|
-
console.error(help());
|
|
121
|
-
emit({ source: SOURCE, status: 'failed', errors: [{ signature: 'bad-args', message: 'required: --project --alias --entity' }] });
|
|
122
|
-
return 2;
|
|
123
|
-
}
|
|
124
|
-
const projectRoot = await assertProject(args.project).catch(e => { throw configError(e.message); });
|
|
125
|
-
await loadConfig(projectRoot, args.alias);
|
|
126
|
-
const weekStart = args.week || ymd(currentIsoMonday());
|
|
127
|
-
const { fromIso, toIso } = weekBounds(weekStart);
|
|
128
|
-
|
|
129
|
-
// Validate entity is a OneNote section file ID (hex with dashes, 20+ chars),
|
|
130
|
-
// not a page title from discover fallback. Display names break Graph URL
|
|
131
|
-
// construction with errors like "Resource not found for the segment '30 - HCA - account team'".
|
|
132
|
-
// Fixture mode bypasses validation.
|
|
133
|
-
if (!args.fixture && !/^[0-9a-f][0-9a-f\-]{20,}$/i.test(String(args.entity).trim())) {
|
|
134
|
-
await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, {
|
|
135
|
-
last_status: 'failed',
|
|
136
|
-
last_error: 'entity is not a OneNote section file ID (display name fallback from discover)',
|
|
137
|
-
});
|
|
138
|
-
if (!args.dryRun) await emitLearningCandidate({ projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart, error: { signature: 'entity-not-section-id', message: 'expected hex section file ID like 1-abc123def456...', status: null }, context: { runner: 'pull-onenote' } });
|
|
139
|
-
emit({ source: SOURCE, entity: args.entity, week: weekStart, status: 'failed', errors: [{ signature: 'entity-not-section-id', message: `entity '${args.entity}' is not a OneNote section file ID. Discover stored a section-name fallback. Replace with a hex section_file_id in boundaries.yml onenote.section_file_ids.` }] });
|
|
140
|
-
return 0;
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
const client = await buildClient({ fixture: args.fixture });
|
|
144
|
-
const startedAt = new Date().toISOString();
|
|
145
|
-
|
|
146
|
-
// Phase 1: section enum
|
|
147
|
-
let section;
|
|
148
|
-
try { section = await client.getSection(args.entity); }
|
|
149
|
-
catch (e) {
|
|
150
|
-
await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { last_status: 'failed', last_error: e.message });
|
|
151
|
-
if (!args.dryRun) await emitLearningCandidate({ projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart, error: { signature: 'section-fetch-failed', message: e.message, status: e.status }, context: { runner: 'pull-onenote' } });
|
|
152
|
-
emit({ source: SOURCE, entity: args.entity, week: weekStart, status: 'failed', errors: [{ message: e.message }] });
|
|
153
|
-
return 0;
|
|
154
|
-
}
|
|
155
|
-
if (!section) {
|
|
156
|
-
await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { last_status: 'failed', last_error: 'section not found' });
|
|
157
|
-
emit({ source: SOURCE, entity: args.entity, week: weekStart, status: 'failed', errors: [{ signature: 'section-not-found' }] });
|
|
158
|
-
return 0;
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
const pages = await client.listPages(section.id, fromIso, toIso).catch(() => []);
|
|
162
|
-
if (pages.length === 0) {
|
|
163
|
-
await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { last_status: 'no-activity', items_pulled: 0, section_file_id: section.id });
|
|
164
|
-
emit({ source: SOURCE, entity: args.entity, week: weekStart, status: 'no-activity', items_pulled: 0, files_written: [] });
|
|
165
|
-
return 0;
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
// Phase 2: per-page content fetch (re-resolves section on body-unavailable; does NOT enqueue)
|
|
169
|
-
const captures = [];
|
|
170
|
-
const bodyUnavailable = [];
|
|
171
|
-
for (const p of pages) {
|
|
172
|
-
let content = await client.getPageContent(p.id).catch(() => null);
|
|
173
|
-
if (content == null) {
|
|
174
|
-
const resolved = await client.resolveSection(args.entity);
|
|
175
|
-
if (resolved && resolved.id) {
|
|
176
|
-
content = await client.getPageContent(p.id).catch(() => null);
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
if (content == null) bodyUnavailable.push(p.id);
|
|
180
|
-
else captures.push({ page: p, content });
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
const outDir = path.join(sourceDir(projectRoot, args.alias, SOURCE), section.id, weekStart);
|
|
184
|
-
const filesWritten = [];
|
|
185
|
-
if (!args.dryRun) {
|
|
186
|
-
const r1 = await writeAtomic(path.join(outDir, 'pages.yml'), YAML.stringify(pages));
|
|
187
|
-
if (r1.written !== false) filesWritten.push(path.relative(projectRoot, r1.path));
|
|
188
|
-
for (const c of captures) {
|
|
189
|
-
const r = await writeAtomic(path.join(outDir, 'content', `${c.page.id}.html`), c.content);
|
|
190
|
-
if (r.written !== false) filesWritten.push(path.relative(projectRoot, r.path));
|
|
191
|
-
}
|
|
192
|
-
const r2 = await writeAtomic(path.join(outDir, 'index.md'), renderIndexMd({ section, pages, captures, bodyUnavailable, pulledAt: startedAt }), { skipIfUnchanged: false });
|
|
193
|
-
if (r2.written !== false) filesWritten.push(path.relative(projectRoot, r2.path));
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
const status = bodyUnavailable.length === 0 ? 'captured' : (captures.length === 0 ? 'body-unavailable' : 'partial');
|
|
197
|
-
|
|
198
|
-
if (status === 'body-unavailable' && !args.dryRun) {
|
|
199
|
-
const prior = (await readLedger(projectRoot, args.alias).catch(() => ({ cells: {} })))
|
|
200
|
-
.cells?.[cellKey(SOURCE, args.entity, weekStart)];
|
|
201
|
-
const priorOccurrences = Number(prior?.body_unavailable_runs || 0);
|
|
202
|
-
const occurrences = priorOccurrences + 1;
|
|
203
|
-
if (occurrences >= 2) {
|
|
204
|
-
await emitLearningCandidate({
|
|
205
|
-
projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart,
|
|
206
|
-
error: { signature: 'body-unavailable', message: `OneNote section ${section.id}: ${bodyUnavailable.length}/${pages.length} pages had no body across ${occurrences} runs`, occurrences },
|
|
207
|
-
context: { runner: 'pull-onenote' },
|
|
208
|
-
});
|
|
209
|
-
}
|
|
210
|
-
await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { body_unavailable_runs: occurrences });
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
await clear(projectRoot, args.alias, SOURCE, args.entity).catch(() => {});
|
|
214
|
-
await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, {
|
|
215
|
-
last_status: status,
|
|
216
|
-
items_pulled: captures.length,
|
|
217
|
-
pages_enumerated: pages.length,
|
|
218
|
-
body_unavailable: bodyUnavailable.length || undefined,
|
|
219
|
-
section_file_id: section.id,
|
|
220
|
-
});
|
|
221
|
-
|
|
222
|
-
if (!args.dryRun) {
|
|
223
|
-
await appendRunLog(projectRoot, { runner: 'pull-onenote', alias: args.alias, entity: args.entity, week: weekStart, status, pages_enumerated: pages.length, pages_captured: captures.length });
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
emit({
|
|
227
|
-
source: SOURCE, entity: args.entity, week: weekStart, status,
|
|
228
|
-
items_pulled: captures.length, pages_enumerated: pages.length, body_unavailable: bodyUnavailable,
|
|
229
|
-
files_written: filesWritten, ledger_key: `onenote::${args.entity}::${weekStart}`,
|
|
230
|
-
});
|
|
231
|
-
return 0;
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
function renderIndexMd({ section, pages, captures, bodyUnavailable, pulledAt }) {
|
|
235
|
-
const lines = [
|
|
236
|
-
`# OneNote — ${section.displayName || section.id}`,
|
|
237
|
-
'',
|
|
238
|
-
`- section_id: ${section.id}`,
|
|
239
|
-
`- pages_enumerated: ${pages.length}`,
|
|
240
|
-
`- pages_captured: ${captures.length}`,
|
|
241
|
-
`- body_unavailable: ${bodyUnavailable.length}`,
|
|
242
|
-
`- pulled_at: ${pulledAt}`,
|
|
243
|
-
'',
|
|
244
|
-
'## Pages',
|
|
245
|
-
];
|
|
246
|
-
for (const p of pages) {
|
|
247
|
-
const captured = captures.find(c => c.page.id === p.id) ? '✓' : '✗';
|
|
248
|
-
lines.push(`- [${captured}] ${p.lastModifiedDateTime} — **${p.title || p.id}**`);
|
|
249
|
-
}
|
|
250
|
-
lines.push('');
|
|
251
|
-
return lines.join('\n');
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
main().then(code => { process.exitCode = code; }).catch(e => {
|
|
255
|
-
emit({ source: SOURCE, status: 'failed', errors: [{ message: e.message, code: e.exitCode || 'unknown' }] });
|
|
256
|
-
process.exitCode = e.exitCode || 1;
|
|
257
|
-
});
|
|
7
|
+
runCli('onenote').then(code => { process.exitCode = code; });
|
|
@@ -1,288 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// plugin/runners/pull-sharepoint.mjs
|
|
3
|
-
// Deterministic
|
|
4
|
-
// - Cross-tenant filter via isAllowedTenant (#29)
|
|
5
|
-
// - One file per item (#28)
|
|
6
|
-
// - Depth-1+ traversal: drive-wide search PLUS root + each top-level folder children
|
|
7
|
-
// - External-link extraction: harvests http(s) URLs from item descriptions/webUrls
|
|
8
|
-
// Writes to Evidence/_shared/sharepoint/<site-hash>/<week>/items/<safe-id>.yml
|
|
9
|
-
// + Evidence/_shared/sharepoint/<site-hash>/<week>/index.md
|
|
10
|
-
// + Evidence/_shared/sharepoint/<site-hash>/<week>/external-links.md
|
|
3
|
+
// Deterministic sharepoint pull via WorkIQ (HARD RULE per workiq-only.instructions.md).
|
|
11
4
|
|
|
12
|
-
import
|
|
13
|
-
import { promises as fs } from 'node:fs';
|
|
14
|
-
import YAML from 'yaml';
|
|
15
|
-
import { assertProject, loadConfig } from './lib/config.mjs';
|
|
16
|
-
import { sourceDir } from './lib/layout.mjs';
|
|
17
|
-
import { writeAtomic, safeSegment } from './lib/evidence.mjs';
|
|
18
|
-
import { fetchAllPages, isAllowedTenant } from './lib/http.mjs';
|
|
19
|
-
import { getToken, SCOPES } from './lib/identity.mjs';
|
|
20
|
-
import { updateCell } from './lib/ledger.mjs';
|
|
21
|
-
import { appendRunLog } from './lib/runlog.mjs';
|
|
22
|
-
import { enqueue, clear } from './lib/deferred.mjs';
|
|
23
|
-
import { shortHash } from './lib/dedup.mjs';
|
|
24
|
-
import { currentIsoMonday, ymd, parseYmd } from './lib/weeks.mjs';
|
|
25
|
-
import { emitLearningCandidate } from './lib/learnings.mjs';
|
|
5
|
+
import { runCli } from './lib/csc-pull.mjs';
|
|
26
6
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
function parseArgs(argv) {
|
|
30
|
-
const args = { dryRun: false, force: false };
|
|
31
|
-
for (let i = 0; i < argv.length; i++) {
|
|
32
|
-
const a = argv[i];
|
|
33
|
-
if (a === '--project') args.project = argv[++i];
|
|
34
|
-
else if (a === '--alias') args.alias = argv[++i];
|
|
35
|
-
else if (a === '--entity') args.entity = argv[++i];
|
|
36
|
-
else if (a === '--allowed-tenants') args.allowedTenants = argv[++i];
|
|
37
|
-
else if (a === '--week') args.week = argv[++i];
|
|
38
|
-
else if (a === '--dry-run') args.dryRun = true;
|
|
39
|
-
else if (a === '--force') args.force = true;
|
|
40
|
-
else if (a === '--fixture') args.fixture = argv[++i];
|
|
41
|
-
else if (a === '--help' || a === '-h') args.help = true;
|
|
42
|
-
}
|
|
43
|
-
return args;
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
function help() {
|
|
47
|
-
return `Usage: node pull-sharepoint.mjs --project <P> --alias <A> --entity <site-url>
|
|
48
|
-
[--allowed-tenants <comma,list>] [--week YYYY-MM-DD] [--dry-run] [--force] [--fixture <path>]`;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
function emit(obj) { process.stdout.write(JSON.stringify(obj) + '\n'); }
|
|
52
|
-
function configError(msg) { const e = new Error(msg); e.exitCode = 2; return e; }
|
|
53
|
-
function authError(msg) { const e = new Error(msg); e.exitCode = 3; return e; }
|
|
54
|
-
|
|
55
|
-
async function buildClient({ fixture }) {
|
|
56
|
-
if (fixture) {
|
|
57
|
-
const data = JSON.parse(await fs.readFile(fixture, 'utf8'));
|
|
58
|
-
return makeFixtureClient(data);
|
|
59
|
-
}
|
|
60
|
-
const token = await getToken(SCOPES.graph).catch(e => { throw authError(`token fetch failed: ${e.message}`); });
|
|
61
|
-
const headers = { Authorization: `Bearer ${token}`, Accept: 'application/json' };
|
|
62
|
-
return {
|
|
63
|
-
async listSiteItems(siteUrl, fromIso, toIso) {
|
|
64
|
-
const u = new URL(siteUrl);
|
|
65
|
-
const sitePath = `${u.hostname}:${u.pathname}`;
|
|
66
|
-
const siteRes = await (await fetch(`https://graph.microsoft.com/v1.0/sites/${encodeURIComponent(sitePath)}`, { headers })).json();
|
|
67
|
-
if (!siteRes.id) return [];
|
|
68
|
-
|
|
69
|
-
const drivesRes = await (await fetch(`https://graph.microsoft.com/v1.0/sites/${siteRes.id}/drives?$top=50`, { headers })).json();
|
|
70
|
-
const drives = drivesRes.value || [];
|
|
71
|
-
|
|
72
|
-
const inWindow = (m) => m && m >= fromIso && m < toIso;
|
|
73
|
-
const collected = new Map();
|
|
74
|
-
|
|
75
|
-
for (const drv of drives) {
|
|
76
|
-
try {
|
|
77
|
-
const searchUrl = `https://graph.microsoft.com/v1.0/drives/${drv.id}/root/search(q='''')?$top=200`;
|
|
78
|
-
const { items } = await fetchAllPages(searchUrl, { headers });
|
|
79
|
-
for (const it of items) {
|
|
80
|
-
if (inWindow(it.lastModifiedDateTime)) {
|
|
81
|
-
it.__drive_id = drv.id; it.__drive_name = drv.name;
|
|
82
|
-
collected.set(it.id, it);
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
} catch (_) { /* fall through to depth-1 walk */ }
|
|
86
|
-
|
|
87
|
-
try {
|
|
88
|
-
const rootUrl = `https://graph.microsoft.com/v1.0/drives/${drv.id}/root/children?$top=200`;
|
|
89
|
-
const { items: rootChildren } = await fetchAllPages(rootUrl, { headers });
|
|
90
|
-
for (const child of rootChildren) {
|
|
91
|
-
child.__drive_id = drv.id; child.__drive_name = drv.name;
|
|
92
|
-
if (inWindow(child.lastModifiedDateTime)) collected.set(child.id, child);
|
|
93
|
-
if (child.folder) {
|
|
94
|
-
try {
|
|
95
|
-
const subUrl = `https://graph.microsoft.com/v1.0/drives/${drv.id}/items/${child.id}/children?$top=200`;
|
|
96
|
-
const { items: subItems } = await fetchAllPages(subUrl, { headers });
|
|
97
|
-
for (const it of subItems) {
|
|
98
|
-
it.__drive_id = drv.id; it.__drive_name = drv.name;
|
|
99
|
-
it.__parent_folder = child.name;
|
|
100
|
-
if (inWindow(it.lastModifiedDateTime)) collected.set(it.id, it);
|
|
101
|
-
}
|
|
102
|
-
} catch (_) { /* skip subfolder on error */ }
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
} catch (_) { /* drive may be empty/unreachable */ }
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
return [...collected.values()];
|
|
109
|
-
},
|
|
110
|
-
};
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
function makeFixtureClient(data) {
|
|
114
|
-
return {
|
|
115
|
-
async listSiteItems(siteUrl, fromIso, toIso) {
|
|
116
|
-
const items = (data.itemsBySite && data.itemsBySite[siteUrl]) || [];
|
|
117
|
-
return items.filter(i => i.lastModifiedDateTime >= fromIso && i.lastModifiedDateTime < toIso);
|
|
118
|
-
},
|
|
119
|
-
};
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
function weekBounds(weekStartYmd) {
|
|
123
|
-
const start = parseYmd(weekStartYmd);
|
|
124
|
-
const end = new Date(start);
|
|
125
|
-
end.setDate(end.getDate() + 7);
|
|
126
|
-
return { fromIso: start.toISOString(), toIso: end.toISOString() };
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
const URL_RE = /https?:\/\/[^\s<>"`)\]]+/g;
|
|
130
|
-
|
|
131
|
-
function extractLinks(item) {
|
|
132
|
-
const blobs = [
|
|
133
|
-
item.webUrl, item.description, item.name,
|
|
134
|
-
item.parentReference && item.parentReference.path,
|
|
135
|
-
].filter(Boolean).join('\n');
|
|
136
|
-
const set = new Set();
|
|
137
|
-
for (const m of blobs.matchAll(URL_RE)) {
|
|
138
|
-
let u = m[0].replace(/[).,;]+$/, '');
|
|
139
|
-
set.add(u);
|
|
140
|
-
}
|
|
141
|
-
return [...set];
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
function classifyExternal(siteUrl, link) {
|
|
145
|
-
try {
|
|
146
|
-
const a = new URL(siteUrl);
|
|
147
|
-
const b = new URL(link);
|
|
148
|
-
return b.hostname.toLowerCase() !== a.hostname.toLowerCase();
|
|
149
|
-
} catch { return false; }
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
async function main() {
|
|
153
|
-
const args = parseArgs(process.argv.slice(2));
|
|
154
|
-
if (args.help) { console.log(help()); return 0; }
|
|
155
|
-
if (!args.project || !args.alias || !args.entity) {
|
|
156
|
-
console.error(help());
|
|
157
|
-
emit({ source: SOURCE, status: 'failed', errors: [{ signature: 'bad-args', message: 'required: --project --alias --entity' }] });
|
|
158
|
-
return 2;
|
|
159
|
-
}
|
|
160
|
-
const projectRoot = await assertProject(args.project).catch(e => { throw configError(e.message); });
|
|
161
|
-
const cfg = await loadConfig(projectRoot, args.alias);
|
|
162
|
-
const weekStart = args.week || ymd(currentIsoMonday());
|
|
163
|
-
const { fromIso, toIso } = weekBounds(weekStart);
|
|
164
|
-
|
|
165
|
-
const tenantList = (args.allowedTenants || (cfg.merged && cfg.merged.sharepoint && cfg.merged.sharepoint.allowed_tenants && cfg.merged.sharepoint.allowed_tenants.join(',')) || '')
|
|
166
|
-
.split(',').map(s => s.trim()).filter(Boolean);
|
|
167
|
-
|
|
168
|
-
if (tenantList.length > 0 && !isAllowedTenant(args.entity, tenantList)) {
|
|
169
|
-
await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, {
|
|
170
|
-
last_status: 'failed',
|
|
171
|
-
last_error: `site tenant not in allowed list: ${tenantList.join(',')}`,
|
|
172
|
-
});
|
|
173
|
-
emit({
|
|
174
|
-
source: SOURCE, entity: args.entity, week: weekStart, status: 'failed',
|
|
175
|
-
errors: [{ signature: 'cross-tenant-blocked', message: 'site host not allowed', allowed_tenants: tenantList }],
|
|
176
|
-
});
|
|
177
|
-
return 0;
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
const client = await buildClient({ fixture: args.fixture });
|
|
181
|
-
const startedAt = new Date().toISOString();
|
|
182
|
-
|
|
183
|
-
let items;
|
|
184
|
-
try { items = await client.listSiteItems(args.entity, fromIso, toIso); }
|
|
185
|
-
catch (e) {
|
|
186
|
-
const retryable = !e.status || [429, 502, 503, 504].includes(e.status);
|
|
187
|
-
await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { last_status: retryable ? 'deferred' : 'failed', last_error: e.message });
|
|
188
|
-
if (retryable && !args.dryRun) await enqueue(projectRoot, args.alias, { source: SOURCE, entity: args.entity, weekStart, signature: 'fetch-failed', reason: e.message });
|
|
189
|
-
if (!retryable && !args.dryRun) await emitLearningCandidate({ projectRoot, alias: args.alias, source: SOURCE, entity: args.entity, week: weekStart, error: { signature: 'fetch-failed', message: e.message, status: e.status }, context: { runner: 'pull-sharepoint' } });
|
|
190
|
-
emit({ source: SOURCE, entity: args.entity, week: weekStart, status: retryable ? 'deferred' : 'failed', errors: [{ message: e.message, status: e.status }] });
|
|
191
|
-
return retryable ? 1 : 0;
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
const siteHash = shortHash(args.entity);
|
|
195
|
-
|
|
196
|
-
if (items.length === 0) {
|
|
197
|
-
await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, { last_status: 'no-activity', items_pulled: 0, site_hash: siteHash });
|
|
198
|
-
emit({ source: SOURCE, entity: args.entity, week: weekStart, status: 'no-activity', items_pulled: 0, files_written: [] });
|
|
199
|
-
return 0;
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
const outDir = path.join(sourceDir(projectRoot, args.alias, SOURCE), siteHash, weekStart);
|
|
203
|
-
const filesWritten = [];
|
|
204
|
-
const linkAgg = new Map();
|
|
205
|
-
|
|
206
|
-
if (!args.dryRun) {
|
|
207
|
-
for (const it of items) {
|
|
208
|
-
const id = it.id || shortHash(it.webUrl || it.name || JSON.stringify(it));
|
|
209
|
-
const links = extractLinks(it);
|
|
210
|
-
const itemRecord = { ...it, _links: links };
|
|
211
|
-
const r = await writeAtomic(path.join(outDir, 'items', `${safeSegment(id)}.yml`), YAML.stringify(itemRecord));
|
|
212
|
-
if (r.written !== false) filesWritten.push(path.relative(projectRoot, r.path));
|
|
213
|
-
for (const lk of links) {
|
|
214
|
-
if (!classifyExternal(args.entity, lk)) continue;
|
|
215
|
-
if (!linkAgg.has(lk)) linkAgg.set(lk, new Set());
|
|
216
|
-
linkAgg.get(lk).add(it.name || it.id);
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
const r2 = await writeAtomic(path.join(outDir, 'index.md'), renderIndexMd({ siteUrl: args.entity, items, weekStart, pulledAt: startedAt }), { skipIfUnchanged: false });
|
|
220
|
-
if (r2.written !== false) filesWritten.push(path.relative(projectRoot, r2.path));
|
|
221
|
-
|
|
222
|
-
if (linkAgg.size > 0) {
|
|
223
|
-
const r3 = await writeAtomic(path.join(outDir, 'external-links.md'), renderLinksMd({ siteUrl: args.entity, linkAgg, weekStart }), { skipIfUnchanged: false });
|
|
224
|
-
if (r3.written !== false) filesWritten.push(path.relative(projectRoot, r3.path));
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
await clear(projectRoot, args.alias, SOURCE, args.entity).catch(() => {});
|
|
229
|
-
|
|
230
|
-
await updateCell(projectRoot, args.alias, SOURCE, args.entity, weekStart, {
|
|
231
|
-
last_status: 'captured',
|
|
232
|
-
items_pulled: items.length,
|
|
233
|
-
site_hash: siteHash,
|
|
234
|
-
external_links: linkAgg.size,
|
|
235
|
-
});
|
|
236
|
-
|
|
237
|
-
if (!args.dryRun) {
|
|
238
|
-
await appendRunLog(projectRoot, { runner: 'pull-sharepoint', alias: args.alias, entity: args.entity, week: weekStart, status: 'captured', items_pulled: items.length, external_links: linkAgg.size });
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
emit({
|
|
242
|
-
source: SOURCE, entity: args.entity, week: weekStart, status: 'captured',
|
|
243
|
-
items_pulled: items.length, site_hash: siteHash, external_links: linkAgg.size,
|
|
244
|
-
files_written: filesWritten, ledger_key: `sharepoint::${args.entity}::${weekStart}`,
|
|
245
|
-
});
|
|
246
|
-
return 0;
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
function renderIndexMd({ siteUrl, items, weekStart, pulledAt }) {
|
|
250
|
-
const lines = [
|
|
251
|
-
`# SharePoint - ${siteUrl} - week ${weekStart}`,
|
|
252
|
-
'',
|
|
253
|
-
`- site_url: ${siteUrl}`,
|
|
254
|
-
`- week_start: ${weekStart}`,
|
|
255
|
-
`- items: ${items.length}`,
|
|
256
|
-
`- pulled_at: ${pulledAt}`,
|
|
257
|
-
'',
|
|
258
|
-
'## Items',
|
|
259
|
-
];
|
|
260
|
-
for (const it of items) {
|
|
261
|
-
const drv = it.__drive_name ? ` _[${it.__drive_name}${it.__parent_folder ? '/' + it.__parent_folder : ''}]_` : '';
|
|
262
|
-
lines.push(`- [${it.lastModifiedDateTime}] **${it.name || it.id}**${drv} - ${it.webUrl || ''}`);
|
|
263
|
-
}
|
|
264
|
-
lines.push('');
|
|
265
|
-
return lines.join('\n');
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
function renderLinksMd({ siteUrl, linkAgg, weekStart }) {
|
|
269
|
-
const lines = [
|
|
270
|
-
`# SharePoint external links - ${siteUrl} - week ${weekStart}`,
|
|
271
|
-
'',
|
|
272
|
-
`- site_url: ${siteUrl}`,
|
|
273
|
-
`- week_start: ${weekStart}`,
|
|
274
|
-
`- links: ${linkAgg.size}`,
|
|
275
|
-
'',
|
|
276
|
-
'## External links',
|
|
277
|
-
];
|
|
278
|
-
for (const [link, sources] of [...linkAgg.entries()].sort()) {
|
|
279
|
-
lines.push(`- ${link} - referenced by: ${[...sources].join(', ')}`);
|
|
280
|
-
}
|
|
281
|
-
lines.push('');
|
|
282
|
-
return lines.join('\n');
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
main().then(code => { process.exitCode = code; }).catch(e => {
|
|
286
|
-
emit({ source: SOURCE, status: 'failed', errors: [{ message: e.message, code: e.exitCode || 'unknown' }] });
|
|
287
|
-
process.exitCode = e.exitCode || 1;
|
|
288
|
-
});
|
|
7
|
+
runCli('sharepoint').then(code => { process.exitCode = code; });
|