wayfind 2.0.35 → 2.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/connectors/notion.js +146 -2
- package/bin/content-store.js +108 -0
- package/bin/team-context.js +2 -0
- package/package.json +1 -1
package/bin/connectors/notion.js
CHANGED
|
@@ -249,11 +249,23 @@ async function configure() {
|
|
|
249
249
|
.map((d) => d.trim())
|
|
250
250
|
.filter(Boolean);
|
|
251
251
|
|
|
252
|
+
// Optional: page IDs for full content extraction
|
|
253
|
+
console.log('');
|
|
254
|
+
console.log('Optional: specific page IDs to extract full content from (comma-separated).');
|
|
255
|
+
console.log('These pages will have their body text included in signals, not just metadata.');
|
|
256
|
+
console.log('Find page IDs in the URL: notion.so/<workspace>/<page-id>');
|
|
257
|
+
const pageInput = await ask('Page IDs: ');
|
|
258
|
+
const pages = pageInput
|
|
259
|
+
.split(',')
|
|
260
|
+
.map((p) => p.trim().replace(/-/g, ''))
|
|
261
|
+
.filter(Boolean);
|
|
262
|
+
|
|
252
263
|
const channelConfig = {
|
|
253
264
|
transport: 'https',
|
|
254
265
|
token,
|
|
255
266
|
token_env: 'NOTION_TOKEN',
|
|
256
267
|
databases: databases.length > 0 ? databases : null,
|
|
268
|
+
pages: pages.length > 0 ? pages : null,
|
|
257
269
|
last_pull: null,
|
|
258
270
|
};
|
|
259
271
|
|
|
@@ -261,8 +273,12 @@ async function configure() {
|
|
|
261
273
|
console.log('Notion connector configured.');
|
|
262
274
|
if (databases.length > 0) {
|
|
263
275
|
console.log(`Monitoring ${databases.length} database(s).`);
|
|
264
|
-
}
|
|
265
|
-
|
|
276
|
+
}
|
|
277
|
+
if (pages.length > 0) {
|
|
278
|
+
console.log(`Extracting content from ${pages.length} page(s).`);
|
|
279
|
+
}
|
|
280
|
+
if (databases.length === 0 && pages.length === 0) {
|
|
281
|
+
console.log('Monitoring all shared pages (metadata only).');
|
|
266
282
|
}
|
|
267
283
|
console.log('');
|
|
268
284
|
|
|
@@ -298,6 +314,37 @@ async function pull(config, since) {
|
|
|
298
314
|
dbEntries.push(...entries.map((e) => ({ ...e, _databaseId: dbId })));
|
|
299
315
|
}
|
|
300
316
|
|
|
317
|
+
// Fetch content for targeted pages
|
|
318
|
+
const targetedPageIds = config.pages || [];
|
|
319
|
+
const pageContents = {};
|
|
320
|
+
if (targetedPageIds.length > 0) {
|
|
321
|
+
for (const pageId of targetedPageIds) {
|
|
322
|
+
try {
|
|
323
|
+
const content = await fetchPageContent(token, pageId);
|
|
324
|
+
if (content && content.trim()) {
|
|
325
|
+
pageContents[pageId] = content;
|
|
326
|
+
}
|
|
327
|
+
} catch {
|
|
328
|
+
// Skip pages that fail — may have been deleted or unshared
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
// Also fetch targeted pages that aren't in the recent pages list
|
|
332
|
+
const recentPageIds = new Set(pages.map((p) => p.id.replace(/-/g, '')));
|
|
333
|
+
for (const pageId of targetedPageIds) {
|
|
334
|
+
if (!recentPageIds.has(pageId.replace(/-/g, ''))) {
|
|
335
|
+
try {
|
|
336
|
+
const endpoint = `/pages/${pageId}`;
|
|
337
|
+
const page = await notionGet(token, endpoint);
|
|
338
|
+
if (page && page.id) {
|
|
339
|
+
pages.push(page);
|
|
340
|
+
}
|
|
341
|
+
} catch {
|
|
342
|
+
// Skip — page may not exist
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
|
|
301
348
|
// Fetch comment counts for active pages (top 20 by recency)
|
|
302
349
|
const activePages = pages.slice(0, 20);
|
|
303
350
|
const commentCounts = {};
|
|
@@ -314,6 +361,7 @@ async function pull(config, since) {
|
|
|
314
361
|
|
|
315
362
|
// Analyze
|
|
316
363
|
const analysis = analyzeActivity(pages, dbEntries, commentCounts, sinceDate, todayDate, userMap);
|
|
364
|
+
analysis.pageContents = pageContents;
|
|
317
365
|
|
|
318
366
|
// Generate markdown
|
|
319
367
|
const md = generateMarkdown(analysis, sinceDate, todayDate, timestamp, userMap);
|
|
@@ -496,6 +544,86 @@ async function fetchComments(token, pageId) {
|
|
|
496
544
|
}
|
|
497
545
|
}
|
|
498
546
|
|
|
547
|
+
// ── Page content extraction ────────────────────────────────────────────────
|
|
548
|
+
|
|
549
|
+
/**
 * Fetch the child blocks of a page and render them as markdown-style text.
 *
 * Blocks are requested from `/blocks/{pageId}/children` in pages of 100 via
 * `notionGet`, using at most five requests. Each block is converted with
 * `blockToMarkdown`; blocks it returns `null` for are skipped.
 *
 * `maxChars` is a soft limit: conversion (and any further fetching) stops once
 * the accumulated line length has reached it, but the line that crosses the
 * limit is kept whole, so the result may be longer than `maxChars`. Newline
 * separators are not counted.
 *
 * This is best effort: if a request fails or returns nothing, whatever was
 * gathered so far is returned rather than raising.
 *
 * @param {string} token - Credential passed through to `notionGet`.
 * @param {string} pageId - ID of the page (block) whose children are read.
 * @param {number} [maxChars=5000] - Soft limit on the amount of text gathered.
 * @returns {Promise<string>} Converted lines joined with `\n` ('' if none).
 */
async function fetchPageContent(token, pageId, maxChars = 5000) {
  const MAX_REQUESTS = 5;
  const lines = [];
  let totalChars = 0;
  let cursor;

  // Written as a `>=` test so a NaN limit behaves as "no limit".
  const limitReached = () => totalChars >= maxChars;

  for (let requests = 0; requests < MAX_REQUESTS && !limitReached(); requests++) {
    const cursorParam = cursor ? `&start_cursor=${encodeURIComponent(cursor)}` : '';
    const endpoint = `/blocks/${pageId}/children?page_size=100${cursorParam}`;

    let response;
    try {
      response = await notionGet(token, endpoint);
    } catch {
      // Deliberate best effort: keep what has been collected so far.
      break;
    }
    if (!response) break;

    const results = Array.isArray(response.results) ? response.results : [];
    for (const block of results) {
      if (limitReached()) break;
      const line = blockToMarkdown(block);
      if (line !== null) {
        lines.push(line);
        totalChars += line.length;
      }
    }

    // Without a cursor the next request would re-read the first page and
    // duplicate content, so stop even if has_more claims there is more.
    if (!response.has_more || !response.next_cursor) break;
    cursor = response.next_cursor;
  }

  return lines.join('\n');
}
|
|
585
|
+
|
|
586
|
+
/**
 * Render a single block object as one markdown-style string.
 *
 * The block's `type` names the property that carries its payload
 * (`block[type]`). Text is taken from the payload's `rich_text` array by
 * concatenating each item's `plain_text`; a non-array `rich_text` yields ''.
 *
 * @param {object} block - Block with a `type` and a matching payload property.
 * @returns {string|null} The rendered text, or `null` when the block has no
 *   type, no payload, or a type that is not handled here.
 */
function blockToMarkdown(block) {
  const { type } = block;
  if (!type) return null;

  const payload = block[type];
  if (!payload) return null;

  // Evaluated lazily so types that carry no text never touch rich_text.
  const text = () => {
    const segments = payload.rich_text;
    if (!Array.isArray(segments)) return '';
    return segments.reduce((acc, seg) => acc + (seg.plain_text || ''), '');
  };

  if (type === 'divider') return '---';
  if (type === 'code') return '```\n' + text() + '\n```';
  if (type === 'to_do') {
    const box = payload.checked ? '- [x] ' : '- [ ] ';
    return box + text();
  }

  // Types rendered as "<prefix><text>". A Map is used so inherited object
  // keys such as "constructor" are never mistaken for supported types.
  const prefixes = new Map([
    ['paragraph', ''],
    ['heading_1', '# '],
    ['heading_2', '## '],
    ['heading_3', '### '],
    ['bulleted_list_item', '- '],
    ['numbered_list_item', '1. '],
    ['toggle', '> '],
    ['callout', '> '],
    ['quote', '> '],
  ]);

  // Anything else (image, embed, file, etc.) is skipped.
  if (!prefixes.has(type)) return null;
  return prefixes.get(type) + text();
}
|
|
626
|
+
|
|
499
627
|
// ── Property extraction ─────────────────────────────────────────────────────
|
|
500
628
|
|
|
501
629
|
function extractTitle(page) {
|
|
@@ -700,6 +828,22 @@ function generateMarkdown(analysis, sinceDate, todayDate, timestamp, userMap) {
|
|
|
700
828
|
lines.push('');
|
|
701
829
|
}
|
|
702
830
|
|
|
831
|
+
// Targeted page content
|
|
832
|
+
const pageContents = analysis.pageContents || {};
|
|
833
|
+
if (Object.keys(pageContents).length > 0) {
|
|
834
|
+
lines.push('## Page Content');
|
|
835
|
+
lines.push('');
|
|
836
|
+
for (const [pageId, content] of Object.entries(pageContents)) {
|
|
837
|
+
// Find the page title from the pages list
|
|
838
|
+
const page = analysis.pages.find((p) => p.id.replace(/-/g, '') === pageId.replace(/-/g, ''));
|
|
839
|
+
const title = page ? extractTitle(page) : `Page ${pageId.slice(0, 8)}`;
|
|
840
|
+
lines.push(`### ${sanitizeForMarkdown(title)}`);
|
|
841
|
+
lines.push('');
|
|
842
|
+
lines.push(content);
|
|
843
|
+
lines.push('');
|
|
844
|
+
}
|
|
845
|
+
}
|
|
846
|
+
|
|
703
847
|
// Summary
|
|
704
848
|
lines.push('## Summary');
|
|
705
849
|
lines.push('');
|
package/bin/content-store.js
CHANGED
|
@@ -1852,6 +1852,114 @@ async function indexSignals(options = {}) {
|
|
|
1852
1852
|
}
|
|
1853
1853
|
}
|
|
1854
1854
|
|
|
1855
|
+
// ── Chunk long signal entries for better embedding retrieval ──────────────
|
|
1856
|
+
// Split signal content by ## headings into section-level entries.
|
|
1857
|
+
// Each chunk gets its own embedding so semantic search matches at section level.
|
|
1858
|
+
const MIN_CHUNK_CHARS = 200;
|
|
1859
|
+
const MAX_CHUNK_CHARS = 3000;
|
|
1860
|
+
|
|
1861
|
+
// Collect all signal files across all channels for chunking
|
|
1862
|
+
const allSignalFiles = [];
|
|
1863
|
+
for (const ch of channels) {
|
|
1864
|
+
const chDir = path.join(signalsDir, ch);
|
|
1865
|
+
try {
|
|
1866
|
+
const entries = fs.readdirSync(chDir, { withFileTypes: true });
|
|
1867
|
+
for (const e of entries) {
|
|
1868
|
+
if (e.isFile() && e.name.endsWith('.md')) {
|
|
1869
|
+
allSignalFiles.push({ filePath: path.join(chDir, e.name), file: e.name, repo: 'signals/' + ch, channel: ch });
|
|
1870
|
+
}
|
|
1871
|
+
}
|
|
1872
|
+
for (const ownerEntry of entries) {
|
|
1873
|
+
if (!ownerEntry.isDirectory()) continue;
|
|
1874
|
+
const ownerDir = path.join(chDir, ownerEntry.name);
|
|
1875
|
+
let repoEntries;
|
|
1876
|
+
try { repoEntries = fs.readdirSync(ownerDir, { withFileTypes: true }); } catch { continue; }
|
|
1877
|
+
for (const repoEntry of repoEntries) {
|
|
1878
|
+
if (!repoEntry.isDirectory()) continue;
|
|
1879
|
+
const repoDir = path.join(ownerDir, repoEntry.name);
|
|
1880
|
+
let repoFiles;
|
|
1881
|
+
try { repoFiles = fs.readdirSync(repoDir).filter(f => f.endsWith('.md')); } catch { continue; }
|
|
1882
|
+
for (const f of repoFiles) {
|
|
1883
|
+
allSignalFiles.push({ filePath: path.join(repoDir, f), file: f, repo: `${ownerEntry.name}/${repoEntry.name}`, channel: ch });
|
|
1884
|
+
}
|
|
1885
|
+
}
|
|
1886
|
+
}
|
|
1887
|
+
} catch { continue; }
|
|
1888
|
+
}
|
|
1889
|
+
|
|
1890
|
+
for (const { filePath, file, repo, channel: ch } of allSignalFiles) {
|
|
1891
|
+
let content;
|
|
1892
|
+
try {
|
|
1893
|
+
content = fs.readFileSync(filePath, 'utf8');
|
|
1894
|
+
} catch {
|
|
1895
|
+
continue;
|
|
1896
|
+
}
|
|
1897
|
+
if (content.length < MIN_CHUNK_CHARS * 2) continue; // Too short to chunk
|
|
1898
|
+
|
|
1899
|
+
const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})/);
|
|
1900
|
+
const date = dateMatch ? dateMatch[1] : file.replace(/\.md$/, '');
|
|
1901
|
+
const titleMatch = content.match(/^#\s+(.+)$/m);
|
|
1902
|
+
const parentTitle = titleMatch ? titleMatch[1].trim() : file.replace(/\.md$/, '');
|
|
1903
|
+
const parentId = generateEntryId(date, repo, file.replace(/\.md$/, ''));
|
|
1904
|
+
|
|
1905
|
+
// Split by ## headings
|
|
1906
|
+
const sections = content.split(/^(?=##\s)/m).filter(s => s.trim().length >= MIN_CHUNK_CHARS);
|
|
1907
|
+
if (sections.length <= 1) continue; // Only one section — parent embedding is sufficient
|
|
1908
|
+
|
|
1909
|
+
for (let i = 0; i < sections.length; i++) {
|
|
1910
|
+
let section = sections[i];
|
|
1911
|
+
const headingMatch = section.match(/^##\s+(.+)$/m);
|
|
1912
|
+
const sectionTitle = headingMatch ? headingMatch[1].trim() : `Section ${i + 1}`;
|
|
1913
|
+
const chunkTitle = `${parentTitle} — ${sectionTitle}`;
|
|
1914
|
+
|
|
1915
|
+
if (section.length > MAX_CHUNK_CHARS) {
|
|
1916
|
+
section = section.slice(0, MAX_CHUNK_CHARS);
|
|
1917
|
+
}
|
|
1918
|
+
|
|
1919
|
+
const chunkId = generateEntryId(date, repo, `chunk-${i}-${file.replace(/\.md$/, '')}`);
|
|
1920
|
+
const chunkHash = contentHash(section);
|
|
1921
|
+
const existingChunk = existingIndex.entries[chunkId];
|
|
1922
|
+
|
|
1923
|
+
if (existingChunk && existingChunk.contentHash === chunkHash) {
|
|
1924
|
+
if (doEmbeddings && !existingChunk.hasEmbedding) {
|
|
1925
|
+
try {
|
|
1926
|
+
const vec = await llm.generateEmbedding(section);
|
|
1927
|
+
existingEmbeddings[chunkId] = vec;
|
|
1928
|
+
existingChunk.hasEmbedding = true;
|
|
1929
|
+
} catch {
|
|
1930
|
+
// Skip
|
|
1931
|
+
}
|
|
1932
|
+
}
|
|
1933
|
+
continue;
|
|
1934
|
+
}
|
|
1935
|
+
|
|
1936
|
+
existingIndex.entries[chunkId] = {
|
|
1937
|
+
date,
|
|
1938
|
+
repo,
|
|
1939
|
+
title: chunkTitle,
|
|
1940
|
+
source: 'signal-chunk',
|
|
1941
|
+
parentId,
|
|
1942
|
+
chunkIndex: i,
|
|
1943
|
+
user: '',
|
|
1944
|
+
drifted: false,
|
|
1945
|
+
contentHash: chunkHash,
|
|
1946
|
+
contentLength: section.length,
|
|
1947
|
+
tags: [ch, sectionTitle.toLowerCase()],
|
|
1948
|
+
hasEmbedding: false,
|
|
1949
|
+
};
|
|
1950
|
+
|
|
1951
|
+
if (doEmbeddings) {
|
|
1952
|
+
try {
|
|
1953
|
+
const vec = await llm.generateEmbedding(section);
|
|
1954
|
+
existingEmbeddings[chunkId] = vec;
|
|
1955
|
+
existingIndex.entries[chunkId].hasEmbedding = true;
|
|
1956
|
+
} catch {
|
|
1957
|
+
// Continue without embedding
|
|
1958
|
+
}
|
|
1959
|
+
}
|
|
1960
|
+
}
|
|
1961
|
+
}
|
|
1962
|
+
|
|
1855
1963
|
// Save
|
|
1856
1964
|
existingIndex.entryCount = Object.keys(existingIndex.entries).length;
|
|
1857
1965
|
backend.saveIndex(existingIndex);
|
package/bin/team-context.js
CHANGED
|
@@ -4414,11 +4414,13 @@ function ensureContainerConfig() {
|
|
|
4414
4414
|
// Notion connector
|
|
4415
4415
|
if (!config.notion && process.env.NOTION_TOKEN) {
|
|
4416
4416
|
const databases = process.env.TEAM_CONTEXT_NOTION_DATABASES;
|
|
4417
|
+
const pages = process.env.TEAM_CONTEXT_NOTION_PAGES;
|
|
4417
4418
|
config.notion = {
|
|
4418
4419
|
transport: 'https',
|
|
4419
4420
|
token: process.env.NOTION_TOKEN,
|
|
4420
4421
|
token_env: 'NOTION_TOKEN',
|
|
4421
4422
|
databases: databases ? databases.split(',').map((d) => d.trim()) : null,
|
|
4423
|
+
pages: pages ? pages.split(',').map((p) => p.trim().replace(/-/g, '')) : null,
|
|
4422
4424
|
last_pull: null,
|
|
4423
4425
|
};
|
|
4424
4426
|
changed = true;
|
package/package.json
CHANGED