wayfind 2.0.35 → 2.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -249,11 +249,23 @@ async function configure() {
249
249
  .map((d) => d.trim())
250
250
  .filter(Boolean);
251
251
 
252
+ // Optional: page IDs for full content extraction
253
+ console.log('');
254
+ console.log('Optional: specific page IDs to extract full content from (comma-separated).');
255
+ console.log('These pages will have their body text included in signals, not just metadata.');
256
+ console.log('Find page IDs in the URL: notion.so/<workspace>/<page-id>');
257
+ const pageInput = await ask('Page IDs: ');
258
+ const pages = pageInput
259
+ .split(',')
260
+ .map((p) => p.trim().replace(/-/g, ''))
261
+ .filter(Boolean);
262
+
252
263
  const channelConfig = {
253
264
  transport: 'https',
254
265
  token,
255
266
  token_env: 'NOTION_TOKEN',
256
267
  databases: databases.length > 0 ? databases : null,
268
+ pages: pages.length > 0 ? pages : null,
257
269
  last_pull: null,
258
270
  };
259
271
 
@@ -261,8 +273,12 @@ async function configure() {
261
273
  console.log('Notion connector configured.');
262
274
  if (databases.length > 0) {
263
275
  console.log(`Monitoring ${databases.length} database(s).`);
264
- } else {
265
- console.log('Monitoring all shared pages.');
276
+ }
277
+ if (pages.length > 0) {
278
+ console.log(`Extracting content from ${pages.length} page(s).`);
279
+ }
280
+ if (databases.length === 0 && pages.length === 0) {
281
+ console.log('Monitoring all shared pages (metadata only).');
266
282
  }
267
283
  console.log('');
268
284
 
@@ -298,6 +314,37 @@ async function pull(config, since) {
298
314
  dbEntries.push(...entries.map((e) => ({ ...e, _databaseId: dbId })));
299
315
  }
300
316
 
317
+ // Fetch content for targeted pages
318
+ const targetedPageIds = config.pages || [];
319
+ const pageContents = {};
320
+ if (targetedPageIds.length > 0) {
321
+ for (const pageId of targetedPageIds) {
322
+ try {
323
+ const content = await fetchPageContent(token, pageId);
324
+ if (content && content.trim()) {
325
+ pageContents[pageId] = content;
326
+ }
327
+ } catch {
328
+ // Skip pages that fail — may have been deleted or unshared
329
+ }
330
+ }
331
+ // Also fetch targeted pages that aren't in the recent pages list
332
+ const recentPageIds = new Set(pages.map((p) => p.id.replace(/-/g, '')));
333
+ for (const pageId of targetedPageIds) {
334
+ if (!recentPageIds.has(pageId.replace(/-/g, ''))) {
335
+ try {
336
+ const endpoint = `/pages/${pageId}`;
337
+ const page = await notionGet(token, endpoint);
338
+ if (page && page.id) {
339
+ pages.push(page);
340
+ }
341
+ } catch {
342
+ // Skip — page may not exist
343
+ }
344
+ }
345
+ }
346
+ }
347
+
301
348
  // Fetch comment counts for active pages (top 20 by recency)
302
349
  const activePages = pages.slice(0, 20);
303
350
  const commentCounts = {};
@@ -314,6 +361,7 @@ async function pull(config, since) {
314
361
 
315
362
  // Analyze
316
363
  const analysis = analyzeActivity(pages, dbEntries, commentCounts, sinceDate, todayDate, userMap);
364
+ analysis.pageContents = pageContents;
317
365
 
318
366
  // Generate markdown
319
367
  const md = generateMarkdown(analysis, sinceDate, todayDate, timestamp, userMap);
@@ -496,6 +544,86 @@ async function fetchComments(token, pageId) {
496
544
  }
497
545
  }
498
546
 
547
+ // ── Page content extraction ────────────────────────────────────────────────
548
+
549
/**
 * Fetch the child blocks of a Notion page and render them as Markdown-style text.
 *
 * Children are requested 100 at a time via `notionGet`, following pagination
 * cursors for at most five requests. The fetch is best-effort: a request that
 * throws or yields no response object ends pagination, and whatever blocks
 * were collected up to that point are still rendered.
 *
 * @param {string} token - Passed through unchanged to `notionGet`.
 * @param {string} pageId - ID whose `/blocks/<id>/children` are listed.
 * @param {number} [maxChars=5000] - Soft limit on rendered text. It is checked
 *   before each block is appended, so the result can exceed it by the length
 *   of the last block added (blocks are never cut in half). Newline separators
 *   are not counted.
 * @returns {Promise<string>} Rendered lines joined with '\n'; '' when nothing
 *   could be fetched or rendered.
 */
async function fetchPageContent(token, pageId, maxChars = 5000) {
  const MAX_REQUESTS = 5;
  const blocks = [];
  let cursor;

  for (let requests = 0; requests < MAX_REQUESTS; requests++) {
    // Encode both path and query values so an unexpected character cannot
    // alter the request URL.
    let endpoint = `/blocks/${encodeURIComponent(pageId)}/children?page_size=100`;
    if (cursor) {
      endpoint += `&start_cursor=${encodeURIComponent(cursor)}`;
    }

    let response;
    try {
      response = await notionGet(token, endpoint);
    } catch {
      // Best effort: keep what was collected before the failure.
      break;
    }
    // A missing response is treated like a failed request instead of throwing.
    if (!response) break;

    if (Array.isArray(response.results)) {
      blocks.push(...response.results);
    }

    // Without a cursor the next request would re-fetch the first page and
    // append duplicate blocks, so stop even if has_more is set.
    if (!response.has_more || !response.next_cursor) break;
    cursor = response.next_cursor;
  }

  // Convert blocks to markdown, stopping once the soft limit is reached.
  const lines = [];
  let totalChars = 0;
  for (const block of blocks) {
    if (totalChars >= maxChars) break;
    const line = blockToMarkdown(block);
    if (line === null) continue; // block type has no text rendering
    lines.push(line);
    totalChars += line.length;
  }

  return lines.join('\n');
}
585
+
586
/**
 * Render one Notion block object as a Markdown-style string.
 *
 * The block's payload is read from `block[block.type]`, and its text is the
 * concatenation of the `plain_text` of every entry in `rich_text`.
 *
 * @param {object} block - Block object with a `type` field and a payload
 *   stored under the key named by that type.
 * @returns {string|null} The rendered text, or null when the block is missing,
 *   has no type or payload, or is of a type with no text rendering here
 *   (image, embed, file, etc.).
 */
function blockToMarkdown(block) {
  // Guard against null/undefined entries instead of throwing on `.type`.
  if (!block || !block.type) return null;

  const type = block.type;
  const data = block[type];
  if (!data) return null;

  // Flatten rich text to plain text; tolerate a missing array or empty entries.
  const text = Array.isArray(data.rich_text)
    ? data.rich_text.map((part) => (part && part.plain_text) || '').join('')
    : '';

  switch (type) {
    case 'paragraph':
      return text;
    case 'heading_1':
      return `# ${text}`;
    case 'heading_2':
      return `## ${text}`;
    case 'heading_3':
      return `### ${text}`;
    case 'bulleted_list_item':
      return `- ${text}`;
    case 'numbered_list_item':
      return `1. ${text}`;
    case 'to_do':
      return `${data.checked ? '- [x] ' : '- [ ] '}${text}`;
    // Toggles and callouts are rendered the same way as quotes.
    case 'toggle':
    case 'callout':
    case 'quote':
      return `> ${text}`;
    case 'code':
      return `\`\`\`\n${text}\n\`\`\``;
    case 'divider':
      return '---';
    default:
      // Skip unsupported block types (image, embed, file, etc.)
      return null;
  }
}
626
+
499
627
  // ── Property extraction ─────────────────────────────────────────────────────
500
628
 
501
629
  function extractTitle(page) {
@@ -700,6 +828,22 @@ function generateMarkdown(analysis, sinceDate, todayDate, timestamp, userMap) {
700
828
  lines.push('');
701
829
  }
702
830
 
831
+ // Targeted page content
832
+ const pageContents = analysis.pageContents || {};
833
+ if (Object.keys(pageContents).length > 0) {
834
+ lines.push('## Page Content');
835
+ lines.push('');
836
+ for (const [pageId, content] of Object.entries(pageContents)) {
837
+ // Find the page title from the pages list
838
+ const page = analysis.pages.find((p) => p.id.replace(/-/g, '') === pageId.replace(/-/g, ''));
839
+ const title = page ? extractTitle(page) : `Page ${pageId.slice(0, 8)}`;
840
+ lines.push(`### ${sanitizeForMarkdown(title)}`);
841
+ lines.push('');
842
+ lines.push(content);
843
+ lines.push('');
844
+ }
845
+ }
846
+
703
847
  // Summary
704
848
  lines.push('## Summary');
705
849
  lines.push('');
@@ -1852,6 +1852,114 @@ async function indexSignals(options = {}) {
1852
1852
  }
1853
1853
  }
1854
1854
 
1855
+ // ── Chunk long signal entries for better embedding retrieval ──────────────
1856
+ // Split signal content by ## headings into section-level entries.
1857
+ // Each chunk gets its own embedding so semantic search matches at section level.
1858
+ const MIN_CHUNK_CHARS = 200;
1859
+ const MAX_CHUNK_CHARS = 3000;
1860
+
1861
+ // Collect all signal files across all channels for chunking
1862
+ const allSignalFiles = [];
1863
+ for (const ch of channels) {
1864
+ const chDir = path.join(signalsDir, ch);
1865
+ try {
1866
+ const entries = fs.readdirSync(chDir, { withFileTypes: true });
1867
+ for (const e of entries) {
1868
+ if (e.isFile() && e.name.endsWith('.md')) {
1869
+ allSignalFiles.push({ filePath: path.join(chDir, e.name), file: e.name, repo: 'signals/' + ch, channel: ch });
1870
+ }
1871
+ }
1872
+ for (const ownerEntry of entries) {
1873
+ if (!ownerEntry.isDirectory()) continue;
1874
+ const ownerDir = path.join(chDir, ownerEntry.name);
1875
+ let repoEntries;
1876
+ try { repoEntries = fs.readdirSync(ownerDir, { withFileTypes: true }); } catch { continue; }
1877
+ for (const repoEntry of repoEntries) {
1878
+ if (!repoEntry.isDirectory()) continue;
1879
+ const repoDir = path.join(ownerDir, repoEntry.name);
1880
+ let repoFiles;
1881
+ try { repoFiles = fs.readdirSync(repoDir).filter(f => f.endsWith('.md')); } catch { continue; }
1882
+ for (const f of repoFiles) {
1883
+ allSignalFiles.push({ filePath: path.join(repoDir, f), file: f, repo: `${ownerEntry.name}/${repoEntry.name}`, channel: ch });
1884
+ }
1885
+ }
1886
+ }
1887
+ } catch { continue; }
1888
+ }
1889
+
1890
+ for (const { filePath, file, repo, channel: ch } of allSignalFiles) {
1891
+ let content;
1892
+ try {
1893
+ content = fs.readFileSync(filePath, 'utf8');
1894
+ } catch {
1895
+ continue;
1896
+ }
1897
+ if (content.length < MIN_CHUNK_CHARS * 2) continue; // Too short to chunk
1898
+
1899
+ const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})/);
1900
+ const date = dateMatch ? dateMatch[1] : file.replace(/\.md$/, '');
1901
+ const titleMatch = content.match(/^#\s+(.+)$/m);
1902
+ const parentTitle = titleMatch ? titleMatch[1].trim() : file.replace(/\.md$/, '');
1903
+ const parentId = generateEntryId(date, repo, file.replace(/\.md$/, ''));
1904
+
1905
+ // Split by ## headings
1906
+ const sections = content.split(/^(?=##\s)/m).filter(s => s.trim().length >= MIN_CHUNK_CHARS);
1907
+ if (sections.length <= 1) continue; // Only one section — parent embedding is sufficient
1908
+
1909
+ for (let i = 0; i < sections.length; i++) {
1910
+ let section = sections[i];
1911
+ const headingMatch = section.match(/^##\s+(.+)$/m);
1912
+ const sectionTitle = headingMatch ? headingMatch[1].trim() : `Section ${i + 1}`;
1913
+ const chunkTitle = `${parentTitle} — ${sectionTitle}`;
1914
+
1915
+ if (section.length > MAX_CHUNK_CHARS) {
1916
+ section = section.slice(0, MAX_CHUNK_CHARS);
1917
+ }
1918
+
1919
+ const chunkId = generateEntryId(date, repo, `chunk-${i}-${file.replace(/\.md$/, '')}`);
1920
+ const chunkHash = contentHash(section);
1921
+ const existingChunk = existingIndex.entries[chunkId];
1922
+
1923
+ if (existingChunk && existingChunk.contentHash === chunkHash) {
1924
+ if (doEmbeddings && !existingChunk.hasEmbedding) {
1925
+ try {
1926
+ const vec = await llm.generateEmbedding(section);
1927
+ existingEmbeddings[chunkId] = vec;
1928
+ existingChunk.hasEmbedding = true;
1929
+ } catch {
1930
+ // Skip
1931
+ }
1932
+ }
1933
+ continue;
1934
+ }
1935
+
1936
+ existingIndex.entries[chunkId] = {
1937
+ date,
1938
+ repo,
1939
+ title: chunkTitle,
1940
+ source: 'signal-chunk',
1941
+ parentId,
1942
+ chunkIndex: i,
1943
+ user: '',
1944
+ drifted: false,
1945
+ contentHash: chunkHash,
1946
+ contentLength: section.length,
1947
+ tags: [ch, sectionTitle.toLowerCase()],
1948
+ hasEmbedding: false,
1949
+ };
1950
+
1951
+ if (doEmbeddings) {
1952
+ try {
1953
+ const vec = await llm.generateEmbedding(section);
1954
+ existingEmbeddings[chunkId] = vec;
1955
+ existingIndex.entries[chunkId].hasEmbedding = true;
1956
+ } catch {
1957
+ // Continue without embedding
1958
+ }
1959
+ }
1960
+ }
1961
+ }
1962
+
1855
1963
  // Save
1856
1964
  existingIndex.entryCount = Object.keys(existingIndex.entries).length;
1857
1965
  backend.saveIndex(existingIndex);
@@ -4414,11 +4414,13 @@ function ensureContainerConfig() {
4414
4414
  // Notion connector
4415
4415
  if (!config.notion && process.env.NOTION_TOKEN) {
4416
4416
  const databases = process.env.TEAM_CONTEXT_NOTION_DATABASES;
4417
+ const pages = process.env.TEAM_CONTEXT_NOTION_PAGES;
4417
4418
  config.notion = {
4418
4419
  transport: 'https',
4419
4420
  token: process.env.NOTION_TOKEN,
4420
4421
  token_env: 'NOTION_TOKEN',
4421
4422
  databases: databases ? databases.split(',').map((d) => d.trim()) : null,
4423
+ pages: pages ? pages.split(',').map((p) => p.trim().replace(/-/g, '')) : null,
4422
4424
  last_pull: null,
4423
4425
  };
4424
4426
  changed = true;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wayfind",
3
- "version": "2.0.35",
3
+ "version": "2.0.37",
4
4
  "description": "Team decision trail for AI-assisted development. The connective tissue between product, engineering, and strategy.",
5
5
  "bin": {
6
6
  "wayfind": "./bin/team-context.js"