npm - wayfind - Versions diffs - 2.0.35 → 2.0.37 - Mend

wayfind 2.0.35 → 2.0.37

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (4) hide show

package/bin/connectors/notion.js +146 -2
package/bin/content-store.js +108 -0
package/bin/team-context.js +2 -0
package/package.json +1 -1

package/bin/connectors/notion.js CHANGED Viewed

@@ -249,11 +249,23 @@ async function configure() {
     .map((d) => d.trim())
     .filter(Boolean);
+  // Optional: page IDs for full content extraction
+  console.log('');
+  console.log('Optional: specific page IDs to extract full content from (comma-separated).');
+  console.log('These pages will have their body text included in signals, not just metadata.');
+  console.log('Find page IDs in the URL: notion.so/<workspace>/<page-id>');
+  const pageInput = await ask('Page IDs: ');
+  const pages = pageInput
+    .split(',')
+    .map((p) => p.trim().replace(/-/g, ''))
+    .filter(Boolean);
   const channelConfig = {
     transport: 'https',
     token,
     token_env: 'NOTION_TOKEN',
     databases: databases.length > 0 ? databases : null,
+    pages: pages.length > 0 ? pages : null,
     last_pull: null,
   };
@@ -261,8 +273,12 @@ async function configure() {
   console.log('Notion connector configured.');
   if (databases.length > 0) {
     console.log(`Monitoring ${databases.length} database(s).`);
-  } else {
-    console.log('Monitoring all shared pages.');
+  }
+  if (pages.length > 0) {
+    console.log(`Extracting content from ${pages.length} page(s).`);
+  }
+  if (databases.length === 0 && pages.length === 0) {
+    console.log('Monitoring all shared pages (metadata only).');
   }
   console.log('');
@@ -298,6 +314,37 @@ async function pull(config, since) {
     dbEntries.push(...entries.map((e) => ({ ...e, _databaseId: dbId })));
   }
+  // Fetch content for targeted pages
+  const targetedPageIds = config.pages || [];
+  const pageContents = {};
+  if (targetedPageIds.length > 0) {
+    for (const pageId of targetedPageIds) {
+      try {
+        const content = await fetchPageContent(token, pageId);
+        if (content && content.trim()) {
+          pageContents[pageId] = content;
+        }
+      } catch {
+        // Skip pages that fail — may have been deleted or unshared
+      }
+    }
+    // Also fetch targeted pages that aren't in the recent pages list
+    const recentPageIds = new Set(pages.map((p) => p.id.replace(/-/g, '')));
+    for (const pageId of targetedPageIds) {
+      if (!recentPageIds.has(pageId.replace(/-/g, ''))) {
+        try {
+          const endpoint = `/pages/${pageId}`;
+          const page = await notionGet(token, endpoint);
+          if (page && page.id) {
+            pages.push(page);
+          }
+        } catch {
+          // Skip — page may not exist
+        }
+      }
+    }
+  }
   // Fetch comment counts for active pages (top 20 by recency)
   const activePages = pages.slice(0, 20);
   const commentCounts = {};
@@ -314,6 +361,7 @@ async function pull(config, since) {
   // Analyze
   const analysis = analyzeActivity(pages, dbEntries, commentCounts, sinceDate, todayDate, userMap);
+  analysis.pageContents = pageContents;
   // Generate markdown
   const md = generateMarkdown(analysis, sinceDate, todayDate, timestamp, userMap);
@@ -496,6 +544,86 @@ async function fetchComments(token, pageId) {
   }
 }
+// ── Page content extraction ────────────────────────────────────────────────
+async function fetchPageContent(token, pageId, maxChars = 5000) { // Fetches the child blocks of pageId and returns them as newline-joined markdown text; maxChars is a soft cap on the output
+  const blocks = [];
+  let cursor = undefined; // pagination cursor; stays undefined for the first request
+  const MAX_REQUESTS = 5; // hard cap on pagination round-trips (5 requests at page_size=100)
+  let requests = 0;
+  while (requests < MAX_REQUESTS) {
+    requests++;
+    const endpoint = `/blocks/${pageId}/children?page_size=100` + (cursor ? `&start_cursor=${cursor}` : ''); // only this page's children are requested; nothing here descends into nested blocks
+    let response;
+    try {
+      response = await notionGet(token, endpoint);
+    } catch {
+      break; // a failed request ends pagination; blocks collected so far are still converted below
+    }
+    const results = Array.isArray(response.results) ? response.results : []; // tolerate a response without a results array
+    blocks.push(...results);
+    if (!response.has_more) break;
+    cursor = response.next_cursor;
+  }
+  // Convert blocks to markdown
+  const lines = [];
+  let totalChars = 0; // sum of emitted line lengths; the joining newlines are not counted
+  for (const block of blocks) {
+    if (totalChars >= maxChars) break; // checked before appending, so the final line may push the total past maxChars
+    const line = blockToMarkdown(block);
+    if (line !== null) { // null means the block was skipped; empty strings are kept as blank lines
+      lines.push(line);
+      totalChars += line.length;
+    }
+  }
+  return lines.join('\n');
+}
+function blockToMarkdown(block) { // Maps one block object to a single markdown string, or null when the block is skipped
+  const type = block.type;
+  if (!type) return null; // no type field: nothing to dispatch on
+  const richTextToPlain = (rt) => // concatenates the plain_text of each rich-text item; anything that is not an array yields ''
+    Array.isArray(rt) ? rt.map((t) => t.plain_text || '').join('') : '';
+  const data = block[type]; // the payload is stored under a key named after the block's type
+  if (!data) return null; // no payload for this type: skip the block
+  switch (type) {
+    case 'paragraph':
+      return richTextToPlain(data.rich_text);
+    case 'heading_1':
+      return '# ' + richTextToPlain(data.rich_text);
+    case 'heading_2':
+      return '## ' + richTextToPlain(data.rich_text);
+    case 'heading_3':
+      return '### ' + richTextToPlain(data.rich_text);
+    case 'bulleted_list_item':
+      return '- ' + richTextToPlain(data.rich_text);
+    case 'numbered_list_item':
+      return '1. ' + richTextToPlain(data.rich_text); // every item is emitted as "1."; no running counter is kept
+    case 'to_do':
+      return (data.checked ? '- [x] ' : '- [ ] ') + richTextToPlain(data.rich_text);
+    case 'toggle':
+      return '> ' + richTextToPlain(data.rich_text); // toggle, callout and quote all render as a blockquote line
+    case 'callout':
+      return '> ' + richTextToPlain(data.rich_text);
+    case 'quote':
+      return '> ' + richTextToPlain(data.rich_text);
+    case 'code':
+      return '```\n' + richTextToPlain(data.rich_text) + '\n```'; // fenced block; only rich_text is read, so no language tag is emitted
+    case 'divider':
+      return '---';
+    default:
+      // Skip unsupported block types (image, embed, file, etc.)
+      return null;
+  }
+}
 // ── Property extraction ─────────────────────────────────────────────────────
 function extractTitle(page) {
@@ -700,6 +828,22 @@ function generateMarkdown(analysis, sinceDate, todayDate, timestamp, userMap) {
     lines.push('');
   }
+  // Targeted page content
+  const pageContents = analysis.pageContents || {};
+  if (Object.keys(pageContents).length > 0) {
+    lines.push('## Page Content');
+    lines.push('');
+    for (const [pageId, content] of Object.entries(pageContents)) {
+      // Find the page title from the pages list
+      const page = analysis.pages.find((p) => p.id.replace(/-/g, '') === pageId.replace(/-/g, ''));
+      const title = page ? extractTitle(page) : `Page ${pageId.slice(0, 8)}`;
+      lines.push(`### ${sanitizeForMarkdown(title)}`);
+      lines.push('');
+      lines.push(content);
+      lines.push('');
+    }
+  }
   // Summary
   lines.push('## Summary');
   lines.push('');

package/bin/content-store.js CHANGED Viewed

@@ -1852,6 +1852,114 @@ async function indexSignals(options = {}) {
     }
   }
+  // ── Chunk long signal entries for better embedding retrieval ──────────────
+  // Split signal content by ## headings into section-level entries.
+  // Each chunk gets its own embedding so semantic search matches at section level.
+  const MIN_CHUNK_CHARS = 200;
+  const MAX_CHUNK_CHARS = 3000;
+  // Collect all signal files across all channels for chunking
+  const allSignalFiles = [];
+  for (const ch of channels) {
+    const chDir = path.join(signalsDir, ch);
+    try {
+      const entries = fs.readdirSync(chDir, { withFileTypes: true });
+      for (const e of entries) {
+        if (e.isFile() && e.name.endsWith('.md')) {
+          allSignalFiles.push({ filePath: path.join(chDir, e.name), file: e.name, repo: 'signals/' + ch, channel: ch });
+        }
+      }
+      for (const ownerEntry of entries) {
+        if (!ownerEntry.isDirectory()) continue;
+        const ownerDir = path.join(chDir, ownerEntry.name);
+        let repoEntries;
+        try { repoEntries = fs.readdirSync(ownerDir, { withFileTypes: true }); } catch { continue; }
+        for (const repoEntry of repoEntries) {
+          if (!repoEntry.isDirectory()) continue;
+          const repoDir = path.join(ownerDir, repoEntry.name);
+          let repoFiles;
+          try { repoFiles = fs.readdirSync(repoDir).filter(f => f.endsWith('.md')); } catch { continue; }
+          for (const f of repoFiles) {
+            allSignalFiles.push({ filePath: path.join(repoDir, f), file: f, repo: `${ownerEntry.name}/${repoEntry.name}`, channel: ch });
+          }
+        }
+      }
+    } catch { continue; }
+  }
+  for (const { filePath, file, repo, channel: ch } of allSignalFiles) {
+    let content;
+    try {
+      content = fs.readFileSync(filePath, 'utf8');
+    } catch {
+      continue;
+    }
+    if (content.length < MIN_CHUNK_CHARS * 2) continue; // Too short to chunk
+    const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})/);
+    const date = dateMatch ? dateMatch[1] : file.replace(/\.md$/, '');
+    const titleMatch = content.match(/^#\s+(.+)$/m);
+    const parentTitle = titleMatch ? titleMatch[1].trim() : file.replace(/\.md$/, '');
+    const parentId = generateEntryId(date, repo, file.replace(/\.md$/, ''));
+    // Split by ## headings
+    const sections = content.split(/^(?=##\s)/m).filter(s => s.trim().length >= MIN_CHUNK_CHARS);
+    if (sections.length <= 1) continue; // Only one section — parent embedding is sufficient
+    for (let i = 0; i < sections.length; i++) {
+      let section = sections[i];
+      const headingMatch = section.match(/^##\s+(.+)$/m);
+      const sectionTitle = headingMatch ? headingMatch[1].trim() : `Section ${i + 1}`;
+      const chunkTitle = `${parentTitle} — ${sectionTitle}`;
+      if (section.length > MAX_CHUNK_CHARS) {
+        section = section.slice(0, MAX_CHUNK_CHARS);
+      }
+      const chunkId = generateEntryId(date, repo, `chunk-${i}-${file.replace(/\.md$/, '')}`);
+      const chunkHash = contentHash(section);
+      const existingChunk = existingIndex.entries[chunkId];
+      if (existingChunk && existingChunk.contentHash === chunkHash) {
+        if (doEmbeddings && !existingChunk.hasEmbedding) {
+          try {
+            const vec = await llm.generateEmbedding(section);
+            existingEmbeddings[chunkId] = vec;
+            existingChunk.hasEmbedding = true;
+          } catch {
+            // Skip
+          }
+        }
+        continue;
+      }
+      existingIndex.entries[chunkId] = {
+        date,
+        repo,
+        title: chunkTitle,
+        source: 'signal-chunk',
+        parentId,
+        chunkIndex: i,
+        user: '',
+        drifted: false,
+        contentHash: chunkHash,
+        contentLength: section.length,
+        tags: [ch, sectionTitle.toLowerCase()],
+        hasEmbedding: false,
+      };
+      if (doEmbeddings) {
+        try {
+          const vec = await llm.generateEmbedding(section);
+          existingEmbeddings[chunkId] = vec;
+          existingIndex.entries[chunkId].hasEmbedding = true;
+        } catch {
+          // Continue without embedding
+        }
+      }
+    }
+  }
   // Save
   existingIndex.entryCount = Object.keys(existingIndex.entries).length;
   backend.saveIndex(existingIndex);

package/bin/team-context.js CHANGED Viewed

@@ -4414,11 +4414,13 @@ function ensureContainerConfig() {
   // Notion connector
   if (!config.notion && process.env.NOTION_TOKEN) {
     const databases = process.env.TEAM_CONTEXT_NOTION_DATABASES;
+    const pages = process.env.TEAM_CONTEXT_NOTION_PAGES;
     config.notion = {
       transport: 'https',
       token: process.env.NOTION_TOKEN,
       token_env: 'NOTION_TOKEN',
       databases: databases ? databases.split(',').map((d) => d.trim()) : null,
+      pages: pages ? pages.split(',').map((p) => p.trim().replace(/-/g, '')) : null,
       last_pull: null,
     };
     changed = true;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "wayfind",
-  "version": "2.0.35",
+  "version": "2.0.37",
   "description": "Team decision trail for AI-assisted development. The connective tissue between product, engineering, and strategy.",
   "bin": {
     "wayfind": "./bin/team-context.js"