npm - jamdesk - Versions diffs - 1.1.28 → 1.1.29 - Mend

jamdesk 1.1.28 → 1.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/README.md +3 -1
package/package.json +1 -1
package/vendored/app/api/chat/[project]/route.ts +160 -133
package/vendored/components/chat/ChatEmptyState.tsx +13 -1
package/vendored/components/chat/ChatPanel.tsx +43 -9
package/vendored/lib/chat-prompt.ts +67 -29
package/vendored/lib/chat-tools.ts +111 -0
package/vendored/lib/crisp-bridge.ts +91 -0
package/vendored/lib/embedding-chunker.ts +85 -11
package/vendored/lib/query-rewriter.ts +91 -0
package/vendored/lib/vector-store.ts +5 -3
package/vendored/workspace-package-lock.json +73 -73

package/vendored/lib/chat-tools.ts ADDED Viewed

@@ -0,0 +1,111 @@
+/**
+ * Tool schemas for the AI documentation chat.
+ *
+ * Forces Claude into structured output: it must call exactly one of
+ * `answer` (for direct answers) or `ask_clarification` (when the query
+ * matches 2-3 distinct topics in the retrieved context).
+ *
+ * Replaces the regex-based `extractCitations` and `extractClarificationOptions`
+ * parsers. Citations become the `cited_page_slugs[]` field on the answer tool;
+ * clarification options become the `options[]` field on the clarification tool.
+ */
+import type Anthropic from '@anthropic-ai/sdk';
+// Stricter local shapes so tests + consumers get typed access to nested
+// schema fields. The SDK types `input_schema.properties` as `unknown | null`,
+// which would force `any` casts at every access site. These local types are
+// structurally assignable to `Anthropic.Tool`, so they remain valid SDK input.
+/**
+ * Literal-typed shape of the `answer` tool definition: a `markdown` string
+ * and a `cited_page_slugs` string array, both listed as required.
+ */
+interface AnswerToolSchema {
+  name: 'answer';
+  description: string;
+  input_schema: {
+    type: 'object';
+    properties: {
+      markdown: { type: 'string'; description: string };
+      cited_page_slugs: {
+        type: 'array';
+        items: { type: 'string' };
+        description: string;
+      };
+    };
+    required: ['markdown', 'cited_page_slugs'];
+  };
+}
+/**
+ * Literal-typed shape of the `ask_clarification` tool definition: a
+ * `question` string and an `options` string array declared with
+ * `minItems: 2` / `maxItems: 3`, both listed as required.
+ */
+interface ClarificationToolSchema {
+  name: 'ask_clarification';
+  description: string;
+  input_schema: {
+    type: 'object';
+    properties: {
+      question: { type: 'string'; description: string };
+      options: {
+        type: 'array';
+        items: { type: 'string' };
+        minItems: 2;
+        maxItems: 3;
+        description: string;
+      };
+    };
+    required: ['question', 'options'];
+  };
+}
+/**
+ * Tool definition the model calls to deliver a direct answer.
+ *
+ * The description tells the model to call this tool even when the context
+ * holds no answer, using a fixed "I don't have information..." sentence and
+ * an empty `cited_page_slugs` array, so every non-clarification reply
+ * arrives through this tool.
+ */
+export const ANSWER_TOOL: AnswerToolSchema = {
+  name: 'answer',
+  description:
+    'Provide a direct answer to the user\'s question using the documentation context. ' +
+    'Always populate cited_page_slugs with the pageSlug of every source you referenced. ' +
+    'If the context does not contain an answer, still call this tool and say "I don\'t have information about that in the documentation." with cited_page_slugs: [].',
+  input_schema: {
+    type: 'object',
+    properties: {
+      // Citations are carried in cited_page_slugs, so the markdown body is
+      // told not to embed "[Page Title]"-style citation text.
+      markdown: {
+        type: 'string',
+        description:
+          'The answer in markdown. Use code blocks with language hints when showing code. ' +
+          'Do not embed citation text like "[Page Title]" — citations are listed in cited_page_slugs.',
+      },
+      cited_page_slugs: {
+        type: 'array',
+        items: { type: 'string' },
+        description:
+          'The pageSlug values (e.g. "getting-started", "api/auth") for every documentation source you referenced. ' +
+          'Empty array if the answer is "I don\'t have information about that".',
+      },
+    },
+    required: ['markdown', 'cited_page_slugs'],
+  },
+};
+/**
+ * Tool definition the model calls when the question is ambiguous between
+ * 2-3 distinct topics found in the documentation context. It carries one
+ * disambiguation `question` and the short `options` labels to choose from.
+ */
+export const CLARIFICATION_TOOL: ClarificationToolSchema = {
+  name: 'ask_clarification',
+  description:
+    'Ask the user to choose between 2-3 distinct topics when their question is ambiguous. ' +
+    'Only use when the documentation context contains multiple unrelated features that could match ' +
+    '(e.g. "Post Analytics" vs "Link Analytics" when asked about "analytics"). ' +
+    'The question must end with a question mark.',
+  input_schema: {
+    type: 'object',
+    properties: {
+      question: {
+        type: 'string',
+        description: 'A short disambiguation question ending with "?". E.g. "Which type of analytics are you asking about?"',
+      },
+      // minItems/maxItems are advisory to the model — Anthropic does NOT
+      // enforce them server-side. The chat route must defensively validate
+      // options.length before rendering a clarification UI.
+      options: {
+        type: 'array',
+        items: { type: 'string' },
+        minItems: 2,
+        maxItems: 3,
+        description: 'The distinct topics the user can choose between. Short labels like "Post Analytics", "Link Analytics".',
+      },
+    },
+    required: ['question', 'options'],
+  },
+};
+// Both chat tools in one list: ANSWER_TOOL first, then CLARIFICATION_TOOL.
+// Exported as Anthropic.Tool[] so it passes directly to client.messages.stream({ tools: CHAT_TOOLS }).
+export const CHAT_TOOLS: Anthropic.Tool[] = [ANSWER_TOOL, CLARIFICATION_TOOL];

package/vendored/lib/crisp-bridge.ts ADDED Viewed

@@ -0,0 +1,91 @@
+/**
+ * Bridge to the Crisp chat widget's command queue API.
+ *
+ * Coordinates visibility between our AI chat panel and Crisp's launcher
+ * so they don't visually collide in the bottom-right corner of docs sites.
+ *
+ * Crisp's docs: https://help.crisp.chat/en/article/how-to-use-crisp-from-javascript-api-1xhvaxt/
+ */
+/** The Crisp actions this bridge is allowed to issue. */
+type CrispAction = 'chat:close' | 'chat:hide' | 'chat:show' | 'chat:open';
+/** A `['do', action]` tuple, the only command form pushed by this module. */
+type CrispCommand = ['do', CrispAction];
+/** Minimal structural view of `window.$crisp`: only `push` is used here. */
+interface CrispQueue {
+  push(command: CrispCommand): void;
+}
+/**
+ * Look up the Crisp command queue on `window`.
+ *
+ * @returns `window.$crisp` when it is truthy and has a callable `push`;
+ *   `null` when there is no `window` or no usable queue.
+ */
+function getCrisp(): CrispQueue | null {
+  if (typeof window === 'undefined') return null;
+  const q = (window as unknown as { $crisp?: unknown }).$crisp;
+  // Duck-type check: only a callable `push` is required of the queue.
+  if (q && typeof (q as { push?: unknown }).push === 'function') {
+    return q as CrispQueue;
+  }
+  return null;
+}
+/** @returns `true` when `getCrisp()` currently finds a usable Crisp queue. */
+export function crispAvailable(): boolean {
+  return getCrisp() !== null;
+}
+// hideCrispLauncher polls for the queue every RETRY_INTERVAL_MS and gives
+// up after RETRY_MAX_ATTEMPTS polls (30 x 500 ms).
+const RETRY_INTERVAL_MS = 500;
+const RETRY_MAX_ATTEMPTS = 30;
+// Handle of the in-flight hide retry interval, or null when none is running.
+let pendingHideTimer: ReturnType<typeof setInterval> | null = null;
+/** Cancel the hide retry interval, if one is running, and reset its handle. */
+function clearPendingHideRetry(): void {
+  if (pendingHideTimer !== null) {
+    clearInterval(pendingHideTimer);
+    pendingHideTimer = null;
+  }
+}
+/** Push `chat:close` followed by `chat:hide` onto the given Crisp queue. */
+function applyHide(crisp: CrispQueue): void {
+  // chat:close must come before chat:hide. Crisp can auto-reshow the
+  // launcher after a session reset, so closing any open session first
+  // reduces the race window.
+  crisp.push(['do', 'chat:close']);
+  crisp.push(['do', 'chat:hide']);
+}
+/**
+ * Hide the Crisp launcher, waiting for Crisp to load if necessary.
+ *
+ * No-op when there is no `window`. Any retry already in flight is cancelled
+ * first. If the queue is present the hide is applied immediately; otherwise
+ * the queue is polled every RETRY_INTERVAL_MS and the hide is applied on
+ * the first poll that finds it. Polling stops after RETRY_MAX_ATTEMPTS.
+ */
+export function hideCrispLauncher(): void {
+  if (typeof window === 'undefined') return;
+  clearPendingHideRetry();
+  const crisp = getCrisp();
+  if (crisp) {
+    applyHide(crisp);
+    return;
+  }
+  let attempts = 0;
+  pendingHideTimer = setInterval(() => {
+    attempts += 1;
+    const q = getCrisp();
+    if (q) {
+      applyHide(q);
+      clearPendingHideRetry();
+      return;
+    }
+    // Give up silently once the attempt budget is spent.
+    if (attempts >= RETRY_MAX_ATTEMPTS) {
+      clearPendingHideRetry();
+    }
+  }, RETRY_INTERVAL_MS);
+}
+/**
+ * Show the Crisp launcher again. Cancels any pending hide retry first so a
+ * late poll cannot re-hide it; does nothing further when no queue is found.
+ */
+export function showCrispLauncher(): void {
+  clearPendingHideRetry();
+  const crisp = getCrisp();
+  if (!crisp) return;
+  crisp.push(['do', 'chat:show']);
+}
+/**
+ * Show the Crisp launcher and open its chat window (`chat:show` then
+ * `chat:open`). Cancels any pending hide retry first; does nothing further
+ * when no queue is found.
+ */
+export function openCrispChat(): void {
+  clearPendingHideRetry();
+  const crisp = getCrisp();
+  if (!crisp) return;
+  crisp.push(['do', 'chat:show']);
+  crisp.push(['do', 'chat:open']);
+}
+// Test-only: clears module-level retry state between cases.
+// Currently that state is just the pending hide-retry interval.
+export function _resetForTests(): void {
+  clearPendingHideRetry();
+}

package/vendored/lib/embedding-chunker.ts CHANGED Viewed

@@ -14,8 +14,15 @@ export interface EmbeddingChunk {
   pageSlug: string;
   /** Heading of the section this chunk belongs to */
   sectionHeading: string;
-  /** Plain text content, stripped of markdown and JSX */
+  /** Plain-text body, stripped of markdown and JSX. Used for display (LLM context, search snippets). */
   content: string;
+  /**
+   * `<pageTitle> > <sectionHeading>\n` breadcrumb (plus `API Reference — METHOD /path\n`
+   * for API pages) that gets prepended to `content` when embedding — so BM25 finds
+   * chunks by page-title terms even when the body never uses them. Kept as a
+   * separate field so snippet consumers don't have to strip it.
+   */
+  prefix: string;
   /** Page title from frontmatter, or slug-derived fallback */
   pageTitle: string;
 }
@@ -111,18 +118,82 @@ function titleFromSlug(slug: string): string {
 }
 /**
- * Detect if a page is an API reference page and return a prefix label.
- * API pages get a prefix like "API Reference — POST /post\n" so the
- * embedding model clusters them distinctly from guides/tutorials.
+ * Build the per-chunk prefix that gets prepended to the cleaned content
+ * before embedding/upsert. Two purposes:
+ *
+ * 1. Title-breadcrumb: `<pageTitle> > <sectionHeading>` gives every chunk
+ *    a literal occurrence of its page title in the indexed text — so BM25
+ *    finds e.g. the `Changelog > April 2026` chunk for a "changelog" query
+ *    even when the month's content (Password Protection, YouTube Shorts)
+ *    never mentions the word "changelog" itself. Without this, pages that
+ *    happen to discuss a concept frequently (like `components/update`
+ *    documenting the `<Update>` MDX tag) outrank the actual answer chunks.
+ *
+ * 2. API-method tag: API-reference pages additionally get an
+ *    `API Reference — POST /endpoint` line so HTTP-method-specific queries
+ *    ("how do I POST to /analytics") cluster to the right endpoint page.
+ *
+ * Both lines end in `\n` so they stay visually separated from the content
+ * body when the chat model reads the full context block.
  */
 function getEmbeddingPrefix(
+  pageTitle: string,
+  sectionHeading: string,
   slug: string,
   frontmatter: Record<string, unknown>,
 ): string {
   const apiMethod = (frontmatter.api as string) || (frontmatter.openapi as string);
-  if (apiMethod) return `API Reference — ${apiMethod}\n`;
-  if (slug.startsWith('apis/')) return 'API Reference\n';
-  return '';
+  const apiLabel = apiMethod
+    ? `API Reference — ${apiMethod}\n`
+    : slug.startsWith('apis/') ? 'API Reference\n' : '';
+  const titleLabel = sectionHeading ? `${pageTitle} > ${sectionHeading}` : pageTitle;
+  return `${apiLabel}${titleLabel}\n`;
+}
+/**
+ * Promote each `<Update label="...">` wrapper to a synthetic `### <label>`
+ * heading and replace each `</Update>` with a newline.
+ *
+ * Without this, changelog-style pages that use `<Update>` wrappers (a
+ * Mintlify convention) collapse into a single heading-less blob — features
+ * in April get merged with features in March, and retrieval can't target
+ * a specific release. Promoting each label to a synthetic `### <label>`
+ * heading lets the existing heading splitter separate them.
+ *
+ * Matches `label` regardless of attribute order, handles self-closing tags,
+ * and leaves Update openers without a `label` attribute alone for
+ * `stripForEmbedding` to handle.
+ *
+ * Fenced code blocks are matched by the same regex and returned verbatim,
+ * so Update tags inside ``` ... ``` examples (e.g. `components/update.mdx`
+ * pages that document the component itself) don't produce spurious chunks.
+ * Doing this in one pass, instead of masking fences behind NUL-delimited
+ * placeholders and restoring them afterwards, means content that already
+ * contains a placeholder-looking sequence can never be rewritten, and no
+ * index lookup can yield `undefined`.
+ *
+ * `[^><]*?` (not `[^>]*?`) prevents a malformed opener (missing `>`) from
+ * greedily matching through to the next `</Update>` and silently eating
+ * body text.
+ *
+ * @param content Page source with line endings already normalized to `\n`.
+ * @returns The content with labelled Update openers turned into headings.
+ */
+function preprocessUpdateBlocks(content: string): string {
+  return content.replace(
+    /```[\s\S]*?```|<Update\b[^><]*?\blabel="([^"]+)"[^><]*?\/?>|<\/Update>/g,
+    (match: string, label: string | undefined) => {
+      // Fenced code block: keep byte-for-byte.
+      if (match.startsWith('```')) return match;
+      // The label group did not participate, so this is a closing tag.
+      if (label === undefined) return '\n';
+      const clean = sanitizeHeadingText(label);
+      return clean ? `\n### ${clean}\n` : '\n';
+    },
+  );
+}
+/**
+ * Collapse whitespace and drop leading `#` runs so a synthetic heading
+ * derived from a label (or a page title flowing into the breadcrumb) can't
+ * inject extra markdown heading depth or span multiple lines.
+ *
+ * Whitespace is collapsed and trimmed before the `#` runs are stripped.
+ * Otherwise input such as `"  ## April"` or `"\n# April"` would keep its
+ * hashes, because the `^` anchor would see whitespace first.
+ *
+ * @param raw Label or title text as authored.
+ * @returns Single-line text with no leading `#` runs; may be empty.
+ */
+function sanitizeHeadingText(raw: string): string {
+  return raw
+    .replace(/\s+/g, ' ')
+    .trim()
+    // After collapsing, `#` runs are separated by at most one space.
+    .replace(/^(?:#+ ?)+/, '');
+}
 /**
@@ -137,11 +208,11 @@ export function chunkPageForEmbedding(
   maxChars = 2000,
 ): EmbeddingChunk[] {
   const slug = page.path.replace(/\.mdx?$/, '').replace(/\\/g, '/');
-  const pageTitle = (page.frontmatter.title as string) || titleFromSlug(slug);
-  const embeddingPrefix = getEmbeddingPrefix(slug, page.frontmatter);
+  const rawTitle = (page.frontmatter.title as string) || titleFromSlug(slug);
+  const pageTitle = sanitizeHeadingText(rawTitle) || titleFromSlug(slug);
   // Normalize Windows line endings before extracting sections
-  const normalizedContent = page.content.replace(/\r\n/g, '\n');
+  const normalizedContent = preprocessUpdateBlocks(page.content.replace(/\r\n/g, '\n'));
   const sections = extractSections(normalizedContent);
   const chunks: EmbeddingChunk[] = [];
@@ -156,13 +227,16 @@ export function chunkPageForEmbedding(
       ? [cleanContent]
       : splitAtSentenceBoundaries(cleanContent, maxChars);
+    const prefix = getEmbeddingPrefix(pageTitle, section.heading, slug, page.frontmatter);
     for (const piece of pieces) {
       if (!piece.trim()) continue;
       chunks.push({
         id: `${slug}#${chunkIndex}`,
         pageSlug: slug,
         sectionHeading: section.heading,
-        content: embeddingPrefix + piece,
+        content: piece,
+        prefix,
         pageTitle,
       });
       chunkIndex++;

package/vendored/lib/query-rewriter.ts ADDED Viewed

@@ -0,0 +1,91 @@
+/**
+ * Lightweight query rewriter for AI chat retrieval.
+ *
+ * Translates vague/natural-language user queries into doc-search vocabulary
+ * using Claude Haiku. Runs in parallel with the original query in the chat
+ * route, so latency is masked — if the rewrite is slow or fails, the caller
+ * falls back to the original query without penalty.
+ *
+ * Design notes:
+ * - Returns null on ANY failure path (no client, API error, empty response).
+ *   Callers must handle null by using the original query.
+ * - max_tokens is capped at 80 — rewrites are 3-10 words typically.
+ * - Conversation history is included so follow-ups like "what about the other
+ *   one" can be disambiguated against the prior user message.
+ */
+import { getAnthropicClient } from '@/lib/anthropic-client';
+// Model id passed to `messages.create` for every rewrite request.
+const REWRITE_MODEL = 'claude-haiku-4-5-20251001';
+// Upper bound, in characters, applied to the returned rewrite.
+const MAX_REWRITE_CHARS = 200;
+/**
+ * System prompt for the rewrite call: the rules for producing a short
+ * doc-search query, followed by few-shot examples. Several examples map
+ * "what's new"-style questions onto changelog vocabulary.
+ */
+export const SYSTEM_PROMPT = `You rewrite a user's chat question into a short documentation search query.
+Rules:
+- Output ONLY the rewritten query — no quotes, no explanation, no prefix.
+- Use terminology that would appear in technical documentation.
+- Keep it short (3-10 words is ideal).
+- If the user's question already uses technical vocabulary, output it unchanged.
+- If conversation history is provided, use it to disambiguate references like "the other one" or "that".
+Examples:
+Q: how do I make my docs live → deploy documentation site
+Q: change colors → theme customization
+Q: can it do auth → authentication setup
+Q: what about the other one (after discussing analytics) → link analytics
+Q: what is the most recent feature → changelog latest updates new features
+Q: what's new → changelog new features updates
+Q: latest release → changelog release notes updates
+Q: any updates recently → changelog recent updates new features`;
+/** One prior chat turn; only `user` turns are consulted by the rewriter. */
+export interface HistoryMessage {
+  role: 'user' | 'assistant';
+  content: string;
+}
+/**
+ * Rewrite a user query into doc-search vocabulary.
+ * Returns null on any failure — caller should fall back to the original query.
+ *
+ * @param message The current user question.
+ * @param history Prior turns; only the most recent `user` turn whose
+ *   content differs from `message` is used.
+ * @returns The trimmed, unquoted rewrite capped at MAX_REWRITE_CHARS, or
+ *   `null` when there is no client, no text block, an empty rewrite, or
+ *   the request throws.
+ */
+export async function rewriteQueryForSearch(
+  message: string,
+  history: HistoryMessage[],
+): Promise<string | null> {
+  const anthropic = getAnthropicClient();
+  if (!anthropic) return null;
+  // Compose the user prompt: include the last user message from history (if any)
+  // so follow-up references resolve. Ignore assistant replies — they'd contaminate
+  // the rewrite with doc-flavored phrasing that might mask the user's real intent.
+  // The copy before reverse() keeps the caller's array unmodified.
+  const priorUserMsg = [...history].reverse().find(
+    h => h.role === 'user' && h.content !== message,
+  );
+  const userPrompt = priorUserMsg
+    ? `Previous question: ${priorUserMsg.content}\nCurrent question: ${message}\n\nRewrite the current question:`
+    : `Question: ${message}\n\nRewrite:`;
+  try {
+    const response = await anthropic.messages.create({
+      model: REWRITE_MODEL,
+      max_tokens: 80,
+      temperature: 0.1,
+      system: SYSTEM_PROMPT,
+      messages: [{ role: 'user', content: userPrompt }],
+    });
+    const textBlock = response.content.find(c => c.type === 'text');
+    if (!textBlock || textBlock.type !== 'text') return null;
+    let rewrite = textBlock.text.trim();
+    // Strip surrounding quotes if Claude wrapped the output
+    rewrite = rewrite.replace(/^["'`]+|["'`]+$/g, '').trim();
+    if (!rewrite) return null;
+    // Defensive cap — prevents a runaway response from bloating the search query
+    return rewrite.slice(0, MAX_REWRITE_CHARS);
+  } catch {
+    // Best-effort: any error from the request degrades to null.
+    return null;
+  }
+}

package/vendored/lib/vector-store.ts CHANGED Viewed

@@ -44,8 +44,8 @@ const HYBRID_QUERY_OPTS = {
  */
 const MIN_SCORE = 0.3;
-/** Max chunks per page — ensures diverse results across pages */
-const MAX_CHUNKS_PER_PAGE = 3;
+/** Max chunks per page — raised from 3 to 4 to match broader topK retrieval budget */
+const MAX_CHUNKS_PER_PAGE = 4;
 /** Create a namespaced Upstash Vector index for a project. */
 function getNamespace(projectId: string) {
@@ -80,7 +80,9 @@ export async function upsertChunks(
     await ns.upsert(
       batch.map(c => ({
         id: c.id,
-        data: c.content,
+        // Prefix + body goes to Upstash for embedding/BM25; metadata.content
+        // stays prefix-free so consumers display clean body text.
+        data: c.prefix + c.content,
         metadata: {
           pageSlug: c.pageSlug,
           sectionHeading: c.sectionHeading,