npm - @chatman-media/kb - Versions diffs - 1.3.0 - Mend

@chatman-media/kb 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112) hide show

package/LICENSE +21 -0
package/README.md +169 -0
package/dist/ab-router.d.ts +66 -0
package/dist/ab-router.d.ts.map +1 -0
package/dist/answer-types.d.ts +194 -0
package/dist/answer-types.d.ts.map +1 -0
package/dist/answer.d.ts +59 -0
package/dist/answer.d.ts.map +1 -0
package/dist/built-in-tools/calendly.d.ts +19 -0
package/dist/built-in-tools/calendly.d.ts.map +1 -0
package/dist/chunk.d.ts +48 -0
package/dist/chunk.d.ts.map +1 -0
package/dist/conversation-store.d.ts +76 -0
package/dist/conversation-store.d.ts.map +1 -0
package/dist/eval.d.ts +64 -0
package/dist/eval.d.ts.map +1 -0
package/dist/extract-user-facts.d.ts +27 -0
package/dist/extract-user-facts.d.ts.map +1 -0
package/dist/fact-checker.d.ts +46 -0
package/dist/fact-checker.d.ts.map +1 -0
package/dist/grade-skills.d.ts +29 -0
package/dist/grade-skills.d.ts.map +1 -0
package/dist/index.d.ts +76 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +62655 -0
package/dist/ingest.d.ts +49 -0
package/dist/ingest.d.ts.map +1 -0
package/dist/multi-query.d.ts +29 -0
package/dist/multi-query.d.ts.map +1 -0
package/dist/parse-pdf.d.ts +14 -0
package/dist/parse-pdf.d.ts.map +1 -0
package/dist/persona-shortcuts.d.ts +51 -0
package/dist/persona-shortcuts.d.ts.map +1 -0
package/dist/prompt.d.ts +9 -0
package/dist/prompt.d.ts.map +1 -0
package/dist/reflect.d.ts +29 -0
package/dist/reflect.d.ts.map +1 -0
package/dist/reranker.d.ts +71 -0
package/dist/reranker.d.ts.map +1 -0
package/dist/retrieval-utils.d.ts +94 -0
package/dist/retrieval-utils.d.ts.map +1 -0
package/dist/retry.d.ts +53 -0
package/dist/retry.d.ts.map +1 -0
package/dist/rewrite-query.d.ts +30 -0
package/dist/rewrite-query.d.ts.map +1 -0
package/dist/sanitize.d.ts +21 -0
package/dist/sanitize.d.ts.map +1 -0
package/dist/semantic-cache.d.ts +70 -0
package/dist/semantic-cache.d.ts.map +1 -0
package/dist/server.d.ts +77 -0
package/dist/server.d.ts.map +1 -0
package/dist/stores/memory-store.d.ts +72 -0
package/dist/stores/memory-store.d.ts.map +1 -0
package/dist/structured-output.d.ts +21 -0
package/dist/structured-output.d.ts.map +1 -0
package/dist/styles.d.ts +186 -0
package/dist/styles.d.ts.map +1 -0
package/dist/summarize-conversation.d.ts +31 -0
package/dist/summarize-conversation.d.ts.map +1 -0
package/dist/system-prompt.d.ts +11 -0
package/dist/system-prompt.d.ts.map +1 -0
package/dist/text-style-rules.d.ts +133 -0
package/dist/text-style-rules.d.ts.map +1 -0
package/dist/tool-loop.d.ts +44 -0
package/dist/tool-loop.d.ts.map +1 -0
package/dist/tools.d.ts +64 -0
package/dist/tools.d.ts.map +1 -0
package/dist/topic-classifier.d.ts +11 -0
package/dist/topic-classifier.d.ts.map +1 -0
package/dist/types.d.ts +83 -0
package/dist/types.d.ts.map +1 -0
package/dist/utils.d.ts +19 -0
package/dist/utils.d.ts.map +1 -0
package/dist/vision.d.ts +72 -0
package/dist/vision.d.ts.map +1 -0
package/package.json +76 -0
package/src/ab-router.ts +118 -0
package/src/answer-types.ts +191 -0
package/src/answer.ts +696 -0
package/src/built-in-tools/calendly.ts +32 -0
package/src/chunk.ts +198 -0
package/src/conversation-store.ts +138 -0
package/src/eval.ts +127 -0
package/src/extract-user-facts.ts +120 -0
package/src/fact-checker.ts +171 -0
package/src/grade-skills.ts +79 -0
package/src/index.ts +191 -0
package/src/ingest.ts +193 -0
package/src/multi-query.ts +89 -0
package/src/parse-pdf.ts +24 -0
package/src/persona-shortcuts.ts +255 -0
package/src/prompt.ts +190 -0
package/src/reflect.ts +99 -0
package/src/reranker.ts +166 -0
package/src/retrieval-utils.ts +209 -0
package/src/retry.ts +139 -0
package/src/rewrite-query.ts +124 -0
package/src/sanitize.ts +44 -0
package/src/semantic-cache.ts +154 -0
package/src/server.ts +164 -0
package/src/stores/memory-store.ts +249 -0
package/src/structured-output.ts +47 -0
package/src/styles.ts +138 -0
package/src/summarize-conversation.ts +88 -0
package/src/system-prompt.ts +118 -0
package/src/text-style-rules.ts +244 -0
package/src/tool-loop.ts +110 -0
package/src/tools.ts +79 -0
package/src/topic-classifier.ts +112 -0
package/src/types.ts +91 -0
package/src/utils.ts +81 -0
package/src/vision.ts +265 -0

package/src/reranker.ts ADDED Viewed

@@ -0,0 +1,166 @@
+import type { KbSearchHit } from "./types.ts";
+/**
+ * Cross-encoder reranker interface. Called after initial retrieval (vector /
+ * hybrid) to re-score and re-order hits using a more expensive but accurate
+ * relevance model. Optional third stage in the retrieval pipeline.
+ *
+ * The implementations in this file return hits in the order reported by the
+ * provider and overwrite each hit's `distance` with `1 - relevance_score`,
+ * keeping the "lower = more relevant" convention.
+ *
+ * `rerank(query, hits, topK)`: `query` is the text the hits are scored
+ * against, `hits` are the candidates from the earlier retrieval stage, and
+ * `topK` caps how many hits come back (the implementations in this file
+ * default it to `hits.length`).
+ */
+export interface Reranker {
+  rerank(query: string, hits: KbSearchHit[], topK?: number): Promise<KbSearchHit[]>;
+}
+// ── Cohere ────────────────────────────────────────────────────────────────────
+/** Constructor options for {@link CohereReranker}. */
+export interface CohereRerankerOptions {
+  /** API key; sent as a `Bearer` token in the `authorization` header. Required. */
+  apiKey: string;
+  /** Default: "rerank-v3.5" */
+  model?: string;
+  /** Base URL. Default: "https://api.cohere.com/v2" */
+  baseUrl?: string;
+  /** Per-request timeout ms. Default: 30_000. */
+  timeoutMs?: number;
+  /** Custom `fetch` implementation (e.g. for tests). Default: `globalThis.fetch`. */
+  fetch?: typeof fetch;
+}
+/** The subset of the rerank response body that {@link CohereReranker} reads. */
+interface CohereRerankResponse {
+  /** One entry per returned document; `index` points into the submitted `documents` array. */
+  results?: Array<{ index: number; relevance_score: number }>;
+  /** Used as the error text when the request fails. */
+  message?: string;
+}
+/**
+ * Reranker backed by the Cohere Rerank API.
+ *
+ * Sends the text of every hit to `POST {baseUrl}/rerank` and returns the hits
+ * in the order the API reports them, with `distance` replaced by
+ * `1 - relevance_score` so that lower still means more relevant.
+ *
+ * @example
+ * ```ts
+ * import { CohereReranker } from "@chatman-media/kb";
+ *
+ * const reranker = new CohereReranker({ apiKey: process.env.COHERE_API_KEY! });
+ * const reranked = await reranker.rerank(question, hits, 5);
+ * ```
+ */
+export class CohereReranker implements Reranker {
+  private readonly apiKey: string;
+  private readonly model: string;
+  private readonly baseUrl: string;
+  private readonly timeoutMs: number;
+  private readonly fetchImpl: typeof fetch;
+  /**
+   * @throws Error when `apiKey` is missing or empty.
+   */
+  constructor(opts: CohereRerankerOptions) {
+    if (!opts.apiKey) throw new Error("CohereReranker: apiKey required");
+    this.apiKey = opts.apiKey;
+    this.model = opts.model ?? "rerank-v3.5";
+    // Drop trailing slashes so `${baseUrl}/rerank` never contains "//".
+    this.baseUrl = (opts.baseUrl ?? "https://api.cohere.com/v2").replace(/\/+$/, "");
+    this.timeoutMs = opts.timeoutMs ?? 30_000;
+    this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
+  }
+  /**
+   * Re-score `hits` against `query`.
+   *
+   * @param query  Text the documents are scored against.
+   * @param hits   Candidate hits; returned untouched when empty.
+   * @param topK   Maximum number of hits to return. Defaults to `hits.length`
+   *               and is clamped to it; values below 1 yield `[]` without
+   *               calling the API.
+   * @returns New hit objects (the inputs are not mutated) carrying the
+   *          remapped `distance`.
+   * @throws Error on a non-2xx status or when the body has no `results`
+   *         array. Network and timeout errors from `fetch` propagate as-is.
+   */
+  async rerank(query: string, hits: KbSearchHit[], topK?: number): Promise<KbSearchHit[]> {
+    if (hits.length === 0) return hits;
+    // Never request more documents than were sent, and never send a
+    // zero/negative/NaN `top_n` to the API.
+    const k = Math.min(Math.floor(topK ?? hits.length), hits.length);
+    if (!(k >= 1)) return [];
+    const res = await this.fetchImpl(`${this.baseUrl}/rerank`, {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+        authorization: `Bearer ${this.apiKey}`,
+      },
+      body: JSON.stringify({
+        model: this.model,
+        query,
+        documents: hits.map((h) => h.text),
+        top_n: k,
+      }),
+      signal: AbortSignal.timeout(this.timeoutMs),
+    });
+    // Error responses are not always JSON (e.g. an HTML page from a gateway).
+    // Parse defensively so the caller sees the HTTP status, not a SyntaxError.
+    let payload: CohereRerankResponse | null = null;
+    try {
+      payload = (await res.json()) as CohereRerankResponse | null;
+    } catch {
+      if (res.ok) throw new Error("CohereReranker: response body is not valid JSON");
+    }
+    const results = payload?.results;
+    if (!res.ok || !Array.isArray(results)) {
+      throw new Error(`CohereReranker: ${payload?.message ?? `HTTP ${res.status}`}`);
+    }
+    return results.slice(0, k).flatMap(({ index, relevance_score }) => {
+      const hit = hits[index];
+      // Ignore indices that do not map back to a submitted document.
+      if (!hit) return [];
+      // Remap to distance convention: lower = more relevant
+      return [{ ...hit, distance: 1 - relevance_score }];
+    });
+  }
+}
+// ── Jina ──────────────────────────────────────────────────────────────────────
+/** Constructor options for {@link JinaReranker}. */
+export interface JinaRerankerOptions {
+  /** API key; sent as a `Bearer` token in the `authorization` header. Required. */
+  apiKey: string;
+  /** Default: "jina-reranker-v2-base-multilingual" */
+  model?: string;
+  /** Base URL. Default: "https://api.jina.ai/v1" */
+  baseUrl?: string;
+  /** Per-request timeout ms. Default: 30_000. */
+  timeoutMs?: number;
+  /** Custom `fetch` implementation (e.g. for tests). Default: `globalThis.fetch`. */
+  fetch?: typeof fetch;
+}
+/** The subset of the rerank response body that {@link JinaReranker} reads. */
+interface JinaRerankResponse {
+  /** One entry per returned document; `index` points into the submitted `documents` array. */
+  results?: Array<{ index: number; relevance_score: number }>;
+  /** Used as the error text when the request fails. */
+  detail?: string;
+}
+/**
+ * Reranker backed by the Jina Reranker API.
+ * The default model is multilingual — works well for Russian and Chinese KB.
+ *
+ * Sends the text of every hit to `POST {baseUrl}/rerank` and returns the hits
+ * in the order the API reports them, with `distance` replaced by
+ * `1 - relevance_score` so that lower still means more relevant.
+ *
+ * @example
+ * ```ts
+ * import { JinaReranker } from "@chatman-media/kb";
+ *
+ * const reranker = new JinaReranker({ apiKey: process.env.JINA_API_KEY! });
+ * const reranked = await reranker.rerank(question, hits, 5);
+ * ```
+ */
+export class JinaReranker implements Reranker {
+  private readonly apiKey: string;
+  private readonly model: string;
+  private readonly baseUrl: string;
+  private readonly timeoutMs: number;
+  private readonly fetchImpl: typeof fetch;
+  /**
+   * @throws Error when `apiKey` is missing or empty.
+   */
+  constructor(opts: JinaRerankerOptions) {
+    if (!opts.apiKey) throw new Error("JinaReranker: apiKey required");
+    this.apiKey = opts.apiKey;
+    this.model = opts.model ?? "jina-reranker-v2-base-multilingual";
+    // Drop trailing slashes so `${baseUrl}/rerank` never contains "//".
+    this.baseUrl = (opts.baseUrl ?? "https://api.jina.ai/v1").replace(/\/+$/, "");
+    this.timeoutMs = opts.timeoutMs ?? 30_000;
+    this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
+  }
+  /**
+   * Re-score `hits` against `query`.
+   *
+   * @param query  Text the documents are scored against.
+   * @param hits   Candidate hits; returned untouched when empty.
+   * @param topK   Maximum number of hits to return. Defaults to `hits.length`
+   *               and is clamped to it; values below 1 yield `[]` without
+   *               calling the API.
+   * @returns New hit objects (the inputs are not mutated) carrying the
+   *          remapped `distance`.
+   * @throws Error on a non-2xx status or when the body has no `results`
+   *         array. Network and timeout errors from `fetch` propagate as-is.
+   */
+  async rerank(query: string, hits: KbSearchHit[], topK?: number): Promise<KbSearchHit[]> {
+    if (hits.length === 0) return hits;
+    // Never request more documents than were sent, and never send a
+    // zero/negative/NaN `top_n` to the API.
+    const k = Math.min(Math.floor(topK ?? hits.length), hits.length);
+    if (!(k >= 1)) return [];
+    const res = await this.fetchImpl(`${this.baseUrl}/rerank`, {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+        authorization: `Bearer ${this.apiKey}`,
+      },
+      body: JSON.stringify({
+        model: this.model,
+        query,
+        documents: hits.map((h) => h.text),
+        top_n: k,
+      }),
+      signal: AbortSignal.timeout(this.timeoutMs),
+    });
+    // Error responses are not always JSON (e.g. an HTML page from a gateway).
+    // Parse defensively so the caller sees the HTTP status, not a SyntaxError.
+    let payload: JinaRerankResponse | null = null;
+    try {
+      payload = (await res.json()) as JinaRerankResponse | null;
+    } catch {
+      if (res.ok) throw new Error("JinaReranker: response body is not valid JSON");
+    }
+    const results = payload?.results;
+    if (!res.ok || !Array.isArray(results)) {
+      // Defensive: the body is untyped at runtime, so `detail` may not be a
+      // string; serialise it rather than printing "[object Object]".
+      const detail: unknown = payload?.detail;
+      const reason =
+        typeof detail === "string"
+          ? detail
+          : detail != null
+            ? JSON.stringify(detail)
+            : `HTTP ${res.status}`;
+      throw new Error(`JinaReranker: ${reason}`);
+    }
+    return results.slice(0, k).flatMap(({ index, relevance_score }) => {
+      const hit = hits[index];
+      // Ignore indices that do not map back to a submitted document.
+      if (!hit) return [];
+      // Remap to distance convention: lower = more relevant
+      return [{ ...hit, distance: 1 - relevance_score }];
+    });
+  }
+}

package/src/retrieval-utils.ts ADDED Viewed

@@ -0,0 +1,209 @@
+/**
+ * Post-retrieval utilities for the RAG pipeline.
+ *
+ * Three independent transforms, applied in order:
+ *
+ * 0. **`rrfMerge`** — merge multiple hit-lists (from multi-query expansion) via
+ *    Reciprocal Rank Fusion. Deduplicates by chunk_id, boosts chunks that rank
+ *    high across multiple queries.
+ *
+ * 1. **`applyDynamicThreshold`** — trim hits that are "too far" from the query.
+ *    Prevents hallucination-inducing weak matches from polluting the context.
+ *
+ * 2. **`mmrDiversify`** — re-rank via Maximal Marginal Relevance so that the
+ *    final context window covers diverse sub-topics rather than repeating the
+ *    same dense cluster of near-duplicate chunks.
+ *
+ * All functions are pure (no I/O) and operate on the {@link KbSearchHit} array
+ * that comes back from `IKbStore.search` / `IKbStore.hybridSearch`.
+ */
+import type { KbSearchHit } from "./types.ts";
+// ── RRF merge ────────────────────────────────────────────────────────────────
+/** Options for {@link rrfMerge}. */
+export interface RrfMergeOpts {
+  /**
+   * RRF smoothing constant. Higher = more weight to lower-ranked items.
+   * Standard value: 60.
+   * (Each appearance contributes `1 / (k + rank)`, so a larger `k` flattens
+   * the gap between ranks.)
+   */
+  k?: number;
+  /**
+   * Maximum number of hits to return. Defaults to all unique hits.
+   */
+  topN?: number;
+}
+/**
+ * Reciprocal Rank Fusion — merge multiple retrieval result lists into one.
+ *
+ * Each hit's score is the sum of `1 / (k + rank)` across all lists where it
+ * appears (1-based rank). Hits that appear in multiple lists get boosted.
+ * Deduplication is by `chunk_id`; the first occurrence encountered supplies
+ * the hit's other fields.
+ *
+ * The output uses the distance convention (lower = better):
+ * `distance = 1 / (1 + rrf_score)` so values stay in (0, 1].
+ *
+ * Fast path: when exactly one list is given there is nothing to fuse, so it
+ * is returned as-is (only truncated to `topN`) with its original distances.
+ *
+ * @param hitLists  One list per query, each sorted best-first.
+ * @param opts      See {@link RrfMergeOpts}. `topN: 0` (or a negative value)
+ *                  yields an empty result; an omitted `topN` means no cap.
+ * @returns Merged hits, best first.
+ */
+export function rrfMerge(hitLists: KbSearchHit[][], opts: RrfMergeOpts = {}): KbSearchHit[] {
+  const { k = 60, topN } = opts;
+  // Distinguish "no cap" (undefined/NaN) from an explicit cap of 0; a plain
+  // truthiness test would treat `topN: 0` as "return everything".
+  const limit = topN == null || Number.isNaN(topN) ? undefined : Math.max(0, topN);
+  const cap = (list: KbSearchHit[]): KbSearchHit[] =>
+    limit === undefined ? list : list.slice(0, limit);
+  if (hitLists.length === 0) return [];
+  if (hitLists.length === 1) return cap(hitLists[0]!);
+  // chunk_id → { hit, rrf_score }
+  const scores = new Map<number, { hit: KbSearchHit; score: number }>();
+  for (const list of hitLists) {
+    for (let i = 0; i < list.length; i++) {
+      const hit = list[i]!;
+      const rank = i + 1; // 1-based
+      const contribution = 1 / (k + rank);
+      const existing = scores.get(hit.chunk_id);
+      if (existing) {
+        existing.score += contribution;
+      } else {
+        scores.set(hit.chunk_id, { hit, score: contribution });
+      }
+    }
+  }
+  const sorted = [...scores.values()].sort((a, b) => b.score - a.score);
+  const result = sorted.map(({ hit, score }) => ({
+    ...hit,
+    // Map to distance: higher RRF score → lower distance
+    distance: 1 / (1 + score),
+  }));
+  return cap(result);
+}
+// ── Dynamic threshold ────────────────────────────────────────────────────────
+/** Options for {@link applyDynamicThreshold}. */
+export interface DynamicThresholdOpts {
+  /**
+   * Drop hits whose `distance` exceeds this value.
+   * Cosine distance is in [0, 2]; typical "useful" range is ≤ 0.4.
+   * Default: 0.45.
+   */
+  threshold?: number;
+  /**
+   * Always keep at least this many hits even if they all exceed the threshold.
+   * Prevents the context from going completely empty.
+   * Default: 1.
+   */
+  minHits?: number;
+}
+/**
+ * Keep only the hits whose `distance` is at or below the threshold, but never
+ * return fewer than `minHits` when the input has that many.
+ *
+ * If too few hits pass the cut, the first `minHits` entries of the input are
+ * returned instead, so callers should pass hits sorted best-first.
+ *
+ * @param hits  Candidate hits; an empty array is returned as-is.
+ * @param opts  See {@link DynamicThresholdOpts}.
+ */
+export function applyDynamicThreshold(
+  hits: KbSearchHit[],
+  opts: DynamicThresholdOpts = {},
+): KbSearchHit[] {
+  if (hits.length === 0) return hits;
+  const cutoff = opts.threshold === undefined ? 0.45 : opts.threshold;
+  const floor = opts.minHits === undefined ? 1 : opts.minHits;
+  const withinCutoff: KbSearchHit[] = [];
+  for (const candidate of hits) {
+    if (candidate.distance <= cutoff) withinCutoff.push(candidate);
+  }
+  if (withinCutoff.length >= floor) return withinCutoff;
+  // Not enough survivors: fall back to the leading `floor` hits.
+  return hits.slice(0, floor);
+}
+// ── MMR diversification ──────────────────────────────────────────────────────
+/** Options for {@link mmrDiversify}. */
+export interface MmrOpts {
+  /**
+   * Trade-off between relevance and diversity.
+   * - 1.0 → pure relevance (same as original ranking)
+   * - 0.0 → pure diversity (greedy maximum coverage)
+   * Default: 0.6.
+   */
+  lambda?: number;
+  /**
+   * Maximum number of hits to return after diversification.
+   * Defaults to the full input length.
+   */
+  topK?: number;
+}
+/**
+ * Maximal Marginal Relevance re-ranking.
+ *
+ * Iteratively selects the next chunk that maximises:
+ *   `score = λ * relevance - (1 - λ) * max_similarity_to_already_selected`
+ *
+ * Relevance is derived from the search distance (lower distance = higher
+ * relevance). Inter-chunk similarity is approximated with **Jaccard overlap on
+ * trigrams** — cheap, no extra embedder call required, and surprisingly
+ * effective at detecting paraphrase duplicates.
+ *
+ * @param hits  Sorted by relevance (best first), as returned by the store.
+ * @param opts  See {@link MmrOpts}. `topK` is clamped to `[0, hits.length]`,
+ *              so a negative value yields an empty result.
+ * @returns A new array with at most `topK` hits in selection order; the input
+ *          array and its hit objects are not modified.
+ */
+export function mmrDiversify(hits: KbSearchHit[], opts: MmrOpts = {}): KbSearchHit[] {
+  const { lambda = 0.6, topK } = opts;
+  // Clamp on both sides: a negative count would otherwise reach `slice(0, k)`
+  // below and return "all but the last |k|" hits.
+  const k = Math.max(0, Math.min(topK ?? hits.length, hits.length));
+  if (k <= 1 || hits.length <= 1) return hits.slice(0, k);
+  // Pre-compute trigram sets for each hit (character trigrams on lowercased text).
+  const trigramSets = hits.map((h) => trigrams(h.text));
+  // Normalise distances to relevance scores in [0, 1].
+  // Lower distance = higher relevance.  Shift by max so the worst hit = 0.
+  const maxDist = Math.max(...hits.map((h) => h.distance));
+  const relevance = hits.map((h) => (maxDist > 0 ? 1 - h.distance / maxDist : 1));
+  const selected: number[] = []; // indices into `hits`
+  const remaining = new Set(hits.map((_, i) => i));
+  // Running maximum similarity of each candidate to anything selected so far.
+  // Updated once per pick, so no pair is ever compared twice.
+  const maxSimToSelected = hits.map(() => 0);
+  while (selected.length < k && remaining.size > 0) {
+    let bestIdx = -1;
+    let bestScore = -Infinity;
+    for (const idx of remaining) {
+      const score = lambda * relevance[idx]! - (1 - lambda) * maxSimToSelected[idx]!;
+      if (score > bestScore) {
+        bestScore = score;
+        bestIdx = idx;
+      }
+    }
+    // Only reachable when every score is NaN (e.g. NaN distances).
+    if (bestIdx === -1) break;
+    selected.push(bestIdx);
+    remaining.delete(bestIdx);
+    const picked = trigramSets[bestIdx]!;
+    for (const idx of remaining) {
+      const sim = jaccardSimilarity(trigramSets[idx]!, picked);
+      if (sim > maxSimToSelected[idx]!) maxSimToSelected[idx] = sim;
+    }
+  }
+  return selected.map((i) => hits[i]!);
+}
+// ── Helpers ──────────────────────────────────────────────────────────────────
+/**
+ * Collect the distinct 3-character windows of `text`.
+ *
+ * The text is lowercased, whitespace runs are collapsed to a single space and
+ * only the first 500 characters are considered (bounds the cost per hit).
+ * Strings shorter than three characters produce an empty set.
+ */
+function trigrams(text: string): Set<string> {
+  const normalised = text.toLowerCase().replace(/\s+/g, " ").slice(0, 500);
+  const grams = new Set<string>();
+  for (let end = 3; end <= normalised.length; end++) {
+    grams.add(normalised.substring(end - 3, end));
+  }
+  return grams;
+}
+/**
+ * Jaccard similarity `|a ∩ b| / |a ∪ b|` of two string sets.
+ *
+ * Two empty sets count as identical (1); exactly one empty set gives 0.
+ */
+function jaccardSimilarity(a: Set<string>, b: Set<string>): number {
+  const aEmpty = a.size === 0;
+  const bEmpty = b.size === 0;
+  if (aEmpty && bEmpty) return 1;
+  if (aEmpty || bEmpty) return 0;
+  let shared = 0;
+  a.forEach((member) => {
+    if (b.has(member)) shared += 1;
+  });
+  const unionSize = a.size + b.size - shared;
+  if (unionSize === 0) return 0;
+  return shared / unionSize;
+}

package/src/retry.ts ADDED Viewed

@@ -0,0 +1,139 @@
+import {
+  ChatApiError,
+  type ChatClient,
+  type ChatCompletionOpts,
+  type ChatMessage,
+} from "@chatman-media/llm-router";
+import { EmbeddingApiError, type EmbeddingClient } from "@chatman-media/llm-router";
+/**
+ * Tuning knobs shared by the retry wrappers in this file. Every field is
+ * optional; `resolveOpts` fills in the defaults noted below.
+ */
+export interface RetryOptions {
+  /**
+   * Maximum number of attempts (including the first). Default: 3.
+   */
+  maxAttempts?: number;
+  /**
+   * Initial backoff in ms before the second attempt. Doubles on each retry.
+   * Default: 500.
+   */
+  initialDelayMs?: number;
+  /**
+   * Cap on backoff delay in ms. Default: 30_000.
+   */
+  maxDelayMs?: number;
+  /**
+   * HTTP status codes that should trigger a retry.
+   * Default: [429, 500, 502, 503, 504].
+   */
+  retryOn?: number[];
+}
+const DEFAULT_RETRY_ON = [429, 500, 502, 503, 504]; // default for RetryOptions.retryOn
+/**
+ * Runs `fn`, retrying with jittered exponential backoff while it keeps
+ * failing with a retryable API error.
+ *
+ * An error is retryable only when it is a `ChatApiError` or
+ * `EmbeddingApiError` whose `statusCode` is listed in `opts.retryOn`; anything
+ * else is rethrown immediately. The wait before attempt `n + 1` is
+ * `initialDelayMs * 2^(n-1)` plus up to 100 ms of random jitter, capped at
+ * `maxDelayMs`.
+ *
+ * @param fn    The operation to run; invoked afresh on every attempt.
+ * @param opts  Fully resolved retry options.
+ * @returns Whatever `fn` resolves to.
+ * @throws The last error raised by `fn` once retries are exhausted or the
+ *         error is not retryable.
+ */
+async function withRetry<T>(fn: () => Promise<T>, opts: Required<RetryOptions>): Promise<T> {
+  // Always make at least one call: with `maxAttempts <= 0` (or NaN) the loop
+  // would otherwise be skipped entirely and `undefined` would be thrown.
+  const attempts = Math.max(1, Math.floor(opts.maxAttempts) || 1);
+  let lastErr: unknown;
+  for (let attempt = 1; attempt <= attempts; attempt++) {
+    try {
+      return await fn();
+    } catch (err) {
+      lastErr = err;
+      const isRetryable =
+        (err instanceof ChatApiError || err instanceof EmbeddingApiError) &&
+        opts.retryOn.includes(err.statusCode);
+      if (!isRetryable || attempt === attempts) break;
+      const delay = Math.min(
+        opts.initialDelayMs * 2 ** (attempt - 1) + Math.random() * 100,
+        opts.maxDelayMs,
+      );
+      await sleep(delay);
+    }
+  }
+  throw lastErr;
+}
+/** Resolve (with no value) after roughly `ms` milliseconds. */
+function sleep(ms: number): Promise<void> {
+  return new Promise<void>((done) => {
+    setTimeout(done, ms);
+  });
+}
+/**
+ * Fill in every missing retry option with its default
+ * (3 attempts, 500 ms initial delay, 30 s cap, `DEFAULT_RETRY_ON`).
+ */
+function resolveOpts(opts: RetryOptions): Required<RetryOptions> {
+  const { maxAttempts, initialDelayMs, maxDelayMs, retryOn } = opts;
+  const resolved: Required<RetryOptions> = {
+    maxAttempts: maxAttempts ?? 3,
+    initialDelayMs: initialDelayMs ?? 500,
+    maxDelayMs: maxDelayMs ?? 30_000,
+    retryOn: retryOn ?? DEFAULT_RETRY_ON,
+  };
+  return resolved;
+}
+/**
+ * Wraps any `ChatClient` with automatic retry + exponential backoff.
+ *
+ * Retries on transient HTTP errors (429, 5xx) with jittered exponential
+ * backoff. Non-retryable errors (4xx other than 429) propagate immediately.
+ *
+ * Streaming: only the part of the stream up to and including its first chunk
+ * is retried. Once a chunk has been handed to the consumer the stream cannot
+ * be rewound, so later failures propagate unchanged. `stream` is only present
+ * on the wrapper when the wrapped client provides it.
+ *
+ * @param client  The client to wrap; it is not modified.
+ * @param opts    Retry tuning; see {@link RetryOptions} for defaults.
+ * @returns A new `ChatClient` delegating to `client`.
+ *
+ * @example
+ * ```ts
+ * import { withRetryChatClient, OpenAIChatClient } from "@chatman-media/kb";
+ *
+ * const chat = withRetryChatClient(
+ *   new OpenAIChatClient({ apiKey, baseUrl, model }),
+ *   { maxAttempts: 4, initialDelayMs: 1000 },
+ * );
+ * ```
+ */
+export function withRetryChatClient(client: ChatClient, opts: RetryOptions = {}): ChatClient {
+  const resolved = resolveOpts(opts);
+  const wrapped: ChatClient = {
+    complete(messages: ChatMessage[], completionOpts?: ChatCompletionOpts): Promise<string> {
+      return withRetry(() => client.complete(messages, completionOpts), resolved);
+    },
+  };
+  if (typeof client.stream === "function") {
+    const originalStream = client.stream.bind(client);
+    wrapped.stream = async function* (
+      messages: ChatMessage[],
+      completionOpts?: ChatCompletionOpts,
+    ): AsyncIterable<string> {
+      // Merely creating the iterable does no work for a lazy stream (an async
+      // generator runs nothing until `next()` is called), so the retry has to
+      // cover pulling the first chunk — that is where a failing request
+      // actually surfaces.
+      const { iterator, first } = await withRetry(async () => {
+        const it = originalStream(messages, completionOpts)[Symbol.asyncIterator]();
+        return { iterator: it, first: await it.next() };
+      }, resolved);
+      try {
+        let step = first;
+        while (!step.done) {
+          yield step.value;
+          step = await iterator.next();
+        }
+      } finally {
+        // Let the underlying stream release its resources if the consumer
+        // stops early; a no-op once the stream has already finished.
+        await iterator.return?.();
+      }
+    };
+  }
+  return wrapped;
+}
+/**
+ * Wraps any `EmbeddingClient` so that `embed` is retried with exponential
+ * backoff. `dim` is read through to the wrapped client on every access.
+ *
+ * @param client  The client to wrap; it is not modified.
+ * @param opts    Retry tuning; see {@link RetryOptions} for defaults.
+ *
+ * @example
+ * ```ts
+ * import { withRetryEmbeddingClient, OpenAIEmbeddingClient } from "@chatman-media/kb";
+ *
+ * const embedder = withRetryEmbeddingClient(
+ *   new OpenAIEmbeddingClient({ apiKey, baseUrl, model, dim: 1536 }),
+ * );
+ * ```
+ */
+export function withRetryEmbeddingClient(
+  client: EmbeddingClient,
+  opts: RetryOptions = {},
+): EmbeddingClient {
+  const retryConfig = resolveOpts(opts);
+  const retrying: EmbeddingClient = {
+    // Getter rather than a copied value, so the wrapper always reflects the
+    // wrapped client's current dimension.
+    get dim() {
+      return client.dim;
+    },
+    embed(inputs: string[]): Promise<number[][]> {
+      const attempt = () => client.embed(inputs);
+      return withRetry(attempt, retryConfig);
+    },
+  };
+  return retrying;
+}

package/src/rewrite-query.ts ADDED Viewed

@@ -0,0 +1,124 @@
+import type { ChatClient, ChatMessage } from "@chatman-media/llm-router";
+import { stripThinkBlocks } from "./sanitize.ts";
+/**
+ * Rewrites a user question into a search-friendly query using recent
+ * conversation history. Resolves pronouns ("это", "там", "то"), expands
+ * elliptical follow-ups ("а сколько платят?" → "сколько платят моделям в
+ * Дубае"), and folds in named entities from prior turns.
+ *
+ * Why this matters: vector search on the raw user message misses precision
+ * on follow-ups because embeddings of "а в дубае?" sit nowhere near the
+ * actual KB chunks about Dubai contracts. Rewriting bridges that gap.
+ *
+ * This interface is the argument object accepted by `rewriteQuery`.
+ */
+export interface RewriteQueryInput {
+  /** The current user message; trimmed before use. */
+  question: string;
+  /** Recent dialog (oldest first), excluding the current question. */
+  history?: ChatMessage[];
+  /** Chat client used for the rewrite call. */
+  chat: ChatClient;
+  /** Cap output length to avoid the model writing essays. Default 200 chars. */
+  maxLength?: number;
+}
+/**
+ * System prompt (in Russian) for the rewrite call. It instructs the model to
+ * turn the question into a standalone one-line search query: expand pronouns
+ * and ellipses from the history, keep all key entities, add no preamble or
+ * markdown, return already-standalone questions and small talk unchanged,
+ * and never answer the question. Three worked examples follow the rules.
+ */
+const SYSTEM_PROMPT = `Ты переформулируешь вопрос кандидата в самостоятельный поисковый запрос для базы знаний.
+Правила:
+1. Раскрывай местоимения и эллипсисы по контексту истории ("а там?" → "а в Дубае какие условия?")
+2. Сохраняй ВСЕ ключевые сущности из вопроса и недавней истории (страна, город, сумма, тема)
+3. Никаких вступлений, никакого markdown — ТОЛЬКО переформулированный запрос одной строкой
+4. Если вопрос и так самостоятельный и ясный — верни его без изменений
+5. Если вопрос вообще не про работу/услуги/факты (только смолток типа "привет"/"как дела") — верни его как есть
+6. Не отвечай на вопрос, не давай советов — только переформулируй
+Примеры:
+история: ассистент: в дубае платят 1500 в день, контракт 30 дней
+вопрос: а в стамбуле?
+ответ: какие условия и оплата в стамбуле
+история: ассистент: контракты бывают на 30, 60 и 90 дней
+вопрос: а виза как?
+ответ: как оформляется виза для работы по контракту
+вопрос: сколько платят моделям в дубае?
+ответ: сколько платят моделям в дубае?`;
+/**
+ * Cheap pre-check deciding whether a question is worth sending to the LLM
+ * for rewriting. Standalone questions are passed through untouched, which
+ * saves a model call on most inbound messages.
+ *
+ * Returns `true` only when there is history to draw on AND the question
+ * looks like a follow-up: it has four words or fewer, contains a deictic
+ * word (это, там, он, …), or opens with a conjunction (а, и, но, …).
+ *
+ * @param question  Raw user message.
+ * @param history   Prior turns; without any, nothing can be resolved.
+ */
+export function questionNeedsRewrite(question: string, history?: ChatMessage[]): boolean {
+  const text = question.trim();
+  if (text.length === 0) return false;
+  // Nothing to resolve pronouns or ellipses against.
+  if (history == null || history.length === 0) return false;
+  // Very short messages are almost always follow-ups.
+  if (text.split(/\s+/).length <= 4) return true;
+  // `\b` only understands ASCII word characters, so word boundaries around
+  // Cyrillic are expressed with Unicode-property lookarounds instead.
+  const deicticMarker =
+    /(?<![\p{L}\p{N}])(это|этот|эта|эти|тот|та|те|там|туда|оттуда|такой|такая|такие|тогда|оно|он|она|они)(?![\p{L}\p{N}])/iu;
+  const leadingConjunction = /^(а|и|но|или|ещё|еще|тоже)(?![\p{L}\p{N}])/iu;
+  return deicticMarker.test(text) || leadingConjunction.test(text);
+}
+/**
+ * Turn a possibly context-dependent question into a standalone search query.
+ *
+ * The trimmed question is returned unchanged when it is empty, when
+ * `questionNeedsRewrite` says no rewrite is needed, or when the chat call
+ * fails (the failure is logged with `console.error`). Otherwise the model's
+ * reply is cleaned up by `sanitizeRewritten`, with `maxLength` defaulting to
+ * 200.
+ *
+ * @param input  See {@link RewriteQueryInput}.
+ * @returns The rewritten query, or the trimmed original.
+ */
+export async function rewriteQuery(input: RewriteQueryInput): Promise<string> {
+  const question = input.question.trim();
+  if (question === "") return question;
+  if (!questionNeedsRewrite(question, input.history)) return question;
+  // Only the six most recent messages go into the prompt, to keep this
+  // auxiliary call small and fast.
+  const transcript = (input.history ?? [])
+    .slice(-6)
+    .map(({ role, content }) => `${role}: ${content}`)
+    .join("\n");
+  const questionPart = `вопрос: ${question}\nответ:`;
+  const userPrompt =
+    transcript.length > 0 ? `история:\n${transcript}\n\n${questionPart}` : questionPart;
+  const messages: ChatMessage[] = [
+    { role: "system", content: SYSTEM_PROMPT },
+    { role: "user", content: userPrompt },
+  ];
+  try {
+    const reply = await input.chat.complete(messages, { temperature: 0.1 });
+    return sanitizeRewritten(reply, question, input.maxLength ?? 200);
+  } catch (err) {
+    // Best effort: retrieval still works with the unrewritten question.
+    console.error("[rewrite-query] LLM call failed; using original:", err);
+    return question;
+  }
+}
+/**
+ * Clean up the model's rewrite: remove think blocks, whole fenced code
+ * blocks and a leading "ответ:" / "answer:" label, then keep only the first
+ * non-blank line, truncated to `maxLength` characters.
+ *
+ * Exported for unit tests.
+ *
+ * @param raw        Raw model output.
+ * @param fallback   Returned when nothing usable is left.
+ * @param maxLength  Maximum length of the returned line.
+ */
+export function sanitizeRewritten(raw: string, fallback: string, maxLength: number): string {
+  const cleaned = stripThinkBlocks(raw)
+    .replace(/```[\s\S]*?```/g, "")
+    .replace(/^\s*(ответ|answer)\s*[:\-—]\s*/i, "");
+  // The model sometimes appends explanations; only the first real line counts.
+  for (const line of cleaned.split("\n")) {
+    const candidate = line.trim();
+    if (candidate.length === 0) continue;
+    const capped = candidate.length > maxLength ? candidate.slice(0, maxLength) : candidate;
+    return capped || fallback;
+  }
+  return fallback;
+}

package/src/sanitize.ts ADDED Viewed

@@ -0,0 +1,44 @@
+import { applyStyleRules } from "./text-style-rules.ts";
+// Hoisted regexes — module-level so they compile once instead of on every
+// hot-path LLM-response cleanup. Used both here and in the various
+// extractor / verifier wrappers (reflect, rewrite-query, fact-checker,
+// vacancy-guard, summarize-conversation, extract-user-facts).
+// Every well-formed <think …>…</think> pair, matched non-greedily.
+const THINK_BLOCK_PAIRED = /<think\b[^>]*>[\s\S]*?<\/think>/gi;
+// An opening <think …> at the very start (after optional whitespace),
+// together with everything that follows it.
+const THINK_BLOCK_LEADING_UNCLOSED = /^\s*<think\b[^>]*>[\s\S]*$/i;
+// A code-fence marker, optionally tagged `json`; only the marker is matched,
+// not the fenced content.
+const CODE_FENCE = /```(?:json)?/gi;
+// A leading "answer" / "ответ" / "reply" / "response" / "согласно контексту"
+// label followed by ':', '-' or '—'.
+const LEADING_LABEL = /^\s*(?:answer|ответ|reply|response|согласно\s+контексту)\s*[:\-—]\s*/i;
+/**
+ * Remove `<think>…</think>` reasoning blocks that some chat models emit
+ * despite system instructions (qwen3, deepseek-r1 style).
+ *
+ * Closed blocks are removed wherever they occur. If what remains still starts
+ * with an opening `<think>` that is never closed, it is removed together with
+ * everything after it.
+ */
+export function stripThinkBlocks(raw: string): string {
+  const withoutClosedBlocks = raw.replace(THINK_BLOCK_PAIRED, "");
+  return withoutClosedBlocks.replace(THINK_BLOCK_LEADING_UNCLOSED, "");
+}
+/**
+ * Remove markdown code-fence markers (` ``` ` and ` ```json `) while keeping
+ * the text between them. Handy when an LLM wraps its JSON answer in a fenced
+ * block despite "only JSON" instructions.
+ */
+export function stripCodeFences(raw: string): string {
+  const withoutFences = raw.replace(CODE_FENCE, "");
+  return withoutFences;
+}
+/**
+ * Clean a raw chat-model reply before it is shown or stored. In order:
+ * 1. drop `<think>…</think>` reasoning blocks (qwen3, deepseek-r1 style);
+ * 2. drop a leading "Answer:" / "Ответ:" / "Согласно контексту" style label;
+ * 3. apply the pluggable text-style rules — "AI tells" such as em-/en-dashes,
+ *    unicode ellipsis, "Конечно!" lead-ins (see `./text-style-rules.ts`);
+ * 4. trim surrounding whitespace.
+ *
+ * Exported for unit tests.
+ */
+export function sanitizeLlmOutput(raw: string): string {
+  const unlabelled = stripThinkBlocks(raw).replace(LEADING_LABEL, "");
+  // New style rules are added in ./text-style-rules.ts, not here.
+  return applyStyleRules(unlabelled).trim();
+}