@chatman-media/kb 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +169 -0
  3. package/dist/ab-router.d.ts +66 -0
  4. package/dist/ab-router.d.ts.map +1 -0
  5. package/dist/answer-types.d.ts +194 -0
  6. package/dist/answer-types.d.ts.map +1 -0
  7. package/dist/answer.d.ts +59 -0
  8. package/dist/answer.d.ts.map +1 -0
  9. package/dist/built-in-tools/calendly.d.ts +19 -0
  10. package/dist/built-in-tools/calendly.d.ts.map +1 -0
  11. package/dist/chunk.d.ts +48 -0
  12. package/dist/chunk.d.ts.map +1 -0
  13. package/dist/conversation-store.d.ts +76 -0
  14. package/dist/conversation-store.d.ts.map +1 -0
  15. package/dist/eval.d.ts +64 -0
  16. package/dist/eval.d.ts.map +1 -0
  17. package/dist/extract-user-facts.d.ts +27 -0
  18. package/dist/extract-user-facts.d.ts.map +1 -0
  19. package/dist/fact-checker.d.ts +46 -0
  20. package/dist/fact-checker.d.ts.map +1 -0
  21. package/dist/grade-skills.d.ts +29 -0
  22. package/dist/grade-skills.d.ts.map +1 -0
  23. package/dist/index.d.ts +76 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +62655 -0
  26. package/dist/ingest.d.ts +49 -0
  27. package/dist/ingest.d.ts.map +1 -0
  28. package/dist/multi-query.d.ts +29 -0
  29. package/dist/multi-query.d.ts.map +1 -0
  30. package/dist/parse-pdf.d.ts +14 -0
  31. package/dist/parse-pdf.d.ts.map +1 -0
  32. package/dist/persona-shortcuts.d.ts +51 -0
  33. package/dist/persona-shortcuts.d.ts.map +1 -0
  34. package/dist/prompt.d.ts +9 -0
  35. package/dist/prompt.d.ts.map +1 -0
  36. package/dist/reflect.d.ts +29 -0
  37. package/dist/reflect.d.ts.map +1 -0
  38. package/dist/reranker.d.ts +71 -0
  39. package/dist/reranker.d.ts.map +1 -0
  40. package/dist/retrieval-utils.d.ts +94 -0
  41. package/dist/retrieval-utils.d.ts.map +1 -0
  42. package/dist/retry.d.ts +53 -0
  43. package/dist/retry.d.ts.map +1 -0
  44. package/dist/rewrite-query.d.ts +30 -0
  45. package/dist/rewrite-query.d.ts.map +1 -0
  46. package/dist/sanitize.d.ts +21 -0
  47. package/dist/sanitize.d.ts.map +1 -0
  48. package/dist/semantic-cache.d.ts +70 -0
  49. package/dist/semantic-cache.d.ts.map +1 -0
  50. package/dist/server.d.ts +77 -0
  51. package/dist/server.d.ts.map +1 -0
  52. package/dist/stores/memory-store.d.ts +72 -0
  53. package/dist/stores/memory-store.d.ts.map +1 -0
  54. package/dist/structured-output.d.ts +21 -0
  55. package/dist/structured-output.d.ts.map +1 -0
  56. package/dist/styles.d.ts +186 -0
  57. package/dist/styles.d.ts.map +1 -0
  58. package/dist/summarize-conversation.d.ts +31 -0
  59. package/dist/summarize-conversation.d.ts.map +1 -0
  60. package/dist/system-prompt.d.ts +11 -0
  61. package/dist/system-prompt.d.ts.map +1 -0
  62. package/dist/text-style-rules.d.ts +133 -0
  63. package/dist/text-style-rules.d.ts.map +1 -0
  64. package/dist/tool-loop.d.ts +44 -0
  65. package/dist/tool-loop.d.ts.map +1 -0
  66. package/dist/tools.d.ts +64 -0
  67. package/dist/tools.d.ts.map +1 -0
  68. package/dist/topic-classifier.d.ts +11 -0
  69. package/dist/topic-classifier.d.ts.map +1 -0
  70. package/dist/types.d.ts +83 -0
  71. package/dist/types.d.ts.map +1 -0
  72. package/dist/utils.d.ts +19 -0
  73. package/dist/utils.d.ts.map +1 -0
  74. package/dist/vision.d.ts +72 -0
  75. package/dist/vision.d.ts.map +1 -0
  76. package/package.json +76 -0
  77. package/src/ab-router.ts +118 -0
  78. package/src/answer-types.ts +191 -0
  79. package/src/answer.ts +696 -0
  80. package/src/built-in-tools/calendly.ts +32 -0
  81. package/src/chunk.ts +198 -0
  82. package/src/conversation-store.ts +138 -0
  83. package/src/eval.ts +127 -0
  84. package/src/extract-user-facts.ts +120 -0
  85. package/src/fact-checker.ts +171 -0
  86. package/src/grade-skills.ts +79 -0
  87. package/src/index.ts +191 -0
  88. package/src/ingest.ts +193 -0
  89. package/src/multi-query.ts +89 -0
  90. package/src/parse-pdf.ts +24 -0
  91. package/src/persona-shortcuts.ts +255 -0
  92. package/src/prompt.ts +190 -0
  93. package/src/reflect.ts +99 -0
  94. package/src/reranker.ts +166 -0
  95. package/src/retrieval-utils.ts +209 -0
  96. package/src/retry.ts +139 -0
  97. package/src/rewrite-query.ts +124 -0
  98. package/src/sanitize.ts +44 -0
  99. package/src/semantic-cache.ts +154 -0
  100. package/src/server.ts +164 -0
  101. package/src/stores/memory-store.ts +249 -0
  102. package/src/structured-output.ts +47 -0
  103. package/src/styles.ts +138 -0
  104. package/src/summarize-conversation.ts +88 -0
  105. package/src/system-prompt.ts +118 -0
  106. package/src/text-style-rules.ts +244 -0
  107. package/src/tool-loop.ts +110 -0
  108. package/src/tools.ts +79 -0
  109. package/src/topic-classifier.ts +112 -0
  110. package/src/types.ts +91 -0
  111. package/src/utils.ts +81 -0
  112. package/src/vision.ts +265 -0
@@ -0,0 +1,166 @@
1
+ import type { KbSearchHit } from "./types.ts";
2
+
3
/**
 * Contract for a cross-encoder reranking stage.
 *
 * A reranker runs after the initial (vector / hybrid) retrieval and re-scores
 * the candidate hits with a slower but more accurate relevance model. It is an
 * optional third stage of the retrieval pipeline.
 */
export interface Reranker {
  /**
   * Re-score and re-order `hits` for `query`.
   *
   * @param query The query the hits were retrieved for.
   * @param hits  Candidate hits produced by the earlier retrieval stage.
   * @param topK  Optional cap on how many hits are returned.
   * @returns The reranked hits.
   */
  rerank(query: string, hits: KbSearchHit[], topK?: number): Promise<KbSearchHit[]>;
}
11
+
12
+ // ── Cohere ────────────────────────────────────────────────────────────────────
13
+
14
/** Construction options for `CohereReranker`. */
export interface CohereRerankerOptions {
  /** Cohere API key, sent as a bearer token. Required (must be non-empty). */
  apiKey: string;
  /** Rerank model name. Default: "rerank-v3.5" */
  model?: string;
  /** Base URL. Default: "https://api.cohere.com/v2" */
  baseUrl?: string;
  /** Per-request timeout ms. Default: 30_000. */
  timeoutMs?: number;
  /** Custom `fetch` implementation; the global `fetch` is used when omitted. */
  fetch?: typeof fetch;
}
24
+
25
/** The subset of the Cohere rerank response body that this module reads. */
interface CohereRerankResponse {
  /** Scored entries; `index` is used to look the hit up in the submitted list. */
  results?: { index: number; relevance_score: number }[];
  /** Error description, surfaced in the thrown error when the request fails. */
  message?: string;
}
29
+
30
/**
 * Reranker backed by the Cohere Rerank API.
 *
 * Sends the hit texts to `${baseUrl}/rerank` and maps the returned
 * `relevance_score` onto the distance convention used by the rest of the
 * pipeline (`distance = 1 - relevance_score`, lower = more relevant).
 *
 * @example
 * ```ts
 * import { CohereReranker } from "@chatman-media/kb";
 *
 * const reranker = new CohereReranker({ apiKey: process.env.COHERE_API_KEY! });
 * const reranked = await reranker.rerank(question, hits, 5);
 * ```
 */
export class CohereReranker implements Reranker {
  private readonly apiKey: string;
  private readonly model: string;
  private readonly baseUrl: string;
  private readonly timeoutMs: number;
  private readonly fetchImpl: typeof fetch;

  /**
   * @param opts See {@link CohereRerankerOptions}.
   * @throws Error when `opts.apiKey` is missing or empty.
   */
  constructor(opts: CohereRerankerOptions) {
    if (!opts.apiKey) throw new Error("CohereReranker: apiKey required");
    this.apiKey = opts.apiKey;
    this.model = opts.model ?? "rerank-v3.5";
    // Drop trailing slashes so `${baseUrl}/rerank` never contains "//".
    this.baseUrl = (opts.baseUrl ?? "https://api.cohere.com/v2").replace(/\/+$/, "");
    this.timeoutMs = opts.timeoutMs ?? 30_000;
    this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
  }

  /**
   * Rerank `hits` against `query`.
   *
   * @param query The user query.
   * @param hits  Candidate hits; their `text` is what gets scored.
   * @param topK  Maximum number of hits to return. Defaults to all hits and is
   *              clamped to `hits.length`. Values below 1 yield an empty result
   *              without calling the API.
   * @returns Hits in the order returned by the API, each with `distance`
   *          replaced by `1 - relevance_score`.
   * @throws Error on a non-2xx response or a body without `results`. The
   *         message is the API's `message` when available, else `HTTP <status>`.
   */
  async rerank(query: string, hits: KbSearchHit[], topK?: number): Promise<KbSearchHit[]> {
    if (hits.length === 0) return hits;

    // Never ask for more documents than were sent, and never send a
    // fractional / zero / negative `top_n`.
    const k = Math.min(Math.floor(topK ?? hits.length), hits.length);
    if (!(k >= 1)) return [];

    const res = await this.fetchImpl(`${this.baseUrl}/rerank`, {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify({
        model: this.model,
        query,
        documents: hits.map((h) => h.text),
        top_n: k,
      }),
      signal: AbortSignal.timeout(this.timeoutMs),
    });

    // Error responses are not guaranteed to be JSON (e.g. a proxy's HTML 502
    // page). Parse defensively so the HTTP status is reported instead of a
    // misleading SyntaxError.
    let payload: CohereRerankResponse | null = null;
    try {
      payload = (await res.json()) as CohereRerankResponse | null;
    } catch {
      payload = null;
    }

    const results = payload?.results;
    if (!res.ok || !Array.isArray(results)) {
      throw new Error(`CohereReranker: ${payload?.message ?? `HTTP ${res.status}`}`);
    }

    return results.slice(0, k).flatMap(({ index, relevance_score }) => {
      const hit = hits[index];
      // Skip entries whose index does not point at a submitted hit.
      if (!hit) return [];
      // Remap to distance convention: lower = more relevant
      return [{ ...hit, distance: 1 - relevance_score }];
    });
  }
}
89
+
90
+ // ── Jina ──────────────────────────────────────────────────────────────────────
91
+
92
/** Construction options for `JinaReranker`. */
export interface JinaRerankerOptions {
  /** Jina API key, sent as a bearer token. Required (must be non-empty). */
  apiKey: string;
  /** Rerank model name. Default: "jina-reranker-v2-base-multilingual" */
  model?: string;
  /** Base URL. Default: "https://api.jina.ai/v1" */
  baseUrl?: string;
  /** Per-request timeout ms. Default: 30_000. */
  timeoutMs?: number;
  /** Custom `fetch` implementation; the global `fetch` is used when omitted. */
  fetch?: typeof fetch;
}
102
+
103
/** The subset of the Jina rerank response body that this module reads. */
interface JinaRerankResponse {
  /** Scored entries; `index` is used to look the hit up in the submitted list. */
  results?: { index: number; relevance_score: number }[];
  /** Error description, surfaced in the thrown error when the request fails. */
  detail?: string;
}
107
+
108
/**
 * Reranker backed by the Jina Reranker API.
 * The default model is multilingual — works well for Russian and Chinese KB.
 *
 * Sends the hit texts to `${baseUrl}/rerank` and maps the returned
 * `relevance_score` onto the distance convention used by the rest of the
 * pipeline (`distance = 1 - relevance_score`, lower = more relevant).
 *
 * @example
 * ```ts
 * import { JinaReranker } from "@chatman-media/kb";
 *
 * const reranker = new JinaReranker({ apiKey: process.env.JINA_API_KEY! });
 * const reranked = await reranker.rerank(question, hits, 5);
 * ```
 */
export class JinaReranker implements Reranker {
  private readonly apiKey: string;
  private readonly model: string;
  private readonly baseUrl: string;
  private readonly timeoutMs: number;
  private readonly fetchImpl: typeof fetch;

  /**
   * @param opts See {@link JinaRerankerOptions}.
   * @throws Error when `opts.apiKey` is missing or empty.
   */
  constructor(opts: JinaRerankerOptions) {
    if (!opts.apiKey) throw new Error("JinaReranker: apiKey required");
    this.apiKey = opts.apiKey;
    this.model = opts.model ?? "jina-reranker-v2-base-multilingual";
    // Drop trailing slashes so `${baseUrl}/rerank` never contains "//".
    this.baseUrl = (opts.baseUrl ?? "https://api.jina.ai/v1").replace(/\/+$/, "");
    this.timeoutMs = opts.timeoutMs ?? 30_000;
    this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
  }

  /**
   * Rerank `hits` against `query`.
   *
   * @param query The user query.
   * @param hits  Candidate hits; their `text` is what gets scored.
   * @param topK  Maximum number of hits to return. Defaults to all hits and is
   *              clamped to `hits.length`. Values below 1 yield an empty result
   *              without calling the API.
   * @returns Hits in the order returned by the API, each with `distance`
   *          replaced by `1 - relevance_score`.
   * @throws Error on a non-2xx response or a body without `results`. The
   *         message is the API's `detail` when available, else `HTTP <status>`.
   */
  async rerank(query: string, hits: KbSearchHit[], topK?: number): Promise<KbSearchHit[]> {
    if (hits.length === 0) return hits;

    // Never ask for more documents than were sent, and never send a
    // fractional / zero / negative `top_n`.
    const k = Math.min(Math.floor(topK ?? hits.length), hits.length);
    if (!(k >= 1)) return [];

    const res = await this.fetchImpl(`${this.baseUrl}/rerank`, {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify({
        model: this.model,
        query,
        documents: hits.map((h) => h.text),
        top_n: k,
      }),
      signal: AbortSignal.timeout(this.timeoutMs),
    });

    // Error responses are not guaranteed to be JSON (e.g. a proxy's HTML 502
    // page). Parse defensively so the HTTP status is reported instead of a
    // misleading SyntaxError.
    let payload: JinaRerankResponse | null = null;
    try {
      payload = (await res.json()) as JinaRerankResponse | null;
    } catch {
      payload = null;
    }

    const results = payload?.results;
    if (!res.ok || !Array.isArray(results)) {
      // `detail` is declared as a string, but the body is untyped at runtime;
      // serialise anything else so the message never reads "[object Object]".
      const detail: unknown = payload?.detail;
      const reason =
        detail == null
          ? `HTTP ${res.status}`
          : typeof detail === "string"
            ? detail
            : JSON.stringify(detail);
      throw new Error(`JinaReranker: ${reason}`);
    }

    return results.slice(0, k).flatMap(({ index, relevance_score }) => {
      const hit = hits[index];
      // Skip entries whose index does not point at a submitted hit.
      if (!hit) return [];
      // Remap to distance convention: lower = more relevant
      return [{ ...hit, distance: 1 - relevance_score }];
    });
  }
}
@@ -0,0 +1,209 @@
1
+ /**
2
+ * Post-retrieval utilities for the RAG pipeline.
3
+ *
4
+ * Three independent transforms, applied in order:
5
+ *
6
+ * 0. **`rrfMerge`** — merge multiple hit-lists (from multi-query expansion) via
7
+ * Reciprocal Rank Fusion. Deduplicates by chunk_id, boosts chunks that rank
8
+ * high across multiple queries.
9
+ *
10
+ * 1. **`applyDynamicThreshold`** — trim hits that are "too far" from the query.
11
+ * Prevents hallucination-inducing weak matches from polluting the context.
12
+ *
13
+ * 2. **`mmrDiversify`** — re-rank via Maximal Marginal Relevance so that the
14
+ * final context window covers diverse sub-topics rather than repeating the
15
+ * same dense cluster of near-duplicate chunks.
16
+ *
17
+ * All functions are pure (no I/O) and operate on the {@link KbSearchHit} array
18
+ * that comes back from `IKbStore.search` / `IKbStore.hybridSearch`.
19
+ */
20
+
21
+ import type { KbSearchHit } from "./types.ts";
22
+
23
+ // ── RRF merge ────────────────────────────────────────────────────────────────
24
+
25
/** Tuning knobs for `rrfMerge`. */
export interface RrfMergeOpts {
  /**
   * RRF smoothing constant, used as `1 / (k + rank)`. A higher value flattens
   * the curve, giving relatively more weight to lower-ranked items.
   * Standard value (and default): 60.
   */
  k?: number;
  /**
   * Maximum number of hits to return. Defaults to all unique hits.
   */
  topN?: number;
}
36
+
37
/**
 * Reciprocal Rank Fusion — merge multiple retrieval result lists into one.
 *
 * Each hit's score is the sum of `1 / (k + rank)` across all lists where it
 * appears (1-based rank). Hits that appear in multiple lists get boosted.
 * Deduplication is by `chunk_id`; the first occurrence of a chunk supplies the
 * hit's other fields.
 *
 * The output uses the distance convention (lower = better):
 * `distance = 1 / (1 + rrf_score)` so values stay in (0, 1].
 *
 * Special cases:
 * - no lists → `[]`;
 * - exactly one list → a (possibly truncated) copy of that list, with its
 *   original distances left untouched and no deduplication applied.
 *
 * @param hitLists One list per query, each sorted best-first.
 * @param opts     See {@link RrfMergeOpts}. `topN: 0` returns an empty array;
 *                 a negative `topN` is treated as 0.
 * @returns A new array; the input lists are never returned by reference.
 */
export function rrfMerge(hitLists: KbSearchHit[][], opts: RrfMergeOpts = {}): KbSearchHit[] {
  const { k = 60, topN } = opts;

  // Test for "not provided" explicitly: a truthiness check would treat
  // `topN: 0` as "no limit" and return everything.
  const limit = topN === undefined || Number.isNaN(topN) ? Infinity : Math.max(0, topN);

  if (hitLists.length === 0) return [];
  // Single list: nothing to fuse. Return a copy so callers can mutate the
  // result without touching their input.
  if (hitLists.length === 1) return hitLists[0]!.slice(0, limit);

  // chunk_id → { hit, rrf_score }
  const scores = new Map<number, { hit: KbSearchHit; score: number }>();

  for (const list of hitLists) {
    list.forEach((hit, i) => {
      const contribution = 1 / (k + i + 1); // rank is 1-based
      const existing = scores.get(hit.chunk_id);
      if (existing) {
        existing.score += contribution;
      } else {
        scores.set(hit.chunk_id, { hit, score: contribution });
      }
    });
  }

  return [...scores.values()]
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
    .map(({ hit, score }) => ({
      ...hit,
      // Map to distance: higher RRF score → lower distance
      distance: 1 / (1 + score),
    }));
}
80
+
81
+ // ── Dynamic threshold ────────────────────────────────────────────────────────
82
+
83
/** Options for `applyDynamicThreshold`. */
export interface DynamicThresholdOpts {
  /**
   * Hits whose `distance` is greater than this value are dropped.
   * Cosine distance is in [0, 2]; typical "useful" range is ≤ 0.4.
   * Default: 0.45.
   */
  threshold?: number;
  /**
   * Lower bound on the number of hits kept, even if they all exceed the
   * threshold. Prevents the context from going completely empty.
   * Default: 1.
   */
  minHits?: number;
}
97
+
98
/**
 * Drop hits that lie beyond a distance threshold while guaranteeing a minimum
 * number of results.
 *
 * If fewer than `minHits` hits pass the threshold, the first `minHits` entries
 * of the input are returned instead (the input is expected best-first), so the
 * caller is never left with an empty context when hits exist.
 *
 * @param hits Retrieved hits, best first.
 * @param opts See {@link DynamicThresholdOpts}.
 * @returns The surviving hits; the input array itself when it is empty.
 */
export function applyDynamicThreshold(
  hits: KbSearchHit[],
  opts: DynamicThresholdOpts = {},
): KbSearchHit[] {
  const cutoff = opts.threshold === undefined ? 0.45 : opts.threshold;
  const floor = opts.minHits === undefined ? 1 : opts.minHits;
  if (hits.length === 0) return hits;

  const withinCutoff: KbSearchHit[] = [];
  for (const hit of hits) {
    if (hit.distance <= cutoff) withinCutoff.push(hit);
  }

  if (withinCutoff.length >= floor) return withinCutoff;
  return hits.slice(0, floor);
}
114
+
115
+ // ── MMR diversification ──────────────────────────────────────────────────────
116
+
117
/** Options for `mmrDiversify`. */
export interface MmrOpts {
  /**
   * Weight of relevance versus diversity in the selection score.
   * - 1.0 → pure relevance (same as original ranking)
   * - 0.0 → pure diversity (greedy maximum coverage)
   * Default: 0.6.
   */
  lambda?: number;
  /**
   * Upper bound on the number of hits returned after diversification.
   * Defaults to the full input length.
   */
  topK?: number;
}
131
+
132
/**
 * Maximal Marginal Relevance re-ranking.
 *
 * Iteratively selects the next chunk that maximises:
 *   `score = λ * relevance - (1 - λ) * max_similarity_to_already_selected`
 *
 * Relevance is derived from the search distance (lower distance = higher
 * relevance), normalised against the largest distance in the batch.
 * Inter-chunk similarity is approximated with Jaccard overlap on character
 * trigrams — cheap and needs no extra embedder call.
 *
 * @param hits Sorted by relevance (best first), as returned by the store.
 * @param opts See {@link MmrOpts}. `topK` is clamped to `[0, hits.length]`.
 * @returns A new array with the selected hits in selection order.
 */
export function mmrDiversify(hits: KbSearchHit[], opts: MmrOpts = {}): KbSearchHit[] {
  const { lambda = 0.6, topK } = opts;
  // Clamp at 0: a negative topK would otherwise reach `slice(0, k)` as a
  // negative end index and return "all but the last |k|" hits.
  const k = Math.max(0, Math.min(topK ?? hits.length, hits.length));
  if (k <= 1 || hits.length <= 1) return hits.slice(0, k);

  // Pre-compute trigram sets for each hit (character trigrams on lowercased text).
  const trigramSets = hits.map((h) => trigrams(h.text));

  // Normalise distances to relevance scores in [0, 1].
  // Lower distance = higher relevance. Scale by max so the worst hit = 0.
  // reduce() rather than Math.max(...spread) so very large inputs cannot hit
  // the engine's argument-count limit.
  const maxDist = hits.reduce((max, h) => Math.max(max, h.distance), -Infinity);
  const relevance = hits.map((h) => (maxDist > 0 ? 1 - h.distance / maxDist : 1));

  // Highest similarity of each candidate to anything selected so far. It is
  // updated once per selection instead of being recomputed against every
  // selected hit on every round.
  const maxSimToSelected = hits.map(() => 0);

  const selected: number[] = []; // indices into `hits`
  const remaining = new Set(hits.map((_, i) => i));

  while (selected.length < k && remaining.size > 0) {
    let bestIdx = -1;
    let bestScore = -Infinity;

    for (const idx of remaining) {
      const score = lambda * relevance[idx]! - (1 - lambda) * maxSimToSelected[idx]!;
      // Strict ">" keeps the earliest (best-ranked) candidate on ties.
      if (score > bestScore) {
        bestScore = score;
        bestIdx = idx;
      }
    }

    // Only reachable when every score is NaN (e.g. lambda is NaN).
    if (bestIdx === -1) break;
    selected.push(bestIdx);
    remaining.delete(bestIdx);

    if (selected.length < k) {
      const pickedSet = trigramSets[bestIdx]!;
      for (const idx of remaining) {
        const sim = jaccardSimilarity(trigramSets[idx]!, pickedSet);
        if (sim > maxSimToSelected[idx]!) maxSimToSelected[idx] = sim;
      }
    }
  }

  return selected.map((i) => hits[i]!);
}
186
+
187
+ // ── Helpers ──────────────────────────────────────────────────────────────────
188
+
189
/**
 * Collect the distinct character trigrams of a string.
 *
 * The text is lowercased, runs of whitespace are collapsed to a single space,
 * and only the first 500 characters are considered (performance cap).
 * Strings shorter than three characters yield an empty set.
 */
function trigrams(text: string): Set<string> {
  const normalized = text.toLowerCase().replace(/\s+/g, " ").slice(0, 500); // cap for perf
  const grams = new Set<string>();
  let start = 0;
  while (start + 3 <= normalized.length) {
    grams.add(normalized.substring(start, start + 3));
    start += 1;
  }
  return grams;
}
198
+
199
/**
 * Jaccard similarity |a ∩ b| / |a ∪ b| of two string sets.
 *
 * Two empty sets count as identical (1); one empty set against a non-empty
 * one has no overlap (0).
 */
function jaccardSimilarity(a: Set<string>, b: Set<string>): number {
  const sizeA = a.size;
  const sizeB = b.size;
  if (sizeA === 0 || sizeB === 0) return sizeA === sizeB ? 1 : 0;

  let shared = 0;
  a.forEach((item) => {
    if (b.has(item)) shared += 1;
  });

  const unionSize = sizeA + sizeB - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
}
package/src/retry.ts ADDED
@@ -0,0 +1,139 @@
1
+ import {
2
+ ChatApiError,
3
+ type ChatClient,
4
+ type ChatCompletionOpts,
5
+ type ChatMessage,
6
+ } from "@chatman-media/llm-router";
7
+ import { EmbeddingApiError, type EmbeddingClient } from "@chatman-media/llm-router";
8
+
9
/** Retry / backoff configuration shared by the retry wrappers in this module. */
export interface RetryOptions {
  /**
   * Total attempt budget, counting the initial call. Default: 3.
   */
  maxAttempts?: number;
  /**
   * Backoff in ms before the second attempt; doubles on each further retry.
   * Default: 500.
   */
  initialDelayMs?: number;
  /**
   * Upper bound on a single backoff delay in ms. Default: 30_000.
   */
  maxDelayMs?: number;
  /**
   * HTTP status codes that should trigger a retry.
   * Default: [429, 500, 502, 503, 504].
   */
  retryOn?: number[];
}
29
+
30
/** Status codes retried when `RetryOptions.retryOn` is not supplied: 429 plus common 5xx. */
const DEFAULT_RETRY_ON: number[] = [429, 500, 502, 503, 504];
31
+
32
/**
 * Run `fn`, retrying it with jittered exponential backoff while it fails with
 * a retryable API error.
 *
 * An error is retryable when it is a `ChatApiError` or `EmbeddingApiError`
 * whose `statusCode` is listed in `opts.retryOn`. Any other error, or a
 * retryable error on the final attempt, is rethrown unchanged.
 *
 * @param fn   The operation to attempt.
 * @param opts Fully resolved retry options. `maxAttempts` is floored and
 *             treated as at least 1, so `fn` is always called at least once.
 * @returns The value `fn` resolves with.
 * @throws The last error thrown by `fn`.
 */
async function withRetry<T>(fn: () => Promise<T>, opts: Required<RetryOptions>): Promise<T> {
  // Guarantee one attempt. With maxAttempts <= 0 (or NaN) the loop below would
  // never run and this function would end up throwing `undefined`.
  const maxAttempts = opts.maxAttempts >= 1 ? Math.floor(opts.maxAttempts) : 1;

  let lastErr: unknown;
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await fn();
    } catch (err) {
      lastErr = err;
      const isRetryable =
        (err instanceof ChatApiError || err instanceof EmbeddingApiError) &&
        opts.retryOn.includes(err.statusCode);

      if (!isRetryable || attempt === maxAttempts) break;

      // Exponential backoff plus up to 100 ms of jitter, capped at maxDelayMs.
      const backoff = opts.initialDelayMs * 2 ** (attempt - 1) + Math.random() * 100;
      await sleep(Math.min(backoff, opts.maxDelayMs));
    }
  }
  throw lastErr;
}
54
+
55
/** Resolve (with no value) after roughly `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
58
+
59
/**
 * Fill in the documented defaults for any retry option the caller left out.
 *
 * @param opts Partial retry options.
 * @returns A complete options object; provided values are passed through as-is.
 */
function resolveOpts(opts: RetryOptions): Required<RetryOptions> {
  const { maxAttempts, initialDelayMs, maxDelayMs, retryOn } = opts;
  return {
    maxAttempts: maxAttempts ?? 3,
    initialDelayMs: initialDelayMs ?? 500,
    maxDelayMs: maxDelayMs ?? 30_000,
    retryOn: retryOn ?? DEFAULT_RETRY_ON,
  };
}
67
+
68
/**
 * Wraps any `ChatClient` with automatic retry + exponential backoff.
 *
 * Retries on transient HTTP errors (429, 5xx by default) with jittered
 * exponential backoff. Errors that are not classified as retryable propagate
 * immediately.
 *
 * Streaming: only the start of a stream is retried. The wrapper opens the
 * stream and waits for its first chunk inside the retry loop; once a chunk has
 * been received, any later error propagates to the consumer, because tokens
 * that were already emitted cannot be rewound.
 *
 * @param client The client to wrap. `stream` is wrapped only if the client has it.
 * @param opts   Retry configuration; see {@link RetryOptions}.
 * @returns A `ChatClient` that delegates to `client`.
 *
 * @example
 * ```ts
 * import { withRetryChatClient, OpenAIChatClient } from "@chatman-media/kb";
 *
 * const chat = withRetryChatClient(
 *   new OpenAIChatClient({ apiKey, baseUrl, model }),
 *   { maxAttempts: 4, initialDelayMs: 1000 },
 * );
 * ```
 */
export function withRetryChatClient(client: ChatClient, opts: RetryOptions = {}): ChatClient {
  const resolved = resolveOpts(opts);

  const wrapped: ChatClient = {
    complete(messages: ChatMessage[], completionOpts?: ChatCompletionOpts): Promise<string> {
      return withRetry(() => client.complete(messages, completionOpts), resolved);
    },
  };

  if (typeof client.stream === "function") {
    const originalStream = client.stream.bind(client);
    wrapped.stream = async function* (
      messages: ChatMessage[],
      completionOpts?: ChatCompletionOpts,
    ): AsyncIterable<string> {
      // Merely creating the async iterable does no work and cannot fail, so
      // retrying the creation alone would never retry anything. Pull the first
      // chunk inside the retry loop: a failure before the first token then
      // opens a fresh stream on the next attempt.
      const { iterator, first } = await withRetry(async () => {
        const it = originalStream(messages, completionOpts)[Symbol.asyncIterator]();
        return { iterator: it, first: await it.next() };
      }, resolved);

      let step = first;
      try {
        while (!step.done) {
          yield step.value;
          step = await iterator.next();
        }
      } finally {
        // Let the underlying stream clean up if the consumer stops early.
        await iterator.return?.();
      }
    };
  }

  return wrapped;
}
113
+
114
/**
 * Decorates an `EmbeddingClient` so that `embed` calls are retried with
 * exponential backoff.
 *
 * `dim` is read from the wrapped client on every access.
 *
 * @param client The embedding client to wrap.
 * @param opts   Retry configuration; see {@link RetryOptions}.
 * @returns An `EmbeddingClient` that delegates to `client`.
 *
 * @example
 * ```ts
 * import { withRetryEmbeddingClient, OpenAIEmbeddingClient } from "@chatman-media/kb";
 *
 * const embedder = withRetryEmbeddingClient(
 *   new OpenAIEmbeddingClient({ apiKey, baseUrl, model, dim: 1536 }),
 * );
 * ```
 */
export function withRetryEmbeddingClient(
  client: EmbeddingClient,
  opts: RetryOptions = {},
): EmbeddingClient {
  const retryPolicy = resolveOpts(opts);
  return {
    get dim() {
      return client.dim;
    },
    embed(inputs: string[]): Promise<number[][]> {
      const attempt = () => client.embed(inputs);
      return withRetry(attempt, retryPolicy);
    },
  };
}
@@ -0,0 +1,124 @@
1
+ import type { ChatClient, ChatMessage } from "@chatman-media/llm-router";
2
+ import { stripThinkBlocks } from "./sanitize.ts";
3
+
4
/**
 * Input for `rewriteQuery`, which turns a user question into a
 * search-friendly query using recent conversation history: it resolves
 * pronouns (e.g. Russian "это", "там", "то"), expands elliptical follow-ups
 * into full questions, and folds in named entities from prior turns.
 *
 * Why this matters: vector search on the raw user message loses precision on
 * follow-ups, because the embedding of a fragment such as "а в дубае?"
 * ("and in Dubai?") sits nowhere near the KB chunks it actually refers to.
 * Rewriting bridges that gap.
 */
export interface RewriteQueryInput {
  /** The current user question. */
  question: string;
  /** Recent dialog (oldest first), excluding the current question. */
  history?: ChatMessage[];
  /** Chat client used for the rewrite call. */
  chat: ChatClient;
  /** Cap output length to avoid the model writing essays. Default 200 chars. */
  maxLength?: number;
}
22
+
23
/**
 * System prompt (in Russian) for the query-rewrite call. It instructs the
 * model to turn the candidate's question into a standalone knowledge-base
 * search query: expand pronouns and ellipses from history, keep all key
 * entities, output a single line with no preamble or markdown, return
 * already-clear or small-talk questions unchanged, and never answer the
 * question itself. Three few-shot examples follow the rules.
 */
const SYSTEM_PROMPT = `Ты переформулируешь вопрос кандидата в самостоятельный поисковый запрос для базы знаний.

Правила:
1. Раскрывай местоимения и эллипсисы по контексту истории ("а там?" → "а в Дубае какие условия?")
2. Сохраняй ВСЕ ключевые сущности из вопроса и недавней истории (страна, город, сумма, тема)
3. Никаких вступлений, никакого markdown — ТОЛЬКО переформулированный запрос одной строкой
4. Если вопрос и так самостоятельный и ясный — верни его без изменений
5. Если вопрос вообще не про работу/услуги/факты (только смолток типа "привет"/"как дела") — верни его как есть
6. Не отвечай на вопрос, не давай советов — только переформулируй

Примеры:
история: ассистент: в дубае платят 1500 в день, контракт 30 дней
вопрос: а в стамбуле?
ответ: какие условия и оплата в стамбуле

история: ассистент: контракты бывают на 30, 60 и 90 дней
вопрос: а виза как?
ответ: как оформляется виза для работы по контракту

вопрос: сколько платят моделям в дубае?
ответ: сколько платят моделям в дубае?`;
44
+
45
/**
 * Cheap pre-check that decides whether a question is worth sending to the
 * rewrite LLM call. Returning `false` for self-contained questions saves the
 * call (latency and cost).
 *
 * A rewrite is requested only when history exists AND the question either
 * - has four words or fewer,
 * - contains a Russian deictic word pointing back at earlier turns, or
 * - starts with a Russian follow-up conjunction.
 *
 * @param question The raw user question.
 * @param history  Prior dialog; when absent or empty there is nothing to resolve against.
 */
export function questionNeedsRewrite(question: string, history?: ChatMessage[]): boolean {
  const text = question.trim();
  if (text.length === 0) return false;

  // Without prior turns there are no pronouns to resolve and no ellipsis to
  // expand, however ambiguous the message looks.
  if (!history || history.length === 0) return false;

  // Very short messages are most likely follow-ups.
  const words = text.split(/\s+/);
  if (words.length <= 4) return true;

  // JS `\b` is ASCII-only and silently fails on Cyrillic, so word boundaries
  // are expressed with Unicode-property lookarounds instead.
  const backReferenceProbes = [
    // Deictic markers pointing back to prior turns.
    /(?<![\p{L}\p{N}])(это|этот|эта|эти|тот|та|те|там|туда|оттуда|такой|такая|такие|тогда|оно|он|она|они)(?![\p{L}\p{N}])/iu,
    // Leading follow-up conjunction.
    /^(а|и|но|или|ещё|еще|тоже)(?![\p{L}\p{N}])/iu,
  ];
  return backReferenceProbes.some((probe) => probe.test(text));
}
75
+
76
/**
 * Rewrite a (possibly elliptical) user question into a standalone search query.
 *
 * The trimmed question is returned untouched when it is empty or when
 * `questionNeedsRewrite` decides no rewrite is needed. Otherwise the last six
 * history messages plus the question are sent to `input.chat` at temperature
 * 0.1 and the reply is cleaned up by `sanitizeRewritten`.
 *
 * Best-effort: if the chat call throws, the error is logged and the trimmed
 * original question is returned.
 *
 * @param input See {@link RewriteQueryInput}.
 * @returns The rewritten query, or the trimmed original question.
 */
export async function rewriteQuery(input: RewriteQueryInput): Promise<string> {
  const question = input.question.trim();

  // Nothing to disambiguate → skip the LLM call entirely.
  if (question.length === 0 || !questionNeedsRewrite(question, input.history)) {
    return question;
  }

  // Keep the history snippet compact (last 6 messages); feeding the whole
  // conversation into a "rewrite" call would defeat the latency goal.
  const recentLines: string[] = [];
  for (const message of (input.history ?? []).slice(-6)) {
    recentLines.push(`${message.role}: ${message.content}`);
  }
  const historyText = recentLines.join("\n");

  let userPrompt = `вопрос: ${question}\nответ:`;
  if (historyText.length > 0) {
    userPrompt = `история:\n${historyText}\n\n${userPrompt}`;
  }

  const messages: ChatMessage[] = [
    { role: "system", content: SYSTEM_PROMPT },
    { role: "user", content: userPrompt },
  ];

  let completion: string;
  try {
    completion = await input.chat.complete(messages, { temperature: 0.1 });
  } catch (err) {
    console.error("[rewrite-query] LLM call failed; using original:", err);
    return question;
  }

  return sanitizeRewritten(completion, question, input.maxLength ?? 200);
}
109
+
110
/**
 * Clean up the raw rewrite reply from the model.
 *
 * Removes think blocks, fenced code blocks and a leading "ответ:" / "answer:"
 * label, keeps only the first non-empty line (the model occasionally appends
 * explanations), and truncates it to `maxLength` characters. Falls back to
 * `fallback` when nothing usable is left. Exported for unit tests.
 *
 * @param raw       Raw model output.
 * @param fallback  Value returned when the output is empty after cleanup.
 * @param maxLength Maximum length of the returned query.
 */
export function sanitizeRewritten(raw: string, fallback: string, maxLength: number): string {
  const withoutFences = stripThinkBlocks(raw).replace(/```[\s\S]*?```/g, "");
  const withoutLabel = withoutFences.replace(/^\s*(ответ|answer)\s*[:\-—]\s*/i, "");

  let candidate = "";
  for (const line of withoutLabel.split("\n")) {
    const stripped = line.trim();
    if (stripped.length > 0) {
      candidate = stripped;
      break;
    }
  }
  if (candidate === "") return fallback;

  const capped = candidate.length > maxLength ? candidate.slice(0, maxLength) : candidate;
  return capped === "" ? fallback : capped;
}
@@ -0,0 +1,44 @@
1
+ import { applyStyleRules } from "./text-style-rules.ts";
2
+
3
// Module-level regexes: compiled once rather than on every LLM-response
// cleanup. The original note says they serve this file and the extractor /
// verifier wrappers that reuse its helpers (reflect, rewrite-query,
// fact-checker, vacancy-guard, summarize-conversation, extract-user-facts).

/** A complete `<think …>…</think>` block (non-greedy, case-insensitive, all occurrences). */
const THINK_BLOCK_PAIRED = /<think\b[^>]*>[\s\S]*?<\/think>/gi;
/** An opening `<think>` at the very start (after optional whitespace) with everything after it. */
const THINK_BLOCK_LEADING_UNCLOSED = /^\s*<think\b[^>]*>[\s\S]*$/i;
/** A markdown fence marker, with an optional `json` language tag. */
const CODE_FENCE = /```(?:json)?/gi;
/** A leading "answer" / "ответ" / "reply" / "response" / "согласно контексту" label plus its separator. */
const LEADING_LABEL = /^\s*(?:answer|ответ|reply|response|согласно\s+контексту)\s*[:\-—]\s*/i;
11
+
12
/**
 * Remove `<think>…</think>` reasoning blocks that some chat models emit
 * despite system instructions (qwen3, deepseek-r1 style).
 *
 * Well-formed paired blocks are removed first; a leading block that was never
 * closed is then removed together with everything after it.
 */
export function stripThinkBlocks(raw: string): string {
  const withoutPaired = raw.replace(THINK_BLOCK_PAIRED, "");
  return withoutPaired.replace(THINK_BLOCK_LEADING_UNCLOSED, "");
}
20
+
21
/**
 * Remove markdown code-fence markers (` ``` ` and ` ```json `), keeping the
 * fenced content. Useful when an LLM wraps its JSON answer in a fenced block
 * despite "only JSON" instructions.
 */
export function stripCodeFences(raw: string): string {
  return raw.split(CODE_FENCE).join("");
}
26
+
27
/**
 * Clean up artifacts some chat models emit despite system instructions:
 * - `<think>…</think>` reasoning blocks (qwen3, deepseek-r1 style);
 * - a leading "Answer:" / "Ответ:" / "Согласно контексту" style label;
 * - "AI tells" handled by the pluggable style rules (the original note lists
 *   em-/en-dashes, unicode ellipsis and "Конечно!" lead-ins; see
 *   `./text-style-rules.ts` for the actual list);
 * - surrounding whitespace.
 *
 * Exported for unit tests.
 */
export function sanitizeLlmOutput(raw: string): string {
  const unlabeled = stripThinkBlocks(raw).replace(LEADING_LABEL, "");
  // Style rules live in ./text-style-rules.ts, so new rules can be added
  // there without touching this file.
  return applyStyleRules(unlabeled).trim();
}