@chatman-media/kb 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112):
  1. package/LICENSE +21 -0
  2. package/README.md +169 -0
  3. package/dist/ab-router.d.ts +66 -0
  4. package/dist/ab-router.d.ts.map +1 -0
  5. package/dist/answer-types.d.ts +194 -0
  6. package/dist/answer-types.d.ts.map +1 -0
  7. package/dist/answer.d.ts +59 -0
  8. package/dist/answer.d.ts.map +1 -0
  9. package/dist/built-in-tools/calendly.d.ts +19 -0
  10. package/dist/built-in-tools/calendly.d.ts.map +1 -0
  11. package/dist/chunk.d.ts +48 -0
  12. package/dist/chunk.d.ts.map +1 -0
  13. package/dist/conversation-store.d.ts +76 -0
  14. package/dist/conversation-store.d.ts.map +1 -0
  15. package/dist/eval.d.ts +64 -0
  16. package/dist/eval.d.ts.map +1 -0
  17. package/dist/extract-user-facts.d.ts +27 -0
  18. package/dist/extract-user-facts.d.ts.map +1 -0
  19. package/dist/fact-checker.d.ts +46 -0
  20. package/dist/fact-checker.d.ts.map +1 -0
  21. package/dist/grade-skills.d.ts +29 -0
  22. package/dist/grade-skills.d.ts.map +1 -0
  23. package/dist/index.d.ts +76 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +62655 -0
  26. package/dist/ingest.d.ts +49 -0
  27. package/dist/ingest.d.ts.map +1 -0
  28. package/dist/multi-query.d.ts +29 -0
  29. package/dist/multi-query.d.ts.map +1 -0
  30. package/dist/parse-pdf.d.ts +14 -0
  31. package/dist/parse-pdf.d.ts.map +1 -0
  32. package/dist/persona-shortcuts.d.ts +51 -0
  33. package/dist/persona-shortcuts.d.ts.map +1 -0
  34. package/dist/prompt.d.ts +9 -0
  35. package/dist/prompt.d.ts.map +1 -0
  36. package/dist/reflect.d.ts +29 -0
  37. package/dist/reflect.d.ts.map +1 -0
  38. package/dist/reranker.d.ts +71 -0
  39. package/dist/reranker.d.ts.map +1 -0
  40. package/dist/retrieval-utils.d.ts +94 -0
  41. package/dist/retrieval-utils.d.ts.map +1 -0
  42. package/dist/retry.d.ts +53 -0
  43. package/dist/retry.d.ts.map +1 -0
  44. package/dist/rewrite-query.d.ts +30 -0
  45. package/dist/rewrite-query.d.ts.map +1 -0
  46. package/dist/sanitize.d.ts +21 -0
  47. package/dist/sanitize.d.ts.map +1 -0
  48. package/dist/semantic-cache.d.ts +70 -0
  49. package/dist/semantic-cache.d.ts.map +1 -0
  50. package/dist/server.d.ts +77 -0
  51. package/dist/server.d.ts.map +1 -0
  52. package/dist/stores/memory-store.d.ts +72 -0
  53. package/dist/stores/memory-store.d.ts.map +1 -0
  54. package/dist/structured-output.d.ts +21 -0
  55. package/dist/structured-output.d.ts.map +1 -0
  56. package/dist/styles.d.ts +186 -0
  57. package/dist/styles.d.ts.map +1 -0
  58. package/dist/summarize-conversation.d.ts +31 -0
  59. package/dist/summarize-conversation.d.ts.map +1 -0
  60. package/dist/system-prompt.d.ts +11 -0
  61. package/dist/system-prompt.d.ts.map +1 -0
  62. package/dist/text-style-rules.d.ts +133 -0
  63. package/dist/text-style-rules.d.ts.map +1 -0
  64. package/dist/tool-loop.d.ts +44 -0
  65. package/dist/tool-loop.d.ts.map +1 -0
  66. package/dist/tools.d.ts +64 -0
  67. package/dist/tools.d.ts.map +1 -0
  68. package/dist/topic-classifier.d.ts +11 -0
  69. package/dist/topic-classifier.d.ts.map +1 -0
  70. package/dist/types.d.ts +83 -0
  71. package/dist/types.d.ts.map +1 -0
  72. package/dist/utils.d.ts +19 -0
  73. package/dist/utils.d.ts.map +1 -0
  74. package/dist/vision.d.ts +72 -0
  75. package/dist/vision.d.ts.map +1 -0
  76. package/package.json +76 -0
  77. package/src/ab-router.ts +118 -0
  78. package/src/answer-types.ts +191 -0
  79. package/src/answer.ts +696 -0
  80. package/src/built-in-tools/calendly.ts +32 -0
  81. package/src/chunk.ts +198 -0
  82. package/src/conversation-store.ts +138 -0
  83. package/src/eval.ts +127 -0
  84. package/src/extract-user-facts.ts +120 -0
  85. package/src/fact-checker.ts +171 -0
  86. package/src/grade-skills.ts +79 -0
  87. package/src/index.ts +191 -0
  88. package/src/ingest.ts +193 -0
  89. package/src/multi-query.ts +89 -0
  90. package/src/parse-pdf.ts +24 -0
  91. package/src/persona-shortcuts.ts +255 -0
  92. package/src/prompt.ts +190 -0
  93. package/src/reflect.ts +99 -0
  94. package/src/reranker.ts +166 -0
  95. package/src/retrieval-utils.ts +209 -0
  96. package/src/retry.ts +139 -0
  97. package/src/rewrite-query.ts +124 -0
  98. package/src/sanitize.ts +44 -0
  99. package/src/semantic-cache.ts +154 -0
  100. package/src/server.ts +164 -0
  101. package/src/stores/memory-store.ts +249 -0
  102. package/src/structured-output.ts +47 -0
  103. package/src/styles.ts +138 -0
  104. package/src/summarize-conversation.ts +88 -0
  105. package/src/system-prompt.ts +118 -0
  106. package/src/text-style-rules.ts +244 -0
  107. package/src/tool-loop.ts +110 -0
  108. package/src/tools.ts +79 -0
  109. package/src/topic-classifier.ts +112 -0
  110. package/src/types.ts +91 -0
  111. package/src/utils.ts +81 -0
  112. package/src/vision.ts +265 -0
package/src/answer.ts ADDED
@@ -0,0 +1,696 @@
1
+ import type { z } from "zod";
2
+ import {
3
+ type AnswerInput,
4
+ type AnswerResult,
5
+ type AnswerTelemetry,
6
+ NO_CONTEXT_MARKER,
7
+ type Persona,
8
+ } from "./answer-types.ts";
9
+ import type { ChatClient, ChatMessage } from "@chatman-media/llm-router";
10
+ import { checkFacts } from "./fact-checker.ts";
11
+ import {
12
+ botPresenceReply,
13
+ isBotPresenceQuestion,
14
+ isPersonalFactQuestion,
15
+ isPersonaSmalltalkQuestion,
16
+ personaFactReply,
17
+ personaSmalltalkReply,
18
+ } from "./persona-shortcuts.ts";
19
+ import { composeSystemPrompt } from "./prompt.ts";
20
+ import { rewriteQuery } from "./rewrite-query.ts";
21
+ import { applyDynamicThreshold, mmrDiversify, rrfMerge } from "./retrieval-utils.ts";
22
+ import { expandQueries } from "./multi-query.ts";
23
+ import { sanitizeLlmOutput } from "./sanitize.ts";
24
+ import {
25
+ injectJsonInstruction,
26
+ parseStructuredOutput,
27
+ zodToJsonSchema,
28
+ } from "./structured-output.ts";
29
+ import type { FunnelStage } from "./styles.ts";
30
+ import {
31
+ buildSystemPrompt,
32
+ DEFAULT_PERSONA,
33
+ legacyRagSamplingTemperature,
34
+ } from "./system-prompt.ts";
35
+ import { applyStyleRules } from "./text-style-rules.ts";
36
+ import { buildToolTelemetry, DEFAULT_MAX_TOOL_CYCLES, runToolLoop } from "./tool-loop.ts";
37
+ import type { AnyRagTool } from "./tools.ts";
38
+ import { classifyTopic } from "./topic-classifier.ts";
39
+ import type { KbSearchHit } from "./types.ts";
40
+
41
+ // Re-exports for backward compatibility with existing importers.
42
+ export {
43
+ type AnswerInput,
44
+ type AnswerResult,
45
+ type AnswerTelemetry,
46
+ NO_CONTEXT_MARKER,
47
+ type Persona,
48
+ } from "./answer-types.ts";
49
+ export {
50
+ botPresenceReply,
51
+ isBotPresenceQuestion,
52
+ isPersonalFactQuestion,
53
+ isPersonaSmalltalkQuestion,
54
+ personaFactReply,
55
+ personaSmalltalkReply,
56
+ } from "./persona-shortcuts.ts";
57
+ export { sanitizeLlmOutput } from "./sanitize.ts";
58
+ export {
59
+ buildSystemPrompt,
60
+ legacyRagSamplingTemperature,
61
+ renderSummaryBlock,
62
+ renderUserFactsBlock,
63
+ } from "./system-prompt.ts";
64
+
65
+ // ── Shared retrieval ─────────────────────────────────────────────────────────
66
+
67
/**
 * Outcome of the shared retrieval step performed by `retrieveHits`.
 */
export interface RetrievalResult {
  /** Query actually used for retrieval: the rewritten question, or the original one when rewriting is off. */
  searchQuery: string;
  /** Every query that was sent to the embedder (a single-element list when multi-query is off). */
  queries: string[];
  /** Final hits after filtering, diversification and reranking; never more than `topK` entries. */
  hits: KbSearchHit[];
  /** Milliseconds elapsed between the start of embedding and the end of retrieval. */
  retrievalMs: number;
  /** null when topicRouting is off or booksPriority path was used. */
  usedTopic: string | null;
}
75
+
76
/**
 * Shared retrieval logic for both `answerWithRag` and `answerWithRagStream`.
 *
 * Steps:
 *  1. Optional query rewrite (LLM resolves pronouns/ellipsis via history).
 *  2. Optional multi-query expansion (LLM generates N variants, default 2).
 *  3. Embed all queries in one batch; the first vector is the primary one.
 *  4. booksPriority path (primary query only) OR normal path: one search per
 *     query, merged with RRF when there is more than one query. If topic
 *     routing picked a topic and the topic-filtered search returns nothing,
 *     the search is repeated without the topic filter (for single- AND
 *     multi-query retrieval) and `usedTopic` is reset to `null`.
 *  5. maxDistance filter (vector-only search) → applyDynamicThreshold →
 *     mmrDiversify → reranker.
 *  6. Slice to topK.
 *
 * `retrievalMs` is measured from just before the embedding call, so it does
 * not include the optional rewrite / expansion LLM calls.
 *
 * @param input Answer request; only its retrieval-related options are read here.
 * @returns The final hits together with the queries used and timing information.
 * @throws Error when the embedder returns no vector for the first query.
 *   Errors raised by the embedder, the KB or the reranker are not caught here.
 */
export async function retrieveHits(input: AnswerInput): Promise<RetrievalResult> {
  const topK = input.topK ?? 5;
  // Over-fetch when a reranker is configured so it has more candidates to score.
  const candidateK = input.reranker ? topK * 3 : topK;

  const searchQuery = input.rewriteQueryBeforeRetrieval
    ? await rewriteQuery({
        question: input.question,
        ...(input.history ? { history: input.history } : {}),
        chat: input.chat,
      })
    : input.question;

  const queries = input.multiQuery
    ? await expandQueries({
        question: searchQuery,
        history: input.history,
        chat: input.chat,
        count: input.multiQueryCount ?? 2,
      })
    : [searchQuery];

  const retrievalStart = Date.now();
  const vecs = await input.embedder.embed(queries);
  const questionVec = vecs[0];
  if (!questionVec) throw new Error("Embedder returned no vector for question");

  let hits: KbSearchHit[];
  let usedTopic: string | null = null;

  if (input.booksPriority) {
    hits = await input.kb.prioritySearch({
      embedding: questionVec,
      query: searchQuery,
      k: candidateK,
      vectorOnly: !input.hybridSearch,
    });
  } else {
    // Topic is classified from the ORIGINAL question, not the rewritten one.
    const topic = input.topicRouting ? classifyTopic(input.question) : null;

    // One KB lookup for a single (vector, query) pair, hybrid or vector-only.
    const searchOne = (
      vec: NonNullable<typeof questionVec>,
      query: string,
      filterTopic: string | null,
    ) =>
      input.hybridSearch
        ? input.kb.hybridSearch({
            embedding: vec,
            query,
            k: candidateK,
            ...(filterTopic !== null ? { topic: filterTopic } : {}),
          })
        : input.kb.search(vec, candidateK, filterTopic);

    // Full search for the current query set under the given topic filter.
    const runSearch = async (filterTopic: string | null): Promise<KbSearchHit[]> => {
      if (queries.length > 1) {
        const hitLists = await Promise.all(
          // A missing per-query vector falls back to the primary vector.
          queries.map((q, i) => searchOne(vecs[i] ?? questionVec, q, filterTopic)),
        );
        return rrfMerge(hitLists, { topN: candidateK });
      }
      return searchOne(questionVec, searchQuery, filterTopic);
    };

    hits = await runSearch(topic);
    usedTopic = topic;
    // Topic filter produced nothing: retry globally rather than answering
    // from an empty context. Previously only the single-query path did this.
    if (topic !== null && hits.length === 0) {
      hits = await runSearch(null);
      usedTopic = null;
    }
  }

  const maxDist = input.maxDistance;
  // The distance cut-off is applied only to vector-only search results.
  if (!input.hybridSearch && maxDist !== undefined) {
    hits = hits.filter((h) => h.distance <= maxDist);
  }
  if (input.autoTrimDistance) {
    hits = applyDynamicThreshold(hits, { threshold: input.autoTrimThreshold });
  }
  if (input.mmr) {
    // NOTE(review): MMR is asked for `topK` items before the reranker runs;
    // if mmrDiversify truncates, the reranker never sees the extra
    // candidates fetched via `candidateK` — confirm this is intended.
    hits = mmrDiversify(hits, { lambda: input.mmrLambda, topK });
  }
  if (input.reranker && hits.length > 0) {
    hits = await input.reranker.rerank(searchQuery, hits, topK);
  }
  hits = hits.slice(0, topK);

  return {
    hits,
    retrievalMs: Date.now() - retrievalStart,
    searchQuery,
    queries,
    usedTopic,
  };
}
184
+
185
/**
 * Funnel stages exempt from the grounding half of the fact check.
 *
 * At these data-collection stages the bot ASKS the candidate for profile
 * fields; such requests read to the checker as unsupported claims, so the
 * whole reply would be dropped and the candidate would see nothing. Vacancy
 * accuracy is still always enforced. pitch/objection (and an unknown stage)
 * keep full grounding.
 */
const GROUNDING_EXEMPT_STAGES: ReadonlySet<string> = new Set(["opener", "qualify", "close"]);

/**
 * Produces the final answer from already-retrieved KB hits.
 *
 * Pipeline: early "no context" exit → prompt composition → optional agentic
 * tool loop → optional structured output → plain completion → optional
 * fact check (grounding + vacancy accuracy).
 *
 * This function does NOT invoke `input.onTelemetry`: its caller
 * (`answerWithRag`) reports the telemetry of the returned result, so firing
 * it here as well would deliver the same telemetry twice.
 *
 * @param opts.hits Retrieved KB hits (may be empty).
 * @param opts.baseTelemetry Telemetry fields collected during retrieval.
 * @param opts.startedAt `Date.now()` timestamp of the request start, used for `total_ms`.
 * @param opts.input The original answer request.
 * @param opts.activePersona Persona used to pick the legacy sampling temperature.
 * @returns The answer; `text` is `NO_CONTEXT_MARKER` when there was nothing to
 *   answer from or the fact checker rejected the draft.
 */
async function answerFromHits(opts: {
  hits: KbSearchHit[];
  baseTelemetry: AnswerTelemetry;
  startedAt: number;
  input: AnswerInput;
  activePersona: Persona;
}): Promise<AnswerResult> {
  const { hits, baseTelemetry, startedAt, input, activePersona } = opts;
  const vacBlock = (input.vacanciesBlock ?? "").trim();
  const chunkIds = (): string[] => hits.map((h) => h.chunk_id);

  // Early "no context" exit only when there is truly nothing to answer with:
  // no KB hits, no vacancies block, no style AND no tools. When tools are
  // present the LLM is still called — the answer is then built from tool
  // results rather than from the knowledge base.
  const hasTools = !!(input.tools && input.tools.length > 0);
  if (hits.length === 0 && !vacBlock && !input.style && !hasTools) {
    return {
      text: NO_CONTEXT_MARKER,
      usedChunkIds: [],
      hits: [],
      telemetry: { ...baseTelemetry, path: "no_context", total_ms: Date.now() - startedAt },
    };
  }

  const kbContextStr = hits
    .map((h, i) => `[#${i + 1}] (source: ${h.title})\n${h.text}`)
    .join("\n\n");

  // Vacancies block (if any) goes first, followed by the KB excerpts.
  const context = vacBlock
    ? kbContextStr
      ? `${vacBlock}\n\n${kbContextStr}`
      : vacBlock
    : kbContextStr;

  // With a style but no context at all, tell the model explicitly that there
  // is no vacancy data so that it does not invent numbers or places.
  const contextForPrompt =
    input.style && !context
      ? "АКТУАЛЬНЫЕ ВАКАНСИИ: нет данных. Конкретных вакансий, зарплат и городов сейчас нет в базе — не называй никаких цифр и мест."
      : context;

  let systemPrompt: string;
  let temperature = legacyRagSamplingTemperature(activePersona);
  if (input.style) {
    const stage: FunnelStage = input.stage ?? "qualify";
    systemPrompt = composeSystemPrompt(input.style, stage, contextForPrompt, {
      includeFewShot: input.includeFewShot ?? true,
      ...(input.userFacts ? { userFacts: input.userFacts } : {}),
      ...(input.conversationSummary ? { conversationSummary: input.conversationSummary } : {}),
      ...(input.skills && input.skills.length > 0 ? { skills: input.skills } : {}),
      ...(input.directorHooks && input.directorHooks.length > 0
        ? { directorHooks: input.directorHooks }
        : {}),
      ...(input.supportPhase ? { supportPhase: input.supportPhase } : {}),
    });
    temperature = input.style.model.temperature;
  } else {
    systemPrompt = buildSystemPrompt(
      input.persona ?? DEFAULT_PERSONA,
      context,
      input.userFacts,
      input.conversationSummary,
    );
  }

  const messages: ChatMessage[] = [
    { role: "system", content: systemPrompt },
    ...(input.history ?? []),
    { role: "user", content: input.question },
  ];

  console.log(`[rag] calling LLM (hits=${hits.length} vacBlock=${vacBlock.length > 0})`);
  const generationStart = Date.now();
  const numPredict = input.numPredict ?? input.style?.model.maxTokens;
  const llmOpts = { temperature, ...(numPredict !== undefined ? { numPredict } : {}) };

  // ── Agentic tool-calling loop ────────────────────────────────────────────
  // Multi-cycle: the LLM may call tools → see the results → call again, until
  // it reaches a final answer or the maxToolCycles cap. maxToolCycles=1 gives
  // single-cycle behaviour.
  let toolCallTelemetry: AnswerTelemetry["toolCall"] | undefined;
  let multiCycleToolCalls: AnswerTelemetry["toolCalls"] | undefined;

  // Telemetry for a finished generation, including whatever the tool loop recorded.
  const generationTelemetry = (): AnswerTelemetry => ({
    ...baseTelemetry,
    generation_ms: Date.now() - generationStart,
    ...(toolCallTelemetry ? { toolCall: toolCallTelemetry } : {}),
    ...(multiCycleToolCalls ? { toolCalls: multiCycleToolCalls } : {}),
  });

  if (input.tools && input.tools.length > 0 && typeof input.chat.completeWithTools === "function") {
    const loopResult = await runToolLoop({
      chat: input.chat,
      messages,
      tools: input.tools as AnyRagTool[],
      llmOpts,
      maxCycles: input.maxToolCycles ?? DEFAULT_MAX_TOOL_CYCLES,
    });
    const telemetryFields = buildToolTelemetry(loopResult.toolCalls);
    toolCallTelemetry = telemetryFields.toolCall;
    multiCycleToolCalls = telemetryFields.toolCalls;

    // No tool was ever called — the model answered directly in the first
    // cycle, so its text is the final answer.
    if (loopResult.content !== null && loopResult.toolCalls.length === 0) {
      const text = sanitizeLlmOutput(loopResult.content);
      const generationMs = Date.now() - generationStart;
      return {
        text,
        usedChunkIds: chunkIds(),
        hits,
        telemetry: {
          ...baseTelemetry,
          generation_ms: generationMs,
          path: "ok",
          total_ms: Date.now() - startedAt,
        },
      };
    }
    // Tools were called: fall through to the normal completion below, which
    // runs again on `messages`. NOTE(review): per the original author's
    // comment, runToolLoop appends the tool results to `messages` — confirm.
  }

  // ── Structured output ────────────────────────────────────────────────────
  if (input.outputSchema) {
    const jsonSchema = zodToJsonSchema(input.outputSchema);
    let rawJson: string;

    if (typeof input.chat.completeStructured === "function") {
      rawJson = await input.chat.completeStructured(messages, jsonSchema, llmOpts);
    } else {
      // No native structured mode: put the schema into the system prompt and
      // sample at temperature 0.
      messages[0] = {
        role: "system",
        content: injectJsonInstruction(messages[0]?.content ?? "", jsonSchema),
      };
      rawJson = await input.chat.complete(messages, { ...llmOpts, temperature: 0 });
    }

    const parsed = parseStructuredOutput(rawJson, input.outputSchema);
    const telemetry = generationTelemetry();
    if (!parsed.success) console.warn(`[structured-output] validation failed: ${parsed.error}`);
    return {
      text: rawJson,
      output: parsed.success ? parsed.data : undefined,
      usedChunkIds: chunkIds(),
      hits,
      telemetry: { ...telemetry, path: "ok", total_ms: Date.now() - startedAt },
    };
  }

  const raw = await input.chat.complete(messages, llmOpts);
  const text = sanitizeLlmOutput(raw);
  const telemetry = generationTelemetry();

  const runVacancyCheck = vacBlock.length > 0 && input.vacancyGuard !== false;
  const groundingExempt = input.stage !== undefined && GROUNDING_EXEMPT_STAGES.has(input.stage);

  const runFactCheck =
    (input.reflect || runVacancyCheck) &&
    text !== NO_CONTEXT_MARKER &&
    text.trim().length > 0 &&
    // If grounding is exempt for this stage and there's no vacancy block,
    // there's nothing left to verify — skip the LLM call entirely.
    !(groundingExempt && !runVacancyCheck);

  if (runFactCheck) {
    const verdict = await checkFacts({
      question: input.question,
      answer: text,
      context,
      chat: input.chat,
      ...(runVacancyCheck ? { vacanciesBlock: vacBlock } : {}),
    });
    telemetry.factCheck = {
      grounded: verdict.grounded,
      vacancyOk: verdict.vacancyOk,
      ...(verdict.reason ? { reason: verdict.reason } : {}),
    };

    // Result returned whenever the fact checker rejects the draft.
    const dropped = (): AnswerResult => ({
      text: NO_CONTEXT_MARKER,
      usedChunkIds: chunkIds(),
      hits,
      telemetry: { ...telemetry, path: "ungrounded", total_ms: Date.now() - startedAt },
    });

    if (!verdict.grounded && !groundingExempt) {
      console.warn(
        `[fact-checker] dropping ungrounded answer: ${verdict.reason ?? "unknown"} | answer="${text.slice(0, 120)}"`,
      );
      return dropped();
    }

    if (!verdict.vacancyOk) {
      console.warn(
        `[fact-checker] dropping answer with mismatched vacancy data: ${verdict.reason ?? "unknown"} | answer="${text.slice(0, 120)}"`,
      );
      return dropped();
    }
  }

  // The model itself may answer with the marker; reflect that in the path.
  if (text === NO_CONTEXT_MARKER) {
    telemetry.path = "no_context";
  }

  return {
    text,
    usedChunkIds: chunkIds(),
    hits,
    telemetry: { ...telemetry, total_ms: Date.now() - startedAt },
  };
}
417
+
418
/**
 * Streaming variant of `answerWithRag`. Yields raw text tokens as they arrive
 * from the LLM. The final telemetry is delivered via `input.onTelemetry` (if
 * set). Falls back to `complete()` when the chat client has no `stream()`;
 * only that fallback output is passed through `sanitizeLlmOutput`.
 *
 * Retrieval is best-effort, exactly as in `answerWithRag`: if the embedder or
 * the KB search throws, the error is logged and generation continues with an
 * empty KB context instead of failing the whole stream.
 *
 * Note: hallucination guard (`reflect`, `vacancyGuard`) is not applied during
 * streaming — fact-checking requires the full answer. Use `answerWithRag()` when
 * fact-checking is required. `tools` and `outputSchema` are not read by this
 * function either.
 *
 * @param input Answer request.
 * @returns Async iterable of answer fragments; a single `NO_CONTEXT_MARKER`
 *   item when there is nothing to answer from.
 */
export async function* answerWithRagStream(input: AnswerInput): AsyncIterable<string> {
  const startedAt = Date.now();
  const activePersona: Persona =
    input.style != null
      ? {
          name: input.style.persona.name,
          role: input.style.persona.role,
          ...(input.style.persona.company != null && input.style.persona.company.trim() !== ""
            ? { company: input.style.persona.company.trim() }
            : {}),
        }
      : (input.persona ?? DEFAULT_PERSONA);

  // ── Persona shortcuts (no retrieval needed) ──────────────────────────────
  if (isPersonaSmalltalkQuestion(input.question)) {
    const text = applyStyleRules(personaSmalltalkReply(activePersona));
    yield text;
    input.onTelemetry?.({ path: "smalltalk", total_ms: Date.now() - startedAt });
    return;
  }
  if (isBotPresenceQuestion(input.question)) {
    const text = applyStyleRules(botPresenceReply(activePersona));
    yield text;
    input.onTelemetry?.({ path: "smalltalk", total_ms: Date.now() - startedAt });
    return;
  }
  const factKey = isPersonalFactQuestion(input.question);
  if (factKey) {
    const factReply = personaFactReply(activePersona, factKey);
    if (factReply) {
      yield applyStyleRules(factReply);
      input.onTelemetry?.({ path: "persona_fact", total_ms: Date.now() - startedAt });
      return;
    }
  }

  // ── Retrieval (best-effort, same policy as answerWithRag) ────────────────
  let retrieval: RetrievalResult;
  try {
    retrieval = await retrieveHits(input);
  } catch (err) {
    console.warn(
      `[rag] stream retrieval failed, answering without KB context: ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
    retrieval = {
      hits: [],
      retrievalMs: 0,
      searchQuery: input.question,
      queries: [],
      usedTopic: null,
    };
  }
  const { hits, retrievalMs, searchQuery, usedTopic } = retrieval;
  const vacBlock = (input.vacanciesBlock ?? "").trim();

  if (hits.length === 0 && !vacBlock && !input.style) {
    input.onTelemetry?.({
      path: "no_context",
      retrieval_ms: retrievalMs,
      total_ms: Date.now() - startedAt,
    });
    yield NO_CONTEXT_MARKER;
    return;
  }

  // ── Prompt composition ───────────────────────────────────────────────────
  const kbContextStr = hits
    .map((h, i) => `[#${i + 1}] (source: ${h.title})\n${h.text}`)
    .join("\n\n");
  const context = vacBlock
    ? kbContextStr
      ? `${vacBlock}\n\n${kbContextStr}`
      : vacBlock
    : kbContextStr;
  const contextForPrompt =
    input.style && !context
      ? "АКТУАЛЬНЫЕ ВАКАНСИИ: нет данных. Конкретных вакансий, зарплат и городов сейчас нет в базе — не называй никаких цифр и мест."
      : context;

  let systemPrompt: string;
  let temperature = legacyRagSamplingTemperature(activePersona);
  if (input.style) {
    const stage: FunnelStage = input.stage ?? "qualify";
    systemPrompt = composeSystemPrompt(input.style, stage, contextForPrompt, {
      includeFewShot: input.includeFewShot ?? true,
      ...(input.userFacts ? { userFacts: input.userFacts } : {}),
      ...(input.conversationSummary ? { conversationSummary: input.conversationSummary } : {}),
      ...(input.skills && input.skills.length > 0 ? { skills: input.skills } : {}),
      ...(input.directorHooks && input.directorHooks.length > 0
        ? { directorHooks: input.directorHooks }
        : {}),
      ...(input.supportPhase ? { supportPhase: input.supportPhase } : {}),
    });
    temperature = input.style.model.temperature;
  } else {
    systemPrompt = buildSystemPrompt(
      input.persona ?? DEFAULT_PERSONA,
      context,
      input.userFacts,
      input.conversationSummary,
    );
  }

  const messages: ChatMessage[] = [
    { role: "system", content: systemPrompt },
    ...(input.history ?? []),
    { role: "user", content: input.question },
  ];

  const numPredict = input.numPredict ?? input.style?.model.maxTokens;
  const completionOpts = { temperature, ...(numPredict !== undefined ? { numPredict } : {}) };

  // ── Stream or fall back to complete() ───────────────────────────────────
  const generationStart = Date.now();
  if (typeof input.chat.stream === "function") {
    for await (const token of input.chat.stream(messages, completionOpts)) {
      yield token;
    }
  } else {
    const raw = await input.chat.complete(messages, completionOpts);
    yield sanitizeLlmOutput(raw);
  }

  const generationMs = Date.now() - generationStart;
  input.onTelemetry?.({
    path: "ok",
    retrieval_ms: retrievalMs,
    generation_ms: generationMs,
    top_distances: hits.map((h) => Math.round(h.distance * 10000) / 10000),
    ...(input.hybridSearch ? { hybrid: true } : {}),
    // Report the routed topic, matching the telemetry of answerWithRag.
    ...(input.topicRouting && usedTopic !== null ? { topic: usedTopic } : {}),
    ...(searchQuery !== input.question
      ? { original_query: input.question, rewritten_query: searchQuery }
      : {}),
    total_ms: Date.now() - startedAt,
  });
}
550
+
551
/**
 * Answers a question with retrieval-augmented generation.
 *
 * Order of work:
 *  1. Persona shortcuts (smalltalk, bot-presence, personal-fact questions) are
 *     answered from the persona alone, without retrieval or an LLM call.
 *  2. KB retrieval via `retrieveHits`. Retrieval is best-effort: if it throws,
 *     the error is logged and the answer is generated with no KB hits.
 *  3. Generation, tools, structured output and fact checking via `answerFromHits`.
 *
 * `input.onTelemetry` (if set) is called with the telemetry of the returned result.
 *
 * @param input Answer request. When `outputSchema` is supplied, the overload
 *   types `output` of the result as the schema's inferred type.
 * @returns The answer text, the hits it was built from and telemetry.
 */
export async function answerWithRag<T extends z.ZodTypeAny>(
  input: AnswerInput & { outputSchema: T },
): Promise<AnswerResult<z.infer<T>>>;
export async function answerWithRag(input: AnswerInput): Promise<AnswerResult>;
export async function answerWithRag(input: AnswerInput): Promise<AnswerResult> {
  const startedAt = Date.now();
  const activePersona: Persona =
    input.style != null
      ? {
          name: input.style.persona.name,
          role: input.style.persona.role,
          ...(input.style.persona.company != null && input.style.persona.company.trim() !== ""
            ? { company: input.style.persona.company.trim() }
            : {}),
        }
      : (input.persona ?? DEFAULT_PERSONA);

  console.log(
    `[rag] answerWithRag style=${input.style?.slug ?? "none"} stage=${input.stage ?? "none"} q="${input.question.slice(0, 60)}"`,
  );

  // Builds, reports and returns a canned persona reply that needs no retrieval.
  const replyWithoutRetrieval = (
    reply: string,
    path: NonNullable<AnswerTelemetry["path"]>,
  ): AnswerResult => {
    const result: AnswerResult = {
      text: applyStyleRules(reply),
      usedChunkIds: [],
      hits: [],
      telemetry: { path, total_ms: Date.now() - startedAt },
    };
    input.onTelemetry?.(result.telemetry);
    return result;
  };

  if (isPersonaSmalltalkQuestion(input.question)) {
    return replyWithoutRetrieval(personaSmalltalkReply(activePersona), "smalltalk");
  }

  if (isBotPresenceQuestion(input.question)) {
    return replyWithoutRetrieval(botPresenceReply(activePersona), "smalltalk");
  }

  const factKey = isPersonalFactQuestion(input.question);
  if (factKey) {
    const factReply = personaFactReply(activePersona, factKey);
    // No stored fact for this key: fall through to the normal RAG pipeline.
    if (factReply) {
      return replyWithoutRetrieval(factReply, "persona_fact");
    }
  }

  // KB search is not critical: if the embedder / vector search is unavailable
  // (e.g. a 429 quota error from the embedding provider) the whole answer is
  // not failed — we answer without KB context (chat + tools). The knowledge
  // base is optional for the bot.
  let retrieval: RetrievalResult;
  try {
    retrieval = await retrieveHits(input);
  } catch (err) {
    console.warn(
      `[rag] retrieval failed → отвечаем без базы знаний: ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
    retrieval = {
      hits: [],
      retrievalMs: 0,
      searchQuery: input.question,
      queries: [],
      usedTopic: null,
    };
  }
  const { hits, retrievalMs, searchQuery, queries, usedTopic } = retrieval;

  const baseTelemetry: AnswerTelemetry = {
    path: "ok",
    retrieval_ms: retrievalMs,
    // Distances rounded to four decimal places.
    top_distances: hits.map((h) => Math.round(h.distance * 10000) / 10000),
    ...(input.hybridSearch ? { hybrid: true } : {}),
    ...(input.topicRouting && usedTopic !== null ? { topic: usedTopic } : {}),
    ...(searchQuery !== input.question
      ? { original_query: input.question, rewritten_query: searchQuery }
      : {}),
  };

  console.log(
    `[rag] retrieval hits=${hits.length} queries=${queries.length} topic=${usedTopic ?? "global"} ms=${retrievalMs}`,
  );
  const result = await answerFromHits({ hits, baseTelemetry, startedAt, input, activePersona });
  input.onTelemetry?.(result.telemetry);
  return result;
}
650
+
651
/**
 * Produces a soft fallback reply for a turn where RAG yielded nothing
 * groundable (no KB hit, or the draft was rejected as ungrounded).
 *
 * The model is asked to reply briefly in the persona's own voice while being
 * forbidden from inventing specifics. Questions that need exact data get an
 * honest "I'll check and get back to you"; general questions get a normal
 * conversational answer. Logging the unanswered question remains the
 * caller's responsibility.
 *
 * @param input.question The user's question.
 * @param input.chat Chat client used for the single completion call.
 * @param input.persona Persona whose name (and company, if any) opens the prompt.
 * @param input.history Optional prior messages inserted between the system prompt and the question.
 * @returns The sanitized, style-normalised reply text.
 */
export async function generateSoftFallback(input: {
  question: string;
  chat: ChatClient;
  persona: Persona;
  history?: ChatMessage[];
}): Promise<string> {
  // Speaker label: "<name> из «<company>»" when a non-blank company is set.
  const companyName = input.persona.company?.trim();
  const speaker = companyName
    ? `${input.persona.name} из «${companyName}»`
    : input.persona.name;

  const preamble: string[] = [
    `Ты — ${speaker}. Ты переписываешься с кандидатом в мессенджере.`,
    "",
    "По вопросу кандидата у тебя СЕЙЧАС нет точных данных.",
    "Ответь живо и по-человечески, своими словами, коротко — 1–3 предложения.",
    "",
  ];
  const hardRules: string[] = [
    "Жёсткие правила:",
    "- НЕЛЬЗЯ выдумывать конкретику: зарплаты, суммы, проценты, сроки, даты,",
    " города, адреса, условия и стоимость визы, названия компаний, требования.",
    " Никаких цифр и фактов, которых ты не знаешь наверняка.",
    "- Если вопрос требует точных данных — честно скажи, что уточнишь этот",
    " момент и вернёшься с ответом чуть позже.",
    "- Если вопрос общий и конкретики не требует — просто ответь по смыслу.",
    "- Не извиняйся длинно и формально. Тон тёплый и дружелюбный.",
    "- Не упоминай «базу данных», «систему», не говори, что ты бот или ИИ.",
  ];

  // system prompt → optional history → the question itself.
  const conversation: ChatMessage[] = [
    { role: "system", content: preamble.concat(hardRules).join("\n") },
  ];
  if (input.history) {
    conversation.push(...input.history);
  }
  conversation.push({ role: "user", content: input.question });

  const draft = await input.chat.complete(conversation, { temperature: 0.5 });
  return applyStyleRules(sanitizeLlmOutput(draft));
}