@nixxie-cms/ai-rag 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/LICENSE +23 -0
  2. package/README.md +163 -0
  3. package/dist/declarations/src/AiRagService.d.ts +50 -0
  4. package/dist/declarations/src/AiRagService.d.ts.map +1 -0
  5. package/dist/declarations/src/admin-page.d.ts +29 -0
  6. package/dist/declarations/src/admin-page.d.ts.map +1 -0
  7. package/dist/declarations/src/chunking.d.ts +8 -0
  8. package/dist/declarations/src/chunking.d.ts.map +1 -0
  9. package/dist/declarations/src/collection.d.ts +18 -0
  10. package/dist/declarations/src/collection.d.ts.map +1 -0
  11. package/dist/declarations/src/express.d.ts +36 -0
  12. package/dist/declarations/src/express.d.ts.map +1 -0
  13. package/dist/declarations/src/graphql.d.ts +23 -0
  14. package/dist/declarations/src/graphql.d.ts.map +1 -0
  15. package/dist/declarations/src/index.d.ts +39 -0
  16. package/dist/declarations/src/index.d.ts.map +1 -0
  17. package/dist/declarations/src/plugin.d.ts +53 -0
  18. package/dist/declarations/src/plugin.d.ts.map +1 -0
  19. package/dist/declarations/src/prompt.d.ts +14 -0
  20. package/dist/declarations/src/prompt.d.ts.map +1 -0
  21. package/dist/declarations/src/providers/AnthropicRagProvider.d.ts +16 -0
  22. package/dist/declarations/src/providers/AnthropicRagProvider.d.ts.map +1 -0
  23. package/dist/declarations/src/providers/GeminiRagProvider.d.ts +19 -0
  24. package/dist/declarations/src/providers/GeminiRagProvider.d.ts.map +1 -0
  25. package/dist/declarations/src/providers/OllamaRagProvider.d.ts +23 -0
  26. package/dist/declarations/src/providers/OllamaRagProvider.d.ts.map +1 -0
  27. package/dist/declarations/src/providers/OpenAiRagProvider.d.ts +17 -0
  28. package/dist/declarations/src/providers/OpenAiRagProvider.d.ts.map +1 -0
  29. package/dist/declarations/src/providers/ServiceRagProvider.d.ts +17 -0
  30. package/dist/declarations/src/providers/ServiceRagProvider.d.ts.map +1 -0
  31. package/dist/declarations/src/providers/index.d.ts +14 -0
  32. package/dist/declarations/src/providers/index.d.ts.map +1 -0
  33. package/dist/declarations/src/providers/types.d.ts +45 -0
  34. package/dist/declarations/src/providers/types.d.ts.map +1 -0
  35. package/dist/declarations/src/similarity.d.ts +12 -0
  36. package/dist/declarations/src/similarity.d.ts.map +1 -0
  37. package/dist/declarations/src/types.d.ts +319 -0
  38. package/dist/declarations/src/types.d.ts.map +1 -0
  39. package/dist/declarations/src/vector-store.d.ts +34 -0
  40. package/dist/declarations/src/vector-store.d.ts.map +1 -0
  41. package/dist/nixxie-cms-ai-rag.cjs.d.ts +2 -0
  42. package/dist/nixxie-cms-ai-rag.cjs.js +2507 -0
  43. package/dist/nixxie-cms-ai-rag.esm.js +2481 -0
  44. package/package.json +37 -0
  45. package/src/AiRagService.ts +640 -0
  46. package/src/admin-page.ts +135 -0
  47. package/src/chunking.ts +78 -0
  48. package/src/collection.ts +79 -0
  49. package/src/express.ts +212 -0
  50. package/src/graphql.ts +196 -0
  51. package/src/guard.ts +75 -0
  52. package/src/index.ts +102 -0
  53. package/src/plugin.ts +162 -0
  54. package/src/prompt.ts +62 -0
  55. package/src/providers/AnthropicRagProvider.ts +91 -0
  56. package/src/providers/GeminiRagProvider.ts +147 -0
  57. package/src/providers/OllamaRagProvider.ts +157 -0
  58. package/src/providers/OpenAiRagProvider.ts +108 -0
  59. package/src/providers/ServiceRagProvider.ts +44 -0
  60. package/src/providers/index.ts +67 -0
  61. package/src/providers/types.ts +44 -0
  62. package/src/semaphore.ts +26 -0
  63. package/src/similarity.ts +31 -0
  64. package/src/types.ts +346 -0
  65. package/src/vector-store.ts +136 -0
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "@nixxie-cms/ai-rag",
3
+ "version": "1.0.0",
4
+ "license": "MIT",
5
+ "main": "dist/nixxie-cms-ai-rag.cjs.js",
6
+ "module": "dist/nixxie-cms-ai-rag.esm.js",
7
+ "exports": {
8
+ ".": {
9
+ "types": "./dist/nixxie-cms-ai-rag.cjs.js",
10
+ "module": "./dist/nixxie-cms-ai-rag.esm.js",
11
+ "default": "./dist/nixxie-cms-ai-rag.cjs.js"
12
+ },
13
+ "./package.json": "./package.json"
14
+ },
15
+ "dependencies": {
16
+ "@babel/runtime": "^7.24.7"
17
+ },
18
+ "devDependencies": {
19
+ "@nixxie-cms/core": "^1.1.0"
20
+ },
21
+ "peerDependencies": {
22
+ "@nixxie-cms/core": "^1.0.3"
23
+ },
24
+ "optionalDependencies": {
25
+ "@anthropic-ai/sdk": "^0.69.0",
26
+ "openai": "^6.0.0"
27
+ },
28
+ "preconstruct": {
29
+ "entrypoints": [
30
+ "index.ts"
31
+ ]
32
+ },
33
+ "repository": {
34
+ "type": "git",
35
+ "url": "https://github.com/nixxiecms/nixxie/tree/main/packages/ai-rag"
36
+ }
37
+ }
package/src/AiRagService.ts ADDED
@@ -0,0 +1,640 @@
1
+ import type {
2
+ NixxieAiMessage,
3
+ NixxieAiRagService,
4
+ NixxieContext,
5
+ NixxieRagAnswer,
6
+ NixxieRagAskOptions,
7
+ NixxieRagChunk,
8
+ NixxieRagCitation,
9
+ NixxieRagDocument,
10
+ NixxieRagDocumentInput,
11
+ NixxieRagDocumentQuery,
12
+ NixxieRagIndexStats,
13
+ NixxieRagRetrieveOptions,
14
+ NixxieRagStreamEvent,
15
+ } from '@nixxie-cms/core'
16
+ import { chunkText } from './chunking'
17
+ import { checkGrounding, resolveGuard, shouldRefuseForNoContext, type ResolvedGuard } from './guard'
18
+ import {
19
+ resolveEmbeddingProvider,
20
+ resolveGenerationProvider,
21
+ type EmbeddingProvider,
22
+ type GenerationProvider,
23
+ type RagGenerateOptions,
24
+ } from './providers'
25
+ import { ALLOW_KNOWLEDGE_SYSTEM_PROMPT, buildRagPrompt, DEFAULT_SYSTEM_PROMPT } from './prompt'
26
+ import { Semaphore } from './semaphore'
27
+ import type {
28
+ AiRagConfig,
29
+ RagGenerationConfig,
30
+ VectorRecord,
31
+ VectorStore,
32
+ } from './types'
33
+ import { SqlVectorStore } from './vector-store'
34
+
35
/** Generation settings once defaults are applied: temperature, maxTokens and systemPrompt are always set. */
type ResolvedGeneration = Required<Pick<RagGenerationConfig, 'temperature' | 'maxTokens'>> & {
  systemPrompt: string
  topP?: number
  model?: string
  buildPrompt?: RagGenerationConfig['buildPrompt']
}

/** Indexing behaviour once defaults are applied. */
type ResolvedIndexing = {
  /** Index documents as part of `addDocuments` / `updateDocument`. */
  auto: boolean
  /** Kick off `indexPending()` from `init()`. */
  onConnect: boolean
  /** Passed through untouched from `config.indexing.schedule`. */
  schedule?: string | number
  /** Maximum number of documents indexed at once by `reindex` / `indexPending`. */
  concurrency: number
}

/**
 * The service's settings after the constructor has filled in every default.
 * `AiRagService` keeps a single instance of this in `this.r`.
 */
type Resolved = {
  // Collections
  documentsCollection: string
  chunksCollection: string

  // Generation and embedding
  generation: ResolvedGeneration
  embeddingBatchSize: number
  embeddingModel?: string

  // Retrieval
  topK: number
  minScore: number
  maxContextChars: number
  candidateMultiplier: number

  // Chunking
  chunkSize: number
  chunkOverlap: number
  chunkStrategy: 'recursive' | 'sentence' | 'fixed'

  // Chat limits and guard
  historyLimit: number
  maxQueryChars: number
  guard: ResolvedGuard

  indexing: ResolvedIndexing
}
63
+
64
/**
 * Produce a one-line preview of `text`.
 *
 * Leading/trailing whitespace is trimmed and every whitespace run is collapsed to a
 * single space. When the result is longer than `max` UTF-16 code units it is cut
 * and an ellipsis (`…`) is appended.
 *
 * @param text Raw text to preview.
 * @param max  Maximum number of code units kept before the ellipsis (default 240).
 * @returns The flattened text, truncated with a trailing `…` when it was too long.
 */
function snippet(text: string, max = 240): string {
  const flat = text.trim().replace(/\s+/g, ' ')
  if (flat.length <= max) return flat

  // Do not cut between the two halves of a surrogate pair: a lone high surrogate
  // renders as a replacement character and is not valid UTF-8 once serialised.
  let end = max
  const lastKept = flat.charCodeAt(end - 1)
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1

  return `${flat.slice(0, end)}…`
}
68
+
69
+ /** The main RAG assistant. Create via `createAiRag()`; register via `ragPlugin()`. */
70
+ export class AiRagService implements NixxieAiRagService {
71
+ private documents: any | null = null
72
+ private generation: GenerationProvider
73
+ private embedder: EmbeddingProvider
74
+ private vectorStore: VectorStore
75
+ private rerank?: (query: string, chunks: NixxieRagChunk[]) => Promise<NixxieRagChunk[]>
76
+ private chatGate: Semaphore
77
+ private r: Resolved
78
+
79
/**
 * Build the service from `config`. Providers, the vector store and the guard are
 * resolved first; every remaining option falls back to its built-in default and
 * the result is stored in `this.r`.
 *
 * @param config Optional settings; an empty object selects all defaults.
 */
constructor(config: AiRagConfig = {}) {
  const { collections, generation, embedding, retrieval, chunking, chat, limits, indexing } = config

  const documentsCollection = collections?.documents ?? 'KnowledgeBase'
  const chunksCollection = collections?.chunks ?? 'KnowledgeChunk'

  this.generation = resolveGenerationProvider(generation)
  this.embedder = resolveEmbeddingProvider(embedding)
  // A caller-supplied store wins; otherwise chunks live in the chunks collection.
  this.vectorStore = retrieval?.vectorStore ?? new SqlVectorStore(chunksCollection)
  this.rerank = retrieval?.rerank

  const guard = resolveGuard(config.guard)
  // The default system prompt depends on whether the guard lets the model use its own knowledge.
  const fallbackSystemPrompt = guard.allowModelKnowledge
    ? ALLOW_KNOWLEDGE_SYSTEM_PROMPT
    : DEFAULT_SYSTEM_PROMPT

  const resolvedGeneration: Resolved['generation'] = {
    temperature: generation?.temperature ?? 0.2,
    maxTokens: generation?.maxTokens ?? 1024,
    topP: generation?.topP,
    model: generation?.model,
    systemPrompt: generation?.systemPrompt ?? fallbackSystemPrompt,
    buildPrompt: generation?.buildPrompt,
  }

  const resolvedIndexing: Resolved['indexing'] = {
    auto: indexing?.auto ?? true,
    onConnect: indexing?.onConnect ?? true,
    schedule: indexing?.schedule,
    concurrency: indexing?.concurrency ?? 4,
  }

  this.r = {
    documentsCollection,
    chunksCollection,
    generation: resolvedGeneration,
    embeddingBatchSize: embedding?.batchSize ?? 64,
    embeddingModel: embedding?.model,
    topK: retrieval?.topK ?? 5,
    minScore: retrieval?.minScore ?? 0.2,
    maxContextChars: retrieval?.maxContextChars ?? 6000,
    candidateMultiplier: retrieval?.candidateMultiplier ?? 4,
    chunkSize: chunking?.chunkSize ?? 1200,
    chunkOverlap: chunking?.chunkOverlap ?? 200,
    chunkStrategy: chunking?.strategy ?? 'recursive',
    historyLimit: chat?.historyLimit ?? 10,
    maxQueryChars: limits?.maxQueryChars ?? 8000,
    guard,
    indexing: resolvedIndexing,
  }

  this.chatGate = new Semaphore(limits?.maxConcurrentChats ?? 8)
}
125
+
126
/** The document and chunk collection names this service works with (read by the plugin). */
get collections(): { documents: string; chunks: string } {
  const { documentsCollection: documents, chunksCollection: chunks } = this.r
  return { documents, chunks }
}
130
+
131
/** The `indexing.schedule` value from the config, or `undefined` when none was given. */
get indexingSchedule(): string | number | undefined {
  const { schedule } = this.r.indexing
  return schedule
}
134
+
135
+ // ── Lifecycle ──
136
+
137
/**
 * Bind the documents collection from the Prisma client and initialise the vector
 * store if it exposes an `init` hook. When `indexing.onConnect` is enabled, pending
 * documents are indexed in the background.
 *
 * @param context CMS context providing the Prisma client.
 * @throws If the documents collection is missing from the Prisma client.
 */
async init(context: NixxieContext): Promise<void> {
  const { documentsCollection, indexing } = this.r
  this.documents = this.requireDelegate(context, documentsCollection)
  await this.vectorStore.init?.(context)

  if (!indexing.onConnect) return

  // Intentionally not awaited: a failed initial indexing run is logged, never fatal to boot.
  void this.indexPending().catch(err =>
    console.error('[@nixxie-cms/ai-rag] Initial indexing failed:', err)
  )
}
147
+
148
/**
 * Look up the Prisma delegate for a collection.
 *
 * The delegate is read from `context.prisma` under the list key with its first
 * letter lower-cased (e.g. `KnowledgeBase` → `knowledgeBase`).
 *
 * @param context CMS context providing the Prisma client.
 * @param listKey Collection key as configured.
 * @returns The Prisma delegate for that collection.
 * @throws If no delegate exists under that key (including for an empty key).
 */
private requireDelegate(context: NixxieContext, listKey: string): any {
  // `charAt` yields '' for an empty key, so a bad key reaches the descriptive
  // error below instead of failing with "cannot read properties of undefined".
  const delegateKey = listKey.charAt(0).toLowerCase() + listKey.slice(1)
  const delegate = (context.prisma as any)?.[delegateKey]
  if (!delegate) {
    throw new Error(
      `[@nixxie-cms/ai-rag] Collection "${listKey}" was not found in the Prisma client. ` +
        `Register it via \`ragPlugin()\` (or add \`${listKey}: knowledgeBaseCollection()\`) and run a migration.`
    )
  }
  return delegate
}
158
+
159
/**
 * Return the documents delegate bound by `init()`.
 *
 * @throws If `init()` has not bound the delegate yet.
 */
private requireDocuments(): any {
  const model = this.documents
  if (model) return model
  throw new Error(
    '[@nixxie-cms/ai-rag] Not ready yet — the knowledge base is available once the database has connected.'
  )
}
167
+
168
+ // ── Knowledge-base content ──
169
+
170
/** Add a single document; a thin wrapper around `addDocuments` for one input. */
async addDocument(doc: NixxieRagDocumentInput): Promise<NixxieRagDocument> {
  const created = await this.addDocuments([doc])
  return created[0]!
}
174
+
175
/**
 * Create one knowledge-base row per input (status `pending`, zero chunks), in order.
 *
 * When `indexing.auto` is on, each new document is then indexed one after another.
 * An indexing failure is logged and does not reject the call. The documents are
 * re-read afterwards so the returned objects carry their post-index status.
 *
 * @param docs Documents to store.
 * @returns The created documents, in input order.
 * @throws If the service is not initialised, or a row cannot be created.
 */
async addDocuments(docs: NixxieRagDocumentInput[]): Promise<NixxieRagDocument[]> {
  const model = this.requireDocuments()

  const created: NixxieRagDocument[] = []
  for (const input of docs) {
    const data = {
      title: input.title ?? null,
      content: input.content,
      source: input.source ?? null,
      tags: input.tags ?? [],
      metadata: input.metadata ?? null,
      status: 'pending',
      chunkCount: 0,
    }
    created.push(rowToDocument(await model.create({ data })))
  }

  if (!this.r.indexing.auto) return created

  for (const { id } of created) {
    try {
      await this.index(id)
    } catch (err) {
      console.error(`[@nixxie-cms/ai-rag] Failed to index document ${id}:`, err)
    }
  }

  // Fall back to the pre-index snapshot if a row can no longer be read.
  const reread = await Promise.all(created.map(({ id }) => this.getDocument(id)))
  return reread.map((fresh, position) => fresh ?? created[position]!)
}
206
+
207
/**
 * Fetch one document by id.
 *
 * @returns The document, or `undefined` when no row has that id.
 */
async getDocument(id: string): Promise<NixxieRagDocument | undefined> {
  const model = this.requireDocuments()
  const row = await model.findUnique({ where: { id } })
  if (!row) return undefined
  return rowToDocument(row)
}
211
+
212
/**
 * List knowledge-base documents, newest first.
 *
 * `status` and `search` are applied in the database (`search` matches title,
 * content or source by substring). `tags` is applied in Node, keeping documents
 * that carry every requested tag.
 *
 * Pagination always counts *matching* documents. Without a tag filter, `skip` and
 * `take` go to the database. With a tag filter the page is cut after filtering, so
 * a page is never shortened or emptied by rows the tag filter later discards; in
 * that path `take` is treated as a non-negative page size.
 *
 * @param query Optional filters and pagination.
 * @returns The matching documents for the requested page.
 * @throws If the service is not initialised.
 */
async listDocuments(query: NixxieRagDocumentQuery = {}): Promise<NixxieRagDocument[]> {
  const where: any = {}
  if (query.status) where.status = { equals: query.status }
  if (query.search) {
    where.OR = [
      { title: { contains: query.search } },
      { content: { contains: query.search } },
      { source: { contains: query.search } },
    ]
  }

  const model = this.requireDocuments()
  const skip = query.skip ?? 0
  const requiredTags = query.tags && query.tags.length ? query.tags : undefined

  if (!requiredTags) {
    const rows: any[] = await model.findMany({
      where,
      orderBy: { createdAt: 'desc' },
      skip,
      ...(query.take !== undefined ? { take: query.take } : {}),
    })
    return rows.map(rowToDocument)
  }

  // Tag filtering is done in Node for cross-database portability (tags are stored as JSON),
  // so the database must return every candidate and the page is sliced afterwards.
  const rows: any[] = await model.findMany({ where, orderBy: { createdAt: 'desc' } })
  const matching = rows
    .map(rowToDocument)
    .filter(doc => requiredTags.every(tag => (doc.tags ?? []).includes(tag)))

  const end = query.take !== undefined ? skip + Math.max(0, query.take) : undefined
  return matching.slice(skip, end)
}
235
+
236
/**
 * Apply a partial update to a document and mark it `pending` again.
 *
 * Only fields present in `patch` (not `undefined`) are written. When
 * `indexing.auto` is on the document is re-indexed immediately; a re-index failure
 * is logged and does not reject the call.
 *
 * @param id    Document id.
 * @param patch Fields to change.
 * @returns The document as stored after the update (and re-index, if any).
 * @throws If the service is not initialised, the update fails, or the row cannot
 *         be read back afterwards.
 */
async updateDocument(
  id: string,
  patch: Partial<NixxieRagDocumentInput>
): Promise<NixxieRagDocument> {
  const model = this.requireDocuments()

  // NOTE(review): status is reset to 'pending' on every update, which also applies to a
  // document that is currently 'disabled' — confirm that re-enabling it is intended.
  const data: any = { status: 'pending' }
  const editable = ['title', 'content', 'source', 'tags', 'metadata'] as const
  for (const field of editable) {
    const value = patch[field]
    if (value !== undefined) data[field] = value
  }

  await model.update({ where: { id }, data })

  if (this.r.indexing.auto) {
    await this.index(id).catch(err =>
      console.error(`[@nixxie-cms/ai-rag] Failed to re-index document ${id}:`, err)
    )
  }

  const updated = await this.getDocument(id)
  if (updated) return updated
  throw new Error(`[@nixxie-cms/ai-rag] Document not found after update: ${id}`)
}
259
+
260
/**
 * Remove a document: its chunks are deleted from the vector store first, then the
 * document row itself.
 *
 * @param id Document id.
 */
async removeDocument(id: string): Promise<void> {
  const { vectorStore } = this
  await vectorStore.deleteByDocument(id)

  const model = this.requireDocuments()
  await model.delete({ where: { id } })
}
264
+
265
/**
 * Drop every indexed chunk belonging to `documentId`, leaving the document row alone.
 * The auto-index delete hook relies on this, because by then the CMS has already
 * removed the knowledge-base row.
 */
async purgeChunks(documentId: string): Promise<void> {
  const { vectorStore } = this
  await vectorStore.deleteByDocument(documentId)
}
272
+
273
+ // ── Indexing ──
274
+
275
/**
 * (Re)build the vector index for one document.
 *
 * - A `disabled` document has its chunks removed and its `chunkCount` reset; nothing
 *   else is touched.
 * - Otherwise the document is marked `indexing`, its title and content are chunked
 *   and embedded, the chunks are upserted into the vector store, and the row is
 *   marked `indexed` with the new chunk count and timestamp.
 * - On failure the row is marked `error` with the failure message and the original
 *   error is re-thrown.
 *
 * @param documentId Id of the document to index.
 * @throws If the service is not initialised, the document does not exist, or any
 *         chunking / embedding / storage step fails.
 */
async index(documentId: string): Promise<void> {
  const model = this.requireDocuments()
  const row = await model.findUnique({ where: { id: documentId } })
  if (!row) throw new Error(`[@nixxie-cms/ai-rag] Document not found: ${documentId}`)
  const doc = rowToDocument(row)

  if (doc.status === 'disabled') {
    await this.vectorStore.deleteByDocument(documentId)
    await model.update({ where: { id: documentId }, data: { chunkCount: 0 } })
    return
  }

  await model.update({ where: { id: documentId }, data: { status: 'indexing', error: null } })
  try {
    // The title is embedded together with the body so title-only matches are retrievable.
    const text = [doc.title, doc.content].filter(Boolean).join('\n\n')
    const pieces = chunkText(text, {
      strategy: this.r.chunkStrategy,
      chunkSize: this.r.chunkSize,
      chunkOverlap: this.r.chunkOverlap,
    })
    const embeddings = await this.embedBatched(pieces)
    const records: VectorRecord[] = pieces.map((content, i) => ({
      id: `${documentId}:${i}`,
      documentId,
      content,
      embedding: embeddings[i]!,
      title: doc.title,
      source: doc.source,
      tags: doc.tags,
      metadata: doc.metadata,
    }))
    await this.vectorStore.upsert(documentId, records)
    await model.update({
      where: { id: documentId },
      data: {
        status: 'indexed',
        chunkCount: records.length,
        error: null,
        indexedAt: new Date(),
      },
    })
  } catch (err) {
    // Recording the failure is best-effort: if this write fails too (for example the
    // row was deleted meanwhile), the caller must still see the original error.
    try {
      await model.update({
        where: { id: documentId },
        data: { status: 'error', error: err instanceof Error ? err.message : String(err) },
      })
    } catch (recordErr) {
      console.error(
        `[@nixxie-cms/ai-rag] Failed to record indexing error for document ${documentId}:`,
        recordErr
      )
    }
    throw err
  }
}
323
+
324
/**
 * Re-index documents in bulk.
 *
 * Candidates are all documents (optionally restricted to those carrying every tag
 * in `options.tags`). Unless `options.force` is set, documents already `indexed`
 * are skipped. Work runs with at most `indexing.concurrency` documents in flight;
 * individual failures are counted, not thrown.
 *
 * @returns Number of targeted documents, chunks now indexed for them, failures, and elapsed time.
 */
async reindex(options: { force?: boolean; tags?: string[] } = {}): Promise<NixxieRagIndexStats> {
  const startedAt = Date.now()
  const candidates = await this.listDocuments({ tags: options.tags })
  const targets = options.force
    ? candidates
    : candidates.filter(doc => doc.status !== 'indexed')

  const tally = { chunks: 0, errors: 0 }
  await mapWithConcurrency(targets, this.r.indexing.concurrency, async ({ id }) => {
    try {
      await this.index(id)
      const after = await this.getDocument(id)
      tally.chunks += after?.chunkCount ?? 0
    } catch {
      tally.errors += 1
    }
  })

  return {
    documents: targets.length,
    chunks: tally.chunks,
    errors: tally.errors,
    durationMs: Date.now() - startedAt,
  }
}
341
+
342
/**
 * Index every document whose status is `pending` or `error`, with at most
 * `indexing.concurrency` documents in flight. Individual failures are counted,
 * not thrown.
 *
 * @returns Number of targeted documents, chunks now indexed for them, failures, and elapsed time.
 */
async indexPending(): Promise<NixxieRagIndexStats> {
  const startedAt = Date.now()
  const everything = await this.listDocuments()
  const targets = everything.filter(({ status }) => status === 'pending' || status === 'error')

  const tally = { chunks: 0, errors: 0 }
  await mapWithConcurrency(targets, this.r.indexing.concurrency, async ({ id }) => {
    try {
      await this.index(id)
      const after = await this.getDocument(id)
      tally.chunks += after?.chunkCount ?? 0
    } catch {
      tally.errors += 1
    }
  })

  return {
    documents: targets.length,
    chunks: tally.chunks,
    errors: tally.errors,
    durationMs: Date.now() - startedAt,
  }
}
360
+
361
/**
 * Embed `texts` in consecutive batches of `embeddingBatchSize` and return the
 * vectors in input order.
 *
 * @param texts Strings to embed.
 * @returns One embedding per input, in the same order.
 * @throws If the provider fails, or returns a different number of vectors than it
 *         was given inputs (which would silently misalign chunks and embeddings).
 */
private async embedBatched(texts: string[]): Promise<number[][]> {
  if (texts.length === 0) return []

  // The batch size must be a whole number >= 1: zero, a negative or NaN would never
  // advance the loop, and a fractional size would produce overlapping batches.
  const configured = Math.floor(this.r.embeddingBatchSize)
  const size = configured >= 1 ? configured : 1

  const out: number[][] = []
  for (let i = 0; i < texts.length; i += size) {
    const batch = texts.slice(i, i + size)
    const vectors = await this.embedder.embed(batch, this.r.embeddingModel)
    if (vectors.length !== batch.length) {
      throw new Error(
        `[@nixxie-cms/ai-rag] Embedding provider returned ${vectors.length} vectors for ${batch.length} inputs.`
      )
    }
    out.push(...vectors)
  }
  return out
}
371
+
372
+ // ── Retrieval ──
373
+
374
/**
 * Find the chunks most relevant to `query`.
 *
 * The query is embedded and the vector store is asked for up to
 * `topK * candidateMultiplier` candidates (never fewer than `topK`) at or above
 * `minScore`. If a reranker is configured it reorders the candidates; the first
 * `topK` are returned.
 *
 * @param query   Text to search for.
 * @param options Per-call overrides for `topK`, `minScore` and a tag filter.
 * @returns Up to `topK` chunks; empty when the provider returns no embedding.
 */
async retrieve(query: string, options: NixxieRagRetrieveOptions = {}): Promise<NixxieRagChunk[]> {
  const { candidateMultiplier, embeddingModel } = this.r
  const topK = options.topK ?? this.r.topK
  const minScore = options.minScore ?? this.r.minScore

  const vectors = await this.embedder.embed([query], embeddingModel)
  const embedding = vectors[0]
  if (!embedding) return []

  const candidates = await this.vectorStore.query({
    embedding,
    // Over-fetch so a reranker has more than `topK` candidates to choose from.
    topK: Math.max(topK, topK * candidateMultiplier),
    tags: options.tags,
    minScore,
  })

  const chunks: NixxieRagChunk[] = candidates.map(
    ({ id, documentId, title, source, content, score, tags, metadata }) => ({
      id,
      documentId,
      title,
      source,
      content,
      score,
      tags,
      metadata,
    })
  )

  const ranked = this.rerank ? await this.rerank(query, chunks) : chunks
  return ranked.slice(0, topK)
}
398
+
399
+ // ── Chat ──
400
+
401
/** Answer a single question; equivalent to `chat` with one user message. */
async ask(question: string, options: NixxieRagAskOptions = {}): Promise<NixxieRagAnswer> {
  const turn: NixxieAiMessage = { role: 'user', content: question }
  return this.chat([turn], options)
}
404
+
405
/**
 * Answer the conversation in `messages` from the knowledge base.
 *
 * The call holds one slot of the chat semaphore for its whole duration: retrieval,
 * generation and the final grounding check. If the guard decides there is not
 * enough context, the configured refusal is returned without calling the model.
 *
 * @param messages Conversation so far; the most recent user turn is the question.
 * @param options  Per-call overrides.
 * @returns The answer with its citations, grounding flag and usage.
 * @throws If preparation, generation or the grounding check fails.
 */
async chat(messages: NixxieAiMessage[], options: NixxieRagAskOptions = {}): Promise<NixxieRagAnswer> {
  const release = await this.chatGate.acquire()
  try {
    const prepared = await this.prepare(messages, options)
    if (prepared.refusal) {
      return {
        text: prepared.refusal,
        sources: [],
        grounded: true,
        refused: true,
        model: this.generation.defaultModel,
      }
    }
    const result = await this.generation.generate(prepared.messages, prepared.genOptions)
    // Await here so the slot is released only after the grounding check (which may
    // call the model again) has finished; `stream` awaits `finalize` the same way.
    return await this.finalize(result.text, result.model, result.usage, prepared.context, options)
  } finally {
    release()
  }
}
424
+
425
/**
 * Streaming counterpart of `chat`.
 *
 * Event order for a normal answer: one `sources` event, then `token` events as
 * text arrives (a single `token` with the full text when the provider cannot
 * stream), then, if the grounding check replaced the answer with a refusal, one
 * more `token` carrying the refusal, and finally `done`.
 * A guard refusal before generation yields just `token` + `done`.
 * Any thrown error is reported as a single `error` event instead of rejecting.
 *
 * The chat semaphore slot is held until the generator finishes or is closed.
 */
async *stream(
  messages: NixxieAiMessage[],
  options: NixxieRagAskOptions = {}
): AsyncIterable<NixxieRagStreamEvent> {
  const release = await this.chatGate.acquire()
  try {
    const prepared = await this.prepare(messages, options)
    const { generation } = this

    if (prepared.refusal) {
      const refusedAnswer: NixxieRagAnswer = {
        text: prepared.refusal,
        sources: [],
        grounded: true,
        refused: true,
        model: generation.defaultModel,
      }
      yield { type: 'token', token: prepared.refusal }
      yield { type: 'done', answer: refusedAnswer }
      return
    }

    yield { type: 'sources', sources: prepared.citations }

    let text = ''
    let model = generation.defaultModel
    let usage: { inputTokens?: number; outputTokens?: number } | undefined

    if (!generation.stream) {
      // Provider cannot stream: generate in one go and emit the text as one token.
      const result = await generation.generate(prepared.messages, prepared.genOptions)
      text = result.text
      model = result.model
      usage = result.usage
      yield { type: 'token', token: text }
    } else {
      for await (const part of generation.stream(prepared.messages, prepared.genOptions)) {
        if (part.delta) {
          text += part.delta
          yield { type: 'token', token: part.delta }
        }
        if (part.done) {
          model = part.model ?? model
          usage = part.usage
        }
      }
    }

    const answer = await this.finalize(text, model, usage, prepared.context, options)
    // The grounding check may have swapped the answer for a refusal; stream that text as well.
    const replacedByRefusal = answer.refused && answer.text !== text
    if (replacedByRefusal) yield { type: 'token', token: answer.text }
    yield { type: 'done', answer }
  } catch (err) {
    yield { type: 'error', error: err instanceof Error ? err.message : String(err) }
  } finally {
    release()
  }
}
479
+
480
/**
 * Shared retrieve → guard → prompt pipeline for chat/stream.
 *
 * Steps:
 * 1. Drop system messages and take the most recent user turn as the question.
 * 2. Reject questions longer than `maxQueryChars`.
 * 3. Use `options.context` if supplied, otherwise retrieve context for the question.
 * 4. If the guard is on and judges the context insufficient, return a refusal.
 * 5. Build the prompt from the question, the turns preceding it (capped at
 *    `historyLimit`, where 0 means no history) and the context.
 *
 * @returns Either `{ refusal, ... }` with empty prompt data, or the context,
 *          citations, prompt messages and generation options to use.
 * @throws If the question exceeds the configured length limit, or retrieval fails.
 */
private async prepare(
  messages: NixxieAiMessage[],
  options: NixxieRagAskOptions
): Promise<{
  refusal?: string
  context: NixxieRagChunk[]
  citations: NixxieRagCitation[]
  messages: { role: 'user' | 'assistant'; content: string }[]
  genOptions: RagGenerateOptions
}> {
  const turns = messages.filter(m => m.role !== 'system')

  // Position of the most recent user turn; -1 when the conversation has none.
  let questionIndex = -1
  for (let i = turns.length - 1; i >= 0; i--) {
    if (turns[i]!.role === 'user') {
      questionIndex = i
      break
    }
  }
  const question = (questionIndex >= 0 ? turns[questionIndex]!.content : undefined) ?? ''
  if (question.length > this.r.maxQueryChars) {
    throw new Error(
      `[@nixxie-cms/ai-rag] Query exceeds the ${this.r.maxQueryChars}-character limit.`
    )
  }

  const guardOn = options.guard ?? this.r.guard.enabled
  const minScore = options.minScore ?? this.r.minScore

  const context =
    options.context ??
    (question ? await this.retrieve(question, options) : [])

  if (guardOn && shouldRefuseForNoContext(context, this.r.guard, minScore)) {
    return {
      refusal: this.r.guard.refusal,
      context: [],
      citations: [],
      messages: [],
      genOptions: {},
    }
  }

  // History is everything before the question itself. Cutting at the question's
  // position (rather than blindly dropping the last turn) keeps the question out of
  // the history when it is not the final message.
  const earlier = questionIndex >= 0 ? turns.slice(0, questionIndex) : turns.slice(0, -1)
  // `slice(-0)` is `slice(0)` and would return the whole history, so a limit of 0
  // (or anything non-positive) is handled explicitly.
  const limit = this.r.historyLimit
  const recent = limit > 0 ? earlier.slice(-limit) : []
  const history = recent.map(m => ({
    role: m.role as 'user' | 'assistant',
    content: m.content,
  }))

  const systemBase = options.systemSuffix
    ? `${this.r.generation.systemPrompt}\n\n${options.systemSuffix}`
    : this.r.generation.systemPrompt

  const promptInput = {
    question,
    history,
    context,
    systemPrompt: systemBase,
    requireCitations: guardOn && this.r.guard.requireCitations,
  }
  // A custom prompt builder, when configured, replaces the built-in one entirely.
  const built = this.r.generation.buildPrompt
    ? this.r.generation.buildPrompt(promptInput)
    : buildRagPrompt(promptInput, { maxContextChars: this.r.maxContextChars })

  const genOptions: RagGenerateOptions = {
    system: built.system,
    model: options.model ?? this.r.generation.model,
    temperature: options.temperature ?? this.r.generation.temperature,
    maxTokens: options.maxTokens ?? this.r.generation.maxTokens,
    topP: this.r.generation.topP,
  }

  return {
    context,
    citations: toCitations(context),
    messages: built.messages,
    genOptions,
  }
}
560
+
561
/**
 * Turn generated text into the final answer.
 *
 * When the guard is on and a grounding check is configured, the text is checked
 * against the context. An ungrounded answer is replaced by the configured refusal
 * (with no sources) if `refuseWhenUngrounded` is set; otherwise it is returned with
 * `grounded: false`. Without a grounding check the answer is reported as grounded.
 *
 * @param text    Generated answer text.
 * @param model   Model that produced it.
 * @param usage   Token usage reported by the provider, if any.
 * @param context Chunks the answer was generated from.
 * @param options Per-call options (only `guard` is read here).
 */
private async finalize(
  text: string,
  model: string,
  usage: { inputTokens?: number; outputTokens?: number } | undefined,
  context: NixxieRagChunk[],
  options: NixxieRagAskOptions
): Promise<NixxieRagAnswer> {
  const { guard, generation } = this.r
  const guardOn = options.guard ?? guard.enabled

  // No grounding check requested: accept the text as-is.
  if (!guardOn || !guard.groundingCheck) {
    return { text, sources: toCitations(context), grounded: true, refused: false, model, usage }
  }

  const { grounded } = await checkGrounding(
    this.generation,
    text,
    context,
    guard.groundingModel ?? generation.model
  )

  if (!grounded && guard.refuseWhenUngrounded) {
    return {
      text: guard.refusal,
      sources: [],
      grounded: false,
      refused: true,
      model,
      usage,
    }
  }

  return { text, sources: toCitations(context), grounded, refused: false, model, usage }
}
592
+
593
/**
 * Shutdown hook. Intentionally a no-op: the service holds no long-lived resources,
 * and indexing runs are awaited by whoever started them.
 */
async close(): Promise<void> {
  // Nothing to release.
}
596
+ }
597
+
598
/**
 * Convert retrieved chunks into citations, one per chunk and in the same order.
 * Each citation carries the chunk's ids, title, source and score plus a short
 * preview of its content.
 */
function toCitations(chunks: NixxieRagChunk[]): NixxieRagCitation[] {
  const citations: NixxieRagCitation[] = []
  for (const chunk of chunks) {
    const { documentId, id: chunkId, title, source, score, content } = chunk
    citations.push({ documentId, chunkId, title, source, score, snippet: snippet(content) })
  }
  return citations
}
608
+
609
/**
 * Normalise a raw database row into a `NixxieRagDocument`.
 *
 * The id is stringified, `null` columns become `undefined`, and missing values get
 * defaults: empty content, status `pending`, zero chunks, and the current time for
 * absent timestamps (`updatedAt` falls back to `createdAt` first).
 */
function rowToDocument(row: any): NixxieRagDocument {
  // Database NULLs surface as `null`; the document type uses `undefined` for "absent".
  const optional = <T>(value: T | null | undefined): T | undefined => value ?? undefined

  const id = String(row.id)
  const content = row.content ?? ''
  const status = row.status ?? 'pending'
  const chunkCount = row.chunkCount ?? 0
  const createdAt = row.createdAt ?? new Date()
  const updatedAt = row.updatedAt ?? row.createdAt ?? new Date()

  return {
    id,
    title: optional(row.title),
    content,
    source: optional(row.source),
    tags: optional(row.tags),
    metadata: optional(row.metadata as Record<string, unknown>),
    status,
    chunkCount,
    error: optional(row.error),
    createdAt,
    updatedAt,
    indexedAt: optional(row.indexedAt),
  }
}
625
+
626
/**
 * Run `fn` over `items` with at most `limit` calls in flight at once.
 *
 * Items are started in array order. The list is snapshotted up front, so changes
 * to `items` made while work is running do not affect what gets processed.
 *
 * @param items Work items.
 * @param limit Desired concurrency. Values below 1, fractional values and NaN are
 *              coerced to a whole number of at least 1, so every item is always
 *              processed.
 * @param fn    Async worker invoked once per item.
 * @returns Resolves once every item has been processed.
 * @throws Rejects with the first error thrown by `fn`. Other workers are not
 *         cancelled, so callers that need every item attempted should catch inside `fn`.
 */
async function mapWithConcurrency<T>(
  items: T[],
  limit: number,
  fn: (item: T) => Promise<void>
): Promise<void> {
  const pending = items.slice()
  if (pending.length === 0) return

  // NaN would otherwise propagate through min/max and produce zero workers.
  const requested = Number.isNaN(limit) ? 1 : Math.floor(limit)
  const workerCount = Math.max(1, Math.min(requested, pending.length))

  // A shared cursor hands out items in O(1); `shift()` re-indexes the array each time.
  let cursor = 0
  const worker = async (): Promise<void> => {
    while (cursor < pending.length) {
      const item = pending[cursor++] as T
      await fn(item)
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()))
}