@nixxie-cms/ai-rag 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +23 -0
- package/README.md +163 -0
- package/dist/declarations/src/AiRagService.d.ts +50 -0
- package/dist/declarations/src/AiRagService.d.ts.map +1 -0
- package/dist/declarations/src/admin-page.d.ts +29 -0
- package/dist/declarations/src/admin-page.d.ts.map +1 -0
- package/dist/declarations/src/chunking.d.ts +8 -0
- package/dist/declarations/src/chunking.d.ts.map +1 -0
- package/dist/declarations/src/collection.d.ts +18 -0
- package/dist/declarations/src/collection.d.ts.map +1 -0
- package/dist/declarations/src/express.d.ts +36 -0
- package/dist/declarations/src/express.d.ts.map +1 -0
- package/dist/declarations/src/graphql.d.ts +23 -0
- package/dist/declarations/src/graphql.d.ts.map +1 -0
- package/dist/declarations/src/index.d.ts +39 -0
- package/dist/declarations/src/index.d.ts.map +1 -0
- package/dist/declarations/src/plugin.d.ts +53 -0
- package/dist/declarations/src/plugin.d.ts.map +1 -0
- package/dist/declarations/src/prompt.d.ts +14 -0
- package/dist/declarations/src/prompt.d.ts.map +1 -0
- package/dist/declarations/src/providers/AnthropicRagProvider.d.ts +16 -0
- package/dist/declarations/src/providers/AnthropicRagProvider.d.ts.map +1 -0
- package/dist/declarations/src/providers/GeminiRagProvider.d.ts +19 -0
- package/dist/declarations/src/providers/GeminiRagProvider.d.ts.map +1 -0
- package/dist/declarations/src/providers/OllamaRagProvider.d.ts +23 -0
- package/dist/declarations/src/providers/OllamaRagProvider.d.ts.map +1 -0
- package/dist/declarations/src/providers/OpenAiRagProvider.d.ts +17 -0
- package/dist/declarations/src/providers/OpenAiRagProvider.d.ts.map +1 -0
- package/dist/declarations/src/providers/ServiceRagProvider.d.ts +17 -0
- package/dist/declarations/src/providers/ServiceRagProvider.d.ts.map +1 -0
- package/dist/declarations/src/providers/index.d.ts +14 -0
- package/dist/declarations/src/providers/index.d.ts.map +1 -0
- package/dist/declarations/src/providers/types.d.ts +45 -0
- package/dist/declarations/src/providers/types.d.ts.map +1 -0
- package/dist/declarations/src/similarity.d.ts +12 -0
- package/dist/declarations/src/similarity.d.ts.map +1 -0
- package/dist/declarations/src/types.d.ts +319 -0
- package/dist/declarations/src/types.d.ts.map +1 -0
- package/dist/declarations/src/vector-store.d.ts +34 -0
- package/dist/declarations/src/vector-store.d.ts.map +1 -0
- package/dist/nixxie-cms-ai-rag.cjs.d.ts +2 -0
- package/dist/nixxie-cms-ai-rag.cjs.js +2507 -0
- package/dist/nixxie-cms-ai-rag.esm.js +2481 -0
- package/package.json +37 -0
- package/src/AiRagService.ts +640 -0
- package/src/admin-page.ts +135 -0
- package/src/chunking.ts +78 -0
- package/src/collection.ts +79 -0
- package/src/express.ts +212 -0
- package/src/graphql.ts +196 -0
- package/src/guard.ts +75 -0
- package/src/index.ts +102 -0
- package/src/plugin.ts +162 -0
- package/src/prompt.ts +62 -0
- package/src/providers/AnthropicRagProvider.ts +91 -0
- package/src/providers/GeminiRagProvider.ts +147 -0
- package/src/providers/OllamaRagProvider.ts +157 -0
- package/src/providers/OpenAiRagProvider.ts +108 -0
- package/src/providers/ServiceRagProvider.ts +44 -0
- package/src/providers/index.ts +67 -0
- package/src/providers/types.ts +44 -0
- package/src/semaphore.ts +26 -0
- package/src/similarity.ts +31 -0
- package/src/types.ts +346 -0
- package/src/vector-store.ts +136 -0
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@nixxie-cms/ai-rag",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"license": "MIT",
|
|
5
|
+
"main": "dist/nixxie-cms-ai-rag.cjs.js",
|
|
6
|
+
"module": "dist/nixxie-cms-ai-rag.esm.js",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": {
|
|
9
|
+
"types": "./dist/nixxie-cms-ai-rag.cjs.js",
|
|
10
|
+
"module": "./dist/nixxie-cms-ai-rag.esm.js",
|
|
11
|
+
"default": "./dist/nixxie-cms-ai-rag.cjs.js"
|
|
12
|
+
},
|
|
13
|
+
"./package.json": "./package.json"
|
|
14
|
+
},
|
|
15
|
+
"dependencies": {
|
|
16
|
+
"@babel/runtime": "^7.24.7"
|
|
17
|
+
},
|
|
18
|
+
"devDependencies": {
|
|
19
|
+
"@nixxie-cms/core": "^1.1.0"
|
|
20
|
+
},
|
|
21
|
+
"peerDependencies": {
|
|
22
|
+
"@nixxie-cms/core": "^1.0.3"
|
|
23
|
+
},
|
|
24
|
+
"optionalDependencies": {
|
|
25
|
+
"@anthropic-ai/sdk": "^0.69.0",
|
|
26
|
+
"openai": "^6.0.0"
|
|
27
|
+
},
|
|
28
|
+
"preconstruct": {
|
|
29
|
+
"entrypoints": [
|
|
30
|
+
"index.ts"
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
"repository": {
|
|
34
|
+
"type": "git",
|
|
35
|
+
"url": "https://github.com/nixxiecms/nixxie/tree/main/packages/ai-rag"
|
|
36
|
+
}
|
|
37
|
+
}
|
|
@@ -0,0 +1,640 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
NixxieAiMessage,
|
|
3
|
+
NixxieAiRagService,
|
|
4
|
+
NixxieContext,
|
|
5
|
+
NixxieRagAnswer,
|
|
6
|
+
NixxieRagAskOptions,
|
|
7
|
+
NixxieRagChunk,
|
|
8
|
+
NixxieRagCitation,
|
|
9
|
+
NixxieRagDocument,
|
|
10
|
+
NixxieRagDocumentInput,
|
|
11
|
+
NixxieRagDocumentQuery,
|
|
12
|
+
NixxieRagIndexStats,
|
|
13
|
+
NixxieRagRetrieveOptions,
|
|
14
|
+
NixxieRagStreamEvent,
|
|
15
|
+
} from '@nixxie-cms/core'
|
|
16
|
+
import { chunkText } from './chunking'
|
|
17
|
+
import { checkGrounding, resolveGuard, shouldRefuseForNoContext, type ResolvedGuard } from './guard'
|
|
18
|
+
import {
|
|
19
|
+
resolveEmbeddingProvider,
|
|
20
|
+
resolveGenerationProvider,
|
|
21
|
+
type EmbeddingProvider,
|
|
22
|
+
type GenerationProvider,
|
|
23
|
+
type RagGenerateOptions,
|
|
24
|
+
} from './providers'
|
|
25
|
+
import { ALLOW_KNOWLEDGE_SYSTEM_PROMPT, buildRagPrompt, DEFAULT_SYSTEM_PROMPT } from './prompt'
|
|
26
|
+
import { Semaphore } from './semaphore'
|
|
27
|
+
import type {
|
|
28
|
+
AiRagConfig,
|
|
29
|
+
RagGenerationConfig,
|
|
30
|
+
VectorRecord,
|
|
31
|
+
VectorStore,
|
|
32
|
+
} from './types'
|
|
33
|
+
import { SqlVectorStore } from './vector-store'
|
|
34
|
+
|
|
35
|
+
/** Generation settings as stored on the service; `temperature` and `maxTokens` are always present. */
type ResolvedGeneration = Required<Pick<RagGenerationConfig, 'temperature' | 'maxTokens'>> & {
  systemPrompt: string
  topP?: number
  model?: string
  buildPrompt?: RagGenerationConfig['buildPrompt']
}

/** Indexing settings as stored on the service. */
type ResolvedIndexing = {
  auto: boolean
  onConnect: boolean
  schedule?: string | number
  concurrency: number
}

/**
 * The service's internal, fully-resolved configuration. The constructor fills this in from
 * `AiRagConfig`, substituting a default for every option the caller left out.
 */
type Resolved = {
  // Collection names
  documentsCollection: string
  chunksCollection: string

  // Generation and embedding
  generation: ResolvedGeneration
  embeddingBatchSize: number
  embeddingModel?: string

  // Retrieval
  topK: number
  minScore: number
  maxContextChars: number
  candidateMultiplier: number

  // Chunking
  chunkSize: number
  chunkOverlap: number
  chunkStrategy: 'recursive' | 'sentence' | 'fixed'

  // Chat limits and guard
  historyLimit: number
  maxQueryChars: number
  guard: ResolvedGuard

  // Indexing
  indexing: ResolvedIndexing
}
|
|
63
|
+
|
|
64
|
+
/**
 * Produce a short single-line preview of `text`.
 *
 * Leading/trailing whitespace is trimmed and every run of whitespace is collapsed to one space.
 * If the result is longer than `max` UTF-16 code units it is cut at `max` and `…` is appended.
 *
 * @param text Source text.
 * @param max Maximum preview length in UTF-16 code units, excluding the ellipsis (default 240).
 * @returns The normalized text, truncated with a trailing `…` when it was too long.
 */
function snippet(text: string, max = 240): string {
  const flat = text.trim().replace(/\s+/g, ' ')
  if (flat.length <= max) return flat

  let head = flat.slice(0, max)
  // Cutting on a code-unit boundary can split a surrogate pair (e.g. an emoji). Drop a trailing
  // high surrogate so the preview never ends in a lone, unrenderable half character.
  const lastUnit = head.charCodeAt(head.length - 1)
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head = head.slice(0, -1)
  return `${head}…`
}
|
|
68
|
+
|
|
69
|
+
/** The main RAG assistant. Create via `createAiRag()`; register via `ragPlugin()`. */
|
|
70
|
+
export class AiRagService implements NixxieAiRagService {
|
|
71
|
+
private documents: any | null = null
|
|
72
|
+
private generation: GenerationProvider
|
|
73
|
+
private embedder: EmbeddingProvider
|
|
74
|
+
private vectorStore: VectorStore
|
|
75
|
+
private rerank?: (query: string, chunks: NixxieRagChunk[]) => Promise<NixxieRagChunk[]>
|
|
76
|
+
private chatGate: Semaphore
|
|
77
|
+
private r: Resolved
|
|
78
|
+
|
|
79
|
+
/**
 * Build the service from `config`: resolve the generation/embedding providers, pick the vector
 * store (a `SqlVectorStore` over the chunks collection unless one is supplied), resolve the
 * guard, and store every remaining option with its default applied.
 */
constructor(config: AiRagConfig = {}) {
  const { collections, generation, embedding, retrieval, chunking, chat, limits, indexing } = config

  const documentsCollection = collections?.documents ?? 'KnowledgeBase'
  const chunksCollection = collections?.chunks ?? 'KnowledgeChunk'

  this.generation = resolveGenerationProvider(generation)
  this.embedder = resolveEmbeddingProvider(embedding)
  this.vectorStore = retrieval?.vectorStore ?? new SqlVectorStore(chunksCollection)
  this.rerank = retrieval?.rerank

  const guard = resolveGuard(config.guard)

  // The fallback system prompt depends on whether the guard lets the model use its own knowledge.
  let fallbackSystemPrompt = DEFAULT_SYSTEM_PROMPT
  if (guard.allowModelKnowledge) fallbackSystemPrompt = ALLOW_KNOWLEDGE_SYSTEM_PROMPT

  const resolvedGeneration: Resolved['generation'] = {
    temperature: generation?.temperature ?? 0.2,
    maxTokens: generation?.maxTokens ?? 1024,
    topP: generation?.topP,
    model: generation?.model,
    systemPrompt: generation?.systemPrompt ?? fallbackSystemPrompt,
    buildPrompt: generation?.buildPrompt,
  }

  const resolvedIndexing: Resolved['indexing'] = {
    auto: indexing?.auto ?? true,
    onConnect: indexing?.onConnect ?? true,
    schedule: indexing?.schedule,
    concurrency: indexing?.concurrency ?? 4,
  }

  this.r = {
    documentsCollection,
    chunksCollection,
    generation: resolvedGeneration,
    embeddingBatchSize: embedding?.batchSize ?? 64,
    embeddingModel: embedding?.model,
    topK: retrieval?.topK ?? 5,
    minScore: retrieval?.minScore ?? 0.2,
    maxContextChars: retrieval?.maxContextChars ?? 6000,
    candidateMultiplier: retrieval?.candidateMultiplier ?? 4,
    chunkSize: chunking?.chunkSize ?? 1200,
    chunkOverlap: chunking?.chunkOverlap ?? 200,
    chunkStrategy: chunking?.strategy ?? 'recursive',
    historyLimit: chat?.historyLimit ?? 10,
    maxQueryChars: limits?.maxQueryChars ?? 8000,
    guard,
    indexing: resolvedIndexing,
  }

  // Caps how many chat/stream calls may run at the same time.
  this.chatGate = new Semaphore(limits?.maxConcurrentChats ?? 8)
}
|
|
125
|
+
|
|
126
|
+
/** Names of the documents and chunks collections this service works with (used by the plugin). */
get collections(): { documents: string; chunks: string } {
  const { documentsCollection: documents, chunksCollection: chunks } = this.r
  return { documents, chunks }
}
|
|
130
|
+
|
|
131
|
+
/** The configured `indexing.schedule`, or `undefined` when none was provided. */
get indexingSchedule(): string | number | undefined {
  const { schedule } = this.r.indexing
  return schedule
}
|
|
134
|
+
|
|
135
|
+
// ── Lifecycle ──
|
|
136
|
+
|
|
137
|
+
/**
 * Bind the service to a connected context: look up the documents delegate, initialise the
 * vector store (if it has an `init`), and — when `indexing.onConnect` is set — start indexing
 * whatever is still pending.
 *
 * @throws If the documents collection is missing from the Prisma client.
 */
async init(context: NixxieContext): Promise<void> {
  this.documents = this.requireDelegate(context, this.r.documentsCollection)
  await this.vectorStore.init?.(context)

  if (!this.r.indexing.onConnect) return

  // Fire-and-forget on purpose: a failing initial index run is logged, never blocks boot.
  void this.indexPending().catch(err => {
    console.error('[@nixxie-cms/ai-rag] Initial indexing failed:', err)
  })
}
|
|
147
|
+
|
|
148
|
+
/**
 * Look up the Prisma delegate for `listKey` on `context.prisma`.
 *
 * The delegate is read from the property named after the list key with its first letter
 * lower-cased (e.g. `KnowledgeBase` → `knowledgeBase`).
 *
 * @throws A descriptive error when no such delegate exists (including for an empty key).
 */
private requireDelegate(context: NixxieContext, listKey: string): any {
  // `charAt` (unlike `listKey[0]`) is safe for an empty key, so a misconfigured name reaches
  // the descriptive error below instead of failing with a TypeError.
  const delegateKey = listKey.charAt(0).toLowerCase() + listKey.slice(1)
  const delegate = (context.prisma as any)?.[delegateKey]
  if (!delegate) {
    throw new Error(
      `[@nixxie-cms/ai-rag] Collection "${listKey}" was not found in the Prisma client. ` +
        `Register it via \`ragPlugin()\` (or add \`${listKey}: knowledgeBaseCollection()\`) and run a migration.`
    )
  }
  return delegate
}
|
|
158
|
+
|
|
159
|
+
/**
 * Return the documents delegate set by `init()`.
 *
 * @throws If `init()` has not run yet.
 */
private requireDocuments(): any {
  const delegate = this.documents
  if (delegate) return delegate
  throw new Error(
    '[@nixxie-cms/ai-rag] Not ready yet — the knowledge base is available once the database has connected.'
  )
}
|
|
167
|
+
|
|
168
|
+
// ── Knowledge-base content ──
|
|
169
|
+
|
|
170
|
+
/** Add a single document; a thin wrapper around `addDocuments` with a one-element list. */
async addDocument(doc: NixxieRagDocumentInput): Promise<NixxieRagDocument> {
  const created = await this.addDocuments([doc])
  return created[0]!
}
|
|
174
|
+
|
|
175
|
+
/**
 * Create one knowledge-base row per input, in order, each starting as `pending` with zero
 * chunks. When `indexing.auto` is on, every new document is then indexed (failures are logged,
 * not thrown) and the rows are re-read so the result reflects the post-index state.
 *
 * @returns The created documents, in input order.
 */
async addDocuments(docs: NixxieRagDocumentInput[]): Promise<NixxieRagDocument[]> {
  const model = this.requireDocuments()
  const created: NixxieRagDocument[] = []

  for (const { title, content, source, tags, metadata } of docs) {
    const row = await model.create({
      data: {
        title: title ?? null,
        content,
        source: source ?? null,
        tags: tags ?? [],
        metadata: metadata ?? null,
        status: 'pending',
        chunkCount: 0,
      },
    })
    created.push(rowToDocument(row))
  }

  if (!this.r.indexing.auto) return created

  for (const { id } of created) {
    try {
      await this.index(id)
    } catch (err) {
      console.error(`[@nixxie-cms/ai-rag] Failed to index document ${id}:`, err)
    }
  }

  // Re-read to reflect post-index status; fall back to the pre-index copy if a row vanished.
  const latest = await Promise.all(created.map(({ id }) => this.getDocument(id)))
  return latest.map((fresh, position) => fresh ?? created[position]!)
}
|
|
206
|
+
|
|
207
|
+
/** Fetch one document by id, or `undefined` when no row has that id. */
async getDocument(id: string): Promise<NixxieRagDocument | undefined> {
  const model = this.requireDocuments()
  const row = await model.findUnique({ where: { id } })
  if (!row) return undefined
  return rowToDocument(row)
}
|
|
211
|
+
|
|
212
|
+
/**
 * List documents, newest first, optionally filtered by status, a substring search over
 * title/content/source, and tags (a document must carry every requested tag).
 *
 * Tag filtering is done in Node for cross-database portability (tags are stored as JSON).
 * Because of that, when `tags` is given the `skip`/`take` window is applied in Node *after*
 * the tag filter — paginating in the database first would return short pages and skip
 * matching documents. Without `tags`, pagination is delegated to the database as before.
 *
 * @param query Filters and pagination (`skip` defaults to 0; `take` is unbounded when omitted).
 */
async listDocuments(query: NixxieRagDocumentQuery = {}): Promise<NixxieRagDocument[]> {
  const where: any = {}
  if (query.status) where.status = { equals: query.status }
  if (query.search) {
    where.OR = [
      { title: { contains: query.search } },
      { content: { contains: query.search } },
      { source: { contains: query.search } },
    ]
  }

  const requiredTags = query.tags ?? []
  const filterByTags = requiredTags.length > 0
  const skip = query.skip ?? 0
  const dbWindow = { skip, ...(query.take !== undefined ? { take: query.take } : {}) }

  const rows: any[] = await this.requireDocuments().findMany({
    where,
    orderBy: { createdAt: 'desc' },
    ...(filterByTags ? {} : dbWindow),
  })
  const docs = rows.map(row => rowToDocument(row))
  if (!filterByTags) return docs

  const matching = docs.filter(d => requiredTags.every(t => (d.tags ?? []).includes(t)))
  // Assumes a non-negative `take`; the window is applied to the filtered list.
  const end = query.take !== undefined ? skip + query.take : undefined
  return matching.slice(skip, end)
}
|
|
235
|
+
|
|
236
|
+
/**
 * Apply `patch` to a document and mark it `pending` again. Only fields present in the patch
 * (not `undefined`) are written. With `indexing.auto` on, the document is re-indexed right
 * away; a re-index failure is logged rather than thrown.
 *
 * @returns The document as re-read after the update (and re-index, if any).
 * @throws If the document cannot be read back after the update.
 */
async updateDocument(
  id: string,
  patch: Partial<NixxieRagDocumentInput>
): Promise<NixxieRagDocument> {
  const model = this.requireDocuments()

  const data: any = { status: 'pending' }
  for (const field of ['title', 'content', 'source', 'tags', 'metadata'] as const) {
    const value = patch[field]
    if (value !== undefined) data[field] = value
  }
  await model.update({ where: { id }, data })

  if (this.r.indexing.auto) {
    try {
      await this.index(id)
    } catch (err) {
      console.error(`[@nixxie-cms/ai-rag] Failed to re-index document ${id}:`, err)
    }
  }

  const updated = await this.getDocument(id)
  if (updated) return updated
  throw new Error(`[@nixxie-cms/ai-rag] Document not found after update: ${id}`)
}
|
|
259
|
+
|
|
260
|
+
/** Remove a document: its indexed chunks are deleted first, then the document row itself. */
async removeDocument(id: string): Promise<void> {
  await this.vectorStore.deleteByDocument(id)
  const model = this.requireDocuments()
  await model.delete({ where: { id } })
}
|
|
264
|
+
|
|
265
|
+
/**
 * Drop every indexed chunk belonging to `documentId`, leaving the document row alone.
 * The auto-index delete hook uses this, since by then the CMS has already removed the KB row.
 */
async purgeChunks(documentId: string): Promise<void> {
  const store = this.vectorStore
  await store.deleteByDocument(documentId)
}
|
|
272
|
+
|
|
273
|
+
// ── Indexing ──
|
|
274
|
+
|
|
275
|
+
/**
 * (Re-)index one document: chunk its title + content, embed the chunks, and replace the
 * document's records in the vector store.
 *
 * - A `disabled` document is not indexed; its chunks are removed and `chunkCount` reset to 0.
 * - Otherwise the row moves to `indexing`, then to `indexed` (with `chunkCount`/`indexedAt`)
 *   on success, or to `error` (with the message) on failure.
 *
 * @throws If the document does not exist, or if chunking/embedding/storing fails. The
 *   original failure is always the error that propagates.
 */
async index(documentId: string): Promise<void> {
  const model = this.requireDocuments()
  const row = await model.findUnique({ where: { id: documentId } })
  if (!row) throw new Error(`[@nixxie-cms/ai-rag] Document not found: ${documentId}`)
  const doc = rowToDocument(row)

  if (doc.status === 'disabled') {
    await this.vectorStore.deleteByDocument(documentId)
    await model.update({ where: { id: documentId }, data: { chunkCount: 0 } })
    return
  }

  await model.update({ where: { id: documentId }, data: { status: 'indexing', error: null } })
  try {
    const text = [doc.title, doc.content].filter(Boolean).join('\n\n')
    const pieces = chunkText(text, {
      strategy: this.r.chunkStrategy,
      chunkSize: this.r.chunkSize,
      chunkOverlap: this.r.chunkOverlap,
    })
    const embeddings = await this.embedBatched(pieces)
    // A provider returning too few vectors would otherwise store chunks with an undefined
    // embedding; fail loudly so the document is marked `error` instead.
    if (embeddings.length !== pieces.length) {
      throw new Error(
        `[@nixxie-cms/ai-rag] Embedding provider returned ${embeddings.length} vectors for ${pieces.length} chunks.`
      )
    }
    const records: VectorRecord[] = pieces.map((content, i) => ({
      id: `${documentId}:${i}`,
      documentId,
      content,
      embedding: embeddings[i]!,
      title: doc.title,
      source: doc.source,
      tags: doc.tags,
      metadata: doc.metadata,
    }))
    await this.vectorStore.upsert(documentId, records)
    await model.update({
      where: { id: documentId },
      data: {
        status: 'indexed',
        chunkCount: records.length,
        error: null,
        indexedAt: new Date(),
      },
    })
  } catch (err) {
    // Recording the failure is best-effort: if this write fails too (e.g. the row was deleted
    // mid-index), log it and still rethrow the original error rather than masking it.
    try {
      await model.update({
        where: { id: documentId },
        data: { status: 'error', error: err instanceof Error ? err.message : String(err) },
      })
    } catch (updateErr) {
      console.error(
        `[@nixxie-cms/ai-rag] Failed to record indexing error for document ${documentId}:`,
        updateErr
      )
    }
    throw err
  }
}
|
|
323
|
+
|
|
324
|
+
/**
 * Index documents in bulk, optionally restricted to those carrying all of `options.tags`.
 * With `force` every listed document is re-indexed; otherwise only those whose status is not
 * `indexed`. Per-document failures are counted, not thrown.
 *
 * @returns Number of targeted documents, total chunks after indexing, error count, duration.
 */
async reindex(options: { force?: boolean; tags?: string[] } = {}): Promise<NixxieRagIndexStats> {
  const startedAt = Date.now()
  const listed = await this.listDocuments({ tags: options.tags })
  const targets = options.force ? listed : listed.filter(({ status }) => status !== 'indexed')

  const tally = { chunks: 0, errors: 0 }
  const indexOne = async (target: NixxieRagDocument): Promise<void> => {
    try {
      await this.index(target.id)
      const after = await this.getDocument(target.id)
      tally.chunks += after?.chunkCount ?? 0
    } catch {
      tally.errors++
    }
  }
  await mapWithConcurrency(targets, this.r.indexing.concurrency, indexOne)

  return {
    documents: targets.length,
    chunks: tally.chunks,
    errors: tally.errors,
    durationMs: Date.now() - startedAt,
  }
}
|
|
341
|
+
|
|
342
|
+
/**
 * Index every document whose status is `pending` or `error`, running up to
 * `indexing.concurrency` at once. Per-document failures are counted, not thrown.
 *
 * @returns Number of targeted documents, total chunks after indexing, error count, duration.
 */
async indexPending(): Promise<NixxieRagIndexStats> {
  const startedAt = Date.now()
  const everything = await this.listDocuments()
  const pending = everything.filter(({ status }) => status === 'pending' || status === 'error')

  const tally = { chunks: 0, errors: 0 }
  const indexOne = async (target: NixxieRagDocument): Promise<void> => {
    try {
      await this.index(target.id)
      const after = await this.getDocument(target.id)
      tally.chunks += after?.chunkCount ?? 0
    } catch {
      tally.errors++
    }
  }
  await mapWithConcurrency(pending, this.r.indexing.concurrency, indexOne)

  return {
    documents: pending.length,
    chunks: tally.chunks,
    errors: tally.errors,
    durationMs: Date.now() - startedAt,
  }
}
|
|
360
|
+
|
|
361
|
+
/**
 * Embed `texts` in consecutive batches of at most `embeddingBatchSize` items and return the
 * vectors concatenated in batch order.
 *
 * The configured batch size is sanitized first: anything that is not a number ≥ 1 (0, a
 * negative value, NaN) falls back to 1, and fractional sizes are rounded down. Without this a
 * batch size of 0 would never advance the loop.
 */
private async embedBatched(texts: string[]): Promise<number[][]> {
  if (texts.length === 0) return []

  const configured = this.r.embeddingBatchSize
  // `configured >= 1` is false for NaN, so every invalid value lands on the safe fallback.
  const size = configured >= 1 ? Math.floor(Math.min(configured, texts.length)) : 1

  const out: number[][] = []
  for (let start = 0; start < texts.length; start += size) {
    const batch = texts.slice(start, start + size)
    out.push(...(await this.embedder.embed(batch, this.r.embeddingModel)))
  }
  return out
}
|
|
371
|
+
|
|
372
|
+
// ── Retrieval ──
|
|
373
|
+
|
|
374
|
+
/**
 * Retrieve the chunks most relevant to `query`.
 *
 * The query is embedded, the vector store is asked for an over-sized candidate set
 * (`topK × candidateMultiplier`, never fewer than `topK`), the optional `rerank` hook reorders
 * the candidates, and the first `topK` are returned.
 *
 * @param options Per-call overrides for `topK`, `minScore`, and a `tags` filter.
 * @returns At most `topK` chunks; empty when `topK` is not positive or no embedding came back.
 */
async retrieve(query: string, options: NixxieRagRetrieveOptions = {}): Promise<NixxieRagChunk[]> {
  const topK = options.topK ?? this.r.topK
  const minScore = options.minScore ?? this.r.minScore

  // Nothing can be returned for a non-positive (or NaN) topK, so skip the embedding call.
  // This also keeps a negative value from reaching `slice(0, topK)` below.
  if (!(topK > 0)) return []

  const [embedding] = await this.embedder.embed([query], this.r.embeddingModel)
  if (!embedding) return []

  // Round up so a fractional multiplier never hands the store a non-integer limit.
  const candidateCount = Math.ceil(Math.max(topK, topK * this.r.candidateMultiplier))
  const candidates = await this.vectorStore.query({
    embedding,
    topK: candidateCount,
    tags: options.tags,
    minScore,
  })

  let chunks: NixxieRagChunk[] = candidates.map(c => ({
    id: c.id,
    documentId: c.documentId,
    title: c.title,
    source: c.source,
    content: c.content,
    score: c.score,
    tags: c.tags,
    metadata: c.metadata,
  }))
  if (this.rerank) chunks = await this.rerank(query, chunks)
  return chunks.slice(0, topK)
}
|
|
398
|
+
|
|
399
|
+
// ── Chat ──
|
|
400
|
+
|
|
401
|
+
/** Answer a single question: shorthand for `chat` with one user message. */
async ask(question: string, options: NixxieRagAskOptions = {}): Promise<NixxieRagAnswer> {
  const conversation: NixxieAiMessage[] = [{ role: 'user', content: question }]
  return this.chat(conversation, options)
}
|
|
404
|
+
|
|
405
|
+
/**
 * Answer the conversation in one shot.
 *
 * Runs the shared retrieve → guard → prompt pipeline, then either returns the guard's refusal
 * (no model call) or generates an answer and passes it through `finalize`. A slot on the chat
 * semaphore is held for the whole call and always released.
 *
 * @throws Whatever `prepare`, the generation provider, or `finalize` throws.
 */
async chat(messages: NixxieAiMessage[], options: NixxieRagAskOptions = {}): Promise<NixxieRagAnswer> {
  const release = await this.chatGate.acquire()
  try {
    const prepared = await this.prepare(messages, options)
    // Compare against `undefined`, not truthiness: an empty refusal string is still a refusal,
    // and falling through would call the model with no messages.
    if (prepared.refusal !== undefined) {
      return {
        text: prepared.refusal,
        sources: [],
        grounded: true,
        refused: true,
        model: this.generation.defaultModel,
      }
    }
    const result = await this.generation.generate(prepared.messages, prepared.genOptions)
    return this.finalize(result.text, result.model, result.usage, prepared.context, options)
  } finally {
    release()
  }
}
|
|
424
|
+
|
|
425
|
+
/**
 * Answer the conversation as a stream of events.
 *
 * Event order on the normal path: one `sources` event, then `token` events (one per delta when
 * the provider can stream, otherwise a single token with the whole text), then — only if the
 * grounding check replaced the answer with a refusal — one more `token` carrying the refusal,
 * and finally `done` with the complete answer. When the guard refuses up front, only a `token`
 * (the refusal) and `done` are emitted. Any thrown error is reported as a single `error` event
 * instead of propagating. The chat semaphore slot is held until the generator finishes.
 */
async *stream(
  messages: NixxieAiMessage[],
  options: NixxieRagAskOptions = {}
): AsyncIterable<NixxieRagStreamEvent> {
  const release = await this.chatGate.acquire()
  try {
    const prepared = await this.prepare(messages, options)
    // Compare against `undefined`, not truthiness: an empty refusal string is still a refusal,
    // and falling through would call the model with no messages.
    if (prepared.refusal !== undefined) {
      const answer: NixxieRagAnswer = {
        text: prepared.refusal,
        sources: [],
        grounded: true,
        refused: true,
        model: this.generation.defaultModel,
      }
      yield { type: 'token', token: prepared.refusal }
      yield { type: 'done', answer }
      return
    }

    yield { type: 'sources', sources: prepared.citations }

    let text = ''
    let model = this.generation.defaultModel
    let usage: { inputTokens?: number; outputTokens?: number } | undefined
    if (this.generation.stream) {
      for await (const part of this.generation.stream(prepared.messages, prepared.genOptions)) {
        if (part.delta) {
          text += part.delta
          yield { type: 'token', token: part.delta }
        }
        if (part.done) {
          model = part.model ?? model
          usage = part.usage
        }
      }
    } else {
      // Provider cannot stream: generate once and emit the whole answer as a single token.
      const result = await this.generation.generate(prepared.messages, prepared.genOptions)
      text = result.text
      model = result.model
      usage = result.usage
      yield { type: 'token', token: text }
    }

    const answer = await this.finalize(text, model, usage, prepared.context, options)
    // If the grounding check rewrote the answer to a refusal, surface that token too.
    if (answer.refused && answer.text !== text) yield { type: 'token', token: answer.text }
    yield { type: 'done', answer }
  } catch (err) {
    yield { type: 'error', error: err instanceof Error ? err.message : String(err) }
  } finally {
    release()
  }
}
|
|
479
|
+
|
|
480
|
+
/**
 * Shared retrieve → guard → prompt pipeline for chat/stream.
 *
 * Steps: drop `system` messages; take the last `user` message as the question; enforce the
 * query-length limit; use `options.context` or retrieve context for the question; if the guard
 * is on and decides there is not enough context, return its refusal; otherwise build the
 * prompt (custom `buildPrompt` if configured, else `buildRagPrompt`) and the generation options.
 *
 * History is every non-system turn except the last one, limited to the most recent
 * `historyLimit` turns. A `historyLimit` of 0 (or less) means no history at all.
 *
 * @returns Either `{ refusal, … }` with empty context/messages, or the prepared prompt with
 *   its context, citations, and generation options.
 * @throws If the question is longer than `maxQueryChars`.
 */
private async prepare(
  messages: NixxieAiMessage[],
  options: NixxieRagAskOptions
): Promise<{
  refusal?: string
  context: NixxieRagChunk[]
  citations: NixxieRagCitation[]
  messages: { role: 'user' | 'assistant'; content: string }[]
  genOptions: RagGenerateOptions
}> {
  const turns = messages.filter(m => m.role !== 'system')
  const lastUser = [...turns].reverse().find(m => m.role === 'user')
  const question = lastUser?.content ?? ''
  if (question.length > this.r.maxQueryChars) {
    throw new Error(
      `[@nixxie-cms/ai-rag] Query exceeds the ${this.r.maxQueryChars}-character limit.`
    )
  }

  const guardOn = options.guard ?? this.r.guard.enabled
  const minScore = options.minScore ?? this.r.minScore

  // Caller-supplied context wins; otherwise retrieve, but never for an empty question.
  const context =
    options.context ??
    (question ? await this.retrieve(question, options) : [])

  if (guardOn && shouldRefuseForNoContext(context, this.r.guard, minScore)) {
    return {
      refusal: this.r.guard.refusal,
      context: [],
      citations: [],
      messages: [],
      genOptions: {},
    }
  }

  // `slice(-0)` is `slice(0)` and would keep *everything*, so a limit of 0 needs its own branch.
  const earlierTurns = turns.slice(0, -1)
  const limit = this.r.historyLimit
  const recentTurns = limit > 0 ? earlierTurns.slice(-limit) : []
  const history = recentTurns.map(m => ({
    role: m.role as 'user' | 'assistant',
    content: m.content,
  }))

  const systemBase = options.systemSuffix
    ? `${this.r.generation.systemPrompt}\n\n${options.systemSuffix}`
    : this.r.generation.systemPrompt

  const promptInput = {
    question,
    history,
    context,
    systemPrompt: systemBase,
    requireCitations: guardOn && this.r.guard.requireCitations,
  }
  const built = this.r.generation.buildPrompt
    ? this.r.generation.buildPrompt(promptInput)
    : buildRagPrompt(promptInput, { maxContextChars: this.r.maxContextChars })

  const genOptions: RagGenerateOptions = {
    system: built.system,
    model: options.model ?? this.r.generation.model,
    temperature: options.temperature ?? this.r.generation.temperature,
    maxTokens: options.maxTokens ?? this.r.generation.maxTokens,
    topP: this.r.generation.topP,
  }

  return {
    context,
    citations: toCitations(context),
    messages: built.messages,
    genOptions,
  }
}
|
|
560
|
+
|
|
561
|
+
/**
 * Turn generated text into the final answer.
 *
 * When the guard is on and a grounding check is configured, the text is checked against the
 * context (using the grounding model, or the generation model if none is set). An ungrounded
 * answer becomes the guard's refusal with no sources if `refuseWhenUngrounded` is set;
 * otherwise the answer is returned with citations and the check's `grounded` verdict.
 * Without a check, the answer is reported as grounded.
 */
private async finalize(
  text: string,
  model: string,
  usage: { inputTokens?: number; outputTokens?: number } | undefined,
  context: NixxieRagChunk[],
  options: NixxieRagAskOptions
): Promise<NixxieRagAnswer> {
  const guardOn = options.guard ?? this.r.guard.enabled
  const shouldCheck = guardOn && this.r.guard.groundingCheck

  if (!shouldCheck) {
    return { text, sources: toCitations(context), grounded: true, refused: false, model, usage }
  }

  const checkModel = this.r.guard.groundingModel ?? this.r.generation.model
  const { grounded } = await checkGrounding(this.generation, text, context, checkModel)

  if (!grounded && this.r.guard.refuseWhenUngrounded) {
    return {
      text: this.r.guard.refusal,
      sources: [],
      grounded: false,
      refused: true,
      model,
      usage,
    }
  }

  return { text, sources: toCitations(context), grounded, refused: false, model, usage }
}
|
|
592
|
+
|
|
593
|
+
/** Shut the service down. Currently a no-op. */
async close(): Promise<void> {
  // Nothing long-lived is held by the service; indexing runs are awaited by whoever starts them.
  return
}
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
/** Convert retrieved chunks into citations, replacing each chunk's content with a short snippet. */
function toCitations(chunks: NixxieRagChunk[]): NixxieRagCitation[] {
  const citations: NixxieRagCitation[] = []
  for (const { documentId, id, title, source, score, content } of chunks) {
    citations.push({
      documentId,
      chunkId: id,
      title,
      source,
      score,
      snippet: snippet(content),
    })
  }
  return citations
}
|
|
608
|
+
|
|
609
|
+
/**
 * Map a raw database row to a `NixxieRagDocument`.
 *
 * The id is stringified, `null` optional fields become `undefined`, and missing values get
 * defaults: empty content, `pending` status, zero chunks, and "now" for missing timestamps
 * (`updatedAt` falls back to `createdAt` first).
 */
function rowToDocument(row: any): NixxieRagDocument {
  const { id, title, content, source, tags, metadata, status, chunkCount, error, indexedAt } = row
  return {
    id: String(id),
    title: title ?? undefined,
    content: content ?? '',
    source: source ?? undefined,
    // Arrays and any other non-null value pass through unchanged; only null/undefined collapse.
    tags: tags ?? undefined,
    metadata: (metadata as Record<string, unknown>) ?? undefined,
    status: status ?? 'pending',
    chunkCount: chunkCount ?? 0,
    error: error ?? undefined,
    createdAt: row.createdAt ?? new Date(),
    updatedAt: row.updatedAt ?? row.createdAt ?? new Date(),
    indexedAt: indexedAt ?? undefined,
  }
}
|
|
625
|
+
|
|
626
|
+
/**
 * Run `fn` over items with at most `limit` in flight at once.
 *
 * Items are handed out in order to a fixed pool of workers. `limit` is sanitized: fractional
 * values are rounded down and anything that is not a number ≥ 1 (0, negative, NaN) falls back
 * to a single worker, so every item is always processed. The returned promise rejects with
 * the first error thrown by `fn`.
 *
 * @param items Items to process; the array is snapshotted up front and never mutated.
 * @param limit Maximum number of concurrent `fn` calls.
 * @param fn Async callback invoked once per item.
 */
async function mapWithConcurrency<T>(
  items: T[],
  limit: number,
  fn: (item: T) => Promise<void>
): Promise<void> {
  if (items.length === 0) return

  const snapshot = [...items]
  // `requested >= 1` is false for NaN, which would otherwise produce zero workers and silently
  // skip every item.
  const requested = Math.floor(limit)
  const workerCount = requested >= 1 ? Math.min(requested, snapshot.length) : 1

  // A shared cursor hands out the next item in O(1) (no array shifting).
  let cursor = 0
  const worker = async (): Promise<void> => {
    while (cursor < snapshot.length) {
      const item = snapshot[cursor++] as T
      await fn(item)
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()))
}
|