@nixxie-cms/ai-rag 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/LICENSE +23 -0
  2. package/README.md +163 -0
  3. package/dist/declarations/src/AiRagService.d.ts +50 -0
  4. package/dist/declarations/src/AiRagService.d.ts.map +1 -0
  5. package/dist/declarations/src/admin-page.d.ts +29 -0
  6. package/dist/declarations/src/admin-page.d.ts.map +1 -0
  7. package/dist/declarations/src/chunking.d.ts +8 -0
  8. package/dist/declarations/src/chunking.d.ts.map +1 -0
  9. package/dist/declarations/src/collection.d.ts +18 -0
  10. package/dist/declarations/src/collection.d.ts.map +1 -0
  11. package/dist/declarations/src/express.d.ts +36 -0
  12. package/dist/declarations/src/express.d.ts.map +1 -0
  13. package/dist/declarations/src/graphql.d.ts +23 -0
  14. package/dist/declarations/src/graphql.d.ts.map +1 -0
  15. package/dist/declarations/src/index.d.ts +39 -0
  16. package/dist/declarations/src/index.d.ts.map +1 -0
  17. package/dist/declarations/src/plugin.d.ts +53 -0
  18. package/dist/declarations/src/plugin.d.ts.map +1 -0
  19. package/dist/declarations/src/prompt.d.ts +14 -0
  20. package/dist/declarations/src/prompt.d.ts.map +1 -0
  21. package/dist/declarations/src/providers/AnthropicRagProvider.d.ts +16 -0
  22. package/dist/declarations/src/providers/AnthropicRagProvider.d.ts.map +1 -0
  23. package/dist/declarations/src/providers/GeminiRagProvider.d.ts +19 -0
  24. package/dist/declarations/src/providers/GeminiRagProvider.d.ts.map +1 -0
  25. package/dist/declarations/src/providers/OllamaRagProvider.d.ts +23 -0
  26. package/dist/declarations/src/providers/OllamaRagProvider.d.ts.map +1 -0
  27. package/dist/declarations/src/providers/OpenAiRagProvider.d.ts +17 -0
  28. package/dist/declarations/src/providers/OpenAiRagProvider.d.ts.map +1 -0
  29. package/dist/declarations/src/providers/ServiceRagProvider.d.ts +17 -0
  30. package/dist/declarations/src/providers/ServiceRagProvider.d.ts.map +1 -0
  31. package/dist/declarations/src/providers/index.d.ts +14 -0
  32. package/dist/declarations/src/providers/index.d.ts.map +1 -0
  33. package/dist/declarations/src/providers/types.d.ts +45 -0
  34. package/dist/declarations/src/providers/types.d.ts.map +1 -0
  35. package/dist/declarations/src/similarity.d.ts +12 -0
  36. package/dist/declarations/src/similarity.d.ts.map +1 -0
  37. package/dist/declarations/src/types.d.ts +319 -0
  38. package/dist/declarations/src/types.d.ts.map +1 -0
  39. package/dist/declarations/src/vector-store.d.ts +34 -0
  40. package/dist/declarations/src/vector-store.d.ts.map +1 -0
  41. package/dist/nixxie-cms-ai-rag.cjs.d.ts +2 -0
  42. package/dist/nixxie-cms-ai-rag.cjs.js +2507 -0
  43. package/dist/nixxie-cms-ai-rag.esm.js +2481 -0
  44. package/package.json +37 -0
  45. package/src/AiRagService.ts +640 -0
  46. package/src/admin-page.ts +135 -0
  47. package/src/chunking.ts +78 -0
  48. package/src/collection.ts +79 -0
  49. package/src/express.ts +212 -0
  50. package/src/graphql.ts +196 -0
  51. package/src/guard.ts +75 -0
  52. package/src/index.ts +102 -0
  53. package/src/plugin.ts +162 -0
  54. package/src/prompt.ts +62 -0
  55. package/src/providers/AnthropicRagProvider.ts +91 -0
  56. package/src/providers/GeminiRagProvider.ts +147 -0
  57. package/src/providers/OllamaRagProvider.ts +157 -0
  58. package/src/providers/OpenAiRagProvider.ts +108 -0
  59. package/src/providers/ServiceRagProvider.ts +44 -0
  60. package/src/providers/index.ts +67 -0
  61. package/src/providers/types.ts +44 -0
  62. package/src/semaphore.ts +26 -0
  63. package/src/similarity.ts +31 -0
  64. package/src/types.ts +346 -0
  65. package/src/vector-store.ts +136 -0
package/src/types.ts ADDED
@@ -0,0 +1,346 @@
1
+ import type { NixxieAiService, NixxieRagChunk, NixxieRagCitation } from '@nixxie-cms/core'
2
+
3
+ export type {
4
+ NixxieAiRagService,
5
+ NixxieRagDocument,
6
+ NixxieRagDocumentInput,
7
+ NixxieRagDocumentQuery,
8
+ NixxieRagChunk,
9
+ NixxieRagCitation,
10
+ NixxieRagAnswer,
11
+ NixxieRagAskOptions,
12
+ NixxieRagRetrieveOptions,
13
+ NixxieRagStreamEvent,
14
+ NixxieRagIndexStats,
15
+ } from '@nixxie-cms/core'
16
+
17
/** Model backends that ai-rag can drive for answer generation and/or embeddings. */
export type RagProviderName =
  | 'anthropic'
  | 'openai'
  | 'gemini'
  | 'ollama'
19
+
20
/**
 * Connection settings for a single model provider. Two modes are supported: hand over
 * an existing `NixxieAiService` through `service` (for instance the one configured as
 * `context.services.ai`), or supply the provider name plus credentials so that ai-rag
 * builds the client itself.
 */
export type RagProviderConfig = {
  /** Which provider to use. Falls back to 'anthropic' for generation and 'openai' for embeddings. */
  provider?: RagProviderName
  /** API key. May be omitted for `ollama` (local server) or when `service` is supplied. */
  apiKey?: string
  /** Model identifier. A sensible per-provider default applies when left out. */
  model?: string
  /**
   * Alternative API base URL. Needed for `ollama` when the server is not at
   * http://localhost:11434; also the hook for gateways, proxies and Azure.
   */
  baseUrl?: string
  /**
   * A ready-made service to reuse rather than building a new client — convenient for
   * sharing `context.services.ai`. When present, the other fields are ignored for that role.
   */
  service?: NixxieAiService
  /** Additional provider-specific options, merged into every request body. */
  extra?: Record<string, unknown>
}
45
+
46
/** Settings for the model that writes the answers. */
export type RagGenerationConfig = RagProviderConfig & {
  /**
   * Sampling temperature; lower values give more grounded, deterministic output.
   * @default 0.2
   */
  temperature?: number
  /**
   * Upper bound on output tokens for a single answer.
   * @default 1024
   */
  maxTokens?: number
  /** Nucleus-sampling cutoff, forwarded when the provider supports it. */
  topP?: number
  /**
   * Base system prompt; the retrieved context and grounding rules are appended to it.
   * @default a strict, citation-first assistant prompt
   */
  systemPrompt?: string
  /**
   * Takes over assembly of the final prompt from the question and the retrieved chunks,
   * returning the messages sent to the model. Without it, a built-in template injects
   * numbered sources and citation instructions.
   */
  buildPrompt?: (args: PromptBuildArgs) => PromptBuildResult
}
72
+
73
/** Settings for the embedding model, which is often a different (even local) provider. */
export type RagEmbeddingConfig = RagProviderConfig & {
  /**
   * Expected vector dimensionality. Serves to validate stored vectors and to detect a
   * model change that calls for a full reindex. Optional: inferred on the first embed.
   */
  dimensions?: number
  /**
   * Number of chunks embedded per provider call.
   * @default 64
   */
  batchSize?: number
}
86
+
87
/** Controls where embeddings are stored and how retrieval searches them. */
export type RagRetrievalConfig = {
  /**
   * How many chunks are handed to the model as context.
   * @default 5
   */
  topK?: number
  /**
   * Relevance floor: the minimum cosine similarity (0–1) a chunk needs. Chunks under the
   * floor are discarded, and if none clears it the guard can refuse.
   * @default 0.2
   */
  minScore?: number
  /**
   * Ceiling on the total characters of retrieved context injected into the prompt.
   * @default 6000
   */
  maxContextChars?: number
  /**
   * Pluggable vector store. The default is a SQL-backed store over the chunk collection
   * (works on any database) that computes cosine similarity in Node. Implement
   * `VectorStore` to swap in pgvector or an external vector DB.
   */
  vectorStore?: VectorStore
  /**
   * Optional second-pass reranker (e.g. a cross-encoder). It is given the query and the
   * top candidates and returns them re-scored/re-ordered.
   */
  rerank?: (query: string, chunks: Array<NixxieRagChunk>) => Promise<Array<NixxieRagChunk>>
  /**
   * Over-fetch factor: this multiple of `topK` is fetched before reranking/scoring.
   * @default 4
   */
  candidateMultiplier?: number
}
122
+
123
/** Names of the available document-splitting strategies. */
export type RagChunkingStrategy =
  | 'recursive'
  | 'sentence'
  | 'fixed'
124
+
125
/** Governs how documents are cut into chunks ahead of embedding. */
export type RagChunkingConfig = {
  /**
   * Which splitter to use. 'recursive' cuts on paragraph→line→sentence→word boundaries
   * so chunks stay coherent; 'sentence' groups whole sentences; 'fixed' is a hard window.
   * @default 'recursive'
   */
  strategy?: RagChunkingStrategy
  /**
   * Target size of a chunk, in characters.
   * @default 1200
   */
  chunkSize?: number
  /**
   * Characters shared between consecutive chunks, preserving context across cuts.
   * @default 200
   */
  chunkOverlap?: number
}
144
+
145
/** When and how documents get indexed. */
export type RagIndexingConfig = {
  /**
   * Automatically re-embed a document when its KB row is created/updated, and drop its
   * chunks when the row is deleted (wired by `ragPlugin()` via collection hooks).
   * @default true
   */
  auto?: boolean
  /**
   * Schedule for a full reindex, given as a cron expression or an interval in
   * milliseconds. Needs the jobs service (`context.services.jobs`); the plugin registers
   * the job.
   * @example '0 3 * * *' // 3am daily
   */
  schedule?: number | string
  /**
   * Index pending/changed documents once, right after the database connects.
   * @default true
   */
  onConnect?: boolean
  /**
   * Number of documents indexed concurrently.
   * @default 4
   */
  concurrency?: number
}
170
+
171
/** Hallucination guard: options that keep answers grounded in the knowledge base. */
export type RagGuardConfig = {
  /**
   * Master switch for every guard option below.
   * @default true
   */
  enabled?: boolean
  /**
   * Refuse (returning `refusal`) when the best retrieved chunk scores below `minScore`,
   * i.e. the KB holds nothing relevant.
   * @default true
   */
  refuseWhenNoContext?: boolean
  /**
   * Text returned when the assistant refuses for lack of grounding.
   * @default "I don't have enough information in my knowledge base to answer that."
   */
  refusal?: string
  /**
   * Tell the model to cite sources inline as [n] and to use only the provided context.
   * @default true
   */
  requireCitations?: boolean
  /**
   * Add a second, cheap model pass that checks the drafted answer against the retrieved
   * context and strips/flags unsupported claims. Costs one extra call.
   * @default false
   */
  groundingCheck?: boolean
  /** Model id used for the grounding check. Defaults to the generation model. */
  groundingModel?: string
  /**
   * When the grounding check finds the answer unsupported, return `refusal` in place of
   * the ungrounded text.
   * @default true
   */
  refuseWhenUngrounded?: boolean
  /**
   * Let the model fall back on its own world knowledge when no context is found (sets a
   * softer prompt). Off by default for a strictly grounded assistant.
   * @default false
   */
  allowModelKnowledge?: boolean
}
215
+
216
/** Options for multi-turn conversations. */
export type RagChatConfig = {
  /**
   * Cap on prior turns retained when building the prompt (the system prompt is not counted).
   * @default 10
   */
  historyLimit?: number
  /**
   * Stream responses by default where supported (the HTTP route always can stream).
   * @default true
   */
  streaming?: boolean
}
229
+
230
/** Hard ceilings that act as safety rails. */
export type RagLimitsConfig = {
  /**
   * Queries longer than this many characters are rejected before any work is done.
   * @default 8000
   */
  maxQueryChars?: number
  /**
   * Ceiling on concurrent in-flight generations; calls beyond it wait.
   * @default 8
   */
  maxConcurrentChats?: number
}
243
+
244
/** Collection names that ai-rag reads from and writes to; the plugin can create them for you. */
export type RagCollectionsConfig = {
  /**
   * Collection of source documents (the KB rows users add).
   * @default 'KnowledgeBase'
   */
  documents?: string
  /**
   * Collection of indexed chunks together with their embeddings.
   * @default 'KnowledgeChunk'
   */
  chunks?: string
}
257
+
258
/** Top-level configuration object accepted by `createAiRag()`; every section is optional. */
export type AiRagConfig = {
  /** Names of the collections that back the knowledge base. */
  collections?: RagCollectionsConfig
  /** Answer-generation model. Defaults to Anthropic Claude. */
  generation?: RagGenerationConfig
  /** Embedding model. Defaults to OpenAI text-embedding-3-small. */
  embedding?: RagEmbeddingConfig
  /** Retrieval and vector-store settings. */
  retrieval?: RagRetrievalConfig
  /** How documents are chunked. */
  chunking?: RagChunkingConfig
  /** Indexing behaviour and scheduling. */
  indexing?: RagIndexingConfig
  /** Hallucination guarding. */
  guard?: RagGuardConfig
  /** Conversation handling. */
  chat?: RagChatConfig
  /** Hard limits and safety rails. */
  limits?: RagLimitsConfig
}
279
+
280
+ // ── Prompt building ──
281
+
282
/** Input handed to a custom `buildPrompt` function. */
export type PromptBuildArgs = {
  /** Latest question asked by the user. */
  question: string
  /** Earlier conversation turns, already trimmed to `chat.historyLimit`. */
  history: Array<{ role: 'user' | 'assistant'; content: string }>
  /** Retrieved, scored chunks selected as context. */
  context: Array<NixxieRagChunk>
  /** Base system prompt after resolution. */
  systemPrompt: string
  /** True when the guard requires citations. */
  requireCitations: boolean
}
294
+
295
/** Output of prompt assembly: a system prompt plus the ordered chat messages. */
export type PromptBuildResult = {
  system: string
  messages: Array<{ role: 'user' | 'assistant'; content: string }>
}
299
+
300
+ // ── Vector store ──
301
+
302
/** One chunk and its embedding, in the form a `VectorStore` persists it. */
export type VectorRecord = {
  id: string
  documentId: string
  content: string
  embedding: Array<number>
  title?: string
  source?: string
  tags?: Array<string>
  metadata?: Record<string, unknown>
}
313
+
314
/** Parameters of a similarity search against a `VectorStore`. */
export type VectorQuery = {
  /** Embedding of the query. */
  embedding: Array<number>
  /** Maximum number of results. */
  topK: number
  /** Restrict matches to chunks whose document carries ALL of these tags. */
  tags?: Array<string>
  /** Lowest score a result may have. */
  minScore?: number
}
324
+
325
/**
 * Contract for a pluggable similarity store. The default `SqlVectorStore` keeps vectors
 * in the chunk collection and scores them in Node; implement this type to back retrieval
 * with pgvector, Qdrant, Pinecone, etc.
 */
export type VectorStore = {
  /** Invoked once after the database connects, with a sudo context. */
  init?(context: import('@nixxie-cms/core').NixxieContext): void | Promise<void>
  /** Inserts or replaces all chunks of a document, replacing any previous chunks. */
  upsert(documentId: string, records: VectorRecord[]): Promise<void>
  /** Deletes every chunk that belongs to a document. */
  deleteByDocument(documentId: string): Promise<void>
  /** Returns the chunks closest to the query embedding, scored in [0,1]. */
  query(query: VectorQuery): Promise<(VectorRecord & { score: number })[]>
  /** Total number of indexed chunks (optional; used for stats). */
  count?(): Promise<number>
  /** Removes everything (used by a forced full reindex). */
  clear?(): Promise<void>
}
344
+
345
/** Internal: alias of the `NixxieRagCitation` shape imported from `@nixxie-cms/core`. */
export type ResolvedRagCitation = NixxieRagCitation
package/src/vector-store.ts ADDED
@@ -0,0 +1,136 @@
1
+ import type { NixxieContext } from '@nixxie-cms/core'
2
+ import { cosineSimilarity } from './similarity'
3
+ import type { VectorQuery, VectorRecord, VectorStore } from './types'
4
+
5
/**
 * Looks up the Prisma delegate that backs a collection.
 *
 * The delegate is read from `context.prisma` under the collection name with its first
 * character lower-cased (`KnowledgeChunk` → `knowledgeChunk`).
 *
 * @param context - Context whose `prisma` client is searched.
 * @param listKey - Collection name as configured.
 * @returns The matching delegate; untyped because its shape depends on the host schema.
 * @throws Error when no delegate exists under that key, including for an empty `listKey`.
 */
function prismaDelegate(context: NixxieContext, listKey: string): any {
  // `charAt` yields '' for an empty name, so a blank collection falls through to the
  // descriptive error below instead of crashing on `undefined.toLowerCase()`.
  const delegateKey = listKey.charAt(0).toLowerCase() + listKey.slice(1)
  const delegate = (context.prisma as any)?.[delegateKey]
  if (!delegate) {
    throw new Error(
      `[@nixxie-cms/ai-rag] Collection "${listKey}" was not found in the Prisma client. ` +
        `Add it to your config (e.g. via \`ragPlugin()\` or \`knowledgeChunkCollection()\`) and run a migration.`
    )
  }
  return delegate
}
15
+
16
/**
 * Tag filter: reports whether a record carries every requested tag.
 *
 * An absent or empty request means there is nothing to filter on, so the record passes.
 * A record with no tags can never satisfy a non-empty request.
 */
function hasAllTags(recordTags: string[] | undefined, wanted: string[] | undefined): boolean {
  const required = wanted ?? []
  if (required.length === 0) return true

  const present = new Set(recordTags ?? [])
  if (present.size === 0) return false

  for (const tag of required) {
    if (!present.has(tag)) return false
  }
  return true
}
22
+
23
/**
 * Ranks candidate records against a query.
 *
 * Records are first narrowed to those carrying every tag in `query.tags`, then scored
 * with `cosineSimilarity` against `query.embedding`, filtered by `query.minScore` when
 * one is given, sorted by descending score and cut to `query.topK` entries. The input
 * records are not modified; each result is a copy with a `score` field added.
 *
 * @param records - Candidate chunks with their embeddings.
 * @param query - Query embedding plus `topK`, optional `tags` and optional `minScore`.
 * @returns At most `topK` scored records, best first.
 */
function score(records: Array<VectorRecord>, query: VectorQuery): Array<VectorRecord & { score: number }> {
  const { embedding, tags, minScore } = query

  // A negative `topK` would make `slice` count from the end and return almost everything;
  // treat any non-positive (or NaN) limit as "no results".
  const limit = Math.max(0, Math.trunc(query.topK))
  if (!(limit > 0)) return []

  const scored = records
    // Tag filtering is cheap, so do it before paying for the similarity computation.
    .filter(r => hasAllTags(r.tags, tags))
    .map(r => ({ ...r, score: cosineSimilarity(embedding, r.embedding) }))
    // Non-finite scores cannot be ranked and would make the sort comparator inconsistent.
    .filter(r => Number.isFinite(r.score) && (minScore === undefined || r.score >= minScore))

  scored.sort((a, b) => b.score - a.score)
  return scored.slice(0, limit)
}
31
+
32
/**
 * The default `VectorStore`. Embeddings are persisted in the chunk collection inside the
 * host app's own database (any provider), and candidates are ranked with cosine
 * similarity in Node. It is simple and portable; for very large knowledge bases, plug in
 * a pgvector / external store through `retrieval.vectorStore`.
 */
export class SqlVectorStore implements VectorStore {
  private model: any | null = null

  constructor(private collection: string) {}

  /** Resolves and caches the Prisma delegate for the configured collection. */
  init(context: NixxieContext): void {
    this.model = prismaDelegate(context, this.collection)
  }

  /** Returns the cached delegate, or throws when `init` has not run yet. */
  private requireModel(): any {
    if (this.model) return this.model
    throw new Error(
      '[@nixxie-cms/ai-rag] The vector store is not ready yet — it becomes available once the database has connected.'
    )
  }

  /** Replaces every chunk of `documentId` with `records` (delete first, then insert). */
  async upsert(documentId: string, records: VectorRecord[]): Promise<void> {
    const chunks = this.requireModel()
    await chunks.deleteMany({ where: { documentId } })

    // Rows are created one at a time for portability: createMany can't always return rows
    // and ignores unsupported JSON on some providers.
    for (const { content, embedding, title, source, tags, metadata } of records) {
      const data = {
        documentId,
        content,
        embedding,
        title: title ?? null,
        source: source ?? null,
        tags: tags ?? [],
        metadata: metadata ?? null,
      }
      await chunks.create({ data })
    }
  }

  /** Deletes all chunks stored for `documentId`. */
  async deleteByDocument(documentId: string): Promise<void> {
    const chunks = this.requireModel()
    await chunks.deleteMany({ where: { documentId } })
  }

  /** Loads every stored chunk and ranks them against the query in process. */
  async query(query: VectorQuery): Promise<Array<VectorRecord & { score: number }>> {
    const chunks = this.requireModel()
    const rows: any[] = await chunks.findMany()
    const candidates: VectorRecord[] = rows.map(row => rowToRecord(row))
    return score(candidates, query)
  }

  /** Number of chunk rows in the collection. */
  async count(): Promise<number> {
    const chunks = this.requireModel()
    return chunks.count()
  }

  /** Deletes every chunk row in the collection. */
  async clear(): Promise<void> {
    const chunks = this.requireModel()
    await chunks.deleteMany({})
  }
}
95
+
96
/**
 * Decodes a column value that arrived as JSON text. Non-string values are returned
 * untouched; a string that is not valid JSON decodes to `undefined`.
 */
function parseJsonText(value: unknown): unknown {
  if (typeof value !== 'string') return value
  try {
    return JSON.parse(value)
  } catch {
    return undefined
  }
}

/**
 * Converts a raw chunk row into a `VectorRecord`.
 *
 * `id` and `documentId` are stringified, a missing `content` becomes `''`, and null
 * `title`/`source`/`metadata` become `undefined`. The `embedding`, `tags` and `metadata`
 * columns are also accepted as JSON text and decoded, so a row whose JSON columns come
 * back as strings still produces a usable vector and tag list.
 *
 * @param row - Row object as returned by the chunk collection's `findMany`.
 * @returns The normalized record; `embedding` falls back to `[]` when none can be read.
 */
function rowToRecord(row: any): VectorRecord {
  const embedding = parseJsonText(row.embedding)
  const tags = parseJsonText(row.tags)
  const metadata = parseJsonText(row.metadata)

  // Text that did not decode to an array carries no usable vector; other non-string
  // values are passed through exactly as before.
  const embeddingWasText = typeof row.embedding === 'string'
  const vector = embeddingWasText && !Array.isArray(embedding) ? [] : (embedding ?? [])

  return {
    id: String(row.id),
    documentId: String(row.documentId),
    content: row.content ?? '',
    embedding: vector as number[],
    title: row.title ?? undefined,
    source: row.source ?? undefined,
    tags: Array.isArray(tags) ? (tags as string[]) : undefined,
    metadata: (metadata as Record<string, unknown> | null | undefined) ?? undefined,
  }
}
108
+
109
/**
 * Ephemeral in-process vector store. Useful for tests and small/transient knowledge
 * bases. Records are kept in a map keyed by document id and are lost with the instance.
 */
export class InMemoryVectorStore implements VectorStore {
  private byDocument = new Map<string, VectorRecord[]>()

  /**
   * Replaces all chunks held for `documentId`.
   *
   * The records are copied and stamped with `documentId`, so later mutation of the
   * caller's array does not alter the index and the stored document id always matches
   * the key (the same thing `SqlVectorStore.upsert` persists). An empty list removes the
   * document's entry.
   */
  async upsert(documentId: string, records: VectorRecord[]): Promise<void> {
    if (records.length === 0) {
      this.byDocument.delete(documentId)
      return
    }
    this.byDocument.set(
      documentId,
      records.map(r => ({ ...r, documentId }))
    )
  }

  /** Drops every chunk held for `documentId`. */
  async deleteByDocument(documentId: string): Promise<void> {
    this.byDocument.delete(documentId)
  }

  /** Ranks every held chunk against the query. */
  async query(query: VectorQuery): Promise<Array<VectorRecord & { score: number }>> {
    // `flat()` rather than `push(...records)`: spreading a very large array into a call
    // can exceed the engine's argument limit.
    const all: VectorRecord[] = Array.from(this.byDocument.values()).flat()
    return score(all, query)
  }

  /** Total number of chunks across all documents. */
  async count(): Promise<number> {
    let total = 0
    for (const records of this.byDocument.values()) total += records.length
    return total
  }

  /** Removes every document and chunk. */
  async clear(): Promise<void> {
    this.byDocument.clear()
  }
}