npm - @nixxie-cms/ai-rag - Versions diffs - 1.0.0 - Mend

@nixxie-cms/ai-rag 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/LICENSE +23 -0
package/README.md +163 -0
package/dist/declarations/src/AiRagService.d.ts +50 -0
package/dist/declarations/src/AiRagService.d.ts.map +1 -0
package/dist/declarations/src/admin-page.d.ts +29 -0
package/dist/declarations/src/admin-page.d.ts.map +1 -0
package/dist/declarations/src/chunking.d.ts +8 -0
package/dist/declarations/src/chunking.d.ts.map +1 -0
package/dist/declarations/src/collection.d.ts +18 -0
package/dist/declarations/src/collection.d.ts.map +1 -0
package/dist/declarations/src/express.d.ts +36 -0
package/dist/declarations/src/express.d.ts.map +1 -0
package/dist/declarations/src/graphql.d.ts +23 -0
package/dist/declarations/src/graphql.d.ts.map +1 -0
package/dist/declarations/src/index.d.ts +39 -0
package/dist/declarations/src/index.d.ts.map +1 -0
package/dist/declarations/src/plugin.d.ts +53 -0
package/dist/declarations/src/plugin.d.ts.map +1 -0
package/dist/declarations/src/prompt.d.ts +14 -0
package/dist/declarations/src/prompt.d.ts.map +1 -0
package/dist/declarations/src/providers/AnthropicRagProvider.d.ts +16 -0
package/dist/declarations/src/providers/AnthropicRagProvider.d.ts.map +1 -0
package/dist/declarations/src/providers/GeminiRagProvider.d.ts +19 -0
package/dist/declarations/src/providers/GeminiRagProvider.d.ts.map +1 -0
package/dist/declarations/src/providers/OllamaRagProvider.d.ts +23 -0
package/dist/declarations/src/providers/OllamaRagProvider.d.ts.map +1 -0
package/dist/declarations/src/providers/OpenAiRagProvider.d.ts +17 -0
package/dist/declarations/src/providers/OpenAiRagProvider.d.ts.map +1 -0
package/dist/declarations/src/providers/ServiceRagProvider.d.ts +17 -0
package/dist/declarations/src/providers/ServiceRagProvider.d.ts.map +1 -0
package/dist/declarations/src/providers/index.d.ts +14 -0
package/dist/declarations/src/providers/index.d.ts.map +1 -0
package/dist/declarations/src/providers/types.d.ts +45 -0
package/dist/declarations/src/providers/types.d.ts.map +1 -0
package/dist/declarations/src/similarity.d.ts +12 -0
package/dist/declarations/src/similarity.d.ts.map +1 -0
package/dist/declarations/src/types.d.ts +319 -0
package/dist/declarations/src/types.d.ts.map +1 -0
package/dist/declarations/src/vector-store.d.ts +34 -0
package/dist/declarations/src/vector-store.d.ts.map +1 -0
package/dist/nixxie-cms-ai-rag.cjs.d.ts +2 -0
package/dist/nixxie-cms-ai-rag.cjs.js +2507 -0
package/dist/nixxie-cms-ai-rag.esm.js +2481 -0
package/package.json +37 -0
package/src/AiRagService.ts +640 -0
package/src/admin-page.ts +135 -0
package/src/chunking.ts +78 -0
package/src/collection.ts +79 -0
package/src/express.ts +212 -0
package/src/graphql.ts +196 -0
package/src/guard.ts +75 -0
package/src/index.ts +102 -0
package/src/plugin.ts +162 -0
package/src/prompt.ts +62 -0
package/src/providers/AnthropicRagProvider.ts +91 -0
package/src/providers/GeminiRagProvider.ts +147 -0
package/src/providers/OllamaRagProvider.ts +157 -0
package/src/providers/OpenAiRagProvider.ts +108 -0
package/src/providers/ServiceRagProvider.ts +44 -0
package/src/providers/index.ts +67 -0
package/src/providers/types.ts +44 -0
package/src/semaphore.ts +26 -0
package/src/similarity.ts +31 -0
package/src/types.ts +346 -0
package/src/vector-store.ts +136 -0

package/src/types.ts ADDED Viewed

@@ -0,0 +1,346 @@
+import type { NixxieAiService, NixxieRagChunk, NixxieRagCitation } from '@nixxie-cms/core'
+export type {
+  NixxieAiRagService,
+  NixxieRagDocument,
+  NixxieRagDocumentInput,
+  NixxieRagDocumentQuery,
+  NixxieRagChunk,
+  NixxieRagCitation,
+  NixxieRagAnswer,
+  NixxieRagAskOptions,
+  NixxieRagRetrieveOptions,
+  NixxieRagStreamEvent,
+  NixxieRagIndexStats,
+} from '@nixxie-cms/core'
+/**
+ * Supported model providers for generation and/or embeddings.
+ * These are the values accepted by `RagProviderConfig.provider`.
+ */
+export type RagProviderName = 'anthropic' | 'openai' | 'gemini' | 'ollama'
+/**
+ * How to talk to a model provider. Either point at an existing `NixxieAiService`
+ * (e.g. the one configured as `context.services.ai`) via `service`, or give the
+ * provider + credentials and let ai-rag build the client.
+ *
+ * Every field is optional. `RagGenerationConfig` and `RagEmbeddingConfig` both
+ * intersect this type with their role-specific options.
+ */
+export type RagProviderConfig = {
+  /** Provider to use. Defaults to 'anthropic' for generation, 'openai' for embeddings. */
+  provider?: RagProviderName
+  /** API key. Not required for `ollama` (local server) or when `service` is given. */
+  apiKey?: string
+  /** Model id. Sensible per-provider defaults are used when omitted. */
+  model?: string
+  /**
+   * Override the API base URL. Required for `ollama` if it is not on
+   * http://localhost:11434. Also used for gateways/proxies/Azure.
+   */
+  baseUrl?: string
+  /**
+   * Reuse an already-constructed service instead of building a client. Handy for
+   * sharing `context.services.ai`. When set, other fields are ignored for that role.
+   */
+  service?: NixxieAiService
+  /** Extra provider-specific options merged into each request body. */
+  extra?: Record<string, unknown>
+}
+/**
+ * Generation (answering) model configuration.
+ * Combines the shared `RagProviderConfig` fields with sampling, output-length and
+ * prompt-assembly options. Every field is optional.
+ */
+export type RagGenerationConfig = RagProviderConfig & {
+  /**
+   * Sampling temperature. Lower is more grounded/deterministic.
+   * @default 0.2
+   */
+  temperature?: number
+  /**
+   * Maximum output tokens per answer.
+   * @default 1024
+   */
+  maxTokens?: number
+  /** Nucleus sampling cutoff, passed through when the provider supports it. */
+  topP?: number
+  /**
+   * Base system prompt. The retrieved context and grounding rules are appended to it.
+   * @default a strict, citation-first assistant prompt
+   */
+  systemPrompt?: string
+  /**
+   * Fully override how the final prompt is assembled from the question + retrieved
+   * chunks. Return the `system` text and the `messages` sent to the model (see
+   * `PromptBuildResult`). When omitted, a built-in template is used that injects
+   * numbered sources and citation instructions.
+   */
+  buildPrompt?: (args: PromptBuildArgs) => PromptBuildResult
+}
+/**
+ * Embedding model configuration (often a different — even local — provider).
+ * Combines the shared `RagProviderConfig` fields with embedding-specific options.
+ * Every field is optional.
+ */
+export type RagEmbeddingConfig = RagProviderConfig & {
+  /**
+   * Expected embedding dimensionality. Used to validate stored vectors and to detect
+   * a model change that requires a full reindex. Optional — inferred on first embed.
+   */
+  dimensions?: number
+  /**
+   * How many chunks to embed per provider call.
+   * @default 64
+   */
+  batchSize?: number
+}
+/**
+ * Where embeddings live and how retrieval searches them.
+ * Every field is optional; `vectorStore` and `rerank` are the two extension points.
+ */
+export type RagRetrievalConfig = {
+  /**
+   * Number of chunks fed to the model as context.
+   * @default 5
+   */
+  topK?: number
+  /**
+   * Minimum cosine similarity (0–1) for a chunk to be considered relevant. Chunks below
+   * this are dropped; if nothing clears the bar the guard can refuse.
+   * @default 0.2
+   */
+  minScore?: number
+  /**
+   * Cap the total characters of retrieved context injected into the prompt.
+   * @default 6000
+   */
+  maxContextChars?: number
+  /**
+   * Pluggable vector store. Defaults to a SQL-backed store over the chunk collection
+   * (works on any database) doing cosine similarity in Node. Swap for pgvector or an
+   * external vector DB by implementing `VectorStore`.
+   */
+  vectorStore?: VectorStore
+  /**
+   * Optional second-pass reranker over the candidate chunks (e.g. a cross-encoder).
+   * Receives the query and the top candidates; returns them re-scored/re-ordered.
+   */
+  rerank?: (query: string, chunks: NixxieRagChunk[]) => Promise<NixxieRagChunk[]>
+  /**
+   * Over-fetch this multiple of `topK` before reranking/scoring.
+   * @default 4
+   */
+  candidateMultiplier?: number
+}
+export type RagChunkingStrategy = 'recursive' | 'sentence' | 'fixed' // each strategy is described on `RagChunkingConfig.strategy`
+/**
+ * How documents are split into chunks before embedding.
+ * Every field is optional; both sizes are expressed in characters.
+ */
+export type RagChunkingConfig = {
+  /**
+   * Splitting strategy. 'recursive' splits on paragraph→line→sentence→word boundaries
+   * to keep chunks coherent; 'sentence' groups whole sentences; 'fixed' is a hard window.
+   * @default 'recursive'
+   */
+  strategy?: RagChunkingStrategy
+  /**
+   * Target chunk size in characters.
+   * @default 1200
+   */
+  chunkSize?: number
+  /**
+   * Overlap in characters between consecutive chunks (preserves context across cuts).
+   * @default 200
+   */
+  chunkOverlap?: number
+}
+/**
+ * Indexing behaviour and scheduling.
+ * Every field is optional.
+ */
+export type RagIndexingConfig = {
+  /**
+   * Re-embed a document automatically when its KB row is created/updated and remove its
+   * chunks when the row is deleted (wired by `ragPlugin()` via collection hooks).
+   * @default true
+   */
+  auto?: boolean
+  /**
+   * Run a full reindex on a schedule. Cron expression or interval in milliseconds
+   * (presumably a string is read as cron and a number as the interval — confirm
+   * against the plugin). No default is documented here.
+   * Requires the jobs service (`context.services.jobs`); the plugin registers the job.
+   * @example '0 3 * * *'  // 3am daily
+   */
+  schedule?: string | number
+  /**
+   * Index any pending/changed documents once, right after the database connects.
+   * @default true
+   */
+  onConnect?: boolean
+  /**
+   * How many documents to index concurrently.
+   * @default 4
+   */
+  concurrency?: number
+}
+/**
+ * Hallucination guarding — keep answers grounded in the knowledge base.
+ * Every field is optional; `enabled` is the master switch for the rest.
+ */
+export type RagGuardConfig = {
+  /**
+   * Master switch for all guarding below.
+   * @default true
+   */
+  enabled?: boolean
+  /**
+   * Refuse to answer (returning `refusal`) when the best retrieved chunk scores below
+   * `minScore` (the retrieval threshold, `RagRetrievalConfig.minScore`), i.e. the KB
+   * has nothing relevant.
+   * @default true
+   */
+  refuseWhenNoContext?: boolean
+  /**
+   * Message returned when the assistant refuses for lack of grounding.
+   * @default "I don't have enough information in my knowledge base to answer that."
+   */
+  refusal?: string
+  /**
+   * Instruct the model to cite sources inline as [n] and only use provided context.
+   * @default true
+   */
+  requireCitations?: boolean
+  /**
+   * Run a second, cheap model pass that checks the drafted answer is supported by the
+   * retrieved context and strips/flags unsupported claims. Costs an extra call.
+   * @default false
+   */
+  groundingCheck?: boolean
+  /** Model id for the grounding check. Defaults to the generation model. */
+  groundingModel?: string
+  /**
+   * If the grounding check finds the answer is not supported, replace it with `refusal`
+   * instead of returning the ungrounded text.
+   * @default true
+   */
+  refuseWhenUngrounded?: boolean
+  /**
+   * Allow the model to fall back to its own world knowledge when no context is found
+   * (sets a softer prompt). Off by default for a strictly grounded assistant.
+   * @default false
+   */
+  allowModelKnowledge?: boolean
+}
+/**
+ * Conversation handling.
+ * Both fields are optional.
+ */
+export type RagChatConfig = {
+  /**
+   * Maximum prior turns kept when building the prompt (excludes the system prompt).
+   * The trimmed turns are what `PromptBuildArgs.history` carries.
+   * @default 10
+   */
+  historyLimit?: number
+  /**
+   * Default to streaming responses where supported (the HTTP route always can stream).
+   * @default true
+   */
+  streaming?: boolean
+}
+/**
+ * Hard limits and safety rails.
+ * Both fields are optional.
+ */
+export type RagLimitsConfig = {
+  /**
+   * Reject queries longer than this many characters before doing any work.
+   * @default 8000
+   */
+  maxQueryChars?: number
+  /**
+   * Maximum concurrent in-flight generations. Excess calls wait.
+   * @default 8
+   */
+  maxConcurrentChats?: number
+}
+/**
+ * Names of the collections ai-rag reads/writes. The plugin can create them for you.
+ * Both names are optional and fall back to the defaults noted on each field.
+ */
+export type RagCollectionsConfig = {
+  /**
+   * Collection holding source documents (the KB rows users add).
+   * @default 'KnowledgeBase'
+   */
+  documents?: string
+  /**
+   * Collection holding indexed chunks + embeddings.
+   * @default 'KnowledgeChunk'
+   */
+  chunks?: string
+}
+/**
+ * The full, flexible configuration for `createAiRag()`.
+ * Every section is optional; per-field defaults are documented on each section's type.
+ */
+export type AiRagConfig = {
+  /** Collection names backing the knowledge base. */
+  collections?: RagCollectionsConfig
+  /** Generation (answering) model. Defaults to Anthropic Claude. */
+  generation?: RagGenerationConfig
+  /** Embedding model. Defaults to OpenAI text-embedding-3-small. */
+  embedding?: RagEmbeddingConfig
+  /** Retrieval + vector store settings. */
+  retrieval?: RagRetrievalConfig
+  /** Document chunking settings. */
+  chunking?: RagChunkingConfig
+  /** Indexing behaviour and scheduling. */
+  indexing?: RagIndexingConfig
+  /** Hallucination guarding. */
+  guard?: RagGuardConfig
+  /** Conversation handling. */
+  chat?: RagChatConfig
+  /** Hard limits and safety rails. */
+  limits?: RagLimitsConfig
+}
+// ── Prompt building ──
+export type PromptBuildArgs = { // argument handed to `RagGenerationConfig.buildPrompt`
+  /** The user's latest question. */
+  question: string
+  /** Prior conversation turns (already trimmed to `chat.historyLimit`). */
+  history: { role: 'user' | 'assistant'; content: string }[]
+  /** The retrieved, scored chunks chosen as context. */
+  context: NixxieRagChunk[]
+  /** The resolved base system prompt. */
+  systemPrompt: string
+  /** Whether citations are required by the guard (see `RagGuardConfig.requireCitations`). */
+  requireCitations: boolean
+}
+export type PromptBuildResult = { // value returned by `RagGenerationConfig.buildPrompt`
+  system: string // presumably the final system prompt text — confirm against the provider code
+  messages: { role: 'user' | 'assistant'; content: string }[] // same turn shape as `PromptBuildArgs.history`
+}
+// ── Vector store ──
+/**
+ * A stored chunk + its embedding, as persisted by a `VectorStore`.
+ * `id`, `documentId`, `content` and `embedding` are required; the descriptive fields
+ * (`title`, `source`, `tags`, `metadata`) are optional.
+ */
+export type VectorRecord = {
+  id: string
+  documentId: string
+  content: string
+  embedding: number[]
+  title?: string
+  source?: string
+  tags?: string[]
+  metadata?: Record<string, unknown>
+}
+export type VectorQuery = { // argument to `VectorStore.query`
+  /** The query embedding. */
+  embedding: number[]
+  /** Max results. */
+  topK: number
+  /** Only match chunks whose document carries ALL of these tags. Omit to match any tags. */
+  tags?: string[]
+  /** Minimum score to return. Omit to apply no score floor. */
+  minScore?: number
+}
+/**
+ * Pluggable similarity store. The default `SqlVectorStore` keeps vectors in the chunk
+ * collection and scores them in Node; implement this interface to back retrieval with
+ * pgvector, Qdrant, Pinecone, etc.
+ *
+ * `upsert`, `deleteByDocument` and `query` are required; `init`, `count` and `clear`
+ * are optional members an implementation may omit.
+ */
+export type VectorStore = {
+  /** Called once after the database connects, with a sudo context. */
+  init?(context: import('@nixxie-cms/core').NixxieContext): Promise<void> | void
+  /** Insert or replace all chunks for a document (replacing any previous chunks). */
+  upsert(documentId: string, records: VectorRecord[]): Promise<void>
+  /** Delete every chunk belonging to a document. */
+  deleteByDocument(documentId: string): Promise<void>
+  /** Return the closest chunks to the query embedding, scored in [0,1]. */
+  query(query: VectorQuery): Promise<Array<VectorRecord & { score: number }>>
+  /** Total number of indexed chunks (optional; used for stats). */
+  count?(): Promise<number>
+  /** Remove everything (used by a forced full reindex). */
+  clear?(): Promise<void>
+}
+/**
+ * Internal: alias of `NixxieRagCitation` from `@nixxie-cms/core`.
+ * NOTE(review): this comment used to describe "the fully-resolved configuration with
+ * all defaults applied", which does not match a citation alias — confirm whether a
+ * resolved-config type was meant to be declared here.
+ */
+export type ResolvedRagCitation = NixxieRagCitation

package/src/vector-store.ts ADDED Viewed

@@ -0,0 +1,136 @@
+import type { NixxieContext } from '@nixxie-cms/core'
+import { cosineSimilarity } from './similarity'
+import type { VectorQuery, VectorRecord, VectorStore } from './types'
+/**
+ * Look up the Prisma model delegate for a collection.
+ *
+ * The delegate is read from `context.prisma` under `listKey` with its first character
+ * lower-cased (e.g. "KnowledgeChunk" is looked up as `knowledgeChunk`).
+ *
+ * @param context Context whose `prisma` client is searched.
+ * @param listKey Collection name.
+ * @returns The delegate found on the client.
+ * @throws Error when nothing is found under the derived key, including when `listKey`
+ *   is an empty string.
+ */
+function prismaDelegate(context: NixxieContext, listKey: string): any {
+  // charAt() yields '' for an empty name (indexing yields undefined), so an empty
+  // listKey reaches the descriptive error below instead of failing on `.toLowerCase()`.
+  const delegateKey = listKey.charAt(0).toLowerCase() + listKey.slice(1)
+  const delegate = (context.prisma as any)?.[delegateKey]
+  if (!delegate) {
+    throw new Error(
+      `[@nixxie-cms/ai-rag] Collection "${listKey}" was not found in the Prisma client. ` +
+        `Add it to your config (e.g. via \`ragPlugin()\` or \`knowledgeChunkCollection()\`) and run a migration.`
+    )
+  }
+  return delegate
+}
+/**
+ * Tell whether a record carries every requested tag.
+ * With no requested tags everything matches; otherwise a record without tags never does.
+ */
+function hasAllTags(recordTags: string[] | undefined, wanted: string[] | undefined): boolean {
+  const required = wanted ?? []
+  if (required.length === 0) return true
+  const present = new Set(recordTags ?? [])
+  if (present.size === 0) return false
+  for (const tag of required) {
+    if (!present.has(tag)) return false
+  }
+  return true
+}
+/**
+ * Rank records against a query embedding and return the best matches.
+ *
+ * Records are first restricted to those carrying all of `query.tags`, then scored with
+ * `cosineSimilarity`, filtered by `query.minScore` when it is set, sorted by descending
+ * score and cut to `query.topK`.
+ *
+ * @param records Candidate records; the array and its items are not mutated.
+ * @param query Query embedding plus `topK`, optional `tags` and optional `minScore`.
+ * @returns Copies of the matching records, each with a `score`, best first.
+ */
+function score(records: Array<VectorRecord>, query: VectorQuery): Array<VectorRecord & { score: number }> {
+  // slice() reads a negative end as "from the end" and truncates fractions, so normalise
+  // topK to a non-negative whole number. NaN yields no results, as slice(0, NaN) did.
+  const limit = Number.isNaN(query.topK) ? 0 : Math.max(0, Math.floor(query.topK))
+  const scored = records
+    // Tag filtering needs no score, so do it first and skip the similarity computation
+    // for records that would be discarded anyway.
+    .filter(r => hasAllTags(r.tags, query.tags))
+    .map(r => ({ ...r, score: cosineSimilarity(query.embedding, r.embedding) }))
+    // NOTE(review): cosineSimilarity is defined elsewhere; if it can yield NaN, such
+    // scores would make the comparator below inconsistent, so they are dropped here.
+    .filter(r => !Number.isNaN(r.score))
+    .filter(r => (query.minScore === undefined ? true : r.score >= query.minScore))
+  scored.sort((a, b) => b.score - a.score)
+  return scored.slice(0, limit)
+}
+/**
+ * The default `VectorStore`. Embeddings are written to the chunk collection of the host
+ * application's own database through its Prisma delegate, and candidates are ranked with
+ * cosine similarity in Node. Portable across database providers; for very large knowledge
+ * bases supply a pgvector / external store through `retrieval.vectorStore` instead.
+ */
+export class SqlVectorStore implements VectorStore {
+  private model: any | null = null
+  constructor(private collection: string) {}
+  /** Resolve and cache the Prisma delegate of the chunk collection. */
+  init(context: NixxieContext): void {
+    const delegate = prismaDelegate(context, this.collection)
+    this.model = delegate
+  }
+  /** Return the cached delegate; throws while `init()` has not stored one yet. */
+  private requireModel(): any {
+    if (this.model) return this.model
+    throw new Error(
+      '[@nixxie-cms/ai-rag] The vector store is not ready yet — it becomes available once the database has connected.'
+    )
+  }
+  /** Remove the document's existing chunk rows, then create one row per record, in order. */
+  async upsert(documentId: string, records: VectorRecord[]): Promise<void> {
+    const chunks = this.requireModel()
+    await chunks.deleteMany({ where: { documentId } })
+    // createMany can't always return rows and ignores unsupported JSON on some providers,
+    // so rows are written one at a time for portability.
+    for (let index = 0; index < records.length; index += 1) {
+      const { content, embedding, title, source, tags, metadata } = records[index]
+      const data = {
+        documentId,
+        content,
+        embedding,
+        title: title ?? null,
+        source: source ?? null,
+        tags: tags ?? [],
+        metadata: metadata ?? null,
+      }
+      await chunks.create({ data })
+    }
+  }
+  /** Delete every chunk row that belongs to `documentId`. */
+  async deleteByDocument(documentId: string): Promise<void> {
+    const chunks = this.requireModel()
+    await chunks.deleteMany({ where: { documentId } })
+  }
+  /** Load all chunk rows, convert them to records and rank them against the query. */
+  async query(query: VectorQuery): Promise<Array<VectorRecord & { score: number }>> {
+    const chunks = this.requireModel()
+    const rows: any[] = await chunks.findMany()
+    const candidates: VectorRecord[] = rows.map(rowToRecord)
+    return score(candidates, query)
+  }
+  /** Number of chunk rows, as reported by the delegate's `count()`. */
+  async count(): Promise<number> {
+    const chunks = this.requireModel()
+    return chunks.count()
+  }
+  /** Delete every chunk row in the collection. */
+  async clear(): Promise<void> {
+    const chunks = this.requireModel()
+    await chunks.deleteMany({})
+  }
+}
+/**
+ * Decode a value read from a JSON-like column.
+ * Strings are parsed as JSON (yielding `undefined` when they are not valid JSON);
+ * any other value is returned unchanged.
+ */
+function decodeJsonColumn(value: unknown): unknown {
+  if (typeof value !== 'string') return value
+  try {
+    return JSON.parse(value)
+  } catch {
+    return undefined
+  }
+}
+/**
+ * Convert a raw chunk row into a `VectorRecord`.
+ *
+ * `id` and `documentId` are stringified, a missing `content` becomes `''`, and null
+ * `title`/`source` become `undefined`. `embedding`, `tags` and `metadata` are accepted
+ * either as native values or as JSON-encoded strings, so a row whose JSON columns come
+ * back as text still yields a real `number[]` embedding instead of a string typed as one.
+ *
+ * @param row Row object as returned by the chunk collection's delegate.
+ * @returns The record; `embedding` falls back to `[]` and `tags`/`metadata` to
+ *   `undefined` when the stored value is absent or not of the expected shape.
+ */
+function rowToRecord(row: any): VectorRecord {
+  const embedding = decodeJsonColumn(row.embedding)
+  const tags = decodeJsonColumn(row.tags)
+  const metadata = decodeJsonColumn(row.metadata)
+  return {
+    id: String(row.id),
+    documentId: String(row.documentId),
+    content: row.content ?? '',
+    // Native arrays are passed through untouched; anything that is not an array after
+    // decoding cannot be scored, so it is replaced by an empty vector.
+    embedding: Array.isArray(embedding) ? embedding : [],
+    title: row.title ?? undefined,
+    source: row.source ?? undefined,
+    tags: Array.isArray(tags) ? tags : undefined,
+    metadata:
+      typeof metadata === 'object' && metadata !== null
+        ? (metadata as Record<string, unknown>)
+        : undefined,
+  }
+}
+/**
+ * Ephemeral in-process vector store. Useful for tests and small/transient knowledge bases.
+ * Records are held in a `Map` keyed by document id and are lost when the instance is
+ * discarded; ranking is delegated to the shared `score` helper.
+ */
+export class InMemoryVectorStore implements VectorStore {
+  private byDocument = new Map<string, VectorRecord[]>()
+  /** Replace the document's records. An empty list removes the document's entry. */
+  async upsert(documentId: string, records: VectorRecord[]): Promise<void> {
+    if (records.length === 0) {
+      this.byDocument.delete(documentId)
+      return
+    }
+    // Keep a private copy of the array so later mutation of the caller's array
+    // (push/splice/sort) cannot change what is stored.
+    this.byDocument.set(documentId, records.slice())
+  }
+  /** Forget every record stored for `documentId`. */
+  async deleteByDocument(documentId: string): Promise<void> {
+    this.byDocument.delete(documentId)
+  }
+  /** Rank every stored record against the query. */
+  async query(query: VectorQuery): Promise<Array<VectorRecord & { score: number }>> {
+    const all: VectorRecord[] = []
+    for (const records of this.byDocument.values()) {
+      // Append one at a time: spreading a very large array into push() passes each
+      // element as a call argument and can exceed the engine's argument limit.
+      for (const record of records) all.push(record)
+    }
+    return score(all, query)
+  }
+  /** Total number of records across all documents. */
+  async count(): Promise<number> {
+    let n = 0
+    for (const records of this.byDocument.values()) n += records.length
+    return n
+  }
+  /** Remove every stored record. */
+  async clear(): Promise<void> {
+    this.byDocument.clear()
+  }
+}