npm - @strav/rag - Versions diffs - 0.4.31 → 1.0.0-alpha.19 - Mend

@strav/rag 0.4.31 → 1.0.0-alpha.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/package.json +20 -23
package/src/chunking/chunker.ts +7 -2
package/src/chunking/fixed_size_chunker.ts +24 -8
package/src/chunking/recursive_chunker.ts +89 -28
package/src/drivers/memory_driver.ts +110 -85
package/src/drivers/pgvector_driver.ts +174 -109
package/src/index.ts +40 -36
package/src/migrations.ts +116 -0
package/src/rag_error.ts +76 -0
package/src/rag_manager.ts +290 -66
package/src/rag_provider.ts +85 -7
package/src/rag_vector_schema.ts +56 -0
package/src/types.ts +80 -22
package/src/vector_store.ts +45 -5
package/src/commands/rag_flush.ts +0 -41
package/src/commands/rag_ingest.ts +0 -45
package/src/drivers/null_driver.ts +0 -21
package/src/errors.ts +0 -21
package/src/helpers.ts +0 -186
package/src/retrievable.ts +0 -179
package/stubs/config/rag.ts +0 -33
package/tsconfig.json +0 -5

package/src/types.ts CHANGED

@@ -1,47 +1,84 @@
-// ── Vector Documents ─────────────────────────────────────────────────────
+/**
+ * `@strav/rag` types — the data shapes apps see when reading and
+ * writing vectors and when running retrieval.
+ *
+ * Three concept clusters:
+ *
+ *   - **Vector docs + queries** — the storage layer. A
+ *     `VectorDocument` is one indexed unit (a chunk of source
+ *     content, its embedding, and free-form metadata).
+ *     `query()` returns `VectorMatch[]` ranked by similarity.
+ *
+ *   - **Retrieval pipeline** — `RetrieveOptions` /
+ *     `RetrieveResult`. Apps call `rag.retrieve(query, ...)`,
+ *     the manager embeds the query through `@strav/brain`,
+ *     queries the active store, and returns matches with
+ *     normalized similarity scores.
+ *
+ *   - **Chunking** — `Chunk`, `Chunker`. The chunker takes raw
+ *     content and produces overlapping segments suitable for
+ *     embedding. Two strategies ship: `fixed` (mechanical N-char
+ *     windows with overlap) and `recursive` (paragraph-aware,
+ *     better for prose).
+ */
+// ─── Vector documents + queries ──────────────────────────────────────────
+/**
+ * One indexed unit. `id` is provider-assigned (ULID by default);
+ * `sourceId` is the optional app-defined pointer back to the row
+ * the chunk came from (e.g., `article_id`) — `deleteBySource`
+ * removes every chunk for one source in a single call.
+ */
 export interface VectorDocument {
-  id?: string | number
-  sourceId?: string | number
+  id?: string
+  sourceId?: string | null
   content: string
   embedding: number[]
   metadata?: Record<string, unknown>
 }
-// ── Query Options & Results ──────────────────────────────────────────────
 export interface QueryOptions {
+  /** Top-K matches to return. Default `5`. */
   topK?: number
+  /** Minimum similarity threshold (0–1). Matches below this are filtered out. */
   threshold?: number
+  /** Metadata filter — flat key/value AND. Driver-specific operators are NOT supported in V1. */
   filter?: Record<string, unknown>
 }
 export interface QueryResult {
   matches: VectorMatch[]
-  processingTimeMs?: number
+  /** Time the underlying store took to compute the query, in ms. */
+  processingTimeMs: number
 }
 export interface VectorMatch {
-  id: string | number
+  id: string
   content: string
+  /** Similarity score in [0, 1]. 1.0 = identical embeddings, 0 = orthogonal. */
   score: number
   metadata: Record<string, unknown>
+  sourceId?: string | null
 }
-// ── Retrieval (high-level pipeline) ──────────────────────────────────────
+// ─── Retrieval pipeline ─────────────────────────────────────────────────
 export interface RetrieveOptions {
+  /** Override the collection. Defaults to the manager's default. */
   collection?: string
+  /** Top-K matches. Default `5`. */
   topK?: number
+  /** Minimum similarity threshold. */
   threshold?: number
+  /** Metadata filter — flat key/value AND. */
   filter?: Record<string, unknown>
-  rerank?: RerankOptions
-}
-export interface RerankOptions {
-  authorityWeight?: number
-  recencyWeight?: number
-  similarityWeight?: number
+  /** Override the store. Defaults to the manager's default store. */
+  store?: string
+  /** Override the embedding model used to encode the query. */
+  embedModel?: string
+  /** Override the brain provider used for embedding. */
+  embedProvider?: string
 }
 export interface RetrieveResult {
@@ -51,19 +88,24 @@ export interface RetrieveResult {
 }
 export interface RetrievedDocument {
-  id: string | number
+  id: string
   content: string
+  /** Same as `VectorMatch.score` — kept as a separate field so future re-ranking can diverge `score` from raw `similarity`. */
   score: number
   similarity: number
   metadata: Record<string, unknown>
+  sourceId?: string | null
 }
-// ── Chunking ─────────────────────────────────────────────────────────────
+// ─── Chunking ────────────────────────────────────────────────────────────
 export interface Chunk {
   content: string
+  /** 0-based ordinal within the source. */
   index: number
+  /** Character offset of the chunk's first character in the source. */
   startOffset: number
+  /** Character offset one past the chunk's last character. */
   endOffset: number
 }
@@ -71,30 +113,46 @@ export interface Chunker {
   chunk(content: string): Chunk[]
 }
-// ── Configuration ────────────────────────────────────────────────────────
+// ─── Configuration ──────────────────────────────────────────────────────
+/**
+ * `config.rag` shape. Apps that don't configure rag get a sensible
+ * default (memory driver, OpenAI text-embedding-3-small, recursive
+ * chunking) — see `RagProvider.boot()` for the defaults.
+ */
 export interface RagConfig {
+  /** Default store name — must be a key in `stores`. */
   default: string
-  prefix: string
+  /** Optional collection-name prefix. Used to namespace per-app or per-tenant. */
+  prefix?: string
   embedding: EmbeddingConfig
   chunking: ChunkingConfig
   stores: Record<string, StoreConfig>
 }
 export interface EmbeddingConfig {
+  /** `@strav/brain` provider key (e.g., `'openai'`, `'gemini'`, `'ollama'`). */
   provider: string
+  /** Model identifier — passed to `brain.embed(..., { model })`. */
   model: string
+  /** Vector dimension. Must match the chosen model. */
   dimension: number
 }
 export interface ChunkingConfig {
-  strategy: string
+  /** `'fixed'` or `'recursive'`. Custom strategies aren't pluggable in V1. */
+  strategy: 'fixed' | 'recursive'
   chunkSize: number
   overlap: number
-  separators?: string[]
+  /** Custom separators for the recursive strategy. Defaults to `['\n\n', '\n', '. ', ' ']`. */
+  separators?: readonly string[]
 }
 export interface StoreConfig {
+  /** `'memory'` or `'pgvector'`; custom drivers register via `rag.extend(name, factory)`. */
   driver: string
+  /** Pgvector: explicit table name override. Default `'rag_vector'`. */
+  table?: string
+  /** Free-form fields driver-specific (e.g., HNSW tuning for pgvector). */
   [key: string]: unknown
 }

package/src/vector_store.ts CHANGED

@@ -1,15 +1,55 @@
-import type { VectorDocument, QueryOptions, QueryResult } from './types.ts'
+/**
+ * `VectorStore` — the storage abstraction every driver
+ * (`MemoryDriver`, `PgvectorDriver`, custom drivers registered
+ * via `rag.extend(...)`) implements.
+ *
+ * Lifecycle:
+ *
+ *   - `createCollection(name, dimension)` — idempotent. For
+ *     pgvector this is mostly a no-op (the table holds every
+ *     collection); the dimension is enforced at INSERT.
+ *   - `deleteCollection(name)` — drops every vector under
+ *     `collection = name`.
+ *
+ * Reads + writes:
+ *
+ *   - `upsert(collection, docs)` — inserts (and overwrites by id
+ *     when supplied).
+ *   - `delete(collection, ids)` — removes specific vectors.
+ *   - `deleteBySource(collection, sourceId)` — removes every
+ *     vector with the matching `source_id`. Apps call this when
+ *     re-indexing a source row.
+ *   - `flush(collection)` — drops every vector in the
+ *     collection. Faster than `deleteCollection` for the common
+ *     "wipe + re-ingest" pattern because the collection's
+ *     identity stays intact.
+ *   - `query(collection, vector, opts)` — top-K similarity
+ *     search.
+ *
+ * Multitenancy lives BELOW this interface — the pgvector driver
+ * relies on `app.tenant_id` session settings (set by
+ * `tenants.withTenant`) to enforce isolation via RLS. The
+ * `MemoryDriver` is single-tenant by construction and ignores
+ * tenancy.
+ */
+import type { QueryOptions, QueryResult, VectorDocument } from './types.ts'
 export interface VectorStore {
+  /** Driver identifier — `'memory'`, `'pgvector'`, or the name passed to `rag.extend`. */
   readonly name: string
   createCollection(collection: string, dimension: number): Promise<void>
   deleteCollection(collection: string): Promise<void>
-  upsert(collection: string, documents: VectorDocument[]): Promise<void>
-  delete(collection: string, ids: (string | number)[]): Promise<void>
-  deleteBySource(collection: string, sourceId: string | number): Promise<void>
+  upsert(collection: string, documents: readonly VectorDocument[]): Promise<void>
+  delete(collection: string, ids: readonly string[]): Promise<void>
+  deleteBySource(collection: string, sourceId: string): Promise<void>
   flush(collection: string): Promise<void>
-  query(collection: string, vector: number[], options?: QueryOptions): Promise<QueryResult>
+  query(
+    collection: string,
+    vector: readonly number[],
+    options?: QueryOptions,
+  ): Promise<QueryResult>
 }

package/src/commands/rag_flush.ts DELETED

@@ -1,41 +0,0 @@
-import type { Command } from 'commander'
-import chalk from 'chalk'
-import { bootstrap, shutdown } from '@strav/cli'
-import { BaseModel } from '@strav/database'
-import RagManager from '../rag_manager.ts'
-export function register(program: Command): void {
-  program
-    .command('rag:flush <model>')
-    .description("Flush all vectors from a model's vector collection")
-    .action(async (modelPath: string) => {
-      let db
-      try {
-        const { db: database, config } = await bootstrap()
-        db = database
-        new BaseModel(db)
-        new RagManager(config)
-        const resolved = require.resolve(`${process.cwd()}/${modelPath}`)
-        const module = await import(resolved)
-        const ModelClass = module.default ?? (Object.values(module)[0] as any)
-        if (typeof ModelClass?.flushVectors !== 'function') {
-          console.error(chalk.red(`Model "${modelPath}" does not use the retrievable() mixin.`))
-          process.exit(1)
-        }
-        const collectionName = ModelClass.retrievableAs()
-        console.log(chalk.dim(`Flushing "${collectionName}"...`))
-        await ModelClass.flushVectors()
-        console.log(chalk.green(`Flushed all vectors from "${collectionName}".`))
-      } catch (err) {
-        console.error(chalk.red(`Error: ${err instanceof Error ? err.message : err}`))
-        process.exit(1)
-      } finally {
-        if (db) await shutdown(db)
-      }
-    })
-}

package/src/commands/rag_ingest.ts DELETED

@@ -1,45 +0,0 @@
-import type { Command } from 'commander'
-import chalk from 'chalk'
-import { bootstrap, shutdown } from '@strav/cli'
-import { BaseModel } from '@strav/database'
-import { BrainManager } from '@strav/brain'
-import RagManager from '../rag_manager.ts'
-export function register(program: Command): void {
-  program
-    .command('rag:ingest <model>')
-    .description('Vectorize all records for a model into the vector store')
-    .option('--chunk <size>', 'Records per batch', '100')
-    .action(async (modelPath: string, options: { chunk: string }) => {
-      let db
-      try {
-        const { db: database, config } = await bootstrap()
-        db = database
-        new BaseModel(db)
-        new RagManager(config)
-        new BrainManager(config)
-        const resolved = require.resolve(`${process.cwd()}/${modelPath}`)
-        const module = await import(resolved)
-        const ModelClass = module.default ?? (Object.values(module)[0] as any)
-        if (typeof ModelClass?.importAll !== 'function') {
-          console.error(chalk.red(`Model "${modelPath}" does not use the retrievable() mixin.`))
-          process.exit(1)
-        }
-        const chunkSize = parseInt(options.chunk, 10)
-        const collectionName = ModelClass.retrievableAs()
-        console.log(chalk.dim(`Vectorizing ${ModelClass.name} into "${collectionName}"...`))
-        const count = await ModelClass.importAll(chunkSize)
-        console.log(chalk.green(`Vectorized ${count} record(s) into "${collectionName}".`))
-      } catch (err) {
-        console.error(chalk.red(`Error: ${err instanceof Error ? err.message : err}`))
-        process.exit(1)
-      } finally {
-        if (db) await shutdown(db)
-      }
-    })
-}

package/src/drivers/null_driver.ts DELETED

@@ -1,21 +0,0 @@
-import type { VectorStore } from '../vector_store.ts'
-import type { VectorDocument, QueryOptions, QueryResult } from '../types.ts'
-export class NullDriver implements VectorStore {
-  readonly name = 'null'
-  async createCollection(_collection: string, _dimension: number): Promise<void> {}
-  async deleteCollection(_collection: string): Promise<void> {}
-  async upsert(_collection: string, _documents: VectorDocument[]): Promise<void> {}
-  async delete(_collection: string, _ids: (string | number)[]): Promise<void> {}
-  async deleteBySource(_collection: string, _sourceId: string | number): Promise<void> {}
-  async flush(_collection: string): Promise<void> {}
-  async query(
-    _collection: string,
-    _vector: number[],
-    _options?: QueryOptions
-  ): Promise<QueryResult> {
-    return { matches: [] }
-  }
-}

package/src/errors.ts DELETED

@@ -1,21 +0,0 @@
-import { StravError } from '@strav/kernel'
-export class RagError extends StravError {}
-export class CollectionNotFoundError extends RagError {
-  constructor(collection: string) {
-    super(`Vector collection "${collection}" not found.`)
-  }
-}
-export class VectorQueryError extends RagError {
-  constructor(collection: string, cause?: string) {
-    super(`Vector query on "${collection}" failed${cause ? `: ${cause}` : ''}.`)
-  }
-}
-export class EmbeddingError extends RagError {
-  constructor(cause?: string) {
-    super(`Embedding generation failed${cause ? `: ${cause}` : ''}.`)
-  }
-}

package/src/helpers.ts DELETED

@@ -1,186 +0,0 @@
-import { brain } from '@strav/brain'
-import RagManager from './rag_manager.ts'
-import type { VectorStore } from './vector_store.ts'
-import type {
-  RetrieveOptions,
-  RetrieveResult,
-  RetrievedDocument,
-  VectorDocument,
-  StoreConfig,
-} from './types.ts'
-import { createChunker } from './chunking/chunker.ts'
-import { EmbeddingError } from './errors.ts'
-export interface IngestOptions {
-  metadata?: Record<string, unknown>
-  sourceId?: string | number
-  chunkSize?: number
-  overlap?: number
-  strategy?: string
-  /**
-   * Optional per-chunk sanitizer applied AFTER chunking, BEFORE
-   * embedding. Use to scrub PII, secrets, or prompt-injection markers
-   * out of untrusted source content before it lands in the vector
-   * store. Return `null` to drop a chunk; otherwise return the
-   * (possibly modified) text.
-   *
-   * The hook is the caller's escape valve — RAG cannot judge what's
-   * sensitive in your domain. See `docs/rag/rag.md` "Content trust
-   * model" for the threat surface (prompt injection at retrieval
-   * time, indexed PII, accidental secret indexing).
-   */
-  sanitize?: (chunk: { content: string; index: number }) => string | null | Promise<string | null>
-}
-export const rag = {
-  store(name?: string): VectorStore {
-    return RagManager.store(name)
-  },
-  extend(name: string, factory: (config: StoreConfig) => VectorStore): void {
-    RagManager.extend(name, factory)
-  },
-  async ingest(
-    collection: string,
-    content: string,
-    options: IngestOptions = {}
-  ): Promise<string[]> {
-    const config = RagManager.config
-    const fullCollection = RagManager.collectionName(collection)
-    const chunkerConfig = {
-      strategy: options.strategy ?? config.chunking.strategy,
-      chunkSize: options.chunkSize ?? config.chunking.chunkSize,
-      overlap: options.overlap ?? config.chunking.overlap,
-      separators: config.chunking.separators,
-    }
-    const chunker = createChunker(chunkerConfig)
-    let chunks = chunker.chunk(content)
-    if (chunks.length === 0) return []
-    // Apply the optional sanitize hook before embedding. Drops chunks
-    // where the hook returns null (e.g., a chunk that's all PII).
-    if (options.sanitize) {
-      const sanitized: typeof chunks = []
-      for (const chunk of chunks) {
-        const result = await options.sanitize({ content: chunk.content, index: chunk.index })
-        if (result === null) continue
-        sanitized.push({ ...chunk, content: result })
-      }
-      chunks = sanitized
-      if (chunks.length === 0) return []
-    }
-    const chunkTexts = chunks.map(c => c.content)
-    let embeddings: number[][]
-    try {
-      embeddings = await brain.embed(chunkTexts, {
-        provider: config.embedding.provider,
-        model: config.embedding.model,
-      })
-    } catch (err) {
-      throw new EmbeddingError(err instanceof Error ? err.message : String(err))
-    }
-    const baseId = crypto.randomUUID()
-    const documents: VectorDocument[] = chunks.map((chunk, i) => ({
-      id: `${baseId}_${i}`,
-      sourceId: options.sourceId,
-      content: chunk.content,
-      embedding: embeddings[i]!,
-      metadata: {
-        ...options.metadata,
-        chunkIndex: chunk.index,
-        startOffset: chunk.startOffset,
-        endOffset: chunk.endOffset,
-      },
-    }))
-    await RagManager.store().upsert(fullCollection, documents)
-    return documents.map(d => String(d.id))
-  },
-  async retrieve(query: string, options: RetrieveOptions = {}): Promise<RetrieveResult> {
-    const start = performance.now()
-    const config = RagManager.config
-    const collection = RagManager.collectionName(options.collection ?? 'default')
-    let queryVector: number[]
-    try {
-      const vectors = await brain.embed(query, {
-        provider: config.embedding.provider,
-        model: config.embedding.model,
-      })
-      queryVector = vectors[0]!
-    } catch (err) {
-      throw new EmbeddingError(err instanceof Error ? err.message : String(err))
-    }
-    const queryResult = await RagManager.store().query(collection, queryVector, {
-      topK: options.topK,
-      threshold: options.threshold,
-      filter: options.filter,
-    })
-    let matches: RetrievedDocument[] = queryResult.matches.map(m => ({
-      id: m.id,
-      content: m.content,
-      score: m.score,
-      similarity: m.score,
-      metadata: m.metadata,
-    }))
-    if (options.rerank) {
-      const {
-        similarityWeight = 0.6,
-        authorityWeight = 0.2,
-        recencyWeight = 0.2,
-      } = options.rerank
-      matches = matches.map(m => {
-        const authority =
-          typeof m.metadata.authority === 'number' ? m.metadata.authority : 0
-        const createdAt = m.metadata.createdAt
-        const recencyScore = createdAt
-          ? 1 / (1 + daysSince(new Date(createdAt as string)) / 30)
-          : 0.5
-        const finalScore =
-          m.similarity * similarityWeight +
-          authority * authorityWeight +
-          recencyScore * recencyWeight
-        return { ...m, score: finalScore }
-      })
-      matches.sort((a, b) => b.score - a.score)
-    }
-    return {
-      matches,
-      query,
-      processingTimeMs: performance.now() - start,
-    }
-  },
-  async delete(collection: string, ids: (string | number)[]): Promise<void> {
-    const fullCollection = RagManager.collectionName(collection)
-    await RagManager.store().delete(fullCollection, ids)
-  },
-  async deleteBySource(collection: string, sourceId: string | number): Promise<void> {
-    const fullCollection = RagManager.collectionName(collection)
-    await RagManager.store().deleteBySource(fullCollection, sourceId)
-  },
-  async flush(collection: string): Promise<void> {
-    const fullCollection = RagManager.collectionName(collection)
-    await RagManager.store().flush(fullCollection)
-  },
-}
-function daysSince(date: Date): number {
-  return (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24)
-}