npm - @strav/rag - Versions diffs - 1.0.0-alpha.19 → 1.0.0-alpha.21 - Mend

@strav/rag 1.0.0-alpha.19 → 1.0.0-alpha.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/package.json +5 -4
package/src/console/index.ts +3 -0
package/src/console/rag_console_provider.ts +17 -0
package/src/console/rag_flush.ts +51 -0
package/src/console/rag_list.ts +48 -0
package/src/drivers/pgvector_driver.ts +38 -9
package/src/index.ts +6 -0
package/src/rag_manager.ts +2 -3
package/src/retrievable.ts +270 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@strav/rag",
-  "version": "1.0.0-alpha.19",
+  "version": "1.0.0-alpha.21",
   "description": "Strav RAG module — vector store abstraction, pgvector + in-memory drivers, chunking strategies. Composes with @strav/brain for embeddings and @strav/database for persistence.",
   "type": "module",
   "main": "./src/index.ts",
@@ -19,9 +19,10 @@
     "access": "public"
   },
   "dependencies": {
-    "@strav/brain": "1.0.0-alpha.19",
-    "@strav/database": "1.0.0-alpha.19",
-    "@strav/kernel": "1.0.0-alpha.19"
+    "@strav/brain": "1.0.0-alpha.21",
+    "@strav/cli": "1.0.0-alpha.21",
+    "@strav/database": "1.0.0-alpha.21",
+    "@strav/kernel": "1.0.0-alpha.21"
   },
   "peerDependencies": {
     "@types/bun": ">=1.3.14"

package/src/console/index.ts ADDED Viewed

@@ -0,0 +1,3 @@
+export { RagConsoleProvider } from './rag_console_provider.ts'
+export { RagFlush } from './rag_flush.ts'
+export { RagList } from './rag_list.ts'

package/src/console/rag_console_provider.ts ADDED Viewed

@@ -0,0 +1,17 @@
+/**
+ * `RagConsoleProvider` — declares the rag console commands.
+ *
+ * Apps add it to `bootstrap/providers.ts` alongside `RagProvider`.
+ * Separate provider (mirrors `QueueConsoleProvider`) so apps
+ * that don't use the CLI don't pay the cost of resolving the
+ * commands at boot.
+ */
+import { ConsoleProvider } from '@strav/cli'
+import { RagFlush } from './rag_flush.ts'
+import { RagList } from './rag_list.ts'
+export class RagConsoleProvider extends ConsoleProvider {
+  override readonly name = 'console.rag'
+  override readonly commands = [RagFlush, RagList] as const
+}

package/src/console/rag_flush.ts ADDED Viewed

@@ -0,0 +1,51 @@
+/**
+ * `bun strav rag:flush <collection> [--store=name] [--force]` —
+ * drop every vector in a collection on the active (or named)
+ * store.
+ *
+ * Use cases:
+ *
+ *   - Wiping a corrupted index before re-ingest.
+ *   - Cleaning up a dev / staging environment.
+ *   - Recovering after a dimension / model change.
+ *
+ * The command asks for confirmation before running unless `--force`
+ * is set. It doesn't touch the source data — apps run their own
+ * re-ingest afterward, typically via a `retrievable` repo's `reindexAll()`.
+ */
+import { Command, type ExecuteArgs, ExitCode } from '@strav/cli'
+import { RagManager } from '../rag_manager.ts'
+export class RagFlush extends Command {
+  static signature = 'rag:flush {collection} {--store=} {--force}'
+  static description = 'Delete every vector in a collection (on the active or --store= named store).'
+  static providers = ['config', 'logger', 'brain', 'rag']
+  override async execute({ args, flags }: ExecuteArgs): Promise<number> {
+    const collection = args.collection as string
+    const storeName = typeof flags.store === 'string' && flags.store.length > 0
+      ? flags.store
+      : undefined
+    const manager = this.app.resolve(RagManager)
+    const fullCollection = manager.collectionName(collection)
+    const storeLabel = storeName ?? manager.config.default
+    if (flags.force !== true) {
+      const ok = await this.confirm(
+        `Delete every vector in collection "${fullCollection}" on store "${storeLabel}"? This is irreversible.`,
+      )
+      if (!ok) {
+        this.info('Aborted.')
+        return ExitCode.Success
+      }
+    }
+    await manager.store(storeName).flush(fullCollection)
+    this.success(
+      `Flushed collection "${fullCollection}" on store "${storeLabel}".`,
+    )
+    return ExitCode.Success
+  }
+}

package/src/console/rag_list.ts ADDED Viewed

@@ -0,0 +1,48 @@
+/**
+ * `bun strav rag:list` — print the configured RAG stores +
+ * chunker + embedding setup.
+ *
+ * Diagnostic only — no mutations. Useful for verifying that
+ * `config/rag.ts` parses correctly and that the registered
+ * driver names match what's expected.
+ */
+import { Command, type ExecuteArgs, ExitCode } from '@strav/cli'
+import { RagManager } from '../rag_manager.ts'
+export class RagList extends Command {
+  static signature = 'rag:list'
+  static description = 'List configured RAG stores + embedding + chunking settings.'
+  static providers = ['config', 'logger', 'brain', 'rag']
+  override async execute(_args: ExecuteArgs): Promise<number> {
+    const manager = this.app.resolve(RagManager)
+    const config = manager.config
+    this.info(`Default store: ${config.default}`)
+    if (config.prefix) this.info(`Collection prefix: ${config.prefix}`)
+    this.info('')
+    this.info('Stores:')
+    for (const [name, store] of Object.entries(config.stores)) {
+      const flag = name === config.default ? ' (default)' : ''
+      this.info(`  ${name}${flag}: driver=${store.driver}`)
+    }
+    this.info('')
+    this.info('Embedding:')
+    this.info(`  provider: ${config.embedding.provider}`)
+    this.info(`  model:    ${config.embedding.model}`)
+    this.info(`  dim:      ${config.embedding.dimension}`)
+    this.info('')
+    this.info('Chunking:')
+    this.info(`  strategy:  ${config.chunking.strategy}`)
+    this.info(`  chunkSize: ${config.chunking.chunkSize}`)
+    this.info(`  overlap:   ${config.chunking.overlap}`)
+    if (config.chunking.separators) {
+      this.info(`  separators: ${JSON.stringify(config.chunking.separators)}`)
+    }
+    return ExitCode.Success
+  }
+}

package/src/drivers/pgvector_driver.ts CHANGED Viewed

@@ -28,7 +28,11 @@
  *     model them.
  */
-import type { PostgresDatabase } from '@strav/database'
+import {
+  currentTransactionalContext,
+  type DatabaseExecutor,
+  type PostgresDatabase,
+} from '@strav/database'
 import { VectorQueryError } from '../rag_error.ts'
 import { ragVectorSchema } from '../rag_vector_schema.ts'
 import type {
@@ -71,6 +75,20 @@ export class PgvectorDriver implements VectorStore {
     })
   }
+  /**
+   * Route reads + writes through the ambient `UnitOfWork`
+   * transaction when one is active (e.g., inside
+   * `tenants.withTenant(...)`); fall back to the raw pool
+   * otherwise. Mirrors how `Repository.executor(opts)` works in
+   * `@strav/database`, so RLS scoping + transactional event
+   * flushing apply uniformly across framework + driver code.
+   */
+  private exec(): DatabaseExecutor {
+    const ambient = currentTransactionalContext()
+    if (ambient) return ambient.tx
+    return this.db as unknown as DatabaseExecutor
+  }
   // ─── Collections ──────────────────────────────────────────────────────
   async createCollection(_collection: string, _dimension: number): Promise<void> {
@@ -81,7 +99,7 @@ export class PgvectorDriver implements VectorStore {
   }
   async deleteCollection(collection: string): Promise<void> {
-    await this.db.execute(
+    await this.exec().execute(
       `DELETE FROM "${this.table}" WHERE "collection" = $1`,
       [collection],
     )
@@ -96,13 +114,24 @@ export class PgvectorDriver implements VectorStore {
     if (documents.length === 0) return
     // pgvector accepts the vector as a stringified array literal —
     // `[0.12,0.34,...]` — cast with `::vector` at the boundary.
+    //
+    // Tenant scoping: the `tenant_id` column on `rag_vector` is
+    // NOT NULL with no default, so every INSERT must supply a value,
+    // including calls wrapped in `tenants.withTenant(...)`. We read
+    // `current_setting('app.tenant_id')` inside the SQL itself —
+    // the same session var the RLS policy reads — so the INSERT
+    // works under tenant scope without the driver knowing the PK
+    // type ahead of time. The `true` second arg makes the
+    // setting return NULL (not throw) outside `withTenant`; the
+    // INSERT then fails the NOT NULL constraint with a clear
+    // error message that nudges the app toward the right wrap.
     for (const doc of documents) {
       const id = doc.id ?? crypto.randomUUID()
       const embeddingLiteral = `[${doc.embedding.join(',')}]`
-      await this.db.execute(
+      await this.exec().execute(
         `INSERT INTO "${this.table}"
-          ("id", "collection", "source_id", "content", "metadata", "embedding", "created_at")
-         VALUES ($1, $2, $3, $4, $5::jsonb, $6::vector, NOW())
+          ("id", "tenant_id", "collection", "source_id", "content", "metadata", "embedding", "created_at")
+         VALUES ($1, current_setting('app.tenant_id', true), $2, $3, $4, $5::jsonb, $6::vector, NOW())
          ON CONFLICT ("id") DO UPDATE SET
            "collection" = EXCLUDED."collection",
            "source_id"  = EXCLUDED."source_id",
@@ -124,21 +153,21 @@ export class PgvectorDriver implements VectorStore {
   async delete(collection: string, ids: readonly string[]): Promise<void> {
     if (ids.length === 0) return
     const placeholders = ids.map((_, i) => `$${i + 2}`).join(', ')
-    await this.db.execute(
+    await this.exec().execute(
       `DELETE FROM "${this.table}" WHERE "collection" = $1 AND "id" IN (${placeholders})`,
       [collection, ...ids],
     )
   }
   async deleteBySource(collection: string, sourceId: string): Promise<void> {
-    await this.db.execute(
+    await this.exec().execute(
       `DELETE FROM "${this.table}" WHERE "collection" = $1 AND "source_id" = $2`,
       [collection, sourceId],
     )
   }
   async flush(collection: string): Promise<void> {
-    await this.db.execute(
+    await this.exec().execute(
       `DELETE FROM "${this.table}" WHERE "collection" = $1`,
       [collection],
     )
@@ -190,7 +219,7 @@ export class PgvectorDriver implements VectorStore {
       score: number | string
     }>
     try {
-      rows = await this.db.query(sql, params)
+      rows = await this.exec().query(sql, params)
     } catch (cause) {
       throw new VectorQueryError(
         `pgvector query failed for collection "${collection}".`,

package/src/index.ts CHANGED Viewed

@@ -32,8 +32,14 @@ export {
   type RagManagerOptions,
   type StoreFactory,
 } from './rag_manager.ts'
+export {
+  RagConsoleProvider,
+  RagFlush,
+  RagList,
+} from './console/index.ts'
 export { RagProvider } from './rag_provider.ts'
 export { ragVectorSchema } from './rag_vector_schema.ts'
+export { retrievable } from './retrievable.ts'
 export type {
   Chunk,
   Chunker,

package/src/rag_manager.ts CHANGED Viewed

@@ -28,7 +28,7 @@ import { PostgresDatabase } from '@strav/database'
 // biome-ignore lint/style/useImportType: BrainManager value import for @inject() param-type metadata.
 import { BrainManager } from '@strav/brain'
 // biome-ignore lint/style/useImportType: Application value import for the container handle.
-import { Application, inject } from '@strav/kernel'
+import { Application, inject, ulid } from '@strav/kernel'
 import { createChunker } from './chunking/chunker.ts'
 import { MemoryDriver } from './drivers/memory_driver.ts'
 import { PgvectorDriver } from './drivers/pgvector_driver.ts'
@@ -204,9 +204,8 @@ export class RagManager {
       )
     }
-    const baseId = crypto.randomUUID()
     const documents: VectorDocument[] = chunks.map((chunk, i) => ({
-      id: `${baseId}_${i}`,
+      id: ulid(),
       ...(options.sourceId !== undefined ? { sourceId: options.sourceId } : {}),
       content: chunk.content,
       embedding: embeddings[i]!,

package/src/retrievable.ts ADDED Viewed

@@ -0,0 +1,270 @@
+/**
+ * `retrievable(Repository)` — class mixin that bolts vector-index
+ * methods onto a Repository so apps can re-index a row and search
+ * its collection without juggling `RagManager` calls by hand.
+ *
+ * ```ts
+ * @inject()
+ * export class ArticleRepository extends retrievable(Repository<Article>) {
+ *   static override readonly schema = articleSchema
+ *   static override readonly model = Article
+ *
+ *   constructor(db: PostgresDatabase, events: EventBus, rag: RagManager) {
+ *     super(db, events)
+ *     this.rag = rag
+ *   }
+ *
+ *   // Override the extension points as needed:
+ *   protected override toContent(a: Article): string {
+ *     return `${a.title}\n\n${a.body}`
+ *   }
+ *
+ *   protected override toMetadata(a: Article): Record<string, unknown> {
+ *     return { authorId: a.author_id, tags: a.tags }
+ *   }
+ * }
+ * ```
+ *
+ * Usage:
+ *
+ * ```ts
+ * const article = await articles.create(...)
+ * await articles.vectorize(article)              // index it
+ *
+ * const { matches } = await articles.retrieve('query')   // search
+ *
+ * await articles.delete(article)
+ * await articles.vectorRemove(article)           // drop from index
+ * ```
+ *
+ * Why not auto-vectorize on `create` / `update`?
+ *
+ *   V1 ships the explicit pattern. An auto-hook tied to repository
+ *   events would couple persistence to the embedding provider's
+ *   availability — a transient rate-limit on the embedder would
+ *   fail the create call. Apps that want auto-vectorize wire it
+ *   themselves via `events.on('article.created', m =>
+ *   articles.vectorize(m))` so they control the failure mode
+ *   (fire-and-forget vs awaited vs queued via `@strav/queue`).
+ *
+ * Extension points (all optional overrides):
+ *
+ *   - `collectionName()` — defaults to the table name from the
+ *     schema. Override when the collection should differ from the
+ *     table, or to compose a per-tenant suffix dynamically.
+ *
+ *   - `toContent(model)` — defaults to joining every non-empty string
+ *     field whose key doesn't start with `_` with `\n`. The default works
+ *     for simple row shapes; apps with structured content override.
+ *
+ *   - `toMetadata(model)` — defaults to `{}`. Apps return fields
+ *     they want to filter on (e.g. `author_id`, `lang`, `kind`).
+ *
+ *   - `shouldRetrieve(model)` — gates indexing. Return `false` for
+ *     draft / soft-deleted / private rows. The default is `true`.
+ */
+import type { Repository } from '@strav/database'
+import type { RagManager } from './rag_manager.ts'
+import type {
+  RetrieveOptions,
+  RetrieveResult,
+  VectorMatch,
+} from './types.ts'
+/** Minimal constructor type we can mix into. Wider than `typeof Repository` so subclasses with extra ctor args still type-check. */
+// biome-ignore lint/suspicious/noExplicitAny: mixin constructor signatures intentionally accept any[]; the user-side subclass narrows.
+type RepositoryConstructor<TModel extends object> = new (...args: any[]) => Repository<TModel>
+/**
+ * Returns a subclass that extends `Base` with `vectorize` /
+ * `vectorRemove` / `retrieve` / `reindexAll` / `resolveMatches`
+ * plus override-points (`collectionName`, `toContent`, `toMetadata`,
+ * `shouldRetrieve`). The user-side class declares an explicit
+ * constructor that calls `super(...)` and assigns `this.rag`.
+ */
+export function retrievable<TModel extends object, TBase extends RepositoryConstructor<TModel>>(
+  Base: TBase,
+) {
+  abstract class RetrievableRepository extends Base {
+    /**
+     * The framework's `RagManager`. Assigned by the user-side
+     * subclass constructor. Public on purpose — apps that want to
+     * drop down to raw `rag.store()` / `rag.ingest(...)` access
+     * have a hook.
+     */
+    rag!: RagManager
+    /**
+     * Collection name for vector storage. Defaults to the table
+     * name from `static schema`. Override to point at a different
+     * collection (or to compose per-tenant / per-env suffixes).
+     */
+    protected collectionName(): string {
+      const ctor = this.constructor as unknown as { schema: { name: string } }
+      return ctor.schema.name
+    }
+    /**
+     * Build the indexable text from a model row. The default joins
+     * every non-empty string field whose key doesn't start with `_`,
+     * separated by `\n`. Apps with structured content override this —
+     * typically something like `` `${a.title}\n\n${a.body}` ``.
+     */
+    protected toContent(model: TModel): string {
+      const parts: string[] = []
+      for (const [key, value] of Object.entries(model as Record<string, unknown>)) {
+        if (key.startsWith('_')) continue
+        if (typeof value === 'string' && value.length > 0) parts.push(value)
+      }
+      return parts.join('\n')
+    }
+    /**
+     * Build the metadata bag attached to every chunk. Apps return
+     * fields they want to filter retrievals on. The framework
+     * automatically adds `chunkIndex`, `startOffset`, `endOffset`
+     * — overrides shouldn't try to re-add those.
+     */
+    protected toMetadata(_model: TModel): Record<string, unknown> {
+      return {}
+    }
+    /**
+     * Whether the model should currently be indexed. Override to
+     * skip drafts, soft-deleted rows, private records, etc. The
+     * default `true` indexes every model — fine for the common
+     * case.
+     */
+    protected shouldRetrieve(_model: TModel): boolean {
+      return true
+    }
+    /**
+     * (Re-)index a single model. Drops any existing chunks for
+     * the model's id, then ingests fresh chunks of the current
+     * content. When `shouldRetrieve(model)` returns `false`, the
+     * chunks are dropped without re-ingest — apps don't need a
+     * separate "this just became private" path.
+     *
+     * Returns the vector ids written. Empty array when content
+     * was empty or `shouldRetrieve` returned `false`.
+     */
+    async vectorize(model: TModel): Promise<string[]> {
+      const collection = this.collectionName()
+      const id = modelId(model)
+      // Drop existing chunks for this source first so updates
+      // replace cleanly. (RagManager.ingest writes fresh ids per
+      // call; without this step every re-vectorize would
+      // duplicate.)
+      await this.rag
+        .store()
+        .deleteBySource(this.rag.collectionName(collection), id)
+      if (!this.shouldRetrieve(model)) return []
+      const content = this.toContent(model)
+      if (!content) return []
+      return this.rag.ingest(collection, content, {
+        sourceId: id,
+        metadata: this.toMetadata(model),
+      })
+    }
+    /**
+     * Drop every chunk for one model. Apps call this after
+     * `delete(model)` in their domain code. The mixin doesn't
+     * auto-hook the delete lifecycle for the same reason it
+     * doesn't auto-hook create/update — keeps embedding-provider
+     * availability out of the persistence path.
+     */
+    async vectorRemove(model: TModel): Promise<void> {
+      const collection = this.collectionName()
+      const id = modelId(model)
+      await this.rag
+        .store()
+        .deleteBySource(this.rag.collectionName(collection), id)
+    }
+    /**
+     * Semantic search over this repository's collection. Default
+     * `collection` is the mixin's `collectionName()` — apps that
+     * want to retrieve from another collection pass it explicitly.
+     */
+    async retrieve(
+      query: string,
+      options: Omit<RetrieveOptions, 'collection'> & { collection?: string } = {},
+    ): Promise<RetrieveResult> {
+      return this.rag.retrieve(query, {
+        ...options,
+        collection: options.collection ?? this.collectionName(),
+      })
+    }
+    /**
+     * Re-index every row in the repository. Walks rows in batches
+     * of `batchSize` and vectorizes each. Useful for backfilling
+     * a new collection or recovering after a schema change.
+     *
+     * The CLI's `rag:reindex <repository>` doesn't ship in V1 —
+     * apps that want one wire it as their own console command
+     * pointing at this method.
+     *
+     * Returns the total count of rows processed (NOT the chunk
+     * count — chunks per row vary with content size).
+     */
+    async reindexAll(batchSize: number = 100): Promise<number> {
+      let processed = 0
+      let offset = 0
+      while (true) {
+        const rows = await this.query().orderBy('id', 'asc').limit(batchSize).offset(offset).get()
+        if (rows.length === 0) break
+        for (const row of rows) await this.vectorize(row)
+        processed += rows.length
+        offset += rows.length
+        if (rows.length < batchSize) break
+      }
+      return processed
+    }
+    /**
+     * Match-to-models helper. Hydrates the source rows for the `matches`
+     * array from `retrieve(...)` by id, in match order — one entry per
+     * match, so a row repeats when several matches share its `sourceId`.
+     * Matches with no resolvable row (e.g. deleted since indexing) are dropped.
+     */
+    async resolveMatches(matches: readonly VectorMatch[]): Promise<TModel[]> {
+      const ids = [...new Set(matches.map((m) => m.sourceId).filter((s): s is string => !!s))]
+      if (ids.length === 0) return []
+      const found = await this.findMany(ids as unknown as readonly string[])
+      const byId = new Map<string, TModel>(
+        found.map((m) => [modelId(m), m]),
+      )
+      const out: TModel[] = []
+      for (const match of matches) {
+        if (!match.sourceId) continue
+        const row = byId.get(match.sourceId)
+        if (row) out.push(row)
+      }
+      return out
+    }
+  }
+  return RetrievableRepository
+}
+/**
+ * Coerce a model's `id` to a string via `String(...)`. The default ULID /
+ * UUID ids round-trip cleanly; integer PKs (bigSerial) coerce the same
+ * way. Throws an `Error` when `id` is `null` or `undefined`.
+ */
+function modelId(model: object): string {
+  const id = (model as { id?: unknown }).id
+  if (id === undefined || id === null) {
+    throw new Error(
+      `retrievable: model has no \`id\` to use as a vector sourceId. The mixin only works on models with a single-column id.`,
+    )
+  }
+  return String(id)
+}