npm - @mastra/rag - Versions diffs - 1.2.3-alpha.0 → 1.2.3-alpha.1 - Mend

@mastra/rag 1.2.3-alpha.0 → 1.2.3-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

package/CHANGELOG.md +9 -0
package/package.json +18 -5
package/.turbo/turbo-build.log +0 -4
package/docker-compose.yaml +0 -22
package/eslint.config.js +0 -6
package/src/document/document.test.ts +0 -2975
package/src/document/document.ts +0 -335
package/src/document/extractors/base.ts +0 -30
package/src/document/extractors/index.ts +0 -5
package/src/document/extractors/keywords.test.ts +0 -125
package/src/document/extractors/keywords.ts +0 -126
package/src/document/extractors/questions.test.ts +0 -120
package/src/document/extractors/questions.ts +0 -111
package/src/document/extractors/summary.test.ts +0 -107
package/src/document/extractors/summary.ts +0 -122
package/src/document/extractors/title.test.ts +0 -121
package/src/document/extractors/title.ts +0 -185
package/src/document/extractors/types.ts +0 -40
package/src/document/index.ts +0 -2
package/src/document/prompts/base.ts +0 -77
package/src/document/prompts/format.ts +0 -9
package/src/document/prompts/index.ts +0 -15
package/src/document/prompts/prompt.ts +0 -60
package/src/document/prompts/types.ts +0 -29
package/src/document/schema/index.ts +0 -3
package/src/document/schema/node.ts +0 -187
package/src/document/schema/types.ts +0 -40
package/src/document/transformers/character.ts +0 -267
package/src/document/transformers/html.ts +0 -346
package/src/document/transformers/json.ts +0 -536
package/src/document/transformers/latex.ts +0 -11
package/src/document/transformers/markdown.ts +0 -239
package/src/document/transformers/semantic-markdown.ts +0 -227
package/src/document/transformers/sentence.ts +0 -314
package/src/document/transformers/text.ts +0 -158
package/src/document/transformers/token.ts +0 -137
package/src/document/transformers/transformer.ts +0 -5
package/src/document/types.ts +0 -145
package/src/document/validation.ts +0 -158
package/src/graph-rag/index.test.ts +0 -235
package/src/graph-rag/index.ts +0 -306
package/src/index.ts +0 -8
package/src/rerank/index.test.ts +0 -150
package/src/rerank/index.ts +0 -198
package/src/rerank/relevance/cohere/index.ts +0 -56
package/src/rerank/relevance/index.ts +0 -3
package/src/rerank/relevance/mastra-agent/index.ts +0 -32
package/src/rerank/relevance/zeroentropy/index.ts +0 -26
package/src/tools/README.md +0 -153
package/src/tools/document-chunker.ts +0 -34
package/src/tools/graph-rag.test.ts +0 -115
package/src/tools/graph-rag.ts +0 -157
package/src/tools/index.ts +0 -3
package/src/tools/types.ts +0 -126
package/src/tools/vector-query-database-config.test.ts +0 -190
package/src/tools/vector-query.test.ts +0 -477
package/src/tools/vector-query.ts +0 -171
package/src/utils/convert-sources.ts +0 -43
package/src/utils/default-settings.ts +0 -38
package/src/utils/index.ts +0 -3
package/src/utils/tool-schemas.ts +0 -38
package/src/utils/vector-prompts.ts +0 -832
package/src/utils/vector-search.ts +0 -130
package/tsconfig.build.json +0 -9
package/tsconfig.json +0 -5
package/tsup.config.ts +0 -17
package/vitest.config.ts +0 -8

package/src/rerank/relevance/cohere/index.ts DELETED Viewed

@@ -1,56 +0,0 @@
-import type { RelevanceScoreProvider } from '@mastra/core/relevance';
-/**
- * Shape that the JSON body of the Cohere rerank response is cast to
- * by `CohereRelevanceScorer.getRelevanceScore`.
- */
-interface CohereRerankingResponse {
-  // Only `results[0].relevance_score` is read by the scorer in this file.
-  results: { index: number; relevance_score: number }[];
-  id: string;
-  meta: {
-    api_version: { version: string; is_experimental: boolean };
-    billed_units: { search_units: number };
-  };
-}
-/**
- * Relevance scorer that calls the Cohere rerank HTTP endpoint.
- *
- * Every call posts a single query/document pair and returns the relevance
- * score reported for that one document.
- */
-export class CohereRelevanceScorer implements RelevanceScoreProvider {
-  private model: string;
-  private apiKey?: string;
-  /**
-   * @param model - Model identifier forwarded as `model` in the request body.
-   * @param apiKey - Token sent as `Authorization: Bearer <apiKey>`.
-   */
-  constructor(model: string, apiKey?: string) {
-    this.apiKey = apiKey;
-    this.model = model;
-  }
-  /**
-   * Scores how relevant `text` is to `query`.
-   *
-   * @param query - Query string sent to the rerank endpoint.
-   * @param text - The single document to score against the query.
-   * @returns The `relevance_score` of the first result in the response.
-   * @throws Error if the HTTP response is not OK (the message carries the status
-   *   code and response body), or if the first result has no numeric score.
-   */
-  async getRelevanceScore(query: string, text: string): Promise<number> {
-    const response = await fetch('https://api.cohere.com/v2/rerank', {
-      method: 'POST',
-      headers: {
-        'Content-Type': 'application/json',
-        Authorization: `Bearer ${this.apiKey}`,
-      },
-      body: JSON.stringify({
-        query,
-        documents: [text],
-        model: this.model,
-        top_n: 1,
-      }),
-    });
-    if (!response.ok) {
-      throw new Error(`Cohere API error: ${response.status} ${await response.text()}`);
-    }
-    const data = (await response.json()) as CohereRerankingResponse;
-    const relevanceScore = data.results?.[0]?.relevance_score;
-    // Check the type rather than truthiness: a score of 0 is a valid answer
-    // ("not relevant") and must be returned, not reported as a missing score.
-    if (typeof relevanceScore !== 'number') {
-      throw new Error('No relevance score found on Cohere response');
-    }
-    return relevanceScore;
-  }
-}

package/src/rerank/relevance/index.ts DELETED Viewed

@@ -1,3 +0,0 @@
-export * from './cohere';
-export * from './mastra-agent';
-export * from './zeroentropy';

package/src/rerank/relevance/mastra-agent/index.ts DELETED Viewed

@@ -1,32 +0,0 @@
-import { Agent } from '@mastra/core/agent';
-import type { MastraLanguageModel } from '@mastra/core/agent';
-import { createSimilarityPrompt } from '@mastra/core/relevance';
-import type { RelevanceScoreProvider } from '@mastra/core/relevance';
-/**
- * Relevance scorer that delegates the judgement to a Mastra `Agent`.
- *
- * The agent is instructed to answer with a bare number between 0 and 1.
- */
-export class MastraAgentRelevanceScorer implements RelevanceScoreProvider {
-  private agent: Agent;
-  /**
-   * @param name - Appended to "Relevance Scorer " to form the agent's name.
-   * @param model - Language model handed to the agent.
-   */
-  constructor(name: string, model: MastraLanguageModel) {
-    const agentName = `Relevance Scorer ${name}`;
-    this.agent = new Agent({
-      name: agentName,
-      instructions: `You are a specialized agent for evaluating the relevance of text to queries.
-Your task is to rate how well a text passage answers a given query.
-Output only a number between 0 and 1, where:
-1.0 = Perfectly relevant, directly answers the query
-0.0 = Completely irrelevant
-Consider:
-- Direct relevance to the question
-- Completeness of information
-- Quality and specificity
-Always return just the number, no explanation.`,
-      model,
-    });
-  }
-  /**
-   * Builds a similarity prompt for the pair, sends it to the agent and parses
-   * the reply text as a floating-point number.
-   *
-   * NOTE(review): the parsed value is returned as-is; a reply that does not start
-   * with a number yields NaN, and no 0..1 range check is applied — confirm callers cope.
-   */
-  async getRelevanceScore(query: string, text: string): Promise<number> {
-    const { text: reply } = await this.agent.generate(createSimilarityPrompt(query, text));
-    return Number.parseFloat(reply);
-  }
-}

package/src/rerank/relevance/zeroentropy/index.ts DELETED Viewed

@@ -1,26 +0,0 @@
-import type { RelevanceScoreProvider } from '@mastra/core/relevance';
-import ZeroEntropy from 'zeroentropy';
-/**
- * Relevance scorer that calls the rerank method of the ZeroEntropy client.
- */
-export class ZeroEntropyRelevanceScorer implements RelevanceScoreProvider {
-  private client: ZeroEntropy;
-  private model: string;
-  /**
-   * @param model - Rerank model name; 'zerank-1' is used when omitted or empty.
-   * @param apiKey - API key; when omitted or empty, `process.env.ZEROENTROPY_API_KEY`
-   *   is used, and an empty string if that is unset too.
-   */
-  constructor(model?: string, apiKey?: string) {
-    const resolvedKey = apiKey || process.env.ZEROENTROPY_API_KEY || '';
-    this.model = model || 'zerank-1';
-    this.client = new ZeroEntropy({ apiKey: resolvedKey });
-  }
-  /**
-   * Reranks the single document `text` against `query`.
-   *
-   * @returns The first result's `relevance_score`, or 0 when there is no first
-   *   result or it has no score.
-   */
-  async getRelevanceScore(query: string, text: string): Promise<number> {
-    const request = {
-      query,
-      documents: [text],
-      model: this.model,
-      top_n: 1,
-    };
-    const reranked = await this.client.models.rerank(request);
-    const top = reranked.results[0];
-    return top?.relevance_score ?? 0;
-  }
-}

package/src/tools/README.md DELETED Viewed

@@ -1,153 +0,0 @@
-# Vector Query Tool with Database-Specific Configurations
-The `createVectorQueryTool` function now supports database-specific configurations to handle unique properties and optimizations for different vector databases.
-## Database Configuration Types
-### Pinecone Configuration
-```typescript
-import { createVectorQueryTool } from '@mastra/rag/tools';
-const pineconeVectorTool = createVectorQueryTool({
-  id: 'pinecone-search',
-  indexName: 'my-index',
-  vectorStoreName: 'pinecone',
-  model: embedModel,
-  databaseConfig: {
-    pinecone: {
-      namespace: 'my-namespace', // Pinecone namespace
-      sparseVector: {
-        // For hybrid search
-        indices: [0, 1, 2],
-        values: [0.1, 0.2, 0.3],
-      },
-    },
-  },
-});
-```
-### pgVector Configuration
-```typescript
-const pgVectorTool = createVectorQueryTool({
-  id: 'pgvector-search',
-  indexName: 'my-index',
-  vectorStoreName: 'postgres',
-  model: embedModel,
-  databaseConfig: {
-    pgvector: {
-      minScore: 0.7, // Minimum similarity score
-      ef: 200, // HNSW search parameter
-      probes: 10, // IVFFlat probe parameter
-    },
-  },
-});
-```
-### Chroma Configuration
-```typescript
-const chromaTool = createVectorQueryTool({
-  id: 'chroma-search',
-  indexName: 'my-index',
-  vectorStoreName: 'chroma',
-  model: embedModel,
-  databaseConfig: {
-    chroma: {
-      where: {
-        // Metadata filtering
-        category: 'documents',
-      },
-      whereDocument: {
-        // Document content filtering
-        $contains: 'important',
-      },
-    },
-  },
-});
-```
-## Runtime Configuration Override
-You can also override database configurations at runtime using the runtime context:
-```typescript
-import { RuntimeContext } from '@mastra/core/runtime-context';
-const runtimeContext = new RuntimeContext();
-// Override Pinecone namespace at runtime
-runtimeContext.set('databaseConfig', {
-  pinecone: {
-    namespace: 'runtime-namespace',
-  },
-});
-await vectorTool.execute({
-  context: { queryText: 'search query' },
-  mastra,
-  runtimeContext,
-});
-```
-## Extensibility for New Databases
-The system is designed to be extensible. For new vector databases, you can:
-1. Add configuration types:
-```typescript
-export interface NewDatabaseConfig {
-  customParam1?: string;
-  customParam2?: number;
-}
-export type DatabaseConfig = {
-  pinecone?: PineconeConfig;
-  pgvector?: PgVectorConfig;
-  chroma?: ChromaConfig;
-  newdatabase?: NewDatabaseConfig; // Add your config here
-  [key: string]: any;
-};
-```
-2. The configuration will be automatically passed through to the vector store's query method.
-## Type Safety
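-The named database configurations (`pinecone`, `pgvector`, `chroma`) are fully typed, providing IntelliSense and compile-time checking; any other key is accepted through the `[key: string]: any` index signature and is not type-checked: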
-```typescript
-const config: DatabaseConfig = {
-  pinecone: {
-    namespace: 'valid-namespace',
-    sparseVector: {
-      indices: [1, 2, 3],
-      values: [0.1, 0.2, 0.3],
-    },
-  },
-  pgvector: {
-    minScore: 0.8,
-    ef: 100,
-    probes: 5,
-  },
-};
-```
-## Migration Guide
-Existing code will continue to work without changes. To add database-specific configurations:
-```diff
-const vectorTool = createVectorQueryTool({
-  indexName: 'my-index',
-  vectorStoreName: 'pinecone',
-  model: embedModel,
-+ databaseConfig: {
-+   pinecone: {
-+     namespace: 'my-namespace'
-+   }
-+ }
-});
-```

package/src/tools/document-chunker.ts DELETED Viewed

@@ -1,34 +0,0 @@
-import { createTool } from '@mastra/core/tools';
-import { z } from 'zod';
-import type { MDocument, ChunkParams } from '../document';
-/** Chunking parameters used when the caller does not supply `params`. */
-const DEFAULT_CHUNK_PARAMS = {
-  strategy: 'recursive' as const,
-  maxSize: 512,
-  overlap: 50,
-  separators: ['\n'],
-} satisfies ChunkParams;
-/**
- * Creates a tool that chunks one fixed document with one fixed set of parameters.
- *
- * The tool takes no input; the strategy and maxSize are baked into its id, and
- * the strategy, maxSize and overlap into its description.
- *
- * @param doc - Document whose `chunk` method is called on execution.
- * @param params - Chunking parameters; defaults to `DEFAULT_CHUNK_PARAMS`.
- * @returns A tool whose `execute` resolves to `{ chunks }`.
- */
-export const createDocumentChunkerTool = ({
-  doc,
-  params = DEFAULT_CHUNK_PARAMS,
-}: {
-  doc: MDocument;
-  params?: ChunkParams;
-}): ReturnType<typeof createTool> => {
-  const { strategy, maxSize } = params;
-  // A missing (or zero) overlap is reported as 0 in the description.
-  const overlap = params.overlap || 0;
-  return createTool({
-    id: `Document Chunker ${strategy} ${maxSize}`,
-    inputSchema: z.object({}),
-    description: `Chunks document using ${strategy} strategy with maxSize ${maxSize} and ${overlap} overlap`,
-    execute: async () => ({ chunks: await doc.chunk(params) }),
-  });
-};

package/src/tools/graph-rag.test.ts DELETED Viewed

@@ -1,115 +0,0 @@
-import { RuntimeContext } from '@mastra/core/runtime-context';
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-import { GraphRAG } from '../graph-rag';
-import { vectorQuerySearch } from '../utils';
-import { createGraphRAGTool } from './graph-rag';
-// Replace only `vectorQuerySearch` from ../utils; every other export is passed through.
-vi.mock('../utils', async importOriginal => {
-  const realUtils: any = await importOriginal();
-  const cannedSearch = {
-    results: [
-      { metadata: { text: 'foo' }, vector: [1, 2, 3] },
-      { metadata: { text: 'bar' }, vector: [4, 5, 6] },
-    ],
-    queryEmbedding: [1, 2, 3],
-  };
-  return {
-    ...realUtils,
-    vectorQuerySearch: vi.fn().mockResolvedValue(cannedSearch),
-  };
-});
-// Replace the GraphRAG class with a mock whose instances expose spy-able
-// `createGraph` and `query` methods; `query` always yields the same two entries.
-vi.mock('../graph-rag', async importOriginal => {
-  const realGraphRag: any = await importOriginal();
-  const makeFakeGraph = () => {
-    return {
-      createGraph: vi.fn(),
-      query: vi.fn(() => [
-        { content: 'foo', metadata: { text: 'foo' } },
-        { content: 'bar', metadata: { text: 'bar' } },
-      ]),
-    };
-  };
-  return {
-    ...realGraphRag,
-    GraphRAG: vi.fn().mockImplementation(makeFakeGraph),
-  };
-});
-// Placeholder model object; the tests only compare it by reference.
-const mockModel = { name: 'test-model' } as any;
-// Minimal Mastra stand-in: `getVector` returns an object keyed by the requested
-// store name, which lets the test assert which store was looked up.
-const mockMastra = {
-  getVector: vi.fn(storeName => ({ [storeName]: {} })),
-  getLogger: vi.fn(() => ({
-    debug: vi.fn(),
-    warn: vi.fn(),
-    info: vi.fn(),
-    error: vi.fn(),
-  })),
-};
-describe('createGraphRAGTool', () => {
-  // Both tests create the tool from the same static options.
-  const buildTool = () =>
-    createGraphRAGTool({
-      id: 'test',
-      model: mockModel,
-      vectorStoreName: 'testStore',
-      indexName: 'testIndex',
-    });
-  beforeEach(() => {
-    // Drop call history from earlier tests; `mock.results[0]` is inspected below.
-    vi.clearAllMocks();
-  });
-  it('validates input schema', () => {
-    const tool = buildTool();
-    const parsing = (input: Record<string, unknown>) => () => tool.inputSchema?.parse(input);
-    expect(parsing({ queryText: 'foo', topK: 10 })).not.toThrow();
-    expect(parsing({})).toThrow();
-  });
-  describe('runtimeContext', () => {
-    it('calls vectorQuerySearch and GraphRAG with runtimeContext params', async () => {
-      const tool = buildTool();
-      // Every value set here is expected to win over the static options / tool input.
-      const runtimeContext = new RuntimeContext();
-      runtimeContext.set('indexName', 'anotherIndex');
-      runtimeContext.set('vectorStoreName', 'anotherStore');
-      runtimeContext.set('topK', 5);
-      runtimeContext.set('filter', { foo: 'bar' });
-      runtimeContext.set('randomWalkSteps', 99);
-      runtimeContext.set('restartProb', 0.42);
-      const result = await tool.execute({
-        context: { queryText: 'foo', topK: 2 },
-        mastra: mockMastra as any,
-        runtimeContext,
-      });
-      expect(result.relevantContext).toEqual(['foo', 'bar']);
-      expect(result.sources.length).toBe(2);
-      const expectedSearchArgs = {
-        indexName: 'anotherIndex',
-        vectorStore: {
-          anotherStore: {},
-        },
-        queryText: 'foo',
-        model: mockModel,
-        queryFilter: { foo: 'bar' },
-        topK: 5,
-        includeVectors: true,
-      };
-      expect(vectorQuerySearch).toHaveBeenCalledWith(expect.objectContaining(expectedSearchArgs));
-      // GraphRAG createGraph and query should be called
-      expect(GraphRAG).toHaveBeenCalled();
-      const instance = (GraphRAG as any).mock.results[0].value;
-      expect(instance.createGraph).toHaveBeenCalled();
-      const expectedGraphQuery = {
-        query: [1, 2, 3],
-        topK: 5,
-        randomWalkSteps: 99,
-        restartProb: 0.42,
-      };
-      expect(instance.query).toHaveBeenCalledWith(expect.objectContaining(expectedGraphQuery));
-    });
-  });
-});

package/src/tools/graph-rag.ts DELETED Viewed

@@ -1,157 +0,0 @@
-import { createTool } from '@mastra/core/tools';
-import { z } from 'zod';
-import { GraphRAG } from '../graph-rag';
-import { vectorQuerySearch, defaultGraphRagDescription, filterSchema, outputSchema, baseSchema } from '../utils';
-import type { RagTool } from '../utils';
-import { convertToSources } from '../utils/convert-sources';
-import type { GraphRagToolOptions } from './types';
-import { defaultGraphOptions } from './types';
-/**
- * Creates a tool that runs a vector search and reranks the hits with GraphRAG.
- *
- * At execution time, values found in `runtimeContext` (`indexName`, `vectorStoreName`,
- * `includeSources`, `randomWalkSteps`, `restartProb`, `topK`, `filter`, `providerOptions`)
- * take precedence over the static `options` and over the tool input.
- *
- * @param options - Static configuration: model, vector store / index names, optional id,
- *   description, graph options, filter toggle, source inclusion and provider options.
- * @returns A tool whose `execute` resolves to `{ relevantContext, sources }`. Both are empty
- *   arrays when the vector store is not found or when search/reranking throws.
- *   `execute` itself throws if no index name or vector store name can be resolved.
- */
-export const createGraphRAGTool = (options: GraphRagToolOptions) => {
-  const { model, id, description } = options;
-  const toolId = id || `GraphRAG ${options.vectorStoreName} ${options.indexName} Tool`;
-  const toolDescription = description || defaultGraphRagDescription();
-  const graphOptions = {
-    ...defaultGraphOptions,
-    ...(options.graphOptions || {}),
-  };
-  // A single GraphRAG instance is shared by every execution of the returned tool.
-  const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
-  // NOTE(review): the graph is built once, from the results of the first query, and is
-  // reused by every later call — confirm later queries are meant to be ranked against it.
-  let isInitialized = false;
-  const inputSchema = options.enableFilter ? filterSchema : z.object(baseSchema).passthrough();
-  return createTool({
-    id: toolId,
-    inputSchema,
-    outputSchema,
-    description: toolDescription,
-    execute: async ({ context, mastra, runtimeContext }) => {
-      const indexName: string = runtimeContext.get('indexName') ?? options.indexName;
-      const vectorStoreName: string = runtimeContext.get('vectorStoreName') ?? options.vectorStoreName;
-      if (!indexName) throw new Error(`indexName is required, got: ${indexName}`);
-      if (!vectorStoreName) throw new Error(`vectorStoreName is required, got: ${vectorStoreName}`);
-      const includeSources: boolean = runtimeContext.get('includeSources') ?? options.includeSources ?? true;
-      const randomWalkSteps: number | undefined = runtimeContext.get('randomWalkSteps') ?? graphOptions.randomWalkSteps;
-      const restartProb: number | undefined = runtimeContext.get('restartProb') ?? graphOptions.restartProb;
-      const topK: number = runtimeContext.get('topK') ?? context.topK ?? 10;
-      const filter: Record<string, any> = runtimeContext.get('filter') ?? context.filter;
-      const queryText = context.queryText;
-      const providerOptions: Record<string, Record<string, any>> | undefined =
-        runtimeContext.get('providerOptions') ?? options.providerOptions;
-      // A filter supplied through the runtime context switches filtering on even if the option is off.
-      const enableFilter = !!runtimeContext.get('filter') || (options.enableFilter ?? false);
-      const logger = mastra?.getLogger();
-      if (logger) {
-        logger.debug('[GraphRAGTool] execute called with:', { queryText, topK, filter });
-      } else {
-        console.warn(
-          '[GraphRAGTool] Logger not initialized: no debug or error logs will be recorded for this tool execution.',
-        );
-      }
-      try {
-        // Despite the annotation above, topK can arrive as a numeric string at runtime;
-        // anything that is neither a number nor a numeric string falls back to 10.
-        const topKValue =
-          typeof topK === 'number' && !isNaN(topK)
-            ? topK
-            : typeof topK === 'string' && !isNaN(Number(topK))
-              ? Number(topK)
-              : 10;
-        const vectorStore = mastra?.getVector(vectorStoreName);
-        if (!vectorStore) {
-          logger?.error('Vector store not found', { vectorStoreName });
-          return { relevantContext: [], sources: [] };
-        }
-        let queryFilter = {};
-        if (enableFilter) {
-          try {
-            // The filter may be passed as a JSON string or as an already-parsed object.
-            queryFilter = typeof filter === 'string' ? JSON.parse(filter) : filter;
-          } catch (error) {
-            // Log the error and use empty object
-            logger?.warn('Failed to parse filter as JSON, using empty filter', { filter, error });
-            queryFilter = {};
-          }
-        }
-        logger?.debug('Prepared vector query parameters:', { queryFilter, topK: topKValue });
-        const { results, queryEmbedding } = await vectorQuerySearch({
-          indexName,
-          vectorStore,
-          queryText,
-          model,
-          // An empty (or missing) filter is sent as undefined rather than `{}`.
-          queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : undefined,
-          topK: topKValue,
-          includeVectors: true,
-          providerOptions,
-        });
-        logger?.debug('vectorQuerySearch returned results', { count: results.length });
-        // Initialize graph if not done yet
-        if (!isInitialized) {
-          // Get all chunks and embeddings for graph construction
-          const chunks = results.map(result => ({
-            text: result?.metadata?.text,
-            metadata: result.metadata ?? {},
-          }));
-          const embeddings = results.map(result => ({
-            vector: result.vector || [],
-          }));
-          logger?.debug('Initializing graph', { chunkCount: chunks.length, embeddingCount: embeddings.length });
-          graphRag.createGraph(chunks, embeddings);
-          isInitialized = true;
-        } else {
-          logger?.debug('Graph already initialized, skipping graph construction');
-        }
-        // Get reranked results using GraphRAG
-        const rerankedResults = graphRag.query({
-          query: queryEmbedding,
-          topK: topKValue,
-          randomWalkSteps,
-          restartProb,
-        });
-        logger?.debug('GraphRAG query returned results', { count: rerankedResults.length });
-        // Extract and combine relevant chunks
-        const relevantChunks = rerankedResults.map(result => result.content);
-        logger?.debug('Returning relevant context chunks', { count: relevantChunks.length });
-        // `sources` exposes the full retrieval objects
-        const sources = includeSources ? convertToSources(rerankedResults) : [];
-        return {
-          relevantContext: relevantChunks,
-          sources,
-        };
-      } catch (err) {
-        // Failures are logged and turned into an empty result instead of being rethrown.
-        logger?.error('Unexpected error in GraphRAGTool execute', {
-          error: err,
-          errorMessage: err instanceof Error ? err.message : String(err),
-          errorStack: err instanceof Error ? err.stack : undefined,
-        });
-        return { relevantContext: [], sources: [] };
-      }
-    },
-    // Use any for output schema as the structure of the output causes type inference issues
-  }) as RagTool<typeof inputSchema, any>;
-};

package/src/tools/index.ts DELETED Viewed

@@ -1,3 +0,0 @@
-export * from './document-chunker';
-export * from './graph-rag';
-export * from './vector-query';