@code-rag/core 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api-contracts/index.d.ts +2 -0
- package/dist/api-contracts/index.js +5 -0
- package/dist/api-contracts/viewer-contracts.d.ts +181 -0
- package/dist/api-contracts/viewer-contracts.js +124 -0
- package/dist/benchmarks/benchmark-evaluator.d.ts +84 -0
- package/dist/benchmarks/benchmark-evaluator.js +220 -0
- package/dist/benchmarks/index-scanner.d.ts +54 -0
- package/dist/benchmarks/index-scanner.js +135 -0
- package/dist/benchmarks/index.d.ts +6 -0
- package/dist/benchmarks/index.js +4 -0
- package/dist/benchmarks/query-generator.d.ts +68 -0
- package/dist/benchmarks/query-generator.js +205 -0
- package/dist/config/config-parser.js +27 -14
- package/dist/embedding/hybrid-search.js +15 -8
- package/dist/embedding/lancedb-store.d.ts +13 -0
- package/dist/embedding/lancedb-store.js +106 -12
- package/dist/index.d.ts +8 -1
- package/dist/index.js +5 -0
- package/dist/retrieval/context-expander.d.ts +4 -2
- package/dist/retrieval/context-expander.js +2 -2
- package/dist/retrieval/index.d.ts +1 -1
- package/dist/runtime.d.ts +37 -0
- package/dist/runtime.js +170 -0
- package/dist/utils/safe-cast.d.ts +32 -0
- package/dist/utils/safe-cast.js +76 -0
- package/package.json +5 -1
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scans an existing CodeRAG index (LanceDB) to extract entity information
|
|
3
|
+
* used for auto-generating benchmark queries with ground truth.
|
|
4
|
+
*
|
|
5
|
+
* All functions are pure where possible, taking data as input rather than
|
|
6
|
+
* connecting to stores directly.
|
|
7
|
+
*/
|
|
8
|
+
import { ok, err } from 'neverthrow';
|
|
9
|
+
import { safeString, safeArray } from '../utils/safe-cast.js';
|
|
10
|
+
/**
 * Error raised when raw index rows cannot be parsed into scanned entities.
 * Wrapped in a neverthrow `err(...)` by `parseIndexRows` rather than thrown.
 */
export class IndexScanError extends Error {
    constructor(message) {
        super(message);
        // Explicit name so logs and error reports identify the subclass.
        this.name = 'IndexScanError';
    }
}
|
|
16
|
+
/** Every chunk type a row's metadata may legitimately declare. */
const CHUNK_TYPES = [
    'function', 'method', 'class', 'module', 'interface',
    'type_alias', 'config_block', 'import_block', 'doc',
];
/** Coerce an arbitrary value to a known chunk type, defaulting to 'function'. */
function toChunkType(value) {
    return CHUNK_TYPES.includes(value) ? value : 'function';
}
|
|
24
|
+
/** Narrow an unknown value to an array of strings; non-string items are dropped. */
function toStringArray(value) {
    return safeArray(value, []).filter((item) => typeof item === 'string');
}
|
|
28
|
+
/**
 * Convert raw index rows (from LanceDBStore.getAll()) into ScannedEntity objects.
 * This is a pure function that operates on already-fetched data.
 *
 * Rows with absent/null metadata are tolerated: every field falls back to its
 * safe default instead of aborting the whole parse with an IndexScanError.
 *
 * @param rows Array of `{ id, metadata }` objects scanned from the index.
 * @returns `ok(IndexScanResult)` with entities plus lookup maps keyed by
 *          chunk ID, entity name, and file path; `err(IndexScanError)` only
 *          on an unexpected failure.
 */
export function parseIndexRows(rows) {
    // Append `value` to the array stored under `key`, creating it on first use.
    const addToMultiMap = (map, key, value) => {
        const existing = map.get(key);
        if (existing) {
            existing.push(value);
        }
        else {
            map.set(key, [value]);
        }
    };
    try {
        const entities = [];
        const entityMap = new Map();
        const nameToChunkIds = new Map();
        const fileToChunkIds = new Map();
        for (const row of rows) {
            // Guard: a row with missing metadata yields an entity of defaults
            // rather than throwing and failing the entire scan.
            const meta = row.metadata ?? {};
            const name = safeString(meta['name'], '');
            const chunkType = toChunkType(safeString(meta['chunk_type'], 'function'));
            const filePath = safeString(meta['file_path'], '');
            const language = safeString(meta['language'], 'unknown');
            const nlSummary = safeString(meta['nl_summary'], '');
            const imports = toStringArray(meta['imports']);
            const exports = toStringArray(meta['exports']);
            const declarations = toStringArray(meta['declarations']);
            const entity = {
                chunkId: row.id,
                name,
                chunkType,
                filePath,
                language,
                nlSummary,
                imports,
                exports,
                declarations,
            };
            entities.push(entity);
            entityMap.set(row.id, entity);
            // Index by name (skip empty names)
            if (name.length > 0) {
                addToMultiMap(nameToChunkIds, name, row.id);
            }
            // Index by file path
            if (filePath.length > 0) {
                addToMultiMap(fileToChunkIds, filePath, row.id);
            }
        }
        return ok({
            entities,
            totalChunks: entities.length,
            entityMap,
            nameToChunkIds,
            fileToChunkIds,
        });
    }
    catch (error) {
        const message = error instanceof Error ? error.message : 'Unknown error';
        return err(new IndexScanError(`Failed to parse index rows: ${message}`));
    }
}
|
|
95
|
+
/**
 * Build a caller map from graph edges.
 * Maps a target chunkId to the source chunkIds that reference it through
 * 'calls', 'references', or 'imports' edges; other edge types are ignored.
 */
export function buildCallerMap(edges) {
    const callerMap = new Map();
    for (const { type, source, target } of edges) {
        const isCallerEdge = type === 'calls' || type === 'references' || type === 'imports';
        if (!isCallerEdge) {
            continue;
        }
        const sources = callerMap.get(target);
        if (sources === undefined) {
            callerMap.set(target, [source]);
        }
        else {
            sources.push(source);
        }
    }
    return callerMap;
}
|
|
114
|
+
/**
 * Build a test file map: maps source file paths to test file chunk IDs.
 * Heuristic: a file at `foo.test.ts` or `foo.spec.ts` is the test for `foo.ts`.
 *
 * @param fileToChunkIds Map of file path to the chunk IDs indexed from it.
 * @returns Map of derived source file path to the chunk IDs of its test file(s).
 */
export function buildTestMap(fileToChunkIds) {
    const testMap = new Map();
    for (const [filePath, chunkIds] of fileToChunkIds) {
        const isTestFile = /\.(test|spec)\.[^.]+$/.test(filePath);
        if (isTestFile) {
            // Derive the source file path. The replace is anchored to the final
            // extension (mirroring the detection regex) so a `.test.`/`.spec.`
            // segment earlier in the path — e.g. a directory named
            // `utils.test.helpers/` — is never mangled.
            const sourceFilePath = filePath.replace(/\.(test|spec)\.([^.]+)$/, '.$2');
            const existing = testMap.get(sourceFilePath);
            if (existing) {
                existing.push(...chunkIds);
            }
            else {
                testMap.set(sourceFilePath, [...chunkIds]);
            }
        }
    }
    return testMap;
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
// Public surface of the benchmarks module: type-only and runtime re-exports
// from the scanner, query generator, and evaluator submodules.
export type { ScannedEntity, IndexScanResult, } from './index-scanner.js';
export { IndexScanError, parseIndexRows, buildCallerMap, buildTestMap, } from './index-scanner.js';
export type { BenchmarkQueryType, GeneratedQuery, QueryGeneratorOptions, } from './query-generator.js';
export { generateQueries, generateFindByNameQueries, generateFindByDescriptionQueries, generateFindCallersQueries, generateFindTestsQueries, generateFindImportsQueries, } from './query-generator.js';
export type { QueryEvalResult, QueryMetrics, AggregateEvalMetrics, QueryTypeBreakdown, BenchmarkReport, BenchmarkMetadata, SearchFn, BenchmarkProgressFn, } from './benchmark-evaluator.js';
export { BenchmarkEvalError, computeQueryMetrics, computeAggregateMetrics, computeQueryTypeBreakdown, runBenchmark, formatBenchmarkSummary, } from './benchmark-evaluator.js';
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
// Auto-generated benchmark module — barrel export
// (runtime values only; type-only exports live in the matching .d.ts)
export { IndexScanError, parseIndexRows, buildCallerMap, buildTestMap, } from './index-scanner.js';
export { generateQueries, generateFindByNameQueries, generateFindByDescriptionQueries, generateFindCallersQueries, generateFindTestsQueries, generateFindImportsQueries, } from './query-generator.js';
export { BenchmarkEvalError, computeQueryMetrics, computeAggregateMetrics, computeQueryTypeBreakdown, runBenchmark, formatBenchmarkSummary, } from './benchmark-evaluator.js';
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
 * Auto-generates benchmark queries with ground-truth from scanned index data.
 *
 * Query types:
 * - find-by-name: "Where is the X function/class/interface?"
 * - find-by-description: Uses NL summary as query text
 * - find-callers: "What calls X?" or "What references X?"
 * - find-tests: "Tests for X" or "Test file for foo.ts"
 * - find-imports: "What does X import?" or "What imports X?"
 *
 * Each query has a ground-truth set of expected chunk IDs.
 */
import type { ScannedEntity, IndexScanResult } from './index-scanner.js';
import type { GraphEdge } from '../graph/dependency-graph.js';
/** The type of benchmark query generated. */
export type BenchmarkQueryType = 'find-by-name' | 'find-by-description' | 'find-callers' | 'find-tests' | 'find-imports';
/** A single auto-generated benchmark query with ground truth. */
export interface GeneratedQuery {
    /** Natural-language query text fed to the search system under test. */
    readonly query: string;
    /** Chunk IDs a perfect retrieval would return for this query. */
    readonly expectedChunkIds: readonly string[];
    /** Which generation strategy produced this query. */
    readonly queryType: BenchmarkQueryType;
    /** Source entity that inspired this query. */
    readonly sourceEntityId: string;
}
/** Options for query generation. */
export interface QueryGeneratorOptions {
    /** Total number of queries to generate (default: 100). */
    readonly maxQueries: number;
    /** Distribution of query types as fractions (must sum to 1.0). */
    readonly distribution?: Readonly<Record<BenchmarkQueryType, number>>;
}
/**
 * Generate find-by-name queries.
 * Query: "Where is the <name> <type>?" or "<name> <type>"
 * Ground truth: chunk ID of the entity itself.
 */
export declare function generateFindByNameQueries(entities: readonly ScannedEntity[], count: number, random: () => number): GeneratedQuery[];
/**
 * Generate find-by-description queries.
 * Query: The NL summary of the entity.
 * Ground truth: chunk ID of the entity itself.
 */
export declare function generateFindByDescriptionQueries(entities: readonly ScannedEntity[], count: number, random: () => number): GeneratedQuery[];
/**
 * Generate find-callers queries.
 * Query: "What calls <name>?" or "callers of <name>"
 * Ground truth: chunk IDs of callers from the dependency graph.
 */
export declare function generateFindCallersQueries(entities: readonly ScannedEntity[], callerMap: ReadonlyMap<string, readonly string[]>, count: number, random: () => number): GeneratedQuery[];
/**
 * Generate find-tests queries.
 * Query: "tests for <name>" or "test file for <filePath>"
 * Ground truth: chunk IDs in the corresponding test file.
 */
export declare function generateFindTestsQueries(entities: readonly ScannedEntity[], testMap: ReadonlyMap<string, readonly string[]>, count: number, random: () => number): GeneratedQuery[];
/**
 * Generate find-imports queries.
 * Query: "imports of <name>" or "what does <name> import"
 * Ground truth: chunk IDs of imported modules (resolved via name map).
 */
export declare function generateFindImportsQueries(entities: readonly ScannedEntity[], edges: readonly GraphEdge[], count: number, random: () => number): GeneratedQuery[];
/**
 * Generate a complete benchmark dataset from scanned index data.
 *
 * Distributes queries across types according to the configured distribution,
 * using a deterministic seeded RNG for reproducibility.
 *
 * @param seed RNG seed; identical seeds produce identical datasets (default 42).
 */
export declare function generateQueries(scanResult: IndexScanResult, edges: readonly GraphEdge[], callerMap: ReadonlyMap<string, readonly string[]>, testMap: ReadonlyMap<string, readonly string[]>, options: QueryGeneratorOptions, seed?: number): readonly GeneratedQuery[];
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-generates benchmark queries with ground-truth from scanned index data.
|
|
3
|
+
*
|
|
4
|
+
* Query types:
|
|
5
|
+
* - find-by-name: "Where is the X function/class/interface?"
|
|
6
|
+
* - find-by-description: Uses NL summary as query text
|
|
7
|
+
* - find-callers: "What calls X?" or "What references X?"
|
|
8
|
+
* - find-tests: "Tests for X" or "Test file for foo.ts"
|
|
9
|
+
* - find-imports: "What does X import?" or "What imports X?"
|
|
10
|
+
*
|
|
11
|
+
* Each query has a ground-truth set of expected chunk IDs.
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Default fraction of the benchmark devoted to each query type.
 * Fractions sum to 1.0; overridable via QueryGeneratorOptions.distribution.
 */
const DEFAULT_DISTRIBUTION = {
    'find-by-name': 0.30,
    'find-by-description': 0.25,
    'find-callers': 0.15,
    'find-tests': 0.15,
    'find-imports': 0.15,
};
/** Types eligible for name-based queries (skip import_block, config_block). */
const NAME_QUERY_TYPES = new Set([
    'function', 'method', 'class', 'interface', 'type_alias', 'module',
]);
|
|
24
|
+
/**
 * Deterministic seeded pseudo-random number generator (mulberry32).
 * Returns a closure producing floats in [0, 1); identical seeds yield
 * identical sequences, which keeps benchmark datasets reproducible.
 */
function createSeededRandom(seed) {
    let s = seed | 0;
    return function next() {
        s = (s + 0x6D2B79F5) | 0;
        let z = Math.imul(s ^ (s >>> 15), s | 1);
        z ^= z + Math.imul(z ^ (z >>> 7), z | 61);
        return ((z ^ (z >>> 14)) >>> 0) / 0x100000000;
    };
}
|
|
37
|
+
/**
 * Shuffle an array deterministically using Fisher-Yates driven by the
 * supplied RNG. The input array is never mutated; a shuffled copy is returned.
 */
function shuffleDeterministic(items, random) {
    const out = items.slice();
    for (let idx = out.length - 1; idx > 0; idx -= 1) {
        const pick = Math.floor(random() * (idx + 1));
        [out[idx], out[pick]] = [out[pick], out[idx]];
    }
    return out;
}
|
|
50
|
+
/**
 * Generate find-by-name queries.
 * Query text: "<name> <type>" (e.g. "parseIndexRows function"); type_alias
 * entities are labelled "type". Ground truth: the entity's own chunk ID.
 */
export function generateFindByNameQueries(entities, count, random) {
    const candidates = entities.filter((entity) => entity.name.length > 0 && NAME_QUERY_TYPES.has(entity.chunkType));
    if (candidates.length === 0) {
        return [];
    }
    return shuffleDeterministic(candidates, random)
        .slice(0, count)
        .map((entity) => ({
            query: `${entity.name} ${entity.chunkType === 'type_alias' ? 'type' : entity.chunkType}`,
            expectedChunkIds: [entity.chunkId],
            queryType: 'find-by-name',
            sourceEntityId: entity.chunkId,
        }));
}
|
|
71
|
+
/**
 * Generate find-by-description queries.
 * Query text: the entity's NL summary (only summaries longer than 20 chars
 * are used so the query carries enough signal).
 * Ground truth: the entity's own chunk ID.
 */
export function generateFindByDescriptionQueries(entities, count, random) {
    const candidates = entities.filter((entity) => entity.nlSummary.length > 20 && NAME_QUERY_TYPES.has(entity.chunkType));
    if (candidates.length === 0) {
        return [];
    }
    const picked = shuffleDeterministic(candidates, random).slice(0, count);
    return picked.map((entity) => ({
        query: entity.nlSummary,
        expectedChunkIds: [entity.chunkId],
        queryType: 'find-by-description',
        sourceEntityId: entity.chunkId,
    }));
}
|
|
89
|
+
/**
 * Generate find-callers queries ("callers of <name>").
 * Only named entities with at least one caller edge are eligible.
 * Ground truth: the caller chunk IDs plus the entity's own chunk ID.
 */
export function generateFindCallersQueries(entities, callerMap, count, random) {
    const hasCallers = (entity) => (callerMap.get(entity.chunkId)?.length ?? 0) > 0;
    const candidates = entities.filter((entity) => entity.name.length > 0 && hasCallers(entity));
    if (candidates.length === 0) {
        return [];
    }
    const picked = shuffleDeterministic(candidates, random).slice(0, count);
    return picked.map((entity) => {
        const callers = callerMap.get(entity.chunkId) ?? [];
        return {
            query: `callers of ${entity.name}`,
            expectedChunkIds: [...callers, entity.chunkId],
            queryType: 'find-callers',
            sourceEntityId: entity.chunkId,
        };
    });
}
|
|
111
|
+
/**
 * Generate find-tests queries ("tests for <name>").
 * Only named, name-queryable entities whose file has a mapped test file
 * are eligible.
 * Ground truth: chunk IDs in the test file plus the entity's own chunk ID.
 */
export function generateFindTestsQueries(entities, testMap, count, random) {
    const candidates = entities.filter((entity) => {
        if (entity.name.length === 0 || !NAME_QUERY_TYPES.has(entity.chunkType)) {
            return false;
        }
        return (testMap.get(entity.filePath)?.length ?? 0) > 0;
    });
    if (candidates.length === 0) {
        return [];
    }
    const picked = shuffleDeterministic(candidates, random).slice(0, count);
    return picked.map((entity) => ({
        query: `tests for ${entity.name}`,
        expectedChunkIds: [...(testMap.get(entity.filePath) ?? []), entity.chunkId],
        queryType: 'find-tests',
        sourceEntityId: entity.chunkId,
    }));
}
|
|
135
|
+
/**
 * Generate find-imports queries ("imports of <name>").
 * Import targets are collected from 'imports' graph edges; only named,
 * name-queryable entities that import at least one module are eligible.
 * Ground truth: the imported chunk IDs plus the entity's own chunk ID.
 */
export function generateFindImportsQueries(entities, edges, count, random) {
    // source chunkId -> list of imported target chunkIds
    const importTargets = new Map();
    for (const edge of edges) {
        if (edge.type !== 'imports') {
            continue;
        }
        const targets = importTargets.get(edge.source);
        if (targets === undefined) {
            importTargets.set(edge.source, [edge.target]);
        }
        else {
            targets.push(edge.target);
        }
    }
    const candidates = entities.filter((entity) => entity.name.length > 0 &&
        NAME_QUERY_TYPES.has(entity.chunkType) &&
        (importTargets.get(entity.chunkId)?.length ?? 0) > 0);
    if (candidates.length === 0) {
        return [];
    }
    const picked = shuffleDeterministic(candidates, random).slice(0, count);
    return picked.map((entity) => ({
        query: `imports of ${entity.name}`,
        expectedChunkIds: [...(importTargets.get(entity.chunkId) ?? []), entity.chunkId],
        queryType: 'find-imports',
        sourceEntityId: entity.chunkId,
    }));
}
|
|
171
|
+
/**
 * Generate a complete benchmark dataset from scanned index data.
 *
 * Queries are distributed across the five query types according to the
 * configured distribution, generated with a deterministic seeded RNG, then
 * shuffled and trimmed to `maxQueries`. The same seed always yields the
 * same dataset.
 */
export function generateQueries(scanResult, edges, callerMap, testMap, options, seed = 42) {
    const random = createSeededRandom(seed);
    const distribution = options.distribution ?? DEFAULT_DISTRIBUTION;
    const { maxQueries } = options;
    // Per-type target count, rounded from the fractional distribution.
    const countFor = (type) => Math.round(maxQueries * distribution[type]);
    const { entities } = scanResult;
    // NOTE: generator call order matters — each call advances the shared RNG,
    // so reordering would change the (still deterministic) output.
    const combined = [
        ...generateFindByNameQueries(entities, countFor('find-by-name'), random),
        ...generateFindByDescriptionQueries(entities, countFor('find-by-description'), random),
        ...generateFindCallersQueries(entities, callerMap, countFor('find-callers'), random),
        ...generateFindTestsQueries(entities, testMap, countFor('find-tests'), random),
        ...generateFindImportsQueries(entities, edges, countFor('find-imports'), random),
    ];
    // Interleave query types for fair evaluation, then trim rounding overshoot.
    return shuffleDeterministic(combined, random).slice(0, maxQueries);
}
|
|
@@ -3,6 +3,7 @@ import { join } from 'node:path';
|
|
|
3
3
|
import { ok, err } from 'neverthrow';
|
|
4
4
|
import { parse } from 'yaml';
|
|
5
5
|
import { z } from 'zod';
|
|
6
|
+
import { safeString, safeRecord } from '../utils/safe-cast.js';
|
|
6
7
|
export class ConfigError extends Error {
|
|
7
8
|
constructor(message) {
|
|
8
9
|
super(message);
|
|
@@ -156,8 +157,10 @@ export function interpolateEnvVars(obj) {
|
|
|
156
157
|
return result;
|
|
157
158
|
}
|
|
158
159
|
if (obj !== null && typeof obj === 'object') {
|
|
160
|
+
// Runtime guard above ensures obj is a non-null object (not an array — handled earlier)
|
|
161
|
+
const record = safeRecord(obj, {});
|
|
159
162
|
const result = {};
|
|
160
|
-
for (const [key, value] of Object.entries(
|
|
163
|
+
for (const [key, value] of Object.entries(record)) {
|
|
161
164
|
const interpolated = interpolateEnvVars(value);
|
|
162
165
|
if (interpolated instanceof ConfigError)
|
|
163
166
|
return interpolated;
|
|
@@ -181,8 +184,10 @@ function normalizeEmbeddingConfig(embeddingPartial) {
|
|
|
181
184
|
const defaults = { ...DEFAULT_CONFIG.embedding };
|
|
182
185
|
const merged = { ...defaults, ...embeddingPartial };
|
|
183
186
|
// Support snake_case key from YAML: openai_compatible -> openaiCompatible
|
|
184
|
-
const
|
|
185
|
-
|
|
187
|
+
const openaiCompatRaw = merged['openaiCompatible'] ?? merged['openai_compatible'];
|
|
188
|
+
const openaiCompat = openaiCompatRaw !== undefined && openaiCompatRaw !== null
|
|
189
|
+
? safeRecord(openaiCompatRaw, {})
|
|
190
|
+
: undefined;
|
|
186
191
|
// Remove the snake_case variant so only the camelCase one remains
|
|
187
192
|
delete merged['openai_compatible'];
|
|
188
193
|
if (openaiCompat) {
|
|
@@ -208,35 +213,42 @@ function normalizeEmbeddingConfig(embeddingPartial) {
|
|
|
208
213
|
}
|
|
209
214
|
return merged;
|
|
210
215
|
}
|
|
216
|
+
/** Extract a sub-record from a config object, returning undefined if not a valid record. */
function optionalRecord(value) {
    const isRecord = value !== null && typeof value === 'object' && !Array.isArray(value);
    return isRecord ? safeRecord(value) : undefined;
}
|
|
211
223
|
function applyDefaults(partial) {
|
|
212
224
|
return {
|
|
213
|
-
version: partial['version']
|
|
225
|
+
version: safeString(partial['version'], DEFAULT_CONFIG.version),
|
|
214
226
|
project: {
|
|
215
227
|
...DEFAULT_CONFIG.project,
|
|
216
|
-
...partial['project'],
|
|
228
|
+
...optionalRecord(partial['project']),
|
|
217
229
|
},
|
|
218
230
|
ingestion: {
|
|
219
231
|
...DEFAULT_CONFIG.ingestion,
|
|
220
|
-
...partial['ingestion'],
|
|
232
|
+
...optionalRecord(partial['ingestion']),
|
|
221
233
|
},
|
|
222
|
-
embedding: normalizeEmbeddingConfig(partial['embedding']),
|
|
234
|
+
embedding: normalizeEmbeddingConfig(optionalRecord(partial['embedding'])),
|
|
223
235
|
llm: {
|
|
224
236
|
...DEFAULT_CONFIG.llm,
|
|
225
|
-
...partial['llm'],
|
|
237
|
+
...optionalRecord(partial['llm']),
|
|
226
238
|
},
|
|
227
239
|
search: {
|
|
228
240
|
...DEFAULT_CONFIG.search,
|
|
229
|
-
...partial['search'],
|
|
241
|
+
...optionalRecord(partial['search']),
|
|
230
242
|
},
|
|
231
243
|
storage: {
|
|
232
244
|
...DEFAULT_CONFIG.storage,
|
|
233
|
-
...partial['storage'],
|
|
245
|
+
...optionalRecord(partial['storage']),
|
|
234
246
|
},
|
|
235
247
|
...(partial['reranker'] !== undefined
|
|
236
248
|
? {
|
|
237
249
|
reranker: {
|
|
238
250
|
...DEFAULT_CONFIG.reranker,
|
|
239
|
-
...partial['reranker'],
|
|
251
|
+
...optionalRecord(partial['reranker']),
|
|
240
252
|
},
|
|
241
253
|
}
|
|
242
254
|
: {}),
|
|
@@ -256,7 +268,7 @@ function deepMerge(target, source) {
|
|
|
256
268
|
const sv = source[key];
|
|
257
269
|
const tv = target[key];
|
|
258
270
|
if (sv !== null && typeof sv === 'object' && !Array.isArray(sv) && tv !== null && typeof tv === 'object' && !Array.isArray(tv)) {
|
|
259
|
-
result[key] = deepMerge(tv, sv);
|
|
271
|
+
result[key] = deepMerge(safeRecord(tv), safeRecord(sv));
|
|
260
272
|
}
|
|
261
273
|
else {
|
|
262
274
|
result[key] = sv;
|
|
@@ -290,7 +302,7 @@ export async function loadConfig(rootDir) {
|
|
|
290
302
|
const localContent = await readFile(localPath, 'utf-8');
|
|
291
303
|
const localParsed = parse(localContent);
|
|
292
304
|
if (localParsed !== null && localParsed !== undefined && typeof localParsed === 'object') {
|
|
293
|
-
parsed = deepMerge(parsed, localParsed);
|
|
305
|
+
parsed = deepMerge(safeRecord(parsed, {}), safeRecord(localParsed, {}));
|
|
294
306
|
}
|
|
295
307
|
}
|
|
296
308
|
catch {
|
|
@@ -301,10 +313,11 @@ export async function loadConfig(rootDir) {
|
|
|
301
313
|
if (interpolated instanceof ConfigError) {
|
|
302
314
|
return err(interpolated);
|
|
303
315
|
}
|
|
304
|
-
const withDefaults = applyDefaults(interpolated);
|
|
316
|
+
const withDefaults = applyDefaults(safeRecord(interpolated, {}));
|
|
305
317
|
const validationResult = codeRAGConfigSchema.safeParse(withDefaults);
|
|
306
318
|
if (!validationResult.success) {
|
|
307
319
|
return err(new ConfigError(`Config validation failed: ${formatZodErrors(validationResult.error)}`));
|
|
308
320
|
}
|
|
321
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions -- Zod schema structurally matches CodeRAGConfig; safeParse validates all fields
|
|
309
322
|
return ok(validationResult.data);
|
|
310
323
|
}
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
import { ok, err } from 'neverthrow';
|
|
2
2
|
import { EmbedError } from '../types/provider.js';
|
|
3
|
+
import { safeString, safeStringUnion } from '../utils/safe-cast.js';
|
|
3
4
|
const RRF_K = 60;
|
|
5
|
+
const CHUNK_TYPES = [
|
|
6
|
+
'function', 'method', 'class', 'module', 'interface',
|
|
7
|
+
'type_alias', 'config_block', 'import_block', 'doc',
|
|
8
|
+
];
|
|
4
9
|
export class HybridSearch {
|
|
5
10
|
vectorStore;
|
|
6
11
|
bm25Index;
|
|
@@ -86,10 +91,12 @@ export class HybridSearch {
|
|
|
86
91
|
else {
|
|
87
92
|
// Vector-only hit: hydrate from vector store metadata
|
|
88
93
|
const meta = vectorMetadataMap.get(chunkId) ?? {};
|
|
89
|
-
const storedName = meta['name']
|
|
90
|
-
const storedChunkType = meta['chunk_type']
|
|
91
|
-
const storedFilePath = meta['file_path']
|
|
92
|
-
const storedLanguage = meta['language']
|
|
94
|
+
const storedName = safeString(meta['name'], '');
|
|
95
|
+
const storedChunkType = safeStringUnion(meta['chunk_type'], CHUNK_TYPES, 'function');
|
|
96
|
+
const storedFilePath = safeString(meta['file_path'], '');
|
|
97
|
+
const storedLanguage = safeString(meta['language'], 'unknown');
|
|
98
|
+
const storedContent = safeString(meta['content'], '');
|
|
99
|
+
const storedNlSummary = safeString(meta['nl_summary'], '');
|
|
93
100
|
const chunkMetadata = {
|
|
94
101
|
chunkType: storedChunkType,
|
|
95
102
|
name: storedName,
|
|
@@ -99,15 +106,15 @@ export class HybridSearch {
|
|
|
99
106
|
};
|
|
100
107
|
merged.push({
|
|
101
108
|
chunkId,
|
|
102
|
-
content:
|
|
103
|
-
nlSummary:
|
|
109
|
+
content: storedContent,
|
|
110
|
+
nlSummary: storedNlSummary,
|
|
104
111
|
score: fusedScore,
|
|
105
112
|
method: 'hybrid',
|
|
106
113
|
metadata: chunkMetadata,
|
|
107
114
|
chunk: {
|
|
108
115
|
id: chunkId,
|
|
109
|
-
content:
|
|
110
|
-
nlSummary:
|
|
116
|
+
content: storedContent,
|
|
117
|
+
nlSummary: storedNlSummary,
|
|
111
118
|
filePath: storedFilePath,
|
|
112
119
|
startLine: 0,
|
|
113
120
|
endLine: 0,
|
|
@@ -17,5 +17,18 @@ export declare class LanceDBStore implements VectorStore {
|
|
|
17
17
|
}[], StoreError>>;
|
|
18
18
|
delete(ids: string[]): Promise<Result<void, StoreError>>;
|
|
19
19
|
count(): Promise<Result<number, StoreError>>;
|
|
20
|
+
getById(id: string): Promise<Result<{
|
|
21
|
+
id: string;
|
|
22
|
+
metadata: Record<string, unknown>;
|
|
23
|
+
} | undefined, StoreError>>;
|
|
24
|
+
/**
|
|
25
|
+
* Scan all rows from the table.
|
|
26
|
+
* Returns an array of { id, metadata } objects (no vectors).
|
|
27
|
+
* Useful for index analysis and benchmark generation.
|
|
28
|
+
*/
|
|
29
|
+
getAll(limit?: number): Promise<Result<{
|
|
30
|
+
id: string;
|
|
31
|
+
metadata: Record<string, unknown>;
|
|
32
|
+
}[], StoreError>>;
|
|
20
33
|
close(): void;
|
|
21
34
|
}
|