@code-rag/core 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ export { PaginationMetaSchema, ChunkSummarySchema, ChunkDetailSchema, GraphNodeSchema, GraphEdgeSchema, ViewerSearchResultSchema, EmbeddingPointSchema, ViewerStatsResponseSchema, ViewerChunksResponseSchema, ViewerChunkDetailResponseSchema, ViewerSearchResponseSchema, ViewerGraphResponseSchema, ViewerEmbeddingsResponseSchema, } from './viewer-contracts.js';
2
+ export type { PaginationMeta as ViewerPaginationMeta, ChunkSummary as ViewerChunkSummary, ChunkDetail as ViewerChunkDetail, ViewerGraphNode, ViewerGraphEdge, ViewerSearchResult as ViewerSearchResultType, EmbeddingPoint as ViewerEmbeddingPoint, ViewerStatsResponse, ViewerChunksResponse, ViewerChunkDetailResponse, ViewerSearchResponse, ViewerGraphResponse, ViewerEmbeddingsResponse, } from './viewer-contracts.js';
@@ -0,0 +1,5 @@
1
+ export {
2
+ // Sub-schemas
3
+ PaginationMetaSchema, ChunkSummarySchema, ChunkDetailSchema, GraphNodeSchema, GraphEdgeSchema, ViewerSearchResultSchema, EmbeddingPointSchema,
4
+ // Endpoint response schemas
5
+ ViewerStatsResponseSchema, ViewerChunksResponseSchema, ViewerChunkDetailResponseSchema, ViewerSearchResponseSchema, ViewerGraphResponseSchema, ViewerEmbeddingsResponseSchema, } from './viewer-contracts.js';
@@ -0,0 +1,181 @@
1
+ /**
2
+ * Shared Zod schemas for the Viewer REST API contract.
3
+ *
4
+ * These schemas define the exact JSON shape that the api-server sends
5
+ * and the viewer client receives. Both packages import these schemas so
6
+ * that any drift between server responses and client expectations is
7
+ * caught at compile time (type mismatch) or runtime (schema.parse()).
8
+ *
9
+ * Every schema here models the **wire format** including the `{ data }` envelope.
10
+ */
11
+ import { z } from 'zod';
12
+ /** Pagination metadata returned alongside list endpoints. */
13
+ export declare const PaginationMetaSchema: z.ZodObject<{
14
+ page: z.ZodNumber;
15
+ pageSize: z.ZodNumber;
16
+ total: z.ZodNumber;
17
+ totalPages: z.ZodNumber;
18
+ }, z.core.$strip>;
19
+ /** A single chunk summary as returned by GET /chunks. */
20
+ export declare const ChunkSummarySchema: z.ZodObject<{
21
+ id: z.ZodString;
22
+ filePath: z.ZodString;
23
+ chunkType: z.ZodString;
24
+ name: z.ZodString;
25
+ language: z.ZodString;
26
+ startLine: z.ZodNumber;
27
+ endLine: z.ZodNumber;
28
+ contentPreview: z.ZodString;
29
+ }, z.core.$strip>;
30
+ /** Full chunk detail as returned by GET /chunks/:id. */
31
+ export declare const ChunkDetailSchema: z.ZodObject<{
32
+ id: z.ZodString;
33
+ filePath: z.ZodString;
34
+ chunkType: z.ZodString;
35
+ name: z.ZodString;
36
+ language: z.ZodString;
37
+ startLine: z.ZodNumber;
38
+ endLine: z.ZodNumber;
39
+ content: z.ZodString;
40
+ nlSummary: z.ZodString;
41
+ metadata: z.ZodRecord<z.ZodString, z.ZodUnknown>;
42
+ vector: z.ZodOptional<z.ZodArray<z.ZodNumber>>;
43
+ }, z.core.$strip>;
44
+ /** Graph node as returned by the dependency-graph module.
45
+ * Uses z.string() for `type` because the actual types in practice extend beyond
46
+ * the core GraphNode union (e.g., 'interface' from AST chunker). TypeScript
47
+ * constrains the allowed values at compile time; Zod validates the wire shape. */
48
+ export declare const GraphNodeSchema: z.ZodObject<{
49
+ id: z.ZodString;
50
+ filePath: z.ZodString;
51
+ symbols: z.ZodArray<z.ZodString>;
52
+ type: z.ZodString;
53
+ }, z.core.$strip>;
54
+ /** Graph edge as returned by the dependency-graph module.
55
+ * Uses z.string() for `type` for the same forward-compatibility reason. */
56
+ export declare const GraphEdgeSchema: z.ZodObject<{
57
+ source: z.ZodString;
58
+ target: z.ZodString;
59
+ type: z.ZodString;
60
+ }, z.core.$strip>;
61
+ /** A single search result as returned by GET /search. */
62
+ export declare const ViewerSearchResultSchema: z.ZodObject<{
63
+ chunkId: z.ZodString;
64
+ filePath: z.ZodString;
65
+ chunkType: z.ZodString;
66
+ name: z.ZodString;
67
+ content: z.ZodString;
68
+ nlSummary: z.ZodString;
69
+ score: z.ZodNumber;
70
+ method: z.ZodString;
71
+ }, z.core.$strip>;
72
+ /** A single embedding point as returned by GET /embeddings. */
73
+ export declare const EmbeddingPointSchema: z.ZodObject<{
74
+ id: z.ZodString;
75
+ filePath: z.ZodString;
76
+ chunkType: z.ZodString;
77
+ language: z.ZodString;
78
+ vector: z.ZodArray<z.ZodNumber>;
79
+ }, z.core.$strip>;
80
+ /** GET /api/v1/viewer/stats */
81
+ export declare const ViewerStatsResponseSchema: z.ZodObject<{
82
+ data: z.ZodObject<{
83
+ chunkCount: z.ZodNumber;
84
+ fileCount: z.ZodNumber;
85
+ languages: z.ZodRecord<z.ZodString, z.ZodNumber>;
86
+ storageBytes: z.ZodNullable<z.ZodNumber>;
87
+ lastIndexed: z.ZodNullable<z.ZodString>;
88
+ }, z.core.$strip>;
89
+ }, z.core.$strip>;
90
+ /** GET /api/v1/viewer/chunks */
91
+ export declare const ViewerChunksResponseSchema: z.ZodObject<{
92
+ data: z.ZodArray<z.ZodObject<{
93
+ id: z.ZodString;
94
+ filePath: z.ZodString;
95
+ chunkType: z.ZodString;
96
+ name: z.ZodString;
97
+ language: z.ZodString;
98
+ startLine: z.ZodNumber;
99
+ endLine: z.ZodNumber;
100
+ contentPreview: z.ZodString;
101
+ }, z.core.$strip>>;
102
+ meta: z.ZodObject<{
103
+ page: z.ZodNumber;
104
+ pageSize: z.ZodNumber;
105
+ total: z.ZodNumber;
106
+ totalPages: z.ZodNumber;
107
+ }, z.core.$strip>;
108
+ }, z.core.$strip>;
109
+ /** GET /api/v1/viewer/chunks/:id */
110
+ export declare const ViewerChunkDetailResponseSchema: z.ZodObject<{
111
+ data: z.ZodObject<{
112
+ id: z.ZodString;
113
+ filePath: z.ZodString;
114
+ chunkType: z.ZodString;
115
+ name: z.ZodString;
116
+ language: z.ZodString;
117
+ startLine: z.ZodNumber;
118
+ endLine: z.ZodNumber;
119
+ content: z.ZodString;
120
+ nlSummary: z.ZodString;
121
+ metadata: z.ZodRecord<z.ZodString, z.ZodUnknown>;
122
+ vector: z.ZodOptional<z.ZodArray<z.ZodNumber>>;
123
+ }, z.core.$strip>;
124
+ }, z.core.$strip>;
125
+ /** GET /api/v1/viewer/search */
126
+ export declare const ViewerSearchResponseSchema: z.ZodObject<{
127
+ data: z.ZodObject<{
128
+ results: z.ZodArray<z.ZodObject<{
129
+ chunkId: z.ZodString;
130
+ filePath: z.ZodString;
131
+ chunkType: z.ZodString;
132
+ name: z.ZodString;
133
+ content: z.ZodString;
134
+ nlSummary: z.ZodString;
135
+ score: z.ZodNumber;
136
+ method: z.ZodString;
137
+ }, z.core.$strip>>;
138
+ timing: z.ZodObject<{
139
+ totalMs: z.ZodNumber;
140
+ }, z.core.$strip>;
141
+ }, z.core.$strip>;
142
+ }, z.core.$strip>;
143
+ /** GET /api/v1/viewer/graph */
144
+ export declare const ViewerGraphResponseSchema: z.ZodObject<{
145
+ data: z.ZodObject<{
146
+ nodes: z.ZodArray<z.ZodObject<{
147
+ id: z.ZodString;
148
+ filePath: z.ZodString;
149
+ symbols: z.ZodArray<z.ZodString>;
150
+ type: z.ZodString;
151
+ }, z.core.$strip>>;
152
+ edges: z.ZodArray<z.ZodObject<{
153
+ source: z.ZodString;
154
+ target: z.ZodString;
155
+ type: z.ZodString;
156
+ }, z.core.$strip>>;
157
+ }, z.core.$strip>;
158
+ }, z.core.$strip>;
159
+ /** GET /api/v1/viewer/embeddings */
160
+ export declare const ViewerEmbeddingsResponseSchema: z.ZodObject<{
161
+ data: z.ZodArray<z.ZodObject<{
162
+ id: z.ZodString;
163
+ filePath: z.ZodString;
164
+ chunkType: z.ZodString;
165
+ language: z.ZodString;
166
+ vector: z.ZodArray<z.ZodNumber>;
167
+ }, z.core.$strip>>;
168
+ }, z.core.$strip>;
169
+ export type PaginationMeta = z.infer<typeof PaginationMetaSchema>;
170
+ export type ChunkSummary = z.infer<typeof ChunkSummarySchema>;
171
+ export type ChunkDetail = z.infer<typeof ChunkDetailSchema>;
172
+ export type ViewerGraphNode = z.infer<typeof GraphNodeSchema>;
173
+ export type ViewerGraphEdge = z.infer<typeof GraphEdgeSchema>;
174
+ export type ViewerSearchResult = z.infer<typeof ViewerSearchResultSchema>;
175
+ export type EmbeddingPoint = z.infer<typeof EmbeddingPointSchema>;
176
+ export type ViewerStatsResponse = z.infer<typeof ViewerStatsResponseSchema>;
177
+ export type ViewerChunksResponse = z.infer<typeof ViewerChunksResponseSchema>;
178
+ export type ViewerChunkDetailResponse = z.infer<typeof ViewerChunkDetailResponseSchema>;
179
+ export type ViewerSearchResponse = z.infer<typeof ViewerSearchResponseSchema>;
180
+ export type ViewerGraphResponse = z.infer<typeof ViewerGraphResponseSchema>;
181
+ export type ViewerEmbeddingsResponse = z.infer<typeof ViewerEmbeddingsResponseSchema>;
@@ -0,0 +1,124 @@
1
+ /**
2
+ * Shared Zod schemas for the Viewer REST API contract.
3
+ *
4
+ * These schemas define the exact JSON shape that the api-server sends
5
+ * and the viewer client receives. Both packages import these schemas so
6
+ * that any drift between server responses and client expectations is
7
+ * caught at compile time (type mismatch) or runtime (schema.parse()).
8
+ *
9
+ * Every schema here models the **wire format** including the `{ data }` envelope.
10
+ */
11
+ import { z } from 'zod';
12
+ // ---------------------------------------------------------------------------
13
+ // Reusable sub-schemas
14
+ // ---------------------------------------------------------------------------
15
+ /** Pagination metadata returned alongside list endpoints. */
16
+ export const PaginationMetaSchema = z.object({
17
+ page: z.number().int(),
18
+ pageSize: z.number().int(),
19
+ total: z.number().int(),
20
+ totalPages: z.number().int(),
21
+ });
22
+ /** A single chunk summary as returned by GET /chunks. */
23
+ export const ChunkSummarySchema = z.object({
24
+ id: z.string(),
25
+ filePath: z.string(),
26
+ chunkType: z.string(),
27
+ name: z.string(),
28
+ language: z.string(),
29
+ startLine: z.number().int(),
30
+ endLine: z.number().int(),
31
+ contentPreview: z.string(),
32
+ });
33
+ /** Full chunk detail as returned by GET /chunks/:id. */
34
+ export const ChunkDetailSchema = z.object({
35
+ id: z.string(),
36
+ filePath: z.string(),
37
+ chunkType: z.string(),
38
+ name: z.string(),
39
+ language: z.string(),
40
+ startLine: z.number().int(),
41
+ endLine: z.number().int(),
42
+ content: z.string(),
43
+ nlSummary: z.string(),
44
+ metadata: z.record(z.string(), z.unknown()),
45
+ vector: z.array(z.number()).optional(),
46
+ });
47
+ /** Graph node as returned by the dependency-graph module.
48
+ * Uses z.string() for `type` because the actual types in practice extend beyond
49
+ * the core GraphNode union (e.g., 'interface' from AST chunker). TypeScript
50
+ * constrains the allowed values at compile time; Zod validates the wire shape. */
51
+ export const GraphNodeSchema = z.object({
52
+ id: z.string(),
53
+ filePath: z.string(),
54
+ symbols: z.array(z.string()),
55
+ type: z.string(),
56
+ });
57
+ /** Graph edge as returned by the dependency-graph module.
58
+ * Uses z.string() for `type` for the same forward-compatibility reason. */
59
+ export const GraphEdgeSchema = z.object({
60
+ source: z.string(),
61
+ target: z.string(),
62
+ type: z.string(),
63
+ });
64
+ /** A single search result as returned by GET /search. */
65
+ export const ViewerSearchResultSchema = z.object({
66
+ chunkId: z.string(),
67
+ filePath: z.string(),
68
+ chunkType: z.string(),
69
+ name: z.string(),
70
+ content: z.string(),
71
+ nlSummary: z.string(),
72
+ score: z.number(),
73
+ method: z.string(),
74
+ });
75
+ /** A single embedding point as returned by GET /embeddings. */
76
+ export const EmbeddingPointSchema = z.object({
77
+ id: z.string(),
78
+ filePath: z.string(),
79
+ chunkType: z.string(),
80
+ language: z.string(),
81
+ vector: z.array(z.number()),
82
+ });
83
+ // ---------------------------------------------------------------------------
84
+ // Endpoint response schemas (full wire shape including envelope)
85
+ // ---------------------------------------------------------------------------
86
+ /** GET /api/v1/viewer/stats */
87
+ export const ViewerStatsResponseSchema = z.object({
88
+ data: z.object({
89
+ chunkCount: z.number().int(),
90
+ fileCount: z.number().int(),
91
+ languages: z.record(z.string(), z.number()),
92
+ storageBytes: z.number().nullable(),
93
+ lastIndexed: z.string().nullable(),
94
+ }),
95
+ });
96
+ /** GET /api/v1/viewer/chunks */
97
+ export const ViewerChunksResponseSchema = z.object({
98
+ data: z.array(ChunkSummarySchema),
99
+ meta: PaginationMetaSchema,
100
+ });
101
+ /** GET /api/v1/viewer/chunks/:id */
102
+ export const ViewerChunkDetailResponseSchema = z.object({
103
+ data: ChunkDetailSchema,
104
+ });
105
+ /** GET /api/v1/viewer/search */
106
+ export const ViewerSearchResponseSchema = z.object({
107
+ data: z.object({
108
+ results: z.array(ViewerSearchResultSchema),
109
+ timing: z.object({
110
+ totalMs: z.number(),
111
+ }),
112
+ }),
113
+ });
114
+ /** GET /api/v1/viewer/graph */
115
+ export const ViewerGraphResponseSchema = z.object({
116
+ data: z.object({
117
+ nodes: z.array(GraphNodeSchema),
118
+ edges: z.array(GraphEdgeSchema),
119
+ }),
120
+ });
121
+ /** GET /api/v1/viewer/embeddings */
122
+ export const ViewerEmbeddingsResponseSchema = z.object({
123
+ data: z.array(EmbeddingPointSchema),
124
+ });
@@ -0,0 +1,84 @@
1
+ /**
2
+ * Benchmark evaluator that runs auto-generated queries through CodeRAG search
3
+ * and computes IR metrics by comparing results to ground truth.
4
+ *
5
+ * This module does not import @code-rag/benchmarks: the IR metrics are
6
+ * implemented here as standalone functions to avoid a cross-package
7
+ * dependency (core should not depend on benchmarks).
8
+ */
9
+ import { type Result } from 'neverthrow';
10
+ import type { GeneratedQuery, BenchmarkQueryType } from './query-generator.js';
11
+ /** Result of evaluating a single query. */
12
+ export interface QueryEvalResult {
13
+ readonly query: string;
14
+ readonly queryType: BenchmarkQueryType;
15
+ readonly retrievedIds: readonly string[];
16
+ readonly expectedIds: readonly string[];
17
+ readonly metrics: QueryMetrics;
18
+ }
19
+ /** Metrics for a single query. */
20
+ export interface QueryMetrics {
21
+ readonly precisionAt5: number;
22
+ readonly precisionAt10: number;
23
+ readonly recallAt10: number;
24
+ readonly mrr: number;
25
+ readonly ndcgAt10: number;
26
+ }
27
+ /** Aggregate metrics across all queries. */
28
+ export interface AggregateEvalMetrics {
29
+ readonly precisionAt5: number;
30
+ readonly precisionAt10: number;
31
+ readonly recallAt10: number;
32
+ readonly mrr: number;
33
+ readonly ndcgAt10: number;
34
+ readonly queryCount: number;
35
+ }
36
+ /** Breakdown of metrics per query type. */
37
+ export interface QueryTypeBreakdown {
38
+ readonly queryType: BenchmarkQueryType;
39
+ readonly metrics: AggregateEvalMetrics;
40
+ }
41
+ /** Full benchmark report. */
42
+ export interface BenchmarkReport {
43
+ readonly aggregate: AggregateEvalMetrics;
44
+ readonly byQueryType: readonly QueryTypeBreakdown[];
45
+ readonly perQuery: readonly QueryEvalResult[];
46
+ readonly metadata: BenchmarkMetadata;
47
+ }
48
+ /** Metadata about the benchmark run. */
49
+ export interface BenchmarkMetadata {
50
+ readonly timestamp: string;
51
+ readonly totalQueries: number;
52
+ readonly totalChunksInIndex: number;
53
+ readonly durationMs: number;
54
+ }
55
+ export declare class BenchmarkEvalError extends Error {
56
+ constructor(message: string);
57
+ }
58
+ /** A function that performs search and returns ordered chunk IDs. */
59
+ export type SearchFn = (query: string) => Promise<readonly string[]>;
60
+ /**
61
+ * Compute metrics for a single query result.
62
+ */
63
+ export declare function computeQueryMetrics(retrieved: readonly string[], expected: readonly string[]): QueryMetrics;
64
+ /**
65
+ * Compute aggregate metrics by averaging per-query metrics.
66
+ */
67
+ export declare function computeAggregateMetrics(results: readonly QueryEvalResult[]): AggregateEvalMetrics;
68
+ /**
69
+ * Group results by query type and compute per-type aggregates.
70
+ */
71
+ export declare function computeQueryTypeBreakdown(results: readonly QueryEvalResult[]): readonly QueryTypeBreakdown[];
72
+ /** Progress callback for benchmark evaluation. */
73
+ export type BenchmarkProgressFn = (completed: number, total: number) => void;
74
+ /**
75
+ * Run the full benchmark evaluation.
76
+ *
77
+ * For each generated query, calls the search function and computes metrics
78
+ * by comparing retrieved chunk IDs to the ground-truth expected IDs.
79
+ */
80
+ export declare function runBenchmark(queries: readonly GeneratedQuery[], searchFn: SearchFn, totalChunksInIndex: number, onProgress?: BenchmarkProgressFn): Promise<Result<BenchmarkReport, BenchmarkEvalError>>;
81
+ /**
82
+ * Format a BenchmarkReport as a human-readable summary table string.
83
+ */
84
+ export declare function formatBenchmarkSummary(report: BenchmarkReport): string;
@@ -0,0 +1,220 @@
1
+ /**
2
+ * Benchmark evaluator that runs auto-generated queries through CodeRAG search
3
+ * and computes IR metrics by comparing results to ground truth.
4
+ *
5
+ * This module does not import @code-rag/benchmarks: the IR metrics are
6
+ * implemented here as standalone functions to avoid a cross-package
7
+ * dependency (core should not depend on benchmarks).
8
+ */
9
+ import { ok, err } from 'neverthrow';
10
/**
 * Error type used to report a failed benchmark evaluation run.
 *
 * The `name` property is overridden so the error can be told apart from a
 * plain `Error` by inspecting `error.name`.
 */
export class BenchmarkEvalError extends Error {
    /**
     * @param message Human-readable description of what went wrong.
     */
    constructor(message) {
        super(message);
        // Own property; shadows the default 'Error' inherited from the prototype.
        this.name = 'BenchmarkEvalError';
    }
}
16
+ // --- Standalone IR metric functions (no external dependency) ---
17
/**
 * Precision@k: the share of the first `k` result slots occupied by a relevant ID.
 *
 * The denominator is always `k`, even when fewer than `k` results were
 * retrieved, so a short result list is penalised.
 *
 * Each relevant ID is credited at most once: a search function that returns the
 * same ID several times must not be rewarded for the repeats.
 *
 * @param retrieved Ranked list of retrieved IDs (best first).
 * @param relevant  Set of ground-truth relevant IDs.
 * @param k         Cut-off rank; values <= 0 yield 0.
 * @returns A value in [0, 1]; 0 when `k <= 0` or nothing was retrieved.
 */
function precisionAtK(retrieved, relevant, k) {
    if (k <= 0 || retrieved.length === 0)
        return 0;
    // Distinct relevant IDs seen in the top-k; repeats of an ID add nothing.
    const credited = new Set();
    for (const id of retrieved.slice(0, k)) {
        if (relevant.has(id))
            credited.add(id);
    }
    return credited.size / k;
}
28
/**
 * Recall@k: the fraction of all relevant IDs that appear in the top `k` results.
 *
 * Hits are counted as *distinct* relevant IDs. Counting every occurrence would
 * let a result list containing repeated IDs produce a recall above 1.
 *
 * @param retrieved Ranked list of retrieved IDs (best first).
 * @param relevant  Set of ground-truth relevant IDs.
 * @param k         Cut-off rank; values <= 0 yield 0.
 * @returns A value in [0, 1]; 0 when `k <= 0` or there are no relevant IDs.
 */
function recallAtK(retrieved, relevant, k) {
    if (k <= 0 || relevant.size === 0)
        return 0;
    // Distinct relevant IDs found within the cut-off.
    const found = new Set();
    for (const id of retrieved.slice(0, k)) {
        if (relevant.has(id))
            found.add(id);
    }
    return found.size / relevant.size;
}
39
/**
 * Reciprocal rank of the first relevant result.
 *
 * @param retrieved Ranked list of retrieved IDs (best first).
 * @param relevant  Set of ground-truth relevant IDs.
 * @returns `1 / rank` (1-based) of the first relevant ID, or 0 when none of
 *          the retrieved IDs is relevant.
 */
function mrr(retrieved, relevant) {
    const firstHit = retrieved.findIndex((id) => id !== undefined && relevant.has(id));
    if (firstHit === -1)
        return 0;
    return 1 / (firstHit + 1);
}
48
/**
 * nDCG@k with binary relevance.
 *
 * DCG sums `1 / log2(rank + 1)` (rank is 1-based) over relevant results in the
 * top `k`. The ideal DCG assumes `min(|relevant|, k)` relevant results packed
 * at the top of the ranking.
 *
 * Each relevant ID contributes gain only at its first occurrence. Crediting
 * repeats would let a result list with duplicated IDs exceed the ideal DCG and
 * return a score above 1.
 *
 * @param retrieved Ranked list of retrieved IDs (best first).
 * @param relevant  Set of ground-truth relevant IDs.
 * @param k         Cut-off rank; values <= 0 yield 0.
 * @returns A value in [0, 1]; 0 when `k <= 0` or there are no relevant IDs.
 */
function ndcgAtK(retrieved, relevant, k) {
    if (k <= 0 || relevant.size === 0)
        return 0;
    const topK = retrieved.slice(0, k);
    // Relevant IDs that have already earned gain at a better rank.
    const credited = new Set();
    let dcg = 0;
    for (let i = 0; i < topK.length; i++) {
        const id = topK[i];
        if (id === undefined || !relevant.has(id) || credited.has(id))
            continue;
        credited.add(id);
        // i is 0-based, so the discount for rank (i + 1) is log2(i + 2).
        dcg += 1 / Math.log2(i + 2);
    }
    const idealCount = Math.min(relevant.size, k);
    let idcg = 0;
    for (let i = 0; i < idealCount; i++) {
        idcg += 1 / Math.log2(i + 2);
    }
    return idcg === 0 ? 0 : dcg / idcg;
}
68
/**
 * Evaluate one ranked result list against its ground truth.
 *
 * @param retrieved Ranked list of retrieved chunk IDs (best first).
 * @param expected  Ground-truth chunk IDs; converted to a Set for lookups.
 * @returns P@5, P@10, Recall@10, MRR and nDCG@10 for this query.
 */
export function computeQueryMetrics(retrieved, expected) {
    const groundTruth = new Set(expected);
    const precisionAt5 = precisionAtK(retrieved, groundTruth, 5);
    const precisionAt10 = precisionAtK(retrieved, groundTruth, 10);
    const recallAt10 = recallAtK(retrieved, groundTruth, 10);
    const reciprocalRank = mrr(retrieved, groundTruth);
    const ndcgAt10 = ndcgAtK(retrieved, groundTruth, 10);
    return {
        precisionAt5,
        precisionAt10,
        recallAt10,
        mrr: reciprocalRank,
        ndcgAt10,
    };
}
81
+ /**
82
+ * Compute aggregate metrics by averaging per-query metrics.
83
+ */
84
+ export function computeAggregateMetrics(results) {
85
+ if (results.length === 0) {
86
+ return {
87
+ precisionAt5: 0,
88
+ precisionAt10: 0,
89
+ recallAt10: 0,
90
+ mrr: 0,
91
+ ndcgAt10: 0,
92
+ queryCount: 0,
93
+ };
94
+ }
95
+ let sumP5 = 0;
96
+ let sumP10 = 0;
97
+ let sumR10 = 0;
98
+ let sumMrr = 0;
99
+ let sumNdcg = 0;
100
+ for (const result of results) {
101
+ sumP5 += result.metrics.precisionAt5;
102
+ sumP10 += result.metrics.precisionAt10;
103
+ sumR10 += result.metrics.recallAt10;
104
+ sumMrr += result.metrics.mrr;
105
+ sumNdcg += result.metrics.ndcgAt10;
106
+ }
107
+ const count = results.length;
108
+ return {
109
+ precisionAt5: sumP5 / count,
110
+ precisionAt10: sumP10 / count,
111
+ recallAt10: sumR10 / count,
112
+ mrr: sumMrr / count,
113
+ ndcgAt10: sumNdcg / count,
114
+ queryCount: count,
115
+ };
116
+ }
117
/**
 * Bucket evaluation results by `queryType` and aggregate each bucket.
 *
 * @param results Per-query evaluation results.
 * @returns One `{ queryType, metrics }` entry per distinct query type, sorted
 *          by query type (via `localeCompare`) so the output order is stable.
 */
export function computeQueryTypeBreakdown(results) {
    const byType = new Map();
    for (const entry of results) {
        const bucket = byType.get(entry.queryType);
        if (bucket === undefined) {
            byType.set(entry.queryType, [entry]);
        }
        else {
            bucket.push(entry);
        }
    }
    const breakdowns = Array.from(byType, ([queryType, grouped]) => ({
        queryType,
        metrics: computeAggregateMetrics(grouped),
    }));
    // Deterministic ordering regardless of the order queries were evaluated in.
    breakdowns.sort((left, right) => left.queryType.localeCompare(right.queryType));
    return breakdowns;
}
142
/**
 * Execute every generated query and assemble a full benchmark report.
 *
 * Queries are run one at a time, in order: each `searchFn` call is awaited
 * before the next starts. Retrieved chunk IDs are scored against the query's
 * `expectedChunkIds`.
 *
 * @param queries            Generated queries with ground-truth chunk IDs.
 * @param searchFn           Async search returning ranked chunk IDs for a query string.
 * @param totalChunksInIndex Index size; copied verbatim into the report metadata.
 * @param onProgress         Optional callback invoked after each query with
 *                           `(completed, total)`.
 * @returns `ok(report)` on success. Any exception thrown inside the run
 *          (by `searchFn`, `onProgress` or metric computation) is caught and
 *          returned as `err(BenchmarkEvalError)` rather than rejecting.
 */
export async function runBenchmark(queries, searchFn, totalChunksInIndex, onProgress) {
    try {
        const startedAt = Date.now();
        const perQuery = [];
        for (const [index, entry] of queries.entries()) {
            const retrievedIds = await searchFn(entry.query);
            perQuery.push({
                query: entry.query,
                queryType: entry.queryType,
                retrievedIds,
                expectedIds: entry.expectedChunkIds,
                metrics: computeQueryMetrics(retrievedIds, entry.expectedChunkIds),
            });
            if (onProgress) {
                onProgress(index + 1, queries.length);
            }
        }
        // Measure wall-clock time for the search loop before aggregating.
        const durationMs = Date.now() - startedAt;
        const aggregate = computeAggregateMetrics(perQuery);
        const byQueryType = computeQueryTypeBreakdown(perQuery);
        const metadata = {
            timestamp: new Date().toISOString(),
            totalQueries: queries.length,
            totalChunksInIndex,
            durationMs,
        };
        return ok({ aggregate, byQueryType, perQuery, metadata });
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        return err(new BenchmarkEvalError(`Benchmark evaluation failed: ${reason}`));
    }
}
187
/**
 * Render a BenchmarkReport as a plain-text summary.
 *
 * The output lists run metadata and the aggregate metrics. When the report has
 * per-query-type breakdowns, a table with one row per query type follows.
 *
 * @param report The benchmark report to render.
 * @returns The summary as a single string with lines joined by `\n`.
 */
export function formatBenchmarkSummary(report) {
    const { aggregate, metadata, byQueryType } = report;
    const out = [
        'Benchmark Results',
        '=================',
        `Queries: ${metadata.totalQueries}`,
        `Index size: ${metadata.totalChunksInIndex} chunks`,
        // Duration is stored in milliseconds; shown in seconds with one decimal.
        `Duration: ${(metadata.durationMs / 1000).toFixed(1)}s`,
        '',
        'Aggregate Metrics:',
        ` P@5: ${fmt(aggregate.precisionAt5)}`,
        ` P@10: ${fmt(aggregate.precisionAt10)}`,
        ` Recall@10: ${fmt(aggregate.recallAt10)}`,
        ` MRR: ${fmt(aggregate.mrr)}`,
        ` nDCG@10: ${fmt(aggregate.ndcgAt10)}`,
    ];
    if (byQueryType.length > 0) {
        out.push('', 'By Query Type:', ' Type | Count | P@5 | P@10 | R@10 | MRR | nDCG@10', ' ---------------------|-------|-------|-------|-------|-------|--------');
        for (const { queryType, metrics } of byQueryType) {
            const cells = [
                queryType.padEnd(20),
                String(metrics.queryCount).padStart(5),
                fmt(metrics.precisionAt5),
                fmt(metrics.precisionAt10),
                fmt(metrics.recallAt10),
                fmt(metrics.mrr),
                fmt(metrics.ndcgAt10),
            ];
            out.push(` ${cells.join(' | ')}`);
        }
    }
    return out.join('\n');
}
218
/**
 * Format a metric value for display.
 *
 * @param value Numeric metric value.
 * @returns The value with four decimal places, left-padded with spaces to a
 *          minimum width of six characters.
 */
function fmt(value) {
    const fixed = value.toFixed(4);
    return fixed.padStart(6);
}
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Scans an existing CodeRAG index (LanceDB) to extract entity information
3
+ * used for auto-generating benchmark queries with ground truth.
4
+ *
5
+ * All functions are pure where possible, taking data as input rather than
6
+ * connecting to stores directly.
7
+ */
8
+ import { type Result } from 'neverthrow';
9
+ import type { ChunkType } from '../types/chunk.js';
10
+ import type { GraphEdge } from '../graph/dependency-graph.js';
11
+ /** A scanned entity extracted from the index. */
12
+ export interface ScannedEntity {
13
+ readonly chunkId: string;
14
+ readonly name: string;
15
+ readonly chunkType: ChunkType;
16
+ readonly filePath: string;
17
+ readonly language: string;
18
+ readonly nlSummary: string;
19
+ readonly imports: readonly string[];
20
+ readonly exports: readonly string[];
21
+ readonly declarations: readonly string[];
22
+ }
23
+ /** Result of scanning the full index. */
24
+ export interface IndexScanResult {
25
+ readonly entities: readonly ScannedEntity[];
26
+ readonly totalChunks: number;
27
+ /** Map from chunkId to ScannedEntity for quick lookup. */
28
+ readonly entityMap: ReadonlyMap<string, ScannedEntity>;
29
+ /** Map from entity name to chunk IDs that declare it. */
30
+ readonly nameToChunkIds: ReadonlyMap<string, readonly string[]>;
31
+ /** Map from file path to chunk IDs in that file. */
32
+ readonly fileToChunkIds: ReadonlyMap<string, readonly string[]>;
33
+ }
34
+ export declare class IndexScanError extends Error {
35
+ constructor(message: string);
36
+ }
37
+ /**
38
+ * Convert raw index rows (from LanceDBStore.getAll()) into ScannedEntity objects.
39
+ * This is a pure function that operates on already-fetched data.
40
+ */
41
+ export declare function parseIndexRows(rows: readonly {
42
+ id: string;
43
+ metadata: Record<string, unknown>;
44
+ }[]): Result<IndexScanResult, IndexScanError>;
45
+ /**
46
+ * Build a caller map from graph edges.
47
+ * Maps target chunkId to source chunkIds that reference it.
48
+ */
49
+ export declare function buildCallerMap(edges: readonly GraphEdge[]): ReadonlyMap<string, readonly string[]>;
50
+ /**
51
+ * Build a test file map: maps source file paths to test file chunk IDs.
52
+ * Heuristic: a file at `foo.test.ts` or `foo.spec.ts` is the test for `foo.ts`.
53
+ */
54
+ export declare function buildTestMap(fileToChunkIds: ReadonlyMap<string, readonly string[]>): ReadonlyMap<string, readonly string[]>;