pcl-mcp 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmarks/evaluators/context-retrieval-quality.d.ts +30 -0
- package/dist/benchmarks/evaluators/context-retrieval-quality.d.ts.map +1 -0
- package/dist/benchmarks/evaluators/context-retrieval-quality.js +50 -0
- package/dist/benchmarks/evaluators/context-retrieval-quality.js.map +1 -0
- package/dist/benchmarks/evaluators/ir-metrics.d.ts +32 -0
- package/dist/benchmarks/evaluators/ir-metrics.d.ts.map +1 -0
- package/dist/benchmarks/evaluators/ir-metrics.js +98 -0
- package/dist/benchmarks/evaluators/ir-metrics.js.map +1 -0
- package/dist/benchmarks/evaluators/structured-judge.d.ts +34 -0
- package/dist/benchmarks/evaluators/structured-judge.d.ts.map +1 -0
- package/dist/benchmarks/evaluators/structured-judge.js +153 -0
- package/dist/benchmarks/evaluators/structured-judge.js.map +1 -0
- package/dist/benchmarks/evaluators/token-counter.d.ts +9 -0
- package/dist/benchmarks/evaluators/token-counter.d.ts.map +1 -0
- package/dist/benchmarks/evaluators/token-counter.js +24 -0
- package/dist/benchmarks/evaluators/token-counter.js.map +1 -0
- package/dist/benchmarks/generators/generate-corpus.d.ts +2 -0
- package/dist/benchmarks/generators/generate-corpus.d.ts.map +1 -0
- package/dist/benchmarks/generators/generate-corpus.js +243 -0
- package/dist/benchmarks/generators/generate-corpus.js.map +1 -0
- package/dist/benchmarks/lib/harness.d.ts +23 -0
- package/dist/benchmarks/lib/harness.d.ts.map +1 -0
- package/dist/benchmarks/lib/harness.js +44 -0
- package/dist/benchmarks/lib/harness.js.map +1 -0
- package/dist/benchmarks/lib/types.d.ts +79 -0
- package/dist/benchmarks/lib/types.d.ts.map +1 -0
- package/dist/benchmarks/lib/types.js +2 -0
- package/dist/benchmarks/lib/types.js.map +1 -0
- package/dist/benchmarks/reporters/markdown-reporter.d.ts +2 -0
- package/dist/benchmarks/reporters/markdown-reporter.d.ts.map +1 -0
- package/dist/benchmarks/reporters/markdown-reporter.js +80 -0
- package/dist/benchmarks/reporters/markdown-reporter.js.map +1 -0
- package/dist/benchmarks/runners/bench-ablation.d.ts +2 -0
- package/dist/benchmarks/runners/bench-ablation.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-ablation.js +49 -0
- package/dist/benchmarks/runners/bench-ablation.js.map +1 -0
- package/dist/benchmarks/runners/bench-ai-quality.d.ts +2 -0
- package/dist/benchmarks/runners/bench-ai-quality.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-ai-quality.js +297 -0
- package/dist/benchmarks/runners/bench-ai-quality.js.map +1 -0
- package/dist/benchmarks/runners/bench-interactive-eval.d.ts +2 -0
- package/dist/benchmarks/runners/bench-interactive-eval.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-interactive-eval.js +119 -0
- package/dist/benchmarks/runners/bench-interactive-eval.js.map +1 -0
- package/dist/benchmarks/runners/bench-performance.bench.d.ts +2 -0
- package/dist/benchmarks/runners/bench-performance.bench.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-performance.bench.js +50 -0
- package/dist/benchmarks/runners/bench-performance.bench.js.map +1 -0
- package/dist/benchmarks/runners/bench-search-quality.d.ts +2 -0
- package/dist/benchmarks/runners/bench-search-quality.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-search-quality.js +70 -0
- package/dist/benchmarks/runners/bench-search-quality.js.map +1 -0
- package/dist/benchmarks/runners/bench-token-efficiency.d.ts +2 -0
- package/dist/benchmarks/runners/bench-token-efficiency.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-token-efficiency.js +89 -0
- package/dist/benchmarks/runners/bench-token-efficiency.js.map +1 -0
- package/dist/benchmarks/runners/diag.d.ts +2 -0
- package/dist/benchmarks/runners/diag.d.ts.map +1 -0
- package/dist/benchmarks/runners/diag.js +30 -0
- package/dist/benchmarks/runners/diag.js.map +1 -0
- package/dist/benchmarks/vitest.config.bench.d.ts +3 -0
- package/dist/benchmarks/vitest.config.bench.d.ts.map +1 -0
- package/dist/benchmarks/vitest.config.bench.js +14 -0
- package/dist/benchmarks/vitest.config.bench.js.map +1 -0
- package/dist/bin/pcl.js +116 -17
- package/dist/bin/pcl.js.map +1 -1
- package/dist/src/db.d.ts +2 -1
- package/dist/src/db.d.ts.map +1 -1
- package/dist/src/db.js +25 -21
- package/dist/src/db.js.map +1 -1
- package/dist/src/embeddings.d.ts +1 -1
- package/dist/src/embeddings.js +2 -2
- package/dist/src/embeddings.js.map +1 -1
- package/dist/src/indexer.d.ts +1 -1
- package/dist/src/indexer.d.ts.map +1 -1
- package/dist/src/indexer.js +6 -2
- package/dist/src/indexer.js.map +1 -1
- package/dist/src/search.d.ts.map +1 -1
- package/dist/src/search.js +138 -26
- package/dist/src/search.js.map +1 -1
- package/dist/src/server.js +6 -0
- package/dist/src/server.js.map +1 -1
- package/dist/src/types.d.ts +1 -0
- package/dist/src/types.d.ts.map +1 -1
- package/dist/tests/db.test.d.ts +2 -0
- package/dist/tests/db.test.d.ts.map +1 -0
- package/dist/tests/db.test.js +459 -0
- package/dist/tests/db.test.js.map +1 -0
- package/dist/tests/embeddings.test.d.ts +2 -0
- package/dist/tests/embeddings.test.d.ts.map +1 -0
- package/dist/tests/embeddings.test.js +165 -0
- package/dist/tests/embeddings.test.js.map +1 -0
- package/dist/tests/helpers/test-harness.d.ts +26 -0
- package/dist/tests/helpers/test-harness.d.ts.map +1 -0
- package/dist/tests/helpers/test-harness.js +80 -0
- package/dist/tests/helpers/test-harness.js.map +1 -0
- package/dist/tests/indexer.test.d.ts +2 -0
- package/dist/tests/indexer.test.d.ts.map +1 -0
- package/dist/tests/indexer.test.js +299 -0
- package/dist/tests/indexer.test.js.map +1 -0
- package/dist/tests/schemas.test.d.ts +2 -0
- package/dist/tests/schemas.test.d.ts.map +1 -0
- package/dist/tests/schemas.test.js +378 -0
- package/dist/tests/schemas.test.js.map +1 -0
- package/dist/tests/search.test.d.ts +2 -0
- package/dist/tests/search.test.d.ts.map +1 -0
- package/dist/tests/search.test.js +129 -0
- package/dist/tests/search.test.js.map +1 -0
- package/dist/tests/tools.test.d.ts +2 -0
- package/dist/tests/tools.test.d.ts.map +1 -0
- package/dist/tests/tools.test.js +232 -0
- package/dist/tests/tools.test.js.map +1 -0
- package/package.json +14 -2
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Corpus generator — expands corpus-small into medium (50), large (100), xlarge (500).
|
|
3
|
+
* Uses deterministic seeding for reproducibility.
|
|
4
|
+
*/
|
|
5
|
+
import { mkdir, writeFile, cp } from "node:fs/promises";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
const FIXTURES_DIR = join(import.meta.dirname, "..", "fixtures");
|
|
8
|
+
/**
 * Build a small seeded pseudo-random generator (mulberry32).
 *
 * @param {number} seed - Starting state; coerced to a signed 32-bit integer.
 * @returns {() => number} Function that advances the state on every call and
 *   returns a float in the range [0, 1).
 */
function createRNG(seed) {
    // Internal state lives in the closure so each generator is independent.
    let state = seed;
    const next = () => {
        state |= 0;
        state = (state + 0x6d2b79f5) | 0;
        const mixed = Math.imul(state ^ (state >>> 15), 1 | state);
        const scrambled = (mixed + Math.imul(mixed ^ (mixed >>> 7), 61 | mixed)) ^ mixed;
        // Unsigned 32-bit result scaled down by 2^32.
        return ((scrambled ^ (scrambled >>> 14)) >>> 0) / 4294967296;
    };
    return next;
}
|
|
18
|
+
/**
 * Persona seed data. Each entry carries the display name, job role and primary
 * goal that the persona generator interpolates into its template.
 */
const PERSONA_NAMES = [
    ["Maya", "UX Researcher", "Validate designs with real users"],
    ["Jordan", "DevOps Engineer", "Keep infrastructure reliable"],
    ["Priya", "Product Owner", "Maximize business value per sprint"],
    ["Marcus", "QA Lead", "Catch bugs before they reach production"],
    ["Elena", "Data Analyst", "Turn data into actionable insights"],
    ["Tomás", "Frontend Developer", "Build pixel-perfect responsive UIs"],
    ["Kenji", "Backend Architect", "Design scalable API systems"],
    ["Fatima", "Scrum Master", "Remove blockers for the team"],
    ["Oscar", "Security Engineer", "Protect user data and prevent breaches"],
    ["Li Wei", "Mobile Developer", "Deliver smooth native app experiences"],
].map(([name, role, goal]) => ({ name, role, goal }));
|
|
30
|
+
/** Feature titles used as subjects for generated spec documents. */
const SPEC_TOPICS = [
    "Time Tracking Widget",
    "Invoice Generator",
    "Client Portal",
    "File Attachments",
    "Task Dependencies",
    "Gantt Chart View",
    "Resource Allocation",
    "Budget Tracker",
    "Custom Fields",
    "Recurring Tasks",
    "Email Integration",
    "Calendar Sync",
    "Milestone Tracking",
    "Approval Workflows",
    "Template Library",
    "API Webhooks",
    "Bulk Operations",
    "Dark Mode",
    "Offline Support",
    "Export to PDF",
    "Team Chat",
    "Activity Feed",
    "Custom Reports",
    "Sprint Planning",
    "Workload View",
    "Time Zone Support",
    "Multi-language",
    "Search & Filter",
];
|
|
39
|
+
/**
 * Titles for generated architecture decision records. Each one is phrased as an
 * imperative so it can be spliced into sentences such as "We decided to ...".
 */
const DECISION_TOPICS = [
    // Frontend and validation
    "Use Tailwind CSS for styling",
    "Adopt Zod for runtime validation",
    // Data and rendering
    "Choose PostgreSQL over MongoDB",
    "Use React Server Components",
    "Implement RBAC with Supabase RLS",
    // Delivery and tooling
    "Deploy on Vercel Edge",
    "Use Resend for transactional emails",
    "Adopt pnpm over npm",
    "Use Vitest for testing",
    "Implement feature flags with PostHog",
];
|
|
51
|
+
/**
 * Domain-rule seed data: a title plus a `critical` flag that is written
 * verbatim into the generated front matter.
 */
const DOMAIN_RULES = [
    "Password Policy",
    "File Upload Limits",
    "Workspace Naming Rules",
    "API Versioning Policy",
    "Accessibility Requirements",
    "Internationalization Rules",
    "Error Handling Standards",
    "Logging and Monitoring Policy",
    "Third-Party Integration Rules",
    "Performance Budgets",
].map((title) => ({
    title,
    // Only these two rules are flagged as critical.
    critical: title === "Accessibility Requirements" || title === "Logging and Monitoring Policy",
}));
|
|
63
|
+
/**
 * Render a persona markdown document (YAML front matter plus body).
 *
 * @param {number} index - Position in the generated series; wraps around PERSONA_NAMES.
 * @param {() => number} rng - Random source returning values in [0, 1). It is
 *   called twice: first for the tech level, then for the years of experience.
 * @returns {string} The complete document text, ending with a newline.
 */
function generatePersona(index, rng) {
    const { name, role, goal } = PERSONA_NAMES[index % PERSONA_NAMES.length];
    const slug = name.toLowerCase().replace(/\s+/g, "-");
    const levels = ["beginner", "intermediate", "advanced"];
    const techLevel = levels[Math.floor(rng() * 3)];
    // Drawn after the tech level so the random sequence is consumed in a fixed order.
    const years = Math.floor(rng() * 10 + 2);
    const focus = goal.toLowerCase();
    return `---
id: "${slug}"
name: "${name}"
role: "${role}"
tech_level: "${techLevel}"
primary_goal: "${goal}"
jobs_to_be_done:
- "Complete daily work efficiently"
- "Collaborate with team members"
- "Track progress on deliverables"
anti_patterns:
- "Avoids tools that require excessive configuration"
- "Ignores features hidden behind multiple clicks"
channels:
- "desktop"
- "email"
---

## Background

${name} is a ${role} with ${years} years of experience. Their primary focus is to ${focus}. They work in a fast-paced environment where context switching is common and tools need to be intuitive and fast.

## Key Behaviors

${name} typically starts the day by reviewing their task list and prioritizing based on urgency and impact. They prefer tools that surface the most important information first without requiring manual sorting or filtering. Keyboard shortcuts are appreciated but not required.

## Pain Points

The biggest frustration for ${name} is when tools slow down their workflow rather than accelerating it. They have abandoned previous tools that required too much upfront configuration or that cluttered the interface with features they did not use. Simplicity and speed are more important than feature completeness.
`;
}
|
|
98
|
+
/**
 * Render a feature-spec markdown document (YAML front matter plus body).
 *
 * @param {number} index - Position in the generated series; wraps around SPEC_TOPICS.
 * @param {() => number} rng - Random source returning values in [0, 1); called
 *   once to pick the status.
 * @returns {string} The complete document text, ending with a newline.
 */
function generateSpec(index, rng) {
    const title = SPEC_TOPICS[index % SPEC_TOPICS.length];
    const lowered = title.toLowerCase();
    const slug = lowered.replace(/\s+/g, "-");
    const statusOptions = ["draft", "in-progress", "approved", "implemented"];
    const pick = Math.floor(rng() * statusOptions.length);
    const status = statusOptions[pick];
    return `---
id: "${slug}"
title: "${title}"
status: "${status}"
acceptance_criteria:
- "Feature must be fully functional on desktop and mobile viewports"
- "Loading time must not exceed 2 seconds on a standard connection"
- "All user inputs must be validated both client-side and server-side"
- "Feature must include proper error states and empty states"
out_of_scope:
- "Offline support for this feature"
- "Integration with third-party services"
---

## Overview

The ${title} feature enables users to manage their ${lowered} workflow directly within TaskPilot. This reduces context switching and keeps all project-related information in one place.

## Requirements

The feature must integrate seamlessly with the existing dashboard and project views. Users should be able to access it from the project navigation sidebar. The interface must follow existing design patterns including the card-based layout, consistent button styles, and the standard form validation approach.

## Technical Considerations

Implementation should use Next.js Server Components for initial data loading and Client Components only for interactive elements. Data should be stored in the existing Supabase PostgreSQL database with appropriate RLS policies. All database queries should be optimized with proper indexes to maintain the 2-second loading time requirement.

## User Stories

As a project manager, I want to use ${lowered} so that I can track progress more effectively. As a developer, I want the ${lowered} interface to be keyboard-navigable so that I can work efficiently without switching to the mouse.
`;
}
|
|
134
|
+
/**
 * Render an architecture-decision-record markdown document.
 *
 * @param {number} index - Position in the generated series; wraps around
 *   DECISION_TOPICS. Also drives the record number (`adr-NNN`, starting at
 *   002) and the month in the date, which is capped at 09.
 * @param {() => number} _rng - Accepted for signature parity with the other
 *   generators; not used here.
 * @returns {string} The complete document text, ending with a newline.
 */
function generateDecision(index, _rng) {
    const title = DECISION_TOPICS[index % DECISION_TOPICS.length];
    const phrase = title.toLowerCase();
    const sequence = String(index + 2).padStart(3, "0");
    const recordId = `adr-${sequence}`;
    const month = Math.min(index + 1, 9);
    return `---
id: "${recordId}"
title: "${title}"
status: "accepted"
date: "2025-0${month}-15"
context: "The team needed to decide on the approach for ${phrase} to ensure consistency and maintainability across the codebase."
decision: "We decided to ${phrase} based on team expertise, ecosystem maturity, and alignment with our existing architecture."
consequences:
- "All new code must follow this decision"
- "Existing code should be migrated incrementally"
- "Documentation must be updated to reflect this choice"
alternatives_rejected:
- "The alternative approach was considered but rejected due to higher complexity"
---

## Context

The team evaluated multiple options for ${phrase}. The primary factors in the decision were developer experience, performance characteristics, and long-term maintainability.

## Decision Details

After evaluating the options, we chose to ${phrase}. This aligns with our existing technology choices and reduces the learning curve for new team members. The decision was made based on a proof-of-concept implementation that demonstrated the viability of this approach.

## Migration Plan

Existing code that does not follow this decision will be migrated as part of regular maintenance work. There is no urgent timeline for migration, but all new code must adhere to this decision immediately.
`;
}
|
|
165
|
+
/**
 * Render a domain-rule markdown document.
 *
 * @param {number} index - Position in the generated series; wraps around DOMAIN_RULES.
 * @param {() => number} _rng - Accepted for signature parity with the other
 *   generators; not used here.
 * @returns {string} The complete document text, ending with a newline.
 */
function generateDomainRule(index, _rng) {
    const { title, critical } = DOMAIN_RULES[index % DOMAIN_RULES.length];
    const lowered = title.toLowerCase();
    const slug = lowered.replace(/\s+/g, "-");
    return `---
id: "${slug}"
critical: ${critical}
title: "${title}"
---

## ${title}

These rules define the standards for ${lowered} in TaskPilot. All engineers must follow these rules when working on related features.

### Requirements

1. All implementations must be reviewed against these standards before merging
2. Automated checks should be added where feasible to enforce compliance
3. Exceptions require documented justification and team lead approval

### Rationale

These rules exist to ensure consistency, maintainability, and quality across the product. They were established based on industry best practices and lessons learned from previous projects.
`;
}
|
|
189
|
+
/**
 * Expand the `corpus-small` fixture into a larger corpus inside `outputDir`.
 *
 * The base corpus is copied first, then extra personas, specs, decisions and
 * domain rules are generated from a fixed seed so repeated runs produce the
 * same documents.
 *
 * The seed lists are shorter than the counts requested for the bigger corpora,
 * so indices wrap around. Wrapped entries get a numeric suffix on both the
 * filename and the front-matter id (`maya-2`, `maya-3`, ...). Previously they
 * reused the first-pass name and overwrote the earlier file, so the corpus
 * never reached its target size. First-pass output is unchanged.
 *
 * @param {number} targetSize - Desired total document count, including the 10
 *   documents assumed to be in `corpus-small`.
 * @param {string} outputDir - Directory to populate.
 * @returns {Promise<void>}
 */
async function generateCorpus(targetSize, outputDir) {
    const rng = createRNG(42); // deterministic seed
    // Start by copying corpus-small as the base
    await cp(join(FIXTURES_DIR, "corpus-small"), outputDir, { recursive: true });
    // Base: 1 product + 2 personas + 2 journeys + 2 specs + 1 decision + 2 domain = 10
    const remaining = targetSize - 10;
    if (remaining <= 0) {
        return;
    }
    // Distribution: 20% personas, 30% specs, 20% decisions; domain rules take
    // whatever is left (about 30%). No journeys are generated.
    const personaCount = Math.floor(remaining * 0.2);
    const specCount = Math.floor(remaining * 0.3);
    const decisionCount = Math.floor(remaining * 0.2);
    const domainCount = remaining - personaCount - specCount - decisionCount;
    // Do not rely on the base corpus providing every target directory.
    for (const dir of ["personas", "specs", "decisions", "domain"]) {
        await mkdir(join(outputDir, dir), { recursive: true });
    }
    // Write one generated document, giving wrapped entries a unique name and id.
    const writeEntry = async (dir, baseSlug, index, poolSize, content) => {
        const cycle = Math.floor(index / poolSize);
        if (cycle === 0) {
            await writeFile(join(outputDir, dir, `${baseSlug}.md`), content);
            return;
        }
        const slug = `${baseSlug}-${cycle + 1}`;
        // Only the first match is replaced, which is the front-matter id line.
        const renamed = content.replace(`id: "${baseSlug}"`, () => `id: "${slug}"`);
        await writeFile(join(outputDir, dir, `${slug}.md`), renamed);
    };
    const slugify = (text) => text.toLowerCase().replace(/\s+/g, "-");
    // Generate personas
    for (let i = 0; i < personaCount; i++) {
        const content = generatePersona(i, rng);
        const baseSlug = slugify(PERSONA_NAMES[i % PERSONA_NAMES.length].name);
        await writeEntry("personas", baseSlug, i, PERSONA_NAMES.length, content);
    }
    // Generate specs
    for (let i = 0; i < specCount; i++) {
        const content = generateSpec(i, rng);
        const baseSlug = slugify(SPEC_TOPICS[i % SPEC_TOPICS.length]);
        await writeEntry("specs", baseSlug, i, SPEC_TOPICS.length, content);
    }
    // Generate decisions (ids and filenames are index-based, so already unique)
    for (let i = 0; i < decisionCount; i++) {
        const content = generateDecision(i, rng);
        await writeFile(join(outputDir, "decisions", `adr-${String(i + 2).padStart(3, "0")}.md`), content);
    }
    // Generate domain rules
    for (let i = 0; i < domainCount; i++) {
        const content = generateDomainRule(i, rng);
        const baseSlug = slugify(DOMAIN_RULES[i % DOMAIN_RULES.length].title);
        await writeEntry("domain", baseSlug, i, DOMAIN_RULES.length, content);
    }
}
|
|
227
|
+
/**
 * Generate the medium, large and xlarge benchmark corpora under FIXTURES_DIR,
 * one after another, logging progress to stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const sizes = [
        { name: "corpus-medium", size: 50 },
        { name: "corpus-large", size: 100 },
        { name: "corpus-xlarge", size: 500 },
    ];
    for (const { name, size } of sizes) {
        const outputDir = join(FIXTURES_DIR, name);
        console.log(`Generating ${name} (${size} files)...`);
        await mkdir(outputDir, { recursive: true });
        await generateCorpus(size, outputDir);
        console.log(`  → ${name} created at ${outputDir}`);
    }
    console.log("\nDone! Generated corpora for benchmarking.");
}
main().catch((error) => {
    console.error(error);
    // Report failure to the caller (CI, npm scripts) instead of exiting 0.
    process.exitCode = 1;
});
|
|
243
|
+
//# sourceMappingURL=generate-corpus.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generate-corpus.js","sourceRoot":"","sources":["../../../benchmarks/generators/generate-corpus.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,EAAE,EAAE,MAAM,kBAAkB,CAAC;AACxD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;AAEjE,kCAAkC;AAClC,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,GAAG,EAAE;QACV,IAAI,IAAI,CAAC,CAAC;QACV,IAAI,GAAG,CAAC,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,KAAK,EAAE,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC;QAClD,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC/C,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,UAAU,CAAC;IAC/C,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,aAAa,GAAG;IACpB,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,kCAAkC,EAAE;IACjF,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,8BAA8B,EAAE;IACjF,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,oCAAoC,EAAE;IACpF,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,yCAAyC,EAAE;IACpF,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,oCAAoC,EAAE;IACnF,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,oCAAoC,EAAE;IACzF,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,6BAA6B,EAAE;IACjF,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,8BAA8B,EAAE;IAC9E,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,wCAAwC,EAAE;IAC5F,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,uCAAuC,EAAE;CAC5F,CAAC;AAEF,MAAM,WAAW,GAAG;IAClB,sBAAsB,EAAE,mBAAmB,EAAE,eAAe,EAAE,kBAAkB;IAChF,mBAAmB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,gBAAgB;IAChF,eAAe,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,eAAe;IACxE,oBAAoB,EAAE,oBAAoB,EAAE,kBAAkB,EAAE,cAAc;IAC9E,iBAAiB,EAAE,WAAW,EAAE,iBAAiB,EAAE,eAAe;IAClE,WAAW,EAAE,eAAe,EAAE,gBAAgB,EAAE,iBAAiB;IACjE,eAAe,EAAE,mBAAmB,EAAE,gBAAgB,EAAE,iBAAiB;CAC1E,CAAC;AAEF,MAAM,eAAe,GAAG;IACtB,8BAA8B;I
AC9B,kCAAkC;IAClC,gCAAgC;IAChC,6BAA6B;IAC7B,kCAAkC;IAClC,uBAAuB;IACvB,qCAAqC;IACrC,qBAAqB;IACrB,wBAAwB;IACxB,sCAAsC;CACvC,CAAC;AAEF,MAAM,YAAY,GAAG;IACnB,EAAE,KAAK,EAAE,iBAAiB,EAAE,QAAQ,EAAE,KAAK,EAAE;IAC7C,EAAE,KAAK,EAAE,oBAAoB,EAAE,QAAQ,EAAE,KAAK,EAAE;IAChD,EAAE,KAAK,EAAE,wBAAwB,EAAE,QAAQ,EAAE,KAAK,EAAE;IACpD,EAAE,KAAK,EAAE,uBAAuB,EAAE,QAAQ,EAAE,KAAK,EAAE;IACnD,EAAE,KAAK,EAAE,4BAA4B,EAAE,QAAQ,EAAE,IAAI,EAAE;IACvD,EAAE,KAAK,EAAE,4BAA4B,EAAE,QAAQ,EAAE,KAAK,EAAE;IACxD,EAAE,KAAK,EAAE,0BAA0B,EAAE,QAAQ,EAAE,KAAK,EAAE;IACtD,EAAE,KAAK,EAAE,+BAA+B,EAAE,QAAQ,EAAE,IAAI,EAAE;IAC1D,EAAE,KAAK,EAAE,+BAA+B,EAAE,QAAQ,EAAE,KAAK,EAAE;IAC3D,EAAE,KAAK,EAAE,qBAAqB,EAAE,QAAQ,EAAE,KAAK,EAAE;CAClD,CAAC;AAEF,SAAS,eAAe,CAAC,KAAa,EAAE,GAAiB;IACvD,MAAM,CAAC,GAAG,aAAa,CAAC,KAAK,GAAG,aAAa,CAAC,MAAM,CAAE,CAAC;IACvD,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACrD,MAAM,SAAS,GAAG,CAAC,UAAU,EAAE,cAAc,EAAE,UAAU,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;IAElF,OAAO;OACF,EAAE;SACA,CAAC,CAAC,IAAI;SACN,CAAC,CAAC,IAAI;eACA,SAAS;iBACP,CAAC,CAAC,IAAI;;;;;;;;;;;;;;;EAerB,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,IAAI,SAAS,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,mDAAmD,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE;;;;EAI/H,CAAC,CAAC,IAAI;;;;8BAIsB,CAAC,CAAC,IAAI;CACnC,CAAC;AACF,CAAC;AAED,SAAS,YAAY,CAAC,KAAa,EAAE,GAAiB;IACpD,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,GAAG,WAAW,CAAC,MAAM,CAAE,CAAC;IACvD,MAAM,EAAE,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACpD,MAAM,QAAQ,GAAG,CAAC,OAAO,EAAE,aAAa,EAAE,UAAU,EAAE,aAAa,CAAC,CAAC;IACrE,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAE,CAAC;IAE9D,OAAO;OACF,EAAE;UACC,KAAK;WACJ,MAAM;;;;;;;;;;;;;MAaX,KAAK,0CAA0C,KAAK,CAAC,WAAW,EAAE;;;;;;;;;;;;sCAYlC,KAAK,CAAC,WAAW,EAAE,8EAA8E,KAAK,CAAC,WAAW,EAAE;CACzJ,CAAC;AACF,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa,EAAE,IAAkB;IACzD,MAAM,KAAK,GAAG,eAAe,CAAC,KAAK,GAAG,eAAe,CAAC,MAAM,CAAE,CAAC;IAC/D,MAAM,EAAE,GAAG,OAAO,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAA
C,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;IAEvD,OAAO;OACF,EAAE;UACC,KAAK;;eAEA,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC;0DACqB,KAAK,CAAC,WAAW,EAAE;2BAClD,KAAK,CAAC,WAAW,EAAE;;;;;;;;;;;0CAWJ,KAAK,CAAC,WAAW,EAAE;;;;4CAIjB,KAAK,CAAC,WAAW,EAAE;;;;;CAK9D,CAAC;AACF,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAa,EAAE,IAAkB;IAC3D,MAAM,IAAI,GAAG,YAAY,CAAC,KAAK,GAAG,YAAY,CAAC,MAAM,CAAE,CAAC;IACxD,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAEzD,OAAO;OACF,EAAE;YACG,IAAI,CAAC,QAAQ;UACf,IAAI,CAAC,KAAK;;;KAGf,IAAI,CAAC,KAAK;;uCAEwB,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE;;;;;;;;;;;CAW9D,CAAC;AACF,CAAC;AAED,KAAK,UAAU,cAAc,CAAC,UAAkB,EAAE,SAAiB;IACjE,MAAM,GAAG,GAAG,SAAS,CAAC,EAAE,CAAC,CAAC,CAAC,qBAAqB;IAEhD,4CAA4C;IAC5C,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,EAAE,cAAc,CAAC,EAAE,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE7E,8CAA8C;IAC9C,mFAAmF;IACnF,MAAM,SAAS,GAAG,UAAU,GAAG,EAAE,CAAC;IAClC,IAAI,SAAS,IAAI,CAAC;QAAE,OAAO;IAE3B,iFAAiF;IACjF,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;IAClD,MAAM,WAAW,GAAG,SAAS,GAAG,YAAY,GAAG,SAAS,GAAG,aAAa,CAAC;IAEzE,oBAAoB;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,OAAO,GAAG,eAAe,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACxC,MAAM,IAAI,GAAG,aAAa,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM,CAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC9F,MAAM,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,UAAU,EAAE,GAAG,IAAI,KAAK,CAAC,EAAE,OAAO,CAAC,CAAC;IACtE,CAAC;IAED,iBAAiB;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACrC,MAAM,IAAI,GAAG,WAAW,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QACrF,MAAM,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,GAAG,IAAI,KAAK,CAAC,EAAE,OAAO,CAAC,CAAC;IACnE,CAAC;IAED,qBAAqB;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GA
AG,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,OAAO,GAAG,gBAAgB,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACzC,MAAM,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,WAAW,EAAE,OAAO,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,CAAC;IACrG,CAAC;IAED,wBAAwB;IACxB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,OAAO,GAAG,kBAAkB,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC3C,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,GAAG,YAAY,CAAC,MAAM,CAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC7F,MAAM,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,IAAI,KAAK,CAAC,EAAE,OAAO,CAAC,CAAC;IACpE,CAAC;AACH,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,KAAK,GAAG;QACZ,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,EAAE,EAAE;QACnC,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,GAAG,EAAE;QACnC,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,GAAG,EAAE;KACrC,CAAC;IAEF,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,KAAK,EAAE,CAAC;QACnC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,CAAC;QAC3C,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,KAAK,IAAI,YAAY,CAAC,CAAC;QACrD,MAAM,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5C,MAAM,cAAc,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,eAAe,SAAS,EAAE,CAAC,CAAC;IACrD,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;AAC7D,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type Database from "better-sqlite3";
|
|
2
|
+
declare const FIXTURES_DIR: string;
|
|
3
|
+
/** Handle returned by `setup()` for one isolated benchmark environment. */
export interface BenchHarness {
    /** Open better-sqlite3 database handle for the benchmark corpus. */
    db: Database.Database;
    /** Path of the directory holding the copied corpus. */
    productDir: string;
    /** Tears the environment down; resolves when cleanup has finished. */
    cleanup(): Promise<void>;
}
|
|
8
|
+
/**
 * Create a fresh benchmark environment:
 * 1. Copy corpus to a temp directory
 * 2. Open a fresh SQLite DB
 * 3. Run fullIndex to populate it (skipped when `options.skipIndex` is set)
 *
 * @param corpus - Name of the fixture directory to copy. Defaults to
 *   "corpus-small"; the larger names are those produced by the corpus
 *   generator and must exist under the fixtures directory before use.
 * @param options - `skipIndex: true` returns the environment without indexing.
 * @returns The harness for the new environment.
 */
export declare function setup(corpus?: "corpus-small" | "corpus-medium" | "corpus-large" | "corpus-xlarge", options?: {
    skipIndex?: boolean;
}): Promise<BenchHarness>;
|
|
17
|
+
/**
 * Prime the embedding pipeline so that the one-off model load (about 23MB,
 * triggered by the first call) does not land inside a timed benchmark.
 * Invoke once before measuring.
 *
 * @returns Resolves when the warm-up call has completed.
 */
export declare function warmupEmbeddings(): Promise<void>;
|
|
22
|
+
export { FIXTURES_DIR };
|
|
23
|
+
//# sourceMappingURL=harness.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"harness.d.ts","sourceRoot":"","sources":["../../../benchmarks/lib/harness.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,QAAQ,MAAM,gBAAgB,CAAC;AAE3C,QAAA,MAAM,YAAY,QAA8C,CAAC;AAEjE,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,QAAQ,CAAC,QAAQ,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;CAC9B;AAED;;;;;GAKG;AACH,wBAAsB,KAAK,CACzB,MAAM,GAAE,cAA+B,EACvC,OAAO,GAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAO,GACpC,OAAO,CAAC,YAAY,CAAC,CAyBvB;AAED;;;GAGG;AACH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAGtD;AAED,OAAO,EAAE,YAAY,EAAE,CAAC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { mkdtemp, cp, rm } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { openDB, closeDB } from "../../src/db.js";
|
|
5
|
+
import { fullIndex } from "../../src/indexer.js";
|
|
6
|
+
const FIXTURES_DIR = join(import.meta.dirname, "..", "fixtures");
|
|
7
|
+
/**
 * Create a fresh benchmark environment:
 * 1. Copy corpus to a temp directory
 * 2. Open a fresh SQLite DB
 * 3. Run fullIndex to populate it
 *
 * If any step fails, the temp directory is removed and the DB is closed before
 * the error is rethrown, so a failed setup leaves nothing behind.
 *
 * @param {string} [corpus="corpus-small"] - Fixture directory name under FIXTURES_DIR.
 * @param {{ skipIndex?: boolean }} [options={}] - Set `skipIndex` to skip the indexing step.
 * @returns {Promise<{ db: object, productDir: string, cleanup: () => Promise<void> }>}
 *   The open DB, the copied corpus path, and a teardown function.
 */
export async function setup(corpus = "corpus-small", options = {}) {
    // Ensure any previous DB singleton is closed
    closeDB();
    // Create temp directory and copy corpus
    const tmpDir = await mkdtemp(join(tmpdir(), "pcl-bench-"));
    const productDir = join(tmpDir, "product");
    const cleanup = async () => {
        closeDB();
        await rm(tmpDir, { recursive: true, force: true });
    };
    try {
        await cp(join(FIXTURES_DIR, corpus), productDir, { recursive: true });
        // Open fresh DB
        const db = openDB(productDir);
        // Index corpus unless skipped
        if (!options.skipIndex) {
            await fullIndex(db, productDir);
        }
        return { db, productDir, cleanup };
    }
    catch (error) {
        // The caller never receives `cleanup` on failure, so release resources here.
        await cleanup().catch(() => {
            // Best effort: the setup failure is the error worth reporting.
        });
        throw error;
    }
}
|
|
35
|
+
/**
 * Prime the embedding pipeline so that the one-off model load (about 23MB,
 * triggered by the first call) does not land inside a timed benchmark.
 * Invoke once before measuring.
 *
 * @returns {Promise<void>} Resolves when the warm-up call has completed.
 */
export async function warmupEmbeddings() {
    // Loaded lazily so importing the harness does not pull in the embeddings module.
    const embeddingsModule = await import("../../src/embeddings.js");
    const embedText = embeddingsModule.embedText;
    await embedText("warmup");
}
|
|
43
|
+
export { FIXTURES_DIR };
|
|
44
|
+
//# sourceMappingURL=harness.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"harness.js","sourceRoot":"","sources":["../../../benchmarks/lib/harness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,EAAE,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAGjD,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;AAQjE;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,SAAyB,cAAc,EACvC,UAAmC,EAAE;IAErC,6CAA6C;IAC7C,OAAO,EAAE,CAAC;IAEV,wCAAwC;IACxC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,YAAY,CAAC,CAAC,CAAC;IAC3D,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAC3C,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAEtE,gBAAgB;IAChB,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC;IAE9B,8BAA8B;IAC9B,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC;QACvB,MAAM,SAAS,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,OAAO;QACL,EAAE;QACF,UAAU;QACV,OAAO,EAAE,KAAK,IAAI,EAAE;YAClB,OAAO,EAAE,CAAC;YACV,MAAM,EAAE,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QACrD,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB;IACpC,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,yBAAyB,CAAC,CAAC;IAC9D,MAAM,SAAS,CAAC,QAAQ,CAAC,CAAC;AAC5B,CAAC;AAED,OAAO,EAAE,YAAY,EAAE,CAAC"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
 * Relevance judgments for a single benchmark query.
 *
 * - `query`: the query text.
 * - `queryType`: one of "exact_term", "conceptual" or "multi_hop".
 * - `judgments`: per-document grades, each pairing a `docId` with a
 *   `relevance` of 0, 1, 2 or 3 (presumably 0 = not relevant and 3 = most
 *   relevant — TODO confirm against the ground-truth fixtures).
 */
export interface RelevanceJudgment {
|
|
2
|
+
query: string;
|
|
3
|
+
queryType: "exact_term" | "conceptual" | "multi_hop";
|
|
4
|
+
judgments: Array<{
|
|
5
|
+
docId: string;
|
|
6
|
+
relevance: 0 | 1 | 2 | 3;
|
|
7
|
+
}>;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Container for a set of per-query relevance judgments (`queries`).
 * NOTE(review): appears to be the shape of
 * fixtures/ground-truth/relevance-judgments.json, whose `queries` array the
 * ablation runner iterates — confirm.
 */
export interface RelevanceGroundTruth {
|
|
10
|
+
queries: RelevanceJudgment[];
|
|
11
|
+
}
|
|
12
|
+
/**
 * One weighted criterion used when evaluating a coding task.
 *
 * - `criterion`: label of the criterion.
 * - `weight`: numeric weight of this criterion.
 * - `type`: how it is checked, either "regex" or "llm_judge".
 * - `pattern` / `prompt`: optional; presumably `pattern` is used when `type`
 *   is "regex" and `prompt` when it is "llm_judge" — TODO confirm against the
 *   evaluator that consumes this.
 */
export interface EvaluationCriterion {
|
|
13
|
+
criterion: string;
|
|
14
|
+
weight: number;
|
|
15
|
+
type: "regex" | "llm_judge";
|
|
16
|
+
pattern?: string;
|
|
17
|
+
prompt?: string;
|
|
18
|
+
}
|
|
19
|
+
/**
 * Definition of one coding task in the task ground truth.
 *
 * - `id`: task identifier.
 * - `category`: one of "spec_compliance", "business_rule",
 *   "persona_alignment", "architecture" or "journey_correctness".
 * - `description`: task description text.
 * - `requiredContext`, `forbiddenPatterns`, `requiredPatterns`: string lists;
 *   how each is interpreted (document ids vs. regex sources, etc.) is not
 *   visible here — TODO confirm against the evaluator.
 * - `evaluationCriteria`: the weighted criteria attached to the task.
 */
export interface CodingTask {
|
|
20
|
+
id: string;
|
|
21
|
+
category: "spec_compliance" | "business_rule" | "persona_alignment" | "architecture" | "journey_correctness";
|
|
22
|
+
description: string;
|
|
23
|
+
requiredContext: string[];
|
|
24
|
+
forbiddenPatterns: string[];
|
|
25
|
+
requiredPatterns: string[];
|
|
26
|
+
evaluationCriteria: EvaluationCriterion[];
|
|
27
|
+
}
|
|
28
|
+
/**
 * Container for the list of coding tasks (`tasks`) that make up the task
 * ground truth.
 */
export interface TaskGroundTruth {
|
|
29
|
+
tasks: CodingTask[];
|
|
30
|
+
}
|
|
31
|
+
/**
 * Summary statistics for one named performance measurement: `mean`, `median`
 * and `p95` over `iterations` runs.
 * NOTE(review): the unit of the three statistics is not visible here
 * (presumably milliseconds) — confirm with the performance runner.
 */
export interface PerformanceResult {
|
|
32
|
+
name: string;
|
|
33
|
+
mean: number;
|
|
34
|
+
median: number;
|
|
35
|
+
p95: number;
|
|
36
|
+
iterations: number;
|
|
37
|
+
}
|
|
38
|
+
/**
 * Retrieval-quality metrics for one search mode ("hybrid", "semantic" or
 * "keyword"), optionally scoped to a `queryType`.
 *
 * The markdown reporter prints these fields under the column headings
 * P@1, P@3, P@5 (`precisionAt1/3/5`), R@5 (`recallAt5`), MRR (`mrr`) and
 * NDCG@5 (`ndcgAt5`).
 */
export interface SearchQualityResult {
|
|
39
|
+
mode: "hybrid" | "semantic" | "keyword";
|
|
40
|
+
queryType?: string;
|
|
41
|
+
precisionAt1: number;
|
|
42
|
+
precisionAt3: number;
|
|
43
|
+
precisionAt5: number;
|
|
44
|
+
recallAt5: number;
|
|
45
|
+
mrr: number;
|
|
46
|
+
ndcgAt5: number;
|
|
47
|
+
}
|
|
48
|
+
/**
 * Token-efficiency comparison for one corpus size.
 *
 * The markdown reporter renders `corpusSize` as a file count ("N files"),
 * `pclSessionStartTokens` and `pasteAllTokens` as token counts ("tok"),
 * `savingsPercent` with a "%" suffix and `ratio` with an "x" suffix.
 * NOTE(review): the exact formulas behind `savingsPercent` and `ratio` are
 * computed elsewhere — confirm with the token-efficiency runner.
 */
export interface TokenEfficiencyResult {
|
|
49
|
+
corpusSize: number;
|
|
50
|
+
pclSessionStartTokens: number;
|
|
51
|
+
pasteAllTokens: number;
|
|
52
|
+
savingsPercent: number;
|
|
53
|
+
ratio: number;
|
|
54
|
+
}
|
|
55
|
+
/**
 * Per-task AI coding-quality outcome: a score and a token count for each of
 * three conditions (no context, paste-all, PCL).
 *
 * NOTE(review): the markdown reporter reads `noContext`, `pasteAll` and `pcl`
 * from each row of ai-quality.json, not the `*Score` names declared here —
 * confirm which shape the AI-quality runner actually writes.
 */
export interface AIQualityResult {
|
|
56
|
+
taskId: string;
|
|
57
|
+
category: string;
|
|
58
|
+
noContextScore: number;
|
|
59
|
+
pasteAllScore: number;
|
|
60
|
+
pclScore: number;
|
|
61
|
+
noContextTokens: number;
|
|
62
|
+
pasteAllTokens: number;
|
|
63
|
+
pclTokens: number;
|
|
64
|
+
}
|
|
65
|
+
/**
 * Search-quality metrics for one named ablation configuration.
 *
 * NOTE(review): the ablation runner writes entries shaped
 * `{ config, metrics }` to ablation.json and the markdown reporter reads
 * `r.config` / `r.metrics`, whereas this interface declares `configuration` /
 * `searchQuality` — confirm whether this type is still in use.
 */
export interface AblationResult {
|
|
66
|
+
configuration: string;
|
|
67
|
+
searchQuality: SearchQualityResult;
|
|
68
|
+
}
|
|
69
|
+
/**
 * Aggregate benchmark report: identifying metadata (`timestamp`, `version`,
 * `commit`) plus one optional result list per benchmark layer. A section is
 * optional in the type, so a report may omit any layer.
 */
export interface BenchmarkReport {
|
|
70
|
+
timestamp: string;
|
|
71
|
+
version: string;
|
|
72
|
+
commit: string;
|
|
73
|
+
performance?: PerformanceResult[];
|
|
74
|
+
searchQuality?: SearchQualityResult[];
|
|
75
|
+
tokenEfficiency?: TokenEfficiencyResult[];
|
|
76
|
+
aiQuality?: AIQualityResult[];
|
|
77
|
+
ablation?: AblationResult[];
|
|
78
|
+
}
|
|
79
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../benchmarks/lib/types.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,YAAY,GAAG,YAAY,GAAG,WAAW,CAAC;IACrD,SAAS,EAAE,KAAK,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;KAC1B,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,oBAAoB;IACnC,OAAO,EAAE,iBAAiB,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,mBAAmB;IAClC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,OAAO,GAAG,WAAW,CAAC;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,iBAAiB,GAAG,eAAe,GAAG,mBAAmB,GAAG,cAAc,GAAG,qBAAqB,CAAC;IAC7G,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,kBAAkB,EAAE,mBAAmB,EAAE,CAAC;CAC3C;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,UAAU,EAAE,CAAC;CACrB;AAID,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,QAAQ,GAAG,UAAU,GAAG,SAAS,CAAC;IACxC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;IACvB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,mBAAmB,CAAC;CACpC;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAClC,aAAa,CAAC,EAAE,mBAAmB,EAAE,CAAC;IACtC,eAAe,CAAC,EAAE,qBAAq
B,EAAE,CAAC;IAC1C,SAAS,CAAC,EAAE,eAAe,EAAE,CAAC;IAC9B,QAAQ,CAAC,EAAE,cAAc,EAAE,CAAC;CAC7B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../benchmarks/lib/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown-reporter.d.ts","sourceRoot":"","sources":["../../../benchmarks/reporters/markdown-reporter.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown reporter — reads JSON results from benchmarks/results/ and generates REPORT.md
|
|
3
|
+
*/
|
|
4
|
+
import { readFile, writeFile } from "node:fs/promises";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
// Directory that result JSON files are read from and REPORT.md is written to:
// the "results" folder one level above this module's directory.
const RESULTS_DIR = join(import.meta.dirname, "..", "results");
|
|
7
|
+
/**
 * Reads and parses one JSON results file.
 *
 * A missing file is an expected condition (that benchmark layer simply has not
 * been run) and yields `null` silently. Any other failure — an unreadable file
 * or malformed JSON — also yields `null` so the remaining report sections can
 * still be rendered, but it is reported on stderr instead of being swallowed.
 *
 * @param {string} filename - File name relative to `resultsDir`.
 * @param {string} [resultsDir=RESULTS_DIR] - Directory to read from.
 * @returns {Promise<any|null>} The parsed JSON value, or `null` when the file
 *   is missing or could not be read/parsed.
 */
async function loadJSON(filename, resultsDir = RESULTS_DIR) {
  try {
    return JSON.parse(await readFile(join(resultsDir, filename), "utf8"));
  } catch (err) {
    // ENOENT just means the benchmark was not run; everything else is worth
    // surfacing because the section would otherwise vanish without a trace.
    if (err?.code !== "ENOENT") {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`Skipping ${filename}: ${reason}`);
    }
    return null;
  }
}
|
|
15
|
+
/**
 * Renders the six retrieval metrics of one table row as markdown cells:
 * each value with three decimals, joined by " | ".
 */
function formatMetricCells(m) {
  return [m.precisionAt1, m.precisionAt3, m.precisionAt5, m.recallAt5, m.mrr, m.ndcgAt5]
    .map((value) => value.toFixed(3))
    .join(" | ");
}

/**
 * Builds REPORT.md from whichever result files are available.
 *
 * Each section (token efficiency, search quality, ablation, AI quality) is
 * emitted only when its JSON file could be loaded. The finished report is
 * written to RESULTS_DIR/REPORT.md and echoed to stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const lines = [];
  const push = (s) => lines.push(s);
  push("# PCL MCP Benchmark Report");
  push("");
  push(`Generated: ${new Date().toISOString()}`);
  push("");

  // The four result files are independent of each other, so read them
  // concurrently rather than one after another.
  const [tokenData, searchData, ablationData, aiData] = await Promise.all([
    loadJSON("token-efficiency.json"),
    loadJSON("search-quality.json"),
    loadJSON("ablation.json"),
    loadJSON("ai-quality.json"),
  ]);

  // --- Token Efficiency ---
  if (tokenData) {
    push("## Layer 3: Token Efficiency");
    push("");
    push("| Corpus Size | PCL Start | Paste All | Savings | Ratio |");
    push("|-------------|-----------|-----------|---------|-------|");
    for (const r of tokenData.results) {
      push(`| ${r.corpusSize} files | ${r.pclSessionStartTokens} tok | ${r.pasteAllTokens} tok | ${r.savingsPercent.toFixed(1)}% | ${r.ratio.toFixed(1)}x |`);
    }
    push("");
  }

  // --- Search Quality ---
  if (searchData) {
    push("## Layer 2: Search Quality");
    push("");
    push("| Mode | P@1 | P@3 | P@5 | R@5 | MRR | NDCG@5 |");
    push("|------|-----|-----|-----|-----|-----|--------|");
    for (const [mode, m] of Object.entries(searchData.aggregate)) {
      push(`| ${mode} | ${formatMetricCells(m)} |`);
    }
    push("");
  }

  // --- Ablation ---
  if (ablationData) {
    push("## Layer 5: Ablation Study");
    push("");
    push("| Configuration | P@1 | P@3 | P@5 | R@5 | MRR | NDCG@5 |");
    push("|---------------|-----|-----|-----|-----|-----|--------|");
    for (const r of ablationData.results) {
      push(`| ${r.config} | ${formatMetricCells(r.metrics)} |`);
    }
    push("");
  }

  // --- AI Quality ---
  if (aiData) {
    push("## Layer 4: AI Coding Quality");
    push("");
    push(`Model: \`${aiData.model}\` | Judge: \`${aiData.judgeModel}\``);
    push("");
    push("| Task ID | Category | No Context | Paste All | PCL |");
    push("|---------|----------|------------|-----------|-----|");
    for (const r of aiData.results) {
      push(`| ${r.taskId} | ${r.category} | ${r.noContext.toFixed(1)} | ${r.pasteAll.toFixed(1)} | ${r.pcl.toFixed(1)} |`);
    }
    const avg = aiData.averages;
    push(`| **AVERAGE** | | **${avg.noContext.toFixed(1)}** | **${avg.pasteAll.toFixed(1)}** | **${avg.pcl.toFixed(1)}** |`);
    push("");
  }

  const report = lines.join("\n");

  // Make sure the output directory exists before writing; without this the
  // write fails with ENOENT when no benchmark has created it yet. `mkdir` is
  // imported locally because the module's static imports only bring in
  // readFile and writeFile.
  const { mkdir } = await import("node:fs/promises");
  await mkdir(RESULTS_DIR, { recursive: true });
  await writeFile(join(RESULTS_DIR, "REPORT.md"), report);

  console.log("Report generated: benchmarks/results/REPORT.md");
  console.log(report);
}
|
|
79
|
+
// Log the failure and mark the process as failed so callers (npm scripts, CI)
// can detect it; logging alone would still exit with status 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
|
80
|
+
//# sourceMappingURL=markdown-reporter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown-reporter.js","sourceRoot":"","sources":["../../../benchmarks/reporters/markdown-reporter.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC;AAE/D,KAAK,UAAU,QAAQ,CAAI,QAAgB;IACzC,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,CAAM,CAAC;IAC9E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE1C,IAAI,CAAC,4BAA4B,CAAC,CAAC;IACnC,IAAI,CAAC,EAAE,CAAC,CAAC;IACT,IAAI,CAAC,cAAc,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAC/C,IAAI,CAAC,EAAE,CAAC,CAAC;IAET,2BAA2B;IAC3B,MAAM,SAAS,GAAG,MAAM,QAAQ,CAQ7B,uBAAuB,CAAC,CAAC;IAE5B,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC,8BAA8B,CAAC,CAAC;QACrC,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,CAAC,2DAA2D,CAAC,CAAC;QAClE,IAAI,CAAC,2DAA2D,CAAC,CAAC;QAClE,KAAK,MAAM,CAAC,IAAI,SAAS,CAAC,OAAO,EAAE,CAAC;YAClC,IAAI,CAAC,KAAK,CAAC,CAAC,UAAU,YAAY,CAAC,CAAC,qBAAqB,UAAU,CAAC,CAAC,cAAc,UAAU,CAAC,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC1J,CAAC;QACD,IAAI,CAAC,EAAE,CAAC,CAAC;IACX,CAAC;IAED,yBAAyB;IACzB,MAAM,UAAU,GAAG,MAAM,QAAQ,CAS9B,qBAAqB,CAAC,CAAC;IAE1B,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACnC,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACxD,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACxD,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7D,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/L,CAAC;QACD,IAAI,CAAC
,EAAE,CAAC,CAAC;IACX,CAAC;IAED,mBAAmB;IACnB,MAAM,YAAY,GAAG,MAAM,QAAQ,CAYhC,eAAe,CAAC,CAAC;IAEpB,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACnC,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,CAAC,0DAA0D,CAAC,CAAC;QACjE,IAAI,CAAC,0DAA0D,CAAC,CAAC;QACjE,KAAK,MAAM,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC;YACrC,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC;YACpB,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACnM,CAAC;QACD,IAAI,CAAC,EAAE,CAAC,CAAC;IACX,CAAC;IAED,qBAAqB;IACrB,MAAM,MAAM,GAAG,MAAM,QAAQ,CAW1B,iBAAiB,CAAC,CAAC;IAEtB,IAAI,MAAM,EAAE,CAAC;QACX,IAAI,CAAC,+BAA+B,CAAC,CAAC;QACtC,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,CAAC,YAAY,MAAM,CAAC,KAAK,iBAAiB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;QACrE,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,CAAC,uDAAuD,CAAC,CAAC;QAC9D,IAAI,CAAC,uDAAuD,CAAC,CAAC;QAC9D,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAC/B,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,QAAQ,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACvH,CAAC;QACD,IAAI,CAAC,uBAAuB,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC7J,IAAI,CAAC,EAAE,CAAC,CAAC;IACX,CAAC;IAED,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,MAAM,SAAS,CAAC,IAAI,CAAC,WAAW,EAAE,WAAW,CAAC,EAAE,MAAM,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;IAC9D,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;AACtB,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-ablation.d.ts","sourceRoot":"","sources":["../../../benchmarks/runners/bench-ablation.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { setup } from "../lib/harness.js";
|
|
4
|
+
import { search } from "../../src/search.js";
|
|
5
|
+
import { computeMetrics, averageMetrics } from "../evaluators/ir-metrics.js";
|
|
6
|
+
/**
 * Layer 5 ablation benchmark.
 *
 * Sets up the "corpus-small" fixture, runs every ground-truth query under each
 * search mode (hybrid, keyword, semantic), averages the retrieval metrics per
 * mode, prints them as a table and saves them to results/ablation.json.
 * The harness is cleaned up whether or not a step fails.
 */
async function main() {
  console.log("=== Layer 5: Ablation Study ===\n");
  const harness = await setup("corpus-small");
  try {
    const here = import.meta.dirname;
    const judgmentsFile = join(here, "..", "fixtures", "ground-truth", "relevance-judgments.json");
    const groundTruth = JSON.parse(await readFile(judgmentsFile, "utf8"));

    const variants = [
      { name: "Full PCL (hybrid)", mode: "hybrid" },
      { name: "Keyword only (no embeddings)", mode: "keyword" },
      { name: "Semantic only (no BM25)", mode: "semantic" },
    ];

    console.log("Ablation Results:");
    console.log("\u2500".repeat(85));
    console.log("| Configuration | P@1 | P@3 | P@5 | R@5 | MRR | NDCG@5 |");
    console.log("|------------------------------|-------|-------|-------|-------|-------|--------|");

    const summaries = [];
    for (const variant of variants) {
      // Queries run one at a time, in ground-truth order.
      const perQuery = [];
      for (const entry of groundTruth.queries) {
        const hits = await search(harness.db, entry.query, {
          mode: variant.mode,
          topK: 10,
        });
        const retrievedIds = hits.map((hit) => hit.id);
        const gradeByDoc = new Map();
        for (const judgment of entry.judgments) {
          gradeByDoc.set(judgment.docId, judgment.relevance);
        }
        perQuery.push(computeMetrics(retrievedIds, gradeByDoc));
      }

      const mean = averageMetrics(perQuery);
      summaries.push({ config: variant.name, metrics: mean });

      const cells = [
        mean.precisionAt1,
        mean.precisionAt3,
        mean.precisionAt5,
        mean.recallAt5,
        mean.mrr,
        mean.ndcgAt5,
      ].map((value) => value.toFixed(3));
      console.log(`| ${variant.name.padEnd(28)} | ${cells.join(" | ")} |`);
    }

    // Persist the per-mode averages alongside a timestamp.
    const outputDir = join(here, "..", "results");
    await mkdir(outputDir, { recursive: true });
    const payload = { timestamp: new Date().toISOString(), results: summaries };
    await writeFile(join(outputDir, "ablation.json"), JSON.stringify(payload, null, 2));
    console.log("\nResults saved to benchmarks/results/ablation.json");
  } finally {
    await harness.cleanup();
  }
}
|
|
48
|
+
// Log the failure and mark the process as failed so callers (npm scripts, CI)
// can detect it; logging alone would still exit with status 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
|
49
|
+
//# sourceMappingURL=bench-ablation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-ablation.js","sourceRoot":"","sources":["../../../benchmarks/runners/bench-ablation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,MAAM,EAAmB,MAAM,qBAAqB,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAQ7E,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;IAEjD,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,CAAC;IAE5C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CACjB,MAAM,CAAC,IAAI,CAAC,OAAO,EACnB,IAAI,EACJ,UAAU,EACV,cAAc,EACd,0BAA0B,CAC3B,CAAC;QACF,MAAM,EAAE,GAAyB,IAAI,CAAC,KAAK,CACzC,MAAM,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC,CAC/B,CAAC;QAEF,MAAM,OAAO,GAAqB;YAChC,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,QAAQ,EAAE;YAC7C,EAAE,IAAI,EAAE,8BAA8B,EAAE,IAAI,EAAE,SAAS,EAAE;YACzD,EAAE,IAAI,EAAE,yBAAyB,EAAE,IAAI,EAAE,UAAU,EAAE;SACtD,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;QACjC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QACjC,OAAO,CAAC,GAAG,CACT,mFAAmF,CACpF,CAAC;QACF,OAAO,CAAC,GAAG,CACT,mFAAmF,CACpF,CAAC;QAEF,MAAM,WAAW,GAGZ,EAAE,CAAC;QAER,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAwC,EAAE,CAAC;YAExD,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;gBAC3B,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE;oBAChD,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,IAAI,EAAE,EAAE;iBACT,CAAC,CAAC;gBACH,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBAC3C,MAAM,eAAe,GAAG,IAAI,GAAG,CAC7B,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAC/C,CAAC;gBACF,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC,CAAC;YAC3D,CAAC;YAED,MAAM,GAAG,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;YACpC,WAAW,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC,CAAC;YAExD,OAAO,CAAC,GAAG,CACT,KAAK,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,YAAY,CAAC,OAAO
,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CACtN,CAAC;QACJ,CAAC;QAED,eAAe;QACf,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,CAAC,EAAE;YACtD,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,MAAM,SAAS,CACb,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,eAAe,CAAC,EAC3D,IAAI,CAAC,SAAS,CACZ,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,OAAO,EAAE,WAAW,EAAE,EAC7D,IAAI,EACJ,CAAC,CACF,CACF,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;IACrE,CAAC;YAAS,CAAC;QACT,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IAC1B,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-ai-quality.d.ts","sourceRoot":"","sources":["../../../benchmarks/runners/bench-ai-quality.ts"],"names":[],"mappings":""}
|