@ddse/acm-aicoder 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/capability-map.d.ts +4 -0
- package/dist/src/capability-map.d.ts.map +1 -0
- package/dist/src/capability-map.js +289 -0
- package/dist/src/capability-map.js.map +1 -0
- package/dist/src/registries.d.ts +3 -0
- package/dist/src/registries.d.ts.map +1 -1
- package/dist/src/registries.js +11 -0
- package/dist/src/registries.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +21 -7
- package/.aicoder/index.json +0 -304
- package/AICODER_IMPLEMENTATION_PLAN_PHASE2.md +0 -284
- package/bin/interactive.tsx +0 -232
- package/docs/AICODER.png +0 -0
- package/docs/INTERACTIVE_CLI_GUIDE.md +0 -201
- package/docs/TUI_MOCKUP.md +0 -180
- package/src/config/providers.ts +0 -174
- package/src/config/session.ts +0 -143
- package/src/context/bm25.ts +0 -173
- package/src/context/code-search.ts +0 -188
- package/src/context/context-pack.ts +0 -133
- package/src/context/dependency-mapper.ts +0 -72
- package/src/context/index.ts +0 -8
- package/src/context/symbol-extractor.ts +0 -149
- package/src/context/test-mapper.ts +0 -77
- package/src/context/types.ts +0 -69
- package/src/context/workspace-indexer.ts +0 -249
- package/src/index.ts +0 -5
- package/src/registries.ts +0 -118
- package/src/runtime/budget-manager.ts +0 -118
- package/src/runtime/interactive-runtime.ts +0 -423
- package/src/tasks-v2/analysis-tasks.ts +0 -311
- package/src/tasks-v2/developer-tasks.ts +0 -437
- package/src/tasks-v2/index.ts +0 -3
- package/src/tools-v2/edit-tools.ts +0 -153
- package/src/tools-v2/index.ts +0 -6
- package/src/tools-v2/read-tools.ts +0 -286
- package/src/tools-v2/search-tools.ts +0 -175
- package/src/tools-v2/test-tools.ts +0 -147
- package/src/tools-v2/workspace-context.ts +0 -428
- package/src/ui/App.tsx +0 -392
- package/src/ui/components/ChatPane.tsx +0 -84
- package/src/ui/components/EventsPane.tsx +0 -81
- package/src/ui/components/GoalsTasksPane.tsx +0 -149
- package/src/ui/store.ts +0 -362
- package/tests/integration.test.ts +0 -537
- package/tsconfig.json +0 -22
package/src/config/providers.ts
DELETED
|
@@ -1,174 +0,0 @@
|
|
|
1
|
-
// Provider metadata for budget governance
|
|
2
|
-
// Token costs and context limits per model/provider
|
|
3
|
-
|
|
4
|
-
/**
 * Pricing, limits, and recommended defaults for a single model.
 *
 * Only `provider`, `model`, and `supportsStreaming` are mandatory; every
 * numeric field may be omitted when the value is not known.
 */
export interface ProviderMetadata {
  /** Provider identifier (the registry uses 'openai', 'anthropic', 'ollama'). */
  provider: string;
  /** Model identifier as passed to the provider. */
  model: string;

  /** Cost in USD per one million input tokens. */
  inputCostPer1M?: number;
  /** Cost in USD per one million output tokens. */
  outputCostPer1M?: number;

  /** Size of the context window, in tokens. */
  maxContextTokens?: number;

  /** Concurrency limit for requests against this model. */
  maxConcurrentRequests?: number;

  /** Recommended sampling temperature. */
  recommendedTemperature?: number;
  /** Whether the model supports streaming. */
  supportsStreaming: boolean;
}
|
|
22
|
-
|
|
23
|
-
/**
 * Provider metadata registry, keyed by model identifier.
 *
 * Sources: OpenAI pricing, Anthropic pricing, Azure OpenAI, local models.
 * Entries are inserted in the order OpenAI, Anthropic, local; every key is
 * identical to the entry's `model` field.
 */
export const PROVIDER_METADATA: Record<string, ProviderMetadata> = (() => {
  // Values shared by every entry in the table.
  const recommendedTemperature = 0.7;
  const supportsStreaming = true;

  // Paid, hosted model with an explicit concurrency limit.
  const hosted = (
    provider: string,
    model: string,
    inputCostPer1M: number,
    outputCostPer1M: number,
    maxContextTokens: number,
    maxConcurrentRequests: number
  ): ProviderMetadata => ({
    provider,
    model,
    inputCostPer1M,
    outputCostPer1M,
    maxContextTokens,
    maxConcurrentRequests,
    recommendedTemperature,
    supportsStreaming,
  });

  // Local/self-hosted model: no cost, and no concurrency limit recorded.
  const local = (model: string, maxContextTokens: number): ProviderMetadata => ({
    provider: 'ollama',
    model,
    inputCostPer1M: 0,
    outputCostPer1M: 0,
    maxContextTokens,
    recommendedTemperature,
    supportsStreaming,
  });

  const entries: ProviderMetadata[] = [
    // OpenAI models
    hosted('openai', 'gpt-4o', 2.5, 10, 128000, 10),
    hosted('openai', 'gpt-4o-mini', 0.15, 0.6, 128000, 10),
    hosted('openai', 'gpt-4-turbo', 10, 30, 128000, 10),
    hosted('openai', 'gpt-3.5-turbo', 0.5, 1.5, 16385, 10),

    // Anthropic models
    hosted('anthropic', 'claude-3-opus-20240229', 15, 75, 200000, 5),
    hosted('anthropic', 'claude-3-sonnet-20240229', 3, 15, 200000, 5),
    hosted('anthropic', 'claude-3-haiku-20240307', 0.25, 1.25, 200000, 5),

    // Local/self-hosted models (no cost)
    local('llama3.1', 128000),
    local('qwen2.5:7b', 32768),
    local('deepseek-coder', 16000),
  ];

  const registry: Record<string, ProviderMetadata> = {};
  for (const entry of entries) {
    registry[entry.model] = entry;
  }
  return registry;
})();
|
|
129
|
-
|
|
130
|
-
/**
 * Looks up metadata for a model identifier.
 *
 * Resolution order:
 *  1. Exact key in `PROVIDER_METADATA` (own properties only, so names such as
 *     "toString" never resolve to `Object.prototype` members).
 *  2. Longest registry key that is a prefix of `model`, so a dated variant
 *     like "gpt-4o-mini-2024-07-18" resolves to "gpt-4o-mini" rather than to
 *     the shorter "gpt-4o".
 *  3. A default record that assumes a local/free model with an 8192-token
 *     context window.
 *
 * @param model Model identifier, possibly with a version/date suffix.
 * @returns The matching registry entry, or the default record.
 */
export function getProviderMetadata(model: string): ProviderMetadata {
  // Try exact match first
  if (Object.prototype.hasOwnProperty.call(PROVIDER_METADATA, model)) {
    return PROVIDER_METADATA[model];
  }

  // Try prefix match, preferring the most specific (longest) key
  let bestMatch: ProviderMetadata | undefined;
  let bestLength = -1;
  for (const [key, metadata] of Object.entries(PROVIDER_METADATA)) {
    if (key.length > bestLength && model.startsWith(key)) {
      bestMatch = metadata;
      bestLength = key.length;
    }
  }
  if (bestMatch) {
    return bestMatch;
  }

  // Default metadata for unknown models (assume local/free)
  return {
    provider: 'unknown',
    model,
    inputCostPer1M: 0,
    outputCostPer1M: 0,
    maxContextTokens: 8192,
    recommendedTemperature: 0.7,
    supportsStreaming: true,
  };
}
|
|
154
|
-
|
|
155
|
-
/**
 * Estimates how many tokens a piece of text occupies.
 *
 * Uses the rough rule of thumb that one token is about four characters of
 * English text. A real tokenizer (e.g. tiktoken) would be more accurate, but
 * this is sufficient for budget estimation.
 *
 * @param text Text to measure.
 * @returns Character count divided by four, rounded up.
 */
export function estimateTokenCount(text: string): number {
  const charsPerToken = 4;
  const { length: charCount } = text;
  return Math.ceil(charCount / charsPerToken);
}
|
|
160
|
-
|
|
161
|
-
/**
 * Estimates the USD cost of a request from its token counts.
 *
 * Each side (input/output) is priced independently: a rate that is missing
 * is treated as 0, so a model with free input but paid output (or with only
 * one rate known) still reports the cost of the priced side.
 *
 * @param inputTokens Number of prompt tokens.
 * @param outputTokens Number of completion tokens.
 * @param metadata Pricing source; rates are USD per one million tokens.
 * @returns Estimated cost in USD.
 */
export function estimateCost(
  inputTokens: number,
  outputTokens: number,
  metadata: ProviderMetadata
): number {
  const inputRate = metadata.inputCostPer1M ?? 0;
  const outputRate = metadata.outputCostPer1M ?? 0;

  const inputCost = (inputTokens / 1_000_000) * inputRate;
  const outputCost = (outputTokens / 1_000_000) * outputRate;

  return inputCost + outputCost;
}
|
package/src/config/session.ts
DELETED
|
@@ -1,143 +0,0 @@
|
|
|
1
|
-
// Session configuration for AI Coder Phase 2
|
|
2
|
-
// Manages CLI arguments and session metadata using provider/model semantics
|
|
3
|
-
|
|
4
|
-
import { existsSync, statSync } from 'fs';
|
|
5
|
-
import path from 'path';
|
|
6
|
-
import os from 'os';
|
|
7
|
-
|
|
8
|
-
/** LLM backends accepted by the `--provider` option. */
export type Provider =
  | 'ollama'
  | 'vllm';
|
|
9
|
-
|
|
10
|
-
/**
 * Validated, normalized settings for a single AI Coder session.
 */
export interface SessionConfig {
  /** LLM provider in use. */
  provider: Provider;
  /** Model identifier (e.g. "llama3.1"). */
  model: string;
  /** Optional override for the provider base URL. */
  baseUrl?: string;
  /** Path of the project root the session operates on. */
  workspace: string;
  /** LLM temperature. */
  temperature: number;
  /** Optional random seed for reproducibility. */
  seed?: number;
  /** Number of alternative plans to produce. */
  planCount: 1 | 2;
  /** Identifier of this session. */
  sessionId: string;
  /** Creation time of the session as a numeric timestamp. */
  timestamp: number;
}
|
|
21
|
-
|
|
22
|
-
/**
 * Raw `--key value` pairs as read from the command line.
 *
 * All values are unparsed strings; unknown options are kept under their own
 * key via the index signature.
 */
export interface CLIArgs {
  /** Value of `--provider`. */
  provider?: string;
  /** Value of `--model`. */
  model?: string;
  /** Value of `--base-url`. */
  'base-url'?: string;
  /** Value of `--workspace`. */
  workspace?: string;
  /** Value of `--temperature`, still a string. */
  temperature?: string;
  /** Value of `--seed`, still a string. */
  seed?: string;
  /** Value of `--plans`, still a string. */
  plans?: string;
  /** Any other option that was supplied. */
  [key: string]: string | undefined;
}
|
|
32
|
-
|
|
33
|
-
/**
 * Collects `--key=value` and `--key value` options from an argument vector.
 *
 * - Tokens that do not start with `--` are ignored unless they are consumed
 *   as the value of the preceding option.
 * - In the `--key=value` form everything after the first `=` is the value;
 *   an empty key (`--=x`) is dropped.
 * - In the space-separated form the option is recorded only when the next
 *   token is non-empty and does not itself start with `--`; a flag without a
 *   value is therefore not recorded.
 *
 * @param argv Argument tokens to scan.
 * @returns Map of option name to raw string value.
 */
export function parseCliArgs(argv: string[]): CLIArgs {
  const parsed: CLIArgs = {};
  let cursor = 0;

  while (cursor < argv.length) {
    const token = argv[cursor];
    cursor += 1;

    if (!token.startsWith('--')) {
      continue;
    }

    const body = token.slice(2);
    const eqAt = body.indexOf('=');

    // --key=value form
    if (eqAt >= 0) {
      if (eqAt > 0) {
        parsed[body.slice(0, eqAt)] = body.slice(eqAt + 1);
      }
      continue;
    }

    // --key value form: consume the following token when it is a value
    const next = argv[cursor];
    if (next && !next.startsWith('--')) {
      parsed[body] = next;
      cursor += 1;
    }
  }

  return parsed;
}
|
|
63
|
-
|
|
64
|
-
/**
 * Validates raw CLI arguments and turns them into a `SessionConfig`.
 *
 * Defaults: provider "ollama", model "llama3.1" (ollama only), temperature
 * 0.7, one plan. The workspace is mandatory; `~` and `~/...` are expanded to
 * the user's home directory (shells do not expand `~` in `--workspace=~/x`),
 * relative paths are resolved against INIT_CWD, PWD, or the process cwd (in
 * that order), and the result must be an existing directory.
 *
 * `sessionId` and `timestamp` are derived from a single clock reading so
 * they always agree.
 *
 * @param args Raw options, typically from `parseCliArgs`.
 * @returns The normalized session configuration.
 * @throws Error when the provider is unknown, a required option is missing,
 *   temperature is not a number in [0, 2], seed is not numeric, or the
 *   workspace does not exist or is not a directory.
 */
export function validateAndNormalizeConfig(args: CLIArgs): SessionConfig {
  const provider = (args.provider as Provider) || 'ollama';
  if (!['ollama', 'vllm'].includes(provider)) {
    throw new Error(`Invalid --provider: ${args.provider}. Must be "ollama" or "vllm".`);
  }

  // Only ollama has a default model; other providers must name one.
  const model = args.model || (provider === 'ollama' ? 'llama3.1' : undefined);
  const workspaceArg = args.workspace;
  const missing: string[] = [];

  if (!model) missing.push('--model');
  if (!workspaceArg || workspaceArg.trim().length === 0) missing.push('--workspace');

  // The first two checks are implied by `missing.length > 0`; they are
  // repeated so the compiler narrows both values to `string` below.
  if (!model || !workspaceArg || missing.length > 0) {
    throw new Error(
      `Missing required parameters: ${missing.join(', ')}\n\n` +
      `ACM AI Coder requires these parameters to start:\n` +
      ` --provider <ollama|vllm> LLM provider (default: ollama)\n` +
      ` --model <name> Model identifier (e.g., llama3.1, gpt-4o)\n` +
      ` --workspace <path> REQUIRED: absolute or relative path to project root\n\n` +
      `Optional parameters:\n` +
      ` --base-url <url> Override provider base URL\n` +
      ` --temperature <0-2> LLM temperature (default: 0.7)\n` +
      ` --seed <number> Random seed for reproducibility\n` +
      ` --plans <1|2> Number of alternative plans (default: 1)\n\n` +
      `Examples:\n` +
      ` acm-aicoder --provider vllm --model gpt-4o --workspace /abs/path\n` +
      ` acm-aicoder --provider ollama --model llama3.1 --workspace=~/myproject`
    );
  }

  const temperature = args.temperature ? parseFloat(args.temperature) : 0.7;
  if (isNaN(temperature) || temperature < 0 || temperature > 2) {
    throw new Error(`Invalid --temperature: must be between 0 and 2`);
  }

  const seed = args.seed ? parseInt(args.seed, 10) : undefined;
  if (seed !== undefined && Number.isNaN(seed)) {
    throw new Error(`Invalid --seed: must be a number`);
  }

  // Anything other than the literal "2" falls back to a single plan.
  const planCount = args.plans === '2' ? 2 : 1;

  // Enforce explicit workspace and normalize
  const baseCwd = process.env.INIT_CWD || process.env.PWD || process.cwd();
  let workspaceInput = workspaceArg;

  // Expand ~ to user home (both the bare "~" and the "~/..." forms)
  if (workspaceInput === '~') {
    workspaceInput = os.homedir();
  } else if (workspaceInput.startsWith('~/')) {
    workspaceInput = path.join(os.homedir(), workspaceInput.slice(2));
  }

  const resolvedWorkspace = path.isAbsolute(workspaceInput)
    ? workspaceInput
    : path.resolve(baseCwd, workspaceInput);

  if (!existsSync(resolvedWorkspace)) {
    throw new Error(
      `Workspace path "${resolvedWorkspace}" does not exist. Please provide a valid --workspace.`
    );
  }

  const stats = statSync(resolvedWorkspace);
  if (!stats.isDirectory()) {
    throw new Error(
      `Workspace path "${resolvedWorkspace}" is not a directory. Please provide a valid --workspace.`
    );
  }

  // Read the clock once so sessionId and timestamp cannot drift apart.
  const timestamp = Date.now();

  return {
    provider,
    model,
    baseUrl: args['base-url'],
    workspace: resolvedWorkspace,
    temperature,
    seed,
    planCount,
    sessionId: `session-${timestamp}`,
    timestamp,
  };
}
|
package/src/context/bm25.ts
DELETED
|
@@ -1,173 +0,0 @@
|
|
|
1
|
-
// Simple BM25 search implementation for ACM examples
|
|
2
|
-
|
|
3
|
-
/**
 * A unit of searchable content handed to the BM25 engine.
 *
 * Beyond the fields below, callers may attach arbitrary extra properties;
 * they are carried along untouched in search results.
 */
export interface Document {
  /** Identifier under which the document is tracked in the index. */
  id: string;
  /** Optional title; indexed together with the content. */
  title?: string;
  /** Main body text. */
  content: string;
  /** Arbitrary additional metadata. */
  [key: string]: any;
}
|
|
12
|
-
|
|
13
|
-
/**
 * One hit returned by a BM25 search: the matched document and its score.
 */
export interface SearchResult {
  /** The document that matched the query. */
  document: Document;
  /** BM25 relevance score; higher means more relevant. */
  score: number;
}
|
|
20
|
-
|
|
21
|
-
/**
 * Tuning knobs for the BM25 ranking function.
 */
export interface BM25Params {
  /** Term frequency saturation (default: 1.5). */
  k1?: number;
  /** Length normalization (default: 0.75). */
  b?: number;
}
|
|
28
|
-
|
|
29
|
-
/**
 * Splits text into lowercase word tokens.
 *
 * A token is a maximal run of word characters (`\w`: ASCII letters, digits,
 * and underscore); everything else acts as a separator.
 *
 * @param text Text to tokenize; null, undefined, and '' yield no tokens.
 * @returns Tokens in order of appearance.
 */
function tokenize(text?: string | null): string[] {
  if (!text) {
    return [];
  }

  const words = text.toLowerCase().match(/\w+/g);
  return words ?? [];
}
|
|
41
|
-
|
|
42
|
-
/**
 * BM25 Search Engine
 *
 * Implements the BM25 ranking function for full-text search: each document
 * is scored against the query by summing, over the query tokens it contains,
 * `idf(term) * tf * (k1 + 1) / (tf + k1 * (1 - b + b * len / avgLen))`.
 *
 * Term frequencies are counted once per document at index time, so scoring
 * a query token is a map lookup instead of a scan over the document's tokens.
 */
export class BM25Search {
  private documents: Document[] = [];
  /** Per-document term counts, keyed by document id then by token. */
  private termFrequencies: Map<string, Map<string, number>> = new Map();
  /** Token count of each document, keyed by document id. */
  private documentLengths: Map<string, number> = new Map();
  /** Number of documents containing each token. */
  private documentFrequency: Map<string, number> = new Map();
  private averageDocumentLength: number = 0;
  private k1: number;
  private b: number;

  /**
   * @param params Optional tuning; k1 defaults to 1.5 and b to 0.75.
   */
  constructor(params: BM25Params = {}) {
    this.k1 = params.k1 ?? 1.5;
    this.b = params.b ?? 0.75;
  }

  /**
   * Index documents for search, replacing any previously indexed set.
   *
   * @param documents Documents to index; the array is kept by reference.
   */
  index(documents: Document[]): void {
    this.documents = documents;
    this.termFrequencies.clear();
    this.documentLengths.clear();
    this.documentFrequency.clear();

    let totalLength = 0;
    for (const doc of documents) {
      const tokens = tokenize(this.extractText(doc));
      totalLength += tokens.length;

      const counts = new Map<string, number>();
      for (const token of tokens) {
        counts.set(token, (counts.get(token) ?? 0) + 1);
      }
      this.termFrequencies.set(doc.id, counts);
      this.documentLengths.set(doc.id, tokens.length);

      // Each distinct token in this document bumps its document frequency once.
      for (const token of counts.keys()) {
        this.documentFrequency.set(token, (this.documentFrequency.get(token) ?? 0) + 1);
      }
    }

    // Avoid 0 / 0 = NaN when indexing an empty collection.
    this.averageDocumentLength = documents.length > 0 ? totalLength / documents.length : 0;
  }

  /**
   * Extract searchable text from document: title (if any) followed by content.
   */
  private extractText(doc: Document): string {
    const parts: string[] = [];
    if (doc.title) parts.push(doc.title);
    if (doc.content) parts.push(doc.content);
    return parts.join(' ');
  }

  /**
   * Calculate IDF (Inverse Document Frequency):
   * `ln((n - df + 0.5) / (df + 0.5) + 1)`, or 0 for a term no document contains.
   */
  private idf(term: string): number {
    const df = this.documentFrequency.get(term) ?? 0;
    if (df === 0) return 0;

    const n = this.documents.length;
    return Math.log((n - df + 0.5) / (df + 0.5) + 1);
  }

  /**
   * Calculate BM25 score for a document.
   *
   * A token that appears several times in `queryTokens` contributes once per
   * occurrence.
   */
  private score(docId: string, queryTokens: string[]): number {
    const counts = this.termFrequencies.get(docId);
    if (!counts) return 0;

    const docLength = this.documentLengths.get(docId) ?? 0;
    // avgLen is 0 only when no indexed document has any tokens, in which
    // case no term can match; the guard merely keeps the value finite.
    const lengthRatio =
      this.averageDocumentLength > 0 ? docLength / this.averageDocumentLength : 0;
    const lengthNorm = this.k1 * (1 - this.b + this.b * lengthRatio);

    let score = 0;
    for (const queryToken of queryTokens) {
      const termFreq = counts.get(queryToken) ?? 0;
      if (termFreq === 0) continue;

      const idf = this.idf(queryToken);
      const numerator = termFreq * (this.k1 + 1);
      const denominator = termFreq + lengthNorm;

      score += idf * (numerator / denominator);
    }

    return score;
  }

  /**
   * Search for documents matching the query.
   *
   * @param query Free-text query; tokenized the same way as documents.
   * @param limit Maximum number of results (default 10).
   * @returns Documents with a positive score, best first.
   */
  search(query: string, limit: number = 10): SearchResult[] {
    if (this.documents.length === 0) {
      return [];
    }

    const queryTokens = tokenize(query);
    if (queryTokens.length === 0) {
      return [];
    }

    // Score all documents
    const results: SearchResult[] = [];
    for (const doc of this.documents) {
      const score = this.score(doc.id, queryTokens);
      if (score > 0) {
        results.push({ document: doc, score });
      }
    }

    // Sort by score (descending) and limit
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, limit);
  }

  /**
   * Get all indexed documents (the same array that was passed to `index`).
   */
  getDocuments(): Document[] {
    return this.documents;
  }

  /**
   * Get document count
   */
  getDocumentCount(): number {
    return this.documents.length;
  }
}
|
|
@@ -1,188 +0,0 @@
|
|
|
1
|
-
// Code Search - BM25 based search for code files
|
|
2
|
-
import * as fs from 'fs/promises';
|
|
3
|
-
import * as path from 'path';
|
|
4
|
-
import { BM25Search, type Document } from './bm25.js';
|
|
5
|
-
import type { WorkspaceIndex, FileMetadata, SearchResult } from './types.js';
|
|
6
|
-
|
|
7
|
-
/**
 * Options accepted by `CodeSearch.search`.
 */
export interface SearchOptions {
  /** Number of results to return. */
  k?: number;
  /** File types to prefer; matched against file extension or language. */
  preferTypes?: string[];
  /** Whether snippets include surrounding lines. */
  includeContext?: boolean;
  /** Number of context lines on each side of the best line. */
  contextLines?: number;
}
|
|
13
|
-
|
|
14
|
-
/**
 * BM25-backed search over the text files of a workspace.
 *
 * Call `indexFiles` once with a workspace index, then query with `search`
 * or `searchSymbol`. Each result carries the best-matching line of the file
 * as a snippet.
 */
export class CodeSearch {
  private bm25: BM25Search;
  private rootPath: string;
  private documents: Map<string, Document> = new Map();

  /**
   * @param rootPath Directory that the indexed file paths are relative to.
   */
  constructor(rootPath: string) {
    this.rootPath = rootPath;
    this.bm25 = new BM25Search({ k1: 1.5, b: 0.75 });
  }

  /**
   * Index files for search, replacing any previous index.
   *
   * Only non-binary TypeScript, JavaScript, Markdown, and JSON files smaller
   * than 500,000 bytes are read. Files that cannot be read are skipped.
   */
  async indexFiles(index: WorkspaceIndex): Promise<void> {
    const documents: Document[] = [];
    this.documents.clear();

    // Filter to text files only
    const textFiles = index.files.filter(f =>
      !f.isBinary &&
      f.size < 500_000 && // Only index files < 500KB
      (f.language === 'typescript' ||
        f.language === 'javascript' ||
        f.language === 'markdown' ||
        f.language === 'json')
    );

    // Read and index each file
    for (const file of textFiles) {
      try {
        const fullPath = path.join(this.rootPath, file.path);
        const content = await fs.readFile(fullPath, 'utf-8');

        const doc: Document = {
          id: file.path,
          title: path.basename(file.path),
          content,
          language: file.language,
          size: file.size,
          path: file.path,
        };

        documents.push(doc);
        this.documents.set(file.path, doc);
      } catch {
        // Best effort: skip files we can't read
      }
    }

    // Build BM25 index
    this.bm25.index(documents);
  }

  /**
   * Search for code.
   *
   * Fetches twice the requested number of BM25 hits, multiplies the score of
   * hits whose extension or language is in `preferTypes` by 1.5, then
   * re-sorts and truncates to `k` (default 10).
   */
  async search(query: string, options: SearchOptions = {}): Promise<SearchResult[]> {
    const k = options.k ?? 10;
    const preferTypes = new Set(options.preferTypes || []);

    // Over-fetch so that boosted hits can move up into the top k.
    const bm25Results = this.bm25.search(query, k * 2);

    // Apply preferences and convert to SearchResult
    const results: SearchResult[] = [];

    for (const result of bm25Results) {
      const doc = result.document;
      let score = result.score;

      // Boost score for preferred file types
      if (preferTypes.size > 0) {
        const ext = path.extname(doc.path);
        const lang = doc.language as string;
        if (preferTypes.has(ext) || preferTypes.has(lang)) {
          score *= 1.5;
        }
      }

      // Find best matching line
      const snippet = this.findBestSnippet(doc.content, query, options);

      results.push({
        path: doc.path,
        score,
        snippet: snippet.text,
        line: snippet.line,
        column: snippet.column,
      });
    }

    // Re-sort by adjusted scores and limit
    return results
      .sort((a, b) => b.score - a.score)
      .slice(0, k);
  }

  /**
   * Find the best snippet matching the query.
   *
   * The best line is the first line containing the largest number of
   * distinct query-token matches (case-insensitive substring test); when no
   * line matches, the first line is used. Empty tokens produced by leading,
   * trailing, or repeated whitespace in the query are discarded, since an
   * empty string is a substring of every line and would otherwise match
   * everywhere and pin the column to 0.
   *
   * @returns The formatted snippet, the 1-based line number of the best
   *   line, and the 0-based column of the first query token found on it.
   */
  private findBestSnippet(
    content: string,
    query: string,
    options: SearchOptions
  ): { text: string; line: number; column: number } {
    const lines = content.split('\n');
    const queryTokens = query
      .toLowerCase()
      .split(/\s+/)
      .filter((token) => token.length > 0);

    let bestLine = 0;
    let bestScore = 0;

    // Find line with most query tokens
    for (let i = 0; i < lines.length; i++) {
      const lineLower = lines[i].toLowerCase();

      let score = 0;
      for (const token of queryTokens) {
        if (lineLower.includes(token)) {
          score += 1;
        }
      }

      if (score > bestScore) {
        bestScore = score;
        bestLine = i;
      }
    }

    // Extract snippet with context
    const contextLines = options.includeContext ? (options.contextLines ?? 2) : 0;
    const startLine = Math.max(0, bestLine - contextLines);
    const endLine = Math.min(lines.length - 1, bestLine + contextLines);

    const snippet = lines
      .slice(startLine, endLine + 1)
      .map((line, idx) => {
        const lineNum = startLine + idx + 1;
        const marker = lineNum === bestLine + 1 ? '>' : ' ';
        return `${marker} ${lineNum.toString().padStart(4, ' ')} ${line}`;
      })
      .join('\n');

    // Find column position (first query token, in query order, found on the line)
    let column = 0;
    const bestLineLower = lines[bestLine].toLowerCase();
    for (const token of queryTokens) {
      const pos = bestLineLower.indexOf(token);
      if (pos >= 0) {
        column = pos;
        break;
      }
    }

    return {
      text: snippet,
      line: bestLine + 1,
      column,
    };
  }

  /**
   * Search for symbol by name (exact or partial match).
   *
   * Appends the keywords "function class interface" to the query, prefers
   * TypeScript/JavaScript files, and returns up to 5 results with 3 lines of
   * context.
   */
  async searchSymbol(symbolName: string): Promise<SearchResult[]> {
    return this.search(`${symbolName} function class interface`, {
      k: 5,
      preferTypes: ['.ts', '.tsx', '.js', '.jsx'],
      includeContext: true,
      contextLines: 3,
    });
  }
}
|