@getmikk/ai-context 1.8.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@getmikk/ai-context",
3
- "version": "1.8.0",
3
+ "version": "2.0.0",
4
4
  "license": "Apache-2.0",
5
5
  "repository": {
6
6
  "type": "git",
@@ -21,8 +21,8 @@
21
21
  "dev": "tsc --watch"
22
22
  },
23
23
  "dependencies": {
24
- "@getmikk/core": "^1.8.0",
25
- "@getmikk/intent-engine": "^1.8.0"
24
+ "@getmikk/core": "^2.0.0",
25
+ "@getmikk/intent-engine": "^2.0.0"
26
26
  },
27
27
  "devDependencies": {
28
28
  "typescript": "^5.7.0",
@@ -1,15 +1,11 @@
1
1
  import type { MikkContract, MikkLock, MikkLockFunction } from '@getmikk/core'
2
2
  import * as fs from 'node:fs'
3
3
  import * as path from 'node:path'
4
+ import { countTokens, estimateFileTokens } from './token-counter.js'
4
5
 
5
6
  /** Default token budget for claude.md — generous but still bounded */
6
7
  const DEFAULT_TOKEN_BUDGET = 12000
7
8
 
8
- /** Rough token estimation: ~4 chars per token */
9
- function estimateTokens(text: string): number {
10
- return Math.ceil(text.length / 4)
11
- }
12
-
13
9
  /** Metadata from package.json that enriches the AI context */
14
10
  export interface ProjectMeta {
15
11
  description?: string
@@ -49,20 +45,20 @@ export class ClaudeMdGenerator {
49
45
  // --- Tier 1: Summary (always included) ----------------------
50
46
  const summary = this.generateSummary()
51
47
  sections.push(summary)
52
- usedTokens += estimateTokens(summary)
48
+ usedTokens += countTokens(summary)
53
49
 
54
50
  // --- Tech stack & conventions (always included if detectable) ---
55
51
  const techSection = this.generateTechStackSection()
56
52
  if (techSection) {
57
53
  sections.push(techSection)
58
- usedTokens += estimateTokens(techSection)
54
+ usedTokens += countTokens(techSection)
59
55
  }
60
56
 
61
57
  // --- Build / test / run commands -----------------------------
62
58
  const commandsSection = this.generateCommandsSection()
63
59
  if (commandsSection) {
64
60
  sections.push(commandsSection)
65
- usedTokens += estimateTokens(commandsSection)
61
+ usedTokens += countTokens(commandsSection)
66
62
  }
67
63
 
68
64
  // --- Tier 2: Module details (if budget allows) --------------
@@ -76,7 +72,7 @@ export class ClaudeMdGenerator {
76
72
 
77
73
  for (const module of modules) {
78
74
  const moduleSection = this.generateModuleSection(module.id)
79
- const tokens = estimateTokens(moduleSection)
75
+ const tokens = countTokens(moduleSection)
80
76
  if (usedTokens + tokens > this.tokenBudget) {
81
77
  sections.push('\n <!-- Full details truncated due to context budget -->\n')
82
78
  break
@@ -90,7 +86,7 @@ export class ClaudeMdGenerator {
90
86
  // --- Context files: schemas, data models, config ---------
91
87
  const contextSection = this.generateContextFilesSection()
92
88
  if (contextSection) {
93
- const ctxTokens = estimateTokens(contextSection)
89
+ const ctxTokens = countTokens(contextSection)
94
90
  if (usedTokens + ctxTokens <= this.tokenBudget) {
95
91
  sections.push(contextSection)
96
92
  usedTokens += ctxTokens
@@ -100,7 +96,7 @@ export class ClaudeMdGenerator {
100
96
  // --- File import graph per module ----------------------------
101
97
  const importSection = this.generateImportGraphSection()
102
98
  if (importSection) {
103
- const impTokens = estimateTokens(importSection)
99
+ const impTokens = countTokens(importSection)
104
100
  if (usedTokens + impTokens <= this.tokenBudget) {
105
101
  sections.push(importSection)
106
102
  usedTokens += impTokens
@@ -110,7 +106,7 @@ export class ClaudeMdGenerator {
110
106
  // --- HTTP Routes (Express + Next.js) -------------------------
111
107
  const routesSection = this.generateRoutesSection()
112
108
  if (routesSection) {
113
- const routeTokens = estimateTokens(routesSection)
109
+ const routeTokens = countTokens(routesSection)
114
110
  if (usedTokens + routeTokens <= this.tokenBudget) {
115
111
  sections.push(routesSection)
116
112
  usedTokens += routeTokens
@@ -119,14 +115,14 @@ export class ClaudeMdGenerator {
119
115
 
120
116
  // --- Tier 3: Constraints & decisions ------------------------
121
117
  const constraintsSection = this.generateConstraintsSection()
122
- const constraintTokens = estimateTokens(constraintsSection)
118
+ const constraintTokens = countTokens(constraintsSection)
123
119
  if (usedTokens + constraintTokens <= this.tokenBudget) {
124
120
  sections.push(constraintsSection)
125
121
  usedTokens += constraintTokens
126
122
  }
127
123
 
128
124
  const decisionsSection = this.generateDecisionsSection()
129
- const decisionTokens = estimateTokens(decisionsSection)
125
+ const decisionTokens = countTokens(decisionsSection)
130
126
  if (usedTokens + decisionTokens <= this.tokenBudget) {
131
127
  sections.push(decisionsSection)
132
128
  usedTokens += decisionTokens
@@ -225,7 +221,10 @@ export class ClaudeMdGenerator {
225
221
  }
226
222
 
227
223
  // Key functions: top 5 by calledBy count (most depended upon)
224
+ // Exclude functions already in entry points to avoid duplicates
225
+ const entryPointIds = new Set(entryPoints.map(fn => fn.id))
228
226
  const keyFunctions = [...moduleFunctions]
227
+ .filter(fn => !entryPointIds.has(fn.id)) // Exclude duplicates
229
228
  .sort((a, b) => b.calledBy.length - a.calledBy.length)
230
229
  .filter(fn => fn.calledBy.length > 0)
231
230
  .slice(0, 5)
@@ -107,31 +107,92 @@ const STOP_WORDS = new Set([
107
107
  'want', 'like', 'just', 'also', 'some', 'all', 'any', 'my', 'your',
108
108
  ])
109
109
 
110
- function extractKeywords(task: string): string[] {
111
- return task
110
/**
 * Short technical abbreviations. Words of two characters or fewer are only
 * kept as task keywords when they appear in this set.
 */
const SHORT_TECH_WORDS: Set<string> = new Set([
    // two-letter abbreviations
    'ai', 'ml', 'ui', 'ux', 'ts', 'js', 'db', 'io', 'id', 'ip', 'ci', 'cd', 'qa',
    // three-letter abbreviations
    'api', 'mcp', 'jwt', 'sql',
])

/**
 * Canonical keyword form: lower-cased, with every character outside
 * `a-z`, `0-9`, `_` and `-` stripped (this includes any whitespace).
 *
 * @param value Raw word or term.
 * @returns The normalised keyword; may be the empty string.
 */
function normalizeKeyword(value: string): string {
    const lowered = value.toLowerCase().trim()
    return lowered.replace(/[^a-z0-9_-]/g, '')
}
118
+
119
/**
 * Build the ordered, de-duplicated keyword list for a task description.
 *
 * Keywords are collected in three passes, in this order:
 *  1. quoted phrases (`"..."` or `'...'`), lower-cased and trimmed but
 *     otherwise kept verbatim (they may contain spaces);
 *  2. individual words of the task, normalised, minus stop words; words of
 *     two characters or fewer survive only if listed in SHORT_TECH_WORDS;
 *  3. caller-supplied required keywords, each entry split on commas and
 *     whitespace and normalised.
 *
 * @param task Free-text task description.
 * @param requiredKeywords Extra terms to append (default: none).
 * @returns Unique keywords in first-seen order.
 */
function extractKeywords(task: string, requiredKeywords: string[] = []): string[] {
    const out: string[] = []
    const seen = new Set<string>()
    const add = (kw: string): void => {
        if (!kw || seen.has(kw)) return
        seen.add(kw)
        out.push(kw)
    }

    // A single quote only delimits a phrase when it is not glued to a letter
    // or digit. Otherwise apostrophes in contractions/possessives
    // ("user's ... doesn't") would pair up and produce a bogus phrase keyword.
    const quotedPhrase = /"([^"]+)"|(?<![A-Za-z0-9])'([^']+)'(?![A-Za-z0-9])/g
    for (const match of task.matchAll(quotedPhrase)) {
        add((match[1] ?? match[2] ?? '').toLowerCase().trim())
    }

    const words = task
        .toLowerCase()
        .replace(/[^a-z0-9\s_-]/g, ' ')
        .split(/\s+/)
        .map(w => normalizeKeyword(w))
        .filter(w => {
            if (!w || STOP_WORDS.has(w)) return false
            if (w.length > 2) return true
            return SHORT_TECH_WORDS.has(w)
        })
    for (const w of words) add(w)

    // Required keywords bypass the stop-word and length filters on purpose.
    for (const item of requiredKeywords) {
        for (const part of item.split(/[,\s]+/)) {
            add(normalizeKeyword(part))
        }
    }

    return out
}
117
160
 
118
161
  /**
119
162
  * Keyword score for a function: exact match > partial match
120
163
  */
121
- function keywordScore(fn: MikkLockFunction, keywords: string[]): number {
122
- if (keywords.length === 0) return 0
164
/**
 * Score a function against the task keywords and report which keywords hit.
 *
 * - A keyword equal to the lower-cased function name scores
 *   `WEIGHT.KEYWORD_EXACT`.
 * - Otherwise a keyword longer than two characters scores
 *   `WEIGHT.KEYWORD_PARTIAL` when it is a substring of the name, file path or
 *   purpose text.
 * - A keyword of two characters or fewer must match a whole token of the
 *   name, file path (source extensions removed) or purpose, so that e.g.
 *   `ui` does not match inside `build`.
 *
 * The returned score is the maximum over all keywords, not a sum.
 *
 * @param fn Function record from the lock file.
 * @param keywords Normalised task keywords.
 * @returns The score and every keyword that matched, in input order.
 */
function keywordScore(
    fn: MikkLockFunction,
    keywords: string[]
): { score: number; matchedKeywords: string[] } {
    if (keywords.length === 0) return { score: 0, matchedKeywords: [] }

    const nameLower = fn.name.toLowerCase()
    const fileLower = fn.file.toLowerCase()
    // Drop source-file extensions so a short keyword such as "ts" matches a
    // path segment, not merely the ".ts" suffix every file carries.
    const fileNoExt = fileLower.replace(/\.(d\.ts|ts|tsx|js|jsx|mjs|cjs|mts|cts)\b/g, ' ')
    const purposeLower = (fn.purpose ?? '').toLowerCase()
    // Split camelCase / PascalCase / acronym boundaries BEFORE lower-casing,
    // otherwise "normalizeTsPath" collapses into one token and a short
    // keyword like "ts" can never match a name segment.
    const nameWords = fn.name
        .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
        .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
        .toLowerCase()
    const tokenSet = new Set<string>([
        ...(nameLower.match(/[a-z0-9]+/g) ?? []),
        ...(nameWords.match(/[a-z0-9]+/g) ?? []),
        ...(fileNoExt.match(/[a-z0-9]+/g) ?? []),
        ...(purposeLower.match(/[a-z0-9]+/g) ?? []),
    ])

    let score = 0
    const matched: string[] = []

    for (const kw of keywords) {
        const exactName = nameLower === kw
        const partial = kw.length <= 2
            ? tokenSet.has(kw)
            : (nameLower.includes(kw) || fileLower.includes(kw) || purposeLower.includes(kw))
        if (!exactName && !partial) continue

        score = Math.max(score, exactName ? WEIGHT.KEYWORD_EXACT : WEIGHT.KEYWORD_PARTIAL)
        matched.push(kw)
    }

    return { score, matchedKeywords: matched }
}
136
197
 
137
198
  // ---------------------------------------------------------------------------
@@ -145,8 +206,10 @@ function keywordScore(fn: MikkLockFunction, keywords: string[]): number {
145
206
  function resolveSeeds(
146
207
  query: ContextQuery,
147
208
  contract: MikkContract,
148
- lock: MikkLock
209
+ lock: MikkLock,
210
+ keywords: string[]
149
211
  ): string[] {
212
+ const strictMode = query.relevanceMode === 'strict'
150
213
  const seeds = new Set<string>()
151
214
 
152
215
  // 1. Explicit focus files → all functions in those files
@@ -171,16 +234,15 @@ function resolveSeeds(
171
234
 
172
235
  // 3. Keyword match against function names and file paths
173
236
  if (seeds.size === 0) {
174
- const keywords = extractKeywords(query.task)
175
237
  for (const fn of Object.values(lock.functions)) {
176
- if (keywordScore(fn, keywords) >= WEIGHT.KEYWORD_PARTIAL) {
238
+ if (keywordScore(fn, keywords).score >= WEIGHT.KEYWORD_PARTIAL) {
177
239
  seeds.add(fn.id)
178
240
  }
179
241
  }
180
242
  }
181
243
 
182
244
  // 4. Module name match against task
183
- if (seeds.size === 0) {
245
+ if (!strictMode && seeds.size === 0) {
184
246
  const taskLower = query.task.toLowerCase()
185
247
  for (const mod of contract.declared.modules) {
186
248
  if (
@@ -219,11 +281,22 @@ export class ContextBuilder {
219
281
  * 6. Group survivors by module, emit structured context
220
282
  */
221
283
  build(query: ContextQuery): AIContext {
284
+ const relevanceMode = query.relevanceMode ?? 'balanced'
285
+ const strictMode = relevanceMode === 'strict'
222
286
  const tokenBudget = query.tokenBudget ?? DEFAULT_TOKEN_BUDGET
223
287
  const maxHops = query.maxHops ?? 4
288
+ const requiredKeywords = query.requiredKeywords ?? []
289
+ const keywords = extractKeywords(query.task, requiredKeywords)
290
+ const requiredKeywordSet = new Set(
291
+ requiredKeywords
292
+ .flatMap(item => item.split(/[,\s]+/))
293
+ .map(normalizeKeyword)
294
+ .filter(Boolean)
295
+ )
224
296
 
225
297
  // ── Step 1: Resolve seeds ──────────────────────────────────────────
226
- const seeds = resolveSeeds(query, this.contract, this.lock)
298
+ const seeds = resolveSeeds(query, this.contract, this.lock, keywords)
299
+ const seedSet = new Set(seeds)
227
300
 
228
301
  // ── Step 2: BFS proximity scores ──────────────────────────────────
229
302
  const proximityMap = seeds.length > 0
@@ -231,8 +304,15 @@ export class ContextBuilder {
231
304
  : new Map<string, number>()
232
305
 
233
306
  // ── Step 3: Score every function ──────────────────────────────────
234
- const keywords = extractKeywords(query.task)
235
307
  const allFunctions = Object.values(this.lock.functions)
308
+ const focusFiles = query.focusFiles ?? []
309
+ const focusModules = new Set(query.focusModules ?? [])
310
+ const requireAllKeywords = query.requireAllKeywords ?? false
311
+ const minKeywordMatches = query.minKeywordMatches ?? 1
312
+ const strictPassIds = new Set<string>()
313
+ const reasons: string[] = []
314
+ const suggestions: string[] = []
315
+ const nearMissSuggestions: string[] = []
236
316
 
237
317
  const scored: { fn: MikkLockFunction; score: number }[] = allFunctions.map(fn => {
238
318
  let score = 0
@@ -244,33 +324,128 @@ export class ContextBuilder {
244
324
  }
245
325
 
246
326
  // Keyword match
247
- score += keywordScore(fn, keywords)
327
+ const kwInfo = keywordScore(fn, keywords)
328
+ score += kwInfo.score
329
+
330
+ const matchedSet = new Set(kwInfo.matchedKeywords)
331
+ const inFocusFile = focusFiles.some(filePath => fn.file.includes(filePath) || filePath.includes(fn.file))
332
+ const inFocusModule = focusModules.has(fn.moduleId)
333
+ const inFocus = inFocusFile || inFocusModule
334
+
335
+ const requiredPass = requiredKeywordSet.size === 0
336
+ ? true
337
+ : [...requiredKeywordSet].every(kw => matchedSet.has(kw))
338
+ const generalPass = requireAllKeywords
339
+ ? (keywords.length > 0 && matchedSet.size >= keywords.length)
340
+ : (keywords.length === 0 ? false : matchedSet.size >= minKeywordMatches)
341
+ const keywordPass = requiredPass && generalPass
342
+ if (keywordPass) strictPassIds.add(fn.id)
343
+
344
+ if (strictMode) {
345
+ const isSeed = seedSet.has(fn.id)
346
+ const seedFromFocus = isSeed && (inFocus || focusFiles.length > 0 || focusModules.size > 0)
347
+ if (!(inFocus || keywordPass || seedFromFocus)) {
348
+ if (kwInfo.score > 0) {
349
+ nearMissSuggestions.push(`${fn.name} (${fn.file}:${fn.startLine})`)
350
+ }
351
+ return { fn, score: -1 }
352
+ }
353
+ }
248
354
 
249
355
  // Entry-point bonus
250
- if (fn.calledBy.length === 0) score += WEIGHT.ENTRY_POINT
356
+ if (!strictMode && fn.calledBy.length === 0) score += WEIGHT.ENTRY_POINT
251
357
 
252
358
  return { fn, score }
253
359
  })
254
360
 
255
361
  // ── Step 4: Sort by score descending ──────────────────────────────
256
362
  scored.sort((a, b) => b.score - a.score)
363
+ for (const { fn, score } of scored) {
364
+ if (score <= 0) continue
365
+ suggestions.push(`${fn.name} (${fn.file}:${fn.startLine})`)
366
+ if (suggestions.length >= 5) break
367
+ }
368
+ for (const s of nearMissSuggestions) {
369
+ if (suggestions.includes(s)) continue
370
+ suggestions.push(s)
371
+ if (suggestions.length >= 5) break
372
+ }
257
373
 
258
374
  // ── Step 5: Fill token budget ──────────────────────────────────────
259
- const selected: MikkLockFunction[] = []
375
+ let selected: MikkLockFunction[] = []
376
+
377
+ // Pre-calculate baseline overhead (context files, routes, constraints)
260
378
  let usedTokens = 0
379
+ const routesStr = (!strictMode && this.lock.routes) ? JSON.stringify(this.lock.routes) : ''
380
+ const ctxStr = (!strictMode && this.lock.contextFiles)
381
+ ? this.lock.contextFiles.map(cf => readContextFile(cf.path, query.projectRoot).slice(0, 2000)).join('\n')
382
+ : ''
383
+ usedTokens += estimateTokens(routesStr + ctxStr + JSON.stringify(this.contract.declared.constraints))
261
384
 
262
385
  for (const { fn, score } of scored) {
263
386
  if (score <= 0 && seeds.length > 0) break // Nothing relevant left
264
387
  if (selected.length >= (query.maxFunctions ?? 80)) break
265
388
 
266
389
  const snippet = this.buildFunctionSnippet(fn, query)
267
- const tokens = estimateTokens(snippet)
390
+ // Multiply tokens by 2.2 to account for it being in both JSON and text prompt, plus JSON framing
391
+ const tokens = estimateTokens(snippet) * 2.2
268
392
 
269
- if (usedTokens + tokens > tokenBudget) continue // skip, try smaller ones later
393
+ if (usedTokens + tokens > tokenBudget && selected.length > 0) continue // skip, try smaller ones later
270
394
  selected.push(fn)
271
395
  usedTokens += tokens
272
396
  }
273
397
 
398
+ if (strictMode) {
399
+ if (requiredKeywordSet.size > 0) {
400
+ reasons.push(`required terms: ${[...requiredKeywordSet].join(', ')}`)
401
+ }
402
+ if (strictPassIds.size === 0) {
403
+ reasons.push('no functions matched strict keyword filters')
404
+ }
405
+ }
406
+
407
+ if (strictMode && query.exactOnly) {
408
+ selected = selected.filter(fn => strictPassIds.has(fn.id))
409
+ usedTokens = selected.reduce(
410
+ (sum, fn) => sum + estimateTokens(this.buildFunctionSnippet(fn, query)),
411
+ 0
412
+ )
413
+ if (selected.length === 0 && strictPassIds.size > 0) {
414
+ reasons.push('exact matches exist but did not fit token budget or max function limit')
415
+ }
416
+ }
417
+
418
+ if (strictMode && query.failFast && selected.length === 0) {
419
+ reasons.push('fail-fast enabled: returning no context when exact match set is empty')
420
+ return {
421
+ project: {
422
+ name: this.contract.project.name,
423
+ language: this.contract.project.language,
424
+ description: this.contract.project.description,
425
+ moduleCount: this.contract.declared.modules.length,
426
+ functionCount: Object.keys(this.lock.functions).length,
427
+ },
428
+ modules: [],
429
+ constraints: this.contract.declared.constraints,
430
+ decisions: this.contract.declared.decisions.map(d => ({
431
+ title: d.title,
432
+ reason: d.reason,
433
+ })),
434
+ contextFiles: [],
435
+ routes: [],
436
+ prompt: '',
437
+ meta: {
438
+ seedCount: seeds.length,
439
+ totalFunctionsConsidered: allFunctions.length,
440
+ selectedFunctions: 0,
441
+ estimatedTokens: 0,
442
+ keywords,
443
+ reasons,
444
+ suggestions: suggestions.length > 0 ? suggestions : undefined,
445
+ },
446
+ }
447
+ }
448
+
274
449
  // ── Step 6: Group by module ────────────────────────────────────────
275
450
  const byModule = new Map<string, MikkLockFunction[]>()
276
451
  for (const fn of selected) {
@@ -298,6 +473,10 @@ export class ContextBuilder {
298
473
  // Sort modules: ones with more selected functions first
299
474
  contextModules.sort((a, b) => b.functions.length - a.functions.length)
300
475
 
476
+ // Strict mode favors precision and token efficiency: keep only function graph context.
477
+ const contextFiles = strictMode ? [] : this.lock.contextFiles
478
+ const routes = strictMode ? [] : this.lock.routes
479
+
301
480
  return {
302
481
  project: {
303
482
  name: this.contract.project.name,
@@ -312,12 +491,12 @@ export class ContextBuilder {
312
491
  title: d.title,
313
492
  reason: d.reason,
314
493
  })),
315
- contextFiles: this.lock.contextFiles?.map(cf => ({
494
+ contextFiles: contextFiles?.map(cf => ({
316
495
  path: cf.path,
317
496
  content: readContextFile(cf.path, query.projectRoot),
318
497
  type: cf.type,
319
498
  })),
320
- routes: this.lock.routes?.map(r => ({
499
+ routes: routes?.map(r => ({
321
500
  method: r.method,
322
501
  path: r.path,
323
502
  handler: r.handler,
@@ -332,6 +511,8 @@ export class ContextBuilder {
332
511
  selectedFunctions: selected.length,
333
512
  estimatedTokens: usedTokens,
334
513
  keywords,
514
+ reasons: reasons.length > 0 ? reasons : undefined,
515
+ suggestions: (selected.length === 0 && suggestions.length > 0) ? suggestions : undefined,
335
516
  },
336
517
  }
337
518
  }
@@ -414,6 +595,7 @@ export class ContextBuilder {
414
595
  /** Generate the natural-language prompt section */
415
596
  private generatePrompt(query: ContextQuery, modules: ContextModule[]): string {
416
597
  const lines: string[] = []
598
+ const strictMode = query.relevanceMode === 'strict'
417
599
 
418
600
  lines.push('=== ARCHITECTURAL CONTEXT ===')
419
601
  lines.push(`Project: ${this.contract.project.name} (${this.contract.project.language})`)
@@ -425,7 +607,7 @@ export class ContextBuilder {
425
607
 
426
608
  // Include routes (API endpoints) — critical for understanding how the app works
427
609
  const routes = this.lock.routes
428
- if (routes && routes.length > 0) {
610
+ if (!strictMode && routes && routes.length > 0) {
429
611
  lines.push('=== HTTP ROUTES ===')
430
612
  for (const r of routes) {
431
613
  const mw = r.middlewares.length > 0 ? ` [${r.middlewares.join(', ')}]` : ''
@@ -436,7 +618,7 @@ export class ContextBuilder {
436
618
 
437
619
  // Include context files (schemas, data models) first — they define the shape
438
620
  const ctxFiles = this.lock.contextFiles
439
- if (ctxFiles && ctxFiles.length > 0) {
621
+ if (!strictMode && ctxFiles && ctxFiles.length > 0) {
440
622
  lines.push('=== DATA MODELS & SCHEMAS ===')
441
623
  for (const cf of ctxFiles) {
442
624
  lines.push(`--- ${cf.path} (${cf.type}) ---`)
@@ -746,4 +928,4 @@ function dedent(lines: string[]): string[] {
746
928
  const spaces = l.length - l.trimStart().length
747
929
  return l.substring(Math.min(min, spaces))
748
930
  })
749
- }
931
+ }
package/src/providers.ts CHANGED
@@ -28,6 +28,16 @@ export class ClaudeProvider implements ContextProvider {
28
28
  lines.push(` <seeds_found>${context.meta?.seedCount ?? 0}</seeds_found>`)
29
29
  lines.push(` <functions_selected>${context.meta?.selectedFunctions ?? 0} of ${context.meta?.totalFunctionsConsidered ?? 0}</functions_selected>`)
30
30
  lines.push(` <estimated_tokens>${context.meta?.estimatedTokens ?? 0}</estimated_tokens>`)
31
+ if (context.meta?.reasons && context.meta.reasons.length > 0) {
32
+ for (const reason of context.meta.reasons) {
33
+ lines.push(` <reason>${esc(reason)}</reason>`)
34
+ }
35
+ }
36
+ if (context.meta?.suggestions && context.meta.suggestions.length > 0) {
37
+ for (const s of context.meta.suggestions) {
38
+ lines.push(` <suggestion>${esc(s)}</suggestion>`)
39
+ }
40
+ }
31
41
  lines.push('</context_meta>')
32
42
  lines.push('')
33
43
 
@@ -79,6 +89,23 @@ export class ClaudeProvider implements ContextProvider {
79
89
  lines.push('')
80
90
  }
81
91
 
92
+ if (context.modules.length === 0 && context.meta?.reasons?.length) {
93
+ lines.push('<no_match_reason>')
94
+ for (const reason of context.meta.reasons) {
95
+ lines.push(` <item>${esc(reason)}</item>`)
96
+ }
97
+ lines.push('</no_match_reason>')
98
+ lines.push('')
99
+ if (context.meta.suggestions && context.meta.suggestions.length > 0) {
100
+ lines.push('<did_you_mean>')
101
+ for (const suggestion of context.meta.suggestions) {
102
+ lines.push(` <item>${esc(suggestion)}</item>`)
103
+ }
104
+ lines.push('</did_you_mean>')
105
+ lines.push('')
106
+ }
107
+ }
108
+
82
109
  // ── Context files (schemas, data models, config) ───────────────────
83
110
  if (context.contextFiles && context.contextFiles.length > 0) {
84
111
  lines.push('<context_files>')
@@ -160,6 +187,20 @@ export class CompactProvider implements ContextProvider {
160
187
  `Task keywords: ${context.meta?.keywords?.join(', ') ?? ''}`,
161
188
  '',
162
189
  ]
190
+ if (context.modules.length === 0 && context.meta?.reasons?.length) {
191
+ lines.push('No exact context selected:')
192
+ for (const reason of context.meta.reasons) {
193
+ lines.push(`- ${reason}`)
194
+ }
195
+ if (context.meta.suggestions && context.meta.suggestions.length > 0) {
196
+ lines.push('')
197
+ lines.push('Did you mean:')
198
+ for (const suggestion of context.meta.suggestions) {
199
+ lines.push(`- ${suggestion}`)
200
+ }
201
+ }
202
+ lines.push('')
203
+ }
163
204
  for (const mod of context.modules) {
164
205
  lines.push(`## ${mod.name}`)
165
206
  for (const fn of mod.functions) {
@@ -218,4 +259,4 @@ function esc(s: string): string {
218
259
  .replace(/</g, '&lt;')
219
260
  .replace(/>/g, '&gt;')
220
261
  .replace(/"/g, '&quot;')
221
- }
262
+ }
@@ -0,0 +1,157 @@
1
+ /**
2
+ * Token Counter — accurate, fast token estimation for context budget management.
3
+ *
4
+ * Design:
5
+ * - `countTokens(text)` — accurate, linear-scan, O(n)
6
+ * - `countTokensFast(text)` — single-pass heuristic, O(n) for hot paths
7
+ * - `estimateFileTokens(content, path)` — file-type-aware wrapper
8
+ * - `TokenBudget` — budget manager with truncation
9
+ *
10
+ * The previous implementation used a character-position Set to track processed
11
+ * ranges across multiple regex scans — O(n²) per call on large files.
12
+ * Replaced with a single linear scan that categorises characters without
13
+ * per-character Set lookups.
14
+ */
15
+
16
const CHARS_PER_TOKEN = 3.8 // GPT-4 average
const MIN_CHARS_PER_TOKEN = 2.0 // Dense code
const MAX_CHARS_PER_TOKEN = 6.0 // Sparse natural language

/** True for the UTF-16 code units `a-z`, `A-Z`, `_` and `$`. */
function isIdentifierStartCode(code: number): boolean {
    return (code >= 97 && code <= 122) || (code >= 65 && code <= 90) || code === 95 || code === 36
}

/** True for identifier-start code units and the digits `0-9`. */
function isIdentifierPartCode(code: number): boolean {
    return isIdentifierStartCode(code) || (code >= 48 && code <= 57)
}

/**
 * Estimate the token count of `text` with a single left-to-right scan.
 *
 * Runs of characters are classified and costed as follows:
 *  - space, tab, LF, CR: no cost (boundaries only)
 *  - string literals (`"`, `'`, `` ` ``): 1 token per 4 chars, minimum 1
 *  - digit runs (digits and `.`): 1 token per 2 chars, minimum 1
 *  - identifiers: 1 token up to 6 chars, otherwise 1 per 3.5 chars
 *  - any other code unit: 1 token each
 *
 * The raw total is then clamped to the range implied by
 * MAX_CHARS_PER_TOKEN and MIN_CHARS_PER_TOKEN for the full text length.
 *
 * Single- and double-quoted literals end at the first line break when no
 * closing quote is found; only backtick literals may span lines. Without this
 * a lone apostrophe (for example in a comment) would swallow the remainder of
 * the text as one cheap "string".
 *
 * @param text Text to measure.
 * @returns Estimated token count; 0 for empty input.
 */
export function countTokens(text: string): number {
    if (!text) return 0

    const n = text.length
    let tokens = 0
    let i = 0

    while (i < n) {
        const c = text.charCodeAt(i)

        // Whitespace — boundary only, no token cost
        if (c === 32 || c === 9 || c === 10 || c === 13) {
            i++
            continue
        }

        // String literals — scan to the closing quote
        if (c === 34 || c === 39 || c === 96) {
            const spansLines = c === 96
            const start = i
            i++
            while (i < n) {
                const d = text.charCodeAt(i)
                if (d === 92) {
                    // Skip the escaped character, but never run past the end
                    // when the backslash is the last character of the text.
                    i = Math.min(n, i + 2)
                    continue
                }
                if (d === c) {
                    i++
                    break
                }
                // Unterminated ' or " literal: stop at the line break and let
                // the outer loop cost the following lines normally.
                if (!spansLines && (d === 10 || d === 13)) break
                i++
            }
            tokens += Math.max(1, Math.ceil((i - start) / 4))
            continue
        }

        // Digit runs — token-heavy
        if (c >= 48 && c <= 57) {
            const start = i
            while (i < n) {
                const d = text.charCodeAt(i)
                if ((d >= 48 && d <= 57) || d === 46) i++
                else break
            }
            tokens += Math.max(1, Math.ceil((i - start) / 2))
            continue
        }

        // Identifier / keyword runs
        if (isIdentifierStartCode(c)) {
            const start = i
            while (i < n && isIdentifierPartCode(text.charCodeAt(i))) i++
            const len = i - start
            tokens += len <= 6 ? 1 : Math.ceil(len / 3.5)
            continue
        }

        // Operators, punctuation, brackets, non-ASCII — 1 code unit per token
        tokens++
        i++
    }

    const minEstimate = Math.ceil(n / MAX_CHARS_PER_TOKEN)
    const maxEstimate = Math.ceil(n / MIN_CHARS_PER_TOKEN)
    return Math.max(minEstimate, Math.min(maxEstimate, tokens))
}
91
+
92
/**
 * Cheap token estimate: one pass to measure how punctuation-heavy the text
 * is, then a single division of the full length.
 *
 * Letters and digits count as word characters; space, tab, LF and CR are
 * ignored; every other code unit counts as a symbol. When symbols make up
 * more than 30% of the non-whitespace characters the text is costed at 2.8
 * chars per token, otherwise at CHARS_PER_TOKEN.
 *
 * @param text Text to measure.
 * @returns Estimated tokens; 0 for empty or whitespace-only text, else at least 1.
 */
export function countTokensFast(text: string): number {
    if (!text) return 0

    let wordChars = 0
    let symbolChars = 0
    for (let idx = 0; idx < text.length; idx++) {
        const code = text.charCodeAt(idx)
        const isWhitespace = code === 32 || code === 9 || code === 10 || code === 13
        if (isWhitespace) continue

        const isAlphaNumeric =
            (code >= 48 && code <= 57) ||
            (code >= 65 && code <= 90) ||
            (code >= 97 && code <= 122)
        if (isAlphaNumeric) {
            wordChars++
        } else {
            symbolChars++
        }
    }

    const visibleChars = wordChars + symbolChars
    if (visibleChars === 0) return 0

    const symbolShare = symbolChars / visibleChars
    const divisor = symbolShare > 0.3 ? 2.8 : CHARS_PER_TOKEN
    return Math.max(1, Math.ceil(text.length / divisor))
}
115
+
116
/**
 * Estimate tokens for a file, adjusting for its content type.
 *
 * Markdown files (`.md`, `.markdown`, case-insensitive) are costed at 90% of
 * the `countTokens` estimate, rounded up; every other file gets the plain
 * `countTokens` estimate.
 *
 * The extension is read from the last path segment only and requires a dot,
 * so an extension-less file that happens to be named `md` is not treated as
 * Markdown.
 *
 * @param content File contents.
 * @param filePath Path used solely to determine the extension (`/` or `\` separators).
 * @returns Estimated token count.
 */
export function estimateFileTokens(content: string, filePath: string): number {
    const lastSeparator = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'))
    const fileName = filePath.slice(lastSeparator + 1)
    const dot = fileName.lastIndexOf('.')
    const ext = dot >= 0 ? fileName.slice(dot + 1).toLowerCase() : ''

    const tokens = countTokens(content)
    const isMarkdown = ext === 'md' || ext === 'markdown'
    return isMarkdown ? Math.ceil(tokens * 0.9) : tokens
}
124
+
125
/**
 * Token budget manager — tracks usage against a maximum and truncates
 * content to fit what is left.
 *
 * `overflowAllowance` is a fraction (default 0.1, i.e. 10%) by which the
 * budget may be exceeded before `consume` reports failure.
 */
export class TokenBudget {
    private used = 0

    /**
     * @param maxTokens Nominal budget in tokens.
     * @param overflowAllowance Tolerated overshoot as a fraction of the budget.
     */
    constructor(
        private readonly maxTokens: number,
        private readonly overflowAllowance: number = 0.1,
    ) {}

    /** Tokens left before the nominal budget is reached; never negative. */
    get remaining(): number {
        return Math.max(0, this.maxTokens - this.used)
    }

    /**
     * Whether `content` fits in the remaining budget.
     *
     * NOTE(review): the allowance here scales `remaining`, whereas `consume`
     * scales `maxTokens`, so the two can disagree near the limit — confirm
     * which is intended before relying on both.
     */
    fits(content: string): boolean {
        return countTokensFast(content) <= this.remaining * (1 + this.overflowAllowance)
    }

    /**
     * Record `tokens` as used.
     *
     * The amount is recorded even when it overshoots. A `NaN` amount is
     * ignored, because adding it would make every later comparison false.
     *
     * @returns `true` while total usage stays within budget plus allowance.
     */
    consume(tokens: number): boolean {
        if (!Number.isNaN(tokens)) this.used += tokens
        return this.used <= this.maxTokens * (1 + this.overflowAllowance)
    }

    /**
     * Shorten `content` so that its `countTokensFast` estimate, including the
     * appended truncation marker, does not exceed `remaining`.
     *
     * @returns `content` unchanged when it already fits; `''` when nothing is
     *          left or the budget cannot even hold the marker; otherwise a
     *          prefix of `content` followed by the marker.
     */
    truncate(content: string): string {
        const budget = this.remaining
        if (budget <= 0) return ''

        const estimated = countTokensFast(content)
        if (estimated <= budget) return content

        const marker = '\n… [truncated — token budget reached]'
        // Reserve room for the marker itself; it is part of the output.
        const room = budget - countTokensFast(marker)
        if (room <= 0) return ''

        let cutAt = Math.floor(content.length * (room / estimated) * 0.9)
        for (;;) {
            // Never cut between the two halves of a surrogate pair.
            if (cutAt > 0 && cutAt < content.length) {
                const prev = content.charCodeAt(cutAt - 1)
                if (prev >= 0xd800 && prev <= 0xdbff) cutAt--
            }
            const candidate = content.slice(0, cutAt) + marker
            // The prefix can be denser than the whole text (different
            // chars-per-token bracket), so verify and shrink until it fits.
            // At cutAt === 0 only the marker remains, which is known to fit.
            if (cutAt === 0 || countTokensFast(candidate) <= budget) return candidate
            cutAt = Math.floor(cutAt * 0.9)
        }
    }
}
package/src/types.ts CHANGED
@@ -24,6 +24,8 @@ export interface AIContext {
24
24
  selectedFunctions: number
25
25
  estimatedTokens: number
26
26
  keywords: string[]
27
+ reasons?: string[]
28
+ suggestions?: string[]
27
29
  }
28
30
  }
29
31
 
@@ -72,6 +74,18 @@ export interface ContextQuery {
72
74
  includeCallGraph?: boolean
73
75
  /** Include function bodies for top-scored functions (default true) */
74
76
  includeBodies?: boolean
77
+ /** Relevance mode: balanced (default) or strict (high-precision filtering) */
78
+ relevanceMode?: 'balanced' | 'strict'
79
+ /** Additional required terms (comma-separated in CLI) that must be respected */
80
+ requiredKeywords?: string[]
81
+ /** In strict mode, require all extracted/required keywords to match */
82
+ requireAllKeywords?: boolean
83
+ /** Minimum number of matched keywords required in strict mode (default 1) */
84
+ minKeywordMatches?: number
85
+ /** Hard gate in strict mode: final output keeps only strict keyword matches */
86
+ exactOnly?: boolean
87
+ /** In strict mode, return empty context if no exact matches are found */
88
+ failFast?: boolean
75
89
  /** Absolute filesystem path to the project root (needed for body reading) */
76
90
  projectRoot?: string
77
91
  }
@@ -81,4 +95,4 @@ export interface ContextProvider {
81
95
  name: string
82
96
  formatContext(context: AIContext): string
83
97
  maxTokens: number
84
- }
98
+ }
@@ -0,0 +1,159 @@
1
+ import { describe, expect, test } from 'bun:test'
2
+ import { ContextBuilder } from '../src/context-builder.js'
3
+ import type { ContextQuery } from '../src/types.js'
4
+
5
/**
 * Build a minimal contract/lock pair for the strict-relevance tests:
 * two modules (`core-parser`, `ui`) and three functions — a parser entry
 * point, a helper it calls, and an unrelated UI entry point.
 */
function makeFixture() {
    // The fields that differ between the fixture functions.
    interface FnSpec {
        id: string
        name: string
        file: string
        moduleId: string
        endLine: number
        returnType: string
        isExported: boolean
        purpose: string
        calls: string[]
        calledBy: string[]
    }

    // Expands a spec into a full lock-function record with fresh arrays.
    const defineFn = (spec: FnSpec) => ({
        id: spec.id,
        name: spec.name,
        file: spec.file,
        moduleId: spec.moduleId,
        startLine: 1,
        endLine: spec.endLine,
        params: [],
        returnType: spec.returnType,
        isAsync: false,
        isExported: spec.isExported,
        purpose: spec.purpose,
        calls: spec.calls,
        calledBy: spec.calledBy,
        edgeCasesHandled: [],
        errorHandling: [],
    })

    const fnResolver = defineFn({
        id: 'fn:parser:resolver',
        name: 'resolveImports',
        file: 'packages/core/src/parser/ts-resolver.ts',
        moduleId: 'core-parser',
        endLine: 10,
        returnType: 'void',
        isExported: true,
        purpose: 'resolve ts imports',
        calls: ['fn:parser:helper'],
        calledBy: [],
    })

    const fnHelper = defineFn({
        id: 'fn:parser:helper',
        name: 'normalizeTsPath',
        file: 'packages/core/src/parser/path.ts',
        moduleId: 'core-parser',
        endLine: 8,
        returnType: 'string',
        isExported: false,
        purpose: 'normalize ts path',
        calls: [],
        calledBy: ['fn:parser:resolver'],
    })

    const fnUnrelated = defineFn({
        id: 'fn:ui:render',
        name: 'renderHeader',
        file: 'apps/web/components/header.tsx',
        moduleId: 'ui',
        endLine: 8,
        returnType: 'void',
        isExported: true,
        purpose: 'render ui header',
        calls: [],
        calledBy: [],
    })

    const allFns = [fnResolver, fnHelper, fnUnrelated]

    const contract = {
        project: {
            name: 'mikk',
            language: 'typescript',
            description: 'fixture',
        },
        declared: {
            modules: [
                { id: 'core-parser', name: 'Core Parser', description: '', paths: [], entryFunctions: [] },
                { id: 'ui', name: 'UI', description: '', paths: [], entryFunctions: [] },
            ],
            constraints: [],
            decisions: [],
        },
    } as any

    const lock = {
        functions: Object.fromEntries(allFns.map(fn => [fn.id, fn])),
        files: Object.fromEntries(
            allFns.map(fn => [fn.file, { path: fn.file, moduleId: fn.moduleId, imports: [] }])
        ),
        routes: [],
        contextFiles: [],
    } as any

    return { contract, lock }
}
93
+
94
/**
 * Run `query` against a fresh fixture and return the names of all selected
 * functions, module by module in the order the builder emitted them.
 */
function namesFrom(query: ContextQuery): string[] {
    const fixture = makeFixture()
    const context = new ContextBuilder(fixture.contract, fixture.lock).build(query)

    const names: string[] = []
    for (const mod of context.modules) {
        for (const fn of mod.functions) {
            names.push(fn.name)
        }
    }
    return names
}
100
+
101
describe('ContextBuilder strict relevance mode', () => {
    // Options every query in this suite shares: a small budget, signatures only.
    const sharedOptions: Omit<ContextQuery, 'task'> = {
        tokenBudget: 1200,
        includeBodies: false,
        includeCallGraph: false,
    }

    test('strict mode filters unrelated entry-point noise', () => {
        const task = 'fix ts resolver imports'

        const balanced = namesFrom({ ...sharedOptions, task, relevanceMode: 'balanced' })
        const strict = namesFrom({
            ...sharedOptions,
            task,
            relevanceMode: 'strict',
            minKeywordMatches: 1,
        })

        // Balanced mode lets the unrelated entry point through; strict must not.
        expect(balanced).toContain('renderHeader')
        expect(strict).not.toContain('renderHeader')
        expect(strict).toContain('resolveImports')
    })

    test('requiredKeywords enforces exact focus in strict mode', () => {
        const strict = namesFrom({
            ...sharedOptions,
            task: 'resolver imports',
            relevanceMode: 'strict',
            requiredKeywords: ['ts'],
            minKeywordMatches: 1,
        })

        expect(strict).toContain('resolveImports')
        expect(strict).toContain('normalizeTsPath')
        expect(strict).not.toContain('renderHeader')
    })

    test('failFast returns empty context when exact match is impossible', () => {
        const { contract, lock } = makeFixture()
        const ctx = new ContextBuilder(contract, lock).build({
            ...sharedOptions,
            task: 'resolver imports',
            relevanceMode: 'strict',
            requiredKeywords: ['nonexistent'],
            exactOnly: true,
            failFast: true,
        })

        expect(ctx.modules.length).toBe(0)
        expect(ctx.meta.selectedFunctions).toBe(0)
        // An empty result must still explain itself and offer alternatives.
        expect(ctx.meta.reasons?.length ?? 0).toBeGreaterThan(0)
        expect(ctx.meta.suggestions?.length ?? 0).toBeGreaterThan(0)
    })
})