@getmikk/ai-context 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@getmikk/ai-context",
3
- "version": "1.8.0",
3
+ "version": "1.9.0",
4
4
  "license": "Apache-2.0",
5
5
  "repository": {
6
6
  "type": "git",
@@ -21,8 +21,8 @@
21
21
  "dev": "tsc --watch"
22
22
  },
23
23
  "dependencies": {
24
- "@getmikk/core": "^1.8.0",
25
- "@getmikk/intent-engine": "^1.8.0"
24
+ "@getmikk/core": "^1.9.0",
25
+ "@getmikk/intent-engine": "^1.9.0"
26
26
  },
27
27
  "devDependencies": {
28
28
  "typescript": "^5.7.0",
@@ -1,15 +1,11 @@
1
1
  import type { MikkContract, MikkLock, MikkLockFunction } from '@getmikk/core'
2
2
  import * as fs from 'node:fs'
3
3
  import * as path from 'node:path'
4
+ import { countTokens, estimateFileTokens } from './token-counter.js'
4
5
 
5
6
  /** Default token budget for claude.md — generous but still bounded */
6
7
  const DEFAULT_TOKEN_BUDGET = 12000
7
8
 
8
- /** Rough token estimation: ~4 chars per token */
9
- function estimateTokens(text: string): number {
10
- return Math.ceil(text.length / 4)
11
- }
12
-
13
9
  /** Metadata from package.json that enriches the AI context */
14
10
  export interface ProjectMeta {
15
11
  description?: string
@@ -49,20 +45,20 @@ export class ClaudeMdGenerator {
49
45
  // --- Tier 1: Summary (always included) ----------------------
50
46
  const summary = this.generateSummary()
51
47
  sections.push(summary)
52
- usedTokens += estimateTokens(summary)
48
+ usedTokens += countTokens(summary)
53
49
 
54
50
  // --- Tech stack & conventions (always included if detectable) ---
55
51
  const techSection = this.generateTechStackSection()
56
52
  if (techSection) {
57
53
  sections.push(techSection)
58
- usedTokens += estimateTokens(techSection)
54
+ usedTokens += countTokens(techSection)
59
55
  }
60
56
 
61
57
  // --- Build / test / run commands -----------------------------
62
58
  const commandsSection = this.generateCommandsSection()
63
59
  if (commandsSection) {
64
60
  sections.push(commandsSection)
65
- usedTokens += estimateTokens(commandsSection)
61
+ usedTokens += countTokens(commandsSection)
66
62
  }
67
63
 
68
64
  // --- Tier 2: Module details (if budget allows) --------------
@@ -76,7 +72,7 @@ export class ClaudeMdGenerator {
76
72
 
77
73
  for (const module of modules) {
78
74
  const moduleSection = this.generateModuleSection(module.id)
79
- const tokens = estimateTokens(moduleSection)
75
+ const tokens = countTokens(moduleSection)
80
76
  if (usedTokens + tokens > this.tokenBudget) {
81
77
  sections.push('\n <!-- Full details truncated due to context budget -->\n')
82
78
  break
@@ -90,7 +86,7 @@ export class ClaudeMdGenerator {
90
86
  // --- Context files: schemas, data models, config ---------
91
87
  const contextSection = this.generateContextFilesSection()
92
88
  if (contextSection) {
93
- const ctxTokens = estimateTokens(contextSection)
89
+ const ctxTokens = countTokens(contextSection)
94
90
  if (usedTokens + ctxTokens <= this.tokenBudget) {
95
91
  sections.push(contextSection)
96
92
  usedTokens += ctxTokens
@@ -100,7 +96,7 @@ export class ClaudeMdGenerator {
100
96
  // --- File import graph per module ----------------------------
101
97
  const importSection = this.generateImportGraphSection()
102
98
  if (importSection) {
103
- const impTokens = estimateTokens(importSection)
99
+ const impTokens = countTokens(importSection)
104
100
  if (usedTokens + impTokens <= this.tokenBudget) {
105
101
  sections.push(importSection)
106
102
  usedTokens += impTokens
@@ -110,7 +106,7 @@ export class ClaudeMdGenerator {
110
106
  // --- HTTP Routes (Express + Next.js) -------------------------
111
107
  const routesSection = this.generateRoutesSection()
112
108
  if (routesSection) {
113
- const routeTokens = estimateTokens(routesSection)
109
+ const routeTokens = countTokens(routesSection)
114
110
  if (usedTokens + routeTokens <= this.tokenBudget) {
115
111
  sections.push(routesSection)
116
112
  usedTokens += routeTokens
@@ -119,14 +115,14 @@ export class ClaudeMdGenerator {
119
115
 
120
116
  // --- Tier 3: Constraints & decisions ------------------------
121
117
  const constraintsSection = this.generateConstraintsSection()
122
- const constraintTokens = estimateTokens(constraintsSection)
118
+ const constraintTokens = countTokens(constraintsSection)
123
119
  if (usedTokens + constraintTokens <= this.tokenBudget) {
124
120
  sections.push(constraintsSection)
125
121
  usedTokens += constraintTokens
126
122
  }
127
123
 
128
124
  const decisionsSection = this.generateDecisionsSection()
129
- const decisionTokens = estimateTokens(decisionsSection)
125
+ const decisionTokens = countTokens(decisionsSection)
130
126
  if (usedTokens + decisionTokens <= this.tokenBudget) {
131
127
  sections.push(decisionsSection)
132
128
  usedTokens += decisionTokens
@@ -225,7 +221,10 @@ export class ClaudeMdGenerator {
225
221
  }
226
222
 
227
223
  // Key functions: top 5 by calledBy count (most depended upon)
224
+ // Exclude functions already in entry points to avoid duplicates
225
+ const entryPointIds = new Set(entryPoints.map(fn => fn.id))
228
226
  const keyFunctions = [...moduleFunctions]
227
+ .filter(fn => !entryPointIds.has(fn.id)) // Exclude duplicates
229
228
  .sort((a, b) => b.calledBy.length - a.calledBy.length)
230
229
  .filter(fn => fn.calledBy.length > 0)
231
230
  .slice(0, 5)
@@ -107,31 +107,92 @@ const STOP_WORDS = new Set([
107
107
  'want', 'like', 'just', 'also', 'some', 'all', 'any', 'my', 'your',
108
108
  ])
109
109
 
110
- function extractKeywords(task: string): string[] {
111
- return task
110
/**
 * Short technical abbreviations that remain meaningful as search keywords.
 * extractKeywords looks a word up here only when it has one or two characters;
 * longer words are accepted without consulting this set.
 */
const SHORT_TECH_WORDS = new Set<string>([
    'ai',
    'ml',
    'ui',
    'ux',
    'ts',
    'js',
    'db',
    'io',
    'id',
    'ip',
    'ci',
    'cd',
    'qa',
    'api',
    'mcp',
    'jwt',
    'sql',
])
114
+
115
/**
 * Canonicalize a keyword: lowercase it, trim surrounding whitespace, then drop
 * every character that is not a-z, 0-9, underscore or hyphen.
 *
 * @param value Raw keyword text.
 * @returns The normalized keyword; may be the empty string.
 */
function normalizeKeyword(value: string): string {
    const lowered = value.toLowerCase()
    const trimmed = lowered.trim()
    return trimmed.replace(/[^a-z0-9_-]/g, '')
}
118
+
119
/**
 * Build the ordered, de-duplicated keyword list for a task description.
 *
 * Keywords are emitted in three passes, first occurrence wins:
 *   1. phrases wrapped in double or single quotes (lowercased and trimmed,
 *      otherwise kept verbatim);
 *   2. individual words of the task, normalized, minus stop words and minus
 *      one/two-character words that are not in SHORT_TECH_WORDS;
 *   3. the caller-supplied required keywords, split on commas/whitespace and
 *      normalized.
 *
 * @param task Free-form task description.
 * @param requiredKeywords Extra terms that are always appended (default none).
 * @returns Keywords in discovery order without duplicates.
 */
function extractKeywords(task: string, requiredKeywords: string[] = []): string[] {
    const ordered: string[] = []
    const known = new Set<string>()

    // Append a term once; empty terms are ignored.
    const remember = (term: string): void => {
        if (term === '' || known.has(term)) return
        known.add(term)
        ordered.push(term)
    }

    // Pass 1: quoted phrases.
    for (const quoted of task.matchAll(/"([^"]+)"|'([^']+)'/g)) {
        remember((quoted[1] ?? quoted[2] ?? '').toLowerCase().trim())
    }

    // Pass 2: single words from the task text.
    const cleaned = task.toLowerCase().replace(/[^a-z0-9\s_-]/g, ' ')
    for (const piece of cleaned.split(/\s+/)) {
        const word = normalizeKeyword(piece)
        if (word === '' || STOP_WORDS.has(word)) continue
        if (word.length <= 2 && !SHORT_TECH_WORDS.has(word)) continue
        remember(word)
    }

    // Pass 3: explicitly required keywords.
    for (const item of requiredKeywords) {
        for (const piece of item.split(/[,\s]+/)) {
            remember(normalizeKeyword(piece))
        }
    }

    return ordered
}
117
160
 
118
161
/**
 * Score a function against the task keywords and report which keywords hit.
 *
 * A keyword equal to the lowercased function name earns WEIGHT.KEYWORD_EXACT.
 * Otherwise a keyword earns WEIGHT.KEYWORD_PARTIAL when:
 *   - it has one or two characters and equals a whole alphanumeric token of
 *     the name, the file path (source extensions removed) or the purpose; or
 *   - it is longer and is a substring of the name, file path or purpose.
 * The score is the largest weight earned by any keyword, not a sum.
 *
 * @param fn Function record from the lock file.
 * @param keywords Lowercased keywords to test.
 * @returns The best weight (0 when nothing matched) and the matching keywords
 *          in input order.
 */
function keywordScore(
    fn: MikkLockFunction,
    keywords: string[]
): { score: number; matchedKeywords: string[] } {
    if (keywords.length === 0) return { score: 0, matchedKeywords: [] }

    const name = fn.name.toLowerCase()
    const file = fn.file.toLowerCase()
    const purpose = (fn.purpose ?? '').toLowerCase()
    // Blank out source-file extensions so "ts"/"js" only match real path tokens.
    const fileSansExt = file.replace(/\.(d\.ts|ts|tsx|js|jsx|mjs|cjs|mts|cts)\b/g, ' ')

    const wordsOf = (text: string): string[] => text.match(/[a-z0-9]+/g) ?? []
    const tokens = new Set<string>([
        ...wordsOf(name),
        ...wordsOf(fileSansExt),
        ...wordsOf(purpose),
    ])

    let best = 0
    const hits: string[] = []

    for (const kw of keywords) {
        let weight: number
        if (name === kw) {
            weight = WEIGHT.KEYWORD_EXACT
        } else {
            const loose = kw.length <= 2
                ? tokens.has(kw)
                : (name.includes(kw) || file.includes(kw) || purpose.includes(kw))
            if (!loose) continue
            weight = WEIGHT.KEYWORD_PARTIAL
        }
        best = Math.max(best, weight)
        hits.push(kw)
    }

    return { score: best, matchedKeywords: hits }
}
136
197
 
137
198
  // ---------------------------------------------------------------------------
@@ -145,8 +206,10 @@ function keywordScore(fn: MikkLockFunction, keywords: string[]): number {
145
206
  function resolveSeeds(
146
207
  query: ContextQuery,
147
208
  contract: MikkContract,
148
- lock: MikkLock
209
+ lock: MikkLock,
210
+ keywords: string[]
149
211
  ): string[] {
212
+ const strictMode = query.relevanceMode === 'strict'
150
213
  const seeds = new Set<string>()
151
214
 
152
215
  // 1. Explicit focus files → all functions in those files
@@ -171,16 +234,15 @@ function resolveSeeds(
171
234
 
172
235
  // 3. Keyword match against function names and file paths
173
236
  if (seeds.size === 0) {
174
- const keywords = extractKeywords(query.task)
175
237
  for (const fn of Object.values(lock.functions)) {
176
- if (keywordScore(fn, keywords) >= WEIGHT.KEYWORD_PARTIAL) {
238
+ if (keywordScore(fn, keywords).score >= WEIGHT.KEYWORD_PARTIAL) {
177
239
  seeds.add(fn.id)
178
240
  }
179
241
  }
180
242
  }
181
243
 
182
244
  // 4. Module name match against task
183
- if (seeds.size === 0) {
245
+ if (!strictMode && seeds.size === 0) {
184
246
  const taskLower = query.task.toLowerCase()
185
247
  for (const mod of contract.declared.modules) {
186
248
  if (
@@ -219,11 +281,22 @@ export class ContextBuilder {
219
281
  * 6. Group survivors by module, emit structured context
220
282
  */
221
283
  build(query: ContextQuery): AIContext {
284
+ const relevanceMode = query.relevanceMode ?? 'balanced'
285
+ const strictMode = relevanceMode === 'strict'
222
286
  const tokenBudget = query.tokenBudget ?? DEFAULT_TOKEN_BUDGET
223
287
  const maxHops = query.maxHops ?? 4
288
+ const requiredKeywords = query.requiredKeywords ?? []
289
+ const keywords = extractKeywords(query.task, requiredKeywords)
290
+ const requiredKeywordSet = new Set(
291
+ requiredKeywords
292
+ .flatMap(item => item.split(/[,\s]+/))
293
+ .map(normalizeKeyword)
294
+ .filter(Boolean)
295
+ )
224
296
 
225
297
  // ── Step 1: Resolve seeds ──────────────────────────────────────────
226
- const seeds = resolveSeeds(query, this.contract, this.lock)
298
+ const seeds = resolveSeeds(query, this.contract, this.lock, keywords)
299
+ const seedSet = new Set(seeds)
227
300
 
228
301
  // ── Step 2: BFS proximity scores ──────────────────────────────────
229
302
  const proximityMap = seeds.length > 0
@@ -231,8 +304,15 @@ export class ContextBuilder {
231
304
  : new Map<string, number>()
232
305
 
233
306
  // ── Step 3: Score every function ──────────────────────────────────
234
- const keywords = extractKeywords(query.task)
235
307
  const allFunctions = Object.values(this.lock.functions)
308
+ const focusFiles = query.focusFiles ?? []
309
+ const focusModules = new Set(query.focusModules ?? [])
310
+ const requireAllKeywords = query.requireAllKeywords ?? false
311
+ const minKeywordMatches = query.minKeywordMatches ?? 1
312
+ const strictPassIds = new Set<string>()
313
+ const reasons: string[] = []
314
+ const suggestions: string[] = []
315
+ const nearMissSuggestions: string[] = []
236
316
 
237
317
  const scored: { fn: MikkLockFunction; score: number }[] = allFunctions.map(fn => {
238
318
  let score = 0
@@ -244,19 +324,55 @@ export class ContextBuilder {
244
324
  }
245
325
 
246
326
  // Keyword match
247
- score += keywordScore(fn, keywords)
327
+ const kwInfo = keywordScore(fn, keywords)
328
+ score += kwInfo.score
329
+
330
+ const matchedSet = new Set(kwInfo.matchedKeywords)
331
+ const inFocusFile = focusFiles.some(filePath => fn.file.includes(filePath) || filePath.includes(fn.file))
332
+ const inFocusModule = focusModules.has(fn.moduleId)
333
+ const inFocus = inFocusFile || inFocusModule
334
+
335
+ const requiredPass = requiredKeywordSet.size === 0
336
+ ? true
337
+ : [...requiredKeywordSet].every(kw => matchedSet.has(kw))
338
+ const generalPass = requireAllKeywords
339
+ ? (keywords.length > 0 && matchedSet.size >= keywords.length)
340
+ : (keywords.length === 0 ? false : matchedSet.size >= minKeywordMatches)
341
+ const keywordPass = requiredPass && generalPass
342
+ if (keywordPass) strictPassIds.add(fn.id)
343
+
344
+ if (strictMode) {
345
+ const isSeed = seedSet.has(fn.id)
346
+ const seedFromFocus = isSeed && (inFocus || focusFiles.length > 0 || focusModules.size > 0)
347
+ if (!(inFocus || keywordPass || seedFromFocus)) {
348
+ if (kwInfo.score > 0) {
349
+ nearMissSuggestions.push(`${fn.name} (${fn.file}:${fn.startLine})`)
350
+ }
351
+ return { fn, score: -1 }
352
+ }
353
+ }
248
354
 
249
355
  // Entry-point bonus
250
- if (fn.calledBy.length === 0) score += WEIGHT.ENTRY_POINT
356
+ if (!strictMode && fn.calledBy.length === 0) score += WEIGHT.ENTRY_POINT
251
357
 
252
358
  return { fn, score }
253
359
  })
254
360
 
255
361
  // ── Step 4: Sort by score descending ──────────────────────────────
256
362
  scored.sort((a, b) => b.score - a.score)
363
+ for (const { fn, score } of scored) {
364
+ if (score <= 0) continue
365
+ suggestions.push(`${fn.name} (${fn.file}:${fn.startLine})`)
366
+ if (suggestions.length >= 5) break
367
+ }
368
+ for (const s of nearMissSuggestions) {
369
+ if (suggestions.includes(s)) continue
370
+ suggestions.push(s)
371
+ if (suggestions.length >= 5) break
372
+ }
257
373
 
258
374
  // ── Step 5: Fill token budget ──────────────────────────────────────
259
- const selected: MikkLockFunction[] = []
375
+ let selected: MikkLockFunction[] = []
260
376
  let usedTokens = 0
261
377
 
262
378
  for (const { fn, score } of scored) {
@@ -271,6 +387,57 @@ export class ContextBuilder {
271
387
  usedTokens += tokens
272
388
  }
273
389
 
390
+ if (strictMode) {
391
+ if (requiredKeywordSet.size > 0) {
392
+ reasons.push(`required terms: ${[...requiredKeywordSet].join(', ')}`)
393
+ }
394
+ if (strictPassIds.size === 0) {
395
+ reasons.push('no functions matched strict keyword filters')
396
+ }
397
+ }
398
+
399
+ if (strictMode && query.exactOnly) {
400
+ selected = selected.filter(fn => strictPassIds.has(fn.id))
401
+ usedTokens = selected.reduce(
402
+ (sum, fn) => sum + estimateTokens(this.buildFunctionSnippet(fn, query)),
403
+ 0
404
+ )
405
+ if (selected.length === 0 && strictPassIds.size > 0) {
406
+ reasons.push('exact matches exist but did not fit token budget or max function limit')
407
+ }
408
+ }
409
+
410
+ if (strictMode && query.failFast && selected.length === 0) {
411
+ reasons.push('fail-fast enabled: returning no context when exact match set is empty')
412
+ return {
413
+ project: {
414
+ name: this.contract.project.name,
415
+ language: this.contract.project.language,
416
+ description: this.contract.project.description,
417
+ moduleCount: this.contract.declared.modules.length,
418
+ functionCount: Object.keys(this.lock.functions).length,
419
+ },
420
+ modules: [],
421
+ constraints: this.contract.declared.constraints,
422
+ decisions: this.contract.declared.decisions.map(d => ({
423
+ title: d.title,
424
+ reason: d.reason,
425
+ })),
426
+ contextFiles: [],
427
+ routes: [],
428
+ prompt: '',
429
+ meta: {
430
+ seedCount: seeds.length,
431
+ totalFunctionsConsidered: allFunctions.length,
432
+ selectedFunctions: 0,
433
+ estimatedTokens: 0,
434
+ keywords,
435
+ reasons,
436
+ suggestions: suggestions.length > 0 ? suggestions : undefined,
437
+ },
438
+ }
439
+ }
440
+
274
441
  // ── Step 6: Group by module ────────────────────────────────────────
275
442
  const byModule = new Map<string, MikkLockFunction[]>()
276
443
  for (const fn of selected) {
@@ -298,6 +465,10 @@ export class ContextBuilder {
298
465
  // Sort modules: ones with more selected functions first
299
466
  contextModules.sort((a, b) => b.functions.length - a.functions.length)
300
467
 
468
+ // Strict mode favors precision and token efficiency: keep only function graph context.
469
+ const contextFiles = strictMode ? [] : this.lock.contextFiles
470
+ const routes = strictMode ? [] : this.lock.routes
471
+
301
472
  return {
302
473
  project: {
303
474
  name: this.contract.project.name,
@@ -312,12 +483,12 @@ export class ContextBuilder {
312
483
  title: d.title,
313
484
  reason: d.reason,
314
485
  })),
315
- contextFiles: this.lock.contextFiles?.map(cf => ({
486
+ contextFiles: contextFiles?.map(cf => ({
316
487
  path: cf.path,
317
488
  content: readContextFile(cf.path, query.projectRoot),
318
489
  type: cf.type,
319
490
  })),
320
- routes: this.lock.routes?.map(r => ({
491
+ routes: routes?.map(r => ({
321
492
  method: r.method,
322
493
  path: r.path,
323
494
  handler: r.handler,
@@ -332,6 +503,8 @@ export class ContextBuilder {
332
503
  selectedFunctions: selected.length,
333
504
  estimatedTokens: usedTokens,
334
505
  keywords,
506
+ reasons: reasons.length > 0 ? reasons : undefined,
507
+ suggestions: (selected.length === 0 && suggestions.length > 0) ? suggestions : undefined,
335
508
  },
336
509
  }
337
510
  }
@@ -414,6 +587,7 @@ export class ContextBuilder {
414
587
  /** Generate the natural-language prompt section */
415
588
  private generatePrompt(query: ContextQuery, modules: ContextModule[]): string {
416
589
  const lines: string[] = []
590
+ const strictMode = query.relevanceMode === 'strict'
417
591
 
418
592
  lines.push('=== ARCHITECTURAL CONTEXT ===')
419
593
  lines.push(`Project: ${this.contract.project.name} (${this.contract.project.language})`)
@@ -425,7 +599,7 @@ export class ContextBuilder {
425
599
 
426
600
  // Include routes (API endpoints) — critical for understanding how the app works
427
601
  const routes = this.lock.routes
428
- if (routes && routes.length > 0) {
602
+ if (!strictMode && routes && routes.length > 0) {
429
603
  lines.push('=== HTTP ROUTES ===')
430
604
  for (const r of routes) {
431
605
  const mw = r.middlewares.length > 0 ? ` [${r.middlewares.join(', ')}]` : ''
@@ -436,7 +610,7 @@ export class ContextBuilder {
436
610
 
437
611
  // Include context files (schemas, data models) first — they define the shape
438
612
  const ctxFiles = this.lock.contextFiles
439
- if (ctxFiles && ctxFiles.length > 0) {
613
+ if (!strictMode && ctxFiles && ctxFiles.length > 0) {
440
614
  lines.push('=== DATA MODELS & SCHEMAS ===')
441
615
  for (const cf of ctxFiles) {
442
616
  lines.push(`--- ${cf.path} (${cf.type}) ---`)
@@ -746,4 +920,4 @@ function dedent(lines: string[]): string[] {
746
920
  const spaces = l.length - l.trimStart().length
747
921
  return l.substring(Math.min(min, spaces))
748
922
  })
749
- }
923
+ }
package/src/providers.ts CHANGED
@@ -28,6 +28,16 @@ export class ClaudeProvider implements ContextProvider {
28
28
  lines.push(` <seeds_found>${context.meta?.seedCount ?? 0}</seeds_found>`)
29
29
  lines.push(` <functions_selected>${context.meta?.selectedFunctions ?? 0} of ${context.meta?.totalFunctionsConsidered ?? 0}</functions_selected>`)
30
30
  lines.push(` <estimated_tokens>${context.meta?.estimatedTokens ?? 0}</estimated_tokens>`)
31
+ if (context.meta?.reasons && context.meta.reasons.length > 0) {
32
+ for (const reason of context.meta.reasons) {
33
+ lines.push(` <reason>${esc(reason)}</reason>`)
34
+ }
35
+ }
36
+ if (context.meta?.suggestions && context.meta.suggestions.length > 0) {
37
+ for (const s of context.meta.suggestions) {
38
+ lines.push(` <suggestion>${esc(s)}</suggestion>`)
39
+ }
40
+ }
31
41
  lines.push('</context_meta>')
32
42
  lines.push('')
33
43
 
@@ -79,6 +89,23 @@ export class ClaudeProvider implements ContextProvider {
79
89
  lines.push('')
80
90
  }
81
91
 
92
+ if (context.modules.length === 0 && context.meta?.reasons?.length) {
93
+ lines.push('<no_match_reason>')
94
+ for (const reason of context.meta.reasons) {
95
+ lines.push(` <item>${esc(reason)}</item>`)
96
+ }
97
+ lines.push('</no_match_reason>')
98
+ lines.push('')
99
+ if (context.meta.suggestions && context.meta.suggestions.length > 0) {
100
+ lines.push('<did_you_mean>')
101
+ for (const suggestion of context.meta.suggestions) {
102
+ lines.push(` <item>${esc(suggestion)}</item>`)
103
+ }
104
+ lines.push('</did_you_mean>')
105
+ lines.push('')
106
+ }
107
+ }
108
+
82
109
  // ── Context files (schemas, data models, config) ───────────────────
83
110
  if (context.contextFiles && context.contextFiles.length > 0) {
84
111
  lines.push('<context_files>')
@@ -160,6 +187,20 @@ export class CompactProvider implements ContextProvider {
160
187
  `Task keywords: ${context.meta?.keywords?.join(', ') ?? ''}`,
161
188
  '',
162
189
  ]
190
+ if (context.modules.length === 0 && context.meta?.reasons?.length) {
191
+ lines.push('No exact context selected:')
192
+ for (const reason of context.meta.reasons) {
193
+ lines.push(`- ${reason}`)
194
+ }
195
+ if (context.meta.suggestions && context.meta.suggestions.length > 0) {
196
+ lines.push('')
197
+ lines.push('Did you mean:')
198
+ for (const suggestion of context.meta.suggestions) {
199
+ lines.push(`- ${suggestion}`)
200
+ }
201
+ }
202
+ lines.push('')
203
+ }
163
204
  for (const mod of context.modules) {
164
205
  lines.push(`## ${mod.name}`)
165
206
  for (const fn of mod.functions) {
@@ -218,4 +259,4 @@ function esc(s: string): string {
218
259
  .replace(/</g, '&lt;')
219
260
  .replace(/>/g, '&gt;')
220
261
  .replace(/"/g, '&quot;')
221
- }
262
+ }
@@ -0,0 +1,224 @@
1
+ /**
2
+ * Improved Token Counter
3
+ *
4
+ * Provides more accurate token counting than the simple length/4 approximation.
5
+ * Uses a GPT-4 compatible tokenizer approximation for better budget management.
6
+ */
7
+
8
// Character-based token approximation rates (characters consumed per token).
const CHARS_PER_TOKEN = 3.8 // Default rate for leftover text
const MIN_CHARS_PER_TOKEN = 2.0 // Rate for short-word (dense) leftover text; also the upper clamp
const MAX_CHARS_PER_TOKEN = 6.0 // Rate for long-word (sparse) leftover text; also the lower clamp

// Pattern tables used by countTokens. Patterns are applied in array order and a
// match is counted only if none of its characters were claimed by an earlier match.
const TOKEN_PATTERNS = {
    // Each non-overlapping match counts as exactly one token.
    // NOTE(review): the punctuation pattern claims every '.', so the dotted-name
    // patterns listed after it (console.*, Math.*) can never win a span. The
    // order is kept as-is to preserve existing estimates.
    SINGLE_TOKEN_PATTERNS: [
        /\b(if|else|for|while|function|return|const|let|var|class|import|export)\b/g,
        /\b(true|false|null|undefined)\b/g,
        /\b(async|await|try|catch|throw|new|this)\b/g,
        // Operators and punctuation
        /[+\-*\/=<>!&|]+/g,
        /[{}()\[\];,\.]/g,
        // Common function names
        /\b(console\.log|console\.error|console\.warn)\b/g,
        /\b(Math\.(floor|ceil|round|max|min))\b/g,
    ],

    // Each non-overlapping match is weighted by its length (see highTokenWeight).
    HIGH_TOKEN_PATTERNS: [
        // String literals
        /'[^']*'/g,
        /"[^"]*"/g,
        /`[^`]*`/g,
        // Numbers
        /\b\d+\.?\d*\b/g,
        // Long identifiers (9+ characters, starting with a lowercase letter)
        /\b[a-z][a-zA-Z0-9]{8,}\b/g,
    ]
}

/**
 * Claim the half-open range [start, end) in the mask if, and only if, none of
 * its positions is already claimed.
 *
 * @returns true when the range was free and is now marked, false otherwise.
 */
function claimSpan(claimed: Uint8Array, start: number, end: number): boolean {
    for (let i = start; i < end; i++) {
        if (claimed[i]) return false
    }
    claimed.fill(1, start, end)
    return true
}

/**
 * Token weight of a HIGH_TOKEN_PATTERNS match: one token per 4 characters for
 * a quoted literal, per 2 characters for a number, per 6 for an identifier.
 */
function highTokenWeight(matched: string): number {
    const first = matched[0]
    if (first === '\'' || first === '"' || first === '`') {
        return Math.ceil(matched.length / 4)
    }
    if (/^\d/.test(matched)) {
        return Math.ceil(matched.length / 2)
    }
    return Math.ceil(matched.length / 6)
}

/**
 * Estimate the number of tokens in a piece of text.
 *
 * Three passes are made over the text:
 *   1. every non-overlapping SINGLE_TOKEN_PATTERNS match adds one token;
 *   2. every non-overlapping HIGH_TOKEN_PATTERNS match adds a length-based weight;
 *   3. characters claimed by neither pass are joined and divided by a
 *      chars-per-token rate chosen from their average word length.
 * The total is finally clamped to [length / MAX_CHARS_PER_TOKEN,
 * length / MIN_CHARS_PER_TOKEN], both rounded up.
 *
 * @param text Text to measure; indices are UTF-16 code units.
 * @returns A non-negative integer estimate; 0 for empty input.
 */
export function countTokens(text: string): number {
    if (!text) return 0

    // One byte per code unit instead of a Set entry per claimed position.
    const claimed = new Uint8Array(text.length)
    let tokenCount = 0

    for (const pattern of TOKEN_PATTERNS.SINGLE_TOKEN_PATTERNS) {
        for (const match of text.matchAll(pattern)) {
            const start = match.index ?? 0
            if (claimSpan(claimed, start, start + match[0].length)) {
                tokenCount += 1
            }
        }
    }

    for (const pattern of TOKEN_PATTERNS.HIGH_TOKEN_PATTERNS) {
        for (const match of text.matchAll(pattern)) {
            const start = match.index ?? 0
            if (claimSpan(claimed, start, start + match[0].length)) {
                tokenCount += highTokenWeight(match[0])
            }
        }
    }

    // Collect the unclaimed runs; joining them reproduces the text with every
    // claimed character removed.
    const leftoverRuns: string[] = []
    let runStart = -1
    for (let i = 0; i <= text.length; i++) {
        const free = i < text.length && !claimed[i]
        if (free) {
            if (runStart < 0) runStart = i
        } else if (runStart >= 0) {
            leftoverRuns.push(text.slice(runStart, i))
            runStart = -1
        }
    }
    const remainingText = leftoverRuns.join('')

    if (remainingText.length > 0) {
        // Leading/trailing whitespace yields empty "words"; they count toward
        // the word total exactly as before.
        const words = remainingText.split(/\s+/)
        const wordChars = words.reduce((sum, word) => sum + word.length, 0)
        const avgWordLength = wordChars / Math.max(words.length, 1)

        let charsPerToken = CHARS_PER_TOKEN
        if (avgWordLength < 4) {
            charsPerToken = MIN_CHARS_PER_TOKEN
        } else if (avgWordLength > 8) {
            charsPerToken = MAX_CHARS_PER_TOKEN
        }

        tokenCount += Math.ceil(remainingText.length / charsPerToken)
    }

    // Clamp to sane bounds derived from the raw length.
    const minEstimate = Math.ceil(text.length / MAX_CHARS_PER_TOKEN)
    const maxEstimate = Math.ceil(text.length / MIN_CHARS_PER_TOKEN)

    return Math.max(minEstimate, Math.min(maxEstimate, tokenCount))
}
137
+
138
/**
 * Cheap token estimate from two character ratios.
 *
 * The chars-per-token rate is picked by the first rule that applies:
 *   - more than 70% alphanumeric characters -> 3.2
 *   - more than 20% quote characters (' " `)  -> 4.5
 *   - less than 30% alphanumeric characters -> 5.0
 *   - otherwise                             -> CHARS_PER_TOKEN
 *
 * @param text Text to measure.
 * @returns ceil(length / rate); 0 for empty input.
 */
export function countTokensFast(text: string): number {
    if (!text || text.length === 0) return 0

    const total = text.length
    const alnumCount = text.match(/[a-zA-Z0-9]/g)?.length ?? 0
    const quoteCount = text.match(/['"`]/g)?.length ?? 0
    const codeDensity = alnumCount / total
    const stringRatio = quoteCount / total

    const charsPerToken =
        codeDensity > 0.7 ? 3.2
        : stringRatio > 0.2 ? 4.5
        : codeDensity < 0.3 ? 5.0
        : CHARS_PER_TOKEN

    return Math.ceil(total / charsPerToken)
}
160
+
161
/**
 * Estimate the token count of a file, scaled by file type.
 *
 * `.json` files are scaled up by 10% and `.md` files down by 10%; every other
 * type (including ts/tsx/js/jsx) uses the plain countTokens estimate. The
 * result is always a whole number: scaled values are rounded up so a budget
 * check never under-counts.
 *
 * @param content File contents.
 * @param filePath Path or file name; only the extension of its last path
 *                 segment is inspected (case-insensitive).
 * @returns A non-negative integer token estimate.
 */
export function estimateFileTokens(content: string, filePath: string): number {
    const base = countTokens(content)

    // Look at the final path segment only, so a dot in a directory name or an
    // extension-less file called "json"/"md" is not mistaken for an extension.
    const fileName = filePath.split(/[\\/]/).pop() ?? ''
    const dot = fileName.lastIndexOf('.')
    const extension = dot >= 0 ? fileName.slice(dot + 1).toLowerCase() : ''

    switch (extension) {
        case 'json':
            // Integer arithmetic first: base * 1.1 can land just above a whole number.
            return Math.ceil((base * 11) / 10)
        case 'md':
            return Math.ceil((base * 9) / 10)
        default:
            return base
    }
}
186
+
187
/**
 * Helper around a fixed token budget.
 *
 * `maxTokens` is the nominal budget; `overflowAllowance` is the fraction by
 * which `fits` lets a single piece of content exceed it (default 0.1).
 */
export class TokenBudget {
    constructor(private maxTokens: number, private overflowAllowance: number = 0.1) {}

    /**
     * Whether the content's estimated tokens are within the budget plus the
     * overflow allowance. Tokens already spent are not taken into account.
     */
    fits(content: string): boolean {
        const ceiling = this.maxTokens * (1 + this.overflowAllowance)
        return countTokens(content) <= ceiling
    }

    /**
     * Tokens left after `usedTokens` have been spent, never below zero.
     */
    remaining(usedTokens: number): number {
        const left = this.maxTokens - usedTokens
        return Math.max(0, left)
    }

    /**
     * Return the content unchanged when it fits in what is left of the budget,
     * an empty string when nothing is left, and otherwise a character-ratio
     * prefix of the content followed by a truncation marker.
     */
    truncate(content: string, usedTokens: number = 0): string {
        const available = this.remaining(usedTokens)
        if (available <= 0) return ''

        const needed = countTokens(content)
        if (needed <= available) return content

        // Keep the share of characters that matches the available share of
        // tokens, shrunk by a further 10% as a safety buffer.
        const keepChars = Math.floor(content.length * (available / needed) * 0.9)
        const head = content.slice(0, keepChars)

        return `${head}\n... [truncated due to token budget]`
    }
}
package/src/types.ts CHANGED
@@ -24,6 +24,8 @@ export interface AIContext {
24
24
  selectedFunctions: number
25
25
  estimatedTokens: number
26
26
  keywords: string[]
27
+ reasons?: string[]
28
+ suggestions?: string[]
27
29
  }
28
30
  }
29
31
 
@@ -72,6 +74,18 @@ export interface ContextQuery {
72
74
  includeCallGraph?: boolean
73
75
  /** Include function bodies for top-scored functions (default true) */
74
76
  includeBodies?: boolean
77
+ /** Relevance mode: balanced (default) or strict (high-precision filtering) */
78
+ relevanceMode?: 'balanced' | 'strict'
79
+ /** Additional required terms (comma-separated in CLI) that must be respected */
80
+ requiredKeywords?: string[]
81
+ /** In strict mode, require all extracted/required keywords to match */
82
+ requireAllKeywords?: boolean
83
+ /** Minimum number of matched keywords required in strict mode (default 1) */
84
+ minKeywordMatches?: number
85
+ /** Hard gate in strict mode: final output keeps only strict keyword matches */
86
+ exactOnly?: boolean
87
+ /** In strict mode, return empty context if no exact matches are found */
88
+ failFast?: boolean
75
89
  /** Absolute filesystem path to the project root (needed for body reading) */
76
90
  projectRoot?: string
77
91
  }
@@ -81,4 +95,4 @@ export interface ContextProvider {
81
95
  name: string
82
96
  formatContext(context: AIContext): string
83
97
  maxTokens: number
84
- }
98
+ }
@@ -0,0 +1,159 @@
1
+ import { describe, expect, test } from 'bun:test'
2
+ import { ContextBuilder } from '../src/context-builder.js'
3
+ import type { ContextQuery } from '../src/types.js'
4
+
5
/**
 * Build a tiny in-memory contract/lock pair for the strict-mode tests:
 * two parser functions (resolveImports calls normalizeTsPath) in module
 * `core-parser`, plus one unrelated UI function in module `ui`.
 */
function makeFixture() {
    // Fill in the fields that are identical for every fixture function.
    const fnRecord = (spec: {
        id: string
        name: string
        file: string
        moduleId: string
        endLine: number
        returnType: string
        isExported: boolean
        purpose: string
        calls: string[]
        calledBy: string[]
    }) => ({
        id: spec.id,
        name: spec.name,
        file: spec.file,
        moduleId: spec.moduleId,
        startLine: 1,
        endLine: spec.endLine,
        params: [],
        returnType: spec.returnType,
        isAsync: false,
        isExported: spec.isExported,
        purpose: spec.purpose,
        calls: spec.calls,
        calledBy: spec.calledBy,
        edgeCasesHandled: [],
        errorHandling: [],
    })

    // Insertion order matters: it is the iteration order of lock.functions.
    const fixtureFunctions = [
        fnRecord({
            id: 'fn:parser:resolver',
            name: 'resolveImports',
            file: 'packages/core/src/parser/ts-resolver.ts',
            moduleId: 'core-parser',
            endLine: 10,
            returnType: 'void',
            isExported: true,
            purpose: 'resolve ts imports',
            calls: ['fn:parser:helper'],
            calledBy: [],
        }),
        fnRecord({
            id: 'fn:parser:helper',
            name: 'normalizeTsPath',
            file: 'packages/core/src/parser/path.ts',
            moduleId: 'core-parser',
            endLine: 8,
            returnType: 'string',
            isExported: false,
            purpose: 'normalize ts path',
            calls: [],
            calledBy: ['fn:parser:resolver'],
        }),
        fnRecord({
            id: 'fn:ui:render',
            name: 'renderHeader',
            file: 'apps/web/components/header.tsx',
            moduleId: 'ui',
            endLine: 8,
            returnType: 'void',
            isExported: true,
            purpose: 'render ui header',
            calls: [],
            calledBy: [],
        }),
    ]

    const contract = {
        project: {
            name: 'mikk',
            language: 'typescript',
            description: 'fixture',
        },
        declared: {
            modules: [
                { id: 'core-parser', name: 'Core Parser', description: '', paths: [], entryFunctions: [] },
                { id: 'ui', name: 'UI', description: '', paths: [], entryFunctions: [] },
            ],
            constraints: [],
            decisions: [],
        },
    } as any

    const lock = {
        functions: Object.fromEntries(fixtureFunctions.map(fn => [fn.id, fn])),
        files: Object.fromEntries(
            fixtureFunctions.map(fn => [fn.file, { path: fn.file, moduleId: fn.moduleId, imports: [] }])
        ),
        routes: [],
        contextFiles: [],
    } as any

    return { contract, lock }
}
93
+
94
/**
 * Run a query against a fresh fixture and return the names of all selected
 * functions, module by module in the order the builder emitted them.
 */
function namesFrom(query: ContextQuery): string[] {
    const fixture = makeFixture()
    const context = new ContextBuilder(fixture.contract, fixture.lock).build(query)

    const names: string[] = []
    for (const mod of context.modules) {
        for (const fn of mod.functions) {
            names.push(fn.name)
        }
    }
    return names
}
100
+
101
describe('ContextBuilder strict relevance mode', () => {
    // Options shared by every query in this suite.
    const shared = {
        tokenBudget: 1200,
        includeBodies: false,
        includeCallGraph: false,
    } as const

    // Balanced mode lets the unrelated entry point through; strict mode must not.
    test('strict mode filters unrelated entry-point noise', () => {
        const task = 'fix ts resolver imports'

        const balanced = namesFrom({ ...shared, task, relevanceMode: 'balanced' })
        const strict = namesFrom({
            ...shared,
            task,
            relevanceMode: 'strict',
            minKeywordMatches: 1,
        })

        expect(balanced).toContain('renderHeader')
        expect(strict).not.toContain('renderHeader')
        expect(strict).toContain('resolveImports')
    })

    // A required keyword keeps both parser functions and still drops the UI one.
    test('requiredKeywords enforces exact focus in strict mode', () => {
        const strict = namesFrom({
            ...shared,
            task: 'resolver imports',
            relevanceMode: 'strict',
            requiredKeywords: ['ts'],
            minKeywordMatches: 1,
        })

        for (const expected of ['resolveImports', 'normalizeTsPath']) {
            expect(strict).toContain(expected)
        }
        expect(strict).not.toContain('renderHeader')
    })

    // An unmatched required keyword with exactOnly + failFast yields an empty
    // context that still explains itself through reasons and suggestions.
    test('failFast returns empty context when exact match is impossible', () => {
        const fixture = makeFixture()
        const ctx = new ContextBuilder(fixture.contract, fixture.lock).build({
            ...shared,
            task: 'resolver imports',
            relevanceMode: 'strict',
            requiredKeywords: ['nonexistent'],
            exactOnly: true,
            failFast: true,
        })

        expect(ctx.modules.length).toBe(0)
        expect(ctx.meta.selectedFunctions).toBe(0)

        const reasonCount = ctx.meta.reasons?.length ?? 0
        const suggestionCount = ctx.meta.suggestions?.length ?? 0
        expect(reasonCount > 0).toBe(true)
        expect(suggestionCount > 0).toBe(true)
    })
})