npm - @getmikk/ai-context - Versions diffs - 1.8.0 → 2.0.0 - Mend

@getmikk/ai-context 1.8.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/package.json +3 -3
package/src/claude-md-generator.ts +13 -14
package/src/context-builder.ts +206 -24
package/src/providers.ts +42 -1
package/src/token-counter.ts +157 -0
package/src/types.ts +15 -1
package/tests/context-builder.test.ts +159 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@getmikk/ai-context",
-    "version": "1.8.0",
+    "version": "2.0.0",
     "license": "Apache-2.0",
     "repository": {
         "type": "git",
@@ -21,8 +21,8 @@
         "dev": "tsc --watch"
     },
     "dependencies": {
-        "@getmikk/core": "^1.8.0",
-        "@getmikk/intent-engine": "^1.8.0"
+        "@getmikk/core": "^2.0.0",
+        "@getmikk/intent-engine": "^2.0.0"
     },
     "devDependencies": {
         "typescript": "^5.7.0",

package/src/claude-md-generator.ts CHANGED Viewed

@@ -1,15 +1,11 @@
 import type { MikkContract, MikkLock, MikkLockFunction } from '@getmikk/core'
 import * as fs from 'node:fs'
 import * as path from 'node:path'
+import { countTokens, estimateFileTokens } from './token-counter.js'
 /** Default token budget for claude.md — generous but still bounded */
 const DEFAULT_TOKEN_BUDGET = 12000
-/** Rough token estimation: ~4 chars per token */
-function estimateTokens(text: string): number {
-    return Math.ceil(text.length / 4)
-}
 /** Metadata from package.json that enriches the AI context */
 export interface ProjectMeta {
     description?: string
@@ -49,20 +45,20 @@ export class ClaudeMdGenerator {
         // --- Tier 1: Summary (always included) ----------------------
         const summary = this.generateSummary()
         sections.push(summary)
-        usedTokens += estimateTokens(summary)
+        usedTokens += countTokens(summary)
         // --- Tech stack & conventions (always included if detectable) ---
         const techSection = this.generateTechStackSection()
         if (techSection) {
             sections.push(techSection)
-            usedTokens += estimateTokens(techSection)
+            usedTokens += countTokens(techSection)
         }
         // --- Build / test / run commands -----------------------------
         const commandsSection = this.generateCommandsSection()
         if (commandsSection) {
             sections.push(commandsSection)
-            usedTokens += estimateTokens(commandsSection)
+            usedTokens += countTokens(commandsSection)
         }
         // --- Tier 2: Module details (if budget allows) --------------
@@ -76,7 +72,7 @@ export class ClaudeMdGenerator {
         for (const module of modules) {
             const moduleSection = this.generateModuleSection(module.id)
-            const tokens = estimateTokens(moduleSection)
+            const tokens = countTokens(moduleSection)
             if (usedTokens + tokens > this.tokenBudget) {
                 sections.push('\n  <!-- Full details truncated due to context budget -->\n')
                 break
@@ -90,7 +86,7 @@ export class ClaudeMdGenerator {
         // --- Context files: schemas, data models, config ---------
         const contextSection = this.generateContextFilesSection()
         if (contextSection) {
-            const ctxTokens = estimateTokens(contextSection)
+            const ctxTokens = countTokens(contextSection)
             if (usedTokens + ctxTokens <= this.tokenBudget) {
                 sections.push(contextSection)
                 usedTokens += ctxTokens
@@ -100,7 +96,7 @@ export class ClaudeMdGenerator {
         // --- File import graph per module ----------------------------
         const importSection = this.generateImportGraphSection()
         if (importSection) {
-            const impTokens = estimateTokens(importSection)
+            const impTokens = countTokens(importSection)
             if (usedTokens + impTokens <= this.tokenBudget) {
                 sections.push(importSection)
                 usedTokens += impTokens
@@ -110,7 +106,7 @@ export class ClaudeMdGenerator {
         // --- HTTP Routes (Express + Next.js) -------------------------
         const routesSection = this.generateRoutesSection()
         if (routesSection) {
-            const routeTokens = estimateTokens(routesSection)
+            const routeTokens = countTokens(routesSection)
             if (usedTokens + routeTokens <= this.tokenBudget) {
                 sections.push(routesSection)
                 usedTokens += routeTokens
@@ -119,14 +115,14 @@ export class ClaudeMdGenerator {
         // --- Tier 3: Constraints & decisions ------------------------
         const constraintsSection = this.generateConstraintsSection()
-        const constraintTokens = estimateTokens(constraintsSection)
+        const constraintTokens = countTokens(constraintsSection)
         if (usedTokens + constraintTokens <= this.tokenBudget) {
             sections.push(constraintsSection)
             usedTokens += constraintTokens
         }
         const decisionsSection = this.generateDecisionsSection()
-        const decisionTokens = estimateTokens(decisionsSection)
+        const decisionTokens = countTokens(decisionsSection)
         if (usedTokens + decisionTokens <= this.tokenBudget) {
             sections.push(decisionsSection)
             usedTokens += decisionTokens
@@ -225,7 +221,10 @@ export class ClaudeMdGenerator {
         }
         // Key functions: top 5 by calledBy count (most depended upon)
+        // Exclude functions already in entry points to avoid duplicates
+        const entryPointIds = new Set(entryPoints.map(fn => fn.id))
         const keyFunctions = [...moduleFunctions]
+            .filter(fn => !entryPointIds.has(fn.id)) // Exclude duplicates
             .sort((a, b) => b.calledBy.length - a.calledBy.length)
             .filter(fn => fn.calledBy.length > 0)
             .slice(0, 5)

package/src/context-builder.ts CHANGED Viewed

@@ -107,31 +107,92 @@ const STOP_WORDS = new Set([
     'want', 'like', 'just', 'also', 'some', 'all', 'any', 'my', 'your',
 ])
-function extractKeywords(task: string): string[] {
-    return task
+const SHORT_TECH_WORDS = new Set([
+    'ai', 'ml', 'ui', 'ux', 'ts', 'js', 'db', 'io', 'id', 'ip',
+    'ci', 'cd', 'qa', 'api', 'mcp', 'jwt', 'sql',
+])
+function normalizeKeyword(value: string): string { // lowercase, trim, then strip every char outside a-z, 0-9, underscore and hyphen
+    return value.toLowerCase().trim().replace(/[^a-z0-9_-]/g, '')
+}
+function extractKeywords(task: string, requiredKeywords: string[] = []): string[] {
+    const out: string[] = []
+    const seen = new Set<string>()
+    for (const match of task.matchAll(/"([^"]+)"|'([^']+)'/g)) {
+        const phrase = (match[1] ?? match[2] ?? '').toLowerCase().trim()
+        if (!phrase || seen.has(phrase)) continue
+        seen.add(phrase)
+        out.push(phrase)
+    }
+    const words = task
         .toLowerCase()
         .replace(/[^a-z0-9\s_-]/g, ' ')
         .split(/\s+/)
-        .filter(w => w.length > 2 && !STOP_WORDS.has(w))
+        .map(normalizeKeyword)
+        .filter(w => {
+            if (!w || STOP_WORDS.has(w)) return false
+            if (w.length > 2) return true
+            return SHORT_TECH_WORDS.has(w)
+        })
+    for (const w of words) {
+        if (seen.has(w)) continue
+        seen.add(w)
+        out.push(w)
+    }
+    const expandedRequired = requiredKeywords
+        .flatMap(item => item.split(/[,\s]+/))
+        .map(normalizeKeyword)
+        .filter(Boolean)
+    for (const kw of expandedRequired) {
+        if (seen.has(kw)) continue
+        seen.add(kw)
+        out.push(kw)
+    }
+    return out
 }
 /**
  * Keyword score for a function: exact match > partial match
  */
-function keywordScore(fn: MikkLockFunction, keywords: string[]): number {
-    if (keywords.length === 0) return 0
+function keywordScore(
+    fn: MikkLockFunction,
+    keywords: string[]
+): { score: number; matchedKeywords: string[] } {
+    if (keywords.length === 0) return { score: 0, matchedKeywords: [] }
     const nameLower = fn.name.toLowerCase()
     const fileLower = fn.file.toLowerCase()
+    const fileNoExt = fileLower.replace(/\.(d\.ts|ts|tsx|js|jsx|mjs|cjs|mts|cts)\b/g, ' ')
+    const purposeLower = (fn.purpose ?? '').toLowerCase()
+    const tokenSet = new Set<string>([
+        ...(nameLower.match(/[a-z0-9]+/g) ?? []),
+        ...(fileNoExt.match(/[a-z0-9]+/g) ?? []),
+        ...(purposeLower.match(/[a-z0-9]+/g) ?? []),
+    ])
     let score = 0
+    const matched: string[] = []
     for (const kw of keywords) {
-        if (nameLower === kw) {
+        const shortKw = kw.length <= 2
+        const exactName = nameLower === kw
+        const partial = shortKw
+            ? tokenSet.has(kw)
+            : (nameLower.includes(kw) || fileLower.includes(kw) || purposeLower.includes(kw))
+        if (exactName) {
             score = Math.max(score, WEIGHT.KEYWORD_EXACT)
-        } else if (nameLower.includes(kw) || fileLower.includes(kw)) {
+            matched.push(kw)
+        } else if (partial) {
             score = Math.max(score, WEIGHT.KEYWORD_PARTIAL)
+            matched.push(kw)
         }
     }
-    return score
+    return { score, matchedKeywords: matched }
 }
 // ---------------------------------------------------------------------------
@@ -145,8 +206,10 @@ function keywordScore(fn: MikkLockFunction, keywords: string[]): number {
 function resolveSeeds(
     query: ContextQuery,
     contract: MikkContract,
-    lock: MikkLock
+    lock: MikkLock,
+    keywords: string[]
 ): string[] {
+    const strictMode = query.relevanceMode === 'strict'
     const seeds = new Set<string>()
     // 1. Explicit focus files → all functions in those files
@@ -171,16 +234,15 @@ function resolveSeeds(
     // 3. Keyword match against function names and file paths
     if (seeds.size === 0) {
-        const keywords = extractKeywords(query.task)
         for (const fn of Object.values(lock.functions)) {
-            if (keywordScore(fn, keywords) >= WEIGHT.KEYWORD_PARTIAL) {
+            if (keywordScore(fn, keywords).score >= WEIGHT.KEYWORD_PARTIAL) {
                 seeds.add(fn.id)
             }
         }
     }
     // 4. Module name match against task
-    if (seeds.size === 0) {
+    if (!strictMode && seeds.size === 0) {
         const taskLower = query.task.toLowerCase()
         for (const mod of contract.declared.modules) {
             if (
@@ -219,11 +281,22 @@ export class ContextBuilder {
      * 6. Group survivors by module, emit structured context
      */
     build(query: ContextQuery): AIContext {
+        const relevanceMode = query.relevanceMode ?? 'balanced'
+        const strictMode = relevanceMode === 'strict'
         const tokenBudget = query.tokenBudget ?? DEFAULT_TOKEN_BUDGET
         const maxHops = query.maxHops ?? 4
+        const requiredKeywords = query.requiredKeywords ?? []
+        const keywords = extractKeywords(query.task, requiredKeywords)
+        const requiredKeywordSet = new Set(
+            requiredKeywords
+                .flatMap(item => item.split(/[,\s]+/))
+                .map(normalizeKeyword)
+                .filter(Boolean)
+        )
         // ── Step 1: Resolve seeds ──────────────────────────────────────────
-        const seeds = resolveSeeds(query, this.contract, this.lock)
+        const seeds = resolveSeeds(query, this.contract, this.lock, keywords)
+        const seedSet = new Set(seeds)
         // ── Step 2: BFS proximity scores ──────────────────────────────────
         const proximityMap = seeds.length > 0
@@ -231,8 +304,15 @@ export class ContextBuilder {
             : new Map<string, number>()
         // ── Step 3: Score every function ──────────────────────────────────
-        const keywords = extractKeywords(query.task)
         const allFunctions = Object.values(this.lock.functions)
+        const focusFiles = query.focusFiles ?? []
+        const focusModules = new Set(query.focusModules ?? [])
+        const requireAllKeywords = query.requireAllKeywords ?? false
+        const minKeywordMatches = query.minKeywordMatches ?? 1
+        const strictPassIds = new Set<string>()
+        const reasons: string[] = []
+        const suggestions: string[] = []
+        const nearMissSuggestions: string[] = []
         const scored: { fn: MikkLockFunction; score: number }[] = allFunctions.map(fn => {
             let score = 0
@@ -244,33 +324,128 @@ export class ContextBuilder {
             }
             // Keyword match
-            score += keywordScore(fn, keywords)
+            const kwInfo = keywordScore(fn, keywords)
+            score += kwInfo.score
+            const matchedSet = new Set(kwInfo.matchedKeywords)
+            const inFocusFile = focusFiles.some(filePath => fn.file.includes(filePath) || filePath.includes(fn.file))
+            const inFocusModule = focusModules.has(fn.moduleId)
+            const inFocus = inFocusFile || inFocusModule
+            const requiredPass = requiredKeywordSet.size === 0
+                ? true
+                : [...requiredKeywordSet].every(kw => matchedSet.has(kw))
+            const generalPass = requireAllKeywords
+                ? (keywords.length > 0 && matchedSet.size >= keywords.length)
+                : (keywords.length === 0 ? false : matchedSet.size >= minKeywordMatches)
+            const keywordPass = requiredPass && generalPass
+            if (keywordPass) strictPassIds.add(fn.id)
+            if (strictMode) {
+                const isSeed = seedSet.has(fn.id)
+                const seedFromFocus = isSeed && (inFocus || focusFiles.length > 0 || focusModules.size > 0)
+                if (!(inFocus || keywordPass || seedFromFocus)) {
+                    if (kwInfo.score > 0) {
+                        nearMissSuggestions.push(`${fn.name} (${fn.file}:${fn.startLine})`)
+                    }
+                    return { fn, score: -1 }
+                }
+            }
             // Entry-point bonus
-            if (fn.calledBy.length === 0) score += WEIGHT.ENTRY_POINT
+            if (!strictMode && fn.calledBy.length === 0) score += WEIGHT.ENTRY_POINT
             return { fn, score }
         })
         // ── Step 4: Sort by score descending ──────────────────────────────
         scored.sort((a, b) => b.score - a.score)
+        for (const { fn, score } of scored) {
+            if (score <= 0) continue
+            suggestions.push(`${fn.name} (${fn.file}:${fn.startLine})`)
+            if (suggestions.length >= 5) break
+        }
+        for (const s of nearMissSuggestions) {
+            if (suggestions.includes(s)) continue
+            suggestions.push(s)
+            if (suggestions.length >= 5) break
+        }
         // ── Step 5: Fill token budget ──────────────────────────────────────
-        const selected: MikkLockFunction[] = []
+        let selected: MikkLockFunction[] = []
+        // Pre-calculate baseline overhead (context files, routes, constraints)
         let usedTokens = 0
+        const routesStr = (!strictMode && this.lock.routes) ? JSON.stringify(this.lock.routes) : ''
+        const ctxStr = (!strictMode && this.lock.contextFiles)
+            ? this.lock.contextFiles.map(cf => readContextFile(cf.path, query.projectRoot).slice(0, 2000)).join('\n')
+            : ''
+        usedTokens += estimateTokens(routesStr + ctxStr + JSON.stringify(this.contract.declared.constraints))
         for (const { fn, score } of scored) {
             if (score <= 0 && seeds.length > 0) break // Nothing relevant left
             if (selected.length >= (query.maxFunctions ?? 80)) break
             const snippet = this.buildFunctionSnippet(fn, query)
-            const tokens = estimateTokens(snippet)
+            // Multiply tokens by 2.2 to account for it being in both JSON and text prompt, plus JSON framing
+            const tokens = estimateTokens(snippet) * 2.2
-            if (usedTokens + tokens > tokenBudget) continue  // skip, try smaller ones later
+            if (usedTokens + tokens > tokenBudget && selected.length > 0) continue  // skip, try smaller ones later
             selected.push(fn)
             usedTokens += tokens
         }
+        if (strictMode) {
+            if (requiredKeywordSet.size > 0) {
+                reasons.push(`required terms: ${[...requiredKeywordSet].join(', ')}`)
+            }
+            if (strictPassIds.size === 0) {
+                reasons.push('no functions matched strict keyword filters')
+            }
+        }
+        if (strictMode && query.exactOnly) {
+            selected = selected.filter(fn => strictPassIds.has(fn.id))
+            usedTokens = selected.reduce(
+                (sum, fn) => sum + estimateTokens(this.buildFunctionSnippet(fn, query)),
+                0
+            )
+            if (selected.length === 0 && strictPassIds.size > 0) {
+                reasons.push('exact matches exist but did not fit token budget or max function limit')
+            }
+        }
+        if (strictMode && query.failFast && selected.length === 0) {
+            reasons.push('fail-fast enabled: returning no context when exact match set is empty')
+            return {
+                project: {
+                    name: this.contract.project.name,
+                    language: this.contract.project.language,
+                    description: this.contract.project.description,
+                    moduleCount: this.contract.declared.modules.length,
+                    functionCount: Object.keys(this.lock.functions).length,
+                },
+                modules: [],
+                constraints: this.contract.declared.constraints,
+                decisions: this.contract.declared.decisions.map(d => ({
+                    title: d.title,
+                    reason: d.reason,
+                })),
+                contextFiles: [],
+                routes: [],
+                prompt: '',
+                meta: {
+                    seedCount: seeds.length,
+                    totalFunctionsConsidered: allFunctions.length,
+                    selectedFunctions: 0,
+                    estimatedTokens: 0,
+                    keywords,
+                    reasons,
+                    suggestions: suggestions.length > 0 ? suggestions : undefined,
+                },
+            }
+        }
         // ── Step 6: Group by module ────────────────────────────────────────
         const byModule = new Map<string, MikkLockFunction[]>()
         for (const fn of selected) {
@@ -298,6 +473,10 @@ export class ContextBuilder {
         // Sort modules: ones with more selected functions first
         contextModules.sort((a, b) => b.functions.length - a.functions.length)
+        // Strict mode favors precision and token efficiency: keep only function graph context.
+        const contextFiles = strictMode ? [] : this.lock.contextFiles
+        const routes = strictMode ? [] : this.lock.routes
         return {
             project: {
                 name: this.contract.project.name,
@@ -312,12 +491,12 @@ export class ContextBuilder {
                 title: d.title,
                 reason: d.reason,
             })),
-            contextFiles: this.lock.contextFiles?.map(cf => ({
+            contextFiles: contextFiles?.map(cf => ({
                 path: cf.path,
                 content: readContextFile(cf.path, query.projectRoot),
                 type: cf.type,
             })),
-            routes: this.lock.routes?.map(r => ({
+            routes: routes?.map(r => ({
                 method: r.method,
                 path: r.path,
                 handler: r.handler,
@@ -332,6 +511,8 @@ export class ContextBuilder {
                 selectedFunctions: selected.length,
                 estimatedTokens: usedTokens,
                 keywords,
+                reasons: reasons.length > 0 ? reasons : undefined,
+                suggestions: (selected.length === 0 && suggestions.length > 0) ? suggestions : undefined,
             },
         }
     }
@@ -414,6 +595,7 @@ export class ContextBuilder {
     /** Generate the natural-language prompt section */
     private generatePrompt(query: ContextQuery, modules: ContextModule[]): string {
         const lines: string[] = []
+        const strictMode = query.relevanceMode === 'strict'
         lines.push('=== ARCHITECTURAL CONTEXT ===')
         lines.push(`Project: ${this.contract.project.name} (${this.contract.project.language})`)
@@ -425,7 +607,7 @@ export class ContextBuilder {
         // Include routes (API endpoints) — critical for understanding how the app works
         const routes = this.lock.routes
-        if (routes && routes.length > 0) {
+        if (!strictMode && routes && routes.length > 0) {
             lines.push('=== HTTP ROUTES ===')
             for (const r of routes) {
                 const mw = r.middlewares.length > 0 ? ` [${r.middlewares.join(', ')}]` : ''
@@ -436,7 +618,7 @@ export class ContextBuilder {
         // Include context files (schemas, data models) first — they define the shape
         const ctxFiles = this.lock.contextFiles
-        if (ctxFiles && ctxFiles.length > 0) {
+        if (!strictMode && ctxFiles && ctxFiles.length > 0) {
             lines.push('=== DATA MODELS & SCHEMAS ===')
             for (const cf of ctxFiles) {
                 lines.push(`--- ${cf.path} (${cf.type}) ---`)
@@ -746,4 +928,4 @@ function dedent(lines: string[]): string[] {
         const spaces = l.length - l.trimStart().length
         return l.substring(Math.min(min, spaces))
     })
-}
+}

package/src/providers.ts CHANGED Viewed

@@ -28,6 +28,16 @@ export class ClaudeProvider implements ContextProvider {
         lines.push(`  <seeds_found>${context.meta?.seedCount ?? 0}</seeds_found>`)
         lines.push(`  <functions_selected>${context.meta?.selectedFunctions ?? 0} of ${context.meta?.totalFunctionsConsidered ?? 0}</functions_selected>`)
         lines.push(`  <estimated_tokens>${context.meta?.estimatedTokens ?? 0}</estimated_tokens>`)
+        if (context.meta?.reasons && context.meta.reasons.length > 0) {
+            for (const reason of context.meta.reasons) {
+                lines.push(`  <reason>${esc(reason)}</reason>`)
+            }
+        }
+        if (context.meta?.suggestions && context.meta.suggestions.length > 0) {
+            for (const s of context.meta.suggestions) {
+                lines.push(`  <suggestion>${esc(s)}</suggestion>`)
+            }
+        }
         lines.push('</context_meta>')
         lines.push('')
@@ -79,6 +89,23 @@ export class ClaudeProvider implements ContextProvider {
             lines.push('')
         }
+        if (context.modules.length === 0 && context.meta?.reasons?.length) {
+            lines.push('<no_match_reason>')
+            for (const reason of context.meta.reasons) {
+                lines.push(`  <item>${esc(reason)}</item>`)
+            }
+            lines.push('</no_match_reason>')
+            lines.push('')
+            if (context.meta.suggestions && context.meta.suggestions.length > 0) {
+                lines.push('<did_you_mean>')
+                for (const suggestion of context.meta.suggestions) {
+                    lines.push(`  <item>${esc(suggestion)}</item>`)
+                }
+                lines.push('</did_you_mean>')
+                lines.push('')
+            }
+        }
         // ── Context files (schemas, data models, config) ───────────────────
         if (context.contextFiles && context.contextFiles.length > 0) {
             lines.push('<context_files>')
@@ -160,6 +187,20 @@ export class CompactProvider implements ContextProvider {
             `Task keywords: ${context.meta?.keywords?.join(', ') ?? ''}`,
             '',
         ]
+        if (context.modules.length === 0 && context.meta?.reasons?.length) {
+            lines.push('No exact context selected:')
+            for (const reason of context.meta.reasons) {
+                lines.push(`- ${reason}`)
+            }
+            if (context.meta.suggestions && context.meta.suggestions.length > 0) {
+                lines.push('')
+                lines.push('Did you mean:')
+                for (const suggestion of context.meta.suggestions) {
+                    lines.push(`- ${suggestion}`)
+                }
+            }
+            lines.push('')
+        }
         for (const mod of context.modules) {
             lines.push(`## ${mod.name}`)
             for (const fn of mod.functions) {
@@ -218,4 +259,4 @@ function esc(s: string): string {
         .replace(/</g, '&lt;')
         .replace(/>/g, '&gt;')
         .replace(/"/g, '&quot;')
-}
+}

package/src/token-counter.ts ADDED Viewed

@@ -0,0 +1,157 @@
+/**
+ * Token Counter — accurate, fast token estimation for context budget management.
+ *
+ * Design:
+ *  - `countTokens(text)` — accurate, linear-scan, O(n)
+ *  - `countTokensFast(text)` — single-pass heuristic, O(n) for hot paths
+ *  - `estimateFileTokens(content, path)` — file-type-aware wrapper
+ *  - `TokenBudget` — budget manager with truncation
+ *
+ * The previous implementation used a character-position Set to track processed
+ * ranges across multiple regex scans — O(n²) per call on large files.
+ * Replaced with a single linear scan that categorises characters without
+ * per-character Set lookups.
+ */
+const CHARS_PER_TOKEN     = 3.8  // GPT-4 average; default divisor used by countTokensFast
+const MIN_CHARS_PER_TOKEN = 2.0  // Dense code; produces the upper clamp (most tokens) in countTokens
+const MAX_CHARS_PER_TOKEN = 6.0  // Sparse natural language; produces the lower clamp (fewest tokens) in countTokens
+/**
+ * Count tokens with reasonable accuracy — O(n) single linear scan.
+ *
+ * Classifies runs of characters into:
+ *   - whitespace (space, tab, LF, CR): free (separators, not tokens)
+ *   - string literals: ~4 chars/token, minimum 1 token per literal
+ *   - digit runs (dots included): ~2 chars/token (numbers tokenise finely)
+ *   - identifiers/keywords: up to 6 chars → 1 token, longer → ~3.5 chars/token
+ *   - operators/punctuation and any other character: 1 char = 1 token
+ *
+ * Quote handling is purely lexical: any double quote, single quote or backtick
+ * opens a run that ends at the next unescaped matching quote or at end of
+ * input, so an apostrophe in prose is scanned as the start of a string literal.
+ *
+ * The raw count is finally clamped to the range
+ * [ceil(length / MAX_CHARS_PER_TOKEN), ceil(length / MIN_CHARS_PER_TOKEN)],
+ * where length is the full text length, whitespace included.
+ *
+ * @param text Text to measure; an empty string yields 0.
+ * @returns Estimated token count as an integer.
+ */
+export function countTokens(text: string): number {
+    if (!text) return 0
+    let tokens = 0
+    let i = 0
+    const n = text.length
+    while (i < n) {
+        const ch = text[i]
+        // Whitespace — boundary only, no token cost
+        if (ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r') {
+            i++
+            continue
+        }
+        // String literals — scan to the matching closing quote; a backslash skips the character after it
+        if (ch === '"' || ch === "'" || ch === '`') {
+            const q = ch
+            let len = 1
+            i++
+            while (i < n) {
+                if (text[i] === '\\') { i += 2; len += 2; continue }
+                if (text[i] === q) { i++; len++; break }
+                i++; len++
+            }
+            tokens += Math.max(1, Math.ceil(len / 4)) // len includes the quote characters
+            continue
+        }
+        // Digit runs — token-heavy; dots are absorbed so decimals and dotted versions form one run
+        if (ch >= '0' && ch <= '9') {
+            let len = 0
+            while (i < n && ((text[i] >= '0' && text[i] <= '9') || text[i] === '.')) {
+                i++; len++
+            }
+            tokens += Math.max(1, Math.ceil(len / 2))
+            continue
+        }
+        // Identifier / keyword runs — ASCII letters, underscore or dollar to start; digits allowed after
+        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '_' || ch === '$') {
+            let len = 0
+            while (
+                i < n &&
+                ((text[i] >= 'a' && text[i] <= 'z') || (text[i] >= 'A' && text[i] <= 'Z') ||
+                 (text[i] >= '0' && text[i] <= '9') || text[i] === '_' || text[i] === '$')
+            ) { i++; len++ }
+            tokens += len <= 6 ? 1 : Math.ceil(len / 3.5) // short names cost one token, long names scale with length
+            continue
+        }
+        // Operators, punctuation, brackets and anything unclassified (including non-ASCII) — 1 char per token
+        tokens++
+        i++
+    }
+    const minEstimate = Math.ceil(text.length / MAX_CHARS_PER_TOKEN) // lower bound of the clamp
+    const maxEstimate = Math.ceil(text.length / MIN_CHARS_PER_TOKEN) // upper bound of the clamp
+    return Math.max(minEstimate, Math.min(maxEstimate, tokens)) // keep the scan result inside [minEstimate, maxEstimate]
+}
+/**
+ * Fast O(n) single-pass heuristic intended for hot paths.
+ * Within this file it backs TokenBudget.fits and TokenBudget.truncate.
+ *
+ * Counts ASCII letters and digits separately from every other non-whitespace
+ * code unit (so non-ASCII characters land in the punctuation bucket). When
+ * more than 30% of the non-whitespace characters are punctuation the text is
+ * treated as dense and divided by 2.8 chars/token; otherwise CHARS_PER_TOKEN
+ * is used. The divisor is applied to the full text length, whitespace included.
+ *
+ * @param text Text to measure.
+ * @returns 0 for empty or whitespace-only text, otherwise an integer of at least 1.
+ */
+export function countTokensFast(text: string): number {
+    if (!text) return 0
+    let alphaNum = 0, punct = 0
+    for (let i = 0; i < text.length; i++) {
+        const c = text.charCodeAt(i)
+        if ((c >= 65 && c <= 90) || (c >= 97 && c <= 122) || (c >= 48 && c <= 57)) { // A-Z, a-z, 0-9
+            alphaNum++
+        } else if (c !== 32 && c !== 9 && c !== 10 && c !== 13) { // not space, tab, LF or CR
+            punct++
+        }
+    }
+    const nonWs = alphaNum + punct
+    if (nonWs === 0) return 0
+    const punctRatio = nonWs > 0 ? punct / nonWs : 0 // the guard is redundant after the early return above
+    const charsPerToken = punctRatio > 0.3 ? 2.8 : CHARS_PER_TOKEN
+    return Math.max(1, Math.ceil(text.length / charsPerToken))
+}
+/**
+ * Estimate tokens for a file with content-type awareness.
+ *
+ * The extension is whatever follows the last dot in filePath, lowercased.
+ * Only the md extension is special-cased: its countTokens result is scaled
+ * by 0.9 and rounded up. Every other path, including one with no dot at all,
+ * gets the plain countTokens result.
+ *
+ * @param content File content to measure.
+ * @param filePath Path used solely to derive the extension; the file is not read.
+ * @returns Estimated token count as an integer.
+ */
+export function estimateFileTokens(content: string, filePath: string): number {
+    const ext = filePath.split('.').pop()?.toLowerCase()
+    if (ext === 'md') return Math.ceil(countTokens(content) * 0.9)
+    return countTokens(content)
+}
+/**
+ * Token budget manager — tracks usage and truncates content to fit.
+ *
+ * Members:
+ *  - remaining: maxTokens minus tokens consumed so far, floored at 0.
+ *  - fits(content): estimates content with countTokensFast and reports whether
+ *    it is within remaining × (1 + overflowAllowance). Does not record usage.
+ *  - consume(tokens): unconditionally adds tokens to the running total, then
+ *    reports whether the total is within maxTokens × (1 + overflowAllowance).
+ *  - truncate(content): returns an empty string when nothing remains, the
+ *    content unchanged when its estimate is within remaining, and otherwise a
+ *    proportionally shortened prefix followed by a truncation marker. Does not
+ *    record usage, and the marker text itself is not counted.
+ *
+ * NOTE(review): fits scales the allowance by remaining while consume scales it
+ * by maxTokens, so once remaining reaches 0 fits rejects all non-empty content
+ * even though consume may still return true. Confirm which rule is intended.
+ * NOTE(review): truncate slices by UTF-16 code unit and can split a surrogate
+ * pair at the cut point.
+ *
+ * @param maxTokens Nominal token budget.
+ * @param overflowAllowance Fractional slack above the budget; defaults to 0.1.
+ */
+export class TokenBudget {
+    private used = 0 // running total recorded by consume
+    constructor(
+        private readonly maxTokens: number,
+        private readonly overflowAllowance: number = 0.1,
+    ) {}
+    get remaining(): number { // never negative
+        return Math.max(0, this.maxTokens - this.used)
+    }
+    fits(content: string): boolean { // read-only check
+        return countTokensFast(content) <= this.remaining * (1 + this.overflowAllowance)
+    }
+    consume(tokens: number): boolean { // usage is recorded even when this returns false
+        this.used += tokens
+        return this.used <= this.maxTokens * (1 + this.overflowAllowance)
+    }
+    truncate(content: string): string { // read-only; callers must consume separately
+        if (this.remaining <= 0) return ''
+        const estimated = countTokensFast(content)
+        if (estimated <= this.remaining) return content // no overflow allowance is applied here
+        const ratio = this.remaining / estimated
+        const cutAt = Math.floor(content.length * ratio * 0.9) // extra 10% margin on the proportional cut
+        return content.slice(0, cutAt) + '\n… [truncated — token budget reached]'
+    }
+}

package/src/types.ts CHANGED Viewed

@@ -24,6 +24,8 @@ export interface AIContext {
         selectedFunctions: number
         estimatedTokens: number
         keywords: string[]
+        reasons?: string[]
+        suggestions?: string[]
     }
 }
@@ -72,6 +74,18 @@ export interface ContextQuery {
     includeCallGraph?: boolean
     /** Include function bodies for top-scored functions (default true) */
     includeBodies?: boolean
+    /** Relevance mode: balanced (default) or strict (high-precision filtering) */
+    relevanceMode?: 'balanced' | 'strict'
+    /** Additional required terms (comma-separated in CLI) that must be respected */
+    requiredKeywords?: string[]
+    /** In strict mode, require all extracted/required keywords to match */
+    requireAllKeywords?: boolean
+    /** Minimum number of matched keywords required in strict mode (default 1) */
+    minKeywordMatches?: number
+    /** Hard gate in strict mode: final output keeps only strict keyword matches */
+    exactOnly?: boolean
+    /** In strict mode, return empty context if no exact matches are found */
+    failFast?: boolean
     /** Absolute filesystem path to the project root (needed for body reading) */
     projectRoot?: string
 }
@@ -81,4 +95,4 @@ export interface ContextProvider {
     name: string
     formatContext(context: AIContext): string
     maxTokens: number
-}
+}

package/tests/context-builder.test.ts ADDED Viewed

@@ -0,0 +1,159 @@
+import { describe, expect, test } from 'bun:test'
+import { ContextBuilder } from '../src/context-builder.js'
+import type { ContextQuery } from '../src/types.js'
+function makeFixture() {  // Build a minimal contract + lock pair: two declared modules and three functions (two parser, one UI).
+    const contract = {
+        project: {
+            name: 'mikk',
+            language: 'typescript',
+            description: 'fixture',
+        },
+        declared: {
+            modules: [
+                { id: 'core-parser', name: 'Core Parser', description: '', paths: [], entryFunctions: [] },
+                { id: 'ui', name: 'UI', description: '', paths: [], entryFunctions: [] },
+            ],
+            constraints: [],
+            decisions: [],
+        },
+    } as any  // Cast away typing so the fixture only needs the fields listed here.
+    const fnResolver = {  // Exported parser function; calls fnHelper.
+        id: 'fn:parser:resolver',
+        name: 'resolveImports',
+        file: 'packages/core/src/parser/ts-resolver.ts',
+        moduleId: 'core-parser',
+        startLine: 1,
+        endLine: 10,
+        params: [],
+        returnType: 'void',
+        isAsync: false,
+        isExported: true,
+        purpose: 'resolve ts imports',
+        calls: ['fn:parser:helper'],
+        calledBy: [],
+        edgeCasesHandled: [],
+        errorHandling: [],
+    }
+    const fnHelper = {  // Non-exported parser helper; called by fnResolver.
+        id: 'fn:parser:helper',
+        name: 'normalizeTsPath',
+        file: 'packages/core/src/parser/path.ts',
+        moduleId: 'core-parser',
+        startLine: 1,
+        endLine: 8,
+        params: [],
+        returnType: 'string',
+        isAsync: false,
+        isExported: false,
+        purpose: 'normalize ts path',
+        calls: [],
+        calledBy: ['fn:parser:resolver'],
+        edgeCasesHandled: [],
+        errorHandling: [],
+    }
+    const fnUnrelated = {  // Exported UI function with no call edges to the parser functions.
+        id: 'fn:ui:render',
+        name: 'renderHeader',
+        file: 'apps/web/components/header.tsx',
+        moduleId: 'ui',
+        startLine: 1,
+        endLine: 8,
+        params: [],
+        returnType: 'void',
+        isAsync: false,
+        isExported: true,
+        purpose: 'render ui header',
+        calls: [],
+        calledBy: [],
+        edgeCasesHandled: [],
+        errorHandling: [],
+    }
+    const lock = {
+        functions: {  // Keyed by function id.
+            [fnResolver.id]: fnResolver,
+            [fnHelper.id]: fnHelper,
+            [fnUnrelated.id]: fnUnrelated,
+        },
+        files: {  // Keyed by file path; one entry per fixture function's file.
+            [fnResolver.file]: { path: fnResolver.file, moduleId: fnResolver.moduleId, imports: [] },
+            [fnHelper.file]: { path: fnHelper.file, moduleId: fnHelper.moduleId, imports: [] },
+            [fnUnrelated.file]: { path: fnUnrelated.file, moduleId: fnUnrelated.moduleId, imports: [] },
+        },
+        routes: [],
+        contextFiles: [],
+    } as any  // Same cast as the contract above: only the listed fields are supplied.
+    return { contract, lock }
+}
+function namesFrom(query: ContextQuery): string[] {  // Run `query` against a fresh fixture and return the names of all functions in the built context.
+    const { contract, lock } = makeFixture()  // New fixture objects on every call.
+    const builder = new ContextBuilder(contract, lock)
+    const ctx = builder.build(query)
+    return ctx.modules.flatMap(m => m.functions.map(f => f.name))  // Flatten function names across every module in the result.
+}
+describe('ContextBuilder strict relevance mode', () => {  // Three cases: strict vs balanced filtering, requiredKeywords, and failFast.
+    test('strict mode filters unrelated entry-point noise', () => {
+        const balanced = namesFrom({  // Same task and budget as the strict query below; only the relevance settings differ.
+            task: 'fix ts resolver imports',
+            tokenBudget: 1200,
+            includeBodies: false,
+            includeCallGraph: false,
+            relevanceMode: 'balanced',
+        })
+        const strict = namesFrom({
+            task: 'fix ts resolver imports',
+            tokenBudget: 1200,
+            includeBodies: false,
+            includeCallGraph: false,
+            relevanceMode: 'strict',
+            minKeywordMatches: 1,
+        })
+        expect(balanced).toContain('renderHeader')  // Balanced mode is expected to still include the unrelated UI function.
+        expect(strict).not.toContain('renderHeader')  // Strict mode is expected to drop it.
+        expect(strict).toContain('resolveImports')
+    })
+    test('requiredKeywords enforces exact focus in strict mode', () => {
+        const strict = namesFrom({
+            task: 'resolver imports',
+            tokenBudget: 1200,
+            includeBodies: false,
+            includeCallGraph: false,
+            relevanceMode: 'strict',
+            requiredKeywords: ['ts'],  // Both parser fixture functions have 'ts' in their purpose strings; the UI one does not.
+            minKeywordMatches: 1,
+        })
+        expect(strict).toContain('resolveImports')
+        expect(strict).toContain('normalizeTsPath')
+        expect(strict).not.toContain('renderHeader')
+    })
+    test('failFast returns empty context when exact match is impossible', () => {
+        const { contract, lock } = makeFixture()
+        const builder = new ContextBuilder(contract, lock)  // Built directly (not via namesFrom) because the assertions inspect ctx.meta.
+        const ctx = builder.build({
+            task: 'resolver imports',
+            tokenBudget: 1200,
+            includeBodies: false,
+            includeCallGraph: false,
+            relevanceMode: 'strict',
+            requiredKeywords: ['nonexistent'],  // This term appears nowhere in the fixture.
+            exactOnly: true,
+            failFast: true,
+        })
+        expect(ctx.modules.length).toBe(0)
+        expect(ctx.meta.selectedFunctions).toBe(0)
+        expect((ctx.meta.reasons?.length ?? 0) > 0).toBe(true)  // At least one reason must be reported.
+        expect((ctx.meta.suggestions?.length ?? 0) > 0).toBe(true)  // At least one suggestion must be reported.
+    })
+})