@getmikk/intent-engine 1.5.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,17 +1,15 @@
1
1
  # @getmikk/intent-engine
2
2
 
3
- > AI pre-flight system parses natural-language prompts into structured intents, detects constraint conflicts, and generates implementation suggestions before any code changes happen.
3
+ > Architectural pre-flight — check if your idea is safe before writing a single line.
4
4
 
5
5
  [![npm](https://img.shields.io/npm/v/@getmikk/intent-engine)](https://www.npmjs.com/package/@getmikk/intent-engine)
6
6
  [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](../../LICENSE)
7
7
 
8
- `@getmikk/intent-engine` is the "pre-flight check" layer. Given a developer's natural-language description of what they want to do (e.g., *"add a caching layer to the auth module"*), the engine:
8
+ `@getmikk/intent-engine` is the pre-flight check layer. You describe what you want to build in plain English — *"add a caching layer to the auth module"* — and before any code is written, the engine interprets your intent into structured objects, checks it against every architectural constraint in `mikk.json`, detects conflicts and layer violations, and generates a concrete implementation plan with which files to touch and what to create.
9
9
 
10
- 1. **Interprets** the prompt into structured `Intent` objects
11
- 2. **Detects** conflicts against architectural constraints defined in `mikk.json`
12
- 3. **Suggests** which files to touch, what to create, and the estimated blast radius
10
+ For AI coding agents, this is the guardrail that prevents architecturally unsafe code generation. For human developers, it's the equivalent of running your idea past a senior architect who knows every constraint in the codebase.
13
11
 
14
- All of this happens *before* any code is written — giving AI coding agents (or human developers) a guardrail system.
12
+ > Part of [Mikk](../../README.md) — the codebase nervous system for AI-assisted development.
15
13
 
16
14
  ---
17
15
 
@@ -212,6 +210,63 @@ for (const s of suggestions) {
212
210
 
213
211
  ---
214
212
 
213
+ ### SemanticSearcher
214
+
215
+ Finds functions semantically similar to a natural-language query using local embeddings via [`@xenova/transformers`](https://github.com/xenova/transformers.js). No API key required — the model runs entirely offline.
216
+
217
+ **Model:** `Xenova/all-MiniLM-L6-v2` (~22 MB, downloaded once to `~/.cache/huggingface` on first use)
218
+ **Optional peer dependency:** `@xenova/transformers >= 2`
219
+
220
+ ```bash
221
+ bun add @xenova/transformers # only needed if you use SemanticSearcher
222
+ ```
223
+
224
+ ```typescript
225
+ import { SemanticSearcher } from '@getmikk/intent-engine'
226
+
227
+ // Check if @xenova/transformers is installed before using
228
+ if (await SemanticSearcher.isAvailable()) {
229
+ const searcher = new SemanticSearcher(projectRoot)
230
+
231
+ // index() builds embeddings; later calls with an unchanged lock just reload them from the on-disk cache
232
+ await searcher.index(lock)
233
+
234
+ // search() returns the topK most relevant functions
235
+ const results = await searcher.search('validate JWT and return user payload', lock, 5)
236
+ for (const r of results) {
237
+ console.log(`${r.name} (${r.file}:${r.lines}) — score: ${r.score}`)
238
+ console.log(` ${r.purpose}`)
239
+ }
240
+ }
241
+ ```
242
+
243
+ **Cache behaviour:** Embeddings are persisted to `{projectRoot}/.mikk/embeddings.json` and fingerprinted by function count + first 20 sorted IDs. Re-indexing only re-embeds when the lock actually changes (e.g. after `mikk sync`). A cache hit costs a single disk read.
244
+
245
+ **`SemanticMatch`:**
246
+
247
+ | Field | Type | Description |
248
+ |-------|------|-------------|
249
+ | `id` | `string` | Function ID (`fn:module:name`) |
250
+ | `name` | `string` | Function name |
251
+ | `file` | `string` | Source file path |
252
+ | `moduleId` | `string` | Owning module |
253
+ | `purpose` | `string` | One-line purpose from the lock |
254
+ | `lines` | `string` | Line range, e.g. `"12-34"` |
255
+ | `score` | `number` | Cosine similarity, rounded to 3 decimals and clamped to `[-1, 1]` — higher is more relevant |
256
+
257
+ **API:**
258
+
259
+ | Method | Description |
260
+ |--------|-------------|
261
+ | `SemanticSearcher.isAvailable()` | Returns `true` if `@xenova/transformers` is importable |
262
+ | `new SemanticSearcher(projectRoot)` | Creates an instance scoped to a project root |
263
+ | `.index(lock)` | Builds/loads embeddings for all functions in the lock |
264
+ | `.search(query, lock, topK?)` | Returns top `topK` (default 10) semantically similar functions |
265
+
266
+ > **Note:** Call `index()` before `search()`, otherwise `search()` throws `"Call index() before search()"`. The MCP server keeps a per-project singleton to avoid repeated model loads.
267
+
268
+ ---
269
+
215
270
  ## Usage with AI Agents
216
271
 
217
272
  The intent engine is designed to be called by AI coding agents as a pre-flight check:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@getmikk/intent-engine",
3
- "version": "1.5.1",
3
+ "version": "1.6.0",
4
4
  "license": "Apache-2.0",
5
5
  "repository": {
6
6
  "type": "git",
@@ -21,11 +21,20 @@
21
21
  "dev": "tsc --watch"
22
22
  },
23
23
  "dependencies": {
24
- "@getmikk/core": "^1.5.1",
24
+ "@getmikk/core": "^1.6.0",
25
25
  "zod": "^3.22.0"
26
26
  },
27
+ "peerDependencies": {
28
+ "@xenova/transformers": ""
29
+ },
30
+ "peerDependenciesMeta": {
31
+ "@xenova/transformers": {
32
+ "optional": true
33
+ }
34
+ },
27
35
  "devDependencies": {
28
- "typescript": "^5.7.0",
29
- "@types/node": "^22.0.0"
36
+ "@types/bun": "^1.3.10",
37
+ "@types/node": "^22.0.0",
38
+ "typescript": "^5.7.0"
30
39
  }
31
40
  }
package/src/index.ts CHANGED
@@ -2,5 +2,7 @@ export { IntentInterpreter } from './interpreter.js'
2
2
  export { ConflictDetector } from './conflict-detector.js'
3
3
  export { Suggester } from './suggester.js'
4
4
  export { PreflightPipeline } from './preflight.js'
5
+ export { SemanticSearcher } from './semantic-searcher.js'
6
+ export type { SemanticMatch } from './semantic-searcher.js'
5
7
  export type { Intent, Conflict, ConflictResult, Suggestion, PreflightResult, AIProviderConfig } from './types.js'
6
8
  export { IntentSchema } from './types.js'
@@ -0,0 +1,170 @@
1
+ import * as path from 'node:path'
2
+ import * as fs from 'node:fs/promises'
3
+ import type { MikkLock } from '@getmikk/core'
4
+
5
+ interface EmbeddingCache {
6
+ lockFingerprint: string // lockFingerprint(lock) value the vectors were computed for; index() rebuilds when it differs
7
+ model: string // model id that produced the vectors; index() rebuilds when it differs from SemanticSearcher.MODEL
8
+ embeddings: Record<string, number[]> // fnId → unit-normed vector
9
+ }
10
+
11
+ export interface SemanticMatch {
12
+ id: string
13
+ name: string
14
+ file: string
15
+ moduleId: string
16
+ purpose: string
17
+ lines: string
18
+ score: number // cosine similarity, rounded to 3 decimals; clamped to [-1, 1], so it can be negative
19
+ }
20
+
21
/**
 * SemanticSearcher — finds functions semantically similar to a natural-language
 * query using local embeddings via @xenova/transformers.
 *
 * Model: Xenova/all-MiniLM-L6-v2.
 * Embeddings for the whole lock are cached in {projectRoot}/.mikk/embeddings.json
 * and recomputed in full whenever lockFingerprint(lock) or the model id differs
 * from the cached values.
 *
 * Usage:
 *   const searcher = new SemanticSearcher(projectRoot)
 *   await searcher.index(lock)
 *   const results = await searcher.search('validate JWT token', lock)
 */
export class SemanticSearcher {
    static readonly MODEL = 'Xenova/all-MiniLM-L6-v2'

    private readonly cachePath: string
    // The optional peer dependency is only stub-typed, so the extractor stays `any`.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    private pipeline: any = null
    private cache: EmbeddingCache | null = null

    /**
     * @param projectRoot Directory under which `.mikk/embeddings.json` is read and written.
     */
    constructor(private readonly projectRoot: string) {
        this.cachePath = path.join(projectRoot, '.mikk', 'embeddings.json')
    }

    /**
     * Returns true when @xenova/transformers is installed and importable.
     * Any import failure is reported as `false` rather than thrown.
     */
    static async isAvailable(): Promise<boolean> {
        try {
            await import('@xenova/transformers')
            return true
        } catch {
            return false
        }
    }

    /**
     * Build (or load from cache) embeddings for every function in the lock.
     *
     * Resolution order: matching in-memory cache → matching on-disk cache →
     * empty-lock shortcut → full re-embed followed by a write to disk.
     *
     * @param lock Lock whose `functions` are embedded.
     * @throws If the model cannot be loaded or the cache file cannot be written.
     */
    async index(lock: MikkLock): Promise<void> {
        const fingerprint = lockFingerprint(lock)

        // Already indexed for this exact lock/model in this instance: no disk read needed.
        const inMemory = this.cache
        if (
            inMemory !== null &&
            inMemory.lockFingerprint === fingerprint &&
            inMemory.model === SemanticSearcher.MODEL
        ) {
            return
        }

        const persisted = await this.readPersistedCache(fingerprint)
        if (persisted !== null) {
            this.cache = persisted
            return
        }

        // Empty lock: nothing to embed, and nothing worth persisting.
        const fns = Object.values(lock.functions)
        if (fns.length === 0) {
            this.cache = { lockFingerprint: fingerprint, model: SemanticSearcher.MODEL, embeddings: {} }
            return
        }

        // Text representation: name + purpose + param names + return type (no bodies).
        const texts = fns.map(fn => {
            const parts: string[] = [fn.name]
            if (fn.purpose) parts.push(fn.purpose)
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            if (fn.params?.length) parts.push(fn.params.map((p: any) => p.name).join(' '))
            if (fn.returnType && fn.returnType !== 'void' && fn.returnType !== 'any') {
                parts.push('returns ' + fn.returnType)
            }
            return parts.join(' ')
        })

        await this.ensurePipeline()
        const embeddings: Record<string, number[]> = {}
        const BATCH = 64
        for (let start = 0; start < fns.length; start += BATCH) {
            const batch = texts.slice(start, start + BATCH)
            const output = await this.pipeline(batch, { pooling: 'mean', normalize: true })
            for (let j = 0; j < batch.length; j++) {
                embeddings[fns[start + j].id] = Array.from(output[j].data as Float32Array)
            }
        }

        this.cache = { lockFingerprint: fingerprint, model: SemanticSearcher.MODEL, embeddings }
        await fs.mkdir(path.dirname(this.cachePath), { recursive: true })
        await fs.writeFile(this.cachePath, JSON.stringify(this.cache))
    }

    /**
     * Find the `topK` functions most semantically similar to `query`.
     *
     * Cached IDs that are missing from `lock` are dropped *before* ranking, so
     * stale entries never take up one of the `topK` slots.
     *
     * @param query Natural-language search text.
     * @param lock  Lock used to resolve cached function IDs to metadata.
     * @param topK  Maximum number of matches; values <= 0 yield an empty array.
     * @returns Matches sorted by descending similarity, scores rounded to 3 decimals.
     * @throws Error('Call index() before search()') when index() has not completed.
     */
    async search(query: string, lock: MikkLock, topK = 10): Promise<SemanticMatch[]> {
        if (!this.cache) throw new Error('Call index() before search()')

        const limit = Math.max(0, topK)
        const entries = Object.entries(this.cache.embeddings)
        // Nothing can be returned, so skip loading the model and embedding the query.
        if (limit === 0 || entries.length === 0) return []

        await this.ensurePipeline()
        const queryOut = await this.pipeline([query], { pooling: 'mean', normalize: true })
        const queryVec: number[] = Array.from(queryOut[0].data as Float32Array)

        const candidates: SemanticMatch[] = []
        for (const [id, vec] of entries) {
            const fn = lock.functions[id]
            // The cache can hold IDs that are no longer in the lock passed by the caller.
            if (!fn) continue
            candidates.push({
                id,
                name: fn.name,
                file: fn.file ?? '',
                moduleId: fn.moduleId ?? '',
                purpose: fn.purpose ?? '',
                lines: `${fn.startLine}-${fn.endLine}`,
                score: cosineSimilarity(queryVec, vec),
            })
        }

        // Rank on the unrounded score; round only for presentation.
        candidates.sort((a, b) => b.score - a.score)
        return candidates.slice(0, limit).map(match => ({
            ...match,
            score: Math.round(match.score * 1000) / 1000,
        }))
    }

    /**
     * Read the on-disk cache and return it only when it is well-formed and was
     * built for `fingerprint` with the current model. A missing, unreadable,
     * corrupt or mismatching file yields `null` so the caller rebuilds.
     */
    private async readPersistedCache(fingerprint: string): Promise<EmbeddingCache | null> {
        try {
            const raw = await fs.readFile(this.cachePath, 'utf-8')
            const parsed: unknown = JSON.parse(raw)
            if (typeof parsed !== 'object' || parsed === null) return null

            const candidate = parsed as Partial<EmbeddingCache>
            const usable =
                candidate.lockFingerprint === fingerprint &&
                candidate.model === SemanticSearcher.MODEL &&
                typeof candidate.embeddings === 'object' &&
                candidate.embeddings !== null
            return usable ? (candidate as EmbeddingCache) : null
        } catch {
            // Miss or corrupt file — deliberately treated as "no cache".
            return null
        }
    }

    /** Lazily load the feature-extraction pipeline; later calls reuse the loaded instance. */
    private async ensurePipeline(): Promise<void> {
        if (this.pipeline) return
        const { pipeline } = await import('@xenova/transformers')
        this.pipeline = await pipeline('feature-extraction', SemanticSearcher.MODEL)
    }
}
156
+
157
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
158
+
159
/**
 * Fingerprint used to decide whether cached embeddings still match a lock.
 *
 * Format: `<function count>:<first 20 sorted IDs joined by "|">:<digest>`.
 * The digest is a 32-bit FNV-1a style hash over EVERY function's ID plus the
 * fields that feed the embedding text (name, purpose, parameter names, return
 * type). Count + first 20 IDs alone miss edits to a function's purpose and
 * renames past the 20th ID, which would leave stale embeddings in use.
 *
 * @param lock Lock whose `functions` map is fingerprinted.
 * @returns A deterministic string; it changes when any ID or embedded field changes.
 */
function lockFingerprint(lock: MikkLock): string {
    const ids = Object.keys(lock.functions).sort()

    const FNV_OFFSET = 0x811c9dc5
    const FNV_PRIME = 0x01000193
    let digest = FNV_OFFSET
    const mix = (text: string): void => {
        for (let i = 0; i < text.length; i++) {
            digest = Math.imul(digest ^ text.charCodeAt(i), FNV_PRIME)
        }
        // Terminate each field so ('ab', 'c') and ('a', 'bc') hash differently.
        digest = Math.imul(digest ^ 0x1f, FNV_PRIME)
    }

    for (const id of ids) {
        const fn = lock.functions[id]
        if (!fn) continue
        mix(id)
        mix(fn.name ?? '')
        mix(fn.purpose ?? '')
        mix((fn.params ?? []).map((p: { name?: unknown }) => String(p.name)).join(' '))
        mix(fn.returnType ?? '')
    }

    const head = ids.slice(0, 20).join('|')
    // `>>> 0` converts the signed 32-bit result of Math.imul to an unsigned value.
    return `${ids.length}:${head}:${(digest >>> 0).toString(16).padStart(8, '0')}`
}
164
+
165
/**
 * Cosine similarity of two unit-normed vectors, computed as their dot product.
 *
 * Both inputs are expected to be already normalised (the pipeline is called with
 * `normalize: true`), so no division by the norms is performed. The result is
 * clamped to [-1, 1] to absorb floating-point drift.
 *
 * Vectors of different length, or containing non-numeric entries (e.g. from a
 * corrupt cache file), have no meaningful similarity and score 0. Without this
 * guard they would yield NaN and break the ranking comparator.
 *
 * @param a First vector (unit length).
 * @param b Second vector (unit length).
 * @returns Similarity in [-1, 1]; 0 for incomparable inputs.
 */
function cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) return 0

    let dot = 0
    for (let i = 0; i < a.length; i++) dot += a[i] * b[i]

    if (Number.isNaN(dot)) return 0
    return Math.max(-1, Math.min(1, dot))
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Ambient stub for the optional peer dependency @xenova/transformers.
3
+ * The real types are only available when the package is installed.
4
+ * Only `pipeline` is declared, loosely typed (`any` options and result); callers load it via dynamic import, so this stub is sufficient.
5
+ */
6
+ declare module '@xenova/transformers' {
7
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
8
+ export function pipeline(task: string, model?: string, options?: any): Promise<any>
9
+ }
@@ -0,0 +1,178 @@
1
+ import { describe, test, expect, beforeAll } from 'bun:test'
2
+ import { SemanticSearcher } from '../src/semantic-searcher'
3
+ import type { MikkLock } from '@getmikk/core'
4
+
5
+ // ── Minimal mock lock: four functions, one each in the auth, db, email and api modules ──
6
+ const mockLock: MikkLock = {
7
+ version: '1.0.0',
8
+ generatedAt: new Date().toISOString(),
9
+ generatorVersion: '1.1.0',
10
+ projectRoot: '/test',
11
+ syncState: { status: 'clean', lastSyncAt: new Date().toISOString(), lockHash: 'a', contractHash: 'b' },
12
+ modules: {},
13
+ functions: {
14
+ 'fn:auth:verifyToken': {
15
+ id: 'fn:auth:verifyToken',
16
+ name: 'verifyToken',
17
+ file: 'src/auth/verify.ts',
18
+ startLine: 1, endLine: 10,
19
+ hash: 'h1', calls: [], calledBy: [],
20
+ moduleId: 'auth',
21
+ purpose: 'Verify and decode a JWT token',
22
+ params: [{ name: 'token', type: 'string', optional: false }],
23
+ returnType: 'UserPayload',
24
+ },
25
+ 'fn:db:saveUser': {
26
+ id: 'fn:db:saveUser',
27
+ name: 'saveUser',
28
+ file: 'src/db/users.ts',
29
+ startLine: 1, endLine: 15,
30
+ hash: 'h2', calls: [], calledBy: [],
31
+ moduleId: 'db',
32
+ purpose: 'Persist a user record to the database',
33
+ params: [{ name: 'user', type: 'User', optional: false }],
34
+ returnType: 'Promise<void>',
35
+ },
36
+ 'fn:email:sendWelcome': {
37
+ id: 'fn:email:sendWelcome',
38
+ name: 'sendWelcomeEmail', // note: differs from the ID suffix ('sendWelcome'); tests below match on this name
39
+ file: 'src/email/sender.ts',
40
+ startLine: 1, endLine: 20,
41
+ hash: 'h3', calls: [], calledBy: [],
42
+ moduleId: 'email',
43
+ purpose: 'Send a welcome email to a newly registered user',
44
+ params: [{ name: 'to', type: 'string', optional: false }],
45
+ returnType: 'Promise<void>',
46
+ },
47
+ 'fn:api:handleLogin': {
48
+ id: 'fn:api:handleLogin',
49
+ name: 'handleLogin',
50
+ file: 'src/api/login.ts',
51
+ startLine: 1, endLine: 30,
52
+ hash: 'h4', calls: [], calledBy: [],
53
+ moduleId: 'api',
54
+ purpose: 'Handle HTTP login request and return JWT',
55
+ params: [{ name: 'req', type: 'Request', optional: false }],
56
+ returnType: 'Promise<Response>',
57
+ },
58
+ },
59
+ files: {},
60
+ graph: { nodes: 4, edges: 0, rootHash: 'root' },
61
+ }
62
+
63
+ // ── Tests ─────────────────────────────────────────────────────────────────────
64
+
65
+ describe('SemanticSearcher', () => {
66
+ let searcher: SemanticSearcher
67
+
68
+ test('isAvailable() returns true (package is installed)', async () => { // passes only when the optional peer dependency @xenova/transformers is importable
69
+ const ok = await SemanticSearcher.isAvailable()
70
+ expect(ok).toBe(true)
71
+ })
72
+
73
+ describe('with indexed lock', () => {
74
+ beforeAll(async () => {
75
+ searcher = new SemanticSearcher('/tmp/mikk-test-' + Date.now())
76
+ await searcher.index(mockLock)
77
+ }, 60_000) // model download can take time on first run
78
+
79
+ test('returns results for any query', async () => {
80
+ const results = await searcher.search('authenticate user', mockLock, 4)
81
+ expect(results.length).toBeGreaterThan(0)
82
+ expect(results[0].score).toBeGreaterThanOrEqual(0)
83
+ expect(results[0].score).toBeLessThanOrEqual(1)
84
+ })
85
+
86
+ test('JWT-related query ranks verifyToken or handleLogin highest', async () => {
87
+ const results = await searcher.search('validate JWT token', mockLock, 4)
88
+ const top2Names = results.slice(0, 2).map(r => r.name)
89
+ const hasJwtMatch = top2Names.some(n => n === 'verifyToken' || n === 'handleLogin')
90
+ expect(hasJwtMatch).toBe(true)
91
+ })
92
+
93
+ test('email query ranks sendWelcomeEmail highest', async () => {
94
+ const results = await searcher.search('send email to new user', mockLock, 4)
95
+ expect(results[0].name).toBe('sendWelcomeEmail')
96
+ })
97
+
98
+ test('database persistence query ranks saveUser highest', async () => {
99
+ const results = await searcher.search('save user to database', mockLock, 4)
100
+ expect(results[0].name).toBe('saveUser')
101
+ })
102
+
103
+ test('results include required fields', async () => {
104
+ const results = await searcher.search('login', mockLock, 2)
105
+ for (const r of results) {
106
+ expect(typeof r.id).toBe('string')
107
+ expect(typeof r.name).toBe('string')
108
+ expect(typeof r.file).toBe('string')
109
+ expect(typeof r.score).toBe('number')
110
+ expect(typeof r.lines).toBe('string')
111
+ }
112
+ })
113
+
114
+ test('topK limits the number of results', async () => {
115
+ const results = await searcher.search('function', mockLock, 2)
116
+ expect(results.length).toBeLessThanOrEqual(2)
117
+ })
118
+
119
+ test('second call uses cache (no re-embedding)', async () => {
120
+ const t0 = Date.now()
121
+ await searcher.index(mockLock) // should be cache hit
122
+ const elapsed = Date.now() - t0
123
+ // Cache hit involves no re-embedding, so it should finish well inside the 500 ms bound asserted below
124
+ expect(elapsed).toBeLessThan(500)
125
+ })
126
+
127
+ test('topK = 0 returns empty array', async () => {
128
+ const results = await searcher.search('login', mockLock, 0)
129
+ expect(results).toEqual([])
130
+ })
131
+
132
+ test('empty query string does not crash', async () => {
133
+ const results = await searcher.search('', mockLock, 2)
134
+ expect(Array.isArray(results)).toBe(true)
135
+ })
136
+
137
+ test('stale cache IDs not present in lock are silently skipped', async () => {
138
+ // Build a lock that has one fewer function than what was cached
139
+ const shrunkLock = { ...mockLock, functions: { 'fn:auth:verifyToken': mockLock.functions['fn:auth:verifyToken']! } }
140
+ // searcher.cache still has embeddings for all 4 IDs from beforeAll
141
+ const results = await searcher.search('user', shrunkLock, 10)
142
+ // Only the one function present in the shrunk lock should appear
143
+ expect(results.every(r => r.id in shrunkLock.functions)).toBe(true) // NOTE(review): every() also passes on an empty array, so this does not prove verifyToken is returned
144
+ })
145
+
146
+ test('re-index with changed lock rebuilds embeddings', async () => {
147
+ const newFn = {
148
+ ...mockLock.functions['fn:auth:verifyToken']!,
149
+ id: 'fn:new:brandNewFn',
150
+ name: 'brandNewFn',
151
+ purpose: 'A completely unique purpose for testing fingerprint change',
152
+ }
153
+ const changedLock: MikkLock = {
154
+ ...mockLock,
155
+ functions: { ...mockLock.functions, 'fn:new:brandNewFn': newFn },
156
+ }
157
+ const freshSearcher = new SemanticSearcher('/tmp/mikk-reindex-' + Date.now())
158
+ await freshSearcher.index(changedLock) // fingerprint differs → full recompute
159
+ const results = await freshSearcher.search('unique purpose', changedLock, 5)
160
+ expect(results.some(r => r.name === 'brandNewFn')).toBe(true)
161
+ }, 30_000)
162
+ })
163
+
164
+ describe('edge cases', () => {
165
+ test('search() before index() throws', async () => {
166
+ const fresh = new SemanticSearcher('/tmp/mikk-never-indexed-' + Date.now())
167
+ await expect(fresh.search('query', mockLock)).rejects.toThrow('Call index() before search()')
168
+ })
169
+
170
+ test('empty lock: index() and search() succeed, return empty array', async () => {
171
+ const emptyLock: MikkLock = { ...mockLock, functions: {} }
172
+ const s = new SemanticSearcher('/tmp/mikk-empty-' + Date.now())
173
+ await s.index(emptyLock)
174
+ const results = await s.search('anything', emptyLock, 5)
175
+ expect(results).toEqual([])
176
+ })
177
+ })
178
+ })