@getmikk/core 1.7.1 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -412
- package/package.json +3 -1
- package/src/contract/contract-reader.ts +2 -2
- package/src/contract/lock-compiler.ts +15 -14
- package/src/contract/lock-reader.ts +14 -14
- package/src/contract/schema.ts +3 -3
- package/src/index.ts +2 -1
- package/src/parser/base-parser.ts +1 -1
- package/src/parser/boundary-checker.ts +74 -212
- package/src/parser/go/go-extractor.ts +10 -10
- package/src/parser/go/go-parser.ts +2 -2
- package/src/parser/index.ts +45 -31
- package/src/parser/javascript/js-extractor.ts +9 -9
- package/src/parser/javascript/js-parser.ts +2 -2
- package/src/parser/tree-sitter/parser.ts +228 -0
- package/src/parser/tree-sitter/queries.ts +181 -0
- package/src/parser/types.ts +1 -1
- package/src/parser/typescript/ts-extractor.ts +15 -15
- package/src/parser/typescript/ts-parser.ts +1 -1
- package/src/parser/typescript/ts-resolver.ts +2 -2
- package/src/search/bm25.ts +206 -0
- package/src/search/index.ts +3 -0
- package/src/utils/fs.ts +95 -31
- package/src/utils/minimatch.ts +23 -14
- package/test-output.txt +0 -0
- package/tests/go-parser.test.ts +10 -10
- package/tests/js-parser.test.ts +34 -19
- package/tests/parser.test.ts +5 -5
- package/tests/tree-sitter-parser.test.ts +168 -0
- package/tests/ts-parser.test.ts +49 -1
- package/out.log +0 -0
|
@@ -3,7 +3,7 @@ import type { ParsedFunction, ParsedClass, ParsedImport, ParsedExport, ParsedPar
|
|
|
3
3
|
import { hashContent } from '../../hash/file-hasher.js'
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
* TypeScript AST extractor
|
|
6
|
+
* TypeScript AST extractor walks the TypeScript AST using the TS Compiler API
|
|
7
7
|
* and extracts functions, classes, imports, exports and call relationships.
|
|
8
8
|
*/
|
|
9
9
|
export class TypeScriptExtractor {
|
|
@@ -286,7 +286,7 @@ export class TypeScriptExtractor {
|
|
|
286
286
|
middlewares.push(arg.text)
|
|
287
287
|
}
|
|
288
288
|
} else if (ts.isCallExpression(arg)) {
|
|
289
|
-
// e.g. upload.single("file")
|
|
289
|
+
// e.g. upload.single("file") middleware call
|
|
290
290
|
middlewares.push(arg.expression.getText(this.sourceFile))
|
|
291
291
|
} else if (ts.isArrowFunction(arg) || ts.isFunctionExpression(arg)) {
|
|
292
292
|
handler = 'anonymous'
|
|
@@ -309,7 +309,7 @@ export class TypeScriptExtractor {
|
|
|
309
309
|
return routes
|
|
310
310
|
}
|
|
311
311
|
|
|
312
|
-
//
|
|
312
|
+
// Protected Helpers ------------------------------------------------------
|
|
313
313
|
|
|
314
314
|
protected parseFunctionDeclaration(node: ts.FunctionDeclaration): ParsedFunction {
|
|
315
315
|
const name = node.name!.text
|
|
@@ -511,11 +511,11 @@ export class TypeScriptExtractor {
|
|
|
511
511
|
if (ts.isIdentifier(callee)) {
|
|
512
512
|
calls.push(callee.text)
|
|
513
513
|
} else if (ts.isPropertyAccessExpression(callee)) {
|
|
514
|
-
// e.g., obj.method()
|
|
514
|
+
// e.g., obj.method() we capture the full dotted name
|
|
515
515
|
calls.push(callee.getText(this.sourceFile))
|
|
516
516
|
}
|
|
517
517
|
}
|
|
518
|
-
// Track constructor calls: new Foo(...)
|
|
518
|
+
// Track constructor calls: new Foo(...) -> "Foo"
|
|
519
519
|
if (ts.isNewExpression(n)) {
|
|
520
520
|
const callee = n.expression
|
|
521
521
|
if (ts.isIdentifier(callee)) {
|
|
@@ -547,12 +547,12 @@ export class TypeScriptExtractor {
|
|
|
547
547
|
}
|
|
548
548
|
|
|
549
549
|
// Skip divider lines (lines with 3+ repeated special characters)
|
|
550
|
-
if (/^[
|
|
550
|
+
if (/^[\-_=\*]{3,}$/.test(clean)) continue
|
|
551
551
|
|
|
552
552
|
if (clean) meaningfulLines.push(clean)
|
|
553
553
|
}
|
|
554
554
|
|
|
555
|
-
// Return the first meaningful line
|
|
555
|
+
// Return the first meaningful line in JSDoc, the first line is the summary.
|
|
556
556
|
const fromComment = meaningfulLines.length > 0 ? meaningfulLines[0].split('\n')[0].trim() : ''
|
|
557
557
|
if (fromComment) return fromComment
|
|
558
558
|
}
|
|
@@ -662,10 +662,10 @@ export class TypeScriptExtractor {
|
|
|
662
662
|
if (modifiers) {
|
|
663
663
|
for (const decorator of modifiers) {
|
|
664
664
|
if (ts.isCallExpression(decorator.expression)) {
|
|
665
|
-
// @Injectable()
|
|
665
|
+
// @Injectable() decorator with arguments
|
|
666
666
|
decorators.push(decorator.expression.expression.getText(this.sourceFile))
|
|
667
667
|
} else if (ts.isIdentifier(decorator.expression)) {
|
|
668
|
-
// @Sealed
|
|
668
|
+
// @Sealed decorator without arguments
|
|
669
669
|
decorators.push(decorator.expression.text)
|
|
670
670
|
}
|
|
671
671
|
}
|
|
@@ -694,7 +694,7 @@ export class TypeScriptExtractor {
|
|
|
694
694
|
return this.sourceFile.getLineAndCharacterOfPosition(pos).line + 1
|
|
695
695
|
}
|
|
696
696
|
|
|
697
|
-
/** Walk the top-level children of a node (non-recursive
|
|
697
|
+
/** Walk the top-level children of a node (non-recursive callbacks decide depth) */
|
|
698
698
|
protected walkNode(node: ts.Node, callback: (node: ts.Node) => void): void {
|
|
699
699
|
ts.forEachChild(node, (child) => {
|
|
700
700
|
callback(child)
|
|
@@ -702,15 +702,15 @@ export class TypeScriptExtractor {
|
|
|
702
702
|
}
|
|
703
703
|
}
|
|
704
704
|
|
|
705
|
-
//
|
|
705
|
+
//
|
|
706
706
|
|
|
707
707
|
/**
|
|
708
708
|
* Derive a human-readable purpose sentence from a camelCase/PascalCase identifier.
|
|
709
709
|
* Examples:
|
|
710
|
-
* validateJwtToken
|
|
711
|
-
* buildGraphFromLock
|
|
712
|
-
* UserRepository
|
|
713
|
-
* parseFiles
|
|
710
|
+
* validateJwtToken -> "Validate jwt token"
|
|
711
|
+
* buildGraphFromLock -> "Build graph from lock"
|
|
712
|
+
* UserRepository -> "User repository"
|
|
713
|
+
* parseFiles -> "Parse files"
|
|
714
714
|
*/
|
|
715
715
|
function normalizeTypeAnnotation(type: string): string {
|
|
716
716
|
return type.replace(/\s*\n\s*/g, ' ').replace(/\s{2,}/g, ' ').trim()
|
|
@@ -12,7 +12,7 @@ import type { ParsedFile } from '../types.js'
|
|
|
12
12
|
*/
|
|
13
13
|
export class TypeScriptParser extends BaseParser {
|
|
14
14
|
/** Parse a single TypeScript file */
|
|
15
|
-
parse(filePath: string, content: string): ParsedFile {
|
|
15
|
+
async parse(filePath: string, content: string): Promise<ParsedFile> {
|
|
16
16
|
const extractor = new TypeScriptExtractor(filePath, content)
|
|
17
17
|
const functions = extractor.extractFunctions()
|
|
18
18
|
const classes = extractor.extractClasses()
|
|
@@ -33,7 +33,7 @@ export class TypeScriptResolver {
|
|
|
33
33
|
private resolvePath(source: string, fromFile: string, allProjectFiles: string[]): string {
|
|
34
34
|
let resolvedSource = source
|
|
35
35
|
|
|
36
|
-
// 1. Handle path aliases: @/utils/jwt
|
|
36
|
+
// 1. Handle path aliases: @/utils/jwt -> src/utils/jwt
|
|
37
37
|
for (const [alias, targets] of Object.entries(this.aliases)) {
|
|
38
38
|
const aliasPrefix = alias.replace('/*', '')
|
|
39
39
|
if (source.startsWith(aliasPrefix)) {
|
|
@@ -57,7 +57,7 @@ export class TypeScriptResolver {
|
|
|
57
57
|
resolved = resolved.replace(/\\/g, '/')
|
|
58
58
|
|
|
59
59
|
// 3. Try to find exact match with extensions
|
|
60
|
-
const extensions = ['.ts', '.tsx', '/index.ts', '/index.tsx']
|
|
60
|
+
const extensions = ['.ts', '.tsx', '.js', '.jsx', '.mjs', '/index.ts', '/index.tsx', '/index.js', '/index.jsx']
|
|
61
61
|
|
|
62
62
|
// If the path already has an extension, return it
|
|
63
63
|
if (resolved.endsWith('.ts') || resolved.endsWith('.tsx')) {
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 Search Index — Okapi BM25 ranking for function search.
|
|
3
|
+
*
|
|
4
|
+
* BM25 is a probabilistic ranking function that considers:
|
|
5
|
+
* - Term frequency (TF) — how often query terms appear in a document
|
|
6
|
+
* - Inverse document frequency (IDF) — rarity of terms across all documents
|
|
7
|
+
* - Document length normalization — penalizes very long documents
|
|
8
|
+
*
|
|
9
|
+
* This gives dramatically better search results than naive substring matching.
|
|
10
|
+
* Combined with substring matching via Reciprocal Rank Fusion (RRF), it
|
|
11
|
+
* produces GitNexus-quality hybrid search.
|
|
12
|
+
*
|
|
13
|
+
* @module
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/** A searchable document with an ID and tokenized content */
interface BM25Document {
  /** Stable identifier echoed back in search results */
  id: string
  /** Lowercased terms the document is indexed under (normalized in addDocument) */
  tokens: string[]
  /** Cached token count, used for BM25 length normalization */
  length: number
}

/** A single search result with score */
export interface BM25Result {
  /** The matching document's id */
  id: string
  /** BM25 relevance score — higher means more relevant */
  score: number
}

/** BM25 tuning parameters (the standard Okapi defaults) */
const K1 = 1.2 // Term frequency saturation — higher = more weight on TF
const B = 0.75 // Document length normalization — 0 = no normalization, 1 = full
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* In-memory BM25 index. Build once, query many times.
|
|
35
|
+
*
|
|
36
|
+
* Usage:
|
|
37
|
+
* const index = new BM25Index()
|
|
38
|
+
* index.addDocument('fn:auth.ts:verify', ['verify', 'token', 'jwt', 'auth'])
|
|
39
|
+
* index.addDocument('fn:user.ts:getUser', ['get', 'user', 'fetch', 'database'])
|
|
40
|
+
* const results = index.search('verify jwt token')
|
|
41
|
+
*/
|
|
42
|
+
export class BM25Index {
|
|
43
|
+
private documents: BM25Document[] = []
|
|
44
|
+
private documentFrequency = new Map<string, number>() // term → how many docs contain it
|
|
45
|
+
private avgDocLength = 0
|
|
46
|
+
|
|
47
|
+
/** Clear the index */
|
|
48
|
+
clear(): void {
|
|
49
|
+
this.documents = []
|
|
50
|
+
this.documentFrequency.clear()
|
|
51
|
+
this.avgDocLength = 0
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/** Add a document with pre-tokenized terms */
|
|
55
|
+
addDocument(id: string, tokens: string[]): void {
|
|
56
|
+
const normalizedTokens = tokens.map(t => t.toLowerCase())
|
|
57
|
+
this.documents.push({ id, tokens: normalizedTokens, length: normalizedTokens.length })
|
|
58
|
+
|
|
59
|
+
// Count unique terms for IDF
|
|
60
|
+
const uniqueTerms = new Set(normalizedTokens)
|
|
61
|
+
for (const term of uniqueTerms) {
|
|
62
|
+
this.documentFrequency.set(term, (this.documentFrequency.get(term) ?? 0) + 1)
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Recompute average document length
|
|
66
|
+
this.avgDocLength = this.documents.reduce((sum, d) => sum + d.length, 0) / this.documents.length
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/** Search the index and return ranked results */
|
|
70
|
+
search(query: string, limit = 20): BM25Result[] {
|
|
71
|
+
const queryTokens = tokenize(query)
|
|
72
|
+
if (queryTokens.length === 0 || this.documents.length === 0) return []
|
|
73
|
+
|
|
74
|
+
const N = this.documents.length
|
|
75
|
+
const results: BM25Result[] = []
|
|
76
|
+
|
|
77
|
+
for (const doc of this.documents) {
|
|
78
|
+
let score = 0
|
|
79
|
+
|
|
80
|
+
for (const term of queryTokens) {
|
|
81
|
+
const df = this.documentFrequency.get(term) ?? 0
|
|
82
|
+
if (df === 0) continue
|
|
83
|
+
|
|
84
|
+
// IDF: log((N - df + 0.5) / (df + 0.5) + 1)
|
|
85
|
+
const idf = Math.log((N - df + 0.5) / (df + 0.5) + 1)
|
|
86
|
+
|
|
87
|
+
// TF in this document
|
|
88
|
+
let tf = 0
|
|
89
|
+
for (const t of doc.tokens) {
|
|
90
|
+
if (t === term) tf++
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// BM25 score component
|
|
94
|
+
const tfNorm = (tf * (K1 + 1)) / (tf + K1 * (1 - B + B * (doc.length / this.avgDocLength)))
|
|
95
|
+
score += idf * tfNorm
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
if (score > 0) {
|
|
99
|
+
results.push({ id: doc.id, score })
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// Sort by score descending
|
|
104
|
+
results.sort((a, b) => b.score - a.score)
|
|
105
|
+
return results.slice(0, limit)
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Reciprocal Rank Fusion — merge multiple ranked lists into one.
|
|
111
|
+
*
|
|
112
|
+
* RRF is used by GitNexus to combine BM25 + semantic search. We use it
|
|
113
|
+
* to combine BM25 + substring match results.
|
|
114
|
+
*
|
|
115
|
+
* Formula: score = Σ 1 / (k + rank_i) where k = 60 (standard)
|
|
116
|
+
*/
|
|
117
|
+
export function reciprocalRankFusion(
|
|
118
|
+
...rankedLists: { id: string; score: number }[][]
|
|
119
|
+
): { id: string; score: number }[] {
|
|
120
|
+
const K = 60 // Standard RRF constant
|
|
121
|
+
const scores = new Map<string, number>()
|
|
122
|
+
|
|
123
|
+
for (const list of rankedLists) {
|
|
124
|
+
for (let rank = 0; rank < list.length; rank++) {
|
|
125
|
+
const item = list[rank]
|
|
126
|
+
scores.set(item.id, (scores.get(item.id) ?? 0) + 1 / (K + rank + 1))
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
return [...scores.entries()]
|
|
131
|
+
.map(([id, score]) => ({ id, score }))
|
|
132
|
+
.sort((a, b) => b.score - a.score)
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Tokenize a string into searchable terms.
|
|
137
|
+
*
|
|
138
|
+
* Handles:
|
|
139
|
+
* - camelCase splitting: "parseFiles" → ["parse", "files"]
|
|
140
|
+
* - snake_case splitting: "parse_files" → ["parse", "files"]
|
|
141
|
+
* - kebab-case splitting: "parse-files" → ["parse", "files"]
|
|
142
|
+
* - Lowercasing
|
|
143
|
+
* - Minimum 2-char filter
|
|
144
|
+
*/
|
|
145
|
+
export function tokenize(text: string): string[] {
|
|
146
|
+
const tokens: string[] = []
|
|
147
|
+
|
|
148
|
+
// Split on non-alphanumeric chars
|
|
149
|
+
const words = text.split(/[^a-zA-Z0-9]+/).filter(Boolean)
|
|
150
|
+
|
|
151
|
+
for (const word of words) {
|
|
152
|
+
// Split camelCase: "parseFiles" → ["parse", "Files"]
|
|
153
|
+
const camelParts = word.replace(/([a-z])([A-Z])/g, '$1 $2').split(' ')
|
|
154
|
+
|
|
155
|
+
for (const part of camelParts) {
|
|
156
|
+
const lower = part.toLowerCase()
|
|
157
|
+
if (lower.length >= 2) {
|
|
158
|
+
tokens.push(lower)
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
return tokens
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Build search tokens for a function — combines name, purpose, params, file path.
|
|
168
|
+
* This gives BM25 rich content to index beyond just the function name.
|
|
169
|
+
*/
|
|
170
|
+
export function buildFunctionTokens(fn: {
|
|
171
|
+
name: string
|
|
172
|
+
file: string
|
|
173
|
+
purpose?: string
|
|
174
|
+
params?: { name: string; type: string }[]
|
|
175
|
+
returnType?: string
|
|
176
|
+
}): string[] {
|
|
177
|
+
const parts: string[] = []
|
|
178
|
+
|
|
179
|
+
// Function name tokens (highest signal)
|
|
180
|
+
parts.push(...tokenize(fn.name))
|
|
181
|
+
parts.push(...tokenize(fn.name)) // Double-weight the name
|
|
182
|
+
|
|
183
|
+
// File path tokens
|
|
184
|
+
const filename = fn.file.split('/').pop() ?? fn.file
|
|
185
|
+
parts.push(...tokenize(filename.replace(/\.[^.]+$/, ''))) // Strip extension
|
|
186
|
+
|
|
187
|
+
// Purpose tokens
|
|
188
|
+
if (fn.purpose) {
|
|
189
|
+
parts.push(...tokenize(fn.purpose))
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
// Parameter name tokens
|
|
193
|
+
if (fn.params) {
|
|
194
|
+
for (const p of fn.params) {
|
|
195
|
+
parts.push(...tokenize(p.name))
|
|
196
|
+
parts.push(...tokenize(p.type))
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
// Return type tokens
|
|
201
|
+
if (fn.returnType) {
|
|
202
|
+
parts.push(...tokenize(fn.returnType))
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
return parts
|
|
206
|
+
}
|
package/src/utils/fs.ts
CHANGED
|
@@ -2,27 +2,27 @@ import * as fs from 'node:fs/promises'
|
|
|
2
2
|
import * as path from 'node:path'
|
|
3
3
|
import fg from 'fast-glob'
|
|
4
4
|
|
|
5
|
-
//
|
|
5
|
+
// --- Well-known patterns for schema/config/route files ---------------------
|
|
6
6
|
// These are structural files an AI agent needs but aren't source code.
|
|
7
7
|
// Mikk auto-discovers them so the AI doesn't have to explore the filesystem.
|
|
8
|
-
// Patterns are language-agnostic
|
|
8
|
+
// Patterns are language-agnostic -- unused patterns simply return zero matches.
|
|
9
9
|
const CONTEXT_FILE_PATTERNS = [
|
|
10
|
-
// Data models / schemas
|
|
10
|
+
// Data models / schemas -- JS/TS
|
|
11
11
|
'**/prisma/schema.prisma',
|
|
12
12
|
'**/drizzle/**/*.ts',
|
|
13
13
|
'**/schema/**/*.{ts,js,graphql,gql,sql}',
|
|
14
14
|
'**/models/**/*.{ts,js}',
|
|
15
15
|
'**/*.schema.{ts,js}',
|
|
16
16
|
'**/*.model.{ts,js}',
|
|
17
|
-
// Data models / schemas
|
|
17
|
+
// Data models / schemas -- Python
|
|
18
18
|
'**/models.py',
|
|
19
19
|
'**/schemas.py',
|
|
20
20
|
'**/serializers.py',
|
|
21
21
|
'**/models/**/*.py',
|
|
22
|
-
// Data models / schemas
|
|
22
|
+
// Data models / schemas -- Ruby
|
|
23
23
|
'**/app/models/**/*.rb',
|
|
24
24
|
'**/db/schema.rb',
|
|
25
|
-
// Data models / schemas
|
|
25
|
+
// Data models / schemas -- Go / Rust / Java / PHP
|
|
26
26
|
'**/models/*.go',
|
|
27
27
|
'**/*_model.go',
|
|
28
28
|
'**/schema.rs',
|
|
@@ -42,7 +42,7 @@ const CONTEXT_FILE_PATTERNS = [
|
|
|
42
42
|
// Route definitions
|
|
43
43
|
'**/routes/**/*.{ts,js}',
|
|
44
44
|
'**/router.{ts,js}',
|
|
45
|
-
// Database migrations (latest only)
|
|
45
|
+
// Database migrations (latest only) -- multi-language
|
|
46
46
|
'**/migrations/**/migration.sql',
|
|
47
47
|
'**/db/migrate/**/*.rb',
|
|
48
48
|
'**/alembic/**/*.py',
|
|
@@ -56,7 +56,7 @@ const CONTEXT_FILE_PATTERNS = [
|
|
|
56
56
|
'**/Dockerfile',
|
|
57
57
|
'.env.example',
|
|
58
58
|
'.env.local.example',
|
|
59
|
-
// Schema definitions
|
|
59
|
+
// Schema definitions -- general
|
|
60
60
|
'**/schema.{yaml,yml,json}',
|
|
61
61
|
'**/*.avsc',
|
|
62
62
|
'**/*.thrift',
|
|
@@ -115,10 +115,10 @@ export interface ContextFile {
|
|
|
115
115
|
size: number
|
|
116
116
|
}
|
|
117
117
|
|
|
118
|
-
/** Maximum size (in bytes) for a single context file
|
|
118
|
+
/** Maximum size (in bytes) for a single context file -- skip huge files */
|
|
119
119
|
const MAX_CONTEXT_FILE_SIZE = 50_000 // ~50KB
|
|
120
120
|
|
|
121
|
-
//
|
|
121
|
+
// --- .mikkignore support ----------------------------------------------------
|
|
122
122
|
|
|
123
123
|
/**
|
|
124
124
|
* Read a .mikkignore file from the project root and parse it into
|
|
@@ -138,7 +138,7 @@ export async function readMikkIgnore(projectRoot: string): Promise<string[]> {
|
|
|
138
138
|
const content = await fs.readFile(ignorePath, 'utf-8')
|
|
139
139
|
return parseMikkIgnore(content)
|
|
140
140
|
} catch {
|
|
141
|
-
return [] // no .mikkignore
|
|
141
|
+
return [] // no .mikkignore -- that's fine
|
|
142
142
|
}
|
|
143
143
|
}
|
|
144
144
|
|
|
@@ -151,24 +151,24 @@ export function parseMikkIgnore(content: string): string[] {
|
|
|
151
151
|
if (line.startsWith('!')) continue // negations not yet supported
|
|
152
152
|
|
|
153
153
|
const isDir = line.endsWith('/')
|
|
154
|
-
// If pattern has no slash (ignoring trailing slash), match anywhere
|
|
154
|
+
// If pattern has no slash (ignoring trailing slash), match anywhere -> prepend **/
|
|
155
155
|
const stripped = isDir ? line.slice(0, -1) : line
|
|
156
156
|
const hasSlash = stripped.includes('/')
|
|
157
157
|
|
|
158
158
|
if (!hasSlash) {
|
|
159
159
|
if (isDir) {
|
|
160
|
-
// e.g. "dist/"
|
|
160
|
+
// e.g. "dist/" -> "**/{dist}/**" -- ignore the directory and everything within it
|
|
161
161
|
patterns.push(`**/${stripped}/**`)
|
|
162
162
|
} else {
|
|
163
|
-
// e.g. "*.svg"
|
|
163
|
+
// e.g. "*.svg" -> "**/*.svg"
|
|
164
164
|
patterns.push(`**/${line}`)
|
|
165
165
|
}
|
|
166
166
|
} else {
|
|
167
167
|
if (isDir) {
|
|
168
|
-
// e.g. "packages/*/tests/"
|
|
168
|
+
// e.g. "packages/*/tests/" -> "packages/*/tests/**"
|
|
169
169
|
patterns.push(`${stripped}/**`)
|
|
170
170
|
} else {
|
|
171
|
-
// e.g. "components/ui/**"
|
|
171
|
+
// e.g. "components/ui/**" -- relative to root, already valid
|
|
172
172
|
patterns.push(line)
|
|
173
173
|
}
|
|
174
174
|
}
|
|
@@ -181,7 +181,7 @@ export function parseMikkIgnore(content: string): string[] {
|
|
|
181
181
|
* the project's data models, API definitions, route structure, and config.
|
|
182
182
|
*
|
|
183
183
|
* This is technology-agnostic: it works for Prisma, Drizzle, GraphQL, SQL,
|
|
184
|
-
* Protobuf, Docker, OpenAPI, and more
|
|
184
|
+
* Protobuf, Docker, OpenAPI, and more -- anything with a well-known file pattern.
|
|
185
185
|
*/
|
|
186
186
|
export async function discoverContextFiles(projectRoot: string): Promise<ContextFile[]> {
|
|
187
187
|
const mikkIgnore = await readMikkIgnore(projectRoot)
|
|
@@ -194,7 +194,7 @@ export async function discoverContextFiles(projectRoot: string): Promise<Context
|
|
|
194
194
|
|
|
195
195
|
const normalised = files.map(f => f.replace(/\\/g, '/'))
|
|
196
196
|
|
|
197
|
-
// Deduplicate
|
|
197
|
+
// Deduplicate -- some patterns overlap (e.g. models/*.ts also matched by source discovery)
|
|
198
198
|
const unique = [...new Set(normalised)]
|
|
199
199
|
|
|
200
200
|
const results: ContextFile[] = []
|
|
@@ -211,7 +211,7 @@ export async function discoverContextFiles(projectRoot: string): Promise<Context
|
|
|
211
211
|
|
|
212
212
|
results.push({ path: relPath, content, type, size: stat.size })
|
|
213
213
|
} catch {
|
|
214
|
-
// File unreadable
|
|
214
|
+
// File unreadable -- skip
|
|
215
215
|
}
|
|
216
216
|
}
|
|
217
217
|
|
|
@@ -229,7 +229,7 @@ export async function discoverContextFiles(projectRoot: string): Promise<Context
|
|
|
229
229
|
results.sort((a, b) => priority[a.type] - priority[b.type])
|
|
230
230
|
|
|
231
231
|
// If we have a schema file (e.g. prisma/schema.prisma), the migrations
|
|
232
|
-
// are redundant
|
|
232
|
+
// are redundant -- they represent historical deltas, not the current state.
|
|
233
233
|
// Including them wastes AI tokens and can be actively misleading.
|
|
234
234
|
const hasSchema = results.some(f => f.type === 'schema')
|
|
235
235
|
if (hasSchema) {
|
|
@@ -242,7 +242,7 @@ export async function discoverContextFiles(projectRoot: string): Promise<Context
|
|
|
242
242
|
/** Infer the context file's category from its path */
|
|
243
243
|
function inferContextFileType(filePath: string): ContextFileType {
|
|
244
244
|
const lower = filePath.toLowerCase()
|
|
245
|
-
// Schema files
|
|
245
|
+
// Schema files -- multi-language
|
|
246
246
|
if (lower.includes('prisma/schema') || lower.endsWith('.prisma')) return 'schema'
|
|
247
247
|
if (lower.includes('drizzle/') || lower.includes('.schema.')) return 'schema'
|
|
248
248
|
if (lower.endsWith('.graphql') || lower.endsWith('.gql')) return 'schema'
|
|
@@ -251,12 +251,12 @@ function inferContextFileType(filePath: string): ContextFileType {
|
|
|
251
251
|
if (lower.endsWith('schema.rs')) return 'schema'
|
|
252
252
|
if (lower.endsWith('.proto')) return 'api-spec'
|
|
253
253
|
if (lower.includes('openapi') || lower.includes('swagger')) return 'api-spec'
|
|
254
|
-
// Migrations
|
|
254
|
+
// Migrations -- multi-language
|
|
255
255
|
if (lower.endsWith('.sql') && lower.includes('migration')) return 'migration'
|
|
256
256
|
if (lower.includes('db/migrate/')) return 'migration'
|
|
257
257
|
if (lower.includes('alembic/')) return 'migration'
|
|
258
258
|
if (lower.endsWith('.sql')) return 'schema'
|
|
259
|
-
// Models
|
|
259
|
+
// Models -- any language
|
|
260
260
|
if (lower.includes('/models/') || lower.includes('/model/')) return 'model'
|
|
261
261
|
if (lower.endsWith('.model.ts') || lower.endsWith('.model.js') || lower.endsWith('.model.go')) return 'model'
|
|
262
262
|
if (lower.endsWith('models.py') || lower.endsWith('serializers.py') || lower.endsWith('schemas.py')) return 'model'
|
|
@@ -286,7 +286,7 @@ export async function detectProjectLanguage(projectRoot: string): Promise<Projec
|
|
|
286
286
|
const matches = await fg(pattern, { cwd: projectRoot, onlyFiles: true, deep: 1 })
|
|
287
287
|
return matches.length > 0
|
|
288
288
|
}
|
|
289
|
-
// Check in priority order
|
|
289
|
+
// Check in priority order -- most specific first
|
|
290
290
|
if (await exists('tsconfig.json') || await hasGlob('tsconfig.*.json')) return 'typescript'
|
|
291
291
|
if (await exists('Cargo.toml')) return 'rust'
|
|
292
292
|
if (await exists('go.mod')) return 'go'
|
|
@@ -432,7 +432,7 @@ export async function setupMikkDirectory(projectRoot: string): Promise<void> {
|
|
|
432
432
|
}
|
|
433
433
|
}
|
|
434
434
|
|
|
435
|
-
//
|
|
435
|
+
// --- .mikkignore auto-generation --------------------------------------------
|
|
436
436
|
|
|
437
437
|
/** Default ignore patterns shared across all languages */
|
|
438
438
|
const COMMON_IGNORE_PATTERNS = [
|
|
@@ -520,7 +520,7 @@ const LANGUAGE_IGNORE_TEMPLATES: Record<ProjectLanguage, string[]> = {
|
|
|
520
520
|
'',
|
|
521
521
|
],
|
|
522
522
|
rust: [
|
|
523
|
-
'# Test files (inline tests are kept
|
|
523
|
+
'# Test files (inline tests are kept -- only test binaries excluded)',
|
|
524
524
|
'target/',
|
|
525
525
|
'tests/fixtures/',
|
|
526
526
|
'',
|
|
@@ -589,7 +589,7 @@ export async function generateMikkIgnore(projectRoot: string, language: ProjectL
|
|
|
589
589
|
if (await fileExists(ignorePath)) return false
|
|
590
590
|
|
|
591
591
|
const lines: string[] = [
|
|
592
|
-
'# .mikkignore
|
|
592
|
+
'# .mikkignore -- files/directories Mikk should skip during analysis',
|
|
593
593
|
'# Syntax: gitignore-style patterns. Lines starting with # are comments.',
|
|
594
594
|
'# Paths without / match anywhere. Paths with / are relative to project root.',
|
|
595
595
|
'',
|
|
@@ -607,7 +607,7 @@ export async function generateMikkIgnore(projectRoot: string, language: ProjectL
|
|
|
607
607
|
: pkg.workspaces?.packages
|
|
608
608
|
|
|
609
609
|
if (workspaces && workspaces.length > 0) {
|
|
610
|
-
lines.push('# Monorepo
|
|
610
|
+
lines.push('# Monorepo -- test/fixture directories across all packages')
|
|
611
611
|
for (const ws of workspaces) {
|
|
612
612
|
// ws is like "packages/*" or "apps/*"
|
|
613
613
|
const base = ws.replace(/\/?\*$/, '')
|
|
@@ -618,13 +618,13 @@ export async function generateMikkIgnore(projectRoot: string, language: ProjectL
|
|
|
618
618
|
lines.push('')
|
|
619
619
|
}
|
|
620
620
|
} catch {
|
|
621
|
-
// No package.json or not JSON
|
|
621
|
+
// No package.json or not JSON -- skip monorepo detection
|
|
622
622
|
}
|
|
623
623
|
|
|
624
624
|
// Turbo / pnpm workspace detection
|
|
625
625
|
try {
|
|
626
626
|
const turboRaw = await fs.readFile(path.join(projectRoot, 'turbo.json'), 'utf-8')
|
|
627
|
-
// turbo.json exists
|
|
627
|
+
// turbo.json exists -- likely a monorepo already handled above
|
|
628
628
|
void turboRaw
|
|
629
629
|
} catch {
|
|
630
630
|
// not a turbo project
|
|
@@ -639,7 +639,7 @@ export async function generateMikkIgnore(projectRoot: string, language: ProjectL
|
|
|
639
639
|
.map(l => l.replace(/^\s*-\s*['"]?/, '').replace(/['"]?\s*$/, '').trim())
|
|
640
640
|
|
|
641
641
|
if (packageLines.length > 0 && !lines.some(l => l.includes('Monorepo'))) {
|
|
642
|
-
lines.push('# Monorepo (pnpm)
|
|
642
|
+
lines.push('# Monorepo (pnpm) -- test/fixture directories across all packages')
|
|
643
643
|
for (const ws of packageLines) {
|
|
644
644
|
const base = ws.replace(/\/?\*$/, '')
|
|
645
645
|
lines.push(`${base}/*/tests/`)
|
|
@@ -655,3 +655,67 @@ export async function generateMikkIgnore(projectRoot: string, language: ProjectL
|
|
|
655
655
|
await fs.writeFile(ignorePath, lines.join('\n'), 'utf-8')
|
|
656
656
|
return true
|
|
657
657
|
}
|
|
658
|
+
|
|
659
|
+
/**
|
|
660
|
+
* Automatically add .mikk/ to the project's .gitignore file if it exists.
|
|
661
|
+
* Returns true if the file was modified, false otherwise.
|
|
662
|
+
*/
|
|
663
|
+
export async function updateGitIgnore(projectRoot: string): Promise<boolean> {
|
|
664
|
+
const gitIgnorePath = path.join(projectRoot, '.gitignore')
|
|
665
|
+
|
|
666
|
+
// If no .gitignore, we don't create one (don't assume the project uses Git)
|
|
667
|
+
if (!await fileExists(gitIgnorePath)) return false
|
|
668
|
+
|
|
669
|
+
try {
|
|
670
|
+
const content = await fs.readFile(gitIgnorePath, 'utf-8')
|
|
671
|
+
const lines = content.split('\n')
|
|
672
|
+
|
|
673
|
+
// Check if already ignored
|
|
674
|
+
const alreadyIgnored = lines.some(line => {
|
|
675
|
+
const trimmed = line.trim()
|
|
676
|
+
return trimmed === '.mikk' || trimmed === '.mikk/' || trimmed === '**/.mikk/**'
|
|
677
|
+
})
|
|
678
|
+
|
|
679
|
+
if (alreadyIgnored) return false
|
|
680
|
+
|
|
681
|
+
// Append to .gitignore
|
|
682
|
+
const newContent = content.endsWith('\n')
|
|
683
|
+
? `${content}\n# Mikk internal\n.mikk/\n`
|
|
684
|
+
: `${content}\n\n# Mikk internal\n.mikk/\n`
|
|
685
|
+
|
|
686
|
+
await fs.writeFile(gitIgnorePath, newContent, 'utf-8')
|
|
687
|
+
return true
|
|
688
|
+
} catch {
|
|
689
|
+
return false
|
|
690
|
+
}
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
/**
|
|
694
|
+
* Remove Mikk entries from .gitignore.
|
|
695
|
+
*/
|
|
696
|
+
export async function cleanupGitIgnore(projectRoot: string): Promise<boolean> {
|
|
697
|
+
const gitIgnorePath = path.join(projectRoot, '.gitignore')
|
|
698
|
+
if (!await fileExists(gitIgnorePath)) return false
|
|
699
|
+
|
|
700
|
+
try {
|
|
701
|
+
const content = await fs.readFile(gitIgnorePath, 'utf-8')
|
|
702
|
+
const lines = content.split('\n')
|
|
703
|
+
|
|
704
|
+
let modified = false
|
|
705
|
+
const filtered = lines.filter(line => {
|
|
706
|
+
const trimmed = line.trim()
|
|
707
|
+
const isMikkEntry = trimmed === '.mikk' || trimmed === '.mikk/' || trimmed === '**/.mikk/**' || trimmed === '# Mikk internal'
|
|
708
|
+
if (isMikkEntry) modified = true
|
|
709
|
+
return !isMikkEntry
|
|
710
|
+
})
|
|
711
|
+
|
|
712
|
+
if (!modified) return false
|
|
713
|
+
|
|
714
|
+
// Joins lines and trim trailing newlines to avoid growing whitespace
|
|
715
|
+
const newContent = filtered.join('\n').trim() + '\n'
|
|
716
|
+
await fs.writeFile(gitIgnorePath, newContent, 'utf-8')
|
|
717
|
+
return true
|
|
718
|
+
} catch {
|
|
719
|
+
return false
|
|
720
|
+
}
|
|
721
|
+
}
|