sverklo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +133 -0
  3. package/dist/bin/sverklo.d.ts +2 -0
  4. package/dist/bin/sverklo.js +41 -0
  5. package/dist/bin/sverklo.js.map +1 -0
  6. package/dist/src/index.d.ts +3 -0
  7. package/dist/src/index.js +3 -0
  8. package/dist/src/index.js.map +1 -0
  9. package/dist/src/indexer/describer.d.ts +2 -0
  10. package/dist/src/indexer/describer.js +112 -0
  11. package/dist/src/indexer/describer.js.map +1 -0
  12. package/dist/src/indexer/embedder.d.ts +4 -0
  13. package/dist/src/indexer/embedder.js +249 -0
  14. package/dist/src/indexer/embedder.js.map +1 -0
  15. package/dist/src/indexer/file-discovery.d.ts +9 -0
  16. package/dist/src/indexer/file-discovery.js +52 -0
  17. package/dist/src/indexer/file-discovery.js.map +1 -0
  18. package/dist/src/indexer/graph-builder.d.ts +5 -0
  19. package/dist/src/indexer/graph-builder.js +77 -0
  20. package/dist/src/indexer/graph-builder.js.map +1 -0
  21. package/dist/src/indexer/indexer.d.ts +26 -0
  22. package/dist/src/indexer/indexer.js +180 -0
  23. package/dist/src/indexer/indexer.js.map +1 -0
  24. package/dist/src/indexer/parser.d.ts +2 -0
  25. package/dist/src/indexer/parser.js +467 -0
  26. package/dist/src/indexer/parser.js.map +1 -0
  27. package/dist/src/indexer/setup.d.ts +1 -0
  28. package/dist/src/indexer/setup.js +38 -0
  29. package/dist/src/indexer/setup.js.map +1 -0
  30. package/dist/src/indexer/watcher.d.ts +2 -0
  31. package/dist/src/indexer/watcher.js +51 -0
  32. package/dist/src/indexer/watcher.js.map +1 -0
  33. package/dist/src/memory/git-state.d.ts +4 -0
  34. package/dist/src/memory/git-state.js +12 -0
  35. package/dist/src/memory/git-state.js.map +1 -0
  36. package/dist/src/memory/staleness.d.ts +4 -0
  37. package/dist/src/memory/staleness.js +19 -0
  38. package/dist/src/memory/staleness.js.map +1 -0
  39. package/dist/src/search/hybrid-search.d.ts +13 -0
  40. package/dist/src/search/hybrid-search.js +97 -0
  41. package/dist/src/search/hybrid-search.js.map +1 -0
  42. package/dist/src/search/pagerank.d.ts +4 -0
  43. package/dist/src/search/pagerank.js +64 -0
  44. package/dist/src/search/pagerank.js.map +1 -0
  45. package/dist/src/search/token-budget.d.ts +10 -0
  46. package/dist/src/search/token-budget.js +67 -0
  47. package/dist/src/search/token-budget.js.map +1 -0
  48. package/dist/src/server/mcp-server.d.ts +1 -0
  49. package/dist/src/server/mcp-server.js +120 -0
  50. package/dist/src/server/mcp-server.js.map +1 -0
  51. package/dist/src/server/tools/dependencies.d.ts +29 -0
  52. package/dist/src/server/tools/dependencies.js +118 -0
  53. package/dist/src/server/tools/dependencies.js.map +1 -0
  54. package/dist/src/server/tools/find-references.d.ts +20 -0
  55. package/dist/src/server/tools/find-references.js +80 -0
  56. package/dist/src/server/tools/find-references.js.map +1 -0
  57. package/dist/src/server/tools/forget.d.ts +16 -0
  58. package/dist/src/server/tools/forget.js +24 -0
  59. package/dist/src/server/tools/forget.js.map +1 -0
  60. package/dist/src/server/tools/index-status.d.ts +10 -0
  61. package/dist/src/server/tools/index-status.js +21 -0
  62. package/dist/src/server/tools/index-status.js.map +1 -0
  63. package/dist/src/server/tools/lookup.d.ts +25 -0
  64. package/dist/src/server/tools/lookup.js +54 -0
  65. package/dist/src/server/tools/lookup.js.map +1 -0
  66. package/dist/src/server/tools/memories.d.ts +24 -0
  67. package/dist/src/server/tools/memories.js +61 -0
  68. package/dist/src/server/tools/memories.js.map +1 -0
  69. package/dist/src/server/tools/overview.d.ts +19 -0
  70. package/dist/src/server/tools/overview.js +32 -0
  71. package/dist/src/server/tools/overview.js.map +1 -0
  72. package/dist/src/server/tools/recall.d.ts +29 -0
  73. package/dist/src/server/tools/recall.js +113 -0
  74. package/dist/src/server/tools/recall.js.map +1 -0
  75. package/dist/src/server/tools/remember.d.ts +39 -0
  76. package/dist/src/server/tools/remember.js +56 -0
  77. package/dist/src/server/tools/remember.js.map +1 -0
  78. package/dist/src/server/tools/search.d.ts +33 -0
  79. package/dist/src/server/tools/search.js +43 -0
  80. package/dist/src/server/tools/search.js.map +1 -0
  81. package/dist/src/storage/chunk-store.d.ts +25 -0
  82. package/dist/src/storage/chunk-store.js +74 -0
  83. package/dist/src/storage/chunk-store.js.map +1 -0
  84. package/dist/src/storage/database.d.ts +2 -0
  85. package/dist/src/storage/database.js +140 -0
  86. package/dist/src/storage/database.js.map +1 -0
  87. package/dist/src/storage/embedding-store.d.ts +14 -0
  88. package/dist/src/storage/embedding-store.js +42 -0
  89. package/dist/src/storage/embedding-store.js.map +1 -0
  90. package/dist/src/storage/file-store.d.ts +19 -0
  91. package/dist/src/storage/file-store.js +44 -0
  92. package/dist/src/storage/file-store.js.map +1 -0
  93. package/dist/src/storage/graph-store.d.ts +16 -0
  94. package/dist/src/storage/graph-store.js +37 -0
  95. package/dist/src/storage/graph-store.js.map +1 -0
  96. package/dist/src/storage/memory-embedding-store.d.ts +13 -0
  97. package/dist/src/storage/memory-embedding-store.js +38 -0
  98. package/dist/src/storage/memory-embedding-store.js.map +1 -0
  99. package/dist/src/storage/memory-store.d.ts +29 -0
  100. package/dist/src/storage/memory-store.js +86 -0
  101. package/dist/src/storage/memory-store.js.map +1 -0
  102. package/dist/src/types/index.d.ts +87 -0
  103. package/dist/src/types/index.js +21 -0
  104. package/dist/src/types/index.js.map +1 -0
  105. package/dist/src/utils/config.d.ts +2 -0
  106. package/dist/src/utils/config.js +18 -0
  107. package/dist/src/utils/config.js.map +1 -0
  108. package/dist/src/utils/ignore.d.ts +2 -0
  109. package/dist/src/utils/ignore.js +72 -0
  110. package/dist/src/utils/ignore.js.map +1 -0
  111. package/dist/src/utils/logger.d.ts +2 -0
  112. package/dist/src/utils/logger.js +16 -0
  113. package/dist/src/utils/logger.js.map +1 -0
  114. package/dist/src/utils/tokens.d.ts +2 -0
  115. package/dist/src/utils/tokens.js +12 -0
  116. package/dist/src/utils/tokens.js.map +1 -0
  117. package/package.json +59 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 codesearch-mcp contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,133 @@
1
+ # Sverklo
2
+
3
+ Code intelligence for AI agents. Local-first, zero config, semantic search.
4
+
5
+ Sverklo gives AI coding agents (Claude Code, Cursor, any MCP client) deep codebase understanding through hybrid text + semantic search, structural analysis, and dependency-aware ranking.
6
+
7
+ ## Why
8
+
9
+ AI coding agents waste tokens reading irrelevant files. Claude Code has no built-in codebase indexing. Existing solutions are either cloud-dependent (Augment, Greptile) or incomplete (CocoIndex lacks graph ranking, Aider lacks MCP).
10
+
11
+ Sverklo fills the gap:
12
+ - **AST-aware parsing** — extracts functions, classes, types, and interfaces from 10 languages
13
+ - **PageRank ranking** — structurally important files surface first
14
+ - **Semantic embeddings** — all-MiniLM-L6-v2 via ONNX, runs locally, no API keys
15
+ - **Hybrid search** — BM25 text + vector similarity + Reciprocal Rank Fusion
16
+ - **Token-budgeted** — returns exactly what fits in your context window
17
+ - **Incremental** — watches for file changes, updates in real time
18
+ - **Zero config** — auto-detects project, auto-indexes, respects .gitignore
19
+
20
+ ## Quick Start
21
+
22
+ ```bash
23
+ # 1. Install
24
+ git clone https://github.com/nicenemo/sverklo
25
+ cd sverklo
26
+ npm install && npm run build
27
+
28
+ # 2. Download the embedding model (~90MB, one-time)
29
+ npx sverklo setup
30
+
31
+ # 3. Add to Claude Code
32
+ claude mcp add sverklo -- node /path/to/sverklo/dist/bin/sverklo.js .
33
+ ```
34
+
35
+ ## MCP Tools
36
+
37
+ ### `search`
38
+ Hybrid text + semantic code search with PageRank boosting.
39
+
40
+ ```
41
+ query: "authentication middleware that validates JWT tokens"
42
+ token_budget: 4000 # max tokens to return
43
+ scope: "src/api/" # limit to path prefix
44
+ language: "typescript" # filter by language
45
+ type: "function" # filter by symbol type
46
+ ```
47
+
48
+ ### `overview`
49
+ Structural codebase map. Shows most important files and their symbols, ranked by PageRank.
50
+
51
+ ```
52
+ path: "src/" # directory to overview
53
+ token_budget: 4000
54
+ ```
55
+
56
+ ### `lookup`
57
+ Direct symbol lookup by name. Returns full definitions.
58
+
59
+ ```
60
+ symbol: "createRouter"
61
+ type: "function" # function, class, type, interface, method, variable
62
+ ```
63
+
64
+ ### `find_references`
65
+ Find all references to a symbol across the codebase.
66
+
67
+ ```
68
+ symbol: "UserService"
69
+ token_budget: 3000
70
+ ```
71
+
72
+ ### `dependencies`
73
+ Show a file's import graph — what it depends on and what depends on it.
74
+
75
+ ```
76
+ path: "src/api/router.ts"
77
+ direction: "both" # imports, importers, or both
78
+ depth: 2 # traversal depth
79
+ ```
80
+
81
+ ### `index_status`
82
+ Check index health — file count, chunk count, languages, indexing progress.
83
+
84
+ ## Supported Languages
85
+
86
+ TypeScript, JavaScript, Python, Go, Rust, Java, C, C++, Ruby, PHP
87
+
88
+ ## How It Works
89
+
90
+ 1. **File discovery** — walks the project, respects .gitignore and .sverkloignore
91
+ 2. **AST parsing** — structural extraction of functions, classes, types, imports
92
+ 3. **NL descriptions** — generates natural-language descriptions from code metadata; these descriptions, not the raw code, are what get embedded
93
+ 4. **Embeddings** — all-MiniLM-L6-v2 ONNX model, 384d vectors, fully local
94
+ 5. **Dependency graph** — resolves imports, builds file-level graph, computes PageRank
95
+ 6. **Hybrid search** — BM25 + cosine similarity + PageRank via Reciprocal Rank Fusion
96
+ 7. **Token budgeting** — packs results to fit within the specified budget
97
+
98
+ ## Performance
99
+
100
+ On a 30-file TypeScript codebase (71 code chunks):
101
+ - Indexing: **681ms** (including ONNX embedding generation)
102
+ - Search: **<50ms** per query
103
+ - Memory: **~200MB** (ONNX runtime + cached vectors)
104
+
105
+ ## Configuration
106
+
107
+ | Setting | Location |
108
+ |---------|----------|
109
+ | Model files | `~/.sverklo/models/model.onnx` and `tokenizer.json` |
110
+ | Index database | `~/.sverklo/<project-hash>/index.db` |
111
+ | Custom ignores | `.sverkloignore` in project root |
112
+ | Debug logging | `SVERKLO_DEBUG=1` |
113
+
114
+ ## Free vs Pro
115
+
116
+ The core is **free and open source** (MIT). Use it forever, no limits.
117
+
118
+ **Sverklo Pro** (coming soon):
119
+ - Session memory — decisions, preferences, patterns across sessions
120
+ - Memory quality scoring — confidence levels, staleness detection
121
+ - Git-state linked memories — what the code looked like when a decision was made
122
+ - Cross-project pattern transfer
123
+ - Better embedding models
124
+
125
+ **Sverklo Team** (coming soon):
126
+ - Shared team memory — architectural decisions, conventions
127
+ - Cross-developer AI coordination
128
+ - On-prem deployment
129
+ - Admin dashboard
130
+
131
+ ## License
132
+
133
+ MIT
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env node
2
+ import { resolve } from "node:path";
3
+ const args = process.argv.slice(2); // CLI arguments after the node binary and the script path
4
+ const command = args[0]; // first argument: a subcommand, a flag, or the project path
5
+ if (command === "setup" || command === "install") { // both spellings run the model setup, then exit with code 0
6
+ const { setupModels } = await import("../src/indexer/setup.js");
7
+ await setupModels();
8
+ process.exit(0);
9
+ }
10
+ if (command === "--help" || command === "-h") { // print the usage text below and exit with code 0
11
+ console.log(`
12
+ sverklo — code intelligence for AI agents
13
+
14
+ Usage:
15
+ sverklo [project-path] Start the MCP server (stdio transport)
16
+ sverklo setup Download the embedding model (~90MB)
17
+ sverklo --help Show this help
18
+
19
+ MCP Tools:
20
+ search Hybrid text + semantic code search
21
+ overview Structural codebase map ranked by importance
22
+ lookup Direct symbol lookup by name
23
+ find_references Find all references to a symbol
24
+ dependencies Show file dependency graph
25
+ index_status Check index health
26
+
27
+ Add to Claude Code:
28
+ claude mcp add sverklo -- npx sverklo .
29
+
30
+ Environment:
31
+ SVERKLO_DEBUG=1 Enable debug logging to stderr
32
+ `);
33
+ process.exit(0);
34
+ }
35
+ const rootPath = resolve(command || process.cwd()); // any other first argument is treated as the project path; defaults to the current directory
36
+ const { startMcpServer } = await import("../src/index.js"); // imported only after the setup/help branches have exited
37
+ startMcpServer(rootPath).catch((err) => { // a rejected startup is reported on stderr and exits with code 1
38
+ console.error("Failed to start sverklo:", err);
39
+ process.exit(1);
40
+ });
41
+ //# sourceMappingURL=sverklo.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sverklo.js","sourceRoot":"","sources":["../../bin/sverklo.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;AACnC,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;AAExB,IAAI,OAAO,KAAK,OAAO,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;IACjD,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,yBAAyB,CAAC,CAAC;IAChE,MAAM,WAAW,EAAE,CAAC;IACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,IAAI,OAAO,KAAK,QAAQ,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;IAC7C,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;CAqBb,CAAC,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;AAEnD,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;AAC3D,cAAc,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACrC,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAE,GAAG,CAAC,CAAC;IAC/C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -0,0 +1,3 @@
1
+ export { startMcpServer } from "./server/mcp-server.js";
2
+ export { Indexer } from "./indexer/indexer.js";
3
+ export type { SearchResult, CodeChunk, FileRecord, IndexStatus } from "./types/index.js";
@@ -0,0 +1,3 @@
1
+ export { startMcpServer } from "./server/mcp-server.js";
2
+ export { Indexer } from "./indexer/indexer.js";
3
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,2 @@
1
+ import type { ParsedChunk } from "../types/index.js";
2
+ export declare function describeChunk(chunk: ParsedChunk, filePath: string, language: string): string; // one-line description of a chunk: type, name, file path, then parameters, return type and first doc line when they can be extracted
@@ -0,0 +1,112 @@
1
+ // Generate natural language descriptions from AST metadata.
2
+ // Key insight from Greptile: embedding NL descriptions produces
3
+ // much better semantic search results than embedding raw code.
4
/**
 * Build a one-line natural-language summary of a parsed chunk.
 *
 * The summary always names the chunk (or calls it anonymous) and the file it
 * lives in. Parameter names, return type and the first documentation line are
 * appended only when the helper extractors find them.
 *
 * @param chunk    Parsed chunk; reads `type`, `name`, `signature`, `content`.
 * @param filePath Path reported after "in".
 * @param language Language id forwarded to the extractors.
 * @returns The description fragments joined with single spaces.
 */
export function describeChunk(chunk, filePath, language) {
    const kind = chunk.type === "block" ? "code block" : chunk.type;
    const subject = chunk.name ? `${kind} '${chunk.name}'` : `anonymous ${kind}`;
    const fragments = [subject, `in ${filePath}`];

    // A signature that merely repeats the name carries no extra information.
    if (chunk.signature && chunk.signature !== chunk.name) {
        const paramList = extractParams(chunk.signature, language);
        if (paramList) {
            fragments.push(`with parameters: ${paramList}`);
        }
        const returns = extractReturnType(chunk.signature, language);
        if (returns) {
            fragments.push(`returns ${returns}`);
        }
    }

    const summary = extractDocstring(chunk.content, language);
    if (summary) {
        fragments.push(`— ${summary}`);
    }
    return fragments.join(" ");
}
32
/**
 * Split a parameter list on commas that are not nested inside
 * `<>`, `[]`, `{}` or `()`, so `a: Map<string, number>, b` yields two items.
 * The `>` of `=>` and `->` is not treated as a closing bracket.
 */
function splitTopLevelCommas(list) {
    const parts = [];
    let depth = 0;
    let current = "";
    for (let i = 0; i < list.length; i++) {
        const ch = list[i];
        if (ch === "<" || ch === "[" || ch === "{" || ch === "(") {
            depth++;
        }
        else if (ch === "]" || ch === "}" || ch === ")" ||
            (ch === ">" && list[i - 1] !== "=" && list[i - 1] !== "-")) {
            // Clamp at zero so a stray closer cannot hide later top-level commas.
            depth = Math.max(0, depth - 1);
        }
        if (ch === "," && depth === 0) {
            parts.push(current);
            current = "";
        }
        else {
            current += ch;
        }
    }
    parts.push(current);
    return parts;
}
/**
 * Extract a comma-separated list of parameter names from a signature.
 *
 * Only the text between the first "(" and the first ")" is considered.
 * Receiver-style parameters (`self`, `cls`, `this`) are dropped. Type
 * annotations and default values are stripped for every language except Go,
 * where the first whitespace-separated word of each parameter is kept.
 *
 * @param signature Signature text as produced by the parser.
 * @param language  Language id ("python" and "go" get dedicated handling).
 * @returns Names joined with ", ", or null when there are none.
 */
function extractParams(signature, language) {
    const m = signature.match(/\(([^)]*)\)/);
    if (!m || !m[1].trim())
        return null;
    const names = splitTopLevelCommas(m[1])
        .map((p) => p.trim())
        .filter((p) => p && p !== "self" && p !== "cls" && p !== "this")
        .map((p) => {
        if (language === "python")
            return p.split(":")[0].split("=")[0].trim();
        if (language === "go")
            return p.split(/\s+/)[0];
        // Drop the type annotation and any default value, then binding
        // keywords and the optional-parameter marker.
        return p
            .split(":")[0]
            .split("=")[0]
            .trim()
            .replace(/^(const|let|var|mut|&)\s+/, "")
            .replace(/\?$/, "")
            .trim();
    })
        .filter(Boolean);
    return names.length > 0 ? names.join(", ") : null;
}
52
/**
 * Pull the declared return type out of a signature string.
 *
 * Handles TypeScript/JavaScript (`): Type`), Python (`-> Type:`),
 * Go (`) Type` or `) (A, B)`) and Rust (`-> Type {` / `-> Type where`).
 *
 * @param signature Signature text.
 * @param language  Language id selecting the pattern.
 * @returns The trimmed return type, or null when the language is not
 *          handled or the pattern does not match.
 */
function extractReturnType(signature, language) {
    switch (language) {
        case "typescript":
        case "javascript": {
            // function foo(): ReturnType
            const hit = /\)\s*:\s*([^{=]+)/.exec(signature);
            return hit === null ? null : hit[1].trim();
        }
        case "python": {
            const hit = /->\s*(.+?)(?:\s*:)?\s*$/.exec(signature);
            return hit === null ? null : hit[1].trim();
        }
        case "go": {
            // func foo() (ReturnType, error)  — or a single bare type
            const hit = /\)\s*(?:\(([^)]+)\)|(\w+(?:\.\w+)?))\s*\{?$/.exec(signature);
            if (hit === null) {
                return null;
            }
            const inner = hit[1] || hit[2];
            return inner?.trim() || null;
        }
        case "rust": {
            const hit = /->\s*(.+?)\s*(?:\{|where)/.exec(signature);
            return hit === null ? null : hit[1].trim();
        }
        default:
            return null;
    }
}
73
/**
 * Find the first human-readable documentation line in a chunk.
 *
 * Checks, in order:
 *  1. Python only: a triple-quoted docstring opening on lines 2-5.
 *  2. A leading `/**` or `///` comment: first non-empty, non-`@tag` line
 *     among the first five lines after comment markers are stripped.
 *  3. A `//` or `#` comment on the second line.
 *
 * @param content  Full chunk text.
 * @param language Language id; only "python" changes behaviour.
 * @returns The documentation line, or null when none is found.
 */
function extractDocstring(content, language) {
    const rows = content.split("\n");
    const head = rows.slice(0, 5);

    if (language === "python") {
        for (let idx = 1; idx < head.length; idx++) {
            const line = rows[idx].trim();
            const opensDocstring = line.startsWith('"""') || line.startsWith("'''");
            if (!opensDocstring) {
                continue;
            }
            const inline = line.replace(/^["']{3}/, "").replace(/["']{3}$/, "").trim();
            if (inline) {
                return inline;
            }
            // Opening quotes stand alone: the text starts on the next line.
            if (idx + 1 < rows.length) {
                return rows[idx + 1].trim();
            }
        }
    }

    const firstRow = rows[0].trim();
    if (firstRow.startsWith("/**") || firstRow.startsWith("///")) {
        const stripMarkers = (raw) => raw
            .trim()
            .replace(/^\/\*\*\s*/, "")
            .replace(/^\*\s*/, "")
            .replace(/^\/\/\/\s*/, "")
            .replace(/\*\/$/, "")
            .trim();
        const firstProse = head
            .map((raw) => stripMarkers(raw))
            .find((text) => text && !text.startsWith("@"));
        if (firstProse !== undefined) {
            return firstProse;
        }
    }

    if (rows.length > 1) {
        const next = rows[1].trim();
        if (next.startsWith("//") || next.startsWith("#")) {
            return next.replace(/^\/\/\s*/, "").replace(/^#\s*/, "").trim() || null;
        }
    }
    return null;
}
112
+ //# sourceMappingURL=describer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"describer.js","sourceRoot":"","sources":["../../../src/indexer/describer.ts"],"names":[],"mappings":"AAEA,4DAA4D;AAC5D,gEAAgE;AAChE,+DAA+D;AAE/D,MAAM,UAAU,aAAa,CAC3B,KAAkB,EAClB,QAAgB,EAChB,QAAgB;IAEhB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC;IAEpE,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,KAAK,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC;IAC5C,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,aAAa,QAAQ,EAAE,CAAC,CAAC;IACtC,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,MAAM,QAAQ,EAAE,CAAC,CAAC;IAE7B,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,IAAI,EAAE,CAAC;QACtD,wCAAwC;QACxC,MAAM,MAAM,GAAG,aAAa,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACxD,IAAI,MAAM,EAAE,CAAC;YACX,KAAK,CAAC,IAAI,CAAC,oBAAoB,MAAM,EAAE,CAAC,CAAC;QAC3C,CAAC;QAED,MAAM,UAAU,GAAG,iBAAiB,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAChE,IAAI,UAAU,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,WAAW,UAAU,EAAE,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,qDAAqD;IACrD,MAAM,SAAS,GAAG,gBAAgB,CAAC,KAAK,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;IAC5D,IAAI,SAAS,EAAE,CAAC;QACd,KAAK,CAAC,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,aAAa,CACpB,SAAiB,EACjB,QAAgB;IAEhB,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IACzC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC;IAEpC,0BAA0B;IAC1B,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;SAChB,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,MAAM,CAAC;SAC/D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACT,+CAA+C;QAC/C,IAAI,QAAQ,KAAK,QAAQ;YAAE,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACvE,IAAI,QAAQ,KAAK,IAAI;YAAE,OAAO,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAChD,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CA
AC,2BAA2B,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IACzE,CAAC,CAAC;SACD,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACtD,CAAC;AAED,SAAS,iBAAiB,CACxB,SAAiB,EACjB,QAAgB;IAEhB,IAAI,QAAQ,KAAK,YAAY,IAAI,QAAQ,KAAK,YAAY,EAAE,CAAC;QAC3D,6BAA6B;QAC7B,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAC/C,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAChC,CAAC;IACD,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC1B,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;QACrD,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAChC,CAAC;IACD,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QACtB,iCAAiC;QACjC,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,6CAA6C,CAAC,CAAC;QACzE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;IACnD,CAAC;IACD,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QACxB,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;QACvD,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAChC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,gBAAgB,CAAC,OAAe,EAAE,QAAgB;IACzD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC1B,kCAAkC;QAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACnD,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAChC,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC3D,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC3E,IAAI,GAAG;oBAAE,OAAO,GAAG,CAAC;gBACpB,uBAAuB;gBACvB,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM;oBAAE,OAAO,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YACvD,CAAC;QACH,CAAC;IACH,CAAC;IAED,+CAA+C;IAC/C,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,UA
AU,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACnD,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC;iBACnB,IAAI,EAAE;iBACN,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC;iBACzB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;iBACrB,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC;iBACzB,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;iBACpB,IAAI,EAAE,CAAC;YACV,IAAI,KAAK,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC;QACpD,CAAC;IACH,CAAC;IAED,6CAA6C;IAC7C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACnC,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9D,OAAO,UAAU,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC;QAChF,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1,4 @@
1
+ export declare function initEmbedder(): Promise<void>; // loads the ONNX model and tokenizer vocabulary once; when either is missing or loading fails, embed() uses its fallback
2
+ export declare function embed(texts: string[]): Promise<Float32Array[]>; // returns one L2-normalized vector per input text, in input order
3
+ export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number; // plain dot product over a's length; equals cosine similarity only for already-normalized vectors
4
+ export declare const EMBEDDING_DIM = 384; // vector length used by the fallback embedder
@@ -0,0 +1,249 @@
1
+ import { readFileSync, existsSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import { homedir } from "node:os";
4
+ import { log, logError } from "../utils/logger.js";
5
+ const MODEL_DIR = join(homedir(), ".sverklo", "models"); // per-user model directory; findFile checks it first
5
+ const BUNDLED_MODEL_DIR = join(import.meta.dirname ?? ".", "..", "..", "models"); // "models" directory two levels above this module; checked second
6
+ const MODEL_DIM = 384; // vector length produced by fallbackEmbed and exported as EMBEDDING_DIM
7
+ const MAX_SEQ_LEN = 128; // Short sequences for code chunks — speed over completeness
8
+ const CLS_TOKEN = 101; // id placed at the start of every token sequence
9
+ const SEP_TOKEN = 102; // id appended after the last token
10
+ const PAD_TOKEN = 0; // id filling unused positions up to MAX_SEQ_LEN
11
+ const UNK_TOKEN = 100; // id emitted when no vocabulary piece matches
12
+ let ort = null; // onnxruntime-node module, assigned by initEmbedder
13
+ let session = null; // ONNX inference session; while null, embed() uses fallbackEmbed
14
+ let vocab = null; // Map of token string -> id built from tokenizer.json
15
+ let initialized = false; // set once initEmbedder has run, whether or not the model loaded
17
/**
 * One-time setup of the ONNX embedder.
 *
 * Locates `model.onnx` and `tokenizer.json`, creates the inference session and
 * builds the vocabulary map. If either file is missing, or anything in the
 * loading sequence throws, the module stays in fallback mode (`session` and
 * `vocab` are null). The `initialized` flag is set on every path, so later
 * calls return immediately.
 */
export async function initEmbedder() {
    if (initialized) {
        return;
    }

    const modelPath = findFile("model.onnx");
    const tokenizerPath = findFile("tokenizer.json");
    const missingFiles = !modelPath || !tokenizerPath;
    if (missingFiles) {
        logError("Model files not found. Falling back to lightweight embeddings.\n" +
            "For better search quality, place model.onnx and tokenizer.json in ~/.sverklo/models/");
        initialized = true;
        return;
    }

    const sessionOptions = {
        executionProviders: ["cpu"],
        graphOptimizationLevel: "all",
        intraOpNumThreads: 2,
    };
    try {
        // Runtime and model
        ort = await import("onnxruntime-node");
        session = await ort.InferenceSession.create(modelPath, sessionOptions);
        log(`ONNX model loaded: ${modelPath}`);
        log(`Input names: ${session.inputNames}, Output names: ${session.outputNames}`);

        // Vocabulary: token string -> id
        const tokenizerJson = readFileSync(tokenizerPath, "utf-8");
        const tokenizer = JSON.parse(tokenizerJson);
        vocab = new Map(Object.entries(tokenizer.model.vocab));
        log(`Tokenizer loaded: ${vocab.size} tokens`);
        initialized = true;
    }
    catch (err) {
        // Any failure leaves the embedder in fallback mode.
        logError("Failed to initialize ONNX embedder", err);
        session = null;
        vocab = null;
        initialized = true;
    }
}
53
/**
 * Resolve a model asset by name.
 *
 * Looks in the per-user model directory first and the bundled directory
 * second.
 *
 * @param filename File name to look for (e.g. "model.onnx").
 * @returns The first existing path, or null when neither location has it.
 */
function findFile(filename) {
    const searchDirs = [MODEL_DIR, BUNDLED_MODEL_DIR];
    for (const dir of searchDirs) {
        const candidate = join(dir, filename);
        if (existsSync(candidate)) {
            return candidate;
        }
    }
    return null;
}
63
+ // ── WordPiece Tokenizer ─────────────────────────────────────────────
64
/**
 * Convert text into fixed-length model inputs.
 *
 * When no vocabulary is loaded this defers to `fallbackTokenize`. Otherwise the
 * text is lower-cased, accents are stripped, it is split on whitespace and
 * punctuation, and each word is broken into WordPiece ids. The sequence is
 * framed by CLS/SEP, truncated to MAX_SEQ_LEN and padded with PAD_TOKEN.
 *
 * @param text Text to tokenize.
 * @returns `inputIds` and `attentionMask`, both arrays of length MAX_SEQ_LEN;
 *          the mask is 1 for real tokens and 0 for padding.
 */
function tokenize(text) {
    if (!vocab) {
        return fallbackTokenize(text);
    }
    // BERT-style preprocessing: lowercase, strip accents.
    // NFD decomposition must come first: it turns precomposed characters such
    // as "é" into a base letter plus a combining mark, which the replace then
    // removes. Without it the replace only affects text that is already
    // decomposed.
    const cleaned = text
        .toLowerCase()
        .normalize("NFD")
        .replace(/[\u0300-\u036f]/g, "");
    // Split on whitespace and punctuation (BertPreTokenizer behavior); the
    // capture group keeps punctuation as separate items, and the filter drops
    // whitespace-only items.
    const words = cleaned.split(/(\s+|[^\w\s])/g).filter((w) => w.trim());
    const tokens = [CLS_TOKEN];
    for (const word of words) {
        // Always leave one slot for the closing SEP token.
        if (tokens.length >= MAX_SEQ_LEN - 1)
            break;
        const subTokens = wordPieceTokenize(word.trim());
        for (const st of subTokens) {
            if (tokens.length >= MAX_SEQ_LEN - 1)
                break;
            tokens.push(st);
        }
    }
    tokens.push(SEP_TOKEN);
    // Pad to MAX_SEQ_LEN
    const inputIds = new Array(MAX_SEQ_LEN).fill(PAD_TOKEN);
    const attentionMask = new Array(MAX_SEQ_LEN).fill(0);
    for (let i = 0; i < tokens.length; i++) {
        inputIds[i] = tokens[i];
        attentionMask[i] = 1;
    }
    return { inputIds, attentionMask };
}
93
/**
 * Greedy longest-match-first WordPiece split of a single word.
 *
 * At each position the longest vocabulary entry is taken; pieces after the
 * first are looked up with a "##" prefix. When nothing matches at a position,
 * UNK_TOKEN is emitted and the scan advances by one character.
 *
 * @param word Word to split.
 * @returns Token ids; `[UNK_TOKEN]` when no vocabulary is loaded.
 */
function wordPieceTokenize(word) {
    if (!vocab) {
        return [UNK_TOKEN];
    }
    const ids = [];
    let pos = 0;
    while (pos < word.length) {
        let matchedEnd = -1;
        // Try the longest remaining substring first, shrinking from the right.
        for (let stop = word.length; stop > pos; stop--) {
            const piece = word.slice(pos, stop);
            const id = vocab.get(pos === 0 ? piece : "##" + piece);
            if (id !== undefined) {
                ids.push(id);
                matchedEnd = stop;
                break;
            }
        }
        if (matchedEnd === -1) {
            ids.push(UNK_TOKEN);
            pos += 1;
        }
        else {
            pos = matchedEnd;
        }
    }
    return ids;
}
119
/**
 * Tokenizer used when no vocabulary is available.
 *
 * Lower-cases the text, splits on whitespace, keeps at most MAX_SEQ_LEN - 2
 * words and maps each to a hash-derived id in the range 1000-30999, framed by
 * CLS/SEP and padded with PAD_TOKEN.
 *
 * @param text Text to tokenize.
 * @returns `inputIds` and `attentionMask`, both arrays of length MAX_SEQ_LEN.
 */
function fallbackTokenize(text) {
    const inputIds = new Array(MAX_SEQ_LEN).fill(PAD_TOKEN);
    const attentionMask = new Array(MAX_SEQ_LEN).fill(0);
    const place = (slot, id) => {
        inputIds[slot] = id;
        attentionMask[slot] = 1;
    };
    // 31-multiplier string hash over UTF-16 code units, kept in 32-bit range.
    const hashWord = (word) => word
        .split("")
        .reduce((acc, ch) => ((acc << 5) - acc + ch.charCodeAt(0)) | 0, 0);

    const words = text.toLowerCase().split(/\s+/).slice(0, MAX_SEQ_LEN - 2);
    place(0, CLS_TOKEN);
    words.forEach((word, idx) => {
        place(idx + 1, (Math.abs(hashWord(word)) % 30000) + 1000);
    });
    place(words.length + 1, SEP_TOKEN);
    return { inputIds, attentionMask };
}
138
+ // ── ONNX Inference ──────────────────────────────────────────────────
139
/**
 * Embed each input text as an L2-normalized vector.
 *
 * Without a loaded ONNX session this returns `fallbackEmbed(texts)`. Otherwise
 * texts are tokenized and run through the model in batches of 16; the first
 * model output is mean-pooled over the positions whose attention mask is 1,
 * then L2-normalized.
 *
 * @param texts Texts to embed.
 * @returns One Float32Array per input text, in input order.
 */
export async function embed(texts) {
    if (!session || !ort) {
        return fallbackEmbed(texts);
    }
    const results = [];
    // Process in small batches for memory efficiency
    const BATCH = 16;
    for (let b = 0; b < texts.length; b += BATCH) {
        const batch = texts.slice(b, b + BATCH);
        const batchSize = batch.length;
        // Tokenize each text exactly once. The per-text attention masks are
        // kept so the pooling step below does not have to tokenize again.
        const allInputIds = new BigInt64Array(batchSize * MAX_SEQ_LEN);
        const allAttentionMask = new BigInt64Array(batchSize * MAX_SEQ_LEN);
        const masks = [];
        for (let i = 0; i < batchSize; i++) {
            const { inputIds, attentionMask } = tokenize(batch[i]);
            masks.push(attentionMask);
            for (let j = 0; j < MAX_SEQ_LEN; j++) {
                allInputIds[i * MAX_SEQ_LEN + j] = BigInt(inputIds[j]);
                allAttentionMask[i * MAX_SEQ_LEN + j] = BigInt(attentionMask[j]);
            }
        }
        // Create tensors
        const feeds = {
            input_ids: new ort.Tensor("int64", allInputIds, [batchSize, MAX_SEQ_LEN]),
            attention_mask: new ort.Tensor("int64", allAttentionMask, [batchSize, MAX_SEQ_LEN]),
        };
        // token_type_ids (all zeros for single-sentence input) is only built
        // when the model declares that input.
        if (session.inputNames.includes("token_type_ids")) {
            const tokenTypeIds = new BigInt64Array(batchSize * MAX_SEQ_LEN);
            feeds.token_type_ids = new ort.Tensor("int64", tokenTypeIds, [batchSize, MAX_SEQ_LEN]);
        }
        // Run inference
        const output = await session.run(feeds);
        // Use the first declared output.
        // NOTE(review): the indexing below assumes it is laid out as
        // [batchSize, seqLen, hiddenSize]; confirm for models whose first
        // output has a different rank.
        const outputName = session.outputNames[0];
        const outputData = output[outputName].data;
        const outputDims = output[outputName].dims;
        const hiddenSize = outputDims[outputDims.length - 1];
        const seqLen = outputDims.length === 3 ? outputDims[1] : MAX_SEQ_LEN;
        // Mean pooling with attention mask
        for (let i = 0; i < batchSize; i++) {
            const attentionMask = masks[i];
            const pooled = new Float32Array(hiddenSize);
            let maskSum = 0;
            for (let t = 0; t < seqLen; t++) {
                const mask = attentionMask[t] || 0;
                maskSum += mask;
                if (mask === 0)
                    continue;
                for (let d = 0; d < hiddenSize; d++) {
                    pooled[d] += outputData[i * seqLen * hiddenSize + t * hiddenSize + d] * mask;
                }
            }
            if (maskSum > 0) {
                for (let d = 0; d < hiddenSize; d++) {
                    pooled[d] /= maskSum;
                }
            }
            // L2 normalize (skipped for an all-zero vector to avoid dividing by zero)
            let norm = 0;
            for (let d = 0; d < hiddenSize; d++)
                norm += pooled[d] * pooled[d];
            norm = Math.sqrt(norm);
            if (norm > 0) {
                for (let d = 0; d < hiddenSize; d++)
                    pooled[d] /= norm;
            }
            results.push(pooled);
        }
    }
    return results;
}
214
+ // Fallback when ONNX is not available
215
/**
 * Model-free embedding used when the ONNX session is unavailable.
 *
 * Each character of each whitespace-separated word adds a sine term to every
 * one of the MODEL_DIM components; the phase and the weight both depend on the
 * word's position. The result is L2-normalized.
 *
 * @param texts Texts to embed.
 * @returns One Float32Array of length MODEL_DIM per input text.
 */
function fallbackEmbed(texts) {
    const embedOne = (text) => {
        const vec = new Float32Array(MODEL_DIM);
        text.toLowerCase().split(/\s+/).forEach((word, w) => {
            // Later words get a shifted phase and a smaller weight.
            const phase = w * 0.1;
            const weight = 1 / (1 + w * 0.1);
            for (let c = 0; c < word.length; c++) {
                const code = word.charCodeAt(c);
                for (let d = 0; d < MODEL_DIM; d++) {
                    vec[d] += Math.sin(code * (d + 1) * 0.01 + phase) * weight;
                }
            }
        });
        // L2 normalize (an all-zero vector is left as is)
        let sumSquares = 0;
        for (let d = 0; d < MODEL_DIM; d++) {
            sumSquares += vec[d] * vec[d];
        }
        const norm = Math.sqrt(sumSquares);
        if (norm > 0) {
            for (let d = 0; d < MODEL_DIM; d++) {
                vec[d] /= norm;
            }
        }
        return vec;
    };
    return texts.map((text) => embedOne(text));
}
242
/**
 * Dot product of two vectors, iterating over the length of `a`.
 *
 * This equals the cosine similarity only when both vectors are already
 * L2-normalized, which is what the embedders in this module produce.
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns Sum of element-wise products.
 */
export function cosineSimilarity(a, b) {
    return a.reduce((dot, value, idx) => dot + value * b[idx], 0);
}
248
+ export const EMBEDDING_DIM = MODEL_DIM; // public name for the embedding vector length
249
+ //# sourceMappingURL=embedder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../../src/indexer/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAEnD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;AACxD,MAAM,iBAAiB,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;AAEjF,MAAM,SAAS,GAAG,GAAG,CAAC;AACtB,MAAM,WAAW,GAAG,GAAG,CAAC,CAAC,4DAA4D;AACrF,MAAM,SAAS,GAAG,GAAG,CAAC;AACtB,MAAM,SAAS,GAAG,GAAG,CAAC;AACtB,MAAM,SAAS,GAAG,CAAC,CAAC;AACpB,MAAM,SAAS,GAAG,GAAG,CAAC;AAEtB,IAAI,GAAG,GAAQ,IAAI,CAAC;AACpB,IAAI,OAAO,GAAQ,IAAI,CAAC;AACxB,IAAI,KAAK,GAA+B,IAAI,CAAC;AAC7C,IAAI,WAAW,GAAG,KAAK,CAAC;AAExB,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,IAAI,WAAW;QAAE,OAAO;IAExB,mBAAmB;IACnB,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC;IACzC,MAAM,aAAa,GAAG,QAAQ,CAAC,gBAAgB,CAAC,CAAC;IAEjD,IAAI,CAAC,SAAS,IAAI,CAAC,aAAa,EAAE,CAAC;QACjC,QAAQ,CACN,kEAAkE;YAChE,sFAAsF,CACzF,CAAC;QACF,WAAW,GAAG,IAAI,CAAC;QACnB,OAAO;IACT,CAAC;IAED,IAAI,CAAC;QACH,oBAAoB;QACpB,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;QAEvC,aAAa;QACb,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,SAAS,EAAE;YACrD,kBAAkB,EAAE,CAAC,KAAK,CAAC;YAC3B,sBAAsB,EAAE,KAAK;YAC7B,iBAAiB,EAAE,CAAC;SACrB,CAAC,CAAC;QAEH,GAAG,CAAC,sBAAsB,SAAS,EAAE,CAAC,CAAC;QACvC,GAAG,CAAC,gBAAgB,OAAO,CAAC,UAAU,mBAAmB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;QAEhF,uBAAuB;QACvB,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC,CAAC;QACnE,KAAK,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAuB,CAAC,CAAC;QAE7E,GAAG,CAAC,qBAAqB,KAAK,CAAC,IAAI,SAAS,CAAC,CAAC;QAC9C,WAAW,GAAG,IAAI,CAAC;IACrB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,QAAQ,CAAC,oCAAoC,EAAE,GAAG,CAAC,CAAC;QACpD,OAAO,GAAG,IAAI,CAAC;QACf,KAAK,GAAG,IAAI,CAAC;QACb,WAAW,GAAG,IAAI,CAAC;IACrB,CAAC;AACH,CAAC;AAED,SAAS,QAAQ,CAAC,QAAgB;IAChC,+CAA+C;IAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,QAAQ,CAA
C,CAAC;IAC3C,IAAI,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,QAAQ,CAAC;IAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,EAAE,QAAQ,CAAC,CAAC;IACtD,IAAI,UAAU,CAAC,WAAW,CAAC;QAAE,OAAO,WAAW,CAAC;IAChD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,uEAAuE;AAEvE,SAAS,QAAQ,CAAC,IAAY;IAC5B,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,qDAAqD;IACrD,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;IAEnE,kEAAkE;IAClE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAEtE,MAAM,MAAM,GAAa,CAAC,SAAS,CAAC,CAAC;IAErC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,MAAM,CAAC,MAAM,IAAI,WAAW,GAAG,CAAC;YAAE,MAAM;QAE5C,MAAM,SAAS,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QACjD,KAAK,MAAM,EAAE,IAAI,SAAS,EAAE,CAAC;YAC3B,IAAI,MAAM,CAAC,MAAM,IAAI,WAAW,GAAG,CAAC;gBAAE,MAAM;YAC5C,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAEvB,qBAAqB;IACrB,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,MAAM,aAAa,GAAG,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAErD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,QAAQ,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACvB,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,IAAI,CAAC,KAAK;QAAE,OAAO,CAAC,SAAS,CAAC,CAAC;IAE/B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QAC3B,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACtB,IAAI,KAAK,GAAG,KAAK,CAAC;QAElB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;YACnB,MAAM,MAAM,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YACpF,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAE7B,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;gBACrB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAChB,KAAK,GAAG,IAAI,CAAC;gBACb,KAAK,GAAG,GAAG,CAAC;gB
ACZ,MAAM;YACR,CAAC;YACD,GAAG,EAAE,CAAC;QACR,CAAC;QAED,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACvB,KAAK,EAAE,CAAC;QACV,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,2DAA2D;IAC3D,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,GAAG,CAAC,CAAC,CAAC;IACxE,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,MAAM,aAAa,GAAG,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAErD,QAAQ,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC;IACxB,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC3D,CAAC;QACD,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC;QAClD,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IAC3B,CAAC;IAED,QAAQ,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;IACvC,aAAa,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IAEpC,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,CAAC;AACrC,CAAC;AAED,uEAAuE;AAEvE,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,KAAe;IACzC,IAAI,CAAC,OAAO,IAAI,CAAC,GAAG,EAAE,CAAC;QACrB,OAAO,aAAa,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,MAAM,OAAO,GAAmB,EAAE,CAAC;IAEnC,iDAAiD;IACjD,MAAM,KAAK,GAAG,EAAE,CAAC;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC;QAC7C,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC;QACxC,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC;QAE/B,iBAAiB;QACjB,MAAM,WAAW,GAAG,IAAI,aAAa,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;QAC/D,MAAM,gBAAgB,GAAG,IAAI,aAAa,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;QAEpE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YA
CvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;gBACrC,WAAW,CAAC,CAAC,GAAG,WAAW,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;gBACvD,gBAAgB,CAAC,CAAC,GAAG,WAAW,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;YACnE,CAAC;QACH,CAAC;QAED,iBAAiB;QACjB,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,WAAW,EAAE,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC;QACtF,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,gBAAgB,EAAE,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC;QAEhG,2DAA2D;QAC3D,MAAM,YAAY,GAAG,IAAI,aAAa,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,YAAY;QAC7E,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC;QAExF,gBAAgB;QAChB,MAAM,KAAK,GAAwB;YACjC,SAAS,EAAE,cAAc;YACzB,cAAc,EAAE,mBAAmB;SACpC,CAAC;QAEF,6CAA6C;QAC7C,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;YAClD,KAAK,CAAC,cAAc,GAAG,eAAe,CAAC;QACzC,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAExC,mEAAmE;QACnE,MAAM,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,IAAoB,CAAC;QAC3D,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,kCAAkC;QAC9E,MAAM,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;QAErE,mCAAmC;QACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,MAAM,EAAE,aAAa,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,UAAU,CAAC,CAAC;YAC5C,IAAI,OAAO,GAAG,CAAC,CAAC;YAEhB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAChC,MAAM,IAAI,GAAG,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBACnC,OAAO,IAAI,IAAI,CAAC;gBAChB,IAAI,IAAI,KAAK,CAAC;oBAAE,SAAS;gBAEzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;oBACpC,MAAM,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC,CAAC,GAAG,MAAM,GAAG,UAAU,GAAG,CAAC,GAAG,UAAU,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC;gBAC/E,CAAC;YACH,CAAC;YAED,IAAI,OAAO,GAAG,CA
AC,EAAE,CAAC;gBAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;oBACpC,MAAM,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC;gBACvB,CAAC;YACH,CAAC;YAED,eAAe;YACf,IAAI,IAAI,GAAG,CAAC,CAAC;YACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE;gBAAE,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACnE,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACvB,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;gBACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE;oBAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;YACzD,CAAC;YAED,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,sCAAsC;AACtC,SAAS,aAAa,CAAC,KAAe;IACpC,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAE9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;gBACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,GAAG,CAAC,CAAC,CAAC;wBACJ,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,GAAG,CAAC;4BAC7C,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;gBACxB,CAAC;YACH,CAAC;QACH,CAAC;QAED,eAAe;QACf,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;YAAE,IAAI,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QAC5D,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;YACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;gBAAE,GAAG,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;QACrD,CAAC;QAED,OAAO,GAAG,CAAC;IACb,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,CAAe,EAAE,CAAe;IAC/D,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,GAAG,IAAI,CAAC,C
AAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACtD,OAAO,GAAG,CAAC,CAAC,iCAAiC;AAC/C,CAAC;AAED,MAAM,CAAC,MAAM,aAAa,GAAG,SAAS,CAAC"}
@@ -0,0 +1,9 @@
1
+ import type { Ignore } from "ignore";
2
+ export interface DiscoveredFile { // Element type of the array returned by discoverFiles.
3
+ absolutePath: string; // presumably the full filesystem path of the file — confirm against the implementation
4
+ relativePath: string; // presumably relative to the rootPath given to discoverFiles — TODO confirm
5
+ language: string; // language label for the file; how it is derived is not visible in this declaration
6
+ lastModified: number; // modification timestamp; unit (ms vs s) is not shown here — confirm
7
+ sizeBytes: number; // file size in bytes, per the field name
8
+ }
9
+ export declare function discoverFiles(rootPath: string, ignoreFilter: Ignore): DiscoveredFile[]; // Synchronous signature: takes a root path and an Ignore instance from the "ignore" package, returns DiscoveredFile records. NOTE(review): presumably skips paths matched by ignoreFilter — implementation not visible here.