sverklo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +133 -0
- package/dist/bin/sverklo.d.ts +2 -0
- package/dist/bin/sverklo.js +41 -0
- package/dist/bin/sverklo.js.map +1 -0
- package/dist/src/index.d.ts +3 -0
- package/dist/src/index.js +3 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/indexer/describer.d.ts +2 -0
- package/dist/src/indexer/describer.js +112 -0
- package/dist/src/indexer/describer.js.map +1 -0
- package/dist/src/indexer/embedder.d.ts +4 -0
- package/dist/src/indexer/embedder.js +249 -0
- package/dist/src/indexer/embedder.js.map +1 -0
- package/dist/src/indexer/file-discovery.d.ts +9 -0
- package/dist/src/indexer/file-discovery.js +52 -0
- package/dist/src/indexer/file-discovery.js.map +1 -0
- package/dist/src/indexer/graph-builder.d.ts +5 -0
- package/dist/src/indexer/graph-builder.js +77 -0
- package/dist/src/indexer/graph-builder.js.map +1 -0
- package/dist/src/indexer/indexer.d.ts +26 -0
- package/dist/src/indexer/indexer.js +180 -0
- package/dist/src/indexer/indexer.js.map +1 -0
- package/dist/src/indexer/parser.d.ts +2 -0
- package/dist/src/indexer/parser.js +467 -0
- package/dist/src/indexer/parser.js.map +1 -0
- package/dist/src/indexer/setup.d.ts +1 -0
- package/dist/src/indexer/setup.js +38 -0
- package/dist/src/indexer/setup.js.map +1 -0
- package/dist/src/indexer/watcher.d.ts +2 -0
- package/dist/src/indexer/watcher.js +51 -0
- package/dist/src/indexer/watcher.js.map +1 -0
- package/dist/src/memory/git-state.d.ts +4 -0
- package/dist/src/memory/git-state.js +12 -0
- package/dist/src/memory/git-state.js.map +1 -0
- package/dist/src/memory/staleness.d.ts +4 -0
- package/dist/src/memory/staleness.js +19 -0
- package/dist/src/memory/staleness.js.map +1 -0
- package/dist/src/search/hybrid-search.d.ts +13 -0
- package/dist/src/search/hybrid-search.js +97 -0
- package/dist/src/search/hybrid-search.js.map +1 -0
- package/dist/src/search/pagerank.d.ts +4 -0
- package/dist/src/search/pagerank.js +64 -0
- package/dist/src/search/pagerank.js.map +1 -0
- package/dist/src/search/token-budget.d.ts +10 -0
- package/dist/src/search/token-budget.js +67 -0
- package/dist/src/search/token-budget.js.map +1 -0
- package/dist/src/server/mcp-server.d.ts +1 -0
- package/dist/src/server/mcp-server.js +120 -0
- package/dist/src/server/mcp-server.js.map +1 -0
- package/dist/src/server/tools/dependencies.d.ts +29 -0
- package/dist/src/server/tools/dependencies.js +118 -0
- package/dist/src/server/tools/dependencies.js.map +1 -0
- package/dist/src/server/tools/find-references.d.ts +20 -0
- package/dist/src/server/tools/find-references.js +80 -0
- package/dist/src/server/tools/find-references.js.map +1 -0
- package/dist/src/server/tools/forget.d.ts +16 -0
- package/dist/src/server/tools/forget.js +24 -0
- package/dist/src/server/tools/forget.js.map +1 -0
- package/dist/src/server/tools/index-status.d.ts +10 -0
- package/dist/src/server/tools/index-status.js +21 -0
- package/dist/src/server/tools/index-status.js.map +1 -0
- package/dist/src/server/tools/lookup.d.ts +25 -0
- package/dist/src/server/tools/lookup.js +54 -0
- package/dist/src/server/tools/lookup.js.map +1 -0
- package/dist/src/server/tools/memories.d.ts +24 -0
- package/dist/src/server/tools/memories.js +61 -0
- package/dist/src/server/tools/memories.js.map +1 -0
- package/dist/src/server/tools/overview.d.ts +19 -0
- package/dist/src/server/tools/overview.js +32 -0
- package/dist/src/server/tools/overview.js.map +1 -0
- package/dist/src/server/tools/recall.d.ts +29 -0
- package/dist/src/server/tools/recall.js +113 -0
- package/dist/src/server/tools/recall.js.map +1 -0
- package/dist/src/server/tools/remember.d.ts +39 -0
- package/dist/src/server/tools/remember.js +56 -0
- package/dist/src/server/tools/remember.js.map +1 -0
- package/dist/src/server/tools/search.d.ts +33 -0
- package/dist/src/server/tools/search.js +43 -0
- package/dist/src/server/tools/search.js.map +1 -0
- package/dist/src/storage/chunk-store.d.ts +25 -0
- package/dist/src/storage/chunk-store.js +74 -0
- package/dist/src/storage/chunk-store.js.map +1 -0
- package/dist/src/storage/database.d.ts +2 -0
- package/dist/src/storage/database.js +140 -0
- package/dist/src/storage/database.js.map +1 -0
- package/dist/src/storage/embedding-store.d.ts +14 -0
- package/dist/src/storage/embedding-store.js +42 -0
- package/dist/src/storage/embedding-store.js.map +1 -0
- package/dist/src/storage/file-store.d.ts +19 -0
- package/dist/src/storage/file-store.js +44 -0
- package/dist/src/storage/file-store.js.map +1 -0
- package/dist/src/storage/graph-store.d.ts +16 -0
- package/dist/src/storage/graph-store.js +37 -0
- package/dist/src/storage/graph-store.js.map +1 -0
- package/dist/src/storage/memory-embedding-store.d.ts +13 -0
- package/dist/src/storage/memory-embedding-store.js +38 -0
- package/dist/src/storage/memory-embedding-store.js.map +1 -0
- package/dist/src/storage/memory-store.d.ts +29 -0
- package/dist/src/storage/memory-store.js +86 -0
- package/dist/src/storage/memory-store.js.map +1 -0
- package/dist/src/types/index.d.ts +87 -0
- package/dist/src/types/index.js +21 -0
- package/dist/src/types/index.js.map +1 -0
- package/dist/src/utils/config.d.ts +2 -0
- package/dist/src/utils/config.js +18 -0
- package/dist/src/utils/config.js.map +1 -0
- package/dist/src/utils/ignore.d.ts +2 -0
- package/dist/src/utils/ignore.js +72 -0
- package/dist/src/utils/ignore.js.map +1 -0
- package/dist/src/utils/logger.d.ts +2 -0
- package/dist/src/utils/logger.js +16 -0
- package/dist/src/utils/logger.js.map +1 -0
- package/dist/src/utils/tokens.d.ts +2 -0
- package/dist/src/utils/tokens.js +12 -0
- package/dist/src/utils/tokens.js.map +1 -0
- package/package.json +59 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 codesearch-mcp contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Sverklo
|
|
2
|
+
|
|
3
|
+
Code intelligence for AI agents. Local-first, zero config, semantic search.
|
|
4
|
+
|
|
5
|
+
Sverklo gives AI coding agents (Claude Code, Cursor, any MCP client) deep codebase understanding through hybrid text + semantic search, structural analysis, and dependency-aware ranking.
|
|
6
|
+
|
|
7
|
+
## Why
|
|
8
|
+
|
|
9
|
+
AI coding agents waste tokens reading irrelevant files. Claude Code has no built-in codebase indexing. Existing solutions are either cloud-dependent (Augment, Greptile) or incomplete (CocoIndex lacks graph ranking, Aider lacks MCP).
|
|
10
|
+
|
|
11
|
+
Sverklo fills the gap:
|
|
12
|
+
- **AST-aware parsing** — extracts functions, classes, types, interfaces from 10 languages
|
|
13
|
+
- **PageRank ranking** — structurally important files surface first
|
|
14
|
+
- **Semantic embeddings** — all-MiniLM-L6-v2 via ONNX, runs locally, no API keys
|
|
15
|
+
- **Hybrid search** — BM25 text + vector similarity + Reciprocal Rank Fusion
|
|
16
|
+
- **Token-budgeted** — returns exactly what fits in your context window
|
|
17
|
+
- **Incremental** — watches for file changes and updates the index in real time
|
|
18
|
+
- **Zero config** — auto-detects project, auto-indexes, respects .gitignore
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# 1. Install
|
|
24
|
+
git clone https://github.com/nicenemo/sverklo
|
|
25
|
+
cd sverklo
|
|
26
|
+
npm install && npm run build
|
|
27
|
+
|
|
28
|
+
# 2. Download the embedding model (~90MB, one-time)
|
|
29
|
+
npx sverklo setup
|
|
30
|
+
|
|
31
|
+
# 3. Add to Claude Code
|
|
32
|
+
claude mcp add sverklo -- node /path/to/sverklo/dist/bin/sverklo.js .
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## MCP Tools
|
|
36
|
+
|
|
37
|
+
### `search`
|
|
38
|
+
Hybrid text + semantic code search with PageRank boosting.
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
query: "authentication middleware that validates JWT tokens"
|
|
42
|
+
token_budget: 4000 # max tokens to return
|
|
43
|
+
scope: "src/api/" # limit to path prefix
|
|
44
|
+
language: "typescript" # filter by language
|
|
45
|
+
type: "function" # filter by symbol type
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### `overview`
|
|
49
|
+
Structural codebase map. Shows most important files and their symbols, ranked by PageRank.
|
|
50
|
+
|
|
51
|
+
```
|
|
52
|
+
path: "src/" # directory to overview
|
|
53
|
+
token_budget: 4000
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### `lookup`
|
|
57
|
+
Direct symbol lookup by name. Returns full definitions.
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
symbol: "createRouter"
|
|
61
|
+
type: "function" # function, class, type, interface, method, variable
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### `find_references`
|
|
65
|
+
Find all references to a symbol across the codebase.
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
symbol: "UserService"
|
|
69
|
+
token_budget: 3000
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### `dependencies`
|
|
73
|
+
Show a file's import graph — what it depends on and what depends on it.
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
path: "src/api/router.ts"
|
|
77
|
+
direction: "both" # imports, importers, or both
|
|
78
|
+
depth: 2 # traversal depth
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### `index_status`
|
|
82
|
+
Check index health — file count, chunk count, languages, indexing progress.
|
|
83
|
+
|
|
84
|
+
## Supported Languages
|
|
85
|
+
|
|
86
|
+
TypeScript, JavaScript, Python, Go, Rust, Java, C, C++, Ruby, PHP
|
|
87
|
+
|
|
88
|
+
## How It Works
|
|
89
|
+
|
|
90
|
+
1. **File discovery** — walks the project, respects .gitignore and .sverkloignore
|
|
91
|
+
2. **AST parsing** — structural extraction of functions, classes, types, imports
|
|
92
|
+
3. **NL descriptions** — generates natural language descriptions from code metadata (the descriptions, not the raw code, are what gets embedded)
|
|
93
|
+
4. **Embeddings** — all-MiniLM-L6-v2 ONNX model, 384d vectors, fully local
|
|
94
|
+
5. **Dependency graph** — resolves imports, builds file-level graph, computes PageRank
|
|
95
|
+
6. **Hybrid search** — BM25 + cosine similarity + PageRank via Reciprocal Rank Fusion
|
|
96
|
+
7. **Token budgeting** — packs results to fit within the specified budget
|
|
97
|
+
|
|
98
|
+
## Performance
|
|
99
|
+
|
|
100
|
+
On a 30-file TypeScript codebase (71 code chunks):
|
|
101
|
+
- Indexing: **681ms** (including ONNX embedding generation)
|
|
102
|
+
- Search: **<50ms** per query
|
|
103
|
+
- Memory: **~200MB** (ONNX runtime + cached vectors)
|
|
104
|
+
|
|
105
|
+
## Configuration
|
|
106
|
+
|
|
107
|
+
| Setting | Location |
|
|
108
|
+
|---------|----------|
|
|
109
|
+
| Model files | `~/.sverklo/models/model.onnx` and `tokenizer.json` |
|
|
110
|
+
| Index database | `~/.sverklo/<project-hash>/index.db` |
|
|
111
|
+
| Custom ignores | `.sverkloignore` in project root |
|
|
112
|
+
| Debug logging | `SVERKLO_DEBUG=1` |
|
|
113
|
+
|
|
114
|
+
## Free vs Pro
|
|
115
|
+
|
|
116
|
+
The core is **free and open source** (MIT). Use it forever, no limits.
|
|
117
|
+
|
|
118
|
+
**Sverklo Pro** (coming soon):
|
|
119
|
+
- Session memory — decisions, preferences, patterns across sessions
|
|
120
|
+
- Memory quality scoring — confidence levels, staleness detection
|
|
121
|
+
- Git-state linked memories — what the code looked like when a decision was made
|
|
122
|
+
- Cross-project pattern transfer
|
|
123
|
+
- Better embedding models
|
|
124
|
+
|
|
125
|
+
**Sverklo Team** (coming soon):
|
|
126
|
+
- Shared team memory — architectural decisions, conventions
|
|
127
|
+
- Cross-developer AI coordination
|
|
128
|
+
- On-prem deployment
|
|
129
|
+
- Admin dashboard
|
|
130
|
+
|
|
131
|
+
## License
|
|
132
|
+
|
|
133
|
+
MIT
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
// CLI entry point. The first positional argument is either a sub-command
// ("setup"/"install"), a help flag, or the project path to index and serve.
const HELP_TEXT = `
sverklo — code intelligence for AI agents

Usage:
sverklo [project-path] Start the MCP server (stdio transport)
sverklo setup Download the embedding model (~90MB)
sverklo --help Show this help

MCP Tools:
search Hybrid text + semantic code search
overview Structural codebase map ranked by importance
lookup Direct symbol lookup by name
find_references Find all references to a symbol
dependencies Show file dependency graph
index_status Check index health

Add to Claude Code:
claude mcp add sverklo -- npx sverklo .

Environment:
SVERKLO_DEBUG=1 Enable debug logging to stderr
`;
const [command] = process.argv.slice(2);
const wantsSetup = command === "setup" || command === "install";
const wantsHelp = command === "--help" || command === "-h";
if (wantsSetup) {
    // Loaded lazily so that starting the server does not pay for the setup module.
    const { setupModels } = await import("../src/indexer/setup.js");
    await setupModels();
    process.exit(0);
}
if (wantsHelp) {
    console.log(HELP_TEXT);
    process.exit(0);
}
// Anything else is treated as the project root; with no argument the current
// working directory is used.
const projectRoot = resolve(command || process.cwd());
const { startMcpServer } = await import("../src/index.js");
startMcpServer(projectRoot).catch((err) => {
    console.error("Failed to start sverklo:", err);
    process.exit(1);
});
|
|
41
|
+
//# sourceMappingURL=sverklo.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sverklo.js","sourceRoot":"","sources":["../../bin/sverklo.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;AACnC,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;AAExB,IAAI,OAAO,KAAK,OAAO,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;IACjD,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,yBAAyB,CAAC,CAAC;IAChE,MAAM,WAAW,EAAE,CAAC;IACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,IAAI,OAAO,KAAK,QAAQ,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;IAC7C,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;CAqBb,CAAC,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;AAEnD,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;AAC3D,cAAc,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACrC,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAE,GAAG,CAAC,CAAC;IAC/C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
// Generate natural language descriptions from AST metadata.
|
|
2
|
+
// Key insight from Greptile: embedding NL descriptions produces
|
|
3
|
+
// much better semantic search results than embedding raw code.
|
|
4
|
+
/**
 * Build a one-line natural-language description of a code chunk.
 *
 * The description is assembled from, in order: the chunk kind and name, the
 * file path, the parameter names and return type derived from the signature
 * (only when the signature is present and differs from the bare name), and
 * the first line of any docstring found in the chunk content.
 *
 * @param chunk    Object with `type`, `name`, `signature` and `content` fields.
 * @param filePath Path included verbatim in the description.
 * @param language Language identifier forwarded to the signature/docstring helpers.
 * @returns The space-joined description string.
 */
export function describeChunk(chunk, filePath, language) {
    const kind = chunk.type === "block" ? "code block" : chunk.type;
    const subject = chunk.name ? `${kind} '${chunk.name}'` : `anonymous ${kind}`;
    const segments = [subject, `in ${filePath}`];
    const { signature } = chunk;
    // A signature identical to the name carries no parameter/return information.
    if (signature && signature !== chunk.name) {
        const paramList = extractParams(signature, language);
        if (paramList) {
            segments.push(`with parameters: ${paramList}`);
        }
        const returns = extractReturnType(signature, language);
        if (returns) {
            segments.push(`returns ${returns}`);
        }
    }
    const summary = extractDocstring(chunk.content, language);
    if (summary) {
        segments.push(`— ${summary}`);
    }
    return segments.join(" ");
}
|
|
32
|
+
/**
 * Return the index of the `)` that balances the `(` at `openIndex`,
 * or -1 when the parenthesis is never closed.
 */
function findClosingParen(text, openIndex) {
    let depth = 0;
    for (let i = openIndex; i < text.length; i++) {
        if (text[i] === "(") {
            depth++;
        }
        else if (text[i] === ")" && --depth === 0) {
            return i;
        }
    }
    return -1;
}
/**
 * Split `text` at every character of `separators` that is not nested inside
 * (), [], {} or (when `trackAngles` is set) <>. The `>` of `=>` and `->` is
 * not treated as a closing angle bracket.
 */
function splitTopLevel(text, separators, trackAngles) {
    const pieces = [];
    let depth = 0;
    let start = 0;
    for (let i = 0; i < text.length; i++) {
        const ch = text[i];
        if (ch === "(" || ch === "[" || ch === "{" || (trackAngles && ch === "<")) {
            depth++;
        }
        else if (ch === ")" || ch === "]" || ch === "}") {
            depth = Math.max(0, depth - 1);
        }
        else if (trackAngles && ch === ">") {
            const prev = text[i - 1];
            if (prev !== "=" && prev !== "-") {
                depth = Math.max(0, depth - 1);
            }
        }
        else if (depth === 0 && separators.includes(ch)) {
            pieces.push(text.slice(start, i));
            start = i + 1;
        }
    }
    pieces.push(text.slice(start));
    return pieces;
}
/**
 * Extract a comma-separated list of parameter names from a signature.
 *
 * Compared with a plain "first (...) then split on commas" approach this:
 *  - balances parentheses, so function-typed parameters stay intact;
 *  - splits only on top-level commas, so `Map<string, number>` or
 *    `Dict[str, int]` annotations do not produce bogus parameters;
 *  - skips a Go method receiver (`func (s *Server) Handle(...)`);
 *  - drops receiver parameters (`self`, `cls`, `this`) even when written as
 *    `&self`, `&mut self` or `this: Type`;
 *  - strips type annotations and default values from each name.
 *
 * An unclosed parameter list (e.g. a signature cut at the end of its first
 * line) is read up to the end of the string.
 *
 * @param signature Raw signature text.
 * @param language  Language identifier ("python", "go", or anything else).
 * @returns The names joined with ", ", or null when there are none.
 */
function extractParams(signature, language) {
    let searchFrom = 0;
    if (language === "go") {
        // `func (recv T) Name(...)`: the first group is the receiver, but only
        // when an identifier followed by "(" comes right after it; otherwise
        // this is a function literal and the first group is the parameter list.
        const recv = /^\s*func\s*\(/.exec(signature);
        if (recv) {
            const recvClose = findClosingParen(signature, recv[0].length - 1);
            if (recvClose !== -1 && /^\s*\w+\s*\(/.test(signature.slice(recvClose + 1))) {
                searchFrom = recvClose + 1;
            }
        }
    }
    const open = signature.indexOf("(", searchFrom);
    if (open === -1) {
        return null;
    }
    const close = findClosingParen(signature, open);
    const inner = close === -1 ? signature.slice(open + 1) : signature.slice(open + 1, close);
    if (!inner.trim()) {
        return null;
    }
    // Angle brackets only denote generics outside Python and Go.
    const trackAngles = language !== "python" && language !== "go";
    const names = splitTopLevel(inner, ",", trackAngles)
        .map((p) => p.trim())
        .filter(Boolean)
        .map((p) => {
            if (language === "go") {
                return p.split(/\s+/)[0];
            }
            // Keep only what precedes the first top-level ":" (type) or "=" (default).
            const bare = splitTopLevel(p, ":=", trackAngles)[0].trim();
            if (language === "python") {
                return bare;
            }
            return bare
                .replace(/^&\s*(?:'\w+\s+)?/, "") // Rust reference / lifetime prefix
                .replace(/^(?:const|let|var|mut)\s+/, "")
                .trim();
        })
        .filter((name) => name && name !== "self" && name !== "cls" && name !== "this");
    return names.length > 0 ? names.join(", ") : null;
}
|
|
52
|
+
/**
 * Extract the declared return type from a signature, per language.
 *
 * Supported: TypeScript/JavaScript (`): T`), Python (`-> T:`), Go (a single
 * unnamed result such as `error`, `*User`, `[]string`, `map[string]int`, or a
 * parenthesised result list) and Rust (`-> T {` / `-> T where`).
 *
 * @param signature Raw signature text.
 * @param language  Language identifier.
 * @returns The trimmed return type, or null when none is found. An empty
 *          match is normalised to null rather than returned as "".
 */
function extractReturnType(signature, language) {
    let captured = null;
    if (language === "typescript" || language === "javascript") {
        // function foo(): ReturnType
        const m = signature.match(/\)\s*:\s*([^{=]+)/);
        captured = m ? m[1] : null;
    }
    else if (language === "python") {
        const m = signature.match(/->\s*(.+?)(?:\s*:)?\s*$/);
        captured = m ? m[1] : null;
    }
    else if (language === "go") {
        // func foo() (ReturnType, error)   or   func foo() *pkg.Type
        // The single-result form also accepts "*", "[" and "]" so pointer,
        // slice and map results are recognised.
        const m = signature.match(/\)\s*(?:\(([^)]+)\)|([\w.*\[\]]+))\s*\{?\s*$/);
        captured = m ? (m[1] || m[2]) : null;
    }
    else if (language === "rust") {
        const m = signature.match(/->\s*(.+?)\s*(?:\{|where)/);
        captured = m ? m[1] : null;
    }
    const trimmed = captured ? captured.trim() : "";
    return trimmed || null;
}
|
|
73
|
+
/**
 * Extract the first meaningful line of documentation from a chunk's text.
 *
 * Checks, in order:
 *  1. Python: a triple-quoted docstring opening on lines 2-5 of the chunk.
 *  2. A leading `/** ... *\/` block or a run of `///` lines at the top of the
 *     chunk; lines that are empty or start with "@" are skipped.
 *  3. A `//` or `#` comment on the second line of the chunk.
 *
 * The comment scan stops where the comment ends, so a tag-only or empty doc
 * comment yields null instead of a stray "/" or the following line of code.
 *
 * @param content  Full text of the chunk.
 * @param language Language identifier; only "python" changes behaviour.
 * @returns The first documentation line, or null when none is found.
 */
function extractDocstring(content, language) {
    const lines = content.split("\n");
    const scanLimit = Math.min(lines.length, 5);
    if (language === "python") {
        // Look for triple-quote docstring
        for (let i = 1; i < scanLimit; i++) {
            const trimmed = lines[i].trim();
            if (trimmed.startsWith('"""') || trimmed.startsWith("'''")) {
                const doc = trimmed.replace(/^["']{3}/, "").replace(/["']{3}$/, "").trim();
                if (doc) {
                    return doc;
                }
                // Multi-line docstring: the text starts on the next line. Strip a
                // closing delimiter so an empty docstring does not return quotes.
                if (i + 1 < lines.length) {
                    const next = lines[i + 1].trim().replace(/["']{3}$/, "").trim();
                    return next || null;
                }
            }
        }
    }
    const firstLine = lines[0].trim();
    if (firstLine.startsWith("/**")) {
        // JSDoc-style block comment before the definition
        for (let i = 0; i < scanLimit; i++) {
            const raw = lines[i].trim();
            const body = i === 0 ? raw.replace(/^\/\*\*\s*/, "") : raw;
            // Cut at the terminator before stripping the leading "*", otherwise a
            // bare "*/" line would be reduced to "/".
            const closeAt = body.indexOf("*/");
            const text = (closeAt === -1 ? body : body.slice(0, closeAt))
                .replace(/^\*\s*/, "")
                .trim();
            if (text && !text.startsWith("@")) {
                return text;
            }
            if (closeAt !== -1) {
                break; // end of the comment: do not read into the code below
            }
        }
    }
    else if (firstLine.startsWith("///")) {
        // Triple-slash doc comment lines
        for (let i = 0; i < scanLimit; i++) {
            const raw = lines[i].trim();
            if (!raw.startsWith("///")) {
                break; // the doc comment run has ended
            }
            const text = raw.replace(/^\/\/\/\s*/, "").trim();
            if (text && !text.startsWith("@")) {
                return text;
            }
        }
    }
    // Single-line comment right after definition
    if (lines.length > 1) {
        const secondLine = lines[1].trim();
        if (secondLine.startsWith("//") || secondLine.startsWith("#")) {
            return secondLine.replace(/^\/\/\s*/, "").replace(/^#\s*/, "").trim() || null;
        }
    }
    return null;
}
|
|
112
|
+
//# sourceMappingURL=describer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"describer.js","sourceRoot":"","sources":["../../../src/indexer/describer.ts"],"names":[],"mappings":"AAEA,4DAA4D;AAC5D,gEAAgE;AAChE,+DAA+D;AAE/D,MAAM,UAAU,aAAa,CAC3B,KAAkB,EAClB,QAAgB,EAChB,QAAgB;IAEhB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC;IAEpE,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,KAAK,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC;IAC5C,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,aAAa,QAAQ,EAAE,CAAC,CAAC;IACtC,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,MAAM,QAAQ,EAAE,CAAC,CAAC;IAE7B,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,IAAI,EAAE,CAAC;QACtD,wCAAwC;QACxC,MAAM,MAAM,GAAG,aAAa,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACxD,IAAI,MAAM,EAAE,CAAC;YACX,KAAK,CAAC,IAAI,CAAC,oBAAoB,MAAM,EAAE,CAAC,CAAC;QAC3C,CAAC;QAED,MAAM,UAAU,GAAG,iBAAiB,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAChE,IAAI,UAAU,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,WAAW,UAAU,EAAE,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,qDAAqD;IACrD,MAAM,SAAS,GAAG,gBAAgB,CAAC,KAAK,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;IAC5D,IAAI,SAAS,EAAE,CAAC;QACd,KAAK,CAAC,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,aAAa,CACpB,SAAiB,EACjB,QAAgB;IAEhB,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IACzC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC;IAEpC,0BAA0B;IAC1B,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;SAChB,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,MAAM,CAAC;SAC/D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACT,+CAA+C;QAC/C,IAAI,QAAQ,KAAK,QAAQ;YAAE,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACvE,IAAI,QAAQ,KAAK,IAAI;YAAE,OAAO,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAChD,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC
,2BAA2B,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IACzE,CAAC,CAAC;SACD,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACtD,CAAC;AAED,SAAS,iBAAiB,CACxB,SAAiB,EACjB,QAAgB;IAEhB,IAAI,QAAQ,KAAK,YAAY,IAAI,QAAQ,KAAK,YAAY,EAAE,CAAC;QAC3D,6BAA6B;QAC7B,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAC/C,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAChC,CAAC;IACD,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC1B,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;QACrD,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAChC,CAAC;IACD,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QACtB,iCAAiC;QACjC,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,6CAA6C,CAAC,CAAC;QACzE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;IACnD,CAAC;IACD,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QACxB,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;QACvD,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAChC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,gBAAgB,CAAC,OAAe,EAAE,QAAgB;IACzD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC1B,kCAAkC;QAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACnD,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAChC,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC3D,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC3E,IAAI,GAAG;oBAAE,OAAO,GAAG,CAAC;gBACpB,uBAAuB;gBACvB,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM;oBAAE,OAAO,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YACvD,CAAC;QACH,CAAC;IACH,CAAC;IAED,+CAA+C;IAC/C,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,UAAU
,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACnD,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC;iBACnB,IAAI,EAAE;iBACN,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC;iBACzB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;iBACrB,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC;iBACzB,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;iBACpB,IAAI,EAAE,CAAC;YACV,IAAI,KAAK,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC;QACpD,CAAC;IACH,CAAC;IAED,6CAA6C;IAC7C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACnC,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9D,OAAO,UAAU,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC;QAChF,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import { readFileSync, existsSync } from "node:fs";
import { dirname, join } from "node:path";
import { homedir } from "node:os";
import { fileURLToPath } from "node:url";
import { log, logError } from "../utils/logger.js";
|
|
5
|
+
// User-level model directory.
const MODEL_DIR = join(homedir(), ".sverklo", "models");
// Model directory shipped next to the package, two levels above this module.
// `import.meta.dirname` is missing on older Node releases; derive the module
// directory from `import.meta.url` there, so the path never silently becomes
// relative to the process working directory.
const BUNDLED_MODEL_DIR = join(import.meta.dirname ?? dirname(fileURLToPath(import.meta.url)), "..", "..", "models");
// Embedding width. NOTE(review): presumably the hidden size of the bundled
// all-MiniLM-L6-v2 model — confirm against the model file.
const MODEL_DIM = 384;
const MAX_SEQ_LEN = 128; // Short sequences for code chunks — speed over completeness
// Special token ids emitted by the tokenizer below. NOTE(review): presumably
// these match the ids in the model's tokenizer.json — verify.
const CLS_TOKEN = 101;
const SEP_TOKEN = 102;
const PAD_TOKEN = 0;
const UNK_TOKEN = 100;
// Lazily populated by initEmbedder(); all stay null when the ONNX model or the
// tokenizer could not be loaded.
let ort = null;
let session = null;
let vocab = null;
// True once initEmbedder() has run to completion, whether it succeeded or not.
let initialized = false;
|
|
17
|
+
/**
 * Load the ONNX runtime, the embedding model and the tokenizer vocabulary.
 *
 * Runs at most once per process: after the first completed call the
 * `initialized` flag short-circuits later calls. When the model files are
 * missing, or loading throws, `session` and `vocab` are left null and the
 * failure is logged instead of being rethrown.
 *
 * @returns A promise that resolves once initialisation has finished.
 */
export async function initEmbedder() {
    if (initialized) {
        return;
    }
    // Find model files
    const modelPath = findFile("model.onnx");
    const tokenizerPath = findFile("tokenizer.json");
    const haveBothFiles = Boolean(modelPath) && Boolean(tokenizerPath);
    if (!haveBothFiles) {
        logError("Model files not found. Falling back to lightweight embeddings.\n" +
            "For better search quality, place model.onnx and tokenizer.json in ~/.sverklo/models/");
        initialized = true;
        return;
    }
    try {
        // The runtime is imported dynamically, so a failure to load it lands
        // in the catch below instead of breaking module import.
        ort = await import("onnxruntime-node");
        const sessionOptions = {
            executionProviders: ["cpu"],
            graphOptimizationLevel: "all",
            intraOpNumThreads: 2,
        };
        session = await ort.InferenceSession.create(modelPath, sessionOptions);
        log(`ONNX model loaded: ${modelPath}`);
        log(`Input names: ${session.inputNames}, Output names: ${session.outputNames}`);
        // The vocabulary is read from `model.vocab` of the tokenizer JSON.
        const tokenizer = JSON.parse(readFileSync(tokenizerPath, "utf-8"));
        const vocabEntries = Object.entries(tokenizer.model.vocab);
        vocab = new Map(vocabEntries);
        log(`Tokenizer loaded: ${vocab.size} tokens`);
        initialized = true;
    }
    catch (err) {
        logError("Failed to initialize ONNX embedder", err);
        // Reset session and vocab so a half-loaded state is never used.
        session = null;
        vocab = null;
        initialized = true;
    }
}
|
|
53
|
+
/**
 * Locate a model asset by file name.
 *
 * The user directory (MODEL_DIR) is checked first, then the bundled
 * directory (BUNDLED_MODEL_DIR).
 *
 * @param filename Bare file name, e.g. "model.onnx".
 * @returns The path of the first existing candidate, or null if none exists.
 */
function findFile(filename) {
    for (const dir of [MODEL_DIR, BUNDLED_MODEL_DIR]) {
        const candidate = join(dir, filename);
        if (existsSync(candidate)) {
            return candidate;
        }
    }
    return null;
}
|
|
63
|
+
// ── WordPiece Tokenizer ─────────────────────────────────────────────
|
|
64
|
+
/**
 * Convert text into fixed-length model inputs.
 *
 * Falls back to fallbackTokenize() when no vocabulary is loaded. Otherwise
 * the text is lower-cased, accents are stripped, it is split into words and
 * punctuation, and each piece is run through wordPieceTokenize(). The result
 * is wrapped as CLS ... SEP, truncated so it never exceeds MAX_SEQ_LEN, and
 * padded with PAD_TOKEN.
 *
 * @param text Input string.
 * @returns `{ inputIds, attentionMask }`, two number arrays of length
 *          MAX_SEQ_LEN; the mask is 1 for real tokens and 0 for padding.
 */
function tokenize(text) {
    if (!vocab) {
        return fallbackTokenize(text);
    }
    // BERT-style preprocessing: lowercase, strip accents. Decompose first (NFD)
    // so precomposed characters such as "é" expose a combining mark that can
    // then be removed; without this step the strip is a no-op for them.
    const cleaned = text.toLowerCase().normalize("NFD").replace(/\p{Mn}/gu, "");
    // Split on whitespace and punctuation (BertPreTokenizer behavior). Unicode
    // letters and digits stay inside words; everything else, including "_",
    // becomes its own piece.
    const words = cleaned.split(/(\s+|[^\p{L}\p{N}\s])/gu).filter((w) => w.trim());
    const limit = MAX_SEQ_LEN - 1; // keep one slot for the trailing SEP
    const tokens = [CLS_TOKEN];
    for (const word of words) {
        if (tokens.length >= limit) {
            break;
        }
        for (const id of wordPieceTokenize(word.trim())) {
            if (tokens.length >= limit) {
                break;
            }
            tokens.push(id);
        }
    }
    tokens.push(SEP_TOKEN);
    // Pad to MAX_SEQ_LEN
    const inputIds = new Array(MAX_SEQ_LEN).fill(PAD_TOKEN);
    const attentionMask = new Array(MAX_SEQ_LEN).fill(0);
    tokens.forEach((id, i) => {
        inputIds[i] = id;
        attentionMask[i] = 1;
    });
    return { inputIds, attentionMask };
}
|
|
93
|
+
/**
 * Greedy longest-match-first WordPiece lookup for a single word.
 *
 * At each position the longest vocabulary entry is taken (continuation
 * pieces are looked up with a "##" prefix). When nothing matches, one
 * UNK_TOKEN is emitted and the scan advances by one character.
 *
 * The candidate window is capped at MAX_PIECE_CHARS so that a very long word
 * (for example a base64 blob) does not trigger a quadratic scan.
 * NOTE(review): assumes no vocabulary entry is longer than 100 characters,
 * in line with the reference WordPiece per-word limit — confirm for custom
 * vocabularies.
 *
 * @param word A single pre-tokenized word.
 * @returns Array of token ids; `[UNK_TOKEN]` when no vocabulary is loaded.
 */
function wordPieceTokenize(word) {
    if (!vocab) {
        return [UNK_TOKEN];
    }
    const MAX_PIECE_CHARS = 100;
    const tokens = [];
    let start = 0;
    while (start < word.length) {
        let matched = false;
        for (let end = Math.min(word.length, start + MAX_PIECE_CHARS); end > start; end--) {
            const piece = word.slice(start, end);
            const id = vocab.get(start === 0 ? piece : "##" + piece);
            if (id !== undefined) {
                tokens.push(id);
                start = end;
                matched = true;
                break;
            }
        }
        if (!matched) {
            tokens.push(UNK_TOKEN);
            start++;
        }
    }
    return tokens;
}
|
|
119
|
+
/**
 * Tokenizer used when no vocabulary is available: every whitespace-separated
 * word is mapped to a hash-derived id in the range [1000, 30999].
 *
 * Empty words (produced by leading/trailing whitespace or an empty string)
 * are dropped so they do not become a phantom token.
 *
 * @param text Input string.
 * @returns `{ inputIds, attentionMask }`, two number arrays of length
 *          MAX_SEQ_LEN laid out as CLS, word ids, SEP, then padding.
 */
function fallbackTokenize(text) {
    // Hash-based token IDs when no real tokenizer is available
    const words = text
        .toLowerCase()
        .split(/\s+/)
        .filter(Boolean)
        .slice(0, MAX_SEQ_LEN - 2); // leave room for CLS and SEP
    const inputIds = new Array(MAX_SEQ_LEN).fill(PAD_TOKEN);
    const attentionMask = new Array(MAX_SEQ_LEN).fill(0);
    inputIds[0] = CLS_TOKEN;
    attentionMask[0] = 1;
    words.forEach((word, i) => {
        // 31-multiplier string hash, kept in 32-bit integer range by "| 0".
        let hash = 0;
        for (let j = 0; j < word.length; j++) {
            hash = ((hash << 5) - hash + word.charCodeAt(j)) | 0;
        }
        inputIds[i + 1] = (Math.abs(hash) % 30000) + 1000;
        attentionMask[i + 1] = 1;
    });
    const sepIndex = words.length + 1;
    inputIds[sepIndex] = SEP_TOKEN;
    attentionMask[sepIndex] = 1;
    return { inputIds, attentionMask };
}
|
|
138
|
+
// ── ONNX Inference ──────────────────────────────────────────────────
|
|
139
|
+
/**
 * Embeds each input text as an L2-normalized Float32Array.
 *
 * When the ONNX session or runtime module is not loaded, this delegates to
 * fallbackEmbed. Otherwise the texts are tokenized, run through the session
 * in batches of 16, and the session's first output is reduced to one vector
 * per text.
 *
 * @param texts Array of strings to embed.
 * @returns Promise resolving to one Float32Array per input text, in input order.
 */
export async function embed(texts) {
    if (!session || !ort) {
        return fallbackEmbed(texts);
    }
    const results = [];
    // Process in small batches for memory efficiency
    const BATCH = 16;
    for (let b = 0; b < texts.length; b += BATCH) {
        const batch = texts.slice(b, b + BATCH);
        const batchSize = batch.length;
        // Tokenize each text exactly once. The per-text attention masks are
        // kept so the pooling step below does not have to re-tokenize.
        const allInputIds = new BigInt64Array(batchSize * MAX_SEQ_LEN);
        const allAttentionMask = new BigInt64Array(batchSize * MAX_SEQ_LEN);
        const masks = [];
        for (let i = 0; i < batchSize; i++) {
            const { inputIds, attentionMask } = tokenize(batch[i]);
            masks.push(attentionMask);
            const rowStart = i * MAX_SEQ_LEN;
            for (let j = 0; j < MAX_SEQ_LEN; j++) {
                allInputIds[rowStart + j] = BigInt(inputIds[j]);
                allAttentionMask[rowStart + j] = BigInt(attentionMask[j]);
            }
        }
        // Create tensors
        const feeds = {
            input_ids: new ort.Tensor("int64", allInputIds, [batchSize, MAX_SEQ_LEN]),
            attention_mask: new ort.Tensor("int64", allAttentionMask, [batchSize, MAX_SEQ_LEN]),
        };
        // Add token_type_ids (all zeros for single-sentence input) only if the
        // model expects it, so the buffer is not allocated needlessly.
        if (session.inputNames.includes("token_type_ids")) {
            const tokenTypeIds = new BigInt64Array(batchSize * MAX_SEQ_LEN); // all zeros
            feeds.token_type_ids = new ort.Tensor("int64", tokenTypeIds, [batchSize, MAX_SEQ_LEN]);
        }
        // Run inference
        const output = await session.run(feeds);
        // Use the first declared output.
        // NOTE(review): presumably "last_hidden_state" shaped
        // [batchSize, seqLen, hiddenSize] — confirm against the model.
        const outputTensor = output[session.outputNames[0]];
        const outputData = outputTensor.data;
        const outputDims = outputTensor.dims;
        const hiddenSize = outputDims[outputDims.length - 1];
        // A 2-D output holds one vector per text already. Indexing it with
        // 3-D strides would read other rows and run out of bounds (NaN).
        const alreadyPooled = outputDims.length === 2;
        const seqLen = outputDims.length === 3 ? outputDims[1] : MAX_SEQ_LEN;
        for (let i = 0; i < batchSize; i++) {
            const pooled = new Float32Array(hiddenSize);
            if (alreadyPooled) {
                const rowStart = i * hiddenSize;
                for (let d = 0; d < hiddenSize; d++) {
                    pooled[d] = outputData[rowStart + d];
                }
            }
            else {
                // Mean pooling with attention mask
                const attentionMask = masks[i];
                const textStart = i * seqLen * hiddenSize;
                let maskSum = 0;
                for (let t = 0; t < seqLen; t++) {
                    // Positions beyond the mask length count as padding.
                    const mask = attentionMask[t] || 0;
                    maskSum += mask;
                    if (mask === 0)
                        continue;
                    const tokenStart = textStart + t * hiddenSize;
                    for (let d = 0; d < hiddenSize; d++) {
                        pooled[d] += outputData[tokenStart + d] * mask;
                    }
                }
                if (maskSum > 0) {
                    for (let d = 0; d < hiddenSize; d++) {
                        pooled[d] /= maskSum;
                    }
                }
            }
            // L2 normalize (skipped for an all-zero vector to avoid dividing by 0)
            let norm = 0;
            for (let d = 0; d < hiddenSize; d++)
                norm += pooled[d] * pooled[d];
            norm = Math.sqrt(norm);
            if (norm > 0) {
                for (let d = 0; d < hiddenSize; d++)
                    pooled[d] /= norm;
            }
            results.push(pooled);
        }
    }
    return results;
}
|
|
214
|
+
// Fallback when ONNX is not available
|
|
215
|
+
/**
 * Deterministic stand-in embedding used when ONNX inference is unavailable.
 *
 * Each text is lowercased and split on whitespace. Every character of every
 * word adds a sine term to each of the MODEL_DIM components, phase-shifted
 * and down-weighted by the word's position. The result is L2-normalized.
 *
 * @param texts Array of strings.
 * @returns One Float32Array of length MODEL_DIM per input text.
 */
function fallbackEmbed(texts) {
    const embedOne = (text) => {
        const out = new Float32Array(MODEL_DIM);
        text.toLowerCase().split(/\s+/).forEach((token, position) => {
            // Both values depend only on the word position.
            const phase = position * 0.1;
            const weight = 1 / (1 + position * 0.1);
            for (let c = 0; c < token.length; c++) {
                const code = token.charCodeAt(c);
                for (let dim = 0; dim < MODEL_DIM; dim++) {
                    out[dim] += Math.sin(code * (dim + 1) * 0.01 + phase) * weight;
                }
            }
        });
        // L2 normalize; an all-zero vector is returned unchanged.
        let sumOfSquares = 0;
        for (const component of out) {
            sumOfSquares += component * component;
        }
        const magnitude = Math.sqrt(sumOfSquares);
        if (magnitude > 0) {
            for (let dim = 0; dim < MODEL_DIM; dim++) {
                out[dim] /= magnitude;
            }
        }
        return out;
    };
    return texts.map((text) => embedOne(text));
}
|
|
242
|
+
/**
 * Dot product of `a` and `b`, taken over the indices of `a`.
 *
 * No normalization is performed here: the result equals the cosine
 * similarity only when both inputs are already unit-length vectors.
 *
 * @param a First vector; its length drives the iteration.
 * @param b Second vector.
 * @returns Sum of `a[i] * b[i]`.
 */
export function cosineSimilarity(a, b) {
    const length = a.length;
    let sum = 0;
    let idx = 0;
    while (idx < length) {
        sum += a[idx] * b[idx];
        idx += 1;
    }
    return sum;
}
|
|
248
|
+
// Public alias for MODEL_DIM, the length of the vectors built by fallbackEmbed.
export const EMBEDDING_DIM = MODEL_DIM;
|
|
249
|
+
//# sourceMappingURL=embedder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../../src/indexer/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAEnD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;AACxD,MAAM,iBAAiB,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;AAEjF,MAAM,SAAS,GAAG,GAAG,CAAC;AACtB,MAAM,WAAW,GAAG,GAAG,CAAC,CAAC,4DAA4D;AACrF,MAAM,SAAS,GAAG,GAAG,CAAC;AACtB,MAAM,SAAS,GAAG,GAAG,CAAC;AACtB,MAAM,SAAS,GAAG,CAAC,CAAC;AACpB,MAAM,SAAS,GAAG,GAAG,CAAC;AAEtB,IAAI,GAAG,GAAQ,IAAI,CAAC;AACpB,IAAI,OAAO,GAAQ,IAAI,CAAC;AACxB,IAAI,KAAK,GAA+B,IAAI,CAAC;AAC7C,IAAI,WAAW,GAAG,KAAK,CAAC;AAExB,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,IAAI,WAAW;QAAE,OAAO;IAExB,mBAAmB;IACnB,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC;IACzC,MAAM,aAAa,GAAG,QAAQ,CAAC,gBAAgB,CAAC,CAAC;IAEjD,IAAI,CAAC,SAAS,IAAI,CAAC,aAAa,EAAE,CAAC;QACjC,QAAQ,CACN,kEAAkE;YAChE,sFAAsF,CACzF,CAAC;QACF,WAAW,GAAG,IAAI,CAAC;QACnB,OAAO;IACT,CAAC;IAED,IAAI,CAAC;QACH,oBAAoB;QACpB,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;QAEvC,aAAa;QACb,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,SAAS,EAAE;YACrD,kBAAkB,EAAE,CAAC,KAAK,CAAC;YAC3B,sBAAsB,EAAE,KAAK;YAC7B,iBAAiB,EAAE,CAAC;SACrB,CAAC,CAAC;QAEH,GAAG,CAAC,sBAAsB,SAAS,EAAE,CAAC,CAAC;QACvC,GAAG,CAAC,gBAAgB,OAAO,CAAC,UAAU,mBAAmB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;QAEhF,uBAAuB;QACvB,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC,CAAC;QACnE,KAAK,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAuB,CAAC,CAAC;QAE7E,GAAG,CAAC,qBAAqB,KAAK,CAAC,IAAI,SAAS,CAAC,CAAC;QAC9C,WAAW,GAAG,IAAI,CAAC;IACrB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,QAAQ,CAAC,oCAAoC,EAAE,GAAG,CAAC,CAAC;QACpD,OAAO,GAAG,IAAI,CAAC;QACf,KAAK,GAAG,IAAI,CAAC;QACb,WAAW,GAAG,IAAI,CAAC;IACrB,CAAC;AACH,CAAC;AAED,SAAS,QAAQ,CAAC,QAAgB;IAChC,+CAA+C;IAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,
CAAC;IAC3C,IAAI,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,QAAQ,CAAC;IAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,EAAE,QAAQ,CAAC,CAAC;IACtD,IAAI,UAAU,CAAC,WAAW,CAAC;QAAE,OAAO,WAAW,CAAC;IAChD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,uEAAuE;AAEvE,SAAS,QAAQ,CAAC,IAAY;IAC5B,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,qDAAqD;IACrD,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;IAEnE,kEAAkE;IAClE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAEtE,MAAM,MAAM,GAAa,CAAC,SAAS,CAAC,CAAC;IAErC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,MAAM,CAAC,MAAM,IAAI,WAAW,GAAG,CAAC;YAAE,MAAM;QAE5C,MAAM,SAAS,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QACjD,KAAK,MAAM,EAAE,IAAI,SAAS,EAAE,CAAC;YAC3B,IAAI,MAAM,CAAC,MAAM,IAAI,WAAW,GAAG,CAAC;gBAAE,MAAM;YAC5C,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAEvB,qBAAqB;IACrB,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,MAAM,aAAa,GAAG,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAErD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,QAAQ,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACvB,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,IAAI,CAAC,KAAK;QAAE,OAAO,CAAC,SAAS,CAAC,CAAC;IAE/B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QAC3B,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACtB,IAAI,KAAK,GAAG,KAAK,CAAC;QAElB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;YACnB,MAAM,MAAM,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YACpF,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAE7B,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;gBACrB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAChB,KAAK,GAAG,IAAI,CAAC;gBACb,KAAK,GAAG,GAAG,CAAC;gBAC
Z,MAAM;YACR,CAAC;YACD,GAAG,EAAE,CAAC;QACR,CAAC;QAED,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACvB,KAAK,EAAE,CAAC;QACV,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,2DAA2D;IAC3D,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,GAAG,CAAC,CAAC,CAAC;IACxE,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,MAAM,aAAa,GAAG,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAErD,QAAQ,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC;IACxB,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC3D,CAAC;QACD,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC;QAClD,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IAC3B,CAAC;IAED,QAAQ,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;IACvC,aAAa,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IAEpC,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,CAAC;AACrC,CAAC;AAED,uEAAuE;AAEvE,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,KAAe;IACzC,IAAI,CAAC,OAAO,IAAI,CAAC,GAAG,EAAE,CAAC;QACrB,OAAO,aAAa,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,MAAM,OAAO,GAAmB,EAAE,CAAC;IAEnC,iDAAiD;IACjD,MAAM,KAAK,GAAG,EAAE,CAAC;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC;QAC7C,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC;QACxC,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC;QAE/B,iBAAiB;QACjB,MAAM,WAAW,GAAG,IAAI,aAAa,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;QAC/D,MAAM,gBAAgB,GAAG,IAAI,aAAa,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;QAEpE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACv
D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;gBACrC,WAAW,CAAC,CAAC,GAAG,WAAW,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;gBACvD,gBAAgB,CAAC,CAAC,GAAG,WAAW,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;YACnE,CAAC;QACH,CAAC;QAED,iBAAiB;QACjB,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,WAAW,EAAE,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC;QACtF,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,gBAAgB,EAAE,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC;QAEhG,2DAA2D;QAC3D,MAAM,YAAY,GAAG,IAAI,aAAa,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,YAAY;QAC7E,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC;QAExF,gBAAgB;QAChB,MAAM,KAAK,GAAwB;YACjC,SAAS,EAAE,cAAc;YACzB,cAAc,EAAE,mBAAmB;SACpC,CAAC;QAEF,6CAA6C;QAC7C,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;YAClD,KAAK,CAAC,cAAc,GAAG,eAAe,CAAC;QACzC,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAExC,mEAAmE;QACnE,MAAM,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,IAAoB,CAAC;QAC3D,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,kCAAkC;QAC9E,MAAM,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;QAErE,mCAAmC;QACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,MAAM,EAAE,aAAa,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,UAAU,CAAC,CAAC;YAC5C,IAAI,OAAO,GAAG,CAAC,CAAC;YAEhB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAChC,MAAM,IAAI,GAAG,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBACnC,OAAO,IAAI,IAAI,CAAC;gBAChB,IAAI,IAAI,KAAK,CAAC;oBAAE,SAAS;gBAEzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;oBACpC,MAAM,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC,CAAC,GAAG,MAAM,GAAG,UAAU,GAAG,CAAC,GAAG,UAAU,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC;gBAC/E,CAAC;YACH,CAAC;YAED,IAAI,OAAO,GAAG,CAAC
,EAAE,CAAC;gBAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;oBACpC,MAAM,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC;gBACvB,CAAC;YACH,CAAC;YAED,eAAe;YACf,IAAI,IAAI,GAAG,CAAC,CAAC;YACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE;gBAAE,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACnE,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACvB,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;gBACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE;oBAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;YACzD,CAAC;YAED,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,sCAAsC;AACtC,SAAS,aAAa,CAAC,KAAe;IACpC,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAE9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;gBACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,GAAG,CAAC,CAAC,CAAC;wBACJ,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,GAAG,CAAC;4BAC7C,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;gBACxB,CAAC;YACH,CAAC;QACH,CAAC;QAED,eAAe;QACf,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;YAAE,IAAI,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QAC5D,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;YACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;gBAAE,GAAG,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;QACrD,CAAC;QAED,OAAO,GAAG,CAAC;IACb,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,CAAe,EAAE,CAAe;IAC/D,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,GAAG,IAAI,CAAC,CAA
C,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACtD,OAAO,GAAG,CAAC,CAAC,iCAAiC;AAC/C,CAAC;AAED,MAAM,CAAC,MAAM,aAAa,GAAG,SAAS,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Ignore } from "ignore";
|
|
2
|
+
/**
 * One entry of the array returned by `discoverFiles`.
 *
 * Carries two path strings (`absolutePath`, `relativePath`), a `language`
 * string, and two numeric fields (`lastModified`, `sizeBytes`).
 * NOTE(review): the base directory for `relativePath` and the unit of
 * `lastModified` are not visible in this declaration — confirm against the
 * implementation.
 */
export interface DiscoveredFile {
|
|
3
|
+
absolutePath: string;
|
|
4
|
+
relativePath: string;
|
|
5
|
+
language: string;
|
|
6
|
+
lastModified: number;
|
|
7
|
+
sizeBytes: number;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Returns the files discovered for `rootPath` as `DiscoveredFile` records.
 *
 * @param rootPath Root path string handed to the discovery routine.
 * @param ignoreFilter An `Ignore` instance from the `ignore` package;
 *   presumably used to exclude matching paths — confirm in the implementation.
 * @returns Array of `DiscoveredFile` entries.
 */
export declare function discoverFiles(rootPath: string, ignoreFilter: Ignore): DiscoveredFile[];
|