inbed 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +108 -0
- package/demo.js +29 -0
- package/dist/Inbed.js +247 -0
- package/dist/demo.js +22 -0
- package/dist/embeddings/Embedder.js +1 -0
- package/dist/embeddings/LocalEmbedder.js +15 -0
- package/dist/embeddings/Ollama.js +52 -0
- package/dist/embeddings/OpenAI.js +50 -0
- package/dist/embeddings/OpenRouter.js +84 -0
- package/dist/index.js +5 -0
- package/dist/types.js +1 -0
- package/dist/utils.js +12 -0
- package/johankit.yml +6 -0
- package/package.json +30 -0
- package/src/Inbed.ts +268 -0
- package/src/embeddings/Embedder.ts +7 -0
- package/src/embeddings/Ollama.ts +69 -0
- package/src/embeddings/OpenAI.ts +58 -0
- package/src/embeddings/OpenRouter.ts +100 -0
- package/src/index.ts +6 -0
- package/src/types.ts +28 -0
- package/src/utils.ts +14 -0
- package/tsconfig.json +15 -0
package/README.md
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# Inbed
|
|
2
|
+
|
|
3
|
+
**Inbed** is a TypeScript library for **semantic indexing and retrieval of source code**, built to map **natural language intent** to **real codebases**.
|
|
4
|
+
|
|
5
|
+
It helps LLMs and developer tools find the **right files and code snippets** for a given user request.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## What it’s for
|
|
10
|
+
|
|
11
|
+
- 🔍 Semantic code search
|
|
12
|
+
- 🤖 LLM / RAG context retrieval
|
|
13
|
+
- 🧠 AI copilots & agents
|
|
14
|
+
- 🧭 Navigating large or legacy repos
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npm install inbed
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Basic Usage
|
|
27
|
+
|
|
28
|
+
### Create an embedder
|
|
29
|
+
|
|
30
|
+
```ts
|
|
31
|
+
import { OpenAIEmbedder } from 'inbed';
|
|
32
|
+
|
|
33
|
+
const embedder = new OpenAIEmbedder(
|
|
34
|
+
process.env.OPENAI_API_KEY!
|
|
35
|
+
);
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Initialize Inbed
|
|
39
|
+
|
|
40
|
+
```ts
|
|
41
|
+
import { Inbed } from 'inbed';
|
|
42
|
+
|
|
43
|
+
const inbed = new Inbed(embedder, {
|
|
44
|
+
rootDir: process.cwd(),
|
|
45
|
+
fileExtensions: ['.ts', '.js'],
|
|
46
|
+
ignorePatterns: ['dist/**', 'node_modules/**']
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
await inbed.load();
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Semantic Search
|
|
55
|
+
|
|
56
|
+
```ts
|
|
57
|
+
const results = await inbed.semanticSearch(
|
|
58
|
+
'where cache is written to disk',
|
|
59
|
+
5
|
|
60
|
+
);
|
|
61
|
+
|
|
62
|
+
results.forEach(r => {
|
|
63
|
+
console.log(r.path, r.score);
|
|
64
|
+
});
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Returns the most relevant files and snippets for the query.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Common Pattern: Prompt → Files
|
|
72
|
+
|
|
73
|
+
```ts
|
|
74
|
+
async function selectRelevantFiles(prompt: string) {
|
|
75
|
+
const results = await inbed.semanticSearch(prompt, 5);
|
|
76
|
+
return results.map(r => r.path);
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Use this to feed only the necessary code into an LLM prompt.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## How it works (short)
|
|
85
|
+
|
|
86
|
+
1. Scans the project
|
|
87
|
+
2. Splits files into chunks (AST-aware for TS)
|
|
88
|
+
3. Generates embeddings (cached locally)
|
|
89
|
+
4. Watches files for changes
|
|
90
|
+
5. Searches by vector similarity
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Embedding Providers
|
|
95
|
+
|
|
96
|
+
* OpenAI
|
|
97
|
+
* Ollama (local)
|
|
98
|
+
* OpenRouter
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## What Inbed is not
|
|
103
|
+
|
|
104
|
+
* A static analyzer
|
|
105
|
+
* A refactor engine
|
|
106
|
+
* A bug detector
|
|
107
|
+
|
|
108
|
+
It retrieves **relevant context** — the LLM does the reasoning.
|
package/demo.js
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { OpenRouterEmbedder } from './dist/embeddings/OpenRouter.js';
|
|
2
|
+
import { Inbed } from './dist/Inbed.js';
|
|
3
|
+
|
|
4
|
+
import 'dotenv/config';
|
|
5
|
+
|
|
6
|
+
/**
 * End-to-end demo: indexes the current directory with an OpenRouter embedder,
 * prints the indexed paths and runs one semantic search.
 *
 * The API key is read from OPENROUTER_API_KEY, falling back to OPENAI_API_KEY
 * (the variable this demo originally used). On failure the process exit code is
 * set to 1. The index is always cleared at the end so its file watcher is
 * closed; otherwise the persistent watcher keeps the process alive forever.
 */
async function demo() {
  let project;
  try {
    // const embedder = new OllamaEmbedder('https://ollama.johan.chat', 'mxbai-embed-large');
    const apiKey = process.env.OPENROUTER_API_KEY ?? process.env.OPENAI_API_KEY;
    if (!apiKey) {
      throw new Error('Set OPENROUTER_API_KEY (or OPENAI_API_KEY) before running the demo');
    }
    const embedder = new OpenRouterEmbedder(apiKey, 'mistralai/codestral-embed-2505');

    project = new Inbed(embedder, { rootDir: './' });
    await project.load();

    console.log('Loaded files:', project.listFiles().map(f => f.path));

    const results = await project.semanticSearch('demo');
    console.log('Semantic search results:');
    results.forEach(result => {
      console.log(result)
      console.log(`- ${result.path}: ${result.score.toFixed(4)}`);
    });

  } catch (error) {
    console.error('Demo failed:', error);
    // exitCode (rather than process.exit) lets the finally block run first.
    process.exitCode = 1;
  } finally {
    // Stops the watcher started by load() so Node can exit.
    project?.clear();
  }
}

demo();
|
package/dist/Inbed.js
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import crypto from 'crypto';
|
|
4
|
+
import chokidar from 'chokidar';
|
|
5
|
+
import { glob } from 'glob';
|
|
6
|
+
import ts from 'typescript';
|
|
7
|
+
/**
 * Semantic index over the source files found under `options.rootDir`.
 *
 * Files are split into chunks, each chunk is embedded through the supplied
 * embedder, and the normalized vectors are cached as JSON files in
 * `<rootDir>/.inbed`. `semanticSearch` ranks files by the best dot product
 * between the query vector and any of the file's chunk vectors.
 */
export class Inbed {
    /**
     * @param embedder Object exposing `model`, `embed(file)` and `embedQuery(query)`.
     * @param options  Must contain `rootDir`; may override `recursive`,
     *                 `fileExtensions`, `ignorePatterns`, `maxDepth` and `chunkLimit`.
     */
    constructor(embedder, options) {
        this.files = new Map();
        // concurrency
        this.embeddingQueue = [];
        this.maxConcurrentEmbeds = 4;
        // watcher debounce
        this.debounceMap = new Map();
        const defaultIgnores = ['johankit.yaml', 'johankit.yml', 'dist/**', 'node_modules/**', '.git/**', '.inbed/**'];
        this.options = {
            recursive: true,
            fileExtensions: ['.ts', '.js'],
            maxDepth: 10,
            ...options,
            // Merged AFTER spreading `options`; otherwise user patterns replace
            // the defaults instead of extending them.
            ignorePatterns: [...new Set([...defaultIgnores, ...(options.ignorePatterns || [])])]
        };
        // Pre-compiled once; used by the watcher's `ignored` callback.
        this.ignoreMatchers = this.options.ignorePatterns.map(pattern => this.globToRegExp(pattern));
        this.embedder = embedder;
        this.chunkLimit = options.chunkLimit || 10;
        this.storageDir = path.join(this.options.rootDir, '.inbed');
        if (!fs.existsSync(this.storageDir))
            fs.mkdirSync(this.storageDir, { recursive: true });
    }

    /**
     * Scans `rootDir` for files with the configured extensions, indexes them,
     * resolves their relative imports (when `recursive` is set) and starts the
     * file watcher. Unreadable files are reported and skipped.
     */
    async load() {
        const patterns = this.options.fileExtensions.map(ext => `**/*${ext}`);
        // A Set removes duplicates when extensions overlap (e.g. '.ts' and '.d.ts').
        const found = new Set(patterns.flatMap(pattern => glob.sync(pattern, { cwd: this.options.rootDir, ignore: this.options.ignorePatterns, nodir: true })));
        for (const filePath of found) {
            const fullPath = path.join(this.options.rootDir, filePath);
            try {
                const content = fs.readFileSync(fullPath, 'utf-8');
                await this.enqueueAdd(path.normalize(filePath), content);
            }
            catch (error) {
                console.warn(`Inbed: could not read ${fullPath}:`, error instanceof Error ? error.message : error);
            }
        }
        await this.flushQueue();
        if (this.options.recursive) {
            // Shared visited set: every file's imports are computed exactly once.
            const visited = new Set();
            for (const file of Array.from(this.files.values())) {
                this.resolveImports(file, 0, visited);
            }
        }
        // Calling load() twice must not leave the previous watcher running.
        this.stopWatching();
        this.watchFiles();
    }

    /** Cache key for a file: depends on chunker version, content, chunk limit and model. */
    hashContent(content) {
        // Bumped from v2 because chunk boundaries changed (no empty/leading-newline chunks),
        // so vectors cached by the previous chunker no longer line up with `chunks`.
        const version = 'chunker:v3';
        return crypto.createHash('sha256').update(version + content + this.chunkLimit + this.embedder.model, 'utf-8').digest('hex');
    }

    /** Returns `vec` scaled to unit length; a zero (or empty) vector is returned unchanged. */
    normalize(vec) {
        let norm = 0;
        for (const v of vec)
            norm += v * v;
        norm = Math.sqrt(norm);
        if (norm === 0)
            return vec;
        return vec.map(v => v / norm);
    }

    /**
     * Starts indexing `filePath`, waiting first until fewer than
     * `maxConcurrentEmbeds` tasks are in flight. Resolves once the task is
     * queued, not once it has finished; use `flushQueue` to wait for completion.
     */
    async enqueueAdd(filePath, content) {
        while (this.embeddingQueue.length >= this.maxConcurrentEmbeds) {
            await Promise.race(this.embeddingQueue);
        }
        // Queued promises never reject: a rejection would surface through the
        // Promise.race above and abort the enqueue of an unrelated file.
        const p = this.addFile(filePath, content)
            .catch(error => {
                console.error(`Inbed: failed to index ${filePath}:`, error instanceof Error ? error.message : error);
            })
            .finally(() => {
                this.embeddingQueue = this.embeddingQueue.filter(x => x !== p);
            });
        this.embeddingQueue.push(p);
    }

    /** Waits for every indexing task that is currently queued. */
    async flushQueue() {
        await Promise.allSettled(this.embeddingQueue);
    }

    /** Reads a cached embedding, returning it only when its hash matches `expectedHash`. */
    readCachedEmbedding(storagePath, expectedHash) {
        if (!fs.existsSync(storagePath))
            return undefined;
        try {
            const saved = JSON.parse(fs.readFileSync(storagePath, 'utf-8'));
            if (saved.hash === expectedHash && Array.isArray(saved.embedding))
                return saved.embedding;
        }
        catch {
            // Corrupt or unreadable cache entry: treat as a miss and re-embed.
        }
        return undefined;
    }

    /**
     * Chunks and embeds one file (using the on-disk cache when the hash
     * matches) and stores it in the in-memory index with empty `imports`.
     */
    async addFile(filePath, content) {
        // Path separators and ':' are flattened so the cache entry is always a
        // single file directly inside `.inbed`, including for nested source
        // paths and model names such as 'org/model' or 'model:tag'.
        const cacheName = `${filePath}_${this.embedder.model}.json`.replace(/[\\/:]/g, '_');
        const storagePath = path.join(this.storageDir, cacheName);
        const currentHash = this.hashContent(content);
        let chunks = this.isSmall(content) ? [content] : this.chunkByAST(content);
        if (chunks.length > this.chunkLimit)
            chunks = chunks.slice(0, this.chunkLimit);
        let embedding = this.readCachedEmbedding(storagePath, currentHash);
        if (!embedding) {
            const raw = await this.embedder.embed({ path: filePath, content, chunks, imports: [] });
            embedding = raw.map(v => this.normalize(v));
            // Only complete results are cached, so a transient embedding failure
            // is retried on the next load instead of being persisted.
            const complete = embedding.length === chunks.length && embedding.every(v => v.length > 0);
            if (complete) {
                try {
                    fs.writeFileSync(storagePath, JSON.stringify({ hash: currentHash, embedding }, null, 2));
                }
                catch (error) {
                    console.warn(`Inbed: could not write cache ${storagePath}:`, error instanceof Error ? error.message : error);
                }
            }
        }
        this.files.set(filePath, { path: filePath, content, chunks, imports: [], embedding });
    }

    /** Files shorter than 50 lines are embedded as a single chunk. */
    isSmall(content) {
        return content.split('\n').length < 50;
    }

    /**
     * Splits `content` into chunks along top-level AST nodes, packing
     * consecutive nodes together while the chunk stays within about 800
     * characters. Falls back to a single chunk when parsing fails.
     */
    chunkByAST(content) {
        try {
            const sourceFile = ts.createSourceFile('temp.ts', content, ts.ScriptTarget.Latest);
            const chunks = [];
            let buffer = '';
            ts.forEachChild(sourceFile, node => {
                const text = node.getFullText(sourceFile).trim();
                if (!text)
                    return;
                if (buffer && (buffer + text).length > 800) {
                    chunks.push(buffer);
                    buffer = text;
                }
                else {
                    // No separator before the first node, so chunks never start with '\n'.
                    buffer = buffer ? buffer + '\n' + text : text;
                }
            });
            if (buffer.trim())
                chunks.push(buffer.trim());
            return chunks.length ? chunks : [content];
        }
        catch {
            return [content];
        }
    }

    /**
     * Fills `file.imports` with the relative imports of `file` that point at
     * indexed files, then does the same for each imported file.
     *
     * @param file    Indexed file whose content is scanned.
     * @param depth   Current recursion depth; nothing happens beyond `maxDepth`.
     * @param visited Paths already processed in this pass (guards against import cycles).
     */
    resolveImports(file, depth, visited = new Set()) {
        if (depth > (this.options.maxDepth || 10))
            return;
        if (visited.has(file.path))
            return;
        visited.add(file.path);
        // Matches import/export ... from '<x>', side-effect import '<x>',
        // require('<x>') and dynamic import('<x>').
        const importRegex = /(?:import|export)\s+(?:[^'";]*?\s+from\s+)?['"]([^'"\n]+)['"]|(?:require|import)\(\s*['"]([^'"\n]+)['"]\s*\)/g;
        const resolved = new Map();
        let match;
        while ((match = importRegex.exec(file.content)) !== null) {
            const sourceImport = match[1] || match[2];
            // Bare specifiers (packages, node builtins) are not files of this project.
            if (!sourceImport || !sourceImport.startsWith('.'))
                continue;
            const base = path.join(path.dirname(file.path), sourceImport);
            const candidates = ['', '.ts', '.tsx', '.js', '/index.ts', '/index.js'].map(ext => base + ext);
            // ESM-style TypeScript imports name the emitted file ('./types.js')
            // while the indexed source is './types.ts'.
            if (/\.jsx?$/.test(base))
                candidates.push(base.replace(/\.jsx?$/, '.ts'), base.replace(/\.jsx?$/, '.tsx'));
            const hit = candidates.map(candidate => path.normalize(candidate)).find(candidate => this.files.has(candidate));
            if (!hit)
                continue;
            if (!resolved.has(hit))
                resolved.set(hit, { source: sourceImport, resolved: hit });
            this.resolveImports(this.files.get(hit), depth + 1, visited);
        }
        file.imports = Array.from(resolved.values());
    }

    /** Indexes `content` under `key`, waits for completion and re-links its imports. */
    async indexAndLink(key, content) {
        await this.enqueueAdd(key, content);
        await this.flushQueue();
        const file = this.files.get(key);
        if (file && this.options.recursive)
            this.resolveImports(file, 0);
    }

    /**
     * Adds or replaces a file in the index. Resolves once the file has been
     * embedded and is visible through `getFile` / `semanticSearch`.
     */
    async upsertFile(filePath, content) {
        await this.indexAndLink(path.normalize(filePath), content);
    }

    /** Returns the indexed entry for `filePath`, or `undefined`. */
    getFile(filePath) {
        return this.files.get(path.normalize(filePath));
    }

    /** Returns all indexed entries. */
    listFiles() {
        return Array.from(this.files.values());
    }

    /**
     * Ranks indexed files against `query`.
     *
     * @param query Natural-language query passed to `embedder.embedQuery`.
     * @param topK  Maximum number of results (default 5).
     * @returns Up to `topK` `{ path, snippet, score }` objects sorted by
     *          descending score; files whose best score is not positive are omitted.
     */
    async semanticSearch(query, topK = 5) {
        const rawQuery = await this.embedder.embedQuery(query);
        // The bundled embedders return [] when the request fails.
        if (!Array.isArray(rawQuery) || rawQuery.length === 0)
            return [];
        const queryEmbedding = this.normalize(rawQuery);
        const results = [];
        for (const file of this.files.values()) {
            if (!file.embedding)
                continue;
            let bestScore = 0;
            let bestChunk = '';
            for (let i = 0; i < file.embedding.length; i++) {
                const vector = file.embedding[i];
                // Empty vector = placeholder for a chunk that could not be embedded.
                if (!vector || vector.length === 0)
                    continue;
                const score = this.cosineSimilarity(queryEmbedding, vector);
                if (score > bestScore) {
                    bestScore = score;
                    bestChunk = file.chunks[i] || '';
                }
            }
            if (bestScore > 0) {
                const importsText = Array.isArray(file.imports)
                    ? file.imports
                        .map((imp) => typeof imp === 'string' ? imp : `${imp.source} -> ${imp.resolved}`)
                        .join('\n')
                    : '';
                const context = `FILE: ${file.path}\nIMPORTS:\n${importsText || '(none)'}\n---\n${bestChunk}`;
                results.push({ path: file.path, snippet: context.slice(0, 500), score: bestScore });
            }
        }
        return results.sort((a, b) => b.score - a.score).slice(0, topK);
    }

    /**
     * Dot product over the common prefix of `a` and `b`. Both inputs are
     * normalized by the callers in this class, which makes this the cosine
     * similarity for vectors of equal length.
     */
    cosineSimilarity(a, b) {
        let dot = 0;
        const len = Math.min(a.length, b.length);
        for (let i = 0; i < len; i++)
            dot += a[i] * b[i];
        return dot;
    }

    /**
     * Converts a glob (`*`, `**`, `?`) into an anchored RegExp that is tested
     * against '/'-separated paths relative to `rootDir`. A trailing `/**` also
     * matches the directory itself.
     */
    globToRegExp(pattern) {
        const globText = pattern.replace(/\\/g, '/');
        let source = '';
        for (let i = 0; i < globText.length; i++) {
            const ch = globText[i];
            if (ch === '*') {
                if (globText[i + 1] === '*') {
                    i++;
                    if (globText[i + 1] === '/') {
                        i++;
                        source += '(?:.*/)?';
                    }
                    else {
                        source += '.*';
                    }
                }
                else {
                    source += '[^/]*';
                }
            }
            else if (ch === '?') {
                source += '[^/]';
            }
            else if (ch === '/' && globText.slice(i + 1) === '**') {
                source += '(?:/.*)?';
                break;
            }
            else {
                source += ch.replace(/[.+^${}()|[\]\\]/g, '\\$&');
            }
        }
        return new RegExp(`^${source}$`);
    }

    /** True when `candidate` (absolute or cwd-relative) matches one of the ignore patterns. */
    isIgnoredPath(candidate) {
        const rel = path.relative(this.options.rootDir, candidate).split(path.sep).join('/');
        if (!rel || rel.startsWith('..'))
            return false;
        return this.ignoreMatchers.some(matcher => matcher.test(rel));
    }

    /**
     * Watches `rootDir` and keeps the index in sync: added/changed files are
     * re-indexed after a 200 ms debounce, removed files are dropped.
     */
    watchFiles() {
        this.watcher = chokidar.watch(this.options.rootDir, {
            // chokidar 4+ no longer interprets glob strings in `ignored`,
            // so the patterns are applied through a predicate instead.
            ignored: candidate => this.isIgnoredPath(candidate),
            persistent: true,
            ignoreInitial: true
        });
        const hasIndexedExtension = fullPath => (this.options.fileExtensions || []).some(ext => fullPath.endsWith(ext));
        const toKey = fullPath => path.normalize(path.relative(this.options.rootDir, fullPath));
        const reindex = fullPath => {
            if (!hasIndexedExtension(fullPath))
                return;
            clearTimeout(this.debounceMap.get(fullPath));
            this.debounceMap.set(fullPath, setTimeout(async () => {
                this.debounceMap.delete(fullPath);
                try {
                    const content = fs.readFileSync(fullPath, 'utf-8');
                    await this.indexAndLink(toKey(fullPath), content);
                }
                catch (error) {
                    console.warn(`Inbed: could not re-index ${fullPath}:`, error instanceof Error ? error.message : error);
                }
            }, 200));
        };
        this.watcher
            .on('add', reindex)
            .on('change', reindex)
            .on('unlink', fullPath => {
                // Cancel a pending re-index so the removed file is not re-added.
                clearTimeout(this.debounceMap.get(fullPath));
                this.debounceMap.delete(fullPath);
                this.files.delete(toKey(fullPath));
            });
    }

    /** Cancels pending debounced re-indexing and closes the watcher, if any. */
    stopWatching() {
        for (const timer of this.debounceMap.values())
            clearTimeout(timer);
        this.debounceMap.clear();
        if (this.watcher) {
            // close() is asynchronous; a failure while shutting down is not actionable here.
            Promise.resolve(this.watcher.close()).catch(() => { });
            this.watcher = null;
        }
    }

    /** Stops watching and empties the in-memory index (the on-disk cache is kept). */
    clear() {
        this.stopWatching();
        this.files.clear();
    }
}
|
package/dist/demo.js
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { OllamaEmbedder } from './embeddings/Ollama.js';
|
|
2
|
+
import { Inbed } from './Inbed.js';
|
|
3
|
+
/**
 * Compiled demo: indexes the current directory through an Ollama embedder,
 * lists the indexed paths, runs one semantic search for "package" and then
 * clears the index. Any failure is logged and the process exits with code 1.
 *
 * NOTE(review): the embedder below targets a remote host rather than the
 * class default of localhost, so the indexed chunks appear to be sent to that
 * server when this demo runs. Confirm that is intended.
 */
async function demo() {
    try {
        const ollama = new OllamaEmbedder('https://ollama.johan.chat', 'mxbai-embed-large');
        const index = new Inbed(ollama, { rootDir: './' });
        await index.load();

        const loadedPaths = index.listFiles().map(entry => entry.path);
        console.log('Loaded files:', loadedPaths);

        const hits = await index.semanticSearch('package');
        console.log('Semantic search results:');
        for (const hit of hits) {
            console.log(`- ${hit.path}: ${hit.score.toFixed(4)}`);
        }

        // Release the watcher and the in-memory index.
        index.clear();
    }
    catch (failure) {
        console.error('Demo failed:', failure);
        process.exit(1);
    }
}
demo();
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
 * Offline embedder that needs no model or network: every string becomes a
 * vector with one entry per UTF-16 code unit, each entry being the code unit
 * divided by 255. The vector length therefore equals the text length.
 */
export class LocalEmbedder {
    /** Returns one vector per entry of `file.chunks`, in the same order. */
    async embed(file) {
        const vectors = [];
        for (const piece of file.chunks) {
            vectors.push(this.stringToVector(piece));
        }
        return vectors;
    }

    /** Returns the vector for a search query. */
    async embedQuery(query) {
        const vector = this.stringToVector(query);
        return vector;
    }

    /** Maps each UTF-16 code unit of `text` to `codeUnit / 255`. */
    stringToVector(text) {
        return Array.from({ length: text.length }, (_unused, position) => text.charCodeAt(position) / 255);
    }
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
/**
|
|
3
|
+
* Interface InbedFile
|
|
4
|
+
* export interface InbedFile {
|
|
5
|
+
* name: string;
|
|
6
|
+
* chunks: string[];
|
|
7
|
+
* }
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* Interface Embedder
|
|
11
|
+
* export interface Embedder {
|
|
12
|
+
* embed(file: InbedFile): Promise<number[][]>;
|
|
13
|
+
* embedQuery(query: string): Promise<number[]>;
|
|
14
|
+
* }
|
|
15
|
+
*/
|
|
16
|
+
/**
 * Embedder that calls the `/api/embed` endpoint of an Ollama server.
 */
export class OllamaEmbedder {
    /**
     * @param baseUrl Server base URL (default `http://localhost:11434`); trailing
     *                slashes are removed so the request path is well-formed.
     * @param model   Embedding model name (default `mxbai-embed-large`).
     */
    constructor(baseUrl = 'http://localhost:11434', model = 'mxbai-embed-large') {
        this.baseUrl = baseUrl.replace(/\/+$/, '');
        this.model = model;
    }

    /**
     * Embeds every chunk of `file`, one request at a time.
     *
     * @returns One vector per chunk, in chunk order. A chunk whose embedding
     *          failed yields an empty array so that index `i` of the result
     *          always corresponds to `file.chunks[i]`.
     */
    async embed(file) {
        const embeddings = [];
        for (const chunk of file.chunks) {
            const emb = await this.embedText(chunk);
            const ok = Boolean(emb) && emb.length > 0;
            if (!ok)
                console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
            // Placeholder keeps the result aligned with file.chunks.
            embeddings.push(ok ? emb : []);
        }
        return embeddings;
    }

    /** Embeds a search query; returns `[]` when the request fails. */
    async embedQuery(query) {
        return this.embedText(query);
    }

    /**
     * Requests the embedding of `text` (30 s timeout).
     *
     * @returns The first vector of the response, or `[]` on an error or an
     *          unexpected response shape (both are logged).
     */
    async embedText(text) {
        try {
            const response = await axios.post(`${this.baseUrl}/api/embed`, {
                model: this.model,
                input: text
            }, { timeout: 30000 });
            if (response.data?.embeddings && response.data.embeddings.length > 0) {
                return response.data.embeddings[0];
            }
            console.error('Resposta da API Ollama inesperada:', response.data);
            return [];
        }
        catch (error) {
            console.error('Erro em embedText (Ollama):', error.response?.data ? JSON.stringify(error.response.data) : error.message);
            return [];
        }
    }
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import OpenAI from 'openai';
|
|
2
|
+
/**
 * Embedder backed by the OpenAI embeddings API.
 */
export class OpenAIEmbedder {
    /**
     * @param apiKey OpenAI API key passed to the client.
     * @param model  Embedding model name (default `text-embedding-3-small`).
     */
    constructor(apiKey, model = 'text-embedding-3-small') {
        this.client = new OpenAI({ apiKey });
        this.model = model;
    }

    /**
     * Embeds every chunk of `file`, one request at a time.
     *
     * @returns One vector per chunk, in chunk order. A chunk whose embedding
     *          failed yields an empty array so that index `i` of the result
     *          always corresponds to `file.chunks[i]`.
     */
    async embed(file) {
        const embeddings = [];
        for (const chunk of file.chunks) {
            let emb = [];
            try {
                emb = await this.embedText(chunk);
            }
            catch (error) {
                console.error('Error embedding chunk:', error);
            }
            const ok = Boolean(emb) && emb.length > 0;
            if (!ok)
                console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
            // Placeholder keeps the result aligned with file.chunks.
            embeddings.push(ok ? emb : []);
        }
        return embeddings;
    }

    /** Embeds a search query; returns `[]` when the request fails. */
    async embedQuery(query) {
        try {
            return await this.embedText(query);
        }
        catch (error) {
            console.error('Error embedding query:', error);
            return [];
        }
    }

    /**
     * Requests the embedding of `text`.
     *
     * @returns `data[0].embedding` of the response, or `[]` on an error or an
     *          unexpected response shape (both are logged).
     */
    async embedText(text) {
        try {
            const response = await this.client.embeddings.create({
                model: this.model,
                input: text,
            });
            // The response carries the vector in data[0].embedding.
            if (response.data && response.data.length > 0) {
                return response.data[0].embedding;
            }
            console.error('Resposta da API de Embedding inesperada:', response);
            return [];
        }
        catch (error) {
            console.error('Error in embedText:', error.response?.data ?? error.message);
            return [];
        }
    }
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
/**
|
|
3
|
+
* Interface InbedFile (exemplo)
|
|
4
|
+
* export interface InbedFile {
|
|
5
|
+
* name: string;
|
|
6
|
+
* chunks: string[];
|
|
7
|
+
* }
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* Interface Embedder (exemplo)
|
|
11
|
+
* export interface Embedder {
|
|
12
|
+
* embed(file: InbedFile): Promise<number[][]>;
|
|
13
|
+
* embedQuery(query: string): Promise<number[]>;
|
|
14
|
+
* }
|
|
15
|
+
*/
|
|
16
|
+
/**
 * Embedder that calls the OpenRouter embeddings endpoint
 * (`https://openrouter.ai/api/v1/embeddings`).
 */
export class OpenRouterEmbedder {
    /**
     * @param apiKey OpenRouter API key, sent as a Bearer token.
     * @param model  Embedding model identifier.
     */
    constructor(apiKey, model) {
        this.apiKey = apiKey;
        this.model = model;
    }

    /**
     * Embeds every chunk of `file`, one request at a time.
     *
     * @returns One vector per chunk, in chunk order. A chunk whose embedding
     *          failed yields an empty array so that index `i` of the result
     *          always corresponds to `file.chunks[i]`. Returns `[]` if the
     *          whole operation throws.
     */
    async embed(file) {
        try {
            const embeddings = [];
            for (const chunk of file.chunks) {
                const emb = await this.embedText(chunk);
                const ok = Boolean(emb) && emb.length > 0;
                if (!ok) {
                    // embedText returned an empty array (request failed).
                    console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
                }
                // Placeholder keeps the result aligned with file.chunks.
                embeddings.push(ok ? emb : []);
            }
            return embeddings;
        }
        catch (error) {
            console.error('Error embedding file:', error);
            return [];
        }
    }

    /** Embeds a search query with the same request used for chunks; `[]` on failure. */
    async embedQuery(query) {
        try {
            return await this.embedText(query);
        }
        catch (error) {
            console.error('Error embedding query:', error);
            return [];
        }
    }

    /**
     * Requests the embedding of `text` (30 s timeout).
     *
     * @returns `data[0].embedding` of the response body, or `[]` on an error
     *          or an unexpected response shape (both are logged).
     */
    async embedText(text) {
        try {
            const response = await axios.post('https://openrouter.ai/api/v1/embeddings', {
                input: text,
                model: this.model
            }, {
                headers: {
                    'Authorization': `Bearer ${this.apiKey}`,
                    'Content-Type': 'application/json',
                },
                timeout: 30000
            });
            // Expected body shape: { "data": [{ "embedding": [0.1, 0.2, ...], ... }], ... }
            if (response.data && response.data.data && response.data.data.length > 0) {
                return response.data.data[0].embedding;
            }
            // Unexpected shape: log it and signal failure with an empty vector.
            console.error('Resposta da API de Embedding inesperada:', response.data);
            return [];
        }
        catch (error) {
            // Prefer the API's response body when there is one.
            console.error('Error in embedText:', error.response?.data ? JSON.stringify(error.response.data) : error.message);
            // Empty vector lets the caller continue with the remaining chunks.
            return [];
        }
    }
}
|
package/dist/index.js
ADDED
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/utils.js
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
/**
 * Reads a file as UTF-8 text without throwing.
 *
 * @param filePath Path of the file to read.
 * @returns The file content, or `null` when the read fails for any reason.
 */
export function readFileSafe(filePath) {
    let text = null;
    try {
        text = fs.readFileSync(filePath, { encoding: 'utf-8' });
    }
    catch {
        // Any read error is reported to the caller as `null`.
    }
    return text;
}
|
|
10
|
+
/**
 * Tells whether `filePath` matches any of `ignorePatterns`.
 *
 * A pattern without wildcards matches when it occurs anywhere in the path
 * (plain substring test). A pattern containing `*` or `?` is treated as a
 * glob: `*` and `?` match within one path segment, `**` matches across
 * segments, and a trailing `/**` also matches the directory itself. Glob
 * patterns must start at the beginning of the path or right after a `/`.
 *
 * @param filePath       Path to test; backslashes are treated as `/` for globs.
 * @param ignorePatterns Literal fragments and/or glob patterns.
 * @returns `true` when at least one pattern matches.
 */
export function isIgnored(filePath, ignorePatterns) {
    const normalizedPath = filePath.replace(/\\/g, '/');
    return ignorePatterns.some(pattern => {
        if (!/[*?]/.test(pattern)) {
            return filePath.includes(pattern);
        }
        const globText = pattern.replace(/\\/g, '/');
        let source = '';
        for (let i = 0; i < globText.length; i++) {
            const ch = globText[i];
            if (ch === '*') {
                if (globText[i + 1] === '*') {
                    i++;
                    if (globText[i + 1] === '/') {
                        i++;
                        source += '(?:.*/)?';
                    }
                    else {
                        source += '.*';
                    }
                }
                else {
                    source += '[^/]*';
                }
            }
            else if (ch === '?') {
                source += '[^/]';
            }
            else if (ch === '/' && globText.slice(i + 1) === '**') {
                // 'dir/**' ignores the directory and everything below it.
                source += '(?:/.*)?';
                break;
            }
            else {
                source += ch.replace(/[.+^${}()|[\]\\]/g, '\\$&');
            }
        }
        return new RegExp(`(?:^|/)${source}$`).test(normalizedPath);
    });
}
|
package/johankit.yml
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "inbed",
|
|
3
|
+
"version": "1.0.1",
|
|
4
|
+
"description": "Semantic indexing and search over source code using vector embeddings",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"build": "tsc",
|
|
9
|
+
"start": "node dist/index.js",
|
|
10
|
+
"demo": "node demo.js"
|
|
11
|
+
},
|
|
12
|
+
"keywords": ["embedding", "semantic", "codebase", "AST", "typescript", "javascript"],
|
|
13
|
+
"author": "Johan Labs",
|
|
14
|
+
"license": "MIT",
|
|
15
|
+
"type": "module",
|
|
16
|
+
"dependencies": {
|
|
17
|
+
"@openrouter/sdk": "^0.2.11",
|
|
18
|
+
"@xenova/transformers": "^2.17.2",
|
|
19
|
+
"axios": "^1.13.2",
|
|
20
|
+
"chokidar": "^5.0.0",
|
|
21
|
+
"dotenv": "^17.2.3",
|
|
22
|
+
"glob": "^13.0.0",
|
|
23
|
+
"openai": "^6.10.0",
|
|
24
|
+
"typescript": "^5.2.2"
|
|
25
|
+
},
|
|
26
|
+
"devDependencies": {
|
|
27
|
+
"nodemon": "^3.1.11",
|
|
28
|
+
"ts-node": "^10.9.1"
|
|
29
|
+
}
|
|
30
|
+
}
|
package/src/Inbed.ts
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import crypto from 'crypto';
|
|
4
|
+
import chokidar from 'chokidar';
|
|
5
|
+
|
|
6
|
+
import { glob } from 'glob';
|
|
7
|
+
import ts from 'typescript';
|
|
8
|
+
import { InbedFile, InbedOptions, SearchResult, InbedImport } from './types.js';
|
|
9
|
+
|
|
10
|
+
import { Embedder } from './embeddings/Embedder.js';
|
|
11
|
+
|
|
12
|
+
export class Inbed {
|
|
13
|
+
private files: Map<string, InbedFile> = new Map();
|
|
14
|
+
private options: InbedOptions;
|
|
15
|
+
private embedder: Embedder;
|
|
16
|
+
private watcher: any;
|
|
17
|
+
private storageDir: string;
|
|
18
|
+
private chunkLimit: number;
|
|
19
|
+
|
|
20
|
+
private embeddingQueue: Promise<void>[] = [];
|
|
21
|
+
private maxConcurrentEmbeds = 4;
|
|
22
|
+
|
|
23
|
+
private debounceMap: Map<string, NodeJS.Timeout> = new Map();
|
|
24
|
+
|
|
25
|
+
constructor(embedder: Embedder, options: InbedOptions) {
|
|
26
|
+
this.options = {
|
|
27
|
+
recursive: true,
|
|
28
|
+
fileExtensions: ['.ts', '.js'],
|
|
29
|
+
ignorePatterns: ['johankit.yaml', 'dist/**', 'node_modules/**', '.git/**', '.inbed/**', ...(options.ignorePatterns || [])],
|
|
30
|
+
maxDepth: 10,
|
|
31
|
+
...options
|
|
32
|
+
};
|
|
33
|
+
this.embedder = embedder;
|
|
34
|
+
this.chunkLimit = (options as any).chunkLimit || 10;
|
|
35
|
+
this.storageDir = path.join(this.options.rootDir, '.inbed');
|
|
36
|
+
if (!fs.existsSync(this.storageDir)) fs.mkdirSync(this.storageDir, { recursive: true });
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
async load(): Promise<void> {
|
|
40
|
+
const patterns = this.options.fileExtensions!.map(ext => `**/*${ext}`);
|
|
41
|
+
const files = patterns.flatMap(pattern => glob.sync(pattern, { cwd: this.options.rootDir, ignore: this.options.ignorePatterns, nodir: true }));
|
|
42
|
+
|
|
43
|
+
for (const filePath of files) {
|
|
44
|
+
const fullPath = path.join(this.options.rootDir, filePath);
|
|
45
|
+
try {
|
|
46
|
+
const content = fs.readFileSync(fullPath, 'utf-8');
|
|
47
|
+
await this.enqueueAdd(path.normalize(filePath), content);
|
|
48
|
+
} catch {}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
await this.flushQueue();
|
|
52
|
+
|
|
53
|
+
if (this.options.recursive) {
|
|
54
|
+
for (const file of Array.from(this.files.values())) {
|
|
55
|
+
this.resolveImports(file, 0);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
this.watchFiles();
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
private hashContent(content: string): string {
|
|
63
|
+
const version = 'chunker:v2';
|
|
64
|
+
return crypto.createHash('sha256').update(version + content + this.chunkLimit + this.embedder.model, 'utf-8').digest('hex');
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
private normalize(vec: number[]): number[] {
|
|
68
|
+
let norm = 0;
|
|
69
|
+
for (const v of vec) norm += v * v;
|
|
70
|
+
norm = Math.sqrt(norm);
|
|
71
|
+
if (norm === 0) return vec;
|
|
72
|
+
return vec.map(v => v / norm);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
private async enqueueAdd(filePath: string, content: string) {
|
|
76
|
+
const task = async () => {
|
|
77
|
+
await this.addFile(filePath, content);
|
|
78
|
+
};
|
|
79
|
+
|
|
80
|
+
while (this.embeddingQueue.length >= this.maxConcurrentEmbeds) {
|
|
81
|
+
await Promise.race(this.embeddingQueue);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
const p = task().finally(() => {
|
|
85
|
+
this.embeddingQueue = this.embeddingQueue.filter(x => x !== p);
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
this.embeddingQueue.push(p);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
private async flushQueue() {
|
|
92
|
+
await Promise.allSettled(this.embeddingQueue);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
private async addFile(filePath: string, content: string) {
|
|
96
|
+
const storagePath = path.join(this.storageDir, `${filePath.replace(/\\/g, '_')}_${this.embedder.model}.json`);
|
|
97
|
+
let embedding: number[][] | undefined;
|
|
98
|
+
const currentHash = this.hashContent(content);
|
|
99
|
+
|
|
100
|
+
if (fs.existsSync(storagePath)) {
|
|
101
|
+
try {
|
|
102
|
+
const saved = JSON.parse(fs.readFileSync(storagePath, 'utf-8'));
|
|
103
|
+
if (saved.hash === currentHash) embedding = saved.embedding;
|
|
104
|
+
} catch {}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
let chunks = this.isSmall(content) ? [content] : this.chunkByAST(content);
|
|
108
|
+
if (chunks.length > this.chunkLimit) chunks = chunks.slice(0, this.chunkLimit);
|
|
109
|
+
|
|
110
|
+
if (!embedding) {
|
|
111
|
+
const raw = await this.embedder.embed({ path: filePath, content, chunks, imports: [] });
|
|
112
|
+
embedding = raw.map(v => this.normalize(v));
|
|
113
|
+
try {
|
|
114
|
+
fs.writeFileSync(storagePath, JSON.stringify({ hash: currentHash, embedding }, null, 2));
|
|
115
|
+
} catch {}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
this.files.set(filePath, { path: filePath, content, chunks, imports: [], embedding });
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
private isSmall(content: string): boolean {
|
|
122
|
+
return content.split('\n').length < 50;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
private chunkByAST(content: string): string[] {
|
|
126
|
+
try {
|
|
127
|
+
const sourceFile = ts.createSourceFile('temp.ts', content, ts.ScriptTarget.Latest);
|
|
128
|
+
const chunks: string[] = [];
|
|
129
|
+
let buffer = '';
|
|
130
|
+
ts.forEachChild(sourceFile, node => {
|
|
131
|
+
const text = node.getFullText(sourceFile).trim();
|
|
132
|
+
if (!text) return;
|
|
133
|
+
if ((buffer + text).length > 800) {
|
|
134
|
+
chunks.push(buffer);
|
|
135
|
+
buffer = text;
|
|
136
|
+
} else {
|
|
137
|
+
buffer += '\n' + text;
|
|
138
|
+
}
|
|
139
|
+
});
|
|
140
|
+
if (buffer.trim()) chunks.push(buffer.trim());
|
|
141
|
+
return chunks.length ? chunks : [content];
|
|
142
|
+
} catch {
|
|
143
|
+
return [content];
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
private resolveImports(file: InbedFile, depth: number) {
|
|
148
|
+
if (depth > (this.options.maxDepth || 10)) return;
|
|
149
|
+
|
|
150
|
+
const imports: InbedImport[] = [];
|
|
151
|
+
|
|
152
|
+
const importRegex = /import\s+(?:[^'"\n]+)\s+from\s+['"](.+)['"]|require\(\s*['"](.+)['"]\s*\)/g;
|
|
153
|
+
|
|
154
|
+
let match;
|
|
155
|
+
while ((match = importRegex.exec(file.content)) !== null) {
|
|
156
|
+
const sourceImport = match[1] || match[2];
|
|
157
|
+
if (!sourceImport) continue;
|
|
158
|
+
|
|
159
|
+
const base = path.join(path.dirname(file.path), sourceImport);
|
|
160
|
+
|
|
161
|
+
for (const ext of ['', '.ts', '.tsx', '.js', '/index.ts', '/index.js']) {
|
|
162
|
+
const candidate = path.normalize(base + ext);
|
|
163
|
+
if (this.files.has(candidate)) {
|
|
164
|
+
imports.push({ source: sourceImport, resolved: candidate });
|
|
165
|
+
this.resolveImports(this.files.get(candidate)!, depth + 1);
|
|
166
|
+
break;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
file.imports = imports.filter((v, i, arr) => arr.findIndex(x => x.resolved === v.resolved) === i);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
/**
 * Queues `content` for indexing under the normalized form of `filePath`.
 *
 * @param filePath Path of the file; normalized with `path.normalize`.
 * @param content  Text handed to `enqueueAdd`.
 */
async upsertFile(filePath: string, content: string) {
  const normalizedPath = path.normalize(filePath);
  await this.enqueueAdd(normalizedPath, content);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Looks up an indexed file.
 *
 * @param filePath Path to look up; normalized with `path.normalize` first.
 * @returns The stored entry, or `undefined` when nothing is stored under that key.
 */
getFile(filePath: string): InbedFile | undefined {
  const key = path.normalize(filePath);
  return this.files.get(key);
}
|
|
181
|
+
|
|
182
|
+
/**
 * Returns every indexed file as a new array (a snapshot of `this.files` values).
 */
listFiles(): InbedFile[] {
  return [...this.files.values()];
}
|
|
185
|
+
|
|
186
|
+
/**
 * Ranks indexed files against a natural-language query.
 *
 * The query is embedded, passed through `this.normalize`, and compared with
 * every chunk embedding of every file. Each file contributes at most one
 * result, built from its best-scoring chunk. Files without embeddings, and
 * files whose best score is not strictly positive, are left out.
 *
 * @param query Free-text query.
 * @param topK  Maximum number of results; values that are not greater than
 *              zero yield an empty list.
 * @returns Results sorted by descending score. Each snippet is a header
 *          (file path and resolved imports) followed by the best chunk,
 *          truncated to 500 characters in total.
 */
async semanticSearch(query: string, topK: number = 5): Promise<SearchResult[]> {
  // A negative topK would make `slice(0, topK)` drop results from the tail
  // instead of limiting them, so reject non-positive (and NaN) limits up front.
  if (!(topK > 0)) return [];

  const queryEmbedding = this.normalize(await this.embedder.embedQuery(query));
  // Embedders in this package return [] on failure; nothing can match that.
  if (queryEmbedding.length === 0) return [];

  const results: SearchResult[] = [];

  for (const file of this.files.values()) {
    if (!file.embedding) continue;

    let bestScore = 0;
    let bestChunk = '';
    file.embedding.forEach((vector, index) => {
      const score = this.cosineSimilarity(queryEmbedding, vector);
      if (score > bestScore) {
        bestScore = score;
        // Chunk text is looked up by the same index as its embedding.
        bestChunk = file.chunks[index] || '';
      }
    });

    if (bestScore <= 0) continue;

    const importsText = file.imports.map(imp => `${imp.source} -> ${imp.resolved}`).join('\n');
    const context = `FILE: ${file.path}\nIMPORTS:\n${importsText || '(none)'}\n---\n${bestChunk}`;
    results.push({ path: file.path, snippet: context.slice(0, 500), score: bestScore });
  }

  return results.sort((a, b) => b.score - a.score).slice(0, topK);
}
|
|
210
|
+
|
|
211
|
+
/**
 * Dot product of `a` and `b` over their common prefix (the shorter length).
 *
 * NOTE(review): this equals cosine similarity only if both vectors are
 * unit-length; the query is passed through `this.normalize` by the caller,
 * normalization of stored vectors is not visible here — confirm.
 */
private cosineSimilarity(a: number[], b: number[]): number {
  const shared = Math.min(a.length, b.length);
  return a
    .slice(0, shared)
    .reduce((sum, value, index) => sum + value * b[index], 0);
}
|
|
217
|
+
|
|
218
|
+
/**
 * Starts a chokidar watcher on `options.rootDir` and keeps the index in sync.
 *
 * - `add` / `change`: debounced by 200 ms per path. Files whose path ends
 *   with one of `options.fileExtensions` are re-read and re-queued via
 *   `enqueueAdd` under their path relative to `rootDir`. When
 *   `options.recursive` is set, their imports are resolved again.
 * - `unlink`: the entry is removed from the index and any pending re-index
 *   for that path is cancelled.
 *
 * Failures while re-indexing a file are logged and otherwise ignored so one
 * unreadable file does not stop the watcher.
 */
private watchFiles() {
  this.watcher = chokidar.watch(this.options.rootDir, {
    ignored: this.options.ignorePatterns,
    persistent: true,
    ignoreInitial: true
  });

  // Cancels and forgets the pending debounce timer for a path, if any.
  const cancelPending = (fullPath: string) => {
    const pending = this.debounceMap.get(fullPath);
    if (pending !== undefined) {
      clearTimeout(pending);
      this.debounceMap.delete(fullPath);
    }
  };

  const schedule = (fullPath: string, handler: () => void) => {
    cancelPending(fullPath);
    this.debounceMap.set(fullPath, setTimeout(() => {
      // Drop the entry once it fires so the map does not grow without bound.
      this.debounceMap.delete(fullPath);
      handler();
    }, 200));
  };

  // Shared by 'add' and 'change': both re-read the file and re-queue it.
  const reindex = async (fullPath: string) => {
    if (!this.options.fileExtensions?.some(ext => fullPath.endsWith(ext))) return;

    const relPath = path.relative(this.options.rootDir, fullPath);
    try {
      const content = fs.readFileSync(fullPath, 'utf-8');
      await this.enqueueAdd(relPath, content);

      // The entry may be missing if indexing did not store it; skip import
      // resolution in that case instead of dereferencing undefined.
      const indexed = this.files.get(relPath);
      if (this.options.recursive && indexed) this.resolveImports(indexed, 0);
    } catch (error) {
      console.warn(
        `Failed to index ${fullPath}:`,
        error instanceof Error ? error.message : error
      );
    }
  };

  this.watcher
    .on('add', fullPath => schedule(fullPath, () => { void reindex(fullPath); }))
    .on('change', fullPath => schedule(fullPath, () => { void reindex(fullPath); }))
    .on('unlink', fullPath => {
      // A queued add/change for a file that no longer exists must not run.
      cancelPending(fullPath);
      const relPath = path.relative(this.options.rootDir, fullPath);
      this.files.delete(relPath);
    });
}
|
|
256
|
+
|
|
257
|
+
/**
 * Stops the file watcher, if one is running, and cancels every pending
 * debounced re-index so no file is re-added after watching has stopped.
 * Safe to call when no watcher is active.
 */
stopWatching() {
  // Timers scheduled by the watcher would otherwise still fire up to 200 ms
  // later and write into the index.
  for (const pending of this.debounceMap.values()) clearTimeout(pending);
  this.debounceMap.clear();

  if (this.watcher) {
    this.watcher.close();
    this.watcher = null;
  }
}
|
|
263
|
+
|
|
264
|
+
/**
 * Resets the instance: stops watching first, then empties the file index.
 */
clear(): void {
  this.stopWatching();
  this.files.clear();
}
|
|
268
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { InbedFile } from '../types.js';
|
|
3
|
+
import { Embedder } from './Embedder.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Reference: InbedFile (declared in ../types.ts)
 * export interface InbedFile {
 *   path: string;
 *   content: string;
 *   chunks: string[];
 *   imports: InbedImport[];
 *   embedding?: number[][];
 * }
 */
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Interface Embedder
|
|
15
|
+
* export interface Embedder {
|
|
16
|
+
* embed(file: InbedFile): Promise<number[][]>;
|
|
17
|
+
* embedQuery(query: string): Promise<number[]>;
|
|
18
|
+
* }
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/**
 * Embedder that calls the `/api/embed` endpoint of an Ollama server.
 *
 * Failures never throw: they are logged and reported as an empty vector.
 */
export class OllamaEmbedder implements Embedder {
  private baseUrl: string;
  model: string;

  /**
   * @param baseUrl Base URL of the Ollama server; trailing slashes are removed.
   * @param model   Name of the embedding model to request.
   */
  constructor(baseUrl: string = 'http://localhost:11434', model: string = 'mxbai-embed-large') {
    // Avoid producing "//api/embed" when the caller passes a trailing slash.
    this.baseUrl = baseUrl.replace(/\/+$/, '');
    this.model = model;
  }

  /**
   * Embeds every chunk of `file`, one request per chunk, in order.
   *
   * @returns One vector per chunk, index-aligned with `file.chunks`. A chunk
   *          whose embedding failed is represented by an empty vector so the
   *          positions of the remaining vectors still match their chunks.
   */
  async embed(file: InbedFile): Promise<number[][]> {
    const embeddings: number[][] = [];
    for (const chunk of file.chunks) {
      const emb = await this.embedText(chunk);
      const usable = Array.isArray(emb) && emb.length > 0;
      if (!usable) {
        console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
      }
      // Keep a placeholder instead of dropping the entry: consumers pair
      // embedding[i] with chunks[i].
      embeddings.push(usable ? emb : []);
    }
    return embeddings;
  }

  /**
   * Embeds a search query.
   *
   * @returns The query vector, or `[]` when the request failed.
   */
  async embedQuery(query: string): Promise<number[]> {
    return this.embedText(query);
  }

  /** Requests a single embedding; resolves to `[]` on any error or unexpected response. */
  private async embedText(text: string): Promise<number[]> {
    try {
      const response = await axios.post(
        `${this.baseUrl}/api/embed`,
        {
          model: this.model,
          input: text
        },
        { timeout: 30000 }
      );

      // The first entry of `embeddings` is taken as the vector for the input.
      if (response.data?.embeddings && response.data.embeddings.length > 0) {
        return response.data.embeddings[0];
      }

      console.error('Resposta da API Ollama inesperada:', response.data);
      return [];
    } catch (error: unknown) {
      // Prefer the response body when the thrown error carries one.
      const responseData = (error as { response?: { data?: unknown } } | null | undefined)?.response?.data;
      const message = error instanceof Error ? error.message : String(error);
      console.error(
        'Erro em embedText (Ollama):',
        responseData ? JSON.stringify(responseData) : message
      );
      return [];
    }
  }
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { InbedFile } from '../types.js';
|
|
2
|
+
import { Embedder } from './Embedder.js';
|
|
3
|
+
import OpenAI from 'openai';
|
|
4
|
+
|
|
5
|
+
/**
 * Embedder backed by the OpenAI embeddings API through the `openai` client.
 *
 * Failures never throw: they are logged and reported as an empty vector.
 */
export class OpenAIEmbedder implements Embedder {
  private client: OpenAI;
  model: string;

  /**
   * @param apiKey API key passed to the OpenAI client.
   * @param model  Name of the embedding model to request.
   */
  constructor(apiKey: string, model: string = 'text-embedding-3-small') {
    this.client = new OpenAI({ apiKey });
    this.model = model;
  }

  /**
   * Embeds every chunk of `file`, one request per chunk, in order.
   *
   * @returns One vector per chunk, index-aligned with `file.chunks`. A chunk
   *          whose embedding failed is represented by an empty vector so the
   *          positions of the remaining vectors still match their chunks.
   */
  async embed(file: InbedFile): Promise<number[][]> {
    const embeddings: number[][] = [];

    for (const chunk of file.chunks) {
      // embedText handles its own errors, so no extra try/catch is needed here.
      const emb = await this.embedText(chunk);
      const usable = Array.isArray(emb) && emb.length > 0;
      if (!usable) {
        console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
      }
      // Keep a placeholder instead of dropping the entry: consumers pair
      // embedding[i] with chunks[i].
      embeddings.push(usable ? emb : []);
    }

    return embeddings;
  }

  /**
   * Embeds a search query.
   *
   * @returns The query vector, or `[]` when the request failed.
   */
  async embedQuery(query: string): Promise<number[]> {
    return this.embedText(query);
  }

  /** Requests a single embedding; resolves to `[]` on any error or unexpected response. */
  private async embedText(text: string): Promise<number[]> {
    try {
      const response = await this.client.embeddings.create({
        model: this.model,
        input: text,
      });

      // The vector is read from data[0].embedding.
      if (response.data && response.data.length > 0) {
        return response.data[0].embedding;
      }

      console.error('Resposta da API de Embedding inesperada:', response);
      return [];
    } catch (error: unknown) {
      // Prefer the response body when the thrown error carries one.
      const responseData = (error as { response?: { data?: unknown } } | null | undefined)?.response?.data;
      const message = error instanceof Error ? error.message : String(error);
      console.error('Error in embedText:', responseData ?? message);
      return [];
    }
  }
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { InbedFile } from '../types.js'; // Assumindo que este import está correto
|
|
3
|
+
import { Embedder } from './Embedder.js'; // Assumindo que este import está correto
|
|
4
|
+
|
|
5
|
+
/**
 * Reference: InbedFile (declared in ../types.ts)
 * export interface InbedFile {
 *   path: string;
 *   content: string;
 *   chunks: string[];
 *   imports: InbedImport[];
 *   embedding?: number[][];
 * }
 */
|
|
12
|
+
|
|
13
|
+
/**
 * Reference: Embedder (example)
 * export interface Embedder {
 *   embed(file: InbedFile): Promise<number[][]>;
 *   embedQuery(query: string): Promise<number[]>;
 * }
 */
|
|
20
|
+
|
|
21
|
+
/**
 * Embedder that posts to the OpenRouter embeddings endpoint
 * (`https://openrouter.ai/api/v1/embeddings`).
 *
 * Failures never throw: they are logged and reported as an empty vector.
 */
export class OpenRouterEmbedder implements Embedder {
  private apiKey: string;
  model: string;

  /**
   * @param apiKey Key sent as a Bearer token with every request.
   * @param model  Name of the embedding model to request.
   */
  constructor(apiKey: string, model: string) {
    this.apiKey = apiKey;
    this.model = model;
  }

  /**
   * Embeds every chunk of `file`, one request per chunk, in order.
   *
   * @returns One vector per chunk, index-aligned with `file.chunks`. A chunk
   *          whose embedding failed is represented by an empty vector so the
   *          positions of the remaining vectors still match their chunks.
   */
  async embed(file: InbedFile): Promise<number[][]> {
    const embeddings: number[][] = [];
    for (const chunk of file.chunks) {
      // embedText handles its own errors, so no extra try/catch is needed here.
      const emb = await this.embedText(chunk);
      const usable = Array.isArray(emb) && emb.length > 0;
      if (!usable) {
        // embedText returned an empty array, i.e. the request failed.
        console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
      }
      // Keep a placeholder instead of dropping the entry: consumers pair
      // embedding[i] with chunks[i].
      embeddings.push(usable ? emb : []);
    }
    return embeddings;
  }

  /**
   * Embeds a search query with the same request used for chunks.
   *
   * @returns The query vector, or `[]` when the request failed.
   */
  async embedQuery(query: string): Promise<number[]> {
    return this.embedText(query);
  }

  /** Requests a single embedding; resolves to `[]` on any error or unexpected response. */
  private async embedText(text: string): Promise<number[]> {
    try {
      const response = await axios.post(
        'https://openrouter.ai/api/v1/embeddings',
        {
          input: text,
          model: this.model
        },
        {
          headers: {
            // Authenticate with this instance's key.
            'Authorization': `Bearer ${this.apiKey}`,
            'Content-Type': 'application/json',
          },
          timeout: 30000
        }
      );

      // Expected response shape: { "data": [{ "embedding": [0.1, 0.2, ...], ... }], ... }
      // The first element of `data` carries the vector.
      if (response.data && response.data.data && response.data.data.length > 0) {
        return response.data.data[0].embedding;
      }

      // Unexpected response: log it and report failure with an empty vector.
      console.error('Resposta da API de Embedding inesperada:', response.data);
      return [];
    } catch (error: unknown) {
      // Log the API response body when available, otherwise the error message.
      const responseData = (error as { response?: { data?: unknown } } | null | undefined)?.response?.data;
      const message = error instanceof Error ? error.message : String(error);
      console.error(
        'Error in embedText:',
        responseData ? JSON.stringify(responseData) : message
      );
      // Empty vector so the caller can carry on with the remaining chunks.
      return [];
    }
  }
}
|
package/src/index.ts
ADDED
package/src/types.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { Embedder } from "./embeddings/Embedder.js";
|
|
2
|
+
|
|
3
|
+
/** One import of an indexed file that was matched to another indexed file. */
export interface InbedImport {
  /** Module specifier exactly as written in the importing file. */
  source: string;
  /** Key of the indexed file the specifier resolved to. */
  resolved: string;
}
|
|
7
|
+
|
|
8
|
+
/** A file held in the index, together with its derived search data. */
export interface InbedFile {
  /** Path of the file; import specifiers are resolved relative to its directory. */
  path: string;
  /** Full text of the file. */
  content: string;
  /** Text chunks the content was split into for embedding. */
  chunks: string[];
  /** Imports of this file that resolved to other indexed files. */
  imports: InbedImport[];
  /**
   * Embedding vectors, read by index alongside `chunks` during search.
   * Files without this field are skipped by semantic search.
   */
  embedding?: number[][];
}
|
|
15
|
+
|
|
16
|
+
/** One ranked hit returned by a semantic search. */
export interface SearchResult {
  /** Path of the matching file. */
  path: string;
  /** Context text for the hit: a file/imports header followed by the best-matching chunk, truncated. */
  snippet: string;
  /** Similarity between the query and the file's best chunk; higher is better. */
  score: number;
}
|
|
21
|
+
|
|
22
|
+
/** Configuration for an Inbed index. */
export interface InbedOptions {
  /** Directory that is watched; indexed paths are expressed relative to it. */
  rootDir: string;
  /** When true, imports of a file are resolved after it is (re)indexed. */
  recursive?: boolean;
  /** Path suffixes (e.g. `.ts`) a file must end with to be indexed by the watcher. */
  fileExtensions?: string[];
  /** Patterns handed to the file watcher as its `ignored` option. */
  ignorePatterns?: string[];
  /** Maximum recursion depth for import resolution; 10 is used when omitted. */
  maxDepth?: number;
}
|
package/src/utils.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
|
|
4
|
+
/**
 * Reads a file as UTF-8 text without throwing.
 *
 * @param filePath Path of the file to read.
 * @returns The file contents, or `null` if the read fails for any reason
 *          (missing file, directory, permissions, …).
 */
export function readFileSafe(filePath: string): string | null {
  let text: string | null = null;
  try {
    text = fs.readFileSync(filePath, 'utf-8');
  } catch {
    // Any read error is reported as null.
  }
  return text;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Tells whether a path should be ignored.
 *
 * Matching is plain substring containment, not glob matching: the path is
 * ignored when it contains at least one of the patterns.
 *
 * @param filePath       Path to test.
 * @param ignorePatterns Substrings to look for; empty strings are skipped.
 * @returns `true` if any non-empty pattern occurs in `filePath`.
 */
export function isIgnored(filePath: string, ignorePatterns: string[]): boolean {
  // ''.includes-style matching is always true, so an empty pattern would
  // ignore every file; treat it as "no pattern" instead.
  return ignorePatterns.some(pattern => pattern.length > 0 && filePath.includes(pattern));
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2020",
|
|
4
|
+
"module": "ESNext",
|
|
5
|
+
"moduleResolution": "Node",
|
|
6
|
+
"outDir": "dist",
|
|
7
|
+
"rootDir": "src",
|
|
8
|
+
"strict": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"forceConsistentCasingInFileNames": true,
|
|
11
|
+
"skipLibCheck": true,
|
|
12
|
+
"lib": ["ES2020", "DOM"]
|
|
13
|
+
},
|
|
14
|
+
"include": ["src", "demo.js"]
|
|
15
|
+
}
|