inbed 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Inbed.js +9 -18
- package/dist/index.js +1 -0
- package/package.json +2 -1
- package/demo.js +0 -29
- package/johankit.yml +0 -6
- package/src/Inbed.ts +0 -268
- package/src/embeddings/Embedder.ts +0 -7
- package/src/embeddings/Ollama.ts +0 -69
- package/src/embeddings/OpenAI.ts +0 -58
- package/src/embeddings/OpenRouter.ts +0 -100
- package/src/index.ts +0 -6
- package/src/types.ts +0 -28
- package/src/utils.ts +0 -14
- package/tsconfig.json +0 -15
package/dist/Inbed.js
CHANGED
|
@@ -7,10 +7,8 @@ import ts from 'typescript';
|
|
|
7
7
|
export class Inbed {
|
|
8
8
|
constructor(embedder, options) {
|
|
9
9
|
this.files = new Map();
|
|
10
|
-
// concurrency
|
|
11
10
|
this.embeddingQueue = [];
|
|
12
11
|
this.maxConcurrentEmbeds = 4;
|
|
13
|
-
// watcher debounce
|
|
14
12
|
this.debounceMap = new Map();
|
|
15
13
|
this.options = {
|
|
16
14
|
recursive: true,
|
|
@@ -129,7 +127,6 @@ export class Inbed {
|
|
|
129
127
|
if (depth > (this.options.maxDepth || 10))
|
|
130
128
|
return;
|
|
131
129
|
const imports = [];
|
|
132
|
-
// captura import e require
|
|
133
130
|
const importRegex = /import\s+(?:[^'"\n]+)\s+from\s+['"](.+)['"]|require\(\s*['"](.+)['"]\s*\)/g;
|
|
134
131
|
let match;
|
|
135
132
|
while ((match = importRegex.exec(file.content)) !== null) {
|
|
@@ -141,13 +138,11 @@ export class Inbed {
|
|
|
141
138
|
const candidate = path.normalize(base + ext);
|
|
142
139
|
if (this.files.has(candidate)) {
|
|
143
140
|
imports.push({ source: sourceImport, resolved: candidate });
|
|
144
|
-
// recursão
|
|
145
141
|
this.resolveImports(this.files.get(candidate), depth + 1);
|
|
146
142
|
break;
|
|
147
143
|
}
|
|
148
144
|
}
|
|
149
145
|
}
|
|
150
|
-
// deduplicação
|
|
151
146
|
file.imports = imports.filter((v, i, arr) => arr.findIndex(x => x.resolved === v.resolved) === i);
|
|
152
147
|
}
|
|
153
148
|
async upsertFile(filePath, content) {
|
|
@@ -175,16 +170,8 @@ export class Inbed {
|
|
|
175
170
|
}
|
|
176
171
|
}
|
|
177
172
|
if (bestScore > 0) {
|
|
178
|
-
const importsText =
|
|
179
|
-
|
|
180
|
-
.map((imp) => typeof imp === 'string' ? imp : `${imp.source} -> ${imp.resolved}`)
|
|
181
|
-
.join('\n')
|
|
182
|
-
: '';
|
|
183
|
-
const context = `FILE: ${file.path}
|
|
184
|
-
IMPORTS:
|
|
185
|
-
${importsText || '(none)'}
|
|
186
|
-
---
|
|
187
|
-
${bestChunk}`;
|
|
173
|
+
const importsText = file.imports.map(imp => `${imp.source} -> ${imp.resolved}`).join('\n');
|
|
174
|
+
const context = `FILE: ${file.path}\nIMPORTS:\n${importsText || '(none)'}\n---\n${bestChunk}`;
|
|
188
175
|
results.push({ path: file.path, snippet: context.slice(0, 500), score: bestScore });
|
|
189
176
|
}
|
|
190
177
|
}
|
|
@@ -209,27 +196,31 @@ ${bestChunk}`;
|
|
|
209
196
|
this.debounceMap.set(fullPath, setTimeout(handler, 200));
|
|
210
197
|
};
|
|
211
198
|
this.watcher
|
|
212
|
-
.on('add', fullPath => schedule(fullPath, async () => {
|
|
199
|
+
.on('add', (fullPath) => schedule(fullPath, async () => {
|
|
213
200
|
const relPath = path.relative(this.options.rootDir, fullPath);
|
|
214
201
|
if (this.options.fileExtensions?.some(ext => fullPath.endsWith(ext))) {
|
|
215
202
|
try {
|
|
216
203
|
const content = fs.readFileSync(fullPath, 'utf-8');
|
|
217
204
|
await this.enqueueAdd(relPath, content);
|
|
205
|
+
if (this.options.recursive)
|
|
206
|
+
this.resolveImports(this.files.get(relPath), 0);
|
|
218
207
|
}
|
|
219
208
|
catch { }
|
|
220
209
|
}
|
|
221
210
|
}))
|
|
222
|
-
.on('change', fullPath => schedule(fullPath, async () => {
|
|
211
|
+
.on('change', (fullPath) => schedule(fullPath, async () => {
|
|
223
212
|
const relPath = path.relative(this.options.rootDir, fullPath);
|
|
224
213
|
if (this.options.fileExtensions?.some(ext => fullPath.endsWith(ext))) {
|
|
225
214
|
try {
|
|
226
215
|
const content = fs.readFileSync(fullPath, 'utf-8');
|
|
227
216
|
await this.enqueueAdd(relPath, content);
|
|
217
|
+
if (this.options.recursive)
|
|
218
|
+
this.resolveImports(this.files.get(relPath), 0);
|
|
228
219
|
}
|
|
229
220
|
catch { }
|
|
230
221
|
}
|
|
231
222
|
}))
|
|
232
|
-
.on('unlink', fullPath => {
|
|
223
|
+
.on('unlink', (fullPath) => {
|
|
233
224
|
const relPath = path.relative(this.options.rootDir, fullPath);
|
|
234
225
|
this.files.delete(relPath);
|
|
235
226
|
});
|
package/dist/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "inbed",
|
|
3
|
-
"version": "1.0.1",
|
|
3
|
+
"version": "1.0.3",
|
|
4
4
|
"description": "Semantic indexing and search over source code using vector embeddings",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
7
|
+
"files": ["dist"],
|
|
7
8
|
"scripts": {
|
|
8
9
|
"build": "tsc",
|
|
9
10
|
"start": "node dist/index.js",
|
package/demo.js
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
import { OpenRouterEmbedder } from './dist/embeddings/OpenRouter.js';
|
|
2
|
-
import { Inbed } from './dist/Inbed.js';
|
|
3
|
-
|
|
4
|
-
import 'dotenv/config';
|
|
5
|
-
|
|
6
|
-
async function demo() {
|
|
7
|
-
try {
|
|
8
|
-
// const embedder = new OllamaEmbedder('https://ollama.johan.chat', 'mxbai-embed-large');
|
|
9
|
-
const embedder = new OpenRouterEmbedder(process.env.OPENAI_API_KEY, 'mistralai/codestral-embed-2505');
|
|
10
|
-
|
|
11
|
-
const project = new Inbed(embedder, { rootDir: './' });
|
|
12
|
-
await project.load();
|
|
13
|
-
|
|
14
|
-
console.log('Loaded files:', project.listFiles().map(f => f.path));
|
|
15
|
-
|
|
16
|
-
const results = await project.semanticSearch('demo');
|
|
17
|
-
console.log('Semantic search results:');
|
|
18
|
-
results.forEach(result => {
|
|
19
|
-
console.log(result)
|
|
20
|
-
console.log(`- ${result.path}: ${result.score.toFixed(4)}`);
|
|
21
|
-
});
|
|
22
|
-
|
|
23
|
-
} catch (error) {
|
|
24
|
-
console.error('Demo failed:', error);
|
|
25
|
-
process.exit(1);
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
demo();
|
package/johankit.yml
DELETED
package/src/Inbed.ts
DELETED
|
@@ -1,268 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import crypto from 'crypto';
|
|
4
|
-
import chokidar from 'chokidar';
|
|
5
|
-
|
|
6
|
-
import { glob } from 'glob';
|
|
7
|
-
import ts from 'typescript';
|
|
8
|
-
import { InbedFile, InbedOptions, SearchResult, InbedImport } from './types.js';
|
|
9
|
-
|
|
10
|
-
import { Embedder } from './embeddings/Embedder.js';
|
|
11
|
-
|
|
12
|
-
export class Inbed {
|
|
13
|
-
private files: Map<string, InbedFile> = new Map();
|
|
14
|
-
private options: InbedOptions;
|
|
15
|
-
private embedder: Embedder;
|
|
16
|
-
private watcher: any;
|
|
17
|
-
private storageDir: string;
|
|
18
|
-
private chunkLimit: number;
|
|
19
|
-
|
|
20
|
-
private embeddingQueue: Promise<void>[] = [];
|
|
21
|
-
private maxConcurrentEmbeds = 4;
|
|
22
|
-
|
|
23
|
-
private debounceMap: Map<string, NodeJS.Timeout> = new Map();
|
|
24
|
-
|
|
25
|
-
constructor(embedder: Embedder, options: InbedOptions) {
|
|
26
|
-
this.options = {
|
|
27
|
-
recursive: true,
|
|
28
|
-
fileExtensions: ['.ts', '.js'],
|
|
29
|
-
ignorePatterns: ['johankit.yaml', 'dist/**', 'node_modules/**', '.git/**', '.inbed/**', ...(options.ignorePatterns || [])],
|
|
30
|
-
maxDepth: 10,
|
|
31
|
-
...options
|
|
32
|
-
};
|
|
33
|
-
this.embedder = embedder;
|
|
34
|
-
this.chunkLimit = (options as any).chunkLimit || 10;
|
|
35
|
-
this.storageDir = path.join(this.options.rootDir, '.inbed');
|
|
36
|
-
if (!fs.existsSync(this.storageDir)) fs.mkdirSync(this.storageDir, { recursive: true });
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
async load(): Promise<void> {
|
|
40
|
-
const patterns = this.options.fileExtensions!.map(ext => `**/*${ext}`);
|
|
41
|
-
const files = patterns.flatMap(pattern => glob.sync(pattern, { cwd: this.options.rootDir, ignore: this.options.ignorePatterns, nodir: true }));
|
|
42
|
-
|
|
43
|
-
for (const filePath of files) {
|
|
44
|
-
const fullPath = path.join(this.options.rootDir, filePath);
|
|
45
|
-
try {
|
|
46
|
-
const content = fs.readFileSync(fullPath, 'utf-8');
|
|
47
|
-
await this.enqueueAdd(path.normalize(filePath), content);
|
|
48
|
-
} catch {}
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
await this.flushQueue();
|
|
52
|
-
|
|
53
|
-
if (this.options.recursive) {
|
|
54
|
-
for (const file of Array.from(this.files.values())) {
|
|
55
|
-
this.resolveImports(file, 0);
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
this.watchFiles();
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
private hashContent(content: string): string {
|
|
63
|
-
const version = 'chunker:v2';
|
|
64
|
-
return crypto.createHash('sha256').update(version + content + this.chunkLimit + this.embedder.model, 'utf-8').digest('hex');
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
private normalize(vec: number[]): number[] {
|
|
68
|
-
let norm = 0;
|
|
69
|
-
for (const v of vec) norm += v * v;
|
|
70
|
-
norm = Math.sqrt(norm);
|
|
71
|
-
if (norm === 0) return vec;
|
|
72
|
-
return vec.map(v => v / norm);
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
private async enqueueAdd(filePath: string, content: string) {
|
|
76
|
-
const task = async () => {
|
|
77
|
-
await this.addFile(filePath, content);
|
|
78
|
-
};
|
|
79
|
-
|
|
80
|
-
while (this.embeddingQueue.length >= this.maxConcurrentEmbeds) {
|
|
81
|
-
await Promise.race(this.embeddingQueue);
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
const p = task().finally(() => {
|
|
85
|
-
this.embeddingQueue = this.embeddingQueue.filter(x => x !== p);
|
|
86
|
-
});
|
|
87
|
-
|
|
88
|
-
this.embeddingQueue.push(p);
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
private async flushQueue() {
|
|
92
|
-
await Promise.allSettled(this.embeddingQueue);
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
private async addFile(filePath: string, content: string) {
|
|
96
|
-
const storagePath = path.join(this.storageDir, `${filePath.replace(/\\/g, '_')}_${this.embedder.model}.json`);
|
|
97
|
-
let embedding: number[][] | undefined;
|
|
98
|
-
const currentHash = this.hashContent(content);
|
|
99
|
-
|
|
100
|
-
if (fs.existsSync(storagePath)) {
|
|
101
|
-
try {
|
|
102
|
-
const saved = JSON.parse(fs.readFileSync(storagePath, 'utf-8'));
|
|
103
|
-
if (saved.hash === currentHash) embedding = saved.embedding;
|
|
104
|
-
} catch {}
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
let chunks = this.isSmall(content) ? [content] : this.chunkByAST(content);
|
|
108
|
-
if (chunks.length > this.chunkLimit) chunks = chunks.slice(0, this.chunkLimit);
|
|
109
|
-
|
|
110
|
-
if (!embedding) {
|
|
111
|
-
const raw = await this.embedder.embed({ path: filePath, content, chunks, imports: [] });
|
|
112
|
-
embedding = raw.map(v => this.normalize(v));
|
|
113
|
-
try {
|
|
114
|
-
fs.writeFileSync(storagePath, JSON.stringify({ hash: currentHash, embedding }, null, 2));
|
|
115
|
-
} catch {}
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
this.files.set(filePath, { path: filePath, content, chunks, imports: [], embedding });
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
private isSmall(content: string): boolean {
|
|
122
|
-
return content.split('\n').length < 50;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
private chunkByAST(content: string): string[] {
|
|
126
|
-
try {
|
|
127
|
-
const sourceFile = ts.createSourceFile('temp.ts', content, ts.ScriptTarget.Latest);
|
|
128
|
-
const chunks: string[] = [];
|
|
129
|
-
let buffer = '';
|
|
130
|
-
ts.forEachChild(sourceFile, node => {
|
|
131
|
-
const text = node.getFullText(sourceFile).trim();
|
|
132
|
-
if (!text) return;
|
|
133
|
-
if ((buffer + text).length > 800) {
|
|
134
|
-
chunks.push(buffer);
|
|
135
|
-
buffer = text;
|
|
136
|
-
} else {
|
|
137
|
-
buffer += '\n' + text;
|
|
138
|
-
}
|
|
139
|
-
});
|
|
140
|
-
if (buffer.trim()) chunks.push(buffer.trim());
|
|
141
|
-
return chunks.length ? chunks : [content];
|
|
142
|
-
} catch {
|
|
143
|
-
return [content];
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
private resolveImports(file: InbedFile, depth: number) {
|
|
148
|
-
if (depth > (this.options.maxDepth || 10)) return;
|
|
149
|
-
|
|
150
|
-
const imports: InbedImport[] = [];
|
|
151
|
-
|
|
152
|
-
const importRegex = /import\s+(?:[^'"\n]+)\s+from\s+['"](.+)['"]|require\(\s*['"](.+)['"]\s*\)/g;
|
|
153
|
-
|
|
154
|
-
let match;
|
|
155
|
-
while ((match = importRegex.exec(file.content)) !== null) {
|
|
156
|
-
const sourceImport = match[1] || match[2];
|
|
157
|
-
if (!sourceImport) continue;
|
|
158
|
-
|
|
159
|
-
const base = path.join(path.dirname(file.path), sourceImport);
|
|
160
|
-
|
|
161
|
-
for (const ext of ['', '.ts', '.tsx', '.js', '/index.ts', '/index.js']) {
|
|
162
|
-
const candidate = path.normalize(base + ext);
|
|
163
|
-
if (this.files.has(candidate)) {
|
|
164
|
-
imports.push({ source: sourceImport, resolved: candidate });
|
|
165
|
-
this.resolveImports(this.files.get(candidate)!, depth + 1);
|
|
166
|
-
break;
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
file.imports = imports.filter((v, i, arr) => arr.findIndex(x => x.resolved === v.resolved) === i);
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
async upsertFile(filePath: string, content: string) {
|
|
175
|
-
await this.enqueueAdd(path.normalize(filePath), content);
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
getFile(filePath: string): InbedFile | undefined {
|
|
179
|
-
return this.files.get(path.normalize(filePath));
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
listFiles(): InbedFile[] {
|
|
183
|
-
return Array.from(this.files.values());
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
async semanticSearch(query: string, topK: number = 5): Promise<SearchResult[]> {
|
|
187
|
-
const queryEmbedding = this.normalize(await this.embedder.embedQuery(query));
|
|
188
|
-
const results: SearchResult[] = [];
|
|
189
|
-
|
|
190
|
-
for (const file of this.files.values()) {
|
|
191
|
-
if (!file.embedding) continue;
|
|
192
|
-
let bestScore = 0;
|
|
193
|
-
let bestChunk = '';
|
|
194
|
-
for (let i = 0; i < file.embedding.length; i++) {
|
|
195
|
-
const score = this.cosineSimilarity(queryEmbedding, file.embedding[i]);
|
|
196
|
-
if (score > bestScore) {
|
|
197
|
-
bestScore = score;
|
|
198
|
-
bestChunk = file.chunks[i] || '';
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
if (bestScore > 0) {
|
|
202
|
-
const importsText = file.imports.map(imp => `${imp.source} -> ${imp.resolved}`).join('\n');
|
|
203
|
-
const context = `FILE: ${file.path}\nIMPORTS:\n${importsText || '(none)'}\n---\n${bestChunk}`;
|
|
204
|
-
results.push({ path: file.path, snippet: context.slice(0, 500), score: bestScore });
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
return results.sort((a, b) => b.score - a.score).slice(0, topK);
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
private cosineSimilarity(a: number[], b: number[]): number {
|
|
212
|
-
let dot = 0;
|
|
213
|
-
const len = Math.min(a.length, b.length);
|
|
214
|
-
for (let i = 0; i < len; i++) dot += a[i] * b[i];
|
|
215
|
-
return dot;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
private watchFiles() {
|
|
219
|
-
this.watcher = chokidar.watch(this.options.rootDir, {
|
|
220
|
-
ignored: this.options.ignorePatterns,
|
|
221
|
-
persistent: true,
|
|
222
|
-
ignoreInitial: true
|
|
223
|
-
});
|
|
224
|
-
|
|
225
|
-
const schedule = (fullPath: string, handler: () => void) => {
|
|
226
|
-
if (this.debounceMap.has(fullPath)) clearTimeout(this.debounceMap.get(fullPath)!);
|
|
227
|
-
this.debounceMap.set(fullPath, setTimeout(handler, 200));
|
|
228
|
-
};
|
|
229
|
-
|
|
230
|
-
this.watcher
|
|
231
|
-
.on('add', fullPath => schedule(fullPath, async () => {
|
|
232
|
-
const relPath = path.relative(this.options.rootDir, fullPath);
|
|
233
|
-
if (this.options.fileExtensions?.some(ext => fullPath.endsWith(ext))) {
|
|
234
|
-
try {
|
|
235
|
-
const content = fs.readFileSync(fullPath, 'utf-8');
|
|
236
|
-
await this.enqueueAdd(relPath, content);
|
|
237
|
-
if (this.options.recursive) this.resolveImports(this.files.get(relPath)!, 0);
|
|
238
|
-
} catch {}
|
|
239
|
-
}
|
|
240
|
-
}))
|
|
241
|
-
.on('change', fullPath => schedule(fullPath, async () => {
|
|
242
|
-
const relPath = path.relative(this.options.rootDir, fullPath);
|
|
243
|
-
if (this.options.fileExtensions?.some(ext => fullPath.endsWith(ext))) {
|
|
244
|
-
try {
|
|
245
|
-
const content = fs.readFileSync(fullPath, 'utf-8');
|
|
246
|
-
await this.enqueueAdd(relPath, content);
|
|
247
|
-
if (this.options.recursive) this.resolveImports(this.files.get(relPath)!, 0);
|
|
248
|
-
} catch {}
|
|
249
|
-
}
|
|
250
|
-
}))
|
|
251
|
-
.on('unlink', fullPath => {
|
|
252
|
-
const relPath = path.relative(this.options.rootDir, fullPath);
|
|
253
|
-
this.files.delete(relPath);
|
|
254
|
-
});
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
stopWatching() {
|
|
258
|
-
if (this.watcher) {
|
|
259
|
-
this.watcher.close();
|
|
260
|
-
this.watcher = null;
|
|
261
|
-
}
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
clear(): void {
|
|
265
|
-
this.stopWatching();
|
|
266
|
-
this.files.clear();
|
|
267
|
-
}
|
|
268
|
-
}
|
package/src/embeddings/Embedder.ts
DELETED
package/src/embeddings/Ollama.ts
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import axios from 'axios';
|
|
2
|
-
import { InbedFile } from '../types.js';
|
|
3
|
-
import { Embedder } from './Embedder.js';
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Interface InbedFile
|
|
7
|
-
* export interface InbedFile {
|
|
8
|
-
* name: string;
|
|
9
|
-
* chunks: string[];
|
|
10
|
-
* }
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* Interface Embedder
|
|
15
|
-
* export interface Embedder {
|
|
16
|
-
* embed(file: InbedFile): Promise<number[][]>;
|
|
17
|
-
* embedQuery(query: string): Promise<number[]>;
|
|
18
|
-
* }
|
|
19
|
-
*/
|
|
20
|
-
|
|
21
|
-
export class OllamaEmbedder implements Embedder {
|
|
22
|
-
private baseUrl: string;
|
|
23
|
-
model: string;
|
|
24
|
-
|
|
25
|
-
constructor(baseUrl: string = 'http://localhost:11434', model: string = 'mxbai-embed-large') {
|
|
26
|
-
this.baseUrl = baseUrl;
|
|
27
|
-
this.model = model;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
async embed(file: InbedFile): Promise<number[][]> {
|
|
31
|
-
const embeddings: number[][] = [];
|
|
32
|
-
for (const chunk of file.chunks) {
|
|
33
|
-
const emb = await this.embedText(chunk);
|
|
34
|
-
if (emb && emb.length > 0) embeddings.push(emb);
|
|
35
|
-
else console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
|
|
36
|
-
}
|
|
37
|
-
return embeddings;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
async embedQuery(query: string): Promise<number[]> {
|
|
41
|
-
return this.embedText(query);
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
private async embedText(text: string): Promise<number[]> {
|
|
45
|
-
try {
|
|
46
|
-
const response = await axios.post(
|
|
47
|
-
`${this.baseUrl}/api/embed`,
|
|
48
|
-
{
|
|
49
|
-
model: this.model,
|
|
50
|
-
input: text
|
|
51
|
-
},
|
|
52
|
-
{ timeout: 30000 }
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
if (response.data?.embeddings && response.data.embeddings.length > 0) {
|
|
56
|
-
return response.data.embeddings[0];
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
console.error('Resposta da API Ollama inesperada:', response.data);
|
|
60
|
-
return [];
|
|
61
|
-
} catch (error: any) {
|
|
62
|
-
console.error(
|
|
63
|
-
'Erro em embedText (Ollama):',
|
|
64
|
-
error.response?.data ? JSON.stringify(error.response.data) : error.message
|
|
65
|
-
);
|
|
66
|
-
return [];
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
}
|
package/src/embeddings/OpenAI.ts
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import { InbedFile } from '../types.js';
|
|
2
|
-
import { Embedder } from './Embedder.js';
|
|
3
|
-
import OpenAI from 'openai';
|
|
4
|
-
|
|
5
|
-
export class OpenAIEmbedder implements Embedder {
|
|
6
|
-
private client: OpenAI;
|
|
7
|
-
model: string;
|
|
8
|
-
|
|
9
|
-
constructor(apiKey: string, model: string = 'text-embedding-3-small') {
|
|
10
|
-
this.client = new OpenAI({ apiKey });
|
|
11
|
-
this.model = model;
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
async embed(file: InbedFile): Promise<number[][]> {
|
|
15
|
-
const embeddings: number[][] = [];
|
|
16
|
-
|
|
17
|
-
for (const chunk of file.chunks) {
|
|
18
|
-
try {
|
|
19
|
-
const emb = await this.embedText(chunk);
|
|
20
|
-
if (emb && emb.length > 0) embeddings.push(emb);
|
|
21
|
-
else console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
|
|
22
|
-
} catch (error) {
|
|
23
|
-
console.error('Error embedding chunk:', error);
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
return embeddings;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
async embedQuery(query: string): Promise<number[]> {
|
|
31
|
-
try {
|
|
32
|
-
return await this.embedText(query);
|
|
33
|
-
} catch (error) {
|
|
34
|
-
console.error('Error embedding query:', error);
|
|
35
|
-
return [];
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
private async embedText(text: string): Promise<number[]> {
|
|
40
|
-
try {
|
|
41
|
-
const response = await this.client.embeddings.create({
|
|
42
|
-
model: this.model,
|
|
43
|
-
input: text,
|
|
44
|
-
});
|
|
45
|
-
|
|
46
|
-
// A resposta retorna embeddings no campo data[0].embedding
|
|
47
|
-
if (response.data && response.data.length > 0) {
|
|
48
|
-
return response.data[0].embedding;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
console.error('Resposta da API de Embedding inesperada:', response);
|
|
52
|
-
return [];
|
|
53
|
-
} catch (error: any) {
|
|
54
|
-
console.error('Error in embedText:', error.response?.data ?? error.message);
|
|
55
|
-
return [];
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
}
|
package/src/embeddings/OpenRouter.ts
DELETED
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
import axios from 'axios';
|
|
2
|
-
import { InbedFile } from '../types.js'; // Assumindo que este import está correto
|
|
3
|
-
import { Embedder } from './Embedder.js'; // Assumindo que este import está correto
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Interface InbedFile (exemplo)
|
|
7
|
-
* export interface InbedFile {
|
|
8
|
-
* name: string;
|
|
9
|
-
* chunks: string[];
|
|
10
|
-
* }
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* Interface Embedder (exemplo)
|
|
15
|
-
* export interface Embedder {
|
|
16
|
-
* embed(file: InbedFile): Promise<number[][]>;
|
|
17
|
-
* embedQuery(query: string): Promise<number[]>;
|
|
18
|
-
* }
|
|
19
|
-
*/
|
|
20
|
-
|
|
21
|
-
export class OpenRouterEmbedder implements Embedder {
|
|
22
|
-
private apiKey: string;
|
|
23
|
-
model: string;
|
|
24
|
-
|
|
25
|
-
constructor(apiKey: string, model: string) {
|
|
26
|
-
this.apiKey = apiKey;
|
|
27
|
-
this.model = model;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
async embed(file: InbedFile): Promise<number[][]> {
|
|
31
|
-
try {
|
|
32
|
-
const embeddings: number[][] = [];
|
|
33
|
-
// Itera sobre todos os 'chunks' (pedaços) do arquivo
|
|
34
|
-
for (const chunk of file.chunks) {
|
|
35
|
-
// Gera o embedding para cada pedaço
|
|
36
|
-
const emb = await this.embedText(chunk);
|
|
37
|
-
// Adiciona o embedding resultante ao array de embeddings
|
|
38
|
-
if (emb && emb.length > 0) {
|
|
39
|
-
embeddings.push(emb);
|
|
40
|
-
} else {
|
|
41
|
-
// Trata o caso em que embedText retorna um array vazio (erro)
|
|
42
|
-
console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
return embeddings;
|
|
46
|
-
} catch (error) {
|
|
47
|
-
console.error('Error embedding file:', error);
|
|
48
|
-
return [];
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
async embedQuery(query: string): Promise<number[]> {
|
|
53
|
-
try {
|
|
54
|
-
// Usa o mesmo método para embutir a query
|
|
55
|
-
return await this.embedText(query);
|
|
56
|
-
} catch (error) {
|
|
57
|
-
console.error('Error embedding query:', error);
|
|
58
|
-
return [];
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
private async embedText(text: string): Promise<number[]> {
|
|
63
|
-
try {
|
|
64
|
-
const response = await axios.post(
|
|
65
|
-
'https://openrouter.ai/api/v1/embeddings',
|
|
66
|
-
{
|
|
67
|
-
input: text,
|
|
68
|
-
model: this.model
|
|
69
|
-
},
|
|
70
|
-
{
|
|
71
|
-
headers: {
|
|
72
|
-
// Utiliza a chave da instância
|
|
73
|
-
'Authorization': `Bearer ${this.apiKey}`,
|
|
74
|
-
'Content-Type': 'application/json',
|
|
75
|
-
},
|
|
76
|
-
timeout: 30000
|
|
77
|
-
}
|
|
78
|
-
);
|
|
79
|
-
|
|
80
|
-
// A resposta da OpenRouter/OpenAI retorna um array em 'data',
|
|
81
|
-
// e o primeiro objeto contém o 'embedding'
|
|
82
|
-
// Exemplo de estrutura de resposta: { "data": [{ "embedding": [0.1, 0.2, ...], ... }], ... }
|
|
83
|
-
if (response.data && response.data.data && response.data.data.length > 0) {
|
|
84
|
-
return response.data.data[0].embedding;
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
// Se a resposta for inesperada, loga e retorna array vazio
|
|
88
|
-
console.error('Resposta da API de Embedding inesperada:', response.data);
|
|
89
|
-
return [];
|
|
90
|
-
} catch (error: any) {
|
|
91
|
-
// Loga detalhes do erro (como o corpo da resposta da API, se disponível)
|
|
92
|
-
console.error(
|
|
93
|
-
'Error in embedText:',
|
|
94
|
-
error.response?.data ? JSON.stringify(error.response.data) : error.message
|
|
95
|
-
);
|
|
96
|
-
// Retorna um array vazio para que a chamada externa possa continuar
|
|
97
|
-
return [];
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
}
|
package/src/index.ts
DELETED
package/src/types.ts
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import { Embedder } from "./embeddings/Embedder.js";
|
|
2
|
-
|
|
3
|
-
export interface InbedImport {
|
|
4
|
-
source: string;
|
|
5
|
-
resolved: string;
|
|
6
|
-
}
|
|
7
|
-
|
|
8
|
-
export interface InbedFile {
|
|
9
|
-
path: string;
|
|
10
|
-
content: string;
|
|
11
|
-
chunks: string[];
|
|
12
|
-
imports: InbedImport[];
|
|
13
|
-
embedding?: number[][];
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
export interface SearchResult {
|
|
17
|
-
path: string;
|
|
18
|
-
snippet: string;
|
|
19
|
-
score: number;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
export interface InbedOptions {
|
|
23
|
-
rootDir: string;
|
|
24
|
-
recursive?: boolean;
|
|
25
|
-
fileExtensions?: string[];
|
|
26
|
-
ignorePatterns?: string[];
|
|
27
|
-
maxDepth?: number;
|
|
28
|
-
}
|
package/src/utils.ts
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
|
|
4
|
-
export function readFileSafe(filePath: string): string | null {
|
|
5
|
-
try {
|
|
6
|
-
return fs.readFileSync(filePath, 'utf-8');
|
|
7
|
-
} catch {
|
|
8
|
-
return null;
|
|
9
|
-
}
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
export function isIgnored(filePath: string, ignorePatterns: string[]): boolean {
|
|
13
|
-
return ignorePatterns.some(pattern => filePath.includes(pattern));
|
|
14
|
-
}
|
package/tsconfig.json
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"compilerOptions": {
|
|
3
|
-
"target": "ES2020",
|
|
4
|
-
"module": "ESNext",
|
|
5
|
-
"moduleResolution": "Node",
|
|
6
|
-
"outDir": "dist",
|
|
7
|
-
"rootDir": "src",
|
|
8
|
-
"strict": true,
|
|
9
|
-
"esModuleInterop": true,
|
|
10
|
-
"forceConsistentCasingInFileNames": true,
|
|
11
|
-
"skipLibCheck": true,
|
|
12
|
-
"lib": ["ES2020", "DOM"]
|
|
13
|
-
},
|
|
14
|
-
"include": ["src", "demo.js"]
|
|
15
|
-
}
|