inbed 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,108 @@
+ # Inbed
+
+ **Inbed** is a TypeScript library for **semantic indexing and retrieval of source code**, built to map **natural language intent** to **real codebases**.
+
+ It helps LLMs and developer tools find the **right files and code snippets** for a given user request.
+
+ ---
+
+ ## What it’s for
+
+ - 🔍 Semantic code search
+ - 🤖 LLM / RAG context retrieval
+ - 🧠 AI copilots & agents
+ - 🧭 Navigating large or legacy repos
+
+ ---
+
+ ## Installation
+
+ ```bash
+ npm install inbed
+ ```
+
+ ---
+
+ ## Basic Usage
+
+ ### Create an embedder
+
+ ```ts
+ import { OpenAIEmbedder } from 'inbed';
+
+ const embedder = new OpenAIEmbedder(
+   process.env.OPENAI_API_KEY!
+ );
+ ```
+
+ ### Initialize Inbed
+
+ ```ts
+ import { Inbed } from 'inbed';
+
+ const inbed = new Inbed(embedder, {
+   rootDir: process.cwd(),
+   fileExtensions: ['.ts', '.js'],
+   ignorePatterns: ['dist/**', 'node_modules/**']
+ });
+
+ await inbed.load();
+ ```
+
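+ Only `rootDir` is required. Based on the constructor in `src/Inbed.ts`, the remaining options fall back to the defaults sketched below, and any `ignorePatterns` you pass are merged with the built-in ones (a summary, not the exhaustive list):
+
+ ```ts
+ const defaults = {
+   recursive: true,                // resolve imports between indexed files
+   fileExtensions: ['.ts', '.js'],
+   ignorePatterns: ['dist/**', 'node_modules/**', '.git/**', '.inbed/**'],
+   maxDepth: 10,                   // import-resolution depth limit
+   chunkLimit: 10                  // max chunks embedded per file (read from options, not declared in InbedOptions)
+ };
+ // Embeddings are cached on disk under `<rootDir>/.inbed`.
+ ```
+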
+ ---
+
+ ## Semantic Search
+
+ ```ts
+ const results = await inbed.semanticSearch(
+   'where cache is written to disk',
+   5
+ );
+
+ results.forEach(r => {
+   console.log(r.path, r.score);
+ });
+ ```
+
+ `semanticSearch` returns the most relevant files and snippets for the query, ranked by similarity score.
+
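+ Each result follows the `SearchResult` shape defined in `src/types.ts`:
+
+ ```ts
+ interface SearchResult {
+   path: string;    // file path, relative to rootDir
+   snippet: string; // "FILE / IMPORTS / best chunk" context block, truncated to 500 characters
+   score: number;   // cosine similarity of the best-matching chunk
+ }
+ ```
+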
+ ---
+
+ ## Common Pattern: Prompt → Files
+
+ ```ts
+ async function selectRelevantFiles(prompt: string) {
+   const results = await inbed.semanticSearch(prompt, 5);
+   return results.map(r => r.path);
+ }
+ ```
+
+ Use this to feed only the necessary code into an LLM prompt.
+
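+ If you want the code itself rather than just the paths, the snippets can be concatenated into model context directly. A minimal sketch (the helper name and prompt wiring are illustrative, not part of the library):
+
+ ```ts
+ async function buildContext(prompt: string): Promise<string> {
+   const results = await inbed.semanticSearch(prompt, 5);
+   // Each snippet already carries the file path and its resolved imports.
+   return results.map(r => r.snippet).join('\n\n');
+ }
+
+ const context = await buildContext('where cache is written to disk');
+ // Pass `context` alongside the user prompt to the LLM of your choice.
+ ```
+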
+ ---
+
+ ## How it works (short)
+
+ 1. Scans the project
+ 2. Splits files into chunks (AST-aware for TS)
+ 3. Generates embeddings (cached locally)
+ 4. Watches files for changes
+ 5. Searches by vector similarity (see the sketch below)
+
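+ Step 5 boils down to a dot product: chunk and query embeddings are L2-normalized when they are created, so the dot product equals cosine similarity. A simplified sketch of that scoring step:
+
+ ```ts
+ function l2Normalize(vec: number[]): number[] {
+   const norm = Math.sqrt(vec.reduce((sum, v) => sum + v * v, 0));
+   return norm === 0 ? vec : vec.map(v => v / norm);
+ }
+
+ function similarity(a: number[], b: number[]): number {
+   // Assumes both vectors were normalized with l2Normalize.
+   let dot = 0;
+   for (let i = 0; i < Math.min(a.length, b.length); i++) dot += a[i] * b[i];
+   return dot; // equals cosine similarity for unit-length vectors
+ }
+ ```
+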
+ ---
+
+ ## Embedding Providers
+
+ * OpenAI
+ * Ollama (local)
+ * OpenRouter
+
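+ Each provider is a small `Embedder` implementation; constructors take a connection detail (base URL or API key) plus a model name. For example (the model names and environment variable are illustrative; use whatever your provider serves):
+
+ ```ts
+ import { OllamaEmbedder, OpenRouterEmbedder } from 'inbed';
+
+ // Local Ollama instance (defaults: http://localhost:11434, mxbai-embed-large)
+ const ollama = new OllamaEmbedder('http://localhost:11434', 'mxbai-embed-large');
+
+ // OpenRouter with an API key and an embedding model slug
+ const openrouter = new OpenRouterEmbedder(process.env.OPENROUTER_API_KEY!, 'mistralai/codestral-embed-2505');
+ ```
+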
+ ---
+
+ ## What Inbed is not
+
+ * A static analyzer
+ * A refactor engine
+ * A bug detector
+
+ It retrieves **relevant context** — the LLM does the reasoning.
package/demo.js ADDED
@@ -0,0 +1,29 @@
1
+ import { OpenRouterEmbedder } from './dist/embeddings/OpenRouter.js';
2
+ import { Inbed } from './dist/Inbed.js';
3
+
4
+ import 'dotenv/config';
5
+
6
+ async function demo() {
7
+ try {
8
+ // const embedder = new OllamaEmbedder('https://ollama.johan.chat', 'mxbai-embed-large');
9
+ const embedder = new OpenRouterEmbedder(process.env.OPENAI_API_KEY, 'mistralai/codestral-embed-2505');
10
+
11
+ const project = new Inbed(embedder, { rootDir: './' });
12
+ await project.load();
13
+
14
+ console.log('Loaded files:', project.listFiles().map(f => f.path));
15
+
16
+ const results = await project.semanticSearch('demo');
17
+ console.log('Semantic search results:');
18
+ results.forEach(result => {
19
+ console.log(result)
20
+ console.log(`- ${result.path}: ${result.score.toFixed(4)}`);
21
+ });
22
+
23
+ } catch (error) {
24
+ console.error('Demo failed:', error);
25
+ process.exit(1);
26
+ }
27
+ }
28
+
29
+ demo();
package/dist/Inbed.js ADDED
@@ -0,0 +1,247 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import crypto from 'crypto';
4
+ import chokidar from 'chokidar';
5
+ import { glob } from 'glob';
6
+ import ts from 'typescript';
7
+ export class Inbed {
8
+ constructor(embedder, options) {
9
+ this.files = new Map();
10
+ // concurrency
11
+ this.embeddingQueue = [];
12
+ this.maxConcurrentEmbeds = 4;
13
+ // watcher debounce
14
+ this.debounceMap = new Map();
15
+ this.options = {
16
+ recursive: true,
17
+ fileExtensions: ['.ts', '.js'],
18
+ ignorePatterns: ['johankit.yaml', 'dist/**', 'node_modules/**', '.git/**', '.inbed/**', ...(options.ignorePatterns || [])],
19
+ maxDepth: 10,
20
+ ...options
21
+ };
22
+ this.embedder = embedder;
23
+ this.chunkLimit = options.chunkLimit || 10;
24
+ this.storageDir = path.join(this.options.rootDir, '.inbed');
25
+ if (!fs.existsSync(this.storageDir))
26
+ fs.mkdirSync(this.storageDir, { recursive: true });
27
+ }
28
+ async load() {
29
+ const patterns = this.options.fileExtensions.map(ext => `**/*${ext}`);
30
+ const files = patterns.flatMap(pattern => glob.sync(pattern, { cwd: this.options.rootDir, ignore: this.options.ignorePatterns, nodir: true }));
31
+ for (const filePath of files) {
32
+ const fullPath = path.join(this.options.rootDir, filePath);
33
+ try {
34
+ const content = fs.readFileSync(fullPath, 'utf-8');
35
+ await this.enqueueAdd(path.normalize(filePath), content);
36
+ }
37
+ catch { }
38
+ }
39
+ await this.flushQueue();
40
+ if (this.options.recursive) {
41
+ for (const file of Array.from(this.files.values())) {
42
+ this.resolveImports(file, 0);
43
+ }
44
+ }
45
+ this.watchFiles();
46
+ }
47
+ hashContent(content) {
48
+ const version = 'chunker:v2';
49
+ return crypto.createHash('sha256').update(version + content + this.chunkLimit + this.embedder.model, 'utf-8').digest('hex');
50
+ }
51
+ normalize(vec) {
52
+ let norm = 0;
53
+ for (const v of vec)
54
+ norm += v * v;
55
+ norm = Math.sqrt(norm);
56
+ if (norm === 0)
57
+ return vec;
58
+ return vec.map(v => v / norm);
59
+ }
60
+ async enqueueAdd(filePath, content) {
61
+ const task = async () => {
62
+ await this.addFile(filePath, content);
63
+ };
64
+ while (this.embeddingQueue.length >= this.maxConcurrentEmbeds) {
65
+ await Promise.race(this.embeddingQueue);
66
+ }
67
+ const p = task().finally(() => {
68
+ this.embeddingQueue = this.embeddingQueue.filter(x => x !== p);
69
+ });
70
+ this.embeddingQueue.push(p);
71
+ }
72
+ async flushQueue() {
73
+ await Promise.allSettled(this.embeddingQueue);
74
+ }
75
+ async addFile(filePath, content) {
76
+ const storagePath = path.join(this.storageDir, `${filePath.replace(/\\/g, '_')}_${this.embedder.model}.json`);
77
+ let embedding;
78
+ const currentHash = this.hashContent(content);
79
+ if (fs.existsSync(storagePath)) {
80
+ try {
81
+ const saved = JSON.parse(fs.readFileSync(storagePath, 'utf-8'));
82
+ if (saved.hash === currentHash)
83
+ embedding = saved.embedding;
84
+ }
85
+ catch { }
86
+ }
87
+ let chunks = this.isSmall(content) ? [content] : this.chunkByAST(content);
88
+ if (chunks.length > this.chunkLimit)
89
+ chunks = chunks.slice(0, this.chunkLimit);
90
+ if (!embedding) {
91
+ const raw = await this.embedder.embed({ path: filePath, content, chunks, imports: [] });
92
+ embedding = raw.map(v => this.normalize(v));
93
+ try {
94
+ fs.writeFileSync(storagePath, JSON.stringify({ hash: currentHash, embedding }, null, 2));
95
+ }
96
+ catch { }
97
+ }
98
+ this.files.set(filePath, { path: filePath, content, chunks, imports: [], embedding });
99
+ }
100
+ isSmall(content) {
101
+ return content.split('\n').length < 50;
102
+ }
103
+ chunkByAST(content) {
104
+ try {
105
+ const sourceFile = ts.createSourceFile('temp.ts', content, ts.ScriptTarget.Latest);
106
+ const chunks = [];
107
+ let buffer = '';
108
+ ts.forEachChild(sourceFile, node => {
109
+ const text = node.getFullText(sourceFile).trim();
110
+ if (!text)
111
+ return;
112
+ if ((buffer + text).length > 800) {
113
+ chunks.push(buffer);
114
+ buffer = text;
115
+ }
116
+ else {
117
+ buffer += '\n' + text;
118
+ }
119
+ });
120
+ if (buffer.trim())
121
+ chunks.push(buffer.trim());
122
+ return chunks.length ? chunks : [content];
123
+ }
124
+ catch {
125
+ return [content];
126
+ }
127
+ }
128
+ resolveImports(file, depth) {
129
+ if (depth > (this.options.maxDepth || 10))
130
+ return;
131
+ const imports = [];
132
+ // capture import and require statements
133
+ const importRegex = /import\s+(?:[^'"\n]+)\s+from\s+['"](.+)['"]|require\(\s*['"](.+)['"]\s*\)/g;
134
+ let match;
135
+ while ((match = importRegex.exec(file.content)) !== null) {
136
+ const sourceImport = match[1] || match[2];
137
+ if (!sourceImport)
138
+ continue;
139
+ const base = path.join(path.dirname(file.path), sourceImport);
140
+ for (const ext of ['', '.ts', '.tsx', '.js', '/index.ts', '/index.js']) {
141
+ const candidate = path.normalize(base + ext);
142
+ if (this.files.has(candidate)) {
143
+ imports.push({ source: sourceImport, resolved: candidate });
144
+ // recurse into the resolved import
145
+ this.resolveImports(this.files.get(candidate), depth + 1);
146
+ break;
147
+ }
148
+ }
149
+ }
150
+ // deduplicate by resolved path
151
+ file.imports = imports.filter((v, i, arr) => arr.findIndex(x => x.resolved === v.resolved) === i);
152
+ }
153
+ async upsertFile(filePath, content) {
154
+ await this.enqueueAdd(path.normalize(filePath), content);
155
+ }
156
+ getFile(filePath) {
157
+ return this.files.get(path.normalize(filePath));
158
+ }
159
+ listFiles() {
160
+ return Array.from(this.files.values());
161
+ }
162
+ async semanticSearch(query, topK = 5) {
163
+ const queryEmbedding = this.normalize(await this.embedder.embedQuery(query));
164
+ const results = [];
165
+ for (const file of this.files.values()) {
166
+ if (!file.embedding)
167
+ continue;
168
+ let bestScore = 0;
169
+ let bestChunk = '';
170
+ for (let i = 0; i < file.embedding.length; i++) {
171
+ const score = this.cosineSimilarity(queryEmbedding, file.embedding[i]);
172
+ if (score > bestScore) {
173
+ bestScore = score;
174
+ bestChunk = file.chunks[i] || '';
175
+ }
176
+ }
177
+ if (bestScore > 0) {
178
+ const importsText = Array.isArray(file.imports)
179
+ ? file.imports
180
+ .map((imp) => typeof imp === 'string' ? imp : `${imp.source} -> ${imp.resolved}`)
181
+ .join('\n')
182
+ : '';
183
+ const context = `FILE: ${file.path}
184
+ IMPORTS:
185
+ ${importsText || '(none)'}
186
+ ---
187
+ ${bestChunk}`;
188
+ results.push({ path: file.path, snippet: context.slice(0, 500), score: bestScore });
189
+ }
190
+ }
191
+ return results.sort((a, b) => b.score - a.score).slice(0, topK);
192
+ }
193
+ cosineSimilarity(a, b) {
194
+ let dot = 0;
195
+ const len = Math.min(a.length, b.length);
196
+ for (let i = 0; i < len; i++)
197
+ dot += a[i] * b[i];
198
+ return dot;
199
+ }
200
+ watchFiles() {
201
+ this.watcher = chokidar.watch(this.options.rootDir, {
202
+ ignored: this.options.ignorePatterns,
203
+ persistent: true,
204
+ ignoreInitial: true
205
+ });
206
+ const schedule = (fullPath, handler) => {
207
+ if (this.debounceMap.has(fullPath))
208
+ clearTimeout(this.debounceMap.get(fullPath));
209
+ this.debounceMap.set(fullPath, setTimeout(handler, 200));
210
+ };
211
+ this.watcher
212
+ .on('add', fullPath => schedule(fullPath, async () => {
213
+ const relPath = path.relative(this.options.rootDir, fullPath);
214
+ if (this.options.fileExtensions?.some(ext => fullPath.endsWith(ext))) {
215
+ try {
216
+ const content = fs.readFileSync(fullPath, 'utf-8');
217
+ await this.enqueueAdd(relPath, content);
218
+ }
219
+ catch { }
220
+ }
221
+ }))
222
+ .on('change', fullPath => schedule(fullPath, async () => {
223
+ const relPath = path.relative(this.options.rootDir, fullPath);
224
+ if (this.options.fileExtensions?.some(ext => fullPath.endsWith(ext))) {
225
+ try {
226
+ const content = fs.readFileSync(fullPath, 'utf-8');
227
+ await this.enqueueAdd(relPath, content);
228
+ }
229
+ catch { }
230
+ }
231
+ }))
232
+ .on('unlink', fullPath => {
233
+ const relPath = path.relative(this.options.rootDir, fullPath);
234
+ this.files.delete(relPath);
235
+ });
236
+ }
237
+ stopWatching() {
238
+ if (this.watcher) {
239
+ this.watcher.close();
240
+ this.watcher = null;
241
+ }
242
+ }
243
+ clear() {
244
+ this.stopWatching();
245
+ this.files.clear();
246
+ }
247
+ }
package/dist/demo.js ADDED
@@ -0,0 +1,22 @@
1
+ import { OllamaEmbedder } from './embeddings/Ollama.js';
2
+ import { Inbed } from './Inbed.js';
3
+ async function demo() {
4
+ try {
5
+ const embedder = new OllamaEmbedder('https://ollama.johan.chat', 'mxbai-embed-large');
6
+ const project = new Inbed(embedder, { rootDir: './' });
7
+ await project.load();
8
+ console.log('Loaded files:', project.listFiles().map(f => f.path));
9
+ const results = await project.semanticSearch('package');
10
+ console.log('Semantic search results:');
11
+ results.forEach(result => {
12
+ console.log(`- ${result.path}: ${result.score.toFixed(4)}`);
13
+ });
14
+ // Clean up
15
+ project.clear();
16
+ }
17
+ catch (error) {
18
+ console.error('Demo failed:', error);
19
+ process.exit(1);
20
+ }
21
+ }
22
+ demo();
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,15 @@
1
+ export class LocalEmbedder {
2
+ async embed(file) {
3
+ return file.chunks.map(chunk => this.stringToVector(chunk));
4
+ }
5
+ async embedQuery(query) {
6
+ return this.stringToVector(query);
7
+ }
8
+ stringToVector(text) {
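+ // Toy embedding: maps each character code to roughly [0, 1]; deterministic, but not semantic.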
9
+ const vec = [];
10
+ for (let i = 0; i < text.length; i++) {
11
+ vec.push(text.charCodeAt(i) / 255);
12
+ }
13
+ return vec;
14
+ }
15
+ }
@@ -0,0 +1,52 @@
1
+ import axios from 'axios';
2
+ /**
3
+ * Interface InbedFile
4
+ * export interface InbedFile {
5
+ * name: string;
6
+ * chunks: string[];
7
+ * }
8
+ */
9
+ /**
10
+ * Interface Embedder
11
+ * export interface Embedder {
12
+ * embed(file: InbedFile): Promise<number[][]>;
13
+ * embedQuery(query: string): Promise<number[]>;
14
+ * }
15
+ */
16
+ export class OllamaEmbedder {
17
+ constructor(baseUrl = 'http://localhost:11434', model = 'mxbai-embed-large') {
18
+ this.baseUrl = baseUrl;
19
+ this.model = model;
20
+ }
21
+ async embed(file) {
22
+ const embeddings = [];
23
+ for (const chunk of file.chunks) {
24
+ const emb = await this.embedText(chunk);
25
+ if (emb && emb.length > 0)
26
+ embeddings.push(emb);
27
+ else
28
+ console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
29
+ }
30
+ return embeddings;
31
+ }
32
+ async embedQuery(query) {
33
+ return this.embedText(query);
34
+ }
35
+ async embedText(text) {
36
+ try {
37
+ const response = await axios.post(`${this.baseUrl}/api/embed`, {
38
+ model: this.model,
39
+ input: text
40
+ }, { timeout: 30000 });
41
+ if (response.data?.embeddings && response.data.embeddings.length > 0) {
42
+ return response.data.embeddings[0];
43
+ }
44
+ console.error('Unexpected Ollama API response:', response.data);
45
+ return [];
46
+ }
47
+ catch (error) {
48
+ console.error('Error in embedText (Ollama):', error.response?.data ? JSON.stringify(error.response.data) : error.message);
49
+ return [];
50
+ }
51
+ }
52
+ }
@@ -0,0 +1,50 @@
1
+ import OpenAI from 'openai';
2
+ export class OpenAIEmbedder {
3
+ constructor(apiKey, model = 'text-embedding-3-small') {
4
+ this.client = new OpenAI({ apiKey });
5
+ this.model = model;
6
+ }
7
+ async embed(file) {
8
+ const embeddings = [];
9
+ for (const chunk of file.chunks) {
10
+ try {
11
+ const emb = await this.embedText(chunk);
12
+ if (emb && emb.length > 0)
13
+ embeddings.push(emb);
14
+ else
15
+ console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
16
+ }
17
+ catch (error) {
18
+ console.error('Error embedding chunk:', error);
19
+ }
20
+ }
21
+ return embeddings;
22
+ }
23
+ async embedQuery(query) {
24
+ try {
25
+ return await this.embedText(query);
26
+ }
27
+ catch (error) {
28
+ console.error('Error embedding query:', error);
29
+ return [];
30
+ }
31
+ }
32
+ async embedText(text) {
33
+ try {
34
+ const response = await this.client.embeddings.create({
35
+ model: this.model,
36
+ input: text,
37
+ });
38
+ // The response returns the embedding in data[0].embedding
39
+ if (response.data && response.data.length > 0) {
40
+ return response.data[0].embedding;
41
+ }
42
+ console.error('Unexpected embedding API response:', response);
43
+ return [];
44
+ }
45
+ catch (error) {
46
+ console.error('Error in embedText:', error.response?.data ?? error.message);
47
+ return [];
48
+ }
49
+ }
50
+ }
@@ -0,0 +1,84 @@
1
+ import axios from 'axios';
2
+ /**
3
+ * Interface InbedFile (example)
4
+ * export interface InbedFile {
5
+ * name: string;
6
+ * chunks: string[];
7
+ * }
8
+ */
9
+ /**
10
+ * Interface Embedder (example)
11
+ * export interface Embedder {
12
+ * embed(file: InbedFile): Promise<number[][]>;
13
+ * embedQuery(query: string): Promise<number[]>;
14
+ * }
15
+ */
16
+ export class OpenRouterEmbedder {
17
+ constructor(apiKey, model) {
18
+ this.apiKey = apiKey;
19
+ this.model = model;
20
+ }
21
+ async embed(file) {
22
+ try {
23
+ const embeddings = [];
24
+ // Iterate over all of the file's chunks
25
+ for (const chunk of file.chunks) {
26
+ // Generate the embedding for each chunk
27
+ const emb = await this.embedText(chunk);
28
+ // Append the resulting embedding to the array
29
+ if (emb && emb.length > 0) {
30
+ embeddings.push(emb);
31
+ }
32
+ else {
33
+ // Handle the case where embedText returned an empty array (an error occurred)
34
+ console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
35
+ }
36
+ }
37
+ return embeddings;
38
+ }
39
+ catch (error) {
40
+ console.error('Error embedding file:', error);
41
+ return [];
42
+ }
43
+ }
44
+ async embedQuery(query) {
45
+ try {
46
+ // Use the same method to embed the query
47
+ return await this.embedText(query);
48
+ }
49
+ catch (error) {
50
+ console.error('Error embedding query:', error);
51
+ return [];
52
+ }
53
+ }
54
+ async embedText(text) {
55
+ try {
56
+ const response = await axios.post('https://openrouter.ai/api/v1/embeddings', {
57
+ input: text,
58
+ model: this.model
59
+ }, {
60
+ headers: {
61
+ // Use the instance's API key
62
+ 'Authorization': `Bearer ${this.apiKey}`,
63
+ 'Content-Type': 'application/json',
64
+ },
65
+ timeout: 30000
66
+ });
67
+ // The OpenRouter/OpenAI response returns an array under 'data',
68
+ // and the first object contains the 'embedding'.
69
+ // Example response shape: { "data": [{ "embedding": [0.1, 0.2, ...], ... }], ... }
70
+ if (response.data && response.data.data && response.data.data.length > 0) {
71
+ return response.data.data[0].embedding;
72
+ }
73
+ // If the response is unexpected, log it and return an empty array
74
+ console.error('Unexpected embedding API response:', response.data);
75
+ return [];
76
+ }
77
+ catch (error) {
78
+ // Log error details (including the API response body, if available)
79
+ console.error('Error in embedText:', error.response?.data ? JSON.stringify(error.response.data) : error.message);
80
+ // Return an empty array so the caller can continue
81
+ return [];
82
+ }
83
+ }
84
+ }
package/dist/index.js ADDED
@@ -0,0 +1,5 @@
1
+ export { Inbed } from './Inbed.js';
2
+ export * from './types.js';
3
+ export * from './embeddings/Embedder.js';
4
+ export * from './embeddings/Ollama.js';
5
+ export * from './embeddings/OpenRouter.js';
package/dist/types.js ADDED
@@ -0,0 +1 @@
1
+ export {};
package/dist/utils.js ADDED
@@ -0,0 +1,12 @@
1
+ import fs from 'fs';
2
+ export function readFileSafe(filePath) {
3
+ try {
4
+ return fs.readFileSync(filePath, 'utf-8');
5
+ }
6
+ catch {
7
+ return null;
8
+ }
9
+ }
10
+ export function isIgnored(filePath, ignorePatterns) {
11
+ return ignorePatterns.some(pattern => filePath.includes(pattern));
12
+ }
package/johankit.yml ADDED
@@ -0,0 +1,6 @@
1
+ ignore:
2
+ - dist
3
+ - .inbed
4
+ - node_modules
5
+ - package-lock.json
6
+ - yarn.lock
package/package.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "name": "inbed",
3
+ "version": "1.0.1",
4
+ "description": "Semantic indexing and search over source code using vector embeddings",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "scripts": {
8
+ "build": "tsc",
9
+ "start": "node dist/index.js",
10
+ "demo": "node demo.js"
11
+ },
12
+ "keywords": ["embedding", "semantic", "codebase", "AST", "typescript", "javascript"],
13
+ "author": "Johan Labs",
14
+ "license": "MIT",
15
+ "type": "module",
16
+ "dependencies": {
17
+ "@openrouter/sdk": "^0.2.11",
18
+ "@xenova/transformers": "^2.17.2",
19
+ "axios": "^1.13.2",
20
+ "chokidar": "^5.0.0",
21
+ "dotenv": "^17.2.3",
22
+ "glob": "^13.0.0",
23
+ "openai": "^6.10.0",
24
+ "typescript": "^5.2.2"
25
+ },
26
+ "devDependencies": {
27
+ "nodemon": "^3.1.11",
28
+ "ts-node": "^10.9.1"
29
+ }
30
+ }
package/src/Inbed.ts ADDED
@@ -0,0 +1,268 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import crypto from 'crypto';
4
+ import chokidar from 'chokidar';
5
+
6
+ import { glob } from 'glob';
7
+ import ts from 'typescript';
8
+ import { InbedFile, InbedOptions, SearchResult, InbedImport } from './types.js';
9
+
10
+ import { Embedder } from './embeddings/Embedder.js';
11
+
12
+ export class Inbed {
13
+ private files: Map<string, InbedFile> = new Map();
14
+ private options: InbedOptions;
15
+ private embedder: Embedder;
16
+ private watcher: any;
17
+ private storageDir: string;
18
+ private chunkLimit: number;
19
+
20
+ private embeddingQueue: Promise<void>[] = [];
21
+ private maxConcurrentEmbeds = 4;
22
+
23
+ private debounceMap: Map<string, NodeJS.Timeout> = new Map();
24
+
25
+ constructor(embedder: Embedder, options: InbedOptions) {
26
+ this.options = {
27
+ recursive: true,
28
+ fileExtensions: ['.ts', '.js'],
29
+ ignorePatterns: ['johankit.yaml', 'dist/**', 'node_modules/**', '.git/**', '.inbed/**', ...(options.ignorePatterns || [])],
30
+ maxDepth: 10,
31
+ ...options
32
+ };
33
+ this.embedder = embedder;
34
+ this.chunkLimit = (options as any).chunkLimit || 10;
35
+ this.storageDir = path.join(this.options.rootDir, '.inbed');
36
+ if (!fs.existsSync(this.storageDir)) fs.mkdirSync(this.storageDir, { recursive: true });
37
+ }
38
+
39
+ async load(): Promise<void> {
40
+ const patterns = this.options.fileExtensions!.map(ext => `**/*${ext}`);
41
+ const files = patterns.flatMap(pattern => glob.sync(pattern, { cwd: this.options.rootDir, ignore: this.options.ignorePatterns, nodir: true }));
42
+
43
+ for (const filePath of files) {
44
+ const fullPath = path.join(this.options.rootDir, filePath);
45
+ try {
46
+ const content = fs.readFileSync(fullPath, 'utf-8');
47
+ await this.enqueueAdd(path.normalize(filePath), content);
48
+ } catch {}
49
+ }
50
+
51
+ await this.flushQueue();
52
+
53
+ if (this.options.recursive) {
54
+ for (const file of Array.from(this.files.values())) {
55
+ this.resolveImports(file, 0);
56
+ }
57
+ }
58
+
59
+ this.watchFiles();
60
+ }
61
+
62
+ private hashContent(content: string): string {
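+ // Cache key combines the chunker version, file content, chunk limit, and embedder model, so cached embeddings are recomputed when any of these change.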
63
+ const version = 'chunker:v2';
64
+ return crypto.createHash('sha256').update(version + content + this.chunkLimit + this.embedder.model, 'utf-8').digest('hex');
65
+ }
66
+
67
+ private normalize(vec: number[]): number[] {
68
+ let norm = 0;
69
+ for (const v of vec) norm += v * v;
70
+ norm = Math.sqrt(norm);
71
+ if (norm === 0) return vec;
72
+ return vec.map(v => v / norm);
73
+ }
74
+
75
+ private async enqueueAdd(filePath: string, content: string) {
76
+ const task = async () => {
77
+ await this.addFile(filePath, content);
78
+ };
79
+
80
+ while (this.embeddingQueue.length >= this.maxConcurrentEmbeds) {
81
+ await Promise.race(this.embeddingQueue);
82
+ }
83
+
84
+ const p = task().finally(() => {
85
+ this.embeddingQueue = this.embeddingQueue.filter(x => x !== p);
86
+ });
87
+
88
+ this.embeddingQueue.push(p);
89
+ }
90
+
91
+ private async flushQueue() {
92
+ await Promise.allSettled(this.embeddingQueue);
93
+ }
94
+
95
+ private async addFile(filePath: string, content: string) {
96
+ const storagePath = path.join(this.storageDir, `${filePath.replace(/\\/g, '_')}_${this.embedder.model}.json`);
97
+ let embedding: number[][] | undefined;
98
+ const currentHash = this.hashContent(content);
99
+
100
+ if (fs.existsSync(storagePath)) {
101
+ try {
102
+ const saved = JSON.parse(fs.readFileSync(storagePath, 'utf-8'));
103
+ if (saved.hash === currentHash) embedding = saved.embedding;
104
+ } catch {}
105
+ }
106
+
107
+ let chunks = this.isSmall(content) ? [content] : this.chunkByAST(content);
108
+ if (chunks.length > this.chunkLimit) chunks = chunks.slice(0, this.chunkLimit);
109
+
110
+ if (!embedding) {
111
+ const raw = await this.embedder.embed({ path: filePath, content, chunks, imports: [] });
112
+ embedding = raw.map(v => this.normalize(v));
113
+ try {
114
+ fs.writeFileSync(storagePath, JSON.stringify({ hash: currentHash, embedding }, null, 2));
115
+ } catch {}
116
+ }
117
+
118
+ this.files.set(filePath, { path: filePath, content, chunks, imports: [], embedding });
119
+ }
120
+
121
+ private isSmall(content: string): boolean {
122
+ return content.split('\n').length < 50;
123
+ }
124
+
125
+ private chunkByAST(content: string): string[] {
126
+ try {
127
+ const sourceFile = ts.createSourceFile('temp.ts', content, ts.ScriptTarget.Latest);
128
+ const chunks: string[] = [];
129
+ let buffer = '';
130
+ ts.forEachChild(sourceFile, node => {
131
+ const text = node.getFullText(sourceFile).trim();
132
+ if (!text) return;
133
+ if ((buffer + text).length > 800) {
134
+ chunks.push(buffer);
135
+ buffer = text;
136
+ } else {
137
+ buffer += '\n' + text;
138
+ }
139
+ });
140
+ if (buffer.trim()) chunks.push(buffer.trim());
141
+ return chunks.length ? chunks : [content];
142
+ } catch {
143
+ return [content];
144
+ }
145
+ }
146
+
147
+ private resolveImports(file: InbedFile, depth: number) {
148
+ if (depth > (this.options.maxDepth || 10)) return;
149
+
150
+ const imports: InbedImport[] = [];
151
+
152
+ const importRegex = /import\s+(?:[^'"\n]+)\s+from\s+['"](.+)['"]|require\(\s*['"](.+)['"]\s*\)/g;
153
+
154
+ let match;
155
+ while ((match = importRegex.exec(file.content)) !== null) {
156
+ const sourceImport = match[1] || match[2];
157
+ if (!sourceImport) continue;
158
+
159
+ const base = path.join(path.dirname(file.path), sourceImport);
160
+
161
+ for (const ext of ['', '.ts', '.tsx', '.js', '/index.ts', '/index.js']) {
162
+ const candidate = path.normalize(base + ext);
163
+ if (this.files.has(candidate)) {
164
+ imports.push({ source: sourceImport, resolved: candidate });
165
+ this.resolveImports(this.files.get(candidate)!, depth + 1);
166
+ break;
167
+ }
168
+ }
169
+ }
170
+
171
+ file.imports = imports.filter((v, i, arr) => arr.findIndex(x => x.resolved === v.resolved) === i);
172
+ }
173
+
174
+ async upsertFile(filePath: string, content: string) {
175
+ await this.enqueueAdd(path.normalize(filePath), content);
176
+ }
177
+
178
+ getFile(filePath: string): InbedFile | undefined {
179
+ return this.files.get(path.normalize(filePath));
180
+ }
181
+
182
+ listFiles(): InbedFile[] {
183
+ return Array.from(this.files.values());
184
+ }
185
+
186
+ async semanticSearch(query: string, topK: number = 5): Promise<SearchResult[]> {
187
+ const queryEmbedding = this.normalize(await this.embedder.embedQuery(query));
188
+ const results: SearchResult[] = [];
189
+
190
+ for (const file of this.files.values()) {
191
+ if (!file.embedding) continue;
192
+ let bestScore = 0;
193
+ let bestChunk = '';
194
+ for (let i = 0; i < file.embedding.length; i++) {
195
+ const score = this.cosineSimilarity(queryEmbedding, file.embedding[i]);
196
+ if (score > bestScore) {
197
+ bestScore = score;
198
+ bestChunk = file.chunks[i] || '';
199
+ }
200
+ }
201
+ if (bestScore > 0) {
202
+ const importsText = file.imports.map(imp => `${imp.source} -> ${imp.resolved}`).join('\n');
203
+ const context = `FILE: ${file.path}\nIMPORTS:\n${importsText || '(none)'}\n---\n${bestChunk}`;
204
+ results.push({ path: file.path, snippet: context.slice(0, 500), score: bestScore });
205
+ }
206
+ }
207
+
208
+ return results.sort((a, b) => b.score - a.score).slice(0, topK);
209
+ }
210
+
211
+ private cosineSimilarity(a: number[], b: number[]): number {
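+ // Vectors are L2-normalized at embed time, so this dot product is the cosine similarity.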
212
+ let dot = 0;
213
+ const len = Math.min(a.length, b.length);
214
+ for (let i = 0; i < len; i++) dot += a[i] * b[i];
215
+ return dot;
216
+ }
217
+
218
+ private watchFiles() {
219
+ this.watcher = chokidar.watch(this.options.rootDir, {
220
+ ignored: this.options.ignorePatterns,
221
+ persistent: true,
222
+ ignoreInitial: true
223
+ });
224
+
225
+ const schedule = (fullPath: string, handler: () => void) => {
226
+ if (this.debounceMap.has(fullPath)) clearTimeout(this.debounceMap.get(fullPath)!);
227
+ this.debounceMap.set(fullPath, setTimeout(handler, 200));
228
+ };
229
+
230
+ this.watcher
231
+ .on('add', fullPath => schedule(fullPath, async () => {
232
+ const relPath = path.relative(this.options.rootDir, fullPath);
233
+ if (this.options.fileExtensions?.some(ext => fullPath.endsWith(ext))) {
234
+ try {
235
+ const content = fs.readFileSync(fullPath, 'utf-8');
236
+ await this.enqueueAdd(relPath, content);
237
+ if (this.options.recursive) this.resolveImports(this.files.get(relPath)!, 0);
238
+ } catch {}
239
+ }
240
+ }))
241
+ .on('change', fullPath => schedule(fullPath, async () => {
242
+ const relPath = path.relative(this.options.rootDir, fullPath);
243
+ if (this.options.fileExtensions?.some(ext => fullPath.endsWith(ext))) {
244
+ try {
245
+ const content = fs.readFileSync(fullPath, 'utf-8');
246
+ await this.enqueueAdd(relPath, content);
247
+ if (this.options.recursive) this.resolveImports(this.files.get(relPath)!, 0);
248
+ } catch {}
249
+ }
250
+ }))
251
+ .on('unlink', fullPath => {
252
+ const relPath = path.relative(this.options.rootDir, fullPath);
253
+ this.files.delete(relPath);
254
+ });
255
+ }
256
+
257
+ stopWatching() {
258
+ if (this.watcher) {
259
+ this.watcher.close();
260
+ this.watcher = null;
261
+ }
262
+ }
263
+
264
+ clear(): void {
265
+ this.stopWatching();
266
+ this.files.clear();
267
+ }
268
+ }
@@ -0,0 +1,7 @@
1
+ import { InbedFile } from '../types.js';
2
+
3
+ export interface Embedder {
4
+ model: string;
5
+ embed(file: InbedFile): Promise<number[][]>;
6
+ embedQuery(query: string): Promise<number[]>;
7
+ }
@@ -0,0 +1,69 @@
1
+ import axios from 'axios';
2
+ import { InbedFile } from '../types.js';
3
+ import { Embedder } from './Embedder.js';
4
+
5
+ /**
6
+ * Interface InbedFile
7
+ * export interface InbedFile {
8
+ * name: string;
9
+ * chunks: string[];
10
+ * }
11
+ */
12
+
13
+ /**
14
+ * Interface Embedder
15
+ * export interface Embedder {
16
+ * embed(file: InbedFile): Promise<number[][]>;
17
+ * embedQuery(query: string): Promise<number[]>;
18
+ * }
19
+ */
20
+
21
+ export class OllamaEmbedder implements Embedder {
22
+ private baseUrl: string;
23
+ model: string;
24
+
25
+ constructor(baseUrl: string = 'http://localhost:11434', model: string = 'mxbai-embed-large') {
26
+ this.baseUrl = baseUrl;
27
+ this.model = model;
28
+ }
29
+
30
+ async embed(file: InbedFile): Promise<number[][]> {
31
+ const embeddings: number[][] = [];
32
+ for (const chunk of file.chunks) {
33
+ const emb = await this.embedText(chunk);
34
+ if (emb && emb.length > 0) embeddings.push(emb);
35
+ else console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
36
+ }
37
+ return embeddings;
38
+ }
39
+
40
+ async embedQuery(query: string): Promise<number[]> {
41
+ return this.embedText(query);
42
+ }
43
+
44
+ private async embedText(text: string): Promise<number[]> {
45
+ try {
46
+ const response = await axios.post(
47
+ `${this.baseUrl}/api/embed`,
48
+ {
49
+ model: this.model,
50
+ input: text
51
+ },
52
+ { timeout: 30000 }
53
+ );
54
+
55
+ if (response.data?.embeddings && response.data.embeddings.length > 0) {
56
+ return response.data.embeddings[0];
57
+ }
58
+
59
+ console.error('Unexpected Ollama API response:', response.data);
60
+ return [];
61
+ } catch (error: any) {
62
+ console.error(
63
+ 'Error in embedText (Ollama):',
64
+ error.response?.data ? JSON.stringify(error.response.data) : error.message
65
+ );
66
+ return [];
67
+ }
68
+ }
69
+ }
@@ -0,0 +1,58 @@
1
+ import { InbedFile } from '../types.js';
2
+ import { Embedder } from './Embedder.js';
3
+ import OpenAI from 'openai';
4
+
5
+ export class OpenAIEmbedder implements Embedder {
6
+ private client: OpenAI;
7
+ model: string;
8
+
9
+ constructor(apiKey: string, model: string = 'text-embedding-3-small') {
10
+ this.client = new OpenAI({ apiKey });
11
+ this.model = model;
12
+ }
13
+
14
+ async embed(file: InbedFile): Promise<number[][]> {
15
+ const embeddings: number[][] = [];
16
+
17
+ for (const chunk of file.chunks) {
18
+ try {
19
+ const emb = await this.embedText(chunk);
20
+ if (emb && emb.length > 0) embeddings.push(emb);
21
+ else console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
22
+ } catch (error) {
23
+ console.error('Error embedding chunk:', error);
24
+ }
25
+ }
26
+
27
+ return embeddings;
28
+ }
29
+
30
+ async embedQuery(query: string): Promise<number[]> {
31
+ try {
32
+ return await this.embedText(query);
33
+ } catch (error) {
34
+ console.error('Error embedding query:', error);
35
+ return [];
36
+ }
37
+ }
38
+
39
+ private async embedText(text: string): Promise<number[]> {
40
+ try {
41
+ const response = await this.client.embeddings.create({
42
+ model: this.model,
43
+ input: text,
44
+ });
45
+
46
+ // The response returns the embedding in data[0].embedding
47
+ if (response.data && response.data.length > 0) {
48
+ return response.data[0].embedding;
49
+ }
50
+
51
+ console.error('Unexpected embedding API response:', response);
52
+ return [];
53
+ } catch (error: any) {
54
+ console.error('Error in embedText:', error.response?.data ?? error.message);
55
+ return [];
56
+ }
57
+ }
58
+ }
@@ -0,0 +1,100 @@
1
+ import axios from 'axios';
2
+ import { InbedFile } from '../types.js'; // Assuming this import path is correct
3
+ import { Embedder } from './Embedder.js'; // Assuming this import path is correct
4
+
5
+ /**
6
+ * Interface InbedFile (example)
7
+ * export interface InbedFile {
8
+ * name: string;
9
+ * chunks: string[];
10
+ * }
11
+ */
12
+
13
+ /**
14
+ * Interface Embedder (example)
15
+ * export interface Embedder {
16
+ * embed(file: InbedFile): Promise<number[][]>;
17
+ * embedQuery(query: string): Promise<number[]>;
18
+ * }
19
+ */
20
+
21
+ export class OpenRouterEmbedder implements Embedder {
22
+ private apiKey: string;
23
+ model: string;
24
+
25
+ constructor(apiKey: string, model: string) {
26
+ this.apiKey = apiKey;
27
+ this.model = model;
28
+ }
29
+
30
+ async embed(file: InbedFile): Promise<number[][]> {
31
+ try {
32
+ const embeddings: number[][] = [];
33
+ // Iterate over all of the file's chunks
34
+ for (const chunk of file.chunks) {
35
+ // Generate the embedding for each chunk
36
+ const emb = await this.embedText(chunk);
37
+ // Append the resulting embedding to the array
38
+ if (emb && emb.length > 0) {
39
+ embeddings.push(emb);
40
+ } else {
41
+ // Handle the case where embedText returned an empty array (an error occurred)
42
+ console.warn(`Skipping chunk due to failed embedding: ${chunk.substring(0, 50)}...`);
43
+ }
44
+ }
45
+ return embeddings;
46
+ } catch (error) {
47
+ console.error('Error embedding file:', error);
48
+ return [];
49
+ }
50
+ }
51
+
52
+ async embedQuery(query: string): Promise<number[]> {
53
+ try {
54
+ // Use the same method to embed the query
55
+ return await this.embedText(query);
56
+ } catch (error) {
57
+ console.error('Error embedding query:', error);
58
+ return [];
59
+ }
60
+ }
61
+
62
+ private async embedText(text: string): Promise<number[]> {
63
+ try {
64
+ const response = await axios.post(
65
+ 'https://openrouter.ai/api/v1/embeddings',
66
+ {
67
+ input: text,
68
+ model: this.model
69
+ },
70
+ {
71
+ headers: {
72
+ // Use the instance's API key
73
+ 'Authorization': `Bearer ${this.apiKey}`,
74
+ 'Content-Type': 'application/json',
75
+ },
76
+ timeout: 30000
77
+ }
78
+ );
79
+
80
+ // The OpenRouter/OpenAI response returns an array under 'data',
81
+ // and the first object contains the 'embedding'.
82
+ // Example response shape: { "data": [{ "embedding": [0.1, 0.2, ...], ... }], ... }
83
+ if (response.data && response.data.data && response.data.data.length > 0) {
84
+ return response.data.data[0].embedding;
85
+ }
86
+
87
+ // If the response is unexpected, log it and return an empty array
88
+ console.error('Unexpected embedding API response:', response.data);
89
+ return [];
90
+ } catch (error: any) {
91
+ // Log error details (including the API response body, if available)
92
+ console.error(
93
+ 'Error in embedText:',
94
+ error.response?.data ? JSON.stringify(error.response.data) : error.message
95
+ );
96
+ // Return an empty array so the caller can continue
97
+ return [];
98
+ }
99
+ }
100
+ }
package/src/index.ts ADDED
@@ -0,0 +1,6 @@
1
+ export { Inbed } from './Inbed.js';
2
+ export * from './types.js';
3
+ export * from './embeddings/Embedder.js';
4
+ export * from './embeddings/Ollama.js';
5
+ export * from './embeddings/OpenRouter.js';
6
+ export * from './embeddings/OpenAI.js';
package/src/types.ts ADDED
@@ -0,0 +1,28 @@
1
+ import { Embedder } from "./embeddings/Embedder.js";
2
+
3
+ export interface InbedImport {
4
+ source: string;
5
+ resolved: string;
6
+ }
7
+
8
+ export interface InbedFile {
9
+ path: string;
10
+ content: string;
11
+ chunks: string[];
12
+ imports: InbedImport[];
13
+ embedding?: number[][];
14
+ }
15
+
16
+ export interface SearchResult {
17
+ path: string;
18
+ snippet: string;
19
+ score: number;
20
+ }
21
+
22
+ export interface InbedOptions {
23
+ rootDir: string;
24
+ recursive?: boolean;
25
+ fileExtensions?: string[];
26
+ ignorePatterns?: string[];
27
+ maxDepth?: number;
28
+ }
package/src/utils.ts ADDED
@@ -0,0 +1,14 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+
4
+ export function readFileSafe(filePath: string): string | null {
5
+ try {
6
+ return fs.readFileSync(filePath, 'utf-8');
7
+ } catch {
8
+ return null;
9
+ }
10
+ }
11
+
12
+ export function isIgnored(filePath: string, ignorePatterns: string[]): boolean {
13
+ return ignorePatterns.some(pattern => filePath.includes(pattern));
14
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "module": "ESNext",
5
+ "moduleResolution": "Node",
6
+ "outDir": "dist",
7
+ "rootDir": "src",
8
+ "strict": true,
9
+ "esModuleInterop": true,
10
+ "forceConsistentCasingInFileNames": true,
11
+ "skipLibCheck": true,
12
+ "lib": ["ES2020", "DOM"]
13
+ },
14
+ "include": ["src", "demo.js"]
15
+ }