ctxloom-pro 1.5.6 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,214 +0,0 @@
1
- import {
2
- logger
3
- } from "./chunk-TYDMSHV7.js";
4
-
5
- // packages/core/src/indexer/embedder.ts
6
- import fs from "fs";
7
- import path from "path";
8
/**
 * Exported embedding dimension.
 * NOTE(review): presumably the output width of MODEL_ID — confirm if the model changes.
 */
const EMBEDDING_DIMENSION = 384;

/** Model identifier handed to the feature-extraction pipeline. */
const MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2";

/** Maximum number of characters (string units) of a text passed to the model per embedding. */
const CHUNK_SIZE = 4096;

/** A cached model file smaller than this many bytes (80 MiB) is treated as truncated. */
const MIN_MODEL_BYTES = 80 * 1024 * 1024;

/** Cached pipeline instance; stays `null` until the first successful load. */
let embedder = null;

/** Promise of an initialisation that is currently running, or `null` when none is. */
let embedderInitInFlight = null;
14
/**
 * Lazily imports `@huggingface/transformers` and builds a feature-extraction
 * pipeline for `MODEL_ID` using fp32 weights.
 *
 * @returns {Promise<Function>} The pipeline returned by the library.
 */
async function loadEmbedder() {
  // Dynamic import keeps the dependency from loading until an embedding is requested.
  const { pipeline: createPipeline } = await import("@huggingface/transformers");
  const pipelineOptions = { dtype: "fp32" };
  const extractor = await createPipeline("feature-extraction", MODEL_ID, pipelineOptions);
  return extractor;
}
20
/**
 * Pulls the model file path out of an error message of the form
 * "Load model from <path> failed:Protobuf parsing failed" (case-insensitive).
 *
 * @param {string} message - Error message to inspect.
 * @returns {string|null} The captured path, or `null` when the message does not match.
 */
function extractModelPathFromProtobufError(message) {
  const found = /Load model from (.+) failed:Protobuf parsing failed/i.exec(message);
  if (found === null) {
    return null;
  }
  const [, modelPath] = found;
  return modelPath;
}
24
/**
 * Deletes the model file at `modelPath` when it is smaller than `MIN_MODEL_BYTES`.
 *
 * @param {string} modelPath - Path of the suspected truncated model file.
 * @returns {boolean} `true` only when the file was undersized and has been removed;
 *   `false` when it is large enough or when inspecting/removing it failed
 *   (the failure is logged as a warning, not thrown).
 */
function tryRemoveTruncatedModel(modelPath) {
  try {
    const { size } = fs.statSync(modelPath);
    if (size >= MIN_MODEL_BYTES) {
      // Large enough to be treated as complete: leave it alone.
      return false;
    }
    fs.unlinkSync(modelPath);
    const removalInfo = {
      path: modelPath,
      sizeBytes: size,
      minBytes: MIN_MODEL_BYTES
    };
    logger.warn("Removed truncated embedding model; next attempt will re-download", removalInfo);
    return true;
  } catch (failure) {
    const detail = failure instanceof Error ? failure.message : String(failure);
    logger.warn("Could not inspect/remove suspected truncated model", {
      path: modelPath,
      detail
    });
    return false;
  }
}
43
/**
 * Returns the shared embedding pipeline, loading it on first use.
 *
 * Concurrent callers share one in-flight initialisation promise. Loading is
 * attempted up to three times, but a retry only happens when the error message
 * contains "protobuf parsing failed": if the file named in the message was
 * undersized and got removed, the next attempt starts immediately; otherwise
 * the function waits `attempt` seconds first. Any other error, or the final
 * failed attempt, clears the in-flight promise and rethrows the last error.
 *
 * @returns {Promise<Function>} The cached pipeline.
 */
async function getEmbedder() {
  if (embedder) {
    return embedder;
  }
  if (embedderInitInFlight) {
    return embedderInitInFlight;
  }

  const attemptLimit = 3;
  const initialise = async () => {
    let failure;
    for (let attempt = 1; attempt <= attemptLimit; attempt += 1) {
      try {
        const loaded = await loadEmbedder();
        embedder = loaded;
        return loaded;
      } catch (err) {
        failure = err;
        const text = err instanceof Error ? err.message : String(err);
        const looksLikeProtobufRace = /protobuf parsing failed/i.test(text);
        if (!looksLikeProtobufRace || attempt === attemptLimit) {
          break;
        }
        const cachedModelPath = extractModelPathFromProtobufError(text);
        const removedStaleFile = Boolean(cachedModelPath) && tryRemoveTruncatedModel(cachedModelPath);
        if (removedStaleFile) {
          // The bad file is gone, so there is nothing to wait for before re-downloading.
          logger.warn("Retrying embedding model load after truncated-cache removal", {
            attempt
          });
          continue;
        }
        const delay = attempt * 1000;
        logger.warn("Embedding model load failed; retrying after FS settle", {
          attempt,
          delayMs: delay
        });
        await new Promise((resolve) => {
          setTimeout(resolve, delay);
        });
      }
    }
    // Out of attempts or a non-retryable error: let a later call start over.
    embedderInitInFlight = null;
    throw failure;
  };

  embedderInitInFlight = initialise();
  try {
    return await embedderInitInFlight;
  } finally {
    if (embedder) {
      embedderInitInFlight = null;
    }
  }
}
83
/**
 * Produces an embedding for `text` using the shared pipeline.
 *
 * Only the first `CHUNK_SIZE` characters of `text` are passed to the model,
 * with mean pooling and normalisation requested.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<Array>} The first row when `tolist()` yields a nested array,
 *   otherwise the list itself.
 */
async function generateEmbedding(text) {
  const extractor = await getEmbedder();
  const truncated = text.slice(0, CHUNK_SIZE);
  const tensor = await extractor(truncated, {
    pooling: "mean",
    normalize: true
  });
  const values = tensor.tolist();
  // A nested result is unwrapped to its first row.
  return Array.isArray(values[0]) ? values[0] : values;
}
95
/**
 * Recursively collects the paths of files under `dir` whose extension is in
 * the supported set, skipping directories whose name is in the ignored set.
 *
 * The lookup sets are built once per top-level call and shared by the whole
 * traversal instead of being re-allocated for every directory visited.
 *
 * @param {string} dir - Directory to walk.
 * @param {string[]} [results=[]] - Accumulator; matching paths are appended to
 *   it and the same array is returned.
 * @returns {string[]} `results`, with each match stored as `path.join(parent, name)`.
 * @throws Whatever `fs.readdirSync` throws for an unreadable directory.
 */
function collectFiles(dir, results = []) {
  const IGNORED_DIRS = new Set([
    // Build artifacts + dependency caches
    "node_modules",
    "dist",
    "build",
    "out",
    "target",
    "coverage",
    ".cache",
    ".turbo",
    ".next",
    ".nuxt",
    // Version control + ctxloom state
    ".git",
    ".ctxloom",
    // Other tools' working state (often contains duplicated source)
    ".claude",
    ".code-review-graph",
    ".vscode-test"
  ]);
  const SUPPORTED_EXTENSIONS = new Set([
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".mjs",
    ".vue",
    ".py",
    ".rs",
    ".go",
    ".java",
    ".cs",
    ".rb",
    ".kt",
    ".kts",
    ".swift",
    ".php",
    ".dart",
    ".c",
    ".cpp",
    ".h",
    ".md",
    ".json",
    ".yaml",
    ".yml",
    ".toml",
    ".ipynb"
  ]);

  // Depth-first walk: files are appended and sub-directories descended in readdir order.
  const walk = (currentDir) => {
    const entries = fs.readdirSync(currentDir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(currentDir, entry.name);
      if (entry.isDirectory()) {
        if (!IGNORED_DIRS.has(entry.name)) {
          walk(fullPath);
        }
      } else if (entry.isFile()) {
        if (SUPPORTED_EXTENSIONS.has(path.extname(entry.name))) {
          results.push(fullPath);
        }
      }
    }
  };

  walk(dir);
  return results;
}
160
/**
 * Embeds every file returned by `collectFiles(rootDir)` and upserts it into the
 * vector store at `<rootDir>/.ctxloom/vectors.lancedb`.
 *
 * Files are processed in batches of four. Files larger than 5 MiB or containing
 * only whitespace are skipped and counted neither as indexed nor as errors.
 * A failure on one file is logged, with its path, and counted; it does not stop
 * the run. The store is closed once it has been initialised, even when the
 * directory walk or a progress callback throws.
 *
 * @param {string} rootDir - Directory to index; stored paths are relative to it.
 * @param {(relPath: string, processed: number, total: number) => void} [onProgress]
 *   Called after each successfully indexed file.
 * @returns {Promise<{indexed: number, errors: number}>} Counts of indexed and failed files.
 */
async function indexDirectory(rootDir, onProgress) {
  const MAX_INDEX_SIZE = 5 * 1024 * 1024;
  const CONCURRENCY = 4;

  const { VectorStore } = await import("./VectorStore-2LVECRTY.js");
  const store = new VectorStore(path.join(rootDir, ".ctxloom", "vectors.lancedb"));
  await store.init();

  let indexed = 0;
  let errors = 0;
  let processed = 0;

  // Resolves to the relative path when the file was indexed, or null when skipped.
  const indexFile = async (filePath) => {
    const stat = fs.statSync(filePath);
    if (stat.size > MAX_INDEX_SIZE) {
      logger.warn("Skipping oversized file", { file: filePath, size: stat.size });
      return null;
    }
    const content = fs.readFileSync(filePath, "utf-8");
    if (!content.trim()) return null;
    const relPath = path.relative(rootDir, filePath);
    const embedding = await generateEmbedding(content);
    await store.upsert(relPath, embedding, content);
    return relPath;
  };

  try {
    // Walk inside the try so a traversal failure still closes the opened store.
    const files = collectFiles(rootDir);
    const total = files.length;
    for (let i = 0; i < files.length; i += CONCURRENCY) {
      const batch = files.slice(i, i + CONCURRENCY);
      const results = await Promise.allSettled(
        batch.map((filePath) => indexFile(filePath))
      );
      // allSettled keeps input order, so results[offset] belongs to batch[offset].
      for (const [offset, result] of results.entries()) {
        processed++;
        if (result.status === "fulfilled") {
          if (result.value !== null) {
            indexed++;
            onProgress?.(result.value, processed, total);
          }
        } else {
          errors++;
          logger.error("Failed to index file", {
            file: batch[offset],
            detail: result.reason instanceof Error ? result.reason.message : String(result.reason)
          });
        }
      }
    }
  } finally {
    await store.close();
  }
  return { indexed, errors };
}
207
-
208
- export {
209
- EMBEDDING_DIMENSION,
210
- generateEmbedding,
211
- collectFiles,
212
- indexDirectory
213
- };
214
- //# sourceMappingURL=chunk-WDX4PJGL.js.map
@@ -1,14 +0,0 @@
1
- import {
2
- EMBEDDING_DIMENSION,
3
- collectFiles,
4
- generateEmbedding,
5
- indexDirectory
6
- } from "./chunk-WDX4PJGL.js";
7
- import "./chunk-TYDMSHV7.js";
8
- export {
9
- EMBEDDING_DIMENSION,
10
- collectFiles,
11
- generateEmbedding,
12
- indexDirectory
13
- };
14
- //# sourceMappingURL=embedder-3AE4CSR7.js.map