@ulpi/codemap 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +73 -0
- package/README.md +386 -0
- package/dist/chunk-5ISZDDN7.js +80 -0
- package/dist/chunk-7SDODQZD.js +200 -0
- package/dist/chunk-SNG7R3UC.js +38 -0
- package/dist/chunk-WUVKW5JG.js +72 -0
- package/dist/index.js +4440 -0
- package/dist/ollama-3XCUZMZT-J6Z4WWWO.js +7 -0
- package/dist/openai-RC75RP4O-NSFZC5O6.js +8 -0
- package/dist/ulpi-KLKEAQC3-5ATUONU7.js +10 -0
- package/package.json +35 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
resolveApiKey,
|
|
4
|
+
resolveUlpiUrl
|
|
5
|
+
} from "./chunk-SNG7R3UC.js";
|
|
6
|
+
|
|
7
|
+
// ../../packages/intelligence/embed-engine/dist/chunk-PDPVAD7W.js
|
|
8
|
+
// Retry/backoff and batch-polling tuning for the ULPI embeddings HTTP API.
var MAX_RETRIES = 3;
var RETRY_BASE_MS = 1e3; // actual delay per retry is RETRY_BASE_MS * 2^attempt
var BATCH_POLL_INTERVAL_MS = 2e3; // exported for callers that poll submitBatch results

/**
 * Embedder backed by the ULPI embeddings service.
 *
 * Supports synchronous embedding (`embed`) and asynchronous batches
 * (`submitBatch` / `pollBatch`). The base URL and API key are resolved
 * from local settings / environment via resolveUlpiUrl() and
 * resolveApiKey() unless a base URL is passed explicitly.
 */
var UlpiEmbedder = class {
  provider = "ulpi";
  model;
  dimensions;
  supportsBatch = true; // advertises the submitBatch/pollBatch capability
  baseUrl;
  apiKey;
  /**
   * @param {string} model - ULPI embedding model name (default "code").
   * @param {number} dimensions - expected embedding dimensionality.
   * @param {string} [baseUrl] - service URL; falls back to resolveUlpiUrl().
   */
  constructor(model = "code", dimensions = 1024, baseUrl) {
    this.model = model;
    this.dimensions = dimensions;
    this.baseUrl = baseUrl ?? resolveUlpiUrl();
    this.apiKey = resolveApiKey("ulpi");
  }
  // Common request headers; Authorization is only sent when a key is configured.
  get headers() {
    const h = { "Content-Type": "application/json" };
    if (this.apiKey) h["Authorization"] = `Bearer ${this.apiKey}`;
    return h;
  }
  /**
   * Embed texts synchronously, retrying up to MAX_RETRIES times with
   * exponential backoff on 429/5xx responses. Results are returned in
   * input order (the API may deliver items out of order).
   * @param {string[]} texts
   * @returns {Promise<number[][]>} one embedding vector per input text
   */
  async embed(texts) {
    if (texts.length === 0) return [];
    const body = {
      model: this.model,
      input: texts,
      encoding_format: "float"
    };
    let lastError = null;
    for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
      try {
        const response = await fetch(`${this.baseUrl}/api/v1/embeddings`, {
          method: "POST",
          headers: this.headers,
          body: JSON.stringify(body),
          redirect: "error" // a redirect would indicate a misconfigured endpoint
        });
        if (!response.ok) {
          lastError = await this.handleErrorResponse(response);
          // Rate limits and server errors are treated as transient.
          if (response.status === 429 || response.status >= 500) {
            await sleep(RETRY_BASE_MS * Math.pow(2, attempt));
            continue;
          }
          throw lastError;
        }
        const data = await this.safeJson(response);
        if (!Array.isArray(data.data)) {
          throw new Error(
            `ULPI embedding API error (${response.status}): ${JSON.stringify(data)}`
          );
        }
        // Restore input order by index before extracting the vectors.
        const sorted = data.data.sort((a, b) => a.index - b.index);
        return sorted.map((item) => item.embedding);
      } catch (err) {
        // classifyError THROWS for non-recoverable errors (auth, credits,
        // connection failures), escaping the retry loop immediately.
        lastError = this.classifyError(err, attempt);
        // NOTE(review): this retries only when classifyError returned a
        // *different* object than err, i.e. when a non-Error value was
        // thrown (classifyError wraps it). Verify this is the intended
        // retry condition — ordinary Errors fall through to the throw below.
        if (lastError !== err && attempt < MAX_RETRIES - 1) {
          await sleep(RETRY_BASE_MS * Math.pow(2, attempt));
          continue;
        }
        throw lastError;
      }
    }
    throw lastError ?? new Error("ULPI embedding failed after retries");
  }
  /**
   * Submit all texts as an async batch (up to 10,000).
   * Returns immediately with a batch ID for polling.
   * @param {string[]} texts - inputs to embed (must be non-empty).
   * @param {string} [webhookUrl] - optional callback URL for completion.
   * @returns {Promise<string>} the server-assigned batch ID
   * @throws on empty input, HTTP errors, or a malformed response
   */
  async submitBatch(texts, webhookUrl) {
    if (texts.length === 0) throw new Error("Cannot submit empty batch");
    const body = {
      model: this.model,
      input: texts
    };
    if (webhookUrl) {
      body.webhook_url = webhookUrl;
    }
    const payload = JSON.stringify(body);
    // Diagnostic sizing stats, logged to stderr so stdout stays clean.
    const totalChars = texts.reduce((sum, t) => sum + t.length, 0);
    const avgChars = Math.round(totalChars / texts.length);
    const maxChars = Math.max(...texts.map((t) => t.length));
    console.error(
      `[ulpi-embedder] submitBatch: ${texts.length} texts, payload=${(payload.length / 1024).toFixed(0)}KB, chars: total=${totalChars} avg=${avgChars} max=${maxChars}, model=${this.model}, url=${this.baseUrl}/api/v1/embeddings/batch`
    );
    // Abort the submission if the server does not answer within 60s.
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 6e4);
    try {
      const response = await fetch(`${this.baseUrl}/api/v1/embeddings/batch`, {
        method: "POST",
        headers: this.headers,
        body: payload,
        redirect: "error",
        signal: controller.signal
      });
      if (!response.ok) {
        throw await this.handleErrorResponse(response);
      }
      const result = await this.safeJson(response);
      if (!result.data?.id) {
        throw new Error(
          `ULPI batch API error (${response.status}): ${JSON.stringify(result)}`
        );
      }
      console.error(`[ulpi-embedder] submitBatch: got batchId=${result.data.id}`);
      return result.data.id;
    } finally {
      clearTimeout(timeoutId);
    }
  }
  /**
   * Poll the status of an async batch.
   * Returns all available results on every poll (cumulative, sorted by index).
   * This enables progressive storage — callers can store partial results
   * as they arrive rather than waiting for full batch completion.
   * @param {string} batchId - ID returned by submitBatch().
   * @returns {Promise<{id: string, status: string, totalInputs: number, processedInputs: number, results: number[][]|undefined}>}
   */
  async pollBatch(batchId) {
    // Abort the poll request if the server does not answer within 30s.
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 3e4);
    try {
      const response = await fetch(
        `${this.baseUrl}/api/v1/embeddings/batch/${encodeURIComponent(batchId)}`,
        { headers: this.headers, redirect: "error", signal: controller.signal }
      );
      if (!response.ok) {
        throw await this.handleErrorResponse(response);
      }
      const json = await this.safeJson(response);
      const batch = json.data;
      let results;
      // `results` stays undefined until at least one embedding is available.
      if (json.results?.data && json.results.data.length > 0) {
        const sorted = json.results.data.sort((a, b) => a.index - b.index);
        results = sorted.map((item) => item.embedding);
      }
      // Report "completed" once every input has been processed, unless the
      // server itself marked the batch as failed.
      const allProcessed = batch.processed_inputs >= batch.total_inputs;
      return {
        id: batch.id,
        status: allProcessed && batch.status !== "failed" ? "completed" : batch.status,
        totalInputs: batch.total_inputs,
        processedInputs: batch.processed_inputs,
        results
      };
    } finally {
      clearTimeout(timeoutId);
    }
  }
  /**
   * Parse JSON from response, guarding against HTML error pages.
   * @throws if the server answered with text/html (e.g. a proxy error page)
   */
  async safeJson(response) {
    const ct = response.headers.get("content-type") ?? "";
    if (ct.includes("text/html")) {
      const snippet = (await response.text()).slice(0, 200);
      throw new Error(
        `ULPI embedding API returned HTML instead of JSON (${response.status}). The endpoint may be misconfigured. Response: ${snippet}`
      );
    }
    return await response.json();
  }
  /**
   * Build (but do NOT throw) an Error describing a non-OK response,
   * with actionable messages for auth (401/403) and billing (402) failures.
   * @returns {Promise<Error>}
   */
  async handleErrorResponse(response) {
    const errorText = await response.text();
    if (response.status === 401 || response.status === 403) {
      return new Error(
        `Authentication failed (${response.status}). Check your ULPI API key with: ulpi config get ulpi-key`
      );
    }
    if (response.status === 402) {
      return new Error(
        "Insufficient credits. Visit https://ulpi.io/billing to add credits to your account."
      );
    }
    return new Error(`ULPI embedding API error (${response.status}): ${errorText.slice(0, 200)}`);
  }
  /**
   * Classify an error thrown during embed():
   * - non-Error values are wrapped in a new Error and RETURNED (retryable);
   * - auth/billing errors are re-THROWN (non-recoverable, no retry);
   * - connection failures are THROWN as an actionable message (no retry);
   * - anything else is RETURNED unchanged.
   * The mixed return/throw contract is what drives the retry decision in embed().
   */
  classifyError(err, _attempt) {
    if (!(err instanceof Error)) return new Error(String(err));
    if (err.message.startsWith("Authentication failed") || err.message.startsWith("Insufficient credits")) {
      throw err;
    }
    if (err.message.includes("fetch") || err.message.includes("ECONNREFUSED")) {
      throw new Error(
        `Cannot connect to ULPI embedding service at ${this.baseUrl}. Check your network connection or configure the URL with: ulpi config set ulpi-url <url>`
      );
    }
    return err;
  }
};
|
|
193
|
+
/** Resolve after `delayMs` milliseconds; used for retry backoff. */
function sleep(delayMs) {
  return new Promise((wake) => {
    setTimeout(() => wake(), delayMs);
  });
}
|
|
196
|
+
|
|
197
|
+
export {
|
|
198
|
+
BATCH_POLL_INTERVAL_MS,
|
|
199
|
+
UlpiEmbedder
|
|
200
|
+
};
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// ../../packages/intelligence/embed-engine/dist/chunk-GWXOGQ7U.js
|
|
4
|
+
import { readFileSync } from "fs";
|
|
5
|
+
import { homedir } from "os";
|
|
6
|
+
import { join } from "path";
|
|
7
|
+
// Module-level cache for the parsed settings file. `undefined` means
// "not loaded yet"; any other value (including {}) means "loaded".
var _cached;
/**
 * Load ~/.ulpi/settings.json once and cache the parsed result.
 *
 * Always returns a non-null object. Fixes two defects in the previous
 * version: (1) a settings file containing valid-but-non-object JSON
 * (e.g. a bare `null`) was returned as-is, crashing callers that do
 * `settings.apiKeys`; (2) the truthiness cache check (`if (_cached)`)
 * re-read the file on every call whenever the cached value was falsy.
 * @returns {object} parsed settings, or {} when missing/invalid/non-object
 */
function loadSettings() {
  if (_cached !== undefined) return _cached;
  try {
    const raw = readFileSync(join(homedir(), ".ulpi", "settings.json"), "utf8");
    const parsed = JSON.parse(raw);
    // Guard against valid-but-unusable JSON such as `null` or a number.
    _cached = parsed !== null && typeof parsed === "object" ? parsed : {};
  } catch {
    // Missing or malformed file: behave as if no settings are configured.
    _cached = {};
  }
  return _cached;
}
|
|
19
|
+
/**
 * Look up the API key for a provider: a key in the user's settings file
 * wins; otherwise fall back to the provider's conventional environment
 * variable. Returns undefined when neither is set.
 */
function resolveApiKey(provider) {
  const configured = loadSettings().apiKeys?.[provider];
  if (configured) return configured;
  const envVarByProvider = {
    openai: "OPENAI_API_KEY",
    anthropic: "ANTHROPIC_API_KEY",
    ulpi: "ULPI_API_KEY"
  };
  return process.env[envVarByProvider[provider]];
}
|
|
30
|
+
/**
 * Resolve the ULPI service base URL: settings file first, then the
 * ULPI_EMBED_URL environment variable, then the local default.
 */
function resolveUlpiUrl() {
  const { ulpiUrl } = loadSettings();
  if (ulpiUrl != null) return ulpiUrl;
  return process.env.ULPI_EMBED_URL ?? "http://localhost:10271";
}
|
|
34
|
+
|
|
35
|
+
export {
|
|
36
|
+
resolveApiKey,
|
|
37
|
+
resolveUlpiUrl
|
|
38
|
+
};
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// ../../packages/intelligence/embed-engine/dist/chunk-RU4VHMIP.js
|
|
4
|
+
var DEFAULT_OLLAMA_URL = "http://localhost:11434";
var MAX_RETRIES = 3;
var RETRY_BASE_MS = 500; // exponential backoff base: 500ms, 1s, 2s

/**
 * Embedder backed by a local Ollama server (/api/embed).
 *
 * Texts are embedded one at a time. Transient server errors (5xx) are
 * retried with exponential backoff; client errors (4xx) and connection
 * failures fail immediately.
 */
var OllamaEmbedder = class {
  provider = "ollama";
  model;
  dimensions;
  baseUrl;
  /**
   * @param {string} model - Ollama embedding model (default "nomic-embed-text").
   * @param {number} dimensions - expected embedding dimensionality.
   */
  constructor(model = "nomic-embed-text", dimensions = 768) {
    this.model = model;
    this.dimensions = dimensions;
    this.baseUrl = process.env.OLLAMA_HOST ?? DEFAULT_OLLAMA_URL;
  }
  /**
   * Embed texts sequentially (one request per text).
   * @param {string[]} texts
   * @returns {Promise<number[][]>} embeddings in input order
   */
  async embed(texts) {
    if (texts.length === 0) return [];
    const results = [];
    for (const text of texts) {
      const embedding = await this.embedSingle(text);
      results.push(embedding);
    }
    return results;
  }
  /**
   * Embed one text, retrying up to MAX_RETRIES times on transient failures.
   *
   * Fix over the previous version: a non-5xx HTTP error was thrown inside
   * the try block, caught by this method's own catch, and retried with
   * backoff up to MAX_RETRIES times. Client errors (e.g. 404 for an
   * unknown model) are not transient, so they now fail on the first attempt.
   * @param {string} text
   * @returns {Promise<number[]>}
   * @throws on connection failure, 4xx responses, or exhausted retries
   */
  async embedSingle(text) {
    let lastError = null;
    for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
      let fatal = false; // set when the failure is known to be non-retryable
      try {
        const response = await fetch(`${this.baseUrl}/api/embed`, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({ model: this.model, input: text })
        });
        if (!response.ok) {
          const errorText = await response.text();
          lastError = new Error(`Ollama API error (${response.status}): ${errorText.slice(0, 200)}`);
          fatal = response.status < 500; // 4xx: retrying cannot help
          throw lastError;
        }
        const data = await response.json();
        if (!data.embeddings || data.embeddings.length === 0) {
          throw new Error("Ollama returned empty embeddings");
        }
        return data.embeddings[0];
      } catch (err) {
        // Connection-level failures get an actionable message and no retry.
        if (err instanceof Error && err.message.includes("fetch")) {
          throw new Error(
            `Cannot connect to Ollama at ${this.baseUrl}. Is Ollama running? Start it with: ollama serve`
          );
        }
        lastError = err instanceof Error ? err : new Error(String(err));
        if (fatal) throw lastError;
        if (attempt < MAX_RETRIES - 1) {
          await sleep(RETRY_BASE_MS * Math.pow(2, attempt));
        }
      }
    }
    throw lastError ?? new Error("Ollama embedding failed after retries");
  }
};
|
|
66
|
+
/** Promise-based delay helper backing the retry backoff. */
function sleep(ms) {
  const schedule = (resolve) => void setTimeout(resolve, ms);
  return new Promise(schedule);
}
|
|
69
|
+
|
|
70
|
+
export {
|
|
71
|
+
OllamaEmbedder
|
|
72
|
+
};
|