@arabold/docs-mcp-server 2.0.4 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -2
- package/db/migrations/012-add-source-content-type.sql +11 -0
- package/dist/assets/main.css +1 -1
- package/dist/assets/main.js +673 -630
- package/dist/assets/main.js.map +1 -1
- package/dist/index.js +7 -18952
- package/dist/index.js.map +1 -1
- package/dist/logger-CLtABTNb.js +99 -0
- package/dist/logger-CLtABTNb.js.map +1 -0
- package/dist/main-ntnRQ8Za.js +18182 -0
- package/dist/main-ntnRQ8Za.js.map +1 -0
- package/dist/utils-CZz1DsHw.js +1063 -0
- package/dist/utils-CZz1DsHw.js.map +1 -0
- package/package.json +48 -45
- package/public/assets/main.css +1 -1
- package/public/assets/main.js +673 -630
- package/public/assets/main.js.map +1 -1
|
@@ -0,0 +1,1063 @@
|
|
|
1
|
+
import { execSync } from "node:child_process";
|
|
2
|
+
import fs, { existsSync } from "node:fs";
|
|
3
|
+
import { chromium } from "playwright";
|
|
4
|
+
import EventEmitter from "node:events";
|
|
5
|
+
import { l as logger, n as normalizeEnvValue } from "./logger-CLtABTNb.js";
|
|
6
|
+
import { randomUUID } from "node:crypto";
|
|
7
|
+
import path from "node:path";
|
|
8
|
+
import { fileURLToPath } from "node:url";
|
|
9
|
+
import envPaths from "env-paths";
|
|
10
|
+
import { PostHog } from "posthog-node";
|
|
11
|
+
// Memoized answer of getProjectRoot(); stays null until a lookup succeeds.
let projectRoot = null;

/**
 * Finds the closest directory, starting at this module's own directory and
 * moving upwards, that contains a package.json file. The first successful
 * result is cached and returned by every later call.
 *
 * @returns {string} Absolute path of the directory that holds package.json.
 * @throws {Error} If the filesystem root is reached without finding one.
 */
function getProjectRoot() {
  if (projectRoot) {
    return projectRoot;
  }
  let candidate = path.dirname(fileURLToPath(import.meta.url));
  while (!fs.existsSync(path.join(candidate, "package.json"))) {
    const parent = path.dirname(candidate);
    // path.dirname() returns its input unchanged once the root is reached.
    if (parent === candidate) {
      throw new Error("Could not find project root containing package.json.");
    }
    candidate = parent;
  }
  projectRoot = candidate;
  return candidate;
}
|
|
31
|
+
/**
 * Works out which directory holds the document store and tries to create it.
 *
 * Resolution order:
 *   1. `storePath`, when given, resolved to an absolute path.
 *   2. `<project root>/.store`, when a `documents.db` file already exists there.
 *   3. The `data` directory reported by env-paths for "docs-mcp-server".
 *
 * Directory creation is best-effort: a failure is logged as a warning and the
 * path is still returned.
 *
 * @param {string} [storePath] Explicit store location.
 * @returns {string} The chosen directory.
 */
function resolveStorePath(storePath) {
  const chooseDirectory = () => {
    if (storePath) {
      return path.resolve(storePath);
    }
    const oldStoreDir = path.join(getProjectRoot(), ".store");
    if (fs.existsSync(path.join(oldStoreDir, "documents.db"))) {
      return oldStoreDir;
    }
    return envPaths("docs-mcp-server", { suffix: "" }).data;
  };

  const dbDir = chooseDirectory();
  try {
    fs.mkdirSync(dbDir, { recursive: true });
  } catch (error) {
    logger.warn(`⚠️ Failed to create database directory ${dbDir}: ${error}`);
  }
  return dbDir;
}
|
|
54
|
+
/**
 * Holds the single on/off flag for telemetry. The shared object is obtained
 * through TelemetryConfig.getInstance().
 */
class TelemetryConfig {
  // Shared instance, created on the first getInstance() call.
  static instance;
  // Telemetry is on until setEnabled() says otherwise.
  enabled = true;

  /** @returns {TelemetryConfig} The shared configuration object. */
  static getInstance() {
    TelemetryConfig.instance ??= new TelemetryConfig();
    return TelemetryConfig.instance;
  }

  /** @returns {boolean} Current value of the flag. */
  isEnabled() {
    return this.enabled;
  }

  /** @param {boolean} enabled New value of the flag. */
  setEnabled(enabled) {
    this.enabled = enabled;
  }
}
|
|
73
|
+
/**
 * Returns the installation identifier stored in `installation.id` inside the
 * resolved store directory, creating and saving a new UUID when the file is
 * missing or blank.
 *
 * Any error (resolving the directory, reading, writing) is swallowed on
 * purpose: the caller then gets a fresh UUID that is not persisted.
 *
 * @param {string} [storePath] Forwarded to resolveStorePath().
 * @returns {string} The stored or newly generated identifier.
 */
function generateInstallationId(storePath) {
  try {
    const storeDir = resolveStorePath(storePath);
    const idFile = path.join(storeDir, "installation.id");
    const storedId = fs.existsSync(idFile) ? fs.readFileSync(idFile, "utf8").trim() : "";
    if (storedId) {
      return storedId;
    }
    const freshId = randomUUID();
    fs.mkdirSync(storeDir, { recursive: true });
    fs.writeFileSync(idFile, freshId, "utf8");
    return freshId;
  } catch {
    return randomUUID();
  }
}
|
|
91
|
+
/**
 * Reports whether telemetry is switched on in the shared TelemetryConfig.
 *
 * @returns {boolean} The flag held by TelemetryConfig.getInstance().
 */
function shouldEnableTelemetry() {
  const config = TelemetryConfig.getInstance();
  return config.isEnabled();
}
|
|
94
|
+
// Event type identifiers; every member's value is identical to its name.
// Declared with `var` so the binding is hoisted exactly as before.
var EventType = {
  JOB_STATUS_CHANGE: "JOB_STATUS_CHANGE",
  JOB_PROGRESS: "JOB_PROGRESS",
  LIBRARY_CHANGE: "LIBRARY_CHANGE",
  JOB_LIST_CHANGE: "JOB_LIST_CHANGE",
};
|
|
101
|
+
// Maps each event type identifier to its kebab-case event name.
const ServerEventName = {
  JOB_STATUS_CHANGE: "job-status-change",
  JOB_PROGRESS: "job-progress",
  LIBRARY_CHANGE: "library-change",
  JOB_LIST_CHANGE: "job-list-change",
};
|
|
119
|
+
// Status values a pipeline job can carry.
// Declared with `var` so the binding is hoisted exactly as before.
var PipelineJobStatus = {
  QUEUED: "queued",
  RUNNING: "running",
  COMPLETED: "completed",
  FAILED: "failed",
  CANCELLING: "cancelling",
  CANCELLED: "cancelled",
};
|
|
128
|
+
/**
 * Rewrites every ASCII capital letter as an underscore followed by its
 * lowercase form, e.g. "jobId" -> "job_id". A leading capital therefore
 * produces a leading underscore.
 *
 * @param {string} str Name to convert.
 * @returns {string} The converted name.
 */
function camelToSnakeCase(str) {
  const underscoreLower = (capital) => "_" + capital.toLowerCase();
  return str.replace(/[A-Z]/g, underscoreLower);
}
|
|
131
|
+
/**
 * Returns a copy of `obj` whose keys, at every nesting level, have been passed
 * through camelToSnakeCase(). The input is not modified.
 *
 * Values are handled as follows:
 *   - arrays stay arrays, and their elements are converted recursively
 *     (including arrays nested inside arrays);
 *   - Date instances and primitives (including null/undefined) are kept as-is;
 *   - every other object is converted recursively.
 *
 * @param {object} obj Object whose own enumerable keys should be converted.
 * @returns {object} New object with snake_case keys.
 */
function convertPropertiesToSnakeCase(obj) {
  const convertValue = (value) => {
    if (Array.isArray(value)) {
      // Recurse element-wise so a nested array is not turned into an
      // index-keyed plain object.
      return value.map(convertValue);
    }
    if (value && typeof value === "object" && !(value instanceof Date)) {
      return convertPropertiesToSnakeCase(value);
    }
    return value;
  };

  const result = {};
  for (const [key, value] of Object.entries(obj)) {
    result[camelToSnakeCase(key)] = convertValue(value);
  }
  return result;
}
|
|
147
|
+
/**
 * Returns a shallow copy of `properties` in which three well-known keys are
 * renamed to their `$`-prefixed counterparts:
 *
 *   sessionId  -> $session_id
 *   startTime  -> $start_timestamp (via startTime.toISOString())
 *   appVersion -> $app_version
 *
 * A key is only renamed when its value is truthy; otherwise it is left as-is.
 *
 * @param {object} properties Event properties; not modified.
 * @returns {object} The copy with the renames applied.
 */
function addPostHogStandardProperties(properties) {
  const renames = [
    ["sessionId", "$session_id", (value) => value],
    ["startTime", "$start_timestamp", (value) => value.toISOString()],
    ["appVersion", "$app_version", (value) => value],
  ];
  const result = { ...properties };
  for (const [sourceKey, targetKey, toTargetValue] of renames) {
    const value = properties[sourceKey];
    if (!value) {
      continue;
    }
    result[targetKey] = toTargetValue(value);
    delete result[sourceKey];
  }
  return result;
}
|
|
163
|
+
/**
 * Small wrapper around the posthog-node client.
 *
 * When constructed with `enabled === false`, or when creating the underlying
 * client throws, every method becomes a no-op. Failures while sending are
 * logged at debug level and never propagate to the caller.
 */
class PostHogClient {
  // Underlying posthog-node client; stays undefined while disabled.
  client;
  // False when disabled by the caller or after a failed initialization.
  enabled;

  // PostHog configuration. Only host, flushAt, flushInterval and disableGeoip
  // are forwarded to the PostHog constructor below.
  static CONFIG = {
    host: "https://app.posthog.com",
    // Performance optimizations
    flushAt: 20, // Batch size - send after 20 events
    flushInterval: 1e4, // 10 seconds - send after time
    // Privacy settings
    disableGeoip: true, // Don't collect IP geolocation
    disableSessionRecording: true, // Never record sessions
    disableSurveys: true, // No user surveys
    // Data handling
    persistence: "memory", // No disk persistence for privacy
  };

  /**
   * @param {boolean} enabled Whether events should be sent at all.
   */
  constructor(enabled) {
    this.enabled = enabled;
    if (!this.enabled) {
      return;
    }
    try {
      this.client = new PostHog("phc_g7pXZZdUiAQXdnwUANjloQWMvO0amEDTBaeDSWgXgrQ", {
        host: PostHogClient.CONFIG.host,
        flushAt: PostHogClient.CONFIG.flushAt,
        flushInterval: PostHogClient.CONFIG.flushInterval,
        disableGeoip: PostHogClient.CONFIG.disableGeoip,
      });
      logger.debug("PostHog client initialized");
    } catch (error) {
      logger.debug(
        `PostHog initialization failed: ${error instanceof Error ? error.message : "Unknown error"}`
      );
      this.enabled = false;
    }
  }

  /**
   * Send event to PostHog.
   *
   * @param {string} distinctId Identifier the event is attributed to.
   * @param {string} event Event name.
   * @param {object} properties Event properties; standard keys are renamed and
   *   all keys converted to snake_case before sending.
   */
  capture(distinctId, event, properties) {
    if (!this.enabled || !this.client) return;
    try {
      const enhancedProperties = addPostHogStandardProperties(properties);
      const snakeCaseProperties = convertPropertiesToSnakeCase(enhancedProperties);
      this.client.capture({
        distinctId,
        event,
        properties: snakeCaseProperties,
      });
      logger.debug(`PostHog event captured: ${event}`);
    } catch (error) {
      logger.debug(
        `PostHog capture error: ${error instanceof Error ? error.message : "Unknown error"}`
      );
    }
  }

  /**
   * Capture exception using PostHog's native error tracking.
   *
   * @param {string} distinctId Identifier the exception is attributed to.
   * @param {Error} error The exception to report.
   * @param {object} [properties] Extra properties, processed like in capture().
   */
  captureException(distinctId, error, properties) {
    if (!this.enabled || !this.client) return;
    try {
      const enhancedProperties = addPostHogStandardProperties(properties || {});
      const snakeCaseProperties = convertPropertiesToSnakeCase(enhancedProperties);
      // posthog-node takes positional arguments here (error, distinctId,
      // additionalProperties). Passing a single wrapper object would make the
      // wrapper itself the reported error and lose the distinct id.
      this.client.captureException(error, distinctId, snakeCaseProperties);
      logger.debug(`PostHog exception captured: ${error.constructor.name}`);
    } catch (captureError) {
      logger.debug(
        `PostHog captureException error: ${captureError instanceof Error ? captureError.message : "Unknown error"}`
      );
    }
  }

  /**
   * Graceful shutdown with event flushing. Errors are logged, not thrown.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    if (this.client) {
      try {
        await this.client.shutdown();
        logger.debug("PostHog client shutdown complete");
      } catch (error) {
        logger.debug(
          `PostHog shutdown error: ${error instanceof Error ? error.message : "Unknown error"}`
        );
      }
    }
  }

  /**
   * Check if client is enabled and ready.
   *
   * @returns {boolean} True only when enabled and a client was created.
   */
  isEnabled() {
    return this.enabled && !!this.client;
  }
}
|
|
267
|
+
// Names of the telemetry events this module emits.
// Declared with `var` so the binding is hoisted exactly as before.
var TelemetryEvent = {
  APP_STARTED: "app_started",
  APP_SHUTDOWN: "app_shutdown",
  CLI_COMMAND: "cli_command",
  TOOL_USED: "tool_used",
  PIPELINE_JOB_STARTED: "pipeline_job_started",
  PIPELINE_JOB_COMPLETED: "pipeline_job_completed",
  PIPELINE_JOB_FAILED: "pipeline_job_failed",
};
|
|
277
|
+
/**
 * Front door for telemetry: enriches events with a global context and a
 * timestamp and hands them to a PostHogClient under a stable installation id.
 * All methods are no-ops while telemetry is disabled.
 */
class Telemetry {
  // Transport used to deliver events.
  postHogClient;
  // Snapshot of the enabled flag taken at construction time.
  enabled;
  // Installation id used as the distinct id of every event.
  distinctId;
  // Properties merged into every event; replaced by setGlobalContext().
  globalContext = {};

  /**
   * Create a new Telemetry instance with proper initialization.
   * This is the recommended way to create Telemetry instances.
   *
   * @param {string} [storePath] Store location the installation id is read
   *   from (or written to). Omit to use the default store location.
   * @returns {Telemetry}
   */
  static create(storePath) {
    const config = TelemetryConfig.getInstance();
    const telemetry2 = new Telemetry(config.isEnabled(), storePath);
    // The instance is disabled exactly when the configuration flag is off.
    logger.debug(
      telemetry2.isEnabled() ? "Telemetry enabled" : "Telemetry disabled (user preference)"
    );
    return telemetry2;
  }

  /**
   * Private constructor - use Telemetry.create() instead.
   *
   * @param {boolean} [enabled=true] Whether events should be sent.
   * @param {string} [storePath] Forwarded to generateInstallationId().
   */
  constructor(enabled = true, storePath) {
    this.enabled = enabled;
    this.distinctId = generateInstallationId(storePath);
    this.postHogClient = new PostHogClient(this.enabled);
  }

  /**
   * Set global application context that will be included in all events.
   * The object is copied, so later changes to `context` have no effect.
   */
  setGlobalContext(context) {
    this.globalContext = { ...context };
  }

  /**
   * Get a copy of the current global context.
   */
  getGlobalContext() {
    return { ...this.globalContext };
  }

  /**
   * Send an event. Event-specific properties override the global context, and
   * a `timestamp` (ISO string of the current time) is always added last.
   *
   * @param {string} event Event name.
   * @param {object} [properties] Event-specific properties.
   */
  track(event, properties = {}) {
    if (!this.enabled) return;
    const enrichedProperties = {
      ...this.globalContext,
      ...properties,
      timestamp: new Date().toISOString(),
    };
    logger.debug(`Tracking event: ${event}`);
    this.postHogClient.capture(this.distinctId, event, enrichedProperties);
  }

  /**
   * Capture exception using PostHog's native error tracking with global
   * context. Values that are not Error instances are wrapped in an Error
   * built from their string form.
   *
   * @param {unknown} error The thrown value.
   * @param {object} [properties] Extra properties for the report.
   */
  captureException(error, properties = {}) {
    if (!this.enabled) return;
    const enrichedProperties = {
      ...this.globalContext,
      ...properties,
      timestamp: new Date().toISOString(),
    };
    logger.debug(
      `Capturing exception: ${error instanceof Error ? error.message : String(error)}`
    );
    this.postHogClient.captureException(
      this.distinctId,
      error instanceof Error ? error : new Error(String(error)),
      enrichedProperties
    );
  }

  /**
   * Graceful shutdown with event flushing.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    if (!this.enabled) return;
    await this.postHogClient.shutdown();
  }

  /**
   * Check if telemetry is enabled.
   */
  isEnabled() {
    return this.enabled;
  }
}

// Shared Telemetry instance; created on first use or by initTelemetry().
let telemetryInstance = null;

/**
 * Returns the shared Telemetry instance, creating it with the default store
 * location when initTelemetry() has not run yet.
 */
function getTelemetryInstance() {
  if (!telemetryInstance) {
    telemetryInstance = Telemetry.create();
  }
  return telemetryInstance;
}

/**
 * Applies the telemetry settings and replaces the shared Telemetry instance.
 * The installation id of the new instance comes from `options.storePath`, so
 * events are attributed to the id stored next to the configured store.
 *
 * @param {{ enabled: boolean, storePath?: string }} options
 */
function initTelemetry(options) {
  TelemetryConfig.getInstance().setEnabled(options.enabled);
  telemetryInstance = Telemetry.create(options.storePath);
}
|
|
374
|
+
// Stand-in for the shared Telemetry instance. Every property read is resolved
// against getTelemetryInstance() at access time, so the instance is created
// lazily and a replaced instance is picked up automatically. Functions are
// returned bound to the instance so they can be detached and still work.
const telemetry = new Proxy({}, {
  get: (_target, prop) => {
    const instance = getTelemetryInstance();
    const member = instance[prop];
    return typeof member === "function" ? member.bind(instance) : member;
  },
});
|
|
384
|
+
/**
 * Listens to job events on an event bus and turns job status changes into
 * telemetry events.
 */
class TelemetryService {
  // Bus the service subscribes to.
  eventBus;
  // Unsubscribe callbacks returned by eventBus.on().
  unsubscribers = [];

  /**
   * @param eventBus Object whose on(type, listener) returns an unsubscribe function.
   */
  constructor(eventBus) {
    this.eventBus = eventBus;
    this.setupEventListeners();
  }

  /**
   * Subscribes to job status and job progress events and remembers how to
   * unsubscribe from both.
   */
  setupEventListeners() {
    const subscriptions = [
      [EventType.JOB_STATUS_CHANGE, this.handleJobStatusChange],
      [EventType.JOB_PROGRESS, this.handleJobProgress],
    ];
    const removers = subscriptions.map(([type, handler]) =>
      this.eventBus.on(type, handler.bind(this))
    );
    this.unsubscribers.push(...removers);
    logger.debug("TelemetryService initialized and listening to events");
  }

  /**
   * Tracks a telemetry event when a job becomes running, completed or failed;
   * every other status is ignored.
   *
   * Durations are measured from job.startedAt to the moment this handler runs,
   * and the queue wait time from job.createdAt to job.startedAt. Both are null
   * when the required timestamps are missing.
   */
  handleJobStatusChange(job) {
    const { startedAt, createdAt } = job;
    const duration = startedAt ? Date.now() - startedAt.getTime() : null;
    const queueWaitTime =
      startedAt && createdAt ? startedAt.getTime() - createdAt.getTime() : null;
    const hasVersion = !!job.version;
    const maxPagesConfigured = job.progressMaxPages || 0;

    if (job.status === PipelineJobStatus.RUNNING) {
      telemetry.track(TelemetryEvent.PIPELINE_JOB_STARTED, {
        jobId: job.id,
        library: job.library,
        hasVersion,
        maxPagesConfigured,
        queueWaitTimeMs: queueWaitTime,
      });
      return;
    }

    // Fields shared by the two terminal events.
    const outcome = {
      jobId: job.id,
      library: job.library,
      durationMs: duration,
      pagesProcessed: job.progressPages || 0,
      maxPagesConfigured,
      hasVersion,
    };

    if (job.status === PipelineJobStatus.COMPLETED) {
      // duration is in milliseconds, hence the factor 1e3 for pages per second.
      const pagesPerSecond =
        duration && job.progressPages ? Math.round(job.progressPages / duration * 1e3) : 0;
      telemetry.track(TelemetryEvent.PIPELINE_JOB_COMPLETED, {
        ...outcome,
        throughputPagesPerSecond: pagesPerSecond,
      });
    } else if (job.status === PipelineJobStatus.FAILED) {
      telemetry.track(TelemetryEvent.PIPELINE_JOB_FAILED, {
        ...outcome,
        hasError: !!job.error,
        errorMessage: job.error?.message,
      });
    }
  }

  /**
   * Receives job progress events. Intentionally does nothing at the moment.
   */
  handleJobProgress(_event) {}

  /**
   * Removes every subscription made by setupEventListeners().
   */
  shutdown() {
    this.unsubscribers.forEach((unsubscribe) => {
      unsubscribe();
    });
    this.unsubscribers = [];
    logger.debug("TelemetryService shut down");
  }
}
|
|
465
|
+
/**
 * Minimal publish/subscribe hub built on a private Node.js EventEmitter.
 */
class EventBusService {
  // Underlying emitter; its listener limit is raised to 100 in the constructor.
  emitter = new EventEmitter();

  constructor() {
    this.emitter.setMaxListeners(100);
  }

  /**
   * Logs the event type at debug level, then delivers `payload` to every
   * subscriber of `eventType`.
   */
  emit(eventType, payload) {
    logger.debug(`Event emitted: ${eventType}`);
    this.emitter.emit(eventType, payload);
  }

  /**
   * Registers `listener` for `eventType`.
   *
   * @returns {Function} Call it to remove this listener again.
   */
  on(eventType, listener) {
    this.emitter.on(eventType, listener);
    const unsubscribe = () => this.emitter.off(eventType, listener);
    return unsubscribe;
  }

  /**
   * Registers `listener` for the next `eventType` event only.
   */
  once(eventType, listener) {
    this.emitter.once(eventType, listener);
  }

  /**
   * Removes one previously registered listener.
   */
  off(eventType, listener) {
    this.emitter.off(eventType, listener);
  }

  /**
   * Removes the listeners of `eventType`, or all listeners of every type when
   * `eventType` is undefined.
   */
  removeAllListeners(eventType) {
    // The emitter distinguishes "no argument" from "undefined argument", so
    // the two cases must stay separate calls.
    if (eventType !== undefined) {
      this.emitter.removeAllListeners(eventType);
      return;
    }
    this.emitter.removeAllListeners();
  }

  /**
   * @returns {number} How many listeners are registered for `eventType`.
   */
  listenerCount(eventType) {
    return this.emitter.listenerCount(eventType);
  }
}
|
|
515
|
+
class EmbeddingConfig {
|
|
516
|
+
static instance = null;
|
|
517
|
+
/**
|
|
518
|
+
* Get the singleton instance of EmbeddingConfig.
|
|
519
|
+
* Creates the instance if it doesn't exist.
|
|
520
|
+
*/
|
|
521
|
+
static getInstance() {
|
|
522
|
+
if (EmbeddingConfig.instance === null) {
|
|
523
|
+
EmbeddingConfig.instance = new EmbeddingConfig();
|
|
524
|
+
}
|
|
525
|
+
return EmbeddingConfig.instance;
|
|
526
|
+
}
|
|
527
|
+
/**
|
|
528
|
+
* Reset the singleton instance (useful for testing).
|
|
529
|
+
*/
|
|
530
|
+
static resetInstance() {
|
|
531
|
+
EmbeddingConfig.instance = null;
|
|
532
|
+
}
|
|
533
|
+
/**
|
|
534
|
+
* Known dimensions for common embedding models.
|
|
535
|
+
* This avoids expensive API calls for dimension detection in telemetry.
|
|
536
|
+
*
|
|
537
|
+
* Note: The "openai" provider also supports OpenAI-compatible APIs like:
|
|
538
|
+
* - Ollama (local models)
|
|
539
|
+
* - LMStudio (local models)
|
|
540
|
+
* - Any service implementing OpenAI's embedding API
|
|
541
|
+
*/
|
|
542
|
+
knownModelDimensions = {
|
|
543
|
+
// OpenAI models (also works with Ollama, LMStudio, and other OpenAI-compatible APIs)
|
|
544
|
+
"text-embedding-3-small": 1536,
|
|
545
|
+
"text-embedding-3-large": 3072,
|
|
546
|
+
"text-embedding-ada-002": 1536,
|
|
547
|
+
// Google Vertex AI models
|
|
548
|
+
"text-embedding-004": 768,
|
|
549
|
+
"textembedding-gecko@003": 768,
|
|
550
|
+
"textembedding-gecko@002": 768,
|
|
551
|
+
"textembedding-gecko@001": 768,
|
|
552
|
+
// Google Gemini models (with MRL support)
|
|
553
|
+
"text-embedding-preview-0409": 768,
|
|
554
|
+
"embedding-001": 768,
|
|
555
|
+
"gemini-embedding-2-preview": 3072,
|
|
556
|
+
// AWS Bedrock models
|
|
557
|
+
// Amazon Titan models
|
|
558
|
+
"amazon.titan-embed-text-v1": 1536,
|
|
559
|
+
"amazon.titan-embed-text-v2:0": 1024,
|
|
560
|
+
"amazon.titan-embed-image-v1": 1024,
|
|
561
|
+
// Image embedding model
|
|
562
|
+
// Cohere models
|
|
563
|
+
"cohere.embed-english-v3": 1024,
|
|
564
|
+
"cohere.embed-multilingual-v3": 1024,
|
|
565
|
+
// SageMaker models (hosted on AWS SageMaker)
|
|
566
|
+
"intfloat/multilingual-e5-large": 1024,
|
|
567
|
+
// Additional AWS models that might be supported
|
|
568
|
+
// Note: Some of these might be placeholders - verify dimensions before use
|
|
569
|
+
// "amazon.nova-embed-multilingual-v1:0": 4096, // Commented out as noted in source
|
|
570
|
+
// MTEB Leaderboard models (source: https://huggingface.co/spaces/mteb/leaderboard)
|
|
571
|
+
// Top performing models from Massive Text Embedding Benchmark
|
|
572
|
+
"sentence-transformers/all-MiniLM-L6-v2": 384,
|
|
573
|
+
"gemini-embedding-001": 3072,
|
|
574
|
+
"Qwen/Qwen3-Embedding-8B": 4096,
|
|
575
|
+
"Qwen/Qwen3-Embedding-4B": 2560,
|
|
576
|
+
"Qwen/Qwen3-Embedding-0.6B": 1024,
|
|
577
|
+
"Linq-AI-Research/Linq-Embed-Mistral": 4096,
|
|
578
|
+
"Alibaba-NLP/gte-Qwen2-7B-instruct": 3584,
|
|
579
|
+
"intfloat/multilingual-e5-large-instruct": 1024,
|
|
580
|
+
"Salesforce/SFR-Embedding-Mistral": 4096,
|
|
581
|
+
"text-multilingual-embedding-002": 768,
|
|
582
|
+
"GritLM/GritLM-7B": 4096,
|
|
583
|
+
"GritLM/GritLM-8x7B": 4096,
|
|
584
|
+
"intfloat/e5-mistral-7b-instruct": 4096,
|
|
585
|
+
"Cohere/Cohere-embed-multilingual-v3.0": 1024,
|
|
586
|
+
"Alibaba-NLP/gte-Qwen2-1.5B-instruct": 8960,
|
|
587
|
+
"Lajavaness/bilingual-embedding-large": 1024,
|
|
588
|
+
"Salesforce/SFR-Embedding-2_R": 4096,
|
|
589
|
+
"NovaSearch/stella_en_1.5B_v5": 8960,
|
|
590
|
+
"NovaSearch/jasper_en_vision_language_v1": 8960,
|
|
591
|
+
"nvidia/NV-Embed-v2": 4096,
|
|
592
|
+
"OrdalieTech/Solon-embeddings-large-0.1": 1024,
|
|
593
|
+
"BAAI/bge-m3": 1024,
|
|
594
|
+
"HIT-TMG/KaLM-embedding-multilingual-mini-v1": 896,
|
|
595
|
+
"jinaai/jina-embeddings-v3": 1024,
|
|
596
|
+
"Alibaba-NLP/gte-multilingual-base": 768,
|
|
597
|
+
"Lajavaness/bilingual-embedding-base": 768,
|
|
598
|
+
"HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": 896,
|
|
599
|
+
"nvidia/NV-Embed-v1": 4096,
|
|
600
|
+
"Cohere/Cohere-embed-multilingual-light-v3.0": 384,
|
|
601
|
+
"manu/bge-m3-custom-fr": 1024,
|
|
602
|
+
"Lajavaness/bilingual-embedding-small": 384,
|
|
603
|
+
"Snowflake/snowflake-arctic-embed-l-v2.0": 1024,
|
|
604
|
+
"intfloat/multilingual-e5-base": 768,
|
|
605
|
+
"voyage-3-lite": 512,
|
|
606
|
+
"voyage-3": 1024,
|
|
607
|
+
"intfloat/multilingual-e5-small": 384,
|
|
608
|
+
"Alibaba-NLP/gte-Qwen1.5-7B-instruct": 4096,
|
|
609
|
+
"Snowflake/snowflake-arctic-embed-m-v2.0": 768,
|
|
610
|
+
"deepvk/USER-bge-m3": 1024,
|
|
611
|
+
"Cohere/Cohere-embed-english-v3.0": 1024,
|
|
612
|
+
"Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": 768,
|
|
613
|
+
"ibm-granite/granite-embedding-278m-multilingual": 768,
|
|
614
|
+
"NovaSearch/stella_en_400M_v5": 4096,
|
|
615
|
+
"omarelshehy/arabic-english-sts-matryoshka": 1024,
|
|
616
|
+
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2": 768,
|
|
617
|
+
"Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": 768,
|
|
618
|
+
"Haon-Chen/speed-embedding-7b-instruct": 4096,
|
|
619
|
+
"sentence-transformers/LaBSE": 768,
|
|
620
|
+
"WhereIsAI/UAE-Large-V1": 1024,
|
|
621
|
+
"ibm-granite/granite-embedding-107m-multilingual": 384,
|
|
622
|
+
"mixedbread-ai/mxbai-embed-large-v1": 1024,
|
|
623
|
+
"intfloat/e5-large-v2": 1024,
|
|
624
|
+
"avsolatorio/GIST-large-Embedding-v0": 1024,
|
|
625
|
+
"sdadas/mmlw-e5-large": 1024,
|
|
626
|
+
"nomic-ai/nomic-embed-text-v1": 768,
|
|
627
|
+
"nomic-ai/nomic-embed-text-v1-ablated": 768,
|
|
628
|
+
"intfloat/e5-base-v2": 768,
|
|
629
|
+
"BAAI/bge-large-en-v1.5": 1024,
|
|
630
|
+
"intfloat/e5-large": 1024,
|
|
631
|
+
"Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": 384,
|
|
632
|
+
"Cohere/Cohere-embed-english-light-v3.0": 384,
|
|
633
|
+
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 768,
|
|
634
|
+
"Gameselo/STS-multilingual-mpnet-base-v2": 768,
|
|
635
|
+
"thenlper/gte-large": 1024,
|
|
636
|
+
"avsolatorio/GIST-Embedding-v0": 768,
|
|
637
|
+
"nomic-ai/nomic-embed-text-v1-unsupervised": 768,
|
|
638
|
+
"infgrad/stella-base-en-v2": 768,
|
|
639
|
+
"avsolatorio/NoInstruct-small-Embedding-v0": 384,
|
|
640
|
+
"dwzhu/e5-base-4k": 768,
|
|
641
|
+
"sdadas/mmlw-e5-base": 768,
|
|
642
|
+
"voyage-multilingual-2": 1024,
|
|
643
|
+
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": 4096,
|
|
644
|
+
"BAAI/bge-base-en-v1.5": 768,
|
|
645
|
+
"avsolatorio/GIST-small-Embedding-v0": 384,
|
|
646
|
+
"sdadas/mmlw-roberta-large": 1024,
|
|
647
|
+
"nomic-ai/nomic-embed-text-v1.5": 768,
|
|
648
|
+
"minishlab/potion-multilingual-128M": 256,
|
|
649
|
+
"shibing624/text2vec-base-multilingual": 384,
|
|
650
|
+
"thenlper/gte-base": 768,
|
|
651
|
+
"intfloat/e5-small-v2": 384,
|
|
652
|
+
"intfloat/e5-base": 768,
|
|
653
|
+
"sentence-transformers/static-similarity-mrl-multilingual-v1": 1024,
|
|
654
|
+
"manu/sentence_croissant_alpha_v0.3": 2048,
|
|
655
|
+
"BAAI/bge-small-en-v1.5": 512,
|
|
656
|
+
"thenlper/gte-small": 384,
|
|
657
|
+
"sdadas/mmlw-e5-small": 384,
|
|
658
|
+
"manu/sentence_croissant_alpha_v0.4": 2048,
|
|
659
|
+
"manu/sentence_croissant_alpha_v0.2": 2048,
|
|
660
|
+
"abhinand/MedEmbed-small-v0.1": 384,
|
|
661
|
+
"ibm-granite/granite-embedding-125m-english": 768,
|
|
662
|
+
"intfloat/e5-small": 384,
|
|
663
|
+
"voyage-large-2-instruct": 1024,
|
|
664
|
+
"sdadas/mmlw-roberta-base": 768,
|
|
665
|
+
"Snowflake/snowflake-arctic-embed-l": 1024,
|
|
666
|
+
"Mihaiii/Ivysaur": 384,
|
|
667
|
+
"Snowflake/snowflake-arctic-embed-m-long": 768,
|
|
668
|
+
"bigscience/sgpt-bloom-7b1-msmarco": 4096,
|
|
669
|
+
"avsolatorio/GIST-all-MiniLM-L6-v2": 384,
|
|
670
|
+
"sergeyzh/LaBSE-ru-turbo": 768,
|
|
671
|
+
"sentence-transformers/all-mpnet-base-v2": 768,
|
|
672
|
+
"Snowflake/snowflake-arctic-embed-m": 768,
|
|
673
|
+
"Snowflake/snowflake-arctic-embed-s": 384,
|
|
674
|
+
"sentence-transformers/all-MiniLM-L12-v2": 384,
|
|
675
|
+
"Mihaiii/gte-micro-v4": 384,
|
|
676
|
+
"Snowflake/snowflake-arctic-embed-m-v1.5": 768,
|
|
677
|
+
"cointegrated/LaBSE-en-ru": 768,
|
|
678
|
+
"Mihaiii/Bulbasaur": 384,
|
|
679
|
+
"ibm-granite/granite-embedding-30m-english": 384,
|
|
680
|
+
"deepfile/embedder-100p": 768,
|
|
681
|
+
"Jaume/gemma-2b-embeddings": 2048,
|
|
682
|
+
"OrlikB/KartonBERT-USE-base-v1": 768,
|
|
683
|
+
"izhx/udever-bloom-7b1": 4096,
|
|
684
|
+
"izhx/udever-bloom-1b1": 1024,
|
|
685
|
+
"brahmairesearch/slx-v0.1": 384,
|
|
686
|
+
"Mihaiii/Wartortle": 384,
|
|
687
|
+
"izhx/udever-bloom-3b": 2048,
|
|
688
|
+
"deepvk/USER-base": 768,
|
|
689
|
+
"ai-forever/ru-en-RoSBERTa": 1024,
|
|
690
|
+
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": 4096,
|
|
691
|
+
"Mihaiii/Venusaur": 384,
|
|
692
|
+
"Snowflake/snowflake-arctic-embed-xs": 384,
|
|
693
|
+
"jinaai/jina-embedding-b-en-v1": 768,
|
|
694
|
+
"Mihaiii/gte-micro": 384,
|
|
695
|
+
"aari1995/German_Semantic_STS_V2": 1024,
|
|
696
|
+
"Mihaiii/Squirtle": 384,
|
|
697
|
+
"OrlikB/st-polish-kartonberta-base-alpha-v1": 768,
|
|
698
|
+
"sergeyzh/rubert-tiny-turbo": 312,
|
|
699
|
+
"minishlab/potion-base-8M": 256,
|
|
700
|
+
"minishlab/M2V_base_glove_subword": 256,
|
|
701
|
+
"jinaai/jina-embedding-s-en-v1": 512,
|
|
702
|
+
"minishlab/potion-base-4M": 128,
|
|
703
|
+
"minishlab/M2V_base_output": 256,
|
|
704
|
+
"DeepPavlov/rubert-base-cased-sentence": 768,
|
|
705
|
+
"jinaai/jina-embeddings-v2-small-en": 512,
|
|
706
|
+
"cointegrated/rubert-tiny2": 312,
|
|
707
|
+
"minishlab/M2V_base_glove": 256,
|
|
708
|
+
"cointegrated/rubert-tiny": 312,
|
|
709
|
+
"silma-ai/silma-embeddding-matryoshka-v0.1": 768,
|
|
710
|
+
"DeepPavlov/rubert-base-cased": 768,
|
|
711
|
+
"Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": 768,
|
|
712
|
+
"izhx/udever-bloom-560m": 1024,
|
|
713
|
+
"minishlab/potion-base-2M": 64,
|
|
714
|
+
"DeepPavlov/distilrubert-small-cased-conversational": 768,
|
|
715
|
+
"consciousAI/cai-lunaris-text-embeddings": 1024,
|
|
716
|
+
"deepvk/deberta-v1-base": 768,
|
|
717
|
+
"Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": 768,
|
|
718
|
+
"Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": 768,
|
|
719
|
+
"ai-forever/sbert_large_mt_nlu_ru": 1024,
|
|
720
|
+
"ai-forever/sbert_large_nlu_ru": 1024,
|
|
721
|
+
"malenia1/ternary-weight-embedding": 1024,
|
|
722
|
+
"jinaai/jina-embeddings-v2-base-en": 768,
|
|
723
|
+
"VPLabs/SearchMap_Preview": 4096,
|
|
724
|
+
"Hum-Works/lodestone-base-4096-v1": 768,
|
|
725
|
+
"jinaai/jina-embeddings-v4": 2048
|
|
726
|
+
};
|
|
727
|
+
/**
|
|
728
|
+
* Lowercase lookup map for case-insensitive model dimension queries.
|
|
729
|
+
* Built lazily from knownModelDimensions to ensure consistency.
|
|
730
|
+
*/
|
|
731
|
+
modelLookup;
|
|
732
|
+
constructor() {
|
|
733
|
+
this.modelLookup = /* @__PURE__ */ new Map();
|
|
734
|
+
for (const [model, dimensions] of Object.entries(this.knownModelDimensions)) {
|
|
735
|
+
this.modelLookup.set(model.toLowerCase(), dimensions);
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
/**
|
|
739
|
+
* Parse embedding model configuration from a provided model specification.
|
|
740
|
+
* This is a synchronous operation that extracts provider, model, and known dimensions.
|
|
741
|
+
*
|
|
742
|
+
* Supports various providers:
|
|
743
|
+
* - openai: OpenAI models and OpenAI-compatible APIs (Ollama, LMStudio, etc.)
|
|
744
|
+
* - vertex: Google Cloud Vertex AI
|
|
745
|
+
* - gemini: Google Generative AI
|
|
746
|
+
* - aws: AWS Bedrock models
|
|
747
|
+
* - microsoft: Azure OpenAI
|
|
748
|
+
* - sagemaker: AWS SageMaker hosted models
|
|
749
|
+
*
|
|
750
|
+
* @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
|
|
751
|
+
* @returns Parsed embedding model configuration
|
|
752
|
+
*/
|
|
753
|
+
parse(modelSpec) {
|
|
754
|
+
const spec = normalizeEnvValue(modelSpec || "text-embedding-3-small");
|
|
755
|
+
const colonIndex = spec.indexOf(":");
|
|
756
|
+
let provider;
|
|
757
|
+
let model;
|
|
758
|
+
if (colonIndex === -1) {
|
|
759
|
+
provider = "openai";
|
|
760
|
+
model = spec;
|
|
761
|
+
} else {
|
|
762
|
+
provider = spec.substring(0, colonIndex);
|
|
763
|
+
model = spec.substring(colonIndex + 1);
|
|
764
|
+
}
|
|
765
|
+
const dimensions = this.modelLookup?.get(model.toLowerCase()) || null;
|
|
766
|
+
return {
|
|
767
|
+
provider,
|
|
768
|
+
model,
|
|
769
|
+
dimensions,
|
|
770
|
+
modelSpec: spec
|
|
771
|
+
};
|
|
772
|
+
}
|
|
773
|
+
/**
|
|
774
|
+
* Get the known dimensions for a specific model.
|
|
775
|
+
* Returns null if the model dimensions are not known.
|
|
776
|
+
* Uses case-insensitive lookup.
|
|
777
|
+
*
|
|
778
|
+
* @param model The model name (e.g., "text-embedding-3-small")
|
|
779
|
+
* @returns Known dimensions or null
|
|
780
|
+
*/
|
|
781
|
+
getKnownDimensions(model) {
|
|
782
|
+
return this.modelLookup?.get(model.toLowerCase()) || null;
|
|
783
|
+
}
|
|
784
|
+
/**
|
|
785
|
+
* Add or update known dimensions for a model.
|
|
786
|
+
* This can be used to cache discovered dimensions.
|
|
787
|
+
* Stores both original case and lowercase for consistent lookup.
|
|
788
|
+
*
|
|
789
|
+
* @param model The model name
|
|
790
|
+
* @param dimensions The dimensions to cache
|
|
791
|
+
*/
|
|
792
|
+
setKnownDimensions(model, dimensions) {
|
|
793
|
+
this.knownModelDimensions[model] = dimensions;
|
|
794
|
+
if (this.modelLookup) {
|
|
795
|
+
this.modelLookup.set(model.toLowerCase(), dimensions);
|
|
796
|
+
}
|
|
797
|
+
}
|
|
798
|
+
/**
|
|
799
|
+
* Static method to parse embedding model configuration using the singleton instance.
|
|
800
|
+
* This maintains backward compatibility while using the class-based approach.
|
|
801
|
+
*/
|
|
802
|
+
static parseEmbeddingConfig(modelSpec) {
|
|
803
|
+
return EmbeddingConfig.getInstance().parse(modelSpec);
|
|
804
|
+
}
|
|
805
|
+
/**
|
|
806
|
+
* Static method to get known model dimensions using the singleton instance.
|
|
807
|
+
* This maintains backward compatibility while using the class-based approach.
|
|
808
|
+
*/
|
|
809
|
+
static getKnownModelDimensions(model) {
|
|
810
|
+
return EmbeddingConfig.getInstance().getKnownDimensions(model);
|
|
811
|
+
}
|
|
812
|
+
/**
|
|
813
|
+
* Static method to set known model dimensions using the singleton instance.
|
|
814
|
+
* This maintains backward compatibility while using the class-based approach.
|
|
815
|
+
*/
|
|
816
|
+
static setKnownModelDimensions(model, dimensions) {
|
|
817
|
+
EmbeddingConfig.getInstance().setKnownDimensions(model, dimensions);
|
|
818
|
+
}
|
|
819
|
+
}
|
|
820
|
+
/**
 * Return the event bus stored on the parsed arguments object.
 *
 * @param argv Arguments object expected to carry an `_eventBus` property
 * @returns The value of `argv._eventBus`
 * @throws {Error} If `argv._eventBus` is missing or falsy
 */
function getEventBus(argv) {
  const { _eventBus: bus } = argv;
  if (bus) {
    return bus;
  }
  throw new Error("EventBusService not initialized");
}
|
|
827
|
+
/**
 * Return the parsed arguments object unchanged (same reference) as the
 * global options.
 *
 * @param argv Parsed arguments object
 * @returns The very same `argv` object
 */
function getGlobalOptions(argv) {
  const globalOptions = argv;
  return globalOptions;
}
|
|
830
|
+
/**
 * Make sure a Chromium browser usable by Playwright is present, installing
 * it via `npm exec playwright install` when it is not.
 *
 * The check is skipped when PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD is "1" or when
 * PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH points at an existing file.
 * If the automatic installation fails, an error is logged and the process
 * exits with status 1.
 */
function ensurePlaywrightBrowsersInstalled() {
  if (process.env.PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD === "1") {
    logger.debug(
      "PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD is set, skipping Playwright browser install."
    );
    return;
  }
  const chromiumEnvPath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH;
  if (chromiumEnvPath && existsSync(chromiumEnvPath)) {
    logger.debug(
      `PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH is set to '${chromiumEnvPath}', skipping Playwright browser install.`
    );
    return;
  }
  try {
    const chromiumPath = chromium.executablePath();
    if (!chromiumPath || !existsSync(chromiumPath)) {
      // Route the "binary missing" case through the same install path as a
      // failure of executablePath() itself.
      throw new Error("Playwright Chromium browser not found");
    }
  } catch (error) {
    logger.debug(String(error));
    try {
      logger.info(
        "🌐 Installing Playwright Chromium browser... (this may take a moment)"
      );
      execSync("npm exec -y playwright install --no-shell --with-deps chromium", {
        stdio: "ignore",
        // Suppress output
        cwd: getProjectRoot()
      });
    } catch (installErr) {
      // Keep the underlying failure reason available at debug level instead
      // of discarding it.
      logger.debug(String(installErr));
      logger.error(
        "❌ Failed to install Playwright browsers automatically. Please run:\n npx playwright install --no-shell --with-deps chromium\nand try again."
      );
      process.exit(1);
    }
  }
}
|
|
868
|
+
/**
 * Resolve the requested protocol to a concrete one.
 *
 * "auto" becomes "stdio" when neither stdin nor stdout is a TTY, and "http"
 * otherwise. "stdio" and "http" are returned unchanged.
 *
 * @param protocol One of "auto", "stdio" or "http"
 * @returns "stdio" or "http"
 * @throws {Error} For any other value
 */
function resolveProtocol(protocol) {
  switch (protocol) {
    case "stdio":
    case "http":
      return protocol;
    case "auto": {
      const hasTerminal = process.stdin.isTTY || process.stdout.isTTY;
      return hasTerminal ? "http" : "stdio";
    }
    default:
      throw new Error(`Invalid protocol: ${protocol}. Must be 'auto', 'stdio', or 'http'`);
  }
}
|
|
880
|
+
/**
 * Reject the combination of the resume flag with a server URL.
 *
 * @param resume Truthy when --resume was requested
 * @param serverUrl Truthy when --server-url was provided
 * @throws {Error} If both values are truthy
 */
function validateResumeFlag(resume, serverUrl) {
  const isConflicting = Boolean(resume) && Boolean(serverUrl);
  if (!isConflicting) {
    return;
  }
  throw new Error(
    "--resume flag is incompatible with --server-url. External workers handle their own job recovery."
  );
}
|
|
887
|
+
/**
 * Parse and validate a TCP port number.
 *
 * The whole input (after trimming surrounding whitespace) must consist of
 * decimal digits. Number.parseInt alone would accept a numeric prefix and
 * silently turn inputs such as "8080abc" or "80.5" into valid ports.
 *
 * @param portString Port as a string (a number is accepted and stringified)
 * @returns The port as an integer in the range 1-65535
 * @throws {Error} "Invalid port number" for anything else
 */
function validatePort(portString) {
  const text = String(portString).trim();
  if (!/^\d+$/.test(text)) {
    throw new Error("Invalid port number");
  }
  const port = Number.parseInt(text, 10);
  if (port < 1 || port > 65535) {
    throw new Error("Invalid port number");
  }
  return port;
}
|
|
894
|
+
/**
 * Validate a host string.
 *
 * Surrounding whitespace is trimmed. The remaining value must be non-empty
 * and must not contain ANY whitespace character (space, tab, newline,
 * carriage return, ...), not only spaces and newlines.
 *
 * @param hostString Raw host value
 * @returns The trimmed host
 * @throws {Error} If the host is empty or contains whitespace
 */
function validateHost(hostString) {
  const trimmed = hostString.trim();
  if (!trimmed) {
    throw new Error("Host cannot be empty");
  }
  if (/\s/.test(trimmed)) {
    throw new Error("Host cannot contain whitespace");
  }
  return trimmed;
}
|
|
904
|
+
/**
 * Build an app server configuration object from the given options.
 *
 * Feature flags fall back to their defaults when the option is null or
 * undefined: web interface off, MCP server on, API server off, worker on,
 * logo shown. The remaining fields are copied through unchanged.
 *
 * @param options Source options
 * @returns A new configuration object
 */
function createAppServerConfig(options) {
  // Mirrors `??`: only null/undefined select the fallback.
  const withDefault = (value, fallback) =>
    value === null || value === undefined ? fallback : value;
  const config = {};
  config.enableWebInterface = withDefault(options.enableWebInterface, false);
  config.enableMcpServer = withDefault(options.enableMcpServer, true);
  config.enableApiServer = withDefault(options.enableApiServer, false);
  config.enableWorker = withDefault(options.enableWorker, true);
  config.port = options.port;
  config.externalWorkerUrl = options.externalWorkerUrl;
  config.showLogo = withDefault(options.showLogo, true);
  config.startupContext = options.startupContext;
  return config;
}
|
|
916
|
+
/**
 * Convert a list of "Name: value" strings into a headers object.
 *
 * Each entry is split at its first colon; name and value are trimmed.
 * Entries without a colon, with a leading colon, or with a blank name are
 * ignored. A later entry overwrites an earlier one with the same name.
 *
 * @param headerOptions Array of header strings; any non-array yields {}
 * @returns Object mapping header names to values
 */
function parseHeaders(headerOptions) {
  const parsed = {};
  if (!Array.isArray(headerOptions)) {
    return parsed;
  }
  for (const raw of headerOptions) {
    const separatorAt = raw.indexOf(":");
    if (separatorAt <= 0) {
      continue;
    }
    const key = raw.slice(0, separatorAt).trim();
    if (!key) {
      continue;
    }
    parsed[key] = raw.slice(separatorAt + 1).trim();
  }
  return parsed;
}
|
|
930
|
+
/**
 * Derive the auth configuration from the given options.
 *
 * @param options Options carrying authEnabled, authIssuerUrl and authAudience
 * @returns undefined when `options.authEnabled` is falsy; otherwise an
 *   enabled config with the issuer URL, audience and fixed default scopes
 */
function parseAuthConfig(options) {
  if (options.authEnabled) {
    const config = { enabled: true };
    config.issuerUrl = options.authIssuerUrl;
    config.audience = options.authAudience;
    // Default scopes for OAuth2/OIDC
    config.scopes = ["openid", "profile"];
    return config;
  }
  return undefined;
}
|
|
942
|
+
/**
 * Validate an auth configuration, collecting every problem before failing.
 *
 * Rules when `authConfig.enabled` is truthy:
 * - issuerUrl is required, must parse as a URL and must use https:
 * - audience is required and must be either an absolute URL without a
 *   fragment, or a URN of the form urn:namespace:specific-string
 * An http: audience on a host other than "localhost" only logs a warning.
 *
 * @param authConfig Auth configuration (enabled, issuerUrl, audience)
 * @throws {Error} Listing all validation failures, one per line
 */
function validateAuthConfig(authConfig) {
  if (!authConfig.enabled) {
    return;
  }
  const errors = [];
  if (!authConfig.issuerUrl) {
    errors.push("--auth-issuer-url is required when auth is enabled");
  } else {
    try {
      const url = new URL(authConfig.issuerUrl);
      if (url.protocol !== "https:") {
        errors.push("Issuer URL must use HTTPS protocol");
      }
    } catch {
      errors.push("Issuer URL must be a valid URL");
    }
  }
  if (!authConfig.audience) {
    errors.push("--auth-audience is required when auth is enabled");
  } else {
    // `new URL()` accepts "urn:..." strings (non-special scheme), so the URN
    // shape must be checked up front; doing it only when URL parsing fails
    // would let values such as "urn:" or "urn:company" through unchecked.
    const isUrn = authConfig.audience.startsWith("urn:");
    if (isUrn) {
      const urnParts = authConfig.audience.split(":");
      if (urnParts.length < 3 || !urnParts[1] || !urnParts[2]) {
        errors.push("URN audience must follow format: urn:namespace:specific-string");
      }
    }
    try {
      const url = new URL(authConfig.audience);
      if (url.protocol === "http:" && url.hostname !== "localhost") {
        logger.warn(
          "⚠️ Audience uses HTTP protocol - consider using HTTPS for production"
        );
      }
      if (url.hash) {
        errors.push("Audience must not contain URL fragments");
      }
    } catch {
      // An unparseable URN has already been reported by the check above.
      if (!isUrn) {
        errors.push(
          "Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
        );
      }
    }
  }
  if (errors.length > 0) {
    throw new Error(`Auth configuration validation failed:\n${errors.join("\n")}`);
  }
}
|
|
990
|
+
/**
 * Log a warning when authentication is enabled in what looks like a
 * production deployment.
 *
 * The warning is suppressed when auth is disabled, when NODE_ENV is not
 * "production", when the port is 6280 (default dev port), or when the
 * HOSTNAME environment variable contains "localhost".
 *
 * @param authConfig Auth configuration, may be undefined
 * @param port Port the server listens on
 */
function warnHttpUsage(authConfig, port) {
  if (!authConfig?.enabled) {
    return;
  }
  const inProduction = process.env.NODE_ENV === "production";
  const onDefaultDevPort = port === 6280;
  const hostnameLooksLocal = process.env.HOSTNAME?.includes("localhost");
  if (inProduction && !onDefaultDevPort && !hostnameLooksLocal) {
    logger.warn(
      "⚠️ Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
    );
  }
}
|
|
1002
|
+
/**
 * Create a new event bus together with a telemetry service constructed
 * from that bus.
 *
 * @returns Object with `eventBus` and `telemetryService`
 */
function createEventServices() {
  const bus = new EventBusService();
  return {
    eventBus: bus,
    telemetryService: new TelemetryService(bus)
  };
}
|
|
1007
|
+
/**
 * Resolve the embedding configuration for a model specification.
 *
 * @param embeddingModel Model specification; falsy means embeddings are off
 * @returns The parsed embedding configuration, or null when no model was
 *   given or when resolving it threw
 */
function resolveEmbeddingContext(embeddingModel) {
  let resolved = null;
  try {
    if (embeddingModel) {
      logger.debug(`Resolving embedding configuration for model: ${embeddingModel}`);
      resolved = EmbeddingConfig.parseEmbeddingConfig(embeddingModel);
    } else {
      logger.debug("No embedding model specified. Embeddings are disabled.");
    }
  } catch (error) {
    // Best effort: a failure here is logged and reported as "no embeddings".
    logger.debug(`Failed to resolve embedding configuration: ${error}`);
    resolved = null;
  }
  return resolved;
}
|
|
1021
|
+
// Frozen namespace object collecting the helper functions listed below.
// It has a null prototype and its Symbol.toStringTag is "Module".
const utils = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
  __proto__: null,
  createAppServerConfig,
  createEventServices,
  ensurePlaywrightBrowsersInstalled,
  getEventBus,
  getGlobalOptions,
  parseAuthConfig,
  parseHeaders,
  resolveEmbeddingContext,
  resolveProtocol,
  validateAuthConfig,
  validateHost,
  validatePort,
  validateResumeFlag,
  warnHttpUsage
}, Symbol.toStringTag, { value: "Module" }));
|
|
1038
|
+
export {
|
|
1039
|
+
EmbeddingConfig as E,
|
|
1040
|
+
PipelineJobStatus as P,
|
|
1041
|
+
ServerEventName as S,
|
|
1042
|
+
TelemetryEvent as T,
|
|
1043
|
+
EventType as a,
|
|
1044
|
+
getEventBus as b,
|
|
1045
|
+
createAppServerConfig as c,
|
|
1046
|
+
parseHeaders as d,
|
|
1047
|
+
ensurePlaywrightBrowsersInstalled as e,
|
|
1048
|
+
validatePort as f,
|
|
1049
|
+
getProjectRoot as g,
|
|
1050
|
+
validateHost as h,
|
|
1051
|
+
resolveStorePath as i,
|
|
1052
|
+
initTelemetry as j,
|
|
1053
|
+
EventBusService as k,
|
|
1054
|
+
TelemetryService as l,
|
|
1055
|
+
parseAuthConfig as p,
|
|
1056
|
+
resolveProtocol as r,
|
|
1057
|
+
shouldEnableTelemetry as s,
|
|
1058
|
+
telemetry as t,
|
|
1059
|
+
utils as u,
|
|
1060
|
+
validateAuthConfig as v,
|
|
1061
|
+
warnHttpUsage as w
|
|
1062
|
+
};
|
|
1063
|
+
//# sourceMappingURL=utils-CZz1DsHw.js.map
|