@arabold/docs-mcp-server 2.0.3 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1062 @@
1
+ import { execSync } from "node:child_process";
2
+ import fs, { existsSync } from "node:fs";
3
+ import { chromium } from "playwright";
4
+ import EventEmitter from "node:events";
5
+ import { l as logger, n as normalizeEnvValue } from "./logger-CLtABTNb.js";
6
+ import { randomUUID } from "node:crypto";
7
+ import path from "node:path";
8
+ import { fileURLToPath } from "node:url";
9
+ import envPaths from "env-paths";
10
+ import { PostHog } from "posthog-node";
11
// Cached result of the first successful getProjectRoot() lookup.
let projectRoot = null;

/**
 * Finds the closest ancestor directory of this module that contains a
 * package.json file. The result is memoized in `projectRoot`, so the
 * filesystem is only walked once per process.
 *
 * @returns {string} Directory that holds the nearest package.json.
 * @throws {Error} If the filesystem root is reached without finding one.
 */
function getProjectRoot() {
  if (!projectRoot) {
    let candidate = path.dirname(fileURLToPath(import.meta.url));
    while (!fs.existsSync(path.join(candidate, "package.json"))) {
      const parent = path.dirname(candidate);
      // path.dirname() of the filesystem root is the root itself.
      if (parent === candidate) {
        throw new Error("Could not find project root containing package.json.");
      }
      candidate = parent;
    }
    projectRoot = candidate;
  }
  return projectRoot;
}
31
/**
 * Determines the directory used for persistent data and makes sure it exists.
 *
 * Resolution order:
 *  1. An explicit `storePath`, resolved to an absolute path.
 *  2. `<project root>/.store` when it already contains `documents.db`.
 *  3. The per-user data directory reported by env-paths for "docs-mcp-server".
 *
 * A failure to create the directory is logged as a warning and otherwise
 * ignored; the path is returned either way.
 *
 * @param {string} [storePath] Optional explicit storage directory.
 * @returns {string} The resolved storage directory.
 */
function resolveStorePath(storePath) {
  const pickDefaultDir = () => {
    const legacyDir = path.join(getProjectRoot(), ".store");
    if (fs.existsSync(path.join(legacyDir, "documents.db"))) {
      return legacyDir;
    }
    return envPaths("docs-mcp-server", { suffix: "" }).data;
  };

  const dbDir = storePath ? path.resolve(storePath) : pickDefaultDir();
  try {
    fs.mkdirSync(dbDir, { recursive: true });
  } catch (error) {
    logger.warn(`⚠️ Failed to create database directory ${dbDir}: ${error}`);
  }
  return dbDir;
}
54
/**
 * Process-wide switch that records whether telemetry is enabled.
 * Obtain the shared object through TelemetryConfig.getInstance().
 */
class TelemetryConfig {
  /** Lazily created shared instance. */
  static instance;

  /** Telemetry is on unless setEnabled(false) is called. */
  enabled = true;

  constructor() {}

  /** @returns {boolean} The current enabled flag. */
  isEnabled() {
    return this.enabled;
  }

  /** @param {boolean} enabled New value for the enabled flag. */
  setEnabled(enabled) {
    this.enabled = enabled;
  }

  /** @returns {TelemetryConfig} The shared instance, created on first use. */
  static getInstance() {
    TelemetryConfig.instance ??= new TelemetryConfig();
    return TelemetryConfig.instance;
  }
}
73
/**
 * Returns the installation identifier stored in `installation.id` inside the
 * resolved store directory, creating and persisting a new UUID when the file
 * is missing or blank.
 *
 * Best effort: if anything fails (resolving the directory, reading, writing),
 * a fresh, non-persisted UUID is returned instead of throwing.
 *
 * @param {string} [storePath] Optional storage directory override.
 * @returns {string} The persisted or newly generated identifier.
 */
function generateInstallationId(storePath) {
  try {
    const dataDir = resolveStorePath(storePath);
    const idFile = path.join(dataDir, "installation.id");
    const storedId = fs.existsSync(idFile) ? fs.readFileSync(idFile, "utf8").trim() : "";
    if (storedId) {
      return storedId;
    }
    const freshId = randomUUID();
    fs.mkdirSync(dataDir, { recursive: true });
    fs.writeFileSync(idFile, freshId, "utf8");
    return freshId;
  } catch {
    return randomUUID();
  }
}
91
/**
 * Reports whether telemetry is currently enabled according to the shared
 * TelemetryConfig instance.
 *
 * @returns {boolean} The enabled flag held by TelemetryConfig.
 */
function shouldEnableTelemetry() {
  const config = TelemetryConfig.getInstance();
  return config.isEnabled();
}
94
/**
 * Names of the events exchanged over the event bus. Each key maps to an
 * identical string value.
 */
// Declared with `var` to keep the hoisting behaviour of the original enum.
var EventType = {
  JOB_STATUS_CHANGE: "JOB_STATUS_CHANGE",
  JOB_PROGRESS: "JOB_PROGRESS",
  LIBRARY_CHANGE: "LIBRARY_CHANGE",
  JOB_LIST_CHANGE: "JOB_LIST_CHANGE"
};
101
/**
 * Maps each event type key to its kebab-case event name.
 */
const ServerEventName = {
  JOB_STATUS_CHANGE: "job-status-change",
  JOB_PROGRESS: "job-progress",
  LIBRARY_CHANGE: "library-change",
  JOB_LIST_CHANGE: "job-list-change"
};
119
/**
 * Status values a pipeline job can carry.
 */
// Declared with `var` to keep the hoisting behaviour of the original enum.
var PipelineJobStatus = {
  QUEUED: "queued",
  RUNNING: "running",
  COMPLETED: "completed",
  FAILED: "failed",
  CANCELLING: "cancelling",
  CANCELLED: "cancelled"
};
128
/**
 * Rewrites every ASCII uppercase letter as an underscore followed by its
 * lowercase form, e.g. "sessionId" -> "session_id". A leading capital also
 * receives an underscore ("Foo" -> "_foo").
 *
 * @param {string} str Key to convert.
 * @returns {string} The converted key.
 */
function camelToSnakeCase(str) {
  const underscoreLower = (upper) => `_${upper.toLowerCase()}`;
  return str.replace(/[A-Z]/g, underscoreLower);
}
131
/**
 * Returns a copy of `obj` whose keys are converted with camelToSnakeCase,
 * recursing into nested objects and into arrays at any depth.
 *
 * Date instances, primitives, null and undefined are copied through as-is.
 * Arrays always stay arrays: previously an array nested inside another array
 * was treated as a plain object and came back keyed by its indices.
 *
 * @param {Record<string, unknown>} obj Properties to convert.
 * @returns {Record<string, unknown>} New object with snake_case keys.
 */
function convertPropertiesToSnakeCase(obj) {
  // Converts a single value, preserving array-ness at every nesting level.
  const convertValue = (value) => {
    if (Array.isArray(value)) {
      return value.map(convertValue);
    }
    if (value && typeof value === "object" && !(value instanceof Date)) {
      return convertPropertiesToSnakeCase(value);
    }
    return value;
  };

  const result = {};
  for (const [key, value] of Object.entries(obj)) {
    result[camelToSnakeCase(key)] = convertValue(value);
  }
  return result;
}
147
/**
 * Returns a shallow copy of `properties` in which the truthy fields
 * `sessionId`, `startTime` and `appVersion` are moved to the `$`-prefixed
 * keys `$session_id`, `$start_timestamp` (ISO string) and `$app_version`.
 * The input object is not modified.
 *
 * @param {object} properties Event properties; `startTime`, when present,
 *   must provide `toISOString()`.
 * @returns {object} Copy with the renamed keys.
 */
function addPostHogStandardProperties(properties) {
  const { sessionId, startTime, appVersion } = properties;
  const result = { ...properties };

  if (sessionId) {
    result.$session_id = sessionId;
    delete result.sessionId;
  }
  if (startTime) {
    result.$start_timestamp = startTime.toISOString();
    delete result.startTime;
  }
  if (appVersion) {
    result.$app_version = appVersion;
    delete result.appVersion;
  }
  return result;
}
163
/**
 * Wrapper around the posthog-node client that never throws: every failure is
 * logged at debug level and otherwise ignored. When constructed with
 * `enabled === false` no PostHog client is created and all methods are no-ops.
 */
class PostHogClient {
  /** Underlying PostHog client; undefined when disabled or when creation failed. */
  client;
  /** Whether events should be sent. Forced to false if initialization fails. */
  enabled;

  // PostHog configuration. Only host, flushAt, flushInterval and disableGeoip
  // are forwarded to the PostHog constructor below.
  static CONFIG = {
    host: "https://app.posthog.com",
    // Performance optimizations
    flushAt: 20, // Batch size - send after 20 events
    flushInterval: 1e4, // 10 seconds - send after time
    // Privacy settings
    disableGeoip: true, // Don't collect IP geolocation
    disableSessionRecording: true, // Never record sessions
    disableSurveys: true, // No user surveys
    // Data handling
    persistence: "memory" // No disk persistence for privacy
  };

  /** Extracts a loggable message from an arbitrary thrown value. */
  static #describe(error) {
    return error instanceof Error ? error.message : "Unknown error";
  }

  /**
   * @param {boolean} enabled Whether a PostHog client should be created.
   */
  constructor(enabled) {
    this.enabled = enabled;
    if (!this.enabled) {
      return;
    }
    try {
      const { host, flushAt, flushInterval, disableGeoip } = PostHogClient.CONFIG;
      this.client = new PostHog("phc_g7pXZZdUiAQXdnwUANjloQWMvO0amEDTBaeDSWgXgrQ", {
        host,
        flushAt,
        flushInterval,
        disableGeoip
      });
      logger.debug("PostHog client initialized");
    } catch (error) {
      logger.debug(`PostHog initialization failed: ${PostHogClient.#describe(error)}`);
      this.enabled = false;
    }
  }

  /**
   * Send event to PostHog. Properties get the PostHog standard keys applied
   * and are converted to snake_case first.
   *
   * @param {string} distinctId Identifier the event is attributed to.
   * @param {string} event Event name.
   * @param {object} properties Event properties.
   */
  capture(distinctId, event, properties) {
    if (!(this.enabled && this.client)) return;
    try {
      const snakeCaseProperties = convertPropertiesToSnakeCase(
        addPostHogStandardProperties(properties)
      );
      this.client.capture({ distinctId, event, properties: snakeCaseProperties });
      logger.debug(`PostHog event captured: ${event}`);
    } catch (error) {
      logger.debug(`PostHog capture error: ${PostHogClient.#describe(error)}`);
    }
  }

  /**
   * Capture exception using PostHog's native error tracking.
   *
   * @param {string} distinctId Identifier the exception is attributed to.
   * @param {Error} error The exception to report.
   * @param {object} [properties] Additional properties.
   */
  captureException(distinctId, error, properties) {
    if (!(this.enabled && this.client)) return;
    try {
      const snakeCaseProperties = convertPropertiesToSnakeCase(
        addPostHogStandardProperties(properties || {})
      );
      // posthog-node expects positional (error, distinctId, additionalProperties).
      // Passing one wrapper object made that object the reported "error".
      this.client.captureException(error, distinctId, snakeCaseProperties);
      logger.debug(`PostHog exception captured: ${error.constructor.name}`);
    } catch (captureError) {
      logger.debug(`PostHog captureException error: ${PostHogClient.#describe(captureError)}`);
    }
  }

  /**
   * Graceful shutdown with event flushing. Does nothing when no client exists.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    if (!this.client) return;
    try {
      await this.client.shutdown();
      logger.debug("PostHog client shutdown complete");
    } catch (error) {
      logger.debug(`PostHog shutdown error: ${PostHogClient.#describe(error)}`);
    }
  }

  /**
   * Check if client is enabled and ready.
   *
   * @returns {boolean} True only when enabled and a client was created.
   */
  isEnabled() {
    return this.enabled && !!this.client;
  }
}
267
/**
 * Names of the telemetry events emitted by the application.
 */
// Declared with `var` to keep the hoisting behaviour of the original enum.
var TelemetryEvent = {
  APP_STARTED: "app_started",
  APP_SHUTDOWN: "app_shutdown",
  CLI_COMMAND: "cli_command",
  TOOL_USED: "tool_used",
  PIPELINE_JOB_STARTED: "pipeline_job_started",
  PIPELINE_JOB_COMPLETED: "pipeline_job_completed",
  PIPELINE_JOB_FAILED: "pipeline_job_failed"
};
277
/**
 * Application telemetry facade. Adds a global context and a timestamp to
 * every event and forwards it to a PostHogClient, attributed to the
 * installation ID.
 */
class Telemetry {
  postHogClient;
  enabled;
  distinctId;
  globalContext = {};

  /**
   * Create a new Telemetry instance with proper initialization.
   * This is the recommended way to create Telemetry instances.
   *
   * @param {string} [storePath] Store directory whose installation ID should
   *   be used as the distinct ID; the default location is used when omitted.
   * @returns {Telemetry} The new instance.
   */
  static create(storePath) {
    const config = TelemetryConfig.getInstance();
    const instance = new Telemetry(config.isEnabled(), storePath);
    if (instance.isEnabled()) {
      logger.debug("Telemetry enabled");
    } else if (!config.isEnabled()) {
      logger.debug("Telemetry disabled (user preference)");
    } else {
      logger.debug("Telemetry disabled");
    }
    return instance;
  }

  /**
   * Private constructor - use Telemetry.create() instead.
   *
   * @param {boolean} [enabled=true] Whether events are sent.
   * @param {string} [storePath] Store directory passed to
   *   generateInstallationId().
   */
  constructor(enabled = true, storePath) {
    this.enabled = enabled;
    // Read the ID from the same store that was configured. Previously this
    // always used the default location, ignoring a custom store path.
    this.distinctId = generateInstallationId(storePath);
    this.postHogClient = new PostHogClient(this.enabled);
  }

  /** Merges global context, caller properties and a fresh ISO timestamp. */
  #enrich(properties) {
    return {
      ...this.globalContext,
      ...properties,
      timestamp: (/* @__PURE__ */ new Date()).toISOString()
    };
  }

  /**
   * Set global application context that will be included in all events.
   * A shallow copy of `context` is stored.
   */
  setGlobalContext(context) {
    this.globalContext = { ...context };
  }

  /**
   * Get current global context (as a shallow copy).
   */
  getGlobalContext() {
    return { ...this.globalContext };
  }

  /**
   * Track an event. No-op when telemetry is disabled.
   *
   * @param {string} event Event name.
   * @param {object} [properties] Event properties; they override global
   *   context keys of the same name.
   */
  track(event, properties = {}) {
    if (!this.enabled) return;
    const enrichedProperties = this.#enrich(properties);
    logger.debug(`Tracking event: ${event}`);
    this.postHogClient.capture(this.distinctId, event, enrichedProperties);
  }

  /**
   * Capture exception using PostHog's native error tracking with global
   * context. Non-Error values are wrapped in an Error built from String(value).
   *
   * @param {unknown} error The thrown value.
   * @param {object} [properties] Additional properties.
   */
  captureException(error, properties = {}) {
    if (!this.enabled) return;
    const enrichedProperties = this.#enrich(properties);
    const isError = error instanceof Error;
    logger.debug(`Capturing exception: ${isError ? error.message : String(error)}`);
    this.postHogClient.captureException(
      this.distinctId,
      isError ? error : new Error(String(error)),
      enrichedProperties
    );
  }

  /**
   * Graceful shutdown with event flushing. No-op when disabled.
   */
  async shutdown() {
    if (!this.enabled) return;
    await this.postHogClient.shutdown();
  }

  /**
   * Check if telemetry is enabled.
   */
  isEnabled() {
    return this.enabled;
  }
}

// Shared Telemetry instance; created lazily or replaced by initTelemetry().
let telemetryInstance = null;

/**
 * Returns the shared Telemetry instance, creating it with default settings on
 * first use.
 *
 * @returns {Telemetry} The shared instance.
 */
function getTelemetryInstance() {
  if (!telemetryInstance) {
    telemetryInstance = Telemetry.create();
  }
  return telemetryInstance;
}

/**
 * Applies the enabled flag to TelemetryConfig and replaces the shared
 * Telemetry instance with one bound to the installation ID of
 * `options.storePath`.
 *
 * @param {{ enabled: boolean, storePath?: string }} options Telemetry options.
 */
function initTelemetry(options) {
  TelemetryConfig.getInstance().setEnabled(options.enabled);
  telemetryInstance = Telemetry.create(options.storePath);
}
374
/**
 * Facade that forwards every property read to the current Telemetry
 * instance, looked up through getTelemetryInstance() on each access.
 * Function members are returned bound to that instance so they can be
 * detached and called freely.
 */
const telemetry = new Proxy({}, {
  get: (_target, prop) => {
    const instance = getTelemetryInstance();
    const member = instance[prop];
    return typeof member === "function" ? member.bind(instance) : member;
  }
});
384
/**
 * Bridges the event bus to telemetry: subscribes to job events on
 * construction and reports selected job status changes through
 * `telemetry.track`.
 */
class TelemetryService {
  eventBus;
  /** Unsubscribe callbacks returned by eventBus.on(). */
  unsubscribers = [];

  /**
   * @param {{ on: Function }} eventBus Bus whose `on()` returns an
   *   unsubscribe function.
   */
  constructor(eventBus) {
    this.eventBus = eventBus;
    this.setupEventListeners();
  }

  /**
   * Sets up event listeners for pipeline events.
   */
  setupEventListeners() {
    const subscriptions = [
      [EventType.JOB_STATUS_CHANGE, this.handleJobStatusChange.bind(this)],
      [EventType.JOB_PROGRESS, this.handleJobProgress.bind(this)]
    ];
    for (const [eventType, handler] of subscriptions) {
      this.unsubscribers.push(this.eventBus.on(eventType, handler));
    }
    logger.debug("TelemetryService initialized and listening to events");
  }

  /**
   * Handles job status change events and tracks them to analytics.
   * Only RUNNING, COMPLETED and FAILED produce an event; any other status is
   * ignored. Durations are measured from `job.startedAt` to now.
   */
  handleJobStatusChange(job) {
    const { startedAt, createdAt, status } = job;
    const elapsedMs = startedAt ? Date.now() - startedAt.getTime() : null;
    const waitedMs = startedAt && createdAt ? startedAt.getTime() - createdAt.getTime() : null;

    if (status === PipelineJobStatus.RUNNING) {
      telemetry.track(TelemetryEvent.PIPELINE_JOB_STARTED, {
        jobId: job.id,
        library: job.library,
        hasVersion: !!job.version,
        maxPagesConfigured: job.progressMaxPages || 0,
        queueWaitTimeMs: waitedMs
      });
      return;
    }

    if (status === PipelineJobStatus.COMPLETED) {
      // Pages per second, rounded; 0 when duration or page count is missing.
      const throughput =
        elapsedMs && job.progressPages
          ? Math.round(job.progressPages / elapsedMs * 1e3)
          : 0;
      telemetry.track(TelemetryEvent.PIPELINE_JOB_COMPLETED, {
        jobId: job.id,
        library: job.library,
        durationMs: elapsedMs,
        pagesProcessed: job.progressPages || 0,
        maxPagesConfigured: job.progressMaxPages || 0,
        hasVersion: !!job.version,
        throughputPagesPerSecond: throughput
      });
      return;
    }

    if (status === PipelineJobStatus.FAILED) {
      telemetry.track(TelemetryEvent.PIPELINE_JOB_FAILED, {
        jobId: job.id,
        library: job.library,
        durationMs: elapsedMs,
        pagesProcessed: job.progressPages || 0,
        maxPagesConfigured: job.progressMaxPages || 0,
        hasVersion: !!job.version,
        hasError: !!job.error,
        errorMessage: job.error?.message
      });
    }
  }

  /**
   * Handles job progress events. Currently a no-op but can be extended
   * for progress-specific telemetry tracking.
   */
  handleJobProgress(_event) {
  }

  /**
   * Cleans up event listeners. Safe to call more than once.
   */
  shutdown() {
    const pending = this.unsubscribers;
    for (const unsubscribe of pending) {
      unsubscribe();
    }
    this.unsubscribers = [];
    logger.debug("TelemetryService shut down");
  }
}
465
/**
 * Small publish/subscribe hub built on a Node.js EventEmitter whose listener
 * limit is raised to 100.
 */
class EventBusService {
  emitter = new EventEmitter();

  constructor() {
    this.emitter.setMaxListeners(100);
  }

  /**
   * Emit an event to all subscribers. The event type is logged at debug level.
   */
  emit(eventType, payload) {
    logger.debug(`Event emitted: ${eventType}`);
    this.emitter.emit(eventType, payload);
  }

  /**
   * Subscribe to events of a specific type.
   * Returns an unsubscribe function.
   */
  on(eventType, listener) {
    const { emitter } = this;
    emitter.on(eventType, listener);
    return () => emitter.off(eventType, listener);
  }

  /**
   * Subscribe to events once (auto-unsubscribes after first event).
   */
  once(eventType, listener) {
    this.emitter.once(eventType, listener);
  }

  /**
   * Remove a specific listener.
   */
  off(eventType, listener) {
    this.emitter.off(eventType, listener);
  }

  /**
   * Remove all listeners for a specific event type, or all listeners if no type specified.
   */
  removeAllListeners(eventType) {
    if (eventType !== void 0) {
      this.emitter.removeAllListeners(eventType);
      return;
    }
    this.emitter.removeAllListeners();
  }

  /**
   * Get the count of listeners for a specific event type.
   */
  listenerCount(eventType) {
    return this.emitter.listenerCount(eventType);
  }
}
515
/**
 * Registry of known embedding-model dimensions plus a parser for
 * "provider:model" specifications. Normally used through the singleton
 * returned by getInstance() or through the static convenience methods.
 */
class EmbeddingConfig {
  static instance = null;

  /**
   * Get the singleton instance of EmbeddingConfig.
   * Creates the instance if it doesn't exist.
   */
  static getInstance() {
    EmbeddingConfig.instance ??= new EmbeddingConfig();
    return EmbeddingConfig.instance;
  }

  /**
   * Reset the singleton instance (useful for testing).
   */
  static resetInstance() {
    EmbeddingConfig.instance = null;
  }

  /**
   * Known dimensions for common embedding models.
   * This avoids expensive API calls for dimension detection in telemetry.
   *
   * Note: The "openai" provider also supports OpenAI-compatible APIs like:
   * - Ollama (local models)
   * - LMStudio (local models)
   * - Any service implementing OpenAI's embedding API
   */
  knownModelDimensions = {
    // OpenAI models (also works with Ollama, LMStudio, and other OpenAI-compatible APIs)
    "text-embedding-3-small": 1536,
    "text-embedding-3-large": 3072,
    "text-embedding-ada-002": 1536,
    // Google Vertex AI models
    "text-embedding-004": 768,
    "textembedding-gecko@003": 768,
    "textembedding-gecko@002": 768,
    "textembedding-gecko@001": 768,
    // Google Gemini models (with MRL support)
    "text-embedding-preview-0409": 768,
    "embedding-001": 768,
    // AWS Bedrock models
    // Amazon Titan models
    "amazon.titan-embed-text-v1": 1536,
    "amazon.titan-embed-text-v2:0": 1024,
    "amazon.titan-embed-image-v1": 1024,
    // Image embedding model
    // Cohere models
    "cohere.embed-english-v3": 1024,
    "cohere.embed-multilingual-v3": 1024,
    // SageMaker models (hosted on AWS SageMaker)
    "intfloat/multilingual-e5-large": 1024,
    // Additional AWS models that might be supported
    // Note: Some of these might be placeholders - verify dimensions before use
    // "amazon.nova-embed-multilingual-v1:0": 4096, // Commented out as noted in source
    // MTEB Leaderboard models (source: https://huggingface.co/spaces/mteb/leaderboard)
    // Top performing models from Massive Text Embedding Benchmark
    "sentence-transformers/all-MiniLM-L6-v2": 384,
    "gemini-embedding-001": 3072,
    "Qwen/Qwen3-Embedding-8B": 4096,
    "Qwen/Qwen3-Embedding-4B": 2560,
    "Qwen/Qwen3-Embedding-0.6B": 1024,
    "Linq-AI-Research/Linq-Embed-Mistral": 4096,
    "Alibaba-NLP/gte-Qwen2-7B-instruct": 3584,
    "intfloat/multilingual-e5-large-instruct": 1024,
    "Salesforce/SFR-Embedding-Mistral": 4096,
    "text-multilingual-embedding-002": 768,
    "GritLM/GritLM-7B": 4096,
    "GritLM/GritLM-8x7B": 4096,
    "intfloat/e5-mistral-7b-instruct": 4096,
    "Cohere/Cohere-embed-multilingual-v3.0": 1024,
    "Alibaba-NLP/gte-Qwen2-1.5B-instruct": 8960,
    "Lajavaness/bilingual-embedding-large": 1024,
    "Salesforce/SFR-Embedding-2_R": 4096,
    "NovaSearch/stella_en_1.5B_v5": 8960,
    "NovaSearch/jasper_en_vision_language_v1": 8960,
    "nvidia/NV-Embed-v2": 4096,
    "OrdalieTech/Solon-embeddings-large-0.1": 1024,
    "BAAI/bge-m3": 1024,
    "HIT-TMG/KaLM-embedding-multilingual-mini-v1": 896,
    "jinaai/jina-embeddings-v3": 1024,
    "Alibaba-NLP/gte-multilingual-base": 768,
    "Lajavaness/bilingual-embedding-base": 768,
    "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": 896,
    "nvidia/NV-Embed-v1": 4096,
    "Cohere/Cohere-embed-multilingual-light-v3.0": 384,
    "manu/bge-m3-custom-fr": 1024,
    "Lajavaness/bilingual-embedding-small": 384,
    "Snowflake/snowflake-arctic-embed-l-v2.0": 1024,
    "intfloat/multilingual-e5-base": 768,
    "voyage-3-lite": 512,
    "voyage-3": 1024,
    "intfloat/multilingual-e5-small": 384,
    "Alibaba-NLP/gte-Qwen1.5-7B-instruct": 4096,
    "Snowflake/snowflake-arctic-embed-m-v2.0": 768,
    "deepvk/USER-bge-m3": 1024,
    "Cohere/Cohere-embed-english-v3.0": 1024,
    "Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": 768,
    "ibm-granite/granite-embedding-278m-multilingual": 768,
    "NovaSearch/stella_en_400M_v5": 4096,
    "omarelshehy/arabic-english-sts-matryoshka": 1024,
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2": 768,
    "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": 768,
    "Haon-Chen/speed-embedding-7b-instruct": 4096,
    "sentence-transformers/LaBSE": 768,
    "WhereIsAI/UAE-Large-V1": 1024,
    "ibm-granite/granite-embedding-107m-multilingual": 384,
    "mixedbread-ai/mxbai-embed-large-v1": 1024,
    "intfloat/e5-large-v2": 1024,
    "avsolatorio/GIST-large-Embedding-v0": 1024,
    "sdadas/mmlw-e5-large": 1024,
    "nomic-ai/nomic-embed-text-v1": 768,
    "nomic-ai/nomic-embed-text-v1-ablated": 768,
    "intfloat/e5-base-v2": 768,
    "BAAI/bge-large-en-v1.5": 1024,
    "intfloat/e5-large": 1024,
    "Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": 384,
    "Cohere/Cohere-embed-english-light-v3.0": 384,
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 768,
    "Gameselo/STS-multilingual-mpnet-base-v2": 768,
    "thenlper/gte-large": 1024,
    "avsolatorio/GIST-Embedding-v0": 768,
    "nomic-ai/nomic-embed-text-v1-unsupervised": 768,
    "infgrad/stella-base-en-v2": 768,
    "avsolatorio/NoInstruct-small-Embedding-v0": 384,
    "dwzhu/e5-base-4k": 768,
    "sdadas/mmlw-e5-base": 768,
    "voyage-multilingual-2": 1024,
    "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": 4096,
    "BAAI/bge-base-en-v1.5": 768,
    "avsolatorio/GIST-small-Embedding-v0": 384,
    "sdadas/mmlw-roberta-large": 1024,
    "nomic-ai/nomic-embed-text-v1.5": 768,
    "minishlab/potion-multilingual-128M": 256,
    "shibing624/text2vec-base-multilingual": 384,
    "thenlper/gte-base": 768,
    "intfloat/e5-small-v2": 384,
    "intfloat/e5-base": 768,
    "sentence-transformers/static-similarity-mrl-multilingual-v1": 1024,
    "manu/sentence_croissant_alpha_v0.3": 2048,
    "BAAI/bge-small-en-v1.5": 512,
    "thenlper/gte-small": 384,
    "sdadas/mmlw-e5-small": 384,
    "manu/sentence_croissant_alpha_v0.4": 2048,
    "manu/sentence_croissant_alpha_v0.2": 2048,
    "abhinand/MedEmbed-small-v0.1": 384,
    "ibm-granite/granite-embedding-125m-english": 768,
    "intfloat/e5-small": 384,
    "voyage-large-2-instruct": 1024,
    "sdadas/mmlw-roberta-base": 768,
    "Snowflake/snowflake-arctic-embed-l": 1024,
    "Mihaiii/Ivysaur": 384,
    "Snowflake/snowflake-arctic-embed-m-long": 768,
    "bigscience/sgpt-bloom-7b1-msmarco": 4096,
    "avsolatorio/GIST-all-MiniLM-L6-v2": 384,
    "sergeyzh/LaBSE-ru-turbo": 768,
    "sentence-transformers/all-mpnet-base-v2": 768,
    "Snowflake/snowflake-arctic-embed-m": 768,
    "Snowflake/snowflake-arctic-embed-s": 384,
    "sentence-transformers/all-MiniLM-L12-v2": 384,
    "Mihaiii/gte-micro-v4": 384,
    "Snowflake/snowflake-arctic-embed-m-v1.5": 768,
    "cointegrated/LaBSE-en-ru": 768,
    "Mihaiii/Bulbasaur": 384,
    "ibm-granite/granite-embedding-30m-english": 384,
    "deepfile/embedder-100p": 768,
    "Jaume/gemma-2b-embeddings": 2048,
    "OrlikB/KartonBERT-USE-base-v1": 768,
    "izhx/udever-bloom-7b1": 4096,
    "izhx/udever-bloom-1b1": 1024,
    "brahmairesearch/slx-v0.1": 384,
    "Mihaiii/Wartortle": 384,
    "izhx/udever-bloom-3b": 2048,
    "deepvk/USER-base": 768,
    "ai-forever/ru-en-RoSBERTa": 1024,
    "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": 4096,
    "Mihaiii/Venusaur": 384,
    "Snowflake/snowflake-arctic-embed-xs": 384,
    "jinaai/jina-embedding-b-en-v1": 768,
    "Mihaiii/gte-micro": 384,
    "aari1995/German_Semantic_STS_V2": 1024,
    "Mihaiii/Squirtle": 384,
    "OrlikB/st-polish-kartonberta-base-alpha-v1": 768,
    "sergeyzh/rubert-tiny-turbo": 312,
    "minishlab/potion-base-8M": 256,
    "minishlab/M2V_base_glove_subword": 256,
    "jinaai/jina-embedding-s-en-v1": 512,
    "minishlab/potion-base-4M": 128,
    "minishlab/M2V_base_output": 256,
    "DeepPavlov/rubert-base-cased-sentence": 768,
    "jinaai/jina-embeddings-v2-small-en": 512,
    "cointegrated/rubert-tiny2": 312,
    "minishlab/M2V_base_glove": 256,
    "cointegrated/rubert-tiny": 312,
    "silma-ai/silma-embeddding-matryoshka-v0.1": 768,
    "DeepPavlov/rubert-base-cased": 768,
    "Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": 768,
    "izhx/udever-bloom-560m": 1024,
    "minishlab/potion-base-2M": 64,
    "DeepPavlov/distilrubert-small-cased-conversational": 768,
    "consciousAI/cai-lunaris-text-embeddings": 1024,
    "deepvk/deberta-v1-base": 768,
    "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": 768,
    "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": 768,
    "ai-forever/sbert_large_mt_nlu_ru": 1024,
    "ai-forever/sbert_large_nlu_ru": 1024,
    "malenia1/ternary-weight-embedding": 1024,
    "jinaai/jina-embeddings-v2-base-en": 768,
    "VPLabs/SearchMap_Preview": 4096,
    "Hum-Works/lodestone-base-4096-v1": 768,
    "jinaai/jina-embeddings-v4": 2048
  };

  /**
   * Lowercase lookup map for case-insensitive model dimension queries.
   * Populated in the constructor from knownModelDimensions and kept in sync
   * by setKnownDimensions().
   */
  modelLookup;

  constructor() {
    const lowercased = Object.entries(this.knownModelDimensions).map(
      ([model, dimensions]) => [model.toLowerCase(), dimensions]
    );
    this.modelLookup = /* @__PURE__ */ new Map(lowercased);
  }

  /**
   * Parse embedding model configuration from a provided model specification.
   * This is a synchronous operation that extracts provider, model, and known dimensions.
   *
   * The text before the first ":" is the provider and everything after it is
   * the model; a spec without ":" is treated as an "openai" model.
   *
   * Supports various providers:
   * - openai: OpenAI models and OpenAI-compatible APIs (Ollama, LMStudio, etc.)
   * - vertex: Google Cloud Vertex AI
   * - gemini: Google Generative AI
   * - aws: AWS Bedrock models
   * - microsoft: Azure OpenAI
   * - sagemaker: AWS SageMaker hosted models
   *
   * @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
   * @returns Parsed embedding model configuration
   */
  parse(modelSpec) {
    const spec = normalizeEnvValue(modelSpec || "text-embedding-3-small");
    const separatorAt = spec.indexOf(":");
    const hasProvider = separatorAt !== -1;
    const provider = hasProvider ? spec.substring(0, separatorAt) : "openai";
    const model = hasProvider ? spec.substring(separatorAt + 1) : spec;
    const dimensions = this.modelLookup?.get(model.toLowerCase()) || null;
    return { provider, model, dimensions, modelSpec: spec };
  }

  /**
   * Get the known dimensions for a specific model.
   * Returns null if the model dimensions are not known.
   * Uses case-insensitive lookup.
   *
   * @param model The model name (e.g., "text-embedding-3-small")
   * @returns Known dimensions or null
   */
  getKnownDimensions(model) {
    const key = model.toLowerCase();
    return this.modelLookup?.get(key) || null;
  }

  /**
   * Add or update known dimensions for a model.
   * This can be used to cache discovered dimensions.
   * Stores the original-case name in knownModelDimensions and the lowercase
   * name in the lookup map.
   *
   * @param model The model name
   * @param dimensions The dimensions to cache
   */
  setKnownDimensions(model, dimensions) {
    this.knownModelDimensions[model] = dimensions;
    if (!this.modelLookup) return;
    this.modelLookup.set(model.toLowerCase(), dimensions);
  }

  /**
   * Static method to parse embedding model configuration using the singleton instance.
   * This maintains backward compatibility while using the class-based approach.
   */
  static parseEmbeddingConfig(modelSpec) {
    return EmbeddingConfig.getInstance().parse(modelSpec);
  }

  /**
   * Static method to get known model dimensions using the singleton instance.
   * This maintains backward compatibility while using the class-based approach.
   */
  static getKnownModelDimensions(model) {
    return EmbeddingConfig.getInstance().getKnownDimensions(model);
  }

  /**
   * Static method to set known model dimensions using the singleton instance.
   * This maintains backward compatibility while using the class-based approach.
   */
  static setKnownModelDimensions(model, dimensions) {
    EmbeddingConfig.getInstance().setKnownDimensions(model, dimensions);
  }
}
819
/**
 * Retrieves the event bus stored on the parsed CLI arguments under
 * `_eventBus`.
 *
 * @param {object} argv Parsed arguments object.
 * @returns {*} The value of `argv._eventBus`.
 * @throws {Error} When `argv._eventBus` is missing or falsy.
 */
function getEventBus(argv) {
  const { _eventBus: eventBus } = argv;
  if (eventBus) {
    return eventBus;
  }
  throw new Error("EventBusService not initialized");
}
826
/**
 * Returns the parsed arguments object unchanged, so it can be read as the
 * global options. No copy is made.
 *
 * @param {object} argv Parsed arguments object.
 * @returns {object} The same `argv` reference.
 */
function getGlobalOptions(argv) {
  const globalOptions = argv;
  return globalOptions;
}
829
/**
 * Makes sure a Chromium browser is available for Playwright.
 *
 * Nothing is installed when PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD is "1", when
 * PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH points at an existing file, or when
 * Playwright's own Chromium executable already exists. Otherwise the browser
 * is installed synchronously through `npm exec` from the project root; if
 * that fails, an error is logged and the process exits with code 1.
 */
function ensurePlaywrightBrowsersInstalled() {
  const { PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: skipFlag, PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH: customPath } =
    process.env;

  if (skipFlag === "1") {
    logger.debug(
      "PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD is set, skipping Playwright browser install."
    );
    return;
  }
  if (customPath && existsSync(customPath)) {
    logger.debug(
      `PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH is set to '${customPath}', skipping Playwright browser install.`
    );
    return;
  }

  // Probe for the browser; any failure (including a throwing lookup) means
  // an install attempt is needed.
  let browserMissing = false;
  let probeFailure;
  try {
    const bundledPath = chromium.executablePath();
    if (!bundledPath || !existsSync(bundledPath)) {
      throw new Error("Playwright Chromium browser not found");
    }
  } catch (error) {
    browserMissing = true;
    probeFailure = error;
  }
  if (!browserMissing) {
    return;
  }

  logger.debug(String(probeFailure));
  try {
    logger.info(
      "🌐 Installing Playwright Chromium browser... (this may take a moment)"
    );
    execSync("npm exec -y playwright install --no-shell --with-deps chromium", {
      stdio: "ignore", // Suppress output
      cwd: getProjectRoot()
    });
  } catch (_installErr) {
    logger.error(
      "❌ Failed to install Playwright browsers automatically. Please run:\n npx playwright install --no-shell --with-deps chromium\nand try again."
    );
    process.exit(1);
  }
}
867
/**
 * Resolves the transport protocol to use.
 *
 * "stdio" and "http" are returned unchanged. "auto" selects "stdio" when
 * neither stdin nor stdout is a TTY, and "http" otherwise.
 *
 * @param {string} protocol One of "auto", "stdio" or "http".
 * @returns {"stdio" | "http"} The concrete protocol.
 * @throws {Error} For any other value.
 */
function resolveProtocol(protocol) {
  switch (protocol) {
    case "stdio":
    case "http":
      return protocol;
    case "auto":
      return process.stdin.isTTY || process.stdout.isTTY ? "http" : "stdio";
    default:
      throw new Error(`Invalid protocol: ${protocol}. Must be 'auto', 'stdio', or 'http'`);
  }
}
879
/**
 * Rejects the combination of `--resume` with `--server-url`.
 *
 * @param {*} resume Truthy when the resume flag was given.
 * @param {*} serverUrl Truthy when an external server URL was given.
 * @throws {Error} When both values are truthy.
 */
function validateResumeFlag(resume, serverUrl) {
  const conflicting = Boolean(resume) && Boolean(serverUrl);
  if (!conflicting) {
    return;
  }
  throw new Error(
    "--resume flag is incompatible with --server-url. External workers handle their own job recovery."
  );
}
886
/**
 * Parses and validates a TCP port number.
 *
 * The input is converted to a string, trimmed, and must consist solely of
 * decimal digits. This is stricter than `Number.parseInt`, which would
 * silently accept values with trailing garbage such as `"80abc"` or
 * `"8080.5"`.
 *
 * @param {string | number} portString - The port as given on the command line.
 * @returns {number} The port as an integer in the range 1-65535.
 * @throws {Error} `"Invalid port number"` when the value is not a plain
 *   decimal integer or is outside 1-65535.
 */
function validatePort(portString) {
  const text = String(portString).trim();
  // Digits only: no sign, decimal point, exponent or trailing characters.
  const port = /^\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isInteger(port) || port < 1 || port > 65535) {
    throw new Error("Invalid port number");
  }
  return port;
}
893
/**
 * Validates a host string and returns it with surrounding whitespace removed.
 *
 * @param {string} hostString - Host name or address as supplied by the user.
 * @returns {string} The trimmed host.
 * @throws {Error} `"Host cannot be empty"` when nothing remains after trimming.
 * @throws {Error} `"Host cannot contain whitespace"` when the trimmed value
 *   still contains any whitespace character (space, tab, CR, LF, ...).
 */
function validateHost(hostString) {
  const trimmed = hostString.trim();
  if (!trimmed) {
    throw new Error("Host cannot be empty");
  }
  // `\s` covers every whitespace character rather than a hand-picked subset.
  if (/\s/.test(trimmed)) {
    throw new Error("Host cannot contain whitespace");
  }
  return trimmed;
}
903
/**
 * Builds the application server configuration from CLI-level options,
 * filling in defaults for flags that are `null` or `undefined`.
 *
 * Defaults: web interface off, MCP server on, API server off, worker on,
 * logo shown. `port`, `externalWorkerUrl` and `startupContext` are copied
 * through untouched.
 *
 * @param {object} options - Source options.
 * @returns {object} A new configuration object.
 */
function createAppServerConfig(options) {
  const {
    enableWebInterface,
    enableMcpServer,
    enableApiServer,
    enableWorker,
    port,
    externalWorkerUrl,
    showLogo,
    startupContext
  } = options;
  // `??` (not destructuring defaults) so that explicit `null` also falls back.
  return {
    enableWebInterface: enableWebInterface ?? false,
    enableMcpServer: enableMcpServer ?? true,
    enableApiServer: enableApiServer ?? false,
    enableWorker: enableWorker ?? true,
    port,
    externalWorkerUrl,
    showLogo: showLogo ?? true,
    startupContext
  };
}
915
/**
 * Converts a list of `"Name: value"` strings into a header map.
 *
 * Each entry is split at its first colon; name and value are trimmed.
 * Entries without a colon, with the colon in first position, or whose name is
 * empty after trimming are skipped. A later entry overwrites an earlier one
 * with the same name. Anything that is not an array yields an empty object.
 *
 * @param {string[] | undefined} headerOptions - Raw header options.
 * @returns {Object<string, string>} Header names mapped to values.
 */
function parseHeaders(headerOptions) {
  const headers = {};
  if (!Array.isArray(headerOptions)) {
    return headers;
  }
  for (const entry of headerOptions) {
    const separatorAt = entry.indexOf(":");
    if (separatorAt <= 0) {
      continue;
    }
    const name = entry.slice(0, separatorAt).trim();
    if (!name) {
      continue;
    }
    headers[name] = entry.slice(separatorAt + 1).trim();
  }
  return headers;
}
929
/**
 * Derives the auth configuration from CLI options.
 *
 * @param {object} options - Options carrying `authEnabled`, `authIssuerUrl`
 *   and `authAudience`.
 * @returns {object | undefined} `undefined` when `authEnabled` is falsy;
 *   otherwise an enabled config with the issuer URL, the audience and the
 *   default scopes `["openid", "profile"]`.
 */
function parseAuthConfig(options) {
  if (options.authEnabled) {
    const { authIssuerUrl: issuerUrl, authAudience: audience } = options;
    return {
      enabled: true,
      issuerUrl,
      audience,
      // Default scopes for OAuth2/OIDC
      scopes: ["openid", "profile"]
    };
  }
  return undefined;
}
941
/**
 * Validates an auth configuration and throws one aggregated error listing
 * every problem found. Does nothing when `authConfig.enabled` is falsy.
 *
 * Rules:
 *  - `issuerUrl` is required, must parse as a URL and must use `https:`.
 *  - `audience` is required and must be an absolute URL or a URN.
 *    - An `http:` audience whose host is not `localhost` only logs a warning.
 *    - A URL fragment in the audience is an error.
 *    - A `urn:` audience must have non-empty namespace and specific parts
 *      (`urn:namespace:specific-string`).
 *
 * The URN format is checked on both paths: `new URL()` parses `urn:` strings
 * successfully, so checking only after a parse failure would let malformed
 * URNs such as `urn:` or `urn:x` through.
 *
 * @param {{enabled?: boolean, issuerUrl?: string, audience?: string}} authConfig
 * @returns {void}
 * @throws {Error} `"Auth configuration validation failed:\n…"` followed by one
 *   line per problem.
 */
function validateAuthConfig(authConfig) {
  if (!authConfig.enabled) {
    return;
  }
  const errors = [];
  // True when the value has non-empty second and third colon-separated parts.
  const isWellFormedUrn = (value) => {
    const [, namespace, specific] = value.split(":");
    return Boolean(namespace) && Boolean(specific);
  };
  const urnFormatError = "URN audience must follow format: urn:namespace:specific-string";

  if (!authConfig.issuerUrl) {
    errors.push("--auth-issuer-url is required when auth is enabled");
  } else {
    try {
      const url = new URL(authConfig.issuerUrl);
      if (url.protocol !== "https:") {
        errors.push("Issuer URL must use HTTPS protocol");
      }
    } catch {
      errors.push("Issuer URL must be a valid URL");
    }
  }

  if (!authConfig.audience) {
    errors.push("--auth-audience is required when auth is enabled");
  } else {
    let url = null;
    try {
      url = new URL(authConfig.audience);
    } catch {
      url = null;
    }
    if (url) {
      if (url.protocol === "http:" && url.hostname !== "localhost") {
        logger.warn(
          "⚠️ Audience uses HTTP protocol - consider using HTTPS for production"
        );
      }
      if (url.hash) {
        errors.push("Audience must not contain URL fragments");
      }
      // The URL parser accepts any `urn:` string, so enforce the URN shape here.
      if (url.protocol === "urn:" && !isWellFormedUrn(authConfig.audience)) {
        errors.push(urnFormatError);
      }
    } else if (authConfig.audience.startsWith("urn:")) {
      if (!isWellFormedUrn(authConfig.audience)) {
        errors.push(urnFormatError);
      }
    } else {
      errors.push(
        "Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
      );
    }
  }

  if (errors.length > 0) {
    throw new Error(`Auth configuration validation failed:\n${errors.join("\n")}`);
  }
}
989
/**
 * Logs a warning when authentication is enabled but the process does not
 * look like a local/development deployment.
 *
 * The deployment is treated as local when any of these holds:
 * `NODE_ENV` is not `"production"`, `port` is 6280 (default dev port), or the
 * `HOSTNAME` environment variable contains `"localhost"`.
 *
 * @param {{enabled?: boolean} | undefined} authConfig - Auth configuration.
 * @param {number} port - Port the server listens on.
 * @returns {void}
 */
function warnHttpUsage(authConfig, port) {
  if (!authConfig?.enabled) {
    return;
  }
  if (process.env.NODE_ENV !== "production") {
    return;
  }
  // default dev port
  if (port === 6280) {
    return;
  }
  if (process.env.HOSTNAME?.includes("localhost")) {
    return;
  }
  logger.warn(
    "⚠️ Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
  );
}
1001
/**
 * Creates a new event bus together with a telemetry service constructed
 * from that bus.
 *
 * @returns {{eventBus: EventBusService, telemetryService: TelemetryService}}
 *   The freshly constructed pair.
 */
function createEventServices() {
  const services = { eventBus: new EventBusService() };
  services.telemetryService = new TelemetryService(services.eventBus);
  return services;
}
1006
/**
 * Resolves the embedding configuration for a model specification.
 *
 * @param {string | undefined} embeddingModel - Model specification; a falsy
 *   value means no model was specified.
 * @returns {*} The result of `EmbeddingConfig.parseEmbeddingConfig`, or
 *   `null` when no model was specified or resolution threw (the failure is
 *   logged at debug level only).
 */
function resolveEmbeddingContext(embeddingModel) {
  let context = null;
  try {
    if (embeddingModel) {
      logger.debug(`Resolving embedding configuration for model: ${embeddingModel}`);
      context = EmbeddingConfig.parseEmbeddingConfig(embeddingModel);
    } else {
      logger.debug("No embedding model specified. Embeddings are disabled.");
    }
  } catch (error) {
    logger.debug(`Failed to resolve embedding configuration: ${error}`);
    context = null;
  }
  return context;
}
1020
// Frozen, prototype-less namespace object bundling this module's utility
// functions; tagged with Symbol.toStringTag "Module".
const utils = /* @__PURE__ */ (() => {
  const namespace = {
    __proto__: null,
    createAppServerConfig,
    createEventServices,
    ensurePlaywrightBrowsersInstalled,
    getEventBus,
    getGlobalOptions,
    parseAuthConfig,
    parseHeaders,
    resolveEmbeddingContext,
    resolveProtocol,
    validateAuthConfig,
    validateHost,
    validatePort,
    validateResumeFlag,
    warnHttpUsage
  };
  Object.defineProperty(namespace, Symbol.toStringTag, { value: "Module" });
  return Object.freeze(namespace);
})();
1037
+ export {
1038
+ EmbeddingConfig as E,
1039
+ PipelineJobStatus as P,
1040
+ ServerEventName as S,
1041
+ TelemetryEvent as T,
1042
+ EventType as a,
1043
+ getEventBus as b,
1044
+ createAppServerConfig as c,
1045
+ parseHeaders as d,
1046
+ ensurePlaywrightBrowsersInstalled as e,
1047
+ validatePort as f,
1048
+ getProjectRoot as g,
1049
+ validateHost as h,
1050
+ resolveStorePath as i,
1051
+ initTelemetry as j,
1052
+ EventBusService as k,
1053
+ TelemetryService as l,
1054
+ parseAuthConfig as p,
1055
+ resolveProtocol as r,
1056
+ shouldEnableTelemetry as s,
1057
+ telemetry as t,
1058
+ utils as u,
1059
+ validateAuthConfig as v,
1060
+ warnHttpUsage as w
1061
+ };
1062
+ //# sourceMappingURL=utils-XwXdQlZ_.js.map