@arabold/docs-mcp-server 1.28.0 → 1.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -19,14 +19,13 @@ import Fastify from "fastify";
19
19
  import { WebSocketServer } from "ws";
20
20
  import { ProxyOAuthServerProvider } from "@modelcontextprotocol/sdk/server/auth/providers/proxyProvider.js";
21
21
  import { createRemoteJWKSet, jwtVerify } from "jose";
22
- import { execSync } from "node:child_process";
23
- import { chromium } from "playwright";
24
22
  import { createWSClient, createTRPCClient, splitLink, httpBatchLink, wsLink, createTRPCProxyClient } from "@trpc/client";
25
23
  import superjson from "superjson";
26
24
  import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
27
25
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
28
26
  import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
29
27
  import { z } from "zod/v3";
28
+ import { chromium } from "playwright";
30
29
  import mime from "mime";
31
30
  import { HeaderGenerator } from "header-generator";
32
31
  import fs$1 from "node:fs/promises";
@@ -56,12 +55,13 @@ import { fastifyTRPCPlugin } from "@trpc/server/adapters/fastify";
56
55
  import { applyWSSHandler } from "@trpc/server/adapters/ws";
57
56
  import { observable } from "@trpc/server/observable";
58
57
  import { z as z$1 } from "zod";
59
- import { jsxs, jsx, Fragment } from "@kitajs/html/jsx-runtime";
58
+ import { jsx, jsxs, Fragment } from "@kitajs/html/jsx-runtime";
60
59
  import DOMPurify from "dompurify";
61
60
  import { escapeHtml } from "@kitajs/html";
62
61
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
63
62
  import { v4 } from "uuid";
64
63
  import { minimatch } from "minimatch";
64
+ import { execSync } from "node:child_process";
65
65
  class StoreError extends Error {
66
66
  constructor(message, cause) {
67
67
  super(cause ? `${message} caused by ${cause}` : message);
@@ -268,15 +268,19 @@ function createEmbeddingModel(providerAndModel) {
268
268
  if (!process.env.OPENAI_API_KEY) {
269
269
  throw new MissingCredentialsError("openai", ["OPENAI_API_KEY"]);
270
270
  }
271
+ const timeoutMs = 3e4;
271
272
  const config = {
272
273
  ...baseConfig,
273
274
  modelName: model,
274
- batchSize: 512
275
+ batchSize: 512,
275
276
  // OpenAI supports large batches
277
+ timeout: timeoutMs
276
278
  };
277
279
  const baseURL = process.env.OPENAI_API_BASE;
278
280
  if (baseURL) {
279
- config.configuration = { baseURL };
281
+ config.configuration = { baseURL, timeout: timeoutMs };
282
+ } else {
283
+ config.configuration = { timeout: timeoutMs };
280
284
  }
281
285
  return new OpenAIEmbeddings(config);
282
286
  }
@@ -1011,7 +1015,7 @@ class ProxyAuthManager {
1011
1015
  logger.debug(`Token validation capabilities: ${capabilities.join(", ")}`);
1012
1016
  if (capabilities.length === 0) {
1013
1017
  logger.warn(
1014
- "⚠️ No token validation mechanisms available - authentication may fail"
1018
+ "⚠️ No token validation mechanisms available - authentication may fail"
1015
1019
  );
1016
1020
  }
1017
1021
  this.proxyProvider = new ProxyOAuthServerProvider({
@@ -1349,667 +1353,154 @@ class ProxyAuthManager {
1349
1353
  }
1350
1354
  }
1351
1355
  }
1352
- class EmbeddingConfig {
1353
- static instance = null;
1356
+ class RemoteEventProxy {
1357
+ constructor(remoteWorkerUrl, localEventBus) {
1358
+ this.remoteWorkerUrl = remoteWorkerUrl;
1359
+ this.localEventBus = localEventBus;
1360
+ }
1361
+ trpcClient = null;
1362
+ wsClient = null;
1363
+ subscription = null;
1364
+ isConnected = false;
1354
1365
  /**
1355
- * Get the singleton instance of EmbeddingConfig.
1356
- * Creates the instance if it doesn't exist.
1366
+ * Start subscribing to remote events and forwarding them locally.
1357
1367
  */
1358
- static getInstance() {
1359
- if (EmbeddingConfig.instance === null) {
1360
- EmbeddingConfig.instance = new EmbeddingConfig();
1368
+ async connect() {
1369
+ if (this.isConnected) {
1370
+ logger.warn("Remote event proxy already connected");
1371
+ return;
1372
+ }
1373
+ logger.debug(`Connecting to remote worker at ${this.remoteWorkerUrl}`);
1374
+ try {
1375
+ const url = new URL(this.remoteWorkerUrl);
1376
+ const baseUrl = `${url.protocol}//${url.host}`;
1377
+ const wsUrl = baseUrl.replace(/^http/, "ws");
1378
+ this.wsClient = createWSClient({
1379
+ url: wsUrl
1380
+ });
1381
+ this.trpcClient = createTRPCClient({
1382
+ links: [
1383
+ splitLink({
1384
+ condition: (op) => op.type === "subscription",
1385
+ true: wsLink({ client: this.wsClient, transformer: superjson }),
1386
+ false: httpBatchLink({ url: this.remoteWorkerUrl, transformer: superjson })
1387
+ })
1388
+ ]
1389
+ });
1390
+ this.subscription = this.trpcClient.events.subscribe.subscribe(
1391
+ {},
1392
+ // Subscribe to all event types
1393
+ {
1394
+ onData: (data) => {
1395
+ logger.debug(`Received remote event: ${data.type}`);
1396
+ this.localEventBus.emit(data.type, data.payload);
1397
+ },
1398
+ onError: (error) => {
1399
+ logger.error(`❌ Remote event subscription error: ${error}`);
1400
+ this.isConnected = false;
1401
+ this.scheduleReconnect();
1402
+ },
1403
+ onStarted: () => {
1404
+ logger.debug("Remote event subscription started");
1405
+ this.isConnected = true;
1406
+ },
1407
+ onComplete: () => {
1408
+ logger.debug("Remote event subscription completed");
1409
+ this.isConnected = false;
1410
+ }
1411
+ }
1412
+ );
1413
+ } catch (error) {
1414
+ logger.error(`❌ Failed to connect to remote worker: ${error}`);
1415
+ this.scheduleReconnect();
1361
1416
  }
1362
- return EmbeddingConfig.instance;
1363
1417
  }
1364
1418
  /**
1365
- * Reset the singleton instance (useful for testing).
1419
+ * Disconnect from the remote worker and stop forwarding events.
1366
1420
  */
1367
- static resetInstance() {
1368
- EmbeddingConfig.instance = null;
1421
+ disconnect() {
1422
+ if (this.subscription) {
1423
+ this.subscription.unsubscribe();
1424
+ this.subscription = null;
1425
+ }
1426
+ if (this.wsClient) {
1427
+ this.wsClient.close();
1428
+ this.wsClient = null;
1429
+ }
1430
+ this.isConnected = false;
1431
+ logger.info("🚫 Disconnected from remote worker");
1369
1432
  }
1370
1433
  /**
1371
- * Known dimensions for common embedding models.
1372
- * This avoids expensive API calls for dimension detection in telemetry.
1373
- *
1374
- * Note: The "openai" provider also supports OpenAI-compatible APIs like:
1375
- * - Ollama (local models)
1376
- * - LMStudio (local models)
1377
- * - Any service implementing OpenAI's embedding API
1434
+ * Check if the proxy is currently connected to the remote worker.
1378
1435
  */
1379
- knownModelDimensions = {
1380
- // OpenAI models (also works with Ollama, LMStudio, and other OpenAI-compatible APIs)
1381
- "text-embedding-3-small": 1536,
1382
- "text-embedding-3-large": 3072,
1383
- "text-embedding-ada-002": 1536,
1384
- // Google Vertex AI models
1385
- "text-embedding-004": 768,
1386
- "textembedding-gecko@003": 768,
1387
- "textembedding-gecko@002": 768,
1388
- "textembedding-gecko@001": 768,
1389
- // Google Gemini models (with MRL support)
1390
- "text-embedding-preview-0409": 768,
1391
- "embedding-001": 768,
1392
- // AWS Bedrock models
1393
- // Amazon Titan models
1394
- "amazon.titan-embed-text-v1": 1536,
1395
- "amazon.titan-embed-text-v2:0": 1024,
1396
- "amazon.titan-embed-image-v1": 1024,
1397
- // Image embedding model
1398
- // Cohere models
1399
- "cohere.embed-english-v3": 1024,
1400
- "cohere.embed-multilingual-v3": 1024,
1401
- // SageMaker models (hosted on AWS SageMaker)
1402
- "intfloat/multilingual-e5-large": 1024,
1403
- // Additional AWS models that might be supported
1404
- // Note: Some of these might be placeholders - verify dimensions before use
1405
- // "amazon.nova-embed-multilingual-v1:0": 4096, // Commented out as noted in source
1406
- // MTEB Leaderboard models (source: https://huggingface.co/spaces/mteb/leaderboard)
1407
- // Top performing models from Massive Text Embedding Benchmark
1408
- "sentence-transformers/all-MiniLM-L6-v2": 384,
1409
- "gemini-embedding-001": 3072,
1410
- "Qwen/Qwen3-Embedding-8B": 4096,
1411
- "Qwen/Qwen3-Embedding-4B": 2560,
1412
- "Qwen/Qwen3-Embedding-0.6B": 1024,
1413
- "Linq-AI-Research/Linq-Embed-Mistral": 4096,
1414
- "Alibaba-NLP/gte-Qwen2-7B-instruct": 3584,
1415
- "intfloat/multilingual-e5-large-instruct": 1024,
1416
- "Salesforce/SFR-Embedding-Mistral": 4096,
1417
- "text-multilingual-embedding-002": 768,
1418
- "GritLM/GritLM-7B": 4096,
1419
- "GritLM/GritLM-8x7B": 4096,
1420
- "intfloat/e5-mistral-7b-instruct": 4096,
1421
- "Cohere/Cohere-embed-multilingual-v3.0": 1024,
1422
- "Alibaba-NLP/gte-Qwen2-1.5B-instruct": 8960,
1423
- "Lajavaness/bilingual-embedding-large": 1024,
1424
- "Salesforce/SFR-Embedding-2_R": 4096,
1425
- "NovaSearch/stella_en_1.5B_v5": 8960,
1426
- "NovaSearch/jasper_en_vision_language_v1": 8960,
1427
- "nvidia/NV-Embed-v2": 4096,
1428
- "OrdalieTech/Solon-embeddings-large-0.1": 1024,
1429
- "BAAI/bge-m3": 1024,
1430
- "HIT-TMG/KaLM-embedding-multilingual-mini-v1": 896,
1431
- "jinaai/jina-embeddings-v3": 1024,
1432
- "Alibaba-NLP/gte-multilingual-base": 768,
1433
- "Lajavaness/bilingual-embedding-base": 768,
1434
- "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": 896,
1435
- "nvidia/NV-Embed-v1": 4096,
1436
- "Cohere/Cohere-embed-multilingual-light-v3.0": 384,
1437
- "manu/bge-m3-custom-fr": 1024,
1438
- "Lajavaness/bilingual-embedding-small": 384,
1439
- "Snowflake/snowflake-arctic-embed-l-v2.0": 1024,
1440
- "intfloat/multilingual-e5-base": 768,
1441
- "voyage-3-lite": 512,
1442
- "voyage-3": 1024,
1443
- "intfloat/multilingual-e5-small": 384,
1444
- "Alibaba-NLP/gte-Qwen1.5-7B-instruct": 4096,
1445
- "Snowflake/snowflake-arctic-embed-m-v2.0": 768,
1446
- "deepvk/USER-bge-m3": 1024,
1447
- "Cohere/Cohere-embed-english-v3.0": 1024,
1448
- "Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": 768,
1449
- "ibm-granite/granite-embedding-278m-multilingual": 768,
1450
- "NovaSearch/stella_en_400M_v5": 4096,
1451
- "omarelshehy/arabic-english-sts-matryoshka": 1024,
1452
- "sentence-transformers/paraphrase-multilingual-mpnet-base-v2": 768,
1453
- "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": 768,
1454
- "Haon-Chen/speed-embedding-7b-instruct": 4096,
1455
- "sentence-transformers/LaBSE": 768,
1456
- "WhereIsAI/UAE-Large-V1": 1024,
1457
- "ibm-granite/granite-embedding-107m-multilingual": 384,
1458
- "mixedbread-ai/mxbai-embed-large-v1": 1024,
1459
- "intfloat/e5-large-v2": 1024,
1460
- "avsolatorio/GIST-large-Embedding-v0": 1024,
1461
- "sdadas/mmlw-e5-large": 1024,
1462
- "nomic-ai/nomic-embed-text-v1": 768,
1463
- "nomic-ai/nomic-embed-text-v1-ablated": 768,
1464
- "intfloat/e5-base-v2": 768,
1465
- "BAAI/bge-large-en-v1.5": 1024,
1466
- "intfloat/e5-large": 1024,
1467
- "Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": 384,
1468
- "Cohere/Cohere-embed-english-light-v3.0": 384,
1469
- "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 768,
1470
- "Gameselo/STS-multilingual-mpnet-base-v2": 768,
1471
- "thenlper/gte-large": 1024,
1472
- "avsolatorio/GIST-Embedding-v0": 768,
1473
- "nomic-ai/nomic-embed-text-v1-unsupervised": 768,
1474
- "infgrad/stella-base-en-v2": 768,
1475
- "avsolatorio/NoInstruct-small-Embedding-v0": 384,
1476
- "dwzhu/e5-base-4k": 768,
1477
- "sdadas/mmlw-e5-base": 768,
1478
- "voyage-multilingual-2": 1024,
1479
- "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": 4096,
1480
- "BAAI/bge-base-en-v1.5": 768,
1481
- "avsolatorio/GIST-small-Embedding-v0": 384,
1482
- "sdadas/mmlw-roberta-large": 1024,
1483
- "nomic-ai/nomic-embed-text-v1.5": 768,
1484
- "minishlab/potion-multilingual-128M": 256,
1485
- "shibing624/text2vec-base-multilingual": 384,
1486
- "thenlper/gte-base": 768,
1487
- "intfloat/e5-small-v2": 384,
1488
- "intfloat/e5-base": 768,
1489
- "sentence-transformers/static-similarity-mrl-multilingual-v1": 1024,
1490
- "manu/sentence_croissant_alpha_v0.3": 2048,
1491
- "BAAI/bge-small-en-v1.5": 512,
1492
- "thenlper/gte-small": 384,
1493
- "sdadas/mmlw-e5-small": 384,
1494
- "manu/sentence_croissant_alpha_v0.4": 2048,
1495
- "manu/sentence_croissant_alpha_v0.2": 2048,
1496
- "abhinand/MedEmbed-small-v0.1": 384,
1497
- "ibm-granite/granite-embedding-125m-english": 768,
1498
- "intfloat/e5-small": 384,
1499
- "voyage-large-2-instruct": 1024,
1500
- "sdadas/mmlw-roberta-base": 768,
1501
- "Snowflake/snowflake-arctic-embed-l": 1024,
1502
- "Mihaiii/Ivysaur": 384,
1503
- "Snowflake/snowflake-arctic-embed-m-long": 768,
1504
- "bigscience/sgpt-bloom-7b1-msmarco": 4096,
1505
- "avsolatorio/GIST-all-MiniLM-L6-v2": 384,
1506
- "sergeyzh/LaBSE-ru-turbo": 768,
1507
- "sentence-transformers/all-mpnet-base-v2": 768,
1508
- "Snowflake/snowflake-arctic-embed-m": 768,
1509
- "Snowflake/snowflake-arctic-embed-s": 384,
1510
- "sentence-transformers/all-MiniLM-L12-v2": 384,
1511
- "Mihaiii/gte-micro-v4": 384,
1512
- "Snowflake/snowflake-arctic-embed-m-v1.5": 768,
1513
- "cointegrated/LaBSE-en-ru": 768,
1514
- "Mihaiii/Bulbasaur": 384,
1515
- "ibm-granite/granite-embedding-30m-english": 384,
1516
- "deepfile/embedder-100p": 768,
1517
- "Jaume/gemma-2b-embeddings": 2048,
1518
- "OrlikB/KartonBERT-USE-base-v1": 768,
1519
- "izhx/udever-bloom-7b1": 4096,
1520
- "izhx/udever-bloom-1b1": 1024,
1521
- "brahmairesearch/slx-v0.1": 384,
1522
- "Mihaiii/Wartortle": 384,
1523
- "izhx/udever-bloom-3b": 2048,
1524
- "deepvk/USER-base": 768,
1525
- "ai-forever/ru-en-RoSBERTa": 1024,
1526
- "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": 4096,
1527
- "Mihaiii/Venusaur": 384,
1528
- "Snowflake/snowflake-arctic-embed-xs": 384,
1529
- "jinaai/jina-embedding-b-en-v1": 768,
1530
- "Mihaiii/gte-micro": 384,
1531
- "aari1995/German_Semantic_STS_V2": 1024,
1532
- "Mihaiii/Squirtle": 384,
1533
- "OrlikB/st-polish-kartonberta-base-alpha-v1": 768,
1534
- "sergeyzh/rubert-tiny-turbo": 312,
1535
- "minishlab/potion-base-8M": 256,
1536
- "minishlab/M2V_base_glove_subword": 256,
1537
- "jinaai/jina-embedding-s-en-v1": 512,
1538
- "minishlab/potion-base-4M": 128,
1539
- "minishlab/M2V_base_output": 256,
1540
- "DeepPavlov/rubert-base-cased-sentence": 768,
1541
- "jinaai/jina-embeddings-v2-small-en": 512,
1542
- "cointegrated/rubert-tiny2": 312,
1543
- "minishlab/M2V_base_glove": 256,
1544
- "cointegrated/rubert-tiny": 312,
1545
- "silma-ai/silma-embeddding-matryoshka-v0.1": 768,
1546
- "DeepPavlov/rubert-base-cased": 768,
1547
- "Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": 768,
1548
- "izhx/udever-bloom-560m": 1024,
1549
- "minishlab/potion-base-2M": 64,
1550
- "DeepPavlov/distilrubert-small-cased-conversational": 768,
1551
- "consciousAI/cai-lunaris-text-embeddings": 1024,
1552
- "deepvk/deberta-v1-base": 768,
1553
- "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": 768,
1554
- "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": 768,
1555
- "ai-forever/sbert_large_mt_nlu_ru": 1024,
1556
- "ai-forever/sbert_large_nlu_ru": 1024,
1557
- "malenia1/ternary-weight-embedding": 1024,
1558
- "jinaai/jina-embeddings-v2-base-en": 768,
1559
- "VPLabs/SearchMap_Preview": 4096,
1560
- "Hum-Works/lodestone-base-4096-v1": 768,
1561
- "jinaai/jina-embeddings-v4": 2048
1562
- };
1563
- /**
1564
- * Lowercase lookup map for case-insensitive model dimension queries.
1565
- * Built lazily from knownModelDimensions to ensure consistency.
1566
- */
1567
- modelLookup;
1568
- constructor() {
1569
- this.modelLookup = /* @__PURE__ */ new Map();
1570
- for (const [model, dimensions] of Object.entries(this.knownModelDimensions)) {
1571
- this.modelLookup.set(model.toLowerCase(), dimensions);
1572
- }
1573
- }
1574
- /**
1575
- * Parse embedding model configuration from a provided model specification.
1576
- * This is a synchronous operation that extracts provider, model, and known dimensions.
1577
- *
1578
- * Supports various providers:
1579
- * - openai: OpenAI models and OpenAI-compatible APIs (Ollama, LMStudio, etc.)
1580
- * - vertex: Google Cloud Vertex AI
1581
- * - gemini: Google Generative AI
1582
- * - aws: AWS Bedrock models
1583
- * - microsoft: Azure OpenAI
1584
- * - sagemaker: AWS SageMaker hosted models
1585
- *
1586
- * @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
1587
- * @returns Parsed embedding model configuration
1588
- */
1589
- parse(modelSpec) {
1590
- const spec = modelSpec || "text-embedding-3-small";
1591
- const colonIndex = spec.indexOf(":");
1592
- let provider;
1593
- let model;
1594
- if (colonIndex === -1) {
1595
- provider = "openai";
1596
- model = spec;
1597
- } else {
1598
- provider = spec.substring(0, colonIndex);
1599
- model = spec.substring(colonIndex + 1);
1600
- }
1601
- const dimensions = this.modelLookup?.get(model.toLowerCase()) || null;
1602
- return {
1603
- provider,
1604
- model,
1605
- dimensions,
1606
- modelSpec: spec
1607
- };
1608
- }
1609
- /**
1610
- * Get the known dimensions for a specific model.
1611
- * Returns null if the model dimensions are not known.
1612
- * Uses case-insensitive lookup.
1613
- *
1614
- * @param model The model name (e.g., "text-embedding-3-small")
1615
- * @returns Known dimensions or null
1616
- */
1617
- getKnownDimensions(model) {
1618
- return this.modelLookup?.get(model.toLowerCase()) || null;
1619
- }
1620
- /**
1621
- * Add or update known dimensions for a model.
1622
- * This can be used to cache discovered dimensions.
1623
- * Stores both original case and lowercase for consistent lookup.
1624
- *
1625
- * @param model The model name
1626
- * @param dimensions The dimensions to cache
1627
- */
1628
- setKnownDimensions(model, dimensions) {
1629
- this.knownModelDimensions[model] = dimensions;
1630
- if (this.modelLookup) {
1631
- this.modelLookup.set(model.toLowerCase(), dimensions);
1632
- }
1633
- }
1634
- /**
1635
- * Static method to parse embedding model configuration using the singleton instance.
1636
- * This maintains backward compatibility while using the class-based approach.
1637
- */
1638
- static parseEmbeddingConfig(modelSpec) {
1639
- return EmbeddingConfig.getInstance().parse(modelSpec);
1640
- }
1641
- /**
1642
- * Static method to get known model dimensions using the singleton instance.
1643
- * This maintains backward compatibility while using the class-based approach.
1644
- */
1645
- static getKnownModelDimensions(model) {
1646
- return EmbeddingConfig.getInstance().getKnownDimensions(model);
1436
+ isActive() {
1437
+ return this.isConnected;
1647
1438
  }
1648
1439
  /**
1649
- * Static method to set known model dimensions using the singleton instance.
1650
- * This maintains backward compatibility while using the class-based approach.
1440
+ * Schedule a reconnection attempt after a delay.
1651
1441
  */
1652
- static setKnownModelDimensions(model, dimensions) {
1653
- EmbeddingConfig.getInstance().setKnownDimensions(model, dimensions);
1654
- }
1655
- }
1656
- function getGlobalOptions(command) {
1657
- let rootCommand = command;
1658
- while (rootCommand?.parent) {
1659
- rootCommand = rootCommand.parent;
1660
- }
1661
- return rootCommand?.opts() || {};
1662
- }
1663
- function getEventBus(command) {
1664
- const eventBus = command?._eventBus;
1665
- if (!eventBus) {
1666
- throw new Error("EventBusService not initialized");
1667
- }
1668
- return eventBus;
1669
- }
1670
- function ensurePlaywrightBrowsersInstalled() {
1671
- if (process.env.PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD === "1") {
1672
- logger.debug(
1673
- "PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD is set, skipping Playwright browser install."
1674
- );
1675
- return;
1676
- }
1677
- const chromiumEnvPath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH;
1678
- if (chromiumEnvPath && existsSync(chromiumEnvPath)) {
1679
- logger.debug(
1680
- `PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH is set to '${chromiumEnvPath}', skipping Playwright browser install.`
1681
- );
1682
- return;
1683
- }
1684
- try {
1685
- const chromiumPath = chromium.executablePath();
1686
- if (!chromiumPath || !existsSync(chromiumPath)) {
1687
- throw new Error("Playwright Chromium browser not found");
1688
- }
1689
- } catch (error) {
1690
- logger.debug(String(error));
1691
- try {
1692
- console.log(
1693
- "🌐 Installing Playwright Chromium browser... (this may take a moment)"
1694
- );
1695
- execSync("npm exec -y playwright install --no-shell --with-deps chromium", {
1696
- stdio: "ignore",
1697
- // Suppress output
1698
- cwd: getProjectRoot()
1699
- });
1700
- } catch (_installErr) {
1701
- console.error(
1702
- "❌ Failed to install Playwright browsers automatically. Please run:\n npx playwright install --no-shell --with-deps chromium\nand try again."
1703
- );
1704
- process.exit(1);
1705
- }
1706
- }
1707
- }
1708
- function resolveProtocol(protocol) {
1709
- if (protocol === "auto") {
1710
- if (!process.stdin.isTTY && !process.stdout.isTTY) {
1711
- return "stdio";
1712
- }
1713
- return "http";
1714
- }
1715
- if (protocol === "stdio" || protocol === "http") {
1716
- return protocol;
1442
+ scheduleReconnect() {
1443
+ logger.info("🔄 Scheduling reconnect to remote worker in 5 seconds...");
1444
+ setTimeout(() => {
1445
+ if (!this.isConnected) {
1446
+ this.connect();
1447
+ }
1448
+ }, 5e3);
1717
1449
  }
1718
- throw new Error(`Invalid protocol: ${protocol}. Must be 'auto', 'stdio', or 'http'`);
1719
1450
  }
1720
- const formatOutput = (data) => JSON.stringify(data, null, 2);
1721
- function setupLogging(options, protocol) {
1722
- if (options.silent) {
1723
- setLogLevel(LogLevel.ERROR);
1724
- } else if (options.verbose) {
1725
- setLogLevel(LogLevel.DEBUG);
1451
+ class ToolError extends Error {
1452
+ constructor(message, toolName) {
1453
+ super(message);
1454
+ this.toolName = toolName;
1455
+ this.name = this.constructor.name;
1726
1456
  }
1727
1457
  }
1728
- function validatePort(portString) {
1729
- const port = Number.parseInt(portString, 10);
1730
- if (Number.isNaN(port) || port < 1 || port > 65535) {
1731
- throw new Error("❌ Invalid port number");
1732
- }
1733
- return port;
1458
+ class ValidationError extends ToolError {
1734
1459
  }
1735
- function validateHost(hostString) {
1736
- const trimmed = hostString.trim();
1737
- if (!trimmed) {
1738
- throw new Error("❌ Host cannot be empty");
1739
- }
1740
- if (trimmed.includes(" ") || trimmed.includes(" ") || trimmed.includes("\n")) {
1741
- throw new Error("❌ Host cannot contain whitespace");
1742
- }
1743
- return trimmed;
1460
+ const DEFAULT_MAX_PAGES = 1e3;
1461
+ const DEFAULT_MAX_DEPTH$1 = 3;
1462
+ const DEFAULT_MAX_CONCURRENCY = 3;
1463
+ const DEFAULT_PROTOCOL = "auto";
1464
+ const DEFAULT_HTTP_PORT = 6280;
1465
+ const DEFAULT_WEB_PORT = 6281;
1466
+ const DEFAULT_HOST = "127.0.0.1";
1467
+ const DEFAULT_PAGE_TIMEOUT = 5e3;
1468
+ const FETCHER_MAX_RETRIES = 6;
1469
+ const FETCHER_BASE_DELAY = 1e3;
1470
+ const FETCHER_MAX_CACHE_ITEMS = 200;
1471
+ const FETCHER_MAX_CACHE_ITEM_SIZE_BYTES = 500 * 1024;
1472
+ const SPLITTER_MIN_CHUNK_SIZE = 500;
1473
+ const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
1474
+ const SPLITTER_MAX_CHUNK_SIZE = 5e3;
1475
+ const EMBEDDING_BATCH_SIZE = 100;
1476
+ const EMBEDDING_BATCH_CHARS = 5e4;
1477
+ const MIGRATION_MAX_RETRIES = 5;
1478
+ const MIGRATION_RETRY_DELAY_MS = 300;
1479
+ const SEARCH_OVERFETCH_FACTOR = 2;
1480
+ const SEARCH_WEIGHT_VEC = 1;
1481
+ const SEARCH_WEIGHT_FTS = 1;
1482
+ const VECTOR_SEARCH_MULTIPLIER = 10;
1483
+ function createResponse(text) {
1484
+ return {
1485
+ content: [
1486
+ {
1487
+ type: "text",
1488
+ text
1489
+ }
1490
+ ],
1491
+ isError: false
1492
+ };
1744
1493
  }
1745
- function createAppServerConfig(options) {
1494
+ function createError(errorOrText) {
1495
+ const text = errorOrText instanceof Error ? errorOrText.message : String(errorOrText);
1746
1496
  return {
1747
- enableWebInterface: options.enableWebInterface ?? false,
1748
- enableMcpServer: options.enableMcpServer ?? true,
1749
- enableApiServer: options.enableApiServer ?? false,
1750
- enableWorker: options.enableWorker ?? true,
1751
- port: options.port,
1752
- host: options.host,
1753
- externalWorkerUrl: options.externalWorkerUrl,
1754
- readOnly: options.readOnly ?? false,
1755
- auth: options.auth,
1756
- startupContext: options.startupContext
1757
- };
1758
- }
1759
- function parseHeaders(headerOptions) {
1760
- const headers = {};
1761
- if (Array.isArray(headerOptions)) {
1762
- for (const entry of headerOptions) {
1763
- const idx = entry.indexOf(":");
1764
- if (idx > 0) {
1765
- const name = entry.slice(0, idx).trim();
1766
- const value = entry.slice(idx + 1).trim();
1767
- if (name) headers[name] = value;
1768
- }
1769
- }
1770
- }
1771
- return headers;
1772
- }
1773
- function parseAuthConfig(options) {
1774
- if (!options.authEnabled) {
1775
- return void 0;
1776
- }
1777
- return {
1778
- enabled: true,
1779
- issuerUrl: options.authIssuerUrl,
1780
- audience: options.authAudience,
1781
- scopes: ["openid", "profile"]
1782
- // Default scopes for OAuth2/OIDC
1783
- };
1784
- }
1785
- function validateAuthConfig(authConfig) {
1786
- if (!authConfig.enabled) {
1787
- return;
1788
- }
1789
- const errors = [];
1790
- if (!authConfig.issuerUrl) {
1791
- errors.push("--auth-issuer-url is required when auth is enabled");
1792
- } else {
1793
- try {
1794
- const url = new URL(authConfig.issuerUrl);
1795
- if (url.protocol !== "https:") {
1796
- errors.push("Issuer URL must use HTTPS protocol");
1797
- }
1798
- } catch {
1799
- errors.push("Issuer URL must be a valid URL");
1800
- }
1801
- }
1802
- if (!authConfig.audience) {
1803
- errors.push("--auth-audience is required when auth is enabled");
1804
- } else {
1805
- try {
1806
- const url = new URL(authConfig.audience);
1807
- if (url.protocol === "http:" && url.hostname !== "localhost") {
1808
- logger.warn(
1809
- "⚠️ Audience uses HTTP protocol - consider using HTTPS for production"
1810
- );
1811
- }
1812
- if (url.hash) {
1813
- errors.push("Audience must not contain URL fragments");
1814
- }
1815
- } catch {
1816
- if (authConfig.audience.startsWith("urn:")) {
1817
- const urnParts = authConfig.audience.split(":");
1818
- if (urnParts.length < 3 || !urnParts[1] || !urnParts[2]) {
1819
- errors.push("URN audience must follow format: urn:namespace:specific-string");
1820
- }
1821
- } else {
1822
- errors.push(
1823
- "Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
1824
- );
1825
- }
1826
- }
1827
- }
1828
- if (errors.length > 0) {
1829
- throw new Error(`Auth configuration validation failed:
1830
- ${errors.join("\n")}`);
1831
- }
1832
- }
1833
- function warnHttpUsage(authConfig, port) {
1834
- if (!authConfig?.enabled) {
1835
- return;
1836
- }
1837
- const isLocalhost = process.env.NODE_ENV !== "production" || port === 6280 || // default dev port
1838
- process.env.HOSTNAME?.includes("localhost");
1839
- if (!isLocalhost) {
1840
- logger.warn(
1841
- "⚠️ Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
1842
- );
1843
- }
1844
- }
1845
- function resolveEmbeddingContext(embeddingModel) {
1846
- try {
1847
- let modelSpec = embeddingModel;
1848
- if (!modelSpec && process.env.OPENAI_API_KEY) {
1849
- modelSpec = "text-embedding-3-small";
1850
- logger.debug(
1851
- "Using default OpenAI embedding model due to OPENAI_API_KEY presence."
1852
- );
1853
- }
1854
- if (!modelSpec) {
1855
- logger.debug(
1856
- "No embedding model specified and OPENAI_API_KEY not found. Embeddings are disabled."
1857
- );
1858
- return null;
1859
- }
1860
- logger.debug(`Resolving embedding configuration for model: ${modelSpec}`);
1861
- return EmbeddingConfig.parseEmbeddingConfig(modelSpec);
1862
- } catch (error) {
1863
- logger.debug(`Failed to resolve embedding configuration: ${error}`);
1864
- return null;
1865
- }
1866
- }
1867
- class RemoteEventProxy {
1868
- constructor(remoteWorkerUrl, localEventBus) {
1869
- this.remoteWorkerUrl = remoteWorkerUrl;
1870
- this.localEventBus = localEventBus;
1871
- }
1872
- trpcClient = null;
1873
- wsClient = null;
1874
- subscription = null;
1875
- isConnected = false;
1876
- /**
1877
- * Start subscribing to remote events and forwarding them locally.
1878
- */
1879
- async connect() {
1880
- if (this.isConnected) {
1881
- logger.warn("Remote event proxy already connected");
1882
- return;
1883
- }
1884
- logger.info(`📡 Connecting to remote worker at ${this.remoteWorkerUrl}`);
1885
- try {
1886
- const url = new URL(this.remoteWorkerUrl);
1887
- const baseUrl = `${url.protocol}//${url.host}`;
1888
- const wsUrl = baseUrl.replace(/^http/, "ws");
1889
- this.wsClient = createWSClient({
1890
- url: wsUrl
1891
- });
1892
- this.trpcClient = createTRPCClient({
1893
- links: [
1894
- splitLink({
1895
- condition: (op) => op.type === "subscription",
1896
- true: wsLink({ client: this.wsClient, transformer: superjson }),
1897
- false: httpBatchLink({ url: this.remoteWorkerUrl, transformer: superjson })
1898
- })
1899
- ]
1900
- });
1901
- this.subscription = this.trpcClient.events.subscribe.subscribe(
1902
- {},
1903
- // Subscribe to all event types
1904
- {
1905
- onData: (data) => {
1906
- logger.debug(`📥 Received remote event: ${data.type}`);
1907
- this.localEventBus.emit(data.type, data.payload);
1908
- },
1909
- onError: (error) => {
1910
- logger.error(`❌ Remote event subscription error: ${error}`);
1911
- this.isConnected = false;
1912
- this.scheduleReconnect();
1913
- },
1914
- onStarted: () => {
1915
- logger.info("✅ Remote event subscription started");
1916
- this.isConnected = true;
1917
- },
1918
- onComplete: () => {
1919
- logger.info("✅ Remote event subscription completed");
1920
- this.isConnected = false;
1921
- }
1922
- }
1923
- );
1924
- } catch (error) {
1925
- logger.error(`❌ Failed to connect to remote worker: ${error}`);
1926
- this.scheduleReconnect();
1927
- }
1928
- }
1929
- /**
1930
- * Disconnect from the remote worker and stop forwarding events.
1931
- */
1932
- disconnect() {
1933
- if (this.subscription) {
1934
- this.subscription.unsubscribe();
1935
- this.subscription = null;
1936
- }
1937
- if (this.wsClient) {
1938
- this.wsClient.close();
1939
- this.wsClient = null;
1940
- }
1941
- this.isConnected = false;
1942
- logger.info("🚫 Disconnected from remote worker");
1943
- }
1944
- /**
1945
- * Check if the proxy is currently connected to the remote worker.
1946
- */
1947
- isActive() {
1948
- return this.isConnected;
1949
- }
1950
- /**
1951
- * Schedule a reconnection attempt after a delay.
1952
- */
1953
- scheduleReconnect() {
1954
- logger.info("🔄 Scheduling reconnect to remote worker in 5 seconds...");
1955
- setTimeout(() => {
1956
- if (!this.isConnected) {
1957
- this.connect();
1958
- }
1959
- }, 5e3);
1960
- }
1961
- }
1962
- class ToolError extends Error {
1963
- constructor(message, toolName) {
1964
- super(message);
1965
- this.toolName = toolName;
1966
- this.name = this.constructor.name;
1967
- }
1968
- }
1969
- class ValidationError extends ToolError {
1970
- }
1971
- const DEFAULT_MAX_PAGES = 1e3;
1972
- const DEFAULT_MAX_DEPTH$1 = 3;
1973
- const DEFAULT_MAX_CONCURRENCY = 3;
1974
- const DEFAULT_PROTOCOL = "auto";
1975
- const DEFAULT_HTTP_PORT = 6280;
1976
- const DEFAULT_WEB_PORT = 6281;
1977
- const DEFAULT_HOST = "127.0.0.1";
1978
- const DEFAULT_PAGE_TIMEOUT = 5e3;
1979
- const FETCHER_MAX_RETRIES = 6;
1980
- const FETCHER_BASE_DELAY = 1e3;
1981
- const SPLITTER_MIN_CHUNK_SIZE = 500;
1982
- const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
1983
- const SPLITTER_MAX_CHUNK_SIZE = 5e3;
1984
- const EMBEDDING_BATCH_SIZE = 100;
1985
- const EMBEDDING_BATCH_CHARS = 5e4;
1986
- const MIGRATION_MAX_RETRIES = 5;
1987
- const MIGRATION_RETRY_DELAY_MS = 300;
1988
- const SEARCH_OVERFETCH_FACTOR = 2;
1989
- const SEARCH_WEIGHT_VEC = 1;
1990
- const SEARCH_WEIGHT_FTS = 1;
1991
- const VECTOR_SEARCH_MULTIPLIER = 10;
1992
- function createResponse(text) {
1993
- return {
1994
- content: [
1995
- {
1996
- type: "text",
1997
- text
1998
- }
1999
- ],
2000
- isError: false
2001
- };
2002
- }
2003
- function createError(errorOrText) {
2004
- const text = errorOrText instanceof Error ? errorOrText.message : String(errorOrText);
2005
- return {
2006
- content: [
2007
- {
2008
- type: "text",
2009
- text
2010
- }
2011
- ],
2012
- isError: true
1497
+ content: [
1498
+ {
1499
+ type: "text",
1500
+ text
1501
+ }
1502
+ ],
1503
+ isError: true
2013
1504
  };
2014
1505
  }
2015
1506
  function createMcpServerInstance(tools, readOnly = false) {
@@ -2900,7 +2391,7 @@ class BrowserFetcher {
2900
2391
  }
2901
2392
  logger.debug("Browser closed successfully");
2902
2393
  } catch (error) {
2903
- logger.warn(`⚠️ Error closing browser: ${error}`);
2394
+ logger.warn(`⚠️ Error closing browser: ${error}`);
2904
2395
  }
2905
2396
  }
2906
2397
  }
@@ -5497,10 +4988,80 @@ var ScrapeMode = /* @__PURE__ */ ((ScrapeMode2) => {
5497
4988
  ScrapeMode2["Auto"] = "auto";
5498
4989
  return ScrapeMode2;
5499
4990
  })(ScrapeMode || {});
5500
- class HtmlPlaywrightMiddleware {
5501
- browser = null;
4991
+ class SimpleMemoryCache {
4992
+ cache;
4993
+ maxSize;
4994
+ constructor(maxSize) {
4995
+ if (maxSize <= 0) {
4996
+ throw new Error("maxSize must be positive");
4997
+ }
4998
+ this.cache = /* @__PURE__ */ new Map();
4999
+ this.maxSize = maxSize;
5000
+ }
5502
5001
  /**
5503
- * Initializes the Playwright browser instance.
5002
+ * Retrieve a value from the cache.
5003
+ * Marks the key as recently used (moves to end of Map).
5004
+ */
5005
+ get(key) {
5006
+ const value = this.cache.get(key);
5007
+ if (value !== void 0) {
5008
+ this.cache.delete(key);
5009
+ this.cache.set(key, value);
5010
+ }
5011
+ return value;
5012
+ }
5013
+ /**
5014
+ * Store a value in the cache.
5015
+ * If cache is full, evicts the oldest entry first.
5016
+ */
5017
+ set(key, value) {
5018
+ if (this.cache.has(key)) {
5019
+ this.cache.delete(key);
5020
+ } else if (this.cache.size >= this.maxSize) {
5021
+ const oldestKey = this.cache.keys().next().value;
5022
+ if (oldestKey !== void 0) {
5023
+ this.cache.delete(oldestKey);
5024
+ }
5025
+ }
5026
+ this.cache.set(key, value);
5027
+ }
5028
+ /**
5029
+ * Check if a key exists in the cache.
5030
+ * Marks the key as recently used (moves to end of Map) to maintain LRU semantics.
5031
+ */
5032
+ has(key) {
5033
+ const exists = this.cache.has(key);
5034
+ if (exists) {
5035
+ const value = this.cache.get(key);
5036
+ if (value !== void 0) {
5037
+ this.cache.delete(key);
5038
+ this.cache.set(key, value);
5039
+ }
5040
+ }
5041
+ return exists;
5042
+ }
5043
+ /**
5044
+ * Get current cache size.
5045
+ */
5046
+ get size() {
5047
+ return this.cache.size;
5048
+ }
5049
+ /**
5050
+ * Clear all entries from the cache.
5051
+ */
5052
+ clear() {
5053
+ this.cache.clear();
5054
+ }
5055
+ }
5056
+ class HtmlPlaywrightMiddleware {
5057
+ browser = null;
5058
+ // Static LRU cache shared across all instances for all fetched resources
5059
+ // Max 200 entries, each limited in size to prevent caching large resources
5060
+ static resourceCache = new SimpleMemoryCache(
5061
+ FETCHER_MAX_CACHE_ITEMS
5062
+ );
5063
+ /**
5064
+ * Initializes the Playwright browser instance.
5504
5065
  * Consider making this more robust (e.g., lazy initialization, singleton).
5505
5066
  */
5506
5067
  async ensureBrowser() {
@@ -5843,25 +5404,97 @@ class HtmlPlaywrightMiddleware {
5843
5404
  return [];
5844
5405
  }
5845
5406
  }
5407
+ /**
5408
+ * Sets up caching route interception for a Playwright page.
5409
+ * This handles:
5410
+ * - Aborting non-essential resources (images, fonts, media)
5411
+ * - Caching GET requests to speed up subsequent loads
5412
+ * - Forwarding custom headers and credentials for same-origin requests
5413
+ *
5414
+ * @param page The Playwright page to set up routing for
5415
+ * @param customHeaders Custom headers to forward with requests
5416
+ * @param credentials Optional credentials for same-origin requests
5417
+ * @param origin The origin for same-origin credential checking
5418
+ */
5419
+ async setupCachingRouteInterception(page, customHeaders = {}, credentials, origin) {
5420
+ await page.route("**/*", async (route) => {
5421
+ const reqUrl = route.request().url();
5422
+ const reqOrigin = (() => {
5423
+ try {
5424
+ return new URL(reqUrl).origin;
5425
+ } catch {
5426
+ return null;
5427
+ }
5428
+ })();
5429
+ const resourceType = route.request().resourceType();
5430
+ if (["image", "font", "media"].includes(resourceType)) {
5431
+ return route.abort();
5432
+ }
5433
+ if (route.request().method() === "GET") {
5434
+ const cached = HtmlPlaywrightMiddleware.resourceCache.get(reqUrl);
5435
+ if (cached !== void 0) {
5436
+ logger.debug(`✓ Cache hit for ${resourceType}: ${reqUrl}`);
5437
+ return route.fulfill({
5438
+ status: 200,
5439
+ contentType: cached.contentType,
5440
+ body: cached.body
5441
+ });
5442
+ }
5443
+ const headers2 = mergePlaywrightHeaders(
5444
+ route.request().headers(),
5445
+ customHeaders,
5446
+ credentials,
5447
+ origin,
5448
+ reqOrigin ?? void 0
5449
+ );
5450
+ const response = await route.fetch({ headers: headers2 });
5451
+ const body = await response.text();
5452
+ if (response.status() >= 200 && response.status() < 300 && body.length > 0) {
5453
+ const contentSizeBytes = Buffer.byteLength(body, "utf8");
5454
+ if (contentSizeBytes <= FETCHER_MAX_CACHE_ITEM_SIZE_BYTES) {
5455
+ const contentType = response.headers()["content-type"] || "application/octet-stream";
5456
+ HtmlPlaywrightMiddleware.resourceCache.set(reqUrl, { body, contentType });
5457
+ logger.debug(
5458
+ `Cached ${resourceType}: ${reqUrl} (${contentSizeBytes} bytes, cache size: ${HtmlPlaywrightMiddleware.resourceCache.size})`
5459
+ );
5460
+ } else {
5461
+ logger.debug(
5462
+ `Resource too large to cache: ${reqUrl} (${contentSizeBytes} bytes > ${FETCHER_MAX_CACHE_ITEM_SIZE_BYTES} bytes limit)`
5463
+ );
5464
+ }
5465
+ }
5466
+ return route.fulfill({ response });
5467
+ }
5468
+ const headers = mergePlaywrightHeaders(
5469
+ route.request().headers(),
5470
+ customHeaders,
5471
+ credentials,
5472
+ origin,
5473
+ reqOrigin ?? void 0
5474
+ );
5475
+ return route.continue({ headers });
5476
+ });
5477
+ }
5846
5478
  /**
5847
5479
  * Fetches content from a frame URL by navigating to it in a new page.
5480
+ * Uses LRU cache to avoid re-fetching identical frames across multiple pages.
5848
5481
  *
5849
5482
  * @param parentPage The parent page (used to resolve relative URLs and share context)
5850
5483
  * @param frameUrl The URL of the frame to fetch content from
5851
5484
  * @returns The HTML content of the frame
5852
5485
  */
5853
5486
  async fetchFrameContent(parentPage, frameUrl) {
5487
+ const resolvedUrl = new URL(frameUrl, parentPage.url()).href;
5488
+ const cached = HtmlPlaywrightMiddleware.resourceCache.get(resolvedUrl);
5489
+ if (cached !== void 0) {
5490
+ logger.debug(`✓ Cache hit for frame: ${resolvedUrl}`);
5491
+ return cached.body;
5492
+ }
5493
+ logger.debug(`Cache miss for frame: ${resolvedUrl}`);
5854
5494
  let framePage = null;
5855
5495
  try {
5856
- const resolvedUrl = new URL(frameUrl, parentPage.url()).href;
5857
5496
  framePage = await parentPage.context().newPage();
5858
- await framePage.route("**/*", async (route) => {
5859
- const resourceType = route.request().resourceType();
5860
- if (["image", "font", "media"].includes(resourceType)) {
5861
- return route.abort();
5862
- }
5863
- return route.continue();
5864
- });
5497
+ await this.setupCachingRouteInterception(framePage);
5865
5498
  logger.debug(`Fetching frame content from: ${resolvedUrl}`);
5866
5499
  await framePage.goto(resolvedUrl, {
5867
5500
  waitUntil: "load",
@@ -5873,8 +5506,23 @@ class HtmlPlaywrightMiddleware {
5873
5506
  "body",
5874
5507
  (el) => el.innerHTML
5875
5508
  );
5509
+ const content = bodyContent || "";
5510
+ const contentSizeBytes = Buffer.byteLength(content, "utf8");
5511
+ if (contentSizeBytes <= FETCHER_MAX_CACHE_ITEM_SIZE_BYTES) {
5512
+ HtmlPlaywrightMiddleware.resourceCache.set(resolvedUrl, {
5513
+ body: content,
5514
+ contentType: "text/html; charset=utf-8"
5515
+ });
5516
+ logger.debug(
5517
+ `Cached frame content: ${resolvedUrl} (${contentSizeBytes} bytes, cache size: ${HtmlPlaywrightMiddleware.resourceCache.size})`
5518
+ );
5519
+ } else {
5520
+ logger.debug(
5521
+ `Frame content too large to cache: ${resolvedUrl} (${contentSizeBytes} bytes > ${FETCHER_MAX_CACHE_ITEM_SIZE_BYTES} bytes limit)`
5522
+ );
5523
+ }
5876
5524
  logger.debug(`Successfully fetched frame content from: ${resolvedUrl}`);
5877
- return bodyContent || "";
5525
+ return content;
5878
5526
  } catch (error) {
5879
5527
  logger.debug(`Error fetching frame content from ${frameUrl}: ${error}`);
5880
5528
  return "";
@@ -5973,25 +5621,59 @@ ${frame.content}
5973
5621
  await this.injectShadowDOMExtractor(page);
5974
5622
  await page.route("**/*", async (route) => {
5975
5623
  const reqUrl = route.request().url();
5976
- const reqOrigin = (() => {
5977
- try {
5978
- return new URL(reqUrl).origin;
5979
- } catch {
5980
- return null;
5981
- }
5982
- })();
5983
5624
  if (reqUrl === context.source) {
5984
5625
  return route.fulfill({
5985
5626
  status: 200,
5986
5627
  contentType: "text/html; charset=utf-8",
5987
5628
  body: context.content
5988
- // context.content is always a string in middleware
5989
5629
  });
5990
5630
  }
5631
+ const reqOrigin = (() => {
5632
+ try {
5633
+ return new URL(reqUrl).origin;
5634
+ } catch {
5635
+ return null;
5636
+ }
5637
+ })();
5991
5638
  const resourceType = route.request().resourceType();
5992
5639
  if (["image", "font", "media"].includes(resourceType)) {
5993
5640
  return route.abort();
5994
5641
  }
5642
+ if (route.request().method() === "GET") {
5643
+ const cached = HtmlPlaywrightMiddleware.resourceCache.get(reqUrl);
5644
+ if (cached !== void 0) {
5645
+ logger.debug(`✓ Cache hit for ${resourceType}: ${reqUrl}`);
5646
+ return route.fulfill({
5647
+ status: 200,
5648
+ contentType: cached.contentType,
5649
+ body: cached.body
5650
+ });
5651
+ }
5652
+ const headers2 = mergePlaywrightHeaders(
5653
+ route.request().headers(),
5654
+ customHeaders,
5655
+ credentials ?? void 0,
5656
+ origin ?? void 0,
5657
+ reqOrigin ?? void 0
5658
+ );
5659
+ const response = await route.fetch({ headers: headers2 });
5660
+ const body = await response.text();
5661
+ if (response.status() >= 200 && response.status() < 300 && body.length > 0) {
5662
+ const contentSizeBytes = Buffer.byteLength(body, "utf8");
5663
+ if (contentSizeBytes <= FETCHER_MAX_CACHE_ITEM_SIZE_BYTES) {
5664
+ const contentType2 = response.headers()["content-type"] || "application/octet-stream";
5665
+ HtmlPlaywrightMiddleware.resourceCache.set(reqUrl, { body, contentType: contentType2 });
5666
+ logger.debug(
5667
+ `Cached ${resourceType}: ${reqUrl} (${contentSizeBytes} bytes, cache size: ${HtmlPlaywrightMiddleware.resourceCache.size})`
5668
+ );
5669
+ } else {
5670
+ logger.debug(
5671
+ `Resource too large to cache: ${reqUrl} (${contentSizeBytes} bytes > ${FETCHER_MAX_CACHE_ITEM_SIZE_BYTES} bytes limit)`
5672
+ );
5673
+ }
5674
+ }
5675
+ return route.fulfill({ response });
5676
+ }
5995
5677
  const headers = mergePlaywrightHeaders(
5996
5678
  route.request().headers(),
5997
5679
  customHeaders,
@@ -6172,6 +5854,8 @@ class HtmlSanitizerMiddleware {
6172
5854
  return;
6173
5855
  }
6174
5856
  try {
5857
+ const bodyBeforeSanitization = $("body").html() || "";
5858
+ const textLengthBefore = $("body").text().trim().length;
6175
5859
  const selectorsToRemove = [
6176
5860
  ...context.options.excludeSelectors || [],
6177
5861
  // Use options from the context
@@ -6184,9 +5868,13 @@ class HtmlSanitizerMiddleware {
6184
5868
  for (const selector of selectorsToRemove) {
6185
5869
  try {
6186
5870
  const elements = $(selector);
6187
- const count = elements.length;
5871
+ const filteredElements = elements.filter(function() {
5872
+ const tagName = $(this).prop("tagName")?.toLowerCase();
5873
+ return tagName !== "html" && tagName !== "body";
5874
+ });
5875
+ const count = filteredElements.length;
6188
5876
  if (count > 0) {
6189
- elements.remove();
5877
+ filteredElements.remove();
6190
5878
  removedCount += count;
6191
5879
  }
6192
5880
  } catch (selectorError) {
@@ -6199,6 +5887,13 @@ class HtmlSanitizerMiddleware {
6199
5887
  }
6200
5888
  }
6201
5889
  logger.debug(`Removed ${removedCount} elements for ${context.source}`);
5890
+ const textLengthAfter = $("body").text().trim().length;
5891
+ if (textLengthBefore > 0 && textLengthAfter === 0) {
5892
+ logger.warn(
5893
+ `⚠️ Sanitization removed all content from ${context.source}. Reverting to pre-sanitization state.`
5894
+ );
5895
+ $("body").html(bodyBeforeSanitization);
5896
+ }
6202
5897
  } catch (error) {
6203
5898
  logger.error(
6204
5899
  `❌ Error during HTML element removal for ${context.source}: ${error}`
@@ -6349,6 +6044,29 @@ class MarkdownMetadataExtractorMiddleware {
6349
6044
  }
6350
6045
  }
6351
6046
  class HtmlNormalizationMiddleware {
6047
+ // Known tracking/analytics domains and patterns to filter out
6048
+ trackingPatterns = [
6049
+ "adroll.com",
6050
+ "doubleclick.net",
6051
+ "google-analytics.com",
6052
+ "googletagmanager.com",
6053
+ "analytics.twitter.com",
6054
+ "twitter.com/1/i/adsct",
6055
+ "t.co/1/i/adsct",
6056
+ "bat.bing.com",
6057
+ "pixel.rubiconproject.com",
6058
+ "casalemedia.com",
6059
+ "tremorhub.com",
6060
+ "rlcdn.com",
6061
+ "facebook.com/tr",
6062
+ "linkedin.com/px",
6063
+ "quantserve.com",
6064
+ "scorecardresearch.com",
6065
+ "hotjar.com",
6066
+ "mouseflow.com",
6067
+ "crazyegg.com",
6068
+ "clarity.ms"
6069
+ ];
6352
6070
  async process(context, next) {
6353
6071
  if (!context.dom) {
6354
6072
  logger.debug(
@@ -6372,14 +6090,34 @@ class HtmlNormalizationMiddleware {
6372
6090
  }
6373
6091
  await next();
6374
6092
  }
6093
+ /**
6094
+ * Checks if an image should be kept based on its source URL.
6095
+ * Filters out tracking pixels and analytics beacons.
6096
+ */
6097
+ shouldKeepImage(src) {
6098
+ const srcLower = src.toLowerCase();
6099
+ return !this.trackingPatterns.some((pattern) => srcLower.includes(pattern));
6100
+ }
6375
6101
  /**
6376
6102
  * Normalizes image URLs by converting relative URLs to absolute URLs.
6103
+ * Removes tracking/analytics images.
6104
+ * Preserves data URIs (inline images).
6377
6105
  */
6378
6106
  normalizeImageUrls($, baseUrl) {
6379
6107
  $("img").each((_index, element) => {
6380
6108
  const $img = $(element);
6381
6109
  const src = $img.attr("src");
6382
- if (!src) return;
6110
+ if (!src) {
6111
+ $img.remove();
6112
+ return;
6113
+ }
6114
+ if (src.startsWith("data:")) {
6115
+ return;
6116
+ }
6117
+ if (!this.shouldKeepImage(src)) {
6118
+ $img.remove();
6119
+ return;
6120
+ }
6383
6121
  try {
6384
6122
  new URL(src);
6385
6123
  } catch {
@@ -6388,6 +6126,7 @@ class HtmlNormalizationMiddleware {
6388
6126
  $img.attr("src", absoluteUrl);
6389
6127
  } catch (error) {
6390
6128
  logger.debug(`Failed to resolve relative image URL: ${src} - ${error}`);
6129
+ $img.remove();
6391
6130
  }
6392
6131
  }
6393
6132
  });
@@ -7260,6 +6999,9 @@ Please verify the server URL includes the correct port (default 8080) and ends w
7260
6999
  async storeScraperOptions(versionId, options) {
7261
7000
  await this.client.storeScraperOptions.mutate({ versionId, options });
7262
7001
  }
7002
+ getActiveEmbeddingConfig() {
7003
+ return null;
7004
+ }
7263
7005
  }
7264
7006
  class JsonPipeline extends BasePipeline {
7265
7007
  middleware;
@@ -8058,7 +7800,7 @@ async function applyMigrations(db) {
8058
7800
  db.pragma("temp_store = MEMORY");
8059
7801
  logger.debug("Applied performance optimizations for migration");
8060
7802
  } catch (_error) {
8061
- logger.warn("⚠️ Could not apply all performance optimizations for migration");
7803
+ logger.warn("⚠️ Could not apply all performance optimizations for migration");
8062
7804
  }
8063
7805
  const overallTransaction = db.transaction(() => {
8064
7806
  logger.debug("Checking database migrations...");
@@ -8111,7 +7853,7 @@ async function applyMigrations(db) {
8111
7853
  db.exec("VACUUM");
8112
7854
  logger.debug("Database vacuum completed successfully");
8113
7855
  } catch (error) {
8114
- logger.warn(`⚠️ Could not vacuum database after migrations: ${error}`);
7856
+ logger.warn(`⚠️ Could not vacuum database after migrations: ${error}`);
8115
7857
  }
8116
7858
  } else {
8117
7859
  logger.debug("Skipping VACUUM - no migrations were applied");
@@ -8137,17 +7879,321 @@ async function applyMigrations(db) {
8137
7879
  }
8138
7880
  }
8139
7881
  }
8140
- try {
8141
- db.pragma("journal_mode = WAL");
8142
- db.pragma("wal_autocheckpoint = 1000");
8143
- db.pragma("busy_timeout = 30000");
8144
- db.pragma("foreign_keys = ON");
8145
- db.pragma("synchronous = NORMAL");
8146
- logger.debug(
8147
- "Applied production database configuration (WAL mode, autocheckpoint, foreign keys, busy timeout)"
8148
- );
8149
- } catch (_error) {
8150
- logger.warn("⚠️ Could not apply all production database settings");
7882
+ try {
7883
+ db.pragma("journal_mode = WAL");
7884
+ db.pragma("wal_autocheckpoint = 1000");
7885
+ db.pragma("busy_timeout = 30000");
7886
+ db.pragma("foreign_keys = ON");
7887
+ db.pragma("synchronous = NORMAL");
7888
+ logger.debug(
7889
+ "Applied production database configuration (WAL mode, autocheckpoint, foreign keys, busy timeout)"
7890
+ );
7891
+ } catch (_error) {
7892
+ logger.warn("⚠️ Could not apply all production database settings");
7893
+ }
7894
+ }
7895
+ class EmbeddingConfig {
7896
+ static instance = null;
7897
+ /**
7898
+ * Get the singleton instance of EmbeddingConfig.
7899
+ * Creates the instance if it doesn't exist.
7900
+ */
7901
+ static getInstance() {
7902
+ if (EmbeddingConfig.instance === null) {
7903
+ EmbeddingConfig.instance = new EmbeddingConfig();
7904
+ }
7905
+ return EmbeddingConfig.instance;
7906
+ }
7907
+ /**
7908
+ * Reset the singleton instance (useful for testing).
7909
+ */
7910
+ static resetInstance() {
7911
+ EmbeddingConfig.instance = null;
7912
+ }
7913
+ /**
7914
+ * Known dimensions for common embedding models.
7915
+ * This avoids expensive API calls for dimension detection in telemetry.
7916
+ *
7917
+ * Note: The "openai" provider also supports OpenAI-compatible APIs like:
7918
+ * - Ollama (local models)
7919
+ * - LMStudio (local models)
7920
+ * - Any service implementing OpenAI's embedding API
7921
+ */
7922
+ knownModelDimensions = {
7923
+ // OpenAI models (also works with Ollama, LMStudio, and other OpenAI-compatible APIs)
7924
+ "text-embedding-3-small": 1536,
7925
+ "text-embedding-3-large": 3072,
7926
+ "text-embedding-ada-002": 1536,
7927
+ // Google Vertex AI models
7928
+ "text-embedding-004": 768,
7929
+ "textembedding-gecko@003": 768,
7930
+ "textembedding-gecko@002": 768,
7931
+ "textembedding-gecko@001": 768,
7932
+ // Google Gemini models (with MRL support)
7933
+ "text-embedding-preview-0409": 768,
7934
+ "embedding-001": 768,
7935
+ // AWS Bedrock models
7936
+ // Amazon Titan models
7937
+ "amazon.titan-embed-text-v1": 1536,
7938
+ "amazon.titan-embed-text-v2:0": 1024,
7939
+ "amazon.titan-embed-image-v1": 1024,
7940
+ // Image embedding model
7941
+ // Cohere models
7942
+ "cohere.embed-english-v3": 1024,
7943
+ "cohere.embed-multilingual-v3": 1024,
7944
+ // SageMaker models (hosted on AWS SageMaker)
7945
+ "intfloat/multilingual-e5-large": 1024,
7946
+ // Additional AWS models that might be supported
7947
+ // Note: Some of these might be placeholders - verify dimensions before use
7948
+ // "amazon.nova-embed-multilingual-v1:0": 4096, // Commented out as noted in source
7949
+ // MTEB Leaderboard models (source: https://huggingface.co/spaces/mteb/leaderboard)
7950
+ // Top performing models from Massive Text Embedding Benchmark
7951
+ "sentence-transformers/all-MiniLM-L6-v2": 384,
7952
+ "gemini-embedding-001": 3072,
7953
+ "Qwen/Qwen3-Embedding-8B": 4096,
7954
+ "Qwen/Qwen3-Embedding-4B": 2560,
7955
+ "Qwen/Qwen3-Embedding-0.6B": 1024,
7956
+ "Linq-AI-Research/Linq-Embed-Mistral": 4096,
7957
+ "Alibaba-NLP/gte-Qwen2-7B-instruct": 3584,
7958
+ "intfloat/multilingual-e5-large-instruct": 1024,
7959
+ "Salesforce/SFR-Embedding-Mistral": 4096,
7960
+ "text-multilingual-embedding-002": 768,
7961
+ "GritLM/GritLM-7B": 4096,
7962
+ "GritLM/GritLM-8x7B": 4096,
7963
+ "intfloat/e5-mistral-7b-instruct": 4096,
7964
+ "Cohere/Cohere-embed-multilingual-v3.0": 1024,
7965
+ "Alibaba-NLP/gte-Qwen2-1.5B-instruct": 8960,
7966
+ "Lajavaness/bilingual-embedding-large": 1024,
7967
+ "Salesforce/SFR-Embedding-2_R": 4096,
7968
+ "NovaSearch/stella_en_1.5B_v5": 8960,
7969
+ "NovaSearch/jasper_en_vision_language_v1": 8960,
7970
+ "nvidia/NV-Embed-v2": 4096,
7971
+ "OrdalieTech/Solon-embeddings-large-0.1": 1024,
7972
+ "BAAI/bge-m3": 1024,
7973
+ "HIT-TMG/KaLM-embedding-multilingual-mini-v1": 896,
7974
+ "jinaai/jina-embeddings-v3": 1024,
7975
+ "Alibaba-NLP/gte-multilingual-base": 768,
7976
+ "Lajavaness/bilingual-embedding-base": 768,
7977
+ "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": 896,
7978
+ "nvidia/NV-Embed-v1": 4096,
7979
+ "Cohere/Cohere-embed-multilingual-light-v3.0": 384,
7980
+ "manu/bge-m3-custom-fr": 1024,
7981
+ "Lajavaness/bilingual-embedding-small": 384,
7982
+ "Snowflake/snowflake-arctic-embed-l-v2.0": 1024,
7983
+ "intfloat/multilingual-e5-base": 768,
7984
+ "voyage-3-lite": 512,
7985
+ "voyage-3": 1024,
7986
+ "intfloat/multilingual-e5-small": 384,
7987
+ "Alibaba-NLP/gte-Qwen1.5-7B-instruct": 4096,
7988
+ "Snowflake/snowflake-arctic-embed-m-v2.0": 768,
7989
+ "deepvk/USER-bge-m3": 1024,
7990
+ "Cohere/Cohere-embed-english-v3.0": 1024,
7991
+ "Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": 768,
7992
+ "ibm-granite/granite-embedding-278m-multilingual": 768,
7993
+ "NovaSearch/stella_en_400M_v5": 4096,
7994
+ "omarelshehy/arabic-english-sts-matryoshka": 1024,
7995
+ "sentence-transformers/paraphrase-multilingual-mpnet-base-v2": 768,
7996
+ "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": 768,
7997
+ "Haon-Chen/speed-embedding-7b-instruct": 4096,
7998
+ "sentence-transformers/LaBSE": 768,
7999
+ "WhereIsAI/UAE-Large-V1": 1024,
8000
+ "ibm-granite/granite-embedding-107m-multilingual": 384,
8001
+ "mixedbread-ai/mxbai-embed-large-v1": 1024,
8002
+ "intfloat/e5-large-v2": 1024,
8003
+ "avsolatorio/GIST-large-Embedding-v0": 1024,
8004
+ "sdadas/mmlw-e5-large": 1024,
8005
+ "nomic-ai/nomic-embed-text-v1": 768,
8006
+ "nomic-ai/nomic-embed-text-v1-ablated": 768,
8007
+ "intfloat/e5-base-v2": 768,
8008
+ "BAAI/bge-large-en-v1.5": 1024,
8009
+ "intfloat/e5-large": 1024,
8010
+ "Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": 384,
8011
+ "Cohere/Cohere-embed-english-light-v3.0": 384,
8012
+ "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 768,
8013
+ "Gameselo/STS-multilingual-mpnet-base-v2": 768,
8014
+ "thenlper/gte-large": 1024,
8015
+ "avsolatorio/GIST-Embedding-v0": 768,
8016
+ "nomic-ai/nomic-embed-text-v1-unsupervised": 768,
8017
+ "infgrad/stella-base-en-v2": 768,
8018
+ "avsolatorio/NoInstruct-small-Embedding-v0": 384,
8019
+ "dwzhu/e5-base-4k": 768,
8020
+ "sdadas/mmlw-e5-base": 768,
8021
+ "voyage-multilingual-2": 1024,
8022
+ "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": 4096,
8023
+ "BAAI/bge-base-en-v1.5": 768,
8024
+ "avsolatorio/GIST-small-Embedding-v0": 384,
8025
+ "sdadas/mmlw-roberta-large": 1024,
8026
+ "nomic-ai/nomic-embed-text-v1.5": 768,
8027
+ "minishlab/potion-multilingual-128M": 256,
8028
+ "shibing624/text2vec-base-multilingual": 384,
8029
+ "thenlper/gte-base": 768,
8030
+ "intfloat/e5-small-v2": 384,
8031
+ "intfloat/e5-base": 768,
8032
+ "sentence-transformers/static-similarity-mrl-multilingual-v1": 1024,
8033
+ "manu/sentence_croissant_alpha_v0.3": 2048,
8034
+ "BAAI/bge-small-en-v1.5": 512,
8035
+ "thenlper/gte-small": 384,
8036
+ "sdadas/mmlw-e5-small": 384,
8037
+ "manu/sentence_croissant_alpha_v0.4": 2048,
8038
+ "manu/sentence_croissant_alpha_v0.2": 2048,
8039
+ "abhinand/MedEmbed-small-v0.1": 384,
8040
+ "ibm-granite/granite-embedding-125m-english": 768,
8041
+ "intfloat/e5-small": 384,
8042
+ "voyage-large-2-instruct": 1024,
8043
+ "sdadas/mmlw-roberta-base": 768,
8044
+ "Snowflake/snowflake-arctic-embed-l": 1024,
8045
+ "Mihaiii/Ivysaur": 384,
8046
+ "Snowflake/snowflake-arctic-embed-m-long": 768,
8047
+ "bigscience/sgpt-bloom-7b1-msmarco": 4096,
8048
+ "avsolatorio/GIST-all-MiniLM-L6-v2": 384,
8049
+ "sergeyzh/LaBSE-ru-turbo": 768,
8050
+ "sentence-transformers/all-mpnet-base-v2": 768,
8051
+ "Snowflake/snowflake-arctic-embed-m": 768,
8052
+ "Snowflake/snowflake-arctic-embed-s": 384,
8053
+ "sentence-transformers/all-MiniLM-L12-v2": 384,
8054
+ "Mihaiii/gte-micro-v4": 384,
8055
+ "Snowflake/snowflake-arctic-embed-m-v1.5": 768,
8056
+ "cointegrated/LaBSE-en-ru": 768,
8057
+ "Mihaiii/Bulbasaur": 384,
8058
+ "ibm-granite/granite-embedding-30m-english": 384,
8059
+ "deepfile/embedder-100p": 768,
8060
+ "Jaume/gemma-2b-embeddings": 2048,
8061
+ "OrlikB/KartonBERT-USE-base-v1": 768,
8062
+ "izhx/udever-bloom-7b1": 4096,
8063
+ "izhx/udever-bloom-1b1": 1024,
8064
+ "brahmairesearch/slx-v0.1": 384,
8065
+ "Mihaiii/Wartortle": 384,
8066
+ "izhx/udever-bloom-3b": 2048,
8067
+ "deepvk/USER-base": 768,
8068
+ "ai-forever/ru-en-RoSBERTa": 1024,
8069
+ "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": 4096,
8070
+ "Mihaiii/Venusaur": 384,
8071
+ "Snowflake/snowflake-arctic-embed-xs": 384,
8072
+ "jinaai/jina-embedding-b-en-v1": 768,
8073
+ "Mihaiii/gte-micro": 384,
8074
+ "aari1995/German_Semantic_STS_V2": 1024,
8075
+ "Mihaiii/Squirtle": 384,
8076
+ "OrlikB/st-polish-kartonberta-base-alpha-v1": 768,
8077
+ "sergeyzh/rubert-tiny-turbo": 312,
8078
+ "minishlab/potion-base-8M": 256,
8079
+ "minishlab/M2V_base_glove_subword": 256,
8080
+ "jinaai/jina-embedding-s-en-v1": 512,
8081
+ "minishlab/potion-base-4M": 128,
8082
+ "minishlab/M2V_base_output": 256,
8083
+ "DeepPavlov/rubert-base-cased-sentence": 768,
8084
+ "jinaai/jina-embeddings-v2-small-en": 512,
8085
+ "cointegrated/rubert-tiny2": 312,
8086
+ "minishlab/M2V_base_glove": 256,
8087
+ "cointegrated/rubert-tiny": 312,
8088
+ "silma-ai/silma-embeddding-matryoshka-v0.1": 768,
8089
+ "DeepPavlov/rubert-base-cased": 768,
8090
+ "Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": 768,
8091
+ "izhx/udever-bloom-560m": 1024,
8092
+ "minishlab/potion-base-2M": 64,
8093
+ "DeepPavlov/distilrubert-small-cased-conversational": 768,
8094
+ "consciousAI/cai-lunaris-text-embeddings": 1024,
8095
+ "deepvk/deberta-v1-base": 768,
8096
+ "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": 768,
8097
+ "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": 768,
8098
+ "ai-forever/sbert_large_mt_nlu_ru": 1024,
8099
+ "ai-forever/sbert_large_nlu_ru": 1024,
8100
+ "malenia1/ternary-weight-embedding": 1024,
8101
+ "jinaai/jina-embeddings-v2-base-en": 768,
8102
+ "VPLabs/SearchMap_Preview": 4096,
8103
+ "Hum-Works/lodestone-base-4096-v1": 768,
8104
+ "jinaai/jina-embeddings-v4": 2048
8105
+ };
8106
+ /**
8107
+ * Lowercase lookup map for case-insensitive model dimension queries.
8108
+ * Built lazily from knownModelDimensions to ensure consistency.
8109
+ */
8110
+ modelLookup;
8111
+ constructor() {
8112
+ this.modelLookup = /* @__PURE__ */ new Map();
8113
+ for (const [model, dimensions] of Object.entries(this.knownModelDimensions)) {
8114
+ this.modelLookup.set(model.toLowerCase(), dimensions);
8115
+ }
8116
+ }
8117
+ /**
8118
+ * Parse embedding model configuration from a provided model specification.
8119
+ * This is a synchronous operation that extracts provider, model, and known dimensions.
8120
+ *
8121
+ * Supports various providers:
8122
+ * - openai: OpenAI models and OpenAI-compatible APIs (Ollama, LMStudio, etc.)
8123
+ * - vertex: Google Cloud Vertex AI
8124
+ * - gemini: Google Generative AI
8125
+ * - aws: AWS Bedrock models
8126
+ * - microsoft: Azure OpenAI
8127
+ * - sagemaker: AWS SageMaker hosted models
8128
+ *
8129
+ * @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
8130
+ * @returns Parsed embedding model configuration
8131
+ */
8132
+ parse(modelSpec) {
8133
+ const spec = modelSpec || "text-embedding-3-small";
8134
+ const colonIndex = spec.indexOf(":");
8135
+ let provider;
8136
+ let model;
8137
+ if (colonIndex === -1) {
8138
+ provider = "openai";
8139
+ model = spec;
8140
+ } else {
8141
+ provider = spec.substring(0, colonIndex);
8142
+ model = spec.substring(colonIndex + 1);
8143
+ }
8144
+ const dimensions = this.modelLookup?.get(model.toLowerCase()) || null;
8145
+ return {
8146
+ provider,
8147
+ model,
8148
+ dimensions,
8149
+ modelSpec: spec
8150
+ };
8151
+ }
8152
+ /**
8153
+ * Get the known dimensions for a specific model.
8154
+ * Returns null if the model dimensions are not known.
8155
+ * Uses case-insensitive lookup.
8156
+ *
8157
+ * @param model The model name (e.g., "text-embedding-3-small")
8158
+ * @returns Known dimensions or null
8159
+ */
8160
+ getKnownDimensions(model) {
8161
+ return this.modelLookup?.get(model.toLowerCase()) || null;
8162
+ }
8163
+ /**
8164
+ * Add or update known dimensions for a model.
8165
+ * This can be used to cache discovered dimensions.
8166
+ * Stores both original case and lowercase for consistent lookup.
8167
+ *
8168
+ * @param model The model name
8169
+ * @param dimensions The dimensions to cache
8170
+ */
8171
+ setKnownDimensions(model, dimensions) {
8172
+ this.knownModelDimensions[model] = dimensions;
8173
+ if (this.modelLookup) {
8174
+ this.modelLookup.set(model.toLowerCase(), dimensions);
8175
+ }
8176
+ }
8177
+ /**
8178
+ * Static method to parse embedding model configuration using the singleton instance.
8179
+ * This maintains backward compatibility while using the class-based approach.
8180
+ */
8181
+ static parseEmbeddingConfig(modelSpec) {
8182
+ return EmbeddingConfig.getInstance().parse(modelSpec);
8183
+ }
8184
+ /**
8185
+ * Static method to get known model dimensions using the singleton instance.
8186
+ * This maintains backward compatibility while using the class-based approach.
8187
+ */
8188
+ static getKnownModelDimensions(model) {
8189
+ return EmbeddingConfig.getInstance().getKnownDimensions(model);
8190
+ }
8191
+ /**
8192
+ * Static method to set known model dimensions using the singleton instance.
8193
+ * This maintains backward compatibility while using the class-based approach.
8194
+ */
8195
+ static setKnownModelDimensions(model, dimensions) {
8196
+ EmbeddingConfig.getInstance().setKnownDimensions(model, dimensions);
8151
8197
  }
8152
8198
  }
8153
8199
  class DocumentStore {
@@ -8157,6 +8203,16 @@ class DocumentStore {
8157
8203
  modelDimension;
8158
8204
  embeddingConfig;
8159
8205
  isVectorSearchEnabled = false;
8206
+ /**
8207
+ * Returns the active embedding configuration if vector search is enabled,
8208
+ * or null if embeddings are disabled (no config provided or credentials unavailable).
8209
+ */
8210
+ getActiveEmbeddingConfig() {
8211
+ if (!this.isVectorSearchEnabled || !this.embeddingConfig) {
8212
+ return null;
8213
+ }
8214
+ return this.embeddingConfig;
8215
+ }
8160
8216
  statements;
8161
8217
  /**
8162
8218
  * Calculates Reciprocal Rank Fusion score for a result with configurable weights
@@ -8436,7 +8492,7 @@ class DocumentStore {
8436
8492
  const config = this.embeddingConfig;
8437
8493
  if (!areCredentialsAvailable(config.provider)) {
8438
8494
  logger.warn(
8439
- `⚠️ No credentials found for ${config.provider} embedding provider. Vector search is disabled.
8495
+ `⚠️ No credentials found for ${config.provider} embedding provider. Vector search is disabled.
8440
8496
  Only full-text search will be available. To enable vector search, please configure the required
8441
8497
  environment variables for ${config.provider} or choose a different provider.
8442
8498
  See README.md for configuration options or run with --help for more details.`
@@ -8448,8 +8504,26 @@ class DocumentStore {
8448
8504
  if (config.dimensions !== null) {
8449
8505
  this.modelDimension = config.dimensions;
8450
8506
  } else {
8451
- const testVector = await this.embeddings.embedQuery("test");
8452
- this.modelDimension = testVector.length;
8507
+ const EMBEDDING_INIT_TIMEOUT_MS = 3e4;
8508
+ const testPromise = this.embeddings.embedQuery("test");
8509
+ let timeoutId;
8510
+ const timeoutPromise = new Promise((_, reject) => {
8511
+ timeoutId = setTimeout(() => {
8512
+ reject(
8513
+ new Error(
8514
+ `Embedding service connection timed out after ${EMBEDDING_INIT_TIMEOUT_MS / 1e3} seconds`
8515
+ )
8516
+ );
8517
+ }, EMBEDDING_INIT_TIMEOUT_MS);
8518
+ });
8519
+ try {
8520
+ const testVector = await Promise.race([testPromise, timeoutPromise]);
8521
+ this.modelDimension = testVector.length;
8522
+ } finally {
8523
+ if (timeoutId !== void 0) {
8524
+ clearTimeout(timeoutId);
8525
+ }
8526
+ }
8453
8527
  EmbeddingConfig.setKnownModelDimensions(config.model, this.modelDimension);
8454
8528
  }
8455
8529
  if (this.modelDimension > this.dbDimension) {
@@ -8463,18 +8537,26 @@ class DocumentStore {
8463
8537
  if (error instanceof Error) {
8464
8538
  if (error.message.includes("does not exist") || error.message.includes("MODEL_NOT_FOUND")) {
8465
8539
  throw new ModelConfigurationError(
8466
- `❌ Invalid embedding model: ${config.model}
8540
+ `Invalid embedding model: ${config.model}
8467
8541
  The model "${config.model}" is not available or you don't have access to it.
8468
8542
  See README.md for supported models or run with --help for more details.`
8469
8543
  );
8470
8544
  }
8471
8545
  if (error.message.includes("API key") || error.message.includes("401") || error.message.includes("authentication")) {
8472
8546
  throw new ModelConfigurationError(
8473
- `❌ Authentication failed for ${config.provider} embedding provider
8547
+ `Authentication failed for ${config.provider} embedding provider
8474
8548
  Please check your API key configuration.
8475
8549
  See README.md for configuration options or run with --help for more details.`
8476
8550
  );
8477
8551
  }
8552
+ if (error.message.includes("timed out") || error.message.includes("ECONNREFUSED") || error.message.includes("ENOTFOUND") || error.message.includes("ETIMEDOUT") || error.message.includes("ECONNRESET") || error.message.includes("network") || error.message.includes("fetch failed")) {
8553
+ throw new ModelConfigurationError(
8554
+ `Failed to connect to ${config.provider} embedding service
8555
+ ${error.message}
8556
+ Please check that the embedding service is running and accessible.
8557
+ If using a local model (e.g., Ollama), ensure the service is started.`
8558
+ );
8559
+ }
8478
8560
  }
8479
8561
  throw error;
8480
8562
  }
@@ -8543,8 +8625,8 @@ class DocumentStore {
8543
8625
  return escapedTokens[0];
8544
8626
  }
8545
8627
  const exactMatch = `"${tokens.join(" ").replace(/"/g, '""')}"`;
8546
- const termsQuery = escapedTokens.join(" ");
8547
- return `${exactMatch} OR (${termsQuery})`;
8628
+ const termsQuery = escapedTokens.join(" OR ");
8629
+ return `${exactMatch} OR ${termsQuery}`;
8548
8630
  }
8549
8631
  /**
8550
8632
  * Initializes database connection and ensures readiness
@@ -8672,6 +8754,35 @@ class DocumentStore {
8672
8754
  throw new StoreError(`Failed to get library by ID: ${error}`);
8673
8755
  }
8674
8756
  }
8757
+ /**
8758
+ * Retrieves a library by its name.
8759
+ * @param name The library name to retrieve
8760
+ * @returns The library record, or null if not found
8761
+ */
8762
+ async getLibrary(name) {
8763
+ try {
8764
+ const normalizedName = name.toLowerCase();
8765
+ const row = this.statements.getLibraryIdByName.get(normalizedName);
8766
+ if (!row) {
8767
+ return null;
8768
+ }
8769
+ return { id: row.id, name: normalizedName };
8770
+ } catch (error) {
8771
+ throw new StoreError(`Failed to get library by name: ${error}`);
8772
+ }
8773
+ }
8774
+ /**
8775
+ * Deletes a library by its ID.
8776
+ * This should only be called when the library has no remaining versions.
8777
+ * @param libraryId The library ID to delete
8778
+ */
8779
+ async deleteLibrary(libraryId) {
8780
+ try {
8781
+ this.statements.deleteLibraryById.run(libraryId);
8782
+ } catch (error) {
8783
+ throw new StoreError(`Failed to delete library: ${error}`);
8784
+ }
8785
+ }
8675
8786
  /**
8676
8787
  * Stores scraper options for a version to enable reproducible indexing.
8677
8788
  * @param versionId The version ID to update
@@ -8709,7 +8820,7 @@ class DocumentStore {
8709
8820
  try {
8710
8821
  parsed = JSON.parse(row.scraper_options);
8711
8822
  } catch (e) {
8712
- logger.warn(`⚠️ Invalid scraper_options JSON for version ${versionId}: ${e}`);
8823
+ logger.warn(`⚠️ Invalid scraper_options JSON for version ${versionId}: ${e}`);
8713
8824
  parsed = {};
8714
8825
  }
8715
8826
  }
@@ -9428,13 +9539,6 @@ class DocumentManagementService {
9428
9539
  documentRetriever;
9429
9540
  pipelines;
9430
9541
  eventBus;
9431
- /**
9432
- * Normalizes a version string, converting null or undefined to an empty string
9433
- * and converting to lowercase.
9434
- */
9435
- normalizeVersion(version) {
9436
- return (version ?? "").toLowerCase();
9437
- }
9438
9542
  constructor(storePath, eventBus, embeddingConfig, pipelineConfig) {
9439
9543
  this.eventBus = eventBus;
9440
9544
  const dbPath = storePath === ":memory:" ? ":memory:" : path.join(storePath, "documents.db");
@@ -9443,6 +9547,20 @@ class DocumentManagementService {
9443
9547
  this.documentRetriever = new DocumentRetrieverService(this.store);
9444
9548
  this.pipelines = PipelineFactory$1.createStandardPipelines(pipelineConfig);
9445
9549
  }
9550
+ /**
9551
+ * Returns the active embedding configuration if vector search is enabled,
9552
+ * or null if embeddings are disabled.
9553
+ */
9554
+ getActiveEmbeddingConfig() {
9555
+ return this.store.getActiveEmbeddingConfig();
9556
+ }
9557
+ /**
9558
+ * Normalizes a version string, converting null or undefined to an empty string
9559
+ * and converting to lowercase.
9560
+ */
9561
+ normalizeVersion(version) {
9562
+ return (version ?? "").toLowerCase();
9563
+ }
9446
9564
  /**
9447
9565
  * Initializes the underlying document store.
9448
9566
  */
@@ -9533,30 +9651,26 @@ class DocumentManagementService {
9533
9651
  return this.store.findVersionsBySourceUrl(url);
9534
9652
  }
9535
9653
  /**
9536
- * Validates if a library exists in the store (either versioned or unversioned).
9654
+ * Validates if a library exists in the store.
9655
+ * Checks if the library record exists in the database, regardless of whether it has versions or documents.
9537
9656
  * Throws LibraryNotFoundInStoreError with suggestions if the library is not found.
9538
9657
  * @param library The name of the library to validate.
9539
9658
  * @throws {LibraryNotFoundInStoreError} If the library does not exist.
9540
9659
  */
9541
9660
  async validateLibraryExists(library) {
9542
9661
  logger.info(`🔎 Validating existence of library: ${library}`);
9543
- const normalizedLibrary = library.toLowerCase();
9544
- const versions = await this.listVersions(normalizedLibrary);
9545
- const hasUnversioned = await this.exists(normalizedLibrary, "");
9546
- if (versions.length === 0 && !hasUnversioned) {
9662
+ const libraryRecord = await this.store.getLibrary(library);
9663
+ if (!libraryRecord) {
9547
9664
  logger.warn(`⚠️ Library '${library}' not found.`);
9548
9665
  const allLibraries = await this.listLibraries();
9549
9666
  const libraryNames = allLibraries.map((lib) => lib.library);
9550
9667
  let suggestions = [];
9551
9668
  if (libraryNames.length > 0) {
9552
9669
  const fuse = new Fuse(libraryNames, {
9553
- // Configure fuse.js options if needed (e.g., threshold)
9554
- // isCaseSensitive: false, // Handled by normalizing library names
9555
- // includeScore: true,
9556
9670
  threshold: 0.7
9557
9671
  // Adjust threshold for desired fuzziness (0=exact, 1=match anything)
9558
9672
  });
9559
- const results = fuse.search(normalizedLibrary);
9673
+ const results = fuse.search(library.toLowerCase());
9560
9674
  suggestions = results.slice(0, 3).map((result) => result.item);
9561
9675
  logger.info(`🔍 Found suggestions: ${suggestions.join(", ")}`);
9562
9676
  }
@@ -9672,6 +9786,7 @@ class DocumentManagementService {
9672
9786
  /**
9673
9787
  * Completely removes a library version and all associated documents.
9674
9788
  * Also removes the library if no other versions remain.
9789
+ * If the specified version doesn't exist but the library exists with no versions, removes the library.
9675
9790
  * @param library Library name
9676
9791
  * @param version Version string (null/undefined for unversioned)
9677
9792
  */
@@ -9686,8 +9801,17 @@ class DocumentManagementService {
9686
9801
  logger.info(`🗑️ Removed version ${library}@${normalizedVersion || "[no version]"}`);
9687
9802
  } else {
9688
9803
  logger.warn(
9689
- `⚠️ Version ${library}@${normalizedVersion || "[no version]"} not found`
9804
+ `⚠️ Version ${library}@${normalizedVersion || "[no version]"} not found`
9690
9805
  );
9806
+ const libraryRecord = await this.store.getLibrary(library);
9807
+ if (libraryRecord) {
9808
+ const versions = await this.store.queryUniqueVersions(library);
9809
+ if (versions.length === 0) {
9810
+ logger.info(`🗑️ Library ${library} has no versions, removing library record`);
9811
+ await this.store.deleteLibrary(libraryRecord.id);
9812
+ logger.info(`🗑️ Completely removed library ${library} (had no versions)`);
9813
+ }
9814
+ }
9691
9815
  }
9692
9816
  this.eventBus.emit(EventType.LIBRARY_CHANGE, void 0);
9693
9817
  }
@@ -10365,7 +10489,7 @@ function registerEventsRoute(server, eventBus) {
10365
10489
  // Disable buffering in nginx
10366
10490
  });
10367
10491
  reply.raw.write("data: connected\n\n");
10368
- logger.info("📡 SSE client connected");
10492
+ logger.debug("SSE client connected");
10369
10493
  const allEventTypes = [
10370
10494
  EventType.JOB_STATUS_CHANGE,
10371
10495
  EventType.JOB_PROGRESS,
@@ -10403,24 +10527,146 @@ function registerEventsRoute(server, eventBus) {
10403
10527
  }
10404
10528
  }, 3e4);
10405
10529
  request.raw.on("close", () => {
10406
- logger.info("📡 SSE client disconnected");
10530
+ logger.debug("SSE client disconnected");
10407
10531
  cleanup();
10408
10532
  clearInterval(heartbeatInterval);
10409
10533
  });
10410
10534
  request.raw.on("error", (error) => {
10411
- logger.error(`❌ SSE connection error: ${error}`);
10535
+ logger.debug(`SSE connection error: ${error}`);
10412
10536
  cleanup();
10413
10537
  clearInterval(heartbeatInterval);
10414
10538
  });
10415
10539
  });
10416
10540
  }
10541
+ const Toast = () => {
10542
+ return /* @__PURE__ */ jsx(
10543
+ "div",
10544
+ {
10545
+ "x-data": true,
10546
+ "x-show": "$store.toast.visible",
10547
+ "x-transition:enter": "transition ease-out duration-300",
10548
+ "x-transition:enter-start": "opacity-0 transform translate-y-2",
10549
+ "x-transition:enter-end": "opacity-100 transform translate-y-0",
10550
+ "x-transition:leave": "transition ease-in duration-200",
10551
+ "x-transition:leave-start": "opacity-100",
10552
+ "x-transition:leave-end": "opacity-0",
10553
+ class: "fixed top-5 right-5 z-50",
10554
+ style: "display: none;",
10555
+ children: /* @__PURE__ */ jsxs(
10556
+ "div",
10557
+ {
10558
+ class: "flex items-center w-full max-w-xs p-4 text-gray-500 bg-white rounded-lg shadow dark:text-gray-400 dark:bg-gray-800",
10559
+ role: "alert",
10560
+ children: [
10561
+ /* @__PURE__ */ jsxs(
10562
+ "div",
10563
+ {
10564
+ class: "inline-flex items-center justify-center shrink-0 w-8 h-8 rounded-lg",
10565
+ "x-bind:class": "{\n 'text-green-500 bg-green-100 dark:bg-green-800 dark:text-green-200': $store.toast.type === 'success',\n 'text-red-500 bg-red-100 dark:bg-red-800 dark:text-red-200': $store.toast.type === 'error',\n 'text-orange-500 bg-orange-100 dark:bg-orange-700 dark:text-orange-200': $store.toast.type === 'warning',\n 'text-blue-500 bg-blue-100 dark:bg-blue-800 dark:text-blue-200': $store.toast.type === 'info'\n }",
10566
+ children: [
10567
+ /* @__PURE__ */ jsx(
10568
+ "svg",
10569
+ {
10570
+ "x-show": "$store.toast.type === 'success'",
10571
+ class: "w-5 h-5",
10572
+ "aria-hidden": "true",
10573
+ xmlns: "http://www.w3.org/2000/svg",
10574
+ fill: "currentColor",
10575
+ viewBox: "0 0 20 20",
10576
+ children: /* @__PURE__ */ jsx("path", { d: "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5Zm3.707 8.207-4 4a1 1 0 0 1-1.414 0l-2-2a1 1 0 0 1 1.414-1.414L9 10.586l3.293-3.293a1 1 0 0 1 1.414 1.414Z" })
10577
+ }
10578
+ ),
10579
+ /* @__PURE__ */ jsx(
10580
+ "svg",
10581
+ {
10582
+ "x-show": "$store.toast.type === 'error'",
10583
+ class: "w-5 h-5",
10584
+ "aria-hidden": "true",
10585
+ xmlns: "http://www.w3.org/2000/svg",
10586
+ fill: "currentColor",
10587
+ viewBox: "0 0 20 20",
10588
+ children: /* @__PURE__ */ jsx("path", { d: "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5Zm3.707 11.793a1 1 0 1 1-1.414 1.414L10 11.414l-2.293 2.293a1 1 0 0 1-1.414-1.414L8.586 10 6.293 7.707a1 1 0 0 1 1.414-1.414L10 8.586l2.293-2.293a1 1 0 0 1 1.414 1.414L11.414 10l2.293 2.293Z" })
10589
+ }
10590
+ ),
10591
+ /* @__PURE__ */ jsx(
10592
+ "svg",
10593
+ {
10594
+ "x-show": "$store.toast.type === 'warning'",
10595
+ class: "w-5 h-5",
10596
+ "aria-hidden": "true",
10597
+ xmlns: "http://www.w3.org/2000/svg",
10598
+ fill: "currentColor",
10599
+ viewBox: "0 0 20 20",
10600
+ children: /* @__PURE__ */ jsx("path", { d: "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5ZM10 15a1 1 0 1 1 0-2 1 1 0 0 1 0 2Zm1-4a1 1 0 0 1-2 0V6a1 1 0 0 1 2 0v5Z" })
10601
+ }
10602
+ ),
10603
+ /* @__PURE__ */ jsx(
10604
+ "svg",
10605
+ {
10606
+ "x-show": "$store.toast.type === 'info'",
10607
+ class: "w-5 h-5",
10608
+ "aria-hidden": "true",
10609
+ xmlns: "http://www.w3.org/2000/svg",
10610
+ fill: "currentColor",
10611
+ viewBox: "0 0 20 20",
10612
+ children: /* @__PURE__ */ jsx("path", { d: "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5ZM9.5 4a1.5 1.5 0 1 1 0 3 1.5 1.5 0 0 1 0-3ZM12 15H8a1 1 0 0 1 0-2h1v-3H8a1 1 0 0 1 0-2h2a1 1 0 0 1 1 1v4h1a1 1 0 0 1 0 2Z" })
10613
+ }
10614
+ )
10615
+ ]
10616
+ }
10617
+ ),
10618
+ /* @__PURE__ */ jsx(
10619
+ "div",
10620
+ {
10621
+ class: "ml-3 text-sm font-normal",
10622
+ "x-text": "$store.toast.message"
10623
+ }
10624
+ ),
10625
+ /* @__PURE__ */ jsxs(
10626
+ "button",
10627
+ {
10628
+ type: "button",
10629
+ class: "ml-auto -mx-1.5 -my-1.5 bg-white text-gray-400 hover:text-gray-900 rounded-lg focus:ring-2 focus:ring-gray-300 p-1.5 hover:bg-gray-100 inline-flex items-center justify-center h-8 w-8 dark:text-gray-500 dark:hover:text-white dark:bg-gray-800 dark:hover:bg-gray-700",
10630
+ "x-on:click": "$store.toast.hide()",
10631
+ "aria-label": "Close",
10632
+ children: [
10633
+ /* @__PURE__ */ jsx("span", { class: "sr-only", children: "Close" }),
10634
+ /* @__PURE__ */ jsx(
10635
+ "svg",
10636
+ {
10637
+ class: "w-3 h-3",
10638
+ "aria-hidden": "true",
10639
+ xmlns: "http://www.w3.org/2000/svg",
10640
+ fill: "none",
10641
+ viewBox: "0 0 14 14",
10642
+ children: /* @__PURE__ */ jsx(
10643
+ "path",
10644
+ {
10645
+ stroke: "currentColor",
10646
+ "stroke-linecap": "round",
10647
+ "stroke-linejoin": "round",
10648
+ "stroke-width": "2",
10649
+ d: "m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"
10650
+ }
10651
+ )
10652
+ }
10653
+ )
10654
+ ]
10655
+ }
10656
+ )
10657
+ ]
10658
+ }
10659
+ )
10660
+ }
10661
+ );
10662
+ };
10417
10663
  const Layout = ({
10418
10664
  title,
10419
10665
  version,
10420
10666
  children,
10421
10667
  eventClientConfig
10422
10668
  }) => {
10423
- const versionString = version || "1.28.0";
10669
+ const versionString = version || "1.30.0";
10424
10670
  const versionInitializer = `versionUpdate({ currentVersion: ${`'${versionString}'`} })`;
10425
10671
  return /* @__PURE__ */ jsxs("html", { lang: "en", children: [
10426
10672
  /* @__PURE__ */ jsxs("head", { children: [
@@ -10565,7 +10811,8 @@ const Layout = ({
10565
10811
  form .spinner { display: none; }
10566
10812
  ` })
10567
10813
  ] }),
10568
- /* @__PURE__ */ jsxs("body", { class: "bg-gray-50 dark:bg-gray-900", children: [
10814
+ /* @__PURE__ */ jsxs("body", { class: "bg-gray-50 dark:bg-gray-900", "hx-ext": "morph", children: [
10815
+ /* @__PURE__ */ jsx(Toast, {}),
10569
10816
  /* @__PURE__ */ jsx(
10570
10817
  "header",
10571
10818
  {
@@ -10719,19 +10966,35 @@ function registerIndexRoute(server, config) {
10719
10966
  trpcUrl
10720
10967
  },
10721
10968
  children: [
10969
+ /* @__PURE__ */ jsx(
10970
+ "div",
10971
+ {
10972
+ id: "analytics-stats",
10973
+ "hx-get": "/web/stats",
10974
+ "hx-trigger": "load, library-change from:body",
10975
+ "hx-swap": "morph:innerHTML",
10976
+ children: /* @__PURE__ */ jsxs("div", { class: "grid grid-cols-1 sm:grid-cols-3 gap-4 mb-4 animate-pulse", children: [
10977
+ /* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600 h-20" }),
10978
+ /* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600 h-20" }),
10979
+ /* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600 h-20" })
10980
+ ] })
10981
+ }
10982
+ ),
10722
10983
  /* @__PURE__ */ jsxs("section", { class: "mb-4 p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600", children: [
10723
10984
  /* @__PURE__ */ jsxs("div", { class: "flex items-center justify-between mb-2", children: [
10724
10985
  /* @__PURE__ */ jsx("h2", { class: "text-xl font-semibold text-gray-900 dark:text-white", children: "Job Queue" }),
10725
10986
  /* @__PURE__ */ jsx(
10726
10987
  "button",
10727
10988
  {
10989
+ id: "clear-completed-btn",
10728
10990
  type: "button",
10729
- class: "text-xs px-3 py-1.5 text-gray-700 bg-gray-100 border border-gray-300 rounded-lg hover:bg-gray-200 focus:ring-4 focus:outline-none focus:ring-gray-100 dark:bg-gray-600 dark:text-gray-300 dark:border-gray-500 dark:hover:bg-gray-700 dark:focus:ring-gray-700 transition-colors duration-150",
10991
+ class: "text-xs px-3 py-1.5 text-gray-400 bg-gray-50 border border-gray-200 rounded-lg cursor-not-allowed focus:ring-4 focus:outline-none transition-colors duration-150 dark:bg-gray-700 dark:text-gray-500 dark:border-gray-600",
10730
10992
  title: "Clear all completed, cancelled, and failed jobs",
10731
10993
  "hx-post": "/web/jobs/clear-completed",
10732
10994
  "hx-trigger": "click",
10733
10995
  "hx-on": "htmx:afterRequest: document.dispatchEvent(new Event('job-list-refresh'))",
10734
10996
  "hx-swap": "none",
10997
+ disabled: true,
10735
10998
  children: "Clear Completed Jobs"
10736
10999
  }
10737
11000
  )
@@ -10741,7 +11004,8 @@ function registerIndexRoute(server, config) {
10741
11004
  {
10742
11005
  id: "job-queue",
10743
11006
  "hx-get": "/web/jobs",
10744
- "hx-trigger": "load, job-status-change from:body, job-progress from:body, job-list-change from:body",
11007
+ "hx-trigger": "load, job-status-change from:body, job-progress from:body, job-list-change from:body, job-list-refresh from:body",
11008
+ "hx-swap": "morph:innerHTML",
10745
11009
  children: /* @__PURE__ */ jsxs("div", { class: "animate-pulse", children: [
10746
11010
  /* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-48 mb-4" }),
10747
11011
  /* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-full mb-2.5" }),
@@ -10750,11 +11014,17 @@ function registerIndexRoute(server, config) {
10750
11014
  }
10751
11015
  )
10752
11016
  ] }),
10753
- /* @__PURE__ */ jsx("section", { class: "mb-8", children: /* @__PURE__ */ jsx("div", { id: "addJobForm", "hx-get": "/web/jobs/new", "hx-trigger": "load", children: /* @__PURE__ */ jsxs("div", { class: "p-6 bg-white rounded-lg shadow dark:bg-gray-800 animate-pulse", children: [
10754
- /* @__PURE__ */ jsx("div", { class: "h-6 bg-gray-200 rounded-full dark:bg-gray-700 w-1/3 mb-4" }),
10755
- /* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-full mb-2.5" }),
10756
- /* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-full mb-2.5" })
10757
- ] }) }) }),
11017
+ /* @__PURE__ */ jsx("section", { class: "mb-8", children: /* @__PURE__ */ jsx("div", { id: "addJobForm", children: /* @__PURE__ */ jsx(
11018
+ "button",
11019
+ {
11020
+ type: "button",
11021
+ "hx-get": "/web/jobs/new",
11022
+ "hx-target": "#addJobForm",
11023
+ "hx-swap": "innerHTML",
11024
+ class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500 transition-colors duration-150",
11025
+ children: "Add New Documentation"
11026
+ }
11027
+ ) }) }),
10758
11028
  /* @__PURE__ */ jsxs("div", { children: [
10759
11029
  /* @__PURE__ */ jsx("h2", { class: "text-xl font-semibold mb-2 text-gray-900 dark:text-white", children: "Indexed Documentation" }),
10760
11030
  /* @__PURE__ */ jsx(
@@ -10763,6 +11033,7 @@ function registerIndexRoute(server, config) {
10763
11033
  id: "indexed-docs",
10764
11034
  "hx-get": "/web/libraries",
10765
11035
  "hx-trigger": "load, library-change from:body",
11036
+ "hx-swap": "morph:innerHTML",
10766
11037
  children: /* @__PURE__ */ jsxs("div", { class: "animate-pulse", children: [
10767
11038
  /* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-48 mb-4" }),
10768
11039
  /* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-full mb-2.5" }),
@@ -10917,76 +11188,53 @@ const LoadingSpinner = () => /* @__PURE__ */ jsxs(
10917
11188
  const JobItem = ({ job }) => {
10918
11189
  job.dbStatus || job.status;
10919
11190
  const isActiveJob = job.dbStatus ? isActiveStatus(job.dbStatus) : job.status === PipelineJobStatus.QUEUED || job.status === PipelineJobStatus.RUNNING;
10920
- return /* @__PURE__ */ jsx("div", { class: "block p-3 bg-gray-50 dark:bg-gray-700 rounded-lg border border-gray-200 dark:border-gray-600", children: /* @__PURE__ */ jsxs("div", { class: "flex items-start justify-between", children: [
10921
- /* @__PURE__ */ jsxs("div", { class: "flex-1", children: [
10922
- /* @__PURE__ */ jsxs("p", { class: "text-sm font-medium text-gray-900 dark:text-white", children: [
10923
- /* @__PURE__ */ jsx("span", { safe: true, children: job.library }),
10924
- " ",
10925
- /* @__PURE__ */ jsx(VersionBadge, { version: job.version })
10926
- ] }),
10927
- /* @__PURE__ */ jsx("div", { class: "text-xs text-gray-500 dark:text-gray-400 mt-1", children: job.startedAt ? /* @__PURE__ */ jsxs("div", { children: [
10928
- "Last Indexed:",
10929
- " ",
10930
- /* @__PURE__ */ jsx("span", { safe: true, children: new Date(job.startedAt).toLocaleString() })
10931
- ] }) : null }),
10932
- job.progress && job.progress.totalPages > 0 && isActiveJob ? /* @__PURE__ */ jsx("div", { class: "mt-2", children: /* @__PURE__ */ jsx(ProgressBar, { progress: job.progress }) }) : null,
10933
- job.errorMessage || job.error ? /* @__PURE__ */ jsxs("div", { class: "mt-2 p-2 bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded text-xs", children: [
10934
- /* @__PURE__ */ jsx("div", { class: "font-medium text-red-800 dark:text-red-300 mb-1", children: "Error:" }),
10935
- /* @__PURE__ */ jsx("div", { safe: true, class: "text-red-700 dark:text-red-400", children: job.errorMessage || job.error })
10936
- ] }) : null
10937
- ] }),
10938
- /* @__PURE__ */ jsxs("div", { class: "flex flex-col items-end gap-2 ml-4", children: [
10939
- /* @__PURE__ */ jsxs("div", { class: "flex items-center gap-2", children: [
10940
- job.dbStatus ? /* @__PURE__ */ jsx(StatusBadge, { status: job.dbStatus }) : /* @__PURE__ */ jsx(
10941
- "span",
10942
- {
10943
- class: `px-1.5 py-0.5 text-xs font-medium rounded ${job.status === PipelineJobStatus.COMPLETED ? "bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-300" : job.error ? "bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-300" : "bg-blue-100 text-blue-800 dark:bg-blue-900 dark:text-blue-300"}`,
10944
- children: job.status
10945
- }
10946
- ),
10947
- isActiveJob && /* @__PURE__ */ jsxs(
10948
- "button",
10949
- {
10950
- type: "button",
10951
- class: "font-medium rounded-lg text-xs p-1 text-center inline-flex items-center transition-colors duration-150 ease-in-out border border-gray-300 bg-white text-red-600 hover:bg-red-50 focus:ring-4 focus:outline-none focus:ring-red-100 dark:border-gray-600 dark:bg-gray-800 dark:text-red-400 dark:hover:bg-gray-700 dark:focus:ring-red-900",
10952
- title: "Stop this job",
10953
- "x-data": "{}",
10954
- "x-on:click": `
10955
- if ($store.confirmingAction.type === 'job-cancel' && $store.confirmingAction.id === '${job.id}') {
10956
- $store.confirmingAction.isStopping = true;
10957
- fetch('/web/jobs/' + '${job.id}' + '/cancel', {
10958
- method: 'POST',
10959
- headers: { 'Accept': 'application/json' },
10960
- })
10961
- .then(r => r.json())
10962
- .then(() => {
10963
- $store.confirmingAction.type = null;
10964
- $store.confirmingAction.id = null;
10965
- $store.confirmingAction.isStopping = false;
10966
- if ($store.confirmingAction.timeoutId) { clearTimeout($store.confirmingAction.timeoutId); $store.confirmingAction.timeoutId = null; }
10967
- document.dispatchEvent(new CustomEvent('job-list-refresh'));
10968
- })
10969
- .catch(() => { $store.confirmingAction.isStopping = false; });
10970
- } else {
10971
- if ($store.confirmingAction.timeoutId) { clearTimeout($store.confirmingAction.timeoutId); $store.confirmingAction.timeoutId = null; }
10972
- $store.confirmingAction.type = 'job-cancel';
10973
- $store.confirmingAction.id = '${job.id}';
10974
- $store.confirmingAction.isStopping = false;
10975
- $store.confirmingAction.timeoutId = setTimeout(() => {
10976
- $store.confirmingAction.type = null;
10977
- $store.confirmingAction.id = null;
10978
- $store.confirmingAction.isStopping = false;
10979
- $store.confirmingAction.timeoutId = null;
10980
- }, 3000);
10981
- }
10982
- `,
10983
- "x-bind:disabled": `$store.confirmingAction.type === 'job-cancel' && $store.confirmingAction.id === '${job.id}' && $store.confirmingAction.isStopping`,
10984
- children: [
10985
- /* @__PURE__ */ jsxs(
10986
- "span",
10987
- {
10988
- "x-show": `$store.confirmingAction.type !== 'job-cancel' || $store.confirmingAction.id !== '${job.id}' || $store.confirmingAction.isStopping`,
10989
- children: [
11191
+ const defaultStateClasses = "border border-gray-300 bg-white text-red-600 hover:bg-red-50 focus:ring-4 focus:outline-none focus:ring-red-100 dark:border-gray-600 dark:bg-gray-800 dark:text-red-400 dark:hover:bg-gray-700 dark:focus:ring-red-900";
11192
+ const confirmingStateClasses = "bg-red-600 text-white border-red-600 focus:ring-4 focus:outline-none focus:ring-red-300 dark:bg-red-700 dark:border-red-700 dark:focus:ring-red-800";
11193
+ return /* @__PURE__ */ jsx(
11194
+ "div",
11195
+ {
11196
+ id: `job-item-${job.id}`,
11197
+ class: "block p-3 bg-gray-50 dark:bg-gray-700 rounded-lg border border-gray-200 dark:border-gray-600",
11198
+ "data-job-id": job.id,
11199
+ "x-data": "{ jobId: $el.dataset.jobId, confirming: $el.dataset.confirming === 'true', isStopping: false }",
11200
+ children: /* @__PURE__ */ jsxs("div", { class: "flex items-start justify-between", children: [
11201
+ /* @__PURE__ */ jsxs("div", { class: "flex-1", children: [
11202
+ /* @__PURE__ */ jsxs("p", { class: "text-sm font-medium text-gray-900 dark:text-white", children: [
11203
+ /* @__PURE__ */ jsx("span", { safe: true, children: job.library }),
11204
+ " ",
11205
+ /* @__PURE__ */ jsx(VersionBadge, { version: job.version })
11206
+ ] }),
11207
+ /* @__PURE__ */ jsx("div", { class: "text-xs text-gray-500 dark:text-gray-400 mt-1", children: job.startedAt ? /* @__PURE__ */ jsxs("div", { children: [
11208
+ "Last Indexed:",
11209
+ " ",
11210
+ /* @__PURE__ */ jsx("span", { safe: true, children: new Date(job.startedAt).toLocaleString() })
11211
+ ] }) : null }),
11212
+ job.progress && job.progress.totalPages > 0 && isActiveJob ? /* @__PURE__ */ jsx("div", { class: "mt-2", children: /* @__PURE__ */ jsx(ProgressBar, { progress: job.progress }) }) : null,
11213
+ job.errorMessage || job.error ? /* @__PURE__ */ jsxs("div", { class: "mt-2 p-2 bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded text-xs", children: [
11214
+ /* @__PURE__ */ jsx("div", { class: "font-medium text-red-800 dark:text-red-300 mb-1", children: "Error:" }),
11215
+ /* @__PURE__ */ jsx("div", { safe: true, class: "text-red-700 dark:text-red-400", children: job.errorMessage || job.error })
11216
+ ] }) : null
11217
+ ] }),
11218
+ /* @__PURE__ */ jsxs("div", { class: "flex flex-col items-end gap-2 ml-4", children: [
11219
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center gap-2", children: [
11220
+ job.dbStatus ? /* @__PURE__ */ jsx(StatusBadge, { status: job.dbStatus }) : /* @__PURE__ */ jsx(
11221
+ "span",
11222
+ {
11223
+ class: `px-1.5 py-0.5 text-xs font-medium rounded ${job.status === PipelineJobStatus.COMPLETED ? "bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-300" : job.error ? "bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-300" : "bg-blue-100 text-blue-800 dark:bg-blue-900 dark:text-blue-300"}`,
11224
+ children: job.status
11225
+ }
11226
+ ),
11227
+ isActiveJob && /* @__PURE__ */ jsxs(
11228
+ "button",
11229
+ {
11230
+ type: "button",
11231
+ class: "font-medium rounded-lg text-xs p-1 text-center inline-flex items-center transition-colors duration-150 ease-in-out",
11232
+ title: "Stop this job",
11233
+ "x-bind:class": `confirming ? '${confirmingStateClasses}' : '${defaultStateClasses}'`,
11234
+ "x-on:click": "\n if (confirming) {\n isStopping = true;\n window.confirmationManager.clear($root.id);\n fetch('/web/jobs/' + jobId + '/cancel', {\n method: 'POST',\n headers: { 'Accept': 'application/json' },\n })\n .then(r => r.json())\n .then(() => {\n confirming = false;\n isStopping = false;\n document.dispatchEvent(new CustomEvent('job-list-refresh'));\n })\n .catch(() => { isStopping = false; });\n } else {\n confirming = true;\n isStopping = false;\n window.confirmationManager.start($root.id);\n }\n ",
11235
+ "x-bind:disabled": "isStopping",
11236
+ children: [
11237
+ /* @__PURE__ */ jsxs("span", { "x-show": "!confirming && !isStopping", children: [
10990
11238
  /* @__PURE__ */ jsx(
10991
11239
  "svg",
10992
11240
  {
@@ -10998,39 +11246,47 @@ const JobItem = ({ job }) => {
10998
11246
  }
10999
11247
  ),
11000
11248
  /* @__PURE__ */ jsx("span", { class: "sr-only", children: "Stop job" })
11001
- ]
11002
- }
11003
- ),
11004
- /* @__PURE__ */ jsx(
11005
- "span",
11006
- {
11007
- "x-show": `$store.confirmingAction.type === 'job-cancel' && $store.confirmingAction.id === '${job.id}' && !$store.confirmingAction.isStopping`,
11008
- class: "px-2",
11009
- children: "Cancel?"
11010
- }
11011
- ),
11012
- /* @__PURE__ */ jsxs(
11013
- "span",
11014
- {
11015
- "x-show": `$store.confirmingAction.type === 'job-cancel' && $store.confirmingAction.id === '${job.id}' && $store.confirmingAction.isStopping`,
11016
- children: [
11249
+ ] }),
11250
+ /* @__PURE__ */ jsx("span", { "x-show": "confirming && !isStopping", class: "px-2", children: "Cancel?" }),
11251
+ /* @__PURE__ */ jsxs("span", { "x-show": "isStopping", children: [
11017
11252
  /* @__PURE__ */ jsx(LoadingSpinner, {}),
11018
11253
  /* @__PURE__ */ jsx("span", { class: "sr-only", children: "Stopping..." })
11019
- ]
11020
- }
11021
- )
11022
- ]
11023
- }
11024
- )
11025
- ] }),
11026
- job.error ? (
11027
- // Keep the error badge for clarity if an error occurred
11028
- /* @__PURE__ */ jsx("span", { class: "bg-red-100 text-red-800 text-xs font-medium px-1.5 py-0.5 rounded dark:bg-red-900 dark:text-red-300", children: "Error" })
11029
- ) : null
11030
- ] })
11031
- ] }) });
11254
+ ] })
11255
+ ]
11256
+ }
11257
+ )
11258
+ ] }),
11259
+ job.error ? (
11260
+ // Keep the error badge for clarity if an error occurred
11261
+ /* @__PURE__ */ jsx("span", { class: "bg-red-100 text-red-800 text-xs font-medium px-1.5 py-0.5 rounded dark:bg-red-900 dark:text-red-300", children: "Error" })
11262
+ ) : null
11263
+ ] })
11264
+ ] })
11265
+ }
11266
+ );
11267
+ };
11268
+ const JobList = ({ jobs }) => {
11269
+ const hasJobs = jobs.length > 0;
11270
+ return /* @__PURE__ */ jsxs(Fragment, { children: [
11271
+ /* @__PURE__ */ jsx("div", { id: "job-list", class: "space-y-2 animate-[fadeSlideIn_0.2s_ease-out]", children: hasJobs ? jobs.map((job) => /* @__PURE__ */ jsx(JobItem, { job })) : /* @__PURE__ */ jsx("p", { class: "text-center text-gray-500 dark:text-gray-400", children: "No pending jobs." }) }),
11272
+ /* @__PURE__ */ jsx(
11273
+ "button",
11274
+ {
11275
+ id: "clear-completed-btn",
11276
+ "hx-swap-oob": "true",
11277
+ type: "button",
11278
+ class: `text-xs px-3 py-1.5 rounded-lg focus:ring-4 focus:outline-none transition-colors duration-150 ${hasJobs ? "text-gray-700 bg-gray-100 border border-gray-300 hover:bg-gray-200 focus:ring-gray-100 dark:bg-gray-600 dark:text-gray-300 dark:border-gray-500 dark:hover:bg-gray-700 dark:focus:ring-gray-700" : "text-gray-400 bg-gray-50 border border-gray-200 cursor-not-allowed dark:bg-gray-700 dark:text-gray-500 dark:border-gray-600"}`,
11279
+ title: "Clear all completed, cancelled, and failed jobs",
11280
+ "hx-post": "/web/jobs/clear-completed",
11281
+ "hx-trigger": "click",
11282
+ "hx-on": "htmx:afterRequest: document.dispatchEvent(new Event('job-list-refresh'))",
11283
+ "hx-swap": "none",
11284
+ disabled: !hasJobs,
11285
+ children: "Clear Completed Jobs"
11286
+ }
11287
+ )
11288
+ ] });
11032
11289
  };
11033
- const JobList = ({ jobs }) => /* @__PURE__ */ jsx("div", { id: "job-list", class: "space-y-2", children: jobs.length === 0 ? /* @__PURE__ */ jsx("p", { class: "text-center text-gray-500 dark:text-gray-400", children: "No pending jobs." }) : jobs.map((job) => /* @__PURE__ */ jsx(JobItem, { job })) });
11034
11290
  function registerJobListRoutes(server, listJobsTool) {
11035
11291
  server.get("/web/jobs", async () => {
11036
11292
  const result = await listJobsTool.execute({});
@@ -11048,7 +11304,7 @@ const Alert = ({ type, title, message }) => {
11048
11304
  iconSvg = /* @__PURE__ */ jsx(
11049
11305
  "svg",
11050
11306
  {
11051
- class: "flex-shrink-0 inline w-4 h-4 me-3",
11307
+ class: "shrink-0 inline w-4 h-4 me-3",
11052
11308
  "aria-hidden": "true",
11053
11309
  xmlns: "http://www.w3.org/2000/svg",
11054
11310
  fill: "currentColor",
@@ -11063,7 +11319,7 @@ const Alert = ({ type, title, message }) => {
11063
11319
  iconSvg = /* @__PURE__ */ jsx(
11064
11320
  "svg",
11065
11321
  {
11066
- class: "flex-shrink-0 inline w-4 h-4 me-3",
11322
+ class: "shrink-0 inline w-4 h-4 me-3",
11067
11323
  "aria-hidden": "true",
11068
11324
  xmlns: "http://www.w3.org/2000/svg",
11069
11325
  fill: "currentColor",
@@ -11078,7 +11334,7 @@ const Alert = ({ type, title, message }) => {
11078
11334
  iconSvg = /* @__PURE__ */ jsx(
11079
11335
  "svg",
11080
11336
  {
11081
- class: "flex-shrink-0 inline w-4 h-4 me-3",
11337
+ class: "shrink-0 inline w-4 h-4 me-3",
11082
11338
  "aria-hidden": "true",
11083
11339
  xmlns: "http://www.w3.org/2000/svg",
11084
11340
  fill: "currentColor",
@@ -11094,7 +11350,7 @@ const Alert = ({ type, title, message }) => {
11094
11350
  iconSvg = /* @__PURE__ */ jsx(
11095
11351
  "svg",
11096
11352
  {
11097
- class: "flex-shrink-0 inline w-4 h-4 me-3",
11353
+ class: "shrink-0 inline w-4 h-4 me-3",
11098
11354
  "aria-hidden": "true",
11099
11355
  xmlns: "http://www.w3.org/2000/svg",
11100
11356
  fill: "currentColor",
@@ -11184,13 +11440,43 @@ const ScrapeFormContent = ({
11184
11440
  defaultExcludePatterns
11185
11441
  }) => {
11186
11442
  const defaultExcludePatternsText = defaultExcludePatterns?.join("\n") || "";
11187
- return /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600", children: [
11188
- /* @__PURE__ */ jsx("h3", { class: "text-xl font-semibold text-gray-900 dark:text-white mb-2", children: "Queue New Scrape Job" }),
11443
+ return /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600 relative animate-[fadeSlideIn_0.2s_ease-out]", children: [
11444
+ /* @__PURE__ */ jsx(
11445
+ "button",
11446
+ {
11447
+ type: "button",
11448
+ "hx-get": "/web/jobs/new-button",
11449
+ "hx-target": "#addJobForm",
11450
+ "hx-swap": "innerHTML",
11451
+ class: "absolute top-3 right-3 p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300 rounded-full hover:bg-gray-100 dark:hover:bg-gray-700 transition-colors duration-150",
11452
+ title: "Close",
11453
+ children: /* @__PURE__ */ jsx(
11454
+ "svg",
11455
+ {
11456
+ class: "w-5 h-5",
11457
+ fill: "none",
11458
+ stroke: "currentColor",
11459
+ viewBox: "0 0 24 24",
11460
+ xmlns: "http://www.w3.org/2000/svg",
11461
+ children: /* @__PURE__ */ jsx(
11462
+ "path",
11463
+ {
11464
+ "stroke-linecap": "round",
11465
+ "stroke-linejoin": "round",
11466
+ "stroke-width": "2",
11467
+ d: "M6 18L18 6M6 6l12 12"
11468
+ }
11469
+ )
11470
+ }
11471
+ )
11472
+ }
11473
+ ),
11474
+ /* @__PURE__ */ jsx("h3", { class: "text-xl font-semibold text-gray-900 dark:text-white mb-2 pr-8", children: "Add New Documentation" }),
11189
11475
  /* @__PURE__ */ jsxs(
11190
11476
  "form",
11191
11477
  {
11192
11478
  "hx-post": "/web/jobs/scrape",
11193
- "hx-target": "#job-response",
11479
+ "hx-target": "#addJobForm",
11194
11480
  "hx-swap": "innerHTML",
11195
11481
  class: "space-y-2",
11196
11482
  "x-data": "{\n url: '',\n hasPath: false,\n headers: [],\n checkUrlPath() {\n try {\n const url = new URL(this.url);\n this.hasPath = url.pathname !== '/' && url.pathname !== '';\n } catch (e) {\n this.hasPath = false;\n }\n }\n }",
@@ -11292,313 +11578,338 @@ const ScrapeFormContent = ({
11292
11578
  ),
11293
11579
  /* @__PURE__ */ jsx(Tooltip, { text: "Specify the version of the library documentation you're indexing. This allows for version-specific searches." })
11294
11580
  ] }),
11295
- /* @__PURE__ */ jsx(
11296
- "input",
11297
- {
11298
- type: "text",
11299
- name: "version",
11300
- id: "version",
11301
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
11302
- }
11303
- )
11304
- ] }),
11305
- /* @__PURE__ */ jsxs("details", { class: "bg-gray-50 dark:bg-gray-900 p-2 rounded-md", children: [
11306
- /* @__PURE__ */ jsx("summary", { class: "cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400", children: "Advanced Options" }),
11307
- /* @__PURE__ */ jsxs("div", { class: "mt-2 space-y-2", "x-data": "{ headers: [] }", children: [
11308
- /* @__PURE__ */ jsxs("div", { children: [
11309
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11310
- /* @__PURE__ */ jsx(
11311
- "label",
11312
- {
11313
- for: "maxPages",
11314
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11315
- children: "Max Pages"
11316
- }
11317
- ),
11318
- /* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
11319
- ] }),
11320
- /* @__PURE__ */ jsx(
11321
- "input",
11322
- {
11323
- type: "number",
11324
- name: "maxPages",
11325
- id: "maxPages",
11326
- min: "1",
11327
- placeholder: "1000",
11328
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
11329
- }
11330
- )
11331
- ] }),
11332
- /* @__PURE__ */ jsxs("div", { children: [
11333
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11334
- /* @__PURE__ */ jsx(
11335
- "label",
11336
- {
11337
- for: "maxDepth",
11338
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11339
- children: "Max Depth"
11340
- }
11341
- ),
11342
- /* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
11343
- ] }),
11344
- /* @__PURE__ */ jsx(
11345
- "input",
11346
- {
11347
- type: "number",
11348
- name: "maxDepth",
11349
- id: "maxDepth",
11350
- min: "0",
11351
- placeholder: "3",
11352
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
11353
- }
11354
- )
11355
- ] }),
11356
- /* @__PURE__ */ jsxs("div", { children: [
11357
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11358
- /* @__PURE__ */ jsx(
11359
- "label",
11360
- {
11361
- for: "scope",
11362
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11363
- children: "Scope"
11364
- }
11365
- ),
11366
- /* @__PURE__ */ jsx(
11367
- Tooltip,
11368
- {
11369
- text: /* @__PURE__ */ jsxs("div", { children: [
11370
- "Controls which pages are scraped:",
11371
- /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
11372
- /* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
11373
- /* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
11374
- /* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
11375
- ] })
11376
- ] })
11377
- }
11378
- )
11379
- ] }),
11581
+ /* @__PURE__ */ jsx(
11582
+ "input",
11583
+ {
11584
+ type: "text",
11585
+ name: "version",
11586
+ id: "version",
11587
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
11588
+ }
11589
+ )
11590
+ ] }),
11591
+ /* @__PURE__ */ jsxs(
11592
+ "div",
11593
+ {
11594
+ class: "bg-gray-50 dark:bg-gray-900 p-2 rounded-md",
11595
+ "x-data": "{ open: false, headers: [] }",
11596
+ children: [
11380
11597
  /* @__PURE__ */ jsxs(
11381
- "select",
11598
+ "button",
11382
11599
  {
11383
- name: "scope",
11384
- id: "scope",
11385
- class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
11600
+ type: "button",
11601
+ class: "w-full flex items-center gap-1.5 cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400 hover:text-gray-800 dark:hover:text-gray-200 transition-colors",
11602
+ "x-on:click": "open = !open",
11386
11603
  children: [
11387
- /* @__PURE__ */ jsx("option", { value: "subpages", selected: true, children: "Subpages (Default)" }),
11388
- /* @__PURE__ */ jsx("option", { value: "hostname", children: "Hostname" }),
11389
- /* @__PURE__ */ jsx("option", { value: "domain", children: "Domain" })
11604
+ /* @__PURE__ */ jsx(
11605
+ "svg",
11606
+ {
11607
+ class: "w-4 h-4 transform transition-transform duration-200",
11608
+ "x-bind:class": "{ 'rotate-90': open }",
11609
+ fill: "none",
11610
+ stroke: "currentColor",
11611
+ viewBox: "0 0 24 24",
11612
+ children: /* @__PURE__ */ jsx(
11613
+ "path",
11614
+ {
11615
+ "stroke-linecap": "round",
11616
+ "stroke-linejoin": "round",
11617
+ "stroke-width": "2",
11618
+ d: "M9 5l7 7-7 7"
11619
+ }
11620
+ )
11621
+ }
11622
+ ),
11623
+ /* @__PURE__ */ jsx("span", { children: "Advanced Options" })
11390
11624
  ]
11391
11625
  }
11392
- )
11393
- ] }),
11394
- /* @__PURE__ */ jsxs("div", { children: [
11395
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11396
- /* @__PURE__ */ jsx(
11397
- "label",
11398
- {
11399
- for: "includePatterns",
11400
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11401
- children: "Include Patterns"
11402
- }
11403
- ),
11404
- /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
11405
- ] }),
11406
- /* @__PURE__ */ jsx(
11407
- "textarea",
11408
- {
11409
- name: "includePatterns",
11410
- id: "includePatterns",
11411
- rows: "2",
11412
- placeholder: "e.g. docs/* or /api\\/v1.*/",
11413
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
11414
- }
11415
- )
11416
- ] }),
11417
- /* @__PURE__ */ jsxs("div", { children: [
11418
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11419
- /* @__PURE__ */ jsx(
11420
- "label",
11421
- {
11422
- for: "excludePatterns",
11423
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11424
- children: "Exclude Patterns"
11425
- }
11426
- ),
11427
- /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/. Edit or clear this field to customize exclusions." })
11428
- ] }),
11429
- /* @__PURE__ */ jsx(
11430
- "textarea",
11431
- {
11432
- name: "excludePatterns",
11433
- id: "excludePatterns",
11434
- rows: "5",
11435
- safe: true,
11436
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white font-mono text-xs",
11437
- children: defaultExcludePatternsText
11438
- }
11439
11626
  ),
11440
- /* @__PURE__ */ jsx("p", { class: "mt-1 text-xs text-gray-500 dark:text-gray-400", children: "Default patterns are pre-filled. Edit to customize or clear to exclude nothing." })
11441
- ] }),
11442
- /* @__PURE__ */ jsxs("div", { children: [
11443
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11444
- /* @__PURE__ */ jsx(
11445
- "label",
11446
- {
11447
- for: "scrapeMode",
11448
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11449
- children: "Scrape Mode"
11450
- }
11451
- ),
11452
- /* @__PURE__ */ jsx(
11453
- Tooltip,
11454
- {
11455
- text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
11456
- /* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
11457
- /* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
11458
- /* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
11459
- ] }) })
11460
- }
11461
- )
11462
- ] }),
11463
- /* @__PURE__ */ jsxs(
11464
- "select",
11465
- {
11466
- name: "scrapeMode",
11467
- id: "scrapeMode",
11468
- class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
11469
- children: [
11470
- /* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
11471
- /* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
11472
- /* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
11473
- ]
11474
- }
11475
- )
11476
- ] }),
11477
- /* @__PURE__ */ jsxs("div", { children: [
11478
- /* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
11479
- /* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
11480
- /* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
11481
- ] }),
11482
- /* @__PURE__ */ jsxs("div", { children: [
11483
- /* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
11627
+ /* @__PURE__ */ jsxs("div", { "x-show": "open", "x-cloak": true, "x-collapse": true, class: "mt-2 space-y-2", children: [
11628
+ /* @__PURE__ */ jsxs("div", { children: [
11629
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11630
+ /* @__PURE__ */ jsx(
11631
+ "label",
11632
+ {
11633
+ for: "maxPages",
11634
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11635
+ children: "Max Pages"
11636
+ }
11637
+ ),
11638
+ /* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
11639
+ ] }),
11484
11640
  /* @__PURE__ */ jsx(
11485
11641
  "input",
11486
11642
  {
11487
- type: "text",
11488
- class: "w-1/3 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
11489
- placeholder: "Header Name",
11490
- "x-model": "header.name",
11491
- required: true
11643
+ type: "number",
11644
+ name: "maxPages",
11645
+ id: "maxPages",
11646
+ min: "1",
11647
+ placeholder: "1000",
11648
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
11492
11649
  }
11493
- ),
11494
- /* @__PURE__ */ jsx("span", { class: "text-gray-500", children: ":" }),
11650
+ )
11651
+ ] }),
11652
+ /* @__PURE__ */ jsxs("div", { children: [
11653
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11654
+ /* @__PURE__ */ jsx(
11655
+ "label",
11656
+ {
11657
+ for: "maxDepth",
11658
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11659
+ children: "Max Depth"
11660
+ }
11661
+ ),
11662
+ /* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
11663
+ ] }),
11495
11664
  /* @__PURE__ */ jsx(
11496
11665
  "input",
11497
11666
  {
11498
- type: "text",
11499
- class: "w-1/2 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
11500
- placeholder: "Header Value",
11501
- "x-model": "header.value",
11502
- required: true
11667
+ type: "number",
11668
+ name: "maxDepth",
11669
+ id: "maxDepth",
11670
+ min: "0",
11671
+ placeholder: "3",
11672
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
11673
+ }
11674
+ )
11675
+ ] }),
11676
+ /* @__PURE__ */ jsxs("div", { children: [
11677
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11678
+ /* @__PURE__ */ jsx(
11679
+ "label",
11680
+ {
11681
+ for: "scope",
11682
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11683
+ children: "Scope"
11684
+ }
11685
+ ),
11686
+ /* @__PURE__ */ jsx(
11687
+ Tooltip,
11688
+ {
11689
+ text: /* @__PURE__ */ jsxs("div", { children: [
11690
+ "Controls which pages are scraped:",
11691
+ /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
11692
+ /* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
11693
+ /* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
11694
+ /* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
11695
+ ] })
11696
+ ] })
11697
+ }
11698
+ )
11699
+ ] }),
11700
+ /* @__PURE__ */ jsxs(
11701
+ "select",
11702
+ {
11703
+ name: "scope",
11704
+ id: "scope",
11705
+ class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
11706
+ children: [
11707
+ /* @__PURE__ */ jsx("option", { value: "subpages", selected: true, children: "Subpages (Default)" }),
11708
+ /* @__PURE__ */ jsx("option", { value: "hostname", children: "Hostname" }),
11709
+ /* @__PURE__ */ jsx("option", { value: "domain", children: "Domain" })
11710
+ ]
11711
+ }
11712
+ )
11713
+ ] }),
11714
+ /* @__PURE__ */ jsxs("div", { children: [
11715
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11716
+ /* @__PURE__ */ jsx(
11717
+ "label",
11718
+ {
11719
+ for: "includePatterns",
11720
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11721
+ children: "Include Patterns"
11722
+ }
11723
+ ),
11724
+ /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
11725
+ ] }),
11726
+ /* @__PURE__ */ jsx(
11727
+ "textarea",
11728
+ {
11729
+ name: "includePatterns",
11730
+ id: "includePatterns",
11731
+ rows: "2",
11732
+ placeholder: "e.g. docs/* or /api\\/v1.*/",
11733
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
11734
+ }
11735
+ )
11736
+ ] }),
11737
+ /* @__PURE__ */ jsxs("div", { children: [
11738
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11739
+ /* @__PURE__ */ jsx(
11740
+ "label",
11741
+ {
11742
+ for: "excludePatterns",
11743
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11744
+ children: "Exclude Patterns"
11745
+ }
11746
+ ),
11747
+ /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/. Edit or clear this field to customize exclusions." })
11748
+ ] }),
11749
+ /* @__PURE__ */ jsx(
11750
+ "textarea",
11751
+ {
11752
+ name: "excludePatterns",
11753
+ id: "excludePatterns",
11754
+ rows: "5",
11755
+ safe: true,
11756
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white font-mono text-xs",
11757
+ children: defaultExcludePatternsText
11503
11758
  }
11504
11759
  ),
11760
+ /* @__PURE__ */ jsx("p", { class: "mt-1 text-xs text-gray-500 dark:text-gray-400", children: "Default patterns are pre-filled. Edit to customize or clear to exclude nothing." })
11761
+ ] }),
11762
+ /* @__PURE__ */ jsxs("div", { children: [
11763
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11764
+ /* @__PURE__ */ jsx(
11765
+ "label",
11766
+ {
11767
+ for: "scrapeMode",
11768
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
11769
+ children: "Scrape Mode"
11770
+ }
11771
+ ),
11772
+ /* @__PURE__ */ jsx(
11773
+ Tooltip,
11774
+ {
11775
+ text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
11776
+ /* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
11777
+ /* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
11778
+ /* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
11779
+ ] }) })
11780
+ }
11781
+ )
11782
+ ] }),
11783
+ /* @__PURE__ */ jsxs(
11784
+ "select",
11785
+ {
11786
+ name: "scrapeMode",
11787
+ id: "scrapeMode",
11788
+ class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
11789
+ children: [
11790
+ /* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
11791
+ /* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
11792
+ /* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
11793
+ ]
11794
+ }
11795
+ )
11796
+ ] }),
11797
+ /* @__PURE__ */ jsxs("div", { children: [
11798
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
11799
+ /* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
11800
+ /* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
11801
+ ] }),
11802
+ /* @__PURE__ */ jsxs("div", { children: [
11803
+ /* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
11804
+ /* @__PURE__ */ jsx(
11805
+ "input",
11806
+ {
11807
+ type: "text",
11808
+ class: "w-1/3 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
11809
+ placeholder: "Header Name",
11810
+ "x-model": "header.name",
11811
+ required: true
11812
+ }
11813
+ ),
11814
+ /* @__PURE__ */ jsx("span", { class: "text-gray-500", children: ":" }),
11815
+ /* @__PURE__ */ jsx(
11816
+ "input",
11817
+ {
11818
+ type: "text",
11819
+ class: "w-1/2 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
11820
+ placeholder: "Header Value",
11821
+ "x-model": "header.value",
11822
+ required: true
11823
+ }
11824
+ ),
11825
+ /* @__PURE__ */ jsx(
11826
+ "button",
11827
+ {
11828
+ type: "button",
11829
+ class: "text-red-500 hover:text-red-700 text-xs",
11830
+ "x-on:click": "headers.splice(idx, 1)",
11831
+ children: "Remove"
11832
+ }
11833
+ ),
11834
+ /* @__PURE__ */ jsx(
11835
+ "input",
11836
+ {
11837
+ type: "hidden",
11838
+ name: "header[]",
11839
+ "x-bind:value": "header.name && header.value ? header.name + ':' + header.value : ''"
11840
+ }
11841
+ )
11842
+ ] }) }),
11843
+ /* @__PURE__ */ jsx(
11844
+ "button",
11845
+ {
11846
+ type: "button",
11847
+ class: "mt-1 px-2 py-0.5 bg-primary-100 dark:bg-primary-900 text-primary-700 dark:text-primary-200 rounded text-xs",
11848
+ "x-on:click": "headers.push({ name: '', value: '' })",
11849
+ children: "+ Add Header"
11850
+ }
11851
+ )
11852
+ ] })
11853
+ ] }),
11854
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11505
11855
  /* @__PURE__ */ jsx(
11506
- "button",
11856
+ "input",
11507
11857
  {
11508
- type: "button",
11509
- class: "text-red-500 hover:text-red-700 text-xs",
11510
- "x-on:click": "headers.splice(idx, 1)",
11511
- children: "Remove"
11858
+ id: "followRedirects",
11859
+ name: "followRedirects",
11860
+ type: "checkbox",
11861
+ checked: true,
11862
+ class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
11512
11863
  }
11513
11864
  ),
11865
+ /* @__PURE__ */ jsx(
11866
+ "label",
11867
+ {
11868
+ for: "followRedirects",
11869
+ class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
11870
+ children: "Follow Redirects"
11871
+ }
11872
+ )
11873
+ ] }),
11874
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11514
11875
  /* @__PURE__ */ jsx(
11515
11876
  "input",
11516
11877
  {
11517
- type: "hidden",
11518
- name: "header[]",
11519
- "x-bind:value": "header.name && header.value ? header.name + ':' + header.value : ''"
11878
+ id: "ignoreErrors",
11879
+ name: "ignoreErrors",
11880
+ type: "checkbox",
11881
+ checked: true,
11882
+ class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
11883
+ }
11884
+ ),
11885
+ /* @__PURE__ */ jsx(
11886
+ "label",
11887
+ {
11888
+ for: "ignoreErrors",
11889
+ class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
11890
+ children: "Ignore Errors During Scraping"
11520
11891
  }
11521
11892
  )
11522
- ] }) }),
11523
- /* @__PURE__ */ jsx(
11524
- "button",
11525
- {
11526
- type: "button",
11527
- class: "mt-1 px-2 py-0.5 bg-primary-100 dark:bg-primary-900 text-primary-700 dark:text-primary-200 rounded text-xs",
11528
- "x-on:click": "headers.push({ name: '', value: '' })",
11529
- children: "+ Add Header"
11530
- }
11531
- )
11893
+ ] })
11532
11894
  ] })
11533
- ] }),
11534
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11535
- /* @__PURE__ */ jsx(
11536
- "input",
11537
- {
11538
- id: "followRedirects",
11539
- name: "followRedirects",
11540
- type: "checkbox",
11541
- checked: true,
11542
- class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
11543
- }
11544
- ),
11545
- /* @__PURE__ */ jsx(
11546
- "label",
11547
- {
11548
- for: "followRedirects",
11549
- class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
11550
- children: "Follow Redirects"
11551
- }
11552
- )
11553
- ] }),
11554
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
11555
- /* @__PURE__ */ jsx(
11556
- "input",
11557
- {
11558
- id: "ignoreErrors",
11559
- name: "ignoreErrors",
11560
- type: "checkbox",
11561
- checked: true,
11562
- class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
11563
- }
11564
- ),
11565
- /* @__PURE__ */ jsx(
11566
- "label",
11567
- {
11568
- for: "ignoreErrors",
11569
- class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
11570
- children: "Ignore Errors During Scraping"
11571
- }
11572
- )
11573
- ] })
11574
- ] })
11575
- ] }),
11895
+ ]
11896
+ }
11897
+ ),
11576
11898
  /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsx(
11577
11899
  "button",
11578
11900
  {
11579
11901
  type: "submit",
11580
11902
  class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500",
11581
- children: "Queue Job"
11903
+ children: "Start Indexing"
11582
11904
  }
11583
11905
  ) })
11584
11906
  ]
11585
11907
  }
11586
11908
  ),
11587
- /* @__PURE__ */ jsx("div", { id: "job-response", class: "mt-2 text-sm" }),
11588
- /* @__PURE__ */ jsx("script", { children: `
11589
- document.addEventListener('htmx:responseError', function(evt) {
11590
- // Handle error responses from the form submission
11591
- if (evt.detail.xhr && evt.detail.xhr.response) {
11592
- const responseDiv = document.getElementById('job-response');
11593
- if (responseDiv) {
11594
- responseDiv.innerHTML = evt.detail.xhr.response;
11595
- }
11596
- }
11597
- });
11598
- ` })
11909
+ /* @__PURE__ */ jsx("div", { id: "job-response", class: "mt-2 text-sm" })
11599
11910
  ] });
11600
11911
  };
11601
- const ScrapeForm = ({ defaultExcludePatterns }) => /* @__PURE__ */ jsx("div", { id: "scrape-form-container", children: /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns }) });
11912
+ const ScrapeForm = ({ defaultExcludePatterns }) => /* @__PURE__ */ jsx("div", { id: "scrape-form-container", class: "animate-[fadeSlideIn_0.2s_ease-out]", children: /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns }) });
11602
11913
  const DEFAULT_FILE_EXCLUSIONS = [
11603
11914
  // CHANGELOG files (case variations)
11604
11915
  "**/CHANGELOG.md",
@@ -11698,10 +12009,24 @@ function getEffectiveExclusionPatterns(userPatterns) {
11698
12009
  }
11699
12010
  return DEFAULT_EXCLUSION_PATTERNS;
11700
12011
  }
12012
+ const ScrapeFormButton = () => /* @__PURE__ */ jsx(
12013
+ "button",
12014
+ {
12015
+ type: "button",
12016
+ "hx-get": "/web/jobs/new",
12017
+ "hx-target": "#addJobForm",
12018
+ "hx-swap": "innerHTML",
12019
+ class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500 transition-colors duration-150",
12020
+ children: "Add New Documentation"
12021
+ }
12022
+ );
11701
12023
  function registerNewJobRoutes(server, scrapeTool) {
11702
12024
  server.get("/web/jobs/new", async () => {
11703
12025
  return /* @__PURE__ */ jsx(ScrapeForm, { defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS });
11704
12026
  });
12027
+ server.get("/web/jobs/new-button", async () => {
12028
+ return /* @__PURE__ */ jsx(ScrapeFormButton, {});
12029
+ });
11705
12030
  server.post(
11706
12031
  "/web/jobs/scrape",
11707
12032
  async (request, reply) => {
@@ -11759,25 +12084,16 @@ function registerNewJobRoutes(server, scrapeTool) {
11759
12084
  };
11760
12085
  const result = await scrapeTool.execute(scrapeOptions);
11761
12086
  if ("jobId" in result) {
11762
- return /* @__PURE__ */ jsxs(Fragment, { children: [
11763
- /* @__PURE__ */ jsx(
11764
- Alert,
11765
- {
11766
- type: "success",
11767
- message: /* @__PURE__ */ jsxs(Fragment, { children: [
11768
- "Job queued successfully! ID:",
11769
- " ",
11770
- /* @__PURE__ */ jsx("span", { safe: true, children: result.jobId })
11771
- ] })
11772
- }
11773
- ),
11774
- /* @__PURE__ */ jsx("div", { id: "scrape-form-container", "hx-swap-oob": "innerHTML", children: /* @__PURE__ */ jsx(
11775
- ScrapeFormContent,
11776
- {
11777
- defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS
12087
+ reply.header(
12088
+ "HX-Trigger",
12089
+ JSON.stringify({
12090
+ toast: {
12091
+ message: "Job queued successfully!",
12092
+ type: "success"
11778
12093
  }
11779
- ) })
11780
- ] });
12094
+ })
12095
+ );
12096
+ return /* @__PURE__ */ jsx(ScrapeFormButton, {});
11781
12097
  }
11782
12098
  return /* @__PURE__ */ jsx(Alert, { type: "warning", message: "Job finished unexpectedly quickly." });
11783
12099
  } catch (error) {
@@ -11821,6 +12137,9 @@ const VersionDetailsRow = ({
11821
12137
  {
11822
12138
  id: rowId,
11823
12139
  class: "flex justify-between items-center py-1 border-b border-gray-200 dark:border-gray-600 last:border-b-0",
12140
+ "data-library-name": libraryName,
12141
+ "data-version-param": versionParam,
12142
+ "x-data": "{ library: $el.dataset.libraryName, version: $el.dataset.versionParam, confirming: $el.dataset.confirming === 'true', isDeleting: false }",
11824
12143
  children: [
11825
12144
  /* @__PURE__ */ jsx(
11826
12145
  "span",
@@ -11837,7 +12156,7 @@ const VersionDetailsRow = ({
11837
12156
  /* @__PURE__ */ jsx("span", { class: "font-semibold", safe: true, children: version.counts.uniqueUrls.toLocaleString() })
11838
12157
  ] }),
11839
12158
  /* @__PURE__ */ jsxs("span", { title: "Number of indexed snippets", children: [
11840
- "Snippets:",
12159
+ "Chunks:",
11841
12160
  " ",
11842
12161
  /* @__PURE__ */ jsx("span", { class: "font-semibold", safe: true, children: version.counts.documents.toLocaleString() })
11843
12162
  ] }),
@@ -11853,81 +12172,45 @@ const VersionDetailsRow = ({
11853
12172
  type: "button",
11854
12173
  class: "ml-2 font-medium rounded-lg text-sm p-1 text-center inline-flex items-center transition-colors duration-150 ease-in-out",
11855
12174
  title: "Remove this version",
11856
- "x-data": "{}",
11857
- "x-bind:class": `$store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === '${libraryName}:${versionParam}' ? '${confirmingStateClasses}' : '${defaultStateClasses}'`,
11858
- "x-bind:disabled": `$store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === '${libraryName}:${versionParam}' && $store.confirmingAction.isDeleting`,
11859
- "x-on:click": `
11860
- if ($store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === '${libraryName}:${versionParam}') {
11861
- $store.confirmingAction.isDeleting = true;
11862
- $el.dispatchEvent(new CustomEvent('confirmed-delete', { bubbles: true }));
11863
- } else {
11864
- if ($store.confirmingAction.timeoutId) { clearTimeout($store.confirmingAction.timeoutId); $store.confirmingAction.timeoutId = null; }
11865
- $store.confirmingAction.type = 'version-delete';
11866
- $store.confirmingAction.id = '${libraryName}:${versionParam}';
11867
- $store.confirmingAction.isDeleting = false;
11868
- $store.confirmingAction.timeoutId = setTimeout(() => {
11869
- $store.confirmingAction.type = null;
11870
- $store.confirmingAction.id = null;
11871
- $store.confirmingAction.isDeleting = false;
11872
- $store.confirmingAction.timeoutId = null;
11873
- }, 3000);
11874
- }
11875
- `,
12175
+ "x-bind:class": `confirming ? '${confirmingStateClasses}' : '${defaultStateClasses}'`,
12176
+ "x-bind:disabled": "isDeleting",
12177
+ "x-on:click": "\n if (confirming) {\n isDeleting = true;\n window.confirmationManager.clear($root.id);\n $el.dispatchEvent(new CustomEvent('confirmed-delete', { bubbles: true }));\n } else {\n confirming = true;\n isDeleting = false;\n window.confirmationManager.start($root.id);\n }\n ",
11876
12178
  "hx-delete": `/web/libraries/${encodeURIComponent(libraryName)}/versions/${encodeURIComponent(versionParam)}`,
11877
12179
  "hx-target": `#${rowId}`,
11878
12180
  "hx-swap": "outerHTML",
11879
12181
  "hx-trigger": "confirmed-delete",
11880
12182
  children: [
11881
- /* @__PURE__ */ jsxs(
11882
- "span",
11883
- {
11884
- "x-show": `!($store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === '${libraryName}:${versionParam}' && $store.confirmingAction.isDeleting)`,
11885
- children: [
11886
- /* @__PURE__ */ jsx(
11887
- "svg",
12183
+ /* @__PURE__ */ jsxs("span", { "x-show": "!confirming && !isDeleting", children: [
12184
+ /* @__PURE__ */ jsx(
12185
+ "svg",
12186
+ {
12187
+ class: "w-4 h-4",
12188
+ "aria-hidden": "true",
12189
+ xmlns: "http://www.w3.org/2000/svg",
12190
+ fill: "none",
12191
+ viewBox: "0 0 18 20",
12192
+ children: /* @__PURE__ */ jsx(
12193
+ "path",
11888
12194
  {
11889
- class: "w-4 h-4",
11890
- "aria-hidden": "true",
11891
- xmlns: "http://www.w3.org/2000/svg",
11892
- fill: "none",
11893
- viewBox: "0 0 18 20",
11894
- children: /* @__PURE__ */ jsx(
11895
- "path",
11896
- {
11897
- stroke: "currentColor",
11898
- "stroke-linecap": "round",
11899
- "stroke-linejoin": "round",
11900
- "stroke-width": "2",
11901
- d: "M1 5h16M7 8v8m4-8v8M7 1h4a1 1 0 0 1 1 1v3H6V2a1 1 0 0 1-1-1ZM3 5h12v13a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V5Z"
11902
- }
11903
- )
12195
+ stroke: "currentColor",
12196
+ "stroke-linecap": "round",
12197
+ "stroke-linejoin": "round",
12198
+ "stroke-width": "2",
12199
+ d: "M1 5h16M7 8v8m4-8v8M7 1h4a1 1 0 0 1 1 1v3H6V2a1 1 0 0 1-1-1ZM3 5h12v13a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V5Z"
11904
12200
  }
11905
- ),
11906
- /* @__PURE__ */ jsx("span", { class: "sr-only", children: "Remove version" })
11907
- ]
11908
- }
11909
- ),
11910
- /* @__PURE__ */ jsxs(
11911
- "span",
11912
- {
11913
- "x-show": `$store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === '${libraryName}:${versionParam}' && !$store.confirmingAction.isDeleting`,
11914
- class: "mx-1",
11915
- children: [
11916
- "Confirm?",
11917
- /* @__PURE__ */ jsx("span", { class: "sr-only", children: "Confirm delete" })
11918
- ]
11919
- }
11920
- ),
11921
- /* @__PURE__ */ jsxs(
11922
- "span",
11923
- {
11924
- "x-show": `$store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === '${libraryName}:${versionParam}' && $store.confirmingAction.isDeleting`,
11925
- children: [
11926
- /* @__PURE__ */ jsx(LoadingSpinner, {}),
11927
- /* @__PURE__ */ jsx("span", { class: "sr-only", children: "Loading..." })
11928
- ]
11929
- }
11930
- )
12201
+ )
12202
+ }
12203
+ ),
12204
+ /* @__PURE__ */ jsx("span", { class: "sr-only", children: "Remove version" })
12205
+ ] }),
12206
+ /* @__PURE__ */ jsxs("span", { "x-show": "confirming && !isDeleting", class: "mx-1", children: [
12207
+ "Confirm?",
12208
+ /* @__PURE__ */ jsx("span", { class: "sr-only", children: "Confirm delete" })
12209
+ ] }),
12210
+ /* @__PURE__ */ jsxs("span", { "x-show": "isDeleting", children: [
12211
+ /* @__PURE__ */ jsx(LoadingSpinner, {}),
12212
+ /* @__PURE__ */ jsx("span", { class: "sr-only", children: "Loading..." })
12213
+ ] })
11931
12214
  ]
11932
12215
  }
11933
12216
  )
@@ -12135,7 +12418,8 @@ function registerLibraryDetailRoutes(server, listLibrariesTool, searchTool) {
12135
12418
  } catch (error) {
12136
12419
  server.log.error(error, `Failed to search library ${libraryName}`);
12137
12420
  reply.type("text/html; charset=utf-8");
12138
- return /* @__PURE__ */ jsx("p", { class: "text-red-500 dark:text-red-400 italic", children: "An unexpected error occurred during the search." });
12421
+ const errorMessage = error instanceof Error ? error.message : "An unexpected error occurred during the search.";
12422
+ return /* @__PURE__ */ jsx(Alert, { type: "error", message: errorMessage });
12139
12423
  }
12140
12424
  }
12141
12425
  );
@@ -12145,48 +12429,90 @@ const LibraryItem = ({ library }) => {
12145
12429
  const latestVersion = versions[0];
12146
12430
  return (
12147
12431
  // Use Flowbite Card structure with updated padding and border, and white background
12148
- /* @__PURE__ */ jsxs("div", { class: "block px-4 py-2 bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-300 dark:border-gray-600", children: [
12149
- /* @__PURE__ */ jsx("h3", { class: "text-lg font-medium text-gray-900 dark:text-white", children: /* @__PURE__ */ jsx(
12150
- "a",
12151
- {
12152
- href: `/libraries/${encodeURIComponent(library.name)}`,
12153
- class: "hover:underline",
12154
- children: /* @__PURE__ */ jsx("span", { safe: true, children: library.name })
12155
- }
12156
- ) }),
12157
- latestVersion?.sourceUrl ? /* @__PURE__ */ jsx("div", { class: "text-sm text-gray-500 dark:text-gray-400", children: /* @__PURE__ */ jsx(
12158
- "a",
12159
- {
12160
- href: latestVersion.sourceUrl,
12161
- target: "_blank",
12162
- class: "hover:underline",
12163
- safe: true,
12164
- children: latestVersion.sourceUrl
12165
- }
12166
- ) }) : null,
12167
- /* @__PURE__ */ jsx("div", { class: "mt-2", children: versions.length > 0 ? versions.map((v) => {
12168
- const adapted = {
12169
- id: -1,
12170
- ref: { library: library.name, version: v.version },
12171
- status: v.status,
12172
- progress: v.progress,
12173
- counts: {
12174
- documents: v.documentCount,
12175
- uniqueUrls: v.uniqueUrlCount
12176
- },
12177
- indexedAt: v.indexedAt,
12178
- sourceUrl: v.sourceUrl ?? void 0
12179
- };
12180
- return /* @__PURE__ */ jsx(VersionDetailsRow, { libraryName: library.name, version: adapted });
12181
- }) : (
12182
- // Display message if no versions are indexed
12183
- /* @__PURE__ */ jsx("p", { class: "text-sm text-gray-500 dark:text-gray-400 italic", children: "No versions indexed." })
12184
- ) })
12185
- ] })
12432
+ /* @__PURE__ */ jsxs(
12433
+ "div",
12434
+ {
12435
+ id: `library-item-${library.name}`,
12436
+ class: "block px-4 py-2 bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-300 dark:border-gray-600",
12437
+ children: [
12438
+ /* @__PURE__ */ jsx("h3", { class: "text-lg font-medium text-gray-900 dark:text-white", children: /* @__PURE__ */ jsx(
12439
+ "a",
12440
+ {
12441
+ href: `/libraries/${encodeURIComponent(library.name)}`,
12442
+ class: "hover:underline",
12443
+ children: /* @__PURE__ */ jsx("span", { safe: true, children: library.name })
12444
+ }
12445
+ ) }),
12446
+ latestVersion?.sourceUrl ? /* @__PURE__ */ jsx("div", { class: "text-sm text-gray-500 dark:text-gray-400 overflow-hidden h-5 @container", children: /* @__PURE__ */ jsx(
12447
+ "a",
12448
+ {
12449
+ href: latestVersion.sourceUrl,
12450
+ target: "_blank",
12451
+ class: "inline-block whitespace-nowrap hover:underline hover:animate-[scrollText_2s_ease-in-out_forwards]",
12452
+ title: latestVersion.sourceUrl,
12453
+ safe: true,
12454
+ children: latestVersion.sourceUrl
12455
+ }
12456
+ ) }) : null,
12457
+ /* @__PURE__ */ jsx("div", { class: "mt-2", children: versions.length > 0 ? versions.map((v) => {
12458
+ const adapted = {
12459
+ id: -1,
12460
+ ref: { library: library.name, version: v.version },
12461
+ status: v.status,
12462
+ progress: v.progress,
12463
+ counts: {
12464
+ documents: v.documentCount,
12465
+ uniqueUrls: v.uniqueUrlCount
12466
+ },
12467
+ indexedAt: v.indexedAt,
12468
+ sourceUrl: v.sourceUrl ?? void 0
12469
+ };
12470
+ return /* @__PURE__ */ jsx(VersionDetailsRow, { libraryName: library.name, version: adapted });
12471
+ }) : (
12472
+ // Display message if no versions are indexed
12473
+ /* @__PURE__ */ jsx("p", { class: "text-sm text-gray-500 dark:text-gray-400 italic", children: "No versions indexed." })
12474
+ ) })
12475
+ ]
12476
+ }
12477
+ )
12186
12478
  );
12187
12479
  };
12188
12480
  const LibraryList = ({ libraries }) => {
12189
- return /* @__PURE__ */ jsx(Fragment, { children: /* @__PURE__ */ jsx("div", { class: "space-y-2", children: libraries.map((library) => /* @__PURE__ */ jsx(LibraryItem, { library })) }) });
12481
+ if (libraries.length === 0) {
12482
+ return /* @__PURE__ */ jsx(
12483
+ Alert,
12484
+ {
12485
+ type: "info",
12486
+ title: "Welcome!",
12487
+ message: /* @__PURE__ */ jsxs(Fragment, { children: [
12488
+ "To get started, click",
12489
+ " ",
12490
+ /* @__PURE__ */ jsx("span", { class: "font-semibold", children: "Add New Documentation" }),
12491
+ " above and enter the URL of a documentation site to index. For more information, check the",
12492
+ " ",
12493
+ /* @__PURE__ */ jsx(
12494
+ "a",
12495
+ {
12496
+ href: "https://grounded.tools",
12497
+ target: "_blank",
12498
+ rel: "noopener noreferrer",
12499
+ class: "font-medium underline hover:no-underline",
12500
+ children: "official website"
12501
+ }
12502
+ ),
12503
+ "."
12504
+ ] })
12505
+ }
12506
+ );
12507
+ }
12508
+ return /* @__PURE__ */ jsx(
12509
+ "div",
12510
+ {
12511
+ id: "library-list",
12512
+ class: "space-y-2 animate-[fadeSlideIn_0.2s_ease-out]",
12513
+ children: libraries.map((library) => /* @__PURE__ */ jsx(LibraryItem, { library }))
12514
+ }
12515
+ );
12190
12516
  };
12191
12517
  function registerLibrariesRoutes(server, listLibrariesTool, removeTool) {
12192
12518
  server.get("/web/libraries", async (_request, reply) => {
@@ -12216,6 +12542,75 @@ function registerLibrariesRoutes(server, listLibrariesTool, removeTool) {
12216
12542
  }
12217
12543
  );
12218
12544
  }
12545
+ function formatNumber(num) {
12546
+ if (num >= 1e9) {
12547
+ return `${(num / 1e9).toFixed(1)}B`;
12548
+ }
12549
+ if (num >= 1e6) {
12550
+ return `${(num / 1e6).toFixed(1)}M`;
12551
+ }
12552
+ if (num >= 1e3) {
12553
+ return `${(num / 1e3).toFixed(1)}K`;
12554
+ }
12555
+ return num.toString();
12556
+ }
12557
+ const AnalyticsCards = ({
12558
+ totalChunks,
12559
+ activeLibraries,
12560
+ activeVersions,
12561
+ indexedPages
12562
+ }) => /* @__PURE__ */ jsxs("div", { class: "grid grid-cols-1 sm:grid-cols-3 gap-4 mb-4 animate-[fadeSlideIn_0.2s_ease-out]", children: [
12563
+ /* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600", children: /* @__PURE__ */ jsx("div", { class: "flex items-center", children: /* @__PURE__ */ jsxs("div", { children: [
12564
+ /* @__PURE__ */ jsx("p", { class: "text-sm font-medium text-gray-500 dark:text-gray-400", children: "Total Knowledge Base" }),
12565
+ /* @__PURE__ */ jsxs("p", { class: "text-xl font-semibold text-gray-900 dark:text-white", safe: true, children: [
12566
+ formatNumber(totalChunks),
12567
+ " Chunks"
12568
+ ] })
12569
+ ] }) }) }),
12570
+ /* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600", children: /* @__PURE__ */ jsx("div", { class: "flex items-center", children: /* @__PURE__ */ jsxs("div", { children: [
12571
+ /* @__PURE__ */ jsx("p", { class: "text-sm font-medium text-gray-500 dark:text-gray-400", children: "Libraries / Versions" }),
12572
+ /* @__PURE__ */ jsxs("p", { class: "text-xl font-semibold text-gray-900 dark:text-white", children: [
12573
+ activeLibraries,
12574
+ " / ",
12575
+ activeVersions
12576
+ ] })
12577
+ ] }) }) }),
12578
+ /* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600", children: /* @__PURE__ */ jsx("div", { class: "flex items-center", children: /* @__PURE__ */ jsxs("div", { children: [
12579
+ /* @__PURE__ */ jsx("p", { class: "text-sm font-medium text-gray-500 dark:text-gray-400", children: "Indexed Pages" }),
12580
+ /* @__PURE__ */ jsx("p", { class: "text-xl font-semibold text-gray-900 dark:text-white", safe: true, children: formatNumber(indexedPages) })
12581
+ ] }) }) })
12582
+ ] });
12583
+ function registerStatsRoute(server, docService) {
12584
+ server.get("/web/stats", async (_request, reply) => {
12585
+ try {
12586
+ const libraries = await docService.listLibraries();
12587
+ let totalChunks = 0;
12588
+ let indexedPages = 0;
12589
+ let activeVersions = 0;
12590
+ for (const lib of libraries) {
12591
+ activeVersions += lib.versions.length;
12592
+ for (const version of lib.versions) {
12593
+ totalChunks += version.counts.documents;
12594
+ indexedPages += version.counts.uniqueUrls;
12595
+ }
12596
+ }
12597
+ const activeLibraries = libraries.length;
12598
+ reply.type("text/html; charset=utf-8");
12599
+ return /* @__PURE__ */ jsx(
12600
+ AnalyticsCards,
12601
+ {
12602
+ totalChunks,
12603
+ activeLibraries,
12604
+ activeVersions,
12605
+ indexedPages
12606
+ }
12607
+ );
12608
+ } catch (error) {
12609
+ logger.error(`Failed to fetch stats: ${error}`);
12610
+ reply.status(500).send("Internal Server Error");
12611
+ }
12612
+ });
12613
+ }
12219
12614
  async function registerWebService(server, docService, pipeline, eventBus, config) {
12220
12615
  const listLibrariesTool = new ListLibrariesTool(docService);
12221
12616
  const listJobsTool = new ListJobsTool(pipeline);
@@ -12232,6 +12627,7 @@ async function registerWebService(server, docService, pipeline, eventBus, config
12232
12627
  registerCancelJobRoute(server, cancelJobTool);
12233
12628
  registerClearCompletedJobsRoute(server, clearCompletedJobsTool);
12234
12629
  registerEventsRoute(server, eventBus);
12630
+ registerStatsRoute(server, docService);
12235
12631
  }
12236
12632
  async function registerWorkerService(pipeline) {
12237
12633
  await pipeline.start();
@@ -12256,7 +12652,6 @@ class AppServer {
12256
12652
  mcpServer = null;
12257
12653
  authManager = null;
12258
12654
  config;
12259
- embeddingConfig = null;
12260
12655
  remoteEventProxy = null;
12261
12656
  wss = null;
12262
12657
  /**
@@ -12283,22 +12678,22 @@ class AppServer {
12283
12678
  */
12284
12679
  async start() {
12285
12680
  this.validateConfig();
12286
- this.embeddingConfig = resolveEmbeddingContext();
12681
+ const embeddingConfig = this.docService.getActiveEmbeddingConfig();
12287
12682
  if (this.config.telemetry !== false && shouldEnableTelemetry()) {
12288
12683
  try {
12289
12684
  if (telemetry.isEnabled()) {
12290
12685
  telemetry.setGlobalContext({
12291
- appVersion: "1.28.0",
12686
+ appVersion: "1.30.0",
12292
12687
  appPlatform: process.platform,
12293
12688
  appNodeVersion: process.version,
12294
12689
  appServicesEnabled: this.getActiveServicesList(),
12295
12690
  appAuthEnabled: Boolean(this.config.auth),
12296
12691
  appReadOnly: Boolean(this.config.readOnly),
12297
12692
  // Add embedding configuration to global context
12298
- ...this.embeddingConfig && {
12299
- aiEmbeddingProvider: this.embeddingConfig.provider,
12300
- aiEmbeddingModel: this.embeddingConfig.model,
12301
- aiEmbeddingDimensions: this.embeddingConfig.dimensions
12693
+ ...embeddingConfig && {
12694
+ aiEmbeddingProvider: embeddingConfig.provider,
12695
+ aiEmbeddingModel: embeddingConfig.model,
12696
+ aiEmbeddingDimensions: embeddingConfig.dimensions
12302
12697
  }
12303
12698
  });
12304
12699
  telemetry.track(TelemetryEvent.APP_STARTED, {
@@ -12356,6 +12751,9 @@ class AppServer {
12356
12751
  await cleanupMcpService(this.mcpServer);
12357
12752
  }
12358
12753
  if (this.wss) {
12754
+ for (const client of this.wss.clients) {
12755
+ client.terminate();
12756
+ }
12359
12757
  await new Promise((resolve, reject) => {
12360
12758
  this.wss?.close((err) => {
12361
12759
  if (err) {
@@ -12374,6 +12772,9 @@ class AppServer {
12374
12772
  });
12375
12773
  }
12376
12774
  await telemetry.shutdown();
12775
+ if (this.server.server) {
12776
+ this.server.server.closeAllConnections();
12777
+ }
12377
12778
  await this.server.close();
12378
12779
  logger.info("🛑 AppServer stopped");
12379
12780
  } catch (error) {
@@ -12596,28 +12997,38 @@ class AppServer {
12596
12997
  * Log startup information showing which services are enabled.
12597
12998
  */
12598
12999
  logStartupInfo(address) {
12599
- logger.info(`🚀 AppServer available at ${address}`);
13000
+ const isWorkerOnly = this.config.enableWorker && !this.config.enableWebInterface && !this.config.enableMcpServer;
13001
+ const isWebOnly = this.config.enableWebInterface && !this.config.enableWorker && !this.config.enableMcpServer;
13002
+ const isMcpOnly = this.config.enableMcpServer && !this.config.enableWebInterface && !this.config.enableWorker;
13003
+ if (isWorkerOnly) {
13004
+ logger.info(`🚀 Worker available at ${address}`);
13005
+ } else if (isWebOnly) {
13006
+ logger.info(`🚀 Web interface available at ${address}`);
13007
+ } else if (isMcpOnly) {
13008
+ logger.info(`🚀 MCP server available at ${address}`);
13009
+ } else {
13010
+ logger.info(`🚀 Grounded Docs available at ${address}`);
13011
+ }
13012
+ const isCombined = !isWorkerOnly && !isWebOnly && !isMcpOnly;
12600
13013
  const enabledServices = [];
12601
- if (this.config.enableWebInterface) {
13014
+ if (this.config.enableWebInterface && isCombined) {
12602
13015
  enabledServices.push(`Web interface: ${address}`);
12603
13016
  }
12604
13017
  if (this.config.enableMcpServer) {
12605
13018
  enabledServices.push(`MCP endpoints: ${address}/mcp, ${address}/sse`);
12606
13019
  }
12607
- if (this.config.enableApiServer) {
12608
- enabledServices.push(`API: ${address}/api`);
12609
- }
12610
- if (this.config.enableWorker) {
12611
- enabledServices.push("Worker: internal");
12612
- } else if (this.config.externalWorkerUrl) {
13020
+ if (!this.config.enableWorker && this.config.externalWorkerUrl) {
12613
13021
  enabledServices.push(`Worker: ${this.config.externalWorkerUrl}`);
12614
13022
  }
12615
- if (this.embeddingConfig) {
12616
- enabledServices.push(
12617
- `Embeddings: ${this.embeddingConfig.provider}:${this.embeddingConfig.model}`
12618
- );
12619
- } else {
12620
- enabledServices.push(`Embeddings: disabled (full text search only)`);
13023
+ if (this.config.enableWorker) {
13024
+ const embeddingConfig = this.docService.getActiveEmbeddingConfig();
13025
+ if (embeddingConfig) {
13026
+ enabledServices.push(
13027
+ `Embeddings: ${embeddingConfig.provider}:${embeddingConfig.model}`
13028
+ );
13029
+ } else {
13030
+ enabledServices.push(`Embeddings: disabled (full text search only)`);
13031
+ }
12621
13032
  }
12622
13033
  for (const service of enabledServices) {
12623
13034
  logger.info(` • ${service}`);
@@ -14113,7 +14524,7 @@ class PipelineManager {
14113
14524
  parsedScraperOptions = JSON.parse(version.scraper_options);
14114
14525
  } catch (error) {
14115
14526
  logger.warn(
14116
- `⚠️ Failed to parse scraper options for ${version.library_name}@${version.name || "unversioned"}: ${error}`
14527
+ `⚠️ Failed to parse scraper options for ${version.library_name}@${version.name || "unversioned"}: ${error}`
14117
14528
  );
14118
14529
  }
14119
14530
  }
@@ -14481,7 +14892,7 @@ class PipelineManager {
14481
14892
  },
14482
14893
  onJobError: async (internalJob, error, document2) => {
14483
14894
  logger.warn(
14484
- `⚠️ Job ${internalJob.id} error ${document2 ? `on document ${document2.url}` : ""}: ${error.message}`
14895
+ `⚠️ Job ${internalJob.id} error ${document2 ? `on document ${document2.url}` : ""}: ${error.message}`
14485
14896
  );
14486
14897
  }
14487
14898
  });
@@ -14562,7 +14973,7 @@ class PipelineManager {
14562
14973
  );
14563
14974
  } catch (optionsError) {
14564
14975
  logger.warn(
14565
- `⚠️ Failed to store scraper options for job ${job.id}: ${optionsError}`
14976
+ `⚠️ Failed to store scraper options for job ${job.id}: ${optionsError}`
14566
14977
  );
14567
14978
  }
14568
14979
  }
@@ -14631,6 +15042,217 @@ var PipelineFactory2;
14631
15042
  }
14632
15043
  PipelineFactory22.createPipeline = createPipeline;
14633
15044
  })(PipelineFactory2 || (PipelineFactory2 = {}));
15045
+ function getGlobalOptions(command) {
15046
+ let rootCommand = command;
15047
+ while (rootCommand?.parent) {
15048
+ rootCommand = rootCommand.parent;
15049
+ }
15050
+ return rootCommand?.opts() || {};
15051
+ }
15052
+ function getEventBus(command) {
15053
+ const eventBus = command?._eventBus;
15054
+ if (!eventBus) {
15055
+ throw new Error("EventBusService not initialized");
15056
+ }
15057
+ return eventBus;
15058
+ }
15059
+ function ensurePlaywrightBrowsersInstalled() {
15060
+ if (process.env.PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD === "1") {
15061
+ logger.debug(
15062
+ "PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD is set, skipping Playwright browser install."
15063
+ );
15064
+ return;
15065
+ }
15066
+ const chromiumEnvPath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH;
15067
+ if (chromiumEnvPath && existsSync(chromiumEnvPath)) {
15068
+ logger.debug(
15069
+ `PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH is set to '${chromiumEnvPath}', skipping Playwright browser install.`
15070
+ );
15071
+ return;
15072
+ }
15073
+ try {
15074
+ const chromiumPath = chromium.executablePath();
15075
+ if (!chromiumPath || !existsSync(chromiumPath)) {
15076
+ throw new Error("Playwright Chromium browser not found");
15077
+ }
15078
+ } catch (error) {
15079
+ logger.debug(String(error));
15080
+ try {
15081
+ console.log(
15082
+ "🌐 Installing Playwright Chromium browser... (this may take a moment)"
15083
+ );
15084
+ execSync("npm exec -y playwright install --no-shell --with-deps chromium", {
15085
+ stdio: "ignore",
15086
+ // Suppress output
15087
+ cwd: getProjectRoot()
15088
+ });
15089
+ } catch (_installErr) {
15090
+ console.error(
15091
+ "❌ Failed to install Playwright browsers automatically. Please run:\n npx playwright install --no-shell --with-deps chromium\nand try again."
15092
+ );
15093
+ process.exit(1);
15094
+ }
15095
+ }
15096
+ }
15097
+ function resolveProtocol(protocol) {
15098
+ if (protocol === "auto") {
15099
+ if (!process.stdin.isTTY && !process.stdout.isTTY) {
15100
+ return "stdio";
15101
+ }
15102
+ return "http";
15103
+ }
15104
+ if (protocol === "stdio" || protocol === "http") {
15105
+ return protocol;
15106
+ }
15107
+ throw new Error(`Invalid protocol: ${protocol}. Must be 'auto', 'stdio', or 'http'`);
15108
+ }
15109
+ const formatOutput = (data) => JSON.stringify(data, null, 2);
15110
+ function setupLogging(options, protocol) {
15111
+ if (options.silent) {
15112
+ setLogLevel(LogLevel.ERROR);
15113
+ } else if (options.verbose) {
15114
+ setLogLevel(LogLevel.DEBUG);
15115
+ }
15116
+ }
15117
+ function validatePort(portString) {
15118
+ const port = Number.parseInt(portString, 10);
15119
+ if (Number.isNaN(port) || port < 1 || port > 65535) {
15120
+ throw new Error("Invalid port number");
15121
+ }
15122
+ return port;
15123
+ }
15124
+ function validateHost(hostString) {
15125
+ const trimmed = hostString.trim();
15126
+ if (!trimmed) {
15127
+ throw new Error("Host cannot be empty");
15128
+ }
15129
+ if (trimmed.includes(" ") || trimmed.includes(" ") || trimmed.includes("\n")) {
15130
+ throw new Error("Host cannot contain whitespace");
15131
+ }
15132
+ return trimmed;
15133
+ }
15134
+ function createAppServerConfig(options) {
15135
+ return {
15136
+ enableWebInterface: options.enableWebInterface ?? false,
15137
+ enableMcpServer: options.enableMcpServer ?? true,
15138
+ enableApiServer: options.enableApiServer ?? false,
15139
+ enableWorker: options.enableWorker ?? true,
15140
+ port: options.port,
15141
+ host: options.host,
15142
+ externalWorkerUrl: options.externalWorkerUrl,
15143
+ readOnly: options.readOnly ?? false,
15144
+ auth: options.auth,
15145
+ startupContext: options.startupContext
15146
+ };
15147
+ }
15148
+ function parseHeaders(headerOptions) {
15149
+ const headers = {};
15150
+ if (Array.isArray(headerOptions)) {
15151
+ for (const entry of headerOptions) {
15152
+ const idx = entry.indexOf(":");
15153
+ if (idx > 0) {
15154
+ const name = entry.slice(0, idx).trim();
15155
+ const value = entry.slice(idx + 1).trim();
15156
+ if (name) headers[name] = value;
15157
+ }
15158
+ }
15159
+ }
15160
+ return headers;
15161
+ }
15162
+ function parseAuthConfig(options) {
15163
+ if (!options.authEnabled) {
15164
+ return void 0;
15165
+ }
15166
+ return {
15167
+ enabled: true,
15168
+ issuerUrl: options.authIssuerUrl,
15169
+ audience: options.authAudience,
15170
+ scopes: ["openid", "profile"]
15171
+ // Default scopes for OAuth2/OIDC
15172
+ };
15173
+ }
15174
+ function validateAuthConfig(authConfig) {
15175
+ if (!authConfig.enabled) {
15176
+ return;
15177
+ }
15178
+ const errors = [];
15179
+ if (!authConfig.issuerUrl) {
15180
+ errors.push("--auth-issuer-url is required when auth is enabled");
15181
+ } else {
15182
+ try {
15183
+ const url = new URL(authConfig.issuerUrl);
15184
+ if (url.protocol !== "https:") {
15185
+ errors.push("Issuer URL must use HTTPS protocol");
15186
+ }
15187
+ } catch {
15188
+ errors.push("Issuer URL must be a valid URL");
15189
+ }
15190
+ }
15191
+ if (!authConfig.audience) {
15192
+ errors.push("--auth-audience is required when auth is enabled");
15193
+ } else {
15194
+ try {
15195
+ const url = new URL(authConfig.audience);
15196
+ if (url.protocol === "http:" && url.hostname !== "localhost") {
15197
+ logger.warn(
15198
+ "⚠️ Audience uses HTTP protocol - consider using HTTPS for production"
15199
+ );
15200
+ }
15201
+ if (url.hash) {
15202
+ errors.push("Audience must not contain URL fragments");
15203
+ }
15204
+ } catch {
15205
+ if (authConfig.audience.startsWith("urn:")) {
15206
+ const urnParts = authConfig.audience.split(":");
15207
+ if (urnParts.length < 3 || !urnParts[1] || !urnParts[2]) {
15208
+ errors.push("URN audience must follow format: urn:namespace:specific-string");
15209
+ }
15210
+ } else {
15211
+ errors.push(
15212
+ "Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
15213
+ );
15214
+ }
15215
+ }
15216
+ }
15217
+ if (errors.length > 0) {
15218
+ throw new Error(`Auth configuration validation failed:
15219
+ ${errors.join("\n")}`);
15220
+ }
15221
+ }
15222
+ function warnHttpUsage(authConfig, port) {
15223
+ if (!authConfig?.enabled) {
15224
+ return;
15225
+ }
15226
+ const isLocalhost = process.env.NODE_ENV !== "production" || port === 6280 || // default dev port
15227
+ process.env.HOSTNAME?.includes("localhost");
15228
+ if (!isLocalhost) {
15229
+ logger.warn(
15230
+ "⚠️ Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
15231
+ );
15232
+ }
15233
+ }
15234
+ function resolveEmbeddingContext(embeddingModel) {
15235
+ try {
15236
+ let modelSpec = embeddingModel;
15237
+ if (!modelSpec && process.env.OPENAI_API_KEY) {
15238
+ modelSpec = "text-embedding-3-small";
15239
+ logger.debug(
15240
+ "Using default OpenAI embedding model due to OPENAI_API_KEY presence."
15241
+ );
15242
+ }
15243
+ if (!modelSpec) {
15244
+ logger.debug(
15245
+ "No embedding model specified and OPENAI_API_KEY not found. Embeddings are disabled."
15246
+ );
15247
+ return null;
15248
+ }
15249
+ logger.debug(`Resolving embedding configuration for model: ${modelSpec}`);
15250
+ return EmbeddingConfig.parseEmbeddingConfig(modelSpec);
15251
+ } catch (error) {
15252
+ logger.debug(`Failed to resolve embedding configuration: ${error}`);
15253
+ return null;
15254
+ }
15255
+ }
14634
15256
  function createDefaultAction(program) {
14635
15257
  return program.addOption(
14636
15258
  new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default("auto").choices(["auto", "stdio", "http"])
@@ -14976,7 +15598,6 @@ function createMcpCommand(program) {
14976
15598
  );
14977
15599
  if (resolvedProtocol === "stdio") {
14978
15600
  logger.debug(`Auto-detected stdio protocol (no TTY)`);
14979
- logger.info("🚀 Starting MCP server (stdio mode)");
14980
15601
  await pipeline.start();
14981
15602
  const mcpTools = await initializeTools(docService, pipeline);
14982
15603
  const mcpServer = await startStdioServer(mcpTools, cmdOptions.readOnly);
@@ -14989,7 +15610,6 @@ function createMcpCommand(program) {
14989
15610
  });
14990
15611
  } else {
14991
15612
  logger.debug(`Auto-detected http protocol (TTY available)`);
14992
- logger.info("🚀 Starting MCP server (http mode)");
14993
15613
  const config = createAppServerConfig({
14994
15614
  enableWebInterface: false,
14995
15615
  // Never enable web interface in mcp command
@@ -15458,9 +16078,6 @@ function createWebCommand(program) {
15458
16078
  cliCommand: "web"
15459
16079
  }
15460
16080
  });
15461
- logger.info(
15462
- `🚀 Starting web interface${serverUrl ? ` connecting to worker at ${serverUrl}` : ""}`
15463
- );
15464
16081
  const appServer = await startAppServer(docService, pipeline, eventBus, config);
15465
16082
  registerGlobalServices({
15466
16083
  appServer,
@@ -15503,7 +16120,6 @@ function createWorkerCommand(program) {
15503
16120
  const port = validatePort(cmdOptions.port);
15504
16121
  const host = validateHost(cmdOptions.host);
15505
16122
  try {
15506
- logger.info(`🚀 Starting external pipeline worker on port ${port}`);
15507
16123
  ensurePlaywrightBrowsersInstalled();
15508
16124
  const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
15509
16125
  const globalOptions = program.opts();
@@ -15554,7 +16170,7 @@ function createCliProgram() {
15554
16170
  const commandStartTimes = /* @__PURE__ */ new Map();
15555
16171
  let globalEventBus = null;
15556
16172
  let globalTelemetryService = null;
15557
- program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version("1.28.0").addOption(
16173
+ program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version("1.30.0").addOption(
15558
16174
  new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
15559
16175
  ).addOption(new Option("--silent", "Disable all logging except errors")).addOption(
15560
16176
  new Option("--telemetry", "Enable telemetry collection").env("DOCS_MCP_TELEMETRY").argParser((value) => {
@@ -15588,7 +16204,7 @@ function createCliProgram() {
15588
16204
  if (shouldEnableTelemetry()) {
15589
16205
  if (telemetry.isEnabled()) {
15590
16206
  telemetry.setGlobalContext({
15591
- appVersion: "1.28.0",
16207
+ appVersion: "1.30.0",
15592
16208
  appPlatform: process.platform,
15593
16209
  appNodeVersion: process.version,
15594
16210
  appInterface: "cli",