@arabold/docs-mcp-server 1.28.0 → 1.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/assets/main.css +1 -1
- package/dist/assets/main.js +1130 -477
- package/dist/assets/main.js.map +1 -1
- package/dist/index.js +1874 -1258
- package/dist/index.js.map +1 -1
- package/package.json +4 -1
- package/public/assets/main.css +1 -1
- package/public/assets/main.js +1130 -477
- package/public/assets/main.js.map +1 -1
package/dist/index.js
CHANGED
|
@@ -19,14 +19,13 @@ import Fastify from "fastify";
|
|
|
19
19
|
import { WebSocketServer } from "ws";
|
|
20
20
|
import { ProxyOAuthServerProvider } from "@modelcontextprotocol/sdk/server/auth/providers/proxyProvider.js";
|
|
21
21
|
import { createRemoteJWKSet, jwtVerify } from "jose";
|
|
22
|
-
import { execSync } from "node:child_process";
|
|
23
|
-
import { chromium } from "playwright";
|
|
24
22
|
import { createWSClient, createTRPCClient, splitLink, httpBatchLink, wsLink, createTRPCProxyClient } from "@trpc/client";
|
|
25
23
|
import superjson from "superjson";
|
|
26
24
|
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
|
|
27
25
|
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
28
26
|
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
29
27
|
import { z } from "zod/v3";
|
|
28
|
+
import { chromium } from "playwright";
|
|
30
29
|
import mime from "mime";
|
|
31
30
|
import { HeaderGenerator } from "header-generator";
|
|
32
31
|
import fs$1 from "node:fs/promises";
|
|
@@ -56,12 +55,13 @@ import { fastifyTRPCPlugin } from "@trpc/server/adapters/fastify";
|
|
|
56
55
|
import { applyWSSHandler } from "@trpc/server/adapters/ws";
|
|
57
56
|
import { observable } from "@trpc/server/observable";
|
|
58
57
|
import { z as z$1 } from "zod";
|
|
59
|
-
import {
|
|
58
|
+
import { jsx, jsxs, Fragment } from "@kitajs/html/jsx-runtime";
|
|
60
59
|
import DOMPurify from "dompurify";
|
|
61
60
|
import { escapeHtml } from "@kitajs/html";
|
|
62
61
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
63
62
|
import { v4 } from "uuid";
|
|
64
63
|
import { minimatch } from "minimatch";
|
|
64
|
+
import { execSync } from "node:child_process";
|
|
65
65
|
class StoreError extends Error {
|
|
66
66
|
constructor(message, cause) {
|
|
67
67
|
super(cause ? `${message} caused by ${cause}` : message);
|
|
@@ -268,15 +268,19 @@ function createEmbeddingModel(providerAndModel) {
|
|
|
268
268
|
if (!process.env.OPENAI_API_KEY) {
|
|
269
269
|
throw new MissingCredentialsError("openai", ["OPENAI_API_KEY"]);
|
|
270
270
|
}
|
|
271
|
+
const timeoutMs = 3e4;
|
|
271
272
|
const config = {
|
|
272
273
|
...baseConfig,
|
|
273
274
|
modelName: model,
|
|
274
|
-
batchSize: 512
|
|
275
|
+
batchSize: 512,
|
|
275
276
|
// OpenAI supports large batches
|
|
277
|
+
timeout: timeoutMs
|
|
276
278
|
};
|
|
277
279
|
const baseURL = process.env.OPENAI_API_BASE;
|
|
278
280
|
if (baseURL) {
|
|
279
|
-
config.configuration = { baseURL };
|
|
281
|
+
config.configuration = { baseURL, timeout: timeoutMs };
|
|
282
|
+
} else {
|
|
283
|
+
config.configuration = { timeout: timeoutMs };
|
|
280
284
|
}
|
|
281
285
|
return new OpenAIEmbeddings(config);
|
|
282
286
|
}
|
|
@@ -1011,7 +1015,7 @@ class ProxyAuthManager {
|
|
|
1011
1015
|
logger.debug(`Token validation capabilities: ${capabilities.join(", ")}`);
|
|
1012
1016
|
if (capabilities.length === 0) {
|
|
1013
1017
|
logger.warn(
|
|
1014
|
-
"⚠️
|
|
1018
|
+
"⚠️ No token validation mechanisms available - authentication may fail"
|
|
1015
1019
|
);
|
|
1016
1020
|
}
|
|
1017
1021
|
this.proxyProvider = new ProxyOAuthServerProvider({
|
|
@@ -1349,667 +1353,154 @@ class ProxyAuthManager {
|
|
|
1349
1353
|
}
|
|
1350
1354
|
}
|
|
1351
1355
|
}
|
|
1352
|
-
class
|
|
1353
|
-
|
|
1356
|
+
class RemoteEventProxy {
|
|
1357
|
+
constructor(remoteWorkerUrl, localEventBus) {
|
|
1358
|
+
this.remoteWorkerUrl = remoteWorkerUrl;
|
|
1359
|
+
this.localEventBus = localEventBus;
|
|
1360
|
+
}
|
|
1361
|
+
trpcClient = null;
|
|
1362
|
+
wsClient = null;
|
|
1363
|
+
subscription = null;
|
|
1364
|
+
isConnected = false;
|
|
1354
1365
|
/**
|
|
1355
|
-
*
|
|
1356
|
-
* Creates the instance if it doesn't exist.
|
|
1366
|
+
* Start subscribing to remote events and forwarding them locally.
|
|
1357
1367
|
*/
|
|
1358
|
-
|
|
1359
|
-
if (
|
|
1360
|
-
|
|
1368
|
+
async connect() {
|
|
1369
|
+
if (this.isConnected) {
|
|
1370
|
+
logger.warn("Remote event proxy already connected");
|
|
1371
|
+
return;
|
|
1372
|
+
}
|
|
1373
|
+
logger.debug(`Connecting to remote worker at ${this.remoteWorkerUrl}`);
|
|
1374
|
+
try {
|
|
1375
|
+
const url = new URL(this.remoteWorkerUrl);
|
|
1376
|
+
const baseUrl = `${url.protocol}//${url.host}`;
|
|
1377
|
+
const wsUrl = baseUrl.replace(/^http/, "ws");
|
|
1378
|
+
this.wsClient = createWSClient({
|
|
1379
|
+
url: wsUrl
|
|
1380
|
+
});
|
|
1381
|
+
this.trpcClient = createTRPCClient({
|
|
1382
|
+
links: [
|
|
1383
|
+
splitLink({
|
|
1384
|
+
condition: (op) => op.type === "subscription",
|
|
1385
|
+
true: wsLink({ client: this.wsClient, transformer: superjson }),
|
|
1386
|
+
false: httpBatchLink({ url: this.remoteWorkerUrl, transformer: superjson })
|
|
1387
|
+
})
|
|
1388
|
+
]
|
|
1389
|
+
});
|
|
1390
|
+
this.subscription = this.trpcClient.events.subscribe.subscribe(
|
|
1391
|
+
{},
|
|
1392
|
+
// Subscribe to all event types
|
|
1393
|
+
{
|
|
1394
|
+
onData: (data) => {
|
|
1395
|
+
logger.debug(`Received remote event: ${data.type}`);
|
|
1396
|
+
this.localEventBus.emit(data.type, data.payload);
|
|
1397
|
+
},
|
|
1398
|
+
onError: (error) => {
|
|
1399
|
+
logger.error(`❌ Remote event subscription error: ${error}`);
|
|
1400
|
+
this.isConnected = false;
|
|
1401
|
+
this.scheduleReconnect();
|
|
1402
|
+
},
|
|
1403
|
+
onStarted: () => {
|
|
1404
|
+
logger.debug("Remote event subscription started");
|
|
1405
|
+
this.isConnected = true;
|
|
1406
|
+
},
|
|
1407
|
+
onComplete: () => {
|
|
1408
|
+
logger.debug("Remote event subscription completed");
|
|
1409
|
+
this.isConnected = false;
|
|
1410
|
+
}
|
|
1411
|
+
}
|
|
1412
|
+
);
|
|
1413
|
+
} catch (error) {
|
|
1414
|
+
logger.error(`❌ Failed to connect to remote worker: ${error}`);
|
|
1415
|
+
this.scheduleReconnect();
|
|
1361
1416
|
}
|
|
1362
|
-
return EmbeddingConfig.instance;
|
|
1363
1417
|
}
|
|
1364
1418
|
/**
|
|
1365
|
-
*
|
|
1419
|
+
* Disconnect from the remote worker and stop forwarding events.
|
|
1366
1420
|
*/
|
|
1367
|
-
|
|
1368
|
-
|
|
1421
|
+
disconnect() {
|
|
1422
|
+
if (this.subscription) {
|
|
1423
|
+
this.subscription.unsubscribe();
|
|
1424
|
+
this.subscription = null;
|
|
1425
|
+
}
|
|
1426
|
+
if (this.wsClient) {
|
|
1427
|
+
this.wsClient.close();
|
|
1428
|
+
this.wsClient = null;
|
|
1429
|
+
}
|
|
1430
|
+
this.isConnected = false;
|
|
1431
|
+
logger.info("🚫 Disconnected from remote worker");
|
|
1369
1432
|
}
|
|
1370
1433
|
/**
|
|
1371
|
-
*
|
|
1372
|
-
* This avoids expensive API calls for dimension detection in telemetry.
|
|
1373
|
-
*
|
|
1374
|
-
* Note: The "openai" provider also supports OpenAI-compatible APIs like:
|
|
1375
|
-
* - Ollama (local models)
|
|
1376
|
-
* - LMStudio (local models)
|
|
1377
|
-
* - Any service implementing OpenAI's embedding API
|
|
1434
|
+
* Check if the proxy is currently connected to the remote worker.
|
|
1378
1435
|
*/
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
"text-embedding-3-small": 1536,
|
|
1382
|
-
"text-embedding-3-large": 3072,
|
|
1383
|
-
"text-embedding-ada-002": 1536,
|
|
1384
|
-
// Google Vertex AI models
|
|
1385
|
-
"text-embedding-004": 768,
|
|
1386
|
-
"textembedding-gecko@003": 768,
|
|
1387
|
-
"textembedding-gecko@002": 768,
|
|
1388
|
-
"textembedding-gecko@001": 768,
|
|
1389
|
-
// Google Gemini models (with MRL support)
|
|
1390
|
-
"text-embedding-preview-0409": 768,
|
|
1391
|
-
"embedding-001": 768,
|
|
1392
|
-
// AWS Bedrock models
|
|
1393
|
-
// Amazon Titan models
|
|
1394
|
-
"amazon.titan-embed-text-v1": 1536,
|
|
1395
|
-
"amazon.titan-embed-text-v2:0": 1024,
|
|
1396
|
-
"amazon.titan-embed-image-v1": 1024,
|
|
1397
|
-
// Image embedding model
|
|
1398
|
-
// Cohere models
|
|
1399
|
-
"cohere.embed-english-v3": 1024,
|
|
1400
|
-
"cohere.embed-multilingual-v3": 1024,
|
|
1401
|
-
// SageMaker models (hosted on AWS SageMaker)
|
|
1402
|
-
"intfloat/multilingual-e5-large": 1024,
|
|
1403
|
-
// Additional AWS models that might be supported
|
|
1404
|
-
// Note: Some of these might be placeholders - verify dimensions before use
|
|
1405
|
-
// "amazon.nova-embed-multilingual-v1:0": 4096, // Commented out as noted in source
|
|
1406
|
-
// MTEB Leaderboard models (source: https://huggingface.co/spaces/mteb/leaderboard)
|
|
1407
|
-
// Top performing models from Massive Text Embedding Benchmark
|
|
1408
|
-
"sentence-transformers/all-MiniLM-L6-v2": 384,
|
|
1409
|
-
"gemini-embedding-001": 3072,
|
|
1410
|
-
"Qwen/Qwen3-Embedding-8B": 4096,
|
|
1411
|
-
"Qwen/Qwen3-Embedding-4B": 2560,
|
|
1412
|
-
"Qwen/Qwen3-Embedding-0.6B": 1024,
|
|
1413
|
-
"Linq-AI-Research/Linq-Embed-Mistral": 4096,
|
|
1414
|
-
"Alibaba-NLP/gte-Qwen2-7B-instruct": 3584,
|
|
1415
|
-
"intfloat/multilingual-e5-large-instruct": 1024,
|
|
1416
|
-
"Salesforce/SFR-Embedding-Mistral": 4096,
|
|
1417
|
-
"text-multilingual-embedding-002": 768,
|
|
1418
|
-
"GritLM/GritLM-7B": 4096,
|
|
1419
|
-
"GritLM/GritLM-8x7B": 4096,
|
|
1420
|
-
"intfloat/e5-mistral-7b-instruct": 4096,
|
|
1421
|
-
"Cohere/Cohere-embed-multilingual-v3.0": 1024,
|
|
1422
|
-
"Alibaba-NLP/gte-Qwen2-1.5B-instruct": 8960,
|
|
1423
|
-
"Lajavaness/bilingual-embedding-large": 1024,
|
|
1424
|
-
"Salesforce/SFR-Embedding-2_R": 4096,
|
|
1425
|
-
"NovaSearch/stella_en_1.5B_v5": 8960,
|
|
1426
|
-
"NovaSearch/jasper_en_vision_language_v1": 8960,
|
|
1427
|
-
"nvidia/NV-Embed-v2": 4096,
|
|
1428
|
-
"OrdalieTech/Solon-embeddings-large-0.1": 1024,
|
|
1429
|
-
"BAAI/bge-m3": 1024,
|
|
1430
|
-
"HIT-TMG/KaLM-embedding-multilingual-mini-v1": 896,
|
|
1431
|
-
"jinaai/jina-embeddings-v3": 1024,
|
|
1432
|
-
"Alibaba-NLP/gte-multilingual-base": 768,
|
|
1433
|
-
"Lajavaness/bilingual-embedding-base": 768,
|
|
1434
|
-
"HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": 896,
|
|
1435
|
-
"nvidia/NV-Embed-v1": 4096,
|
|
1436
|
-
"Cohere/Cohere-embed-multilingual-light-v3.0": 384,
|
|
1437
|
-
"manu/bge-m3-custom-fr": 1024,
|
|
1438
|
-
"Lajavaness/bilingual-embedding-small": 384,
|
|
1439
|
-
"Snowflake/snowflake-arctic-embed-l-v2.0": 1024,
|
|
1440
|
-
"intfloat/multilingual-e5-base": 768,
|
|
1441
|
-
"voyage-3-lite": 512,
|
|
1442
|
-
"voyage-3": 1024,
|
|
1443
|
-
"intfloat/multilingual-e5-small": 384,
|
|
1444
|
-
"Alibaba-NLP/gte-Qwen1.5-7B-instruct": 4096,
|
|
1445
|
-
"Snowflake/snowflake-arctic-embed-m-v2.0": 768,
|
|
1446
|
-
"deepvk/USER-bge-m3": 1024,
|
|
1447
|
-
"Cohere/Cohere-embed-english-v3.0": 1024,
|
|
1448
|
-
"Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": 768,
|
|
1449
|
-
"ibm-granite/granite-embedding-278m-multilingual": 768,
|
|
1450
|
-
"NovaSearch/stella_en_400M_v5": 4096,
|
|
1451
|
-
"omarelshehy/arabic-english-sts-matryoshka": 1024,
|
|
1452
|
-
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2": 768,
|
|
1453
|
-
"Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": 768,
|
|
1454
|
-
"Haon-Chen/speed-embedding-7b-instruct": 4096,
|
|
1455
|
-
"sentence-transformers/LaBSE": 768,
|
|
1456
|
-
"WhereIsAI/UAE-Large-V1": 1024,
|
|
1457
|
-
"ibm-granite/granite-embedding-107m-multilingual": 384,
|
|
1458
|
-
"mixedbread-ai/mxbai-embed-large-v1": 1024,
|
|
1459
|
-
"intfloat/e5-large-v2": 1024,
|
|
1460
|
-
"avsolatorio/GIST-large-Embedding-v0": 1024,
|
|
1461
|
-
"sdadas/mmlw-e5-large": 1024,
|
|
1462
|
-
"nomic-ai/nomic-embed-text-v1": 768,
|
|
1463
|
-
"nomic-ai/nomic-embed-text-v1-ablated": 768,
|
|
1464
|
-
"intfloat/e5-base-v2": 768,
|
|
1465
|
-
"BAAI/bge-large-en-v1.5": 1024,
|
|
1466
|
-
"intfloat/e5-large": 1024,
|
|
1467
|
-
"Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": 384,
|
|
1468
|
-
"Cohere/Cohere-embed-english-light-v3.0": 384,
|
|
1469
|
-
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 768,
|
|
1470
|
-
"Gameselo/STS-multilingual-mpnet-base-v2": 768,
|
|
1471
|
-
"thenlper/gte-large": 1024,
|
|
1472
|
-
"avsolatorio/GIST-Embedding-v0": 768,
|
|
1473
|
-
"nomic-ai/nomic-embed-text-v1-unsupervised": 768,
|
|
1474
|
-
"infgrad/stella-base-en-v2": 768,
|
|
1475
|
-
"avsolatorio/NoInstruct-small-Embedding-v0": 384,
|
|
1476
|
-
"dwzhu/e5-base-4k": 768,
|
|
1477
|
-
"sdadas/mmlw-e5-base": 768,
|
|
1478
|
-
"voyage-multilingual-2": 1024,
|
|
1479
|
-
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": 4096,
|
|
1480
|
-
"BAAI/bge-base-en-v1.5": 768,
|
|
1481
|
-
"avsolatorio/GIST-small-Embedding-v0": 384,
|
|
1482
|
-
"sdadas/mmlw-roberta-large": 1024,
|
|
1483
|
-
"nomic-ai/nomic-embed-text-v1.5": 768,
|
|
1484
|
-
"minishlab/potion-multilingual-128M": 256,
|
|
1485
|
-
"shibing624/text2vec-base-multilingual": 384,
|
|
1486
|
-
"thenlper/gte-base": 768,
|
|
1487
|
-
"intfloat/e5-small-v2": 384,
|
|
1488
|
-
"intfloat/e5-base": 768,
|
|
1489
|
-
"sentence-transformers/static-similarity-mrl-multilingual-v1": 1024,
|
|
1490
|
-
"manu/sentence_croissant_alpha_v0.3": 2048,
|
|
1491
|
-
"BAAI/bge-small-en-v1.5": 512,
|
|
1492
|
-
"thenlper/gte-small": 384,
|
|
1493
|
-
"sdadas/mmlw-e5-small": 384,
|
|
1494
|
-
"manu/sentence_croissant_alpha_v0.4": 2048,
|
|
1495
|
-
"manu/sentence_croissant_alpha_v0.2": 2048,
|
|
1496
|
-
"abhinand/MedEmbed-small-v0.1": 384,
|
|
1497
|
-
"ibm-granite/granite-embedding-125m-english": 768,
|
|
1498
|
-
"intfloat/e5-small": 384,
|
|
1499
|
-
"voyage-large-2-instruct": 1024,
|
|
1500
|
-
"sdadas/mmlw-roberta-base": 768,
|
|
1501
|
-
"Snowflake/snowflake-arctic-embed-l": 1024,
|
|
1502
|
-
"Mihaiii/Ivysaur": 384,
|
|
1503
|
-
"Snowflake/snowflake-arctic-embed-m-long": 768,
|
|
1504
|
-
"bigscience/sgpt-bloom-7b1-msmarco": 4096,
|
|
1505
|
-
"avsolatorio/GIST-all-MiniLM-L6-v2": 384,
|
|
1506
|
-
"sergeyzh/LaBSE-ru-turbo": 768,
|
|
1507
|
-
"sentence-transformers/all-mpnet-base-v2": 768,
|
|
1508
|
-
"Snowflake/snowflake-arctic-embed-m": 768,
|
|
1509
|
-
"Snowflake/snowflake-arctic-embed-s": 384,
|
|
1510
|
-
"sentence-transformers/all-MiniLM-L12-v2": 384,
|
|
1511
|
-
"Mihaiii/gte-micro-v4": 384,
|
|
1512
|
-
"Snowflake/snowflake-arctic-embed-m-v1.5": 768,
|
|
1513
|
-
"cointegrated/LaBSE-en-ru": 768,
|
|
1514
|
-
"Mihaiii/Bulbasaur": 384,
|
|
1515
|
-
"ibm-granite/granite-embedding-30m-english": 384,
|
|
1516
|
-
"deepfile/embedder-100p": 768,
|
|
1517
|
-
"Jaume/gemma-2b-embeddings": 2048,
|
|
1518
|
-
"OrlikB/KartonBERT-USE-base-v1": 768,
|
|
1519
|
-
"izhx/udever-bloom-7b1": 4096,
|
|
1520
|
-
"izhx/udever-bloom-1b1": 1024,
|
|
1521
|
-
"brahmairesearch/slx-v0.1": 384,
|
|
1522
|
-
"Mihaiii/Wartortle": 384,
|
|
1523
|
-
"izhx/udever-bloom-3b": 2048,
|
|
1524
|
-
"deepvk/USER-base": 768,
|
|
1525
|
-
"ai-forever/ru-en-RoSBERTa": 1024,
|
|
1526
|
-
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": 4096,
|
|
1527
|
-
"Mihaiii/Venusaur": 384,
|
|
1528
|
-
"Snowflake/snowflake-arctic-embed-xs": 384,
|
|
1529
|
-
"jinaai/jina-embedding-b-en-v1": 768,
|
|
1530
|
-
"Mihaiii/gte-micro": 384,
|
|
1531
|
-
"aari1995/German_Semantic_STS_V2": 1024,
|
|
1532
|
-
"Mihaiii/Squirtle": 384,
|
|
1533
|
-
"OrlikB/st-polish-kartonberta-base-alpha-v1": 768,
|
|
1534
|
-
"sergeyzh/rubert-tiny-turbo": 312,
|
|
1535
|
-
"minishlab/potion-base-8M": 256,
|
|
1536
|
-
"minishlab/M2V_base_glove_subword": 256,
|
|
1537
|
-
"jinaai/jina-embedding-s-en-v1": 512,
|
|
1538
|
-
"minishlab/potion-base-4M": 128,
|
|
1539
|
-
"minishlab/M2V_base_output": 256,
|
|
1540
|
-
"DeepPavlov/rubert-base-cased-sentence": 768,
|
|
1541
|
-
"jinaai/jina-embeddings-v2-small-en": 512,
|
|
1542
|
-
"cointegrated/rubert-tiny2": 312,
|
|
1543
|
-
"minishlab/M2V_base_glove": 256,
|
|
1544
|
-
"cointegrated/rubert-tiny": 312,
|
|
1545
|
-
"silma-ai/silma-embeddding-matryoshka-v0.1": 768,
|
|
1546
|
-
"DeepPavlov/rubert-base-cased": 768,
|
|
1547
|
-
"Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": 768,
|
|
1548
|
-
"izhx/udever-bloom-560m": 1024,
|
|
1549
|
-
"minishlab/potion-base-2M": 64,
|
|
1550
|
-
"DeepPavlov/distilrubert-small-cased-conversational": 768,
|
|
1551
|
-
"consciousAI/cai-lunaris-text-embeddings": 1024,
|
|
1552
|
-
"deepvk/deberta-v1-base": 768,
|
|
1553
|
-
"Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": 768,
|
|
1554
|
-
"Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": 768,
|
|
1555
|
-
"ai-forever/sbert_large_mt_nlu_ru": 1024,
|
|
1556
|
-
"ai-forever/sbert_large_nlu_ru": 1024,
|
|
1557
|
-
"malenia1/ternary-weight-embedding": 1024,
|
|
1558
|
-
"jinaai/jina-embeddings-v2-base-en": 768,
|
|
1559
|
-
"VPLabs/SearchMap_Preview": 4096,
|
|
1560
|
-
"Hum-Works/lodestone-base-4096-v1": 768,
|
|
1561
|
-
"jinaai/jina-embeddings-v4": 2048
|
|
1562
|
-
};
|
|
1563
|
-
/**
|
|
1564
|
-
* Lowercase lookup map for case-insensitive model dimension queries.
|
|
1565
|
-
* Built lazily from knownModelDimensions to ensure consistency.
|
|
1566
|
-
*/
|
|
1567
|
-
modelLookup;
|
|
1568
|
-
constructor() {
|
|
1569
|
-
this.modelLookup = /* @__PURE__ */ new Map();
|
|
1570
|
-
for (const [model, dimensions] of Object.entries(this.knownModelDimensions)) {
|
|
1571
|
-
this.modelLookup.set(model.toLowerCase(), dimensions);
|
|
1572
|
-
}
|
|
1573
|
-
}
|
|
1574
|
-
/**
|
|
1575
|
-
* Parse embedding model configuration from a provided model specification.
|
|
1576
|
-
* This is a synchronous operation that extracts provider, model, and known dimensions.
|
|
1577
|
-
*
|
|
1578
|
-
* Supports various providers:
|
|
1579
|
-
* - openai: OpenAI models and OpenAI-compatible APIs (Ollama, LMStudio, etc.)
|
|
1580
|
-
* - vertex: Google Cloud Vertex AI
|
|
1581
|
-
* - gemini: Google Generative AI
|
|
1582
|
-
* - aws: AWS Bedrock models
|
|
1583
|
-
* - microsoft: Azure OpenAI
|
|
1584
|
-
* - sagemaker: AWS SageMaker hosted models
|
|
1585
|
-
*
|
|
1586
|
-
* @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
|
|
1587
|
-
* @returns Parsed embedding model configuration
|
|
1588
|
-
*/
|
|
1589
|
-
parse(modelSpec) {
|
|
1590
|
-
const spec = modelSpec || "text-embedding-3-small";
|
|
1591
|
-
const colonIndex = spec.indexOf(":");
|
|
1592
|
-
let provider;
|
|
1593
|
-
let model;
|
|
1594
|
-
if (colonIndex === -1) {
|
|
1595
|
-
provider = "openai";
|
|
1596
|
-
model = spec;
|
|
1597
|
-
} else {
|
|
1598
|
-
provider = spec.substring(0, colonIndex);
|
|
1599
|
-
model = spec.substring(colonIndex + 1);
|
|
1600
|
-
}
|
|
1601
|
-
const dimensions = this.modelLookup?.get(model.toLowerCase()) || null;
|
|
1602
|
-
return {
|
|
1603
|
-
provider,
|
|
1604
|
-
model,
|
|
1605
|
-
dimensions,
|
|
1606
|
-
modelSpec: spec
|
|
1607
|
-
};
|
|
1608
|
-
}
|
|
1609
|
-
/**
|
|
1610
|
-
* Get the known dimensions for a specific model.
|
|
1611
|
-
* Returns null if the model dimensions are not known.
|
|
1612
|
-
* Uses case-insensitive lookup.
|
|
1613
|
-
*
|
|
1614
|
-
* @param model The model name (e.g., "text-embedding-3-small")
|
|
1615
|
-
* @returns Known dimensions or null
|
|
1616
|
-
*/
|
|
1617
|
-
getKnownDimensions(model) {
|
|
1618
|
-
return this.modelLookup?.get(model.toLowerCase()) || null;
|
|
1619
|
-
}
|
|
1620
|
-
/**
|
|
1621
|
-
* Add or update known dimensions for a model.
|
|
1622
|
-
* This can be used to cache discovered dimensions.
|
|
1623
|
-
* Stores both original case and lowercase for consistent lookup.
|
|
1624
|
-
*
|
|
1625
|
-
* @param model The model name
|
|
1626
|
-
* @param dimensions The dimensions to cache
|
|
1627
|
-
*/
|
|
1628
|
-
setKnownDimensions(model, dimensions) {
|
|
1629
|
-
this.knownModelDimensions[model] = dimensions;
|
|
1630
|
-
if (this.modelLookup) {
|
|
1631
|
-
this.modelLookup.set(model.toLowerCase(), dimensions);
|
|
1632
|
-
}
|
|
1633
|
-
}
|
|
1634
|
-
/**
|
|
1635
|
-
* Static method to parse embedding model configuration using the singleton instance.
|
|
1636
|
-
* This maintains backward compatibility while using the class-based approach.
|
|
1637
|
-
*/
|
|
1638
|
-
static parseEmbeddingConfig(modelSpec) {
|
|
1639
|
-
return EmbeddingConfig.getInstance().parse(modelSpec);
|
|
1640
|
-
}
|
|
1641
|
-
/**
|
|
1642
|
-
* Static method to get known model dimensions using the singleton instance.
|
|
1643
|
-
* This maintains backward compatibility while using the class-based approach.
|
|
1644
|
-
*/
|
|
1645
|
-
static getKnownModelDimensions(model) {
|
|
1646
|
-
return EmbeddingConfig.getInstance().getKnownDimensions(model);
|
|
1436
|
+
isActive() {
|
|
1437
|
+
return this.isConnected;
|
|
1647
1438
|
}
|
|
1648
1439
|
/**
|
|
1649
|
-
*
|
|
1650
|
-
* This maintains backward compatibility while using the class-based approach.
|
|
1440
|
+
* Schedule a reconnection attempt after a delay.
|
|
1651
1441
|
*/
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
rootCommand = rootCommand.parent;
|
|
1660
|
-
}
|
|
1661
|
-
return rootCommand?.opts() || {};
|
|
1662
|
-
}
|
|
1663
|
-
function getEventBus(command) {
|
|
1664
|
-
const eventBus = command?._eventBus;
|
|
1665
|
-
if (!eventBus) {
|
|
1666
|
-
throw new Error("EventBusService not initialized");
|
|
1667
|
-
}
|
|
1668
|
-
return eventBus;
|
|
1669
|
-
}
|
|
1670
|
-
function ensurePlaywrightBrowsersInstalled() {
|
|
1671
|
-
if (process.env.PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD === "1") {
|
|
1672
|
-
logger.debug(
|
|
1673
|
-
"PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD is set, skipping Playwright browser install."
|
|
1674
|
-
);
|
|
1675
|
-
return;
|
|
1676
|
-
}
|
|
1677
|
-
const chromiumEnvPath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH;
|
|
1678
|
-
if (chromiumEnvPath && existsSync(chromiumEnvPath)) {
|
|
1679
|
-
logger.debug(
|
|
1680
|
-
`PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH is set to '${chromiumEnvPath}', skipping Playwright browser install.`
|
|
1681
|
-
);
|
|
1682
|
-
return;
|
|
1683
|
-
}
|
|
1684
|
-
try {
|
|
1685
|
-
const chromiumPath = chromium.executablePath();
|
|
1686
|
-
if (!chromiumPath || !existsSync(chromiumPath)) {
|
|
1687
|
-
throw new Error("Playwright Chromium browser not found");
|
|
1688
|
-
}
|
|
1689
|
-
} catch (error) {
|
|
1690
|
-
logger.debug(String(error));
|
|
1691
|
-
try {
|
|
1692
|
-
console.log(
|
|
1693
|
-
"🌐 Installing Playwright Chromium browser... (this may take a moment)"
|
|
1694
|
-
);
|
|
1695
|
-
execSync("npm exec -y playwright install --no-shell --with-deps chromium", {
|
|
1696
|
-
stdio: "ignore",
|
|
1697
|
-
// Suppress output
|
|
1698
|
-
cwd: getProjectRoot()
|
|
1699
|
-
});
|
|
1700
|
-
} catch (_installErr) {
|
|
1701
|
-
console.error(
|
|
1702
|
-
"❌ Failed to install Playwright browsers automatically. Please run:\n npx playwright install --no-shell --with-deps chromium\nand try again."
|
|
1703
|
-
);
|
|
1704
|
-
process.exit(1);
|
|
1705
|
-
}
|
|
1706
|
-
}
|
|
1707
|
-
}
|
|
1708
|
-
function resolveProtocol(protocol) {
|
|
1709
|
-
if (protocol === "auto") {
|
|
1710
|
-
if (!process.stdin.isTTY && !process.stdout.isTTY) {
|
|
1711
|
-
return "stdio";
|
|
1712
|
-
}
|
|
1713
|
-
return "http";
|
|
1714
|
-
}
|
|
1715
|
-
if (protocol === "stdio" || protocol === "http") {
|
|
1716
|
-
return protocol;
|
|
1442
|
+
scheduleReconnect() {
|
|
1443
|
+
logger.info("🔄 Scheduling reconnect to remote worker in 5 seconds...");
|
|
1444
|
+
setTimeout(() => {
|
|
1445
|
+
if (!this.isConnected) {
|
|
1446
|
+
this.connect();
|
|
1447
|
+
}
|
|
1448
|
+
}, 5e3);
|
|
1717
1449
|
}
|
|
1718
|
-
throw new Error(`Invalid protocol: ${protocol}. Must be 'auto', 'stdio', or 'http'`);
|
|
1719
1450
|
}
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
setLogLevel(LogLevel.DEBUG);
|
|
1451
|
+
class ToolError extends Error {
|
|
1452
|
+
constructor(message, toolName) {
|
|
1453
|
+
super(message);
|
|
1454
|
+
this.toolName = toolName;
|
|
1455
|
+
this.name = this.constructor.name;
|
|
1726
1456
|
}
|
|
1727
1457
|
}
|
|
1728
|
-
|
|
1729
|
-
const port = Number.parseInt(portString, 10);
|
|
1730
|
-
if (Number.isNaN(port) || port < 1 || port > 65535) {
|
|
1731
|
-
throw new Error("❌ Invalid port number");
|
|
1732
|
-
}
|
|
1733
|
-
return port;
|
|
1458
|
+
class ValidationError extends ToolError {
|
|
1734
1459
|
}
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1460
|
+
const DEFAULT_MAX_PAGES = 1e3;
|
|
1461
|
+
const DEFAULT_MAX_DEPTH$1 = 3;
|
|
1462
|
+
const DEFAULT_MAX_CONCURRENCY = 3;
|
|
1463
|
+
const DEFAULT_PROTOCOL = "auto";
|
|
1464
|
+
const DEFAULT_HTTP_PORT = 6280;
|
|
1465
|
+
const DEFAULT_WEB_PORT = 6281;
|
|
1466
|
+
const DEFAULT_HOST = "127.0.0.1";
|
|
1467
|
+
const DEFAULT_PAGE_TIMEOUT = 5e3;
|
|
1468
|
+
const FETCHER_MAX_RETRIES = 6;
|
|
1469
|
+
const FETCHER_BASE_DELAY = 1e3;
|
|
1470
|
+
const FETCHER_MAX_CACHE_ITEMS = 200;
|
|
1471
|
+
const FETCHER_MAX_CACHE_ITEM_SIZE_BYTES = 500 * 1024;
|
|
1472
|
+
const SPLITTER_MIN_CHUNK_SIZE = 500;
|
|
1473
|
+
const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
|
|
1474
|
+
const SPLITTER_MAX_CHUNK_SIZE = 5e3;
|
|
1475
|
+
const EMBEDDING_BATCH_SIZE = 100;
|
|
1476
|
+
const EMBEDDING_BATCH_CHARS = 5e4;
|
|
1477
|
+
const MIGRATION_MAX_RETRIES = 5;
|
|
1478
|
+
const MIGRATION_RETRY_DELAY_MS = 300;
|
|
1479
|
+
const SEARCH_OVERFETCH_FACTOR = 2;
|
|
1480
|
+
const SEARCH_WEIGHT_VEC = 1;
|
|
1481
|
+
const SEARCH_WEIGHT_FTS = 1;
|
|
1482
|
+
const VECTOR_SEARCH_MULTIPLIER = 10;
|
|
1483
|
+
function createResponse(text) {
|
|
1484
|
+
return {
|
|
1485
|
+
content: [
|
|
1486
|
+
{
|
|
1487
|
+
type: "text",
|
|
1488
|
+
text
|
|
1489
|
+
}
|
|
1490
|
+
],
|
|
1491
|
+
isError: false
|
|
1492
|
+
};
|
|
1744
1493
|
}
|
|
1745
|
-
function
|
|
1494
|
+
function createError(errorOrText) {
|
|
1495
|
+
const text = errorOrText instanceof Error ? errorOrText.message : String(errorOrText);
|
|
1746
1496
|
return {
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
readOnly: options.readOnly ?? false,
|
|
1755
|
-
auth: options.auth,
|
|
1756
|
-
startupContext: options.startupContext
|
|
1757
|
-
};
|
|
1758
|
-
}
|
|
1759
|
-
function parseHeaders(headerOptions) {
|
|
1760
|
-
const headers = {};
|
|
1761
|
-
if (Array.isArray(headerOptions)) {
|
|
1762
|
-
for (const entry of headerOptions) {
|
|
1763
|
-
const idx = entry.indexOf(":");
|
|
1764
|
-
if (idx > 0) {
|
|
1765
|
-
const name = entry.slice(0, idx).trim();
|
|
1766
|
-
const value = entry.slice(idx + 1).trim();
|
|
1767
|
-
if (name) headers[name] = value;
|
|
1768
|
-
}
|
|
1769
|
-
}
|
|
1770
|
-
}
|
|
1771
|
-
return headers;
|
|
1772
|
-
}
|
|
1773
|
-
function parseAuthConfig(options) {
|
|
1774
|
-
if (!options.authEnabled) {
|
|
1775
|
-
return void 0;
|
|
1776
|
-
}
|
|
1777
|
-
return {
|
|
1778
|
-
enabled: true,
|
|
1779
|
-
issuerUrl: options.authIssuerUrl,
|
|
1780
|
-
audience: options.authAudience,
|
|
1781
|
-
scopes: ["openid", "profile"]
|
|
1782
|
-
// Default scopes for OAuth2/OIDC
|
|
1783
|
-
};
|
|
1784
|
-
}
|
|
1785
|
-
function validateAuthConfig(authConfig) {
|
|
1786
|
-
if (!authConfig.enabled) {
|
|
1787
|
-
return;
|
|
1788
|
-
}
|
|
1789
|
-
const errors = [];
|
|
1790
|
-
if (!authConfig.issuerUrl) {
|
|
1791
|
-
errors.push("--auth-issuer-url is required when auth is enabled");
|
|
1792
|
-
} else {
|
|
1793
|
-
try {
|
|
1794
|
-
const url = new URL(authConfig.issuerUrl);
|
|
1795
|
-
if (url.protocol !== "https:") {
|
|
1796
|
-
errors.push("Issuer URL must use HTTPS protocol");
|
|
1797
|
-
}
|
|
1798
|
-
} catch {
|
|
1799
|
-
errors.push("Issuer URL must be a valid URL");
|
|
1800
|
-
}
|
|
1801
|
-
}
|
|
1802
|
-
if (!authConfig.audience) {
|
|
1803
|
-
errors.push("--auth-audience is required when auth is enabled");
|
|
1804
|
-
} else {
|
|
1805
|
-
try {
|
|
1806
|
-
const url = new URL(authConfig.audience);
|
|
1807
|
-
if (url.protocol === "http:" && url.hostname !== "localhost") {
|
|
1808
|
-
logger.warn(
|
|
1809
|
-
"⚠️ Audience uses HTTP protocol - consider using HTTPS for production"
|
|
1810
|
-
);
|
|
1811
|
-
}
|
|
1812
|
-
if (url.hash) {
|
|
1813
|
-
errors.push("Audience must not contain URL fragments");
|
|
1814
|
-
}
|
|
1815
|
-
} catch {
|
|
1816
|
-
if (authConfig.audience.startsWith("urn:")) {
|
|
1817
|
-
const urnParts = authConfig.audience.split(":");
|
|
1818
|
-
if (urnParts.length < 3 || !urnParts[1] || !urnParts[2]) {
|
|
1819
|
-
errors.push("URN audience must follow format: urn:namespace:specific-string");
|
|
1820
|
-
}
|
|
1821
|
-
} else {
|
|
1822
|
-
errors.push(
|
|
1823
|
-
"Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
|
|
1824
|
-
);
|
|
1825
|
-
}
|
|
1826
|
-
}
|
|
1827
|
-
}
|
|
1828
|
-
if (errors.length > 0) {
|
|
1829
|
-
throw new Error(`Auth configuration validation failed:
|
|
1830
|
-
${errors.join("\n")}`);
|
|
1831
|
-
}
|
|
1832
|
-
}
|
|
1833
|
-
function warnHttpUsage(authConfig, port) {
|
|
1834
|
-
if (!authConfig?.enabled) {
|
|
1835
|
-
return;
|
|
1836
|
-
}
|
|
1837
|
-
const isLocalhost = process.env.NODE_ENV !== "production" || port === 6280 || // default dev port
|
|
1838
|
-
process.env.HOSTNAME?.includes("localhost");
|
|
1839
|
-
if (!isLocalhost) {
|
|
1840
|
-
logger.warn(
|
|
1841
|
-
"⚠️ Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
|
|
1842
|
-
);
|
|
1843
|
-
}
|
|
1844
|
-
}
|
|
1845
|
-
function resolveEmbeddingContext(embeddingModel) {
|
|
1846
|
-
try {
|
|
1847
|
-
let modelSpec = embeddingModel;
|
|
1848
|
-
if (!modelSpec && process.env.OPENAI_API_KEY) {
|
|
1849
|
-
modelSpec = "text-embedding-3-small";
|
|
1850
|
-
logger.debug(
|
|
1851
|
-
"Using default OpenAI embedding model due to OPENAI_API_KEY presence."
|
|
1852
|
-
);
|
|
1853
|
-
}
|
|
1854
|
-
if (!modelSpec) {
|
|
1855
|
-
logger.debug(
|
|
1856
|
-
"No embedding model specified and OPENAI_API_KEY not found. Embeddings are disabled."
|
|
1857
|
-
);
|
|
1858
|
-
return null;
|
|
1859
|
-
}
|
|
1860
|
-
logger.debug(`Resolving embedding configuration for model: ${modelSpec}`);
|
|
1861
|
-
return EmbeddingConfig.parseEmbeddingConfig(modelSpec);
|
|
1862
|
-
} catch (error) {
|
|
1863
|
-
logger.debug(`Failed to resolve embedding configuration: ${error}`);
|
|
1864
|
-
return null;
|
|
1865
|
-
}
|
|
1866
|
-
}
|
|
1867
|
-
class RemoteEventProxy {
|
|
1868
|
-
constructor(remoteWorkerUrl, localEventBus) {
|
|
1869
|
-
this.remoteWorkerUrl = remoteWorkerUrl;
|
|
1870
|
-
this.localEventBus = localEventBus;
|
|
1871
|
-
}
|
|
1872
|
-
trpcClient = null;
|
|
1873
|
-
wsClient = null;
|
|
1874
|
-
subscription = null;
|
|
1875
|
-
isConnected = false;
|
|
1876
|
-
/**
|
|
1877
|
-
* Start subscribing to remote events and forwarding them locally.
|
|
1878
|
-
*/
|
|
1879
|
-
async connect() {
|
|
1880
|
-
if (this.isConnected) {
|
|
1881
|
-
logger.warn("Remote event proxy already connected");
|
|
1882
|
-
return;
|
|
1883
|
-
}
|
|
1884
|
-
logger.info(`📡 Connecting to remote worker at ${this.remoteWorkerUrl}`);
|
|
1885
|
-
try {
|
|
1886
|
-
const url = new URL(this.remoteWorkerUrl);
|
|
1887
|
-
const baseUrl = `${url.protocol}//${url.host}`;
|
|
1888
|
-
const wsUrl = baseUrl.replace(/^http/, "ws");
|
|
1889
|
-
this.wsClient = createWSClient({
|
|
1890
|
-
url: wsUrl
|
|
1891
|
-
});
|
|
1892
|
-
this.trpcClient = createTRPCClient({
|
|
1893
|
-
links: [
|
|
1894
|
-
splitLink({
|
|
1895
|
-
condition: (op) => op.type === "subscription",
|
|
1896
|
-
true: wsLink({ client: this.wsClient, transformer: superjson }),
|
|
1897
|
-
false: httpBatchLink({ url: this.remoteWorkerUrl, transformer: superjson })
|
|
1898
|
-
})
|
|
1899
|
-
]
|
|
1900
|
-
});
|
|
1901
|
-
this.subscription = this.trpcClient.events.subscribe.subscribe(
|
|
1902
|
-
{},
|
|
1903
|
-
// Subscribe to all event types
|
|
1904
|
-
{
|
|
1905
|
-
onData: (data) => {
|
|
1906
|
-
logger.debug(`📥 Received remote event: ${data.type}`);
|
|
1907
|
-
this.localEventBus.emit(data.type, data.payload);
|
|
1908
|
-
},
|
|
1909
|
-
onError: (error) => {
|
|
1910
|
-
logger.error(`❌ Remote event subscription error: ${error}`);
|
|
1911
|
-
this.isConnected = false;
|
|
1912
|
-
this.scheduleReconnect();
|
|
1913
|
-
},
|
|
1914
|
-
onStarted: () => {
|
|
1915
|
-
logger.info("✅ Remote event subscription started");
|
|
1916
|
-
this.isConnected = true;
|
|
1917
|
-
},
|
|
1918
|
-
onComplete: () => {
|
|
1919
|
-
logger.info("✅ Remote event subscription completed");
|
|
1920
|
-
this.isConnected = false;
|
|
1921
|
-
}
|
|
1922
|
-
}
|
|
1923
|
-
);
|
|
1924
|
-
} catch (error) {
|
|
1925
|
-
logger.error(`❌ Failed to connect to remote worker: ${error}`);
|
|
1926
|
-
this.scheduleReconnect();
|
|
1927
|
-
}
|
|
1928
|
-
}
|
|
1929
|
-
/**
|
|
1930
|
-
* Disconnect from the remote worker and stop forwarding events.
|
|
1931
|
-
*/
|
|
1932
|
-
disconnect() {
|
|
1933
|
-
if (this.subscription) {
|
|
1934
|
-
this.subscription.unsubscribe();
|
|
1935
|
-
this.subscription = null;
|
|
1936
|
-
}
|
|
1937
|
-
if (this.wsClient) {
|
|
1938
|
-
this.wsClient.close();
|
|
1939
|
-
this.wsClient = null;
|
|
1940
|
-
}
|
|
1941
|
-
this.isConnected = false;
|
|
1942
|
-
logger.info("🚫 Disconnected from remote worker");
|
|
1943
|
-
}
|
|
1944
|
-
/**
|
|
1945
|
-
* Check if the proxy is currently connected to the remote worker.
|
|
1946
|
-
*/
|
|
1947
|
-
isActive() {
|
|
1948
|
-
return this.isConnected;
|
|
1949
|
-
}
|
|
1950
|
-
/**
|
|
1951
|
-
* Schedule a reconnection attempt after a delay.
|
|
1952
|
-
*/
|
|
1953
|
-
scheduleReconnect() {
|
|
1954
|
-
logger.info("🔄 Scheduling reconnect to remote worker in 5 seconds...");
|
|
1955
|
-
setTimeout(() => {
|
|
1956
|
-
if (!this.isConnected) {
|
|
1957
|
-
this.connect();
|
|
1958
|
-
}
|
|
1959
|
-
}, 5e3);
|
|
1960
|
-
}
|
|
1961
|
-
}
|
|
1962
|
-
class ToolError extends Error {
|
|
1963
|
-
constructor(message, toolName) {
|
|
1964
|
-
super(message);
|
|
1965
|
-
this.toolName = toolName;
|
|
1966
|
-
this.name = this.constructor.name;
|
|
1967
|
-
}
|
|
1968
|
-
}
|
|
1969
|
-
class ValidationError extends ToolError {
|
|
1970
|
-
}
|
|
1971
|
-
const DEFAULT_MAX_PAGES = 1e3;
|
|
1972
|
-
const DEFAULT_MAX_DEPTH$1 = 3;
|
|
1973
|
-
const DEFAULT_MAX_CONCURRENCY = 3;
|
|
1974
|
-
const DEFAULT_PROTOCOL = "auto";
|
|
1975
|
-
const DEFAULT_HTTP_PORT = 6280;
|
|
1976
|
-
const DEFAULT_WEB_PORT = 6281;
|
|
1977
|
-
const DEFAULT_HOST = "127.0.0.1";
|
|
1978
|
-
const DEFAULT_PAGE_TIMEOUT = 5e3;
|
|
1979
|
-
const FETCHER_MAX_RETRIES = 6;
|
|
1980
|
-
const FETCHER_BASE_DELAY = 1e3;
|
|
1981
|
-
const SPLITTER_MIN_CHUNK_SIZE = 500;
|
|
1982
|
-
const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
|
|
1983
|
-
const SPLITTER_MAX_CHUNK_SIZE = 5e3;
|
|
1984
|
-
const EMBEDDING_BATCH_SIZE = 100;
|
|
1985
|
-
const EMBEDDING_BATCH_CHARS = 5e4;
|
|
1986
|
-
const MIGRATION_MAX_RETRIES = 5;
|
|
1987
|
-
const MIGRATION_RETRY_DELAY_MS = 300;
|
|
1988
|
-
const SEARCH_OVERFETCH_FACTOR = 2;
|
|
1989
|
-
const SEARCH_WEIGHT_VEC = 1;
|
|
1990
|
-
const SEARCH_WEIGHT_FTS = 1;
|
|
1991
|
-
const VECTOR_SEARCH_MULTIPLIER = 10;
|
|
1992
|
-
function createResponse(text) {
|
|
1993
|
-
return {
|
|
1994
|
-
content: [
|
|
1995
|
-
{
|
|
1996
|
-
type: "text",
|
|
1997
|
-
text
|
|
1998
|
-
}
|
|
1999
|
-
],
|
|
2000
|
-
isError: false
|
|
2001
|
-
};
|
|
2002
|
-
}
|
|
2003
|
-
function createError(errorOrText) {
|
|
2004
|
-
const text = errorOrText instanceof Error ? errorOrText.message : String(errorOrText);
|
|
2005
|
-
return {
|
|
2006
|
-
content: [
|
|
2007
|
-
{
|
|
2008
|
-
type: "text",
|
|
2009
|
-
text
|
|
2010
|
-
}
|
|
2011
|
-
],
|
|
2012
|
-
isError: true
|
|
1497
|
+
content: [
|
|
1498
|
+
{
|
|
1499
|
+
type: "text",
|
|
1500
|
+
text
|
|
1501
|
+
}
|
|
1502
|
+
],
|
|
1503
|
+
isError: true
|
|
2013
1504
|
};
|
|
2014
1505
|
}
|
|
2015
1506
|
function createMcpServerInstance(tools, readOnly = false) {
|
|
@@ -2900,7 +2391,7 @@ class BrowserFetcher {
|
|
|
2900
2391
|
}
|
|
2901
2392
|
logger.debug("Browser closed successfully");
|
|
2902
2393
|
} catch (error) {
|
|
2903
|
-
logger.warn(`⚠️
|
|
2394
|
+
logger.warn(`⚠️ Error closing browser: ${error}`);
|
|
2904
2395
|
}
|
|
2905
2396
|
}
|
|
2906
2397
|
}
|
|
@@ -5497,10 +4988,80 @@ var ScrapeMode = /* @__PURE__ */ ((ScrapeMode2) => {
|
|
|
5497
4988
|
ScrapeMode2["Auto"] = "auto";
|
|
5498
4989
|
return ScrapeMode2;
|
|
5499
4990
|
})(ScrapeMode || {});
|
|
5500
|
-
class
|
|
5501
|
-
|
|
4991
|
+
class SimpleMemoryCache {
|
|
4992
|
+
cache;
|
|
4993
|
+
maxSize;
|
|
4994
|
+
constructor(maxSize) {
|
|
4995
|
+
if (maxSize <= 0) {
|
|
4996
|
+
throw new Error("maxSize must be positive");
|
|
4997
|
+
}
|
|
4998
|
+
this.cache = /* @__PURE__ */ new Map();
|
|
4999
|
+
this.maxSize = maxSize;
|
|
5000
|
+
}
|
|
5502
5001
|
/**
|
|
5503
|
-
*
|
|
5002
|
+
* Retrieve a value from the cache.
|
|
5003
|
+
* Marks the key as recently used (moves to end of Map).
|
|
5004
|
+
*/
|
|
5005
|
+
get(key) {
|
|
5006
|
+
const value = this.cache.get(key);
|
|
5007
|
+
if (value !== void 0) {
|
|
5008
|
+
this.cache.delete(key);
|
|
5009
|
+
this.cache.set(key, value);
|
|
5010
|
+
}
|
|
5011
|
+
return value;
|
|
5012
|
+
}
|
|
5013
|
+
/**
|
|
5014
|
+
* Store a value in the cache.
|
|
5015
|
+
* If cache is full, evicts the oldest entry first.
|
|
5016
|
+
*/
|
|
5017
|
+
set(key, value) {
|
|
5018
|
+
if (this.cache.has(key)) {
|
|
5019
|
+
this.cache.delete(key);
|
|
5020
|
+
} else if (this.cache.size >= this.maxSize) {
|
|
5021
|
+
const oldestKey = this.cache.keys().next().value;
|
|
5022
|
+
if (oldestKey !== void 0) {
|
|
5023
|
+
this.cache.delete(oldestKey);
|
|
5024
|
+
}
|
|
5025
|
+
}
|
|
5026
|
+
this.cache.set(key, value);
|
|
5027
|
+
}
|
|
5028
|
+
/**
|
|
5029
|
+
* Check if a key exists in the cache.
|
|
5030
|
+
* Marks the key as recently used (moves to end of Map) to maintain LRU semantics.
|
|
5031
|
+
*/
|
|
5032
|
+
has(key) {
|
|
5033
|
+
const exists = this.cache.has(key);
|
|
5034
|
+
if (exists) {
|
|
5035
|
+
const value = this.cache.get(key);
|
|
5036
|
+
if (value !== void 0) {
|
|
5037
|
+
this.cache.delete(key);
|
|
5038
|
+
this.cache.set(key, value);
|
|
5039
|
+
}
|
|
5040
|
+
}
|
|
5041
|
+
return exists;
|
|
5042
|
+
}
|
|
5043
|
+
/**
|
|
5044
|
+
* Get current cache size.
|
|
5045
|
+
*/
|
|
5046
|
+
get size() {
|
|
5047
|
+
return this.cache.size;
|
|
5048
|
+
}
|
|
5049
|
+
/**
|
|
5050
|
+
* Clear all entries from the cache.
|
|
5051
|
+
*/
|
|
5052
|
+
clear() {
|
|
5053
|
+
this.cache.clear();
|
|
5054
|
+
}
|
|
5055
|
+
}
|
|
5056
|
+
class HtmlPlaywrightMiddleware {
|
|
5057
|
+
browser = null;
|
|
5058
|
+
// Static LRU cache shared across all instances for all fetched resources
|
|
5059
|
+
// Max 200 entries, each limited in size to prevent caching large resources
|
|
5060
|
+
static resourceCache = new SimpleMemoryCache(
|
|
5061
|
+
FETCHER_MAX_CACHE_ITEMS
|
|
5062
|
+
);
|
|
5063
|
+
/**
|
|
5064
|
+
* Initializes the Playwright browser instance.
|
|
5504
5065
|
* Consider making this more robust (e.g., lazy initialization, singleton).
|
|
5505
5066
|
*/
|
|
5506
5067
|
async ensureBrowser() {
|
|
@@ -5843,25 +5404,97 @@ class HtmlPlaywrightMiddleware {
|
|
|
5843
5404
|
return [];
|
|
5844
5405
|
}
|
|
5845
5406
|
}
|
|
5407
|
+
/**
|
|
5408
|
+
* Sets up caching route interception for a Playwright page.
|
|
5409
|
+
* This handles:
|
|
5410
|
+
* - Aborting non-essential resources (images, fonts, media)
|
|
5411
|
+
* - Caching GET requests to speed up subsequent loads
|
|
5412
|
+
* - Forwarding custom headers and credentials for same-origin requests
|
|
5413
|
+
*
|
|
5414
|
+
* @param page The Playwright page to set up routing for
|
|
5415
|
+
* @param customHeaders Custom headers to forward with requests
|
|
5416
|
+
* @param credentials Optional credentials for same-origin requests
|
|
5417
|
+
* @param origin The origin for same-origin credential checking
|
|
5418
|
+
*/
|
|
5419
|
+
async setupCachingRouteInterception(page, customHeaders = {}, credentials, origin) {
|
|
5420
|
+
await page.route("**/*", async (route) => {
|
|
5421
|
+
const reqUrl = route.request().url();
|
|
5422
|
+
const reqOrigin = (() => {
|
|
5423
|
+
try {
|
|
5424
|
+
return new URL(reqUrl).origin;
|
|
5425
|
+
} catch {
|
|
5426
|
+
return null;
|
|
5427
|
+
}
|
|
5428
|
+
})();
|
|
5429
|
+
const resourceType = route.request().resourceType();
|
|
5430
|
+
if (["image", "font", "media"].includes(resourceType)) {
|
|
5431
|
+
return route.abort();
|
|
5432
|
+
}
|
|
5433
|
+
if (route.request().method() === "GET") {
|
|
5434
|
+
const cached = HtmlPlaywrightMiddleware.resourceCache.get(reqUrl);
|
|
5435
|
+
if (cached !== void 0) {
|
|
5436
|
+
logger.debug(`✓ Cache hit for ${resourceType}: ${reqUrl}`);
|
|
5437
|
+
return route.fulfill({
|
|
5438
|
+
status: 200,
|
|
5439
|
+
contentType: cached.contentType,
|
|
5440
|
+
body: cached.body
|
|
5441
|
+
});
|
|
5442
|
+
}
|
|
5443
|
+
const headers2 = mergePlaywrightHeaders(
|
|
5444
|
+
route.request().headers(),
|
|
5445
|
+
customHeaders,
|
|
5446
|
+
credentials,
|
|
5447
|
+
origin,
|
|
5448
|
+
reqOrigin ?? void 0
|
|
5449
|
+
);
|
|
5450
|
+
const response = await route.fetch({ headers: headers2 });
|
|
5451
|
+
const body = await response.text();
|
|
5452
|
+
if (response.status() >= 200 && response.status() < 300 && body.length > 0) {
|
|
5453
|
+
const contentSizeBytes = Buffer.byteLength(body, "utf8");
|
|
5454
|
+
if (contentSizeBytes <= FETCHER_MAX_CACHE_ITEM_SIZE_BYTES) {
|
|
5455
|
+
const contentType = response.headers()["content-type"] || "application/octet-stream";
|
|
5456
|
+
HtmlPlaywrightMiddleware.resourceCache.set(reqUrl, { body, contentType });
|
|
5457
|
+
logger.debug(
|
|
5458
|
+
`Cached ${resourceType}: ${reqUrl} (${contentSizeBytes} bytes, cache size: ${HtmlPlaywrightMiddleware.resourceCache.size})`
|
|
5459
|
+
);
|
|
5460
|
+
} else {
|
|
5461
|
+
logger.debug(
|
|
5462
|
+
`Resource too large to cache: ${reqUrl} (${contentSizeBytes} bytes > ${FETCHER_MAX_CACHE_ITEM_SIZE_BYTES} bytes limit)`
|
|
5463
|
+
);
|
|
5464
|
+
}
|
|
5465
|
+
}
|
|
5466
|
+
return route.fulfill({ response });
|
|
5467
|
+
}
|
|
5468
|
+
const headers = mergePlaywrightHeaders(
|
|
5469
|
+
route.request().headers(),
|
|
5470
|
+
customHeaders,
|
|
5471
|
+
credentials,
|
|
5472
|
+
origin,
|
|
5473
|
+
reqOrigin ?? void 0
|
|
5474
|
+
);
|
|
5475
|
+
return route.continue({ headers });
|
|
5476
|
+
});
|
|
5477
|
+
}
|
|
5846
5478
|
/**
|
|
5847
5479
|
* Fetches content from a frame URL by navigating to it in a new page.
|
|
5480
|
+
* Uses LRU cache to avoid re-fetching identical frames across multiple pages.
|
|
5848
5481
|
*
|
|
5849
5482
|
* @param parentPage The parent page (used to resolve relative URLs and share context)
|
|
5850
5483
|
* @param frameUrl The URL of the frame to fetch content from
|
|
5851
5484
|
* @returns The HTML content of the frame
|
|
5852
5485
|
*/
|
|
5853
5486
|
async fetchFrameContent(parentPage, frameUrl) {
|
|
5487
|
+
const resolvedUrl = new URL(frameUrl, parentPage.url()).href;
|
|
5488
|
+
const cached = HtmlPlaywrightMiddleware.resourceCache.get(resolvedUrl);
|
|
5489
|
+
if (cached !== void 0) {
|
|
5490
|
+
logger.debug(`✓ Cache hit for frame: ${resolvedUrl}`);
|
|
5491
|
+
return cached.body;
|
|
5492
|
+
}
|
|
5493
|
+
logger.debug(`Cache miss for frame: ${resolvedUrl}`);
|
|
5854
5494
|
let framePage = null;
|
|
5855
5495
|
try {
|
|
5856
|
-
const resolvedUrl = new URL(frameUrl, parentPage.url()).href;
|
|
5857
5496
|
framePage = await parentPage.context().newPage();
|
|
5858
|
-
await
|
|
5859
|
-
const resourceType = route.request().resourceType();
|
|
5860
|
-
if (["image", "font", "media"].includes(resourceType)) {
|
|
5861
|
-
return route.abort();
|
|
5862
|
-
}
|
|
5863
|
-
return route.continue();
|
|
5864
|
-
});
|
|
5497
|
+
await this.setupCachingRouteInterception(framePage);
|
|
5865
5498
|
logger.debug(`Fetching frame content from: ${resolvedUrl}`);
|
|
5866
5499
|
await framePage.goto(resolvedUrl, {
|
|
5867
5500
|
waitUntil: "load",
|
|
@@ -5873,8 +5506,23 @@ class HtmlPlaywrightMiddleware {
|
|
|
5873
5506
|
"body",
|
|
5874
5507
|
(el) => el.innerHTML
|
|
5875
5508
|
);
|
|
5509
|
+
const content = bodyContent || "";
|
|
5510
|
+
const contentSizeBytes = Buffer.byteLength(content, "utf8");
|
|
5511
|
+
if (contentSizeBytes <= FETCHER_MAX_CACHE_ITEM_SIZE_BYTES) {
|
|
5512
|
+
HtmlPlaywrightMiddleware.resourceCache.set(resolvedUrl, {
|
|
5513
|
+
body: content,
|
|
5514
|
+
contentType: "text/html; charset=utf-8"
|
|
5515
|
+
});
|
|
5516
|
+
logger.debug(
|
|
5517
|
+
`Cached frame content: ${resolvedUrl} (${contentSizeBytes} bytes, cache size: ${HtmlPlaywrightMiddleware.resourceCache.size})`
|
|
5518
|
+
);
|
|
5519
|
+
} else {
|
|
5520
|
+
logger.debug(
|
|
5521
|
+
`Frame content too large to cache: ${resolvedUrl} (${contentSizeBytes} bytes > ${FETCHER_MAX_CACHE_ITEM_SIZE_BYTES} bytes limit)`
|
|
5522
|
+
);
|
|
5523
|
+
}
|
|
5876
5524
|
logger.debug(`Successfully fetched frame content from: ${resolvedUrl}`);
|
|
5877
|
-
return
|
|
5525
|
+
return content;
|
|
5878
5526
|
} catch (error) {
|
|
5879
5527
|
logger.debug(`Error fetching frame content from ${frameUrl}: ${error}`);
|
|
5880
5528
|
return "";
|
|
@@ -5973,25 +5621,59 @@ ${frame.content}
|
|
|
5973
5621
|
await this.injectShadowDOMExtractor(page);
|
|
5974
5622
|
await page.route("**/*", async (route) => {
|
|
5975
5623
|
const reqUrl = route.request().url();
|
|
5976
|
-
const reqOrigin = (() => {
|
|
5977
|
-
try {
|
|
5978
|
-
return new URL(reqUrl).origin;
|
|
5979
|
-
} catch {
|
|
5980
|
-
return null;
|
|
5981
|
-
}
|
|
5982
|
-
})();
|
|
5983
5624
|
if (reqUrl === context.source) {
|
|
5984
5625
|
return route.fulfill({
|
|
5985
5626
|
status: 200,
|
|
5986
5627
|
contentType: "text/html; charset=utf-8",
|
|
5987
5628
|
body: context.content
|
|
5988
|
-
// context.content is always a string in middleware
|
|
5989
5629
|
});
|
|
5990
5630
|
}
|
|
5631
|
+
const reqOrigin = (() => {
|
|
5632
|
+
try {
|
|
5633
|
+
return new URL(reqUrl).origin;
|
|
5634
|
+
} catch {
|
|
5635
|
+
return null;
|
|
5636
|
+
}
|
|
5637
|
+
})();
|
|
5991
5638
|
const resourceType = route.request().resourceType();
|
|
5992
5639
|
if (["image", "font", "media"].includes(resourceType)) {
|
|
5993
5640
|
return route.abort();
|
|
5994
5641
|
}
|
|
5642
|
+
if (route.request().method() === "GET") {
|
|
5643
|
+
const cached = HtmlPlaywrightMiddleware.resourceCache.get(reqUrl);
|
|
5644
|
+
if (cached !== void 0) {
|
|
5645
|
+
logger.debug(`✓ Cache hit for ${resourceType}: ${reqUrl}`);
|
|
5646
|
+
return route.fulfill({
|
|
5647
|
+
status: 200,
|
|
5648
|
+
contentType: cached.contentType,
|
|
5649
|
+
body: cached.body
|
|
5650
|
+
});
|
|
5651
|
+
}
|
|
5652
|
+
const headers2 = mergePlaywrightHeaders(
|
|
5653
|
+
route.request().headers(),
|
|
5654
|
+
customHeaders,
|
|
5655
|
+
credentials ?? void 0,
|
|
5656
|
+
origin ?? void 0,
|
|
5657
|
+
reqOrigin ?? void 0
|
|
5658
|
+
);
|
|
5659
|
+
const response = await route.fetch({ headers: headers2 });
|
|
5660
|
+
const body = await response.text();
|
|
5661
|
+
if (response.status() >= 200 && response.status() < 300 && body.length > 0) {
|
|
5662
|
+
const contentSizeBytes = Buffer.byteLength(body, "utf8");
|
|
5663
|
+
if (contentSizeBytes <= FETCHER_MAX_CACHE_ITEM_SIZE_BYTES) {
|
|
5664
|
+
const contentType2 = response.headers()["content-type"] || "application/octet-stream";
|
|
5665
|
+
HtmlPlaywrightMiddleware.resourceCache.set(reqUrl, { body, contentType: contentType2 });
|
|
5666
|
+
logger.debug(
|
|
5667
|
+
`Cached ${resourceType}: ${reqUrl} (${contentSizeBytes} bytes, cache size: ${HtmlPlaywrightMiddleware.resourceCache.size})`
|
|
5668
|
+
);
|
|
5669
|
+
} else {
|
|
5670
|
+
logger.debug(
|
|
5671
|
+
`Resource too large to cache: ${reqUrl} (${contentSizeBytes} bytes > ${FETCHER_MAX_CACHE_ITEM_SIZE_BYTES} bytes limit)`
|
|
5672
|
+
);
|
|
5673
|
+
}
|
|
5674
|
+
}
|
|
5675
|
+
return route.fulfill({ response });
|
|
5676
|
+
}
|
|
5995
5677
|
const headers = mergePlaywrightHeaders(
|
|
5996
5678
|
route.request().headers(),
|
|
5997
5679
|
customHeaders,
|
|
@@ -6172,6 +5854,8 @@ class HtmlSanitizerMiddleware {
|
|
|
6172
5854
|
return;
|
|
6173
5855
|
}
|
|
6174
5856
|
try {
|
|
5857
|
+
const bodyBeforeSanitization = $("body").html() || "";
|
|
5858
|
+
const textLengthBefore = $("body").text().trim().length;
|
|
6175
5859
|
const selectorsToRemove = [
|
|
6176
5860
|
...context.options.excludeSelectors || [],
|
|
6177
5861
|
// Use options from the context
|
|
@@ -6184,9 +5868,13 @@ class HtmlSanitizerMiddleware {
|
|
|
6184
5868
|
for (const selector of selectorsToRemove) {
|
|
6185
5869
|
try {
|
|
6186
5870
|
const elements = $(selector);
|
|
6187
|
-
const
|
|
5871
|
+
const filteredElements = elements.filter(function() {
|
|
5872
|
+
const tagName = $(this).prop("tagName")?.toLowerCase();
|
|
5873
|
+
return tagName !== "html" && tagName !== "body";
|
|
5874
|
+
});
|
|
5875
|
+
const count = filteredElements.length;
|
|
6188
5876
|
if (count > 0) {
|
|
6189
|
-
|
|
5877
|
+
filteredElements.remove();
|
|
6190
5878
|
removedCount += count;
|
|
6191
5879
|
}
|
|
6192
5880
|
} catch (selectorError) {
|
|
@@ -6199,6 +5887,13 @@ class HtmlSanitizerMiddleware {
|
|
|
6199
5887
|
}
|
|
6200
5888
|
}
|
|
6201
5889
|
logger.debug(`Removed ${removedCount} elements for ${context.source}`);
|
|
5890
|
+
const textLengthAfter = $("body").text().trim().length;
|
|
5891
|
+
if (textLengthBefore > 0 && textLengthAfter === 0) {
|
|
5892
|
+
logger.warn(
|
|
5893
|
+
`⚠️ Sanitization removed all content from ${context.source}. Reverting to pre-sanitization state.`
|
|
5894
|
+
);
|
|
5895
|
+
$("body").html(bodyBeforeSanitization);
|
|
5896
|
+
}
|
|
6202
5897
|
} catch (error) {
|
|
6203
5898
|
logger.error(
|
|
6204
5899
|
`❌ Error during HTML element removal for ${context.source}: ${error}`
|
|
@@ -6349,6 +6044,29 @@ class MarkdownMetadataExtractorMiddleware {
|
|
|
6349
6044
|
}
|
|
6350
6045
|
}
|
|
6351
6046
|
class HtmlNormalizationMiddleware {
|
|
6047
|
+
// Known tracking/analytics domains and patterns to filter out
|
|
6048
|
+
trackingPatterns = [
|
|
6049
|
+
"adroll.com",
|
|
6050
|
+
"doubleclick.net",
|
|
6051
|
+
"google-analytics.com",
|
|
6052
|
+
"googletagmanager.com",
|
|
6053
|
+
"analytics.twitter.com",
|
|
6054
|
+
"twitter.com/1/i/adsct",
|
|
6055
|
+
"t.co/1/i/adsct",
|
|
6056
|
+
"bat.bing.com",
|
|
6057
|
+
"pixel.rubiconproject.com",
|
|
6058
|
+
"casalemedia.com",
|
|
6059
|
+
"tremorhub.com",
|
|
6060
|
+
"rlcdn.com",
|
|
6061
|
+
"facebook.com/tr",
|
|
6062
|
+
"linkedin.com/px",
|
|
6063
|
+
"quantserve.com",
|
|
6064
|
+
"scorecardresearch.com",
|
|
6065
|
+
"hotjar.com",
|
|
6066
|
+
"mouseflow.com",
|
|
6067
|
+
"crazyegg.com",
|
|
6068
|
+
"clarity.ms"
|
|
6069
|
+
];
|
|
6352
6070
|
async process(context, next) {
|
|
6353
6071
|
if (!context.dom) {
|
|
6354
6072
|
logger.debug(
|
|
@@ -6372,14 +6090,34 @@ class HtmlNormalizationMiddleware {
|
|
|
6372
6090
|
}
|
|
6373
6091
|
await next();
|
|
6374
6092
|
}
|
|
6093
|
+
/**
|
|
6094
|
+
* Checks if an image should be kept based on its source URL.
|
|
6095
|
+
* Filters out tracking pixels and analytics beacons.
|
|
6096
|
+
*/
|
|
6097
|
+
shouldKeepImage(src) {
|
|
6098
|
+
const srcLower = src.toLowerCase();
|
|
6099
|
+
return !this.trackingPatterns.some((pattern) => srcLower.includes(pattern));
|
|
6100
|
+
}
|
|
6375
6101
|
/**
|
|
6376
6102
|
* Normalizes image URLs by converting relative URLs to absolute URLs.
|
|
6103
|
+
* Removes tracking/analytics images.
|
|
6104
|
+
* Preserves data URIs (inline images).
|
|
6377
6105
|
*/
|
|
6378
6106
|
normalizeImageUrls($, baseUrl) {
|
|
6379
6107
|
$("img").each((_index, element) => {
|
|
6380
6108
|
const $img = $(element);
|
|
6381
6109
|
const src = $img.attr("src");
|
|
6382
|
-
if (!src)
|
|
6110
|
+
if (!src) {
|
|
6111
|
+
$img.remove();
|
|
6112
|
+
return;
|
|
6113
|
+
}
|
|
6114
|
+
if (src.startsWith("data:")) {
|
|
6115
|
+
return;
|
|
6116
|
+
}
|
|
6117
|
+
if (!this.shouldKeepImage(src)) {
|
|
6118
|
+
$img.remove();
|
|
6119
|
+
return;
|
|
6120
|
+
}
|
|
6383
6121
|
try {
|
|
6384
6122
|
new URL(src);
|
|
6385
6123
|
} catch {
|
|
@@ -6388,6 +6126,7 @@ class HtmlNormalizationMiddleware {
|
|
|
6388
6126
|
$img.attr("src", absoluteUrl);
|
|
6389
6127
|
} catch (error) {
|
|
6390
6128
|
logger.debug(`Failed to resolve relative image URL: ${src} - ${error}`);
|
|
6129
|
+
$img.remove();
|
|
6391
6130
|
}
|
|
6392
6131
|
}
|
|
6393
6132
|
});
|
|
@@ -7260,6 +6999,9 @@ Please verify the server URL includes the correct port (default 8080) and ends w
|
|
|
7260
6999
|
async storeScraperOptions(versionId, options) {
|
|
7261
7000
|
await this.client.storeScraperOptions.mutate({ versionId, options });
|
|
7262
7001
|
}
|
|
7002
|
+
getActiveEmbeddingConfig() {
|
|
7003
|
+
return null;
|
|
7004
|
+
}
|
|
7263
7005
|
}
|
|
7264
7006
|
class JsonPipeline extends BasePipeline {
|
|
7265
7007
|
middleware;
|
|
@@ -8058,7 +7800,7 @@ async function applyMigrations(db) {
|
|
|
8058
7800
|
db.pragma("temp_store = MEMORY");
|
|
8059
7801
|
logger.debug("Applied performance optimizations for migration");
|
|
8060
7802
|
} catch (_error) {
|
|
8061
|
-
logger.warn("⚠️
|
|
7803
|
+
logger.warn("⚠️ Could not apply all performance optimizations for migration");
|
|
8062
7804
|
}
|
|
8063
7805
|
const overallTransaction = db.transaction(() => {
|
|
8064
7806
|
logger.debug("Checking database migrations...");
|
|
@@ -8111,7 +7853,7 @@ async function applyMigrations(db) {
|
|
|
8111
7853
|
db.exec("VACUUM");
|
|
8112
7854
|
logger.debug("Database vacuum completed successfully");
|
|
8113
7855
|
} catch (error) {
|
|
8114
|
-
logger.warn(`⚠️
|
|
7856
|
+
logger.warn(`⚠️ Could not vacuum database after migrations: ${error}`);
|
|
8115
7857
|
}
|
|
8116
7858
|
} else {
|
|
8117
7859
|
logger.debug("Skipping VACUUM - no migrations were applied");
|
|
@@ -8137,17 +7879,321 @@ async function applyMigrations(db) {
|
|
|
8137
7879
|
}
|
|
8138
7880
|
}
|
|
8139
7881
|
}
|
|
8140
|
-
try {
|
|
8141
|
-
db.pragma("journal_mode = WAL");
|
|
8142
|
-
db.pragma("wal_autocheckpoint = 1000");
|
|
8143
|
-
db.pragma("busy_timeout = 30000");
|
|
8144
|
-
db.pragma("foreign_keys = ON");
|
|
8145
|
-
db.pragma("synchronous = NORMAL");
|
|
8146
|
-
logger.debug(
|
|
8147
|
-
"Applied production database configuration (WAL mode, autocheckpoint, foreign keys, busy timeout)"
|
|
8148
|
-
);
|
|
8149
|
-
} catch (_error) {
|
|
8150
|
-
logger.warn("⚠️
|
|
7882
|
+
try {
|
|
7883
|
+
db.pragma("journal_mode = WAL");
|
|
7884
|
+
db.pragma("wal_autocheckpoint = 1000");
|
|
7885
|
+
db.pragma("busy_timeout = 30000");
|
|
7886
|
+
db.pragma("foreign_keys = ON");
|
|
7887
|
+
db.pragma("synchronous = NORMAL");
|
|
7888
|
+
logger.debug(
|
|
7889
|
+
"Applied production database configuration (WAL mode, autocheckpoint, foreign keys, busy timeout)"
|
|
7890
|
+
);
|
|
7891
|
+
} catch (_error) {
|
|
7892
|
+
logger.warn("⚠️ Could not apply all production database settings");
|
|
7893
|
+
}
|
|
7894
|
+
}
|
|
7895
|
+
class EmbeddingConfig {
|
|
7896
|
+
static instance = null;
|
|
7897
|
+
/**
|
|
7898
|
+
* Get the singleton instance of EmbeddingConfig.
|
|
7899
|
+
* Creates the instance if it doesn't exist.
|
|
7900
|
+
*/
|
|
7901
|
+
static getInstance() {
|
|
7902
|
+
if (EmbeddingConfig.instance === null) {
|
|
7903
|
+
EmbeddingConfig.instance = new EmbeddingConfig();
|
|
7904
|
+
}
|
|
7905
|
+
return EmbeddingConfig.instance;
|
|
7906
|
+
}
|
|
7907
|
+
/**
|
|
7908
|
+
* Reset the singleton instance (useful for testing).
|
|
7909
|
+
*/
|
|
7910
|
+
static resetInstance() {
|
|
7911
|
+
EmbeddingConfig.instance = null;
|
|
7912
|
+
}
|
|
7913
|
+
/**
|
|
7914
|
+
* Known dimensions for common embedding models.
|
|
7915
|
+
* This avoids expensive API calls for dimension detection in telemetry.
|
|
7916
|
+
*
|
|
7917
|
+
* Note: The "openai" provider also supports OpenAI-compatible APIs like:
|
|
7918
|
+
* - Ollama (local models)
|
|
7919
|
+
* - LMStudio (local models)
|
|
7920
|
+
* - Any service implementing OpenAI's embedding API
|
|
7921
|
+
*/
|
|
7922
|
+
knownModelDimensions = {
|
|
7923
|
+
// OpenAI models (also works with Ollama, LMStudio, and other OpenAI-compatible APIs)
|
|
7924
|
+
"text-embedding-3-small": 1536,
|
|
7925
|
+
"text-embedding-3-large": 3072,
|
|
7926
|
+
"text-embedding-ada-002": 1536,
|
|
7927
|
+
// Google Vertex AI models
|
|
7928
|
+
"text-embedding-004": 768,
|
|
7929
|
+
"textembedding-gecko@003": 768,
|
|
7930
|
+
"textembedding-gecko@002": 768,
|
|
7931
|
+
"textembedding-gecko@001": 768,
|
|
7932
|
+
// Google Gemini models (with MRL support)
|
|
7933
|
+
"text-embedding-preview-0409": 768,
|
|
7934
|
+
"embedding-001": 768,
|
|
7935
|
+
// AWS Bedrock models
|
|
7936
|
+
// Amazon Titan models
|
|
7937
|
+
"amazon.titan-embed-text-v1": 1536,
|
|
7938
|
+
"amazon.titan-embed-text-v2:0": 1024,
|
|
7939
|
+
"amazon.titan-embed-image-v1": 1024,
|
|
7940
|
+
// Image embedding model
|
|
7941
|
+
// Cohere models
|
|
7942
|
+
"cohere.embed-english-v3": 1024,
|
|
7943
|
+
"cohere.embed-multilingual-v3": 1024,
|
|
7944
|
+
// SageMaker models (hosted on AWS SageMaker)
|
|
7945
|
+
"intfloat/multilingual-e5-large": 1024,
|
|
7946
|
+
// Additional AWS models that might be supported
|
|
7947
|
+
// Note: Some of these might be placeholders - verify dimensions before use
|
|
7948
|
+
// "amazon.nova-embed-multilingual-v1:0": 4096, // Commented out as noted in source
|
|
7949
|
+
// MTEB Leaderboard models (source: https://huggingface.co/spaces/mteb/leaderboard)
|
|
7950
|
+
// Top performing models from Massive Text Embedding Benchmark
|
|
7951
|
+
"sentence-transformers/all-MiniLM-L6-v2": 384,
|
|
7952
|
+
"gemini-embedding-001": 3072,
|
|
7953
|
+
"Qwen/Qwen3-Embedding-8B": 4096,
|
|
7954
|
+
"Qwen/Qwen3-Embedding-4B": 2560,
|
|
7955
|
+
"Qwen/Qwen3-Embedding-0.6B": 1024,
|
|
7956
|
+
"Linq-AI-Research/Linq-Embed-Mistral": 4096,
|
|
7957
|
+
"Alibaba-NLP/gte-Qwen2-7B-instruct": 3584,
|
|
7958
|
+
"intfloat/multilingual-e5-large-instruct": 1024,
|
|
7959
|
+
"Salesforce/SFR-Embedding-Mistral": 4096,
|
|
7960
|
+
"text-multilingual-embedding-002": 768,
|
|
7961
|
+
"GritLM/GritLM-7B": 4096,
|
|
7962
|
+
"GritLM/GritLM-8x7B": 4096,
|
|
7963
|
+
"intfloat/e5-mistral-7b-instruct": 4096,
|
|
7964
|
+
"Cohere/Cohere-embed-multilingual-v3.0": 1024,
|
|
7965
|
+
"Alibaba-NLP/gte-Qwen2-1.5B-instruct": 8960,
|
|
7966
|
+
"Lajavaness/bilingual-embedding-large": 1024,
|
|
7967
|
+
"Salesforce/SFR-Embedding-2_R": 4096,
|
|
7968
|
+
"NovaSearch/stella_en_1.5B_v5": 8960,
|
|
7969
|
+
"NovaSearch/jasper_en_vision_language_v1": 8960,
|
|
7970
|
+
"nvidia/NV-Embed-v2": 4096,
|
|
7971
|
+
"OrdalieTech/Solon-embeddings-large-0.1": 1024,
|
|
7972
|
+
"BAAI/bge-m3": 1024,
|
|
7973
|
+
"HIT-TMG/KaLM-embedding-multilingual-mini-v1": 896,
|
|
7974
|
+
"jinaai/jina-embeddings-v3": 1024,
|
|
7975
|
+
"Alibaba-NLP/gte-multilingual-base": 768,
|
|
7976
|
+
"Lajavaness/bilingual-embedding-base": 768,
|
|
7977
|
+
"HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": 896,
|
|
7978
|
+
"nvidia/NV-Embed-v1": 4096,
|
|
7979
|
+
"Cohere/Cohere-embed-multilingual-light-v3.0": 384,
|
|
7980
|
+
"manu/bge-m3-custom-fr": 1024,
|
|
7981
|
+
"Lajavaness/bilingual-embedding-small": 384,
|
|
7982
|
+
"Snowflake/snowflake-arctic-embed-l-v2.0": 1024,
|
|
7983
|
+
"intfloat/multilingual-e5-base": 768,
|
|
7984
|
+
"voyage-3-lite": 512,
|
|
7985
|
+
"voyage-3": 1024,
|
|
7986
|
+
"intfloat/multilingual-e5-small": 384,
|
|
7987
|
+
"Alibaba-NLP/gte-Qwen1.5-7B-instruct": 4096,
|
|
7988
|
+
"Snowflake/snowflake-arctic-embed-m-v2.0": 768,
|
|
7989
|
+
"deepvk/USER-bge-m3": 1024,
|
|
7990
|
+
"Cohere/Cohere-embed-english-v3.0": 1024,
|
|
7991
|
+
"Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": 768,
|
|
7992
|
+
"ibm-granite/granite-embedding-278m-multilingual": 768,
|
|
7993
|
+
"NovaSearch/stella_en_400M_v5": 4096,
|
|
7994
|
+
"omarelshehy/arabic-english-sts-matryoshka": 1024,
|
|
7995
|
+
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2": 768,
|
|
7996
|
+
"Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": 768,
|
|
7997
|
+
"Haon-Chen/speed-embedding-7b-instruct": 4096,
|
|
7998
|
+
"sentence-transformers/LaBSE": 768,
|
|
7999
|
+
"WhereIsAI/UAE-Large-V1": 1024,
|
|
8000
|
+
"ibm-granite/granite-embedding-107m-multilingual": 384,
|
|
8001
|
+
"mixedbread-ai/mxbai-embed-large-v1": 1024,
|
|
8002
|
+
"intfloat/e5-large-v2": 1024,
|
|
8003
|
+
"avsolatorio/GIST-large-Embedding-v0": 1024,
|
|
8004
|
+
"sdadas/mmlw-e5-large": 1024,
|
|
8005
|
+
"nomic-ai/nomic-embed-text-v1": 768,
|
|
8006
|
+
"nomic-ai/nomic-embed-text-v1-ablated": 768,
|
|
8007
|
+
"intfloat/e5-base-v2": 768,
|
|
8008
|
+
"BAAI/bge-large-en-v1.5": 1024,
|
|
8009
|
+
"intfloat/e5-large": 1024,
|
|
8010
|
+
"Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": 384,
|
|
8011
|
+
"Cohere/Cohere-embed-english-light-v3.0": 384,
|
|
8012
|
+
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 768,
|
|
8013
|
+
"Gameselo/STS-multilingual-mpnet-base-v2": 768,
|
|
8014
|
+
"thenlper/gte-large": 1024,
|
|
8015
|
+
"avsolatorio/GIST-Embedding-v0": 768,
|
|
8016
|
+
"nomic-ai/nomic-embed-text-v1-unsupervised": 768,
|
|
8017
|
+
"infgrad/stella-base-en-v2": 768,
|
|
8018
|
+
"avsolatorio/NoInstruct-small-Embedding-v0": 384,
|
|
8019
|
+
"dwzhu/e5-base-4k": 768,
|
|
8020
|
+
"sdadas/mmlw-e5-base": 768,
|
|
8021
|
+
"voyage-multilingual-2": 1024,
|
|
8022
|
+
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": 4096,
|
|
8023
|
+
"BAAI/bge-base-en-v1.5": 768,
|
|
8024
|
+
"avsolatorio/GIST-small-Embedding-v0": 384,
|
|
8025
|
+
"sdadas/mmlw-roberta-large": 1024,
|
|
8026
|
+
"nomic-ai/nomic-embed-text-v1.5": 768,
|
|
8027
|
+
"minishlab/potion-multilingual-128M": 256,
|
|
8028
|
+
"shibing624/text2vec-base-multilingual": 384,
|
|
8029
|
+
"thenlper/gte-base": 768,
|
|
8030
|
+
"intfloat/e5-small-v2": 384,
|
|
8031
|
+
"intfloat/e5-base": 768,
|
|
8032
|
+
"sentence-transformers/static-similarity-mrl-multilingual-v1": 1024,
|
|
8033
|
+
"manu/sentence_croissant_alpha_v0.3": 2048,
|
|
8034
|
+
"BAAI/bge-small-en-v1.5": 512,
|
|
8035
|
+
"thenlper/gte-small": 384,
|
|
8036
|
+
"sdadas/mmlw-e5-small": 384,
|
|
8037
|
+
"manu/sentence_croissant_alpha_v0.4": 2048,
|
|
8038
|
+
"manu/sentence_croissant_alpha_v0.2": 2048,
|
|
8039
|
+
"abhinand/MedEmbed-small-v0.1": 384,
|
|
8040
|
+
"ibm-granite/granite-embedding-125m-english": 768,
|
|
8041
|
+
"intfloat/e5-small": 384,
|
|
8042
|
+
"voyage-large-2-instruct": 1024,
|
|
8043
|
+
"sdadas/mmlw-roberta-base": 768,
|
|
8044
|
+
"Snowflake/snowflake-arctic-embed-l": 1024,
|
|
8045
|
+
"Mihaiii/Ivysaur": 384,
|
|
8046
|
+
"Snowflake/snowflake-arctic-embed-m-long": 768,
|
|
8047
|
+
"bigscience/sgpt-bloom-7b1-msmarco": 4096,
|
|
8048
|
+
"avsolatorio/GIST-all-MiniLM-L6-v2": 384,
|
|
8049
|
+
"sergeyzh/LaBSE-ru-turbo": 768,
|
|
8050
|
+
"sentence-transformers/all-mpnet-base-v2": 768,
|
|
8051
|
+
"Snowflake/snowflake-arctic-embed-m": 768,
|
|
8052
|
+
"Snowflake/snowflake-arctic-embed-s": 384,
|
|
8053
|
+
"sentence-transformers/all-MiniLM-L12-v2": 384,
|
|
8054
|
+
"Mihaiii/gte-micro-v4": 384,
|
|
8055
|
+
"Snowflake/snowflake-arctic-embed-m-v1.5": 768,
|
|
8056
|
+
"cointegrated/LaBSE-en-ru": 768,
|
|
8057
|
+
"Mihaiii/Bulbasaur": 384,
|
|
8058
|
+
"ibm-granite/granite-embedding-30m-english": 384,
|
|
8059
|
+
"deepfile/embedder-100p": 768,
|
|
8060
|
+
"Jaume/gemma-2b-embeddings": 2048,
|
|
8061
|
+
"OrlikB/KartonBERT-USE-base-v1": 768,
|
|
8062
|
+
"izhx/udever-bloom-7b1": 4096,
|
|
8063
|
+
"izhx/udever-bloom-1b1": 1024,
|
|
8064
|
+
"brahmairesearch/slx-v0.1": 384,
|
|
8065
|
+
"Mihaiii/Wartortle": 384,
|
|
8066
|
+
"izhx/udever-bloom-3b": 2048,
|
|
8067
|
+
"deepvk/USER-base": 768,
|
|
8068
|
+
"ai-forever/ru-en-RoSBERTa": 1024,
|
|
8069
|
+
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": 4096,
|
|
8070
|
+
"Mihaiii/Venusaur": 384,
|
|
8071
|
+
"Snowflake/snowflake-arctic-embed-xs": 384,
|
|
8072
|
+
"jinaai/jina-embedding-b-en-v1": 768,
|
|
8073
|
+
"Mihaiii/gte-micro": 384,
|
|
8074
|
+
"aari1995/German_Semantic_STS_V2": 1024,
|
|
8075
|
+
"Mihaiii/Squirtle": 384,
|
|
8076
|
+
"OrlikB/st-polish-kartonberta-base-alpha-v1": 768,
|
|
8077
|
+
"sergeyzh/rubert-tiny-turbo": 312,
|
|
8078
|
+
"minishlab/potion-base-8M": 256,
|
|
8079
|
+
"minishlab/M2V_base_glove_subword": 256,
|
|
8080
|
+
"jinaai/jina-embedding-s-en-v1": 512,
|
|
8081
|
+
"minishlab/potion-base-4M": 128,
|
|
8082
|
+
"minishlab/M2V_base_output": 256,
|
|
8083
|
+
"DeepPavlov/rubert-base-cased-sentence": 768,
|
|
8084
|
+
"jinaai/jina-embeddings-v2-small-en": 512,
|
|
8085
|
+
"cointegrated/rubert-tiny2": 312,
|
|
8086
|
+
"minishlab/M2V_base_glove": 256,
|
|
8087
|
+
"cointegrated/rubert-tiny": 312,
|
|
8088
|
+
"silma-ai/silma-embeddding-matryoshka-v0.1": 768,
|
|
8089
|
+
"DeepPavlov/rubert-base-cased": 768,
|
|
8090
|
+
"Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": 768,
|
|
8091
|
+
"izhx/udever-bloom-560m": 1024,
|
|
8092
|
+
"minishlab/potion-base-2M": 64,
|
|
8093
|
+
"DeepPavlov/distilrubert-small-cased-conversational": 768,
|
|
8094
|
+
"consciousAI/cai-lunaris-text-embeddings": 1024,
|
|
8095
|
+
"deepvk/deberta-v1-base": 768,
|
|
8096
|
+
"Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": 768,
|
|
8097
|
+
"Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": 768,
|
|
8098
|
+
"ai-forever/sbert_large_mt_nlu_ru": 1024,
|
|
8099
|
+
"ai-forever/sbert_large_nlu_ru": 1024,
|
|
8100
|
+
"malenia1/ternary-weight-embedding": 1024,
|
|
8101
|
+
"jinaai/jina-embeddings-v2-base-en": 768,
|
|
8102
|
+
"VPLabs/SearchMap_Preview": 4096,
|
|
8103
|
+
"Hum-Works/lodestone-base-4096-v1": 768,
|
|
8104
|
+
"jinaai/jina-embeddings-v4": 2048
|
|
8105
|
+
};
|
|
8106
|
+
/**
|
|
8107
|
+
* Lowercase lookup map for case-insensitive model dimension queries.
|
|
8108
|
+
* Built in the constructor from knownModelDimensions to ensure consistency.
|
|
8109
|
+
*/
|
|
8110
|
+
modelLookup;
|
|
8111
|
+
constructor() {
|
|
8112
|
+
this.modelLookup = /* @__PURE__ */ new Map();
|
|
8113
|
+
for (const [model, dimensions] of Object.entries(this.knownModelDimensions)) {
|
|
8114
|
+
this.modelLookup.set(model.toLowerCase(), dimensions);
|
|
8115
|
+
}
|
|
8116
|
+
}
|
|
8117
|
+
/**
|
|
8118
|
+
* Parse embedding model configuration from a provided model specification.
|
|
8119
|
+
* This is a synchronous operation that extracts provider, model, and known dimensions.
|
|
8120
|
+
*
|
|
8121
|
+
* Supports various providers:
|
|
8122
|
+
* - openai: OpenAI models and OpenAI-compatible APIs (Ollama, LMStudio, etc.)
|
|
8123
|
+
* - vertex: Google Cloud Vertex AI
|
|
8124
|
+
* - gemini: Google Generative AI
|
|
8125
|
+
* - aws: AWS Bedrock models
|
|
8126
|
+
* - microsoft: Azure OpenAI
|
|
8127
|
+
* - sagemaker: AWS SageMaker hosted models
|
|
8128
|
+
*
|
|
8129
|
+
* @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
|
|
8130
|
+
* @returns Parsed embedding model configuration
|
|
8131
|
+
*/
|
|
8132
|
+
parse(modelSpec) {
|
|
8133
|
+
const spec = modelSpec || "text-embedding-3-small";
|
|
8134
|
+
const colonIndex = spec.indexOf(":");
|
|
8135
|
+
let provider;
|
|
8136
|
+
let model;
|
|
8137
|
+
if (colonIndex === -1) {
|
|
8138
|
+
provider = "openai";
|
|
8139
|
+
model = spec;
|
|
8140
|
+
} else {
|
|
8141
|
+
provider = spec.substring(0, colonIndex);
|
|
8142
|
+
model = spec.substring(colonIndex + 1);
|
|
8143
|
+
}
|
|
8144
|
+
const dimensions = this.modelLookup?.get(model.toLowerCase()) || null;
|
|
8145
|
+
return {
|
|
8146
|
+
provider,
|
|
8147
|
+
model,
|
|
8148
|
+
dimensions,
|
|
8149
|
+
modelSpec: spec
|
|
8150
|
+
};
|
|
8151
|
+
}
|
|
8152
|
+
/**
|
|
8153
|
+
* Get the known dimensions for a specific model.
|
|
8154
|
+
* Returns null if the model dimensions are not known.
|
|
8155
|
+
* Uses case-insensitive lookup.
|
|
8156
|
+
*
|
|
8157
|
+
* @param model The model name (e.g., "text-embedding-3-small")
|
|
8158
|
+
* @returns Known dimensions or null
|
|
8159
|
+
*/
|
|
8160
|
+
getKnownDimensions(model) {
|
|
8161
|
+
return this.modelLookup?.get(model.toLowerCase()) || null;
|
|
8162
|
+
}
|
|
8163
|
+
/**
|
|
8164
|
+
* Add or update known dimensions for a model.
|
|
8165
|
+
* This can be used to cache discovered dimensions.
|
|
8166
|
+
* Stores both original case and lowercase for consistent lookup.
|
|
8167
|
+
*
|
|
8168
|
+
* @param model The model name
|
|
8169
|
+
* @param dimensions The dimensions to cache
|
|
8170
|
+
*/
|
|
8171
|
+
setKnownDimensions(model, dimensions) {
|
|
8172
|
+
this.knownModelDimensions[model] = dimensions;
|
|
8173
|
+
if (this.modelLookup) {
|
|
8174
|
+
this.modelLookup.set(model.toLowerCase(), dimensions);
|
|
8175
|
+
}
|
|
8176
|
+
}
|
|
8177
|
+
/**
|
|
8178
|
+
* Static method to parse embedding model configuration using the singleton instance.
|
|
8179
|
+
* This maintains backward compatibility while using the class-based approach.
|
|
8180
|
+
*/
|
|
8181
|
+
static parseEmbeddingConfig(modelSpec) {
|
|
8182
|
+
return EmbeddingConfig.getInstance().parse(modelSpec);
|
|
8183
|
+
}
|
|
8184
|
+
/**
|
|
8185
|
+
* Static method to get known model dimensions using the singleton instance.
|
|
8186
|
+
* This maintains backward compatibility while using the class-based approach.
|
|
8187
|
+
*/
|
|
8188
|
+
static getKnownModelDimensions(model) {
|
|
8189
|
+
return EmbeddingConfig.getInstance().getKnownDimensions(model);
|
|
8190
|
+
}
|
|
8191
|
+
/**
|
|
8192
|
+
* Static method to set known model dimensions using the singleton instance.
|
|
8193
|
+
* This maintains backward compatibility while using the class-based approach.
|
|
8194
|
+
*/
|
|
8195
|
+
static setKnownModelDimensions(model, dimensions) {
|
|
8196
|
+
EmbeddingConfig.getInstance().setKnownDimensions(model, dimensions);
|
|
8151
8197
|
}
|
|
8152
8198
|
}
|
|
8153
8199
|
class DocumentStore {
|
|
@@ -8157,6 +8203,16 @@ class DocumentStore {
|
|
|
8157
8203
|
modelDimension;
|
|
8158
8204
|
embeddingConfig;
|
|
8159
8205
|
isVectorSearchEnabled = false;
|
|
8206
|
+
/**
|
|
8207
|
+
* Returns the active embedding configuration if vector search is enabled,
|
|
8208
|
+
* or null if embeddings are disabled (no config provided or credentials unavailable).
|
|
8209
|
+
*/
|
|
8210
|
+
getActiveEmbeddingConfig() {
|
|
8211
|
+
if (!this.isVectorSearchEnabled || !this.embeddingConfig) {
|
|
8212
|
+
return null;
|
|
8213
|
+
}
|
|
8214
|
+
return this.embeddingConfig;
|
|
8215
|
+
}
|
|
8160
8216
|
statements;
|
|
8161
8217
|
/**
|
|
8162
8218
|
* Calculates Reciprocal Rank Fusion score for a result with configurable weights
|
|
@@ -8436,7 +8492,7 @@ class DocumentStore {
|
|
|
8436
8492
|
const config = this.embeddingConfig;
|
|
8437
8493
|
if (!areCredentialsAvailable(config.provider)) {
|
|
8438
8494
|
logger.warn(
|
|
8439
|
-
`⚠️
|
|
8495
|
+
`⚠️ No credentials found for ${config.provider} embedding provider. Vector search is disabled.
|
|
8440
8496
|
Only full-text search will be available. To enable vector search, please configure the required
|
|
8441
8497
|
environment variables for ${config.provider} or choose a different provider.
|
|
8442
8498
|
See README.md for configuration options or run with --help for more details.`
|
|
@@ -8448,8 +8504,26 @@ class DocumentStore {
|
|
|
8448
8504
|
if (config.dimensions !== null) {
|
|
8449
8505
|
this.modelDimension = config.dimensions;
|
|
8450
8506
|
} else {
|
|
8451
|
-
const
|
|
8452
|
-
|
|
8507
|
+
const EMBEDDING_INIT_TIMEOUT_MS = 3e4;
|
|
8508
|
+
const testPromise = this.embeddings.embedQuery("test");
|
|
8509
|
+
let timeoutId;
|
|
8510
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
8511
|
+
timeoutId = setTimeout(() => {
|
|
8512
|
+
reject(
|
|
8513
|
+
new Error(
|
|
8514
|
+
`Embedding service connection timed out after ${EMBEDDING_INIT_TIMEOUT_MS / 1e3} seconds`
|
|
8515
|
+
)
|
|
8516
|
+
);
|
|
8517
|
+
}, EMBEDDING_INIT_TIMEOUT_MS);
|
|
8518
|
+
});
|
|
8519
|
+
try {
|
|
8520
|
+
const testVector = await Promise.race([testPromise, timeoutPromise]);
|
|
8521
|
+
this.modelDimension = testVector.length;
|
|
8522
|
+
} finally {
|
|
8523
|
+
if (timeoutId !== void 0) {
|
|
8524
|
+
clearTimeout(timeoutId);
|
|
8525
|
+
}
|
|
8526
|
+
}
|
|
8453
8527
|
EmbeddingConfig.setKnownModelDimensions(config.model, this.modelDimension);
|
|
8454
8528
|
}
|
|
8455
8529
|
if (this.modelDimension > this.dbDimension) {
|
|
@@ -8463,18 +8537,26 @@ class DocumentStore {
|
|
|
8463
8537
|
if (error instanceof Error) {
|
|
8464
8538
|
if (error.message.includes("does not exist") || error.message.includes("MODEL_NOT_FOUND")) {
|
|
8465
8539
|
throw new ModelConfigurationError(
|
|
8466
|
-
|
|
8540
|
+
`Invalid embedding model: ${config.model}
|
|
8467
8541
|
The model "${config.model}" is not available or you don't have access to it.
|
|
8468
8542
|
See README.md for supported models or run with --help for more details.`
|
|
8469
8543
|
);
|
|
8470
8544
|
}
|
|
8471
8545
|
if (error.message.includes("API key") || error.message.includes("401") || error.message.includes("authentication")) {
|
|
8472
8546
|
throw new ModelConfigurationError(
|
|
8473
|
-
|
|
8547
|
+
`Authentication failed for ${config.provider} embedding provider
|
|
8474
8548
|
Please check your API key configuration.
|
|
8475
8549
|
See README.md for configuration options or run with --help for more details.`
|
|
8476
8550
|
);
|
|
8477
8551
|
}
|
|
8552
|
+
if (error.message.includes("timed out") || error.message.includes("ECONNREFUSED") || error.message.includes("ENOTFOUND") || error.message.includes("ETIMEDOUT") || error.message.includes("ECONNRESET") || error.message.includes("network") || error.message.includes("fetch failed")) {
|
|
8553
|
+
throw new ModelConfigurationError(
|
|
8554
|
+
`Failed to connect to ${config.provider} embedding service
|
|
8555
|
+
${error.message}
|
|
8556
|
+
Please check that the embedding service is running and accessible.
|
|
8557
|
+
If using a local model (e.g., Ollama), ensure the service is started.`
|
|
8558
|
+
);
|
|
8559
|
+
}
|
|
8478
8560
|
}
|
|
8479
8561
|
throw error;
|
|
8480
8562
|
}
|
|
@@ -8543,8 +8625,8 @@ class DocumentStore {
|
|
|
8543
8625
|
return escapedTokens[0];
|
|
8544
8626
|
}
|
|
8545
8627
|
const exactMatch = `"${tokens.join(" ").replace(/"/g, '""')}"`;
|
|
8546
|
-
const termsQuery = escapedTokens.join(" ");
|
|
8547
|
-
return `${exactMatch} OR
|
|
8628
|
+
const termsQuery = escapedTokens.join(" OR ");
|
|
8629
|
+
return `${exactMatch} OR ${termsQuery}`;
|
|
8548
8630
|
}
|
|
8549
8631
|
/**
|
|
8550
8632
|
* Initializes database connection and ensures readiness
|
|
@@ -8672,6 +8754,35 @@ class DocumentStore {
|
|
|
8672
8754
|
throw new StoreError(`Failed to get library by ID: ${error}`);
|
|
8673
8755
|
}
|
|
8674
8756
|
}
|
|
8757
|
+
/**
|
|
8758
|
+
* Retrieves a library by its name.
|
|
8759
|
+
* @param name The library name to retrieve
|
|
8760
|
+
* @returns The library record, or null if not found
|
|
8761
|
+
*/
|
|
8762
|
+
async getLibrary(name) {
|
|
8763
|
+
try {
|
|
8764
|
+
const normalizedName = name.toLowerCase();
|
|
8765
|
+
const row = this.statements.getLibraryIdByName.get(normalizedName);
|
|
8766
|
+
if (!row) {
|
|
8767
|
+
return null;
|
|
8768
|
+
}
|
|
8769
|
+
return { id: row.id, name: normalizedName };
|
|
8770
|
+
} catch (error) {
|
|
8771
|
+
throw new StoreError(`Failed to get library by name: ${error}`);
|
|
8772
|
+
}
|
|
8773
|
+
}
|
|
8774
|
+
/**
|
|
8775
|
+
* Deletes a library by its ID.
|
|
8776
|
+
* This should only be called when the library has no remaining versions.
|
|
8777
|
+
* @param libraryId The library ID to delete
|
|
8778
|
+
*/
|
|
8779
|
+
async deleteLibrary(libraryId) {
|
|
8780
|
+
try {
|
|
8781
|
+
this.statements.deleteLibraryById.run(libraryId);
|
|
8782
|
+
} catch (error) {
|
|
8783
|
+
throw new StoreError(`Failed to delete library: ${error}`);
|
|
8784
|
+
}
|
|
8785
|
+
}
|
|
8675
8786
|
/**
|
|
8676
8787
|
* Stores scraper options for a version to enable reproducible indexing.
|
|
8677
8788
|
* @param versionId The version ID to update
|
|
@@ -8709,7 +8820,7 @@ class DocumentStore {
|
|
|
8709
8820
|
try {
|
|
8710
8821
|
parsed = JSON.parse(row.scraper_options);
|
|
8711
8822
|
} catch (e) {
|
|
8712
|
-
logger.warn(`⚠️
|
|
8823
|
+
logger.warn(`⚠️ Invalid scraper_options JSON for version ${versionId}: ${e}`);
|
|
8713
8824
|
parsed = {};
|
|
8714
8825
|
}
|
|
8715
8826
|
}
|
|
@@ -9428,13 +9539,6 @@ class DocumentManagementService {
|
|
|
9428
9539
|
documentRetriever;
|
|
9429
9540
|
pipelines;
|
|
9430
9541
|
eventBus;
|
|
9431
|
-
/**
|
|
9432
|
-
* Normalizes a version string, converting null or undefined to an empty string
|
|
9433
|
-
* and converting to lowercase.
|
|
9434
|
-
*/
|
|
9435
|
-
normalizeVersion(version) {
|
|
9436
|
-
return (version ?? "").toLowerCase();
|
|
9437
|
-
}
|
|
9438
9542
|
constructor(storePath, eventBus, embeddingConfig, pipelineConfig) {
|
|
9439
9543
|
this.eventBus = eventBus;
|
|
9440
9544
|
const dbPath = storePath === ":memory:" ? ":memory:" : path.join(storePath, "documents.db");
|
|
@@ -9443,6 +9547,20 @@ class DocumentManagementService {
|
|
|
9443
9547
|
this.documentRetriever = new DocumentRetrieverService(this.store);
|
|
9444
9548
|
this.pipelines = PipelineFactory$1.createStandardPipelines(pipelineConfig);
|
|
9445
9549
|
}
|
|
9550
|
+
/**
|
|
9551
|
+
* Returns the active embedding configuration if vector search is enabled,
|
|
9552
|
+
* or null if embeddings are disabled.
|
|
9553
|
+
*/
|
|
9554
|
+
getActiveEmbeddingConfig() {
|
|
9555
|
+
return this.store.getActiveEmbeddingConfig();
|
|
9556
|
+
}
|
|
9557
|
+
/**
|
|
9558
|
+
* Normalizes a version string, converting null or undefined to an empty string
|
|
9559
|
+
* and converting to lowercase.
|
|
9560
|
+
*/
|
|
9561
|
+
normalizeVersion(version) {
|
|
9562
|
+
return (version ?? "").toLowerCase();
|
|
9563
|
+
}
|
|
9446
9564
|
/**
|
|
9447
9565
|
* Initializes the underlying document store.
|
|
9448
9566
|
*/
|
|
@@ -9533,30 +9651,26 @@ class DocumentManagementService {
|
|
|
9533
9651
|
return this.store.findVersionsBySourceUrl(url);
|
|
9534
9652
|
}
|
|
9535
9653
|
/**
|
|
9536
|
-
* Validates if a library exists in the store
|
|
9654
|
+
* Validates if a library exists in the store.
|
|
9655
|
+
* Checks if the library record exists in the database, regardless of whether it has versions or documents.
|
|
9537
9656
|
* Throws LibraryNotFoundInStoreError with suggestions if the library is not found.
|
|
9538
9657
|
* @param library The name of the library to validate.
|
|
9539
9658
|
* @throws {LibraryNotFoundInStoreError} If the library does not exist.
|
|
9540
9659
|
*/
|
|
9541
9660
|
async validateLibraryExists(library) {
|
|
9542
9661
|
logger.info(`🔎 Validating existence of library: ${library}`);
|
|
9543
|
-
const
|
|
9544
|
-
|
|
9545
|
-
const hasUnversioned = await this.exists(normalizedLibrary, "");
|
|
9546
|
-
if (versions.length === 0 && !hasUnversioned) {
|
|
9662
|
+
const libraryRecord = await this.store.getLibrary(library);
|
|
9663
|
+
if (!libraryRecord) {
|
|
9547
9664
|
logger.warn(`⚠️ Library '${library}' not found.`);
|
|
9548
9665
|
const allLibraries = await this.listLibraries();
|
|
9549
9666
|
const libraryNames = allLibraries.map((lib) => lib.library);
|
|
9550
9667
|
let suggestions = [];
|
|
9551
9668
|
if (libraryNames.length > 0) {
|
|
9552
9669
|
const fuse = new Fuse(libraryNames, {
|
|
9553
|
-
// Configure fuse.js options if needed (e.g., threshold)
|
|
9554
|
-
// isCaseSensitive: false, // Handled by normalizing library names
|
|
9555
|
-
// includeScore: true,
|
|
9556
9670
|
threshold: 0.7
|
|
9557
9671
|
// Adjust threshold for desired fuzziness (0=exact, 1=match anything)
|
|
9558
9672
|
});
|
|
9559
|
-
const results = fuse.search(
|
|
9673
|
+
const results = fuse.search(library.toLowerCase());
|
|
9560
9674
|
suggestions = results.slice(0, 3).map((result) => result.item);
|
|
9561
9675
|
logger.info(`🔍 Found suggestions: ${suggestions.join(", ")}`);
|
|
9562
9676
|
}
|
|
@@ -9672,6 +9786,7 @@ class DocumentManagementService {
|
|
|
9672
9786
|
/**
|
|
9673
9787
|
* Completely removes a library version and all associated documents.
|
|
9674
9788
|
* Also removes the library if no other versions remain.
|
|
9789
|
+
* If the specified version doesn't exist but the library exists with no versions, removes the library.
|
|
9675
9790
|
* @param library Library name
|
|
9676
9791
|
* @param version Version string (null/undefined for unversioned)
|
|
9677
9792
|
*/
|
|
@@ -9686,8 +9801,17 @@ class DocumentManagementService {
|
|
|
9686
9801
|
logger.info(`🗑️ Removed version ${library}@${normalizedVersion || "[no version]"}`);
|
|
9687
9802
|
} else {
|
|
9688
9803
|
logger.warn(
|
|
9689
|
-
`⚠️
|
|
9804
|
+
`⚠️ Version ${library}@${normalizedVersion || "[no version]"} not found`
|
|
9690
9805
|
);
|
|
9806
|
+
const libraryRecord = await this.store.getLibrary(library);
|
|
9807
|
+
if (libraryRecord) {
|
|
9808
|
+
const versions = await this.store.queryUniqueVersions(library);
|
|
9809
|
+
if (versions.length === 0) {
|
|
9810
|
+
logger.info(`🗑️ Library ${library} has no versions, removing library record`);
|
|
9811
|
+
await this.store.deleteLibrary(libraryRecord.id);
|
|
9812
|
+
logger.info(`🗑️ Completely removed library ${library} (had no versions)`);
|
|
9813
|
+
}
|
|
9814
|
+
}
|
|
9691
9815
|
}
|
|
9692
9816
|
this.eventBus.emit(EventType.LIBRARY_CHANGE, void 0);
|
|
9693
9817
|
}
|
|
@@ -10365,7 +10489,7 @@ function registerEventsRoute(server, eventBus) {
|
|
|
10365
10489
|
// Disable buffering in nginx
|
|
10366
10490
|
});
|
|
10367
10491
|
reply.raw.write("data: connected\n\n");
|
|
10368
|
-
logger.
|
|
10492
|
+
logger.debug("SSE client connected");
|
|
10369
10493
|
const allEventTypes = [
|
|
10370
10494
|
EventType.JOB_STATUS_CHANGE,
|
|
10371
10495
|
EventType.JOB_PROGRESS,
|
|
@@ -10403,24 +10527,146 @@ function registerEventsRoute(server, eventBus) {
|
|
|
10403
10527
|
}
|
|
10404
10528
|
}, 3e4);
|
|
10405
10529
|
request.raw.on("close", () => {
|
|
10406
|
-
logger.
|
|
10530
|
+
logger.debug("SSE client disconnected");
|
|
10407
10531
|
cleanup();
|
|
10408
10532
|
clearInterval(heartbeatInterval);
|
|
10409
10533
|
});
|
|
10410
10534
|
request.raw.on("error", (error) => {
|
|
10411
|
-
logger.
|
|
10535
|
+
logger.debug(`SSE connection error: ${error}`);
|
|
10412
10536
|
cleanup();
|
|
10413
10537
|
clearInterval(heartbeatInterval);
|
|
10414
10538
|
});
|
|
10415
10539
|
});
|
|
10416
10540
|
}
|
|
10541
|
+
/**
 * Toast notification markup.
 *
 * Renders a fixed-position alert whose visibility, colour scheme, icon and text
 * are all bound to `$store.toast` (`visible`, `type`, `message`) through `x-*`
 * attributes; the close button calls `$store.toast.hide()`. The element starts
 * with `display: none` so it stays hidden until the bindings take over.
 */
const Toast = () => {
  // One filled icon per toast type; only the visibility expression and the path differ.
  const statusIcons = [
    {
      show: "$store.toast.type === 'success'",
      d: "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5Zm3.707 8.207-4 4a1 1 0 0 1-1.414 0l-2-2a1 1 0 0 1 1.414-1.414L9 10.586l3.293-3.293a1 1 0 0 1 1.414 1.414Z"
    },
    {
      show: "$store.toast.type === 'error'",
      d: "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5Zm3.707 11.793a1 1 0 1 1-1.414 1.414L10 11.414l-2.293 2.293a1 1 0 0 1-1.414-1.414L8.586 10 6.293 7.707a1 1 0 0 1 1.414-1.414L10 8.586l2.293-2.293a1 1 0 0 1 1.414 1.414L11.414 10l2.293 2.293Z"
    },
    {
      show: "$store.toast.type === 'warning'",
      d: "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5ZM10 15a1 1 0 1 1 0-2 1 1 0 0 1 0 2Zm1-4a1 1 0 0 1-2 0V6a1 1 0 0 1 2 0v5Z"
    },
    {
      show: "$store.toast.type === 'info'",
      d: "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5ZM9.5 4a1.5 1.5 0 1 1 0 3 1.5 1.5 0 0 1 0-3ZM12 15H8a1 1 0 0 1 0-2h1v-3H8a1 1 0 0 1 0-2h2a1 1 0 0 1 1 1v4h1a1 1 0 0 1 0 2Z"
    }
  ].map(
    ({ show, d }) => /* @__PURE__ */ jsx(
      "svg",
      {
        "x-show": show,
        class: "w-5 h-5",
        "aria-hidden": "true",
        xmlns: "http://www.w3.org/2000/svg",
        fill: "currentColor",
        viewBox: "0 0 20 20",
        children: /* @__PURE__ */ jsx("path", { d })
      }
    )
  );

  // Coloured badge that holds whichever status icon matches the toast type.
  const iconBadge = /* @__PURE__ */ jsxs(
    "div",
    {
      class: "inline-flex items-center justify-center shrink-0 w-8 h-8 rounded-lg",
      "x-bind:class": "{\n 'text-green-500 bg-green-100 dark:bg-green-800 dark:text-green-200': $store.toast.type === 'success',\n 'text-red-500 bg-red-100 dark:bg-red-800 dark:text-red-200': $store.toast.type === 'error',\n 'text-orange-500 bg-orange-100 dark:bg-orange-700 dark:text-orange-200': $store.toast.type === 'warning',\n 'text-blue-500 bg-blue-100 dark:bg-blue-800 dark:text-blue-200': $store.toast.type === 'info'\n }",
      children: statusIcons
    }
  );

  const messageText = /* @__PURE__ */ jsx(
    "div",
    {
      class: "ml-3 text-sm font-normal",
      "x-text": "$store.toast.message"
    }
  );

  const closeGlyph = /* @__PURE__ */ jsx(
    "svg",
    {
      class: "w-3 h-3",
      "aria-hidden": "true",
      xmlns: "http://www.w3.org/2000/svg",
      fill: "none",
      viewBox: "0 0 14 14",
      children: /* @__PURE__ */ jsx(
        "path",
        {
          stroke: "currentColor",
          "stroke-linecap": "round",
          "stroke-linejoin": "round",
          "stroke-width": "2",
          d: "m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"
        }
      )
    }
  );

  const closeButton = /* @__PURE__ */ jsxs(
    "button",
    {
      type: "button",
      class: "ml-auto -mx-1.5 -my-1.5 bg-white text-gray-400 hover:text-gray-900 rounded-lg focus:ring-2 focus:ring-gray-300 p-1.5 hover:bg-gray-100 inline-flex items-center justify-center h-8 w-8 dark:text-gray-500 dark:hover:text-white dark:bg-gray-800 dark:hover:bg-gray-700",
      "x-on:click": "$store.toast.hide()",
      "aria-label": "Close",
      children: [
        /* @__PURE__ */ jsx("span", { class: "sr-only", children: "Close" }),
        closeGlyph
      ]
    }
  );

  const alertCard = /* @__PURE__ */ jsxs(
    "div",
    {
      class: "flex items-center w-full max-w-xs p-4 text-gray-500 bg-white rounded-lg shadow dark:text-gray-400 dark:bg-gray-800",
      role: "alert",
      children: [iconBadge, messageText, closeButton]
    }
  );

  return /* @__PURE__ */ jsx(
    "div",
    {
      "x-data": true,
      "x-show": "$store.toast.visible",
      "x-transition:enter": "transition ease-out duration-300",
      "x-transition:enter-start": "opacity-0 transform translate-y-2",
      "x-transition:enter-end": "opacity-100 transform translate-y-0",
      "x-transition:leave": "transition ease-in duration-200",
      "x-transition:leave-start": "opacity-100",
      "x-transition:leave-end": "opacity-0",
      class: "fixed top-5 right-5 z-50",
      style: "display: none;",
      children: alertCard
    }
  );
};
|
|
10417
10663
|
const Layout = ({
|
|
10418
10664
|
title,
|
|
10419
10665
|
version,
|
|
10420
10666
|
children,
|
|
10421
10667
|
eventClientConfig
|
|
10422
10668
|
}) => {
|
|
10423
|
-
const versionString = version || "1.
|
|
10669
|
+
const versionString = version || "1.30.0";
|
|
10424
10670
|
const versionInitializer = `versionUpdate({ currentVersion: ${`'${versionString}'`} })`;
|
|
10425
10671
|
return /* @__PURE__ */ jsxs("html", { lang: "en", children: [
|
|
10426
10672
|
/* @__PURE__ */ jsxs("head", { children: [
|
|
@@ -10565,7 +10811,8 @@ const Layout = ({
|
|
|
10565
10811
|
form .spinner { display: none; }
|
|
10566
10812
|
` })
|
|
10567
10813
|
] }),
|
|
10568
|
-
/* @__PURE__ */ jsxs("body", { class: "bg-gray-50 dark:bg-gray-900", children: [
|
|
10814
|
+
/* @__PURE__ */ jsxs("body", { class: "bg-gray-50 dark:bg-gray-900", "hx-ext": "morph", children: [
|
|
10815
|
+
/* @__PURE__ */ jsx(Toast, {}),
|
|
10569
10816
|
/* @__PURE__ */ jsx(
|
|
10570
10817
|
"header",
|
|
10571
10818
|
{
|
|
@@ -10719,19 +10966,35 @@ function registerIndexRoute(server, config) {
|
|
|
10719
10966
|
trpcUrl
|
|
10720
10967
|
},
|
|
10721
10968
|
children: [
|
|
10969
|
+
/* @__PURE__ */ jsx(
|
|
10970
|
+
"div",
|
|
10971
|
+
{
|
|
10972
|
+
id: "analytics-stats",
|
|
10973
|
+
"hx-get": "/web/stats",
|
|
10974
|
+
"hx-trigger": "load, library-change from:body",
|
|
10975
|
+
"hx-swap": "morph:innerHTML",
|
|
10976
|
+
children: /* @__PURE__ */ jsxs("div", { class: "grid grid-cols-1 sm:grid-cols-3 gap-4 mb-4 animate-pulse", children: [
|
|
10977
|
+
/* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600 h-20" }),
|
|
10978
|
+
/* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600 h-20" }),
|
|
10979
|
+
/* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600 h-20" })
|
|
10980
|
+
] })
|
|
10981
|
+
}
|
|
10982
|
+
),
|
|
10722
10983
|
/* @__PURE__ */ jsxs("section", { class: "mb-4 p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600", children: [
|
|
10723
10984
|
/* @__PURE__ */ jsxs("div", { class: "flex items-center justify-between mb-2", children: [
|
|
10724
10985
|
/* @__PURE__ */ jsx("h2", { class: "text-xl font-semibold text-gray-900 dark:text-white", children: "Job Queue" }),
|
|
10725
10986
|
/* @__PURE__ */ jsx(
|
|
10726
10987
|
"button",
|
|
10727
10988
|
{
|
|
10989
|
+
id: "clear-completed-btn",
|
|
10728
10990
|
type: "button",
|
|
10729
|
-
class: "text-xs px-3 py-1.5 text-gray-
|
|
10991
|
+
class: "text-xs px-3 py-1.5 text-gray-400 bg-gray-50 border border-gray-200 rounded-lg cursor-not-allowed focus:ring-4 focus:outline-none transition-colors duration-150 dark:bg-gray-700 dark:text-gray-500 dark:border-gray-600",
|
|
10730
10992
|
title: "Clear all completed, cancelled, and failed jobs",
|
|
10731
10993
|
"hx-post": "/web/jobs/clear-completed",
|
|
10732
10994
|
"hx-trigger": "click",
|
|
10733
10995
|
"hx-on": "htmx:afterRequest: document.dispatchEvent(new Event('job-list-refresh'))",
|
|
10734
10996
|
"hx-swap": "none",
|
|
10997
|
+
disabled: true,
|
|
10735
10998
|
children: "Clear Completed Jobs"
|
|
10736
10999
|
}
|
|
10737
11000
|
)
|
|
@@ -10741,7 +11004,8 @@ function registerIndexRoute(server, config) {
|
|
|
10741
11004
|
{
|
|
10742
11005
|
id: "job-queue",
|
|
10743
11006
|
"hx-get": "/web/jobs",
|
|
10744
|
-
"hx-trigger": "load, job-status-change from:body, job-progress from:body, job-list-change from:body",
|
|
11007
|
+
"hx-trigger": "load, job-status-change from:body, job-progress from:body, job-list-change from:body, job-list-refresh from:body",
|
|
11008
|
+
"hx-swap": "morph:innerHTML",
|
|
10745
11009
|
children: /* @__PURE__ */ jsxs("div", { class: "animate-pulse", children: [
|
|
10746
11010
|
/* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-48 mb-4" }),
|
|
10747
11011
|
/* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-full mb-2.5" }),
|
|
@@ -10750,11 +11014,17 @@ function registerIndexRoute(server, config) {
|
|
|
10750
11014
|
}
|
|
10751
11015
|
)
|
|
10752
11016
|
] }),
|
|
10753
|
-
/* @__PURE__ */ jsx("section", { class: "mb-8", children: /* @__PURE__ */ jsx("div", { id: "addJobForm",
|
|
10754
|
-
|
|
10755
|
-
|
|
10756
|
-
|
|
10757
|
-
|
|
11017
|
+
/* @__PURE__ */ jsx("section", { class: "mb-8", children: /* @__PURE__ */ jsx("div", { id: "addJobForm", children: /* @__PURE__ */ jsx(
|
|
11018
|
+
"button",
|
|
11019
|
+
{
|
|
11020
|
+
type: "button",
|
|
11021
|
+
"hx-get": "/web/jobs/new",
|
|
11022
|
+
"hx-target": "#addJobForm",
|
|
11023
|
+
"hx-swap": "innerHTML",
|
|
11024
|
+
class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500 transition-colors duration-150",
|
|
11025
|
+
children: "Add New Documentation"
|
|
11026
|
+
}
|
|
11027
|
+
) }) }),
|
|
10758
11028
|
/* @__PURE__ */ jsxs("div", { children: [
|
|
10759
11029
|
/* @__PURE__ */ jsx("h2", { class: "text-xl font-semibold mb-2 text-gray-900 dark:text-white", children: "Indexed Documentation" }),
|
|
10760
11030
|
/* @__PURE__ */ jsx(
|
|
@@ -10763,6 +11033,7 @@ function registerIndexRoute(server, config) {
|
|
|
10763
11033
|
id: "indexed-docs",
|
|
10764
11034
|
"hx-get": "/web/libraries",
|
|
10765
11035
|
"hx-trigger": "load, library-change from:body",
|
|
11036
|
+
"hx-swap": "morph:innerHTML",
|
|
10766
11037
|
children: /* @__PURE__ */ jsxs("div", { class: "animate-pulse", children: [
|
|
10767
11038
|
/* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-48 mb-4" }),
|
|
10768
11039
|
/* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-full mb-2.5" }),
|
|
@@ -10917,76 +11188,53 @@ const LoadingSpinner = () => /* @__PURE__ */ jsxs(
|
|
|
10917
11188
|
const JobItem = ({ job }) => {
|
|
10918
11189
|
job.dbStatus || job.status;
|
|
10919
11190
|
const isActiveJob = job.dbStatus ? isActiveStatus(job.dbStatus) : job.status === PipelineJobStatus.QUEUED || job.status === PipelineJobStatus.RUNNING;
|
|
10920
|
-
|
|
10921
|
-
|
|
10922
|
-
|
|
10923
|
-
|
|
10924
|
-
|
|
10925
|
-
|
|
10926
|
-
|
|
10927
|
-
|
|
10928
|
-
|
|
10929
|
-
|
|
10930
|
-
/* @__PURE__ */
|
|
10931
|
-
|
|
10932
|
-
|
|
10933
|
-
|
|
10934
|
-
|
|
10935
|
-
|
|
10936
|
-
|
|
10937
|
-
|
|
10938
|
-
|
|
10939
|
-
|
|
10940
|
-
|
|
10941
|
-
"
|
|
10942
|
-
{
|
|
10943
|
-
|
|
10944
|
-
children: job.
|
|
10945
|
-
}
|
|
10946
|
-
),
|
|
10947
|
-
|
|
10948
|
-
"
|
|
10949
|
-
|
|
10950
|
-
|
|
10951
|
-
|
|
10952
|
-
|
|
10953
|
-
|
|
10954
|
-
|
|
10955
|
-
|
|
10956
|
-
|
|
10957
|
-
|
|
10958
|
-
|
|
10959
|
-
|
|
10960
|
-
|
|
10961
|
-
|
|
10962
|
-
|
|
10963
|
-
|
|
10964
|
-
|
|
10965
|
-
|
|
10966
|
-
|
|
10967
|
-
document.dispatchEvent(new CustomEvent('job-list-refresh'));
|
|
10968
|
-
})
|
|
10969
|
-
.catch(() => { $store.confirmingAction.isStopping = false; });
|
|
10970
|
-
} else {
|
|
10971
|
-
if ($store.confirmingAction.timeoutId) { clearTimeout($store.confirmingAction.timeoutId); $store.confirmingAction.timeoutId = null; }
|
|
10972
|
-
$store.confirmingAction.type = 'job-cancel';
|
|
10973
|
-
$store.confirmingAction.id = '${job.id}';
|
|
10974
|
-
$store.confirmingAction.isStopping = false;
|
|
10975
|
-
$store.confirmingAction.timeoutId = setTimeout(() => {
|
|
10976
|
-
$store.confirmingAction.type = null;
|
|
10977
|
-
$store.confirmingAction.id = null;
|
|
10978
|
-
$store.confirmingAction.isStopping = false;
|
|
10979
|
-
$store.confirmingAction.timeoutId = null;
|
|
10980
|
-
}, 3000);
|
|
10981
|
-
}
|
|
10982
|
-
`,
|
|
10983
|
-
"x-bind:disabled": `$store.confirmingAction.type === 'job-cancel' && $store.confirmingAction.id === '${job.id}' && $store.confirmingAction.isStopping`,
|
|
10984
|
-
children: [
|
|
10985
|
-
/* @__PURE__ */ jsxs(
|
|
10986
|
-
"span",
|
|
10987
|
-
{
|
|
10988
|
-
"x-show": `$store.confirmingAction.type !== 'job-cancel' || $store.confirmingAction.id !== '${job.id}' || $store.confirmingAction.isStopping`,
|
|
10989
|
-
children: [
|
|
11191
|
+
const defaultStateClasses = "border border-gray-300 bg-white text-red-600 hover:bg-red-50 focus:ring-4 focus:outline-none focus:ring-red-100 dark:border-gray-600 dark:bg-gray-800 dark:text-red-400 dark:hover:bg-gray-700 dark:focus:ring-red-900";
|
|
11192
|
+
const confirmingStateClasses = "bg-red-600 text-white border-red-600 focus:ring-4 focus:outline-none focus:ring-red-300 dark:bg-red-700 dark:border-red-700 dark:focus:ring-red-800";
|
|
11193
|
+
return /* @__PURE__ */ jsx(
|
|
11194
|
+
"div",
|
|
11195
|
+
{
|
|
11196
|
+
id: `job-item-${job.id}`,
|
|
11197
|
+
class: "block p-3 bg-gray-50 dark:bg-gray-700 rounded-lg border border-gray-200 dark:border-gray-600",
|
|
11198
|
+
"data-job-id": job.id,
|
|
11199
|
+
"x-data": "{ jobId: $el.dataset.jobId, confirming: $el.dataset.confirming === 'true', isStopping: false }",
|
|
11200
|
+
children: /* @__PURE__ */ jsxs("div", { class: "flex items-start justify-between", children: [
|
|
11201
|
+
/* @__PURE__ */ jsxs("div", { class: "flex-1", children: [
|
|
11202
|
+
/* @__PURE__ */ jsxs("p", { class: "text-sm font-medium text-gray-900 dark:text-white", children: [
|
|
11203
|
+
/* @__PURE__ */ jsx("span", { safe: true, children: job.library }),
|
|
11204
|
+
" ",
|
|
11205
|
+
/* @__PURE__ */ jsx(VersionBadge, { version: job.version })
|
|
11206
|
+
] }),
|
|
11207
|
+
/* @__PURE__ */ jsx("div", { class: "text-xs text-gray-500 dark:text-gray-400 mt-1", children: job.startedAt ? /* @__PURE__ */ jsxs("div", { children: [
|
|
11208
|
+
"Last Indexed:",
|
|
11209
|
+
" ",
|
|
11210
|
+
/* @__PURE__ */ jsx("span", { safe: true, children: new Date(job.startedAt).toLocaleString() })
|
|
11211
|
+
] }) : null }),
|
|
11212
|
+
job.progress && job.progress.totalPages > 0 && isActiveJob ? /* @__PURE__ */ jsx("div", { class: "mt-2", children: /* @__PURE__ */ jsx(ProgressBar, { progress: job.progress }) }) : null,
|
|
11213
|
+
job.errorMessage || job.error ? /* @__PURE__ */ jsxs("div", { class: "mt-2 p-2 bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded text-xs", children: [
|
|
11214
|
+
/* @__PURE__ */ jsx("div", { class: "font-medium text-red-800 dark:text-red-300 mb-1", children: "Error:" }),
|
|
11215
|
+
/* @__PURE__ */ jsx("div", { safe: true, class: "text-red-700 dark:text-red-400", children: job.errorMessage || job.error })
|
|
11216
|
+
] }) : null
|
|
11217
|
+
] }),
|
|
11218
|
+
/* @__PURE__ */ jsxs("div", { class: "flex flex-col items-end gap-2 ml-4", children: [
|
|
11219
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center gap-2", children: [
|
|
11220
|
+
job.dbStatus ? /* @__PURE__ */ jsx(StatusBadge, { status: job.dbStatus }) : /* @__PURE__ */ jsx(
|
|
11221
|
+
"span",
|
|
11222
|
+
{
|
|
11223
|
+
class: `px-1.5 py-0.5 text-xs font-medium rounded ${job.status === PipelineJobStatus.COMPLETED ? "bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-300" : job.error ? "bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-300" : "bg-blue-100 text-blue-800 dark:bg-blue-900 dark:text-blue-300"}`,
|
|
11224
|
+
children: job.status
|
|
11225
|
+
}
|
|
11226
|
+
),
|
|
11227
|
+
isActiveJob && /* @__PURE__ */ jsxs(
|
|
11228
|
+
"button",
|
|
11229
|
+
{
|
|
11230
|
+
type: "button",
|
|
11231
|
+
class: "font-medium rounded-lg text-xs p-1 text-center inline-flex items-center transition-colors duration-150 ease-in-out",
|
|
11232
|
+
title: "Stop this job",
|
|
11233
|
+
"x-bind:class": `confirming ? '${confirmingStateClasses}' : '${defaultStateClasses}'`,
|
|
11234
|
+
"x-on:click": "\n if (confirming) {\n isStopping = true;\n window.confirmationManager.clear($root.id);\n fetch('/web/jobs/' + jobId + '/cancel', {\n method: 'POST',\n headers: { 'Accept': 'application/json' },\n })\n .then(r => r.json())\n .then(() => {\n confirming = false;\n isStopping = false;\n document.dispatchEvent(new CustomEvent('job-list-refresh'));\n })\n .catch(() => { isStopping = false; });\n } else {\n confirming = true;\n isStopping = false;\n window.confirmationManager.start($root.id);\n }\n ",
|
|
11235
|
+
"x-bind:disabled": "isStopping",
|
|
11236
|
+
children: [
|
|
11237
|
+
/* @__PURE__ */ jsxs("span", { "x-show": "!confirming && !isStopping", children: [
|
|
10990
11238
|
/* @__PURE__ */ jsx(
|
|
10991
11239
|
"svg",
|
|
10992
11240
|
{
|
|
@@ -10998,39 +11246,47 @@ const JobItem = ({ job }) => {
|
|
|
10998
11246
|
}
|
|
10999
11247
|
),
|
|
11000
11248
|
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Stop job" })
|
|
11001
|
-
]
|
|
11002
|
-
|
|
11003
|
-
|
|
11004
|
-
/* @__PURE__ */ jsx(
|
|
11005
|
-
"span",
|
|
11006
|
-
{
|
|
11007
|
-
"x-show": `$store.confirmingAction.type === 'job-cancel' && $store.confirmingAction.id === '${job.id}' && !$store.confirmingAction.isStopping`,
|
|
11008
|
-
class: "px-2",
|
|
11009
|
-
children: "Cancel?"
|
|
11010
|
-
}
|
|
11011
|
-
),
|
|
11012
|
-
/* @__PURE__ */ jsxs(
|
|
11013
|
-
"span",
|
|
11014
|
-
{
|
|
11015
|
-
"x-show": `$store.confirmingAction.type === 'job-cancel' && $store.confirmingAction.id === '${job.id}' && $store.confirmingAction.isStopping`,
|
|
11016
|
-
children: [
|
|
11249
|
+
] }),
|
|
11250
|
+
/* @__PURE__ */ jsx("span", { "x-show": "confirming && !isStopping", class: "px-2", children: "Cancel?" }),
|
|
11251
|
+
/* @__PURE__ */ jsxs("span", { "x-show": "isStopping", children: [
|
|
11017
11252
|
/* @__PURE__ */ jsx(LoadingSpinner, {}),
|
|
11018
11253
|
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Stopping..." })
|
|
11019
|
-
]
|
|
11020
|
-
|
|
11021
|
-
|
|
11022
|
-
|
|
11023
|
-
}
|
|
11024
|
-
|
|
11025
|
-
|
|
11026
|
-
|
|
11027
|
-
|
|
11028
|
-
|
|
11029
|
-
)
|
|
11030
|
-
|
|
11031
|
-
|
|
11254
|
+
] })
|
|
11255
|
+
]
|
|
11256
|
+
}
|
|
11257
|
+
)
|
|
11258
|
+
] }),
|
|
11259
|
+
job.error ? (
|
|
11260
|
+
// Keep the error badge for clarity if an error occurred
|
|
11261
|
+
/* @__PURE__ */ jsx("span", { class: "bg-red-100 text-red-800 text-xs font-medium px-1.5 py-0.5 rounded dark:bg-red-900 dark:text-red-300", children: "Error" })
|
|
11262
|
+
) : null
|
|
11263
|
+
] })
|
|
11264
|
+
] })
|
|
11265
|
+
}
|
|
11266
|
+
);
|
|
11267
|
+
};
|
|
11268
|
+
const JobList = ({ jobs }) => {
|
|
11269
|
+
const hasJobs = jobs.length > 0;
|
|
11270
|
+
return /* @__PURE__ */ jsxs(Fragment, { children: [
|
|
11271
|
+
/* @__PURE__ */ jsx("div", { id: "job-list", class: "space-y-2 animate-[fadeSlideIn_0.2s_ease-out]", children: hasJobs ? jobs.map((job) => /* @__PURE__ */ jsx(JobItem, { job })) : /* @__PURE__ */ jsx("p", { class: "text-center text-gray-500 dark:text-gray-400", children: "No pending jobs." }) }),
|
|
11272
|
+
/* @__PURE__ */ jsx(
|
|
11273
|
+
"button",
|
|
11274
|
+
{
|
|
11275
|
+
id: "clear-completed-btn",
|
|
11276
|
+
"hx-swap-oob": "true",
|
|
11277
|
+
type: "button",
|
|
11278
|
+
class: `text-xs px-3 py-1.5 rounded-lg focus:ring-4 focus:outline-none transition-colors duration-150 ${hasJobs ? "text-gray-700 bg-gray-100 border border-gray-300 hover:bg-gray-200 focus:ring-gray-100 dark:bg-gray-600 dark:text-gray-300 dark:border-gray-500 dark:hover:bg-gray-700 dark:focus:ring-gray-700" : "text-gray-400 bg-gray-50 border border-gray-200 cursor-not-allowed dark:bg-gray-700 dark:text-gray-500 dark:border-gray-600"}`,
|
|
11279
|
+
title: "Clear all completed, cancelled, and failed jobs",
|
|
11280
|
+
"hx-post": "/web/jobs/clear-completed",
|
|
11281
|
+
"hx-trigger": "click",
|
|
11282
|
+
"hx-on": "htmx:afterRequest: document.dispatchEvent(new Event('job-list-refresh'))",
|
|
11283
|
+
"hx-swap": "none",
|
|
11284
|
+
disabled: !hasJobs,
|
|
11285
|
+
children: "Clear Completed Jobs"
|
|
11286
|
+
}
|
|
11287
|
+
)
|
|
11288
|
+
] });
|
|
11032
11289
|
};
|
|
11033
|
-
const JobList = ({ jobs }) => /* @__PURE__ */ jsx("div", { id: "job-list", class: "space-y-2", children: jobs.length === 0 ? /* @__PURE__ */ jsx("p", { class: "text-center text-gray-500 dark:text-gray-400", children: "No pending jobs." }) : jobs.map((job) => /* @__PURE__ */ jsx(JobItem, { job })) });
|
|
11034
11290
|
function registerJobListRoutes(server, listJobsTool) {
|
|
11035
11291
|
server.get("/web/jobs", async () => {
|
|
11036
11292
|
const result = await listJobsTool.execute({});
|
|
@@ -11048,7 +11304,7 @@ const Alert = ({ type, title, message }) => {
|
|
|
11048
11304
|
iconSvg = /* @__PURE__ */ jsx(
|
|
11049
11305
|
"svg",
|
|
11050
11306
|
{
|
|
11051
|
-
class: "
|
|
11307
|
+
class: "shrink-0 inline w-4 h-4 me-3",
|
|
11052
11308
|
"aria-hidden": "true",
|
|
11053
11309
|
xmlns: "http://www.w3.org/2000/svg",
|
|
11054
11310
|
fill: "currentColor",
|
|
@@ -11063,7 +11319,7 @@ const Alert = ({ type, title, message }) => {
|
|
|
11063
11319
|
iconSvg = /* @__PURE__ */ jsx(
|
|
11064
11320
|
"svg",
|
|
11065
11321
|
{
|
|
11066
|
-
class: "
|
|
11322
|
+
class: "shrink-0 inline w-4 h-4 me-3",
|
|
11067
11323
|
"aria-hidden": "true",
|
|
11068
11324
|
xmlns: "http://www.w3.org/2000/svg",
|
|
11069
11325
|
fill: "currentColor",
|
|
@@ -11078,7 +11334,7 @@ const Alert = ({ type, title, message }) => {
|
|
|
11078
11334
|
iconSvg = /* @__PURE__ */ jsx(
|
|
11079
11335
|
"svg",
|
|
11080
11336
|
{
|
|
11081
|
-
class: "
|
|
11337
|
+
class: "shrink-0 inline w-4 h-4 me-3",
|
|
11082
11338
|
"aria-hidden": "true",
|
|
11083
11339
|
xmlns: "http://www.w3.org/2000/svg",
|
|
11084
11340
|
fill: "currentColor",
|
|
@@ -11094,7 +11350,7 @@ const Alert = ({ type, title, message }) => {
|
|
|
11094
11350
|
iconSvg = /* @__PURE__ */ jsx(
|
|
11095
11351
|
"svg",
|
|
11096
11352
|
{
|
|
11097
|
-
class: "
|
|
11353
|
+
class: "shrink-0 inline w-4 h-4 me-3",
|
|
11098
11354
|
"aria-hidden": "true",
|
|
11099
11355
|
xmlns: "http://www.w3.org/2000/svg",
|
|
11100
11356
|
fill: "currentColor",
|
|
@@ -11184,13 +11440,43 @@ const ScrapeFormContent = ({
|
|
|
11184
11440
|
defaultExcludePatterns
|
|
11185
11441
|
}) => {
|
|
11186
11442
|
const defaultExcludePatternsText = defaultExcludePatterns?.join("\n") || "";
|
|
11187
|
-
return /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600", children: [
|
|
11188
|
-
/* @__PURE__ */ jsx(
|
|
11443
|
+
return /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600 relative animate-[fadeSlideIn_0.2s_ease-out]", children: [
|
|
11444
|
+
/* @__PURE__ */ jsx(
|
|
11445
|
+
"button",
|
|
11446
|
+
{
|
|
11447
|
+
type: "button",
|
|
11448
|
+
"hx-get": "/web/jobs/new-button",
|
|
11449
|
+
"hx-target": "#addJobForm",
|
|
11450
|
+
"hx-swap": "innerHTML",
|
|
11451
|
+
class: "absolute top-3 right-3 p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300 rounded-full hover:bg-gray-100 dark:hover:bg-gray-700 transition-colors duration-150",
|
|
11452
|
+
title: "Close",
|
|
11453
|
+
children: /* @__PURE__ */ jsx(
|
|
11454
|
+
"svg",
|
|
11455
|
+
{
|
|
11456
|
+
class: "w-5 h-5",
|
|
11457
|
+
fill: "none",
|
|
11458
|
+
stroke: "currentColor",
|
|
11459
|
+
viewBox: "0 0 24 24",
|
|
11460
|
+
xmlns: "http://www.w3.org/2000/svg",
|
|
11461
|
+
children: /* @__PURE__ */ jsx(
|
|
11462
|
+
"path",
|
|
11463
|
+
{
|
|
11464
|
+
"stroke-linecap": "round",
|
|
11465
|
+
"stroke-linejoin": "round",
|
|
11466
|
+
"stroke-width": "2",
|
|
11467
|
+
d: "M6 18L18 6M6 6l12 12"
|
|
11468
|
+
}
|
|
11469
|
+
)
|
|
11470
|
+
}
|
|
11471
|
+
)
|
|
11472
|
+
}
|
|
11473
|
+
),
|
|
11474
|
+
/* @__PURE__ */ jsx("h3", { class: "text-xl font-semibold text-gray-900 dark:text-white mb-2 pr-8", children: "Add New Documentation" }),
|
|
11189
11475
|
/* @__PURE__ */ jsxs(
|
|
11190
11476
|
"form",
|
|
11191
11477
|
{
|
|
11192
11478
|
"hx-post": "/web/jobs/scrape",
|
|
11193
|
-
"hx-target": "#
|
|
11479
|
+
"hx-target": "#addJobForm",
|
|
11194
11480
|
"hx-swap": "innerHTML",
|
|
11195
11481
|
class: "space-y-2",
|
|
11196
11482
|
"x-data": "{\n url: '',\n hasPath: false,\n headers: [],\n checkUrlPath() {\n try {\n const url = new URL(this.url);\n this.hasPath = url.pathname !== '/' && url.pathname !== '';\n } catch (e) {\n this.hasPath = false;\n }\n }\n }",
|
|
@@ -11292,313 +11578,338 @@ const ScrapeFormContent = ({
|
|
|
11292
11578
|
),
|
|
11293
11579
|
/* @__PURE__ */ jsx(Tooltip, { text: "Specify the version of the library documentation you're indexing. This allows for version-specific searches." })
|
|
11294
11580
|
] }),
|
|
11295
|
-
/* @__PURE__ */ jsx(
|
|
11296
|
-
"input",
|
|
11297
|
-
{
|
|
11298
|
-
type: "text",
|
|
11299
|
-
name: "version",
|
|
11300
|
-
id: "version",
|
|
11301
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11302
|
-
}
|
|
11303
|
-
)
|
|
11304
|
-
] }),
|
|
11305
|
-
/* @__PURE__ */ jsxs(
|
|
11306
|
-
|
|
11307
|
-
|
|
11308
|
-
|
|
11309
|
-
|
|
11310
|
-
|
|
11311
|
-
"label",
|
|
11312
|
-
{
|
|
11313
|
-
for: "maxPages",
|
|
11314
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11315
|
-
children: "Max Pages"
|
|
11316
|
-
}
|
|
11317
|
-
),
|
|
11318
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
|
|
11319
|
-
] }),
|
|
11320
|
-
/* @__PURE__ */ jsx(
|
|
11321
|
-
"input",
|
|
11322
|
-
{
|
|
11323
|
-
type: "number",
|
|
11324
|
-
name: "maxPages",
|
|
11325
|
-
id: "maxPages",
|
|
11326
|
-
min: "1",
|
|
11327
|
-
placeholder: "1000",
|
|
11328
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11329
|
-
}
|
|
11330
|
-
)
|
|
11331
|
-
] }),
|
|
11332
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11333
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11334
|
-
/* @__PURE__ */ jsx(
|
|
11335
|
-
"label",
|
|
11336
|
-
{
|
|
11337
|
-
for: "maxDepth",
|
|
11338
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11339
|
-
children: "Max Depth"
|
|
11340
|
-
}
|
|
11341
|
-
),
|
|
11342
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
|
|
11343
|
-
] }),
|
|
11344
|
-
/* @__PURE__ */ jsx(
|
|
11345
|
-
"input",
|
|
11346
|
-
{
|
|
11347
|
-
type: "number",
|
|
11348
|
-
name: "maxDepth",
|
|
11349
|
-
id: "maxDepth",
|
|
11350
|
-
min: "0",
|
|
11351
|
-
placeholder: "3",
|
|
11352
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11353
|
-
}
|
|
11354
|
-
)
|
|
11355
|
-
] }),
|
|
11356
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11357
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11358
|
-
/* @__PURE__ */ jsx(
|
|
11359
|
-
"label",
|
|
11360
|
-
{
|
|
11361
|
-
for: "scope",
|
|
11362
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11363
|
-
children: "Scope"
|
|
11364
|
-
}
|
|
11365
|
-
),
|
|
11366
|
-
/* @__PURE__ */ jsx(
|
|
11367
|
-
Tooltip,
|
|
11368
|
-
{
|
|
11369
|
-
text: /* @__PURE__ */ jsxs("div", { children: [
|
|
11370
|
-
"Controls which pages are scraped:",
|
|
11371
|
-
/* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
|
|
11372
|
-
/* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
|
|
11373
|
-
/* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
|
|
11374
|
-
/* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
|
|
11375
|
-
] })
|
|
11376
|
-
] })
|
|
11377
|
-
}
|
|
11378
|
-
)
|
|
11379
|
-
] }),
|
|
11581
|
+
/* @__PURE__ */ jsx(
|
|
11582
|
+
"input",
|
|
11583
|
+
{
|
|
11584
|
+
type: "text",
|
|
11585
|
+
name: "version",
|
|
11586
|
+
id: "version",
|
|
11587
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11588
|
+
}
|
|
11589
|
+
)
|
|
11590
|
+
] }),
|
|
11591
|
+
/* @__PURE__ */ jsxs(
|
|
11592
|
+
"div",
|
|
11593
|
+
{
|
|
11594
|
+
class: "bg-gray-50 dark:bg-gray-900 p-2 rounded-md",
|
|
11595
|
+
"x-data": "{ open: false, headers: [] }",
|
|
11596
|
+
children: [
|
|
11380
11597
|
/* @__PURE__ */ jsxs(
|
|
11381
|
-
"
|
|
11598
|
+
"button",
|
|
11382
11599
|
{
|
|
11383
|
-
|
|
11384
|
-
|
|
11385
|
-
|
|
11600
|
+
type: "button",
|
|
11601
|
+
class: "w-full flex items-center gap-1.5 cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400 hover:text-gray-800 dark:hover:text-gray-200 transition-colors",
|
|
11602
|
+
"x-on:click": "open = !open",
|
|
11386
11603
|
children: [
|
|
11387
|
-
/* @__PURE__ */ jsx(
|
|
11388
|
-
|
|
11389
|
-
|
|
11604
|
+
/* @__PURE__ */ jsx(
|
|
11605
|
+
"svg",
|
|
11606
|
+
{
|
|
11607
|
+
class: "w-4 h-4 transform transition-transform duration-200",
|
|
11608
|
+
"x-bind:class": "{ 'rotate-90': open }",
|
|
11609
|
+
fill: "none",
|
|
11610
|
+
stroke: "currentColor",
|
|
11611
|
+
viewBox: "0 0 24 24",
|
|
11612
|
+
children: /* @__PURE__ */ jsx(
|
|
11613
|
+
"path",
|
|
11614
|
+
{
|
|
11615
|
+
"stroke-linecap": "round",
|
|
11616
|
+
"stroke-linejoin": "round",
|
|
11617
|
+
"stroke-width": "2",
|
|
11618
|
+
d: "M9 5l7 7-7 7"
|
|
11619
|
+
}
|
|
11620
|
+
)
|
|
11621
|
+
}
|
|
11622
|
+
),
|
|
11623
|
+
/* @__PURE__ */ jsx("span", { children: "Advanced Options" })
|
|
11390
11624
|
]
|
|
11391
11625
|
}
|
|
11392
|
-
)
|
|
11393
|
-
] }),
|
|
11394
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11395
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11396
|
-
/* @__PURE__ */ jsx(
|
|
11397
|
-
"label",
|
|
11398
|
-
{
|
|
11399
|
-
for: "includePatterns",
|
|
11400
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11401
|
-
children: "Include Patterns"
|
|
11402
|
-
}
|
|
11403
|
-
),
|
|
11404
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
|
|
11405
|
-
] }),
|
|
11406
|
-
/* @__PURE__ */ jsx(
|
|
11407
|
-
"textarea",
|
|
11408
|
-
{
|
|
11409
|
-
name: "includePatterns",
|
|
11410
|
-
id: "includePatterns",
|
|
11411
|
-
rows: "2",
|
|
11412
|
-
placeholder: "e.g. docs/* or /api\\/v1.*/",
|
|
11413
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11414
|
-
}
|
|
11415
|
-
)
|
|
11416
|
-
] }),
|
|
11417
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11418
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11419
|
-
/* @__PURE__ */ jsx(
|
|
11420
|
-
"label",
|
|
11421
|
-
{
|
|
11422
|
-
for: "excludePatterns",
|
|
11423
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11424
|
-
children: "Exclude Patterns"
|
|
11425
|
-
}
|
|
11426
|
-
),
|
|
11427
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/. Edit or clear this field to customize exclusions." })
|
|
11428
|
-
] }),
|
|
11429
|
-
/* @__PURE__ */ jsx(
|
|
11430
|
-
"textarea",
|
|
11431
|
-
{
|
|
11432
|
-
name: "excludePatterns",
|
|
11433
|
-
id: "excludePatterns",
|
|
11434
|
-
rows: "5",
|
|
11435
|
-
safe: true,
|
|
11436
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white font-mono text-xs",
|
|
11437
|
-
children: defaultExcludePatternsText
|
|
11438
|
-
}
|
|
11439
11626
|
),
|
|
11440
|
-
/* @__PURE__ */
|
|
11441
|
-
|
|
11442
|
-
|
|
11443
|
-
|
|
11444
|
-
|
|
11445
|
-
|
|
11446
|
-
|
|
11447
|
-
|
|
11448
|
-
|
|
11449
|
-
|
|
11450
|
-
|
|
11451
|
-
|
|
11452
|
-
|
|
11453
|
-
Tooltip,
|
|
11454
|
-
{
|
|
11455
|
-
text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
|
|
11456
|
-
/* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
|
|
11457
|
-
/* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
|
|
11458
|
-
/* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
|
|
11459
|
-
] }) })
|
|
11460
|
-
}
|
|
11461
|
-
)
|
|
11462
|
-
] }),
|
|
11463
|
-
/* @__PURE__ */ jsxs(
|
|
11464
|
-
"select",
|
|
11465
|
-
{
|
|
11466
|
-
name: "scrapeMode",
|
|
11467
|
-
id: "scrapeMode",
|
|
11468
|
-
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
11469
|
-
children: [
|
|
11470
|
-
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
|
|
11471
|
-
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
|
|
11472
|
-
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
|
|
11473
|
-
]
|
|
11474
|
-
}
|
|
11475
|
-
)
|
|
11476
|
-
] }),
|
|
11477
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11478
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
|
|
11479
|
-
/* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
|
|
11480
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
|
|
11481
|
-
] }),
|
|
11482
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11483
|
-
/* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
|
|
11627
|
+
/* @__PURE__ */ jsxs("div", { "x-show": "open", "x-cloak": true, "x-collapse": true, class: "mt-2 space-y-2", children: [
|
|
11628
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11629
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11630
|
+
/* @__PURE__ */ jsx(
|
|
11631
|
+
"label",
|
|
11632
|
+
{
|
|
11633
|
+
for: "maxPages",
|
|
11634
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11635
|
+
children: "Max Pages"
|
|
11636
|
+
}
|
|
11637
|
+
),
|
|
11638
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
|
|
11639
|
+
] }),
|
|
11484
11640
|
/* @__PURE__ */ jsx(
|
|
11485
11641
|
"input",
|
|
11486
11642
|
{
|
|
11487
|
-
type: "
|
|
11488
|
-
|
|
11489
|
-
|
|
11490
|
-
|
|
11491
|
-
|
|
11643
|
+
type: "number",
|
|
11644
|
+
name: "maxPages",
|
|
11645
|
+
id: "maxPages",
|
|
11646
|
+
min: "1",
|
|
11647
|
+
placeholder: "1000",
|
|
11648
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11492
11649
|
}
|
|
11493
|
-
)
|
|
11494
|
-
|
|
11650
|
+
)
|
|
11651
|
+
] }),
|
|
11652
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11653
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11654
|
+
/* @__PURE__ */ jsx(
|
|
11655
|
+
"label",
|
|
11656
|
+
{
|
|
11657
|
+
for: "maxDepth",
|
|
11658
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11659
|
+
children: "Max Depth"
|
|
11660
|
+
}
|
|
11661
|
+
),
|
|
11662
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
|
|
11663
|
+
] }),
|
|
11495
11664
|
/* @__PURE__ */ jsx(
|
|
11496
11665
|
"input",
|
|
11497
11666
|
{
|
|
11498
|
-
type: "
|
|
11499
|
-
|
|
11500
|
-
|
|
11501
|
-
|
|
11502
|
-
|
|
11667
|
+
type: "number",
|
|
11668
|
+
name: "maxDepth",
|
|
11669
|
+
id: "maxDepth",
|
|
11670
|
+
min: "0",
|
|
11671
|
+
placeholder: "3",
|
|
11672
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11673
|
+
}
|
|
11674
|
+
)
|
|
11675
|
+
] }),
|
|
11676
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11677
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11678
|
+
/* @__PURE__ */ jsx(
|
|
11679
|
+
"label",
|
|
11680
|
+
{
|
|
11681
|
+
for: "scope",
|
|
11682
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11683
|
+
children: "Scope"
|
|
11684
|
+
}
|
|
11685
|
+
),
|
|
11686
|
+
/* @__PURE__ */ jsx(
|
|
11687
|
+
Tooltip,
|
|
11688
|
+
{
|
|
11689
|
+
text: /* @__PURE__ */ jsxs("div", { children: [
|
|
11690
|
+
"Controls which pages are scraped:",
|
|
11691
|
+
/* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
|
|
11692
|
+
/* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
|
|
11693
|
+
/* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
|
|
11694
|
+
/* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
|
|
11695
|
+
] })
|
|
11696
|
+
] })
|
|
11697
|
+
}
|
|
11698
|
+
)
|
|
11699
|
+
] }),
|
|
11700
|
+
/* @__PURE__ */ jsxs(
|
|
11701
|
+
"select",
|
|
11702
|
+
{
|
|
11703
|
+
name: "scope",
|
|
11704
|
+
id: "scope",
|
|
11705
|
+
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
11706
|
+
children: [
|
|
11707
|
+
/* @__PURE__ */ jsx("option", { value: "subpages", selected: true, children: "Subpages (Default)" }),
|
|
11708
|
+
/* @__PURE__ */ jsx("option", { value: "hostname", children: "Hostname" }),
|
|
11709
|
+
/* @__PURE__ */ jsx("option", { value: "domain", children: "Domain" })
|
|
11710
|
+
]
|
|
11711
|
+
}
|
|
11712
|
+
)
|
|
11713
|
+
] }),
|
|
11714
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11715
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11716
|
+
/* @__PURE__ */ jsx(
|
|
11717
|
+
"label",
|
|
11718
|
+
{
|
|
11719
|
+
for: "includePatterns",
|
|
11720
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11721
|
+
children: "Include Patterns"
|
|
11722
|
+
}
|
|
11723
|
+
),
|
|
11724
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
|
|
11725
|
+
] }),
|
|
11726
|
+
/* @__PURE__ */ jsx(
|
|
11727
|
+
"textarea",
|
|
11728
|
+
{
|
|
11729
|
+
name: "includePatterns",
|
|
11730
|
+
id: "includePatterns",
|
|
11731
|
+
rows: "2",
|
|
11732
|
+
placeholder: "e.g. docs/* or /api\\/v1.*/",
|
|
11733
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11734
|
+
}
|
|
11735
|
+
)
|
|
11736
|
+
] }),
|
|
11737
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11738
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11739
|
+
/* @__PURE__ */ jsx(
|
|
11740
|
+
"label",
|
|
11741
|
+
{
|
|
11742
|
+
for: "excludePatterns",
|
|
11743
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11744
|
+
children: "Exclude Patterns"
|
|
11745
|
+
}
|
|
11746
|
+
),
|
|
11747
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/. Edit or clear this field to customize exclusions." })
|
|
11748
|
+
] }),
|
|
11749
|
+
/* @__PURE__ */ jsx(
|
|
11750
|
+
"textarea",
|
|
11751
|
+
{
|
|
11752
|
+
name: "excludePatterns",
|
|
11753
|
+
id: "excludePatterns",
|
|
11754
|
+
rows: "5",
|
|
11755
|
+
safe: true,
|
|
11756
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white font-mono text-xs",
|
|
11757
|
+
children: defaultExcludePatternsText
|
|
11503
11758
|
}
|
|
11504
11759
|
),
|
|
11760
|
+
/* @__PURE__ */ jsx("p", { class: "mt-1 text-xs text-gray-500 dark:text-gray-400", children: "Default patterns are pre-filled. Edit to customize or clear to exclude nothing." })
|
|
11761
|
+
] }),
|
|
11762
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11763
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11764
|
+
/* @__PURE__ */ jsx(
|
|
11765
|
+
"label",
|
|
11766
|
+
{
|
|
11767
|
+
for: "scrapeMode",
|
|
11768
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11769
|
+
children: "Scrape Mode"
|
|
11770
|
+
}
|
|
11771
|
+
),
|
|
11772
|
+
/* @__PURE__ */ jsx(
|
|
11773
|
+
Tooltip,
|
|
11774
|
+
{
|
|
11775
|
+
text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
|
|
11776
|
+
/* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
|
|
11777
|
+
/* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
|
|
11778
|
+
/* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
|
|
11779
|
+
] }) })
|
|
11780
|
+
}
|
|
11781
|
+
)
|
|
11782
|
+
] }),
|
|
11783
|
+
/* @__PURE__ */ jsxs(
|
|
11784
|
+
"select",
|
|
11785
|
+
{
|
|
11786
|
+
name: "scrapeMode",
|
|
11787
|
+
id: "scrapeMode",
|
|
11788
|
+
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
11789
|
+
children: [
|
|
11790
|
+
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
|
|
11791
|
+
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
|
|
11792
|
+
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
|
|
11793
|
+
]
|
|
11794
|
+
}
|
|
11795
|
+
)
|
|
11796
|
+
] }),
|
|
11797
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11798
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
|
|
11799
|
+
/* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
|
|
11800
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
|
|
11801
|
+
] }),
|
|
11802
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11803
|
+
/* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
|
|
11804
|
+
/* @__PURE__ */ jsx(
|
|
11805
|
+
"input",
|
|
11806
|
+
{
|
|
11807
|
+
type: "text",
|
|
11808
|
+
class: "w-1/3 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
|
|
11809
|
+
placeholder: "Header Name",
|
|
11810
|
+
"x-model": "header.name",
|
|
11811
|
+
required: true
|
|
11812
|
+
}
|
|
11813
|
+
),
|
|
11814
|
+
/* @__PURE__ */ jsx("span", { class: "text-gray-500", children: ":" }),
|
|
11815
|
+
/* @__PURE__ */ jsx(
|
|
11816
|
+
"input",
|
|
11817
|
+
{
|
|
11818
|
+
type: "text",
|
|
11819
|
+
class: "w-1/2 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
|
|
11820
|
+
placeholder: "Header Value",
|
|
11821
|
+
"x-model": "header.value",
|
|
11822
|
+
required: true
|
|
11823
|
+
}
|
|
11824
|
+
),
|
|
11825
|
+
/* @__PURE__ */ jsx(
|
|
11826
|
+
"button",
|
|
11827
|
+
{
|
|
11828
|
+
type: "button",
|
|
11829
|
+
class: "text-red-500 hover:text-red-700 text-xs",
|
|
11830
|
+
"x-on:click": "headers.splice(idx, 1)",
|
|
11831
|
+
children: "Remove"
|
|
11832
|
+
}
|
|
11833
|
+
),
|
|
11834
|
+
/* @__PURE__ */ jsx(
|
|
11835
|
+
"input",
|
|
11836
|
+
{
|
|
11837
|
+
type: "hidden",
|
|
11838
|
+
name: "header[]",
|
|
11839
|
+
"x-bind:value": "header.name && header.value ? header.name + ':' + header.value : ''"
|
|
11840
|
+
}
|
|
11841
|
+
)
|
|
11842
|
+
] }) }),
|
|
11843
|
+
/* @__PURE__ */ jsx(
|
|
11844
|
+
"button",
|
|
11845
|
+
{
|
|
11846
|
+
type: "button",
|
|
11847
|
+
class: "mt-1 px-2 py-0.5 bg-primary-100 dark:bg-primary-900 text-primary-700 dark:text-primary-200 rounded text-xs",
|
|
11848
|
+
"x-on:click": "headers.push({ name: '', value: '' })",
|
|
11849
|
+
children: "+ Add Header"
|
|
11850
|
+
}
|
|
11851
|
+
)
|
|
11852
|
+
] })
|
|
11853
|
+
] }),
|
|
11854
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11505
11855
|
/* @__PURE__ */ jsx(
|
|
11506
|
-
"
|
|
11856
|
+
"input",
|
|
11507
11857
|
{
|
|
11508
|
-
|
|
11509
|
-
|
|
11510
|
-
|
|
11511
|
-
|
|
11858
|
+
id: "followRedirects",
|
|
11859
|
+
name: "followRedirects",
|
|
11860
|
+
type: "checkbox",
|
|
11861
|
+
checked: true,
|
|
11862
|
+
class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
11512
11863
|
}
|
|
11513
11864
|
),
|
|
11865
|
+
/* @__PURE__ */ jsx(
|
|
11866
|
+
"label",
|
|
11867
|
+
{
|
|
11868
|
+
for: "followRedirects",
|
|
11869
|
+
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
11870
|
+
children: "Follow Redirects"
|
|
11871
|
+
}
|
|
11872
|
+
)
|
|
11873
|
+
] }),
|
|
11874
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11514
11875
|
/* @__PURE__ */ jsx(
|
|
11515
11876
|
"input",
|
|
11516
11877
|
{
|
|
11517
|
-
|
|
11518
|
-
name: "
|
|
11519
|
-
|
|
11878
|
+
id: "ignoreErrors",
|
|
11879
|
+
name: "ignoreErrors",
|
|
11880
|
+
type: "checkbox",
|
|
11881
|
+
checked: true,
|
|
11882
|
+
class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
11883
|
+
}
|
|
11884
|
+
),
|
|
11885
|
+
/* @__PURE__ */ jsx(
|
|
11886
|
+
"label",
|
|
11887
|
+
{
|
|
11888
|
+
for: "ignoreErrors",
|
|
11889
|
+
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
11890
|
+
children: "Ignore Errors During Scraping"
|
|
11520
11891
|
}
|
|
11521
11892
|
)
|
|
11522
|
-
] })
|
|
11523
|
-
/* @__PURE__ */ jsx(
|
|
11524
|
-
"button",
|
|
11525
|
-
{
|
|
11526
|
-
type: "button",
|
|
11527
|
-
class: "mt-1 px-2 py-0.5 bg-primary-100 dark:bg-primary-900 text-primary-700 dark:text-primary-200 rounded text-xs",
|
|
11528
|
-
"x-on:click": "headers.push({ name: '', value: '' })",
|
|
11529
|
-
children: "+ Add Header"
|
|
11530
|
-
}
|
|
11531
|
-
)
|
|
11893
|
+
] })
|
|
11532
11894
|
] })
|
|
11533
|
-
]
|
|
11534
|
-
|
|
11535
|
-
|
|
11536
|
-
"input",
|
|
11537
|
-
{
|
|
11538
|
-
id: "followRedirects",
|
|
11539
|
-
name: "followRedirects",
|
|
11540
|
-
type: "checkbox",
|
|
11541
|
-
checked: true,
|
|
11542
|
-
class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
11543
|
-
}
|
|
11544
|
-
),
|
|
11545
|
-
/* @__PURE__ */ jsx(
|
|
11546
|
-
"label",
|
|
11547
|
-
{
|
|
11548
|
-
for: "followRedirects",
|
|
11549
|
-
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
11550
|
-
children: "Follow Redirects"
|
|
11551
|
-
}
|
|
11552
|
-
)
|
|
11553
|
-
] }),
|
|
11554
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11555
|
-
/* @__PURE__ */ jsx(
|
|
11556
|
-
"input",
|
|
11557
|
-
{
|
|
11558
|
-
id: "ignoreErrors",
|
|
11559
|
-
name: "ignoreErrors",
|
|
11560
|
-
type: "checkbox",
|
|
11561
|
-
checked: true,
|
|
11562
|
-
class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
11563
|
-
}
|
|
11564
|
-
),
|
|
11565
|
-
/* @__PURE__ */ jsx(
|
|
11566
|
-
"label",
|
|
11567
|
-
{
|
|
11568
|
-
for: "ignoreErrors",
|
|
11569
|
-
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
11570
|
-
children: "Ignore Errors During Scraping"
|
|
11571
|
-
}
|
|
11572
|
-
)
|
|
11573
|
-
] })
|
|
11574
|
-
] })
|
|
11575
|
-
] }),
|
|
11895
|
+
]
|
|
11896
|
+
}
|
|
11897
|
+
),
|
|
11576
11898
|
/* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsx(
|
|
11577
11899
|
"button",
|
|
11578
11900
|
{
|
|
11579
11901
|
type: "submit",
|
|
11580
11902
|
class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500",
|
|
11581
|
-
children: "
|
|
11903
|
+
children: "Start Indexing"
|
|
11582
11904
|
}
|
|
11583
11905
|
) })
|
|
11584
11906
|
]
|
|
11585
11907
|
}
|
|
11586
11908
|
),
|
|
11587
|
-
/* @__PURE__ */ jsx("div", { id: "job-response", class: "mt-2 text-sm" })
|
|
11588
|
-
/* @__PURE__ */ jsx("script", { children: `
|
|
11589
|
-
document.addEventListener('htmx:responseError', function(evt) {
|
|
11590
|
-
// Handle error responses from the form submission
|
|
11591
|
-
if (evt.detail.xhr && evt.detail.xhr.response) {
|
|
11592
|
-
const responseDiv = document.getElementById('job-response');
|
|
11593
|
-
if (responseDiv) {
|
|
11594
|
-
responseDiv.innerHTML = evt.detail.xhr.response;
|
|
11595
|
-
}
|
|
11596
|
-
}
|
|
11597
|
-
});
|
|
11598
|
-
` })
|
|
11909
|
+
/* @__PURE__ */ jsx("div", { id: "job-response", class: "mt-2 text-sm" })
|
|
11599
11910
|
] });
|
|
11600
11911
|
};
|
|
11601
|
-
const ScrapeForm = ({ defaultExcludePatterns }) => /* @__PURE__ */ jsx("div", { id: "scrape-form-container", children: /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns }) });
|
|
11912
|
+
/**
 * Outer container for the scrape form.
 *
 * Renders a `div#scrape-form-container` carrying the fade/slide-in animation
 * class and places a `ScrapeFormContent` inside it.
 *
 * @param {object} props
 * @param {*} props.defaultExcludePatterns - Forwarded unchanged to `ScrapeFormContent`.
 * @returns The rendered container element.
 */
const ScrapeForm = ({ defaultExcludePatterns }) => {
  const formBody = /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns });
  return /* @__PURE__ */ jsx("div", {
    id: "scrape-form-container",
    class: "animate-[fadeSlideIn_0.2s_ease-out]",
    children: formBody
  });
};
|
|
11602
11913
|
const DEFAULT_FILE_EXCLUSIONS = [
|
|
11603
11914
|
// CHANGELOG files (case variations)
|
|
11604
11915
|
"**/CHANGELOG.md",
|
|
@@ -11698,10 +12009,24 @@ function getEffectiveExclusionPatterns(userPatterns) {
|
|
|
11698
12009
|
}
|
|
11699
12010
|
return DEFAULT_EXCLUSION_PATTERNS;
|
|
11700
12011
|
}
|
|
12012
|
+
/**
 * "Add New Documentation" button.
 *
 * The htmx attributes make a click issue `GET /web/jobs/new` and swap the
 * response into the inner HTML of `#addJobForm`.
 *
 * @returns The rendered button element.
 */
const ScrapeFormButton = () => {
  const buttonProps = {
    type: "button",
    "hx-get": "/web/jobs/new",
    "hx-target": "#addJobForm",
    "hx-swap": "innerHTML",
    class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500 transition-colors duration-150",
    children: "Add New Documentation"
  };
  return /* @__PURE__ */ jsx("button", buttonProps);
};
|
|
11701
12023
|
function registerNewJobRoutes(server, scrapeTool) {
|
|
11702
12024
|
server.get("/web/jobs/new", async () => {
|
|
11703
12025
|
return /* @__PURE__ */ jsx(ScrapeForm, { defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS });
|
|
11704
12026
|
});
|
|
12027
|
+
server.get("/web/jobs/new-button", async () => {
|
|
12028
|
+
return /* @__PURE__ */ jsx(ScrapeFormButton, {});
|
|
12029
|
+
});
|
|
11705
12030
|
server.post(
|
|
11706
12031
|
"/web/jobs/scrape",
|
|
11707
12032
|
async (request, reply) => {
|
|
@@ -11759,25 +12084,16 @@ function registerNewJobRoutes(server, scrapeTool) {
|
|
|
11759
12084
|
};
|
|
11760
12085
|
const result = await scrapeTool.execute(scrapeOptions);
|
|
11761
12086
|
if ("jobId" in result) {
|
|
11762
|
-
|
|
11763
|
-
|
|
11764
|
-
|
|
11765
|
-
{
|
|
11766
|
-
|
|
11767
|
-
|
|
11768
|
-
"Job queued successfully! ID:",
|
|
11769
|
-
" ",
|
|
11770
|
-
/* @__PURE__ */ jsx("span", { safe: true, children: result.jobId })
|
|
11771
|
-
] })
|
|
11772
|
-
}
|
|
11773
|
-
),
|
|
11774
|
-
/* @__PURE__ */ jsx("div", { id: "scrape-form-container", "hx-swap-oob": "innerHTML", children: /* @__PURE__ */ jsx(
|
|
11775
|
-
ScrapeFormContent,
|
|
11776
|
-
{
|
|
11777
|
-
defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS
|
|
12087
|
+
reply.header(
|
|
12088
|
+
"HX-Trigger",
|
|
12089
|
+
JSON.stringify({
|
|
12090
|
+
toast: {
|
|
12091
|
+
message: "Job queued successfully!",
|
|
12092
|
+
type: "success"
|
|
11778
12093
|
}
|
|
11779
|
-
|
|
11780
|
-
|
|
12094
|
+
})
|
|
12095
|
+
);
|
|
12096
|
+
return /* @__PURE__ */ jsx(ScrapeFormButton, {});
|
|
11781
12097
|
}
|
|
11782
12098
|
return /* @__PURE__ */ jsx(Alert, { type: "warning", message: "Job finished unexpectedly quickly." });
|
|
11783
12099
|
} catch (error) {
|
|
@@ -11821,6 +12137,9 @@ const VersionDetailsRow = ({
|
|
|
11821
12137
|
{
|
|
11822
12138
|
id: rowId,
|
|
11823
12139
|
class: "flex justify-between items-center py-1 border-b border-gray-200 dark:border-gray-600 last:border-b-0",
|
|
12140
|
+
"data-library-name": libraryName,
|
|
12141
|
+
"data-version-param": versionParam,
|
|
12142
|
+
"x-data": "{ library: $el.dataset.libraryName, version: $el.dataset.versionParam, confirming: $el.dataset.confirming === 'true', isDeleting: false }",
|
|
11824
12143
|
children: [
|
|
11825
12144
|
/* @__PURE__ */ jsx(
|
|
11826
12145
|
"span",
|
|
@@ -11837,7 +12156,7 @@ const VersionDetailsRow = ({
|
|
|
11837
12156
|
/* @__PURE__ */ jsx("span", { class: "font-semibold", safe: true, children: version.counts.uniqueUrls.toLocaleString() })
|
|
11838
12157
|
] }),
|
|
11839
12158
|
/* @__PURE__ */ jsxs("span", { title: "Number of indexed snippets", children: [
|
|
11840
|
-
"
|
|
12159
|
+
"Chunks:",
|
|
11841
12160
|
" ",
|
|
11842
12161
|
/* @__PURE__ */ jsx("span", { class: "font-semibold", safe: true, children: version.counts.documents.toLocaleString() })
|
|
11843
12162
|
] }),
|
|
@@ -11853,81 +12172,45 @@ const VersionDetailsRow = ({
|
|
|
11853
12172
|
type: "button",
|
|
11854
12173
|
class: "ml-2 font-medium rounded-lg text-sm p-1 text-center inline-flex items-center transition-colors duration-150 ease-in-out",
|
|
11855
12174
|
title: "Remove this version",
|
|
11856
|
-
"x-
|
|
11857
|
-
"x-bind:
|
|
11858
|
-
"x-
|
|
11859
|
-
"x-on:click": `
|
|
11860
|
-
if ($store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === '${libraryName}:${versionParam}') {
|
|
11861
|
-
$store.confirmingAction.isDeleting = true;
|
|
11862
|
-
$el.dispatchEvent(new CustomEvent('confirmed-delete', { bubbles: true }));
|
|
11863
|
-
} else {
|
|
11864
|
-
if ($store.confirmingAction.timeoutId) { clearTimeout($store.confirmingAction.timeoutId); $store.confirmingAction.timeoutId = null; }
|
|
11865
|
-
$store.confirmingAction.type = 'version-delete';
|
|
11866
|
-
$store.confirmingAction.id = '${libraryName}:${versionParam}';
|
|
11867
|
-
$store.confirmingAction.isDeleting = false;
|
|
11868
|
-
$store.confirmingAction.timeoutId = setTimeout(() => {
|
|
11869
|
-
$store.confirmingAction.type = null;
|
|
11870
|
-
$store.confirmingAction.id = null;
|
|
11871
|
-
$store.confirmingAction.isDeleting = false;
|
|
11872
|
-
$store.confirmingAction.timeoutId = null;
|
|
11873
|
-
}, 3000);
|
|
11874
|
-
}
|
|
11875
|
-
`,
|
|
12175
|
+
"x-bind:class": `confirming ? '${confirmingStateClasses}' : '${defaultStateClasses}'`,
|
|
12176
|
+
"x-bind:disabled": "isDeleting",
|
|
12177
|
+
"x-on:click": "\n if (confirming) {\n isDeleting = true;\n window.confirmationManager.clear($root.id);\n $el.dispatchEvent(new CustomEvent('confirmed-delete', { bubbles: true }));\n } else {\n confirming = true;\n isDeleting = false;\n window.confirmationManager.start($root.id);\n }\n ",
|
|
11876
12178
|
"hx-delete": `/web/libraries/${encodeURIComponent(libraryName)}/versions/${encodeURIComponent(versionParam)}`,
|
|
11877
12179
|
"hx-target": `#${rowId}`,
|
|
11878
12180
|
"hx-swap": "outerHTML",
|
|
11879
12181
|
"hx-trigger": "confirmed-delete",
|
|
11880
12182
|
children: [
|
|
11881
|
-
/* @__PURE__ */ jsxs(
|
|
11882
|
-
|
|
11883
|
-
|
|
11884
|
-
|
|
11885
|
-
|
|
11886
|
-
|
|
11887
|
-
|
|
12183
|
+
/* @__PURE__ */ jsxs("span", { "x-show": "!confirming && !isDeleting", children: [
|
|
12184
|
+
/* @__PURE__ */ jsx(
|
|
12185
|
+
"svg",
|
|
12186
|
+
{
|
|
12187
|
+
class: "w-4 h-4",
|
|
12188
|
+
"aria-hidden": "true",
|
|
12189
|
+
xmlns: "http://www.w3.org/2000/svg",
|
|
12190
|
+
fill: "none",
|
|
12191
|
+
viewBox: "0 0 18 20",
|
|
12192
|
+
children: /* @__PURE__ */ jsx(
|
|
12193
|
+
"path",
|
|
11888
12194
|
{
|
|
11889
|
-
|
|
11890
|
-
"
|
|
11891
|
-
|
|
11892
|
-
|
|
11893
|
-
|
|
11894
|
-
children: /* @__PURE__ */ jsx(
|
|
11895
|
-
"path",
|
|
11896
|
-
{
|
|
11897
|
-
stroke: "currentColor",
|
|
11898
|
-
"stroke-linecap": "round",
|
|
11899
|
-
"stroke-linejoin": "round",
|
|
11900
|
-
"stroke-width": "2",
|
|
11901
|
-
d: "M1 5h16M7 8v8m4-8v8M7 1h4a1 1 0 0 1 1 1v3H6V2a1 1 0 0 1-1-1ZM3 5h12v13a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V5Z"
|
|
11902
|
-
}
|
|
11903
|
-
)
|
|
12195
|
+
stroke: "currentColor",
|
|
12196
|
+
"stroke-linecap": "round",
|
|
12197
|
+
"stroke-linejoin": "round",
|
|
12198
|
+
"stroke-width": "2",
|
|
12199
|
+
d: "M1 5h16M7 8v8m4-8v8M7 1h4a1 1 0 0 1 1 1v3H6V2a1 1 0 0 1-1-1ZM3 5h12v13a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V5Z"
|
|
11904
12200
|
}
|
|
11905
|
-
)
|
|
11906
|
-
|
|
11907
|
-
|
|
11908
|
-
}
|
|
11909
|
-
),
|
|
11910
|
-
/* @__PURE__ */ jsxs(
|
|
11911
|
-
"
|
|
11912
|
-
{
|
|
11913
|
-
|
|
11914
|
-
|
|
11915
|
-
|
|
11916
|
-
|
|
11917
|
-
|
|
11918
|
-
]
|
|
11919
|
-
}
|
|
11920
|
-
),
|
|
11921
|
-
/* @__PURE__ */ jsxs(
|
|
11922
|
-
"span",
|
|
11923
|
-
{
|
|
11924
|
-
"x-show": `$store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === '${libraryName}:${versionParam}' && $store.confirmingAction.isDeleting`,
|
|
11925
|
-
children: [
|
|
11926
|
-
/* @__PURE__ */ jsx(LoadingSpinner, {}),
|
|
11927
|
-
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Loading..." })
|
|
11928
|
-
]
|
|
11929
|
-
}
|
|
11930
|
-
)
|
|
12201
|
+
)
|
|
12202
|
+
}
|
|
12203
|
+
),
|
|
12204
|
+
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Remove version" })
|
|
12205
|
+
] }),
|
|
12206
|
+
/* @__PURE__ */ jsxs("span", { "x-show": "confirming && !isDeleting", class: "mx-1", children: [
|
|
12207
|
+
"Confirm?",
|
|
12208
|
+
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Confirm delete" })
|
|
12209
|
+
] }),
|
|
12210
|
+
/* @__PURE__ */ jsxs("span", { "x-show": "isDeleting", children: [
|
|
12211
|
+
/* @__PURE__ */ jsx(LoadingSpinner, {}),
|
|
12212
|
+
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Loading..." })
|
|
12213
|
+
] })
|
|
11931
12214
|
]
|
|
11932
12215
|
}
|
|
11933
12216
|
)
|
|
@@ -12135,7 +12418,8 @@ function registerLibraryDetailRoutes(server, listLibrariesTool, searchTool) {
|
|
|
12135
12418
|
} catch (error) {
|
|
12136
12419
|
server.log.error(error, `Failed to search library ${libraryName}`);
|
|
12137
12420
|
reply.type("text/html; charset=utf-8");
|
|
12138
|
-
|
|
12421
|
+
const errorMessage = error instanceof Error ? error.message : "An unexpected error occurred during the search.";
|
|
12422
|
+
return /* @__PURE__ */ jsx(Alert, { type: "error", message: errorMessage });
|
|
12139
12423
|
}
|
|
12140
12424
|
}
|
|
12141
12425
|
);
|
|
@@ -12145,48 +12429,90 @@ const LibraryItem = ({ library }) => {
|
|
|
12145
12429
|
const latestVersion = versions[0];
|
|
12146
12430
|
return (
|
|
12147
12431
|
// Use Flowbite Card structure with updated padding and border, and white background
|
|
12148
|
-
/* @__PURE__ */ jsxs(
|
|
12149
|
-
|
|
12150
|
-
|
|
12151
|
-
{
|
|
12152
|
-
|
|
12153
|
-
|
|
12154
|
-
|
|
12155
|
-
|
|
12156
|
-
|
|
12157
|
-
|
|
12158
|
-
|
|
12159
|
-
|
|
12160
|
-
|
|
12161
|
-
|
|
12162
|
-
class: "
|
|
12163
|
-
|
|
12164
|
-
|
|
12165
|
-
|
|
12166
|
-
|
|
12167
|
-
|
|
12168
|
-
|
|
12169
|
-
|
|
12170
|
-
|
|
12171
|
-
|
|
12172
|
-
|
|
12173
|
-
|
|
12174
|
-
|
|
12175
|
-
|
|
12176
|
-
|
|
12177
|
-
|
|
12178
|
-
|
|
12179
|
-
|
|
12180
|
-
|
|
12181
|
-
|
|
12182
|
-
|
|
12183
|
-
|
|
12184
|
-
|
|
12185
|
-
|
|
12432
|
+
/* @__PURE__ */ jsxs(
|
|
12433
|
+
"div",
|
|
12434
|
+
{
|
|
12435
|
+
id: `library-item-${library.name}`,
|
|
12436
|
+
class: "block px-4 py-2 bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-300 dark:border-gray-600",
|
|
12437
|
+
children: [
|
|
12438
|
+
/* @__PURE__ */ jsx("h3", { class: "text-lg font-medium text-gray-900 dark:text-white", children: /* @__PURE__ */ jsx(
|
|
12439
|
+
"a",
|
|
12440
|
+
{
|
|
12441
|
+
href: `/libraries/${encodeURIComponent(library.name)}`,
|
|
12442
|
+
class: "hover:underline",
|
|
12443
|
+
children: /* @__PURE__ */ jsx("span", { safe: true, children: library.name })
|
|
12444
|
+
}
|
|
12445
|
+
) }),
|
|
12446
|
+
latestVersion?.sourceUrl ? /* @__PURE__ */ jsx("div", { class: "text-sm text-gray-500 dark:text-gray-400 overflow-hidden h-5 @container", children: /* @__PURE__ */ jsx(
|
|
12447
|
+
"a",
|
|
12448
|
+
{
|
|
12449
|
+
href: latestVersion.sourceUrl,
|
|
12450
|
+
target: "_blank",
|
|
12451
|
+
class: "inline-block whitespace-nowrap hover:underline hover:animate-[scrollText_2s_ease-in-out_forwards]",
|
|
12452
|
+
title: latestVersion.sourceUrl,
|
|
12453
|
+
safe: true,
|
|
12454
|
+
children: latestVersion.sourceUrl
|
|
12455
|
+
}
|
|
12456
|
+
) }) : null,
|
|
12457
|
+
/* @__PURE__ */ jsx("div", { class: "mt-2", children: versions.length > 0 ? versions.map((v) => {
|
|
12458
|
+
const adapted = {
|
|
12459
|
+
id: -1,
|
|
12460
|
+
ref: { library: library.name, version: v.version },
|
|
12461
|
+
status: v.status,
|
|
12462
|
+
progress: v.progress,
|
|
12463
|
+
counts: {
|
|
12464
|
+
documents: v.documentCount,
|
|
12465
|
+
uniqueUrls: v.uniqueUrlCount
|
|
12466
|
+
},
|
|
12467
|
+
indexedAt: v.indexedAt,
|
|
12468
|
+
sourceUrl: v.sourceUrl ?? void 0
|
|
12469
|
+
};
|
|
12470
|
+
return /* @__PURE__ */ jsx(VersionDetailsRow, { libraryName: library.name, version: adapted });
|
|
12471
|
+
}) : (
|
|
12472
|
+
// Display message if no versions are indexed
|
|
12473
|
+
/* @__PURE__ */ jsx("p", { class: "text-sm text-gray-500 dark:text-gray-400 italic", children: "No versions indexed." })
|
|
12474
|
+
) })
|
|
12475
|
+
]
|
|
12476
|
+
}
|
|
12477
|
+
)
|
|
12186
12478
|
);
|
|
12187
12479
|
};
|
|
12188
12480
|
/**
 * Renders the list of indexed libraries.
 *
 * When `libraries` has at least one entry, each one is rendered through
 * `LibraryItem` inside `div#library-list`. When it is empty, an informational
 * "Welcome!" alert is rendered instead, pointing the user at the
 * "Add New Documentation" button and the project website.
 *
 * @param {object} props
 * @param {Array} props.libraries - Libraries to display.
 * @returns The rendered list, or the welcome alert when there is nothing to show.
 */
const LibraryList = ({ libraries }) => {
  if (libraries.length !== 0) {
    const renderedItems = libraries.map(
      (library) => /* @__PURE__ */ jsx(LibraryItem, { library })
    );
    return /* @__PURE__ */ jsx("div", {
      id: "library-list",
      class: "space-y-2 animate-[fadeSlideIn_0.2s_ease-out]",
      children: renderedItems
    });
  }

  // Empty state: onboarding hint with a link to the official website.
  const buttonLabel = /* @__PURE__ */ jsx("span", {
    class: "font-semibold",
    children: "Add New Documentation"
  });
  const websiteLink = /* @__PURE__ */ jsx("a", {
    href: "https://grounded.tools",
    target: "_blank",
    rel: "noopener noreferrer",
    class: "font-medium underline hover:no-underline",
    children: "official website"
  });
  const welcomeMessage = /* @__PURE__ */ jsxs(Fragment, {
    children: [
      "To get started, click",
      " ",
      buttonLabel,
      " above and enter the URL of a documentation site to index. For more information, check the",
      " ",
      websiteLink,
      "."
    ]
  });
  return /* @__PURE__ */ jsx(Alert, {
    type: "info",
    title: "Welcome!",
    message: welcomeMessage
  });
};
|
|
12191
12517
|
function registerLibrariesRoutes(server, listLibrariesTool, removeTool) {
|
|
12192
12518
|
server.get("/web/libraries", async (_request, reply) => {
|
|
@@ -12216,6 +12542,75 @@ function registerLibrariesRoutes(server, listLibrariesTool, removeTool) {
|
|
|
12216
12542
|
}
|
|
12217
12543
|
);
|
|
12218
12544
|
}
|
|
12545
|
+
/**
 * Formats a number compactly with one decimal place and a K / M / B suffix.
 *
 * Values below 1,000 (including negatives and non-finite values) are returned
 * via `toString()` unchanged. Larger values are scaled to the largest unit
 * that does not exceed them.
 *
 * Rounding can push a scaled value up to "1000.0" (e.g. 999,999 / 1e3 rounds
 * to 1000.0). In that case the value is promoted to the next unit so the
 * result reads "1.0M" rather than "1000.0K". There is no unit above "B", so
 * values of a trillion or more stay expressed in billions.
 *
 * @param {number} num - The number to format.
 * @returns {string} The abbreviated representation, e.g. "1.5K", "2.0M", "3.4B".
 */
function formatNumber(num) {
  // Ordered smallest to largest so "the next unit" is simply index + 1.
  const units = [
    { divisor: 1e3, suffix: "K" },
    { divisor: 1e6, suffix: "M" },
    { divisor: 1e9, suffix: "B" }
  ];

  let unitIndex = -1;
  for (let i = units.length - 1; i >= 0; i--) {
    if (num >= units[i].divisor) {
      unitIndex = i;
      break;
    }
  }
  if (unitIndex === -1) {
    return num.toString();
  }

  let scaled = (num / units[unitIndex].divisor).toFixed(1);
  // Rounding overflowed this unit's range; re-express in the next larger unit.
  if (scaled === "1000.0" && unitIndex < units.length - 1) {
    unitIndex += 1;
    scaled = (num / units[unitIndex].divisor).toFixed(1);
  }
  return `${scaled}${units[unitIndex].suffix}`;
}
|
|
12557
|
+
/**
 * Renders the three summary cards shown in the analytics grid.
 *
 * - "Total Knowledge Base": `totalChunks` passed through `formatNumber`,
 *   followed by " Chunks".
 * - "Libraries / Versions": `activeLibraries` and `activeVersions`, rendered
 *   as given (no number formatting).
 * - "Indexed Pages": `indexedPages` passed through `formatNumber`.
 *
 * @param {object} props
 * @param {number} props.totalChunks
 * @param {*} props.activeLibraries
 * @param {*} props.activeVersions
 * @param {number} props.indexedPages
 * @returns The rendered grid element.
 */
const AnalyticsCards = ({
  totalChunks,
  activeLibraries,
  activeVersions,
  indexedPages
}) => {
  // Class strings shared by all three cards.
  const cardClass = "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600";
  const captionClass = "text-sm font-medium text-gray-500 dark:text-gray-400";
  const valueClass = "text-xl font-semibold text-gray-900 dark:text-white";
  const rowClass = "flex items-center";

  const knowledgeBaseCard = /* @__PURE__ */ jsx("div", {
    class: cardClass,
    children: /* @__PURE__ */ jsx("div", {
      class: rowClass,
      children: /* @__PURE__ */ jsxs("div", {
        children: [
          /* @__PURE__ */ jsx("p", { class: captionClass, children: "Total Knowledge Base" }),
          /* @__PURE__ */ jsxs("p", {
            class: valueClass,
            safe: true,
            children: [
              formatNumber(totalChunks),
              " Chunks"
            ]
          })
        ]
      })
    })
  });

  const librariesCard = /* @__PURE__ */ jsx("div", {
    class: cardClass,
    children: /* @__PURE__ */ jsx("div", {
      class: rowClass,
      children: /* @__PURE__ */ jsxs("div", {
        children: [
          /* @__PURE__ */ jsx("p", { class: captionClass, children: "Libraries / Versions" }),
          /* @__PURE__ */ jsxs("p", {
            class: valueClass,
            children: [
              activeLibraries,
              " / ",
              activeVersions
            ]
          })
        ]
      })
    })
  });

  const pagesCard = /* @__PURE__ */ jsx("div", {
    class: cardClass,
    children: /* @__PURE__ */ jsx("div", {
      class: rowClass,
      children: /* @__PURE__ */ jsxs("div", {
        children: [
          /* @__PURE__ */ jsx("p", { class: captionClass, children: "Indexed Pages" }),
          /* @__PURE__ */ jsx("p", {
            class: valueClass,
            safe: true,
            children: formatNumber(indexedPages)
          })
        ]
      })
    })
  });

  return /* @__PURE__ */ jsxs("div", {
    class: "grid grid-cols-1 sm:grid-cols-3 gap-4 mb-4 animate-[fadeSlideIn_0.2s_ease-out]",
    children: [
      knowledgeBaseCard,
      librariesCard,
      pagesCard
    ]
  });
};
|
|
12583
|
+
/**
 * Registers `GET /web/stats`, which renders the analytics summary cards.
 *
 * The handler reads every library from `docService.listLibraries()`, sums the
 * per-version `counts.documents` and `counts.uniqueUrls`, counts libraries and
 * versions, and returns an `AnalyticsCards` fragment as HTML. If anything
 * throws, the failure is logged and a 500 "Internal Server Error" is sent.
 *
 * @param server - Server instance on which the route is registered.
 * @param docService - Service providing `listLibraries()`.
 */
function registerStatsRoute(server, docService) {
  server.get("/web/stats", async (_request, reply) => {
    try {
      const libraries = await docService.listLibraries();
      const totals = { chunks: 0, pages: 0, versions: 0 };

      for (const lib of libraries) {
        totals.versions += lib.versions.length;
        for (const version of lib.versions) {
          totals.chunks += version.counts.documents;
          totals.pages += version.counts.uniqueUrls;
        }
      }

      const cardProps = {
        totalChunks: totals.chunks,
        activeLibraries: libraries.length,
        activeVersions: totals.versions,
        indexedPages: totals.pages
      };
      reply.type("text/html; charset=utf-8");
      return /* @__PURE__ */ jsx(AnalyticsCards, cardProps);
    } catch (error) {
      logger.error(`Failed to fetch stats: ${error}`);
      reply.status(500).send("Internal Server Error");
    }
  });
}
|
|
12219
12614
|
async function registerWebService(server, docService, pipeline, eventBus, config) {
|
|
12220
12615
|
const listLibrariesTool = new ListLibrariesTool(docService);
|
|
12221
12616
|
const listJobsTool = new ListJobsTool(pipeline);
|
|
@@ -12232,6 +12627,7 @@ async function registerWebService(server, docService, pipeline, eventBus, config
|
|
|
12232
12627
|
registerCancelJobRoute(server, cancelJobTool);
|
|
12233
12628
|
registerClearCompletedJobsRoute(server, clearCompletedJobsTool);
|
|
12234
12629
|
registerEventsRoute(server, eventBus);
|
|
12630
|
+
registerStatsRoute(server, docService);
|
|
12235
12631
|
}
|
|
12236
12632
|
async function registerWorkerService(pipeline) {
|
|
12237
12633
|
await pipeline.start();
|
|
@@ -12256,7 +12652,6 @@ class AppServer {
|
|
|
12256
12652
|
mcpServer = null;
|
|
12257
12653
|
authManager = null;
|
|
12258
12654
|
config;
|
|
12259
|
-
embeddingConfig = null;
|
|
12260
12655
|
remoteEventProxy = null;
|
|
12261
12656
|
wss = null;
|
|
12262
12657
|
/**
|
|
@@ -12283,22 +12678,22 @@ class AppServer {
|
|
|
12283
12678
|
*/
|
|
12284
12679
|
async start() {
|
|
12285
12680
|
this.validateConfig();
|
|
12286
|
-
|
|
12681
|
+
const embeddingConfig = this.docService.getActiveEmbeddingConfig();
|
|
12287
12682
|
if (this.config.telemetry !== false && shouldEnableTelemetry()) {
|
|
12288
12683
|
try {
|
|
12289
12684
|
if (telemetry.isEnabled()) {
|
|
12290
12685
|
telemetry.setGlobalContext({
|
|
12291
|
-
appVersion: "1.
|
|
12686
|
+
appVersion: "1.30.0",
|
|
12292
12687
|
appPlatform: process.platform,
|
|
12293
12688
|
appNodeVersion: process.version,
|
|
12294
12689
|
appServicesEnabled: this.getActiveServicesList(),
|
|
12295
12690
|
appAuthEnabled: Boolean(this.config.auth),
|
|
12296
12691
|
appReadOnly: Boolean(this.config.readOnly),
|
|
12297
12692
|
// Add embedding configuration to global context
|
|
12298
|
-
...
|
|
12299
|
-
aiEmbeddingProvider:
|
|
12300
|
-
aiEmbeddingModel:
|
|
12301
|
-
aiEmbeddingDimensions:
|
|
12693
|
+
...embeddingConfig && {
|
|
12694
|
+
aiEmbeddingProvider: embeddingConfig.provider,
|
|
12695
|
+
aiEmbeddingModel: embeddingConfig.model,
|
|
12696
|
+
aiEmbeddingDimensions: embeddingConfig.dimensions
|
|
12302
12697
|
}
|
|
12303
12698
|
});
|
|
12304
12699
|
telemetry.track(TelemetryEvent.APP_STARTED, {
|
|
@@ -12356,6 +12751,9 @@ class AppServer {
|
|
|
12356
12751
|
await cleanupMcpService(this.mcpServer);
|
|
12357
12752
|
}
|
|
12358
12753
|
if (this.wss) {
|
|
12754
|
+
for (const client of this.wss.clients) {
|
|
12755
|
+
client.terminate();
|
|
12756
|
+
}
|
|
12359
12757
|
await new Promise((resolve, reject) => {
|
|
12360
12758
|
this.wss?.close((err) => {
|
|
12361
12759
|
if (err) {
|
|
@@ -12374,6 +12772,9 @@ class AppServer {
|
|
|
12374
12772
|
});
|
|
12375
12773
|
}
|
|
12376
12774
|
await telemetry.shutdown();
|
|
12775
|
+
if (this.server.server) {
|
|
12776
|
+
this.server.server.closeAllConnections();
|
|
12777
|
+
}
|
|
12377
12778
|
await this.server.close();
|
|
12378
12779
|
logger.info("🛑 AppServer stopped");
|
|
12379
12780
|
} catch (error) {
|
|
@@ -12596,28 +12997,38 @@ class AppServer {
|
|
|
12596
12997
|
* Log startup information showing which services are enabled.
|
|
12597
12998
|
*/
|
|
12598
12999
|
logStartupInfo(address) {
|
|
12599
|
-
|
|
13000
|
+
const isWorkerOnly = this.config.enableWorker && !this.config.enableWebInterface && !this.config.enableMcpServer;
|
|
13001
|
+
const isWebOnly = this.config.enableWebInterface && !this.config.enableWorker && !this.config.enableMcpServer;
|
|
13002
|
+
const isMcpOnly = this.config.enableMcpServer && !this.config.enableWebInterface && !this.config.enableWorker;
|
|
13003
|
+
if (isWorkerOnly) {
|
|
13004
|
+
logger.info(`🚀 Worker available at ${address}`);
|
|
13005
|
+
} else if (isWebOnly) {
|
|
13006
|
+
logger.info(`🚀 Web interface available at ${address}`);
|
|
13007
|
+
} else if (isMcpOnly) {
|
|
13008
|
+
logger.info(`🚀 MCP server available at ${address}`);
|
|
13009
|
+
} else {
|
|
13010
|
+
logger.info(`🚀 Grounded Docs available at ${address}`);
|
|
13011
|
+
}
|
|
13012
|
+
const isCombined = !isWorkerOnly && !isWebOnly && !isMcpOnly;
|
|
12600
13013
|
const enabledServices = [];
|
|
12601
|
-
if (this.config.enableWebInterface) {
|
|
13014
|
+
if (this.config.enableWebInterface && isCombined) {
|
|
12602
13015
|
enabledServices.push(`Web interface: ${address}`);
|
|
12603
13016
|
}
|
|
12604
13017
|
if (this.config.enableMcpServer) {
|
|
12605
13018
|
enabledServices.push(`MCP endpoints: ${address}/mcp, ${address}/sse`);
|
|
12606
13019
|
}
|
|
12607
|
-
if (this.config.
|
|
12608
|
-
enabledServices.push(`API: ${address}/api`);
|
|
12609
|
-
}
|
|
12610
|
-
if (this.config.enableWorker) {
|
|
12611
|
-
enabledServices.push("Worker: internal");
|
|
12612
|
-
} else if (this.config.externalWorkerUrl) {
|
|
13020
|
+
if (!this.config.enableWorker && this.config.externalWorkerUrl) {
|
|
12613
13021
|
enabledServices.push(`Worker: ${this.config.externalWorkerUrl}`);
|
|
12614
13022
|
}
|
|
12615
|
-
if (this.
|
|
12616
|
-
|
|
12617
|
-
|
|
12618
|
-
|
|
12619
|
-
|
|
12620
|
-
|
|
13023
|
+
if (this.config.enableWorker) {
|
|
13024
|
+
const embeddingConfig = this.docService.getActiveEmbeddingConfig();
|
|
13025
|
+
if (embeddingConfig) {
|
|
13026
|
+
enabledServices.push(
|
|
13027
|
+
`Embeddings: ${embeddingConfig.provider}:${embeddingConfig.model}`
|
|
13028
|
+
);
|
|
13029
|
+
} else {
|
|
13030
|
+
enabledServices.push(`Embeddings: disabled (full text search only)`);
|
|
13031
|
+
}
|
|
12621
13032
|
}
|
|
12622
13033
|
for (const service of enabledServices) {
|
|
12623
13034
|
logger.info(` • ${service}`);
|
|
@@ -14113,7 +14524,7 @@ class PipelineManager {
|
|
|
14113
14524
|
parsedScraperOptions = JSON.parse(version.scraper_options);
|
|
14114
14525
|
} catch (error) {
|
|
14115
14526
|
logger.warn(
|
|
14116
|
-
`⚠️
|
|
14527
|
+
`⚠️ Failed to parse scraper options for ${version.library_name}@${version.name || "unversioned"}: ${error}`
|
|
14117
14528
|
);
|
|
14118
14529
|
}
|
|
14119
14530
|
}
|
|
@@ -14481,7 +14892,7 @@ class PipelineManager {
|
|
|
14481
14892
|
},
|
|
14482
14893
|
onJobError: async (internalJob, error, document2) => {
|
|
14483
14894
|
logger.warn(
|
|
14484
|
-
`⚠️
|
|
14895
|
+
`⚠️ Job ${internalJob.id} error ${document2 ? `on document ${document2.url}` : ""}: ${error.message}`
|
|
14485
14896
|
);
|
|
14486
14897
|
}
|
|
14487
14898
|
});
|
|
@@ -14562,7 +14973,7 @@ class PipelineManager {
|
|
|
14562
14973
|
);
|
|
14563
14974
|
} catch (optionsError) {
|
|
14564
14975
|
logger.warn(
|
|
14565
|
-
`⚠️
|
|
14976
|
+
`⚠️ Failed to store scraper options for job ${job.id}: ${optionsError}`
|
|
14566
14977
|
);
|
|
14567
14978
|
}
|
|
14568
14979
|
}
|
|
@@ -14631,6 +15042,217 @@ var PipelineFactory2;
|
|
|
14631
15042
|
}
|
|
14632
15043
|
PipelineFactory22.createPipeline = createPipeline;
|
|
14633
15044
|
})(PipelineFactory2 || (PipelineFactory2 = {}));
|
|
15045
|
+
/**
 * Returns the options parsed on the top-most command of a command hierarchy.
 *
 * Follows the `parent` links starting at `command` until a command without a
 * truthy `parent` is reached, then returns the result of its `opts()` call.
 *
 * @param {object} [command] - Command to start from; may be nullish.
 * @returns {object} The root command's options, or `{}` when there is no
 *   command or `opts()` yields a falsy value.
 */
function getGlobalOptions(command) {
  let current = command;
  for (;;) {
    const ancestor = current?.parent;
    if (!ancestor) {
      break;
    }
    current = ancestor;
  }
  const rootOptions = current?.opts();
  return rootOptions || {};
}
|
|
15052
|
+
/**
 * Reads the event bus stored on a command under `_eventBus`.
 *
 * @param {object} [command] - Command expected to carry `_eventBus`.
 * @returns {*} The value stored in `command._eventBus`.
 * @throws {Error} When the command is nullish or `_eventBus` is falsy.
 */
function getEventBus(command) {
  const bus = command?._eventBus;
  if (bus) {
    return bus;
  }
  throw new Error("EventBusService not initialized");
}
|
|
15059
|
+
/**
 * Makes sure a Chromium browser is available for Playwright, installing it
 * on demand.
 *
 * Order of checks:
 *  1. `PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD === "1"`: do nothing.
 *  2. `PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH` points at an existing file: do nothing.
 *  3. `chromium.executablePath()` exists on disk: do nothing.
 *  4. Otherwise run `playwright install` synchronously from the project root.
 *
 * If the automatic install fails, an instruction is printed to stderr and the
 * process exits with code 1.
 *
 * @returns {void}
 */
function ensurePlaywrightBrowsersInstalled() {
  if (process.env.PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD === "1") {
    logger.debug(
      "PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD is set, skipping Playwright browser install."
    );
    return;
  }
  const chromiumEnvPath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH;
  if (chromiumEnvPath && existsSync(chromiumEnvPath)) {
    logger.debug(
      `PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH is set to '${chromiumEnvPath}', skipping Playwright browser install.`
    );
    return;
  }
  try {
    const chromiumPath = chromium.executablePath();
    if (!chromiumPath || !existsSync(chromiumPath)) {
      throw new Error("Playwright Chromium browser not found");
    }
  } catch (error) {
    logger.debug(String(error));
    try {
      console.log(
        "🌐 Installing Playwright Chromium browser... (this may take a moment)"
      );
      execSync("npm exec -y playwright install --no-shell --with-deps chromium", {
        stdio: "ignore",
        // Suppress output
        cwd: getProjectRoot()
      });
    } catch (installErr) {
      // Installer output is suppressed above, so keep the failure cause
      // available in debug logs instead of discarding it.
      logger.debug(`Playwright browser install failed: ${String(installErr)}`);
      console.error(
        "❌ Failed to install Playwright browsers automatically. Please run:\n npx playwright install --no-shell --with-deps chromium\nand try again."
      );
      process.exit(1);
    }
  }
}
|
|
15097
|
+
/**
 * Resolves the requested MCP protocol to a concrete transport.
 *
 * `"auto"` picks `"stdio"` when neither stdin nor stdout is a TTY and
 * `"http"` otherwise; `"stdio"` and `"http"` are returned unchanged.
 *
 * @param {string} protocol - One of `"auto"`, `"stdio"`, `"http"`.
 * @returns {"stdio"|"http"} The concrete protocol.
 * @throws {Error} For any other value.
 */
function resolveProtocol(protocol) {
  switch (protocol) {
    case "auto": {
      const hasTerminal = process.stdin.isTTY || process.stdout.isTTY;
      return hasTerminal ? "http" : "stdio";
    }
    case "stdio":
    case "http":
      return protocol;
    default:
      throw new Error(`Invalid protocol: ${protocol}. Must be 'auto', 'stdio', or 'http'`);
  }
}
|
|
15109
|
+
/**
 * Serializes a value as pretty-printed JSON using a two-space indent.
 *
 * @param {*} data - Value to serialize.
 * @returns {string|undefined} Whatever `JSON.stringify` returns for `data`.
 */
const formatOutput = (data) => {
  const indentWidth = 2;
  return JSON.stringify(data, null, indentWidth);
};
|
|
15110
|
+
/**
 * Adjusts the log level from the CLI flags.
 *
 * `silent` wins over `verbose`: silent selects `LogLevel.ERROR`, verbose
 * selects `LogLevel.DEBUG`. With neither flag set the level is left alone.
 *
 * @param {{silent?: boolean, verbose?: boolean}} options - Parsed CLI flags.
 * @param {string} [protocol] - Not used by this function; kept for callers.
 * @returns {void}
 */
function setupLogging(options, protocol) {
  if (options.silent) {
    setLogLevel(LogLevel.ERROR);
    return;
  }
  if (options.verbose) {
    setLogLevel(LogLevel.DEBUG);
  }
}
|
|
15117
|
+
/**
 * Parses and validates a TCP port given as text.
 *
 * The input must consist solely of decimal digits (surrounding whitespace is
 * ignored). `Number.parseInt` alone would silently accept trailing garbage
 * such as "8080abc" or "80.5", so the format is checked before parsing.
 *
 * @param {string|number} portString - Port as supplied on the command line.
 * @returns {number} The port as an integer in the range 1-65535.
 * @throws {Error} "Invalid port number" when the input is not a plain
 *   integer or lies outside the valid range.
 */
function validatePort(portString) {
  const text = String(portString).trim();
  const port = /^\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  if (Number.isNaN(port) || port < 1 || port > 65535) {
    throw new Error("Invalid port number");
  }
  return port;
}
|
|
15124
|
+
/**
 * Validates a host name or address given as text.
 *
 * Leading and trailing whitespace is stripped; the remaining value must be
 * non-empty and must not contain any whitespace character (spaces, tabs,
 * line breaks, ...). A single `\s` test replaces the previous per-character
 * checks, which tested the space character twice and therefore let tabs and
 * carriage returns through.
 *
 * @param {string} hostString - Host as supplied on the command line.
 * @returns {string} The trimmed host.
 * @throws {Error} "Host cannot be empty" or "Host cannot contain whitespace".
 */
function validateHost(hostString) {
  const trimmed = hostString.trim();
  if (!trimmed) {
    throw new Error("Host cannot be empty");
  }
  if (/\s/.test(trimmed)) {
    throw new Error("Host cannot contain whitespace");
  }
  return trimmed;
}
|
|
15134
|
+
/**
 * Builds an app-server configuration object from partial options.
 *
 * Nullish service flags fall back to these defaults: web interface off,
 * MCP server on, API server off, worker on, read-only off. All remaining
 * fields are copied through unchanged (including `undefined`).
 *
 * @param {object} options - Partial configuration.
 * @returns {object} Configuration with every field present.
 */
function createAppServerConfig(options) {
  const { port, host, externalWorkerUrl, auth, startupContext } = options;
  // `??` (not destructuring defaults) so that `null` also falls back.
  const orDefault = (value, fallback) => value ?? fallback;
  return {
    enableWebInterface: orDefault(options.enableWebInterface, false),
    enableMcpServer: orDefault(options.enableMcpServer, true),
    enableApiServer: orDefault(options.enableApiServer, false),
    enableWorker: orDefault(options.enableWorker, true),
    port,
    host,
    externalWorkerUrl,
    readOnly: orDefault(options.readOnly, false),
    auth,
    startupContext
  };
}
|
|
15148
|
+
/**
 * Converts a list of `"Name: value"` strings into a header map.
 *
 * Each entry is split at its first colon; name and value are trimmed.
 * Entries without a colon, with a colon in first position, or with a blank
 * name are ignored. Later entries overwrite earlier ones with the same name.
 *
 * @param {string[]|*} headerOptions - Raw header entries; non-arrays yield `{}`.
 * @returns {Object<string, string>} Parsed headers.
 */
function parseHeaders(headerOptions) {
  const headers = {};
  if (!Array.isArray(headerOptions)) {
    return headers;
  }
  for (const entry of headerOptions) {
    const separatorAt = entry.indexOf(":");
    if (separatorAt <= 0) {
      continue;
    }
    const name = entry.slice(0, separatorAt).trim();
    if (!name) {
      continue;
    }
    headers[name] = entry.slice(separatorAt + 1).trim();
  }
  return headers;
}
|
|
15162
|
+
/**
 * Derives the auth configuration from CLI options.
 *
 * @param {{authEnabled?: boolean, authIssuerUrl?: string, authAudience?: string}} options
 *   Parsed CLI options.
 * @returns {object|undefined} `undefined` when auth is not enabled; otherwise
 *   an enabled config carrying issuer URL, audience and the default
 *   OAuth2/OIDC scopes `openid` and `profile`.
 */
function parseAuthConfig(options) {
  const { authEnabled, authIssuerUrl, authAudience } = options;
  if (authEnabled) {
    const defaultScopes = ["openid", "profile"];
    return {
      enabled: true,
      issuerUrl: authIssuerUrl,
      audience: authAudience,
      scopes: defaultScopes
    };
  }
  return void 0;
}
|
|
15174
|
+
/**
 * Validates an auth configuration and throws one aggregated error listing
 * every problem found.
 *
 * Rules (only applied when `authConfig.enabled` is truthy):
 *  - `issuerUrl` is required, must parse as a URL and must use `https:`.
 *  - `audience` is required and must be either an absolute URL or a URN of
 *    the form `urn:namespace:specific-string`.
 *  - `audience` must not carry a URL fragment.
 *  - An `http:` audience on a host other than `localhost` only produces a
 *    warning.
 *
 * The URN shape is checked before URL parsing: the WHATWG URL parser accepts
 * `urn:` strings as valid URLs, so a check placed only in the parse-failure
 * path would never run for them.
 *
 * @param {{enabled?: boolean, issuerUrl?: string, audience?: string}} authConfig
 *   Configuration to validate.
 * @returns {void}
 * @throws {Error} "Auth configuration validation failed:" followed by one
 *   line per violation.
 */
function validateAuthConfig(authConfig) {
  if (!authConfig.enabled) {
    return;
  }
  const errors = [];
  if (!authConfig.issuerUrl) {
    errors.push("--auth-issuer-url is required when auth is enabled");
  } else {
    try {
      const url = new URL(authConfig.issuerUrl);
      if (url.protocol !== "https:") {
        errors.push("Issuer URL must use HTTPS protocol");
      }
    } catch {
      errors.push("Issuer URL must be a valid URL");
    }
  }
  const { audience } = authConfig;
  if (!audience) {
    errors.push("--auth-audience is required when auth is enabled");
  } else {
    const isUrn = audience.startsWith("urn:");
    if (isUrn) {
      const urnParts = audience.split(":");
      if (urnParts.length < 3 || !urnParts[1] || !urnParts[2]) {
        errors.push("URN audience must follow format: urn:namespace:specific-string");
      }
    }
    try {
      const url = new URL(audience);
      if (url.protocol === "http:" && url.hostname !== "localhost") {
        logger.warn(
          "⚠️ Audience uses HTTP protocol - consider using HTTPS for production"
        );
      }
      if (url.hash) {
        errors.push("Audience must not contain URL fragments");
      }
    } catch {
      // URNs were already validated structurally above.
      if (!isUrn) {
        errors.push(
          "Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
        );
      }
    }
  }
  if (errors.length > 0) {
    throw new Error(`Auth configuration validation failed:
${errors.join("\n")}`);
  }
}
|
|
15222
|
+
/**
 * Logs a warning when authentication is enabled in what looks like a
 * production deployment served over plain HTTP.
 *
 * The deployment is treated as local (no warning) when `NODE_ENV` is not
 * `"production"`, when the port is 6280 (default dev port), or when the
 * `HOSTNAME` environment variable contains "localhost".
 *
 * @param {{enabled?: boolean}|undefined} authConfig - Auth configuration.
 * @param {number} port - Port the server listens on.
 * @returns {void}
 */
function warnHttpUsage(authConfig, port) {
  if (!authConfig?.enabled) {
    return;
  }
  const inProduction = process.env.NODE_ENV === "production";
  const onDefaultDevPort = port === 6280;
  if (!inProduction || onDefaultDevPort) {
    return;
  }
  if (process.env.HOSTNAME?.includes("localhost")) {
    return;
  }
  logger.warn(
    "⚠️ Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
  );
}
|
|
15234
|
+
/**
 * Resolves the embedding configuration for a model specification.
 *
 * When no model is given but `OPENAI_API_KEY` is set, the model defaults to
 * `text-embedding-3-small`. When no model can be determined, or parsing the
 * specification throws, the function returns `null`; both cases are logged
 * at debug level.
 *
 * @param {string} [embeddingModel] - Model specification from the CLI.
 * @returns {*} Result of `EmbeddingConfig.parseEmbeddingConfig`, or `null`.
 */
function resolveEmbeddingContext(embeddingModel) {
  try {
    const useOpenAiDefault = !embeddingModel && Boolean(process.env.OPENAI_API_KEY);
    const modelSpec = useOpenAiDefault ? "text-embedding-3-small" : embeddingModel;
    if (useOpenAiDefault) {
      logger.debug(
        "Using default OpenAI embedding model due to OPENAI_API_KEY presence."
      );
    }
    if (modelSpec) {
      logger.debug(`Resolving embedding configuration for model: ${modelSpec}`);
      return EmbeddingConfig.parseEmbeddingConfig(modelSpec);
    }
    logger.debug(
      "No embedding model specified and OPENAI_API_KEY not found. Embeddings are disabled."
    );
    return null;
  } catch (error) {
    logger.debug(`Failed to resolve embedding configuration: ${error}`);
    return null;
  }
}
|
|
14634
15256
|
function createDefaultAction(program) {
|
|
14635
15257
|
return program.addOption(
|
|
14636
15258
|
new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default("auto").choices(["auto", "stdio", "http"])
|
|
@@ -14976,7 +15598,6 @@ function createMcpCommand(program) {
|
|
|
14976
15598
|
);
|
|
14977
15599
|
if (resolvedProtocol === "stdio") {
|
|
14978
15600
|
logger.debug(`Auto-detected stdio protocol (no TTY)`);
|
|
14979
|
-
logger.info("🚀 Starting MCP server (stdio mode)");
|
|
14980
15601
|
await pipeline.start();
|
|
14981
15602
|
const mcpTools = await initializeTools(docService, pipeline);
|
|
14982
15603
|
const mcpServer = await startStdioServer(mcpTools, cmdOptions.readOnly);
|
|
@@ -14989,7 +15610,6 @@ function createMcpCommand(program) {
|
|
|
14989
15610
|
});
|
|
14990
15611
|
} else {
|
|
14991
15612
|
logger.debug(`Auto-detected http protocol (TTY available)`);
|
|
14992
|
-
logger.info("🚀 Starting MCP server (http mode)");
|
|
14993
15613
|
const config = createAppServerConfig({
|
|
14994
15614
|
enableWebInterface: false,
|
|
14995
15615
|
// Never enable web interface in mcp command
|
|
@@ -15458,9 +16078,6 @@ function createWebCommand(program) {
|
|
|
15458
16078
|
cliCommand: "web"
|
|
15459
16079
|
}
|
|
15460
16080
|
});
|
|
15461
|
-
logger.info(
|
|
15462
|
-
`🚀 Starting web interface${serverUrl ? ` connecting to worker at ${serverUrl}` : ""}`
|
|
15463
|
-
);
|
|
15464
16081
|
const appServer = await startAppServer(docService, pipeline, eventBus, config);
|
|
15465
16082
|
registerGlobalServices({
|
|
15466
16083
|
appServer,
|
|
@@ -15503,7 +16120,6 @@ function createWorkerCommand(program) {
|
|
|
15503
16120
|
const port = validatePort(cmdOptions.port);
|
|
15504
16121
|
const host = validateHost(cmdOptions.host);
|
|
15505
16122
|
try {
|
|
15506
|
-
logger.info(`🚀 Starting external pipeline worker on port ${port}`);
|
|
15507
16123
|
ensurePlaywrightBrowsersInstalled();
|
|
15508
16124
|
const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
|
|
15509
16125
|
const globalOptions = program.opts();
|
|
@@ -15554,7 +16170,7 @@ function createCliProgram() {
|
|
|
15554
16170
|
const commandStartTimes = /* @__PURE__ */ new Map();
|
|
15555
16171
|
let globalEventBus = null;
|
|
15556
16172
|
let globalTelemetryService = null;
|
|
15557
|
-
program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version("1.
|
|
16173
|
+
program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version("1.30.0").addOption(
|
|
15558
16174
|
new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
|
|
15559
16175
|
).addOption(new Option("--silent", "Disable all logging except errors")).addOption(
|
|
15560
16176
|
new Option("--telemetry", "Enable telemetry collection").env("DOCS_MCP_TELEMETRY").argParser((value) => {
|
|
@@ -15588,7 +16204,7 @@ function createCliProgram() {
|
|
|
15588
16204
|
if (shouldEnableTelemetry()) {
|
|
15589
16205
|
if (telemetry.isEnabled()) {
|
|
15590
16206
|
telemetry.setGlobalContext({
|
|
15591
|
-
appVersion: "1.
|
|
16207
|
+
appVersion: "1.30.0",
|
|
15592
16208
|
appPlatform: process.platform,
|
|
15593
16209
|
appNodeVersion: process.version,
|
|
15594
16210
|
appInterface: "cli",
|