@arabold/docs-mcp-server 1.29.0 → 1.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/assets/main.css +1 -1
- package/dist/assets/main.js +1091 -480
- package/dist/assets/main.js.map +1 -1
- package/dist/index.js +1489 -1221
- package/dist/index.js.map +1 -1
- package/package.json +4 -1
- package/public/assets/main.css +1 -1
- package/public/assets/main.js +1091 -480
- package/public/assets/main.js.map +1 -1
package/dist/index.js
CHANGED
|
@@ -19,14 +19,13 @@ import Fastify from "fastify";
|
|
|
19
19
|
import { WebSocketServer } from "ws";
|
|
20
20
|
import { ProxyOAuthServerProvider } from "@modelcontextprotocol/sdk/server/auth/providers/proxyProvider.js";
|
|
21
21
|
import { createRemoteJWKSet, jwtVerify } from "jose";
|
|
22
|
-
import { execSync } from "node:child_process";
|
|
23
|
-
import { chromium } from "playwright";
|
|
24
22
|
import { createWSClient, createTRPCClient, splitLink, httpBatchLink, wsLink, createTRPCProxyClient } from "@trpc/client";
|
|
25
23
|
import superjson from "superjson";
|
|
26
24
|
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
|
|
27
25
|
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
28
26
|
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
29
27
|
import { z } from "zod/v3";
|
|
28
|
+
import { chromium } from "playwright";
|
|
30
29
|
import mime from "mime";
|
|
31
30
|
import { HeaderGenerator } from "header-generator";
|
|
32
31
|
import fs$1 from "node:fs/promises";
|
|
@@ -62,6 +61,7 @@ import { escapeHtml } from "@kitajs/html";
|
|
|
62
61
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
63
62
|
import { v4 } from "uuid";
|
|
64
63
|
import { minimatch } from "minimatch";
|
|
64
|
+
import { execSync } from "node:child_process";
|
|
65
65
|
class StoreError extends Error {
|
|
66
66
|
constructor(message, cause) {
|
|
67
67
|
super(cause ? `${message} caused by ${cause}` : message);
|
|
@@ -268,15 +268,19 @@ function createEmbeddingModel(providerAndModel) {
|
|
|
268
268
|
if (!process.env.OPENAI_API_KEY) {
|
|
269
269
|
throw new MissingCredentialsError("openai", ["OPENAI_API_KEY"]);
|
|
270
270
|
}
|
|
271
|
+
const timeoutMs = 3e4;
|
|
271
272
|
const config = {
|
|
272
273
|
...baseConfig,
|
|
273
274
|
modelName: model,
|
|
274
|
-
batchSize: 512
|
|
275
|
+
batchSize: 512,
|
|
275
276
|
// OpenAI supports large batches
|
|
277
|
+
timeout: timeoutMs
|
|
276
278
|
};
|
|
277
279
|
const baseURL = process.env.OPENAI_API_BASE;
|
|
278
280
|
if (baseURL) {
|
|
279
|
-
config.configuration = { baseURL };
|
|
281
|
+
config.configuration = { baseURL, timeout: timeoutMs };
|
|
282
|
+
} else {
|
|
283
|
+
config.configuration = { timeout: timeoutMs };
|
|
280
284
|
}
|
|
281
285
|
return new OpenAIEmbeddings(config);
|
|
282
286
|
}
|
|
@@ -1011,7 +1015,7 @@ class ProxyAuthManager {
|
|
|
1011
1015
|
logger.debug(`Token validation capabilities: ${capabilities.join(", ")}`);
|
|
1012
1016
|
if (capabilities.length === 0) {
|
|
1013
1017
|
logger.warn(
|
|
1014
|
-
"⚠️
|
|
1018
|
+
"⚠️ No token validation mechanisms available - authentication may fail"
|
|
1015
1019
|
);
|
|
1016
1020
|
}
|
|
1017
1021
|
this.proxyProvider = new ProxyOAuthServerProvider({
|
|
@@ -1349,657 +1353,142 @@ class ProxyAuthManager {
|
|
|
1349
1353
|
}
|
|
1350
1354
|
}
|
|
1351
1355
|
}
|
|
1352
|
-
class
|
|
1353
|
-
|
|
1356
|
+
class RemoteEventProxy {
|
|
1357
|
+
constructor(remoteWorkerUrl, localEventBus) {
|
|
1358
|
+
this.remoteWorkerUrl = remoteWorkerUrl;
|
|
1359
|
+
this.localEventBus = localEventBus;
|
|
1360
|
+
}
|
|
1361
|
+
trpcClient = null;
|
|
1362
|
+
wsClient = null;
|
|
1363
|
+
subscription = null;
|
|
1364
|
+
isConnected = false;
|
|
1354
1365
|
/**
|
|
1355
|
-
*
|
|
1356
|
-
* Creates the instance if it doesn't exist.
|
|
1366
|
+
* Start subscribing to remote events and forwarding them locally.
|
|
1357
1367
|
*/
|
|
1358
|
-
|
|
1359
|
-
if (
|
|
1360
|
-
|
|
1368
|
+
async connect() {
|
|
1369
|
+
if (this.isConnected) {
|
|
1370
|
+
logger.warn("Remote event proxy already connected");
|
|
1371
|
+
return;
|
|
1372
|
+
}
|
|
1373
|
+
logger.debug(`Connecting to remote worker at ${this.remoteWorkerUrl}`);
|
|
1374
|
+
try {
|
|
1375
|
+
const url = new URL(this.remoteWorkerUrl);
|
|
1376
|
+
const baseUrl = `${url.protocol}//${url.host}`;
|
|
1377
|
+
const wsUrl = baseUrl.replace(/^http/, "ws");
|
|
1378
|
+
this.wsClient = createWSClient({
|
|
1379
|
+
url: wsUrl
|
|
1380
|
+
});
|
|
1381
|
+
this.trpcClient = createTRPCClient({
|
|
1382
|
+
links: [
|
|
1383
|
+
splitLink({
|
|
1384
|
+
condition: (op) => op.type === "subscription",
|
|
1385
|
+
true: wsLink({ client: this.wsClient, transformer: superjson }),
|
|
1386
|
+
false: httpBatchLink({ url: this.remoteWorkerUrl, transformer: superjson })
|
|
1387
|
+
})
|
|
1388
|
+
]
|
|
1389
|
+
});
|
|
1390
|
+
this.subscription = this.trpcClient.events.subscribe.subscribe(
|
|
1391
|
+
{},
|
|
1392
|
+
// Subscribe to all event types
|
|
1393
|
+
{
|
|
1394
|
+
onData: (data) => {
|
|
1395
|
+
logger.debug(`Received remote event: ${data.type}`);
|
|
1396
|
+
this.localEventBus.emit(data.type, data.payload);
|
|
1397
|
+
},
|
|
1398
|
+
onError: (error) => {
|
|
1399
|
+
logger.error(`❌ Remote event subscription error: ${error}`);
|
|
1400
|
+
this.isConnected = false;
|
|
1401
|
+
this.scheduleReconnect();
|
|
1402
|
+
},
|
|
1403
|
+
onStarted: () => {
|
|
1404
|
+
logger.debug("Remote event subscription started");
|
|
1405
|
+
this.isConnected = true;
|
|
1406
|
+
},
|
|
1407
|
+
onComplete: () => {
|
|
1408
|
+
logger.debug("Remote event subscription completed");
|
|
1409
|
+
this.isConnected = false;
|
|
1410
|
+
}
|
|
1411
|
+
}
|
|
1412
|
+
);
|
|
1413
|
+
} catch (error) {
|
|
1414
|
+
logger.error(`❌ Failed to connect to remote worker: ${error}`);
|
|
1415
|
+
this.scheduleReconnect();
|
|
1361
1416
|
}
|
|
1362
|
-
return EmbeddingConfig.instance;
|
|
1363
1417
|
}
|
|
1364
1418
|
/**
|
|
1365
|
-
*
|
|
1419
|
+
* Disconnect from the remote worker and stop forwarding events.
|
|
1366
1420
|
*/
|
|
1367
|
-
|
|
1368
|
-
|
|
1421
|
+
disconnect() {
|
|
1422
|
+
if (this.subscription) {
|
|
1423
|
+
this.subscription.unsubscribe();
|
|
1424
|
+
this.subscription = null;
|
|
1425
|
+
}
|
|
1426
|
+
if (this.wsClient) {
|
|
1427
|
+
this.wsClient.close();
|
|
1428
|
+
this.wsClient = null;
|
|
1429
|
+
}
|
|
1430
|
+
this.isConnected = false;
|
|
1431
|
+
logger.info("🚫 Disconnected from remote worker");
|
|
1369
1432
|
}
|
|
1370
1433
|
/**
|
|
1371
|
-
*
|
|
1372
|
-
* This avoids expensive API calls for dimension detection in telemetry.
|
|
1373
|
-
*
|
|
1374
|
-
* Note: The "openai" provider also supports OpenAI-compatible APIs like:
|
|
1375
|
-
* - Ollama (local models)
|
|
1376
|
-
* - LMStudio (local models)
|
|
1377
|
-
* - Any service implementing OpenAI's embedding API
|
|
1434
|
+
* Check if the proxy is currently connected to the remote worker.
|
|
1378
1435
|
*/
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
"text-embedding-3-small": 1536,
|
|
1382
|
-
"text-embedding-3-large": 3072,
|
|
1383
|
-
"text-embedding-ada-002": 1536,
|
|
1384
|
-
// Google Vertex AI models
|
|
1385
|
-
"text-embedding-004": 768,
|
|
1386
|
-
"textembedding-gecko@003": 768,
|
|
1387
|
-
"textembedding-gecko@002": 768,
|
|
1388
|
-
"textembedding-gecko@001": 768,
|
|
1389
|
-
// Google Gemini models (with MRL support)
|
|
1390
|
-
"text-embedding-preview-0409": 768,
|
|
1391
|
-
"embedding-001": 768,
|
|
1392
|
-
// AWS Bedrock models
|
|
1393
|
-
// Amazon Titan models
|
|
1394
|
-
"amazon.titan-embed-text-v1": 1536,
|
|
1395
|
-
"amazon.titan-embed-text-v2:0": 1024,
|
|
1396
|
-
"amazon.titan-embed-image-v1": 1024,
|
|
1397
|
-
// Image embedding model
|
|
1398
|
-
// Cohere models
|
|
1399
|
-
"cohere.embed-english-v3": 1024,
|
|
1400
|
-
"cohere.embed-multilingual-v3": 1024,
|
|
1401
|
-
// SageMaker models (hosted on AWS SageMaker)
|
|
1402
|
-
"intfloat/multilingual-e5-large": 1024,
|
|
1403
|
-
// Additional AWS models that might be supported
|
|
1404
|
-
// Note: Some of these might be placeholders - verify dimensions before use
|
|
1405
|
-
// "amazon.nova-embed-multilingual-v1:0": 4096, // Commented out as noted in source
|
|
1406
|
-
// MTEB Leaderboard models (source: https://huggingface.co/spaces/mteb/leaderboard)
|
|
1407
|
-
// Top performing models from Massive Text Embedding Benchmark
|
|
1408
|
-
"sentence-transformers/all-MiniLM-L6-v2": 384,
|
|
1409
|
-
"gemini-embedding-001": 3072,
|
|
1410
|
-
"Qwen/Qwen3-Embedding-8B": 4096,
|
|
1411
|
-
"Qwen/Qwen3-Embedding-4B": 2560,
|
|
1412
|
-
"Qwen/Qwen3-Embedding-0.6B": 1024,
|
|
1413
|
-
"Linq-AI-Research/Linq-Embed-Mistral": 4096,
|
|
1414
|
-
"Alibaba-NLP/gte-Qwen2-7B-instruct": 3584,
|
|
1415
|
-
"intfloat/multilingual-e5-large-instruct": 1024,
|
|
1416
|
-
"Salesforce/SFR-Embedding-Mistral": 4096,
|
|
1417
|
-
"text-multilingual-embedding-002": 768,
|
|
1418
|
-
"GritLM/GritLM-7B": 4096,
|
|
1419
|
-
"GritLM/GritLM-8x7B": 4096,
|
|
1420
|
-
"intfloat/e5-mistral-7b-instruct": 4096,
|
|
1421
|
-
"Cohere/Cohere-embed-multilingual-v3.0": 1024,
|
|
1422
|
-
"Alibaba-NLP/gte-Qwen2-1.5B-instruct": 8960,
|
|
1423
|
-
"Lajavaness/bilingual-embedding-large": 1024,
|
|
1424
|
-
"Salesforce/SFR-Embedding-2_R": 4096,
|
|
1425
|
-
"NovaSearch/stella_en_1.5B_v5": 8960,
|
|
1426
|
-
"NovaSearch/jasper_en_vision_language_v1": 8960,
|
|
1427
|
-
"nvidia/NV-Embed-v2": 4096,
|
|
1428
|
-
"OrdalieTech/Solon-embeddings-large-0.1": 1024,
|
|
1429
|
-
"BAAI/bge-m3": 1024,
|
|
1430
|
-
"HIT-TMG/KaLM-embedding-multilingual-mini-v1": 896,
|
|
1431
|
-
"jinaai/jina-embeddings-v3": 1024,
|
|
1432
|
-
"Alibaba-NLP/gte-multilingual-base": 768,
|
|
1433
|
-
"Lajavaness/bilingual-embedding-base": 768,
|
|
1434
|
-
"HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": 896,
|
|
1435
|
-
"nvidia/NV-Embed-v1": 4096,
|
|
1436
|
-
"Cohere/Cohere-embed-multilingual-light-v3.0": 384,
|
|
1437
|
-
"manu/bge-m3-custom-fr": 1024,
|
|
1438
|
-
"Lajavaness/bilingual-embedding-small": 384,
|
|
1439
|
-
"Snowflake/snowflake-arctic-embed-l-v2.0": 1024,
|
|
1440
|
-
"intfloat/multilingual-e5-base": 768,
|
|
1441
|
-
"voyage-3-lite": 512,
|
|
1442
|
-
"voyage-3": 1024,
|
|
1443
|
-
"intfloat/multilingual-e5-small": 384,
|
|
1444
|
-
"Alibaba-NLP/gte-Qwen1.5-7B-instruct": 4096,
|
|
1445
|
-
"Snowflake/snowflake-arctic-embed-m-v2.0": 768,
|
|
1446
|
-
"deepvk/USER-bge-m3": 1024,
|
|
1447
|
-
"Cohere/Cohere-embed-english-v3.0": 1024,
|
|
1448
|
-
"Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": 768,
|
|
1449
|
-
"ibm-granite/granite-embedding-278m-multilingual": 768,
|
|
1450
|
-
"NovaSearch/stella_en_400M_v5": 4096,
|
|
1451
|
-
"omarelshehy/arabic-english-sts-matryoshka": 1024,
|
|
1452
|
-
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2": 768,
|
|
1453
|
-
"Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": 768,
|
|
1454
|
-
"Haon-Chen/speed-embedding-7b-instruct": 4096,
|
|
1455
|
-
"sentence-transformers/LaBSE": 768,
|
|
1456
|
-
"WhereIsAI/UAE-Large-V1": 1024,
|
|
1457
|
-
"ibm-granite/granite-embedding-107m-multilingual": 384,
|
|
1458
|
-
"mixedbread-ai/mxbai-embed-large-v1": 1024,
|
|
1459
|
-
"intfloat/e5-large-v2": 1024,
|
|
1460
|
-
"avsolatorio/GIST-large-Embedding-v0": 1024,
|
|
1461
|
-
"sdadas/mmlw-e5-large": 1024,
|
|
1462
|
-
"nomic-ai/nomic-embed-text-v1": 768,
|
|
1463
|
-
"nomic-ai/nomic-embed-text-v1-ablated": 768,
|
|
1464
|
-
"intfloat/e5-base-v2": 768,
|
|
1465
|
-
"BAAI/bge-large-en-v1.5": 1024,
|
|
1466
|
-
"intfloat/e5-large": 1024,
|
|
1467
|
-
"Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": 384,
|
|
1468
|
-
"Cohere/Cohere-embed-english-light-v3.0": 384,
|
|
1469
|
-
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 768,
|
|
1470
|
-
"Gameselo/STS-multilingual-mpnet-base-v2": 768,
|
|
1471
|
-
"thenlper/gte-large": 1024,
|
|
1472
|
-
"avsolatorio/GIST-Embedding-v0": 768,
|
|
1473
|
-
"nomic-ai/nomic-embed-text-v1-unsupervised": 768,
|
|
1474
|
-
"infgrad/stella-base-en-v2": 768,
|
|
1475
|
-
"avsolatorio/NoInstruct-small-Embedding-v0": 384,
|
|
1476
|
-
"dwzhu/e5-base-4k": 768,
|
|
1477
|
-
"sdadas/mmlw-e5-base": 768,
|
|
1478
|
-
"voyage-multilingual-2": 1024,
|
|
1479
|
-
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": 4096,
|
|
1480
|
-
"BAAI/bge-base-en-v1.5": 768,
|
|
1481
|
-
"avsolatorio/GIST-small-Embedding-v0": 384,
|
|
1482
|
-
"sdadas/mmlw-roberta-large": 1024,
|
|
1483
|
-
"nomic-ai/nomic-embed-text-v1.5": 768,
|
|
1484
|
-
"minishlab/potion-multilingual-128M": 256,
|
|
1485
|
-
"shibing624/text2vec-base-multilingual": 384,
|
|
1486
|
-
"thenlper/gte-base": 768,
|
|
1487
|
-
"intfloat/e5-small-v2": 384,
|
|
1488
|
-
"intfloat/e5-base": 768,
|
|
1489
|
-
"sentence-transformers/static-similarity-mrl-multilingual-v1": 1024,
|
|
1490
|
-
"manu/sentence_croissant_alpha_v0.3": 2048,
|
|
1491
|
-
"BAAI/bge-small-en-v1.5": 512,
|
|
1492
|
-
"thenlper/gte-small": 384,
|
|
1493
|
-
"sdadas/mmlw-e5-small": 384,
|
|
1494
|
-
"manu/sentence_croissant_alpha_v0.4": 2048,
|
|
1495
|
-
"manu/sentence_croissant_alpha_v0.2": 2048,
|
|
1496
|
-
"abhinand/MedEmbed-small-v0.1": 384,
|
|
1497
|
-
"ibm-granite/granite-embedding-125m-english": 768,
|
|
1498
|
-
"intfloat/e5-small": 384,
|
|
1499
|
-
"voyage-large-2-instruct": 1024,
|
|
1500
|
-
"sdadas/mmlw-roberta-base": 768,
|
|
1501
|
-
"Snowflake/snowflake-arctic-embed-l": 1024,
|
|
1502
|
-
"Mihaiii/Ivysaur": 384,
|
|
1503
|
-
"Snowflake/snowflake-arctic-embed-m-long": 768,
|
|
1504
|
-
"bigscience/sgpt-bloom-7b1-msmarco": 4096,
|
|
1505
|
-
"avsolatorio/GIST-all-MiniLM-L6-v2": 384,
|
|
1506
|
-
"sergeyzh/LaBSE-ru-turbo": 768,
|
|
1507
|
-
"sentence-transformers/all-mpnet-base-v2": 768,
|
|
1508
|
-
"Snowflake/snowflake-arctic-embed-m": 768,
|
|
1509
|
-
"Snowflake/snowflake-arctic-embed-s": 384,
|
|
1510
|
-
"sentence-transformers/all-MiniLM-L12-v2": 384,
|
|
1511
|
-
"Mihaiii/gte-micro-v4": 384,
|
|
1512
|
-
"Snowflake/snowflake-arctic-embed-m-v1.5": 768,
|
|
1513
|
-
"cointegrated/LaBSE-en-ru": 768,
|
|
1514
|
-
"Mihaiii/Bulbasaur": 384,
|
|
1515
|
-
"ibm-granite/granite-embedding-30m-english": 384,
|
|
1516
|
-
"deepfile/embedder-100p": 768,
|
|
1517
|
-
"Jaume/gemma-2b-embeddings": 2048,
|
|
1518
|
-
"OrlikB/KartonBERT-USE-base-v1": 768,
|
|
1519
|
-
"izhx/udever-bloom-7b1": 4096,
|
|
1520
|
-
"izhx/udever-bloom-1b1": 1024,
|
|
1521
|
-
"brahmairesearch/slx-v0.1": 384,
|
|
1522
|
-
"Mihaiii/Wartortle": 384,
|
|
1523
|
-
"izhx/udever-bloom-3b": 2048,
|
|
1524
|
-
"deepvk/USER-base": 768,
|
|
1525
|
-
"ai-forever/ru-en-RoSBERTa": 1024,
|
|
1526
|
-
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": 4096,
|
|
1527
|
-
"Mihaiii/Venusaur": 384,
|
|
1528
|
-
"Snowflake/snowflake-arctic-embed-xs": 384,
|
|
1529
|
-
"jinaai/jina-embedding-b-en-v1": 768,
|
|
1530
|
-
"Mihaiii/gte-micro": 384,
|
|
1531
|
-
"aari1995/German_Semantic_STS_V2": 1024,
|
|
1532
|
-
"Mihaiii/Squirtle": 384,
|
|
1533
|
-
"OrlikB/st-polish-kartonberta-base-alpha-v1": 768,
|
|
1534
|
-
"sergeyzh/rubert-tiny-turbo": 312,
|
|
1535
|
-
"minishlab/potion-base-8M": 256,
|
|
1536
|
-
"minishlab/M2V_base_glove_subword": 256,
|
|
1537
|
-
"jinaai/jina-embedding-s-en-v1": 512,
|
|
1538
|
-
"minishlab/potion-base-4M": 128,
|
|
1539
|
-
"minishlab/M2V_base_output": 256,
|
|
1540
|
-
"DeepPavlov/rubert-base-cased-sentence": 768,
|
|
1541
|
-
"jinaai/jina-embeddings-v2-small-en": 512,
|
|
1542
|
-
"cointegrated/rubert-tiny2": 312,
|
|
1543
|
-
"minishlab/M2V_base_glove": 256,
|
|
1544
|
-
"cointegrated/rubert-tiny": 312,
|
|
1545
|
-
"silma-ai/silma-embeddding-matryoshka-v0.1": 768,
|
|
1546
|
-
"DeepPavlov/rubert-base-cased": 768,
|
|
1547
|
-
"Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": 768,
|
|
1548
|
-
"izhx/udever-bloom-560m": 1024,
|
|
1549
|
-
"minishlab/potion-base-2M": 64,
|
|
1550
|
-
"DeepPavlov/distilrubert-small-cased-conversational": 768,
|
|
1551
|
-
"consciousAI/cai-lunaris-text-embeddings": 1024,
|
|
1552
|
-
"deepvk/deberta-v1-base": 768,
|
|
1553
|
-
"Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": 768,
|
|
1554
|
-
"Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": 768,
|
|
1555
|
-
"ai-forever/sbert_large_mt_nlu_ru": 1024,
|
|
1556
|
-
"ai-forever/sbert_large_nlu_ru": 1024,
|
|
1557
|
-
"malenia1/ternary-weight-embedding": 1024,
|
|
1558
|
-
"jinaai/jina-embeddings-v2-base-en": 768,
|
|
1559
|
-
"VPLabs/SearchMap_Preview": 4096,
|
|
1560
|
-
"Hum-Works/lodestone-base-4096-v1": 768,
|
|
1561
|
-
"jinaai/jina-embeddings-v4": 2048
|
|
1562
|
-
};
|
|
1563
|
-
/**
|
|
1564
|
-
* Lowercase lookup map for case-insensitive model dimension queries.
|
|
1565
|
-
* Built lazily from knownModelDimensions to ensure consistency.
|
|
1566
|
-
*/
|
|
1567
|
-
modelLookup;
|
|
1568
|
-
constructor() {
|
|
1569
|
-
this.modelLookup = /* @__PURE__ */ new Map();
|
|
1570
|
-
for (const [model, dimensions] of Object.entries(this.knownModelDimensions)) {
|
|
1571
|
-
this.modelLookup.set(model.toLowerCase(), dimensions);
|
|
1572
|
-
}
|
|
1573
|
-
}
|
|
1574
|
-
/**
|
|
1575
|
-
* Parse embedding model configuration from a provided model specification.
|
|
1576
|
-
* This is a synchronous operation that extracts provider, model, and known dimensions.
|
|
1577
|
-
*
|
|
1578
|
-
* Supports various providers:
|
|
1579
|
-
* - openai: OpenAI models and OpenAI-compatible APIs (Ollama, LMStudio, etc.)
|
|
1580
|
-
* - vertex: Google Cloud Vertex AI
|
|
1581
|
-
* - gemini: Google Generative AI
|
|
1582
|
-
* - aws: AWS Bedrock models
|
|
1583
|
-
* - microsoft: Azure OpenAI
|
|
1584
|
-
* - sagemaker: AWS SageMaker hosted models
|
|
1585
|
-
*
|
|
1586
|
-
* @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
|
|
1587
|
-
* @returns Parsed embedding model configuration
|
|
1588
|
-
*/
|
|
1589
|
-
parse(modelSpec) {
|
|
1590
|
-
const spec = modelSpec || "text-embedding-3-small";
|
|
1591
|
-
const colonIndex = spec.indexOf(":");
|
|
1592
|
-
let provider;
|
|
1593
|
-
let model;
|
|
1594
|
-
if (colonIndex === -1) {
|
|
1595
|
-
provider = "openai";
|
|
1596
|
-
model = spec;
|
|
1597
|
-
} else {
|
|
1598
|
-
provider = spec.substring(0, colonIndex);
|
|
1599
|
-
model = spec.substring(colonIndex + 1);
|
|
1600
|
-
}
|
|
1601
|
-
const dimensions = this.modelLookup?.get(model.toLowerCase()) || null;
|
|
1602
|
-
return {
|
|
1603
|
-
provider,
|
|
1604
|
-
model,
|
|
1605
|
-
dimensions,
|
|
1606
|
-
modelSpec: spec
|
|
1607
|
-
};
|
|
1608
|
-
}
|
|
1609
|
-
/**
|
|
1610
|
-
* Get the known dimensions for a specific model.
|
|
1611
|
-
* Returns null if the model dimensions are not known.
|
|
1612
|
-
* Uses case-insensitive lookup.
|
|
1613
|
-
*
|
|
1614
|
-
* @param model The model name (e.g., "text-embedding-3-small")
|
|
1615
|
-
* @returns Known dimensions or null
|
|
1616
|
-
*/
|
|
1617
|
-
getKnownDimensions(model) {
|
|
1618
|
-
return this.modelLookup?.get(model.toLowerCase()) || null;
|
|
1619
|
-
}
|
|
1620
|
-
/**
|
|
1621
|
-
* Add or update known dimensions for a model.
|
|
1622
|
-
* This can be used to cache discovered dimensions.
|
|
1623
|
-
* Stores both original case and lowercase for consistent lookup.
|
|
1624
|
-
*
|
|
1625
|
-
* @param model The model name
|
|
1626
|
-
* @param dimensions The dimensions to cache
|
|
1627
|
-
*/
|
|
1628
|
-
setKnownDimensions(model, dimensions) {
|
|
1629
|
-
this.knownModelDimensions[model] = dimensions;
|
|
1630
|
-
if (this.modelLookup) {
|
|
1631
|
-
this.modelLookup.set(model.toLowerCase(), dimensions);
|
|
1632
|
-
}
|
|
1633
|
-
}
|
|
1634
|
-
/**
|
|
1635
|
-
* Static method to parse embedding model configuration using the singleton instance.
|
|
1636
|
-
* This maintains backward compatibility while using the class-based approach.
|
|
1637
|
-
*/
|
|
1638
|
-
static parseEmbeddingConfig(modelSpec) {
|
|
1639
|
-
return EmbeddingConfig.getInstance().parse(modelSpec);
|
|
1640
|
-
}
|
|
1641
|
-
/**
|
|
1642
|
-
* Static method to get known model dimensions using the singleton instance.
|
|
1643
|
-
* This maintains backward compatibility while using the class-based approach.
|
|
1644
|
-
*/
|
|
1645
|
-
static getKnownModelDimensions(model) {
|
|
1646
|
-
return EmbeddingConfig.getInstance().getKnownDimensions(model);
|
|
1436
|
+
isActive() {
|
|
1437
|
+
return this.isConnected;
|
|
1647
1438
|
}
|
|
1648
1439
|
/**
|
|
1649
|
-
*
|
|
1650
|
-
* This maintains backward compatibility while using the class-based approach.
|
|
1440
|
+
* Schedule a reconnection attempt after a delay.
|
|
1651
1441
|
*/
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
rootCommand = rootCommand.parent;
|
|
1660
|
-
}
|
|
1661
|
-
return rootCommand?.opts() || {};
|
|
1662
|
-
}
|
|
1663
|
-
function getEventBus(command) {
|
|
1664
|
-
const eventBus = command?._eventBus;
|
|
1665
|
-
if (!eventBus) {
|
|
1666
|
-
throw new Error("EventBusService not initialized");
|
|
1667
|
-
}
|
|
1668
|
-
return eventBus;
|
|
1669
|
-
}
|
|
1670
|
-
function ensurePlaywrightBrowsersInstalled() {
|
|
1671
|
-
if (process.env.PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD === "1") {
|
|
1672
|
-
logger.debug(
|
|
1673
|
-
"PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD is set, skipping Playwright browser install."
|
|
1674
|
-
);
|
|
1675
|
-
return;
|
|
1676
|
-
}
|
|
1677
|
-
const chromiumEnvPath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH;
|
|
1678
|
-
if (chromiumEnvPath && existsSync(chromiumEnvPath)) {
|
|
1679
|
-
logger.debug(
|
|
1680
|
-
`PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH is set to '${chromiumEnvPath}', skipping Playwright browser install.`
|
|
1681
|
-
);
|
|
1682
|
-
return;
|
|
1683
|
-
}
|
|
1684
|
-
try {
|
|
1685
|
-
const chromiumPath = chromium.executablePath();
|
|
1686
|
-
if (!chromiumPath || !existsSync(chromiumPath)) {
|
|
1687
|
-
throw new Error("Playwright Chromium browser not found");
|
|
1688
|
-
}
|
|
1689
|
-
} catch (error) {
|
|
1690
|
-
logger.debug(String(error));
|
|
1691
|
-
try {
|
|
1692
|
-
console.log(
|
|
1693
|
-
"🌐 Installing Playwright Chromium browser... (this may take a moment)"
|
|
1694
|
-
);
|
|
1695
|
-
execSync("npm exec -y playwright install --no-shell --with-deps chromium", {
|
|
1696
|
-
stdio: "ignore",
|
|
1697
|
-
// Suppress output
|
|
1698
|
-
cwd: getProjectRoot()
|
|
1699
|
-
});
|
|
1700
|
-
} catch (_installErr) {
|
|
1701
|
-
console.error(
|
|
1702
|
-
"❌ Failed to install Playwright browsers automatically. Please run:\n npx playwright install --no-shell --with-deps chromium\nand try again."
|
|
1703
|
-
);
|
|
1704
|
-
process.exit(1);
|
|
1705
|
-
}
|
|
1706
|
-
}
|
|
1707
|
-
}
|
|
1708
|
-
function resolveProtocol(protocol) {
|
|
1709
|
-
if (protocol === "auto") {
|
|
1710
|
-
if (!process.stdin.isTTY && !process.stdout.isTTY) {
|
|
1711
|
-
return "stdio";
|
|
1712
|
-
}
|
|
1713
|
-
return "http";
|
|
1714
|
-
}
|
|
1715
|
-
if (protocol === "stdio" || protocol === "http") {
|
|
1716
|
-
return protocol;
|
|
1717
|
-
}
|
|
1718
|
-
throw new Error(`Invalid protocol: ${protocol}. Must be 'auto', 'stdio', or 'http'`);
|
|
1719
|
-
}
|
|
1720
|
-
const formatOutput = (data) => JSON.stringify(data, null, 2);
|
|
1721
|
-
function setupLogging(options, protocol) {
|
|
1722
|
-
if (options.silent) {
|
|
1723
|
-
setLogLevel(LogLevel.ERROR);
|
|
1724
|
-
} else if (options.verbose) {
|
|
1725
|
-
setLogLevel(LogLevel.DEBUG);
|
|
1442
|
+
scheduleReconnect() {
|
|
1443
|
+
logger.info("🔄 Scheduling reconnect to remote worker in 5 seconds...");
|
|
1444
|
+
setTimeout(() => {
|
|
1445
|
+
if (!this.isConnected) {
|
|
1446
|
+
this.connect();
|
|
1447
|
+
}
|
|
1448
|
+
}, 5e3);
|
|
1726
1449
|
}
|
|
1727
1450
|
}
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1451
|
+
class ToolError extends Error {
|
|
1452
|
+
constructor(message, toolName) {
|
|
1453
|
+
super(message);
|
|
1454
|
+
this.toolName = toolName;
|
|
1455
|
+
this.name = this.constructor.name;
|
|
1732
1456
|
}
|
|
1733
|
-
return port;
|
|
1734
1457
|
}
|
|
1735
|
-
|
|
1736
|
-
const trimmed = hostString.trim();
|
|
1737
|
-
if (!trimmed) {
|
|
1738
|
-
throw new Error("❌ Host cannot be empty");
|
|
1739
|
-
}
|
|
1740
|
-
if (trimmed.includes(" ") || trimmed.includes(" ") || trimmed.includes("\n")) {
|
|
1741
|
-
throw new Error("❌ Host cannot contain whitespace");
|
|
1742
|
-
}
|
|
1743
|
-
return trimmed;
|
|
1458
|
+
class ValidationError extends ToolError {
|
|
1744
1459
|
}
|
|
1745
|
-
|
|
1460
|
+
const DEFAULT_MAX_PAGES = 1e3;
|
|
1461
|
+
const DEFAULT_MAX_DEPTH$1 = 3;
|
|
1462
|
+
const DEFAULT_MAX_CONCURRENCY = 3;
|
|
1463
|
+
const DEFAULT_PROTOCOL = "auto";
|
|
1464
|
+
const DEFAULT_HTTP_PORT = 6280;
|
|
1465
|
+
const DEFAULT_WEB_PORT = 6281;
|
|
1466
|
+
const DEFAULT_HOST = "127.0.0.1";
|
|
1467
|
+
const DEFAULT_PAGE_TIMEOUT = 5e3;
|
|
1468
|
+
const FETCHER_MAX_RETRIES = 6;
|
|
1469
|
+
const FETCHER_BASE_DELAY = 1e3;
|
|
1470
|
+
const FETCHER_MAX_CACHE_ITEMS = 200;
|
|
1471
|
+
const FETCHER_MAX_CACHE_ITEM_SIZE_BYTES = 500 * 1024;
|
|
1472
|
+
const SPLITTER_MIN_CHUNK_SIZE = 500;
|
|
1473
|
+
const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
|
|
1474
|
+
const SPLITTER_MAX_CHUNK_SIZE = 5e3;
|
|
1475
|
+
const EMBEDDING_BATCH_SIZE = 100;
|
|
1476
|
+
const EMBEDDING_BATCH_CHARS = 5e4;
|
|
1477
|
+
const MIGRATION_MAX_RETRIES = 5;
|
|
1478
|
+
const MIGRATION_RETRY_DELAY_MS = 300;
|
|
1479
|
+
const SEARCH_OVERFETCH_FACTOR = 2;
|
|
1480
|
+
const SEARCH_WEIGHT_VEC = 1;
|
|
1481
|
+
const SEARCH_WEIGHT_FTS = 1;
|
|
1482
|
+
const VECTOR_SEARCH_MULTIPLIER = 10;
|
|
1483
|
+
function createResponse(text) {
|
|
1746
1484
|
return {
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
readOnly: options.readOnly ?? false,
|
|
1755
|
-
auth: options.auth,
|
|
1756
|
-
startupContext: options.startupContext
|
|
1757
|
-
};
|
|
1758
|
-
}
|
|
1759
|
-
function parseHeaders(headerOptions) {
|
|
1760
|
-
const headers = {};
|
|
1761
|
-
if (Array.isArray(headerOptions)) {
|
|
1762
|
-
for (const entry of headerOptions) {
|
|
1763
|
-
const idx = entry.indexOf(":");
|
|
1764
|
-
if (idx > 0) {
|
|
1765
|
-
const name = entry.slice(0, idx).trim();
|
|
1766
|
-
const value = entry.slice(idx + 1).trim();
|
|
1767
|
-
if (name) headers[name] = value;
|
|
1768
|
-
}
|
|
1769
|
-
}
|
|
1770
|
-
}
|
|
1771
|
-
return headers;
|
|
1772
|
-
}
|
|
1773
|
-
function parseAuthConfig(options) {
|
|
1774
|
-
if (!options.authEnabled) {
|
|
1775
|
-
return void 0;
|
|
1776
|
-
}
|
|
1777
|
-
return {
|
|
1778
|
-
enabled: true,
|
|
1779
|
-
issuerUrl: options.authIssuerUrl,
|
|
1780
|
-
audience: options.authAudience,
|
|
1781
|
-
scopes: ["openid", "profile"]
|
|
1782
|
-
// Default scopes for OAuth2/OIDC
|
|
1783
|
-
};
|
|
1784
|
-
}
|
|
1785
|
-
function validateAuthConfig(authConfig) {
|
|
1786
|
-
if (!authConfig.enabled) {
|
|
1787
|
-
return;
|
|
1788
|
-
}
|
|
1789
|
-
const errors = [];
|
|
1790
|
-
if (!authConfig.issuerUrl) {
|
|
1791
|
-
errors.push("--auth-issuer-url is required when auth is enabled");
|
|
1792
|
-
} else {
|
|
1793
|
-
try {
|
|
1794
|
-
const url = new URL(authConfig.issuerUrl);
|
|
1795
|
-
if (url.protocol !== "https:") {
|
|
1796
|
-
errors.push("Issuer URL must use HTTPS protocol");
|
|
1797
|
-
}
|
|
1798
|
-
} catch {
|
|
1799
|
-
errors.push("Issuer URL must be a valid URL");
|
|
1800
|
-
}
|
|
1801
|
-
}
|
|
1802
|
-
if (!authConfig.audience) {
|
|
1803
|
-
errors.push("--auth-audience is required when auth is enabled");
|
|
1804
|
-
} else {
|
|
1805
|
-
try {
|
|
1806
|
-
const url = new URL(authConfig.audience);
|
|
1807
|
-
if (url.protocol === "http:" && url.hostname !== "localhost") {
|
|
1808
|
-
logger.warn(
|
|
1809
|
-
"⚠️ Audience uses HTTP protocol - consider using HTTPS for production"
|
|
1810
|
-
);
|
|
1811
|
-
}
|
|
1812
|
-
if (url.hash) {
|
|
1813
|
-
errors.push("Audience must not contain URL fragments");
|
|
1814
|
-
}
|
|
1815
|
-
} catch {
|
|
1816
|
-
if (authConfig.audience.startsWith("urn:")) {
|
|
1817
|
-
const urnParts = authConfig.audience.split(":");
|
|
1818
|
-
if (urnParts.length < 3 || !urnParts[1] || !urnParts[2]) {
|
|
1819
|
-
errors.push("URN audience must follow format: urn:namespace:specific-string");
|
|
1820
|
-
}
|
|
1821
|
-
} else {
|
|
1822
|
-
errors.push(
|
|
1823
|
-
"Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
|
|
1824
|
-
);
|
|
1825
|
-
}
|
|
1826
|
-
}
|
|
1827
|
-
}
|
|
1828
|
-
if (errors.length > 0) {
|
|
1829
|
-
throw new Error(`Auth configuration validation failed:
|
|
1830
|
-
${errors.join("\n")}`);
|
|
1831
|
-
}
|
|
1832
|
-
}
|
|
1833
|
-
function warnHttpUsage(authConfig, port) {
|
|
1834
|
-
if (!authConfig?.enabled) {
|
|
1835
|
-
return;
|
|
1836
|
-
}
|
|
1837
|
-
const isLocalhost = process.env.NODE_ENV !== "production" || port === 6280 || // default dev port
|
|
1838
|
-
process.env.HOSTNAME?.includes("localhost");
|
|
1839
|
-
if (!isLocalhost) {
|
|
1840
|
-
logger.warn(
|
|
1841
|
-
"⚠️ Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
|
|
1842
|
-
);
|
|
1843
|
-
}
|
|
1844
|
-
}
|
|
1845
|
-
function resolveEmbeddingContext(embeddingModel) {
|
|
1846
|
-
try {
|
|
1847
|
-
let modelSpec = embeddingModel;
|
|
1848
|
-
if (!modelSpec && process.env.OPENAI_API_KEY) {
|
|
1849
|
-
modelSpec = "text-embedding-3-small";
|
|
1850
|
-
logger.debug(
|
|
1851
|
-
"Using default OpenAI embedding model due to OPENAI_API_KEY presence."
|
|
1852
|
-
);
|
|
1853
|
-
}
|
|
1854
|
-
if (!modelSpec) {
|
|
1855
|
-
logger.debug(
|
|
1856
|
-
"No embedding model specified and OPENAI_API_KEY not found. Embeddings are disabled."
|
|
1857
|
-
);
|
|
1858
|
-
return null;
|
|
1859
|
-
}
|
|
1860
|
-
logger.debug(`Resolving embedding configuration for model: ${modelSpec}`);
|
|
1861
|
-
return EmbeddingConfig.parseEmbeddingConfig(modelSpec);
|
|
1862
|
-
} catch (error) {
|
|
1863
|
-
logger.debug(`Failed to resolve embedding configuration: ${error}`);
|
|
1864
|
-
return null;
|
|
1865
|
-
}
|
|
1866
|
-
}
|
|
1867
|
-
class RemoteEventProxy {
|
|
1868
|
-
constructor(remoteWorkerUrl, localEventBus) {
|
|
1869
|
-
this.remoteWorkerUrl = remoteWorkerUrl;
|
|
1870
|
-
this.localEventBus = localEventBus;
|
|
1871
|
-
}
|
|
1872
|
-
trpcClient = null;
|
|
1873
|
-
wsClient = null;
|
|
1874
|
-
subscription = null;
|
|
1875
|
-
isConnected = false;
|
|
1876
|
-
/**
|
|
1877
|
-
* Start subscribing to remote events and forwarding them locally.
|
|
1878
|
-
*/
|
|
1879
|
-
async connect() {
|
|
1880
|
-
if (this.isConnected) {
|
|
1881
|
-
logger.warn("Remote event proxy already connected");
|
|
1882
|
-
return;
|
|
1883
|
-
}
|
|
1884
|
-
logger.info(`📡 Connecting to remote worker at ${this.remoteWorkerUrl}`);
|
|
1885
|
-
try {
|
|
1886
|
-
const url = new URL(this.remoteWorkerUrl);
|
|
1887
|
-
const baseUrl = `${url.protocol}//${url.host}`;
|
|
1888
|
-
const wsUrl = baseUrl.replace(/^http/, "ws");
|
|
1889
|
-
this.wsClient = createWSClient({
|
|
1890
|
-
url: wsUrl
|
|
1891
|
-
});
|
|
1892
|
-
this.trpcClient = createTRPCClient({
|
|
1893
|
-
links: [
|
|
1894
|
-
splitLink({
|
|
1895
|
-
condition: (op) => op.type === "subscription",
|
|
1896
|
-
true: wsLink({ client: this.wsClient, transformer: superjson }),
|
|
1897
|
-
false: httpBatchLink({ url: this.remoteWorkerUrl, transformer: superjson })
|
|
1898
|
-
})
|
|
1899
|
-
]
|
|
1900
|
-
});
|
|
1901
|
-
this.subscription = this.trpcClient.events.subscribe.subscribe(
|
|
1902
|
-
{},
|
|
1903
|
-
// Subscribe to all event types
|
|
1904
|
-
{
|
|
1905
|
-
onData: (data) => {
|
|
1906
|
-
logger.debug(`📥 Received remote event: ${data.type}`);
|
|
1907
|
-
this.localEventBus.emit(data.type, data.payload);
|
|
1908
|
-
},
|
|
1909
|
-
onError: (error) => {
|
|
1910
|
-
logger.error(`❌ Remote event subscription error: ${error}`);
|
|
1911
|
-
this.isConnected = false;
|
|
1912
|
-
this.scheduleReconnect();
|
|
1913
|
-
},
|
|
1914
|
-
onStarted: () => {
|
|
1915
|
-
logger.info("✅ Remote event subscription started");
|
|
1916
|
-
this.isConnected = true;
|
|
1917
|
-
},
|
|
1918
|
-
onComplete: () => {
|
|
1919
|
-
logger.info("✅ Remote event subscription completed");
|
|
1920
|
-
this.isConnected = false;
|
|
1921
|
-
}
|
|
1922
|
-
}
|
|
1923
|
-
);
|
|
1924
|
-
} catch (error) {
|
|
1925
|
-
logger.error(`❌ Failed to connect to remote worker: ${error}`);
|
|
1926
|
-
this.scheduleReconnect();
|
|
1927
|
-
}
|
|
1928
|
-
}
|
|
1929
|
-
/**
|
|
1930
|
-
* Disconnect from the remote worker and stop forwarding events.
|
|
1931
|
-
*/
|
|
1932
|
-
disconnect() {
|
|
1933
|
-
if (this.subscription) {
|
|
1934
|
-
this.subscription.unsubscribe();
|
|
1935
|
-
this.subscription = null;
|
|
1936
|
-
}
|
|
1937
|
-
if (this.wsClient) {
|
|
1938
|
-
this.wsClient.close();
|
|
1939
|
-
this.wsClient = null;
|
|
1940
|
-
}
|
|
1941
|
-
this.isConnected = false;
|
|
1942
|
-
logger.info("🚫 Disconnected from remote worker");
|
|
1943
|
-
}
|
|
1944
|
-
/**
|
|
1945
|
-
* Check if the proxy is currently connected to the remote worker.
|
|
1946
|
-
*/
|
|
1947
|
-
isActive() {
|
|
1948
|
-
return this.isConnected;
|
|
1949
|
-
}
|
|
1950
|
-
/**
|
|
1951
|
-
* Schedule a reconnection attempt after a delay.
|
|
1952
|
-
*/
|
|
1953
|
-
scheduleReconnect() {
|
|
1954
|
-
logger.info("🔄 Scheduling reconnect to remote worker in 5 seconds...");
|
|
1955
|
-
setTimeout(() => {
|
|
1956
|
-
if (!this.isConnected) {
|
|
1957
|
-
this.connect();
|
|
1958
|
-
}
|
|
1959
|
-
}, 5e3);
|
|
1960
|
-
}
|
|
1961
|
-
}
|
|
1962
|
-
class ToolError extends Error {
|
|
1963
|
-
constructor(message, toolName) {
|
|
1964
|
-
super(message);
|
|
1965
|
-
this.toolName = toolName;
|
|
1966
|
-
this.name = this.constructor.name;
|
|
1967
|
-
}
|
|
1968
|
-
}
|
|
1969
|
-
class ValidationError extends ToolError {
|
|
1970
|
-
}
|
|
1971
|
-
const DEFAULT_MAX_PAGES = 1e3;
|
|
1972
|
-
const DEFAULT_MAX_DEPTH$1 = 3;
|
|
1973
|
-
const DEFAULT_MAX_CONCURRENCY = 3;
|
|
1974
|
-
const DEFAULT_PROTOCOL = "auto";
|
|
1975
|
-
const DEFAULT_HTTP_PORT = 6280;
|
|
1976
|
-
const DEFAULT_WEB_PORT = 6281;
|
|
1977
|
-
const DEFAULT_HOST = "127.0.0.1";
|
|
1978
|
-
const DEFAULT_PAGE_TIMEOUT = 5e3;
|
|
1979
|
-
const FETCHER_MAX_RETRIES = 6;
|
|
1980
|
-
const FETCHER_BASE_DELAY = 1e3;
|
|
1981
|
-
const FETCHER_MAX_CACHE_ITEMS = 200;
|
|
1982
|
-
const FETCHER_MAX_CACHE_ITEM_SIZE_BYTES = 500 * 1024;
|
|
1983
|
-
const SPLITTER_MIN_CHUNK_SIZE = 500;
|
|
1984
|
-
const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
|
|
1985
|
-
const SPLITTER_MAX_CHUNK_SIZE = 5e3;
|
|
1986
|
-
const EMBEDDING_BATCH_SIZE = 100;
|
|
1987
|
-
const EMBEDDING_BATCH_CHARS = 5e4;
|
|
1988
|
-
const MIGRATION_MAX_RETRIES = 5;
|
|
1989
|
-
const MIGRATION_RETRY_DELAY_MS = 300;
|
|
1990
|
-
const SEARCH_OVERFETCH_FACTOR = 2;
|
|
1991
|
-
const SEARCH_WEIGHT_VEC = 1;
|
|
1992
|
-
const SEARCH_WEIGHT_FTS = 1;
|
|
1993
|
-
const VECTOR_SEARCH_MULTIPLIER = 10;
|
|
1994
|
-
function createResponse(text) {
|
|
1995
|
-
return {
|
|
1996
|
-
content: [
|
|
1997
|
-
{
|
|
1998
|
-
type: "text",
|
|
1999
|
-
text
|
|
2000
|
-
}
|
|
2001
|
-
],
|
|
2002
|
-
isError: false
|
|
1485
|
+
content: [
|
|
1486
|
+
{
|
|
1487
|
+
type: "text",
|
|
1488
|
+
text
|
|
1489
|
+
}
|
|
1490
|
+
],
|
|
1491
|
+
isError: false
|
|
2003
1492
|
};
|
|
2004
1493
|
}
|
|
2005
1494
|
function createError(errorOrText) {
|
|
@@ -2902,7 +2391,7 @@ class BrowserFetcher {
|
|
|
2902
2391
|
}
|
|
2903
2392
|
logger.debug("Browser closed successfully");
|
|
2904
2393
|
} catch (error) {
|
|
2905
|
-
logger.warn(`⚠️
|
|
2394
|
+
logger.warn(`⚠️ Error closing browser: ${error}`);
|
|
2906
2395
|
}
|
|
2907
2396
|
}
|
|
2908
2397
|
}
|
|
@@ -7510,6 +6999,9 @@ Please verify the server URL includes the correct port (default 8080) and ends w
|
|
|
7510
6999
|
async storeScraperOptions(versionId, options) {
|
|
7511
7000
|
await this.client.storeScraperOptions.mutate({ versionId, options });
|
|
7512
7001
|
}
|
|
7002
|
+
getActiveEmbeddingConfig() {
|
|
7003
|
+
return null;
|
|
7004
|
+
}
|
|
7513
7005
|
}
|
|
7514
7006
|
class JsonPipeline extends BasePipeline {
|
|
7515
7007
|
middleware;
|
|
@@ -8205,199 +7697,503 @@ function createContentAssemblyStrategy(mimeType) {
|
|
|
8205
7697
|
if (strategy.canHandle(mimeType)) {
|
|
8206
7698
|
return strategy;
|
|
8207
7699
|
}
|
|
8208
|
-
}
|
|
8209
|
-
return new MarkdownAssemblyStrategy();
|
|
8210
|
-
}
|
|
8211
|
-
class DocumentRetrieverService {
|
|
8212
|
-
documentStore;
|
|
8213
|
-
constructor(documentStore) {
|
|
8214
|
-
this.documentStore = documentStore;
|
|
7700
|
+
}
|
|
7701
|
+
return new MarkdownAssemblyStrategy();
|
|
7702
|
+
}
|
|
7703
|
+
class DocumentRetrieverService {
|
|
7704
|
+
documentStore;
|
|
7705
|
+
constructor(documentStore) {
|
|
7706
|
+
this.documentStore = documentStore;
|
|
7707
|
+
}
|
|
7708
|
+
/**
|
|
7709
|
+
* Searches for documents and expands the context around the matches using content-type-aware strategies.
|
|
7710
|
+
* @param library The library name.
|
|
7711
|
+
* @param version The library version.
|
|
7712
|
+
* @param query The search query.
|
|
7713
|
+
* @param limit The optional limit for the initial search results.
|
|
7714
|
+
* @returns An array of search results with content assembled according to content type.
|
|
7715
|
+
*/
|
|
7716
|
+
async search(library, version, query, limit) {
|
|
7717
|
+
const normalizedVersion = (version ?? "").toLowerCase();
|
|
7718
|
+
const initialResults = await this.documentStore.findByContent(
|
|
7719
|
+
library,
|
|
7720
|
+
normalizedVersion,
|
|
7721
|
+
query,
|
|
7722
|
+
limit ?? 10
|
|
7723
|
+
);
|
|
7724
|
+
if (initialResults.length === 0) {
|
|
7725
|
+
return [];
|
|
7726
|
+
}
|
|
7727
|
+
const resultsByUrl = this.groupResultsByUrl(initialResults);
|
|
7728
|
+
const results = [];
|
|
7729
|
+
for (const [url, urlResults] of resultsByUrl.entries()) {
|
|
7730
|
+
const result = await this.processUrlGroup(
|
|
7731
|
+
library,
|
|
7732
|
+
normalizedVersion,
|
|
7733
|
+
url,
|
|
7734
|
+
urlResults
|
|
7735
|
+
);
|
|
7736
|
+
results.push(result);
|
|
7737
|
+
}
|
|
7738
|
+
return results;
|
|
7739
|
+
}
|
|
7740
|
+
/**
|
|
7741
|
+
* Groups search results by URL.
|
|
7742
|
+
*/
|
|
7743
|
+
groupResultsByUrl(results) {
|
|
7744
|
+
const resultsByUrl = /* @__PURE__ */ new Map();
|
|
7745
|
+
for (const result of results) {
|
|
7746
|
+
const url = result.url;
|
|
7747
|
+
if (!resultsByUrl.has(url)) {
|
|
7748
|
+
resultsByUrl.set(url, []);
|
|
7749
|
+
}
|
|
7750
|
+
const urlResults = resultsByUrl.get(url);
|
|
7751
|
+
if (urlResults) {
|
|
7752
|
+
urlResults.push(result);
|
|
7753
|
+
}
|
|
7754
|
+
}
|
|
7755
|
+
return resultsByUrl;
|
|
7756
|
+
}
|
|
7757
|
+
/**
|
|
7758
|
+
* Processes a group of search results from the same URL using appropriate strategy.
|
|
7759
|
+
*/
|
|
7760
|
+
async processUrlGroup(library, version, url, initialChunks) {
|
|
7761
|
+
const mimeType = initialChunks.length > 0 ? initialChunks[0].content_type : void 0;
|
|
7762
|
+
const maxScore = Math.max(...initialChunks.map((chunk) => chunk.score));
|
|
7763
|
+
const strategy = createContentAssemblyStrategy(mimeType);
|
|
7764
|
+
const selectedChunks = await strategy.selectChunks(
|
|
7765
|
+
library,
|
|
7766
|
+
version,
|
|
7767
|
+
initialChunks,
|
|
7768
|
+
this.documentStore
|
|
7769
|
+
);
|
|
7770
|
+
const content = strategy.assembleContent(selectedChunks);
|
|
7771
|
+
return {
|
|
7772
|
+
url,
|
|
7773
|
+
content,
|
|
7774
|
+
score: maxScore,
|
|
7775
|
+
mimeType
|
|
7776
|
+
};
|
|
7777
|
+
}
|
|
7778
|
+
}
|
|
7779
|
+
const MIGRATIONS_DIR = path.join(getProjectRoot(), "db", "migrations");
|
|
7780
|
+
const MIGRATIONS_TABLE = "_schema_migrations";
|
|
7781
|
+
function ensureMigrationsTable(db) {
|
|
7782
|
+
db.exec(`
|
|
7783
|
+
CREATE TABLE IF NOT EXISTS ${MIGRATIONS_TABLE} (
|
|
7784
|
+
id TEXT PRIMARY KEY,
|
|
7785
|
+
applied_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
7786
|
+
);
|
|
7787
|
+
`);
|
|
7788
|
+
}
|
|
7789
|
+
function getAppliedMigrations(db) {
|
|
7790
|
+
const stmt = db.prepare(`SELECT id FROM ${MIGRATIONS_TABLE}`);
|
|
7791
|
+
const rows = stmt.all();
|
|
7792
|
+
return new Set(rows.map((row) => row.id));
|
|
7793
|
+
}
|
|
7794
|
+
async function applyMigrations(db) {
|
|
7795
|
+
try {
|
|
7796
|
+
db.pragma("journal_mode = OFF");
|
|
7797
|
+
db.pragma("synchronous = OFF");
|
|
7798
|
+
db.pragma("mmap_size = 268435456");
|
|
7799
|
+
db.pragma("cache_size = -64000");
|
|
7800
|
+
db.pragma("temp_store = MEMORY");
|
|
7801
|
+
logger.debug("Applied performance optimizations for migration");
|
|
7802
|
+
} catch (_error) {
|
|
7803
|
+
logger.warn("⚠️ Could not apply all performance optimizations for migration");
|
|
7804
|
+
}
|
|
7805
|
+
const overallTransaction = db.transaction(() => {
|
|
7806
|
+
logger.debug("Checking database migrations...");
|
|
7807
|
+
ensureMigrationsTable(db);
|
|
7808
|
+
const appliedMigrations = getAppliedMigrations(db);
|
|
7809
|
+
if (!fs.existsSync(MIGRATIONS_DIR)) {
|
|
7810
|
+
throw new StoreError("Migrations directory not found");
|
|
7811
|
+
}
|
|
7812
|
+
const migrationFiles = fs.readdirSync(MIGRATIONS_DIR).filter((file) => file.endsWith(".sql")).sort();
|
|
7813
|
+
const pendingMigrations = migrationFiles.filter(
|
|
7814
|
+
(filename) => !appliedMigrations.has(filename)
|
|
7815
|
+
);
|
|
7816
|
+
if (pendingMigrations.length > 0) {
|
|
7817
|
+
logger.info(`🔄 Applying ${pendingMigrations.length} database migration(s)...`);
|
|
7818
|
+
}
|
|
7819
|
+
let appliedCount = 0;
|
|
7820
|
+
for (const filename of pendingMigrations) {
|
|
7821
|
+
logger.debug(`Applying migration: ${filename}`);
|
|
7822
|
+
const filePath = path.join(MIGRATIONS_DIR, filename);
|
|
7823
|
+
const sql = fs.readFileSync(filePath, "utf8");
|
|
7824
|
+
try {
|
|
7825
|
+
db.exec(sql);
|
|
7826
|
+
const insertStmt = db.prepare(`INSERT INTO ${MIGRATIONS_TABLE} (id) VALUES (?)`);
|
|
7827
|
+
insertStmt.run(filename);
|
|
7828
|
+
logger.debug(`Applied migration: ${filename}`);
|
|
7829
|
+
appliedCount++;
|
|
7830
|
+
} catch (error) {
|
|
7831
|
+
logger.error(`❌ Failed to apply migration: ${filename} - ${error}`);
|
|
7832
|
+
throw new StoreError(`Migration failed: ${filename}`, error);
|
|
7833
|
+
}
|
|
7834
|
+
}
|
|
7835
|
+
if (appliedCount > 0) {
|
|
7836
|
+
logger.info(`✅ Successfully applied ${appliedCount} migration(s)`);
|
|
7837
|
+
} else {
|
|
7838
|
+
logger.debug("Database schema is up to date");
|
|
7839
|
+
}
|
|
7840
|
+
return appliedCount;
|
|
7841
|
+
});
|
|
7842
|
+
let retries = 0;
|
|
7843
|
+
let appliedMigrationsCount = 0;
|
|
7844
|
+
while (true) {
|
|
7845
|
+
try {
|
|
7846
|
+
appliedMigrationsCount = overallTransaction.immediate();
|
|
7847
|
+
logger.debug("Database migrations completed successfully");
|
|
7848
|
+
if (appliedMigrationsCount > 0) {
|
|
7849
|
+
try {
|
|
7850
|
+
logger.debug(
|
|
7851
|
+
`Running VACUUM after applying ${appliedMigrationsCount} migration(s)...`
|
|
7852
|
+
);
|
|
7853
|
+
db.exec("VACUUM");
|
|
7854
|
+
logger.debug("Database vacuum completed successfully");
|
|
7855
|
+
} catch (error) {
|
|
7856
|
+
logger.warn(`⚠️ Could not vacuum database after migrations: ${error}`);
|
|
7857
|
+
}
|
|
7858
|
+
} else {
|
|
7859
|
+
logger.debug("Skipping VACUUM - no migrations were applied");
|
|
7860
|
+
}
|
|
7861
|
+
break;
|
|
7862
|
+
} catch (error) {
|
|
7863
|
+
if (error?.code === "SQLITE_BUSY" && retries < MIGRATION_MAX_RETRIES) {
|
|
7864
|
+
retries++;
|
|
7865
|
+
logger.warn(
|
|
7866
|
+
`⚠️ Migrations busy (SQLITE_BUSY), retrying attempt ${retries}/${MIGRATION_MAX_RETRIES} in ${MIGRATION_RETRY_DELAY_MS}ms...`
|
|
7867
|
+
);
|
|
7868
|
+
await new Promise((resolve) => setTimeout(resolve, MIGRATION_RETRY_DELAY_MS));
|
|
7869
|
+
} else {
|
|
7870
|
+
if (error?.code === "SQLITE_BUSY") {
|
|
7871
|
+
logger.error(
|
|
7872
|
+
`❌ Migrations still busy after ${MIGRATION_MAX_RETRIES} retries. Giving up: ${error}`
|
|
7873
|
+
);
|
|
7874
|
+
}
|
|
7875
|
+
if (error instanceof StoreError) {
|
|
7876
|
+
throw error;
|
|
7877
|
+
}
|
|
7878
|
+
throw new StoreError("Failed during migration process", error);
|
|
7879
|
+
}
|
|
7880
|
+
}
|
|
7881
|
+
}
|
|
7882
|
+
try {
|
|
7883
|
+
db.pragma("journal_mode = WAL");
|
|
7884
|
+
db.pragma("wal_autocheckpoint = 1000");
|
|
7885
|
+
db.pragma("busy_timeout = 30000");
|
|
7886
|
+
db.pragma("foreign_keys = ON");
|
|
7887
|
+
db.pragma("synchronous = NORMAL");
|
|
7888
|
+
logger.debug(
|
|
7889
|
+
"Applied production database configuration (WAL mode, autocheckpoint, foreign keys, busy timeout)"
|
|
7890
|
+
);
|
|
7891
|
+
} catch (_error) {
|
|
7892
|
+
logger.warn("⚠️ Could not apply all production database settings");
|
|
7893
|
+
}
|
|
7894
|
+
}
|
|
7895
|
+
class EmbeddingConfig {
|
|
7896
|
+
static instance = null;
|
|
7897
|
+
/**
|
|
7898
|
+
* Get the singleton instance of EmbeddingConfig.
|
|
7899
|
+
* Creates the instance if it doesn't exist.
|
|
7900
|
+
*/
|
|
7901
|
+
static getInstance() {
|
|
7902
|
+
if (EmbeddingConfig.instance === null) {
|
|
7903
|
+
EmbeddingConfig.instance = new EmbeddingConfig();
|
|
7904
|
+
}
|
|
7905
|
+
return EmbeddingConfig.instance;
|
|
7906
|
+
}
|
|
7907
|
+
/**
|
|
7908
|
+
* Reset the singleton instance (useful for testing).
|
|
7909
|
+
*/
|
|
7910
|
+
static resetInstance() {
|
|
7911
|
+
EmbeddingConfig.instance = null;
|
|
7912
|
+
}
|
|
7913
|
+
/**
|
|
7914
|
+
* Known dimensions for common embedding models.
|
|
7915
|
+
* This avoids expensive API calls for dimension detection in telemetry.
|
|
7916
|
+
*
|
|
7917
|
+
* Note: The "openai" provider also supports OpenAI-compatible APIs like:
|
|
7918
|
+
* - Ollama (local models)
|
|
7919
|
+
* - LMStudio (local models)
|
|
7920
|
+
* - Any service implementing OpenAI's embedding API
|
|
7921
|
+
*/
|
|
7922
|
+
knownModelDimensions = {
|
|
7923
|
+
// OpenAI models (also works with Ollama, LMStudio, and other OpenAI-compatible APIs)
|
|
7924
|
+
"text-embedding-3-small": 1536,
|
|
7925
|
+
"text-embedding-3-large": 3072,
|
|
7926
|
+
"text-embedding-ada-002": 1536,
|
|
7927
|
+
// Google Vertex AI models
|
|
7928
|
+
"text-embedding-004": 768,
|
|
7929
|
+
"textembedding-gecko@003": 768,
|
|
7930
|
+
"textembedding-gecko@002": 768,
|
|
7931
|
+
"textembedding-gecko@001": 768,
|
|
7932
|
+
// Google Gemini models (with MRL support)
|
|
7933
|
+
"text-embedding-preview-0409": 768,
|
|
7934
|
+
"embedding-001": 768,
|
|
7935
|
+
// AWS Bedrock models
|
|
7936
|
+
// Amazon Titan models
|
|
7937
|
+
"amazon.titan-embed-text-v1": 1536,
|
|
7938
|
+
"amazon.titan-embed-text-v2:0": 1024,
|
|
7939
|
+
"amazon.titan-embed-image-v1": 1024,
|
|
7940
|
+
// Image embedding model
|
|
7941
|
+
// Cohere models
|
|
7942
|
+
"cohere.embed-english-v3": 1024,
|
|
7943
|
+
"cohere.embed-multilingual-v3": 1024,
|
|
7944
|
+
// SageMaker models (hosted on AWS SageMaker)
|
|
7945
|
+
"intfloat/multilingual-e5-large": 1024,
|
|
7946
|
+
// Additional AWS models that might be supported
|
|
7947
|
+
// Note: Some of these might be placeholders - verify dimensions before use
|
|
7948
|
+
// "amazon.nova-embed-multilingual-v1:0": 4096, // Commented out as noted in source
|
|
7949
|
+
// MTEB Leaderboard models (source: https://huggingface.co/spaces/mteb/leaderboard)
|
|
7950
|
+
// Top performing models from Massive Text Embedding Benchmark
|
|
7951
|
+
"sentence-transformers/all-MiniLM-L6-v2": 384,
|
|
7952
|
+
"gemini-embedding-001": 3072,
|
|
7953
|
+
"Qwen/Qwen3-Embedding-8B": 4096,
|
|
7954
|
+
"Qwen/Qwen3-Embedding-4B": 2560,
|
|
7955
|
+
"Qwen/Qwen3-Embedding-0.6B": 1024,
|
|
7956
|
+
"Linq-AI-Research/Linq-Embed-Mistral": 4096,
|
|
7957
|
+
"Alibaba-NLP/gte-Qwen2-7B-instruct": 3584,
|
|
7958
|
+
"intfloat/multilingual-e5-large-instruct": 1024,
|
|
7959
|
+
"Salesforce/SFR-Embedding-Mistral": 4096,
|
|
7960
|
+
"text-multilingual-embedding-002": 768,
|
|
7961
|
+
"GritLM/GritLM-7B": 4096,
|
|
7962
|
+
"GritLM/GritLM-8x7B": 4096,
|
|
7963
|
+
"intfloat/e5-mistral-7b-instruct": 4096,
|
|
7964
|
+
"Cohere/Cohere-embed-multilingual-v3.0": 1024,
|
|
7965
|
+
"Alibaba-NLP/gte-Qwen2-1.5B-instruct": 8960,
|
|
7966
|
+
"Lajavaness/bilingual-embedding-large": 1024,
|
|
7967
|
+
"Salesforce/SFR-Embedding-2_R": 4096,
|
|
7968
|
+
"NovaSearch/stella_en_1.5B_v5": 8960,
|
|
7969
|
+
"NovaSearch/jasper_en_vision_language_v1": 8960,
|
|
7970
|
+
"nvidia/NV-Embed-v2": 4096,
|
|
7971
|
+
"OrdalieTech/Solon-embeddings-large-0.1": 1024,
|
|
7972
|
+
"BAAI/bge-m3": 1024,
|
|
7973
|
+
"HIT-TMG/KaLM-embedding-multilingual-mini-v1": 896,
|
|
7974
|
+
"jinaai/jina-embeddings-v3": 1024,
|
|
7975
|
+
"Alibaba-NLP/gte-multilingual-base": 768,
|
|
7976
|
+
"Lajavaness/bilingual-embedding-base": 768,
|
|
7977
|
+
"HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": 896,
|
|
7978
|
+
"nvidia/NV-Embed-v1": 4096,
|
|
7979
|
+
"Cohere/Cohere-embed-multilingual-light-v3.0": 384,
|
|
7980
|
+
"manu/bge-m3-custom-fr": 1024,
|
|
7981
|
+
"Lajavaness/bilingual-embedding-small": 384,
|
|
7982
|
+
"Snowflake/snowflake-arctic-embed-l-v2.0": 1024,
|
|
7983
|
+
"intfloat/multilingual-e5-base": 768,
|
|
7984
|
+
"voyage-3-lite": 512,
|
|
7985
|
+
"voyage-3": 1024,
|
|
7986
|
+
"intfloat/multilingual-e5-small": 384,
|
|
7987
|
+
"Alibaba-NLP/gte-Qwen1.5-7B-instruct": 4096,
|
|
7988
|
+
"Snowflake/snowflake-arctic-embed-m-v2.0": 768,
|
|
7989
|
+
"deepvk/USER-bge-m3": 1024,
|
|
7990
|
+
"Cohere/Cohere-embed-english-v3.0": 1024,
|
|
7991
|
+
"Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": 768,
|
|
7992
|
+
"ibm-granite/granite-embedding-278m-multilingual": 768,
|
|
7993
|
+
"NovaSearch/stella_en_400M_v5": 4096,
|
|
7994
|
+
"omarelshehy/arabic-english-sts-matryoshka": 1024,
|
|
7995
|
+
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2": 768,
|
|
7996
|
+
"Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": 768,
|
|
7997
|
+
"Haon-Chen/speed-embedding-7b-instruct": 4096,
|
|
7998
|
+
"sentence-transformers/LaBSE": 768,
|
|
7999
|
+
"WhereIsAI/UAE-Large-V1": 1024,
|
|
8000
|
+
"ibm-granite/granite-embedding-107m-multilingual": 384,
|
|
8001
|
+
"mixedbread-ai/mxbai-embed-large-v1": 1024,
|
|
8002
|
+
"intfloat/e5-large-v2": 1024,
|
|
8003
|
+
"avsolatorio/GIST-large-Embedding-v0": 1024,
|
|
8004
|
+
"sdadas/mmlw-e5-large": 1024,
|
|
8005
|
+
"nomic-ai/nomic-embed-text-v1": 768,
|
|
8006
|
+
"nomic-ai/nomic-embed-text-v1-ablated": 768,
|
|
8007
|
+
"intfloat/e5-base-v2": 768,
|
|
8008
|
+
"BAAI/bge-large-en-v1.5": 1024,
|
|
8009
|
+
"intfloat/e5-large": 1024,
|
|
8010
|
+
"Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": 384,
|
|
8011
|
+
"Cohere/Cohere-embed-english-light-v3.0": 384,
|
|
8012
|
+
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 768,
|
|
8013
|
+
"Gameselo/STS-multilingual-mpnet-base-v2": 768,
|
|
8014
|
+
"thenlper/gte-large": 1024,
|
|
8015
|
+
"avsolatorio/GIST-Embedding-v0": 768,
|
|
8016
|
+
"nomic-ai/nomic-embed-text-v1-unsupervised": 768,
|
|
8017
|
+
"infgrad/stella-base-en-v2": 768,
|
|
8018
|
+
"avsolatorio/NoInstruct-small-Embedding-v0": 384,
|
|
8019
|
+
"dwzhu/e5-base-4k": 768,
|
|
8020
|
+
"sdadas/mmlw-e5-base": 768,
|
|
8021
|
+
"voyage-multilingual-2": 1024,
|
|
8022
|
+
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": 4096,
|
|
8023
|
+
"BAAI/bge-base-en-v1.5": 768,
|
|
8024
|
+
"avsolatorio/GIST-small-Embedding-v0": 384,
|
|
8025
|
+
"sdadas/mmlw-roberta-large": 1024,
|
|
8026
|
+
"nomic-ai/nomic-embed-text-v1.5": 768,
|
|
8027
|
+
"minishlab/potion-multilingual-128M": 256,
|
|
8028
|
+
"shibing624/text2vec-base-multilingual": 384,
|
|
8029
|
+
"thenlper/gte-base": 768,
|
|
8030
|
+
"intfloat/e5-small-v2": 384,
|
|
8031
|
+
"intfloat/e5-base": 768,
|
|
8032
|
+
"sentence-transformers/static-similarity-mrl-multilingual-v1": 1024,
|
|
8033
|
+
"manu/sentence_croissant_alpha_v0.3": 2048,
|
|
8034
|
+
"BAAI/bge-small-en-v1.5": 512,
|
|
8035
|
+
"thenlper/gte-small": 384,
|
|
8036
|
+
"sdadas/mmlw-e5-small": 384,
|
|
8037
|
+
"manu/sentence_croissant_alpha_v0.4": 2048,
|
|
8038
|
+
"manu/sentence_croissant_alpha_v0.2": 2048,
|
|
8039
|
+
"abhinand/MedEmbed-small-v0.1": 384,
|
|
8040
|
+
"ibm-granite/granite-embedding-125m-english": 768,
|
|
8041
|
+
"intfloat/e5-small": 384,
|
|
8042
|
+
"voyage-large-2-instruct": 1024,
|
|
8043
|
+
"sdadas/mmlw-roberta-base": 768,
|
|
8044
|
+
"Snowflake/snowflake-arctic-embed-l": 1024,
|
|
8045
|
+
"Mihaiii/Ivysaur": 384,
|
|
8046
|
+
"Snowflake/snowflake-arctic-embed-m-long": 768,
|
|
8047
|
+
"bigscience/sgpt-bloom-7b1-msmarco": 4096,
|
|
8048
|
+
"avsolatorio/GIST-all-MiniLM-L6-v2": 384,
|
|
8049
|
+
"sergeyzh/LaBSE-ru-turbo": 768,
|
|
8050
|
+
"sentence-transformers/all-mpnet-base-v2": 768,
|
|
8051
|
+
"Snowflake/snowflake-arctic-embed-m": 768,
|
|
8052
|
+
"Snowflake/snowflake-arctic-embed-s": 384,
|
|
8053
|
+
"sentence-transformers/all-MiniLM-L12-v2": 384,
|
|
8054
|
+
"Mihaiii/gte-micro-v4": 384,
|
|
8055
|
+
"Snowflake/snowflake-arctic-embed-m-v1.5": 768,
|
|
8056
|
+
"cointegrated/LaBSE-en-ru": 768,
|
|
8057
|
+
"Mihaiii/Bulbasaur": 384,
|
|
8058
|
+
"ibm-granite/granite-embedding-30m-english": 384,
|
|
8059
|
+
"deepfile/embedder-100p": 768,
|
|
8060
|
+
"Jaume/gemma-2b-embeddings": 2048,
|
|
8061
|
+
"OrlikB/KartonBERT-USE-base-v1": 768,
|
|
8062
|
+
"izhx/udever-bloom-7b1": 4096,
|
|
8063
|
+
"izhx/udever-bloom-1b1": 1024,
|
|
8064
|
+
"brahmairesearch/slx-v0.1": 384,
|
|
8065
|
+
"Mihaiii/Wartortle": 384,
|
|
8066
|
+
"izhx/udever-bloom-3b": 2048,
|
|
8067
|
+
"deepvk/USER-base": 768,
|
|
8068
|
+
"ai-forever/ru-en-RoSBERTa": 1024,
|
|
8069
|
+
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": 4096,
|
|
8070
|
+
"Mihaiii/Venusaur": 384,
|
|
8071
|
+
"Snowflake/snowflake-arctic-embed-xs": 384,
|
|
8072
|
+
"jinaai/jina-embedding-b-en-v1": 768,
|
|
8073
|
+
"Mihaiii/gte-micro": 384,
|
|
8074
|
+
"aari1995/German_Semantic_STS_V2": 1024,
|
|
8075
|
+
"Mihaiii/Squirtle": 384,
|
|
8076
|
+
"OrlikB/st-polish-kartonberta-base-alpha-v1": 768,
|
|
8077
|
+
"sergeyzh/rubert-tiny-turbo": 312,
|
|
8078
|
+
"minishlab/potion-base-8M": 256,
|
|
8079
|
+
"minishlab/M2V_base_glove_subword": 256,
|
|
8080
|
+
"jinaai/jina-embedding-s-en-v1": 512,
|
|
8081
|
+
"minishlab/potion-base-4M": 128,
|
|
8082
|
+
"minishlab/M2V_base_output": 256,
|
|
8083
|
+
"DeepPavlov/rubert-base-cased-sentence": 768,
|
|
8084
|
+
"jinaai/jina-embeddings-v2-small-en": 512,
|
|
8085
|
+
"cointegrated/rubert-tiny2": 312,
|
|
8086
|
+
"minishlab/M2V_base_glove": 256,
|
|
8087
|
+
"cointegrated/rubert-tiny": 312,
|
|
8088
|
+
"silma-ai/silma-embeddding-matryoshka-v0.1": 768,
|
|
8089
|
+
"DeepPavlov/rubert-base-cased": 768,
|
|
8090
|
+
"Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": 768,
|
|
8091
|
+
"izhx/udever-bloom-560m": 1024,
|
|
8092
|
+
"minishlab/potion-base-2M": 64,
|
|
8093
|
+
"DeepPavlov/distilrubert-small-cased-conversational": 768,
|
|
8094
|
+
"consciousAI/cai-lunaris-text-embeddings": 1024,
|
|
8095
|
+
"deepvk/deberta-v1-base": 768,
|
|
8096
|
+
"Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": 768,
|
|
8097
|
+
"Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": 768,
|
|
8098
|
+
"ai-forever/sbert_large_mt_nlu_ru": 1024,
|
|
8099
|
+
"ai-forever/sbert_large_nlu_ru": 1024,
|
|
8100
|
+
"malenia1/ternary-weight-embedding": 1024,
|
|
8101
|
+
"jinaai/jina-embeddings-v2-base-en": 768,
|
|
8102
|
+
"VPLabs/SearchMap_Preview": 4096,
|
|
8103
|
+
"Hum-Works/lodestone-base-4096-v1": 768,
|
|
8104
|
+
"jinaai/jina-embeddings-v4": 2048
|
|
8105
|
+
};
|
|
8106
|
+
/**
|
|
8107
|
+
* Lowercase lookup map for case-insensitive model dimension queries.
|
|
8108
|
+
* Built lazily from knownModelDimensions to ensure consistency.
|
|
8109
|
+
*/
|
|
8110
|
+
modelLookup;
|
|
8111
|
+
constructor() {
|
|
8112
|
+
this.modelLookup = /* @__PURE__ */ new Map();
|
|
8113
|
+
for (const [model, dimensions] of Object.entries(this.knownModelDimensions)) {
|
|
8114
|
+
this.modelLookup.set(model.toLowerCase(), dimensions);
|
|
8115
|
+
}
|
|
8116
|
+
}
|
|
8117
|
+
/**
|
|
8118
|
+
* Parse embedding model configuration from a provided model specification.
|
|
8119
|
+
* This is a synchronous operation that extracts provider, model, and known dimensions.
|
|
8120
|
+
*
|
|
8121
|
+
* Supports various providers:
|
|
8122
|
+
* - openai: OpenAI models and OpenAI-compatible APIs (Ollama, LMStudio, etc.)
|
|
8123
|
+
* - vertex: Google Cloud Vertex AI
|
|
8124
|
+
* - gemini: Google Generative AI
|
|
8125
|
+
* - aws: AWS Bedrock models
|
|
8126
|
+
* - microsoft: Azure OpenAI
|
|
8127
|
+
* - sagemaker: AWS SageMaker hosted models
|
|
8128
|
+
*
|
|
8129
|
+
* @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
|
|
8130
|
+
* @returns Parsed embedding model configuration
|
|
8131
|
+
*/
|
|
8132
|
+
parse(modelSpec) {
|
|
8133
|
+
const spec = modelSpec || "text-embedding-3-small";
|
|
8134
|
+
const colonIndex = spec.indexOf(":");
|
|
8135
|
+
let provider;
|
|
8136
|
+
let model;
|
|
8137
|
+
if (colonIndex === -1) {
|
|
8138
|
+
provider = "openai";
|
|
8139
|
+
model = spec;
|
|
8140
|
+
} else {
|
|
8141
|
+
provider = spec.substring(0, colonIndex);
|
|
8142
|
+
model = spec.substring(colonIndex + 1);
|
|
8143
|
+
}
|
|
8144
|
+
const dimensions = this.modelLookup?.get(model.toLowerCase()) || null;
|
|
8145
|
+
return {
|
|
8146
|
+
provider,
|
|
8147
|
+
model,
|
|
8148
|
+
dimensions,
|
|
8149
|
+
modelSpec: spec
|
|
8150
|
+
};
|
|
8215
8151
|
}
|
|
8216
8152
|
/**
|
|
8217
|
-
*
|
|
8218
|
-
*
|
|
8219
|
-
*
|
|
8220
|
-
*
|
|
8221
|
-
* @param
|
|
8222
|
-
* @returns
|
|
8153
|
+
* Get the known dimensions for a specific model.
|
|
8154
|
+
* Returns null if the model dimensions are not known.
|
|
8155
|
+
* Uses case-insensitive lookup.
|
|
8156
|
+
*
|
|
8157
|
+
* @param model The model name (e.g., "text-embedding-3-small")
|
|
8158
|
+
* @returns Known dimensions or null
|
|
8223
8159
|
*/
|
|
8224
|
-
|
|
8225
|
-
|
|
8226
|
-
const initialResults = await this.documentStore.findByContent(
|
|
8227
|
-
library,
|
|
8228
|
-
normalizedVersion,
|
|
8229
|
-
query,
|
|
8230
|
-
limit ?? 10
|
|
8231
|
-
);
|
|
8232
|
-
if (initialResults.length === 0) {
|
|
8233
|
-
return [];
|
|
8234
|
-
}
|
|
8235
|
-
const resultsByUrl = this.groupResultsByUrl(initialResults);
|
|
8236
|
-
const results = [];
|
|
8237
|
-
for (const [url, urlResults] of resultsByUrl.entries()) {
|
|
8238
|
-
const result = await this.processUrlGroup(
|
|
8239
|
-
library,
|
|
8240
|
-
normalizedVersion,
|
|
8241
|
-
url,
|
|
8242
|
-
urlResults
|
|
8243
|
-
);
|
|
8244
|
-
results.push(result);
|
|
8245
|
-
}
|
|
8246
|
-
return results;
|
|
8160
|
+
getKnownDimensions(model) {
|
|
8161
|
+
return this.modelLookup?.get(model.toLowerCase()) || null;
|
|
8247
8162
|
}
|
|
8248
8163
|
/**
|
|
8249
|
-
*
|
|
8164
|
+
* Add or update known dimensions for a model.
|
|
8165
|
+
* This can be used to cache discovered dimensions.
|
|
8166
|
+
* Stores both original case and lowercase for consistent lookup.
|
|
8167
|
+
*
|
|
8168
|
+
* @param model The model name
|
|
8169
|
+
* @param dimensions The dimensions to cache
|
|
8250
8170
|
*/
|
|
8251
|
-
|
|
8252
|
-
|
|
8253
|
-
|
|
8254
|
-
|
|
8255
|
-
if (!resultsByUrl.has(url)) {
|
|
8256
|
-
resultsByUrl.set(url, []);
|
|
8257
|
-
}
|
|
8258
|
-
const urlResults = resultsByUrl.get(url);
|
|
8259
|
-
if (urlResults) {
|
|
8260
|
-
urlResults.push(result);
|
|
8261
|
-
}
|
|
8171
|
+
setKnownDimensions(model, dimensions) {
|
|
8172
|
+
this.knownModelDimensions[model] = dimensions;
|
|
8173
|
+
if (this.modelLookup) {
|
|
8174
|
+
this.modelLookup.set(model.toLowerCase(), dimensions);
|
|
8262
8175
|
}
|
|
8263
|
-
return resultsByUrl;
|
|
8264
8176
|
}
|
|
8265
8177
|
/**
|
|
8266
|
-
*
|
|
8178
|
+
* Static method to parse embedding model configuration using the singleton instance.
|
|
8179
|
+
* This maintains backward compatibility while using the class-based approach.
|
|
8267
8180
|
*/
|
|
8268
|
-
|
|
8269
|
-
|
|
8270
|
-
const maxScore = Math.max(...initialChunks.map((chunk) => chunk.score));
|
|
8271
|
-
const strategy = createContentAssemblyStrategy(mimeType);
|
|
8272
|
-
const selectedChunks = await strategy.selectChunks(
|
|
8273
|
-
library,
|
|
8274
|
-
version,
|
|
8275
|
-
initialChunks,
|
|
8276
|
-
this.documentStore
|
|
8277
|
-
);
|
|
8278
|
-
const content = strategy.assembleContent(selectedChunks);
|
|
8279
|
-
return {
|
|
8280
|
-
url,
|
|
8281
|
-
content,
|
|
8282
|
-
score: maxScore,
|
|
8283
|
-
mimeType
|
|
8284
|
-
};
|
|
8285
|
-
}
|
|
8286
|
-
}
|
|
8287
|
-
const MIGRATIONS_DIR = path.join(getProjectRoot(), "db", "migrations");
|
|
8288
|
-
const MIGRATIONS_TABLE = "_schema_migrations";
|
|
8289
|
-
function ensureMigrationsTable(db) {
|
|
8290
|
-
db.exec(`
|
|
8291
|
-
CREATE TABLE IF NOT EXISTS ${MIGRATIONS_TABLE} (
|
|
8292
|
-
id TEXT PRIMARY KEY,
|
|
8293
|
-
applied_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
8294
|
-
);
|
|
8295
|
-
`);
|
|
8296
|
-
}
|
|
8297
|
-
function getAppliedMigrations(db) {
|
|
8298
|
-
const stmt = db.prepare(`SELECT id FROM ${MIGRATIONS_TABLE}`);
|
|
8299
|
-
const rows = stmt.all();
|
|
8300
|
-
return new Set(rows.map((row) => row.id));
|
|
8301
|
-
}
|
|
8302
|
-
async function applyMigrations(db) {
|
|
8303
|
-
try {
|
|
8304
|
-
db.pragma("journal_mode = OFF");
|
|
8305
|
-
db.pragma("synchronous = OFF");
|
|
8306
|
-
db.pragma("mmap_size = 268435456");
|
|
8307
|
-
db.pragma("cache_size = -64000");
|
|
8308
|
-
db.pragma("temp_store = MEMORY");
|
|
8309
|
-
logger.debug("Applied performance optimizations for migration");
|
|
8310
|
-
} catch (_error) {
|
|
8311
|
-
logger.warn("⚠️ Could not apply all performance optimizations for migration");
|
|
8181
|
+
static parseEmbeddingConfig(modelSpec) {
|
|
8182
|
+
return EmbeddingConfig.getInstance().parse(modelSpec);
|
|
8312
8183
|
}
|
|
8313
|
-
|
|
8314
|
-
|
|
8315
|
-
|
|
8316
|
-
|
|
8317
|
-
|
|
8318
|
-
|
|
8319
|
-
}
|
|
8320
|
-
const migrationFiles = fs.readdirSync(MIGRATIONS_DIR).filter((file) => file.endsWith(".sql")).sort();
|
|
8321
|
-
const pendingMigrations = migrationFiles.filter(
|
|
8322
|
-
(filename) => !appliedMigrations.has(filename)
|
|
8323
|
-
);
|
|
8324
|
-
if (pendingMigrations.length > 0) {
|
|
8325
|
-
logger.info(`🔄 Applying ${pendingMigrations.length} database migration(s)...`);
|
|
8326
|
-
}
|
|
8327
|
-
let appliedCount = 0;
|
|
8328
|
-
for (const filename of pendingMigrations) {
|
|
8329
|
-
logger.debug(`Applying migration: ${filename}`);
|
|
8330
|
-
const filePath = path.join(MIGRATIONS_DIR, filename);
|
|
8331
|
-
const sql = fs.readFileSync(filePath, "utf8");
|
|
8332
|
-
try {
|
|
8333
|
-
db.exec(sql);
|
|
8334
|
-
const insertStmt = db.prepare(`INSERT INTO ${MIGRATIONS_TABLE} (id) VALUES (?)`);
|
|
8335
|
-
insertStmt.run(filename);
|
|
8336
|
-
logger.debug(`Applied migration: ${filename}`);
|
|
8337
|
-
appliedCount++;
|
|
8338
|
-
} catch (error) {
|
|
8339
|
-
logger.error(`❌ Failed to apply migration: ${filename} - ${error}`);
|
|
8340
|
-
throw new StoreError(`Migration failed: ${filename}`, error);
|
|
8341
|
-
}
|
|
8342
|
-
}
|
|
8343
|
-
if (appliedCount > 0) {
|
|
8344
|
-
logger.info(`✅ Successfully applied ${appliedCount} migration(s)`);
|
|
8345
|
-
} else {
|
|
8346
|
-
logger.debug("Database schema is up to date");
|
|
8347
|
-
}
|
|
8348
|
-
return appliedCount;
|
|
8349
|
-
});
|
|
8350
|
-
let retries = 0;
|
|
8351
|
-
let appliedMigrationsCount = 0;
|
|
8352
|
-
while (true) {
|
|
8353
|
-
try {
|
|
8354
|
-
appliedMigrationsCount = overallTransaction.immediate();
|
|
8355
|
-
logger.debug("Database migrations completed successfully");
|
|
8356
|
-
if (appliedMigrationsCount > 0) {
|
|
8357
|
-
try {
|
|
8358
|
-
logger.debug(
|
|
8359
|
-
`Running VACUUM after applying ${appliedMigrationsCount} migration(s)...`
|
|
8360
|
-
);
|
|
8361
|
-
db.exec("VACUUM");
|
|
8362
|
-
logger.debug("Database vacuum completed successfully");
|
|
8363
|
-
} catch (error) {
|
|
8364
|
-
logger.warn(`⚠️ Could not vacuum database after migrations: ${error}`);
|
|
8365
|
-
}
|
|
8366
|
-
} else {
|
|
8367
|
-
logger.debug("Skipping VACUUM - no migrations were applied");
|
|
8368
|
-
}
|
|
8369
|
-
break;
|
|
8370
|
-
} catch (error) {
|
|
8371
|
-
if (error?.code === "SQLITE_BUSY" && retries < MIGRATION_MAX_RETRIES) {
|
|
8372
|
-
retries++;
|
|
8373
|
-
logger.warn(
|
|
8374
|
-
`⚠️ Migrations busy (SQLITE_BUSY), retrying attempt ${retries}/${MIGRATION_MAX_RETRIES} in ${MIGRATION_RETRY_DELAY_MS}ms...`
|
|
8375
|
-
);
|
|
8376
|
-
await new Promise((resolve) => setTimeout(resolve, MIGRATION_RETRY_DELAY_MS));
|
|
8377
|
-
} else {
|
|
8378
|
-
if (error?.code === "SQLITE_BUSY") {
|
|
8379
|
-
logger.error(
|
|
8380
|
-
`❌ Migrations still busy after ${MIGRATION_MAX_RETRIES} retries. Giving up: ${error}`
|
|
8381
|
-
);
|
|
8382
|
-
}
|
|
8383
|
-
if (error instanceof StoreError) {
|
|
8384
|
-
throw error;
|
|
8385
|
-
}
|
|
8386
|
-
throw new StoreError("Failed during migration process", error);
|
|
8387
|
-
}
|
|
8388
|
-
}
|
|
8184
|
+
/**
|
|
8185
|
+
* Static method to get known model dimensions using the singleton instance.
|
|
8186
|
+
* This maintains backward compatibility while using the class-based approach.
|
|
8187
|
+
*/
|
|
8188
|
+
static getKnownModelDimensions(model) {
|
|
8189
|
+
return EmbeddingConfig.getInstance().getKnownDimensions(model);
|
|
8389
8190
|
}
|
|
8390
|
-
|
|
8391
|
-
|
|
8392
|
-
|
|
8393
|
-
|
|
8394
|
-
|
|
8395
|
-
|
|
8396
|
-
logger.debug(
|
|
8397
|
-
"Applied production database configuration (WAL mode, autocheckpoint, foreign keys, busy timeout)"
|
|
8398
|
-
);
|
|
8399
|
-
} catch (_error) {
|
|
8400
|
-
logger.warn("⚠️ Could not apply all production database settings");
|
|
8191
|
+
/**
|
|
8192
|
+
* Static method to set known model dimensions using the singleton instance.
|
|
8193
|
+
* This maintains backward compatibility while using the class-based approach.
|
|
8194
|
+
*/
|
|
8195
|
+
static setKnownModelDimensions(model, dimensions) {
|
|
8196
|
+
EmbeddingConfig.getInstance().setKnownDimensions(model, dimensions);
|
|
8401
8197
|
}
|
|
8402
8198
|
}
|
|
8403
8199
|
class DocumentStore {
|
|
@@ -8407,6 +8203,16 @@ class DocumentStore {
|
|
|
8407
8203
|
modelDimension;
|
|
8408
8204
|
embeddingConfig;
|
|
8409
8205
|
isVectorSearchEnabled = false;
|
|
8206
|
+
/**
|
|
8207
|
+
* Returns the active embedding configuration if vector search is enabled,
|
|
8208
|
+
* or null if embeddings are disabled (no config provided or credentials unavailable).
|
|
8209
|
+
*/
|
|
8210
|
+
getActiveEmbeddingConfig() {
|
|
8211
|
+
if (!this.isVectorSearchEnabled || !this.embeddingConfig) {
|
|
8212
|
+
return null;
|
|
8213
|
+
}
|
|
8214
|
+
return this.embeddingConfig;
|
|
8215
|
+
}
|
|
8410
8216
|
statements;
|
|
8411
8217
|
/**
|
|
8412
8218
|
* Calculates Reciprocal Rank Fusion score for a result with configurable weights
|
|
@@ -8686,7 +8492,7 @@ class DocumentStore {
|
|
|
8686
8492
|
const config = this.embeddingConfig;
|
|
8687
8493
|
if (!areCredentialsAvailable(config.provider)) {
|
|
8688
8494
|
logger.warn(
|
|
8689
|
-
`⚠️
|
|
8495
|
+
`⚠️ No credentials found for ${config.provider} embedding provider. Vector search is disabled.
|
|
8690
8496
|
Only full-text search will be available. To enable vector search, please configure the required
|
|
8691
8497
|
environment variables for ${config.provider} or choose a different provider.
|
|
8692
8498
|
See README.md for configuration options or run with --help for more details.`
|
|
@@ -8698,8 +8504,26 @@ class DocumentStore {
|
|
|
8698
8504
|
if (config.dimensions !== null) {
|
|
8699
8505
|
this.modelDimension = config.dimensions;
|
|
8700
8506
|
} else {
|
|
8701
|
-
const
|
|
8702
|
-
|
|
8507
|
+
const EMBEDDING_INIT_TIMEOUT_MS = 3e4;
|
|
8508
|
+
const testPromise = this.embeddings.embedQuery("test");
|
|
8509
|
+
let timeoutId;
|
|
8510
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
8511
|
+
timeoutId = setTimeout(() => {
|
|
8512
|
+
reject(
|
|
8513
|
+
new Error(
|
|
8514
|
+
`Embedding service connection timed out after ${EMBEDDING_INIT_TIMEOUT_MS / 1e3} seconds`
|
|
8515
|
+
)
|
|
8516
|
+
);
|
|
8517
|
+
}, EMBEDDING_INIT_TIMEOUT_MS);
|
|
8518
|
+
});
|
|
8519
|
+
try {
|
|
8520
|
+
const testVector = await Promise.race([testPromise, timeoutPromise]);
|
|
8521
|
+
this.modelDimension = testVector.length;
|
|
8522
|
+
} finally {
|
|
8523
|
+
if (timeoutId !== void 0) {
|
|
8524
|
+
clearTimeout(timeoutId);
|
|
8525
|
+
}
|
|
8526
|
+
}
|
|
8703
8527
|
EmbeddingConfig.setKnownModelDimensions(config.model, this.modelDimension);
|
|
8704
8528
|
}
|
|
8705
8529
|
if (this.modelDimension > this.dbDimension) {
|
|
@@ -8713,18 +8537,26 @@ class DocumentStore {
|
|
|
8713
8537
|
if (error instanceof Error) {
|
|
8714
8538
|
if (error.message.includes("does not exist") || error.message.includes("MODEL_NOT_FOUND")) {
|
|
8715
8539
|
throw new ModelConfigurationError(
|
|
8716
|
-
|
|
8540
|
+
`Invalid embedding model: ${config.model}
|
|
8717
8541
|
The model "${config.model}" is not available or you don't have access to it.
|
|
8718
8542
|
See README.md for supported models or run with --help for more details.`
|
|
8719
8543
|
);
|
|
8720
8544
|
}
|
|
8721
8545
|
if (error.message.includes("API key") || error.message.includes("401") || error.message.includes("authentication")) {
|
|
8722
8546
|
throw new ModelConfigurationError(
|
|
8723
|
-
|
|
8547
|
+
`Authentication failed for ${config.provider} embedding provider
|
|
8724
8548
|
Please check your API key configuration.
|
|
8725
8549
|
See README.md for configuration options or run with --help for more details.`
|
|
8726
8550
|
);
|
|
8727
8551
|
}
|
|
8552
|
+
if (error.message.includes("timed out") || error.message.includes("ECONNREFUSED") || error.message.includes("ENOTFOUND") || error.message.includes("ETIMEDOUT") || error.message.includes("ECONNRESET") || error.message.includes("network") || error.message.includes("fetch failed")) {
|
|
8553
|
+
throw new ModelConfigurationError(
|
|
8554
|
+
`Failed to connect to ${config.provider} embedding service
|
|
8555
|
+
${error.message}
|
|
8556
|
+
Please check that the embedding service is running and accessible.
|
|
8557
|
+
If using a local model (e.g., Ollama), ensure the service is started.`
|
|
8558
|
+
);
|
|
8559
|
+
}
|
|
8728
8560
|
}
|
|
8729
8561
|
throw error;
|
|
8730
8562
|
}
|
|
@@ -8988,7 +8820,7 @@ class DocumentStore {
|
|
|
8988
8820
|
try {
|
|
8989
8821
|
parsed = JSON.parse(row.scraper_options);
|
|
8990
8822
|
} catch (e) {
|
|
8991
|
-
logger.warn(`⚠️
|
|
8823
|
+
logger.warn(`⚠️ Invalid scraper_options JSON for version ${versionId}: ${e}`);
|
|
8992
8824
|
parsed = {};
|
|
8993
8825
|
}
|
|
8994
8826
|
}
|
|
@@ -9707,13 +9539,6 @@ class DocumentManagementService {
|
|
|
9707
9539
|
documentRetriever;
|
|
9708
9540
|
pipelines;
|
|
9709
9541
|
eventBus;
|
|
9710
|
-
/**
|
|
9711
|
-
* Normalizes a version string, converting null or undefined to an empty string
|
|
9712
|
-
* and converting to lowercase.
|
|
9713
|
-
*/
|
|
9714
|
-
normalizeVersion(version) {
|
|
9715
|
-
return (version ?? "").toLowerCase();
|
|
9716
|
-
}
|
|
9717
9542
|
constructor(storePath, eventBus, embeddingConfig, pipelineConfig) {
|
|
9718
9543
|
this.eventBus = eventBus;
|
|
9719
9544
|
const dbPath = storePath === ":memory:" ? ":memory:" : path.join(storePath, "documents.db");
|
|
@@ -9722,6 +9547,20 @@ class DocumentManagementService {
|
|
|
9722
9547
|
this.documentRetriever = new DocumentRetrieverService(this.store);
|
|
9723
9548
|
this.pipelines = PipelineFactory$1.createStandardPipelines(pipelineConfig);
|
|
9724
9549
|
}
|
|
9550
|
+
/**
|
|
9551
|
+
* Returns the active embedding configuration if vector search is enabled,
|
|
9552
|
+
* or null if embeddings are disabled.
|
|
9553
|
+
*/
|
|
9554
|
+
getActiveEmbeddingConfig() {
|
|
9555
|
+
return this.store.getActiveEmbeddingConfig();
|
|
9556
|
+
}
|
|
9557
|
+
/**
|
|
9558
|
+
* Normalizes a version string, converting null or undefined to an empty string
|
|
9559
|
+
* and converting to lowercase.
|
|
9560
|
+
*/
|
|
9561
|
+
normalizeVersion(version) {
|
|
9562
|
+
return (version ?? "").toLowerCase();
|
|
9563
|
+
}
|
|
9725
9564
|
/**
|
|
9726
9565
|
* Initializes the underlying document store.
|
|
9727
9566
|
*/
|
|
@@ -9962,7 +9801,7 @@ class DocumentManagementService {
|
|
|
9962
9801
|
logger.info(`🗑️ Removed version ${library}@${normalizedVersion || "[no version]"}`);
|
|
9963
9802
|
} else {
|
|
9964
9803
|
logger.warn(
|
|
9965
|
-
`⚠️
|
|
9804
|
+
`⚠️ Version ${library}@${normalizedVersion || "[no version]"} not found`
|
|
9966
9805
|
);
|
|
9967
9806
|
const libraryRecord = await this.store.getLibrary(library);
|
|
9968
9807
|
if (libraryRecord) {
|
|
@@ -10650,7 +10489,7 @@ function registerEventsRoute(server, eventBus) {
|
|
|
10650
10489
|
// Disable buffering in nginx
|
|
10651
10490
|
});
|
|
10652
10491
|
reply.raw.write("data: connected\n\n");
|
|
10653
|
-
logger.
|
|
10492
|
+
logger.debug("SSE client connected");
|
|
10654
10493
|
const allEventTypes = [
|
|
10655
10494
|
EventType.JOB_STATUS_CHANGE,
|
|
10656
10495
|
EventType.JOB_PROGRESS,
|
|
@@ -10688,12 +10527,12 @@ function registerEventsRoute(server, eventBus) {
|
|
|
10688
10527
|
}
|
|
10689
10528
|
}, 3e4);
|
|
10690
10529
|
request.raw.on("close", () => {
|
|
10691
|
-
logger.
|
|
10530
|
+
logger.debug("SSE client disconnected");
|
|
10692
10531
|
cleanup();
|
|
10693
10532
|
clearInterval(heartbeatInterval);
|
|
10694
10533
|
});
|
|
10695
10534
|
request.raw.on("error", (error) => {
|
|
10696
|
-
logger.
|
|
10535
|
+
logger.debug(`SSE connection error: ${error}`);
|
|
10697
10536
|
cleanup();
|
|
10698
10537
|
clearInterval(heartbeatInterval);
|
|
10699
10538
|
});
|
|
@@ -10827,7 +10666,7 @@ const Layout = ({
|
|
|
10827
10666
|
children,
|
|
10828
10667
|
eventClientConfig
|
|
10829
10668
|
}) => {
|
|
10830
|
-
const versionString = version || "1.
|
|
10669
|
+
const versionString = version || "1.30.0";
|
|
10831
10670
|
const versionInitializer = `versionUpdate({ currentVersion: ${`'${versionString}'`} })`;
|
|
10832
10671
|
return /* @__PURE__ */ jsxs("html", { lang: "en", children: [
|
|
10833
10672
|
/* @__PURE__ */ jsxs("head", { children: [
|
|
@@ -10972,7 +10811,7 @@ const Layout = ({
|
|
|
10972
10811
|
form .spinner { display: none; }
|
|
10973
10812
|
` })
|
|
10974
10813
|
] }),
|
|
10975
|
-
/* @__PURE__ */ jsxs("body", { class: "bg-gray-50 dark:bg-gray-900", children: [
|
|
10814
|
+
/* @__PURE__ */ jsxs("body", { class: "bg-gray-50 dark:bg-gray-900", "hx-ext": "morph", children: [
|
|
10976
10815
|
/* @__PURE__ */ jsx(Toast, {}),
|
|
10977
10816
|
/* @__PURE__ */ jsx(
|
|
10978
10817
|
"header",
|
|
@@ -11127,19 +10966,35 @@ function registerIndexRoute(server, config) {
|
|
|
11127
10966
|
trpcUrl
|
|
11128
10967
|
},
|
|
11129
10968
|
children: [
|
|
10969
|
+
/* @__PURE__ */ jsx(
|
|
10970
|
+
"div",
|
|
10971
|
+
{
|
|
10972
|
+
id: "analytics-stats",
|
|
10973
|
+
"hx-get": "/web/stats",
|
|
10974
|
+
"hx-trigger": "load, library-change from:body",
|
|
10975
|
+
"hx-swap": "morph:innerHTML",
|
|
10976
|
+
children: /* @__PURE__ */ jsxs("div", { class: "grid grid-cols-1 sm:grid-cols-3 gap-4 mb-4 animate-pulse", children: [
|
|
10977
|
+
/* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600 h-20" }),
|
|
10978
|
+
/* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600 h-20" }),
|
|
10979
|
+
/* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600 h-20" })
|
|
10980
|
+
] })
|
|
10981
|
+
}
|
|
10982
|
+
),
|
|
11130
10983
|
/* @__PURE__ */ jsxs("section", { class: "mb-4 p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600", children: [
|
|
11131
10984
|
/* @__PURE__ */ jsxs("div", { class: "flex items-center justify-between mb-2", children: [
|
|
11132
10985
|
/* @__PURE__ */ jsx("h2", { class: "text-xl font-semibold text-gray-900 dark:text-white", children: "Job Queue" }),
|
|
11133
10986
|
/* @__PURE__ */ jsx(
|
|
11134
10987
|
"button",
|
|
11135
10988
|
{
|
|
10989
|
+
id: "clear-completed-btn",
|
|
11136
10990
|
type: "button",
|
|
11137
|
-
class: "text-xs px-3 py-1.5 text-gray-
|
|
10991
|
+
class: "text-xs px-3 py-1.5 text-gray-400 bg-gray-50 border border-gray-200 rounded-lg cursor-not-allowed focus:ring-4 focus:outline-none transition-colors duration-150 dark:bg-gray-700 dark:text-gray-500 dark:border-gray-600",
|
|
11138
10992
|
title: "Clear all completed, cancelled, and failed jobs",
|
|
11139
10993
|
"hx-post": "/web/jobs/clear-completed",
|
|
11140
10994
|
"hx-trigger": "click",
|
|
11141
10995
|
"hx-on": "htmx:afterRequest: document.dispatchEvent(new Event('job-list-refresh'))",
|
|
11142
10996
|
"hx-swap": "none",
|
|
10997
|
+
disabled: true,
|
|
11143
10998
|
children: "Clear Completed Jobs"
|
|
11144
10999
|
}
|
|
11145
11000
|
)
|
|
@@ -11149,7 +11004,8 @@ function registerIndexRoute(server, config) {
|
|
|
11149
11004
|
{
|
|
11150
11005
|
id: "job-queue",
|
|
11151
11006
|
"hx-get": "/web/jobs",
|
|
11152
|
-
"hx-trigger": "load, job-status-change from:body, job-progress from:body, job-list-change from:body",
|
|
11007
|
+
"hx-trigger": "load, job-status-change from:body, job-progress from:body, job-list-change from:body, job-list-refresh from:body",
|
|
11008
|
+
"hx-swap": "morph:innerHTML",
|
|
11153
11009
|
children: /* @__PURE__ */ jsxs("div", { class: "animate-pulse", children: [
|
|
11154
11010
|
/* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-48 mb-4" }),
|
|
11155
11011
|
/* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-full mb-2.5" }),
|
|
@@ -11158,11 +11014,17 @@ function registerIndexRoute(server, config) {
|
|
|
11158
11014
|
}
|
|
11159
11015
|
)
|
|
11160
11016
|
] }),
|
|
11161
|
-
/* @__PURE__ */ jsx("section", { class: "mb-8", children: /* @__PURE__ */ jsx("div", { id: "addJobForm",
|
|
11162
|
-
|
|
11163
|
-
|
|
11164
|
-
|
|
11165
|
-
|
|
11017
|
+
/* @__PURE__ */ jsx("section", { class: "mb-8", children: /* @__PURE__ */ jsx("div", { id: "addJobForm", children: /* @__PURE__ */ jsx(
|
|
11018
|
+
"button",
|
|
11019
|
+
{
|
|
11020
|
+
type: "button",
|
|
11021
|
+
"hx-get": "/web/jobs/new",
|
|
11022
|
+
"hx-target": "#addJobForm",
|
|
11023
|
+
"hx-swap": "innerHTML",
|
|
11024
|
+
class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500 transition-colors duration-150",
|
|
11025
|
+
children: "Add New Documentation"
|
|
11026
|
+
}
|
|
11027
|
+
) }) }),
|
|
11166
11028
|
/* @__PURE__ */ jsxs("div", { children: [
|
|
11167
11029
|
/* @__PURE__ */ jsx("h2", { class: "text-xl font-semibold mb-2 text-gray-900 dark:text-white", children: "Indexed Documentation" }),
|
|
11168
11030
|
/* @__PURE__ */ jsx(
|
|
@@ -11171,6 +11033,7 @@ function registerIndexRoute(server, config) {
|
|
|
11171
11033
|
id: "indexed-docs",
|
|
11172
11034
|
"hx-get": "/web/libraries",
|
|
11173
11035
|
"hx-trigger": "load, library-change from:body",
|
|
11036
|
+
"hx-swap": "morph:innerHTML",
|
|
11174
11037
|
children: /* @__PURE__ */ jsxs("div", { class: "animate-pulse", children: [
|
|
11175
11038
|
/* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-48 mb-4" }),
|
|
11176
11039
|
/* @__PURE__ */ jsx("div", { class: "h-[0.8em] bg-gray-200 rounded-full dark:bg-gray-700 w-full mb-2.5" }),
|
|
@@ -11325,14 +11188,15 @@ const LoadingSpinner = () => /* @__PURE__ */ jsxs(
|
|
|
11325
11188
|
const JobItem = ({ job }) => {
|
|
11326
11189
|
job.dbStatus || job.status;
|
|
11327
11190
|
const isActiveJob = job.dbStatus ? isActiveStatus(job.dbStatus) : job.status === PipelineJobStatus.QUEUED || job.status === PipelineJobStatus.RUNNING;
|
|
11191
|
+
const defaultStateClasses = "border border-gray-300 bg-white text-red-600 hover:bg-red-50 focus:ring-4 focus:outline-none focus:ring-red-100 dark:border-gray-600 dark:bg-gray-800 dark:text-red-400 dark:hover:bg-gray-700 dark:focus:ring-red-900";
|
|
11192
|
+
const confirmingStateClasses = "bg-red-600 text-white border-red-600 focus:ring-4 focus:outline-none focus:ring-red-300 dark:bg-red-700 dark:border-red-700 dark:focus:ring-red-800";
|
|
11328
11193
|
return /* @__PURE__ */ jsx(
|
|
11329
11194
|
"div",
|
|
11330
11195
|
{
|
|
11331
11196
|
id: `job-item-${job.id}`,
|
|
11332
11197
|
class: "block p-3 bg-gray-50 dark:bg-gray-700 rounded-lg border border-gray-200 dark:border-gray-600",
|
|
11333
11198
|
"data-job-id": job.id,
|
|
11334
|
-
"x-data": "{ jobId: $el.dataset.jobId }",
|
|
11335
|
-
"x-bind:hx-preserve": "$store.confirmingAction.type === 'job-cancel' && $store.confirmingAction.id === jobId",
|
|
11199
|
+
"x-data": "{ jobId: $el.dataset.jobId, confirming: $el.dataset.confirming === 'true', isStopping: false }",
|
|
11336
11200
|
children: /* @__PURE__ */ jsxs("div", { class: "flex items-start justify-between", children: [
|
|
11337
11201
|
/* @__PURE__ */ jsxs("div", { class: "flex-1", children: [
|
|
11338
11202
|
/* @__PURE__ */ jsxs("p", { class: "text-sm font-medium text-gray-900 dark:text-white", children: [
|
|
@@ -11364,12 +11228,13 @@ const JobItem = ({ job }) => {
|
|
|
11364
11228
|
"button",
|
|
11365
11229
|
{
|
|
11366
11230
|
type: "button",
|
|
11367
|
-
class: "font-medium rounded-lg text-xs p-1 text-center inline-flex items-center transition-colors duration-150 ease-in-out
|
|
11231
|
+
class: "font-medium rounded-lg text-xs p-1 text-center inline-flex items-center transition-colors duration-150 ease-in-out",
|
|
11368
11232
|
title: "Stop this job",
|
|
11369
|
-
"x-
|
|
11370
|
-
"x-
|
|
11233
|
+
"x-bind:class": `confirming ? '${confirmingStateClasses}' : '${defaultStateClasses}'`,
|
|
11234
|
+
"x-on:click": "\n if (confirming) {\n isStopping = true;\n window.confirmationManager.clear($root.id);\n fetch('/web/jobs/' + jobId + '/cancel', {\n method: 'POST',\n headers: { 'Accept': 'application/json' },\n })\n .then(r => r.json())\n .then(() => {\n confirming = false;\n isStopping = false;\n document.dispatchEvent(new CustomEvent('job-list-refresh'));\n })\n .catch(() => { isStopping = false; });\n } else {\n confirming = true;\n isStopping = false;\n window.confirmationManager.start($root.id);\n }\n ",
|
|
11235
|
+
"x-bind:disabled": "isStopping",
|
|
11371
11236
|
children: [
|
|
11372
|
-
/* @__PURE__ */ jsxs("span", { "x-show": "
|
|
11237
|
+
/* @__PURE__ */ jsxs("span", { "x-show": "!confirming && !isStopping", children: [
|
|
11373
11238
|
/* @__PURE__ */ jsx(
|
|
11374
11239
|
"svg",
|
|
11375
11240
|
{
|
|
@@ -11382,15 +11247,8 @@ const JobItem = ({ job }) => {
|
|
|
11382
11247
|
),
|
|
11383
11248
|
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Stop job" })
|
|
11384
11249
|
] }),
|
|
11385
|
-
/* @__PURE__ */ jsx(
|
|
11386
|
-
|
|
11387
|
-
{
|
|
11388
|
-
"x-show": "$store.confirmingAction.type === 'job-cancel' && $store.confirmingAction.id === jobId && !$store.confirmingAction.isStopping",
|
|
11389
|
-
class: "px-2",
|
|
11390
|
-
children: "Cancel?"
|
|
11391
|
-
}
|
|
11392
|
-
),
|
|
11393
|
-
/* @__PURE__ */ jsxs("span", { "x-show": "$store.confirmingAction.type === 'job-cancel' && $store.confirmingAction.id === jobId && $store.confirmingAction.isStopping", children: [
|
|
11250
|
+
/* @__PURE__ */ jsx("span", { "x-show": "confirming && !isStopping", class: "px-2", children: "Cancel?" }),
|
|
11251
|
+
/* @__PURE__ */ jsxs("span", { "x-show": "isStopping", children: [
|
|
11394
11252
|
/* @__PURE__ */ jsx(LoadingSpinner, {}),
|
|
11395
11253
|
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Stopping..." })
|
|
11396
11254
|
] })
|
|
@@ -11407,7 +11265,28 @@ const JobItem = ({ job }) => {
|
|
|
11407
11265
|
}
|
|
11408
11266
|
);
|
|
11409
11267
|
};
|
|
11410
|
-
const JobList = ({ jobs }) =>
|
|
11268
|
+
const JobList = ({ jobs }) => {
|
|
11269
|
+
const hasJobs = jobs.length > 0;
|
|
11270
|
+
return /* @__PURE__ */ jsxs(Fragment, { children: [
|
|
11271
|
+
/* @__PURE__ */ jsx("div", { id: "job-list", class: "space-y-2 animate-[fadeSlideIn_0.2s_ease-out]", children: hasJobs ? jobs.map((job) => /* @__PURE__ */ jsx(JobItem, { job })) : /* @__PURE__ */ jsx("p", { class: "text-center text-gray-500 dark:text-gray-400", children: "No pending jobs." }) }),
|
|
11272
|
+
/* @__PURE__ */ jsx(
|
|
11273
|
+
"button",
|
|
11274
|
+
{
|
|
11275
|
+
id: "clear-completed-btn",
|
|
11276
|
+
"hx-swap-oob": "true",
|
|
11277
|
+
type: "button",
|
|
11278
|
+
class: `text-xs px-3 py-1.5 rounded-lg focus:ring-4 focus:outline-none transition-colors duration-150 ${hasJobs ? "text-gray-700 bg-gray-100 border border-gray-300 hover:bg-gray-200 focus:ring-gray-100 dark:bg-gray-600 dark:text-gray-300 dark:border-gray-500 dark:hover:bg-gray-700 dark:focus:ring-gray-700" : "text-gray-400 bg-gray-50 border border-gray-200 cursor-not-allowed dark:bg-gray-700 dark:text-gray-500 dark:border-gray-600"}`,
|
|
11279
|
+
title: "Clear all completed, cancelled, and failed jobs",
|
|
11280
|
+
"hx-post": "/web/jobs/clear-completed",
|
|
11281
|
+
"hx-trigger": "click",
|
|
11282
|
+
"hx-on": "htmx:afterRequest: document.dispatchEvent(new Event('job-list-refresh'))",
|
|
11283
|
+
"hx-swap": "none",
|
|
11284
|
+
disabled: !hasJobs,
|
|
11285
|
+
children: "Clear Completed Jobs"
|
|
11286
|
+
}
|
|
11287
|
+
)
|
|
11288
|
+
] });
|
|
11289
|
+
};
|
|
11411
11290
|
function registerJobListRoutes(server, listJobsTool) {
|
|
11412
11291
|
server.get("/web/jobs", async () => {
|
|
11413
11292
|
const result = await listJobsTool.execute({});
|
|
@@ -11561,13 +11440,43 @@ const ScrapeFormContent = ({
|
|
|
11561
11440
|
defaultExcludePatterns
|
|
11562
11441
|
}) => {
|
|
11563
11442
|
const defaultExcludePatternsText = defaultExcludePatterns?.join("\n") || "";
|
|
11564
|
-
return /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600", children: [
|
|
11565
|
-
/* @__PURE__ */ jsx(
|
|
11443
|
+
return /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600 relative animate-[fadeSlideIn_0.2s_ease-out]", children: [
|
|
11444
|
+
/* @__PURE__ */ jsx(
|
|
11445
|
+
"button",
|
|
11446
|
+
{
|
|
11447
|
+
type: "button",
|
|
11448
|
+
"hx-get": "/web/jobs/new-button",
|
|
11449
|
+
"hx-target": "#addJobForm",
|
|
11450
|
+
"hx-swap": "innerHTML",
|
|
11451
|
+
class: "absolute top-3 right-3 p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300 rounded-full hover:bg-gray-100 dark:hover:bg-gray-700 transition-colors duration-150",
|
|
11452
|
+
title: "Close",
|
|
11453
|
+
children: /* @__PURE__ */ jsx(
|
|
11454
|
+
"svg",
|
|
11455
|
+
{
|
|
11456
|
+
class: "w-5 h-5",
|
|
11457
|
+
fill: "none",
|
|
11458
|
+
stroke: "currentColor",
|
|
11459
|
+
viewBox: "0 0 24 24",
|
|
11460
|
+
xmlns: "http://www.w3.org/2000/svg",
|
|
11461
|
+
children: /* @__PURE__ */ jsx(
|
|
11462
|
+
"path",
|
|
11463
|
+
{
|
|
11464
|
+
"stroke-linecap": "round",
|
|
11465
|
+
"stroke-linejoin": "round",
|
|
11466
|
+
"stroke-width": "2",
|
|
11467
|
+
d: "M6 18L18 6M6 6l12 12"
|
|
11468
|
+
}
|
|
11469
|
+
)
|
|
11470
|
+
}
|
|
11471
|
+
)
|
|
11472
|
+
}
|
|
11473
|
+
),
|
|
11474
|
+
/* @__PURE__ */ jsx("h3", { class: "text-xl font-semibold text-gray-900 dark:text-white mb-2 pr-8", children: "Add New Documentation" }),
|
|
11566
11475
|
/* @__PURE__ */ jsxs(
|
|
11567
11476
|
"form",
|
|
11568
11477
|
{
|
|
11569
11478
|
"hx-post": "/web/jobs/scrape",
|
|
11570
|
-
"hx-target": "#
|
|
11479
|
+
"hx-target": "#addJobForm",
|
|
11571
11480
|
"hx-swap": "innerHTML",
|
|
11572
11481
|
class: "space-y-2",
|
|
11573
11482
|
"x-data": "{\n url: '',\n hasPath: false,\n headers: [],\n checkUrlPath() {\n try {\n const url = new URL(this.url);\n this.hasPath = url.pathname !== '/' && url.pathname !== '';\n } catch (e) {\n this.hasPath = false;\n }\n }\n }",
|
|
@@ -11679,283 +11588,319 @@ const ScrapeFormContent = ({
|
|
|
11679
11588
|
}
|
|
11680
11589
|
)
|
|
11681
11590
|
] }),
|
|
11682
|
-
/* @__PURE__ */ jsxs(
|
|
11683
|
-
|
|
11684
|
-
|
|
11685
|
-
|
|
11686
|
-
|
|
11687
|
-
|
|
11688
|
-
"label",
|
|
11689
|
-
{
|
|
11690
|
-
for: "maxPages",
|
|
11691
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11692
|
-
children: "Max Pages"
|
|
11693
|
-
}
|
|
11694
|
-
),
|
|
11695
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
|
|
11696
|
-
] }),
|
|
11697
|
-
/* @__PURE__ */ jsx(
|
|
11698
|
-
"input",
|
|
11699
|
-
{
|
|
11700
|
-
type: "number",
|
|
11701
|
-
name: "maxPages",
|
|
11702
|
-
id: "maxPages",
|
|
11703
|
-
min: "1",
|
|
11704
|
-
placeholder: "1000",
|
|
11705
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11706
|
-
}
|
|
11707
|
-
)
|
|
11708
|
-
] }),
|
|
11709
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11710
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11711
|
-
/* @__PURE__ */ jsx(
|
|
11712
|
-
"label",
|
|
11713
|
-
{
|
|
11714
|
-
for: "maxDepth",
|
|
11715
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11716
|
-
children: "Max Depth"
|
|
11717
|
-
}
|
|
11718
|
-
),
|
|
11719
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
|
|
11720
|
-
] }),
|
|
11721
|
-
/* @__PURE__ */ jsx(
|
|
11722
|
-
"input",
|
|
11723
|
-
{
|
|
11724
|
-
type: "number",
|
|
11725
|
-
name: "maxDepth",
|
|
11726
|
-
id: "maxDepth",
|
|
11727
|
-
min: "0",
|
|
11728
|
-
placeholder: "3",
|
|
11729
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11730
|
-
}
|
|
11731
|
-
)
|
|
11732
|
-
] }),
|
|
11733
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11734
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11735
|
-
/* @__PURE__ */ jsx(
|
|
11736
|
-
"label",
|
|
11737
|
-
{
|
|
11738
|
-
for: "scope",
|
|
11739
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11740
|
-
children: "Scope"
|
|
11741
|
-
}
|
|
11742
|
-
),
|
|
11743
|
-
/* @__PURE__ */ jsx(
|
|
11744
|
-
Tooltip,
|
|
11745
|
-
{
|
|
11746
|
-
text: /* @__PURE__ */ jsxs("div", { children: [
|
|
11747
|
-
"Controls which pages are scraped:",
|
|
11748
|
-
/* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
|
|
11749
|
-
/* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
|
|
11750
|
-
/* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
|
|
11751
|
-
/* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
|
|
11752
|
-
] })
|
|
11753
|
-
] })
|
|
11754
|
-
}
|
|
11755
|
-
)
|
|
11756
|
-
] }),
|
|
11591
|
+
/* @__PURE__ */ jsxs(
|
|
11592
|
+
"div",
|
|
11593
|
+
{
|
|
11594
|
+
class: "bg-gray-50 dark:bg-gray-900 p-2 rounded-md",
|
|
11595
|
+
"x-data": "{ open: false, headers: [] }",
|
|
11596
|
+
children: [
|
|
11757
11597
|
/* @__PURE__ */ jsxs(
|
|
11758
|
-
"
|
|
11598
|
+
"button",
|
|
11759
11599
|
{
|
|
11760
|
-
|
|
11761
|
-
|
|
11762
|
-
|
|
11600
|
+
type: "button",
|
|
11601
|
+
class: "w-full flex items-center gap-1.5 cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400 hover:text-gray-800 dark:hover:text-gray-200 transition-colors",
|
|
11602
|
+
"x-on:click": "open = !open",
|
|
11763
11603
|
children: [
|
|
11764
|
-
/* @__PURE__ */ jsx(
|
|
11765
|
-
|
|
11766
|
-
|
|
11604
|
+
/* @__PURE__ */ jsx(
|
|
11605
|
+
"svg",
|
|
11606
|
+
{
|
|
11607
|
+
class: "w-4 h-4 transform transition-transform duration-200",
|
|
11608
|
+
"x-bind:class": "{ 'rotate-90': open }",
|
|
11609
|
+
fill: "none",
|
|
11610
|
+
stroke: "currentColor",
|
|
11611
|
+
viewBox: "0 0 24 24",
|
|
11612
|
+
children: /* @__PURE__ */ jsx(
|
|
11613
|
+
"path",
|
|
11614
|
+
{
|
|
11615
|
+
"stroke-linecap": "round",
|
|
11616
|
+
"stroke-linejoin": "round",
|
|
11617
|
+
"stroke-width": "2",
|
|
11618
|
+
d: "M9 5l7 7-7 7"
|
|
11619
|
+
}
|
|
11620
|
+
)
|
|
11621
|
+
}
|
|
11622
|
+
),
|
|
11623
|
+
/* @__PURE__ */ jsx("span", { children: "Advanced Options" })
|
|
11767
11624
|
]
|
|
11768
11625
|
}
|
|
11769
|
-
)
|
|
11770
|
-
] }),
|
|
11771
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11772
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11773
|
-
/* @__PURE__ */ jsx(
|
|
11774
|
-
"label",
|
|
11775
|
-
{
|
|
11776
|
-
for: "includePatterns",
|
|
11777
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11778
|
-
children: "Include Patterns"
|
|
11779
|
-
}
|
|
11780
|
-
),
|
|
11781
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
|
|
11782
|
-
] }),
|
|
11783
|
-
/* @__PURE__ */ jsx(
|
|
11784
|
-
"textarea",
|
|
11785
|
-
{
|
|
11786
|
-
name: "includePatterns",
|
|
11787
|
-
id: "includePatterns",
|
|
11788
|
-
rows: "2",
|
|
11789
|
-
placeholder: "e.g. docs/* or /api\\/v1.*/",
|
|
11790
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11791
|
-
}
|
|
11792
|
-
)
|
|
11793
|
-
] }),
|
|
11794
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11795
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11796
|
-
/* @__PURE__ */ jsx(
|
|
11797
|
-
"label",
|
|
11798
|
-
{
|
|
11799
|
-
for: "excludePatterns",
|
|
11800
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11801
|
-
children: "Exclude Patterns"
|
|
11802
|
-
}
|
|
11803
|
-
),
|
|
11804
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/. Edit or clear this field to customize exclusions." })
|
|
11805
|
-
] }),
|
|
11806
|
-
/* @__PURE__ */ jsx(
|
|
11807
|
-
"textarea",
|
|
11808
|
-
{
|
|
11809
|
-
name: "excludePatterns",
|
|
11810
|
-
id: "excludePatterns",
|
|
11811
|
-
rows: "5",
|
|
11812
|
-
safe: true,
|
|
11813
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white font-mono text-xs",
|
|
11814
|
-
children: defaultExcludePatternsText
|
|
11815
|
-
}
|
|
11816
11626
|
),
|
|
11817
|
-
/* @__PURE__ */
|
|
11818
|
-
|
|
11819
|
-
|
|
11820
|
-
|
|
11821
|
-
|
|
11822
|
-
|
|
11823
|
-
|
|
11824
|
-
|
|
11825
|
-
|
|
11826
|
-
|
|
11827
|
-
|
|
11828
|
-
|
|
11829
|
-
|
|
11830
|
-
Tooltip,
|
|
11831
|
-
{
|
|
11832
|
-
text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
|
|
11833
|
-
/* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
|
|
11834
|
-
/* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
|
|
11835
|
-
/* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
|
|
11836
|
-
] }) })
|
|
11837
|
-
}
|
|
11838
|
-
)
|
|
11839
|
-
] }),
|
|
11840
|
-
/* @__PURE__ */ jsxs(
|
|
11841
|
-
"select",
|
|
11842
|
-
{
|
|
11843
|
-
name: "scrapeMode",
|
|
11844
|
-
id: "scrapeMode",
|
|
11845
|
-
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
11846
|
-
children: [
|
|
11847
|
-
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
|
|
11848
|
-
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
|
|
11849
|
-
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
|
|
11850
|
-
]
|
|
11851
|
-
}
|
|
11852
|
-
)
|
|
11853
|
-
] }),
|
|
11854
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11855
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
|
|
11856
|
-
/* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
|
|
11857
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
|
|
11858
|
-
] }),
|
|
11859
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
11860
|
-
/* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
|
|
11627
|
+
/* @__PURE__ */ jsxs("div", { "x-show": "open", "x-cloak": true, "x-collapse": true, class: "mt-2 space-y-2", children: [
|
|
11628
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11629
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11630
|
+
/* @__PURE__ */ jsx(
|
|
11631
|
+
"label",
|
|
11632
|
+
{
|
|
11633
|
+
for: "maxPages",
|
|
11634
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11635
|
+
children: "Max Pages"
|
|
11636
|
+
}
|
|
11637
|
+
),
|
|
11638
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
|
|
11639
|
+
] }),
|
|
11861
11640
|
/* @__PURE__ */ jsx(
|
|
11862
11641
|
"input",
|
|
11863
11642
|
{
|
|
11864
|
-
type: "
|
|
11865
|
-
|
|
11866
|
-
|
|
11867
|
-
|
|
11868
|
-
|
|
11643
|
+
type: "number",
|
|
11644
|
+
name: "maxPages",
|
|
11645
|
+
id: "maxPages",
|
|
11646
|
+
min: "1",
|
|
11647
|
+
placeholder: "1000",
|
|
11648
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11869
11649
|
}
|
|
11870
|
-
)
|
|
11871
|
-
|
|
11650
|
+
)
|
|
11651
|
+
] }),
|
|
11652
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11653
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11654
|
+
/* @__PURE__ */ jsx(
|
|
11655
|
+
"label",
|
|
11656
|
+
{
|
|
11657
|
+
for: "maxDepth",
|
|
11658
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11659
|
+
children: "Max Depth"
|
|
11660
|
+
}
|
|
11661
|
+
),
|
|
11662
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
|
|
11663
|
+
] }),
|
|
11872
11664
|
/* @__PURE__ */ jsx(
|
|
11873
11665
|
"input",
|
|
11874
11666
|
{
|
|
11875
|
-
type: "
|
|
11876
|
-
|
|
11877
|
-
|
|
11878
|
-
|
|
11879
|
-
|
|
11667
|
+
type: "number",
|
|
11668
|
+
name: "maxDepth",
|
|
11669
|
+
id: "maxDepth",
|
|
11670
|
+
min: "0",
|
|
11671
|
+
placeholder: "3",
|
|
11672
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11673
|
+
}
|
|
11674
|
+
)
|
|
11675
|
+
] }),
|
|
11676
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11677
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11678
|
+
/* @__PURE__ */ jsx(
|
|
11679
|
+
"label",
|
|
11680
|
+
{
|
|
11681
|
+
for: "scope",
|
|
11682
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11683
|
+
children: "Scope"
|
|
11684
|
+
}
|
|
11685
|
+
),
|
|
11686
|
+
/* @__PURE__ */ jsx(
|
|
11687
|
+
Tooltip,
|
|
11688
|
+
{
|
|
11689
|
+
text: /* @__PURE__ */ jsxs("div", { children: [
|
|
11690
|
+
"Controls which pages are scraped:",
|
|
11691
|
+
/* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
|
|
11692
|
+
/* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
|
|
11693
|
+
/* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
|
|
11694
|
+
/* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
|
|
11695
|
+
] })
|
|
11696
|
+
] })
|
|
11697
|
+
}
|
|
11698
|
+
)
|
|
11699
|
+
] }),
|
|
11700
|
+
/* @__PURE__ */ jsxs(
|
|
11701
|
+
"select",
|
|
11702
|
+
{
|
|
11703
|
+
name: "scope",
|
|
11704
|
+
id: "scope",
|
|
11705
|
+
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
11706
|
+
children: [
|
|
11707
|
+
/* @__PURE__ */ jsx("option", { value: "subpages", selected: true, children: "Subpages (Default)" }),
|
|
11708
|
+
/* @__PURE__ */ jsx("option", { value: "hostname", children: "Hostname" }),
|
|
11709
|
+
/* @__PURE__ */ jsx("option", { value: "domain", children: "Domain" })
|
|
11710
|
+
]
|
|
11711
|
+
}
|
|
11712
|
+
)
|
|
11713
|
+
] }),
|
|
11714
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11715
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11716
|
+
/* @__PURE__ */ jsx(
|
|
11717
|
+
"label",
|
|
11718
|
+
{
|
|
11719
|
+
for: "includePatterns",
|
|
11720
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11721
|
+
children: "Include Patterns"
|
|
11722
|
+
}
|
|
11723
|
+
),
|
|
11724
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
|
|
11725
|
+
] }),
|
|
11726
|
+
/* @__PURE__ */ jsx(
|
|
11727
|
+
"textarea",
|
|
11728
|
+
{
|
|
11729
|
+
name: "includePatterns",
|
|
11730
|
+
id: "includePatterns",
|
|
11731
|
+
rows: "2",
|
|
11732
|
+
placeholder: "e.g. docs/* or /api\\/v1.*/",
|
|
11733
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
11734
|
+
}
|
|
11735
|
+
)
|
|
11736
|
+
] }),
|
|
11737
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11738
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11739
|
+
/* @__PURE__ */ jsx(
|
|
11740
|
+
"label",
|
|
11741
|
+
{
|
|
11742
|
+
for: "excludePatterns",
|
|
11743
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11744
|
+
children: "Exclude Patterns"
|
|
11745
|
+
}
|
|
11746
|
+
),
|
|
11747
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/. Edit or clear this field to customize exclusions." })
|
|
11748
|
+
] }),
|
|
11749
|
+
/* @__PURE__ */ jsx(
|
|
11750
|
+
"textarea",
|
|
11751
|
+
{
|
|
11752
|
+
name: "excludePatterns",
|
|
11753
|
+
id: "excludePatterns",
|
|
11754
|
+
rows: "5",
|
|
11755
|
+
safe: true,
|
|
11756
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white font-mono text-xs",
|
|
11757
|
+
children: defaultExcludePatternsText
|
|
11880
11758
|
}
|
|
11881
11759
|
),
|
|
11760
|
+
/* @__PURE__ */ jsx("p", { class: "mt-1 text-xs text-gray-500 dark:text-gray-400", children: "Default patterns are pre-filled. Edit to customize or clear to exclude nothing." })
|
|
11761
|
+
] }),
|
|
11762
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11763
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11764
|
+
/* @__PURE__ */ jsx(
|
|
11765
|
+
"label",
|
|
11766
|
+
{
|
|
11767
|
+
for: "scrapeMode",
|
|
11768
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
11769
|
+
children: "Scrape Mode"
|
|
11770
|
+
}
|
|
11771
|
+
),
|
|
11772
|
+
/* @__PURE__ */ jsx(
|
|
11773
|
+
Tooltip,
|
|
11774
|
+
{
|
|
11775
|
+
text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
|
|
11776
|
+
/* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
|
|
11777
|
+
/* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
|
|
11778
|
+
/* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
|
|
11779
|
+
] }) })
|
|
11780
|
+
}
|
|
11781
|
+
)
|
|
11782
|
+
] }),
|
|
11783
|
+
/* @__PURE__ */ jsxs(
|
|
11784
|
+
"select",
|
|
11785
|
+
{
|
|
11786
|
+
name: "scrapeMode",
|
|
11787
|
+
id: "scrapeMode",
|
|
11788
|
+
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
11789
|
+
children: [
|
|
11790
|
+
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
|
|
11791
|
+
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
|
|
11792
|
+
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
|
|
11793
|
+
]
|
|
11794
|
+
}
|
|
11795
|
+
)
|
|
11796
|
+
] }),
|
|
11797
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11798
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
|
|
11799
|
+
/* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
|
|
11800
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
|
|
11801
|
+
] }),
|
|
11802
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
11803
|
+
/* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
|
|
11804
|
+
/* @__PURE__ */ jsx(
|
|
11805
|
+
"input",
|
|
11806
|
+
{
|
|
11807
|
+
type: "text",
|
|
11808
|
+
class: "w-1/3 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
|
|
11809
|
+
placeholder: "Header Name",
|
|
11810
|
+
"x-model": "header.name",
|
|
11811
|
+
required: true
|
|
11812
|
+
}
|
|
11813
|
+
),
|
|
11814
|
+
/* @__PURE__ */ jsx("span", { class: "text-gray-500", children: ":" }),
|
|
11815
|
+
/* @__PURE__ */ jsx(
|
|
11816
|
+
"input",
|
|
11817
|
+
{
|
|
11818
|
+
type: "text",
|
|
11819
|
+
class: "w-1/2 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
|
|
11820
|
+
placeholder: "Header Value",
|
|
11821
|
+
"x-model": "header.value",
|
|
11822
|
+
required: true
|
|
11823
|
+
}
|
|
11824
|
+
),
|
|
11825
|
+
/* @__PURE__ */ jsx(
|
|
11826
|
+
"button",
|
|
11827
|
+
{
|
|
11828
|
+
type: "button",
|
|
11829
|
+
class: "text-red-500 hover:text-red-700 text-xs",
|
|
11830
|
+
"x-on:click": "headers.splice(idx, 1)",
|
|
11831
|
+
children: "Remove"
|
|
11832
|
+
}
|
|
11833
|
+
),
|
|
11834
|
+
/* @__PURE__ */ jsx(
|
|
11835
|
+
"input",
|
|
11836
|
+
{
|
|
11837
|
+
type: "hidden",
|
|
11838
|
+
name: "header[]",
|
|
11839
|
+
"x-bind:value": "header.name && header.value ? header.name + ':' + header.value : ''"
|
|
11840
|
+
}
|
|
11841
|
+
)
|
|
11842
|
+
] }) }),
|
|
11843
|
+
/* @__PURE__ */ jsx(
|
|
11844
|
+
"button",
|
|
11845
|
+
{
|
|
11846
|
+
type: "button",
|
|
11847
|
+
class: "mt-1 px-2 py-0.5 bg-primary-100 dark:bg-primary-900 text-primary-700 dark:text-primary-200 rounded text-xs",
|
|
11848
|
+
"x-on:click": "headers.push({ name: '', value: '' })",
|
|
11849
|
+
children: "+ Add Header"
|
|
11850
|
+
}
|
|
11851
|
+
)
|
|
11852
|
+
] })
|
|
11853
|
+
] }),
|
|
11854
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11882
11855
|
/* @__PURE__ */ jsx(
|
|
11883
|
-
"
|
|
11856
|
+
"input",
|
|
11884
11857
|
{
|
|
11885
|
-
|
|
11886
|
-
|
|
11887
|
-
|
|
11888
|
-
|
|
11858
|
+
id: "followRedirects",
|
|
11859
|
+
name: "followRedirects",
|
|
11860
|
+
type: "checkbox",
|
|
11861
|
+
checked: true,
|
|
11862
|
+
class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
11889
11863
|
}
|
|
11890
11864
|
),
|
|
11865
|
+
/* @__PURE__ */ jsx(
|
|
11866
|
+
"label",
|
|
11867
|
+
{
|
|
11868
|
+
for: "followRedirects",
|
|
11869
|
+
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
11870
|
+
children: "Follow Redirects"
|
|
11871
|
+
}
|
|
11872
|
+
)
|
|
11873
|
+
] }),
|
|
11874
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11891
11875
|
/* @__PURE__ */ jsx(
|
|
11892
11876
|
"input",
|
|
11893
11877
|
{
|
|
11894
|
-
|
|
11895
|
-
name: "
|
|
11896
|
-
|
|
11878
|
+
id: "ignoreErrors",
|
|
11879
|
+
name: "ignoreErrors",
|
|
11880
|
+
type: "checkbox",
|
|
11881
|
+
checked: true,
|
|
11882
|
+
class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
11883
|
+
}
|
|
11884
|
+
),
|
|
11885
|
+
/* @__PURE__ */ jsx(
|
|
11886
|
+
"label",
|
|
11887
|
+
{
|
|
11888
|
+
for: "ignoreErrors",
|
|
11889
|
+
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
11890
|
+
children: "Ignore Errors During Scraping"
|
|
11897
11891
|
}
|
|
11898
11892
|
)
|
|
11899
|
-
] })
|
|
11900
|
-
/* @__PURE__ */ jsx(
|
|
11901
|
-
"button",
|
|
11902
|
-
{
|
|
11903
|
-
type: "button",
|
|
11904
|
-
class: "mt-1 px-2 py-0.5 bg-primary-100 dark:bg-primary-900 text-primary-700 dark:text-primary-200 rounded text-xs",
|
|
11905
|
-
"x-on:click": "headers.push({ name: '', value: '' })",
|
|
11906
|
-
children: "+ Add Header"
|
|
11907
|
-
}
|
|
11908
|
-
)
|
|
11893
|
+
] })
|
|
11909
11894
|
] })
|
|
11910
|
-
]
|
|
11911
|
-
|
|
11912
|
-
|
|
11913
|
-
"input",
|
|
11914
|
-
{
|
|
11915
|
-
id: "followRedirects",
|
|
11916
|
-
name: "followRedirects",
|
|
11917
|
-
type: "checkbox",
|
|
11918
|
-
checked: true,
|
|
11919
|
-
class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
11920
|
-
}
|
|
11921
|
-
),
|
|
11922
|
-
/* @__PURE__ */ jsx(
|
|
11923
|
-
"label",
|
|
11924
|
-
{
|
|
11925
|
-
for: "followRedirects",
|
|
11926
|
-
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
11927
|
-
children: "Follow Redirects"
|
|
11928
|
-
}
|
|
11929
|
-
)
|
|
11930
|
-
] }),
|
|
11931
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
11932
|
-
/* @__PURE__ */ jsx(
|
|
11933
|
-
"input",
|
|
11934
|
-
{
|
|
11935
|
-
id: "ignoreErrors",
|
|
11936
|
-
name: "ignoreErrors",
|
|
11937
|
-
type: "checkbox",
|
|
11938
|
-
checked: true,
|
|
11939
|
-
class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
11940
|
-
}
|
|
11941
|
-
),
|
|
11942
|
-
/* @__PURE__ */ jsx(
|
|
11943
|
-
"label",
|
|
11944
|
-
{
|
|
11945
|
-
for: "ignoreErrors",
|
|
11946
|
-
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
11947
|
-
children: "Ignore Errors During Scraping"
|
|
11948
|
-
}
|
|
11949
|
-
)
|
|
11950
|
-
] })
|
|
11951
|
-
] })
|
|
11952
|
-
] }),
|
|
11895
|
+
]
|
|
11896
|
+
}
|
|
11897
|
+
),
|
|
11953
11898
|
/* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsx(
|
|
11954
11899
|
"button",
|
|
11955
11900
|
{
|
|
11956
11901
|
type: "submit",
|
|
11957
11902
|
class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500",
|
|
11958
|
-
children: "
|
|
11903
|
+
children: "Start Indexing"
|
|
11959
11904
|
}
|
|
11960
11905
|
) })
|
|
11961
11906
|
]
|
|
@@ -11964,7 +11909,7 @@ const ScrapeFormContent = ({
|
|
|
11964
11909
|
/* @__PURE__ */ jsx("div", { id: "job-response", class: "mt-2 text-sm" })
|
|
11965
11910
|
] });
|
|
11966
11911
|
};
|
|
11967
|
-
const ScrapeForm = ({ defaultExcludePatterns }) => /* @__PURE__ */ jsx("div", { id: "scrape-form-container", children: /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns }) });
|
|
11912
|
+
const ScrapeForm = ({ defaultExcludePatterns }) => /* @__PURE__ */ jsx("div", { id: "scrape-form-container", class: "animate-[fadeSlideIn_0.2s_ease-out]", children: /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns }) });
|
|
11968
11913
|
const DEFAULT_FILE_EXCLUSIONS = [
|
|
11969
11914
|
// CHANGELOG files (case variations)
|
|
11970
11915
|
"**/CHANGELOG.md",
|
|
@@ -12064,10 +12009,24 @@ function getEffectiveExclusionPatterns(userPatterns) {
|
|
|
12064
12009
|
}
|
|
12065
12010
|
return DEFAULT_EXCLUSION_PATTERNS;
|
|
12066
12011
|
}
|
|
12012
|
+
const ScrapeFormButton = () => /* @__PURE__ */ jsx(
|
|
12013
|
+
"button",
|
|
12014
|
+
{
|
|
12015
|
+
type: "button",
|
|
12016
|
+
"hx-get": "/web/jobs/new",
|
|
12017
|
+
"hx-target": "#addJobForm",
|
|
12018
|
+
"hx-swap": "innerHTML",
|
|
12019
|
+
class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500 transition-colors duration-150",
|
|
12020
|
+
children: "Add New Documentation"
|
|
12021
|
+
}
|
|
12022
|
+
);
|
|
12067
12023
|
function registerNewJobRoutes(server, scrapeTool) {
|
|
12068
12024
|
server.get("/web/jobs/new", async () => {
|
|
12069
12025
|
return /* @__PURE__ */ jsx(ScrapeForm, { defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS });
|
|
12070
12026
|
});
|
|
12027
|
+
server.get("/web/jobs/new-button", async () => {
|
|
12028
|
+
return /* @__PURE__ */ jsx(ScrapeFormButton, {});
|
|
12029
|
+
});
|
|
12071
12030
|
server.post(
|
|
12072
12031
|
"/web/jobs/scrape",
|
|
12073
12032
|
async (request, reply) => {
|
|
@@ -12125,25 +12084,16 @@ function registerNewJobRoutes(server, scrapeTool) {
|
|
|
12125
12084
|
};
|
|
12126
12085
|
const result = await scrapeTool.execute(scrapeOptions);
|
|
12127
12086
|
if ("jobId" in result) {
|
|
12128
|
-
|
|
12129
|
-
|
|
12130
|
-
|
|
12131
|
-
{
|
|
12132
|
-
|
|
12133
|
-
|
|
12134
|
-
"Job queued successfully! ID:",
|
|
12135
|
-
" ",
|
|
12136
|
-
/* @__PURE__ */ jsx("span", { safe: true, children: result.jobId })
|
|
12137
|
-
] })
|
|
12138
|
-
}
|
|
12139
|
-
),
|
|
12140
|
-
/* @__PURE__ */ jsx("div", { id: "scrape-form-container", "hx-swap-oob": "innerHTML", children: /* @__PURE__ */ jsx(
|
|
12141
|
-
ScrapeFormContent,
|
|
12142
|
-
{
|
|
12143
|
-
defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS
|
|
12087
|
+
reply.header(
|
|
12088
|
+
"HX-Trigger",
|
|
12089
|
+
JSON.stringify({
|
|
12090
|
+
toast: {
|
|
12091
|
+
message: "Job queued successfully!",
|
|
12092
|
+
type: "success"
|
|
12144
12093
|
}
|
|
12145
|
-
|
|
12146
|
-
|
|
12094
|
+
})
|
|
12095
|
+
);
|
|
12096
|
+
return /* @__PURE__ */ jsx(ScrapeFormButton, {});
|
|
12147
12097
|
}
|
|
12148
12098
|
return /* @__PURE__ */ jsx(Alert, { type: "warning", message: "Job finished unexpectedly quickly." });
|
|
12149
12099
|
} catch (error) {
|
|
@@ -12189,8 +12139,7 @@ const VersionDetailsRow = ({
|
|
|
12189
12139
|
class: "flex justify-between items-center py-1 border-b border-gray-200 dark:border-gray-600 last:border-b-0",
|
|
12190
12140
|
"data-library-name": libraryName,
|
|
12191
12141
|
"data-version-param": versionParam,
|
|
12192
|
-
"x-data": "{ library: $el.dataset.libraryName, version: $el.dataset.versionParam,
|
|
12193
|
-
"x-bind:hx-preserve": "$store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === deleteId",
|
|
12142
|
+
"x-data": "{ library: $el.dataset.libraryName, version: $el.dataset.versionParam, confirming: $el.dataset.confirming === 'true', isDeleting: false }",
|
|
12194
12143
|
children: [
|
|
12195
12144
|
/* @__PURE__ */ jsx(
|
|
12196
12145
|
"span",
|
|
@@ -12207,7 +12156,7 @@ const VersionDetailsRow = ({
|
|
|
12207
12156
|
/* @__PURE__ */ jsx("span", { class: "font-semibold", safe: true, children: version.counts.uniqueUrls.toLocaleString() })
|
|
12208
12157
|
] }),
|
|
12209
12158
|
/* @__PURE__ */ jsxs("span", { title: "Number of indexed snippets", children: [
|
|
12210
|
-
"
|
|
12159
|
+
"Chunks:",
|
|
12211
12160
|
" ",
|
|
12212
12161
|
/* @__PURE__ */ jsx("span", { class: "font-semibold", safe: true, children: version.counts.documents.toLocaleString() })
|
|
12213
12162
|
] }),
|
|
@@ -12223,15 +12172,15 @@ const VersionDetailsRow = ({
|
|
|
12223
12172
|
type: "button",
|
|
12224
12173
|
class: "ml-2 font-medium rounded-lg text-sm p-1 text-center inline-flex items-center transition-colors duration-150 ease-in-out",
|
|
12225
12174
|
title: "Remove this version",
|
|
12226
|
-
"x-bind:class":
|
|
12227
|
-
"x-bind:disabled": "
|
|
12228
|
-
"x-on:click": "\n if (
|
|
12175
|
+
"x-bind:class": `confirming ? '${confirmingStateClasses}' : '${defaultStateClasses}'`,
|
|
12176
|
+
"x-bind:disabled": "isDeleting",
|
|
12177
|
+
"x-on:click": "\n if (confirming) {\n isDeleting = true;\n window.confirmationManager.clear($root.id);\n $el.dispatchEvent(new CustomEvent('confirmed-delete', { bubbles: true }));\n } else {\n confirming = true;\n isDeleting = false;\n window.confirmationManager.start($root.id);\n }\n ",
|
|
12229
12178
|
"hx-delete": `/web/libraries/${encodeURIComponent(libraryName)}/versions/${encodeURIComponent(versionParam)}`,
|
|
12230
12179
|
"hx-target": `#${rowId}`,
|
|
12231
12180
|
"hx-swap": "outerHTML",
|
|
12232
12181
|
"hx-trigger": "confirmed-delete",
|
|
12233
12182
|
children: [
|
|
12234
|
-
/* @__PURE__ */ jsxs("span", { "x-show": "!
|
|
12183
|
+
/* @__PURE__ */ jsxs("span", { "x-show": "!confirming && !isDeleting", children: [
|
|
12235
12184
|
/* @__PURE__ */ jsx(
|
|
12236
12185
|
"svg",
|
|
12237
12186
|
{
|
|
@@ -12254,18 +12203,11 @@ const VersionDetailsRow = ({
|
|
|
12254
12203
|
),
|
|
12255
12204
|
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Remove version" })
|
|
12256
12205
|
] }),
|
|
12257
|
-
/* @__PURE__ */ jsxs(
|
|
12258
|
-
"
|
|
12259
|
-
{
|
|
12260
|
-
|
|
12261
|
-
|
|
12262
|
-
children: [
|
|
12263
|
-
"Confirm?",
|
|
12264
|
-
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Confirm delete" })
|
|
12265
|
-
]
|
|
12266
|
-
}
|
|
12267
|
-
),
|
|
12268
|
-
/* @__PURE__ */ jsxs("span", { "x-show": "$store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === deleteId && $store.confirmingAction.isDeleting", children: [
|
|
12206
|
+
/* @__PURE__ */ jsxs("span", { "x-show": "confirming && !isDeleting", class: "mx-1", children: [
|
|
12207
|
+
"Confirm?",
|
|
12208
|
+
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Confirm delete" })
|
|
12209
|
+
] }),
|
|
12210
|
+
/* @__PURE__ */ jsxs("span", { "x-show": "isDeleting", children: [
|
|
12269
12211
|
/* @__PURE__ */ jsx(LoadingSpinner, {}),
|
|
12270
12212
|
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Loading..." })
|
|
12271
12213
|
] })
|
|
@@ -12487,48 +12429,90 @@ const LibraryItem = ({ library }) => {
|
|
|
12487
12429
|
const latestVersion = versions[0];
|
|
12488
12430
|
return (
|
|
12489
12431
|
// Use Flowbite Card structure with updated padding and border, and white background
|
|
12490
|
-
/* @__PURE__ */ jsxs(
|
|
12491
|
-
|
|
12492
|
-
|
|
12493
|
-
{
|
|
12494
|
-
|
|
12495
|
-
|
|
12496
|
-
|
|
12497
|
-
|
|
12498
|
-
|
|
12499
|
-
|
|
12500
|
-
|
|
12501
|
-
|
|
12502
|
-
|
|
12503
|
-
|
|
12504
|
-
class: "
|
|
12505
|
-
|
|
12506
|
-
|
|
12507
|
-
|
|
12508
|
-
|
|
12509
|
-
|
|
12510
|
-
|
|
12511
|
-
|
|
12512
|
-
|
|
12513
|
-
|
|
12514
|
-
|
|
12515
|
-
|
|
12516
|
-
|
|
12517
|
-
|
|
12518
|
-
|
|
12519
|
-
|
|
12520
|
-
|
|
12521
|
-
|
|
12522
|
-
|
|
12523
|
-
|
|
12524
|
-
|
|
12525
|
-
|
|
12526
|
-
|
|
12527
|
-
|
|
12432
|
+
/* @__PURE__ */ jsxs(
|
|
12433
|
+
"div",
|
|
12434
|
+
{
|
|
12435
|
+
id: `library-item-${library.name}`,
|
|
12436
|
+
class: "block px-4 py-2 bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-300 dark:border-gray-600",
|
|
12437
|
+
children: [
|
|
12438
|
+
/* @__PURE__ */ jsx("h3", { class: "text-lg font-medium text-gray-900 dark:text-white", children: /* @__PURE__ */ jsx(
|
|
12439
|
+
"a",
|
|
12440
|
+
{
|
|
12441
|
+
href: `/libraries/${encodeURIComponent(library.name)}`,
|
|
12442
|
+
class: "hover:underline",
|
|
12443
|
+
children: /* @__PURE__ */ jsx("span", { safe: true, children: library.name })
|
|
12444
|
+
}
|
|
12445
|
+
) }),
|
|
12446
|
+
latestVersion?.sourceUrl ? /* @__PURE__ */ jsx("div", { class: "text-sm text-gray-500 dark:text-gray-400 overflow-hidden h-5 @container", children: /* @__PURE__ */ jsx(
|
|
12447
|
+
"a",
|
|
12448
|
+
{
|
|
12449
|
+
href: latestVersion.sourceUrl,
|
|
12450
|
+
target: "_blank",
|
|
12451
|
+
class: "inline-block whitespace-nowrap hover:underline hover:animate-[scrollText_2s_ease-in-out_forwards]",
|
|
12452
|
+
title: latestVersion.sourceUrl,
|
|
12453
|
+
safe: true,
|
|
12454
|
+
children: latestVersion.sourceUrl
|
|
12455
|
+
}
|
|
12456
|
+
) }) : null,
|
|
12457
|
+
/* @__PURE__ */ jsx("div", { class: "mt-2", children: versions.length > 0 ? versions.map((v) => {
|
|
12458
|
+
const adapted = {
|
|
12459
|
+
id: -1,
|
|
12460
|
+
ref: { library: library.name, version: v.version },
|
|
12461
|
+
status: v.status,
|
|
12462
|
+
progress: v.progress,
|
|
12463
|
+
counts: {
|
|
12464
|
+
documents: v.documentCount,
|
|
12465
|
+
uniqueUrls: v.uniqueUrlCount
|
|
12466
|
+
},
|
|
12467
|
+
indexedAt: v.indexedAt,
|
|
12468
|
+
sourceUrl: v.sourceUrl ?? void 0
|
|
12469
|
+
};
|
|
12470
|
+
return /* @__PURE__ */ jsx(VersionDetailsRow, { libraryName: library.name, version: adapted });
|
|
12471
|
+
}) : (
|
|
12472
|
+
// Display message if no versions are indexed
|
|
12473
|
+
/* @__PURE__ */ jsx("p", { class: "text-sm text-gray-500 dark:text-gray-400 italic", children: "No versions indexed." })
|
|
12474
|
+
) })
|
|
12475
|
+
]
|
|
12476
|
+
}
|
|
12477
|
+
)
|
|
12528
12478
|
);
|
|
12529
12479
|
};
|
|
12530
12480
|
const LibraryList = ({ libraries }) => {
|
|
12531
|
-
|
|
12481
|
+
if (libraries.length === 0) {
|
|
12482
|
+
return /* @__PURE__ */ jsx(
|
|
12483
|
+
Alert,
|
|
12484
|
+
{
|
|
12485
|
+
type: "info",
|
|
12486
|
+
title: "Welcome!",
|
|
12487
|
+
message: /* @__PURE__ */ jsxs(Fragment, { children: [
|
|
12488
|
+
"To get started, click",
|
|
12489
|
+
" ",
|
|
12490
|
+
/* @__PURE__ */ jsx("span", { class: "font-semibold", children: "Add New Documentation" }),
|
|
12491
|
+
" above and enter the URL of a documentation site to index. For more information, check the",
|
|
12492
|
+
" ",
|
|
12493
|
+
/* @__PURE__ */ jsx(
|
|
12494
|
+
"a",
|
|
12495
|
+
{
|
|
12496
|
+
href: "https://grounded.tools",
|
|
12497
|
+
target: "_blank",
|
|
12498
|
+
rel: "noopener noreferrer",
|
|
12499
|
+
class: "font-medium underline hover:no-underline",
|
|
12500
|
+
children: "official website"
|
|
12501
|
+
}
|
|
12502
|
+
),
|
|
12503
|
+
"."
|
|
12504
|
+
] })
|
|
12505
|
+
}
|
|
12506
|
+
);
|
|
12507
|
+
}
|
|
12508
|
+
return /* @__PURE__ */ jsx(
|
|
12509
|
+
"div",
|
|
12510
|
+
{
|
|
12511
|
+
id: "library-list",
|
|
12512
|
+
class: "space-y-2 animate-[fadeSlideIn_0.2s_ease-out]",
|
|
12513
|
+
children: libraries.map((library) => /* @__PURE__ */ jsx(LibraryItem, { library }))
|
|
12514
|
+
}
|
|
12515
|
+
);
|
|
12532
12516
|
};
|
|
12533
12517
|
function registerLibrariesRoutes(server, listLibrariesTool, removeTool) {
|
|
12534
12518
|
server.get("/web/libraries", async (_request, reply) => {
|
|
@@ -12558,6 +12542,75 @@ function registerLibrariesRoutes(server, listLibrariesTool, removeTool) {
|
|
|
12558
12542
|
}
|
|
12559
12543
|
);
|
|
12560
12544
|
}
|
|
12545
|
+
function formatNumber(num) {
|
|
12546
|
+
if (num >= 1e9) {
|
|
12547
|
+
return `${(num / 1e9).toFixed(1)}B`;
|
|
12548
|
+
}
|
|
12549
|
+
if (num >= 1e6) {
|
|
12550
|
+
return `${(num / 1e6).toFixed(1)}M`;
|
|
12551
|
+
}
|
|
12552
|
+
if (num >= 1e3) {
|
|
12553
|
+
return `${(num / 1e3).toFixed(1)}K`;
|
|
12554
|
+
}
|
|
12555
|
+
return num.toString();
|
|
12556
|
+
}
|
|
12557
|
+
const AnalyticsCards = ({
|
|
12558
|
+
totalChunks,
|
|
12559
|
+
activeLibraries,
|
|
12560
|
+
activeVersions,
|
|
12561
|
+
indexedPages
|
|
12562
|
+
}) => /* @__PURE__ */ jsxs("div", { class: "grid grid-cols-1 sm:grid-cols-3 gap-4 mb-4 animate-[fadeSlideIn_0.2s_ease-out]", children: [
|
|
12563
|
+
/* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600", children: /* @__PURE__ */ jsx("div", { class: "flex items-center", children: /* @__PURE__ */ jsxs("div", { children: [
|
|
12564
|
+
/* @__PURE__ */ jsx("p", { class: "text-sm font-medium text-gray-500 dark:text-gray-400", children: "Total Knowledge Base" }),
|
|
12565
|
+
/* @__PURE__ */ jsxs("p", { class: "text-xl font-semibold text-gray-900 dark:text-white", safe: true, children: [
|
|
12566
|
+
formatNumber(totalChunks),
|
|
12567
|
+
" Chunks"
|
|
12568
|
+
] })
|
|
12569
|
+
] }) }) }),
|
|
12570
|
+
/* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600", children: /* @__PURE__ */ jsx("div", { class: "flex items-center", children: /* @__PURE__ */ jsxs("div", { children: [
|
|
12571
|
+
/* @__PURE__ */ jsx("p", { class: "text-sm font-medium text-gray-500 dark:text-gray-400", children: "Libraries / Versions" }),
|
|
12572
|
+
/* @__PURE__ */ jsxs("p", { class: "text-xl font-semibold text-gray-900 dark:text-white", children: [
|
|
12573
|
+
activeLibraries,
|
|
12574
|
+
" / ",
|
|
12575
|
+
activeVersions
|
|
12576
|
+
] })
|
|
12577
|
+
] }) }) }),
|
|
12578
|
+
/* @__PURE__ */ jsx("div", { class: "p-4 bg-white rounded-lg shadow dark:bg-gray-800 border border-gray-300 dark:border-gray-600", children: /* @__PURE__ */ jsx("div", { class: "flex items-center", children: /* @__PURE__ */ jsxs("div", { children: [
|
|
12579
|
+
/* @__PURE__ */ jsx("p", { class: "text-sm font-medium text-gray-500 dark:text-gray-400", children: "Indexed Pages" }),
|
|
12580
|
+
/* @__PURE__ */ jsx("p", { class: "text-xl font-semibold text-gray-900 dark:text-white", safe: true, children: formatNumber(indexedPages) })
|
|
12581
|
+
] }) }) })
|
|
12582
|
+
] });
|
|
12583
|
+
/**
 * Registers `GET /web/stats`, which renders the analytics cards from totals
 * aggregated over every version of every library.
 *
 * @param server - Object exposing `get(path, handler)` used to register the route.
 * @param docService - Object exposing an async `listLibraries()`.
 */
function registerStatsRoute(server, docService) {
  // Sums document and unique-URL counts across all versions of all libraries.
  const summarize = (libraries) => {
    const totals = { totalChunks: 0, indexedPages: 0, activeVersions: 0 };
    for (const library of libraries) {
      totals.activeVersions += library.versions.length;
      for (const { counts } of library.versions) {
        totals.totalChunks += counts.documents;
        totals.indexedPages += counts.uniqueUrls;
      }
    }
    return totals;
  };

  const handleStats = async (_request, reply) => {
    try {
      const libraries = await docService.listLibraries();
      const { totalChunks, indexedPages, activeVersions } = summarize(libraries);
      const activeLibraries = libraries.length;
      reply.type("text/html; charset=utf-8");
      return /* @__PURE__ */ jsx(AnalyticsCards, {
        totalChunks,
        activeLibraries,
        activeVersions,
        indexedPages
      });
    } catch (error) {
      // Any failure (listing or aggregation) is logged and reported as a 500.
      logger.error(`Failed to fetch stats: ${error}`);
      reply.status(500).send("Internal Server Error");
    }
  };

  server.get("/web/stats", handleStats);
}
|
|
12561
12614
|
async function registerWebService(server, docService, pipeline, eventBus, config) {
|
|
12562
12615
|
const listLibrariesTool = new ListLibrariesTool(docService);
|
|
12563
12616
|
const listJobsTool = new ListJobsTool(pipeline);
|
|
@@ -12574,6 +12627,7 @@ async function registerWebService(server, docService, pipeline, eventBus, config
|
|
|
12574
12627
|
registerCancelJobRoute(server, cancelJobTool);
|
|
12575
12628
|
registerClearCompletedJobsRoute(server, clearCompletedJobsTool);
|
|
12576
12629
|
registerEventsRoute(server, eventBus);
|
|
12630
|
+
registerStatsRoute(server, docService);
|
|
12577
12631
|
}
|
|
12578
12632
|
async function registerWorkerService(pipeline) {
|
|
12579
12633
|
await pipeline.start();
|
|
@@ -12598,7 +12652,6 @@ class AppServer {
|
|
|
12598
12652
|
mcpServer = null;
|
|
12599
12653
|
authManager = null;
|
|
12600
12654
|
config;
|
|
12601
|
-
embeddingConfig = null;
|
|
12602
12655
|
remoteEventProxy = null;
|
|
12603
12656
|
wss = null;
|
|
12604
12657
|
/**
|
|
@@ -12625,22 +12678,22 @@ class AppServer {
|
|
|
12625
12678
|
*/
|
|
12626
12679
|
async start() {
|
|
12627
12680
|
this.validateConfig();
|
|
12628
|
-
|
|
12681
|
+
const embeddingConfig = this.docService.getActiveEmbeddingConfig();
|
|
12629
12682
|
if (this.config.telemetry !== false && shouldEnableTelemetry()) {
|
|
12630
12683
|
try {
|
|
12631
12684
|
if (telemetry.isEnabled()) {
|
|
12632
12685
|
telemetry.setGlobalContext({
|
|
12633
|
-
appVersion: "1.
|
|
12686
|
+
appVersion: "1.30.0",
|
|
12634
12687
|
appPlatform: process.platform,
|
|
12635
12688
|
appNodeVersion: process.version,
|
|
12636
12689
|
appServicesEnabled: this.getActiveServicesList(),
|
|
12637
12690
|
appAuthEnabled: Boolean(this.config.auth),
|
|
12638
12691
|
appReadOnly: Boolean(this.config.readOnly),
|
|
12639
12692
|
// Add embedding configuration to global context
|
|
12640
|
-
...
|
|
12641
|
-
aiEmbeddingProvider:
|
|
12642
|
-
aiEmbeddingModel:
|
|
12643
|
-
aiEmbeddingDimensions:
|
|
12693
|
+
...embeddingConfig && {
|
|
12694
|
+
aiEmbeddingProvider: embeddingConfig.provider,
|
|
12695
|
+
aiEmbeddingModel: embeddingConfig.model,
|
|
12696
|
+
aiEmbeddingDimensions: embeddingConfig.dimensions
|
|
12644
12697
|
}
|
|
12645
12698
|
});
|
|
12646
12699
|
telemetry.track(TelemetryEvent.APP_STARTED, {
|
|
@@ -12944,28 +12997,38 @@ class AppServer {
|
|
|
12944
12997
|
* Log startup information showing which services are enabled.
|
|
12945
12998
|
*/
|
|
12946
12999
|
logStartupInfo(address) {
|
|
12947
|
-
|
|
13000
|
+
const isWorkerOnly = this.config.enableWorker && !this.config.enableWebInterface && !this.config.enableMcpServer;
|
|
13001
|
+
const isWebOnly = this.config.enableWebInterface && !this.config.enableWorker && !this.config.enableMcpServer;
|
|
13002
|
+
const isMcpOnly = this.config.enableMcpServer && !this.config.enableWebInterface && !this.config.enableWorker;
|
|
13003
|
+
if (isWorkerOnly) {
|
|
13004
|
+
logger.info(`🚀 Worker available at ${address}`);
|
|
13005
|
+
} else if (isWebOnly) {
|
|
13006
|
+
logger.info(`🚀 Web interface available at ${address}`);
|
|
13007
|
+
} else if (isMcpOnly) {
|
|
13008
|
+
logger.info(`🚀 MCP server available at ${address}`);
|
|
13009
|
+
} else {
|
|
13010
|
+
logger.info(`🚀 Grounded Docs available at ${address}`);
|
|
13011
|
+
}
|
|
13012
|
+
const isCombined = !isWorkerOnly && !isWebOnly && !isMcpOnly;
|
|
12948
13013
|
const enabledServices = [];
|
|
12949
|
-
if (this.config.enableWebInterface) {
|
|
13014
|
+
if (this.config.enableWebInterface && isCombined) {
|
|
12950
13015
|
enabledServices.push(`Web interface: ${address}`);
|
|
12951
13016
|
}
|
|
12952
13017
|
if (this.config.enableMcpServer) {
|
|
12953
13018
|
enabledServices.push(`MCP endpoints: ${address}/mcp, ${address}/sse`);
|
|
12954
13019
|
}
|
|
12955
|
-
if (this.config.
|
|
12956
|
-
enabledServices.push(`API: ${address}/api`);
|
|
12957
|
-
}
|
|
12958
|
-
if (this.config.enableWorker) {
|
|
12959
|
-
enabledServices.push("Worker: internal");
|
|
12960
|
-
} else if (this.config.externalWorkerUrl) {
|
|
13020
|
+
if (!this.config.enableWorker && this.config.externalWorkerUrl) {
|
|
12961
13021
|
enabledServices.push(`Worker: ${this.config.externalWorkerUrl}`);
|
|
12962
13022
|
}
|
|
12963
|
-
if (this.
|
|
12964
|
-
|
|
12965
|
-
|
|
12966
|
-
|
|
12967
|
-
|
|
12968
|
-
|
|
13023
|
+
if (this.config.enableWorker) {
|
|
13024
|
+
const embeddingConfig = this.docService.getActiveEmbeddingConfig();
|
|
13025
|
+
if (embeddingConfig) {
|
|
13026
|
+
enabledServices.push(
|
|
13027
|
+
`Embeddings: ${embeddingConfig.provider}:${embeddingConfig.model}`
|
|
13028
|
+
);
|
|
13029
|
+
} else {
|
|
13030
|
+
enabledServices.push(`Embeddings: disabled (full text search only)`);
|
|
13031
|
+
}
|
|
12969
13032
|
}
|
|
12970
13033
|
for (const service of enabledServices) {
|
|
12971
13034
|
logger.info(` • ${service}`);
|
|
@@ -14461,7 +14524,7 @@ class PipelineManager {
|
|
|
14461
14524
|
parsedScraperOptions = JSON.parse(version.scraper_options);
|
|
14462
14525
|
} catch (error) {
|
|
14463
14526
|
logger.warn(
|
|
14464
|
-
`⚠️
|
|
14527
|
+
`⚠️ Failed to parse scraper options for ${version.library_name}@${version.name || "unversioned"}: ${error}`
|
|
14465
14528
|
);
|
|
14466
14529
|
}
|
|
14467
14530
|
}
|
|
@@ -14829,7 +14892,7 @@ class PipelineManager {
|
|
|
14829
14892
|
},
|
|
14830
14893
|
onJobError: async (internalJob, error, document2) => {
|
|
14831
14894
|
logger.warn(
|
|
14832
|
-
`⚠️
|
|
14895
|
+
`⚠️ Job ${internalJob.id} error ${document2 ? `on document ${document2.url}` : ""}: ${error.message}`
|
|
14833
14896
|
);
|
|
14834
14897
|
}
|
|
14835
14898
|
});
|
|
@@ -14910,7 +14973,7 @@ class PipelineManager {
|
|
|
14910
14973
|
);
|
|
14911
14974
|
} catch (optionsError) {
|
|
14912
14975
|
logger.warn(
|
|
14913
|
-
`⚠️
|
|
14976
|
+
`⚠️ Failed to store scraper options for job ${job.id}: ${optionsError}`
|
|
14914
14977
|
);
|
|
14915
14978
|
}
|
|
14916
14979
|
}
|
|
@@ -14979,6 +15042,217 @@ var PipelineFactory2;
|
|
|
14979
15042
|
}
|
|
14980
15043
|
PipelineFactory22.createPipeline = createPipeline;
|
|
14981
15044
|
})(PipelineFactory2 || (PipelineFactory2 = {}));
|
|
15045
|
+
/**
 * Returns the options of the top-most ancestor of `command`.
 *
 * Follows `parent` links until a node without a parent is reached, then
 * returns the result of that node's `opts()`.
 *
 * @param command - Command node (may be undefined).
 * @returns The root's options, or `{}` when there is no command or `opts()`
 *   returns a falsy value.
 */
function getGlobalOptions(command) {
  let node = command;
  for (;;) {
    const ancestor = node?.parent;
    if (!ancestor) {
      break;
    }
    node = ancestor;
  }
  const rootOptions = node?.opts();
  return rootOptions || {};
}
|
|
15052
|
+
/**
 * Reads the event bus stored on `command._eventBus`.
 *
 * @param command - Object expected to carry an `_eventBus` property.
 * @returns The stored event bus.
 * @throws {Error} When `command` is nullish or `_eventBus` is falsy.
 */
function getEventBus(command) {
  const bus = command?._eventBus;
  if (bus) {
    return bus;
  }
  throw new Error("EventBusService not initialized");
}
|
|
15059
|
+
/**
 * Makes sure a Chromium executable is available for Playwright, installing
 * it on demand.
 *
 * The check is skipped when `PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD` is `"1"` or
 * when `PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH` points at an existing file.
 * Otherwise, if `chromium.executablePath()` does not resolve to an existing
 * file, `npm exec -y playwright install --no-shell --with-deps chromium` is
 * run synchronously from the project root with its output suppressed.
 *
 * If the installation fails, an instruction is printed to stderr and the
 * process exits with code 1.
 */
function ensurePlaywrightBrowsersInstalled() {
  if (process.env.PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD === "1") {
    logger.debug(
      "PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD is set, skipping Playwright browser install."
    );
    return;
  }
  const chromiumEnvPath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH;
  if (chromiumEnvPath && existsSync(chromiumEnvPath)) {
    logger.debug(
      `PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH is set to '${chromiumEnvPath}', skipping Playwright browser install.`
    );
    return;
  }
  try {
    const chromiumPath = chromium.executablePath();
    if (!chromiumPath || !existsSync(chromiumPath)) {
      throw new Error("Playwright Chromium browser not found");
    }
  } catch (error) {
    logger.debug(String(error));
    try {
      console.log(
        "🌐 Installing Playwright Chromium browser... (this may take a moment)"
      );
      execSync("npm exec -y playwright install --no-shell --with-deps chromium", {
        stdio: "ignore",
        // Suppress output
        cwd: getProjectRoot()
      });
    } catch (installErr) {
      // Keep the underlying failure reason available in verbose mode instead
      // of discarding it; the user-facing message below stays unchanged.
      logger.debug(`Playwright browser installation failed: ${installErr}`);
      console.error(
        "❌ Failed to install Playwright browsers automatically. Please run:\n npx playwright install --no-shell --with-deps chromium\nand try again."
      );
      process.exit(1);
    }
  }
}
|
|
15097
|
+
/**
 * Resolves the requested protocol to a concrete one.
 *
 * `"auto"` resolves to `"stdio"` when neither stdin nor stdout is a TTY and
 * to `"http"` otherwise; `"stdio"` and `"http"` are returned unchanged.
 *
 * @param protocol - One of `"auto"`, `"stdio"`, `"http"`.
 * @returns `"stdio"` or `"http"`.
 * @throws {Error} For any other value.
 */
function resolveProtocol(protocol) {
  switch (protocol) {
    case "auto": {
      const hasTty = process.stdin.isTTY || process.stdout.isTTY;
      return hasTty ? "http" : "stdio";
    }
    case "stdio":
    case "http":
      return protocol;
    default:
      throw new Error(`Invalid protocol: ${protocol}. Must be 'auto', 'stdio', or 'http'`);
  }
}
|
|
15109
|
+
/** Serializes `data` as JSON indented with two spaces. */
const formatOutput = (data) => {
  const indentWidth = 2;
  return JSON.stringify(data, null, indentWidth);
};
|
|
15110
|
+
/**
 * Applies the log level implied by the CLI flags.
 *
 * `silent` takes precedence and selects `LogLevel.ERROR`; otherwise `verbose`
 * selects `LogLevel.DEBUG`. With neither flag the level is left untouched.
 *
 * @param options - Object with optional `silent` / `verbose` flags.
 * @param protocol - Accepted for interface compatibility; not used here.
 */
function setupLogging(options, protocol) {
  if (options.silent) {
    setLogLevel(LogLevel.ERROR);
    return;
  }
  if (options.verbose) {
    setLogLevel(LogLevel.DEBUG);
  }
}
|
|
15117
|
+
/**
 * Parses and validates a TCP port.
 *
 * The input must consist solely of decimal digits (surrounding whitespace is
 * tolerated). `Number.parseInt` alone would silently accept values such as
 * `"80abc"`, `"80.5"` or `"1e3"` by ignoring everything after the leading
 * digits, so the format is checked before parsing.
 *
 * @param portString - Port as a string (a number is also accepted).
 * @returns The port as an integer in the range 1-65535.
 * @throws {Error} "Invalid port number" when the value is not a plain
 *   integer or is out of range.
 */
function validatePort(portString) {
  const text = String(portString).trim();
  const port = /^\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  if (Number.isNaN(port) || port < 1 || port > 65535) {
    throw new Error("Invalid port number");
  }
  return port;
}
|
|
15124
|
+
/**
 * Validates a host string.
 *
 * Leading and trailing whitespace is stripped; the remainder must be
 * non-empty and must not contain any whitespace character (spaces, tabs,
 * line breaks, and other characters matched by `\s`).
 *
 * @param hostString - Host name or address as entered by the user.
 * @returns The trimmed host.
 * @throws {Error} "Host cannot be empty" or "Host cannot contain whitespace".
 */
function validateHost(hostString) {
  const trimmed = hostString.trim();
  if (!trimmed) {
    throw new Error("Host cannot be empty");
  }
  // A single character-class test covers every whitespace kind rather than
  // enumerating individual characters.
  if (/\s/.test(trimmed)) {
    throw new Error("Host cannot contain whitespace");
  }
  return trimmed;
}
|
|
15134
|
+
/**
 * Builds the app-server configuration from partial options.
 *
 * Nullish service flags fall back to their defaults: the MCP server and the
 * worker default to enabled; the web interface, API server and read-only
 * mode default to disabled. All other fields are copied through unchanged.
 *
 * @param options - Partial configuration.
 * @returns A new configuration object.
 */
function createAppServerConfig(options) {
  const {
    enableWebInterface,
    enableMcpServer,
    enableApiServer,
    enableWorker,
    port,
    host,
    externalWorkerUrl,
    readOnly,
    auth,
    startupContext
  } = options;
  // `??` (not destructuring defaults) so that `null` also falls back.
  return {
    enableWebInterface: enableWebInterface ?? false,
    enableMcpServer: enableMcpServer ?? true,
    enableApiServer: enableApiServer ?? false,
    enableWorker: enableWorker ?? true,
    port,
    host,
    externalWorkerUrl,
    readOnly: readOnly ?? false,
    auth,
    startupContext
  };
}
|
|
15148
|
+
/**
 * Converts `"Name: value"` strings into a header map.
 *
 * Each entry is split at its first colon; name and value are trimmed.
 * Entries without a colon, with a leading colon, or with a blank name are
 * ignored. Later entries overwrite earlier ones with the same name.
 *
 * @param headerOptions - Array of header strings; anything else yields `{}`.
 * @returns Object mapping header names to values.
 */
function parseHeaders(headerOptions) {
  const headers = {};
  if (!Array.isArray(headerOptions)) {
    return headers;
  }
  for (const entry of headerOptions) {
    const separator = entry.indexOf(":");
    if (separator <= 0) {
      continue;
    }
    const name = entry.slice(0, separator).trim();
    if (!name) {
      continue;
    }
    headers[name] = entry.slice(separator + 1).trim();
  }
  return headers;
}
|
|
15162
|
+
/**
 * Derives the auth configuration from CLI options.
 *
 * @param options - Object with `authEnabled`, `authIssuerUrl`, `authAudience`.
 * @returns `undefined` when `authEnabled` is falsy; otherwise an enabled
 *   config carrying the issuer URL, the audience and the fixed scopes
 *   `["openid", "profile"]`.
 */
function parseAuthConfig(options) {
  return options.authEnabled
    ? {
        enabled: true,
        issuerUrl: options.authIssuerUrl,
        audience: options.authAudience,
        // Default scopes for OAuth2/OIDC
        scopes: ["openid", "profile"]
      }
    : void 0;
}
|
|
15174
|
+
/**
 * Validates an auth configuration, collecting every problem before failing.
 *
 * Rules applied when `authConfig.enabled` is truthy:
 * - `issuerUrl` is required, must parse as a URL and must use `https:`.
 * - `audience` is required and must be an absolute URL or a URN of the form
 *   `urn:namespace:specific-string`; it must not contain a URL fragment.
 *   A non-localhost `http:` audience only produces a warning.
 *
 * @param authConfig - Object with `enabled`, `issuerUrl`, `audience`.
 * @throws {Error} With all collected messages joined by newlines when at
 *   least one rule is violated.
 */
function validateAuthConfig(authConfig) {
  if (!authConfig.enabled) {
    return;
  }
  const errors = [];
  // True when the value has non-empty namespace and specific-string parts.
  const isWellFormedUrn = (value) => {
    const urnParts = value.split(":");
    return urnParts.length >= 3 && Boolean(urnParts[1]) && Boolean(urnParts[2]);
  };
  const urnFormatError = "URN audience must follow format: urn:namespace:specific-string";

  if (!authConfig.issuerUrl) {
    errors.push("--auth-issuer-url is required when auth is enabled");
  } else {
    try {
      const url = new URL(authConfig.issuerUrl);
      if (url.protocol !== "https:") {
        errors.push("Issuer URL must use HTTPS protocol");
      }
    } catch {
      errors.push("Issuer URL must be a valid URL");
    }
  }

  if (!authConfig.audience) {
    errors.push("--auth-audience is required when auth is enabled");
  } else {
    try {
      const url = new URL(authConfig.audience);
      if (url.protocol === "urn:") {
        // `new URL` accepts any `urn:` string (even "urn:x"), so the URN
        // shape has to be checked here; the catch branch is not reached for
        // such values.
        if (!isWellFormedUrn(authConfig.audience)) {
          errors.push(urnFormatError);
        }
      } else if (url.protocol === "http:" && url.hostname !== "localhost") {
        logger.warn(
          "⚠️ Audience uses HTTP protocol - consider using HTTPS for production"
        );
      }
      if (url.hash) {
        errors.push("Audience must not contain URL fragments");
      }
    } catch {
      if (authConfig.audience.startsWith("urn:")) {
        if (!isWellFormedUrn(authConfig.audience)) {
          errors.push(urnFormatError);
        }
      } else {
        errors.push(
          "Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
        );
      }
    }
  }

  if (errors.length > 0) {
    throw new Error(`Auth configuration validation failed:\n${errors.join("\n")}`);
  }
}
|
|
15222
|
+
/**
 * Warns when authentication is enabled in what looks like a production
 * deployment.
 *
 * The warning is emitted only when auth is enabled, `NODE_ENV` is
 * `"production"`, the port is not 6280 (default dev port), and the
 * `HOSTNAME` environment variable does not contain "localhost".
 *
 * @param authConfig - Auth configuration (may be undefined).
 * @param port - Port the server listens on.
 */
function warnHttpUsage(authConfig, port) {
  if (!authConfig?.enabled) {
    return;
  }
  const looksLikeProduction =
    process.env.NODE_ENV === "production" &&
    port !== 6280 && // default dev port
    !process.env.HOSTNAME?.includes("localhost");
  if (looksLikeProduction) {
    logger.warn(
      "⚠️ Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
    );
  }
}
|
|
15234
|
+
/**
 * Resolves the embedding configuration for a model specification.
 *
 * When no model is given but `OPENAI_API_KEY` is set, the specification
 * defaults to `"text-embedding-3-small"`. The specification is then handed
 * to `EmbeddingConfig.parseEmbeddingConfig`.
 *
 * @param embeddingModel - Model specification, or a falsy value for "none".
 * @returns The parsed configuration, or `null` when no model can be
 *   determined or resolution throws (the error is logged at debug level).
 */
function resolveEmbeddingContext(embeddingModel) {
  try {
    const useOpenAiDefault = !embeddingModel && Boolean(process.env.OPENAI_API_KEY);
    const modelSpec = useOpenAiDefault ? "text-embedding-3-small" : embeddingModel;
    if (useOpenAiDefault) {
      logger.debug(
        "Using default OpenAI embedding model due to OPENAI_API_KEY presence."
      );
    }
    if (modelSpec) {
      logger.debug(`Resolving embedding configuration for model: ${modelSpec}`);
      return EmbeddingConfig.parseEmbeddingConfig(modelSpec);
    }
    logger.debug(
      "No embedding model specified and OPENAI_API_KEY not found. Embeddings are disabled."
    );
    return null;
  } catch (error) {
    logger.debug(`Failed to resolve embedding configuration: ${error}`);
    return null;
  }
}
|
|
14982
15256
|
function createDefaultAction(program) {
|
|
14983
15257
|
return program.addOption(
|
|
14984
15258
|
new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default("auto").choices(["auto", "stdio", "http"])
|
|
@@ -15324,7 +15598,6 @@ function createMcpCommand(program) {
|
|
|
15324
15598
|
);
|
|
15325
15599
|
if (resolvedProtocol === "stdio") {
|
|
15326
15600
|
logger.debug(`Auto-detected stdio protocol (no TTY)`);
|
|
15327
|
-
logger.info("🚀 Starting MCP server (stdio mode)");
|
|
15328
15601
|
await pipeline.start();
|
|
15329
15602
|
const mcpTools = await initializeTools(docService, pipeline);
|
|
15330
15603
|
const mcpServer = await startStdioServer(mcpTools, cmdOptions.readOnly);
|
|
@@ -15337,7 +15610,6 @@ function createMcpCommand(program) {
|
|
|
15337
15610
|
});
|
|
15338
15611
|
} else {
|
|
15339
15612
|
logger.debug(`Auto-detected http protocol (TTY available)`);
|
|
15340
|
-
logger.info("🚀 Starting MCP server (http mode)");
|
|
15341
15613
|
const config = createAppServerConfig({
|
|
15342
15614
|
enableWebInterface: false,
|
|
15343
15615
|
// Never enable web interface in mcp command
|
|
@@ -15806,9 +16078,6 @@ function createWebCommand(program) {
|
|
|
15806
16078
|
cliCommand: "web"
|
|
15807
16079
|
}
|
|
15808
16080
|
});
|
|
15809
|
-
logger.info(
|
|
15810
|
-
`🚀 Starting web interface${serverUrl ? ` connecting to worker at ${serverUrl}` : ""}`
|
|
15811
|
-
);
|
|
15812
16081
|
const appServer = await startAppServer(docService, pipeline, eventBus, config);
|
|
15813
16082
|
registerGlobalServices({
|
|
15814
16083
|
appServer,
|
|
@@ -15851,7 +16120,6 @@ function createWorkerCommand(program) {
|
|
|
15851
16120
|
const port = validatePort(cmdOptions.port);
|
|
15852
16121
|
const host = validateHost(cmdOptions.host);
|
|
15853
16122
|
try {
|
|
15854
|
-
logger.info(`🚀 Starting external pipeline worker on port ${port}`);
|
|
15855
16123
|
ensurePlaywrightBrowsersInstalled();
|
|
15856
16124
|
const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
|
|
15857
16125
|
const globalOptions = program.opts();
|
|
@@ -15902,7 +16170,7 @@ function createCliProgram() {
|
|
|
15902
16170
|
const commandStartTimes = /* @__PURE__ */ new Map();
|
|
15903
16171
|
let globalEventBus = null;
|
|
15904
16172
|
let globalTelemetryService = null;
|
|
15905
|
-
program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version("1.
|
|
16173
|
+
program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version("1.30.0").addOption(
|
|
15906
16174
|
new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
|
|
15907
16175
|
).addOption(new Option("--silent", "Disable all logging except errors")).addOption(
|
|
15908
16176
|
new Option("--telemetry", "Enable telemetry collection").env("DOCS_MCP_TELEMETRY").argParser((value) => {
|
|
@@ -15936,7 +16204,7 @@ function createCliProgram() {
|
|
|
15936
16204
|
if (shouldEnableTelemetry()) {
|
|
15937
16205
|
if (telemetry.isEnabled()) {
|
|
15938
16206
|
telemetry.setGlobalContext({
|
|
15939
|
-
appVersion: "1.
|
|
16207
|
+
appVersion: "1.30.0",
|
|
15940
16208
|
appPlatform: process.platform,
|
|
15941
16209
|
appNodeVersion: process.version,
|
|
15942
16210
|
appInterface: "cli",
|