bluera-knowledge 0.11.2 → 0.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/.env.example +11 -6
- package/CHANGELOG.md +31 -0
- package/dist/{chunk-565OVW3C.js → chunk-AHS2EILR.js} +7 -3
- package/dist/chunk-AHS2EILR.js.map +1 -0
- package/dist/{chunk-2WBITQWZ.js → chunk-TTV7P7HG.js} +2 -2
- package/dist/{chunk-TRDMYKGC.js → chunk-UB3L33JF.js} +19 -5
- package/dist/chunk-UB3L33JF.js.map +1 -0
- package/dist/index.js +3 -3
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +43 -18
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/package.json +1 -1
- package/src/mcp/server.ts +5 -1
- package/src/services/search.service.test.ts +112 -0
- package/src/services/search.service.ts +17 -4
- package/src/workers/background-worker-cli.test.ts +35 -0
- package/src/workers/background-worker-cli.ts +35 -15
- package/src/workers/background-worker.test.ts +26 -0
- package/src/workers/background-worker.ts +10 -0
- package/vitest.config.ts +7 -0
- package/dist/chunk-565OVW3C.js.map +0 -1
- package/dist/chunk-TRDMYKGC.js.map +0 -1
- /package/dist/{chunk-2WBITQWZ.js.map → chunk-TTV7P7HG.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -6,10 +6,10 @@ import {
|
|
|
6
6
|
isRepoStoreDefinition,
|
|
7
7
|
isWebStoreDefinition,
|
|
8
8
|
runMCPServer
|
|
9
|
-
} from "./chunk-565OVW3C.js";
|
|
9
|
+
} from "./chunk-AHS2EILR.js";
|
|
10
10
|
import {
|
|
11
11
|
IntelligentCrawler
|
|
12
|
-
} from "./chunk-2WBITQWZ.js";
|
|
12
|
+
} from "./chunk-TTV7P7HG.js";
|
|
13
13
|
import {
|
|
14
14
|
ASTParser,
|
|
15
15
|
AdapterRegistry,
|
|
@@ -22,7 +22,7 @@ import {
|
|
|
22
22
|
err,
|
|
23
23
|
extractRepoName,
|
|
24
24
|
ok
|
|
25
|
-
} from "./chunk-TRDMYKGC.js";
|
|
25
|
+
} from "./chunk-UB3L33JF.js";
|
|
26
26
|
import "./chunk-6FHWC36B.js";
|
|
27
27
|
|
|
28
28
|
// src/index.ts
|
package/dist/mcp/server.js
CHANGED
|
@@ -1,17 +1,20 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
IntelligentCrawler
|
|
4
|
-
} from "../chunk-2WBITQWZ.js";
|
|
4
|
+
} from "../chunk-TTV7P7HG.js";
|
|
5
5
|
import {
|
|
6
6
|
JobService,
|
|
7
7
|
createDocumentId,
|
|
8
|
+
createLogger,
|
|
8
9
|
createServices,
|
|
9
|
-
createStoreId
|
|
10
|
-
|
|
10
|
+
createStoreId,
|
|
11
|
+
shutdownLogger
|
|
12
|
+
} from "../chunk-UB3L33JF.js";
|
|
11
13
|
import "../chunk-6FHWC36B.js";
|
|
12
14
|
|
|
13
15
|
// src/workers/background-worker.ts
|
|
14
16
|
import { createHash } from "crypto";
|
|
17
|
+
var logger = createLogger("background-worker");
|
|
15
18
|
function calculateIndexProgress(current, total, scale = 100) {
|
|
16
19
|
if (total === 0) return 0;
|
|
17
20
|
return current / total * scale;
|
|
@@ -33,6 +36,7 @@ var BackgroundWorker = class {
|
|
|
33
36
|
throw new Error(`Job ${jobId} not found`);
|
|
34
37
|
}
|
|
35
38
|
try {
|
|
39
|
+
logger.info({ jobId, type: job.type }, "Starting job execution");
|
|
36
40
|
this.jobService.updateJob(jobId, {
|
|
37
41
|
status: "running",
|
|
38
42
|
message: `Starting ${job.type} operation...`,
|
|
@@ -59,6 +63,10 @@ var BackgroundWorker = class {
|
|
|
59
63
|
details: { completedAt: (/* @__PURE__ */ new Date()).toISOString() }
|
|
60
64
|
});
|
|
61
65
|
} catch (error) {
|
|
66
|
+
logger.error(
|
|
67
|
+
{ jobId, error: error instanceof Error ? error.message : String(error) },
|
|
68
|
+
"Job failed"
|
|
69
|
+
);
|
|
62
70
|
const errorDetails = {
|
|
63
71
|
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
64
72
|
};
|
|
@@ -268,12 +276,13 @@ function buildPidFilePath(jobsDir, jobId) {
|
|
|
268
276
|
}
|
|
269
277
|
|
|
270
278
|
// src/workers/background-worker-cli.ts
|
|
279
|
+
var logger2 = createLogger("background-worker-cli");
|
|
271
280
|
async function main() {
|
|
272
281
|
const jobId = process.argv[2];
|
|
273
282
|
const dataDir = process.env["BLUERA_DATA_DIR"];
|
|
274
283
|
if (jobId === void 0 || jobId === "") {
|
|
275
|
-
|
|
276
|
-
|
|
284
|
+
logger2.error("Job ID required. Usage: background-worker-cli <job-id>");
|
|
285
|
+
await shutdownLogger();
|
|
277
286
|
process.exit(1);
|
|
278
287
|
}
|
|
279
288
|
const jobService = new JobService(dataDir);
|
|
@@ -286,22 +295,27 @@ async function main() {
|
|
|
286
295
|
try {
|
|
287
296
|
writePidFile(pidFile, process.pid);
|
|
288
297
|
} catch (error) {
|
|
289
|
-
|
|
298
|
+
logger2.error(
|
|
299
|
+
{ error: error instanceof Error ? error.message : String(error) },
|
|
300
|
+
"Failed to write PID file"
|
|
301
|
+
);
|
|
302
|
+
await shutdownLogger();
|
|
290
303
|
process.exit(1);
|
|
291
304
|
}
|
|
292
305
|
process.on("SIGTERM", () => {
|
|
293
|
-
|
|
306
|
+
logger2.info({ jobId }, "Received SIGTERM, cancelling job");
|
|
294
307
|
jobService.updateJob(jobId, {
|
|
295
308
|
status: "cancelled",
|
|
296
309
|
message: "Job cancelled by user"
|
|
297
310
|
});
|
|
298
311
|
const deleteResult = deletePidFile(pidFile, "sigterm");
|
|
299
312
|
if (!deleteResult.success && deleteResult.error !== void 0) {
|
|
300
|
-
|
|
301
|
-
|
|
313
|
+
logger2.warn(
|
|
314
|
+
{ jobId, error: deleteResult.error.message },
|
|
315
|
+
"Could not remove PID file during SIGTERM"
|
|
302
316
|
);
|
|
303
317
|
}
|
|
304
|
-
process.exit(0);
|
|
318
|
+
void shutdownLogger().finally(() => process.exit(0));
|
|
305
319
|
});
|
|
306
320
|
const worker = new BackgroundWorker(
|
|
307
321
|
jobService,
|
|
@@ -314,25 +328,36 @@ async function main() {
|
|
|
314
328
|
await worker.executeJob(jobId);
|
|
315
329
|
const successCleanup = deletePidFile(pidFile, "success");
|
|
316
330
|
if (!successCleanup.success && successCleanup.error !== void 0) {
|
|
317
|
-
|
|
318
|
-
|
|
331
|
+
logger2.warn(
|
|
332
|
+
{ jobId, error: successCleanup.error.message },
|
|
333
|
+
"Could not remove PID file after success"
|
|
319
334
|
);
|
|
320
335
|
}
|
|
321
|
-
|
|
336
|
+
logger2.info({ jobId }, "Job completed successfully");
|
|
337
|
+
await shutdownLogger();
|
|
322
338
|
process.exit(0);
|
|
323
339
|
} catch (error) {
|
|
324
|
-
|
|
340
|
+
logger2.error(
|
|
341
|
+
{ jobId, error: error instanceof Error ? error.message : String(error) },
|
|
342
|
+
"Job failed"
|
|
343
|
+
);
|
|
325
344
|
const failureCleanup = deletePidFile(pidFile, "failure");
|
|
326
345
|
if (!failureCleanup.success && failureCleanup.error !== void 0) {
|
|
327
|
-
|
|
328
|
-
|
|
346
|
+
logger2.warn(
|
|
347
|
+
{ jobId, error: failureCleanup.error.message },
|
|
348
|
+
"Could not remove PID file after failure"
|
|
329
349
|
);
|
|
330
350
|
}
|
|
351
|
+
await shutdownLogger();
|
|
331
352
|
process.exit(1);
|
|
332
353
|
}
|
|
333
354
|
}
|
|
334
|
-
main().catch((error) => {
|
|
335
|
-
|
|
355
|
+
main().catch(async (error) => {
|
|
356
|
+
logger2.error(
|
|
357
|
+
{ error: error instanceof Error ? error.message : String(error) },
|
|
358
|
+
"Fatal error in background worker"
|
|
359
|
+
);
|
|
360
|
+
await shutdownLogger();
|
|
336
361
|
process.exit(1);
|
|
337
362
|
});
|
|
338
363
|
//# sourceMappingURL=background-worker-cli.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/workers/background-worker.ts","../../src/workers/pid-file.ts","../../src/workers/background-worker-cli.ts"],"sourcesContent":["import { createHash } from 'node:crypto';\nimport { IntelligentCrawler, type CrawlProgress } from '../crawl/intelligent-crawler.js';\nimport { IndexService } from '../services/index.service.js';\nimport { JobService } from '../services/job.service.js';\nimport { StoreService } from '../services/store.service.js';\nimport { createStoreId, createDocumentId } from '../types/brands.js';\nimport type { EmbeddingEngine } from '../db/embeddings.js';\nimport type { LanceStore } from '../db/lance.js';\nimport type { Document } from '../types/document.js';\nimport type { Job } from '../types/job.js';\n\n/**\n * Calculate index progress as a percentage, handling division by zero.\n * @param current - Current number of items processed\n * @param total - Total number of items (may be 0)\n * @param scale - Scale factor for progress (default 100 for 0-100%)\n * @returns Progress value, or 0 if total is 0\n */\nexport function calculateIndexProgress(\n current: number,\n total: number,\n scale: number = 100\n): number {\n if (total === 0) return 0;\n return (current / total) * scale;\n}\n\nexport class BackgroundWorker {\n constructor(\n private readonly jobService: JobService,\n private readonly storeService: StoreService,\n private readonly indexService: IndexService,\n private readonly lanceStore: LanceStore,\n private readonly embeddingEngine: EmbeddingEngine\n ) {}\n\n /**\n * Execute a job based on its type\n */\n async executeJob(jobId: string): Promise<void> {\n const job = this.jobService.getJob(jobId);\n\n if (!job) {\n throw new Error(`Job ${jobId} not found`);\n }\n\n try {\n // Update to running status\n this.jobService.updateJob(jobId, {\n status: 'running',\n message: `Starting ${job.type} operation...`,\n progress: 0,\n details: { startedAt: new Date().toISOString() },\n });\n\n // Execute based on job 
type\n switch (job.type) {\n case 'clone':\n await this.executeCloneJob(job);\n break;\n case 'index':\n await this.executeIndexJob(job);\n break;\n case 'crawl':\n await this.executeCrawlJob(job);\n break;\n default:\n throw new Error(`Unknown job type: ${String(job.type)}`);\n }\n\n // Mark as completed\n this.jobService.updateJob(jobId, {\n status: 'completed',\n progress: 100,\n message: `${job.type} operation completed successfully`,\n details: { completedAt: new Date().toISOString() },\n });\n } catch (error) {\n // Mark as failed\n const errorDetails: Record<string, unknown> = {\n completedAt: new Date().toISOString(),\n };\n if (error instanceof Error && error.stack !== undefined) {\n errorDetails['error'] = error.stack;\n } else {\n errorDetails['error'] = String(error);\n }\n this.jobService.updateJob(jobId, {\n status: 'failed',\n message: error instanceof Error ? error.message : 'Unknown error',\n details: errorDetails,\n });\n throw error;\n }\n }\n\n /**\n * Execute a clone job (git clone + initial indexing)\n */\n private async executeCloneJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for clone job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Clone is already done by the time the job is created\n // (happens in StoreService.create), so we just need to index\n\n // Update progress - cloning considered done (30%)\n this.jobService.updateJob(job.id, {\n status: 'running',\n message: 'Repository cloned, starting indexing...',\n progress: 30,\n });\n\n // Index the repository with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = 
this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Indexing is 70% of total progress (30-100%)\n const indexProgress = calculateIndexProgress(event.current, event.total, 70);\n const totalProgress = 30 + indexProgress;\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, totalProgress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute an index job (re-indexing existing store)\n */\n private async executeIndexJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for index job');\n }\n\n // Get the store\n const store = await this.storeService.getByIdOrName(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Index with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n const progress = calculateIndexProgress(event.current, event.total);\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, progress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute a crawl job (web crawling + indexing)\n */\n private async executeCrawlJob(job: Job): Promise<void> {\n const { storeId, 
url, crawlInstruction, extractInstruction, maxPages, simple, useHeadless } =\n job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for crawl job');\n }\n if (url === undefined || typeof url !== 'string') {\n throw new Error('URL required for crawl job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (store?.type !== 'web') {\n throw new Error(`Web store ${storeId} not found`);\n }\n\n const resolvedMaxPages = typeof maxPages === 'number' ? maxPages : 50;\n const crawler = new IntelligentCrawler();\n\n // Listen for progress events\n crawler.on('progress', (progress: CrawlProgress) => {\n // Check if job was cancelled - just return early, for-await loop will throw and finally will cleanup\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n return;\n }\n\n // Crawling is 80% of total progress (0-80%)\n const crawlProgress = (progress.pagesVisited / resolvedMaxPages) * 80;\n\n this.jobService.updateJob(job.id, {\n message:\n progress.message ??\n `Crawling page ${String(progress.pagesVisited)}/${String(resolvedMaxPages)}`,\n progress: Math.min(80, crawlProgress),\n details: { pagesCrawled: progress.pagesVisited },\n });\n });\n\n try {\n await this.lanceStore.initialize(store.id);\n const docs: Document[] = [];\n\n // Build crawl options, only including defined values\n const crawlOptions: {\n maxPages: number;\n simple: boolean;\n useHeadless: boolean;\n crawlInstruction?: string;\n extractInstruction?: string;\n } = {\n maxPages: resolvedMaxPages,\n simple: simple ?? false,\n useHeadless: useHeadless ?? 
true, // Default to headless for reliability\n };\n if (crawlInstruction !== undefined) {\n crawlOptions.crawlInstruction = crawlInstruction;\n }\n if (extractInstruction !== undefined) {\n crawlOptions.extractInstruction = extractInstruction;\n }\n\n // Crawl pages using IntelligentCrawler\n for await (const result of crawler.crawl(url, crawlOptions)) {\n // Check cancellation between pages\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Embed and index the content (use extracted if available, otherwise markdown)\n const contentToEmbed = result.extracted ?? result.markdown;\n const vector = await this.embeddingEngine.embed(contentToEmbed);\n\n docs.push({\n id: createDocumentId(`${store.id}-${createHash('md5').update(result.url).digest('hex')}`),\n content: contentToEmbed,\n vector,\n metadata: {\n type: 'web',\n storeId: store.id,\n url: result.url,\n title: result.title,\n extracted: result.extracted !== undefined,\n depth: result.depth,\n indexedAt: new Date(),\n },\n });\n }\n\n // Index all documents (remaining 20%)\n if (docs.length > 0) {\n this.jobService.updateJob(job.id, {\n message: 'Indexing crawled documents...',\n progress: 85,\n });\n\n await this.lanceStore.addDocuments(store.id, docs);\n // Create FTS index for full-text search\n await this.lanceStore.createFtsIndex(store.id);\n }\n\n this.jobService.updateJob(job.id, {\n message: `Crawled and indexed ${String(docs.length)} pages`,\n progress: 100,\n details: { pagesCrawled: docs.length },\n });\n } finally {\n await crawler.stop();\n }\n }\n}\n","import fs from 'fs';\nimport path from 'path';\n\n/**\n * Result of a PID file delete operation.\n * Delete operations are best-effort and should not throw.\n */\nexport interface PidFileResult {\n success: boolean;\n error?: Error;\n}\n\n/**\n * Context for PID file deletion - indicates when the delete is happening.\n * Used for logging/debugging 
purposes.\n */\nexport type PidFileDeleteContext = 'sigterm' | 'success' | 'failure';\n\n/**\n * Write PID file - CRITICAL operation that must succeed.\n *\n * If the PID file cannot be written, the job cannot be cancelled through\n * the job management system. This is a critical failure and the job\n * should not proceed.\n *\n * @param pidFile - Absolute path to the PID file\n * @param pid - Process ID to write\n * @throws Error if PID file cannot be written\n */\nexport function writePidFile(pidFile: string, pid: number): void {\n try {\n fs.writeFileSync(pidFile, pid.toString(), 'utf-8');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(\n `CRITICAL: Failed to write PID file ${pidFile}. ` +\n `Job cannot be cancelled without PID file. ` +\n `Original error: ${message}`\n );\n }\n}\n\n/**\n * Delete PID file - best-effort cleanup during shutdown.\n *\n * This operation should NEVER throw. During process shutdown (SIGTERM,\n * job success, job failure), failing to delete a PID file should not\n * prevent the process from exiting cleanly.\n *\n * Stale PID files are cleaned up by JobService.cleanupOldJobs().\n *\n * @param pidFile - Absolute path to the PID file\n * @param _context - Context indicating when the delete is happening (for future logging)\n * @returns Result indicating success or failure with error details\n */\nexport function deletePidFile(pidFile: string, _context: PidFileDeleteContext): PidFileResult {\n try {\n fs.unlinkSync(pidFile);\n return { success: true };\n } catch (error) {\n // ENOENT = file doesn't exist - that's success (nothing to delete)\n if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {\n return { success: true };\n }\n // Any other error = failure (permission denied, etc.)\n return {\n success: false,\n error: error instanceof Error ? 
error : new Error(String(error)),\n };\n }\n}\n\n/**\n * Build the path to a PID file for a given job.\n *\n * @param jobsDir - Directory where job files are stored\n * @param jobId - Job identifier\n * @returns Absolute path to the PID file\n */\nexport function buildPidFilePath(jobsDir: string, jobId: string): string {\n return path.join(jobsDir, `${jobId}.pid`);\n}\n","#!/usr/bin/env node\nimport { BackgroundWorker } from './background-worker.js';\nimport { writePidFile, deletePidFile, buildPidFilePath } from './pid-file.js';\nimport { createServices } from '../services/index.js';\nimport { JobService } from '../services/job.service.js';\n\n/**\n * Background worker CLI entry point\n *\n * Usage: background-worker-cli <job-id>\n *\n * This process runs detached from the parent and executes a single job.\n */\n\nasync function main(): Promise<void> {\n const jobId = process.argv[2];\n const dataDir = process.env['BLUERA_DATA_DIR'];\n\n if (jobId === undefined || jobId === '') {\n console.error('Error: Job ID required');\n console.error('Usage: background-worker-cli <job-id>');\n process.exit(1);\n }\n\n // Initialize services\n const jobService = new JobService(dataDir);\n const services = await createServices(undefined, dataDir);\n\n // Write PID file for job cancellation - CRITICAL: must succeed or job cannot be cancelled\n const pidFile = buildPidFilePath(\n jobService['jobsDir'], // Access private field for PID path\n jobId\n );\n\n try {\n writePidFile(pidFile, process.pid);\n } catch (error) {\n // CRITICAL: Cannot proceed without PID file - job would be uncancellable\n console.error(error instanceof Error ? 
error.message : String(error));\n process.exit(1);\n }\n\n // Handle SIGTERM for graceful shutdown\n process.on('SIGTERM', () => {\n console.log(`[${jobId}] Received SIGTERM, cancelling job...`);\n jobService.updateJob(jobId, {\n status: 'cancelled',\n message: 'Job cancelled by user',\n });\n\n // Clean up PID file (best-effort - don't block shutdown)\n const deleteResult = deletePidFile(pidFile, 'sigterm');\n if (!deleteResult.success && deleteResult.error !== undefined) {\n console.error(\n `Warning: Could not remove PID file during SIGTERM: ${deleteResult.error.message}`\n );\n }\n\n process.exit(0);\n });\n\n // Create worker and execute job\n const worker = new BackgroundWorker(\n jobService,\n services.store,\n services.index,\n services.lance,\n services.embeddings\n );\n\n try {\n await worker.executeJob(jobId);\n\n // Clean up PID file on success (best-effort - don't change exit code)\n const successCleanup = deletePidFile(pidFile, 'success');\n if (!successCleanup.success && successCleanup.error !== undefined) {\n console.error(\n `Warning: Could not remove PID file after success: ${successCleanup.error.message}`\n );\n }\n\n console.log(`[${jobId}] Job completed successfully`);\n process.exit(0);\n } catch (error) {\n // Job service already updated with failure status in BackgroundWorker\n console.error(`[${jobId}] Job failed:`, error);\n\n // Clean up PID file on failure (best-effort - exit code reflects job failure)\n const failureCleanup = deletePidFile(pidFile, 'failure');\n if (!failureCleanup.success && failureCleanup.error !== undefined) {\n console.error(\n `Warning: Could not remove PID file after failure: ${failureCleanup.error.message}`\n );\n }\n\n process.exit(1);\n }\n}\n\nmain().catch((error: unknown) => {\n console.error('Fatal error in background worker:', error);\n 
process.exit(1);\n});\n"],"mappings":";;;;;;;;;;;;;AAAA,SAAS,kBAAkB;AAkBpB,SAAS,uBACd,SACA,OACA,QAAgB,KACR;AACR,MAAI,UAAU,EAAG,QAAO;AACxB,SAAQ,UAAU,QAAS;AAC7B;AAEO,IAAM,mBAAN,MAAuB;AAAA,EAC5B,YACmB,YACA,cACA,cACA,YACA,iBACjB;AALiB;AACA;AACA;AACA;AACA;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA,EAKH,MAAM,WAAW,OAA8B;AAC7C,UAAM,MAAM,KAAK,WAAW,OAAO,KAAK;AAExC,QAAI,CAAC,KAAK;AACR,YAAM,IAAI,MAAM,OAAO,KAAK,YAAY;AAAA,IAC1C;AAEA,QAAI;AAEF,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,YAAY,IAAI,IAAI;AAAA,QAC7B,UAAU;AAAA,QACV,SAAS,EAAE,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACjD,CAAC;AAGD,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF;AACE,gBAAM,IAAI,MAAM,qBAAqB,OAAO,IAAI,IAAI,CAAC,EAAE;AAAA,MAC3D;AAGA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,GAAG,IAAI,IAAI;AAAA,QACpB,SAAS,EAAE,cAAa,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACnD,CAAC;AAAA,IACH,SAAS,OAAO;AAEd,YAAM,eAAwC;AAAA,QAC5C,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,MACtC;AACA,UAAI,iBAAiB,SAAS,MAAM,UAAU,QAAW;AACvD,qBAAa,OAAO,IAAI,MAAM;AAAA,MAChC,OAAO;AACL,qBAAa,OAAO,IAAI,OAAO,KAAK;AAAA,MACtC;AACA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AAAA,QAClD,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAMA,SAAK,WAAW,UAAU,IAAI,IAAI;AAAA,MAChC,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,CAAC;AAGD,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,gBAAgB,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AAC3E,cAAM,gBAAgB,KAAK;AAE3B,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK
,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA;AAAA,UACpC,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,cAAc,cAAc,OAAO,CAAC;AAC1E,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAGA,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAEA,cAAM,WAAW,uBAAuB,MAAM,SAAS,MAAM,KAAK;AAElE,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,QAAQ;AAAA;AAAA,UAC/B,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,SAAS,KAAK,kBAAkB,oBAAoB,UAAU,QAAQ,YAAY,IACxF,IAAI;AAEN,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,QAAI,QAAQ,UAAa,OAAO,QAAQ,UAAU;AAChD,YAAM,IAAI,MAAM,4BAA4B;AAAA,IAC9C;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,OAAO,SAAS,OAAO;AACzB,YAAM,IAAI,MAAM,aAAa,OAAO,YAAY;AAAA,IAClD;AAEA,UAAM,mBAAmB,OAAO,aAAa,WAAW,WAAW;AACnE,UAAM,UAAU,IAAI,mBAAmB;AAGvC,YAAQ,GAAG,YAAY,CAAC,aAA4B;AAElD,YAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,UAAI,YAAY,WAAW,aAAa;AACtC;AAAA,MACF;AAGA,YAAM,gBAAiB,SAAS,eAAe,mBAAoB;AAEnE,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SACE,SAAS,WACT,iBAAiB,OAAO,SAAS,YAAY,CAAC,IAAI,OAAO,gBAAgB,CAAC;AAAA,QAC5E,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA,QACpC,SAAS,EAAE,cAAc,SAAS,aAAa;AAAA,MACjD,CAAC;AAAA,IACH,CAAC;AAED,QAAI;AACF,YAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AACzC,YAAM,OAAmB,CAAC;AAG1B,YAAM,eAMF;AAAA,QACF,UAAU;AAAA,QACV,QAAQ,UAAU;AAAA,QAClB,aAAa,eAAe;AAAA;AAAA,MAC9B;AACA,UAAI,qBAAqB,QAAW;AAClC,qBAAa,mBAAmB;AAAA,MA
ClC;AACA,UAAI,uBAAuB,QAAW;AACpC,qBAAa,qBAAqB;AAAA,MACpC;AAGA,uBAAiB,UAAU,QAAQ,MAAM,KAAK,YAAY,GAAG;AAE3D,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,iBAAiB,OAAO,aAAa,OAAO;AAClD,cAAM,SAAS,MAAM,KAAK,gBAAgB,MAAM,cAAc;AAE9D,aAAK,KAAK;AAAA,UACR,IAAI,iBAAiB,GAAG,MAAM,EAAE,IAAI,WAAW,KAAK,EAAE,OAAO,OAAO,GAAG,EAAE,OAAO,KAAK,CAAC,EAAE;AAAA,UACxF,SAAS;AAAA,UACT;AAAA,UACA,UAAU;AAAA,YACR,MAAM;AAAA,YACN,SAAS,MAAM;AAAA,YACf,KAAK,OAAO;AAAA,YACZ,OAAO,OAAO;AAAA,YACd,WAAW,OAAO,cAAc;AAAA,YAChC,OAAO,OAAO;AAAA,YACd,WAAW,oBAAI,KAAK;AAAA,UACtB;AAAA,QACF,CAAC;AAAA,MACH;AAGA,UAAI,KAAK,SAAS,GAAG;AACnB,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS;AAAA,UACT,UAAU;AAAA,QACZ,CAAC;AAED,cAAM,KAAK,WAAW,aAAa,MAAM,IAAI,IAAI;AAEjD,cAAM,KAAK,WAAW,eAAe,MAAM,EAAE;AAAA,MAC/C;AAEA,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SAAS,uBAAuB,OAAO,KAAK,MAAM,CAAC;AAAA,QACnD,UAAU;AAAA,QACV,SAAS,EAAE,cAAc,KAAK,OAAO;AAAA,MACvC,CAAC;AAAA,IACH,UAAE;AACA,YAAM,QAAQ,KAAK;AAAA,IACrB;AAAA,EACF;AACF;;;ACvTA,OAAO,QAAQ;AACf,OAAO,UAAU;AA4BV,SAAS,aAAa,SAAiB,KAAmB;AAC/D,MAAI;AACF,OAAG,cAAc,SAAS,IAAI,SAAS,GAAG,OAAO;AAAA,EACnD,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,+DAExB,OAAO;AAAA,IAC9B;AAAA,EACF;AACF;AAeO,SAAS,cAAc,SAAiB,UAA+C;AAC5F,MAAI;AACF,OAAG,WAAW,OAAO;AACrB,WAAO,EAAE,SAAS,KAAK;AAAA,EACzB,SAAS,OAAO;AAEd,QAAI,iBAAiB,SAAS,UAAU,SAAS,MAAM,SAAS,UAAU;AACxE,aAAO,EAAE,SAAS,KAAK;AAAA,IACzB;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,IACjE;AAAA,EACF;AACF;AASO,SAAS,iBAAiB,SAAiB,OAAuB;AACvE,SAAO,KAAK,KAAK,SAAS,GAAG,KAAK,MAAM;AAC1C;;;ACnEA,eAAe,OAAsB;AACnC,QAAM,QAAQ,QAAQ,KAAK,CAAC;AAC5B,QAAM,UAAU,QAAQ,IAAI,iBAAiB;AAE7C,MAAI,UAAU,UAAa,UAAU,IAAI;AACvC,YAAQ,MAAM,wBAAwB;AACtC,YAAQ,MAAM,uCAAuC;AACrD,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,QAAM,aAAa,IAAI,WAAW,OAAO;AACzC,QAAM,WAAW,MAAM,eAAe,QAAW,OAAO;AAGxD,QAAM,UAAU;AAAA,IACd,WAAW,SAAS;AAAA;AAAA,IACpB;AAAA,EACF;AAEA,MAAI;AACF,iBAAa,SAAS,QAAQ,GAAG;AAAA,EAC
nC,SAAS,OAAO;AAEd,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,UAAQ,GAAG,WAAW,MAAM;AAC1B,YAAQ,IAAI,IAAI,KAAK,uCAAuC;AAC5D,eAAW,UAAU,OAAO;AAAA,MAC1B,QAAQ;AAAA,MACR,SAAS;AAAA,IACX,CAAC;AAGD,UAAM,eAAe,cAAc,SAAS,SAAS;AACrD,QAAI,CAAC,aAAa,WAAW,aAAa,UAAU,QAAW;AAC7D,cAAQ;AAAA,QACN,sDAAsD,aAAa,MAAM,OAAO;AAAA,MAClF;AAAA,IACF;AAEA,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AAGD,QAAM,SAAS,IAAI;AAAA,IACjB;AAAA,IACA,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAEA,MAAI;AACF,UAAM,OAAO,WAAW,KAAK;AAG7B,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,cAAQ;AAAA,QACN,qDAAqD,eAAe,MAAM,OAAO;AAAA,MACnF;AAAA,IACF;AAEA,YAAQ,IAAI,IAAI,KAAK,8BAA8B;AACnD,YAAQ,KAAK,CAAC;AAAA,EAChB,SAAS,OAAO;AAEd,YAAQ,MAAM,IAAI,KAAK,iBAAiB,KAAK;AAG7C,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,cAAQ;AAAA,QACN,qDAAqD,eAAe,MAAM,OAAO;AAAA,MACnF;AAAA,IACF;AAEA,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;AAEA,KAAK,EAAE,MAAM,CAAC,UAAmB;AAC/B,UAAQ,MAAM,qCAAqC,KAAK;AACxD,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/workers/background-worker.ts","../../src/workers/pid-file.ts","../../src/workers/background-worker-cli.ts"],"sourcesContent":["import { createHash } from 'node:crypto';\nimport { IntelligentCrawler, type CrawlProgress } from '../crawl/intelligent-crawler.js';\nimport { createLogger } from '../logging/index.js';\nimport { IndexService } from '../services/index.service.js';\nimport { JobService } from '../services/job.service.js';\nimport { StoreService } from '../services/store.service.js';\nimport { createStoreId, createDocumentId } from '../types/brands.js';\nimport type { EmbeddingEngine } from '../db/embeddings.js';\nimport type { LanceStore } from '../db/lance.js';\nimport type { Document } from '../types/document.js';\nimport type { Job } from '../types/job.js';\n\nconst logger = createLogger('background-worker');\n\n/**\n * Calculate index progress as a percentage, handling division by zero.\n * @param current - Current number of items processed\n * @param total - Total number of items (may be 0)\n * @param scale - Scale factor for progress (default 100 for 0-100%)\n * @returns Progress value, or 0 if total is 0\n */\nexport function calculateIndexProgress(\n current: number,\n total: number,\n scale: number = 100\n): number {\n if (total === 0) return 0;\n return (current / total) * scale;\n}\n\nexport class BackgroundWorker {\n constructor(\n private readonly jobService: JobService,\n private readonly storeService: StoreService,\n private readonly indexService: IndexService,\n private readonly lanceStore: LanceStore,\n private readonly embeddingEngine: EmbeddingEngine\n ) {}\n\n /**\n * Execute a job based on its type\n */\n async executeJob(jobId: string): Promise<void> {\n const job = this.jobService.getJob(jobId);\n\n if (!job) {\n throw new Error(`Job ${jobId} not found`);\n }\n\n try {\n logger.info({ jobId, type: job.type }, 'Starting job execution');\n\n // Update to running status\n this.jobService.updateJob(jobId, 
{\n status: 'running',\n message: `Starting ${job.type} operation...`,\n progress: 0,\n details: { startedAt: new Date().toISOString() },\n });\n\n // Execute based on job type\n switch (job.type) {\n case 'clone':\n await this.executeCloneJob(job);\n break;\n case 'index':\n await this.executeIndexJob(job);\n break;\n case 'crawl':\n await this.executeCrawlJob(job);\n break;\n default:\n throw new Error(`Unknown job type: ${String(job.type)}`);\n }\n\n // Mark as completed\n this.jobService.updateJob(jobId, {\n status: 'completed',\n progress: 100,\n message: `${job.type} operation completed successfully`,\n details: { completedAt: new Date().toISOString() },\n });\n } catch (error) {\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Mark as failed\n const errorDetails: Record<string, unknown> = {\n completedAt: new Date().toISOString(),\n };\n if (error instanceof Error && error.stack !== undefined) {\n errorDetails['error'] = error.stack;\n } else {\n errorDetails['error'] = String(error);\n }\n this.jobService.updateJob(jobId, {\n status: 'failed',\n message: error instanceof Error ? 
error.message : 'Unknown error',\n details: errorDetails,\n });\n throw error;\n }\n }\n\n /**\n * Execute a clone job (git clone + initial indexing)\n */\n private async executeCloneJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for clone job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Clone is already done by the time the job is created\n // (happens in StoreService.create), so we just need to index\n\n // Update progress - cloning considered done (30%)\n this.jobService.updateJob(job.id, {\n status: 'running',\n message: 'Repository cloned, starting indexing...',\n progress: 30,\n });\n\n // Index the repository with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Indexing is 70% of total progress (30-100%)\n const indexProgress = calculateIndexProgress(event.current, event.total, 70);\n const totalProgress = 30 + indexProgress;\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, totalProgress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute an index job (re-indexing existing store)\n */\n private async executeIndexJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for 
index job');\n }\n\n // Get the store\n const store = await this.storeService.getByIdOrName(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Index with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n const progress = calculateIndexProgress(event.current, event.total);\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, progress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute a crawl job (web crawling + indexing)\n */\n private async executeCrawlJob(job: Job): Promise<void> {\n const { storeId, url, crawlInstruction, extractInstruction, maxPages, simple, useHeadless } =\n job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for crawl job');\n }\n if (url === undefined || typeof url !== 'string') {\n throw new Error('URL required for crawl job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (store?.type !== 'web') {\n throw new Error(`Web store ${storeId} not found`);\n }\n\n const resolvedMaxPages = typeof maxPages === 'number' ? 
maxPages : 50;\n const crawler = new IntelligentCrawler();\n\n // Listen for progress events\n crawler.on('progress', (progress: CrawlProgress) => {\n // Check if job was cancelled - just return early, for-await loop will throw and finally will cleanup\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n return;\n }\n\n // Crawling is 80% of total progress (0-80%)\n const crawlProgress = (progress.pagesVisited / resolvedMaxPages) * 80;\n\n this.jobService.updateJob(job.id, {\n message:\n progress.message ??\n `Crawling page ${String(progress.pagesVisited)}/${String(resolvedMaxPages)}`,\n progress: Math.min(80, crawlProgress),\n details: { pagesCrawled: progress.pagesVisited },\n });\n });\n\n try {\n await this.lanceStore.initialize(store.id);\n const docs: Document[] = [];\n\n // Build crawl options, only including defined values\n const crawlOptions: {\n maxPages: number;\n simple: boolean;\n useHeadless: boolean;\n crawlInstruction?: string;\n extractInstruction?: string;\n } = {\n maxPages: resolvedMaxPages,\n simple: simple ?? false,\n useHeadless: useHeadless ?? true, // Default to headless for reliability\n };\n if (crawlInstruction !== undefined) {\n crawlOptions.crawlInstruction = crawlInstruction;\n }\n if (extractInstruction !== undefined) {\n crawlOptions.extractInstruction = extractInstruction;\n }\n\n // Crawl pages using IntelligentCrawler\n for await (const result of crawler.crawl(url, crawlOptions)) {\n // Check cancellation between pages\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Embed and index the content (use extracted if available, otherwise markdown)\n const contentToEmbed = result.extracted ?? 
result.markdown;\n const vector = await this.embeddingEngine.embed(contentToEmbed);\n\n docs.push({\n id: createDocumentId(`${store.id}-${createHash('md5').update(result.url).digest('hex')}`),\n content: contentToEmbed,\n vector,\n metadata: {\n type: 'web',\n storeId: store.id,\n url: result.url,\n title: result.title,\n extracted: result.extracted !== undefined,\n depth: result.depth,\n indexedAt: new Date(),\n },\n });\n }\n\n // Index all documents (remaining 20%)\n if (docs.length > 0) {\n this.jobService.updateJob(job.id, {\n message: 'Indexing crawled documents...',\n progress: 85,\n });\n\n await this.lanceStore.addDocuments(store.id, docs);\n // Create FTS index for full-text search\n await this.lanceStore.createFtsIndex(store.id);\n }\n\n this.jobService.updateJob(job.id, {\n message: `Crawled and indexed ${String(docs.length)} pages`,\n progress: 100,\n details: { pagesCrawled: docs.length },\n });\n } finally {\n await crawler.stop();\n }\n }\n}\n","import fs from 'fs';\nimport path from 'path';\n\n/**\n * Result of a PID file delete operation.\n * Delete operations are best-effort and should not throw.\n */\nexport interface PidFileResult {\n success: boolean;\n error?: Error;\n}\n\n/**\n * Context for PID file deletion - indicates when the delete is happening.\n * Used for logging/debugging purposes.\n */\nexport type PidFileDeleteContext = 'sigterm' | 'success' | 'failure';\n\n/**\n * Write PID file - CRITICAL operation that must succeed.\n *\n * If the PID file cannot be written, the job cannot be cancelled through\n * the job management system. This is a critical failure and the job\n * should not proceed.\n *\n * @param pidFile - Absolute path to the PID file\n * @param pid - Process ID to write\n * @throws Error if PID file cannot be written\n */\nexport function writePidFile(pidFile: string, pid: number): void {\n try {\n fs.writeFileSync(pidFile, pid.toString(), 'utf-8');\n } catch (error) {\n const message = error instanceof Error ? 
error.message : String(error);\n throw new Error(\n `CRITICAL: Failed to write PID file ${pidFile}. ` +\n `Job cannot be cancelled without PID file. ` +\n `Original error: ${message}`\n );\n }\n}\n\n/**\n * Delete PID file - best-effort cleanup during shutdown.\n *\n * This operation should NEVER throw. During process shutdown (SIGTERM,\n * job success, job failure), failing to delete a PID file should not\n * prevent the process from exiting cleanly.\n *\n * Stale PID files are cleaned up by JobService.cleanupOldJobs().\n *\n * @param pidFile - Absolute path to the PID file\n * @param _context - Context indicating when the delete is happening (for future logging)\n * @returns Result indicating success or failure with error details\n */\nexport function deletePidFile(pidFile: string, _context: PidFileDeleteContext): PidFileResult {\n try {\n fs.unlinkSync(pidFile);\n return { success: true };\n } catch (error) {\n // ENOENT = file doesn't exist - that's success (nothing to delete)\n if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {\n return { success: true };\n }\n // Any other error = failure (permission denied, etc.)\n return {\n success: false,\n error: error instanceof Error ? 
error : new Error(String(error)),\n };\n }\n}\n\n/**\n * Build the path to a PID file for a given job.\n *\n * @param jobsDir - Directory where job files are stored\n * @param jobId - Job identifier\n * @returns Absolute path to the PID file\n */\nexport function buildPidFilePath(jobsDir: string, jobId: string): string {\n return path.join(jobsDir, `${jobId}.pid`);\n}\n","#!/usr/bin/env node\nimport { BackgroundWorker } from './background-worker.js';\nimport { writePidFile, deletePidFile, buildPidFilePath } from './pid-file.js';\nimport { createLogger, shutdownLogger } from '../logging/index.js';\nimport { createServices } from '../services/index.js';\nimport { JobService } from '../services/job.service.js';\n\nconst logger = createLogger('background-worker-cli');\n\n/**\n * Background worker CLI entry point\n *\n * Usage: background-worker-cli <job-id>\n *\n * This process runs detached from the parent and executes a single job.\n */\n\nasync function main(): Promise<void> {\n const jobId = process.argv[2];\n const dataDir = process.env['BLUERA_DATA_DIR'];\n\n if (jobId === undefined || jobId === '') {\n logger.error('Job ID required. Usage: background-worker-cli <job-id>');\n await shutdownLogger();\n process.exit(1);\n }\n\n // Initialize services\n const jobService = new JobService(dataDir);\n const services = await createServices(undefined, dataDir);\n\n // Write PID file for job cancellation - CRITICAL: must succeed or job cannot be cancelled\n const pidFile = buildPidFilePath(\n jobService['jobsDir'], // Access private field for PID path\n jobId\n );\n\n try {\n writePidFile(pidFile, process.pid);\n } catch (error) {\n // CRITICAL: Cannot proceed without PID file - job would be uncancellable\n logger.error(\n { error: error instanceof Error ? 
error.message : String(error) },\n 'Failed to write PID file'\n );\n await shutdownLogger();\n process.exit(1);\n }\n\n // Handle SIGTERM for graceful shutdown\n process.on('SIGTERM', () => {\n logger.info({ jobId }, 'Received SIGTERM, cancelling job');\n jobService.updateJob(jobId, {\n status: 'cancelled',\n message: 'Job cancelled by user',\n });\n\n // Clean up PID file (best-effort - don't block shutdown)\n const deleteResult = deletePidFile(pidFile, 'sigterm');\n if (!deleteResult.success && deleteResult.error !== undefined) {\n logger.warn(\n { jobId, error: deleteResult.error.message },\n 'Could not remove PID file during SIGTERM'\n );\n }\n\n // Flush logs before exit (best-effort, don't await in signal handler)\n void shutdownLogger().finally(() => process.exit(0));\n });\n\n // Create worker and execute job\n const worker = new BackgroundWorker(\n jobService,\n services.store,\n services.index,\n services.lance,\n services.embeddings\n );\n\n try {\n await worker.executeJob(jobId);\n\n // Clean up PID file on success (best-effort - don't change exit code)\n const successCleanup = deletePidFile(pidFile, 'success');\n if (!successCleanup.success && successCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: successCleanup.error.message },\n 'Could not remove PID file after success'\n );\n }\n\n logger.info({ jobId }, 'Job completed successfully');\n await shutdownLogger();\n process.exit(0);\n } catch (error) {\n // Job service already updated with failure status in BackgroundWorker\n logger.error(\n { jobId, error: error instanceof Error ? 
error.message : String(error) },\n 'Job failed'\n );\n\n // Clean up PID file on failure (best-effort - exit code reflects job failure)\n const failureCleanup = deletePidFile(pidFile, 'failure');\n if (!failureCleanup.success && failureCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: failureCleanup.error.message },\n 'Could not remove PID file after failure'\n );\n }\n\n await shutdownLogger();\n process.exit(1);\n }\n}\n\nmain().catch(async (error: unknown) => {\n logger.error(\n { error: error instanceof Error ? error.message : String(error) },\n 'Fatal error in background worker'\n );\n await shutdownLogger();\n process.exit(1);\n});\n"],"mappings":";;;;;;;;;;;;;;;AAAA,SAAS,kBAAkB;AAY3B,IAAM,SAAS,aAAa,mBAAmB;AASxC,SAAS,uBACd,SACA,OACA,QAAgB,KACR;AACR,MAAI,UAAU,EAAG,QAAO;AACxB,SAAQ,UAAU,QAAS;AAC7B;AAEO,IAAM,mBAAN,MAAuB;AAAA,EAC5B,YACmB,YACA,cACA,cACA,YACA,iBACjB;AALiB;AACA;AACA;AACA;AACA;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA,EAKH,MAAM,WAAW,OAA8B;AAC7C,UAAM,MAAM,KAAK,WAAW,OAAO,KAAK;AAExC,QAAI,CAAC,KAAK;AACR,YAAM,IAAI,MAAM,OAAO,KAAK,YAAY;AAAA,IAC1C;AAEA,QAAI;AACF,aAAO,KAAK,EAAE,OAAO,MAAM,IAAI,KAAK,GAAG,wBAAwB;AAG/D,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,YAAY,IAAI,IAAI;AAAA,QAC7B,UAAU;AAAA,QACV,SAAS,EAAE,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACjD,CAAC;AAGD,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF;AACE,gBAAM,IAAI,MAAM,qBAAqB,OAAO,IAAI,IAAI,CAAC,EAAE;AAAA,MAC3D;AAGA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,GAAG,IAAI,IAAI;AAAA,QACpB,SAAS,EAAE,cAAa,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACnD,CAAC;AAAA,IACH,SAAS,OAAO;AACd,aAAO;AAAA,QACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,QACvE;AAAA,MACF;AAGA,YAAM,eAAwC;AAAA,QAC5C,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,MACtC;AACA,UAAI,iBAAiB,SAAS,MAAM,UAAU,QAAW;AACvD,qBAAa,OAAO,IAAI,MAAM;AAAA,MAChC,OAAO;AACL,qBAAa,OAAO,IAAI,OAAO,KAAK;AAAA,MACtC;AACA,WAAK,WAAW,
UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AAAA,QAClD,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAMA,SAAK,WAAW,UAAU,IAAI,IAAI;AAAA,MAChC,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,CAAC;AAGD,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,gBAAgB,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AAC3E,cAAM,gBAAgB,KAAK;AAE3B,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA;AAAA,UACpC,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,cAAc,cAAc,OAAO,CAAC;AAC1E,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAGA,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAEA,cAAM,WAAW,uBAAuB,MAAM,SAAS,MAAM,KAAK;AAElE,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,QAAQ;AAAA;AAAA,UAC/B,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,SAAS,KAAK,kBAAkB,oBAAoB,UAAU,QAAQ,YAAY,IACxF,IAAI;AAEN,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,QAAI,
QAAQ,UAAa,OAAO,QAAQ,UAAU;AAChD,YAAM,IAAI,MAAM,4BAA4B;AAAA,IAC9C;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,OAAO,SAAS,OAAO;AACzB,YAAM,IAAI,MAAM,aAAa,OAAO,YAAY;AAAA,IAClD;AAEA,UAAM,mBAAmB,OAAO,aAAa,WAAW,WAAW;AACnE,UAAM,UAAU,IAAI,mBAAmB;AAGvC,YAAQ,GAAG,YAAY,CAAC,aAA4B;AAElD,YAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,UAAI,YAAY,WAAW,aAAa;AACtC;AAAA,MACF;AAGA,YAAM,gBAAiB,SAAS,eAAe,mBAAoB;AAEnE,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SACE,SAAS,WACT,iBAAiB,OAAO,SAAS,YAAY,CAAC,IAAI,OAAO,gBAAgB,CAAC;AAAA,QAC5E,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA,QACpC,SAAS,EAAE,cAAc,SAAS,aAAa;AAAA,MACjD,CAAC;AAAA,IACH,CAAC;AAED,QAAI;AACF,YAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AACzC,YAAM,OAAmB,CAAC;AAG1B,YAAM,eAMF;AAAA,QACF,UAAU;AAAA,QACV,QAAQ,UAAU;AAAA,QAClB,aAAa,eAAe;AAAA;AAAA,MAC9B;AACA,UAAI,qBAAqB,QAAW;AAClC,qBAAa,mBAAmB;AAAA,MAClC;AACA,UAAI,uBAAuB,QAAW;AACpC,qBAAa,qBAAqB;AAAA,MACpC;AAGA,uBAAiB,UAAU,QAAQ,MAAM,KAAK,YAAY,GAAG;AAE3D,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,iBAAiB,OAAO,aAAa,OAAO;AAClD,cAAM,SAAS,MAAM,KAAK,gBAAgB,MAAM,cAAc;AAE9D,aAAK,KAAK;AAAA,UACR,IAAI,iBAAiB,GAAG,MAAM,EAAE,IAAI,WAAW,KAAK,EAAE,OAAO,OAAO,GAAG,EAAE,OAAO,KAAK,CAAC,EAAE;AAAA,UACxF,SAAS;AAAA,UACT;AAAA,UACA,UAAU;AAAA,YACR,MAAM;AAAA,YACN,SAAS,MAAM;AAAA,YACf,KAAK,OAAO;AAAA,YACZ,OAAO,OAAO;AAAA,YACd,WAAW,OAAO,cAAc;AAAA,YAChC,OAAO,OAAO;AAAA,YACd,WAAW,oBAAI,KAAK;AAAA,UACtB;AAAA,QACF,CAAC;AAAA,MACH;AAGA,UAAI,KAAK,SAAS,GAAG;AACnB,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS;AAAA,UACT,UAAU;AAAA,QACZ,CAAC;AAED,cAAM,KAAK,WAAW,aAAa,MAAM,IAAI,IAAI;AAEjD,cAAM,KAAK,WAAW,eAAe,MAAM,EAAE;AAAA,MAC/C;AAEA,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SAAS,uBAAuB,OAAO,KAAK,MAAM,CAAC;AAAA,QACnD,UAAU;AAAA,QACV,SAAS,EAAE,cAAc,KAAK,OAAO;AAAA,MACvC,CAAC;AAAA,IACH,UAAE;AACA,YAAM,QAAQ,KAAK;AAAA,IACrB;AAAA,EACF;AACF;;;ACjUA,OAAO,QAAQ;AACf,OAAO,UAAU;AA4BV,SAAS,aAAa,SAAiB,KAAmB;AAC/D,MAAI;AACF,OAAG,cAAc,SAAS,IAAI,SAAS,GAAG,OAAO;AAAA,EACnD,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;
AACrE,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,+DAExB,OAAO;AAAA,IAC9B;AAAA,EACF;AACF;AAeO,SAAS,cAAc,SAAiB,UAA+C;AAC5F,MAAI;AACF,OAAG,WAAW,OAAO;AACrB,WAAO,EAAE,SAAS,KAAK;AAAA,EACzB,SAAS,OAAO;AAEd,QAAI,iBAAiB,SAAS,UAAU,SAAS,MAAM,SAAS,UAAU;AACxE,aAAO,EAAE,SAAS,KAAK;AAAA,IACzB;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,IACjE;AAAA,EACF;AACF;AASO,SAAS,iBAAiB,SAAiB,OAAuB;AACvE,SAAO,KAAK,KAAK,SAAS,GAAG,KAAK,MAAM;AAC1C;;;AC1EA,IAAMA,UAAS,aAAa,uBAAuB;AAUnD,eAAe,OAAsB;AACnC,QAAM,QAAQ,QAAQ,KAAK,CAAC;AAC5B,QAAM,UAAU,QAAQ,IAAI,iBAAiB;AAE7C,MAAI,UAAU,UAAa,UAAU,IAAI;AACvC,IAAAA,QAAO,MAAM,wDAAwD;AACrE,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,QAAM,aAAa,IAAI,WAAW,OAAO;AACzC,QAAM,WAAW,MAAM,eAAe,QAAW,OAAO;AAGxD,QAAM,UAAU;AAAA,IACd,WAAW,SAAS;AAAA;AAAA,IACpB;AAAA,EACF;AAEA,MAAI;AACF,iBAAa,SAAS,QAAQ,GAAG;AAAA,EACnC,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MAChE;AAAA,IACF;AACA,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,UAAQ,GAAG,WAAW,MAAM;AAC1B,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,kCAAkC;AACzD,eAAW,UAAU,OAAO;AAAA,MAC1B,QAAQ;AAAA,MACR,SAAS;AAAA,IACX,CAAC;AAGD,UAAM,eAAe,cAAc,SAAS,SAAS;AACrD,QAAI,CAAC,aAAa,WAAW,aAAa,UAAU,QAAW;AAC7D,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,aAAa,MAAM,QAAQ;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAGA,SAAK,eAAe,EAAE,QAAQ,MAAM,QAAQ,KAAK,CAAC,CAAC;AAAA,EACrD,CAAC;AAGD,QAAM,SAAS,IAAI;AAAA,IACjB;AAAA,IACA,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAEA,MAAI;AACF,UAAM,OAAO,WAAW,KAAK;AAG7B,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,4BAA4B;AACnD,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MACvE;AAAA,IACF;AAGA,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,
MACF;AAAA,IACF;AAEA,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;AAEA,KAAK,EAAE,MAAM,OAAO,UAAmB;AACrC,EAAAA,QAAO;AAAA,IACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,IAChE;AAAA,EACF;AACA,QAAM,eAAe;AACrB,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":["logger"]}
|
package/package.json
CHANGED
package/src/mcp/server.ts
CHANGED
|
@@ -202,10 +202,14 @@ const scriptPath = process.argv[1] ?? '';
|
|
|
202
202
|
const isMCPServerEntry = scriptPath.endsWith('mcp/server.js') || scriptPath.endsWith('mcp/server');
|
|
203
203
|
|
|
204
204
|
if (isMCPServerEntry) {
|
|
205
|
+
const projectRoot = process.env['PROJECT_ROOT'];
|
|
206
|
+
if (projectRoot === undefined) {
|
|
207
|
+
throw new Error('PROJECT_ROOT environment variable is required');
|
|
208
|
+
}
|
|
205
209
|
runMCPServer({
|
|
206
210
|
dataDir: process.env['DATA_DIR'],
|
|
207
211
|
config: process.env['CONFIG_PATH'],
|
|
208
|
-
projectRoot
|
|
212
|
+
projectRoot,
|
|
209
213
|
}).catch((error: unknown) => {
|
|
210
214
|
logger.error(
|
|
211
215
|
{ error: error instanceof Error ? error.message : String(error) },
|
|
@@ -2180,3 +2180,115 @@ describe('SearchService - Raw Score and Confidence', () => {
|
|
|
2180
2180
|
expect(results.confidence).toBe('high');
|
|
2181
2181
|
});
|
|
2182
2182
|
});
|
|
2183
|
+
|
|
2184
|
+
describe('SearchService Environment Variables', () => {
|
|
2185
|
+
afterEach(() => {
|
|
2186
|
+
vi.unstubAllEnvs();
|
|
2187
|
+
});
|
|
2188
|
+
|
|
2189
|
+
it('throws if SEARCH_CONFIDENCE_HIGH is not set', async () => {
|
|
2190
|
+
vi.stubEnv('SEARCH_CONFIDENCE_HIGH', undefined as unknown as string);
|
|
2191
|
+
vi.stubEnv('SEARCH_CONFIDENCE_MEDIUM', '0.3');
|
|
2192
|
+
vi.stubEnv('SEARCH_TEST_FILE_BOOST', '0.5');
|
|
2193
|
+
|
|
2194
|
+
const tempDir = await mkdtemp(join(tmpdir(), 'search-env-test-'));
|
|
2195
|
+
const lanceStore = new LanceStore(tempDir);
|
|
2196
|
+
const embeddingEngine = new EmbeddingEngine();
|
|
2197
|
+
await embeddingEngine.initialize();
|
|
2198
|
+
|
|
2199
|
+
const storeId = createStoreId('env-test-store');
|
|
2200
|
+
await lanceStore.initialize(storeId);
|
|
2201
|
+
|
|
2202
|
+
const text = 'test document';
|
|
2203
|
+
const vector = await embeddingEngine.embed(text);
|
|
2204
|
+
await lanceStore.addDocuments(storeId, [
|
|
2205
|
+
{
|
|
2206
|
+
id: createDocumentId('doc-1'),
|
|
2207
|
+
content: text,
|
|
2208
|
+
vector,
|
|
2209
|
+
metadata: { type: 'file', storeId, indexedAt: new Date() },
|
|
2210
|
+
},
|
|
2211
|
+
]);
|
|
2212
|
+
|
|
2213
|
+
const searchService = new SearchService(lanceStore, embeddingEngine);
|
|
2214
|
+
|
|
2215
|
+
await expect(searchService.search({ query: 'test', stores: [storeId] })).rejects.toThrow(
|
|
2216
|
+
'SEARCH_CONFIDENCE_HIGH environment variable is required'
|
|
2217
|
+
);
|
|
2218
|
+
|
|
2219
|
+
await rm(tempDir, { recursive: true });
|
|
2220
|
+
});
|
|
2221
|
+
|
|
2222
|
+
it('throws if SEARCH_CONFIDENCE_MEDIUM is not set', async () => {
|
|
2223
|
+
vi.stubEnv('SEARCH_CONFIDENCE_HIGH', '0.5');
|
|
2224
|
+
vi.stubEnv('SEARCH_CONFIDENCE_MEDIUM', undefined as unknown as string);
|
|
2225
|
+
vi.stubEnv('SEARCH_TEST_FILE_BOOST', '0.5');
|
|
2226
|
+
|
|
2227
|
+
const tempDir = await mkdtemp(join(tmpdir(), 'search-env-test-'));
|
|
2228
|
+
const lanceStore = new LanceStore(tempDir);
|
|
2229
|
+
const embeddingEngine = new EmbeddingEngine();
|
|
2230
|
+
await embeddingEngine.initialize();
|
|
2231
|
+
|
|
2232
|
+
const storeId = createStoreId('env-test-store');
|
|
2233
|
+
await lanceStore.initialize(storeId);
|
|
2234
|
+
|
|
2235
|
+
const text = 'test document';
|
|
2236
|
+
const vector = await embeddingEngine.embed(text);
|
|
2237
|
+
await lanceStore.addDocuments(storeId, [
|
|
2238
|
+
{
|
|
2239
|
+
id: createDocumentId('doc-1'),
|
|
2240
|
+
content: text,
|
|
2241
|
+
vector,
|
|
2242
|
+
metadata: { type: 'file', storeId, indexedAt: new Date() },
|
|
2243
|
+
},
|
|
2244
|
+
]);
|
|
2245
|
+
|
|
2246
|
+
const searchService = new SearchService(lanceStore, embeddingEngine);
|
|
2247
|
+
|
|
2248
|
+
await expect(searchService.search({ query: 'test', stores: [storeId] })).rejects.toThrow(
|
|
2249
|
+
'SEARCH_CONFIDENCE_MEDIUM environment variable is required'
|
|
2250
|
+
);
|
|
2251
|
+
|
|
2252
|
+
await rm(tempDir, { recursive: true });
|
|
2253
|
+
});
|
|
2254
|
+
|
|
2255
|
+
it('throws if SEARCH_TEST_FILE_BOOST is not set when ranking test files', async () => {
|
|
2256
|
+
vi.stubEnv('SEARCH_CONFIDENCE_HIGH', '0.5');
|
|
2257
|
+
vi.stubEnv('SEARCH_CONFIDENCE_MEDIUM', '0.3');
|
|
2258
|
+
vi.stubEnv('SEARCH_TEST_FILE_BOOST', undefined as unknown as string);
|
|
2259
|
+
|
|
2260
|
+
const tempDir = await mkdtemp(join(tmpdir(), 'search-env-test-'));
|
|
2261
|
+
const lanceStore = new LanceStore(tempDir);
|
|
2262
|
+
const embeddingEngine = new EmbeddingEngine();
|
|
2263
|
+
await embeddingEngine.initialize();
|
|
2264
|
+
|
|
2265
|
+
const storeId = createStoreId('env-test-store');
|
|
2266
|
+
await lanceStore.initialize(storeId);
|
|
2267
|
+
|
|
2268
|
+
// Add a test file document with fileType: 'test'
|
|
2269
|
+
const text = 'test document content';
|
|
2270
|
+
const vector = await embeddingEngine.embed(text);
|
|
2271
|
+
await lanceStore.addDocuments(storeId, [
|
|
2272
|
+
{
|
|
2273
|
+
id: createDocumentId('test-file'),
|
|
2274
|
+
content: text,
|
|
2275
|
+
vector,
|
|
2276
|
+
metadata: {
|
|
2277
|
+
type: 'file',
|
|
2278
|
+
storeId,
|
|
2279
|
+
indexedAt: new Date(),
|
|
2280
|
+
filePath: 'tests/example.test.ts',
|
|
2281
|
+
fileType: 'test', // Triggers SEARCH_TEST_FILE_BOOST check
|
|
2282
|
+
},
|
|
2283
|
+
},
|
|
2284
|
+
]);
|
|
2285
|
+
|
|
2286
|
+
const searchService = new SearchService(lanceStore, embeddingEngine);
|
|
2287
|
+
|
|
2288
|
+
await expect(searchService.search({ query: 'test', stores: [storeId] })).rejects.toThrow(
|
|
2289
|
+
'SEARCH_TEST_FILE_BOOST environment variable is required'
|
|
2290
|
+
);
|
|
2291
|
+
|
|
2292
|
+
await rm(tempDir, { recursive: true });
|
|
2293
|
+
});
|
|
2294
|
+
});
|
|
@@ -252,8 +252,16 @@ export class SearchService {
|
|
|
252
252
|
* Configurable via environment variables.
|
|
253
253
|
*/
|
|
254
254
|
private calculateConfidence(maxRawScore: number): SearchConfidence {
|
|
255
|
-
const
|
|
256
|
-
const
|
|
255
|
+
const highEnv = process.env['SEARCH_CONFIDENCE_HIGH'];
|
|
256
|
+
const mediumEnv = process.env['SEARCH_CONFIDENCE_MEDIUM'];
|
|
257
|
+
if (highEnv === undefined) {
|
|
258
|
+
throw new Error('SEARCH_CONFIDENCE_HIGH environment variable is required');
|
|
259
|
+
}
|
|
260
|
+
if (mediumEnv === undefined) {
|
|
261
|
+
throw new Error('SEARCH_CONFIDENCE_MEDIUM environment variable is required');
|
|
262
|
+
}
|
|
263
|
+
const highThreshold = parseFloat(highEnv);
|
|
264
|
+
const mediumThreshold = parseFloat(mediumEnv);
|
|
257
265
|
|
|
258
266
|
if (maxRawScore >= highThreshold) return 'high';
|
|
259
267
|
if (maxRawScore >= mediumThreshold) return 'medium';
|
|
@@ -751,9 +759,14 @@ export class SearchService {
|
|
|
751
759
|
case 'source-internal':
|
|
752
760
|
baseBoost = 0.75; // Internal implementation files (not too harsh)
|
|
753
761
|
break;
|
|
754
|
-
case 'test':
|
|
755
|
-
|
|
762
|
+
case 'test': {
|
|
763
|
+
const testBoostEnv = process.env['SEARCH_TEST_FILE_BOOST'];
|
|
764
|
+
if (testBoostEnv === undefined) {
|
|
765
|
+
throw new Error('SEARCH_TEST_FILE_BOOST environment variable is required');
|
|
766
|
+
}
|
|
767
|
+
baseBoost = parseFloat(testBoostEnv);
|
|
756
768
|
break;
|
|
769
|
+
}
|
|
757
770
|
case 'config':
|
|
758
771
|
baseBoost = 0.5; // Config files rarely answer questions
|
|
759
772
|
break;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { readFileSync } from 'fs';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Tests to verify background-worker-cli.ts uses file logging instead of console.
|
|
7
|
+
* This prevents silent failures when the worker is spawned with stdio: 'ignore'.
|
|
8
|
+
*/
|
|
9
|
+
describe('BackgroundWorkerCLI Logging', () => {
|
|
10
|
+
const sourceFile = join(process.cwd(), 'src/workers/background-worker-cli.ts');
|
|
11
|
+
const source = readFileSync(sourceFile, 'utf-8');
|
|
12
|
+
|
|
13
|
+
it('imports createLogger from logging module', () => {
|
|
14
|
+
expect(source).toContain('createLogger');
|
|
15
|
+
expect(source).toContain("from '../logging/index.js'");
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
it('imports shutdownLogger for graceful exit', () => {
|
|
19
|
+
expect(source).toContain('shutdownLogger');
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
it('does not use console.log', () => {
|
|
23
|
+
// Filter out comments
|
|
24
|
+
const lines = source.split('\n').filter((line) => !line.trim().startsWith('//'));
|
|
25
|
+
const hasConsoleLog = lines.some((line) => /\bconsole\.log\b/.test(line));
|
|
26
|
+
expect(hasConsoleLog).toBe(false);
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
it('does not use console.error', () => {
|
|
30
|
+
// Filter out comments
|
|
31
|
+
const lines = source.split('\n').filter((line) => !line.trim().startsWith('//'));
|
|
32
|
+
const hasConsoleError = lines.some((line) => /\bconsole\.error\b/.test(line));
|
|
33
|
+
expect(hasConsoleError).toBe(false);
|
|
34
|
+
});
|
|
35
|
+
});
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { BackgroundWorker } from './background-worker.js';
|
|
3
3
|
import { writePidFile, deletePidFile, buildPidFilePath } from './pid-file.js';
|
|
4
|
+
import { createLogger, shutdownLogger } from '../logging/index.js';
|
|
4
5
|
import { createServices } from '../services/index.js';
|
|
5
6
|
import { JobService } from '../services/job.service.js';
|
|
6
7
|
|
|
8
|
+
const logger = createLogger('background-worker-cli');
|
|
9
|
+
|
|
7
10
|
/**
|
|
8
11
|
* Background worker CLI entry point
|
|
9
12
|
*
|
|
@@ -17,8 +20,8 @@ async function main(): Promise<void> {
|
|
|
17
20
|
const dataDir = process.env['BLUERA_DATA_DIR'];
|
|
18
21
|
|
|
19
22
|
if (jobId === undefined || jobId === '') {
|
|
20
|
-
|
|
21
|
-
|
|
23
|
+
logger.error('Job ID required. Usage: background-worker-cli <job-id>');
|
|
24
|
+
await shutdownLogger();
|
|
22
25
|
process.exit(1);
|
|
23
26
|
}
|
|
24
27
|
|
|
@@ -36,13 +39,17 @@ async function main(): Promise<void> {
|
|
|
36
39
|
writePidFile(pidFile, process.pid);
|
|
37
40
|
} catch (error) {
|
|
38
41
|
// CRITICAL: Cannot proceed without PID file - job would be uncancellable
|
|
39
|
-
|
|
42
|
+
logger.error(
|
|
43
|
+
{ error: error instanceof Error ? error.message : String(error) },
|
|
44
|
+
'Failed to write PID file'
|
|
45
|
+
);
|
|
46
|
+
await shutdownLogger();
|
|
40
47
|
process.exit(1);
|
|
41
48
|
}
|
|
42
49
|
|
|
43
50
|
// Handle SIGTERM for graceful shutdown
|
|
44
51
|
process.on('SIGTERM', () => {
|
|
45
|
-
|
|
52
|
+
logger.info({ jobId }, 'Received SIGTERM, cancelling job');
|
|
46
53
|
jobService.updateJob(jobId, {
|
|
47
54
|
status: 'cancelled',
|
|
48
55
|
message: 'Job cancelled by user',
|
|
@@ -51,12 +58,14 @@ async function main(): Promise<void> {
|
|
|
51
58
|
// Clean up PID file (best-effort - don't block shutdown)
|
|
52
59
|
const deleteResult = deletePidFile(pidFile, 'sigterm');
|
|
53
60
|
if (!deleteResult.success && deleteResult.error !== undefined) {
|
|
54
|
-
|
|
55
|
-
|
|
61
|
+
logger.warn(
|
|
62
|
+
{ jobId, error: deleteResult.error.message },
|
|
63
|
+
'Could not remove PID file during SIGTERM'
|
|
56
64
|
);
|
|
57
65
|
}
|
|
58
66
|
|
|
59
|
-
|
|
67
|
+
// Flush logs before exit (best-effort, don't await in signal handler)
|
|
68
|
+
void shutdownLogger().finally(() => process.exit(0));
|
|
60
69
|
});
|
|
61
70
|
|
|
62
71
|
// Create worker and execute job
|
|
@@ -74,30 +83,41 @@ async function main(): Promise<void> {
|
|
|
74
83
|
// Clean up PID file on success (best-effort - don't change exit code)
|
|
75
84
|
const successCleanup = deletePidFile(pidFile, 'success');
|
|
76
85
|
if (!successCleanup.success && successCleanup.error !== undefined) {
|
|
77
|
-
|
|
78
|
-
|
|
86
|
+
logger.warn(
|
|
87
|
+
{ jobId, error: successCleanup.error.message },
|
|
88
|
+
'Could not remove PID file after success'
|
|
79
89
|
);
|
|
80
90
|
}
|
|
81
91
|
|
|
82
|
-
|
|
92
|
+
logger.info({ jobId }, 'Job completed successfully');
|
|
93
|
+
await shutdownLogger();
|
|
83
94
|
process.exit(0);
|
|
84
95
|
} catch (error) {
|
|
85
96
|
// Job service already updated with failure status in BackgroundWorker
|
|
86
|
-
|
|
97
|
+
logger.error(
|
|
98
|
+
{ jobId, error: error instanceof Error ? error.message : String(error) },
|
|
99
|
+
'Job failed'
|
|
100
|
+
);
|
|
87
101
|
|
|
88
102
|
// Clean up PID file on failure (best-effort - exit code reflects job failure)
|
|
89
103
|
const failureCleanup = deletePidFile(pidFile, 'failure');
|
|
90
104
|
if (!failureCleanup.success && failureCleanup.error !== undefined) {
|
|
91
|
-
|
|
92
|
-
|
|
105
|
+
logger.warn(
|
|
106
|
+
{ jobId, error: failureCleanup.error.message },
|
|
107
|
+
'Could not remove PID file after failure'
|
|
93
108
|
);
|
|
94
109
|
}
|
|
95
110
|
|
|
111
|
+
await shutdownLogger();
|
|
96
112
|
process.exit(1);
|
|
97
113
|
}
|
|
98
114
|
}
|
|
99
115
|
|
|
100
|
-
main().catch((error: unknown) => {
|
|
101
|
-
|
|
116
|
+
main().catch(async (error: unknown) => {
|
|
117
|
+
logger.error(
|
|
118
|
+
{ error: error instanceof Error ? error.message : String(error) },
|
|
119
|
+
'Fatal error in background worker'
|
|
120
|
+
);
|
|
121
|
+
await shutdownLogger();
|
|
102
122
|
process.exit(1);
|
|
103
123
|
});
|
|
@@ -191,3 +191,29 @@ describe('BackgroundWorker', () => {
|
|
|
191
191
|
});
|
|
192
192
|
});
|
|
193
193
|
});
|
|
194
|
+
|
|
195
|
+
/**
|
|
196
|
+
* Tests to verify background-worker.ts uses file logging for visibility.
|
|
197
|
+
* This ensures job execution is observable in log files.
|
|
198
|
+
*/
|
|
199
|
+
describe('BackgroundWorker Logging', () => {
|
|
200
|
+
const { readFileSync } = require('fs');
|
|
201
|
+
const source = readFileSync('src/workers/background-worker.ts', 'utf-8');
|
|
202
|
+
|
|
203
|
+
it('imports createLogger from logging module', () => {
|
|
204
|
+
expect(source).toContain('createLogger');
|
|
205
|
+
expect(source).toContain("from '../logging/index.js'");
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
it('logs job start with jobId and type', () => {
|
|
209
|
+
// Verify logger.info is called with job start pattern
|
|
210
|
+
expect(source).toContain('logger.info');
|
|
211
|
+
expect(source).toContain('Starting job');
|
|
212
|
+
});
|
|
213
|
+
|
|
214
|
+
it('logs job failure with error details', () => {
|
|
215
|
+
// Verify logger.error is called for failures
|
|
216
|
+
expect(source).toContain('logger.error');
|
|
217
|
+
expect(source).toContain('Job failed');
|
|
218
|
+
});
|
|
219
|
+
});
|