bluera-knowledge 0.12.7 → 0.12.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7,10 +7,10 @@ import {
7
7
  isWebStoreDefinition,
8
8
  runMCPServer,
9
9
  spawnBackgroundWorker
10
- } from "./chunk-PFBSZTP3.js";
10
+ } from "./chunk-4ZBK7V54.js";
11
11
  import {
12
12
  IntelligentCrawler
13
- } from "./chunk-QCSFBMYW.js";
13
+ } from "./chunk-6777ULXC.js";
14
14
  import {
15
15
  ASTParser,
16
16
  AdapterRegistry,
@@ -24,7 +24,7 @@ import {
24
24
  err,
25
25
  extractRepoName,
26
26
  ok
27
- } from "./chunk-C4SYGLAI.js";
27
+ } from "./chunk-VTATT3IR.js";
28
28
  import "./chunk-HRQD3MPH.js";
29
29
 
30
30
  // src/index.ts
@@ -1,8 +1,8 @@
1
1
  import {
2
2
  createMCPServer,
3
3
  runMCPServer
4
- } from "../chunk-PFBSZTP3.js";
5
- import "../chunk-C4SYGLAI.js";
4
+ } from "../chunk-4ZBK7V54.js";
5
+ import "../chunk-VTATT3IR.js";
6
6
  import "../chunk-HRQD3MPH.js";
7
7
  export {
8
8
  createMCPServer,
@@ -1,17 +1,21 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  IntelligentCrawler
4
- } from "../chunk-QCSFBMYW.js";
4
+ } from "../chunk-6777ULXC.js";
5
5
  import {
6
6
  JobService,
7
7
  createDocumentId,
8
8
  createLogger,
9
9
  createServices,
10
10
  createStoreId,
11
+ destroyServices,
11
12
  shutdownLogger
12
- } from "../chunk-C4SYGLAI.js";
13
+ } from "../chunk-VTATT3IR.js";
13
14
  import "../chunk-HRQD3MPH.js";
14
15
 
16
+ // src/workers/background-worker-cli.ts
17
+ import { platform } from "os";
18
+
15
19
  // src/workers/background-worker.ts
16
20
  import { createHash } from "crypto";
17
21
  var logger = createLogger("background-worker");
@@ -276,6 +280,15 @@ function buildPidFilePath(jobsDir, jobId) {
276
280
  }
277
281
 
278
282
  // src/workers/background-worker-cli.ts
283
+ function forceExitOnMacOS(exitCode) {
284
+ if (platform() === "darwin") {
285
+ setTimeout(() => {
286
+ process.kill(process.pid, "SIGKILL");
287
+ }, 100);
288
+ } else {
289
+ process.exit(exitCode);
290
+ }
291
+ }
279
292
  var logger2 = createLogger("background-worker-cli");
280
293
  async function main() {
281
294
  const jobId = process.argv[2];
@@ -334,8 +347,9 @@ async function main() {
334
347
  );
335
348
  }
336
349
  logger2.info({ jobId }, "Job completed successfully");
350
+ await destroyServices(services);
337
351
  await shutdownLogger();
338
- process.exit(0);
352
+ forceExitOnMacOS(0);
339
353
  } catch (error) {
340
354
  logger2.error(
341
355
  { jobId, error: error instanceof Error ? error.message : String(error) },
@@ -348,8 +362,9 @@ async function main() {
348
362
  "Could not remove PID file after failure"
349
363
  );
350
364
  }
365
+ await destroyServices(services);
351
366
  await shutdownLogger();
352
- process.exit(1);
367
+ forceExitOnMacOS(1);
353
368
  }
354
369
  }
355
370
  main().catch(async (error) => {
@@ -358,6 +373,6 @@ main().catch(async (error) => {
358
373
  "Fatal error in background worker"
359
374
  );
360
375
  await shutdownLogger();
361
- process.exit(1);
376
+ forceExitOnMacOS(1);
362
377
  });
363
378
  //# sourceMappingURL=background-worker-cli.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/workers/background-worker.ts","../../src/workers/pid-file.ts","../../src/workers/background-worker-cli.ts"],"sourcesContent":["import { createHash } from 'node:crypto';\nimport { IntelligentCrawler, type CrawlProgress } from '../crawl/intelligent-crawler.js';\nimport { createLogger } from '../logging/index.js';\nimport { IndexService } from '../services/index.service.js';\nimport { JobService } from '../services/job.service.js';\nimport { StoreService } from '../services/store.service.js';\nimport { createStoreId, createDocumentId } from '../types/brands.js';\nimport type { EmbeddingEngine } from '../db/embeddings.js';\nimport type { LanceStore } from '../db/lance.js';\nimport type { Document } from '../types/document.js';\nimport type { Job } from '../types/job.js';\n\nconst logger = createLogger('background-worker');\n\n/**\n * Calculate index progress as a percentage, handling division by zero.\n * @param current - Current number of items processed\n * @param total - Total number of items (may be 0)\n * @param scale - Scale factor for progress (default 100 for 0-100%)\n * @returns Progress value, or 0 if total is 0\n */\nexport function calculateIndexProgress(\n current: number,\n total: number,\n scale: number = 100\n): number {\n if (total === 0) return 0;\n return (current / total) * scale;\n}\n\nexport class BackgroundWorker {\n constructor(\n private readonly jobService: JobService,\n private readonly storeService: StoreService,\n private readonly indexService: IndexService,\n private readonly lanceStore: LanceStore,\n private readonly embeddingEngine: EmbeddingEngine\n ) {}\n\n /**\n * Execute a job based on its type\n */\n async executeJob(jobId: string): Promise<void> {\n const job = this.jobService.getJob(jobId);\n\n if (!job) {\n throw new Error(`Job ${jobId} not found`);\n }\n\n try {\n logger.info({ jobId, type: job.type }, 'Starting job execution');\n\n // Update to running status\n this.jobService.updateJob(jobId, {\n status: 'running',\n message: `Starting ${job.type} operation...`,\n progress: 0,\n details: { startedAt: new Date().toISOString() },\n });\n\n // Execute based on job type\n switch (job.type) {\n case 'clone':\n await this.executeCloneJob(job);\n break;\n case 'index':\n await this.executeIndexJob(job);\n break;\n case 'crawl':\n await this.executeCrawlJob(job);\n break;\n default:\n throw new Error(`Unknown job type: ${String(job.type)}`);\n }\n\n // Mark as completed\n this.jobService.updateJob(jobId, {\n status: 'completed',\n progress: 100,\n message: `${job.type} operation completed successfully`,\n details: { completedAt: new Date().toISOString() },\n });\n } catch (error) {\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Mark as failed\n const errorDetails: Record<string, unknown> = {\n completedAt: new Date().toISOString(),\n };\n if (error instanceof Error && error.stack !== undefined) {\n errorDetails['error'] = error.stack;\n } else {\n errorDetails['error'] = String(error);\n }\n this.jobService.updateJob(jobId, {\n status: 'failed',\n message: error instanceof Error ? error.message : 'Unknown error',\n details: errorDetails,\n });\n throw error;\n }\n }\n\n /**\n * Execute a clone job (git clone + initial indexing)\n */\n private async executeCloneJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for clone job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Clone is already done by the time the job is created\n // (happens in StoreService.create), so we just need to index\n\n // Update progress - cloning considered done (30%)\n this.jobService.updateJob(job.id, {\n status: 'running',\n message: 'Repository cloned, starting indexing...',\n progress: 30,\n });\n\n // Index the repository with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Indexing is 70% of total progress (30-100%)\n const indexProgress = calculateIndexProgress(event.current, event.total, 70);\n const totalProgress = 30 + indexProgress;\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, totalProgress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute an index job (re-indexing existing store)\n */\n private async executeIndexJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for index job');\n }\n\n // Get the store\n const store = await this.storeService.getByIdOrName(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Index with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n const progress = calculateIndexProgress(event.current, event.total);\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, progress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute a crawl job (web crawling + indexing)\n */\n private async executeCrawlJob(job: Job): Promise<void> {\n const { storeId, url, crawlInstruction, extractInstruction, maxPages, simple, useHeadless } =\n job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for crawl job');\n }\n if (url === undefined || typeof url !== 'string') {\n throw new Error('URL required for crawl job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (store?.type !== 'web') {\n throw new Error(`Web store ${storeId} not found`);\n }\n\n const resolvedMaxPages = typeof maxPages === 'number' ? maxPages : 50;\n const crawler = new IntelligentCrawler();\n\n // Listen for progress events\n crawler.on('progress', (progress: CrawlProgress) => {\n // Check if job was cancelled - just return early, for-await loop will throw and finally will cleanup\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n return;\n }\n\n // Crawling is 80% of total progress (0-80%)\n const crawlProgress = (progress.pagesVisited / resolvedMaxPages) * 80;\n\n this.jobService.updateJob(job.id, {\n message:\n progress.message ??\n `Crawling page ${String(progress.pagesVisited)}/${String(resolvedMaxPages)}`,\n progress: Math.min(80, crawlProgress),\n details: { pagesCrawled: progress.pagesVisited },\n });\n });\n\n try {\n await this.lanceStore.initialize(store.id);\n const docs: Document[] = [];\n\n // Build crawl options, only including defined values\n const crawlOptions: {\n maxPages: number;\n simple: boolean;\n useHeadless: boolean;\n crawlInstruction?: string;\n extractInstruction?: string;\n } = {\n maxPages: resolvedMaxPages,\n simple: simple ?? false,\n useHeadless: useHeadless ?? true, // Default to headless for reliability\n };\n if (crawlInstruction !== undefined) {\n crawlOptions.crawlInstruction = crawlInstruction;\n }\n if (extractInstruction !== undefined) {\n crawlOptions.extractInstruction = extractInstruction;\n }\n\n // Crawl pages using IntelligentCrawler\n for await (const result of crawler.crawl(url, crawlOptions)) {\n // Check cancellation between pages\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Embed and index the content (use extracted if available, otherwise markdown)\n const contentToEmbed = result.extracted ?? result.markdown;\n const vector = await this.embeddingEngine.embed(contentToEmbed);\n\n docs.push({\n id: createDocumentId(`${store.id}-${createHash('md5').update(result.url).digest('hex')}`),\n content: contentToEmbed,\n vector,\n metadata: {\n type: 'web',\n storeId: store.id,\n url: result.url,\n title: result.title,\n extracted: result.extracted !== undefined,\n depth: result.depth,\n indexedAt: new Date(),\n },\n });\n }\n\n // Index all documents (remaining 20%)\n if (docs.length > 0) {\n this.jobService.updateJob(job.id, {\n message: 'Indexing crawled documents...',\n progress: 85,\n });\n\n await this.lanceStore.addDocuments(store.id, docs);\n // Create FTS index for full-text search\n await this.lanceStore.createFtsIndex(store.id);\n }\n\n this.jobService.updateJob(job.id, {\n message: `Crawled and indexed ${String(docs.length)} pages`,\n progress: 100,\n details: { pagesCrawled: docs.length },\n });\n } finally {\n await crawler.stop();\n }\n }\n}\n","import fs from 'fs';\nimport path from 'path';\n\n/**\n * Result of a PID file delete operation.\n * Delete operations are best-effort and should not throw.\n */\nexport interface PidFileResult {\n success: boolean;\n error?: Error;\n}\n\n/**\n * Context for PID file deletion - indicates when the delete is happening.\n * Used for logging/debugging purposes.\n */\nexport type PidFileDeleteContext = 'sigterm' | 'success' | 'failure';\n\n/**\n * Write PID file - CRITICAL operation that must succeed.\n *\n * If the PID file cannot be written, the job cannot be cancelled through\n * the job management system. This is a critical failure and the job\n * should not proceed.\n *\n * @param pidFile - Absolute path to the PID file\n * @param pid - Process ID to write\n * @throws Error if PID file cannot be written\n */\nexport function writePidFile(pidFile: string, pid: number): void {\n try {\n fs.writeFileSync(pidFile, pid.toString(), 'utf-8');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(\n `CRITICAL: Failed to write PID file ${pidFile}. ` +\n `Job cannot be cancelled without PID file. ` +\n `Original error: ${message}`\n );\n }\n}\n\n/**\n * Delete PID file - best-effort cleanup during shutdown.\n *\n * This operation should NEVER throw. During process shutdown (SIGTERM,\n * job success, job failure), failing to delete a PID file should not\n * prevent the process from exiting cleanly.\n *\n * Stale PID files are cleaned up by JobService.cleanupOldJobs().\n *\n * @param pidFile - Absolute path to the PID file\n * @param _context - Context indicating when the delete is happening (for future logging)\n * @returns Result indicating success or failure with error details\n */\nexport function deletePidFile(pidFile: string, _context: PidFileDeleteContext): PidFileResult {\n try {\n fs.unlinkSync(pidFile);\n return { success: true };\n } catch (error) {\n // ENOENT = file doesn't exist - that's success (nothing to delete)\n if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {\n return { success: true };\n }\n // Any other error = failure (permission denied, etc.)\n return {\n success: false,\n error: error instanceof Error ? error : new Error(String(error)),\n };\n }\n}\n\n/**\n * Build the path to a PID file for a given job.\n *\n * @param jobsDir - Directory where job files are stored\n * @param jobId - Job identifier\n * @returns Absolute path to the PID file\n */\nexport function buildPidFilePath(jobsDir: string, jobId: string): string {\n return path.join(jobsDir, `${jobId}.pid`);\n}\n","#!/usr/bin/env node\nimport { BackgroundWorker } from './background-worker.js';\nimport { writePidFile, deletePidFile, buildPidFilePath } from './pid-file.js';\nimport { createLogger, shutdownLogger } from '../logging/index.js';\nimport { createServices } from '../services/index.js';\nimport { JobService } from '../services/job.service.js';\n\nconst logger = createLogger('background-worker-cli');\n\n/**\n * Background worker CLI entry point\n *\n * Usage: background-worker-cli <job-id>\n *\n * This process runs detached from the parent and executes a single job.\n */\n\nasync function main(): Promise<void> {\n const jobId = process.argv[2];\n const dataDir = process.env['BLUERA_DATA_DIR'];\n\n if (jobId === undefined || jobId === '') {\n logger.error('Job ID required. Usage: background-worker-cli <job-id>');\n await shutdownLogger();\n process.exit(1);\n }\n\n // Initialize services\n const jobService = new JobService(dataDir);\n const services = await createServices(undefined, dataDir);\n\n // Write PID file for job cancellation - CRITICAL: must succeed or job cannot be cancelled\n const pidFile = buildPidFilePath(\n jobService['jobsDir'], // Access private field for PID path\n jobId\n );\n\n try {\n writePidFile(pidFile, process.pid);\n } catch (error) {\n // CRITICAL: Cannot proceed without PID file - job would be uncancellable\n logger.error(\n { error: error instanceof Error ? error.message : String(error) },\n 'Failed to write PID file'\n );\n await shutdownLogger();\n process.exit(1);\n }\n\n // Handle SIGTERM for graceful shutdown\n process.on('SIGTERM', () => {\n logger.info({ jobId }, 'Received SIGTERM, cancelling job');\n jobService.updateJob(jobId, {\n status: 'cancelled',\n message: 'Job cancelled by user',\n });\n\n // Clean up PID file (best-effort - don't block shutdown)\n const deleteResult = deletePidFile(pidFile, 'sigterm');\n if (!deleteResult.success && deleteResult.error !== undefined) {\n logger.warn(\n { jobId, error: deleteResult.error.message },\n 'Could not remove PID file during SIGTERM'\n );\n }\n\n // Flush logs before exit (best-effort, don't await in signal handler)\n void shutdownLogger().finally(() => process.exit(0));\n });\n\n // Create worker and execute job\n const worker = new BackgroundWorker(\n jobService,\n services.store,\n services.index,\n services.lance,\n services.embeddings\n );\n\n try {\n await worker.executeJob(jobId);\n\n // Clean up PID file on success (best-effort - don't change exit code)\n const successCleanup = deletePidFile(pidFile, 'success');\n if (!successCleanup.success && successCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: successCleanup.error.message },\n 'Could not remove PID file after success'\n );\n }\n\n logger.info({ jobId }, 'Job completed successfully');\n await shutdownLogger();\n process.exit(0);\n } catch (error) {\n // Job service already updated with failure status in BackgroundWorker\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Clean up PID file on failure (best-effort - exit code reflects job failure)\n const failureCleanup = deletePidFile(pidFile, 'failure');\n if (!failureCleanup.success && failureCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: failureCleanup.error.message },\n 'Could not remove PID file after failure'\n );\n }\n\n await shutdownLogger();\n process.exit(1);\n }\n}\n\nmain().catch(async (error: unknown) => {\n logger.error(\n { error: error instanceof Error ? error.message : String(error) },\n 'Fatal error in background worker'\n );\n await shutdownLogger();\n process.exit(1);\n});\n"],"mappings":";;;;;;;;;;;;;;;AAAA,SAAS,kBAAkB;AAY3B,IAAM,SAAS,aAAa,mBAAmB;AASxC,SAAS,uBACd,SACA,OACA,QAAgB,KACR;AACR,MAAI,UAAU,EAAG,QAAO;AACxB,SAAQ,UAAU,QAAS;AAC7B;AAEO,IAAM,mBAAN,MAAuB;AAAA,EAC5B,YACmB,YACA,cACA,cACA,YACA,iBACjB;AALiB;AACA;AACA;AACA;AACA;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA,EAKH,MAAM,WAAW,OAA8B;AAC7C,UAAM,MAAM,KAAK,WAAW,OAAO,KAAK;AAExC,QAAI,CAAC,KAAK;AACR,YAAM,IAAI,MAAM,OAAO,KAAK,YAAY;AAAA,IAC1C;AAEA,QAAI;AACF,aAAO,KAAK,EAAE,OAAO,MAAM,IAAI,KAAK,GAAG,wBAAwB;AAG/D,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,YAAY,IAAI,IAAI;AAAA,QAC7B,UAAU;AAAA,QACV,SAAS,EAAE,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACjD,CAAC;AAGD,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF;AACE,gBAAM,IAAI,MAAM,qBAAqB,OAAO,IAAI,IAAI,CAAC,EAAE;AAAA,MAC3D;AAGA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,GAAG,IAAI,IAAI;AAAA,QACpB,SAAS,EAAE,cAAa,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACnD,CAAC;AAAA,IACH,SAAS,OAAO;AACd,aAAO;AAAA,QACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,QACvE;AAAA,MACF;AAGA,YAAM,eAAwC;AAAA,QAC5C,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,MACtC;AACA,UAAI,iBAAiB,SAAS,MAAM,UAAU,QAAW;AACvD,qBAAa,OAAO,IAAI,MAAM;AAAA,MAChC,OAAO;AACL,qBAAa,OAAO,IAAI,OAAO,KAAK;AAAA,MACtC;AACA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AAAA,QAClD,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAMA,SAAK,WAAW,UAAU,IAAI,IAAI;AAAA,MAChC,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,CAAC;AAGD,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,gBAAgB,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AAC3E,cAAM,gBAAgB,KAAK;AAE3B,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA;AAAA,UACpC,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,cAAc,cAAc,OAAO,CAAC;AAC1E,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAGA,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAEA,cAAM,WAAW,uBAAuB,MAAM,SAAS,MAAM,KAAK;AAElE,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,QAAQ;AAAA;AAAA,UAC/B,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,SAAS,KAAK,kBAAkB,oBAAoB,UAAU,QAAQ,YAAY,IACxF,IAAI;AAEN,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,QAAI,QAAQ,UAAa,OAAO,QAAQ,UAAU;AAChD,YAAM,IAAI,MAAM,4BAA4B;AAAA,IAC9C;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,OAAO,SAAS,OAAO;AACzB,YAAM,IAAI,MAAM,aAAa,OAAO,YAAY;AAAA,IAClD;AAEA,UAAM,mBAAmB,OAAO,aAAa,WAAW,WAAW;AACnE,UAAM,UAAU,IAAI,mBAAmB;AAGvC,YAAQ,GAAG,YAAY,CAAC,aAA4B;AAElD,YAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,UAAI,YAAY,WAAW,aAAa;AACtC;AAAA,MACF;AAGA,YAAM,gBAAiB,SAAS,eAAe,mBAAoB;AAEnE,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SACE,SAAS,WACT,iBAAiB,OAAO,SAAS,YAAY,CAAC,IAAI,OAAO,gBAAgB,CAAC;AAAA,QAC5E,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA,QACpC,SAAS,EAAE,cAAc,SAAS,aAAa;AAAA,MACjD,CAAC;AAAA,IACH,CAAC;AAED,QAAI;AACF,YAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AACzC,YAAM,OAAmB,CAAC;AAG1B,YAAM,eAMF;AAAA,QACF,UAAU;AAAA,QACV,QAAQ,UAAU;AAAA,QAClB,aAAa,eAAe;AAAA;AAAA,MAC9B;AACA,UAAI,qBAAqB,QAAW;AAClC,qBAAa,mBAAmB;AAAA,MAClC;AACA,UAAI,uBAAuB,QAAW;AACpC,qBAAa,qBAAqB;AAAA,MACpC;AAGA,uBAAiB,UAAU,QAAQ,MAAM,KAAK,YAAY,GAAG;AAE3D,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,iBAAiB,OAAO,aAAa,OAAO;AAClD,cAAM,SAAS,MAAM,KAAK,gBAAgB,MAAM,cAAc;AAE9D,aAAK,KAAK;AAAA,UACR,IAAI,iBAAiB,GAAG,MAAM,EAAE,IAAI,WAAW,KAAK,EAAE,OAAO,OAAO,GAAG,EAAE,OAAO,KAAK,CAAC,EAAE;AAAA,UACxF,SAAS;AAAA,UACT;AAAA,UACA,UAAU;AAAA,YACR,MAAM;AAAA,YACN,SAAS,MAAM;AAAA,YACf,KAAK,OAAO;AAAA,YACZ,OAAO,OAAO;AAAA,YACd,WAAW,OAAO,cAAc;AAAA,YAChC,OAAO,OAAO;AAAA,YACd,WAAW,oBAAI,KAAK;AAAA,UACtB;AAAA,QACF,CAAC;AAAA,MACH;AAGA,UAAI,KAAK,SAAS,GAAG;AACnB,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS;AAAA,UACT,UAAU;AAAA,QACZ,CAAC;AAED,cAAM,KAAK,WAAW,aAAa,MAAM,IAAI,IAAI;AAEjD,cAAM,KAAK,WAAW,eAAe,MAAM,EAAE;AAAA,MAC/C;AAEA,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SAAS,uBAAuB,OAAO,KAAK,MAAM,CAAC;AAAA,QACnD,UAAU;AAAA,QACV,SAAS,EAAE,cAAc,KAAK,OAAO;AAAA,MACvC,CAAC;AAAA,IACH,UAAE;AACA,YAAM,QAAQ,KAAK;AAAA,IACrB;AAAA,EACF;AACF;;;ACjUA,OAAO,QAAQ;AACf,OAAO,UAAU;AA4BV,SAAS,aAAa,SAAiB,KAAmB;AAC/D,MAAI;AACF,OAAG,cAAc,SAAS,IAAI,SAAS,GAAG,OAAO;AAAA,EACnD,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,+DAExB,OAAO;AAAA,IAC9B;AAAA,EACF;AACF;AAeO,SAAS,cAAc,SAAiB,UAA+C;AAC5F,MAAI;AACF,OAAG,WAAW,OAAO;AACrB,WAAO,EAAE,SAAS,KAAK;AAAA,EACzB,SAAS,OAAO;AAEd,QAAI,iBAAiB,SAAS,UAAU,SAAS,MAAM,SAAS,UAAU;AACxE,aAAO,EAAE,SAAS,KAAK;AAAA,IACzB;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,IACjE;AAAA,EACF;AACF;AASO,SAAS,iBAAiB,SAAiB,OAAuB;AACvE,SAAO,KAAK,KAAK,SAAS,GAAG,KAAK,MAAM;AAC1C;;;AC1EA,IAAMA,UAAS,aAAa,uBAAuB;AAUnD,eAAe,OAAsB;AACnC,QAAM,QAAQ,QAAQ,KAAK,CAAC;AAC5B,QAAM,UAAU,QAAQ,IAAI,iBAAiB;AAE7C,MAAI,UAAU,UAAa,UAAU,IAAI;AACvC,IAAAA,QAAO,MAAM,wDAAwD;AACrE,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,QAAM,aAAa,IAAI,WAAW,OAAO;AACzC,QAAM,WAAW,MAAM,eAAe,QAAW,OAAO;AAGxD,QAAM,UAAU;AAAA,IACd,WAAW,SAAS;AAAA;AAAA,IACpB;AAAA,EACF;AAEA,MAAI;AACF,iBAAa,SAAS,QAAQ,GAAG;AAAA,EACnC,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MAChE;AAAA,IACF;AACA,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,UAAQ,GAAG,WAAW,MAAM;AAC1B,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,kCAAkC;AACzD,eAAW,UAAU,OAAO;AAAA,MAC1B,QAAQ;AAAA,MACR,SAAS;AAAA,IACX,CAAC;AAGD,UAAM,eAAe,cAAc,SAAS,SAAS;AACrD,QAAI,CAAC,aAAa,WAAW,aAAa,UAAU,QAAW;AAC7D,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,aAAa,MAAM,QAAQ;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAGA,SAAK,eAAe,EAAE,QAAQ,MAAM,QAAQ,KAAK,CAAC,CAAC;AAAA,EACrD,CAAC;AAGD,QAAM,SAAS,IAAI;AAAA,IACjB;AAAA,IACA,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAEA,MAAI;AACF,UAAM,OAAO,WAAW,KAAK;AAG7B,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,4BAA4B;AACnD,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MACvE;AAAA,IACF;AAGA,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;AAEA,KAAK,EAAE,MAAM,OAAO,UAAmB;AACrC,EAAAA,QAAO;AAAA,IACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,IAChE;AAAA,EACF;AACA,QAAM,eAAe;AACrB,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":["logger"]}
1
+ {"version":3,"sources":["../../src/workers/background-worker-cli.ts","../../src/workers/background-worker.ts","../../src/workers/pid-file.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { platform } from 'os';\nimport { BackgroundWorker } from './background-worker.js';\nimport { writePidFile, deletePidFile, buildPidFilePath } from './pid-file.js';\nimport { createLogger, shutdownLogger } from '../logging/index.js';\nimport { createServices, destroyServices } from '../services/index.js';\nimport { JobService } from '../services/job.service.js';\n\n/**\n * Force exit the process to avoid ONNX runtime mutex crash on macOS.\n *\n * On macOS, the ONNX runtime (used by transformers.js for embeddings) has a known\n * bug where static mutex cleanup fails during process exit, causing a crash with:\n * \"mutex lock failed: Invalid argument\"\n *\n * This doesn't affect job completion - all work is done and persisted before exit.\n * Using SIGKILL bypasses the problematic cleanup code.\n *\n * See: https://github.com/microsoft/onnxruntime/issues/24579\n */\nfunction forceExitOnMacOS(exitCode: number): void {\n if (platform() === 'darwin') {\n // Give time for any pending I/O to flush\n setTimeout(() => {\n process.kill(process.pid, 'SIGKILL');\n }, 100);\n } else {\n process.exit(exitCode);\n }\n}\n\nconst logger = createLogger('background-worker-cli');\n\n/**\n * Background worker CLI entry point\n *\n * Usage: background-worker-cli <job-id>\n *\n * This process runs detached from the parent and executes a single job.\n */\n\nasync function main(): Promise<void> {\n const jobId = process.argv[2];\n const dataDir = process.env['BLUERA_DATA_DIR'];\n\n if (jobId === undefined || jobId === '') {\n logger.error('Job ID required. Usage: background-worker-cli <job-id>');\n await shutdownLogger();\n process.exit(1);\n }\n\n // Initialize services\n const jobService = new JobService(dataDir);\n const services = await createServices(undefined, dataDir);\n\n // Write PID file for job cancellation - CRITICAL: must succeed or job cannot be cancelled\n const pidFile = buildPidFilePath(\n jobService['jobsDir'], // Access private field for PID path\n jobId\n );\n\n try {\n writePidFile(pidFile, process.pid);\n } catch (error) {\n // CRITICAL: Cannot proceed without PID file - job would be uncancellable\n logger.error(\n { error: error instanceof Error ? error.message : String(error) },\n 'Failed to write PID file'\n );\n await shutdownLogger();\n process.exit(1);\n }\n\n // Handle SIGTERM for graceful shutdown\n process.on('SIGTERM', () => {\n logger.info({ jobId }, 'Received SIGTERM, cancelling job');\n jobService.updateJob(jobId, {\n status: 'cancelled',\n message: 'Job cancelled by user',\n });\n\n // Clean up PID file (best-effort - don't block shutdown)\n const deleteResult = deletePidFile(pidFile, 'sigterm');\n if (!deleteResult.success && deleteResult.error !== undefined) {\n logger.warn(\n { jobId, error: deleteResult.error.message },\n 'Could not remove PID file during SIGTERM'\n );\n }\n\n // Flush logs before exit (best-effort, don't await in signal handler)\n void shutdownLogger().finally(() => process.exit(0));\n });\n\n // Create worker and execute job\n const worker = new BackgroundWorker(\n jobService,\n services.store,\n services.index,\n services.lance,\n services.embeddings\n );\n\n try {\n await worker.executeJob(jobId);\n\n // Clean up PID file on success (best-effort - don't change exit code)\n const successCleanup = deletePidFile(pidFile, 'success');\n if (!successCleanup.success && successCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: successCleanup.error.message },\n 'Could not remove PID file after success'\n );\n }\n\n logger.info({ jobId }, 'Job completed successfully');\n await destroyServices(services);\n await shutdownLogger();\n forceExitOnMacOS(0);\n } catch (error) {\n // Job service already updated with failure status in BackgroundWorker\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Clean up PID file on failure (best-effort - exit code reflects job failure)\n const failureCleanup = deletePidFile(pidFile, 'failure');\n if (!failureCleanup.success && failureCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: failureCleanup.error.message },\n 'Could not remove PID file after failure'\n );\n }\n\n await destroyServices(services);\n await shutdownLogger();\n forceExitOnMacOS(1);\n }\n}\n\nmain().catch(async (error: unknown) => {\n logger.error(\n { error: error instanceof Error ? error.message : String(error) },\n 'Fatal error in background worker'\n );\n await shutdownLogger();\n forceExitOnMacOS(1);\n});\n","import { createHash } from 'node:crypto';\nimport { IntelligentCrawler, type CrawlProgress } from '../crawl/intelligent-crawler.js';\nimport { createLogger } from '../logging/index.js';\nimport { IndexService } from '../services/index.service.js';\nimport { JobService } from '../services/job.service.js';\nimport { StoreService } from '../services/store.service.js';\nimport { createStoreId, createDocumentId } from '../types/brands.js';\nimport type { EmbeddingEngine } from '../db/embeddings.js';\nimport type { LanceStore } from '../db/lance.js';\nimport type { Document } from '../types/document.js';\nimport type { Job } from '../types/job.js';\n\nconst logger = createLogger('background-worker');\n\n/**\n * Calculate index progress as a percentage, handling division by zero.\n * @param current - Current number of items processed\n * @param total - Total number of items (may be 0)\n * @param scale - Scale factor for progress (default 100 for 0-100%)\n * @returns Progress value, or 0 if total is 0\n */\nexport function calculateIndexProgress(\n current: number,\n total: number,\n scale: number = 100\n): number {\n if (total === 0) return 0;\n return (current / total) * scale;\n}\n\nexport class BackgroundWorker {\n constructor(\n private readonly jobService: JobService,\n private readonly storeService: StoreService,\n private readonly indexService: IndexService,\n private readonly lanceStore: LanceStore,\n private readonly embeddingEngine: EmbeddingEngine\n ) {}\n\n /**\n * Execute a job based on its type\n */\n async executeJob(jobId: string): Promise<void> {\n const job = this.jobService.getJob(jobId);\n\n if (!job) {\n throw new Error(`Job ${jobId} not found`);\n }\n\n try {\n logger.info({ jobId, type: job.type }, 'Starting job execution');\n\n // Update to running status\n this.jobService.updateJob(jobId, {\n status: 'running',\n message: `Starting ${job.type} operation...`,\n progress: 0,\n details: { startedAt: new Date().toISOString() },\n });\n\n // Execute based on job type\n switch (job.type) {\n case 'clone':\n await this.executeCloneJob(job);\n break;\n case 'index':\n await this.executeIndexJob(job);\n break;\n case 'crawl':\n await this.executeCrawlJob(job);\n break;\n default:\n throw new Error(`Unknown job type: ${String(job.type)}`);\n }\n\n // Mark as completed\n this.jobService.updateJob(jobId, {\n status: 'completed',\n progress: 100,\n message: `${job.type} operation completed successfully`,\n details: { completedAt: new Date().toISOString() },\n });\n } catch (error) {\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Mark as failed\n const errorDetails: Record<string, unknown> = {\n completedAt: new Date().toISOString(),\n };\n if (error instanceof Error && error.stack !== undefined) {\n errorDetails['error'] = error.stack;\n } else {\n errorDetails['error'] = String(error);\n }\n this.jobService.updateJob(jobId, {\n status: 'failed',\n message: error instanceof Error ? error.message : 'Unknown error',\n details: errorDetails,\n });\n throw error;\n }\n }\n\n /**\n * Execute a clone job (git clone + initial indexing)\n */\n private async executeCloneJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for clone job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Clone is already done by the time the job is created\n // (happens in StoreService.create), so we just need to index\n\n // Update progress - cloning considered done (30%)\n this.jobService.updateJob(job.id, {\n status: 'running',\n message: 'Repository cloned, starting indexing...',\n progress: 30,\n });\n\n // Index the repository with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Indexing is 70% of total progress (30-100%)\n const indexProgress = calculateIndexProgress(event.current, event.total, 70);\n const totalProgress = 30 + indexProgress;\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, totalProgress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute an index job (re-indexing existing store)\n */\n private async executeIndexJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for index job');\n }\n\n // Get the store\n const store = await this.storeService.getByIdOrName(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Index with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n const progress = calculateIndexProgress(event.current, event.total);\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, progress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute a crawl job (web crawling + indexing)\n */\n private async executeCrawlJob(job: Job): Promise<void> {\n const { storeId, url, crawlInstruction, extractInstruction, maxPages, simple, useHeadless } =\n job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for crawl job');\n }\n if (url === undefined || typeof url !== 'string') {\n throw new Error('URL required for crawl job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (store?.type !== 'web') {\n throw new Error(`Web store ${storeId} not found`);\n }\n\n const resolvedMaxPages = typeof maxPages === 'number' ? maxPages : 50;\n const crawler = new IntelligentCrawler();\n\n // Listen for progress events\n crawler.on('progress', (progress: CrawlProgress) => {\n // Check if job was cancelled - just return early, for-await loop will throw and finally will cleanup\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n return;\n }\n\n // Crawling is 80% of total progress (0-80%)\n const crawlProgress = (progress.pagesVisited / resolvedMaxPages) * 80;\n\n this.jobService.updateJob(job.id, {\n message:\n progress.message ??\n `Crawling page ${String(progress.pagesVisited)}/${String(resolvedMaxPages)}`,\n progress: Math.min(80, crawlProgress),\n details: { pagesCrawled: progress.pagesVisited },\n });\n });\n\n try {\n await this.lanceStore.initialize(store.id);\n const docs: Document[] = [];\n\n // Build crawl options, only including defined values\n const crawlOptions: {\n maxPages: number;\n simple: boolean;\n useHeadless: boolean;\n crawlInstruction?: string;\n extractInstruction?: string;\n } = {\n maxPages: resolvedMaxPages,\n simple: simple ?? false,\n useHeadless: useHeadless ?? true, // Default to headless for reliability\n };\n if (crawlInstruction !== undefined) {\n crawlOptions.crawlInstruction = crawlInstruction;\n }\n if (extractInstruction !== undefined) {\n crawlOptions.extractInstruction = extractInstruction;\n }\n\n // Crawl pages using IntelligentCrawler\n for await (const result of crawler.crawl(url, crawlOptions)) {\n // Check cancellation between pages\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Embed and index the content (use extracted if available, otherwise markdown)\n const contentToEmbed = result.extracted ?? result.markdown;\n const vector = await this.embeddingEngine.embed(contentToEmbed);\n\n docs.push({\n id: createDocumentId(`${store.id}-${createHash('md5').update(result.url).digest('hex')}`),\n content: contentToEmbed,\n vector,\n metadata: {\n type: 'web',\n storeId: store.id,\n url: result.url,\n title: result.title,\n extracted: result.extracted !== undefined,\n depth: result.depth,\n indexedAt: new Date(),\n },\n });\n }\n\n // Index all documents (remaining 20%)\n if (docs.length > 0) {\n this.jobService.updateJob(job.id, {\n message: 'Indexing crawled documents...',\n progress: 85,\n });\n\n await this.lanceStore.addDocuments(store.id, docs);\n // Create FTS index for full-text search\n await this.lanceStore.createFtsIndex(store.id);\n }\n\n this.jobService.updateJob(job.id, {\n message: `Crawled and indexed ${String(docs.length)} pages`,\n progress: 100,\n details: { pagesCrawled: docs.length },\n });\n } finally {\n await crawler.stop();\n }\n }\n}\n","import fs from 'fs';\nimport path from 'path';\n\n/**\n * Result of a PID file delete operation.\n * Delete operations are best-effort and should not throw.\n */\nexport interface PidFileResult {\n success: boolean;\n error?: Error;\n}\n\n/**\n * Context for PID file deletion - indicates when the delete is happening.\n * Used for logging/debugging purposes.\n */\nexport type PidFileDeleteContext = 'sigterm' | 'success' | 'failure';\n\n/**\n * Write PID file - CRITICAL operation that must succeed.\n *\n * If the PID file cannot be written, the job cannot be cancelled through\n * the job management system. This is a critical failure and the job\n * should not proceed.\n *\n * @param pidFile - Absolute path to the PID file\n * @param pid - Process ID to write\n * @throws Error if PID file cannot be written\n */\nexport function writePidFile(pidFile: string, pid: number): void {\n try {\n fs.writeFileSync(pidFile, pid.toString(), 'utf-8');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(\n `CRITICAL: Failed to write PID file ${pidFile}. ` +\n `Job cannot be cancelled without PID file. ` +\n `Original error: ${message}`\n );\n }\n}\n\n/**\n * Delete PID file - best-effort cleanup during shutdown.\n *\n * This operation should NEVER throw. During process shutdown (SIGTERM,\n * job success, job failure), failing to delete a PID file should not\n * prevent the process from exiting cleanly.\n *\n * Stale PID files are cleaned up by JobService.cleanupOldJobs().\n *\n * @param pidFile - Absolute path to the PID file\n * @param _context - Context indicating when the delete is happening (for future logging)\n * @returns Result indicating success or failure with error details\n */\nexport function deletePidFile(pidFile: string, _context: PidFileDeleteContext): PidFileResult {\n try {\n fs.unlinkSync(pidFile);\n return { success: true };\n } catch (error) {\n // ENOENT = file doesn't exist - that's success (nothing to delete)\n if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {\n return { success: true };\n }\n // Any other error = failure (permission denied, etc.)\n return {\n success: false,\n error: error instanceof Error ? error : new Error(String(error)),\n };\n }\n}\n\n/**\n * Build the path to a PID file for a given job.\n *\n * @param jobsDir - Directory where job files are stored\n * @param jobId - Job identifier\n * @returns Absolute path to the PID file\n */\nexport function buildPidFilePath(jobsDir: string, jobId: string): string {\n return path.join(jobsDir, `${jobId}.pid`);\n}\n"],"mappings":";;;;;;;;;;;;;;;;AACA,SAAS,gBAAgB;;;ACDzB,SAAS,kBAAkB;AAY3B,IAAM,SAAS,aAAa,mBAAmB;AASxC,SAAS,uBACd,SACA,OACA,QAAgB,KACR;AACR,MAAI,UAAU,EAAG,QAAO;AACxB,SAAQ,UAAU,QAAS;AAC7B;AAEO,IAAM,mBAAN,MAAuB;AAAA,EAC5B,YACmB,YACA,cACA,cACA,YACA,iBACjB;AALiB;AACA;AACA;AACA;AACA;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA,EAKH,MAAM,WAAW,OAA8B;AAC7C,UAAM,MAAM,KAAK,WAAW,OAAO,KAAK;AAExC,QAAI,CAAC,KAAK;AACR,YAAM,IAAI,MAAM,OAAO,KAAK,YAAY;AAAA,IAC1C;AAEA,QAAI;AACF,aAAO,KAAK,EAAE,OAAO,MAAM,IAAI,KAAK,GAAG,wBAAwB;AAG/D,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,YAAY,IAAI,IAAI;AAAA,QAC7B,UAAU;AAAA,QACV,SAAS,EAAE,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACjD,CAAC;AAGD,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF;AACE,gBAAM,IAAI,MAAM,qBAAqB,OAAO,IAAI,IAAI,CAAC,EAAE;AAAA,MAC3D;AAGA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,GAAG,IAAI,IAAI;AAAA,QACpB,SAAS,EAAE,cAAa,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACnD,CAAC;AAAA,IACH,SAAS,OAAO;AACd,aAAO;AAAA,QACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,QACvE;AAAA,MACF;AAGA,YAAM,eAAwC;AAAA,QAC5C,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,MACtC;AACA,UAAI,iBAAiB,SAAS,MAAM,UAAU,QAAW;AACvD,qBAAa,OAAO,IAAI,MAAM;AAAA,MAChC,OAAO;AACL,qBAAa,OAAO,IAAI,OAAO,KAAK;AAAA,MACtC;AACA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AAAA,QAClD,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAMA,SAAK,WAAW,UAAU,IAAI,IAAI;AAAA,MAChC,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,CAAC;AAGD,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,gBAAgB,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AAC3E,cAAM,gBAAgB,KAAK;AAE3B,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA;AAAA,UACpC,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,cAAc,cAAc,OAAO,CAAC;AAC1E,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAGA,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAEA,cAAM,WAAW,uBAAuB,MAAM,SAAS,MAAM,KAAK;AAElE,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,QAAQ;AAAA;AAAA,UAC/B,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,SAAS,KAAK,kBAAkB,oBAAoB,UAAU,QAAQ,YAAY,IACxF,IAAI;AAEN,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,QAAI,QAAQ,UAAa,OAAO,QAAQ,UAAU;AAChD,YAAM,IAAI,MAAM,4BAA4B;AAAA,IAC9C;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,OAAO,SAAS,OAAO;AACzB,YAAM,IAAI,MAAM,aAAa,OAAO,YAAY;AAAA,IAClD;AAEA,UAAM,mBAAmB,OAAO,aAAa,WAAW,WAAW;AACnE,UAAM,UAAU,IAAI,mBAAmB;AAGvC,YAAQ,GAAG,YAAY,CAAC,aAA4B;AAElD,YAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,UAAI,YAAY,WAAW,aAAa;AACtC;AAAA,MACF;AAGA,YAAM,gBAAiB,SAAS,eAAe,mBAAoB;AAEnE,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SACE,SAAS,WACT,iBAAiB,OAAO,SAAS,YAAY,CAAC,IAAI,OAAO,gBAAgB,CAAC;AAAA,QAC5E,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA,QACpC,SAAS,EAAE,cAAc,SAAS,aAAa;AAAA,MACjD,CAAC;AAAA,IACH,CAAC;AAED,QAAI;AACF,YAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AACzC,YAAM,OAAmB,CAAC;AAG1B,YAAM,eAMF;AAAA,QACF,UAAU;AAAA,QACV,QAAQ,UAAU;AAAA,QAClB,aAAa,eAAe;AAAA;AAAA,MAC9B;AACA,UAAI,qBAAqB,QAAW;AAClC,qBAAa,mBAAmB;AAAA,MAClC;AACA,UAAI,uBAAuB,QAAW;AACpC,qBAAa,qBAAqB;AAAA,MACpC;AAGA,uBAAiB,UAAU,QAAQ,MAAM,KAAK,YAAY,GAAG;AAE3D,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,iBAAiB,OAAO,aAAa,OAAO;AAClD,cAAM,SAAS,MAAM,KAAK,gBAAgB,MAAM,cAAc;AAE9D,aAAK,KAAK;AAAA,UACR,IAAI,iBAAiB,GAAG,MAAM,EAAE,IAAI,WAAW,KAAK,EAAE,OAAO,OAAO,GAAG,EAAE,OAAO,KAAK,CAAC,EAAE;AAAA,UACxF,SAAS;AAAA,UACT;AAAA,UACA,UAAU;AAAA,YACR,MAAM;AAAA,YACN,SAAS,MAAM;AAAA,YACf,KAAK,OAAO;AAAA,YACZ,OAAO,OAAO;AAAA,YACd,WAAW,OAAO,cAAc;AAAA,YAChC,OAAO,OAAO;AAAA,YACd,WAAW,oBAAI,KAAK;AAAA,UACtB;AAAA,QACF,CAAC;AAAA,MACH;AAGA,UAAI,KAAK,SAAS,GAAG;AACnB,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS;AAAA,UACT,UAAU;AAAA,QACZ,CAAC;AAED,cAAM,KAAK,WAAW,aAAa,MAAM,IAAI,IAAI;AAEjD,cAAM,KAAK,WAAW,eAAe,MAAM,EAAE;AAAA,MAC/C;AAEA,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SAAS,uBAAuB,OAAO,KAAK,MAAM,CAAC;AAAA,QACnD,UAAU;AAAA,QACV,SAAS,EAAE,cAAc,KAAK,OAAO;AAAA,MACvC,CAAC;AAAA,IACH,UAAE;AACA,YAAM,QAAQ,KAAK;AAAA,IACrB;AAAA,EACF;AACF;;;ACjUA,OAAO,QAAQ;AACf,OAAO,UAAU;AA4BV,SAAS,aAAa,SAAiB,KAAmB;AAC/D,MAAI;AACF,OAAG,cAAc,SAAS,IAAI,SAAS,GAAG,OAAO;AAAA,EACnD,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,+DAExB,OAAO;AAAA,IAC9B;AAAA,EACF;AACF;AAeO,SAAS,cAAc,SAAiB,UAA+C;AAC5F,MAAI;AACF,OAAG,WAAW,OAAO;AACrB,WAAO,EAAE,SAAS,KAAK;AAAA,EACzB,SAAS,OAAO;AAEd,QAAI,iBAAiB,SAAS,UAAU,SAAS,MAAM,SAAS,UAAU;AACxE,aAAO,EAAE,SAAS,KAAK;AAAA,IACzB;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,IACjE;AAAA,EACF;AACF;AASO,SAAS,iBAAiB,SAAiB,OAAuB;AACvE,SAAO,KAAK,KAAK,SAAS,GAAG,KAAK,MAAM;AAC1C;;;AF7DA,SAAS,iBAAiB,UAAwB;AAChD,MAAI,SAAS,MAAM,UAAU;AAE3B,eAAW,MAAM;AACf,cAAQ,KAAK,QAAQ,KAAK,SAAS;AAAA,IACrC,GAAG,GAAG;AAAA,EACR,OAAO;AACL,YAAQ,KAAK,QAAQ;AAAA,EACvB;AACF;AAEA,IAAMA,UAAS,aAAa,uBAAuB;AAUnD,eAAe,OAAsB;AACnC,QAAM,QAAQ,QAAQ,KAAK,CAAC;AAC5B,QAAM,UAAU,QAAQ,IAAI,iBAAiB;AAE7C,MAAI,UAAU,UAAa,UAAU,IAAI;AACvC,IAAAA,QAAO,MAAM,wDAAwD;AACrE,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,QAAM,aAAa,IAAI,WAAW,OAAO;AACzC,QAAM,WAAW,MAAM,eAAe,QAAW,OAAO;AAGxD,QAAM,UAAU;AAAA,IACd,WAAW,SAAS;AAAA;AAAA,IACpB;AAAA,EACF;AAEA,MAAI;AACF,iBAAa,SAAS,QAAQ,GAAG;AAAA,EACnC,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MAChE;AAAA,IACF;AACA,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,UAAQ,GAAG,WAAW,MAAM;AAC1B,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,kCAAkC;AACzD,eAAW,UAAU,OAAO;AAAA,MAC1B,QAAQ;AAAA,MACR,SAAS;AAAA,IACX,CAAC;AAGD,UAAM,eAAe,cAAc,SAAS,SAAS;AACrD,QAAI,CAAC,aAAa,WAAW,aAAa,UAAU,QAAW;AAC7D,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,aAAa,MAAM,QAAQ;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAGA,SAAK,eAAe,EAAE,QAAQ,MAAM,QAAQ,KAAK,CAAC,CAAC;AAAA,EACrD,CAAC;AAGD,QAAM,SAAS,IAAI;AAAA,IACjB;AAAA,IACA,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAEA,MAAI;AACF,UAAM,OAAO,WAAW,KAAK;AAG7B,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,4BAA4B;AACnD,UAAM,gBAAgB,QAAQ;AAC9B,UAAM,eAAe;AACrB,qBAAiB,CAAC;AAAA,EACpB,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MACvE;AAAA,IACF;AAGA,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,UAAM,gBAAgB,QAAQ;AAC9B,UAAM,eAAe;AACrB,qBAAiB,CAAC;AAAA,EACpB;AACF;AAEA,KAAK,EAAE,MAAM,OAAO,UAAmB;AACrC,EAAAA,QAAO;AAAA,IACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,IAChE;AAAA,EACF;AACA,QAAM,eAAe;AACrB,mBAAiB,CAAC;AACpB,CAAC;","names":["logger"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bluera-knowledge",
3
- "version": "0.12.7",
3
+ "version": "0.12.9",
4
4
  "description": "CLI tool for managing knowledge stores with semantic search",
5
5
  "type": "module",
6
6
  "bin": {
@@ -64,4 +64,15 @@ export class EmbeddingEngine {
64
64
  getDimensions(): number {
65
65
  return this.dimensions;
66
66
  }
67
+
68
+ /**
69
+ * Dispose the embedding pipeline to free resources.
70
+ * Should be called before process exit to prevent ONNX runtime cleanup issues on macOS.
71
+ */
72
+ async dispose(): Promise<void> {
73
+ if (this.extractor !== null) {
74
+ await this.extractor.dispose();
75
+ this.extractor = null;
76
+ }
77
+ }
67
78
  }
package/src/db/lance.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as lancedb from '@lancedb/lancedb';
2
2
  import { createDocumentId } from '../types/brands.js';
3
+ import { DocumentMetadataSchema } from '../types/document.js';
3
4
  import type { StoreId, DocumentId } from '../types/brands.js';
4
5
  import type { Document, DocumentMetadata } from '../types/document.js';
5
6
  import type { Table, Connection } from '@lancedb/lancedb';
@@ -88,13 +89,17 @@ export class LanceStore {
88
89
 
89
90
  // Return all results - threshold filtering is applied after score normalization
90
91
  // in search.service.ts to match displayed scores
91
- return results.map((r) => ({
92
- id: createDocumentId(r.id),
93
- content: r.content,
94
- score: 1 - r._distance,
95
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
96
- metadata: JSON.parse(r.metadata) as DocumentMetadata,
97
- }));
92
+ return results.map((r) => {
93
+ const metadata = DocumentMetadataSchema.parse(JSON.parse(r.metadata));
94
+ return {
95
+ id: createDocumentId(r.id),
96
+ content: r.content,
97
+ score: 1 - r._distance,
98
+ // Schema validates structure, cast to branded type
99
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
100
+ metadata: metadata as DocumentMetadata,
101
+ };
102
+ });
98
103
  }
99
104
 
100
105
  async createFtsIndex(storeId: StoreId): Promise<void> {
@@ -121,13 +126,17 @@ export class LanceStore {
121
126
  _score: number;
122
127
  }>;
123
128
 
124
- return results.map((r) => ({
125
- id: createDocumentId(r.id),
126
- content: r.content,
127
- score: r._score,
128
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
129
- metadata: JSON.parse(r.metadata) as DocumentMetadata,
130
- }));
129
+ return results.map((r) => {
130
+ const metadata = DocumentMetadataSchema.parse(JSON.parse(r.metadata));
131
+ return {
132
+ id: createDocumentId(r.id),
133
+ content: r.content,
134
+ score: r._score,
135
+ // Schema validates structure, cast to branded type
136
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
137
+ metadata: metadata as DocumentMetadata,
138
+ };
139
+ });
131
140
  }
132
141
 
133
142
  async deleteStore(storeId: StoreId): Promise<void> {
@@ -88,6 +88,15 @@ export async function destroyServices(services: ServiceContainer): Promise<void>
88
88
  errors.push(error);
89
89
  }
90
90
 
91
+ // Dispose embedding engine to free ONNX runtime resources
92
+ try {
93
+ await services.embeddings.dispose();
94
+ } catch (e) {
95
+ const error = e instanceof Error ? e : new Error(String(e));
96
+ logger.error({ error }, 'Error disposing EmbeddingEngine');
97
+ errors.push(error);
98
+ }
99
+
91
100
  try {
92
101
  await services.pythonBridge.stop();
93
102
  } catch (e) {
@@ -1,6 +1,7 @@
1
1
  import { randomUUID } from 'crypto';
2
2
  import fs from 'fs';
3
3
  import path from 'path';
4
+ import { JobSchema } from '../types/job.js';
4
5
  import { Result, ok, err } from '../types/result.js';
5
6
  import type { Job, CreateJobParams, UpdateJobParams, JobStatus } from '../types/job.js';
6
7
 
@@ -90,8 +91,7 @@ export class JobService {
90
91
 
91
92
  try {
92
93
  const content = fs.readFileSync(jobFile, 'utf-8');
93
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
94
- return JSON.parse(content) as Job;
94
+ return JobSchema.parse(JSON.parse(content));
95
95
  } catch (error) {
96
96
  throw new Error(
97
97
  `Failed to read job ${jobId}: ${error instanceof Error ? error.message : String(error)}`
@@ -117,8 +117,7 @@ export class JobService {
117
117
 
118
118
  try {
119
119
  const content = fs.readFileSync(path.join(this.jobsDir, file), 'utf-8');
120
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
121
- const job = JSON.parse(content) as Job;
120
+ const job = JobSchema.parse(JSON.parse(content));
122
121
 
123
122
  if (statusFilter !== undefined) {
124
123
  const filters = Array.isArray(statusFilter) ? statusFilter : [statusFilter];
@@ -1,11 +1,13 @@
1
1
  import { describe, it, expect, vi, beforeEach } from 'vitest';
2
2
  import { destroyServices, type ServiceContainer } from './index.js';
3
3
  import type { PythonBridge } from '../crawl/bridge.js';
4
+ import type { EmbeddingEngine } from '../db/embeddings.js';
4
5
  import type { LanceStore } from '../db/lance.js';
5
6
 
6
7
  describe('destroyServices', () => {
7
8
  let mockPythonBridge: { stop: ReturnType<typeof vi.fn> };
8
9
  let mockLance: { closeAsync: ReturnType<typeof vi.fn>; close: ReturnType<typeof vi.fn> };
10
+ let mockEmbeddings: { dispose: ReturnType<typeof vi.fn> };
9
11
  let mockServices: ServiceContainer;
10
12
 
11
13
  beforeEach(() => {
@@ -18,9 +20,14 @@ describe('destroyServices', () => {
18
20
  close: vi.fn(),
19
21
  };
20
22
 
23
+ mockEmbeddings = {
24
+ dispose: vi.fn().mockResolvedValue(undefined),
25
+ };
26
+
21
27
  mockServices = {
22
28
  pythonBridge: mockPythonBridge as unknown as PythonBridge,
23
29
  lance: mockLance as unknown as LanceStore,
30
+ embeddings: mockEmbeddings as unknown as EmbeddingEngine,
24
31
  } as unknown as ServiceContainer;
25
32
  });
26
33
 
@@ -84,4 +91,18 @@ describe('destroyServices', () => {
84
91
  // Should have waited for closeAsync to complete
85
92
  expect(closeCompleted).toBe(true);
86
93
  });
94
+
95
+ it('calls dispose on EmbeddingEngine to free ONNX runtime resources', async () => {
96
+ await destroyServices(mockServices);
97
+
98
+ expect(mockEmbeddings.dispose).toHaveBeenCalledTimes(1);
99
+ });
100
+
101
+ it('throws on EmbeddingEngine dispose errors', async () => {
102
+ mockEmbeddings.dispose.mockRejectedValue(new Error('dispose failed'));
103
+
104
+ await expect(destroyServices(mockServices)).rejects.toThrow(
105
+ 'Service shutdown failed: dispose failed'
106
+ );
107
+ });
87
108
  });
@@ -538,6 +538,30 @@ describe('StoreService', () => {
538
538
 
539
539
  await rm(corruptDir, { recursive: true, force: true });
540
540
  });
541
+
542
+ it('filters out null entries from stores array on load', async () => {
543
+ const nullDir = await mkdtemp(join(tmpdir(), 'null-entry-'));
544
+ const registryPath = join(nullDir, 'stores.json');
545
+ const validStore = {
546
+ id: 'test-id',
547
+ type: 'file',
548
+ name: 'valid-store',
549
+ path: '/some/path',
550
+ status: 'ready',
551
+ createdAt: new Date().toISOString(),
552
+ updatedAt: new Date().toISOString(),
553
+ };
554
+ await writeFile(registryPath, JSON.stringify({ stores: [null, validStore, null] }));
555
+
556
+ const freshService = new StoreService(nullDir);
557
+ await freshService.initialize();
558
+
559
+ const stores = await freshService.list();
560
+ expect(stores).toHaveLength(1);
561
+ expect(stores[0]?.name).toBe('valid-store');
562
+
563
+ await rm(nullDir, { recursive: true, force: true });
564
+ });
541
565
  });
542
566
 
543
567
  describe('store definition auto-update', () => {
@@ -218,6 +218,12 @@ export class StoreService {
218
218
  updatedAt: now,
219
219
  } satisfies WebStore;
220
220
  break;
221
+
222
+ default: {
223
+ // Exhaustive check - if this is reached, input.type is invalid
224
+ const invalidType: never = input.type;
225
+ return err(new Error(`Invalid store type: ${String(invalidType)}`));
226
+ }
221
227
  }
222
228
 
223
229
  this.registry.stores.push(store);
@@ -332,14 +338,16 @@ export class StoreService {
332
338
  const content = await readFile(registryPath, 'utf-8');
333
339
  try {
334
340
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
335
- const data = JSON.parse(content) as { stores: Store[] };
341
+ const data = JSON.parse(content) as { stores: (Store | null)[] };
336
342
  this.registry = {
337
- stores: data.stores.map((s) => ({
338
- ...s,
339
- id: createStoreId(s.id),
340
- createdAt: new Date(s.createdAt),
341
- updatedAt: new Date(s.updatedAt),
342
- })),
343
+ stores: data.stores
344
+ .filter((s): s is Store => s !== null)
345
+ .map((s) => ({
346
+ ...s,
347
+ id: createStoreId(s.id),
348
+ createdAt: new Date(s.createdAt),
349
+ updatedAt: new Date(s.updatedAt),
350
+ })),
343
351
  };
344
352
  } catch (error) {
345
353
  throw new Error(
@@ -1,6 +1,30 @@
1
+ import { z } from 'zod';
1
2
  import type { DocumentId, StoreId } from './brands.js';
2
3
 
3
- export type DocumentType = 'file' | 'chunk' | 'web';
4
+ // ============================================================================
5
+ // Zod Schemas
6
+ // ============================================================================
7
+
8
+ export const DocumentTypeSchema = z.enum(['file', 'chunk', 'web']);
9
+
10
+ export const DocumentMetadataSchema = z
11
+ .object({
12
+ path: z.string().optional(),
13
+ url: z.string().optional(),
14
+ type: DocumentTypeSchema,
15
+ storeId: z.string(),
16
+ indexedAt: z.union([z.string(), z.date()]),
17
+ fileHash: z.string().optional(),
18
+ chunkIndex: z.number().optional(),
19
+ totalChunks: z.number().optional(),
20
+ })
21
+ .loose(); // Allow additional fields per index signature
22
+
23
+ // ============================================================================
24
+ // Types
25
+ // ============================================================================
26
+
27
+ export type DocumentType = z.infer<typeof DocumentTypeSchema>;
4
28
 
5
29
  export interface DocumentMetadata {
6
30
  readonly path?: string | undefined;
package/src/types/job.ts CHANGED
@@ -1,36 +1,51 @@
1
- export type JobType = 'clone' | 'index' | 'crawl';
2
- export type JobStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
3
-
4
- export interface JobDetails {
5
- storeName?: string;
6
- storeId?: string;
7
- url?: string;
8
- path?: string;
9
- filesProcessed?: number;
10
- totalFiles?: number;
11
- startedAt?: string;
12
- completedAt?: string;
13
- cancelledAt?: string;
14
- error?: string;
1
+ import { z } from 'zod';
2
+
3
+ // ============================================================================
4
+ // Zod Schemas
5
+ // ============================================================================
6
+
7
+ export const JobTypeSchema = z.enum(['clone', 'index', 'crawl']);
8
+ export const JobStatusSchema = z.enum(['pending', 'running', 'completed', 'failed', 'cancelled']);
9
+
10
+ export const JobDetailsSchema = z.object({
11
+ storeName: z.string().optional(),
12
+ storeId: z.string().optional(),
13
+ url: z.string().optional(),
14
+ path: z.string().optional(),
15
+ filesProcessed: z.number().optional(),
16
+ totalFiles: z.number().optional(),
17
+ startedAt: z.string().optional(),
18
+ completedAt: z.string().optional(),
19
+ cancelledAt: z.string().optional(),
20
+ error: z.string().optional(),
15
21
  // Crawl-specific fields
16
- crawlInstruction?: string;
17
- extractInstruction?: string;
18
- maxPages?: number;
19
- simple?: boolean;
20
- useHeadless?: boolean;
21
- pagesCrawled?: number;
22
- }
22
+ crawlInstruction: z.string().optional(),
23
+ extractInstruction: z.string().optional(),
24
+ maxPages: z.number().optional(),
25
+ simple: z.boolean().optional(),
26
+ useHeadless: z.boolean().optional(),
27
+ pagesCrawled: z.number().optional(),
28
+ });
23
29
 
24
- export interface Job {
25
- id: string;
26
- type: JobType;
27
- status: JobStatus;
28
- progress: number; // 0-100
29
- message: string;
30
- details: JobDetails;
31
- createdAt: string;
32
- updatedAt: string;
33
- }
30
+ export const JobSchema = z.object({
31
+ id: z.string(),
32
+ type: JobTypeSchema,
33
+ status: JobStatusSchema,
34
+ progress: z.number().min(0).max(100),
35
+ message: z.string(),
36
+ details: JobDetailsSchema.default({}),
37
+ createdAt: z.string(),
38
+ updatedAt: z.string(),
39
+ });
40
+
41
+ // ============================================================================
42
+ // Types (inferred from schemas)
43
+ // ============================================================================
44
+
45
+ export type JobType = z.infer<typeof JobTypeSchema>;
46
+ export type JobStatus = z.infer<typeof JobStatusSchema>;
47
+ export type JobDetails = z.infer<typeof JobDetailsSchema>;
48
+ export type Job = z.infer<typeof JobSchema>;
34
49
 
35
50
  export interface CreateJobParams {
36
51
  type: JobType;