bluera-knowledge 0.33.1 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
1
  import {
2
2
  createMCPServer,
3
3
  runMCPServer
4
- } from "../chunk-YDTTD53Y.js";
5
- import "../chunk-3TB7TDVF.js";
4
+ } from "../chunk-K2EB4PGE.js";
5
+ import "../chunk-FYHKBCIH.js";
6
6
  import "../chunk-CLIMKLTW.js";
7
7
  import "../chunk-N3XYMAU3.js";
8
8
  import "../chunk-DGUM43GV.js";
@@ -1,14 +1,14 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  IntelligentCrawler
4
- } from "../chunk-KDZDLJUY.js";
4
+ } from "../chunk-4S6LWHKI.js";
5
5
  import {
6
6
  JobService,
7
7
  createLogger,
8
8
  createServices,
9
9
  destroyServices,
10
10
  shutdownLogger
11
- } from "../chunk-3TB7TDVF.js";
11
+ } from "../chunk-FYHKBCIH.js";
12
12
  import {
13
13
  createDocumentId,
14
14
  createStoreId
@@ -182,7 +182,7 @@ var BackgroundWorker = class {
182
182
  * Execute a crawl job (web crawling + indexing)
183
183
  */
184
184
  async executeCrawlJob(job) {
185
- const { storeId, url, crawlInstruction, extractInstruction, maxPages, simple, useHeadless } = job.details;
185
+ const { storeId, url, crawlInstruction, extractInstruction, maxPages, useHeadless } = job.details;
186
186
  if (storeId === void 0 || typeof storeId !== "string") {
187
187
  throw new Error("Store ID required for crawl job");
188
188
  }
@@ -218,13 +218,10 @@ var BackgroundWorker = class {
218
218
  const docs = [];
219
219
  const crawlOptions = {
220
220
  maxPages: resolvedMaxPages,
221
- simple: simple ?? false,
222
- useHeadless: useHeadless ?? true
221
+ useHeadless: useHeadless ?? true,
223
222
  // Default to headless for reliability
223
+ crawlInstruction: typeof crawlInstruction === "string" ? crawlInstruction : "crawl all pages linked from this URL"
224
224
  };
225
- if (crawlInstruction !== void 0) {
226
- crawlOptions.crawlInstruction = crawlInstruction;
227
- }
228
225
  if (extractInstruction !== void 0) {
229
226
  crawlOptions.extractInstruction = extractInstruction;
230
227
  }
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/workers/background-worker-cli.ts","../../src/workers/background-worker.ts","../../src/workers/pid-file.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { platform } from 'os';\nimport { BackgroundWorker } from './background-worker.js';\nimport { writePidFile, deletePidFile, buildPidFilePath } from './pid-file.js';\nimport { createLogger, shutdownLogger } from '../logging/index.js';\nimport { createServices, destroyServices } from '../services/index.js';\nimport { JobService } from '../services/job.service.js';\n\n/**\n * Force exit the process to avoid ONNX runtime mutex crash on macOS.\n *\n * On macOS, the ONNX runtime (used by transformers.js for embeddings) has a known\n * bug where static mutex cleanup fails during process exit, causing a crash with:\n * \"mutex lock failed: Invalid argument\"\n *\n * This doesn't affect job completion - all work is done and persisted before exit.\n * Using SIGKILL bypasses the problematic cleanup code.\n *\n * See: https://github.com/microsoft/onnxruntime/issues/24579\n */\nfunction forceExitOnMacOS(exitCode: number): void {\n if (platform() === 'darwin') {\n // Give time for any pending I/O to flush\n setTimeout(() => {\n process.kill(process.pid, 'SIGKILL');\n }, 100);\n } else {\n process.exit(exitCode);\n }\n}\n\nconst logger = createLogger('background-worker-cli');\n\n/**\n * Background worker CLI entry point\n *\n * Usage: background-worker-cli <job-id>\n *\n * This process runs detached from the parent and executes a single job.\n */\n\nasync function main(): Promise<void> {\n const jobId = process.argv[2];\n const dataDir = process.env['BLUERA_DATA_DIR'];\n\n if (jobId === undefined || jobId === '') {\n logger.error('Job ID required. Usage: background-worker-cli <job-id>');\n await shutdownLogger();\n process.exit(1);\n }\n\n // Initialize services\n const jobService = new JobService(dataDir);\n const services = await createServices(undefined, dataDir);\n\n // Write PID file for job cancellation - CRITICAL: must succeed or job cannot be cancelled\n const pidFile = buildPidFilePath(\n jobService['jobsDir'], // Access private field for PID path\n jobId\n );\n\n try {\n writePidFile(pidFile, process.pid);\n } catch (error) {\n // CRITICAL: Cannot proceed without PID file - job would be uncancellable\n logger.error(\n { error: error instanceof Error ? error.message : String(error) },\n 'Failed to write PID file'\n );\n await shutdownLogger();\n process.exit(1);\n }\n\n // Handle SIGTERM for graceful shutdown\n process.on('SIGTERM', () => {\n logger.info({ jobId }, 'Received SIGTERM, cancelling job');\n jobService.updateJob(jobId, {\n status: 'cancelled',\n message: 'Job cancelled by user',\n });\n\n // Clean up PID file (best-effort - don't block shutdown)\n const deleteResult = deletePidFile(pidFile, 'sigterm');\n if (!deleteResult.success && deleteResult.error !== undefined) {\n logger.warn(\n { jobId, error: deleteResult.error.message },\n 'Could not remove PID file during SIGTERM'\n );\n }\n\n // Flush logs before exit (best-effort, don't await in signal handler)\n void shutdownLogger().finally(() => process.exit(0));\n });\n\n // Load config and create worker\n const appConfig = await services.config.load();\n const worker = new BackgroundWorker(\n jobService,\n services.store,\n services.index,\n services.lance,\n services.embeddings,\n appConfig.crawl\n );\n\n try {\n await worker.executeJob(jobId);\n\n // Clean up PID file on success (best-effort - don't change exit code)\n const successCleanup = deletePidFile(pidFile, 'success');\n if (!successCleanup.success && successCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: successCleanup.error.message },\n 'Could not remove PID file after success'\n );\n }\n\n logger.info({ jobId }, 'Job completed successfully');\n await destroyServices(services);\n await shutdownLogger();\n forceExitOnMacOS(0);\n } catch (error) {\n // Job service already updated with failure status in BackgroundWorker\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Clean up PID file on failure (best-effort - exit code reflects job failure)\n const failureCleanup = deletePidFile(pidFile, 'failure');\n if (!failureCleanup.success && failureCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: failureCleanup.error.message },\n 'Could not remove PID file after failure'\n );\n }\n\n await destroyServices(services);\n await shutdownLogger();\n forceExitOnMacOS(1);\n }\n}\n\nmain().catch(async (error: unknown) => {\n logger.error(\n { error: error instanceof Error ? error.message : String(error) },\n 'Fatal error in background worker'\n );\n await shutdownLogger();\n forceExitOnMacOS(1);\n});\n","import { createHash } from 'node:crypto';\nimport {\n IntelligentCrawler,\n type CrawlConfig,\n type CrawlProgress,\n} from '../crawl/intelligent-crawler.js';\nimport { createLogger } from '../logging/index.js';\nimport { IndexService } from '../services/index.service.js';\nimport { JobService } from '../services/job.service.js';\nimport { StoreService } from '../services/store.service.js';\nimport { createStoreId, createDocumentId } from '../types/brands.js';\nimport type { EmbeddingEngine } from '../db/embeddings.js';\nimport type { LanceStore } from '../db/lance.js';\nimport type { Document } from '../types/document.js';\nimport type { Job } from '../types/job.js';\n\nconst logger = createLogger('background-worker');\n\n/**\n * Calculate index progress as a percentage, handling division by zero.\n * @param current - Current number of items processed\n * @param total - Total number of items (may be 0)\n * @param scale - Scale factor for progress (default 100 for 0-100%)\n * @returns Progress value, or 0 if total is 0\n */\nexport function calculateIndexProgress(\n current: number,\n total: number,\n scale: number = 100\n): number {\n if (total === 0) return 0;\n return (current / total) * scale;\n}\n\nexport class BackgroundWorker {\n constructor(\n private readonly jobService: JobService,\n private readonly storeService: StoreService,\n private readonly indexService: IndexService,\n private readonly lanceStore: LanceStore,\n private readonly embeddingEngine: EmbeddingEngine,\n private readonly crawlConfig?: CrawlConfig\n ) {}\n\n /**\n * Execute a job based on its type\n */\n async executeJob(jobId: string): Promise<void> {\n const job = this.jobService.getJob(jobId);\n\n if (!job) {\n throw new Error(`Job ${jobId} not found`);\n }\n\n try {\n logger.info({ jobId, type: job.type }, 'Starting job execution');\n\n // Update to running status\n this.jobService.updateJob(jobId, {\n status: 'running',\n message: `Starting ${job.type} operation...`,\n progress: 0,\n details: { startedAt: new Date().toISOString() },\n });\n\n // Execute based on job type\n switch (job.type) {\n case 'clone':\n await this.executeCloneJob(job);\n break;\n case 'index':\n await this.executeIndexJob(job);\n break;\n case 'crawl':\n await this.executeCrawlJob(job);\n break;\n default:\n throw new Error(`Unknown job type: ${String(job.type)}`);\n }\n\n // Mark as completed\n this.jobService.updateJob(jobId, {\n status: 'completed',\n progress: 100,\n message: `${job.type} operation completed successfully`,\n details: { completedAt: new Date().toISOString() },\n });\n } catch (error) {\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Mark as failed\n const errorDetails: Record<string, unknown> = {\n completedAt: new Date().toISOString(),\n };\n if (error instanceof Error && error.stack !== undefined) {\n errorDetails['error'] = error.stack;\n } else {\n errorDetails['error'] = String(error);\n }\n this.jobService.updateJob(jobId, {\n status: 'failed',\n message: error instanceof Error ? error.message : 'Unknown error',\n details: errorDetails,\n });\n throw error;\n }\n }\n\n /**\n * Execute a clone job (git clone + initial indexing)\n */\n private async executeCloneJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for clone job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Clone is already done by the time the job is created\n // (happens in StoreService.create), so we just need to index\n\n // Update progress - cloning considered done (30%), transition to indexing phase\n this.jobService.updateJob(job.id, {\n status: 'running',\n message: 'Repository cloned, starting indexing...',\n progress: 30,\n details: { phase: 'indexing', phaseStep: 2, phaseTotalSteps: 2 },\n });\n\n // Initialize LanceStore with dimensions before indexing\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n\n // Index the repository with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Indexing is 70% of total progress (30-100%)\n const indexProgress = calculateIndexProgress(event.current, event.total, 70);\n const totalProgress = 30 + indexProgress;\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, totalProgress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute an index job (re-indexing existing store)\n */\n private async executeIndexJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for index job');\n }\n\n // Get the store\n const store = await this.storeService.getByIdOrName(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Set indexing phase (single phase for index jobs)\n this.jobService.updateJob(job.id, {\n details: { phase: 'indexing', phaseStep: 1, phaseTotalSteps: 1 },\n });\n\n // Initialize LanceStore with dimensions before indexing\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n\n // Index with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n const progress = calculateIndexProgress(event.current, event.total);\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, progress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n\n // Upgrade schema v1 stores to v2 after successful reindex\n // This adds modelId so the store becomes searchable\n if (!store.modelId) {\n await this.storeService.upgradeStoreSchema(store.id);\n }\n }\n\n /**\n * Execute a crawl job (web crawling + indexing)\n */\n private async executeCrawlJob(job: Job): Promise<void> {\n const { storeId, url, crawlInstruction, extractInstruction, maxPages, simple, useHeadless } =\n job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for crawl job');\n }\n if (url === undefined || typeof url !== 'string') {\n throw new Error('URL required for crawl job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (store?.type !== 'web') {\n throw new Error(`Web store ${storeId} not found`);\n }\n\n const resolvedMaxPages = typeof maxPages === 'number' ? maxPages : 50;\n const crawler = new IntelligentCrawler(this.crawlConfig);\n\n // Listen for progress events\n crawler.on('progress', (progress: CrawlProgress) => {\n // Check if job was cancelled - just return early, for-await loop will throw and finally will cleanup\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n return;\n }\n\n // Crawling is 80% of total progress (0-80%)\n const crawlProgress = (progress.pagesVisited / resolvedMaxPages) * 80;\n\n this.jobService.updateJob(job.id, {\n message:\n progress.message ??\n `Crawling page ${String(progress.pagesVisited)}/${String(resolvedMaxPages)}`,\n progress: Math.min(80, crawlProgress),\n details: {\n pagesCrawled: progress.pagesVisited,\n phase: 'crawling',\n phaseStep: 1,\n phaseTotalSteps: 2,\n },\n });\n });\n\n try {\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n const docs: Document[] = [];\n\n // Build crawl options, only including defined values\n const crawlOptions: {\n maxPages: number;\n simple: boolean;\n useHeadless: boolean;\n crawlInstruction?: string;\n extractInstruction?: string;\n } = {\n maxPages: resolvedMaxPages,\n simple: simple ?? false,\n useHeadless: useHeadless ?? true, // Default to headless for reliability\n };\n if (crawlInstruction !== undefined) {\n crawlOptions.crawlInstruction = crawlInstruction;\n }\n if (extractInstruction !== undefined) {\n crawlOptions.extractInstruction = extractInstruction;\n }\n\n // Crawl pages using IntelligentCrawler\n for await (const result of crawler.crawl(url, crawlOptions)) {\n // Check cancellation between pages\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Embed and index the content (use extracted if available, otherwise markdown)\n const contentToEmbed = result.extracted ?? result.markdown;\n const vector = await this.embeddingEngine.embedDocument(contentToEmbed);\n\n docs.push({\n id: createDocumentId(`${store.id}-${createHash('md5').update(result.url).digest('hex')}`),\n content: contentToEmbed,\n vector,\n metadata: {\n type: 'web',\n storeId: store.id,\n url: result.url,\n title: result.title,\n extracted: result.extracted !== undefined,\n depth: result.depth,\n indexedAt: new Date().toISOString(),\n },\n });\n }\n\n // Index all documents (remaining 20%) - transition to indexing phase\n if (docs.length > 0) {\n this.jobService.updateJob(job.id, {\n message: 'Indexing crawled documents...',\n progress: 85,\n details: { phase: 'indexing', phaseStep: 2, phaseTotalSteps: 2 },\n });\n\n // Clear existing documents to prevent duplicates on re-crawl\n await this.lanceStore.clearAllDocuments(store.id);\n await this.lanceStore.addDocuments(store.id, docs);\n // Create FTS index for full-text search\n await this.lanceStore.createFtsIndex(store.id);\n }\n\n this.jobService.updateJob(job.id, {\n message: `Crawled and indexed ${String(docs.length)} pages`,\n progress: 100,\n details: { pagesCrawled: docs.length },\n });\n\n // Upgrade schema v1 stores to current version after successful re-crawl\n // This adds modelId so the store becomes searchable\n if (!store.modelId) {\n await this.storeService.upgradeStoreSchema(store.id);\n }\n } finally {\n await crawler.stop();\n }\n }\n}\n","import fs from 'fs';\nimport path from 'path';\n\n/**\n * Result of a PID file delete operation.\n * Delete operations are best-effort and should not throw.\n */\nexport interface PidFileResult {\n success: boolean;\n error?: Error;\n}\n\n/**\n * Context for PID file deletion - indicates when the delete is happening.\n * Used for logging/debugging purposes.\n */\nexport type PidFileDeleteContext = 'sigterm' | 'success' | 'failure';\n\n/**\n * Write PID file - CRITICAL operation that must succeed.\n *\n * If the PID file cannot be written, the job cannot be cancelled through\n * the job management system. This is a critical failure and the job\n * should not proceed.\n *\n * @param pidFile - Absolute path to the PID file\n * @param pid - Process ID to write\n * @throws Error if PID file cannot be written\n */\nexport function writePidFile(pidFile: string, pid: number): void {\n try {\n fs.writeFileSync(pidFile, pid.toString(), 'utf-8');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(\n `CRITICAL: Failed to write PID file ${pidFile}. ` +\n `Job cannot be cancelled without PID file. ` +\n `Original error: ${message}`\n );\n }\n}\n\n/**\n * Delete PID file - best-effort cleanup during shutdown.\n *\n * This operation should NEVER throw. During process shutdown (SIGTERM,\n * job success, job failure), failing to delete a PID file should not\n * prevent the process from exiting cleanly.\n *\n * Stale PID files are cleaned up by JobService.cleanupOldJobs().\n *\n * @param pidFile - Absolute path to the PID file\n * @param _context - Context indicating when the delete is happening (for future logging)\n * @returns Result indicating success or failure with error details\n */\nexport function deletePidFile(pidFile: string, _context: PidFileDeleteContext): PidFileResult {\n try {\n fs.unlinkSync(pidFile);\n return { success: true };\n } catch (error) {\n // ENOENT = file doesn't exist - that's success (nothing to delete)\n if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {\n return { success: true };\n }\n // Any other error = failure (permission denied, etc.)\n return {\n success: false,\n error: error instanceof Error ? error : new Error(String(error)),\n };\n }\n}\n\n/**\n * Build the path to a PID file for a given job.\n *\n * @param jobsDir - Directory where job files are stored\n * @param jobId - Job identifier\n * @returns Absolute path to the PID file\n */\nexport function buildPidFilePath(jobsDir: string, jobId: string): string {\n return path.join(jobsDir, `${jobId}.pid`);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;AACA,SAAS,gBAAgB;;;ACDzB,SAAS,kBAAkB;AAgB3B,IAAM,SAAS,aAAa,mBAAmB;AASxC,SAAS,uBACd,SACA,OACA,QAAgB,KACR;AACR,MAAI,UAAU,EAAG,QAAO;AACxB,SAAQ,UAAU,QAAS;AAC7B;AAEO,IAAM,mBAAN,MAAuB;AAAA,EAC5B,YACmB,YACA,cACA,cACA,YACA,iBACA,aACjB;AANiB;AACA;AACA;AACA;AACA;AACA;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA,EAKH,MAAM,WAAW,OAA8B;AAC7C,UAAM,MAAM,KAAK,WAAW,OAAO,KAAK;AAExC,QAAI,CAAC,KAAK;AACR,YAAM,IAAI,MAAM,OAAO,KAAK,YAAY;AAAA,IAC1C;AAEA,QAAI;AACF,aAAO,KAAK,EAAE,OAAO,MAAM,IAAI,KAAK,GAAG,wBAAwB;AAG/D,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,YAAY,IAAI,IAAI;AAAA,QAC7B,UAAU;AAAA,QACV,SAAS,EAAE,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACjD,CAAC;AAGD,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF;AACE,gBAAM,IAAI,MAAM,qBAAqB,OAAO,IAAI,IAAI,CAAC,EAAE;AAAA,MAC3D;AAGA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,GAAG,IAAI,IAAI;AAAA,QACpB,SAAS,EAAE,cAAa,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACnD,CAAC;AAAA,IACH,SAAS,OAAO;AACd,aAAO;AAAA,QACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,QACvE;AAAA,MACF;AAGA,YAAM,eAAwC;AAAA,QAC5C,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,MACtC;AACA,UAAI,iBAAiB,SAAS,MAAM,UAAU,QAAW;AACvD,qBAAa,OAAO,IAAI,MAAM;AAAA,MAChC,OAAO;AACL,qBAAa,OAAO,IAAI,OAAO,KAAK;AAAA,MACtC;AACA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AAAA,QAClD,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAMA,SAAK,WAAW,UAAU,IAAI,IAAI;AAAA,MAChC,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,UAAU;AAAA,MACV,SAAS,EAAE,OAAO,YAAY,WAAW,GAAG,iBAAiB,EAAE;AAAA,IACjE,CAAC;AAGD,SAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,UAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AAGzC,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,gBAAgB,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AAC3E,cAAM,gBAAgB,KAAK;AAE3B,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA;AAAA,UACpC,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,cAAc,cAAc,OAAO,CAAC;AAC1E,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAGA,SAAK,WAAW,UAAU,IAAI,IAAI;AAAA,MAChC,SAAS,EAAE,OAAO,YAAY,WAAW,GAAG,iBAAiB,EAAE;AAAA,IACjE,CAAC;AAGD,SAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,UAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AAGzC,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAEA,cAAM,WAAW,uBAAuB,MAAM,SAAS,MAAM,KAAK;AAElE,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,QAAQ;AAAA;AAAA,UAC/B,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAIA,QAAI,CAAC,MAAM,SAAS;AAClB,YAAM,KAAK,aAAa,mBAAmB,MAAM,EAAE;AAAA,IACrD;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,SAAS,KAAK,kBAAkB,oBAAoB,UAAU,QAAQ,YAAY,IACxF,IAAI;AAEN,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,QAAI,QAAQ,UAAa,OAAO,QAAQ,UAAU;AAChD,YAAM,IAAI,MAAM,4BAA4B;AAAA,IAC9C;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,OAAO,SAAS,OAAO;AACzB,YAAM,IAAI,MAAM,aAAa,OAAO,YAAY;AAAA,IAClD;AAEA,UAAM,mBAAmB,OAAO,aAAa,WAAW,WAAW;AACnE,UAAM,UAAU,IAAI,mBAAmB,KAAK,WAAW;AAGvD,YAAQ,GAAG,YAAY,CAAC,aAA4B;AAElD,YAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,UAAI,YAAY,WAAW,aAAa;AACtC;AAAA,MACF;AAGA,YAAM,gBAAiB,SAAS,eAAe,mBAAoB;AAEnE,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SACE,SAAS,WACT,iBAAiB,OAAO,SAAS,YAAY,CAAC,IAAI,OAAO,gBAAgB,CAAC;AAAA,QAC5E,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA,QACpC,SAAS;AAAA,UACP,cAAc,SAAS;AAAA,UACvB,OAAO;AAAA,UACP,WAAW;AAAA,UACX,iBAAiB;AAAA,QACnB;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAED,QAAI;AACF,WAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,YAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AACzC,YAAM,OAAmB,CAAC;AAG1B,YAAM,eAMF;AAAA,QACF,UAAU;AAAA,QACV,QAAQ,UAAU;AAAA,QAClB,aAAa,eAAe;AAAA;AAAA,MAC9B;AACA,UAAI,qBAAqB,QAAW;AAClC,qBAAa,mBAAmB;AAAA,MAClC;AACA,UAAI,uBAAuB,QAAW;AACpC,qBAAa,qBAAqB;AAAA,MACpC;AAGA,uBAAiB,UAAU,QAAQ,MAAM,KAAK,YAAY,GAAG;AAE3D,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,iBAAiB,OAAO,aAAa,OAAO;AAClD,cAAM,SAAS,MAAM,KAAK,gBAAgB,cAAc,cAAc;AAEtE,aAAK,KAAK;AAAA,UACR,IAAI,iBAAiB,GAAG,MAAM,EAAE,IAAI,WAAW,KAAK,EAAE,OAAO,OAAO,GAAG,EAAE,OAAO,KAAK,CAAC,EAAE;AAAA,UACxF,SAAS;AAAA,UACT;AAAA,UACA,UAAU;AAAA,YACR,MAAM;AAAA,YACN,SAAS,MAAM;AAAA,YACf,KAAK,OAAO;AAAA,YACZ,OAAO,OAAO;AAAA,YACd,WAAW,OAAO,cAAc;AAAA,YAChC,OAAO,OAAO;AAAA,YACd,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,UACpC;AAAA,QACF,CAAC;AAAA,MACH;AAGA,UAAI,KAAK,SAAS,GAAG;AACnB,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS;AAAA,UACT,UAAU;AAAA,UACV,SAAS,EAAE,OAAO,YAAY,WAAW,GAAG,iBAAiB,EAAE;AAAA,QACjE,CAAC;AAGD,cAAM,KAAK,WAAW,kBAAkB,MAAM,EAAE;AAChD,cAAM,KAAK,WAAW,aAAa,MAAM,IAAI,IAAI;AAEjD,cAAM,KAAK,WAAW,eAAe,MAAM,EAAE;AAAA,MAC/C;AAEA,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SAAS,uBAAuB,OAAO,KAAK,MAAM,CAAC;AAAA,QACnD,UAAU;AAAA,QACV,SAAS,EAAE,cAAc,KAAK,OAAO;AAAA,MACvC,CAAC;AAID,UAAI,CAAC,MAAM,SAAS;AAClB,cAAM,KAAK,aAAa,mBAAmB,MAAM,EAAE;AAAA,MACrD;AAAA,IACF,UAAE;AACA,YAAM,QAAQ,KAAK;AAAA,IACrB;AAAA,EACF;AACF;;;ACzWA,OAAO,QAAQ;AACf,OAAO,UAAU;AA4BV,SAAS,aAAa,SAAiB,KAAmB;AAC/D,MAAI;AACF,OAAG,cAAc,SAAS,IAAI,SAAS,GAAG,OAAO;AAAA,EACnD,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,+DAExB,OAAO;AAAA,IAC9B;AAAA,EACF;AACF;AAeO,SAAS,cAAc,SAAiB,UAA+C;AAC5F,MAAI;AACF,OAAG,WAAW,OAAO;AACrB,WAAO,EAAE,SAAS,KAAK;AAAA,EACzB,SAAS,OAAO;AAEd,QAAI,iBAAiB,SAAS,UAAU,SAAS,MAAM,SAAS,UAAU;AACxE,aAAO,EAAE,SAAS,KAAK;AAAA,IACzB;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,IACjE;AAAA,EACF;AACF;AASO,SAAS,iBAAiB,SAAiB,OAAuB;AACvE,SAAO,KAAK,KAAK,SAAS,GAAG,KAAK,MAAM;AAC1C;;;AF7DA,SAAS,iBAAiB,UAAwB;AAChD,MAAI,SAAS,MAAM,UAAU;AAE3B,eAAW,MAAM;AACf,cAAQ,KAAK,QAAQ,KAAK,SAAS;AAAA,IACrC,GAAG,GAAG;AAAA,EACR,OAAO;AACL,YAAQ,KAAK,QAAQ;AAAA,EACvB;AACF;AAEA,IAAMA,UAAS,aAAa,uBAAuB;AAUnD,eAAe,OAAsB;AACnC,QAAM,QAAQ,QAAQ,KAAK,CAAC;AAC5B,QAAM,UAAU,QAAQ,IAAI,iBAAiB;AAE7C,MAAI,UAAU,UAAa,UAAU,IAAI;AACvC,IAAAA,QAAO,MAAM,wDAAwD;AACrE,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,QAAM,aAAa,IAAI,WAAW,OAAO;AACzC,QAAM,WAAW,MAAM,eAAe,QAAW,OAAO;AAGxD,QAAM,UAAU;AAAA,IACd,WAAW,SAAS;AAAA;AAAA,IACpB;AAAA,EACF;AAEA,MAAI;AACF,iBAAa,SAAS,QAAQ,GAAG;AAAA,EACnC,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MAChE;AAAA,IACF;AACA,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,UAAQ,GAAG,WAAW,MAAM;AAC1B,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,kCAAkC;AACzD,eAAW,UAAU,OAAO;AAAA,MAC1B,QAAQ;AAAA,MACR,SAAS;AAAA,IACX,CAAC;AAGD,UAAM,eAAe,cAAc,SAAS,SAAS;AACrD,QAAI,CAAC,aAAa,WAAW,aAAa,UAAU,QAAW;AAC7D,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,aAAa,MAAM,QAAQ;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAGA,SAAK,eAAe,EAAE,QAAQ,MAAM,QAAQ,KAAK,CAAC,CAAC;AAAA,EACrD,CAAC;AAGD,QAAM,YAAY,MAAM,SAAS,OAAO,KAAK;AAC7C,QAAM,SAAS,IAAI;AAAA,IACjB;AAAA,IACA,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,UAAU;AAAA,EACZ;AAEA,MAAI;AACF,UAAM,OAAO,WAAW,KAAK;AAG7B,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,4BAA4B;AACnD,UAAM,gBAAgB,QAAQ;AAC9B,UAAM,eAAe;AACrB,qBAAiB,CAAC;AAAA,EACpB,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MACvE;AAAA,IACF;AAGA,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,UAAM,gBAAgB,QAAQ;AAC9B,UAAM,eAAe;AACrB,qBAAiB,CAAC;AAAA,EACpB;AACF;AAEA,KAAK,EAAE,MAAM,OAAO,UAAmB;AACrC,EAAAA,QAAO;AAAA,IACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,IAChE;AAAA,EACF;AACA,QAAM,eAAe;AACrB,mBAAiB,CAAC;AACpB,CAAC;","names":["logger"]}
1
+ {"version":3,"sources":["../../src/workers/background-worker-cli.ts","../../src/workers/background-worker.ts","../../src/workers/pid-file.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { platform } from 'os';\nimport { BackgroundWorker } from './background-worker.js';\nimport { writePidFile, deletePidFile, buildPidFilePath } from './pid-file.js';\nimport { createLogger, shutdownLogger } from '../logging/index.js';\nimport { createServices, destroyServices } from '../services/index.js';\nimport { JobService } from '../services/job.service.js';\n\n/**\n * Force exit the process to avoid ONNX runtime mutex crash on macOS.\n *\n * On macOS, the ONNX runtime (used by transformers.js for embeddings) has a known\n * bug where static mutex cleanup fails during process exit, causing a crash with:\n * \"mutex lock failed: Invalid argument\"\n *\n * This doesn't affect job completion - all work is done and persisted before exit.\n * Using SIGKILL bypasses the problematic cleanup code.\n *\n * See: https://github.com/microsoft/onnxruntime/issues/24579\n */\nfunction forceExitOnMacOS(exitCode: number): void {\n if (platform() === 'darwin') {\n // Give time for any pending I/O to flush\n setTimeout(() => {\n process.kill(process.pid, 'SIGKILL');\n }, 100);\n } else {\n process.exit(exitCode);\n }\n}\n\nconst logger = createLogger('background-worker-cli');\n\n/**\n * Background worker CLI entry point\n *\n * Usage: background-worker-cli <job-id>\n *\n * This process runs detached from the parent and executes a single job.\n */\n\nasync function main(): Promise<void> {\n const jobId = process.argv[2];\n const dataDir = process.env['BLUERA_DATA_DIR'];\n\n if (jobId === undefined || jobId === '') {\n logger.error('Job ID required. Usage: background-worker-cli <job-id>');\n await shutdownLogger();\n process.exit(1);\n }\n\n // Initialize services\n const jobService = new JobService(dataDir);\n const services = await createServices(undefined, dataDir);\n\n // Write PID file for job cancellation - CRITICAL: must succeed or job cannot be cancelled\n const pidFile = buildPidFilePath(\n jobService['jobsDir'], // Access private field for PID path\n jobId\n );\n\n try {\n writePidFile(pidFile, process.pid);\n } catch (error) {\n // CRITICAL: Cannot proceed without PID file - job would be uncancellable\n logger.error(\n { error: error instanceof Error ? error.message : String(error) },\n 'Failed to write PID file'\n );\n await shutdownLogger();\n process.exit(1);\n }\n\n // Handle SIGTERM for graceful shutdown\n process.on('SIGTERM', () => {\n logger.info({ jobId }, 'Received SIGTERM, cancelling job');\n jobService.updateJob(jobId, {\n status: 'cancelled',\n message: 'Job cancelled by user',\n });\n\n // Clean up PID file (best-effort - don't block shutdown)\n const deleteResult = deletePidFile(pidFile, 'sigterm');\n if (!deleteResult.success && deleteResult.error !== undefined) {\n logger.warn(\n { jobId, error: deleteResult.error.message },\n 'Could not remove PID file during SIGTERM'\n );\n }\n\n // Flush logs before exit (best-effort, don't await in signal handler)\n void shutdownLogger().finally(() => process.exit(0));\n });\n\n // Load config and create worker\n const appConfig = await services.config.load();\n const worker = new BackgroundWorker(\n jobService,\n services.store,\n services.index,\n services.lance,\n services.embeddings,\n appConfig.crawl\n );\n\n try {\n await worker.executeJob(jobId);\n\n // Clean up PID file on success (best-effort - don't change exit code)\n const successCleanup = deletePidFile(pidFile, 'success');\n if (!successCleanup.success && successCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: successCleanup.error.message },\n 'Could not remove PID file after success'\n );\n }\n\n logger.info({ jobId }, 'Job completed successfully');\n await destroyServices(services);\n await shutdownLogger();\n forceExitOnMacOS(0);\n } catch (error) {\n // Job service already updated with failure status in BackgroundWorker\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Clean up PID file on failure (best-effort - exit code reflects job failure)\n const failureCleanup = deletePidFile(pidFile, 'failure');\n if (!failureCleanup.success && failureCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: failureCleanup.error.message },\n 'Could not remove PID file after failure'\n );\n }\n\n await destroyServices(services);\n await shutdownLogger();\n forceExitOnMacOS(1);\n }\n}\n\nmain().catch(async (error: unknown) => {\n logger.error(\n { error: error instanceof Error ? error.message : String(error) },\n 'Fatal error in background worker'\n );\n await shutdownLogger();\n forceExitOnMacOS(1);\n});\n","import { createHash } from 'node:crypto';\nimport {\n IntelligentCrawler,\n type CrawlConfig,\n type CrawlProgress,\n} from '../crawl/intelligent-crawler.js';\nimport { createLogger } from '../logging/index.js';\nimport { IndexService } from '../services/index.service.js';\nimport { JobService } from '../services/job.service.js';\nimport { StoreService } from '../services/store.service.js';\nimport { createStoreId, createDocumentId } from '../types/brands.js';\nimport type { EmbeddingEngine } from '../db/embeddings.js';\nimport type { LanceStore } from '../db/lance.js';\nimport type { Document } from '../types/document.js';\nimport type { Job } from '../types/job.js';\n\nconst logger = createLogger('background-worker');\n\n/**\n * Calculate index progress as a percentage, handling division by zero.\n * @param current - Current number of items processed\n * @param total - Total number of items (may be 0)\n * @param scale - Scale factor for progress (default 100 for 0-100%)\n * @returns Progress value, or 0 if total is 0\n */\nexport function calculateIndexProgress(\n current: number,\n total: number,\n scale: number = 100\n): number {\n if (total === 0) return 0;\n return (current / total) * scale;\n}\n\nexport class BackgroundWorker {\n constructor(\n private readonly jobService: JobService,\n private readonly storeService: StoreService,\n private readonly indexService: IndexService,\n private readonly lanceStore: LanceStore,\n private readonly embeddingEngine: EmbeddingEngine,\n private readonly crawlConfig?: CrawlConfig\n ) {}\n\n /**\n * Execute a job based on its type\n */\n async executeJob(jobId: string): Promise<void> {\n const job = this.jobService.getJob(jobId);\n\n if (!job) {\n throw new Error(`Job ${jobId} not found`);\n }\n\n try {\n logger.info({ jobId, type: job.type }, 'Starting job execution');\n\n // Update to running status\n this.jobService.updateJob(jobId, {\n status: 'running',\n message: `Starting ${job.type} operation...`,\n progress: 0,\n details: { startedAt: new Date().toISOString() },\n });\n\n // Execute based on job type\n switch (job.type) {\n case 'clone':\n await this.executeCloneJob(job);\n break;\n case 'index':\n await this.executeIndexJob(job);\n break;\n case 'crawl':\n await this.executeCrawlJob(job);\n break;\n default:\n throw new Error(`Unknown job type: ${String(job.type)}`);\n }\n\n // Mark as completed\n this.jobService.updateJob(jobId, {\n status: 'completed',\n progress: 100,\n message: `${job.type} operation completed successfully`,\n details: { completedAt: new Date().toISOString() },\n });\n } catch (error) {\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Mark as failed\n const errorDetails: Record<string, unknown> = {\n completedAt: new Date().toISOString(),\n };\n if (error instanceof Error && error.stack !== undefined) {\n errorDetails['error'] = error.stack;\n } else {\n errorDetails['error'] = String(error);\n }\n this.jobService.updateJob(jobId, {\n status: 'failed',\n message: error instanceof Error ? error.message : 'Unknown error',\n details: errorDetails,\n });\n throw error;\n }\n }\n\n /**\n * Execute a clone job (git clone + initial indexing)\n */\n private async executeCloneJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for clone job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Clone is already done by the time the job is created\n // (happens in StoreService.create), so we just need to index\n\n // Update progress - cloning considered done (30%), transition to indexing phase\n this.jobService.updateJob(job.id, {\n status: 'running',\n message: 'Repository cloned, starting indexing...',\n progress: 30,\n details: { phase: 'indexing', phaseStep: 2, phaseTotalSteps: 2 },\n });\n\n // Initialize LanceStore with dimensions before indexing\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n\n // Index the repository with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Indexing is 70% of total progress (30-100%)\n const indexProgress = calculateIndexProgress(event.current, event.total, 70);\n const totalProgress = 30 + indexProgress;\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, totalProgress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute an index job (re-indexing existing store)\n */\n private async executeIndexJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for index job');\n }\n\n // Get the store\n const store = await this.storeService.getByIdOrName(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Set indexing phase (single phase for index jobs)\n this.jobService.updateJob(job.id, {\n details: { phase: 'indexing', phaseStep: 1, phaseTotalSteps: 1 },\n });\n\n // Initialize LanceStore with dimensions before indexing\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n\n // Index with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n const progress = calculateIndexProgress(event.current, event.total);\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, progress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n\n // Upgrade schema v1 stores to v2 after successful reindex\n // This adds modelId so the store becomes searchable\n if (!store.modelId) {\n await this.storeService.upgradeStoreSchema(store.id);\n }\n }\n\n /**\n * Execute a crawl job (web crawling + indexing)\n */\n private async executeCrawlJob(job: Job): Promise<void> {\n const { storeId, url, crawlInstruction, extractInstruction, maxPages, useHeadless } =\n job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for crawl job');\n }\n if (url === undefined || typeof url !== 'string') {\n throw new Error('URL required for crawl job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (store?.type !== 'web') {\n throw new Error(`Web store ${storeId} not found`);\n }\n\n const resolvedMaxPages = typeof maxPages === 'number' ? maxPages : 50;\n const crawler = new IntelligentCrawler(this.crawlConfig);\n\n // Listen for progress events\n crawler.on('progress', (progress: CrawlProgress) => {\n // Check if job was cancelled - just return early, for-await loop will throw and finally will cleanup\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n return;\n }\n\n // Crawling is 80% of total progress (0-80%)\n const crawlProgress = (progress.pagesVisited / resolvedMaxPages) * 80;\n\n this.jobService.updateJob(job.id, {\n message:\n progress.message ??\n `Crawling page ${String(progress.pagesVisited)}/${String(resolvedMaxPages)}`,\n progress: Math.min(80, crawlProgress),\n details: {\n pagesCrawled: progress.pagesVisited,\n phase: 'crawling',\n phaseStep: 1,\n phaseTotalSteps: 2,\n },\n });\n });\n\n try {\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n const docs: Document[] = [];\n\n // Build crawl options, only including defined values\n const crawlOptions: {\n maxPages: number;\n useHeadless: boolean;\n crawlInstruction: string;\n extractInstruction?: string;\n } = {\n maxPages: resolvedMaxPages,\n useHeadless: useHeadless ?? true, // Default to headless for reliability\n crawlInstruction:\n typeof crawlInstruction === 'string'\n ? crawlInstruction\n : 'crawl all pages linked from this URL',\n };\n if (extractInstruction !== undefined) {\n crawlOptions.extractInstruction = extractInstruction;\n }\n\n // Crawl pages using IntelligentCrawler\n for await (const result of crawler.crawl(url, crawlOptions)) {\n // Check cancellation between pages\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Embed and index the content (use extracted if available, otherwise markdown)\n const contentToEmbed = result.extracted ?? result.markdown;\n const vector = await this.embeddingEngine.embedDocument(contentToEmbed);\n\n docs.push({\n id: createDocumentId(`${store.id}-${createHash('md5').update(result.url).digest('hex')}`),\n content: contentToEmbed,\n vector,\n metadata: {\n type: 'web',\n storeId: store.id,\n url: result.url,\n title: result.title,\n extracted: result.extracted !== undefined,\n depth: result.depth,\n indexedAt: new Date().toISOString(),\n },\n });\n }\n\n // Index all documents (remaining 20%) - transition to indexing phase\n if (docs.length > 0) {\n this.jobService.updateJob(job.id, {\n message: 'Indexing crawled documents...',\n progress: 85,\n details: { phase: 'indexing', phaseStep: 2, phaseTotalSteps: 2 },\n });\n\n // Clear existing documents to prevent duplicates on re-crawl\n await this.lanceStore.clearAllDocuments(store.id);\n await this.lanceStore.addDocuments(store.id, docs);\n // Create FTS index for full-text search\n await this.lanceStore.createFtsIndex(store.id);\n }\n\n this.jobService.updateJob(job.id, {\n message: `Crawled and indexed ${String(docs.length)} pages`,\n progress: 100,\n details: { pagesCrawled: docs.length },\n });\n\n // Upgrade schema v1 stores to current version after successful re-crawl\n // This adds modelId so the store becomes searchable\n if (!store.modelId) {\n await this.storeService.upgradeStoreSchema(store.id);\n }\n } finally {\n await crawler.stop();\n }\n }\n}\n","import fs from 'fs';\nimport path from 'path';\n\n/**\n * Result of a PID file delete operation.\n * Delete operations are best-effort and should not throw.\n */\nexport interface PidFileResult {\n success: boolean;\n error?: Error;\n}\n\n/**\n * Context for PID file deletion - indicates when the delete is happening.\n * Used for logging/debugging purposes.\n */\nexport type PidFileDeleteContext = 'sigterm' | 'success' | 'failure';\n\n/**\n * Write PID file - CRITICAL operation that must succeed.\n *\n * If the PID file cannot be written, the job cannot be cancelled through\n * the job management system. This is a critical failure and the job\n * should not proceed.\n *\n * @param pidFile - Absolute path to the PID file\n * @param pid - Process ID to write\n * @throws Error if PID file cannot be written\n */\nexport function writePidFile(pidFile: string, pid: number): void {\n try {\n fs.writeFileSync(pidFile, pid.toString(), 'utf-8');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(\n `CRITICAL: Failed to write PID file ${pidFile}. ` +\n `Job cannot be cancelled without PID file. ` +\n `Original error: ${message}`\n );\n }\n}\n\n/**\n * Delete PID file - best-effort cleanup during shutdown.\n *\n * This operation should NEVER throw. During process shutdown (SIGTERM,\n * job success, job failure), failing to delete a PID file should not\n * prevent the process from exiting cleanly.\n *\n * Stale PID files are cleaned up by JobService.cleanupOldJobs().\n *\n * @param pidFile - Absolute path to the PID file\n * @param _context - Context indicating when the delete is happening (for future logging)\n * @returns Result indicating success or failure with error details\n */\nexport function deletePidFile(pidFile: string, _context: PidFileDeleteContext): PidFileResult {\n try {\n fs.unlinkSync(pidFile);\n return { success: true };\n } catch (error) {\n // ENOENT = file doesn't exist - that's success (nothing to delete)\n if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {\n return { success: true };\n }\n // Any other error = failure (permission denied, etc.)\n return {\n success: false,\n error: error instanceof Error ? error : new Error(String(error)),\n };\n }\n}\n\n/**\n * Build the path to a PID file for a given job.\n *\n * @param jobsDir - Directory where job files are stored\n * @param jobId - Job identifier\n * @returns Absolute path to the PID file\n */\nexport function buildPidFilePath(jobsDir: string, jobId: string): string {\n return path.join(jobsDir, `${jobId}.pid`);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;AACA,SAAS,gBAAgB;;;ACDzB,SAAS,kBAAkB;AAgB3B,IAAM,SAAS,aAAa,mBAAmB;AASxC,SAAS,uBACd,SACA,OACA,QAAgB,KACR;AACR,MAAI,UAAU,EAAG,QAAO;AACxB,SAAQ,UAAU,QAAS;AAC7B;AAEO,IAAM,mBAAN,MAAuB;AAAA,EAC5B,YACmB,YACA,cACA,cACA,YACA,iBACA,aACjB;AANiB;AACA;AACA;AACA;AACA;AACA;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA,EAKH,MAAM,WAAW,OAA8B;AAC7C,UAAM,MAAM,KAAK,WAAW,OAAO,KAAK;AAExC,QAAI,CAAC,KAAK;AACR,YAAM,IAAI,MAAM,OAAO,KAAK,YAAY;AAAA,IAC1C;AAEA,QAAI;AACF,aAAO,KAAK,EAAE,OAAO,MAAM,IAAI,KAAK,GAAG,wBAAwB;AAG/D,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,YAAY,IAAI,IAAI;AAAA,QAC7B,UAAU;AAAA,QACV,SAAS,EAAE,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACjD,CAAC;AAGD,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF;AACE,gBAAM,IAAI,MAAM,qBAAqB,OAAO,IAAI,IAAI,CAAC,EAAE;AAAA,MAC3D;AAGA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,GAAG,IAAI,IAAI;AAAA,QACpB,SAAS,EAAE,cAAa,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACnD,CAAC;AAAA,IACH,SAAS,OAAO;AACd,aAAO;AAAA,QACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,QACvE;AAAA,MACF;AAGA,YAAM,eAAwC;AAAA,QAC5C,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,MACtC;AACA,UAAI,iBAAiB,SAAS,MAAM,UAAU,QAAW;AACvD,qBAAa,OAAO,IAAI,MAAM;AAAA,MAChC,OAAO;AACL,qBAAa,OAAO,IAAI,OAAO,KAAK;AAAA,MACtC;AACA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AAAA,QAClD,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAMA,SAAK,WAAW,UAAU,IAAI,IAAI;AAAA,MAChC,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,UAAU;AAAA,MACV,SAAS,EAAE,OAAO,YAAY,WAAW,GAAG,iBAAiB,EAAE;AAAA,IACjE,CAAC;AAGD,SAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,UAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AAGzC,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,gBAAgB,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AAC3E,cAAM,gBAAgB,KAAK;AAE3B,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA;AAAA,UACpC,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,cAAc,cAAc,OAAO,CAAC;AAC1E,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAGA,SAAK,WAAW,UAAU,IAAI,IAAI;AAAA,MAChC,SAAS,EAAE,OAAO,YAAY,WAAW,GAAG,iBAAiB,EAAE;AAAA,IACjE,CAAC;AAGD,SAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,UAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AAGzC,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAEA,cAAM,WAAW,uBAAuB,MAAM,SAAS,MAAM,KAAK;AAElE,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,QAAQ;AAAA;AAAA,UAC/B,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAIA,QAAI,CAAC,MAAM,SAAS;AAClB,YAAM,KAAK,aAAa,mBAAmB,MAAM,EAAE;AAAA,IACrD;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,SAAS,KAAK,kBAAkB,oBAAoB,UAAU,YAAY,IAChF,IAAI;AAEN,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,QAAI,QAAQ,UAAa,OAAO,QAAQ,UAAU;AAChD,YAAM,IAAI,MAAM,4BAA4B;AAAA,IAC9C;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,OAAO,SAAS,OAAO;AACzB,YAAM,IAAI,MAAM,aAAa,OAAO,YAAY;AAAA,IAClD;AAEA,UAAM,mBAAmB,OAAO,aAAa,WAAW,WAAW;AACnE,UAAM,UAAU,IAAI,mBAAmB,KAAK,WAAW;AAGvD,YAAQ,GAAG,YAAY,CAAC,aAA4B;AAElD,YAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,UAAI,YAAY,WAAW,aAAa;AACtC;AAAA,MACF;AAGA,YAAM,gBAAiB,SAAS,eAAe,mBAAoB;AAEnE,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SACE,SAAS,WACT,iBAAiB,OAAO,SAAS,YAAY,CAAC,IAAI,OAAO,gBAAgB,CAAC;AAAA,QAC5E,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA,QACpC,SAAS;AAAA,UACP,cAAc,SAAS;AAAA,UACvB,OAAO;AAAA,UACP,WAAW;AAAA,UACX,iBAAiB;AAAA,QACnB;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAED,QAAI;AACF,WAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,YAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AACzC,YAAM,OAAmB,CAAC;AAG1B,YAAM,eAKF;AAAA,QACF,UAAU;AAAA,QACV,aAAa,eAAe;AAAA;AAAA,QAC5B,kBACE,OAAO,qBAAqB,WACxB,mBACA;AAAA,MACR;AACA,UAAI,uBAAuB,QAAW;AACpC,qBAAa,qBAAqB;AAAA,MACpC;AAGA,uBAAiB,UAAU,QAAQ,MAAM,KAAK,YAAY,GAAG;AAE3D,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,iBAAiB,OAAO,aAAa,OAAO;AAClD,cAAM,SAAS,MAAM,KAAK,gBAAgB,cAAc,cAAc;AAEtE,aAAK,KAAK;AAAA,UACR,IAAI,iBAAiB,GAAG,MAAM,EAAE,IAAI,WAAW,KAAK,EAAE,OAAO,OAAO,GAAG,EAAE,OAAO,KAAK,CAAC,EAAE;AAAA,UACxF,SAAS;AAAA,UACT;AAAA,UACA,UAAU;AAAA,YACR,MAAM;AAAA,YACN,SAAS,MAAM;AAAA,YACf,KAAK,OAAO;AAAA,YACZ,OAAO,OAAO;AAAA,YACd,WAAW,OAAO,cAAc;AAAA,YAChC,OAAO,OAAO;AAAA,YACd,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,UACpC;AAAA,QACF,CAAC;AAAA,MACH;AAGA,UAAI,KAAK,SAAS,GAAG;AACnB,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS;AAAA,UACT,UAAU;AAAA,UACV,SAAS,EAAE,OAAO,YAAY,WAAW,GAAG,iBAAiB,EAAE;AAAA,QACjE,CAAC;AAGD,cAAM,KAAK,WAAW,kBAAkB,MAAM,EAAE;AAChD,cAAM,KAAK,WAAW,aAAa,MAAM,IAAI,IAAI;AAEjD,cAAM,KAAK,WAAW,eAAe,MAAM,EAAE;AAAA,MAC/C;AAEA,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SAAS,uBAAuB,OAAO,KAAK,MAAM,CAAC;AAAA,QACnD,UAAU;AAAA,QACV,SAAS,EAAE,cAAc,KAAK,OAAO;AAAA,MACvC,CAAC;AAID,UAAI,CAAC,MAAM,SAAS;AAClB,cAAM,KAAK,aAAa,mBAAmB,MAAM,EAAE;AAAA,MACrD;AAAA,IACF,UAAE;AACA,YAAM,QAAQ,KAAK;AAAA,IACrB;AAAA,EACF;AACF;;;ACxWA,OAAO,QAAQ;AACf,OAAO,UAAU;AA4BV,SAAS,aAAa,SAAiB,KAAmB;AAC/D,MAAI;AACF,OAAG,cAAc,SAAS,IAAI,SAAS,GAAG,OAAO;AAAA,EACnD,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,+DAExB,OAAO;AAAA,IAC9B;AAAA,EACF;AACF;AAeO,SAAS,cAAc,SAAiB,UAA+C;AAC5F,MAAI;AACF,OAAG,WAAW,OAAO;AACrB,WAAO,EAAE,SAAS,KAAK;AAAA,EACzB,SAAS,OAAO;AAEd,QAAI,iBAAiB,SAAS,UAAU,SAAS,MAAM,SAAS,UAAU;AACxE,aAAO,EAAE,SAAS,KAAK;AAAA,IACzB;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,IACjE;AAAA,EACF;AACF;AASO,SAAS,iBAAiB,SAAiB,OAAuB;AACvE,SAAO,KAAK,KAAK,SAAS,GAAG,KAAK,MAAM;AAC1C;;;AF7DA,SAAS,iBAAiB,UAAwB;AAChD,MAAI,SAAS,MAAM,UAAU;AAE3B,eAAW,MAAM;AACf,cAAQ,KAAK,QAAQ,KAAK,SAAS;AAAA,IACrC,GAAG,GAAG;AAAA,EACR,OAAO;AACL,YAAQ,KAAK,QAAQ;AAAA,EACvB;AACF;AAEA,IAAMA,UAAS,aAAa,uBAAuB;AAUnD,eAAe,OAAsB;AACnC,QAAM,QAAQ,QAAQ,KAAK,CAAC;AAC5B,QAAM,UAAU,QAAQ,IAAI,iBAAiB;AAE7C,MAAI,UAAU,UAAa,UAAU,IAAI;AACvC,IAAAA,QAAO,MAAM,wDAAwD;AACrE,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,QAAM,aAAa,IAAI,WAAW,OAAO;AACzC,QAAM,WAAW,MAAM,eAAe,QAAW,OAAO;AAGxD,QAAM,UAAU;AAAA,IACd,WAAW,SAAS;AAAA;AAAA,IACpB;AAAA,EACF;AAEA,MAAI;AACF,iBAAa,SAAS,QAAQ,GAAG;AAAA,EACnC,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MAChE;AAAA,IACF;AACA,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,UAAQ,GAAG,WAAW,MAAM;AAC1B,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,kCAAkC;AACzD,eAAW,UAAU,OAAO;AAAA,MAC1B,QAAQ;AAAA,MACR,SAAS;AAAA,IACX,CAAC;AAGD,UAAM,eAAe,cAAc,SAAS,SAAS;AACrD,QAAI,CAAC,aAAa,WAAW,aAAa,UAAU,QAAW;AAC7D,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,aAAa,MAAM,QAAQ;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAGA,SAAK,eAAe,EAAE,QAAQ,MAAM,QAAQ,KAAK,CAAC,CAAC;AAAA,EACrD,CAAC;AAGD,QAAM,YAAY,MAAM,SAAS,OAAO,KAAK;AAC7C,QAAM,SAAS,IAAI;AAAA,IACjB;AAAA,IACA,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,UAAU;AAAA,EACZ;AAEA,MAAI;AACF,UAAM,OAAO,WAAW,KAAK;AAG7B,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,4BAA4B;AACnD,UAAM,gBAAgB,QAAQ;AAC9B,UAAM,eAAe;AACrB,qBAAiB,CAAC;AAAA,EACpB,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MACvE;AAAA,IACF;AAGA,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,UAAM,gBAAgB,QAAQ;AAC9B,UAAM,eAAe;AACrB,qBAAiB,CAAC;AAAA,EACpB;AACF;AAEA,KAAK,EAAE,MAAM,OAAO,UAAmB;AACrC,EAAAA,QAAO;AAAA,IACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,IAChE;AAAA,EACF;AACA,QAAM,eAAe;AACrB,mBAAiB,CAAC;AACpB,CAAC;","names":["logger"]}
package/hooks/hooks.json CHANGED
@@ -22,18 +22,6 @@
22
22
  ]
23
23
  }
24
24
  ],
25
- "PreToolUse": [
26
- {
27
- "matcher": "Grep|Read",
28
- "hooks": [
29
- {
30
- "type": "command",
31
- "command": "python3 ${CLAUDE_PLUGIN_ROOT:-.}/hooks/pretooluse-bk-suggest.py",
32
- "timeout": 2
33
- }
34
- ]
35
- }
36
- ],
37
25
  "PostToolUse": [
38
26
  {
39
27
  "matcher": "Grep",
@@ -74,16 +62,6 @@
74
62
  "timeout": 2
75
63
  }
76
64
  ]
77
- },
78
- {
79
- "matcher": "mcp__.*bluera-knowledge__search",
80
- "hooks": [
81
- {
82
- "type": "command",
83
- "command": "echo 'TIP: Use mcp__bluera-knowledge__get_full_context with the result ID for complete code context.'",
84
- "timeout": 1
85
- }
86
- ]
87
65
  }
88
66
  ],
89
67
  "UserPromptSubmit": [
@@ -94,11 +72,6 @@
94
72
  "command": "${CLAUDE_PLUGIN_ROOT:-.}/hooks/job-status-hook.sh",
95
73
  "timeout": 2,
96
74
  "async": true
97
- },
98
- {
99
- "type": "command",
100
- "command": "python3 ${CLAUDE_PLUGIN_ROOT:-.}/hooks/skill-activation.py",
101
- "timeout": 2
102
75
  }
103
76
  ]
104
77
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bluera-knowledge",
3
- "version": "0.33.1",
3
+ "version": "0.34.0",
4
4
  "description": "CLI tool for managing knowledge stores with semantic search",
5
5
  "type": "module",
6
6
  "bin": {
@@ -293,23 +293,7 @@ run_test_contains "Sync created store" "sync-test-store" "bluera-knowledge store
293
293
  # Clean up sync test store
294
294
  bluera-knowledge store delete "sync-test-store" --force -d "$DATA_DIR" 2>/dev/null || true
295
295
 
296
- # Test crawl (simple mode - using local mock server for fast, deterministic tests)
297
- log_header "Testing crawl (simple mode)"
298
- CRAWL_SIMPLE_STORE="local-validation-crawl-simple-$TIMESTAMP"
299
296
  MOCK_PORT="${MOCK_SERVER_PORT:-8765}"
300
- # Create web store first
301
- run_test "bluera-knowledge store create (web, simple)" "bluera-knowledge store create '$CRAWL_SIMPLE_STORE' -t web -s 'http://127.0.0.1:$MOCK_PORT' -d '$DATA_DIR'"
302
- # Crawl using --simple mode (BFS, no Claude CLI) against local mock server
303
- log "Running: bluera-knowledge crawl 'http://127.0.0.1:$MOCK_PORT/' '$CRAWL_SIMPLE_STORE' --simple --max-pages 3 --fast -d '$DATA_DIR'"
304
- CRAWL_OUTPUT=$(bluera-knowledge crawl "http://127.0.0.1:$MOCK_PORT/" "$CRAWL_SIMPLE_STORE" --simple --max-pages 3 --fast -d "$DATA_DIR" 2>&1 | tee -a "$LOG_FILE")
305
- if echo "$CRAWL_OUTPUT" | grep -q "Crawled 0 pages"; then
306
- fail "bluera-knowledge crawl --simple (0 pages from mock server)"
307
- else
308
- pass "bluera-knowledge crawl --simple"
309
- # Verify crawl indexed content
310
- run_test_contains "Simple crawl indexed content" "Mock" "bluera-knowledge search 'mock' --stores '$CRAWL_SIMPLE_STORE' -d '$DATA_DIR' --detail full"
311
- fi
312
- bluera-knowledge store delete "$CRAWL_SIMPLE_STORE" --force -d "$DATA_DIR" 2>/dev/null || true
313
297
 
314
298
  # Test crawl (intelligent mode - requires Claude CLI, uses local mock server)
315
299
  log_header "Testing crawl (intelligent mode)"
@@ -18,7 +18,7 @@ Crawling and indexing: $ARGUMENTS
18
18
  node ${CLAUDE_PLUGIN_ROOT}/dist/index.js crawl $ARGUMENTS
19
19
  ```
20
20
 
21
- The web pages will be crawled with intelligent link selection and optional natural language extraction, then indexed for searching.
21
+ The web pages will be crawled with Claude-driven intelligent link selection and optional natural language extraction, then indexed for searching. Requires Claude Code to be installed.
22
22
 
23
23
  **Note:** The web store is auto-created if it doesn't exist. No need to create the store first.
24
24
 
@@ -39,11 +39,6 @@ The web pages will be crawled with intelligent link selection and optional natur
39
39
  /bluera-knowledge:crawl https://docs.example.com my-docs --crawl "API reference pages" --extract "API endpoints and parameters"
40
40
  ```
41
41
 
42
- **Simple BFS mode:**
43
- ```
44
- /bluera-knowledge:crawl https://example.com/docs docs-store --simple
45
- ```
46
-
47
42
  **Fast mode (axios-only, no JavaScript rendering):**
48
43
  ```
49
44
  /bluera-knowledge:crawl https://example.com/docs docs-store --fast --max-pages 20
@@ -53,7 +48,6 @@ The web pages will be crawled with intelligent link selection and optional natur
53
48
 
54
49
  - `--crawl <instruction>` - Natural language instruction for which pages to crawl (e.g., "all Getting Started pages")
55
50
  - `--extract <instruction>` - Natural language instruction for what content to extract (e.g., "extract API references")
56
- - `--simple` - Use simple BFS (breadth-first search) mode instead of intelligent crawling
57
51
  - `--max-pages <number>` - Maximum number of pages to crawl (default: 50)
58
52
  - `--fast` - Use fast axios-only mode instead of headless browser
59
53
  - Default behavior uses headless browser (Playwright via crawl4ai) for JavaScript-rendered sites
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: knowledge-search
3
- description: Query BK for library internals via vector search or direct Grep/Read
3
+ description: Use when the user asks about libraries, frameworks, dependencies, API references, implementation details, or needs code examples from third-party packages. Activates for library API questions, dependency errors, framework configuration, or code generation involving external packages.
4
4
  ---
5
5
 
6
6
  # Using Bluera Knowledge (BK)