bluera-knowledge 0.18.1 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/README.md +19 -28
- package/dist/{brands-3EYIYV6T.js → brands-XDTIHFNU.js} +2 -1
- package/dist/{chunk-EZXJ3W5X.js → chunk-27Y4ENUD.js} +2 -2
- package/dist/chunk-DGUM43GV.js +11 -0
- package/dist/{chunk-VUGQ7HAR.js → chunk-EQYSYRQJ.js} +129 -50
- package/dist/chunk-EQYSYRQJ.js.map +1 -0
- package/dist/{chunk-RDDGZIDL.js → chunk-KQLTWB4T.js} +44 -120
- package/dist/chunk-KQLTWB4T.js.map +1 -0
- package/dist/index.js +5 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.d.ts +0 -29
- package/dist/mcp/server.js +3 -2
- package/dist/{watch.service-VDSUQ72Z.js → watch.service-NXRWLJG6.js} +2 -1
- package/dist/watch.service-NXRWLJG6.js.map +1 -0
- package/dist/workers/background-worker-cli.js +3 -2
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/package.json +7 -5
- package/python/ast_worker.py +209 -0
- package/dist/chunk-RDDGZIDL.js.map +0 -1
- package/dist/chunk-VUGQ7HAR.js.map +0 -1
- package/python/crawl_worker.py +0 -280
- /package/dist/{brands-3EYIYV6T.js.map → brands-XDTIHFNU.js.map} +0 -0
- /package/dist/{chunk-EZXJ3W5X.js.map → chunk-27Y4ENUD.js.map} +0 -0
- /package/dist/{watch.service-VDSUQ72Z.js.map → chunk-DGUM43GV.js.map} +0 -0
package/dist/mcp/server.d.ts
CHANGED

@@ -76,33 +76,6 @@ declare class CodeGraph {
     };
 }
 
-declare const CrawlResultSchema: z.ZodObject<{
-    pages: z.ZodArray<z.ZodObject<{
-        url: z.ZodString;
-        title: z.ZodString;
-        content: z.ZodString;
-        links: z.ZodArray<z.ZodString>;
-        crawledAt: z.ZodString;
-    }, z.core.$strip>>;
-}, z.core.$strip>;
-declare const HeadlessResultSchema: z.ZodObject<{
-    html: z.ZodString;
-    markdown: z.ZodString;
-    links: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
-        href: z.ZodString;
-        text: z.ZodString;
-        title: z.ZodOptional<z.ZodString>;
-        base_domain: z.ZodOptional<z.ZodString>;
-        head_data: z.ZodOptional<z.ZodUnknown>;
-        head_extraction_status: z.ZodOptional<z.ZodUnknown>;
-        head_extraction_error: z.ZodOptional<z.ZodUnknown>;
-        intrinsic_score: z.ZodOptional<z.ZodNumber>;
-        contextual_score: z.ZodOptional<z.ZodUnknown>;
-        total_score: z.ZodOptional<z.ZodUnknown>;
-    }, z.core.$strip>, z.ZodString]>>;
-}, z.core.$strip>;
-type CrawlResult = z.infer<typeof CrawlResultSchema>;
-type HeadlessResult = z.infer<typeof HeadlessResultSchema>;
 declare const ParsePythonResultSchema: z.ZodObject<{
     nodes: z.ZodArray<z.ZodObject<{
         type: z.ZodEnum<{

@@ -140,8 +113,6 @@ declare class PythonBridge {
     private stdoutReadline;
     private stderrReadline;
     start(): Promise<void>;
-    crawl(url: string, timeoutMs?: number): Promise<CrawlResult>;
-    fetchHeadless(url: string, timeoutMs?: number): Promise<HeadlessResult>;
     parsePython(code: string, filePath: string, timeoutMs?: number): Promise<ParsePythonResult>;
     stop(): Promise<void>;
     private rejectAllPending;

package/dist/mcp/server.js
CHANGED

@@ -1,10 +1,11 @@
 import {
   createMCPServer,
   runMCPServer
-} from "../chunk-
-import "../chunk-
+} from "../chunk-27Y4ENUD.js";
+import "../chunk-KQLTWB4T.js";
 import "../chunk-CLIMKLTW.js";
 import "../chunk-HXBIIMYL.js";
+import "../chunk-DGUM43GV.js";
 export {
   createMCPServer,
   runMCPServer

package/dist/watch.service-NXRWLJG6.js.map
ADDED

@@ -0,0 +1 @@
+{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}

package/dist/workers/background-worker-cli.js
CHANGED

@@ -1,19 +1,20 @@
 #!/usr/bin/env node
 import {
   IntelligentCrawler
-} from "../chunk-
+} from "../chunk-EQYSYRQJ.js";
 import {
   JobService,
   createLogger,
   createServices,
   destroyServices,
   shutdownLogger
-} from "../chunk-
+} from "../chunk-KQLTWB4T.js";
 import {
   createDocumentId,
   createStoreId
 } from "../chunk-CLIMKLTW.js";
 import "../chunk-HXBIIMYL.js";
+import "../chunk-DGUM43GV.js";
 
 // src/workers/background-worker-cli.ts
 import { platform } from "os";

package/dist/workers/background-worker-cli.js.map
CHANGED

@@ -1 +1 @@
-
{"version":3,"sources":["../../src/workers/background-worker-cli.ts","../../src/workers/background-worker.ts","../../src/workers/pid-file.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { platform } from 'os';\nimport { BackgroundWorker } from './background-worker.js';\nimport { writePidFile, deletePidFile, buildPidFilePath } from './pid-file.js';\nimport { createLogger, shutdownLogger } from '../logging/index.js';\nimport { createServices, destroyServices } from '../services/index.js';\nimport { JobService } from '../services/job.service.js';\n\n/**\n * Force exit the process to avoid ONNX runtime mutex crash on macOS.\n *\n * On macOS, the ONNX runtime (used by transformers.js for embeddings) has a known\n * bug where static mutex cleanup fails during process exit, causing a crash with:\n * \"mutex lock failed: Invalid argument\"\n *\n * This doesn't affect job completion - all work is done and persisted before exit.\n * Using SIGKILL bypasses the problematic cleanup code.\n *\n * See: https://github.com/microsoft/onnxruntime/issues/24579\n */\nfunction forceExitOnMacOS(exitCode: number): void {\n if (platform() === 'darwin') {\n // Give time for any pending I/O to flush\n setTimeout(() => {\n process.kill(process.pid, 'SIGKILL');\n }, 100);\n } else {\n process.exit(exitCode);\n }\n}\n\nconst logger = createLogger('background-worker-cli');\n\n/**\n * Background worker CLI entry point\n *\n * Usage: background-worker-cli <job-id>\n *\n * This process runs detached from the parent and executes a single job.\n */\n\nasync function main(): Promise<void> {\n const jobId = process.argv[2];\n const dataDir = process.env['BLUERA_DATA_DIR'];\n\n if (jobId === undefined || jobId === '') {\n logger.error('Job ID required. Usage: background-worker-cli <job-id>');\n await shutdownLogger();\n process.exit(1);\n }\n\n // Initialize services\n const jobService = new JobService(dataDir);\n const services = await createServices(undefined, dataDir);\n\n // Write PID file for job cancellation - CRITICAL: must succeed or job cannot be cancelled\n const pidFile = buildPidFilePath(\n jobService['jobsDir'], // Access private field for PID path\n jobId\n );\n\n try {\n writePidFile(pidFile, process.pid);\n } catch (error) {\n // CRITICAL: Cannot proceed without PID file - job would be uncancellable\n logger.error(\n { error: error instanceof Error ? 
error.message : String(error) },\n 'Failed to write PID file'\n );\n await shutdownLogger();\n process.exit(1);\n }\n\n // Handle SIGTERM for graceful shutdown\n process.on('SIGTERM', () => {\n logger.info({ jobId }, 'Received SIGTERM, cancelling job');\n jobService.updateJob(jobId, {\n status: 'cancelled',\n message: 'Job cancelled by user',\n });\n\n // Clean up PID file (best-effort - don't block shutdown)\n const deleteResult = deletePidFile(pidFile, 'sigterm');\n if (!deleteResult.success && deleteResult.error !== undefined) {\n logger.warn(\n { jobId, error: deleteResult.error.message },\n 'Could not remove PID file during SIGTERM'\n );\n }\n\n // Flush logs before exit (best-effort, don't await in signal handler)\n void shutdownLogger().finally(() => process.exit(0));\n });\n\n // Load config and create worker\n const appConfig = await services.config.load();\n const worker = new BackgroundWorker(\n jobService,\n services.store,\n services.index,\n services.lance,\n services.embeddings,\n appConfig.crawl\n );\n\n try {\n await worker.executeJob(jobId);\n\n // Clean up PID file on success (best-effort - don't change exit code)\n const successCleanup = deletePidFile(pidFile, 'success');\n if (!successCleanup.success && successCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: successCleanup.error.message },\n 'Could not remove PID file after success'\n );\n }\n\n logger.info({ jobId }, 'Job completed successfully');\n await destroyServices(services);\n await shutdownLogger();\n forceExitOnMacOS(0);\n } catch (error) {\n // Job service already updated with failure status in BackgroundWorker\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Clean up PID file on failure (best-effort - exit code reflects job failure)\n const failureCleanup = deletePidFile(pidFile, 'failure');\n if (!failureCleanup.success && failureCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: failureCleanup.error.message },\n 'Could not remove PID file after failure'\n );\n }\n\n await destroyServices(services);\n await shutdownLogger();\n forceExitOnMacOS(1);\n }\n}\n\nmain().catch(async (error: unknown) => {\n logger.error(\n { error: error instanceof Error ? 
error.message : String(error) },\n 'Fatal error in background worker'\n );\n await shutdownLogger();\n forceExitOnMacOS(1);\n});\n","import { createHash } from 'node:crypto';\nimport {\n IntelligentCrawler,\n type CrawlConfig,\n type CrawlProgress,\n} from '../crawl/intelligent-crawler.js';\nimport { createLogger } from '../logging/index.js';\nimport { IndexService } from '../services/index.service.js';\nimport { JobService } from '../services/job.service.js';\nimport { StoreService } from '../services/store.service.js';\nimport { createStoreId, createDocumentId } from '../types/brands.js';\nimport type { EmbeddingEngine } from '../db/embeddings.js';\nimport type { LanceStore } from '../db/lance.js';\nimport type { Document } from '../types/document.js';\nimport type { Job } from '../types/job.js';\n\nconst logger = createLogger('background-worker');\n\n/**\n * Calculate index progress as a percentage, handling division by zero.\n * @param current - Current number of items processed\n * @param total - Total number of items (may be 0)\n * @param scale - Scale factor for progress (default 100 for 0-100%)\n * @returns Progress value, or 0 if total is 0\n */\nexport function calculateIndexProgress(\n current: number,\n total: number,\n scale: number = 100\n): number {\n if (total === 0) return 0;\n return (current / total) * scale;\n}\n\nexport class BackgroundWorker {\n constructor(\n private readonly jobService: JobService,\n private readonly storeService: StoreService,\n private readonly indexService: IndexService,\n private readonly lanceStore: LanceStore,\n private readonly embeddingEngine: EmbeddingEngine,\n private readonly crawlConfig?: CrawlConfig\n ) {}\n\n /**\n * Execute a job based on its type\n */\n async executeJob(jobId: string): Promise<void> {\n const job = this.jobService.getJob(jobId);\n\n if (!job) {\n throw new Error(`Job ${jobId} not found`);\n }\n\n try {\n logger.info({ jobId, type: job.type }, 'Starting job execution');\n\n // Update to running status\n this.jobService.updateJob(jobId, {\n status: 'running',\n message: `Starting ${job.type} operation...`,\n progress: 0,\n details: { startedAt: new Date().toISOString() },\n });\n\n // Execute based on job type\n switch (job.type) {\n case 'clone':\n await this.executeCloneJob(job);\n break;\n case 'index':\n await this.executeIndexJob(job);\n break;\n case 'crawl':\n await this.executeCrawlJob(job);\n break;\n default:\n throw new Error(`Unknown job type: ${String(job.type)}`);\n }\n\n // Mark as completed\n this.jobService.updateJob(jobId, {\n status: 'completed',\n progress: 100,\n message: `${job.type} operation completed successfully`,\n details: { completedAt: new Date().toISOString() },\n });\n } catch (error) {\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Mark as failed\n const errorDetails: Record<string, unknown> = {\n completedAt: new Date().toISOString(),\n };\n if (error instanceof Error && error.stack !== undefined) {\n errorDetails['error'] = error.stack;\n } else {\n errorDetails['error'] = String(error);\n }\n this.jobService.updateJob(jobId, {\n status: 'failed',\n message: error instanceof Error ? 
error.message : 'Unknown error',\n details: errorDetails,\n });\n throw error;\n }\n }\n\n /**\n * Execute a clone job (git clone + initial indexing)\n */\n private async executeCloneJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for clone job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Clone is already done by the time the job is created\n // (happens in StoreService.create), so we just need to index\n\n // Update progress - cloning considered done (30%)\n this.jobService.updateJob(job.id, {\n status: 'running',\n message: 'Repository cloned, starting indexing...',\n progress: 30,\n });\n\n // Initialize LanceStore with dimensions before indexing\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n\n // Index the repository with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Indexing is 70% of total progress (30-100%)\n const indexProgress = calculateIndexProgress(event.current, event.total, 70);\n const totalProgress = 30 + indexProgress;\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, totalProgress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute an index job (re-indexing existing store)\n */\n private async executeIndexJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for index job');\n }\n\n // Get the store\n const store = await this.storeService.getByIdOrName(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Initialize LanceStore with dimensions before indexing\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n\n // Index with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n const progress = calculateIndexProgress(event.current, event.total);\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, progress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute a crawl job (web crawling + indexing)\n */\n private async executeCrawlJob(job: Job): Promise<void> {\n const { storeId, url, crawlInstruction, extractInstruction, maxPages, simple, useHeadless } =\n 
job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for crawl job');\n }\n if (url === undefined || typeof url !== 'string') {\n throw new Error('URL required for crawl job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (store?.type !== 'web') {\n throw new Error(`Web store ${storeId} not found`);\n }\n\n const resolvedMaxPages = typeof maxPages === 'number' ? maxPages : 50;\n const crawler = new IntelligentCrawler(this.crawlConfig);\n\n // Listen for progress events\n crawler.on('progress', (progress: CrawlProgress) => {\n // Check if job was cancelled - just return early, for-await loop will throw and finally will cleanup\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n return;\n }\n\n // Crawling is 80% of total progress (0-80%)\n const crawlProgress = (progress.pagesVisited / resolvedMaxPages) * 80;\n\n this.jobService.updateJob(job.id, {\n message:\n progress.message ??\n `Crawling page ${String(progress.pagesVisited)}/${String(resolvedMaxPages)}`,\n progress: Math.min(80, crawlProgress),\n details: { pagesCrawled: progress.pagesVisited },\n });\n });\n\n try {\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n const docs: Document[] = [];\n\n // Build crawl options, only including defined values\n const crawlOptions: {\n maxPages: number;\n simple: boolean;\n useHeadless: boolean;\n crawlInstruction?: string;\n extractInstruction?: string;\n } = {\n maxPages: resolvedMaxPages,\n simple: simple ?? false,\n useHeadless: useHeadless ?? true, // Default to headless for reliability\n };\n if (crawlInstruction !== undefined) {\n crawlOptions.crawlInstruction = crawlInstruction;\n }\n if (extractInstruction !== undefined) {\n crawlOptions.extractInstruction = extractInstruction;\n }\n\n // Crawl pages using IntelligentCrawler\n for await (const result of crawler.crawl(url, crawlOptions)) {\n // Check cancellation between pages\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Embed and index the content (use extracted if available, otherwise markdown)\n const contentToEmbed = result.extracted ?? 
result.markdown;\n const vector = await this.embeddingEngine.embed(contentToEmbed);\n\n docs.push({\n id: createDocumentId(`${store.id}-${createHash('md5').update(result.url).digest('hex')}`),\n content: contentToEmbed,\n vector,\n metadata: {\n type: 'web',\n storeId: store.id,\n url: result.url,\n title: result.title,\n extracted: result.extracted !== undefined,\n depth: result.depth,\n indexedAt: new Date().toISOString(),\n },\n });\n }\n\n // Index all documents (remaining 20%)\n if (docs.length > 0) {\n this.jobService.updateJob(job.id, {\n message: 'Indexing crawled documents...',\n progress: 85,\n });\n\n // Clear existing documents to prevent duplicates on re-crawl\n await this.lanceStore.clearAllDocuments(store.id);\n await this.lanceStore.addDocuments(store.id, docs);\n // Create FTS index for full-text search\n await this.lanceStore.createFtsIndex(store.id);\n }\n\n this.jobService.updateJob(job.id, {\n message: `Crawled and indexed ${String(docs.length)} pages`,\n progress: 100,\n details: { pagesCrawled: docs.length },\n });\n } finally {\n await crawler.stop();\n }\n }\n}\n","import fs from 'fs';\nimport path from 'path';\n\n/**\n * Result of a PID file delete operation.\n * Delete operations are best-effort and should not throw.\n */\nexport interface PidFileResult {\n success: boolean;\n error?: Error;\n}\n\n/**\n * Context for PID file deletion - indicates when the delete is happening.\n * Used for logging/debugging purposes.\n */\nexport type PidFileDeleteContext = 'sigterm' | 'success' | 'failure';\n\n/**\n * Write PID file - CRITICAL operation that must succeed.\n *\n * If the PID file cannot be written, the job cannot be cancelled through\n * the job management system. This is a critical failure and the job\n * should not proceed.\n *\n * @param pidFile - Absolute path to the PID file\n * @param pid - Process ID to write\n * @throws Error if PID file cannot be written\n */\nexport function writePidFile(pidFile: string, pid: number): void {\n try {\n fs.writeFileSync(pidFile, pid.toString(), 'utf-8');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(\n `CRITICAL: Failed to write PID file ${pidFile}. ` +\n `Job cannot be cancelled without PID file. ` +\n `Original error: ${message}`\n );\n }\n}\n\n/**\n * Delete PID file - best-effort cleanup during shutdown.\n *\n * This operation should NEVER throw. During process shutdown (SIGTERM,\n * job success, job failure), failing to delete a PID file should not\n * prevent the process from exiting cleanly.\n *\n * Stale PID files are cleaned up by JobService.cleanupOldJobs().\n *\n * @param pidFile - Absolute path to the PID file\n * @param _context - Context indicating when the delete is happening (for future logging)\n * @returns Result indicating success or failure with error details\n */\nexport function deletePidFile(pidFile: string, _context: PidFileDeleteContext): PidFileResult {\n try {\n fs.unlinkSync(pidFile);\n return { success: true };\n } catch (error) {\n // ENOENT = file doesn't exist - that's success (nothing to delete)\n if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {\n return { success: true };\n }\n // Any other error = failure (permission denied, etc.)\n return {\n success: false,\n error: error instanceof Error ? 
error : new Error(String(error)),\n };\n }\n}\n\n/**\n * Build the path to a PID file for a given job.\n *\n * @param jobsDir - Directory where job files are stored\n * @param jobId - Job identifier\n * @returns Absolute path to the PID file\n */\nexport function buildPidFilePath(jobsDir: string, jobId: string): string {\n return path.join(jobsDir, `${jobId}.pid`);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AACA,SAAS,gBAAgB;;;ACDzB,SAAS,kBAAkB;AAgB3B,IAAM,SAAS,aAAa,mBAAmB;AASxC,SAAS,uBACd,SACA,OACA,QAAgB,KACR;AACR,MAAI,UAAU,EAAG,QAAO;AACxB,SAAQ,UAAU,QAAS;AAC7B;AAEO,IAAM,mBAAN,MAAuB;AAAA,EAC5B,YACmB,YACA,cACA,cACA,YACA,iBACA,aACjB;AANiB;AACA;AACA;AACA;AACA;AACA;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA,EAKH,MAAM,WAAW,OAA8B;AAC7C,UAAM,MAAM,KAAK,WAAW,OAAO,KAAK;AAExC,QAAI,CAAC,KAAK;AACR,YAAM,IAAI,MAAM,OAAO,KAAK,YAAY;AAAA,IAC1C;AAEA,QAAI;AACF,aAAO,KAAK,EAAE,OAAO,MAAM,IAAI,KAAK,GAAG,wBAAwB;AAG/D,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,YAAY,IAAI,IAAI;AAAA,QAC7B,UAAU;AAAA,QACV,SAAS,EAAE,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACjD,CAAC;AAGD,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF;AACE,gBAAM,IAAI,MAAM,qBAAqB,OAAO,IAAI,IAAI,CAAC,EAAE;AAAA,MAC3D;AAGA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,GAAG,IAAI,IAAI;AAAA,QACpB,SAAS,EAAE,cAAa,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACnD,CAAC;AAAA,IACH,SAAS,OAAO;AACd,aAAO;AAAA,QACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,QACvE;AAAA,MACF;AAGA,YAAM,eAAwC;AAAA,QAC5C,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,MACtC;AACA,UAAI,iBAAiB,SAAS,MAAM,UAAU,QAAW;AACvD,qBAAa,OAAO,IAAI,MAAM;AAAA,MAChC,OAAO;AACL,qBAAa,OAAO,IAAI,OAAO,KAAK;AAAA,MACtC;AACA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AAAA,QAClD,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAMA,SAAK,WAAW,UAAU,IAAI,IAAI;AAAA,MAChC,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,CAAC;AAGD,SAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,UAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AAGzC,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,gBAAgB,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AAC3E,cAAM,gBAAgB,KAAK;AAE3B,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA;AAAA,UACpC,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,cAAc,cAAc,OAAO,CAAC;AAC1E,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAGA,SAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,UAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AAGzC,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAEA,cAAM,WAAW,uBAAuB,MAAM,SAAS,MAAM,KAAK;AAElE,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,QAAQ;AA
AA;AAAA,UAC/B,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,SAAS,KAAK,kBAAkB,oBAAoB,UAAU,QAAQ,YAAY,IACxF,IAAI;AAEN,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,QAAI,QAAQ,UAAa,OAAO,QAAQ,UAAU;AAChD,YAAM,IAAI,MAAM,4BAA4B;AAAA,IAC9C;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,OAAO,SAAS,OAAO;AACzB,YAAM,IAAI,MAAM,aAAa,OAAO,YAAY;AAAA,IAClD;AAEA,UAAM,mBAAmB,OAAO,aAAa,WAAW,WAAW;AACnE,UAAM,UAAU,IAAI,mBAAmB,KAAK,WAAW;AAGvD,YAAQ,GAAG,YAAY,CAAC,aAA4B;AAElD,YAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,UAAI,YAAY,WAAW,aAAa;AACtC;AAAA,MACF;AAGA,YAAM,gBAAiB,SAAS,eAAe,mBAAoB;AAEnE,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SACE,SAAS,WACT,iBAAiB,OAAO,SAAS,YAAY,CAAC,IAAI,OAAO,gBAAgB,CAAC;AAAA,QAC5E,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA,QACpC,SAAS,EAAE,cAAc,SAAS,aAAa;AAAA,MACjD,CAAC;AAAA,IACH,CAAC;AAED,QAAI;AACF,WAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,YAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AACzC,YAAM,OAAmB,CAAC;AAG1B,YAAM,eAMF;AAAA,QACF,UAAU;AAAA,QACV,QAAQ,UAAU;AAAA,QAClB,aAAa,eAAe;AAAA;AAAA,MAC9B;AACA,UAAI,qBAAqB,QAAW;AAClC,qBAAa,mBAAmB;AAAA,MAClC;AACA,UAAI,uBAAuB,QAAW;AACpC,qBAAa,qBAAqB;AAAA,MACpC;AAGA,uBAAiB,UAAU,QAAQ,MAAM,KAAK,YAAY,GAAG;AAE3D,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,iBAAiB,OAAO,aAAa,OAAO;AAClD,cAAM,SAAS,MAAM,KAAK,gBAAgB,MAAM,cAAc;AAE9D,aAAK,KAAK;AAAA,UACR,IAAI,iBAAiB,GAAG,MAAM,EAAE,IAAI,WAAW,KAAK,EAAE,OAAO,OAAO,GAAG,EAAE,OAAO,KAAK,CAAC,EAAE;AAAA,UACxF,SAAS;AAAA,UACT;AAAA,UACA,UAAU;AAAA,YACR,MAAM;AAAA,YACN,SAAS,MAAM;AAAA,YACf,KAAK,OAAO;AAAA,YACZ,OAAO,OAAO;AAAA,YACd,WAAW,OAAO,cAAc;AAAA,YAChC,OAAO,OAAO;AAAA,YACd,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,UACpC;AAAA,QACF,CAAC;AAAA,MACH;AAGA,UAAI,KAAK,SAAS,GAAG;AACnB,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS;AAAA,UACT,UAAU;AAAA,QACZ,CAAC;AAGD,cAAM,KAAK,WAAW,kBAAkB,MAAM,EAAE;AAChD,cAAM,KAAK,WAAW,aAAa,MAAM,IAAI,IAAI;AAEjD,cAAM,KAAK,WAAW,eAAe,MAAM,EAAE;AAAA,MAC/C;AAEA,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SAAS,uBAAuB,OAAO,KAAK,MAAM,CAAC;AAAA,QACnD,UAAU;AAAA,QACV,SAAS,EAAE,cAAc,KAAK,OAAO;AAAA,MACvC,CAAC;AAAA,IACH,UAAE;AACA,YAAM,QAAQ,KAAK;AAAA,IACrB;AAAA,EACF;AACF;;;ACjVA,OAAO,QAAQ;AACf,OAAO,UAAU;AA4BV,SAAS,aAAa,SAAiB,KAAmB;AAC/D,MAAI;AACF,OAAG,cAAc,SAAS,IAAI,SAAS,GAAG,OAAO;AAAA,EACnD,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,+DAExB,OAAO;AAAA,IAC9B;AAAA,EACF;AACF;AAeO,SAAS,cAAc,SAAiB,UAA+C;AAC5F,MAAI;AACF,OAAG,WAAW,OAAO;AACrB,WAAO,EAAE,SAAS,KAAK;AAAA,EACzB,SAAS,OAAO;AAEd,QAAI,iBAAiB,SAAS,UAAU,SAAS,MAAM,SAAS,UAAU;AACxE,aAAO,EAAE,SAAS,KAAK;AAAA,IACzB;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,IACjE;AAAA,EACF;AACF;AASO,SAAS,iBAAiB,SAAiB,OAAuB;AACvE,SAAO,KAAK,KAAK,SAAS,GAAG,KAAK,MAAM;AAC1C;;;AF7DA,SAAS,iBAAiB,UAAwB;AAChD,MAAI,SAAS,MAAM,UAAU;AAE3B,eAAW,MAAM;AACf,cAAQ,KAAK,QAAQ,KAAK,SAAS;AAAA,IACrC,GAAG,GAAG;AAAA,EACR,OAAO;AACL,YAAQ,KAAK,QAAQ;AAAA,EACvB;AACF;AAEA,IAAMA,UAAS,aAAa,uBAAuB;AAUnD,eAAe,OAAsB;AACnC,QAAM,QAAQ,QAAQ,KAAK,CAAC;AAC5B,QAAM,UAAU,QAAQ,IAAI,iBAAiB;AAE7C,MAAI,UAAU,UAAa,UAAU,IAAI;AACvC,IAAAA,QAAO,MAAM,wDAAwD;AACrE,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,QAAM,aAAa,IAAI,WAAW,OAAO;AACzC,QAAM,WAAW,MAAM,eAAe,QAAW,OAAO;AAGxD,QAAM,UAAU;AAAA,IACd,WAAW,SAAS;AAAA;AAAA,IACpB;AAAA,EACF;AAEA,MAAI;AACF,iBAAa,SAAS,QAAQ,GAAG;AAAA,EACnC,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,K
AAK,EAAE;AAAA,MAChE;AAAA,IACF;AACA,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,UAAQ,GAAG,WAAW,MAAM;AAC1B,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,kCAAkC;AACzD,eAAW,UAAU,OAAO;AAAA,MAC1B,QAAQ;AAAA,MACR,SAAS;AAAA,IACX,CAAC;AAGD,UAAM,eAAe,cAAc,SAAS,SAAS;AACrD,QAAI,CAAC,aAAa,WAAW,aAAa,UAAU,QAAW;AAC7D,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,aAAa,MAAM,QAAQ;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAGA,SAAK,eAAe,EAAE,QAAQ,MAAM,QAAQ,KAAK,CAAC,CAAC;AAAA,EACrD,CAAC;AAGD,QAAM,YAAY,MAAM,SAAS,OAAO,KAAK;AAC7C,QAAM,SAAS,IAAI;AAAA,IACjB;AAAA,IACA,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,UAAU;AAAA,EACZ;AAEA,MAAI;AACF,UAAM,OAAO,WAAW,KAAK;AAG7B,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,4BAA4B;AACnD,UAAM,gBAAgB,QAAQ;AAC9B,UAAM,eAAe;AACrB,qBAAiB,CAAC;AAAA,EACpB,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MACvE;AAAA,IACF;AAGA,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,UAAM,gBAAgB,QAAQ;AAC9B,UAAM,eAAe;AACrB,qBAAiB,CAAC;AAAA,EACpB;AACF;AAEA,KAAK,EAAE,MAAM,OAAO,UAAmB;AACrC,EAAAA,QAAO;AAAA,IACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,IAChE;AAAA,EACF;AACA,QAAM,eAAe;AACrB,mBAAiB,CAAC;AACpB,CAAC;","names":["logger"]}
+
{"version":3,"sources":["../../src/workers/background-worker-cli.ts","../../src/workers/background-worker.ts","../../src/workers/pid-file.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { platform } from 'os';\nimport { BackgroundWorker } from './background-worker.js';\nimport { writePidFile, deletePidFile, buildPidFilePath } from './pid-file.js';\nimport { createLogger, shutdownLogger } from '../logging/index.js';\nimport { createServices, destroyServices } from '../services/index.js';\nimport { JobService } from '../services/job.service.js';\n\n/**\n * Force exit the process to avoid ONNX runtime mutex crash on macOS.\n *\n * On macOS, the ONNX runtime (used by transformers.js for embeddings) has a known\n * bug where static mutex cleanup fails during process exit, causing a crash with:\n * \"mutex lock failed: Invalid argument\"\n *\n * This doesn't affect job completion - all work is done and persisted before exit.\n * Using SIGKILL bypasses the problematic cleanup code.\n *\n * See: https://github.com/microsoft/onnxruntime/issues/24579\n */\nfunction forceExitOnMacOS(exitCode: number): void {\n if (platform() === 'darwin') {\n // Give time for any pending I/O to flush\n setTimeout(() => {\n process.kill(process.pid, 'SIGKILL');\n }, 100);\n } else {\n process.exit(exitCode);\n }\n}\n\nconst logger = createLogger('background-worker-cli');\n\n/**\n * Background worker CLI entry point\n *\n * Usage: background-worker-cli <job-id>\n *\n * This process runs detached from the parent and executes a single job.\n */\n\nasync function main(): Promise<void> {\n const jobId = process.argv[2];\n const dataDir = process.env['BLUERA_DATA_DIR'];\n\n if (jobId === undefined || jobId === '') {\n logger.error('Job ID required. Usage: background-worker-cli <job-id>');\n await shutdownLogger();\n process.exit(1);\n }\n\n // Initialize services\n const jobService = new JobService(dataDir);\n const services = await createServices(undefined, dataDir);\n\n // Write PID file for job cancellation - CRITICAL: must succeed or job cannot be cancelled\n const pidFile = buildPidFilePath(\n jobService['jobsDir'], // Access private field for PID path\n jobId\n );\n\n try {\n writePidFile(pidFile, process.pid);\n } catch (error) {\n // CRITICAL: Cannot proceed without PID file - job would be uncancellable\n logger.error(\n { error: error instanceof Error ? 
error.message : String(error) },\n 'Failed to write PID file'\n );\n await shutdownLogger();\n process.exit(1);\n }\n\n // Handle SIGTERM for graceful shutdown\n process.on('SIGTERM', () => {\n logger.info({ jobId }, 'Received SIGTERM, cancelling job');\n jobService.updateJob(jobId, {\n status: 'cancelled',\n message: 'Job cancelled by user',\n });\n\n // Clean up PID file (best-effort - don't block shutdown)\n const deleteResult = deletePidFile(pidFile, 'sigterm');\n if (!deleteResult.success && deleteResult.error !== undefined) {\n logger.warn(\n { jobId, error: deleteResult.error.message },\n 'Could not remove PID file during SIGTERM'\n );\n }\n\n // Flush logs before exit (best-effort, don't await in signal handler)\n void shutdownLogger().finally(() => process.exit(0));\n });\n\n // Load config and create worker\n const appConfig = await services.config.load();\n const worker = new BackgroundWorker(\n jobService,\n services.store,\n services.index,\n services.lance,\n services.embeddings,\n appConfig.crawl\n );\n\n try {\n await worker.executeJob(jobId);\n\n // Clean up PID file on success (best-effort - don't change exit code)\n const successCleanup = deletePidFile(pidFile, 'success');\n if (!successCleanup.success && successCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: successCleanup.error.message },\n 'Could not remove PID file after success'\n );\n }\n\n logger.info({ jobId }, 'Job completed successfully');\n await destroyServices(services);\n await shutdownLogger();\n forceExitOnMacOS(0);\n } catch (error) {\n // Job service already updated with failure status in BackgroundWorker\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Clean up PID file on failure (best-effort - exit code reflects job failure)\n const failureCleanup = deletePidFile(pidFile, 'failure');\n if (!failureCleanup.success && failureCleanup.error !== undefined) {\n logger.warn(\n { jobId, error: failureCleanup.error.message },\n 'Could not remove PID file after failure'\n );\n }\n\n await destroyServices(services);\n await shutdownLogger();\n forceExitOnMacOS(1);\n }\n}\n\nmain().catch(async (error: unknown) => {\n logger.error(\n { error: error instanceof Error ? 
error.message : String(error) },\n 'Fatal error in background worker'\n );\n await shutdownLogger();\n forceExitOnMacOS(1);\n});\n","import { createHash } from 'node:crypto';\nimport {\n IntelligentCrawler,\n type CrawlConfig,\n type CrawlProgress,\n} from '../crawl/intelligent-crawler.js';\nimport { createLogger } from '../logging/index.js';\nimport { IndexService } from '../services/index.service.js';\nimport { JobService } from '../services/job.service.js';\nimport { StoreService } from '../services/store.service.js';\nimport { createStoreId, createDocumentId } from '../types/brands.js';\nimport type { EmbeddingEngine } from '../db/embeddings.js';\nimport type { LanceStore } from '../db/lance.js';\nimport type { Document } from '../types/document.js';\nimport type { Job } from '../types/job.js';\n\nconst logger = createLogger('background-worker');\n\n/**\n * Calculate index progress as a percentage, handling division by zero.\n * @param current - Current number of items processed\n * @param total - Total number of items (may be 0)\n * @param scale - Scale factor for progress (default 100 for 0-100%)\n * @returns Progress value, or 0 if total is 0\n */\nexport function calculateIndexProgress(\n current: number,\n total: number,\n scale: number = 100\n): number {\n if (total === 0) return 0;\n return (current / total) * scale;\n}\n\nexport class BackgroundWorker {\n constructor(\n private readonly jobService: JobService,\n private readonly storeService: StoreService,\n private readonly indexService: IndexService,\n private readonly lanceStore: LanceStore,\n private readonly embeddingEngine: EmbeddingEngine,\n private readonly crawlConfig?: CrawlConfig\n ) {}\n\n /**\n * Execute a job based on its type\n */\n async executeJob(jobId: string): Promise<void> {\n const job = this.jobService.getJob(jobId);\n\n if (!job) {\n throw new Error(`Job ${jobId} not found`);\n }\n\n try {\n logger.info({ jobId, type: job.type }, 'Starting job execution');\n\n // Update to running status\n this.jobService.updateJob(jobId, {\n status: 'running',\n message: `Starting ${job.type} operation...`,\n progress: 0,\n details: { startedAt: new Date().toISOString() },\n });\n\n // Execute based on job type\n switch (job.type) {\n case 'clone':\n await this.executeCloneJob(job);\n break;\n case 'index':\n await this.executeIndexJob(job);\n break;\n case 'crawl':\n await this.executeCrawlJob(job);\n break;\n default:\n throw new Error(`Unknown job type: ${String(job.type)}`);\n }\n\n // Mark as completed\n this.jobService.updateJob(jobId, {\n status: 'completed',\n progress: 100,\n message: `${job.type} operation completed successfully`,\n details: { completedAt: new Date().toISOString() },\n });\n } catch (error) {\n logger.error(\n { jobId, error: error instanceof Error ? error.message : String(error) },\n 'Job failed'\n );\n\n // Mark as failed\n const errorDetails: Record<string, unknown> = {\n completedAt: new Date().toISOString(),\n };\n if (error instanceof Error && error.stack !== undefined) {\n errorDetails['error'] = error.stack;\n } else {\n errorDetails['error'] = String(error);\n }\n this.jobService.updateJob(jobId, {\n status: 'failed',\n message: error instanceof Error ? 
error.message : 'Unknown error',\n details: errorDetails,\n });\n throw error;\n }\n }\n\n /**\n * Execute a clone job (git clone + initial indexing)\n */\n private async executeCloneJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for clone job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Clone is already done by the time the job is created\n // (happens in StoreService.create), so we just need to index\n\n // Update progress - cloning considered done (30%)\n this.jobService.updateJob(job.id, {\n status: 'running',\n message: 'Repository cloned, starting indexing...',\n progress: 30,\n });\n\n // Initialize LanceStore with dimensions before indexing\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n\n // Index the repository with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Indexing is 70% of total progress (30-100%)\n const indexProgress = calculateIndexProgress(event.current, event.total, 70);\n const totalProgress = 30 + indexProgress;\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, totalProgress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute an index job (re-indexing existing store)\n */\n private async executeIndexJob(job: Job): Promise<void> {\n const { storeId } = job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for index job');\n }\n\n // Get the store\n const store = await this.storeService.getByIdOrName(createStoreId(storeId));\n if (!store) {\n throw new Error(`Store ${storeId} not found`);\n }\n\n // Initialize LanceStore with dimensions before indexing\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n\n // Index with progress updates\n const result = await this.indexService.indexStore(\n store,\n (event: { type: string; current: number; total: number; message: string }) => {\n // Check if job was cancelled\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n const progress = calculateIndexProgress(event.current, event.total);\n\n this.jobService.updateJob(job.id, {\n message: `Indexed ${String(event.current)}/${String(event.total)} files`,\n progress: Math.min(99, progress), // Cap at 99 until fully complete\n details: {\n filesProcessed: event.current,\n totalFiles: event.total,\n },\n });\n }\n );\n\n if (!result.success) {\n throw result.error;\n }\n }\n\n /**\n * Execute a crawl job (web crawling + indexing)\n */\n private async executeCrawlJob(job: Job): Promise<void> {\n const { storeId, url, crawlInstruction, extractInstruction, maxPages, simple, useHeadless } =\n 
job.details;\n\n if (storeId === undefined || typeof storeId !== 'string') {\n throw new Error('Store ID required for crawl job');\n }\n if (url === undefined || typeof url !== 'string') {\n throw new Error('URL required for crawl job');\n }\n\n // Get the store\n const store = await this.storeService.get(createStoreId(storeId));\n if (store?.type !== 'web') {\n throw new Error(`Web store ${storeId} not found`);\n }\n\n const resolvedMaxPages = typeof maxPages === 'number' ? maxPages : 50;\n const crawler = new IntelligentCrawler(this.crawlConfig);\n\n // Listen for progress events\n crawler.on('progress', (progress: CrawlProgress) => {\n // Check if job was cancelled - just return early, for-await loop will throw and finally will cleanup\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n return;\n }\n\n // Crawling is 80% of total progress (0-80%)\n const crawlProgress = (progress.pagesVisited / resolvedMaxPages) * 80;\n\n this.jobService.updateJob(job.id, {\n message:\n progress.message ??\n `Crawling page ${String(progress.pagesVisited)}/${String(resolvedMaxPages)}`,\n progress: Math.min(80, crawlProgress),\n details: { pagesCrawled: progress.pagesVisited },\n });\n });\n\n try {\n this.lanceStore.setDimensions(await this.embeddingEngine.ensureDimensions());\n await this.lanceStore.initialize(store.id);\n const docs: Document[] = [];\n\n // Build crawl options, only including defined values\n const crawlOptions: {\n maxPages: number;\n simple: boolean;\n useHeadless: boolean;\n crawlInstruction?: string;\n extractInstruction?: string;\n } = {\n maxPages: resolvedMaxPages,\n simple: simple ?? false,\n useHeadless: useHeadless ?? true, // Default to headless for reliability\n };\n if (crawlInstruction !== undefined) {\n crawlOptions.crawlInstruction = crawlInstruction;\n }\n if (extractInstruction !== undefined) {\n crawlOptions.extractInstruction = extractInstruction;\n }\n\n // Crawl pages using IntelligentCrawler\n for await (const result of crawler.crawl(url, crawlOptions)) {\n // Check cancellation between pages\n const currentJob = this.jobService.getJob(job.id);\n if (currentJob?.status === 'cancelled') {\n throw new Error('Job cancelled by user');\n }\n\n // Embed and index the content (use extracted if available, otherwise markdown)\n const contentToEmbed = result.extracted ?? 
result.markdown;\n const vector = await this.embeddingEngine.embed(contentToEmbed);\n\n docs.push({\n id: createDocumentId(`${store.id}-${createHash('md5').update(result.url).digest('hex')}`),\n content: contentToEmbed,\n vector,\n metadata: {\n type: 'web',\n storeId: store.id,\n url: result.url,\n title: result.title,\n extracted: result.extracted !== undefined,\n depth: result.depth,\n indexedAt: new Date().toISOString(),\n },\n });\n }\n\n // Index all documents (remaining 20%)\n if (docs.length > 0) {\n this.jobService.updateJob(job.id, {\n message: 'Indexing crawled documents...',\n progress: 85,\n });\n\n // Clear existing documents to prevent duplicates on re-crawl\n await this.lanceStore.clearAllDocuments(store.id);\n await this.lanceStore.addDocuments(store.id, docs);\n // Create FTS index for full-text search\n await this.lanceStore.createFtsIndex(store.id);\n }\n\n this.jobService.updateJob(job.id, {\n message: `Crawled and indexed ${String(docs.length)} pages`,\n progress: 100,\n details: { pagesCrawled: docs.length },\n });\n } finally {\n await crawler.stop();\n }\n }\n}\n","import fs from 'fs';\nimport path from 'path';\n\n/**\n * Result of a PID file delete operation.\n * Delete operations are best-effort and should not throw.\n */\nexport interface PidFileResult {\n success: boolean;\n error?: Error;\n}\n\n/**\n * Context for PID file deletion - indicates when the delete is happening.\n * Used for logging/debugging purposes.\n */\nexport type PidFileDeleteContext = 'sigterm' | 'success' | 'failure';\n\n/**\n * Write PID file - CRITICAL operation that must succeed.\n *\n * If the PID file cannot be written, the job cannot be cancelled through\n * the job management system. This is a critical failure and the job\n * should not proceed.\n *\n * @param pidFile - Absolute path to the PID file\n * @param pid - Process ID to write\n * @throws Error if PID file cannot be written\n */\nexport function writePidFile(pidFile: string, pid: number): void {\n try {\n fs.writeFileSync(pidFile, pid.toString(), 'utf-8');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(\n `CRITICAL: Failed to write PID file ${pidFile}. ` +\n `Job cannot be cancelled without PID file. ` +\n `Original error: ${message}`\n );\n }\n}\n\n/**\n * Delete PID file - best-effort cleanup during shutdown.\n *\n * This operation should NEVER throw. During process shutdown (SIGTERM,\n * job success, job failure), failing to delete a PID file should not\n * prevent the process from exiting cleanly.\n *\n * Stale PID files are cleaned up by JobService.cleanupOldJobs().\n *\n * @param pidFile - Absolute path to the PID file\n * @param _context - Context indicating when the delete is happening (for future logging)\n * @returns Result indicating success or failure with error details\n */\nexport function deletePidFile(pidFile: string, _context: PidFileDeleteContext): PidFileResult {\n try {\n fs.unlinkSync(pidFile);\n return { success: true };\n } catch (error) {\n // ENOENT = file doesn't exist - that's success (nothing to delete)\n if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {\n return { success: true };\n }\n // Any other error = failure (permission denied, etc.)\n return {\n success: false,\n error: error instanceof Error ? 
error : new Error(String(error)),\n };\n }\n}\n\n/**\n * Build the path to a PID file for a given job.\n *\n * @param jobsDir - Directory where job files are stored\n * @param jobId - Job identifier\n * @returns Absolute path to the PID file\n */\nexport function buildPidFilePath(jobsDir: string, jobId: string): string {\n return path.join(jobsDir, `${jobId}.pid`);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;AACA,SAAS,gBAAgB;;;ACDzB,SAAS,kBAAkB;AAgB3B,IAAM,SAAS,aAAa,mBAAmB;AASxC,SAAS,uBACd,SACA,OACA,QAAgB,KACR;AACR,MAAI,UAAU,EAAG,QAAO;AACxB,SAAQ,UAAU,QAAS;AAC7B;AAEO,IAAM,mBAAN,MAAuB;AAAA,EAC5B,YACmB,YACA,cACA,cACA,YACA,iBACA,aACjB;AANiB;AACA;AACA;AACA;AACA;AACA;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA,EAKH,MAAM,WAAW,OAA8B;AAC7C,UAAM,MAAM,KAAK,WAAW,OAAO,KAAK;AAExC,QAAI,CAAC,KAAK;AACR,YAAM,IAAI,MAAM,OAAO,KAAK,YAAY;AAAA,IAC1C;AAEA,QAAI;AACF,aAAO,KAAK,EAAE,OAAO,MAAM,IAAI,KAAK,GAAG,wBAAwB;AAG/D,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,YAAY,IAAI,IAAI;AAAA,QAC7B,UAAU;AAAA,QACV,SAAS,EAAE,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACjD,CAAC;AAGD,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF,KAAK;AACH,gBAAM,KAAK,gBAAgB,GAAG;AAC9B;AAAA,QACF;AACE,gBAAM,IAAI,MAAM,qBAAqB,OAAO,IAAI,IAAI,CAAC,EAAE;AAAA,MAC3D;AAGA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,GAAG,IAAI,IAAI;AAAA,QACpB,SAAS,EAAE,cAAa,oBAAI,KAAK,GAAE,YAAY,EAAE;AAAA,MACnD,CAAC;AAAA,IACH,SAAS,OAAO;AACd,aAAO;AAAA,QACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,QACvE;AAAA,MACF;AAGA,YAAM,eAAwC;AAAA,QAC5C,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,MACtC;AACA,UAAI,iBAAiB,SAAS,MAAM,UAAU,QAAW;AACvD,qBAAa,OAAO,IAAI,MAAM;AAAA,MAChC,OAAO;AACL,qBAAa,OAAO,IAAI,OAAO,KAAK;AAAA,MACtC;AACA,WAAK,WAAW,UAAU,OAAO;AAAA,QAC/B,QAAQ;AAAA,QACR,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AAAA,QAClD,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAMA,SAAK,WAAW,UAAU,IAAI,IAAI;AAAA,MAChC,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,CAAC;AAGD,SAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,UAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AAGzC,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,gBAAgB,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AAC3E,cAAM,gBAAgB,KAAK;AAE3B,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA;AAAA,UACpC,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,QAAQ,IAAI,IAAI;AAExB,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,cAAc,cAAc,OAAO,CAAC;AAC1E,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,SAAS,OAAO,YAAY;AAAA,IAC9C;AAGA,SAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,UAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AAGzC,UAAM,SAAS,MAAM,KAAK,aAAa;AAAA,MACrC;AAAA,MACA,CAAC,UAA6E;AAE5E,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAEA,cAAM,WAAW,uBAAuB,MAAM,SAAS,MAAM,KAAK;AAElE,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS,WAAW,OAAO,MAAM,OAAO,CAAC,IAAI,OAAO,MAAM,KAAK,CAAC;AAAA,UAChE,UAAU,KAAK,IAAI,IAAI,QAAQ;A
AAA;AAAA,UAC/B,SAAS;AAAA,YACP,gBAAgB,MAAM;AAAA,YACtB,YAAY,MAAM;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,OAAO;AAAA,IACf;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBAAgB,KAAyB;AACrD,UAAM,EAAE,SAAS,KAAK,kBAAkB,oBAAoB,UAAU,QAAQ,YAAY,IACxF,IAAI;AAEN,QAAI,YAAY,UAAa,OAAO,YAAY,UAAU;AACxD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,QAAI,QAAQ,UAAa,OAAO,QAAQ,UAAU;AAChD,YAAM,IAAI,MAAM,4BAA4B;AAAA,IAC9C;AAGA,UAAM,QAAQ,MAAM,KAAK,aAAa,IAAI,cAAc,OAAO,CAAC;AAChE,QAAI,OAAO,SAAS,OAAO;AACzB,YAAM,IAAI,MAAM,aAAa,OAAO,YAAY;AAAA,IAClD;AAEA,UAAM,mBAAmB,OAAO,aAAa,WAAW,WAAW;AACnE,UAAM,UAAU,IAAI,mBAAmB,KAAK,WAAW;AAGvD,YAAQ,GAAG,YAAY,CAAC,aAA4B;AAElD,YAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,UAAI,YAAY,WAAW,aAAa;AACtC;AAAA,MACF;AAGA,YAAM,gBAAiB,SAAS,eAAe,mBAAoB;AAEnE,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SACE,SAAS,WACT,iBAAiB,OAAO,SAAS,YAAY,CAAC,IAAI,OAAO,gBAAgB,CAAC;AAAA,QAC5E,UAAU,KAAK,IAAI,IAAI,aAAa;AAAA,QACpC,SAAS,EAAE,cAAc,SAAS,aAAa;AAAA,MACjD,CAAC;AAAA,IACH,CAAC;AAED,QAAI;AACF,WAAK,WAAW,cAAc,MAAM,KAAK,gBAAgB,iBAAiB,CAAC;AAC3E,YAAM,KAAK,WAAW,WAAW,MAAM,EAAE;AACzC,YAAM,OAAmB,CAAC;AAG1B,YAAM,eAMF;AAAA,QACF,UAAU;AAAA,QACV,QAAQ,UAAU;AAAA,QAClB,aAAa,eAAe;AAAA;AAAA,MAC9B;AACA,UAAI,qBAAqB,QAAW;AAClC,qBAAa,mBAAmB;AAAA,MAClC;AACA,UAAI,uBAAuB,QAAW;AACpC,qBAAa,qBAAqB;AAAA,MACpC;AAGA,uBAAiB,UAAU,QAAQ,MAAM,KAAK,YAAY,GAAG;AAE3D,cAAM,aAAa,KAAK,WAAW,OAAO,IAAI,EAAE;AAChD,YAAI,YAAY,WAAW,aAAa;AACtC,gBAAM,IAAI,MAAM,uBAAuB;AAAA,QACzC;AAGA,cAAM,iBAAiB,OAAO,aAAa,OAAO;AAClD,cAAM,SAAS,MAAM,KAAK,gBAAgB,MAAM,cAAc;AAE9D,aAAK,KAAK;AAAA,UACR,IAAI,iBAAiB,GAAG,MAAM,EAAE,IAAI,WAAW,KAAK,EAAE,OAAO,OAAO,GAAG,EAAE,OAAO,KAAK,CAAC,EAAE;AAAA,UACxF,SAAS;AAAA,UACT;AAAA,UACA,UAAU;AAAA,YACR,MAAM;AAAA,YACN,SAAS,MAAM;AAAA,YACf,KAAK,OAAO;AAAA,YACZ,OAAO,OAAO;AAAA,YACd,WAAW,OAAO,cAAc;AAAA,YAChC,OAAO,OAAO;AAAA,YACd,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,UACpC;AAAA,QACF,CAAC;AAAA,MACH;AAGA,UAAI,KAAK,SAAS,GAAG;AACnB,aAAK,WAAW,UAAU,IAAI,IAAI;AAAA,UAChC,SAAS;AAAA,UACT,UAAU;AAAA,QACZ,CAAC;AAGD,cAAM,KAAK,WAAW,kBAAkB,MAAM,EAAE;AAChD,cAAM,KAAK,WAAW,aAAa,MAAM,IAAI,IAAI;AAEjD,cAAM,KAAK,WAAW,eAAe,MAAM,EAAE;AAAA,MAC/C;AAEA,WAAK,WAAW,UAAU,IAAI,IAAI;AAAA,QAChC,SAAS,uBAAuB,OAAO,KAAK,MAAM,CAAC;AAAA,QACnD,UAAU;AAAA,QACV,SAAS,EAAE,cAAc,KAAK,OAAO;AAAA,MACvC,CAAC;AAAA,IACH,UAAE;AACA,YAAM,QAAQ,KAAK;AAAA,IACrB;AAAA,EACF;AACF;;;ACjVA,OAAO,QAAQ;AACf,OAAO,UAAU;AA4BV,SAAS,aAAa,SAAiB,KAAmB;AAC/D,MAAI;AACF,OAAG,cAAc,SAAS,IAAI,SAAS,GAAG,OAAO;AAAA,EACnD,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,+DAExB,OAAO;AAAA,IAC9B;AAAA,EACF;AACF;AAeO,SAAS,cAAc,SAAiB,UAA+C;AAC5F,MAAI;AACF,OAAG,WAAW,OAAO;AACrB,WAAO,EAAE,SAAS,KAAK;AAAA,EACzB,SAAS,OAAO;AAEd,QAAI,iBAAiB,SAAS,UAAU,SAAS,MAAM,SAAS,UAAU;AACxE,aAAO,EAAE,SAAS,KAAK;AAAA,IACzB;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,IACjE;AAAA,EACF;AACF;AASO,SAAS,iBAAiB,SAAiB,OAAuB;AACvE,SAAO,KAAK,KAAK,SAAS,GAAG,KAAK,MAAM;AAC1C;;;AF7DA,SAAS,iBAAiB,UAAwB;AAChD,MAAI,SAAS,MAAM,UAAU;AAE3B,eAAW,MAAM;AACf,cAAQ,KAAK,QAAQ,KAAK,SAAS;AAAA,IACrC,GAAG,GAAG;AAAA,EACR,OAAO;AACL,YAAQ,KAAK,QAAQ;AAAA,EACvB;AACF;AAEA,IAAMA,UAAS,aAAa,uBAAuB;AAUnD,eAAe,OAAsB;AACnC,QAAM,QAAQ,QAAQ,KAAK,CAAC;AAC5B,QAAM,UAAU,QAAQ,IAAI,iBAAiB;AAE7C,MAAI,UAAU,UAAa,UAAU,IAAI;AACvC,IAAAA,QAAO,MAAM,wDAAwD;AACrE,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,QAAM,aAAa,IAAI,WAAW,OAAO;AACzC,QAAM,WAAW,MAAM,eAAe,QAAW,OAAO;AAGxD,QAAM,UAAU;AAAA,IACd,WAAW,SAAS;AAAA;AAAA,IACpB;AAAA,EACF;AAEA,MAAI;AACF,iBAAa,SAAS,QAAQ,GAAG;AAAA,EACnC,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,
KAAK,EAAE;AAAA,MAChE;AAAA,IACF;AACA,UAAM,eAAe;AACrB,YAAQ,KAAK,CAAC;AAAA,EAChB;AAGA,UAAQ,GAAG,WAAW,MAAM;AAC1B,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,kCAAkC;AACzD,eAAW,UAAU,OAAO;AAAA,MAC1B,QAAQ;AAAA,MACR,SAAS;AAAA,IACX,CAAC;AAGD,UAAM,eAAe,cAAc,SAAS,SAAS;AACrD,QAAI,CAAC,aAAa,WAAW,aAAa,UAAU,QAAW;AAC7D,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,aAAa,MAAM,QAAQ;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAGA,SAAK,eAAe,EAAE,QAAQ,MAAM,QAAQ,KAAK,CAAC,CAAC;AAAA,EACrD,CAAC;AAGD,QAAM,YAAY,MAAM,SAAS,OAAO,KAAK;AAC7C,QAAM,SAAS,IAAI;AAAA,IACjB;AAAA,IACA,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,SAAS;AAAA,IACT,UAAU;AAAA,EACZ;AAEA,MAAI;AACF,UAAM,OAAO,WAAW,KAAK;AAG7B,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,IAAAA,QAAO,KAAK,EAAE,MAAM,GAAG,4BAA4B;AACnD,UAAM,gBAAgB,QAAQ;AAC9B,UAAM,eAAe;AACrB,qBAAiB,CAAC;AAAA,EACpB,SAAS,OAAO;AAEd,IAAAA,QAAO;AAAA,MACL,EAAE,OAAO,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,MACvE;AAAA,IACF;AAGA,UAAM,iBAAiB,cAAc,SAAS,SAAS;AACvD,QAAI,CAAC,eAAe,WAAW,eAAe,UAAU,QAAW;AACjE,MAAAA,QAAO;AAAA,QACL,EAAE,OAAO,OAAO,eAAe,MAAM,QAAQ;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AAEA,UAAM,gBAAgB,QAAQ;AAC9B,UAAM,eAAe;AACrB,qBAAiB,CAAC;AAAA,EACpB;AACF;AAEA,KAAK,EAAE,MAAM,OAAO,UAAmB;AACrC,EAAAA,QAAO;AAAA,IACL,EAAE,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,IAChE;AAAA,EACF;AACA,QAAM,eAAe;AACrB,mBAAiB,CAAC;AACpB,CAAC;","names":["logger"]}
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "bluera-knowledge",
-  "version": "0.
+  "version": "0.19.0",
   "description": "CLI tool for managing knowledge stores with semantic search",
   "type": "module",
   "bin": {

@@ -16,9 +16,9 @@
   "start": "node dist/index.js",
   "test": "vitest",
   "test:run": "vitest run",
-  "test:changed": "bash -c '
-  "test:changed:verbose": "vitest run --changed
-  "test:coverage": "bash -c '
+  "test:changed": "bash -c 'OUTPUT=$(vitest run --changed --reporter=dot --silent 2>&1); echo \"$OUTPUT\" | tail -5; if echo \"$OUTPUT\" | grep -qE \"Tests.*[0-9]+ failed\"; then exit 1; fi'",
+  "test:changed:verbose": "vitest run --changed",
+  "test:coverage": "bash -c 'OUTPUT=$(vitest run --coverage --reporter=dot 2>&1); echo \"$OUTPUT\" | grep -E \"(Test Files|Tests|Coverage|threshold|ERROR)\" | tail -10; if echo \"$OUTPUT\" | grep -qE \"Tests.*[0-9]+ failed\"; then exit 1; fi; if echo \"$OUTPUT\" | grep -qE \"ERROR.*threshold\"; then exit 1; fi'",
   "test:coverage:verbose": "vitest run --coverage",
   "format": "prettier --write \"src/**/*.ts\" \"tests/**/*.ts\"",
   "format:check": "prettier --check \"src/**/*.ts\" \"tests/**/*.ts\" --log-level warn && echo '✓ Format check passed'",

@@ -41,6 +41,7 @@
   "release:minor": "commit-and-tag-version --release-as minor && git push --follow-tags",
   "release:major": "commit-and-tag-version --release-as major && git push --follow-tags",
   "validate:npm": "./scripts/validate-npm-release.sh",
+  "validate:local": "./scripts/validate-local.sh",
   "gh:status": "gh run list --limit 5",
   "gh:watch": "bash -c 'RUN_ID=$(gh run list --limit 1 --json databaseId -q \".[0].databaseId\") && echo \"Watching run $RUN_ID...\" && gh run watch $RUN_ID --exit-status >/dev/null 2>&1 && echo \"✓ Workflow passed\" || { echo \"✗ Workflow failed:\"; gh run view $RUN_ID --log-failed 2>&1 | head -50; }'",
   "gh:watch:verbose": "gh run list --limit 1 --json databaseId --jq '.[0].databaseId' | xargs -I {} gh run watch {} --exit-status",

@@ -77,7 +78,7 @@
   "@types/babel__traverse": "^7.28.0",
   "@types/node": "^25.0.3",
   "@types/turndown": "^5.0.6",
-  "@vitest/coverage-
+  "@vitest/coverage-istanbul": "^4.0.18",
   "commit-and-tag-version": "^12.6.1",
   "eslint": "^9.39.2",
   "eslint-config-prettier": "^10.1.8",

@@ -115,6 +116,7 @@
   "ora": "^9.0.0",
   "pino": "^10.1.0",
   "pino-roll": "^4.0.0",
+  "playwright": "^1.58.0",
   "slurp-ai": "^1.0.6",
   "tree-sitter": "^0.25.0",
   "tree-sitter-go": "^0.25.0",

package/python/ast_worker.py
ADDED
@@ -0,0 +1,209 @@
+#!/usr/bin/env python3
+"""
+Python AST Worker - JSON-RPC service for parsing Python code
+
+This worker reads JSON-RPC requests from stdin and writes responses to stdout.
+It uses Python's built-in ast module to parse Python code and extract:
+- Function definitions (sync and async)
+- Class definitions with methods
+- Import statements
+- Function calls within code
+
+No external dependencies required - uses only Python standard library.
+"""
+import sys
+import json
+import ast
+from typing import List, Dict, Any
+
+
+def is_exported(node: ast.AST) -> bool:
+    """Check if a function or class is exported (not private).
+
+    In Python, names starting with '_' are conventionally private.
+    """
+    if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
+        return not node.name.startswith('_')
+    return False
+
+
+def get_signature(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
+    """Extract function signature from AST node."""
+    args_list = []
+
+    for arg in node.args.args:
+        arg_str = arg.arg
+        if arg.annotation:
+            arg_str += f': {ast.unparse(arg.annotation)}'
+        args_list.append(arg_str)
+
+    return_annotation = ''
+    if node.returns:
+        return_annotation = f' -> {ast.unparse(node.returns)}'
+
+    return f"{node.name}({', '.join(args_list)}){return_annotation}"
+
+
+def extract_imports(tree: ast.AST) -> List[Dict[str, Any]]:
+    """Extract import statements from AST."""
+    imports = []
+
+    for node in ast.walk(tree):
+        if isinstance(node, ast.Import):
+            for alias in node.names:
+                imports.append({
+                    'source': alias.name,
+                    'imported': alias.asname if alias.asname else alias.name
+                })
+        elif isinstance(node, ast.ImportFrom):
+            module = node.module if node.module else ''
+            for alias in node.names:
+                imports.append({
+                    'source': module,
+                    'imported': alias.name,
+                    'alias': alias.asname if alias.asname else None
+                })
+
+    return imports
+
+
+def extract_calls(node: ast.AST) -> List[str]:
+    """Extract function calls from a function/method body."""
+    calls = []
+
+    for child in ast.walk(node):
+        if isinstance(child, ast.Call):
+            if isinstance(child.func, ast.Name):
+                calls.append(child.func.id)
+            elif isinstance(child.func, ast.Attribute):
+                calls.append(child.func.attr)
+
+    return calls
+
+
+def parse_python_ast(code: str, file_path: str) -> Dict[str, Any]:
+    """Parse Python code and return CodeNode array.
+
+    Args:
+        code: Python source code to parse
+        file_path: Path to the file (for error messages)
+
+    Returns:
+        Dictionary with 'nodes' and 'imports' arrays
+
+    Raises:
+        Exception: If parsing fails due to syntax errors
+    """
+    try:
+        tree = ast.parse(code)
+        nodes = []
+
+        for node in tree.body:
+            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                nodes.append({
+                    'type': 'function',
+                    'name': node.name,
+                    'exported': is_exported(node),
+                    'startLine': node.lineno,
+                    'endLine': node.end_lineno if node.end_lineno else node.lineno,
+                    'async': isinstance(node, ast.AsyncFunctionDef),
+                    'signature': get_signature(node),
+                    'calls': extract_calls(node)
+                })
+
+            elif isinstance(node, ast.ClassDef):
+                methods = []
+                for item in node.body:
+                    if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                        methods.append({
+                            'name': item.name,
+                            'async': isinstance(item, ast.AsyncFunctionDef),
+                            'signature': get_signature(item),
+                            'startLine': item.lineno,
+                            'endLine': item.end_lineno if item.end_lineno else item.lineno,
+                            'calls': extract_calls(item)
+                        })
+
+                nodes.append({
+                    'type': 'class',
+                    'name': node.name,
+                    'exported': is_exported(node),
+                    'startLine': node.lineno,
+                    'endLine': node.end_lineno if node.end_lineno else node.lineno,
+                    'methods': methods
+                })
+
+        imports = extract_imports(tree)
+
+        return {
+            'nodes': nodes,
+            'imports': imports
+        }
+
+    except SyntaxError as e:
+        raise Exception(f"Python syntax error at line {e.lineno}: {e.msg}")
+    except Exception as e:
+        raise Exception(f"Failed to parse Python AST: {str(e)}")
+
+
+def process_parse_python(request: Dict[str, Any]) -> None:
+    """Process a parse_python JSON-RPC request."""
+    try:
+        params = request.get('params', {})
+        code = params.get('code')
+        file_path = params.get('filePath', '<unknown>')
+
+        if code is None:
+            raise ValueError('code parameter is required')
+
+        result = parse_python_ast(code, file_path)
+        response = {
+            'jsonrpc': '2.0',
+            'id': request.get('id'),
+            'result': result
+        }
+        print(json.dumps(response), flush=True)
+    except Exception as e:
+        error_response = {
+            'jsonrpc': '2.0',
+            'id': request.get('id'),
+            'error': {'code': -1, 'message': str(e)}
+        }
+        print(json.dumps(error_response), flush=True)
+
+
+def main():
+    """Main loop processing stdin requests."""
+    for line in sys.stdin:
+        try:
+            request = json.loads(line.strip())
+            method = request.get('method')
+
+            if method == 'parse_python':
+                process_parse_python(request)
+            else:
+                error_response = {
+                    'jsonrpc': '2.0',
+                    'id': request.get('id'),
+                    'error': {'code': -32601, 'message': f'Unknown method: {method}'}
+                }
+                print(json.dumps(error_response), flush=True)
+
+        except json.JSONDecodeError as e:
+            error_response = {
+                'jsonrpc': '2.0',
+                'id': None,
+                'error': {'code': -32700, 'message': f'Parse error: {str(e)}'}
+            }
+            print(json.dumps(error_response), flush=True)
+        except Exception as e:
+            error_response = {
+                'jsonrpc': '2.0',
+                'id': request.get('id') if 'request' in dir() and isinstance(request, dict) else None,
+                'error': {'code': -1, 'message': str(e)}
+            }
+            print(json.dumps(error_response), flush=True)
+
+
+if __name__ == '__main__':
+    main()
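
The worker above is a line-oriented JSON-RPC 2.0 service: one request object per stdin line, one response object per stdout line. Below is a minimal sketch of driving it directly, assuming the package root as the working directory and a python3 interpreter on PATH (both are assumptions for illustration, not part of the shipped package, which invokes the worker from its TypeScript bridge):

import json
import subprocess

# Start the AST worker shipped in the package (path per the diff above).
proc = subprocess.Popen(
    ['python3', 'python/ast_worker.py'],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    text=True,
)

# main() expects one JSON-RPC request per line; 'parse_python' is the only
# method it handles, with 'code' required and 'filePath' optional.
request = {
    'jsonrpc': '2.0',
    'id': 1,
    'method': 'parse_python',
    'params': {
        'code': 'def greet(name: str) -> str:\n    return name.upper()\n',
        'filePath': 'example.py',
    },
}
proc.stdin.write(json.dumps(request) + '\n')
proc.stdin.flush()

# On success the reply carries {'nodes': [...], 'imports': [...]}; on failure
# it carries an 'error' object with code -32700 (bad JSON), -32601 (unknown
# method), or -1 (parse failure).
response = json.loads(proc.stdout.readline())
print(response['result']['nodes'][0]['signature'])  # greet(name: str) -> str

proc.stdin.close()
proc.wait()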