bluera-knowledge 0.12.6 → 0.12.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +36 -0
- package/dist/{chunk-QCSFBMYW.js → chunk-BMY3BWB6.js} +2 -2
- package/dist/{chunk-CC6EGZ4D.js → chunk-PDGOW3CH.js} +6 -6
- package/dist/chunk-PDGOW3CH.js.map +1 -0
- package/dist/{chunk-C4SYGLAI.js → chunk-RISACKN5.js} +128 -66
- package/dist/chunk-RISACKN5.js.map +1 -0
- package/dist/index.js +3 -3
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +2 -2
- package/package.json +1 -1
- package/src/db/lance.ts +23 -14
- package/src/mcp/handlers/store.handler.ts +4 -4
- package/src/services/job.service.ts +3 -4
- package/src/services/store.service.test.ts +24 -0
- package/src/services/store.service.ts +15 -7
- package/src/types/document.ts +25 -1
- package/src/types/job.ts +46 -31
- package/src/workers/background-worker.test.ts +4 -1
- package/src/workers/spawn-worker.test.ts +24 -0
- package/src/workers/spawn-worker.ts +3 -2
- package/dist/chunk-C4SYGLAI.js.map +0 -1
- package/dist/chunk-CC6EGZ4D.js.map +0 -1
- package/dist/{chunk-QCSFBMYW.js.map → chunk-BMY3BWB6.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -7,10 +7,10 @@ import {
|
|
|
7
7
|
isWebStoreDefinition,
|
|
8
8
|
runMCPServer,
|
|
9
9
|
spawnBackgroundWorker
|
|
10
|
-
} from "./chunk-CC6EGZ4D.js";
|
|
10
|
+
} from "./chunk-PDGOW3CH.js";
|
|
11
11
|
import {
|
|
12
12
|
IntelligentCrawler
|
|
13
|
-
} from "./chunk-QCSFBMYW.js";
|
|
13
|
+
} from "./chunk-BMY3BWB6.js";
|
|
14
14
|
import {
|
|
15
15
|
ASTParser,
|
|
16
16
|
AdapterRegistry,
|
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
err,
|
|
25
25
|
extractRepoName,
|
|
26
26
|
ok
|
|
27
|
-
} from "./chunk-C4SYGLAI.js";
|
|
27
|
+
} from "./chunk-RISACKN5.js";
|
|
28
28
|
import "./chunk-HRQD3MPH.js";
|
|
29
29
|
|
|
30
30
|
// src/index.ts
|
package/dist/mcp/server.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
IntelligentCrawler
|
|
4
|
-
} from "../chunk-QCSFBMYW.js";
|
|
4
|
+
} from "../chunk-BMY3BWB6.js";
|
|
5
5
|
import {
|
|
6
6
|
JobService,
|
|
7
7
|
createDocumentId,
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
createServices,
|
|
10
10
|
createStoreId,
|
|
11
11
|
shutdownLogger
|
|
12
|
-
} from "../chunk-C4SYGLAI.js";
|
|
12
|
+
} from "../chunk-RISACKN5.js";
|
|
13
13
|
import "../chunk-HRQD3MPH.js";
|
|
14
14
|
|
|
15
15
|
// src/workers/background-worker.ts
|
package/package.json
CHANGED
package/src/db/lance.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as lancedb from '@lancedb/lancedb';
|
|
2
2
|
import { createDocumentId } from '../types/brands.js';
|
|
3
|
+
import { DocumentMetadataSchema } from '../types/document.js';
|
|
3
4
|
import type { StoreId, DocumentId } from '../types/brands.js';
|
|
4
5
|
import type { Document, DocumentMetadata } from '../types/document.js';
|
|
5
6
|
import type { Table, Connection } from '@lancedb/lancedb';
|
|
@@ -88,13 +89,17 @@ export class LanceStore {
|
|
|
88
89
|
|
|
89
90
|
// Return all results - threshold filtering is applied after score normalization
|
|
90
91
|
// in search.service.ts to match displayed scores
|
|
91
|
-
return results.map((r) =>
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
92
|
+
return results.map((r) => {
|
|
93
|
+
const metadata = DocumentMetadataSchema.parse(JSON.parse(r.metadata));
|
|
94
|
+
return {
|
|
95
|
+
id: createDocumentId(r.id),
|
|
96
|
+
content: r.content,
|
|
97
|
+
score: 1 - r._distance,
|
|
98
|
+
// Schema validates structure, cast to branded type
|
|
99
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
100
|
+
metadata: metadata as DocumentMetadata,
|
|
101
|
+
};
|
|
102
|
+
});
|
|
98
103
|
}
|
|
99
104
|
|
|
100
105
|
async createFtsIndex(storeId: StoreId): Promise<void> {
|
|
@@ -121,13 +126,17 @@ export class LanceStore {
|
|
|
121
126
|
_score: number;
|
|
122
127
|
}>;
|
|
123
128
|
|
|
124
|
-
return results.map((r) =>
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
129
|
+
return results.map((r) => {
|
|
130
|
+
const metadata = DocumentMetadataSchema.parse(JSON.parse(r.metadata));
|
|
131
|
+
return {
|
|
132
|
+
id: createDocumentId(r.id),
|
|
133
|
+
content: r.content,
|
|
134
|
+
score: r._score,
|
|
135
|
+
// Schema validates structure, cast to branded type
|
|
136
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
137
|
+
metadata: metadata as DocumentMetadata,
|
|
138
|
+
};
|
|
139
|
+
});
|
|
131
140
|
}
|
|
132
141
|
|
|
133
142
|
async deleteStore(storeId: StoreId): Promise<void> {
|
|
package/src/mcp/handlers/store.handler.ts
CHANGED
|
@@ -157,8 +157,8 @@ export const handleCreateStore: ToolHandler<CreateStoreArgs> = async (
|
|
|
157
157
|
message: `Indexing ${result.data.name}...`,
|
|
158
158
|
});
|
|
159
159
|
|
|
160
|
-
// Spawn background worker
|
|
161
|
-
spawnBackgroundWorker(job.id, options.dataDir
|
|
160
|
+
// Spawn background worker
|
|
161
|
+
spawnBackgroundWorker(job.id, options.dataDir);
|
|
162
162
|
|
|
163
163
|
return {
|
|
164
164
|
content: [
|
|
@@ -223,8 +223,8 @@ export const handleIndexStore: ToolHandler<IndexStoreArgs> = async (
|
|
|
223
223
|
message: `Re-indexing ${store.name}...`,
|
|
224
224
|
});
|
|
225
225
|
|
|
226
|
-
// Spawn background worker
|
|
227
|
-
spawnBackgroundWorker(job.id, options.dataDir
|
|
226
|
+
// Spawn background worker
|
|
227
|
+
spawnBackgroundWorker(job.id, options.dataDir);
|
|
228
228
|
|
|
229
229
|
return {
|
|
230
230
|
content: [
|
|
package/src/services/job.service.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { randomUUID } from 'crypto';
|
|
2
2
|
import fs from 'fs';
|
|
3
3
|
import path from 'path';
|
|
4
|
+
import { JobSchema } from '../types/job.js';
|
|
4
5
|
import { Result, ok, err } from '../types/result.js';
|
|
5
6
|
import type { Job, CreateJobParams, UpdateJobParams, JobStatus } from '../types/job.js';
|
|
6
7
|
|
|
@@ -90,8 +91,7 @@ export class JobService {
|
|
|
90
91
|
|
|
91
92
|
try {
|
|
92
93
|
const content = fs.readFileSync(jobFile, 'utf-8');
|
|
93
|
-
|
|
94
|
-
return JSON.parse(content) as Job;
|
|
94
|
+
return JobSchema.parse(JSON.parse(content));
|
|
95
95
|
} catch (error) {
|
|
96
96
|
throw new Error(
|
|
97
97
|
`Failed to read job ${jobId}: ${error instanceof Error ? error.message : String(error)}`
|
|
@@ -117,8 +117,7 @@ export class JobService {
|
|
|
117
117
|
|
|
118
118
|
try {
|
|
119
119
|
const content = fs.readFileSync(path.join(this.jobsDir, file), 'utf-8');
|
|
120
|
-
|
|
121
|
-
const job = JSON.parse(content) as Job;
|
|
120
|
+
const job = JobSchema.parse(JSON.parse(content));
|
|
122
121
|
|
|
123
122
|
if (statusFilter !== undefined) {
|
|
124
123
|
const filters = Array.isArray(statusFilter) ? statusFilter : [statusFilter];
|
|
package/src/services/store.service.test.ts
CHANGED
|
@@ -538,6 +538,30 @@ describe('StoreService', () => {
|
|
|
538
538
|
|
|
539
539
|
await rm(corruptDir, { recursive: true, force: true });
|
|
540
540
|
});
|
|
541
|
+
|
|
542
|
+
it('filters out null entries from stores array on load', async () => {
|
|
543
|
+
const nullDir = await mkdtemp(join(tmpdir(), 'null-entry-'));
|
|
544
|
+
const registryPath = join(nullDir, 'stores.json');
|
|
545
|
+
const validStore = {
|
|
546
|
+
id: 'test-id',
|
|
547
|
+
type: 'file',
|
|
548
|
+
name: 'valid-store',
|
|
549
|
+
path: '/some/path',
|
|
550
|
+
status: 'ready',
|
|
551
|
+
createdAt: new Date().toISOString(),
|
|
552
|
+
updatedAt: new Date().toISOString(),
|
|
553
|
+
};
|
|
554
|
+
await writeFile(registryPath, JSON.stringify({ stores: [null, validStore, null] }));
|
|
555
|
+
|
|
556
|
+
const freshService = new StoreService(nullDir);
|
|
557
|
+
await freshService.initialize();
|
|
558
|
+
|
|
559
|
+
const stores = await freshService.list();
|
|
560
|
+
expect(stores).toHaveLength(1);
|
|
561
|
+
expect(stores[0]?.name).toBe('valid-store');
|
|
562
|
+
|
|
563
|
+
await rm(nullDir, { recursive: true, force: true });
|
|
564
|
+
});
|
|
541
565
|
});
|
|
542
566
|
|
|
543
567
|
describe('store definition auto-update', () => {
|
|
package/src/services/store.service.ts
CHANGED
|
@@ -218,6 +218,12 @@ export class StoreService {
|
|
|
218
218
|
updatedAt: now,
|
|
219
219
|
} satisfies WebStore;
|
|
220
220
|
break;
|
|
221
|
+
|
|
222
|
+
default: {
|
|
223
|
+
// Exhaustive check - if this is reached, input.type is invalid
|
|
224
|
+
const invalidType: never = input.type;
|
|
225
|
+
return err(new Error(`Invalid store type: ${String(invalidType)}`));
|
|
226
|
+
}
|
|
221
227
|
}
|
|
222
228
|
|
|
223
229
|
this.registry.stores.push(store);
|
|
@@ -332,14 +338,16 @@ export class StoreService {
|
|
|
332
338
|
const content = await readFile(registryPath, 'utf-8');
|
|
333
339
|
try {
|
|
334
340
|
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
335
|
-
const data = JSON.parse(content) as { stores: Store[] };
|
|
341
|
+
const data = JSON.parse(content) as { stores: (Store | null)[] };
|
|
336
342
|
this.registry = {
|
|
337
|
-
stores: data.stores
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
+
stores: data.stores
|
|
344
|
+
.filter((s): s is Store => s !== null)
|
|
345
|
+
.map((s) => ({
|
|
346
|
+
...s,
|
|
347
|
+
id: createStoreId(s.id),
|
|
348
|
+
createdAt: new Date(s.createdAt),
|
|
349
|
+
updatedAt: new Date(s.updatedAt),
|
|
350
|
+
})),
|
|
343
351
|
};
|
|
344
352
|
} catch (error) {
|
|
345
353
|
throw new Error(
|
package/src/types/document.ts
CHANGED
|
@@ -1,6 +1,30 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
1
2
|
import type { DocumentId, StoreId } from './brands.js';
|
|
2
3
|
|
|
3
|
-
|
|
4
|
+
// ============================================================================
|
|
5
|
+
// Zod Schemas
|
|
6
|
+
// ============================================================================
|
|
7
|
+
|
|
8
|
+
export const DocumentTypeSchema = z.enum(['file', 'chunk', 'web']);
|
|
9
|
+
|
|
10
|
+
export const DocumentMetadataSchema = z
|
|
11
|
+
.object({
|
|
12
|
+
path: z.string().optional(),
|
|
13
|
+
url: z.string().optional(),
|
|
14
|
+
type: DocumentTypeSchema,
|
|
15
|
+
storeId: z.string(),
|
|
16
|
+
indexedAt: z.union([z.string(), z.date()]),
|
|
17
|
+
fileHash: z.string().optional(),
|
|
18
|
+
chunkIndex: z.number().optional(),
|
|
19
|
+
totalChunks: z.number().optional(),
|
|
20
|
+
})
|
|
21
|
+
.loose(); // Allow additional fields per index signature
|
|
22
|
+
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// Types
|
|
25
|
+
// ============================================================================
|
|
26
|
+
|
|
27
|
+
export type DocumentType = z.infer<typeof DocumentTypeSchema>;
|
|
4
28
|
|
|
5
29
|
export interface DocumentMetadata {
|
|
6
30
|
readonly path?: string | undefined;
|
package/src/types/job.ts
CHANGED
|
@@ -1,36 +1,51 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
// ============================================================================
|
|
4
|
+
// Zod Schemas
|
|
5
|
+
// ============================================================================
|
|
6
|
+
|
|
7
|
+
export const JobTypeSchema = z.enum(['clone', 'index', 'crawl']);
|
|
8
|
+
export const JobStatusSchema = z.enum(['pending', 'running', 'completed', 'failed', 'cancelled']);
|
|
9
|
+
|
|
10
|
+
export const JobDetailsSchema = z.object({
|
|
11
|
+
storeName: z.string().optional(),
|
|
12
|
+
storeId: z.string().optional(),
|
|
13
|
+
url: z.string().optional(),
|
|
14
|
+
path: z.string().optional(),
|
|
15
|
+
filesProcessed: z.number().optional(),
|
|
16
|
+
totalFiles: z.number().optional(),
|
|
17
|
+
startedAt: z.string().optional(),
|
|
18
|
+
completedAt: z.string().optional(),
|
|
19
|
+
cancelledAt: z.string().optional(),
|
|
20
|
+
error: z.string().optional(),
|
|
15
21
|
// Crawl-specific fields
|
|
16
|
-
crawlInstruction
|
|
17
|
-
extractInstruction
|
|
18
|
-
maxPages
|
|
19
|
-
simple
|
|
20
|
-
useHeadless
|
|
21
|
-
pagesCrawled
|
|
22
|
-
}
|
|
22
|
+
crawlInstruction: z.string().optional(),
|
|
23
|
+
extractInstruction: z.string().optional(),
|
|
24
|
+
maxPages: z.number().optional(),
|
|
25
|
+
simple: z.boolean().optional(),
|
|
26
|
+
useHeadless: z.boolean().optional(),
|
|
27
|
+
pagesCrawled: z.number().optional(),
|
|
28
|
+
});
|
|
23
29
|
|
|
24
|
-
export
|
|
25
|
-
id: string
|
|
26
|
-
type:
|
|
27
|
-
status:
|
|
28
|
-
progress: number
|
|
29
|
-
message: string
|
|
30
|
-
details:
|
|
31
|
-
createdAt: string
|
|
32
|
-
updatedAt: string
|
|
33
|
-
}
|
|
30
|
+
export const JobSchema = z.object({
|
|
31
|
+
id: z.string(),
|
|
32
|
+
type: JobTypeSchema,
|
|
33
|
+
status: JobStatusSchema,
|
|
34
|
+
progress: z.number().min(0).max(100),
|
|
35
|
+
message: z.string(),
|
|
36
|
+
details: JobDetailsSchema.default({}),
|
|
37
|
+
createdAt: z.string(),
|
|
38
|
+
updatedAt: z.string(),
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
// ============================================================================
|
|
42
|
+
// Types (inferred from schemas)
|
|
43
|
+
// ============================================================================
|
|
44
|
+
|
|
45
|
+
export type JobType = z.infer<typeof JobTypeSchema>;
|
|
46
|
+
export type JobStatus = z.infer<typeof JobStatusSchema>;
|
|
47
|
+
export type JobDetails = z.infer<typeof JobDetailsSchema>;
|
|
48
|
+
export type Job = z.infer<typeof JobSchema>;
|
|
34
49
|
|
|
35
50
|
export interface CreateJobParams {
|
|
36
51
|
type: JobType;
|
|
package/src/workers/background-worker.test.ts
CHANGED
|
@@ -59,7 +59,10 @@ describe('BackgroundWorker', () => {
|
|
|
59
59
|
details: { storeId: 'test' },
|
|
60
60
|
});
|
|
61
61
|
|
|
62
|
-
|
|
62
|
+
// Zod validation catches invalid job type when reading the job file
|
|
63
|
+
await expect(worker.executeJob(job.id)).rejects.toThrow(
|
|
64
|
+
/Invalid option.*clone.*index.*crawl/
|
|
65
|
+
);
|
|
63
66
|
});
|
|
64
67
|
|
|
65
68
|
it('should set job to running status before execution', async () => {
|
|
package/src/workers/spawn-worker.test.ts
CHANGED
|
@@ -92,6 +92,30 @@ describe('spawnBackgroundWorker', () => {
|
|
|
92
92
|
|
|
93
93
|
expect(options.env.BLUERA_DATA_DIR).toBe(testDataDir);
|
|
94
94
|
});
|
|
95
|
+
|
|
96
|
+
it('should NOT set BLUERA_DATA_DIR when dataDir is undefined', () => {
|
|
97
|
+
spawnBackgroundWorker('test-job');
|
|
98
|
+
|
|
99
|
+
const [, , options] = mockSpawn.mock.calls[0] as [
|
|
100
|
+
string,
|
|
101
|
+
string[],
|
|
102
|
+
{ env: Record<string, string> },
|
|
103
|
+
];
|
|
104
|
+
|
|
105
|
+
expect(options.env).not.toHaveProperty('BLUERA_DATA_DIR');
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
it('should NOT set BLUERA_DATA_DIR when dataDir is empty string', () => {
|
|
109
|
+
spawnBackgroundWorker('test-job', '');
|
|
110
|
+
|
|
111
|
+
const [, , options] = mockSpawn.mock.calls[0] as [
|
|
112
|
+
string,
|
|
113
|
+
string[],
|
|
114
|
+
{ env: Record<string, string> },
|
|
115
|
+
];
|
|
116
|
+
|
|
117
|
+
expect(options.env).not.toHaveProperty('BLUERA_DATA_DIR');
|
|
118
|
+
});
|
|
95
119
|
});
|
|
96
120
|
|
|
97
121
|
// Test production mode with separate import to get fresh module
|
|
package/src/workers/spawn-worker.ts
CHANGED
|
@@ -9,8 +9,9 @@ import { fileURLToPath } from 'url';
|
|
|
9
9
|
* parent to exit while the worker continues running.
|
|
10
10
|
*
|
|
11
11
|
* @param jobId - The ID of the job to execute
|
|
12
|
+
* @param dataDir - Optional data directory (uses default if undefined)
|
|
12
13
|
*/
|
|
13
|
-
export function spawnBackgroundWorker(jobId: string, dataDir: string): void {
|
|
14
|
+
export function spawnBackgroundWorker(jobId: string, dataDir?: string): void {
|
|
14
15
|
// Determine the worker script path
|
|
15
16
|
// In production, this will be the compiled dist file
|
|
16
17
|
// In development, we need to use tsx to run TypeScript
|
|
@@ -40,7 +41,7 @@ export function spawnBackgroundWorker(jobId: string, dataDir: string): void {
|
|
|
40
41
|
stdio: 'ignore', // Don't pipe stdio (fully independent)
|
|
41
42
|
env: {
|
|
42
43
|
...process.env, // Inherit environment variables
|
|
43
|
-
BLUERA_DATA_DIR: dataDir, //
|
|
44
|
+
...(dataDir !== undefined && dataDir !== '' ? { BLUERA_DATA_DIR: dataDir } : {}), // Only set if provided
|
|
44
45
|
},
|
|
45
46
|
});
|
|
46
47
|
|