bluera-knowledge 0.12.6 → 0.12.8

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7,10 +7,10 @@ import {
7
7
  isWebStoreDefinition,
8
8
  runMCPServer,
9
9
  spawnBackgroundWorker
10
- } from "./chunk-CC6EGZ4D.js";
10
+ } from "./chunk-PDGOW3CH.js";
11
11
  import {
12
12
  IntelligentCrawler
13
- } from "./chunk-QCSFBMYW.js";
13
+ } from "./chunk-BMY3BWB6.js";
14
14
  import {
15
15
  ASTParser,
16
16
  AdapterRegistry,
@@ -24,7 +24,7 @@ import {
24
24
  err,
25
25
  extractRepoName,
26
26
  ok
27
- } from "./chunk-C4SYGLAI.js";
27
+ } from "./chunk-RISACKN5.js";
28
28
  import "./chunk-HRQD3MPH.js";
29
29
 
30
30
  // src/index.ts
@@ -1,8 +1,8 @@
1
1
  import {
2
2
  createMCPServer,
3
3
  runMCPServer
4
- } from "../chunk-CC6EGZ4D.js";
5
- import "../chunk-C4SYGLAI.js";
4
+ } from "../chunk-PDGOW3CH.js";
5
+ import "../chunk-RISACKN5.js";
6
6
  import "../chunk-HRQD3MPH.js";
7
7
  export {
8
8
  createMCPServer,
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  IntelligentCrawler
4
- } from "../chunk-QCSFBMYW.js";
4
+ } from "../chunk-BMY3BWB6.js";
5
5
  import {
6
6
  JobService,
7
7
  createDocumentId,
@@ -9,7 +9,7 @@ import {
9
9
  createServices,
10
10
  createStoreId,
11
11
  shutdownLogger
12
- } from "../chunk-C4SYGLAI.js";
12
+ } from "../chunk-RISACKN5.js";
13
13
  import "../chunk-HRQD3MPH.js";
14
14
 
15
15
  // src/workers/background-worker.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bluera-knowledge",
3
- "version": "0.12.6",
3
+ "version": "0.12.8",
4
4
  "description": "CLI tool for managing knowledge stores with semantic search",
5
5
  "type": "module",
6
6
  "bin": {
package/src/db/lance.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as lancedb from '@lancedb/lancedb';
2
2
  import { createDocumentId } from '../types/brands.js';
3
+ import { DocumentMetadataSchema } from '../types/document.js';
3
4
  import type { StoreId, DocumentId } from '../types/brands.js';
4
5
  import type { Document, DocumentMetadata } from '../types/document.js';
5
6
  import type { Table, Connection } from '@lancedb/lancedb';
@@ -88,13 +89,17 @@ export class LanceStore {
88
89
 
89
90
  // Return all results - threshold filtering is applied after score normalization
90
91
  // in search.service.ts to match displayed scores
91
- return results.map((r) => ({
92
- id: createDocumentId(r.id),
93
- content: r.content,
94
- score: 1 - r._distance,
95
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
96
- metadata: JSON.parse(r.metadata) as DocumentMetadata,
97
- }));
92
+ return results.map((r) => {
93
+ const metadata = DocumentMetadataSchema.parse(JSON.parse(r.metadata));
94
+ return {
95
+ id: createDocumentId(r.id),
96
+ content: r.content,
97
+ score: 1 - r._distance,
98
+ // Schema validates structure, cast to branded type
99
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
100
+ metadata: metadata as DocumentMetadata,
101
+ };
102
+ });
98
103
  }
99
104
 
100
105
  async createFtsIndex(storeId: StoreId): Promise<void> {
@@ -121,13 +126,17 @@ export class LanceStore {
121
126
  _score: number;
122
127
  }>;
123
128
 
124
- return results.map((r) => ({
125
- id: createDocumentId(r.id),
126
- content: r.content,
127
- score: r._score,
128
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
129
- metadata: JSON.parse(r.metadata) as DocumentMetadata,
130
- }));
129
+ return results.map((r) => {
130
+ const metadata = DocumentMetadataSchema.parse(JSON.parse(r.metadata));
131
+ return {
132
+ id: createDocumentId(r.id),
133
+ content: r.content,
134
+ score: r._score,
135
+ // Schema validates structure, cast to branded type
136
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
137
+ metadata: metadata as DocumentMetadata,
138
+ };
139
+ });
131
140
  }
132
141
 
133
142
  async deleteStore(storeId: StoreId): Promise<void> {
@@ -157,8 +157,8 @@ export const handleCreateStore: ToolHandler<CreateStoreArgs> = async (
157
157
  message: `Indexing ${result.data.name}...`,
158
158
  });
159
159
 
160
- // Spawn background worker (dataDir defaults to project-local .bluera if undefined)
161
- spawnBackgroundWorker(job.id, options.dataDir ?? '');
160
+ // Spawn background worker
161
+ spawnBackgroundWorker(job.id, options.dataDir);
162
162
 
163
163
  return {
164
164
  content: [
@@ -223,8 +223,8 @@ export const handleIndexStore: ToolHandler<IndexStoreArgs> = async (
223
223
  message: `Re-indexing ${store.name}...`,
224
224
  });
225
225
 
226
- // Spawn background worker (dataDir defaults to project-local .bluera if undefined)
227
- spawnBackgroundWorker(job.id, options.dataDir ?? '');
226
+ // Spawn background worker
227
+ spawnBackgroundWorker(job.id, options.dataDir);
228
228
 
229
229
  return {
230
230
  content: [
@@ -1,6 +1,7 @@
1
1
  import { randomUUID } from 'crypto';
2
2
  import fs from 'fs';
3
3
  import path from 'path';
4
+ import { JobSchema } from '../types/job.js';
4
5
  import { Result, ok, err } from '../types/result.js';
5
6
  import type { Job, CreateJobParams, UpdateJobParams, JobStatus } from '../types/job.js';
6
7
 
@@ -90,8 +91,7 @@ export class JobService {
90
91
 
91
92
  try {
92
93
  const content = fs.readFileSync(jobFile, 'utf-8');
93
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
94
- return JSON.parse(content) as Job;
94
+ return JobSchema.parse(JSON.parse(content));
95
95
  } catch (error) {
96
96
  throw new Error(
97
97
  `Failed to read job ${jobId}: ${error instanceof Error ? error.message : String(error)}`
@@ -117,8 +117,7 @@ export class JobService {
117
117
 
118
118
  try {
119
119
  const content = fs.readFileSync(path.join(this.jobsDir, file), 'utf-8');
120
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
121
- const job = JSON.parse(content) as Job;
120
+ const job = JobSchema.parse(JSON.parse(content));
122
121
 
123
122
  if (statusFilter !== undefined) {
124
123
  const filters = Array.isArray(statusFilter) ? statusFilter : [statusFilter];
@@ -538,6 +538,30 @@ describe('StoreService', () => {
538
538
 
539
539
  await rm(corruptDir, { recursive: true, force: true });
540
540
  });
541
+
542
+ it('filters out null entries from stores array on load', async () => {
543
+ const nullDir = await mkdtemp(join(tmpdir(), 'null-entry-'));
544
+ const registryPath = join(nullDir, 'stores.json');
545
+ const validStore = {
546
+ id: 'test-id',
547
+ type: 'file',
548
+ name: 'valid-store',
549
+ path: '/some/path',
550
+ status: 'ready',
551
+ createdAt: new Date().toISOString(),
552
+ updatedAt: new Date().toISOString(),
553
+ };
554
+ await writeFile(registryPath, JSON.stringify({ stores: [null, validStore, null] }));
555
+
556
+ const freshService = new StoreService(nullDir);
557
+ await freshService.initialize();
558
+
559
+ const stores = await freshService.list();
560
+ expect(stores).toHaveLength(1);
561
+ expect(stores[0]?.name).toBe('valid-store');
562
+
563
+ await rm(nullDir, { recursive: true, force: true });
564
+ });
541
565
  });
542
566
 
543
567
  describe('store definition auto-update', () => {
@@ -218,6 +218,12 @@ export class StoreService {
218
218
  updatedAt: now,
219
219
  } satisfies WebStore;
220
220
  break;
221
+
222
+ default: {
223
+ // Exhaustive check - if this is reached, input.type is invalid
224
+ const invalidType: never = input.type;
225
+ return err(new Error(`Invalid store type: ${String(invalidType)}`));
226
+ }
221
227
  }
222
228
 
223
229
  this.registry.stores.push(store);
@@ -332,14 +338,16 @@ export class StoreService {
332
338
  const content = await readFile(registryPath, 'utf-8');
333
339
  try {
334
340
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
335
- const data = JSON.parse(content) as { stores: Store[] };
341
+ const data = JSON.parse(content) as { stores: (Store | null)[] };
336
342
  this.registry = {
337
- stores: data.stores.map((s) => ({
338
- ...s,
339
- id: createStoreId(s.id),
340
- createdAt: new Date(s.createdAt),
341
- updatedAt: new Date(s.updatedAt),
342
- })),
343
+ stores: data.stores
344
+ .filter((s): s is Store => s !== null)
345
+ .map((s) => ({
346
+ ...s,
347
+ id: createStoreId(s.id),
348
+ createdAt: new Date(s.createdAt),
349
+ updatedAt: new Date(s.updatedAt),
350
+ })),
343
351
  };
344
352
  } catch (error) {
345
353
  throw new Error(
@@ -1,6 +1,30 @@
1
+ import { z } from 'zod';
1
2
  import type { DocumentId, StoreId } from './brands.js';
2
3
 
3
- export type DocumentType = 'file' | 'chunk' | 'web';
4
+ // ============================================================================
5
+ // Zod Schemas
6
+ // ============================================================================
7
+
8
+ export const DocumentTypeSchema = z.enum(['file', 'chunk', 'web']);
9
+
10
+ export const DocumentMetadataSchema = z
11
+ .object({
12
+ path: z.string().optional(),
13
+ url: z.string().optional(),
14
+ type: DocumentTypeSchema,
15
+ storeId: z.string(),
16
+ indexedAt: z.union([z.string(), z.date()]),
17
+ fileHash: z.string().optional(),
18
+ chunkIndex: z.number().optional(),
19
+ totalChunks: z.number().optional(),
20
+ })
21
+ .loose(); // Allow additional fields per index signature
22
+
23
+ // ============================================================================
24
+ // Types
25
+ // ============================================================================
26
+
27
+ export type DocumentType = z.infer<typeof DocumentTypeSchema>;
4
28
 
5
29
  export interface DocumentMetadata {
6
30
  readonly path?: string | undefined;
package/src/types/job.ts CHANGED
@@ -1,36 +1,51 @@
1
- export type JobType = 'clone' | 'index' | 'crawl';
2
- export type JobStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
3
-
4
- export interface JobDetails {
5
- storeName?: string;
6
- storeId?: string;
7
- url?: string;
8
- path?: string;
9
- filesProcessed?: number;
10
- totalFiles?: number;
11
- startedAt?: string;
12
- completedAt?: string;
13
- cancelledAt?: string;
14
- error?: string;
1
+ import { z } from 'zod';
2
+
3
+ // ============================================================================
4
+ // Zod Schemas
5
+ // ============================================================================
6
+
7
+ export const JobTypeSchema = z.enum(['clone', 'index', 'crawl']);
8
+ export const JobStatusSchema = z.enum(['pending', 'running', 'completed', 'failed', 'cancelled']);
9
+
10
+ export const JobDetailsSchema = z.object({
11
+ storeName: z.string().optional(),
12
+ storeId: z.string().optional(),
13
+ url: z.string().optional(),
14
+ path: z.string().optional(),
15
+ filesProcessed: z.number().optional(),
16
+ totalFiles: z.number().optional(),
17
+ startedAt: z.string().optional(),
18
+ completedAt: z.string().optional(),
19
+ cancelledAt: z.string().optional(),
20
+ error: z.string().optional(),
15
21
  // Crawl-specific fields
16
- crawlInstruction?: string;
17
- extractInstruction?: string;
18
- maxPages?: number;
19
- simple?: boolean;
20
- useHeadless?: boolean;
21
- pagesCrawled?: number;
22
- }
22
+ crawlInstruction: z.string().optional(),
23
+ extractInstruction: z.string().optional(),
24
+ maxPages: z.number().optional(),
25
+ simple: z.boolean().optional(),
26
+ useHeadless: z.boolean().optional(),
27
+ pagesCrawled: z.number().optional(),
28
+ });
23
29
 
24
- export interface Job {
25
- id: string;
26
- type: JobType;
27
- status: JobStatus;
28
- progress: number; // 0-100
29
- message: string;
30
- details: JobDetails;
31
- createdAt: string;
32
- updatedAt: string;
33
- }
30
+ export const JobSchema = z.object({
31
+ id: z.string(),
32
+ type: JobTypeSchema,
33
+ status: JobStatusSchema,
34
+ progress: z.number().min(0).max(100),
35
+ message: z.string(),
36
+ details: JobDetailsSchema.default({}),
37
+ createdAt: z.string(),
38
+ updatedAt: z.string(),
39
+ });
40
+
41
+ // ============================================================================
42
+ // Types (inferred from schemas)
43
+ // ============================================================================
44
+
45
+ export type JobType = z.infer<typeof JobTypeSchema>;
46
+ export type JobStatus = z.infer<typeof JobStatusSchema>;
47
+ export type JobDetails = z.infer<typeof JobDetailsSchema>;
48
+ export type Job = z.infer<typeof JobSchema>;
34
49
 
35
50
  export interface CreateJobParams {
36
51
  type: JobType;
@@ -59,7 +59,10 @@ describe('BackgroundWorker', () => {
59
59
  details: { storeId: 'test' },
60
60
  });
61
61
 
62
- await expect(worker.executeJob(job.id)).rejects.toThrow('Unknown job type: unknown');
62
+ // Zod validation catches invalid job type when reading the job file
63
+ await expect(worker.executeJob(job.id)).rejects.toThrow(
64
+ /Invalid option.*clone.*index.*crawl/
65
+ );
63
66
  });
64
67
 
65
68
  it('should set job to running status before execution', async () => {
@@ -92,6 +92,30 @@ describe('spawnBackgroundWorker', () => {
92
92
 
93
93
  expect(options.env.BLUERA_DATA_DIR).toBe(testDataDir);
94
94
  });
95
+
96
+ it('should NOT set BLUERA_DATA_DIR when dataDir is undefined', () => {
97
+ spawnBackgroundWorker('test-job');
98
+
99
+ const [, , options] = mockSpawn.mock.calls[0] as [
100
+ string,
101
+ string[],
102
+ { env: Record<string, string> },
103
+ ];
104
+
105
+ expect(options.env).not.toHaveProperty('BLUERA_DATA_DIR');
106
+ });
107
+
108
+ it('should NOT set BLUERA_DATA_DIR when dataDir is empty string', () => {
109
+ spawnBackgroundWorker('test-job', '');
110
+
111
+ const [, , options] = mockSpawn.mock.calls[0] as [
112
+ string,
113
+ string[],
114
+ { env: Record<string, string> },
115
+ ];
116
+
117
+ expect(options.env).not.toHaveProperty('BLUERA_DATA_DIR');
118
+ });
95
119
  });
96
120
 
97
121
  // Test production mode with separate import to get fresh module
@@ -9,8 +9,9 @@ import { fileURLToPath } from 'url';
9
9
  * parent to exit while the worker continues running.
10
10
  *
11
11
  * @param jobId - The ID of the job to execute
12
+ * @param dataDir - Optional data directory (uses default if undefined)
12
13
  */
13
- export function spawnBackgroundWorker(jobId: string, dataDir: string): void {
14
+ export function spawnBackgroundWorker(jobId: string, dataDir?: string): void {
14
15
  // Determine the worker script path
15
16
  // In production, this will be the compiled dist file
16
17
  // In development, we need to use tsx to run TypeScript
@@ -40,7 +41,7 @@ export function spawnBackgroundWorker(jobId: string, dataDir: string): void {
40
41
  stdio: 'ignore', // Don't pipe stdio (fully independent)
41
42
  env: {
42
43
  ...process.env, // Inherit environment variables
43
- BLUERA_DATA_DIR: dataDir, // Pass dataDir to worker
44
+ ...(dataDir !== undefined && dataDir !== '' ? { BLUERA_DATA_DIR: dataDir } : {}), // Only set if provided
44
45
  },
45
46
  });
46
47