bluera-knowledge 0.12.6 → 0.12.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -250,6 +250,40 @@ import { randomUUID } from "crypto";
250
250
  import fs from "fs";
251
251
  import path from "path";
252
252
 
253
+ // src/types/job.ts
254
+ import { z } from "zod";
255
+ var JobTypeSchema = z.enum(["clone", "index", "crawl"]);
256
+ var JobStatusSchema = z.enum(["pending", "running", "completed", "failed", "cancelled"]);
257
+ var JobDetailsSchema = z.object({
258
+ storeName: z.string().optional(),
259
+ storeId: z.string().optional(),
260
+ url: z.string().optional(),
261
+ path: z.string().optional(),
262
+ filesProcessed: z.number().optional(),
263
+ totalFiles: z.number().optional(),
264
+ startedAt: z.string().optional(),
265
+ completedAt: z.string().optional(),
266
+ cancelledAt: z.string().optional(),
267
+ error: z.string().optional(),
268
+ // Crawl-specific fields
269
+ crawlInstruction: z.string().optional(),
270
+ extractInstruction: z.string().optional(),
271
+ maxPages: z.number().optional(),
272
+ simple: z.boolean().optional(),
273
+ useHeadless: z.boolean().optional(),
274
+ pagesCrawled: z.number().optional()
275
+ });
276
+ var JobSchema = z.object({
277
+ id: z.string(),
278
+ type: JobTypeSchema,
279
+ status: JobStatusSchema,
280
+ progress: z.number().min(0).max(100),
281
+ message: z.string(),
282
+ details: JobDetailsSchema.default({}),
283
+ createdAt: z.string(),
284
+ updatedAt: z.string()
285
+ });
286
+
253
287
  // src/types/result.ts
254
288
  function ok(data) {
255
289
  return { success: true, data };
@@ -327,7 +361,7 @@ var JobService = class {
327
361
  }
328
362
  try {
329
363
  const content = fs.readFileSync(jobFile, "utf-8");
330
- return JSON.parse(content);
364
+ return JobSchema.parse(JSON.parse(content));
331
365
  } catch (error) {
332
366
  throw new Error(
333
367
  `Failed to read job ${jobId}: ${error instanceof Error ? error.message : String(error)}`
@@ -349,7 +383,7 @@ var JobService = class {
349
383
  }
350
384
  try {
351
385
  const content = fs.readFileSync(path.join(this.jobsDir, file), "utf-8");
352
- const job = JSON.parse(content);
386
+ const job = JobSchema.parse(JSON.parse(content));
353
387
  if (statusFilter !== void 0) {
354
388
  const filters = Array.isArray(statusFilter) ? statusFilter : [statusFilter];
355
389
  if (filters.includes(job.status)) {
@@ -3881,6 +3915,10 @@ var StoreService = class {
3881
3915
  updatedAt: now
3882
3916
  };
3883
3917
  break;
3918
+ default: {
3919
+ const invalidType = input.type;
3920
+ return err(new Error(`Invalid store type: ${String(invalidType)}`));
3921
+ }
3884
3922
  }
3885
3923
  this.registry.stores.push(store);
3886
3924
  await this.saveRegistry();
@@ -3964,7 +4002,7 @@ var StoreService = class {
3964
4002
  try {
3965
4003
  const data = JSON.parse(content);
3966
4004
  this.registry = {
3967
- stores: data.stores.map((s) => ({
4005
+ stores: data.stores.filter((s) => s !== null).map((s) => ({
3968
4006
  ...s,
3969
4007
  id: createStoreId(s.id),
3970
4008
  createdAt: new Date(s.createdAt),
@@ -3990,33 +4028,33 @@ import { createInterface } from "readline";
3990
4028
  import { ZodError } from "zod";
3991
4029
 
3992
4030
  // src/crawl/schemas.ts
3993
- import { z } from "zod";
3994
- var CrawledLinkSchema = z.object({
3995
- href: z.string(),
3996
- text: z.string(),
3997
- title: z.string().optional(),
3998
- base_domain: z.string().optional(),
3999
- head_data: z.unknown().optional(),
4000
- head_extraction_status: z.unknown().optional(),
4001
- head_extraction_error: z.unknown().optional(),
4002
- intrinsic_score: z.number().optional(),
4003
- contextual_score: z.unknown().optional(),
4004
- total_score: z.unknown().optional()
4031
+ import { z as z2 } from "zod";
4032
+ var CrawledLinkSchema = z2.object({
4033
+ href: z2.string(),
4034
+ text: z2.string(),
4035
+ title: z2.string().optional(),
4036
+ base_domain: z2.string().optional(),
4037
+ head_data: z2.unknown().optional(),
4038
+ head_extraction_status: z2.unknown().optional(),
4039
+ head_extraction_error: z2.unknown().optional(),
4040
+ intrinsic_score: z2.number().optional(),
4041
+ contextual_score: z2.unknown().optional(),
4042
+ total_score: z2.unknown().optional()
4005
4043
  });
4006
- var CrawlPageSchema = z.object({
4007
- url: z.string(),
4008
- title: z.string(),
4009
- content: z.string(),
4010
- links: z.array(z.string()),
4011
- crawledAt: z.string()
4044
+ var CrawlPageSchema = z2.object({
4045
+ url: z2.string(),
4046
+ title: z2.string(),
4047
+ content: z2.string(),
4048
+ links: z2.array(z2.string()),
4049
+ crawledAt: z2.string()
4012
4050
  });
4013
- var CrawlResultSchema = z.object({
4014
- pages: z.array(CrawlPageSchema)
4051
+ var CrawlResultSchema = z2.object({
4052
+ pages: z2.array(CrawlPageSchema)
4015
4053
  });
4016
- var HeadlessResultSchema = z.object({
4017
- html: z.string(),
4018
- markdown: z.string(),
4019
- links: z.array(z.union([CrawledLinkSchema, z.string()]))
4054
+ var HeadlessResultSchema = z2.object({
4055
+ html: z2.string(),
4056
+ markdown: z2.string(),
4057
+ links: z2.array(z2.union([CrawledLinkSchema, z2.string()]))
4020
4058
  });
4021
4059
  function validateHeadlessResult(data) {
4022
4060
  return HeadlessResultSchema.parse(data);
@@ -4024,33 +4062,33 @@ function validateHeadlessResult(data) {
4024
4062
  function validateCrawlResult(data) {
4025
4063
  return CrawlResultSchema.parse(data);
4026
4064
  }
4027
- var MethodInfoSchema = z.object({
4028
- name: z.string(),
4029
- async: z.boolean(),
4030
- signature: z.string(),
4031
- startLine: z.number(),
4032
- endLine: z.number(),
4033
- calls: z.array(z.string())
4065
+ var MethodInfoSchema = z2.object({
4066
+ name: z2.string(),
4067
+ async: z2.boolean(),
4068
+ signature: z2.string(),
4069
+ startLine: z2.number(),
4070
+ endLine: z2.number(),
4071
+ calls: z2.array(z2.string())
4034
4072
  });
4035
- var CodeNodeSchema = z.object({
4036
- type: z.enum(["function", "class"]),
4037
- name: z.string(),
4038
- exported: z.boolean(),
4039
- startLine: z.number(),
4040
- endLine: z.number(),
4041
- async: z.boolean().optional(),
4042
- signature: z.string().optional(),
4043
- calls: z.array(z.string()).optional(),
4044
- methods: z.array(MethodInfoSchema).optional()
4073
+ var CodeNodeSchema = z2.object({
4074
+ type: z2.enum(["function", "class"]),
4075
+ name: z2.string(),
4076
+ exported: z2.boolean(),
4077
+ startLine: z2.number(),
4078
+ endLine: z2.number(),
4079
+ async: z2.boolean().optional(),
4080
+ signature: z2.string().optional(),
4081
+ calls: z2.array(z2.string()).optional(),
4082
+ methods: z2.array(MethodInfoSchema).optional()
4045
4083
  });
4046
- var ImportInfoSchema = z.object({
4047
- source: z.string(),
4048
- imported: z.string(),
4049
- alias: z.string().optional().nullable()
4084
+ var ImportInfoSchema = z2.object({
4085
+ source: z2.string(),
4086
+ imported: z2.string(),
4087
+ alias: z2.string().optional().nullable()
4050
4088
  });
4051
- var ParsePythonResultSchema = z.object({
4052
- nodes: z.array(CodeNodeSchema),
4053
- imports: z.array(ImportInfoSchema)
4089
+ var ParsePythonResultSchema = z2.object({
4090
+ nodes: z2.array(CodeNodeSchema),
4091
+ imports: z2.array(ImportInfoSchema)
4054
4092
  });
4055
4093
  function validateParsePythonResult(data) {
4056
4094
  return ParsePythonResultSchema.parse(data);
@@ -4349,6 +4387,22 @@ var EmbeddingEngine = class {
4349
4387
 
4350
4388
  // src/db/lance.ts
4351
4389
  import * as lancedb from "@lancedb/lancedb";
4390
+
4391
+ // src/types/document.ts
4392
+ import { z as z3 } from "zod";
4393
+ var DocumentTypeSchema = z3.enum(["file", "chunk", "web"]);
4394
+ var DocumentMetadataSchema = z3.object({
4395
+ path: z3.string().optional(),
4396
+ url: z3.string().optional(),
4397
+ type: DocumentTypeSchema,
4398
+ storeId: z3.string(),
4399
+ indexedAt: z3.union([z3.string(), z3.date()]),
4400
+ fileHash: z3.string().optional(),
4401
+ chunkIndex: z3.number().optional(),
4402
+ totalChunks: z3.number().optional()
4403
+ }).loose();
4404
+
4405
+ // src/db/lance.ts
4352
4406
  var LanceStore = class {
4353
4407
  connection = null;
4354
4408
  tables = /* @__PURE__ */ new Map();
@@ -4395,13 +4449,17 @@ var LanceStore = class {
4395
4449
  const table = await this.getTable(storeId);
4396
4450
  const query = table.vectorSearch(vector).limit(limit).distanceType("cosine");
4397
4451
  const results = await query.toArray();
4398
- return results.map((r) => ({
4399
- id: createDocumentId(r.id),
4400
- content: r.content,
4401
- score: 1 - r._distance,
4402
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
4403
- metadata: JSON.parse(r.metadata)
4404
- }));
4452
+ return results.map((r) => {
4453
+ const metadata = DocumentMetadataSchema.parse(JSON.parse(r.metadata));
4454
+ return {
4455
+ id: createDocumentId(r.id),
4456
+ content: r.content,
4457
+ score: 1 - r._distance,
4458
+ // Schema validates structure, cast to branded type
4459
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
4460
+ metadata
4461
+ };
4462
+ });
4405
4463
  }
4406
4464
  async createFtsIndex(storeId) {
4407
4465
  const table = await this.getTable(storeId);
@@ -4412,13 +4470,17 @@ var LanceStore = class {
4412
4470
  async fullTextSearch(storeId, query, limit) {
4413
4471
  const table = await this.getTable(storeId);
4414
4472
  const results = await table.search(query, "fts").limit(limit).toArray();
4415
- return results.map((r) => ({
4416
- id: createDocumentId(r.id),
4417
- content: r.content,
4418
- score: r._score,
4419
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
4420
- metadata: JSON.parse(r.metadata)
4421
- }));
4473
+ return results.map((r) => {
4474
+ const metadata = DocumentMetadataSchema.parse(JSON.parse(r.metadata));
4475
+ return {
4476
+ id: createDocumentId(r.id),
4477
+ content: r.content,
4478
+ score: r._score,
4479
+ // Schema validates structure, cast to branded type
4480
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
4481
+ metadata
4482
+ };
4483
+ });
4422
4484
  }
4423
4485
  async deleteStore(storeId) {
4424
4486
  const tableName = this.getTableName(storeId);
@@ -4538,4 +4600,4 @@ export {
4538
4600
  createServices,
4539
4601
  destroyServices
4540
4602
  };
4541
- //# sourceMappingURL=chunk-C4SYGLAI.js.map
4603
+ //# sourceMappingURL=chunk-RISACKN5.js.map