@semiont/make-meaning 0.4.17 → 0.4.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -10218,7 +10218,8 @@ var GraphDBConsumer = class _GraphDBConsumer {
10218
10218
  archived: false,
10219
10219
  dateCreated: (/* @__PURE__ */ new Date()).toISOString(),
10220
10220
  wasAttributedTo: didToAgent(event.userId),
10221
- creationMethod: event.payload.creationMethod
10221
+ creationMethod: event.payload.creationMethod,
10222
+ ...event.payload.storageUri ? { storageUri: event.payload.storageUri } : {}
10222
10223
  };
10223
10224
  }
10224
10225
  /**
@@ -10417,23 +10418,25 @@ var GraphDBConsumer = class _GraphDBConsumer {
10417
10418
  // src/smelter.ts
10418
10419
  var import_rxjs2 = __toESM(require_cjs(), 1);
10419
10420
  var import_operators2 = __toESM(require_operators(), 1);
10420
- import { EventQuery as EventQuery2 } from "@semiont/event-sourcing";
10421
10421
  import { burstBuffer as burstBuffer2 } from "@semiont/core";
10422
10422
  import { resourceId as makeResourceId2, annotationId as makeAnnotationId2 } from "@semiont/core";
10423
10423
  import { chunkText, DEFAULT_CHUNKING_CONFIG } from "@semiont/vectors";
10424
10424
  import { getExactText, getTargetSelector } from "@semiont/api-client";
10425
10425
  var Smelter = class _Smelter {
10426
- constructor(eventStore, eventBus, vectorStore, embeddingProvider, contentStore, logger, chunkingConfig) {
10427
- this.eventStore = eventStore;
10426
+ constructor(_eventStore, eventBus, vectorStore, embeddingProvider, contentStore, embeddingStore, viewStorage, logger, chunkingConfig) {
10428
10427
  this.eventBus = eventBus;
10429
10428
  this.vectorStore = vectorStore;
10430
10429
  this.embeddingProvider = embeddingProvider;
10431
10430
  this.contentStore = contentStore;
10431
+ this.embeddingStore = embeddingStore;
10432
+ this.viewStorage = viewStorage;
10432
10433
  this.logger = logger;
10433
10434
  this.chunkingConfig = chunkingConfig ?? DEFAULT_CHUNKING_CONFIG;
10434
10435
  }
10435
10436
  static SMELTER_RELEVANT_EVENTS = /* @__PURE__ */ new Set([
10436
10437
  "yield:created",
10438
+ "yield:updated",
10439
+ "yield:representation-added",
10437
10440
  "mark:archived",
10438
10441
  "mark:added",
10439
10442
  "mark:removed"
@@ -10485,68 +10488,121 @@ var Smelter = class _Smelter {
10485
10488
  this.logger.info("Smelter actor stopped");
10486
10489
  }
10487
10490
  /**
10488
- * Rebuild the vector store from persisted embedding events in the event log.
10489
- * Reads all embedding:computed / embedding:deleted events and replays them.
10490
- * Bypasses the live pipeline reads directly from the event store.
10491
+ * Rebuild the vector store from the EmbeddingStore (.semiont/embeddings/).
10492
+ *
10493
+ * For each stored file, checks whether the model matches the configured
10494
+ * provider. On mismatch, re-embeds from the stored text and overwrites the
10495
+ * file before upserting into Qdrant. On match, loads the stored vectors
10496
+ * directly — no embedding provider calls needed.
10491
10497
  */
10492
10498
  async rebuildAll() {
10493
- this.logger.info("Rebuilding vector store from events");
10499
+ this.logger.info("Rebuilding vector store from EmbeddingStore");
10494
10500
  await this.vectorStore.clearAll();
10495
- const allResourceIds = await this.eventStore.log.getAllResourceIds();
10496
- this.logger.info("Found resources to scan", { count: allResourceIds.length });
10497
- const query = new EventQuery2(this.eventStore.log.storage);
10498
- let indexed = 0;
10499
- for (const rid of allResourceIds) {
10500
- const events = await query.getResourceEvents(makeResourceId2(rid));
10501
- const embeddingEvents = events.filter(
10502
- (e) => e.type === "embedding:computed" || e.type === "embedding:deleted"
10503
- );
10504
- if (embeddingEvents.length === 0) continue;
10505
- const lastEvent = embeddingEvents[embeddingEvents.length - 1];
10506
- if (lastEvent.type === "embedding:deleted" && !lastEvent.payload.annotationId) {
10507
- continue;
10508
- }
10509
- const deletedAnnotations = /* @__PURE__ */ new Set();
10510
- for (const e of embeddingEvents) {
10511
- if (e.type === "embedding:deleted") {
10512
- const payload = e.payload;
10513
- if (payload.annotationId) deletedAnnotations.add(String(payload.annotationId));
10514
- }
10515
- }
10516
- const resourceChunks = [];
10517
- for (const e of embeddingEvents) {
10518
- if (e.type !== "embedding:computed") continue;
10519
- const payload = e.payload;
10520
- if (payload.annotationId) {
10521
- if (deletedAnnotations.has(String(payload.annotationId))) continue;
10522
- await this.vectorStore.upsertAnnotationVector(
10523
- makeAnnotationId2(String(payload.annotationId)),
10524
- payload.embedding,
10525
- {
10526
- annotationId: makeAnnotationId2(String(payload.annotationId)),
10527
- resourceId: makeResourceId2(e.resourceId),
10528
- motivation: "",
10529
- entityTypes: [],
10530
- exactText: payload.chunkText
10531
- }
10532
- );
10533
- } else {
10534
- resourceChunks.push({
10535
- chunkIndex: payload.chunkIndex,
10536
- text: payload.chunkText,
10537
- embedding: payload.embedding
10538
- });
10539
- }
10540
- }
10541
- if (resourceChunks.length > 0) {
10542
- await this.vectorStore.upsertResourceVectors(
10543
- makeResourceId2(rid),
10544
- resourceChunks
10501
+ const currentModel = this.embeddingProvider.model();
10502
+ const currentDimensions = this.embeddingProvider.dimensions();
10503
+ const resourceIds = await this.embeddingStore.getAllResourceIds();
10504
+ this.logger.info("Found resource embedding files", { count: resourceIds.length });
10505
+ let resourcesIndexed = 0;
10506
+ for (const rid of resourceIds) {
10507
+ const resourceId7 = makeResourceId2(rid);
10508
+ const stored = await this.embeddingStore.readResourceEmbeddings(resourceId7);
10509
+ if (!stored || stored.chunks.length === 0) continue;
10510
+ let chunks;
10511
+ if (stored.model !== currentModel) {
10512
+ this.logger.info("Re-embedding resource (model mismatch)", {
10513
+ resourceId: rid,
10514
+ storedModel: stored.model,
10515
+ currentModel
10516
+ });
10517
+ const texts = stored.chunks.map((c) => c.text);
10518
+ const embeddings = await this.embeddingProvider.embedBatch(texts);
10519
+ chunks = stored.chunks.map((c, i) => ({
10520
+ chunkIndex: c.chunkIndex,
10521
+ text: c.text,
10522
+ embedding: embeddings[i]
10523
+ }));
10524
+ await this.embeddingStore.writeResourceChunks(resourceId7, currentModel, currentDimensions, chunks);
10525
+ } else {
10526
+ chunks = stored.chunks;
10527
+ }
10528
+ await this.vectorStore.upsertResourceVectors(resourceId7, chunks);
10529
+ resourcesIndexed++;
10530
+ }
10531
+ const annotationIds = await this.embeddingStore.getAllAnnotationIds();
10532
+ this.logger.info("Found annotation embedding files", { count: annotationIds.length });
10533
+ let annotationsIndexed = 0;
10534
+ for (const aid of annotationIds) {
10535
+ const annotationId3 = makeAnnotationId2(aid);
10536
+ const stored = await this.embeddingStore.readAnnotationEmbedding(annotationId3);
10537
+ if (!stored) continue;
10538
+ let embedding;
10539
+ if (stored.model !== currentModel) {
10540
+ this.logger.info("Re-embedding annotation (model mismatch)", {
10541
+ annotationId: aid,
10542
+ storedModel: stored.model,
10543
+ currentModel
10544
+ });
10545
+ embedding = await this.embeddingProvider.embed(stored.text);
10546
+ await this.embeddingStore.writeAnnotationEmbedding(
10547
+ annotationId3,
10548
+ makeResourceId2(stored.resourceId),
10549
+ currentModel,
10550
+ currentDimensions,
10551
+ stored.text,
10552
+ embedding,
10553
+ stored.motivation,
10554
+ stored.entityTypes
10545
10555
  );
10556
+ } else {
10557
+ embedding = stored.embedding;
10558
+ }
10559
+ const payload = {
10560
+ annotationId: annotationId3,
10561
+ resourceId: makeResourceId2(stored.resourceId),
10562
+ motivation: stored.motivation,
10563
+ entityTypes: stored.entityTypes,
10564
+ exactText: stored.text
10565
+ };
10566
+ await this.vectorStore.upsertAnnotationVector(annotationId3, embedding, payload);
10567
+ annotationsIndexed++;
10568
+ }
10569
+ const storedResourceIdSet = new Set(resourceIds);
10570
+ const allViews = await this.viewStorage.getAll();
10571
+ let backfilled = 0;
10572
+ for (const view of allViews) {
10573
+ const ridStr = view.resource["@id"];
10574
+ if (storedResourceIdSet.has(ridStr)) continue;
10575
+ if (view.resource.archived) continue;
10576
+ if (!view.resource.storageUri) continue;
10577
+ let content;
10578
+ try {
10579
+ content = await this.contentStore.retrieve(view.resource.storageUri);
10580
+ } catch (err) {
10581
+ this.logger.warn("Smelter back-fill skipped \u2014 content missing", {
10582
+ resourceId: ridStr,
10583
+ storageUri: view.resource.storageUri,
10584
+ error: err instanceof Error ? err.message : String(err)
10585
+ });
10586
+ continue;
10546
10587
  }
10547
- indexed++;
10588
+ const text = new TextDecoder().decode(content);
10589
+ if (!text.trim()) continue;
10590
+ const chunks = chunkText(text, this.chunkingConfig);
10591
+ if (chunks.length === 0) continue;
10592
+ const rid = makeResourceId2(ridStr);
10593
+ const embeddings = await this.embeddingProvider.embedBatch(chunks);
10594
+ const embeddingChunks = chunks.map((chunkText2, i) => ({
10595
+ chunkIndex: i,
10596
+ text: chunkText2,
10597
+ embedding: embeddings[i]
10598
+ }));
10599
+ await this.embeddingStore.writeResourceChunks(rid, currentModel, currentDimensions, embeddingChunks);
10600
+ await this.vectorStore.upsertResourceVectors(rid, embeddingChunks);
10601
+ backfilled++;
10602
+ resourcesIndexed++;
10603
+ this.logger.info("Smelter back-filled missing resource embedding", { resourceId: ridStr });
10548
10604
  }
10549
- this.logger.info("Vector store rebuild complete", { resourcesIndexed: indexed });
10605
+ this.logger.info("Vector store rebuild complete", { resourcesIndexed, annotationsIndexed, backfilled });
10550
10606
  }
10551
10607
  async processBatch(events) {
10552
10608
  const runs = partitionByType(events);
@@ -10568,8 +10624,6 @@ var Smelter = class _Smelter {
10568
10624
  }
10569
10625
  /**
10570
10626
  * Batch-optimized processing for consecutive events of the same type.
10571
- * Collects all texts across events, embeds in a single embedBatch() call,
10572
- * then distributes results back to their respective resources/annotations.
10573
10627
  */
10574
10628
  async applyBatchByType(events) {
10575
10629
  const type = events[0].type;
@@ -10587,8 +10641,8 @@ var Smelter = class _Smelter {
10587
10641
  }
10588
10642
  }
10589
10643
  /**
10590
- * Batch-embed chunks from multiple resource.created events in a single
10591
- * embedBatch() call, then emit events and index per resource.
10644
+ * Batch-embed chunks from multiple yield:created events in a single
10645
+ * embedBatch() call, then write to EmbeddingStore and index per resource.
10592
10646
  */
10593
10647
  async batchResourceCreated(events) {
10594
10648
  const resourceData = [];
@@ -10613,18 +10667,12 @@ var Smelter = class _Smelter {
10613
10667
  const dimensions = this.embeddingProvider.dimensions();
10614
10668
  let offset = 0;
10615
10669
  for (const { rid, chunks } of resourceData) {
10616
- const embeddingChunks = chunks.map((text, i) => {
10617
- const embedding = allEmbeddings[offset + i];
10618
- this.eventBus.get("embedding:compute").next({
10619
- resourceId: rid,
10620
- chunkIndex: i,
10621
- chunkText: text,
10622
- embedding,
10623
- model,
10624
- dimensions
10625
- });
10626
- return { chunkIndex: i, text, embedding };
10627
- });
10670
+ const embeddingChunks = chunks.map((text, i) => ({
10671
+ chunkIndex: i,
10672
+ text,
10673
+ embedding: allEmbeddings[offset + i]
10674
+ }));
10675
+ await this.embeddingStore.writeResourceChunks(rid, model, dimensions, embeddingChunks);
10628
10676
  await this.vectorStore.upsertResourceVectors(rid, embeddingChunks);
10629
10677
  this.logger.debug("Smelter batch-indexed resource", {
10630
10678
  resourceId: String(rid),
@@ -10634,8 +10682,8 @@ var Smelter = class _Smelter {
10634
10682
  }
10635
10683
  }
10636
10684
  /**
10637
- * Batch-embed exact texts from multiple annotation.added events in a
10638
- * single embedBatch() call, then emit events and index per annotation.
10685
+ * Batch-embed exact texts from multiple mark:added events in a single
10686
+ * embedBatch() call, then write to EmbeddingStore and index per annotation.
10639
10687
  */
10640
10688
  async batchAnnotationAdded(events) {
10641
10689
  const annotationData = [];
@@ -10650,30 +10698,34 @@ var Smelter = class _Smelter {
10650
10698
  rid: makeResourceId2(event.resourceId),
10651
10699
  aid: makeAnnotationId2(annotation.id),
10652
10700
  exactText,
10653
- annotation
10701
+ motivation: annotation.motivation ?? "",
10702
+ entityTypes: annotation.entityTypes ?? []
10654
10703
  });
10655
10704
  }
10656
10705
  if (annotationData.length === 0) return;
10657
10706
  const allEmbeddings = await this.embeddingProvider.embedBatch(
10658
10707
  annotationData.map((a) => a.exactText)
10659
10708
  );
10709
+ const model = this.embeddingProvider.model();
10710
+ const dimensions = this.embeddingProvider.dimensions();
10660
10711
  for (let i = 0; i < annotationData.length; i++) {
10661
- const { rid, aid, exactText, annotation } = annotationData[i];
10712
+ const { rid, aid, exactText, motivation, entityTypes } = annotationData[i];
10662
10713
  const embedding = allEmbeddings[i];
10663
- this.eventBus.get("embedding:compute").next({
10664
- resourceId: rid,
10665
- annotationId: aid,
10666
- chunkIndex: 0,
10667
- chunkText: exactText,
10714
+ await this.embeddingStore.writeAnnotationEmbedding(
10715
+ aid,
10716
+ rid,
10717
+ model,
10718
+ dimensions,
10719
+ exactText,
10668
10720
  embedding,
10669
- model: this.embeddingProvider.model(),
10670
- dimensions: this.embeddingProvider.dimensions()
10671
- });
10721
+ motivation,
10722
+ entityTypes
10723
+ );
10672
10724
  const payload = {
10673
10725
  annotationId: aid,
10674
10726
  resourceId: rid,
10675
- motivation: annotation.motivation ?? "",
10676
- entityTypes: annotation.entityTypes ?? [],
10727
+ motivation,
10728
+ entityTypes,
10677
10729
  exactText
10678
10730
  };
10679
10731
  await this.vectorStore.upsertAnnotationVector(aid, embedding, payload);
@@ -10696,19 +10748,24 @@ var Smelter = class _Smelter {
10696
10748
  }
10697
10749
  }
10698
10750
  async processEvent(storedEvent) {
10699
- const event = storedEvent;
10700
- switch (event.type) {
10751
+ switch (storedEvent.type) {
10701
10752
  case "yield:created":
10702
- await this.handleResourceCreated(event);
10753
+ await this.handleResourceCreated(storedEvent);
10754
+ break;
10755
+ case "yield:updated":
10756
+ await this.handleResourceUpdated(storedEvent);
10757
+ break;
10758
+ case "yield:representation-added":
10759
+ await this.handleRepresentationAdded(storedEvent);
10703
10760
  break;
10704
10761
  case "mark:archived":
10705
- await this.handleResourceArchived(event);
10762
+ await this.handleResourceArchived(storedEvent);
10706
10763
  break;
10707
10764
  case "mark:added":
10708
- await this.handleAnnotationAdded(event);
10765
+ await this.handleAnnotationAdded(storedEvent);
10709
10766
  break;
10710
10767
  case "mark:removed":
10711
- await this.handleAnnotationRemoved(event);
10768
+ await this.handleAnnotationRemoved(storedEvent);
10712
10769
  break;
10713
10770
  }
10714
10771
  }
@@ -10743,18 +10800,13 @@ var Smelter = class _Smelter {
10743
10800
  dimensions,
10744
10801
  heapMB: Math.round(process.memoryUsage().heapUsed / 1024 / 1024)
10745
10802
  });
10746
- const embeddingChunks = chunks.map((text2, i) => {
10747
- this.eventBus.get("embedding:compute").next({
10748
- resourceId: rid,
10749
- chunkIndex: i,
10750
- chunkText: text2,
10751
- embedding: embeddings[i],
10752
- model,
10753
- dimensions
10754
- });
10755
- return { chunkIndex: i, text: text2, embedding: embeddings[i] };
10756
- });
10757
- this.logger.info("Smelter emitted events", {
10803
+ const embeddingChunks = chunks.map((text2, i) => ({
10804
+ chunkIndex: i,
10805
+ text: text2,
10806
+ embedding: embeddings[i]
10807
+ }));
10808
+ await this.embeddingStore.writeResourceChunks(rid, model, dimensions, embeddingChunks);
10809
+ this.logger.info("Smelter wrote resource embeddings to store", {
10758
10810
  resourceId: String(rid),
10759
10811
  chunkCount: embeddingChunks.length,
10760
10812
  heapMB: Math.round(process.memoryUsage().heapUsed / 1024 / 1024)
@@ -10766,13 +10818,51 @@ var Smelter = class _Smelter {
10766
10818
  heapMB: Math.round(process.memoryUsage().heapUsed / 1024 / 1024)
10767
10819
  });
10768
10820
  }
10821
+ /**
10822
+ * Re-embed a resource whose content has changed in-place.
10823
+ *
10824
+ * Used by yield:updated and yield:representation-added handlers. Reads the
10825
+ * current storageUri from the materialized view (which is updated before the
10826
+ * EventBus fires), deletes stale Qdrant vectors, and overwrites the
10827
+ * EmbeddingStore file with fresh chunks.
10828
+ */
10829
+ async reembedResource(rid) {
10830
+ const view = await this.viewStorage.get(rid);
10831
+ const storageUri = view?.resource.storageUri;
10832
+ if (!storageUri) return;
10833
+ const content = await this.contentStore.retrieve(storageUri);
10834
+ if (!content) return;
10835
+ const text = new TextDecoder().decode(content);
10836
+ if (!text.trim()) return;
10837
+ const chunks = chunkText(text, this.chunkingConfig);
10838
+ if (chunks.length === 0) return;
10839
+ const embeddings = await this.embeddingProvider.embedBatch(chunks);
10840
+ const model = this.embeddingProvider.model();
10841
+ const dimensions = this.embeddingProvider.dimensions();
10842
+ const embeddingChunks = chunks.map((chunkText2, i) => ({
10843
+ chunkIndex: i,
10844
+ text: chunkText2,
10845
+ embedding: embeddings[i]
10846
+ }));
10847
+ await this.embeddingStore.writeResourceChunks(rid, model, dimensions, embeddingChunks);
10848
+ await this.vectorStore.deleteResourceVectors(rid);
10849
+ await this.vectorStore.upsertResourceVectors(rid, embeddingChunks);
10850
+ this.logger.debug("Smelter re-embedded resource", {
10851
+ resourceId: String(rid),
10852
+ chunks: embeddingChunks.length
10853
+ });
10854
+ }
10855
+ async handleResourceUpdated(event) {
10856
+ await this.reembedResource(makeResourceId2(event.resourceId));
10857
+ }
10858
+ async handleRepresentationAdded(event) {
10859
+ await this.reembedResource(makeResourceId2(event.resourceId));
10860
+ }
10769
10861
  async handleResourceArchived(event) {
10770
10862
  const rid = makeResourceId2(event.resourceId);
10771
10863
  await this.vectorStore.deleteResourceVectors(rid);
10772
- this.eventBus.get("embedding:delete").next({ resourceId: rid });
10773
- this.logger.debug("Smelter deleted resource vectors", {
10774
- resourceId: String(rid)
10775
- });
10864
+ await this.embeddingStore.deleteResourceEmbeddings(rid);
10865
+ this.logger.debug("Smelter deleted resource vectors", { resourceId: String(rid) });
10776
10866
  }
10777
10867
  async handleAnnotationAdded(event) {
10778
10868
  await new Promise((resolve2) => setTimeout(resolve2, 0));
@@ -10790,20 +10880,25 @@ var Smelter = class _Smelter {
10790
10880
  heapMB: Math.round(process.memoryUsage().heapUsed / 1024 / 1024)
10791
10881
  });
10792
10882
  const embedding = await this.embeddingProvider.embed(exactText);
10793
- this.eventBus.get("embedding:compute").next({
10794
- resourceId: rid,
10795
- annotationId: aid,
10796
- chunkIndex: 0,
10797
- chunkText: exactText,
10883
+ const model = this.embeddingProvider.model();
10884
+ const dimensions = this.embeddingProvider.dimensions();
10885
+ const motivation = annotation.motivation ?? "";
10886
+ const entityTypes = annotation.entityTypes ?? [];
10887
+ await this.embeddingStore.writeAnnotationEmbedding(
10888
+ aid,
10889
+ rid,
10890
+ model,
10891
+ dimensions,
10892
+ exactText,
10798
10893
  embedding,
10799
- model: this.embeddingProvider.model(),
10800
- dimensions: this.embeddingProvider.dimensions()
10801
- });
10894
+ motivation,
10895
+ entityTypes
10896
+ );
10802
10897
  const payload = {
10803
10898
  annotationId: aid,
10804
10899
  resourceId: rid,
10805
- motivation: annotation.motivation ?? "",
10806
- entityTypes: annotation.entityTypes ?? [],
10900
+ motivation,
10901
+ entityTypes,
10807
10902
  exactText
10808
10903
  };
10809
10904
  await this.vectorStore.upsertAnnotationVector(aid, embedding, payload);
@@ -10815,16 +10910,134 @@ var Smelter = class _Smelter {
10815
10910
  async handleAnnotationRemoved(event) {
10816
10911
  const annotationId3 = String(event.payload.annotationId);
10817
10912
  if (!annotationId3) return;
10818
- const rid = makeResourceId2(event.resourceId);
10819
10913
  const aid = makeAnnotationId2(annotationId3);
10820
10914
  await this.vectorStore.deleteAnnotationVector(aid);
10821
- this.eventBus.get("embedding:delete").next({
10822
- resourceId: rid,
10823
- annotationId: aid
10824
- });
10825
- this.logger.debug("Smelter deleted annotation vector", {
10826
- annotationId: String(aid)
10827
- });
10915
+ await this.embeddingStore.deleteAnnotationEmbedding(aid);
10916
+ this.logger.debug("Smelter deleted annotation vector", { annotationId: String(aid) });
10917
+ }
10918
+ };
10919
+
10920
+ // src/embedding-store.ts
10921
+ import { promises as fs } from "fs";
10922
+ import * as path from "path";
10923
+ import { getShardPath } from "@semiont/event-sourcing";
10924
+ var EmbeddingStore = class {
10925
+ constructor(project) {
10926
+ this.project = project;
10927
+ }
10928
+ // ── Path helpers ────────────────────────────────────────────────────────────
10929
+ resourceFilePath(resourceId7) {
10930
+ const [ab, cd] = getShardPath(String(resourceId7));
10931
+ return path.join(this.project.embeddingsDir, ab, cd, `${String(resourceId7)}.jsonl`);
10932
+ }
10933
+ annotationFilePath(annotationId3) {
10934
+ const [ab, cd] = getShardPath(String(annotationId3));
10935
+ return path.join(this.project.embeddingsDir, ab, cd, `${String(annotationId3)}.json`);
10936
+ }
10937
+ // ── Resource embeddings ─────────────────────────────────────────────────────
10938
+ async writeResourceChunks(resourceId7, model, dimensions, chunks) {
10939
+ const filePath = this.resourceFilePath(resourceId7);
10940
+ await fs.mkdir(path.dirname(filePath), { recursive: true });
10941
+ const header = JSON.stringify({ model, dimensions });
10942
+ const lines = chunks.map(
10943
+ (c) => JSON.stringify({ chunkIndex: c.chunkIndex, text: c.text, embedding: c.embedding })
10944
+ );
10945
+ await fs.writeFile(filePath, [header, ...lines].join("\n") + "\n", "utf-8");
10946
+ }
10947
+ async readResourceEmbeddings(resourceId7) {
10948
+ const filePath = this.resourceFilePath(resourceId7);
10949
+ try {
10950
+ const content = await fs.readFile(filePath, "utf-8");
10951
+ const lines = content.trim().split("\n").filter((l) => l.trim());
10952
+ if (lines.length === 0) return null;
10953
+ const header = JSON.parse(lines[0]);
10954
+ const chunks = lines.slice(1).map((l) => JSON.parse(l));
10955
+ return { model: header.model, dimensions: header.dimensions, chunks };
10956
+ } catch (err) {
10957
+ if (err.code === "ENOENT") return null;
10958
+ throw err;
10959
+ }
10960
+ }
10961
+ async deleteResourceEmbeddings(resourceId7) {
10962
+ const filePath = this.resourceFilePath(resourceId7);
10963
+ try {
10964
+ await fs.unlink(filePath);
10965
+ } catch (err) {
10966
+ if (err.code !== "ENOENT") throw err;
10967
+ }
10968
+ }
10969
+ // ── Annotation embeddings ───────────────────────────────────────────────────
10970
+ async writeAnnotationEmbedding(annotationId3, resourceId7, model, dimensions, text, embedding, motivation, entityTypes) {
10971
+ const filePath = this.annotationFilePath(annotationId3);
10972
+ await fs.mkdir(path.dirname(filePath), { recursive: true });
10973
+ const data = {
10974
+ model,
10975
+ dimensions,
10976
+ resourceId: String(resourceId7),
10977
+ text,
10978
+ embedding,
10979
+ motivation,
10980
+ entityTypes
10981
+ };
10982
+ await fs.writeFile(filePath, JSON.stringify(data), "utf-8");
10983
+ }
10984
+ async readAnnotationEmbedding(annotationId3) {
10985
+ const filePath = this.annotationFilePath(annotationId3);
10986
+ try {
10987
+ const content = await fs.readFile(filePath, "utf-8");
10988
+ return JSON.parse(content);
10989
+ } catch (err) {
10990
+ if (err.code === "ENOENT") return null;
10991
+ throw err;
10992
+ }
10993
+ }
10994
+ async deleteAnnotationEmbedding(annotationId3) {
10995
+ const filePath = this.annotationFilePath(annotationId3);
10996
+ try {
10997
+ await fs.unlink(filePath);
10998
+ } catch (err) {
10999
+ if (err.code !== "ENOENT") throw err;
11000
+ }
11001
+ }
11002
+ // ── Scan ────────────────────────────────────────────────────────────────────
11003
+ /**
11004
+ * Scan embeddings directory and return all resource IDs (from *.jsonl files).
11005
+ */
11006
+ async getAllResourceIds() {
11007
+ return this.scanIds((name) => name.endsWith(".jsonl"), ".jsonl");
11008
+ }
11009
+ /**
11010
+ * Scan embeddings directory and return all annotation IDs (from *.json files).
11011
+ */
11012
+ async getAllAnnotationIds() {
11013
+ return this.scanIds((name) => name.endsWith(".json"), ".json");
11014
+ }
11015
+ async scanIds(filter, ext) {
11016
+ const base = this.project.embeddingsDir;
11017
+ try {
11018
+ await fs.access(base);
11019
+ } catch {
11020
+ return [];
11021
+ }
11022
+ const results = [];
11023
+ const scan = async (dir) => {
11024
+ let entries;
11025
+ try {
11026
+ entries = await fs.readdir(dir, { withFileTypes: true });
11027
+ } catch {
11028
+ return;
11029
+ }
11030
+ for (const entry of entries) {
11031
+ const full = path.join(dir, entry.name);
11032
+ if (entry.isDirectory()) {
11033
+ await scan(full);
11034
+ } else if (filter(entry.name)) {
11035
+ results.push(entry.name.slice(0, -ext.length));
11036
+ }
11037
+ }
11038
+ };
11039
+ await scan(base);
11040
+ return results;
10828
11041
  }
10829
11042
  };
10830
11043
 
@@ -10856,12 +11069,15 @@ async function createKnowledgeBase(eventStore, project, graphDb, eventBus, logge
10856
11069
  };
10857
11070
  if (options?.vectorStore && options?.embeddingProvider) {
10858
11071
  kb.vectors = options.vectorStore;
11072
+ const embeddingStore = new EmbeddingStore(project);
10859
11073
  kb.smelter = new Smelter(
10860
11074
  eventStore,
10861
11075
  eventBus,
10862
11076
  options.vectorStore,
10863
11077
  options.embeddingProvider,
10864
11078
  content,
11079
+ embeddingStore,
11080
+ views,
10865
11081
  logger.child({ component: "smelter" }),
10866
11082
  options.chunkingConfig
10867
11083
  );
@@ -10917,7 +11133,7 @@ import { getEntityTypes } from "@semiont/ontology";
10917
11133
 
10918
11134
  // src/resource-context.ts
10919
11135
  import { getPrimaryRepresentation, decodeRepresentation } from "@semiont/api-client";
10920
- var ResourceContext = class {
11136
+ var ResourceContext = class _ResourceContext {
10921
11137
  /**
10922
11138
  * Get resource metadata from view storage
10923
11139
  */
@@ -10929,9 +11145,21 @@ var ResourceContext = class {
10929
11145
  return view.resource;
10930
11146
  }
10931
11147
  /**
10932
- * List all resources by scanning view storage
11148
+ * List resources, optionally filtered.
11149
+ *
11150
+ * When `search` is set, delegates to `kb.graph.searchResources`, which runs
11151
+ * the name match in the graph engine instead of scanning every view in JS.
11152
+ * The graph result is then narrowed by `archived` if requested.
11153
+ *
11154
+ * When `search` is unset, falls back to scanning all materialized views.
11155
+ * (TODO: also push the listing path through the graph for large KBs.)
10933
11156
  */
10934
11157
  static async listResources(filters, kb) {
11158
+ if (filters?.search) {
11159
+ const matches = await kb.graph.searchResources(filters.search);
11160
+ const filtered = filters.archived !== void 0 ? matches.filter((doc) => doc.archived === filters.archived) : matches;
11161
+ return _ResourceContext.sortByDateDesc(filtered);
11162
+ }
10935
11163
  const allViews = await kb.views.getAll();
10936
11164
  const resources = [];
10937
11165
  for (const view of allViews) {
@@ -10939,20 +11167,16 @@ var ResourceContext = class {
10939
11167
  if (filters?.archived !== void 0 && doc.archived !== filters.archived) {
10940
11168
  continue;
10941
11169
  }
10942
- if (filters?.search) {
10943
- const searchLower = filters.search.toLowerCase();
10944
- if (!doc.name.toLowerCase().includes(searchLower)) {
10945
- continue;
10946
- }
10947
- }
10948
11170
  resources.push(doc);
10949
11171
  }
10950
- resources.sort((a, b) => {
11172
+ return _ResourceContext.sortByDateDesc(resources);
11173
+ }
11174
+ static sortByDateDesc(resources) {
11175
+ return [...resources].sort((a, b) => {
10951
11176
  const aTime = a.dateCreated ? new Date(a.dateCreated).getTime() : 0;
10952
11177
  const bTime = b.dateCreated ? new Date(b.dateCreated).getTime() : 0;
10953
11178
  return bTime - aTime;
10954
11179
  });
10955
- return resources;
10956
11180
  }
10957
11181
  /**
10958
11182
  * Add content previews to resources (for search results)
@@ -12033,7 +12257,7 @@ For each candidate, output a line with the number and score, like:
12033
12257
  // src/stower.ts
12034
12258
  var import_rxjs5 = __toESM(require_cjs(), 1);
12035
12259
  var import_operators5 = __toESM(require_operators(), 1);
12036
- import { promises as fs } from "fs";
12260
+ import { promises as fs2 } from "fs";
12037
12261
  import { resourceId as resourceId3, userId as makeUserId, annotationId as makeAnnotationId4, CREATION_METHODS, generateUuid } from "@semiont/core";
12038
12262
  import { resolveStorageUri } from "@semiont/event-sourcing";
12039
12263
  var Stower = class {
@@ -12061,9 +12285,7 @@ var Stower = class {
12061
12285
  pipe("job:start", (e) => this.handleJobStart(e)),
12062
12286
  pipe("job:report-progress", (e) => this.handleJobReportProgress(e)),
12063
12287
  pipe("job:complete", (e) => this.handleJobComplete(e)),
12064
- pipe("job:fail", (e) => this.handleJobFail(e)),
12065
- pipe("embedding:compute", (e) => this.handleEmbeddingComputed(e)),
12066
- pipe("embedding:delete", (e) => this.handleEmbeddingDeleted(e))
12288
+ pipe("job:fail", (e) => this.handleJobFail(e))
12067
12289
  ).subscribe({
12068
12290
  error: (err) => this.logger.error("Stower pipeline error", { error: err })
12069
12291
  });
@@ -12267,7 +12489,7 @@ var Stower = class {
12267
12489
  if (event.storageUri) {
12268
12490
  const absPath = this.kb.content.resolveUri(event.storageUri);
12269
12491
  try {
12270
- await fs.access(absPath);
12492
+ await fs2.access(absPath);
12271
12493
  } catch {
12272
12494
  this.logger.warn("Unarchive failed: file not found at storageUri", { storageUri: event.storageUri });
12273
12495
  return;
@@ -12367,33 +12589,6 @@ var Stower = class {
12367
12589
  }
12368
12590
  });
12369
12591
  }
12370
- async handleEmbeddingComputed(event) {
12371
- await this.kb.eventStore.appendEvent({
12372
- type: "embedding:computed",
12373
- resourceId: resourceId3(event.resourceId),
12374
- userId: makeUserId("did:web:system:smelter"),
12375
- version: 1,
12376
- payload: {
12377
- annotationId: event.annotationId,
12378
- chunkIndex: event.chunkIndex,
12379
- chunkText: event.chunkText,
12380
- embedding: event.embedding,
12381
- model: event.model,
12382
- dimensions: event.dimensions
12383
- }
12384
- });
12385
- }
12386
- async handleEmbeddingDeleted(event) {
12387
- await this.kb.eventStore.appendEvent({
12388
- type: "embedding:deleted",
12389
- resourceId: resourceId3(event.resourceId),
12390
- userId: makeUserId("did:web:system:smelter"),
12391
- version: 1,
12392
- payload: {
12393
- annotationId: event.annotationId
12394
- }
12395
- });
12396
- }
12397
12592
  async stop() {
12398
12593
  this.subscription?.unsubscribe();
12399
12594
  this.subscription = null;
@@ -12404,25 +12599,25 @@ var Stower = class {
12404
12599
  // src/browser.ts
12405
12600
  var import_rxjs6 = __toESM(require_cjs(), 1);
12406
12601
  var import_operators6 = __toESM(require_operators(), 1);
12407
- import { promises as fs3 } from "fs";
12408
- import * as path2 from "path";
12602
+ import { promises as fs4 } from "fs";
12603
+ import * as path3 from "path";
12409
12604
  import { resourceId as resourceId4, annotationId } from "@semiont/core";
12410
12605
  import { getExactText as getExactText2, getTargetSource as getTargetSource2, getTargetSelector as getTargetSelector3, getResourceEntityTypes as getResourceEntityTypes5, getBodySource as getBodySource2 } from "@semiont/api-client";
12411
- import { EventQuery as EventQuery3 } from "@semiont/event-sourcing";
12606
+ import { EventQuery as EventQuery2 } from "@semiont/event-sourcing";
12412
12607
  import { getEntityTypes as getEntityTypes2 } from "@semiont/ontology";
12413
12608
 
12414
12609
  // src/views/entity-types-reader.ts
12415
- import { promises as fs2 } from "fs";
12416
- import * as path from "path";
12610
+ import { promises as fs3 } from "fs";
12611
+ import * as path2 from "path";
12417
12612
  async function readEntityTypesProjection(project) {
12418
- const entityTypesPath = path.join(
12613
+ const entityTypesPath = path2.join(
12419
12614
  project.stateDir,
12420
12615
  "projections",
12421
12616
  "__system__",
12422
12617
  "entitytypes.json"
12423
12618
  );
12424
12619
  try {
12425
- const content = await fs2.readFile(entityTypesPath, "utf-8");
12620
+ const content = await fs3.readFile(entityTypesPath, "utf-8");
12426
12621
  const projection = JSON.parse(content);
12427
12622
  return projection.entityTypes || [];
12428
12623
  } catch (error) {
@@ -12465,7 +12660,7 @@ var Browser = class {
12465
12660
  // ========================================================================
12466
12661
  async handleBrowseResource(event) {
12467
12662
  try {
12468
- const eventQuery = new EventQuery3(this.kb.eventStore.log.storage);
12663
+ const eventQuery = new EventQuery2(this.kb.eventStore.log.storage);
12469
12664
  const events = await eventQuery.getResourceEvents(resourceId4(event.resourceId));
12470
12665
  const stored = await this.kb.eventStore.views.materializer.materialize(events, resourceId4(event.resourceId));
12471
12666
  if (!stored) {
@@ -12578,7 +12773,7 @@ var Browser = class {
12578
12773
  }
12579
12774
  async handleBrowseEvents(event) {
12580
12775
  try {
12581
- const eventQuery = new EventQuery3(this.kb.eventStore.log.storage);
12776
+ const eventQuery = new EventQuery2(this.kb.eventStore.log.storage);
12582
12777
  const filters = {
12583
12778
  resourceId: resourceId4(event.resourceId)
12584
12779
  };
@@ -12618,7 +12813,7 @@ var Browser = class {
12618
12813
  });
12619
12814
  return;
12620
12815
  }
12621
- const eventQuery = new EventQuery3(this.kb.eventStore.log.storage);
12816
+ const eventQuery = new EventQuery2(this.kb.eventStore.log.storage);
12622
12817
  const allEvents = await eventQuery.queryEvents({ resourceId: resourceId4(event.resourceId) });
12623
12818
  const annotationEvents = allEvents.filter((stored) => {
12624
12819
  const p = stored.payload;
@@ -12707,8 +12902,8 @@ var Browser = class {
12707
12902
  async handleBrowseDirectory(event) {
12708
12903
  const { correlationId, path: reqPath, sort = "name" } = event;
12709
12904
  const projectRoot = this.project.root;
12710
- const resolved = path2.resolve(projectRoot, reqPath);
12711
- if (!resolved.startsWith(projectRoot + path2.sep) && resolved !== projectRoot) {
12905
+ const resolved = path3.resolve(projectRoot, reqPath);
12906
+ if (!resolved.startsWith(projectRoot + path3.sep) && resolved !== projectRoot) {
12712
12907
  this.eventBus.get("browse:directory-failed").next({
12713
12908
  correlationId,
12714
12909
  path: reqPath,
@@ -12718,7 +12913,7 @@ var Browser = class {
12718
12913
  }
12719
12914
  let dirents;
12720
12915
  try {
12721
- dirents = await fs3.readdir(resolved, { withFileTypes: true, encoding: "utf8" });
12916
+ dirents = await fs4.readdir(resolved, { withFileTypes: true, encoding: "utf8" });
12722
12917
  } catch (err) {
12723
12918
  const msg = err.code === "ENOENT" ? "path not found" : String(err);
12724
12919
  this.eventBus.get("browse:directory-failed").next({
@@ -12732,16 +12927,16 @@ var Browser = class {
12732
12927
  const allViews = await this.views.getAll();
12733
12928
  const prefix = `file://${resolved}`;
12734
12929
  const viewsByUri = new Map(
12735
- allViews.filter((v) => v.resource.storageUri?.startsWith(prefix + "/") || v.resource.storageUri?.startsWith(prefix + path2.sep)).map((v) => [v.resource.storageUri, v])
12930
+ allViews.filter((v) => v.resource.storageUri?.startsWith(prefix + "/") || v.resource.storageUri?.startsWith(prefix + path3.sep)).map((v) => [v.resource.storageUri, v])
12736
12931
  );
12737
12932
  const entries = [];
12738
12933
  for (const dirent of visible) {
12739
- const entryPath = path2.join(resolved, dirent.name);
12740
- const relPath = path2.relative(projectRoot, entryPath);
12934
+ const entryPath = path3.join(resolved, dirent.name);
12935
+ const relPath = path3.relative(projectRoot, entryPath);
12741
12936
  if (dirent.isDirectory()) {
12742
12937
  let mtime = (/* @__PURE__ */ new Date(0)).toISOString();
12743
12938
  try {
12744
- const stat = await fs3.stat(entryPath);
12939
+ const stat = await fs4.stat(entryPath);
12745
12940
  mtime = stat.mtime.toISOString();
12746
12941
  } catch {
12747
12942
  }
@@ -12751,7 +12946,7 @@ var Browser = class {
12751
12946
  let size = 0;
12752
12947
  let mtime = (/* @__PURE__ */ new Date(0)).toISOString();
12753
12948
  try {
12754
- const stat = await fs3.stat(entryPath);
12949
+ const stat = await fs4.stat(entryPath);
12755
12950
  size = stat.size;
12756
12951
  mtime = stat.mtime.toISOString();
12757
12952
  } catch {
@@ -13422,9 +13617,7 @@ async function exportBackup(options, output) {
13422
13617
  for (const [streamId, events] of streamData) {
13423
13618
  streamSummaries.push({
13424
13619
  stream: streamId,
13425
- eventCount: events.length,
13426
- firstChecksum: events[0].metadata.checksum || "",
13427
- lastChecksum: events[events.length - 1].metadata.checksum || ""
13620
+ eventCount: events.length
13428
13621
  });
13429
13622
  }
13430
13623
  const manifestHeader = {
@@ -13493,26 +13686,11 @@ async function replayEventStream(jsonl, eventBus, resolveBlob, contentStore, log
13493
13686
  annotationsCreated: 0,
13494
13687
  entityTypesAdded: 0
13495
13688
  };
13496
- let hashChainValid = true;
13497
- for (let i = 1; i < storedEvents.length; i++) {
13498
- const prev = storedEvents[i - 1];
13499
- const curr = storedEvents[i];
13500
- if (curr.metadata.prevEventHash && prev.metadata.checksum) {
13501
- if (curr.metadata.prevEventHash !== prev.metadata.checksum) {
13502
- logger?.warn("Hash chain break", {
13503
- index: i,
13504
- expected: prev.metadata.checksum,
13505
- got: curr.metadata.prevEventHash
13506
- });
13507
- hashChainValid = false;
13508
- }
13509
- }
13510
- }
13511
13689
  for (const stored of storedEvents) {
13512
13690
  await replayEvent(stored, eventBus, resolveBlob, contentStore, stats, logger);
13513
13691
  stats.eventsReplayed++;
13514
13692
  }
13515
- return { stats, hashChainValid };
13693
+ return { stats };
13516
13694
  }
13517
13695
  async function replayEvent(event, eventBus, resolveBlob, contentStore, stats, logger) {
13518
13696
  switch (event.type) {
@@ -13741,7 +13919,6 @@ async function importBackup(archive, options) {
13741
13919
  const resolveBlob = buildBlobResolver(entries);
13742
13920
  const systemData = entries.get(".semiont/events/__system__.jsonl");
13743
13921
  let stats = { eventsReplayed: 0, resourcesCreated: 0, annotationsCreated: 0, entityTypesAdded: 0 };
13744
- let hashChainValid = true;
13745
13922
  if (systemData) {
13746
13923
  const result = await replayEventStream(
13747
13924
  systemData.toString("utf8"),
@@ -13751,7 +13928,6 @@ async function importBackup(archive, options) {
13751
13928
  logger
13752
13929
  );
13753
13930
  stats = mergeStats(stats, result.stats);
13754
- if (!result.hashChainValid) hashChainValid = false;
13755
13931
  }
13756
13932
  for (const summary of streamSummaries) {
13757
13933
  if (summary.stream === "__system__") continue;
@@ -13768,10 +13944,9 @@ async function importBackup(archive, options) {
13768
13944
  logger
13769
13945
  );
13770
13946
  stats = mergeStats(stats, result.stats);
13771
- if (!result.hashChainValid) hashChainValid = false;
13772
13947
  }
13773
- logger?.info("Backup import complete", { ...stats, hashChainValid });
13774
- return { manifest: header, stats, hashChainValid };
13948
+ logger?.info("Backup import complete", { ...stats });
13949
+ return { manifest: header, stats };
13775
13950
  }
13776
13951
  function mergeStats(a, b) {
13777
13952
  return {