@semiont/make-meaning 0.2.46 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3634,7 +3634,7 @@ var require_firstValueFrom = __commonJS({
3634
3634
  exports.firstValueFrom = void 0;
3635
3635
  var EmptyError_1 = require_EmptyError();
3636
3636
  var Subscriber_1 = require_Subscriber();
3637
- function firstValueFrom3(source, config) {
3637
+ function firstValueFrom5(source, config) {
3638
3638
  var hasConfig = typeof config === "object";
3639
3639
  return new Promise(function(resolve4, reject) {
3640
3640
  var subscriber = new Subscriber_1.SafeSubscriber({
@@ -3654,7 +3654,7 @@ var require_firstValueFrom = __commonJS({
3654
3654
  source.subscribe(subscriber);
3655
3655
  });
3656
3656
  }
3657
- exports.firstValueFrom = firstValueFrom3;
3657
+ exports.firstValueFrom = firstValueFrom5;
3658
3658
  }
3659
3659
  });
3660
3660
 
@@ -3799,7 +3799,7 @@ var require_map = __commonJS({
3799
3799
  exports.map = void 0;
3800
3800
  var lift_1 = require_lift();
3801
3801
  var OperatorSubscriber_1 = require_OperatorSubscriber();
3802
- function map3(project, thisArg) {
3802
+ function map5(project, thisArg) {
3803
3803
  return lift_1.operate(function(source, subscriber) {
3804
3804
  var index = 0;
3805
3805
  source.subscribe(OperatorSubscriber_1.createOperatorSubscriber(subscriber, function(value) {
@@ -3807,7 +3807,7 @@ var require_map = __commonJS({
3807
3807
  }));
3808
3808
  });
3809
3809
  }
3810
- exports.map = map3;
3810
+ exports.map = map5;
3811
3811
  }
3812
3812
  });
3813
3813
 
@@ -4657,7 +4657,7 @@ var require_timer = __commonJS({
4657
4657
  var async_1 = require_async();
4658
4658
  var isScheduler_1 = require_isScheduler();
4659
4659
  var isDate_1 = require_isDate();
4660
- function timer3(dueTime, intervalOrScheduler, scheduler) {
4660
+ function timer5(dueTime, intervalOrScheduler, scheduler) {
4661
4661
  if (dueTime === void 0) {
4662
4662
  dueTime = 0;
4663
4663
  }
@@ -4690,7 +4690,7 @@ var require_timer = __commonJS({
4690
4690
  }, due);
4691
4691
  });
4692
4692
  }
4693
- exports.timer = timer3;
4693
+ exports.timer = timer5;
4694
4694
  }
4695
4695
  });
4696
4696
 
@@ -4890,7 +4890,7 @@ var require_race = __commonJS({
4890
4890
  var innerFrom_1 = require_innerFrom();
4891
4891
  var argsOrArgArray_1 = require_argsOrArgArray();
4892
4892
  var OperatorSubscriber_1 = require_OperatorSubscriber();
4893
- function race3() {
4893
+ function race5() {
4894
4894
  var sources = [];
4895
4895
  for (var _i = 0; _i < arguments.length; _i++) {
4896
4896
  sources[_i] = arguments[_i];
@@ -4898,7 +4898,7 @@ var require_race = __commonJS({
4898
4898
  sources = argsOrArgArray_1.argsOrArgArray(sources);
4899
4899
  return sources.length === 1 ? innerFrom_1.innerFrom(sources[0]) : new Observable_1.Observable(raceInit(sources));
4900
4900
  }
4901
- exports.race = race3;
4901
+ exports.race = race5;
4902
4902
  function raceInit(sources) {
4903
4903
  return function(subscriber) {
4904
4904
  var subscriptions = [];
@@ -9468,14 +9468,14 @@ var require_race2 = __commonJS({
9468
9468
  exports.race = void 0;
9469
9469
  var argsOrArgArray_1 = require_argsOrArgArray();
9470
9470
  var raceWith_1 = require_raceWith();
9471
- function race3() {
9471
+ function race5() {
9472
9472
  var args = [];
9473
9473
  for (var _i = 0; _i < arguments.length; _i++) {
9474
9474
  args[_i] = arguments[_i];
9475
9475
  }
9476
9476
  return raceWith_1.raceWith.apply(void 0, __spreadArray([], __read(argsOrArgArray_1.argsOrArgArray(args))));
9477
9477
  }
9478
- exports.race = race3;
9478
+ exports.race = race5;
9479
9479
  }
9480
9480
  });
9481
9481
 
@@ -9966,11 +9966,9 @@ var import_rxjs = __toESM(require_cjs(), 1);
9966
9966
  var import_operators = __toESM(require_operators(), 1);
9967
9967
  import { EventQuery } from "@semiont/event-sourcing";
9968
9968
  import { didToAgent, burstBuffer } from "@semiont/core";
9969
- import { resourceId as makeResourceId, findBodyItem } from "@semiont/core";
9970
- import { toResourceUri, toAnnotationUri } from "@semiont/event-sourcing";
9969
+ import { resourceId as makeResourceId, annotationId as makeAnnotationId, findBodyItem } from "@semiont/core";
9971
9970
  var GraphDBConsumer = class _GraphDBConsumer {
9972
- constructor(config, eventStore, graphDb, logger) {
9973
- this.config = config;
9971
+ constructor(eventStore, graphDb, logger) {
9974
9972
  this.eventStore = eventStore;
9975
9973
  this.graphDb = graphDb;
9976
9974
  this.logger = logger;
@@ -10163,13 +10161,9 @@ var GraphDBConsumer = class _GraphDBConsumer {
10163
10161
  if (!event.resourceId) {
10164
10162
  throw new Error("resource.created requires resourceId");
10165
10163
  }
10166
- const resourceUri = toResourceUri(
10167
- { baseUrl: this.config.services.backend.publicURL },
10168
- event.resourceId
10169
- );
10170
10164
  return {
10171
10165
  "@context": "https://schema.org/",
10172
- "@id": resourceUri,
10166
+ "@id": event.resourceId,
10173
10167
  name: event.payload.name,
10174
10168
  entityTypes: event.payload.entityTypes || [],
10175
10169
  representations: [{
@@ -10203,13 +10197,13 @@ var GraphDBConsumer = class _GraphDBConsumer {
10203
10197
  }
10204
10198
  case "resource.archived":
10205
10199
  if (!event.resourceId) throw new Error("resource.archived requires resourceId");
10206
- await graphDb.updateResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId), {
10200
+ await graphDb.updateResource(makeResourceId(event.resourceId), {
10207
10201
  archived: true
10208
10202
  });
10209
10203
  break;
10210
10204
  case "resource.unarchived":
10211
10205
  if (!event.resourceId) throw new Error("resource.unarchived requires resourceId");
10212
- await graphDb.updateResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId), {
10206
+ await graphDb.updateResource(makeResourceId(event.resourceId), {
10213
10207
  archived: false
10214
10208
  });
10215
10209
  break;
@@ -10226,7 +10220,7 @@ var GraphDBConsumer = class _GraphDBConsumer {
10226
10220
  });
10227
10221
  break;
10228
10222
  case "annotation.removed":
10229
- await graphDb.deleteAnnotation(toAnnotationUri({ baseUrl: this.config.services.backend.publicURL }, event.payload.annotationId));
10223
+ await graphDb.deleteAnnotation(makeAnnotationId(event.payload.annotationId));
10230
10224
  break;
10231
10225
  case "annotation.body.updated":
10232
10226
  this.logger.debug("Processing annotation.body.updated event", {
@@ -10234,8 +10228,8 @@ var GraphDBConsumer = class _GraphDBConsumer {
10234
10228
  payload: event.payload
10235
10229
  });
10236
10230
  try {
10237
- const annotationUri = toAnnotationUri({ baseUrl: this.config.services.backend.publicURL }, event.payload.annotationId);
10238
- const currentAnnotation = await graphDb.getAnnotation(annotationUri);
10231
+ const annId = makeAnnotationId(event.payload.annotationId);
10232
+ const currentAnnotation = await graphDb.getAnnotation(annId);
10239
10233
  if (currentAnnotation) {
10240
10234
  let bodyArray = Array.isArray(currentAnnotation.body) ? [...currentAnnotation.body] : currentAnnotation.body ? [currentAnnotation.body] : [];
10241
10235
  for (const op of event.payload.operations) {
@@ -10256,7 +10250,7 @@ var GraphDBConsumer = class _GraphDBConsumer {
10256
10250
  }
10257
10251
  }
10258
10252
  }
10259
- await graphDb.updateAnnotation(annotationUri, {
10253
+ await graphDb.updateAnnotation(annId, {
10260
10254
  body: bodyArray
10261
10255
  });
10262
10256
  this.logger.info("updateAnnotation completed successfully");
@@ -10274,9 +10268,10 @@ var GraphDBConsumer = class _GraphDBConsumer {
10274
10268
  case "entitytag.added":
10275
10269
  if (!event.resourceId) throw new Error("entitytag.added requires resourceId");
10276
10270
  {
10277
- const doc = await graphDb.getResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId));
10271
+ const rid = makeResourceId(event.resourceId);
10272
+ const doc = await graphDb.getResource(rid);
10278
10273
  if (doc) {
10279
- await graphDb.updateResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId), {
10274
+ await graphDb.updateResource(rid, {
10280
10275
  entityTypes: [...doc.entityTypes || [], event.payload.entityType]
10281
10276
  });
10282
10277
  }
@@ -10285,9 +10280,10 @@ var GraphDBConsumer = class _GraphDBConsumer {
10285
10280
  case "entitytag.removed":
10286
10281
  if (!event.resourceId) throw new Error("entitytag.removed requires resourceId");
10287
10282
  {
10288
- const doc = await graphDb.getResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId));
10283
+ const rid = makeResourceId(event.resourceId);
10284
+ const doc = await graphDb.getResource(rid);
10289
10285
  if (doc) {
10290
- await graphDb.updateResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId), {
10286
+ await graphDb.updateResource(rid, {
10291
10287
  entityTypes: (doc.entityTypes || []).filter((t) => t !== event.payload.entityType)
10292
10288
  });
10293
10289
  }
@@ -10304,20 +10300,20 @@ var GraphDBConsumer = class _GraphDBConsumer {
10304
10300
  * Rebuild entire resource from events.
10305
10301
  * Bypasses the live pipeline — reads directly from event store.
10306
10302
  */
10307
- async rebuildResource(resourceId2) {
10303
+ async rebuildResource(resourceId4) {
10308
10304
  const graphDb = this.ensureInitialized();
10309
- this.logger.info("Rebuilding resource from events", { resourceId: resourceId2 });
10305
+ this.logger.info("Rebuilding resource from events", { resourceId: resourceId4 });
10310
10306
  try {
10311
- await graphDb.deleteResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, makeResourceId(resourceId2)));
10307
+ await graphDb.deleteResource(resourceId4);
10312
10308
  } catch (error) {
10313
- this.logger.debug("No existing resource to delete", { resourceId: resourceId2 });
10309
+ this.logger.debug("No existing resource to delete", { resourceId: resourceId4 });
10314
10310
  }
10315
10311
  const query = new EventQuery(this.eventStore.log.storage);
10316
- const events = await query.getResourceEvents(resourceId2);
10312
+ const events = await query.getResourceEvents(resourceId4);
10317
10313
  for (const storedEvent of events) {
10318
10314
  await this.applyEventToGraph(storedEvent);
10319
10315
  }
10320
- this.logger.info("Resource rebuild complete", { resourceId: resourceId2, eventCount: events.length });
10316
+ this.logger.info("Resource rebuild complete", { resourceId: resourceId4, eventCount: events.length });
10321
10317
  }
10322
10318
  /**
10323
10319
  * Rebuild entire GraphDB from all events.
@@ -10333,8 +10329,8 @@ var GraphDBConsumer = class _GraphDBConsumer {
10333
10329
  const allResourceIds = await this.eventStore.log.getAllResourceIds();
10334
10330
  this.logger.info("Found resources to rebuild", { count: allResourceIds.length });
10335
10331
  this.logger.info("PASS 1: Creating all nodes (resources + annotations)");
10336
- for (const resourceId2 of allResourceIds) {
10337
- const events = await query.getResourceEvents(makeResourceId(resourceId2));
10332
+ for (const resourceId4 of allResourceIds) {
10333
+ const events = await query.getResourceEvents(makeResourceId(resourceId4));
10338
10334
  for (const storedEvent of events) {
10339
10335
  if (storedEvent.event.type === "annotation.body.updated") {
10340
10336
  continue;
@@ -10344,8 +10340,8 @@ var GraphDBConsumer = class _GraphDBConsumer {
10344
10340
  }
10345
10341
  this.logger.info("Pass 1 complete - all nodes created");
10346
10342
  this.logger.info("PASS 2: Creating all REFERENCES edges");
10347
- for (const resourceId2 of allResourceIds) {
10348
- const events = await query.getResourceEvents(makeResourceId(resourceId2));
10343
+ for (const resourceId4 of allResourceIds) {
10344
+ const events = await query.getResourceEvents(makeResourceId(resourceId4));
10349
10345
  for (const storedEvent of events) {
10350
10346
  if (storedEvent.event.type === "annotation.body.updated") {
10351
10347
  await this.applyEventToGraph(storedEvent);
@@ -10387,7 +10383,10 @@ async function bootstrapEntityTypes(eventBus, config, logger) {
10387
10383
  logger?.debug("Entity types bootstrap already completed, skipping");
10388
10384
  return;
10389
10385
  }
10390
- const configuredPath = config.services.filesystem.path;
10386
+ const configuredPath = config.services.filesystem?.path;
10387
+ if (!configuredPath) {
10388
+ throw new Error("services.filesystem.path is required for entity types bootstrap");
10389
+ }
10391
10390
  const projectRoot = config._metadata?.projectRoot;
10392
10391
  let basePath;
10393
10392
  if (path.isAbsolute(configuredPath)) {
@@ -10451,13 +10450,12 @@ function createKnowledgeBase(eventStore, basePath, projectRoot, graphDb, logger)
10451
10450
  // src/gatherer.ts
10452
10451
  var import_rxjs3 = __toESM(require_cjs(), 1);
10453
10452
  var import_operators3 = __toESM(require_operators(), 1);
10454
- import { annotationUri as makeAnnotationUri, uriToResourceId as uriToResourceId2, resourceId as makeResourceId3 } from "@semiont/core";
10453
+ import { annotationId as makeAnnotationId2, resourceId } from "@semiont/core";
10455
10454
  import { EventQuery as EventQuery2 } from "@semiont/event-sourcing";
10456
10455
  import { getResourceEntityTypes as getResourceEntityTypes4, getBodySource as getBodySource2 } from "@semiont/api-client";
10457
10456
  import { getEntityTypes as getEntityTypes2 } from "@semiont/ontology";
10458
10457
 
10459
10458
  // src/generation/resource-generation.ts
10460
- import { getLocaleEnglishName } from "@semiont/api-client";
10461
10459
  async function generateResourceSummary(resourceName, content, entityTypes, client) {
10462
10460
  const truncatedContent = content.length > 2e3 ? content.substring(0, 2e3) + "..." : content;
10463
10461
  const prompt = `Create a brief, intelligent summary of this resource titled "${resourceName}".
@@ -10483,6 +10481,7 @@ Format as a simple list, one suggestion per line.`;
10483
10481
  // src/annotation-context.ts
10484
10482
  import {
10485
10483
  getBodySource,
10484
+ getResourceId,
10486
10485
  getTargetSource,
10487
10486
  getTargetSelector,
10488
10487
  getResourceEntityTypes,
@@ -10490,7 +10489,7 @@ import {
10490
10489
  getPrimaryRepresentation as getPrimaryRepresentation2,
10491
10490
  decodeRepresentation as decodeRepresentation2
10492
10491
  } from "@semiont/api-client";
10493
- import { resourceId as createResourceId, uriToResourceId } from "@semiont/core";
10492
+ import { resourceId as createResourceId } from "@semiont/core";
10494
10493
  import { getEntityTypes } from "@semiont/ontology";
10495
10494
 
10496
10495
  // src/resource-context.ts
@@ -10499,8 +10498,8 @@ var ResourceContext = class {
10499
10498
  /**
10500
10499
  * Get resource metadata from view storage
10501
10500
  */
10502
- static async getResourceMetadata(resourceId2, kb) {
10503
- const view = await kb.views.get(resourceId2);
10501
+ static async getResourceMetadata(resourceId4, kb) {
10502
+ const view = await kb.views.get(resourceId4);
10504
10503
  if (!view) {
10505
10504
  return null;
10506
10505
  }
@@ -10572,7 +10571,7 @@ var AnnotationContext = class {
10572
10571
  /**
10573
10572
  * Build LLM context for an annotation
10574
10573
  *
10575
- * @param annotationUri - Full annotation URI (e.g., http://localhost:4000/annotations/abc123)
10574
+ * @param annotationId - Bare annotation ID
10576
10575
  * @param resourceId - Source resource ID
10577
10576
  * @param kb - Knowledge base stores
10578
10577
  * @param options - Context building options
@@ -10580,7 +10579,7 @@ var AnnotationContext = class {
10580
10579
  * @returns Rich context for LLM processing
10581
10580
  * @throws Error if annotation or resource not found
10582
10581
  */
10583
- static async buildLLMContext(annotationUri, resourceId2, kb, options = {}, inferenceClient, logger) {
10582
+ static async buildLLMContext(annotationId2, resourceId4, kb, options = {}, inferenceClient, logger) {
10584
10583
  const {
10585
10584
  includeSourceContext = true,
10586
10585
  includeTargetContext = true,
@@ -10589,47 +10588,41 @@ var AnnotationContext = class {
10589
10588
  if (contextWindow < 100 || contextWindow > 5e3) {
10590
10589
  throw new Error("contextWindow must be between 100 and 5000");
10591
10590
  }
10592
- logger?.debug("Building LLM context", { annotationUri, resourceId: resourceId2 });
10593
- logger?.debug("Getting view for resource", { resourceId: resourceId2 });
10591
+ logger?.debug("Building LLM context", { annotationId: annotationId2, resourceId: resourceId4 });
10592
+ logger?.debug("Getting view for resource", { resourceId: resourceId4 });
10594
10593
  let sourceView;
10595
10594
  try {
10596
- sourceView = await kb.views.get(resourceId2);
10595
+ sourceView = await kb.views.get(resourceId4);
10597
10596
  logger?.debug("Retrieved view", { hasView: !!sourceView });
10598
10597
  if (!sourceView) {
10599
10598
  throw new Error("Source resource not found");
10600
10599
  }
10601
10600
  } catch (error) {
10602
- logger?.error("Error getting view", { resourceId: resourceId2, error });
10601
+ logger?.error("Error getting view", { resourceId: resourceId4, error });
10603
10602
  throw error;
10604
10603
  }
10605
10604
  logger?.debug("Looking for annotation in resource", {
10606
- annotationUri,
10607
- resourceId: resourceId2,
10605
+ annotationId: annotationId2,
10606
+ resourceId: resourceId4,
10608
10607
  totalAnnotations: sourceView.annotations.annotations.length,
10609
10608
  firstFiveIds: sourceView.annotations.annotations.slice(0, 5).map((a) => a.id)
10610
10609
  });
10611
- const annotation = sourceView.annotations.annotations.find((a) => a.id === annotationUri);
10610
+ const annotation = sourceView.annotations.annotations.find((a) => a.id === annotationId2);
10612
10611
  logger?.debug("Annotation search result", { found: !!annotation });
10613
10612
  if (!annotation) {
10614
10613
  throw new Error("Annotation not found in view");
10615
10614
  }
10616
10615
  const targetSource = getTargetSource(annotation.target);
10617
- const targetResourceId = targetSource.split("/").pop();
10618
- logger?.debug("Validating target resource", { targetSource, expectedResourceId: resourceId2, extractedId: targetResourceId });
10619
- if (targetResourceId !== resourceId2) {
10620
- throw new Error(`Annotation target resource ID (${targetResourceId}) does not match expected resource ID (${resourceId2})`);
10616
+ logger?.debug("Validating target resource", { targetSource, expectedResourceId: resourceId4 });
10617
+ if (targetSource !== String(resourceId4)) {
10618
+ throw new Error(`Annotation target resource ID (${targetSource}) does not match expected resource ID (${resourceId4})`);
10621
10619
  }
10622
10620
  const sourceDoc = sourceView.resource;
10623
10621
  const bodySource = getBodySource(annotation.body);
10624
10622
  let targetDoc = null;
10625
10623
  if (bodySource) {
10626
- const parts = bodySource.split("/");
10627
- const lastPart = parts[parts.length - 1];
10628
- if (!lastPart) {
10629
- throw new Error(`Invalid body source URI: ${bodySource}`);
10630
- }
10631
- const targetResourceId2 = createResourceId(lastPart);
10632
- const targetView = await kb.views.get(targetResourceId2);
10624
+ const targetResourceId = createResourceId(bodySource);
10625
+ const targetView = await kb.views.get(targetResourceId);
10633
10626
  targetDoc = targetView?.resource || null;
10634
10627
  }
10635
10628
  let sourceContext;
@@ -10686,18 +10679,97 @@ var AnnotationContext = class {
10686
10679
  }
10687
10680
  }
10688
10681
  const suggestedResolution = void 0;
10689
- const generationContext = sourceContext ? {
10690
- sourceContext: {
10691
- before: sourceContext.before || "",
10692
- selected: sourceContext.selected,
10693
- after: sourceContext.after || ""
10694
- },
10682
+ logger?.debug("Building graph context", { resourceId: resourceId4 });
10683
+ const [connections, referencedByAnnotations, entityTypeStats] = await Promise.all([
10684
+ kb.graph.getResourceConnections(resourceId4),
10685
+ kb.graph.getResourceReferencedBy(resourceId4),
10686
+ kb.graph.getEntityTypeStats()
10687
+ ]);
10688
+ const citedByMap = /* @__PURE__ */ new Map();
10689
+ for (const ann of referencedByAnnotations) {
10690
+ const source = getTargetSource(ann.target);
10691
+ if (source && source !== String(resourceId4)) {
10692
+ const sourceResId = createResourceId(source);
10693
+ const sourceView2 = await kb.views.get(sourceResId);
10694
+ if (sourceView2?.resource) {
10695
+ citedByMap.set(source, sourceView2.resource.name);
10696
+ }
10697
+ }
10698
+ }
10699
+ const annotationEntityTypes = getEntityTypes(annotation);
10700
+ const siblingEntityTypes = /* @__PURE__ */ new Set();
10701
+ for (const ann of sourceView.annotations.annotations) {
10702
+ if (ann.id !== annotationId2) {
10703
+ for (const et of getEntityTypes(ann)) {
10704
+ siblingEntityTypes.add(et);
10705
+ }
10706
+ }
10707
+ }
10708
+ const entityTypeFrequencies = {};
10709
+ for (const stat of entityTypeStats) {
10710
+ entityTypeFrequencies[stat.type] = stat.count;
10711
+ }
10712
+ let inferredRelationshipSummary;
10713
+ if (inferenceClient && sourceContext) {
10714
+ try {
10715
+ const connNames = connections.map((c) => c.targetResource.name).slice(0, 10);
10716
+ const citedByNames = Array.from(citedByMap.values()).slice(0, 5);
10717
+ const siblingTypes = Array.from(siblingEntityTypes).slice(0, 10);
10718
+ const parts = [];
10719
+ parts.push(`Passage: "${sourceContext.selected}"`);
10720
+ if (connNames.length > 0) parts.push(`Connected resources: ${connNames.join(", ")}`);
10721
+ if (citedByNames.length > 0) parts.push(`Cited by: ${citedByNames.join(", ")}`);
10722
+ if (siblingTypes.length > 0) parts.push(`Sibling entity types: ${siblingTypes.join(", ")}`);
10723
+ if (annotationEntityTypes.length > 0) parts.push(`Annotation entity types: ${annotationEntityTypes.join(", ")}`);
10724
+ const relationshipPrompt = `Given this annotation passage and its knowledge graph neighborhood, write a 1-2 sentence summary of how this passage relates to its surrounding resources and what kind of resource would best resolve this reference.
10725
+
10726
+ ${parts.join("\n")}
10727
+
10728
+ Summary:`;
10729
+ inferredRelationshipSummary = await inferenceClient.generateText(relationshipPrompt, 150, 0.3);
10730
+ logger?.debug("Generated inferred relationship summary", { length: inferredRelationshipSummary.length });
10731
+ } catch (error) {
10732
+ logger?.warn("Failed to generate inferred relationship summary", { error });
10733
+ }
10734
+ }
10735
+ const graphContext = {
10736
+ connections: connections.map((conn) => ({
10737
+ resourceId: getResourceId(conn.targetResource) ?? "",
10738
+ resourceName: conn.targetResource.name,
10739
+ entityTypes: getResourceEntityTypes(conn.targetResource),
10740
+ bidirectional: conn.bidirectional
10741
+ })),
10742
+ citedByCount: citedByMap.size,
10743
+ citedBy: Array.from(citedByMap.entries()).map(([id, name]) => ({
10744
+ resourceId: id,
10745
+ resourceName: name
10746
+ })),
10747
+ siblingEntityTypes: Array.from(siblingEntityTypes),
10748
+ entityTypeFrequencies,
10749
+ ...inferredRelationshipSummary ? { inferredRelationshipSummary } : {}
10750
+ };
10751
+ logger?.debug("Built graph context", {
10752
+ connections: connections.length,
10753
+ citedByCount: citedByMap.size,
10754
+ siblingEntityTypes: siblingEntityTypes.size
10755
+ });
10756
+ const generationContext = {
10757
+ annotation,
10758
+ sourceResource: sourceDoc,
10695
10759
  metadata: {
10696
10760
  resourceType: "document",
10697
10761
  language: sourceDoc.language,
10698
- entityTypes: getEntityTypes(annotation)
10699
- }
10700
- } : void 0;
10762
+ entityTypes: annotationEntityTypes
10763
+ },
10764
+ graphContext
10765
+ };
10766
+ if (sourceContext) {
10767
+ generationContext.sourceContext = {
10768
+ before: sourceContext.before || "",
10769
+ selected: sourceContext.selected,
10770
+ after: sourceContext.after || ""
10771
+ };
10772
+ }
10701
10773
  const response = {
10702
10774
  annotation,
10703
10775
  sourceResource: sourceDoc,
@@ -10714,10 +10786,10 @@ var AnnotationContext = class {
10714
10786
  * Get resource annotations from view storage (fast path)
10715
10787
  * Throws if view missing
10716
10788
  */
10717
- static async getResourceAnnotations(resourceId2, kb) {
10718
- const view = await kb.views.get(resourceId2);
10789
+ static async getResourceAnnotations(resourceId4, kb) {
10790
+ const view = await kb.views.get(resourceId4);
10719
10791
  if (!view) {
10720
- throw new Error(`Resource ${resourceId2} not found in view storage`);
10792
+ throw new Error(`Resource ${resourceId4} not found in view storage`);
10721
10793
  }
10722
10794
  return view.annotations;
10723
10795
  }
@@ -10725,8 +10797,8 @@ var AnnotationContext = class {
10725
10797
  * Get all annotations
10726
10798
  * @returns Array of all annotation objects
10727
10799
  */
10728
- static async getAllAnnotations(resourceId2, kb) {
10729
- const annotations = await this.getResourceAnnotations(resourceId2, kb);
10800
+ static async getAllAnnotations(resourceId4, kb) {
10801
+ const annotations = await this.getResourceAnnotations(resourceId4, kb);
10730
10802
  return await this.enrichResolvedReferences(annotations.annotations, kb);
10731
10803
  }
10732
10804
  /**
@@ -10735,28 +10807,26 @@ var AnnotationContext = class {
10735
10807
  * @private
10736
10808
  */
10737
10809
  static async enrichResolvedReferences(annotations, kb) {
10738
- const resolvedUris = /* @__PURE__ */ new Set();
10810
+ const resolvedIds = /* @__PURE__ */ new Set();
10739
10811
  for (const ann of annotations) {
10740
10812
  if (ann.motivation === "linking" && ann.body) {
10741
10813
  const body = Array.isArray(ann.body) ? ann.body : [ann.body];
10742
10814
  for (const item of body) {
10743
10815
  if (item.type === "SpecificResource" && item.purpose === "linking" && item.source) {
10744
- resolvedUris.add(item.source);
10816
+ resolvedIds.add(item.source);
10745
10817
  }
10746
10818
  }
10747
10819
  }
10748
10820
  }
10749
- if (resolvedUris.size === 0) {
10821
+ if (resolvedIds.size === 0) {
10750
10822
  return annotations;
10751
10823
  }
10752
- const metadataPromises = Array.from(resolvedUris).map(async (uri) => {
10753
- const docId = uri.split("/resources/")[1];
10754
- if (!docId) return null;
10824
+ const metadataPromises = Array.from(resolvedIds).map(async (id) => {
10755
10825
  try {
10756
- const view = await kb.views.get(docId);
10826
+ const view = await kb.views.get(id);
10757
10827
  if (view?.resource?.name) {
10758
10828
  return {
10759
- uri,
10829
+ id,
10760
10830
  metadata: {
10761
10831
  name: view.resource.name,
10762
10832
  mediaType: view.resource.mediaType
@@ -10768,10 +10838,10 @@ var AnnotationContext = class {
10768
10838
  return null;
10769
10839
  });
10770
10840
  const results = await Promise.all(metadataPromises);
10771
- const uriToMetadata = /* @__PURE__ */ new Map();
10841
+ const idToMetadata = /* @__PURE__ */ new Map();
10772
10842
  for (const result of results) {
10773
10843
  if (result) {
10774
- uriToMetadata.set(result.uri, result.metadata);
10844
+ idToMetadata.set(result.id, result.metadata);
10775
10845
  }
10776
10846
  }
10777
10847
  return annotations.map((ann) => {
@@ -10779,7 +10849,7 @@ var AnnotationContext = class {
10779
10849
  const body = Array.isArray(ann.body) ? ann.body : [ann.body];
10780
10850
  for (const item of body) {
10781
10851
  if (item.type === "SpecificResource" && item.purpose === "linking" && item.source) {
10782
- const metadata = uriToMetadata.get(item.source);
10852
+ const metadata = idToMetadata.get(item.source);
10783
10853
  if (metadata) {
10784
10854
  return {
10785
10855
  ...ann,
@@ -10797,8 +10867,8 @@ var AnnotationContext = class {
10797
10867
  * Get resource stats (version info)
10798
10868
  * @returns Version and timestamp info for the annotations
10799
10869
  */
10800
- static async getResourceStats(resourceId2, kb) {
10801
- const annotations = await this.getResourceAnnotations(resourceId2, kb);
10870
+ static async getResourceStats(resourceId4, kb) {
10871
+ const annotations = await this.getResourceAnnotations(resourceId4, kb);
10802
10872
  return {
10803
10873
  resourceId: annotations.resourceId,
10804
10874
  version: annotations.version,
@@ -10808,19 +10878,16 @@ var AnnotationContext = class {
10808
10878
  /**
10809
10879
  * Check if resource exists in view storage
10810
10880
  */
10811
- static async resourceExists(resourceId2, kb) {
10812
- return await kb.views.exists(resourceId2);
10881
+ static async resourceExists(resourceId4, kb) {
10882
+ return await kb.views.exists(resourceId4);
10813
10883
  }
10814
10884
  /**
10815
10885
  * Get a single annotation by ID
10816
10886
  * O(1) lookup using resource ID to access view storage
10817
10887
  */
10818
- static async getAnnotation(annotationId2, resourceId2, kb) {
10819
- const annotations = await this.getResourceAnnotations(resourceId2, kb);
10820
- return annotations.annotations.find((a) => {
10821
- const shortId = a.id.split("/").pop();
10822
- return shortId === annotationId2;
10823
- }) || null;
10888
+ static async getAnnotation(annotationId2, resourceId4, kb) {
10889
+ const annotations = await this.getResourceAnnotations(resourceId4, kb);
10890
+ return annotations.annotations.find((a) => a.id === annotationId2) || null;
10824
10891
  }
10825
10892
  /**
10826
10893
  * List annotations with optional filtering
@@ -10836,13 +10903,13 @@ var AnnotationContext = class {
10836
10903
  /**
10837
10904
  * Get annotation context (selected text with surrounding context)
10838
10905
  */
10839
- static async getAnnotationContext(annotationId2, resourceId2, contextBefore, contextAfter, kb) {
10840
- const annotation = await this.getAnnotation(annotationId2, resourceId2, kb);
10906
+ static async getAnnotationContext(annotationId2, resourceId4, contextBefore, contextAfter, kb) {
10907
+ const annotation = await this.getAnnotation(annotationId2, resourceId4, kb);
10841
10908
  if (!annotation) {
10842
10909
  throw new Error("Annotation not found");
10843
10910
  }
10844
10911
  const resource = await ResourceContext.getResourceMetadata(
10845
- uriToResourceId(getTargetSource(annotation.target)),
10912
+ createResourceId(getTargetSource(annotation.target)),
10846
10913
  kb
10847
10914
  );
10848
10915
  if (!resource) {
@@ -10869,13 +10936,13 @@ var AnnotationContext = class {
10869
10936
  /**
10870
10937
  * Generate AI summary of annotation in context
10871
10938
  */
10872
- static async generateAnnotationSummary(annotationId2, resourceId2, kb, inferenceClient) {
10873
- const annotation = await this.getAnnotation(annotationId2, resourceId2, kb);
10939
+ static async generateAnnotationSummary(annotationId2, resourceId4, kb, inferenceClient) {
10940
+ const annotation = await this.getAnnotation(annotationId2, resourceId4, kb);
10874
10941
  if (!annotation) {
10875
10942
  throw new Error("Annotation not found");
10876
10943
  }
10877
10944
  const resource = await ResourceContext.getResourceMetadata(
10878
- uriToResourceId(getTargetSource(annotation.target)),
10945
+ createResourceId(getTargetSource(annotation.target)),
10879
10946
  kb
10880
10947
  );
10881
10948
  if (!resource) {
@@ -10950,16 +11017,14 @@ Entity types: ${entityTypes.join(", ")}`;
10950
11017
  };
10951
11018
 
10952
11019
  // src/graph-context.ts
10953
- import { resourceIdToURI } from "@semiont/core";
10954
- import { getResourceId, getResourceEntityTypes as getResourceEntityTypes2 } from "@semiont/api-client";
11020
+ import { getResourceId as getResourceId2, getResourceEntityTypes as getResourceEntityTypes2 } from "@semiont/api-client";
10955
11021
  var GraphContext = class {
10956
11022
  /**
10957
11023
  * Get all resources referencing this resource (backlinks)
10958
11024
  * Requires graph traversal - must use graph database
10959
11025
  */
10960
- static async getBacklinks(resourceId2, kb, publicURL) {
10961
- const resourceUri = resourceIdToURI(resourceId2, publicURL);
10962
- return await kb.graph.getResourceReferencedBy(resourceUri);
11026
+ static async getBacklinks(resourceId4, kb) {
11027
+ return await kb.graph.getResourceReferencedBy(resourceId4);
10963
11028
  }
10964
11029
  /**
10965
11030
  * Find shortest path between two resources
@@ -10972,8 +11037,8 @@ var GraphContext = class {
10972
11037
  * Get resource connections (graph edges)
10973
11038
  * Requires graph traversal - must use graph database
10974
11039
  */
10975
- static async getResourceConnections(resourceId2, kb) {
10976
- return await kb.graph.getResourceConnections(resourceId2);
11040
+ static async getResourceConnections(resourceId4, kb) {
11041
+ return await kb.graph.getResourceConnections(resourceId4);
10977
11042
  }
10978
11043
  /**
10979
11044
  * Search resources by name (cross-resource query)
@@ -10986,31 +11051,30 @@ var GraphContext = class {
10986
11051
  * Build graph representation with nodes and edges for a resource and its connections
10987
11052
  * Retrieves connections from graph and builds visualization-ready structure
10988
11053
  */
10989
- static async buildGraphRepresentation(resourceId2, maxRelated, kb, publicURL) {
10990
- const resourceUri = resourceIdToURI(resourceId2, publicURL);
10991
- const mainDoc = await kb.graph.getResource(resourceUri);
11054
+ static async buildGraphRepresentation(resourceId4, maxRelated, kb) {
11055
+ const mainDoc = await kb.graph.getResource(resourceId4);
10992
11056
  if (!mainDoc) {
10993
11057
  throw new Error("Resource not found");
10994
11058
  }
10995
- const connections = await kb.graph.getResourceConnections(resourceId2);
11059
+ const connections = await kb.graph.getResourceConnections(resourceId4);
10996
11060
  const relatedDocs = connections.map((conn) => conn.targetResource).slice(0, maxRelated - 1);
10997
11061
  const nodes = [
10998
11062
  {
10999
- id: getResourceId(mainDoc),
11063
+ id: getResourceId2(mainDoc),
11000
11064
  type: "resource",
11001
11065
  label: mainDoc.name,
11002
11066
  metadata: { entityTypes: getResourceEntityTypes2(mainDoc) }
11003
11067
  },
11004
11068
  ...relatedDocs.map((doc) => ({
11005
- id: getResourceId(doc),
11069
+ id: getResourceId2(doc),
11006
11070
  type: "resource",
11007
11071
  label: doc.name,
11008
11072
  metadata: { entityTypes: getResourceEntityTypes2(doc) }
11009
11073
  }))
11010
11074
  ].filter((node) => node.id !== void 0);
11011
11075
  const edges = connections.slice(0, maxRelated - 1).map((conn) => ({
11012
- source: resourceId2,
11013
- target: getResourceId(conn.targetResource),
11076
+ source: resourceId4,
11077
+ target: getResourceId2(conn.targetResource),
11014
11078
  type: conn.relationshipType || "link",
11015
11079
  metadata: {}
11016
11080
  })).filter((edge) => edge.target !== void 0);
@@ -11019,27 +11083,26 @@ var GraphContext = class {
11019
11083
  };
11020
11084
 
11021
11085
  // src/llm-context.ts
11022
- import { getResourceEntityTypes as getResourceEntityTypes3, getResourceId as getResourceId2 } from "@semiont/api-client";
11086
+ import { getResourceEntityTypes as getResourceEntityTypes3, getResourceId as getResourceId3 } from "@semiont/api-client";
11023
11087
  import { resourceId as makeResourceId2 } from "@semiont/core";
11024
11088
  var LLMContext = class {
11025
11089
  /**
11026
11090
  * Get comprehensive LLM context for a resource
11027
11091
  * Includes: main resource, related resources, annotations, graph, content, summary, references
11028
11092
  */
11029
- static async getResourceContext(resourceId2, options, kb, publicURL, inferenceClient) {
11030
- const mainDoc = await ResourceContext.getResourceMetadata(resourceId2, kb);
11093
+ static async getResourceContext(resourceId4, options, kb, inferenceClient) {
11094
+ const mainDoc = await ResourceContext.getResourceMetadata(resourceId4, kb);
11031
11095
  if (!mainDoc) {
11032
11096
  throw new Error("Resource not found");
11033
11097
  }
11034
11098
  const mainContent = options.includeContent ? await ResourceContext.getResourceContent(mainDoc, kb) : void 0;
11035
11099
  const graph = await GraphContext.buildGraphRepresentation(
11036
- resourceId2,
11100
+ resourceId4,
11037
11101
  options.maxResources,
11038
- kb,
11039
- publicURL
11102
+ kb
11040
11103
  );
11041
11104
  const relatedDocs = [];
11042
- const resourceIdStr = resourceId2.toString();
11105
+ const resourceIdStr = resourceId4.toString();
11043
11106
  for (const node of graph.nodes) {
11044
11107
  if (node.id !== resourceIdStr) {
11045
11108
  const relatedDoc = await ResourceContext.getResourceMetadata(makeResourceId2(node.id), kb);
@@ -11052,7 +11115,7 @@ var LLMContext = class {
11052
11115
  if (options.includeContent) {
11053
11116
  await Promise.all(
11054
11117
  relatedDocs.map(async (doc) => {
11055
- const docId = getResourceId2(doc);
11118
+ const docId = getResourceId3(doc);
11056
11119
  if (!docId) return;
11057
11120
  const content = await ResourceContext.getResourceContent(doc, kb);
11058
11121
  if (content) {
@@ -11061,7 +11124,7 @@ var LLMContext = class {
11061
11124
  })
11062
11125
  );
11063
11126
  }
11064
- const annotations = await AnnotationContext.getAllAnnotations(resourceId2, kb);
11127
+ const annotations = await AnnotationContext.getAllAnnotations(resourceId4, kb);
11065
11128
  const summary = options.includeSummary && mainContent ? await generateResourceSummary(
11066
11129
  mainDoc.name,
11067
11130
  mainContent,
@@ -11086,7 +11149,10 @@ var LLMContext = class {
11086
11149
  import { promises as fs2 } from "fs";
11087
11150
  import * as path2 from "path";
11088
11151
  async function readEntityTypesProjection(config) {
11089
- const configuredPath = config.services.filesystem.path;
11152
+ const configuredPath = config.services.filesystem?.path;
11153
+ if (!configuredPath) {
11154
+ throw new Error("services.filesystem.path is required for entity types reader");
11155
+ }
11090
11156
  const projectRoot = config._metadata?.projectRoot;
11091
11157
  let basePath;
11092
11158
  if (path2.isAbsolute(configuredPath)) {
@@ -11116,8 +11182,7 @@ async function readEntityTypesProjection(config) {
11116
11182
 
11117
11183
  // src/gatherer.ts
11118
11184
  var Gatherer = class {
11119
- constructor(publicURL, kb, eventBus, inferenceClient, logger, config) {
11120
- this.publicURL = publicURL;
11185
+ constructor(kb, eventBus, inferenceClient, logger, config) {
11121
11186
  this.kb = kb;
11122
11187
  this.eventBus = eventBus;
11123
11188
  this.inferenceClient = inferenceClient;
@@ -11130,7 +11195,7 @@ var Gatherer = class {
11130
11195
  this.logger.info("Gatherer actor initialized");
11131
11196
  const errorHandler = (err) => this.logger.error("Gatherer pipeline error", { error: err });
11132
11197
  const annotationGather$ = this.eventBus.get("gather:requested").pipe(
11133
- (0, import_operators3.groupBy)((event) => event.resourceUri),
11198
+ (0, import_operators3.groupBy)((event) => event.resourceId),
11134
11199
  (0, import_operators3.mergeMap)(
11135
11200
  (group$) => group$.pipe(
11136
11201
  (0, import_operators3.concatMap)((event) => (0, import_rxjs3.from)(this.handleAnnotationGather(event)))
@@ -11138,7 +11203,7 @@ var Gatherer = class {
11138
11203
  )
11139
11204
  );
11140
11205
  const resourceGather$ = this.eventBus.get("gather:resource-requested").pipe(
11141
- (0, import_operators3.groupBy)((event) => event.resourceUri),
11206
+ (0, import_operators3.groupBy)((event) => event.resourceId),
11142
11207
  (0, import_operators3.mergeMap)(
11143
11208
  (group$) => group$.pipe(
11144
11209
  (0, import_operators3.concatMap)((event) => (0, import_rxjs3.from)(this.handleResourceGather(event)))
@@ -11184,28 +11249,28 @@ var Gatherer = class {
11184
11249
  async handleAnnotationGather(event) {
11185
11250
  try {
11186
11251
  this.logger.debug("Gathering annotation context", {
11187
- annotationUri: event.annotationUri,
11188
- resourceUri: event.resourceUri
11252
+ annotationId: event.annotationId,
11253
+ resourceId: event.resourceId
11189
11254
  });
11190
11255
  const response = await AnnotationContext.buildLLMContext(
11191
- makeAnnotationUri(event.annotationUri),
11192
- uriToResourceId2(event.resourceUri),
11256
+ makeAnnotationId2(event.annotationId),
11257
+ event.resourceId,
11193
11258
  this.kb,
11194
11259
  event.options ?? {},
11195
11260
  this.inferenceClient,
11196
11261
  this.logger
11197
11262
  );
11198
11263
  this.eventBus.get("gather:complete").next({
11199
- annotationUri: event.annotationUri,
11264
+ annotationId: event.annotationId,
11200
11265
  response
11201
11266
  });
11202
11267
  } catch (error) {
11203
11268
  this.logger.error("Gather annotation context failed", {
11204
- annotationUri: event.annotationUri,
11269
+ annotationId: event.annotationId,
11205
11270
  error
11206
11271
  });
11207
11272
  this.eventBus.get("gather:failed").next({
11208
- annotationUri: event.annotationUri,
11273
+ annotationId: event.annotationId,
11209
11274
  error: error instanceof Error ? error : new Error(String(error))
11210
11275
  });
11211
11276
  }
@@ -11213,27 +11278,25 @@ var Gatherer = class {
11213
11278
  async handleResourceGather(event) {
11214
11279
  try {
11215
11280
  this.logger.debug("Gathering resource context", {
11216
- resourceUri: event.resourceUri
11281
+ resourceId: event.resourceId
11217
11282
  });
11218
- const publicURL = this.publicURL;
11219
11283
  const result = await LLMContext.getResourceContext(
11220
- uriToResourceId2(event.resourceUri),
11284
+ event.resourceId,
11221
11285
  event.options,
11222
11286
  this.kb,
11223
- publicURL,
11224
11287
  this.inferenceClient
11225
11288
  );
11226
11289
  this.eventBus.get("gather:resource-complete").next({
11227
- resourceUri: event.resourceUri,
11290
+ resourceId: event.resourceId,
11228
11291
  context: result
11229
11292
  });
11230
11293
  } catch (error) {
11231
11294
  this.logger.error("Gather resource context failed", {
11232
- resourceUri: event.resourceUri,
11295
+ resourceId: event.resourceId,
11233
11296
  error
11234
11297
  });
11235
11298
  this.eventBus.get("gather:resource-failed").next({
11236
- resourceUri: event.resourceUri,
11299
+ resourceId: event.resourceId,
11237
11300
  error: error instanceof Error ? error : new Error(String(error))
11238
11301
  });
11239
11302
  }
@@ -11336,8 +11399,7 @@ var Gatherer = class {
11336
11399
  let resolvedResource = null;
11337
11400
  const bodySource = getBodySource2(annotation.body);
11338
11401
  if (bodySource) {
11339
- const resolvedId = bodySource.split("/").pop();
11340
- resolvedResource = await ResourceContext.getResourceMetadata(makeResourceId3(resolvedId), this.kb);
11402
+ resolvedResource = await ResourceContext.getResourceMetadata(resourceId(bodySource), this.kb);
11341
11403
  }
11342
11404
  this.eventBus.get("browse:annotation-result").next({
11343
11405
  correlationId: event.correlationId,
@@ -11457,7 +11519,7 @@ var Gatherer = class {
11457
11519
  async handleEntityTypes(event) {
11458
11520
  try {
11459
11521
  if (!this.config) {
11460
- throw new Error("EnvironmentConfig required for entity type reads");
11522
+ throw new Error("MakeMeaningConfig required for entity type reads");
11461
11523
  }
11462
11524
  const entityTypes = await readEntityTypesProjection(this.config);
11463
11525
  this.eventBus.get("mark:entity-types-result").next({
@@ -11481,23 +11543,23 @@ var Gatherer = class {
11481
11543
  }
11482
11544
  };
11483
11545
 
11484
- // src/binder.ts
11546
+ // src/matcher.ts
11485
11547
  var import_rxjs4 = __toESM(require_cjs(), 1);
11486
11548
  var import_operators4 = __toESM(require_operators(), 1);
11487
- import { resourceIdToURI as resourceIdToURI2, resourceUri as makeResourceUri } from "@semiont/core";
11488
- import { getExactText, getTargetSource as getTargetSource2, getTargetSelector as getTargetSelector2 } from "@semiont/api-client";
11489
- var Binder = class {
11490
- constructor(kb, eventBus, logger, publicURL) {
11549
+ import { resourceId as resourceId2 } from "@semiont/core";
11550
+ import { getExactText, getResourceId as getResourceId4, getResourceEntityTypes as getResourceEntityTypes5, getTargetSource as getTargetSource2, getTargetSelector as getTargetSelector2 } from "@semiont/api-client";
11551
+ var Matcher = class {
11552
+ constructor(kb, eventBus, logger, inferenceClient) {
11491
11553
  this.kb = kb;
11492
11554
  this.eventBus = eventBus;
11493
- this.publicURL = publicURL;
11555
+ this.inferenceClient = inferenceClient;
11494
11556
  this.logger = logger;
11495
11557
  }
11496
11558
  subscriptions = [];
11497
11559
  logger;
11498
11560
  async initialize() {
11499
- this.logger.info("Binder actor initialized");
11500
- const errorHandler = (err) => this.logger.error("Binder pipeline error", { error: err });
11561
+ this.logger.info("Matcher actor initialized");
11562
+ const errorHandler = (err) => this.logger.error("Matcher pipeline error", { error: err });
11501
11563
  const search$ = this.eventBus.get("bind:search-requested").pipe(
11502
11564
  (0, import_operators4.concatMap)((event) => (0, import_rxjs4.from)(this.handleSearch(event)))
11503
11565
  );
@@ -11511,15 +11573,26 @@ var Binder = class {
11511
11573
  }
11512
11574
  async handleSearch(event) {
11513
11575
  try {
11576
+ const context = event.context;
11577
+ const selectedText = context.sourceContext?.selected ?? "";
11578
+ const userHint = context.userHint ?? "";
11579
+ const searchTerm = [selectedText, userHint].filter(Boolean).join(" ");
11514
11580
  this.logger.debug("Searching for binding candidates", {
11515
11581
  referenceId: event.referenceId,
11516
- searchTerm: event.searchTerm
11517
- });
11518
- const results = await this.kb.graph.searchResources(event.searchTerm);
11582
+ searchTerm,
11583
+ limit: event.limit,
11584
+ useSemanticScoring: event.useSemanticScoring
11585
+ });
11586
+ const scored = await this.contextDrivenSearch(
11587
+ searchTerm,
11588
+ context,
11589
+ event.useSemanticScoring
11590
+ );
11591
+ const limited = event.limit ? scored.slice(0, event.limit) : scored;
11519
11592
  this.eventBus.get("bind:search-results").next({
11520
11593
  referenceId: event.referenceId,
11521
- searchTerm: event.searchTerm,
11522
- results
11594
+ results: limited,
11595
+ correlationId: event.correlationId
11523
11596
  });
11524
11597
  } catch (error) {
11525
11598
  this.logger.error("Bind search failed", {
@@ -11528,24 +11601,240 @@ var Binder = class {
11528
11601
  });
11529
11602
  this.eventBus.get("bind:search-failed").next({
11530
11603
  referenceId: event.referenceId,
11531
- error: error instanceof Error ? error : new Error(String(error))
11604
+ error: error instanceof Error ? error : new Error(String(error)),
11605
+ correlationId: event.correlationId
11532
11606
  });
11533
11607
  }
11534
11608
  }
11609
+ /**
11610
+ * Context-driven search: multi-source retrieval + composite scoring
11611
+ *
11612
+ * Retrieval sources:
11613
+ * 1. Name match — graph.searchResources(searchTerm)
11614
+ * 2. Entity type match — graph.listResources({ entityTypes })
11615
+ * 3. Graph neighborhood — connections from GatheredContext
11616
+ *
11617
+ * Ranking signals:
11618
+ * - Entity type overlap (Jaccard similarity)
11619
+ * - Bidirectionality (already connected both ways)
11620
+ * - Citation weight (well-connected = important)
11621
+ * - Name match quality (exact > prefix > contains)
11622
+ * - Recency (newer resources scored higher)
11623
+ */
11624
+ async contextDrivenSearch(searchTerm, context, useSemanticScoring) {
11625
+ const annotationEntityTypes = context.metadata?.entityTypes ?? [];
11626
+ const connections = context.graphContext?.connections ?? [];
11627
+ const [nameMatches, entityTypeMatches] = await Promise.all([
11628
+ this.kb.graph.searchResources(searchTerm),
11629
+ annotationEntityTypes.length > 0 ? this.kb.graph.listResources({ entityTypes: annotationEntityTypes, limit: 50 }).then((r) => r.resources) : Promise.resolve([])
11630
+ ]);
11631
+ const neighborResources = await Promise.all(
11632
+ connections.map(
11633
+ (conn) => this.kb.graph.getResource(resourceId2(conn.resourceId)).catch(() => null)
11634
+ )
11635
+ );
11636
+ const candidateMap = /* @__PURE__ */ new Map();
11637
+ const addCandidate = (resource, source) => {
11638
+ const id = getResourceId4(resource);
11639
+ if (!id) return;
11640
+ const existing = candidateMap.get(id);
11641
+ if (existing) {
11642
+ existing.sources.add(source);
11643
+ } else {
11644
+ candidateMap.set(id, { resource, sources: /* @__PURE__ */ new Set([source]) });
11645
+ }
11646
+ };
11647
+ for (const r of nameMatches) addCandidate(r, "name");
11648
+ for (const r of entityTypeMatches) addCandidate(r, "entityType");
11649
+ for (const r of neighborResources) {
11650
+ if (r) addCandidate(r, "neighborhood");
11651
+ }
11652
+ this.logger.debug("Candidate retrieval", {
11653
+ nameMatches: nameMatches.length,
11654
+ entityTypeMatches: entityTypeMatches.length,
11655
+ neighborResources: neighborResources.filter(Boolean).length,
11656
+ totalCandidates: candidateMap.size
11657
+ });
11658
+ const connectionIds = new Set(connections.map((c) => c.resourceId));
11659
+ const bidirectionalIds = new Set(
11660
+ connections.filter((c) => c.bidirectional).map((c) => c.resourceId)
11661
+ );
11662
+ const entityTypeFreqs = context.graphContext?.entityTypeFrequencies ?? {};
11663
+ const searchTermLower = searchTerm.toLowerCase();
11664
+ const scored = Array.from(candidateMap.values()).map(({ resource, sources }) => {
11665
+ const id = getResourceId4(resource) ?? "";
11666
+ const candidateEntityTypes = getResourceEntityTypes5(resource);
11667
+ const reasons = [];
11668
+ let score = 0;
11669
+ if (annotationEntityTypes.length > 0 && candidateEntityTypes.length > 0) {
11670
+ const intersection = annotationEntityTypes.filter((t) => candidateEntityTypes.includes(t));
11671
+ const union = /* @__PURE__ */ new Set([...annotationEntityTypes, ...candidateEntityTypes]);
11672
+ const jaccard = intersection.length / union.size;
11673
+ let idfBoost = 0;
11674
+ for (const t of intersection) {
11675
+ const freq = entityTypeFreqs[t] ?? 1;
11676
+ idfBoost += 1 / Math.log2(freq + 1);
11677
+ }
11678
+ const entityScore = jaccard * 30 + idfBoost * 5;
11679
+ score += entityScore;
11680
+ if (intersection.length > 0) {
11681
+ reasons.push(`entity types: ${intersection.join(", ")}`);
11682
+ }
11683
+ }
11684
+ if (bidirectionalIds.has(id)) {
11685
+ score += 20;
11686
+ reasons.push("bidirectional connection");
11687
+ } else if (connectionIds.has(id)) {
11688
+ score += 10;
11689
+ reasons.push("connected");
11690
+ }
11691
+ const citedByCount = context.graphContext?.citedByCount ?? 0;
11692
+ if (sources.has("neighborhood") && citedByCount > 0) {
11693
+ score += Math.min(citedByCount * 2, 15);
11694
+ }
11695
+ const nameLower = (resource.name ?? "").toLowerCase();
11696
+ if (nameLower === searchTermLower) {
11697
+ score += 25;
11698
+ reasons.push("exact name match");
11699
+ } else if (nameLower.startsWith(searchTermLower)) {
11700
+ score += 15;
11701
+ reasons.push("prefix name match");
11702
+ } else if (nameLower.includes(searchTermLower)) {
11703
+ score += 10;
11704
+ reasons.push("contains name match");
11705
+ }
11706
+ const dateCreated = resource.dateCreated;
11707
+ if (dateCreated) {
11708
+ const ageMs = Date.now() - new Date(dateCreated).getTime();
11709
+ const ageDays = ageMs / (1e3 * 60 * 60 * 24);
11710
+ score += Math.max(0, 5 * (1 - ageDays / 30));
11711
+ }
11712
+ if (sources.size > 1) {
11713
+ score += sources.size * 3;
11714
+ reasons.push(`${sources.size} retrieval sources`);
11715
+ }
11716
+ return {
11717
+ ...resource,
11718
+ score: Math.round(score * 100) / 100,
11719
+ matchReason: reasons.join("; ") || "candidate"
11720
+ };
11721
+ });
11722
+ if (this.inferenceClient && scored.length > 0 && useSemanticScoring !== false) {
11723
+ try {
11724
+ const inferenceScores = await this.inferenceSemanticScore(
11725
+ searchTerm,
11726
+ context,
11727
+ scored.slice(0, 20)
11728
+ // Limit to top 20 candidates for cost
11729
+ );
11730
+ for (const item of scored) {
11731
+ const id = getResourceId4(item) ?? "";
11732
+ const inferenceScore = inferenceScores.get(id);
11733
+ if (inferenceScore !== void 0) {
11734
+ item.score += inferenceScore * 25;
11735
+ item.score = Math.round(item.score * 100) / 100;
11736
+ if (inferenceScore > 0.5) {
11737
+ item.matchReason = item.matchReason ? `${item.matchReason}; semantic match` : "semantic match";
11738
+ }
11739
+ }
11740
+ }
11741
+ } catch (error) {
11742
+ this.logger.warn("Inference semantic scoring failed, using structural scores only", { error });
11743
+ }
11744
+ }
11745
+ scored.sort((a, b) => b.score - a.score);
11746
+ this.logger.debug("Search results scored", {
11747
+ total: scored.length,
11748
+ topScore: scored[0]?.score,
11749
+ topReason: scored[0]?.matchReason
11750
+ });
11751
+ return scored;
11752
+ }
11753
+ /**
11754
+ * LLM-based semantic relevance scoring (GraphRAG-style)
11755
+ *
11756
+ * Batches candidates into a single prompt asking the LLM to score
11757
+ * each candidate's semantic relevance given the passage and graph context.
11758
+ *
11759
+ * @returns Map of resourceId → score (0-1)
11760
+ */
11761
+ async inferenceSemanticScore(searchTerm, context, candidates) {
11762
+ if (!this.inferenceClient) return /* @__PURE__ */ new Map();
11763
+ const passage = [context.sourceContext?.selected, context.userHint].filter(Boolean).join(" \u2014 ") || searchTerm;
11764
+ const entityTypes = context.metadata?.entityTypes ?? [];
11765
+ const graphConnections = context.graphContext?.connections;
11766
+ const connections = graphConnections ?? [];
11767
+ const candidateLines = candidates.map((c, i) => {
11768
+ const id = getResourceId4(c) ?? "";
11769
+ const cEntityTypes = getResourceEntityTypes5(c);
11770
+ return `${i + 1}. "${c.name}" (id: ${id}, types: ${cEntityTypes.join(", ") || "none"})`;
11771
+ }).join("\n");
11772
+ const contextParts = [];
11773
+ contextParts.push(`Annotation motivation: ${context.annotation.motivation}`);
11774
+ contextParts.push(`Source resource: ${context.sourceResource.name}`);
11775
+ const { motivation, body } = context.annotation;
11776
+ if (motivation === "commenting" || motivation === "assessing") {
11777
+ const bodyItem = Array.isArray(body) ? body[0] : body;
11778
+ if (bodyItem && "value" in bodyItem && bodyItem.value) {
11779
+ const label = motivation === "commenting" ? "Comment" : "Assessment";
11780
+ contextParts.push(`${label}: ${bodyItem.value}`);
11781
+ }
11782
+ }
11783
+ if (entityTypes.length > 0) contextParts.push(`Annotation entity types: ${entityTypes.join(", ")}`);
11784
+ if (connections.length > 0) {
11785
+ const connNames = connections.slice(0, 5).map((c) => c.resourceName);
11786
+ contextParts.push(`Connected resources: ${connNames.join(", ")}`);
11787
+ }
11788
+ if (context.graphContext?.inferredRelationshipSummary) {
11789
+ contextParts.push(`Relationship context: ${context.graphContext.inferredRelationshipSummary}`);
11790
+ }
11791
+ const prompt = `Given this passage and context, score each candidate resource's semantic relevance on a scale of 0.0 to 1.0.
11792
+
11793
+ Passage: "${passage}"
11794
+ Search term: "${searchTerm}"
11795
+ ${contextParts.length > 0 ? contextParts.join("\n") : ""}
11796
+
11797
+ Candidates:
11798
+ ${candidateLines}
11799
+
11800
+ For each candidate, output ONLY a line with the number and score, like:
11801
+ 1. 0.8
11802
+ 2. 0.3
11803
+ No explanations.`;
11804
+ const response = await this.inferenceClient.generateText(prompt, 200, 0.1);
11805
+ const scores = /* @__PURE__ */ new Map();
11806
+ const lines = response.trim().split("\n");
11807
+ for (const line of lines) {
11808
+ const match = line.match(/^(\d+)\.\s*([\d.]+)/);
11809
+ if (match) {
11810
+ const index = parseInt(match[1], 10) - 1;
11811
+ const score = parseFloat(match[2]);
11812
+ if (index >= 0 && index < candidates.length && !isNaN(score) && score >= 0 && score <= 1) {
11813
+ const id = getResourceId4(candidates[index]) ?? "";
11814
+ if (id) scores.set(id, score);
11815
+ }
11816
+ }
11817
+ }
11818
+ this.logger.debug("Inference semantic scores", {
11819
+ candidateCount: candidates.length,
11820
+ scoredCount: scores.size
11821
+ });
11822
+ return scores;
11823
+ }
11535
11824
  async handleReferencedBy(event) {
11536
11825
  try {
11537
- if (!this.publicURL) {
11538
- throw new Error("publicURL required for referenced-by queries");
11539
- }
11540
- const resourceUri = resourceIdToURI2(event.resourceId, this.publicURL);
11541
11826
  this.logger.debug("Looking for annotations referencing resource", {
11542
11827
  resourceId: event.resourceId,
11543
- resourceUri,
11544
11828
  motivation: event.motivation || "all"
11545
11829
  });
11546
- const references = await this.kb.graph.getResourceReferencedBy(resourceUri, event.motivation);
11547
- const docIds = [...new Set(references.map((ref) => getTargetSource2(ref.target)))];
11548
- const resources = await Promise.all(docIds.map((docId) => this.kb.graph.getResource(makeResourceUri(docId))));
11830
+ const references = await this.kb.graph.getResourceReferencedBy(event.resourceId, event.motivation);
11831
+ const sourceIds = [...new Set(references.map((ref) => getTargetSource2(ref.target)))];
11832
+ const resources = await Promise.all(sourceIds.map((id) => this.kb.graph.getResource(resourceId2(id))));
11833
+ for (let i = 0; i < sourceIds.length; i++) {
11834
+ if (resources[i] === null) {
11835
+ this.logger.warn("Referenced resource not found in graph", { resourceId: sourceIds[i] });
11836
+ }
11837
+ }
11549
11838
  const docMap = new Map(resources.filter((doc) => doc !== null).map((doc) => [doc["@id"], doc]));
11550
11839
  const referencedBy = references.map((ref) => {
11551
11840
  const targetSource = getTargetSource2(ref.target);
@@ -11582,18 +11871,17 @@ var Binder = class {
11582
11871
  sub.unsubscribe();
11583
11872
  }
11584
11873
  this.subscriptions = [];
11585
- this.logger.info("Binder actor stopped");
11874
+ this.logger.info("Matcher actor stopped");
11586
11875
  }
11587
11876
  };
11588
11877
 
11589
11878
  // src/stower.ts
11590
11879
  var import_rxjs5 = __toESM(require_cjs(), 1);
11591
11880
  var import_operators5 = __toESM(require_operators(), 1);
11592
- import { resourceId, uriToAnnotationId, CREATION_METHODS, generateUuid } from "@semiont/core";
11881
+ import { resourceId as resourceId3, annotationId as makeAnnotationId3, CREATION_METHODS, generateUuid } from "@semiont/core";
11593
11882
  var Stower = class {
11594
- constructor(kb, publicURL, eventBus, logger) {
11883
+ constructor(kb, eventBus, logger) {
11595
11884
  this.kb = kb;
11596
- this.publicURL = publicURL;
11597
11885
  this.eventBus = eventBus;
11598
11886
  this.logger = logger;
11599
11887
  }
@@ -11624,7 +11912,7 @@ var Stower = class {
11624
11912
  // ========================================================================
11625
11913
  async handleYieldCreate(event) {
11626
11914
  try {
11627
- const rId = resourceId(generateUuid());
11915
+ const rId = resourceId3(generateUuid());
11628
11916
  const storedRep = await this.kb.content.store(event.content, {
11629
11917
  mediaType: event.format,
11630
11918
  language: event.language || void 0,
@@ -11650,10 +11938,9 @@ var Stower = class {
11650
11938
  generationPrompt: event.generationPrompt
11651
11939
  }
11652
11940
  });
11653
- const normalizedBase = this.publicURL.endsWith("/") ? this.publicURL.slice(0, -1) : this.publicURL;
11654
11941
  const resource = {
11655
11942
  "@context": "https://schema.org/",
11656
- "@id": `${normalizedBase}/resources/${rId}`,
11943
+ "@id": rId,
11657
11944
  name: event.name,
11658
11945
  archived: false,
11659
11946
  entityTypes: event.entityTypes || [],
@@ -11687,7 +11974,7 @@ var Stower = class {
11687
11974
  version: 1,
11688
11975
  payload: { annotation: event.annotation }
11689
11976
  });
11690
- this.eventBus.get("mark:created").next({ annotationId: uriToAnnotationId(event.annotation.id) });
11977
+ this.eventBus.get("mark:created").next({ annotationId: makeAnnotationId3(event.annotation.id) });
11691
11978
  } catch (error) {
11692
11979
  this.logger.error("Failed to create annotation", { error });
11693
11980
  this.eventBus.get("mark:create-failed").next({
@@ -11854,7 +12141,7 @@ var Stower = class {
11854
12141
  var import_rxjs7 = __toESM(require_cjs(), 1);
11855
12142
  var import_operators7 = __toESM(require_operators(), 1);
11856
12143
  import { CREATION_METHODS as CREATION_METHODS2, cloneToken as makeCloneToken } from "@semiont/core";
11857
- import { getPrimaryRepresentation as getPrimaryRepresentation3, getResourceEntityTypes as getResourceEntityTypes5 } from "@semiont/api-client";
12144
+ import { getPrimaryRepresentation as getPrimaryRepresentation3, getResourceEntityTypes as getResourceEntityTypes6 } from "@semiont/api-client";
11858
12145
 
11859
12146
  // src/resource-operations.ts
11860
12147
  var import_rxjs6 = __toESM(require_cjs(), 1);
@@ -12062,12 +12349,12 @@ var CloneTokenManager = class {
12062
12349
  const mediaType = primaryRep?.mediaType || "text/plain";
12063
12350
  const validFormats = ["text/plain", "text/markdown"];
12064
12351
  const format = validFormats.includes(mediaType) ? mediaType : "text/plain";
12065
- const resourceId2 = await ResourceOperations.createResource(
12352
+ const resourceId4 = await ResourceOperations.createResource(
12066
12353
  {
12067
12354
  name: event.name,
12068
12355
  content: Buffer.from(event.content),
12069
12356
  format,
12070
- entityTypes: getResourceEntityTypes5(sourceDoc),
12357
+ entityTypes: getResourceEntityTypes6(sourceDoc),
12071
12358
  creationMethod: CREATION_METHODS2.CLONE
12072
12359
  },
12073
12360
  event.userId,
@@ -12087,7 +12374,7 @@ var CloneTokenManager = class {
12087
12374
  this.tokens.delete(token);
12088
12375
  this.eventBus.get("yield:clone-created").next({
12089
12376
  correlationId: event.correlationId,
12090
- response: { resourceId: resourceId2 }
12377
+ response: { resourceId: resourceId4 }
12091
12378
  });
12092
12379
  } catch (error) {
12093
12380
  this.logger.error("Clone create failed", { token: event.token, error });
@@ -12109,14 +12396,15 @@ var CloneTokenManager = class {
12109
12396
 
12110
12397
  // src/service.ts
12111
12398
  async function startMakeMeaning(config, eventBus, logger) {
12112
- const configuredPath = config.services?.filesystem?.path;
12113
- if (!configuredPath) {
12399
+ const filesystemConfig = config.services?.filesystem;
12400
+ if (!filesystemConfig?.path) {
12114
12401
  throw new Error("services.filesystem.path is required for make-meaning service");
12115
12402
  }
12116
- const baseUrl = config.services?.backend?.publicURL;
12117
- if (!baseUrl) {
12118
- throw new Error("services.backend.publicURL is required for make-meaning service");
12403
+ const graphConfig = config.services?.graph;
12404
+ if (!graphConfig) {
12405
+ throw new Error("services.graph is required for make-meaning service");
12119
12406
  }
12407
+ const configuredPath = filesystemConfig.path;
12120
12408
  const projectRoot = config._metadata?.projectRoot;
12121
12409
  let basePath;
12122
12410
  if (path3.isAbsolute(configuredPath)) {
@@ -12166,30 +12454,34 @@ async function startMakeMeaning(config, eventBus, logger) {
12166
12454
  error: (err) => jobQueueLogger.error("Job status pipeline error", { error: err })
12167
12455
  });
12168
12456
  const eventStoreLogger = logger.child({ component: "event-store" });
12169
- const eventStore = createEventStoreCore(basePath, baseUrl, void 0, eventBus, eventStoreLogger);
12457
+ const eventStore = createEventStoreCore(basePath, void 0, eventBus, eventStoreLogger);
12170
12458
  const inferenceLogger = logger.child({ component: "inference-client" });
12171
- const inferenceClient = await getInferenceClient(config, inferenceLogger);
12172
- const graphDb = await getGraphDatabase(config);
12459
+ const inferenceConfig = config.services?.inference;
12460
+ if (!inferenceConfig) {
12461
+ throw new Error("services.inference is required for make-meaning service");
12462
+ }
12463
+ const inferenceClient = await getInferenceClient(inferenceConfig, inferenceLogger);
12464
+ const graphDb = await getGraphDatabase(graphConfig);
12173
12465
  const kb = createKnowledgeBase(eventStore, basePath, projectRoot, graphDb, logger);
12174
12466
  const graphConsumerLogger = logger.child({ component: "graph-consumer" });
12175
- const graphConsumer = new GraphDBConsumer(config, eventStore, graphDb, graphConsumerLogger);
12467
+ const graphConsumer = new GraphDBConsumer(eventStore, graphDb, graphConsumerLogger);
12176
12468
  await graphConsumer.initialize();
12177
12469
  const stowerLogger = logger.child({ component: "stower" });
12178
- const stower = new Stower(kb, baseUrl, eventBus, stowerLogger);
12470
+ const stower = new Stower(kb, eventBus, stowerLogger);
12179
12471
  await stower.initialize();
12180
12472
  const bootstrapLogger = logger.child({ component: "entity-types-bootstrap" });
12181
12473
  await bootstrapEntityTypes(eventBus, config, bootstrapLogger);
12182
12474
  const gathererLogger = logger.child({ component: "gatherer" });
12183
- const gatherer = new Gatherer(baseUrl, kb, eventBus, inferenceClient, gathererLogger, config);
12475
+ const gatherer = new Gatherer(kb, eventBus, inferenceClient, gathererLogger, config);
12184
12476
  await gatherer.initialize();
12185
- const binderLogger = logger.child({ component: "binder" });
12186
- const binder = new Binder(kb, eventBus, binderLogger, baseUrl);
12187
- await binder.initialize();
12477
+ const matcherLogger = logger.child({ component: "matcher" });
12478
+ const matcher = new Matcher(kb, eventBus, matcherLogger, inferenceClient);
12479
+ await matcher.initialize();
12188
12480
  const cloneTokenLogger = logger.child({ component: "clone-token-manager" });
12189
12481
  const cloneTokenManager = new CloneTokenManager(kb, eventBus, cloneTokenLogger);
12190
12482
  await cloneTokenManager.initialize();
12191
- const contentFetcher = async (resourceId2) => {
12192
- const view = await kb.views.get(resourceId2);
12483
+ const contentFetcher = async (resourceId4) => {
12484
+ const view = await kb.views.get(resourceId4);
12193
12485
  if (!view) return null;
12194
12486
  const primaryRep = getPrimaryRepresentation4(view.resource);
12195
12487
  if (!primaryRep?.checksum || !primaryRep?.mediaType) return null;
@@ -12204,12 +12496,12 @@ async function startMakeMeaning(config, eventBus, logger) {
12204
12496
  const commentLogger = logger.child({ component: "comment-detection-worker" });
12205
12497
  const tagLogger = logger.child({ component: "tag-detection-worker" });
12206
12498
  const workers = {
12207
- detection: new ReferenceAnnotationWorker(jobQueue, config, inferenceClient, eventBus, contentFetcher, detectionLogger),
12208
- generation: new GenerationWorker(jobQueue, config, inferenceClient, eventBus, generationLogger),
12209
- highlight: new HighlightAnnotationWorker(jobQueue, config, inferenceClient, eventBus, contentFetcher, highlightLogger),
12210
- assessment: new AssessmentAnnotationWorker(jobQueue, config, inferenceClient, eventBus, contentFetcher, assessmentLogger),
12211
- comment: new CommentAnnotationWorker(jobQueue, config, inferenceClient, eventBus, contentFetcher, commentLogger),
12212
- tag: new TagAnnotationWorker(jobQueue, config, inferenceClient, eventBus, contentFetcher, tagLogger)
12499
+ detection: new ReferenceAnnotationWorker(jobQueue, inferenceClient, eventBus, contentFetcher, detectionLogger),
12500
+ generation: new GenerationWorker(jobQueue, inferenceClient, eventBus, generationLogger),
12501
+ highlight: new HighlightAnnotationWorker(jobQueue, inferenceClient, eventBus, contentFetcher, highlightLogger),
12502
+ assessment: new AssessmentAnnotationWorker(jobQueue, inferenceClient, eventBus, contentFetcher, assessmentLogger),
12503
+ comment: new CommentAnnotationWorker(jobQueue, inferenceClient, eventBus, contentFetcher, commentLogger),
12504
+ tag: new TagAnnotationWorker(jobQueue, inferenceClient, eventBus, contentFetcher, tagLogger)
12213
12505
  };
12214
12506
  workers.detection.start().catch((error) => {
12215
12507
  detectionLogger.error("Worker stopped unexpectedly", { error });
@@ -12239,7 +12531,7 @@ async function startMakeMeaning(config, eventBus, logger) {
12239
12531
  graphConsumer,
12240
12532
  stower,
12241
12533
  gatherer,
12242
- binder,
12534
+ matcher,
12243
12535
  cloneTokenManager,
12244
12536
  stop: async () => {
12245
12537
  logger.info("Stopping Make-Meaning service");
@@ -12252,7 +12544,7 @@ async function startMakeMeaning(config, eventBus, logger) {
12252
12544
  workers.tag.stop()
12253
12545
  ]);
12254
12546
  await gatherer.stop();
12255
- await binder.stop();
12547
+ await matcher.stop();
12256
12548
  jobStatusSubscription.unsubscribe();
12257
12549
  await cloneTokenManager.stop();
12258
12550
  await stower.stop();
@@ -12263,6 +12555,858 @@ async function startMakeMeaning(config, eventBus, logger) {
12263
12555
  };
12264
12556
  }
12265
12557
 
12558
+ // src/exchange/backup-exporter.ts
12559
+ import { getExtensionForMimeType } from "@semiont/content";
12560
+
12561
+ // src/exchange/tar.ts
12562
+ import { createGzip, createGunzip } from "zlib";
12563
+ import { Readable as Readable2, pipeline } from "stream";
12564
+ import { promisify } from "util";
12565
var pipelineAsync = promisify(pipeline);
var BLOCK_SIZE = 512;

/**
 * Build a single 512-byte ustar header block for a regular file.
 *
 * @param {string} name - Entry path; only the first 100 bytes fit in the name field.
 * @param {number} size - Payload size in bytes.
 * @returns {Buffer} The 512-byte header block, checksum included.
 */
function createTarHeader(name, size) {
  const header = Buffer.alloc(BLOCK_SIZE, 0);
  header.write(name.slice(0, 100), 0, 100, "utf8");
  header.write("0000644\0", 100, 8, "utf8"); // mode
  header.write("0000000\0", 108, 8, "utf8"); // uid
  header.write("0000000\0", 116, 8, "utf8"); // gid
  header.write(size.toString(8).padStart(11, "0") + "\0", 124, 12, "utf8");
  const mtime = Math.floor(Date.now() / 1e3);
  header.write(mtime.toString(8).padStart(11, "0") + "\0", 136, 12, "utf8");
  // The checksum is defined over the header with the checksum field itself
  // counted as eight ASCII spaces, so the whole field must be blanked first.
  header.fill(0x20, 148, 156);
  header.write("0", 156, 1, "utf8"); // typeflag: regular file
  header.write("ustar\0", 257, 6, "utf8");
  header.write("00", 263, 2, "utf8");
  let checksum = 0;
  for (const byte of header) {
    checksum += byte;
  }
  header.write(checksum.toString(8).padStart(6, "0") + "\0 ", 148, 8, "utf8");
  return header;
}

/**
 * Number of zero bytes needed to pad `size` up to the next 512-byte boundary.
 *
 * @param {number} size - Payload size in bytes.
 * @returns {number} Padding length in the range 0..511.
 */
function paddingBytes(size) {
  const remainder = size % BLOCK_SIZE;
  return remainder === 0 ? 0 : BLOCK_SIZE - remainder;
}

/**
 * Lazily serialize entries into raw tar chunks: header, payload, padding per
 * entry, followed by the two zero blocks that terminate the archive.
 *
 * @param {Iterable<{name: string, data: Buffer}>|AsyncIterable<{name: string, data: Buffer}>} entries
 * @returns {AsyncGenerator<Buffer>}
 */
async function* generateTarChunks(entries) {
  for await (const entry of entries) {
    yield createTarHeader(entry.name, entry.data.length);
    // Zero-length chunks carry no data; skip them rather than pushing them
    // through the byte stream.
    if (entry.data.length > 0) {
      yield entry.data;
    }
    const pad = paddingBytes(entry.data.length);
    if (pad > 0) {
      yield Buffer.alloc(pad, 0);
    }
  }
  yield Buffer.alloc(BLOCK_SIZE * 2, 0);
}

/**
 * Write entries as a gzip-compressed tar archive to `output`.
 *
 * The tar bytes are produced by a generator-backed stream, so chunks are only
 * generated as the pipeline consumes them, and an error thrown by the entries
 * iterator tears down the whole pipeline instead of leaving it pending.
 *
 * @param {Iterable<{name: string, data: Buffer}>|AsyncIterable<{name: string, data: Buffer}>} entries
 * @param {import("stream").Writable} output - Destination stream; ended when the archive is complete.
 * @returns {Promise<void>} Resolves when the pipeline has finished; rejects on any stream or iterator error.
 */
async function writeTarGz(entries, output) {
  const tarStream = Readable2.from(generateTarChunks(entries), { objectMode: false });
  await pipelineAsync(tarStream, createGzip(), output);
}

/**
 * Gunzip a readable stream fully into memory.
 *
 * @param {import("stream").Readable} input - Gzip-compressed byte stream.
 * @returns {Promise<Buffer>} The decompressed bytes.
 */
async function decompressStream(input) {
  const gunzip = createGunzip();
  const chunks = [];
  return new Promise((resolve4, reject) => {
    gunzip.on("data", (chunk) => chunks.push(chunk));
    gunzip.on("end", () => resolve4(Buffer.concat(chunks)));
    gunzip.on("error", reject);
    // pipe() does not forward source errors, so listen on the input as well.
    input.on("error", reject);
    input.pipe(gunzip);
  });
}

/**
 * Iterate the entries of an uncompressed tar buffer.
 *
 * Stops at the first all-zero header block or when fewer than 512 bytes remain.
 *
 * @param {Buffer} decompressed - Raw tar bytes.
 * @yields {{name: string, size: number, data: Buffer}} `data` is a view into `decompressed`.
 * @throws {Error} If a header has an unparseable size field or the payload is truncated.
 */
function* parseTarEntries(decompressed) {
  let offset = 0;
  while (offset + BLOCK_SIZE <= decompressed.length) {
    const header = decompressed.subarray(offset, offset + BLOCK_SIZE);
    if (header.every((b) => b === 0)) break;
    const nameEnd = header.indexOf(0);
    const nameLength = nameEnd === -1 ? 100 : Math.min(nameEnd, 100);
    const name = header.subarray(0, nameLength).toString("utf8");
    const sizeStr = header.subarray(124, 135).toString("utf8").trim();
    const size = Number.parseInt(sizeStr, 8);
    if (!Number.isInteger(size) || size < 0) {
      throw new Error(`Invalid tar header for entry "${name}": unreadable size field`);
    }
    offset += BLOCK_SIZE;
    if (offset + size > decompressed.length) {
      throw new Error(`Truncated tar archive: entry "${name}" declares ${size} bytes but only ${decompressed.length - offset} remain`);
    }
    const data = decompressed.subarray(offset, offset + size);
    offset += size + paddingBytes(size);
    yield { name, size, data };
  }
}

/**
 * Read a gzip-compressed tar stream and yield its entries.
 *
 * The whole archive is decompressed into memory before the first entry is yielded.
 *
 * @param {import("stream").Readable} input - Gzip-compressed tar stream.
 * @yields {{name: string, size: number, data: Buffer}}
 */
async function* readTarGz(input) {
  const decompressed = await decompressStream(input);
  yield* parseTarEntries(decompressed);
}
12640
+
12641
+ // src/exchange/manifest.ts
12642
var BACKUP_FORMAT = "semiont-backup";
var FORMAT_VERSION = 1;
var LINKED_DATA_FORMAT = "semiont-linked-data";

/**
 * Type guard for a linked-data manifest.
 *
 * @param {unknown} obj - Parsed manifest candidate.
 * @returns {boolean} True when `obj` is an object whose "semiont:format" equals LINKED_DATA_FORMAT.
 */
function isLinkedDataManifest(obj) {
  return typeof obj === "object" && obj !== null && obj["semiont:format"] === LINKED_DATA_FORMAT;
}

/**
 * Type guard for a backup manifest header.
 *
 * @param {unknown} obj - Parsed manifest candidate.
 * @returns {boolean} True when `obj` is an object whose `format` equals BACKUP_FORMAT.
 */
function isBackupManifest(obj) {
  return typeof obj === "object" && obj !== null && obj.format === BACKUP_FORMAT;
}

/**
 * Reject manifests whose version this build cannot read.
 *
 * @param {number} version - Version number taken from a manifest.
 * @throws {Error} If `version` is not a positive integer, or is newer than FORMAT_VERSION.
 */
function validateManifestVersion(version) {
  // A missing or non-numeric version must not slip through: comparisons such
  // as `undefined > 1` or `"abc" > 1` are simply false.
  if (!Number.isInteger(version) || version < 1) {
    throw new Error(
      `Invalid format version ${String(version)}. This tool supports version ${FORMAT_VERSION}.`
    );
  }
  if (version > FORMAT_VERSION) {
    throw new Error(
      `Unsupported format version ${version}. This tool supports version ${FORMAT_VERSION}.`
    );
  }
}
12658
+
12659
+ // src/exchange/backup-exporter.ts
12660
var SYSTEM_STREAM = "__system__";

/**
 * Export the event log and the content blobs it references as a tar.gz backup.
 *
 * Archive layout: `.semiont/manifest.jsonl` (header line followed by one
 * summary line per stream), `.semiont/events/<stream>.jsonl` for each
 * non-empty stream, then one `<checksum><ext>` entry per content blob.
 *
 * @param {object} options - `{ eventStore, content, sourceUrl, logger }`; `logger` is optional.
 * @param {import("stream").Writable} output - Destination for the compressed archive.
 * @returns {Promise<object>} The manifest header that was written.
 */
async function exportBackup(options, output) {
  const { eventStore, content, sourceUrl, logger } = options;

  const resourceIds = await eventStore.log.storage.getAllResourceIds();
  logger?.info("Backup export: enumerating streams", { resourceCount: resourceIds.length });

  // System stream first, then every resource stream; empty streams are left out.
  const streamData = new Map();
  let totalEvents = 0;
  for (const streamId of [SYSTEM_STREAM, ...resourceIds]) {
    const streamEvents = await eventStore.log.getEvents(streamId);
    if (!(streamEvents.length > 0)) continue;
    streamData.set(streamId, streamEvents);
    totalEvents += streamEvents.length;
  }

  const contentRefs = collectContentRefs(streamData);
  logger?.info("Backup export: collected content refs", {
    streams: streamData.size,
    events: totalEvents,
    blobs: contentRefs.size
  });

  const contentBlobs = new Map();
  let totalContentBytes = 0;
  for (const [checksum, mediaType] of contentRefs) {
    const data = await content.retrieve(checksum, mediaType);
    const ext = getExtensionForMimeType(mediaType);
    contentBlobs.set(checksum, { data, ext });
    totalContentBytes += data.length;
  }

  const streamSummaries = Array.from(streamData, ([stream, streamEvents]) => ({
    stream,
    eventCount: streamEvents.length,
    firstChecksum: streamEvents[0].metadata.checksum || "",
    lastChecksum: streamEvents[streamEvents.length - 1].metadata.checksum || ""
  }));

  const manifestHeader = {
    format: BACKUP_FORMAT,
    version: FORMAT_VERSION,
    exportedAt: new Date().toISOString(),
    sourceUrl,
    stats: {
      streams: streamData.size,
      events: totalEvents,
      blobs: contentBlobs.size,
      contentBytes: totalContentBytes
    }
  };

  // One JSON document per line, with a trailing newline.
  const toJsonl = (records) => `${records.map((record) => JSON.stringify(record)).join("\n")}\n`;

  async function* generateEntries() {
    yield {
      name: ".semiont/manifest.jsonl",
      data: Buffer.from(toJsonl([manifestHeader, ...streamSummaries]), "utf8")
    };
    for (const [streamId, streamEvents] of streamData) {
      yield {
        name: `.semiont/events/${streamId}.jsonl`,
        data: Buffer.from(toJsonl(streamEvents), "utf8")
      };
    }
    for (const [checksum, { data, ext }] of contentBlobs) {
      yield { name: `${checksum}${ext}`, data };
    }
  }

  await writeTarGz(generateEntries(), output);
  logger?.info("Backup export complete", {
    streams: streamData.size,
    events: totalEvents,
    blobs: contentBlobs.size,
    contentBytes: totalContentBytes
  });
  return manifestHeader;
}
12734
/**
 * Gather the content blobs referenced by `resource.created` events.
 *
 * @param {Map<string, Array<{event: {type: string, payload: object}}>>} streamData - Stored events keyed by stream id.
 * @returns {Map<string, string>} Content checksum -> format; a later event for the same checksum overwrites an earlier one.
 */
function collectContentRefs(streamData) {
  const refs = new Map();
  for (const storedEvents of streamData.values()) {
    for (const { event } of storedEvents) {
      if (event.type !== "resource.created") continue;
      const { contentChecksum, format } = event.payload;
      if (contentChecksum && format) {
        refs.set(contentChecksum, format);
      }
    }
  }
  return refs;
}
12748
+
12749
+ // src/exchange/replay.ts
12750
+ var import_rxjs9 = __toESM(require_cjs(), 1);
12751
+ var import_operators9 = __toESM(require_operators(), 1);
12752
var REPLAY_TIMEOUT_MS = 3e4;

/**
 * Replay every stored event of one JSONL stream, in order, through the event bus.
 *
 * Before replaying, adjacent events are compared: where both values are
 * present, an event's `metadata.prevEventHash` must equal the previous
 * event's `metadata.checksum`. Mismatches are logged and reported but do not
 * stop the replay.
 *
 * @param {string} jsonl - Stream contents, one stored event per line.
 * @param {object} eventBus - Bus exposing `get(channel)`.
 * @param {(checksum: string) => (Buffer|undefined)} resolveBlob - Looks up content by checksum.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<{stats: object, hashChainValid: boolean}>}
 */
async function replayEventStream(jsonl, eventBus, resolveBlob, logger) {
  const storedEvents = jsonl
    .trim()
    .split("\n")
    .filter((line) => line.length > 0)
    .map((line) => JSON.parse(line));

  const stats = {
    eventsReplayed: 0,
    resourcesCreated: 0,
    annotationsCreated: 0,
    entityTypesAdded: 0
  };

  let hashChainValid = true;
  storedEvents.slice(1).forEach((curr, k) => {
    const prev = storedEvents[k];
    const got = curr.metadata.prevEventHash;
    const expected = prev.metadata.checksum;
    if (got && expected && got !== expected) {
      logger?.warn("Hash chain break", { index: k + 1, expected, got });
      hashChainValid = false;
    }
  });

  for (const { event } of storedEvents) {
    await replayEvent(event, eventBus, resolveBlob, stats, logger);
    stats.eventsReplayed += 1;
  }
  return { stats, hashChainValid };
}
12783
/**
 * Replay one domain event by dispatching on its `type`.
 *
 * Job and representation events are logged at debug level and skipped;
 * unrecognised types are logged as warnings. The matching counter in `stats`
 * is incremented after an entity type, resource or annotation has been replayed.
 *
 * @param {object} event - Domain event carrying a `type` field.
 * @param {object} eventBus - Bus exposing `get(channel)`.
 * @param {(checksum: string) => (Buffer|undefined)} resolveBlob - Looks up content by checksum.
 * @param {object} stats - Mutable replay counters.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<void>}
 */
async function replayEvent(event, eventBus, resolveBlob, stats, logger) {
  const { type } = event;

  // Job events are transient — skip during replay
  if (type === "job.started" || type === "job.progress" || type === "job.completed" || type === "job.failed") {
    logger?.debug("Skipping job event during replay", { type });
    return;
  }
  // Representation events — content is already stored via resource.created replay
  if (type === "representation.added" || type === "representation.removed") {
    logger?.debug("Skipping representation event during replay", { type });
    return;
  }

  if (type === "entitytype.added") {
    await replayEntityTypeAdded(event, eventBus, logger);
    stats.entityTypesAdded += 1;
    return;
  }
  if (type === "resource.created") {
    await replayResourceCreated(event, eventBus, resolveBlob, logger);
    stats.resourcesCreated += 1;
    return;
  }
  if (type === "annotation.added") {
    await replayAnnotationAdded(event, eventBus, logger);
    stats.annotationsCreated += 1;
    return;
  }
  if (type === "annotation.body.updated") {
    await replayAnnotationBodyUpdated(event, eventBus, logger);
    return;
  }
  if (type === "annotation.removed") {
    await replayAnnotationRemoved(event, eventBus, logger);
    return;
  }
  if (type === "resource.archived") {
    await replayResourceArchived(event, eventBus, logger);
    return;
  }
  if (type === "resource.unarchived") {
    await replayResourceUnarchived(event, eventBus, logger);
    return;
  }
  if (type === "entitytag.added" || type === "entitytag.removed") {
    await replayEntityTagChange(event, eventBus, logger);
    return;
  }

  logger?.warn("Unknown event type during replay", { type });
}
12829
/**
 * Replay an `entitytype.added` event: publish `mark:add-entity-type` and wait
 * for `mark:entity-type-added`, `mark:entity-type-add-failed`, or the replay timeout.
 *
 * @param {object} event - Event with `payload.entityType` and `userId`.
 * @param {object} eventBus - Bus exposing `get(channel)`.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<void>}
 * @throws The `error` carried by the failure event, or an Error on timeout.
 */
async function replayEntityTypeAdded(event, eventBus, logger) {
  const { race, timer, firstValueFrom } = import_rxjs9;
  const { map } = import_operators9;
  const result$ = race(
    eventBus.get("mark:entity-type-added").pipe(map(() => "ok")),
    eventBus.get("mark:entity-type-add-failed").pipe(map((e) => {
      throw e.error;
    })),
    timer(REPLAY_TIMEOUT_MS).pipe(map(() => {
      throw new Error("Timeout waiting for mark:entity-type-added");
    }))
  );
  // Subscribe before publishing the command: firstValueFrom() is what
  // subscribes, and a reply emitted synchronously from inside next() would
  // otherwise be missed, leaving this call to wait for the timeout.
  const outcome = firstValueFrom(result$);
  eventBus.get("mark:add-entity-type").next({
    tag: event.payload.entityType,
    userId: event.userId
  });
  await outcome;
  logger?.debug("Replayed entitytype.added", { entityType: event.payload.entityType });
}
12846
/**
 * Replay a `resource.created` event: resolve its content blob, publish
 * `yield:create`, and wait for `yield:created`, `yield:create-failed`, or the
 * replay timeout.
 *
 * @param {object} event - Event with `payload` (name, format, contentChecksum, ...) and `userId`.
 * @param {object} eventBus - Bus exposing `get(channel)`.
 * @param {(checksum: string) => (Buffer|undefined)} resolveBlob - Looks up content by checksum.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<void>}
 * @throws {Error} If the blob is missing or the wait times out; otherwise the `error` carried by the failure event.
 */
async function replayResourceCreated(event, eventBus, resolveBlob, logger) {
  const { race, timer, firstValueFrom } = import_rxjs9;
  const { map } = import_operators9;
  const { payload } = event;
  const blob = resolveBlob(payload.contentChecksum);
  if (!blob) {
    throw new Error(`Missing content blob for checksum ${payload.contentChecksum}`);
  }
  const result$ = race(
    eventBus.get("yield:created"),
    eventBus.get("yield:create-failed").pipe(map((e) => {
      throw e.error;
    })),
    timer(REPLAY_TIMEOUT_MS).pipe(map(() => {
      throw new Error("Timeout waiting for yield:created");
    }))
  );
  // Subscribe before publishing the command: firstValueFrom() is what
  // subscribes, and a reply emitted synchronously from inside next() would
  // otherwise be missed, leaving this call to wait for the timeout.
  const outcome = firstValueFrom(result$);
  eventBus.get("yield:create").next({
    name: payload.name,
    content: blob,
    format: payload.format,
    userId: event.userId,
    language: payload.language,
    entityTypes: payload.entityTypes,
    creationMethod: payload.creationMethod,
    isDraft: payload.isDraft,
    generatedFrom: payload.generatedFrom,
    generationPrompt: payload.generationPrompt
  });
  await outcome;
  logger?.debug("Replayed resource.created", { name: payload.name });
}
12876
/**
 * Replay an `annotation.added` event: publish `mark:create` and wait for
 * `mark:created`, `mark:create-failed`, or the replay timeout.
 *
 * @param {object} event - Event with `payload.annotation`, `userId` and `resourceId`.
 * @param {object} eventBus - Bus exposing `get(channel)`.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<void>}
 * @throws The `error` carried by the failure event, or an Error on timeout.
 */
async function replayAnnotationAdded(event, eventBus, logger) {
  const { race, timer, firstValueFrom } = import_rxjs9;
  const { map } = import_operators9;
  const result$ = race(
    eventBus.get("mark:created").pipe(map(() => "ok")),
    eventBus.get("mark:create-failed").pipe(map((e) => {
      throw e.error;
    })),
    timer(REPLAY_TIMEOUT_MS).pipe(map(() => {
      throw new Error("Timeout waiting for mark:created");
    }))
  );
  // Subscribe before publishing the command: firstValueFrom() is what
  // subscribes, and a reply emitted synchronously from inside next() would
  // otherwise be missed, leaving this call to wait for the timeout.
  const outcome = firstValueFrom(result$);
  eventBus.get("mark:create").next({
    annotation: event.payload.annotation,
    userId: event.userId,
    resourceId: event.resourceId
  });
  await outcome;
  logger?.debug("Replayed annotation.added", { annotationId: event.payload.annotation.id });
}
12894
/**
 * Replay an `annotation.body.updated` event: publish `mark:update-body` and
 * wait for `mark:body-updated`, `mark:body-update-failed`, or the replay timeout.
 *
 * @param {object} event - Event with `payload.annotationId`, `payload.operations`, `userId` and `resourceId`.
 * @param {object} eventBus - Bus exposing `get(channel)`.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<void>}
 * @throws The `error` carried by the failure event, or an Error on timeout.
 */
async function replayAnnotationBodyUpdated(event, eventBus, logger) {
  const { race, timer, firstValueFrom } = import_rxjs9;
  const { map } = import_operators9;
  const result$ = race(
    eventBus.get("mark:body-updated").pipe(map(() => "ok")),
    eventBus.get("mark:body-update-failed").pipe(map((e) => {
      throw e.error;
    })),
    timer(REPLAY_TIMEOUT_MS).pipe(map(() => {
      throw new Error("Timeout waiting for mark:body-updated");
    }))
  );
  // Subscribe before publishing the command: firstValueFrom() is what
  // subscribes, and a reply emitted synchronously from inside next() would
  // otherwise be missed, leaving this call to wait for the timeout.
  const outcome = firstValueFrom(result$);
  eventBus.get("mark:update-body").next({
    annotationId: event.payload.annotationId,
    userId: event.userId,
    resourceId: event.resourceId,
    operations: event.payload.operations
  });
  await outcome;
  logger?.debug("Replayed annotation.body.updated", { annotationId: event.payload.annotationId });
}
12913
/**
 * Replay an `annotation.removed` event: publish `mark:delete` and wait for
 * `mark:deleted`, `mark:delete-failed`, or the replay timeout.
 *
 * @param {object} event - Event with `payload.annotationId`, `userId` and `resourceId`.
 * @param {object} eventBus - Bus exposing `get(channel)`.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<void>}
 * @throws The `error` carried by the failure event, or an Error on timeout.
 */
async function replayAnnotationRemoved(event, eventBus, logger) {
  const { race, timer, firstValueFrom } = import_rxjs9;
  const { map } = import_operators9;
  const result$ = race(
    eventBus.get("mark:deleted").pipe(map(() => "ok")),
    eventBus.get("mark:delete-failed").pipe(map((e) => {
      throw e.error;
    })),
    timer(REPLAY_TIMEOUT_MS).pipe(map(() => {
      throw new Error("Timeout waiting for mark:deleted");
    }))
  );
  // Subscribe before publishing the command: firstValueFrom() is what
  // subscribes, and a reply emitted synchronously from inside next() would
  // otherwise be missed, leaving this call to wait for the timeout.
  const outcome = firstValueFrom(result$);
  eventBus.get("mark:delete").next({
    annotationId: event.payload.annotationId,
    userId: event.userId,
    resourceId: event.resourceId
  });
  await outcome;
  logger?.debug("Replayed annotation.removed", { annotationId: event.payload.annotationId });
}
12931
/**
 * Replay a `resource.archived` event by publishing `mark:archive`.
 * The command is sent without waiting for any acknowledgement.
 *
 * @param {object} event - Event with `userId` and `resourceId`.
 * @param {object} eventBus - Bus exposing `get(channel)`.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<void>}
 */
async function replayResourceArchived(event, eventBus, logger) {
  const { userId, resourceId } = event;
  const archiveChannel = eventBus.get("mark:archive");
  archiveChannel.next({ userId, resourceId });
  logger?.debug("Replayed resource.archived", { resourceId });
}
12938
/**
 * Replay a `resource.unarchived` event by publishing `mark:unarchive`.
 * The command is sent without waiting for any acknowledgement.
 *
 * @param {object} event - Event with `userId` and `resourceId`.
 * @param {object} eventBus - Bus exposing `get(channel)`.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<void>}
 */
async function replayResourceUnarchived(event, eventBus, logger) {
  const { userId, resourceId } = event;
  const unarchiveChannel = eventBus.get("mark:unarchive");
  unarchiveChannel.next({ userId, resourceId });
  logger?.debug("Replayed resource.unarchived", { resourceId });
}
12945
/**
 * Replay an `entitytag.added` or `entitytag.removed` event by publishing
 * `mark:update-entity-types`.
 *
 * An addition is expressed as `[] -> [entityType]`; any other event type is
 * treated as a removal, `[entityType] -> []`. No acknowledgement is awaited.
 *
 * @param {object} event - Event with `type`, `payload.entityType`, `userId` and `resourceId`.
 * @param {object} eventBus - Bus exposing `get(channel)`.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<void>}
 */
async function replayEntityTagChange(event, eventBus, logger) {
  const { entityType } = event.payload;
  const isAddition = event.type === "entitytag.added";
  eventBus.get("mark:update-entity-types").next({
    resourceId: event.resourceId,
    userId: event.userId,
    currentEntityTypes: isAddition ? [] : [entityType],
    updatedEntityTypes: isAddition ? [entityType] : []
  });
  logger?.debug("Replayed entity tag change", { type: event.type, entityType });
}
12965
+
12966
+ // src/exchange/backup-importer.ts
12967
/**
 * Build a checksum -> content lookup over the archive's entries.
 *
 * Every entry whose name does not start with `.semiont/` is indexed under its
 * name with the final extension (text after the last dot) removed.
 *
 * @param {Map<string, Buffer>} entries - Archive entries keyed by entry name.
 * @returns {(checksum: string) => (Buffer|undefined)} Resolver returning the entry data, or undefined when unknown.
 */
function buildBlobResolver(entries) {
  const stripExtension = (entryName) => {
    const lastDot = entryName.lastIndexOf(".");
    return lastDot >= 0 ? entryName.slice(0, lastDot) : entryName;
  };
  const checksumIndex = new Map(
    Array.from(entries.keys())
      .filter((entryName) => !entryName.startsWith(".semiont/"))
      .map((entryName) => [stripExtension(entryName), entryName])
  );
  return (checksum) => {
    const entryName = checksumIndex.get(checksum);
    if (!entryName) return undefined;
    return entries.get(entryName);
  };
}
12981
/**
 * Import a backup archive by replaying its event streams through the event bus.
 *
 * The system stream is replayed first when present, followed by the streams
 * listed in the manifest, in manifest order. Streams listed in the manifest
 * but absent from the archive are logged and skipped.
 *
 * @param {import("stream").Readable} archive - Gzip-compressed tar backup.
 * @param {object} options - `{ eventBus, logger }`; `logger` is optional.
 * @returns {Promise<{manifest: object, stats: object, hashChainValid: boolean}>}
 * @throws {Error} If the manifest is missing, empty, of the wrong format, or of an unsupported version.
 */
async function importBackup(archive, options) {
  const { eventBus, logger } = options;
  const entries = new Map();
  for await (const entry of readTarGz(archive)) {
    entries.set(entry.name, entry.data);
  }

  const manifestData = entries.get(".semiont/manifest.jsonl");
  if (!manifestData) {
    throw new Error("Invalid backup: missing .semiont/manifest.jsonl");
  }
  // Drop blank lines so stray newlines cannot reach JSON.parse.
  const manifestLines = manifestData
    .toString("utf8")
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
  if (manifestLines.length === 0) {
    throw new Error("Invalid backup: empty .semiont/manifest.jsonl");
  }
  const header = JSON.parse(manifestLines[0]);
  if (!isBackupManifest(header)) {
    // The header may be null or a primitive here, hence the optional access.
    throw new Error(`Invalid backup: expected format "${BACKUP_FORMAT}", got "${header?.format}"`);
  }
  validateManifestVersion(header.version);
  const streamSummaries = manifestLines.slice(1).map((line) => JSON.parse(line));
  logger?.info("Backup import: parsed manifest", {
    streams: header.stats.streams,
    events: header.stats.events,
    blobs: header.stats.blobs
  });

  const resolveBlob = buildBlobResolver(entries);
  let stats = { eventsReplayed: 0, resourcesCreated: 0, annotationsCreated: 0, entityTypesAdded: 0 };
  let hashChainValid = true;

  // Replay one stream's JSONL and fold its outcome into the running totals.
  const replayStream = async (data) => {
    const result = await replayEventStream(data.toString("utf8"), eventBus, resolveBlob, logger);
    stats = mergeStats(stats, result.stats);
    if (!result.hashChainValid) hashChainValid = false;
  };

  const systemData = entries.get(".semiont/events/__system__.jsonl");
  if (systemData) {
    await replayStream(systemData);
  }
  for (const summary of streamSummaries) {
    if (summary.stream === "__system__") continue;
    const eventData = entries.get(`.semiont/events/${summary.stream}.jsonl`);
    if (!eventData) {
      logger?.warn("Backup import: missing event stream", { stream: summary.stream });
      continue;
    }
    await replayStream(eventData);
  }

  logger?.info("Backup import complete", { ...stats, hashChainValid });
  return { manifest: header, stats, hashChainValid };
}
13036
/**
 * Add two replay-statistics records field by field.
 *
 * @param {object} a - Counters `eventsReplayed`, `resourcesCreated`, `annotationsCreated`, `entityTypesAdded`.
 * @param {object} b - Counters with the same four fields.
 * @returns {object} A new record holding the per-field sums.
 */
function mergeStats(a, b) {
  const counters = ["eventsReplayed", "resourcesCreated", "annotationsCreated", "entityTypesAdded"];
  return Object.fromEntries(counters.map((key) => [key, a[key] + b[key]]));
}
13044
+
13045
+ // src/exchange/linked-data-exporter.ts
13046
+ import { getExtensionForMimeType as getExtensionForMimeType2 } from "@semiont/content";
13047
// JSON-LD @context attached to every exported resource document: two remote
// contexts followed by the local Semiont term definitions.
var SEMIONT_CONTEXT = [
  "https://schema.org/",
  "http://www.w3.org/ns/anno.jsonld",
  {
    semiont: "https://semiont.org/vocab/",
    entityTypes: "semiont:entityTypes",
    creationMethod: "semiont:creationMethod",
    archived: "semiont:archived",
    representations: { "@id": "semiont:representations", "@container": "@set" },
    annotations: { "@id": "semiont:annotations", "@container": "@set" }
  }
];

// Prefix map used as the @context of the linked-data export manifest.
var MANIFEST_CONTEXT = {
  semiont: "https://semiont.org/vocab/",
  schema: "https://schema.org/",
  dct: "http://purl.org/dc/terms/",
  prov: "http://www.w3.org/ns/prov#",
  "void": "http://rdfs.org/ns/void#"
};
13066
/**
 * True when `value` is a string that begins with an explicit `http://` or
 * `https://` scheme. A bare identifier that merely starts with the letters
 * "http" is not treated as absolute.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isAbsoluteHttpUrl(value) {
  return typeof value === "string" && /^https?:\/\//i.test(value);
}

/**
 * Expand a bare resource id to `<baseUrl>/resources/<id>`; absolute URLs pass through.
 *
 * @param {string} ref - Bare id or absolute URL.
 * @param {string} baseUrl - Base URL without a trailing slash.
 * @returns {string}
 */
function hydrateResourceRef(ref, baseUrl) {
  return isAbsoluteHttpUrl(ref) ? ref : `${baseUrl}/resources/${ref}`;
}

/**
 * Hydrate an annotation target given as a string, an object with a `source`,
 * or an array of either. Objects are shallow-copied; anything else is returned as-is.
 *
 * @param {unknown} target
 * @param {string} baseUrl
 * @returns {unknown}
 */
function hydrateTarget(target, baseUrl) {
  if (typeof target === "string") {
    return hydrateResourceRef(target, baseUrl);
  }
  if (Array.isArray(target)) {
    // Spreading an array into an object literal would turn it into a plain
    // object with index keys, so arrays are mapped element by element.
    return target.map((item) => hydrateTarget(item, baseUrl));
  }
  if (target && typeof target === "object") {
    const copy = { ...target };
    if (typeof copy.source === "string" && copy.source.length > 0) {
      copy.source = hydrateResourceRef(copy.source, baseUrl);
    }
    return copy;
  }
  return target;
}

/**
 * Return a copy of `annotation` with its bare ids expanded to absolute URLs:
 * `id` under `<baseUrl>/annotations/`, target and body sources under
 * `<baseUrl>/resources/`. The input annotation is not modified.
 *
 * @param {object} annotation - Annotation with optional `id`, `target` and `body`.
 * @param {string} baseUrl - Base URL without a trailing slash.
 * @returns {object} Shallow copy carrying the hydrated fields.
 */
function hydrateAnnotation(annotation, baseUrl) {
  const hydrated = { ...annotation };
  if (typeof hydrated.id === "string" && hydrated.id.length > 0 && !isAbsoluteHttpUrl(hydrated.id)) {
    hydrated.id = `${baseUrl}/annotations/${hydrated.id}`;
  }
  const { target } = hydrated;
  if (typeof target === "string" || (target && typeof target === "object")) {
    hydrated.target = hydrateTarget(target, baseUrl);
  }
  hydrated.body = hydrateBody(hydrated.body, baseUrl);
  return hydrated;
}

/**
 * Hydrate an annotation body, which may be a single item or an array of items.
 *
 * @param {unknown} body
 * @param {string} baseUrl
 * @returns {unknown}
 */
function hydrateBody(body, baseUrl) {
  if (Array.isArray(body)) {
    return body.map((b) => hydrateBodyItem(b, baseUrl));
  }
  return hydrateBodyItem(body, baseUrl);
}

/**
 * Hydrate one body item: when it is an object whose `source` is a bare string
 * id, return a copy with the source expanded; otherwise return the item unchanged.
 *
 * @param {unknown} item
 * @param {string} baseUrl
 * @returns {unknown}
 */
function hydrateBodyItem(item, baseUrl) {
  if (item && typeof item === "object" && "source" in item) {
    const source = item.source;
    if (typeof source === "string" && !isAbsoluteHttpUrl(source)) {
      return { ...item, source: `${baseUrl}/resources/${source}` };
    }
  }
  return item;
}
13100
/**
 * Export resources as a tar.gz of JSON-LD documents plus their content blobs.
 *
 * Archive layout: `.semiont/manifest.jsonld`, one
 * `.semiont/resources/<@id>.jsonld` per resource, then one `<checksum><ext>`
 * entry per blob that could be retrieved. A blob that fails to load is logged
 * as a warning and left out of the archive.
 *
 * @param {object} options - `{ views, content, sourceUrl, entityTypes, includeArchived, logger }`; `logger` is optional.
 * @param {import("stream").Writable} output - Destination for the compressed archive.
 * @returns {Promise<object>} The JSON-LD manifest that was written.
 */
async function exportLinkedData(options, output) {
  const { views, content, sourceUrl, entityTypes, includeArchived, logger } = options;

  const everyView = await views.getAll();
  const resourceViews = includeArchived
    ? everyView
    : everyView.filter(({ resource }) => !resource.archived);
  logger?.info("Linked data export: enumerating resources", { count: resourceViews.length });

  const contentRefs = new Map();
  resourceViews.forEach((view) => {
    collectContentRefsFromResource(view.resource, contentRefs);
  });

  const contentBlobs = new Map();
  for (const [checksum, mediaType] of contentRefs) {
    try {
      const data = await content.retrieve(checksum, mediaType);
      const ext = getExtensionForMimeType2(mediaType);
      contentBlobs.set(checksum, { data, ext });
    } catch (err) {
      logger?.warn("Failed to retrieve content blob", { checksum, mediaType, error: String(err) });
    }
  }

  const manifest = {
    "@context": MANIFEST_CONTEXT,
    "@type": "void:Dataset",
    "semiont:format": LINKED_DATA_FORMAT,
    "semiont:version": FORMAT_VERSION,
    "dct:created": new Date().toISOString(),
    "prov:wasGeneratedBy": {
      "@type": "prov:Activity",
      "prov:used": sourceUrl
    },
    "semiont:entityTypes": entityTypes,
    "void:entities": resourceViews.length
  };

  // Pretty-printed JSON, UTF-8 encoded.
  const toJsonBuffer = (value) => Buffer.from(JSON.stringify(value, null, 2), "utf8");

  async function* generateEntries() {
    yield { name: ".semiont/manifest.jsonld", data: toJsonBuffer(manifest) };
    for (const { resource, annotations } of resourceViews) {
      const resourceId = resource["@id"];
      const jsonld = buildResourceJsonLd(resource, annotations.annotations, sourceUrl);
      yield { name: `.semiont/resources/${resourceId}.jsonld`, data: toJsonBuffer(jsonld) };
    }
    for (const [checksum, { data, ext }] of contentBlobs) {
      yield { name: `${checksum}${ext}`, data };
    }
  }

  await writeTarGz(generateEntries(), output);
  logger?.info("Linked data export complete", {
    resources: resourceViews.length,
    blobs: contentBlobs.size
  });
  return manifest;
}
13156
/**
 * Build the JSON-LD document for one resource and its annotations.
 *
 * Optional fields are copied only when truthy (array fields only when
 * non-empty). `inLanguage` and `encodingFormat` are taken from the first
 * representation.
 *
 * @param {object} resource - Resource descriptor with `@id`, `name` and optional metadata.
 * @param {Array<object>} annotations - Annotations attached to the resource.
 * @param {string} sourceUrl - Base URL without a trailing slash, used to expand bare ids.
 * @returns {object} The JSON-LD document.
 */
function buildResourceJsonLd(resource, annotations, sourceUrl) {
  const resourceId = resource["@id"];
  // Require an explicit http(s) scheme: a bare id that merely starts with the
  // letters "http" still has to be expanded.
  const resourceUri = /^https?:\/\//i.test(resourceId)
    ? resourceId
    : `${sourceUrl}/resources/${resourceId}`;
  const doc = {
    "@context": SEMIONT_CONTEXT,
    "@id": resourceUri,
    "@type": resource["@type"] ?? "DigitalDocument",
    "name": resource.name
  };

  const copyIfSet = (key) => {
    if (resource[key]) doc[key] = resource[key];
  };
  const copyIfNonEmpty = (key) => {
    if (resource[key] && resource[key].length > 0) doc[key] = resource[key];
  };

  // Insertion order is kept stable because it determines the serialized key order.
  copyIfSet("dateCreated");
  copyIfSet("dateModified");
  copyIfSet("description");

  const reps = normalizeRepresentations(resource.representations);
  if (reps.length > 0) {
    const primary = reps[0];
    if (primary.language) doc["inLanguage"] = primary.language;
    if (primary.mediaType) doc["encodingFormat"] = primary.mediaType;
  }

  copyIfSet("creationMethod");
  copyIfNonEmpty("entityTypes");
  copyIfSet("archived");
  copyIfSet("wasDerivedFrom");
  copyIfSet("wasAttributedTo");
  copyIfNonEmpty("sameAs");
  copyIfNonEmpty("isPartOf");
  copyIfNonEmpty("hasPart");

  if (reps.length > 0) {
    doc["representations"] = reps.map((rep) => {
      const mediaObj = {
        "@type": "schema:MediaObject",
        "encodingFormat": rep.mediaType
      };
      if (rep.byteSize !== void 0) mediaObj["contentSize"] = rep.byteSize;
      if (rep.checksum) {
        // The archive stores blobs under the bare hex digest.
        const rawChecksum = rep.checksum.startsWith("sha256:") ? rep.checksum.slice(7) : rep.checksum;
        mediaObj["sha256"] = rawChecksum;
        const ext = getExtensionForMimeType2(rep.mediaType);
        mediaObj["name"] = `${rawChecksum}${ext}`;
      }
      if (rep.language) mediaObj["inLanguage"] = rep.language;
      return mediaObj;
    });
  }

  if (annotations.length > 0) {
    doc["annotations"] = annotations.map((ann) => hydrateAnnotation(ann, sourceUrl));
  }
  return doc;
}
13204
/**
 * Coerce a `representations` value into an array.
 *
 * @param {*} reps - An array, a single representation, or a falsy value.
 * @returns {Array} The same array instance when given one, a one-element
 *   array for any other truthy value, and an empty array otherwise.
 */
function normalizeRepresentations(reps) {
  if (Array.isArray(reps)) return reps;
  return reps ? [reps] : [];
}
13209
/**
 * Record the content blobs referenced by a resource's representations.
 *
 * Each representation that has both a checksum and a media type adds one
 * entry to `refs`, keyed by the checksum with any "sha256:" prefix removed.
 *
 * @param {object} resource - Resource whose `representations` are inspected.
 * @param {Map<string, string>} refs - Mutated in place: checksum -> media type.
 * @returns {void}
 */
function collectContentRefsFromResource(resource, refs) {
  const prefix = "sha256:";
  for (const { checksum, mediaType } of normalizeRepresentations(resource.representations)) {
    if (!checksum || !mediaType) continue;
    const digest = checksum.startsWith(prefix) ? checksum.slice(prefix.length) : checksum;
    refs.set(digest, mediaType);
  }
}
13218
+
13219
+ // src/exchange/linked-data-importer.ts
13220
+ var import_rxjs10 = __toESM(require_cjs(), 1);
13221
+ var import_operators10 = __toESM(require_operators(), 1);
13222
// Upper bound, in milliseconds, on how long an import step waits for its completion event.
var IMPORT_TIMEOUT_MS = 30 * 1e3;
13223
/**
 * Reduce a URI to its last path segment, which is used as the bare identifier.
 *
 * Trailing slashes are ignored, so "http://host/resources/abc/" yields "abc"
 * rather than an empty string.
 *
 * @param {string} uri - A URI or an already-bare identifier.
 * @returns {string} The input unchanged when it contains no "/", otherwise
 *   the text after the last "/" that is followed by at least one other character.
 */
function stripUriToId(uri) {
  if (!uri.includes("/")) return uri;
  // Drop trailing slashes first; otherwise the "last segment" would be empty.
  let end = uri.length;
  while (end > 0 && uri[end - 1] === "/") end--;
  const trimmed = uri.slice(0, end);
  return trimmed.slice(trimmed.lastIndexOf("/") + 1);
}
13228
/**
 * Return a copy of an annotation with its URI references reduced to bare ids.
 *
 * The annotation id, a string target (or the `source` of an object target),
 * and the body are rewritten. The input annotation and its target object are
 * not modified.
 *
 * @param {object} annotation - Annotation as read from an exported document.
 * @returns {object} Shallow copy with dehydrated `id`, `target` and `body`.
 */
function dehydrateAnnotation(annotation) {
  const result = { ...annotation };
  if (result.id) result.id = stripUriToId(result.id);

  const { target } = result;
  if (typeof target === "string") {
    result.target = stripUriToId(target);
  } else if (target && typeof target === "object") {
    // Copy the target so the caller's object is left untouched.
    result.target = target.source
      ? { ...target, source: stripUriToId(target.source) }
      : { ...target };
  }

  // `body` is always assigned, even when the annotation has none.
  result.body = dehydrateBody(result.body);
  return result;
}
13245
/**
 * Dehydrate an annotation body, which may be a single item or an array of items.
 *
 * @param {*} body - Body item or array of body items.
 * @returns {*} A new array of dehydrated items for array input, otherwise the
 *   dehydrated single item.
 */
function dehydrateBody(body) {
  return Array.isArray(body)
    ? body.map((entry) => dehydrateBodyItem(entry))
    : dehydrateBodyItem(body);
}
13251
/**
 * Dehydrate one annotation body item.
 *
 * Only objects with a string `source` containing "/" are rewritten; every
 * other value is returned as-is (same reference).
 *
 * @param {*} item - A body item.
 * @returns {*} A copy with `source` reduced to a bare id, or the original item.
 */
function dehydrateBodyItem(item) {
  const hasSource = Boolean(item) && typeof item === "object" && "source" in item;
  if (!hasSource) return item;

  const { source } = item;
  if (typeof source !== "string" || !source.includes("/")) return item;

  return { ...item, source: stripUriToId(source) };
}
13260
/**
 * Create a lookup function that maps a content checksum to its archive entry data.
 *
 * Entries under ".semiont/" are ignored. Every other entry is indexed by its
 * name with the final extension removed; when two entries share a key, the
 * later one wins.
 *
 * @param {Map<string, *>} entries - Archive entries keyed by entry name.
 * @returns {(checksum: string) => *} Resolver returning the entry data, or
 *   `undefined` when no entry matches the checksum.
 */
function buildBlobResolver2(entries) {
  const nameByChecksum = /* @__PURE__ */ new Map();
  for (const entryName of entries.keys()) {
    if (entryName.startsWith(".semiont/")) continue;
    const extStart = entryName.lastIndexOf(".");
    const key = extStart < 0 ? entryName : entryName.slice(0, extStart);
    nameByChecksum.set(key, entryName);
  }
  return (checksum) => {
    const entryName = nameByChecksum.get(checksum);
    if (!entryName) return void 0;
    return entries.get(entryName);
  };
}
13274
/**
 * Import a linked data archive: entity types first, then each resource with
 * its annotations.
 *
 * The whole archive is read into memory, the manifest is validated, and the
 * import steps then run strictly one after another. Resource documents are
 * processed in sorted entry-name order.
 *
 * @param {*} archive - Archive input handed to `readTarGz`.
 * @param {{ eventBus: object, userId: *, logger?: object }} options - Event bus,
 *   acting user and optional logger.
 * @returns {Promise<{ manifest: object, resourcesCreated: number, annotationsCreated: number, entityTypesAdded: number }>}
 * @throws {Error} When the manifest entry is missing or has an unexpected format.
 */
async function importLinkedData(archive, options) {
  const { eventBus, userId: userId2, logger } = options;

  const entries = /* @__PURE__ */ new Map();
  for await (const { name, data } of readTarGz(archive)) {
    entries.set(name, data);
  }

  const manifestData = entries.get(".semiont/manifest.jsonld");
  if (!manifestData) {
    throw new Error("Invalid linked data archive: missing .semiont/manifest.jsonld");
  }
  const manifest = JSON.parse(manifestData.toString("utf8"));
  if (!isLinkedDataManifest(manifest)) {
    throw new Error(
      `Invalid linked data archive: expected format "${LINKED_DATA_FORMAT}", got "${manifest["semiont:format"]}"`
    );
  }
  validateManifestVersion(manifest["semiont:version"]);

  const declaredEntityTypes = manifest["semiont:entityTypes"];
  logger?.info("Linked data import: parsed manifest", {
    entityTypes: declaredEntityTypes.length,
    resources: manifest["void:entities"]
  });

  const resolveBlob = buildBlobResolver2(entries);

  // Entity types are registered before any resource is created.
  let entityTypesAdded = 0;
  for (const entityType of declaredEntityTypes) {
    await addEntityType(entityType, userId2, eventBus, logger);
    entityTypesAdded += 1;
  }

  const isResourceDocument = (entryName) =>
    entryName.startsWith(".semiont/resources/") && entryName.endsWith(".jsonld");
  const resourcePaths = Array.from(entries.keys())
    .filter((entryName) => isResourceDocument(entryName))
    .sort();

  const totals = { resources: 0, annotations: 0 };
  for (const path of resourcePaths) {
    const resourceDoc = JSON.parse(entries.get(path).toString("utf8"));
    const outcome = await importResource(resourceDoc, userId2, eventBus, resolveBlob, logger);
    totals.resources += 1;
    totals.annotations += outcome.annotationsCreated;
  }

  const resourcesCreated = totals.resources;
  const annotationsCreated = totals.annotations;
  logger?.info("Linked data import complete", {
    resourcesCreated,
    annotationsCreated,
    entityTypesAdded
  });
  return { manifest, resourcesCreated, annotationsCreated, entityTypesAdded };
}
13322
/**
 * Ask the event bus to register an entity type and wait for the outcome.
 *
 * The reply race is subscribed BEFORE the command is published. The bus
 * channels are used like RxJS subjects (`pipe`/`next`), which do not replay
 * to late subscribers, so subscribing afterwards would miss a reply that is
 * published synchronously while the command is being handled and the call
 * would only end at the timeout.
 *
 * NOTE(review): replies are not correlated to this request; any
 * "mark:entity-type-added" event settles the wait. Confirm callers never run
 * imports concurrently on the same bus.
 *
 * @param {string} entityType - Entity type tag to add.
 * @param {*} userId2 - User on whose behalf the type is added.
 * @param {object} eventBus - Bus exposing `get(channelName)`.
 * @param {object} [logger] - Optional logger with a `debug` method.
 * @returns {Promise<void>}
 * @throws The `error` carried by a "mark:entity-type-add-failed" event, or a
 *   timeout `Error` after `IMPORT_TIMEOUT_MS`.
 */
async function addEntityType(entityType, userId2, eventBus, logger) {
  const result$ = (0, import_rxjs10.race)(
    eventBus.get("mark:entity-type-added").pipe((0, import_operators10.map)(() => "ok")),
    eventBus.get("mark:entity-type-add-failed").pipe((0, import_operators10.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs10.timer)(IMPORT_TIMEOUT_MS).pipe((0, import_operators10.map)(() => {
      throw new Error("Timeout waiting for mark:entity-type-added");
    }))
  );
  const command$ = eventBus.get("mark:add-entity-type");
  // firstValueFrom subscribes immediately, so the listeners exist before the command goes out.
  const outcome = (0, import_rxjs10.firstValueFrom)(result$);
  try {
    command$.next({
      tag: entityType,
      userId: userId2
    });
  } catch (err) {
    // The wait is abandoned; keep its eventual timeout from surfacing as an unhandled rejection.
    outcome.catch(() => {});
    throw err;
  }
  await outcome;
  logger?.debug("Added entity type", { entityType });
}
13339
/**
 * Create one resource from its exported JSON-LD document, then create its annotations.
 *
 * The first representation supplies the format, language and content
 * checksum; the content itself is looked up through `resolveBlob`.
 * `representations` may be an array or a single object (the form JSON-LD
 * tooling can produce for one-element arrays).
 *
 * The reply race is subscribed BEFORE "yield:create" is published. The bus
 * channels are used like RxJS subjects (`pipe`/`next`), which do not replay
 * to late subscribers, so a reply published synchronously would otherwise be
 * missed and the call would only end at the timeout.
 *
 * NOTE(review): replies are not correlated to this request; any
 * "yield:created" event settles the wait. Confirm imports never run
 * concurrently on the same bus.
 *
 * @param {object} doc - Resource JSON-LD document.
 * @param {*} userId2 - User on whose behalf the resource is created.
 * @param {object} eventBus - Bus exposing `get(channelName)`.
 * @param {(checksum: string) => *} resolveBlob - Returns the content for a checksum, or a falsy value.
 * @param {object} [logger] - Optional logger with a `debug` method.
 * @returns {Promise<{ annotationsCreated: number }>}
 * @throws {Error} When the document has no content checksum, the blob is
 *   missing, creation fails, or no reply arrives within `IMPORT_TIMEOUT_MS`.
 */
async function importResource(doc, userId2, eventBus, resolveBlob, logger) {
  const name = doc["name"];
  const representations = doc["representations"];
  const annotations = doc["annotations"];
  const entityTypes = doc["entityTypes"];
  const creationMethod = doc["creationMethod"];

  // Accept both the array form and a single representation object.
  let repList = [];
  if (Array.isArray(representations)) {
    repList = representations;
  } else if (representations) {
    repList = [representations];
  }

  let format = "text/markdown";
  let language;
  let contentChecksum;
  if (repList.length > 0) {
    const primary = repList[0];
    if (primary["encodingFormat"]) format = primary["encodingFormat"];
    if (primary["inLanguage"]) language = primary["inLanguage"];
    if (primary["sha256"]) contentChecksum = primary["sha256"];
  }
  if (!contentChecksum) {
    throw new Error(`Resource "${name}" has no content checksum in representations`);
  }
  const blob = resolveBlob(contentChecksum);
  if (!blob) {
    throw new Error(`Missing content blob for checksum ${contentChecksum} (resource "${name}")`);
  }

  const createResult$ = (0, import_rxjs10.race)(
    eventBus.get("yield:created").pipe((0, import_operators10.map)((r) => r)),
    eventBus.get("yield:create-failed").pipe((0, import_operators10.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs10.timer)(IMPORT_TIMEOUT_MS).pipe((0, import_operators10.map)(() => {
      throw new Error("Timeout waiting for yield:created");
    }))
  );
  const command$ = eventBus.get("yield:create");
  // firstValueFrom subscribes immediately, so the listeners exist before the command goes out.
  const pendingCreate = (0, import_rxjs10.firstValueFrom)(createResult$);
  try {
    command$.next({
      name,
      content: blob,
      format,
      userId: userId2,
      language,
      entityTypes: entityTypes ?? [],
      creationMethod
    });
  } catch (err) {
    // The wait is abandoned; keep its eventual timeout from surfacing as an unhandled rejection.
    pendingCreate.catch(() => {});
    throw err;
  }
  const created = await pendingCreate;
  const resourceId4 = created.resourceId;
  logger?.debug("Created resource from JSON-LD", { name, resourceId: resourceId4 });

  // Annotations are created one at a time against the newly assigned resource id.
  let annotationsCreated = 0;
  if (annotations && annotations.length > 0) {
    for (const annotation of annotations) {
      await createAnnotation(annotation, resourceId4, userId2, eventBus, logger);
      annotationsCreated++;
    }
  }
  return { annotationsCreated };
}
13391
/**
 * Publish one imported annotation on the event bus and wait for the outcome.
 *
 * The annotation is dehydrated before it is sent. The reply race is
 * subscribed BEFORE "mark:create" is published. The bus channels are used
 * like RxJS subjects (`pipe`/`next`), which do not replay to late
 * subscribers, so a reply published synchronously would otherwise be missed
 * and the call would only end at the timeout.
 *
 * NOTE(review): replies are not correlated to this request; any
 * "mark:created" event settles the wait. Confirm imports never run
 * concurrently on the same bus.
 *
 * @param {object} annotation - Annotation as read from the exported document.
 * @param {*} resourceId4 - Id of the resource the annotation belongs to.
 * @param {*} userId2 - User on whose behalf the annotation is created.
 * @param {object} eventBus - Bus exposing `get(channelName)`.
 * @param {object} [logger] - Optional logger with a `debug` method.
 * @returns {Promise<void>}
 * @throws The `error` carried by a "mark:create-failed" event, or a timeout
 *   `Error` after `IMPORT_TIMEOUT_MS`.
 */
async function createAnnotation(annotation, resourceId4, userId2, eventBus, logger) {
  const result$ = (0, import_rxjs10.race)(
    eventBus.get("mark:created").pipe((0, import_operators10.map)(() => "ok")),
    eventBus.get("mark:create-failed").pipe((0, import_operators10.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs10.timer)(IMPORT_TIMEOUT_MS).pipe((0, import_operators10.map)(() => {
      throw new Error("Timeout waiting for mark:created");
    }))
  );
  const command$ = eventBus.get("mark:create");
  // firstValueFrom subscribes immediately, so the listeners exist before the command goes out.
  const outcome = (0, import_rxjs10.firstValueFrom)(result$);
  try {
    command$.next({
      annotation: dehydrateAnnotation(annotation),
      userId: userId2,
      resourceId: resourceId4
    });
  } catch (err) {
    // The wait is abandoned; keep its eventual timeout from surfacing as an unhandled rejection.
    outcome.catch(() => {});
    throw err;
  }
  await outcome;
  // The log line reports the id as it appeared in the imported document.
  logger?.debug("Created annotation", { annotationId: annotation.id });
}
13409
+
12266
13410
  // src/annotation-assembly.ts
12267
13411
  import {
12268
13412
  assembleAnnotation,
@@ -12270,19 +13414,18 @@ import {
12270
13414
  } from "@semiont/core";
12271
13415
 
12272
13416
  // src/annotation-operations.ts
12273
- import { getTargetSource as getTargetSource3 } from "@semiont/api-client";
12274
- import { annotationId, uriToResourceId as uriToResourceId3, uriToAnnotationId as uriToAnnotationId2, assembleAnnotation as assembleAnnotation2, applyBodyOperations as applyBodyOperations2 } from "@semiont/core";
13417
+ import { annotationId, resourceId as makeResourceId3, assembleAnnotation as assembleAnnotation2, applyBodyOperations as applyBodyOperations2 } from "@semiont/core";
12275
13418
  var AnnotationOperations = class {
12276
13419
  /**
12277
13420
  * Create a new annotation via EventBus → Stower
12278
13421
  */
12279
- static async createAnnotation(request, userId2, creator, eventBus, publicURL) {
12280
- const { annotation } = assembleAnnotation2(request, creator, publicURL);
12281
- const resourceId2 = uriToResourceId3(request.target.source);
13422
+ static async createAnnotation(request, userId2, creator, eventBus) {
13423
+ const { annotation } = assembleAnnotation2(request, creator);
13424
+ const resId = makeResourceId3(request.target.source);
12282
13425
  eventBus.get("mark:create").next({
12283
13426
  annotation,
12284
13427
  userId: userId2,
12285
- resourceId: resourceId2
13428
+ resourceId: resId
12286
13429
  });
12287
13430
  return { annotation };
12288
13431
  }
@@ -12290,19 +13433,19 @@ var AnnotationOperations = class {
12290
13433
  * Update annotation body via EventBus → Stower
12291
13434
  */
12292
13435
  static async updateAnnotationBody(id, request, userId2, eventBus, kb) {
13436
+ const resId = makeResourceId3(request.resourceId);
12293
13437
  const annotation = await AnnotationContext.getAnnotation(
12294
13438
  annotationId(id),
12295
- uriToResourceId3(request.resourceId),
13439
+ resId,
12296
13440
  kb
12297
13441
  );
12298
13442
  if (!annotation) {
12299
13443
  throw new Error("Annotation not found");
12300
13444
  }
12301
- const resourceId2 = uriToResourceId3(getTargetSource3(annotation.target));
12302
13445
  eventBus.get("mark:update-body").next({
12303
13446
  annotationId: annotationId(id),
12304
13447
  userId: userId2,
12305
- resourceId: resourceId2,
13448
+ resourceId: resId,
12306
13449
  operations: request.operations
12307
13450
  });
12308
13451
  const updatedBody = applyBodyOperations2(annotation.body, request.operations);
@@ -12316,8 +13459,8 @@ var AnnotationOperations = class {
12316
13459
  /**
12317
13460
  * Delete an annotation via EventBus → Stower
12318
13461
  */
12319
- static async deleteAnnotation(id, resourceIdUri, userId2, eventBus, kb, logger) {
12320
- const resId = uriToResourceId3(resourceIdUri);
13462
+ static async deleteAnnotation(id, resourceIdStr, userId2, eventBus, kb, logger) {
13463
+ const resId = makeResourceId3(resourceIdStr);
12321
13464
  const projection = await AnnotationContext.getResourceAnnotations(resId, kb);
12322
13465
  const annotation = projection.annotations.find((a) => a.id === id);
12323
13466
  if (!annotation) {
@@ -12325,7 +13468,7 @@ var AnnotationOperations = class {
12325
13468
  }
12326
13469
  logger?.debug("Removing annotation via EventBus", { annotationId: id });
12327
13470
  eventBus.get("mark:delete").next({
12328
- annotationId: uriToAnnotationId2(id),
13471
+ annotationId: annotationId(id),
12329
13472
  userId: userId2,
12330
13473
  resourceId: resId
12331
13474
  });
@@ -12339,12 +13482,14 @@ var VERSION = "0.1.0";
12339
13482
  export {
12340
13483
  AnnotationContext,
12341
13484
  AnnotationOperations,
12342
- Binder,
13485
+ BACKUP_FORMAT,
12343
13486
  CloneTokenManager,
13487
+ FORMAT_VERSION,
12344
13488
  Gatherer,
12345
13489
  GraphContext,
12346
13490
  GraphDBConsumer,
12347
13491
  LLMContext,
13492
+ Matcher,
12348
13493
  PACKAGE_NAME,
12349
13494
  ResourceContext,
12350
13495
  ResourceOperations,
@@ -12354,10 +13499,16 @@ export {
12354
13499
  assembleAnnotation,
12355
13500
  bootstrapEntityTypes,
12356
13501
  createKnowledgeBase,
13502
+ exportBackup,
13503
+ exportLinkedData,
12357
13504
  generateReferenceSuggestions,
12358
13505
  generateResourceSummary,
13506
+ importBackup,
13507
+ importLinkedData,
13508
+ isBackupManifest,
12359
13509
  readEntityTypesProjection,
12360
13510
  resetBootstrap,
12361
- startMakeMeaning
13511
+ startMakeMeaning,
13512
+ validateManifestVersion
12362
13513
  };
12363
13514
  //# sourceMappingURL=index.js.map