@semiont/make-meaning 0.2.46 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.d.ts +283 -25
- package/dist/index.js +1354 -207
- package/dist/index.js.map +1 -1
- package/package.json +7 -7
package/dist/index.js
CHANGED
|
@@ -3634,7 +3634,7 @@ var require_firstValueFrom = __commonJS({
|
|
|
3634
3634
|
exports.firstValueFrom = void 0;
|
|
3635
3635
|
var EmptyError_1 = require_EmptyError();
|
|
3636
3636
|
var Subscriber_1 = require_Subscriber();
|
|
3637
|
-
function
|
|
3637
|
+
function firstValueFrom5(source, config) {
|
|
3638
3638
|
var hasConfig = typeof config === "object";
|
|
3639
3639
|
return new Promise(function(resolve4, reject) {
|
|
3640
3640
|
var subscriber = new Subscriber_1.SafeSubscriber({
|
|
@@ -3654,7 +3654,7 @@ var require_firstValueFrom = __commonJS({
|
|
|
3654
3654
|
source.subscribe(subscriber);
|
|
3655
3655
|
});
|
|
3656
3656
|
}
|
|
3657
|
-
exports.firstValueFrom =
|
|
3657
|
+
exports.firstValueFrom = firstValueFrom5;
|
|
3658
3658
|
}
|
|
3659
3659
|
});
|
|
3660
3660
|
|
|
@@ -3799,7 +3799,7 @@ var require_map = __commonJS({
|
|
|
3799
3799
|
exports.map = void 0;
|
|
3800
3800
|
var lift_1 = require_lift();
|
|
3801
3801
|
var OperatorSubscriber_1 = require_OperatorSubscriber();
|
|
3802
|
-
function
|
|
3802
|
+
function map5(project, thisArg) {
|
|
3803
3803
|
return lift_1.operate(function(source, subscriber) {
|
|
3804
3804
|
var index = 0;
|
|
3805
3805
|
source.subscribe(OperatorSubscriber_1.createOperatorSubscriber(subscriber, function(value) {
|
|
@@ -3807,7 +3807,7 @@ var require_map = __commonJS({
|
|
|
3807
3807
|
}));
|
|
3808
3808
|
});
|
|
3809
3809
|
}
|
|
3810
|
-
exports.map =
|
|
3810
|
+
exports.map = map5;
|
|
3811
3811
|
}
|
|
3812
3812
|
});
|
|
3813
3813
|
|
|
@@ -4657,7 +4657,7 @@ var require_timer = __commonJS({
|
|
|
4657
4657
|
var async_1 = require_async();
|
|
4658
4658
|
var isScheduler_1 = require_isScheduler();
|
|
4659
4659
|
var isDate_1 = require_isDate();
|
|
4660
|
-
function
|
|
4660
|
+
function timer5(dueTime, intervalOrScheduler, scheduler) {
|
|
4661
4661
|
if (dueTime === void 0) {
|
|
4662
4662
|
dueTime = 0;
|
|
4663
4663
|
}
|
|
@@ -4690,7 +4690,7 @@ var require_timer = __commonJS({
|
|
|
4690
4690
|
}, due);
|
|
4691
4691
|
});
|
|
4692
4692
|
}
|
|
4693
|
-
exports.timer =
|
|
4693
|
+
exports.timer = timer5;
|
|
4694
4694
|
}
|
|
4695
4695
|
});
|
|
4696
4696
|
|
|
@@ -4890,7 +4890,7 @@ var require_race = __commonJS({
|
|
|
4890
4890
|
var innerFrom_1 = require_innerFrom();
|
|
4891
4891
|
var argsOrArgArray_1 = require_argsOrArgArray();
|
|
4892
4892
|
var OperatorSubscriber_1 = require_OperatorSubscriber();
|
|
4893
|
-
function
|
|
4893
|
+
function race5() {
|
|
4894
4894
|
var sources = [];
|
|
4895
4895
|
for (var _i = 0; _i < arguments.length; _i++) {
|
|
4896
4896
|
sources[_i] = arguments[_i];
|
|
@@ -4898,7 +4898,7 @@ var require_race = __commonJS({
|
|
|
4898
4898
|
sources = argsOrArgArray_1.argsOrArgArray(sources);
|
|
4899
4899
|
return sources.length === 1 ? innerFrom_1.innerFrom(sources[0]) : new Observable_1.Observable(raceInit(sources));
|
|
4900
4900
|
}
|
|
4901
|
-
exports.race =
|
|
4901
|
+
exports.race = race5;
|
|
4902
4902
|
function raceInit(sources) {
|
|
4903
4903
|
return function(subscriber) {
|
|
4904
4904
|
var subscriptions = [];
|
|
@@ -9468,14 +9468,14 @@ var require_race2 = __commonJS({
|
|
|
9468
9468
|
exports.race = void 0;
|
|
9469
9469
|
var argsOrArgArray_1 = require_argsOrArgArray();
|
|
9470
9470
|
var raceWith_1 = require_raceWith();
|
|
9471
|
-
function
|
|
9471
|
+
function race5() {
|
|
9472
9472
|
var args = [];
|
|
9473
9473
|
for (var _i = 0; _i < arguments.length; _i++) {
|
|
9474
9474
|
args[_i] = arguments[_i];
|
|
9475
9475
|
}
|
|
9476
9476
|
return raceWith_1.raceWith.apply(void 0, __spreadArray([], __read(argsOrArgArray_1.argsOrArgArray(args))));
|
|
9477
9477
|
}
|
|
9478
|
-
exports.race =
|
|
9478
|
+
exports.race = race5;
|
|
9479
9479
|
}
|
|
9480
9480
|
});
|
|
9481
9481
|
|
|
@@ -9966,11 +9966,9 @@ var import_rxjs = __toESM(require_cjs(), 1);
|
|
|
9966
9966
|
var import_operators = __toESM(require_operators(), 1);
|
|
9967
9967
|
import { EventQuery } from "@semiont/event-sourcing";
|
|
9968
9968
|
import { didToAgent, burstBuffer } from "@semiont/core";
|
|
9969
|
-
import { resourceId as makeResourceId, findBodyItem } from "@semiont/core";
|
|
9970
|
-
import { toResourceUri, toAnnotationUri } from "@semiont/event-sourcing";
|
|
9969
|
+
import { resourceId as makeResourceId, annotationId as makeAnnotationId, findBodyItem } from "@semiont/core";
|
|
9971
9970
|
var GraphDBConsumer = class _GraphDBConsumer {
|
|
9972
|
-
constructor(
|
|
9973
|
-
this.config = config;
|
|
9971
|
+
constructor(eventStore, graphDb, logger) {
|
|
9974
9972
|
this.eventStore = eventStore;
|
|
9975
9973
|
this.graphDb = graphDb;
|
|
9976
9974
|
this.logger = logger;
|
|
@@ -10163,13 +10161,9 @@ var GraphDBConsumer = class _GraphDBConsumer {
|
|
|
10163
10161
|
if (!event.resourceId) {
|
|
10164
10162
|
throw new Error("resource.created requires resourceId");
|
|
10165
10163
|
}
|
|
10166
|
-
const resourceUri = toResourceUri(
|
|
10167
|
-
{ baseUrl: this.config.services.backend.publicURL },
|
|
10168
|
-
event.resourceId
|
|
10169
|
-
);
|
|
10170
10164
|
return {
|
|
10171
10165
|
"@context": "https://schema.org/",
|
|
10172
|
-
"@id":
|
|
10166
|
+
"@id": event.resourceId,
|
|
10173
10167
|
name: event.payload.name,
|
|
10174
10168
|
entityTypes: event.payload.entityTypes || [],
|
|
10175
10169
|
representations: [{
|
|
@@ -10203,13 +10197,13 @@ var GraphDBConsumer = class _GraphDBConsumer {
|
|
|
10203
10197
|
}
|
|
10204
10198
|
case "resource.archived":
|
|
10205
10199
|
if (!event.resourceId) throw new Error("resource.archived requires resourceId");
|
|
10206
|
-
await graphDb.updateResource(
|
|
10200
|
+
await graphDb.updateResource(makeResourceId(event.resourceId), {
|
|
10207
10201
|
archived: true
|
|
10208
10202
|
});
|
|
10209
10203
|
break;
|
|
10210
10204
|
case "resource.unarchived":
|
|
10211
10205
|
if (!event.resourceId) throw new Error("resource.unarchived requires resourceId");
|
|
10212
|
-
await graphDb.updateResource(
|
|
10206
|
+
await graphDb.updateResource(makeResourceId(event.resourceId), {
|
|
10213
10207
|
archived: false
|
|
10214
10208
|
});
|
|
10215
10209
|
break;
|
|
@@ -10226,7 +10220,7 @@ var GraphDBConsumer = class _GraphDBConsumer {
|
|
|
10226
10220
|
});
|
|
10227
10221
|
break;
|
|
10228
10222
|
case "annotation.removed":
|
|
10229
|
-
await graphDb.deleteAnnotation(
|
|
10223
|
+
await graphDb.deleteAnnotation(makeAnnotationId(event.payload.annotationId));
|
|
10230
10224
|
break;
|
|
10231
10225
|
case "annotation.body.updated":
|
|
10232
10226
|
this.logger.debug("Processing annotation.body.updated event", {
|
|
@@ -10234,8 +10228,8 @@ var GraphDBConsumer = class _GraphDBConsumer {
|
|
|
10234
10228
|
payload: event.payload
|
|
10235
10229
|
});
|
|
10236
10230
|
try {
|
|
10237
|
-
const
|
|
10238
|
-
const currentAnnotation = await graphDb.getAnnotation(
|
|
10231
|
+
const annId = makeAnnotationId(event.payload.annotationId);
|
|
10232
|
+
const currentAnnotation = await graphDb.getAnnotation(annId);
|
|
10239
10233
|
if (currentAnnotation) {
|
|
10240
10234
|
let bodyArray = Array.isArray(currentAnnotation.body) ? [...currentAnnotation.body] : currentAnnotation.body ? [currentAnnotation.body] : [];
|
|
10241
10235
|
for (const op of event.payload.operations) {
|
|
@@ -10256,7 +10250,7 @@ var GraphDBConsumer = class _GraphDBConsumer {
|
|
|
10256
10250
|
}
|
|
10257
10251
|
}
|
|
10258
10252
|
}
|
|
10259
|
-
await graphDb.updateAnnotation(
|
|
10253
|
+
await graphDb.updateAnnotation(annId, {
|
|
10260
10254
|
body: bodyArray
|
|
10261
10255
|
});
|
|
10262
10256
|
this.logger.info("updateAnnotation completed successfully");
|
|
@@ -10274,9 +10268,10 @@ var GraphDBConsumer = class _GraphDBConsumer {
|
|
|
10274
10268
|
case "entitytag.added":
|
|
10275
10269
|
if (!event.resourceId) throw new Error("entitytag.added requires resourceId");
|
|
10276
10270
|
{
|
|
10277
|
-
const
|
|
10271
|
+
const rid = makeResourceId(event.resourceId);
|
|
10272
|
+
const doc = await graphDb.getResource(rid);
|
|
10278
10273
|
if (doc) {
|
|
10279
|
-
await graphDb.updateResource(
|
|
10274
|
+
await graphDb.updateResource(rid, {
|
|
10280
10275
|
entityTypes: [...doc.entityTypes || [], event.payload.entityType]
|
|
10281
10276
|
});
|
|
10282
10277
|
}
|
|
@@ -10285,9 +10280,10 @@ var GraphDBConsumer = class _GraphDBConsumer {
|
|
|
10285
10280
|
case "entitytag.removed":
|
|
10286
10281
|
if (!event.resourceId) throw new Error("entitytag.removed requires resourceId");
|
|
10287
10282
|
{
|
|
10288
|
-
const
|
|
10283
|
+
const rid = makeResourceId(event.resourceId);
|
|
10284
|
+
const doc = await graphDb.getResource(rid);
|
|
10289
10285
|
if (doc) {
|
|
10290
|
-
await graphDb.updateResource(
|
|
10286
|
+
await graphDb.updateResource(rid, {
|
|
10291
10287
|
entityTypes: (doc.entityTypes || []).filter((t) => t !== event.payload.entityType)
|
|
10292
10288
|
});
|
|
10293
10289
|
}
|
|
@@ -10304,20 +10300,20 @@ var GraphDBConsumer = class _GraphDBConsumer {
|
|
|
10304
10300
|
* Rebuild entire resource from events.
|
|
10305
10301
|
* Bypasses the live pipeline — reads directly from event store.
|
|
10306
10302
|
*/
|
|
10307
|
-
async rebuildResource(
|
|
10303
|
+
async rebuildResource(resourceId4) {
|
|
10308
10304
|
const graphDb = this.ensureInitialized();
|
|
10309
|
-
this.logger.info("Rebuilding resource from events", { resourceId:
|
|
10305
|
+
this.logger.info("Rebuilding resource from events", { resourceId: resourceId4 });
|
|
10310
10306
|
try {
|
|
10311
|
-
await graphDb.deleteResource(
|
|
10307
|
+
await graphDb.deleteResource(resourceId4);
|
|
10312
10308
|
} catch (error) {
|
|
10313
|
-
this.logger.debug("No existing resource to delete", { resourceId:
|
|
10309
|
+
this.logger.debug("No existing resource to delete", { resourceId: resourceId4 });
|
|
10314
10310
|
}
|
|
10315
10311
|
const query = new EventQuery(this.eventStore.log.storage);
|
|
10316
|
-
const events = await query.getResourceEvents(
|
|
10312
|
+
const events = await query.getResourceEvents(resourceId4);
|
|
10317
10313
|
for (const storedEvent of events) {
|
|
10318
10314
|
await this.applyEventToGraph(storedEvent);
|
|
10319
10315
|
}
|
|
10320
|
-
this.logger.info("Resource rebuild complete", { resourceId:
|
|
10316
|
+
this.logger.info("Resource rebuild complete", { resourceId: resourceId4, eventCount: events.length });
|
|
10321
10317
|
}
|
|
10322
10318
|
/**
|
|
10323
10319
|
* Rebuild entire GraphDB from all events.
|
|
@@ -10333,8 +10329,8 @@ var GraphDBConsumer = class _GraphDBConsumer {
|
|
|
10333
10329
|
const allResourceIds = await this.eventStore.log.getAllResourceIds();
|
|
10334
10330
|
this.logger.info("Found resources to rebuild", { count: allResourceIds.length });
|
|
10335
10331
|
this.logger.info("PASS 1: Creating all nodes (resources + annotations)");
|
|
10336
|
-
for (const
|
|
10337
|
-
const events = await query.getResourceEvents(makeResourceId(
|
|
10332
|
+
for (const resourceId4 of allResourceIds) {
|
|
10333
|
+
const events = await query.getResourceEvents(makeResourceId(resourceId4));
|
|
10338
10334
|
for (const storedEvent of events) {
|
|
10339
10335
|
if (storedEvent.event.type === "annotation.body.updated") {
|
|
10340
10336
|
continue;
|
|
@@ -10344,8 +10340,8 @@ var GraphDBConsumer = class _GraphDBConsumer {
|
|
|
10344
10340
|
}
|
|
10345
10341
|
this.logger.info("Pass 1 complete - all nodes created");
|
|
10346
10342
|
this.logger.info("PASS 2: Creating all REFERENCES edges");
|
|
10347
|
-
for (const
|
|
10348
|
-
const events = await query.getResourceEvents(makeResourceId(
|
|
10343
|
+
for (const resourceId4 of allResourceIds) {
|
|
10344
|
+
const events = await query.getResourceEvents(makeResourceId(resourceId4));
|
|
10349
10345
|
for (const storedEvent of events) {
|
|
10350
10346
|
if (storedEvent.event.type === "annotation.body.updated") {
|
|
10351
10347
|
await this.applyEventToGraph(storedEvent);
|
|
@@ -10387,7 +10383,10 @@ async function bootstrapEntityTypes(eventBus, config, logger) {
|
|
|
10387
10383
|
logger?.debug("Entity types bootstrap already completed, skipping");
|
|
10388
10384
|
return;
|
|
10389
10385
|
}
|
|
10390
|
-
const configuredPath = config.services.filesystem
|
|
10386
|
+
const configuredPath = config.services.filesystem?.path;
|
|
10387
|
+
if (!configuredPath) {
|
|
10388
|
+
throw new Error("services.filesystem.path is required for entity types bootstrap");
|
|
10389
|
+
}
|
|
10391
10390
|
const projectRoot = config._metadata?.projectRoot;
|
|
10392
10391
|
let basePath;
|
|
10393
10392
|
if (path.isAbsolute(configuredPath)) {
|
|
@@ -10451,13 +10450,12 @@ function createKnowledgeBase(eventStore, basePath, projectRoot, graphDb, logger)
|
|
|
10451
10450
|
// src/gatherer.ts
|
|
10452
10451
|
var import_rxjs3 = __toESM(require_cjs(), 1);
|
|
10453
10452
|
var import_operators3 = __toESM(require_operators(), 1);
|
|
10454
|
-
import {
|
|
10453
|
+
import { annotationId as makeAnnotationId2, resourceId } from "@semiont/core";
|
|
10455
10454
|
import { EventQuery as EventQuery2 } from "@semiont/event-sourcing";
|
|
10456
10455
|
import { getResourceEntityTypes as getResourceEntityTypes4, getBodySource as getBodySource2 } from "@semiont/api-client";
|
|
10457
10456
|
import { getEntityTypes as getEntityTypes2 } from "@semiont/ontology";
|
|
10458
10457
|
|
|
10459
10458
|
// src/generation/resource-generation.ts
|
|
10460
|
-
import { getLocaleEnglishName } from "@semiont/api-client";
|
|
10461
10459
|
async function generateResourceSummary(resourceName, content, entityTypes, client) {
|
|
10462
10460
|
const truncatedContent = content.length > 2e3 ? content.substring(0, 2e3) + "..." : content;
|
|
10463
10461
|
const prompt = `Create a brief, intelligent summary of this resource titled "${resourceName}".
|
|
@@ -10483,6 +10481,7 @@ Format as a simple list, one suggestion per line.`;
|
|
|
10483
10481
|
// src/annotation-context.ts
|
|
10484
10482
|
import {
|
|
10485
10483
|
getBodySource,
|
|
10484
|
+
getResourceId,
|
|
10486
10485
|
getTargetSource,
|
|
10487
10486
|
getTargetSelector,
|
|
10488
10487
|
getResourceEntityTypes,
|
|
@@ -10490,7 +10489,7 @@ import {
|
|
|
10490
10489
|
getPrimaryRepresentation as getPrimaryRepresentation2,
|
|
10491
10490
|
decodeRepresentation as decodeRepresentation2
|
|
10492
10491
|
} from "@semiont/api-client";
|
|
10493
|
-
import { resourceId as createResourceId
|
|
10492
|
+
import { resourceId as createResourceId } from "@semiont/core";
|
|
10494
10493
|
import { getEntityTypes } from "@semiont/ontology";
|
|
10495
10494
|
|
|
10496
10495
|
// src/resource-context.ts
|
|
@@ -10499,8 +10498,8 @@ var ResourceContext = class {
|
|
|
10499
10498
|
/**
|
|
10500
10499
|
* Get resource metadata from view storage
|
|
10501
10500
|
*/
|
|
10502
|
-
static async getResourceMetadata(
|
|
10503
|
-
const view = await kb.views.get(
|
|
10501
|
+
static async getResourceMetadata(resourceId4, kb) {
|
|
10502
|
+
const view = await kb.views.get(resourceId4);
|
|
10504
10503
|
if (!view) {
|
|
10505
10504
|
return null;
|
|
10506
10505
|
}
|
|
@@ -10572,7 +10571,7 @@ var AnnotationContext = class {
|
|
|
10572
10571
|
/**
|
|
10573
10572
|
* Build LLM context for an annotation
|
|
10574
10573
|
*
|
|
10575
|
-
* @param
|
|
10574
|
+
* @param annotationId - Bare annotation ID
|
|
10576
10575
|
* @param resourceId - Source resource ID
|
|
10577
10576
|
* @param kb - Knowledge base stores
|
|
10578
10577
|
* @param options - Context building options
|
|
@@ -10580,7 +10579,7 @@ var AnnotationContext = class {
|
|
|
10580
10579
|
* @returns Rich context for LLM processing
|
|
10581
10580
|
* @throws Error if annotation or resource not found
|
|
10582
10581
|
*/
|
|
10583
|
-
static async buildLLMContext(
|
|
10582
|
+
static async buildLLMContext(annotationId2, resourceId4, kb, options = {}, inferenceClient, logger) {
|
|
10584
10583
|
const {
|
|
10585
10584
|
includeSourceContext = true,
|
|
10586
10585
|
includeTargetContext = true,
|
|
@@ -10589,47 +10588,41 @@ var AnnotationContext = class {
|
|
|
10589
10588
|
if (contextWindow < 100 || contextWindow > 5e3) {
|
|
10590
10589
|
throw new Error("contextWindow must be between 100 and 5000");
|
|
10591
10590
|
}
|
|
10592
|
-
logger?.debug("Building LLM context", {
|
|
10593
|
-
logger?.debug("Getting view for resource", { resourceId:
|
|
10591
|
+
logger?.debug("Building LLM context", { annotationId: annotationId2, resourceId: resourceId4 });
|
|
10592
|
+
logger?.debug("Getting view for resource", { resourceId: resourceId4 });
|
|
10594
10593
|
let sourceView;
|
|
10595
10594
|
try {
|
|
10596
|
-
sourceView = await kb.views.get(
|
|
10595
|
+
sourceView = await kb.views.get(resourceId4);
|
|
10597
10596
|
logger?.debug("Retrieved view", { hasView: !!sourceView });
|
|
10598
10597
|
if (!sourceView) {
|
|
10599
10598
|
throw new Error("Source resource not found");
|
|
10600
10599
|
}
|
|
10601
10600
|
} catch (error) {
|
|
10602
|
-
logger?.error("Error getting view", { resourceId:
|
|
10601
|
+
logger?.error("Error getting view", { resourceId: resourceId4, error });
|
|
10603
10602
|
throw error;
|
|
10604
10603
|
}
|
|
10605
10604
|
logger?.debug("Looking for annotation in resource", {
|
|
10606
|
-
|
|
10607
|
-
resourceId:
|
|
10605
|
+
annotationId: annotationId2,
|
|
10606
|
+
resourceId: resourceId4,
|
|
10608
10607
|
totalAnnotations: sourceView.annotations.annotations.length,
|
|
10609
10608
|
firstFiveIds: sourceView.annotations.annotations.slice(0, 5).map((a) => a.id)
|
|
10610
10609
|
});
|
|
10611
|
-
const annotation = sourceView.annotations.annotations.find((a) => a.id ===
|
|
10610
|
+
const annotation = sourceView.annotations.annotations.find((a) => a.id === annotationId2);
|
|
10612
10611
|
logger?.debug("Annotation search result", { found: !!annotation });
|
|
10613
10612
|
if (!annotation) {
|
|
10614
10613
|
throw new Error("Annotation not found in view");
|
|
10615
10614
|
}
|
|
10616
10615
|
const targetSource = getTargetSource(annotation.target);
|
|
10617
|
-
|
|
10618
|
-
|
|
10619
|
-
|
|
10620
|
-
throw new Error(`Annotation target resource ID (${targetResourceId}) does not match expected resource ID (${resourceId2})`);
|
|
10616
|
+
logger?.debug("Validating target resource", { targetSource, expectedResourceId: resourceId4 });
|
|
10617
|
+
if (targetSource !== String(resourceId4)) {
|
|
10618
|
+
throw new Error(`Annotation target resource ID (${targetSource}) does not match expected resource ID (${resourceId4})`);
|
|
10621
10619
|
}
|
|
10622
10620
|
const sourceDoc = sourceView.resource;
|
|
10623
10621
|
const bodySource = getBodySource(annotation.body);
|
|
10624
10622
|
let targetDoc = null;
|
|
10625
10623
|
if (bodySource) {
|
|
10626
|
-
const
|
|
10627
|
-
const
|
|
10628
|
-
if (!lastPart) {
|
|
10629
|
-
throw new Error(`Invalid body source URI: ${bodySource}`);
|
|
10630
|
-
}
|
|
10631
|
-
const targetResourceId2 = createResourceId(lastPart);
|
|
10632
|
-
const targetView = await kb.views.get(targetResourceId2);
|
|
10624
|
+
const targetResourceId = createResourceId(bodySource);
|
|
10625
|
+
const targetView = await kb.views.get(targetResourceId);
|
|
10633
10626
|
targetDoc = targetView?.resource || null;
|
|
10634
10627
|
}
|
|
10635
10628
|
let sourceContext;
|
|
@@ -10686,7 +10679,83 @@ var AnnotationContext = class {
|
|
|
10686
10679
|
}
|
|
10687
10680
|
}
|
|
10688
10681
|
const suggestedResolution = void 0;
|
|
10682
|
+
logger?.debug("Building graph context", { resourceId: resourceId4 });
|
|
10683
|
+
const [connections, referencedByAnnotations, entityTypeStats] = await Promise.all([
|
|
10684
|
+
kb.graph.getResourceConnections(resourceId4),
|
|
10685
|
+
kb.graph.getResourceReferencedBy(resourceId4),
|
|
10686
|
+
kb.graph.getEntityTypeStats()
|
|
10687
|
+
]);
|
|
10688
|
+
const citedByMap = /* @__PURE__ */ new Map();
|
|
10689
|
+
for (const ann of referencedByAnnotations) {
|
|
10690
|
+
const source = getTargetSource(ann.target);
|
|
10691
|
+
if (source && source !== String(resourceId4)) {
|
|
10692
|
+
const sourceResId = createResourceId(source);
|
|
10693
|
+
const sourceView2 = await kb.views.get(sourceResId);
|
|
10694
|
+
if (sourceView2?.resource) {
|
|
10695
|
+
citedByMap.set(source, sourceView2.resource.name);
|
|
10696
|
+
}
|
|
10697
|
+
}
|
|
10698
|
+
}
|
|
10699
|
+
const annotationEntityTypes = getEntityTypes(annotation);
|
|
10700
|
+
const siblingEntityTypes = /* @__PURE__ */ new Set();
|
|
10701
|
+
for (const ann of sourceView.annotations.annotations) {
|
|
10702
|
+
if (ann.id !== annotationId2) {
|
|
10703
|
+
for (const et of getEntityTypes(ann)) {
|
|
10704
|
+
siblingEntityTypes.add(et);
|
|
10705
|
+
}
|
|
10706
|
+
}
|
|
10707
|
+
}
|
|
10708
|
+
const entityTypeFrequencies = {};
|
|
10709
|
+
for (const stat of entityTypeStats) {
|
|
10710
|
+
entityTypeFrequencies[stat.type] = stat.count;
|
|
10711
|
+
}
|
|
10712
|
+
let inferredRelationshipSummary;
|
|
10713
|
+
if (inferenceClient && sourceContext) {
|
|
10714
|
+
try {
|
|
10715
|
+
const connNames = connections.map((c) => c.targetResource.name).slice(0, 10);
|
|
10716
|
+
const citedByNames = Array.from(citedByMap.values()).slice(0, 5);
|
|
10717
|
+
const siblingTypes = Array.from(siblingEntityTypes).slice(0, 10);
|
|
10718
|
+
const parts = [];
|
|
10719
|
+
parts.push(`Passage: "${sourceContext.selected}"`);
|
|
10720
|
+
if (connNames.length > 0) parts.push(`Connected resources: ${connNames.join(", ")}`);
|
|
10721
|
+
if (citedByNames.length > 0) parts.push(`Cited by: ${citedByNames.join(", ")}`);
|
|
10722
|
+
if (siblingTypes.length > 0) parts.push(`Sibling entity types: ${siblingTypes.join(", ")}`);
|
|
10723
|
+
if (annotationEntityTypes.length > 0) parts.push(`Annotation entity types: ${annotationEntityTypes.join(", ")}`);
|
|
10724
|
+
const relationshipPrompt = `Given this annotation passage and its knowledge graph neighborhood, write a 1-2 sentence summary of how this passage relates to its surrounding resources and what kind of resource would best resolve this reference.
|
|
10725
|
+
|
|
10726
|
+
${parts.join("\n")}
|
|
10727
|
+
|
|
10728
|
+
Summary:`;
|
|
10729
|
+
inferredRelationshipSummary = await inferenceClient.generateText(relationshipPrompt, 150, 0.3);
|
|
10730
|
+
logger?.debug("Generated inferred relationship summary", { length: inferredRelationshipSummary.length });
|
|
10731
|
+
} catch (error) {
|
|
10732
|
+
logger?.warn("Failed to generate inferred relationship summary", { error });
|
|
10733
|
+
}
|
|
10734
|
+
}
|
|
10735
|
+
const graphContext = {
|
|
10736
|
+
connections: connections.map((conn) => ({
|
|
10737
|
+
resourceId: getResourceId(conn.targetResource) ?? "",
|
|
10738
|
+
resourceName: conn.targetResource.name,
|
|
10739
|
+
entityTypes: getResourceEntityTypes(conn.targetResource),
|
|
10740
|
+
bidirectional: conn.bidirectional
|
|
10741
|
+
})),
|
|
10742
|
+
citedByCount: citedByMap.size,
|
|
10743
|
+
citedBy: Array.from(citedByMap.entries()).map(([id, name]) => ({
|
|
10744
|
+
resourceId: id,
|
|
10745
|
+
resourceName: name
|
|
10746
|
+
})),
|
|
10747
|
+
siblingEntityTypes: Array.from(siblingEntityTypes),
|
|
10748
|
+
entityTypeFrequencies,
|
|
10749
|
+
...inferredRelationshipSummary ? { inferredRelationshipSummary } : {}
|
|
10750
|
+
};
|
|
10751
|
+
logger?.debug("Built graph context", {
|
|
10752
|
+
connections: connections.length,
|
|
10753
|
+
citedByCount: citedByMap.size,
|
|
10754
|
+
siblingEntityTypes: siblingEntityTypes.size
|
|
10755
|
+
});
|
|
10689
10756
|
const generationContext = sourceContext ? {
|
|
10757
|
+
annotation,
|
|
10758
|
+
sourceResource: sourceDoc,
|
|
10690
10759
|
sourceContext: {
|
|
10691
10760
|
before: sourceContext.before || "",
|
|
10692
10761
|
selected: sourceContext.selected,
|
|
@@ -10695,8 +10764,9 @@ var AnnotationContext = class {
|
|
|
10695
10764
|
metadata: {
|
|
10696
10765
|
resourceType: "document",
|
|
10697
10766
|
language: sourceDoc.language,
|
|
10698
|
-
entityTypes:
|
|
10699
|
-
}
|
|
10767
|
+
entityTypes: annotationEntityTypes
|
|
10768
|
+
},
|
|
10769
|
+
graphContext
|
|
10700
10770
|
} : void 0;
|
|
10701
10771
|
const response = {
|
|
10702
10772
|
annotation,
|
|
@@ -10714,10 +10784,10 @@ var AnnotationContext = class {
|
|
|
10714
10784
|
* Get resource annotations from view storage (fast path)
|
|
10715
10785
|
* Throws if view missing
|
|
10716
10786
|
*/
|
|
10717
|
-
static async getResourceAnnotations(
|
|
10718
|
-
const view = await kb.views.get(
|
|
10787
|
+
static async getResourceAnnotations(resourceId4, kb) {
|
|
10788
|
+
const view = await kb.views.get(resourceId4);
|
|
10719
10789
|
if (!view) {
|
|
10720
|
-
throw new Error(`Resource ${
|
|
10790
|
+
throw new Error(`Resource ${resourceId4} not found in view storage`);
|
|
10721
10791
|
}
|
|
10722
10792
|
return view.annotations;
|
|
10723
10793
|
}
|
|
@@ -10725,8 +10795,8 @@ var AnnotationContext = class {
|
|
|
10725
10795
|
* Get all annotations
|
|
10726
10796
|
* @returns Array of all annotation objects
|
|
10727
10797
|
*/
|
|
10728
|
-
static async getAllAnnotations(
|
|
10729
|
-
const annotations = await this.getResourceAnnotations(
|
|
10798
|
+
static async getAllAnnotations(resourceId4, kb) {
|
|
10799
|
+
const annotations = await this.getResourceAnnotations(resourceId4, kb);
|
|
10730
10800
|
return await this.enrichResolvedReferences(annotations.annotations, kb);
|
|
10731
10801
|
}
|
|
10732
10802
|
/**
|
|
@@ -10735,28 +10805,26 @@ var AnnotationContext = class {
|
|
|
10735
10805
|
* @private
|
|
10736
10806
|
*/
|
|
10737
10807
|
static async enrichResolvedReferences(annotations, kb) {
|
|
10738
|
-
const
|
|
10808
|
+
const resolvedIds = /* @__PURE__ */ new Set();
|
|
10739
10809
|
for (const ann of annotations) {
|
|
10740
10810
|
if (ann.motivation === "linking" && ann.body) {
|
|
10741
10811
|
const body = Array.isArray(ann.body) ? ann.body : [ann.body];
|
|
10742
10812
|
for (const item of body) {
|
|
10743
10813
|
if (item.type === "SpecificResource" && item.purpose === "linking" && item.source) {
|
|
10744
|
-
|
|
10814
|
+
resolvedIds.add(item.source);
|
|
10745
10815
|
}
|
|
10746
10816
|
}
|
|
10747
10817
|
}
|
|
10748
10818
|
}
|
|
10749
|
-
if (
|
|
10819
|
+
if (resolvedIds.size === 0) {
|
|
10750
10820
|
return annotations;
|
|
10751
10821
|
}
|
|
10752
|
-
const metadataPromises = Array.from(
|
|
10753
|
-
const docId = uri.split("/resources/")[1];
|
|
10754
|
-
if (!docId) return null;
|
|
10822
|
+
const metadataPromises = Array.from(resolvedIds).map(async (id) => {
|
|
10755
10823
|
try {
|
|
10756
|
-
const view = await kb.views.get(
|
|
10824
|
+
const view = await kb.views.get(id);
|
|
10757
10825
|
if (view?.resource?.name) {
|
|
10758
10826
|
return {
|
|
10759
|
-
|
|
10827
|
+
id,
|
|
10760
10828
|
metadata: {
|
|
10761
10829
|
name: view.resource.name,
|
|
10762
10830
|
mediaType: view.resource.mediaType
|
|
@@ -10768,10 +10836,10 @@ var AnnotationContext = class {
|
|
|
10768
10836
|
return null;
|
|
10769
10837
|
});
|
|
10770
10838
|
const results = await Promise.all(metadataPromises);
|
|
10771
|
-
const
|
|
10839
|
+
const idToMetadata = /* @__PURE__ */ new Map();
|
|
10772
10840
|
for (const result of results) {
|
|
10773
10841
|
if (result) {
|
|
10774
|
-
|
|
10842
|
+
idToMetadata.set(result.id, result.metadata);
|
|
10775
10843
|
}
|
|
10776
10844
|
}
|
|
10777
10845
|
return annotations.map((ann) => {
|
|
@@ -10779,7 +10847,7 @@ var AnnotationContext = class {
|
|
|
10779
10847
|
const body = Array.isArray(ann.body) ? ann.body : [ann.body];
|
|
10780
10848
|
for (const item of body) {
|
|
10781
10849
|
if (item.type === "SpecificResource" && item.purpose === "linking" && item.source) {
|
|
10782
|
-
const metadata =
|
|
10850
|
+
const metadata = idToMetadata.get(item.source);
|
|
10783
10851
|
if (metadata) {
|
|
10784
10852
|
return {
|
|
10785
10853
|
...ann,
|
|
@@ -10797,8 +10865,8 @@ var AnnotationContext = class {
|
|
|
10797
10865
|
* Get resource stats (version info)
|
|
10798
10866
|
* @returns Version and timestamp info for the annotations
|
|
10799
10867
|
*/
|
|
10800
|
-
static async getResourceStats(
|
|
10801
|
-
const annotations = await this.getResourceAnnotations(
|
|
10868
|
+
static async getResourceStats(resourceId4, kb) {
|
|
10869
|
+
const annotations = await this.getResourceAnnotations(resourceId4, kb);
|
|
10802
10870
|
return {
|
|
10803
10871
|
resourceId: annotations.resourceId,
|
|
10804
10872
|
version: annotations.version,
|
|
@@ -10808,19 +10876,16 @@ var AnnotationContext = class {
|
|
|
10808
10876
|
/**
|
|
10809
10877
|
* Check if resource exists in view storage
|
|
10810
10878
|
*/
|
|
10811
|
-
static async resourceExists(
|
|
10812
|
-
return await kb.views.exists(
|
|
10879
|
+
static async resourceExists(resourceId4, kb) {
|
|
10880
|
+
return await kb.views.exists(resourceId4);
|
|
10813
10881
|
}
|
|
10814
10882
|
/**
|
|
10815
10883
|
* Get a single annotation by ID
|
|
10816
10884
|
* O(1) lookup using resource ID to access view storage
|
|
10817
10885
|
*/
|
|
10818
|
-
static async getAnnotation(annotationId2,
|
|
10819
|
-
const annotations = await this.getResourceAnnotations(
|
|
10820
|
-
return annotations.annotations.find((a) =>
|
|
10821
|
-
const shortId = a.id.split("/").pop();
|
|
10822
|
-
return shortId === annotationId2;
|
|
10823
|
-
}) || null;
|
|
10886
|
+
static async getAnnotation(annotationId2, resourceId4, kb) {
|
|
10887
|
+
const annotations = await this.getResourceAnnotations(resourceId4, kb);
|
|
10888
|
+
return annotations.annotations.find((a) => a.id === annotationId2) || null;
|
|
10824
10889
|
}
|
|
10825
10890
|
/**
|
|
10826
10891
|
* List annotations with optional filtering
|
|
@@ -10836,13 +10901,13 @@ var AnnotationContext = class {
|
|
|
10836
10901
|
/**
|
|
10837
10902
|
* Get annotation context (selected text with surrounding context)
|
|
10838
10903
|
*/
|
|
10839
|
-
static async getAnnotationContext(annotationId2,
|
|
10840
|
-
const annotation = await this.getAnnotation(annotationId2,
|
|
10904
|
+
static async getAnnotationContext(annotationId2, resourceId4, contextBefore, contextAfter, kb) {
|
|
10905
|
+
const annotation = await this.getAnnotation(annotationId2, resourceId4, kb);
|
|
10841
10906
|
if (!annotation) {
|
|
10842
10907
|
throw new Error("Annotation not found");
|
|
10843
10908
|
}
|
|
10844
10909
|
const resource = await ResourceContext.getResourceMetadata(
|
|
10845
|
-
|
|
10910
|
+
createResourceId(getTargetSource(annotation.target)),
|
|
10846
10911
|
kb
|
|
10847
10912
|
);
|
|
10848
10913
|
if (!resource) {
|
|
@@ -10869,13 +10934,13 @@ var AnnotationContext = class {
|
|
|
10869
10934
|
/**
|
|
10870
10935
|
* Generate AI summary of annotation in context
|
|
10871
10936
|
*/
|
|
10872
|
-
static async generateAnnotationSummary(annotationId2,
|
|
10873
|
-
const annotation = await this.getAnnotation(annotationId2,
|
|
10937
|
+
static async generateAnnotationSummary(annotationId2, resourceId4, kb, inferenceClient) {
|
|
10938
|
+
const annotation = await this.getAnnotation(annotationId2, resourceId4, kb);
|
|
10874
10939
|
if (!annotation) {
|
|
10875
10940
|
throw new Error("Annotation not found");
|
|
10876
10941
|
}
|
|
10877
10942
|
const resource = await ResourceContext.getResourceMetadata(
|
|
10878
|
-
|
|
10943
|
+
createResourceId(getTargetSource(annotation.target)),
|
|
10879
10944
|
kb
|
|
10880
10945
|
);
|
|
10881
10946
|
if (!resource) {
|
|
@@ -10950,16 +11015,14 @@ Entity types: ${entityTypes.join(", ")}`;
|
|
|
10950
11015
|
};
|
|
10951
11016
|
|
|
10952
11017
|
// src/graph-context.ts
|
|
10953
|
-
import {
|
|
10954
|
-
import { getResourceId, getResourceEntityTypes as getResourceEntityTypes2 } from "@semiont/api-client";
|
|
11018
|
+
import { getResourceId as getResourceId2, getResourceEntityTypes as getResourceEntityTypes2 } from "@semiont/api-client";
|
|
10955
11019
|
var GraphContext = class {
|
|
10956
11020
|
/**
|
|
10957
11021
|
* Get all resources referencing this resource (backlinks)
|
|
10958
11022
|
* Requires graph traversal - must use graph database
|
|
10959
11023
|
*/
|
|
10960
|
-
static async getBacklinks(
|
|
10961
|
-
|
|
10962
|
-
return await kb.graph.getResourceReferencedBy(resourceUri);
|
|
11024
|
+
static async getBacklinks(resourceId4, kb) {
|
|
11025
|
+
return await kb.graph.getResourceReferencedBy(resourceId4);
|
|
10963
11026
|
}
|
|
10964
11027
|
/**
|
|
10965
11028
|
* Find shortest path between two resources
|
|
@@ -10972,8 +11035,8 @@ var GraphContext = class {
|
|
|
10972
11035
|
* Get resource connections (graph edges)
|
|
10973
11036
|
* Requires graph traversal - must use graph database
|
|
10974
11037
|
*/
|
|
10975
|
-
static async getResourceConnections(
|
|
10976
|
-
return await kb.graph.getResourceConnections(
|
|
11038
|
+
static async getResourceConnections(resourceId4, kb) {
|
|
11039
|
+
return await kb.graph.getResourceConnections(resourceId4);
|
|
10977
11040
|
}
|
|
10978
11041
|
/**
|
|
10979
11042
|
* Search resources by name (cross-resource query)
|
|
@@ -10986,31 +11049,30 @@ var GraphContext = class {
|
|
|
10986
11049
|
* Build graph representation with nodes and edges for a resource and its connections
|
|
10987
11050
|
* Retrieves connections from graph and builds visualization-ready structure
|
|
10988
11051
|
*/
|
|
10989
|
-
static async buildGraphRepresentation(
|
|
10990
|
-
const
|
|
10991
|
-
const mainDoc = await kb.graph.getResource(resourceUri);
|
|
11052
|
+
static async buildGraphRepresentation(resourceId4, maxRelated, kb) {
|
|
11053
|
+
const mainDoc = await kb.graph.getResource(resourceId4);
|
|
10992
11054
|
if (!mainDoc) {
|
|
10993
11055
|
throw new Error("Resource not found");
|
|
10994
11056
|
}
|
|
10995
|
-
const connections = await kb.graph.getResourceConnections(
|
|
11057
|
+
const connections = await kb.graph.getResourceConnections(resourceId4);
|
|
10996
11058
|
const relatedDocs = connections.map((conn) => conn.targetResource).slice(0, maxRelated - 1);
|
|
10997
11059
|
const nodes = [
|
|
10998
11060
|
{
|
|
10999
|
-
id:
|
|
11061
|
+
id: getResourceId2(mainDoc),
|
|
11000
11062
|
type: "resource",
|
|
11001
11063
|
label: mainDoc.name,
|
|
11002
11064
|
metadata: { entityTypes: getResourceEntityTypes2(mainDoc) }
|
|
11003
11065
|
},
|
|
11004
11066
|
...relatedDocs.map((doc) => ({
|
|
11005
|
-
id:
|
|
11067
|
+
id: getResourceId2(doc),
|
|
11006
11068
|
type: "resource",
|
|
11007
11069
|
label: doc.name,
|
|
11008
11070
|
metadata: { entityTypes: getResourceEntityTypes2(doc) }
|
|
11009
11071
|
}))
|
|
11010
11072
|
].filter((node) => node.id !== void 0);
|
|
11011
11073
|
const edges = connections.slice(0, maxRelated - 1).map((conn) => ({
|
|
11012
|
-
source:
|
|
11013
|
-
target:
|
|
11074
|
+
source: resourceId4,
|
|
11075
|
+
target: getResourceId2(conn.targetResource),
|
|
11014
11076
|
type: conn.relationshipType || "link",
|
|
11015
11077
|
metadata: {}
|
|
11016
11078
|
})).filter((edge) => edge.target !== void 0);
|
|
@@ -11019,27 +11081,26 @@ var GraphContext = class {
|
|
|
11019
11081
|
};
|
|
11020
11082
|
|
|
11021
11083
|
// src/llm-context.ts
|
|
11022
|
-
import { getResourceEntityTypes as getResourceEntityTypes3, getResourceId as
|
|
11084
|
+
import { getResourceEntityTypes as getResourceEntityTypes3, getResourceId as getResourceId3 } from "@semiont/api-client";
|
|
11023
11085
|
import { resourceId as makeResourceId2 } from "@semiont/core";
|
|
11024
11086
|
var LLMContext = class {
|
|
11025
11087
|
/**
|
|
11026
11088
|
* Get comprehensive LLM context for a resource
|
|
11027
11089
|
* Includes: main resource, related resources, annotations, graph, content, summary, references
|
|
11028
11090
|
*/
|
|
11029
|
-
static async getResourceContext(
|
|
11030
|
-
const mainDoc = await ResourceContext.getResourceMetadata(
|
|
11091
|
+
static async getResourceContext(resourceId4, options, kb, inferenceClient) {
|
|
11092
|
+
const mainDoc = await ResourceContext.getResourceMetadata(resourceId4, kb);
|
|
11031
11093
|
if (!mainDoc) {
|
|
11032
11094
|
throw new Error("Resource not found");
|
|
11033
11095
|
}
|
|
11034
11096
|
const mainContent = options.includeContent ? await ResourceContext.getResourceContent(mainDoc, kb) : void 0;
|
|
11035
11097
|
const graph = await GraphContext.buildGraphRepresentation(
|
|
11036
|
-
|
|
11098
|
+
resourceId4,
|
|
11037
11099
|
options.maxResources,
|
|
11038
|
-
kb
|
|
11039
|
-
publicURL
|
|
11100
|
+
kb
|
|
11040
11101
|
);
|
|
11041
11102
|
const relatedDocs = [];
|
|
11042
|
-
const resourceIdStr =
|
|
11103
|
+
const resourceIdStr = resourceId4.toString();
|
|
11043
11104
|
for (const node of graph.nodes) {
|
|
11044
11105
|
if (node.id !== resourceIdStr) {
|
|
11045
11106
|
const relatedDoc = await ResourceContext.getResourceMetadata(makeResourceId2(node.id), kb);
|
|
@@ -11052,7 +11113,7 @@ var LLMContext = class {
|
|
|
11052
11113
|
if (options.includeContent) {
|
|
11053
11114
|
await Promise.all(
|
|
11054
11115
|
relatedDocs.map(async (doc) => {
|
|
11055
|
-
const docId =
|
|
11116
|
+
const docId = getResourceId3(doc);
|
|
11056
11117
|
if (!docId) return;
|
|
11057
11118
|
const content = await ResourceContext.getResourceContent(doc, kb);
|
|
11058
11119
|
if (content) {
|
|
@@ -11061,7 +11122,7 @@ var LLMContext = class {
|
|
|
11061
11122
|
})
|
|
11062
11123
|
);
|
|
11063
11124
|
}
|
|
11064
|
-
const annotations = await AnnotationContext.getAllAnnotations(
|
|
11125
|
+
const annotations = await AnnotationContext.getAllAnnotations(resourceId4, kb);
|
|
11065
11126
|
const summary = options.includeSummary && mainContent ? await generateResourceSummary(
|
|
11066
11127
|
mainDoc.name,
|
|
11067
11128
|
mainContent,
|
|
@@ -11086,7 +11147,10 @@ var LLMContext = class {
|
|
|
11086
11147
|
import { promises as fs2 } from "fs";
|
|
11087
11148
|
import * as path2 from "path";
|
|
11088
11149
|
async function readEntityTypesProjection(config) {
|
|
11089
|
-
const configuredPath = config.services.filesystem
|
|
11150
|
+
const configuredPath = config.services.filesystem?.path;
|
|
11151
|
+
if (!configuredPath) {
|
|
11152
|
+
throw new Error("services.filesystem.path is required for entity types reader");
|
|
11153
|
+
}
|
|
11090
11154
|
const projectRoot = config._metadata?.projectRoot;
|
|
11091
11155
|
let basePath;
|
|
11092
11156
|
if (path2.isAbsolute(configuredPath)) {
|
|
@@ -11116,8 +11180,7 @@ async function readEntityTypesProjection(config) {
|
|
|
11116
11180
|
|
|
11117
11181
|
// src/gatherer.ts
|
|
11118
11182
|
var Gatherer = class {
|
|
11119
|
-
constructor(
|
|
11120
|
-
this.publicURL = publicURL;
|
|
11183
|
+
constructor(kb, eventBus, inferenceClient, logger, config) {
|
|
11121
11184
|
this.kb = kb;
|
|
11122
11185
|
this.eventBus = eventBus;
|
|
11123
11186
|
this.inferenceClient = inferenceClient;
|
|
@@ -11130,7 +11193,7 @@ var Gatherer = class {
|
|
|
11130
11193
|
this.logger.info("Gatherer actor initialized");
|
|
11131
11194
|
const errorHandler = (err) => this.logger.error("Gatherer pipeline error", { error: err });
|
|
11132
11195
|
const annotationGather$ = this.eventBus.get("gather:requested").pipe(
|
|
11133
|
-
(0, import_operators3.groupBy)((event) => event.
|
|
11196
|
+
(0, import_operators3.groupBy)((event) => event.resourceId),
|
|
11134
11197
|
(0, import_operators3.mergeMap)(
|
|
11135
11198
|
(group$) => group$.pipe(
|
|
11136
11199
|
(0, import_operators3.concatMap)((event) => (0, import_rxjs3.from)(this.handleAnnotationGather(event)))
|
|
@@ -11138,7 +11201,7 @@ var Gatherer = class {
|
|
|
11138
11201
|
)
|
|
11139
11202
|
);
|
|
11140
11203
|
const resourceGather$ = this.eventBus.get("gather:resource-requested").pipe(
|
|
11141
|
-
(0, import_operators3.groupBy)((event) => event.
|
|
11204
|
+
(0, import_operators3.groupBy)((event) => event.resourceId),
|
|
11142
11205
|
(0, import_operators3.mergeMap)(
|
|
11143
11206
|
(group$) => group$.pipe(
|
|
11144
11207
|
(0, import_operators3.concatMap)((event) => (0, import_rxjs3.from)(this.handleResourceGather(event)))
|
|
@@ -11184,28 +11247,28 @@ var Gatherer = class {
|
|
|
11184
11247
|
async handleAnnotationGather(event) {
|
|
11185
11248
|
try {
|
|
11186
11249
|
this.logger.debug("Gathering annotation context", {
|
|
11187
|
-
|
|
11188
|
-
|
|
11250
|
+
annotationId: event.annotationId,
|
|
11251
|
+
resourceId: event.resourceId
|
|
11189
11252
|
});
|
|
11190
11253
|
const response = await AnnotationContext.buildLLMContext(
|
|
11191
|
-
|
|
11192
|
-
|
|
11254
|
+
makeAnnotationId2(event.annotationId),
|
|
11255
|
+
event.resourceId,
|
|
11193
11256
|
this.kb,
|
|
11194
11257
|
event.options ?? {},
|
|
11195
11258
|
this.inferenceClient,
|
|
11196
11259
|
this.logger
|
|
11197
11260
|
);
|
|
11198
11261
|
this.eventBus.get("gather:complete").next({
|
|
11199
|
-
|
|
11262
|
+
annotationId: event.annotationId,
|
|
11200
11263
|
response
|
|
11201
11264
|
});
|
|
11202
11265
|
} catch (error) {
|
|
11203
11266
|
this.logger.error("Gather annotation context failed", {
|
|
11204
|
-
|
|
11267
|
+
annotationId: event.annotationId,
|
|
11205
11268
|
error
|
|
11206
11269
|
});
|
|
11207
11270
|
this.eventBus.get("gather:failed").next({
|
|
11208
|
-
|
|
11271
|
+
annotationId: event.annotationId,
|
|
11209
11272
|
error: error instanceof Error ? error : new Error(String(error))
|
|
11210
11273
|
});
|
|
11211
11274
|
}
|
|
@@ -11213,27 +11276,25 @@ var Gatherer = class {
|
|
|
11213
11276
|
async handleResourceGather(event) {
|
|
11214
11277
|
try {
|
|
11215
11278
|
this.logger.debug("Gathering resource context", {
|
|
11216
|
-
|
|
11279
|
+
resourceId: event.resourceId
|
|
11217
11280
|
});
|
|
11218
|
-
const publicURL = this.publicURL;
|
|
11219
11281
|
const result = await LLMContext.getResourceContext(
|
|
11220
|
-
|
|
11282
|
+
event.resourceId,
|
|
11221
11283
|
event.options,
|
|
11222
11284
|
this.kb,
|
|
11223
|
-
publicURL,
|
|
11224
11285
|
this.inferenceClient
|
|
11225
11286
|
);
|
|
11226
11287
|
this.eventBus.get("gather:resource-complete").next({
|
|
11227
|
-
|
|
11288
|
+
resourceId: event.resourceId,
|
|
11228
11289
|
context: result
|
|
11229
11290
|
});
|
|
11230
11291
|
} catch (error) {
|
|
11231
11292
|
this.logger.error("Gather resource context failed", {
|
|
11232
|
-
|
|
11293
|
+
resourceId: event.resourceId,
|
|
11233
11294
|
error
|
|
11234
11295
|
});
|
|
11235
11296
|
this.eventBus.get("gather:resource-failed").next({
|
|
11236
|
-
|
|
11297
|
+
resourceId: event.resourceId,
|
|
11237
11298
|
error: error instanceof Error ? error : new Error(String(error))
|
|
11238
11299
|
});
|
|
11239
11300
|
}
|
|
@@ -11336,8 +11397,7 @@ var Gatherer = class {
|
|
|
11336
11397
|
let resolvedResource = null;
|
|
11337
11398
|
const bodySource = getBodySource2(annotation.body);
|
|
11338
11399
|
if (bodySource) {
|
|
11339
|
-
|
|
11340
|
-
resolvedResource = await ResourceContext.getResourceMetadata(makeResourceId3(resolvedId), this.kb);
|
|
11400
|
+
resolvedResource = await ResourceContext.getResourceMetadata(resourceId(bodySource), this.kb);
|
|
11341
11401
|
}
|
|
11342
11402
|
this.eventBus.get("browse:annotation-result").next({
|
|
11343
11403
|
correlationId: event.correlationId,
|
|
@@ -11457,7 +11517,7 @@ var Gatherer = class {
|
|
|
11457
11517
|
async handleEntityTypes(event) {
|
|
11458
11518
|
try {
|
|
11459
11519
|
if (!this.config) {
|
|
11460
|
-
throw new Error("
|
|
11520
|
+
throw new Error("MakeMeaningConfig required for entity type reads");
|
|
11461
11521
|
}
|
|
11462
11522
|
const entityTypes = await readEntityTypesProjection(this.config);
|
|
11463
11523
|
this.eventBus.get("mark:entity-types-result").next({
|
|
@@ -11484,13 +11544,13 @@ var Gatherer = class {
|
|
|
11484
11544
|
// src/binder.ts
|
|
11485
11545
|
var import_rxjs4 = __toESM(require_cjs(), 1);
|
|
11486
11546
|
var import_operators4 = __toESM(require_operators(), 1);
|
|
11487
|
-
import {
|
|
11488
|
-
import { getExactText, getTargetSource as getTargetSource2, getTargetSelector as getTargetSelector2 } from "@semiont/api-client";
|
|
11547
|
+
import { resourceId as resourceId2 } from "@semiont/core";
|
|
11548
|
+
import { getExactText, getResourceId as getResourceId4, getResourceEntityTypes as getResourceEntityTypes5, getTargetSource as getTargetSource2, getTargetSelector as getTargetSelector2 } from "@semiont/api-client";
|
|
11489
11549
|
var Binder = class {
|
|
11490
|
-
constructor(kb, eventBus, logger,
|
|
11550
|
+
constructor(kb, eventBus, logger, inferenceClient) {
|
|
11491
11551
|
this.kb = kb;
|
|
11492
11552
|
this.eventBus = eventBus;
|
|
11493
|
-
this.
|
|
11553
|
+
this.inferenceClient = inferenceClient;
|
|
11494
11554
|
this.logger = logger;
|
|
11495
11555
|
}
|
|
11496
11556
|
subscriptions = [];
|
|
@@ -11511,15 +11571,24 @@ var Binder = class {
|
|
|
11511
11571
|
}
|
|
11512
11572
|
async handleSearch(event) {
|
|
11513
11573
|
try {
|
|
11574
|
+
const context = event.context;
|
|
11575
|
+
const searchTerm = context.sourceContext?.selected ?? "";
|
|
11514
11576
|
this.logger.debug("Searching for binding candidates", {
|
|
11515
11577
|
referenceId: event.referenceId,
|
|
11516
|
-
searchTerm
|
|
11517
|
-
|
|
11518
|
-
|
|
11578
|
+
searchTerm,
|
|
11579
|
+
limit: event.limit,
|
|
11580
|
+
useSemanticScoring: event.useSemanticScoring
|
|
11581
|
+
});
|
|
11582
|
+
const scored = await this.contextDrivenSearch(
|
|
11583
|
+
searchTerm,
|
|
11584
|
+
context,
|
|
11585
|
+
event.useSemanticScoring
|
|
11586
|
+
);
|
|
11587
|
+
const limited = event.limit ? scored.slice(0, event.limit) : scored;
|
|
11519
11588
|
this.eventBus.get("bind:search-results").next({
|
|
11520
11589
|
referenceId: event.referenceId,
|
|
11521
|
-
|
|
11522
|
-
|
|
11590
|
+
results: limited,
|
|
11591
|
+
correlationId: event.correlationId
|
|
11523
11592
|
});
|
|
11524
11593
|
} catch (error) {
|
|
11525
11594
|
this.logger.error("Bind search failed", {
|
|
@@ -11528,24 +11597,240 @@ var Binder = class {
|
|
|
11528
11597
|
});
|
|
11529
11598
|
this.eventBus.get("bind:search-failed").next({
|
|
11530
11599
|
referenceId: event.referenceId,
|
|
11531
|
-
error: error instanceof Error ? error : new Error(String(error))
|
|
11600
|
+
error: error instanceof Error ? error : new Error(String(error)),
|
|
11601
|
+
correlationId: event.correlationId
|
|
11532
11602
|
});
|
|
11533
11603
|
}
|
|
11534
11604
|
}
|
|
11605
|
+
/**
|
|
11606
|
+
* Context-driven search: multi-source retrieval + composite scoring
|
|
11607
|
+
*
|
|
11608
|
+
* Retrieval sources:
|
|
11609
|
+
* 1. Name match — graph.searchResources(searchTerm)
|
|
11610
|
+
* 2. Entity type match — graph.listResources({ entityTypes })
|
|
11611
|
+
* 3. Graph neighborhood — connections from GatheredContext
|
|
11612
|
+
*
|
|
11613
|
+
* Ranking signals:
|
|
11614
|
+
* - Entity type overlap (Jaccard similarity)
|
|
11615
|
+
* - Bidirectionality (already connected both ways)
|
|
11616
|
+
* - Citation weight (well-connected = important)
|
|
11617
|
+
* - Name match quality (exact > prefix > contains)
|
|
11618
|
+
* - Recency (newer resources scored higher)
|
|
11619
|
+
*/
|
|
11620
|
+
async contextDrivenSearch(searchTerm, context, useSemanticScoring) {
|
|
11621
|
+
const annotationEntityTypes = context.metadata?.entityTypes ?? [];
|
|
11622
|
+
const connections = context.graphContext?.connections ?? [];
|
|
11623
|
+
const [nameMatches, entityTypeMatches] = await Promise.all([
|
|
11624
|
+
this.kb.graph.searchResources(searchTerm),
|
|
11625
|
+
annotationEntityTypes.length > 0 ? this.kb.graph.listResources({ entityTypes: annotationEntityTypes, limit: 50 }).then((r) => r.resources) : Promise.resolve([])
|
|
11626
|
+
]);
|
|
11627
|
+
const neighborResources = await Promise.all(
|
|
11628
|
+
connections.map(
|
|
11629
|
+
(conn) => this.kb.graph.getResource(resourceId2(conn.resourceId)).catch(() => null)
|
|
11630
|
+
)
|
|
11631
|
+
);
|
|
11632
|
+
const candidateMap = /* @__PURE__ */ new Map();
|
|
11633
|
+
const addCandidate = (resource, source) => {
|
|
11634
|
+
const id = getResourceId4(resource);
|
|
11635
|
+
if (!id) return;
|
|
11636
|
+
const existing = candidateMap.get(id);
|
|
11637
|
+
if (existing) {
|
|
11638
|
+
existing.sources.add(source);
|
|
11639
|
+
} else {
|
|
11640
|
+
candidateMap.set(id, { resource, sources: /* @__PURE__ */ new Set([source]) });
|
|
11641
|
+
}
|
|
11642
|
+
};
|
|
11643
|
+
for (const r of nameMatches) addCandidate(r, "name");
|
|
11644
|
+
for (const r of entityTypeMatches) addCandidate(r, "entityType");
|
|
11645
|
+
for (const r of neighborResources) {
|
|
11646
|
+
if (r) addCandidate(r, "neighborhood");
|
|
11647
|
+
}
|
|
11648
|
+
this.logger.debug("Candidate retrieval", {
|
|
11649
|
+
nameMatches: nameMatches.length,
|
|
11650
|
+
entityTypeMatches: entityTypeMatches.length,
|
|
11651
|
+
neighborResources: neighborResources.filter(Boolean).length,
|
|
11652
|
+
totalCandidates: candidateMap.size
|
|
11653
|
+
});
|
|
11654
|
+
const connectionIds = new Set(connections.map((c) => c.resourceId));
|
|
11655
|
+
const bidirectionalIds = new Set(
|
|
11656
|
+
connections.filter((c) => c.bidirectional).map((c) => c.resourceId)
|
|
11657
|
+
);
|
|
11658
|
+
const entityTypeFreqs = context.graphContext?.entityTypeFrequencies ?? {};
|
|
11659
|
+
const searchTermLower = searchTerm.toLowerCase();
|
|
11660
|
+
const scored = Array.from(candidateMap.values()).map(({ resource, sources }) => {
|
|
11661
|
+
const id = getResourceId4(resource) ?? "";
|
|
11662
|
+
const candidateEntityTypes = getResourceEntityTypes5(resource);
|
|
11663
|
+
const reasons = [];
|
|
11664
|
+
let score = 0;
|
|
11665
|
+
if (annotationEntityTypes.length > 0 && candidateEntityTypes.length > 0) {
|
|
11666
|
+
const intersection = annotationEntityTypes.filter((t) => candidateEntityTypes.includes(t));
|
|
11667
|
+
const union = /* @__PURE__ */ new Set([...annotationEntityTypes, ...candidateEntityTypes]);
|
|
11668
|
+
const jaccard = intersection.length / union.size;
|
|
11669
|
+
let idfBoost = 0;
|
|
11670
|
+
for (const t of intersection) {
|
|
11671
|
+
const freq = entityTypeFreqs[t] ?? 1;
|
|
11672
|
+
idfBoost += 1 / Math.log2(freq + 1);
|
|
11673
|
+
}
|
|
11674
|
+
const entityScore = jaccard * 30 + idfBoost * 5;
|
|
11675
|
+
score += entityScore;
|
|
11676
|
+
if (intersection.length > 0) {
|
|
11677
|
+
reasons.push(`entity types: ${intersection.join(", ")}`);
|
|
11678
|
+
}
|
|
11679
|
+
}
|
|
11680
|
+
if (bidirectionalIds.has(id)) {
|
|
11681
|
+
score += 20;
|
|
11682
|
+
reasons.push("bidirectional connection");
|
|
11683
|
+
} else if (connectionIds.has(id)) {
|
|
11684
|
+
score += 10;
|
|
11685
|
+
reasons.push("connected");
|
|
11686
|
+
}
|
|
11687
|
+
const citedByCount = context.graphContext?.citedByCount ?? 0;
|
|
11688
|
+
if (sources.has("neighborhood") && citedByCount > 0) {
|
|
11689
|
+
score += Math.min(citedByCount * 2, 15);
|
|
11690
|
+
}
|
|
11691
|
+
const nameLower = (resource.name ?? "").toLowerCase();
|
|
11692
|
+
if (nameLower === searchTermLower) {
|
|
11693
|
+
score += 25;
|
|
11694
|
+
reasons.push("exact name match");
|
|
11695
|
+
} else if (nameLower.startsWith(searchTermLower)) {
|
|
11696
|
+
score += 15;
|
|
11697
|
+
reasons.push("prefix name match");
|
|
11698
|
+
} else if (nameLower.includes(searchTermLower)) {
|
|
11699
|
+
score += 10;
|
|
11700
|
+
reasons.push("contains name match");
|
|
11701
|
+
}
|
|
11702
|
+
const dateCreated = resource.dateCreated;
|
|
11703
|
+
if (dateCreated) {
|
|
11704
|
+
const ageMs = Date.now() - new Date(dateCreated).getTime();
|
|
11705
|
+
const ageDays = ageMs / (1e3 * 60 * 60 * 24);
|
|
11706
|
+
score += Math.max(0, 5 * (1 - ageDays / 30));
|
|
11707
|
+
}
|
|
11708
|
+
if (sources.size > 1) {
|
|
11709
|
+
score += sources.size * 3;
|
|
11710
|
+
reasons.push(`${sources.size} retrieval sources`);
|
|
11711
|
+
}
|
|
11712
|
+
return {
|
|
11713
|
+
...resource,
|
|
11714
|
+
score: Math.round(score * 100) / 100,
|
|
11715
|
+
matchReason: reasons.join("; ") || "candidate"
|
|
11716
|
+
};
|
|
11717
|
+
});
|
|
11718
|
+
if (this.inferenceClient && scored.length > 0 && useSemanticScoring !== false) {
|
|
11719
|
+
try {
|
|
11720
|
+
const inferenceScores = await this.inferenceSemanticScore(
|
|
11721
|
+
searchTerm,
|
|
11722
|
+
context,
|
|
11723
|
+
scored.slice(0, 20)
|
|
11724
|
+
// Limit to top 20 candidates for cost
|
|
11725
|
+
);
|
|
11726
|
+
for (const item of scored) {
|
|
11727
|
+
const id = getResourceId4(item) ?? "";
|
|
11728
|
+
const inferenceScore = inferenceScores.get(id);
|
|
11729
|
+
if (inferenceScore !== void 0) {
|
|
11730
|
+
item.score += inferenceScore * 25;
|
|
11731
|
+
item.score = Math.round(item.score * 100) / 100;
|
|
11732
|
+
if (inferenceScore > 0.5) {
|
|
11733
|
+
item.matchReason = item.matchReason ? `${item.matchReason}; semantic match` : "semantic match";
|
|
11734
|
+
}
|
|
11735
|
+
}
|
|
11736
|
+
}
|
|
11737
|
+
} catch (error) {
|
|
11738
|
+
this.logger.warn("Inference semantic scoring failed, using structural scores only", { error });
|
|
11739
|
+
}
|
|
11740
|
+
}
|
|
11741
|
+
scored.sort((a, b) => b.score - a.score);
|
|
11742
|
+
this.logger.debug("Search results scored", {
|
|
11743
|
+
total: scored.length,
|
|
11744
|
+
topScore: scored[0]?.score,
|
|
11745
|
+
topReason: scored[0]?.matchReason
|
|
11746
|
+
});
|
|
11747
|
+
return scored;
|
|
11748
|
+
}
|
|
11749
|
+
/**
|
|
11750
|
+
* LLM-based semantic relevance scoring (GraphRAG-style)
|
|
11751
|
+
*
|
|
11752
|
+
* Batches candidates into a single prompt asking the LLM to score
|
|
11753
|
+
* each candidate's semantic relevance given the passage and graph context.
|
|
11754
|
+
*
|
|
11755
|
+
* @returns Map of resourceId → score (0-1)
|
|
11756
|
+
*/
|
|
11757
|
+
async inferenceSemanticScore(searchTerm, context, candidates) {
|
|
11758
|
+
if (!this.inferenceClient) return /* @__PURE__ */ new Map();
|
|
11759
|
+
const passage = context.sourceContext?.selected ?? searchTerm;
|
|
11760
|
+
const entityTypes = context.metadata?.entityTypes ?? [];
|
|
11761
|
+
const graphConnections = context.graphContext?.connections;
|
|
11762
|
+
const connections = graphConnections ?? [];
|
|
11763
|
+
const candidateLines = candidates.map((c, i) => {
|
|
11764
|
+
const id = getResourceId4(c) ?? "";
|
|
11765
|
+
const cEntityTypes = getResourceEntityTypes5(c);
|
|
11766
|
+
return `${i + 1}. "${c.name}" (id: ${id}, types: ${cEntityTypes.join(", ") || "none"})`;
|
|
11767
|
+
}).join("\n");
|
|
11768
|
+
const contextParts = [];
|
|
11769
|
+
contextParts.push(`Annotation motivation: ${context.annotation.motivation}`);
|
|
11770
|
+
contextParts.push(`Source resource: ${context.sourceResource.name}`);
|
|
11771
|
+
const { motivation, body } = context.annotation;
|
|
11772
|
+
if (motivation === "commenting" || motivation === "assessing") {
|
|
11773
|
+
const bodyItem = Array.isArray(body) ? body[0] : body;
|
|
11774
|
+
if (bodyItem && "value" in bodyItem && bodyItem.value) {
|
|
11775
|
+
const label = motivation === "commenting" ? "Comment" : "Assessment";
|
|
11776
|
+
contextParts.push(`${label}: ${bodyItem.value}`);
|
|
11777
|
+
}
|
|
11778
|
+
}
|
|
11779
|
+
if (entityTypes.length > 0) contextParts.push(`Annotation entity types: ${entityTypes.join(", ")}`);
|
|
11780
|
+
if (connections.length > 0) {
|
|
11781
|
+
const connNames = connections.slice(0, 5).map((c) => c.resourceName);
|
|
11782
|
+
contextParts.push(`Connected resources: ${connNames.join(", ")}`);
|
|
11783
|
+
}
|
|
11784
|
+
if (context.graphContext?.inferredRelationshipSummary) {
|
|
11785
|
+
contextParts.push(`Relationship context: ${context.graphContext.inferredRelationshipSummary}`);
|
|
11786
|
+
}
|
|
11787
|
+
const prompt = `Given this passage and context, score each candidate resource's semantic relevance on a scale of 0.0 to 1.0.
|
|
11788
|
+
|
|
11789
|
+
Passage: "${passage}"
|
|
11790
|
+
Search term: "${searchTerm}"
|
|
11791
|
+
${contextParts.length > 0 ? contextParts.join("\n") : ""}
|
|
11792
|
+
|
|
11793
|
+
Candidates:
|
|
11794
|
+
${candidateLines}
|
|
11795
|
+
|
|
11796
|
+
For each candidate, output ONLY a line with the number and score, like:
|
|
11797
|
+
1. 0.8
|
|
11798
|
+
2. 0.3
|
|
11799
|
+
No explanations.`;
|
|
11800
|
+
const response = await this.inferenceClient.generateText(prompt, 200, 0.1);
|
|
11801
|
+
const scores = /* @__PURE__ */ new Map();
|
|
11802
|
+
const lines = response.trim().split("\n");
|
|
11803
|
+
for (const line of lines) {
|
|
11804
|
+
const match = line.match(/^(\d+)\.\s*([\d.]+)/);
|
|
11805
|
+
if (match) {
|
|
11806
|
+
const index = parseInt(match[1], 10) - 1;
|
|
11807
|
+
const score = parseFloat(match[2]);
|
|
11808
|
+
if (index >= 0 && index < candidates.length && !isNaN(score) && score >= 0 && score <= 1) {
|
|
11809
|
+
const id = getResourceId4(candidates[index]) ?? "";
|
|
11810
|
+
if (id) scores.set(id, score);
|
|
11811
|
+
}
|
|
11812
|
+
}
|
|
11813
|
+
}
|
|
11814
|
+
this.logger.debug("Inference semantic scores", {
|
|
11815
|
+
candidateCount: candidates.length,
|
|
11816
|
+
scoredCount: scores.size
|
|
11817
|
+
});
|
|
11818
|
+
return scores;
|
|
11819
|
+
}
|
|
11535
11820
|
async handleReferencedBy(event) {
|
|
11536
11821
|
try {
|
|
11537
|
-
if (!this.publicURL) {
|
|
11538
|
-
throw new Error("publicURL required for referenced-by queries");
|
|
11539
|
-
}
|
|
11540
|
-
const resourceUri = resourceIdToURI2(event.resourceId, this.publicURL);
|
|
11541
11822
|
this.logger.debug("Looking for annotations referencing resource", {
|
|
11542
11823
|
resourceId: event.resourceId,
|
|
11543
|
-
resourceUri,
|
|
11544
11824
|
motivation: event.motivation || "all"
|
|
11545
11825
|
});
|
|
11546
|
-
const references = await this.kb.graph.getResourceReferencedBy(
|
|
11547
|
-
const
|
|
11548
|
-
const resources = await Promise.all(
|
|
11826
|
+
const references = await this.kb.graph.getResourceReferencedBy(event.resourceId, event.motivation);
|
|
11827
|
+
const sourceIds = [...new Set(references.map((ref) => getTargetSource2(ref.target)))];
|
|
11828
|
+
const resources = await Promise.all(sourceIds.map((id) => this.kb.graph.getResource(resourceId2(id))));
|
|
11829
|
+
for (let i = 0; i < sourceIds.length; i++) {
|
|
11830
|
+
if (resources[i] === null) {
|
|
11831
|
+
this.logger.warn("Referenced resource not found in graph", { resourceId: sourceIds[i] });
|
|
11832
|
+
}
|
|
11833
|
+
}
|
|
11549
11834
|
const docMap = new Map(resources.filter((doc) => doc !== null).map((doc) => [doc["@id"], doc]));
|
|
11550
11835
|
const referencedBy = references.map((ref) => {
|
|
11551
11836
|
const targetSource = getTargetSource2(ref.target);
|
|
@@ -11589,11 +11874,10 @@ var Binder = class {
|
|
|
11589
11874
|
// src/stower.ts
|
|
11590
11875
|
var import_rxjs5 = __toESM(require_cjs(), 1);
|
|
11591
11876
|
var import_operators5 = __toESM(require_operators(), 1);
|
|
11592
|
-
import { resourceId,
|
|
11877
|
+
import { resourceId as resourceId3, annotationId as makeAnnotationId3, CREATION_METHODS, generateUuid } from "@semiont/core";
|
|
11593
11878
|
var Stower = class {
|
|
11594
|
-
constructor(kb,
|
|
11879
|
+
constructor(kb, eventBus, logger) {
|
|
11595
11880
|
this.kb = kb;
|
|
11596
|
-
this.publicURL = publicURL;
|
|
11597
11881
|
this.eventBus = eventBus;
|
|
11598
11882
|
this.logger = logger;
|
|
11599
11883
|
}
|
|
@@ -11624,7 +11908,7 @@ var Stower = class {
|
|
|
11624
11908
|
// ========================================================================
|
|
11625
11909
|
async handleYieldCreate(event) {
|
|
11626
11910
|
try {
|
|
11627
|
-
const rId =
|
|
11911
|
+
const rId = resourceId3(generateUuid());
|
|
11628
11912
|
const storedRep = await this.kb.content.store(event.content, {
|
|
11629
11913
|
mediaType: event.format,
|
|
11630
11914
|
language: event.language || void 0,
|
|
@@ -11650,10 +11934,9 @@ var Stower = class {
|
|
|
11650
11934
|
generationPrompt: event.generationPrompt
|
|
11651
11935
|
}
|
|
11652
11936
|
});
|
|
11653
|
-
const normalizedBase = this.publicURL.endsWith("/") ? this.publicURL.slice(0, -1) : this.publicURL;
|
|
11654
11937
|
const resource = {
|
|
11655
11938
|
"@context": "https://schema.org/",
|
|
11656
|
-
"@id":
|
|
11939
|
+
"@id": rId,
|
|
11657
11940
|
name: event.name,
|
|
11658
11941
|
archived: false,
|
|
11659
11942
|
entityTypes: event.entityTypes || [],
|
|
@@ -11687,7 +11970,7 @@ var Stower = class {
|
|
|
11687
11970
|
version: 1,
|
|
11688
11971
|
payload: { annotation: event.annotation }
|
|
11689
11972
|
});
|
|
11690
|
-
this.eventBus.get("mark:created").next({ annotationId:
|
|
11973
|
+
this.eventBus.get("mark:created").next({ annotationId: makeAnnotationId3(event.annotation.id) });
|
|
11691
11974
|
} catch (error) {
|
|
11692
11975
|
this.logger.error("Failed to create annotation", { error });
|
|
11693
11976
|
this.eventBus.get("mark:create-failed").next({
|
|
@@ -11854,7 +12137,7 @@ var Stower = class {
|
|
|
11854
12137
|
var import_rxjs7 = __toESM(require_cjs(), 1);
|
|
11855
12138
|
var import_operators7 = __toESM(require_operators(), 1);
|
|
11856
12139
|
import { CREATION_METHODS as CREATION_METHODS2, cloneToken as makeCloneToken } from "@semiont/core";
|
|
11857
|
-
import { getPrimaryRepresentation as getPrimaryRepresentation3, getResourceEntityTypes as
|
|
12140
|
+
import { getPrimaryRepresentation as getPrimaryRepresentation3, getResourceEntityTypes as getResourceEntityTypes6 } from "@semiont/api-client";
|
|
11858
12141
|
|
|
11859
12142
|
// src/resource-operations.ts
|
|
11860
12143
|
var import_rxjs6 = __toESM(require_cjs(), 1);
|
|
@@ -12062,12 +12345,12 @@ var CloneTokenManager = class {
|
|
|
12062
12345
|
const mediaType = primaryRep?.mediaType || "text/plain";
|
|
12063
12346
|
const validFormats = ["text/plain", "text/markdown"];
|
|
12064
12347
|
const format = validFormats.includes(mediaType) ? mediaType : "text/plain";
|
|
12065
|
-
const
|
|
12348
|
+
const resourceId4 = await ResourceOperations.createResource(
|
|
12066
12349
|
{
|
|
12067
12350
|
name: event.name,
|
|
12068
12351
|
content: Buffer.from(event.content),
|
|
12069
12352
|
format,
|
|
12070
|
-
entityTypes:
|
|
12353
|
+
entityTypes: getResourceEntityTypes6(sourceDoc),
|
|
12071
12354
|
creationMethod: CREATION_METHODS2.CLONE
|
|
12072
12355
|
},
|
|
12073
12356
|
event.userId,
|
|
@@ -12087,7 +12370,7 @@ var CloneTokenManager = class {
|
|
|
12087
12370
|
this.tokens.delete(token);
|
|
12088
12371
|
this.eventBus.get("yield:clone-created").next({
|
|
12089
12372
|
correlationId: event.correlationId,
|
|
12090
|
-
response: { resourceId:
|
|
12373
|
+
response: { resourceId: resourceId4 }
|
|
12091
12374
|
});
|
|
12092
12375
|
} catch (error) {
|
|
12093
12376
|
this.logger.error("Clone create failed", { token: event.token, error });
|
|
@@ -12109,14 +12392,15 @@ var CloneTokenManager = class {
|
|
|
12109
12392
|
|
|
12110
12393
|
// src/service.ts
|
|
12111
12394
|
async function startMakeMeaning(config, eventBus, logger) {
|
|
12112
|
-
const
|
|
12113
|
-
if (!
|
|
12395
|
+
const filesystemConfig = config.services?.filesystem;
|
|
12396
|
+
if (!filesystemConfig?.path) {
|
|
12114
12397
|
throw new Error("services.filesystem.path is required for make-meaning service");
|
|
12115
12398
|
}
|
|
12116
|
-
const
|
|
12117
|
-
if (!
|
|
12118
|
-
throw new Error("services.
|
|
12399
|
+
const graphConfig = config.services?.graph;
|
|
12400
|
+
if (!graphConfig) {
|
|
12401
|
+
throw new Error("services.graph is required for make-meaning service");
|
|
12119
12402
|
}
|
|
12403
|
+
const configuredPath = filesystemConfig.path;
|
|
12120
12404
|
const projectRoot = config._metadata?.projectRoot;
|
|
12121
12405
|
let basePath;
|
|
12122
12406
|
if (path3.isAbsolute(configuredPath)) {
|
|
@@ -12166,30 +12450,34 @@ async function startMakeMeaning(config, eventBus, logger) {
|
|
|
12166
12450
|
error: (err) => jobQueueLogger.error("Job status pipeline error", { error: err })
|
|
12167
12451
|
});
|
|
12168
12452
|
const eventStoreLogger = logger.child({ component: "event-store" });
|
|
12169
|
-
const eventStore = createEventStoreCore(basePath,
|
|
12453
|
+
const eventStore = createEventStoreCore(basePath, void 0, eventBus, eventStoreLogger);
|
|
12170
12454
|
const inferenceLogger = logger.child({ component: "inference-client" });
|
|
12171
|
-
const
|
|
12172
|
-
|
|
12455
|
+
const inferenceConfig = config.services?.inference;
|
|
12456
|
+
if (!inferenceConfig) {
|
|
12457
|
+
throw new Error("services.inference is required for make-meaning service");
|
|
12458
|
+
}
|
|
12459
|
+
const inferenceClient = await getInferenceClient(inferenceConfig, inferenceLogger);
|
|
12460
|
+
const graphDb = await getGraphDatabase(graphConfig);
|
|
12173
12461
|
const kb = createKnowledgeBase(eventStore, basePath, projectRoot, graphDb, logger);
|
|
12174
12462
|
const graphConsumerLogger = logger.child({ component: "graph-consumer" });
|
|
12175
|
-
const graphConsumer = new GraphDBConsumer(
|
|
12463
|
+
const graphConsumer = new GraphDBConsumer(eventStore, graphDb, graphConsumerLogger);
|
|
12176
12464
|
await graphConsumer.initialize();
|
|
12177
12465
|
const stowerLogger = logger.child({ component: "stower" });
|
|
12178
|
-
const stower = new Stower(kb,
|
|
12466
|
+
const stower = new Stower(kb, eventBus, stowerLogger);
|
|
12179
12467
|
await stower.initialize();
|
|
12180
12468
|
const bootstrapLogger = logger.child({ component: "entity-types-bootstrap" });
|
|
12181
12469
|
await bootstrapEntityTypes(eventBus, config, bootstrapLogger);
|
|
12182
12470
|
const gathererLogger = logger.child({ component: "gatherer" });
|
|
12183
|
-
const gatherer = new Gatherer(
|
|
12471
|
+
const gatherer = new Gatherer(kb, eventBus, inferenceClient, gathererLogger, config);
|
|
12184
12472
|
await gatherer.initialize();
|
|
12185
12473
|
const binderLogger = logger.child({ component: "binder" });
|
|
12186
|
-
const binder = new Binder(kb, eventBus, binderLogger,
|
|
12474
|
+
const binder = new Binder(kb, eventBus, binderLogger, inferenceClient);
|
|
12187
12475
|
await binder.initialize();
|
|
12188
12476
|
const cloneTokenLogger = logger.child({ component: "clone-token-manager" });
|
|
12189
12477
|
const cloneTokenManager = new CloneTokenManager(kb, eventBus, cloneTokenLogger);
|
|
12190
12478
|
await cloneTokenManager.initialize();
|
|
12191
|
-
const contentFetcher = async (
|
|
12192
|
-
const view = await kb.views.get(
|
|
12479
|
+
const contentFetcher = async (resourceId4) => {
|
|
12480
|
+
const view = await kb.views.get(resourceId4);
|
|
12193
12481
|
if (!view) return null;
|
|
12194
12482
|
const primaryRep = getPrimaryRepresentation4(view.resource);
|
|
12195
12483
|
if (!primaryRep?.checksum || !primaryRep?.mediaType) return null;
|
|
@@ -12204,12 +12492,12 @@ async function startMakeMeaning(config, eventBus, logger) {
|
|
|
12204
12492
|
const commentLogger = logger.child({ component: "comment-detection-worker" });
|
|
12205
12493
|
const tagLogger = logger.child({ component: "tag-detection-worker" });
|
|
12206
12494
|
const workers = {
|
|
12207
|
-
detection: new ReferenceAnnotationWorker(jobQueue,
|
|
12208
|
-
generation: new GenerationWorker(jobQueue,
|
|
12209
|
-
highlight: new HighlightAnnotationWorker(jobQueue,
|
|
12210
|
-
assessment: new AssessmentAnnotationWorker(jobQueue,
|
|
12211
|
-
comment: new CommentAnnotationWorker(jobQueue,
|
|
12212
|
-
tag: new TagAnnotationWorker(jobQueue,
|
|
12495
|
+
detection: new ReferenceAnnotationWorker(jobQueue, inferenceClient, eventBus, contentFetcher, detectionLogger),
|
|
12496
|
+
generation: new GenerationWorker(jobQueue, inferenceClient, eventBus, generationLogger),
|
|
12497
|
+
highlight: new HighlightAnnotationWorker(jobQueue, inferenceClient, eventBus, contentFetcher, highlightLogger),
|
|
12498
|
+
assessment: new AssessmentAnnotationWorker(jobQueue, inferenceClient, eventBus, contentFetcher, assessmentLogger),
|
|
12499
|
+
comment: new CommentAnnotationWorker(jobQueue, inferenceClient, eventBus, contentFetcher, commentLogger),
|
|
12500
|
+
tag: new TagAnnotationWorker(jobQueue, inferenceClient, eventBus, contentFetcher, tagLogger)
|
|
12213
12501
|
};
|
|
12214
12502
|
workers.detection.start().catch((error) => {
|
|
12215
12503
|
detectionLogger.error("Worker stopped unexpectedly", { error });
|
|
@@ -12263,6 +12551,858 @@ async function startMakeMeaning(config, eventBus, logger) {
|
|
|
12263
12551
|
};
|
|
12264
12552
|
}
|
|
12265
12553
|
|
|
12554
|
+
// src/exchange/backup-exporter.ts
|
|
12555
|
+
import { getExtensionForMimeType } from "@semiont/content";
|
|
12556
|
+
|
|
12557
|
+
// src/exchange/tar.ts
|
|
12558
|
+
import { createGzip, createGunzip } from "zlib";
|
|
12559
|
+
import { Readable as Readable2, pipeline } from "stream";
|
|
12560
|
+
import { promisify } from "util";
|
|
12561
|
+
// Promise-returning form of stream.pipeline, so a pipeline can be awaited.
var pipelineAsync = promisify(pipeline);
|
|
12562
|
+
// Size in bytes of one tar block; headers are exactly this long and entry data is padded to a multiple of it.
var BLOCK_SIZE = 512;
|
|
12563
|
+
/**
 * Builds the BLOCK_SIZE-byte ustar header block for one regular-file entry.
 *
 * @param {string} name - Entry path. Only the first 100 characters are kept,
 *   and at most 100 bytes are written, so longer names are truncated.
 * @param {number} size - Length of the entry data in bytes.
 * @returns {Buffer} The header block with its checksum field filled in.
 */
function createTarHeader(name, size) {
  const header = Buffer.alloc(BLOCK_SIZE, 0);
  // Name field: offset 0, 100 bytes.
  header.write(name.slice(0, 100), 0, 100, "utf8");
  // Mode, uid and gid fields: NUL-terminated octal strings.
  header.write("0000644\0", 100, 8, "utf8");
  header.write("0000000\0", 108, 8, "utf8");
  header.write("0000000\0", 116, 8, "utf8");
  // Size and mtime fields: 11 octal digits followed by NUL.
  header.write(size.toString(8).padStart(11, "0") + "\0", 124, 12, "utf8");
  const mtime = Math.floor(Date.now() / 1e3);
  header.write(mtime.toString(8).padStart(11, "0") + "\0", 136, 12, "utf8");
  // The checksum is defined over the header with all eight checksum bytes
  // (offsets 148-155) set to ASCII spaces, so fill the whole field before
  // summing. Writing a single space here would leave seven NUL bytes in the
  // field and produce a checksum other tar readers reject.
  header.fill(0x20, 148, 156);
  // Type flag "0": regular file.
  header.write("0", 156, 1, "utf8");
  // ustar magic and version.
  header.write("ustar\0", 257, 6, "utf8");
  header.write("00", 263, 2, "utf8");
  let checksum = 0;
  for (const byte of header) {
    checksum += byte;
  }
  // Stored form: six octal digits, NUL, space.
  header.write(checksum.toString(8).padStart(6, "0") + "\0 ", 148, 8, "utf8");
  return header;
}
|
|
12583
|
+
/**
 * Number of filler bytes needed after `size` bytes of data to reach the next
 * BLOCK_SIZE boundary.
 *
 * @param {number} size - Data length in bytes.
 * @returns {number} 0 when `size` is already a multiple of BLOCK_SIZE,
 *   otherwise the distance to the next multiple.
 */
function paddingBytes(size) {
  const overhang = size % BLOCK_SIZE;
  if (overhang === 0) {
    return 0;
  }
  return BLOCK_SIZE - overhang;
}
|
|
12587
|
+
/**
 * Serialises entries as a gzip-compressed tar archive and writes it to `output`.
 *
 * The tar bytes are produced by an async generator wrapped in a Readable, so
 * the pipeline pulls chunks on demand (honouring backpressure from `output`)
 * instead of buffering the whole archive in memory. Because the generator is
 * part of the pipeline, an error thrown while iterating `entries` destroys the
 * gzip and output streams rather than leaving them open.
 *
 * @param {Iterable|AsyncIterable} entries - Items of the form
 *   `{ name, data }`, where `data` exposes a byte `length` (e.g. a Buffer).
 * @param {import("stream").Writable} output - Destination for the .tar.gz bytes.
 * @returns {Promise<void>} Resolves once the pipeline has finished; rejects
 *   with the first error from the entries iterator or any stream.
 */
async function writeTarGz(entries, output) {
  async function* tarChunks() {
    for await (const entry of entries) {
      const length = entry.data.length;
      yield createTarHeader(entry.name, length);
      if (length > 0) {
        yield entry.data;
      }
      const pad = paddingBytes(length);
      if (pad > 0) {
        yield Buffer.alloc(pad, 0);
      }
    }
    // End-of-archive marker: two all-zero blocks.
    yield Buffer.alloc(BLOCK_SIZE * 2, 0);
  }
  // objectMode: false makes the source a byte stream, so its buffer limit is
  // measured in bytes rather than in number of chunks.
  const tarStream = Readable2.from(tarChunks(), { objectMode: false });
  await pipelineAsync(tarStream, createGzip(), output);
}
|
|
12605
|
+
/**
 * Gunzips `input` completely and returns the decompressed bytes.
 *
 * On any failure (read error on `input` or a decompression error) both the
 * input and the gunzip stream are destroyed before rejecting, so a failed
 * import does not leave the source stream open.
 *
 * @param {import("stream").Readable} input - Gzip-compressed byte stream.
 * @returns {Promise<Buffer>} All decompressed data, concatenated.
 */
async function decompressStream(input) {
  const gunzip = createGunzip();
  const chunks = [];
  return new Promise((resolve4, reject) => {
    const fail = (err) => {
      // Old-style streams may not implement destroy(); guard the call.
      if (typeof input.destroy === "function") {
        input.destroy();
      }
      gunzip.destroy();
      reject(err);
    };
    gunzip.on("data", (chunk) => chunks.push(chunk));
    gunzip.on("end", () => resolve4(Buffer.concat(chunks)));
    gunzip.on("error", fail);
    // pipe() does not forward source errors to the destination, so listen on
    // the input as well.
    input.on("error", fail);
    input.pipe(gunzip);
  });
}
|
|
12616
|
+
/**
 * Walks an uncompressed tar image and yields one record per header block.
 *
 * Parsing stops at the first all-zero block or when fewer than BLOCK_SIZE
 * bytes remain. Every header is treated as a file entry; the type flag is not
 * inspected.
 *
 * @param {Buffer} decompressed - The complete tar archive bytes.
 * @yields {{ name: string, size: number, data: Buffer }} Entry name, declared
 *   size, and a view (not a copy) of the entry's data.
 * @throws {Error} If a header's size field is not a valid octal number, or if
 *   the archive ends before the declared entry data does.
 */
function* parseTarEntries(decompressed) {
  let offset = 0;
  while (offset + BLOCK_SIZE <= decompressed.length) {
    const header = decompressed.subarray(offset, offset + BLOCK_SIZE);
    if (header.every((b) => b === 0)) break;
    // Look for the terminator inside the 100-byte name field only; a name that
    // fills the field completely has no NUL terminator.
    const nameField = header.subarray(0, 100);
    const nameEnd = nameField.indexOf(0);
    const name = (nameEnd === -1 ? nameField : nameField.subarray(0, nameEnd)).toString("utf8");
    const sizeStr = header.subarray(124, 135).toString("utf8").trim();
    const size = parseInt(sizeStr, 8);
    // A corrupt size would otherwise turn the offset into NaN, yield a bogus
    // entry and silently end the loop.
    if (!Number.isInteger(size) || size < 0) {
      throw new Error(`Invalid tar archive: bad size field for entry "${name}"`);
    }
    offset += BLOCK_SIZE;
    if (offset + size > decompressed.length) {
      throw new Error(`Invalid tar archive: entry "${name}" is truncated`);
    }
    const data = decompressed.subarray(offset, offset + size);
    offset += size + paddingBytes(size);
    yield { name, size, data };
  }
}
|
|
12632
|
+
/**
 * Reads a gzip-compressed tar stream and yields its entries.
 *
 * The stream is fully decompressed via decompressStream before the first
 * entry is produced; entries then come from parseTarEntries.
 *
 * @param {import("stream").Readable} input - The .tar.gz byte stream.
 * @yields Entries as produced by parseTarEntries.
 */
async function* readTarGz(input) {
  const tarImage = await decompressStream(input);
  for (const entry of parseTarEntries(tarImage)) {
    yield entry;
  }
}
|
|
12636
|
+
|
|
12637
|
+
// src/exchange/manifest.ts
|
|
12638
|
+
// Value of the `format` field that identifies a backup manifest (see isBackupManifest).
var BACKUP_FORMAT = "semiont-backup";
|
|
12639
|
+
// Highest manifest version accepted by validateManifestVersion; also the version written by the exporters.
var FORMAT_VERSION = 1;
|
|
12640
|
+
// Value of the "semiont:format" field that identifies a linked-data manifest (see isLinkedDataManifest).
var LINKED_DATA_FORMAT = "semiont-linked-data";
|
|
12641
|
+
/**
 * Tells whether a parsed value is a linked-data export manifest.
 *
 * @param {*} obj - Any parsed JSON value.
 * @returns {boolean} True only for a non-null object whose "semiont:format"
 *   property equals LINKED_DATA_FORMAT.
 */
function isLinkedDataManifest(obj) {
  if (obj === null || typeof obj !== "object") {
    return false;
  }
  return obj["semiont:format"] === LINKED_DATA_FORMAT;
}
|
|
12644
|
+
/**
 * Tells whether a parsed value is a backup manifest header.
 *
 * @param {*} obj - Any parsed JSON value.
 * @returns {boolean} True only for a non-null object whose `format` property
 *   equals BACKUP_FORMAT.
 */
function isBackupManifest(obj) {
  if (obj === null || typeof obj !== "object") {
    return false;
  }
  return obj.format === BACKUP_FORMAT;
}
|
|
12647
|
+
/**
 * Rejects manifests written by a newer format than this code understands.
 *
 * Only values that compare greater than FORMAT_VERSION are rejected; anything
 * else passes.
 *
 * @param {number} version - The version recorded in the manifest.
 * @throws {Error} When `version` is greater than FORMAT_VERSION.
 */
function validateManifestVersion(version) {
  const isTooNew = version > FORMAT_VERSION;
  if (!isTooNew) {
    return;
  }
  const message = `Unsupported format version ${version}. This tool supports version ${FORMAT_VERSION}.`;
  throw new Error(message);
}
|
|
12654
|
+
|
|
12655
|
+
// src/exchange/backup-exporter.ts
|
|
12656
|
+
// Stream id that exportBackup always reads in addition to the per-resource streams.
var SYSTEM_STREAM = "__system__";
|
|
12657
|
+
/**
 * Writes a complete backup (.tar.gz) of the event store and referenced content.
 *
 * Archive layout, in order:
 *   1. `.semiont/manifest.jsonl` - header line followed by one summary line per stream
 *   2. `.semiont/events/<stream>.jsonl` - one file per non-empty stream
 *   3. `<checksum><ext>` - one file per content blob found by collectContentRefs
 *
 * Everything is gathered in memory before writing starts. A failure to
 * retrieve any blob rejects the whole export.
 *
 * @param {object} options
 * @param {object} options.eventStore - Read through `log.storage.getAllResourceIds()`
 *   and `log.getEvents(id)`.
 * @param {object} options.content - Read through `retrieve(checksum, mediaType)`.
 * @param {string} options.sourceUrl - Recorded verbatim in the manifest header.
 * @param {object} [options.logger] - Optional; `info` is called at each stage.
 * @param {import("stream").Writable} output - Destination passed to writeTarGz.
 * @returns {Promise<object>} The manifest header that was written.
 */
async function exportBackup(options, output) {
  const { eventStore, content, sourceUrl, logger } = options;

  // Stage 1: load every stream, keeping only those that have events.
  const resourceIds = await eventStore.log.storage.getAllResourceIds();
  logger?.info("Backup export: enumerating streams", { resourceCount: resourceIds.length });
  const streamData = /* @__PURE__ */ new Map();
  let totalEvents = 0;
  for (const streamId of [SYSTEM_STREAM, ...resourceIds]) {
    const streamEvents = await eventStore.log.getEvents(streamId);
    if (!(streamEvents.length > 0)) {
      continue;
    }
    streamData.set(streamId, streamEvents);
    totalEvents += streamEvents.length;
  }

  // Stage 2: fetch every content blob the events refer to.
  const contentRefs = collectContentRefs(streamData);
  logger?.info("Backup export: collected content refs", {
    streams: streamData.size,
    events: totalEvents,
    blobs: contentRefs.size
  });
  const contentBlobs = /* @__PURE__ */ new Map();
  let totalContentBytes = 0;
  for (const [checksum, mediaType] of contentRefs) {
    const data = await content.retrieve(checksum, mediaType);
    const ext = getExtensionForMimeType(mediaType);
    contentBlobs.set(checksum, { data, ext });
    totalContentBytes += data.length;
  }

  // Stage 3: build the manifest (header plus per-stream summaries).
  const streamSummaries = Array.from(streamData, ([stream, streamEvents]) => ({
    stream,
    eventCount: streamEvents.length,
    firstChecksum: streamEvents[0].metadata.checksum || "",
    lastChecksum: streamEvents[streamEvents.length - 1].metadata.checksum || ""
  }));
  const manifestHeader = {
    format: BACKUP_FORMAT,
    version: FORMAT_VERSION,
    exportedAt: (/* @__PURE__ */ new Date()).toISOString(),
    sourceUrl,
    stats: {
      streams: streamData.size,
      events: totalEvents,
      blobs: contentBlobs.size,
      contentBytes: totalContentBytes
    }
  };

  // Stage 4: stream the archive entries out in the documented order.
  const toJsonLines = (items) => items.map((item) => JSON.stringify(item)).join("\n") + "\n";
  async function* generateEntries() {
    yield {
      name: ".semiont/manifest.jsonl",
      data: Buffer.from(toJsonLines([manifestHeader, ...streamSummaries]), "utf8")
    };
    for (const [streamId, streamEvents] of streamData) {
      let fileName;
      if (streamId === SYSTEM_STREAM) {
        fileName = ".semiont/events/__system__.jsonl";
      } else {
        fileName = `.semiont/events/${streamId}.jsonl`;
      }
      yield { name: fileName, data: Buffer.from(toJsonLines(streamEvents), "utf8") };
    }
    for (const [checksum, blob] of contentBlobs) {
      yield { name: `${checksum}${blob.ext}`, data: blob.data };
    }
  }
  await writeTarGz(generateEntries(), output);

  logger?.info("Backup export complete", { ...manifestHeader.stats });
  return manifestHeader;
}
|
|
12730
|
+
/**
 * Finds the content blobs referenced by the given event streams.
 *
 * Only "resource.created" events are inspected, and only those whose payload
 * carries both `contentChecksum` and `format`.
 *
 * @param {Map<string, Array>} streamData - Stream id mapped to stored events.
 * @returns {Map<string, string>} Content checksum mapped to its format; a
 *   later event for the same checksum overwrites an earlier one.
 */
function collectContentRefs(streamData) {
  const refs = /* @__PURE__ */ new Map();
  for (const [, events] of streamData) {
    for (const { event } of events) {
      if (event.type !== "resource.created") {
        continue;
      }
      const { contentChecksum, format } = event.payload;
      if (!contentChecksum || !format) {
        continue;
      }
      refs.set(contentChecksum, format);
    }
  }
  return refs;
}
|
|
12744
|
+
|
|
12745
|
+
// src/exchange/replay.ts
|
|
12746
|
+
// Bundled module namespace that supplies race, timer and firstValueFrom to the replay helpers below.
var import_rxjs9 = __toESM(require_cjs(), 1);
|
|
12747
|
+
// Bundled module namespace that supplies the map operator to the replay helpers below.
var import_operators9 = __toESM(require_operators(), 1);
|
|
12748
|
+
// How long (30 000 ms) a replay helper waits for its success/failure reply before failing with a timeout error.
var REPLAY_TIMEOUT_MS = 3e4;
|
|
12749
|
+
/**
 * Replays one JSONL event stream through the event bus.
 *
 * Before replaying, adjacent events are checked for hash-chain continuity:
 * when an event has `metadata.prevEventHash` and its predecessor has
 * `metadata.checksum`, the two must match. Mismatches are logged and reported
 * through `hashChainValid`; they do not stop the replay.
 *
 * @param {string} jsonl - One JSON-encoded stored event per line.
 * @param {object} eventBus - Passed through to replayEvent.
 * @param {Function} resolveBlob - Passed through to replayEvent.
 * @param {object} [logger] - Optional; `warn` is called on chain breaks.
 * @returns {Promise<{ stats: object, hashChainValid: boolean }>} Replay
 *   counters and the outcome of the hash-chain check.
 */
async function replayEventStream(jsonl, eventBus, resolveBlob, logger) {
  const nonEmptyLines = jsonl.trim().split("\n").filter((text) => text.length > 0);
  const storedEvents = nonEmptyLines.map((text) => JSON.parse(text));

  let hashChainValid = true;
  storedEvents.forEach((current, index) => {
    if (index === 0) {
      return;
    }
    const got = current.metadata.prevEventHash;
    if (!got) {
      return;
    }
    const expected = storedEvents[index - 1].metadata.checksum;
    if (!expected || got === expected) {
      return;
    }
    logger?.warn("Hash chain break", { index, expected, got });
    hashChainValid = false;
  });

  const stats = {
    eventsReplayed: 0,
    resourcesCreated: 0,
    annotationsCreated: 0,
    entityTypesAdded: 0
  };
  // Events are replayed strictly one at a time, in file order.
  for (const { event } of storedEvents) {
    await replayEvent(event, eventBus, resolveBlob, stats, logger);
    stats.eventsReplayed += 1;
  }
  return { stats, hashChainValid };
}
|
|
12779
|
+
/**
 * Dispatches a single stored event to the replay helper for its type and
 * updates the matching counter in `stats`.
 *
 * Job and representation events are only logged at debug level and otherwise
 * skipped. Unrecognised types are logged as a warning and ignored.
 *
 * @param {object} event - The event to replay; `event.type` selects the handler.
 * @param {object} eventBus - Bus handed to the helper.
 * @param {Function} resolveBlob - Used by the "resource.created" helper.
 * @param {object} stats - Mutated in place: `entityTypesAdded`,
 *   `resourcesCreated` and `annotationsCreated` are incremented after the
 *   corresponding helper resolves.
 * @param {object} [logger] - Optional logger.
 * @returns {Promise<void>}
 */
async function replayEvent(event, eventBus, resolveBlob, stats, logger) {
  const { type } = event;

  if (type === "entitytype.added") {
    await replayEntityTypeAdded(event, eventBus, logger);
    stats.entityTypesAdded++;
    return;
  }
  if (type === "resource.created") {
    await replayResourceCreated(event, eventBus, resolveBlob, logger);
    stats.resourcesCreated++;
    return;
  }
  if (type === "annotation.added") {
    await replayAnnotationAdded(event, eventBus, logger);
    stats.annotationsCreated++;
    return;
  }
  if (type === "annotation.body.updated") {
    await replayAnnotationBodyUpdated(event, eventBus, logger);
    return;
  }
  if (type === "annotation.removed") {
    await replayAnnotationRemoved(event, eventBus, logger);
    return;
  }
  if (type === "resource.archived") {
    await replayResourceArchived(event, eventBus, logger);
    return;
  }
  if (type === "resource.unarchived") {
    await replayResourceUnarchived(event, eventBus, logger);
    return;
  }
  if (type === "entitytag.added" || type === "entitytag.removed") {
    await replayEntityTagChange(event, eventBus, logger);
    return;
  }

  // Job events are transient — skip during replay
  const isJobEvent = type === "job.started" || type === "job.progress" || type === "job.completed" || type === "job.failed";
  if (isJobEvent) {
    logger?.debug("Skipping job event during replay", { type: event.type });
    return;
  }

  // Representation events — content is already stored via resource.created replay
  if (type === "representation.added" || type === "representation.removed") {
    logger?.debug("Skipping representation event during replay", { type: event.type });
    return;
  }

  logger?.warn("Unknown event type during replay", { type: event.type });
}
|
|
12825
|
+
/**
 * Replays an "entitytype.added" event: emits "mark:add-entity-type" and waits
 * for "mark:entity-type-added", "mark:entity-type-add-failed", or the timeout.
 *
 * @param {object} event - Uses `payload.entityType` and `userId`.
 * @param {object} eventBus - `get(channel)` must return an object with
 *   `pipe` and `next`.
 * @param {object} [logger] - Optional; `debug` is called on success.
 * @returns {Promise<void>} Rejects with the reported failure error, or with a
 *   timeout error after REPLAY_TIMEOUT_MS.
 */
async function replayEntityTypeAdded(event, eventBus, logger) {
  const result$ = (0, import_rxjs9.race)(
    eventBus.get("mark:entity-type-added").pipe((0, import_operators9.map)(() => "ok")),
    eventBus.get("mark:entity-type-add-failed").pipe((0, import_operators9.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs9.timer)(REPLAY_TIMEOUT_MS).pipe((0, import_operators9.map)(() => {
      throw new Error("Timeout waiting for mark:entity-type-added");
    }))
  );
  // Subscribe BEFORE emitting the command. result$ only starts listening when
  // firstValueFrom subscribes, so a reply delivered synchronously during
  // next() would otherwise be missed and the call would wait for the timeout.
  const settled = (0, import_rxjs9.firstValueFrom)(result$);
  eventBus.get("mark:add-entity-type").next({
    tag: event.payload.entityType,
    userId: event.userId
  });
  await settled;
  logger?.debug("Replayed entitytype.added", { entityType: event.payload.entityType });
}
|
|
12842
|
+
/**
 * Replays a "resource.created" event: resolves the content blob, emits
 * "yield:create" and waits for "yield:created", "yield:create-failed", or the
 * timeout.
 *
 * @param {object} event - Uses `payload` (name, format, contentChecksum, ...)
 *   and `userId`.
 * @param {object} eventBus - `get(channel)` must return an object with
 *   `pipe` and `next`.
 * @param {Function} resolveBlob - Maps a content checksum to the blob data,
 *   or a falsy value when the blob is missing.
 * @param {object} [logger] - Optional; `debug` is called on success.
 * @returns {Promise<void>} Rejects if the blob is missing, with the reported
 *   failure error, or with a timeout error after REPLAY_TIMEOUT_MS.
 */
async function replayResourceCreated(event, eventBus, resolveBlob, logger) {
  const { payload } = event;
  const blob = resolveBlob(payload.contentChecksum);
  if (!blob) {
    throw new Error(`Missing content blob for checksum ${payload.contentChecksum}`);
  }
  const result$ = (0, import_rxjs9.race)(
    eventBus.get("yield:created").pipe((0, import_operators9.map)((r) => r)),
    eventBus.get("yield:create-failed").pipe((0, import_operators9.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs9.timer)(REPLAY_TIMEOUT_MS).pipe((0, import_operators9.map)(() => {
      throw new Error("Timeout waiting for yield:created");
    }))
  );
  // Subscribe BEFORE emitting the command. result$ only starts listening when
  // firstValueFrom subscribes, so a reply delivered synchronously during
  // next() would otherwise be missed and the call would wait for the timeout.
  const settled = (0, import_rxjs9.firstValueFrom)(result$);
  eventBus.get("yield:create").next({
    name: payload.name,
    content: blob,
    format: payload.format,
    userId: event.userId,
    language: payload.language,
    entityTypes: payload.entityTypes,
    creationMethod: payload.creationMethod,
    isDraft: payload.isDraft,
    generatedFrom: payload.generatedFrom,
    generationPrompt: payload.generationPrompt
  });
  await settled;
  logger?.debug("Replayed resource.created", { name: payload.name });
}
|
|
12872
|
+
/**
 * Replays an "annotation.added" event: emits "mark:create" and waits for
 * "mark:created", "mark:create-failed", or the timeout.
 *
 * @param {object} event - Uses `payload.annotation`, `userId` and `resourceId`.
 * @param {object} eventBus - `get(channel)` must return an object with
 *   `pipe` and `next`.
 * @param {object} [logger] - Optional; `debug` is called on success.
 * @returns {Promise<void>} Rejects with the reported failure error, or with a
 *   timeout error after REPLAY_TIMEOUT_MS.
 */
async function replayAnnotationAdded(event, eventBus, logger) {
  const result$ = (0, import_rxjs9.race)(
    eventBus.get("mark:created").pipe((0, import_operators9.map)(() => "ok")),
    eventBus.get("mark:create-failed").pipe((0, import_operators9.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs9.timer)(REPLAY_TIMEOUT_MS).pipe((0, import_operators9.map)(() => {
      throw new Error("Timeout waiting for mark:created");
    }))
  );
  // Subscribe BEFORE emitting the command. result$ only starts listening when
  // firstValueFrom subscribes, so a reply delivered synchronously during
  // next() would otherwise be missed and the call would wait for the timeout.
  const settled = (0, import_rxjs9.firstValueFrom)(result$);
  eventBus.get("mark:create").next({
    annotation: event.payload.annotation,
    userId: event.userId,
    resourceId: event.resourceId
  });
  await settled;
  logger?.debug("Replayed annotation.added", { annotationId: event.payload.annotation.id });
}
|
|
12890
|
+
/**
 * Replays an "annotation.body.updated" event: emits "mark:update-body" and
 * waits for "mark:body-updated", "mark:body-update-failed", or the timeout.
 *
 * @param {object} event - Uses `payload.annotationId`, `payload.operations`,
 *   `userId` and `resourceId`.
 * @param {object} eventBus - `get(channel)` must return an object with
 *   `pipe` and `next`.
 * @param {object} [logger] - Optional; `debug` is called on success.
 * @returns {Promise<void>} Rejects with the reported failure error, or with a
 *   timeout error after REPLAY_TIMEOUT_MS.
 */
async function replayAnnotationBodyUpdated(event, eventBus, logger) {
  const result$ = (0, import_rxjs9.race)(
    eventBus.get("mark:body-updated").pipe((0, import_operators9.map)(() => "ok")),
    eventBus.get("mark:body-update-failed").pipe((0, import_operators9.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs9.timer)(REPLAY_TIMEOUT_MS).pipe((0, import_operators9.map)(() => {
      throw new Error("Timeout waiting for mark:body-updated");
    }))
  );
  // Subscribe BEFORE emitting the command. result$ only starts listening when
  // firstValueFrom subscribes, so a reply delivered synchronously during
  // next() would otherwise be missed and the call would wait for the timeout.
  const settled = (0, import_rxjs9.firstValueFrom)(result$);
  eventBus.get("mark:update-body").next({
    annotationId: event.payload.annotationId,
    userId: event.userId,
    resourceId: event.resourceId,
    operations: event.payload.operations
  });
  await settled;
  logger?.debug("Replayed annotation.body.updated", { annotationId: event.payload.annotationId });
}
|
|
12909
|
+
/**
 * Replays an "annotation.removed" event: emits "mark:delete" and waits for
 * "mark:deleted", "mark:delete-failed", or the timeout.
 *
 * @param {object} event - Uses `payload.annotationId`, `userId` and `resourceId`.
 * @param {object} eventBus - `get(channel)` must return an object with
 *   `pipe` and `next`.
 * @param {object} [logger] - Optional; `debug` is called on success.
 * @returns {Promise<void>} Rejects with the reported failure error, or with a
 *   timeout error after REPLAY_TIMEOUT_MS.
 */
async function replayAnnotationRemoved(event, eventBus, logger) {
  const result$ = (0, import_rxjs9.race)(
    eventBus.get("mark:deleted").pipe((0, import_operators9.map)(() => "ok")),
    eventBus.get("mark:delete-failed").pipe((0, import_operators9.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs9.timer)(REPLAY_TIMEOUT_MS).pipe((0, import_operators9.map)(() => {
      throw new Error("Timeout waiting for mark:deleted");
    }))
  );
  // Subscribe BEFORE emitting the command. result$ only starts listening when
  // firstValueFrom subscribes, so a reply delivered synchronously during
  // next() would otherwise be missed and the call would wait for the timeout.
  const settled = (0, import_rxjs9.firstValueFrom)(result$);
  eventBus.get("mark:delete").next({
    annotationId: event.payload.annotationId,
    userId: event.userId,
    resourceId: event.resourceId
  });
  await settled;
  logger?.debug("Replayed annotation.removed", { annotationId: event.payload.annotationId });
}
|
|
12927
|
+
/**
 * Replays a "resource.archived" event by emitting "mark:archive".
 *
 * Fire-and-forget: the function does not wait for any reply on the bus.
 *
 * @param {object} event - Uses `userId` and `resourceId`.
 * @param {object} eventBus - `get("mark:archive")` must return an object with `next`.
 * @param {object} [logger] - Optional; `debug` is called after emitting.
 * @returns {Promise<void>}
 */
async function replayResourceArchived(event, eventBus, logger) {
  const { userId, resourceId } = event;
  const archiveChannel = eventBus.get("mark:archive");
  archiveChannel.next({ userId, resourceId });
  logger?.debug("Replayed resource.archived", { resourceId: event.resourceId });
}
|
|
12934
|
+
/**
 * Replays a "resource.unarchived" event by emitting "mark:unarchive".
 *
 * Fire-and-forget: the function does not wait for any reply on the bus.
 *
 * @param {object} event - Uses `userId` and `resourceId`.
 * @param {object} eventBus - `get("mark:unarchive")` must return an object with `next`.
 * @param {object} [logger] - Optional; `debug` is called after emitting.
 * @returns {Promise<void>}
 */
async function replayResourceUnarchived(event, eventBus, logger) {
  const { userId, resourceId } = event;
  const unarchiveChannel = eventBus.get("mark:unarchive");
  unarchiveChannel.next({ userId, resourceId });
  logger?.debug("Replayed resource.unarchived", { resourceId: event.resourceId });
}
|
|
12941
|
+
/**
 * Replays an "entitytag.added" or "entitytag.removed" event by emitting
 * "mark:update-entity-types".
 *
 * The change is expressed as a before/after pair containing only the affected
 * type: added is `[]` to `[type]`, anything else is `[type]` to `[]`.
 * Fire-and-forget: the function does not wait for any reply on the bus.
 *
 * @param {object} event - Uses `type`, `resourceId`, `userId` and
 *   `payload.entityType`.
 * @param {object} eventBus - `get("mark:update-entity-types")` must return an
 *   object with `next`.
 * @param {object} [logger] - Optional; `debug` is called after emitting.
 * @returns {Promise<void>}
 */
async function replayEntityTagChange(event, eventBus, logger) {
  const resourceId4 = event.resourceId;
  const entityType = event.payload.entityType;
  const wasAdded = event.type === "entitytag.added";
  const updateChannel = eventBus.get("mark:update-entity-types");
  updateChannel.next({
    resourceId: resourceId4,
    userId: event.userId,
    currentEntityTypes: wasAdded ? [] : [entityType],
    updatedEntityTypes: wasAdded ? [entityType] : []
  });
  logger?.debug("Replayed entity tag change", { type: event.type, entityType });
}
|
|
12961
|
+
|
|
12962
|
+
// src/exchange/backup-importer.ts
|
|
12963
|
+
/**
 * Creates a lookup from content checksum to blob data for an unpacked archive.
 *
 * Every entry whose name does not start with ".semiont/" is treated as a
 * content blob, keyed by its name with the text after the last "." removed.
 *
 * @param {Map<string, Buffer>} entries - Archive entry name mapped to data.
 * @returns {(checksum: string) => (Buffer|undefined)} Resolver returning the
 *   blob for a checksum, or undefined when the archive has none.
 */
function buildBlobResolver(entries) {
  const nameByChecksum = /* @__PURE__ */ new Map();
  for (const entryName of entries.keys()) {
    if (entryName.startsWith(".semiont/")) {
      continue;
    }
    const extensionStart = entryName.lastIndexOf(".");
    const key = extensionStart < 0 ? entryName : entryName.slice(0, extensionStart);
    nameByChecksum.set(key, entryName);
  }
  return (checksum) => {
    const entryName = nameByChecksum.get(checksum);
    if (!entryName) {
      return void 0;
    }
    return entries.get(entryName);
  };
}
|
|
12977
|
+
/**
 * Restores a backup archive by replaying its event streams through the bus.
 *
 * Steps: unpack the whole archive into memory, validate the manifest header,
 * replay `.semiont/events/__system__.jsonl` first (when present), then replay
 * each remaining stream in the order the manifest lists them. Streams listed
 * in the manifest but absent from the archive are logged and skipped.
 *
 * @param {import("stream").Readable} archive - The .tar.gz backup stream.
 * @param {object} options
 * @param {object} options.eventBus - Bus passed to replayEventStream.
 * @param {object} [options.logger] - Optional logger (`info`, `warn`).
 * @returns {Promise<{ manifest: object, stats: object, hashChainValid: boolean }>}
 *   The manifest header, merged replay counters, and whether every stream's
 *   hash chain checked out.
 * @throws {Error} If the manifest is missing, empty, not a backup manifest,
 *   or of an unsupported version.
 */
async function importBackup(archive, options) {
  const { eventBus, logger } = options;
  const entries = /* @__PURE__ */ new Map();
  for await (const entry of readTarGz(archive)) {
    entries.set(entry.name, entry.data);
  }
  const manifestData = entries.get(".semiont/manifest.jsonl");
  if (!manifestData) {
    throw new Error("Invalid backup: missing .semiont/manifest.jsonl");
  }
  // Drop blank lines so stray empty lines do not reach JSON.parse.
  const manifestLines = manifestData.toString("utf8").split("\n").map((line) => line.trim()).filter((line) => line.length > 0);
  if (manifestLines.length === 0) {
    throw new Error("Invalid backup: empty .semiont/manifest.jsonl");
  }
  const header = JSON.parse(manifestLines[0]);
  if (!isBackupManifest(header)) {
    // header may be null or a primitive here; optional chaining keeps the
    // intended error from being replaced by a TypeError.
    throw new Error(`Invalid backup: expected format "${BACKUP_FORMAT}", got "${header?.format}"`);
  }
  validateManifestVersion(header.version);
  const streamSummaries = manifestLines.slice(1).map((line) => JSON.parse(line));
  // stats is informational only; a header without it must not make the import
  // fail just because a logger was supplied.
  logger?.info("Backup import: parsed manifest", {
    streams: header.stats?.streams,
    events: header.stats?.events,
    blobs: header.stats?.blobs
  });
  const resolveBlob = buildBlobResolver(entries);
  let stats = { eventsReplayed: 0, resourcesCreated: 0, annotationsCreated: 0, entityTypesAdded: 0 };
  let hashChainValid = true;
  // Replays one stream and folds its outcome into the running totals.
  const replayStream = async (data) => {
    const result = await replayEventStream(data.toString("utf8"), eventBus, resolveBlob, logger);
    stats = mergeStats(stats, result.stats);
    if (!result.hashChainValid) hashChainValid = false;
  };
  const systemData = entries.get(".semiont/events/__system__.jsonl");
  if (systemData) {
    await replayStream(systemData);
  }
  for (const summary of streamSummaries) {
    if (summary.stream === "__system__") continue;
    const eventData = entries.get(`.semiont/events/${summary.stream}.jsonl`);
    if (!eventData) {
      logger?.warn("Backup import: missing event stream", { stream: summary.stream });
      continue;
    }
    await replayStream(eventData);
  }
  logger?.info("Backup import complete", { ...stats, hashChainValid });
  return { manifest: header, stats, hashChainValid };
}
|
|
13032
|
+
/**
 * Adds two replay-statistics records field by field.
 *
 * @param {object} a - First record.
 * @param {object} b - Second record.
 * @returns {object} A new record with the four counters summed; neither
 *   input is modified.
 */
function mergeStats(a, b) {
  const total = (counter) => a[counter] + b[counter];
  return {
    eventsReplayed: total("eventsReplayed"),
    resourcesCreated: total("resourcesCreated"),
    annotationsCreated: total("annotationsCreated"),
    entityTypesAdded: total("entityTypesAdded")
  };
}
|
|
13040
|
+
|
|
13041
|
+
// src/exchange/linked-data-exporter.ts
|
|
13042
|
+
import { getExtensionForMimeType as getExtensionForMimeType2 } from "@semiont/content";
|
|
13043
|
+
// JSON-LD "@context" attached to every exported resource document: two remote
// contexts followed by the Semiont-specific term definitions.
var SEMIONT_CONTEXT = [
  "https://schema.org/",
  "http://www.w3.org/ns/anno.jsonld",
  {
    semiont: "https://semiont.org/vocab/",
    entityTypes: "semiont:entityTypes",
    creationMethod: "semiont:creationMethod",
    archived: "semiont:archived",
    representations: { "@id": "semiont:representations", "@container": "@set" },
    annotations: { "@id": "semiont:annotations", "@container": "@set" }
  }
];
|
|
13055
|
+
// JSON-LD "@context" for the linked-data manifest: prefix-to-namespace map for
// the prefixed keys the manifest uses.
var MANIFEST_CONTEXT = {
  semiont: "https://semiont.org/vocab/",
  schema: "https://schema.org/",
  dct: "http://purl.org/dc/terms/",
  prov: "http://www.w3.org/ns/prov#",
  void: "http://rdfs.org/ns/void#"
};
|
|
13062
|
+
/**
 * Returns a copy of an annotation whose relative references are expanded to
 * absolute URLs under `baseUrl`.
 *
 * A reference counts as already absolute when it starts with "http".
 * - `id` becomes `<baseUrl>/annotations/<id>`
 * - a string `target`, or an object target's `source`, becomes
 *   `<baseUrl>/resources/<value>`
 * - `body` is processed by hydrateBody
 *
 * @param {object} annotation - The annotation to expand; not modified.
 * @param {string} baseUrl - URL prefix without a trailing slash.
 * @returns {object} A shallow copy with the expanded fields replaced.
 */
function hydrateAnnotation(annotation, baseUrl) {
  const isRelative = (ref) => !ref.startsWith("http");
  const result = { ...annotation };

  if (result.id && isRelative(result.id)) {
    result.id = `${baseUrl}/annotations/${result.id}`;
  }

  const originalTarget = result.target;
  if (typeof originalTarget === "string") {
    if (isRelative(originalTarget)) {
      result.target = `${baseUrl}/resources/${originalTarget}`;
    }
  } else if (originalTarget && typeof originalTarget === "object") {
    // Copy the target so the caller's object is left untouched.
    const targetCopy = { ...originalTarget };
    if (targetCopy.source && isRelative(targetCopy.source)) {
      targetCopy.source = `${baseUrl}/resources/${targetCopy.source}`;
    }
    result.target = targetCopy;
  }

  result.body = hydrateBody(result.body, baseUrl);
  return result;
}
|
|
13081
|
+
/**
 * Applies hydrateBodyItem to an annotation body, which may be a single item
 * or an array of items.
 *
 * @param {*} body - One body item or an array of them.
 * @param {string} baseUrl - URL prefix handed to hydrateBodyItem.
 * @returns {*} A new array of processed items for array input, otherwise the
 *   single processed item.
 */
function hydrateBody(body, baseUrl) {
  return Array.isArray(body)
    ? body.map((entry) => hydrateBodyItem(entry, baseUrl))
    : hydrateBodyItem(body, baseUrl);
}
|
|
13087
|
+
/**
 * Expands a relative `source` on one annotation body item.
 *
 * Only objects that have a string `source` not starting with "http" are
 * changed; everything else is returned as-is (same reference).
 *
 * @param {*} item - A body item of any shape.
 * @param {string} baseUrl - URL prefix without a trailing slash.
 * @returns {*} A copy with `source` set to `<baseUrl>/resources/<source>`, or
 *   the original item.
 */
function hydrateBodyItem(item, baseUrl) {
  const hasSource = Boolean(item) && typeof item === "object" && "source" in item;
  if (!hasSource) {
    return item;
  }
  const { source } = item;
  if (typeof source !== "string" || source.startsWith("http")) {
    return item;
  }
  return { ...item, source: `${baseUrl}/resources/${source}` };
}
|
|
13096
|
+
/**
 * Writes a linked-data export (.tar.gz) of resource views and their content.
 *
 * Archive layout, in order:
 *   1. `.semiont/manifest.jsonld` - the dataset manifest
 *   2. `.semiont/resources/<@id>.jsonld` - one document per exported resource
 *   3. `<checksum><ext>` - one file per content blob that could be retrieved
 *
 * Archived resources are left out unless `includeArchived` is truthy. A blob
 * that cannot be retrieved is logged as a warning and omitted; it does not
 * fail the export.
 *
 * @param {object} options
 * @param {object} options.views - Read through `getAll()`.
 * @param {object} options.content - Read through `retrieve(checksum, mediaType)`.
 * @param {string} options.sourceUrl - Recorded in the manifest and passed to
 *   buildResourceJsonLd.
 * @param {*} options.entityTypes - Recorded verbatim in the manifest.
 * @param {boolean} [options.includeArchived] - Export archived resources too.
 * @param {object} [options.logger] - Optional logger (`info`, `warn`).
 * @param {import("stream").Writable} output - Destination passed to writeTarGz.
 * @returns {Promise<object>} The manifest object that was written.
 */
async function exportLinkedData(options, output) {
  const { views, content, sourceUrl, entityTypes, includeArchived, logger } = options;

  const allViews = await views.getAll();
  let resourceViews = allViews;
  if (!includeArchived) {
    resourceViews = allViews.filter((candidate) => !candidate.resource.archived);
  }
  logger?.info("Linked data export: enumerating resources", { count: resourceViews.length });

  const contentRefs = /* @__PURE__ */ new Map();
  for (const { resource } of resourceViews) {
    collectContentRefsFromResource(resource, contentRefs);
  }

  const contentBlobs = /* @__PURE__ */ new Map();
  for (const [checksum, mediaType] of contentRefs) {
    try {
      const data = await content.retrieve(checksum, mediaType);
      contentBlobs.set(checksum, { data, ext: getExtensionForMimeType2(mediaType) });
    } catch (err) {
      logger?.warn("Failed to retrieve content blob", { checksum, mediaType, error: String(err) });
    }
  }

  const manifest = {
    "@context": MANIFEST_CONTEXT,
    "@type": "void:Dataset",
    "semiont:format": LINKED_DATA_FORMAT,
    "semiont:version": FORMAT_VERSION,
    "dct:created": (/* @__PURE__ */ new Date()).toISOString(),
    "prov:wasGeneratedBy": {
      "@type": "prov:Activity",
      "prov:used": sourceUrl
    },
    "semiont:entityTypes": entityTypes,
    "void:entities": resourceViews.length
  };

  const prettyJson = (value) => Buffer.from(JSON.stringify(value, null, 2), "utf8");
  async function* generateEntries() {
    yield { name: ".semiont/manifest.jsonld", data: prettyJson(manifest) };
    for (const view of resourceViews) {
      const resourceId4 = view.resource["@id"];
      const jsonld = buildResourceJsonLd(view.resource, view.annotations.annotations, sourceUrl);
      yield { name: `.semiont/resources/${resourceId4}.jsonld`, data: prettyJson(jsonld) };
    }
    for (const [checksum, blob] of contentBlobs) {
      yield { name: `${checksum}${blob.ext}`, data: blob.data };
    }
  }
  await writeTarGz(generateEntries(), output);

  logger?.info("Linked data export complete", {
    resources: resourceViews.length,
    blobs: contentBlobs.size
  });
  return manifest;
}
|
|
13152
|
+
/**
 * Build the JSON-LD document for one resource, including its annotations.
 *
 * Property insertion order is kept stable because the document is later
 * serialized with `JSON.stringify`. Optional fields are only emitted when the
 * resource has a truthy value (or, for list fields, a non-empty list).
 *
 * @param {object} resource - Resource descriptor; `@id` must be a string.
 * @param {Array} annotations - Annotations for the resource; each is passed through `hydrateAnnotation`.
 * @param {string} sourceUrl - Base URL used when `@id` does not already start with "http".
 * @returns {object} The JSON-LD document.
 */
function buildResourceJsonLd(resource, annotations, sourceUrl) {
  const rawId = resource["@id"];
  const resourceUri = rawId.startsWith("http") ? rawId : `${sourceUrl}/resources/${rawId}`;
  const doc = {
    "@context": SEMIONT_CONTEXT,
    "@id": resourceUri,
    "@type": resource["@type"] ?? "DigitalDocument",
    "name": resource.name
  };

  // Copy each named property when its value is truthy.
  const copyWhenSet = (...keys) => {
    for (const key of keys) {
      if (resource[key]) doc[key] = resource[key];
    }
  };
  // Copy each named list property when it has at least one element.
  const copyWhenNonEmpty = (...keys) => {
    for (const key of keys) {
      const list = resource[key];
      if (list && list.length > 0) doc[key] = list;
    }
  };
  // Describe one representation as a schema:MediaObject.
  const toMediaObject = (rep) => {
    const mediaObj = {
      "@type": "schema:MediaObject",
      "encodingFormat": rep.mediaType
    };
    if (rep.byteSize !== void 0) mediaObj["contentSize"] = rep.byteSize;
    if (rep.checksum) {
      // Drop the 7-character "sha256:" prefix when present.
      const digest = rep.checksum.startsWith("sha256:") ? rep.checksum.slice(7) : rep.checksum;
      mediaObj["sha256"] = digest;
      const ext = getExtensionForMimeType2(rep.mediaType);
      mediaObj["name"] = `${digest}${ext}`;
    }
    if (rep.language) mediaObj["inLanguage"] = rep.language;
    return mediaObj;
  };

  copyWhenSet("dateCreated", "dateModified", "description");

  const reps = normalizeRepresentations(resource.representations);
  if (reps.length > 0) {
    // The first representation supplies the document-level language and format.
    const primary = reps[0];
    if (primary.language) doc["inLanguage"] = primary.language;
    if (primary.mediaType) doc["encodingFormat"] = primary.mediaType;
  }

  copyWhenSet("creationMethod");
  copyWhenNonEmpty("entityTypes");
  copyWhenSet("archived", "wasDerivedFrom", "wasAttributedTo");
  copyWhenNonEmpty("sameAs", "isPartOf", "hasPart");

  if (reps.length > 0) {
    doc["representations"] = reps.map((rep) => toMediaObject(rep));
  }
  if (annotations.length > 0) {
    doc["annotations"] = annotations.map((ann) => hydrateAnnotation(ann, sourceUrl));
  }
  return doc;
}
|
|
13200
|
+
/**
 * Coerce a `representations` value into an array.
 *
 * @param {*} reps - An array, a single representation, or a falsy value.
 * @returns {Array} `reps` itself when it is an array, `[reps]` for any other
 *   truthy value, and an empty array for falsy input.
 */
function normalizeRepresentations(reps) {
  if (Array.isArray(reps)) {
    return reps;
  }
  return reps ? [reps] : [];
}
|
|
13205
|
+
/**
 * Record the content blobs referenced by a resource's representations.
 *
 * For every representation that has both a checksum and a media type, the
 * checksum (without a leading "sha256:" prefix) is stored in `refs` with the
 * media type as its value. `refs` is mutated in place.
 *
 * @param {object} resource - Resource whose `representations` are inspected.
 * @param {Map<string, string>} refs - Accumulator mapping checksum -> media type.
 * @returns {void}
 */
function collectContentRefsFromResource(resource, refs) {
  const prefix = "sha256:";
  for (const rep of normalizeRepresentations(resource.representations)) {
    if (!rep.checksum || !rep.mediaType) {
      continue;
    }
    const digest = rep.checksum.startsWith(prefix) ? rep.checksum.slice(prefix.length) : rep.checksum;
    refs.set(digest, rep.mediaType);
  }
}
|
|
13214
|
+
|
|
13215
|
+
// src/exchange/linked-data-importer.ts
|
|
13216
|
+
var import_rxjs10 = __toESM(require_cjs(), 1);
|
|
13217
|
+
var import_operators10 = __toESM(require_operators(), 1);
|
|
13218
|
+
// Upper bound, in milliseconds (3e4 = 30 000 ms), on how long each import step
// waits for its EventBus reply before failing with a timeout error.
var IMPORT_TIMEOUT_MS = 3e4;
|
|
13219
|
+
/**
 * Reduce a URI to the segment after its last "/".
 *
 * @param {string} uri - A URI or an already-bare identifier.
 * @returns {string} Everything after the final "/", or `uri` unchanged when it
 *   contains no "/". A trailing "/" therefore yields an empty string.
 */
function stripUriToId(uri) {
  const cut = uri.lastIndexOf("/");
  return cut === -1 ? uri : uri.slice(cut + 1);
}
|
|
13224
|
+
/**
 * Produce a copy of an annotation whose identifiers are bare IDs instead of URIs.
 *
 * The annotation's `id`, its `target` (when a string) or `target.source`
 * (when an object), and the `source` of each body item are passed through
 * `stripUriToId`. The input annotation and its target are not mutated.
 *
 * @param {object} annotation - Annotation as read from a JSON-LD document.
 * @returns {object} Shallow copy with stripped identifiers; `body` is always assigned.
 */
function dehydrateAnnotation(annotation) {
  const result = { ...annotation };
  if (result.id) {
    result.id = stripUriToId(result.id);
  }

  const { target } = result;
  if (typeof target === "string") {
    result.target = stripUriToId(target);
  } else if (target && typeof target === "object") {
    // Copy the target so the caller's object is left untouched.
    result.target = target.source
      ? { ...target, source: stripUriToId(target.source) }
      : { ...target };
  }

  result.body = dehydrateBody(result.body);
  return result;
}
|
|
13241
|
+
/**
 * Dehydrate an annotation body that is either a single item or an array of items.
 *
 * @param {*} body - One body item, or an array of body items.
 * @returns {*} An array of dehydrated items when `body` is an array, otherwise
 *   the result of `dehydrateBodyItem(body)`.
 */
function dehydrateBody(body) {
  return Array.isArray(body)
    ? body.map((entry) => dehydrateBodyItem(entry))
    : dehydrateBodyItem(body);
}
|
|
13247
|
+
/**
 * Replace a URI-valued `source` on a body item with its bare identifier.
 *
 * Only objects that have a `source` property whose value is a string
 * containing "/" are rewritten; anything else is returned as-is (same
 * reference). The input object is never mutated.
 *
 * @param {*} item - A single annotation body item.
 * @returns {*} A shallow copy with the stripped `source`, or `item` unchanged.
 */
function dehydrateBodyItem(item) {
  if (!item || typeof item !== "object" || !("source" in item)) {
    return item;
  }
  const ref = item.source;
  if (typeof ref !== "string" || !ref.includes("/")) {
    return item;
  }
  return { ...item, source: stripUriToId(ref) };
}
|
|
13256
|
+
/**
 * Build a lookup function from content checksum to archive entry data.
 *
 * Every entry whose name does not start with ".semiont/" is indexed under its
 * name with the text from the last "." onwards removed (the whole name when
 * there is no "."). If two entries share the same key, the later one wins.
 *
 * @param {Map<string, *>} entries - Archive entries keyed by entry name.
 * @returns {(checksum: string) => *} Resolver returning the entry data for a
 *   checksum, or `undefined` when no entry is indexed under it.
 */
function buildBlobResolver2(entries) {
  const nameByChecksum = /* @__PURE__ */ new Map();
  for (const entryName of entries.keys()) {
    if (entryName.startsWith(".semiont/")) {
      continue;
    }
    const dot = entryName.lastIndexOf(".");
    const key = dot >= 0 ? entryName.slice(0, dot) : entryName;
    nameByChecksum.set(key, entryName);
  }
  return (checksum) => {
    const entryName = nameByChecksum.get(checksum);
    if (!entryName) {
      return void 0;
    }
    return entries.get(entryName);
  };
}
|
|
13270
|
+
/**
 * Import a linked-data archive by replaying its contents through the EventBus.
 *
 * Steps:
 *   1. Read every archive entry into memory.
 *   2. Parse and validate `.semiont/manifest.jsonld` (format and version).
 *   3. Register each entity type listed in the manifest.
 *   4. Import each `.semiont/resources/*.jsonld` document, in sorted name
 *      order, together with its annotations.
 *
 * Any failure aborts the import by throwing; earlier steps are not undone here.
 *
 * @param {*} archive - Archive input passed to `readTarGz`.
 * @param {object} options
 * @param {object} options.eventBus - Bus used to publish requests and await replies.
 * @param {*} options.userId - User recorded on every published request.
 * @param {object} [options.logger] - Optional logger (`info` / `debug`).
 * @returns {Promise<{manifest: object, resourcesCreated: number, annotationsCreated: number, entityTypesAdded: number}>}
 * @throws {Error} When the manifest entry is missing or has an unexpected format.
 */
async function importLinkedData(archive, options) {
  const { eventBus, userId: userId2, logger } = options;
  const decodeJson = (buffer) => JSON.parse(buffer.toString("utf8"));

  const entries = /* @__PURE__ */ new Map();
  for await (const { name, data } of readTarGz(archive)) {
    entries.set(name, data);
  }

  const manifestData = entries.get(".semiont/manifest.jsonld");
  if (!manifestData) {
    throw new Error("Invalid linked data archive: missing .semiont/manifest.jsonld");
  }
  const manifest = decodeJson(manifestData);
  if (!isLinkedDataManifest(manifest)) {
    throw new Error(
      `Invalid linked data archive: expected format "${LINKED_DATA_FORMAT}", got "${manifest["semiont:format"]}"`
    );
  }
  validateManifestVersion(manifest["semiont:version"]);
  logger?.info("Linked data import: parsed manifest", {
    entityTypes: manifest["semiont:entityTypes"].length,
    resources: manifest["void:entities"]
  });

  const resolveBlob = buildBlobResolver2(entries);
  const totals = { resourcesCreated: 0, annotationsCreated: 0, entityTypesAdded: 0 };

  for (const entityType of manifest["semiont:entityTypes"]) {
    await addEntityType(entityType, userId2, eventBus, logger);
    totals.entityTypesAdded += 1;
  }

  const isResourceDoc = (entryName) => entryName.startsWith(".semiont/resources/") && entryName.endsWith(".jsonld");
  const resourceEntries = Array.from(entries.keys()).filter(isResourceDoc).sort();
  for (const entryName of resourceEntries) {
    const resourceDoc = decodeJson(entries.get(entryName));
    const outcome = await importResource(resourceDoc, userId2, eventBus, resolveBlob, logger);
    totals.resourcesCreated += 1;
    totals.annotationsCreated += outcome.annotationsCreated;
  }

  logger?.info("Linked data import complete", { ...totals });
  return { manifest, ...totals };
}
|
|
13318
|
+
/**
 * Register one entity type through the EventBus and wait for the outcome.
 *
 * Publishes `mark:add-entity-type` and resolves on the first
 * `mark:entity-type-added` event. Rejects with the `error` carried by a
 * `mark:entity-type-add-failed` event, or with a timeout error after
 * `IMPORT_TIMEOUT_MS`.
 *
 * NOTE(review): replies are not correlated with this particular request, so
 * an event caused by a concurrent caller would also settle the wait.
 *
 * @param {string} entityType - Tag to register.
 * @param {*} userId2 - User recorded on the request.
 * @param {object} eventBus - Bus whose `get(name)` returns the channel for an event.
 * @param {object} [logger] - Optional logger (`debug`).
 * @returns {Promise<void>}
 */
async function addEntityType(entityType, userId2, eventBus, logger) {
  const result$ = (0, import_rxjs10.race)(
    eventBus.get("mark:entity-type-added").pipe((0, import_operators10.map)(() => "ok")),
    eventBus.get("mark:entity-type-add-failed").pipe((0, import_operators10.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs10.timer)(IMPORT_TIMEOUT_MS).pipe((0, import_operators10.map)(() => {
      throw new Error("Timeout waiting for mark:entity-type-added");
    }))
  );
  // Subscribe BEFORE publishing: firstValueFrom subscribes immediately, so a
  // reply emitted synchronously while the request is being dispatched is not
  // missed (which would otherwise stall until the timeout fires).
  const pending = (0, import_rxjs10.firstValueFrom)(result$);
  try {
    eventBus.get("mark:add-entity-type").next({
      tag: entityType,
      userId: userId2
    });
  } catch (publishError) {
    // The request never went out. Mark the abandoned wait as handled so its
    // eventual timeout rejection is not reported as an unhandled rejection.
    pending.catch(() => {
    });
    throw publishError;
  }
  await pending;
  logger?.debug("Added entity type", { entityType });
}
|
|
13335
|
+
/**
 * Create one resource (and its annotations) from a JSON-LD resource document.
 *
 * The first representation supplies the media type (default "text/markdown"),
 * language, and content checksum; the checksum is resolved to blob data via
 * `resolveBlob`. The resource is created by publishing `yield:create` and
 * waiting for `yield:created`; annotations are then created one at a time
 * against the newly assigned resource ID.
 *
 * `representations` may be an array or a single object; a single object is
 * treated as a one-element list.
 *
 * NOTE(review): replies are not correlated with this particular request, so
 * an event caused by a concurrent caller would also settle the wait.
 *
 * @param {object} doc - Parsed JSON-LD resource document.
 * @param {*} userId2 - User recorded on every published request.
 * @param {object} eventBus - Bus whose `get(name)` returns the channel for an event.
 * @param {(checksum: string) => *} resolveBlob - Returns blob data for a checksum, or a falsy value.
 * @param {object} [logger] - Optional logger (`debug`).
 * @returns {Promise<{annotationsCreated: number}>}
 * @throws {Error} When no checksum is present, the blob is missing, creation
 *   fails (the `error` from `yield:create-failed` is rethrown), or no reply
 *   arrives within `IMPORT_TIMEOUT_MS`.
 */
async function importResource(doc, userId2, eventBus, resolveBlob, logger) {
  const name = doc["name"];
  const representations = doc["representations"];
  const annotations = doc["annotations"];
  const entityTypes = doc["entityTypes"];
  const creationMethod = doc["creationMethod"];

  // Accept both an array and a lone representation object.
  let representationList = [];
  if (Array.isArray(representations)) {
    representationList = representations;
  } else if (representations) {
    representationList = [representations];
  }

  let format = "text/markdown";
  let language;
  let contentChecksum;
  if (representationList.length > 0) {
    const primary = representationList[0];
    if (primary["encodingFormat"]) format = primary["encodingFormat"];
    if (primary["inLanguage"]) language = primary["inLanguage"];
    if (primary["sha256"]) contentChecksum = primary["sha256"];
  }
  if (!contentChecksum) {
    throw new Error(`Resource "${name}" has no content checksum in representations`);
  }
  const blob = resolveBlob(contentChecksum);
  if (!blob) {
    throw new Error(`Missing content blob for checksum ${contentChecksum} (resource "${name}")`);
  }

  const createResult$ = (0, import_rxjs10.race)(
    eventBus.get("yield:created").pipe((0, import_operators10.map)((r) => r)),
    eventBus.get("yield:create-failed").pipe((0, import_operators10.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs10.timer)(IMPORT_TIMEOUT_MS).pipe((0, import_operators10.map)(() => {
      throw new Error("Timeout waiting for yield:created");
    }))
  );
  // Subscribe BEFORE publishing: firstValueFrom subscribes immediately, so a
  // reply emitted synchronously while the request is being dispatched is not
  // missed (which would otherwise stall until the timeout fires).
  const pendingCreate = (0, import_rxjs10.firstValueFrom)(createResult$);
  try {
    eventBus.get("yield:create").next({
      name,
      content: blob,
      format,
      userId: userId2,
      language,
      entityTypes: entityTypes ?? [],
      creationMethod
    });
  } catch (publishError) {
    // The request never went out. Mark the abandoned wait as handled so its
    // eventual timeout rejection is not reported as an unhandled rejection.
    pendingCreate.catch(() => {
    });
    throw publishError;
  }
  const created = await pendingCreate;
  const resourceId4 = created.resourceId;
  logger?.debug("Created resource from JSON-LD", { name, resourceId: resourceId4 });

  let annotationsCreated = 0;
  if (annotations && annotations.length > 0) {
    // Sequential on purpose: each call waits for its own reply event.
    for (const annotation of annotations) {
      await createAnnotation(annotation, resourceId4, userId2, eventBus, logger);
      annotationsCreated++;
    }
  }
  return { annotationsCreated };
}
|
|
13387
|
+
/**
 * Create one annotation through the EventBus and wait for the outcome.
 *
 * The annotation is dehydrated (URIs reduced to bare IDs) and published on
 * `mark:create`. Resolves on the first `mark:created` event. Rejects with the
 * `error` carried by a `mark:create-failed` event, or with a timeout error
 * after `IMPORT_TIMEOUT_MS`.
 *
 * NOTE(review): replies are not correlated with this particular request, so
 * an event caused by a concurrent caller would also settle the wait.
 *
 * @param {object} annotation - Annotation as read from the JSON-LD document.
 * @param {*} resourceId4 - ID of the resource the annotation belongs to.
 * @param {*} userId2 - User recorded on the request.
 * @param {object} eventBus - Bus whose `get(name)` returns the channel for an event.
 * @param {object} [logger] - Optional logger (`debug`).
 * @returns {Promise<void>}
 */
async function createAnnotation(annotation, resourceId4, userId2, eventBus, logger) {
  const result$ = (0, import_rxjs10.race)(
    eventBus.get("mark:created").pipe((0, import_operators10.map)(() => "ok")),
    eventBus.get("mark:create-failed").pipe((0, import_operators10.map)((e) => {
      throw e.error;
    })),
    (0, import_rxjs10.timer)(IMPORT_TIMEOUT_MS).pipe((0, import_operators10.map)(() => {
      throw new Error("Timeout waiting for mark:created");
    }))
  );
  // Subscribe BEFORE publishing: firstValueFrom subscribes immediately, so a
  // reply emitted synchronously while the request is being dispatched is not
  // missed (which would otherwise stall until the timeout fires).
  const pending = (0, import_rxjs10.firstValueFrom)(result$);
  try {
    eventBus.get("mark:create").next({
      annotation: dehydrateAnnotation(annotation),
      userId: userId2,
      resourceId: resourceId4
    });
  } catch (publishError) {
    // The request never went out. Mark the abandoned wait as handled so its
    // eventual timeout rejection is not reported as an unhandled rejection.
    pending.catch(() => {
    });
    throw publishError;
  }
  await pending;
  logger?.debug("Created annotation", { annotationId: annotation.id });
}
|
|
13405
|
+
|
|
12266
13406
|
// src/annotation-assembly.ts
|
|
12267
13407
|
import {
|
|
12268
13408
|
assembleAnnotation,
|
|
@@ -12270,19 +13410,18 @@ import {
|
|
|
12270
13410
|
} from "@semiont/core";
|
|
12271
13411
|
|
|
12272
13412
|
// src/annotation-operations.ts
|
|
12273
|
-
import {
|
|
12274
|
-
import { annotationId, uriToResourceId as uriToResourceId3, uriToAnnotationId as uriToAnnotationId2, assembleAnnotation as assembleAnnotation2, applyBodyOperations as applyBodyOperations2 } from "@semiont/core";
|
|
13413
|
+
import { annotationId, resourceId as makeResourceId3, assembleAnnotation as assembleAnnotation2, applyBodyOperations as applyBodyOperations2 } from "@semiont/core";
|
|
12275
13414
|
var AnnotationOperations = class {
|
|
12276
13415
|
/**
|
|
12277
13416
|
* Create a new annotation via EventBus → Stower
|
|
12278
13417
|
*/
|
|
12279
|
-
static async createAnnotation(request, userId2, creator, eventBus
|
|
12280
|
-
const { annotation } = assembleAnnotation2(request, creator
|
|
12281
|
-
const
|
|
13418
|
+
static async createAnnotation(request, userId2, creator, eventBus) {
|
|
13419
|
+
const { annotation } = assembleAnnotation2(request, creator);
|
|
13420
|
+
const resId = makeResourceId3(request.target.source);
|
|
12282
13421
|
eventBus.get("mark:create").next({
|
|
12283
13422
|
annotation,
|
|
12284
13423
|
userId: userId2,
|
|
12285
|
-
resourceId:
|
|
13424
|
+
resourceId: resId
|
|
12286
13425
|
});
|
|
12287
13426
|
return { annotation };
|
|
12288
13427
|
}
|
|
@@ -12290,19 +13429,19 @@ var AnnotationOperations = class {
|
|
|
12290
13429
|
* Update annotation body via EventBus → Stower
|
|
12291
13430
|
*/
|
|
12292
13431
|
static async updateAnnotationBody(id, request, userId2, eventBus, kb) {
|
|
13432
|
+
const resId = makeResourceId3(request.resourceId);
|
|
12293
13433
|
const annotation = await AnnotationContext.getAnnotation(
|
|
12294
13434
|
annotationId(id),
|
|
12295
|
-
|
|
13435
|
+
resId,
|
|
12296
13436
|
kb
|
|
12297
13437
|
);
|
|
12298
13438
|
if (!annotation) {
|
|
12299
13439
|
throw new Error("Annotation not found");
|
|
12300
13440
|
}
|
|
12301
|
-
const resourceId2 = uriToResourceId3(getTargetSource3(annotation.target));
|
|
12302
13441
|
eventBus.get("mark:update-body").next({
|
|
12303
13442
|
annotationId: annotationId(id),
|
|
12304
13443
|
userId: userId2,
|
|
12305
|
-
resourceId:
|
|
13444
|
+
resourceId: resId,
|
|
12306
13445
|
operations: request.operations
|
|
12307
13446
|
});
|
|
12308
13447
|
const updatedBody = applyBodyOperations2(annotation.body, request.operations);
|
|
@@ -12316,8 +13455,8 @@ var AnnotationOperations = class {
|
|
|
12316
13455
|
/**
|
|
12317
13456
|
* Delete an annotation via EventBus → Stower
|
|
12318
13457
|
*/
|
|
12319
|
-
static async deleteAnnotation(id,
|
|
12320
|
-
const resId =
|
|
13458
|
+
static async deleteAnnotation(id, resourceIdStr, userId2, eventBus, kb, logger) {
|
|
13459
|
+
const resId = makeResourceId3(resourceIdStr);
|
|
12321
13460
|
const projection = await AnnotationContext.getResourceAnnotations(resId, kb);
|
|
12322
13461
|
const annotation = projection.annotations.find((a) => a.id === id);
|
|
12323
13462
|
if (!annotation) {
|
|
@@ -12325,7 +13464,7 @@ var AnnotationOperations = class {
|
|
|
12325
13464
|
}
|
|
12326
13465
|
logger?.debug("Removing annotation via EventBus", { annotationId: id });
|
|
12327
13466
|
eventBus.get("mark:delete").next({
|
|
12328
|
-
annotationId:
|
|
13467
|
+
annotationId: annotationId(id),
|
|
12329
13468
|
userId: userId2,
|
|
12330
13469
|
resourceId: resId
|
|
12331
13470
|
});
|
|
@@ -12339,8 +13478,10 @@ var VERSION = "0.1.0";
|
|
|
12339
13478
|
export {
|
|
12340
13479
|
AnnotationContext,
|
|
12341
13480
|
AnnotationOperations,
|
|
13481
|
+
BACKUP_FORMAT,
|
|
12342
13482
|
Binder,
|
|
12343
13483
|
CloneTokenManager,
|
|
13484
|
+
FORMAT_VERSION,
|
|
12344
13485
|
Gatherer,
|
|
12345
13486
|
GraphContext,
|
|
12346
13487
|
GraphDBConsumer,
|
|
@@ -12354,10 +13495,16 @@ export {
|
|
|
12354
13495
|
assembleAnnotation,
|
|
12355
13496
|
bootstrapEntityTypes,
|
|
12356
13497
|
createKnowledgeBase,
|
|
13498
|
+
exportBackup,
|
|
13499
|
+
exportLinkedData,
|
|
12357
13500
|
generateReferenceSuggestions,
|
|
12358
13501
|
generateResourceSummary,
|
|
13502
|
+
importBackup,
|
|
13503
|
+
importLinkedData,
|
|
13504
|
+
isBackupManifest,
|
|
12359
13505
|
readEntityTypesProjection,
|
|
12360
13506
|
resetBootstrap,
|
|
12361
|
-
startMakeMeaning
|
|
13507
|
+
startMakeMeaning,
|
|
13508
|
+
validateManifestVersion
|
|
12362
13509
|
};
|
|
12363
13510
|
//# sourceMappingURL=index.js.map
|