@semiont/core 0.5.5 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -140
- package/dist/config/node-config-loader.d.ts +1 -5
- package/dist/config/node-config-loader.js +0 -4
- package/dist/config/node-config-loader.js.map +1 -1
- package/dist/index.d.ts +504 -198
- package/dist/index.js +443 -137
- package/dist/index.js.map +1 -1
- package/package.json +14 -13
package/dist/index.js
CHANGED
|
@@ -108,12 +108,14 @@ var PERSISTED_EVENT_TYPES = [
|
|
|
108
108
|
|
|
109
109
|
// src/bus-protocol.ts
|
|
110
110
|
// Currently empty. `job:complete` / `job:fail` were moved to GLOBAL,
// `jobId`-keyed correlation delivery (#847): the dispatching caller filters
// by `jobId`, and resource viewers filter the same global stream by
// `resourceId`. There is no resource-scoped copy, so a client that is both
// dispatcher and viewer no longer receives the event twice.
//
// This set remains as the extension point for *genuine* resource-bound
// broadcasts: events every viewer of a resource should see and no single
// caller owns (e.g. resource-generation progress for multiple viewers).
var RESOURCE_BROADCAST_TYPES = [];
|
|
118
120
|
var CHANNEL_SCHEMAS = {
|
|
119
121
|
// ── YIELD FLOW ──────────────────────────────────────────────────
|
|
@@ -479,6 +481,8 @@ var ScopedEventBus = class _ScopedEventBus {
|
|
|
479
481
|
this.parent = parent;
|
|
480
482
|
this.scopePrefix = scopePrefix;
|
|
481
483
|
}
|
|
484
|
+
parent;
|
|
485
|
+
scopePrefix;
|
|
482
486
|
/**
|
|
483
487
|
* Get the RxJS Subject for a scoped event
|
|
484
488
|
*
|
|
@@ -913,6 +917,30 @@ function extractBoundingBox(svg) {
|
|
|
913
917
|
return null;
|
|
914
918
|
}
|
|
915
919
|
|
|
920
|
+
// src/pdf-coordinates.ts
|
|
921
|
+
/**
 * Serialize a page region as a fragment string of the form
 * `page=<page>&viewrect=<x>,<y>,<width>,<height>`.
 *
 * The five fields are interpolated as-is; no rounding or validation happens.
 *
 * @param {{page: *, x: *, y: *, width: *, height: *}} coord Region to encode.
 * @returns {string} The fragment string.
 */
function createFragmentSelector(coord) {
  const { page, x, y, width, height } = coord;
  return `page=${page}&viewrect=${x},${y},${width},${height}`;
}
|
|
924
|
+
/**
 * Parse a `page=<n>&viewrect=<x>,<y>,<w>,<h>` fragment back into numbers.
 *
 * Both parts are located independently anywhere inside `fragment`; the page
 * must be an unsigned integer and each viewrect component an optionally
 * negative decimal.
 *
 * @param {string} fragment Fragment string to parse.
 * @returns {{page: number, x: number, y: number, width: number, height: number} | null}
 *   The decoded region, or `null` when either part is missing.
 */
function parseFragmentSelector(fragment) {
  const pageHit = fragment.match(/page=(\d+)/);
  if (!pageHit) return null;

  const rectHit = fragment.match(
    /viewrect=(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)/
  );
  if (!rectHit) return null;

  const [, rawX, rawY, rawWidth, rawHeight] = rectHit;
  return {
    page: Number.parseInt(pageHit[1], 10),
    x: Number.parseFloat(rawX),
    y: Number.parseFloat(rawY),
    width: Number.parseFloat(rawWidth),
    height: Number.parseFloat(rawHeight)
  };
}
|
|
939
|
+
/**
 * Read only the page number out of a fragment string.
 *
 * @param {string} fragment Fragment string containing `page=<n>`.
 * @returns {number | null} The page number, or `null` when no `page=<digits>`
 *   part is present.
 */
function getPageFromFragment(fragment) {
  const hit = fragment.match(/page=(\d+)/);
  if (!hit) return null;
  return Number.parseInt(hit[1], 10);
}
|
|
943
|
+
|
|
916
944
|
// src/resource-utils.ts
|
|
917
945
|
function getResourceId(resource) {
|
|
918
946
|
if (!resource) return void 0;
|
|
@@ -1065,9 +1093,52 @@ function levenshteinDistance(str1, str2) {
|
|
|
1065
1093
|
}
|
|
1066
1094
|
return matrix[len1][len2];
|
|
1067
1095
|
}
|
|
1096
|
+
/**
 * Normalize text for tolerant matching and record, for every normalized
 * character, which input index produced it.
 *
 * Normalization performed here:
 * - each run of whitespace becomes a single space; leading and trailing
 *   whitespace is dropped entirely;
 * - curly single quotes (U+2018/U+2019) become `'`;
 * - curly double quotes (U+201C/U+201D) become `"`;
 * - em dash (U+2014) becomes `--`, en dash (U+2013) becomes `-`.
 *
 * `map[k]` is the input index that produced `normalized[k]` (for a collapsed
 * whitespace run: the index where the run started; both characters of `--`
 * point at the em dash). One extra trailing entry, `map[normalized.length]`,
 * is the exclusive end offset of the normalized text in the input, so
 * `input.substring(map[a], map[b])` recovers the source of `normalized[a..b)`.
 *
 * @param {string} input Raw text.
 * @returns {{normalized: string, map: number[]}} Normalized text plus the
 *   index map (`map.length === normalized.length + 1`).
 */
function normalizeTextWithMap(input) {
  const whitespace = /\s/;
  const map = [];
  let normalized = "";
  // Start index of the whitespace run currently being skipped, or -1.
  let pendingWhitespaceStart = -1;

  for (let i = 0; i < input.length; i++) {
    const ch = input[i];
    if (whitespace.test(ch)) {
      if (pendingWhitespaceStart === -1) pendingWhitespaceStart = i;
      continue;
    }

    // A non-whitespace character ends the pending run: emit one space for it,
    // unless the run was leading whitespace (nothing emitted yet).
    if (pendingWhitespaceStart !== -1) {
      if (normalized.length > 0) {
        normalized += " ";
        map.push(pendingWhitespaceStart);
      }
      pendingWhitespaceStart = -1;
    }

    let replacement;
    switch (ch) {
      case "\u2018":
      case "\u2019":
        replacement = "'";
        break;
      case "\u201C":
      case "\u201D":
        replacement = '"';
        break;
      case "\u2014":
        replacement = "--";
        break;
      case "\u2013":
        replacement = "-";
        break;
      default:
        replacement = ch;
    }
    normalized += replacement;
    // Every emitted character maps back to the single input character at i.
    for (let k = 0; k < replacement.length; k++) map.push(i);
  }

  // End sentinel. Trailing whitespace is not part of the normalized text, so
  // the end offset must stop where that run begins; using input.length here
  // would make a match that reaches the end of the normalized text swallow
  // the trailing whitespace when mapped back to the input.
  map.push(pendingWhitespaceStart !== -1 ? pendingWhitespaceStart : input.length);
  return { normalized, map };
}
|
|
1068
1137
|
/**
 * Precompute the derived forms of `content` that text matching reuses.
 *
 * @param {string} content Source text.
 * @returns {{normalizedContent: string, normalizedMap: number[], lowerContent: string}}
 *   The normalized text and its index map (from `normalizeTextWithMap`) plus
 *   a lower-cased copy of the content.
 */
function buildContentCache(content) {
  const mapped = normalizeTextWithMap(content);
  const cache = {
    normalizedContent: mapped.normalized,
    normalizedMap: mapped.map,
    lowerContent: content.toLowerCase()
  };
  return cache;
}
|
|
@@ -1084,19 +1155,11 @@ function findBestTextMatch(content, searchText, positionHint, cache) {
|
|
|
1084
1155
|
const normalizedSearch = normalizeText(searchText);
|
|
1085
1156
|
const normalizedIndex = cache.normalizedContent.indexOf(normalizedSearch);
|
|
1086
1157
|
if (normalizedIndex !== -1) {
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
while (normalizedPos < normalizedIndex && actualPos < content.length) {
|
|
1090
|
-
const char = content[actualPos];
|
|
1091
|
-
const normalizedChar = normalizeText(char);
|
|
1092
|
-
if (normalizedChar) {
|
|
1093
|
-
normalizedPos += normalizedChar.length;
|
|
1094
|
-
}
|
|
1095
|
-
actualPos++;
|
|
1096
|
-
}
|
|
1158
|
+
const start = cache.normalizedMap[normalizedIndex] ?? 0;
|
|
1159
|
+
const end = cache.normalizedMap[normalizedIndex + normalizedSearch.length] ?? content.length;
|
|
1097
1160
|
return {
|
|
1098
|
-
start
|
|
1099
|
-
end
|
|
1161
|
+
start,
|
|
1162
|
+
end,
|
|
1100
1163
|
matchQuality: "normalized"
|
|
1101
1164
|
};
|
|
1102
1165
|
}
|
|
@@ -1132,58 +1195,154 @@ function findBestTextMatch(content, searchText, positionHint, cache) {
|
|
|
1132
1195
|
}
|
|
1133
1196
|
return null;
|
|
1134
1197
|
}
|
|
1135
|
-
function
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1198
|
+
/**
 * Check that the text at `position` in `content` is exactly `expectedExact`.
 *
 * @param {string} content Source text.
 * @param {{start: number, end: number}} position Offsets into `content`.
 * @param {string} expectedExact Text expected at that range.
 * @returns {boolean} `true` when the slice equals `expectedExact`.
 */
function verifyPosition(content, position, expectedExact) {
  const { start, end } = position;
  return content.substring(start, end) === expectedExact;
}
|
|
1202
|
+
|
|
1203
|
+
// src/anchor-annotation.ts
|
|
1204
|
+
// Scoring knobs used when several occurrences of a quote compete.

// Distance (in string offsets) from the position hint at or beyond which an
// occurrence earns no position score.
var POSITION_WINDOW = 1024;

// Added per side (prefix / suffix) when the adjacent text matches exactly.
var CONTEXT_FULL_WEIGHT = 10;

// Added per side when only the trimmed prefix / suffix is found nearby.
var CONTEXT_PARTIAL_WEIGHT = 5;

// Position score of an occurrence sitting exactly on the hint; it falls off
// linearly to zero at POSITION_WINDOW.
var POSITION_WEIGHT_MAX = 5;
|
|
1208
|
+
/**
 * Resolve an annotation's selectors to a concrete `[start, end)` range in
 * `content`.
 *
 * Strategies, tried in order:
 * 1. No usable quote: trust the position selector ("position-fallback", low).
 * 2. "fast-path": the quote's exact text sits at `position.start` (high).
 * 3. "unique-occurrence": the exact text appears once in `content` (high).
 * 4. Several occurrences: delegate the choice to `pickByScore`.
 * 5. No occurrence: fall back to the position selector, if valid (low).
 *
 * @param {string} content Text to anchor into.
 * @param {{position?: {start: number, end: number},
 *          quote?: {exact: string, prefix?: string, suffix?: string}}} selectors
 * @returns {{start: number, end: number, strategy: string, confidence: string} | null}
 *   The anchor, or `null` when nothing usable was supplied or found.
 */
function anchorAnnotation(content, selectors) {
  const { position, quote } = selectors;

  // Position-only anchor. The range check is written in positive form on
  // purpose: comparisons against NaN / undefined are false, so malformed
  // offsets are rejected instead of being returned as an anchor.
  const positionFallback = () => {
    const usable =
      Boolean(position) &&
      position.start >= 0 &&
      position.end <= content.length &&
      position.start < position.end;
    if (!usable) return null;
    return {
      start: position.start,
      end: position.end,
      strategy: "position-fallback",
      confidence: "low"
    };
  };

  if (!quote || !quote.exact) {
    return positionFallback();
  }

  const { exact, prefix, suffix } = quote;

  if (position) {
    // Fast path: the stored offset still points at the exact text.
    const probeEnd = position.start + exact.length;
    if (
      position.start >= 0 &&
      probeEnd <= content.length &&
      content.substring(position.start, probeEnd) === exact
    ) {
      return {
        start: position.start,
        end: probeEnd,
        strategy: "fast-path",
        confidence: "high"
      };
    }
  }

  const occurrences = findAllOccurrences(content, exact);

  if (occurrences.length === 1) {
    const start = occurrences[0];
    return {
      start,
      end: start + exact.length,
      strategy: "unique-occurrence",
      confidence: "high"
    };
  }

  if (occurrences.length > 1) {
    return pickByScore(content, occurrences, exact, prefix, suffix, position?.start);
  }

  // The quote no longer appears verbatim; the position is all that is left.
  return positionFallback();
}
|
|
1258
|
+
/**
 * List every index at which `exact` occurs in `content`, in ascending order.
 *
 * The search resumes one position after each hit, so overlapping occurrences
 * are all reported.
 *
 * @param {string} content Text to search.
 * @param {string} exact Text to look for.
 * @returns {number[]} Start offsets; empty when there is no occurrence or
 *   when `exact` is the empty string.
 */
function findAllOccurrences(content, exact) {
  // An empty needle "matches" at every index and indexOf clamps the start
  // offset to content.length, so the scan below would never reach -1.
  if (exact === "") return [];

  const out = [];
  for (let at = content.indexOf(exact); at !== -1; at = content.indexOf(exact, at + 1)) {
    out.push(at);
  }
  return out;
}
|
|
1267
|
+
/**
 * Score how well the text around one occurrence of `exact` agrees with the
 * expected `prefix` / `suffix`.
 *
 * Per side: CONTEXT_FULL_WEIGHT when the text immediately adjacent to the
 * occurrence matches the expected context exactly; CONTEXT_PARTIAL_WEIGHT when
 * only the trimmed context is found inside the adjacent window; otherwise 0.
 *
 * An empty or otherwise falsy prefix/suffix is treated as "not supplied": it
 * is neither scored nor counted as a match.
 *
 * @param {string} content Text being searched.
 * @param {number} pos Start offset of the occurrence.
 * @param {string} exact The occurrence text (only its length is used).
 * @param {string} [prefix] Text expected right before the occurrence.
 * @param {string} [suffix] Text expected right after the occurrence.
 * @returns {{score: number, full: boolean}} `full` is true only when at least
 *   one side was supplied and every supplied side matched exactly.
 */
function contextScoreAt(content, pos, exact, prefix, suffix) {
  const hasPrefix = Boolean(prefix);
  const hasSuffix = Boolean(suffix);
  let score = 0;
  let prefixFull = false;
  let suffixFull = false;

  if (hasPrefix) {
    const before = content.substring(Math.max(0, pos - prefix.length), pos);
    const loosePrefix = prefix.trim();
    if (before.endsWith(prefix)) {
      score += CONTEXT_FULL_WEIGHT;
      prefixFull = true;
    } else if (loosePrefix.length > 0 && before.includes(loosePrefix)) {
      score += CONTEXT_PARTIAL_WEIGHT;
    }
  }

  if (hasSuffix) {
    const afterStart = pos + exact.length;
    const after = content.substring(afterStart, Math.min(content.length, afterStart + suffix.length));
    const looseSuffix = suffix.trim();
    if (after.startsWith(suffix)) {
      score += CONTEXT_FULL_WEIGHT;
      suffixFull = true;
    } else if (looseSuffix.length > 0 && after.includes(looseSuffix)) {
      score += CONTEXT_PARTIAL_WEIGHT;
    }
  }

  // A side that was not supplied cannot veto, but it cannot vouch either:
  // with no usable context at all the result is never "full".
  const full =
    (hasPrefix || hasSuffix) &&
    (!hasPrefix || prefixFull) &&
    (!hasSuffix || suffixFull);
  return { score, full };
}
|
|
1298
|
+
/**
 * Score an occurrence by how close it is to the position hint.
 *
 * @param {number} pos Start offset of the occurrence.
 * @param {number} [hint] Expected start offset; `undefined` means "no hint".
 * @returns {number} POSITION_WEIGHT_MAX at the hint itself, decreasing
 *   linearly with distance, and 0 at POSITION_WINDOW or beyond (or when there
 *   is no hint).
 */
function positionScoreAt(pos, hint) {
  if (hint === undefined) return 0;
  const gap = Math.abs(pos - hint);
  return gap >= POSITION_WINDOW ? 0 : POSITION_WEIGHT_MAX * (1 - gap / POSITION_WINDOW);
}
|
|
1304
|
+
/**
 * Choose one of several occurrences of `exact` by combined context and
 * position score, and label how trustworthy the choice is.
 *
 * The highest total wins; on a tie the earliest occurrence is kept.
 * Labelling of the winner:
 * - context matched fully                → "context-disambiguated" / high
 * - some context signal, no position one → "context-disambiguated" / medium
 * - any position signal                  → "position-tiebreaker"   / medium
 * - no signal at all                     → "position-tiebreaker"   / low
 *
 * @param {string} content Text being searched.
 * @param {number[]} occurrences Candidate start offsets.
 * @param {string} exact The quoted text.
 * @param {string} [prefix] Expected text before the quote.
 * @param {string} [suffix] Expected text after the quote.
 * @param {number} [hint] Expected start offset.
 * @returns {{start: number, end: number, strategy: string, confidence: string}}
 */
function pickByScore(content, occurrences, exact, prefix, suffix, hint) {
  const best = {
    pos: occurrences[0],
    total: -1,
    contextFull: false,
    contextSignal: false,
    positionSignal: false
  };

  for (let k = 0; k < occurrences.length; k++) {
    const candidate = occurrences[k];
    const context = contextScoreAt(content, candidate, exact, prefix, suffix);
    const proximity = positionScoreAt(candidate, hint);
    const total = context.score + proximity;
    // Strictly greater: the first occurrence to reach a score keeps it.
    if (total > best.total) {
      best.pos = candidate;
      best.total = total;
      best.contextFull = context.full;
      best.contextSignal = context.score > 0;
      best.positionSignal = proximity > 0;
    }
  }

  let strategy = "position-tiebreaker";
  let confidence = "low";
  if (best.contextFull) {
    strategy = "context-disambiguated";
    confidence = "high";
  } else if (best.contextSignal || best.positionSignal) {
    confidence = "medium";
    if (!best.positionSignal) strategy = "context-disambiguated";
  }

  return {
    start: best.pos,
    end: best.pos + exact.length,
    strategy,
    confidence
  };
}
|
|
1188
1347
|
|
|
1189
1348
|
// src/locales.ts
|
|
@@ -1338,72 +1497,112 @@ function scaleSvgToNative(svg, displayWidth, displayHeight, imageWidth, imageHei
|
|
|
1338
1497
|
}
|
|
1339
1498
|
|
|
1340
1499
|
// src/text-context.ts
|
|
1500
|
+
// Number of characters of surrounding text extractContext takes on each side
// before trying to extend to a word boundary.
var CONTEXT_LENGTH = 64;

// Upper bound on how many extra characters extractContext may add per side
// while looking for that boundary.
var MAX_EXTENSION = 32;

// Smallest window (per side) reconcileSelector inspects around a candidate
// when comparing it with the supplied prefix / suffix.
var DISAMBIGUATION_MIN_WINDOW = 32;
|
|
1341
1503
|
/**
 * Collect the text surrounding `content[start..end)`.
 *
 * Each side starts as CONTEXT_LENGTH characters and is then widened, one
 * character at a time and at most MAX_EXTENSION times, until the next
 * character outward is whitespace or punctuation (or the text ends), so the
 * context does not begin or end in the middle of a word.
 *
 * @param {string} content Source text.
 * @param {number} start Start offset of the selection.
 * @param {number} end End offset (exclusive) of the selection.
 * @returns {{prefix?: string, suffix?: string}} `prefix` is present only when
 *   `start > 0`, `suffix` only when `end < content.length`.
 */
function extractContext(content, start, end) {
  const boundary = /[\s.,;:!?'"()\[\]{}<>\/\\]/;
  const stopsAt = (ch) => !ch || boundary.test(ch);
  const context = {};

  if (start > 0) {
    let from = Math.max(0, start - CONTEXT_LENGTH);
    for (let grown = 0; from > 0 && grown < MAX_EXTENSION && !stopsAt(content[from - 1]); grown++) {
      from--;
    }
    context.prefix = content.substring(from, start);
  }

  if (end < content.length) {
    let to = Math.min(content.length, end + CONTEXT_LENGTH);
    for (let grown = 0; to < content.length && grown < MAX_EXTENSION && !stopsAt(content[to]); grown++) {
      to++;
    }
    context.suffix = content.substring(end, to);
  }

  return context;
}
|
|
1374
|
-
function
|
|
1375
|
-
const
|
|
1376
|
-
if (
|
|
1377
|
-
|
|
1529
|
+
/**
 * Turn an LLM-proposed quote selector into one that is verifiable against
 * `content`.
 *
 * Resolution order (reported in `anchorMethod`):
 * - "unique-match": `exact` occurs once.
 * - "context-recovered": `exact` occurs several times and the first candidate
 *   whose surroundings agree with the LLM's prefix/suffix is taken.
 * - "first-of-many": several occurrences, none confirmed by context.
 * - "fuzzy-match": no verbatim occurrence; `findBestTextMatch` located an
 *   approximate one (its `matchQuality` is passed through).
 *
 * The returned `prefix` / `suffix` are always re-extracted from `content`
 * rather than copied from the LLM.
 *
 * @param {string} content Source text.
 * @param {{exact: string, prefix?: string, suffix?: string}} llm Selector as
 *   emitted by the model.
 * @returns {{start: number, end: number, exact: string, prefix?: string,
 *            suffix?: string, anchorMethod: string, matchQuality?: string} | null}
 *   `null` when `exact` is empty or cannot be located at all.
 */
function reconcileSelector(content, llm) {
  const { exact, prefix: llmPrefix, suffix: llmSuffix } = llm;
  if (!exact) return null;

  // Build the result for a located range, with context taken from the source.
  const describe = (start, end, text, extra) => {
    const context = extractContext(content, start, end);
    return {
      start,
      end,
      exact: text,
      ...(context.prefix !== undefined ? { prefix: context.prefix } : {}),
      ...(context.suffix !== undefined ? { suffix: context.suffix } : {}),
      ...extra
    };
  };

  const occurrences = [];
  for (let at = content.indexOf(exact); at !== -1; at = content.indexOf(exact, at + 1)) {
    occurrences.push(at);
  }

  if (occurrences.length === 1) {
    const start = occurrences[0];
    return describe(start, start + exact.length, exact, { anchorMethod: "unique-match" });
  }

  if (occurrences.length > 1) {
    if (llmPrefix || llmSuffix) {
      const prefixWindow = Math.max(DISAMBIGUATION_MIN_WINDOW, llmPrefix?.length ?? 0);
      const suffixWindow = Math.max(DISAMBIGUATION_MIN_WINDOW, llmSuffix?.length ?? 0);
      // The trimmed forms back the lenient "appears nearby" check. They must
      // be non-empty to count: every string includes "", so a whitespace-only
      // prefix/suffix would otherwise confirm every candidate.
      const loosePrefix = llmPrefix ? llmPrefix.trim() : "";
      const looseSuffix = llmSuffix ? llmSuffix.trim() : "";

      for (const pos of occurrences) {
        const afterStart = pos + exact.length;
        const candPrefix = content.substring(Math.max(0, pos - prefixWindow), pos);
        const candSuffix = content.substring(
          afterStart,
          Math.min(content.length, afterStart + suffixWindow)
        );
        const prefixOk =
          !llmPrefix ||
          candPrefix.endsWith(llmPrefix) ||
          (loosePrefix.length > 0 && candPrefix.includes(loosePrefix));
        const suffixOk =
          !llmSuffix ||
          candSuffix.startsWith(llmSuffix) ||
          (looseSuffix.length > 0 && candSuffix.includes(looseSuffix));
        if (prefixOk && suffixOk) {
          return describe(pos, afterStart, exact, { anchorMethod: "context-recovered" });
        }
      }
    }

    const start = occurrences[0];
    return describe(start, start + exact.length, exact, { anchorMethod: "first-of-many" });
  }

  const cache = buildContentCache(content);
  const fuzzy = findBestTextMatch(content, exact, void 0, cache);
  if (!fuzzy) return null;

  // Use the actual source text, not the LLM's version — the LLM may have
  // emitted slightly different characters (smart vs straight quotes, etc.)
  // and we store what's verifiable.
  const actual = content.substring(fuzzy.start, fuzzy.end);
  return describe(fuzzy.start, fuzzy.end, actual, {
    anchorMethod: "fuzzy-match",
    matchQuality: fuzzy.matchQuality
  });
}
|
|
1409
1608
|
|
|
@@ -1464,35 +1663,142 @@ function isValidEmail(email2) {
|
|
|
1464
1663
|
return emailRegex.test(email2);
|
|
1465
1664
|
}
|
|
1466
1665
|
|
|
1467
|
-
// src/
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
}
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
}
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
}
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1666
|
+
// src/media-types.ts
|
|
1667
|
+
// Row factory for types that are accepted and stored but get no rendering,
// no anchoring and no text extraction.
var storedBinary = (extension, label) => {
  return {
    extension,
    label,
    render: "none",
    anchoring: "none",
    extractText: "none",
    authorable: false,
    uploadable: true
  };
};

// Same as storedBinary, except that the content can be decoded as text.
var storedText = (extension, label) => {
  const row = storedBinary(extension, label);
  row.extractText = "decode";
  return row;
};

// Registry of known media types, keyed by lower-case base type. Key order
// matters: the first row listing an extension owns that extension in the
// extension -> media type lookup built from this table.
var MEDIA_TYPES = {
  // Full-capability tier
  "text/markdown": {
    extension: ".md",
    label: "Markdown",
    render: "text",
    anchoring: "text-selector",
    extractText: "decode",
    authorable: true,
    uploadable: true
  },
  "text/plain": {
    extension: ".txt",
    label: "Plain Text",
    render: "text",
    anchoring: "text-selector",
    extractText: "decode",
    authorable: true,
    uploadable: true
  },
  "text/html": {
    extension: ".html",
    label: "HTML",
    render: "text",
    anchoring: "text-selector",
    extractText: "decode",
    authorable: true,
    uploadable: true
  },
  "application/json": {
    extension: ".json",
    label: "JSON",
    render: "text",
    anchoring: "text-selector",
    extractText: "decode",
    authorable: false,
    uploadable: true
  },
  "image/png": {
    extension: ".png",
    label: "PNG image",
    render: "image",
    anchoring: "spatial",
    extractText: "none",
    authorable: false,
    uploadable: true
  },
  "image/jpeg": {
    extension: ".jpg",
    label: "JPEG image",
    render: "image",
    anchoring: "spatial",
    extractText: "none",
    authorable: false,
    uploadable: true
  },
  "application/pdf": {
    extension: ".pdf",
    label: "PDF",
    render: "pdf",
    anchoring: "spatial",
    extractText: "pdf-text-layer",
    authorable: false,
    uploadable: true
  },

  // Storage tier — the big tent. Every row is a deliberate admission,
  // promotable by editing its row. Text-flavored rows embed (decode).

  // Text
  "text/css": storedText(".css", "CSS"),
  "text/csv": storedText(".csv", "CSV"),
  "text/xml": storedText(".xml", "XML"),

  // Structured-text application formats
  "application/xml": storedText(".xml", "XML"),
  "application/yaml": storedText(".yaml", "YAML"),
  "application/x-yaml": storedText(".yaml", "YAML"),

  // Programming languages
  "text/javascript": storedText(".js", "JavaScript"),
  "application/javascript": storedText(".js", "JavaScript"),
  "text/x-typescript": storedText(".ts", "TypeScript"),
  "application/typescript": storedText(".ts", "TypeScript"),
  "text/x-python": storedText(".py", "Python source"),
  "text/x-java": storedText(".java", "Java source"),
  "text/x-c": storedText(".c", "C source"),
  "text/x-c++": storedText(".cpp", "C++ source"),
  "text/x-csharp": storedText(".cs", "C# source"),
  "text/x-go": storedText(".go", "Go source"),
  "text/x-rust": storedText(".rs", "Rust source"),
  "text/x-ruby": storedText(".rb", "Ruby source"),
  "text/x-php": storedText(".php", "PHP source"),
  "text/x-swift": storedText(".swift", "Swift source"),
  "text/x-kotlin": storedText(".kt", "Kotlin source"),
  "text/x-shell": storedText(".sh", "Shell script"),

  // Documents
  "application/msword": storedBinary(".doc", "Word document (legacy)"),
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document": storedBinary(".docx", "Word document"),
  "application/vnd.ms-excel": storedBinary(".xls", "Excel spreadsheet (legacy)"),
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": storedBinary(".xlsx", "Excel spreadsheet"),
  "application/vnd.ms-powerpoint": storedBinary(".ppt", "PowerPoint presentation (legacy)"),
  "application/vnd.openxmlformats-officedocument.presentationml.presentation": storedBinary(".pptx", "PowerPoint presentation"),

  // Archives
  "application/zip": storedBinary(".zip", "ZIP archive"),
  "application/gzip": storedBinary(".gz", "Gzip archive"),
  "application/x-tar": storedBinary(".tar", "TAR archive"),
  "application/x-7z-compressed": storedBinary(".7z", "7z archive"),

  // Binaries
  "application/octet-stream": storedBinary(".bin", "Binary"),
  "application/wasm": storedBinary(".wasm", "WebAssembly module"),

  // Images beyond the rendered pair
  "image/gif": storedBinary(".gif", "GIF image"),
  "image/webp": storedBinary(".webp", "WebP image"),
  "image/svg+xml": storedBinary(".svg", "SVG image"),
  "image/bmp": storedBinary(".bmp", "BMP image"),
  "image/tiff": storedBinary(".tiff", "TIFF image"),
  "image/x-icon": storedBinary(".ico", "Icon"),

  // Video (before audio so .webm resolves to video)
  "video/mp4": storedBinary(".mp4", "MP4 video"),
  "video/mpeg": storedBinary(".mpeg", "MPEG video"),
  "video/webm": storedBinary(".webm", "WebM video"),
  "video/ogg": storedBinary(".ogv", "Ogg video"),
  "video/quicktime": storedBinary(".mov", "QuickTime video"),
  "video/x-msvideo": storedBinary(".avi", "AVI video"),

  // Audio
  "audio/mpeg": storedBinary(".mp3", "MP3 audio"),
  "audio/wav": storedBinary(".wav", "WAV audio"),
  "audio/ogg": storedBinary(".ogg", "Ogg audio"),
  "audio/webm": storedBinary(".webm", "WebM audio"),
  "audio/aac": storedBinary(".aac", "AAC audio"),
  "audio/flac": storedBinary(".flac", "FLAC audio"),

  // Fonts
  "font/woff": storedBinary(".woff", "WOFF font"),
  "font/woff2": storedBinary(".woff2", "WOFF2 font"),
  "font/ttf": storedBinary(".ttf", "TrueType font"),
  "font/otf": storedBinary(".otf", "OpenType font")
};

// Alias of the same table object, used for lookups by arbitrary string key.
var REGISTRY = MEDIA_TYPES;
|
|
1759
|
+
/**
 * Reduce a media type string to its bare, lower-case `type/subtype`.
 *
 * Everything from the first `;` on (parameters such as `charset`) is
 * discarded and surrounding whitespace is removed.
 *
 * @param {string} format Media type, possibly with parameters.
 * @returns {string} The normalized base type.
 */
function baseMediaType(format) {
  const [essence] = format.split(";");
  return essence.trim().toLowerCase();
}
|
|
1762
|
+
/**
 * Tell whether `format` is, verbatim, one of the registry's keys.
 *
 * The check is an exact own-property test: no parameter stripping and no
 * case folding is applied here.
 *
 * @param {string} format Candidate media type.
 * @returns {boolean} `true` when `MEDIA_TYPES` has an own entry for `format`.
 */
function isSupportedMediaType(format) {
  const known = Object.hasOwn(MEDIA_TYPES, format);
  return known;
}
|
|
1765
|
+
/**
 * Look up the registry row for a media type.
 *
 * The input is normalized with `baseMediaType` first, so parameters and
 * letter case do not matter.
 *
 * @param {string} format Media type, possibly with parameters.
 * @returns {object | undefined} The registry row, or `undefined` when the
 *   type is not registered.
 */
function capabilitiesOf(format) {
  const key = baseMediaType(format);
  // Own-property check: a bare REGISTRY[key] would also answer for names
  // inherited from Object.prototype ("constructor", "toString", "__proto__"),
  // handing callers something that is not a registry row.
  return Object.hasOwn(REGISTRY, key) ? REGISTRY[key] : undefined;
}
|
|
1768
|
+
/**
 * File extension (with leading dot) to use for a media type.
 *
 * @param {string} format Media type, possibly with parameters.
 * @returns {string} The registered extension, or `".dat"` when the type is
 *   unknown or its row carries no extension.
 */
function extensionForMediaType(format) {
  const row = capabilitiesOf(format);
  const registered = row == null ? undefined : row.extension;
  return registered ?? ".dat";
}
|
|
1771
|
+
// Reverse index: canonical extension -> media type. Built once from
// MEDIA_TYPES in key order; when several types share an extension, the one
// listed first in the table wins.
var EXTENSION_TO_MEDIA_TYPE = (() => {
  const byExtension = /* @__PURE__ */ new Map();
  for (const [type, row] of Object.entries(MEDIA_TYPES)) {
    if (byExtension.has(row.extension)) continue;
    byExtension.set(row.extension, type);
  }
  return byExtension;
})();

// Alternative spellings, mapped to the canonical extension used in the table.
var EXTENSION_ALIASES = {
  ".markdown": ".md",
  ".htm": ".html",
  ".jpeg": ".jpg",
  ".yml": ".yaml"
};
|
|
1785
|
+
/**
 * Find the media type registered for a file extension.
 *
 * The extension is trimmed and lower-cased, a leading dot is added when
 * missing, and known aliases are folded onto their canonical extension
 * before the lookup.
 *
 * @param {string} ext Extension, with or without the leading dot.
 * @returns {string | undefined} The media type, or `undefined` when the
 *   extension is not registered.
 */
function mediaTypeForExtension(ext) {
  const cleaned = ext.trim().toLowerCase();
  const key = cleaned.startsWith(".") ? cleaned : "." + cleaned;
  const canonical = EXTENSION_ALIASES[key] ?? key;
  return EXTENSION_TO_MEDIA_TYPE.get(canonical);
}
|
|
1790
|
+
/**
 * Decide how text is obtained from content of the given media type.
 *
 * Registered types answer with their row's `extractText` value. Unregistered
 * types fall back on the top-level type: anything under `text/` is decoded,
 * everything else yields no text.
 *
 * @param {string} format Media type, possibly with parameters.
 * @returns {string} The extraction mode (`"decode"` or `"none"` for
 *   unregistered types; whatever the row says otherwise).
 */
function textExtractionOf(format) {
  const row = capabilitiesOf(format);
  if (row) {
    return row.extractText;
  }
  if (baseMediaType(format).startsWith("text/")) {
    return "decode";
  }
  return "none";
}
|
|
1795
|
+
// Every registered media type, in table order.
var REGISTRY_KEYS = Object.keys(MEDIA_TYPES);

// Media types whose row is flagged `authorable`.
var AUTHORABLE_MEDIA_TYPES = REGISTRY_KEYS.filter((type) => {
  const { authorable } = MEDIA_TYPES[type];
  return authorable;
});

// Media types that yield text in some way (any `extractText` mode but "none").
var EMBEDDABLE_MEDIA_TYPES = REGISTRY_KEYS.filter((type) => {
  const { extractText } = MEDIA_TYPES[type];
  return extractText !== "none";
});
|
|
1496
1802
|
|
|
1497
1803
|
// src/type-guards.ts
|
|
1498
1804
|
function isString(value) {
|
|
@@ -1535,6 +1841,8 @@ var SemiontError = class extends Error {
|
|
|
1535
1841
|
this.name = "SemiontError";
|
|
1536
1842
|
Error.captureStackTrace(this, this.constructor);
|
|
1537
1843
|
}
|
|
1844
|
+
code;
|
|
1845
|
+
details;
|
|
1538
1846
|
};
|
|
1539
1847
|
var ValidationError = class extends SemiontError {
|
|
1540
1848
|
constructor(message, details) {
|
|
@@ -1935,6 +2243,8 @@ var ConfigurationError = class extends Error {
|
|
|
1935
2243
|
this.name = "ConfigurationError";
|
|
1936
2244
|
this.cause = cause;
|
|
1937
2245
|
}
|
|
2246
|
+
environment;
|
|
2247
|
+
suggestion;
|
|
1938
2248
|
cause;
|
|
1939
2249
|
/**
|
|
1940
2250
|
* Format the error nicely for CLI output
|
|
@@ -1961,10 +2271,6 @@ function getAllPlatformTypes() {
|
|
|
1961
2271
|
return ["aws", "container", "posix", "external"];
|
|
1962
2272
|
}
|
|
1963
2273
|
|
|
1964
|
-
|
|
1965
|
-
var CORE_TYPES_VERSION = "0.1.0";
|
|
1966
|
-
var SDK_VERSION = "0.1.0";
|
|
1967
|
-
|
|
1968
|
-
export { BRIDGED_CHANNELS, CHANNEL_SCHEMAS, CORE_TYPES_VERSION, ConfigurationError, ConflictError, EventBus, JWTTokenSchema, LOCALES, NotFoundError, PERSISTED_EVENT_TYPES, RESOURCE_BROADCAST_TYPES, SDK_VERSION, ScopedEventBus, ScriptError, SemiontError, UnauthorizedError, ValidationError, accessToken, agentToDid, annotationId, annotationUri, applyBodyOperations, assembleAnnotation, authCode, baseUrl, buildContentCache, burstBuffer, busLog, busLogEnabled, cloneToken, createCircleSvg, createPolygonSvg, createRectangleSvg, createTomlConfigLoader, decodeRepresentation, decodeWithCharset, didToAgent, email, entityType, errField, extractBoundingBox, extractCharset, extractContext, findBestTextMatch, findBodyItem, findTextWithContext, formatLocaleDisplay, generateUuid, getAllLocaleCodes, getAllPlatformTypes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getExactText, getExtensionForMimeType, getFragmentSelector, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getMimeCategory, getNodeEncoding, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isAnnotationId, isArchived, isArray, isAssessment, isBodyResolved, isBoolean, isComment, isDefined, isDraft, isEventRelatedToAnnotation, isFunction, isHighlight, isImageMimeType, isNull, isNullish, isNumber, isObject, isPdfMimeType, isReference, isResolvedReference, isResourceId, isStoredEvent, isString, isStubReference, isTag, isTextMimeType, isUndefined, isValidEmail, isValidPlatformType, jobId, loadTomlConfig, mcpToken, normalizeCoordinates, normalizeText, parseEnvironment, parseSvgSelector, refreshToken, resourceAnnotationUri, resourceId, resourceUri, scaleSvgToNative, searchQuery, serializePerKey, 
setBusLogTraceIdProvider, softwareToAgent, userDID, userId, userToAgent, userToDid, validateAndCorrectOffsets, validateData, validateEnvironment, validateSvgMarkup, verifyPosition };
|
|
2274
|
+
export { AUTHORABLE_MEDIA_TYPES, BRIDGED_CHANNELS, CHANNEL_SCHEMAS, CONTEXT_FULL_WEIGHT, CONTEXT_PARTIAL_WEIGHT, ConfigurationError, ConflictError, EMBEDDABLE_MEDIA_TYPES, EventBus, JWTTokenSchema, LOCALES, MEDIA_TYPES, NotFoundError, PERSISTED_EVENT_TYPES, POSITION_WEIGHT_MAX, POSITION_WINDOW, RESOURCE_BROADCAST_TYPES, ScopedEventBus, ScriptError, SemiontError, UnauthorizedError, ValidationError, accessToken, agentToDid, anchorAnnotation, annotationId, annotationUri, applyBodyOperations, assembleAnnotation, authCode, baseMediaType, baseUrl, buildContentCache, burstBuffer, busLog, busLogEnabled, capabilitiesOf, cloneToken, createCircleSvg, createFragmentSelector, createPolygonSvg, createRectangleSvg, createTomlConfigLoader, decodeRepresentation, decodeWithCharset, didToAgent, email, entityType, errField, extensionForMediaType, extractBoundingBox, extractCharset, extractContext, findBestTextMatch, findBodyItem, formatLocaleDisplay, generateUuid, getAllLocaleCodes, getAllPlatformTypes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getExactText, getFragmentSelector, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getNodeEncoding, getPageFromFragment, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isAnnotationId, isArchived, isArray, isAssessment, isBodyResolved, isBoolean, isComment, isDefined, isDraft, isEventRelatedToAnnotation, isFunction, isHighlight, isNull, isNullish, isNumber, isObject, isReference, isResolvedReference, isResourceId, isStoredEvent, isString, isStubReference, isSupportedMediaType, isTag, isUndefined, isValidEmail, isValidPlatformType, jobId, loadTomlConfig, mcpToken, mediaTypeForExtension, normalizeCoordinates, normalizeText, 
parseEnvironment, parseFragmentSelector, parseSvgSelector, reconcileSelector, refreshToken, resourceAnnotationUri, resourceId, resourceUri, scaleSvgToNative, searchQuery, serializePerKey, setBusLogTraceIdProvider, softwareToAgent, textExtractionOf, userDID, userId, userToAgent, userToDid, validateData, validateEnvironment, validateSvgMarkup, verifyPosition };
|
|
1969
2275
|
//# sourceMappingURL=index.js.map
|
|
1970
2276
|
//# sourceMappingURL=index.js.map
|