@semiont/api-client 0.2.28-build.37 → 0.2.28-build.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -2
- package/dist/{index-DHh0ToZB.d.ts → index--2zlsZdR.d.ts} +269 -23
- package/dist/index.d.ts +2 -2
- package/dist/index.js +379 -36
- package/dist/index.js.map +1 -1
- package/dist/utils/index.d.ts +1 -1
- package/dist/utils/index.js +379 -36
- package/dist/utils/index.js.map +1 -1
- package/package.json +1 -1
package/dist/utils/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export {
|
|
1
|
+
export { at as BoundingBox, Q as EventMetadata, aI as JWTTokenSchema, a9 as LOCALES, a8 as LocaleInfo, as as Point, a3 as ResourceCreationDetails, b0 as ResourceEvent, V as ResourceEventType, i as Selector, P as StoredEvent, h as SvgSelector, a5 as TextPosition, T as TextPositionSelector, g as TextQuoteSelector, aB as ValidatedAnnotation, aG as ValidationFailure, aH as ValidationResult, aF as ValidationSuccess, aw as createCircleSvg, av as createPolygonSvg, au as createRectangleSvg, ar as decodeRepresentation, aE as decodeWithCharset, O as extractBoundingBox, aD as extractCharset, aA as extractContext, a6 as findTextWithContext, Z as formatEventType, ad as formatLocaleDisplay, a0 as formatRelativeTime, ae as getAllLocaleCodes, F as getAnnotationExactText, W as getAnnotationUriFromEvent, j as getBodySource, k as getBodyType, ai as getChecksum, x as getCommentText, al as getCreator, am as getDerivedFrom, a1 as getEventDisplayContent, _ as getEventEmoji, a2 as getEventEntityTypes, D as getExactText, aj as getLanguage, ac as getLocaleEnglishName, aa as getLocaleInfo, ab as getLocaleNativeName, aq as getNodeEncoding, ah as getPrimaryMediaType, ag as getPrimaryRepresentation, H as getPrimarySelector, a4 as getResourceCreationDetails, ao as getResourceEntityTypes, af as getResourceId, ak as getStorageUri, L as getSvgSelector, n as getTargetSelector, m as getTargetSource, I as getTextPositionSelector, K as getTextQuoteSelector, q as hasTargetSelector, an as isArchived, t as isAssessment, l as isBodyResolved, u as isComment, ap as isDraft, X as isEventRelatedToAnnotation, r as isHighlight, s as isReference, z as isResolvedReference, Y as isResourceEvent, y as isStubReference, v as isTag, aK as isValidEmail, ay as normalizeCoordinates, ax as parseSvgSelector, az as scaleSvgToNative, aC as validateAndCorrectOffsets, aJ as validateData, N as validateSvgMarkup, a7 as verifyPosition } from '../index--2zlsZdR.js';
|
package/dist/utils/index.js
CHANGED
|
@@ -68,23 +68,6 @@ function getTargetSelector(target) {
|
|
|
68
68
|
/**
 * Tells whether an annotation target carries a selector.
 *
 * @param {string|object} target - Either a plain string target or a target object.
 * @returns {boolean} `false` for string targets; otherwise whether
 *   `target.selector` is anything other than `undefined`.
 */
function hasTargetSelector(target) {
  if (typeof target === "string") {
    return false;
  }
  return target.selector !== undefined;
}
|
|
71
|
-
/**
 * Collects the entity-type tags stored in an annotation's body array.
 *
 * A body item counts as a tag when it is a non-null object whose `type` is
 * "TextualBody", whose `purpose` is "tagging", and whose `value` is a
 * non-empty string.
 *
 * @param {object} annotation - Annotation whose `body` is inspected.
 * @returns {string[]} The tag values in body order; an empty array when
 *   `annotation.body` is not an array.
 */
function getEntityTypes(annotation) {
  const { body } = annotation;
  if (!Array.isArray(body)) {
    return [];
  }
  const isEntityTag = (item) =>
    typeof item === "object" &&
    item !== null &&
    item.type === "TextualBody" &&
    item.purpose === "tagging" &&
    typeof item.value === "string" &&
    item.value.length > 0;
  return body.filter(isEntityTag).map((item) => item.value);
}
|
|
88
71
|
/**
 * Checks whether an annotation's motivation is "highlighting".
 *
 * @param {object} annotation - Annotation to inspect.
 * @returns {boolean} `true` only when `annotation.motivation === "highlighting"`.
 */
function isHighlight(annotation) {
  const { motivation } = annotation;
  return motivation === "highlighting";
}
|
|
@@ -108,24 +91,6 @@ function getCommentText(annotation) {
|
|
|
108
91
|
}
|
|
109
92
|
return void 0;
|
|
110
93
|
}
|
|
111
|
-
/**
 * Returns the value of the first body whose purpose is "tagging".
 *
 * @param {object} annotation - Annotation to inspect; must satisfy `isTag`.
 * @returns {*} The `value` of the first "tagging" body, or `undefined` when
 *   the annotation is not a tag, no such body exists, or that body has no
 *   `value` property.
 */
function getTagCategory(annotation) {
  if (!isTag(annotation)) {
    return undefined;
  }
  // A single body is treated as a one-element list.
  const candidates = Array.isArray(annotation.body) ? annotation.body : [annotation.body];
  for (const body of candidates) {
    if (body && "purpose" in body && body.purpose === "tagging") {
      return "value" in body ? body.value : undefined;
    }
  }
  return undefined;
}
|
|
120
|
-
/**
 * Returns the value of the first body whose purpose is "classifying".
 *
 * @param {object} annotation - Annotation to inspect; must satisfy `isTag`.
 * @returns {*} The `value` of the first "classifying" body, or `undefined`
 *   when the annotation is not a tag, no such body exists, or that body has
 *   no `value` property.
 */
function getTagSchemaId(annotation) {
  if (!isTag(annotation)) {
    return undefined;
  }
  // A single body is treated as a one-element list.
  const candidates = Array.isArray(annotation.body) ? annotation.body : [annotation.body];
  for (const body of candidates) {
    if (body && "purpose" in body && body.purpose === "classifying") {
      return "value" in body ? body.value : undefined;
    }
  }
  return undefined;
}
|
|
129
94
|
/**
 * Checks for a reference annotation whose body is not resolved.
 *
 * @param {object} annotation - Annotation to inspect.
 * @returns {*} The falsy result of `isReference(annotation)` when it is not
 *   a reference; otherwise the negation of `isBodyResolved(annotation.body)`.
 */
function isStubReference(annotation) {
  const referenceCheck = isReference(annotation);
  if (!referenceCheck) {
    return referenceCheck;
  }
  return !isBodyResolved(annotation.body);
}
|
|
@@ -467,6 +432,60 @@ function getResourceCreationDetails(event) {
|
|
|
467
432
|
return null;
|
|
468
433
|
}
|
|
469
434
|
|
|
435
|
+
// src/utils/fuzzy-anchor.ts
|
|
436
|
+
/**
 * Locates `exact` inside `content`, using optional surrounding context to
 * choose between several occurrences.
 *
 * Resolution order when more than one occurrence exists and a prefix or
 * suffix is supplied:
 *   1. the first occurrence whose preceding text ends with `prefix` and whose
 *      following text starts with `suffix`;
 *   2. the first occurrence whose surrounding text contains the trimmed
 *      prefix/suffix (a warning is logged);
 *   3. the first occurrence overall (a warning is logged).
 *
 * @param {string} content - Text to search in.
 * @param {string} exact - Text to find.
 * @param {string} [prefix] - Text expected immediately before the match.
 * @param {string} [suffix] - Text expected immediately after the match.
 * @returns {{start: number, end: number} | null} Offsets of the chosen
 *   occurrence, or `null` when `exact` is empty or not present.
 */
function findTextWithContext(content, exact, prefix, suffix) {
  if (!exact) return null;

  // Collect every occurrence, including overlapping ones.
  const occurrences = [];
  for (let at = content.indexOf(exact); at !== -1; at = content.indexOf(exact, at + 1)) {
    occurrences.push(at);
  }

  const spanAt = (offset) => ({ start: offset, end: offset + exact.length });

  if (occurrences.length === 0) {
    console.warn(`[FuzzyAnchor] Text not found: "${exact.substring(0, 50)}..."`);
    return null;
  }
  if (occurrences.length === 1) {
    return spanAt(occurrences[0]);
  }

  if (prefix || suffix) {
    const prefixLength = prefix?.length || 0;
    const suffixLength = suffix?.length || 0;
    // Text of the same length as the expected context on either side.
    const textBefore = (offset) => content.substring(Math.max(0, offset - prefixLength), offset);
    const textAfter = (offset) => {
      const matchEnd = offset + exact.length;
      return content.substring(matchEnd, matchEnd + suffixLength);
    };

    const strictHit = occurrences.find(
      (offset) =>
        (!prefix || textBefore(offset).endsWith(prefix)) &&
        (!suffix || textAfter(offset).startsWith(suffix))
    );
    // Offset 0 is a valid hit, so compare against undefined explicitly.
    if (strictHit !== undefined) {
      return spanAt(strictHit);
    }

    console.warn(
      `[FuzzyAnchor] Multiple matches found but none match prefix/suffix exactly. Exact: "${exact.substring(0, 30)}...", Prefix: "${prefix?.substring(0, 20) || "none"}", Suffix: "${suffix?.substring(0, 20) || "none"}"`
    );

    const looseHit = occurrences.find(
      (offset) =>
        (!prefix || textBefore(offset).includes(prefix.trim())) &&
        (!suffix || textAfter(offset).includes(suffix.trim()))
    );
    if (looseHit !== undefined) {
      console.warn(`[FuzzyAnchor] Using fuzzy match at position ${looseHit}`);
      return spanAt(looseHit);
    }
  }

  console.warn(
    `[FuzzyAnchor] Multiple matches but no context match. Using first occurrence. Exact: "${exact.substring(0, 30)}..."`
  );
  return spanAt(occurrences[0]);
}
|
|
484
|
+
/**
 * Confirms that the text at a position equals the expected text.
 *
 * @param {string} content - Text the position refers to.
 * @param {{start: number, end: number}} position - Offsets passed to `substring`.
 * @param {string} expectedExact - Text expected between those offsets.
 * @returns {boolean} `true` when `content.substring(start, end)` is strictly
 *   equal to `expectedExact`.
 */
function verifyPosition(content, position, expectedExact) {
  const { start, end } = position;
  return expectedExact === content.substring(start, end);
}
|
|
488
|
+
|
|
470
489
|
// src/utils/locales.ts
|
|
471
490
|
var LOCALES = [
|
|
472
491
|
{ code: "ar", nativeName: "\u0627\u0644\u0639\u0631\u0628\u064A\u0629", englishName: "Arabic" },
|
|
@@ -547,6 +566,330 @@ function getChecksum(resource) {
|
|
|
547
566
|
/**
 * Reads the `language` of a resource's primary representation.
 *
 * @param {object} resource - Resource handed to `getPrimaryRepresentation`.
 * @returns {*} The representation's `language`, or `undefined` when
 *   `getPrimaryRepresentation` returns null/undefined.
 */
function getLanguage(resource) {
  const representation = getPrimaryRepresentation(resource);
  return representation?.language;
}
|
|
569
|
+
/**
 * Reads the `storageUri` of a resource's primary representation.
 *
 * @param {object} resource - Resource handed to `getPrimaryRepresentation`.
 * @returns {*} The representation's `storageUri`, or `undefined` when
 *   `getPrimaryRepresentation` returns null/undefined.
 */
function getStorageUri(resource) {
  const representation = getPrimaryRepresentation(resource);
  return representation?.storageUri;
}
|
|
572
|
+
/**
 * Returns the (first) `wasAttributedTo` entry of a resource.
 *
 * @param {object|null|undefined} resource - Resource to inspect.
 * @returns {*} `undefined` when the resource or its `wasAttributedTo` is
 *   missing/falsy; the first element when it is an array; otherwise the
 *   value itself.
 */
function getCreator(resource) {
  const attribution = resource?.wasAttributedTo;
  if (!attribution) {
    return undefined;
  }
  if (Array.isArray(attribution)) {
    return attribution[0];
  }
  return attribution;
}
|
|
576
|
+
/**
 * Returns the (first) `wasDerivedFrom` entry of a resource.
 *
 * @param {object|null|undefined} resource - Resource to inspect.
 * @returns {*} `undefined` when the resource or its `wasDerivedFrom` is
 *   missing/falsy; the first element when it is an array; otherwise the
 *   value itself.
 */
function getDerivedFrom(resource) {
  const origin = resource?.wasDerivedFrom;
  if (!origin) {
    return undefined;
  }
  if (Array.isArray(origin)) {
    return origin[0];
  }
  return origin;
}
|
|
580
|
+
/**
 * Checks whether a resource is flagged as archived.
 *
 * @param {object|null|undefined} resource - Resource to inspect.
 * @returns {boolean} `true` only when `resource.archived` is exactly `true`;
 *   `false` for a null/undefined resource.
 */
function isArchived(resource) {
  return resource != null && resource.archived === true;
}
|
|
583
|
+
/**
 * Returns the entity types attached to a resource.
 *
 * @param {object|null|undefined} resource - Resource to inspect.
 * @returns {*} `resource.entityTypes` when it is truthy; otherwise a new
 *   empty array.
 */
function getResourceEntityTypes(resource) {
  const entityTypes = resource?.entityTypes;
  return entityTypes ? entityTypes : [];
}
|
|
586
|
+
/**
 * Checks whether a resource is flagged as a draft.
 *
 * @param {object|null|undefined} resource - Resource to inspect.
 * @returns {boolean} `true` only when `resource.isDraft` is exactly `true`;
 *   `false` for a null/undefined resource.
 */
function isDraft(resource) {
  return resource != null && resource.isDraft === true;
}
|
|
589
|
+
/**
 * Maps a charset label to an encoding name, for use with `Buffer#toString`.
 *
 * The label is lower-cased and stripped of `-` and `_` before lookup, so
 * "UTF-8", "utf_8" and "utf8" are equivalent.
 *
 * The table is a Map rather than an object literal so that labels which
 * collide with inherited object properties (for example "constructor")
 * fall back to "utf8" instead of returning a non-string value.
 *
 * @param {string} charset - Charset label, e.g. "ISO-8859-1".
 * @returns {string} The mapped encoding name, or "utf8" for unknown labels.
 */
function getNodeEncoding(charset) {
  const normalized = charset.toLowerCase().replace(/[-_]/g, "");
  const charsetMap = new Map([
    ["utf8", "utf8"],
    ["iso88591", "latin1"],
    ["latin1", "latin1"],
    ["ascii", "ascii"],
    ["usascii", "ascii"],
    ["utf16le", "utf16le"],
    ["ucs2", "ucs2"],
    ["binary", "binary"],
    // Windows-1252 is a superset of Latin-1
    ["windows1252", "latin1"],
    ["cp1252", "latin1"],
  ]);
  return charsetMap.get(normalized) ?? "utf8";
}
|
|
606
|
+
/**
 * Decodes a buffer to a string using the charset named in a media type.
 *
 * The charset parameter may be written bare (`charset=utf-8`) or as a
 * quoted string (`charset="utf-8"`); surrounding double quotes are not part
 * of the charset label. When no charset parameter is present, "utf-8" is
 * assumed.
 *
 * @param {Buffer} buffer - Bytes to decode; its `toString(encoding)` is called.
 * @param {string} mediaType - Media type, e.g. "text/plain; charset=iso-8859-1".
 * @returns {string} The decoded text.
 */
function decodeRepresentation(buffer, mediaType) {
  // An optional opening quote is skipped and a closing quote ends the value,
  // so quoted charsets resolve to the same label as unquoted ones.
  const charsetMatch = mediaType.match(/charset="?([^\s;"]+)/i);
  const charset = (charsetMatch?.[1] || "utf-8").toLowerCase();
  const encoding = getNodeEncoding(charset);
  return buffer.toString(encoding);
}
|
|
612
|
+
|
|
613
|
+
// src/utils/svg-utils.ts
|
|
614
|
+
/**
 * Builds an SVG document containing one `<rect>` spanning two corner points.
 *
 * The corners may be given in any order: the rectangle origin is the
 * smaller x/y of the two and its size is the absolute difference.
 *
 * @param {{x: number, y: number}} start - One corner.
 * @param {{x: number, y: number}} end - The opposite corner.
 * @returns {string} The SVG markup.
 */
function createRectangleSvg(start, end) {
  const left = Math.min(start.x, end.x);
  const top = Math.min(start.y, end.y);
  const spanX = Math.abs(end.x - start.x);
  const spanY = Math.abs(end.y - start.y);
  const rect = `<rect x="${left}" y="${top}" width="${spanX}" height="${spanY}"/>`;
  return `<svg xmlns="http://www.w3.org/2000/svg">${rect}</svg>`;
}
|
|
621
|
+
/**
 * Builds an SVG document containing one `<polygon>`.
 *
 * @param {Array<{x: number, y: number}>} points - Polygon vertices.
 * @returns {string} The SVG markup, with vertices written as "x,y" pairs
 *   separated by single spaces.
 * @throws {Error} When fewer than 3 points are supplied.
 */
function createPolygonSvg(points) {
  if (points.length < 3) {
    throw new Error("Polygon requires at least 3 points");
  }
  const pairs = points.map(({ x, y }) => `${x},${y}`);
  const polygon = `<polygon points="${pairs.join(" ")}"/>`;
  return `<svg xmlns="http://www.w3.org/2000/svg">${polygon}</svg>`;
}
|
|
628
|
+
/**
 * Builds an SVG document containing one `<circle>`.
 *
 * @param {{x: number, y: number}} center - Circle centre.
 * @param {number} radius - Circle radius.
 * @returns {string} The SVG markup.
 * @throws {Error} When `radius <= 0`.
 */
function createCircleSvg(center, radius) {
  if (radius <= 0) {
    throw new Error("Circle radius must be positive");
  }
  const { x, y } = center;
  const circle = `<circle cx="${x}" cy="${y}" r="${radius}"/>`;
  return `<svg xmlns="http://www.w3.org/2000/svg">${circle}</svg>`;
}
|
|
634
|
+
/**
 * Extracts the first supported shape from SVG markup.
 *
 * Shapes are tried in the order rect, polygon, circle. Rect and circle must
 * be self-closing elements with double-quoted attributes. Missing numeric
 * attributes default to 0.
 *
 * Attribute names are matched as whole names (at the start of the attribute
 * list or after whitespace), so `rx`, `ry` or `stroke-width` are never
 * mistaken for `x`, `y` or `width`. A polygon's `points` attribute may
 * appear after other attributes and may carry leading/trailing whitespace.
 *
 * @param {string} svg - SVG markup.
 * @returns {{type: "rect", data: {x: number, y: number, width: number, height: number}}
 *   | {type: "polygon", data: {points: Array<{x: number, y: number}>}}
 *   | {type: "circle", data: {cx: number, cy: number, r: number}}
 *   | null} The parsed shape, or `null` when none is found.
 */
function parseSvgSelector(svg) {
  // Reads one numeric attribute by its exact name; absent attributes give 0.
  const readNumber = (attrs, name) => {
    const found = attrs.match(new RegExp(`(?:^|\\s)${name}="([^"]+)"`));
    return parseFloat(found?.[1] || "0");
  };

  const rectMatch = svg.match(/<rect\s+([^>]+)\/>/);
  if (rectMatch && rectMatch[1]) {
    const attrs = rectMatch[1];
    return {
      type: "rect",
      data: {
        x: readNumber(attrs, "x"),
        y: readNumber(attrs, "y"),
        width: readNumber(attrs, "width"),
        height: readNumber(attrs, "height")
      }
    };
  }

  const polygonMatch = svg.match(/<polygon\s+(?:[^>]*?\s)?points="([^"]+)"/);
  if (polygonMatch && polygonMatch[1]) {
    // Trim first so padding does not produce an empty, NaN-valued pair.
    const points = polygonMatch[1].trim().split(/\s+/).map((pair) => {
      const [x, y] = pair.split(",").map((value) => parseFloat(value));
      return { x, y };
    });
    return {
      type: "polygon",
      data: { points }
    };
  }

  const circleMatch = svg.match(/<circle\s+([^>]+)\/>/);
  if (circleMatch && circleMatch[1]) {
    const attrs = circleMatch[1];
    return {
      type: "circle",
      data: {
        cx: readNumber(attrs, "cx"),
        cy: readNumber(attrs, "cy"),
        r: readNumber(attrs, "r")
      }
    };
  }

  return null;
}
|
|
672
|
+
/**
 * Converts a point from display dimensions to image dimensions.
 *
 * Each coordinate is divided by the display size and then multiplied by the
 * image size along the same axis.
 *
 * @param {{x: number, y: number}} point - Point measured against the display size.
 * @param {number} displayWidth - Width the point was measured against.
 * @param {number} displayHeight - Height the point was measured against.
 * @param {number} imageWidth - Target width.
 * @param {number} imageHeight - Target height.
 * @returns {{x: number, y: number}} The rescaled point.
 */
function normalizeCoordinates(point, displayWidth, displayHeight, imageWidth, imageHeight) {
  const fractionX = point.x / displayWidth;
  const fractionY = point.y / displayHeight;
  return { x: fractionX * imageWidth, y: fractionY * imageHeight };
}
|
|
678
|
+
/**
 * Rescales an SVG selector from display dimensions to image dimensions.
 *
 * The markup is parsed with `parseSvgSelector` and rebuilt with the matching
 * `create*Svg` helper. Rect and polygon coordinates are scaled per axis; a
 * circle's radius is scaled by the smaller of the two axis factors.
 *
 * @param {string} svg - SVG selector markup.
 * @param {number} displayWidth - Width the selector was drawn against.
 * @param {number} displayHeight - Height the selector was drawn against.
 * @param {number} imageWidth - Target width.
 * @param {number} imageHeight - Target height.
 * @returns {string} The rescaled markup, or `svg` unchanged when it cannot
 *   be parsed or has an unrecognised shape type.
 */
function scaleSvgToNative(svg, displayWidth, displayHeight, imageWidth, imageHeight) {
  const shape = parseSvgSelector(svg);
  if (!shape) {
    return svg;
  }

  const scaleX = imageWidth / displayWidth;
  const scaleY = imageHeight / displayHeight;
  const scalePoint = (px, py) => ({ x: px * scaleX, y: py * scaleY });

  if (shape.type === "rect") {
    const { x, y, width, height } = shape.data;
    return createRectangleSvg(scalePoint(x, y), scalePoint(x + width, y + height));
  }
  if (shape.type === "circle") {
    const { cx, cy, r } = shape.data;
    return createCircleSvg(scalePoint(cx, cy), r * Math.min(scaleX, scaleY));
  }
  if (shape.type === "polygon") {
    const scaled = shape.data.points.map((p) => scalePoint(p.x, p.y));
    return createPolygonSvg(scaled);
  }
  return svg;
}
|
|
708
|
+
|
|
709
|
+
// src/utils/text-context.ts
|
|
710
|
+
/**
 * Extracts the text surrounding a span, for use as prefix/suffix context.
 *
 * Each side starts as up to 64 characters next to the span. It is then
 * extended outward by at most 32 further characters, stopping early at
 * whitespace, one of the listed punctuation/bracket characters, or the edge
 * of the content.
 *
 * @param {string} content - Full text.
 * @param {number} start - Start offset of the span.
 * @param {number} end - End offset of the span.
 * @returns {{prefix: (string|undefined), suffix: (string|undefined)}}
 *   `prefix` is undefined when `start` is not greater than 0; `suffix` is
 *   undefined when `end` is not less than `content.length`.
 */
function extractContext(content, start, end) {
  const CONTEXT_LENGTH = 64;
  const MAX_EXTENSION = 32;
  const boundaryChar = /[\s.,;:!?'"()\[\]{}<>\/\\]/;
  // A missing character (outside the content) also stops the extension.
  const stopsExtension = (char) => !char || boundaryChar.test(char);

  let prefix;
  if (start > 0) {
    let from = Math.max(0, start - CONTEXT_LENGTH);
    for (let steps = 0; from > 0 && steps < MAX_EXTENSION; steps++) {
      if (stopsExtension(content[from - 1])) {
        break;
      }
      from--;
    }
    prefix = content.substring(from, start);
  }

  let suffix;
  if (end < content.length) {
    let to = Math.min(content.length, end + CONTEXT_LENGTH);
    for (let steps = 0; to < content.length && steps < MAX_EXTENSION; steps++) {
      if (stopsExtension(content[to])) {
        break;
      }
      to++;
    }
    suffix = content.substring(end, to);
  }

  return { prefix, suffix };
}
|
|
743
|
+
/**
 * Computes the Levenshtein edit distance between two strings.
 *
 * Characters are compared by string index, with a cost of 1 for each
 * insertion, deletion or substitution. Only two rows of the distance table
 * are kept at a time.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} Minimum number of single-character edits turning `str1`
 *   into `str2`.
 */
function levenshteinDistance(str1, str2) {
  const rows = str1.length;
  const cols = str2.length;

  // Row 0: distance from the empty prefix of str1 to each prefix of str2.
  let previous = Array.from({ length: cols + 1 }, (_, j) => j);

  for (let i = 1; i <= rows; i++) {
    const current = [i];
    for (let j = 1; j <= cols; j++) {
      const substitutionCost = str1[i - 1] === str2[j - 1] ? 0 : 1;
      current[j] = Math.min(
        previous[j] + 1,
        current[j - 1] + 1,
        previous[j - 1] + substitutionCost
      );
    }
    previous = current;
  }

  return previous[cols];
}
|
|
764
|
+
/**
 * Searches `content` for `searchText` with progressively looser strategies.
 *
 * 1. Exact: first occurrence via `indexOf`.
 * 2. Case-insensitive: first occurrence in the lower-cased content.
 * 3. Fuzzy: slides a window of `searchText.length` characters over the
 *    region within 500 characters of the supplied offsets and keeps the
 *    earliest window with the smallest Levenshtein distance, provided that
 *    distance is at most max(5, 5% of the search length).
 *
 * @param {string} content - Text to search in.
 * @param {string} searchText - Text to locate.
 * @param {number} aiStart - Suggested start offset (centres the fuzzy search).
 * @param {number} aiEnd - Suggested end offset (centres the fuzzy search).
 * @returns {{start: number, end: number, matchQuality: "exact"|"case-insensitive"|"fuzzy"} | null}
 *   The located span, or `null` when every strategy fails.
 */
function findBestMatch(content, searchText, aiStart, aiEnd) {
  const span = searchText.length;
  const located = (start, matchQuality) => ({ start, end: start + span, matchQuality });

  const exactAt = content.indexOf(searchText);
  if (exactAt !== -1) {
    return located(exactAt, "exact");
  }

  console.log("[findBestMatch] Exact match failed, trying case-insensitive...");
  const foldedAt = content.toLowerCase().indexOf(searchText.toLowerCase());
  if (foldedAt !== -1) {
    console.log("[findBestMatch] Found case-insensitive match");
    return located(foldedAt, "case-insensitive");
  }

  console.log("[findBestMatch] Case-insensitive failed, trying fuzzy match...");
  const tolerance = Math.max(5, Math.floor(span * 0.05));
  const radius = Math.min(500, content.length);
  const firstStart = Math.max(0, aiStart - radius);
  const lastStart = Math.min(content.length, aiEnd + radius) - span;

  let bestStart = -1;
  let bestDistance = Infinity;
  for (let i = firstStart; i <= lastStart; i++) {
    const distance = levenshteinDistance(searchText, content.substring(i, i + span));
    // Strict "<" keeps the earliest window among equally good candidates.
    if (distance <= tolerance && distance < bestDistance) {
      bestStart = i;
      bestDistance = distance;
      console.log(`[findBestMatch] Found fuzzy match at ${i} with distance ${distance}`);
    }
  }

  if (bestStart !== -1) {
    return located(bestStart, "fuzzy");
  }

  console.log("[findBestMatch] No acceptable match found");
  return null;
}
|
|
812
|
+
/**
 * Validates AI-supplied offsets against the document and corrects them when
 * they do not point at the expected text.
 *
 * When `content.substring(aiStart, aiEnd)` already equals `exact`, the
 * offsets are returned unchanged. Otherwise `findBestMatch` is used to
 * relocate the text, and the returned `exact` is the text actually found in
 * the document. Context is always computed with `extractContext`.
 *
 * @param {string} content - Full document text.
 * @param {number} aiStart - Start offset proposed by the AI.
 * @param {number} aiEnd - End offset proposed by the AI.
 * @param {string} exact - Text the AI claims sits at those offsets.
 * @returns {object} `{ start, end, exact, prefix, suffix, corrected,
 *   matchQuality }`, plus `fuzzyMatched` when the offsets were corrected.
 * @throws {Error} When no strategy of `findBestMatch` locates the text.
 */
function validateAndCorrectOffsets(content, aiStart, aiEnd, exact) {
  // Shortens long text for log output.
  const preview = (text, limit) => (text.length > limit ? text.substring(0, limit) + "..." : text);

  const exactPreview = preview(exact, 50);
  const textAtOffset = content.substring(aiStart, aiEnd);

  if (textAtOffset === exact) {
    console.log(`[validateAndCorrectOffsets] \u2713 Offsets correct for: "${exactPreview}"`);
    const { prefix, suffix } = extractContext(content, aiStart, aiEnd);
    return {
      start: aiStart,
      end: aiEnd,
      exact,
      prefix,
      suffix,
      corrected: false,
      matchQuality: "exact"
    };
  }

  const foundPreview = preview(textAtOffset, 50);
  console.warn(
    [
      `[validateAndCorrectOffsets] \u26A0 AI offset mismatch:`,
      `Expected text: "${exactPreview}"`,
      `Found at AI offset (${aiStart}-${aiEnd}): "${foundPreview}"`,
      `Attempting multi-strategy search...`
    ].join("\n")
  );

  const match = findBestMatch(content, exact, aiStart, aiEnd);
  if (!match) {
    const exactLong = preview(exact, 100);
    console.error(
      [
        `[validateAndCorrectOffsets] \u2717 No acceptable match found:`,
        `AI offsets: start=${aiStart}, end=${aiEnd}`,
        `AI text: "${exactLong}"`,
        `Text at AI offset: "${foundPreview}"`,
        `All search strategies (exact, case-insensitive, fuzzy) failed.`,
        `This suggests the AI hallucinated text that doesn't exist in the document.`
      ].join("\n")
    );
    throw new Error(
      "Cannot find acceptable match for text in content. All search strategies failed. Text may be hallucinated."
    );
  }

  const actualText = content.substring(match.start, match.end);
  const actualPreview = preview(actualText, 50);
  const offsetDelta = match.start - aiStart;

  let matchSymbol = "~";
  if (match.matchQuality === "exact") {
    matchSymbol = "\u2713";
  } else if (match.matchQuality === "case-insensitive") {
    matchSymbol = "\u2248";
  }

  console.warn(
    [
      `[validateAndCorrectOffsets] ${matchSymbol} Found ${match.matchQuality} match:`,
      `AI offsets: start=${aiStart}, end=${aiEnd}`,
      `Corrected: start=${match.start}, end=${match.end}`,
      `Offset delta: ${offsetDelta} characters`,
      `Actual text: "${actualPreview}"`
    ].join("\n")
  );

  if (match.matchQuality === "fuzzy") {
    console.warn(
      [
        `[validateAndCorrectOffsets] Fuzzy match details:`,
        `AI provided: "${exactPreview}"`,
        `Found in doc: "${actualPreview}"`,
        `Minor text differences detected - using document version`
      ].join("\n")
    );
  }

  const { prefix, suffix } = extractContext(content, match.start, match.end);
  return {
    start: match.start,
    end: match.end,
    // Use actual text from document, not AI's version
    exact: actualText,
    prefix,
    suffix,
    corrected: true,
    fuzzyMatched: match.matchQuality !== "exact",
    matchQuality: match.matchQuality
  };
}
|
|
882
|
+
|
|
883
|
+
// src/utils/text-encoding.ts
|
|
884
|
+
/**
 * Extracts the charset label from a media type string.
 *
 * The parameter may be bare (`charset=utf-8`) or a quoted string
 * (`charset="utf-8"`); the surrounding double quotes are not returned.
 *
 * @param {string} mediaType - Media type, e.g. 'text/html; charset="UTF-8"'.
 * @returns {string} The lower-cased charset label, or "utf-8" when the
 *   media type has no charset parameter.
 */
function extractCharset(mediaType) {
  // An optional opening quote is skipped and a closing quote ends the value.
  const charsetMatch = mediaType.match(/charset="?([^\s;"]+)/i);
  return (charsetMatch?.[1] || "utf-8").toLowerCase();
}
|
|
888
|
+
/**
 * Decodes bytes to text using the charset named in a media type.
 *
 * The charset label comes from `extractCharset(mediaType)` and is passed
 * directly to the `TextDecoder` constructor.
 *
 * @param {ArrayBuffer|ArrayBufferView} buffer - Bytes to decode.
 * @param {string} mediaType - Media type carrying an optional charset parameter.
 * @returns {string} The decoded text.
 */
function decodeWithCharset(buffer, mediaType) {
  const label = extractCharset(mediaType);
  return new TextDecoder(label).decode(buffer);
}
|
|
550
893
|
|
|
551
894
|
// src/utils/validation.ts
|
|
552
895
|
var JWTTokenSchema = {
|
|
@@ -594,6 +937,6 @@ function isValidEmail(email) {
|
|
|
594
937
|
return emailRegex.test(email);
|
|
595
938
|
}
|
|
596
939
|
|
|
597
|
-
export { JWTTokenSchema, LOCALES, extractBoundingBox, formatEventType, formatLocaleDisplay, formatRelativeTime, getAllLocaleCodes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText,
|
|
940
|
+
export { JWTTokenSchema, LOCALES, createCircleSvg, createPolygonSvg, createRectangleSvg, decodeRepresentation, decodeWithCharset, extractBoundingBox, extractCharset, extractContext, findTextWithContext, formatEventType, formatLocaleDisplay, formatRelativeTime, getAllLocaleCodes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getEventDisplayContent, getEventEmoji, getEventEntityTypes, getExactText, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getNodeEncoding, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceCreationDetails, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, hasTargetSelector, isArchived, isAssessment, isBodyResolved, isComment, isDraft, isEventRelatedToAnnotation, isHighlight, isReference, isResolvedReference, isResourceEvent, isStubReference, isTag, isValidEmail, normalizeCoordinates, parseSvgSelector, scaleSvgToNative, validateAndCorrectOffsets, validateData, validateSvgMarkup, verifyPosition };
|
|
598
941
|
//# sourceMappingURL=index.js.map
|
|
599
942
|
//# sourceMappingURL=index.js.map
|