@semiont/api-client 0.2.28-build.38 → 0.2.28-build.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -2
- package/dist/{index-CBMGI-nS.d.ts → index--2zlsZdR.d.ts} +152 -23
- package/dist/index.d.ts +2 -2
- package/dist/index.js +218 -36
- package/dist/index.js.map +1 -1
- package/dist/utils/index.d.ts +1 -1
- package/dist/utils/index.js +218 -36
- package/dist/utils/index.js.map +1 -1
- package/package.json +1 -1
package/dist/utils/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export {
|
|
1
|
+
export { at as BoundingBox, Q as EventMetadata, aI as JWTTokenSchema, a9 as LOCALES, a8 as LocaleInfo, as as Point, a3 as ResourceCreationDetails, b0 as ResourceEvent, V as ResourceEventType, i as Selector, P as StoredEvent, h as SvgSelector, a5 as TextPosition, T as TextPositionSelector, g as TextQuoteSelector, aB as ValidatedAnnotation, aG as ValidationFailure, aH as ValidationResult, aF as ValidationSuccess, aw as createCircleSvg, av as createPolygonSvg, au as createRectangleSvg, ar as decodeRepresentation, aE as decodeWithCharset, O as extractBoundingBox, aD as extractCharset, aA as extractContext, a6 as findTextWithContext, Z as formatEventType, ad as formatLocaleDisplay, a0 as formatRelativeTime, ae as getAllLocaleCodes, F as getAnnotationExactText, W as getAnnotationUriFromEvent, j as getBodySource, k as getBodyType, ai as getChecksum, x as getCommentText, al as getCreator, am as getDerivedFrom, a1 as getEventDisplayContent, _ as getEventEmoji, a2 as getEventEntityTypes, D as getExactText, aj as getLanguage, ac as getLocaleEnglishName, aa as getLocaleInfo, ab as getLocaleNativeName, aq as getNodeEncoding, ah as getPrimaryMediaType, ag as getPrimaryRepresentation, H as getPrimarySelector, a4 as getResourceCreationDetails, ao as getResourceEntityTypes, af as getResourceId, ak as getStorageUri, L as getSvgSelector, n as getTargetSelector, m as getTargetSource, I as getTextPositionSelector, K as getTextQuoteSelector, q as hasTargetSelector, an as isArchived, t as isAssessment, l as isBodyResolved, u as isComment, ap as isDraft, X as isEventRelatedToAnnotation, r as isHighlight, s as isReference, z as isResolvedReference, Y as isResourceEvent, y as isStubReference, v as isTag, aK as isValidEmail, ay as normalizeCoordinates, ax as parseSvgSelector, az as scaleSvgToNative, aC as validateAndCorrectOffsets, aJ as validateData, N as validateSvgMarkup, a7 as verifyPosition } from '../index--2zlsZdR.js';
|
package/dist/utils/index.js
CHANGED
|
@@ -68,23 +68,6 @@ function getTargetSelector(target) {
|
|
|
68
68
|
/**
 * Reports whether an annotation target carries a selector.
 *
 * A string target never has one; for any other target the answer is whether
 * its `selector` property is something other than `undefined`.
 *
 * @param {*} target - Annotation target, either a string or an object.
 * @returns {boolean} `true` when `target.selector` is defined.
 */
function hasTargetSelector(target) {
  if (typeof target === "string") {
    return false;
  }
  return target.selector !== undefined;
}
|
|
71
|
-
/**
 * Collects the entity-type tags stored in an annotation's body.
 *
 * Only an array body is inspected. An entry contributes its `value` when it
 * is an object with `type === "TextualBody"`, `purpose === "tagging"` and a
 * non-empty string `value`.
 *
 * @param {{ body?: * }} annotation - Annotation whose body is scanned.
 * @returns {string[]} Tag values in body order; empty when the body is not an array.
 */
function getEntityTypes(annotation) {
  const { body } = annotation;
  if (!Array.isArray(body)) {
    return [];
  }
  const isEntityTag = (entry) =>
    entry !== null &&
    typeof entry === "object" &&
    "type" in entry &&
    "value" in entry &&
    "purpose" in entry &&
    entry.type === "TextualBody" &&
    entry.purpose === "tagging" &&
    typeof entry.value === "string" &&
    entry.value.length > 0;
  return body.filter(isEntityTag).map((entry) => entry.value);
}
|
|
88
71
|
/**
 * Tells whether an annotation is a highlight.
 *
 * @param {{ motivation?: * }} annotation - Annotation to classify.
 * @returns {boolean} `true` when `annotation.motivation` is exactly "highlighting".
 */
function isHighlight(annotation) {
  const { motivation } = annotation;
  return motivation === "highlighting";
}
|
|
@@ -108,24 +91,6 @@ function getCommentText(annotation) {
|
|
|
108
91
|
}
|
|
109
92
|
return void 0;
|
|
110
93
|
}
|
|
111
|
-
/**
 * Returns the `value` of the first body entry whose purpose is "tagging".
 *
 * Annotations rejected by `isTag` yield `undefined`. A single (non-array)
 * body is treated as a one-element list.
 *
 * @param {{ body?: * }} annotation - Annotation to read.
 * @returns {*} The tagging body's `value`, or `undefined` when the annotation
 *   is not a tag, has no tagging body, or that body has no `value` property.
 */
function getTagCategory(annotation) {
  if (!isTag(annotation)) {
    return undefined;
  }
  const entries = Array.isArray(annotation.body) ? annotation.body : [annotation.body];
  for (const entry of entries) {
    if (entry && "purpose" in entry && entry.purpose === "tagging") {
      // Only the first tagging entry is considered, even if it lacks a value.
      return "value" in entry ? entry.value : undefined;
    }
  }
  return undefined;
}
|
|
120
|
-
/**
 * Returns the `value` of the first body entry whose purpose is "classifying".
 *
 * Annotations rejected by `isTag` yield `undefined`. A single (non-array)
 * body is treated as a one-element list.
 *
 * @param {{ body?: * }} annotation - Annotation to read.
 * @returns {*} The classifying body's `value`, or `undefined` when the
 *   annotation is not a tag, has no classifying body, or that body has no
 *   `value` property.
 */
function getTagSchemaId(annotation) {
  if (!isTag(annotation)) {
    return undefined;
  }
  const entries = Array.isArray(annotation.body) ? annotation.body : [annotation.body];
  for (const entry of entries) {
    if (entry && "purpose" in entry && entry.purpose === "classifying") {
      // Only the first classifying entry is considered, even if it lacks a value.
      return "value" in entry ? entry.value : undefined;
    }
  }
  return undefined;
}
|
|
129
94
|
/**
 * Tells whether an annotation is a reference whose body is not yet resolved.
 *
 * @param {{ body?: * }} annotation - Annotation to classify.
 * @returns {*} The falsy result of `isReference` when the annotation is not a
 *   reference; otherwise the negation of `isBodyResolved(annotation.body)`.
 */
function isStubReference(annotation) {
  const referenceCheck = isReference(annotation);
  if (!referenceCheck) {
    // Hand back the helper's own falsy value, as a short-circuit `&&` would.
    return referenceCheck;
  }
  return !isBodyResolved(annotation.body);
}
|
|
@@ -601,6 +566,49 @@ function getChecksum(resource) {
|
|
|
601
566
|
/**
 * Reads the `language` of a resource's primary representation.
 *
 * @param {*} resource - Resource passed through to `getPrimaryRepresentation`.
 * @returns {*} The representation's `language`, or `undefined` when
 *   `getPrimaryRepresentation` yields `null`/`undefined`.
 */
function getLanguage(resource) {
  const primary = getPrimaryRepresentation(resource);
  if (primary === null || primary === undefined) {
    return undefined;
  }
  return primary.language;
}
|
|
569
|
+
/**
 * Reads the `storageUri` of a resource's primary representation.
 *
 * @param {*} resource - Resource passed through to `getPrimaryRepresentation`.
 * @returns {*} The representation's `storageUri`, or `undefined` when
 *   `getPrimaryRepresentation` yields `null`/`undefined`.
 */
function getStorageUri(resource) {
  const primary = getPrimaryRepresentation(resource);
  if (primary === null || primary === undefined) {
    return undefined;
  }
  return primary.storageUri;
}
|
|
572
|
+
/**
 * Returns the creator recorded in a resource's `wasAttributedTo` field.
 *
 * @param {*} resource - Resource object; may be `null` or `undefined`.
 * @returns {*} The first element when `wasAttributedTo` is an array, the value
 *   itself otherwise, or `undefined` when the resource or field is missing/falsy.
 */
function getCreator(resource) {
  const attributedTo =
    resource === null || resource === undefined ? undefined : resource.wasAttributedTo;
  if (!attributedTo) {
    return undefined;
  }
  if (Array.isArray(attributedTo)) {
    return attributedTo[0];
  }
  return attributedTo;
}
|
|
576
|
+
/**
 * Returns the origin recorded in a resource's `wasDerivedFrom` field.
 *
 * @param {*} resource - Resource object; may be `null` or `undefined`.
 * @returns {*} The first element when `wasDerivedFrom` is an array, the value
 *   itself otherwise, or `undefined` when the resource or field is missing/falsy.
 */
function getDerivedFrom(resource) {
  const derivedFrom =
    resource === null || resource === undefined ? undefined : resource.wasDerivedFrom;
  if (!derivedFrom) {
    return undefined;
  }
  if (Array.isArray(derivedFrom)) {
    return derivedFrom[0];
  }
  return derivedFrom;
}
|
|
580
|
+
/**
 * Tells whether a resource is flagged as archived.
 *
 * @param {*} resource - Resource object; may be `null` or `undefined`.
 * @returns {boolean} `true` only when `resource.archived` is the boolean `true`.
 */
function isArchived(resource) {
  if (resource === null || resource === undefined) {
    return false;
  }
  return resource.archived === true;
}
|
|
583
|
+
/**
 * Returns a resource's `entityTypes`, defaulting to an empty array.
 *
 * @param {*} resource - Resource object; may be `null` or `undefined`.
 * @returns {*} `resource.entityTypes` when it is truthy, otherwise a new `[]`.
 */
function getResourceEntityTypes(resource) {
  const entityTypes =
    resource === null || resource === undefined ? undefined : resource.entityTypes;
  return entityTypes ? entityTypes : [];
}
|
|
586
|
+
/**
 * Tells whether a resource is flagged as a draft.
 *
 * @param {*} resource - Resource object; may be `null` or `undefined`.
 * @returns {boolean} `true` only when `resource.isDraft` is the boolean `true`.
 */
function isDraft(resource) {
  if (resource === null || resource === undefined) {
    return false;
  }
  return resource.isDraft === true;
}
|
|
589
|
+
/**
 * Maps a charset label (as found in a media type) to a Node.js Buffer
 * encoding name.
 *
 * The label is compared case-insensitively with `-` and `_` removed, so
 * "UTF-8", "utf_8" and "utf8" are equivalent. Unknown labels, and values that
 * are not strings, fall back to "utf8".
 *
 * A `switch` is used rather than a plain-object lookup so that labels which
 * happen to match inherited object keys (e.g. "constructor") cannot yield a
 * non-string result.
 *
 * @param {string} charset - Charset label, e.g. "ISO-8859-1".
 * @returns {string} One of "utf8", "latin1", "ascii", "utf16le", "ucs2", "binary".
 */
function getNodeEncoding(charset) {
  if (typeof charset !== "string") {
    return "utf8";
  }
  const normalized = charset.toLowerCase().replace(/[-_]/g, "");
  switch (normalized) {
    case "utf8":
      return "utf8";
    case "iso88591":
    case "latin1":
    // Windows-1252 is a superset of Latin-1
    case "windows1252":
    case "cp1252":
      return "latin1";
    case "ascii":
    case "usascii":
      return "ascii";
    case "utf16le":
      return "utf16le";
    case "ucs2":
      return "ucs2";
    case "binary":
      return "binary";
    default:
      return "utf8";
  }
}
|
|
606
|
+
/**
 * Decodes a representation's bytes to a string using the charset named in its
 * media type.
 *
 * The `charset` parameter is matched case-insensitively and may be quoted
 * (`charset="iso-8859-1"`); the quotes are not part of the label. When the
 * media type is missing or carries no charset, "utf-8" is assumed. The label
 * is translated with `getNodeEncoding` before decoding.
 *
 * @param {Buffer} buffer - Bytes to decode; its `toString(encoding)` is used.
 * @param {string} mediaType - Media type, e.g. "text/plain; charset=utf-8".
 * @returns {string} The decoded text.
 */
function decodeRepresentation(buffer, mediaType) {
  const charsetMatch =
    typeof mediaType === "string" ? mediaType.match(/charset="?([^\s;"]+)"?/i) : null;
  const charset = (charsetMatch?.[1] || "utf-8").toLowerCase();
  const encoding = getNodeEncoding(charset);
  return buffer.toString(encoding);
}
|
|
604
612
|
|
|
605
613
|
// src/utils/svg-utils.ts
|
|
606
614
|
function createRectangleSvg(start, end) {
|
|
@@ -698,6 +706,180 @@ function scaleSvgToNative(svg, displayWidth, displayHeight, imageWidth, imageHei
|
|
|
698
706
|
return svg;
|
|
699
707
|
}
|
|
700
708
|
|
|
709
|
+
// src/utils/text-context.ts
|
|
710
|
+
/**
 * Extracts the text surrounding the range [start, end) of `content`.
 *
 * Each side begins as a window of up to 64 characters. The window edge is then
 * pushed outward, one character at a time and at most 32 times, until the
 * character just outside it is whitespace or one of the punctuation marks in
 * the boundary pattern, or the edge of `content` is reached.
 *
 * @param {string} content - Full text.
 * @param {number} start - Start offset of the range.
 * @param {number} end - End offset of the range (exclusive).
 * @returns {{ prefix: (string|undefined), suffix: (string|undefined) }}
 *   `prefix` is `undefined` unless `start > 0`; `suffix` is `undefined` unless
 *   `end < content.length`.
 */
function extractContext(content, start, end) {
  const CONTEXT_LENGTH = 64;
  const MAX_EXTENSION = 32;
  const isBoundary = (ch) => !ch || /[\s.,;:!?'"()\[\]{}<>\/\\]/.test(ch);

  let prefix;
  if (start > 0) {
    let from = Math.max(0, start - CONTEXT_LENGTH);
    // Walk left while the character before the window is not a boundary.
    for (
      let steps = 0;
      from > 0 && steps < MAX_EXTENSION && !isBoundary(content[from - 1]);
      steps += 1
    ) {
      from -= 1;
    }
    prefix = content.substring(from, start);
  }

  let suffix;
  if (end < content.length) {
    let to = Math.min(content.length, end + CONTEXT_LENGTH);
    // Walk right while the character after the window is not a boundary.
    for (
      let steps = 0;
      to < content.length && steps < MAX_EXTENSION && !isBoundary(content[to]);
      steps += 1
    ) {
      to += 1;
    }
    suffix = content.substring(end, to);
  }

  return { prefix, suffix };
}
|
|
743
|
+
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn `str1` into `str2`. Characters are compared by string index.
 *
 * Only two rows of the dynamic-programming table are kept at a time, so the
 * working memory is proportional to `str2.length` rather than to the product
 * of both lengths.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} The edit distance (0 when the strings are equal).
 */
function levenshteinDistance(str1, str2) {
  const len1 = str1.length;
  const len2 = str2.length;
  if (len1 === 0) {
    return len2;
  }
  if (len2 === 0) {
    return len1;
  }

  // previousRow[j] = distance between the first i-1 chars of str1 and first j chars of str2.
  let previousRow = [];
  for (let j = 0; j <= len2; j++) {
    previousRow[j] = j;
  }
  let currentRow = [];

  for (let i = 1; i <= len1; i++) {
    currentRow[0] = i;
    for (let j = 1; j <= len2; j++) {
      const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
      const deletion = previousRow[j] + 1;
      const insertion = currentRow[j - 1] + 1;
      const substitution = previousRow[j - 1] + cost;
      currentRow[j] = Math.min(deletion, insertion, substitution);
    }
    // Reuse the old row's storage for the next iteration.
    [previousRow, currentRow] = [currentRow, previousRow];
  }
  return previousRow[len2];
}
|
|
764
|
+
/**
 * Finds the occurrence of `needle` in `haystack` whose start index is closest
 * to `hint`. Occurrences are visited left to right, so the scan stops as soon
 * as the gap to `hint` stops shrinking. A `hint` that is not a usable number
 * results in the first occurrence being returned.
 *
 * @returns {number} Start index of the chosen occurrence, or -1 if none.
 */
function nearestIndexOf(haystack, needle, hint) {
  let best = -1;
  let bestGap = Infinity;
  for (
    let idx = haystack.indexOf(needle);
    idx !== -1;
    idx = haystack.indexOf(needle, idx + 1)
  ) {
    const gap = Math.abs(idx - hint);
    if (best === -1 || gap < bestGap) {
      best = idx;
      bestGap = gap;
    } else {
      break;
    }
  }
  return best;
}

/**
 * Case-insensitive search that returns offsets valid in the ORIGINAL content.
 * Lowercasing can change a string's length (e.g. U+0130), in which case
 * indices found in the lowercased copy do not line up with `content`; that
 * case is handled by comparing lowercased windows of the original instead.
 *
 * @returns {{ start: number, end: number } | null}
 */
function findCaseInsensitiveMatch(content, searchText, hint) {
  const lowerContent = content.toLowerCase();
  const lowerSearch = searchText.toLowerCase();

  if (lowerContent.length === content.length) {
    const idx = nearestIndexOf(lowerContent, lowerSearch, hint);
    return idx === -1 ? null : { start: idx, end: idx + lowerSearch.length };
  }

  const windowSize = searchText.length;
  let best = -1;
  let bestGap = Infinity;
  for (let i = 0; i + windowSize <= content.length; i++) {
    if (content.substring(i, i + windowSize).toLowerCase() !== lowerSearch) {
      continue;
    }
    const gap = Math.abs(i - hint);
    if (best === -1 || gap < bestGap) {
      best = i;
      bestGap = gap;
    } else {
      break;
    }
  }
  return best === -1 ? null : { start: best, end: best + windowSize };
}

/**
 * Locates `searchText` in `content`, trying three strategies in order:
 * exact match, case-insensitive match, then a fuzzy (Levenshtein) match over
 * same-length windows within 500 characters of the AI-provided range.
 *
 * When the text occurs more than once, the exact and case-insensitive
 * strategies pick the occurrence closest to `aiStart`. The fuzzy tolerance is
 * `max(5, 5% of length)` but never more than half the search length, so a
 * very short search text cannot "match" arbitrary content.
 *
 * @param {string} content - Document text to search.
 * @param {string} searchText - Text to locate.
 * @param {number} aiStart - Start offset suggested by the caller (search hint).
 * @param {number} aiEnd - End offset suggested by the caller (search hint).
 * @returns {{ start: number, end: number, matchQuality: string } | null}
 *   Offsets into `content` and one of "exact", "case-insensitive", "fuzzy";
 *   `null` when no strategy finds an acceptable match.
 */
function findBestMatch(content, searchText, aiStart, aiEnd) {
  const baseTolerance = Math.max(5, Math.floor(searchText.length * 0.05));
  const maxFuzzyDistance = Math.min(baseTolerance, Math.floor(searchText.length / 2));

  const exactIndex = nearestIndexOf(content, searchText, aiStart);
  if (exactIndex !== -1) {
    return {
      start: exactIndex,
      end: exactIndex + searchText.length,
      matchQuality: "exact"
    };
  }

  console.log("[findBestMatch] Exact match failed, trying case-insensitive...");
  const caseInsensitive = findCaseInsensitiveMatch(content, searchText, aiStart);
  if (caseInsensitive) {
    console.log("[findBestMatch] Found case-insensitive match");
    return {
      start: caseInsensitive.start,
      end: caseInsensitive.end,
      matchQuality: "case-insensitive"
    };
  }

  console.log("[findBestMatch] Case-insensitive failed, trying fuzzy match...");
  const windowSize = searchText.length;
  const searchRadius = Math.min(500, content.length);
  const searchStart = Math.max(0, aiStart - searchRadius);
  const searchEnd = Math.min(content.length, aiEnd + searchRadius);
  let bestMatch = null;
  for (let i = searchStart; i <= searchEnd - windowSize; i++) {
    const candidate = content.substring(i, i + windowSize);
    const distance = levenshteinDistance(searchText, candidate);
    if (distance <= maxFuzzyDistance && (!bestMatch || distance < bestMatch.distance)) {
      bestMatch = { start: i, distance };
      console.log(`[findBestMatch] Found fuzzy match at ${i} with distance ${distance}`);
    }
  }
  if (bestMatch) {
    return {
      start: bestMatch.start,
      end: bestMatch.start + windowSize,
      matchQuality: "fuzzy"
    };
  }

  console.log("[findBestMatch] No acceptable match found");
  return null;
}
|
|
812
|
+
/**
 * Checks AI-supplied offsets against the document and repairs them if needed.
 *
 * If `content.substring(aiStart, aiEnd)` equals `exact`, the offsets are kept.
 * Otherwise `findBestMatch` is asked to locate the text; on success the
 * returned offsets and the document's own text at those offsets are used.
 * In both cases prefix/suffix context comes from `extractContext`. Progress
 * and diagnostics are written to the console.
 *
 * @param {string} content - Document text.
 * @param {number} aiStart - Start offset proposed by the AI.
 * @param {number} aiEnd - End offset proposed by the AI.
 * @param {string} exact - Text the AI claims is at those offsets.
 * @returns {object} `{ start, end, exact, prefix, suffix, corrected, matchQuality }`,
 *   plus `fuzzyMatched` when the offsets were corrected.
 * @throws {Error} When `findBestMatch` returns no match.
 */
function validateAndCorrectOffsets(content, aiStart, aiEnd, exact) {
  const TAG = "[validateAndCorrectOffsets]";
  // Shorten long text for log output.
  const clip = (text, limit) =>
    text.length > limit ? text.substring(0, limit) + "..." : text;

  const exactPreview = clip(exact, 50);
  const textAtOffset = content.substring(aiStart, aiEnd);

  if (textAtOffset === exact) {
    console.log(`${TAG} \u2713 Offsets correct for: "${exactPreview}"`);
    const { prefix, suffix } = extractContext(content, aiStart, aiEnd);
    return {
      start: aiStart,
      end: aiEnd,
      exact,
      prefix,
      suffix,
      corrected: false,
      matchQuality: "exact"
    };
  }

  const foundPreview = clip(textAtOffset, 50);
  console.warn(
    [
      `${TAG} \u26A0 AI offset mismatch:`,
      `Expected text: "${exactPreview}"`,
      `Found at AI offset (${aiStart}-${aiEnd}): "${foundPreview}"`,
      "Attempting multi-strategy search..."
    ].join("\n")
  );

  const match = findBestMatch(content, exact, aiStart, aiEnd);
  if (!match) {
    const exactLong = clip(exact, 100);
    console.error(
      [
        `${TAG} \u2717 No acceptable match found:`,
        `AI offsets: start=${aiStart}, end=${aiEnd}`,
        `AI text: "${exactLong}"`,
        `Text at AI offset: "${foundPreview}"`,
        "All search strategies (exact, case-insensitive, fuzzy) failed.",
        "This suggests the AI hallucinated text that doesn't exist in the document."
      ].join("\n")
    );
    throw new Error(
      "Cannot find acceptable match for text in content. All search strategies failed. Text may be hallucinated."
    );
  }

  const actualText = content.substring(match.start, match.end);
  const actualPreview = clip(actualText, 50);
  const offsetDelta = match.start - aiStart;

  let matchSymbol = "~";
  if (match.matchQuality === "exact") {
    matchSymbol = "\u2713";
  } else if (match.matchQuality === "case-insensitive") {
    matchSymbol = "\u2248";
  }

  console.warn(
    [
      `${TAG} ${matchSymbol} Found ${match.matchQuality} match:`,
      `AI offsets: start=${aiStart}, end=${aiEnd}`,
      `Corrected: start=${match.start}, end=${match.end}`,
      `Offset delta: ${offsetDelta} characters`,
      `Actual text: "${actualPreview}"`
    ].join("\n")
  );

  if (match.matchQuality === "fuzzy") {
    console.warn(
      [
        `${TAG} Fuzzy match details:`,
        `AI provided: "${exactPreview}"`,
        `Found in doc: "${actualPreview}"`,
        "Minor text differences detected - using document version"
      ].join("\n")
    );
  }

  const { prefix, suffix } = extractContext(content, match.start, match.end);
  return {
    start: match.start,
    end: match.end,
    // Use actual text from document, not AI's version
    exact: actualText,
    prefix,
    suffix,
    corrected: true,
    fuzzyMatched: match.matchQuality !== "exact",
    matchQuality: match.matchQuality
  };
}
|
|
882
|
+
|
|
701
883
|
// src/utils/text-encoding.ts
|
|
702
884
|
function extractCharset(mediaType) {
|
|
703
885
|
const charsetMatch = mediaType.match(/charset=([^\s;]+)/i);
|
|
@@ -755,6 +937,6 @@ function isValidEmail(email) {
|
|
|
755
937
|
return emailRegex.test(email);
|
|
756
938
|
}
|
|
757
939
|
|
|
758
|
-
export { JWTTokenSchema, LOCALES, createCircleSvg, createPolygonSvg, createRectangleSvg, decodeWithCharset, extractBoundingBox, extractCharset, findTextWithContext, formatEventType, formatLocaleDisplay, formatRelativeTime, getAllLocaleCodes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText,
|
|
940
|
+
export { JWTTokenSchema, LOCALES, createCircleSvg, createPolygonSvg, createRectangleSvg, decodeRepresentation, decodeWithCharset, extractBoundingBox, extractCharset, extractContext, findTextWithContext, formatEventType, formatLocaleDisplay, formatRelativeTime, getAllLocaleCodes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getEventDisplayContent, getEventEmoji, getEventEntityTypes, getExactText, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getNodeEncoding, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceCreationDetails, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, hasTargetSelector, isArchived, isAssessment, isBodyResolved, isComment, isDraft, isEventRelatedToAnnotation, isHighlight, isReference, isResolvedReference, isResourceEvent, isStubReference, isTag, isValidEmail, normalizeCoordinates, parseSvgSelector, scaleSvgToNative, validateAndCorrectOffsets, validateData, validateSvgMarkup, verifyPosition };
|
|
759
941
|
//# sourceMappingURL=index.js.map
|
|
760
942
|
//# sourceMappingURL=index.js.map
|