@semiont/api-client 0.2.28-build.38 → 0.2.28-build.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -2
- package/dist/{index-CBMGI-nS.d.ts → index--2zlsZdR.d.ts} +152 -23
- package/dist/index.d.ts +2 -2
- package/dist/index.js +218 -36
- package/dist/index.js.map +1 -1
- package/dist/utils/index.d.ts +1 -1
- package/dist/utils/index.js +218 -36
- package/dist/utils/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# @semiont/api-client
|
|
2
2
|
|
|
3
|
+
[](https://github.com/The-AI-Alliance/semiont/actions/workflows/package-tests.yml?query=branch%3Amain+is%3Asuccess+job%3A%22Test+api-client%22)
|
|
3
4
|
[](https://www.npmjs.com/package/@semiont/api-client)
|
|
4
5
|
[](https://www.npmjs.com/package/@semiont/api-client)
|
|
5
6
|
[](https://github.com/The-AI-Alliance/semiont/blob/main/LICENSE)
|
|
@@ -104,12 +105,15 @@ const client = new SemiontApiClient({
|
|
|
104
105
|
|
|
105
106
|
📖 **[API Reference](./docs/API-Reference.md)** - Complete method documentation
|
|
106
107
|
|
|
108
|
+
🛠️ **[Utilities Guide](./docs/Utilities.md)** - Text encoding, fuzzy anchoring, SVG utilities
|
|
109
|
+
|
|
107
110
|
## Key Features
|
|
108
111
|
|
|
109
112
|
- **Type-safe** - Generated from OpenAPI spec with branded types
|
|
110
|
-
- **W3C compliant** - Web Annotation standard
|
|
113
|
+
- **W3C compliant** - Web Annotation standard with fuzzy text matching
|
|
111
114
|
- **Real-time** - SSE streaming for long operations
|
|
112
|
-
- **Framework-agnostic** -
|
|
115
|
+
- **Framework-agnostic** - Pure TypeScript utilities work everywhere
|
|
116
|
+
- **Character encoding** - Proper UTF-8, ISO-8859-1, Windows-1252 support
|
|
113
117
|
|
|
114
118
|
## Use Cases
|
|
115
119
|
|
|
@@ -3314,14 +3314,6 @@ declare function getTargetSelector(target: Annotation$1['target']): {
|
|
|
3314
3314
|
* Check if target has a selector
|
|
3315
3315
|
*/
|
|
3316
3316
|
declare function hasTargetSelector(target: Annotation$1['target']): boolean;
|
|
3317
|
-
/**
|
|
3318
|
-
* Extract entity types from annotation bodies
|
|
3319
|
-
* Entity types are stored as TextualBody with purpose: "tagging"
|
|
3320
|
-
* Accepts any object with a body property matching Annotation['body']
|
|
3321
|
-
*/
|
|
3322
|
-
declare function getEntityTypes(annotation: {
|
|
3323
|
-
body: Annotation$1['body'];
|
|
3324
|
-
}): string[];
|
|
3325
3317
|
/**
|
|
3326
3318
|
* Type guard to check if an annotation is a highlight
|
|
3327
3319
|
*/
|
|
@@ -3348,20 +3340,6 @@ declare function isTag(annotation: Annotation$1): annotation is Annotation$1;
|
|
|
3348
3340
|
* @returns The comment text, or undefined if not a comment or no text found
|
|
3349
3341
|
*/
|
|
3350
3342
|
declare function getCommentText(annotation: Annotation$1): string | undefined;
|
|
3351
|
-
/**
|
|
3352
|
-
* Extract tag category from a tag annotation's body
|
|
3353
|
-
* Tags use dual-body structure: first body has purpose: "tagging" with category value
|
|
3354
|
-
* @param annotation - The annotation to extract category from
|
|
3355
|
-
* @returns The tag category (e.g., "Issue", "Rule"), or undefined if not a tag or no category found
|
|
3356
|
-
*/
|
|
3357
|
-
declare function getTagCategory(annotation: Annotation$1): string | undefined;
|
|
3358
|
-
/**
|
|
3359
|
-
* Extract tag schema ID from a tag annotation's body
|
|
3360
|
-
* Tags use dual-body structure: second body has purpose: "classifying" with schema ID
|
|
3361
|
-
* @param annotation - The annotation to extract schema ID from
|
|
3362
|
-
* @returns The schema ID (e.g., "legal-irac"), or undefined if not a tag or no schema found
|
|
3363
|
-
*/
|
|
3364
|
-
declare function getTagSchemaId(annotation: Annotation$1): string | undefined;
|
|
3365
3343
|
/**
|
|
3366
3344
|
* Type guard to check if a reference annotation is a stub (unresolved)
|
|
3367
3345
|
* Stub if no SpecificResource in body array
|
|
@@ -3609,6 +3587,73 @@ declare function getChecksum(resource: ResourceDescriptor | undefined): string |
|
|
|
3609
3587
|
* Get the language from the primary representation
|
|
3610
3588
|
*/
|
|
3611
3589
|
declare function getLanguage(resource: ResourceDescriptor | undefined): string | undefined;
|
|
3590
|
+
/**
|
|
3591
|
+
* Get storage URI from primary representation
|
|
3592
|
+
*
|
|
3593
|
+
* @param resource - ResourceDescriptor
|
|
3594
|
+
* @returns Storage URI or undefined
|
|
3595
|
+
*/
|
|
3596
|
+
declare function getStorageUri(resource: ResourceDescriptor | undefined): string | undefined;
|
|
3597
|
+
/**
|
|
3598
|
+
* Get creator agent from wasAttributedTo
|
|
3599
|
+
* Handles both single agent and array of agents
|
|
3600
|
+
*
|
|
3601
|
+
* @param resource - ResourceDescriptor
|
|
3602
|
+
* @returns First agent or undefined
|
|
3603
|
+
*/
|
|
3604
|
+
declare function getCreator(resource: ResourceDescriptor | undefined): components['schemas']['Agent'] | undefined;
|
|
3605
|
+
/**
|
|
3606
|
+
* Get derived-from URI
|
|
3607
|
+
* Handles both single URI and array of URIs
|
|
3608
|
+
*
|
|
3609
|
+
* @param resource - ResourceDescriptor
|
|
3610
|
+
* @returns First derivation URI or undefined
|
|
3611
|
+
*/
|
|
3612
|
+
declare function getDerivedFrom(resource: ResourceDescriptor | undefined): string | undefined;
|
|
3613
|
+
/**
|
|
3614
|
+
* Check if resource is archived (application-specific field)
|
|
3615
|
+
*
|
|
3616
|
+
* @param resource - ResourceDescriptor
|
|
3617
|
+
* @returns True if archived, false otherwise
|
|
3618
|
+
*/
|
|
3619
|
+
declare function isArchived(resource: ResourceDescriptor | undefined): boolean;
|
|
3620
|
+
/**
|
|
3621
|
+
* Get entity types from resource (application-specific field)
|
|
3622
|
+
*
|
|
3623
|
+
* @param resource - ResourceDescriptor
|
|
3624
|
+
* @returns Array of entity types, empty if not set
|
|
3625
|
+
*/
|
|
3626
|
+
declare function getResourceEntityTypes(resource: ResourceDescriptor | undefined): string[];
|
|
3627
|
+
/**
|
|
3628
|
+
* Check if resource is a draft (application-specific field)
|
|
3629
|
+
*
|
|
3630
|
+
* @param resource - ResourceDescriptor
|
|
3631
|
+
* @returns True if draft, false otherwise
|
|
3632
|
+
*/
|
|
3633
|
+
declare function isDraft(resource: ResourceDescriptor | undefined): boolean;
|
|
3634
|
+
/**
|
|
3635
|
+
* Map charset names to Node.js Buffer encoding names
|
|
3636
|
+
* Node.js Buffer.toString() supports: 'utf8', 'utf16le', 'latin1', 'base64', 'hex', 'ascii', 'binary', 'ucs2'
|
|
3637
|
+
*
|
|
3638
|
+
* @param charset - Charset name (e.g., "UTF-8", "ISO-8859-1", "Windows-1252")
|
|
3639
|
+
* @returns Node.js BufferEncoding
|
|
3640
|
+
*/
|
|
3641
|
+
declare function getNodeEncoding(charset: string): BufferEncoding;
|
|
3642
|
+
/**
|
|
3643
|
+
* Decode a representation buffer to string using the correct charset
|
|
3644
|
+
* Extracts charset from media type and uses appropriate encoding
|
|
3645
|
+
*
|
|
3646
|
+
* @param buffer - The raw representation data
|
|
3647
|
+
* @param mediaType - Media type with optional charset (e.g., "text/plain; charset=iso-8859-1")
|
|
3648
|
+
* @returns Decoded string
|
|
3649
|
+
*
|
|
3650
|
+
* @example
|
|
3651
|
+
* ```typescript
|
|
3652
|
+
* const content = decodeRepresentation(buffer, "text/plain; charset=utf-8");
|
|
3653
|
+
* const legacy = decodeRepresentation(buffer, "text/plain; charset=windows-1252");
|
|
3654
|
+
* ```
|
|
3655
|
+
*/
|
|
3656
|
+
declare function decodeRepresentation(buffer: Buffer, mediaType: string): string;
|
|
3612
3657
|
|
|
3613
3658
|
/**
|
|
3614
3659
|
* SVG Utility Functions
|
|
@@ -3654,6 +3699,90 @@ declare function normalizeCoordinates(point: Point, displayWidth: number, displa
|
|
|
3654
3699
|
*/
|
|
3655
3700
|
declare function scaleSvgToNative(svg: string, displayWidth: number, displayHeight: number, imageWidth: number, imageHeight: number): string;
|
|
3656
3701
|
|
|
3702
|
+
/**
|
|
3703
|
+
* Text context extraction utilities for W3C Web Annotation TextQuoteSelector
|
|
3704
|
+
*
|
|
3705
|
+
* Provides robust prefix/suffix context extraction with word boundary detection
|
|
3706
|
+
* to ensure fuzzy anchoring works correctly when the same text appears multiple times.
|
|
3707
|
+
*
|
|
3708
|
+
* Also provides AI offset validation and correction for handling AI-generated annotations
|
|
3709
|
+
* where the model may return slightly incorrect character offsets.
|
|
3710
|
+
*
|
|
3711
|
+
* @see https://www.w3.org/TR/annotation-model/#text-quote-selector
|
|
3712
|
+
*/
|
|
3713
|
+
/**
|
|
3714
|
+
* Extract prefix and suffix context for TextQuoteSelector
|
|
3715
|
+
*
|
|
3716
|
+
* Extracts up to 64 characters before and after the selected text,
|
|
3717
|
+
* extending to word boundaries to avoid cutting words in half.
|
|
3718
|
+
* This ensures prefix/suffix are meaningful context for fuzzy anchoring.
|
|
3719
|
+
*
|
|
3720
|
+
* @param content - Full text content
|
|
3721
|
+
* @param start - Start offset of selection
|
|
3722
|
+
* @param end - End offset of selection
|
|
3723
|
+
* @returns Object with prefix and suffix (undefined if at boundaries)
|
|
3724
|
+
*
|
|
3725
|
+
* @example
|
|
3726
|
+
* ```typescript
|
|
3727
|
+
* const content = "The United States Congress...";
|
|
3728
|
+
* const context = extractContext(content, 4, 17); // "United States"
|
|
3729
|
+
* // Returns: { prefix: "The ", suffix: " Congress..." }
|
|
3730
|
+
* // NOT: { prefix: "nited ", suffix: "gress..." }
|
|
3731
|
+
* ```
|
|
3732
|
+
*/
|
|
3733
|
+
declare function extractContext(content: string, start: number, end: number): {
|
|
3734
|
+
prefix?: string;
|
|
3735
|
+
suffix?: string;
|
|
3736
|
+
};
|
|
3737
|
+
/**
|
|
3738
|
+
* Result of validating and correcting AI-provided annotation offsets
|
|
3739
|
+
*/
|
|
3740
|
+
interface ValidatedAnnotation {
|
|
3741
|
+
start: number;
|
|
3742
|
+
end: number;
|
|
3743
|
+
exact: string;
|
|
3744
|
+
prefix?: string;
|
|
3745
|
+
suffix?: string;
|
|
3746
|
+
corrected: boolean;
|
|
3747
|
+
fuzzyMatched?: boolean;
|
|
3748
|
+
matchQuality?: 'exact' | 'case-insensitive' | 'fuzzy';
|
|
3749
|
+
}
|
|
3750
|
+
/**
|
|
3751
|
+
* Validate and correct AI-provided annotation offsets with fuzzy matching tolerance
|
|
3752
|
+
*
|
|
3753
|
+
* AI models sometimes return offsets that don't match the actual text position,
|
|
3754
|
+
* or provide text with minor variations (case differences, whitespace, typos).
|
|
3755
|
+
*
|
|
3756
|
+
* This function uses a multi-strategy approach:
|
|
3757
|
+
* 1. Check if AI's offsets are exactly correct
|
|
3758
|
+
* 2. Try exact case-sensitive search
|
|
3759
|
+
* 3. Try case-insensitive search
|
|
3760
|
+
* 4. Try fuzzy matching with Levenshtein distance (5% tolerance)
|
|
3761
|
+
*
|
|
3762
|
+
* This ensures we're maximally tolerant of AI errors while still maintaining
|
|
3763
|
+
* annotation quality and logging what corrections were made.
|
|
3764
|
+
*
|
|
3765
|
+
* @param content - Full text content
|
|
3766
|
+
* @param aiStart - Start offset from AI
|
|
3767
|
+
* @param aiEnd - End offset from AI
|
|
3768
|
+
* @param exact - The exact text that should be at this position (from AI)
|
|
3769
|
+
* @returns Validated annotation with corrected offsets and context
|
|
3770
|
+
* @throws Error if no acceptable match can be found
|
|
3771
|
+
*
|
|
3772
|
+
* @example
|
|
3773
|
+
* ```typescript
|
|
3774
|
+
* // AI said start=1143, but actual text is at 1161
|
|
3775
|
+
* const result = validateAndCorrectOffsets(
|
|
3776
|
+
* content,
|
|
3777
|
+
* 1143,
|
|
3778
|
+
* 1289,
|
|
3779
|
+
* "the question \"whether..."
|
|
3780
|
+
* );
|
|
3781
|
+
* // Returns: { start: 1161, end: 1303, exact: "...", corrected: true, matchQuality: 'exact', ... }
|
|
3782
|
+
* ```
|
|
3783
|
+
*/
|
|
3784
|
+
declare function validateAndCorrectOffsets(content: string, aiStart: number, aiEnd: number, exact: string): ValidatedAnnotation;
|
|
3785
|
+
|
|
3657
3786
|
/**
|
|
3658
3787
|
* Text encoding utilities for consistent charset handling
|
|
3659
3788
|
*
|
|
@@ -3745,4 +3874,4 @@ declare function validateData<T>(schema: {
|
|
|
3745
3874
|
*/
|
|
3746
3875
|
declare function isValidEmail(email: string): boolean;
|
|
3747
3876
|
|
|
3748
|
-
export { type $defs as $, type AccessToken as A, type BaseUrl as B, type ContentFormat as C,
|
|
3877
|
+
export { type $defs as $, type AccessToken as A, type BaseUrl as B, type ContentFormat as C, getExactText as D, type EntityType as E, getAnnotationExactText as F, type GoogleCredential as G, getPrimarySelector as H, getTextPositionSelector as I, type JobId as J, getTextQuoteSelector as K, getSvgSelector as L, type Motivation as M, validateSvgMarkup as N, extractBoundingBox as O, type StoredEvent as P, type EventMetadata as Q, type ResourceUri as R, type SearchQuery as S, type TextPositionSelector as T, type UserDID as U, type ResourceEventType as V, getAnnotationUriFromEvent as W, isEventRelatedToAnnotation as X, isResourceEvent as Y, formatEventType as Z, getEventEmoji as _, type AnnotationUri as a, resourceAnnotationUri as a$, formatRelativeTime as a0, getEventDisplayContent as a1, getEventEntityTypes as a2, type ResourceCreationDetails as a3, getResourceCreationDetails as a4, type TextPosition as a5, findTextWithContext as a6, verifyPosition as a7, type LocaleInfo as a8, LOCALES as a9, extractContext as aA, type ValidatedAnnotation as aB, validateAndCorrectOffsets as aC, extractCharset as aD, decodeWithCharset as aE, type ValidationSuccess as aF, type ValidationFailure as aG, type ValidationResult as aH, JWTTokenSchema as aI, validateData as aJ, isValidEmail as aK, type AuthCode as aL, type MCPToken as aM, email as aN, authCode as aO, googleCredential as aP, accessToken as aQ, refreshToken as aR, mcpToken as aS, cloneToken as aT, jobId as aU, userDID as aV, entityType as aW, searchQuery as aX, baseUrl as aY, resourceUri as aZ, annotationUri as a_, getLocaleInfo as aa, getLocaleNativeName as ab, getLocaleEnglishName as ac, formatLocaleDisplay as ad, getAllLocaleCodes as ae, getResourceId as af, getPrimaryRepresentation as ag, getPrimaryMediaType as ah, getChecksum as ai, getLanguage as aj, getStorageUri as ak, getCreator as al, getDerivedFrom as am, isArchived as an, getResourceEntityTypes as ao, isDraft as ap, getNodeEncoding as aq, decodeRepresentation as ar, 
type Point as as, type BoundingBox as at, createRectangleSvg as au, createPolygonSvg as av, createCircleSvg as aw, parseSvgSelector as ax, normalizeCoordinates as ay, scaleSvgToNative as az, type Email as b, type ResourceEvent as b0, type components as c, type RefreshToken as d, type CloneToken as e, type ResourceAnnotationUri as f, type TextQuoteSelector as g, type SvgSelector as h, type Selector as i, getBodySource as j, getBodyType as k, isBodyResolved as l, getTargetSource as m, getTargetSelector as n, type operations as o, type paths as p, hasTargetSelector as q, isHighlight as r, isReference as s, isAssessment as t, isComment as u, isTag as v, type webhooks as w, getCommentText as x, isStubReference as y, isResolvedReference as z };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { B as BaseUrl, A as AccessToken, R as ResourceUri, E as EntityType, a as AnnotationUri, c as components, b as Email, p as paths, d as RefreshToken, G as GoogleCredential, C as ContentFormat, S as SearchQuery, e as CloneToken, f as ResourceAnnotationUri, M as Motivation, U as UserDID, J as JobId } from './index
|
|
2
|
-
export { $ as $defs,
|
|
1
|
+
import { B as BaseUrl, A as AccessToken, R as ResourceUri, E as EntityType, a as AnnotationUri, c as components, b as Email, p as paths, d as RefreshToken, G as GoogleCredential, C as ContentFormat, S as SearchQuery, e as CloneToken, f as ResourceAnnotationUri, M as Motivation, U as UserDID, J as JobId } from './index--2zlsZdR.js';
|
|
2
|
+
export { $ as $defs, aL as AuthCode, at as BoundingBox, Q as EventMetadata, aI as JWTTokenSchema, a9 as LOCALES, a8 as LocaleInfo, aM as MCPToken, as as Point, a3 as ResourceCreationDetails, V as ResourceEventType, i as Selector, P as StoredEvent, h as SvgSelector, a5 as TextPosition, T as TextPositionSelector, g as TextQuoteSelector, aB as ValidatedAnnotation, aG as ValidationFailure, aH as ValidationResult, aF as ValidationSuccess, aQ as accessToken, a_ as annotationUri, aO as authCode, aY as baseUrl, aT as cloneToken, aw as createCircleSvg, av as createPolygonSvg, au as createRectangleSvg, ar as decodeRepresentation, aE as decodeWithCharset, aN as email, aW as entityType, O as extractBoundingBox, aD as extractCharset, aA as extractContext, a6 as findTextWithContext, Z as formatEventType, ad as formatLocaleDisplay, a0 as formatRelativeTime, ae as getAllLocaleCodes, F as getAnnotationExactText, W as getAnnotationUriFromEvent, j as getBodySource, k as getBodyType, ai as getChecksum, x as getCommentText, al as getCreator, am as getDerivedFrom, a1 as getEventDisplayContent, _ as getEventEmoji, a2 as getEventEntityTypes, D as getExactText, aj as getLanguage, ac as getLocaleEnglishName, aa as getLocaleInfo, ab as getLocaleNativeName, aq as getNodeEncoding, ah as getPrimaryMediaType, ag as getPrimaryRepresentation, H as getPrimarySelector, a4 as getResourceCreationDetails, ao as getResourceEntityTypes, af as getResourceId, ak as getStorageUri, L as getSvgSelector, n as getTargetSelector, m as getTargetSource, I as getTextPositionSelector, K as getTextQuoteSelector, aP as googleCredential, q as hasTargetSelector, an as isArchived, t as isAssessment, l as isBodyResolved, u as isComment, ap as isDraft, X as isEventRelatedToAnnotation, r as isHighlight, s as isReference, z as isResolvedReference, Y as isResourceEvent, y as isStubReference, v as isTag, aK as isValidEmail, aU as jobId, aS as mcpToken, ay as normalizeCoordinates, o as operations, ax as parseSvgSelector, aR as 
refreshToken, a$ as resourceAnnotationUri, aZ as resourceUri, az as scaleSvgToNative, aX as searchQuery, aV as userDID, aC as validateAndCorrectOffsets, aJ as validateData, N as validateSvgMarkup, a7 as verifyPosition, w as webhooks } from './index--2zlsZdR.js';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* TypeScript types for Server-Sent Events (SSE) streaming
|
package/dist/index.js
CHANGED
|
@@ -1138,23 +1138,6 @@ function getTargetSelector(target) {
|
|
|
1138
1138
|
/**
 * Report whether an annotation target carries a selector.
 *
 * A target given as a plain string never has one; an object target has one
 * whenever its `selector` property is anything other than `undefined`.
 *
 * @param target - Annotation target (string or object form)
 * @returns True if a selector is present
 */
function hasTargetSelector(target) {
  if (typeof target === "string") {
    return false;
  }
  return target.selector !== undefined;
}
|
|
1141
|
-
function getEntityTypes(annotation) {
|
|
1142
|
-
if (Array.isArray(annotation.body)) {
|
|
1143
|
-
const entityTags = [];
|
|
1144
|
-
for (const item of annotation.body) {
|
|
1145
|
-
if (typeof item === "object" && item !== null && "type" in item && "value" in item && "purpose" in item) {
|
|
1146
|
-
const itemType = item.type;
|
|
1147
|
-
const itemValue = item.value;
|
|
1148
|
-
const itemPurpose = item.purpose;
|
|
1149
|
-
if (itemType === "TextualBody" && itemPurpose === "tagging" && typeof itemValue === "string" && itemValue.length > 0) {
|
|
1150
|
-
entityTags.push(itemValue);
|
|
1151
|
-
}
|
|
1152
|
-
}
|
|
1153
|
-
}
|
|
1154
|
-
return entityTags;
|
|
1155
|
-
}
|
|
1156
|
-
return [];
|
|
1157
|
-
}
|
|
1158
1141
|
/**
 * Check whether an annotation is a highlight.
 *
 * @param annotation - Annotation to inspect
 * @returns True when `annotation.motivation` is exactly "highlighting"
 */
function isHighlight(annotation) {
  const { motivation } = annotation;
  return motivation === "highlighting";
}
|
|
@@ -1178,24 +1161,6 @@ function getCommentText(annotation) {
|
|
|
1178
1161
|
}
|
|
1179
1162
|
return void 0;
|
|
1180
1163
|
}
|
|
1181
|
-
function getTagCategory(annotation) {
|
|
1182
|
-
if (!isTag(annotation)) return void 0;
|
|
1183
|
-
const bodies = Array.isArray(annotation.body) ? annotation.body : [annotation.body];
|
|
1184
|
-
const taggingBody = bodies.find((b) => b && "purpose" in b && b.purpose === "tagging");
|
|
1185
|
-
if (taggingBody && "value" in taggingBody) {
|
|
1186
|
-
return taggingBody.value;
|
|
1187
|
-
}
|
|
1188
|
-
return void 0;
|
|
1189
|
-
}
|
|
1190
|
-
function getTagSchemaId(annotation) {
|
|
1191
|
-
if (!isTag(annotation)) return void 0;
|
|
1192
|
-
const bodies = Array.isArray(annotation.body) ? annotation.body : [annotation.body];
|
|
1193
|
-
const classifyingBody = bodies.find((b) => b && "purpose" in b && b.purpose === "classifying");
|
|
1194
|
-
if (classifyingBody && "value" in classifyingBody) {
|
|
1195
|
-
return classifyingBody.value;
|
|
1196
|
-
}
|
|
1197
|
-
return void 0;
|
|
1198
|
-
}
|
|
1199
1164
|
/**
 * Check whether a reference annotation is still a stub, i.e. it passes
 * `isReference` but its body is not reported as resolved by `isBodyResolved`.
 *
 * @param annotation - Annotation to inspect
 * @returns The `isReference` result when it is falsy, otherwise the negated
 *          `isBodyResolved` result for the annotation body
 */
function isStubReference(annotation) {
  const referenceCheck = isReference(annotation);
  // Same value as `a && !b`: a falsy reference check is passed through as-is.
  return referenceCheck ? !isBodyResolved(annotation.body) : referenceCheck;
}
|
|
@@ -1671,6 +1636,49 @@ function getChecksum(resource) {
|
|
|
1671
1636
|
/**
 * Read the `language` of the resource's primary representation.
 *
 * @param resource - Resource descriptor (may be undefined)
 * @returns The language, or undefined when there is no primary representation
 */
function getLanguage(resource) {
  const primary = getPrimaryRepresentation(resource);
  return primary?.language;
}
|
|
1639
|
+
/**
 * Read the `storageUri` of the resource's primary representation.
 *
 * @param resource - Resource descriptor (may be undefined)
 * @returns The storage URI, or undefined when there is no primary representation
 */
function getStorageUri(resource) {
  const primary = getPrimaryRepresentation(resource);
  return primary?.storageUri;
}
|
|
1642
|
+
/**
 * Get the creator agent recorded in `wasAttributedTo`.
 *
 * The field may hold a single agent or an array of agents; for an array the
 * first element is returned.
 *
 * @param resource - Resource descriptor (may be undefined)
 * @returns The (first) agent, or undefined when the field is missing or falsy
 */
function getCreator(resource) {
  const attribution = resource?.wasAttributedTo;
  if (!attribution) {
    return undefined;
  }
  if (Array.isArray(attribution)) {
    return attribution[0];
  }
  return attribution;
}
|
|
1646
|
+
/**
 * Get the URI this resource was derived from (`wasDerivedFrom`).
 *
 * The field may hold a single URI or an array of URIs; for an array the
 * first element is returned.
 *
 * @param resource - Resource descriptor (may be undefined)
 * @returns The (first) derivation URI, or undefined when the field is missing or falsy
 */
function getDerivedFrom(resource) {
  const origin = resource?.wasDerivedFrom;
  if (!origin) {
    return undefined;
  }
  // Treat the single-value form as a one-element list and take the head.
  const candidates = Array.isArray(origin) ? origin : [origin];
  return candidates[0];
}
|
|
1650
|
+
/**
 * Check the resource's `archived` flag.
 *
 * @param resource - Resource descriptor (may be undefined)
 * @returns True only when `archived` is strictly `true`
 */
function isArchived(resource) {
  if (resource == null) {
    return false;
  }
  return resource.archived === true;
}
|
|
1653
|
+
/**
 * Get the resource's `entityTypes` list.
 *
 * @param resource - Resource descriptor (may be undefined)
 * @returns The stored value when truthy, otherwise a new empty array
 */
function getResourceEntityTypes(resource) {
  const declared = resource?.entityTypes;
  return declared ? declared : [];
}
|
|
1656
|
+
/**
 * Check the resource's `isDraft` flag.
 *
 * @param resource - Resource descriptor (may be undefined)
 * @returns True only when `isDraft` is strictly `true`
 */
function isDraft(resource) {
  const flag = resource?.isDraft;
  return flag === true;
}
|
|
1659
|
+
/**
 * Map a charset label to the Node.js Buffer encoding name used to decode it.
 *
 * The label is trimmed, stripped of one pair of surrounding quotes,
 * lower-cased, and has "-" / "_" removed before lookup, so "UTF-8",
 * "utf_8" and "\"utf8\"" are all treated alike. Unknown (or missing) labels
 * fall back to "utf8".
 *
 * @param charset - Charset name (e.g., "UTF-8", "ISO-8859-1", "Windows-1252")
 * @returns Node.js BufferEncoding
 */
function getNodeEncoding(charset) {
  const normalized = String(charset ?? "")
    .trim()
    .replace(/^["']|["']$/g, "")
    .toLowerCase()
    .replace(/[-_]/g, "");
  // A Map (not an object literal) so that labels such as "constructor" cannot
  // resolve to members inherited from Object.prototype.
  const charsetMap = new Map([
    ["utf8", "utf8"],
    ["iso88591", "latin1"],
    ["latin1", "latin1"],
    ["ascii", "ascii"],
    ["usascii", "ascii"],
    ["utf16le", "utf16le"],
    ["ucs2", "ucs2"],
    ["binary", "binary"],
    // Approximation: Buffer has no Windows-1252 encoding. latin1 agrees with
    // Windows-1252 except for bytes 0x80-0x9F (e.g. the euro sign and curly
    // quotes), which latin1 decodes as C1 control characters.
    ["windows1252", "latin1"],
    ["cp1252", "latin1"]
  ]);
  return charsetMap.get(normalized) ?? "utf8";
}
|
|
1676
|
+
/**
 * Decode a representation buffer to a string using the charset named in its
 * media type.
 *
 * The charset parameter may be bare or double-quoted
 * (`charset=utf-8`, `charset="utf-8"`); when absent, UTF-8 is assumed.
 * Windows-1252 / CP1252 is decoded with `TextDecoder` so bytes 0x80-0x9F
 * (euro sign, curly quotes, ...) come out correctly; every other charset is
 * mapped through `getNodeEncoding` and decoded with `buffer.toString`.
 *
 * @param buffer - The raw representation data
 * @param mediaType - Media type with optional charset (e.g., "text/plain; charset=iso-8859-1")
 * @returns Decoded string
 */
function decodeRepresentation(buffer, mediaType) {
  const charsetMatch = /charset="?([^\s;"]+)"?/i.exec(mediaType);
  const charset = (charsetMatch?.[1] ?? "utf-8").toLowerCase();
  const compactLabel = charset.replace(/[-_]/g, "");
  if (compactLabel === "windows1252" || compactLabel === "cp1252") {
    try {
      return new TextDecoder("windows-1252").decode(buffer);
    } catch {
      // Runtime without this decoder: fall through to the latin1
      // approximation provided by getNodeEncoding.
    }
  }
  const encoding = getNodeEncoding(charset);
  return buffer.toString(encoding);
}
|
|
1674
1682
|
|
|
1675
1683
|
// src/utils/svg-utils.ts
|
|
1676
1684
|
function createRectangleSvg(start, end) {
|
|
@@ -1768,6 +1776,180 @@ function scaleSvgToNative(svg, displayWidth, displayHeight, imageWidth, imageHei
|
|
|
1768
1776
|
return svg;
|
|
1769
1777
|
}
|
|
1770
1778
|
|
|
1779
|
+
// src/utils/text-context.ts
|
|
1780
|
+
/**
 * Extract prefix and suffix context around a selection, for use in a
 * TextQuoteSelector.
 *
 * Takes up to 64 characters on each side, then extends outward by at most 32
 * more characters until a whitespace/punctuation boundary is reached, so that
 * context does not begin or end in the middle of a word. If the final cut
 * would fall between the two halves of a UTF-16 surrogate pair, it is moved
 * outward by one code unit so the pair stays intact.
 *
 * @param content - Full text content
 * @param start - Start offset of selection
 * @param end - End offset of selection
 * @returns Object with prefix and suffix; prefix is undefined when start is 0
 *          (or less), suffix is undefined when end is at/after the end of content
 */
function extractContext(content, start, end) {
  const CONTEXT_LENGTH = 64;
  const MAX_EXTENSION = 32;
  // Built once instead of on every loop iteration.
  const boundaryPattern = /[\s.,;:!?'"()\[\]{}<>\/\\]/;
  const isBoundary = (char) => char === undefined || boundaryPattern.test(char);
  // True when cutting the string at `index` would separate a high surrogate
  // (index - 1) from its low surrogate (index).
  const splitsSurrogatePair = (index) => {
    if (index <= 0 || index >= content.length) {
      return false;
    }
    const before = content.charCodeAt(index - 1);
    const after = content.charCodeAt(index);
    return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  };

  let prefix;
  if (start > 0) {
    let prefixStart = Math.max(0, start - CONTEXT_LENGTH);
    let extensionCount = 0;
    while (prefixStart > 0 && extensionCount < MAX_EXTENSION && !isBoundary(content[prefixStart - 1])) {
      prefixStart--;
      extensionCount++;
    }
    if (splitsSurrogatePair(prefixStart)) {
      prefixStart--;
    }
    prefix = content.substring(prefixStart, start);
  }

  let suffix;
  if (end < content.length) {
    let suffixEnd = Math.min(content.length, end + CONTEXT_LENGTH);
    let extensionCount = 0;
    while (suffixEnd < content.length && extensionCount < MAX_EXTENSION && !isBoundary(content[suffixEnd])) {
      suffixEnd++;
      extensionCount++;
    }
    if (splitsSurrogatePair(suffixEnd)) {
      suffixEnd++;
    }
    suffix = content.substring(end, suffixEnd);
  }

  return { prefix, suffix };
}
|
|
1813
|
+
/**
 * Compute the Levenshtein (edit) distance between two strings, counting
 * single-code-unit insertions, deletions and substitutions.
 *
 * Only two rows of the dynamic-programming table are kept at a time, so
 * memory use is proportional to `str2.length` rather than to
 * `str1.length * str2.length`.
 *
 * @param str1 - First string
 * @param str2 - Second string
 * @returns Minimum number of edits turning str1 into str2
 */
function levenshteinDistance(str1, str2) {
  const len1 = str1.length;
  const len2 = str2.length;
  if (len1 === 0) return len2;
  if (len2 === 0) return len1;

  // previousRow[j] = distance between the first (i - 1) units of str1 and the first j units of str2.
  let previousRow = Array.from({ length: len2 + 1 }, (_, j) => j);
  let currentRow = new Array(len2 + 1).fill(0);

  for (let i = 1; i <= len1; i++) {
    currentRow[0] = i;
    for (let j = 1; j <= len2; j++) {
      const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
      const deletion = previousRow[j] + 1;
      const insertion = currentRow[j - 1] + 1;
      const substitution = previousRow[j - 1] + cost;
      currentRow[j] = Math.min(deletion, insertion, substitution);
    }
    // Reuse the two buffers instead of allocating a new row per iteration.
    [previousRow, currentRow] = [currentRow, previousRow];
  }
  return previousRow[len2];
}
|
|
1834
|
+
/**
 * Locate `searchText` in `content`, trying progressively looser strategies:
 *   1. exact (case-sensitive) search,
 *   2. case-insensitive search,
 *   3. fuzzy search (Levenshtein distance over same-length windows) within
 *      500 characters either side of the AI-supplied range.
 *
 * When the text occurs more than once, strategies 1 and 2 pick the occurrence
 * closest to `aiStart` (earlier one on a tie) rather than the first one in
 * the document, since the AI offsets are a hint about which occurrence is meant.
 *
 * @param content - Full text content
 * @param searchText - Text to locate
 * @param aiStart - Start offset suggested by the AI (used as a position hint)
 * @param aiEnd - End offset suggested by the AI (bounds the fuzzy search area)
 * @returns `{ start, end, matchQuality }` or null when nothing acceptable is found
 */
function findBestMatch(content, searchText, aiStart, aiEnd) {
  // Index of the occurrence of `needle` in `haystack` nearest to aiStart, or -1.
  const closestIndexOf = (haystack, needle) => {
    if (needle.length === 0) {
      return haystack.indexOf(needle);
    }
    let best = -1;
    for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + 1)) {
      if (best === -1 || Math.abs(at - aiStart) < Math.abs(best - aiStart)) {
        best = at;
      }
      // Occurrences are visited in ascending order; past the hint they only get farther away.
      if (at >= aiStart) {
        break;
      }
    }
    return best;
  };

  // 5% of the length with a floor of 5 edits, but never more than a quarter of
  // the text: otherwise a short search string would "match" any window.
  const maxFuzzyDistance = Math.min(
    Math.max(5, Math.floor(searchText.length * 0.05)),
    Math.floor(searchText.length / 4)
  );

  const exactIndex = closestIndexOf(content, searchText);
  if (exactIndex !== -1) {
    return {
      start: exactIndex,
      end: exactIndex + searchText.length,
      matchQuality: "exact"
    };
  }
  console.log("[findBestMatch] Exact match failed, trying case-insensitive...");
  const lowerContent = content.toLowerCase();
  const lowerSearch = searchText.toLowerCase();
  // Lower-casing can change a string's length for a few characters; indices
  // into lowerContent are only valid for content when the lengths agree.
  if (lowerContent.length === content.length) {
    const caseInsensitiveIndex = closestIndexOf(lowerContent, lowerSearch);
    if (caseInsensitiveIndex !== -1) {
      console.log("[findBestMatch] Found case-insensitive match");
      return {
        start: caseInsensitiveIndex,
        end: caseInsensitiveIndex + lowerSearch.length,
        matchQuality: "case-insensitive"
      };
    }
  }
  console.log("[findBestMatch] Case-insensitive failed, trying fuzzy match...");
  const windowSize = searchText.length;
  const searchRadius = Math.min(500, content.length);
  const searchStart = Math.max(0, aiStart - searchRadius);
  const searchEnd = Math.min(content.length, aiEnd + searchRadius);
  let bestMatch = null;
  for (let i = searchStart; i <= searchEnd - windowSize; i++) {
    const candidate = content.substring(i, i + windowSize);
    const distance = levenshteinDistance(searchText, candidate);
    if (distance <= maxFuzzyDistance) {
      if (!bestMatch || distance < bestMatch.distance) {
        bestMatch = { start: i, distance };
        console.log(`[findBestMatch] Found fuzzy match at ${i} with distance ${distance}`);
      }
    }
  }
  if (bestMatch) {
    return {
      start: bestMatch.start,
      end: bestMatch.start + windowSize,
      matchQuality: "fuzzy"
    };
  }
  console.log("[findBestMatch] No acceptable match found");
  return null;
}
|
|
1882
|
+
/**
 * Validate AI-provided annotation offsets against the document and correct
 * them when they do not point at the expected text.
 *
 * If `content.substring(aiStart, aiEnd)` equals `exact` and the offsets are
 * well-formed (integers with 0 <= aiStart <= aiEnd <= content.length), they
 * are returned unchanged with `corrected: false`. Otherwise `findBestMatch`
 * is used to locate the text; the returned `exact` is then the text actually
 * present in the document at the corrected range. Prefix/suffix context is
 * produced by `extractContext` in both cases. Progress and corrections are
 * reported through `console.log` / `console.warn` / `console.error`.
 *
 * @param content - Full text content
 * @param aiStart - Start offset from AI
 * @param aiEnd - End offset from AI
 * @param exact - The exact text that should be at this position (from AI)
 * @returns Validated annotation with corrected offsets and context
 * @throws Error if no acceptable match can be found
 */
function validateAndCorrectOffsets(content, aiStart, aiEnd, exact) {
  const preview = (text, limit) => text.length > limit ? text.substring(0, limit) + "..." : text;
  const exactPreview = preview(exact, 50);
  const textAtOffset = content.substring(aiStart, aiEnd);
  // String.prototype.substring silently swaps reversed arguments, clamps
  // negative/oversized ones and truncates fractions, so a text match alone
  // does not prove the offsets themselves are usable as returned.
  const offsetsWellFormed = Number.isInteger(aiStart) && Number.isInteger(aiEnd) && aiStart >= 0 && aiStart <= aiEnd && aiEnd <= content.length;
  if (offsetsWellFormed && textAtOffset === exact) {
    console.log(`[validateAndCorrectOffsets] \u2713 Offsets correct for: "${exactPreview}"`);
    const directContext = extractContext(content, aiStart, aiEnd);
    return {
      start: aiStart,
      end: aiEnd,
      exact,
      prefix: directContext.prefix,
      suffix: directContext.suffix,
      corrected: false,
      matchQuality: "exact"
    };
  }
  const foundPreview = preview(textAtOffset, 50);
  console.warn(
    `[validateAndCorrectOffsets] \u26A0 AI offset mismatch:
Expected text: "${exactPreview}"
Found at AI offset (${aiStart}-${aiEnd}): "${foundPreview}"
Attempting multi-strategy search...`
  );
  const match = findBestMatch(content, exact, aiStart, aiEnd);
  if (!match) {
    const exactLong = preview(exact, 100);
    console.error(
      `[validateAndCorrectOffsets] \u2717 No acceptable match found:
AI offsets: start=${aiStart}, end=${aiEnd}
AI text: "${exactLong}"
Text at AI offset: "${foundPreview}"
All search strategies (exact, case-insensitive, fuzzy) failed.
This suggests the AI hallucinated text that doesn't exist in the document.`
    );
    throw new Error(
      "Cannot find acceptable match for text in content. All search strategies failed. Text may be hallucinated."
    );
  }
  const actualText = content.substring(match.start, match.end);
  const actualPreview = preview(actualText, 50);
  const offsetDelta = match.start - aiStart;
  const matchSymbol = match.matchQuality === "exact" ? "\u2713" : match.matchQuality === "case-insensitive" ? "\u2248" : "~";
  console.warn(
    `[validateAndCorrectOffsets] ${matchSymbol} Found ${match.matchQuality} match:
AI offsets: start=${aiStart}, end=${aiEnd}
Corrected: start=${match.start}, end=${match.end}
Offset delta: ${offsetDelta} characters
Actual text: "${actualPreview}"`
  );
  if (match.matchQuality === "fuzzy") {
    console.warn(
      `[validateAndCorrectOffsets] Fuzzy match details:
AI provided: "${exactPreview}"
Found in doc: "${actualPreview}"
Minor text differences detected - using document version`
    );
  }
  const context = extractContext(content, match.start, match.end);
  return {
    start: match.start,
    end: match.end,
    // Use actual text from document, not AI's version
    exact: actualText,
    prefix: context.prefix,
    suffix: context.suffix,
    corrected: true,
    fuzzyMatched: match.matchQuality !== "exact",
    matchQuality: match.matchQuality
  };
}
|
|
1952
|
+
|
|
1771
1953
|
// src/utils/text-encoding.ts
|
|
1772
1954
|
function extractCharset(mediaType) {
|
|
1773
1955
|
const charsetMatch = mediaType.match(/charset=([^\s;]+)/i);
|
|
@@ -1851,6 +2033,6 @@ function getMimeCategory(mimeType) {
|
|
|
1851
2033
|
return "unsupported";
|
|
1852
2034
|
}
|
|
1853
2035
|
|
|
1854
|
-
export { APIError, JWTTokenSchema, LOCALES, SSEClient, SemiontApiClient, accessToken, annotationUri, authCode, baseUrl, cloneToken, createCircleSvg, createPolygonSvg, createRectangleSvg, decodeWithCharset, email, entityType, extractBoundingBox, extractCharset, findTextWithContext, formatEventType, formatLocaleDisplay, formatRelativeTime, getAllLocaleCodes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText,
|
|
2036
|
+
export { APIError, JWTTokenSchema, LOCALES, SSEClient, SemiontApiClient, accessToken, annotationUri, authCode, baseUrl, cloneToken, createCircleSvg, createPolygonSvg, createRectangleSvg, decodeRepresentation, decodeWithCharset, email, entityType, extractBoundingBox, extractCharset, extractContext, findTextWithContext, formatEventType, formatLocaleDisplay, formatRelativeTime, getAllLocaleCodes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getEventDisplayContent, getEventEmoji, getEventEntityTypes, getExactText, getExtensionForMimeType, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getMimeCategory, getNodeEncoding, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceCreationDetails, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isArchived, isAssessment, isBodyResolved, isComment, isDraft, isEventRelatedToAnnotation, isHighlight, isImageMimeType, isReference, isResolvedReference, isResourceEvent, isStubReference, isTag, isTextMimeType, isValidEmail, jobId, mcpToken, normalizeCoordinates, parseSvgSelector, refreshToken, resourceAnnotationUri, resourceUri, scaleSvgToNative, searchQuery, userDID, validateAndCorrectOffsets, validateData, validateSvgMarkup, verifyPosition };
|
|
1855
2037
|
//# sourceMappingURL=index.js.map
|
|
1856
2038
|
//# sourceMappingURL=index.js.map
|