@semiont/api-client 0.2.28-build.38 → 0.2.28-build.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,6 @@
1
1
  # @semiont/api-client
2
2
 
3
+ [![Tests](https://github.com/The-AI-Alliance/semiont/actions/workflows/package-tests.yml/badge.svg)](https://github.com/The-AI-Alliance/semiont/actions/workflows/package-tests.yml?query=branch%3Amain+is%3Asuccess+job%3A%22Test+api-client%22)
3
4
  [![npm version](https://img.shields.io/npm/v/@semiont/api-client.svg)](https://www.npmjs.com/package/@semiont/api-client)
4
5
  [![npm downloads](https://img.shields.io/npm/dm/@semiont/api-client.svg)](https://www.npmjs.com/package/@semiont/api-client)
5
6
  [![License](https://img.shields.io/npm/l/@semiont/api-client.svg)](https://github.com/The-AI-Alliance/semiont/blob/main/LICENSE)
@@ -104,12 +105,15 @@ const client = new SemiontApiClient({
104
105
 
105
106
  📖 **[API Reference](./docs/API-Reference.md)** - Complete method documentation
106
107
 
108
+ 🛠️ **[Utilities Guide](./docs/Utilities.md)** - Text encoding, fuzzy anchoring, SVG utilities
109
+
107
110
  ## Key Features
108
111
 
109
112
  - **Type-safe** - Generated from OpenAPI spec with branded types
110
- - **W3C compliant** - Web Annotation standard
113
+ - **W3C compliant** - Web Annotation standard with fuzzy text matching
111
114
  - **Real-time** - SSE streaming for long operations
112
- - **Framework-agnostic** - Works everywhere JavaScript runs
115
+ - **Framework-agnostic** - Pure TypeScript utilities work everywhere
116
+ - **Character encoding** - Proper UTF-8, ISO-8859-1, Windows-1252 support
113
117
 
114
118
  ## Use Cases
115
119
 
@@ -3314,14 +3314,6 @@ declare function getTargetSelector(target: Annotation$1['target']): {
3314
3314
  * Check if target has a selector
3315
3315
  */
3316
3316
  declare function hasTargetSelector(target: Annotation$1['target']): boolean;
3317
- /**
3318
- * Extract entity types from annotation bodies
3319
- * Entity types are stored as TextualBody with purpose: "tagging"
3320
- * Accepts any object with a body property matching Annotation['body']
3321
- */
3322
- declare function getEntityTypes(annotation: {
3323
- body: Annotation$1['body'];
3324
- }): string[];
3325
3317
  /**
3326
3318
  * Type guard to check if an annotation is a highlight
3327
3319
  */
@@ -3348,20 +3340,6 @@ declare function isTag(annotation: Annotation$1): annotation is Annotation$1;
3348
3340
  * @returns The comment text, or undefined if not a comment or no text found
3349
3341
  */
3350
3342
  declare function getCommentText(annotation: Annotation$1): string | undefined;
3351
- /**
3352
- * Extract tag category from a tag annotation's body
3353
- * Tags use dual-body structure: first body has purpose: "tagging" with category value
3354
- * @param annotation - The annotation to extract category from
3355
- * @returns The tag category (e.g., "Issue", "Rule"), or undefined if not a tag or no category found
3356
- */
3357
- declare function getTagCategory(annotation: Annotation$1): string | undefined;
3358
- /**
3359
- * Extract tag schema ID from a tag annotation's body
3360
- * Tags use dual-body structure: second body has purpose: "classifying" with schema ID
3361
- * @param annotation - The annotation to extract schema ID from
3362
- * @returns The schema ID (e.g., "legal-irac"), or undefined if not a tag or no schema found
3363
- */
3364
- declare function getTagSchemaId(annotation: Annotation$1): string | undefined;
3365
3343
  /**
3366
3344
  * Type guard to check if a reference annotation is a stub (unresolved)
3367
3345
  * Stub if no SpecificResource in body array
@@ -3609,6 +3587,73 @@ declare function getChecksum(resource: ResourceDescriptor | undefined): string |
3609
3587
  * Get the language from the primary representation
3610
3588
  */
3611
3589
  declare function getLanguage(resource: ResourceDescriptor | undefined): string | undefined;
3590
+ /**
3591
+ * Get storage URI from primary representation
3592
+ *
3593
+ * @param resource - ResourceDescriptor
3594
+ * @returns Storage URI or undefined
3595
+ */
3596
+ declare function getStorageUri(resource: ResourceDescriptor | undefined): string | undefined;
3597
+ /**
3598
+ * Get creator agent from wasAttributedTo
3599
+ * Handles both single agent and array of agents
3600
+ *
3601
+ * @param resource - ResourceDescriptor
3602
+ * @returns First agent or undefined
3603
+ */
3604
+ declare function getCreator(resource: ResourceDescriptor | undefined): components['schemas']['Agent'] | undefined;
3605
+ /**
3606
+ * Get derived-from URI
3607
+ * Handles both single URI and array of URIs
3608
+ *
3609
+ * @param resource - ResourceDescriptor
3610
+ * @returns First derivation URI or undefined
3611
+ */
3612
+ declare function getDerivedFrom(resource: ResourceDescriptor | undefined): string | undefined;
3613
+ /**
3614
+ * Check if resource is archived (application-specific field)
3615
+ *
3616
+ * @param resource - ResourceDescriptor
3617
+ * @returns True if archived, false otherwise
3618
+ */
3619
+ declare function isArchived(resource: ResourceDescriptor | undefined): boolean;
3620
+ /**
3621
+ * Get entity types from resource (application-specific field)
3622
+ *
3623
+ * @param resource - ResourceDescriptor
3624
+ * @returns Array of entity types, empty if not set
3625
+ */
3626
+ declare function getResourceEntityTypes(resource: ResourceDescriptor | undefined): string[];
3627
+ /**
3628
+ * Check if resource is a draft (application-specific field)
3629
+ *
3630
+ * @param resource - ResourceDescriptor
3631
+ * @returns True if draft, false otherwise
3632
+ */
3633
+ declare function isDraft(resource: ResourceDescriptor | undefined): boolean;
3634
+ /**
3635
+ * Map charset names to Node.js Buffer encoding names
3636
+ * Node.js Buffer.toString() supports: 'utf8', 'utf16le', 'latin1', 'base64', 'hex', 'ascii', 'binary', 'ucs2'
3637
+ *
3638
+ * @param charset - Charset name (e.g., "UTF-8", "ISO-8859-1", "Windows-1252")
3639
+ * @returns Node.js BufferEncoding
3640
+ */
3641
+ declare function getNodeEncoding(charset: string): BufferEncoding;
3642
+ /**
3643
+ * Decode a representation buffer to string using the correct charset
3644
+ * Extracts charset from media type and uses appropriate encoding
3645
+ *
3646
+ * @param buffer - The raw representation data
3647
+ * @param mediaType - Media type with optional charset (e.g., "text/plain; charset=iso-8859-1")
3648
+ * @returns Decoded string
3649
+ *
3650
+ * @example
3651
+ * ```typescript
3652
+ * const content = decodeRepresentation(buffer, "text/plain; charset=utf-8");
3653
+ * const legacy = decodeRepresentation(buffer, "text/plain; charset=windows-1252");
3654
+ * ```
3655
+ */
3656
+ declare function decodeRepresentation(buffer: Buffer, mediaType: string): string;
3612
3657
 
3613
3658
  /**
3614
3659
  * SVG Utility Functions
@@ -3654,6 +3699,90 @@ declare function normalizeCoordinates(point: Point, displayWidth: number, displa
3654
3699
  */
3655
3700
  declare function scaleSvgToNative(svg: string, displayWidth: number, displayHeight: number, imageWidth: number, imageHeight: number): string;
3656
3701
 
3702
+ /**
3703
+ * Text context extraction utilities for W3C Web Annotation TextQuoteSelector
3704
+ *
3705
+ * Provides robust prefix/suffix context extraction with word boundary detection
3706
+ * to ensure fuzzy anchoring works correctly when the same text appears multiple times.
3707
+ *
3708
+ * Also provides AI offset validation and correction for handling AI-generated annotations
3709
+ * where the model may return slightly incorrect character offsets.
3710
+ *
3711
+ * @see https://www.w3.org/TR/annotation-model/#text-quote-selector
3712
+ */
3713
+ /**
3714
+ * Extract prefix and suffix context for TextQuoteSelector
3715
+ *
3716
+ * Extracts 64 characters before and after the selected text (fewer at the
3717
+ * edges of the content), then extends each side by up to 32 further characters to reach a word boundary and avoid cutting words in half.
3718
+ * This ensures prefix/suffix are meaningful context for fuzzy anchoring.
3719
+ *
3720
+ * @param content - Full text content
3721
+ * @param start - Start offset of selection
3722
+ * @param end - End offset of selection
3723
+ * @returns Object with prefix and suffix (undefined if at boundaries)
3724
+ *
3725
+ * @example
3726
+ * ```typescript
3727
+ * const content = "The United States Congress...";
3728
+ * const context = extractContext(content, 4, 17); // "United States"
3729
+ * // Returns: { prefix: "The ", suffix: " Congress..." }
3730
+ * // NOT: { prefix: "nited ", suffix: "gress..." }
3731
+ * ```
3732
+ */
3733
+ declare function extractContext(content: string, start: number, end: number): {
3734
+ prefix?: string;
3735
+ suffix?: string;
3736
+ };
3737
+ /**
3738
+ * Result of validating and correcting AI-provided annotation offsets
3739
+ */
3740
+ interface ValidatedAnnotation {
3741
+ start: number;
3742
+ end: number;
3743
+ exact: string;
3744
+ prefix?: string;
3745
+ suffix?: string;
3746
+ corrected: boolean;
3747
+ fuzzyMatched?: boolean;
3748
+ matchQuality?: 'exact' | 'case-insensitive' | 'fuzzy';
3749
+ }
3750
+ /**
3751
+ * Validate and correct AI-provided annotation offsets with fuzzy matching tolerance
3752
+ *
3753
+ * AI models sometimes return offsets that don't match the actual text position,
3754
+ * or provide text with minor variations (case differences, whitespace, typos).
3755
+ *
3756
+ * This function uses a multi-strategy approach:
3757
+ * 1. Check if AI's offsets are exactly correct
3758
+ * 2. Try exact case-sensitive search
3759
+ * 3. Try case-insensitive search
3760
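+ * 4. Try fuzzy matching with Levenshtein distance within 500 characters of the AI offsets (tolerance of about 5% of the text length, with a small fixed allowance for short texts)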
3761
+ *
3762
+ * This ensures we're maximally tolerant of AI errors while still maintaining
3763
+ * annotation quality and logging what corrections were made.
3764
+ *
3765
+ * @param content - Full text content
3766
+ * @param aiStart - Start offset from AI
3767
+ * @param aiEnd - End offset from AI
3768
+ * @param exact - The exact text that should be at this position (from AI)
3769
+ * @returns Validated annotation with corrected offsets and context
3770
+ * @throws Error if no acceptable match can be found
3771
+ *
3772
+ * @example
3773
+ * ```typescript
3774
+ * // AI said start=1143, but actual text is at 1161
3775
+ * const result = validateAndCorrectOffsets(
3776
+ * content,
3777
+ * 1143,
3778
+ * 1289,
3779
+ * "the question \"whether..."
3780
+ * );
3781
+ * // Returns: { start: 1161, end: 1303, exact: "...", corrected: true, matchQuality: 'exact', ... }
3782
+ * ```
3783
+ */
3784
+ declare function validateAndCorrectOffsets(content: string, aiStart: number, aiEnd: number, exact: string): ValidatedAnnotation;
3785
+
3657
3786
  /**
3658
3787
  * Text encoding utilities for consistent charset handling
3659
3788
  *
@@ -3745,4 +3874,4 @@ declare function validateData<T>(schema: {
3745
3874
  */
3746
3875
  declare function isValidEmail(email: string): boolean;
3747
3876
 
3748
- export { type $defs as $, type AccessToken as A, type BaseUrl as B, type ContentFormat as C, getTagSchemaId as D, type EntityType as E, isStubReference as F, type GoogleCredential as G, isResolvedReference as H, getExactText as I, type JobId as J, getAnnotationExactText as K, getPrimarySelector as L, type Motivation as M, getTextPositionSelector as N, getTextQuoteSelector as O, getSvgSelector as P, validateSvgMarkup as Q, type ResourceUri as R, type SearchQuery as S, type TextPositionSelector as T, type UserDID as U, extractBoundingBox as V, type StoredEvent as W, type EventMetadata as X, type ResourceEventType as Y, getAnnotationUriFromEvent as Z, isEventRelatedToAnnotation as _, type AnnotationUri as a, isResourceEvent as a0, formatEventType as a1, getEventEmoji as a2, formatRelativeTime as a3, getEventDisplayContent as a4, getEventEntityTypes as a5, type ResourceCreationDetails as a6, getResourceCreationDetails as a7, type TextPosition as a8, findTextWithContext as a9, JWTTokenSchema as aA, validateData as aB, isValidEmail as aC, type AuthCode as aD, type MCPToken as aE, email as aF, authCode as aG, googleCredential as aH, accessToken as aI, refreshToken as aJ, mcpToken as aK, cloneToken as aL, jobId as aM, userDID as aN, entityType as aO, searchQuery as aP, baseUrl as aQ, resourceUri as aR, annotationUri as aS, resourceAnnotationUri as aT, type ResourceEvent as aU, verifyPosition as aa, type LocaleInfo as ab, LOCALES as ac, getLocaleInfo as ad, getLocaleNativeName as ae, getLocaleEnglishName as af, formatLocaleDisplay as ag, getAllLocaleCodes as ah, getResourceId as ai, getPrimaryRepresentation as aj, getPrimaryMediaType as ak, getChecksum as al, getLanguage as am, type Point as an, type BoundingBox as ao, createRectangleSvg as ap, createPolygonSvg as aq, createCircleSvg as ar, parseSvgSelector as as, normalizeCoordinates as at, scaleSvgToNative as au, extractCharset as av, decodeWithCharset as aw, type ValidationSuccess as ax, type ValidationFailure as ay, 
type ValidationResult as az, type Email as b, type components as c, type RefreshToken as d, type CloneToken as e, type ResourceAnnotationUri as f, type TextQuoteSelector as g, type SvgSelector as h, type Selector as i, getBodySource as j, getBodyType as k, isBodyResolved as l, getTargetSource as m, getTargetSelector as n, type operations as o, type paths as p, hasTargetSelector as q, getEntityTypes as r, isHighlight as s, isReference as t, isAssessment as u, isComment as v, type webhooks as w, isTag as x, getCommentText as y, getTagCategory as z };
3877
+ export { type $defs as $, type AccessToken as A, type BaseUrl as B, type ContentFormat as C, getExactText as D, type EntityType as E, getAnnotationExactText as F, type GoogleCredential as G, getPrimarySelector as H, getTextPositionSelector as I, type JobId as J, getTextQuoteSelector as K, getSvgSelector as L, type Motivation as M, validateSvgMarkup as N, extractBoundingBox as O, type StoredEvent as P, type EventMetadata as Q, type ResourceUri as R, type SearchQuery as S, type TextPositionSelector as T, type UserDID as U, type ResourceEventType as V, getAnnotationUriFromEvent as W, isEventRelatedToAnnotation as X, isResourceEvent as Y, formatEventType as Z, getEventEmoji as _, type AnnotationUri as a, resourceAnnotationUri as a$, formatRelativeTime as a0, getEventDisplayContent as a1, getEventEntityTypes as a2, type ResourceCreationDetails as a3, getResourceCreationDetails as a4, type TextPosition as a5, findTextWithContext as a6, verifyPosition as a7, type LocaleInfo as a8, LOCALES as a9, extractContext as aA, type ValidatedAnnotation as aB, validateAndCorrectOffsets as aC, extractCharset as aD, decodeWithCharset as aE, type ValidationSuccess as aF, type ValidationFailure as aG, type ValidationResult as aH, JWTTokenSchema as aI, validateData as aJ, isValidEmail as aK, type AuthCode as aL, type MCPToken as aM, email as aN, authCode as aO, googleCredential as aP, accessToken as aQ, refreshToken as aR, mcpToken as aS, cloneToken as aT, jobId as aU, userDID as aV, entityType as aW, searchQuery as aX, baseUrl as aY, resourceUri as aZ, annotationUri as a_, getLocaleInfo as aa, getLocaleNativeName as ab, getLocaleEnglishName as ac, formatLocaleDisplay as ad, getAllLocaleCodes as ae, getResourceId as af, getPrimaryRepresentation as ag, getPrimaryMediaType as ah, getChecksum as ai, getLanguage as aj, getStorageUri as ak, getCreator as al, getDerivedFrom as am, isArchived as an, getResourceEntityTypes as ao, isDraft as ap, getNodeEncoding as aq, decodeRepresentation as ar, 
type Point as as, type BoundingBox as at, createRectangleSvg as au, createPolygonSvg as av, createCircleSvg as aw, parseSvgSelector as ax, normalizeCoordinates as ay, scaleSvgToNative as az, type Email as b, type ResourceEvent as b0, type components as c, type RefreshToken as d, type CloneToken as e, type ResourceAnnotationUri as f, type TextQuoteSelector as g, type SvgSelector as h, type Selector as i, getBodySource as j, getBodyType as k, isBodyResolved as l, getTargetSource as m, getTargetSelector as n, type operations as o, type paths as p, hasTargetSelector as q, isHighlight as r, isReference as s, isAssessment as t, isComment as u, isTag as v, type webhooks as w, getCommentText as x, isStubReference as y, isResolvedReference as z };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { B as BaseUrl, A as AccessToken, R as ResourceUri, E as EntityType, a as AnnotationUri, c as components, b as Email, p as paths, d as RefreshToken, G as GoogleCredential, C as ContentFormat, S as SearchQuery, e as CloneToken, f as ResourceAnnotationUri, M as Motivation, U as UserDID, J as JobId } from './index-CBMGI-nS.js';
2
- export { $ as $defs, aD as AuthCode, ao as BoundingBox, X as EventMetadata, aA as JWTTokenSchema, ac as LOCALES, ab as LocaleInfo, aE as MCPToken, an as Point, a6 as ResourceCreationDetails, Y as ResourceEventType, i as Selector, W as StoredEvent, h as SvgSelector, a8 as TextPosition, T as TextPositionSelector, g as TextQuoteSelector, ay as ValidationFailure, az as ValidationResult, ax as ValidationSuccess, aI as accessToken, aS as annotationUri, aG as authCode, aQ as baseUrl, aL as cloneToken, ar as createCircleSvg, aq as createPolygonSvg, ap as createRectangleSvg, aw as decodeWithCharset, aF as email, aO as entityType, V as extractBoundingBox, av as extractCharset, a9 as findTextWithContext, a1 as formatEventType, ag as formatLocaleDisplay, a3 as formatRelativeTime, ah as getAllLocaleCodes, K as getAnnotationExactText, Z as getAnnotationUriFromEvent, j as getBodySource, k as getBodyType, al as getChecksum, y as getCommentText, r as getEntityTypes, a4 as getEventDisplayContent, a2 as getEventEmoji, a5 as getEventEntityTypes, I as getExactText, am as getLanguage, af as getLocaleEnglishName, ad as getLocaleInfo, ae as getLocaleNativeName, ak as getPrimaryMediaType, aj as getPrimaryRepresentation, L as getPrimarySelector, a7 as getResourceCreationDetails, ai as getResourceId, P as getSvgSelector, z as getTagCategory, D as getTagSchemaId, n as getTargetSelector, m as getTargetSource, N as getTextPositionSelector, O as getTextQuoteSelector, aH as googleCredential, q as hasTargetSelector, u as isAssessment, l as isBodyResolved, v as isComment, _ as isEventRelatedToAnnotation, s as isHighlight, t as isReference, H as isResolvedReference, a0 as isResourceEvent, F as isStubReference, x as isTag, aC as isValidEmail, aM as jobId, aK as mcpToken, at as normalizeCoordinates, o as operations, as as parseSvgSelector, aJ as refreshToken, aT as resourceAnnotationUri, aR as resourceUri, au as scaleSvgToNative, aP as searchQuery, aN as userDID, aB as validateData, Q as 
validateSvgMarkup, aa as verifyPosition, w as webhooks } from './index-CBMGI-nS.js';
1
+ import { B as BaseUrl, A as AccessToken, R as ResourceUri, E as EntityType, a as AnnotationUri, c as components, b as Email, p as paths, d as RefreshToken, G as GoogleCredential, C as ContentFormat, S as SearchQuery, e as CloneToken, f as ResourceAnnotationUri, M as Motivation, U as UserDID, J as JobId } from './index--2zlsZdR.js';
2
+ export { $ as $defs, aL as AuthCode, at as BoundingBox, Q as EventMetadata, aI as JWTTokenSchema, a9 as LOCALES, a8 as LocaleInfo, aM as MCPToken, as as Point, a3 as ResourceCreationDetails, V as ResourceEventType, i as Selector, P as StoredEvent, h as SvgSelector, a5 as TextPosition, T as TextPositionSelector, g as TextQuoteSelector, aB as ValidatedAnnotation, aG as ValidationFailure, aH as ValidationResult, aF as ValidationSuccess, aQ as accessToken, a_ as annotationUri, aO as authCode, aY as baseUrl, aT as cloneToken, aw as createCircleSvg, av as createPolygonSvg, au as createRectangleSvg, ar as decodeRepresentation, aE as decodeWithCharset, aN as email, aW as entityType, O as extractBoundingBox, aD as extractCharset, aA as extractContext, a6 as findTextWithContext, Z as formatEventType, ad as formatLocaleDisplay, a0 as formatRelativeTime, ae as getAllLocaleCodes, F as getAnnotationExactText, W as getAnnotationUriFromEvent, j as getBodySource, k as getBodyType, ai as getChecksum, x as getCommentText, al as getCreator, am as getDerivedFrom, a1 as getEventDisplayContent, _ as getEventEmoji, a2 as getEventEntityTypes, D as getExactText, aj as getLanguage, ac as getLocaleEnglishName, aa as getLocaleInfo, ab as getLocaleNativeName, aq as getNodeEncoding, ah as getPrimaryMediaType, ag as getPrimaryRepresentation, H as getPrimarySelector, a4 as getResourceCreationDetails, ao as getResourceEntityTypes, af as getResourceId, ak as getStorageUri, L as getSvgSelector, n as getTargetSelector, m as getTargetSource, I as getTextPositionSelector, K as getTextQuoteSelector, aP as googleCredential, q as hasTargetSelector, an as isArchived, t as isAssessment, l as isBodyResolved, u as isComment, ap as isDraft, X as isEventRelatedToAnnotation, r as isHighlight, s as isReference, z as isResolvedReference, Y as isResourceEvent, y as isStubReference, v as isTag, aK as isValidEmail, aU as jobId, aS as mcpToken, ay as normalizeCoordinates, o as operations, ax as parseSvgSelector, aR 
as refreshToken, a$ as resourceAnnotationUri, aZ as resourceUri, az as scaleSvgToNative, aX as searchQuery, aV as userDID, aC as validateAndCorrectOffsets, aJ as validateData, N as validateSvgMarkup, a7 as verifyPosition, w as webhooks } from './index--2zlsZdR.js';
3
3
 
4
4
  /**
5
5
  * TypeScript types for Server-Sent Events (SSE) streaming
package/dist/index.js CHANGED
@@ -1138,23 +1138,6 @@ function getTargetSelector(target) {
1138
1138
  function hasTargetSelector(target) {
1139
1139
  return typeof target !== "string" && target.selector !== void 0;
1140
1140
  }
1141
- function getEntityTypes(annotation) {
1142
- if (Array.isArray(annotation.body)) {
1143
- const entityTags = [];
1144
- for (const item of annotation.body) {
1145
- if (typeof item === "object" && item !== null && "type" in item && "value" in item && "purpose" in item) {
1146
- const itemType = item.type;
1147
- const itemValue = item.value;
1148
- const itemPurpose = item.purpose;
1149
- if (itemType === "TextualBody" && itemPurpose === "tagging" && typeof itemValue === "string" && itemValue.length > 0) {
1150
- entityTags.push(itemValue);
1151
- }
1152
- }
1153
- }
1154
- return entityTags;
1155
- }
1156
- return [];
1157
- }
1158
1141
  function isHighlight(annotation) {
1159
1142
  return annotation.motivation === "highlighting";
1160
1143
  }
@@ -1178,24 +1161,6 @@ function getCommentText(annotation) {
1178
1161
  }
1179
1162
  return void 0;
1180
1163
  }
1181
- function getTagCategory(annotation) {
1182
- if (!isTag(annotation)) return void 0;
1183
- const bodies = Array.isArray(annotation.body) ? annotation.body : [annotation.body];
1184
- const taggingBody = bodies.find((b) => b && "purpose" in b && b.purpose === "tagging");
1185
- if (taggingBody && "value" in taggingBody) {
1186
- return taggingBody.value;
1187
- }
1188
- return void 0;
1189
- }
1190
- function getTagSchemaId(annotation) {
1191
- if (!isTag(annotation)) return void 0;
1192
- const bodies = Array.isArray(annotation.body) ? annotation.body : [annotation.body];
1193
- const classifyingBody = bodies.find((b) => b && "purpose" in b && b.purpose === "classifying");
1194
- if (classifyingBody && "value" in classifyingBody) {
1195
- return classifyingBody.value;
1196
- }
1197
- return void 0;
1198
- }
1199
1164
  function isStubReference(annotation) {
1200
1165
  return isReference(annotation) && !isBodyResolved(annotation.body);
1201
1166
  }
@@ -1671,6 +1636,49 @@ function getChecksum(resource) {
1671
1636
  function getLanguage(resource) {
1672
1637
  return getPrimaryRepresentation(resource)?.language;
1673
1638
  }
1639
/**
 * Look up the storage URI recorded on the resource's primary representation.
 *
 * @param resource - ResourceDescriptor (may be undefined)
 * @returns The primary representation's `storageUri`, or undefined when no
 *          primary representation is returned for the resource
 */
function getStorageUri(resource) {
  const primary = getPrimaryRepresentation(resource);
  if (primary === null || primary === undefined) {
    return undefined;
  }
  return primary.storageUri;
}
1642
/**
 * Return the creator agent recorded in `wasAttributedTo`.
 *
 * `wasAttributedTo` may hold a single agent or an array of agents; for an
 * array, the first element is returned.
 *
 * @param resource - ResourceDescriptor (may be undefined)
 * @returns The single agent, the first agent of an array, or undefined when
 *          the field is missing/falsy (or the array is empty)
 */
function getCreator(resource) {
  const attribution = resource?.wasAttributedTo;
  if (!attribution) {
    return undefined;
  }
  if (Array.isArray(attribution)) {
    return attribution[0];
  }
  return attribution;
}
1646
/**
 * Return the URI recorded in `wasDerivedFrom`.
 *
 * `wasDerivedFrom` may hold a single URI or an array of URIs; for an array,
 * the first element is returned.
 *
 * @param resource - ResourceDescriptor (may be undefined)
 * @returns The single URI, the first URI of an array, or undefined when the
 *          field is missing/falsy (or the array is empty)
 */
function getDerivedFrom(resource) {
  const derivation = resource?.wasDerivedFrom;
  if (!derivation) {
    return undefined;
  }
  if (Array.isArray(derivation)) {
    return derivation[0];
  }
  return derivation;
}
1650
/**
 * Report whether the resource is flagged as archived.
 *
 * @param resource - ResourceDescriptor (may be undefined)
 * @returns true only when `resource.archived` is strictly the boolean `true`
 */
function isArchived(resource) {
  if (resource === null || resource === undefined) {
    return false;
  }
  return resource.archived === true;
}
1653
+ function getResourceEntityTypes(resource) {
1654
+ return resource?.entityTypes || [];
1655
+ }
1656
/**
 * Report whether the resource is flagged as a draft.
 *
 * @param resource - ResourceDescriptor (may be undefined)
 * @returns true only when `resource.isDraft` is strictly the boolean `true`
 */
function isDraft(resource) {
  if (resource === null || resource === undefined) {
    return false;
  }
  return resource.isDraft === true;
}
1659
/**
 * Map a charset name to the Node.js Buffer encoding used to decode it.
 *
 * The name is compared case-insensitively with "-" and "_" removed, so
 * "UTF-8", "utf_8" and "utf8" are equivalent. Unrecognised names fall back
 * to "utf8".
 *
 * Caveat: "windows-1252" / "cp1252" are mapped to "latin1" as the closest
 * Buffer encoding. Node's "latin1" maps every byte straight to U+0000–U+00FF,
 * so the Windows-1252-specific characters in 0x80–0x9F (e.g. the euro sign and
 * curly quotes) are NOT decoded to their Windows-1252 meaning.
 *
 * @param charset - Charset name (e.g., "UTF-8", "ISO-8859-1", "Windows-1252")
 * @returns Node.js BufferEncoding name
 */
function getNodeEncoding(charset) {
  const normalized = charset.toLowerCase().replace(/[-_]/g, "");
  // A Map (rather than an object literal) so that names such as "constructor"
  // cannot resolve to inherited Object.prototype members and leak a
  // non-string value to the caller.
  const encodingByCharset = new Map([
    ["utf8", "utf8"],
    ["iso88591", "latin1"],
    ["latin1", "latin1"],
    ["ascii", "ascii"],
    ["usascii", "ascii"],
    ["utf16le", "utf16le"],
    ["ucs2", "ucs2"],
    ["binary", "binary"],
    // Closest available Buffer encoding; see the caveat in the doc comment.
    ["windows1252", "latin1"],
    ["cp1252", "latin1"]
  ]);
  return encodingByCharset.get(normalized) ?? "utf8";
}
1676
/**
 * Decode a representation buffer to a string using the charset declared in
 * its media type.
 *
 * The `charset` parameter is matched case-insensitively and may be written
 * bare (`charset=iso-8859-1`) or as a quoted string (`charset="iso-8859-1"`);
 * surrounding double quotes are not part of the charset name. When the media
 * type carries no charset parameter, "utf-8" is assumed. The charset is then
 * translated to a Buffer encoding by `getNodeEncoding`.
 *
 * @param buffer - The raw representation data
 * @param mediaType - Media type with optional charset (e.g., "text/plain; charset=iso-8859-1")
 * @returns Decoded string
 */
function decodeRepresentation(buffer, mediaType) {
  // Optional double quotes around the value are consumed outside the capture
  // group so that `charset="utf-8"` yields `utf-8`, not `"utf-8"`.
  const charsetMatch = mediaType.match(/charset="?([^\s;"]+)"?/i);
  const charset = (charsetMatch?.[1] || "utf-8").toLowerCase();
  const encoding = getNodeEncoding(charset);
  return buffer.toString(encoding);
}
1674
1682
 
1675
1683
  // src/utils/svg-utils.ts
1676
1684
  function createRectangleSvg(start, end) {
@@ -1768,6 +1776,180 @@ function scaleSvgToNative(svg, displayWidth, displayHeight, imageWidth, imageHei
1768
1776
  return svg;
1769
1777
  }
1770
1778
 
1779
+ // src/utils/text-context.ts
1780
/**
 * Build the prefix/suffix context that surrounds a selection, for use in a
 * TextQuoteSelector.
 *
 * Each side starts as the 64 characters adjacent to the selection (clamped to
 * the content bounds) and is then widened, one character at a time and by at
 * most 32 characters, until the character just outside the window is
 * whitespace or punctuation (or the content edge is reached).
 *
 * @param content - Full text content
 * @param start - Start offset of the selection
 * @param end - End offset of the selection
 * @returns `{ prefix, suffix }`; `prefix` is undefined when `start` is 0 (or
 *          less) and `suffix` is undefined when `end` reaches the content end
 */
function extractContext(content, start, end) {
  const CONTEXT_LENGTH = 64;
  const MAX_EXTENSION = 32;
  const boundaryPattern = /[\s.,;:!?'"()\[\]{}<>\/\\]/;
  // A missing character (index outside the string) also counts as a boundary.
  const isBoundary = (ch) => !ch || boundaryPattern.test(ch);

  let prefix;
  if (start > 0) {
    let from = Math.max(0, start - CONTEXT_LENGTH);
    // Walk left while the character before the window is part of a word.
    for (let steps = 0; from > 0 && steps < MAX_EXTENSION; steps += 1) {
      if (isBoundary(content[from - 1])) {
        break;
      }
      from -= 1;
    }
    prefix = content.substring(from, start);
  }

  let suffix;
  if (end < content.length) {
    let to = Math.min(content.length, end + CONTEXT_LENGTH);
    // Walk right while the character at the window edge is part of a word.
    for (let steps = 0; to < content.length && steps < MAX_EXTENSION; steps += 1) {
      if (isBoundary(content[to])) {
        break;
      }
      to += 1;
    }
    suffix = content.substring(end, to);
  }

  return { prefix, suffix };
}
1813
/**
 * Levenshtein (edit) distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions that turn `str1`
 * into `str2`.
 *
 * Only the previous row of the dynamic-programming table is kept, so memory
 * is proportional to `str2.length` rather than to the product of both lengths.
 *
 * @param str1 - First string
 * @param str2 - Second string
 * @returns The edit distance (0 when the strings are equal)
 */
function levenshteinDistance(str1, str2) {
  const len1 = str1.length;
  const len2 = str2.length;
  // Row 0: turning "" into the first j characters of str2 costs j insertions.
  let previous = [];
  for (let j = 0; j <= len2; j++) {
    previous.push(j);
  }
  for (let i = 1; i <= len1; i++) {
    // Column 0: turning the first i characters of str1 into "" costs i deletions.
    const current = [i];
    for (let j = 1; j <= len2; j++) {
      const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
      const deletion = previous[j] + 1;
      const insertion = current[j - 1] + 1;
      const substitution = previous[j - 1] + cost;
      current.push(Math.min(deletion, insertion, substitution));
    }
    previous = current;
  }
  return previous[len2];
}

/**
 * Locate `searchText` in `content`, preferring the occurrence closest to the
 * AI-provided offsets.
 *
 * Strategies, in order:
 * 1. Exact (case-sensitive) search over the whole content.
 * 2. Case-insensitive search over the whole content.
 * 3. Fuzzy search: slide a window of `searchText.length` characters over the
 *    region within 500 characters of the AI offsets and keep the window with
 *    the smallest Levenshtein distance that is within tolerance.
 *
 * For strategies 1 and 2, when the text occurs more than once the occurrence
 * whose start is nearest to `aiStart` wins (the earlier one on a tie), so a
 * slightly-off AI offset is corrected to the neighbouring occurrence instead
 * of jumping to the first one in the document.
 *
 * Fuzzy tolerance is max(5, 5% of the text length) edits, capped at half the
 * text length. Without the cap, any text of 5 characters or fewer would be
 * within tolerance of every window and could never be rejected.
 *
 * @param content - Full text content
 * @param searchText - Text to locate (as provided by the AI)
 * @param aiStart - Start offset from the AI, used as the proximity anchor
 * @param aiEnd - End offset from the AI, bounds the fuzzy search region
 * @returns `{ start, end, matchQuality }` or null when nothing acceptable is found
 */
function findBestMatch(content, searchText, aiStart, aiEnd) {
  const maxFuzzyDistance = Math.min(
    Math.max(5, Math.floor(searchText.length * 0.05)),
    Math.floor(searchText.length / 2)
  );

  // Index of the occurrence of `needle` whose start is closest to aiStart,
  // or -1 when there is none.
  const nearestIndexOf = (haystack, needle) => {
    if (needle.length === 0) {
      // indexOf("") never returns -1; keep the plain result and avoid looping.
      return haystack.indexOf(needle);
    }
    let best = -1;
    for (let idx = haystack.indexOf(needle); idx !== -1; idx = haystack.indexOf(needle, idx + 1)) {
      if (best === -1 || Math.abs(idx - aiStart) < Math.abs(best - aiStart)) {
        best = idx;
      } else if (idx > aiStart) {
        // Occurrences are visited left to right; past the anchor they only get farther.
        break;
      }
    }
    return best;
  };

  const exactIndex = nearestIndexOf(content, searchText);
  if (exactIndex !== -1) {
    return {
      start: exactIndex,
      end: exactIndex + searchText.length,
      matchQuality: "exact"
    };
  }
  console.log("[findBestMatch] Exact match failed, trying case-insensitive...");
  const lowerContent = content.toLowerCase();
  const lowerSearch = searchText.toLowerCase();
  const caseInsensitiveIndex = nearestIndexOf(lowerContent, lowerSearch);
  // toLowerCase() can change string length for some characters, which would
  // shift indices relative to the original content. Accept the hit only if
  // the same span of the ORIGINAL content really matches.
  if (
    caseInsensitiveIndex !== -1 &&
    content.substring(caseInsensitiveIndex, caseInsensitiveIndex + searchText.length).toLowerCase() === lowerSearch
  ) {
    console.log("[findBestMatch] Found case-insensitive match");
    return {
      start: caseInsensitiveIndex,
      end: caseInsensitiveIndex + searchText.length,
      matchQuality: "case-insensitive"
    };
  }
  console.log("[findBestMatch] Case-insensitive failed, trying fuzzy match...");
  const windowSize = searchText.length;
  const searchRadius = Math.min(500, content.length);
  const searchStart = Math.max(0, aiStart - searchRadius);
  const searchEnd = Math.min(content.length, aiEnd + searchRadius);
  let bestMatch = null;
  // A tolerance of 0 cannot match here: the exact search above already failed.
  if (maxFuzzyDistance > 0) {
    for (let i = searchStart; i <= searchEnd - windowSize; i++) {
      const candidate = content.substring(i, i + windowSize);
      const distance = levenshteinDistance(searchText, candidate);
      if (distance <= maxFuzzyDistance) {
        if (!bestMatch || distance < bestMatch.distance) {
          bestMatch = { start: i, distance };
          console.log(`[findBestMatch] Found fuzzy match at ${i} with distance ${distance}`);
        }
      }
    }
  }
  if (bestMatch) {
    return {
      start: bestMatch.start,
      end: bestMatch.start + windowSize,
      matchQuality: "fuzzy"
    };
  }
  console.log("[findBestMatch] No acceptable match found");
  return null;
}
1882
/**
 * Validate AI-provided annotation offsets against the content and correct
 * them when they do not point at the expected text.
 *
 * Fast path: when `content.substring(aiStart, aiEnd)` equals `exact`, the
 * offsets are returned unchanged with `corrected: false` and
 * `matchQuality: "exact"` (no `fuzzyMatched` field is set on this path).
 *
 * Otherwise `findBestMatch` is asked to locate the text. If it returns
 * nothing, an Error is thrown. If it returns a match, the result carries the
 * matched offsets, `exact` taken from the document (not the AI's string),
 * `corrected: true`, and `fuzzyMatched` set to true for any match quality
 * other than "exact".
 *
 * In both cases `prefix`/`suffix` come from `extractContext` at the final
 * offsets. Progress and diagnostics are written with console.log/warn/error;
 * long texts are truncated to 50 (or 100) characters in those messages only.
 *
 * @param content - Full text content
 * @param aiStart - Start offset from AI
 * @param aiEnd - End offset from AI
 * @param exact - The text the AI claims is at [aiStart, aiEnd)
 * @returns Validated annotation with offsets, exact text, context and match metadata
 * @throws Error when findBestMatch finds no acceptable match
 */
+ function validateAndCorrectOffsets(content, aiStart, aiEnd, exact) {
1883
+ const exactPreview = exact.length > 50 ? exact.substring(0, 50) + "..." : exact;
1884
// Step 1: check whether the AI's offsets already select exactly the expected text.
+ const textAtOffset = content.substring(aiStart, aiEnd);
1885
+ if (textAtOffset === exact) {
1886
+ console.log(`[validateAndCorrectOffsets] \u2713 Offsets correct for: "${exactPreview}"`);
1887
+ const context2 = extractContext(content, aiStart, aiEnd);
1888
+ return {
1889
+ start: aiStart,
1890
+ end: aiEnd,
1891
+ exact,
1892
+ prefix: context2.prefix,
1893
+ suffix: context2.suffix,
1894
+ corrected: false,
1895
+ matchQuality: "exact"
1896
+ };
1897
+ }
1898
+ const foundPreview = textAtOffset.length > 50 ? textAtOffset.substring(0, 50) + "..." : textAtOffset;
1899
+ console.warn(
1900
+ `[validateAndCorrectOffsets] \u26A0 AI offset mismatch:
1901
+ Expected text: "${exactPreview}"
1902
+ Found at AI offset (${aiStart}-${aiEnd}): "${foundPreview}"
1903
+ Attempting multi-strategy search...`
1904
+ );
1905
// Step 2: offsets were wrong; search the content for the text instead.
+ const match = findBestMatch(content, exact, aiStart, aiEnd);
1906
+ if (!match) {
1907
+ const exactLong = exact.length > 100 ? exact.substring(0, 100) + "..." : exact;
1908
+ console.error(
1909
+ `[validateAndCorrectOffsets] \u2717 No acceptable match found:
1910
+ AI offsets: start=${aiStart}, end=${aiEnd}
1911
+ AI text: "${exactLong}"
1912
+ Text at AI offset: "${foundPreview}"
1913
+ All search strategies (exact, case-insensitive, fuzzy) failed.
1914
+ This suggests the AI hallucinated text that doesn't exist in the document.`
1915
+ );
1916
+ throw new Error(
1917
+ "Cannot find acceptable match for text in content. All search strategies failed. Text may be hallucinated."
1918
+ );
1919
+ }
1920
// Step 3: report the correction; the returned text is read back from the document.
+ const actualText = content.substring(match.start, match.end);
1921
+ const actualPreview = actualText.length > 50 ? actualText.substring(0, 50) + "..." : actualText;
1922
+ const offsetDelta = match.start - aiStart;
1923
+ const matchSymbol = match.matchQuality === "exact" ? "\u2713" : match.matchQuality === "case-insensitive" ? "\u2248" : "~";
1924
+ console.warn(
1925
+ `[validateAndCorrectOffsets] ${matchSymbol} Found ${match.matchQuality} match:
1926
+ AI offsets: start=${aiStart}, end=${aiEnd}
1927
+ Corrected: start=${match.start}, end=${match.end}
1928
+ Offset delta: ${offsetDelta} characters
1929
+ Actual text: "${actualPreview}"`
1930
+ );
1931
+ if (match.matchQuality === "fuzzy") {
1932
+ console.warn(
1933
+ `[validateAndCorrectOffsets] Fuzzy match details:
1934
+ AI provided: "${exactPreview}"
1935
+ Found in doc: "${actualPreview}"
1936
+ Minor text differences detected - using document version`
1937
+ );
1938
+ }
1939
// Context is extracted at the corrected offsets, not the AI's.
+ const context = extractContext(content, match.start, match.end);
1940
+ return {
1941
+ start: match.start,
1942
+ end: match.end,
1943
+ exact: actualText,
1944
+ // Use actual text from document, not AI's version
1945
+ prefix: context.prefix,
1946
+ suffix: context.suffix,
1947
+ corrected: true,
1948
+ fuzzyMatched: match.matchQuality !== "exact",
1949
+ matchQuality: match.matchQuality
1950
+ };
1951
+ }
1952
+
1771
1953
  // src/utils/text-encoding.ts
1772
1954
  function extractCharset(mediaType) {
1773
1955
  const charsetMatch = mediaType.match(/charset=([^\s;]+)/i);
@@ -1851,6 +2033,6 @@ function getMimeCategory(mimeType) {
1851
2033
  return "unsupported";
1852
2034
  }
1853
2035
 
1854
- export { APIError, JWTTokenSchema, LOCALES, SSEClient, SemiontApiClient, accessToken, annotationUri, authCode, baseUrl, cloneToken, createCircleSvg, createPolygonSvg, createRectangleSvg, decodeWithCharset, email, entityType, extractBoundingBox, extractCharset, findTextWithContext, formatEventType, formatLocaleDisplay, formatRelativeTime, getAllLocaleCodes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getEntityTypes, getEventDisplayContent, getEventEmoji, getEventEntityTypes, getExactText, getExtensionForMimeType, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getMimeCategory, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceCreationDetails, getResourceId, getSvgSelector, getTagCategory, getTagSchemaId, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isAssessment, isBodyResolved, isComment, isEventRelatedToAnnotation, isHighlight, isImageMimeType, isReference, isResolvedReference, isResourceEvent, isStubReference, isTag, isTextMimeType, isValidEmail, jobId, mcpToken, normalizeCoordinates, parseSvgSelector, refreshToken, resourceAnnotationUri, resourceUri, scaleSvgToNative, searchQuery, userDID, validateData, validateSvgMarkup, verifyPosition };
2036
+ export { APIError, JWTTokenSchema, LOCALES, SSEClient, SemiontApiClient, accessToken, annotationUri, authCode, baseUrl, cloneToken, createCircleSvg, createPolygonSvg, createRectangleSvg, decodeRepresentation, decodeWithCharset, email, entityType, extractBoundingBox, extractCharset, extractContext, findTextWithContext, formatEventType, formatLocaleDisplay, formatRelativeTime, getAllLocaleCodes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getEventDisplayContent, getEventEmoji, getEventEntityTypes, getExactText, getExtensionForMimeType, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getMimeCategory, getNodeEncoding, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceCreationDetails, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isArchived, isAssessment, isBodyResolved, isComment, isDraft, isEventRelatedToAnnotation, isHighlight, isImageMimeType, isReference, isResolvedReference, isResourceEvent, isStubReference, isTag, isTextMimeType, isValidEmail, jobId, mcpToken, normalizeCoordinates, parseSvgSelector, refreshToken, resourceAnnotationUri, resourceUri, scaleSvgToNative, searchQuery, userDID, validateAndCorrectOffsets, validateData, validateSvgMarkup, verifyPosition };
1855
2037
  //# sourceMappingURL=index.js.map
1856
2038
  //# sourceMappingURL=index.js.map