npm - @semiont/api-client - Versions diffs - 0.2.28-build.38 → 0.2.28-build.40 - Mend

@semiont/api-client 0.2.28-build.38 → 0.2.28-build.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +6 -2
package/dist/{index-CBMGI-nS.d.ts → index--2zlsZdR.d.ts} +152 -23
package/dist/index.d.ts +2 -2
package/dist/index.js +218 -36
package/dist/index.js.map +1 -1
package/dist/utils/index.d.ts +1 -1
package/dist/utils/index.js +218 -36
package/dist/utils/index.js.map +1 -1
package/package.json +1 -1

package/README.md CHANGED

@@ -1,5 +1,6 @@
 # @semiont/api-client
+[![Tests](https://github.com/The-AI-Alliance/semiont/actions/workflows/package-tests.yml/badge.svg)](https://github.com/The-AI-Alliance/semiont/actions/workflows/package-tests.yml?query=branch%3Amain+is%3Asuccess+job%3A%22Test+api-client%22)
 [![npm version](https://img.shields.io/npm/v/@semiont/api-client.svg)](https://www.npmjs.com/package/@semiont/api-client)
 [![npm downloads](https://img.shields.io/npm/dm/@semiont/api-client.svg)](https://www.npmjs.com/package/@semiont/api-client)
 [![License](https://img.shields.io/npm/l/@semiont/api-client.svg)](https://github.com/The-AI-Alliance/semiont/blob/main/LICENSE)
@@ -104,12 +105,15 @@ const client = new SemiontApiClient({
 📖 **[API Reference](./docs/API-Reference.md)** - Complete method documentation
+🛠️ **[Utilities Guide](./docs/Utilities.md)** - Text encoding, fuzzy anchoring, SVG utilities
 ## Key Features
 - **Type-safe** - Generated from OpenAPI spec with branded types
-- **W3C compliant** - Web Annotation standard
+- **W3C compliant** - Web Annotation standard with fuzzy text matching
 - **Real-time** - SSE streaming for long operations
-- **Framework-agnostic** - Works everywhere JavaScript runs
+- **Framework-agnostic** - Pure TypeScript utilities work everywhere
+- **Character encoding** - Proper UTF-8, ISO-8859-1, Windows-1252 support
 ## Use Cases

package/dist/{index-CBMGI-nS.d.ts → index--2zlsZdR.d.ts} RENAMED

@@ -3314,14 +3314,6 @@ declare function getTargetSelector(target: Annotation$1['target']): {
  * Check if target has a selector
  */
 declare function hasTargetSelector(target: Annotation$1['target']): boolean;
-/**
- * Extract entity types from annotation bodies
- * Entity types are stored as TextualBody with purpose: "tagging"
- * Accepts any object with a body property matching Annotation['body']
- */
-declare function getEntityTypes(annotation: {
-    body: Annotation$1['body'];
-}): string[];
 /**
  * Type guard to check if an annotation is a highlight
  */
@@ -3348,20 +3340,6 @@ declare function isTag(annotation: Annotation$1): annotation is Annotation$1;
  * @returns The comment text, or undefined if not a comment or no text found
  */
 declare function getCommentText(annotation: Annotation$1): string | undefined;
-/**
- * Extract tag category from a tag annotation's body
- * Tags use dual-body structure: first body has purpose: "tagging" with category value
- * @param annotation - The annotation to extract category from
- * @returns The tag category (e.g., "Issue", "Rule"), or undefined if not a tag or no category found
- */
-declare function getTagCategory(annotation: Annotation$1): string | undefined;
-/**
- * Extract tag schema ID from a tag annotation's body
- * Tags use dual-body structure: second body has purpose: "classifying" with schema ID
- * @param annotation - The annotation to extract schema ID from
- * @returns The schema ID (e.g., "legal-irac"), or undefined if not a tag or no schema found
- */
-declare function getTagSchemaId(annotation: Annotation$1): string | undefined;
 /**
  * Type guard to check if a reference annotation is a stub (unresolved)
  * Stub if no SpecificResource in body array
@@ -3609,6 +3587,73 @@ declare function getChecksum(resource: ResourceDescriptor | undefined): string |
  * Get the language from the primary representation
  */
 declare function getLanguage(resource: ResourceDescriptor | undefined): string | undefined;
+/**
+ * Get storage URI from primary representation
+ *
+ * @param resource - ResourceDescriptor
+ * @returns Storage URI or undefined
+ */
+declare function getStorageUri(resource: ResourceDescriptor | undefined): string | undefined;
+/**
+ * Get creator agent from wasAttributedTo
+ * Handles both single agent and array of agents
+ *
+ * @param resource - ResourceDescriptor
+ * @returns First agent or undefined
+ */
+declare function getCreator(resource: ResourceDescriptor | undefined): components['schemas']['Agent'] | undefined;
+/**
+ * Get derived-from URI
+ * Handles both single URI and array of URIs
+ *
+ * @param resource - ResourceDescriptor
+ * @returns First derivation URI or undefined
+ */
+declare function getDerivedFrom(resource: ResourceDescriptor | undefined): string | undefined;
+/**
+ * Check if resource is archived (application-specific field)
+ *
+ * @param resource - ResourceDescriptor
+ * @returns True if archived, false otherwise
+ */
+declare function isArchived(resource: ResourceDescriptor | undefined): boolean;
+/**
+ * Get entity types from resource (application-specific field)
+ *
+ * @param resource - ResourceDescriptor
+ * @returns Array of entity types, empty if not set
+ */
+declare function getResourceEntityTypes(resource: ResourceDescriptor | undefined): string[];
+/**
+ * Check if resource is a draft (application-specific field)
+ *
+ * @param resource - ResourceDescriptor
+ * @returns True if draft, false otherwise
+ */
+declare function isDraft(resource: ResourceDescriptor | undefined): boolean;
+/**
+ * Map charset names to Node.js Buffer encoding names
+ * Node.js Buffer.toString() supports: 'utf8', 'utf16le', 'latin1', 'base64', 'hex', 'ascii', 'binary', 'ucs2'
+ *
+ * @param charset - Charset name (e.g., "UTF-8", "ISO-8859-1", "Windows-1252")
+ * @returns Node.js BufferEncoding
+ */
+declare function getNodeEncoding(charset: string): BufferEncoding;
+/**
+ * Decode a representation buffer to string using the correct charset
+ * Extracts charset from media type and uses appropriate encoding
+ *
+ * @param buffer - The raw representation data
+ * @param mediaType - Media type with optional charset (e.g., "text/plain; charset=iso-8859-1")
+ * @returns Decoded string
+ *
+ * @example
+ * ```typescript
+ * const content = decodeRepresentation(buffer, "text/plain; charset=utf-8");
+ * const legacy = decodeRepresentation(buffer, "text/plain; charset=windows-1252");
+ * ```
+ */
+declare function decodeRepresentation(buffer: Buffer, mediaType: string): string;
 /**
  * SVG Utility Functions
@@ -3654,6 +3699,90 @@ declare function normalizeCoordinates(point: Point, displayWidth: number, displa
  */
 declare function scaleSvgToNative(svg: string, displayWidth: number, displayHeight: number, imageWidth: number, imageHeight: number): string;
+/**
+ * Text context extraction utilities for W3C Web Annotation TextQuoteSelector
+ *
+ * Provides robust prefix/suffix context extraction with word boundary detection
+ * to ensure fuzzy anchoring works correctly when the same text appears multiple times.
+ *
+ * Also provides AI offset validation and correction for handling AI-generated annotations
+ * where the model may return slightly incorrect character offsets.
+ *
+ * @see https://www.w3.org/TR/annotation-model/#text-quote-selector
+ */
+/**
+ * Extract prefix and suffix context for TextQuoteSelector
+ *
+ * Extracts up to 64 characters before and after the selected text,
+ * extending to word boundaries to avoid cutting words in half.
+ * This ensures prefix/suffix are meaningful context for fuzzy anchoring.
+ *
+ * @param content - Full text content
+ * @param start - Start offset of selection
+ * @param end - End offset of selection
+ * @returns Object with prefix and suffix (undefined if at boundaries)
+ *
+ * @example
+ * ```typescript
+ * const content = "The United States Congress...";
+ * const context = extractContext(content, 4, 17); // "United States"
+ * // Returns: { prefix: "The ", suffix: " Congress..." }
+ * // NOT: { prefix: "nited ", suffix: "gress..." }
+ * ```
+ */
+declare function extractContext(content: string, start: number, end: number): {
+    prefix?: string;
+    suffix?: string;
+};
+/**
+ * Result of validating and correcting AI-provided annotation offsets
+ */
+interface ValidatedAnnotation {
+    start: number;
+    end: number;
+    exact: string;
+    prefix?: string;
+    suffix?: string;
+    corrected: boolean;
+    fuzzyMatched?: boolean;
+    matchQuality?: 'exact' | 'case-insensitive' | 'fuzzy';
+}
+/**
+ * Validate and correct AI-provided annotation offsets with fuzzy matching tolerance
+ *
+ * AI models sometimes return offsets that don't match the actual text position,
+ * or provide text with minor variations (case differences, whitespace, typos).
+ *
+ * This function uses a multi-strategy approach:
+ * 1. Check if AI's offsets are exactly correct
+ * 2. Try exact case-sensitive search
+ * 3. Try case-insensitive search
+ * 4. Try fuzzy matching with Levenshtein distance (5% tolerance)
+ *
+ * This ensures we're maximally tolerant of AI errors while still maintaining
+ * annotation quality and logging what corrections were made.
+ *
+ * @param content - Full text content
+ * @param aiStart - Start offset from AI
+ * @param aiEnd - End offset from AI
+ * @param exact - The exact text that should be at this position (from AI)
+ * @returns Validated annotation with corrected offsets and context
+ * @throws Error if no acceptable match can be found
+ *
+ * @example
+ * ```typescript
+ * // AI said start=1143, but actual text is at 1161
+ * const result = validateAndCorrectOffsets(
+ *   content,
+ *   1143,
+ *   1289,
+ *   "the question \"whether..."
+ * );
+ * // Returns: { start: 1161, end: 1303, exact: "...", corrected: true, matchQuality: 'exact', ... }
+ * ```
+ */
+declare function validateAndCorrectOffsets(content: string, aiStart: number, aiEnd: number, exact: string): ValidatedAnnotation;
 /**
  * Text encoding utilities for consistent charset handling
  *
@@ -3745,4 +3874,4 @@ declare function validateData<T>(schema: {
  */
 declare function isValidEmail(email: string): boolean;
-export { type $defs as $, type AccessToken as A, type BaseUrl as B, type ContentFormat as C, getTagSchemaId as D, type EntityType as E, isStubReference as F, type GoogleCredential as G, isResolvedReference as H, getExactText as I, type JobId as J, getAnnotationExactText as K, getPrimarySelector as L, type Motivation as M, getTextPositionSelector as N, getTextQuoteSelector as O, getSvgSelector as P, validateSvgMarkup as Q, type ResourceUri as R, type SearchQuery as S, type TextPositionSelector as T, type UserDID as U, extractBoundingBox as V, type StoredEvent as W, type EventMetadata as X, type ResourceEventType as Y, getAnnotationUriFromEvent as Z, isEventRelatedToAnnotation as _, type AnnotationUri as a, isResourceEvent as a0, formatEventType as a1, getEventEmoji as a2, formatRelativeTime as a3, getEventDisplayContent as a4, getEventEntityTypes as a5, type ResourceCreationDetails as a6, getResourceCreationDetails as a7, type TextPosition as a8, findTextWithContext as a9, JWTTokenSchema as aA, validateData as aB, isValidEmail as aC, type AuthCode as aD, type MCPToken as aE, email as aF, authCode as aG, googleCredential as aH, accessToken as aI, refreshToken as aJ, mcpToken as aK, cloneToken as aL, jobId as aM, userDID as aN, entityType as aO, searchQuery as aP, baseUrl as aQ, resourceUri as aR, annotationUri as aS, resourceAnnotationUri as aT, type ResourceEvent as aU, verifyPosition as aa, type LocaleInfo as ab, LOCALES as ac, getLocaleInfo as ad, getLocaleNativeName as ae, getLocaleEnglishName as af, formatLocaleDisplay as ag, getAllLocaleCodes as ah, getResourceId as ai, getPrimaryRepresentation as aj, getPrimaryMediaType as ak, getChecksum as al, getLanguage as am, type Point as an, type BoundingBox as ao, createRectangleSvg as ap, createPolygonSvg as aq, createCircleSvg as ar, parseSvgSelector as as, normalizeCoordinates as at, scaleSvgToNative as au, extractCharset as av, decodeWithCharset as aw, type ValidationSuccess as ax, type ValidationFailure as ay, 
type ValidationResult as az, type Email as b, type components as c, type RefreshToken as d, type CloneToken as e, type ResourceAnnotationUri as f, type TextQuoteSelector as g, type SvgSelector as h, type Selector as i, getBodySource as j, getBodyType as k, isBodyResolved as l, getTargetSource as m, getTargetSelector as n, type operations as o, type paths as p, hasTargetSelector as q, getEntityTypes as r, isHighlight as s, isReference as t, isAssessment as u, isComment as v, type webhooks as w, isTag as x, getCommentText as y, getTagCategory as z };
+export { type $defs as $, type AccessToken as A, type BaseUrl as B, type ContentFormat as C, getExactText as D, type EntityType as E, getAnnotationExactText as F, type GoogleCredential as G, getPrimarySelector as H, getTextPositionSelector as I, type JobId as J, getTextQuoteSelector as K, getSvgSelector as L, type Motivation as M, validateSvgMarkup as N, extractBoundingBox as O, type StoredEvent as P, type EventMetadata as Q, type ResourceUri as R, type SearchQuery as S, type TextPositionSelector as T, type UserDID as U, type ResourceEventType as V, getAnnotationUriFromEvent as W, isEventRelatedToAnnotation as X, isResourceEvent as Y, formatEventType as Z, getEventEmoji as _, type AnnotationUri as a, resourceAnnotationUri as a$, formatRelativeTime as a0, getEventDisplayContent as a1, getEventEntityTypes as a2, type ResourceCreationDetails as a3, getResourceCreationDetails as a4, type TextPosition as a5, findTextWithContext as a6, verifyPosition as a7, type LocaleInfo as a8, LOCALES as a9, extractContext as aA, type ValidatedAnnotation as aB, validateAndCorrectOffsets as aC, extractCharset as aD, decodeWithCharset as aE, type ValidationSuccess as aF, type ValidationFailure as aG, type ValidationResult as aH, JWTTokenSchema as aI, validateData as aJ, isValidEmail as aK, type AuthCode as aL, type MCPToken as aM, email as aN, authCode as aO, googleCredential as aP, accessToken as aQ, refreshToken as aR, mcpToken as aS, cloneToken as aT, jobId as aU, userDID as aV, entityType as aW, searchQuery as aX, baseUrl as aY, resourceUri as aZ, annotationUri as a_, getLocaleInfo as aa, getLocaleNativeName as ab, getLocaleEnglishName as ac, formatLocaleDisplay as ad, getAllLocaleCodes as ae, getResourceId as af, getPrimaryRepresentation as ag, getPrimaryMediaType as ah, getChecksum as ai, getLanguage as aj, getStorageUri as ak, getCreator as al, getDerivedFrom as am, isArchived as an, getResourceEntityTypes as ao, isDraft as ap, getNodeEncoding as aq, decodeRepresentation as ar, 
type Point as as, type BoundingBox as at, createRectangleSvg as au, createPolygonSvg as av, createCircleSvg as aw, parseSvgSelector as ax, normalizeCoordinates as ay, scaleSvgToNative as az, type Email as b, type ResourceEvent as b0, type components as c, type RefreshToken as d, type CloneToken as e, type ResourceAnnotationUri as f, type TextQuoteSelector as g, type SvgSelector as h, type Selector as i, getBodySource as j, getBodyType as k, isBodyResolved as l, getTargetSource as m, getTargetSelector as n, type operations as o, type paths as p, hasTargetSelector as q, isHighlight as r, isReference as s, isAssessment as t, isComment as u, isTag as v, type webhooks as w, getCommentText as x, isStubReference as y, isResolvedReference as z };

package/dist/index.d.ts CHANGED

@@ -1,5 +1,5 @@
-import { B as BaseUrl, A as AccessToken, R as ResourceUri, E as EntityType, a as AnnotationUri, c as components, b as Email, p as paths, d as RefreshToken, G as GoogleCredential, C as ContentFormat, S as SearchQuery, e as CloneToken, f as ResourceAnnotationUri, M as Motivation, U as UserDID, J as JobId } from './index-CBMGI-nS.js';
-export { $ as $defs, aD as AuthCode, ao as BoundingBox, X as EventMetadata, aA as JWTTokenSchema, ac as LOCALES, ab as LocaleInfo, aE as MCPToken, an as Point, a6 as ResourceCreationDetails, Y as ResourceEventType, i as Selector, W as StoredEvent, h as SvgSelector, a8 as TextPosition, T as TextPositionSelector, g as TextQuoteSelector, ay as ValidationFailure, az as ValidationResult, ax as ValidationSuccess, aI as accessToken, aS as annotationUri, aG as authCode, aQ as baseUrl, aL as cloneToken, ar as createCircleSvg, aq as createPolygonSvg, ap as createRectangleSvg, aw as decodeWithCharset, aF as email, aO as entityType, V as extractBoundingBox, av as extractCharset, a9 as findTextWithContext, a1 as formatEventType, ag as formatLocaleDisplay, a3 as formatRelativeTime, ah as getAllLocaleCodes, K as getAnnotationExactText, Z as getAnnotationUriFromEvent, j as getBodySource, k as getBodyType, al as getChecksum, y as getCommentText, r as getEntityTypes, a4 as getEventDisplayContent, a2 as getEventEmoji, a5 as getEventEntityTypes, I as getExactText, am as getLanguage, af as getLocaleEnglishName, ad as getLocaleInfo, ae as getLocaleNativeName, ak as getPrimaryMediaType, aj as getPrimaryRepresentation, L as getPrimarySelector, a7 as getResourceCreationDetails, ai as getResourceId, P as getSvgSelector, z as getTagCategory, D as getTagSchemaId, n as getTargetSelector, m as getTargetSource, N as getTextPositionSelector, O as getTextQuoteSelector, aH as googleCredential, q as hasTargetSelector, u as isAssessment, l as isBodyResolved, v as isComment, _ as isEventRelatedToAnnotation, s as isHighlight, t as isReference, H as isResolvedReference, a0 as isResourceEvent, F as isStubReference, x as isTag, aC as isValidEmail, aM as jobId, aK as mcpToken, at as normalizeCoordinates, o as operations, as as parseSvgSelector, aJ as refreshToken, aT as resourceAnnotationUri, aR as resourceUri, au as scaleSvgToNative, aP as searchQuery, aN as userDID, aB as validateData, Q as 
validateSvgMarkup, aa as verifyPosition, w as webhooks } from './index-CBMGI-nS.js';
+import { B as BaseUrl, A as AccessToken, R as ResourceUri, E as EntityType, a as AnnotationUri, c as components, b as Email, p as paths, d as RefreshToken, G as GoogleCredential, C as ContentFormat, S as SearchQuery, e as CloneToken, f as ResourceAnnotationUri, M as Motivation, U as UserDID, J as JobId } from './index--2zlsZdR.js';
+export { $ as $defs, aL as AuthCode, at as BoundingBox, Q as EventMetadata, aI as JWTTokenSchema, a9 as LOCALES, a8 as LocaleInfo, aM as MCPToken, as as Point, a3 as ResourceCreationDetails, V as ResourceEventType, i as Selector, P as StoredEvent, h as SvgSelector, a5 as TextPosition, T as TextPositionSelector, g as TextQuoteSelector, aB as ValidatedAnnotation, aG as ValidationFailure, aH as ValidationResult, aF as ValidationSuccess, aQ as accessToken, a_ as annotationUri, aO as authCode, aY as baseUrl, aT as cloneToken, aw as createCircleSvg, av as createPolygonSvg, au as createRectangleSvg, ar as decodeRepresentation, aE as decodeWithCharset, aN as email, aW as entityType, O as extractBoundingBox, aD as extractCharset, aA as extractContext, a6 as findTextWithContext, Z as formatEventType, ad as formatLocaleDisplay, a0 as formatRelativeTime, ae as getAllLocaleCodes, F as getAnnotationExactText, W as getAnnotationUriFromEvent, j as getBodySource, k as getBodyType, ai as getChecksum, x as getCommentText, al as getCreator, am as getDerivedFrom, a1 as getEventDisplayContent, _ as getEventEmoji, a2 as getEventEntityTypes, D as getExactText, aj as getLanguage, ac as getLocaleEnglishName, aa as getLocaleInfo, ab as getLocaleNativeName, aq as getNodeEncoding, ah as getPrimaryMediaType, ag as getPrimaryRepresentation, H as getPrimarySelector, a4 as getResourceCreationDetails, ao as getResourceEntityTypes, af as getResourceId, ak as getStorageUri, L as getSvgSelector, n as getTargetSelector, m as getTargetSource, I as getTextPositionSelector, K as getTextQuoteSelector, aP as googleCredential, q as hasTargetSelector, an as isArchived, t as isAssessment, l as isBodyResolved, u as isComment, ap as isDraft, X as isEventRelatedToAnnotation, r as isHighlight, s as isReference, z as isResolvedReference, Y as isResourceEvent, y as isStubReference, v as isTag, aK as isValidEmail, aU as jobId, aS as mcpToken, ay as normalizeCoordinates, o as operations, ax as parseSvgSelector, aR as 
refreshToken, a$ as resourceAnnotationUri, aZ as resourceUri, az as scaleSvgToNative, aX as searchQuery, aV as userDID, aC as validateAndCorrectOffsets, aJ as validateData, N as validateSvgMarkup, a7 as verifyPosition, w as webhooks } from './index--2zlsZdR.js';
 /**
  * TypeScript types for Server-Sent Events (SSE) streaming

package/dist/index.js CHANGED

@@ -1138,23 +1138,6 @@ function getTargetSelector(target) {
 function hasTargetSelector(target) {
   return typeof target !== "string" && target.selector !== void 0;
 }
-function getEntityTypes(annotation) {
-  if (Array.isArray(annotation.body)) {
-    const entityTags = [];
-    for (const item of annotation.body) {
-      if (typeof item === "object" && item !== null && "type" in item && "value" in item && "purpose" in item) {
-        const itemType = item.type;
-        const itemValue = item.value;
-        const itemPurpose = item.purpose;
-        if (itemType === "TextualBody" && itemPurpose === "tagging" && typeof itemValue === "string" && itemValue.length > 0) {
-          entityTags.push(itemValue);
-        }
-      }
-    }
-    return entityTags;
-  }
-  return [];
-}
 function isHighlight(annotation) {
   return annotation.motivation === "highlighting";
 }
@@ -1178,24 +1161,6 @@ function getCommentText(annotation) {
   }
   return void 0;
 }
-function getTagCategory(annotation) {
-  if (!isTag(annotation)) return void 0;
-  const bodies = Array.isArray(annotation.body) ? annotation.body : [annotation.body];
-  const taggingBody = bodies.find((b) => b && "purpose" in b && b.purpose === "tagging");
-  if (taggingBody && "value" in taggingBody) {
-    return taggingBody.value;
-  }
-  return void 0;
-}
-function getTagSchemaId(annotation) {
-  if (!isTag(annotation)) return void 0;
-  const bodies = Array.isArray(annotation.body) ? annotation.body : [annotation.body];
-  const classifyingBody = bodies.find((b) => b && "purpose" in b && b.purpose === "classifying");
-  if (classifyingBody && "value" in classifyingBody) {
-    return classifyingBody.value;
-  }
-  return void 0;
-}
 function isStubReference(annotation) {
   return isReference(annotation) && !isBodyResolved(annotation.body);
 }
@@ -1671,6 +1636,49 @@ function getChecksum(resource) {
 function getLanguage(resource) {
   return getPrimaryRepresentation(resource)?.language;
 }
+function getStorageUri(resource) {
+  return getPrimaryRepresentation(resource)?.storageUri;
+}
+function getCreator(resource) {
+  if (!resource?.wasAttributedTo) return void 0;
+  return Array.isArray(resource.wasAttributedTo) ? resource.wasAttributedTo[0] : resource.wasAttributedTo;
+}
+function getDerivedFrom(resource) {
+  if (!resource?.wasDerivedFrom) return void 0;
+  return Array.isArray(resource.wasDerivedFrom) ? resource.wasDerivedFrom[0] : resource.wasDerivedFrom;
+}
+function isArchived(resource) {
+  return resource?.archived === true;
+}
+function getResourceEntityTypes(resource) {
+  return resource?.entityTypes || [];
+}
+function isDraft(resource) {
+  return resource?.isDraft === true;
+}
+function getNodeEncoding(charset) {
+  const normalized = charset.toLowerCase().replace(/[-_]/g, "");
+  const charsetMap = {
+    "utf8": "utf8",
+    "iso88591": "latin1",
+    "latin1": "latin1",
+    "ascii": "ascii",
+    "usascii": "ascii",
+    "utf16le": "utf16le",
+    "ucs2": "ucs2",
+    "binary": "binary",
+    "windows1252": "latin1",
+    // Windows-1252 is a superset of Latin-1
+    "cp1252": "latin1"
+  };
+  return charsetMap[normalized] || "utf8";
+}
+function decodeRepresentation(buffer, mediaType) {
+  const charsetMatch = mediaType.match(/charset=([^\s;]+)/i);
+  const charset = (charsetMatch?.[1] || "utf-8").toLowerCase();
+  const encoding = getNodeEncoding(charset);
+  return buffer.toString(encoding);
+}
 // src/utils/svg-utils.ts
 function createRectangleSvg(start, end) {
@@ -1768,6 +1776,180 @@ function scaleSvgToNative(svg, displayWidth, displayHeight, imageWidth, imageHei
   return svg;
 }
+// src/utils/text-context.ts
+function extractContext(content, start, end) {
+  const CONTEXT_LENGTH = 64;
+  const MAX_EXTENSION = 32;
+  let prefix;
+  if (start > 0) {
+    let prefixStart = Math.max(0, start - CONTEXT_LENGTH);
+    let extensionCount = 0;
+    while (prefixStart > 0 && extensionCount < MAX_EXTENSION) {
+      const char = content[prefixStart - 1];
+      if (!char || /[\s.,;:!?'"()\[\]{}<>\/\\]/.test(char)) {
+        break;
+      }
+      prefixStart--;
+      extensionCount++;
+    }
+    prefix = content.substring(prefixStart, start);
+  }
+  let suffix;
+  if (end < content.length) {
+    let suffixEnd = Math.min(content.length, end + CONTEXT_LENGTH);
+    let extensionCount = 0;
+    while (suffixEnd < content.length && extensionCount < MAX_EXTENSION) {
+      const char = content[suffixEnd];
+      if (!char || /[\s.,;:!?'"()\[\]{}<>\/\\]/.test(char)) {
+        break;
+      }
+      suffixEnd++;
+      extensionCount++;
+    }
+    suffix = content.substring(end, suffixEnd);
+  }
+  return { prefix, suffix };
+}
+function levenshteinDistance(str1, str2) {
+  const len1 = str1.length;
+  const len2 = str2.length;
+  const matrix = [];
+  for (let i = 0; i <= len1; i++) {
+    matrix[i] = [i];
+  }
+  for (let j = 0; j <= len2; j++) {
+    matrix[0][j] = j;
+  }
+  for (let i = 1; i <= len1; i++) {
+    for (let j = 1; j <= len2; j++) {
+      const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
+      const deletion = matrix[i - 1][j] + 1;
+      const insertion = matrix[i][j - 1] + 1;
+      const substitution = matrix[i - 1][j - 1] + cost;
+      matrix[i][j] = Math.min(deletion, insertion, substitution);
+    }
+  }
+  return matrix[len1][len2];
+}
+function findBestMatch(content, searchText, aiStart, aiEnd) {
+  const maxFuzzyDistance = Math.max(5, Math.floor(searchText.length * 0.05));
+  const exactIndex = content.indexOf(searchText);
+  if (exactIndex !== -1) {
+    return {
+      start: exactIndex,
+      end: exactIndex + searchText.length,
+      matchQuality: "exact"
+    };
+  }
+  console.log("[findBestMatch] Exact match failed, trying case-insensitive...");
+  const lowerContent = content.toLowerCase();
+  const lowerSearch = searchText.toLowerCase();
+  const caseInsensitiveIndex = lowerContent.indexOf(lowerSearch);
+  if (caseInsensitiveIndex !== -1) {
+    console.log("[findBestMatch] Found case-insensitive match");
+    return {
+      start: caseInsensitiveIndex,
+      end: caseInsensitiveIndex + searchText.length,
+      matchQuality: "case-insensitive"
+    };
+  }
+  console.log("[findBestMatch] Case-insensitive failed, trying fuzzy match...");
+  const windowSize = searchText.length;
+  const searchRadius = Math.min(500, content.length);
+  const searchStart = Math.max(0, aiStart - searchRadius);
+  const searchEnd = Math.min(content.length, aiEnd + searchRadius);
+  let bestMatch = null;
+  for (let i = searchStart; i <= searchEnd - windowSize; i++) {
+    const candidate = content.substring(i, i + windowSize);
+    const distance = levenshteinDistance(searchText, candidate);
+    if (distance <= maxFuzzyDistance) {
+      if (!bestMatch || distance < bestMatch.distance) {
+        bestMatch = { start: i, distance };
+        console.log(`[findBestMatch] Found fuzzy match at ${i} with distance ${distance}`);
+      }
+    }
+  }
+  if (bestMatch) {
+    return {
+      start: bestMatch.start,
+      end: bestMatch.start + windowSize,
+      matchQuality: "fuzzy"
+    };
+  }
+  console.log("[findBestMatch] No acceptable match found");
+  return null;
+}
+function validateAndCorrectOffsets(content, aiStart, aiEnd, exact) {
+  const exactPreview = exact.length > 50 ? exact.substring(0, 50) + "..." : exact;
+  const textAtOffset = content.substring(aiStart, aiEnd);
+  if (textAtOffset === exact) {
+    console.log(`[validateAndCorrectOffsets] \u2713 Offsets correct for: "${exactPreview}"`);
+    const context2 = extractContext(content, aiStart, aiEnd);
+    return {
+      start: aiStart,
+      end: aiEnd,
+      exact,
+      prefix: context2.prefix,
+      suffix: context2.suffix,
+      corrected: false,
+      matchQuality: "exact"
+    };
+  }
+  const foundPreview = textAtOffset.length > 50 ? textAtOffset.substring(0, 50) + "..." : textAtOffset;
+  console.warn(
+    `[validateAndCorrectOffsets] \u26A0 AI offset mismatch:
+  Expected text: "${exactPreview}"
+  Found at AI offset (${aiStart}-${aiEnd}): "${foundPreview}"
+  Attempting multi-strategy search...`
+  );
+  const match = findBestMatch(content, exact, aiStart, aiEnd);
+  if (!match) {
+    const exactLong = exact.length > 100 ? exact.substring(0, 100) + "..." : exact;
+    console.error(
+      `[validateAndCorrectOffsets] \u2717 No acceptable match found:
+  AI offsets: start=${aiStart}, end=${aiEnd}
+  AI text: "${exactLong}"
+  Text at AI offset: "${foundPreview}"
+  All search strategies (exact, case-insensitive, fuzzy) failed.
+  This suggests the AI hallucinated text that doesn't exist in the document.`
+    );
+    throw new Error(
+      "Cannot find acceptable match for text in content. All search strategies failed. Text may be hallucinated."
+    );
+  }
+  const actualText = content.substring(match.start, match.end);
+  const actualPreview = actualText.length > 50 ? actualText.substring(0, 50) + "..." : actualText;
+  const offsetDelta = match.start - aiStart;
+  const matchSymbol = match.matchQuality === "exact" ? "\u2713" : match.matchQuality === "case-insensitive" ? "\u2248" : "~";
+  console.warn(
+    `[validateAndCorrectOffsets] ${matchSymbol} Found ${match.matchQuality} match:
+  AI offsets: start=${aiStart}, end=${aiEnd}
+  Corrected: start=${match.start}, end=${match.end}
+  Offset delta: ${offsetDelta} characters
+  Actual text: "${actualPreview}"`
+  );
+  if (match.matchQuality === "fuzzy") {
+    console.warn(
+      `[validateAndCorrectOffsets] Fuzzy match details:
+  AI provided: "${exactPreview}"
+  Found in doc: "${actualPreview}"
+  Minor text differences detected - using document version`
+    );
+  }
+  const context = extractContext(content, match.start, match.end);
+  return {
+    start: match.start,
+    end: match.end,
+    exact: actualText,
+    // Use actual text from document, not AI's version
+    prefix: context.prefix,
+    suffix: context.suffix,
+    corrected: true,
+    fuzzyMatched: match.matchQuality !== "exact",
+    matchQuality: match.matchQuality
+  };
+}
 // src/utils/text-encoding.ts
 function extractCharset(mediaType) {
   const charsetMatch = mediaType.match(/charset=([^\s;]+)/i);
@@ -1851,6 +2033,6 @@ function getMimeCategory(mimeType) {
   return "unsupported";
 }
-export { APIError, JWTTokenSchema, LOCALES, SSEClient, SemiontApiClient, accessToken, annotationUri, authCode, baseUrl, cloneToken, createCircleSvg, createPolygonSvg, createRectangleSvg, decodeWithCharset, email, entityType, extractBoundingBox, extractCharset, findTextWithContext, formatEventType, formatLocaleDisplay, formatRelativeTime, getAllLocaleCodes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getEntityTypes, getEventDisplayContent, getEventEmoji, getEventEntityTypes, getExactText, getExtensionForMimeType, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getMimeCategory, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceCreationDetails, getResourceId, getSvgSelector, getTagCategory, getTagSchemaId, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isAssessment, isBodyResolved, isComment, isEventRelatedToAnnotation, isHighlight, isImageMimeType, isReference, isResolvedReference, isResourceEvent, isStubReference, isTag, isTextMimeType, isValidEmail, jobId, mcpToken, normalizeCoordinates, parseSvgSelector, refreshToken, resourceAnnotationUri, resourceUri, scaleSvgToNative, searchQuery, userDID, validateData, validateSvgMarkup, verifyPosition };
+export { APIError, JWTTokenSchema, LOCALES, SSEClient, SemiontApiClient, accessToken, annotationUri, authCode, baseUrl, cloneToken, createCircleSvg, createPolygonSvg, createRectangleSvg, decodeRepresentation, decodeWithCharset, email, entityType, extractBoundingBox, extractCharset, extractContext, findTextWithContext, formatEventType, formatLocaleDisplay, formatRelativeTime, getAllLocaleCodes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getEventDisplayContent, getEventEmoji, getEventEntityTypes, getExactText, getExtensionForMimeType, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getMimeCategory, getNodeEncoding, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceCreationDetails, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isArchived, isAssessment, isBodyResolved, isComment, isDraft, isEventRelatedToAnnotation, isHighlight, isImageMimeType, isReference, isResolvedReference, isResourceEvent, isStubReference, isTag, isTextMimeType, isValidEmail, jobId, mcpToken, normalizeCoordinates, parseSvgSelector, refreshToken, resourceAnnotationUri, resourceUri, scaleSvgToNative, searchQuery, userDID, validateAndCorrectOffsets, validateData, validateSvgMarkup, verifyPosition };
 //# sourceMappingURL=index.js.map
 //# sourceMappingURL=index.js.map