@semiont/api-client 0.2.33 → 0.2.34-build.89
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/dist/index.d.ts +26 -207
- package/dist/index.js +52 -113
- package/dist/index.js.map +1 -1
- package/dist/utils/index.d.ts +603 -1
- package/dist/utils/index.js +1 -7
- package/dist/utils/index.js.map +1 -1
- package/package.json +3 -7
- package/dist/index-CJqmerJr.d.ts +0 -3859
package/dist/utils/index.d.ts
CHANGED
|
@@ -1 +1,603 @@
|
|
|
1
|
-
|
|
1
|
+
import { components, ResourceUri } from '@semiont/core';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Annotation and Selector Utilities
|
|
5
|
+
*
|
|
6
|
+
* Pure TypeScript utilities for working with W3C Web Annotations.
|
|
7
|
+
* No React dependencies - safe to use in any JavaScript environment.
|
|
8
|
+
*
|
|
9
|
+
* Body is either empty array (stub) or single SpecificResource (resolved)
|
|
10
|
+
* Body can be array of TextualBody (tagging) + SpecificResource (linking)
|
|
11
|
+
* Target can be simple string IRI or object with source and optional selector
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
type Annotation = components['schemas']['Annotation'];
|
|
15
|
+
type HighlightAnnotation = Annotation;
|
|
16
|
+
type ReferenceAnnotation = Annotation;
|
|
17
|
+
type TextPositionSelector = components['schemas']['TextPositionSelector'];
|
|
18
|
+
type TextQuoteSelector = components['schemas']['TextQuoteSelector'];
|
|
19
|
+
type SvgSelector = components['schemas']['SvgSelector'];
|
|
20
|
+
type FragmentSelector = components['schemas']['FragmentSelector'];
|
|
21
|
+
type Selector = TextPositionSelector | TextQuoteSelector | SvgSelector | FragmentSelector;
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Get the source from an annotation body (null if stub)
|
|
25
|
+
* Search for SpecificResource in body array
|
|
26
|
+
*/
|
|
27
|
+
declare function getBodySource(body: Annotation['body']): ResourceUri | null;
|
|
28
|
+
/**
|
|
29
|
+
* Get the type from an annotation body (returns first body type in array)
|
|
30
|
+
*/
|
|
31
|
+
declare function getBodyType(body: Annotation['body']): 'TextualBody' | 'SpecificResource' | null;
|
|
32
|
+
/**
|
|
33
|
+
* Check if body is resolved (has a source)
|
|
34
|
+
* Check for SpecificResource in body array
|
|
35
|
+
*/
|
|
36
|
+
declare function isBodyResolved(body: Annotation['body']): boolean;
|
|
37
|
+
/**
|
|
38
|
+
* Get the source IRI from target (handles both string and object forms)
|
|
39
|
+
*/
|
|
40
|
+
declare function getTargetSource(target: Annotation['target']): ResourceUri;
|
|
41
|
+
/**
|
|
42
|
+
* Get the selector from target (undefined if string or no selector)
|
|
43
|
+
*/
|
|
44
|
+
declare function getTargetSelector(target: Annotation['target']): {
|
|
45
|
+
type: "TextPositionSelector";
|
|
46
|
+
start: number;
|
|
47
|
+
end: number;
|
|
48
|
+
} | {
|
|
49
|
+
type: "TextQuoteSelector";
|
|
50
|
+
exact: string;
|
|
51
|
+
prefix?: string;
|
|
52
|
+
suffix?: string;
|
|
53
|
+
} | {
|
|
54
|
+
type: "SvgSelector";
|
|
55
|
+
value: string;
|
|
56
|
+
} | {
|
|
57
|
+
type: "FragmentSelector";
|
|
58
|
+
value: string;
|
|
59
|
+
conformsTo?: string;
|
|
60
|
+
} | ({
|
|
61
|
+
type: "TextPositionSelector";
|
|
62
|
+
start: number;
|
|
63
|
+
end: number;
|
|
64
|
+
} | {
|
|
65
|
+
type: "TextQuoteSelector";
|
|
66
|
+
exact: string;
|
|
67
|
+
prefix?: string;
|
|
68
|
+
suffix?: string;
|
|
69
|
+
} | {
|
|
70
|
+
type: "SvgSelector";
|
|
71
|
+
value: string;
|
|
72
|
+
} | {
|
|
73
|
+
type: "FragmentSelector";
|
|
74
|
+
value: string;
|
|
75
|
+
conformsTo?: string;
|
|
76
|
+
})[] | undefined;
|
|
77
|
+
/**
|
|
78
|
+
* Check if target has a selector
|
|
79
|
+
*/
|
|
80
|
+
declare function hasTargetSelector(target: Annotation['target']): boolean;
|
|
81
|
+
/**
|
|
82
|
+
* Type guard to check if an annotation is a highlight
|
|
83
|
+
*/
|
|
84
|
+
declare function isHighlight(annotation: Annotation): annotation is HighlightAnnotation;
|
|
85
|
+
/**
|
|
86
|
+
* Type guard to check if an annotation is a reference (linking)
|
|
87
|
+
*/
|
|
88
|
+
declare function isReference(annotation: Annotation): annotation is ReferenceAnnotation;
|
|
89
|
+
/**
|
|
90
|
+
* Type guard to check if an annotation is an assessment
|
|
91
|
+
*/
|
|
92
|
+
declare function isAssessment(annotation: Annotation): annotation is Annotation;
|
|
93
|
+
/**
|
|
94
|
+
* Type guard to check if an annotation is a comment
|
|
95
|
+
*/
|
|
96
|
+
declare function isComment(annotation: Annotation): annotation is Annotation;
|
|
97
|
+
/**
|
|
98
|
+
* Type guard to check if an annotation is a tag
|
|
99
|
+
*/
|
|
100
|
+
declare function isTag(annotation: Annotation): annotation is Annotation;
|
|
101
|
+
/**
|
|
102
|
+
* Extract comment text from a comment annotation's body
|
|
103
|
+
* @param annotation - The annotation to extract comment text from
|
|
104
|
+
* @returns The comment text, or undefined if not a comment or no text found
|
|
105
|
+
*/
|
|
106
|
+
declare function getCommentText(annotation: Annotation): string | undefined;
|
|
107
|
+
/**
|
|
108
|
+
* Check if a reference annotation is a stub (unresolved); returns a plain boolean, not a type predicate
|
|
109
|
+
* Stub if no SpecificResource in body array
|
|
110
|
+
*/
|
|
111
|
+
declare function isStubReference(annotation: Annotation): boolean;
|
|
112
|
+
/**
|
|
113
|
+
* Type guard to check if a reference annotation is resolved
|
|
114
|
+
* Resolved if SpecificResource exists in body array
|
|
115
|
+
*/
|
|
116
|
+
declare function isResolvedReference(annotation: Annotation): annotation is ReferenceAnnotation;
|
|
117
|
+
/**
|
|
118
|
+
* Get the exact text from a selector (single or array)
|
|
119
|
+
*
|
|
120
|
+
* When selector is an array, tries to find a TextQuoteSelector (which has exact text).
|
|
121
|
+
* TextPositionSelector does not have exact text, only character offsets.
|
|
122
|
+
* Handles undefined selector (when target is a string IRI with no selector)
|
|
123
|
+
*/
|
|
124
|
+
declare function getExactText(selector: Selector | Selector[] | undefined): string;
|
|
125
|
+
/**
|
|
126
|
+
* Get the exact text from an annotation's target selector
|
|
127
|
+
* Uses getTargetSelector helper to safely get selector
|
|
128
|
+
*/
|
|
129
|
+
declare function getAnnotationExactText(annotation: Annotation): string;
|
|
130
|
+
/**
|
|
131
|
+
* Get the primary selector from a selector (single or array)
|
|
132
|
+
*
|
|
133
|
+
* When selector is an array, returns the first selector.
|
|
134
|
+
* When selector is a single object, returns it as-is.
|
|
135
|
+
*/
|
|
136
|
+
declare function getPrimarySelector(selector: Selector | Selector[]): Selector;
|
|
137
|
+
/**
|
|
138
|
+
* Get TextPositionSelector from a selector (single or array)
|
|
139
|
+
*
|
|
140
|
+
* Returns the first TextPositionSelector found, or null if none exists.
|
|
141
|
+
* Handles undefined selector (when target is a string IRI with no selector)
|
|
142
|
+
*/
|
|
143
|
+
declare function getTextPositionSelector(selector: Selector | Selector[] | undefined): TextPositionSelector | null;
|
|
144
|
+
/**
|
|
145
|
+
* Get TextQuoteSelector from a selector (single or array)
|
|
146
|
+
*
|
|
147
|
+
* Returns the first TextQuoteSelector found, or null if none exists.
|
|
148
|
+
*/
|
|
149
|
+
declare function getTextQuoteSelector(selector: Selector | Selector[]): TextQuoteSelector | null;
|
|
150
|
+
/**
|
|
151
|
+
* Get SvgSelector from a selector (single or array)
|
|
152
|
+
*
|
|
153
|
+
* Returns the first SvgSelector found, or null if none exists.
|
|
154
|
+
*/
|
|
155
|
+
declare function getSvgSelector(selector: Selector | Selector[] | undefined): SvgSelector | null;
|
|
156
|
+
/**
|
|
157
|
+
* Get FragmentSelector from a selector (single or array)
|
|
158
|
+
*
|
|
159
|
+
* Returns the first FragmentSelector found, or null if none exists.
|
|
160
|
+
*/
|
|
161
|
+
declare function getFragmentSelector(selector: Selector | Selector[] | undefined): FragmentSelector | null;
|
|
162
|
+
/**
|
|
163
|
+
* Validate SVG markup for W3C compliance
|
|
164
|
+
*
|
|
165
|
+
* Checks that:
|
|
166
|
+
* - SVG contains xmlns attribute
|
|
167
|
+
* - SVG is well-formed XML
|
|
168
|
+
* - SVG contains at least one shape element
|
|
169
|
+
*
|
|
170
|
+
* @returns null if valid, error message if invalid
|
|
171
|
+
*/
|
|
172
|
+
declare function validateSvgMarkup(svg: string): string | null;
|
|
173
|
+
/**
|
|
174
|
+
* Extract bounding box from SVG markup
|
|
175
|
+
*
|
|
176
|
+
* Attempts to extract x, y, width, height from the SVG viewBox or root element.
|
|
177
|
+
* Returns null if bounding box cannot be determined.
|
|
178
|
+
*/
|
|
179
|
+
declare function extractBoundingBox(svg: string): {
|
|
180
|
+
x: number;
|
|
181
|
+
y: number;
|
|
182
|
+
width: number;
|
|
183
|
+
height: number;
|
|
184
|
+
} | null;
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Fuzzy Anchoring for W3C Web Annotation TextQuoteSelector
|
|
188
|
+
*
|
|
189
|
+
* Uses prefix/suffix context to disambiguate when the same text appears multiple times.
|
|
190
|
+
* Implements fuzzy matching as specified in the W3C Web Annotation Data Model.
|
|
191
|
+
*
|
|
192
|
+
* @see https://www.w3.org/TR/annotation-model/#text-quote-selector
|
|
193
|
+
*/
|
|
194
|
+
interface TextPosition {
|
|
195
|
+
start: number;
|
|
196
|
+
end: number;
|
|
197
|
+
}
|
|
198
|
+
type MatchQuality = 'exact' | 'normalized' | 'case-insensitive' | 'fuzzy';
|
|
199
|
+
/**
|
|
200
|
+
* Normalize text for comparison - handles common document editing changes
|
|
201
|
+
*
|
|
202
|
+
* Collapses whitespace, converts curly quotes to straight quotes,
|
|
203
|
+
* and normalizes common punctuation variations.
|
|
204
|
+
*/
|
|
205
|
+
declare function normalizeText(text: string): string;
|
|
206
|
+
/**
|
|
207
|
+
* Find best match for text in content using multi-strategy search
|
|
208
|
+
*
|
|
209
|
+
* Shared core logic used by both findTextWithContext and validateAndCorrectOffsets.
|
|
210
|
+
*
|
|
211
|
+
* @param content - Full text content to search within
|
|
212
|
+
* @param searchText - The text to find
|
|
213
|
+
* @param positionHint - Optional hint for where to search (TextPositionSelector.start)
|
|
214
|
+
* @returns Match with position and quality, or null if not found
|
|
215
|
+
*/
|
|
216
|
+
declare function findBestTextMatch(content: string, searchText: string, positionHint?: number): {
|
|
217
|
+
start: number;
|
|
218
|
+
end: number;
|
|
219
|
+
matchQuality: MatchQuality;
|
|
220
|
+
} | null;
|
|
221
|
+
/**
|
|
222
|
+
* Find text using exact match with optional prefix/suffix context
|
|
223
|
+
*
|
|
224
|
+
* When the exact text appears multiple times in the content, prefix and suffix
|
|
225
|
+
* are used to disambiguate and find the correct occurrence.
|
|
226
|
+
*
|
|
227
|
+
* If exact text is not found, uses multi-strategy fuzzy matching (normalization,
|
|
228
|
+
* case-insensitive, Levenshtein distance) to locate changed text.
|
|
229
|
+
*
|
|
230
|
+
* @param content - Full text content to search within
|
|
231
|
+
* @param exact - The exact text to find
|
|
232
|
+
* @param prefix - Optional text that should appear immediately before the match
|
|
233
|
+
* @param suffix - Optional text that should appear immediately after the match
|
|
234
|
+
* @param positionHint - Optional position hint (from TextPositionSelector) for fuzzy search
|
|
235
|
+
* @returns Position of the matched text, or null if not found
|
|
236
|
+
*
|
|
237
|
+
* @example
|
|
238
|
+
* ```typescript
|
|
239
|
+
* const content = "The cat sat. The cat ran.";
|
|
240
|
+
* // Find second "The cat" occurrence
|
|
241
|
+
* const pos = findTextWithContext(content, "The cat", "sat. ", " ran");
|
|
242
|
+
* // Returns { start: 13, end: 20 }
|
|
243
|
+
* ```
|
|
244
|
+
*/
|
|
245
|
+
declare function findTextWithContext(content: string, exact: string, prefix?: string, suffix?: string, positionHint?: number): TextPosition | null;
|
|
246
|
+
/**
|
|
247
|
+
* Verify that a position correctly points to the exact text
|
|
248
|
+
* Useful for debugging and validation
|
|
249
|
+
*/
|
|
250
|
+
declare function verifyPosition(content: string, position: TextPosition, expectedExact: string): boolean;
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Locale information
|
|
254
|
+
* Copied from SDK for frontend use
|
|
255
|
+
*/
|
|
256
|
+
interface LocaleInfo {
|
|
257
|
+
code: string;
|
|
258
|
+
nativeName: string;
|
|
259
|
+
englishName: string;
|
|
260
|
+
}
|
|
261
|
+
declare const LOCALES: readonly LocaleInfo[];
|
|
262
|
+
/**
|
|
263
|
+
* Get locale information by code
|
|
264
|
+
*/
|
|
265
|
+
declare function getLocaleInfo(code: string | undefined): LocaleInfo | undefined;
|
|
266
|
+
/**
|
|
267
|
+
* Get the native name of a language by its locale code
|
|
268
|
+
*/
|
|
269
|
+
declare function getLocaleNativeName(code: string | undefined): string | undefined;
|
|
270
|
+
/**
|
|
271
|
+
* Get the English name of a language by its locale code
|
|
272
|
+
*/
|
|
273
|
+
declare function getLocaleEnglishName(code: string | undefined): string | undefined;
|
|
274
|
+
/**
|
|
275
|
+
* Format locale code for display as "Native Name (code)"
|
|
276
|
+
*/
|
|
277
|
+
declare function formatLocaleDisplay(code: string | undefined): string | undefined;
|
|
278
|
+
/**
|
|
279
|
+
* Get all supported locale codes
|
|
280
|
+
*/
|
|
281
|
+
declare function getAllLocaleCodes(): readonly string[];
|
|
282
|
+
|
|
283
|
+
/**
|
|
284
|
+
* Helper functions for working with W3C ResourceDescriptor
|
|
285
|
+
*/
|
|
286
|
+
|
|
287
|
+
type ResourceDescriptor = components['schemas']['ResourceDescriptor'];
|
|
288
|
+
type Representation = components['schemas']['Representation'];
|
|
289
|
+
/**
|
|
290
|
+
* Get the resource ID from @id property
|
|
291
|
+
*
|
|
292
|
+
* For internal resources: extracts UUID from "http://localhost:4000/resources/{uuid}"
|
|
293
|
+
* For external resources: returns undefined
|
|
294
|
+
*
|
|
295
|
+
* This is used for routing - the frontend URL should contain only the resource ID,
|
|
296
|
+
* not the full HTTP URI.
|
|
297
|
+
*/
|
|
298
|
+
declare function getResourceId(resource: ResourceDescriptor | undefined): string | undefined;
|
|
299
|
+
/**
|
|
300
|
+
* Get the primary representation (first or only representation)
|
|
301
|
+
*/
|
|
302
|
+
declare function getPrimaryRepresentation(resource: ResourceDescriptor | undefined): Representation | undefined;
|
|
303
|
+
/**
|
|
304
|
+
* Get the media type from the primary representation
|
|
305
|
+
*/
|
|
306
|
+
declare function getPrimaryMediaType(resource: ResourceDescriptor | undefined): string | undefined;
|
|
307
|
+
/**
|
|
308
|
+
* Get the checksum from the primary representation
|
|
309
|
+
*/
|
|
310
|
+
declare function getChecksum(resource: ResourceDescriptor | undefined): string | undefined;
|
|
311
|
+
/**
|
|
312
|
+
* Get the language from the primary representation
|
|
313
|
+
*/
|
|
314
|
+
declare function getLanguage(resource: ResourceDescriptor | undefined): string | undefined;
|
|
315
|
+
/**
|
|
316
|
+
* Get storage URI from primary representation
|
|
317
|
+
*
|
|
318
|
+
* @param resource - ResourceDescriptor
|
|
319
|
+
* @returns Storage URI or undefined
|
|
320
|
+
*/
|
|
321
|
+
declare function getStorageUri(resource: ResourceDescriptor | undefined): string | undefined;
|
|
322
|
+
/**
|
|
323
|
+
* Get creator agent from wasAttributedTo
|
|
324
|
+
* Handles both single agent and array of agents
|
|
325
|
+
*
|
|
326
|
+
* @param resource - ResourceDescriptor
|
|
327
|
+
* @returns First agent or undefined
|
|
328
|
+
*/
|
|
329
|
+
declare function getCreator(resource: ResourceDescriptor | undefined): components['schemas']['Agent'] | undefined;
|
|
330
|
+
/**
|
|
331
|
+
* Get derived-from URI
|
|
332
|
+
* Handles both single URI and array of URIs
|
|
333
|
+
*
|
|
334
|
+
* @param resource - ResourceDescriptor
|
|
335
|
+
* @returns First derivation URI or undefined
|
|
336
|
+
*/
|
|
337
|
+
declare function getDerivedFrom(resource: ResourceDescriptor | undefined): string | undefined;
|
|
338
|
+
/**
|
|
339
|
+
* Check if resource is archived (application-specific field)
|
|
340
|
+
*
|
|
341
|
+
* @param resource - ResourceDescriptor
|
|
342
|
+
* @returns True if archived, false otherwise
|
|
343
|
+
*/
|
|
344
|
+
declare function isArchived(resource: ResourceDescriptor | undefined): boolean;
|
|
345
|
+
/**
|
|
346
|
+
* Get entity types from resource (application-specific field)
|
|
347
|
+
*
|
|
348
|
+
* @param resource - ResourceDescriptor
|
|
349
|
+
* @returns Array of entity types, empty if not set
|
|
350
|
+
*/
|
|
351
|
+
declare function getResourceEntityTypes(resource: ResourceDescriptor | undefined): string[];
|
|
352
|
+
/**
|
|
353
|
+
* Check if resource is a draft (application-specific field)
|
|
354
|
+
*
|
|
355
|
+
* @param resource - ResourceDescriptor
|
|
356
|
+
* @returns True if draft, false otherwise
|
|
357
|
+
*/
|
|
358
|
+
declare function isDraft(resource: ResourceDescriptor | undefined): boolean;
|
|
359
|
+
/**
|
|
360
|
+
* Map charset names to Node.js Buffer encoding names
|
|
361
|
+
* Node.js Buffer.toString() supports: 'utf8', 'utf16le', 'latin1', 'base64', 'hex', 'ascii', 'binary', 'ucs2'
|
|
362
|
+
*
|
|
363
|
+
* @param charset - Charset name (e.g., "UTF-8", "ISO-8859-1", "Windows-1252")
|
|
364
|
+
* @returns Node.js BufferEncoding
|
|
365
|
+
*/
|
|
366
|
+
declare function getNodeEncoding(charset: string): BufferEncoding;
|
|
367
|
+
/**
|
|
368
|
+
* Decode a representation buffer to string using the correct charset
|
|
369
|
+
* Extracts charset from media type and uses appropriate encoding
|
|
370
|
+
*
|
|
371
|
+
* @param buffer - The raw representation data
|
|
372
|
+
* @param mediaType - Media type with optional charset (e.g., "text/plain; charset=iso-8859-1")
|
|
373
|
+
* @returns Decoded string
|
|
374
|
+
*
|
|
375
|
+
* @example
|
|
376
|
+
* ```typescript
|
|
377
|
+
* const content = decodeRepresentation(buffer, "text/plain; charset=utf-8");
|
|
378
|
+
* const legacy = decodeRepresentation(buffer, "text/plain; charset=windows-1252");
|
|
379
|
+
* ```
|
|
380
|
+
*/
|
|
381
|
+
declare function decodeRepresentation(buffer: Buffer, mediaType: string): string;
|
|
382
|
+
|
|
383
|
+
/**
|
|
384
|
+
* SVG Utility Functions
|
|
385
|
+
*
|
|
386
|
+
* Utilities for creating, parsing, and manipulating W3C-compliant SVG selectors
|
|
387
|
+
* for image annotation.
|
|
388
|
+
*/
|
|
389
|
+
interface Point {
|
|
390
|
+
x: number;
|
|
391
|
+
y: number;
|
|
392
|
+
}
|
|
393
|
+
interface BoundingBox {
|
|
394
|
+
x: number;
|
|
395
|
+
y: number;
|
|
396
|
+
width: number;
|
|
397
|
+
height: number;
|
|
398
|
+
}
|
|
399
|
+
/**
|
|
400
|
+
* Create W3C-compliant SVG rectangle selector
|
|
401
|
+
*/
|
|
402
|
+
declare function createRectangleSvg(start: Point, end: Point): string;
|
|
403
|
+
/**
|
|
404
|
+
* Create W3C-compliant SVG polygon selector
|
|
405
|
+
*/
|
|
406
|
+
declare function createPolygonSvg(points: Point[]): string;
|
|
407
|
+
/**
|
|
408
|
+
* Create W3C-compliant SVG circle selector
|
|
409
|
+
*/
|
|
410
|
+
declare function createCircleSvg(center: Point, radius: number): string;
|
|
411
|
+
/**
|
|
412
|
+
* Parse SVG selector to extract shape type and data
|
|
413
|
+
*/
|
|
414
|
+
declare function parseSvgSelector(svg: string): {
|
|
415
|
+
type: 'rect' | 'polygon' | 'circle' | 'path';
|
|
416
|
+
data: any;
|
|
417
|
+
} | null;
|
|
418
|
+
/**
|
|
419
|
+
* Normalize coordinates from display space to image native resolution
|
|
420
|
+
*/
|
|
421
|
+
declare function normalizeCoordinates(point: Point, displayWidth: number, displayHeight: number, imageWidth: number, imageHeight: number): Point;
|
|
422
|
+
/**
|
|
423
|
+
* Scale entire SVG selector from display space to image native resolution
|
|
424
|
+
*/
|
|
425
|
+
declare function scaleSvgToNative(svg: string, displayWidth: number, displayHeight: number, imageWidth: number, imageHeight: number): string;
|
|
426
|
+
|
|
427
|
+
/**
|
|
428
|
+
* Text context extraction utilities for W3C Web Annotation TextQuoteSelector
|
|
429
|
+
*
|
|
430
|
+
* Provides robust prefix/suffix context extraction with word boundary detection
|
|
431
|
+
* to ensure fuzzy anchoring works correctly when the same text appears multiple times.
|
|
432
|
+
*
|
|
433
|
+
* Also provides AI offset validation and correction for handling AI-generated annotations
|
|
434
|
+
* where the model may return slightly incorrect character offsets.
|
|
435
|
+
*
|
|
436
|
+
* @see https://www.w3.org/TR/annotation-model/#text-quote-selector
|
|
437
|
+
*/
|
|
438
|
+
|
|
439
|
+
/**
|
|
440
|
+
* Extract prefix and suffix context for TextQuoteSelector
|
|
441
|
+
*
|
|
442
|
+
* Extracts up to 64 characters before and after the selected text,
|
|
443
|
+
* extending to word boundaries to avoid cutting words in half.
|
|
444
|
+
* This ensures prefix/suffix are meaningful context for fuzzy anchoring.
|
|
445
|
+
*
|
|
446
|
+
* @param content - Full text content
|
|
447
|
+
* @param start - Start offset of selection
|
|
448
|
+
* @param end - End offset of selection
|
|
449
|
+
* @returns Object with prefix and suffix (undefined if at boundaries)
|
|
450
|
+
*
|
|
451
|
+
* @example
|
|
452
|
+
* ```typescript
|
|
453
|
+
* const content = "The United States Congress...";
|
|
454
|
+
* const context = extractContext(content, 4, 17); // "United States"
|
|
455
|
+
* // Returns: { prefix: "The ", suffix: " Congress..." }
|
|
456
|
+
* // NOT: { prefix: "nited ", suffix: "gress..." }
|
|
457
|
+
* ```
|
|
458
|
+
*/
|
|
459
|
+
declare function extractContext(content: string, start: number, end: number): {
|
|
460
|
+
prefix?: string;
|
|
461
|
+
suffix?: string;
|
|
462
|
+
};
|
|
463
|
+
/**
|
|
464
|
+
* Result of validating and correcting AI-provided annotation offsets
|
|
465
|
+
*/
|
|
466
|
+
interface ValidatedAnnotation {
|
|
467
|
+
start: number;
|
|
468
|
+
end: number;
|
|
469
|
+
exact: string;
|
|
470
|
+
prefix?: string;
|
|
471
|
+
suffix?: string;
|
|
472
|
+
corrected: boolean;
|
|
473
|
+
fuzzyMatched?: boolean;
|
|
474
|
+
matchQuality?: MatchQuality;
|
|
475
|
+
}
|
|
476
|
+
/**
|
|
477
|
+
* Validate and correct AI-provided annotation offsets with fuzzy matching tolerance
|
|
478
|
+
*
|
|
479
|
+
* AI models sometimes return offsets that don't match the actual text position,
|
|
480
|
+
* or provide text with minor variations (case differences, whitespace, typos).
|
|
481
|
+
*
|
|
482
|
+
* This function uses a multi-strategy approach:
|
|
483
|
+
* 1. Check if AI's offsets are exactly correct
|
|
484
|
+
* 2. Try exact case-sensitive search
|
|
485
|
+
* 3. Try case-insensitive search
|
|
486
|
+
* 4. Try fuzzy matching with Levenshtein distance (5% tolerance)
|
|
487
|
+
*
|
|
488
|
+
* This ensures we're maximally tolerant of AI errors while still maintaining
|
|
489
|
+
* annotation quality and logging what corrections were made.
|
|
490
|
+
*
|
|
491
|
+
* @param content - Full text content
|
|
492
|
+
* @param aiStart - Start offset from AI
|
|
493
|
+
* @param aiEnd - End offset from AI
|
|
494
|
+
* @param exact - The exact text that should be at this position (from AI)
|
|
495
|
+
* @returns Validated annotation with corrected offsets and context
|
|
496
|
+
* @throws Error if no acceptable match can be found
|
|
497
|
+
*
|
|
498
|
+
* @example
|
|
499
|
+
* ```typescript
|
|
500
|
+
* // AI said start=1143, but actual text is at 1161
|
|
501
|
+
* const result = validateAndCorrectOffsets(
|
|
502
|
+
* content,
|
|
503
|
+
* 1143,
|
|
504
|
+
* 1289,
|
|
505
|
+
* "the question \"whether..."
|
|
506
|
+
* );
|
|
507
|
+
* // Returns: { start: 1161, end: 1303, exact: "...", corrected: true, matchQuality: 'exact', ... }
|
|
508
|
+
* ```
|
|
509
|
+
*/
|
|
510
|
+
declare function validateAndCorrectOffsets(content: string, aiStart: number, aiEnd: number, exact: string): ValidatedAnnotation;
|
|
511
|
+
|
|
512
|
+
/**
|
|
513
|
+
* Text encoding utilities for consistent charset handling
|
|
514
|
+
*
|
|
515
|
+
* Ensures frontend decoding matches backend decoding by respecting
|
|
516
|
+
* charset parameters in mediaType (e.g., "text/plain; charset=iso-8859-1")
|
|
517
|
+
*/
|
|
518
|
+
/**
|
|
519
|
+
* Extract charset from mediaType parameter
|
|
520
|
+
*
|
|
521
|
+
* @param mediaType - Media type with optional charset (e.g., "text/plain; charset=utf-8")
|
|
522
|
+
* @returns Charset name in lowercase (defaults to "utf-8")
|
|
523
|
+
*
|
|
524
|
+
* @example
|
|
525
|
+
* extractCharset("text/plain; charset=iso-8859-1") // "iso-8859-1"
|
|
526
|
+
* extractCharset("text/plain") // "utf-8"
|
|
527
|
+
*/
|
|
528
|
+
declare function extractCharset(mediaType: string): string;
|
|
529
|
+
/**
|
|
530
|
+
* Decode ArrayBuffer to string using charset from mediaType
|
|
531
|
+
*
|
|
532
|
+
* Uses TextDecoder with the charset extracted from mediaType parameter.
|
|
533
|
+
* This ensures the same character space is used for both annotation creation
|
|
534
|
+
* (backend) and rendering (frontend).
|
|
535
|
+
*
|
|
536
|
+
* @param buffer - Binary data to decode
|
|
537
|
+
* @param mediaType - Media type with optional charset parameter
|
|
538
|
+
* @returns Decoded string in the original character space
|
|
539
|
+
*
|
|
540
|
+
* @example
|
|
541
|
+
* const buffer = new Uint8Array([...]).buffer; // ArrayBuffer, matching the declared parameter type
|
|
542
|
+
* const text = decodeWithCharset(buffer, "text/plain; charset=iso-8859-1");
|
|
543
|
+
*/
|
|
544
|
+
declare function decodeWithCharset(buffer: ArrayBuffer, mediaType: string): string;
|
|
545
|
+
|
|
546
|
+
/**
|
|
547
|
+
* Generic validation utilities for @semiont/api-client
|
|
548
|
+
*
|
|
549
|
+
* Pure TypeScript validation with no external dependencies.
|
|
550
|
+
* Safe to use in any JavaScript environment (Node.js, browser, Deno, etc.)
|
|
551
|
+
*/
|
|
552
|
+
/**
|
|
553
|
+
* Validation result types
|
|
554
|
+
*/
|
|
555
|
+
type ValidationSuccess<T> = {
|
|
556
|
+
success: true;
|
|
557
|
+
data: T;
|
|
558
|
+
};
|
|
559
|
+
type ValidationFailure = {
|
|
560
|
+
success: false;
|
|
561
|
+
error: string;
|
|
562
|
+
details?: string[];
|
|
563
|
+
};
|
|
564
|
+
type ValidationResult<T> = ValidationSuccess<T> | ValidationFailure;
|
|
565
|
+
/**
|
|
566
|
+
* JWT Token validation
|
|
567
|
+
*
|
|
568
|
+
* Validates JWT token format (header.payload.signature).
|
|
569
|
+
* Does not verify signature - use for format validation only.
|
|
570
|
+
*/
|
|
571
|
+
declare const JWTTokenSchema: {
|
|
572
|
+
parse(token: unknown): string;
|
|
573
|
+
safeParse(token: unknown): ValidationResult<string>;
|
|
574
|
+
};
|
|
575
|
+
/**
|
|
576
|
+
* Generic validation helper with error formatting
|
|
577
|
+
*
|
|
578
|
+
* Wraps any schema's parse method with try/catch and returns ValidationResult.
|
|
579
|
+
*
|
|
580
|
+
* @example
|
|
581
|
+
* ```typescript
|
|
582
|
+
* const result = validateData(JWTTokenSchema, 'eyJ...');
|
|
583
|
+
* if (result.success) {
|
|
584
|
+
* console.log('Valid token:', result.data);
|
|
585
|
+
* } else {
|
|
586
|
+
* console.error('Invalid:', result.error);
|
|
587
|
+
* }
|
|
588
|
+
* ```
|
|
589
|
+
*/
|
|
590
|
+
declare function validateData<T>(schema: {
|
|
591
|
+
parse(data: unknown): T;
|
|
592
|
+
}, data: unknown): ValidationResult<T>;
|
|
593
|
+
/**
|
|
594
|
+
* Email validation helper
|
|
595
|
+
*
|
|
596
|
+
* Validates email format using RFC 5322 simplified regex.
|
|
597
|
+
*
|
|
598
|
+
* @param email - Email address to validate
|
|
599
|
+
* @returns true if valid email format
|
|
600
|
+
*/
|
|
601
|
+
declare function isValidEmail(email: string): boolean;
|
|
602
|
+
|
|
603
|
+
export { type BoundingBox, type FragmentSelector, JWTTokenSchema, LOCALES, type LocaleInfo, type MatchQuality, type Point, type Selector, type SvgSelector, type TextPosition, type TextPositionSelector, type TextQuoteSelector, type ValidatedAnnotation, type ValidationFailure, type ValidationResult, type ValidationSuccess, createCircleSvg, createPolygonSvg, createRectangleSvg, decodeRepresentation, decodeWithCharset, extractBoundingBox, extractCharset, extractContext, findBestTextMatch, findTextWithContext, formatLocaleDisplay, getAllLocaleCodes, getAnnotationExactText, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getExactText, getFragmentSelector, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getNodeEncoding, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, hasTargetSelector, isArchived, isAssessment, isBodyResolved, isComment, isDraft, isHighlight, isReference, isResolvedReference, isStubReference, isTag, isValidEmail, normalizeCoordinates, normalizeText, parseSvgSelector, scaleSvgToNative, validateAndCorrectOffsets, validateData, validateSvgMarkup, verifyPosition };
|
package/dist/utils/index.js
CHANGED
|
@@ -1,10 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
function resourceUri(uri) {
|
|
3
|
-
if (!uri.startsWith("http://") && !uri.startsWith("https://")) {
|
|
4
|
-
throw new TypeError(`Expected ResourceUri, got: ${uri}`);
|
|
5
|
-
}
|
|
6
|
-
return uri;
|
|
7
|
-
}
|
|
1
|
+
import { resourceUri } from '@semiont/core';
|
|
8
2
|
|
|
9
3
|
// src/utils/annotations.ts
|
|
10
4
|
function getBodySource(body) {
|