docxodus 5.1.1 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +121 -24
- package/dist/index.d.ts +199 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +423 -0
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +111 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/wasm/_framework/Docxodus.wasm +0 -0
- package/dist/wasm/_framework/DocxodusWasm.wasm +0 -0
- package/dist/wasm/_framework/System.Linq.wasm +0 -0
- package/dist/wasm/_framework/System.Private.CoreLib.wasm +0 -0
- package/dist/wasm/_framework/System.Security.Cryptography.wasm +0 -0
- package/dist/wasm/_framework/System.Text.Json.wasm +0 -0
- package/dist/wasm/_framework/blazor.boot.json +8 -8
- package/dist/wasm/_framework/dotnet.native.wasm +0 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -7,9 +7,19 @@ Docxodus brings professional-grade document comparison (redlining) to JavaScript
|
|
|
7
7
|
## Features
|
|
8
8
|
|
|
9
9
|
- **Document Comparison**: Compare two DOCX files and generate a redlined document with tracked changes
|
|
10
|
+
- **Move Detection**: Automatically identifies relocated content (not just deleted/re-inserted)
|
|
11
|
+
- **Format Change Detection**: Detects formatting-only changes (bold, italic, font size, etc.)
|
|
10
12
|
- **HTML Conversion**: Convert DOCX documents to HTML for display in the browser
|
|
13
|
+
- Comment rendering (endnote-style, inline, or margin)
|
|
14
|
+
- Paginated output mode for PDF-like viewing
|
|
15
|
+
- Headers, footers, footnotes, and endnotes support
|
|
16
|
+
- Custom annotation rendering
|
|
17
|
+
- **Document Metadata**: Fast metadata extraction for lazy loading and pagination
|
|
11
18
|
- **Revision Extraction**: Get structured data about all revisions in a compared document
|
|
19
|
+
- **OpenContracts Export**: Export documents to OpenContracts format for NLP/document analysis
|
|
20
|
+
- **External Annotations**: Store annotations externally without modifying the DOCX
|
|
12
21
|
- **100% Client-Side**: All processing happens in the browser using WebAssembly
|
|
22
|
+
- **Web Worker Support**: Non-blocking WASM execution via Web Workers
|
|
13
23
|
- **React Hooks**: Ready-to-use hooks for React applications
|
|
14
24
|
- **TypeScript Support**: Full type definitions included
|
|
15
25
|
|
|
@@ -109,7 +119,7 @@ Initialize the WASM runtime. Must be called before using any other functions.
|
|
|
109
119
|
Convert a DOCX document to HTML.
|
|
110
120
|
|
|
111
121
|
```typescript
|
|
112
|
-
import { CommentRenderMode } from 'docxodus';
|
|
122
|
+
import { CommentRenderMode, PaginationMode, AnnotationLabelMode } from 'docxodus';
|
|
113
123
|
|
|
114
124
|
interface ConversionOptions {
|
|
115
125
|
pageTitle?: string; // HTML document title
|
|
@@ -117,7 +127,14 @@ interface ConversionOptions {
|
|
|
117
127
|
fabricateClasses?: boolean; // Generate CSS classes (default: true)
|
|
118
128
|
additionalCss?: string; // Extra CSS to include
|
|
119
129
|
commentRenderMode?: CommentRenderMode; // How to render comments (default: Disabled)
|
|
120
|
-
commentCssClassPrefix?: string; // CSS prefix for comments
|
|
130
|
+
commentCssClassPrefix?: string; // CSS prefix for comments
|
|
131
|
+
paginationMode?: PaginationMode; // None (0) or Paginated (1)
|
|
132
|
+
paginationScale?: number; // Scale factor for pages (default: 1.0)
|
|
133
|
+
renderAnnotations?: boolean; // Render custom annotations
|
|
134
|
+
annotationLabelMode?: AnnotationLabelMode; // Above, Inline, Tooltip, or None
|
|
135
|
+
renderFootnotesAndEndnotes?: boolean; // Include footnotes/endnotes sections
|
|
136
|
+
renderHeadersAndFooters?: boolean; // Include headers and footers
|
|
137
|
+
renderTrackedChanges?: boolean; // Show insertions/deletions visually
|
|
121
138
|
}
|
|
122
139
|
```
|
|
123
140
|
|
|
@@ -170,44 +187,117 @@ interface CompareOptions {
|
|
|
170
187
|
#### `compareDocumentsToHtml(original, modified, options?): Promise<string>`
|
|
171
188
|
Compare documents and return the result as HTML.
|
|
172
189
|
|
|
173
|
-
#### `getRevisions(document: File | Uint8Array): Promise<Revision[]>`
|
|
190
|
+
#### `getRevisions(document: File | Uint8Array, options?): Promise<Revision[]>`
|
|
174
191
|
Extract revision information from a compared document.
|
|
175
192
|
|
|
176
193
|
```typescript
|
|
177
|
-
import {
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
194
|
+
import {
|
|
195
|
+
getRevisions,
|
|
196
|
+
RevisionType,
|
|
197
|
+
isInsertion,
|
|
198
|
+
isDeletion,
|
|
199
|
+
isMove,
|
|
200
|
+
isMoveSource,
|
|
201
|
+
isFormatChange,
|
|
202
|
+
findMovePair
|
|
203
|
+
} from 'docxodus';
|
|
204
|
+
import type { Revision, GetRevisionsOptions } from 'docxodus';
|
|
205
|
+
|
|
206
|
+
// RevisionType enum
|
|
181
207
|
enum RevisionType {
|
|
182
|
-
Inserted = "Inserted",
|
|
183
|
-
Deleted = "Deleted",
|
|
208
|
+
Inserted = "Inserted", // Text or content that was added
|
|
209
|
+
Deleted = "Deleted", // Text or content that was removed
|
|
210
|
+
Moved = "Moved", // Text relocated within the document
|
|
211
|
+
FormatChanged = "FormatChanged" // Formatting-only change
|
|
184
212
|
}
|
|
185
213
|
|
|
186
214
|
// Revision interface with full documentation
|
|
187
215
|
interface Revision {
|
|
188
|
-
/** Author who made the revision (may be empty string if not specified) */
|
|
189
216
|
author: string;
|
|
190
|
-
/** ISO 8601 date string (e.g., "2024-01-15T10:30:00Z"), may be empty */
|
|
191
217
|
date: string;
|
|
192
|
-
/** Type of revision - "Inserted" or "Deleted" */
|
|
193
218
|
revisionType: RevisionType | string;
|
|
194
|
-
/** Text content (newline for paragraph breaks, empty for images/equations) */
|
|
195
219
|
text: string;
|
|
220
|
+
moveGroupId?: number; // Links move source/destination pairs
|
|
221
|
+
isMoveSource?: boolean; // true = moved FROM here, false = moved TO here
|
|
222
|
+
formatChange?: { // Details for FormatChanged revisions
|
|
223
|
+
oldProperties?: Record<string, string>;
|
|
224
|
+
newProperties?: Record<string, string>;
|
|
225
|
+
changedPropertyNames?: string[];
|
|
226
|
+
};
|
|
196
227
|
}
|
|
197
228
|
|
|
198
|
-
//
|
|
199
|
-
const revisions = await getRevisions(comparedDoc
|
|
229
|
+
// Get revisions with options
|
|
230
|
+
const revisions = await getRevisions(comparedDoc, {
|
|
231
|
+
detectMoves: true, // Enable move detection (default: true)
|
|
232
|
+
moveSimilarityThreshold: 0.8, // Jaccard similarity for moves (default: 0.8)
|
|
233
|
+
moveMinimumWordCount: 3, // Minimum words for move (default: 3)
|
|
234
|
+
caseInsensitive: false // Case-insensitive matching (default: false)
|
|
235
|
+
});
|
|
236
|
+
|
|
237
|
+
// Filter by type using helper functions
|
|
200
238
|
const insertions = revisions.filter(isInsertion);
|
|
201
239
|
const deletions = revisions.filter(isDeletion);
|
|
240
|
+
const moves = revisions.filter(isMove);
|
|
241
|
+
const formatChanges = revisions.filter(isFormatChange);
|
|
202
242
|
|
|
203
|
-
//
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
243
|
+
// Find move pairs
|
|
244
|
+
for (const rev of moves.filter(isMoveSource)) {
|
|
245
|
+
const destination = findMovePair(rev, revisions);
|
|
246
|
+
console.log(`"${rev.text}" moved to "${destination?.text}"`);
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
// Check format changes
|
|
250
|
+
for (const rev of formatChanges) {
|
|
251
|
+
console.log(`Format changed: ${rev.formatChange?.changedPropertyNames?.join(', ')}`);
|
|
252
|
+
}
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
#### `getDocumentMetadata(document: File | Uint8Array): Promise<DocumentMetadata>`
|
|
256
|
+
Get document metadata for lazy loading and pagination without full HTML rendering.
|
|
257
|
+
|
|
258
|
+
```typescript
|
|
259
|
+
const metadata = await getDocumentMetadata(docxFile);
|
|
260
|
+
|
|
261
|
+
console.log(`Sections: ${metadata.sections.length}`);
|
|
262
|
+
console.log(`Total paragraphs: ${metadata.totalParagraphs}`);
|
|
263
|
+
console.log(`Estimated pages: ${metadata.estimatedPageCount}`);
|
|
264
|
+
console.log(`Has comments: ${metadata.hasComments}`);
|
|
265
|
+
console.log(`Has tracked changes: ${metadata.hasTrackedChanges}`);
|
|
266
|
+
|
|
267
|
+
// Section dimensions (in points, 1pt = 1/72 inch)
|
|
268
|
+
const section = metadata.sections[0];
|
|
269
|
+
console.log(`Page size: ${section.pageWidthPt} x ${section.pageHeightPt} pt`);
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
#### `exportToOpenContract(document: File | Uint8Array): Promise<OpenContractDocExport>`
|
|
273
|
+
Export document to OpenContracts format for NLP/document analysis.
|
|
274
|
+
|
|
275
|
+
```typescript
|
|
276
|
+
const docExport = await exportToOpenContract(docxFile);
|
|
277
|
+
console.log(`Title: ${docExport.title}`);
|
|
278
|
+
console.log(`Content: ${docExport.content.length} characters`);
|
|
279
|
+
console.log(`Pages: ${docExport.pageCount}`);
|
|
280
|
+
console.log(`Structural annotations: ${docExport.labelledText.length}`);
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### Web Worker API
|
|
284
|
+
|
|
285
|
+
For non-blocking WASM execution, use the worker-based API:
|
|
286
|
+
|
|
287
|
+
```typescript
|
|
288
|
+
import { createWorkerDocxodus } from 'docxodus/worker';
|
|
289
|
+
|
|
290
|
+
// Create a worker instance
|
|
291
|
+
const docxodus = await createWorkerDocxodus({ wasmBasePath: '/wasm/' });
|
|
292
|
+
|
|
293
|
+
// All operations run in a Web Worker - main thread stays responsive
|
|
294
|
+
const html = await docxodus.convertDocxToHtml(docxFile, options);
|
|
295
|
+
const redlined = await docxodus.compareDocuments(original, modified, options);
|
|
296
|
+
const revisions = await docxodus.getRevisions(docxFile);
|
|
297
|
+
const metadata = await docxodus.getDocumentMetadata(docxFile);
|
|
298
|
+
|
|
299
|
+
// Terminate when done
|
|
300
|
+
docxodus.terminate();
|
|
211
301
|
```
|
|
212
302
|
|
|
213
303
|
### React Hooks
|
|
@@ -223,6 +313,7 @@ Returns:
|
|
|
223
313
|
- `compare()` - Compare documents
|
|
224
314
|
- `compareToHtml()` - Compare and get HTML
|
|
225
315
|
- `getRevisions()` - Get revision list
|
|
316
|
+
- `getDocumentMetadata()` - Get document metadata
|
|
226
317
|
|
|
227
318
|
#### `useConversion(wasmBasePath?: string)`
|
|
228
319
|
Simplified hook for DOCX to HTML conversion with state management.
|
|
@@ -230,6 +321,12 @@ Simplified hook for DOCX to HTML conversion with state management.
|
|
|
230
321
|
#### `useComparison(wasmBasePath?: string)`
|
|
231
322
|
Simplified hook for document comparison with state management.
|
|
232
323
|
|
|
324
|
+
#### `useAnnotations(wasmBasePath?: string)`
|
|
325
|
+
Hook for managing custom annotations on documents.
|
|
326
|
+
|
|
327
|
+
#### `useDocumentStructure(wasmBasePath?: string)`
|
|
328
|
+
Hook for document structure analysis and element-based targeting.
|
|
329
|
+
|
|
233
330
|
## Hosting WASM Files
|
|
234
331
|
|
|
235
332
|
The WASM files need to be served from your web server. After building:
|
|
@@ -273,4 +370,4 @@ MIT
|
|
|
273
370
|
|
|
274
371
|
## Credits
|
|
275
372
|
|
|
276
|
-
Built on [Docxodus](https://github.com/JSv4/
|
|
373
|
+
Built on [Docxodus](https://github.com/JSv4/Docxodus), a .NET library for document manipulation based on OpenXML-PowerTools.
|
package/dist/index.d.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import type { ConversionOptions, CompareOptions, Revision, VersionInfo, ErrorResponse, CompareResult, GetRevisionsOptions, FormatChangeDetails, Annotation, AddAnnotationRequest, AddAnnotationResponse, RemoveAnnotationResponse, AnnotationOptions, DocumentStructure, DocumentElement, TableColumnInfo, AnnotationTarget, AddAnnotationWithTargetRequest, DocumentMetadata, SectionMetadata, OpenContractDocExport, PawlsPage, PawlsPageBoundary, PawlsToken, OpenContractsAnnotation, OpenContractsSinglePageAnnotation, BoundingBox, TokenId, TextSpan, OpenContractsRelationship } from "./types.js";
|
|
1
|
+
import type { ConversionOptions, CompareOptions, Revision, VersionInfo, ErrorResponse, CompareResult, GetRevisionsOptions, FormatChangeDetails, Annotation, AddAnnotationRequest, AddAnnotationResponse, RemoveAnnotationResponse, AnnotationOptions, DocumentStructure, DocumentElement, TableColumnInfo, AnnotationTarget, AddAnnotationWithTargetRequest, DocumentMetadata, SectionMetadata, OpenContractDocExport, PawlsPage, PawlsPageBoundary, PawlsToken, OpenContractsAnnotation, OpenContractsSinglePageAnnotation, BoundingBox, TokenId, TextSpan, OpenContractsRelationship, AnnotationLabel, ExternalAnnotationSet, ExternalAnnotationValidationResult, ExternalAnnotationValidationIssue, ExternalAnnotationProjectionSettings } from "./types.js";
|
|
2
2
|
import { CommentRenderMode, PaginationMode, AnnotationLabelMode, RevisionType, DocumentElementType, isInsertion, isDeletion, isMove, isMoveSource, isMoveDestination, findMovePair, isFormatChange, findElementById, findElementsByType, getParagraphs, getTables, getTableColumns, targetElement, targetParagraph, targetParagraphRange, targetRun, targetTable, targetTableRow, targetTableCell, targetTableColumn, targetSearch, targetSearchInElement } from "./types.js";
|
|
3
3
|
export type { PageDimensions, MeasuredBlock, PageInfo, PaginationResult, PaginationOptions, } from "./pagination.js";
|
|
4
4
|
export { PaginationEngine, paginateHtml } from "./pagination.js";
|
|
5
|
-
export type { ConversionOptions, CompareOptions, Revision, VersionInfo, ErrorResponse, CompareResult, GetRevisionsOptions, FormatChangeDetails, Annotation, AddAnnotationRequest, AddAnnotationResponse, RemoveAnnotationResponse, AnnotationOptions, DocumentStructure, DocumentElement, TableColumnInfo, AnnotationTarget, AddAnnotationWithTargetRequest, DocumentMetadata, SectionMetadata, OpenContractDocExport, PawlsPage, PawlsPageBoundary, PawlsToken, OpenContractsAnnotation, OpenContractsSinglePageAnnotation, BoundingBox, TokenId, TextSpan, OpenContractsRelationship, };
|
|
5
|
+
export type { ConversionOptions, CompareOptions, Revision, VersionInfo, ErrorResponse, CompareResult, GetRevisionsOptions, FormatChangeDetails, Annotation, AddAnnotationRequest, AddAnnotationResponse, RemoveAnnotationResponse, AnnotationOptions, DocumentStructure, DocumentElement, TableColumnInfo, AnnotationTarget, AddAnnotationWithTargetRequest, DocumentMetadata, SectionMetadata, OpenContractDocExport, PawlsPage, PawlsPageBoundary, PawlsToken, OpenContractsAnnotation, OpenContractsSinglePageAnnotation, BoundingBox, TokenId, TextSpan, OpenContractsRelationship, AnnotationLabel, ExternalAnnotationSet, ExternalAnnotationValidationResult, ExternalAnnotationValidationIssue, ExternalAnnotationProjectionSettings, };
|
|
6
6
|
export { CommentRenderMode, PaginationMode, AnnotationLabelMode, RevisionType, DocumentElementType, isInsertion, isDeletion, isMove, isMoveSource, isMoveDestination, findMovePair, isFormatChange, findElementById, findElementsByType, getParagraphs, getTables, getTableColumns, targetElement, targetParagraph, targetParagraphRange, targetRun, targetTable, targetTableRow, targetTableCell, targetTableColumn, targetSearch, targetSearchInElement, };
|
|
7
7
|
/**
|
|
8
8
|
* Current base path for WASM files.
|
|
@@ -354,4 +354,201 @@ export declare function exportToOpenContract(document: File | Uint8Array): Promi
|
|
|
354
354
|
* ```
|
|
355
355
|
*/
|
|
356
356
|
export declare function addAnnotationWithTarget(document: File | Uint8Array, request: AddAnnotationWithTargetRequest): Promise<AddAnnotationResponse>;
|
|
357
|
+
/**
|
|
358
|
+
* Compute the SHA256 hash of a document for integrity validation.
|
|
359
|
+
*
|
|
360
|
+
* @param document - DOCX file as File object or Uint8Array
|
|
361
|
+
* @returns SHA256 hash as lowercase hex string
|
|
362
|
+
* @throws Error if operation fails
|
|
363
|
+
*
|
|
364
|
+
* @example
|
|
365
|
+
* ```typescript
|
|
366
|
+
* const hash = await computeDocumentHash(docxFile);
|
|
367
|
+
* console.log(`Document hash: ${hash}`);
|
|
368
|
+
*
|
|
369
|
+
* // Later, verify the document hasn't changed
|
|
370
|
+
* const currentHash = await computeDocumentHash(docxFile);
|
|
371
|
+
* if (currentHash !== storedHash) {
|
|
372
|
+
* console.log("Document has been modified");
|
|
373
|
+
* }
|
|
374
|
+
* ```
|
|
375
|
+
*/
|
|
376
|
+
export declare function computeDocumentHash(document: File | Uint8Array): Promise<string>;
|
|
377
|
+
/**
|
|
378
|
+
* Create an ExternalAnnotationSet from a document.
|
|
379
|
+
* This extracts the document structure and computes the hash for integrity validation.
|
|
380
|
+
*
|
|
381
|
+
* @param document - DOCX file as File object or Uint8Array
|
|
382
|
+
* @param documentId - Unique identifier for the document (filename, UUID, etc.)
|
|
383
|
+
* @returns ExternalAnnotationSet ready for adding annotations
|
|
384
|
+
* @throws Error if operation fails
|
|
385
|
+
*
|
|
386
|
+
* @example
|
|
387
|
+
* ```typescript
|
|
388
|
+
* // Create an annotation set
|
|
389
|
+
* const set = await createExternalAnnotationSet(docxFile, "contract-v1.0");
|
|
390
|
+
*
|
|
391
|
+
* // Access document text for searching
|
|
392
|
+
* console.log(`Document length: ${set.content.length} chars`);
|
|
393
|
+
*
|
|
394
|
+
* // Add label definitions
|
|
395
|
+
* set.textLabels["IMPORTANT"] = {
|
|
396
|
+
* id: "IMPORTANT",
|
|
397
|
+
* text: "Important",
|
|
398
|
+
* color: "#FF0000",
|
|
399
|
+
* description: "Important text",
|
|
400
|
+
* icon: "",
|
|
401
|
+
* labelType: "text"
|
|
402
|
+
* };
|
|
403
|
+
*
|
|
404
|
+
* // Create annotations using the content
|
|
405
|
+
* const annotation = createAnnotationFromSearch(
|
|
406
|
+
* "ann-001", "IMPORTANT", set.content, "shall not be liable"
|
|
407
|
+
* );
|
|
408
|
+
* if (annotation) {
|
|
409
|
+
* set.labelledText.push(annotation);
|
|
410
|
+
* }
|
|
411
|
+
*
|
|
412
|
+
* // Serialize for storage
|
|
413
|
+
* const json = JSON.stringify(set);
|
|
414
|
+
* ```
|
|
415
|
+
*/
|
|
416
|
+
export declare function createExternalAnnotationSet(document: File | Uint8Array, documentId: string): Promise<ExternalAnnotationSet>;
|
|
417
|
+
/**
|
|
418
|
+
* Validate an external annotation set against a document.
|
|
419
|
+
* Checks hash match and verifies each annotation's text still matches.
|
|
420
|
+
*
|
|
421
|
+
* @param document - DOCX file as File object or Uint8Array
|
|
422
|
+
* @param annotationSet - The annotation set to validate
|
|
423
|
+
* @returns Validation result with any issues found
|
|
424
|
+
* @throws Error if operation fails
|
|
425
|
+
*
|
|
426
|
+
* @example
|
|
427
|
+
* ```typescript
|
|
428
|
+
* const result = await validateExternalAnnotations(docxFile, annotationSet);
|
|
429
|
+
*
|
|
430
|
+
* if (!result.isValid) {
|
|
431
|
+
* if (result.hashMismatch) {
|
|
432
|
+
* console.log("Document has been modified since annotations were created");
|
|
433
|
+
* }
|
|
434
|
+
* for (const issue of result.issues) {
|
|
435
|
+
* console.log(`${issue.issueType}: ${issue.description}`);
|
|
436
|
+
* }
|
|
437
|
+
* }
|
|
438
|
+
* ```
|
|
439
|
+
*/
|
|
440
|
+
export declare function validateExternalAnnotations(document: File | Uint8Array, annotationSet: ExternalAnnotationSet): Promise<ExternalAnnotationValidationResult>;
|
|
441
|
+
/**
|
|
442
|
+
* Convert a DOCX document to HTML with external annotations projected.
|
|
443
|
+
*
|
|
444
|
+
* @param document - DOCX file as File object or Uint8Array
|
|
445
|
+
* @param annotationSet - The external annotation set to project
|
|
446
|
+
* @param conversionOptions - HTML conversion options
|
|
447
|
+
* @param projectionOptions - Annotation projection options
|
|
448
|
+
* @returns HTML string with annotations projected
|
|
449
|
+
* @throws Error if operation fails
|
|
450
|
+
*
|
|
451
|
+
* @example
|
|
452
|
+
* ```typescript
|
|
453
|
+
* // Basic usage
|
|
454
|
+
* const html = await convertDocxToHtmlWithExternalAnnotations(
|
|
455
|
+
* docxFile,
|
|
456
|
+
* annotationSet
|
|
457
|
+
* );
|
|
458
|
+
*
|
|
459
|
+
* // With custom options
|
|
460
|
+
* const customHtml = await convertDocxToHtmlWithExternalAnnotations(
|
|
461
|
+
* docxFile,
|
|
462
|
+
* annotationSet,
|
|
463
|
+
* { pageTitle: "Annotated Document" },
|
|
464
|
+
* { labelMode: AnnotationLabelMode.Inline, cssClassPrefix: "my-annot-" }
|
|
465
|
+
* );
|
|
466
|
+
* ```
|
|
467
|
+
*/
|
|
468
|
+
export declare function convertDocxToHtmlWithExternalAnnotations(document: File | Uint8Array, annotationSet: ExternalAnnotationSet, conversionOptions?: ConversionOptions, projectionOptions?: ExternalAnnotationProjectionSettings): Promise<string>;
|
|
469
|
+
/**
|
|
470
|
+
* Search for text in a document and return character offsets.
|
|
471
|
+
* Useful for finding text locations to create annotations.
|
|
472
|
+
*
|
|
473
|
+
* @param document - DOCX file as File object or Uint8Array
|
|
474
|
+
* @param searchText - Text to search for
|
|
475
|
+
* @param maxResults - Maximum number of results (default: 100)
|
|
476
|
+
* @returns Array of TextSpan objects with offsets
|
|
477
|
+
* @throws Error if operation fails
|
|
478
|
+
*
|
|
479
|
+
* @example
|
|
480
|
+
* ```typescript
|
|
481
|
+
* const occurrences = await searchTextOffsets(docxFile, "liability");
|
|
482
|
+
* console.log(`Found ${occurrences.length} occurrences`);
|
|
483
|
+
*
|
|
484
|
+
* for (const span of occurrences) {
|
|
485
|
+
* console.log(`"${span.text}" at offset ${span.start}-${span.end}`);
|
|
486
|
+
* }
|
|
487
|
+
* ```
|
|
488
|
+
*/
|
|
489
|
+
export declare function searchTextOffsets(document: File | Uint8Array, searchText: string, maxResults?: number): Promise<TextSpan[]>;
|
|
490
|
+
/**
|
|
491
|
+
* Create an annotation from character offsets.
|
|
492
|
+
* This is a client-side helper - no WASM call needed.
|
|
493
|
+
*
|
|
494
|
+
* @param id - Unique identifier for the annotation
|
|
495
|
+
* @param labelId - Label/category ID for the annotation
|
|
496
|
+
* @param documentText - Full document text (from annotationSet.content)
|
|
497
|
+
* @param startOffset - Start character offset (0-indexed, inclusive)
|
|
498
|
+
* @param endOffset - End character offset (exclusive)
|
|
499
|
+
* @returns OpenContractsAnnotation ready to add to an annotation set
|
|
500
|
+
* @throws Error if offsets are invalid
|
|
501
|
+
*
|
|
502
|
+
* @example
|
|
503
|
+
* ```typescript
|
|
504
|
+
* const set = await createExternalAnnotationSet(docxFile, "doc-1");
|
|
505
|
+
* const annotation = createAnnotation("ann-001", "IMPORTANT", set.content, 100, 150);
|
|
506
|
+
* set.labelledText.push(annotation);
|
|
507
|
+
* ```
|
|
508
|
+
*/
|
|
509
|
+
export declare function createAnnotation(id: string, labelId: string, documentText: string, startOffset: number, endOffset: number): OpenContractsAnnotation;
|
|
510
|
+
/**
|
|
511
|
+
* Create an annotation by searching for text in the document.
|
|
512
|
+
* This is a client-side helper - no WASM call needed.
|
|
513
|
+
*
|
|
514
|
+
* @param id - Unique identifier for the annotation
|
|
515
|
+
* @param labelId - Label/category ID for the annotation
|
|
516
|
+
* @param documentText - Full document text (from annotationSet.content)
|
|
517
|
+
* @param searchText - Text to search for
|
|
518
|
+
* @param occurrence - Which occurrence to use (1-based, default: 1)
|
|
519
|
+
* @returns OpenContractsAnnotation, or null if text not found
|
|
520
|
+
*
|
|
521
|
+
* @example
|
|
522
|
+
* ```typescript
|
|
523
|
+
* const set = await createExternalAnnotationSet(docxFile, "doc-1");
|
|
524
|
+
*
|
|
525
|
+
* // Find first occurrence
|
|
526
|
+
* const ann1 = createAnnotationFromSearch("ann-001", "LIABILITY", set.content, "shall not be liable");
|
|
527
|
+
* if (ann1) set.labelledText.push(ann1);
|
|
528
|
+
*
|
|
529
|
+
* // Find second occurrence
|
|
530
|
+
* const ann2 = createAnnotationFromSearch("ann-002", "LIABILITY", set.content, "shall not be liable", 2);
|
|
531
|
+
* if (ann2) set.labelledText.push(ann2);
|
|
532
|
+
* ```
|
|
533
|
+
*/
|
|
534
|
+
export declare function createAnnotationFromSearch(id: string, labelId: string, documentText: string, searchText: string, occurrence?: number): OpenContractsAnnotation | null;
|
|
535
|
+
/**
|
|
536
|
+
* Find all occurrences of a text string in the document.
|
|
537
|
+
* This is a client-side helper - no WASM call needed.
|
|
538
|
+
*
|
|
539
|
+
* @param documentText - Full document text
|
|
540
|
+
* @param searchText - Text to search for
|
|
541
|
+
* @param maxResults - Maximum number of results (default: 100)
|
|
542
|
+
* @returns Array of { start, end } offsets
|
|
543
|
+
*
|
|
544
|
+
* @example
|
|
545
|
+
* ```typescript
|
|
546
|
+
* const occurrences = findTextOccurrences(set.content, "the");
|
|
547
|
+
* console.log(`Found ${occurrences.length} occurrences of "the"`);
|
|
548
|
+
* ```
|
|
549
|
+
*/
|
|
550
|
+
export declare function findTextOccurrences(documentText: string, searchText: string, maxResults?: number): Array<{
|
|
551
|
+
start: number;
|
|
552
|
+
end: number;
|
|
553
|
+
}>;
|
|
357
554
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,iBAAiB,EACjB,cAAc,EACd,QAAQ,EACR,WAAW,EACX,aAAa,EACb,aAAa,EAEb,mBAAmB,EACnB,mBAAmB,EACnB,UAAU,EACV,oBAAoB,EACpB,qBAAqB,EACrB,wBAAwB,EACxB,iBAAiB,EACjB,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,8BAA8B,EAC9B,gBAAgB,EAChB,eAAe,EAEf,qBAAqB,EACrB,SAAS,EACT,iBAAiB,EACjB,UAAU,EACV,uBAAuB,EACvB,iCAAiC,EACjC,WAAW,EACX,OAAO,EACP,QAAQ,EACR,yBAAyB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,iBAAiB,EACjB,cAAc,EACd,QAAQ,EACR,WAAW,EACX,aAAa,EACb,aAAa,EAEb,mBAAmB,EACnB,mBAAmB,EACnB,UAAU,EACV,oBAAoB,EACpB,qBAAqB,EACrB,wBAAwB,EACxB,iBAAiB,EACjB,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,8BAA8B,EAC9B,gBAAgB,EAChB,eAAe,EAEf,qBAAqB,EACrB,SAAS,EACT,iBAAiB,EACjB,UAAU,EACV,uBAAuB,EACvB,iCAAiC,EACjC,WAAW,EACX,OAAO,EACP,QAAQ,EACR,yBAAyB,EAEzB,eAAe,EACf,qBAAqB,EACrB,kCAAkC,EAClC,iCAAiC,EACjC,oCAAoC,EACrC,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,iBAAiB,EACjB,cAAc,EACd,mBAAmB,EACnB,YAAY,EACZ,mBAAmB,EACnB,WAAW,EACX,UAAU,EACV,MAAM,EACN,YAAY,EACZ,iBAAiB,EACjB,YAAY,EACZ,cAAc,EACd,eAAe,EACf,kBAAkB,EAClB,aAAa,EACb,SAAS,EACT,eAAe,EACf,aAAa,EACb,eAAe,EACf,oBAAoB,EACpB,SAAS,EACT,WAAW,EACX,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,qBAAqB,EACtB,MAAM,YAAY,CAAC;AAGpB,YAAY,EACV,cAAc,EACd,aAAa,EACb,QAAQ,EACR,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAEjE,YAAY,EACV,iBAAiB,EACjB,cAAc,EACd,QAAQ,EACR,WAAW,EACX,aAAa,EACb,aAAa,EACb,mBAAmB,EACnB,mBAAmB,EACnB,UAAU,EACV,oBAAoB,EACpB,qBAAqB,EACrB,wBAAwB,EACxB,iBAAiB,EACjB,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,8BAA8B,EAE9B,gBAAgB,EAChB,eAAe,EAEf,qBAAqB,EACrB,SAAS,EACT,iBAAiB,EACjB,UAAU,EACV,uBAAuB,EACvB,iCAAiC,EACjC,WAAW,EACX,OAAO,EACP,QAAQ,EACR,yBAAyB,EAEzB,eAAe,EACf,qBAAqB,EACrB,kCAAkC,EAClC,iCAAiC,EACjC,oCAAoC,GACrC,CAAC;AAEF,OAAO,EACL,iBAAiB,EACjB,cAAc,EACd,mBAAmB,EACnB,YAAY,EACZ,mBAAmB,EACnB,WAAW,EACX,UAAU,EACV,MAAM,EACN,YAAY,EACZ,iBAAiB,EACjB,YAAY,EACZ,cAAc,EAEd,eAAe,EACf,kBAAkB,EAClB,aAAa,EACb,SAAS,EACT,eAAe,EAEf,aAAa,EACb,eAAe,EACf,oBAAoB,EACpB,SAAS,EACT,WAAW,EACX,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,qBAAqB,GACtB,CAAC;AAuDF;;;GAGG;AACH,eAAO,IAAI,YAAY,QAAK,CAAC;AAE7B;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAElD;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAajE;AA8FD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;GA0CG;AACH,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,MAAM,CAAC,CAyEjB;AAED;;;;;;;;GAQG;AACH,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,UAAU,CAAC,CA+BrB;AAED;;;;;;;;GAQG;AACH,wBAAsB,sBAAsB,CAC1C,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,MAAM,CAAC,CAwBjB;AAED;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAChC,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,OAAO,CAAC,EAAE,mBAAmB,GAC5B,OAAO,CAAC,QAAQ,EAAE,CAAC,CAwCrB;AAED;;GAEG;AACH,wBAAgB,UAAU,IAAI,WAAW,CASxC;AAED;;GAEG;AACH,wBAAgB,aAAa,IAAI,OAAO,CAEvC;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,cAAc,CAClC,QAAQ,EAAE,IAAI,GAAG,UAAU,GAC1B,OAAO,CAAC,UAAU,EAAE,CAAC,CAyBvB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AACH,wBAAsB,aAAa,CACjC,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,OAAO,EAAE,oBAAoB,GAC5B,OAAO,CAAC,qBAAqB,CAAC,CA4ChC;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,YAAY,EAAE,MAAM,GACnB,OAAO,CAAC,wBAAwB,CAAC,CAgBnC;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,cAAc,CAClC,QAAQ,EAAE,IAAI,GAAG,UAAU,GAC1B,OAAO,CAAC,OAAO,CAAC,CAalB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,oBAAoB,CACxC,QAAQ,EAAE,IAAI,GAAG,UAAU,GAC1B,OAAO,CAAC,iBAAiB,CAAC,CAyD5B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAsB,mBAAmB,CACvC,QAAQ,EAAE,IAAI,GAAG,UAAU,GAC1B,OAAO,CAAC,gBAAgB,CAAC,CAqD3B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,wBAAsB,oBAAoB,CACxC,QAAQ,EAAE,IAAI,GAAG,UAAU,GAC1B,OAAO,CAAC,qBAAqB,CAAC,CA+FhC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiDG;AACH,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,OAAO,EAAE,8BAA8B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CAoDhC;AAMD;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAsB,mBAAmB,CACvC,QAAQ,EAAE,IAAI,GAAG,UAAU,GAC1B,OAAO,CAAC,MAAM,CAAC,CAajB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AACH,wBAAsB,2BAA2B,CAC/C,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,qBAAqB,CAAC,CAkBhC;AAED;;;;;;;;;;;;;;;;;;;;;;GA
sBG;AACH,wBAAsB,2BAA2B,CAC/C,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,aAAa,EAAE,qBAAqB,GACnC,OAAO,CAAC,kCAAkC,CAAC,CA4B7C;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,wBAAsB,wCAAwC,CAC5D,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,aAAa,EAAE,qBAAqB,EACpC,iBAAiB,CAAC,EAAE,iBAAiB,EACrC,iBAAiB,CAAC,EAAE,oCAAoC,GACvD,OAAO,CAAC,MAAM,CAAC,CA0BjB;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,IAAI,GAAG,UAAU,EAC3B,UAAU,EAAE,MAAM,EAClB,UAAU,GAAE,MAAY,GACvB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAsBrB;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,gBAAgB,CAC9B,EAAE,EAAE,MAAM,EACV,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,MAAM,EACpB,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,GAChB,uBAAuB,CA2BzB;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,0BAA0B,CACxC,EAAE,EAAE,MAAM,EACV,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,MAAM,EACpB,UAAU,EAAE,MAAM,EAClB,UAAU,GAAE,MAAU,GACrB,uBAAuB,GAAG,IAAI,CAahC;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,mBAAmB,CACjC,YAAY,EAAE,MAAM,EACpB,UAAU,EAAE,MAAM,EAClB,UAAU,GAAE,MAAY,GACvB,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAAC,CAevC"}
|