@adeu/core 1.6.7 → 1.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3969 -1859
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +95 -8
- package/dist/index.d.ts +95 -8
- package/dist/index.js +3966 -1859
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/consistency.test.ts +134 -0
- package/src/diff.test.ts +13 -1
- package/src/diff.ts +220 -47
- package/src/docx/bridge.ts +111 -57
- package/src/docx/dom.ts +66 -7
- package/src/domain.test.ts +280 -0
- package/src/domain.ts +264 -10
- package/src/engine.bugs.test.ts +481 -0
- package/src/engine.ts +1346 -192
- package/src/index.ts +7 -8
- package/src/ingest.ts +8 -0
- package/src/markup.ts +160 -53
- package/src/outline.ts +199 -69
- package/src/sanitize/core.ts +130 -0
- package/src/sanitize/report.ts +125 -0
- package/src/sanitize/sanitize.test.ts +237 -0
- package/src/sanitize/transforms.ts +452 -0
- package/src/utils/docx.ts +292 -158
package/dist/index.d.cts
CHANGED
|
@@ -13,6 +13,7 @@ declare class Part {
|
|
|
13
13
|
contentType: string;
|
|
14
14
|
rels: Map<string, Relationship>;
|
|
15
15
|
_element: Element;
|
|
16
|
+
package?: DocxPackage;
|
|
16
17
|
constructor(partname: string, blob: string, element: Element, contentType: string);
|
|
17
18
|
addRelationship(id: string, type: string, target: string, isExternal?: boolean): void;
|
|
18
19
|
}
|
|
@@ -40,6 +41,7 @@ declare class DocumentObject {
|
|
|
40
41
|
*/
|
|
41
42
|
static load(buffer: Buffer | ArrayBuffer): Promise<DocumentObject>;
|
|
42
43
|
relateTo(part: Part, relType: string): void;
|
|
44
|
+
relateToExternal(target: string, relType: string): string;
|
|
43
45
|
save(): Promise<Buffer>;
|
|
44
46
|
}
|
|
45
47
|
|
|
@@ -55,8 +57,6 @@ declare class Run {
|
|
|
55
57
|
constructor(_element: Element, _parent: any);
|
|
56
58
|
}
|
|
57
59
|
|
|
58
|
-
declare function extractTextFromBuffer(buffer: Buffer, cleanView?: boolean): Promise<string>;
|
|
59
|
-
|
|
60
60
|
interface TextSpan {
|
|
61
61
|
start: number;
|
|
62
62
|
end: number;
|
|
@@ -187,10 +187,82 @@ declare class RedlineEngine {
|
|
|
187
187
|
private _getNextId;
|
|
188
188
|
private _create_track_change_tag;
|
|
189
189
|
private _set_text_content;
|
|
190
|
+
/**
|
|
191
|
+
* Attaches a comment that wraps a contiguous range within a single paragraph.
|
|
192
|
+
* start_element and end_element must both be direct children of parent_element
|
|
193
|
+
* and start_element must come before (or equal) end_element in document order.
|
|
194
|
+
* Ported from Python `RedlineEngine._attach_comment`.
|
|
195
|
+
*/
|
|
196
|
+
private _attach_comment;
|
|
197
|
+
/**
|
|
198
|
+
* Attaches a comment that spans across two different paragraphs (or other block
|
|
199
|
+
* containers). start_element lives inside start_p, end_element lives inside end_p,
|
|
200
|
+
* and the comment is open from start_element through end_element.
|
|
201
|
+
* Ported from Python `RedlineEngine._attach_comment_spanning`.
|
|
202
|
+
*/
|
|
203
|
+
private _attach_comment_spanning; /**
|
|
204
|
+
* Inserts `text` as one or more tracked paragraphs anchored relative to
|
|
205
|
+
* either an existing run or a paragraph. Returns:
|
|
206
|
+
* { first_node, last_p, last_ins, used_block_mode }
|
|
207
|
+
* where:
|
|
208
|
+
* - first_node: the first <w:ins> (for inline mode) OR the first new <w:p>
|
|
209
|
+
* (for block mode). The caller uses this for splicing into the DOM and
|
|
210
|
+
* for anchoring comments.
|
|
211
|
+
* - last_p: the last new <w:p> created, if any. null when entirely inline.
|
|
212
|
+
* - last_ins: the last <w:ins> created (inside the last new <w:p>, or the
|
|
213
|
+
* sole inline ins). Used as the comment's end anchor.
|
|
214
|
+
* - used_block_mode: true when the first line carried a heading/list style
|
|
215
|
+
* marker and we created a new paragraph for it (rather than inlining it).
|
|
216
|
+
*
|
|
217
|
+
* Multi-paragraph rules (only when text contains '\n'):
|
|
218
|
+
* - Each additional line becomes a new <w:p>, inserted after the anchor
|
|
219
|
+
* paragraph in document order.
|
|
220
|
+
* - Each new <w:p> gets a copy of the anchor paragraph's <w:pPr> (so list
|
|
221
|
+
* numbering / indentation are preserved) unless the line itself starts
|
|
222
|
+
* with a markdown heading or list marker, which overrides the style.
|
|
223
|
+
* - Each new <w:p> carries a tracked paragraph-break marker
|
|
224
|
+
* (<w:pPr><w:rPr><w:ins/></w:rPr></w:pPr>) so Word natively tracks the
|
|
225
|
+
* paragraph break.
|
|
226
|
+
* - Each new <w:p>'s content is wrapped in a <w:ins>, with inline bold/
|
|
227
|
+
* italic markdown parsed via _parse_inline_markdown.
|
|
228
|
+
*
|
|
229
|
+
* The first line:
|
|
230
|
+
* - If it carries a heading / list marker AND we have a paragraph anchor,
|
|
231
|
+
* we drop into "block mode": no inline <w:ins>; the first line itself
|
|
232
|
+
* becomes the first new <w:p>.
|
|
233
|
+
* - Otherwise we emit a single inline <w:ins> for the first line (current
|
|
234
|
+
* behaviour) and treat the remaining lines as block extensions.
|
|
235
|
+
*
|
|
236
|
+
* Does NOT attach comments; callers handle that.
|
|
237
|
+
*/
|
|
238
|
+
private _track_insert_multiline;
|
|
239
|
+
/**
|
|
240
|
+
* Builds a single tracked-insert wrapper (<w:ins>) containing one or more
|
|
241
|
+
* <w:r> elements representing the inline markdown segments of `line_text`.
|
|
242
|
+
* Returns null if line_text is empty.
|
|
243
|
+
*/
|
|
244
|
+
private _build_tracked_ins_for_line;
|
|
190
245
|
private _parse_markdown_style;
|
|
191
246
|
private _parse_inline_markdown;
|
|
192
247
|
private _apply_run_props;
|
|
248
|
+
/**
|
|
249
|
+
* Replaces (or creates) a paragraph's <w:pPr> with a single <w:pStyle> entry
|
|
250
|
+
* pointing at `style_name`. Strips any existing pPr to avoid layering a new
|
|
251
|
+
* heading style on top of a previous list/heading configuration.
|
|
252
|
+
*
|
|
253
|
+
* In Python, the style id is resolved via doc.styles[style_name].style_id and
|
|
254
|
+
* falls back to stripping spaces. Node has no equivalent style cache exposed
|
|
255
|
+
* on `doc`, so we always use the simple "strip spaces" fallback: "Heading 1"
|
|
256
|
+
* becomes the style id "Heading1", "List Number" becomes "ListNumber", etc.
|
|
257
|
+
* This matches python-docx's default style-id convention for the built-in
|
|
258
|
+
* paragraph styles and is what Word writes by default.
|
|
259
|
+
*/
|
|
260
|
+
private _set_paragraph_style;
|
|
261
|
+
private _anchor_reply_comment;
|
|
262
|
+
private _clean_wrapping_comments;
|
|
263
|
+
private _delete_comments_in_element;
|
|
193
264
|
validate_edits(edits: any[]): string[];
|
|
265
|
+
validate_review_actions(actions: any[]): string[];
|
|
194
266
|
process_batch(changes: DocumentChange[]): any;
|
|
195
267
|
apply_edits(edits: any[]): [number, number];
|
|
196
268
|
apply_review_actions(actions: any[]): [number, number];
|
|
@@ -201,6 +273,8 @@ declare class RedlineEngine {
|
|
|
201
273
|
|
|
202
274
|
declare function trim_common_context(target: string, new_val: string): [number, number];
|
|
203
275
|
declare function generate_edits_from_text(original_text: string, modified_text: string): ModifyText[];
|
|
276
|
+
declare function create_unified_diff(original_text: string, modified_text: string, context_lines?: number): string;
|
|
277
|
+
declare function create_word_patch_diff(original_text: string, modified_text: string, original_path?: string, modified_path?: string): string;
|
|
204
278
|
|
|
205
279
|
declare function apply_edits_to_markdown(markdown_text: string, edits: ModifyText[], include_index?: boolean, highlight_only?: boolean): string;
|
|
206
280
|
|
|
@@ -238,10 +312,23 @@ interface OutlineNode {
|
|
|
238
312
|
}
|
|
239
313
|
declare function extract_outline(doc: DocumentObject, projected_body: string, body_pages: string[], body_page_offsets: number[], paragraph_offsets?: Record<string, [number, number]> | null): OutlineNode[];
|
|
240
314
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
315
|
+
declare function extractTextFromBuffer(buffer: Buffer, cleanView?: boolean): Promise<string>;
|
|
316
|
+
|
|
317
|
+
interface FinalizeOptions {
|
|
318
|
+
filename: string;
|
|
319
|
+
sanitize_mode?: 'full' | 'keep-markup' | 'baseline';
|
|
320
|
+
accept_all?: boolean;
|
|
321
|
+
protection_mode?: 'read_only' | 'encrypt' | null;
|
|
322
|
+
password?: string | null;
|
|
323
|
+
author?: string | null;
|
|
324
|
+
export_pdf?: boolean;
|
|
325
|
+
}
|
|
326
|
+
interface FinalizeResult {
|
|
327
|
+
reportText: string;
|
|
328
|
+
outBuffer?: Buffer;
|
|
329
|
+
}
|
|
330
|
+
declare function finalize_document(doc: DocumentObject, options: FinalizeOptions): Promise<FinalizeResult>;
|
|
331
|
+
|
|
332
|
+
declare function identifyEngine(): string;
|
|
246
333
|
|
|
247
|
-
export { BatchValidationError, DocumentMapper, DocumentObject, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, apply_edits_to_markdown, extractTextFromBuffer, extract_outline, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
|
|
334
|
+
export { BatchValidationError, DocumentMapper, DocumentObject, type FinalizeOptions, type FinalizeResult, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, apply_edits_to_markdown, create_unified_diff, create_word_patch_diff, extractTextFromBuffer, extract_outline, finalize_document, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
|
package/dist/index.d.ts
CHANGED
|
@@ -13,6 +13,7 @@ declare class Part {
|
|
|
13
13
|
contentType: string;
|
|
14
14
|
rels: Map<string, Relationship>;
|
|
15
15
|
_element: Element;
|
|
16
|
+
package?: DocxPackage;
|
|
16
17
|
constructor(partname: string, blob: string, element: Element, contentType: string);
|
|
17
18
|
addRelationship(id: string, type: string, target: string, isExternal?: boolean): void;
|
|
18
19
|
}
|
|
@@ -40,6 +41,7 @@ declare class DocumentObject {
|
|
|
40
41
|
*/
|
|
41
42
|
static load(buffer: Buffer | ArrayBuffer): Promise<DocumentObject>;
|
|
42
43
|
relateTo(part: Part, relType: string): void;
|
|
44
|
+
relateToExternal(target: string, relType: string): string;
|
|
43
45
|
save(): Promise<Buffer>;
|
|
44
46
|
}
|
|
45
47
|
|
|
@@ -55,8 +57,6 @@ declare class Run {
|
|
|
55
57
|
constructor(_element: Element, _parent: any);
|
|
56
58
|
}
|
|
57
59
|
|
|
58
|
-
declare function extractTextFromBuffer(buffer: Buffer, cleanView?: boolean): Promise<string>;
|
|
59
|
-
|
|
60
60
|
interface TextSpan {
|
|
61
61
|
start: number;
|
|
62
62
|
end: number;
|
|
@@ -187,10 +187,82 @@ declare class RedlineEngine {
|
|
|
187
187
|
private _getNextId;
|
|
188
188
|
private _create_track_change_tag;
|
|
189
189
|
private _set_text_content;
|
|
190
|
+
/**
|
|
191
|
+
* Attaches a comment that wraps a contiguous range within a single paragraph.
|
|
192
|
+
* start_element and end_element must both be direct children of parent_element
|
|
193
|
+
* and start_element must come before (or equal) end_element in document order.
|
|
194
|
+
* Ported from Python `RedlineEngine._attach_comment`.
|
|
195
|
+
*/
|
|
196
|
+
private _attach_comment;
|
|
197
|
+
/**
|
|
198
|
+
* Attaches a comment that spans across two different paragraphs (or other block
|
|
199
|
+
* containers). start_element lives inside start_p, end_element lives inside end_p,
|
|
200
|
+
* and the comment is open from start_element through end_element.
|
|
201
|
+
* Ported from Python `RedlineEngine._attach_comment_spanning`.
|
|
202
|
+
*/
|
|
203
|
+
private _attach_comment_spanning; /**
|
|
204
|
+
* Inserts `text` as one or more tracked paragraphs anchored relative to
|
|
205
|
+
* either an existing run or a paragraph. Returns:
|
|
206
|
+
* { first_node, last_p, last_ins, used_block_mode }
|
|
207
|
+
* where:
|
|
208
|
+
* - first_node: the first <w:ins> (for inline mode) OR the first new <w:p>
|
|
209
|
+
* (for block mode). The caller uses this for splicing into the DOM and
|
|
210
|
+
* for anchoring comments.
|
|
211
|
+
* - last_p: the last new <w:p> created, if any. null when entirely inline.
|
|
212
|
+
* - last_ins: the last <w:ins> created (inside the last new <w:p>, or the
|
|
213
|
+
* sole inline ins). Used as the comment's end anchor.
|
|
214
|
+
* - used_block_mode: true when the first line carried a heading/list style
|
|
215
|
+
* marker and we created a new paragraph for it (rather than inlining it).
|
|
216
|
+
*
|
|
217
|
+
* Multi-paragraph rules (only when text contains '\n'):
|
|
218
|
+
* - Each additional line becomes a new <w:p>, inserted after the anchor
|
|
219
|
+
* paragraph in document order.
|
|
220
|
+
* - Each new <w:p> gets a copy of the anchor paragraph's <w:pPr> (so list
|
|
221
|
+
* numbering / indentation are preserved) unless the line itself starts
|
|
222
|
+
* with a markdown heading or list marker, which overrides the style.
|
|
223
|
+
* - Each new <w:p> carries a tracked paragraph-break marker
|
|
224
|
+
* (<w:pPr><w:rPr><w:ins/></w:rPr></w:pPr>) so Word natively tracks the
|
|
225
|
+
* paragraph break.
|
|
226
|
+
* - Each new <w:p>'s content is wrapped in a <w:ins>, with inline bold/
|
|
227
|
+
* italic markdown parsed via _parse_inline_markdown.
|
|
228
|
+
*
|
|
229
|
+
* The first line:
|
|
230
|
+
* - If it carries a heading / list marker AND we have a paragraph anchor,
|
|
231
|
+
* we drop into "block mode": no inline <w:ins>; the first line itself
|
|
232
|
+
* becomes the first new <w:p>.
|
|
233
|
+
* - Otherwise we emit a single inline <w:ins> for the first line (current
|
|
234
|
+
* behaviour) and treat the remaining lines as block extensions.
|
|
235
|
+
*
|
|
236
|
+
* Does NOT attach comments; callers handle that.
|
|
237
|
+
*/
|
|
238
|
+
private _track_insert_multiline;
|
|
239
|
+
/**
|
|
240
|
+
* Builds a single tracked-insert wrapper (<w:ins>) containing one or more
|
|
241
|
+
* <w:r> elements representing the inline markdown segments of `line_text`.
|
|
242
|
+
* Returns null if line_text is empty.
|
|
243
|
+
*/
|
|
244
|
+
private _build_tracked_ins_for_line;
|
|
190
245
|
private _parse_markdown_style;
|
|
191
246
|
private _parse_inline_markdown;
|
|
192
247
|
private _apply_run_props;
|
|
248
|
+
/**
|
|
249
|
+
* Replaces (or creates) a paragraph's <w:pPr> with a single <w:pStyle> entry
|
|
250
|
+
* pointing at `style_name`. Strips any existing pPr to avoid layering a new
|
|
251
|
+
* heading style on top of a previous list/heading configuration.
|
|
252
|
+
*
|
|
253
|
+
* In Python, the style id is resolved via doc.styles[style_name].style_id and
|
|
254
|
+
* falls back to stripping spaces. Node has no equivalent style cache exposed
|
|
255
|
+
* on `doc`, so we always use the simple "strip spaces" fallback: "Heading 1"
|
|
256
|
+
* becomes the style id "Heading1", "List Number" becomes "ListNumber", etc.
|
|
257
|
+
* This matches python-docx's default style-id convention for the built-in
|
|
258
|
+
* paragraph styles and is what Word writes by default.
|
|
259
|
+
*/
|
|
260
|
+
private _set_paragraph_style;
|
|
261
|
+
private _anchor_reply_comment;
|
|
262
|
+
private _clean_wrapping_comments;
|
|
263
|
+
private _delete_comments_in_element;
|
|
193
264
|
validate_edits(edits: any[]): string[];
|
|
265
|
+
validate_review_actions(actions: any[]): string[];
|
|
194
266
|
process_batch(changes: DocumentChange[]): any;
|
|
195
267
|
apply_edits(edits: any[]): [number, number];
|
|
196
268
|
apply_review_actions(actions: any[]): [number, number];
|
|
@@ -201,6 +273,8 @@ declare class RedlineEngine {
|
|
|
201
273
|
|
|
202
274
|
declare function trim_common_context(target: string, new_val: string): [number, number];
|
|
203
275
|
declare function generate_edits_from_text(original_text: string, modified_text: string): ModifyText[];
|
|
276
|
+
declare function create_unified_diff(original_text: string, modified_text: string, context_lines?: number): string;
|
|
277
|
+
declare function create_word_patch_diff(original_text: string, modified_text: string, original_path?: string, modified_path?: string): string;
|
|
204
278
|
|
|
205
279
|
declare function apply_edits_to_markdown(markdown_text: string, edits: ModifyText[], include_index?: boolean, highlight_only?: boolean): string;
|
|
206
280
|
|
|
@@ -238,10 +312,23 @@ interface OutlineNode {
|
|
|
238
312
|
}
|
|
239
313
|
declare function extract_outline(doc: DocumentObject, projected_body: string, body_pages: string[], body_page_offsets: number[], paragraph_offsets?: Record<string, [number, number]> | null): OutlineNode[];
|
|
240
314
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
315
|
+
declare function extractTextFromBuffer(buffer: Buffer, cleanView?: boolean): Promise<string>;
|
|
316
|
+
|
|
317
|
+
interface FinalizeOptions {
|
|
318
|
+
filename: string;
|
|
319
|
+
sanitize_mode?: 'full' | 'keep-markup' | 'baseline';
|
|
320
|
+
accept_all?: boolean;
|
|
321
|
+
protection_mode?: 'read_only' | 'encrypt' | null;
|
|
322
|
+
password?: string | null;
|
|
323
|
+
author?: string | null;
|
|
324
|
+
export_pdf?: boolean;
|
|
325
|
+
}
|
|
326
|
+
interface FinalizeResult {
|
|
327
|
+
reportText: string;
|
|
328
|
+
outBuffer?: Buffer;
|
|
329
|
+
}
|
|
330
|
+
declare function finalize_document(doc: DocumentObject, options: FinalizeOptions): Promise<FinalizeResult>;
|
|
331
|
+
|
|
332
|
+
declare function identifyEngine(): string;
|
|
246
333
|
|
|
247
|
-
export { BatchValidationError, DocumentMapper, DocumentObject, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, apply_edits_to_markdown, extractTextFromBuffer, extract_outline, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
|
|
334
|
+
export { BatchValidationError, DocumentMapper, DocumentObject, type FinalizeOptions, type FinalizeResult, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, apply_edits_to_markdown, create_unified_diff, create_word_patch_diff, extractTextFromBuffer, extract_outline, finalize_document, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
|