python-hwpx 2.3__py3-none-any.whl → 2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hwpx/__init__.py +36 -36
- hwpx/document.py +1362 -890
- hwpx/opc/package.py +607 -514
- hwpx/opc/xml_utils.py +86 -50
- hwpx/oxml/__init__.py +222 -220
- hwpx/oxml/body.py +435 -435
- hwpx/oxml/common.py +36 -36
- hwpx/oxml/document.py +4732 -3494
- hwpx/oxml/header.py +1369 -1369
- hwpx/oxml/header_part.py +10 -10
- hwpx/oxml/memo.py +10 -10
- hwpx/oxml/namespaces.py +24 -0
- hwpx/oxml/paragraph.py +10 -10
- hwpx/oxml/parser.py +72 -72
- hwpx/oxml/schema.py +44 -44
- hwpx/oxml/section.py +10 -10
- hwpx/oxml/table.py +10 -10
- hwpx/oxml/utils.py +85 -85
- hwpx/package.py +30 -24
- hwpx/templates.py +33 -33
- hwpx/tools/__init__.py +44 -36
- hwpx/tools/_schemas/header.xsd +14 -14
- hwpx/tools/_schemas/section.xsd +12 -12
- hwpx/tools/exporter.py +272 -0
- hwpx/tools/object_finder.py +347 -347
- hwpx/tools/text_extractor.py +726 -726
- hwpx/tools/validator.py +184 -184
- {python_hwpx-2.3.dist-info → python_hwpx-2.5.dist-info}/METADATA +333 -257
- python_hwpx-2.5.dist-info/RECORD +35 -0
- {python_hwpx-2.3.dist-info → python_hwpx-2.5.dist-info}/licenses/LICENSE +32 -32
- python_hwpx-2.3.dist-info/RECORD +0 -33
- {python_hwpx-2.3.dist-info → python_hwpx-2.5.dist-info}/WHEEL +0 -0
- {python_hwpx-2.3.dist-info → python_hwpx-2.5.dist-info}/entry_points.txt +0 -0
- {python_hwpx-2.3.dist-info → python_hwpx-2.5.dist-info}/top_level.txt +0 -0
hwpx/document.py
CHANGED
|
@@ -1,890 +1,1362 @@
|
|
|
1
|
-
"""High-level representation of an HWPX document."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import xml.etree.ElementTree as ET
|
|
6
|
-
import io
|
|
7
|
-
import warnings
|
|
8
|
-
from datetime import datetime
|
|
9
|
-
import logging
|
|
10
|
-
import uuid
|
|
11
|
-
|
|
12
|
-
from os import PathLike
|
|
13
|
-
from typing import Any, BinaryIO, Iterator, overload
|
|
14
|
-
|
|
15
|
-
from lxml import etree
|
|
16
|
-
|
|
17
|
-
from .oxml import (
|
|
18
|
-
Bullet,
|
|
19
|
-
GenericElement,
|
|
20
|
-
HwpxOxmlDocument,
|
|
21
|
-
HwpxOxmlHeader,
|
|
22
|
-
HwpxOxmlHistory,
|
|
23
|
-
HwpxOxmlInlineObject,
|
|
24
|
-
HwpxOxmlMasterPage,
|
|
25
|
-
HwpxOxmlMemo,
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
ET.register_namespace("
|
|
45
|
-
ET.register_namespace("
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
child
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
self.
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
f"
|
|
91
|
-
f"
|
|
92
|
-
f"
|
|
93
|
-
")"
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
"""
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
self.
|
|
164
|
-
for resource in self._managed_resources:
|
|
165
|
-
self.
|
|
166
|
-
|
|
167
|
-
self.
|
|
168
|
-
self.
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
section = self._root.sections[
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
created_value =
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
if char_ref is None:
|
|
386
|
-
char_ref =
|
|
387
|
-
char_ref
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
"
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
_append_element(
|
|
409
|
-
_append_element(parameters, f"{_HP}stringParam", {"name": "
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
"
|
|
430
|
-
"
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
_append_element(
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
)
|
|
535
|
-
|
|
536
|
-
def
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
)
|
|
597
|
-
|
|
598
|
-
for run in
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
if
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
self
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
*,
|
|
817
|
-
section: HwpxOxmlSection | None = None,
|
|
818
|
-
section_index: int | None = None,
|
|
819
|
-
|
|
820
|
-
) ->
|
|
821
|
-
"""
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
self
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
1
|
+
"""High-level representation of an HWPX document."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import xml.etree.ElementTree as ET
|
|
6
|
+
import io
|
|
7
|
+
import warnings
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
import logging
|
|
10
|
+
import uuid
|
|
11
|
+
|
|
12
|
+
from os import PathLike
|
|
13
|
+
from typing import Any, BinaryIO, Iterator, Sequence, overload
|
|
14
|
+
|
|
15
|
+
from lxml import etree
|
|
16
|
+
|
|
17
|
+
from .oxml import (
|
|
18
|
+
Bullet,
|
|
19
|
+
GenericElement,
|
|
20
|
+
HwpxOxmlDocument,
|
|
21
|
+
HwpxOxmlHeader,
|
|
22
|
+
HwpxOxmlHistory,
|
|
23
|
+
HwpxOxmlInlineObject,
|
|
24
|
+
HwpxOxmlMasterPage,
|
|
25
|
+
HwpxOxmlMemo,
|
|
26
|
+
HwpxOxmlNote,
|
|
27
|
+
HwpxOxmlParagraph,
|
|
28
|
+
HwpxOxmlRun,
|
|
29
|
+
HwpxOxmlSection,
|
|
30
|
+
HwpxOxmlSectionHeaderFooter,
|
|
31
|
+
HwpxOxmlShape,
|
|
32
|
+
HwpxOxmlTable,
|
|
33
|
+
HwpxOxmlVersion,
|
|
34
|
+
MemoShape,
|
|
35
|
+
ParagraphProperty,
|
|
36
|
+
RunStyle,
|
|
37
|
+
Style,
|
|
38
|
+
TrackChange,
|
|
39
|
+
TrackChangeAuthor,
|
|
40
|
+
)
|
|
41
|
+
from .opc.package import HwpxPackage
|
|
42
|
+
from .templates import blank_document_bytes
|
|
43
|
+
|
|
44
|
+
ET.register_namespace("hp", "http://www.hancom.co.kr/hwpml/2011/paragraph")
|
|
45
|
+
ET.register_namespace("hs", "http://www.hancom.co.kr/hwpml/2011/section")
|
|
46
|
+
ET.register_namespace("hc", "http://www.hancom.co.kr/hwpml/2011/core")
|
|
47
|
+
ET.register_namespace("hh", "http://www.hancom.co.kr/hwpml/2011/head")
|
|
48
|
+
|
|
49
|
+
_HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
|
|
50
|
+
_HP = f"{{{_HP_NS}}}"
|
|
51
|
+
_HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
|
|
52
|
+
_HH = f"{{{_HH_NS}}}"
|
|
53
|
+
|
|
54
|
+
logger = logging.getLogger(__name__)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _append_element(
|
|
58
|
+
parent: Any,
|
|
59
|
+
tag: str,
|
|
60
|
+
attributes: dict[str, str] | None = None,
|
|
61
|
+
) -> Any:
|
|
62
|
+
"""Create and append a child element that matches *parent*'s element type."""
|
|
63
|
+
|
|
64
|
+
child = parent.makeelement(tag, attributes or {})
|
|
65
|
+
parent.append(child)
|
|
66
|
+
return child
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class HwpxDocument:
|
|
70
|
+
"""Provides a user-friendly API for editing HWPX documents."""
|
|
71
|
+
|
|
72
|
+
def __init__(
|
|
73
|
+
self,
|
|
74
|
+
package: HwpxPackage,
|
|
75
|
+
root: HwpxOxmlDocument,
|
|
76
|
+
*,
|
|
77
|
+
managed_resources: tuple[Any, ...] = (),
|
|
78
|
+
validate_on_save: bool = False,
|
|
79
|
+
):
|
|
80
|
+
self._package = package
|
|
81
|
+
self._root = root
|
|
82
|
+
self._managed_resources = list(managed_resources)
|
|
83
|
+
self._closed = False
|
|
84
|
+
self.validate_on_save = validate_on_save
|
|
85
|
+
|
|
86
|
+
def __repr__(self) -> str:
|
|
87
|
+
"""Return a compact and safe summary of the document state."""
|
|
88
|
+
|
|
89
|
+
return (
|
|
90
|
+
f"{self.__class__.__name__}("
|
|
91
|
+
f"sections={len(self.sections)}, "
|
|
92
|
+
f"paragraphs={len(self.paragraphs)}, "
|
|
93
|
+
f"headers={len(self.headers)}, "
|
|
94
|
+
f"master_pages={len(self.master_pages)}, "
|
|
95
|
+
f"histories={len(self.histories)}, "
|
|
96
|
+
f"closed={self._closed}"
|
|
97
|
+
")"
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# ------------------------------------------------------------------
|
|
101
|
+
# construction helpers
|
|
102
|
+
@classmethod
def open(
    cls,
    source: str | PathLike[str] | bytes | BinaryIO,
) -> "HwpxDocument":
    """Open *source* and return a :class:`HwpxDocument` instance.

    Args:
        source: A path, a binary stream, or raw ``bytes``. Raw bytes are
            wrapped in an in-memory stream that the returned document owns
            and closes in :meth:`close`.

    Raises:
        HwpxStructureError: Raised when opening an HWPX whose required
            files or structure are invalid.
        HwpxPackageError: Propagated when a general I/O or format error
            occurs while opening the package.
    """
    internal_resources: list[Any] = []
    open_source = source
    if isinstance(source, bytes):
        stream = io.BytesIO(source)
        open_source = stream
        internal_resources.append(stream)

    package = None
    try:
        package = HwpxPackage.open(open_source)
        root = HwpxOxmlDocument.from_package(package)
    except BaseException:
        # No document exists yet to own these resources, so release them
        # here before re-raising the original error unchanged.
        if package is not None:
            cls._close_resource(package)
        for resource in internal_resources:
            cls._close_resource(resource)
        raise
    return cls(package, root, managed_resources=tuple(internal_resources))
|
|
122
|
+
|
|
123
|
+
@classmethod
def new(cls) -> "HwpxDocument":
    """Create a blank document by opening the bytes from ``blank_document_bytes()``."""

    template = blank_document_bytes()
    return cls.open(template)
|
|
128
|
+
|
|
129
|
+
@classmethod
def from_package(cls, package: HwpxPackage) -> "HwpxDocument":
    """Wrap an already opened :class:`HwpxPackage` in a document.

    Args:
        package: A :class:`hwpx.opc.package.HwpxPackage` instance.

    Returns:
        A document whose tree is built with
        ``HwpxOxmlDocument.from_package(package)``; no extra managed
        resources are registered.
    """
    return cls(package, HwpxOxmlDocument.from_package(package))
|
|
138
|
+
|
|
139
|
+
def __enter__(self) -> "HwpxDocument":
|
|
140
|
+
"""컨텍스트 매니저 진입 시 현재 문서 인스턴스를 반환합니다."""
|
|
141
|
+
|
|
142
|
+
return self
|
|
143
|
+
|
|
144
|
+
def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
|
|
145
|
+
"""예외 발생 여부와 무관하게 내부 자원을 안전하게 정리합니다."""
|
|
146
|
+
|
|
147
|
+
self.close()
|
|
148
|
+
return False
|
|
149
|
+
|
|
150
|
+
def close(self) -> None:
    """Release the package and the streams managed by this document.

    Cleanup policy:
    - Resources are passed to ``_flush_resource`` first, package before
      managed resources.
    - They are then passed to ``_close_resource`` in the same order.
    - The managed-resource list is emptied and the document is marked closed.
    - Calling ``close()`` again on a closed document does nothing.
    """

    if self._closed:
        return

    # Package first, then managed resources, in both passes.
    owned = [self._package, *self._managed_resources]
    for resource in owned:
        self._flush_resource(resource)
    for resource in owned:
        self._close_resource(resource)

    self._managed_resources.clear()
    self._closed = True
|
|
173
|
+
|
|
174
|
+
@staticmethod
def _flush_resource(resource: Any) -> None:
    """Call ``resource.flush()`` when it exists, logging and ignoring failures.

    Objects without a callable ``flush`` attribute are skipped.
    """
    flusher = getattr(resource, "flush", None)
    if callable(flusher):
        try:
            flusher()
        except Exception:
            # Best-effort cleanup: record the failure at debug level and move on.
            logger.debug("자원 flush 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
|
|
183
|
+
|
|
184
|
+
@staticmethod
def _close_resource(resource: Any) -> None:
    """Call ``resource.close()`` when it exists, logging and ignoring failures.

    Objects without a callable ``close`` attribute are skipped.
    """
    closer = getattr(resource, "close", None)
    if callable(closer):
        try:
            closer()
        except Exception:
            # Best-effort cleanup: record the failure at debug level and move on.
            logger.debug("자원 close 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
|
|
193
|
+
|
|
194
|
+
# ------------------------------------------------------------------
|
|
195
|
+
# properties exposing document content
|
|
196
|
+
@property
def package(self) -> HwpxPackage:
    """The :class:`HwpxPackage` object this document was constructed with."""
    backing_package = self._package
    return backing_package
|
|
200
|
+
|
|
201
|
+
@property
def oxml(self) -> HwpxOxmlDocument:
    """The low-level XML object tree (``HwpxOxmlDocument``) behind this document."""
    tree = self._root
    return tree
|
|
205
|
+
|
|
206
|
+
@property
def sections(self) -> list[HwpxOxmlSection]:
    """Sections of the document, as reported by the underlying tree."""
    root = self._root
    return root.sections
|
|
210
|
+
|
|
211
|
+
@property
def headers(self) -> list[HwpxOxmlHeader]:
    """Header parts of the document, as reported by the underlying tree."""
    root = self._root
    return root.headers
|
|
215
|
+
|
|
216
|
+
@property
def master_pages(self) -> list[HwpxOxmlMasterPage]:
    """Master-page parts of the document, as reported by the underlying tree."""
    root = self._root
    return root.master_pages
|
|
220
|
+
|
|
221
|
+
@property
def histories(self) -> list[HwpxOxmlHistory]:
    """Document history parts, as reported by the underlying tree."""
    root = self._root
    return root.histories
|
|
225
|
+
|
|
226
|
+
@property
def version(self) -> HwpxOxmlVersion | None:
    """Version metadata part from the underlying tree, or ``None`` when absent."""
    root = self._root
    return root.version
|
|
230
|
+
|
|
231
|
+
@property
def border_fills(self) -> dict[str, GenericElement]:
    """Border-fill definitions exposed by the underlying tree."""

    root = self._root
    return root.border_fills
|
|
236
|
+
|
|
237
|
+
def border_fill(self, border_fill_id_ref: int | str | None) -> GenericElement | None:
    """Look up one border-fill definition by *border_fill_id_ref* via the underlying tree."""

    lookup = self._root.border_fill
    return lookup(border_fill_id_ref)
|
|
241
|
+
|
|
242
|
+
@property
def memo_shapes(self) -> dict[str, MemoShape]:
    """Memo-shape definitions exposed by the underlying tree."""

    root = self._root
    return root.memo_shapes
|
|
247
|
+
|
|
248
|
+
def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
    """Look up one memo-shape definition by *memo_shape_id_ref* via the underlying tree."""

    lookup = self._root.memo_shape
    return lookup(memo_shape_id_ref)
|
|
252
|
+
|
|
253
|
+
@property
def bullets(self) -> dict[str, Bullet]:
    """Bullet definitions exposed by the underlying tree."""

    root = self._root
    return root.bullets
|
|
258
|
+
|
|
259
|
+
def bullet(self, bullet_id_ref: int | str | None) -> Bullet | None:
    """Look up one bullet definition by *bullet_id_ref* via the underlying tree."""

    lookup = self._root.bullet
    return lookup(bullet_id_ref)
|
|
263
|
+
|
|
264
|
+
@property
def paragraph_properties(self) -> dict[str, ParagraphProperty]:
    """Paragraph-property definitions exposed by the underlying tree."""

    root = self._root
    return root.paragraph_properties
|
|
269
|
+
|
|
270
|
+
def paragraph_property(
    self, para_pr_id_ref: int | str | None
) -> ParagraphProperty | None:
    """Look up one paragraph property by *para_pr_id_ref* via the underlying tree."""

    lookup = self._root.paragraph_property
    return lookup(para_pr_id_ref)
|
|
276
|
+
|
|
277
|
+
@property
def styles(self) -> dict[str, Style]:
    """Style definitions exposed by the underlying tree."""

    root = self._root
    return root.styles
|
|
282
|
+
|
|
283
|
+
def style(self, style_id_ref: int | str | None) -> Style | None:
    """Look up one style definition by *style_id_ref* via the underlying tree."""

    lookup = self._root.style
    return lookup(style_id_ref)
|
|
287
|
+
|
|
288
|
+
@property
def track_changes(self) -> dict[str, TrackChange]:
    """Tracked-change metadata exposed by the underlying tree."""

    root = self._root
    return root.track_changes
|
|
293
|
+
|
|
294
|
+
def track_change(self, change_id_ref: int | str | None) -> TrackChange | None:
    """Look up one tracked-change entry by *change_id_ref* via the underlying tree."""

    lookup = self._root.track_change
    return lookup(change_id_ref)
|
|
298
|
+
|
|
299
|
+
@property
def track_change_authors(self) -> dict[str, TrackChangeAuthor]:
    """Tracked-change author metadata exposed by the underlying tree."""

    root = self._root
    return root.track_change_authors
|
|
304
|
+
|
|
305
|
+
def track_change_author(
    self, author_id_ref: int | str | None
) -> TrackChangeAuthor | None:
    """Look up one tracked-change author by *author_id_ref* via the underlying tree."""

    lookup = self._root.track_change_author
    return lookup(author_id_ref)
|
|
311
|
+
|
|
312
|
+
@property
def memos(self) -> list[HwpxOxmlMemo]:
    """Collect the memo entries of every section into one new list, in section order."""

    return [
        entry
        for section in self._root.sections
        for entry in section.memos
    ]
|
|
320
|
+
|
|
321
|
+
def add_memo(
    self,
    text: str = "",
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    memo_shape_id_ref: str | int | None = None,
    memo_id: str | None = None,
    char_pr_id_ref: str | int | None = None,
    attributes: dict[str, str] | None = None,
) -> HwpxOxmlMemo:
    """Create a memo in the chosen section and return it.

    The target is *section* when given, otherwise the section at
    *section_index*, otherwise the last section of the document. The
    remaining keyword arguments are forwarded to ``section.add_memo``.

    Raises:
        ValueError: No section was selected and the document has none.
    """

    target = section
    if target is None:
        if section_index is not None:
            target = self._root.sections[section_index]
        if target is None:
            available = self._root.sections
            if not available:
                raise ValueError("document does not contain any sections")
            target = available[-1]

    memo_options = {
        "memo_shape_id_ref": memo_shape_id_ref,
        "memo_id": memo_id,
        "char_pr_id_ref": char_pr_id_ref,
        "attributes": attributes,
    }
    return target.add_memo(text, **memo_options)
|
|
347
|
+
|
|
348
|
+
def remove_memo(self, memo: HwpxOxmlMemo) -> None:
    """Detach *memo* by delegating to its own ``remove()`` method."""

    detach = memo.remove
    detach()
|
|
352
|
+
|
|
353
|
+
def attach_memo_field(
    self,
    paragraph: HwpxOxmlParagraph,
    memo: HwpxOxmlMemo,
    *,
    field_id: str | None = None,
    author: str | None = None,
    created: datetime | str | None = None,
    number: int = 1,
    char_pr_id_ref: str | int | None = None,
) -> str:
    """Wrap *paragraph* in a MEMO field that points at *memo*.

    A run holding a ``fieldBegin`` control is inserted as the first child of
    the paragraph element and a run holding the matching ``fieldEnd`` is
    appended as its last child; the paragraph's section is then marked dirty.

    Args:
        paragraph: Paragraph to anchor the memo to; must belong to a section.
        memo: Memo to reference; its group must belong to a section.
        field_id: Field identifier; a random hex UUID is used when falsy.
        author: Author text; falls back to the memo's ``author`` attribute,
            then to an empty string.
        created: Creation time; falls back to the memo's ``createDateTime``
            attribute, then to the current local time. ``datetime`` values
            are formatted as ``YYYY-MM-DD HH:MM:SS``.
        number: Memo number; values below 1 are written as 1.
        char_pr_id_ref: Character style for the generated runs; falls back
            to the paragraph's, then the memo's inferred one, then ``"0"``.

    Returns:
        The field identifier that was written.

    Raises:
        ValueError: The paragraph or the memo is not attached to a section.
    """

    if paragraph.section is None:
        raise ValueError("paragraph must belong to a section before anchoring a memo")
    if memo.group.section is None:
        raise ValueError("memo is not attached to a section")

    field_value = field_id or uuid.uuid4().hex
    author_value = author or memo.attributes.get("author") or ""

    # Resolve the timestamp: explicit argument, memo attribute, then "now".
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    stamp = created
    if stamp is None:
        stamp = memo.attributes.get("createDateTime")
    if stamp is None:
        created_value = datetime.now().strftime(timestamp_format)
    elif isinstance(stamp, datetime):
        created_value = stamp.strftime(timestamp_format)
    else:
        created_value = str(stamp)

    memo_shape_id = memo.memo_shape_id_ref or ""

    # Resolve the character style through the fallback chain.
    char_ref = char_pr_id_ref
    if char_ref is None:
        char_ref = paragraph.char_pr_id_ref
        if char_ref is None:
            char_ref = memo._infer_char_pr_id_ref()
            if char_ref is None:
                char_ref = "0"
    char_ref = str(char_ref)

    host = paragraph.element
    run_attrs = {"charPrIDRef": char_ref}

    # --- opening run: <run><ctrl><fieldBegin>...</fieldBegin></ctrl></run>
    run_begin = host.makeelement(f"{_HP}run", dict(run_attrs))
    ctrl_begin = _append_element(run_begin, f"{_HP}ctrl")
    field_begin = _append_element(
        ctrl_begin,
        f"{_HP}fieldBegin",
        {
            "id": field_value,
            "type": "MEMO",
            "editable": "true",
            "dirty": "false",
            "fieldid": field_value,
        },
    )

    parameters = _append_element(field_begin, f"{_HP}parameters", {"count": "5", "name": ""})
    parameter_specs = (
        ("stringParam", "ID", memo.id or ""),
        ("integerParam", "Number", str(max(1, number))),
        ("stringParam", "CreateDateTime", created_value),
        ("stringParam", "Author", author_value),
        ("stringParam", "MemoShapeID", memo_shape_id),
    )
    for param_tag, param_name, param_text in parameter_specs:
        _append_element(parameters, f"{_HP}{param_tag}", {"name": param_name}).text = param_text

    # The sub-list carries a one-run paragraph whose text is the memo id,
    # or the field id when the memo has none.
    memo_key = memo.id or field_value
    sub_list = _append_element(
        field_begin,
        f"{_HP}subList",
        {
            "id": f"memo-field-{memo_key}",
            "textDirection": "HORIZONTAL",
            "lineWrap": "BREAK",
            "vertAlign": "TOP",
        },
    )
    sub_para = _append_element(
        sub_list,
        f"{_HP}p",
        {
            "id": f"memo-field-{memo_key}-p",
            "paraPrIDRef": "0",
            "styleIDRef": "0",
            "pageBreak": "0",
            "columnBreak": "0",
            "merged": "0",
        },
    )
    sub_run = _append_element(sub_para, f"{_HP}run", dict(run_attrs))
    _append_element(sub_run, f"{_HP}t").text = memo_key

    # --- closing run: <run><ctrl><fieldEnd/></ctrl></run>
    run_end = host.makeelement(f"{_HP}run", dict(run_attrs))
    ctrl_end = _append_element(run_end, f"{_HP}ctrl")
    _append_element(ctrl_end, f"{_HP}fieldEnd", {"beginIDRef": field_value, "fieldid": field_value})

    # Bracket the existing paragraph content with the two runs.
    paragraph.element.insert(0, run_begin)
    paragraph.element.append(run_end)
    paragraph.section.mark_dirty()

    return field_value
|
|
449
|
+
|
|
450
|
+
def add_memo_with_anchor(
    self,
    text: str = "",
    *,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    paragraph_text: str | None = None,
    memo_shape_id_ref: str | int | None = None,
    memo_id: str | None = None,
    char_pr_id_ref: str | int | None = None,
    attributes: dict[str, str] | None = None,
    field_id: str | None = None,
    author: str | None = None,
    created: datetime | str | None = None,
    number: int = 1,
    anchor_char_pr_id_ref: str | int | None = None,
) -> tuple[HwpxOxmlMemo, HwpxOxmlParagraph, str]:
    """Create a memo and anchor it to a paragraph with a MEMO field.

    The memo is created through :meth:`add_memo`. When *paragraph* is not
    given, a new paragraph is added to the memo's section to act as the
    anchor; otherwise the given paragraph is used and its text is replaced
    only when *paragraph_text* is provided.

    Args:
        text: Memo text.
        paragraph: Existing paragraph to anchor to, or ``None`` to create one.
        section: Section for the memo (forwarded to :meth:`add_memo`).
        section_index: Section index for the memo (forwarded to :meth:`add_memo`).
        paragraph_text: Text for the anchor paragraph; for a new paragraph
            ``None`` means an empty string.
        memo_shape_id_ref: Forwarded to :meth:`add_memo`.
        memo_id: Forwarded to :meth:`add_memo`.
        char_pr_id_ref: Character style of the memo; also the fallback style
            of a newly created anchor paragraph.
        attributes: Forwarded to :meth:`add_memo`.
        field_id: Forwarded to :meth:`attach_memo_field`.
        author: Forwarded to :meth:`attach_memo_field`.
        created: Forwarded to :meth:`attach_memo_field`.
        number: Forwarded to :meth:`attach_memo_field`.
        anchor_char_pr_id_ref: Character style for the anchor paragraph and
            the field runs. Any value other than ``None`` is honoured,
            including the integer id ``0``.

    Returns:
        ``(memo, anchor_paragraph, field_id)``.

    Raises:
        ValueError: A new anchor paragraph is needed but the memo does not
            belong to a section.
    """

    memo = self.add_memo(
        text,
        section=section,
        section_index=section_index,
        memo_shape_id_ref=memo_shape_id_ref,
        memo_id=memo_id,
        char_pr_id_ref=char_pr_id_ref,
        attributes=attributes,
    )

    target_paragraph = paragraph
    if target_paragraph is None:
        memo_section = memo.group.section
        if memo_section is None:
            raise ValueError("memo must belong to a section")
        paragraph_value = "" if paragraph_text is None else paragraph_text
        # Test against None explicitly: ``0`` is a valid character-property
        # id and must not fall through to the memo's style.
        anchor_char = (
            anchor_char_pr_id_ref
            if anchor_char_pr_id_ref is not None
            else char_pr_id_ref
        )
        target_paragraph = self.add_paragraph(
            paragraph_value,
            section=memo_section,
            char_pr_id_ref=anchor_char,
        )
    elif paragraph_text is not None:
        target_paragraph.text = paragraph_text

    field_value = self.attach_memo_field(
        target_paragraph,
        memo,
        field_id=field_id,
        author=author,
        created=created,
        number=number,
        char_pr_id_ref=anchor_char_pr_id_ref,
    )

    return memo, target_paragraph, field_value
|
|
506
|
+
|
|
507
|
+
def remove_paragraph(
    self,
    paragraph: HwpxOxmlParagraph | int,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> None:
    """Delete a paragraph by delegating to the document root.

    *paragraph* is either a :class:`HwpxOxmlParagraph` or an integer index
    into the paragraphs of the selected section (the last section when
    neither *section* nor *section_index* is given).

    The root is documented to raise ``ValueError`` when the removal would
    leave the target section empty.
    """
    location = {"section": section, "section_index": section_index}
    self._root.remove_paragraph(paragraph, **location)
|
|
527
|
+
|
|
528
|
+
def add_section(self, *, after: int | None = None) -> HwpxOxmlSection:
    """Create a new empty section through the document root and return it.

    When *after* is an index, the root is asked to insert the section right
    after the section at that position; with ``None`` it is appended.
    """
    new_section = self._root.add_section(after=after)
    return new_section
|
|
535
|
+
|
|
536
|
+
def remove_section(self, section: HwpxOxmlSection | int) -> None:
    """Delete *section* (an object or an index) via the document root.

    The root is documented to raise ``ValueError`` when the document would
    be left without any section.
    """
    root = self._root
    root.remove_section(section)
|
|
544
|
+
|
|
545
|
+
@property
def paragraphs(self) -> list[HwpxOxmlParagraph]:
    """Expose the root's ``paragraphs`` collection (all sections)."""
    root = self._root
    return root.paragraphs
|
|
549
|
+
|
|
550
|
+
@property
def char_properties(self) -> dict[str, RunStyle]:
    """Expose the root's mapping of character style definitions."""
    root = self._root
    return root.char_properties
|
|
555
|
+
|
|
556
|
+
def char_property(self, char_pr_id_ref: int | str | None) -> RunStyle | None:
    """Look up the style for *char_pr_id_ref* on the document root.

    The root's result is returned unchanged (``None`` when it reports no
    match).
    """
    resolved = self._root.char_property(char_pr_id_ref)
    return resolved
|
|
560
|
+
|
|
561
|
+
def ensure_run_style(
    self,
    *,
    bold: bool = False,
    italic: bool = False,
    underline: bool = False,
    base_char_pr_id: str | int | None = None,
) -> str:
    """Ask the document root for a ``charPr`` id matching the given flags.

    All four arguments are forwarded unchanged as keywords and the root's
    answer is returned as-is.
    """
    requested = {
        "bold": bold,
        "italic": italic,
        "underline": underline,
        "base_char_pr_id": base_char_pr_id,
    }
    return self._root.ensure_run_style(**requested)
|
|
577
|
+
|
|
578
|
+
def iter_runs(self) -> Iterator[HwpxOxmlRun]:
    """Lazily yield the runs of every paragraph, in paragraph order."""
    for block in self.paragraphs:
        yield from block.runs
|
|
584
|
+
|
|
585
|
+
def find_runs_by_style(
    self,
    *,
    text_color: str | None = None,
    underline_type: str | None = None,
    underline_color: str | None = None,
    char_pr_id_ref: str | int | None = None,
) -> list[HwpxOxmlRun]:
    """Collect the runs that satisfy every filter that was supplied.

    A filter left as ``None`` is ignored. *char_pr_id_ref* is compared as a
    whitespace-stripped string against the run's own reference. The three
    style filters are compared against ``run.style``; a run without a style
    fails any style filter that is set.
    """
    wanted_char = None if char_pr_id_ref is None else str(char_pr_id_ref).strip()

    def _accepts(candidate: HwpxOxmlRun) -> bool:
        # The char reference is checked first so that mismatching runs are
        # rejected before their style is resolved.
        if wanted_char is not None:
            if (candidate.char_pr_id_ref or "").strip() != wanted_char:
                return False
        run_style = candidate.style
        if text_color is not None:
            if run_style is None or run_style.text_color() != text_color:
                return False
        if underline_type is not None:
            if run_style is None or run_style.underline_type() != underline_type:
                return False
        if underline_color is not None:
            if run_style is None or run_style.underline_color() != underline_color:
                return False
        return True

    return [candidate for candidate in self.iter_runs() if _accepts(candidate)]
|
|
615
|
+
|
|
616
|
+
def replace_text_in_runs(
    self,
    search: str,
    replacement: str,
    *,
    text_color: str | None = None,
    underline_type: str | None = None,
    underline_color: str | None = None,
    char_pr_id_ref: str | int | None = None,
    limit: int | None = None,
) -> int:
    """Substitute *search* with *replacement* inside style-filtered runs.

    The style keywords are passed to :meth:`find_runs_by_style` to choose
    the runs. *limit*, when given, caps the total number of substitutions
    across all runs. Returns the number of substitutions performed.

    Raises:
        ValueError: if *search* is empty.
    """
    if not search:
        raise ValueError("search must be a non-empty string")

    candidates = self.find_runs_by_style(
        text_color=text_color,
        underline_type=underline_type,
        underline_color=underline_color,
        char_pr_id_ref=char_pr_id_ref,
    )

    bounded = limit is not None
    total = 0
    for candidate in candidates:
        budget = limit - total if bounded else None
        if bounded and budget <= 0:
            break
        saved_ref = candidate.char_pr_id_ref
        changed = candidate.replace_text(search, replacement, count=budget)
        if changed and saved_ref is not None:
            # Re-apply the formatting reference in case the substitution
            # rewrote the run's XML nodes.
            candidate.char_pr_id_ref = saved_ref
        total += changed
        if bounded and total >= limit:
            break
    return total
|
|
660
|
+
|
|
661
|
+
# ------------------------------------------------------------------
|
|
662
|
+
# editing helpers
|
|
663
|
+
def add_paragraph(
    self,
    text: str = "",
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    include_run: bool = True,
    inherit_style: bool = True,
    **extra_attrs: str,
) -> HwpxOxmlParagraph:
    """Append a paragraph through the document root and return it.

    Every argument is forwarded unchanged to the root's ``add_paragraph``.
    According to the API contract, *inherit_style* (default ``True``) makes
    the new paragraph reuse ``paraPrIDRef``, ``styleIDRef`` and
    ``charPrIDRef`` from the last paragraph of the target section when no
    explicit references are supplied. ``para_pr_id_ref``, ``style_id_ref``
    and ``char_pr_id_ref`` override that, and any extra keyword arguments
    become raw paragraph attributes.
    """
    root = self._root
    created = root.add_paragraph(
        text,
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        run_attributes=run_attributes,
        include_run=include_run,
        inherit_style=inherit_style,
        **extra_attrs,
    )
    return created
|
|
701
|
+
|
|
702
|
+
def add_table(
    self,
    rows: int,
    cols: int,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    width: int | None = None,
    height: int | None = None,
    border_fill_id_ref: str | int | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlTable:
    """Append a run-less paragraph and build a *rows* x *cols* table in it.

    When *border_fill_id_ref* is ``None`` the root's
    ``ensure_basic_border_fill()`` supplies the border fill id. Paragraph
    level references and *extra_attrs* go to :meth:`add_paragraph`; sizing,
    *run_attributes* and *char_pr_id_ref* go to the paragraph's
    ``add_table``. Returns the created table.
    """
    # Resolved before the paragraph is created, matching the original order.
    border_fill = (
        border_fill_id_ref
        if border_fill_id_ref is not None
        else self._root.ensure_basic_border_fill()
    )

    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    table = host.add_table(
        rows,
        cols,
        width=width,
        height=height,
        border_fill_id_ref=border_fill,
        run_attributes=run_attributes,
        char_pr_id_ref=char_pr_id_ref,
    )
    return table
|
|
743
|
+
|
|
744
|
+
def add_shape(
    self,
    shape_type: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    attributes: dict[str, str] | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlInlineObject:
    """Append a run-less paragraph and add an inline shape to it.

    Paragraph-level references and *extra_attrs* are given to
    :meth:`add_paragraph`; *shape_type*, *attributes*, *run_attributes* and
    *char_pr_id_ref* are given to the paragraph's ``add_shape``, whose
    result is returned.
    """
    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    shape = host.add_shape(
        shape_type,
        attributes=attributes,
        run_attributes=run_attributes,
        char_pr_id_ref=char_pr_id_ref,
    )
    return shape
|
|
775
|
+
|
|
776
|
+
def add_control(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    attributes: dict[str, str] | None = None,
    control_type: str | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlInlineObject:
    """Append a run-less paragraph and add a control inline object to it.

    Paragraph-level references and *extra_attrs* are given to
    :meth:`add_paragraph`; *attributes*, *control_type*, *run_attributes*
    and *char_pr_id_ref* are given to the paragraph's ``add_control``,
    whose result is returned.
    """
    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    control = host.add_control(
        attributes=attributes,
        control_type=control_type,
        run_attributes=run_attributes,
        char_pr_id_ref=char_pr_id_ref,
    )
    return control
|
|
807
|
+
|
|
808
|
+
# ------------------------------------------------------------------
|
|
809
|
+
# Footnote / Endnote helpers
|
|
810
|
+
# ------------------------------------------------------------------
|
|
811
|
+
|
|
812
|
+
def add_footnote(
    self,
    text: str,
    paragraph: HwpxOxmlParagraph | None = None,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    char_pr_id_ref: str | int | None = None,
) -> HwpxOxmlNote:
    """Attach a footnote containing *text* to a paragraph.

    If *paragraph* is ``None``, a run-less paragraph is first appended via
    :meth:`add_paragraph` using *section* / *section_index*; those two
    arguments are otherwise unused. Returns what the paragraph's
    ``add_footnote`` returns.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_footnote(text, char_pr_id_ref=char_pr_id_ref)
|
|
835
|
+
|
|
836
|
+
def add_endnote(
    self,
    text: str,
    paragraph: HwpxOxmlParagraph | None = None,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    char_pr_id_ref: str | int | None = None,
) -> HwpxOxmlNote:
    """Attach an endnote containing *text* to a paragraph.

    If *paragraph* is ``None``, a run-less paragraph is first appended via
    :meth:`add_paragraph` using *section* / *section_index*. Returns what
    the paragraph's ``add_endnote`` returns.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_endnote(text, char_pr_id_ref=char_pr_id_ref)
|
|
855
|
+
|
|
856
|
+
# ------------------------------------------------------------------
|
|
857
|
+
# Drawing shapes
|
|
858
|
+
# ------------------------------------------------------------------
|
|
859
|
+
|
|
860
|
+
def add_line(
    self,
    start_x: int = 0,
    start_y: int = 0,
    end_x: int = 14400,
    end_y: int = 0,
    *,
    line_color: str = "#000000",
    line_width: str = "283",
    treat_as_char: bool = True,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlShape:
    """Add a line drawing shape to a paragraph and return it.

    Coordinates are documented as HWPUNIT values (7200 per inch). When
    *paragraph* is ``None`` a run-less paragraph is appended first, placed
    according to *section* / *section_index*.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_line(
        start_x,
        start_y,
        end_x,
        end_y,
        line_color=line_color,
        line_width=line_width,
        treat_as_char=treat_as_char,
    )
|
|
888
|
+
|
|
889
|
+
def add_rectangle(
    self,
    width: int = 14400,
    height: int = 7200,
    *,
    ratio: int = 0,
    line_color: str = "#000000",
    line_width: str = "283",
    fill_color: str | None = None,
    treat_as_char: bool = True,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlShape:
    """Add a rectangle drawing shape to a paragraph and return it.

    Dimensions are documented as HWPUNIT values. *ratio* is documented as
    the corner roundness (0 = sharp, 50 = semicircle). When *paragraph* is
    ``None`` a run-less paragraph is appended first.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_rectangle(
        width,
        height,
        ratio=ratio,
        line_color=line_color,
        line_width=line_width,
        fill_color=fill_color,
        treat_as_char=treat_as_char,
    )
|
|
918
|
+
|
|
919
|
+
def add_ellipse(
    self,
    width: int = 14400,
    height: int = 7200,
    *,
    line_color: str = "#000000",
    line_width: str = "283",
    fill_color: str | None = None,
    treat_as_char: bool = True,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlShape:
    """Add an ellipse drawing shape to a paragraph and return it.

    Dimensions are documented as HWPUNIT values. When *paragraph* is
    ``None`` a run-less paragraph is appended first, placed according to
    *section* / *section_index*.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_ellipse(
        width,
        height,
        line_color=line_color,
        line_width=line_width,
        fill_color=fill_color,
        treat_as_char=treat_as_char,
    )
|
|
946
|
+
|
|
947
|
+
# ------------------------------------------------------------------
|
|
948
|
+
# Column layout
|
|
949
|
+
# ------------------------------------------------------------------
|
|
950
|
+
|
|
951
|
+
def set_columns(
    self,
    col_count: int = 2,
    *,
    col_type: str = "NEWSPAPER",
    layout: str = "LEFT",
    same_size: bool = True,
    same_gap: int = 1200,
    column_widths: "Sequence[tuple[int, int]] | None" = None,
    separator_type: str | None = None,
    separator_width: str | None = None,
    separator_color: str | None = None,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlInlineObject:
    """Add a column definition control to a paragraph.

    The work is done by the paragraph's ``add_column_definition``; its
    result is returned. When *paragraph* is ``None`` a run-less paragraph
    is appended first, placed according to *section* / *section_index*.

    Args:
        col_count: Number of columns (documented range 1-255; not
            validated here).
        col_type: ``NEWSPAPER``, ``BALANCED_NEWSPAPER``, or ``PARALLEL``.
        same_gap: Gap in HWPUNIT (7200 = 1 inch).
        separator_type: Optional column separator line type
            (e.g. ``SOLID``).
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    definition = host.add_column_definition(
        col_count,
        col_type=col_type,
        layout=layout,
        same_size=same_size,
        same_gap=same_gap,
        column_widths=column_widths,
        separator_type=separator_type,
        separator_width=separator_width,
        separator_color=separator_color,
    )
    return definition
|
|
994
|
+
|
|
995
|
+
# ------------------------------------------------------------------
|
|
996
|
+
# Bookmarks and hyperlinks
|
|
997
|
+
# ------------------------------------------------------------------
|
|
998
|
+
|
|
999
|
+
def add_bookmark(
    self,
    name: str,
    *,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlInlineObject:
    """Add a bookmark named *name* to a paragraph.

    When *paragraph* is ``None`` a run-less paragraph is appended first.
    The return value is the paragraph's ``add_bookmark`` result,
    documented as the ``<hp:ctrl>`` wrapper element.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_bookmark(name)
|
|
1017
|
+
|
|
1018
|
+
def add_hyperlink(
    self,
    url: str,
    display_text: str,
    *,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlInlineObject:
    """Add a hyperlink to *url* shown as *display_text* in a paragraph.

    When *paragraph* is ``None`` a run-less paragraph is appended first.
    The return value is the paragraph's ``add_hyperlink`` result,
    documented as the ``<hp:ctrl>`` wrapper holding the
    ``<hp:fieldBegin>``.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_hyperlink(url, display_text)
|
|
1037
|
+
|
|
1038
|
+
def set_header_text(
    self,
    text: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> HwpxOxmlSectionHeaderFooter:
    """Set the header text for *page_type* on one section.

    The section is chosen in this order: explicit *section*, then
    ``sections[section_index]``, then the last section of the document.
    The call is delegated to the section properties' ``set_header_text``.

    Raises:
        ValueError: if a fallback to the last section is needed but the
            document has no sections.
    """
    chosen = section
    if chosen is None:
        if section_index is not None:
            chosen = self._root.sections[section_index]
        if chosen is None:
            available = self._root.sections
            if not available:
                raise ValueError("document does not contain any sections")
            chosen = available[-1]
    return chosen.properties.set_header_text(text, page_type=page_type)
|
|
1056
|
+
|
|
1057
|
+
def set_footer_text(
    self,
    text: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> HwpxOxmlSectionHeaderFooter:
    """Set the footer text for *page_type* on one section.

    The section is chosen in this order: explicit *section*, then
    ``sections[section_index]``, then the last section of the document.
    The call is delegated to the section properties' ``set_footer_text``.

    Raises:
        ValueError: if a fallback to the last section is needed but the
            document has no sections.
    """
    chosen = section
    if chosen is None:
        if section_index is not None:
            chosen = self._root.sections[section_index]
        if chosen is None:
            available = self._root.sections
            if not available:
                raise ValueError("document does not contain any sections")
            chosen = available[-1]
    return chosen.properties.set_footer_text(text, page_type=page_type)
|
|
1075
|
+
|
|
1076
|
+
def remove_header(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> None:
    """Remove the header for *page_type* from one section.

    The section is chosen in this order: explicit *section*, then
    ``sections[section_index]``, then the last section. If the fallback
    is needed and the document has no sections, nothing happens.
    """
    chosen = section
    if chosen is None:
        if section_index is not None:
            chosen = self._root.sections[section_index]
        if chosen is None:
            available = self._root.sections
            if not available:
                return
            chosen = available[-1]
    chosen.properties.remove_header(page_type=page_type)
|
|
1093
|
+
|
|
1094
|
+
def remove_footer(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> None:
    """Remove the footer for *page_type* from one section.

    The section is chosen in this order: explicit *section*, then
    ``sections[section_index]``, then the last section. If the fallback
    is needed and the document has no sections, nothing happens.
    """
    chosen = section
    if chosen is None:
        if section_index is not None:
            chosen = self._root.sections[section_index]
        if chosen is None:
            available = self._root.sections
            if not available:
                return
            chosen = available[-1]
    chosen.properties.remove_footer(page_type=page_type)
|
|
1111
|
+
|
|
1112
|
+
# ------------------------------------------------------------------
|
|
1113
|
+
# BinData / Image management
|
|
1114
|
+
# ------------------------------------------------------------------
|
|
1115
|
+
|
|
1116
|
+
_FORMAT_TO_MEDIA_TYPE: dict[str, str] = {
|
|
1117
|
+
"jpg": "image/jpeg",
|
|
1118
|
+
"jpeg": "image/jpeg",
|
|
1119
|
+
"png": "image/png",
|
|
1120
|
+
"gif": "image/gif",
|
|
1121
|
+
"bmp": "image/bmp",
|
|
1122
|
+
"tiff": "image/tiff",
|
|
1123
|
+
"tif": "image/tiff",
|
|
1124
|
+
"svg": "image/svg+xml",
|
|
1125
|
+
}
|
|
1126
|
+
|
|
1127
|
+
def add_image(
    self,
    image_data: bytes,
    image_format: str,
    *,
    item_id: str | None = None,
) -> str:
    """Embed an image in the package and return its manifest item id.

    The bytes are written to ``BinData/<item_id>.<format>``, the file is
    registered in the package manifest and, when the document has a
    header, in the first header's bin-item list.

    Args:
        image_data: Raw image bytes.
        image_format: Image format extension (``jpg``, ``png``, ...).
            Case and a leading dot are ignored.
        item_id: Optional explicit manifest item id. It is used as given,
            without a uniqueness check. When omitted, a ``BIN####`` id is
            generated that collides with neither an existing bin-item
            ``id`` nor an existing ``BinData`` file name.

    Returns:
        The manifest item id, usable as ``binaryItemIDRef`` when
        constructing a ``<hp:pic>`` element.

    Raises:
        ValueError: if *image_format* is empty after normalisation.
    """
    fmt = image_format.lower().lstrip(".")
    if not fmt:
        # An empty extension would create a "BinData/<id>." part.
        raise ValueError("image_format must name a file extension such as 'png'")
    media_type = self._FORMAT_TO_MEDIA_TYPE.get(fmt, f"image/{fmt}")

    header = self._root.headers[0] if self._root.headers else None

    if item_id is None:
        known_ids: set[str] = set()
        taken_names: set[str] = set()
        if header is not None:
            for bin_item in header.list_bin_items():
                known_ids.add(bin_item.get("id", ""))
                # Generated ids live in the BinData file name
                # ("BIN0001.png"), not in the bin-item "id" attribute, so
                # the file stems must be part of the collision check.
                data_name = bin_item.get("BinData", "")
                if data_name:
                    taken_names.add(data_name.rsplit(".", 1)[0])
        taken_names |= known_ids

        # Start numbering after the current item count, as before, and
        # skip any candidate that is already in use.
        n = len(known_ids) + 1
        while True:
            item_id = f"BIN{n:04d}"
            if item_id not in taken_names:
                break
            n += 1

    # File path inside the ZIP
    bin_data_name = f"{item_id}.{fmt}"
    bin_data_path = f"BinData/{bin_data_name}"

    # 1) Write image bytes into the package
    self._package.write(bin_data_path, image_data)

    # 2) Register in manifest
    self._package.add_manifest_item(item_id, bin_data_path, media_type)

    # 3) Register in header binDataList
    if header is not None:
        header.add_bin_item(
            item_type="Embedding",
            bin_data_id=bin_data_name,
            format=fmt,
        )

    return item_id
|
|
1184
|
+
|
|
1185
|
+
def list_images(self) -> list[dict[str, str]]:
    """Return the first header's bin-item metadata, or ``[]`` without a header.

    The list comes straight from the header's ``list_bin_items()``. Each
    dict is documented to hold the ``<hh:binItem>`` attributes (``id``,
    ``Type``, ``BinData``, ``Format``, ...).
    """
    headers = self._root.headers
    first_header = headers[0] if headers else None
    if first_header is not None:
        return first_header.list_bin_items()
    return []
|
|
1196
|
+
|
|
1197
|
+
def remove_image(self, item_id: str) -> bool:
    """Remove an embedded image by its manifest item id.

    Three things are removed when present: the header bin-item entry, the
    manifest entry, and the binary part inside the package.

    The header entry is found by its ``BinData`` file name, which must be
    exactly *item_id* or *item_id* followed by an extension
    (``"BIN0001"`` matches ``"BIN0001.jpg"`` but not ``"BIN00010.jpg"``).
    An empty *item_id* never matches a header entry. If the header gives
    no file path, the manifest item's ``href`` is used instead.

    Returns:
        ``True`` if any component was removed.
    """
    removed = False
    header = self._root.headers[0] if self._root.headers else None

    # Find file path and binItem numeric id from header metadata
    bin_data_path: str | None = None
    bin_item_numeric_id: str | None = None
    if header is not None and item_id:
        dotted_prefix = f"{item_id}."
        for bin_item in header.list_bin_items():
            file_name = bin_item.get("BinData", "")
            # Exact stem match: a bare prefix test would also select
            # longer ids that merely start with item_id.
            if file_name == item_id or file_name.startswith(dotted_prefix):
                bin_item_numeric_id = bin_item.get("id")
                bin_data_path = f"BinData/{file_name}"
                break

    # Also try manifest-based lookup for the file path
    if bin_data_path is None:
        manifest_el = self._package._manifest_element()
        if manifest_el is not None:
            ns = {"opf": "http://www.idpf.org/2007/opf/"}
            for entry in manifest_el.findall("opf:item", ns):
                if entry.get("id") == item_id:
                    href = entry.get("href", "")
                    if href:
                        bin_data_path = href
                    break

    # Remove from header binDataList (use the numeric id)
    if header is not None and bin_item_numeric_id is not None:
        if header.remove_bin_item(bin_item_numeric_id):
            removed = True

    # Remove from manifest
    if self._package.remove_manifest_item(item_id):
        removed = True

    # Remove from ZIP
    if bin_data_path and self._package.has_part(bin_data_path):
        self._package.delete(bin_data_path)
        removed = True

    return removed
|
|
1250
|
+
|
|
1251
|
+
# ------------------------------------------------------------------
|
|
1252
|
+
# Export helpers
|
|
1253
|
+
# ------------------------------------------------------------------
|
|
1254
|
+
|
|
1255
|
+
def export_text(self, **kwargs: object) -> str:
    """Render the document as plain text.

    All keyword arguments are passed on to
    :func:`~hwpx.tools.exporter.export_text`. The exporter is imported
    inside the method rather than at module level.
    """
    from .tools.exporter import export_text as _render_text

    return _render_text(self, **kwargs)  # type: ignore[arg-type]
|
|
1259
|
+
|
|
1260
|
+
def export_html(self, **kwargs: object) -> str:
    """Render the document as HTML.

    All keyword arguments are passed on to
    :func:`~hwpx.tools.exporter.export_html`. The exporter is imported
    inside the method rather than at module level.
    """
    from .tools.exporter import export_html as _render_html

    return _render_html(self, **kwargs)  # type: ignore[arg-type]
|
|
1264
|
+
|
|
1265
|
+
def export_markdown(self, **kwargs: object) -> str:
    """Render the document as Markdown.

    All keyword arguments are passed on to
    :func:`~hwpx.tools.exporter.export_markdown`. The exporter is imported
    inside the method rather than at module level.
    """
    from .tools.exporter import export_markdown as _render_markdown

    return _render_markdown(self, **kwargs)  # type: ignore[arg-type]
|
|
1269
|
+
|
|
1270
|
+
# ------------------------------------------------------------------
|
|
1271
|
+
# Validation
|
|
1272
|
+
# ------------------------------------------------------------------
|
|
1273
|
+
|
|
1274
|
+
def validate(self) -> "ValidationReport":
    """Validate the current document state and return the report.

    The document is serialized with :meth:`_to_bytes_raw` (no pre-save
    validation) and the bytes are handed to
    :func:`~hwpx.tools.validator.validate_document`. This works whether or
    not ``validate_on_save`` is enabled.
    """
    from .tools.validator import validate_document

    archive = self._to_bytes_raw()
    return validate_document(archive)
|
|
1284
|
+
|
|
1285
|
+
def _run_pre_save_validation(self) -> None:
|
|
1286
|
+
"""Raise if validate_on_save is enabled and the document is invalid."""
|
|
1287
|
+
if not self.validate_on_save:
|
|
1288
|
+
return
|
|
1289
|
+
report = self.validate()
|
|
1290
|
+
if not report.ok:
|
|
1291
|
+
msgs = "; ".join(str(i) for i in report.issues[:5])
|
|
1292
|
+
remaining = len(report.issues) - 5
|
|
1293
|
+
if remaining > 0:
|
|
1294
|
+
msgs += f" … and {remaining} more"
|
|
1295
|
+
raise ValueError(f"Document validation failed: {msgs}")
|
|
1296
|
+
|
|
1297
|
+
def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
    """Write the document to *path*.

    Steps: optional pre-save validation, serialize the root, save through
    the package, then reset the root's dirty state. Returns the package's
    result, or *path* itself when the package returns ``None``.
    """
    self._run_pre_save_validation()
    pending = self._root.serialize()
    outcome = self._package.save(path, pending)
    self._root.reset_dirty()
    if outcome is None:
        return path
    return outcome
|
|
1305
|
+
|
|
1306
|
+
def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
    """Write the document to *stream*.

    Steps: optional pre-save validation, serialize the root, save through
    the package, then reset the root's dirty state. Returns the package's
    result, or *stream* itself when the package returns ``None``.
    """
    self._run_pre_save_validation()
    pending = self._root.serialize()
    outcome = self._package.save(stream, pending)
    self._root.reset_dirty()
    if outcome is None:
        return stream
    return outcome
|
|
1314
|
+
|
|
1315
|
+
def to_bytes(self) -> bytes:
    """Return the HWPX archive as bytes, after pre-save validation.

    Validation runs only when ``validate_on_save`` is enabled; the
    serialization itself is done by :meth:`_to_bytes_raw`.
    """
    self._run_pre_save_validation()
    archive = self._to_bytes_raw()
    return archive
|
|
1320
|
+
|
|
1321
|
+
def _to_bytes_raw(self) -> bytes:
|
|
1322
|
+
"""Serialize without validation (used by :meth:`validate`)."""
|
|
1323
|
+
updates = self._root.serialize()
|
|
1324
|
+
result = self._package.save(None, updates)
|
|
1325
|
+
self._root.reset_dirty()
|
|
1326
|
+
if isinstance(result, bytes):
|
|
1327
|
+
return result
|
|
1328
|
+
raise TypeError("package.save(None) must return bytes")
|
|
1329
|
+
|
|
1330
|
+
@overload
def save(self, path_or_stream: None = None) -> bytes: ...

@overload
def save(self, path_or_stream: str | PathLike[str]) -> str | PathLike[str]: ...

@overload
def save(self, path_or_stream: BinaryIO) -> BinaryIO: ...

def save(
    self,
    path_or_stream: str | PathLike[str] | BinaryIO | None = None,
) -> str | PathLike[str] | BinaryIO | bytes:
    """Deprecated wrapper around save_to_path/save_to_stream/to_bytes.

    Deprecated:
        ``save()`` is kept for backward compatibility and may be removed
        in the future.
        - Save to a path: ``save_to_path(path)``
        - Save to a stream: ``save_to_stream(stream)``
        - Get bytes: ``to_bytes()``

    A ``DeprecationWarning`` is always emitted. The call is then routed by
    argument: ``None`` goes to :meth:`to_bytes`, a ``str`` or ``PathLike``
    to :meth:`save_to_path`, and anything else to :meth:`save_to_stream`.
    """
    notice = (
        "HwpxDocument.save()는 deprecated 예정입니다. "
        "save_to_path()/save_to_stream()/to_bytes() 사용을 권장합니다."
    )
    warnings.warn(notice, DeprecationWarning, stacklevel=2)

    if path_or_stream is None:
        return self.to_bytes()
    if isinstance(path_or_stream, (str, PathLike)):
        writer = self.save_to_path
    else:
        writer = self.save_to_stream
    return writer(path_or_stream)
|