pyhwpxlib 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. pyhwpxlib-0.1.0/LICENSE-APACHE +84 -0
  2. pyhwpxlib-0.1.0/LICENSE.md +66 -0
  3. pyhwpxlib-0.1.0/NOTICE +21 -0
  4. pyhwpxlib-0.1.0/PKG-INFO +339 -0
  5. pyhwpxlib-0.1.0/README.md +302 -0
  6. pyhwpxlib-0.1.0/pyhwpxlib/__init__.py +10 -0
  7. pyhwpxlib-0.1.0/pyhwpxlib/__main__.py +4 -0
  8. pyhwpxlib-0.1.0/pyhwpxlib/api.py +2493 -0
  9. pyhwpxlib-0.1.0/pyhwpxlib/base.py +127 -0
  10. pyhwpxlib-0.1.0/pyhwpxlib/builder.py +622 -0
  11. pyhwpxlib-0.1.0/pyhwpxlib/cli.py +331 -0
  12. pyhwpxlib-0.1.0/pyhwpxlib/constants/__init__.py +0 -0
  13. pyhwpxlib-0.1.0/pyhwpxlib/constants/attribute_names.py +449 -0
  14. pyhwpxlib-0.1.0/pyhwpxlib/constants/default_values.py +1 -0
  15. pyhwpxlib-0.1.0/pyhwpxlib/constants/element_names.py +357 -0
  16. pyhwpxlib-0.1.0/pyhwpxlib/constants/mime_types.py +6 -0
  17. pyhwpxlib-0.1.0/pyhwpxlib/constants/namespaces.py +44 -0
  18. pyhwpxlib-0.1.0/pyhwpxlib/constants/zip_entry_names.py +4 -0
  19. pyhwpxlib-0.1.0/pyhwpxlib/converter.py +613 -0
  20. pyhwpxlib-0.1.0/pyhwpxlib/html_converter.py +804 -0
  21. pyhwpxlib-0.1.0/pyhwpxlib/html_to_hwpx.py +1032 -0
  22. pyhwpxlib-0.1.0/pyhwpxlib/hwp2hwpx.py +2633 -0
  23. pyhwpxlib-0.1.0/pyhwpxlib/hwp_reader.py +1904 -0
  24. pyhwpxlib-0.1.0/pyhwpxlib/hwpx_file.py +128 -0
  25. pyhwpxlib-0.1.0/pyhwpxlib/object_type.py +370 -0
  26. pyhwpxlib-0.1.0/pyhwpxlib/objects/__init__.py +0 -0
  27. pyhwpxlib-0.1.0/pyhwpxlib/objects/chart.py +28 -0
  28. pyhwpxlib-0.1.0/pyhwpxlib/objects/common/__init__.py +0 -0
  29. pyhwpxlib-0.1.0/pyhwpxlib/objects/common/base_objects.py +334 -0
  30. pyhwpxlib-0.1.0/pyhwpxlib/objects/common/compatibility.py +146 -0
  31. pyhwpxlib-0.1.0/pyhwpxlib/objects/common/parameters.py +314 -0
  32. pyhwpxlib-0.1.0/pyhwpxlib/objects/content_hpf/__init__.py +0 -0
  33. pyhwpxlib-0.1.0/pyhwpxlib/objects/content_hpf/content_hpf.py +258 -0
  34. pyhwpxlib-0.1.0/pyhwpxlib/objects/etc.py +21 -0
  35. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/__init__.py +0 -0
  36. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/enum_types.py +640 -0
  37. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/header_xml_file.py +422 -0
  38. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/references/__init__.py +0 -0
  39. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/references/border_fill.py +351 -0
  40. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/references/char_pr.py +316 -0
  41. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/references/fontface.py +229 -0
  42. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/references/memo_pr.py +33 -0
  43. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/references/numbering.py +136 -0
  44. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/references/para_pr.py +283 -0
  45. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/references/style.py +34 -0
  46. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/references/tab_pr.py +92 -0
  47. pyhwpxlib-0.1.0/pyhwpxlib/objects/header/references/track_change.py +49 -0
  48. pyhwpxlib-0.1.0/pyhwpxlib/objects/masterpage/__init__.py +0 -0
  49. pyhwpxlib-0.1.0/pyhwpxlib/objects/masterpage/masterpage.py +77 -0
  50. pyhwpxlib-0.1.0/pyhwpxlib/objects/metainf/__init__.py +0 -0
  51. pyhwpxlib-0.1.0/pyhwpxlib/objects/metainf/container.py +83 -0
  52. pyhwpxlib-0.1.0/pyhwpxlib/objects/metainf/manifest.py +214 -0
  53. pyhwpxlib-0.1.0/pyhwpxlib/objects/root/__init__.py +0 -0
  54. pyhwpxlib-0.1.0/pyhwpxlib/objects/root/settings.py +145 -0
  55. pyhwpxlib-0.1.0/pyhwpxlib/objects/root/version.py +80 -0
  56. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/__init__.py +1 -0
  57. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/ctrl.py +411 -0
  58. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/enum_types.py +544 -0
  59. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/objects/__init__.py +1 -0
  60. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/objects/connect_line.py +72 -0
  61. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/objects/drawing_object.py +371 -0
  62. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/objects/equation.py +33 -0
  63. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/objects/form_objects.py +230 -0
  64. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/objects/ole.py +167 -0
  65. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/objects/picture.py +290 -0
  66. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/objects/shapes.py +231 -0
  67. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/objects/table.py +259 -0
  68. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/objects/text_art.py +76 -0
  69. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/paragraph.py +820 -0
  70. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/sec_pr.py +488 -0
  71. pyhwpxlib-0.1.0/pyhwpxlib/objects/section/section_xml_file.py +96 -0
  72. pyhwpxlib-0.1.0/pyhwpxlib/reader.py +432 -0
  73. pyhwpxlib-0.1.0/pyhwpxlib/style_manager.py +986 -0
  74. pyhwpxlib-0.1.0/pyhwpxlib/tools/Skeleton.hwpx +0 -0
  75. pyhwpxlib-0.1.0/pyhwpxlib/tools/__init__.py +0 -0
  76. pyhwpxlib-0.1.0/pyhwpxlib/tools/_reference_header.xml +1 -0
  77. pyhwpxlib-0.1.0/pyhwpxlib/tools/blank.hwpx +0 -0
  78. pyhwpxlib-0.1.0/pyhwpxlib/tools/blank_file_maker.py +921 -0
  79. pyhwpxlib-0.1.0/pyhwpxlib/value_convertor.py +505 -0
  80. pyhwpxlib-0.1.0/pyhwpxlib/writer/__init__.py +0 -0
  81. pyhwpxlib-0.1.0/pyhwpxlib/writer/container_writer.py +37 -0
  82. pyhwpxlib-0.1.0/pyhwpxlib/writer/content_hpf_writer.py +136 -0
  83. pyhwpxlib-0.1.0/pyhwpxlib/writer/header/__init__.py +0 -0
  84. pyhwpxlib-0.1.0/pyhwpxlib/writer/header/header_writer.py +783 -0
  85. pyhwpxlib-0.1.0/pyhwpxlib/writer/hwpx_writer.py +108 -0
  86. pyhwpxlib-0.1.0/pyhwpxlib/writer/manifest_writer.py +67 -0
  87. pyhwpxlib-0.1.0/pyhwpxlib/writer/masterpage_writer.py +35 -0
  88. pyhwpxlib-0.1.0/pyhwpxlib/writer/section/__init__.py +0 -0
  89. pyhwpxlib-0.1.0/pyhwpxlib/writer/section/section_writer.py +1355 -0
  90. pyhwpxlib-0.1.0/pyhwpxlib/writer/settings_writer.py +56 -0
  91. pyhwpxlib-0.1.0/pyhwpxlib/writer/shape_writer.py +1781 -0
  92. pyhwpxlib-0.1.0/pyhwpxlib/writer/version_writer.py +33 -0
  93. pyhwpxlib-0.1.0/pyhwpxlib/writer/xml_builder.py +241 -0
  94. pyhwpxlib-0.1.0/pyhwpxlib.egg-info/PKG-INFO +339 -0
  95. pyhwpxlib-0.1.0/pyhwpxlib.egg-info/SOURCES.txt +113 -0
  96. pyhwpxlib-0.1.0/pyhwpxlib.egg-info/dependency_links.txt +1 -0
  97. pyhwpxlib-0.1.0/pyhwpxlib.egg-info/entry_points.txt +2 -0
  98. pyhwpxlib-0.1.0/pyhwpxlib.egg-info/requires.txt +14 -0
  99. pyhwpxlib-0.1.0/pyhwpxlib.egg-info/top_level.txt +1 -0
  100. pyhwpxlib-0.1.0/pyproject.toml +47 -0
  101. pyhwpxlib-0.1.0/setup.cfg +4 -0
  102. pyhwpxlib-0.1.0/tests/test_api_core.py +244 -0
  103. pyhwpxlib-0.1.0/tests/test_api_extended.py +407 -0
  104. pyhwpxlib-0.1.0/tests/test_api_server.py +115 -0
  105. pyhwpxlib-0.1.0/tests/test_api_shapes.py +214 -0
  106. pyhwpxlib-0.1.0/tests/test_converter.py +83 -0
  107. pyhwpxlib-0.1.0/tests/test_form_pipeline.py +102 -0
  108. pyhwpxlib-0.1.0/tests/test_form_pipeline_multirun.py +185 -0
  109. pyhwpxlib-0.1.0/tests/test_html_converters.py +105 -0
  110. pyhwpxlib-0.1.0/tests/test_hwpx_builder.py +216 -0
  111. pyhwpxlib-0.1.0/tests/test_object_model.py +345 -0
  112. pyhwpxlib-0.1.0/tests/test_refactor.py +197 -0
  113. pyhwpxlib-0.1.0/tests/test_stability.py +169 -0
  114. pyhwpxlib-0.1.0/tests/test_style_manager.py +209 -0
  115. pyhwpxlib-0.1.0/tests/test_writer_utils.py +160 -0
@@ -0,0 +1,84 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity.
18
+
19
+ "You" (or "Your") shall mean an individual or Legal Entity
20
+ exercising permissions granted by this License.
21
+
22
+ "Source" form shall mean the preferred form for making modifications.
23
+
24
+ "Object" form shall mean any form resulting from mechanical
25
+ transformation or translation of a Source form.
26
+
27
+ "Work" shall mean the work of authorship made available under
28
+ the License.
29
+
30
+ "Derivative Works" shall mean any work that is based on the Work.
31
+
32
+ "Contribution" shall mean any work of authorship submitted to the
33
+ Licensor for inclusion in the Work.
34
+
35
+ "Contributor" shall mean Licensor and any Legal Entity on behalf of
36
+ whom a Contribution has been received by the Licensor.
37
+
38
+ 2. Grant of Copyright License. Subject to the terms and conditions of
39
+ this License, each Contributor hereby grants to You a perpetual,
40
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
41
+ copyright license to reproduce, prepare Derivative Works of,
42
+ publicly display, publicly perform, sublicense, and distribute the
43
+ Work and such Derivative Works in Source or Object form.
44
+
45
+ 3. Grant of Patent License. Subject to the terms and conditions of
46
+ this License, each Contributor hereby grants to You a perpetual,
47
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
48
+ patent license to make, have made, use, offer to sell, sell,
49
+ import, and otherwise transfer the Work.
50
+
51
+ 4. Redistribution. You may reproduce and distribute copies of the
52
+ Work or Derivative Works thereof in any medium, with or without
53
+ modifications, and in Source or Object form, provided that You
54
+ meet the following conditions:
55
+
56
+ (a) You must give any other recipients of the Work or
57
+ Derivative Works a copy of this License; and
58
+
59
+ (b) You must cause any modified files to carry prominent notices
60
+ stating that You changed the files; and
61
+
62
+ (c) You must retain, in the Source form of any Derivative Works
63
+ that You distribute, all copyright, patent, trademark, and
64
+ attribution notices from the Source form of the Work; and
65
+
66
+ (d) If the Work includes a "NOTICE" text file, You must include
67
+ a readable copy of the attribution notices contained within
68
+ such NOTICE file.
69
+
70
+ 5. Submission of Contributions.
71
+
72
+ 6. Trademarks. This License does not grant permission to use the trade
73
+ names, trademarks, service marks, or product names of the Licensor.
74
+
75
+ 7. Disclaimer of Warranty.
76
+
77
+ 8. Limitation of Liability.
78
+
79
+ 9. Accepting Warranty or Additional Liability.
80
+
81
+ END OF TERMS AND CONDITIONS
82
+
83
+ Copyright (c) neolord0
84
+ Licensed under the Apache License, Version 2.0
@@ -0,0 +1,66 @@
1
+ # pyhwpxlib License
2
+
3
+ 이 프로젝트는 **파일별로 다른 라이선스**가 적용됩니다.
4
+
5
+ ---
6
+
7
+ ## 1. Apache License 2.0 적용 파일
8
+
9
+ 다음 파일은 원본 오픈소스의 파생 저작물이므로 Apache License 2.0이 적용됩니다.
10
+ 상업적 사용을 포함한 모든 사용이 자유롭습니다.
11
+
12
+ | 파일 | 원본 |
13
+ |------|------|
14
+ | `pyhwpxlib/hwp2hwpx.py` | [neolord0/hwp2hwpx](https://github.com/neolord0/hwp2hwpx) |
15
+ | `pyhwpxlib/hwp_reader.py` | [neolord0/hwplib](https://github.com/neolord0/hwplib) |
16
+ | `pyhwpxlib/value_convertor.py` | [neolord0/hwp2hwpx](https://github.com/neolord0/hwp2hwpx) |
17
+
18
+ 전체 라이선스 텍스트: [LICENSE-APACHE](LICENSE-APACHE)
19
+
20
+ ---
21
+
22
+ ## 2. Business Source License 1.1 적용 파일
23
+
24
+ 위에 명시된 파일을 **제외한 모든 파일**에 BSL 1.1이 적용됩니다.
25
+
26
+ **Licensor:** Eunmi Lee (ratiertm)
27
+ **Licensed Work:** pyhwpxlib
28
+ **Copyright:** (c) 2026 Eunmi Lee
29
+
30
+ **Change License:** Apache License, Version 2.0
31
+ **Change Date:** 2030-04-07
32
+
33
+ ### Permitted Uses (무료)
34
+
35
+ 1. 개인, 비상업적 사용
36
+ 2. 사내 5인 이하 사용
37
+ 3. OSI 인증 오픈소스 프로젝트에서의 사용
38
+ 4. 학술, 교육 목적 사용
39
+ 5. 평가 및 테스트
40
+
41
+ ### Commercial License Required (유료)
42
+
43
+ 1. 제3자에게 제공하는 제품/서비스의 핵심 구성요소로 사용 (SaaS, API 등)
44
+ 2. 상업 제품에 포함하여 배포
45
+ 3. 사내 6인 이상 사용
46
+ 4. 라이선스 검증 메커니즘 제거 또는 우회
47
+
48
+ Commercial licenses: https://lchfkorea.com
49
+
50
+ ### Change Date
51
+
52
+ 2030-04-07 또는 해당 버전의 최초 공개 배포일로부터 4년이 되는 날 중 빠른 날짜 이후에는,
53
+ BSL 적용 파일도 Apache License 2.0으로 자동 전환됩니다.
54
+
55
+ ### General
56
+
57
+ 본 라이선스의 조건을 충족하지 않는 경우, 라이선서, 계열사 또는 공인 대리점으로부터
58
+ 상업 라이선스를 구매하거나 사용을 중단해야 합니다.
59
+
60
+ 본 라이선스를 위반하여 사용하는 경우, 현재 버전 및 다른 모든 버전에 대한 본 라이선스상의 권리는 자동으로 종료됩니다.
61
+
62
+ TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS
63
+ PROVIDED ON AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL
64
+ WARRANTIES AND CONDITIONS, EXPRESS OR IMPLIED, INCLUDING (WITHOUT
65
+ LIMITATION) WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
66
+ PURPOSE, NON-INFRINGEMENT, AND TITLE.
pyhwpxlib-0.1.0/NOTICE ADDED
@@ -0,0 +1,21 @@
1
+ pyhwpxlib
2
+ Copyright (c) 2026 Eunmi Lee (ratiertm)
3
+
4
+ This product includes software ported from the following projects:
5
+
6
+ 1. hwp2hwpx (https://github.com/neolord0/hwp2hwpx)
7
+ Copyright (c) neolord0
8
+ Licensed under the Apache License, Version 2.0
9
+ Files: pyhwpxlib/hwp2hwpx.py, pyhwpxlib/value_convertor.py
10
+ Changes: Rewritten in Python from original Java implementation.
11
+
12
+ 2. hwplib (https://github.com/neolord0/hwplib)
13
+ Copyright (c) neolord0
14
+ Licensed under the Apache License, Version 2.0
15
+ Files: pyhwpxlib/hwp_reader.py
16
+ Changes: HWP binary format parser rewritten in Python using struct/dataclass.
17
+
18
+ 3. python-hwpx (https://github.com/airmang/python-hwpx)
19
+ Copyright (c) Kyuhyun Ko (airmang)
20
+ Licensed under the MIT License
21
+ Files: ratiertm-hwpx/ (bundled as submodule), pyhwpxlib core dataclass model
@@ -0,0 +1,339 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyhwpxlib
3
+ Version: 0.1.0
4
+ Summary: Python library for creating and editing HWPX (Hancom Office) documents without Hancom Office
5
+ License-Expression: BUSL-1.1 AND Apache-2.0
6
+ Project-URL: Homepage, https://github.com/ratiertm/hwpx-skill
7
+ Project-URL: Repository, https://github.com/ratiertm/hwpx-skill
8
+ Project-URL: Issues, https://github.com/ratiertm/hwpx-skill/issues
9
+ Keywords: hwpx,hwp,hancom,korean,document,llm
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Classifier: Topic :: Office/Business :: Office Suites
20
+ Classifier: Natural Language :: Korean
21
+ Requires-Python: >=3.8
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE-APACHE
24
+ License-File: LICENSE.md
25
+ License-File: NOTICE
26
+ Provides-Extra: images
27
+ Requires-Dist: Pillow>=9.0; extra == "images"
28
+ Provides-Extra: lxml
29
+ Requires-Dist: lxml>=4.9; extra == "lxml"
30
+ Provides-Extra: hwp
31
+ Requires-Dist: olefile>=0.46; extra == "hwp"
32
+ Provides-Extra: all
33
+ Requires-Dist: Pillow>=9.0; extra == "all"
34
+ Requires-Dist: lxml>=4.9; extra == "all"
35
+ Requires-Dist: olefile>=0.46; extra == "all"
36
+ Dynamic: license-file
37
+
38
+ # pyhwpxlib
39
+
40
+ Python library for creating, converting, and editing HWPX (Hancom Office) documents programmatically. No Hancom Office installation required.
41
+
42
+ [한국어](README_KO.md)
43
+
44
+ ## Why pyhwpxlib?
45
+
46
+ - Generate HWPX reports on a server with zero desktop dependencies
47
+ - Convert Markdown, HTML, or legacy HWP 5.x files to HWPX
48
+ - Auto-fill government forms and contracts from data
49
+ - Let AI agents (Claude Code, Cursor, etc.) produce native Korean documents
50
+ - Extract text, Markdown, or HTML from existing HWPX files
51
+ - Merge multiple HWPX files into one
52
+
53
+ ## Install
54
+
55
+ ```bash
56
+ pip install pyhwpxlib
57
+ ```
58
+
59
+ This installs both the Python library and the `pyhwpxlib` CLI command.
60
+
61
+ Python 3.8+ required (see `Requires-Python` in the package metadata). No external dependencies for core features.
62
+
63
+ ```bash
64
+ # Optional: image support
65
+ pip install pyhwpxlib[images] # Pillow
66
+
67
+ # Optional: faster XML parsing
68
+ pip install pyhwpxlib[lxml] # lxml
69
+
70
+ # Optional: HWP 5.x → HWPX conversion
71
+ pip install pyhwpxlib[hwp] # olefile
72
+
73
+ # Install everything
74
+ pip install pyhwpxlib[all]
75
+ ```
76
+
77
+ ## Quick Start
78
+
79
+ ### Create a document in a few lines
80
+
81
+ ```python
82
+ from pyhwpxlib import HwpxBuilder
83
+
84
+ doc = HwpxBuilder()
85
+ doc.add_heading("Project Report", level=1)
86
+ doc.add_paragraph("April 2026")
87
+ doc.add_table([
88
+ ["Item", "Qty", "Price"],
89
+ ["Server", "3", "9,000,000"],
90
+ ["License", "10", "5,000,000"],
91
+ ])
92
+ doc.add_heading("1. Overview", level=2)
93
+ doc.add_paragraph("This report covers...")
94
+ doc.save("report.hwpx")
95
+ ```
96
+
97
+ ### Convert Markdown from the terminal
98
+
99
+ ```bash
100
+ pyhwpxlib md2hwpx report.md -o report.hwpx
101
+ ```
102
+
103
+ ### Fill a government form template
104
+
105
+ ```python
106
+ from pyhwpxlib.api import fill_template_checkbox
107
+
108
+ fill_template_checkbox(
109
+ "contract_template.hwpx",
110
+ data={">Name<": ">Name John Doe<"},
111
+ checks=["Agree"],
112
+ output_path="contract_filled.hwpx",
113
+ )
114
+ ```
115
+
116
+ ---
117
+
118
+ ## CLI Reference
119
+
120
+ `pip install pyhwpxlib` installs the `pyhwpxlib` command with 9 subcommands:
121
+
122
+ ### md2hwpx -- Markdown to HWPX
123
+
124
+ ```bash
125
+ pyhwpxlib md2hwpx report.md -o report.hwpx
126
+ pyhwpxlib md2hwpx report.md -o report.hwpx -s github # style preset
127
+ ```
128
+
129
+ Auto-detects: headings (#), **bold**, *italic*, bullet/numbered lists, code blocks, tables, horizontal rules.
130
+
131
+ ### hwpx2html -- HWPX to HTML
132
+
133
+ ```bash
134
+ pyhwpxlib hwpx2html document.hwpx -o document.html
135
+ ```
136
+
137
+ Produces a self-contained HTML with embedded base64 images.
138
+
139
+ ### text -- Extract text from HWPX
140
+
141
+ ```bash
142
+ pyhwpxlib text document.hwpx # plain text (default)
143
+ pyhwpxlib text document.hwpx -f markdown # as Markdown
144
+ pyhwpxlib text document.hwpx -f html # as HTML
145
+ ```
146
+
147
+ ### fill -- Fill template with data
148
+
149
+ ```bash
150
+ # Key-value pairs
151
+ pyhwpxlib fill template.hwpx -o filled.hwpx -d name=Hong age=30
152
+
153
+ # From JSON file
154
+ pyhwpxlib fill template.hwpx -o filled.hwpx -d data.json
155
+ ```
156
+
157
+ ### info -- Inspect HWPX file
158
+
159
+ ```bash
160
+ pyhwpxlib info document.hwpx
161
+ ```
162
+
163
+ Shows file size, section count, image list, text character/line counts, and a text preview.
164
+
165
+ ### merge -- Merge multiple HWPX files
166
+
167
+ ```bash
168
+ pyhwpxlib merge part1.hwpx part2.hwpx part3.hwpx -o combined.hwpx
169
+ ```
170
+
171
+ Inserts page breaks between documents automatically.
172
+
173
+ ### unpack -- Extract HWPX to folder
174
+
175
+ ```bash
176
+ pyhwpxlib unpack document.hwpx -o unpacked/
177
+ ```
178
+
179
+ Extracts all XML and binary files from the HWPX ZIP for direct editing.
180
+
181
+ ### pack -- Re-package folder as HWPX
182
+
183
+ ```bash
184
+ pyhwpxlib pack unpacked/ -o output.hwpx
185
+ ```
186
+
187
+ Re-creates a valid HWPX file from an unpacked folder. The `mimetype` entry is stored uncompressed per the OWPML spec.
188
+
189
+ ### validate -- Validate HWPX structure
190
+
191
+ ```bash
192
+ pyhwpxlib validate output.hwpx
193
+ ```
194
+
195
+ Checks for required files (`mimetype`, `header.xml`, `section0.xml`, `content.hpf`) and validates XML parsing. Returns exit code 0 on success, 1 on failure.
196
+
197
+ ---
198
+
199
+ ## Python API
200
+
201
+ ### Document Creation (HwpxBuilder)
202
+
203
+ High-level builder for creating HWPX documents. Includes table style presets (`corporate`, `government`, `academic`, `default`).
204
+
205
+ ```python
206
+ doc = HwpxBuilder(table_preset='corporate')
207
+ ```
208
+
209
+ | Method | Description |
210
+ |--------|-------------|
211
+ | `add_heading(text, level)` | Headings (level 1--4) |
212
+ | `add_paragraph(text, bold, italic, font_size, text_color, alignment)` | Styled paragraphs |
213
+ | `add_table(data, header_bg, col_widths, merge_info, cell_colors, ...)` | Tables with auto-preset styling |
214
+ | `add_bullet_list(items, bullet_char)` | Bullet lists (`-`, `•`, `◦`) |
215
+ | `add_numbered_list(items, format_string)` | Numbered lists (`^1.`, `^1)`, `(^1)`) |
216
+ | `add_nested_bullet_list(items)` | Multi-level bullet lists (level 0--6) |
217
+ | `add_nested_numbered_list(items)` | Multi-level numbered lists |
218
+ | `add_image(path, width, height)` | Local image |
219
+ | `add_image_from_url(url, width, height)` | Image from URL (auto-download) |
220
+ | `add_page_break()` | Page break |
221
+ | `add_line()` | Horizontal divider |
222
+ | `add_header(text)` / `add_footer(text)` | Header / Footer |
223
+ | `add_page_number(pos)` | Page numbers (4 positions) |
224
+ | `add_footnote(text)` | Footnotes |
225
+ | `add_equation(script)` | Math equations |
226
+ | `add_highlight(text, color)` | Highlighted text |
227
+ | `add_rectangle(...)` / `add_draw_line(...)` | Shapes |
228
+ | `save(path)` | Save as .hwpx |
229
+
230
+ ### Low-Level API (pyhwpxlib.api)
231
+
232
+ For fine-grained control over the HWPX object model:
233
+
234
+ ```python
235
+ from pyhwpxlib.api import create_document, add_paragraph, add_table, save
236
+
237
+ doc = create_document()
238
+ add_paragraph(doc, "Hello, World!", bold=True, font_size=14)
239
+ add_table(doc, rows=3, cols=2, data=[["A","B"],["1","2"],["3","4"]])
240
+ save(doc, "output.hwpx")
241
+ ```
242
+
243
+ **Additional low-level functions:**
244
+
245
+ | Category | Functions |
246
+ |----------|-----------|
247
+ | Text | `add_paragraph`, `add_styled_paragraph`, `add_heading`, `add_hyperlink`, `add_code_block` |
248
+ | Lists | `add_bullet_list`, `add_numbered_list`, `add_nested_bullet_list`, `add_nested_numbered_list` |
249
+ | Tables | `add_table` (with merge, gradient, per-cell styles) |
250
+ | Images & Shapes | `add_image`, `add_rectangle`, `add_ellipse`, `add_line`, `add_arc`, `add_polygon`, `add_curve`, `add_connect_line`, `add_textart`, `add_rectangle_with_image_fill` |
251
+ | Layout | `add_header`, `add_footer`, `add_page_number`, `add_page_break`, `set_page_setup`, `set_columns` |
252
+ | Annotations | `add_footnote`, `add_bookmark`, `add_indexmark`, `add_hidden_comment`, `add_highlight`, `add_dutmal` |
253
+ | Special | `add_equation`, `add_tab`, `add_special_char`, `add_container` |
254
+ | Form Controls | `add_checkbox`, `add_radio_button`, `add_button`, `add_combobox`, `add_listbox`, `add_edit_field`, `add_scrollbar` |
255
+ | Conversion | `convert_md_to_hwpx`, `convert_md_file_to_hwpx`, `convert_html_to_hwpx`, `convert_html_file_to_hwpx`, `convert_hwpx_to_html` |
256
+ | Reading | `open_document`, `extract_text`, `extract_markdown`, `extract_html` |
257
+ | Templates | `fill_template`, `fill_template_checkbox`, `fill_template_batch`, `extract_schema`, `analyze_schema_with_llm` |
258
+ | Documents | `merge_documents` |
259
+ | Page Setup | `set_page_setup(paper="A4"/"A3"/"B5"/"LETTER"/"LEGAL", landscape=True, margin_*)` |
260
+
261
+ ### Conversions
262
+
263
+ | Direction | CLI | Python |
264
+ |-----------|-----|--------|
265
+ | Markdown → HWPX | `pyhwpxlib md2hwpx in.md -o out.hwpx` | `convert_md_file_to_hwpx("in.md", "out.hwpx")` |
266
+ | HTML → HWPX | -- | `convert_html_file_to_hwpx("in.html", "out.hwpx")` |
267
+ | HWPX → HTML | `pyhwpxlib hwpx2html in.hwpx -o out.html` | `convert_hwpx_to_html("in.hwpx", "out.html")` |
268
+ | HWP 5.x → HWPX | -- | `from pyhwpxlib.hwp2hwpx import convert; convert("old.hwp", "new.hwpx")` |
269
+ | HWPX → Text | `pyhwpxlib text in.hwpx` | `extract_text("in.hwpx")` |
270
+ | HWPX → Markdown | `pyhwpxlib text in.hwpx -f markdown` | `extract_markdown("in.hwpx")` |
271
+
272
+ ### Template Automation
273
+
274
+ ```python
275
+ from pyhwpxlib.api import extract_schema, fill_template_checkbox, fill_template_batch
276
+
277
+ # 1. Discover what fields a template has
278
+ schema = extract_schema("form_template.hwpx")
279
+ print(schema) # {'title': '...', 'fields': [...], 'checkboxes': [...]}
280
+
281
+ # 2. Fill a single document
282
+ fill_template_checkbox(
283
+ "form_template.hwpx",
284
+ data={">Name<": ">Name Jane Doe<"},
285
+ checks=["Agree"],
286
+ output_path="filled.hwpx",
287
+ )
288
+
289
+ # 3. Batch-generate from a list of records
290
+ fill_template_batch(
291
+ "form_template.hwpx",
292
+ records=[
293
+ {"data": {">Name<": ">Name Alice<"}, "checks": ["Agree"]},
294
+ {"data": {">Name<": ">Name Bob<"}, "checks": ["Agree"]},
295
+ ],
296
+ output_dir="output/",
297
+ )
298
+ ```
299
+
300
+ ### Edit Existing Documents (Unpack/Pack)
301
+
302
+ ```bash
303
+ pyhwpxlib unpack document.hwpx -o unpacked/ # Extract ZIP to folder
304
+ # Edit XML files in unpacked/Contents/ directly
305
+ pyhwpxlib pack unpacked/ -o output.hwpx # Re-package as HWPX
306
+ pyhwpxlib validate output.hwpx # Validate structure
307
+ ```
308
+
309
+ ---
310
+
311
+ ## What is HWPX?
312
+
313
+ HWPX is the modern document format for Hancom Office, the standard office suite in South Korea. It's a ZIP archive containing XML files (OWPML spec) -- similar to `.docx` for Microsoft Word. Used by Korean government agencies, public institutions, and enterprises.
314
+
315
+ ## Credits
316
+
317
+ | Project | Author | License | Usage |
318
+ |---------|--------|---------|-------|
319
+ | [hwp2hwpx](https://github.com/neolord0/hwp2hwpx) | neolord0 | Apache 2.0 | HWP→HWPX conversion (ported to Python) |
320
+ | [hwplib](https://github.com/neolord0/hwplib) | neolord0 | Apache 2.0 | HWP binary parser (ported to Python) |
321
+ | [python-hwpx](https://github.com/airmang/python-hwpx) | Kyuhyun Ko | MIT | HWPX dataclass model |
322
+
323
+ ## Known Limitations
324
+
325
+ - Complex cell-merge layouts may require manual review
326
+ - No built-in HWPX preview (verify in Hancom Office or Whale)
327
+ - CSS→HWPX mapping covers 46 major properties only
328
+ - Image OCR for form text requires a separate API key
329
+
330
+ ## License
331
+
332
+ Dual license -- see [LICENSE.md](LICENSE.md) for details.
333
+
334
+ | Files | License |
335
+ |-------|---------|
336
+ | `hwp2hwpx.py`, `hwp_reader.py`, `value_convertor.py` | Apache 2.0 (derivative works) |
337
+ | **All other files** | **BSL 1.1** |
338
+
339
+ **BSL 1.1 summary:** Personal/non-commercial/educational/open-source use is free. Commercial use requires a license. Converts to Apache 2.0 after 2030-04-07.