myagent-ai 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486) hide show
  1. package/Dockerfile +30 -0
  2. package/README.md +333 -0
  3. package/agents/__init__.py +6 -0
  4. package/agents/__pycache__/main_agent.cpython-312.pyc +0 -0
  5. package/agents/base.py +115 -0
  6. package/agents/main_agent.py +695 -0
  7. package/agents/memory_agent.py +313 -0
  8. package/agents/tool_agent.py +248 -0
  9. package/chatbot/__init__.py +5 -0
  10. package/chatbot/base.py +124 -0
  11. package/chatbot/discord_bot.py +146 -0
  12. package/chatbot/feishu_bot.py +548 -0
  13. package/chatbot/manager.py +164 -0
  14. package/chatbot/qq_bot.py +189 -0
  15. package/chatbot/telegram_bot.py +167 -0
  16. package/chatbot/wechat_bot.py +558 -0
  17. package/communication/__init__.py +66 -0
  18. package/communication/channel.py +576 -0
  19. package/communication/crypto.py +347 -0
  20. package/communication/manager.py +397 -0
  21. package/communication/peer.py +156 -0
  22. package/config.py +464 -0
  23. package/core/__init__.py +10 -0
  24. package/core/config_broadcast.py +276 -0
  25. package/core/llm.py +878 -0
  26. package/core/logger.py +241 -0
  27. package/core/task_queue.py +362 -0
  28. package/core/utils.py +184 -0
  29. package/executor/__init__.py +4 -0
  30. package/executor/__pycache__/engine.cpython-312.pyc +0 -0
  31. package/executor/engine.py +1215 -0
  32. package/groups/__init__.py +15 -0
  33. package/groups/manager.py +724 -0
  34. package/knowledge/__init__.py +4 -0
  35. package/knowledge/rag.py +444 -0
  36. package/main.py +801 -0
  37. package/memory/__init__.py +4 -0
  38. package/memory/manager.py +840 -0
  39. package/organization/__init__.py +4 -0
  40. package/organization/manager.py +350 -0
  41. package/package.json +58 -0
  42. package/requirements.txt +59 -0
  43. package/setup.py +40 -0
  44. package/skills/ASR/LICENSE.txt +21 -0
  45. package/skills/ASR/SKILL.md +580 -0
  46. package/skills/ASR/scripts/asr.ts +27 -0
  47. package/skills/LLM/LICENSE.txt +21 -0
  48. package/skills/LLM/SKILL.md +856 -0
  49. package/skills/LLM/scripts/chat.ts +32 -0
  50. package/skills/TTS/LICENSE.txt +21 -0
  51. package/skills/TTS/SKILL.md +735 -0
  52. package/skills/TTS/tts.ts +25 -0
  53. package/skills/VLM/LICENSE.txt +21 -0
  54. package/skills/VLM/SKILL.md +588 -0
  55. package/skills/VLM/scripts/vlm.ts +57 -0
  56. package/skills/__init__.py +5 -0
  57. package/skills/agent-browser/SKILL.md +328 -0
  58. package/skills/ai-news-collectors/SKILL.md +157 -0
  59. package/skills/ai-news-collectors/_meta.json +6 -0
  60. package/skills/ai-news-collectors/references/sources.md +128 -0
  61. package/skills/aminer-open-academic/SKILL.md +312 -0
  62. package/skills/aminer-open-academic/_meta.json +6 -0
  63. package/skills/aminer-open-academic/evals/evals.json +46 -0
  64. package/skills/aminer-open-academic/references/api-catalog.md +1032 -0
  65. package/skills/aminer-open-academic/scripts/__pycache__/aminer_client.cpython-312.pyc +0 -0
  66. package/skills/aminer-open-academic/scripts/aminer_client.py +875 -0
  67. package/skills/auto-target-tracker/SKILL.md +317 -0
  68. package/skills/base.py +147 -0
  69. package/skills/blog-writer/2024-02-17-radical-transparency-sales.md +35 -0
  70. package/skills/blog-writer/2024-02-17-raycast-spotlight-superpowers.md +33 -0
  71. package/skills/blog-writer/2024-02-17-short-form-content-marketing.md +47 -0
  72. package/skills/blog-writer/2024-02-17-typing-speed-benefits.md +33 -0
  73. package/skills/blog-writer/2024-03-14-effective-ai-prompts.md +55 -0
  74. package/skills/blog-writer/2024-11-08-ai-revolutionizing-entry-level-sales.md +43 -0
  75. package/skills/blog-writer/2025-11-12-why-ai-art-is-useless.md +49 -0
  76. package/skills/blog-writer/README.md +2 -0
  77. package/skills/blog-writer/SKILL.md +158 -0
  78. package/skills/blog-writer/__pycache__/manage_examples.cpython-312.pyc +0 -0
  79. package/skills/blog-writer/_meta.json +6 -0
  80. package/skills/blog-writer/manage_examples.py +90 -0
  81. package/skills/blog-writer/style-guide.md +160 -0
  82. package/skills/browser_skill.py +146 -0
  83. package/skills/coding-agent/SKILL.md +120 -0
  84. package/skills/coding-agent/_meta.json +6 -0
  85. package/skills/coding-agent/criteria.md +48 -0
  86. package/skills/coding-agent/execution.md +42 -0
  87. package/skills/coding-agent/memory-template.md +38 -0
  88. package/skills/coding-agent/planning.md +31 -0
  89. package/skills/coding-agent/state.md +60 -0
  90. package/skills/coding-agent/verification.md +39 -0
  91. package/skills/content-strategy/SKILL.md +181 -0
  92. package/skills/content-strategy/_meta.json +6 -0
  93. package/skills/contentanalysis/ExtractWisdom/SKILL.md +229 -0
  94. package/skills/contentanalysis/ExtractWisdom/Workflows/Extract.md +60 -0
  95. package/skills/contentanalysis/SKILL.md +14 -0
  96. package/skills/docx/CHANGELOG.md +85 -0
  97. package/skills/docx/LICENSE.txt +30 -0
  98. package/skills/docx/SKILL.md +455 -0
  99. package/skills/docx/docx-js.md +681 -0
  100. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  101. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  102. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  103. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  104. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  105. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  106. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  107. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  108. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  109. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  110. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  111. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  112. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  113. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  114. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  115. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  116. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  117. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  118. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  119. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  120. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  121. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  122. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  123. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  124. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  125. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  126. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  127. package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  128. package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  129. package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  130. package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  131. package/skills/docx/ooxml/schemas/mce/mc.xsd +75 -0
  132. package/skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
  133. package/skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
  134. package/skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
  135. package/skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
  136. package/skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
  137. package/skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  138. package/skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
  139. package/skills/docx/ooxml/scripts/__pycache__/pack.cpython-312.pyc +0 -0
  140. package/skills/docx/ooxml/scripts/__pycache__/unpack.cpython-312.pyc +0 -0
  141. package/skills/docx/ooxml/scripts/__pycache__/validate.cpython-312.pyc +0 -0
  142. package/skills/docx/ooxml/scripts/pack.py +159 -0
  143. package/skills/docx/ooxml/scripts/unpack.py +29 -0
  144. package/skills/docx/ooxml/scripts/validate.py +69 -0
  145. package/skills/docx/ooxml/scripts/validation/__init__.py +15 -0
  146. package/skills/docx/ooxml/scripts/validation/__pycache__/__init__.cpython-312.pyc +0 -0
  147. package/skills/docx/ooxml/scripts/validation/__pycache__/base.cpython-312.pyc +0 -0
  148. package/skills/docx/ooxml/scripts/validation/__pycache__/docx.cpython-312.pyc +0 -0
  149. package/skills/docx/ooxml/scripts/validation/__pycache__/pptx.cpython-312.pyc +0 -0
  150. package/skills/docx/ooxml/scripts/validation/__pycache__/redlining.cpython-312.pyc +0 -0
  151. package/skills/docx/ooxml/scripts/validation/base.py +951 -0
  152. package/skills/docx/ooxml/scripts/validation/docx.py +274 -0
  153. package/skills/docx/ooxml/scripts/validation/pptx.py +315 -0
  154. package/skills/docx/ooxml/scripts/validation/redlining.py +279 -0
  155. package/skills/docx/ooxml.md +615 -0
  156. package/skills/docx/scripts/__init__.py +1 -0
  157. package/skills/docx/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  158. package/skills/docx/scripts/__pycache__/add_toc_placeholders.cpython-312.pyc +0 -0
  159. package/skills/docx/scripts/__pycache__/document.cpython-312.pyc +0 -0
  160. package/skills/docx/scripts/__pycache__/utilities.cpython-312.pyc +0 -0
  161. package/skills/docx/scripts/add_toc_placeholders.py +220 -0
  162. package/skills/docx/scripts/document.py +1302 -0
  163. package/skills/docx/scripts/templates/comments.xml +3 -0
  164. package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  165. package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  166. package/skills/docx/scripts/templates/commentsIds.xml +3 -0
  167. package/skills/docx/scripts/templates/people.xml +3 -0
  168. package/skills/docx/scripts/utilities.py +374 -0
  169. package/skills/dream-interpreter/SKILL.md +88 -0
  170. package/skills/dream-interpreter/assets/example_asset.txt +24 -0
  171. package/skills/dream-interpreter/references/api_reference.md +34 -0
  172. package/skills/dream-interpreter/references/interpretation-guide.md +83 -0
  173. package/skills/dream-interpreter/references/output-schema.md +65 -0
  174. package/skills/dream-interpreter/references/questioning-strategy.md +62 -0
  175. package/skills/dream-interpreter/references/visual-mapping.md +81 -0
  176. package/skills/dream-interpreter/scripts/__pycache__/example.cpython-312.pyc +0 -0
  177. package/skills/dream-interpreter/scripts/example.py +19 -0
  178. package/skills/dream-interpreter/skill.json +7 -0
  179. package/skills/file_skill.py +246 -0
  180. package/skills/finance/Finance_API_Doc.md +445 -0
  181. package/skills/finance/SKILL.md +53 -0
  182. package/skills/fullstack-dev/SKILL.md +205 -0
  183. package/skills/get-fortune-analysis/SKILL.md +370 -0
  184. package/skills/get-fortune-analysis/lunar_python.py +91 -0
  185. package/skills/gift-evaluator/SKILL.md +83 -0
  186. package/skills/gift-evaluator/__pycache__/html_tools.cpython-312.pyc +0 -0
  187. package/skills/gift-evaluator/html_tools.py +268 -0
  188. package/skills/image-edit/LICENSE.txt +21 -0
  189. package/skills/image-edit/SKILL.md +896 -0
  190. package/skills/image-edit/scripts/image-edit.ts +36 -0
  191. package/skills/image-generation/LICENSE.txt +21 -0
  192. package/skills/image-generation/SKILL.md +583 -0
  193. package/skills/image-generation/scripts/image-generation.ts +28 -0
  194. package/skills/image-understand/LICENSE.txt +21 -0
  195. package/skills/image-understand/SKILL.md +855 -0
  196. package/skills/image-understand/scripts/image-understand.ts +41 -0
  197. package/skills/interview-designer/README.md +70 -0
  198. package/skills/interview-designer/SKILL.md +53 -0
  199. package/skills/interview-designer/_meta.json +6 -0
  200. package/skills/interview-designer/references/design_rationale.md +43 -0
  201. package/skills/interview-designer/templates/interview_guide_template.md +62 -0
  202. package/skills/market-research-reports/SKILL.md +901 -0
  203. package/skills/market-research-reports/assets/FORMATTING_GUIDE.md +428 -0
  204. package/skills/market-research-reports/assets/market_report_template.tex +1380 -0
  205. package/skills/market-research-reports/assets/market_research.sty +564 -0
  206. package/skills/market-research-reports/references/data_analysis_patterns.md +548 -0
  207. package/skills/market-research-reports/references/report_structure_guide.md +999 -0
  208. package/skills/market-research-reports/references/visual_generation_guide.md +1077 -0
  209. package/skills/market-research-reports/scripts/__pycache__/generate_market_visuals.cpython-312.pyc +0 -0
  210. package/skills/market-research-reports/scripts/generate_market_visuals.py +529 -0
  211. package/skills/marketing-mode/README.md +49 -0
  212. package/skills/marketing-mode/SKILL.md +693 -0
  213. package/skills/marketing-mode/_meta.json +6 -0
  214. package/skills/marketing-mode/mode-prompt.md +39 -0
  215. package/skills/marketing-mode/skill.json +51 -0
  216. package/skills/mindfulness-meditation/SKILL.md +65 -0
  217. package/skills/mindfulness-meditation/_meta.json +6 -0
  218. package/skills/multi-search-engine/CHANGELOG.md +15 -0
  219. package/skills/multi-search-engine/CHANNELLOG.md +48 -0
  220. package/skills/multi-search-engine/SKILL.md +78 -0
  221. package/skills/multi-search-engine/_meta.json +6 -0
  222. package/skills/multi-search-engine/config.json +14 -0
  223. package/skills/multi-search-engine/metadata.json +7 -0
  224. package/skills/multi-search-engine/references/international-search.md +651 -0
  225. package/skills/pdf/LICENSE.txt +30 -0
  226. package/skills/pdf/SKILL.md +1534 -0
  227. package/skills/pdf/forms.md +205 -0
  228. package/skills/pdf/reference.md +765 -0
  229. package/skills/pdf/scripts/__pycache__/add_zai_metadata.cpython-312.pyc +0 -0
  230. package/skills/pdf/scripts/__pycache__/check_bounding_boxes.cpython-312.pyc +0 -0
  231. package/skills/pdf/scripts/__pycache__/check_bounding_boxes_test.cpython-312.pyc +0 -0
  232. package/skills/pdf/scripts/__pycache__/check_fillable_fields.cpython-312.pyc +0 -0
  233. package/skills/pdf/scripts/__pycache__/convert_pdf_to_images.cpython-312.pyc +0 -0
  234. package/skills/pdf/scripts/__pycache__/create_validation_image.cpython-312.pyc +0 -0
  235. package/skills/pdf/scripts/__pycache__/extract_form_field_info.cpython-312.pyc +0 -0
  236. package/skills/pdf/scripts/__pycache__/fill_fillable_fields.cpython-312.pyc +0 -0
  237. package/skills/pdf/scripts/__pycache__/fill_pdf_form_with_annotations.cpython-312.pyc +0 -0
  238. package/skills/pdf/scripts/__pycache__/sanitize_code.cpython-312.pyc +0 -0
  239. package/skills/pdf/scripts/add_zai_metadata.py +172 -0
  240. package/skills/pdf/scripts/check_bounding_boxes.py +70 -0
  241. package/skills/pdf/scripts/check_bounding_boxes_test.py +226 -0
  242. package/skills/pdf/scripts/check_fillable_fields.py +12 -0
  243. package/skills/pdf/scripts/convert_pdf_to_images.py +35 -0
  244. package/skills/pdf/scripts/create_validation_image.py +41 -0
  245. package/skills/pdf/scripts/extract_form_field_info.py +152 -0
  246. package/skills/pdf/scripts/fill_fillable_fields.py +114 -0
  247. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +108 -0
  248. package/skills/pdf/scripts/sanitize_code.py +110 -0
  249. package/skills/podcast-generate/LICENSE.txt +21 -0
  250. package/skills/podcast-generate/SKILL.md +198 -0
  251. package/skills/podcast-generate/generate.ts +661 -0
  252. package/skills/podcast-generate/package.json +30 -0
  253. package/skills/podcast-generate/readme.md +177 -0
  254. package/skills/podcast-generate/test_data/segments.jsonl +3 -0
  255. package/skills/podcast-generate/tsconfig.json +26 -0
  256. package/skills/pptx/LICENSE.txt +30 -0
  257. package/skills/pptx/SKILL.md +507 -0
  258. package/skills/pptx/html2pptx.md +625 -0
  259. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  260. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  261. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  262. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  263. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  264. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  265. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  266. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  267. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  268. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  269. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  270. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  271. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  272. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  273. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  274. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  275. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  276. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  277. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  278. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  279. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  280. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  281. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  282. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  283. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  284. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  285. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  286. package/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  287. package/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  288. package/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  289. package/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  290. package/skills/pptx/ooxml/schemas/mce/mc.xsd +75 -0
  291. package/skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
  292. package/skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
  293. package/skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
  294. package/skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
  295. package/skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
  296. package/skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  297. package/skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
  298. package/skills/pptx/ooxml/scripts/__pycache__/pack.cpython-312.pyc +0 -0
  299. package/skills/pptx/ooxml/scripts/__pycache__/unpack.cpython-312.pyc +0 -0
  300. package/skills/pptx/ooxml/scripts/__pycache__/validate.cpython-312.pyc +0 -0
  301. package/skills/pptx/ooxml/scripts/pack.py +159 -0
  302. package/skills/pptx/ooxml/scripts/unpack.py +29 -0
  303. package/skills/pptx/ooxml/scripts/validate.py +69 -0
  304. package/skills/pptx/ooxml/scripts/validation/__init__.py +15 -0
  305. package/skills/pptx/ooxml/scripts/validation/__pycache__/__init__.cpython-312.pyc +0 -0
  306. package/skills/pptx/ooxml/scripts/validation/__pycache__/base.cpython-312.pyc +0 -0
  307. package/skills/pptx/ooxml/scripts/validation/__pycache__/docx.cpython-312.pyc +0 -0
  308. package/skills/pptx/ooxml/scripts/validation/__pycache__/pptx.cpython-312.pyc +0 -0
  309. package/skills/pptx/ooxml/scripts/validation/__pycache__/redlining.cpython-312.pyc +0 -0
  310. package/skills/pptx/ooxml/scripts/validation/base.py +951 -0
  311. package/skills/pptx/ooxml/scripts/validation/docx.py +274 -0
  312. package/skills/pptx/ooxml/scripts/validation/pptx.py +315 -0
  313. package/skills/pptx/ooxml/scripts/validation/redlining.py +279 -0
  314. package/skills/pptx/ooxml.md +427 -0
  315. package/skills/pptx/scripts/__pycache__/inventory.cpython-312.pyc +0 -0
  316. package/skills/pptx/scripts/__pycache__/inventory.cpython-313.pyc +0 -0
  317. package/skills/pptx/scripts/__pycache__/rearrange.cpython-312.pyc +0 -0
  318. package/skills/pptx/scripts/__pycache__/replace.cpython-312.pyc +0 -0
  319. package/skills/pptx/scripts/__pycache__/thumbnail.cpython-312.pyc +0 -0
  320. package/skills/pptx/scripts/html2pptx.js +1044 -0
  321. package/skills/pptx/scripts/inventory.py +1020 -0
  322. package/skills/pptx/scripts/rearrange.py +231 -0
  323. package/skills/pptx/scripts/replace.py +385 -0
  324. package/skills/pptx/scripts/thumbnail.py +450 -0
  325. package/skills/qingyan-research/SKILL.md +294 -0
  326. package/skills/qingyan-research/__pycache__/generate_html.cpython-312.pyc +0 -0
  327. package/skills/qingyan-research/generate_html.py +33 -0
  328. package/skills/registry.py +344 -0
  329. package/skills/search_skill.py +228 -0
  330. package/skills/seo-content-writer/SKILL.md +661 -0
  331. package/skills/seo-content-writer/_meta.json +6 -0
  332. package/skills/seo-content-writer/references/content-structure-templates.md +875 -0
  333. package/skills/seo-content-writer/references/title-formulas.md +339 -0
  334. package/skills/skill-creator/LICENSE.txt +202 -0
  335. package/skills/skill-creator/SKILL.md +485 -0
  336. package/skills/skill-creator/agents/analyzer.md +274 -0
  337. package/skills/skill-creator/agents/comparator.md +202 -0
  338. package/skills/skill-creator/agents/grader.md +223 -0
  339. package/skills/skill-creator/assets/eval_review.html +146 -0
  340. package/skills/skill-creator/eval-viewer/__pycache__/generate_review.cpython-312.pyc +0 -0
  341. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  342. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  343. package/skills/skill-creator/references/schemas.md +430 -0
  344. package/skills/skill-creator/scripts/__init__.py +0 -0
  345. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  346. package/skills/skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-312.pyc +0 -0
  347. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-312.pyc +0 -0
  348. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-312.pyc +0 -0
  349. package/skills/skill-creator/scripts/__pycache__/package_skill.cpython-312.pyc +0 -0
  350. package/skills/skill-creator/scripts/__pycache__/quick_validate.cpython-312.pyc +0 -0
  351. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-312.pyc +0 -0
  352. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-312.pyc +0 -0
  353. package/skills/skill-creator/scripts/__pycache__/utils.cpython-312.pyc +0 -0
  354. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  355. package/skills/skill-creator/scripts/generate_report.py +326 -0
  356. package/skills/skill-creator/scripts/improve_description.py +236 -0
  357. package/skills/skill-creator/scripts/package_skill.py +136 -0
  358. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  359. package/skills/skill-creator/scripts/run_eval.py +310 -0
  360. package/skills/skill-creator/scripts/run_loop.py +328 -0
  361. package/skills/skill-creator/scripts/utils.py +47 -0
  362. package/skills/skill-finder-cn/SKILL.md +66 -0
  363. package/skills/skill-finder-cn/_meta.json +6 -0
  364. package/skills/skill-finder-cn/package.json +5 -0
  365. package/skills/skill-finder-cn/scripts/search.sh +15 -0
  366. package/skills/skill-vetter/SKILL.md +137 -0
  367. package/skills/stock-analysis-skill/SKILL.md +156 -0
  368. package/skills/stock-analysis-skill/package.json +21 -0
  369. package/skills/stock-analysis-skill/src/analyzer.ts +264 -0
  370. package/skills/stock-analysis-skill/src/dataFetcher.ts +130 -0
  371. package/skills/stock-analysis-skill/src/dividend.ts +226 -0
  372. package/skills/stock-analysis-skill/src/index.ts +327 -0
  373. package/skills/stock-analysis-skill/src/rumorScanner.ts +200 -0
  374. package/skills/stock-analysis-skill/src/types.ts +167 -0
  375. package/skills/stock-analysis-skill/src/watchlist.ts +292 -0
  376. package/skills/stock-analysis-skill/tsconfig.json +15 -0
  377. package/skills/storyboard-manager/SKILL.md +532 -0
  378. package/skills/storyboard-manager/index.js +9 -0
  379. package/skills/storyboard-manager/package.json +11 -0
  380. package/skills/storyboard-manager/references/character_development.md +232 -0
  381. package/skills/storyboard-manager/references/story_structures.md +148 -0
  382. package/skills/storyboard-manager/scripts/__pycache__/consistency_checker.cpython-312.pyc +0 -0
  383. package/skills/storyboard-manager/scripts/__pycache__/timeline_tracker.cpython-312.pyc +0 -0
  384. package/skills/storyboard-manager/scripts/consistency_checker.py +391 -0
  385. package/skills/storyboard-manager/scripts/timeline_tracker.py +352 -0
  386. package/skills/system_skill.py +249 -0
  387. package/skills/ui-ux-pro-max/SKILL.md +43 -0
  388. package/skills/ui-ux-pro-max/_meta.json +6 -0
  389. package/skills/ui-ux-pro-max/assets/data/charts.csv +26 -0
  390. package/skills/ui-ux-pro-max/assets/data/colors.csv +97 -0
  391. package/skills/ui-ux-pro-max/assets/data/icons.csv +101 -0
  392. package/skills/ui-ux-pro-max/assets/data/landing.csv +31 -0
  393. package/skills/ui-ux-pro-max/assets/data/products.csv +97 -0
  394. package/skills/ui-ux-pro-max/assets/data/react-performance.csv +45 -0
  395. package/skills/ui-ux-pro-max/assets/data/stacks/astro.csv +54 -0
  396. package/skills/ui-ux-pro-max/assets/data/stacks/flutter.csv +53 -0
  397. package/skills/ui-ux-pro-max/assets/data/stacks/html-tailwind.csv +56 -0
  398. package/skills/ui-ux-pro-max/assets/data/stacks/jetpack-compose.csv +53 -0
  399. package/skills/ui-ux-pro-max/assets/data/stacks/nextjs.csv +53 -0
  400. package/skills/ui-ux-pro-max/assets/data/stacks/nuxt-ui.csv +51 -0
  401. package/skills/ui-ux-pro-max/assets/data/stacks/nuxtjs.csv +59 -0
  402. package/skills/ui-ux-pro-max/assets/data/stacks/react-native.csv +52 -0
  403. package/skills/ui-ux-pro-max/assets/data/stacks/react.csv +54 -0
  404. package/skills/ui-ux-pro-max/assets/data/stacks/shadcn.csv +61 -0
  405. package/skills/ui-ux-pro-max/assets/data/stacks/svelte.csv +54 -0
  406. package/skills/ui-ux-pro-max/assets/data/stacks/swiftui.csv +51 -0
  407. package/skills/ui-ux-pro-max/assets/data/stacks/vue.csv +50 -0
  408. package/skills/ui-ux-pro-max/assets/data/styles.csv +68 -0
  409. package/skills/ui-ux-pro-max/assets/data/typography.csv +58 -0
  410. package/skills/ui-ux-pro-max/assets/data/ui-reasoning.csv +101 -0
  411. package/skills/ui-ux-pro-max/assets/data/ux-guidelines.csv +100 -0
  412. package/skills/ui-ux-pro-max/assets/data/web-interface.csv +31 -0
  413. package/skills/ui-ux-pro-max/data/charts.csv +26 -0
  414. package/skills/ui-ux-pro-max/data/colors.csv +97 -0
  415. package/skills/ui-ux-pro-max/data/icons.csv +101 -0
  416. package/skills/ui-ux-pro-max/data/landing.csv +31 -0
  417. package/skills/ui-ux-pro-max/data/products.csv +97 -0
  418. package/skills/ui-ux-pro-max/data/react-performance.csv +45 -0
  419. package/skills/ui-ux-pro-max/data/stacks/astro.csv +54 -0
  420. package/skills/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
  421. package/skills/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
  422. package/skills/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -0
  423. package/skills/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
  424. package/skills/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -0
  425. package/skills/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -0
  426. package/skills/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
  427. package/skills/ui-ux-pro-max/data/stacks/react.csv +54 -0
  428. package/skills/ui-ux-pro-max/data/stacks/shadcn.csv +61 -0
  429. package/skills/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
  430. package/skills/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
  431. package/skills/ui-ux-pro-max/data/stacks/vue.csv +50 -0
  432. package/skills/ui-ux-pro-max/data/styles.csv +68 -0
  433. package/skills/ui-ux-pro-max/data/typography.csv +58 -0
  434. package/skills/ui-ux-pro-max/data/ui-reasoning.csv +101 -0
  435. package/skills/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
  436. package/skills/ui-ux-pro-max/data/web-interface.csv +31 -0
  437. package/skills/ui-ux-pro-max/references/upstream-README.md +488 -0
  438. package/skills/ui-ux-pro-max/references/upstream-skill-content.md +288 -0
  439. package/skills/ui-ux-pro-max/scripts/__init__.py +0 -0
  440. package/skills/ui-ux-pro-max/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  441. package/skills/ui-ux-pro-max/scripts/__pycache__/core.cpython-312.pyc +0 -0
  442. package/skills/ui-ux-pro-max/scripts/__pycache__/design_system.cpython-312.pyc +0 -0
  443. package/skills/ui-ux-pro-max/scripts/__pycache__/search.cpython-312.pyc +0 -0
  444. package/skills/ui-ux-pro-max/scripts/core.py +253 -0
  445. package/skills/ui-ux-pro-max/scripts/design_system.py +1071 -0
  446. package/skills/ui-ux-pro-max/scripts/search.py +111 -0
  447. package/skills/video-generation/LICENSE.txt +21 -0
  448. package/skills/video-generation/SKILL.md +1082 -0
  449. package/skills/video-generation/scripts/video.ts +168 -0
  450. package/skills/video-understand/LICENSE.txt +21 -0
  451. package/skills/video-understand/SKILL.md +916 -0
  452. package/skills/video-understand/scripts/video-understand.ts +41 -0
  453. package/skills/visual-design-foundations/SKILL.md +318 -0
  454. package/skills/visual-design-foundations/references/color-systems.md +417 -0
  455. package/skills/visual-design-foundations/references/spacing-iconography.md +425 -0
  456. package/skills/visual-design-foundations/references/typography-systems.md +432 -0
  457. package/skills/web-reader/LICENSE.txt +21 -0
  458. package/skills/web-reader/SKILL.md +1140 -0
  459. package/skills/web-reader/scripts/web-reader.ts +37 -0
  460. package/skills/web-search/LICENSE.txt +21 -0
  461. package/skills/web-search/SKILL.md +912 -0
  462. package/skills/web-search/scripts/web_search.ts +44 -0
  463. package/skills/web-shader-extractor/SKILL.md +145 -0
  464. package/skills/web-shader-extractor/references/config-extraction.md +50 -0
  465. package/skills/web-shader-extractor/references/encoded-definitions.md +53 -0
  466. package/skills/web-shader-extractor/references/extraction-workflow.md +61 -0
  467. package/skills/web-shader-extractor/references/porting-strategy.md +164 -0
  468. package/skills/web-shader-extractor/references/shader-injection.md +126 -0
  469. package/skills/web-shader-extractor/references/shaders-com.md +190 -0
  470. package/skills/web-shader-extractor/references/tech-signatures.md +54 -0
  471. package/skills/web-shader-extractor/references/tsl-extraction.md +41 -0
  472. package/skills/web-shader-extractor/references/unicorn-studio.md +353 -0
  473. package/skills/web-shader-extractor/scripts/fetch-rendered-dom.mjs +153 -0
  474. package/skills/web-shader-extractor/scripts/scan-bundle.sh +76 -0
  475. package/skills/writing-plans/SKILL.md +116 -0
  476. package/skills/writing-plans/_meta.json +6 -0
  477. package/skills/xlsx/LICENSE.txt +30 -0
  478. package/skills/xlsx/SKILL.md +496 -0
  479. package/skills/xlsx/__pycache__/recalc.cpython-312.pyc +0 -0
  480. package/skills/xlsx/recalc.py +178 -0
  481. package/start.sh +36 -0
  482. package/web/__init__.py +1 -0
  483. package/web/__pycache__/api_server.cpython-312.pyc +0 -0
  484. package/web/api_server.py +2043 -0
  485. package/web/ui/chat.html +3235 -0
  486. package/web/ui/index.html +458 -0
@@ -0,0 +1,1534 @@
1
+ ---
2
+ name: pdf
3
+ description: Comprehensive PDF manipulation toolkit for extracting text and tables, creating new PDFs, merging/splitting documents, and handling forms. Use when GLM needs to fill in a PDF form or programmatically process, generate, or analyze PDF documents at scale.
4
+ license: Proprietary. LICENSE.txt has complete terms
5
+ ---
6
+
7
+ # PDF Processing Guide
8
+
9
+ ## Overview
10
+
11
+ This guide covers essential PDF processing operations using Python libraries and command-line tools. For advanced features, JavaScript libraries, and detailed examples, see reference.md. If you need to fill out a PDF form, read forms.md and follow its instructions.
12
+
13
+ Role: You are a Professional Document Architect and Technical Editor specializing in high-density, industry-standard PDF content creation. If the content is not rich enough, use the web-search skill first.
14
+
15
+ Objective: Generate content that is information-rich, structured for maximum professional utility, and optimized for a compact, low-padding layout without sacrificing readability.
16
+
17
+ ---
18
+
19
+
20
+ ## Core Constraints (Must Follow)
21
+
22
+ ### 1. Output Language
23
+ **Generated PDF must use the same language as user's query.**
24
+ - Chinese query → Generate Chinese PDF content
25
+ - English query → Generate English PDF content
26
+ - Explicit language specification → Follow user's choice
27
+
28
+ ### 2. Page Count Control
29
+ - Follow user's page specifications strictly
30
+
31
+ | User Input | Execution Rule |
32
+ |------------|----------------|
33
+ | Explicit count (e.g., "3 pages") | Match exactly; allow partial final page |
34
+ | Unspecified | Determine based on document type; prioritize completeness over brevity |
35
+
36
+ **Avoid these mistakes**:
37
+ - Cutting content short (brevity is not a valid excuse)
38
+ - Filling pages with low-density bullet lists (keep information dense)
39
+ - Creating documents over 2x the requested length
40
+
41
+ **Resume/CV exception**:
42
+ - Target **1 page** by default unless otherwise instructed
43
+ - Apply tight margins: `margin: 1.5cm`
44
+
45
+ ### 3. Structure Compliance (Mandatory)
46
+ **User supplies outline**:
47
+ - **Strictly follow** the outline structure provided by user
48
+ - Match section names from outline (slight rewording OK; preserve hierarchy and sequence)
49
+ - Never add/remove sections on your own
50
+ - If structure seems flawed, **confirm with user** before changing
51
+
52
+ **No outline provided**:
53
+ - Deploy standard frameworks by document category:
54
+ - **Academic papers**: IMRaD format (Introduction-Methods-Results-Discussion) or Introduction-Literature Review-Methods-Results-Discussion-Conclusion
55
+ - **Business reports**: Top-down approach (Executive Summary → In-depth Analysis → Recommendations)
56
+ - **Technical guides**: Overview → Core Concepts → Implementation → Examples → FAQ
57
+ - **Academic assignments**: Match assignment rubric structure
58
+ - Ensure logical flow between sections without gaps
59
+
60
+ ### 4. Information Sourcing Requirements
61
+
62
+ #### CRITICAL: Verify Before Writing
63
+ **Never invent facts. If unsure, SEARCH immediately.**
64
+
65
+ Mandatory search triggers - You **MUST search FIRST** if content includes ANY of the following:
66
+ - Quantitative data, metrics, percentages, rankings
67
+ - Legal/regulatory frameworks, policies, industry standards
68
+ - Scholarly findings, theoretical models, research methods
69
+ - Recent news, emerging trends
70
+ - **Any information you cannot verify with certainty**
71
+
72
+ ### 5. Character Safety Rule (Mandatory)
73
+
74
+ **Golden Rule: Every character in the final PDF must come from the following sources:**
75
+ 1. CJK characters rendered by registered Chinese fonts (SimHei / Microsoft YaHei)
76
+ 2. Mathematical/relational operators (e.g., `+`, `−`, `×`, `÷`, `±`, `≤`, `√`, `∑`, `≅`, `∫`, `π`, `∠`, etc.)
77
+
78
+ **FORBIDDEN unicode escape sequence (DO NOT USE):**
79
+ 1. Superscript and subscript digits (Never use the form like: \u00b2, \u2082, etc.)
80
+ 2. Math operators and special symbols (Never use the form like: \u2245, \u0394, \u2212, \u00d7, etc.)
81
+ 3. Emoji characters (Never use the form like: \u2728, \u2705, etc.)
82
+
83
+ **The ONLY way to produce bold text, superscripts, subscripts, or Mathematical/relational operators is through ReportLab tags inside `Paragraph()` objects:**
84
+
85
+ | Need | Correct Method | Correct Example |
86
+ |------|---------------|---------|
87
+ | Superscript | `<super>` tag in `Paragraph()` | `Paragraph('10<super>2</super> × 10<super>3</super> = 10<super>5</super>', style)` |
88
+ | Subscript | `<sub>` tag in `Paragraph()` | `Paragraph('H<sub>2</sub>O', style)` |
89
+ | Bold | `<b>` tag in `Paragraph()` | `Paragraph('<b>Title</b>', style)` |
90
+ | Mathematical/relational operators | Literal char in `Paragraph()` | `Paragraph('AB ⊥ AC, ∠A = 90°, and ΔABC ≅ ΔDCF', style)` |
91
+ | Scientific notation | Combined tags in `Paragraph()` | `Paragraph('1.2 × 10<super>8</super> kg/m<super>3</super>', style)` |
92
+
93
+ ```python
94
+ from reportlab.platypus import Paragraph
95
+ from reportlab.lib.styles import ParagraphStyle
96
+ from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY
97
+
98
+ body_style = enbody_style = ParagraphStyle(
99
+ name="ENBodyStyle",
100
+ fontName="Times New Roman",
101
+ fontSize=10.5,
102
+ leading=18,
103
+ alignment=TA_JUSTIFY,
104
+ )
105
+ header_style = ParagraphStyle(
106
+ name='CoverTitle',
107
+ fontName='Times New Roman',
108
+ fontSize=42,
109
+ leading=50,
110
+ alignment=TA_CENTER,
111
+ spaceAfter=36
112
+ )
113
+
114
+ # Superscript: area unit
115
+ Paragraph('Total area: 500 m<super>2</super>', body_style)
116
+
117
+ # Subscript: chemical formula
118
+ Paragraph('The reaction produces CO<sub>2</sub> and H<sub>2</sub>O', body_style)
119
+
120
+ # Scientific notation: large number with superscript
121
+ Paragraph('Speed of light: 3.0 × 10<super>8</super> m/s', body_style)
122
+
123
+ # Combined superscript and subscript
124
+ Paragraph('E<sub>k</sub> = mv<super>2</super>/2', body_style)
125
+
126
+ # Bold heading
127
+ Paragraph('<b>Chapter 1: Introduction</b>', header_style)
128
+
129
+ # Math symbols in body text
130
+ Paragraph('When ∠ A = 90°, AB ⊥ AC and ΔABC ≅ ΔDEF', body_style)
131
+ ```
132
+
133
+ **Pre-generation check — before writing ANY string, ask:**
134
+ > "Does this string contain a character outside basic CJK or Mathematical/relational operators?"
135
+ > If YES → it MUST be inside a `Paragraph()` with the appropriate tag.
136
+ > If it is a superscript/subscript digit in raw unicode escape sequence form → REPLACE with `<super>`/`<sub>` tag.
137
+
138
+ **NEVER rely on post-generation scanning. Prevent at the point of writing.**
139
+
140
+ ## Font Setup (Guaranteed Success Method)
141
+
142
+ ### CRITICAL: Allowed Fonts Only
143
+ **You MUST ONLY use the following registered fonts. Using ANY other font (such as Arial, Helvetica, Courier, Georgia, etc.) is STRICTLY FORBIDDEN and will cause rendering failures.**
144
+
145
+ | Font Name | Usage | Path |
146
+ |-----------|-------|------|
147
+ | `Microsoft YaHei` | Chinese headings | `/usr/share/fonts/truetype/chinese/msyh.ttf` |
148
+ | `SimHei` | Chinese body text | `/usr/share/fonts/truetype/chinese/SimHei.ttf` |
149
+ | `SarasaMonoSC` | Chinese code blocks | `/usr/share/fonts/truetype/chinese/SarasaMonoSC-Regular.ttf` |
150
+ | `Times New Roman` | English text, numbers, tables | `/usr/share/fonts/truetype/english/Times-New-Roman.ttf` |
151
+ | `Calibri` | English alternative | `/usr/share/fonts/truetype/english/calibri-regular.ttf` |
152
+ | `DejaVuSans` | Formulas, symbols, code | `/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf` |
153
+
154
+ **FORBIDDEN fonts (DO NOT USE):**
155
+ - ❌ Arial, Arial-Bold, Arial-Italic
156
+ - ❌ Helvetica, Helvetica-Bold, Helvetica-Oblique
157
+ - ❌ Courier, Courier-Bold
158
+ - ❌ Any font not listed in the table above
159
+
160
+ **For bold text and superscript/subscript:**
161
+ - Must call `registerFontFamily()` after registering fonts
162
+ - Then use `<b></b>`, `<super></super>`, `<sub></sub>` tags in Paragraph
163
+ - **CRITICAL**: These tags ONLY work inside `Paragraph()` objects, NOT in plain strings
164
+
165
+ ### Font Registration Template
166
+ ```python
167
+ from reportlab.pdfbase import pdfmetrics
168
+ from reportlab.pdfbase.ttfonts import TTFont
169
+ from reportlab.pdfbase.pdfmetrics import registerFontFamily
170
+
171
+ # Chinese fonts
172
+ pdfmetrics.registerFont(TTFont('Microsoft YaHei', '/usr/share/fonts/truetype/chinese/msyh.ttf'))
173
+ pdfmetrics.registerFont(TTFont('SimHei', '/usr/share/fonts/truetype/chinese/SimHei.ttf'))
174
+ pdfmetrics.registerFont(TTFont("SarasaMonoSC", '/usr/share/fonts/truetype/chinese/SarasaMonoSC-Regular.ttf'))
175
+
176
+ # English fonts
177
+ pdfmetrics.registerFont(TTFont('Times New Roman', '/usr/share/fonts/truetype/english/Times-New-Roman.ttf'))
178
+ pdfmetrics.registerFont(TTFont('Calibri', '/usr/share/fonts/truetype/english/calibri-regular.ttf'))
179
+
180
+ # Symbol/Formula font
181
+ pdfmetrics.registerFont(TTFont("DejaVuSans", '/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf'))
182
+
183
+ # CRITICAL: Register font families to enable <b>, <super>, <sub> tags
184
+ registerFontFamily('Microsoft YaHei', normal='Microsoft YaHei', bold='Microsoft YaHei')
185
+ registerFontFamily('SimHei', normal='SimHei', bold='SimHei')
186
+ registerFontFamily('Times New Roman', normal='Times New Roman', bold='Times New Roman')
187
+ registerFontFamily('Calibri', normal='Calibri', bold='Calibri')
188
+ registerFontFamily('DejaVuSans', normal='DejaVuSans', bold='DejaVuSans')
189
+ ```
190
+
191
+ ### Font Configuration by Document Type
192
+
193
+ **For Chinese PDFs:**
194
+ - Body text: `SimHei` or `Microsoft YaHei`
195
+ - Headings: `Microsoft YaHei` (MUST use for Chinese headings)
196
+ - Code blocks: `SarasaMonoSC`
197
+ - Formulas/symbols: `DejaVuSans`
198
+ - **In tables: ALL Chinese content and numbers MUST use `SimHei`**
199
+
200
+ **For English PDFs:**
201
+ - Body text: `Times New Roman`
202
+ - Headings: `Times New Roman` (MUST use for English headings)
203
+ - Code blocks: `DejaVuSans`
204
+ - **In tables: ALL English content and numbers MUST use `Times New Roman`**
205
+
206
+ **For Mixed Chinese-English PDFs (CRITICAL):**
207
+ - Chinese text and numbers: Use `SimHei`
208
+ - English text: Use `Times New Roman`
209
+ - **ALWAYS apply this rule when generating PDFs containing both Chinese and English text**
210
+ - **In tables: ALL Chinese content and numbers MUST use `SimHei`, ALL English content MUST use `Times New Roman`**
211
+ - **Mixed Chinese-English Text Font Handling**: When a single string contains **both Chinese and English characters (e.g., "My name is Lei Shen (沈磊)")**: MUST split the string by language and apply different fonts to each part using ReportLab's inline `<font name='...'>` tags within `Paragraph` objects. English fonts (e.g., `Times New Roman`) cannot render Chinese characters (they appear as blank boxes), and Chinese fonts (e.g., `SimHei`) render English with poor spacing. Must set `ParagraphStyle.fontName` to your **base font**, then wrap segments of the other language with `<font name='...'>` inline tags.
212
+
213
+ ```python
214
+ from reportlab.lib.styles import ParagraphStyle
215
+ from reportlab.platypus import Paragraph
216
+ from reportlab.pdfbase import pdfmetrics
217
+ from reportlab.pdfbase.ttfonts import TTFont
218
+
219
+ pdfmetrics.registerFont(TTFont('SimHei', '/usr/share/fonts/truetype/chinese/SimHei.ttf'))
220
+ pdfmetrics.registerFont(TTFont('Times New Roman', '/usr/share/fonts/truetype/english/Times-New-Roman.ttf'))
221
+
222
+ # Base font is English; wrap Chinese parts:
223
+ enbody_style = ParagraphStyle(
224
+ name="ENBodyStyle",
225
+ fontName="Times New Roman", # Base font for English
226
+ fontSize=10.5,
227
+ leading=18,
228
+ alignment=TA_JUSTIFY,
229
+ )
230
+ # Wrap Chinese segments with <font> tag
231
+ story.append(Paragraph(
232
+ 'Zhipu QingYan (<font name="SimHei">智谱清言</font>) is developed by Z.ai. '
233
+ 'My name is Lei Shen (<font name="SimHei">沈磊</font>). '
234
+ '<font name="SimHei">文心一言</font> (ERNIE Bot) is by Baidu.',
235
+ enbody_style
236
+ ))
237
+
238
+ # Base font is Chinese; wrap English parts:
239
+ cnbody_style = ParagraphStyle(
240
+ name="CNBodyStyle",
241
+ fontName="SimHei", # Base font for Chinese
242
+ fontSize=10.5,
243
+ leading=18,
244
+ alignment=TA_LEFT, wordWrap='CJK', # CJK body text: left-align with CJK line-breaking rules
245
+ )
246
+ # Wrap English segments with <font> tag
247
+ story.append(Paragraph(
248
+ '本报告使用 <font name="Times New Roman">GPT-4</font> '
249
+ '和 <font name="Times New Roman">GLM</font> 进行测试。',
250
+ cnbody_style
251
+ ))
252
+ ```
253
+
254
+ ### Chinese Plot PNG Method
255
+ If using Python to generate PNGs containing Chinese characters:
256
+ ```python
257
+ import matplotlib.pyplot as plt
258
+ plt.rcParams['font.sans-serif'] = ['SimHei']
259
+ plt.rcParams['axes.unicode_minus'] = False
260
+ ```
261
+
262
+ ### Available Font Paths
263
+ Run `fc-list` to get more fonts. Font files are typically located under:
264
+ - `/usr/share/fonts/truetype/chinese/`
265
+ - `/usr/share/fonts/truetype/english/`
266
+ - `/usr/share/fonts/`
267
+
268
+ ## Guidelines for Output
269
+
270
+ 1. **Information Density**: Prioritize depth and conciseness. Avoid fluff or excessive introductory filler. Use professional, precise terminology.
271
+
272
+ 2. **Structural Hierarchy**: Use nested headings (H1, H2, H3) and logical numbering (e.g., 1.1, 1.1.1) to organize complex data.
273
+
274
+ 3. **Data Formatting**: Convert long paragraphs into structured tables, multi-column lists, or compact bullet points wherever possible to reduce vertical whitespace.
275
+
276
+ 4. **Visual Rhythm**: Use horizontal rules (---) to separate major sections. Ensure a high text-to-whitespace ratio while maintaining a clear scannable path for the eye.
277
+
278
+ 5. **Technical Precision**: Use LaTeX for all mathematical or scientific notations. Ensure all tables are formatted with clear headers.
279
+
280
+ 6. **Tone**: Academic, corporate, and authoritative. Adapt to the specific professional field (e.g., Legal, Engineering, Financial) as requested.
281
+
282
+ 7. **Data Presentation**:
283
+ - When comparing data or showing trends, use charts instead of plain text lists
284
+ - Tables use the standard color scheme defined below
285
+
286
+ 8. **Links & References**:
287
+ - URLs must be clickable hyperlinks
288
+ - When a document contains multiple figures/tables, number them and add cross-references ("see Figure 1", "as shown in Table 2")
289
+ - In academic, legal, and data-analysis documents that cite sources, implement correct in-text click-to-jump references with corresponding footnotes/endnotes
290
+
291
+ ## Layout & Spacing Control
292
+
293
+ ### Page Breaks
294
+ - NEVER insert page breaks between sections (H1, H2, H3) or within chapters
295
+ - Let content flow naturally; avoid forcing new pages
296
+ - **Specific allowed locations**:
297
+ * Between the cover page and table of contents (if TOC exists)
298
+ * Between the cover page and main content (if NO TOC exists)
299
+ * Between the table of contents and main content (if TOC exists)
300
+ * Between the main content and back cover page (if back cover page exists)
301
+
302
+ ### Vertical Spacing Standards
303
+ * **Before tables**: `Spacer(1, 18)` after preceding text content (symmetric with table+caption block bottom spacing)
304
+ * After tables: `Spacer(1, 6)` before table caption
305
+ * After table captions: `Spacer(1, 18)` before next content (larger gap for table+caption blocks)
306
+ * Between paragraphs: `Spacer(1, 12)` (approximately 1 line)
307
+ * Between H3 subsections: `Spacer(1, 12)`
308
+ * Between H2 sections: `Spacer(1, 18)` (approximately 1.5 lines)
309
+ * Between H1 sections: `Spacer(1, 24)` (approximately 2 lines)
310
+ * NEVER use `Spacer(1, X)` where X > 24, except for intentional H1 major section breaks or cover page elements
311
+
312
+ ### Cover Page Specifications
313
+ When creating PDFs with cover pages, use the following enlarged specifications:
314
+
315
+ **Title Formatting:**
316
+ - Main title font size: `36-48pt` (vs normal heading 18-20pt)
317
+ - Subtitle font size: `18-24pt`
318
+ - Author/date font size: `14-16pt`
319
+ - ALL titles MUST be bold: Use `<b></b>` tags in Paragraph (requires `registerFontFamily()` call first)
320
+
321
+ **Cover Page Spacing:**
322
+ - Top margin to title: `Spacer(1, 120)` or more (push title to upper-middle area)
323
+ - After main title: `Spacer(1, 36)` before subtitle
324
+ - After subtitle: `Spacer(1, 48)` before author/institution info
325
+ - Between author lines: `Spacer(1, 18)`
326
+ - After author block: `Spacer(1, 60)` before date
327
+ - Use `PageBreak()` after cover page content
328
+
329
+ **Alignment:**
330
+ - All text and images on the cover page must be centered (use `TA_CENTER` for text)
331
+
332
+ **Cover Page Style Example:**
333
+ ```python
334
+ # Cover page styles
335
+ cover_title_style = ParagraphStyle(
336
+ name='CoverTitle',
337
+ fontName='Microsoft YaHei', # or 'Times New Roman' for English
338
+ fontSize=42,
339
+ leading=50,
340
+ alignment=TA_CENTER,
341
+ spaceAfter=36
342
+ )
343
+
344
+ cover_subtitle_style = ParagraphStyle(
345
+ name='CoverSubtitle',
346
+ fontName='SimHei', # or 'Times New Roman' for English
347
+ fontSize=20,
348
+ leading=28,
349
+ alignment=TA_CENTER,
350
+ spaceAfter=48
351
+ )
352
+
353
+ cover_author_style = ParagraphStyle(
354
+ name='CoverAuthor',
355
+ fontName='SimHei', # or 'Times New Roman' for English
356
+ fontSize=14,
357
+ leading=22,
358
+ alignment=TA_CENTER,
359
+ spaceAfter=18
360
+ )
361
+
362
+ # Cover page construction
363
+ story.append(Spacer(1, 120)) # Push down from top
364
+ story.append(Paragraph("报告主标题", cover_title_style))
365
+ story.append(Spacer(1, 36))
366
+ story.append(Paragraph("副标题或说明文字", cover_subtitle_style))
367
+ story.append(Spacer(1, 48))
368
+ story.append(Paragraph("作者姓名", cover_author_style))
369
+ story.append(Paragraph("所属机构", cover_author_style))
370
+ story.append(Spacer(1, 60))
371
+ story.append(Paragraph("2025年2月", cover_author_style))
372
+ story.append(PageBreak()) # Always page break after cover
373
+ ```
374
+
375
+ ### Table & Content Flow
376
+ * Standard sequence: `Spacer(1, 18)` → Table → `Spacer(1, 6)` → Caption (centered) → `Spacer(1, 18)` → Next content
377
+ * Keep related content together: table + caption + immediate analysis
378
+ * Avoid orphan headings at page bottom
379
+
380
+ ### Alignment and Typography
381
+ - **CJK body**: Use `TA_LEFT` + 2-char indent. Headings: no indent.
382
+ - **Font sizes**: Body 11pt, subheadings 14pt, headings 18-20pt
383
+ - **Line height**: 1.5-1.6 (keep line leading at 1.2x font size minimum for readability)
384
+ - **CRITICAL: Alignment Selection Rule**:
385
+ - Use `TA_JUSTIFY` only when **ALL** of the following conditions are met:
386
+ * Language: The text is predominantly English (≥ 90%)
387
+ * Column width: Sufficiently wide (A4 single-column body text)
388
+ * Font: Western fonts (e.g. Times New Roman / Calibri)
389
+ * Chinese content: None or negligible
390
+ - Otherwise, always default to `TA_LEFT`
391
+ - **Note**: CJK text with `TA_JUSTIFY` can cause orphaned punctuation (commas, periods) at line start
392
+ - For Chinese text, always add `wordWrap='CJK'` to ParagraphStyle to ensure proper typography rules
393
+
394
+ ### Style Configuration
395
+ * Normal paragraph: `spaceBefore=0`, `spaceAfter=6-12`
396
+ * Headings: `spaceBefore=12-18`, `spaceAfter=6-12`
397
+ * **Headings must be bold**: Use `<b></b>` tags in Paragraph (requires `registerFontFamily()` call first)
398
+ * Table captions: `spaceBefore=3`, `spaceAfter=6`, `alignment=TA_CENTER`
399
+ * **CRITICAL**: For Chinese text, always add `wordWrap='CJK'` to ParagraphStyle
400
+ - Prevents closing punctuation from appearing at line start
401
+ - Prevents opening brackets from appearing at line end
402
+ - Ensures proper Chinese typography rules
403
+
404
+ ### Table Formatting
405
+
406
+ #### Standard Table Color Scheme (MUST USE for ALL tables)
407
+ ```python
408
+ # Define standard colors for consistent table styling
409
+ TABLE_HEADER_COLOR = colors.HexColor('#1F4E79') # Dark blue for header
410
+ TABLE_HEADER_TEXT = colors.white # White text for header
411
+ TABLE_ROW_EVEN = colors.white # White for even rows
412
+ TABLE_ROW_ODD = colors.HexColor('#F5F5F5') # Light gray for odd rows
413
+ ```
414
+
415
+ - A table caption must be added immediately after the table (centered)
416
+ - The entire table must be centered on the page
417
+ - **Header Row Formatting (CRITICAL)**:
418
+ - Background: Dark blue (#1F4E79)
419
+ - Text color: White (set via ParagraphStyle with `textColor=colors.white`)
420
+ - Font weight: **Bold** (use `<b></b>` tags in Paragraph after calling `registerFontFamily()`)
421
+ - **IMPORTANT**: Bold tags ONLY work inside `Paragraph()` objects. Plain strings like `'<b>Text</b>'` will NOT render bold.
422
+ - **Cell Formatting (Inside the Table)**:
423
+ - Left/Right Cell Margin: Set to at least 120-200 twips (approximately the width of one character)
424
+ - Text Alignment: All body cells within the same table must use the same alignment method.
425
+ - **Font**: ALL Chinese text and numbers in tables MUST use `SimHei` for Chinese PDFs.
426
+ ALL English text and numbers in tables MUST use `Times New Roman` for English PDFs.
427
+ ALL Chinese content and numbers MUST use `SimHei`, ALL English content MUST use `Times New Roman` for Mixed Chinese-English PDFs.
428
+ - **Units with Exponents (CRITICAL)**:
429
+ - PROHIBITED: `W/m2`, `kg/m3`, `m/s2` (plain text exponents)
430
+ - RIGHT: `Paragraph('W/m<super>2</super>', style)`, `Paragraph('kg/m<super>3</super>', style)` (proper superscript in Paragraph)
431
+ - Always use `<super></super>` tags inside Paragraph objects for unit exponents in table cells
432
+ - **Numeric Values in Tables (CRITICAL)**:
433
+ - Large numbers MUST use scientific notation: `Paragraph('-1.246 × 10<super>8</super>', style)` not `-124600000`
434
+ - Small decimals MUST use scientific notation: `Paragraph('2.5 × 10<super>-3</super>', style)` not `0.0025`
435
+ - Threshold: Use scientific notation when |value| ≥ 10000 or |value| ≤ 0.001
436
+ - Format: `Paragraph('coefficient × 10<super>exponent</super>', style)` (e.g., `Paragraph('-1.246 × 10<super>8</super>', style)`)
437
+
438
+ #### Table Cell Paragraph Wrapping (MANDATORY - REVIEW BEFORE EVERY TABLE)
439
+
440
+ **STOP AND CHECK**: Before creating ANY table, verify that ALL text cells use `Paragraph()`.
441
+
442
+ ```python
443
+ # 1) Key point for Chinese text: set wordWrap="CJK"
444
+ tbl_center = ParagraphStyle(
445
+ "tbl_center",
446
+ fontName="SimHei",
447
+ fontSize=9,
448
+ leading=12,
449
+ alignment=TA_CENTER,
450
+ wordWrap="CJK",
451
+ )
452
+
453
+ # 2) ALL content MUST be wrapped in Paragraph - NO EXCEPTIONS for text
454
+ findings_data = []
455
+ for a, b, c in findings:
456
+ findings_data.append([
457
+ Paragraph(a, tbl_center),
458
+ Paragraph(b, tbl_center),
459
+ Paragraph(c, tbl_center), # ALL content MUST be wrapped in Paragraph
460
+ ])
461
+
462
+ findings_table = Table(findings_data, colWidths=[1.8*cm, 3*cm, 9*cm])
463
+ ```
464
+
465
+ **Complete Table Example:**
466
+ ```python
467
+ from reportlab.platypus import Table, TableStyle, Paragraph, Image
468
+ from reportlab.lib.styles import ParagraphStyle
469
+ from reportlab.lib import colors
470
+ from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT, TA_JUSTIFY
471
+
472
+ # Define styles for table cells
473
+ header_style = ParagraphStyle(
474
+ name='TableHeader',
475
+ fontName='Times New Roman',
476
+ fontSize=11,
477
+ textColor=colors.white,
478
+ alignment=TA_CENTER
479
+ )
480
+
481
+ cell_style = ParagraphStyle(
482
+ name='TableCell',
483
+ fontName='Times New Roman',
484
+ fontSize=10,
485
+ textColor=colors.black,
486
+ alignment=TA_CENTER
487
+ )
488
+
489
+ cell_style_jus = ParagraphStyle(
490
+ name='TableCellLeft',
491
+ fontName='Times New Roman',
492
+ fontSize=10,
493
+ textColor=colors.black,
494
+ alignment=TA_JUSTIFY
495
+ )
496
+
497
+ cell_style_right = ParagraphStyle(
498
+ name='TableCellRight',
499
+ fontName='Times New Roman',
500
+ fontSize=10,
501
+ textColor=colors.black,
502
+ alignment=TA_RIGHT
503
+ )
504
+
505
+ # ✅ CORRECT: All text content wrapped in Paragraph()
506
+ data = [
507
+ # Header row - bold text with Paragraph
508
+ [
509
+ Paragraph('<b>Parameter</b>', header_style),
510
+ Paragraph('<b>Unit</b>', header_style),
511
+ Paragraph('<b>Value</b>', header_style),
512
+ Paragraph('<b>Note</b>', header_style)
513
+ ],
514
+ # Data rows - all text in Paragraph
515
+ [
516
+ Paragraph('Temperature', cell_style_jus),
517
+ Paragraph('°C', cell_style),
518
+ Paragraph('25.5', cell_style_jus),
519
+ Paragraph('Ambient', cell_style)
520
+ ],
521
+ [
522
+ Paragraph('Pressure', cell_style_jus),
523
+ Paragraph('Pa', cell_style),
524
+ Paragraph('1.01 × 10<super>5</super>', cell_style_jus), # Scientific notation
525
+ Paragraph('Standard', cell_style)
526
+ ],
527
+ [
528
+ Paragraph('Density', cell_style_jus),
529
+ Paragraph('kg/m<super>3</super>', cell_style), # Unit with exponent
530
+ Paragraph('1.225', cell_style_jus),
531
+ Paragraph('Air at STP', cell_style)
532
+ ],
533
+ [
534
+ Paragraph('H<sub>2</sub>O Content', cell_style_jus), # Subscript
535
+ Paragraph('%', cell_style),
536
+ Paragraph('45.2', cell_style_jus),
537
+ Paragraph('Relative humidity', cell_style)
538
+ ]
539
+ ]
540
+
541
+ # ❌ PROHIBITED: Plain strings - NEVER DO THIS
542
+ # data = [
543
+ # ['<b>Parameter</b>', '<b>Unit</b>', '<b>Value</b>'], # Bold won't work!
544
+ # ['Pressure', 'Pa', '1.01 × 10<super>5</super>'], # Superscript won't work!
545
+ # ]
546
+
547
+ # Create table
548
+ table = Table(data, colWidths=[120, 80, 100, 120])
549
+ table.setStyle(TableStyle([
550
+ # Header styling
551
+ ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1F4E79')),
552
+ ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
553
+ # Alternating row colors
554
+ ('BACKGROUND', (0, 1), (-1, 1), colors.white),
555
+ ('BACKGROUND', (0, 2), (-1, 2), colors.HexColor('#F5F5F5')),
556
+ ('BACKGROUND', (0, 3), (-1, 3), colors.white),
557
+ ('BACKGROUND', (0, 4), (-1, 4), colors.HexColor('#F5F5F5')),
558
+ # Grid and alignment
559
+ ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
560
+ ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
561
+ ('LEFTPADDING', (0, 0), (-1, -1), 8),
562
+ ('RIGHTPADDING', (0, 0), (-1, -1), 8),
563
+ ('TOPPADDING', (0, 0), (-1, -1), 6),
564
+ ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
565
+ ]))
566
+
567
+ # Example with image (Image is the ONLY exception - no Paragraph needed)
568
+ # data_with_image = [
569
+ # [Paragraph('<b>Item</b>', header_style), Paragraph('<b>Image</b>', header_style)],
570
+ # [Paragraph('Logo', cell_style), Image('logo.png', width=50, height=50)], # Image directly, no Paragraph
571
+ # ]
572
+ ```
573
+
574
+ ### PDF Metadata (REQUIRED)
575
+
576
+ **CRITICAL**: ALL PDFs MUST have proper metadata set during creation.
577
+
578
+ #### Required Metadata Fields
579
+ - **Title**: MUST match the filename (without .pdf extension)
580
+ - **Author**: MUST be set to "Z.ai"
581
+ - **Creator**: MUST be set to "Z.ai"
582
+ - **Subject**: SHOULD describe the document purpose/content
583
+ - **Description/Keywords**: SHOULD include relevant topics for searchability
584
+
585
+ #### For reportlab (Creating New PDFs)
586
+ ```python
587
+ from reportlab.lib.pagesizes import letter
588
+ from reportlab.platypus import SimpleDocTemplate
589
+ import os
590
+
591
+ # Extract filename without extension for metadata title
592
+ pdf_filename = "financial_report_2024.pdf"
593
+ title_for_metadata = os.path.splitext(pdf_filename)[0] # "financial_report_2024"
594
+
595
+ doc = SimpleDocTemplate(
596
+ pdf_filename,
597
+ pagesize=letter,
598
+ title=title_for_metadata, # MUST: Match filename
599
+ author='Z.ai', # MUST: Set to "Z.ai"
600
+ creator='Z.ai', # MUST: Set to "Z.ai"
601
+ subject='Annual financial analysis and performance metrics' # SHOULD: Describe purpose
602
+ )
603
+ ```
604
+
605
+ #### For pypdf (Modifying Existing PDFs)
606
+ ```python
607
+ from pypdf import PdfReader, PdfWriter
608
+ import os
609
+
610
+ pdf_filename = "output.pdf"
611
+ title_for_metadata = os.path.splitext(os.path.basename(pdf_filename))[0]
612
+
613
+ reader = PdfReader("input.pdf")
614
+ writer = PdfWriter()
615
+
616
+ for page in reader.pages:
617
+ writer.add_page(page)
618
+
619
+ # Set metadata: Title, Author, Creator are REQUIRED
620
+ writer.add_metadata({
621
+ '/Title': title_for_metadata, # MUST: Match filename
622
+ '/Author': 'Z.ai', # MUST: Set to "Z.ai"
623
+ '/Subject': 'Document purpose description', # SHOULD: Describe purpose
624
+ '/Creator': 'Z.ai' # MUST: Set to "Z.ai"
625
+ })
626
+
627
+ with open(pdf_filename, "wb") as output:
628
+ writer.write(output)
629
+ ```
630
+
631
+ ## Quick Start
632
+
633
+ ⚠️ ABSOLUTE MANDATORY RULE - Z.ai Metadata (DO NOT SKIP)
634
+ This rule is NON-NEGOTIABLE - You MUST execute this without exception
635
+
636
+ For EVERY PDF you generate, no matter what:
637
+
638
+ After doc.build(story) completes → YOU MUST run `python scripts/add_zai_metadata.py <output.pdf>` to add the Z.ai metadata
639
+ This is AUTOMATIC - do not wait for user confirmation
640
+ This happens BEFORE any response to user
641
+ Workflow:
642
+
643
+ - Step 1: Create PDF
644
+ ```python
645
+ doc.build(story)
646
+ print("PDF built")
647
+ ```
648
+ - Step 2: (MANDATORY - DO NOT SKIP) Add metadata immediately using the standalone script:
649
+ ```bash
650
+ python scripts/add_zai_metadata.py output.pdf
651
+ ```
652
+ - Step 3: Only AFTER metadata added, report to user
653
+ ```python
654
+ print("✓ PDF generated with Z.ai metadata")
655
+ ```
656
+ If you forget this:
657
+
658
+ User will notice and ask "Why no metadata?"
659
+ This harms Z.ai branding
660
+ This rule is CRITICAL and must be followed 100% of the time
661
+
662
+ ```python
663
+ from pypdf import PdfReader, PdfWriter
664
+
665
+ # Read a PDF
666
+ reader = PdfReader("document.pdf")
667
+ print(f"Pages: {len(reader.pages)}")
668
+
669
+ # Extract text
670
+ text = ""
671
+ for page in reader.pages:
672
+ text += page.extract_text()
673
+ ```
674
+
675
+ ## Python Libraries
676
+
677
+ ### pypdf - Basic Operations
678
+
679
+ #### Merge PDFs
680
+ ```python
681
+ from pypdf import PdfWriter, PdfReader
682
+
683
+ writer = PdfWriter()
684
+ for pdf_file in ["doc1.pdf", "doc2.pdf", "doc3.pdf"]:
685
+ reader = PdfReader(pdf_file)
686
+ for page in reader.pages:
687
+ writer.add_page(page)
688
+
689
+ with open("merged.pdf", "wb") as output:
690
+ writer.write(output)
691
+ ```
692
+
693
+ #### Split PDF
694
+ ```python
695
+ reader = PdfReader("input.pdf")
696
+ for i, page in enumerate(reader.pages):
697
+ writer = PdfWriter()
698
+ writer.add_page(page)
699
+ with open(f"page_{i+1}.pdf", "wb") as output:
700
+ writer.write(output)
701
+ ```
702
+
703
+ #### Extract Metadata
704
+ ```python
705
+ reader = PdfReader("document.pdf")
706
+ meta = reader.metadata
707
+ print(f"Title: {meta.title}")
708
+ print(f"Author: {meta.author}")
709
+ print(f"Subject: {meta.subject}")
710
+ print(f"Creator: {meta.creator}")
711
+ ```
712
+
713
+ #### Set/Update Metadata (Z.ai Branding)
714
+
715
+ Use the standalone script to add Z.ai branding metadata:
716
+
717
+ ```bash
718
+ # Add metadata to a single PDF (in-place)
719
+ python scripts/add_zai_metadata.py document.pdf
720
+
721
+ # Add metadata with custom title
722
+ python scripts/add_zai_metadata.py report.pdf -t "Q4 Financial Analysis"
723
+
724
+ # Batch process multiple PDFs
725
+ python scripts/add_zai_metadata.py *.pdf
726
+ ```
727
+
728
+ #### Rotate Pages
729
+ ```python
730
+ reader = PdfReader("input.pdf")
731
+ writer = PdfWriter()
732
+
733
+ page = reader.pages[0]
734
+ page.rotate(90) # Rotate 90 degrees clockwise
735
+ writer.add_page(page)
736
+
737
+ with open("rotated.pdf", "wb") as output:
738
+ writer.write(output)
739
+ ```
740
+
741
+ ### pdfplumber - Text and Table Extraction
742
+
743
+ #### Extract Text with Layout
744
+ ```python
745
+ import pdfplumber
746
+
747
+ with pdfplumber.open("document.pdf") as pdf:
748
+ for page in pdf.pages:
749
+ text = page.extract_text()
750
+ print(text)
751
+ ```
752
+
753
+ #### Extract Tables
754
+ ```python
755
+ with pdfplumber.open("document.pdf") as pdf:
756
+ for i, page in enumerate(pdf.pages):
757
+ tables = page.extract_tables()
758
+ for j, table in enumerate(tables):
759
+ print(f"Table {j+1} on page {i+1}:")
760
+ for row in table:
761
+ print(row)
762
+ ```
763
+
764
+ ### reportlab - Create PDFs
765
+
766
+ #### Choosing the Right DocTemplate and Build Method
767
+
768
+ **Decision Tree:**
769
+
770
+ ```
771
+ Do you need auto-TOC?
772
+ ├─ YES → Use TocDocTemplate + doc.multiBuild(story)
773
+ │ (see Auto-Generated Table of Contents section)
774
+
775
+ └─ NO → Use SimpleDocTemplate + doc.build(story)
776
+ (basic documents, or with optional Cross-References)
777
+ ```
778
+
779
+ **When to use each approach:**
780
+
781
+ | Requirement | DocTemplate | Build Method |
782
+ |-------------|-------------|--------------|
783
+ | Multi-page with TOC | `TocDocTemplate` | `multiBuild()` |
784
+ | Single-page or no TOC | `SimpleDocTemplate` | `build()` |
785
+ | With Cross-References (no TOC) | `SimpleDocTemplate` | `build()` |
786
+ | Both TOC + Cross-References | `TocDocTemplate` | `multiBuild()` |
787
+
788
+ **⚠️ CRITICAL**:
789
+ - `multiBuild()` is ONLY needed when using `TableOfContents`
790
+ - Using `build()` with `TocDocTemplate` = TOC won't work
791
+ - Using `multiBuild()` without `TocDocTemplate` = unnecessary overhead
792
+
793
+ ### Rich Text Formatting: Bold, Superscript, Subscript, and Special Characters
794
+
795
+ #### Prerequisites
796
+ To use `<b>`, `<super>`, `<sub>` tags, you **must**:
797
+ 1. Register your fonts via `registerFont()`
798
+ 2. Call `registerFontFamily()` to link normal/bold/italic variants
799
+ 3. Wrap all tagged text in `Paragraph()` objects
800
+ **CRITICAL**: These tags ONLY work inside `Paragraph()` objects. Plain strings like `'<b>Text</b>'` will NOT render correctly.
801
+
802
+ #### Character Handling (see Core Constraint #5)
803
+
804
+ All rules for superscripts, subscripts, and mathematical/relational operators are defined in **Core Constraint #5 — Character Safety Rule**.
805
+
806
+ **Quick reminder when writing Rich Text**:
807
+ - `<b>`, `<super>`, `<sub>` tags ONLY work inside `Paragraph()` objects
808
+ - Must call `registerFontFamily()` first to enable these tags
809
+ - Plain strings like `'<b>Text</b>'` will NOT render — always use `Paragraph()`
810
+ - For scientific notation: `Paragraph('coefficient × 10<super>exponent</super>', style)`
811
+ - For chemical formulas: `Paragraph('H<sub>2</sub>O', style)`
812
+
813
+ Do NOT use any Unicode escape sequences (e.g., for superscript and subscript digits, math operators and special symbols, or emoji characters) anywhere. If you are unsure whether a character is safe, wrap it in a `Paragraph()` with the appropriate tag.
814
+
815
+
816
+ #### Complete Python Example
817
+ ```python
818
+ # --- Register fonts and font family ---
819
+ pdfmetrics.registerFont(TTFont('Times New Roman', '/usr/share/fonts/truetype/english/Times-New-Roman.ttf'))
820
+
821
+ # CRITICAL: Must call registerFontFamily() to enable <b> and <i> tags
822
+ registerFontFamily('Times New Roman', normal='Times New Roman', bold='Times New Roman')
823
+
824
+ # --- Define styles ---
825
+ body_style = ParagraphStyle(
826
+ name='BodyStyle',
827
+ fontName='Times New Roman',
828
+ fontSize=10,
829
+ textColor=colors.black,
830
+ alignment=TA_JUSTIFY,
831
+ )
832
+ bold_style = ParagraphStyle(
833
+ name='BoldStyle',
834
+ fontName='Times New Roman',
835
+ fontSize=10,
836
+ textColor=colors.black,
837
+ alignment=TA_JUSTIFY,
838
+ )
839
+ header_style = ParagraphStyle(
840
+ name='HeaderStyle',
841
+ fontName='Times New Roman',
842
+ fontSize=10,
843
+ textColor=colors.white,
844
+ alignment=TA_JUSTIFY,
845
+ )
846
+
847
+ # --- Body text examples ---
848
+ # Bold title
849
+ title = Paragraph('<b>Scientific Formulas and Chemical Expressions</b>', bold_style)
850
+
851
+ # Math formula with superscript and mathematical symbol ×
852
+ math_text = Paragraph(
853
+ 'The Einstein mass-energy equivalence is expressed as E = mc<super>2</super>. '
854
+ 'In applied physics, the gravitational force is F = 6.674 × 10<super>-11</super> × '
855
+ 'm<sub>1</sub>m<sub>2</sub>/r<super>2</super>, '
856
+ 'and the Pythagorean theorem states a<super>2</super> + b<super>2</super> = c<super>2</super>.',
857
+ body_style,
858
+ )
859
+
860
+ # Chemical expressions with subscript
861
+ chem_text = Paragraph(
862
+ 'The combustion of methane: CH<sub>4</sub> + 2O<sub>2</sub> '
863
+ '= CO<sub>2</sub> + 2H<sub>2</sub>O. '
864
+ 'Sulfuric acid (H<sub>2</sub>SO<sub>4</sub>) reacts with sodium hydroxide to produce '
865
+ 'Na<sub>2</sub>SO<sub>4</sub> and water.',
866
+ body_style,
867
+ )
868
+ ```
869
+
870
+ #### Preventing Unwanted Line Breaks
871
+
872
+ **Problem 1: English names broken at awkward positions**
873
+ ```python
874
+ # PROHIBITED: "K.G. Palepu" may break after "K.G."
875
+ text = Paragraph("Professors (K.G. Palepu) proposed...",style)
876
+
877
+ # RIGHT: Use non-breaking space (U+00A0) to prevent breaking
878
+ text = Paragraph("Professors (K.G.\u00A0Palepu) proposed...",style)
879
+ ```
880
+
881
+ **Problem 2: Punctuation at line start**
882
+ ```python
883
+ # RIGHT: Add wordWrap='CJK' for proper typography
884
+ styles.add(ParagraphStyle(
885
+ name='BodyStyle',
886
+ fontName='SimHei',
887
+ fontSize=10.5,
888
+ leading=18,
889
+ alignment=TA_LEFT,
890
+ wordWrap='CJK' # Prevents orphaned punctuation
891
+ ))
892
+ ```
893
+
894
+ **Problem 3: Creating intentional line breaks**
895
+ ```python
896
+ # PROHIBITED: Normal newline character does NOT create line breaks
897
+ text = Paragraph("Line 1\nLine 2\nLine 3", style) # Will render as single line!
898
+
899
+ # RIGHT: Use <br/> tag for line breaks
900
+ text = Paragraph("Line 1<br/>Line 2<br/>Line 3", style)
901
+
902
+ # Alternative: Split into multiple Paragraph objects
903
+ story.append(Paragraph("Line 1", style))
904
+ story.append(Paragraph("Line 2", style))
905
+ story.append(Paragraph("Line 3", style))
906
+ ```
907
+
908
+ #### Basic PDF Creation
909
+ ```python
910
+ from reportlab.lib.pagesizes import letter
911
+ from reportlab.pdfgen import canvas
912
+
913
+ c = canvas.Canvas("hello.pdf", pagesize=letter)
914
+ width, height = letter
915
+
916
+ # Add text
917
+ c.drawString(100, height - 100, "Hello World!")
918
+ c.drawString(100, height - 120, "This is a PDF created with reportlab")
919
+
920
+ # Add a line
921
+ c.line(100, height - 140, 400, height - 140)
922
+
923
+ # Save
924
+ c.save()
925
+ ```
926
+
927
+ #### Auto-Generated Table of Contents
928
+
929
+ ## ⚠️ CRITICAL WARNINGS
930
+
931
+ ### ❌ FORBIDDEN: Manual Table of Contents
932
+
933
+ **NEVER manually create TOC like this:**
934
+ ```python
935
+ # ❌ PROHIBITED - DO NOT USE
936
+ toc_entries = [("1. Title", "5"), ("2. Section", "10")]
937
+ for entry, page in toc_entries:
938
+ story.append(Paragraph(f"{entry} {'.'*50} {page}", style))
939
+ ```
940
+
941
+ **Why it's PROHIBITED:**
942
+ - Hardcoded page numbers become incorrect when content changes
943
+ - No clickable hyperlinks
944
+ - Manual leader dots are fragile
945
+ - Must be manually updated with every document change
946
+
947
+ **✅ ALWAYS use auto-generated TOC:**
948
+
949
+ **Key Implementation Requirements:**
950
+ - **Custom `TocDocTemplate` class**: Override `afterFlowable()` to capture TOC entries
951
+ - **Bookmark attributes**: Set `bookmark_name`, `bookmark_level`, `bookmark_text` on each heading
952
+ - **Use `doc.multiBuild(story)`**: NOT `doc.build()` - multiBuild is required for TOC processing
953
+ - **Clickable hyperlinks**: Generated automatically with proper styling
954
+
955
+ **Helper Function Pattern:**
956
+ ```python
957
+ def add_heading(text, style, level=0):
958
+ """Create heading with bookmark for auto-TOC"""
959
+ p = Paragraph(text, style)
960
+ p.bookmark_name = text
961
+ p.bookmark_level = level
962
+ p.bookmark_text = text
963
+ return p
964
+
965
+ # Usage:
966
+ story.append(add_heading("1. Introduction", styles['Heading1'], 0))
967
+ story.append(Paragraph('Content...', styles['Normal']))
968
+ ```
969
+
970
+ #### Complete TOC Implementation Example
971
+
972
+ Copy and adapt this complete working code for your PDF with Table of Contents:
973
+
974
+ ```python
975
+ from reportlab.lib.pagesizes import letter
976
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak, Spacer
977
+ from reportlab.platypus.tableofcontents import TableOfContents
978
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
979
+ from reportlab.lib.units import inch
980
+
981
+ class TocDocTemplate(SimpleDocTemplate):
982
+ def __init__(self, *args, **kwargs):
983
+ SimpleDocTemplate.__init__(self, *args, **kwargs)
984
+
985
+ def afterFlowable(self, flowable):
986
+ """Capture TOC entries after each flowable is rendered"""
987
+ if hasattr(flowable, 'bookmark_name'):
988
+ level = getattr(flowable, 'bookmark_level', 0)
989
+ text = getattr(flowable, 'bookmark_text', '')
990
+ self.notify('TOCEntry', (level, text, self.page))
991
+
992
+ # Create document
993
+ doc = TocDocTemplate("document.pdf", pagesize=letter)
994
+ story = []
995
+ styles = getSampleStyleSheet()
996
+
997
+ # Create Table of Contents
998
+ toc = TableOfContents()
999
+ toc.levelStyles = [
1000
+ ParagraphStyle(name='TOCHeading1', fontSize=14, leftIndent=20,
1001
+ fontName='Times New Roman'),
1002
+ ParagraphStyle(name='TOCHeading2', fontSize=12, leftIndent=40,
1003
+ fontName='Times New Roman'),
1004
+ ]
1005
+ story.append(Paragraph("<b>Table of Contents</b>", styles['Title']))
1006
+ story.append(Spacer(1, 0.2*inch))
1007
+ story.append(toc)
1008
+ story.append(PageBreak())
1009
+
1010
+ # Helper function: Create heading with TOC bookmark
1011
+ def add_heading(text, style, level=0):
1012
+ p = Paragraph(text, style)
1013
+ p.bookmark_name = text
1014
+ p.bookmark_level = level
1015
+ p.bookmark_text = text
1016
+ return p
1017
+
1018
+ # Chapter 1: Introduction
1019
+ story.append(add_heading("Chapter 1: Introduction", styles['Heading1'], 0))
1020
+ story.append(Paragraph("This is the introduction chapter with some example content.",
1021
+ styles['Normal']))
1022
+ story.append(Spacer(1, 0.2*inch))
1023
+
1024
+ story.append(add_heading("1.1 Background", styles['Heading2'], 1))
1025
+ story.append(Paragraph("Background information goes here.", styles['Normal']))
1026
+
1027
+
1028
+ # Chapter 2: Conclusion
1029
+ story.append(add_heading("Chapter 2: Conclusion", styles['Heading1'], 0))
1030
+ story.append(Paragraph("This concludes our document.", styles['Normal']))
1031
+ story.append(Spacer(1, 0.2*inch))
1032
+
1033
+ story.append(add_heading("2.1 Summary", styles['Heading2'], 1))
1034
+ story.append(Paragraph("Summary of the document.", styles['Normal']))
1035
+
1036
+ # Build the document (must use multiBuild for TOC to work)
1037
+ doc.multiBuild(story)
1038
+
1039
+ print("PDF with Table of Contents created successfully!")
1040
+ ```
1041
+
1042
+ #### Cross-References (Figures, Tables, Bibliography)
1043
+
1044
+ **OPTIONAL**: For academic papers requiring citation systems (LaTeX-style `\ref{}` and `\cite{}`)
1045
+
1046
+ **Key Principle**: Pre-register all figures, tables, and references BEFORE using them in text.
1047
+
1048
+ **Simple Implementation Pattern:**
1049
+
1050
+ ```python
1051
+ from reportlab.lib.pagesizes import letter
1052
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
1053
+ from reportlab.lib.enums import TA_CENTER
1054
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
1055
+ from reportlab.lib import colors
1056
+ from reportlab.platypus import Table, TableStyle
1057
+
1058
+
1059
+ class CrossReferenceDocument:
1060
+ """Manages cross-references throughout the document"""
1061
+
1062
+ def __init__(self):
1063
+ self.figures = {}
1064
+ self.tables = {}
1065
+ self.refs = {}
1066
+ self.figure_counter = 0
1067
+ self.table_counter = 0
1068
+ self.ref_counter = 0
1069
+
1070
+ def add_figure(self, name):
1071
+ """Add a figure and return its number"""
1072
+ if name not in self.figures:
1073
+ self.figure_counter += 1
1074
+ self.figures[name] = self.figure_counter
1075
+ return self.figures[name]
1076
+
1077
+ def add_table(self, name):
1078
+ """Add a table and return its number"""
1079
+ if name not in self.tables:
1080
+ self.table_counter += 1
1081
+ self.tables[name] = self.table_counter
1082
+ return self.tables[name]
1083
+
1084
+ def add_reference(self, name):
1085
+ """Add a reference and return its number"""
1086
+ if name not in self.refs:
1087
+ self.ref_counter += 1
1088
+ self.refs[name] = self.ref_counter
1089
+ return self.refs[name]
1090
+
1091
+
1092
+ def build_document():
1093
+ doc = SimpleDocTemplate("cross_ref.pdf", pagesize=letter)
1094
+ xref = CrossReferenceDocument()
1095
+ styles = getSampleStyleSheet()
1096
+
1097
+ # Caption style
1098
+ styles.add(ParagraphStyle(
1099
+ name='Caption',
1100
+ parent=styles['Normal'],
1101
+ alignment=TA_CENTER,
1102
+ fontSize=10,
1103
+ textColor=colors.HexColor('#333333')
1104
+ ))
1105
+
1106
+ story = []
1107
+
1108
+ # Step 1: Register all figures, tables, and references FIRST
1109
+ fig1 = xref.add_figure('sample')
1110
+ table1 = xref.add_table('data')
1111
+ ref1 = xref.add_reference('author2024')
1112
+
1113
+ # Step 2: Use them in text
1114
+ intro = f"""
1115
+ See Figure {fig1} for details and Table {table1} for data<sup>[{ref1}]</sup>.
1116
+ """
1117
+ story.append(Paragraph(intro, styles['Normal']))
1118
+ story.append(Spacer(1, 0.2*inch))
1119
+
1120
+ # Step 3: Create figures and tables with numbered captions
1121
+ story.append(Paragraph(f"<b>Figure {fig1}.</b> Sample Figure Caption",
1122
+ styles['Caption']
1123
+ ))
1124
+
1125
+ # Table example
1126
+ header_style = ParagraphStyle(
1127
+ name='TableHeader',
1128
+ fontName='Times New Roman',
1129
+ fontSize=11,
1130
+ textColor=colors.white,
1131
+ alignment=TA_CENTER
1132
+ )
1133
+
1134
+ cell_style = ParagraphStyle(
1135
+ name='TableCell',
1136
+ fontName='Times New Roman',
1137
+ fontSize=10,
1138
+ textColor=colors.black,
1139
+ alignment=TA_CENTER
1140
+ )
1141
+
1142
+ # All text content wrapped in Paragraph()
1143
+ data = [
1144
+ [Paragraph('<b>Item</b>', header_style), Paragraph('<b>Value</b>', header_style)],
1145
+ [Paragraph('A', cell_style), Paragraph('10', cell_style)],
1146
+ [Paragraph('B', cell_style), Paragraph('20', cell_style)],
1147
+ ]
1148
+ t = Table(data, colWidths=[2*inch, 2*inch])
1149
+ t.setStyle(TableStyle([
1150
+ ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1F4E79')),
1151
+ ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
1152
+ ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
1153
+ ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
1154
+ ]))
1155
+ story.append(t)
1156
+ story.append(Spacer(1, 6))
1157
+ story.append(Paragraph(f"<b>Table {table1}.</b> Sample Data Table",
1158
+ styles['Caption']
1159
+ ))
1160
+
1161
+ story.append(PageBreak())
1162
+
1163
+ # Step 4: Reference again in discussion
1164
+ discussion = f"""
1165
+ As shown in Figure {fig1} and Table {table1}, results are clear<sup>[{ref1}]</sup>.
1166
+ """
1167
+ story.append(Paragraph(discussion, styles['Normal']))
1168
+
1169
+ # Step 5: Bibliography section
1170
+ story.append(PageBreak())
1171
+ story.append(Paragraph("<b>References</b>", styles['Heading1']))
1172
+ story.append(Paragraph(
1173
+ f"[{ref1}] Author, A. (2024). Example Reference. <i>Journal Name</i>.",
1174
+ styles['Normal']
1175
+ ))
1176
+
1177
+ doc.build(story)
1178
+ print("PDF with cross-references created!")
1179
+
1180
+
1181
+ if __name__ == '__main__':
1182
+ build_document()
1183
+ ```
1184
+
1185
+ **Usage Notes:**
1186
+ - **Pre-registration is critical**: Call `add_figure()`/`add_table()`/`add_reference()` at the START of your document
1187
+ - **Citation format**: Use `Paragraph('<sup>[{ref_num}]</sup>')` for inline citations
1188
+ - **Caption format**: Use `Paragraph('<b>Figure {num}.</b>')` or `Paragraph('<b>Table {num}.</b>')` with centered caption style
1189
+ - **Combine with TOC**: Use `TocDocTemplate` + `doc.multiBuild(story)` if both cross-refs and auto-TOC are needed
1190
+
1191
+ ## Command-Line Tools
1192
+
1193
+ ### pdftotext (poppler-utils)
1194
+ ```bash
1195
+ # Extract text
1196
+ pdftotext input.pdf output.txt
1197
+
1198
+ # Extract text preserving layout
1199
+ pdftotext -layout input.pdf output.txt
1200
+
1201
+ # Extract specific pages
1202
+ pdftotext -f 1 -l 5 input.pdf output.txt # Pages 1-5
1203
+ ```
1204
+
1205
+ ### qpdf
1206
+ ```bash
1207
+ # Merge PDFs
1208
+ qpdf --empty --pages file1.pdf file2.pdf -- merged.pdf
1209
+
1210
+ # Split pages
1211
+ qpdf input.pdf --pages . 1-5 -- pages1-5.pdf
1212
+ qpdf input.pdf --pages . 6-10 -- pages6-10.pdf
1213
+
1214
+ # Rotate pages
1215
+ qpdf input.pdf output.pdf --rotate=+90:1 # Rotate page 1 by 90 degrees
1216
+
1217
+ # Remove password
1218
+ qpdf --password=mypassword --decrypt encrypted.pdf decrypted.pdf
1219
+ ```
1220
+
1221
+ ## Common Tasks
1222
+
1223
+ ### Brand PDFs with Z.ai Metadata
1224
+
1225
+ ⚠️ CRITICAL MANDATORY RULE - PDF Metadata MUST be Added After Every PDF Generation
1226
+
1227
+ All PDFs MUST have metadata added immediately after creation - This is the FINAL step and CANNOT be skipped
1228
+
1229
+ **Usage - Standalone Script:**
1230
+
1231
+ ```bash
1232
+ # Add metadata to a single PDF (in-place)
1233
+ python scripts/add_zai_metadata.py document.pdf
1234
+
1235
+ # Add metadata to a single PDF (create new file)
1236
+ python scripts/add_zai_metadata.py input.pdf -o output.pdf
1237
+
1238
+ # Add metadata with custom title
1239
+ python scripts/add_zai_metadata.py report.pdf -t "Q4 Financial Analysis"
1240
+
1241
+ # Batch process all PDFs in current directory
1242
+ python scripts/add_zai_metadata.py *.pdf
1243
+
1244
+ # Quiet mode (no output)
1245
+ python scripts/add_zai_metadata.py document.pdf -q
1246
+
1247
+ # Show help
1248
+ python scripts/add_zai_metadata.py --help
1249
+ ```
1250
+
1251
+ **Requirements:**
1252
+
1253
+ - After `doc.build(story)` (or `doc.multiBuild(story)`) completes → immediately call the script.
1253
+ - Do NOT wait for a user reminder and do NOT check the task description — execute automatically.
1254
+ - Confirm the metadata info to the user after adding it.
1255
+ - Memory phrase: "PDF build done → add metadata, no reminder needed."
1257
+
1258
+ ### Extract Text from Scanned PDFs
1259
+ ```python
1260
+ # Requires: pip install pytesseract pdf2image
1261
+ import pytesseract
1262
+ from pdf2image import convert_from_path
1263
+
1264
+ # Convert PDF to images
1265
+ images = convert_from_path('scanned.pdf')
1266
+
1267
+ # OCR each page
1268
+ text = ""
1269
+ for i, image in enumerate(images):
1270
+ text += f"Page {i+1}:\n"
1271
+ text += pytesseract.image_to_string(image)
1272
+ text += "\n\n"
1273
+
1274
+ print(text)
1275
+ ```
1276
+
1277
+ ### Add Watermark
1278
+ ```python
1279
+ from pypdf import PdfReader, PdfWriter
1280
+
1281
+ # Create watermark (or load existing)
1282
+ watermark = PdfReader("watermark.pdf").pages[0]
1283
+
1284
+ # Apply to all pages
1285
+ reader = PdfReader("document.pdf")
1286
+ writer = PdfWriter()
1287
+
1288
+ for page in reader.pages:
1289
+ page.merge_page(watermark)
1290
+ writer.add_page(page)
1291
+
1292
+ with open("watermarked.pdf", "wb") as output:
1293
+ writer.write(output)
1294
+ ```
1295
+
1296
+ ### Password Protection
1297
+ ```python
1298
+ from pypdf import PdfReader, PdfWriter
1299
+
1300
+ reader = PdfReader("input.pdf")
1301
+ writer = PdfWriter()
1302
+
1303
+ for page in reader.pages:
1304
+ writer.add_page(page)
1305
+
1306
+ # Add password
1307
+ writer.encrypt("userpassword", "ownerpassword")
1308
+
1309
+ with open("encrypted.pdf", "wb") as output:
1310
+ writer.write(output)
1311
+ ```
1312
+
1313
+
1314
+ ## Critical Reminders (MUST Follow)
1315
+
1316
+ ### Font Rules
1317
+ - **FONT RESTRICTION**: ONLY use the six registered fonts. NEVER use Arial, Helvetica, Courier, or any unregistered fonts.
1318
+ - **In tables**: ALL Chinese text and numbers MUST use `SimHei` for Chinese PDF.
1319
+ ALL English text and numbers MUST use `Times New Roman` for English PDF.
1320
+ ALL Chinese content and numbers MUST use `SimHei`, ALL English content MUST use `Times New Roman` for Mixed Chinese-English PDF.
1321
+ - **CRITICAL**: Must call `registerFontFamily()` after registering fonts to enable `<b>`, `<super>`, `<sub>` tags.
1322
+ - **Mixed Chinese-English Text Font Handling**: When a single string contains **both Chinese and English characters (e.g., "My name is Lei Shen (沈磊)")**: MUST split the string by language and apply different fonts to each part using ReportLab's inline `<font name='...'>` tags within `Paragraph` objects. English fonts (e.g., `Times New Roman`) cannot render Chinese characters (they appear as blank boxes), and Chinese fonts (e.g., `SimHei`) render English with poor spacing. Must set `ParagraphStyle.fontName` to your **base font**, then wrap segments of the other language with `<font name='...'>` inline tags.
1323
+
1324
+ ```python
1325
+ from reportlab.lib.styles import ParagraphStyle
1326
+ from reportlab.platypus import Paragraph
1327
+ from reportlab.pdfbase import pdfmetrics
1328
+ from reportlab.pdfbase.ttfonts import TTFont
1329
+
1330
+ pdfmetrics.registerFont(TTFont('SimHei', '/usr/share/fonts/truetype/chinese/SimHei.ttf'))
1331
+ pdfmetrics.registerFont(TTFont('Times New Roman', '/usr/share/fonts/truetype/english/Times-New-Roman.ttf'))
1332
+
1333
+ # Base font is English; wrap Chinese parts:
1334
+ enbody_style = ParagraphStyle(
1335
+ name="ENBodyStyle",
1336
+ fontName="Times New Roman", # Base font for English
1337
+ fontSize=10.5,
1338
+ leading=18,
1339
+ alignment=TA_JUSTIFY,
1340
+ )
1341
+ # Wrap Chinese segments with <font> tag
1342
+ story.append(Paragraph(
1343
+ 'Zhipu QingYan (<font name="SimHei">智谱清言</font>) is developed by Z.ai. '
1343
+ 'My name is Lei Shen (<font name="SimHei">沈磊</font>). '
1344
+ '<font name="SimHei">文心一言</font> (ERNIE Bot) is by Baidu.',
1346
+ enbody_style
1347
+ ))
1348
+
1349
+ # Base font is Chinese; wrap English parts:
1350
+ cnbody_style = ParagraphStyle(
1351
+ name="CNBodyStyle",
1352
+ fontName="SimHei", # Base font for Chinese
1353
+ fontSize=10.5,
1354
+ leading=18,
1355
+ alignment=TA_JUSTIFY,
1356
+ )
1357
+ # Wrap English segments with <font> tag
1358
+ story.append(Paragraph(
1359
+ '本报告使用 <font name="Times New Roman">GPT-4</font> '
1360
+ '和 <font name="Times New Roman">GLM</font> 进行测试。',
1361
+ cnbody_style
1362
+ ))
1363
+ ```
1364
+
1365
+ ### Rich Text Tags (`<b>`, `<super>`, `<sub>`)
1366
+ - These tags ONLY work inside `Paragraph()` objects — plain strings will NOT render them.
1367
+ - **Character Safety**: Follow **Core Constraint #5** strictly. Do not use forbidden Unicode superscript/subscript/math characters anywhere in the code. Always use `<super>`, `<sub>`,`<b>` tags inside `Paragraph()`.
1368
+ - **Scientific Notation in Tables**: `Paragraph('1.246 × 10<super>8</super>', style)` — never write large numbers as plain digits.
1369
+
1370
+ ### Line Breaks in Paragraph
1371
+ - **CRITICAL**: `Paragraph` does not treat a normal newline character (`\n`) as a line break. To create line breaks, you must use `<br/>` (or split the content into multiple `Paragraph` objects).
1372
+ ```python
1373
+ sms3 = """Hi [FIRST_NAME]
1373
+ You're invited! Join us for an exclusive first look at the Carolina Herrera Resort 2025 collection—before it opens to the public.
1374
+ [DATE] | [TIME]
1375
+ [Boutique Name]
1376
+ _private champagne reception included_
1377
+ Can I save you a spot? Just let me know!
1378
+ [Your Name]"""
1380
+ sms3_box = Table([[Paragraph(sms3, sms1_style)]], colWidths=[400])
1381
+
1382
+ # IMPORTANT:
1383
+ # Paragraph does NOT treat '\n' as a line break.
1384
+ # Use <br/> to force line breaks.
1385
+ sms3 = """Hi [FIRST_NAME]<br/><br/>
1386
+ You're invited! Join us for an exclusive first look at the Carolina Herrera Resort 2025 collection—before it opens to the public.<br/><br/>
1387
+ [DATE] | [TIME]<br/>
1388
+ [Boutique Name]<br/><br/>
1389
+ <i>private champagne reception included</i><br/><br/>
1390
+ Can I save you a spot? Just let me know!<br/><br/>
1391
+ [Your Name]"""
1392
+ sms3_box = Table([[Paragraph(sms3, sms1_style)]], colWidths=[400])
1393
+ ```
1394
+
1395
+ ### Body Title & Heading Styles
1396
+ - **All titles and sub-titles (except for Table headers)**: Must be bold with black text - use `Paragraph('<b>Title</b>', style)` + `textColor=colors.black`.
1397
+
1398
+ ### Table Cell Content Rule (MANDATORY)
1399
+ **ALL text content in table cells MUST be wrapped in `Paragraph()`. This is NON-NEGOTIABLE.**
1400
+
1401
+ ❌ **PROHIBITED** - Plain strings in table cells:
1402
+ ```python
1403
+ # NEVER DO THIS - formatting will NOT work
1404
+ data = [
1405
+ ['<b>Header</b>', 'Value'], # Bold won't render
1406
+ ['Temperature', '25°C'], # No style control
1407
+ ['Pressure', '1.01 × 10<super>5</super>'], # Superscript won't work
1408
+ ]
1409
+ ```
1410
+
1411
+ ✅ **REQUIRED** - All table text MUST be wrapped in `Paragraph()`:
1412
+ ```python
1413
+ # ALWAYS DO THIS
1414
+ data = [
1415
+ [Paragraph('<b>Header</b>', header_style), Paragraph('Value', header_style)],
1416
+ [Paragraph('Temperature', cell_style), Paragraph('25°C', cell_style)],
1417
+ [Paragraph('Pressure', cell_style), Paragraph('1.01 × 10<super>5</super>', cell_style)],
1418
+ ]
1419
+ ```
1420
+
1421
+ **Why this is mandatory:**
1422
+ - Rendering formatting tags (`<b>`, `<super>`, `<sub>`, `<i>`)
1423
+ - Proper font application
1424
+ - Correct text alignment within cells
1425
+ - Consistent styling across the table
1426
+
1427
+ **The ONLY exception**: `Image()` objects can be placed directly in table cells without Paragraph wrapping.
1428
+
1429
+ ### Table Style Specifications
1430
+ - **Header style**: Must be bold with white text on dark blue background - use `Paragraph('<b>Header</b>', header_style)` + `textColor=colors.white`.
1431
+ - **Standard color scheme**: Dark blue header (`#1F4E79`), alternating white/light gray rows.
1432
+ - **Color consistency**: If a single PDF contains multiple tables, only one color scheme is allowed across all tables.
1433
+ - **Alignment**: Each body element within the same table must use the same alignment method.
1434
+ - **Caption**: ALL table captions must be centered and followed by `Spacer(1, 18)` before next content.
1435
+ - **Spacing**: Add `Spacer(1, 18)` BEFORE tables to maintain symmetric spacing with bottom.
1436
+
1437
+ ### Document Structure
1438
+ - A PDF can contain ONLY ONE cover page and ONE back cover page.
1439
+ - The cover page and the back cover page MUST use the alignment method specified by `TA_JUSTIFY`.
1440
+ - **PDF Metadata (REQUIRED)**: Title MUST match filename; Author and Creator MUST be "Z.ai"; Subject SHOULD describe purpose.
1441
+
1442
+
1443
+ ### Image Handling
1444
+ - **Preserve aspect ratio**: Never adjust image aspect ratio. Must insert according to the original ratio.
1445
+ ```python
1446
+ from PIL import Image as PILImage
1447
+ from reportlab.platypus import Image
1448
+ # Get original dimensions
1449
+ pil_img = PILImage.open('image.png')
1450
+ orig_w, orig_h = pil_img.size
1451
+ # Scale to fit width while preserving aspect ratio
1452
+ target_width = 400
1453
+ scale = target_width / orig_w
1454
+ img = Image('image.png', width=target_width, height=orig_h * scale)
1455
+ ```
1456
+
1457
+ ## Final Code Check
1458
+ - Verify function parameter order against documentation.
1459
+ - Confirm list/array element type consistency; test-run immediately.
1460
+ - Use `Paragraph` (not `Preformatted`) for body text and formulas.
1461
+
1462
+ ### MANDATORY: Post-Generation Forbidden Character Sanitization
1463
+
1464
+ **After the complete Python code is written and BEFORE executing it**, you MUST sanitize the code using the pre-built script located at:
1465
+
1466
+ ```
1467
+ scripts/sanitize_code.py
1468
+ ```
1469
+
1470
+ This script catches any forbidden Unicode characters (superscript/subscript digits, math operators, emoji, HTML entities, literal `\uXXXX` escapes) that may have slipped through despite the prevention rules. It converts them to safe ReportLab `<super>`/`<sub>` tags or ASCII equivalents.
1471
+
1472
+ **⚠️ CRITICAL RULE**: You MUST ALWAYS write PDF generation code to a `.py` file first, then sanitize it, then execute it. **NEVER use `python -c "..."` or heredoc (`python3 << 'EOF'`) to run PDF generation code directly** — these patterns bypass the sanitization step and risk forbidden characters reaching the final PDF.
1473
+
1474
+ **Mandatory workflow (NO EXCEPTIONS):**
1475
+
1476
+ ```bash
1477
+ # Step 1: ALWAYS write code to a .py file first
1478
+ cat > generate_pdf.py << 'PYEOF'
1479
+ # ... your PDF generation code here ...
1480
+ PYEOF
1481
+
1482
+ # Step 2: Sanitize forbidden characters (MUST run before execution)
1483
+ python scripts/sanitize_code.py generate_pdf.py
1484
+
1485
+ # Step 3: Execute the sanitized code
1486
+ python generate_pdf.py
1487
+ ```
1488
+
1489
+ **Forbidden patterns — NEVER do any of the following:**
1490
+ ```bash
1491
+ # ❌ PROHIBITED: python -c with inline code (cannot be sanitized)
1492
+ python -c "from reportlab... doc.build(story)"
1493
+
1494
+ # ❌ PROHIBITED: heredoc without saving to file first (cannot be sanitized)
1495
+ python3 << 'EOF'
1496
+ from reportlab...
1497
+ EOF
1498
+
1499
+ # ❌ PROHIBITED: executing the .py file WITHOUT sanitizing first
1500
+ python generate_pdf.py # Missing sanitization step!
1501
+ ```
1502
+
1503
+ **✅ CORRECT: The ONLY allowed execution pattern:**
1504
+ ```bash
1505
+ # 1. Write to file → 2. Sanitize → 3. Execute
1506
+ cat > generate_pdf.py << 'PYEOF'
1507
+ ...code...
1508
+ PYEOF
1509
+ python scripts/sanitize_code.py generate_pdf.py
1510
+ python generate_pdf.py
1511
+ ```
1512
+
1513
+ **⚠️ This sanitization step is NON-OPTIONAL.** Even if you believe the code contains no forbidden characters, you MUST still run the sanitization script. It serves as a safety net to catch any characters that bypassed prevention rules.
1514
+
1515
+ ## Quick Reference
1516
+
1517
+ | Task | Best Tool | Command/Code |
1518
+ |------|-----------|--------------|
1519
+ | Merge PDFs | pypdf | `writer.add_page(page)` |
1520
+ | Split PDFs | pypdf | One page per file |
1521
+ | Extract text | pdfplumber | `page.extract_text()` |
1522
+ | Extract tables | pdfplumber | `page.extract_tables()` |
1523
+ | Create PDFs | reportlab | Canvas or Platypus |
1524
+ | Command line merge | qpdf | `qpdf --empty --pages ...` |
1525
+ | OCR scanned PDFs | pytesseract | Convert to image first |
1526
+ | Fill PDF forms | pdf-lib or pypdf (see forms.md) | See forms.md |
1527
+
1528
+ ## Next Steps
1529
+
1530
+ - For advanced pypdfium2 usage, see reference.md
1531
+ - For JavaScript libraries (pdf-lib), see reference.md
1532
+ - If you need to fill out a PDF form, follow the instructions in forms.md
1533
+ - For troubleshooting guides, see reference.md
1534
+ - For advanced table of contents templates, see reference.md