camel-ai 0.2.67__py3-none-any.whl → 0.2.80a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +6 -2
  3. camel/agents/_utils.py +38 -0
  4. camel/agents/chat_agent.py +4014 -410
  5. camel/agents/mcp_agent.py +30 -27
  6. camel/agents/repo_agent.py +2 -1
  7. camel/benchmarks/browsecomp.py +6 -6
  8. camel/configs/__init__.py +15 -0
  9. camel/configs/aihubmix_config.py +88 -0
  10. camel/configs/amd_config.py +70 -0
  11. camel/configs/cometapi_config.py +104 -0
  12. camel/configs/minimax_config.py +93 -0
  13. camel/configs/nebius_config.py +103 -0
  14. camel/configs/vllm_config.py +2 -0
  15. camel/data_collectors/alpaca_collector.py +15 -6
  16. camel/datagen/self_improving_cot.py +1 -1
  17. camel/datasets/base_generator.py +39 -10
  18. camel/environments/__init__.py +12 -0
  19. camel/environments/rlcards_env.py +860 -0
  20. camel/environments/single_step.py +28 -3
  21. camel/environments/tic_tac_toe.py +1 -1
  22. camel/interpreters/__init__.py +2 -0
  23. camel/interpreters/docker/Dockerfile +4 -16
  24. camel/interpreters/docker_interpreter.py +3 -2
  25. camel/interpreters/e2b_interpreter.py +34 -1
  26. camel/interpreters/internal_python_interpreter.py +51 -2
  27. camel/interpreters/microsandbox_interpreter.py +395 -0
  28. camel/loaders/__init__.py +11 -2
  29. camel/loaders/base_loader.py +85 -0
  30. camel/loaders/chunkr_reader.py +9 -0
  31. camel/loaders/firecrawl_reader.py +4 -4
  32. camel/logger.py +1 -1
  33. camel/memories/agent_memories.py +84 -1
  34. camel/memories/base.py +34 -0
  35. camel/memories/blocks/chat_history_block.py +122 -4
  36. camel/memories/blocks/vectordb_block.py +8 -1
  37. camel/memories/context_creators/score_based.py +29 -237
  38. camel/memories/records.py +88 -8
  39. camel/messages/base.py +166 -40
  40. camel/messages/func_message.py +32 -5
  41. camel/models/__init__.py +10 -0
  42. camel/models/aihubmix_model.py +83 -0
  43. camel/models/aiml_model.py +1 -16
  44. camel/models/amd_model.py +101 -0
  45. camel/models/anthropic_model.py +117 -18
  46. camel/models/aws_bedrock_model.py +2 -33
  47. camel/models/azure_openai_model.py +205 -91
  48. camel/models/base_audio_model.py +3 -1
  49. camel/models/base_model.py +189 -24
  50. camel/models/cohere_model.py +5 -17
  51. camel/models/cometapi_model.py +83 -0
  52. camel/models/crynux_model.py +1 -16
  53. camel/models/deepseek_model.py +6 -16
  54. camel/models/fish_audio_model.py +6 -0
  55. camel/models/gemini_model.py +71 -20
  56. camel/models/groq_model.py +1 -17
  57. camel/models/internlm_model.py +1 -16
  58. camel/models/litellm_model.py +49 -32
  59. camel/models/lmstudio_model.py +1 -17
  60. camel/models/minimax_model.py +83 -0
  61. camel/models/mistral_model.py +1 -16
  62. camel/models/model_factory.py +27 -1
  63. camel/models/model_manager.py +24 -6
  64. camel/models/modelscope_model.py +1 -16
  65. camel/models/moonshot_model.py +185 -19
  66. camel/models/nebius_model.py +83 -0
  67. camel/models/nemotron_model.py +0 -5
  68. camel/models/netmind_model.py +1 -16
  69. camel/models/novita_model.py +1 -16
  70. camel/models/nvidia_model.py +1 -16
  71. camel/models/ollama_model.py +4 -19
  72. camel/models/openai_compatible_model.py +171 -46
  73. camel/models/openai_model.py +205 -77
  74. camel/models/openrouter_model.py +1 -17
  75. camel/models/ppio_model.py +1 -16
  76. camel/models/qianfan_model.py +1 -16
  77. camel/models/qwen_model.py +1 -16
  78. camel/models/reka_model.py +1 -16
  79. camel/models/samba_model.py +34 -47
  80. camel/models/sglang_model.py +64 -31
  81. camel/models/siliconflow_model.py +1 -16
  82. camel/models/stub_model.py +0 -4
  83. camel/models/togetherai_model.py +1 -16
  84. camel/models/vllm_model.py +1 -16
  85. camel/models/volcano_model.py +0 -17
  86. camel/models/watsonx_model.py +1 -16
  87. camel/models/yi_model.py +1 -16
  88. camel/models/zhipuai_model.py +60 -16
  89. camel/parsers/__init__.py +18 -0
  90. camel/parsers/mcp_tool_call_parser.py +176 -0
  91. camel/retrievers/auto_retriever.py +1 -0
  92. camel/runtimes/configs.py +11 -11
  93. camel/runtimes/daytona_runtime.py +15 -16
  94. camel/runtimes/docker_runtime.py +6 -6
  95. camel/runtimes/remote_http_runtime.py +5 -5
  96. camel/services/agent_openapi_server.py +380 -0
  97. camel/societies/__init__.py +2 -0
  98. camel/societies/role_playing.py +26 -28
  99. camel/societies/workforce/__init__.py +2 -0
  100. camel/societies/workforce/events.py +122 -0
  101. camel/societies/workforce/prompts.py +249 -38
  102. camel/societies/workforce/role_playing_worker.py +82 -20
  103. camel/societies/workforce/single_agent_worker.py +634 -34
  104. camel/societies/workforce/structured_output_handler.py +512 -0
  105. camel/societies/workforce/task_channel.py +169 -23
  106. camel/societies/workforce/utils.py +176 -9
  107. camel/societies/workforce/worker.py +77 -23
  108. camel/societies/workforce/workflow_memory_manager.py +772 -0
  109. camel/societies/workforce/workforce.py +3168 -478
  110. camel/societies/workforce/workforce_callback.py +74 -0
  111. camel/societies/workforce/workforce_logger.py +203 -175
  112. camel/societies/workforce/workforce_metrics.py +33 -0
  113. camel/storages/__init__.py +4 -0
  114. camel/storages/key_value_storages/json.py +15 -2
  115. camel/storages/key_value_storages/mem0_cloud.py +48 -47
  116. camel/storages/object_storages/google_cloud.py +1 -1
  117. camel/storages/vectordb_storages/__init__.py +6 -0
  118. camel/storages/vectordb_storages/chroma.py +731 -0
  119. camel/storages/vectordb_storages/oceanbase.py +13 -13
  120. camel/storages/vectordb_storages/pgvector.py +349 -0
  121. camel/storages/vectordb_storages/qdrant.py +3 -3
  122. camel/storages/vectordb_storages/surreal.py +365 -0
  123. camel/storages/vectordb_storages/tidb.py +8 -6
  124. camel/tasks/task.py +244 -27
  125. camel/toolkits/__init__.py +46 -8
  126. camel/toolkits/aci_toolkit.py +64 -19
  127. camel/toolkits/arxiv_toolkit.py +6 -6
  128. camel/toolkits/base.py +63 -5
  129. camel/toolkits/code_execution.py +28 -1
  130. camel/toolkits/context_summarizer_toolkit.py +684 -0
  131. camel/toolkits/craw4ai_toolkit.py +93 -0
  132. camel/toolkits/dappier_toolkit.py +10 -6
  133. camel/toolkits/dingtalk.py +1135 -0
  134. camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
  135. camel/toolkits/excel_toolkit.py +901 -67
  136. camel/toolkits/file_toolkit.py +1402 -0
  137. camel/toolkits/function_tool.py +30 -6
  138. camel/toolkits/github_toolkit.py +107 -20
  139. camel/toolkits/gmail_toolkit.py +1839 -0
  140. camel/toolkits/google_calendar_toolkit.py +38 -4
  141. camel/toolkits/google_drive_mcp_toolkit.py +54 -0
  142. camel/toolkits/human_toolkit.py +34 -10
  143. camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
  144. camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
  145. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
  146. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
  147. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  148. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +3749 -0
  149. camel/toolkits/hybrid_browser_toolkit/ts/package.json +32 -0
  150. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  151. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1815 -0
  152. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
  153. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +590 -0
  154. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  155. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  156. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  157. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  158. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +130 -0
  159. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +26 -0
  160. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
  161. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1032 -0
  162. camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
  163. camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
  164. camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
  165. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
  166. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
  167. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
  168. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
  169. camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
  170. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
  171. camel/toolkits/image_generation_toolkit.py +390 -0
  172. camel/toolkits/jina_reranker_toolkit.py +3 -4
  173. camel/toolkits/klavis_toolkit.py +5 -1
  174. camel/toolkits/markitdown_toolkit.py +104 -0
  175. camel/toolkits/math_toolkit.py +64 -10
  176. camel/toolkits/mcp_toolkit.py +370 -45
  177. camel/toolkits/memory_toolkit.py +5 -1
  178. camel/toolkits/message_agent_toolkit.py +608 -0
  179. camel/toolkits/message_integration.py +724 -0
  180. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  181. camel/toolkits/note_taking_toolkit.py +277 -0
  182. camel/toolkits/notion_mcp_toolkit.py +224 -0
  183. camel/toolkits/openbb_toolkit.py +5 -1
  184. camel/toolkits/origene_mcp_toolkit.py +56 -0
  185. camel/toolkits/playwright_mcp_toolkit.py +12 -31
  186. camel/toolkits/pptx_toolkit.py +25 -12
  187. camel/toolkits/resend_toolkit.py +168 -0
  188. camel/toolkits/screenshot_toolkit.py +213 -0
  189. camel/toolkits/search_toolkit.py +437 -142
  190. camel/toolkits/slack_toolkit.py +104 -50
  191. camel/toolkits/sympy_toolkit.py +1 -1
  192. camel/toolkits/task_planning_toolkit.py +3 -3
  193. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  194. camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
  195. camel/toolkits/terminal_toolkit/utils.py +532 -0
  196. camel/toolkits/thinking_toolkit.py +1 -1
  197. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  198. camel/toolkits/video_analysis_toolkit.py +106 -26
  199. camel/toolkits/video_download_toolkit.py +17 -14
  200. camel/toolkits/web_deploy_toolkit.py +1219 -0
  201. camel/toolkits/wechat_official_toolkit.py +483 -0
  202. camel/toolkits/zapier_toolkit.py +5 -1
  203. camel/types/__init__.py +2 -2
  204. camel/types/agents/tool_calling_record.py +4 -1
  205. camel/types/enums.py +316 -40
  206. camel/types/openai_types.py +2 -2
  207. camel/types/unified_model_type.py +31 -4
  208. camel/utils/commons.py +36 -5
  209. camel/utils/constants.py +3 -0
  210. camel/utils/context_utils.py +1003 -0
  211. camel/utils/mcp.py +138 -4
  212. camel/utils/mcp_client.py +45 -1
  213. camel/utils/message_summarizer.py +148 -0
  214. camel/utils/token_counting.py +43 -20
  215. camel/utils/tool_result.py +44 -0
  216. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +296 -85
  217. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +219 -146
  218. camel/loaders/pandas_reader.py +0 -368
  219. camel/toolkits/dalle_toolkit.py +0 -175
  220. camel/toolkits/file_write_toolkit.py +0 -444
  221. camel/toolkits/openai_agent_toolkit.py +0 -135
  222. camel/toolkits/terminal_toolkit.py +0 -1037
  223. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
  224. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1402 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import os
15
+ import re
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+ from typing import Dict, List, Optional, Tuple, Union
19
+
20
+ from camel.logger import get_logger
21
+ from camel.toolkits.base import BaseToolkit
22
+ from camel.toolkits.function_tool import FunctionTool
23
+ from camel.utils import MCPServer, dependencies_required
24
+
25
+ logger = get_logger(__name__)
26
+
27
+
28
+ @MCPServer()
29
+ class FileToolkit(BaseToolkit):
30
+ r"""A comprehensive toolkit for file operations including reading,
31
+ writing, and editing files.
32
+
33
+ This class provides cross-platform (macOS, Linux, Windows) support for:
34
+ - Reading various file formats (text, JSON, YAML, PDF, DOCX)
35
+ - Writing to multiple formats (Markdown, DOCX, PDF, plaintext, JSON,
36
+ YAML, CSV, HTML)
37
+ - Editing and modifying existing files with content replacement
38
+ - Automatic backup creation before modifications
39
+ - Custom encoding and enhanced formatting options
40
+ """
41
+
42
def __init__(
    self,
    working_directory: Optional[str] = None,
    timeout: Optional[float] = None,
    default_encoding: str = "utf-8",
    backup_enabled: bool = True,
) -> None:
    r"""Initialize the FileToolkit.

    Args:
        working_directory (str, optional): The default directory for
            output files. If not provided, it will be determined by the
            `CAMEL_WORKDIR` environment variable (if set). If the
            environment variable is not set, it defaults to
            `camel_working_dir`.
        timeout (Optional[float]): The timeout for the toolkit.
            (default: :obj:`None`)
        default_encoding (str): Default character encoding for text
            operations. (default: :obj:`utf-8`)
        backup_enabled (bool): Whether to create backups of existing files
            before overwriting. (default: :obj:`True`)
    """
    super().__init__(timeout=timeout)

    # Precedence: explicit argument, then CAMEL_WORKDIR, then the built-in
    # default. Empty strings are treated as "not provided" at every level.
    base_dir = (
        working_directory
        or os.environ.get("CAMEL_WORKDIR")
        or "./camel_working_dir"
    )
    self.working_directory = Path(base_dir).resolve()
    # Create the directory eagerly so later writes cannot fail on a
    # missing parent.
    self.working_directory.mkdir(parents=True, exist_ok=True)

    self.default_encoding = default_encoding
    self.backup_enabled = backup_enabled
    logger.info(
        f"FileToolkit initialized with output directory"
        f": {self.working_directory}, encoding: {default_encoding}"
    )
80
+
81
+ def _resolve_filepath(self, file_path: str) -> Path:
82
+ r"""Convert the given string path to a Path object.
83
+
84
+ If the provided path is not absolute, it is made relative to the
85
+ default output directory. The filename part is sanitized to replace
86
+ spaces and special characters with underscores, ensuring safe usage
87
+ in downstream processing.
88
+
89
+ Args:
90
+ file_path (str): The file path to resolve.
91
+
92
+ Returns:
93
+ Path: A fully resolved (absolute) and sanitized Path object.
94
+ """
95
+ path_obj = Path(file_path)
96
+ if not path_obj.is_absolute():
97
+ path_obj = self.working_directory / path_obj
98
+
99
+ sanitized_filename = self._sanitize_filename(path_obj.name)
100
+ path_obj = path_obj.parent / sanitized_filename
101
+ return path_obj.resolve()
102
+
103
+ def _sanitize_filename(self, filename: str) -> str:
104
+ r"""Sanitize a filename by replacing any character that is not
105
+ alphanumeric, a dot (.), hyphen (-), or underscore (_) with an
106
+ underscore (_).
107
+
108
+ Args:
109
+ filename (str): The original filename which may contain spaces or
110
+ special characters.
111
+
112
+ Returns:
113
+ str: The sanitized filename with disallowed characters replaced by
114
+ underscores.
115
+ """
116
+ safe = re.sub(r'[^\w\-.]', '_', filename)
117
+ return safe
118
+
119
+ def _write_text_file(
120
+ self, file_path: Path, content: str, encoding: str = "utf-8"
121
+ ) -> None:
122
+ r"""Write text content to a plaintext file.
123
+
124
+ Args:
125
+ file_path (Path): The target file path.
126
+ content (str): The text content to write.
127
+ encoding (str): Character encoding to use. (default: :obj:`utf-8`)
128
+ """
129
+ with file_path.open("w", encoding=encoding) as f:
130
+ f.write(content)
131
+
132
+ def _create_backup(self, file_path: Path) -> Optional[Path]:
133
+ r"""Create a backup of the file if it exists and backup is enabled.
134
+
135
+ Args:
136
+ file_path (Path): The file path to backup.
137
+
138
+ Returns:
139
+ Optional[Path]: Path to the backup file if created, None otherwise.
140
+ """
141
+ if not self.backup_enabled or not file_path.exists():
142
+ return None
143
+
144
+ # Generate backup filename with .bak extension and timestamp
145
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
146
+ backup_path = file_path.parent / f"{file_path.name}.{timestamp}.bak"
147
+
148
+ # Copy the file to backup location
149
+ import shutil
150
+
151
+ try:
152
+ shutil.copy2(file_path, backup_path)
153
+ logger.info(f"Created backup: {backup_path}")
154
+ return backup_path
155
+ except Exception as e:
156
+ logger.warning(f"Failed to create backup: {e}")
157
+ return None
158
+
159
def _write_docx_file(self, file_path: Path, content: str) -> None:
    r"""Write text to a DOCX file, one paragraph per input line.

    The document's ``Normal`` style is set to Calibri, 11 pt, single line
    spacing, and that style is applied to every paragraph.

    Args:
        file_path (Path): The target file path.
        content (str): The text content to write.
    """
    import docx

    document = docx.Document()

    # Configure the shared 'Normal' style once with the default formatting.
    normal_style = document.styles['Normal']
    normal_style.font.name = 'Calibri'
    normal_style.font.size = docx.shared.Pt(11)
    normal_style.paragraph_format.line_spacing = 1.0

    # Each newline-separated chunk becomes its own paragraph (empty lines
    # included).
    for paragraph_text in content.split('\n'):
        document.add_paragraph(paragraph_text).style = normal_style

    document.save(str(file_path))
185
+
186
@dependencies_required('reportlab')
def _write_pdf_file(
    self,
    file_path: Path,
    title: str,
    content: Union[str, List[List[str]]],
    use_latex: bool = False,
) -> None:
    r"""Write text content to a PDF file with LaTeX and table support.

    Two independent rendering paths exist:

    - ``use_latex=True``: the content is assembled into a PyLaTeX
      ``Document`` and compiled with ``generate_pdf``. ``title`` is not
      used on this path, and no exception handling is applied here, so
      errors raised by PyLaTeX propagate to the caller.
    - ``use_latex=False``: the content is laid out with reportlab. Any
      exception raised on this path is logged and swallowed, so the
      method returns normally even if no PDF was produced.

    Args:
        file_path (Path): The target file path.
        title (str): The document title. Only rendered on the reportlab
            path, and only when non-empty.
        content (Union[str, List[List[str]]]): The content to write. Can
            be:
            - String: Supports Markdown-style tables and LaTeX math
                expressions
            - List[List[str]]: Table data as list of rows for direct table
                rendering
        use_latex (bool): Whether to use LaTeX for math rendering.
            (default: :obj:`False`)
    """
    if use_latex:
        # pylatex is imported lazily so it is only needed on this path.
        from pylatex import (
            Command,
            Document,
            Math,
            Section,
        )
        from pylatex.utils import (
            NoEscape,
        )

        doc = Document(documentclass="article")
        doc.packages.append(Command('usepackage', 'amsmath'))
        with doc.create(Section('Generated Content')):
            # Handle different content types
            if isinstance(content, str):
                content_lines = content.split('\n')
            else:
                # Convert table data to LaTeX table format
                content_lines = []
                if content:
                    # Add table header
                    # One left-aligned ('l') column per cell of the first
                    # row.
                    table_header = (
                        r'\begin{tabular}{' + 'l' * len(content[0]) + '}'
                    )
                    content_lines.append(table_header)
                    content_lines.append(r'\hline')
                    for row in content:
                        # Cells are joined verbatim; they are not
                        # LaTeX-escaped.
                        row_content = (
                            ' & '.join(str(cell) for cell in row) + r' \\'
                        )
                        content_lines.append(row_content)
                    content_lines.append(r'\hline')
                    content_lines.append(r'\end{tabular}')

            for line in content_lines:
                stripped_line = line.strip()

                # Skip empty lines
                if not stripped_line:
                    continue

                # Convert Markdown-like headers
                if stripped_line.startswith('## '):
                    header = stripped_line[3:]
                    doc.append(NoEscape(r'\subsection*{%s}' % header))
                    continue
                elif stripped_line.startswith('# '):
                    header = stripped_line[2:]
                    doc.append(NoEscape(r'\section*{%s}' % header))
                    continue
                elif stripped_line.strip() == '---':
                    # Markdown horizontal rule.
                    doc.append(NoEscape(r'\hrule'))
                    continue

                # Detect standalone math expressions like $...$
                if (
                    stripped_line.startswith('$')
                    and stripped_line.endswith('$')
                    and len(stripped_line) > 1
                ):
                    # Drop the surrounding '$' delimiters.
                    math_data = stripped_line[1:-1]
                    doc.append(Math(data=math_data))
                else:
                    # Everything else is inserted as raw LaTeX (no
                    # escaping).
                    doc.append(NoEscape(stripped_line))
                doc.append(NoEscape(r'\par'))

        doc.generate_pdf(str(file_path), clean_tex=True)

        logger.info(f"Wrote PDF (with LaTeX) to {file_path}")

    else:
        try:
            from reportlab.lib import colors
            from reportlab.lib.enums import TA_CENTER, TA_JUSTIFY
            from reportlab.lib.pagesizes import A4
            from reportlab.lib.styles import (
                ParagraphStyle,
                getSampleStyleSheet,
            )
            from reportlab.platypus import (
                Paragraph,
                SimpleDocTemplate,
                Spacer,
            )

            # Register Chinese fonts
            chinese_font = self._register_chinese_font()

            # Create PDF document
            doc = SimpleDocTemplate(
                str(file_path),
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18,
            )

            # Get styles with Chinese font support
            styles = getSampleStyleSheet()
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=18,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.black,
                fontName=chinese_font,
            )

            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=14,
                spaceAfter=12,
                spaceBefore=20,
                textColor=colors.black,
                fontName=chinese_font,
            )

            body_style = ParagraphStyle(
                'CustomBody',
                parent=styles['Normal'],
                fontSize=11,
                spaceAfter=12,
                alignment=TA_JUSTIFY,
                textColor=colors.black,
                fontName=chinese_font,
            )

            # Build story (content elements)
            story = []

            # Add title
            if title:
                story.append(Paragraph(title, title_style))
                story.append(Spacer(1, 12))

            # Handle different content types
            if isinstance(content, list) and all(
                isinstance(row, list) for row in content
            ):
                # Content is a table (List[List[str]])
                # An empty list adds nothing to the story.
                if content:
                    table = self._create_pdf_table(content)
                    story.append(table)
            else:
                # Content is a string, process normally
                content_str = str(content)
                self._process_text_content(
                    story, content_str, heading_style, body_style
                )

            # Build PDF
            doc.build(story)
        except Exception as e:
            # Errors are only logged; the caller is not notified.
            logger.error(f"Error creating PDF: {e}")
366
+
367
def _process_text_content(
    self, story, content: str, heading_style, body_style
):
    r"""Process text content and add to story.

    The content is split into lines. Each run of Markdown table lines that
    yields at least one data row is rendered as a single table followed by
    a spacer; blank lines become small spacers; lines starting with ``# ``,
    ``## `` or ``### `` become headings; every other line is converted
    with :meth:`_convert_markdown_to_html` and added as a body paragraph.

    Each table range is parsed on its own, so a table is always rendered
    at the position of the lines it came from. A pipe-delimited run with
    no data rows (for example a lone separator line) is not treated as a
    table and falls through to normal line handling.

    Args:
        story: The reportlab story list to append to
        content (str): The text content to process
        heading_style: Style for headings
        body_style: Style for body text
    """
    from reportlab.platypus import Paragraph, Spacer

    lines = content.split('\n')

    # Map "first line of a table" -> (last line of that table, its rows).
    # Parsing only the lines of one range guarantees the rows belong to
    # that range, independent of any other table in the content.
    table_blocks = {}
    for start, end in self._find_table_line_ranges(lines):
        parsed = self._parse_markdown_table(lines[start : end + 1])
        if parsed:
            table_blocks[start] = (end, parsed[0])

    i = 0
    while i < len(lines):
        # Ranges never overlap and a table is skipped in one jump, so a
        # table can only ever be reached at its first line.
        if i in table_blocks:
            table_end, table_rows = table_blocks[i]
            try:
                table = self._create_pdf_table(table_rows)
                story.append(table)
                story.append(Spacer(1, 12))
            except Exception as e:
                logger.error(f"Failed to create table: {e}")
                # Fallback: render as text
                table_error_msg = f"Table data (error): {table_rows}"
                story.append(Paragraph(table_error_msg, body_style))

            # Skip to end of table
            i = table_end + 1
            continue

        line = lines[i].strip()

        if not line:
            # Blank line: keep some vertical separation in the output.
            story.append(Spacer(1, 6))
        elif line.startswith('# '):
            story.append(Paragraph(line[2:], heading_style))
        elif line.startswith('## '):
            story.append(Paragraph(line[3:], heading_style))
        elif line.startswith('### '):
            story.append(Paragraph(line[4:], heading_style))
        else:
            # Regular paragraph: convert basic markdown formatting first.
            line = self._convert_markdown_to_html(line)
            story.append(Paragraph(line, body_style))

        i += 1
458
+
459
+ def _find_table_line_ranges(
460
+ self, lines: List[str]
461
+ ) -> List[Tuple[int, int]]:
462
+ r"""Find line ranges that contain markdown tables.
463
+
464
+ Args:
465
+ lines (List[str]): List of lines to analyze.
466
+
467
+ Returns:
468
+ List[Tuple[int, int]]: List of (start_line, end_line) tuples
469
+ for table ranges.
470
+ """
471
+ ranges = []
472
+ in_table = False
473
+ table_start = 0
474
+
475
+ for i, line in enumerate(lines):
476
+ line = line.strip()
477
+
478
+ if self._is_table_row(line):
479
+ if not in_table:
480
+ in_table = True
481
+ table_start = i
482
+ else:
483
+ if in_table:
484
+ # End of table
485
+ ranges.append((table_start, i - 1))
486
+ in_table = False
487
+
488
+ # Handle table at end of content
489
+ if in_table:
490
+ ranges.append((table_start, len(lines) - 1))
491
+
492
+ return ranges
493
+
494
def _register_chinese_font(self) -> str:
    r"""Register a CJK-capable system font with reportlab, if one exists.

    A fixed list of well-known font files is probed for the current
    operating system. The first file that exists and registers
    successfully is exposed under the name ``ChineseFont``, with the
    normal, italic, bold and bold-italic variants all mapped to that same
    face.

    Returns:
        str: ``ChineseFont`` when a font was registered (or already was),
            otherwise ``Helvetica``.
    """
    import os
    import platform

    from reportlab.lib.fonts import addMapping
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont

    # Candidate font files per platform.system() value, in priority order.
    candidates_by_system = {
        "Darwin": [  # macOS
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/Hiragino Sans GB.ttc",
            "/System/Library/Fonts/STHeiti Light.ttc",
            "/System/Library/Fonts/STHeiti Medium.ttc",
            "/Library/Fonts/Arial Unicode MS.ttf",
        ],
        "Windows": [
            r"C:\Windows\Fonts\msyh.ttc",  # Microsoft YaHei
            r"C:\Windows\Fonts\simsun.ttc",  # SimSun
            r"C:\Windows\Fonts\arial.ttf",  # Arial (fallback)
        ],
        "Linux": [
            "/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf",
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        ],
    }

    for font_path in candidates_by_system.get(platform.system(), []):
        if not os.path.exists(font_path):
            continue
        try:
            font_name = "ChineseFont"
            # Registration and mappings are only needed once per process.
            if font_name not in pdfmetrics.getRegisteredFontNames():
                pdfmetrics.registerFont(TTFont(font_name, font_path))
                # (bold, italic) flags: normal, italic, bold, bold italic.
                for bold, italic in ((0, 0), (0, 1), (1, 0), (1, 1)):
                    addMapping(font_name, bold, italic, font_name)
            return font_name
        except Exception:
            # This file could not be used; try the next candidate.
            continue

    # Fallback to Helvetica if no Chinese font found
    logger.warning("No Chinese font found, falling back to Helvetica")
    return "Helvetica"
552
+
553
+ def _parse_markdown_table(self, lines: List[str]) -> List[List[List[str]]]:
554
+ r"""Parse markdown-style tables from a list of lines.
555
+
556
+ Args:
557
+ lines (List[str]): List of text lines that may contain tables.
558
+
559
+ Returns:
560
+ List[List[List[str]]]: List of tables, where each table is a list
561
+ of rows, and each row is a list of cells.
562
+ """
563
+ tables = []
564
+ current_table_data: List[List[str]] = []
565
+ in_table = False
566
+
567
+ for line in lines:
568
+ line = line.strip()
569
+
570
+ # Check for table (Markdown-style)
571
+ if self._is_table_row(line):
572
+ if not in_table:
573
+ in_table = True
574
+ current_table_data = []
575
+
576
+ # Skip separator lines (e.g., |---|---|)
577
+ if self._is_table_separator(line):
578
+ continue
579
+
580
+ # Parse table row
581
+ cells = self._parse_table_row(line)
582
+ if cells:
583
+ current_table_data.append(cells)
584
+ continue
585
+
586
+ # If we were in a table and now we're not, finalize the table
587
+ if in_table:
588
+ if current_table_data:
589
+ tables.append(current_table_data)
590
+ current_table_data = []
591
+ in_table = False
592
+
593
+ # Add any remaining table
594
+ if in_table and current_table_data:
595
+ tables.append(current_table_data)
596
+
597
+ return tables
598
+
599
+ def _is_table_row(self, line: str) -> bool:
600
+ r"""Check if a line appears to be a table row.
601
+
602
+ Args:
603
+ line (str): The line to check.
604
+
605
+ Returns:
606
+ bool: True if the line looks like a table row.
607
+ """
608
+ return '|' in line and line.count('|') >= 2
609
+
610
+ def _is_table_separator(self, line: str) -> bool:
611
+ r"""Check if a line is a table separator (e.g., |---|---|).
612
+
613
+ Args:
614
+ line (str): The line to check.
615
+
616
+ Returns:
617
+ bool: True if the line is a table separator.
618
+ """
619
+ import re
620
+
621
+ # More precise check for separator lines
622
+ # Must contain only spaces, pipes, dashes, and colons
623
+ # and have at least one dash to be a separator
624
+ if not re.match(r'^[\s\|\-\:]+$', line):
625
+ return False
626
+
627
+ # Must contain at least one dash to be a valid separator
628
+ return '-' in line
629
+
630
+ def _parse_table_row(self, line: str) -> List[str]:
631
+ r"""Parse a single table row into cells.
632
+
633
+ Args:
634
+ line (str): The table row line.
635
+
636
+ Returns:
637
+ List[str]: List of cell contents.
638
+ """
639
+ # Parse table row
640
+ cells = [cell.strip() for cell in line.split('|')]
641
+
642
+ # Remove empty cells at start/end (common in markdown tables)
643
+ if cells and not cells[0]:
644
+ cells = cells[1:]
645
+ if cells and not cells[-1]:
646
+ cells = cells[:-1]
647
+
648
+ return cells
649
+
650
def _create_pdf_table(self, table_data: List[List[str]]):
    r"""Create a formatted table for PDF.

    Column widths, font sizes and cell padding are derived from the number
    of columns in the first row and from the longest cell text. Every cell
    is wrapped in a reportlab ``Paragraph`` so long text can wrap, and the
    first row is styled as a header that repeats on each page.

    Args:
        table_data (List[List[str]]): Table data as list of rows.

    Returns:
        Table: A formatted reportlab Table object. If anything fails while
            building the styled table, the error is logged and a plain
            ``Table(table_data)`` is returned instead.
    """
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import A4
    from reportlab.lib.styles import ParagraphStyle
    from reportlab.platypus import Paragraph, Table, TableStyle

    try:
        # Get Chinese font for table
        chinese_font = self._register_chinese_font()

        # Calculate available width (A4 width minus margins)
        page_width = A4[0]  # A4 width in points
        margins = 144  # left + right margins (72 each)
        available_width = page_width - margins

        # Calculate column widths and font size based on content
        if table_data:
            # The first row defines how many columns the table has.
            num_columns = len(table_data[0])

            # Calculate max content length for each column
            max_lengths = [0] * num_columns
            max_cell_length = 0
            for row in table_data:
                for i, cell in enumerate(row):
                    # Cells beyond the first row's column count are not
                    # measured.
                    if i < len(max_lengths):
                        cell_length = len(str(cell))
                        max_lengths[i] = max(max_lengths[i], cell_length)
                        max_cell_length = max(max_cell_length, cell_length)

            # Dynamic font size calculation based on columns and content
            # Base font sizes
            base_header_font = 9
            base_body_font = 8

            # Calculate font size factor based on columns and content
            # Thresholds are listed from largest to smallest, so the first
            # one that num_columns exceeds wins; 1.0 applies when none does.
            column_factors = {10: 0.6, 8: 0.7, 6: 0.8, 4: 0.9}
            font_size_factor = next(
                (
                    factor
                    for cols, factor in column_factors.items()
                    if num_columns > cols
                ),
                1.0,
            )

            # Further adjust if max cell content is very long
            if max_cell_length > 30:
                font_size_factor *= 0.8
            elif max_cell_length > 20:
                font_size_factor *= 0.9

            # Font sizes never drop below 5.
            header_font_size = max(
                5, int(base_header_font * font_size_factor)
            )
            body_font_size = max(5, int(base_body_font * font_size_factor))

            # Calculate minimum column width based on font size
            min_col_width = max(30, 40 * font_size_factor)

            # Distribute width proportionally with minimum width
            total_length = sum(max_lengths)
            if total_length > 0:
                # Calculate proportional widths
                proportional_widths = [
                    (length / total_length) * available_width
                    for length in max_lengths
                ]

                # Ensure minimum width and adjust if necessary
                col_widths = []
                total_width = 0
                for width in proportional_widths:
                    adjusted_width = max(min_col_width, width)
                    col_widths.append(adjusted_width)
                    total_width += adjusted_width

                # Scale down if total exceeds available width
                # (raising narrow columns to the minimum can overshoot)
                if total_width > available_width:
                    scale_factor = available_width / total_width
                    col_widths = [w * scale_factor for w in col_widths]
            else:
                # All measured cells are empty: share the width equally.
                col_widths = [available_width / num_columns] * num_columns

            # Adjust padding based on font size
            h_padding = max(2, int(6 * font_size_factor))
            v_padding = max(2, int(4 * font_size_factor))
        else:
            # No rows at all: fall back to fixed defaults and let reportlab
            # pick the column widths (col_widths=None).
            col_widths = None
            header_font_size = 9
            body_font_size = 8
            h_padding = 6
            v_padding = 4

        # Create paragraph styles for wrapping text
        header_style = ParagraphStyle(
            'TableHeader',
            fontName=chinese_font,
            fontSize=header_font_size,
            textColor=colors.whitesmoke,
            alignment=0,  # LEFT alignment
            leading=header_font_size * 1.2,
        )

        body_style = ParagraphStyle(
            'TableBody',
            fontName=chinese_font,
            fontSize=body_font_size,
            textColor=colors.black,
            alignment=0,  # LEFT alignment
            leading=body_font_size * 1.2,
        )

        # Convert table data to Paragraph objects for text wrapping
        wrapped_data = []
        for row_idx, row in enumerate(table_data):
            wrapped_row = []
            for cell in row:
                cell_text = str(cell)
                # Use header style for first row, body style for others
                style = header_style if row_idx == 0 else body_style
                # Escape special characters for XML
                # ('&' is replaced first so the entities added for '<' and
                # '>' are not escaped a second time)
                cell_text = (
                    cell_text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;')
                )
                para = Paragraph(cell_text, style)
                wrapped_row.append(para)
            wrapped_data.append(wrapped_row)

        # Create table with wrapped data
        table = Table(wrapped_data, colWidths=col_widths, repeatRows=1)

        # Style the table with dynamic formatting
        table.setStyle(
            TableStyle(
                [
                    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                    ('BACKGROUND', (0, 1), (-1, -1), colors.white),
                    ('GRID', (0, 0), (-1, -1), 0.5, colors.black),
                    ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                    ('LEFTPADDING', (0, 0), (-1, -1), h_padding),
                    ('RIGHTPADDING', (0, 0), (-1, -1), h_padding),
                    ('TOPPADDING', (0, 0), (-1, -1), v_padding),
                    ('BOTTOMPADDING', (0, 0), (-1, -1), v_padding),
                ]
            )
        )

        return table

    except Exception as e:
        logger.error(f"Error creating table: {e}")
        # Return simple unstyled table as fallback
        # NOTE(review): this fallback is built from the raw table_data and
        # is itself unguarded - confirm it cannot raise for the same input.
        from reportlab.platypus import Table

        return Table(table_data)
815
+
816
+ def _convert_markdown_to_html(self, text: str) -> str:
817
+ r"""Convert basic markdown formatting to HTML for PDF rendering.
818
+
819
+ Args:
820
+ text (str): Text with markdown formatting.
821
+
822
+ Returns:
823
+ str: Text with HTML formatting.
824
+ """
825
+ # Define conversion patterns
826
+ conversions = [
827
+ (r'\*\*(.*?)\*\*', r'<b>\1</b>'), # Bold with **
828
+ (r'__(.*?)__', r'<b>\1</b>'), # Bold with __
829
+ (
830
+ r'(?<!\*)\*(?!\*)(.*?)(?<!\*)\*(?!\*)',
831
+ r'<i>\1</i>',
832
+ ), # Italic with *
833
+ (r'(?<!_)_(?!_)(.*?)(?<!_)_(?!_)', r'<i>\1</i>'), # Italic with _
834
+ (r'`(.*?)`', r'<font name="Courier">\1</font>'), # Inline code
835
+ ]
836
+
837
+ for pattern, replacement in conversions:
838
+ text = re.sub(pattern, replacement, text)
839
+
840
+ return text
841
+
842
+ def _ensure_html_utf8_meta(self, content: str) -> str:
843
+ r"""Ensure HTML content has UTF-8 meta tag.
844
+
845
+ Args:
846
+ content (str): The HTML content.
847
+
848
+ Returns:
849
+ str: HTML content with UTF-8 meta tag.
850
+ """
851
+ # Check if content already has a charset meta tag
852
+ has_charset = re.search(
853
+ r'<meta[^>]*charset[^>]*>', content, re.IGNORECASE
854
+ )
855
+
856
+ # UTF-8 meta tag
857
+ utf8_meta = '<meta charset="utf-8">'
858
+
859
+ if has_charset:
860
+ # Replace existing charset with UTF-8
861
+ content = re.sub(
862
+ r'<meta[^>]*charset[^>]*>',
863
+ utf8_meta,
864
+ content,
865
+ flags=re.IGNORECASE,
866
+ )
867
+ else:
868
+ # Add UTF-8 meta tag
869
+ # Try to find <head> tag
870
+ head_match = re.search(r'<head[^>]*>', content, re.IGNORECASE)
871
+ if head_match:
872
+ # Insert after <head> tag
873
+ insert_pos = head_match.end()
874
+ content = (
875
+ content[:insert_pos]
876
+ + '\n '
877
+ + utf8_meta
878
+ + content[insert_pos:]
879
+ )
880
+ else:
881
+ # No <head> tag found, check if there's <html> tag
882
+ html_match = re.search(r'<html[^>]*>', content, re.IGNORECASE)
883
+ if html_match:
884
+ # Insert <head> with meta tag after <html>
885
+ insert_pos = html_match.end()
886
+ content = (
887
+ content[:insert_pos]
888
+ + '\n<head>\n '
889
+ + utf8_meta
890
+ + '\n</head>'
891
+ + content[insert_pos:]
892
+ )
893
+ else:
894
+ # No proper HTML structure, wrap content
895
+ content = (
896
+ '<!DOCTYPE html>\n<html>\n<head>\n '
897
+ + utf8_meta
898
+ + '\n</head>\n<body>\n'
899
+ + content
900
+ + '\n</body>\n</html>'
901
+ )
902
+
903
+ return content
904
+
905
+ def _write_csv_file(
906
+ self,
907
+ file_path: Path,
908
+ content: Union[str, List[List]],
909
+ encoding: str = "utf-8-sig",
910
+ ) -> None:
911
+ r"""Write CSV content to a file.
912
+
913
+ Args:
914
+ file_path (Path): The target file path.
915
+ content (Union[str, List[List]]): The CSV content as a string or
916
+ list of lists.
917
+ encoding (str): Character encoding to use.
918
+ (default: :obj:`utf-8-sig`)
919
+ """
920
+ import csv
921
+
922
+ with file_path.open("w", encoding=encoding, newline='') as f:
923
+ if isinstance(content, str):
924
+ f.write(content)
925
+ else:
926
+ writer = csv.writer(f)
927
+ writer.writerows(content)
928
+
929
+ def _write_json_file(
930
+ self,
931
+ file_path: Path,
932
+ content: str,
933
+ encoding: str = "utf-8",
934
+ ) -> None:
935
+ r"""Write JSON content to a file.
936
+
937
+ Args:
938
+ file_path (Path): The target file path.
939
+ content (str): The JSON content as a string.
940
+ encoding (str): Character encoding to use. (default: :obj:`utf-8`)
941
+ """
942
+ import json
943
+
944
+ with file_path.open("w", encoding=encoding) as f:
945
+ if isinstance(content, str):
946
+ try:
947
+ # Try parsing as JSON string first
948
+ data = json.loads(content)
949
+ json.dump(data, f, ensure_ascii=False)
950
+ except json.JSONDecodeError:
951
+ # If not valid JSON string, write as is
952
+ f.write(content)
953
+ else:
954
+ # If not string, dump as JSON
955
+ json.dump(content, f, ensure_ascii=False)
956
+
957
+ def _write_simple_text_file(
958
+ self, file_path: Path, content: str, encoding: str = "utf-8"
959
+ ) -> None:
960
+ r"""Write text content to a file (used for HTML, Markdown, YAML, etc.).
961
+
962
+ Args:
963
+ file_path (Path): The target file path.
964
+ content (str): The content to write.
965
+ encoding (str): Character encoding to use. (default: :obj:`utf-8`)
966
+ """
967
+ # For HTML files, ensure UTF-8 meta tag is present
968
+ if file_path.suffix.lower() in ['.html', '.htm']:
969
+ content = self._ensure_html_utf8_meta(content)
970
+
971
+ with file_path.open("w", encoding=encoding) as f:
972
+ f.write(content)
973
+
974
def write_to_file(
    self,
    title: str,
    content: Union[str, List[List[str]]],
    filename: str,
    encoding: Optional[str] = None,
    use_latex: bool = False,
) -> str:
    r"""Write the given content to a file.

    If the file exists, it will be overwritten. Supports multiple formats:
    Markdown (.md, .markdown, default), Plaintext (.txt), CSV (.csv),
    DOC/DOCX (.doc, .docx), PDF (.pdf), JSON (.json), YAML (.yml, .yaml),
    and HTML (.html, .htm).

    Args:
        title (str): The title of the document.
        content (Union[str, List[List[str]]]): The content to write to the
            file. Content format varies by file type:
            - Text formats (txt, md, html, yaml): string
            - CSV: string or list of lists
            - JSON: string or serializable object
        filename (str): The name or path of the file. If a relative path is
            supplied, it is resolved to self.working_directory.
        encoding (Optional[str]): The character encoding to use. (default:
            :obj:`None`)
        use_latex (bool): Whether to use LaTeX for math rendering.
            (default: :obj:`False`)

    Returns:
        str: A message indicating success or error details.
    """
    file_path = self._resolve_filepath(filename)

    # Settle the final target path first. When no extension is provided,
    # markdown is the default; the backup check below must look at the
    # file that will actually be overwritten, not the extension-less name.
    extension = file_path.suffix.lower()
    if extension == "":
        file_path = file_path.with_suffix('.md')
        extension = '.md'

    file_path.parent.mkdir(parents=True, exist_ok=True)

    # Create backup of existing file if backup is enabled
    if file_path.exists() and self.backup_enabled:
        self._create_backup(file_path)

    try:
        # Get encoding or use default
        file_encoding = encoding or self.default_encoding

        if extension in [".doc", ".docx"]:
            self._write_docx_file(file_path, str(content))
        elif extension == ".pdf":
            self._write_pdf_file(file_path, title, content, use_latex)
        elif extension == ".csv":
            self._write_csv_file(
                file_path, content, encoding=file_encoding
            )
        elif extension == ".json":
            self._write_json_file(
                file_path,
                content,  # type: ignore[arg-type]
                encoding=file_encoding,
            )
        elif extension in [
            ".yml",
            ".yaml",
            ".html",
            ".htm",
            ".md",
            ".markdown",
        ]:
            self._write_simple_text_file(
                file_path, str(content), encoding=file_encoding
            )
        else:
            # Fallback to simple text writing for unknown or .txt
            # extensions
            self._write_text_file(
                file_path, str(content), encoding=file_encoding
            )

        msg = f"Content successfully written to file: {file_path}"
        logger.info(msg)
        return msg
    except Exception as e:
        # Writer failures are reported to the caller as a message instead
        # of being raised.
        error_msg = (
            f"Error occurred while writing to file {file_path}: {e}"
        )
        logger.error(error_msg)
        return error_msg
1065
+
1066
+ # ----------------------------------------------
1067
+ # Read File Functions
1068
+ # ----------------------------------------------
1069
def read_file(
    self, file_paths: Union[str, List[str]]
) -> Union[str, Dict[str, str]]:
    r"""Read one or more files and return their content, converted with
    MarkItDown for broad format support.

    The conversion is delegated to MarkItDownLoader, which turns various
    file formats into Markdown. Supported formats include:
    - PDF (.pdf)
    - Microsoft Office: Word (.doc, .docx), Excel (.xls, .xlsx),
      PowerPoint (.ppt, .pptx)
    - EPUB (.epub)
    - HTML (.html, .htm)
    - Images (.jpg, .jpeg, .png) for OCR
    - Audio (.mp3, .wav) for transcription
    - Text-based formats (.csv, .json, .xml, .txt, .md)
    - ZIP archives (.zip)

    Args:
        file_paths (Union[str, List[str]]): A single file path or a list
            of file paths to read. Paths can be relative or absolute.
            If relative, they will be resolved relative to the working
            directory.

    Returns:
        Union[str, Dict[str, str]]:
            - If a single file path is provided: Returns the content as
              a string.
            - If multiple file paths are provided: Returns a dictionary
              where keys are file paths and values are the corresponding
              content in Markdown format.
            If conversion fails, returns an error message.
    """
    from camel.loaders.markitdown import MarkItDownLoader

    try:
        if not isinstance(file_paths, str):
            # Several files: convert them in parallel, then key the
            # results by the paths exactly as the caller supplied them.
            targets = [str(self._resolve_filepath(fp)) for fp in file_paths]
            converted = MarkItDownLoader().convert_files(
                file_paths=targets, parallel=True
            )
            return {
                given: converted.get(
                    target, f"Failed to read file: {target}"
                )
                for given, target in zip(file_paths, targets)
            }

        # Single path (kept for backward compatibility): plain string out.
        target = str(self._resolve_filepath(file_paths))
        converted = MarkItDownLoader().convert_files(
            file_paths=[target], parallel=False
        )
        return converted.get(target, f"Failed to read file: {target}")

    except Exception as e:
        return f"Error reading file(s): {e}"
1141
+
1142
+ # ----------------------------------------------
1143
+ # Edit File Functions
1144
+ # ----------------------------------------------
1145
def edit_file(
    self, file_path: str, old_content: str, new_content: str
) -> str:
    r"""Edit a file by replacing specified content.

    This method performs simple text replacement in files. It reads
    the file, replaces all occurrences of old_content with new_content,
    and writes the result back. A backup is taken only when backups are
    enabled and a replacement is actually going to be written.

    Args:
        file_path (str): The path to the file to edit. Can be
            relative or absolute. If relative, it will be resolved
            relative to the working directory.
        old_content (str): The exact text to find and replace.
        new_content (str): The text to replace old_content with.

    Returns:
        str: A success message if the edit was successful, or an
            error message if the content wasn't found or an error occurred.
    """
    try:
        working_path = self._resolve_filepath(file_path)

        if not working_path.exists():
            return f"Error: File {working_path} does not exist"

        # Read the file content
        try:
            file_text = working_path.read_text(
                encoding=self.default_encoding
            )
        except Exception as e:
            return f"Error reading file: {e}"

        # Check if the old_content exists in the file
        if old_content not in file_text:
            return (
                f"No replacement performed: '{old_content}' not found in "
                f"{working_path}."
            )

        # Replace the content
        new_file_text = file_text.replace(old_content, new_content)

        # Back up only now that the file is known to change, and only
        # when backups are enabled (same rule as write_to_file). Doing it
        # earlier left stray backups behind for no-op edits.
        if self.backup_enabled:
            self._create_backup(working_path)

        # Write back to file
        try:
            working_path.write_text(
                new_file_text, encoding=self.default_encoding
            )
            return f"Successfully edited {working_path}"
        except Exception as e:
            return f"Error writing file: {e}"

    except Exception as e:
        return f"Error editing file: {e}"
1203
+
1204
def search_files(
    self,
    pattern: str,
    file_types: Optional[List[str]] = None,
    file_pattern: Optional[str] = None,
    path: Optional[str] = None,
) -> str:
    r"""Search for a text pattern in files with specified extensions or
    file patterns.

    This method searches for a text pattern (case-insensitive substring
    match) in files matching either the specified file types or a file
    pattern. It returns structured results showing which files contain
    the pattern, along with line numbers and matching content. Only
    regular files are searched; entries that cannot be read or decoded
    are skipped.

    Args:
        pattern (str): The text pattern to search for (case-insensitive
            string match).
        file_types (Optional[List[str]]): List of file extensions to
            search (e.g., ["md", "txt", "py"]). Do not include the dot.
            If not provided (or if it holds no usable extension) and
            file_pattern is also not provided, defaults to ["md"]
            (markdown files). Ignored if file_pattern is provided.
            (default: :obj:`None`)
        file_pattern (Optional[str]): Glob pattern for matching files
            (e.g., "*_workflow.md", "test_*.py"). If provided, this
            overrides file_types. (default: :obj:`None`)
        path (Optional[str]): Directory to search in. If not provided,
            uses the working_directory. Can be relative or absolute.
            (default: :obj:`None`)

    Returns:
        str: JSON-formatted string containing search results with the
            structure:
            {
                "pattern": "search_pattern",
                "searched_path": "/absolute/path",
                "file_types": ["md", "txt"],
                "file_pattern": "*_workflow.md",
                "matches": [
                    {
                        "file": "relative/path/to/file.md",
                        "line": 42,
                        "content": "matching line content"
                    },
                    ...
                ],
                "total_matches": 10,
                "files_searched": 5
            }
            Only one of "file_types" / "file_pattern" is present,
            depending on which one drove the search.
            If an error occurs, returns a JSON string with an "error" key.
    """
    import json

    try:
        # resolve search path
        if path:
            path_obj = Path(path)
            if not path_obj.is_absolute():
                search_path = (self.working_directory / path_obj).resolve()
            else:
                search_path = path_obj.resolve()
        else:
            search_path = self.working_directory

        # validate that search path exists
        if not search_path.exists():
            return json.dumps(
                {"error": f"Search path does not exist: {search_path}"}
            )

        if not search_path.is_dir():
            return json.dumps(
                {"error": f"Search path is not a directory: {search_path}"}
            )

        # collect all candidate paths
        candidates: List[Path] = []
        searched_types: List[str] = []

        if file_pattern:
            # use file_pattern if provided (overrides file_types)
            candidates.extend(search_path.rglob(f"**/{file_pattern}"))
        else:
            # normalize and deduplicate file types, dropping empty ones
            normalized_types = set()
            for file_type in file_types or []:
                file_type = file_type.lstrip('.')
                if file_type:
                    normalized_types.add(file_type)

            # Fall back to markdown when nothing usable was given, so
            # the reported file_types always match what was searched.
            # Sorting keeps the traversal order deterministic.
            searched_types = sorted(normalized_types) or ["md"]

            for file_type in searched_types:
                # use rglob for recursive search
                candidates.extend(search_path.rglob(f"**/*.{file_type}"))

        # stable ordering regardless of filesystem enumeration order
        candidates.sort()

        # search through files (case-insensitive)
        matches = []
        files_searched = 0
        pattern_lower = pattern.lower()

        for file_path in candidates:
            try:
                # A glob can also match directories (e.g. "docs.md/");
                # those are not searchable and must not abort the run.
                if not file_path.is_file():
                    continue

                files_searched += 1

                # read file content
                content = file_path.read_text(
                    encoding=self.default_encoding
                )
            except (UnicodeDecodeError, OSError) as e:
                # skip files that can't be read
                logger.debug(f"Skipping file {file_path}: {e}")
                continue

            # get relative path for cleaner output (once per file)
            try:
                relative_path = file_path.relative_to(search_path)
            except ValueError:
                relative_path = file_path

            # search each line for pattern (case-insensitive)
            for line_num, line in enumerate(content.splitlines(), start=1):
                if pattern_lower in line.lower():
                    matches.append(
                        {
                            "file": str(relative_path),
                            "line": line_num,
                            "content": line.strip(),
                        }
                    )

        # build result
        result = {
            "pattern": pattern,
            "searched_path": str(search_path),
            "matches": matches,
            "total_matches": len(matches),
            "files_searched": files_searched,
        }

        # include file_pattern or file_types in result
        if file_pattern:
            result["file_pattern"] = file_pattern
        else:
            result["file_types"] = searched_types

        logger.info(
            f"Search completed: found {len(matches)} matches "
            f"in {files_searched} files"
        )
        return json.dumps(result, indent=2)

    except Exception as e:
        error_msg = f"Error during file search: {e}"
        logger.error(error_msg)
        return json.dumps({"error": error_msg})
1368
+
1369
def get_tools(self) -> List[FunctionTool]:
    r"""Expose the toolkit's public operations as FunctionTool objects.

    Returns:
        List[FunctionTool]: One FunctionTool per available function, in
            the order write, read, edit, search.
    """
    exposed_methods = (
        self.write_to_file,
        self.read_file,
        self.edit_file,
        self.search_files,
    )
    return [FunctionTool(method) for method in exposed_methods]
1383
+
1384
+
1385
+ # Backward compatibility: FileWriteToolkit as deprecated alias
1386
class FileWriteToolkit(FileToolkit):
    r"""Deprecated alias of FileToolkit.

    Kept only so existing code continues to work; new code should
    instantiate FileToolkit directly.
    """

    def __init__(self, *args, **kwargs):
        from warnings import warn

        deprecation_notice = (
            "FileWriteToolkit is deprecated and will be removed in a "
            "future version. Please use FileToolkit instead."
        )
        # stacklevel=2 attributes the warning to the code that
        # instantiates this class rather than to this __init__.
        warn(deprecation_notice, DeprecationWarning, stacklevel=2)
        super().__init__(*args, **kwargs)