quantalogic 0.35.0__py3-none-any.whl → 0.40.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantalogic/__init__.py +0 -4
- quantalogic/agent.py +603 -363
- quantalogic/agent_config.py +233 -46
- quantalogic/agent_factory.py +34 -22
- quantalogic/coding_agent.py +16 -14
- quantalogic/config.py +2 -1
- quantalogic/console_print_events.py +4 -8
- quantalogic/console_print_token.py +2 -2
- quantalogic/docs_cli.py +15 -10
- quantalogic/event_emitter.py +258 -83
- quantalogic/flow/__init__.py +23 -0
- quantalogic/flow/flow.py +595 -0
- quantalogic/flow/flow_extractor.py +672 -0
- quantalogic/flow/flow_generator.py +89 -0
- quantalogic/flow/flow_manager.py +407 -0
- quantalogic/flow/flow_manager_schema.py +169 -0
- quantalogic/flow/flow_yaml.md +419 -0
- quantalogic/generative_model.py +109 -77
- quantalogic/get_model_info.py +5 -5
- quantalogic/interactive_text_editor.py +100 -73
- quantalogic/main.py +17 -21
- quantalogic/model_info_list.py +3 -3
- quantalogic/model_info_litellm.py +14 -14
- quantalogic/prompts.py +2 -1
- quantalogic/{llm.py → quantlitellm.py} +29 -39
- quantalogic/search_agent.py +4 -4
- quantalogic/server/models.py +4 -1
- quantalogic/task_file_reader.py +5 -5
- quantalogic/task_runner.py +20 -20
- quantalogic/tool_manager.py +10 -21
- quantalogic/tools/__init__.py +98 -68
- quantalogic/tools/composio/composio.py +416 -0
- quantalogic/tools/{generate_database_report_tool.py → database/generate_database_report_tool.py} +4 -9
- quantalogic/tools/database/sql_query_tool_advanced.py +261 -0
- quantalogic/tools/document_tools/markdown_to_docx_tool.py +620 -0
- quantalogic/tools/document_tools/markdown_to_epub_tool.py +438 -0
- quantalogic/tools/document_tools/markdown_to_html_tool.py +362 -0
- quantalogic/tools/document_tools/markdown_to_ipynb_tool.py +319 -0
- quantalogic/tools/document_tools/markdown_to_latex_tool.py +420 -0
- quantalogic/tools/document_tools/markdown_to_pdf_tool.py +623 -0
- quantalogic/tools/document_tools/markdown_to_pptx_tool.py +319 -0
- quantalogic/tools/duckduckgo_search_tool.py +2 -4
- quantalogic/tools/finance/alpha_vantage_tool.py +440 -0
- quantalogic/tools/finance/ccxt_tool.py +373 -0
- quantalogic/tools/finance/finance_llm_tool.py +387 -0
- quantalogic/tools/finance/google_finance.py +192 -0
- quantalogic/tools/finance/market_intelligence_tool.py +520 -0
- quantalogic/tools/finance/technical_analysis_tool.py +491 -0
- quantalogic/tools/finance/tradingview_tool.py +336 -0
- quantalogic/tools/finance/yahoo_finance.py +236 -0
- quantalogic/tools/git/bitbucket_clone_repo_tool.py +181 -0
- quantalogic/tools/git/bitbucket_operations_tool.py +326 -0
- quantalogic/tools/git/clone_repo_tool.py +189 -0
- quantalogic/tools/git/git_operations_tool.py +532 -0
- quantalogic/tools/google_packages/google_news_tool.py +480 -0
- quantalogic/tools/grep_app_tool.py +123 -186
- quantalogic/tools/{dalle_e.py → image_generation/dalle_e.py} +37 -27
- quantalogic/tools/jinja_tool.py +6 -10
- quantalogic/tools/language_handlers/__init__.py +22 -9
- quantalogic/tools/list_directory_tool.py +131 -42
- quantalogic/tools/llm_tool.py +45 -15
- quantalogic/tools/llm_vision_tool.py +59 -7
- quantalogic/tools/markitdown_tool.py +17 -5
- quantalogic/tools/nasa_packages/models.py +47 -0
- quantalogic/tools/nasa_packages/nasa_apod_tool.py +232 -0
- quantalogic/tools/nasa_packages/nasa_neows_tool.py +147 -0
- quantalogic/tools/nasa_packages/services.py +82 -0
- quantalogic/tools/presentation_tools/presentation_llm_tool.py +396 -0
- quantalogic/tools/product_hunt/product_hunt_tool.py +258 -0
- quantalogic/tools/product_hunt/services.py +63 -0
- quantalogic/tools/rag_tool/__init__.py +48 -0
- quantalogic/tools/rag_tool/document_metadata.py +15 -0
- quantalogic/tools/rag_tool/query_response.py +20 -0
- quantalogic/tools/rag_tool/rag_tool.py +566 -0
- quantalogic/tools/rag_tool/rag_tool_beta.py +264 -0
- quantalogic/tools/read_html_tool.py +24 -38
- quantalogic/tools/replace_in_file_tool.py +10 -10
- quantalogic/tools/safe_python_interpreter_tool.py +10 -24
- quantalogic/tools/search_definition_names.py +2 -2
- quantalogic/tools/sequence_tool.py +14 -23
- quantalogic/tools/sql_query_tool.py +17 -19
- quantalogic/tools/tool.py +39 -15
- quantalogic/tools/unified_diff_tool.py +1 -1
- quantalogic/tools/utilities/csv_processor_tool.py +234 -0
- quantalogic/tools/utilities/download_file_tool.py +179 -0
- quantalogic/tools/utilities/mermaid_validator_tool.py +661 -0
- quantalogic/tools/utils/__init__.py +1 -4
- quantalogic/tools/utils/create_sample_database.py +24 -38
- quantalogic/tools/utils/generate_database_report.py +74 -82
- quantalogic/tools/wikipedia_search_tool.py +17 -21
- quantalogic/utils/ask_user_validation.py +1 -1
- quantalogic/utils/async_utils.py +35 -0
- quantalogic/utils/check_version.py +3 -5
- quantalogic/utils/get_all_models.py +2 -1
- quantalogic/utils/git_ls.py +21 -7
- quantalogic/utils/lm_studio_model_info.py +9 -7
- quantalogic/utils/python_interpreter.py +113 -43
- quantalogic/utils/xml_utility.py +178 -0
- quantalogic/version_check.py +1 -1
- quantalogic/welcome_message.py +7 -7
- quantalogic/xml_parser.py +0 -1
- {quantalogic-0.35.0.dist-info → quantalogic-0.40.0.dist-info}/METADATA +41 -1
- quantalogic-0.40.0.dist-info/RECORD +148 -0
- quantalogic-0.35.0.dist-info/RECORD +0 -102
- {quantalogic-0.35.0.dist-info → quantalogic-0.40.0.dist-info}/LICENSE +0 -0
- {quantalogic-0.35.0.dist-info → quantalogic-0.40.0.dist-info}/WHEEL +0 -0
- {quantalogic-0.35.0.dist-info → quantalogic-0.40.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,620 @@
|
|
1
|
+
"""Tool for converting markdown content to well-structured DOCX documents.
|
2
|
+
|
3
|
+
Why this tool:
|
4
|
+
- Provides a standardized way to convert markdown to professional DOCX documents
|
5
|
+
- Maintains consistent styling and formatting across documents
|
6
|
+
- Handles complex elements like diagrams, code blocks, and tables
|
7
|
+
- Supports customization through templates and style configurations
|
8
|
+
"""
|
9
|
+
|
10
|
+
import hashlib
|
11
|
+
import json
|
12
|
+
import os
|
13
|
+
import tempfile
|
14
|
+
from pathlib import Path
|
15
|
+
from typing import Dict, List, Optional, Tuple, Union
|
16
|
+
|
17
|
+
import markdown
|
18
|
+
import requests
|
19
|
+
from bs4 import BeautifulSoup, Tag
|
20
|
+
from docx import Document
|
21
|
+
from docx.enum.style import WD_STYLE_TYPE
|
22
|
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
23
|
+
from docx.shared import Inches, Pt, RGBColor
|
24
|
+
from loguru import logger
|
25
|
+
from PIL import Image
|
26
|
+
from pygments import highlight
|
27
|
+
from pygments.formatters import HtmlFormatter
|
28
|
+
from pygments.lexers import TextLexer, get_lexer_by_name
|
29
|
+
from pygments.styles import get_style_by_name
|
30
|
+
|
31
|
+
from quantalogic.tools.tool import Tool, ToolArgument
|
32
|
+
|
33
|
+
|
34
|
+
class MarkdownToDocxTool(Tool):
|
35
|
+
"""Converts markdown to professional DOCX documents with advanced formatting."""
|
36
|
+
|
37
|
+
# Model options; presumably consumed by the Tool base class — confirm against Tool.
model_config = {"arbitrary_types_allowed": True}

# Identity of the tool and the summary shown for it.
name: str = "markdown_to_docx_tool"
description: str = (
    "Converts markdown to DOCX with support for images, Mermaid diagrams, code blocks, and advanced document formatting."
)
# This tool is declared as not needing validation before it runs.
need_validation: bool = False
|
47
|
+
|
48
|
+
# Arguments accepted by execute(), in the same order as its parameters.
arguments: List[ToolArgument] = [
    # Markdown source to convert (mandatory).
    ToolArgument(
        name="markdown_content",
        arg_type="string",
        required=True,
        description="Markdown content with support for advanced formatting",
        example='''# Technical Documentation

## Overview
This document demonstrates various formatting capabilities.

## Code Examples

### Python Code
```python
def hello_world():
"""Greet the world."""
return "Hello, World!"
```

### JavaScript Code
```javascript
function calculateTotal(items) {
return items.reduce((sum, item) => sum + item.price, 0);
}
```

## System Architecture
```mermaid
graph TD
A[Frontend] --> B[API Gateway]
B --> C[Microservices]
C --> D[(Database)]
B --> E[Cache]
```

## Feature List
1. **Authentication**
- OAuth 2.0 support
- Multi-factor authentication
- Role-based access control

2. **Data Processing**
- Real-time analytics
- Batch processing
- Data validation

## Performance Metrics
| Metric | Value | Status |
|--------|--------|--------|
| Latency | 100ms | ✅ |
| Uptime | 99.9% | ✅ |
| Error Rate | 0.1% | ✅ |

> **Note**: All metrics are measured over a 30-day period.


''',
    ),
    # Destination of the generated DOCX file (mandatory).
    ToolArgument(
        name="output_path",
        arg_type="string",
        required=True,
        description="Path for saving the DOCX file",
        example="/path/to/output.docx",
    ),
    # Optional DOCX template used as the starting document.
    ToolArgument(
        name="template_path",
        arg_type="string",
        required=False,
        description="Optional DOCX template path. Use a template for consistent corporate styling.",
        example="/path/to/template.docx",
    ),
    # Optional JSON object whose keys override DEFAULT_STYLES.
    ToolArgument(
        name="style_config",
        arg_type="string",
        required=False,
        description="JSON string with style settings",
        example='''{
"font_name": "Arial",
"title_size": 32,
"heading1_size": 28,
"heading2_size": 24,
"heading3_size": 20,
"body_size": 11,
"code_size": 10,
"code_font": "Consolas",
"primary_color": [0, 112, 192],
"secondary_color": [68, 114, 196],
"text_color": [0, 0, 0],
"link_color": [0, 0, 255],
"code_background": [245, 245, 245],
"code_border_color": [200, 200, 200],
"table_header_background": [217, 217, 217],
"margins": {
"top": 1,
"bottom": 1,
"left": 1,
"right": 1
}
}''',
    ),
]
|
151
|
+
|
152
|
+
# Default style configuration, used for every key the caller's style_config
# does not override. Sizes are passed to Pt() and colours to RGBColor() by the
# methods that read them; "margins" is a nested mapping, so the value type
# must admit Dict[str, int] as well as scalars and RGB lists.
DEFAULT_STYLES: Dict[str, Union[str, int, List[int], Dict[str, int]]] = {
    "font_name": "Calibri",
    "title_size": 32,
    "heading1_size": 28,
    "heading2_size": 24,
    "heading3_size": 20,
    "body_size": 11,
    "code_size": 10,
    "code_font": "Consolas",
    "primary_color": [0, 112, 192],
    "secondary_color": [68, 114, 196],
    "text_color": [0, 0, 0],
    "link_color": [0, 0, 255],
    "code_background": [245, 245, 245],
    "code_border_color": [200, 200, 200],
    "table_header_background": [217, 217, 217],
    "margins": {
        "top": 1,
        "bottom": 1,
        "left": 1,
        "right": 1,
    },
}
|
176
|
+
|
177
|
+
def _normalize_path(self, path: str) -> Path:
    """Turn a path string into a resolved ``Path``.

    Args:
        path: Path string; a leading "~" is expanded with ``os.path.expanduser``.

    Returns:
        The result of ``Path(...).resolve()`` on the (possibly expanded) string.
    """
    expanded = os.path.expanduser(path) if path.startswith("~") else path
    return Path(expanded).resolve()
|
189
|
+
|
190
|
+
def _parse_style_config(self, style_config: Optional[str]) -> Dict:
    """Merge a JSON style configuration over the default styles.

    Args:
        style_config: JSON object encoded as a string, or None/empty to use
            the defaults unchanged.

    Returns:
        A new dictionary holding DEFAULT_STYLES overridden by the supplied
        values. Nested mappings such as "margins" are merged key by key, so a
        partial override keeps the remaining defaults. Invalid input is
        logged and the defaults are returned.
    """
    import copy  # local import: only this method needs it

    # Deep copy so that changes to nested values (e.g. "margins") can never
    # leak back into the shared DEFAULT_STYLES mapping.
    config = copy.deepcopy(self.DEFAULT_STYLES)
    if not style_config:
        return config

    try:
        custom_styles = json.loads(style_config)
    except (json.JSONDecodeError, TypeError) as e:
        logger.warning(f"Invalid style config, using defaults: {e}")
        return config

    # Valid JSON is not necessarily an object ("[1, 2]", "3", ...); calling
    # dict.update with such a value would raise instead of falling back.
    if not isinstance(custom_styles, dict):
        logger.warning(
            f"Invalid style config, using defaults: expected a JSON object, got {type(custom_styles).__name__}"
        )
        return config

    for key, value in custom_styles.items():
        if isinstance(value, dict) and isinstance(config.get(key), dict):
            config[key].update(value)
        else:
            config[key] = value
    return config
|
207
|
+
|
208
|
+
def _create_document(self, template_path: Optional[str] = None) -> Document:
    """Create the working document and make sure the custom styles exist.

    Args:
        template_path: Optional path to a DOCX template. When it does not
            point to an existing file, a warning is logged and the default
            python-docx template is used instead.

    Returns:
        A document that contains the 'Code', 'CodeHeader', 'Diagram' and
        'Caption' paragraph styles. Styles already present in the template
        are left untouched.
    """
    doc = None
    if template_path:
        template_file = self._normalize_path(template_path)
        # is_file() rather than exists(): a directory "exists" but cannot be
        # opened as a template and would make Document() raise.
        if template_file.is_file():
            # python-docx documents str paths and file-like objects as
            # inputs, so hand it a string rather than a Path.
            doc = Document(str(template_file))
        else:
            logger.warning(f"Template not found: {template_file}. Using default template.")
    if doc is None:
        doc = Document()

    styles = doc.styles

    def new_paragraph_style(style_name):
        """Add paragraph style *style_name*; return None if it already exists."""
        if style_name in styles:
            return None
        return styles.add_style(style_name, WD_STYLE_TYPE.PARAGRAPH)

    # Code block style
    code_style = new_paragraph_style('Code')
    if code_style is not None:
        code_style.font.name = self.DEFAULT_STYLES["code_font"]
        code_style.font.size = Pt(self.DEFAULT_STYLES["code_size"])
        code_format = code_style.paragraph_format
        code_format.space_before = Pt(12)
        code_format.space_after = Pt(12)
        code_format.left_indent = Inches(0.5)
        code_format.right_indent = Inches(0.5)
        code_format.first_line_indent = Inches(0)

    # Code header style (language label above a code block)
    header_style = new_paragraph_style('CodeHeader')
    if header_style is not None:
        header_style.font.name = self.DEFAULT_STYLES["font_name"]
        header_style.font.size = Pt(self.DEFAULT_STYLES["code_size"] + 2)
        header_style.font.bold = True
        header_format = header_style.paragraph_format
        header_format.space_before = Pt(12)
        header_format.space_after = Pt(6)
        header_format.left_indent = Inches(0.5)

    # Diagram style (centred picture paragraph)
    diagram_style = new_paragraph_style('Diagram')
    if diagram_style is not None:
        diagram_format = diagram_style.paragraph_format
        diagram_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
        diagram_format.space_before = Pt(12)
        diagram_format.space_after = Pt(12)

    # Caption style
    caption_style = new_paragraph_style('Caption')
    if caption_style is not None:
        caption_style.font.name = self.DEFAULT_STYLES["font_name"]
        caption_style.font.size = Pt(10)
        caption_style.font.italic = True
        caption_style.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
        caption_style.paragraph_format.space_after = Pt(18)

    return doc
|
264
|
+
|
265
|
+
def _apply_text_style(self, run, style: str, style_config: Dict) -> None:
    """Format *run* according to a named inline style.

    Args:
        run: Document run to format.
        style: One of 'bold', 'italic', 'code' or 'link'; any other value
            leaves the run unchanged.
        style_config: Style configuration; "code_font" is read for 'code'
            and "link_color" for 'link'.
    """
    if style == 'bold':
        run.bold = True
        return
    if style == 'italic':
        run.italic = True
        return
    if style == 'code':
        run.font.name = style_config["code_font"]
        return
    if style == 'link':
        link_rgb = RGBColor(*style_config["link_color"])
        run.font.color.rgb = link_rgb
        run.underline = True
|
282
|
+
|
283
|
+
def _handle_image(self, src: str) -> Optional[str]:
    """Resolve an image source to a local file path.

    Remote images (http/https) are downloaded into a directory under the
    system temp dir; any other source is treated as a local path and returned
    unchanged.

    Args:
        src: Image source (URL or path).

    Returns:
        Path to a local image file, or None when the source is empty or the
        download failed.
    """
    from urllib.parse import urlparse  # local import: only needed here

    if not src:
        return None
    try:
        if not src.startswith(('http://', 'https://')):
            return src

        # Without a timeout an unresponsive server blocks the conversion forever.
        response = requests.get(src, timeout=30)
        response.raise_for_status()

        # Take the extension from the URL *path* only, so query strings and
        # extension-less URLs cannot leak "/" or "?" into the file name.
        suffix = Path(urlparse(src).path).suffix.lower()
        if not (1 < len(suffix) <= 6 and suffix[1:].isalnum()):
            suffix = ".img"

        # hash() is randomised per process and may be negative; a digest
        # gives a stable, file-system-safe name.
        digest = hashlib.sha256(src.encode("utf-8")).hexdigest()[:16]
        download_dir = Path(tempfile.gettempdir()) / "markdown_to_docx_images"
        download_dir.mkdir(parents=True, exist_ok=True)
        target = download_dir / f"image_{digest}{suffix}"
        target.write_bytes(response.content)
        return str(target)
    except Exception as e:
        logger.error(f"Failed to process image {src}: {e}")
        return None
|
304
|
+
|
305
|
+
def _add_image_to_doc(self, doc: Document, image_path: str) -> None:
    """Append a centred picture paragraph to the document.

    The picture is at most 6 inches wide; smaller images keep the width they
    have when their pixels are interpreted at 96 per inch. Failures are
    logged and leave the document unchanged.

    Args:
        doc: Target document.
        image_path: Path to the image file.
    """
    try:
        # Read the size first (and close the file) so that an unreadable
        # image does not leave an empty paragraph behind.
        with Image.open(image_path) as img:
            pixel_width = img.width
        width = min(6.0, pixel_width / 96)  # Max 6 inches, convert from pixels

        # The picture must go into a run of the centred paragraph;
        # doc.add_picture() would append it in a new, unaligned paragraph.
        paragraph = doc.add_paragraph()
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        paragraph.add_run().add_picture(image_path, width=Inches(width))
    except Exception as e:
        logger.error(f"Failed to add image {image_path}: {e}")
|
321
|
+
|
322
|
+
def _process_mermaid(self, code: str) -> Tuple[Optional[str], Optional[str]]:
    """Render Mermaid source to a PNG via the mermaid.ink service.

    Rendered diagrams are cached in "<tempdir>/mermaid_cache", keyed by the
    MD5 of the source, so the same diagram is fetched only once.

    Args:
        code: Mermaid diagram source code.

    Returns:
        Tuple of (diagram_path, error_message); exactly one of them is None.
    """
    try:
        # MD5 is used only as a cache key, not for security.
        code_hash = hashlib.md5(code.encode()).hexdigest()
        cache_dir = Path(tempfile.gettempdir()) / "mermaid_cache"
        cache_dir.mkdir(parents=True, exist_ok=True)

        cache_path = cache_dir / f"{code_hash}.png"
        if cache_path.is_file() and cache_path.stat().st_size > 0:
            return str(cache_path), None

        # Use Mermaid.ink API for rendering. The diagram travels inside the
        # URL path, so it must be URL-safe base64: the standard alphabet can
        # contain "/" and "+", which corrupt the path.
        import base64
        encoded = base64.urlsafe_b64encode(code.encode('utf-8')).decode('ascii')
        # type=png asks for a PNG so the content matches the cached ".png" name.
        graphurl = f"https://mermaid.ink/img/{encoded}?type=png"

        response = requests.get(graphurl, timeout=30)
        if response.status_code != 200:
            error = f"Failed to generate Mermaid diagram: HTTP {response.status_code}"
            logger.error(error)
            return None, error

        # Write to a sibling file and rename, so an interrupted write can
        # never leave a truncated image in the cache.
        partial_path = cache_dir / f"{code_hash}.part"
        partial_path.write_bytes(response.content)
        partial_path.replace(cache_path)
        return str(cache_path), None

    except Exception as e:
        error = f"Failed to generate diagram: {e}"
        logger.error(error)
        return None, error
|
360
|
+
|
361
|
+
def _highlight_code_lines(
    self, code_text: str, language: str
) -> List[List[Tuple[str, Optional[str]]]]:
    """Tokenize code with Pygments and group the tokens by source line.

    Args:
        code_text: Source code to tokenize.
        language: Pygments lexer alias; unknown aliases fall back to plain text.

    Returns:
        One list per source line, each holding (text, hex_color) segments.
        hex_color is a 6-digit hex string without "#", or None when the
        style defines no colour for the token.
    """
    try:
        lexer = get_lexer_by_name(language, stripall=False)
    except ValueError:
        # Pygments raises ClassNotFound, a ValueError subclass, for unknown aliases.
        lexer = TextLexer()

    # A light-background style: the code is shaded light grey, on which the
    # near-white tokens of a dark theme such as monokai would be unreadable.
    palette = get_style_by_name('default')

    lines: List[List[Tuple[str, Optional[str]]]] = [[]]
    for token_type, value in lexer.get_tokens(code_text):
        color = palette.style_for_token(token_type).get('color')
        # A single token may span several lines (strings, comments, newlines).
        for index, part in enumerate(value.split('\n')):
            if index:
                lines.append([])
            if part:
                lines[-1].append((part, color))

    # The lexer terminates its output with a newline; drop the empty tail.
    while len(lines) > 1 and not lines[-1]:
        lines.pop()
    return lines


def _process_code_block(self, doc: Document, element: Tag, style_config: Dict) -> None:
    """Add a code block with a language header, line numbers and highlighting.

    Tokens come straight from the Pygments lexer. Parsing HtmlFormatter
    output for 'span.line' elements finds nothing, because the formatter
    emits no such elements, so the block would be rendered empty.

    Args:
        doc: Target document; must provide the 'Code' and 'CodeHeader' styles.
        element: The <code> element holding the source text.
        style_config: Style configuration; "code_font", "code_size" and
            "secondary_color" are read.
    """
    # docx is already a dependency of this module; imported locally because
    # only this method needs the highlight colour enum.
    from docx.enum.text import WD_COLOR_INDEX

    code_text = ""
    code_para = None
    try:
        # Get language if specified
        code_class = element.get('class', [])
        if isinstance(code_class, str):
            code_class = code_class.split()
        language = code_class[0].replace('language-', '') if code_class else 'text'

        # Strip surrounding newlines only: leading spaces are indentation.
        code_text = element.get_text().strip('\n')
        if not code_text.strip():
            return

        # Tokenize before touching the document so that a highlighting
        # failure cannot leave a half-written block behind.
        try:
            lines = self._highlight_code_lines(code_text, language)
        except Exception as e:
            logger.warning(f"Syntax highlighting failed, rendering plain text: {e}")
            lines = [[(line, None)] if line else [] for line in code_text.split('\n')]

        # Add language header
        header = doc.add_paragraph(style='CodeHeader')
        lang_run = header.add_run(f"{language.upper()}")
        lang_run.font.color.rgb = RGBColor(*style_config["secondary_color"])

        # Create code block container
        code_para = doc.add_paragraph(style='Code')

        def add_code_run(text, rgb=None):
            """Append a shaded monospace run to the code paragraph."""
            run = code_para.add_run(text)
            run.font.name = style_config["code_font"]
            run.font.size = Pt(style_config["code_size"])
            run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
            if rgb is not None:
                run.font.color.rgb = rgb
            return run

        gutter_color = RGBColor(128, 128, 128)
        for line_num, segments in enumerate(lines, 1):
            if line_num > 1:
                code_para.add_run('\n')  # rendered as a line break

            # Add line number with proper padding
            add_code_run(f"{line_num:3d} │ ", gutter_color)

            for text, hex_color in segments:
                rgb = None
                if hex_color and len(hex_color) == 6:
                    try:
                        rgb = RGBColor(
                            int(hex_color[0:2], 16),
                            int(hex_color[2:4], 16),
                            int(hex_color[4:6], 16),
                        )
                    except ValueError:
                        rgb = None  # unexpected colour syntax: keep the default colour
                add_code_run(text, rgb)

    except Exception as e:
        logger.error(f"Failed to process code block: {e}")
        if not code_text:
            return
        # Fallback to simple code block, without duplicating partial output.
        if code_para is None:
            doc.add_paragraph(code_text, style='Code')
        else:
            code_para.clear()
            code_para.add_run(code_text)
|
437
|
+
|
438
|
+
def _process_paragraph(self, doc: Document, element: Tag, style_config: Dict) -> None:
|
439
|
+
"""Process a paragraph element and add it to document."""
|
440
|
+
try:
|
441
|
+
p = doc.add_paragraph()
|
442
|
+
for child in element.children:
|
443
|
+
if child.name == 'strong':
|
444
|
+
run = p.add_run(child.get_text())
|
445
|
+
run.bold = True
|
446
|
+
elif child.name == 'em':
|
447
|
+
run = p.add_run(child.get_text())
|
448
|
+
run.italic = True
|
449
|
+
elif child.name == 'code':
|
450
|
+
run = p.add_run(child.get_text())
|
451
|
+
run.font.name = style_config["code_font"]
|
452
|
+
elif child.name == 'a':
|
453
|
+
run = p.add_run(child.get_text())
|
454
|
+
run.font.color.rgb = RGBColor(*style_config["link_color"])
|
455
|
+
run.underline = True
|
456
|
+
else:
|
457
|
+
p.add_run(str(child))
|
458
|
+
except Exception as e:
|
459
|
+
logger.error(f"Failed to process paragraph: {e}")
|
460
|
+
|
461
|
+
def _process_table(self, doc: Document, element: Tag) -> None:
|
462
|
+
"""Process a table element and add it to document."""
|
463
|
+
try:
|
464
|
+
rows = element.find_all('tr')
|
465
|
+
if rows:
|
466
|
+
table = doc.add_table(rows=len(rows), cols=len(rows[0].find_all(['td', 'th'])))
|
467
|
+
table.style = 'Table Grid'
|
468
|
+
|
469
|
+
for i, row in enumerate(rows):
|
470
|
+
cells = row.find_all(['td', 'th'])
|
471
|
+
for j, cell in enumerate(cells):
|
472
|
+
table.cell(i, j).text = cell.get_text().strip()
|
473
|
+
except Exception as e:
|
474
|
+
logger.error(f"Failed to process table: {e}")
|
475
|
+
|
476
|
+
def _process_element(self, doc: Document, element: Tag, style_config: Dict) -> None:
|
477
|
+
"""Process a single HTML element and add it to document."""
|
478
|
+
try:
|
479
|
+
if element.name in ['h1', 'h2', 'h3']:
|
480
|
+
level = int(element.name[1])
|
481
|
+
doc.add_heading(element.get_text(), level=level)
|
482
|
+
|
483
|
+
elif element.name == 'pre':
|
484
|
+
code_block = element.find('code')
|
485
|
+
if code_block:
|
486
|
+
# Check if it's a Mermaid diagram
|
487
|
+
if 'language-mermaid' in code_block.get('class', []):
|
488
|
+
code = code_block.get_text().strip()
|
489
|
+
|
490
|
+
# Add section header
|
491
|
+
doc.add_heading("Mermaid Diagram", level=4)
|
492
|
+
|
493
|
+
# Add original Mermaid code
|
494
|
+
doc.add_paragraph("Source Code:", style='CodeHeader')
|
495
|
+
self._process_code_block(doc, code_block, style_config)
|
496
|
+
|
497
|
+
# Generate and add diagram
|
498
|
+
diagram_path, error = self._process_mermaid(code)
|
499
|
+
if diagram_path:
|
500
|
+
# Add diagram with proper styling
|
501
|
+
diagram_para = doc.add_paragraph(style='Diagram')
|
502
|
+
run = diagram_para.add_run()
|
503
|
+
run.add_picture(diagram_path, width=Inches(6.0))
|
504
|
+
|
505
|
+
# Add caption
|
506
|
+
caption = doc.add_paragraph(style='Caption')
|
507
|
+
caption.add_run("Generated Mermaid Diagram")
|
508
|
+
else:
|
509
|
+
# Add error message
|
510
|
+
error_para = doc.add_paragraph(style='Quote')
|
511
|
+
error_run = error_para.add_run(f"⚠️ Error generating diagram: {error}")
|
512
|
+
error_run.font.color.rgb = RGBColor(255, 0, 0)
|
513
|
+
error_run.bold = True
|
514
|
+
else:
|
515
|
+
self._process_code_block(doc, code_block, style_config)
|
516
|
+
|
517
|
+
elif element.name == 'p':
|
518
|
+
if element.find('img'):
|
519
|
+
img_src = element.find('img').get('src', '')
|
520
|
+
if img_path := self._handle_image(img_src):
|
521
|
+
self._add_image_to_doc(doc, img_path)
|
522
|
+
else:
|
523
|
+
self._process_paragraph(doc, element, style_config)
|
524
|
+
|
525
|
+
elif element.name == 'table':
|
526
|
+
self._process_table(doc, element)
|
527
|
+
|
528
|
+
except Exception as e:
|
529
|
+
logger.error(f"Failed to process element {element.name}: {e}")
|
530
|
+
|
531
|
+
def execute(
    self,
    markdown_content: str,
    output_path: str,
    template_path: Optional[str] = None,
    style_config: Optional[str] = None,
) -> str:
    """Convert markdown to DOCX format.

    Args:
        markdown_content: Markdown content to convert
        output_path: Output DOCX file path; missing parent directories are created
        template_path: Optional template path
        style_config: Optional style configuration (JSON string)

    Returns:
        Success message with output path

    Raises:
        ValueError: If content is empty or not a string, or if the
            conversion fails (the original exception is chained as the cause)
    """
    # Reject None/non-string input with the documented ValueError instead of
    # an AttributeError from .strip().
    if not isinstance(markdown_content, str) or not markdown_content.strip():
        raise ValueError("Markdown content cannot be empty")

    try:
        # Setup paths and document
        target = self._normalize_path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)

        doc = self._create_document(template_path)
        # NOTE(review): of the parsed settings, the visible methods read only
        # code_font, code_size, link_color and secondary_color; font_name,
        # body_size, margins etc. appear to be unused — confirm.
        styles = self._parse_style_config(style_config)

        # Convert markdown to HTML
        html = markdown.markdown(
            markdown_content,
            extensions=['fenced_code', 'tables', 'attr_list', 'md_in_html'],
        )

        # Process elements. find_all() walks every descendant in document
        # order; _process_element decides per tag name what to render.
        soup = BeautifulSoup(html, 'html.parser')
        for element in soup.find_all():
            self._process_element(doc, element, styles)

        # Save document
        doc.save(str(target))
        return f"Successfully created DOCX: {target}"

    except Exception as e:
        logger.error(f"Failed to convert markdown: {e}")
        # Chain the cause so the original traceback is not lost.
        raise ValueError(f"Conversion failed: {str(e)}") from e
|
581
|
+
|
582
|
+
|
583
|
+
if __name__ == "__main__":
    # Manual smoke test: converts a small sample (code, Mermaid diagram,
    # table) to ./example.docx and prints the result message. Any failure is
    # logged rather than raised.
    sample_markdown = """
# Document Title

## Code Example
```python
def greet(name: str) -> str:
return f"Hello, {name}!"
```

## System Diagram
```mermaid
graph TD
A[Start] --> B[Process]
B --> C[End]
```

## Feature Status
| Feature | Status |
|---------|--------|
| Auth | Done |
| API | WIP |
"""

    try:
        demo_tool = MarkdownToDocxTool()
        outcome = demo_tool.execute(
            markdown_content=sample_markdown,
            output_path="./example.docx",
            style_config='{"font_name": "Arial"}',
        )
        print(outcome)
    except Exception as e:
        logger.error(f"Example failed: {e}")
|