doctra-0.4.0-py3-none-any.whl → doctra-0.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctra/cli/main.py +5 -12
- doctra/cli/utils.py +2 -3
- doctra/engines/image_restoration/docres_engine.py +6 -11
- doctra/engines/vlm/outlines_types.py +13 -9
- doctra/engines/vlm/service.py +4 -2
- doctra/exporters/excel_writer.py +89 -0
- doctra/exporters/html_writer.py +206 -1
- doctra/parsers/enhanced_pdf_parser.py +124 -31
- doctra/parsers/structured_pdf_parser.py +58 -15
- doctra/parsers/table_chart_extractor.py +290 -284
- doctra/ui/app.py +39 -960
- doctra/ui/docres_ui.py +338 -0
- doctra/ui/docres_wrapper.py +120 -0
- doctra/ui/enhanced_parser_ui.py +483 -0
- doctra/ui/full_parse_ui.py +539 -0
- doctra/ui/tables_charts_ui.py +445 -0
- doctra/ui/ui_helpers.py +435 -0
- doctra/utils/progress.py +7 -7
- doctra/utils/structured_utils.py +5 -2
- doctra/version.py +1 -1
- {doctra-0.4.0.dist-info → doctra-0.4.2.dist-info}/METADATA +1 -1
- {doctra-0.4.0.dist-info → doctra-0.4.2.dist-info}/RECORD +25 -19
- {doctra-0.4.0.dist-info → doctra-0.4.2.dist-info}/WHEEL +0 -0
- {doctra-0.4.0.dist-info → doctra-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {doctra-0.4.0.dist-info → doctra-0.4.2.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,7 @@ capabilities with DocRes image restoration for improved document processing.
|
|
8
8
|
from __future__ import annotations
|
9
9
|
import os
|
10
10
|
import sys
|
11
|
+
import numpy as np
|
11
12
|
from typing import List, Dict, Any, Optional, Union
|
12
13
|
from contextlib import ExitStack
|
13
14
|
from PIL import Image
|
@@ -16,9 +17,17 @@ from tqdm import tqdm
|
|
16
17
|
from doctra.parsers.structured_pdf_parser import StructuredPDFParser
|
17
18
|
from doctra.engines.image_restoration import DocResEngine
|
18
19
|
from doctra.utils.pdf_io import render_pdf_to_images
|
19
|
-
from doctra.utils.constants import IMAGE_SUBDIRS
|
20
|
+
from doctra.utils.constants import IMAGE_SUBDIRS, EXCLUDE_LABELS
|
20
21
|
from doctra.utils.file_ops import ensure_output_dirs
|
21
22
|
from doctra.utils.progress import create_beautiful_progress_bar, create_notebook_friendly_bar
|
23
|
+
from doctra.parsers.layout_order import reading_order_key
|
24
|
+
from doctra.utils.ocr_utils import ocr_box_text
|
25
|
+
from doctra.exporters.image_saver import save_box_image
|
26
|
+
from doctra.exporters.markdown_writer import write_markdown
|
27
|
+
from doctra.exporters.html_writer import write_html, write_structured_html, render_html_table, write_html_from_lines
|
28
|
+
from doctra.exporters.excel_writer import write_structured_excel
|
29
|
+
from doctra.utils.structured_utils import to_structured_dict
|
30
|
+
from doctra.exporters.markdown_table import render_markdown_table
|
22
31
|
|
23
32
|
|
24
33
|
class EnhancedPDFParser(StructuredPDFParser):
|
@@ -132,6 +141,13 @@ class EnhancedPDFParser(StructuredPDFParser):
|
|
132
141
|
if self.use_image_restoration and self.docres_engine:
|
133
142
|
print(f"🔄 Processing PDF with image restoration: {os.path.basename(pdf_path)}")
|
134
143
|
enhanced_pages = self._process_pages_with_restoration(pdf_path, out_dir)
|
144
|
+
|
145
|
+
# Create enhanced PDF file using the already processed enhanced pages
|
146
|
+
enhanced_pdf_path = os.path.join(out_dir, f"{pdf_filename}_enhanced.pdf")
|
147
|
+
try:
|
148
|
+
self._create_enhanced_pdf_from_pages(enhanced_pages, enhanced_pdf_path)
|
149
|
+
except Exception as e:
|
150
|
+
print(f"⚠️ Failed to create enhanced PDF: {e}")
|
135
151
|
else:
|
136
152
|
print(f"🔄 Processing PDF without image restoration: {os.path.basename(pdf_path)}")
|
137
153
|
enhanced_pages = [im for (im, _, _) in render_pdf_to_images(pdf_path, dpi=self.dpi)]
|
@@ -146,7 +162,7 @@ class EnhancedPDFParser(StructuredPDFParser):
|
|
146
162
|
pil_pages = enhanced_pages
|
147
163
|
|
148
164
|
# Continue with standard parsing logic
|
149
|
-
self._process_parsing_logic(pages, pil_pages, out_dir, pdf_filename)
|
165
|
+
self._process_parsing_logic(pages, pil_pages, out_dir, pdf_filename, pdf_path)
|
150
166
|
|
151
167
|
def _process_pages_with_restoration(self, pdf_path: str, out_dir: str) -> List[Image.Image]:
|
152
168
|
"""
|
@@ -168,12 +184,12 @@ class EnhancedPDFParser(StructuredPDFParser):
|
|
168
184
|
if is_notebook:
|
169
185
|
progress_bar = create_notebook_friendly_bar(
|
170
186
|
total=len(original_pages),
|
171
|
-
desc=f"
|
187
|
+
desc=f"DocRes {self.restoration_task}"
|
172
188
|
)
|
173
189
|
else:
|
174
190
|
progress_bar = create_beautiful_progress_bar(
|
175
191
|
total=len(original_pages),
|
176
|
-
desc=f"
|
192
|
+
desc=f"DocRes {self.restoration_task}",
|
177
193
|
leave=True
|
178
194
|
)
|
179
195
|
|
@@ -186,7 +202,6 @@ class EnhancedPDFParser(StructuredPDFParser):
|
|
186
202
|
for i, page_img in enumerate(original_pages):
|
187
203
|
try:
|
188
204
|
# Convert PIL to numpy array
|
189
|
-
import numpy as np
|
190
205
|
img_array = np.array(page_img)
|
191
206
|
|
192
207
|
# Apply DocRes restoration
|
@@ -216,31 +231,22 @@ class EnhancedPDFParser(StructuredPDFParser):
|
|
216
231
|
if hasattr(progress_bar, 'close'):
|
217
232
|
progress_bar.close()
|
218
233
|
|
219
|
-
print(f"✅ Image restoration completed. Enhanced pages saved to: {enhanced_dir}")
|
220
234
|
return enhanced_pages
|
221
235
|
|
222
|
-
def _process_parsing_logic(self, pages, pil_pages, out_dir, pdf_filename):
|
236
|
+
def _process_parsing_logic(self, pages, pil_pages, out_dir, pdf_filename, pdf_path):
|
223
237
|
"""
|
224
238
|
Process the parsing logic with enhanced pages.
|
225
239
|
This is extracted from the parent class to allow customization.
|
226
240
|
"""
|
227
|
-
from doctra.utils.constants import EXCLUDE_LABELS
|
228
|
-
from doctra.parsers.layout_order import reading_order_key
|
229
|
-
from doctra.utils.ocr_utils import ocr_box_text
|
230
|
-
from doctra.exporters.image_saver import save_box_image
|
231
|
-
from doctra.exporters.markdown_writer import write_markdown
|
232
|
-
from doctra.exporters.html_writer import write_html
|
233
|
-
from doctra.exporters.excel_writer import write_structured_excel
|
234
|
-
from doctra.exporters.html_writer import write_structured_html
|
235
|
-
from doctra.utils.structured_utils import to_structured_dict
|
236
|
-
from doctra.exporters.markdown_table import render_markdown_table
|
237
241
|
|
238
242
|
fig_count = sum(sum(1 for b in p.boxes if b.label == "figure") for p in pages)
|
239
243
|
chart_count = sum(sum(1 for b in p.boxes if b.label == "chart") for p in pages)
|
240
244
|
table_count = sum(sum(1 for b in p.boxes if b.label == "table") for p in pages)
|
241
245
|
|
242
246
|
md_lines: List[str] = ["# Enhanced Document Content\n"]
|
247
|
+
html_lines: List[str] = ["<h1>Enhanced Document Content</h1>"] # For direct HTML generation
|
243
248
|
structured_items: List[Dict[str, Any]] = []
|
249
|
+
page_content: Dict[int, List[str]] = {} # Store content by page
|
244
250
|
|
245
251
|
charts_desc = "Charts (VLM → table)" if self.use_vlm else "Charts (cropped)"
|
246
252
|
tables_desc = "Tables (VLM → table)" if self.use_vlm else "Tables (cropped)"
|
@@ -263,10 +269,15 @@ class EnhancedPDFParser(StructuredPDFParser):
|
|
263
269
|
figures_bar = stack.enter_context(
|
264
270
|
create_beautiful_progress_bar(total=fig_count, desc=figures_desc, leave=True)) if fig_count else None
|
265
271
|
|
272
|
+
# Initialize page content for all pages first
|
273
|
+
for page_num in range(1, len(pil_pages) + 1):
|
274
|
+
page_content[page_num] = [f"# Page {page_num} Content\n"]
|
275
|
+
|
266
276
|
for p in pages:
|
267
277
|
page_num = p.page_index
|
268
278
|
page_img: Image.Image = pil_pages[page_num - 1]
|
269
279
|
md_lines.append(f"\n## Page {page_num}\n")
|
280
|
+
html_lines.append(f"<h2>Page {page_num}</h2>")
|
270
281
|
|
271
282
|
for i, box in enumerate(sorted(p.boxes, key=reading_order_key), start=1):
|
272
283
|
if box.label in EXCLUDE_LABELS:
|
@@ -275,7 +286,11 @@ class EnhancedPDFParser(StructuredPDFParser):
|
|
275
286
|
rel = os.path.relpath(abs_img_path, out_dir)
|
276
287
|
|
277
288
|
if box.label == "figure":
|
278
|
-
|
289
|
+
figure_md = f"![Figure — page {page_num}]({rel})\n"
|
290
|
+
figure_html = f'<img src="{rel}" alt="Figure — page {page_num}" />'
|
291
|
+
md_lines.append(figure_md)
|
292
|
+
html_lines.append(figure_html)
|
293
|
+
page_content[page_num].append(figure_md)
|
279
294
|
if figures_bar: figures_bar.update(1)
|
280
295
|
|
281
296
|
elif box.label == "chart":
|
@@ -285,18 +300,35 @@ class EnhancedPDFParser(StructuredPDFParser):
|
|
285
300
|
chart = self.vlm.extract_chart(abs_img_path)
|
286
301
|
item = to_structured_dict(chart)
|
287
302
|
if item:
|
303
|
+
# Add page and type information to structured item
|
304
|
+
item["page"] = page_num
|
305
|
+
item["type"] = "Chart"
|
288
306
|
structured_items.append(item)
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
307
|
+
|
308
|
+
# Generate both markdown and HTML tables
|
309
|
+
table_md = render_markdown_table(item.get("headers"), item.get("rows"),
|
310
|
+
title=item.get("title"))
|
311
|
+
table_html = render_html_table(item.get("headers"), item.get("rows"),
|
312
|
+
title=item.get("title"))
|
313
|
+
|
314
|
+
md_lines.append(table_md)
|
315
|
+
html_lines.append(table_html)
|
316
|
+
page_content[page_num].append(table_md)
|
293
317
|
wrote_table = True
|
294
318
|
except Exception as e:
|
295
319
|
pass
|
296
320
|
if not wrote_table:
|
297
|
-
|
321
|
+
chart_md = f"![Chart — page {page_num}]({rel})\n"
|
322
|
+
chart_html = f'<img src="{rel}" alt="Chart — page {page_num}" />'
|
323
|
+
md_lines.append(chart_md)
|
324
|
+
html_lines.append(chart_html)
|
325
|
+
page_content[page_num].append(chart_md)
|
298
326
|
else:
|
299
|
-
|
327
|
+
chart_md = f"![Chart — page {page_num}]({rel})\n"
|
328
|
+
chart_html = f'<img src="{rel}" alt="Chart — page {page_num}" />'
|
329
|
+
md_lines.append(chart_md)
|
330
|
+
html_lines.append(chart_html)
|
331
|
+
page_content[page_num].append(chart_md)
|
300
332
|
if charts_bar: charts_bar.update(1)
|
301
333
|
|
302
334
|
elif box.label == "table":
|
@@ -306,27 +338,64 @@ class EnhancedPDFParser(StructuredPDFParser):
|
|
306
338
|
table = self.vlm.extract_table(abs_img_path)
|
307
339
|
item = to_structured_dict(table)
|
308
340
|
if item:
|
341
|
+
# Add page and type information to structured item
|
342
|
+
item["page"] = page_num
|
343
|
+
item["type"] = "Table"
|
309
344
|
structured_items.append(item)
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
345
|
+
|
346
|
+
# Generate both markdown and HTML tables
|
347
|
+
table_md = render_markdown_table(item.get("headers"), item.get("rows"),
|
348
|
+
title=item.get("title"))
|
349
|
+
table_html = render_html_table(item.get("headers"), item.get("rows"),
|
350
|
+
title=item.get("title"))
|
351
|
+
|
352
|
+
md_lines.append(table_md)
|
353
|
+
html_lines.append(table_html)
|
354
|
+
page_content[page_num].append(table_md)
|
314
355
|
wrote_table = True
|
315
356
|
except Exception as e:
|
316
357
|
pass
|
317
358
|
if not wrote_table:
|
318
|
-
|
359
|
+
table_md = f"![Table — page {page_num}]({rel})\n"
|
360
|
+
table_html = f'<img src="{rel}" alt="Table — page {page_num}" />'
|
361
|
+
md_lines.append(table_md)
|
362
|
+
html_lines.append(table_html)
|
363
|
+
page_content[page_num].append(table_md)
|
319
364
|
else:
|
320
|
-
|
365
|
+
table_md = f"![Table — page {page_num}]({rel})\n"
|
366
|
+
table_html = f'<img src="{rel}" alt="Table — page {page_num}" />'
|
367
|
+
md_lines.append(table_md)
|
368
|
+
html_lines.append(table_html)
|
369
|
+
page_content[page_num].append(table_md)
|
321
370
|
if tables_bar: tables_bar.update(1)
|
322
371
|
else:
|
323
372
|
text = ocr_box_text(self.ocr_engine, page_img, box)
|
324
373
|
if text:
|
325
374
|
md_lines.append(text)
|
326
375
|
md_lines.append(self.box_separator if self.box_separator else "")
|
376
|
+
# Convert text to HTML (basic conversion)
|
377
|
+
html_text = text.replace('\n', '<br>')
|
378
|
+
html_lines.append(f"<p>{html_text}</p>")
|
379
|
+
if self.box_separator:
|
380
|
+
html_lines.append("<br>")
|
381
|
+
page_content[page_num].append(text)
|
382
|
+
page_content[page_num].append(self.box_separator if self.box_separator else "")
|
327
383
|
|
328
384
|
md_path = write_markdown(md_lines, out_dir)
|
329
|
-
|
385
|
+
|
386
|
+
# Use HTML lines if VLM is enabled for better table formatting
|
387
|
+
if self.use_vlm and html_lines:
|
388
|
+
html_path = write_html_from_lines(html_lines, out_dir)
|
389
|
+
else:
|
390
|
+
html_path = write_html(md_lines, out_dir)
|
391
|
+
|
392
|
+
# Create pages folder and save individual page markdown files
|
393
|
+
pages_dir = os.path.join(out_dir, "pages")
|
394
|
+
os.makedirs(pages_dir, exist_ok=True)
|
395
|
+
|
396
|
+
for page_num, content_lines in page_content.items():
|
397
|
+
page_md_path = os.path.join(pages_dir, f"page_{page_num:03d}.md")
|
398
|
+
write_markdown(content_lines, os.path.dirname(page_md_path), os.path.basename(page_md_path))
|
330
399
|
|
331
400
|
excel_path = None
|
332
401
|
html_structured_path = None
|
@@ -339,6 +408,30 @@ class EnhancedPDFParser(StructuredPDFParser):
|
|
339
408
|
print(f"✅ Enhanced parsing completed successfully!")
|
340
409
|
print(f"📁 Output directory: {out_dir}")
|
341
410
|
|
411
|
+
def _create_enhanced_pdf_from_pages(self, enhanced_pages: List[Image.Image], output_path: str) -> None:
|
412
|
+
"""
|
413
|
+
Create an enhanced PDF from already processed enhanced pages.
|
414
|
+
|
415
|
+
:param enhanced_pages: List of enhanced PIL images
|
416
|
+
:param output_path: Path for the enhanced PDF
|
417
|
+
"""
|
418
|
+
if not enhanced_pages:
|
419
|
+
raise ValueError("No enhanced pages provided")
|
420
|
+
|
421
|
+
try:
|
422
|
+
# Create enhanced PDF from the processed pages
|
423
|
+
enhanced_pages[0].save(
|
424
|
+
output_path,
|
425
|
+
"PDF",
|
426
|
+
resolution=100.0,
|
427
|
+
save_all=True,
|
428
|
+
append_images=enhanced_pages[1:] if len(enhanced_pages) > 1 else []
|
429
|
+
)
|
430
|
+
print(f"✅ Enhanced PDF saved from processed pages: {output_path}")
|
431
|
+
except Exception as e:
|
432
|
+
print(f"❌ Error creating enhanced PDF from pages: {e}")
|
433
|
+
raise
|
434
|
+
|
342
435
|
def restore_pdf_only(self, pdf_path: str, output_path: str = None, task: str = None) -> str:
|
343
436
|
"""
|
344
437
|
Apply DocRes restoration to a PDF without parsing.
|
@@ -20,7 +20,7 @@ from doctra.exporters.excel_writer import write_structured_excel
|
|
20
20
|
from doctra.utils.structured_utils import to_structured_dict
|
21
21
|
from doctra.exporters.markdown_table import render_markdown_table
|
22
22
|
from doctra.exporters.markdown_writer import write_markdown
|
23
|
-
from doctra.exporters.html_writer import write_html, write_structured_html
|
23
|
+
from doctra.exporters.html_writer import write_html, write_structured_html, render_html_table, write_html_from_lines
|
24
24
|
from doctra.utils.progress import create_beautiful_progress_bar, create_multi_progress_bars, create_notebook_friendly_bar
|
25
25
|
|
26
26
|
|
@@ -117,6 +117,7 @@ class StructuredPDFParser:
|
|
117
117
|
table_count = sum(sum(1 for b in p.boxes if b.label == "table") for p in pages)
|
118
118
|
|
119
119
|
md_lines: List[str] = ["# Extracted Content\n"]
|
120
|
+
html_lines: List[str] = ["<h1>Extracted Content</h1>"] # For direct HTML generation
|
120
121
|
structured_items: List[Dict[str, Any]] = []
|
121
122
|
|
122
123
|
charts_desc = "Charts (VLM → table)" if self.use_vlm else "Charts (cropped)"
|
@@ -145,6 +146,7 @@ class StructuredPDFParser:
|
|
145
146
|
page_num = p.page_index
|
146
147
|
page_img: Image.Image = pil_pages[page_num - 1]
|
147
148
|
md_lines.append(f"\n## Page {page_num}\n")
|
149
|
+
html_lines.append(f"<h2>Page {page_num}</h2>")
|
148
150
|
|
149
151
|
for i, box in enumerate(sorted(p.boxes, key=reading_order_key), start=1):
|
150
152
|
if box.label in EXCLUDE_LABELS:
|
@@ -153,7 +155,10 @@ class StructuredPDFParser:
|
|
153
155
|
rel = os.path.relpath(abs_img_path, out_dir)
|
154
156
|
|
155
157
|
if box.label == "figure":
|
156
|
-
|
158
|
+
figure_md = f"![Figure — page {page_num}]({rel})\n"
|
159
|
+
figure_html = f'<img src="{rel}" alt="Figure — page {page_num}" />'
|
160
|
+
md_lines.append(figure_md)
|
161
|
+
html_lines.append(figure_html)
|
157
162
|
if figures_bar: figures_bar.update(1)
|
158
163
|
|
159
164
|
elif box.label == "chart":
|
@@ -163,18 +168,32 @@ class StructuredPDFParser:
|
|
163
168
|
chart = self.vlm.extract_chart(abs_img_path)
|
164
169
|
item = to_structured_dict(chart)
|
165
170
|
if item:
|
171
|
+
# Add page and type information to structured item
|
172
|
+
item["page"] = page_num
|
173
|
+
item["type"] = "Chart"
|
166
174
|
structured_items.append(item)
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
175
|
+
|
176
|
+
# Generate both markdown and HTML tables
|
177
|
+
table_md = render_markdown_table(item.get("headers"), item.get("rows"),
|
178
|
+
title=item.get("title"))
|
179
|
+
table_html = render_html_table(item.get("headers"), item.get("rows"),
|
180
|
+
title=item.get("title"))
|
181
|
+
|
182
|
+
md_lines.append(table_md)
|
183
|
+
html_lines.append(table_html)
|
171
184
|
wrote_table = True
|
172
185
|
except Exception as e:
|
173
186
|
pass
|
174
187
|
if not wrote_table:
|
175
|
-
|
188
|
+
chart_md = f"![Chart — page {page_num}]({rel})\n"
|
189
|
+
chart_html = f'<img src="{rel}" alt="Chart — page {page_num}" />'
|
190
|
+
md_lines.append(chart_md)
|
191
|
+
html_lines.append(chart_html)
|
176
192
|
else:
|
177
|
-
|
193
|
+
chart_md = f"![Chart — page {page_num}]({rel})\n"
|
194
|
+
chart_html = f'<img src="{rel}" alt="Chart — page {page_num}" />'
|
195
|
+
md_lines.append(chart_md)
|
196
|
+
html_lines.append(chart_html)
|
178
197
|
if charts_bar: charts_bar.update(1)
|
179
198
|
|
180
199
|
elif box.label == "table":
|
@@ -184,27 +203,51 @@ class StructuredPDFParser:
|
|
184
203
|
table = self.vlm.extract_table(abs_img_path)
|
185
204
|
item = to_structured_dict(table)
|
186
205
|
if item:
|
206
|
+
# Add page and type information to structured item
|
207
|
+
item["page"] = page_num
|
208
|
+
item["type"] = "Table"
|
187
209
|
structured_items.append(item)
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
210
|
+
|
211
|
+
# Generate both markdown and HTML tables
|
212
|
+
table_md = render_markdown_table(item.get("headers"), item.get("rows"),
|
213
|
+
title=item.get("title"))
|
214
|
+
table_html = render_html_table(item.get("headers"), item.get("rows"),
|
215
|
+
title=item.get("title"))
|
216
|
+
|
217
|
+
md_lines.append(table_md)
|
218
|
+
html_lines.append(table_html)
|
192
219
|
wrote_table = True
|
193
220
|
except Exception as e:
|
194
221
|
pass
|
195
222
|
if not wrote_table:
|
196
|
-
|
223
|
+
table_md = f"![Table — page {page_num}]({rel})\n"
|
224
|
+
table_html = f'<img src="{rel}" alt="Table — page {page_num}" />'
|
225
|
+
md_lines.append(table_md)
|
226
|
+
html_lines.append(table_html)
|
197
227
|
else:
|
198
|
-
|
228
|
+
table_md = f"![Table — page {page_num}]({rel})\n"
|
229
|
+
table_html = f'<img src="{rel}" alt="Table — page {page_num}" />'
|
230
|
+
md_lines.append(table_md)
|
231
|
+
html_lines.append(table_html)
|
199
232
|
if tables_bar: tables_bar.update(1)
|
200
233
|
else:
|
201
234
|
text = ocr_box_text(self.ocr_engine, page_img, box)
|
202
235
|
if text:
|
203
236
|
md_lines.append(text)
|
204
237
|
md_lines.append(self.box_separator if self.box_separator else "")
|
238
|
+
# Convert text to HTML (basic conversion)
|
239
|
+
html_text = text.replace('\n', '<br>')
|
240
|
+
html_lines.append(f"<p>{html_text}</p>")
|
241
|
+
if self.box_separator:
|
242
|
+
html_lines.append("<br>")
|
205
243
|
|
206
244
|
md_path = write_markdown(md_lines, out_dir)
|
207
|
-
|
245
|
+
|
246
|
+
# Use HTML lines if VLM is enabled for better table formatting
|
247
|
+
if self.use_vlm and html_lines:
|
248
|
+
html_path = write_html_from_lines(html_lines, out_dir)
|
249
|
+
else:
|
250
|
+
html_path = write_html(md_lines, out_dir)
|
208
251
|
|
209
252
|
excel_path = None
|
210
253
|
html_structured_path = None
|