doctra 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctra/__init__.py +21 -18
- doctra/cli/main.py +3 -0
- doctra/engines/layout/paddle_layout.py +11 -77
- doctra/engines/vlm/provider.py +85 -85
- doctra/engines/vlm/service.py +6 -13
- doctra/exporters/html_writer.py +1235 -0
- doctra/parsers/structured_pdf_parser.py +12 -7
- doctra/parsers/table_chart_extractor.py +47 -22
- doctra/ui/__init__.py +5 -0
- doctra/ui/app.py +1012 -0
- doctra/utils/progress.py +200 -49
- doctra/utils/structured_utils.py +49 -49
- doctra/version.py +1 -1
- {doctra-0.2.0.dist-info → doctra-0.3.0.dist-info}/METADATA +38 -1
- {doctra-0.2.0.dist-info → doctra-0.3.0.dist-info}/RECORD +18 -15
- {doctra-0.2.0.dist-info → doctra-0.3.0.dist-info}/WHEEL +0 -0
- {doctra-0.2.0.dist-info → doctra-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {doctra-0.2.0.dist-info → doctra-0.3.0.dist-info}/top_level.txt +0 -0
doctra/utils/progress.py
CHANGED
@@ -2,11 +2,140 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import os
|
4
4
|
import sys
|
5
|
-
from typing import Optional, Dict, Any
|
5
|
+
from typing import Optional, Dict, Any, Iterable, Iterator, Tuple
|
6
6
|
from tqdm import tqdm
|
7
7
|
from tqdm.auto import tqdm as tqdm_auto
|
8
8
|
|
9
9
|
|
10
|
+
class ProgressConfig:
    """
    Central configuration for progress behavior, overridable via environment.

    The environment is read once, when the instance is constructed.

    Env vars:
    - DOCTRA_PROGRESS_DISABLE: "1" to disable progress entirely
    - DOCTRA_PROGRESS_ASCII: "1" to force ASCII bars
    - DOCTRA_PROGRESS_EMOJI: "0" to disable emoji prefixing
    - DOCTRA_PROGRESS_NCOLS: positive integer width for bars
    - DOCTRA_PROGRESS_EMOJI_MODE: one of {default, safe, ascii, none}
    """

    def __init__(self) -> None:
        self.disable: bool = os.getenv("DOCTRA_PROGRESS_DISABLE", "0") == "1"
        self.force_ascii: bool = os.getenv("DOCTRA_PROGRESS_ASCII", "0") == "1"
        self.use_emoji: bool = os.getenv("DOCTRA_PROGRESS_EMOJI", "1") == "1"
        # Surrounding whitespace is stripped so values such as " safe " still match.
        self.emoji_mode: str = (
            os.getenv("DOCTRA_PROGRESS_EMOJI_MODE", "default").strip().lower()
        )
        self.ncols_env: Optional[int] = self._parse_ncols(
            os.getenv("DOCTRA_PROGRESS_NCOLS")
        )

    @staticmethod
    def _parse_ncols(raw: Optional[str]) -> Optional[int]:
        """Return *raw* as a positive int, or None when unset or unusable."""
        if not raw:
            return None
        try:
            width = int(raw)
        except ValueError:
            # Not an integer: fall back to the per-bar default width.
            return None
        # Zero and negative widths are meaningless for a bar; a negative value is
        # truthy and would otherwise win over the default in `ncols_env or ...`.
        return width if width > 0 else None
|
33
|
+
|
34
|
+
|
35
|
+
_PROGRESS_CONFIG = ProgressConfig()
|
36
|
+
|
37
|
+
|
38
|
+
def _detect_environment() -> Tuple[bool, bool, bool]:
    """
    Returns (is_notebook, is_tty, is_windows).

    - is_notebook: True when a Jupyter/Colab/Kaggle marker module is already
      present in ``sys.modules``.
    - is_tty: True when ``sys.stdout`` reports itself as a terminal.
    - is_windows: True when ``sys.platform`` starts with "win".
    """
    # Modules whose presence in sys.modules marks a notebook-style host
    # (generic Jupyter kernels plus the Colab/Kaggle specifics).
    notebook_markers = (
        "ipykernel",
        "jupyter",
        "google.colab",
        "kaggle_secrets",
        "kaggle_web_client",
    )
    is_notebook = any(name in sys.modules for name in notebook_markers)

    try:
        is_tty = bool(sys.stdout.isatty())
    except (AttributeError, ValueError):
        # AttributeError: stdout is None or lacks isatty().
        # ValueError: stdout has already been closed.
        is_tty = False

    is_windows = sys.platform.startswith("win")
    return is_notebook, is_tty, is_windows
|
51
|
+
|
52
|
+
|
53
|
+
def _select_emoji(key: str) -> str:
    """
    Choose an emoji/symbol for a given key according to env and config.

    Modes:
    - default: rich emoji
    - safe: simpler symbols intended to render with a stable width
    - ascii: ASCII text tokens
    - none: empty prefix

    An unrecognized mode is treated as "default". In "default" mode the "safe"
    set is used automatically on Windows and when Colab/Kaggle modules are
    loaded. In every mode except "none", an unknown *key* falls back to the
    "processing" symbol.

    :param key: Category name (e.g. "loading", "charts", "tables")
    :return: Prefix string for the progress description, possibly empty
    """
    # Maps
    default_map = {
        "loading": "🔄",
        "charts": "📊",
        "tables": "📋",
        "figures": "🖼️",
        "ocr": "🔍",
        "vlm": "🤖",
        "processing": "⚙️",
    }
    safe_map = {
        # Prefer BMP or geometric shapes likely to render everywhere
        # (NOTE: the "ocr" glyph below is outside the BMP).
        "loading": "⏳",
        "charts": "▦",
        "tables": "▤",
        "figures": "▧",
        "ocr": "🔎",
        "vlm": "★",
        "processing": "⚙",  # no variation selector
    }
    ascii_map = {
        "loading": "[loading]",
        "charts": "[charts]",
        "tables": "[tables]",
        "figures": "[figures]",
        "ocr": "[ocr]",
        "vlm": "[vlm]",
        "processing": "[processing]",
    }

    # Determine effective mode
    mode = _PROGRESS_CONFIG.emoji_mode
    if not _PROGRESS_CONFIG.use_emoji:
        mode = "none"
    elif mode not in ("safe", "ascii", "none"):
        # "default" and any unrecognized value both land here, so a typo in the
        # env var still gets the platform heuristic instead of bypassing it.
        _, _, is_windows = _detect_environment()
        hosted_notebook = any(
            name in sys.modules
            for name in ("google.colab", "kaggle_secrets", "kaggle_web_client")
        )
        # Heuristics: prefer safe in Colab/Kaggle notebooks and Windows terminals
        mode = "safe" if (is_windows or hosted_notebook) else "default"

    if mode == "none":
        return ""
    if mode == "ascii":
        return ascii_map.get(key, ascii_map["processing"])
    if mode == "safe":
        return safe_map.get(key, safe_map["processing"])
    # default
    return default_map.get(key, default_map["processing"])
|
110
|
+
|
111
|
+
|
112
|
+
def _supports_unicode_output() -> bool:
    """
    Best-effort detection whether stdout likely supports Unicode/emoji.

    Checks, in order: the user's force-ASCII opt-in, the encoding reported by
    ``sys.stdout``, environment variables set by common notebook hosts, and,
    on Windows only, environment variables set by terminals that usually
    handle Unicode.

    :return: True when Unicode output is presumed safe, False otherwise
    """
    # If user opted-in to force ASCII, respect it on every platform.
    if _PROGRESS_CONFIG.force_ascii:
        return False

    # sys.stdout may be None or a stand-in without a usable `encoding`.
    enc = getattr(sys.stdout, "encoding", None)
    if isinstance(enc, str) and "utf" in enc.lower():
        return True

    # Heuristics for common notebook environments that support emoji
    env = os.environ
    notebook_vars = (
        "COLAB_GPU",
        "GCE_METADATA_HOST",
        "KAGGLE_KERNEL_RUN_TYPE",
        "JPY_PARENT_PID",
    )
    if any(name in env for name in notebook_vars):
        return True

    # On Windows, try to detect WT/Terminal/VSCode which usually handle Unicode
    if sys.platform.startswith("win"):
        terminal_vars = ("WT_SESSION", "TERM_PROGRAM", "VSCODE_PID")
        if any(name in env for name in terminal_vars):
            return True

    return False
|
137
|
+
|
138
|
+
|
10
139
|
def create_beautiful_progress_bar(
|
11
140
|
total: int,
|
12
141
|
desc: str,
|
@@ -33,7 +162,8 @@ def create_beautiful_progress_bar(
|
|
33
162
|
"""
|
34
163
|
|
35
164
|
# Enhanced styling parameters - notebook-friendly format
|
36
|
-
|
165
|
+
is_notebook, is_tty, is_windows = _detect_environment()
|
166
|
+
if is_notebook:
|
37
167
|
# Simpler format for notebooks to avoid display issues
|
38
168
|
bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]"
|
39
169
|
else:
|
@@ -71,24 +201,15 @@ def create_beautiful_progress_bar(
|
|
71
201
|
else:
|
72
202
|
color_scheme = color_schemes["processing"]
|
73
203
|
|
74
|
-
#
|
75
|
-
|
76
|
-
"loading": "🔄",
|
77
|
-
"charts": "📊",
|
78
|
-
"tables": "📋",
|
79
|
-
"figures": "🖼️",
|
80
|
-
"ocr": "🔍",
|
81
|
-
"vlm": "🤖",
|
82
|
-
"processing": "⚙️",
|
83
|
-
}
|
204
|
+
# Emoji categories
|
205
|
+
emoji_categories = {"loading", "charts", "tables", "figures", "ocr", "vlm", "processing"}
|
84
206
|
|
85
|
-
# Add appropriate emoji to description
|
86
|
-
|
87
|
-
if
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
desc = f"⚙️ {desc}"
|
207
|
+
# Add appropriate emoji to description (can be disabled)
|
208
|
+
if _PROGRESS_CONFIG.use_emoji:
|
209
|
+
prefix_key = next((k for k in emoji_categories if k in desc_lower), "processing")
|
210
|
+
prefix = _select_emoji(prefix_key)
|
211
|
+
if prefix:
|
212
|
+
desc = f"{prefix} {desc}"
|
92
213
|
|
93
214
|
# Enhanced tqdm configuration
|
94
215
|
tqdm_config = {
|
@@ -96,8 +217,9 @@ def create_beautiful_progress_bar(
|
|
96
217
|
"desc": desc,
|
97
218
|
"leave": leave,
|
98
219
|
"bar_format": bar_format,
|
99
|
-
"ncols": color_scheme["ncols"],
|
100
|
-
|
220
|
+
"ncols": _PROGRESS_CONFIG.ncols_env or color_scheme["ncols"],
|
221
|
+
# Prefer Unicode unless user forces ASCII or environment lacks Unicode support
|
222
|
+
"ascii": _PROGRESS_CONFIG.force_ascii or not _supports_unicode_output(),
|
101
223
|
"dynamic_ncols": True, # Responsive width
|
102
224
|
"smoothing": 0.3, # Smooth progress updates
|
103
225
|
"mininterval": 0.1, # Minimum update interval
|
@@ -107,21 +229,30 @@ def create_beautiful_progress_bar(
|
|
107
229
|
}
|
108
230
|
|
109
231
|
# Enhanced environment detection
|
110
|
-
is_notebook
|
111
|
-
is_terminal = hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
|
232
|
+
is_notebook, is_terminal, is_windows = _detect_environment()
|
112
233
|
|
113
234
|
# Add color only for terminal environments (not notebooks)
|
114
235
|
if not is_notebook and is_terminal:
|
115
236
|
tqdm_config["colour"] = color_scheme["colour"]
|
116
237
|
|
117
|
-
#
|
238
|
+
# Respect global disable
|
239
|
+
if _PROGRESS_CONFIG.disable:
|
240
|
+
tqdm_config["disable"] = True
|
241
|
+
|
242
|
+
# Try creating the progress bar with Unicode, fallback to ASCII on failure (e.g., Windows code page)
|
118
243
|
if is_notebook:
|
119
|
-
|
120
|
-
|
121
|
-
|
244
|
+
tqdm_config.pop("colour", None)
|
245
|
+
try:
|
246
|
+
return tqdm_auto(**tqdm_config)
|
247
|
+
except Exception:
|
248
|
+
tqdm_config["ascii"] = True
|
249
|
+
return tqdm_auto(**tqdm_config)
|
122
250
|
else:
|
123
|
-
|
124
|
-
|
251
|
+
try:
|
252
|
+
return tqdm(**tqdm_config)
|
253
|
+
except Exception:
|
254
|
+
tqdm_config["ascii"] = True
|
255
|
+
return tqdm(**tqdm_config)
|
125
256
|
|
126
257
|
|
127
258
|
def create_multi_progress_bars(
|
@@ -234,28 +365,24 @@ def create_notebook_friendly_bar(
|
|
234
365
|
:return: Configured notebook-friendly progress bar
|
235
366
|
"""
|
236
367
|
# Force notebook mode
|
237
|
-
|
238
|
-
|
368
|
+
if _PROGRESS_CONFIG.disable:
|
369
|
+
kwargs["disable"] = True
|
370
|
+
else:
|
371
|
+
kwargs["disable"] = False
|
372
|
+
# Prefer Unicode in notebooks if supported
|
373
|
+
if "ascii" not in kwargs:
|
374
|
+
kwargs["ascii"] = _PROGRESS_CONFIG.force_ascii or not _supports_unicode_output()
|
239
375
|
|
240
|
-
#
|
241
|
-
|
242
|
-
"loading": "🔄",
|
243
|
-
"charts": "📊",
|
244
|
-
"tables": "📋",
|
245
|
-
"figures": "🖼️",
|
246
|
-
"ocr": "🔍",
|
247
|
-
"vlm": "🤖",
|
248
|
-
"processing": "⚙️",
|
249
|
-
}
|
376
|
+
# Emoji categories
|
377
|
+
emoji_categories = {"loading", "charts", "tables", "figures", "ocr", "vlm", "processing"}
|
250
378
|
|
251
379
|
# Add appropriate emoji to description
|
252
380
|
desc_lower = desc.lower()
|
253
|
-
|
254
|
-
if
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
desc = f"⚙️ {desc}"
|
381
|
+
if _PROGRESS_CONFIG.use_emoji:
|
382
|
+
prefix_key = next((k for k in emoji_categories if k in desc_lower), "processing")
|
383
|
+
prefix = _select_emoji(prefix_key)
|
384
|
+
if prefix:
|
385
|
+
desc = f"{prefix} {desc}"
|
259
386
|
|
260
387
|
# Simple format for notebooks
|
261
388
|
bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt}"
|
@@ -265,8 +392,8 @@ def create_notebook_friendly_bar(
|
|
265
392
|
"desc": desc,
|
266
393
|
"leave": True,
|
267
394
|
"bar_format": bar_format,
|
268
|
-
"ncols": 80,
|
269
|
-
"ascii":
|
395
|
+
"ncols": _PROGRESS_CONFIG.ncols_env or 80,
|
396
|
+
"ascii": kwargs.get("ascii", False),
|
270
397
|
"dynamic_ncols": False, # Fixed width for notebooks
|
271
398
|
"smoothing": 0.1, # Faster updates
|
272
399
|
"mininterval": 0.05,
|
@@ -275,3 +402,27 @@ def create_notebook_friendly_bar(
|
|
275
402
|
}
|
276
403
|
|
277
404
|
return tqdm_auto(**tqdm_config)
|
405
|
+
|
406
|
+
|
407
|
+
def progress_for(iterable: Iterable[Any], desc: str, total: Optional[int] = None, leave: bool = True, **kwargs) -> Iterator[Any]:
    """
    Wrap an iterable with a configured progress bar.

    Respects env config and auto-detects notebook vs terminal.

    :param iterable: Items to yield, one per progress step
    :param desc: Description shown next to the bar
    :param total: Expected number of items; when omitted it is taken from
        ``len(iterable)`` if the iterable supports it, otherwise 0 is passed
        to the bar factory
    :param leave: Forwarded to the bar factory
    :param kwargs: Extra keyword arguments forwarded to the bar factory
    :return: Generator yielding the items of *iterable* unchanged
    """
    if _PROGRESS_CONFIG.disable:
        # Progress disabled globally: behave as a transparent pass-through.
        yield from iterable
        return

    if total is None:
        try:
            total = len(iterable)  # sized containers give the bar a real total
        except TypeError:
            total = 0  # generators and other unsized iterables

    is_notebook, _, _ = _detect_environment()
    bar_factory = create_notebook_friendly_bar if is_notebook else create_beautiful_progress_bar
    with bar_factory(total=total, desc=desc, leave=leave, **kwargs) as bar:
        for item in iterable:
            yield item
            # Advance only after the consumer has finished with the item.
            bar.update(1)
|
doctra/utils/structured_utils.py
CHANGED
@@ -1,49 +1,49 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
from typing import Any, Dict, Optional
|
3
|
-
import json
|
4
|
-
|
5
|
-
try:
|
6
|
-
from pydantic import BaseModel # type: ignore
|
7
|
-
except Exception: # pydantic not strictly required for normalization
|
8
|
-
class BaseModel: # fallback stub
|
9
|
-
pass
|
10
|
-
|
11
|
-
def to_structured_dict(obj: Any) -> Optional[Dict[str, Any]]:
|
12
|
-
"""
|
13
|
-
Accepts a VLM result that might be:
|
14
|
-
- JSON string
|
15
|
-
- dict
|
16
|
-
- Pydantic BaseModel (v1 .dict() or v2 .model_dump())
|
17
|
-
Returns a normalized dict with keys: title, headers, rows — or None.
|
18
|
-
"""
|
19
|
-
if obj is None:
|
20
|
-
return None
|
21
|
-
|
22
|
-
# JSON string from VLM
|
23
|
-
if isinstance(obj, str):
|
24
|
-
try:
|
25
|
-
obj = json.loads(obj)
|
26
|
-
except Exception:
|
27
|
-
return None
|
28
|
-
|
29
|
-
# Pydantic model
|
30
|
-
if isinstance(obj, BaseModel):
|
31
|
-
try:
|
32
|
-
return obj.model_dump() # pydantic v2
|
33
|
-
except Exception:
|
34
|
-
try:
|
35
|
-
return obj.dict() # pydantic v1
|
36
|
-
except Exception:
|
37
|
-
return None
|
38
|
-
|
39
|
-
# Plain dict
|
40
|
-
if isinstance(obj, dict):
|
41
|
-
title = obj.get("title") or "Untitled"
|
42
|
-
headers = obj.get("headers") or []
|
43
|
-
rows = obj.get("rows") or []
|
44
|
-
# Basic shape checks
|
45
|
-
if not isinstance(headers, list) or not isinstance(rows, list):
|
46
|
-
return None
|
47
|
-
return {"title": title, "headers": headers, "rows": rows}
|
48
|
-
|
49
|
-
return None
|
1
|
+
from __future__ import annotations
|
2
|
+
from typing import Any, Dict, Optional
|
3
|
+
import json
|
4
|
+
|
5
|
+
try:
    from pydantic import BaseModel  # type: ignore
except Exception:  # pydantic not strictly required for normalization
    class BaseModel:  # fallback stub
        pass


def to_structured_dict(obj: Any) -> Optional[Dict[str, Any]]:
    """
    Accepts a VLM result that might be:
    - JSON string
    - dict
    - Pydantic BaseModel (v1 .dict() or v2 .model_dump())
    Returns a normalized dict with keys: title, headers, rows — or None.

    Every accepted input goes through the same normalization: a missing or
    empty title becomes "Untitled", missing headers/rows become empty lists,
    and None is returned when headers or rows is not a list.
    """
    if obj is None:
        return None

    # JSON string from VLM
    if isinstance(obj, str):
        try:
            obj = json.loads(obj)
        except Exception:
            return None

    # Pydantic model: dump to a plain dict, then normalize it below like any
    # other dict instead of returning the raw dump.
    if isinstance(obj, BaseModel):
        for method_name in ("model_dump", "dict"):  # pydantic v2, then v1
            dump = getattr(obj, method_name, None)
            if not callable(dump):
                continue
            try:
                obj = dump()
            except Exception:
                continue  # try the next API before giving up
            break
        else:
            return None

    # Plain dict
    if isinstance(obj, dict):
        title = obj.get("title") or "Untitled"
        headers = obj.get("headers") or []
        rows = obj.get("rows") or []
        # Basic shape checks
        if not isinstance(headers, list) or not isinstance(rows, list):
            return None
        return {"title": title, "headers": headers, "rows": rows}

    return None
|
doctra/version.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
"""Version information for Doctra."""
|
2
|
-
__version__ = '0.
|
2
|
+
__version__ = '0.3.0'
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: doctra
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.3.0
|
4
4
|
Summary: Parse, extract, and analyze documents with ease
|
5
5
|
Home-page: https://github.com/AdemBoukhris457/Doctra
|
6
6
|
Author: Adem Boukhris
|
@@ -626,3 +626,40 @@ parser.display_pages_with_boxes("document.pdf")
|
|
626
626
|
- **OpenPyXL**: Excel file generation
|
627
627
|
- **Google Generative AI**: For Gemini VLM integration
|
628
628
|
- **OpenAI**: For GPT-5 VLM integration
|
629
|
+
|
630
|
+
## 🖥️ Web Interface (Gradio)
|
631
|
+
|
632
|
+
You can try Doctra in a simple web UI powered by Gradio.
|
633
|
+
|
634
|
+
### Run locally
|
635
|
+
|
636
|
+
```bash
|
637
|
+
pip install -U gradio
|
638
|
+
python gradio_app.py
|
639
|
+
```
|
640
|
+
|
641
|
+
Then open the printed URL (default `http://127.0.0.1:7860`).
|
642
|
+
|
643
|
+
Notes:
|
644
|
+
- If using VLM, set the API key field in the UI or export `VLM_API_KEY`.
|
645
|
+
- Outputs are saved under `outputs/<pdf_stem>/` and previewed in the UI.
|
646
|
+
|
647
|
+
### Deploy on Hugging Face Spaces
|
648
|
+
|
649
|
+
1) Create a new Space (type: Gradio, SDK: Python).
|
650
|
+
|
651
|
+
2) Add these files to the Space repo:
|
652
|
+
- Your package code (or install from PyPI).
|
653
|
+
- `gradio_app.py` (entry point).
|
654
|
+
- `requirements.txt` with at least:
|
655
|
+
|
656
|
+
```text
|
657
|
+
doctra
|
658
|
+
gradio
|
659
|
+
```
|
660
|
+
|
661
|
+
3) Set a secret named `VLM_API_KEY` if you want VLM features.
|
662
|
+
|
663
|
+
4) In Space settings, set `python gradio_app.py` as the run command (or rely on auto-detect).
|
664
|
+
|
665
|
+
The Space will build and expose the same interface for uploads and processing.
|
@@ -1,29 +1,32 @@
|
|
1
|
-
doctra/__init__.py,sha256
|
2
|
-
doctra/version.py,sha256=
|
1
|
+
doctra/__init__.py,sha256=ST_c2GWBoB0y_wpL1qsOeK4bR1RyJhMMn6I5VjVRI6Y,613
|
2
|
+
doctra/version.py,sha256=hnuLMAgAv9rqQndLE3xdEZsa3vwZ4eZ2RVbRJjlJu8Y,60
|
3
3
|
doctra/cli/__init__.py,sha256=4PTujjYRShOOUlZ7PwuWckShPWLC4v4CYIhJpzgyv1k,911
|
4
|
-
doctra/cli/main.py,sha256=
|
4
|
+
doctra/cli/main.py,sha256=o_W1b5kx3xaTbWK6l4IYi0YLwffKBj5pQKflnlaG2Fw,35611
|
5
5
|
doctra/cli/utils.py,sha256=IghiUZQCOmXODC5-5smHGz2KeV4xqbP4avmA1Mggln0,11800
|
6
6
|
doctra/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
doctra/engines/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
doctra/engines/layout/layout_models.py,sha256=vuTzjWd3FD-SkFPngktmUVhOJ6Xvff6ufwFEq796PQs,3162
|
9
|
-
doctra/engines/layout/paddle_layout.py,sha256=
|
9
|
+
doctra/engines/layout/paddle_layout.py,sha256=P2-Gk8wHpWoA5Jpmo_3OLI59zWq3HeAOBOUKKVdXu8I,6792
|
10
10
|
doctra/engines/ocr/__init__.py,sha256=h6bFiveGXdI59fsKzCqOXki3C74DCndEmvloOtMqnR0,133
|
11
11
|
doctra/engines/ocr/api.py,sha256=YOBKDLExXpvSiOsc_TDJasaMPxzdVx1llQCtYlsruWo,1280
|
12
12
|
doctra/engines/ocr/path_resolver.py,sha256=2_7Nsekt3dCDU3oVsgdr62iMrlAhbGNfYwgh4G7S3pA,1492
|
13
13
|
doctra/engines/ocr/pytesseract_engine.py,sha256=Imz2uwju6himkBiS8CH7DLxBRe-LtmMYZiOdb_6PoQw,2911
|
14
14
|
doctra/engines/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
15
|
doctra/engines/vlm/outlines_types.py,sha256=qL-G6MNiA5mxp1qAPVEFhOANp4NqVt_MQKseJCr_xXE,970
|
16
|
-
doctra/engines/vlm/provider.py,sha256=
|
17
|
-
doctra/engines/vlm/service.py,sha256=
|
16
|
+
doctra/engines/vlm/provider.py,sha256=aE8Eo1U-8XqAimakNlT0-T4etIyCV8rZ3DwxdqbFeTc,3131
|
17
|
+
doctra/engines/vlm/service.py,sha256=Jwws2Jw68-IdHyvEWks4UCoP7Olhqt8IpXfCv5Z7Ml4,4724
|
18
18
|
doctra/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
19
|
doctra/exporters/excel_writer.py,sha256=U5Eb5SF7_ll1QveUapSWSkCRt3OEoisKEVUQ_7X8Wjo,7762
|
20
|
+
doctra/exporters/html_writer.py,sha256=OlW24Eg5bZcjldRHtd3GDD7RrajuRXj43EJpXIJkYf8,38810
|
20
21
|
doctra/exporters/image_saver.py,sha256=zsPoQ0CwoE643ui4iZMdXk96kv5mU8L_zC2JfF22N1A,1639
|
21
22
|
doctra/exporters/markdown_table.py,sha256=4_OJIwG_WoIPYBzJx1njy_3tNVdkK6QKSP-P9r-b0zw,2030
|
22
23
|
doctra/exporters/markdown_writer.py,sha256=L7EjF2MB8jYX7XkZ3a3NeeEC8gnb0qzRPTzIN9tdfuw,1027
|
23
24
|
doctra/parsers/__init__.py,sha256=8M6LVzcWGpuTIK_1SMXML3ll7zK1CTHXGI5qXvqdm-A,206
|
24
25
|
doctra/parsers/layout_order.py,sha256=W6b-T11H907RZ2FaZwNvnYhmvH11rpUzxC5yLkdf28k,640
|
25
|
-
doctra/parsers/structured_pdf_parser.py,sha256=
|
26
|
-
doctra/parsers/table_chart_extractor.py,sha256
|
26
|
+
doctra/parsers/structured_pdf_parser.py,sha256=fbDIQ6VFv1phFPC3lKgcjtCp0AdNA8Ny1dK0F726Pww,21357
|
27
|
+
doctra/parsers/table_chart_extractor.py,sha256=JuoScqCQbPdQjy4ak77OcZHSPYKGHF4H39fEW6gF3eo,15323
|
28
|
+
doctra/ui/__init__.py,sha256=XzOOKeGSBnUREuDQiCIWds1asFSa2nypFQTJXwclROA,85
|
29
|
+
doctra/ui/app.py,sha256=FYDlEG_2pfp7SSHnA04NRNUhOcI-BJPh3qAf5dw5D6g,45903
|
27
30
|
doctra/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
28
31
|
doctra/utils/bbox.py,sha256=R2-95p0KiWvet3TH27TQVvCar7WJg6z0u3L21iEDF-A,674
|
29
32
|
doctra/utils/constants.py,sha256=ZWOvNDrvETbQ_pxHiX7vUW4J5Oj8_qnov0QacUOBizI,189
|
@@ -31,11 +34,11 @@ doctra/utils/file_ops.py,sha256=3IS0EQncs6Kaj27fcg2zxQX3xRSvtItIsyKGLYgeOgw,815
|
|
31
34
|
doctra/utils/io_utils.py,sha256=L1bWV4-ybs2j_3ZEN7GfQVgdC73JKVECVnpwKbP0dy0,219
|
32
35
|
doctra/utils/ocr_utils.py,sha256=Doa1uYBg3kRgRYd2aPq9fICHgHfrM_efdhZfI7jl6OM,780
|
33
36
|
doctra/utils/pdf_io.py,sha256=c8EY47Z1iqVtlLFHS_n0qGuXJ5ERFaMUd84ivXV0b9E,706
|
34
|
-
doctra/utils/progress.py,sha256=
|
37
|
+
doctra/utils/progress.py,sha256=sNEjTdN32J1-eXFPqwZRw2EZQ1SXSesXBd5StJvtlmc,14481
|
35
38
|
doctra/utils/quiet.py,sha256=5XPS-1CtJ0sVk6qgSQctdhr_wR8mP1xoJLoUbmkXROA,387
|
36
|
-
doctra/utils/structured_utils.py,sha256=
|
37
|
-
doctra-0.
|
38
|
-
doctra-0.
|
39
|
-
doctra-0.
|
40
|
-
doctra-0.
|
41
|
-
doctra-0.
|
39
|
+
doctra/utils/structured_utils.py,sha256=J-qTqo8eCjm36FaRJ_I482LFgYCpm3eukZm-gbNnchw,1401
|
40
|
+
doctra-0.3.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
41
|
+
doctra-0.3.0.dist-info/METADATA,sha256=tdfVsN0nDj_WcpptBvJvWF2tzdgp_0SfeeYya7oTqgU,27794
|
42
|
+
doctra-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
43
|
+
doctra-0.3.0.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
|
44
|
+
doctra-0.3.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|