doctra 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
doctra/utils/progress.py CHANGED
@@ -2,11 +2,140 @@ from __future__ import annotations
2
2
 
3
3
  import os
4
4
  import sys
5
- from typing import Optional, Dict, Any
5
+ from typing import Optional, Dict, Any, Iterable, Iterator, Tuple
6
6
  from tqdm import tqdm
7
7
  from tqdm.auto import tqdm as tqdm_auto
8
8
 
9
9
 
10
class ProgressConfig:
    """
    Central configuration for progress behavior, overridable via environment.

    All values are read from the environment once, when the instance is
    created; later changes to the environment are not picked up.

    Env vars:
    - DOCTRA_PROGRESS_DISABLE: "1" to disable progress entirely
    - DOCTRA_PROGRESS_ASCII: "1" to force ASCII bars
    - DOCTRA_PROGRESS_EMOJI: "0" to disable emoji prefixing
    - DOCTRA_PROGRESS_NCOLS: integer width for bars
    - DOCTRA_PROGRESS_EMOJI_MODE: one of {default, safe, ascii, none}

    Attributes:
    - disable / force_ascii / use_emoji: booleans derived from the flags above
    - ncols_env: positive bar width, or None when unset or unusable
    - emoji_mode: one of the documented modes (invalid values become "default")
    """

    # Modes documented for DOCTRA_PROGRESS_EMOJI_MODE.
    _EMOJI_MODES = ("default", "safe", "ascii", "none")

    def __init__(self) -> None:
        self.disable: bool = os.getenv("DOCTRA_PROGRESS_DISABLE", "0") == "1"
        self.force_ascii: bool = os.getenv("DOCTRA_PROGRESS_ASCII", "0") == "1"
        self.use_emoji: bool = os.getenv("DOCTRA_PROGRESS_EMOJI", "1") == "1"

        # Unknown or misspelled modes fall back to "default" rather than
        # silently selecting an undocumented code path downstream.
        requested_mode = os.getenv("DOCTRA_PROGRESS_EMOJI_MODE", "default").strip().lower()
        self.emoji_mode: str = requested_mode if requested_mode in self._EMOJI_MODES else "default"

        self.ncols_env: Optional[int] = self._read_ncols()

    @staticmethod
    def _read_ncols() -> Optional[int]:
        """Parse DOCTRA_PROGRESS_NCOLS; return None when unset, non-numeric or not positive."""
        raw_value = os.getenv("DOCTRA_PROGRESS_NCOLS")
        if not raw_value:
            return None
        try:
            width = int(raw_value)
        except ValueError:
            # A malformed width must never break progress reporting.
            return None
        # Zero or negative widths are meaningless for a bar; treat them as unset.
        return width if width > 0 else None
33
+
34
+
35
+ _PROGRESS_CONFIG = ProgressConfig()
36
+
37
+
38
+ def _detect_environment() -> Tuple[bool, bool, bool]:
39
+ """
40
+ Returns (is_notebook, is_tty, is_windows).
41
+ """
42
+ is_notebook = "ipykernel" in sys.modules or "jupyter" in sys.modules
43
+ # Colab/Kaggle specifics
44
+ if "google.colab" in sys.modules:
45
+ is_notebook = True
46
+ if "kaggle_secrets" in sys.modules or "kaggle_web_client" in sys.modules:
47
+ is_notebook = True
48
+ is_tty = hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
49
+ is_windows = sys.platform.startswith("win")
50
+ return is_notebook, is_tty, is_windows
51
+
52
+
53
def _select_emoji(key: str) -> str:
    """
    Choose an emoji/symbol for a given key according to env and config.

    Modes:
    - default: rich emoji
    - safe: single-codepoint symbols with stable width
    - ascii: ASCII text tokens
    - none: empty prefix

    The mode comes from ``_PROGRESS_CONFIG.emoji_mode``. It is forced to
    "none" when emoji are disabled, and "default" is downgraded to "safe" on
    Windows and in Colab/Kaggle sessions. In every symbol-producing mode an
    unknown ``key`` falls back to that mode's "processing" symbol.

    :param key: Category name such as "loading", "charts", "tables",
        "figures", "ocr", "vlm" or "processing"
    :return: Prefix symbol for the description, or "" when the mode is "none"
    """
    # Maps
    default_map = {
        "loading": "🔄",
        "charts": "📊",
        "tables": "📋",
        "figures": "🖼️",
        "ocr": "🔍",
        "vlm": "🤖",
        "processing": "⚙️",
    }
    safe_map = {
        # Use BMP or geometric shapes likely to render everywhere
        "loading": "⏳",
        "charts": "▦",
        "tables": "▤",
        "figures": "▧",
        "ocr": "🔎",
        "vlm": "★",
        "processing": "⚙",  # no variation selector
    }
    ascii_map = {
        "loading": "[loading]",
        "charts": "[charts]",
        "tables": "[tables]",
        "figures": "[figures]",
        "ocr": "[ocr]",
        "vlm": "[vlm]",
        "processing": "[processing]",
    }

    # Determine effective mode
    mode = _PROGRESS_CONFIG.emoji_mode
    if not _PROGRESS_CONFIG.use_emoji:
        mode = "none"
    elif mode == "default":
        # Heuristics: prefer safe in Colab/Kaggle notebooks and Windows terminals.
        # The module list mirrors the Colab/Kaggle checks in _detect_environment.
        _, _, is_windows = _detect_environment()
        hosted_notebook = any(
            name in sys.modules
            for name in ("google.colab", "kaggle_secrets", "kaggle_web_client")
        )
        if is_windows or hosted_notebook:
            mode = "safe"

    if mode == "none":
        return ""
    if mode == "ascii":
        # Same fallback rule as the other modes: unknown keys get "processing".
        return ascii_map.get(key, ascii_map["processing"])
    if mode == "safe":
        return safe_map.get(key, safe_map["processing"])
    # default (also reached for any unrecognised mode string)
    return default_map.get(key, default_map["processing"])
110
+
111
+
112
def _supports_unicode_output() -> bool:
    """
    Best-effort guess at whether stdout can display Unicode/emoji.

    Checks, in order:
    1. stdout's declared encoding contains "utf";
    2. an environment variable associated with Colab, Kaggle or Jupyter is set;
    3. on Windows only, and unless ASCII is forced in the progress config, an
       environment variable set by Windows Terminal or VS Code (or TERM_PROGRAM)
       is present.

    :return: True if any check succeeds, otherwise False
    """
    # Step 1: trust an explicit UTF encoding; any failure while inspecting the
    # stream is treated the same as "no UTF encoding found".
    try:
        stream_encoding = (getattr(sys.stdout, "encoding", None) or "").lower()
        declares_utf = "utf" in stream_encoding
    except Exception:
        declares_utf = False
    if declares_utf:
        return True

    # Step 2: heuristics for common notebook environments that support emoji
    notebook_markers = ("COLAB_GPU", "GCE_METADATA_HOST", "KAGGLE_KERNEL_RUN_TYPE", "JPY_PARENT_PID")
    for marker in notebook_markers:
        if marker in os.environ:
            return True

    # Step 3: Windows-only terminal detection; a forced-ASCII preference wins
    if not sys.platform.startswith("win") or _PROGRESS_CONFIG.force_ascii:
        return False
    terminal_markers = ("WT_SESSION", "TERM_PROGRAM", "VSCODE_PID")
    return any(marker in os.environ for marker in terminal_markers)
137
+
138
+
10
139
  def create_beautiful_progress_bar(
11
140
  total: int,
12
141
  desc: str,
@@ -33,7 +162,8 @@ def create_beautiful_progress_bar(
33
162
  """
34
163
 
35
164
  # Enhanced styling parameters - notebook-friendly format
36
- if "ipykernel" in sys.modules:
165
+ is_notebook, is_tty, is_windows = _detect_environment()
166
+ if is_notebook:
37
167
  # Simpler format for notebooks to avoid display issues
38
168
  bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]"
39
169
  else:
@@ -71,24 +201,15 @@ def create_beautiful_progress_bar(
71
201
  else:
72
202
  color_scheme = color_schemes["processing"]
73
203
 
74
- # Add emoji icons to descriptions
75
- emoji_map = {
76
- "loading": "🔄",
77
- "charts": "📊",
78
- "tables": "📋",
79
- "figures": "🖼️",
80
- "ocr": "🔍",
81
- "vlm": "🤖",
82
- "processing": "⚙️",
83
- }
204
+ # Emoji categories
205
+ emoji_categories = {"loading", "charts", "tables", "figures", "ocr", "vlm", "processing"}
84
206
 
85
- # Add appropriate emoji to description
86
- for key, emoji in emoji_map.items():
87
- if key in desc_lower:
88
- desc = f"{emoji} {desc}"
89
- break
90
- else:
91
- desc = f"⚙️ {desc}"
207
+ # Add appropriate emoji to description (can be disabled)
208
+ if _PROGRESS_CONFIG.use_emoji:
209
+ prefix_key = next((k for k in emoji_categories if k in desc_lower), "processing")
210
+ prefix = _select_emoji(prefix_key)
211
+ if prefix:
212
+ desc = f"{prefix} {desc}"
92
213
 
93
214
  # Enhanced tqdm configuration
94
215
  tqdm_config = {
@@ -96,8 +217,9 @@ def create_beautiful_progress_bar(
96
217
  "desc": desc,
97
218
  "leave": leave,
98
219
  "bar_format": bar_format,
99
- "ncols": color_scheme["ncols"],
100
- "ascii": False, # Use Unicode characters for better appearance
220
+ "ncols": _PROGRESS_CONFIG.ncols_env or color_scheme["ncols"],
221
+ # Prefer Unicode unless user forces ASCII or environment lacks Unicode support
222
+ "ascii": _PROGRESS_CONFIG.force_ascii or not _supports_unicode_output(),
101
223
  "dynamic_ncols": True, # Responsive width
102
224
  "smoothing": 0.3, # Smooth progress updates
103
225
  "mininterval": 0.1, # Minimum update interval
@@ -107,21 +229,30 @@ def create_beautiful_progress_bar(
107
229
  }
108
230
 
109
231
  # Enhanced environment detection
110
- is_notebook = "ipykernel" in sys.modules or "jupyter" in sys.modules
111
- is_terminal = hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
232
+ is_notebook, is_terminal, is_windows = _detect_environment()
112
233
 
113
234
  # Add color only for terminal environments (not notebooks)
114
235
  if not is_notebook and is_terminal:
115
236
  tqdm_config["colour"] = color_scheme["colour"]
116
237
 
117
- # Use auto tqdm for better Jupyter notebook support
238
+ # Respect global disable
239
+ if _PROGRESS_CONFIG.disable:
240
+ tqdm_config["disable"] = True
241
+
242
+ # Try creating the progress bar with Unicode, fallback to ASCII on failure (e.g., Windows code page)
118
243
  if is_notebook:
119
- # In notebooks, don't use color to avoid ANSI code issues
120
- tqdm_config.pop("colour", None) # Remove color in notebooks
121
- return tqdm_auto(**tqdm_config)
244
+ tqdm_config.pop("colour", None)
245
+ try:
246
+ return tqdm_auto(**tqdm_config)
247
+ except Exception:
248
+ tqdm_config["ascii"] = True
249
+ return tqdm_auto(**tqdm_config)
122
250
  else:
123
- # In terminal/cmd/powershell, we can use colors
124
- return tqdm(**tqdm_config)
251
+ try:
252
+ return tqdm(**tqdm_config)
253
+ except Exception:
254
+ tqdm_config["ascii"] = True
255
+ return tqdm(**tqdm_config)
125
256
 
126
257
 
127
258
  def create_multi_progress_bars(
@@ -234,28 +365,24 @@ def create_notebook_friendly_bar(
234
365
  :return: Configured notebook-friendly progress bar
235
366
  """
236
367
  # Force notebook mode
237
- kwargs["disable"] = False
238
- kwargs["ascii"] = True # Use ASCII characters for better notebook compatibility
368
+ if _PROGRESS_CONFIG.disable:
369
+ kwargs["disable"] = True
370
+ else:
371
+ kwargs["disable"] = False
372
+ # Prefer Unicode in notebooks if supported
373
+ if "ascii" not in kwargs:
374
+ kwargs["ascii"] = _PROGRESS_CONFIG.force_ascii or not _supports_unicode_output()
239
375
 
240
- # Add emoji icons to descriptions (same as beautiful bars)
241
- emoji_map = {
242
- "loading": "🔄",
243
- "charts": "📊",
244
- "tables": "📋",
245
- "figures": "🖼️",
246
- "ocr": "🔍",
247
- "vlm": "🤖",
248
- "processing": "⚙️",
249
- }
376
+ # Emoji categories
377
+ emoji_categories = {"loading", "charts", "tables", "figures", "ocr", "vlm", "processing"}
250
378
 
251
379
  # Add appropriate emoji to description
252
380
  desc_lower = desc.lower()
253
- for key, emoji in emoji_map.items():
254
- if key in desc_lower:
255
- desc = f"{emoji} {desc}"
256
- break
257
- else:
258
- desc = f"⚙️ {desc}"
381
+ if _PROGRESS_CONFIG.use_emoji:
382
+ prefix_key = next((k for k in emoji_categories if k in desc_lower), "processing")
383
+ prefix = _select_emoji(prefix_key)
384
+ if prefix:
385
+ desc = f"{prefix} {desc}"
259
386
 
260
387
  # Simple format for notebooks
261
388
  bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt}"
@@ -265,8 +392,8 @@ def create_notebook_friendly_bar(
265
392
  "desc": desc,
266
393
  "leave": True,
267
394
  "bar_format": bar_format,
268
- "ncols": 80,
269
- "ascii": True,
395
+ "ncols": _PROGRESS_CONFIG.ncols_env or 80,
396
+ "ascii": kwargs.get("ascii", False),
270
397
  "dynamic_ncols": False, # Fixed width for notebooks
271
398
  "smoothing": 0.1, # Faster updates
272
399
  "mininterval": 0.05,
@@ -275,3 +402,27 @@ def create_notebook_friendly_bar(
275
402
  }
276
403
 
277
404
  return tqdm_auto(**tqdm_config)
405
+
406
+
407
def progress_for(iterable: Iterable[Any], desc: str, total: Optional[int] = None, leave: bool = True, **kwargs) -> Iterator[Any]:
    """
    Wrap an iterable with a configured progress bar.
    Respects env config and auto-detects notebook vs terminal.

    This is a generator: the bar is created when iteration starts and is
    closed when iteration finishes or the generator is closed early.

    :param iterable: Items to iterate over
    :param desc: Description shown next to the bar
    :param total: Expected number of items; when omitted, ``len(iterable)``
        is used if the iterable is sized, otherwise 0 is passed to the bar
    :param leave: Forwarded to the bar factory
    :param kwargs: Extra keyword arguments forwarded to the bar factory
    :return: Iterator yielding the items of ``iterable`` unchanged
    """
    if _PROGRESS_CONFIG.disable:
        # Progress is globally disabled: behave as a plain pass-through.
        yield from iterable
        return

    if total is None:
        # Sized containers can report a real total; generators and other
        # unsized iterables keep the previous total=0 behaviour.
        try:
            total = len(iterable)  # type: ignore[arg-type]
        except TypeError:
            total = 0

    is_notebook, _, _ = _detect_environment()
    bar_factory = create_notebook_friendly_bar if is_notebook else create_beautiful_progress_bar
    with bar_factory(total=total, desc=desc, leave=leave, **kwargs) as bar:
        for item in iterable:
            yield item
            # Advance only after the consumer has finished with the item.
            bar.update(1)
@@ -1,49 +1,49 @@
1
- from __future__ import annotations
2
- from typing import Any, Dict, Optional
3
- import json
4
-
5
- try:
6
- from pydantic import BaseModel # type: ignore
7
- except Exception: # pydantic not strictly required for normalization
8
- class BaseModel: # fallback stub
9
- pass
10
-
11
- def to_structured_dict(obj: Any) -> Optional[Dict[str, Any]]:
12
- """
13
- Accepts a VLM result that might be:
14
- - JSON string
15
- - dict
16
- - Pydantic BaseModel (v1 .dict() or v2 .model_dump())
17
- Returns a normalized dict with keys: title, headers, rows — or None.
18
- """
19
- if obj is None:
20
- return None
21
-
22
- # JSON string from VLM
23
- if isinstance(obj, str):
24
- try:
25
- obj = json.loads(obj)
26
- except Exception:
27
- return None
28
-
29
- # Pydantic model
30
- if isinstance(obj, BaseModel):
31
- try:
32
- return obj.model_dump() # pydantic v2
33
- except Exception:
34
- try:
35
- return obj.dict() # pydantic v1
36
- except Exception:
37
- return None
38
-
39
- # Plain dict
40
- if isinstance(obj, dict):
41
- title = obj.get("title") or "Untitled"
42
- headers = obj.get("headers") or []
43
- rows = obj.get("rows") or []
44
- # Basic shape checks
45
- if not isinstance(headers, list) or not isinstance(rows, list):
46
- return None
47
- return {"title": title, "headers": headers, "rows": rows}
48
-
49
- return None
1
+ from __future__ import annotations
2
+ from typing import Any, Dict, Optional
3
+ import json
4
+
5
try:
    from pydantic import BaseModel  # type: ignore
except Exception:  # pydantic not strictly required for normalization
    class BaseModel:  # fallback stub
        pass


def to_structured_dict(obj: Any) -> Optional[Dict[str, Any]]:
    """
    Accepts a VLM result that might be:
      - JSON string
      - dict
      - Pydantic BaseModel (v1 .dict() or v2 .model_dump())
    Returns a normalized dict with keys: title, headers, rows — or None.

    Every accepted input goes through the same normalization: a missing or
    empty title becomes "Untitled", missing headers/rows become empty lists,
    and the result is rejected (None) when headers or rows is not a list.
    Keys other than title/headers/rows are dropped.

    :param obj: Raw VLM result
    :return: Normalized dict, or None when the input cannot be interpreted
    """
    if obj is None:
        return None

    # JSON string from VLM
    if isinstance(obj, str):
        try:
            obj = json.loads(obj)
        except (ValueError, RecursionError):
            # Malformed (or pathologically nested) JSON is simply "no result".
            return None

    # Pydantic model: dump to a plain dict, then normalize like any other dict
    if isinstance(obj, BaseModel):
        dumped = None
        for dump_method in ("model_dump", "dict"):  # pydantic v2 first, then v1
            dump = getattr(obj, dump_method, None)
            if not callable(dump):
                continue
            try:
                dumped = dump()
            except Exception:
                # Best-effort: try the next API before giving up.
                continue
            break
        obj = dumped

    # Anything that is not a dict by now (list, number, failed dump) is unusable
    if not isinstance(obj, dict):
        return None

    title = obj.get("title") or "Untitled"
    headers = obj.get("headers") or []
    rows = obj.get("rows") or []
    # Basic shape checks
    if not isinstance(headers, list) or not isinstance(rows, list):
        return None
    return {"title": title, "headers": headers, "rows": rows}
doctra/version.py CHANGED
@@ -1,2 +1,2 @@
1
1
  """Version information for Doctra."""
2
- __version__ = '0.2.0'
2
+ __version__ = '0.3.1'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: doctra
3
- Version: 0.2.0
3
+ Version: 0.3.1
4
4
  Summary: Parse, extract, and analyze documents with ease
5
5
  Home-page: https://github.com/AdemBoukhris457/Doctra
6
6
  Author: Adem Boukhris
@@ -234,6 +234,9 @@ Requires-Dist: opencv-python>=4.5.0
234
234
  Requires-Dist: pandas>=1.3.0
235
235
  Requires-Dist: openpyxl>=3.0.0
236
236
  Requires-Dist: tesseract>=0.1.3
237
+ Requires-Dist: pytesseract>=0.3.10
238
+ Requires-Dist: pdf2image>=1.16.0
239
+ Requires-Dist: anthropic>=0.40.0
237
240
  Requires-Dist: outlines>=0.0.34
238
241
  Requires-Dist: tqdm>=4.62.0
239
242
  Requires-Dist: matplotlib>=3.5.0
@@ -241,8 +244,6 @@ Provides-Extra: openai
241
244
  Requires-Dist: openai>=1.0.0; extra == "openai"
242
245
  Provides-Extra: gemini
243
246
  Requires-Dist: google-generativeai>=0.3.0; extra == "gemini"
244
- Provides-Extra: anthropic
245
- Requires-Dist: anthropic>=0.40.0; extra == "anthropic"
246
247
  Provides-Extra: dev
247
248
  Requires-Dist: pytest>=6.0; extra == "dev"
248
249
  Requires-Dist: pytest-cov>=2.0; extra == "dev"
@@ -295,6 +296,31 @@ cd Doctra
295
296
  pip install .
296
297
  ```
297
298
 
299
+ ### System Dependencies
300
+
301
+ Doctra requires **Poppler** for PDF processing. Install it based on your operating system:
302
+
303
+ #### Ubuntu/Debian
304
+ ```bash
305
+ sudo apt install poppler-utils
306
+ ```
307
+
308
+ #### macOS
309
+ ```bash
310
+ brew install poppler
311
+ ```
312
+
313
+ #### Windows
314
+ Download and install from [Poppler for Windows](http://blog.alivate.com.au/poppler-windows/) or use conda:
315
+ ```bash
316
+ conda install -c conda-forge poppler
317
+ ```
318
+
319
+ #### Google Colab
320
+ ```bash
321
+ !sudo apt install poppler-utils
322
+ ```
323
+
298
324
  ## ⚡ Quick Start
299
325
 
300
326
  ```python
@@ -626,3 +652,40 @@ parser.display_pages_with_boxes("document.pdf")
626
652
  - **OpenPyXL**: Excel file generation
627
653
  - **Google Generative AI**: For Gemini VLM integration
628
654
  - **OpenAI**: For GPT-5 VLM integration
655
+
656
+ ## 🖥️ Web Interface (Gradio)
657
+
658
+ You can try Doctra in a simple web UI powered by Gradio.
659
+
660
+ ### Run locally
661
+
662
+ ```bash
663
+ pip install -U gradio
664
+ python gradio_app.py
665
+ ```
666
+
667
+ Then open the printed URL (default `http://127.0.0.1:7860`).
668
+
669
+ Notes:
670
+ - If using VLM, set the API key field in the UI or export `VLM_API_KEY`.
671
+ - Outputs are saved under `outputs/<pdf_stem>/` and previewed in the UI.
672
+
673
+ ### Deploy on Hugging Face Spaces
674
+
675
+ 1) Create a new Space (type: Gradio, SDK: Python).
676
+
677
+ 2) Add these files to the Space repo:
678
+ - Your package code (or install from PyPI).
679
+ - `gradio_app.py` (entry point).
680
+ - `requirements.txt` with at least:
681
+
682
+ ```text
683
+ doctra
684
+ gradio
685
+ ```
686
+
687
+ 3) Set a secret named `VLM_API_KEY` if you want VLM features.
688
+
689
+ 4) In Space settings, set `python gradio_app.py` as the run command (or rely on auto-detect).
690
+
691
+ The Space will build and expose the same interface for uploads and processing.
@@ -1,29 +1,32 @@
1
- doctra/__init__.py,sha256=-Pkx0Vh4Hz3EQvLaxlL6Mo4lVig59FTN5LvUcxThn4U,519
2
- doctra/version.py,sha256=oXtS5MRUB2QfE2Q8GOIq0p_iwA9QH5_2LxFqVKJlb_I,60
1
+ doctra/__init__.py,sha256=ST_c2GWBoB0y_wpL1qsOeK4bR1RyJhMMn6I5VjVRI6Y,613
2
+ doctra/version.py,sha256=BDWZqR8pRPnlsqLDR4Kx91MC6A9OwylJHhHemdaa6DQ,60
3
3
  doctra/cli/__init__.py,sha256=4PTujjYRShOOUlZ7PwuWckShPWLC4v4CYIhJpzgyv1k,911
4
- doctra/cli/main.py,sha256=aRxV0yMtswwXKcBrIE7rxMvZCsFSjCVrE5rIqKzYGOY,35368
4
+ doctra/cli/main.py,sha256=o_W1b5kx3xaTbWK6l4IYi0YLwffKBj5pQKflnlaG2Fw,35611
5
5
  doctra/cli/utils.py,sha256=IghiUZQCOmXODC5-5smHGz2KeV4xqbP4avmA1Mggln0,11800
6
6
  doctra/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  doctra/engines/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  doctra/engines/layout/layout_models.py,sha256=vuTzjWd3FD-SkFPngktmUVhOJ6Xvff6ufwFEq796PQs,3162
9
- doctra/engines/layout/paddle_layout.py,sha256=N9Bzt6372BfWUtQspYqh6PpYWOndjoIYET0_OJU85cs,9405
9
+ doctra/engines/layout/paddle_layout.py,sha256=P2-Gk8wHpWoA5Jpmo_3OLI59zWq3HeAOBOUKKVdXu8I,6792
10
10
  doctra/engines/ocr/__init__.py,sha256=h6bFiveGXdI59fsKzCqOXki3C74DCndEmvloOtMqnR0,133
11
11
  doctra/engines/ocr/api.py,sha256=YOBKDLExXpvSiOsc_TDJasaMPxzdVx1llQCtYlsruWo,1280
12
12
  doctra/engines/ocr/path_resolver.py,sha256=2_7Nsekt3dCDU3oVsgdr62iMrlAhbGNfYwgh4G7S3pA,1492
13
13
  doctra/engines/ocr/pytesseract_engine.py,sha256=Imz2uwju6himkBiS8CH7DLxBRe-LtmMYZiOdb_6PoQw,2911
14
14
  doctra/engines/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  doctra/engines/vlm/outlines_types.py,sha256=qL-G6MNiA5mxp1qAPVEFhOANp4NqVt_MQKseJCr_xXE,970
16
- doctra/engines/vlm/provider.py,sha256=njkz99NXZQjkPlRKeje9M_tlaktXyw3VnpFT7enNalk,3216
17
- doctra/engines/vlm/service.py,sha256=uD4BXz3u7B_3iq-xU3MTdDDyjrj1Jm8MDeJU1KXHTZc,5121
16
+ doctra/engines/vlm/provider.py,sha256=aE8Eo1U-8XqAimakNlT0-T4etIyCV8rZ3DwxdqbFeTc,3131
17
+ doctra/engines/vlm/service.py,sha256=Jwws2Jw68-IdHyvEWks4UCoP7Olhqt8IpXfCv5Z7Ml4,4724
18
18
  doctra/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  doctra/exporters/excel_writer.py,sha256=U5Eb5SF7_ll1QveUapSWSkCRt3OEoisKEVUQ_7X8Wjo,7762
20
+ doctra/exporters/html_writer.py,sha256=OlW24Eg5bZcjldRHtd3GDD7RrajuRXj43EJpXIJkYf8,38810
20
21
  doctra/exporters/image_saver.py,sha256=zsPoQ0CwoE643ui4iZMdXk96kv5mU8L_zC2JfF22N1A,1639
21
22
  doctra/exporters/markdown_table.py,sha256=4_OJIwG_WoIPYBzJx1njy_3tNVdkK6QKSP-P9r-b0zw,2030
22
23
  doctra/exporters/markdown_writer.py,sha256=L7EjF2MB8jYX7XkZ3a3NeeEC8gnb0qzRPTzIN9tdfuw,1027
23
24
  doctra/parsers/__init__.py,sha256=8M6LVzcWGpuTIK_1SMXML3ll7zK1CTHXGI5qXvqdm-A,206
24
25
  doctra/parsers/layout_order.py,sha256=W6b-T11H907RZ2FaZwNvnYhmvH11rpUzxC5yLkdf28k,640
25
- doctra/parsers/structured_pdf_parser.py,sha256=4T4zYZWbqqtRua_TPSRmjT1tOc1RE-XSMOLC5fVFJk0,21070
26
- doctra/parsers/table_chart_extractor.py,sha256=-pyJFYzFVdxDwCD1z5BHhT6qyf9BljbfccKGoMJiD90,13591
26
+ doctra/parsers/structured_pdf_parser.py,sha256=fbDIQ6VFv1phFPC3lKgcjtCp0AdNA8Ny1dK0F726Pww,21357
27
+ doctra/parsers/table_chart_extractor.py,sha256=JuoScqCQbPdQjy4ak77OcZHSPYKGHF4H39fEW6gF3eo,15323
28
+ doctra/ui/__init__.py,sha256=XzOOKeGSBnUREuDQiCIWds1asFSa2nypFQTJXwclROA,85
29
+ doctra/ui/app.py,sha256=FYDlEG_2pfp7SSHnA04NRNUhOcI-BJPh3qAf5dw5D6g,45903
27
30
  doctra/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
31
  doctra/utils/bbox.py,sha256=R2-95p0KiWvet3TH27TQVvCar7WJg6z0u3L21iEDF-A,674
29
32
  doctra/utils/constants.py,sha256=ZWOvNDrvETbQ_pxHiX7vUW4J5Oj8_qnov0QacUOBizI,189
@@ -31,11 +34,11 @@ doctra/utils/file_ops.py,sha256=3IS0EQncs6Kaj27fcg2zxQX3xRSvtItIsyKGLYgeOgw,815
31
34
  doctra/utils/io_utils.py,sha256=L1bWV4-ybs2j_3ZEN7GfQVgdC73JKVECVnpwKbP0dy0,219
32
35
  doctra/utils/ocr_utils.py,sha256=Doa1uYBg3kRgRYd2aPq9fICHgHfrM_efdhZfI7jl6OM,780
33
36
  doctra/utils/pdf_io.py,sha256=c8EY47Z1iqVtlLFHS_n0qGuXJ5ERFaMUd84ivXV0b9E,706
34
- doctra/utils/progress.py,sha256=GSjHkNulwqX-Uh_QNP-g-nZH6F-zAwQC120KeTRkRlo,8752
37
+ doctra/utils/progress.py,sha256=sNEjTdN32J1-eXFPqwZRw2EZQ1SXSesXBd5StJvtlmc,14481
35
38
  doctra/utils/quiet.py,sha256=5XPS-1CtJ0sVk6qgSQctdhr_wR8mP1xoJLoUbmkXROA,387
36
- doctra/utils/structured_utils.py,sha256=EdNhCUDLKvYcLqXbTGveNtIRGyQ3yzYhTh-zy_awwM4,1450
37
- doctra-0.2.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
38
- doctra-0.2.0.dist-info/METADATA,sha256=zxVmrkHUI4puc1D8fdUFbRb8WQAL0M4X92v-UaZswPI,26862
39
- doctra-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- doctra-0.2.0.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
41
- doctra-0.2.0.dist-info/RECORD,,
39
+ doctra/utils/structured_utils.py,sha256=J-qTqo8eCjm36FaRJ_I482LFgYCpm3eukZm-gbNnchw,1401
40
+ doctra-0.3.1.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
41
+ doctra-0.3.1.dist-info/METADATA,sha256=2-2aMiNRvofe2WYuYejI6NqSkVctiH5SLK-EX4nIjaE,28298
42
+ doctra-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ doctra-0.3.1.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
44
+ doctra-0.3.1.dist-info/RECORD,,
File without changes