doctra 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,428 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from typing import Optional, Dict, Any, Iterable, Iterator, Tuple
6
+ from tqdm import tqdm
7
+ from tqdm.auto import tqdm as tqdm_auto
8
+
9
+
10
class ProgressConfig:
    """
    Central configuration for progress behavior, overridable via environment.

    All values are read from the environment once, when the instance is built.

    Env vars:
    - DOCTRA_PROGRESS_DISABLE: "1" to disable progress entirely
    - DOCTRA_PROGRESS_ASCII: "1" to force ASCII bars
    - DOCTRA_PROGRESS_EMOJI: "1" (the default) keeps emoji prefixing on;
      any other value turns it off
    - DOCTRA_PROGRESS_NCOLS: positive integer width for bars; unset, empty,
      non-numeric, zero or negative values are ignored
    - DOCTRA_PROGRESS_EMOJI_MODE: one of {default, safe, ascii, none}
    """

    def __init__(self) -> None:
        self.disable: bool = os.getenv("DOCTRA_PROGRESS_DISABLE", "0") == "1"
        self.force_ascii: bool = os.getenv("DOCTRA_PROGRESS_ASCII", "0") == "1"
        self.use_emoji: bool = os.getenv("DOCTRA_PROGRESS_EMOJI", "1") == "1"
        # Tolerate stray whitespace / capitalisation in the mode name.
        self.emoji_mode: str = os.getenv("DOCTRA_PROGRESS_EMOJI_MODE", "default").strip().lower()
        self.ncols_env: Optional[int] = self._parse_ncols(os.getenv("DOCTRA_PROGRESS_NCOLS"))

    @staticmethod
    def _parse_ncols(raw: Optional[str]) -> Optional[int]:
        """Return *raw* as a positive integer width, or None when it is unusable."""
        if not raw:
            return None
        try:
            width = int(raw)
        except ValueError:
            # int() on a string only fails with ValueError; treat it as "not set".
            return None
        # A zero or negative column count is not a meaningful bar width.
        return width if width > 0 else None


# Module-wide settings, captured once at import time.
_PROGRESS_CONFIG = ProgressConfig()
36
+
37
+
38
+ def _detect_environment() -> Tuple[bool, bool, bool]:
39
+ """
40
+ Returns (is_notebook, is_tty, is_windows).
41
+ """
42
+ is_notebook = "ipykernel" in sys.modules or "jupyter" in sys.modules
43
+ # Colab/Kaggle specifics
44
+ if "google.colab" in sys.modules:
45
+ is_notebook = True
46
+ if "kaggle_secrets" in sys.modules or "kaggle_web_client" in sys.modules:
47
+ is_notebook = True
48
+ is_tty = hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
49
+ is_windows = sys.platform.startswith("win")
50
+ return is_notebook, is_tty, is_windows
51
+
52
+
53
# Prefix symbols per emoji mode, built once instead of on every call.
_EMOJI_BY_MODE = {
    "default": {
        "loading": "🔄",
        "charts": "📊",
        "tables": "📋",
        # Written with escapes so the trailing variation selector (U+FE0F) stays visible.
        "figures": "\U0001F5BC\uFE0F",
        "ocr": "🔍",
        "vlm": "🤖",
        "processing": "\u2699\uFE0F",
    },
    "safe": {
        # Use BMP or geometric shapes likely to render everywhere
        "loading": "⏳",
        "charts": "▦",
        "tables": "▤",
        "figures": "▧",
        "ocr": "🔎",
        "vlm": "★",
        "processing": "⚙",  # no variation selector
    },
    "ascii": {
        "loading": "[loading]",
        "charts": "[charts]",
        "tables": "[tables]",
        "figures": "[figures]",
        "ocr": "[ocr]",
        "vlm": "[vlm]",
        "processing": "[processing]",
    },
}


def _select_emoji(key: str) -> str:
    """
    Choose an emoji/symbol for a given key according to env and config.

    Modes:
    - default: rich emoji
    - safe: single-codepoint symbols with stable width
    - ascii: ASCII text tokens
    - none: empty prefix

    When emoji are disabled in the config the result is always "". In
    "default" mode the function downgrades to "safe" on Windows and when a
    Colab or Kaggle module is loaded. Unknown keys fall back to the
    "processing" symbol in default/safe mode and to "" in ascii mode; an
    unrecognised mode name behaves like "default".

    :param key: Category name such as "loading", "charts" or "ocr"
    :return: Prefix string, possibly empty
    """
    if not _PROGRESS_CONFIG.use_emoji:
        return ""

    mode = _PROGRESS_CONFIG.emoji_mode
    if mode == "default":
        # Heuristics: prefer safe in Colab/Kaggle notebooks and Windows terminals.
        # Same Kaggle markers as _detect_environment, so both agree on "hosted".
        _, _, is_windows = _detect_environment()
        hosted_notebook = any(
            name in sys.modules
            for name in ("google.colab", "kaggle_secrets", "kaggle_web_client")
        )
        if is_windows or hosted_notebook:
            mode = "safe"

    if mode == "none":
        return ""
    if mode == "ascii":
        return _EMOJI_BY_MODE["ascii"].get(key, "")

    table = _EMOJI_BY_MODE["safe" if mode == "safe" else "default"]
    return table.get(key, table["processing"])
110
+
111
+
112
+ def _supports_unicode_output() -> bool:
113
+ """Best-effort detection whether stdout likely supports Unicode/emoji."""
114
+ try:
115
+ enc = getattr(sys.stdout, "encoding", None) or ""
116
+ enc_lower = enc.lower()
117
+ if "utf" in enc_lower:
118
+ return True
119
+ except Exception:
120
+ pass
121
+
122
+ # Heuristics for common notebook environments that support emoji
123
+ env = os.environ
124
+ if any(k in env for k in ("COLAB_GPU", "GCE_METADATA_HOST", "KAGGLE_KERNEL_RUN_TYPE", "JPY_PARENT_PID")):
125
+ return True
126
+
127
+ # On modern Windows terminals with UTF-8 code page, assume yes
128
+ if sys.platform.startswith("win"):
129
+ # If user opted-in to force ASCII, respect it
130
+ if _PROGRESS_CONFIG.force_ascii:
131
+ return False
132
+ # Try to detect WT/Terminal/VSCode which usually handle Unicode
133
+ if any(k in env for k in ("WT_SESSION", "TERM_PROGRAM", "VSCODE_PID")):
134
+ return True
135
+
136
+ return False
137
+
138
+
139
def create_beautiful_progress_bar(
    total: int,
    desc: str,
    leave: bool = True,
    position: Optional[int] = None,
    **kwargs
) -> tqdm:
    """
    Create a styled tqdm progress bar whose colour and emoji follow the description.

    The description is matched case-insensitively against an ordered keyword
    list to pick exactly one category (loading, charts, tables, figures, ocr,
    vlm, otherwise processing). That single category decides both the bar
    colour and the emoji prefix, so the result is the same on every run even
    when a description mentions several categories.

    Other behaviour:
    - notebooks get a simpler bar format, no colour, and ``tqdm.auto``
    - colour is applied only on a TTY outside notebooks, and an explicit
      ``colour`` passed by the caller is kept
    - ``DOCTRA_PROGRESS_NCOLS`` fixes the width; without it the bar is
      100 columns wide and resizes with the terminal
    - if constructing the bar fails, it is retried once with ASCII bars

    :param total: Total number of items to process
    :param desc: Description text for the progress bar
    :param leave: Whether to leave the progress bar after completion
    :param position: Position of the progress bar (for multiple bars)
    :param kwargs: Additional tqdm parameters; they override the defaults set here
    :return: Configured tqdm progress bar instance
    """
    is_notebook, is_terminal, _ = _detect_environment()

    if is_notebook:
        # Simpler format for notebooks to avoid display issues
        bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]"
    else:
        # Full format for terminal
        bar_format = (
            "{l_bar}{bar:30}| {n_fmt}/{total_fmt} "
            "[{elapsed}<{remaining}, {rate_fmt}{postfix}]"
        )

    # (keywords, category, colour) - first match wins, so order matters.
    category_rules = (
        (("loading", "model"), "loading", "cyan"),
        (("chart",), "charts", "green"),
        (("table",), "tables", "blue"),
        (("figure",), "figures", "magenta"),
        (("ocr",), "ocr", "yellow"),
        (("vlm",), "vlm", "red"),
    )
    desc_lower = desc.lower()
    category, colour = "processing", "white"
    for keywords, rule_category, rule_colour in category_rules:
        if any(word in desc_lower for word in keywords):
            category, colour = rule_category, rule_colour
            break

    # Add appropriate emoji to description (can be disabled)
    if _PROGRESS_CONFIG.use_emoji:
        prefix = _select_emoji(category)
        if prefix:
            desc = f"{prefix} {desc}"

    fixed_width = _PROGRESS_CONFIG.ncols_env
    tqdm_config = {
        "total": total,
        "desc": desc,
        "leave": leave,
        "bar_format": bar_format,
        "ncols": fixed_width or 100,
        # Prefer Unicode unless user forces ASCII or environment lacks Unicode support
        "ascii": _PROGRESS_CONFIG.force_ascii or not _supports_unicode_output(),
        # dynamic_ncols makes tqdm re-read the terminal width and discard
        # ncols, so only enable it when the user did not request a width.
        "dynamic_ncols": not fixed_width,
        "smoothing": 0.3,  # Smooth progress updates
        "mininterval": 0.1,  # Minimum update interval
        "maxinterval": 1.0,  # Maximum update interval
        "position": position,
        **kwargs
    }

    if is_notebook:
        # Notebook widgets should not receive terminal colour settings.
        tqdm_config.pop("colour", None)
    elif is_terminal:
        # Colour only for real terminals; keep a colour the caller chose.
        tqdm_config.setdefault("colour", colour)

    # Respect global disable
    if _PROGRESS_CONFIG.disable:
        tqdm_config["disable"] = True

    bar_factory = tqdm_auto if is_notebook else tqdm
    try:
        return bar_factory(**tqdm_config)
    except Exception:
        # Deliberately broad: retry once with ASCII bars in case the first
        # attempt failed while writing Unicode (e.g. Windows code page).
        tqdm_config["ascii"] = True
        return bar_factory(**tqdm_config)
256
+
257
+
258
def create_multi_progress_bars(
    descriptions: list[str],
    totals: list[int],
    positions: Optional[list[int]] = None
) -> list[tqdm]:
    """
    Build one progress bar per (description, total, position) triple.

    The three sequences are walked in lockstep with ``zip``, so the number of
    bars equals the length of the shortest one. Every bar is created with
    ``leave=True``.

    :param descriptions: List of descriptions for each progress bar
    :param totals: List of totals for each progress bar
    :param positions: Optional list of positions for each bar; defaults to
        0, 1, 2, ... in description order
    :return: List of configured tqdm progress bar instances
    """
    slots = positions if positions is not None else list(range(len(descriptions)))
    return [
        create_beautiful_progress_bar(total=count, desc=label, position=slot, leave=True)
        for label, count, slot in zip(descriptions, totals, slots)
    ]
285
+
286
+
287
def update_progress_with_info(
    bar: tqdm,
    increment: int = 1,
    info: Optional[Dict[str, Any]] = None
) -> None:
    """
    Advance a progress bar and optionally show key/value details as its postfix.

    Each entry of ``info`` is rendered as ``"key: value"``; float values are
    shown with two decimals. Entries are joined with ", ". When ``info`` is
    None or empty the existing postfix is left untouched.

    :param bar: tqdm progress bar instance
    :param increment: Number to increment the progress
    :param info: Optional dictionary of information to display
    """
    if info:
        rendered = ", ".join(
            f"{label}: {val:.2f}" if isinstance(val, float) else f"{label}: {val}"
            for label, val in info.items()
        )
        bar.set_postfix_str(rendered)

    bar.update(increment)
311
+
312
+
313
def create_loading_bar(desc: str = "Loading", **kwargs) -> tqdm:
    """
    Create a special loading progress bar for model initialization.

    The bar defaults to ``total=1`` and ``leave=True``. Both can be overridden
    through ``kwargs``; supplying them no longer raises a duplicate-keyword
    ``TypeError``.

    :param desc: Description for the loading operation
    :param kwargs: Additional tqdm parameters
    :return: Configured loading progress bar
    """
    # setdefault keeps caller-supplied values and avoids passing the same
    # keyword twice to create_beautiful_progress_bar.
    kwargs.setdefault("total", 1)
    kwargs.setdefault("leave", True)
    return create_beautiful_progress_bar(desc=desc, **kwargs)
327
+
328
+
329
def create_processing_bar(
    total: int,
    operation: str,
    **kwargs
) -> tqdm:
    """
    Create a processing progress bar for data operations.

    The description is ``"<Operation> (processing)"`` with the operation
    title-cased. The bar defaults to ``leave=True``; a ``leave`` value in
    ``kwargs`` overrides that instead of raising a duplicate-keyword
    ``TypeError``.

    :param total: Total number of items to process
    :param operation: Type of operation (charts, tables, figures, etc.)
    :param kwargs: Additional tqdm parameters
    :return: Configured processing progress bar
    """
    # Keep a caller-supplied "leave" rather than passing the keyword twice.
    kwargs.setdefault("leave", True)
    return create_beautiful_progress_bar(
        total=total,
        desc=f"{operation.title()} (processing)",
        **kwargs
    )
349
+
350
+
351
def create_notebook_friendly_bar(
    total: int,
    desc: str,
    **kwargs
) -> tqdm:
    """
    Create a notebook-friendly progress bar with minimal formatting.

    Uses ``tqdm.auto`` with a short bar format, a fixed width
    (``DOCTRA_PROGRESS_NCOLS`` or 80 columns) and no colour setting.

    The emoji prefix is chosen by matching the description, case-insensitively,
    against an ordered keyword list, so the result is the same on every run.
    A global disable in the config always wins; otherwise a ``disable`` value
    supplied by the caller is respected.

    :param total: Total number of items to process
    :param desc: Description text for the progress bar
    :param kwargs: Additional tqdm parameters; they override the defaults set here
    :return: Configured notebook-friendly progress bar
    """
    if _PROGRESS_CONFIG.disable:
        kwargs["disable"] = True
    else:
        # Do not clobber an explicit disable=True from the caller.
        kwargs.setdefault("disable", False)

    # Prefer Unicode in notebooks if supported
    if "ascii" not in kwargs:
        kwargs["ascii"] = _PROGRESS_CONFIG.force_ascii or not _supports_unicode_output()

    # Add appropriate emoji to description
    if _PROGRESS_CONFIG.use_emoji:
        # (keywords, category) - first match wins, so order matters.
        category_rules = (
            (("loading", "model"), "loading"),
            (("chart",), "charts"),
            (("table",), "tables"),
            (("figure",), "figures"),
            (("ocr",), "ocr"),
            (("vlm",), "vlm"),
        )
        desc_lower = desc.lower()
        prefix_key = next(
            (
                category
                for keywords, category in category_rules
                if any(word in desc_lower for word in keywords)
            ),
            "processing",
        )
        prefix = _select_emoji(prefix_key)
        if prefix:
            desc = f"{prefix} {desc}"

    tqdm_config = {
        "total": total,
        "desc": desc,
        "leave": True,
        # Simple format for notebooks
        "bar_format": "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt}",
        "ncols": _PROGRESS_CONFIG.ncols_env or 80,
        "dynamic_ncols": False,  # Fixed width for notebooks
        "smoothing": 0.1,  # Faster updates
        "mininterval": 0.05,
        "maxinterval": 0.5,
        # kwargs always carries "ascii" and "disable" at this point.
        **kwargs
    }

    return tqdm_auto(**tqdm_config)
405
+
406
+
407
def progress_for(iterable: Iterable[Any], desc: str, total: Optional[int] = None, leave: bool = True, **kwargs) -> Iterator[Any]:
    """
    Wrap an iterable with a configured progress bar.

    Respects env config and auto-detects notebook vs terminal. When progress
    is globally disabled the items are yielded without creating a bar.

    If ``total`` is not given it is taken from ``len(iterable)`` when the
    iterable supports it; otherwise the bar is created with a total of 0.
    The bar advances after the consumer has finished with each item, and it
    is closed when iteration ends or the generator is closed early.

    :param iterable: Items to iterate over
    :param desc: Description text for the progress bar
    :param total: Number of items, if known
    :param leave: Whether to leave the progress bar after completion
    :param kwargs: Additional tqdm parameters
    :return: Generator yielding the items of ``iterable`` unchanged
    """
    if _PROGRESS_CONFIG.disable:
        for item in iterable:
            yield item
        return

    if total is None:
        try:
            total = len(iterable)  # type: ignore[arg-type]
        except TypeError:
            # Generators and other unsized iterables: total stays unknown.
            total = None

    is_notebook, _, _ = _detect_environment()
    bar_factory = create_notebook_friendly_bar if is_notebook else create_beautiful_progress_bar
    with bar_factory(total=total if total is not None else 0, desc=desc, leave=leave, **kwargs) as bar:
        for item in iterable:
            yield item
            bar.update(1)
@@ -1,49 +1,49 @@
1
- from __future__ import annotations
2
- from typing import Any, Dict, Optional
3
- import json
4
-
5
- try:
6
- from pydantic import BaseModel # type: ignore
7
- except Exception: # pydantic not strictly required for normalization
8
- class BaseModel: # fallback stub
9
- pass
10
-
11
- def to_structured_dict(obj: Any) -> Optional[Dict[str, Any]]:
12
- """
13
- Accepts a VLM result that might be:
14
- - JSON string
15
- - dict
16
- - Pydantic BaseModel (v1 .dict() or v2 .model_dump())
17
- Returns a normalized dict with keys: title, headers, rows — or None.
18
- """
19
- if obj is None:
20
- return None
21
-
22
- # JSON string from VLM
23
- if isinstance(obj, str):
24
- try:
25
- obj = json.loads(obj)
26
- except Exception:
27
- return None
28
-
29
- # Pydantic model
30
- if isinstance(obj, BaseModel):
31
- try:
32
- return obj.model_dump() # pydantic v2
33
- except Exception:
34
- try:
35
- return obj.dict() # pydantic v1
36
- except Exception:
37
- return None
38
-
39
- # Plain dict
40
- if isinstance(obj, dict):
41
- title = obj.get("title") or "Untitled"
42
- headers = obj.get("headers") or []
43
- rows = obj.get("rows") or []
44
- # Basic shape checks
45
- if not isinstance(headers, list) or not isinstance(rows, list):
46
- return None
47
- return {"title": title, "headers": headers, "rows": rows}
48
-
49
- return None
1
+ from __future__ import annotations
2
+ from typing import Any, Dict, Optional
3
+ import json
4
+
5
try:
    from pydantic import BaseModel  # type: ignore
except Exception:  # pydantic not strictly required for normalization
    class BaseModel:  # fallback stub
        pass


def _model_to_dict(model: Any) -> Optional[Any]:
    """Dump a Pydantic model via ``model_dump()`` (v2) or ``dict()`` (v1); None if neither works."""
    for method_name in ("model_dump", "dict"):
        dumper = getattr(model, method_name, None)
        if not callable(dumper):
            continue
        try:
            return dumper()
        except Exception:
            # Best effort: a failing dump falls through to the next method.
            continue
    return None


def to_structured_dict(obj: Any) -> Optional[Dict[str, Any]]:
    """
    Accepts a VLM result that might be:
      - JSON string
      - dict
      - Pydantic BaseModel (v1 .dict() or v2 .model_dump())
    Returns a normalized dict with keys: title, headers, rows — or None.

    Every accepted input, including a Pydantic model, goes through the same
    normalization: a missing or empty title becomes "Untitled", missing or
    empty headers/rows become ``[]``, and only the three keys above are kept.
    None is returned when the input cannot be parsed, is not dict-like after
    conversion, or has headers/rows that are not lists.

    :param obj: Raw VLM result
    :return: Normalized dict, or None when the input is unusable
    """
    if obj is None:
        return None

    # JSON string from VLM
    if isinstance(obj, str):
        try:
            obj = json.loads(obj)
        except (ValueError, RecursionError):
            # JSONDecodeError is a ValueError; deeply nested input can hit
            # the recursion limit.
            return None

    # Pydantic model: convert to a plain dict, then normalize like any dict
    if isinstance(obj, BaseModel):
        obj = _model_to_dict(obj)

    if not isinstance(obj, dict):
        return None

    title = obj.get("title") or "Untitled"
    headers = obj.get("headers") or []
    rows = obj.get("rows") or []
    # Basic shape checks
    if not isinstance(headers, list) or not isinstance(rows, list):
        return None
    return {"title": title, "headers": headers, "rows": rows}
doctra/version.py CHANGED
@@ -1,2 +1,2 @@
1
1
  """Version information for Doctra."""
2
- __version__ = '0.1.1'
2
+ __version__ = '0.3.0'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: doctra
3
- Version: 0.1.1
3
+ Version: 0.3.0
4
4
  Summary: Parse, extract, and analyze documents with ease
5
5
  Home-page: https://github.com/AdemBoukhris457/Doctra
6
6
  Author: Adem Boukhris
@@ -241,6 +241,8 @@ Provides-Extra: openai
241
241
  Requires-Dist: openai>=1.0.0; extra == "openai"
242
242
  Provides-Extra: gemini
243
243
  Requires-Dist: google-generativeai>=0.3.0; extra == "gemini"
244
+ Provides-Extra: anthropic
245
+ Requires-Dist: anthropic>=0.40.0; extra == "anthropic"
244
246
  Provides-Extra: dev
245
247
  Requires-Dist: pytest>=6.0; extra == "dev"
246
248
  Requires-Dist: pytest-cov>=2.0; extra == "dev"
@@ -329,7 +331,7 @@ parser = StructuredPDFParser()
329
331
  # Parser with VLM for structured data extraction
330
332
  parser = StructuredPDFParser(
331
333
  use_vlm=True,
332
- vlm_provider="openai", # or "gemini"
334
+ vlm_provider="openai", # or "gemini" or "anthropic" or "openrouter"
333
335
  vlm_api_key="your_api_key_here"
334
336
  )
335
337
 
@@ -344,7 +346,7 @@ parser = StructuredPDFParser(
344
346
  # VLM Settings
345
347
  use_vlm=True,
346
348
  vlm_provider="openai",
347
- vlm_model="gpt-4o",
349
+ vlm_model="gpt-5",
348
350
  vlm_api_key="your_api_key",
349
351
 
350
352
  # Layout Detection Settings
@@ -406,7 +408,7 @@ parser = ChartTablePDFParser(
406
408
  # VLM Settings
407
409
  use_vlm=True,
408
410
  vlm_provider="openai",
409
- vlm_model="gpt-4o",
411
+ vlm_model="gpt-5",
410
412
  vlm_api_key="your_api_key",
411
413
 
412
414
  # Layout Detection Settings
@@ -545,7 +547,7 @@ parser = StructuredPDFParser(
545
547
  use_vlm=True,
546
548
  vlm_provider="openai",
547
549
  vlm_api_key="your_openai_api_key",
548
- vlm__model="gpt-4o",
550
+ vlm_model="gpt-5",
549
551
  layout_model_name="PP-DocLayout_plus-L",
550
552
  dpi=300, # Higher DPI for better quality
551
553
  min_score=0.5, # Higher confidence threshold
@@ -623,4 +625,41 @@ parser.display_pages_with_boxes("document.pdf")
623
625
  - **Pandas**: Data manipulation
624
626
  - **OpenPyXL**: Excel file generation
625
627
  - **Google Generative AI**: For Gemini VLM integration
626
- - **OpenAI**: For GPT-4 VLM integration
628
+ - **OpenAI**: For GPT-5 VLM integration
629
+
630
+ ## 🖥️ Web Interface (Gradio)
631
+
632
+ You can try Doctra in a simple web UI powered by Gradio.
633
+
634
+ ### Run locally
635
+
636
+ ```bash
637
+ pip install -U gradio
638
+ python gradio_app.py
639
+ ```
640
+
641
+ Then open the printed URL (default `http://127.0.0.1:7860`).
642
+
643
+ Notes:
644
+ - If using VLM, set the API key field in the UI or export `VLM_API_KEY`.
645
+ - Outputs are saved under `outputs/<pdf_stem>/` and previewed in the UI.
646
+
647
+ ### Deploy on Hugging Face Spaces
648
+
649
+ 1) Create a new Space (type: Gradio, SDK: Python).
650
+
651
+ 2) Add these files to the Space repo:
652
+ - Your package code (or install from PyPI).
653
+ - `gradio_app.py` (entry point).
654
+ - `requirements.txt` with at least:
655
+
656
+ ```text
657
+ doctra
658
+ gradio
659
+ ```
660
+
661
+ 3) Set a secret named `VLM_API_KEY` if you want VLM features.
662
+
663
+ 4) In Space settings, set `python gradio_app.py` as the run command (or rely on auto-detect).
664
+
665
+ The Space will build and expose the same interface for uploads and processing.
@@ -1,29 +1,32 @@
1
- doctra/__init__.py,sha256=-Pkx0Vh4Hz3EQvLaxlL6Mo4lVig59FTN5LvUcxThn4U,519
2
- doctra/version.py,sha256=tiyU44F2UjL7SjuX_aL4-UZ5m_D9WOmXSUZQnjpR8PM,60
1
+ doctra/__init__.py,sha256=ST_c2GWBoB0y_wpL1qsOeK4bR1RyJhMMn6I5VjVRI6Y,613
2
+ doctra/version.py,sha256=hnuLMAgAv9rqQndLE3xdEZsa3vwZ4eZ2RVbRJjlJu8Y,60
3
3
  doctra/cli/__init__.py,sha256=4PTujjYRShOOUlZ7PwuWckShPWLC4v4CYIhJpzgyv1k,911
4
- doctra/cli/main.py,sha256=O3Bgov3rtf58AJHmuojJaptrH17X1mw19iTplId3gGo,35327
5
- doctra/cli/utils.py,sha256=QuttjEtBiFrOHmqZz4mjbf3GWZe26lYChPwa23Loz_4,11314
4
+ doctra/cli/main.py,sha256=o_W1b5kx3xaTbWK6l4IYi0YLwffKBj5pQKflnlaG2Fw,35611
5
+ doctra/cli/utils.py,sha256=IghiUZQCOmXODC5-5smHGz2KeV4xqbP4avmA1Mggln0,11800
6
6
  doctra/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  doctra/engines/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  doctra/engines/layout/layout_models.py,sha256=vuTzjWd3FD-SkFPngktmUVhOJ6Xvff6ufwFEq796PQs,3162
9
- doctra/engines/layout/paddle_layout.py,sha256=Yf6_OtBq_RSup8CSDofJUZxM_bJMBlCC0eSv5ib1uNk,9364
9
+ doctra/engines/layout/paddle_layout.py,sha256=P2-Gk8wHpWoA5Jpmo_3OLI59zWq3HeAOBOUKKVdXu8I,6792
10
10
  doctra/engines/ocr/__init__.py,sha256=h6bFiveGXdI59fsKzCqOXki3C74DCndEmvloOtMqnR0,133
11
11
  doctra/engines/ocr/api.py,sha256=YOBKDLExXpvSiOsc_TDJasaMPxzdVx1llQCtYlsruWo,1280
12
12
  doctra/engines/ocr/path_resolver.py,sha256=2_7Nsekt3dCDU3oVsgdr62iMrlAhbGNfYwgh4G7S3pA,1492
13
13
  doctra/engines/ocr/pytesseract_engine.py,sha256=Imz2uwju6himkBiS8CH7DLxBRe-LtmMYZiOdb_6PoQw,2911
14
14
  doctra/engines/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  doctra/engines/vlm/outlines_types.py,sha256=qL-G6MNiA5mxp1qAPVEFhOANp4NqVt_MQKseJCr_xXE,970
16
- doctra/engines/vlm/provider.py,sha256=ws-04Jhuvg0a3vXzz8cfMWIiwldoIFs3i_qSb2Q6enA,2137
17
- doctra/engines/vlm/service.py,sha256=cONhekqKfGo2fe-2g7YT89BHxytdjGhCSFyU3sJUzWI,4966
16
+ doctra/engines/vlm/provider.py,sha256=aE8Eo1U-8XqAimakNlT0-T4etIyCV8rZ3DwxdqbFeTc,3131
17
+ doctra/engines/vlm/service.py,sha256=Jwws2Jw68-IdHyvEWks4UCoP7Olhqt8IpXfCv5Z7Ml4,4724
18
18
  doctra/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  doctra/exporters/excel_writer.py,sha256=U5Eb5SF7_ll1QveUapSWSkCRt3OEoisKEVUQ_7X8Wjo,7762
20
+ doctra/exporters/html_writer.py,sha256=OlW24Eg5bZcjldRHtd3GDD7RrajuRXj43EJpXIJkYf8,38810
20
21
  doctra/exporters/image_saver.py,sha256=zsPoQ0CwoE643ui4iZMdXk96kv5mU8L_zC2JfF22N1A,1639
21
22
  doctra/exporters/markdown_table.py,sha256=4_OJIwG_WoIPYBzJx1njy_3tNVdkK6QKSP-P9r-b0zw,2030
22
23
  doctra/exporters/markdown_writer.py,sha256=L7EjF2MB8jYX7XkZ3a3NeeEC8gnb0qzRPTzIN9tdfuw,1027
23
24
  doctra/parsers/__init__.py,sha256=8M6LVzcWGpuTIK_1SMXML3ll7zK1CTHXGI5qXvqdm-A,206
24
25
  doctra/parsers/layout_order.py,sha256=W6b-T11H907RZ2FaZwNvnYhmvH11rpUzxC5yLkdf28k,640
25
- doctra/parsers/structured_pdf_parser.py,sha256=g0k9XsSJRVnJg4omrEC1Ef1MWZZ3Ve2OnXjMoc6IScU,19953
26
- doctra/parsers/table_chart_extractor.py,sha256=A-rjazOmx6d_8CbZXdebE4NsYYqiQP0wQktTfCp_pwI,12669
26
+ doctra/parsers/structured_pdf_parser.py,sha256=fbDIQ6VFv1phFPC3lKgcjtCp0AdNA8Ny1dK0F726Pww,21357
27
+ doctra/parsers/table_chart_extractor.py,sha256=JuoScqCQbPdQjy4ak77OcZHSPYKGHF4H39fEW6gF3eo,15323
28
+ doctra/ui/__init__.py,sha256=XzOOKeGSBnUREuDQiCIWds1asFSa2nypFQTJXwclROA,85
29
+ doctra/ui/app.py,sha256=FYDlEG_2pfp7SSHnA04NRNUhOcI-BJPh3qAf5dw5D6g,45903
27
30
  doctra/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
31
  doctra/utils/bbox.py,sha256=R2-95p0KiWvet3TH27TQVvCar7WJg6z0u3L21iEDF-A,674
29
32
  doctra/utils/constants.py,sha256=ZWOvNDrvETbQ_pxHiX7vUW4J5Oj8_qnov0QacUOBizI,189
@@ -31,10 +34,11 @@ doctra/utils/file_ops.py,sha256=3IS0EQncs6Kaj27fcg2zxQX3xRSvtItIsyKGLYgeOgw,815
31
34
  doctra/utils/io_utils.py,sha256=L1bWV4-ybs2j_3ZEN7GfQVgdC73JKVECVnpwKbP0dy0,219
32
35
  doctra/utils/ocr_utils.py,sha256=Doa1uYBg3kRgRYd2aPq9fICHgHfrM_efdhZfI7jl6OM,780
33
36
  doctra/utils/pdf_io.py,sha256=c8EY47Z1iqVtlLFHS_n0qGuXJ5ERFaMUd84ivXV0b9E,706
37
+ doctra/utils/progress.py,sha256=sNEjTdN32J1-eXFPqwZRw2EZQ1SXSesXBd5StJvtlmc,14481
34
38
  doctra/utils/quiet.py,sha256=5XPS-1CtJ0sVk6qgSQctdhr_wR8mP1xoJLoUbmkXROA,387
35
- doctra/utils/structured_utils.py,sha256=EdNhCUDLKvYcLqXbTGveNtIRGyQ3yzYhTh-zy_awwM4,1450
36
- doctra-0.1.1.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
37
- doctra-0.1.1.dist-info/METADATA,sha256=NI9kwistJwaI6sYQ0vp5df-4302_EpSG7QUnk2OpocM,26751
38
- doctra-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
- doctra-0.1.1.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
40
- doctra-0.1.1.dist-info/RECORD,,
39
+ doctra/utils/structured_utils.py,sha256=J-qTqo8eCjm36FaRJ_I482LFgYCpm3eukZm-gbNnchw,1401
40
+ doctra-0.3.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
41
+ doctra-0.3.0.dist-info/METADATA,sha256=tdfVsN0nDj_WcpptBvJvWF2tzdgp_0SfeeYya7oTqgU,27794
42
+ doctra-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ doctra-0.3.0.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
44
+ doctra-0.3.0.dist-info/RECORD,,
File without changes