deepdoctection 1.2.1__tar.gz → 1.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/PKG-INFO +1 -1
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/pyproject.toml +1 -1
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/__init__.py +1 -1
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/anngen.py +47 -15
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/base.py +7 -31
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/common.py +1 -1
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/doctectionpipe.py +1 -2
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/PKG-INFO +1 -1
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/README.md +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/setup.cfg +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/analyzer/config.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/analyzer/dd.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/analyzer/factory.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/configs/conf_dd_one.yaml +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/configs/profiles.jsonl +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/eval/accmetric.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/eval/base.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/eval/eval.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/eval/tedsmetric.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/base.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/d2detect.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/deskew.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/doctrocr.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/hfdetr.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/hflayoutlm.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/hflm.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/model.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/pdftext.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/tessocr.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/extern/texocr.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/language.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/layout.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/lm.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/order.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/refine.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/segment.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/sub_layout.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/text.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/pipe/transform.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/py.typed +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/train/d2_frcnn_train.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/train/hf_detr_train.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/train/hf_layoutlm_train.py +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/SOURCES.txt +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/requires.txt +0 -0
- {deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/top_level.txt +0 -0
|
@@ -12,7 +12,7 @@ from dd_core.utils.env_info import collect_env_info
|
|
|
12
12
|
from dd_core.utils.file_utils import _LazyModule
|
|
13
13
|
from dd_core.utils.logger import LoggingRecord, logger
|
|
14
14
|
|
|
15
|
-
__version__ = "1.2.1"
|
|
15
|
+
__version__ = "1.2.2"
|
|
16
16
|
_IMPORT_STRUCTURE = {
|
|
17
17
|
"analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory", "update_cfg_from_defaults"],
|
|
18
18
|
"eval": [
|
|
@@ -45,7 +45,9 @@ class DataPointCacheStore(ABC):
|
|
|
45
45
|
"""
|
|
46
46
|
|
|
47
47
|
@abstractmethod
|
|
48
|
-
def put_datapoint(self, document_id: str, image_id: str, page_number: int, image: Image) -> None:
|
|
48
|
+
def put_datapoint(
|
|
49
|
+
self, document_id: str, image_id: str, page_number: int, image: Image, job_id: str | None = None
|
|
50
|
+
) -> None:
|
|
49
51
|
"""
|
|
50
52
|
Persist a datapoint (image) for a specific document and page number.
|
|
51
53
|
|
|
@@ -54,19 +56,23 @@ class DataPointCacheStore(ABC):
|
|
|
54
56
|
image_id (str): The unique identifier of the image.
|
|
55
57
|
page_number (int): The 0-based page number inside the document.
|
|
56
58
|
image (Image): The image object to store (may be serialized by the store).
|
|
59
|
+
job_id (str | None): Optional job identifier to distinguish between different processing runs.
|
|
60
|
+
If None, caching key remains unchanged (backward compatible).
|
|
57
61
|
|
|
58
62
|
Returns:
|
|
59
63
|
None
|
|
60
64
|
"""
|
|
61
65
|
|
|
62
66
|
@abstractmethod
|
|
63
|
-
def get_datapoints(self, document_id: str, last_d: int) -> tuple[Image, ...]:
|
|
67
|
+
def get_datapoints(self, document_id: str, last_d: int, job_id: str | None = None) -> tuple[Image, ...]:
|
|
64
68
|
"""
|
|
65
69
|
Retrieve up to `last_d` most recently stored datapoints for the given document.
|
|
66
70
|
|
|
67
71
|
Args:
|
|
68
72
|
document_id (str): The identifier of the document to retrieve datapoints for.
|
|
69
73
|
last_d (int): Maximum number of most recent datapoints to return. Must be >= 0.
|
|
74
|
+
job_id (str | None): Optional job identifier to retrieve datapoints from a specific processing run.
|
|
75
|
+
If None, retrieves datapoints for the document without job distinction (backward compatible).
|
|
70
76
|
|
|
71
77
|
Returns:
|
|
72
78
|
tuple[Image, ...]: A tuple of reconstructed :class:`Image` objects ordered from
|
|
@@ -115,7 +121,25 @@ class LocalDataPointCacheStore(DataPointCacheStore):
|
|
|
115
121
|
self._max_pages = max_pages
|
|
116
122
|
self._pages: dict[str, dict[int, dict[str, Any]]] = {}
|
|
117
123
|
|
|
118
|
-
def put_datapoint(self, document_id: str, image_id: str, page_number: int, image: Image) -> None:
|
|
124
|
+
def _get_cache_key(self, document_id: str, job_id: str | None) -> str:
|
|
125
|
+
"""
|
|
126
|
+
Generate cache key, distinguishing by job_id if present.
|
|
127
|
+
|
|
128
|
+
Args:
|
|
129
|
+
document_id (str): The document identifier.
|
|
130
|
+
job_id (str | None): Optional job identifier.
|
|
131
|
+
|
|
132
|
+
Returns:
|
|
133
|
+
str: Cache key. If job_id is None, returns document_id unchanged.
|
|
134
|
+
If job_id is provided, returns "document_id::job_id".
|
|
135
|
+
"""
|
|
136
|
+
if job_id is None:
|
|
137
|
+
return document_id
|
|
138
|
+
return f"{document_id}::{job_id}"
|
|
139
|
+
|
|
140
|
+
def put_datapoint(
|
|
141
|
+
self, document_id: str, image_id: str, page_number: int, image: Image, job_id: str | None = None
|
|
142
|
+
) -> None:
|
|
119
143
|
"""
|
|
120
144
|
Store a serialized version of ``image`` for ``document_id`` at ``page_number``.
|
|
121
145
|
|
|
@@ -128,23 +152,26 @@ class LocalDataPointCacheStore(DataPointCacheStore):
|
|
|
128
152
|
compatibility with other stores).
|
|
129
153
|
page_number (int): 0-based page number of the image.
|
|
130
154
|
image (Image): The Image object to serialize and store.
|
|
155
|
+
job_id (str | None): Optional job identifier to distinguish between different processing runs.
|
|
131
156
|
"""
|
|
132
|
-
|
|
157
|
+
cache_key = self._get_cache_key(document_id, job_id)
|
|
158
|
+
pages = self._pages.get(cache_key)
|
|
133
159
|
if pages is None:
|
|
134
160
|
pages = {}
|
|
135
|
-
self._pages[document_id] = pages
|
|
161
|
+
self._pages[cache_key] = pages
|
|
136
162
|
pages[page_number] = _image_to_cache_dict(image)
|
|
137
163
|
if self._max_pages > 0 and len(pages) > self._max_pages:
|
|
138
164
|
for k in sorted(pages.keys())[: -self._max_pages]:
|
|
139
165
|
pages.pop(k, None)
|
|
140
166
|
|
|
141
|
-
def get_datapoints(self, document_id: str, last_d: int) -> tuple[Image, ...]:
|
|
167
|
+
def get_datapoints(self, document_id: str, last_d: int, job_id: str | None = None) -> tuple[Image, ...]:
|
|
142
168
|
"""
|
|
143
169
|
Retrieve up to ``last_d`` most recent datapoints for a document.
|
|
144
170
|
|
|
145
171
|
Args:
|
|
146
172
|
document_id (str): Document identifier to retrieve pages for.
|
|
147
173
|
last_d (int): Maximum number of pages to return. If <= 0, an empty tuple is returned.
|
|
174
|
+
job_id (str | None): Optional job identifier to retrieve datapoints from a specific processing run.
|
|
148
175
|
|
|
149
176
|
Returns:
|
|
150
177
|
tuple[Image, ...]: Tuple of :class:`Image` instances reconstructed from the stored
|
|
@@ -152,7 +179,8 @@ class LocalDataPointCacheStore(DataPointCacheStore):
|
|
|
152
179
|
"""
|
|
153
180
|
if last_d <= 0:
|
|
154
181
|
return ()
|
|
155
|
-
|
|
182
|
+
cache_key = self._get_cache_key(document_id, job_id)
|
|
183
|
+
pages = self._pages.get(cache_key) or {}
|
|
156
184
|
keys = sorted(pages.keys(), reverse=True)[:last_d]
|
|
157
185
|
return tuple(Image(**pages[k]) for k in keys)
|
|
158
186
|
|
|
@@ -194,7 +222,6 @@ class DatapointManager:
|
|
|
194
222
|
self.datapoint_is_passed: bool = False
|
|
195
223
|
self.service_id = service_id
|
|
196
224
|
self.model_id = model_id
|
|
197
|
-
self.session_id: Optional[str] = None
|
|
198
225
|
|
|
199
226
|
if num_cached_datapoints < 0:
|
|
200
227
|
raise ValueError("num_cached_datapoints must be >= 0")
|
|
@@ -203,7 +230,17 @@ class DatapointManager:
|
|
|
203
230
|
|
|
204
231
|
self._cache_store = cache_store or LocalDataPointCacheStore(max_pages=num_cached_datapoints)
|
|
205
232
|
|
|
206
|
-
def _maybe_cache_datapoint(self, image: Optional[Image]) -> None:
|
|
233
|
+
def maybe_cache_datapoint(self, image: Optional[Image], job_id: str | None = None) -> None:
|
|
234
|
+
"""
|
|
235
|
+
Cache the given datapoint if caching is enabled.
|
|
236
|
+
|
|
237
|
+
This should be called when a datapoint leaves the component to ensure it is cached.
|
|
238
|
+
|
|
239
|
+
Args:
|
|
240
|
+
image: The image datapoint to cache, or None to skip caching.
|
|
241
|
+
job_id: Optional job identifier to distinguish caches between different processing runs.
|
|
242
|
+
If None, caching key remains unchanged (backward compatible).
|
|
243
|
+
"""
|
|
207
244
|
if image is None:
|
|
208
245
|
return
|
|
209
246
|
if self.num_cached_datapoints <= 0:
|
|
@@ -217,6 +254,7 @@ class DatapointManager:
|
|
|
217
254
|
image_id=image.image_id,
|
|
218
255
|
page_number=image.page_number,
|
|
219
256
|
image=image,
|
|
257
|
+
job_id=job_id,
|
|
220
258
|
)
|
|
221
259
|
|
|
222
260
|
@property
|
|
@@ -242,7 +280,6 @@ class DatapointManager:
|
|
|
242
280
|
Args:
|
|
243
281
|
dp: The datapoint to set.
|
|
244
282
|
"""
|
|
245
|
-
self._maybe_cache_datapoint(self._datapoint)
|
|
246
283
|
self._datapoint = dp
|
|
247
284
|
self._cache_anns = {ann.annotation_id: ann for ann in dp.get_annotation()}
|
|
248
285
|
self.datapoint_is_passed = True
|
|
@@ -329,7 +366,6 @@ class DatapointManager:
|
|
|
329
366
|
score=detect_result.score,
|
|
330
367
|
service_id=self.service_id,
|
|
331
368
|
model_id=self.model_id,
|
|
332
|
-
session_id=self.session_id,
|
|
333
369
|
)
|
|
334
370
|
if to_annotation_id is not None:
|
|
335
371
|
parent_ann = self._cache_anns[to_annotation_id]
|
|
@@ -406,7 +442,6 @@ class DatapointManager:
|
|
|
406
442
|
score=score,
|
|
407
443
|
service_id=self.service_id,
|
|
408
444
|
model_id=self.model_id,
|
|
409
|
-
session_id=self.session_id,
|
|
410
445
|
)
|
|
411
446
|
self._cache_anns[annotation_id].dump_sub_category(sub_cat_key, cat_ann)
|
|
412
447
|
if annotation_context.context_error:
|
|
@@ -454,7 +489,6 @@ class DatapointManager:
|
|
|
454
489
|
score=score,
|
|
455
490
|
service_id=self.service_id,
|
|
456
491
|
model_id=self.model_id,
|
|
457
|
-
session_id=self.session_id,
|
|
458
492
|
)
|
|
459
493
|
self._cache_anns[annotation_id].dump_sub_category(sub_cat_key, cont_ann)
|
|
460
494
|
if annotation_context.context_error:
|
|
@@ -542,7 +576,6 @@ class DatapointManager:
|
|
|
542
576
|
score=summary_score,
|
|
543
577
|
service_id=self.service_id,
|
|
544
578
|
model_id=self.model_id,
|
|
545
|
-
session_id=self.session_id,
|
|
546
579
|
)
|
|
547
580
|
else:
|
|
548
581
|
ann = CategoryAnnotation(
|
|
@@ -551,7 +584,6 @@ class DatapointManager:
|
|
|
551
584
|
score=summary_score,
|
|
552
585
|
service_id=self.service_id,
|
|
553
586
|
model_id=self.model_id,
|
|
554
|
-
session_id=self.session_id,
|
|
555
587
|
)
|
|
556
588
|
image.summary.dump_sub_category(summary_key, ann, image.image_id)
|
|
557
589
|
|
|
@@ -24,7 +24,6 @@ from __future__ import annotations
|
|
|
24
24
|
from abc import ABC, abstractmethod
|
|
25
25
|
from collections import defaultdict
|
|
26
26
|
from typing import Any, Callable, Mapping, Optional, Union
|
|
27
|
-
from uuid import uuid1
|
|
28
27
|
|
|
29
28
|
from dd_core.dataflow import DataFlow, MapData
|
|
30
29
|
from dd_core.datapoint.image import Image, MetaAnnotation
|
|
@@ -126,7 +125,7 @@ class PipelineComponent(ABC):
|
|
|
126
125
|
if not self.filter_func(dp):
|
|
127
126
|
self.serve(dp)
|
|
128
127
|
|
|
129
|
-
def pass_datapoint(self, dp: Image) -> Image:
|
|
128
|
+
def pass_datapoint(self, dp: Image, job_id: str | None = None) -> Image:
|
|
130
129
|
"""
|
|
131
130
|
Acceptance, handover to `dp_manager`, transformation and forwarding of `dp`.
|
|
132
131
|
|
|
@@ -134,6 +133,8 @@ class PipelineComponent(ABC):
|
|
|
134
133
|
|
|
135
134
|
Args:
|
|
136
135
|
dp: Datapoint.
|
|
136
|
+
job_id: Optional job identifier to distinguish caches between different processing runs.
|
|
137
|
+
When None, caching behavior is backward compatible (no job distinction).
|
|
137
138
|
|
|
138
139
|
Returns:
|
|
139
140
|
Datapoint.
|
|
@@ -143,6 +144,9 @@ class PipelineComponent(ABC):
|
|
|
143
144
|
self._pass_datapoint(dp)
|
|
144
145
|
else:
|
|
145
146
|
self._pass_datapoint(dp)
|
|
147
|
+
|
|
148
|
+
self.dp_manager.maybe_cache_datapoint(self.dp_manager.datapoint, job_id=job_id)
|
|
149
|
+
|
|
146
150
|
return self.dp_manager.datapoint
|
|
147
151
|
|
|
148
152
|
def predict_dataflow(self, df: DataFlow) -> DataFlow:
|
|
@@ -286,19 +290,6 @@ class Pipeline(ABC):
|
|
|
286
290
|
core model or already processed further).
|
|
287
291
|
|
|
288
292
|
In addition to `analyze`, the internal `_entry` is used to bundle preprocessing steps.
|
|
289
|
-
|
|
290
|
-
It is possible to set a session id for the pipeline. This is useful for logging purposes. The session id can be
|
|
291
|
-
either passed to the pipeline via the `analyze` method or generated automatically.
|
|
292
|
-
|
|
293
|
-
To generate a `session_id` automatically:
|
|
294
|
-
|
|
295
|
-
Example:
|
|
296
|
-
```python
|
|
297
|
-
pipe = MyPipeline(pipeline_component = [layout, text])
|
|
298
|
-
pipe.set_session_id = True
|
|
299
|
-
|
|
300
|
-
df = pipe.analyze(input = "path/to/dir") # session_id is generated automatically
|
|
301
|
-
```
|
|
302
293
|
"""
|
|
303
294
|
|
|
304
295
|
def __init__(self, pipeline_component_list: list[PipelineComponent]) -> None:
|
|
@@ -309,7 +300,6 @@ class Pipeline(ABC):
|
|
|
309
300
|
pipeline_component_list: A list of pipeline components.
|
|
310
301
|
"""
|
|
311
302
|
self.pipe_component_list = pipeline_component_list
|
|
312
|
-
self.set_session_id = False
|
|
313
303
|
|
|
314
304
|
@abstractmethod
|
|
315
305
|
def _entry(self, **kwargs: Any) -> DataFlow:
|
|
@@ -380,22 +370,18 @@ class Pipeline(ABC):
|
|
|
380
370
|
"""
|
|
381
371
|
raise NotImplementedError()
|
|
382
372
|
|
|
383
|
-
def _build_pipe(self, df: DataFlow, session_id: Optional[str] = None) -> DataFlow:
|
|
373
|
+
def _build_pipe(self, df: DataFlow) -> DataFlow:
|
|
384
374
|
"""
|
|
385
375
|
Composition of the backbone.
|
|
386
376
|
|
|
387
377
|
Args:
|
|
388
378
|
df: The input dataflow.
|
|
389
|
-
session_id: Optional session id.
|
|
390
379
|
|
|
391
380
|
Returns:
|
|
392
381
|
The processed dataflow.
|
|
393
382
|
"""
|
|
394
|
-
if session_id is None and self.set_session_id:
|
|
395
|
-
session_id = self.get_session_id()
|
|
396
383
|
for component in self.pipe_component_list:
|
|
397
384
|
component.timer_on = True
|
|
398
|
-
component.dp_manager.session_id = session_id
|
|
399
385
|
df = component.predict_dataflow(df)
|
|
400
386
|
return df
|
|
401
387
|
|
|
@@ -490,13 +476,3 @@ class Pipeline(ABC):
|
|
|
490
476
|
if comp.service_id == service_id or comp.name == name:
|
|
491
477
|
return comp
|
|
492
478
|
raise ValueError(f"Pipeline component not found with service_id={service_id} or name={name}")
|
|
493
|
-
|
|
494
|
-
@staticmethod
|
|
495
|
-
def get_session_id() -> str:
|
|
496
|
-
"""
|
|
497
|
-
Get the generating a session id.
|
|
498
|
-
|
|
499
|
-
Returns:
|
|
500
|
-
The session id as a string.
|
|
501
|
-
"""
|
|
502
|
-
return str(uuid1())[:8]
|
|
@@ -406,7 +406,7 @@ class PageParsingService(PipelineComponent):
|
|
|
406
406
|
def serve(self, dp: Image) -> None:
|
|
407
407
|
raise NotImplementedError("PageParsingService is not meant to be used in serve method")
|
|
408
408
|
|
|
409
|
-
def pass_datapoint(self, dp: Image) -> Page: # type:ignore
|
|
409
|
+
def pass_datapoint(self, dp: Image, job_id: str | None = None) -> Page: # type:ignore
|
|
410
410
|
"""
|
|
411
411
|
Converts `Image` to `Page`.
|
|
412
412
|
|
|
@@ -386,10 +386,9 @@ class DoctectionPipe(Pipeline):
|
|
|
386
386
|
"""
|
|
387
387
|
|
|
388
388
|
output = kwargs.get("output", "page")
|
|
389
|
-
session_id = kwargs.get("session_id")
|
|
390
389
|
assert output in ("page", "image", "dict"), "output must be either page image or dict"
|
|
391
390
|
df = self._entry(**kwargs)
|
|
392
|
-
df = self._build_pipe(df, session_id=session_id)
|
|
391
|
+
df = self._build_pipe(df)
|
|
393
392
|
if output == "page":
|
|
394
393
|
df = self.dataflow_to_page(df)
|
|
395
394
|
elif output == "dict":
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection/configs/conf_tesseract.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-1.2.1 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|