deepdoctection 1.2.0__tar.gz → 1.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/PKG-INFO +1 -1
  2. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/pyproject.toml +1 -1
  3. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/__init__.py +1 -1
  4. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/anngen.py +177 -19
  5. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/base.py +7 -31
  6. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/common.py +1 -1
  7. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/doctectionpipe.py +1 -2
  8. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/PKG-INFO +1 -1
  9. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/README.md +0 -0
  10. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/setup.cfg +0 -0
  11. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/analyzer/__init__.py +0 -0
  12. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/analyzer/config.py +0 -0
  13. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/analyzer/dd.py +0 -0
  14. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/analyzer/factory.py +0 -0
  15. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/configs/__init__.py +0 -0
  16. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/configs/conf_dd_one.yaml +0 -0
  17. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/configs/conf_tesseract.yaml +0 -0
  18. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/configs/profiles.jsonl +0 -0
  19. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/eval/__init__.py +0 -0
  20. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/eval/accmetric.py +0 -0
  21. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/eval/base.py +0 -0
  22. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/eval/cocometric.py +0 -0
  23. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/eval/eval.py +0 -0
  24. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/eval/registry.py +0 -0
  25. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/eval/tedsmetric.py +0 -0
  26. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/__init__.py +0 -0
  27. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/base.py +0 -0
  28. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/d2detect.py +0 -0
  29. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/deskew.py +0 -0
  30. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/doctrocr.py +0 -0
  31. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/hfdetr.py +0 -0
  32. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/hflayoutlm.py +0 -0
  33. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/hflm.py +0 -0
  34. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/model.py +0 -0
  35. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/pdftext.py +0 -0
  36. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/tessocr.py +0 -0
  37. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/extern/texocr.py +0 -0
  38. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/__init__.py +0 -0
  39. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/concurrency.py +0 -0
  40. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/language.py +0 -0
  41. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/layout.py +0 -0
  42. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/lm.py +0 -0
  43. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/order.py +0 -0
  44. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/refine.py +0 -0
  45. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/registry.py +0 -0
  46. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/segment.py +0 -0
  47. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/sub_layout.py +0 -0
  48. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/text.py +0 -0
  49. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/pipe/transform.py +0 -0
  50. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/py.typed +0 -0
  51. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/train/__init__.py +0 -0
  52. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/train/d2_frcnn_train.py +0 -0
  53. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/train/hf_detr_train.py +0 -0
  54. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection/train/hf_layoutlm_train.py +0 -0
  55. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/SOURCES.txt +0 -0
  56. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/dependency_links.txt +0 -0
  57. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/requires.txt +0 -0
  58. {deepdoctection-1.2.0 → deepdoctection-1.2.2}/src/deepdoctection.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 1.2.0
3
+ Version: 1.2.2
4
4
  Summary: Repository for Document AI - server/inference core package
5
5
  Author: Dr. Janis Meyer
6
6
  License: Apache License 2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "deepdoctection"
7
- version = "1.2.0"
7
+ version = "1.2.2"
8
8
  authors = [
9
9
  {name = "Dr. Janis Meyer"}
10
10
  ]
@@ -12,7 +12,7 @@ from dd_core.utils.env_info import collect_env_info
12
12
  from dd_core.utils.file_utils import _LazyModule
13
13
  from dd_core.utils.logger import LoggingRecord, logger
14
14
 
15
- __version__ = "1.2.0"
15
+ __version__ = "1.2.2"
16
16
  _IMPORT_STRUCTURE = {
17
17
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory", "update_cfg_from_defaults"],
18
18
  "eval": [
@@ -18,7 +18,8 @@
18
18
  """
19
19
  Datapoint manager
20
20
  """
21
- from collections import deque
21
+
22
+ from abc import ABC, abstractmethod
22
23
  from dataclasses import asdict
23
24
  from typing import Any, Optional, Sequence, Union
24
25
 
@@ -33,6 +34,157 @@ from dd_core.utils.object_types import ObjectTypes, RelationshipKey
33
34
  from ..extern.base import DetectionResult
34
35
 
35
36
 
37
+ class DataPointCacheStore(ABC):
38
+ """
39
+ Abstract interface for a datapoint cache store.
40
+
41
+ Implementations are expected to provide a mechanism to persist and retrieve recently
42
+ used image datapoints (pages) for a given document. This is used by the
43
+ :class:`DatapointManager` to keep a bounded FIFO cache of previously seen
44
+ datapoints.
45
+ """
46
+
47
+ @abstractmethod
48
+ def put_datapoint(
49
+ self, document_id: str, image_id: str, page_number: int, image: Image, job_id: str | None = None
50
+ ) -> None:
51
+ """
52
+ Persist a datapoint (image) for a specific document and page number.
53
+
54
+ Args:
55
+ document_id (str): The identifier of the document the image belongs to.
56
+ image_id (str): The unique identifier of the image.
57
+ page_number (int): The 0-based page number inside the document.
58
+ image (Image): The image object to store (may be serialized by the store).
59
+ job_id (str | None): Optional job identifier to distinguish between different processing runs.
60
+ If None, caching key remains unchanged (backward compatible).
61
+
62
+ Returns:
63
+ None
64
+ """
65
+
66
+ @abstractmethod
67
+ def get_datapoints(self, document_id: str, last_d: int, job_id: str | None = None) -> tuple[Image, ...]:
68
+ """
69
+ Retrieve up to `last_d` most recently stored datapoints for the given document.
70
+
71
+ Args:
72
+ document_id (str): The identifier of the document to retrieve datapoints for.
73
+ last_d (int): Maximum number of most recent datapoints to return. Must be >= 0.
74
+ job_id (str | None): Optional job identifier to retrieve datapoints from a specific processing run.
75
+ If None, retrieves datapoints for the document without job distinction (backward compatible).
76
+
77
+ Returns:
78
+ tuple[Image, ...]: A tuple of reconstructed :class:`Image` objects ordered from
79
+ newest to oldest (or an empty tuple if none exist).
80
+ """
81
+
82
+
83
+ def _set_image_keys_to_none(d: Any) -> None:
84
+ if isinstance(d, dict):
85
+ for key, value in d.items():
86
+ if key == "_image":
87
+ d[key] = None
88
+ else:
89
+ _set_image_keys_to_none(value)
90
+ elif isinstance(d, list):
91
+ for item in d:
92
+ _set_image_keys_to_none(item)
93
+
94
+
95
+ def _image_to_cache_dict(image: Image) -> dict[str, Any]:
96
+ image.remove_image_from_lower_hierarchy()
97
+ export_dict = image.as_dict()
98
+ _set_image_keys_to_none(export_dict)
99
+ return export_dict
100
+
101
+
102
+ class LocalDataPointCacheStore(DataPointCacheStore):
103
+ """
104
+ In-memory implementation of :class:`DataPointCacheStore`.
105
+
106
+ This simple store keeps a small per-document mapping of page-number -> serialized image
107
+ dictionaries and enforces a FIFO eviction policy based on ``max_pages``.
108
+
109
+ Args:
110
+ max_pages (int): Maximum number of pages to keep per document. If <= 0 caching
111
+ is effectively disabled. Defaults to 3.
112
+ """
113
+
114
+ def __init__(self, max_pages: int = 3) -> None:
115
+ """
116
+ Initialize the in-memory cache store.
117
+
118
+ Args:
119
+ max_pages (int): Maximum number of pages to keep per document.
120
+ """
121
+ self._max_pages = max_pages
122
+ self._pages: dict[str, dict[int, dict[str, Any]]] = {}
123
+
124
+ def _get_cache_key(self, document_id: str, job_id: str | None) -> str:
125
+ """
126
+ Generate cache key, distinguishing by job_id if present.
127
+
128
+ Args:
129
+ document_id (str): The document identifier.
130
+ job_id (str | None): Optional job identifier.
131
+
132
+ Returns:
133
+ str: Cache key. If job_id is None, returns document_id unchanged.
134
+ If job_id is provided, returns "document_id::job_id".
135
+ """
136
+ if job_id is None:
137
+ return document_id
138
+ return f"{document_id}::{job_id}"
139
+
140
+ def put_datapoint(
141
+ self, document_id: str, image_id: str, page_number: int, image: Image, job_id: str | None = None
142
+ ) -> None:
143
+ """
144
+ Store a serialized version of ``image`` for ``document_id`` at ``page_number``.
145
+
146
+ If the number of stored pages for the document exceeds ``self._max_pages`` an eviction
147
+ of the oldest pages (lowest page numbers) will be performed.
148
+
149
+ Args:
150
+ document_id (str): Document identifier the image belongs to.
151
+ image_id (str): Image identifier (not directly used by this store but included for API
152
+ compatibility with other stores).
153
+ page_number (int): 0-based page number of the image.
154
+ image (Image): The Image object to serialize and store.
155
+ job_id (str | None): Optional job identifier to distinguish between different processing runs.
156
+ """
157
+ cache_key = self._get_cache_key(document_id, job_id)
158
+ pages = self._pages.get(cache_key)
159
+ if pages is None:
160
+ pages = {}
161
+ self._pages[cache_key] = pages
162
+ pages[page_number] = _image_to_cache_dict(image)
163
+ if self._max_pages > 0 and len(pages) > self._max_pages:
164
+ for k in sorted(pages.keys())[: -self._max_pages]:
165
+ pages.pop(k, None)
166
+
167
+ def get_datapoints(self, document_id: str, last_d: int, job_id: str | None = None) -> tuple[Image, ...]:
168
+ """
169
+ Retrieve up to ``last_d`` most recent datapoints for a document.
170
+
171
+ Args:
172
+ document_id (str): Document identifier to retrieve pages for.
173
+ last_d (int): Maximum number of pages to return. If <= 0, an empty tuple is returned.
174
+ job_id (str | None): Optional job identifier to retrieve datapoints from a specific processing run.
175
+
176
+ Returns:
177
+ tuple[Image, ...]: Tuple of :class:`Image` instances reconstructed from the stored
178
+ serialized dicts ordered from newest -> oldest.
179
+ """
180
+ if last_d <= 0:
181
+ return ()
182
+ cache_key = self._get_cache_key(document_id, job_id)
183
+ pages = self._pages.get(cache_key) or {}
184
+ keys = sorted(pages.keys(), reverse=True)[:last_d]
185
+ return tuple(Image(**pages[k]) for k in keys)
186
+
187
+
36
188
  class DatapointManager:
37
189
  """
38
190
  This class provides an API for manipulating image datapoints. This includes the creation and storage of
@@ -63,21 +215,32 @@ class DatapointManager:
63
215
  model_id: Optional[str] = None,
64
216
  num_cached_datapoints: int = 0,
65
217
  remove_pixel_values_from_cache: bool = True,
218
+ cache_store: LocalDataPointCacheStore | None = None,
66
219
  ) -> None:
67
220
  self._datapoint: Optional[Image] = None
68
221
  self._cache_anns: dict[str, ImageAnnotation] = {}
69
222
  self.datapoint_is_passed: bool = False
70
223
  self.service_id = service_id
71
224
  self.model_id = model_id
72
- self.session_id: Optional[str] = None
73
225
 
74
226
  if num_cached_datapoints < 0:
75
227
  raise ValueError("num_cached_datapoints must be >= 0")
76
228
  self.num_cached_datapoints = num_cached_datapoints
77
229
  self.remove_pixel_values_from_cache = remove_pixel_values_from_cache
78
- self._cached_datapoints: deque[Image] = deque()
79
230
 
80
- def _maybe_cache_datapoint(self, image: Optional[Image]) -> None:
231
+ self._cache_store = cache_store or LocalDataPointCacheStore(max_pages=num_cached_datapoints)
232
+
233
+ def maybe_cache_datapoint(self, image: Optional[Image], job_id: str | None = None) -> None:
234
+ """
235
+ Cache the given datapoint if caching is enabled.
236
+
237
+ This should be called when a datapoint leaves the component to ensure it is cached.
238
+
239
+ Args:
240
+ image: The image datapoint to cache, or None to skip caching.
241
+ job_id: Optional job identifier to distinguish caches between different processing runs.
242
+ If None, caching key remains unchanged (backward compatible).
243
+ """
81
244
  if image is None:
82
245
  return
83
246
  if self.num_cached_datapoints <= 0:
@@ -85,12 +248,14 @@ class DatapointManager:
85
248
 
86
249
  if self.remove_pixel_values_from_cache:
87
250
  image.clear_image()
88
- image.remove_image_from_lower_hierarchy(pixel_values_only=True)
89
251
 
90
- self._cached_datapoints.append(image)
91
-
92
- while len(self._cached_datapoints) > self.num_cached_datapoints:
93
- self._cached_datapoints.popleft()
252
+ self._cache_store.put_datapoint(
253
+ document_id=image.document_id,
254
+ image_id=image.image_id,
255
+ page_number=image.page_number,
256
+ image=image,
257
+ job_id=job_id,
258
+ )
94
259
 
95
260
  @property
96
261
  def datapoint(self) -> Image:
@@ -115,8 +280,6 @@ class DatapointManager:
115
280
  Args:
116
281
  dp: The datapoint to set.
117
282
  """
118
- self._maybe_cache_datapoint(self._datapoint)
119
-
120
283
  self._datapoint = dp
121
284
  self._cache_anns = {ann.annotation_id: ann for ann in dp.get_annotation()}
122
285
  self.datapoint_is_passed = True
@@ -203,7 +366,6 @@ class DatapointManager:
203
366
  score=detect_result.score,
204
367
  service_id=self.service_id,
205
368
  model_id=self.model_id,
206
- session_id=self.session_id,
207
369
  )
208
370
  if to_annotation_id is not None:
209
371
  parent_ann = self._cache_anns[to_annotation_id]
@@ -280,7 +442,6 @@ class DatapointManager:
280
442
  score=score,
281
443
  service_id=self.service_id,
282
444
  model_id=self.model_id,
283
- session_id=self.session_id,
284
445
  )
285
446
  self._cache_anns[annotation_id].dump_sub_category(sub_cat_key, cat_ann)
286
447
  if annotation_context.context_error:
@@ -328,7 +489,6 @@ class DatapointManager:
328
489
  score=score,
329
490
  service_id=self.service_id,
330
491
  model_id=self.model_id,
331
- session_id=self.session_id,
332
492
  )
333
493
  self._cache_anns[annotation_id].dump_sub_category(sub_cat_key, cont_ann)
334
494
  if annotation_context.context_error:
@@ -416,7 +576,6 @@ class DatapointManager:
416
576
  score=summary_score,
417
577
  service_id=self.service_id,
418
578
  model_id=self.model_id,
419
- session_id=self.session_id,
420
579
  )
421
580
  else:
422
581
  ann = CategoryAnnotation(
@@ -425,7 +584,6 @@ class DatapointManager:
425
584
  score=summary_score,
426
585
  service_id=self.service_id,
427
586
  model_id=self.model_id,
428
- session_id=self.session_id,
429
587
  )
430
588
  image.summary.dump_sub_category(summary_key, ann, image.image_id)
431
589
 
@@ -481,8 +639,8 @@ class DatapointManager:
481
639
  """
482
640
  if last_k < 0:
483
641
  raise ValueError("last_k must be >= 0")
484
- if last_k == 0 or not self._cached_datapoints:
642
+ if last_k == 0:
485
643
  return tuple()
486
644
 
487
- k = min(last_k, len(self._cached_datapoints))
488
- return tuple(list(self._cached_datapoints)[-k:])
645
+ doc_id = self.datapoint.document_id
646
+ return self._cache_store.get_datapoints(document_id=doc_id, last_d=last_k)
@@ -24,7 +24,6 @@ from __future__ import annotations
24
24
  from abc import ABC, abstractmethod
25
25
  from collections import defaultdict
26
26
  from typing import Any, Callable, Mapping, Optional, Union
27
- from uuid import uuid1
28
27
 
29
28
  from dd_core.dataflow import DataFlow, MapData
30
29
  from dd_core.datapoint.image import Image, MetaAnnotation
@@ -126,7 +125,7 @@ class PipelineComponent(ABC):
126
125
  if not self.filter_func(dp):
127
126
  self.serve(dp)
128
127
 
129
- def pass_datapoint(self, dp: Image) -> Image:
128
+ def pass_datapoint(self, dp: Image, job_id: str | None = None) -> Image:
130
129
  """
131
130
  Acceptance, handover to `dp_manager`, transformation and forwarding of `dp`.
132
131
 
@@ -134,6 +133,8 @@ class PipelineComponent(ABC):
134
133
 
135
134
  Args:
136
135
  dp: Datapoint.
136
+ job_id: Optional job identifier to distinguish caches between different processing runs.
137
+ When None, caching behavior is backward compatible (no job distinction).
137
138
 
138
139
  Returns:
139
140
  Datapoint.
@@ -143,6 +144,9 @@ class PipelineComponent(ABC):
143
144
  self._pass_datapoint(dp)
144
145
  else:
145
146
  self._pass_datapoint(dp)
147
+
148
+ self.dp_manager.maybe_cache_datapoint(self.dp_manager.datapoint, job_id=job_id)
149
+
146
150
  return self.dp_manager.datapoint
147
151
 
148
152
  def predict_dataflow(self, df: DataFlow) -> DataFlow:
@@ -286,19 +290,6 @@ class Pipeline(ABC):
286
290
  core model or already processed further).
287
291
 
288
292
  In addition to `analyze`, the internal `_entry` is used to bundle preprocessing steps.
289
-
290
- It is possible to set a session id for the pipeline. This is useful for logging purposes. The session id can be
291
- either passed to the pipeline via the `analyze` method or generated automatically.
292
-
293
- To generate a `session_id` automatically:
294
-
295
- Example:
296
- ```python
297
- pipe = MyPipeline(pipeline_component = [layout, text])
298
- pipe.set_session_id = True
299
-
300
- df = pipe.analyze(input = "path/to/dir") # session_id is generated automatically
301
- ```
302
293
  """
303
294
 
304
295
  def __init__(self, pipeline_component_list: list[PipelineComponent]) -> None:
@@ -309,7 +300,6 @@ class Pipeline(ABC):
309
300
  pipeline_component_list: A list of pipeline components.
310
301
  """
311
302
  self.pipe_component_list = pipeline_component_list
312
- self.set_session_id = False
313
303
 
314
304
  @abstractmethod
315
305
  def _entry(self, **kwargs: Any) -> DataFlow:
@@ -380,22 +370,18 @@ class Pipeline(ABC):
380
370
  """
381
371
  raise NotImplementedError()
382
372
 
383
- def _build_pipe(self, df: DataFlow, session_id: Optional[str] = None) -> DataFlow:
373
+ def _build_pipe(self, df: DataFlow) -> DataFlow:
384
374
  """
385
375
  Composition of the backbone.
386
376
 
387
377
  Args:
388
378
  df: The input dataflow.
389
- session_id: Optional session id.
390
379
 
391
380
  Returns:
392
381
  The processed dataflow.
393
382
  """
394
- if session_id is None and self.set_session_id:
395
- session_id = self.get_session_id()
396
383
  for component in self.pipe_component_list:
397
384
  component.timer_on = True
398
- component.dp_manager.session_id = session_id
399
385
  df = component.predict_dataflow(df)
400
386
  return df
401
387
 
@@ -490,13 +476,3 @@ class Pipeline(ABC):
490
476
  if comp.service_id == service_id or comp.name == name:
491
477
  return comp
492
478
  raise ValueError(f"Pipeline component not found with service_id={service_id} or name={name}")
493
-
494
- @staticmethod
495
- def get_session_id() -> str:
496
- """
497
- Get the generating a session id.
498
-
499
- Returns:
500
- The session id as a string.
501
- """
502
- return str(uuid1())[:8]
@@ -406,7 +406,7 @@ class PageParsingService(PipelineComponent):
406
406
  def serve(self, dp: Image) -> None:
407
407
  raise NotImplementedError("PageParsingService is not meant to be used in serve method")
408
408
 
409
- def pass_datapoint(self, dp: Image) -> Page: # type:ignore
409
+ def pass_datapoint(self, dp: Image, job_id: str | None = None) -> Page: # type:ignore
410
410
  """
411
411
  Converts `Image` to `Page`.
412
412
 
@@ -386,10 +386,9 @@ class DoctectionPipe(Pipeline):
386
386
  """
387
387
 
388
388
  output = kwargs.get("output", "page")
389
- session_id = kwargs.get("session_id")
390
389
  assert output in ("page", "image", "dict"), "output must be either page image or dict"
391
390
  df = self._entry(**kwargs)
392
- df = self._build_pipe(df, session_id=session_id) # type: ignore
391
+ df = self._build_pipe(df)
393
392
  if output == "page":
394
393
  df = self.dataflow_to_page(df)
395
394
  elif output == "dict":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 1.2.0
3
+ Version: 1.2.2
4
4
  Summary: Repository for Document AI - server/inference core package
5
5
  Author: Dr. Janis Meyer
6
6
  License: Apache License 2.0
File without changes
File without changes