deepdoctection 1.2.2__tar.gz → 1.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/PKG-INFO +1 -1
  2. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/pyproject.toml +1 -1
  3. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/__init__.py +1 -1
  4. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/anngen.py +2 -10
  5. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/base.py +82 -39
  6. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection.egg-info/PKG-INFO +1 -1
  7. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/README.md +0 -0
  8. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/setup.cfg +0 -0
  9. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/analyzer/__init__.py +0 -0
  10. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/analyzer/config.py +0 -0
  11. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/analyzer/dd.py +0 -0
  12. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/analyzer/factory.py +0 -0
  13. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/configs/__init__.py +0 -0
  14. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/configs/conf_dd_one.yaml +0 -0
  15. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/configs/conf_tesseract.yaml +0 -0
  16. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/configs/profiles.jsonl +0 -0
  17. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/eval/__init__.py +0 -0
  18. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/eval/accmetric.py +0 -0
  19. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/eval/base.py +0 -0
  20. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/eval/cocometric.py +0 -0
  21. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/eval/eval.py +0 -0
  22. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/eval/registry.py +0 -0
  23. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/eval/tedsmetric.py +0 -0
  24. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/__init__.py +0 -0
  25. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/base.py +0 -0
  26. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/d2detect.py +0 -0
  27. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/deskew.py +0 -0
  28. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/doctrocr.py +0 -0
  29. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/hfdetr.py +0 -0
  30. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/hflayoutlm.py +0 -0
  31. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/hflm.py +0 -0
  32. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/model.py +0 -0
  33. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/pdftext.py +0 -0
  34. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/tessocr.py +0 -0
  35. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/extern/texocr.py +0 -0
  36. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/__init__.py +0 -0
  37. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/common.py +0 -0
  38. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/concurrency.py +0 -0
  39. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/doctectionpipe.py +0 -0
  40. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/language.py +0 -0
  41. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/layout.py +0 -0
  42. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/lm.py +0 -0
  43. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/order.py +0 -0
  44. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/refine.py +0 -0
  45. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/registry.py +0 -0
  46. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/segment.py +0 -0
  47. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/sub_layout.py +0 -0
  48. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/text.py +0 -0
  49. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/pipe/transform.py +0 -0
  50. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/py.typed +0 -0
  51. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/train/__init__.py +0 -0
  52. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/train/d2_frcnn_train.py +0 -0
  53. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/train/hf_detr_train.py +0 -0
  54. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection/train/hf_layoutlm_train.py +0 -0
  55. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection.egg-info/SOURCES.txt +0 -0
  56. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection.egg-info/dependency_links.txt +0 -0
  57. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection.egg-info/requires.txt +0 -0
  58. {deepdoctection-1.2.2 → deepdoctection-1.2.4}/src/deepdoctection.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 1.2.2
3
+ Version: 1.2.4
4
4
  Summary: Repository for Document AI - server/inference core package
5
5
  Author: Dr. Janis Meyer
6
6
  License: Apache License 2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "deepdoctection"
7
- version = "1.2.2"
7
+ version = "1.2.4"
8
8
  authors = [
9
9
  {name = "Dr. Janis Meyer"}
10
10
  ]
@@ -12,7 +12,7 @@ from dd_core.utils.env_info import collect_env_info
12
12
  from dd_core.utils.file_utils import _LazyModule
13
13
  from dd_core.utils.logger import LoggingRecord, logger
14
14
 
15
- __version__ = "1.2.2"
15
+ __version__ = "1.2.4"
16
16
  _IMPORT_STRUCTURE = {
17
17
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory", "update_cfg_from_defaults"],
18
18
  "eval": [
@@ -45,15 +45,12 @@ class DataPointCacheStore(ABC):
45
45
  """
46
46
 
47
47
  @abstractmethod
48
- def put_datapoint(
49
- self, document_id: str, image_id: str, page_number: int, image: Image, job_id: str | None = None
50
- ) -> None:
48
+ def put_datapoint(self, document_id: str, page_number: int, image: Image, job_id: str | None = None) -> None:
51
49
  """
52
50
  Persist a datapoint (image) for a specific document and page number.
53
51
 
54
52
  Args:
55
53
  document_id (str): The identifier of the document the image belongs to.
56
- image_id (str): The unique identifier of the image.
57
54
  page_number (int): The 0-based page number inside the document.
58
55
  image (Image): The image object to store (may be serialized by the store).
59
56
  job_id (str | None): Optional job identifier to distinguish between different processing runs.
@@ -137,9 +134,7 @@ class LocalDataPointCacheStore(DataPointCacheStore):
137
134
  return document_id
138
135
  return f"{document_id}::{job_id}"
139
136
 
140
- def put_datapoint(
141
- self, document_id: str, image_id: str, page_number: int, image: Image, job_id: str | None = None
142
- ) -> None:
137
+ def put_datapoint(self, document_id: str, page_number: int, image: Image, job_id: str | None = None) -> None:
143
138
  """
144
139
  Store a serialized version of ``image`` for ``document_id`` at ``page_number``.
145
140
 
@@ -148,8 +143,6 @@ class LocalDataPointCacheStore(DataPointCacheStore):
148
143
 
149
144
  Args:
150
145
  document_id (str): Document identifier the image belongs to.
151
- image_id (str): Image identifier (not directly used by this store but included for API
152
- compatibility with other stores).
153
146
  page_number (int): 0-based page number of the image.
154
147
  image (Image): The Image object to serialize and store.
155
148
  job_id (str | None): Optional job identifier to distinguish between different processing runs.
@@ -251,7 +244,6 @@ class DatapointManager:
251
244
 
252
245
  self._cache_store.put_datapoint(
253
246
  document_id=image.document_id,
254
- image_id=image.image_id,
255
247
  page_number=image.page_number,
256
248
  image=image,
257
249
  job_id=job_id,
@@ -259,6 +259,85 @@ class PipelineComponent(ABC):
259
259
  return MapData(df, self._undo)
260
260
 
261
261
 
262
+ def get_meta_annotation(pipeline_component_list: list[PipelineComponent]) -> MetaAnnotation:
263
+ """
264
+ Collects meta annotations from all pipeline components and summarizes the returned results.
265
+
266
+ Returns:
267
+ Meta annotations with information about image annotations (list), sub categories (dict with category
268
+ names and generated sub categories), relationships (dict with category names and generated relationships)
269
+ as well as summaries (list with sub categories).
270
+ """
271
+ image_annotations: list[ObjectTypes] = []
272
+ sub_categories: dict[ObjectTypes, dict[ObjectTypes, set[ObjectTypes]]] = {}
273
+ relationships = defaultdict(set[ObjectTypes]) # type: ignore
274
+ summaries: list[ObjectTypes] = []
275
+ for component in pipeline_component_list:
276
+ meta_anns = component.get_meta_annotation()
277
+ image_annotations.extend(meta_anns.image_annotations)
278
+ for key, value in meta_anns.sub_categories.items():
279
+ sub_dict = meta_anns.sub_categories[key]
280
+ for sub_cat, sub_cat_value in value.items():
281
+ if sub_cat in sub_dict:
282
+ sub_dict[sub_cat].update(sub_cat_value)
283
+ else:
284
+ sub_dict[sub_cat] = {sub_cat_value} # type: ignore
285
+ if key in sub_categories:
286
+ sub_categories[key].update(sub_dict)
287
+ else:
288
+ sub_categories[key] = sub_dict
289
+ for key, value in meta_anns.relationships.items(): # type: ignore
290
+ relationships[key].update(value)
291
+ summaries.extend(meta_anns.summaries)
292
+ return MetaAnnotation(
293
+ image_annotations=tuple(image_annotations),
294
+ sub_categories=sub_categories,
295
+ relationships=relationships,
296
+ summaries=tuple(summaries),
297
+ )
298
+
299
+
300
+ def get_service_id_to_meta_annotation(pipeline_component_list: list[PipelineComponent]) -> Mapping[str, MetaAnnotation]:
301
+ """
302
+ Collects meta annotations from all pipeline components and return a dict of service id to its meta annotation.
303
+
304
+ Returns:
305
+ `service_id` to `MetaAnnotation` with information about image annotations (list), sub categories (dict with
306
+ category names and generated sub categories), relationships (dict with category names and generated
307
+ relationships) as well as summaries (list with sub categories).
308
+ """
309
+ service_id_to_meta_annotation = {}
310
+ for component in pipeline_component_list:
311
+ meta_anns = component.get_meta_annotation()
312
+ service_id_to_meta_annotation[component.service_id] = meta_anns
313
+ return service_id_to_meta_annotation
314
+
315
+
316
+ def get_pipeline_info(
317
+ pipeline_component_list: list[PipelineComponent], service_id: str | None = None, name: str | None = None
318
+ ) -> Union[str, Mapping[str, str]]:
319
+ """
320
+ Get pipeline information.
321
+
322
+ Returns a dictionary with a description of each pipeline component.
323
+
324
+ Args:
325
+ service_id: Service id of the pipeline component to search for.
326
+ name: Name of the pipeline component to search for.
327
+
328
+ Returns:
329
+ Either a full dictionary with position and name of all pipeline components or the name, if
330
+ the position has been passed or the position if the name has been passed.
331
+ """
332
+ comp_info = {comp.service_id: comp.name for comp in pipeline_component_list}
333
+ comp_info_name_as_key = {value: key for key, value in comp_info.items()}
334
+ if service_id is not None:
335
+ return comp_info[service_id]
336
+ if name is not None:
337
+ return comp_info_name_as_key[name]
338
+ return comp_info
339
+
340
+
262
341
  class Pipeline(ABC):
263
342
  """
264
343
  Abstract base class for creating pipelines.
@@ -394,33 +473,7 @@ class Pipeline(ABC):
394
473
  names and generated sub categories), relationships (dict with category names and generated relationships)
395
474
  as well as summaries (list with sub categories).
396
475
  """
397
- image_annotations: list[ObjectTypes] = []
398
- sub_categories: dict[ObjectTypes, dict[ObjectTypes, set[ObjectTypes]]] = {}
399
- relationships = defaultdict(set[ObjectTypes]) # type: ignore
400
- summaries: list[ObjectTypes] = []
401
- for component in self.pipe_component_list:
402
- meta_anns = component.get_meta_annotation()
403
- image_annotations.extend(meta_anns.image_annotations)
404
- for key, value in meta_anns.sub_categories.items():
405
- sub_dict = meta_anns.sub_categories[key]
406
- for sub_cat, sub_cat_value in value.items():
407
- if sub_cat in sub_dict:
408
- sub_dict[sub_cat].update(sub_cat_value)
409
- else:
410
- sub_dict[sub_cat] = {sub_cat_value} # type: ignore
411
- if key in sub_categories:
412
- sub_categories[key].update(sub_dict)
413
- else:
414
- sub_categories[key] = sub_dict
415
- for key, value in meta_anns.relationships.items(): # type: ignore
416
- relationships[key].update(value)
417
- summaries.extend(meta_anns.summaries)
418
- return MetaAnnotation(
419
- image_annotations=tuple(image_annotations),
420
- sub_categories=dict(sub_categories),
421
- relationships=dict(relationships),
422
- summaries=tuple(summaries),
423
- )
476
+ return get_meta_annotation(self.pipe_component_list)
424
477
 
425
478
  def get_service_id_to_meta_annotation(self) -> Mapping[str, MetaAnnotation]:
426
479
  """
@@ -431,11 +484,7 @@ class Pipeline(ABC):
431
484
  category names and generated sub categories), relationships (dict with category names and generated
432
485
  relationships) as well as summaries (list with sub categories).
433
486
  """
434
- service_id_to_meta_annotation = {}
435
- for component in self.pipe_component_list:
436
- meta_anns = component.get_meta_annotation()
437
- service_id_to_meta_annotation[component.service_id] = meta_anns
438
- return service_id_to_meta_annotation
487
+ return get_service_id_to_meta_annotation(self.pipe_component_list)
439
488
 
440
489
  def get_pipeline_info(
441
490
  self, service_id: Optional[str] = None, name: Optional[str] = None
@@ -453,13 +502,7 @@ class Pipeline(ABC):
453
502
  Either a full dictionary with position and name of all pipeline components or the name, if
454
503
  the position has been passed or the position if the name has been passed.
455
504
  """
456
- comp_info = {comp.service_id: comp.name for comp in self.pipe_component_list}
457
- comp_info_name_as_key = {value: key for key, value in comp_info.items()}
458
- if service_id is not None:
459
- return comp_info[service_id]
460
- if name is not None:
461
- return comp_info_name_as_key[name]
462
- return comp_info
505
+ return get_pipeline_info(self.pipe_component_list, service_id=service_id, name=name)
463
506
 
464
507
  def get_pipeline_component(self, service_id: Optional[str] = None, name: Optional[str] = None) -> PipelineComponent:
465
508
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 1.2.2
3
+ Version: 1.2.4
4
4
  Summary: Repository for Document AI - server/inference core package
5
5
  Author: Dr. Janis Meyer
6
6
  License: Apache License 2.0
File without changes
File without changes