deepdoctection: deepdoctection-0.39.1-py3-none-any.whl → deepdoctection-0.39.2-py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

deepdoctection/__init__.py CHANGED
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.39.1"
28
+ __version__ = "0.39.2"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
deepdoctection/analyzer/_config.py CHANGED
@@ -40,6 +40,7 @@ cfg.TF.CELL.FILTER = None
40
40
  cfg.TF.ITEM.WEIGHTS = "item/model-1620000_inf_only.data-00000-of-00001"
41
41
  cfg.TF.ITEM.FILTER = None
42
42
 
43
+ cfg.PT.ENFORCE_WEIGHTS = False
43
44
  cfg.PT.LAYOUT.WEIGHTS = "layout/d2_model_0829999_layout_inf_only.pt"
44
45
  cfg.PT.LAYOUT.WEIGHTS_TS = "layout/d2_model_0829999_layout_inf_only.ts"
45
46
  cfg.PT.LAYOUT.FILTER = None
deepdoctection/analyzer/factory.py CHANGED
@@ -98,7 +98,11 @@ class ServiceFactory:
98
98
  weights = (
99
99
  getattr(config.TF, mode).WEIGHTS
100
100
  if config.LIB == "TF"
101
- else (getattr(config.PT, mode).WEIGHTS if detectron2_available() else getattr(config.PT, mode).WEIGHTS_TS)
101
+ else (
102
+ getattr(config.PT, mode).WEIGHTS
103
+ if detectron2_available() or config.PT.ENFORCE_WEIGHTS
104
+ else getattr(config.PT, mode).WEIGHTS_TS
105
+ )
102
106
  )
103
107
  filter_categories = (
104
108
  getattr(getattr(config.TF, mode), "FILTER")
deepdoctection/datapoint/view.py CHANGED
@@ -407,6 +407,35 @@ class Table(Layout):
407
407
  col_anns = self.base_page.get_annotation(annotation_ids=all_relation_ids, category_names=[LayoutType.COLUMN])
408
408
  return col_anns
409
409
 
410
+ def row(self, row_number: int) -> list[ImageAnnotationBaseView]:
411
+ """
412
+ Get a list of cells in a row.
413
+ """
414
+ all_relation_ids = self.get_relationship(Relationships.CHILD)
415
+ all_cells = self.base_page.get_annotation(
416
+ category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
417
+ )
418
+ row_cells = list(
419
+ filter(lambda c: row_number in (c.row_number, c.row_number + c.row_span - 1), all_cells) # type: ignore
420
+ )
421
+ row_cells.sort(key=lambda c: c.column_number) # type: ignore
422
+ return row_cells # type: ignore
423
+
424
+ def column(self, column_number: int) -> list[ImageAnnotationBaseView]:
425
+ """
426
+ Get a list of cells in a column.
427
+ """
428
+ all_relation_ids = self.get_relationship(Relationships.CHILD)
429
+ all_cells = self.base_page.get_annotation(
430
+ category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
431
+ )
432
+ column_cells = list(
433
+ filter(lambda c: column_number in # type: ignore
434
+ (c.column_number, c.column_number + c.column_span - 1), all_cells) # type: ignore
435
+ )
436
+ column_cells.sort(key=lambda c: c.row_number) # type: ignore
437
+ return column_cells # type: ignore
438
+
410
439
  @property
411
440
  def html(self) -> HTML:
412
441
  """
deepdoctection/pipe/base.py CHANGED
@@ -24,7 +24,7 @@ from __future__ import annotations
24
24
  from abc import ABC, abstractmethod
25
25
  from collections import defaultdict
26
26
  from dataclasses import dataclass, field
27
- from typing import Any, Mapping, Optional, Union, Callable
27
+ from typing import Any, Callable, Mapping, Optional, Union
28
28
  from uuid import uuid1
29
29
 
30
30
  from ..dataflow import DataFlow, MapData
@@ -100,7 +100,7 @@ class PipelineComponent(ABC):
100
100
 
101
101
  :param filter_func: A function that takes an image datapoint and returns a boolean value
102
102
  """
103
- self.filter_func = filter_func # type: ignore
103
+ self.filter_func = filter_func # type: ignore
104
104
 
105
105
  @abstractmethod
106
106
  def serve(self, dp: Image) -> None:
@@ -122,7 +122,6 @@ class PipelineComponent(ABC):
122
122
  if not self.filter_func(dp):
123
123
  self.serve(dp)
124
124
 
125
-
126
125
  def pass_datapoint(self, dp: Image) -> Image:
127
126
  """
128
127
  Acceptance, handover to dp_manager, transformation and forwarding of dp. To measure the time, use
deepdoctection/pipe/common.py CHANGED
@@ -362,7 +362,7 @@ class AnnotationNmsService(PipelineComponent):
362
362
  self.threshold = [thresholds for _ in self.nms_pairs]
363
363
  else:
364
364
  assert len(self.nms_pairs) == len(thresholds), "Sequences of nms_pairs and thresholds must have same length"
365
- self.threshold = thresholds # type: ignore
365
+ self.threshold = thresholds # type: ignore
366
366
  if priority:
367
367
  assert len(self.nms_pairs) == len(priority), "Sequences of nms_pairs and priority must have same length"
368
368
 
deepdoctection/pipe/lm.py CHANGED
@@ -265,7 +265,7 @@ class LMSequenceClassifierService(PipelineComponent):
265
265
  padding: Literal["max_length", "do_not_pad", "longest"] = "max_length",
266
266
  truncation: bool = True,
267
267
  return_overflowing_tokens: bool = False,
268
- use_other_as_default_category: bool = False
268
+ use_other_as_default_category: bool = False,
269
269
  ) -> None:
270
270
  """
271
271
  :param tokenizer: Tokenizer, typing allows currently anything. This will be changed in the future
@@ -309,11 +309,10 @@ class LMSequenceClassifierService(PipelineComponent):
309
309
  lm_output = None
310
310
  if lm_input is None:
311
311
  if self.use_other_as_default_category:
312
- class_id = self.language_model.categories.get_categories(as_dict=True,
313
- name_as_key=True).get(TokenClasses.OTHER, 1)
314
- lm_output = SequenceClassResult(class_name=TokenClasses.OTHER,
315
- class_id = class_id,
316
- score=-1.)
312
+ class_id = self.language_model.categories.get_categories(as_dict=True, name_as_key=True).get(
313
+ TokenClasses.OTHER, 1
314
+ )
315
+ lm_output = SequenceClassResult(class_name=TokenClasses.OTHER, class_id=class_id, score=-1.0)
317
316
  else:
318
317
  lm_output = self.language_model.predict(**lm_input)
319
318
  if lm_output:
deepdoctection/train/hf_layoutlm_train.py CHANGED
@@ -499,9 +499,7 @@ def train_hf_layoutlm(
499
499
  )
500
500
  pipeline_component_cls = pipeline_component_registry.get(pipeline_component_name)
501
501
  if dataset_type == DatasetType.SEQUENCE_CLASSIFICATION:
502
- pipeline_component = pipeline_component_cls(tokenizer_fast,
503
- dd_model,
504
- use_other_as_default_category=True)
502
+ pipeline_component = pipeline_component_cls(tokenizer_fast, dd_model, use_other_as_default_category=True)
505
503
  else:
506
504
  pipeline_component = pipeline_component_cls(
507
505
  tokenizer_fast,
deepdoctection-0.39.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deepdoctection
3
- Version: 0.39.1
3
+ Version: 0.39.2
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
deepdoctection-0.39.2.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
1
- deepdoctection/__init__.py,sha256=uDowNayqaYZGYaqnGzPSz6pVuHQhtDVRAN_bvPq85Ko,12754
1
+ deepdoctection/__init__.py,sha256=KpFnC7nCOVQOgeXDWt0fIjKoikD3MRj-_iSAbARIWeQ,12754
2
2
  deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
4
- deepdoctection/analyzer/_config.py,sha256=OZMOPlyFv4gcyabPG6KO08EYx-0tUH82Ehs9YDv2B1Q,5027
4
+ deepdoctection/analyzer/_config.py,sha256=1rfvVrp7cI2YLzpahD77aa1tZ_KFAIQ21DM1NWhxYiI,5058
5
5
  deepdoctection/analyzer/dd.py,sha256=bfR7e1JV7BwUNDRLu0jYZU7qQXnyA_vbRAJl2Ylrq5o,5905
6
- deepdoctection/analyzer/factory.py,sha256=Kf3Ztv5FEcF5yJf6i4I557aOIUHybuxIP0moHryguTQ,32344
6
+ deepdoctection/analyzer/factory.py,sha256=7L-bJ9957TBn_C6OGWJFmZobrh8MPq4Q-Espx5faEiY,32435
7
7
  deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
8
8
  deepdoctection/configs/conf_dd_one.yaml,sha256=qnrDAST1PHBtdIKE_hdkZexW22FqVvNTI-PEo9wvinM,3025
9
9
  deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
@@ -20,7 +20,7 @@ deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ
20
20
  deepdoctection/datapoint/box.py,sha256=UAdSnLexvFyg4KK1u9kXdJxhaWTwRxTU-cnQcvl37Q8,23410
21
21
  deepdoctection/datapoint/convert.py,sha256=gJbHY2V8nlMht1N5VdxTmWSsOeydpFPTJsaJHp6XGgE,7516
22
22
  deepdoctection/datapoint/image.py,sha256=S6yfsIRQgMCl6HYAcHYJSBcbfdYKKtebtkEkkkrXsMQ,33619
23
- deepdoctection/datapoint/view.py,sha256=srMyPQGsK4OSiorxkyG6UAIgpViM6Ks1CI3b5k97cjY,49452
23
+ deepdoctection/datapoint/view.py,sha256=XPyhbBr2cGIKdAiISBVZWxNxlSvN8kmGsD9P0mfpEEE,50772
24
24
  deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
25
25
  deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
26
26
  deepdoctection/datasets/base.py,sha256=DT4i-d74sIEiUNC6UspIHNJuHSK0t1dBv7qwadg4rLw,22341
@@ -103,13 +103,13 @@ deepdoctection/mapper/tpstruct.py,sha256=YNABRibvcISD5Lavg3jouoE4FMdqXEJoM-hNoB_
103
103
  deepdoctection/mapper/xfundstruct.py,sha256=_3r3c0K82fnF2h1HxA85h-9ETYrHwcERa6MNc6Ko6Z8,8807
104
104
  deepdoctection/pipe/__init__.py,sha256=ywTVoetftdL6plXg2YlBzMfmqBZupq7yXblSVyvvkcQ,1127
105
105
  deepdoctection/pipe/anngen.py,sha256=3319l4aaXzcY4w6ItVBNPX8LGS5fHFDVtyVY9KMefac,16393
106
- deepdoctection/pipe/base.py,sha256=F4NusbZ-xYc6wuO-XAngmC8uzahT2ubsu2g9NO8PpVw,15390
107
- deepdoctection/pipe/common.py,sha256=vlWzvwn8wl7baPbK-917HUWujEGJEkHur_-ilkweKjk,17751
106
+ deepdoctection/pipe/base.py,sha256=wlza9aDOKnHKrXmaz8MLyLz0nMqqcIWQ-6Lu944aicE,15390
107
+ deepdoctection/pipe/common.py,sha256=C1KxEfJFSPeDqlnkiJ1ZYPuA36P8BU_4jVhdsszW_V8,17752
108
108
  deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
109
109
  deepdoctection/pipe/doctectionpipe.py,sha256=bGW3ugky-fb-nEe-3bvO6Oc_4_6w82cQboGM_6p2eIo,12530
110
110
  deepdoctection/pipe/language.py,sha256=5zI0UQC6Fh12_r2pfVL42HoCGz2hpHrOhpXAn5m-rYw,5451
111
111
  deepdoctection/pipe/layout.py,sha256=xIhnJpyUSbvLbhTXyAKXY1hmG9352jihGYFSclTH_1g,5567
112
- deepdoctection/pipe/lm.py,sha256=Ygj6MmBvBZ7l4RGCwBuhmMsOM0Ep3LWteNg7bzh-UmI,17703
112
+ deepdoctection/pipe/lm.py,sha256=x9NoYpivdjQF1r76a7PPrUuBEmuHP7ZukuXFDkXhXBc,17572
113
113
  deepdoctection/pipe/order.py,sha256=PnJZiCnxFluJiECXLTZT0c1Rr66vIRBFraa_G41UA2k,40121
114
114
  deepdoctection/pipe/refine.py,sha256=dTfI396xydPdbzpfo4yqFcuxl3UAB1y-WbSQn1o76ec,22367
115
115
  deepdoctection/pipe/registry.py,sha256=aFx-Tn0xhVA5l5H18duNW5QoTNKQltybsEUEzsMgUfg,902
@@ -120,7 +120,7 @@ deepdoctection/pipe/transform.py,sha256=9Om7X7hJeL4jgUwHM1CHa4sb5v7Qo1PtVG0ls_3n
120
120
  deepdoctection/train/__init__.py,sha256=YFTRAZF1F7cEAKTdAIi1BLyYb6rSRcwq09Ui5Lu8d6E,1071
121
121
  deepdoctection/train/d2_frcnn_train.py,sha256=sFc_G-mEpaM8d1CCE0_6Gl4nBh11X2RYRBA3p_ylFJQ,16000
122
122
  deepdoctection/train/hf_detr_train.py,sha256=eHSdI11U8oGy93noZxAISfukhRBElj4dBerJ4Xcercw,10785
123
- deepdoctection/train/hf_layoutlm_train.py,sha256=irSg-IpbVoSlaw1-vZCej2mCZcctONtXr5Z2NQAc_a4,22680
123
+ deepdoctection/train/hf_layoutlm_train.py,sha256=DTPJZYKeDdRtDpcObYh93uh5D4sgT4c0ckHiAknCroY,22568
124
124
  deepdoctection/train/tp_frcnn_train.py,sha256=pEpXokSVGveqo82pRnhnAmHPmjQ_8wQWpqM4ZyNHJgs,13049
125
125
  deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
126
126
  deepdoctection/utils/concurrency.py,sha256=nIhpkSncmv0LBB8PtcOLY-BsRGlfcDpz7foVdgzZd20,4598
@@ -141,8 +141,8 @@ deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F
141
141
  deepdoctection/utils/types.py,sha256=_3dmPdCIZNLbgU5QP5k_c5phDf18xLe1kYL6t2nM45s,2953
142
142
  deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
143
143
  deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
144
- deepdoctection-0.39.1.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
- deepdoctection-0.39.1.dist-info/METADATA,sha256=NBN2dqFMUiXkcJ28xJDwyN6eNP-MmFw64F7dm3kUWTA,19741
146
- deepdoctection-0.39.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
147
- deepdoctection-0.39.1.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
- deepdoctection-0.39.1.dist-info/RECORD,,
144
+ deepdoctection-0.39.2.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
+ deepdoctection-0.39.2.dist-info/METADATA,sha256=Z6b0DeMKoXZOR6O7Yxu7Qt5_MLgCxxvyNuWsPAhFo0M,19741
146
+ deepdoctection-0.39.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
147
+ deepdoctection-0.39.2.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
+ deepdoctection-0.39.2.dist-info/RECORD,,