deepdoctection 0.39.6__py3-none-any.whl → 0.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic; consult the registry's release page for further details.

@@ -436,24 +436,24 @@ def segment_table(
436
436
  child_ann_ids = table.get_relationship(Relationships.CHILD)
437
437
  cell_index_rows, row_index, _, _ = match_anns_by_intersection(
438
438
  dp,
439
- item_names[0],
440
- cell_names,
441
- segment_rule,
442
- threshold_rows,
443
- True,
444
- child_ann_ids,
445
- child_ann_ids,
439
+ parent_ann_category_names=item_names[0],
440
+ child_ann_category_names=cell_names,
441
+ matching_rule=segment_rule,
442
+ threshold=threshold_rows,
443
+ use_weighted_intersections=True,
444
+ parent_ann_ids=child_ann_ids,
445
+ child_ann_ids=child_ann_ids,
446
446
  )
447
447
 
448
448
  cell_index_cols, col_index, _, _ = match_anns_by_intersection(
449
449
  dp,
450
- item_names[1],
451
- cell_names,
452
- segment_rule,
453
- threshold_cols,
454
- True,
455
- child_ann_ids,
456
- child_ann_ids,
450
+ parent_ann_category_names=item_names[1],
451
+ child_ann_category_names=cell_names,
452
+ matching_rule=segment_rule,
453
+ threshold=threshold_cols,
454
+ use_weighted_intersections=True,
455
+ parent_ann_ids=child_ann_ids,
456
+ child_ann_ids=child_ann_ids,
457
457
  )
458
458
 
459
459
  cells = dp.get_annotation(annotation_ids=child_ann_ids, category_names=cell_names)
@@ -499,7 +499,6 @@ def create_intersection_cells(
499
499
  rows: Sequence[ImageAnnotation],
500
500
  cols: Sequence[ImageAnnotation],
501
501
  table_annotation_id: str,
502
- cell_class_id: int,
503
502
  sub_item_names: Sequence[ObjectTypes],
504
503
  ) -> tuple[Sequence[DetectionResult], Sequence[SegmentationResult]]:
505
504
  """
@@ -509,7 +508,6 @@ def create_intersection_cells(
509
508
  :param rows: list of rows
510
509
  :param cols: list of columns
511
510
  :param table_annotation_id: annotation_id of underlying table ImageAnnotation
512
- :param cell_class_id: The class_id to a synthetically generated DetectionResult
513
511
  :param sub_item_names: ObjectTypes for row-/column number
514
512
  :return: Pair of lists of `DetectionResult` and `SegmentationResult`.
515
513
  """
@@ -526,7 +524,6 @@ def create_intersection_cells(
526
524
  detect_result_cells.append(
527
525
  DetectionResult(
528
526
  box=boxes_cells[idx].to_list(mode="xyxy"),
529
- class_id=cell_class_id,
530
527
  absolute_coords=boxes_cells[idx].absolute_coords,
531
528
  class_name=LayoutType.CELL,
532
529
  )
@@ -574,13 +571,13 @@ def header_cell_to_item_detect_result(
574
571
  child_ann_ids = table.get_relationship(Relationships.CHILD)
575
572
  item_index, _, items, _ = match_anns_by_intersection(
576
573
  dp,
577
- item_header_name,
578
- item_name,
579
- segment_rule,
580
- threshold,
581
- True,
582
- child_ann_ids,
583
- child_ann_ids,
574
+ parent_ann_category_names=item_header_name,
575
+ child_ann_category_names=item_name,
576
+ matching_rule=segment_rule,
577
+ threshold=threshold,
578
+ use_weighted_intersections=True,
579
+ parent_ann_ids=child_ann_ids,
580
+ child_ann_ids=child_ann_ids,
584
581
  )
585
582
  item_headers = []
586
583
  for idx, item in enumerate(items):
@@ -622,24 +619,24 @@ def segment_pubtables(
622
619
  child_ann_ids = table.get_relationship(Relationships.CHILD)
623
620
  cell_index_rows, row_index, _, _ = match_anns_by_intersection(
624
621
  dp,
625
- item_names[0],
626
- spanning_cell_names,
627
- segment_rule,
628
- threshold_rows,
629
- True,
630
- child_ann_ids,
631
- child_ann_ids,
622
+ parent_ann_category_names=item_names[0],
623
+ child_ann_category_names=spanning_cell_names,
624
+ matching_rule=segment_rule,
625
+ threshold=threshold_rows,
626
+ use_weighted_intersections=True,
627
+ parent_ann_ids=child_ann_ids,
628
+ child_ann_ids=child_ann_ids,
632
629
  )
633
630
 
634
631
  cell_index_cols, col_index, _, _ = match_anns_by_intersection(
635
632
  dp,
636
- item_names[1],
637
- spanning_cell_names,
638
- segment_rule,
639
- threshold_cols,
640
- True,
641
- child_ann_ids,
642
- child_ann_ids,
633
+ parent_ann_category_names=item_names[1],
634
+ child_ann_category_names=spanning_cell_names,
635
+ matching_rule=segment_rule,
636
+ threshold=threshold_cols,
637
+ use_weighted_intersections=True,
638
+ parent_ann_ids=child_ann_ids,
639
+ child_ann_ids=child_ann_ids,
643
640
  )
644
641
 
645
642
  spanning_cells = dp.get_annotation(annotation_ids=child_ann_ids, category_names=spanning_cell_names)
@@ -976,7 +973,6 @@ class PubtablesSegmentationService(PipelineComponent):
976
973
  tile_table_with_items: bool,
977
974
  remove_iou_threshold_rows: float,
978
975
  remove_iou_threshold_cols: float,
979
- cell_class_id: int,
980
976
  table_name: TypeOrStr,
981
977
  cell_names: Sequence[TypeOrStr],
982
978
  spanning_cell_names: Sequence[TypeOrStr],
@@ -997,7 +993,6 @@ class PubtablesSegmentationService(PipelineComponent):
997
993
  the adjacent row. Will do a similar shifting with columns.
998
994
  :param remove_iou_threshold_rows: iou threshold for removing overlapping rows
999
995
  :param remove_iou_threshold_cols: iou threshold for removing overlapping columns
1000
- :param cell_class_id: 'category_id' for cells to be generated from intersected rows and columns
1001
996
  :param table_name: layout type table
1002
997
  :param cell_names: layout type of cells
1003
998
  :param spanning_cell_names: layout type of spanning cells
@@ -1022,7 +1017,6 @@ class PubtablesSegmentationService(PipelineComponent):
1022
1017
  self.spanning_cell_names = [get_type(cell_name) for cell_name in spanning_cell_names]
1023
1018
  self.remove_iou_threshold_rows = remove_iou_threshold_rows
1024
1019
  self.remove_iou_threshold_cols = remove_iou_threshold_cols
1025
- self.cell_class_id = cell_class_id
1026
1020
  self.cell_to_image = cell_to_image
1027
1021
  self.crop_cell_image = crop_cell_image
1028
1022
  self.item_names = [get_type(item_name) for item_name in item_names] # row names must be before column name
@@ -1089,7 +1083,7 @@ class PubtablesSegmentationService(PipelineComponent):
1089
1083
  rows = dp.get_annotation(category_names=self.item_names[0], annotation_ids=item_ann_ids)
1090
1084
  columns = dp.get_annotation(category_names=self.item_names[1], annotation_ids=item_ann_ids)
1091
1085
  detect_result_cells, segment_result_cells = create_intersection_cells(
1092
- rows, columns, table.annotation_id, self.cell_class_id, self.sub_item_names
1086
+ rows, columns, table.annotation_id, self.sub_item_names
1093
1087
  )
1094
1088
  cell_rn_cn_to_ann_id = {}
1095
1089
  for detect_result, segment_result in zip(detect_result_cells, segment_result_cells):
@@ -1228,7 +1222,6 @@ class PubtablesSegmentationService(PipelineComponent):
1228
1222
  self.tile_table,
1229
1223
  self.remove_iou_threshold_rows,
1230
1224
  self.remove_iou_threshold_cols,
1231
- self.cell_class_id,
1232
1225
  self.table_name,
1233
1226
  self.cell_names,
1234
1227
  self.spanning_cell_names,
@@ -92,7 +92,6 @@ class DetectResultGenerator:
92
92
  detect_result_list.append(
93
93
  DetectionResult(
94
94
  box=[0.0, 0.0, float(self.width), float(self.height)], # type: ignore
95
- class_id=self.categories_name_as_key[category_name],
96
95
  class_name=category_name,
97
96
  score=0.0,
98
97
  absolute_coords=self.absolute_coords,
@@ -123,7 +122,7 @@ class DetectResultGenerator:
123
122
  """
124
123
  sane_detect_results = []
125
124
  for detect_result in detect_result_list:
126
- if detect_result.box:
125
+ if detect_result.box is not None:
127
126
  ulx, uly, lrx, lry = detect_result.box
128
127
  if ulx >= 0 and lrx - ulx >= 0 and uly >= 0 and lry - uly >= 0:
129
128
  sane_detect_results.append(detect_result)
@@ -156,14 +155,13 @@ class SubImageLayoutService(PipelineComponent):
156
155
  detect_result_generator = DetectResultGenerator(categories_items)
157
156
  d_items = TPFrcnnDetector(item_config_path, item_weights_path, {"1": LayoutType.row,
158
157
  "2": LayoutType.column})
159
- item_component = SubImageLayoutService(d_items, LayoutType.table, {1: 7, 2: 8}, detect_result_generator)
158
+ item_component = SubImageLayoutService(d_items, LayoutType.table, detect_result_generator)
160
159
  """
161
160
 
162
161
  def __init__(
163
162
  self,
164
163
  sub_image_detector: ObjectDetector,
165
164
  sub_image_names: Union[str, Sequence[TypeOrStr]],
166
- category_id_mapping: Optional[dict[int, int]] = None,
167
165
  detect_result_generator: Optional[DetectResultGenerator] = None,
168
166
  padder: Optional[PadTransform] = None,
169
167
  ):
@@ -186,7 +184,6 @@ class SubImageLayoutService(PipelineComponent):
186
184
  if isinstance(sub_image_names, str)
187
185
  else tuple((get_type(cat) for cat in sub_image_names))
188
186
  )
189
- self.category_id_mapping = category_id_mapping
190
187
  self.detect_result_generator = detect_result_generator
191
188
  self.padder = padder
192
189
  self.predictor = sub_image_detector
@@ -223,11 +220,6 @@ class SubImageLayoutService(PipelineComponent):
223
220
  detect_result_list = self.detect_result_generator.create_detection_result(detect_result_list)
224
221
 
225
222
  for detect_result in detect_result_list:
226
- if self.category_id_mapping:
227
- if detect_result.class_id:
228
- detect_result.class_id = self.category_id_mapping.get(
229
- detect_result.class_id, detect_result.class_id
230
- )
231
223
  self.dp_manager.set_image_annotation(detect_result, sub_image_ann.annotation_id)
232
224
 
233
225
  def get_meta_annotation(self) -> MetaAnnotation:
@@ -254,7 +246,6 @@ class SubImageLayoutService(PipelineComponent):
254
246
  return self.__class__(
255
247
  predictor,
256
248
  self.sub_image_name,
257
- self.category_id_mapping,
258
249
  self.detect_result_generator,
259
250
  padder_clone,
260
251
  )
@@ -70,7 +70,6 @@ class TextExtractionService(PipelineComponent):
70
70
  text_extract_detector: Union[ObjectDetector, PdfMiner, TextRecognizer],
71
71
  extract_from_roi: Optional[Union[Sequence[TypeOrStr], TypeOrStr]] = None,
72
72
  run_time_ocr_language_selection: bool = False,
73
- skip_if_text_extracted: bool = False,
74
73
  ):
75
74
  """
76
75
  :param text_extract_detector: ObjectDetector
@@ -79,8 +78,6 @@ class TextExtractionService(PipelineComponent):
79
78
  multiple language selections. Also requires that a language detection
80
79
  pipeline component ran before. It will select the expert language OCR
81
80
  model based on the determined language.
82
- :param skip_if_text_extracted: Set to `True` if text has already been extracted in a previous pipeline component
83
- and should not be extracted again. Use-case: A PDF with some scanned images.
84
81
  """
85
82
 
86
83
  if extract_from_roi is None:
@@ -104,11 +101,6 @@ class TextExtractionService(PipelineComponent):
104
101
  raise TypeError("Only TesseractOcrDetector supports multiple languages")
105
102
 
106
103
  self.run_time_ocr_language_selection = run_time_ocr_language_selection
107
- self.skip_if_text_extracted = skip_if_text_extracted
108
- if self.skip_if_text_extracted and isinstance(self.predictor, TextRecognizer):
109
- raise ValueError(
110
- "skip_if_text_extracted=True and TextRecognizer in TextExtractionService is not compatible"
111
- )
112
104
 
113
105
  def serve(self, dp: Image) -> None:
114
106
  maybe_batched_text_rois = self.get_text_rois(dp)
@@ -154,11 +146,6 @@ class TextExtractionService(PipelineComponent):
154
146
  well `get_text_rois` will return an empty list.
155
147
  :return: list of ImageAnnotation or Image
156
148
  """
157
- if self.skip_if_text_extracted:
158
- text_categories = self.predictor.get_category_names()
159
- text_anns = dp.get_annotation(category_names=text_categories)
160
- if text_anns:
161
- return []
162
149
 
163
150
  if self.extract_from_category:
164
151
  if self.predictor.accepts_batch:
@@ -223,7 +210,11 @@ class TextExtractionService(PipelineComponent):
223
210
  predictor = self.predictor.clone()
224
211
  if not isinstance(predictor, (ObjectDetector, PdfMiner, TextRecognizer)):
225
212
  raise ImageError(f"predictor must be of type ObjectDetector or PdfMiner, but is of type {type(predictor)}")
226
- return self.__class__(predictor, deepcopy(self.extract_from_category), self.run_time_ocr_language_selection)
213
+ return self.__class__(
214
+ text_extract_detector=predictor,
215
+ extract_from_roi=deepcopy(self.extract_from_category),
216
+ run_time_ocr_language_selection=self.run_time_ocr_language_selection,
217
+ )
227
218
 
228
219
  def clear_predictor(self) -> None:
229
220
  self.predictor.clear_model()
@@ -37,6 +37,8 @@ from ..extern.hfdetr import HFDetrDerivedDetector
37
37
  from ..mapper.hfstruct import DetrDataCollator, image_to_hf_detr_training
38
38
  from ..pipe.base import PipelineComponent
39
39
  from ..pipe.registry import pipeline_component_registry
40
+ from ..utils.error import DependencyError
41
+ from ..utils.file_utils import wandb_available
40
42
  from ..utils.logger import LoggingRecord, logger
41
43
  from ..utils.types import PathLikeOrStr
42
44
  from ..utils.utils import string_to_dict
@@ -56,6 +58,9 @@ with try_import() as hf_import_guard:
56
58
  TrainingArguments,
57
59
  )
58
60
 
61
+ with try_import() as wb_import_guard:
62
+ import wandb
63
+
59
64
 
60
65
  class DetrDerivedTrainer(Trainer):
61
66
  """
@@ -74,16 +79,18 @@ class DetrDerivedTrainer(Trainer):
74
79
  args: TrainingArguments,
75
80
  data_collator: DetrDataCollator,
76
81
  train_dataset: DatasetAdapter,
82
+ eval_dataset: Optional[DatasetBase] = None,
77
83
  ):
78
84
  self.evaluator: Optional[Evaluator] = None
79
85
  self.build_eval_kwargs: Optional[dict[str, Any]] = None
80
- super().__init__(model, args, data_collator, train_dataset)
86
+ super().__init__(model, args, data_collator, train_dataset, eval_dataset=eval_dataset)
81
87
 
82
88
  def setup_evaluator(
83
89
  self,
84
90
  dataset_val: DatasetBase,
85
91
  pipeline_component: PipelineComponent,
86
92
  metric: Union[Type[MetricBase], MetricBase],
93
+ run: Optional[wandb.sdk.wandb_run.Run] = None,
87
94
  **build_eval_kwargs: Union[str, int],
88
95
  ) -> None:
89
96
  """
@@ -93,10 +100,11 @@ class DetrDerivedTrainer(Trainer):
93
100
  :param dataset_val: dataset on which to run evaluation
94
101
  :param pipeline_component: pipeline component to plug into the evaluator
95
102
  :param metric: A metric class
103
+ :param run: WandB run
96
104
  :param build_eval_kwargs:
97
105
  """
98
106
 
99
- self.evaluator = Evaluator(dataset_val, pipeline_component, metric, num_threads=1)
107
+ self.evaluator = Evaluator(dataset_val, pipeline_component, metric, num_threads=1, run=run)
100
108
  assert self.evaluator.pipe_component
101
109
  for comp in self.evaluator.pipe_component.pipe_components:
102
110
  comp.clear_predictor()
@@ -205,12 +213,19 @@ def train_hf_detr(
205
213
  "remove_unused_columns": False,
206
214
  "per_device_train_batch_size": 2,
207
215
  "max_steps": number_samples,
208
- "evaluation_strategy": (
216
+ "eval_strategy": (
209
217
  "steps"
210
- if (dataset_val is not None and metric is not None and pipeline_component_name is not None)
218
+ if (
219
+ dataset_val is not None
220
+ and (metric is not None or metric_name is not None)
221
+ and pipeline_component_name is not None
222
+ )
211
223
  else "no"
212
224
  ),
213
225
  "eval_steps": 5000,
226
+ "use_wandb": False,
227
+ "wandb_project": None,
228
+ "wandb_repo": "deepdoctection",
214
229
  }
215
230
 
216
231
  for conf in config_overwrite:
@@ -224,6 +239,23 @@ def train_hf_detr(
224
239
  pass
225
240
  conf_dict[key] = val
226
241
 
242
+ use_wandb = conf_dict.pop("use_wandb")
243
+ wandb_project = str(conf_dict.pop("wandb_project"))
244
+ wandb_repo = str(conf_dict.pop("wandb_repo"))
245
+
246
+ # Initialize Wandb, if necessary
247
+ run = None
248
+ if use_wandb:
249
+ if not wandb_available():
250
+ raise DependencyError("WandB must be installed separately")
251
+ run = wandb.init(project=wandb_project, config=conf_dict)
252
+ run._label(repo=wandb_repo) # pylint: disable=W0212
253
+ os.environ["WANDB_DISABLED"] = "False"
254
+ os.environ["WANDB_WATCH"] = "True"
255
+ os.environ["WANDB_PROJECT"] = wandb_project
256
+ else:
257
+ os.environ["WANDB_DISABLED"] = "True"
258
+
227
259
  # Will inform about dataloader warnings if max_steps exceeds length of dataset
228
260
  if conf_dict["max_steps"] > number_samples: # type: ignore
229
261
  logger.warning(
@@ -232,7 +264,7 @@ def train_hf_detr(
232
264
  )
233
265
  )
234
266
 
235
- arguments = TrainingArguments(**conf_dict)
267
+ arguments = TrainingArguments(**conf_dict) # pylint: disable=E1123
236
268
  logger.info(LoggingRecord(f"Config: \n {arguments.to_dict()}", arguments.to_dict()))
237
269
 
238
270
  id2label = {int(k) - 1: v for v, k in categories_dict_name_as_key.items()}
@@ -240,6 +272,7 @@ def train_hf_detr(
240
272
  pretrained_model_name_or_path=path_config_json,
241
273
  num_labels=len(id2label),
242
274
  )
275
+ config.use_timm_backbone = True
243
276
 
244
277
  if path_weights != "":
245
278
  model = TableTransformerForObjectDetection.from_pretrained(
@@ -252,9 +285,9 @@ def train_hf_detr(
252
285
  pretrained_model_name_or_path=path_feature_extractor_config_json
253
286
  )
254
287
  data_collator = DetrDataCollator(feature_extractor)
255
- trainer = DetrDerivedTrainer(model, arguments, data_collator, dataset)
288
+ trainer = DetrDerivedTrainer(model, arguments, data_collator, dataset, eval_dataset=dataset_val)
256
289
 
257
- if arguments.evaluation_strategy in (IntervalStrategy.STEPS,):
290
+ if arguments.eval_strategy in (IntervalStrategy.STEPS,):
258
291
  categories = dataset_val.dataflow.categories.get_categories(filtered=True) # type: ignore
259
292
  detector = HFDetrDerivedDetector(
260
293
  path_config_json, path_weights, path_feature_extractor_config_json, categories # type: ignore
@@ -266,6 +299,6 @@ def train_hf_detr(
266
299
  metric = metric_registry.get(metric_name)
267
300
  assert metric is not None
268
301
 
269
- trainer.setup_evaluator(dataset_val, pipeline_component, metric) # type: ignore
302
+ trainer.setup_evaluator(dataset_val, pipeline_component, metric, run, **build_val_dict) # type: ignore
270
303
 
271
304
  trainer.train()
@@ -395,7 +395,7 @@ def train_hf_layoutlm(
395
395
  "remove_unused_columns": False,
396
396
  "per_device_train_batch_size": 8,
397
397
  "max_steps": number_samples,
398
- "evaluation_strategy": (
398
+ "eval_strategy": (
399
399
  "steps"
400
400
  if (dataset_val is not None and metric is not None and pipeline_component_name is not None)
401
401
  else "no"
@@ -475,7 +475,7 @@ def train_hf_layoutlm(
475
475
  )
476
476
  trainer = LayoutLMTrainer(model, arguments, data_collator, dataset, eval_dataset=dataset_val)
477
477
 
478
- if arguments.evaluation_strategy in (IntervalStrategy.STEPS,):
478
+ if arguments.eval_strategy in (IntervalStrategy.STEPS,):
479
479
  assert metric is not None # silence mypy
480
480
  if dataset_type == DatasetType.SEQUENCE_CLASSIFICATION:
481
481
  categories = dataset_val.dataflow.categories.get_categories(filtered=True) # type: ignore
@@ -63,7 +63,7 @@ else:
63
63
 
64
64
 
65
65
  JsonDict = dict[str, Any]
66
-
66
+ BoxCoordinate = Union[int, float]
67
67
 
68
68
  # Some common deepdoctection dict-types
69
69
  AnnotationDict: TypeAlias = dict[str, Any]
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.39.6
3
+ Version: 0.40.0
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -133,6 +133,7 @@ Dynamic: description
133
133
  Dynamic: description-content-type
134
134
  Dynamic: home-page
135
135
  Dynamic: license
136
+ Dynamic: license-file
136
137
  Dynamic: provides-extra
137
138
  Dynamic: requires-dist
138
139
  Dynamic: requires-python
@@ -1,9 +1,9 @@
1
- deepdoctection/__init__.py,sha256=F9uc6mjLFOYYGkT5UFs6M5YBxhcJlJ33G3-NSctzSF8,12754
1
+ deepdoctection/__init__.py,sha256=Onsg4vkNNIGYytDmH96KsxYt3xQLxcAbyYHCeOqThR8,12780
2
2
  deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
4
- deepdoctection/analyzer/_config.py,sha256=1rfvVrp7cI2YLzpahD77aa1tZ_KFAIQ21DM1NWhxYiI,5058
4
+ deepdoctection/analyzer/_config.py,sha256=kxQzDQvl2ygH84VTnumbRF7JLGM6VeJoBzv1xssm6H4,5019
5
5
  deepdoctection/analyzer/dd.py,sha256=bfR7e1JV7BwUNDRLu0jYZU7qQXnyA_vbRAJl2Ylrq5o,5905
6
- deepdoctection/analyzer/factory.py,sha256=7L-bJ9957TBn_C6OGWJFmZobrh8MPq4Q-Espx5faEiY,32435
6
+ deepdoctection/analyzer/factory.py,sha256=sXGL_faLkKCUBfq5YIpmzV5cWuvWChYy-zP5OtdaM4Y,33251
7
7
  deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
8
8
  deepdoctection/configs/conf_dd_one.yaml,sha256=qnrDAST1PHBtdIKE_hdkZexW22FqVvNTI-PEo9wvinM,3025
9
9
  deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
@@ -17,16 +17,16 @@ deepdoctection/dataflow/serialize.py,sha256=4pYC7m9h53JCu99waVeKpHDpsCDDdYCrSZpP
17
17
  deepdoctection/dataflow/stats.py,sha256=Bsr6v7lcesKXUYtO9wjqlzx_Yq_uyIF3Lel-tQ0i4wI,9619
18
18
  deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SPb7C1lOY,1643
19
19
  deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
20
- deepdoctection/datapoint/box.py,sha256=UAdSnLexvFyg4KK1u9kXdJxhaWTwRxTU-cnQcvl37Q8,23410
21
- deepdoctection/datapoint/convert.py,sha256=gJbHY2V8nlMht1N5VdxTmWSsOeydpFPTJsaJHp6XGgE,7516
22
- deepdoctection/datapoint/image.py,sha256=uGmlgF6zGptvNowZTqf-io4hbd8aFFngAvQqgdEQ5Kw,34040
23
- deepdoctection/datapoint/view.py,sha256=sK6Ta9R6jdOS7iwF05-uPjL2wSz8wHQ5RIGCatw7i2M,50774
20
+ deepdoctection/datapoint/box.py,sha256=XPhC_xHqLZJjzafg1pIS_CxnVB5-0_yk-twsZZ3ncUU,30093
21
+ deepdoctection/datapoint/convert.py,sha256=Be2FvmRXt-5prZ1vwa5fG6VjgEQ_31hiQ13hAoXoaes,7740
22
+ deepdoctection/datapoint/image.py,sha256=_jN46UJUsOi6GC6VEUcp3L_vLL-iYRW05RKcFLWb6Dc,34048
23
+ deepdoctection/datapoint/view.py,sha256=iZiHMc2hkk6vWn87LK0Qf-toZU_kocW3m7Wq8M4IS2E,50782
24
24
  deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
25
25
  deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
26
26
  deepdoctection/datasets/base.py,sha256=AZx-hw8Mchzb7FiOASt7zCbiybFNsM_diBzKXyC-auU,22618
27
27
  deepdoctection/datasets/dataflow_builder.py,sha256=cYU2zV3gZW2bFvMHimlO9VIl3BAUaCwML08cCIQ8Em4,4107
28
28
  deepdoctection/datasets/info.py,sha256=sC1QCOdLWFMooVmiShZ43sLUpAi3FK4d0fsLyl_9-gA,20548
29
- deepdoctection/datasets/registry.py,sha256=K6ZHCSIHuElOMWWN_KJjicHP-BUhNWBHBSOFbCGuvRg,3388
29
+ deepdoctection/datasets/registry.py,sha256=utiB-PnE6vc5HvjcudO0O4Urp2BC3snqswY6d8uPQAo,3388
30
30
  deepdoctection/datasets/save.py,sha256=Y9508Qqp8gIGN7pbGgVBBnkiC6NdCb9L2YR4wVvEUxM,3350
31
31
  deepdoctection/datasets/instances/__init__.py,sha256=XEc_4vT5lDn6bbZID9ujDEumWu8Ec2W-QS4pI_bfWWE,1388
32
32
  deepdoctection/datasets/instances/doclaynet.py,sha256=Az7USCqF0lMk1n1Dk59uUrBgBNAbKEjtUvZnCgdUH70,12286
@@ -94,7 +94,7 @@ deepdoctection/mapper/d2struct.py,sha256=Dx-YnycsIQH4a5-9Gn_yMhiQ-gOFgMueNeH3rhX
94
94
  deepdoctection/mapper/hfstruct.py,sha256=2PjGKsYturVJBimLT1CahYh09KSRAFEHz_QNtC162kQ,5551
95
95
  deepdoctection/mapper/laylmstruct.py,sha256=abMZkYU2W0e_VcCm_c0ZXNFuv-lfMFWcTedcZS5EYvE,42935
96
96
  deepdoctection/mapper/maputils.py,sha256=eI6ZcDg9W5uB6xQNBZpMIdEd86HlCxTtkJuyROdTqiw,8146
97
- deepdoctection/mapper/match.py,sha256=Ed9FsuVPNp_faaW5PKnvUHZoEXcRcrO-muduTMzjp1s,8937
97
+ deepdoctection/mapper/match.py,sha256=RDTYSGtbtT8ph3L83PyHIkezJ2K82MwNerSM72uTMxM,10267
98
98
  deepdoctection/mapper/misc.py,sha256=vX-fV420Te00eD-cqTiWBV2twHqdBcBV2_7rAFRgPRg,7164
99
99
  deepdoctection/mapper/pascalstruct.py,sha256=TzVU1p0oiw0nOuxTFFbEB9vXJxH1v6VUvTJ7MD0manU,3828
100
100
  deepdoctection/mapper/prodigystruct.py,sha256=Re4Sd_zAp6qOvbXZLmMJeG0IGEfMQxebuyDeZgMcTa8,6827
@@ -102,25 +102,25 @@ deepdoctection/mapper/pubstruct.py,sha256=PAJ2N1HSPNS6F2ZrIwlD7PiBhIM-rJscK_Ti8O
102
102
  deepdoctection/mapper/tpstruct.py,sha256=YNABRibvcISD5Lavg3jouoE4FMdqXEJoM-hNoB_rnww,4481
103
103
  deepdoctection/mapper/xfundstruct.py,sha256=_3r3c0K82fnF2h1HxA85h-9ETYrHwcERa6MNc6Ko6Z8,8807
104
104
  deepdoctection/pipe/__init__.py,sha256=ywTVoetftdL6plXg2YlBzMfmqBZupq7yXblSVyvvkcQ,1127
105
- deepdoctection/pipe/anngen.py,sha256=3319l4aaXzcY4w6ItVBNPX8LGS5fHFDVtyVY9KMefac,16393
105
+ deepdoctection/pipe/anngen.py,sha256=7wvp7eghDwrgcIyu1vjRxmVy4SADPbn-k4ud8y2bgjU,15338
106
106
  deepdoctection/pipe/base.py,sha256=wlza9aDOKnHKrXmaz8MLyLz0nMqqcIWQ-6Lu944aicE,15390
107
- deepdoctection/pipe/common.py,sha256=C1KxEfJFSPeDqlnkiJ1ZYPuA36P8BU_4jVhdsszW_V8,17752
107
+ deepdoctection/pipe/common.py,sha256=S6-NKvR0sqBfqjN-mH76uVgM_aHOZvhPe_ore36UPZA,21028
108
108
  deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
109
109
  deepdoctection/pipe/doctectionpipe.py,sha256=bGW3ugky-fb-nEe-3bvO6Oc_4_6w82cQboGM_6p2eIo,12530
110
110
  deepdoctection/pipe/language.py,sha256=5zI0UQC6Fh12_r2pfVL42HoCGz2hpHrOhpXAn5m-rYw,5451
111
- deepdoctection/pipe/layout.py,sha256=xIhnJpyUSbvLbhTXyAKXY1hmG9352jihGYFSclTH_1g,5567
111
+ deepdoctection/pipe/layout.py,sha256=ThULc0b1f9KyaXYk9z0qbuJ0nhIodah9PcrEq2xKpAY,5670
112
112
  deepdoctection/pipe/lm.py,sha256=x9NoYpivdjQF1r76a7PPrUuBEmuHP7ZukuXFDkXhXBc,17572
113
- deepdoctection/pipe/order.py,sha256=PnJZiCnxFluJiECXLTZT0c1Rr66vIRBFraa_G41UA2k,40121
113
+ deepdoctection/pipe/order.py,sha256=0KNiMinedjfuDVVHxJSaDL1yl4Sub-miMPcEC4gGwPA,39423
114
114
  deepdoctection/pipe/refine.py,sha256=dTfI396xydPdbzpfo4yqFcuxl3UAB1y-WbSQn1o76ec,22367
115
115
  deepdoctection/pipe/registry.py,sha256=aFx-Tn0xhVA5l5H18duNW5QoTNKQltybsEUEzsMgUfg,902
116
- deepdoctection/pipe/segment.py,sha256=mWYRg7UR80PtIj1SIg_hiujDcCtLlvKJUP9vx4ZpW0Y,59318
117
- deepdoctection/pipe/sub_layout.py,sha256=N1RcID-boORcwsW_j0l64HpUu3rff0ge5qEanudLYgk,13838
118
- deepdoctection/pipe/text.py,sha256=h9q6d3HFOs7LOg-iwdLUPiQxrPqgunBVNmtYMBrfRQE,11180
116
+ deepdoctection/pipe/segment.py,sha256=sny59GuP7dxLGX3YjHF0wllPxSiXL1GNQEhMGKcF8ZU,59594
117
+ deepdoctection/pipe/sub_layout.py,sha256=OLKvCYJynoFpo7bf2b3HzY0k-TJDLc0PHveWKcDbqZI,13324
118
+ deepdoctection/pipe/text.py,sha256=tLlJtneM__WsrAvp4pQFqwNlmq2RLqKqiPXlJ2lkniU,10483
119
119
  deepdoctection/pipe/transform.py,sha256=9Om7X7hJeL4jgUwHM1CHa4sb5v7Qo1PtVG0ls_3nI7w,3798
120
120
  deepdoctection/train/__init__.py,sha256=YFTRAZF1F7cEAKTdAIi1BLyYb6rSRcwq09Ui5Lu8d6E,1071
121
121
  deepdoctection/train/d2_frcnn_train.py,sha256=sFc_G-mEpaM8d1CCE0_6Gl4nBh11X2RYRBA3p_ylFJQ,16000
122
- deepdoctection/train/hf_detr_train.py,sha256=eHSdI11U8oGy93noZxAISfukhRBElj4dBerJ4Xcercw,10785
123
- deepdoctection/train/hf_layoutlm_train.py,sha256=DTPJZYKeDdRtDpcObYh93uh5D4sgT4c0ckHiAknCroY,22568
122
+ deepdoctection/train/hf_detr_train.py,sha256=uBkkRyxrJF5UF__KbYvIlmb-HRWQ9TY6LiJr1Rm56kI,12043
123
+ deepdoctection/train/hf_layoutlm_train.py,sha256=8kiGp_8GEyqCkLgeMgCJOLJWSVoKWkUBHsZtDjZOcRk,22556
124
124
  deepdoctection/train/tp_frcnn_train.py,sha256=pEpXokSVGveqo82pRnhnAmHPmjQ_8wQWpqM4ZyNHJgs,13049
125
125
  deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
126
126
  deepdoctection/utils/concurrency.py,sha256=nIhpkSncmv0LBB8PtcOLY-BsRGlfcDpz7foVdgzZd20,4598
@@ -138,11 +138,11 @@ deepdoctection/utils/pdf_utils.py,sha256=Fi0eZ2GbnO7N61Rd8b8YRKRff4dalHAzkcn3zpG
138
138
  deepdoctection/utils/settings.py,sha256=hDD6yDX_4pQXwR5ILVwJIj6hb7NXA0-ifnC25ldcUjA,12464
139
139
  deepdoctection/utils/tqdm.py,sha256=cBUtR0L1x0KMeYrLP2rrzyzCamCjpQAKroHXLv81_pk,1820
140
140
  deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F2GPU,8502
141
- deepdoctection/utils/types.py,sha256=_3dmPdCIZNLbgU5QP5k_c5phDf18xLe1kYL6t2nM45s,2953
141
+ deepdoctection/utils/types.py,sha256=ti4WdtIJSg3TGK_YPkkoY9PYGMnR2tTX6Xfik8U1pNk,2986
142
142
  deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
143
143
  deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
144
- deepdoctection-0.39.6.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
- deepdoctection-0.39.6.dist-info/METADATA,sha256=ezZIfdIjMmVp_60jLqco8OMeHQG_ksT4zEhflFLB5tI,19741
146
- deepdoctection-0.39.6.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
147
- deepdoctection-0.39.6.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
- deepdoctection-0.39.6.dist-info/RECORD,,
144
+ deepdoctection-0.40.0.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
+ deepdoctection-0.40.0.dist-info/METADATA,sha256=YyPBlJBcUfAQP_cW7Mhq3eNs2-924o4BMS4X6Sn0Xwo,19763
146
+ deepdoctection-0.40.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
147
+ deepdoctection-0.40.0.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
+ deepdoctection-0.40.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.1.0)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5