deepdoctection 0.36__py3-none-any.whl → 0.37.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

@@ -24,7 +24,7 @@ from .utils.logger import LoggingRecord, logger
24
24
 
25
25
  # pylint: enable=wrong-import-position
26
26
 
27
- __version__ = 0.36
27
+ __version__ = "0.37.1"
28
28
 
29
29
  _IMPORT_STRUCTURE = {
30
30
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -87,6 +87,7 @@ _IMPORT_STRUCTURE = {
87
87
  "convert_b64_to_np_array",
88
88
  "convert_np_array_to_b64",
89
89
  "convert_np_array_to_b64_b",
90
+ "convert_bytes_to_np_array",
90
91
  "convert_pdf_bytes_to_np_array_v2",
91
92
  "box_to_point4",
92
93
  "point4_to_box",
@@ -371,6 +372,7 @@ _IMPORT_STRUCTURE = {
371
372
  "save_config_to_yaml",
372
373
  "config_to_cli_str",
373
374
  "decrypt_pdf_document",
375
+ "decrypt_pdf_document_from_bytes",
374
376
  "get_pdf_file_reader",
375
377
  "get_pdf_file_writer",
376
378
  "PDFStreamer",
@@ -327,9 +327,9 @@ class ServiceFactory:
327
327
  )
328
328
  if config.OCR.USE_TEXTRACT:
329
329
  credentials_kwargs = {
330
- "aws_access_key_id": environ.get("ACCESS_KEY", None),
331
- "aws_secret_access_key": environ.get("SECRET_KEY", None),
332
- "config": Config(region_name=environ.get("REGION", None)),
330
+ "aws_access_key_id": environ.get("AWS_ACCESS_KEY", None),
331
+ "aws_secret_access_key": environ.get("AWS_SECRET_KEY", None),
332
+ "config": Config(region_name=environ.get("AWS_REGION", None)),
333
333
  }
334
334
  return TextractOcrDetector(**credentials_kwargs)
335
335
  raise ValueError("You have set USE_OCR=True but any of USE_TESSERACT, USE_DOCTR, USE_TEXTRACT is set to False")
@@ -593,7 +593,7 @@ class SerializerPdfDoc:
593
593
  file_name = os.path.split(path)[1]
594
594
  prefix, suffix = os.path.splitext(file_name)
595
595
  df: DataFlow
596
- df = CustomDataFromIterable(PDFStreamer(path=path), max_datapoints=max_datapoints)
596
+ df = CustomDataFromIterable(PDFStreamer(path_or_bytes=path), max_datapoints=max_datapoints)
597
597
  df = MapData(
598
598
  df,
599
599
  lambda dp: {
@@ -40,6 +40,7 @@ __all__ = [
40
40
  "convert_b64_to_np_array",
41
41
  "convert_np_array_to_b64",
42
42
  "convert_np_array_to_b64_b",
43
+ "convert_bytes_to_np_array",
43
44
  "convert_pdf_bytes_to_np_array_v2",
44
45
  "box_to_point4",
45
46
  "point4_to_box",
@@ -107,6 +108,16 @@ def convert_np_array_to_b64_b(np_image: PixelValues) -> bytes:
107
108
  return viz_handler.encode(np_image)
108
109
 
109
110
 
111
+ def convert_bytes_to_np_array(image_bytes: bytes) -> PixelValues:
112
+ """
113
+ Converts an image in bytes to a numpy array
114
+
115
+ :param image_bytes: An image as bytes.
116
+ :return: numpy array.
117
+ """
118
+ return viz_handler.convert_bytes_to_np(image_bytes)
119
+
120
+
110
121
  @deprecated("Use convert_pdf_bytes_to_np_array_v2", "2022-02-23")
111
122
  def convert_pdf_bytes_to_np_array(pdf_bytes: bytes, dpi: Optional[int] = None) -> PixelValues:
112
123
  """
@@ -34,6 +34,7 @@ from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDErr
34
34
  from ..utils.identifier import get_uuid, is_uuid_like
35
35
  from ..utils.settings import ObjectTypes, SummaryType, get_type
36
36
  from ..utils.types import ImageDict, PathLikeOrStr, PixelValues
37
+ from ..utils.logger import LoggingRecord, logger
37
38
  from .annotation import Annotation, AnnotationMap, BoundingBox, CategoryAnnotation, ImageAnnotation
38
39
  from .box import crop_box_from_image, global_to_local_coords, intersection_box
39
40
  from .convert import as_dict, convert_b64_to_np_array, convert_np_array_to_b64, convert_pdf_bytes_to_np_array_v2
@@ -474,8 +475,11 @@ class Image:
474
475
 
475
476
  for service_id in service_ids:
476
477
  if service_id not in service_id_to_annotation_id:
477
- raise ImageError(f"Service id {service_id} not found")
478
- annotation_ids = service_id_to_annotation_id[service_id]
478
+ logger.info(
479
+ LoggingRecord(
480
+ f"Service_id {service_id} for image_id: {self.image_id} not found. Skipping removal."))
481
+
482
+ annotation_ids = service_id_to_annotation_id.get(service_id, [])
479
483
 
480
484
  for ann_id in annotation_ids:
481
485
  if ann_id not in ann_id_to_annotation_maps:
@@ -587,7 +591,7 @@ class Image:
587
591
  )
588
592
  ann.image.dump(sub_image)
589
593
 
590
- def remove_image_from_lower_hierachy(self, pixel_values_only: bool = False) -> None:
594
+ def remove_image_from_lower_hierarchy(self, pixel_values_only: bool = False) -> None:
591
595
  """Will remove all images from image annotations."""
592
596
  for ann in self.annotations:
593
597
  if pixel_values_only:
@@ -717,7 +721,7 @@ class Image:
717
721
  else:
718
722
  path_json = fspath(path) + ".json"
719
723
  if highest_hierarchy_only:
720
- self.remove_image_from_lower_hierachy()
724
+ self.remove_image_from_lower_hierarchy()
721
725
  export_dict = self.as_dict()
722
726
  export_dict["location"] = fspath(export_dict["location"])
723
727
  if not image_to_json:
@@ -747,7 +751,7 @@ class Image:
747
751
  if sub_cat.service_id:
748
752
  service_id_dict[sub_cat.service_id].append(sub_cat.annotation_id)
749
753
  if ann.image is not None:
750
- for summary_cat_key in ann.image.summary:
754
+ for summary_cat_key in ann.image.summary.sub_categories:
751
755
  summary_cat = ann.get_summary(summary_cat_key)
752
756
  if summary_cat.service_id:
753
757
  service_id_dict[summary_cat.service_id].append(summary_cat.annotation_id)
@@ -62,7 +62,7 @@ def dataflow_to_json(
62
62
  if highest_hierarchy_only:
63
63
 
64
64
  def _remove_hh(dp: Image) -> Image:
65
- dp.remove_image_from_lower_hierachy()
65
+ dp.remove_image_from_lower_hierarchy()
66
66
  return dp
67
67
 
68
68
  df = MapData(df, _remove_hh)
@@ -69,8 +69,7 @@ class ModelCategories:
69
69
  if self.init_categories:
70
70
  self._init_categories = MappingProxyType({key: get_type(val) for key, val in self.init_categories.items()})
71
71
  else:
72
- if self._init_categories is None:
73
- self._init_categories = MappingProxyType({})
72
+ self._init_categories = MappingProxyType({})
74
73
  self.categories = self._init_categories
75
74
 
76
75
  @overload
@@ -181,7 +180,7 @@ class NerModelCategories(ModelCategories):
181
180
  self._init_categories = self.merge_bio_semantics_categories(
182
181
  self._categories_semantics, self._categories_bio
183
182
  )
184
- super().__post_init__()
183
+ self.categories = self._init_categories
185
184
 
186
185
  @staticmethod
187
186
  def merge_bio_semantics_categories(
@@ -48,7 +48,7 @@ with try_import() as pt_import_guard:
48
48
  import torch.nn.functional as F
49
49
 
50
50
  with try_import() as tr_import_guard:
51
- from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD # type: ignore
51
+ from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
52
52
  from transformers import (
53
53
  LayoutLMForSequenceClassification,
54
54
  LayoutLMForTokenClassification,
@@ -27,7 +27,7 @@ from typing import Mapping, Optional, Sequence, Union
27
27
 
28
28
  from lazy_imports import try_import
29
29
 
30
- from ..datapoint.convert import convert_pdf_bytes_to_np_array_v2
30
+ from ..datapoint.convert import convert_bytes_to_np_array, convert_pdf_bytes_to_np_array_v2
31
31
  from ..datapoint.image import Image
32
32
  from ..utils.fs import get_load_image_func, load_image_from_file
33
33
  from ..utils.types import JsonDict
@@ -49,6 +49,7 @@ def to_image(dp: Union[str, Mapping[str, Union[str, bytes]]], dpi: Optional[int]
49
49
 
50
50
  file_name: Optional[str]
51
51
  location: Optional[str]
52
+ image_bytes: Optional[bytes] = None
52
53
 
53
54
  if isinstance(dp, str):
54
55
  _, file_name = os.path.split(dp)
@@ -62,6 +63,7 @@ def to_image(dp: Union[str, Mapping[str, Union[str, bytes]]], dpi: Optional[int]
62
63
  document_id = dp.get("document_id")
63
64
  if location == "":
64
65
  location = str(dp.get("path", ""))
66
+ image_bytes = dp.get("image_bytes")
65
67
  else:
66
68
  raise TypeError("datapoint not of expected type for converting to image")
67
69
 
@@ -76,6 +78,8 @@ def to_image(dp: Union[str, Mapping[str, Union[str, bytes]]], dpi: Optional[int]
76
78
  if dp_image.pdf_bytes is not None:
77
79
  if isinstance(dp_image.pdf_bytes, bytes):
78
80
  dp_image.image = convert_pdf_bytes_to_np_array_v2(dp_image.pdf_bytes, dpi=dpi)
81
+ elif image_bytes is not None:
82
+ dp_image.image = convert_bytes_to_np_array(image_bytes)
79
83
  else:
80
84
  dp_image.image = load_image_from_file(location)
81
85
 
@@ -29,6 +29,7 @@ from uuid import uuid1
29
29
 
30
30
  from ..dataflow import DataFlow, MapData
31
31
  from ..datapoint.image import Image
32
+ from ..mapper.misc import curry
32
33
  from ..utils.context import timed_operation
33
34
  from ..utils.identifier import get_uuid_from_str
34
35
  from ..utils.settings import ObjectTypes
@@ -247,17 +248,24 @@ class Pipeline(ABC):
247
248
  """
248
249
  raise NotImplementedError()
249
250
 
250
- def _build_pipe(self, df: DataFlow, session_id: Optional[str] = None) -> DataFlow:
251
+ @staticmethod
252
+ @curry
253
+ def _undo(dp: Image, service_ids: Optional[list[str]] = None) -> Image:
251
254
  """
252
- Composition of the backbone
255
+ Remove annotations from a datapoint
253
256
  """
254
- if session_id is None and self.set_session_id:
255
- session_id = self.get_session_id()
256
- for component in self.pipe_component_list:
257
- component.timer_on = True
258
- component.dp_manager.session_id = session_id
259
- df = component.predict_dataflow(df)
260
- return df
257
+ dp.remove(service_ids=service_ids)
258
+ return dp
259
+
260
+ def undo(self, df: DataFlow, service_ids: Optional[set[str]] = None) -> DataFlow:
261
+ """
262
+ Mapping a datapoint via `_undo` within a dataflow pipeline
263
+
264
+ :param df: An input dataflow of Images
265
+ :param service_ids: A set of service ids to remove
266
+ :return: A output dataflow of Images
267
+ """
268
+ return MapData(df, self._undo(service_ids=service_ids))
261
269
 
262
270
  @abstractmethod
263
271
  def analyze(self, **kwargs: Any) -> DataFlow:
@@ -273,6 +281,18 @@ class Pipeline(ABC):
273
281
  """
274
282
  raise NotImplementedError()
275
283
 
284
+ def _build_pipe(self, df: DataFlow, session_id: Optional[str] = None) -> DataFlow:
285
+ """
286
+ Composition of the backbone
287
+ """
288
+ if session_id is None and self.set_session_id:
289
+ session_id = self.get_session_id()
290
+ for component in self.pipe_component_list:
291
+ component.timer_on = True
292
+ component.dp_manager.session_id = session_id
293
+ df = component.predict_dataflow(df)
294
+ return df
295
+
276
296
  def get_meta_annotation(self) -> MetaAnnotation:
277
297
  """
278
298
  Collects meta annotations from all pipeline components and summarizes the returned results
@@ -23,31 +23,38 @@ import os
23
23
  from pathlib import Path
24
24
  from typing import List, Mapping, Optional, Sequence, Tuple, Union
25
25
 
26
- from ..dataflow import DataFlow, MapData
26
+ from ..dataflow import CustomDataFromIterable, DataFlow, DataFromList, MapData
27
27
  from ..dataflow.custom_serialize import SerializerFiles, SerializerPdfDoc
28
28
  from ..datapoint.image import Image
29
29
  from ..datapoint.view import IMAGE_DEFAULTS
30
30
  from ..mapper.maputils import curry
31
31
  from ..mapper.misc import to_image
32
32
  from ..utils.fs import maybe_path_or_pdf
33
+ from ..utils.identifier import get_uuid_from_str
33
34
  from ..utils.logger import LoggingRecord, logger
35
+ from ..utils.pdf_utils import PDFStreamer
34
36
  from ..utils.types import PathLikeOrStr
37
+ from ..utils.utils import is_file_extension
35
38
  from .base import Pipeline, PipelineComponent
36
39
  from .common import PageParsingService
37
40
 
38
41
 
39
42
  def _collect_from_kwargs(
40
- **kwargs: Union[str, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]
41
- ) -> Tuple[Optional[str], Optional[str], bool, int, str, DataFlow]:
43
+ **kwargs: Union[Optional[str], bytes, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]
44
+ ) -> Tuple[Optional[str], Union[str, Sequence[str]], bool, int, str, DataFlow, Optional[bytes]]:
45
+ b_bytes = kwargs.get("bytes")
42
46
  dataset_dataflow = kwargs.get("dataset_dataflow")
43
47
  path = kwargs.get("path")
44
48
  if path is None and dataset_dataflow is None:
45
49
  raise ValueError("Pass either path or dataset_dataflow as argument")
50
+ if path is None and b_bytes:
51
+ raise ValueError("When passing bytes, a path to the source document must be provided")
46
52
 
47
53
  shuffle = kwargs.get("shuffle", False)
48
54
  if not isinstance(shuffle, bool):
49
55
  raise TypeError(f"shuffle must be of type bool but is of type {type(shuffle)}")
50
56
 
57
+ file_type = None
51
58
  doc_path = None
52
59
  if path:
53
60
  if not isinstance(path, (str, Path)):
@@ -56,15 +63,27 @@ def _collect_from_kwargs(
56
63
  if path_type == 2:
57
64
  doc_path = path
58
65
  path = None
66
+ file_type = ".pdf"
67
+ elif path_type == 3:
68
+ if is_file_extension(path, ".jpg"):
69
+ file_type = ".jpg"
70
+ if is_file_extension(path, ".png"):
71
+ file_type = ".png"
72
+ if is_file_extension(path, ".jpeg"):
73
+ file_type = ".jpeg"
74
+ if not b_bytes:
75
+ raise ValueError("When passing a path to a single image, bytes of the image must be passed")
59
76
  elif not path_type:
60
77
  raise ValueError("Pass only a path to a directory or to a pdf file")
61
78
 
62
- file_type = kwargs.get("file_type", [".jpg", ".png", ".tif"])
79
+ file_type = kwargs.get(
80
+ "file_type", [".jpg", ".png", ".jpeg", ".tif"] if file_type is None else file_type # type: ignore
81
+ )
63
82
 
64
83
  max_datapoints = kwargs.get("max_datapoints")
65
84
  if not isinstance(max_datapoints, (int, type(None))):
66
85
  raise TypeError(f"max_datapoints must be of type int, but is of type {type(max_datapoints)}")
67
- return path, file_type, shuffle, max_datapoints, doc_path, dataset_dataflow # type: ignore
86
+ return path, file_type, shuffle, max_datapoints, doc_path, dataset_dataflow, b_bytes # type: ignore
68
87
 
69
88
 
70
89
  @curry
@@ -142,12 +161,18 @@ class DoctectionPipe(Pipeline):
142
161
 
143
162
  super().__init__(pipeline_component_list)
144
163
 
145
- def _entry(self, **kwargs: Union[str, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]) -> DataFlow:
146
- path, file_type, shuffle, max_datapoints, doc_path, dataset_dataflow = _collect_from_kwargs(**kwargs)
164
+ def _entry(self, **kwargs: Union[str, bytes, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]) \
165
+ -> DataFlow:
166
+ path, file_type, shuffle, max_datapoints, doc_path, dataset_dataflow, b_bytes = _collect_from_kwargs(**kwargs)
147
167
 
148
168
  df: DataFlow
149
169
 
150
- if isinstance(path, (str, Path)):
170
+ if isinstance(b_bytes, bytes):
171
+ df = DoctectionPipe.bytes_to_dataflow(path=doc_path if path is None else path,
172
+ b_bytes=b_bytes,
173
+ file_type=file_type)
174
+
175
+ elif isinstance(path, (str, Path)):
151
176
  if not isinstance(file_type, (str, list)):
152
177
  raise TypeError(f"file_type must be of type string or list, but is of type {type(file_type)}")
153
178
  df = DoctectionPipe.path_to_dataflow(path=path, file_type=file_type, shuffle=shuffle)
@@ -162,7 +187,7 @@ class DoctectionPipe(Pipeline):
162
187
 
163
188
  df = MapData(df, _proto_process(path, doc_path))
164
189
  if dataset_dataflow is None:
165
- df = MapData(df, _to_image(dpi=300)) # pylint: disable=E1120
190
+ df = MapData(df, _to_image(dpi=os.environ.get("DPI", 300))) # pylint: disable=E1120
166
191
  return df
167
192
 
168
193
  @staticmethod
@@ -197,6 +222,44 @@ class DoctectionPipe(Pipeline):
197
222
  """
198
223
  return _doc_to_dataflow(path, max_datapoints)
199
224
 
225
+ @staticmethod
226
+ def bytes_to_dataflow(
227
+ path: str, b_bytes: bytes, file_type: Union[str, Sequence[str]], max_datapoints: Optional[int] = None
228
+ ) -> DataFlow:
229
+ """
230
+ Converts a bytes object to a dataflow
231
+
232
+ :param path: path to directory or an image file
233
+ :param b_bytes: bytes object
234
+ :param file_type: e.g. ".pdf", ".jpg" or [".jpg", ".png", ".jpeg", ".tif"]
235
+ :param max_datapoints: max number of datapoints to consider
236
+ :return: DataFlow
237
+ """
238
+
239
+ file_name = os.path.split(path)[1]
240
+ if isinstance(file_type, str):
241
+ if file_type == ".pdf":
242
+ prefix, suffix = os.path.splitext(file_name)
243
+ df: DataFlow
244
+ df = CustomDataFromIterable(PDFStreamer(path_or_bytes=b_bytes), max_datapoints=max_datapoints)
245
+ df = MapData(
246
+ df,
247
+ lambda dp: {
248
+ "path": path,
249
+ "file_name": prefix + f"_{dp[1]}" + suffix,
250
+ "pdf_bytes": dp[0],
251
+ "page_number": dp[1],
252
+ "document_id": get_uuid_from_str(prefix),
253
+ },
254
+ )
255
+ else:
256
+ df = DataFromList(lst=[{"path": path, "file_name": file_name, "image_bytes": b_bytes}])
257
+ return df
258
+ raise ValueError(
259
+ f"pass: {path}, b_bytes: {b_bytes!r}, file_type: {file_type} and max_datapoints: {max_datapoints} "
260
+ f"not supported"
261
+ )
262
+
200
263
  def dataflow_to_page(self, df: DataFlow) -> DataFlow:
201
264
  """
202
265
  Converts a dataflow of images to a dataflow of pages
@@ -206,7 +269,9 @@ class DoctectionPipe(Pipeline):
206
269
  """
207
270
  return self.page_parser.predict_dataflow(df)
208
271
 
209
- def analyze(self, **kwargs: Union[str, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]) -> DataFlow:
272
+ def analyze(
273
+ self, **kwargs: Union[str, bytes, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]
274
+ ) -> DataFlow:
210
275
  """
211
276
  `kwargs key dataset_dataflow:` Transfer a dataflow of a dataset via its dataflow builder
212
277
 
@@ -215,6 +280,8 @@ class DoctectionPipe(Pipeline):
215
280
  only the first page is processed through the pipeline.
216
281
  Alternatively, a path to a pdf document with multiple pages.
217
282
 
283
+ `kwargs key bytes:` A bytes object of an image
284
+
218
285
  `kwargs key file_type:` Selection of the file type, if: args:`file_type` is passed
219
286
 
220
287
  `kwargs key max_datapoints:` Stops processing as soon as max_datapoints images have been processed
@@ -227,20 +227,21 @@ def get_load_image_func(
227
227
 
228
228
  def maybe_path_or_pdf(path: PathLikeOrStr) -> int:
229
229
  """
230
- Checks if the path points to a directory or a pdf document. Returns 1 if the path points to a directory, 2
231
- if the path points to a pdf doc or 0, if none of the previous is true.
230
+ Checks if the path points to a directory, a pdf document or a single image. Returns 1 if the path points to a
231
+ directory, 2 if the path points to a pdf doc and 3 if path points to either a PNG, JPG or JPEG or 0 if none of the
232
+ previous is true.
232
233
 
233
234
  :param path: A path
234
- :return: A value of 0,1,2
235
+ :return: A value of 0,1,2,3
235
236
  """
236
237
 
237
- is_dir = os.path.isdir(path)
238
- if is_dir:
238
+ if os.path.isdir(path):
239
239
  return 1
240
240
  file_name = os.path.split(path)[1]
241
- is_pdf = is_file_extension(file_name, ".pdf")
242
- if is_pdf:
241
+ if is_file_extension(file_name, ".pdf"):
243
242
  return 2
243
+ if is_file_extension(file_name, [".png", ".jpeg", ".jpg", ".tif"]):
244
+ return 3
244
245
  return 0
245
246
 
246
247
 
@@ -26,7 +26,7 @@ from errno import ENOENT
26
26
  from io import BytesIO
27
27
  from pathlib import Path
28
28
  from shutil import copyfile
29
- from typing import Generator, Literal, Optional
29
+ from typing import Generator, Literal, Optional, Union
30
30
 
31
31
  from lazy_imports import try_import
32
32
  from numpy import uint8
@@ -46,6 +46,7 @@ with try_import() as pt_import_guard:
46
46
 
47
47
  __all__ = [
48
48
  "decrypt_pdf_document",
49
+ "decrypt_pdf_document_from_bytes",
49
50
  "get_pdf_file_reader",
50
51
  "get_pdf_file_writer",
51
52
  "PDFStreamer",
@@ -68,7 +69,6 @@ def decrypt_pdf_document(path: PathLikeOrStr) -> bool:
68
69
  :param path: A path to the pdf file
69
70
  :return: True if document has been successfully decrypted
70
71
  """
71
-
72
72
  if qpdf_available():
73
73
  path_base, file_name = os.path.split(path)
74
74
  file_name_tmp = os.path.splitext(file_name)[0] + "tmp.pdf"
@@ -86,41 +86,69 @@ def decrypt_pdf_document(path: PathLikeOrStr) -> bool:
86
86
  return False
87
87
 
88
88
 
89
- def get_pdf_file_reader(path: PathLikeOrStr) -> PdfReader:
89
+ def decrypt_pdf_document_from_bytes(input_bytes: bytes) -> bytes:
90
+ """
91
+ Decrypting a pdf given as bytes. Under the hood, it saves the bytes to a temporary file and then calls
92
+
93
+ qpdf: <http://qpdf.sourceforge.net/>
94
+
95
+ :param input_bytes: A bytes object representing the pdf file
96
+ :return: The decrypted bytes object
97
+ """
98
+ with save_tmp_file(input_bytes, "pdf_") as (_, input_file_name):
99
+ is_decrypted = decrypt_pdf_document(input_file_name)
100
+ if is_decrypted:
101
+ with open(input_file_name, "rb") as file:
102
+ return file.read()
103
+ else:
104
+ logger.error(LoggingRecord("pdf bytes cannot be decrypted and therefore cannot be processed further."))
105
+ sys.exit()
106
+
107
+
108
+ def get_pdf_file_reader(path_or_bytes: Union[PathLikeOrStr, bytes]) -> PdfReader:
90
109
  """
91
110
  Creates a file reader object from a pdf document. Will try to decrypt the document if it is
92
111
  encrypted. (See `decrypt_pdf_document` to understand what is meant with "decrypt").
93
112
 
94
- :param path: A path to a pdf document
113
+ :param path_or_bytes: A path to a pdf document
95
114
  :return: A file reader object from which you can iterate through the document.
96
115
  """
97
116
 
98
- if not os.path.isfile(path):
99
- raise FileNotFoundError(str(path))
100
- file_name = os.path.split(path)[1]
117
+ if isinstance(path_or_bytes, bytes):
118
+ try:
119
+ reader = PdfReader(BytesIO(path_or_bytes))
120
+ except (errors.PdfReadError, AttributeError):
121
+ decrypted_bytes = decrypt_pdf_document_from_bytes(path_or_bytes)
122
+ reader = PdfReader(BytesIO(decrypted_bytes))
123
+ return reader
124
+
125
+ if not os.path.isfile(path_or_bytes):
126
+ raise FileNotFoundError(str(path_or_bytes))
127
+ file_name = os.path.split(path_or_bytes)[1]
101
128
  if not is_file_extension(file_name, ".pdf"):
102
129
  raise FileExtensionError(f"must be a pdf file: {file_name}")
103
130
 
104
- with open(path, "rb") as file:
131
+ with open(path_or_bytes, "rb") as file:
105
132
  qpdf_called = False
106
133
  try:
107
- input_pdf_as_bytes = PdfReader(file)
134
+ reader = PdfReader(file)
108
135
  except (errors.PdfReadError, AttributeError):
109
- _ = decrypt_pdf_document(path)
136
+ _ = decrypt_pdf_document(path_or_bytes)
110
137
  qpdf_called = True
111
138
 
112
139
  if not qpdf_called:
113
- if input_pdf_as_bytes.is_encrypted:
114
- is_decrypted = decrypt_pdf_document(path)
140
+ if reader.is_encrypted:
141
+ is_decrypted = decrypt_pdf_document(path_or_bytes)
115
142
  if not is_decrypted:
116
143
  logger.error(
117
144
  LoggingRecord(
118
- f"pdf document {path} cannot be decrypted and therefore cannot be " f"processed further."
145
+ f"pdf document {path_or_bytes} cannot be decrypted and therefore cannot "
146
+ f"be processed further."
119
147
  )
120
148
  )
121
149
  sys.exit()
122
150
 
123
- return PdfReader(os.fspath(path))
151
+ return PdfReader(os.fspath(path_or_bytes))
124
152
 
125
153
 
126
154
  def get_pdf_file_writer() -> PdfWriter:
@@ -157,11 +185,11 @@ class PDFStreamer:
157
185
 
158
186
  """
159
187
 
160
- def __init__(self, path: PathLikeOrStr) -> None:
188
+ def __init__(self, path_or_bytes: Union[PathLikeOrStr, bytes]) -> None:
161
189
  """
162
- :param path: to a pdf.
190
+ :param path_or_bytes: to a pdf.
163
191
  """
164
- self.file_reader = get_pdf_file_reader(path)
192
+ self.file_reader = get_pdf_file_reader(path_or_bytes)
165
193
  self.file_writer = PdfWriter()
166
194
 
167
195
  def __len__(self) -> int:
@@ -312,6 +312,7 @@ class VizPackageHandler:
312
312
  "interactive_imshow": "_cv2_interactive_imshow",
313
313
  "encode": "_cv2_encode",
314
314
  "rotate_image": "_cv2_rotate_image",
315
+ "convert_bytes_to_np": "_cv2_convert_bytes_to_np",
315
316
  },
316
317
  "pillow": {
317
318
  "read_image": "_pillow_read_image",
@@ -325,6 +326,7 @@ class VizPackageHandler:
325
326
  "interactive_imshow": "_pillow_interactive_imshow",
326
327
  "encode": "_pillow_encode",
327
328
  "rotate_image": "_pillow_rotate_image",
329
+ "convert_bytes_to_np": "_pillow_convert_bytes_to_np",
328
330
  },
329
331
  }
330
332
 
@@ -484,6 +486,37 @@ class VizPackageHandler:
484
486
  pil_image = Image.open(im_file)
485
487
  return np.array(pil_image)[:, :, ::-1]
486
488
 
489
+ def convert_bytes_to_np(self, image_bytes: bytes) -> PixelValues:
490
+ """Converting an image as bytes into np.array
491
+
492
+ :param image_bytes: Image as np.array
493
+ """
494
+ return getattr(self, self.pkg_func_dict["convert_bytes_to_np"])(image_bytes)
495
+
496
+ @staticmethod
497
+ def _cv2_convert_bytes_to_np(image_bytes: bytes) -> PixelValues:
498
+ """
499
+ Convert image bytes to a numpy array using OpenCV.
500
+
501
+ :param image_bytes: Image bytes
502
+ :return: Image as numpy array
503
+ """
504
+ np_array = np.frombuffer(image_bytes, np.uint8)
505
+ np_image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
506
+ return np_image
507
+
508
+ @staticmethod
509
+ def _pillow_convert_bytes_to_np(image_bytes: bytes) -> PixelValues:
510
+ """
511
+ Convert image bytes to a numpy array using Pillow.
512
+
513
+ :param image_bytes: Image bytes
514
+ :return: Image as numpy array
515
+ """
516
+ image = Image.open(BytesIO(image_bytes))
517
+ np_image = np.array(image)
518
+ return np_image
519
+
487
520
  def resize(self, image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
488
521
  """
489
522
  Resize a given image to new width, height. Specifying an interpolation method is required. Depending on the
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepdoctection
3
- Version: 0.36
3
+ Version: 0.37.1
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -1,9 +1,9 @@
1
- deepdoctection/__init__.py,sha256=fNUbaFAlK1JUXgPCmTu2UOLUMqW4HIgkaW4uOUYjYYg,12571
1
+ deepdoctection/__init__.py,sha256=i23UZBqMlkcvUILJxvUQAdj-3d2yV9edzxFsC5RoMHA,12655
2
2
  deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
4
4
  deepdoctection/analyzer/_config.py,sha256=NZl_REM8Ge2xfxvHN-mZR5KURcHfZii3xfMlKQwckbA,4864
5
5
  deepdoctection/analyzer/dd.py,sha256=DUOhOtwipHw5nabYqn3WGR9aZcgP0ma_bi_tjf9xscw,5973
6
- deepdoctection/analyzer/factory.py,sha256=xmo5F9X7I6lp0ZWJv8QavpMyG8UWYLvMi4qogsZV1_s,31507
6
+ deepdoctection/analyzer/factory.py,sha256=dEUOtdBS3yQGLqMqLR_kq5EYCR3IE30DjHNzE0spoQE,31519
7
7
  deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
8
8
  deepdoctection/configs/conf_dd_one.yaml,sha256=td7XsyVhdXkhh5Pie7sT_WNjGTaxBOWgpxhkobHd1H0,2325
9
9
  deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
@@ -11,15 +11,15 @@ deepdoctection/dataflow/__init__.py,sha256=CWRHMpmJaPk4xY_oIIFubCt-z11SguWrMWxHZ
11
11
  deepdoctection/dataflow/base.py,sha256=z4DCComSj5wStEPjtk0093cNNGfUMiDqx8dqz36nS_o,6221
12
12
  deepdoctection/dataflow/common.py,sha256=MyGA2VWlNMjQdIN_Jd-o0Ec3bDJmjQit4Nv0v43OCSQ,10119
13
13
  deepdoctection/dataflow/custom.py,sha256=3CK_1oL9p6nbOq8WtH5_vQUo70_8Z8pXY7kG0OFqzug,6803
14
- deepdoctection/dataflow/custom_serialize.py,sha256=CKeyw2Ayq_qAl0O5BoKkIOFJgteCt78h9QFTI23XhmQ,22818
14
+ deepdoctection/dataflow/custom_serialize.py,sha256=WocuiYo2gkih5Z9lWAoIIfUewwYSDOhHzG7ZZjKlUic,22827
15
15
  deepdoctection/dataflow/parallel_map.py,sha256=8FhxJBWV-kjJrJ27jQtP3yYF6Ev6rz98worO60oi96c,15837
16
16
  deepdoctection/dataflow/serialize.py,sha256=4pYC7m9h53JCu99waVeKpHDpsCDDdYCrSZpP2QYSsgs,4555
17
17
  deepdoctection/dataflow/stats.py,sha256=Bsr6v7lcesKXUYtO9wjqlzx_Yq_uyIF3Lel-tQ0i4wI,9619
18
18
  deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SPb7C1lOY,1643
19
19
  deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
20
20
  deepdoctection/datapoint/box.py,sha256=tkFuVM6xfx2jL7W4UED4qHXV572LSRdIsVJbrEiyIxI,23524
21
- deepdoctection/datapoint/convert.py,sha256=Gw2IjNiEotPu1yuMZqrIYB0mCAwafKt-VgMnrHj6S7U,6808
22
- deepdoctection/datapoint/image.py,sha256=EvZlVwJjMAcL1z8RNPBvZ8fwdJvkGuGpcFxCP1y26Go,33045
21
+ deepdoctection/datapoint/convert.py,sha256=O7920pIomyEkzXwxpFsrzfhn7Pl6UzVGhNzv90VcuKU,7099
22
+ deepdoctection/datapoint/image.py,sha256=AM34br9eM1syTIUXcJIrAaP7pEnejbUl-w-CK5pr9z8,33233
23
23
  deepdoctection/datapoint/view.py,sha256=1rVMuqucCrI5zlwyXMADJQBV38V_zSNFqFyBi3cMA1E,44914
24
24
  deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
25
25
  deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
@@ -27,7 +27,7 @@ deepdoctection/datasets/base.py,sha256=DT4i-d74sIEiUNC6UspIHNJuHSK0t1dBv7qwadg4r
27
27
  deepdoctection/datasets/dataflow_builder.py,sha256=cYU2zV3gZW2bFvMHimlO9VIl3BAUaCwML08cCIQ8Em4,4107
28
28
  deepdoctection/datasets/info.py,sha256=6y5TfiUhQppynbMFP5JmUPk95ggsVCtGIw4dYh2lVus,20501
29
29
  deepdoctection/datasets/registry.py,sha256=ZjzVzjsCgNXJuZZZtR98_yKocADmh4EBGV5JqJbGjWk,2543
30
- deepdoctection/datasets/save.py,sha256=khYQ4t94FOu9RWMimP9E4kASq25f61SIow78NHaX1pg,3349
30
+ deepdoctection/datasets/save.py,sha256=Y9508Qqp8gIGN7pbGgVBBnkiC6NdCb9L2YR4wVvEUxM,3350
31
31
  deepdoctection/datasets/instances/__init__.py,sha256=XEc_4vT5lDn6bbZID9ujDEumWu8Ec2W-QS4pI_bfWWE,1388
32
32
  deepdoctection/datasets/instances/doclaynet.py,sha256=wRZT7wMTilZBLZ1gKY2cWReD1EGT735vOOTy0pD0N6M,12038
33
33
  deepdoctection/datasets/instances/fintabnet.py,sha256=qYzFK1dWF6MEPkHamP255DvAzlQT_GnkvDe1aM7CgjA,12006
@@ -50,13 +50,13 @@ deepdoctection/eval/registry.py,sha256=v4mp-s67vBVRu1nQzuGlYPViQnMSeIXEcF_WmvfUC
50
50
  deepdoctection/eval/tedsmetric.py,sha256=rKw-734Y9CpBtIfkBSPQF2vAZxnIdWrI9Zc723P7RxI,9529
51
51
  deepdoctection/eval/tp_eval_callback.py,sha256=SXsXumoyxq-MIH9Cep5eUOwnNshMbKmC6mYOGwCg0pM,5283
52
52
  deepdoctection/extern/__init__.py,sha256=9Iks9b4Q_LynjcV167TVCoK8YsQRUcA2jjmAmDNA_X8,1056
53
- deepdoctection/extern/base.py,sha256=ajzFzD9BrFwnly4SziN8PadI-PBOzzVRlIGPm_sNllE,24142
53
+ deepdoctection/extern/base.py,sha256=ONPgappl_P5HSwQr42FatuRnwMTvUPecPsCztDTN0Hw,24108
54
54
  deepdoctection/extern/d2detect.py,sha256=zrKv1yurApnjD7QZIZk_8LYCahjmN82MQUjHjv8zvkQ,22127
55
55
  deepdoctection/extern/deskew.py,sha256=sPoixu8S9he-0wbs-jgxtPE2V9BiP4-3uZlb6F5Y1SA,3077
56
56
  deepdoctection/extern/doctrocr.py,sha256=T3_tvlih22_dVCBZypS1Y8tjQQB1fkAxIbGdUGHIapQ,24473
57
57
  deepdoctection/extern/fastlang.py,sha256=F4gK-SEwcCujjxH327ZDzMGWToJ49xS_dCKcePQ9IlY,4780
58
58
  deepdoctection/extern/hfdetr.py,sha256=1NPW_u5eH2tP3ixZ91l4WR-O-wLVcrFsLWA7BqID0oM,12055
59
- deepdoctection/extern/hflayoutlm.py,sha256=KfoWx9_Rpa1Y2L51HLrYvenfWaTB4SVTmVJH00Cqb-s,56510
59
+ deepdoctection/extern/hflayoutlm.py,sha256=_OUeQsbNgfjbV7TPYBjkqc4HoTBQqkOINnwpewPJpl8,56494
60
60
  deepdoctection/extern/hflm.py,sha256=kwS6kcSlY_2m9u0RzBLTRq-UMM7c1PhyUaDTvSdejus,9217
61
61
  deepdoctection/extern/model.py,sha256=ViHHKPvbGmLCPw7ZESv_rmjlkA90UiBU6oZiHOMqNSw,59869
62
62
  deepdoctection/extern/pdftext.py,sha256=KS_t27SUiYn_IOS_J2lF9lSSo22vLagxmxvYCY3CqXA,7228
@@ -95,7 +95,7 @@ deepdoctection/mapper/hfstruct.py,sha256=2PjGKsYturVJBimLT1CahYh09KSRAFEHz_QNtC1
95
95
  deepdoctection/mapper/laylmstruct.py,sha256=abMZkYU2W0e_VcCm_c0ZXNFuv-lfMFWcTedcZS5EYvE,42935
96
96
  deepdoctection/mapper/maputils.py,sha256=eI6ZcDg9W5uB6xQNBZpMIdEd86HlCxTtkJuyROdTqiw,8146
97
97
  deepdoctection/mapper/match.py,sha256=pCWZpz2R8JahiKXCw7dxKRTLiPgJXeVDgkddDPLy_c0,9643
98
- deepdoctection/mapper/misc.py,sha256=rCqHOcsCfVPXs36AWK0rZ2kk0CUM3yXV370_zyIGBJ4,6518
98
+ deepdoctection/mapper/misc.py,sha256=NLSSgk066Tkrrdi075HkqV7cP-iqT9fv_MtyAJ-8gOg,6743
99
99
  deepdoctection/mapper/pascalstruct.py,sha256=TzVU1p0oiw0nOuxTFFbEB9vXJxH1v6VUvTJ7MD0manU,3828
100
100
  deepdoctection/mapper/prodigystruct.py,sha256=Re4Sd_zAp6qOvbXZLmMJeG0IGEfMQxebuyDeZgMcTa8,6827
101
101
  deepdoctection/mapper/pubstruct.py,sha256=YxsrZ-E0pD45Mm_VCPQB9yEgHsTPkw4htt-3DwCRX1k,23361
@@ -103,10 +103,10 @@ deepdoctection/mapper/tpstruct.py,sha256=YNABRibvcISD5Lavg3jouoE4FMdqXEJoM-hNoB_
103
103
  deepdoctection/mapper/xfundstruct.py,sha256=_3r3c0K82fnF2h1HxA85h-9ETYrHwcERa6MNc6Ko6Z8,8807
104
104
  deepdoctection/pipe/__init__.py,sha256=ywTVoetftdL6plXg2YlBzMfmqBZupq7yXblSVyvvkcQ,1127
105
105
  deepdoctection/pipe/anngen.py,sha256=3319l4aaXzcY4w6ItVBNPX8LGS5fHFDVtyVY9KMefac,16393
106
- deepdoctection/pipe/base.py,sha256=Davjkf3D837y9AIITcx7yXdebmVaz6Moyw_5Wi3nfmg,13561
106
+ deepdoctection/pipe/base.py,sha256=ynNg5SSRuUVxN69VWOO3Oi7WSeGrYwn3A56NQMBJDvw,14222
107
107
  deepdoctection/pipe/common.py,sha256=haOb4v0jLX3r41BSC8cVseX2E320_HkSrGlZsQiKE2g,17728
108
108
  deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
109
- deepdoctection/pipe/doctectionpipe.py,sha256=I6B6HT_BG2ByQ3Rjsui3-Ct31yLmodx-iuZnujXaiSc,8953
109
+ deepdoctection/pipe/doctectionpipe.py,sha256=uhsrSuwaHcOMj8b8i6wCpPaZlSxCTaeHVhMokJ8vRSI,11835
110
110
  deepdoctection/pipe/language.py,sha256=5zI0UQC6Fh12_r2pfVL42HoCGz2hpHrOhpXAn5m-rYw,5451
111
111
  deepdoctection/pipe/layout.py,sha256=xIhnJpyUSbvLbhTXyAKXY1hmG9352jihGYFSclTH_1g,5567
112
112
  deepdoctection/pipe/lm.py,sha256=Sp-b7smeslNDyioEfNjuNBUxAuFKn3-OKpCZkGXri_c,16643
@@ -129,20 +129,20 @@ deepdoctection/utils/develop.py,sha256=4HyTarkFbJwctL-Hgu1TU_LSJppHvaroDbcyHsxhI
129
129
  deepdoctection/utils/env_info.py,sha256=TnCA-LOTj4WIHd9yvn1AaoPWsLmPgc42l-BJmGV6zmM,19147
130
130
  deepdoctection/utils/error.py,sha256=_3q9VepKfEhsM3H033_Fu0hwBzMSjsWALsjyJbGAZr8,2367
131
131
  deepdoctection/utils/file_utils.py,sha256=IRElrcND0YEiU1QELw5hfXeNA39uE2_nyzh9-X7YcxI,19477
132
- deepdoctection/utils/fs.py,sha256=C4ktrzjoVtX9kgycv5YrEigDI9byi65b6_D0aKsGM4Y,10161
132
+ deepdoctection/utils/fs.py,sha256=x842BxUP5bbjJ2cofw-g4dKJv4QAaGzda4qnAazabO4,10281
133
133
  deepdoctection/utils/identifier.py,sha256=QkNaGGqPynHwDPnd3_m8iur4Cv64rcQa7qolCE7Qphk,2159
134
134
  deepdoctection/utils/logger.py,sha256=J0OVKiXP_2A82MWbbJoOeMEJ-75aZu5npgaS_yI6mVA,10003
135
135
  deepdoctection/utils/metacfg.py,sha256=hD76KQ_RnD_5B02qLI2Zxf3WfnsnXhEI_KUTKpw91RI,5711
136
136
  deepdoctection/utils/mocks.py,sha256=IkN3-IzAl4eX0ibgKIHg8IY7ykVw6BnpF6XnxKnKaZI,2389
137
- deepdoctection/utils/pdf_utils.py,sha256=OAQjE9xHVNcDsFqAvX47Lu-mgmoMpVXqIf5pOK8AwxY,11595
137
+ deepdoctection/utils/pdf_utils.py,sha256=G0m8kUn2HwwyZWH_BcrDkm-m3MP9GN9SWHj5VhB7swY,12845
138
138
  deepdoctection/utils/settings.py,sha256=k6OyuWbj-IPeaO9zT9RZ-5Yad1wNhWGYqGLZdtgXAZY,12464
139
139
  deepdoctection/utils/tqdm.py,sha256=cBUtR0L1x0KMeYrLP2rrzyzCamCjpQAKroHXLv81_pk,1820
140
140
  deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F2GPU,8502
141
141
  deepdoctection/utils/types.py,sha256=_3dmPdCIZNLbgU5QP5k_c5phDf18xLe1kYL6t2nM45s,2953
142
142
  deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
143
- deepdoctection/utils/viz.py,sha256=Mok1d0V7NwlhAvO1S1Iq5YitKpVmOfH_XHTSlRelCB0,25902
144
- deepdoctection-0.36.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
- deepdoctection-0.36.dist-info/METADATA,sha256=E-zXgx0bTdSqbd88D_abscR_poEJaKJGIwlv2RFbQs8,19543
146
- deepdoctection-0.36.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
147
- deepdoctection-0.36.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
- deepdoctection-0.36.dist-info/RECORD,,
143
+ deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
144
+ deepdoctection-0.37.1.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
+ deepdoctection-0.37.1.dist-info/METADATA,sha256=M-HjpJpxuM4tHN0ld8DscsZPgKRUoNmsbx9slFkj6tg,19545
146
+ deepdoctection-0.37.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
147
+ deepdoctection-0.37.1.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
+ deepdoctection-0.37.1.dist-info/RECORD,,