rara-tools 0.7.7__py3-none-any.whl → 0.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. Click here for more details.

@@ -241,8 +241,8 @@ class DIGARSchemaConverter:
241
241
  min_language_ratio: float = 0.2,
242
242
  convert_ratio: bool = False
243
243
  ) -> NoReturn:
244
- """ Initialize DIGARSchemaConverter object.
245
-
244
+ """ Initialize DIGARSchemaConverter object.
245
+
246
246
  Parameters
247
247
  ----------
248
248
  digitizer_output: dict
@@ -261,7 +261,7 @@ class DIGARSchemaConverter:
261
261
  be added to the final output.
262
262
  convert_ratio: bool
263
263
  If enabled, all ratios are converted into percentages.
264
-
264
+
265
265
  """
266
266
  self.__digitizer_output: dict = digitizer_output
267
267
  self.__min_language_ratio: float = min_language_ratio
@@ -280,6 +280,7 @@ class DIGARSchemaConverter:
280
280
  self.__dc_origin: dict = {}
281
281
  self.__dc_identifier: List[dict] = []
282
282
  self.__doc_id: str = ""
283
+ self.__page_count: int = None
283
284
 
284
285
  self.__doc_schemas = DocSchemas(
285
286
  doc_meta=self.doc_meta,
@@ -303,6 +304,28 @@ class DIGARSchemaConverter:
303
304
  page_number = _first_segment.get("page")
304
305
  return page_number
305
306
 
307
def _add_dummy_pages(self, docs: List[dict]) -> List[dict]:
    """ Assign a placeholder page to every entry that lacks one.

    Parameters
    ----------
    docs: List[dict]
        Entries (e.g. images) to check. The dicts are modified
        in place. None is tolerated and treated as "no entries".

    Returns
    -------
    List[dict]
        The same list that was passed in, or a new empty list
        if `docs` was None.
    """
    # The caller passes the raw result of a dict lookup, which is
    # None when the key is absent; iterating None would raise TypeError.
    if docs is None:
        return []
    for doc in docs:
        # A missing key and any falsy value (None, 0, "") both count
        # as "no page".
        if not doc.get("page"):
            doc["page"] = self.dummy_page
    return docs
312
+
313
@property
def dummy_page(self) -> int:
    """ Page number given to images that have no page of their own.

    Evaluates to `page_count` plus one, i.e. a page number one
    past the document's reported page count.
    """
    last_page = self.page_count
    return last_page + 1
320
+
321
@property
def page_count(self) -> int:
    """ Returns total page count of the document.

    The value is read from the digitizer output under
    doc_meta -> pages -> count and cached after the first access.
    Missing or null entries at any level yield 0.
    """
    # Compare against None rather than truthiness so that a
    # legitimately computed count of 0 is cached as well.
    if self.__page_count is None:
        # `or {}` also covers keys that are present but hold None,
        # which a `.get(key, {})` default would not.
        doc_meta = self.__digitizer_output.get("doc_meta") or {}
        pages = doc_meta.get("pages") or {}
        self.__page_count = pages.get("count") or 0
    return self.__page_count
328
+
306
329
  @property
307
330
  def doc_id(self) -> str:
308
331
  """ Retrieves document ID to use for generating
@@ -327,7 +350,8 @@ class DIGARSchemaConverter:
327
350
  @property
328
351
  def images(self) -> List[dict]:
329
352
  if not self.__images:
330
- self.__images = self.__digitizer_output.get("images")
353
+ images = self.__digitizer_output.get("images")
354
+ self.__images = self._add_dummy_pages(images)
331
355
  return self.__images
332
356
 
333
357
  @property
@@ -344,6 +368,7 @@ class DIGARSchemaConverter:
344
368
  mapped[text["start_page"]]["texts"].append(text)
345
369
  for img in self.images:
346
370
  mapped[img["page"]]["images"].append(img)
371
+ #print(mapped.items())
347
372
 
348
373
  self.__page_mappings = [
349
374
  v for k, v in sorted(list(mapped.items()), key=lambda x: x[0])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.7
3
+ Version: 0.7.8
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -1,6 +1,6 @@
1
1
  rara_tools/converters.py,sha256=a1dEMa0TwcO9UmjuSBkiuc7LGmH0d_dB6wwoTLpdZhI,4040
2
2
  rara_tools/decorators.py,sha256=MjOyvZ5nTkwxwx2JLFEGpKKBysvecFw6EN6UDrSvZLU,2187
3
- rara_tools/digar_schema_converter.py,sha256=k95U2iRlEA3sh772-v6snhHW6fju6qSTMnvWJ6DpzZk,14254
3
+ rara_tools/digar_schema_converter.py,sha256=usrNwlbN63wTE5U56vbmyzT_SxGLXO6ZF4JwY3Lnkqg,15061
4
4
  rara_tools/elastic.py,sha256=4D9yoyMy6AJIKwhSi2H1usffDHAh2A_IZfv5BtYnBKg,13992
5
5
  rara_tools/exceptions.py,sha256=YQyaueUbXeTkJYFDEuN6iWTXMI3eCv5l7PxGp87vg5I,550
6
6
  rara_tools/s3.py,sha256=9ziDXsLjBtFAvsjTPxFddhfvkpA8773rzPJqO7y1N5Q,6415
@@ -39,8 +39,8 @@ rara_tools/parsers/marc_records/title_record.py,sha256=XrtJ4gj7wzSaGxNaPtPuawmqq
39
39
  rara_tools/parsers/tools/entity_normalizers.py,sha256=VyCy_NowCLpOsL0luQ55IW-Qi-J5oBH0Ofzr7HRFBhM,8949
40
40
  rara_tools/parsers/tools/marc_converter.py,sha256=LgSHe-7n7aiDrw2bnsB53r3fXTRFjZXTwBYfTpL0pfs,415
41
41
  rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
42
- rara_tools-0.7.7.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
43
- rara_tools-0.7.7.dist-info/METADATA,sha256=R_vgYTI7PaEkSiFz6XqC6CDmQNOskzLsKnPvktZi4BY,4079
44
- rara_tools-0.7.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
45
- rara_tools-0.7.7.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
46
- rara_tools-0.7.7.dist-info/RECORD,,
42
+ rara_tools-0.7.8.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
43
+ rara_tools-0.7.8.dist-info/METADATA,sha256=8ZgQyAat-9MyuKDDbfyLhLImB4eBA4WNEkCWXjNKPas,4079
44
+ rara_tools-0.7.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
45
+ rara_tools-0.7.8.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
46
+ rara_tools-0.7.8.dist-info/RECORD,,