rara-tools 0.7.7__py3-none-any.whl → 0.7.8__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- rara_tools/digar_schema_converter.py +29 -4
- {rara_tools-0.7.7.dist-info → rara_tools-0.7.8.dist-info}/METADATA +1 -1
- {rara_tools-0.7.7.dist-info → rara_tools-0.7.8.dist-info}/RECORD +6 -6
- {rara_tools-0.7.7.dist-info → rara_tools-0.7.8.dist-info}/WHEEL +0 -0
- {rara_tools-0.7.7.dist-info → rara_tools-0.7.8.dist-info}/licenses/LICENSE.md +0 -0
- {rara_tools-0.7.7.dist-info → rara_tools-0.7.8.dist-info}/top_level.txt +0 -0
|
@@ -241,8 +241,8 @@ class DIGARSchemaConverter:
|
|
|
241
241
|
min_language_ratio: float = 0.2,
|
|
242
242
|
convert_ratio: bool = False
|
|
243
243
|
) -> NoReturn:
|
|
244
|
-
""" Initialize DIGARSchemaConverter object.
|
|
245
|
-
|
|
244
|
+
""" Initialize DIGARSchemaConverter object.
|
|
245
|
+
|
|
246
246
|
Parameters
|
|
247
247
|
----------
|
|
248
248
|
digitizer_output: dict
|
|
@@ -261,7 +261,7 @@ class DIGARSchemaConverter:
|
|
|
261
261
|
be added to the final output.
|
|
262
262
|
convert_ratio: bool
|
|
263
263
|
If enabled, all ratios are converted into percentages.
|
|
264
|
-
|
|
264
|
+
|
|
265
265
|
"""
|
|
266
266
|
self.__digitizer_output: dict = digitizer_output
|
|
267
267
|
self.__min_language_ratio: float = min_language_ratio
|
|
@@ -280,6 +280,7 @@ class DIGARSchemaConverter:
|
|
|
280
280
|
self.__dc_origin: dict = {}
|
|
281
281
|
self.__dc_identifier: List[dict] = []
|
|
282
282
|
self.__doc_id: str = ""
|
|
283
|
+
self.__page_count: int = None
|
|
283
284
|
|
|
284
285
|
self.__doc_schemas = DocSchemas(
|
|
285
286
|
doc_meta=self.doc_meta,
|
|
@@ -303,6 +304,28 @@ class DIGARSchemaConverter:
|
|
|
303
304
|
page_number = _first_segment.get("page")
|
|
304
305
|
return page_number
|
|
305
306
|
|
|
307
|
+
def _add_dummy_pages(self, docs: List[dict]):
|
|
308
|
+
for doc in docs:
|
|
309
|
+
if not doc.get("page"):
|
|
310
|
+
doc["page"] = self.dummy_page
|
|
311
|
+
return docs
|
|
312
|
+
|
|
313
|
+
@property
|
|
314
|
+
def dummy_page(self) -> int:
|
|
315
|
+
""" Get page number to add for images,
|
|
316
|
+
if actual page is missing. Currently returns
|
|
317
|
+
a new (non-existing) final page.
|
|
318
|
+
"""
|
|
319
|
+
return self.page_count+1
|
|
320
|
+
|
|
321
|
+
@property
|
|
322
|
+
def page_count(self) -> int:
|
|
323
|
+
""" Returns total page count of the document.
|
|
324
|
+
"""
|
|
325
|
+
if not self.__page_count:
|
|
326
|
+
self.__page_count = self.__digitizer_output.get("doc_meta", {}).get("pages", {}).get("count", 0)
|
|
327
|
+
return self.__page_count
|
|
328
|
+
|
|
306
329
|
@property
|
|
307
330
|
def doc_id(self) -> str:
|
|
308
331
|
""" Retrieves document ID to use for generating
|
|
@@ -327,7 +350,8 @@ class DIGARSchemaConverter:
|
|
|
327
350
|
@property
|
|
328
351
|
def images(self) -> List[dict]:
|
|
329
352
|
if not self.__images:
|
|
330
|
-
|
|
353
|
+
images = self.__digitizer_output.get("images")
|
|
354
|
+
self.__images = self._add_dummy_pages(images)
|
|
331
355
|
return self.__images
|
|
332
356
|
|
|
333
357
|
@property
|
|
@@ -344,6 +368,7 @@ class DIGARSchemaConverter:
|
|
|
344
368
|
mapped[text["start_page"]]["texts"].append(text)
|
|
345
369
|
for img in self.images:
|
|
346
370
|
mapped[img["page"]]["images"].append(img)
|
|
371
|
+
#print(mapped.items())
|
|
347
372
|
|
|
348
373
|
self.__page_mappings = [
|
|
349
374
|
v for k, v in sorted(list(mapped.items()), key=lambda x: x[0])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
rara_tools/converters.py,sha256=a1dEMa0TwcO9UmjuSBkiuc7LGmH0d_dB6wwoTLpdZhI,4040
|
|
2
2
|
rara_tools/decorators.py,sha256=MjOyvZ5nTkwxwx2JLFEGpKKBysvecFw6EN6UDrSvZLU,2187
|
|
3
|
-
rara_tools/digar_schema_converter.py,sha256=
|
|
3
|
+
rara_tools/digar_schema_converter.py,sha256=usrNwlbN63wTE5U56vbmyzT_SxGLXO6ZF4JwY3Lnkqg,15061
|
|
4
4
|
rara_tools/elastic.py,sha256=4D9yoyMy6AJIKwhSi2H1usffDHAh2A_IZfv5BtYnBKg,13992
|
|
5
5
|
rara_tools/exceptions.py,sha256=YQyaueUbXeTkJYFDEuN6iWTXMI3eCv5l7PxGp87vg5I,550
|
|
6
6
|
rara_tools/s3.py,sha256=9ziDXsLjBtFAvsjTPxFddhfvkpA8773rzPJqO7y1N5Q,6415
|
|
@@ -39,8 +39,8 @@ rara_tools/parsers/marc_records/title_record.py,sha256=XrtJ4gj7wzSaGxNaPtPuawmqq
|
|
|
39
39
|
rara_tools/parsers/tools/entity_normalizers.py,sha256=VyCy_NowCLpOsL0luQ55IW-Qi-J5oBH0Ofzr7HRFBhM,8949
|
|
40
40
|
rara_tools/parsers/tools/marc_converter.py,sha256=LgSHe-7n7aiDrw2bnsB53r3fXTRFjZXTwBYfTpL0pfs,415
|
|
41
41
|
rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
|
|
42
|
-
rara_tools-0.7.
|
|
43
|
-
rara_tools-0.7.
|
|
44
|
-
rara_tools-0.7.
|
|
45
|
-
rara_tools-0.7.
|
|
46
|
-
rara_tools-0.7.
|
|
42
|
+
rara_tools-0.7.8.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
|
|
43
|
+
rara_tools-0.7.8.dist-info/METADATA,sha256=8ZgQyAat-9MyuKDDbfyLhLImB4eBA4WNEkCWXjNKPas,4079
|
|
44
|
+
rara_tools-0.7.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
45
|
+
rara_tools-0.7.8.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
|
|
46
|
+
rara_tools-0.7.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|