PyPI - rara-tools - Versions diffs - 0.7.7__tar.gz → 0.7.8__tar.gz - Mend

rara-tools 0.7.7__tar.gz → 0.7.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rara-tools might be problematic. Click here for more details.

Files changed (67) hide show

{rara_tools-0.7.7/rara_tools.egg-info → rara_tools-0.7.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rara-tools
-Version: 0.7.7
+Version: 0.7.8
 Summary: Tools to support Kata's work.
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10

rara_tools-0.7.8/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.7.8

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/digar_schema_converter.py RENAMED Viewed

@@ -241,8 +241,8 @@ class DIGARSchemaConverter:
             min_language_ratio: float = 0.2,
             convert_ratio: bool = False
     ) -> NoReturn:
-        """ Initialize DIGARSchemaConverter object.
+        """ Initialize DIGARSchemaConverter object.
         Parameters
         ----------
         digitizer_output: dict
@@ -261,7 +261,7 @@ class DIGARSchemaConverter:
             be added to the final output.
         convert_ratio: bool
             If enabled, all ratios are converted into percentages.
         """
         self.__digitizer_output: dict = digitizer_output
         self.__min_language_ratio: float = min_language_ratio
@@ -280,6 +280,7 @@ class DIGARSchemaConverter:
         self.__dc_origin: dict = {}
         self.__dc_identifier: List[dict] = []
         self.__doc_id: str = ""
+        self.__page_count: int = None
         self.__doc_schemas = DocSchemas(
             doc_meta=self.doc_meta,
@@ -303,6 +304,28 @@ class DIGARSchemaConverter:
             page_number = _first_segment.get("page")
         return page_number
+    def _add_dummy_pages(self, docs: List[dict]):
+        for doc in docs:
+            if not doc.get("page"):
+                doc["page"] = self.dummy_page
+        return docs
+    @property
+    def dummy_page(self) -> int:
+        """ Get page number to add for images,
+        if actual page is missing. Currently returns
+        a new (non-existing) final page.
+        """
+        return self.page_count+1
+    @property
+    def page_count(self) -> int:
+        """ Returns total page count of the document.
+        """
+        if not self.__page_count:
+            self.__page_count = self.__digitizer_output.get("doc_meta", {}).get("pages", {}).get("count", 0)
+        return self.__page_count
     @property
     def doc_id(self) -> str:
         """ Retrieves document ID to use for generating
@@ -327,7 +350,8 @@ class DIGARSchemaConverter:
     @property
     def images(self) -> List[dict]:
         if not self.__images:
-            self.__images = self.__digitizer_output.get("images")
+            images = self.__digitizer_output.get("images")
+            self.__images = self._add_dummy_pages(images)
         return self.__images
     @property
@@ -344,6 +368,7 @@ class DIGARSchemaConverter:
                 mapped[text["start_page"]]["texts"].append(text)
             for img in self.images:
                 mapped[img["page"]]["images"].append(img)
+            #print(mapped.items())
             self.__page_mappings = [
                 v for k, v in sorted(list(mapped.items()), key=lambda x: x[0])

{rara_tools-0.7.7 → rara_tools-0.7.8/rara_tools.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rara-tools
-Version: 0.7.7
+Version: 0.7.8
 Summary: Tools to support Kata's work.
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_digar_schema_converter.py RENAMED Viewed

@@ -12,11 +12,19 @@ def load_json(file_path: str):
 TEST_DIGITIZER_OUTPUT_FILE = os.path.join(".", "tests", "test_data", "b1267058_test_digitizer_output.json")
+TEST_DIGITIZER_OUTPUT_FILE_2 = os.path.join(".", "tests", "test_data", "b5493797_test_digitizer_output_shortened_empty_image_pages.json")
 TEST_DIGITIZER_OUTPUT = load_json(TEST_DIGITIZER_OUTPUT_FILE)
+TEST_DIGITIZER_OUTPUT_2 = load_json(TEST_DIGITIZER_OUTPUT_FILE_2)
 TEST_SIERRA_ID = "b1267058"
 TEST_GENERATED_ID = "hsasaHSAHHGDhb"
 TEST_PERMALINK = "https://www.digar.ee/b1267058"
+TEST_SIERRA_ID_2 = "b5493797"
+TEST_GENERATED_ID_2 = "mzmudhju38hd3ndlk"
+TEST_PERMALINK_2 = "https://www.digar.ee/b5493797"
 def test_digar_schema_converstion_default():
     converter = DIGARSchemaConverter(
         digitizer_output=TEST_DIGITIZER_OUTPUT,
@@ -24,25 +32,25 @@ def test_digar_schema_converstion_default():
         generated_id=TEST_GENERATED_ID
     )
     digar_schema = converter.digar_schema
     # check that all necessary fields are present
     assert "dc:language" in digar_schema
     assert "dcterms:provenance" in digar_schema
     assert "dc:identifier" in digar_schema
     assert "dcterms:hasPart" in digar_schema
     assert "dcterms:conformsTo" in digar_schema
     languages = [lang.get("value") for lang in digar_schema.get("dc:language")]
     # check that languages are converted into ISO 639-2 (three-letter codes)
     for lang in languages:
         assert len(lang) == 3
     # check that ratio is converted into percentage
     text_quality = digar_schema.get("dcterms:conformsTo")[0].get("value")
     assert isinstance(text_quality, str)
 def test_digar_schema_id_generation():
     """ Tests ID generation logic.
     """
@@ -51,38 +59,38 @@ def test_digar_schema_id_generation():
         sierra_id=TEST_SIERRA_ID,
         generated_id=TEST_GENERATED_ID,
         permalink=TEST_PERMALINK
     )
     #If permalink is given, this should be used as base ID
     digar_schema = converter.digar_schema
     first_segment_id = digar_schema["dcterms:hasPart"][0]["hasPart"][0]["@id"]
     assert first_segment_id.startswith(TEST_PERMALINK)
     converter = DIGARSchemaConverter(
         digitizer_output=TEST_DIGITIZER_OUTPUT,
         sierra_id=TEST_SIERRA_ID,
         generated_id=TEST_GENERATED_ID
     )
     #If permalink is NOT given, Sierra ID should be used as base ID
     digar_schema = converter.digar_schema
     first_segment_id = digar_schema["dcterms:hasPart"][0]["hasPart"][0]["@id"]
     assert first_segment_id.startswith(TEST_SIERRA_ID)
     converter = DIGARSchemaConverter(
         digitizer_output=TEST_DIGITIZER_OUTPUT,
         generated_id=TEST_GENERATED_ID
     )
     #If neither permalink nor Sierra ID is given, generated ID should be used as base ID
     digar_schema = converter.digar_schema
     first_segment_id = digar_schema["dcterms:hasPart"][0]["hasPart"][0]["@id"]
     assert first_segment_id.startswith(TEST_GENERATED_ID)
 def test_restricting_languages_with_ratio():
     """ Checks that param `min_language_ratio` influences
     the number of output languages.
@@ -93,41 +101,64 @@ def test_restricting_languages_with_ratio():
         generated_id=TEST_GENERATED_ID,
         permalink=TEST_PERMALINK,
         min_language_ratio=0
     )
     #With min_language_ratio=0, 7 languages are expected in the output
     digar_schema = converter.digar_schema
     languages = [lang.get("value") for lang in digar_schema.get("dc:language")]
     assert len(languages) == 7
     converter = DIGARSchemaConverter(
         digitizer_output=TEST_DIGITIZER_OUTPUT,
         sierra_id=TEST_SIERRA_ID,
         generated_id=TEST_GENERATED_ID,
         permalink=TEST_PERMALINK,
         min_language_ratio=0.02
     )
     #With min_language_ratio=0.02, only 2 languages are expected in the output
     digar_schema = converter.digar_schema
     languages = [lang.get("value") for lang in digar_schema.get("dc:language")]
     assert len(languages) == 2
     converter = DIGARSchemaConverter(
         digitizer_output=TEST_DIGITIZER_OUTPUT,
         sierra_id=TEST_SIERRA_ID,
         generated_id=TEST_GENERATED_ID,
         permalink=TEST_PERMALINK,
         min_language_ratio=0.5
     )
     #With min_language_ratio=0.5, only 1 language is expected in the output
     digar_schema = converter.digar_schema
     languages = [lang.get("value") for lang in digar_schema.get("dc:language")]
     assert len(languages) == 1
+def test_digar_schema_converstion_with_missing_image_pages():
+    converter = DIGARSchemaConverter(
+        digitizer_output=TEST_DIGITIZER_OUTPUT_2,
+        sierra_id=TEST_SIERRA_ID_2,
+        generated_id=TEST_GENERATED_ID_2
+    )
+    digar_schema = converter.digar_schema
+    # check that all necessary fields are present
+    assert "dc:language" in digar_schema
+    assert "dcterms:provenance" in digar_schema
+    assert "dc:identifier" in digar_schema
+    assert "dcterms:hasPart" in digar_schema
+    assert "dcterms:conformsTo" in digar_schema
+    languages = [lang.get("value") for lang in digar_schema.get("dc:language")]
+    # check that languages are converted into ISO 639-2 (three-letter codes)
+    for lang in languages:
+        assert len(lang) == 3
+    # check that ratio is converted into percentage
+    text_quality = digar_schema.get("dcterms:conformsTo")[0].get("value")
+    assert isinstance(text_quality, str)

rara_tools-0.7.7/VERSION DELETED Viewed

	@@ -1 +0,0 @@
1	- 0.7.7

{rara_tools-0.7.7 → rara_tools-0.7.8}/LICENSE.md RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/README.md RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/pyproject.toml RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/constants/__init__.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/constants/digitizer.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/constants/general.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/constants/language_evaluator.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/constants/linker.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/constants/meta_extractor.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/constants/normalizers.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/constants/parsers.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/constants/subject_indexer.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/converters.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/core_formatters/core_formatter.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/core_formatters/formatted_keyword.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/core_formatters/formatted_meta.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/core_formatters/formatted_object.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/decorators.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/elastic.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/exceptions.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/normalizers/__init__.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/normalizers/authorities.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/normalizers/base.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/normalizers/bibs.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/normalizers/reader.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/normalizers/viaf.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_parsers/base_parser.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_parsers/ems_parser.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_parsers/location_parser.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_parsers/organization_parser.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_parsers/person_parser.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_parsers/title_parser.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_records/base_record.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_records/ems_record.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_records/organization_record.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_records/person_record.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/marc_records/title_record.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/tools/entity_normalizers.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/tools/marc_converter.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/parsers/tools/russian_transliterator.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/s3.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/task_reporter.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools/utils.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools.egg-info/requires.txt RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/rara_tools.egg-info/top_level.txt RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/requirements.txt RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/setup.cfg RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_elastic.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_elastic_vector_and_search_operations.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_entity_normalizers.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_formatters.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_marc_parsers.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_normalization.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_s3_exceptions.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_s3_file_operations.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_sierra_converters.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_task_reporter.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_utils.py RENAMED Viewed

File without changes

{rara_tools-0.7.7 → rara_tools-0.7.8}/tests/test_viaf_client.py RENAMED Viewed

File without changes

rara-tools 0.7.7__tar.gz → 0.7.8__tar.gz

Potentially problematic release.

rara-tools 0.7.7__tar.gz → 0.7.8__tar.gz