rara-tools 0.7.17__tar.gz → 0.7.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- {rara_tools-0.7.17/rara_tools.egg-info → rara_tools-0.7.19}/PKG-INFO +1 -1
- rara_tools-0.7.19/VERSION +1 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/digar_schema_converter.py +8 -4
- {rara_tools-0.7.17 → rara_tools-0.7.19/rara_tools.egg-info}/PKG-INFO +1 -1
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_digar_schema_converter.py +3 -3
- rara_tools-0.7.17/VERSION +0 -1
- {rara_tools-0.7.17 → rara_tools-0.7.19}/LICENSE.md +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/README.md +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/pyproject.toml +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/constants/__init__.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/constants/digitizer.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/constants/general.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/constants/language_evaluator.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/constants/linker.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/constants/meta_extractor.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/constants/normalizers.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/constants/parsers.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/constants/subject_indexer.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/converters.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/core_formatters/core_formatter.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/core_formatters/formatted_keyword.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/core_formatters/formatted_meta.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/core_formatters/formatted_object.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/decorators.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/elastic.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/exceptions.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/normalizers/__init__.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/normalizers/authorities.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/normalizers/base.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/normalizers/bibs.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/normalizers/reader.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/normalizers/viaf.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_records/base_record.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_records/ems_record.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_records/organization_record.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_records/person_record.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_records/title_record.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/tools/marc_converter.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/tools/validators.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/s3.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/task_reporter.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/utils.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools.egg-info/SOURCES.txt +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools.egg-info/dependency_links.txt +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools.egg-info/requires.txt +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools.egg-info/top_level.txt +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/requirements.txt +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/setup.cfg +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_elastic.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_elastic_vector_and_search_operations.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_entity_normalizers.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_formatters.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_marc_parsers.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_normalization.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_s3_exceptions.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_s3_file_operations.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_sierra_converters.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_task_reporter.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_utils.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_validators.py +0 -0
- {rara_tools-0.7.17 → rara_tools-0.7.19}/tests/test_viaf_client.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.7.19
|
|
@@ -77,7 +77,7 @@ class PageSchema:
|
|
|
77
77
|
self.__schema = {
|
|
78
78
|
"@type": "CreativeWork", # CONSTANT for pages
|
|
79
79
|
"@id": self.page_id,
|
|
80
|
-
"hasPart": []
|
|
80
|
+
"dcterms:hasPart": []
|
|
81
81
|
}
|
|
82
82
|
text_schemas = [
|
|
83
83
|
TextPageSchema(page).schema
|
|
@@ -91,7 +91,7 @@ class PageSchema:
|
|
|
91
91
|
page_schemas = text_schemas + image_schemas
|
|
92
92
|
page_schemas_with_ids = self._add_segment_ids(page_schemas)
|
|
93
93
|
|
|
94
|
-
self.__schema["hasPart"].extend(page_schemas_with_ids)
|
|
94
|
+
self.__schema["dcterms:hasPart"].extend(page_schemas_with_ids)
|
|
95
95
|
|
|
96
96
|
return self.__schema
|
|
97
97
|
|
|
@@ -237,6 +237,7 @@ class DIGARSchemaConverter:
|
|
|
237
237
|
generated_id: str,
|
|
238
238
|
sierra_id: str = "",
|
|
239
239
|
permalink: str = "",
|
|
240
|
+
hasPart_uri_prefix: str = None,
|
|
240
241
|
generated_id_type: str = "CustomID",
|
|
241
242
|
min_language_ratio: float = 0.2,
|
|
242
243
|
convert_ratio: bool = False
|
|
@@ -253,6 +254,8 @@ class DIGARSchemaConverter:
|
|
|
253
254
|
Document's corresponding Sierra ID.
|
|
254
255
|
permalink: str
|
|
255
256
|
Permanent link, where the document can be accessed.
|
|
257
|
+
hasPart_uri_prefix: str
|
|
258
|
+
Optional URI prefix for hasPart @ids.
|
|
256
259
|
generated_id_type: str
|
|
257
260
|
Method / type of generated ID (e.g. 'UUID')
|
|
258
261
|
min_language_ratio: float
|
|
@@ -269,6 +272,7 @@ class DIGARSchemaConverter:
|
|
|
269
272
|
self.__sierra_id: str = sierra_id
|
|
270
273
|
self.__generated_id: str = generated_id
|
|
271
274
|
self.__permalink: str = permalink.removesuffix("/")
|
|
275
|
+
self.__hasPart_uri_prefix = hasPart_uri_prefix.removesuffix("/") if hasPart_uri_prefix else None
|
|
272
276
|
self.__generated_id_type: str = generated_id_type
|
|
273
277
|
self.__texts: List[dict] = []
|
|
274
278
|
self.__images: List[dict] = []
|
|
@@ -368,7 +372,6 @@ class DIGARSchemaConverter:
|
|
|
368
372
|
mapped[text["start_page"]]["texts"].append(text)
|
|
369
373
|
for img in self.images:
|
|
370
374
|
mapped[img["page"]]["images"].append(img)
|
|
371
|
-
#print(mapped.items())
|
|
372
375
|
|
|
373
376
|
self.__page_mappings = [
|
|
374
377
|
v for k, v in sorted(list(mapped.items()), key=lambda x: x[0])
|
|
@@ -378,13 +381,14 @@ class DIGARSchemaConverter:
|
|
|
378
381
|
@property
|
|
379
382
|
def dcterms_haspart(self) -> dict:
|
|
380
383
|
if not self.__dcterms_haspart:
|
|
384
|
+
|
|
381
385
|
self.__dcterms_haspart = {
|
|
382
386
|
"dcterms:hasPart": [
|
|
383
387
|
PageSchema(
|
|
384
388
|
page_texts=page["texts"],
|
|
385
389
|
page_images=page["images"],
|
|
386
390
|
page_number=self._get_page_number(page),
|
|
387
|
-
doc_id=self.doc_id
|
|
391
|
+
doc_id=self.__hasPart_uri_prefix if self.__hasPart_uri_prefix else self.doc_id
|
|
388
392
|
).schema
|
|
389
393
|
for page in self.page_mappings
|
|
390
394
|
]
|
|
@@ -64,7 +64,7 @@ def test_digar_schema_id_generation():
|
|
|
64
64
|
|
|
65
65
|
#If permalink is given, this should be used as base ID
|
|
66
66
|
digar_schema = converter.digar_schema
|
|
67
|
-
first_segment_id = digar_schema["dcterms:hasPart"][0]["hasPart"][0]["@id"]
|
|
67
|
+
first_segment_id = digar_schema["dcterms:hasPart"][0]["dcterms:hasPart"][0]["@id"]
|
|
68
68
|
|
|
69
69
|
assert first_segment_id.startswith(TEST_PERMALINK)
|
|
70
70
|
|
|
@@ -76,7 +76,7 @@ def test_digar_schema_id_generation():
|
|
|
76
76
|
|
|
77
77
|
#If permalink is NOT given, Sierra ID should be used as base ID
|
|
78
78
|
digar_schema = converter.digar_schema
|
|
79
|
-
first_segment_id = digar_schema["dcterms:hasPart"][0]["hasPart"][0]["@id"]
|
|
79
|
+
first_segment_id = digar_schema["dcterms:hasPart"][0]["dcterms:hasPart"][0]["@id"]
|
|
80
80
|
assert first_segment_id.startswith(TEST_SIERRA_ID)
|
|
81
81
|
|
|
82
82
|
|
|
@@ -87,7 +87,7 @@ def test_digar_schema_id_generation():
|
|
|
87
87
|
|
|
88
88
|
#If neither permalink nor Sierra ID is given, generated ID should be used as base ID
|
|
89
89
|
digar_schema = converter.digar_schema
|
|
90
|
-
first_segment_id = digar_schema["dcterms:hasPart"][0]["hasPart"][0]["@id"]
|
|
90
|
+
first_segment_id = digar_schema["dcterms:hasPart"][0]["dcterms:hasPart"][0]["@id"]
|
|
91
91
|
assert first_segment_id.startswith(TEST_GENERATED_ID)
|
|
92
92
|
|
|
93
93
|
|
rara_tools-0.7.17/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.7.17
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_parsers/organization_parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rara_tools-0.7.17 → rara_tools-0.7.19}/rara_tools/parsers/marc_records/organization_record.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|