rara-tools 0.6.11__tar.gz → 0.6.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- {rara_tools-0.6.11/rara_tools.egg-info → rara_tools-0.6.13}/PKG-INFO +1 -1
- rara_tools-0.6.13/VERSION +1 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_records/base_record.py +12 -4
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_records/organization_record.py +45 -6
- {rara_tools-0.6.11 → rara_tools-0.6.13/rara_tools.egg-info}/PKG-INFO +1 -1
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_marc_parsers.py +23 -2
- rara_tools-0.6.11/VERSION +0 -1
- {rara_tools-0.6.11 → rara_tools-0.6.13}/LICENSE.md +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/README.md +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/pyproject.toml +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/constants/__init__.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/constants/digitizer.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/constants/general.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/constants/language_evaluator.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/constants/linker.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/constants/meta_extractor.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/constants/normalizers.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/constants/parsers.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/constants/subject_indexer.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/converters.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/decorators.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/digar_schema_converter.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/elastic.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/exceptions.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/normalizers/__init__.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/normalizers/authorities.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/normalizers/base.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/normalizers/bibs.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/normalizers/viaf.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_records/ems_record.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_records/person_record.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_records/title_record.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/tools/marc_converter.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/s3.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/task_reporter.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/utils.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools.egg-info/SOURCES.txt +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools.egg-info/dependency_links.txt +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools.egg-info/requires.txt +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools.egg-info/top_level.txt +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/requirements.txt +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/setup.cfg +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_digar_schema_converter.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_elastic.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_elastic_vector_and_search_operations.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_entity_normalizers.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_normalization.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_s3_exceptions.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_s3_file_operations.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_sierra_converters.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_task_reporter.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_utils.py +0 -0
- {rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_viaf_client.py +0 -0
rara_tools-0.6.13/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.6.13
|
{rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_records/base_record.py
RENAMED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
from typing import List, NoReturn, Tuple
|
|
2
2
|
from abc import abstractmethod
|
|
3
3
|
from pymarc.record import Record
|
|
4
|
+
from pymarc.marcjson import JSONHandler
|
|
4
5
|
from rara_tools.constants.parsers import GeneralMarcIDs
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class BaseRecord:
|
|
8
9
|
""" Implements general logic of parsing MARC files.
|
|
9
10
|
"""
|
|
10
|
-
def __init__(self, record: Record, add_variations: bool = False) -> NoReturn:
|
|
11
|
+
def __init__(self, record: Record | dict, add_variations: bool = False) -> NoReturn:
|
|
11
12
|
""" Initializes BaseRecord object.
|
|
12
13
|
|
|
13
14
|
Parameters
|
|
@@ -21,8 +22,8 @@ class BaseRecord:
|
|
|
21
22
|
via rara-norm-linker, it is necessary to enable this.
|
|
22
23
|
"""
|
|
23
24
|
self.add_variations: bool = add_variations
|
|
24
|
-
self.
|
|
25
|
-
self.__record_dict: dict =
|
|
25
|
+
self.__record_marc: Record = self._get_record_marc(record)
|
|
26
|
+
self.__record_dict: dict = self.marc_record.as_dict()["fields"]
|
|
26
27
|
|
|
27
28
|
self.__id_field_id: List[str] = GeneralMarcIDs.ID
|
|
28
29
|
self.__id_source_field_id: List[str] = GeneralMarcIDs.ID_SOURCE
|
|
@@ -30,6 +31,13 @@ class BaseRecord:
|
|
|
30
31
|
self.__identifier: str = ""
|
|
31
32
|
self.__identifier_source: str = ""
|
|
32
33
|
|
|
34
|
+
def _get_record_marc(self, record: Record | dict) -> Record:
|
|
35
|
+
""" Converts dict-type records into pymarc.Record objects.
|
|
36
|
+
"""
|
|
37
|
+
if isinstance(record, dict):
|
|
38
|
+
record = JSONHandler().elements([record])[0]
|
|
39
|
+
return record
|
|
40
|
+
|
|
33
41
|
def get_values(self,
|
|
34
42
|
marc_ids: List[str],
|
|
35
43
|
subfield_id: str | List[str] = "",
|
|
@@ -101,7 +109,7 @@ class BaseRecord:
|
|
|
101
109
|
|
|
102
110
|
@property
|
|
103
111
|
def marc_record(self) -> Record:
|
|
104
|
-
return self.
|
|
112
|
+
return self.__record_marc
|
|
105
113
|
|
|
106
114
|
@property
|
|
107
115
|
def marc_json_record(self) -> dict:
|
{rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_records/organization_record.py
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import List, NoReturn
|
|
2
2
|
from pymarc.record import Record
|
|
3
3
|
from rara_tools.parsers.marc_records.base_record import BaseRecord
|
|
4
|
-
from rara_tools.constants.parsers import OrganizationMarcIDs
|
|
4
|
+
from rara_tools.constants.parsers import OrganizationMarcIDs, LOGGER
|
|
5
5
|
import regex as re
|
|
6
6
|
import json
|
|
7
7
|
|
|
@@ -38,9 +38,9 @@ class OrganizationRecord(BaseRecord):
|
|
|
38
38
|
self.__name: str = ""
|
|
39
39
|
self.__original_name: dict = {}
|
|
40
40
|
self.__name_specification: str = ""
|
|
41
|
-
self.
|
|
42
|
-
self.
|
|
43
|
-
self.
|
|
41
|
+
self.__dates: str = ""
|
|
42
|
+
self.__location: str = ""
|
|
43
|
+
self.__numeration: str = ""
|
|
44
44
|
self.__name_variations: List[str] = []
|
|
45
45
|
self.__source: str = ""
|
|
46
46
|
self.__description: str = ""
|
|
@@ -57,7 +57,10 @@ class OrganizationRecord(BaseRecord):
|
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
def _clean_value(self, value: str) -> str:
|
|
60
|
-
|
|
60
|
+
try:
|
|
61
|
+
cleaned_value = value.strip("., ")
|
|
62
|
+
except Exception as e:
|
|
63
|
+
cleaned_value = ""
|
|
61
64
|
return cleaned_value
|
|
62
65
|
|
|
63
66
|
def _merge_and_clean(self, value: dict, keys: List[str]) -> str:
|
|
@@ -82,7 +85,10 @@ class OrganizationRecord(BaseRecord):
|
|
|
82
85
|
"b": self._clean_value(values[0].get("b", ""))
|
|
83
86
|
}
|
|
84
87
|
else:
|
|
85
|
-
|
|
88
|
+
LOGGER.info(
|
|
89
|
+
f"Could not parse subfields 'a' and/or 'b' from " \
|
|
90
|
+
f"field {self.__name_field_id}. Record:\n{self.marc_record}"
|
|
91
|
+
)
|
|
86
92
|
return self.__original_name
|
|
87
93
|
|
|
88
94
|
@property
|
|
@@ -91,6 +97,39 @@ class OrganizationRecord(BaseRecord):
|
|
|
91
97
|
self.__name = self._merge_and_clean(self.original_name, ["a", "b"])
|
|
92
98
|
return self.__name
|
|
93
99
|
|
|
100
|
+
@property
|
|
101
|
+
def dates(self) -> str:
|
|
102
|
+
if not self.__dates:
|
|
103
|
+
values = self.get_values(
|
|
104
|
+
marc_ids=self.__name_field_id,
|
|
105
|
+
subfield_id="d"
|
|
106
|
+
)
|
|
107
|
+
if values:
|
|
108
|
+
self.__dates = self._clean_value(values[0])
|
|
109
|
+
return self.__dates
|
|
110
|
+
|
|
111
|
+
@property
|
|
112
|
+
def location(self) -> str:
|
|
113
|
+
if not self.__location:
|
|
114
|
+
values = self.get_values(
|
|
115
|
+
marc_ids=self.__name_field_id,
|
|
116
|
+
subfield_id="c"
|
|
117
|
+
)
|
|
118
|
+
if values:
|
|
119
|
+
self.__location = self._clean_value(values[0])
|
|
120
|
+
return self.__location
|
|
121
|
+
|
|
122
|
+
@property
|
|
123
|
+
def numeration(self) -> str:
|
|
124
|
+
if not self.__numeration:
|
|
125
|
+
values = self.get_values(
|
|
126
|
+
marc_ids=self.__name_field_id,
|
|
127
|
+
subfield_id="n"
|
|
128
|
+
)
|
|
129
|
+
if values:
|
|
130
|
+
self.__numeration = self._clean_value(values[0])
|
|
131
|
+
return self.__numeration
|
|
132
|
+
|
|
94
133
|
@property
|
|
95
134
|
def acronyms(self) -> List[str]:
|
|
96
135
|
if not self.__acronyms:
|
{rara_tools-0.6.11 → rara_tools-0.6.13}/tests/test_marc_parsers.py
RENAMED
|
@@ -5,13 +5,19 @@ from rara_tools.parsers.marc_parsers.person_parser import PersonsMARCParser
|
|
|
5
5
|
from rara_tools.parsers.marc_parsers.organization_parser import OrganizationsMARCParser
|
|
6
6
|
from rara_tools.parsers.marc_parsers.location_parser import LocationMARCParser
|
|
7
7
|
from rara_tools.parsers.marc_parsers.title_parser import TitlesMARCParser
|
|
8
|
+
from rara_tools.parsers.marc_records.person_record import PersonRecord
|
|
9
|
+
from rara_tools.parsers.marc_records.organization_record import OrganizationRecord
|
|
10
|
+
from tests.test_utils import read_json_file
|
|
8
11
|
|
|
9
|
-
|
|
10
|
-
MARC_ROOT_DIR = os.path.join(
|
|
12
|
+
ROOT_DIR = os.path.join("tests", "test_data", "marc_records")
|
|
13
|
+
MARC_ROOT_DIR = os.path.join(ROOT_DIR, "mrc")
|
|
14
|
+
JSON_ROOT_DIR = os.path.join(ROOT_DIR, "json")
|
|
11
15
|
EMS_TEST_FILE = os.path.join(MARC_ROOT_DIR, "ems_test_subset.mrc")
|
|
12
16
|
PER_TEST_FILE = os.path.join(MARC_ROOT_DIR, "per_test_subset.mrc")
|
|
13
17
|
ORG_TEST_FILE = os.path.join(MARC_ROOT_DIR, "org_test_subset.mrc")
|
|
14
18
|
TITLE_TEST_FILE = os.path.join(MARC_ROOT_DIR, "title_test_subset.mrc")
|
|
19
|
+
PER_JSON_TEST_FILE = os.path.join(JSON_ROOT_DIR, "per_marc_json_record.json")
|
|
20
|
+
ORG_JSON_TEST_FILE = os.path.join(JSON_ROOT_DIR, "org_marc_json_record.json")
|
|
15
21
|
|
|
16
22
|
def test_ems_parser_without_variations():
|
|
17
23
|
ems_marc_parser = EMSMARCParser(EMS_TEST_FILE, add_variations=False)
|
|
@@ -56,3 +62,18 @@ def test_title_parser_with_variations():
|
|
|
56
62
|
assert "name" in record
|
|
57
63
|
assert "link_variations" in record
|
|
58
64
|
assert len(record["link_variations"]) > 0
|
|
65
|
+
|
|
66
|
+
def test_creating_per_marc_record_with_json_input():
|
|
67
|
+
json_data = read_json_file(PER_JSON_TEST_FILE)
|
|
68
|
+
record = PersonRecord(json_data)
|
|
69
|
+
assert record.name == "Koidula, Lydia"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_creating_org_marc_record_with_json_input():
|
|
73
|
+
json_data = read_json_file(ORG_JSON_TEST_FILE)
|
|
74
|
+
record = OrganizationRecord(json_data)
|
|
75
|
+
assert record.original_name.get("a") == "Eesti"
|
|
76
|
+
assert record.original_name.get("b") == "Riigikogu"
|
|
77
|
+
assert not record.location
|
|
78
|
+
assert not record.dates
|
|
79
|
+
assert not record.numeration
|
rara_tools-0.6.11/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.6.11
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rara_tools-0.6.11 → rara_tools-0.6.13}/rara_tools/parsers/marc_parsers/organization_parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|