folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/__init__.py +11 -0
- folio_migration_tools/__main__.py +169 -85
- folio_migration_tools/circulation_helper.py +96 -59
- folio_migration_tools/config_file_load.py +66 -0
- folio_migration_tools/custom_dict.py +6 -4
- folio_migration_tools/custom_exceptions.py +21 -19
- folio_migration_tools/extradata_writer.py +46 -0
- folio_migration_tools/folder_structure.py +63 -66
- folio_migration_tools/helper.py +29 -21
- folio_migration_tools/holdings_helper.py +57 -34
- folio_migration_tools/i18n_config.py +9 -0
- folio_migration_tools/library_configuration.py +173 -13
- folio_migration_tools/mapper_base.py +317 -106
- folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
- folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
- folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
- folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
- folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
- folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
- folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
- folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
- folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
- folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
- folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
- folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
- folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
- folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
- folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
- folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
- folio_migration_tools/migration_report.py +85 -38
- folio_migration_tools/migration_tasks/__init__.py +1 -3
- folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
- folio_migration_tools/migration_tasks/batch_poster.py +911 -198
- folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
- folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
- folio_migration_tools/migration_tasks/items_transformer.py +264 -84
- folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
- folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
- folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
- folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
- folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
- folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
- folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
- folio_migration_tools/migration_tasks/user_transformer.py +180 -139
- folio_migration_tools/task_configuration.py +46 -0
- folio_migration_tools/test_infrastructure/__init__.py +0 -0
- folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
- folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
- folio_migration_tools/transaction_migration/legacy_request.py +65 -25
- folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
- folio_migration_tools/transaction_migration/transaction_result.py +12 -1
- folio_migration_tools/translations/en.json +476 -0
- folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
- folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
- folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
- folio_migration_tools/generate_schemas.py +0 -46
- folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
- folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
- folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
- folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
- folio_migration_tools/report_blurbs.py +0 -219
- folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
- folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
- folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
- folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,26 +1,29 @@
|
|
|
1
1
|
"""The default mapper, responsible for parsing MARC21 records acording to the
|
|
2
2
|
FOLIO community specifications"""
|
|
3
|
-
|
|
3
|
+
|
|
4
4
|
import logging
|
|
5
5
|
import sys
|
|
6
6
|
import time
|
|
7
7
|
import typing
|
|
8
8
|
import uuid
|
|
9
|
-
|
|
10
|
-
from typing import Generator, List
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Dict, Generator, List
|
|
11
|
+
|
|
12
|
+
import i18n
|
|
13
|
+
from defusedxml.ElementTree import fromstring
|
|
11
14
|
from folio_uuid.folio_namespaces import FOLIONamespaces
|
|
12
15
|
from folio_uuid.folio_uuid import FolioUUID
|
|
13
|
-
|
|
14
|
-
import pymarc
|
|
15
|
-
import requests
|
|
16
16
|
from folioclient import FolioClient
|
|
17
|
+
from pymarc.record import Leader, Record
|
|
18
|
+
from pymarc.field import Field
|
|
19
|
+
|
|
17
20
|
from folio_migration_tools.custom_exceptions import (
|
|
18
21
|
TransformationProcessError,
|
|
19
22
|
TransformationRecordFailedError,
|
|
20
23
|
)
|
|
21
24
|
from folio_migration_tools.helper import Helper
|
|
22
25
|
from folio_migration_tools.library_configuration import (
|
|
23
|
-
|
|
26
|
+
FileDefinition,
|
|
24
27
|
HridHandling,
|
|
25
28
|
IlsFlavour,
|
|
26
29
|
LibraryConfiguration,
|
|
@@ -29,10 +32,7 @@ from folio_migration_tools.marc_rules_transformation.conditions import Condition
|
|
|
29
32
|
from folio_migration_tools.marc_rules_transformation.rules_mapper_base import (
|
|
30
33
|
RulesMapperBase,
|
|
31
34
|
)
|
|
32
|
-
|
|
33
|
-
from folio_migration_tools.report_blurbs import Blurbs
|
|
34
|
-
from pymarc import Field
|
|
35
|
-
from pymarc.record import Record
|
|
35
|
+
from folio_migration_tools.migration_tasks.migration_task_base import MarcTaskConfigurationBase
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
class BibsRulesMapper(RulesMapperBase):
|
|
@@ -41,196 +41,201 @@ class BibsRulesMapper(RulesMapperBase):
|
|
|
41
41
|
|
|
42
42
|
def __init__(
|
|
43
43
|
self,
|
|
44
|
-
folio_client,
|
|
44
|
+
folio_client: FolioClient,
|
|
45
45
|
library_configuration: LibraryConfiguration,
|
|
46
|
-
task_configuration,
|
|
46
|
+
task_configuration: MarcTaskConfigurationBase,
|
|
47
|
+
statistical_codes_map: Dict[str, str] = None,
|
|
47
48
|
):
|
|
48
49
|
super().__init__(
|
|
49
50
|
folio_client,
|
|
50
51
|
library_configuration,
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
self.record_status = {}
|
|
56
|
-
self.unique_001s = set()
|
|
57
|
-
self.holdings_map = {}
|
|
58
|
-
self.id_map = {}
|
|
59
|
-
self.srs_recs = []
|
|
60
|
-
self.schema = self.instance_json_schema
|
|
61
|
-
self.contrib_name_types = {}
|
|
62
|
-
self.mapped_folio_fields = {}
|
|
63
|
-
self.unmapped_folio_fields = {}
|
|
64
|
-
self.alt_title_map = {}
|
|
65
|
-
logging.info(
|
|
66
|
-
f"HRID handling is set to: '{self.task_configuration.hrid_handling}'"
|
|
52
|
+
task_configuration,
|
|
53
|
+
statistical_codes_map,
|
|
54
|
+
self.get_instance_schema(folio_client),
|
|
55
|
+
Conditions(folio_client, self, "bibs", library_configuration.folio_release),
|
|
67
56
|
)
|
|
68
|
-
self.hrid_handling: HridHandling = self.task_configuration.hrid_handling
|
|
69
57
|
logging.info("Fetching mapping rules from the tenant")
|
|
70
|
-
rules_endpoint =
|
|
71
|
-
|
|
72
|
-
if self.library_configuration.folio_release == FolioRelease.juniper
|
|
73
|
-
else "/mapping-rules/marc-bib"
|
|
74
|
-
)
|
|
75
|
-
self.mappings = self.folio.folio_get_single_object(rules_endpoint)
|
|
58
|
+
rules_endpoint = "/mapping-rules/marc-bib"
|
|
59
|
+
self.mappings = self.folio_client.folio_get_single_object(rules_endpoint)
|
|
76
60
|
logging.info("Fetching valid language codes...")
|
|
77
61
|
self.language_codes = list(self.fetch_language_codes())
|
|
78
|
-
self.
|
|
79
|
-
self.
|
|
80
|
-
self.
|
|
81
|
-
self.
|
|
82
|
-
|
|
62
|
+
self.instance_relationships: dict = {}
|
|
63
|
+
self.instance_relationship_types: dict = {}
|
|
64
|
+
self.other_mode_of_issuance_id = get_unspecified_mode_of_issuance(self.folio_client)
|
|
65
|
+
self.data_import_marc = self.task_configuration.data_import_marc
|
|
66
|
+
if self.data_import_marc:
|
|
67
|
+
self.hrid_handler.deactivate035_from001 = True
|
|
83
68
|
self.start = time.time()
|
|
84
69
|
|
|
85
|
-
def perform_initial_preparation(self, marc_record:
|
|
86
|
-
folio_instance = {
|
|
87
|
-
"metadata": self.folio.get_metadata_construct(),
|
|
88
|
-
}
|
|
70
|
+
def perform_initial_preparation(self, file_def: FileDefinition, marc_record: Record, legacy_ids: List[str]):
|
|
71
|
+
folio_instance = {}
|
|
89
72
|
folio_instance["id"] = str(
|
|
90
73
|
FolioUUID(
|
|
91
|
-
|
|
74
|
+
self.base_string_for_folio_uuid,
|
|
92
75
|
FOLIONamespaces.instances,
|
|
93
76
|
str(legacy_ids[-1]),
|
|
94
77
|
)
|
|
95
78
|
)
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
79
|
+
if (
|
|
80
|
+
all([self.create_source_records, file_def.create_source_records])
|
|
81
|
+
or self.hrid_handler.handling == HridHandling.preserve001
|
|
82
|
+
):
|
|
83
|
+
self.hrid_handler.handle_hrid(
|
|
84
|
+
FOLIONamespaces.instances,
|
|
85
|
+
folio_instance,
|
|
86
|
+
marc_record,
|
|
87
|
+
legacy_ids,
|
|
102
88
|
)
|
|
89
|
+
self.handle_leader_05(marc_record, legacy_ids)
|
|
90
|
+
if self.task_configuration.add_administrative_notes_with_legacy_ids:
|
|
91
|
+
for legacy_id in legacy_ids:
|
|
92
|
+
self.add_legacy_id_to_admin_note(folio_instance, legacy_id)
|
|
93
|
+
|
|
103
94
|
return folio_instance
|
|
104
95
|
|
|
105
|
-
def
|
|
96
|
+
def handle_leader_05(self, marc_record: Record, legacy_ids: List[str]):
|
|
97
|
+
leader_05 = marc_record.leader[5] or "Empty"
|
|
98
|
+
self.migration_report.add("RecordStatus", i18n.t("Original value") + f": {leader_05}")
|
|
99
|
+
if leader_05 not in ["a", "c", "d", "n", "p"]:
|
|
100
|
+
marc_record.leader = Leader(f"{marc_record.leader[:5]}c{marc_record.leader[6:]}")
|
|
101
|
+
self.migration_report.add(
|
|
102
|
+
"RecordStatus", i18n.t("Changed %{a} to %{b}", a=leader_05, b="c")
|
|
103
|
+
)
|
|
104
|
+
if leader_05 == "d":
|
|
105
|
+
Helper.log_data_issue(legacy_ids, "d in leader. Is this correct?", marc_record.leader)
|
|
106
|
+
|
|
107
|
+
def parse_record(
|
|
108
|
+
self, marc_record: Record, file_def: FileDefinition, legacy_ids: List[str]
|
|
109
|
+
) -> list[dict]:
|
|
106
110
|
"""Parses a bib recod into a FOLIO Inventory instance object
|
|
107
111
|
Community mapping suggestion: https://bit.ly/2S7Gyp3
|
|
108
|
-
This is the main function
|
|
112
|
+
This is the main function
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
marc_record (Record): _description_
|
|
116
|
+
file_def (FileDefinition): _description_
|
|
117
|
+
legacy_ids (List[str]): List of legacy ids in record
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
dict: _description_
|
|
121
|
+
"""
|
|
109
122
|
self.print_progress()
|
|
110
|
-
ignored_subsequent_fields = set()
|
|
123
|
+
ignored_subsequent_fields: set = set()
|
|
111
124
|
bad_tags = set(self.task_configuration.tags_to_delete) # "907"
|
|
112
|
-
folio_instance = self.perform_initial_preparation(marc_record, legacy_ids)
|
|
113
|
-
|
|
114
|
-
self.
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
125
|
+
folio_instance = self.perform_initial_preparation(file_def, marc_record, legacy_ids)
|
|
126
|
+
if self.data_import_marc:
|
|
127
|
+
self.simple_bib_map(folio_instance, marc_record, ignored_subsequent_fields, legacy_ids)
|
|
128
|
+
else:
|
|
129
|
+
for marc_field in marc_record:
|
|
130
|
+
self.report_marc_stats(marc_field, bad_tags, legacy_ids, ignored_subsequent_fields)
|
|
131
|
+
if marc_field.tag not in ignored_subsequent_fields:
|
|
132
|
+
self.process_marc_field(
|
|
133
|
+
folio_instance,
|
|
134
|
+
marc_field,
|
|
135
|
+
ignored_subsequent_fields,
|
|
136
|
+
legacy_ids,
|
|
137
|
+
)
|
|
124
138
|
|
|
125
|
-
self.perform_additional_parsing(
|
|
126
|
-
folio_instance, marc_record, legacy_ids, suppressed
|
|
127
|
-
)
|
|
139
|
+
self.perform_additional_parsing(folio_instance, marc_record, legacy_ids, file_def)
|
|
128
140
|
clean_folio_instance = self.validate_required_properties(
|
|
129
141
|
"-".join(legacy_ids), folio_instance, self.schema, FOLIONamespaces.instances
|
|
130
142
|
)
|
|
131
143
|
self.dedupe_rec(clean_folio_instance)
|
|
132
144
|
marc_record.remove_fields(*list(bad_tags))
|
|
133
|
-
self.report_folio_mapping(clean_folio_instance, self.
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
try:
|
|
158
|
-
self.map_field_according_to_mapping(
|
|
159
|
-
marc_field, mappings, folio_instance, legacy_ids
|
|
160
|
-
)
|
|
161
|
-
if any(m.get("ignoreSubsequentFields", False) for m in mappings):
|
|
162
|
-
ignored_subsequent_fields.add(marc_field.tag)
|
|
163
|
-
except Exception as ee:
|
|
164
|
-
logging.error(
|
|
165
|
-
f"map_field_according_to_mapping {marc_field.tag} {marc_field.format_field()} {json.dumps(mappings)}"
|
|
166
|
-
)
|
|
167
|
-
raise ee
|
|
168
|
-
|
|
169
|
-
def report_marc_stats(
|
|
170
|
-
self, marc_field, bad_tags, legacy_ids, ignored_subsequent_fields
|
|
171
|
-
):
|
|
172
|
-
self.migration_report.add_general_statistics("Total number of Tags processed")
|
|
173
|
-
self.report_bad_tags(marc_field, bad_tags, legacy_ids)
|
|
174
|
-
mapped = marc_field.tag in self.mappings
|
|
175
|
-
if marc_field.tag in ignored_subsequent_fields:
|
|
176
|
-
mapped = False
|
|
177
|
-
self.report_legacy_mapping(marc_field.tag, True, mapped)
|
|
178
|
-
|
|
179
|
-
def perform_proxy_mapping(self, marc_field):
|
|
180
|
-
proxy_mapping = next(iter(self.mappings.get("880", [])), [])
|
|
181
|
-
if proxy_mapping and "fieldReplacementRule" in proxy_mapping:
|
|
182
|
-
target_field = next(
|
|
183
|
-
(
|
|
184
|
-
r["targetField"]
|
|
185
|
-
for r in proxy_mapping["fieldReplacementRule"]
|
|
186
|
-
if r["sourceDigits"] == marc_field["6"][:3]
|
|
187
|
-
),
|
|
188
|
-
"",
|
|
189
|
-
)
|
|
190
|
-
mappings = self.mappings.get(target_field, {})
|
|
191
|
-
|
|
192
|
-
self.migration_report.add(
|
|
193
|
-
Blurbs.Field880Mappings,
|
|
194
|
-
f"Source digits: {marc_field['6'][:3]} Target field: {target_field}",
|
|
195
|
-
)
|
|
196
|
-
else:
|
|
197
|
-
raise TransformationProcessError(
|
|
198
|
-
"", "Mapping rules for 880 is missing. Halting"
|
|
145
|
+
self.report_folio_mapping(clean_folio_instance, self.schema)
|
|
146
|
+
return [clean_folio_instance]
|
|
147
|
+
|
|
148
|
+
def simple_bib_map(self, folio_instance: dict, marc_record: Record, ignored_subsequent_fields: set, legacy_ids: List[str]):
|
|
149
|
+
"""
|
|
150
|
+
This method applies a much simplified MARC-to-instance
|
|
151
|
+
mapping to create a minimal FOLIO Instance record to be
|
|
152
|
+
used with a Data Import based MARC loading flow, rather
|
|
153
|
+
than creating SRS records during transformation.
|
|
154
|
+
|
|
155
|
+
Args:
|
|
156
|
+
folio_instance (dict): _description_
|
|
157
|
+
marc_record (Record): _description_
|
|
158
|
+
legacy_ids (List[str]): _description_
|
|
159
|
+
file_def (FileDefinition): _description_
|
|
160
|
+
"""
|
|
161
|
+
main_entry_field_tags = ["100", "110", "111", "130"]
|
|
162
|
+
main_entry_fields = marc_record.get_fields(*main_entry_field_tags)
|
|
163
|
+
main_entry_fields.sort(key=lambda x: int(x.tag))
|
|
164
|
+
if len(main_entry_fields) > 1:
|
|
165
|
+
Helper.log_data_issue(
|
|
166
|
+
legacy_ids,
|
|
167
|
+
"Multiple main entry fields in record. Record will fail Data Import. Creating Instance anyway.",
|
|
168
|
+
[str(field) for field in main_entry_fields]
|
|
199
169
|
)
|
|
200
|
-
|
|
170
|
+
if not main_entry_fields:
|
|
171
|
+
main_entry_fields += marc_record.get_fields("700", "710", "711", "730")
|
|
172
|
+
main_entry_fields.sort(key=lambda x: int(x.tag))
|
|
173
|
+
if main_entry_fields:
|
|
174
|
+
self.process_marc_field(folio_instance, main_entry_fields[0], ignored_subsequent_fields, legacy_ids)
|
|
175
|
+
try:
|
|
176
|
+
self.process_marc_field(folio_instance, marc_record['245'], ignored_subsequent_fields, legacy_ids)
|
|
177
|
+
except KeyError as ke:
|
|
178
|
+
raise TransformationRecordFailedError(
|
|
179
|
+
legacy_ids,
|
|
180
|
+
"No 245 field in MARC record"
|
|
181
|
+
) from ke
|
|
201
182
|
|
|
202
183
|
def perform_additional_parsing(
|
|
203
184
|
self,
|
|
204
185
|
folio_instance: dict,
|
|
205
186
|
marc_record: Record,
|
|
206
187
|
legacy_ids: List[str],
|
|
207
|
-
|
|
208
|
-
):
|
|
209
|
-
"""Do stuff not easily captured by the mapping rules
|
|
210
|
-
|
|
188
|
+
file_def: FileDefinition,
|
|
189
|
+
) -> None:
|
|
190
|
+
"""Do stuff not easily captured by the mapping rules
|
|
191
|
+
|
|
192
|
+
Args:
|
|
193
|
+
folio_instance (dict): _description_
|
|
194
|
+
marc_record (Record): _description_
|
|
195
|
+
legacy_ids (List[str]): _description_
|
|
196
|
+
file_def (FileDefinition): _description_
|
|
197
|
+
"""
|
|
198
|
+
if file_def.create_source_records and self.create_source_records:
|
|
199
|
+
folio_instance["source"] = "MARC"
|
|
200
|
+
else:
|
|
201
|
+
folio_instance["source"] = "FOLIO"
|
|
211
202
|
folio_instance["instanceFormatIds"] = list(
|
|
212
203
|
set(self.get_instance_format_ids(marc_record, legacy_ids))
|
|
213
204
|
)
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
)
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
)
|
|
205
|
+
folio_instance["instanceTypeId"] = self.get_instance_type_id(marc_record, legacy_ids)
|
|
206
|
+
|
|
207
|
+
folio_instance["modeOfIssuanceId"] = self.get_mode_of_issuance_id(marc_record, legacy_ids)
|
|
208
|
+
self.handle_languages(folio_instance, marc_record, legacy_ids)
|
|
209
|
+
self.handle_suppression(folio_instance, file_def)
|
|
210
|
+
# Map statistical codes from MARC and FileDefinition, then map the IDs
|
|
211
|
+
self.map_statistical_codes(folio_instance, file_def, marc_record)
|
|
212
|
+
self.map_statistical_code_ids(legacy_ids, folio_instance)
|
|
213
|
+
self.handle_holdings(marc_record)
|
|
214
|
+
if prec_titles := folio_instance.get("precedingTitles", []):
|
|
215
|
+
self.migration_report.add("PrecedingSuccedingTitles", f"{len(prec_titles)}")
|
|
216
|
+
del folio_instance["precedingTitles"]
|
|
217
|
+
if succ_titles := folio_instance.get("succeedingTitles", []):
|
|
218
|
+
del folio_instance["succeedingTitles"]
|
|
219
|
+
self.migration_report.add("PrecedingSuccedingTitles", f"{len(succ_titles)}")
|
|
220
|
+
|
|
221
|
+
def handle_languages(self, folio_instance: Dict, marc_record: Record, legacy_ids: List[str]):
|
|
222
222
|
if "languages" in folio_instance:
|
|
223
|
-
folio_instance["languages"]
|
|
224
|
-
|
|
223
|
+
orig_languages = {lang: None for lang in folio_instance["languages"]}
|
|
224
|
+
orig_languages.update(
|
|
225
|
+
{lang: None for lang in self.get_languages(marc_record, legacy_ids)}
|
|
225
226
|
)
|
|
227
|
+
folio_instance["languages"] = list(orig_languages.keys())
|
|
226
228
|
else:
|
|
227
229
|
folio_instance["languages"] = self.get_languages(marc_record, legacy_ids)
|
|
228
230
|
folio_instance["languages"] = list(
|
|
229
231
|
self.filter_langs(folio_instance["languages"], marc_record, legacy_ids)
|
|
230
232
|
)
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
233
|
+
|
|
234
|
+
def get_instance_schema(self, folio_client: FolioClient):
|
|
235
|
+
logging.info("Fetching Instance schema...")
|
|
236
|
+
return folio_client.get_from_github(
|
|
237
|
+
"folio-org", "mod-inventory-storage", "ramls/instance.json"
|
|
238
|
+
)
|
|
234
239
|
|
|
235
240
|
def handle_holdings(self, marc_record: Record):
|
|
236
241
|
if "852" in marc_record:
|
|
@@ -239,43 +244,38 @@ class BibsRulesMapper(RulesMapperBase):
|
|
|
239
244
|
)
|
|
240
245
|
f852s = (f for f in holdingsfields if f.tag == "852")
|
|
241
246
|
f86xs = (
|
|
242
|
-
f
|
|
243
|
-
for f in holdingsfields
|
|
244
|
-
if f.tag in ["866", "867", "868", "865", "864", "863"]
|
|
247
|
+
f for f in holdingsfields if f.tag in ["866", "867", "868", "865", "864", "863"]
|
|
245
248
|
)
|
|
246
249
|
if f852s and not f86xs:
|
|
247
250
|
self.migration_report.add(
|
|
248
|
-
|
|
249
|
-
|
|
251
|
+
"HoldingsGenerationFromBibs",
|
|
252
|
+
i18n.t(
|
|
253
|
+
"Records with %{has_many}s but no %{has_no}", has_many="852", has_no="86X"
|
|
254
|
+
),
|
|
250
255
|
)
|
|
251
256
|
elif any(f852s):
|
|
252
257
|
self.migration_report.add(
|
|
253
|
-
|
|
254
|
-
|
|
258
|
+
"HoldingsGenerationFromBibs",
|
|
259
|
+
i18n.t(
|
|
260
|
+
"Records with both %{has_many}s and at least one %{has_one}",
|
|
261
|
+
has_one="86X",
|
|
262
|
+
has_many="852",
|
|
263
|
+
),
|
|
255
264
|
)
|
|
256
265
|
|
|
257
266
|
elif any(f86xs):
|
|
258
267
|
self.migration_report.add(
|
|
259
|
-
|
|
260
|
-
"Records without
|
|
268
|
+
"HoldingsGenerationFromBibs",
|
|
269
|
+
i18n.t("Records without %{has_no}s but with %{has}", has="86X", has_no="852"),
|
|
261
270
|
)
|
|
262
271
|
|
|
263
272
|
def wrap_up(self):
|
|
264
273
|
logging.info("Mapper wrapping up")
|
|
265
|
-
self.
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
if (
|
|
269
|
-
(not marc_field.tag.isnumeric())
|
|
270
|
-
and marc_field.tag != "LDR"
|
|
271
|
-
and marc_field.tag not in bad_tags
|
|
272
|
-
):
|
|
273
|
-
self.migration_report.add(Blurbs.NonNumericTagsInRecord, marc_field.tag)
|
|
274
|
-
message = "Non-numeric tags in records"
|
|
275
|
-
Helper.log_data_issue(legacy_ids, message, marc_field.tag)
|
|
276
|
-
bad_tags.add(marc_field.tag)
|
|
274
|
+
if self.create_source_records:
|
|
275
|
+
if self.task_configuration.update_hrid_settings:
|
|
276
|
+
self.hrid_handler.store_hrid_settings()
|
|
277
277
|
|
|
278
|
-
def get_instance_type_id(self, marc_record,
|
|
278
|
+
def get_instance_type_id(self, marc_record: Record, legacy_ids: List[str]) -> str:
|
|
279
279
|
return_id = ""
|
|
280
280
|
|
|
281
281
|
def get_folio_id_by_name(f336a: str):
|
|
@@ -283,40 +283,37 @@ class BibsRulesMapper(RulesMapperBase):
|
|
|
283
283
|
match = next(
|
|
284
284
|
(
|
|
285
285
|
f["id"]
|
|
286
|
-
for f in self.
|
|
286
|
+
for f in self.folio_client.instance_types
|
|
287
287
|
if f["name"].lower().replace(" ", "") == match_template
|
|
288
288
|
),
|
|
289
289
|
"",
|
|
290
290
|
)
|
|
291
291
|
if match:
|
|
292
292
|
self.migration_report.add(
|
|
293
|
-
|
|
294
|
-
|
|
293
|
+
"RecourceTypeMapping",
|
|
294
|
+
"336$a - "
|
|
295
|
+
+ i18n.t("Successful matching on %{criteria}", criteria=match_template)
|
|
296
|
+
+ f" ({f336a})",
|
|
295
297
|
)
|
|
296
298
|
else:
|
|
297
299
|
self.migration_report.add(
|
|
298
|
-
|
|
299
|
-
|
|
300
|
+
"RecourceTypeMapping",
|
|
301
|
+
"336$a - "
|
|
302
|
+
+ i18n.t("Unsuccessful matching on %{criteria}", criteria=match_template)
|
|
303
|
+
+ f" ({f336a})",
|
|
300
304
|
)
|
|
301
305
|
Helper.log_data_issue(
|
|
302
|
-
|
|
306
|
+
legacy_ids,
|
|
303
307
|
"instance type name (336$a) -Unsuccessful matching",
|
|
304
308
|
f336a,
|
|
305
309
|
)
|
|
306
310
|
return match
|
|
307
311
|
|
|
308
|
-
if not self.
|
|
312
|
+
if not self.folio_client.instance_types:
|
|
309
313
|
raise TransformationProcessError("", "No instance_types setup in tenant")
|
|
310
314
|
|
|
311
315
|
if "336" in marc_record and "b" not in marc_record["336"]:
|
|
312
|
-
self.migration_report.add(
|
|
313
|
-
Blurbs.RecourceTypeMapping, "Subfield b not in 336"
|
|
314
|
-
)
|
|
315
|
-
Helper.log_data_issue(
|
|
316
|
-
legacy_id,
|
|
317
|
-
"Subfield b not in 336",
|
|
318
|
-
"",
|
|
319
|
-
)
|
|
316
|
+
self.migration_report.add("RecourceTypeMapping", i18n.t("Subfield b not in 336"))
|
|
320
317
|
if "a" in marc_record["336"]:
|
|
321
318
|
return_id = get_folio_id_by_name(marc_record["336"]["a"])
|
|
322
319
|
|
|
@@ -324,215 +321,172 @@ class BibsRulesMapper(RulesMapperBase):
|
|
|
324
321
|
f336_b = marc_record["336"]["b"].lower().replace(" ", "")
|
|
325
322
|
f336_b_norm = f336_b.lower().replace(" ", "")
|
|
326
323
|
t = self.conditions.get_ref_data_tuple_by_code(
|
|
327
|
-
self.
|
|
324
|
+
self.folio_client.instance_types,
|
|
328
325
|
"instance_types",
|
|
329
326
|
f336_b_norm,
|
|
330
327
|
)
|
|
331
328
|
if not t:
|
|
332
329
|
self.migration_report.add(
|
|
333
|
-
|
|
334
|
-
|
|
330
|
+
"RecourceTypeMapping",
|
|
331
|
+
"336$b - "
|
|
332
|
+
+ i18n.t(
|
|
333
|
+
"Code %{code} ('%{code_raw}') not found in FOLIO ",
|
|
334
|
+
code=f336_b_norm,
|
|
335
|
+
code_raw=f336_b,
|
|
336
|
+
),
|
|
335
337
|
)
|
|
336
338
|
Helper.log_data_issue(
|
|
337
|
-
|
|
338
|
-
"instance type code (
|
|
339
|
+
legacy_ids,
|
|
340
|
+
i18n.t("instance type code (%{code}) not found in FOLIO", code="336$b"),
|
|
339
341
|
f336_b,
|
|
340
342
|
)
|
|
341
343
|
else:
|
|
342
344
|
self.migration_report.add(
|
|
343
|
-
|
|
344
|
-
|
|
345
|
+
"RecourceTypeMapping",
|
|
346
|
+
"336$b "
|
|
347
|
+
+ i18n.t(
|
|
348
|
+
"%{fro} mapped from %{record}", fro=t[1], record=marc_record["336"]["b"]
|
|
349
|
+
),
|
|
345
350
|
)
|
|
346
351
|
return_id = t[0]
|
|
347
352
|
|
|
348
353
|
if not return_id:
|
|
349
354
|
t = self.conditions.get_ref_data_tuple_by_code(
|
|
350
|
-
self.
|
|
355
|
+
self.folio_client.instance_types, "instance_types", "zzz"
|
|
351
356
|
)
|
|
352
357
|
return_id = t[0]
|
|
353
358
|
return return_id
|
|
354
359
|
|
|
355
|
-
def
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
)
|
|
372
|
-
self.migration_report.add(
|
|
373
|
-
Blurbs.InstanceFormat,
|
|
374
|
-
f"Code '{code}' not found in FOLIO",
|
|
375
|
-
)
|
|
376
|
-
return ""
|
|
360
|
+
def get_instance_format_id_by_code(self, legacy_ids: List[str], code: str):
|
|
361
|
+
try:
|
|
362
|
+
match = next(f for f in self.folio_client.instance_formats if f["code"] == code)
|
|
363
|
+
self.migration_report.add(
|
|
364
|
+
"InstanceFormat",
|
|
365
|
+
i18n.t("Successful match") + f" - {code}->{match['name']}",
|
|
366
|
+
)
|
|
367
|
+
return match["id"]
|
|
368
|
+
except Exception:
|
|
369
|
+
# TODO: Distinguish between generated codes and proper 338bs
|
|
370
|
+
Helper.log_data_issue(legacy_ids, "Instance format Code not found in FOLIO", code)
|
|
371
|
+
self.migration_report.add(
|
|
372
|
+
"InstanceFormat",
|
|
373
|
+
i18n.t("Code '%{code}' not found in FOLIO", code=code),
|
|
374
|
+
)
|
|
375
|
+
return ""
|
|
377
376
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
Helper.log_data_issue(
|
|
395
|
-
legacy_id,
|
|
396
|
-
"Unsuccessful matching on 337$a and 338$a",
|
|
397
|
-
match_template,
|
|
377
|
+
def get_instance_format_id_by_name(self, f337a: str, f338a: str, legacy_ids: List[str]):
|
|
378
|
+
f337a = f337a.lower().strip()
|
|
379
|
+
f338a = f338a.lower().strip()
|
|
380
|
+
match_template = f"{f337a} -- {f338a}"
|
|
381
|
+
try:
|
|
382
|
+
match = next(
|
|
383
|
+
f
|
|
384
|
+
for f in self.folio_client.instance_formats
|
|
385
|
+
if f["name"].lower() == match_template
|
|
386
|
+
)
|
|
387
|
+
self.migration_report.add(
|
|
388
|
+
"InstanceFormat",
|
|
389
|
+
i18n.t(
|
|
390
|
+
"Successful matching on %{criteria_1} and %{criteria_2}",
|
|
391
|
+
criteria_1="337$a",
|
|
392
|
+
criteria_2="338$a",
|
|
398
393
|
)
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
394
|
+
+ f" - {match_template}->{match['name']}",
|
|
395
|
+
)
|
|
396
|
+
return match["id"]
|
|
397
|
+
except Exception:
|
|
398
|
+
Helper.log_data_issue(
|
|
399
|
+
legacy_ids,
|
|
400
|
+
"Unsuccessful matching on 337$a and 338$a",
|
|
401
|
+
match_template,
|
|
402
|
+
)
|
|
403
|
+
self.migration_report.add(
|
|
404
|
+
"InstanceFormat",
|
|
405
|
+
i18n.t(
|
|
406
|
+
"Unsuccessful matching on %{criteria_1} and %{criteria_2}",
|
|
407
|
+
criteria_1="337$a",
|
|
408
|
+
criteria_2="338$a",
|
|
402
409
|
)
|
|
403
|
-
|
|
410
|
+
+ f" - {match_template}",
|
|
411
|
+
)
|
|
412
|
+
return ""
|
|
413
|
+
|
|
414
|
+
def f338_source_is_rda_carrier(self, field: Field):
|
|
415
|
+
if "2" not in field:
|
|
416
|
+
self.migration_report.add(
|
|
417
|
+
"InstanceFormat",
|
|
418
|
+
("Instance Format not mapped from field since 338$2 is missing"),
|
|
419
|
+
)
|
|
420
|
+
return False
|
|
421
|
+
elif field["2"].strip().startswith("rdacarrier"):
|
|
422
|
+
return True
|
|
423
|
+
self.migration_report.add(
|
|
424
|
+
"InstanceFormat",
|
|
425
|
+
("InstanceFormat not mapped since 338$2 (Source) " f"is set to {field['2']}. "),
|
|
426
|
+
)
|
|
427
|
+
return False
|
|
404
428
|
|
|
429
|
+
def get_instance_format_ids_from_a(
|
|
430
|
+
self, field_index: int, f_338: Field, all_337s: List[Field], legacy_id: List[str]
|
|
431
|
+
):
|
|
432
|
+
self.migration_report.add(
|
|
433
|
+
"InstanceFormat",
|
|
434
|
+
i18n.t("338$b is missing. Will try parse from 337$a and 338$a"),
|
|
435
|
+
)
|
|
436
|
+
for a in f_338.get_subfields("a"):
|
|
437
|
+
corresponding_337 = all_337s[field_index] if field_index < len(all_337s) else None
|
|
438
|
+
if corresponding_337 and "a" in corresponding_337:
|
|
439
|
+
if fmt_id := self.get_instance_format_id_by_name(
|
|
440
|
+
corresponding_337["a"], a, legacy_id
|
|
441
|
+
):
|
|
442
|
+
yield fmt_id
|
|
443
|
+
|
|
444
|
+
def get_instance_format_ids(self, marc_record: Record, legacy_id: List[str]):
|
|
405
445
|
all_337s = marc_record.get_fields("337")
|
|
406
446
|
all_338s = marc_record.get_fields("338")
|
|
407
|
-
for fidx,
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
(
|
|
413
|
-
"InstanceFormat not mapped since 338$2 (Source) "
|
|
414
|
-
f"is set to {source}. "
|
|
415
|
-
),
|
|
416
|
-
)
|
|
417
|
-
else:
|
|
418
|
-
if "b" not in f and "a" in f:
|
|
419
|
-
self.migration_report.add(
|
|
420
|
-
Blurbs.InstanceFormat,
|
|
421
|
-
"338$b is missing. Will try parse from 337$a and 338$b",
|
|
447
|
+
for fidx, f_338 in enumerate(all_338s):
|
|
448
|
+
if self.f338_source_is_rda_carrier(f_338):
|
|
449
|
+
if "b" not in f_338 and "a" in f_338:
|
|
450
|
+
yield from self.get_instance_format_ids_from_a(
|
|
451
|
+
fidx, f_338, all_337s, legacy_id
|
|
422
452
|
)
|
|
423
|
-
for sfidx, a in enumerate(f.get_subfields("a")):
|
|
424
|
-
corresponding_337 = (
|
|
425
|
-
all_337s[fidx] if fidx < len(all_337s) else None
|
|
426
|
-
)
|
|
427
|
-
if corresponding_337 and "a" in corresponding_337:
|
|
428
|
-
fmt_id = get_folio_id_by_name(
|
|
429
|
-
corresponding_337["a"], a, legacy_id
|
|
430
|
-
)
|
|
431
|
-
if fmt_id:
|
|
432
|
-
yield fmt_id
|
|
433
453
|
|
|
434
|
-
for sfidx, b in enumerate(
|
|
454
|
+
for sfidx, b in enumerate(f_338.get_subfields("b")):
|
|
435
455
|
b = b.replace(" ", "")
|
|
436
|
-
if len(b) == 2:
|
|
437
|
-
|
|
456
|
+
if len(b) == 2:
|
|
457
|
+
# Normal 338b. should be able to map this
|
|
458
|
+
yield self.get_instance_format_id_by_code(legacy_id, b)
|
|
438
459
|
elif len(b) == 1:
|
|
439
|
-
corresponding_337 = (
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
s = "No corresponding 337 to 338 even though 338$b was one charachter code"
|
|
460
|
+
corresponding_337 = all_337s[fidx] if fidx < len(all_337s) else None
|
|
461
|
+
if not corresponding_337:
|
|
462
|
+
# No matching 337. No use mapping the 338
|
|
463
|
+
s = i18n.t(
|
|
464
|
+
"No corresponding 337 to 338 even though 338$b was one character"
|
|
465
|
+
)
|
|
446
466
|
Helper.log_data_issue(legacy_id, s, b)
|
|
447
467
|
self.migration_report.add(
|
|
448
|
-
|
|
468
|
+
"InstanceFormat",
|
|
449
469
|
s,
|
|
450
470
|
)
|
|
451
|
-
else:
|
|
471
|
+
else:
|
|
472
|
+
# Corresponding 337. Try to combine the codes.
|
|
452
473
|
corresponding_b = (
|
|
453
474
|
corresponding_337.get_subfields("b")[sfidx]
|
|
454
475
|
if sfidx < len(corresponding_337.get_subfields("b"))
|
|
455
476
|
else None
|
|
456
477
|
)
|
|
457
478
|
if not corresponding_b:
|
|
458
|
-
s = "No corresponding $b in corresponding 338"
|
|
479
|
+
s = i18n.t("No corresponding $b in corresponding 338")
|
|
459
480
|
Helper.log_data_issue(legacy_id, s, "")
|
|
460
|
-
self.migration_report.add(
|
|
481
|
+
self.migration_report.add("InstanceFormat", s)
|
|
461
482
|
else:
|
|
462
483
|
combined_code = (corresponding_b + b).strip()
|
|
463
484
|
if len(combined_code) == 2:
|
|
464
|
-
yield
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
"""Create HRID if not mapped. Add hrid as MARC record 001"""
|
|
468
|
-
if self.hrid_handling == HridHandling.default or "001" not in marc_record:
|
|
469
|
-
num_part = str(self.instance_hrid_counter).zfill(11)
|
|
470
|
-
folio_instance["hrid"] = f"{self.instance_hrid_prefix}{num_part}"
|
|
471
|
-
new_001 = Field(tag="001", data=folio_instance["hrid"])
|
|
472
|
-
try:
|
|
473
|
-
f_001 = marc_record["001"].value()
|
|
474
|
-
f_003 = (
|
|
475
|
-
marc_record["003"].value().strip() if "003" in marc_record else ""
|
|
476
|
-
)
|
|
477
|
-
self.migration_report.add(
|
|
478
|
-
Blurbs.HridHandling, f'Values in 003: {f_003 or "Empty"}'
|
|
479
|
-
)
|
|
480
|
-
|
|
481
|
-
if self.task_configuration.deactivate035_from001:
|
|
482
|
-
self.migration_report.add(
|
|
483
|
-
Blurbs.HridHandling, "035 generation from 001 turned off"
|
|
484
|
-
)
|
|
485
|
-
else:
|
|
486
|
-
str_035 = f"({f_003}){f_001}" if f_003 else f"{f_001}"
|
|
487
|
-
new_035 = Field(
|
|
488
|
-
tag="035",
|
|
489
|
-
indicators=[" ", " "],
|
|
490
|
-
subfields=["a", str_035],
|
|
491
|
-
)
|
|
492
|
-
marc_record.add_ordered_field(new_035)
|
|
493
|
-
self.migration_report.add(Blurbs.HridHandling, "Added 035 from 001")
|
|
494
|
-
marc_record.remove_fields("001")
|
|
495
|
-
|
|
496
|
-
except Exception:
|
|
497
|
-
if "001" in marc_record:
|
|
498
|
-
s = "Failed to create 035 from 001"
|
|
499
|
-
self.migration_report.add(Blurbs.HridHandling, s)
|
|
500
|
-
Helper.log_data_issue(legacy_ids, s, marc_record["001"])
|
|
501
|
-
else:
|
|
502
|
-
self.migration_report.add(
|
|
503
|
-
Blurbs.HridHandling, "Legacy bib records without 001"
|
|
504
|
-
)
|
|
505
|
-
marc_record.add_ordered_field(new_001)
|
|
506
|
-
self.migration_report.add(
|
|
507
|
-
Blurbs.HridHandling, "Created HRID using default settings"
|
|
508
|
-
)
|
|
509
|
-
self.instance_hrid_counter += 1
|
|
510
|
-
elif self.hrid_handling == HridHandling.preserve001:
|
|
511
|
-
value = marc_record["001"].value()
|
|
512
|
-
if value in self.unique_001s:
|
|
513
|
-
self.migration_report.add(
|
|
514
|
-
Blurbs.HridHandling, "Duplicate 001. Creating HRID instead"
|
|
515
|
-
)
|
|
516
|
-
Helper.log_data_issue(
|
|
517
|
-
legacy_ids,
|
|
518
|
-
"Duplicate 001 for record. HRID created for record",
|
|
519
|
-
value,
|
|
520
|
-
)
|
|
521
|
-
num_part = str(self.instance_hrid_counter).zfill(11)
|
|
522
|
-
folio_instance["hrid"] = f"{self.instance_hrid_prefix}{num_part}"
|
|
523
|
-
new_001 = Field(tag="001", data=folio_instance["hrid"])
|
|
524
|
-
marc_record.add_ordered_field(new_001)
|
|
525
|
-
self.instance_hrid_counter += 1
|
|
526
|
-
else:
|
|
527
|
-
self.unique_001s.add(value)
|
|
528
|
-
folio_instance["hrid"] = value
|
|
529
|
-
self.migration_report.add(Blurbs.HridHandling, "Took HRID from 001")
|
|
530
|
-
else:
|
|
531
|
-
raise TransformationProcessError(
|
|
532
|
-
"", f"Unknown HRID handling: {self.hrid_handling}"
|
|
533
|
-
)
|
|
485
|
+
yield self.get_instance_format_id_by_code(
|
|
486
|
+
legacy_id, combined_code
|
|
487
|
+
)
|
|
534
488
|
|
|
535
|
-
def get_mode_of_issuance_id(self, marc_record: Record,
|
|
489
|
+
def get_mode_of_issuance_id(self, marc_record: Record, legacy_ids: List[str]) -> str:
|
|
536
490
|
level = marc_record.leader[7]
|
|
537
491
|
try:
|
|
538
492
|
name = "unspecified"
|
|
@@ -545,30 +499,29 @@ class BibsRulesMapper(RulesMapperBase):
|
|
|
545
499
|
ret = next(
|
|
546
500
|
(
|
|
547
501
|
i["id"]
|
|
548
|
-
for i in self.
|
|
549
|
-
if
|
|
502
|
+
for i in self.folio_client.modes_of_issuance
|
|
503
|
+
if name.lower() == i["name"].lower()
|
|
550
504
|
),
|
|
551
505
|
"",
|
|
552
506
|
)
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
507
|
+
|
|
508
|
+
self.migration_report.add("MatchedModesOfIssuanceCode", f"{name} -- {ret}")
|
|
509
|
+
|
|
556
510
|
if not ret:
|
|
557
511
|
self.migration_report.add(
|
|
558
|
-
|
|
559
|
-
f"Unmatched level: {level}",
|
|
512
|
+
"MatchedModesOfIssuanceCode", i18n.t("Unmatched level") + f": {level}"
|
|
560
513
|
)
|
|
514
|
+
|
|
561
515
|
return self.other_mode_of_issuance_id
|
|
562
516
|
return ret
|
|
563
517
|
except IndexError:
|
|
564
518
|
self.migration_report.add(
|
|
565
|
-
|
|
519
|
+
"PossibleCleaningTasks", i18n.t("No Leader[7] in") + f" {legacy_ids}"
|
|
566
520
|
)
|
|
521
|
+
|
|
567
522
|
return self.other_mode_of_issuance_id
|
|
568
523
|
except StopIteration as ee:
|
|
569
|
-
logging.exception(
|
|
570
|
-
f"{marc_record.leader} {list(self.folio.modes_of_issuance)}"
|
|
571
|
-
)
|
|
524
|
+
logging.exception(f"{marc_record.leader} {list(self.folio_client.modes_of_issuance)}")
|
|
572
525
|
raise ee from ee
|
|
573
526
|
|
|
574
527
|
def get_nature_of_content(self, marc_record: Record) -> List[str]:
|
|
@@ -579,63 +532,74 @@ class BibsRulesMapper(RulesMapperBase):
|
|
|
579
532
|
return "".join(marc_record["008"].data[35:38])
|
|
580
533
|
return ""
|
|
581
534
|
|
|
582
|
-
def get_languages_041(self, marc_record, legacy_id):
|
|
583
|
-
languages =
|
|
535
|
+
def get_languages_041(self, marc_record: Record, legacy_id: List[str]) -> Dict[str, None]:
|
|
536
|
+
languages = dict()
|
|
584
537
|
lang_fields = marc_record.get_fields("041")
|
|
585
538
|
if not any(lang_fields):
|
|
586
|
-
return
|
|
539
|
+
return dict()
|
|
587
540
|
subfields = "abdefghjkmn"
|
|
588
541
|
for lang_tag in lang_fields:
|
|
589
542
|
if "2" in lang_tag:
|
|
590
|
-
self.migration_report.add(
|
|
591
|
-
|
|
592
|
-
"Field with other Language code
|
|
593
|
-
marc_record["001"],
|
|
594
|
-
lang_tag.value(),
|
|
543
|
+
self.migration_report.add("LanguageCodeSources", lang_tag["2"])
|
|
544
|
+
Helper.log_data_issue(
|
|
545
|
+
legacy_id, "Field with other Language code", lang_tag.value()
|
|
595
546
|
)
|
|
596
547
|
lang_codes = lang_tag.get_subfields(*list(subfields))
|
|
597
548
|
for lang_code in lang_codes:
|
|
598
549
|
lang_code = str(lang_code).lower().replace(" ", "")
|
|
599
550
|
langlength = len(lang_code)
|
|
600
551
|
if langlength == 3:
|
|
601
|
-
languages
|
|
552
|
+
languages[lang_code.replace(" ", "")] = None
|
|
602
553
|
elif langlength > 3 and langlength % 3 == 0:
|
|
603
554
|
lc = lang_code.replace(" ", "")
|
|
604
|
-
new_codes =
|
|
555
|
+
new_codes = {lc[i : i + 3]: None for i in range(0, len(lc), 3)}
|
|
605
556
|
languages.update(new_codes)
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
557
|
+
languages = {
|
|
558
|
+
str(lang): None
|
|
559
|
+
for lang in self.filter_langs(
|
|
560
|
+
list(filter(None, languages.keys())), marc_record, legacy_id
|
|
561
|
+
)
|
|
562
|
+
if lang
|
|
563
|
+
}
|
|
611
564
|
return languages
|
|
612
565
|
|
|
613
|
-
def get_languages(self, marc_record: Record, legacy_id: str) -> List[str]:
|
|
614
|
-
"""Get languages and tranforms them to correct codes
|
|
566
|
+
def get_languages(self, marc_record: Record, legacy_id: List[str]) -> List[str]:
|
|
567
|
+
"""Get languages and tranforms them to correct codes
|
|
568
|
+
|
|
569
|
+
Args:
|
|
570
|
+
marc_record (Record): A pymarc Record object
|
|
571
|
+
legacy_id (List[str]): A list of legacy ids from the legacy record
|
|
572
|
+
|
|
573
|
+
Returns:
|
|
574
|
+
List[str]: List of language codes
|
|
575
|
+
"""
|
|
615
576
|
languages = self.get_languages_041(marc_record, legacy_id)
|
|
616
|
-
languages
|
|
577
|
+
languages[self.get_languages_008(marc_record)] = None
|
|
617
578
|
for lang in languages:
|
|
618
|
-
self.migration_report.add(
|
|
579
|
+
self.migration_report.add("LanguagesInRecords", lang)
|
|
619
580
|
return list(languages)
|
|
620
581
|
|
|
621
582
|
def fetch_language_codes(self) -> Generator[str, None, None]:
|
|
622
|
-
"""
|
|
623
|
-
|
|
624
|
-
|
|
583
|
+
"""Loads the list of standardized language codes from LoC
|
|
584
|
+
|
|
585
|
+
Yields:
|
|
586
|
+
Generator[str, None, None]: _description_
|
|
587
|
+
"""
|
|
588
|
+
path = Path(__file__).parent / "loc_language_codes.xml"
|
|
589
|
+
with open(path) as f:
|
|
590
|
+
lines = "".join(f.readlines())
|
|
591
|
+
tree = fromstring(lines)
|
|
625
592
|
name_space = "{info:lc/xmlns/codelist-v1}"
|
|
626
593
|
xpath_expr = "{0}languages/{0}language/{0}code".format(name_space)
|
|
627
594
|
for code in tree.findall(xpath_expr):
|
|
628
595
|
yield code.text
|
|
629
596
|
|
|
630
597
|
def filter_langs(
|
|
631
|
-
self, language_values: List[str], marc_record: Record, index_or_legacy_id
|
|
598
|
+
self, language_values: List[str], marc_record: Record, index_or_legacy_id: List[str]
|
|
632
599
|
) -> typing.Generator:
|
|
633
600
|
forbidden_values = ["###", "zxx", "n/a", "N/A", "|||"]
|
|
634
601
|
for language_value in language_values:
|
|
635
|
-
if
|
|
636
|
-
language_value in self.language_codes
|
|
637
|
-
and language_value not in forbidden_values
|
|
638
|
-
):
|
|
602
|
+
if language_value in self.language_codes and language_value not in forbidden_values:
|
|
639
603
|
yield language_value
|
|
640
604
|
elif language_value == "jap":
|
|
641
605
|
yield "jpn"
|
|
@@ -653,68 +617,51 @@ class BibsRulesMapper(RulesMapperBase):
|
|
|
653
617
|
m = "Unrecognized language codes in record"
|
|
654
618
|
Helper.log_data_issue(index_or_legacy_id, m, language_value)
|
|
655
619
|
self.migration_report.add(
|
|
656
|
-
|
|
620
|
+
"UnrecognizedLanguageCodes",
|
|
657
621
|
f"{m}: {language_value}",
|
|
658
622
|
)
|
|
659
623
|
|
|
660
|
-
def get_legacy_ids(
|
|
661
|
-
|
|
662
|
-
) -> List[str]:
|
|
624
|
+
def get_legacy_ids(self, marc_record: Record, idx: int) -> List[str]:
|
|
625
|
+
ils_flavour: IlsFlavour = self.task_configuration.ils_flavour
|
|
663
626
|
if ils_flavour in {IlsFlavour.sierra, IlsFlavour.millennium}:
|
|
664
627
|
return get_iii_bib_id(marc_record)
|
|
665
628
|
elif ils_flavour == IlsFlavour.tag907y:
|
|
666
|
-
|
|
667
|
-
return list(set(marc_record["907"].get_subfields("a", "y")))
|
|
668
|
-
except Exception as e:
|
|
669
|
-
raise TransformationRecordFailedError(
|
|
670
|
-
index_or_legacy_id,
|
|
671
|
-
(
|
|
672
|
-
"907 $y and $a is missing is missing, although they is "
|
|
673
|
-
"required for this legacy ILS choice"
|
|
674
|
-
),
|
|
675
|
-
marc_record.as_json(),
|
|
676
|
-
) from e
|
|
629
|
+
return RulesMapperBase.get_bib_id_from_907y(marc_record, idx)
|
|
677
630
|
elif ils_flavour == IlsFlavour.tagf990a:
|
|
678
|
-
|
|
679
|
-
if marc_record["001"].format_field().strip():
|
|
680
|
-
res.add(marc_record["001"].format_field().strip())
|
|
681
|
-
if any(res):
|
|
682
|
-
self.migration_report.add_general_statistics("legacy id from 990$a")
|
|
683
|
-
return list(res)
|
|
631
|
+
return RulesMapperBase.get_bib_id_from_990a(marc_record, idx)
|
|
684
632
|
elif ils_flavour == IlsFlavour.aleph:
|
|
685
633
|
return self.get_aleph_bib_id(marc_record)
|
|
686
634
|
elif ils_flavour in {IlsFlavour.voyager, "voyager", IlsFlavour.tag001}:
|
|
687
|
-
|
|
688
|
-
return [marc_record["001"].format_field().strip()]
|
|
689
|
-
except Exception as e:
|
|
690
|
-
raise TransformationRecordFailedError(
|
|
691
|
-
index_or_legacy_id,
|
|
692
|
-
"001 is missing, although it is required for Voyager migrations",
|
|
693
|
-
marc_record.as_json(),
|
|
694
|
-
) from e
|
|
635
|
+
return RulesMapperBase.get_bib_id_from_001(marc_record, idx)
|
|
695
636
|
elif ils_flavour == IlsFlavour.koha:
|
|
696
637
|
try:
|
|
697
638
|
return [marc_record["999"]["c"]]
|
|
698
639
|
except Exception as e:
|
|
699
640
|
raise TransformationRecordFailedError(
|
|
700
|
-
|
|
641
|
+
idx,
|
|
701
642
|
"999 $c is missing, although it is required for this legacy ILS choice",
|
|
702
643
|
marc_record.as_json(),
|
|
703
644
|
) from e
|
|
645
|
+
elif ils_flavour == IlsFlavour.custom:
|
|
646
|
+
return get_custom_bib_id(marc_record, self.task_configuration.custom_bib_id_field)
|
|
704
647
|
elif ils_flavour == IlsFlavour.none:
|
|
705
648
|
return [str(uuid.uuid4())]
|
|
706
649
|
else:
|
|
707
650
|
raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
|
|
708
651
|
|
|
709
|
-
def get_aleph_bib_id(self, marc_record: Record):
|
|
710
|
-
res = {f["b"].strip() for f in marc_record.get_fields("998") if "b" in f}
|
|
652
|
+
def get_aleph_bib_id(self, marc_record: Record) -> List[str]:
|
|
653
|
+
res = {f["b"].strip(): None for f in marc_record.get_fields("998") if "b" in f}
|
|
711
654
|
if any(res):
|
|
712
|
-
self.migration_report.add_general_statistics(
|
|
655
|
+
self.migration_report.add_general_statistics(
|
|
656
|
+
i18n.t("legacy id from %{fro}", fro="998$b")
|
|
657
|
+
)
|
|
713
658
|
return list(res)
|
|
714
659
|
else:
|
|
715
660
|
try:
|
|
716
661
|
ret = [marc_record["001"].format_field().strip()]
|
|
717
|
-
self.migration_report.add_general_statistics(
|
|
662
|
+
self.migration_report.add_general_statistics(
|
|
663
|
+
i18n.t("legacy id from %{fro}", fro="001")
|
|
664
|
+
)
|
|
718
665
|
return ret
|
|
719
666
|
except Exception as e:
|
|
720
667
|
raise TransformationRecordFailedError(
|
|
@@ -724,7 +671,7 @@ class BibsRulesMapper(RulesMapperBase):
|
|
|
724
671
|
) from e
|
|
725
672
|
|
|
726
673
|
|
|
727
|
-
def get_unspecified_mode_of_issuance(folio_client: FolioClient):
|
|
674
|
+
def get_unspecified_mode_of_issuance(folio_client: FolioClient) -> str:
|
|
728
675
|
m_o_is = list(folio_client.modes_of_issuance)
|
|
729
676
|
if not any(m_o_is):
|
|
730
677
|
logging.critical("No Modes of issuance set up in tenant. Quitting...")
|
|
@@ -738,7 +685,26 @@ def get_unspecified_mode_of_issuance(folio_client: FolioClient):
|
|
|
738
685
|
return next(i["id"] for i in m_o_is if i["name"].lower() == "unspecified")
|
|
739
686
|
|
|
740
687
|
|
|
741
|
-
def
|
|
688
|
+
def get_custom_bib_id(marc_record: Record, field_string: str):
|
|
689
|
+
if field_keys := field_string.split("$", maxsplit=1):
|
|
690
|
+
try:
|
|
691
|
+
if len(field_keys) == 2:
|
|
692
|
+
return [marc_record[field_keys[0]][field_keys[1]]]
|
|
693
|
+
else:
|
|
694
|
+
return [marc_record[field_keys[0]]]
|
|
695
|
+
except Exception as e:
|
|
696
|
+
raise TransformationRecordFailedError(
|
|
697
|
+
"unknown identifier",
|
|
698
|
+
f"{field_string} is missing from record but is required in all records",
|
|
699
|
+
marc_record.as_json(),
|
|
700
|
+
) from e
|
|
701
|
+
else:
|
|
702
|
+
raise TransformationProcessError(
|
|
703
|
+
"", 'Critical process issue. No "customBibIdField" specified in task configuration.'
|
|
704
|
+
)
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def get_iii_bib_id(marc_record: Record) -> List[str]:
|
|
742
708
|
try:
|
|
743
709
|
return [marc_record["907"]["a"]]
|
|
744
710
|
except Exception as e:
|