folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. folio_migration_tools/__init__.py +11 -0
  2. folio_migration_tools/__main__.py +169 -85
  3. folio_migration_tools/circulation_helper.py +96 -59
  4. folio_migration_tools/config_file_load.py +66 -0
  5. folio_migration_tools/custom_dict.py +6 -4
  6. folio_migration_tools/custom_exceptions.py +21 -19
  7. folio_migration_tools/extradata_writer.py +46 -0
  8. folio_migration_tools/folder_structure.py +63 -66
  9. folio_migration_tools/helper.py +29 -21
  10. folio_migration_tools/holdings_helper.py +57 -34
  11. folio_migration_tools/i18n_config.py +9 -0
  12. folio_migration_tools/library_configuration.py +173 -13
  13. folio_migration_tools/mapper_base.py +317 -106
  14. folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
  15. folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
  16. folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
  17. folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
  18. folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
  19. folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
  20. folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
  21. folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
  22. folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
  23. folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
  24. folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
  25. folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
  26. folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
  27. folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
  28. folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
  29. folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
  30. folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
  31. folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
  32. folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
  33. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
  34. folio_migration_tools/migration_report.py +85 -38
  35. folio_migration_tools/migration_tasks/__init__.py +1 -3
  36. folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
  37. folio_migration_tools/migration_tasks/batch_poster.py +911 -198
  38. folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
  39. folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
  40. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
  41. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
  42. folio_migration_tools/migration_tasks/items_transformer.py +264 -84
  43. folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
  44. folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
  45. folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
  46. folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
  47. folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
  48. folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
  49. folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
  50. folio_migration_tools/migration_tasks/user_transformer.py +180 -139
  51. folio_migration_tools/task_configuration.py +46 -0
  52. folio_migration_tools/test_infrastructure/__init__.py +0 -0
  53. folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
  54. folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
  55. folio_migration_tools/transaction_migration/legacy_request.py +65 -25
  56. folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
  57. folio_migration_tools/transaction_migration/transaction_result.py +12 -1
  58. folio_migration_tools/translations/en.json +476 -0
  59. folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
  60. folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
  61. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
  62. folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
  63. folio_migration_tools/generate_schemas.py +0 -46
  64. folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
  65. folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
  66. folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
  67. folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
  68. folio_migration_tools/report_blurbs.py +0 -219
  69. folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
  70. folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
  71. folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
  72. folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
  73. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
@@ -1,26 +1,29 @@
1
1
  """The default mapper, responsible for parsing MARC21 records acording to the
2
2
  FOLIO community specifications"""
3
- import json
3
+
4
4
  import logging
5
5
  import sys
6
6
  import time
7
7
  import typing
8
8
  import uuid
9
- import xml.etree.ElementTree as ET
10
- from typing import Generator, List
9
+ from pathlib import Path
10
+ from typing import Dict, Generator, List
11
+
12
+ import i18n
13
+ from defusedxml.ElementTree import fromstring
11
14
  from folio_uuid.folio_namespaces import FOLIONamespaces
12
15
  from folio_uuid.folio_uuid import FolioUUID
13
-
14
- import pymarc
15
- import requests
16
16
  from folioclient import FolioClient
17
+ from pymarc.record import Leader, Record
18
+ from pymarc.field import Field
19
+
17
20
  from folio_migration_tools.custom_exceptions import (
18
21
  TransformationProcessError,
19
22
  TransformationRecordFailedError,
20
23
  )
21
24
  from folio_migration_tools.helper import Helper
22
25
  from folio_migration_tools.library_configuration import (
23
- FolioRelease,
26
+ FileDefinition,
24
27
  HridHandling,
25
28
  IlsFlavour,
26
29
  LibraryConfiguration,
@@ -29,10 +32,7 @@ from folio_migration_tools.marc_rules_transformation.conditions import Condition
29
32
  from folio_migration_tools.marc_rules_transformation.rules_mapper_base import (
30
33
  RulesMapperBase,
31
34
  )
32
-
33
- from folio_migration_tools.report_blurbs import Blurbs
34
- from pymarc import Field
35
- from pymarc.record import Record
35
+ from folio_migration_tools.migration_tasks.migration_task_base import MarcTaskConfigurationBase
36
36
 
37
37
 
38
38
  class BibsRulesMapper(RulesMapperBase):
@@ -41,196 +41,201 @@ class BibsRulesMapper(RulesMapperBase):
41
41
 
42
42
  def __init__(
43
43
  self,
44
- folio_client,
44
+ folio_client: FolioClient,
45
45
  library_configuration: LibraryConfiguration,
46
- task_configuration,
46
+ task_configuration: MarcTaskConfigurationBase,
47
+ statistical_codes_map: Dict[str, str] = None,
47
48
  ):
48
49
  super().__init__(
49
50
  folio_client,
50
51
  library_configuration,
51
- Conditions(folio_client, self, "bibs"),
52
- )
53
- self.folio = folio_client
54
- self.task_configuration = task_configuration
55
- self.record_status = {}
56
- self.unique_001s = set()
57
- self.holdings_map = {}
58
- self.id_map = {}
59
- self.srs_recs = []
60
- self.schema = self.instance_json_schema
61
- self.contrib_name_types = {}
62
- self.mapped_folio_fields = {}
63
- self.unmapped_folio_fields = {}
64
- self.alt_title_map = {}
65
- logging.info(
66
- f"HRID handling is set to: '{self.task_configuration.hrid_handling}'"
52
+ task_configuration,
53
+ statistical_codes_map,
54
+ self.get_instance_schema(folio_client),
55
+ Conditions(folio_client, self, "bibs", library_configuration.folio_release),
67
56
  )
68
- self.hrid_handling: HridHandling = self.task_configuration.hrid_handling
69
57
  logging.info("Fetching mapping rules from the tenant")
70
- rules_endpoint = (
71
- "/mapping-rules"
72
- if self.library_configuration.folio_release == FolioRelease.juniper
73
- else "/mapping-rules/marc-bib"
74
- )
75
- self.mappings = self.folio.folio_get_single_object(rules_endpoint)
58
+ rules_endpoint = "/mapping-rules/marc-bib"
59
+ self.mappings = self.folio_client.folio_get_single_object(rules_endpoint)
76
60
  logging.info("Fetching valid language codes...")
77
61
  self.language_codes = list(self.fetch_language_codes())
78
- self.unmapped_tags = {}
79
- self.unmapped_conditions = {}
80
- self.instance_relationships = {}
81
- self.instance_relationship_types = {}
82
- self.other_mode_of_issuance_id = get_unspecified_mode_of_issuance(self.folio)
62
+ self.instance_relationships: dict = {}
63
+ self.instance_relationship_types: dict = {}
64
+ self.other_mode_of_issuance_id = get_unspecified_mode_of_issuance(self.folio_client)
65
+ self.data_import_marc = self.task_configuration.data_import_marc
66
+ if self.data_import_marc:
67
+ self.hrid_handler.deactivate035_from001 = True
83
68
  self.start = time.time()
84
69
 
85
- def perform_initial_preparation(self, marc_record: pymarc.Record, legacy_ids):
86
- folio_instance = {
87
- "metadata": self.folio.get_metadata_construct(),
88
- }
70
+ def perform_initial_preparation(self, file_def: FileDefinition, marc_record: Record, legacy_ids: List[str]):
71
+ folio_instance = {}
89
72
  folio_instance["id"] = str(
90
73
  FolioUUID(
91
- str(self.folio_client.okapi_url),
74
+ self.base_string_for_folio_uuid,
92
75
  FOLIONamespaces.instances,
93
76
  str(legacy_ids[-1]),
94
77
  )
95
78
  )
96
- leader_05 = marc_record.leader[5]
97
- self.migration_report.add(Blurbs.RecordStatus, leader_05 or "Empty")
98
- self.handle_hrid(folio_instance, marc_record, legacy_ids)
99
- if leader_05 == "d":
100
- Helper.log_data_issue(
101
- legacy_ids, "d in leader. Is this correct?", marc_record.leader
79
+ if (
80
+ all([self.create_source_records, file_def.create_source_records])
81
+ or self.hrid_handler.handling == HridHandling.preserve001
82
+ ):
83
+ self.hrid_handler.handle_hrid(
84
+ FOLIONamespaces.instances,
85
+ folio_instance,
86
+ marc_record,
87
+ legacy_ids,
102
88
  )
89
+ self.handle_leader_05(marc_record, legacy_ids)
90
+ if self.task_configuration.add_administrative_notes_with_legacy_ids:
91
+ for legacy_id in legacy_ids:
92
+ self.add_legacy_id_to_admin_note(folio_instance, legacy_id)
93
+
103
94
  return folio_instance
104
95
 
105
- def parse_bib(self, legacy_ids, marc_record: pymarc.Record, suppressed: bool):
96
+ def handle_leader_05(self, marc_record: Record, legacy_ids: List[str]):
97
+ leader_05 = marc_record.leader[5] or "Empty"
98
+ self.migration_report.add("RecordStatus", i18n.t("Original value") + f": {leader_05}")
99
+ if leader_05 not in ["a", "c", "d", "n", "p"]:
100
+ marc_record.leader = Leader(f"{marc_record.leader[:5]}c{marc_record.leader[6:]}")
101
+ self.migration_report.add(
102
+ "RecordStatus", i18n.t("Changed %{a} to %{b}", a=leader_05, b="c")
103
+ )
104
+ if leader_05 == "d":
105
+ Helper.log_data_issue(legacy_ids, "d in leader. Is this correct?", marc_record.leader)
106
+
107
+ def parse_record(
108
+ self, marc_record: Record, file_def: FileDefinition, legacy_ids: List[str]
109
+ ) -> list[dict]:
106
110
  """Parses a bib recod into a FOLIO Inventory instance object
107
111
  Community mapping suggestion: https://bit.ly/2S7Gyp3
108
- This is the main function"""
112
+ This is the main function
113
+
114
+ Args:
115
+ marc_record (Record): _description_
116
+ file_def (FileDefinition): _description_
117
+ legacy_ids (List[str]): List of legacy ids in record
118
+
119
+ Returns:
120
+ dict: _description_
121
+ """
109
122
  self.print_progress()
110
- ignored_subsequent_fields = set()
123
+ ignored_subsequent_fields: set = set()
111
124
  bad_tags = set(self.task_configuration.tags_to_delete) # "907"
112
- folio_instance = self.perform_initial_preparation(marc_record, legacy_ids)
113
- for marc_field in marc_record:
114
- self.report_marc_stats(
115
- marc_field, bad_tags, legacy_ids, ignored_subsequent_fields
116
- )
117
- if marc_field.tag not in ignored_subsequent_fields:
118
- self.process_marc_field(
119
- folio_instance,
120
- marc_field,
121
- ignored_subsequent_fields,
122
- legacy_ids,
123
- )
125
+ folio_instance = self.perform_initial_preparation(file_def, marc_record, legacy_ids)
126
+ if self.data_import_marc:
127
+ self.simple_bib_map(folio_instance, marc_record, ignored_subsequent_fields, legacy_ids)
128
+ else:
129
+ for marc_field in marc_record:
130
+ self.report_marc_stats(marc_field, bad_tags, legacy_ids, ignored_subsequent_fields)
131
+ if marc_field.tag not in ignored_subsequent_fields:
132
+ self.process_marc_field(
133
+ folio_instance,
134
+ marc_field,
135
+ ignored_subsequent_fields,
136
+ legacy_ids,
137
+ )
124
138
 
125
- self.perform_additional_parsing(
126
- folio_instance, marc_record, legacy_ids, suppressed
127
- )
139
+ self.perform_additional_parsing(folio_instance, marc_record, legacy_ids, file_def)
128
140
  clean_folio_instance = self.validate_required_properties(
129
141
  "-".join(legacy_ids), folio_instance, self.schema, FOLIONamespaces.instances
130
142
  )
131
143
  self.dedupe_rec(clean_folio_instance)
132
144
  marc_record.remove_fields(*list(bad_tags))
133
- self.report_folio_mapping(clean_folio_instance, self.instance_json_schema)
134
- if clean_folio_instance["discoverySuppress"]:
135
- self.migration_report.add_general_statistics("Suppressed from discovery")
136
- # TODO: trim away multiple whitespace and newlines..
137
- # TODO: createDate and update date and catalogeddate
138
- return clean_folio_instance
139
-
140
- def process_marc_field(
141
- self,
142
- folio_instance,
143
- marc_field,
144
- ignored_subsequent_fields,
145
- legacy_ids,
146
- ):
147
- if marc_field.tag == "880" and "6" in marc_field:
148
- mappings = self.perform_proxy_mapping(marc_field)
149
- else:
150
- tags_to_ignore = {"880", "001", "008"}
151
- mappings = (
152
- self.mappings.get(marc_field.tag, {})
153
- if marc_field.tag not in tags_to_ignore
154
- else []
155
- )
156
- if mappings:
157
- try:
158
- self.map_field_according_to_mapping(
159
- marc_field, mappings, folio_instance, legacy_ids
160
- )
161
- if any(m.get("ignoreSubsequentFields", False) for m in mappings):
162
- ignored_subsequent_fields.add(marc_field.tag)
163
- except Exception as ee:
164
- logging.error(
165
- f"map_field_according_to_mapping {marc_field.tag} {marc_field.format_field()} {json.dumps(mappings)}"
166
- )
167
- raise ee
168
-
169
- def report_marc_stats(
170
- self, marc_field, bad_tags, legacy_ids, ignored_subsequent_fields
171
- ):
172
- self.migration_report.add_general_statistics("Total number of Tags processed")
173
- self.report_bad_tags(marc_field, bad_tags, legacy_ids)
174
- mapped = marc_field.tag in self.mappings
175
- if marc_field.tag in ignored_subsequent_fields:
176
- mapped = False
177
- self.report_legacy_mapping(marc_field.tag, True, mapped)
178
-
179
- def perform_proxy_mapping(self, marc_field):
180
- proxy_mapping = next(iter(self.mappings.get("880", [])), [])
181
- if proxy_mapping and "fieldReplacementRule" in proxy_mapping:
182
- target_field = next(
183
- (
184
- r["targetField"]
185
- for r in proxy_mapping["fieldReplacementRule"]
186
- if r["sourceDigits"] == marc_field["6"][:3]
187
- ),
188
- "",
189
- )
190
- mappings = self.mappings.get(target_field, {})
191
-
192
- self.migration_report.add(
193
- Blurbs.Field880Mappings,
194
- f"Source digits: {marc_field['6'][:3]} Target field: {target_field}",
195
- )
196
- else:
197
- raise TransformationProcessError(
198
- "", "Mapping rules for 880 is missing. Halting"
145
+ self.report_folio_mapping(clean_folio_instance, self.schema)
146
+ return [clean_folio_instance]
147
+
148
+ def simple_bib_map(self, folio_instance: dict, marc_record: Record, ignored_subsequent_fields: set, legacy_ids: List[str]):
149
+ """
150
+ This method applies a much simplified MARC-to-instance
151
+ mapping to create a minimal FOLIO Instance record to be
152
+ used with a Data Import based MARC loading flow, rather
153
+ than creating SRS records during transformation.
154
+
155
+ Args:
156
+ folio_instance (dict): _description_
157
+ marc_record (Record): _description_
158
+ legacy_ids (List[str]): _description_
159
+ file_def (FileDefinition): _description_
160
+ """
161
+ main_entry_field_tags = ["100", "110", "111", "130"]
162
+ main_entry_fields = marc_record.get_fields(*main_entry_field_tags)
163
+ main_entry_fields.sort(key=lambda x: int(x.tag))
164
+ if len(main_entry_fields) > 1:
165
+ Helper.log_data_issue(
166
+ legacy_ids,
167
+ "Multiple main entry fields in record. Record will fail Data Import. Creating Instance anyway.",
168
+ [str(field) for field in main_entry_fields]
199
169
  )
200
- return mappings
170
+ if not main_entry_fields:
171
+ main_entry_fields += marc_record.get_fields("700", "710", "711", "730")
172
+ main_entry_fields.sort(key=lambda x: int(x.tag))
173
+ if main_entry_fields:
174
+ self.process_marc_field(folio_instance, main_entry_fields[0], ignored_subsequent_fields, legacy_ids)
175
+ try:
176
+ self.process_marc_field(folio_instance, marc_record['245'], ignored_subsequent_fields, legacy_ids)
177
+ except KeyError as ke:
178
+ raise TransformationRecordFailedError(
179
+ legacy_ids,
180
+ "No 245 field in MARC record"
181
+ ) from ke
201
182
 
202
183
  def perform_additional_parsing(
203
184
  self,
204
185
  folio_instance: dict,
205
186
  marc_record: Record,
206
187
  legacy_ids: List[str],
207
- suppressed: bool,
208
- ):
209
- """Do stuff not easily captured by the mapping rules"""
210
- folio_instance["source"] = "MARC"
188
+ file_def: FileDefinition,
189
+ ) -> None:
190
+ """Do stuff not easily captured by the mapping rules
191
+
192
+ Args:
193
+ folio_instance (dict): _description_
194
+ marc_record (Record): _description_
195
+ legacy_ids (List[str]): _description_
196
+ file_def (FileDefinition): _description_
197
+ """
198
+ if file_def.create_source_records and self.create_source_records:
199
+ folio_instance["source"] = "MARC"
200
+ else:
201
+ folio_instance["source"] = "FOLIO"
211
202
  folio_instance["instanceFormatIds"] = list(
212
203
  set(self.get_instance_format_ids(marc_record, legacy_ids))
213
204
  )
214
-
215
- folio_instance["instanceTypeId"] = self.get_instance_type_id(
216
- marc_record, legacy_ids
217
- )
218
-
219
- folio_instance["modeOfIssuanceId"] = self.get_mode_of_issuance_id(
220
- marc_record, legacy_ids
221
- )
205
+ folio_instance["instanceTypeId"] = self.get_instance_type_id(marc_record, legacy_ids)
206
+
207
+ folio_instance["modeOfIssuanceId"] = self.get_mode_of_issuance_id(marc_record, legacy_ids)
208
+ self.handle_languages(folio_instance, marc_record, legacy_ids)
209
+ self.handle_suppression(folio_instance, file_def)
210
+ # Map statistical codes from MARC and FileDefinition, then map the IDs
211
+ self.map_statistical_codes(folio_instance, file_def, marc_record)
212
+ self.map_statistical_code_ids(legacy_ids, folio_instance)
213
+ self.handle_holdings(marc_record)
214
+ if prec_titles := folio_instance.get("precedingTitles", []):
215
+ self.migration_report.add("PrecedingSuccedingTitles", f"{len(prec_titles)}")
216
+ del folio_instance["precedingTitles"]
217
+ if succ_titles := folio_instance.get("succeedingTitles", []):
218
+ del folio_instance["succeedingTitles"]
219
+ self.migration_report.add("PrecedingSuccedingTitles", f"{len(succ_titles)}")
220
+
221
+ def handle_languages(self, folio_instance: Dict, marc_record: Record, legacy_ids: List[str]):
222
222
  if "languages" in folio_instance:
223
- folio_instance["languages"].extend(
224
- self.get_languages(marc_record, legacy_ids)
223
+ orig_languages = {lang: None for lang in folio_instance["languages"]}
224
+ orig_languages.update(
225
+ {lang: None for lang in self.get_languages(marc_record, legacy_ids)}
225
226
  )
227
+ folio_instance["languages"] = list(orig_languages.keys())
226
228
  else:
227
229
  folio_instance["languages"] = self.get_languages(marc_record, legacy_ids)
228
230
  folio_instance["languages"] = list(
229
231
  self.filter_langs(folio_instance["languages"], marc_record, legacy_ids)
230
232
  )
231
- folio_instance["discoverySuppress"] = suppressed
232
- folio_instance["staffSuppress"] = False
233
- self.handle_holdings(marc_record)
233
+
234
+ def get_instance_schema(self, folio_client: FolioClient):
235
+ logging.info("Fetching Instance schema...")
236
+ return folio_client.get_from_github(
237
+ "folio-org", "mod-inventory-storage", "ramls/instance.json"
238
+ )
234
239
 
235
240
  def handle_holdings(self, marc_record: Record):
236
241
  if "852" in marc_record:
@@ -239,43 +244,38 @@ class BibsRulesMapper(RulesMapperBase):
239
244
  )
240
245
  f852s = (f for f in holdingsfields if f.tag == "852")
241
246
  f86xs = (
242
- f
243
- for f in holdingsfields
244
- if f.tag in ["866", "867", "868", "865", "864", "863"]
247
+ f for f in holdingsfields if f.tag in ["866", "867", "868", "865", "864", "863"]
245
248
  )
246
249
  if f852s and not f86xs:
247
250
  self.migration_report.add(
248
- Blurbs.HoldingsGenerationFromBibs,
249
- "Records with 852s but no 86X",
251
+ "HoldingsGenerationFromBibs",
252
+ i18n.t(
253
+ "Records with %{has_many}s but no %{has_no}", has_many="852", has_no="86X"
254
+ ),
250
255
  )
251
256
  elif any(f852s):
252
257
  self.migration_report.add(
253
- Blurbs.HoldingsGenerationFromBibs,
254
- "Records with both 852s and at least one 86X",
258
+ "HoldingsGenerationFromBibs",
259
+ i18n.t(
260
+ "Records with both %{has_many}s and at least one %{has_one}",
261
+ has_one="86X",
262
+ has_many="852",
263
+ ),
255
264
  )
256
265
 
257
266
  elif any(f86xs):
258
267
  self.migration_report.add(
259
- Blurbs.HoldingsGenerationFromBibs,
260
- "Records without 852s but with 86X",
268
+ "HoldingsGenerationFromBibs",
269
+ i18n.t("Records without %{has_no}s but with %{has}", has="86X", has_no="852"),
261
270
  )
262
271
 
263
272
  def wrap_up(self):
264
273
  logging.info("Mapper wrapping up")
265
- self.store_hrid_settings()
266
-
267
- def report_bad_tags(self, marc_field, bad_tags, legacy_ids):
268
- if (
269
- (not marc_field.tag.isnumeric())
270
- and marc_field.tag != "LDR"
271
- and marc_field.tag not in bad_tags
272
- ):
273
- self.migration_report.add(Blurbs.NonNumericTagsInRecord, marc_field.tag)
274
- message = "Non-numeric tags in records"
275
- Helper.log_data_issue(legacy_ids, message, marc_field.tag)
276
- bad_tags.add(marc_field.tag)
274
+ if self.create_source_records:
275
+ if self.task_configuration.update_hrid_settings:
276
+ self.hrid_handler.store_hrid_settings()
277
277
 
278
- def get_instance_type_id(self, marc_record, legacy_id):
278
+ def get_instance_type_id(self, marc_record: Record, legacy_ids: List[str]) -> str:
279
279
  return_id = ""
280
280
 
281
281
  def get_folio_id_by_name(f336a: str):
@@ -283,40 +283,37 @@ class BibsRulesMapper(RulesMapperBase):
283
283
  match = next(
284
284
  (
285
285
  f["id"]
286
- for f in self.folio.instance_types
286
+ for f in self.folio_client.instance_types
287
287
  if f["name"].lower().replace(" ", "") == match_template
288
288
  ),
289
289
  "",
290
290
  )
291
291
  if match:
292
292
  self.migration_report.add(
293
- Blurbs.RecourceTypeMapping,
294
- f"336$a - Successful matching on {match_template} ({f336a})",
293
+ "RecourceTypeMapping",
294
+ "336$a - "
295
+ + i18n.t("Successful matching on %{criteria}", criteria=match_template)
296
+ + f" ({f336a})",
295
297
  )
296
298
  else:
297
299
  self.migration_report.add(
298
- Blurbs.RecourceTypeMapping,
299
- f"336$a - Unsuccessful matching on {match_template} ({f336a})",
300
+ "RecourceTypeMapping",
301
+ "336$a - "
302
+ + i18n.t("Unsuccessful matching on %{criteria}", criteria=match_template)
303
+ + f" ({f336a})",
300
304
  )
301
305
  Helper.log_data_issue(
302
- legacy_id,
306
+ legacy_ids,
303
307
  "instance type name (336$a) -Unsuccessful matching",
304
308
  f336a,
305
309
  )
306
310
  return match
307
311
 
308
- if not self.folio.instance_types:
312
+ if not self.folio_client.instance_types:
309
313
  raise TransformationProcessError("", "No instance_types setup in tenant")
310
314
 
311
315
  if "336" in marc_record and "b" not in marc_record["336"]:
312
- self.migration_report.add(
313
- Blurbs.RecourceTypeMapping, "Subfield b not in 336"
314
- )
315
- Helper.log_data_issue(
316
- legacy_id,
317
- "Subfield b not in 336",
318
- "",
319
- )
316
+ self.migration_report.add("RecourceTypeMapping", i18n.t("Subfield b not in 336"))
320
317
  if "a" in marc_record["336"]:
321
318
  return_id = get_folio_id_by_name(marc_record["336"]["a"])
322
319
 
@@ -324,215 +321,172 @@ class BibsRulesMapper(RulesMapperBase):
324
321
  f336_b = marc_record["336"]["b"].lower().replace(" ", "")
325
322
  f336_b_norm = f336_b.lower().replace(" ", "")
326
323
  t = self.conditions.get_ref_data_tuple_by_code(
327
- self.folio.instance_types,
324
+ self.folio_client.instance_types,
328
325
  "instance_types",
329
326
  f336_b_norm,
330
327
  )
331
328
  if not t:
332
329
  self.migration_report.add(
333
- Blurbs.RecourceTypeMapping,
334
- f"336$b - Code {f336_b_norm} ('{f336_b}') not found in FOLIO ",
330
+ "RecourceTypeMapping",
331
+ "336$b - "
332
+ + i18n.t(
333
+ "Code %{code} ('%{code_raw}') not found in FOLIO ",
334
+ code=f336_b_norm,
335
+ code_raw=f336_b,
336
+ ),
335
337
  )
336
338
  Helper.log_data_issue(
337
- legacy_id,
338
- "instance type code (336$b) not found in FOLIO",
339
+ legacy_ids,
340
+ i18n.t("instance type code (%{code}) not found in FOLIO", code="336$b"),
339
341
  f336_b,
340
342
  )
341
343
  else:
342
344
  self.migration_report.add(
343
- Blurbs.RecourceTypeMapping,
344
- f'336$b {t[1]} mapped from {marc_record["336"]["b"]}',
345
+ "RecourceTypeMapping",
346
+ "336$b "
347
+ + i18n.t(
348
+ "%{fro} mapped from %{record}", fro=t[1], record=marc_record["336"]["b"]
349
+ ),
345
350
  )
346
351
  return_id = t[0]
347
352
 
348
353
  if not return_id:
349
354
  t = self.conditions.get_ref_data_tuple_by_code(
350
- self.folio.instance_types, "instance_types", "zzz"
355
+ self.folio_client.instance_types, "instance_types", "zzz"
351
356
  )
352
357
  return_id = t[0]
353
358
  return return_id
354
359
 
355
- def get_instance_format_ids(self, marc_record, legacy_id):
356
- # Lambdas
357
- def get_folio_id(code: str):
358
- try:
359
- match = next(
360
- f for f in self.folio.instance_formats if f["code"] == code
361
- )
362
- self.migration_report.add(
363
- Blurbs.InstanceFormat,
364
- f"Successful match - {code}->{match['name']}",
365
- )
366
- return match["id"]
367
- except Exception:
368
- # TODO: Distinguish between generated codes and proper 338bs
369
- Helper.log_data_issue(
370
- legacy_id, "Instance format Code not found in FOLIO", code
371
- )
372
- self.migration_report.add(
373
- Blurbs.InstanceFormat,
374
- f"Code '{code}' not found in FOLIO",
375
- )
376
- return ""
360
+ def get_instance_format_id_by_code(self, legacy_ids: List[str], code: str):
361
+ try:
362
+ match = next(f for f in self.folio_client.instance_formats if f["code"] == code)
363
+ self.migration_report.add(
364
+ "InstanceFormat",
365
+ i18n.t("Successful match") + f" - {code}->{match['name']}",
366
+ )
367
+ return match["id"]
368
+ except Exception:
369
+ # TODO: Distinguish between generated codes and proper 338bs
370
+ Helper.log_data_issue(legacy_ids, "Instance format Code not found in FOLIO", code)
371
+ self.migration_report.add(
372
+ "InstanceFormat",
373
+ i18n.t("Code '%{code}' not found in FOLIO", code=code),
374
+ )
375
+ return ""
377
376
 
378
- def get_folio_id_by_name(f337a: str, f338a: str, legacy_id: str):
379
- f337a = f337a.lower().replace(" ", "")
380
- f338a = f338a.lower().replace(" ", "")
381
- match_template = f"{f337a} -- {f338a}"
382
- try:
383
- match = next(
384
- f
385
- for f in self.folio.instance_formats
386
- if f["name"].lower().replace(" ", "") == match_template
387
- )
388
- self.migration_report.add(
389
- Blurbs.InstanceFormat,
390
- f"Successful matching on 337$a & 338$a - {match_template}->{match['name']}",
391
- )
392
- return match["id"]
393
- except Exception:
394
- Helper.log_data_issue(
395
- legacy_id,
396
- "Unsuccessful matching on 337$a and 338$a",
397
- match_template,
377
+ def get_instance_format_id_by_name(self, f337a: str, f338a: str, legacy_ids: List[str]):
378
+ f337a = f337a.lower().strip()
379
+ f338a = f338a.lower().strip()
380
+ match_template = f"{f337a} -- {f338a}"
381
+ try:
382
+ match = next(
383
+ f
384
+ for f in self.folio_client.instance_formats
385
+ if f["name"].lower() == match_template
386
+ )
387
+ self.migration_report.add(
388
+ "InstanceFormat",
389
+ i18n.t(
390
+ "Successful matching on %{criteria_1} and %{criteria_2}",
391
+ criteria_1="337$a",
392
+ criteria_2="338$a",
398
393
  )
399
- self.migration_report.add(
400
- Blurbs.InstanceFormat,
401
- f"Unsuccessful matching on 337$a and 338$a - {match_template}",
394
+ + f" - {match_template}->{match['name']}",
395
+ )
396
+ return match["id"]
397
+ except Exception:
398
+ Helper.log_data_issue(
399
+ legacy_ids,
400
+ "Unsuccessful matching on 337$a and 338$a",
401
+ match_template,
402
+ )
403
+ self.migration_report.add(
404
+ "InstanceFormat",
405
+ i18n.t(
406
+ "Unsuccessful matching on %{criteria_1} and %{criteria_2}",
407
+ criteria_1="337$a",
408
+ criteria_2="338$a",
402
409
  )
403
- return ""
410
+ + f" - {match_template}",
411
+ )
412
+ return ""
413
+
414
+ def f338_source_is_rda_carrier(self, field: Field):
415
+ if "2" not in field:
416
+ self.migration_report.add(
417
+ "InstanceFormat",
418
+ ("Instance Format not mapped from field since 338$2 is missing"),
419
+ )
420
+ return False
421
+ elif field["2"].strip().startswith("rdacarrier"):
422
+ return True
423
+ self.migration_report.add(
424
+ "InstanceFormat",
425
+ ("InstanceFormat not mapped since 338$2 (Source) " f"is set to {field['2']}. "),
426
+ )
427
+ return False
404
428
 
429
+ def get_instance_format_ids_from_a(
430
+ self, field_index: int, f_338: Field, all_337s: List[Field], legacy_id: List[str]
431
+ ):
432
+ self.migration_report.add(
433
+ "InstanceFormat",
434
+ i18n.t("338$b is missing. Will try parse from 337$a and 338$a"),
435
+ )
436
+ for a in f_338.get_subfields("a"):
437
+ corresponding_337 = all_337s[field_index] if field_index < len(all_337s) else None
438
+ if corresponding_337 and "a" in corresponding_337:
439
+ if fmt_id := self.get_instance_format_id_by_name(
440
+ corresponding_337["a"], a, legacy_id
441
+ ):
442
+ yield fmt_id
443
+
444
def get_instance_format_ids(self, marc_record: Record, legacy_id: List[str]):
    """Yield instance format ids derived from the 338 fields of a record.

    Only 338 fields accepted by f338_source_is_rda_carrier are considered.
    Each 338 is paired with the 337 at the same position in the record.

    Args:
        marc_record (Record): The record to read 337 and 338 fields from
        legacy_id (List[str]): Legacy ids, used for lookups and data issue logging

    Yields:
        Results of the by-name lookup (338 with $a but no $b) and of
        get_instance_format_id_by_code (two-character 338$b, or a
        one-character 338$b combined with the paired 337$b)
    """
    all_337s = marc_record.get_fields("337")
    all_338s = marc_record.get_fields("338")
    for fidx, f_338 in enumerate(all_338s):
        if not self.f338_source_is_rda_carrier(f_338):
            continue

        if "b" not in f_338 and "a" in f_338:
            yield from self.get_instance_format_ids_from_a(fidx, f_338, all_337s, legacy_id)

        for sfidx, raw_code in enumerate(f_338.get_subfields("b")):
            code = raw_code.replace(" ", "")
            if len(code) == 2:
                # Normal 338b. should be able to map this
                yield self.get_instance_format_id_by_code(legacy_id, code)
                continue
            if len(code) != 1:
                # Neither a full nor a half code: nothing to map.
                continue

            paired_337 = all_337s[fidx] if fidx < len(all_337s) else None
            if not paired_337:
                # No matching 337. No use mapping the 338
                s = i18n.t("No corresponding 337 to 338 even though 338$b was one character")
                Helper.log_data_issue(legacy_id, s, code)
                self.migration_report.add("InstanceFormat", s)
                continue

            # Corresponding 337. Try to combine the codes.
            media_codes = paired_337.get_subfields("b")
            corresponding_b = media_codes[sfidx] if sfidx < len(media_codes) else None
            if not corresponding_b:
                # NOTE(review): the missing $b is looked up in the 337, so this
                # text presumably should say 337. It doubles as the i18n key,
                # so it is left unchanged here.
                s = i18n.t("No corresponding $b in corresponding 338")
                Helper.log_data_issue(legacy_id, s, "")
                self.migration_report.add("InstanceFormat", s)
                continue

            combined_code = (corresponding_b + code).strip()
            if len(combined_code) == 2:
                yield self.get_instance_format_id_by_code(legacy_id, combined_code)
534
488
 
535
- def get_mode_of_issuance_id(self, marc_record: Record, legacy_id: str) -> str:
489
+ def get_mode_of_issuance_id(self, marc_record: Record, legacy_ids: List[str]) -> str:
536
490
  level = marc_record.leader[7]
537
491
  try:
538
492
  name = "unspecified"
@@ -545,30 +499,29 @@ class BibsRulesMapper(RulesMapperBase):
545
499
  ret = next(
546
500
  (
547
501
  i["id"]
548
- for i in self.folio.modes_of_issuance
549
- if str(name).lower() == i["name"].lower()
502
+ for i in self.folio_client.modes_of_issuance
503
+ if name.lower() == i["name"].lower()
550
504
  ),
551
505
  "",
552
506
  )
553
- self.migration_report.add(
554
- Blurbs.MatchedModesOfIssuanceCode, f"{name} -- {ret}"
555
- )
507
+
508
+ self.migration_report.add("MatchedModesOfIssuanceCode", f"{name} -- {ret}")
509
+
556
510
  if not ret:
557
511
  self.migration_report.add(
558
- Blurbs.MatchedModesOfIssuanceCode,
559
- f"Unmatched level: {level}",
512
+ "MatchedModesOfIssuanceCode", i18n.t("Unmatched level") + f": {level}"
560
513
  )
514
+
561
515
  return self.other_mode_of_issuance_id
562
516
  return ret
563
517
  except IndexError:
564
518
  self.migration_report.add(
565
- Blurbs.PossibleCleaningTasks, f"No Leader[7] in {legacy_id}"
519
+ "PossibleCleaningTasks", i18n.t("No Leader[7] in") + f" {legacy_ids}"
566
520
  )
521
+
567
522
  return self.other_mode_of_issuance_id
568
523
  except StopIteration as ee:
569
- logging.exception(
570
- f"{marc_record.leader} {list(self.folio.modes_of_issuance)}"
571
- )
524
+ logging.exception(f"{marc_record.leader} {list(self.folio_client.modes_of_issuance)}")
572
525
  raise ee from ee
573
526
 
574
527
  def get_nature_of_content(self, marc_record: Record) -> List[str]:
@@ -579,63 +532,74 @@ class BibsRulesMapper(RulesMapperBase):
579
532
  return "".join(marc_record["008"].data[35:38])
580
533
  return ""
581
534
 
582
def get_languages_041(self, marc_record: Record, legacy_id: List[str]) -> Dict[str, None]:
    """Collect the language codes found in the 041 fields of a record.

    A dict with None values is used as an insertion-ordered set of codes.

    Args:
        marc_record (Record): The record to read 041 fields from
        legacy_id (List[str]): Legacy ids, used for data issue logging

    Returns:
        Dict[str, None]: The codes that filter_langs lets through, as keys
    """
    lang_fields = marc_record.get_fields("041")
    if not any(lang_fields):
        return {}

    code_subfields = tuple("abdefghjkmn")
    found = {}
    for lang_tag in lang_fields:
        if "2" in lang_tag:
            # A $2 names another code source: report it and log the field.
            self.migration_report.add("LanguageCodeSources", lang_tag["2"])
            Helper.log_data_issue(legacy_id, "Field with other Language code", lang_tag.value())
        for raw_code in lang_tag.get_subfields(*code_subfields):
            code = str(raw_code).lower().replace(" ", "")
            if len(code) == 3:
                found[code] = None
            elif len(code) > 3 and len(code) % 3 == 0:
                # Several three-letter codes run together in one subfield.
                for start in range(0, len(code), 3):
                    found[code[start : start + 3]] = None

    accepted = {}
    for lang in self.filter_langs(list(found), marc_record, legacy_id):
        if lang:
            accepted[str(lang)] = None
    return accepted
612
565
 
613
def get_languages(self, marc_record: Record, legacy_id: List[str]) -> List[str]:
    """Get languages and transforms them to correct codes

    Combines the codes from the 041 fields with the code from the 008 field
    and reports every resulting code under "LanguagesInRecords".

    Args:
        marc_record (Record): A pymarc Record object
        legacy_id (List[str]): A list of legacy ids from the legacy record

    Returns:
        List[str]: List of language codes, without duplicates
    """
    languages = self.get_languages_041(marc_record, legacy_id)
    lang_008 = self.get_languages_008(marc_record)
    # An empty or blank 008 value is not a language code; adding it would
    # put a bogus "" entry in both the result and the migration report.
    if lang_008 and lang_008.strip():
        languages[lang_008] = None
    for lang in languages:
        self.migration_report.add("LanguagesInRecords", lang)
    return list(languages)
620
581
 
621
582
  def fetch_language_codes(self) -> Generator[str, None, None]:
622
- """fetches the list of standardized language codes from LoC"""
623
- url = "https://www.loc.gov/standards/codelists/languages.xml"
624
- tree = ET.fromstring(requests.get(url).content)
583
+ """Loads the list of standardized language codes from LoC
584
+
585
+ Yields:
586
+ Generator[str, None, None]: _description_
587
+ """
588
+ path = Path(__file__).parent / "loc_language_codes.xml"
589
+ with open(path) as f:
590
+ lines = "".join(f.readlines())
591
+ tree = fromstring(lines)
625
592
  name_space = "{info:lc/xmlns/codelist-v1}"
626
593
  xpath_expr = "{0}languages/{0}language/{0}code".format(name_space)
627
594
  for code in tree.findall(xpath_expr):
628
595
  yield code.text
629
596
 
630
597
  def filter_langs(
631
- self, language_values: List[str], marc_record: Record, index_or_legacy_id
598
+ self, language_values: List[str], marc_record: Record, index_or_legacy_id: List[str]
632
599
  ) -> typing.Generator:
633
600
  forbidden_values = ["###", "zxx", "n/a", "N/A", "|||"]
634
601
  for language_value in language_values:
635
- if (
636
- language_value in self.language_codes
637
- and language_value not in forbidden_values
638
- ):
602
+ if language_value in self.language_codes and language_value not in forbidden_values:
639
603
  yield language_value
640
604
  elif language_value == "jap":
641
605
  yield "jpn"
@@ -653,68 +617,51 @@ class BibsRulesMapper(RulesMapperBase):
653
617
  m = "Unrecognized language codes in record"
654
618
  Helper.log_data_issue(index_or_legacy_id, m, language_value)
655
619
  self.migration_report.add(
656
- Blurbs.UnrecognizedLanguageCodes,
620
+ "UnrecognizedLanguageCodes",
657
621
  f"{m}: {language_value}",
658
622
  )
659
623
 
660
def get_legacy_ids(self, marc_record: Record, idx: int) -> List[str]:
    """Extract the legacy identifiers of a record for the configured ILS flavour.

    Args:
        marc_record (Record): The record to extract identifiers from
        idx (int): Index of the record, used when reporting failures

    Raises:
        TransformationRecordFailedError: If reading 999$c fails for the Koha flavour
        TransformationProcessError: If the configured ILS flavour is not handled

    Returns:
        List[str]: The legacy identifiers
    """
    flavour: IlsFlavour = self.task_configuration.ils_flavour

    if flavour in {IlsFlavour.sierra, IlsFlavour.millennium}:
        return get_iii_bib_id(marc_record)
    if flavour == IlsFlavour.tag907y:
        return RulesMapperBase.get_bib_id_from_907y(marc_record, idx)
    if flavour == IlsFlavour.tagf990a:
        return RulesMapperBase.get_bib_id_from_990a(marc_record, idx)
    if flavour == IlsFlavour.aleph:
        return self.get_aleph_bib_id(marc_record)
    if flavour in {IlsFlavour.voyager, "voyager", IlsFlavour.tag001}:
        return RulesMapperBase.get_bib_id_from_001(marc_record, idx)
    if flavour == IlsFlavour.koha:
        try:
            return [marc_record["999"]["c"]]
        except Exception as e:
            raise TransformationRecordFailedError(
                idx,
                "999 $c is missing, although it is required for this legacy ILS choice",
                marc_record.as_json(),
            ) from e
    if flavour == IlsFlavour.custom:
        return get_custom_bib_id(marc_record, self.task_configuration.custom_bib_id_field)
    if flavour == IlsFlavour.none:
        # No legacy id to read: hand out a random one.
        return [str(uuid.uuid4())]
    raise TransformationProcessError("", f"ILS {flavour} not configured")
708
651
 
709
- def get_aleph_bib_id(self, marc_record: Record):
710
- res = {f["b"].strip() for f in marc_record.get_fields("998") if "b" in f}
652
+ def get_aleph_bib_id(self, marc_record: Record) -> List[str]:
653
+ res = {f["b"].strip(): None for f in marc_record.get_fields("998") if "b" in f}
711
654
  if any(res):
712
- self.migration_report.add_general_statistics("legacy id from 998$b")
655
+ self.migration_report.add_general_statistics(
656
+ i18n.t("legacy id from %{fro}", fro="998$b")
657
+ )
713
658
  return list(res)
714
659
  else:
715
660
  try:
716
661
  ret = [marc_record["001"].format_field().strip()]
717
- self.migration_report.add_general_statistics("legacy id from 001")
662
+ self.migration_report.add_general_statistics(
663
+ i18n.t("legacy id from %{fro}", fro="001")
664
+ )
718
665
  return ret
719
666
  except Exception as e:
720
667
  raise TransformationRecordFailedError(
@@ -724,7 +671,7 @@ class BibsRulesMapper(RulesMapperBase):
724
671
  ) from e
725
672
 
726
673
 
727
- def get_unspecified_mode_of_issuance(folio_client: FolioClient):
674
+ def get_unspecified_mode_of_issuance(folio_client: FolioClient) -> str:
728
675
  m_o_is = list(folio_client.modes_of_issuance)
729
676
  if not any(m_o_is):
730
677
  logging.critical("No Modes of issuance set up in tenant. Quitting...")
@@ -738,7 +685,26 @@ def get_unspecified_mode_of_issuance(folio_client: FolioClient):
738
685
  return next(i["id"] for i in m_o_is if i["name"].lower() == "unspecified")
739
686
 
740
687
 
741
- def get_iii_bib_id(marc_record: Record):
688
def get_custom_bib_id(marc_record: Record, field_string: str) -> List[str]:
    """Read the legacy bib id from the field named by the "customBibIdField" setting.

    Args:
        marc_record (Record): The record to read the identifier from
        field_string (str): Either "TAG$code" (a subfield of a field) or just
            "TAG" (the whole field)

    Raises:
        TransformationProcessError: If field_string names no field tag
        TransformationRecordFailedError: If the field or subfield cannot be
            read from the record

    Returns:
        List[str]: A one-element list holding the identifier
    """
    tag, separator, subfield_code = (field_string or "").partition("$")
    if not tag:
        # str.split() never returns an empty list, so a check on the split
        # result could never detect a missing setting; test the tag itself.
        raise TransformationProcessError(
            "", 'Critical process issue. No "customBibIdField" specified in task configuration.'
        )
    try:
        field = marc_record[tag]
        if separator:
            return [field[subfield_code]]
        # Return the text of the field rather than the field object itself.
        return [field.format_field().strip()]
    except Exception as e:
        raise TransformationRecordFailedError(
            "unknown identifier",
            f"{field_string} is missing from record but is required in all records",
            marc_record.as_json(),
        ) from e
705
+
706
+
707
+ def get_iii_bib_id(marc_record: Record) -> List[str]:
742
708
  try:
743
709
  return [marc_record["907"]["a"]]
744
710
  except Exception as e: