folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. folio_migration_tools/__init__.py +11 -0
  2. folio_migration_tools/__main__.py +169 -85
  3. folio_migration_tools/circulation_helper.py +96 -59
  4. folio_migration_tools/config_file_load.py +66 -0
  5. folio_migration_tools/custom_dict.py +6 -4
  6. folio_migration_tools/custom_exceptions.py +21 -19
  7. folio_migration_tools/extradata_writer.py +46 -0
  8. folio_migration_tools/folder_structure.py +63 -66
  9. folio_migration_tools/helper.py +29 -21
  10. folio_migration_tools/holdings_helper.py +57 -34
  11. folio_migration_tools/i18n_config.py +9 -0
  12. folio_migration_tools/library_configuration.py +173 -13
  13. folio_migration_tools/mapper_base.py +317 -106
  14. folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
  15. folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
  16. folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
  17. folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
  18. folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
  19. folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
  20. folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
  21. folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
  22. folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
  23. folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
  24. folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
  25. folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
  26. folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
  27. folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
  28. folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
  29. folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
  30. folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
  31. folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
  32. folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
  33. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
  34. folio_migration_tools/migration_report.py +85 -38
  35. folio_migration_tools/migration_tasks/__init__.py +1 -3
  36. folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
  37. folio_migration_tools/migration_tasks/batch_poster.py +911 -198
  38. folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
  39. folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
  40. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
  41. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
  42. folio_migration_tools/migration_tasks/items_transformer.py +264 -84
  43. folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
  44. folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
  45. folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
  46. folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
  47. folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
  48. folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
  49. folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
  50. folio_migration_tools/migration_tasks/user_transformer.py +180 -139
  51. folio_migration_tools/task_configuration.py +46 -0
  52. folio_migration_tools/test_infrastructure/__init__.py +0 -0
  53. folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
  54. folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
  55. folio_migration_tools/transaction_migration/legacy_request.py +65 -25
  56. folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
  57. folio_migration_tools/transaction_migration/transaction_result.py +12 -1
  58. folio_migration_tools/translations/en.json +476 -0
  59. folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
  60. folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
  61. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
  62. folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
  63. folio_migration_tools/generate_schemas.py +0 -46
  64. folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
  65. folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
  66. folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
  67. folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
  68. folio_migration_tools/report_blurbs.py +0 -219
  69. folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
  70. folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
  71. folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
  72. folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
  73. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
folio_migration_tools/marc_rules_transformation/rules_mapper_base.py
@@ -2,22 +2,31 @@ import datetime
 import json
 import logging
 import time
+import urllib.parse
 import uuid
+from abc import abstractmethod
 from textwrap import wrap
+from typing import Dict, List, Tuple
 
+import i18n
 import pymarc
+from dateutil.parser import parse
 from folio_uuid.folio_uuid import FOLIONamespaces, FolioUUID
 from folioclient import FolioClient
+from pymarc import Field, Optional, Record, Subfield
+
 from folio_migration_tools.custom_exceptions import (
     TransformationFieldMappingError,
     TransformationProcessError,
     TransformationRecordFailedError,
 )
 from folio_migration_tools.helper import Helper
-from folio_migration_tools.library_configuration import LibraryConfiguration
+from folio_migration_tools.library_configuration import (
+    FileDefinition,
+    LibraryConfiguration,
+)
 from folio_migration_tools.mapper_base import MapperBase
-from folio_migration_tools.report_blurbs import Blurbs
-from pymarc import Field, Record, Leader
+from folio_migration_tools.marc_rules_transformation.hrid_handler import HRIDHandler
 
 
 class RulesMapperBase(MapperBase):
@@ -25,23 +34,37 @@ class RulesMapperBase(MapperBase):
         self,
         folio_client: FolioClient,
         library_configuration: LibraryConfiguration,
+        task_configuration,
+        statistical_codes_map: Optional[Dict],
+        schema: dict,
         conditions=None,
+        parent_id_map: dict[str, tuple] = None,
     ):
-        super().__init__(library_configuration, folio_client)
+        super().__init__(library_configuration, task_configuration, folio_client, parent_id_map)
        self.parsed_records = 0
+        self.id_map: dict[str, tuple] = {}
         self.start = time.time()
         self.last_batch_time = time.time()
         self.folio_client: FolioClient = folio_client
-        self.holdings_json_schema = self.fetch_holdings_schema()
-        self.instance_json_schema = self.get_instance_schema()
-        self.schema = {}
+        self.schema: dict = schema
         self.conditions = conditions
         self.item_json_schema = ""
-        self.mappings = {}
+        self.mappings: dict = {}
         self.schema_properties = None
+        self.create_source_records = all(
+            [self.task_configuration.create_source_records, (not getattr(self.task_configuration, "data_import_marc", False))]
+        )
+        if hasattr(self.task_configuration, "hrid_handling"):
+            self.hrid_handler = HRIDHandler(
+                folio_client,
+                self.task_configuration.hrid_handling,
+                self.migration_report,
+                self.task_configuration.deactivate035_from001,
+            )
+
+        self.setup_statistical_codes_map(statistical_codes_map)
         logging.info("Current user id is %s", self.folio_client.current_user)
 
-    # TODO: Rebuild and move
     def print_progress(self):
         self.parsed_records += 1
         num_recs = 5000
@@ -56,11 +79,17 @@ class RulesMapperBase(MapperBase):
             )
             self.last_batch_time = time.time()
 
+    @abstractmethod
+    def get_legacy_ids(self, marc_record: Record, idx: int):
+        raise NotImplementedError()
+
     @staticmethod
-    def dedupe_rec(rec):
+    def dedupe_rec(rec, props_to_not_dedupe=None):
+        if props_to_not_dedupe is None:
+            props_to_not_dedupe = []
         # remove duplicates
         for key, value in rec.items():
-            if isinstance(value, list):
+            if key not in props_to_not_dedupe and isinstance(value, list):
                 res = []
                 for v in value:
                     if v not in res:
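Note on the dedupe_rec change above: the new props_to_not_dedupe argument lets callers keep duplicates in selected list properties. A minimal sketch of the new behavior (record content and property names invented for illustration):

    rec = {
        "languages": ["eng", "eng", "swe"],
        "notes": ["duplicate note", "duplicate note"],
    }
    # Dedupe every list property except "notes"
    RulesMapperBase.dedupe_rec(rec, props_to_not_dedupe=["notes"])
    # rec["languages"] == ["eng", "swe"]; rec["notes"] is left untouched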
@@ -71,27 +100,25 @@ class RulesMapperBase(MapperBase):
         self, marc_field: pymarc.Field, mappings, folio_record, legacy_ids
     ):
         for mapping in mappings:
-            if "entity" not in mapping:
-                self.handle_normal_mapping(
-                    mapping, marc_field, folio_record, legacy_ids
-                )
-            else:
-                self.handle_entity_mapping(
-                    marc_field,
-                    mapping,
-                    folio_record,
-                    legacy_ids,
-                )
+            try:
+                if "entity" not in mapping:
+                    self.handle_normal_mapping(mapping, marc_field, folio_record, legacy_ids)
+                else:
+                    self.handle_entity_mapping(
+                        marc_field,
+                        mapping,
+                        folio_record,
+                        legacy_ids,
+                    )
+            except TransformationFieldMappingError as tre:
+                tre.log_it()
 
-    def handle_normal_mapping(
-        self, mapping, marc_field: pymarc.Field, folio_record, legacy_ids
-    ):
+    def handle_normal_mapping(self, mapping, marc_field: pymarc.Field, folio_record, legacy_ids):
         target = mapping["target"]
         if mapping.get("ignoreSubsequentSubfields", False):
             marc_field = self.remove_repeated_subfields(marc_field)
         if has_conditions(mapping):
             values = self.apply_rules(marc_field, mapping, legacy_ids)
-            # TODO: add condition to customize this hardcoded thing
             if marc_field.tag == "655":
                 values[0] = f"Genre: {values[0]}"
             self.add_value_to_target(folio_record, target, values)
@@ -108,7 +135,10 @@ class RulesMapperBase(MapperBase):
         # Adding stuff without rules/Conditions.
         # Might need more complex mapping for arrays etc
         if any(mapping["subfield"]):
-            value = " ".join(marc_field.get_subfields(*mapping["subfield"]))
+            values = self.handle_sub_field_delimiters(
+                ",".join(legacy_ids), mapping, marc_field
+            )
+            value = " ".join(values)
         else:
             value = marc_field.format_field() if marc_field else ""
         self.add_value_to_target(folio_record, target, [value])
@@ -118,7 +148,8 @@ class RulesMapperBase(MapperBase):
         try:
             f005 = marc_record["005"].data[:14]
             parsed_date = datetime.datetime.strptime(f005, "%Y%m%d%H%M%S").isoformat()
-            folio_object["metadata"]["updatedDate"] = parsed_date
+            if "metadata" in folio_object:
+                folio_object["metadata"]["updatedDate"] = parsed_date
         except Exception as exception:
             if "005" in marc_record:
                 Helper.log_data_issue(
@@ -127,6 +158,12 @@ class RulesMapperBase(MapperBase):
                     marc_record["005"].data,
                 )
 
+    @abstractmethod
+    def parse_record(
+        self, marc_record: Record, file_def: FileDefinition, legacy_ids: List[str]
+    ) -> list[dict]:
+        raise NotImplementedError()
+
     @staticmethod
     def use_008_for_dates(marc_record: Record, folio_object: dict, legacy_ids):
         try:
@@ -143,6 +180,56 @@ class RulesMapperBase(MapperBase):
                     marc_record["008"].data,
                 )
 
+    def handle_sub_field_delimiters(
+        self,
+        legacy_id: str,
+        mapping,
+        marc_field: pymarc.Field,
+        condition_types: List[str] = None,
+        parameter: dict = None,
+    ):
+        values: List[str] = []
+        if mapping.get("subfield") and (custom_delimiters := mapping.get("subFieldDelimiter")):
+            delimiter_map = {sub_f: " " for sub_f in mapping.get("subfield")}
+            for custom_delimiter in custom_delimiters:
+                delimiter_map.update(
+                    {sub_f: custom_delimiter["value"] for sub_f in custom_delimiter["subfields"]}
+                )
+            custom_delimited_strings: List[Tuple[str, List[str]]] = []
+            subfields = mapping.get("subfield")
+            for custom_delimiter in custom_delimiters:
+                subfields_for_delimiter = [
+                    sub_f
+                    for sub_f in subfields
+                    if custom_delimiter["subfields"]
+                    and delimiter_map[sub_f] == custom_delimiter["value"]
+                ]
+                subfield_collection: Tuple[str, List[str]] = (custom_delimiter["value"], [])
+                subfield_collection[1].extend(marc_field.get_subfields(*subfields_for_delimiter))
+                custom_delimited_strings.append(subfield_collection)
+            for custom_delimited_string in custom_delimited_strings:
+                if mapping.get("applyRulesOnConcatenatedData", ""):
+                    values.extend(custom_delimited_string[1])
+                else:
+                    values.extend(
+                        dict.fromkeys(
+                            [
+                                self.apply_rule(
+                                    legacy_id,
+                                    x,
+                                    condition_types or [],
+                                    marc_field,
+                                    parameter or {},
+                                )
+                                for x in custom_delimited_string[1]
+                            ]
+                        )
+                    )
+                values = [custom_delimited_string[0].join(values)]
+        elif mapping.get("subfield", []):
+            values.extend(marc_field.get_subfields(*mapping["subfield"]))
+        return values
+
     def get_value_from_condition(
         self,
         legacy_id,
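Note on the new handle_sub_field_delimiters method above: it honors the subFieldDelimiter key of a mapping rule, joining the listed subfields with a custom delimiter instead of the default single space. A sketch with an invented mapping rule (assuming no conditions, so apply_rule passes values through unchanged):

    mapping = {
        "target": "notes.note",
        "subfield": ["a", "b"],
        "subFieldDelimiter": [{"value": "--", "subfields": ["a", "b"]}],
    }
    # For a field "500 $aFirst part$bSecond part", the method returns
    # ["First part--Second part"] instead of ["First part Second part"].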
@@ -152,32 +239,136 @@ class RulesMapperBase(MapperBase):
         stripped_conds = mapping["rules"][0]["conditions"][0]["type"].split(",")
         condition_types = list(map(str.strip, stripped_conds))
         parameter = mapping["rules"][0]["conditions"][0].get("parameter", {})
-        if mapping.get("applyRulesOnConcatenatedData", ""):
-            value = " ".join(marc_field.get_subfields(*mapping["subfield"]))
-            return self.apply_rule(
-                legacy_id, value, condition_types, marc_field, parameter
+        values: List[str] = []
+        if mapping.get("subfield"):
+            values.extend(
+                self.handle_sub_field_delimiters(
+                    legacy_id, mapping, marc_field, condition_types, parameter
+                )
+            )
+        else:
+            values.append(marc_field.format_field() if marc_field else "")
+
+        if not mapping.get("applyRulesOnConcatenatedData", "") and mapping.get("subfield", []):
+            return " ".join(
+                dict.fromkeys(
+                    [
+                        self.apply_rule(legacy_id, x, condition_types, marc_field, parameter)
+                        for x in values
+                    ]
+                )
             )
-        elif mapping.get("subfield", []):
-            subfields = marc_field.get_subfields(*mapping["subfield"])
-            x = [
-                self.apply_rule(legacy_id, x, condition_types, marc_field, parameter)
-                for x in subfields
-            ]
-            return " ".join(set(x))
         else:
-            value1 = marc_field.format_field() if marc_field else ""
             return self.apply_rule(
-                legacy_id, value1, condition_types, marc_field, parameter
+                legacy_id, " ".join(values), condition_types, marc_field, parameter
+            )
+
+    def process_marc_field(
+        self,
+        folio_record: dict,
+        marc_field: Field,
+        ignored_subsequent_fields,
+        legacy_ids,
+    ):
+        if marc_field.tag == "880":
+            mappings = self.perform_proxy_mapping(marc_field)
+        else:
+            tags_to_ignore = {"880", "001", "008"}
+            mappings = (
+                self.mappings.get(marc_field.tag, {})
+                if marc_field.tag not in tags_to_ignore
+                else []
+            )
+        if mappings:
+            try:
+                self.map_field_according_to_mapping(marc_field, mappings, folio_record, legacy_ids)
+                if any(m.get("ignoreSubsequentFields", False) for m in mappings):
+                    ignored_subsequent_fields.add(marc_field.tag)
+            except Exception as ee:
+                logging.error(
+                    "map_field_according_to_mapping %s %s %s",
+                    marc_field.tag,
+                    marc_field.format_field(),
+                    json.dumps(mappings),
+                )
+                raise ee
+
+    def perform_proxy_mapping(self, marc_field):
+        proxy_mapping = next(iter(self.mappings.get("880", [])), [])
+        if "6" not in marc_field:
+            self.migration_report.add("Field880Mappings", i18n.t("Records without $6"))
+            return None
+        if not proxy_mapping or not proxy_mapping.get("fieldReplacementBy3Digits", False):
+            return None
+        if not marc_field["6"][:3] or len(marc_field["6"][:3]) != 3:
+            self.migration_report.add(
+                "Field880Mappings", i18n.t("Records with unexpected length in $6")
+            )
+            return None
+        first_three = marc_field["6"][:3]
+
+        target_field = next(
+            (
+                r.get("targetField", "")
+                for r in proxy_mapping.get("fieldReplacementRule", [])
+                if r["sourceDigits"] == first_three
+            ),
+            first_three,
+        )
+        self.migration_report.add(
+            "Field880Mappings",
+            i18n.t("Source digits")
+            + f": {marc_field['6']} "
+            + i18n.t("Target field")
+            + f": {target_field}",
+        )
+        mappings = self.mappings.get(target_field, {})
+        if not mappings:
+            self.migration_report.add(
+                "Field880Mappings",
+                i18n.t("Mapping not set up for target field")
+                + f": {target_field} ({marc_field['6']})",
             )
+        return mappings
+
+    def report_marc_stats(
+        self, marc_field: Field, bad_tags, legacy_ids, ignored_subsequent_fields
+    ):
+        self.migration_report.add("Trivia", i18n.t("Total number of Tags processed"))
+        self.report_source_and_links(marc_field)
+        self.report_bad_tags(marc_field, bad_tags, legacy_ids)
+        mapped = marc_field.tag in self.mappings
+        if marc_field.tag in ignored_subsequent_fields:
+            mapped = False
+        self.report_legacy_mapping(marc_field.tag, True, mapped)
+
+    def report_source_and_links(self, marc_field: Field):
+        if marc_field.is_control_field():
+            return
+        for subfield_2 in marc_field.get_subfields("2"):
+            self.migration_report.add(
+                "AuthoritySources",
+                i18n.t("Source of heading or term") + f": {subfield_2.split(' ')[0]}",
+            )
+        for subfield_0 in marc_field.get_subfields("0"):
+            code = ""
+            if "(" in subfield_0 and ")" in subfield_0:
+                code = subfield_0[subfield_0.find("(") + 1 : subfield_0.find(")")]
+                code = code.split(" ")[0]
+            elif url := urllib.parse.urlparse(subfield_0):
+                if url.hostname:
+                    code = subfield_0[: subfield_0.find(url.path)]
+            if code:
+                self.migration_report.add(
+                    "AuthoritySources", i18n.t("$0 base uri or source code") + f": {code}"
+                )
 
     def apply_rules(self, marc_field: pymarc.Field, mapping, legacy_ids):
         try:
             values = []
             value = ""
             if has_conditions(mapping):
-                value = self.get_value_from_condition(
-                    ",".join(legacy_ids), mapping, marc_field
-                )
+                value = self.get_value_from_condition(",".join(legacy_ids), mapping, marc_field)
             elif has_value_to_add(mapping):
                 value = mapping["rules"][0]["value"]
                 if value == "false":
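Note on perform_proxy_mapping above: 880 (alternate graphic representation) fields are routed to the mappings of the field named by the first three digits of $6, optionally rewritten through fieldReplacementRule. A sketch of that lookup (rule content invented):

    first_three = "245-01"[:3]  # taken from the 880 field's $6
    rules = [{"sourceDigits": "245", "targetField": "246"}]
    target_field = next(
        (r.get("targetField", "") for r in rules if r["sourceDigits"] == first_three),
        first_three,  # fall back to the digits themselves
    )
    # target_field == "246", so the mappings registered for tag 246 are applied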
@@ -186,31 +377,44 @@ class RulesMapperBase(MapperBase):
                     return [True]
                 else:
                     return [value]
-            elif not mapping.get("rules", []) or not mapping["rules"][0].get(
-                "conditions", []
-            ):
-                value = " ".join(marc_field.get_subfields(*mapping["subfield"]))
+            elif not mapping.get("rules", []) or not mapping["rules"][0].get("conditions", []):
+                values = self.handle_sub_field_delimiters(
+                    ",".join(legacy_ids), mapping, marc_field
+                )
+                value = " ".join(values)
             values = wrap(value, 3) if mapping.get("subFieldSplit", "") else [value]
             return values
         except TransformationProcessError as trpe:
             self.handle_transformation_process_error(self.parsed_records, trpe)
         except TransformationFieldMappingError as fme:
-            self.migration_report.add(Blurbs.FieldMappingErrors, fme.message)
-            fme.data_value = f"{fme.data_value} MARCField: {marc_field} Mapping: {json.dumps(mapping)}"
+            self.migration_report.add("FieldMappingErrors", fme.message)
+            fme.data_value = (
+                f"{fme.data_value} MARCField: {marc_field} Mapping: {json.dumps(mapping)}"
+            )
             fme.log_it()
             return []
         except TransformationRecordFailedError as trfe:
             trfe.data_value = (
-                f"{trfe.data_value} MARCField: {marc_field} "
-                f"Mapping: {json.dumps(mapping)}"
+                f"{trfe.data_value} MARCField: {marc_field} Mapping: {json.dumps(mapping)}"
             )
             trfe.log_it()
             self.migration_report.add_general_statistics(
-                "Records failed due to an error. See data issues log for details"
+                i18n.t("Records failed due to an error. See data issues log for details")
             )
         except Exception as exception:
             self.handle_generic_exception(self.parsed_records, exception)
 
+    def report_bad_tags(self, marc_field, bad_tags, legacy_ids):
+        if (
+            (not marc_field.tag.isnumeric())
+            and marc_field.tag != "LDR"
+            and marc_field.tag not in bad_tags
+        ):
+            self.migration_report.add("NonNumericTagsInRecord", marc_field.tag)
+            message = "Non-numeric tags in records"
+            Helper.log_data_issue(legacy_ids, message, marc_field.tag)
+            bad_tags.add(marc_field.tag)
+
     def add_value_to_target(self, rec, target_string, value):
         if not value:
             return
@@ -227,9 +431,7 @@ class RulesMapperBase(MapperBase):
                 sc_prop = sc_prop[target]  # set current property
             else:  # next level. take the properties from the items
                 sc_prop = schema_parent["items"]["properties"][target]
-            if (
-                target not in rec and not schema_parent
-            ):  # have we added this already?
+            if target not in rec and not schema_parent:  # have we added this already?
                 if is_array_of_strings(sc_prop):
                     rec[target] = []
                     # break
@@ -284,12 +486,29 @@ class RulesMapperBase(MapperBase):
 
     def add_value_to_first_level_target(self, rec, target_string, value):
         sch = self.schema["properties"]
-
+        if (
+            self.task_configuration.migration_task_type == "BibsTransformer"
+            and self.task_configuration.parse_cataloged_date
+            and target_string == "catalogedDate"
+        ):
+            try:
+                value = [str(parse(value[0], fuzzy=True).date())]
+            except Exception as ee:
+                Helper.log_data_issue("", f"Could not parse catalogedDate: {ee}", value)
+                self.migration_report.add(
+                    "FieldMappingErrors", i18n.t("Could not parse catalogedDate")
+                )
         if not target_string or target_string not in sch:
-            raise TransformationProcessError(
+            raise TransformationFieldMappingError(
+                "",
+                i18n.t("Target string '%{string}' not in Schema!", string=target_string)
+                + i18n.t("Check mapping file against the schema.")
+                + " "
+                + i18n.t("Target type")
+                + f": {sch.get(target_string,{}).get('type','')} "
+                + i18n.t("Value")
+                + f": {value}",
                 "",
-                f"Target string {target_string} not in Schema! Check mapping file against the schema."
-                f"Target type: {sch.get(target_string,{}).get('type','')} Value: {value}",
             )
 
         target_field = sch.get(target_string, {})
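Note on the new catalogedDate handling above: it relies on dateutil's fuzzy parsing, so a range of legacy date strings normalize to an ISO date. For example (input values invented):

    from dateutil.parser import parse

    for raw in ("19940223", "02/23/94", "Cataloged 23 Feb 1994"):
        print(str(parse(raw, fuzzy=True).date()))  # 1994-02-23 in each case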
@@ -297,7 +516,6 @@ class RulesMapperBase(MapperBase):
         if (
             target_field.get("type", "") == "array"
             and target_field.get("items", {}).get("type", "") == "string"
         ):
-
             if target_string not in rec:
                 rec[target_string] = value
             else:
@@ -309,22 +527,81 @@ class RulesMapperBase(MapperBase):
         else:
             raise TransformationProcessError(
                 "",
-                f"Edge! Target string: {target_string} Target type: {sch.get(target_string,{}).get('type','')} Value: {value}",
+                (
+                    f"Edge! Target string: {target_string} "
+                    f"Target type: {sch.get(target_string,{}).get('type','')} Value: {value}"
+                ),
             )
 
+    def remove_from_id_map(self, former_ids: List[str]):
+        """removes the ID from the map in case parsing failed
+
+        Args:
+            former_ids (_type_): _description_
+        """
+        for former_id in [id for id in former_ids if id]:
+            if former_id in self.id_map:
+                del self.id_map[former_id]
+
     def create_entity(
-        self, entity_mappings, marc_field, entity_parent_key, index_or_legacy_id
+        self, entity_mappings, marc_field: Field, entity_parent_key, index_or_legacy_id
     ):
         entity = {}
+        parent_schema_prop = self.schema.get("properties", {}).get(entity_parent_key, {})
+        if parent_schema_prop.get("type", "") == "array":
+            req_entity_props = parent_schema_prop.get("items", {}).get("required", [])
+        elif parent_schema_prop.get("type", "") == "object":
+            req_entity_props = parent_schema_prop.get("required", [])
+        else:
+            req_entity_props = []
         for entity_mapping in entity_mappings:
             k = entity_mapping["target"].split(".")[-1]
-            if values := self.apply_rules(
-                marc_field, entity_mapping, index_or_legacy_id
-            ):
-                if entity_parent_key == k:
-                    entity = values[0]
+            if k == "authorityId" and (legacy_subfield_9 := marc_field.get("9")):
+                marc_field.add_subfield("0", legacy_subfield_9)
+                marc_field.delete_subfield("9")
+            if k == "authorityId" and (entity_subfields := entity_mapping.get("subfield", [])):
+                for subfield in entity_subfields:
+                    if subfield != "9":
+                        Helper.log_data_issue(
+                            index_or_legacy_id,
+                            f"authorityId mapping from ${subfield} is not supported. Data Import will fail. "
+                            "Use only $9 for authority id mapping in MARC-to-Instance mapping rules.",
+                            marc_field,
+                        )
+                entity_mapping["subfield"] = ["9"]
+            if my_values := [
+                v
+                for v in self.apply_rules(marc_field, entity_mapping, index_or_legacy_id)
+                if v != ""
+            ]:
+                if entity_parent_key != k:
+                    entity[k] = my_values[0]
                 else:
-                    entity[k] = values[0]
+                    entity = my_values[0]
+            elif "alternativeMapping" in entity_mapping:
+                alt_mapping = entity_mapping["alternativeMapping"]
+                alt_k = alt_mapping["target"].split(".")[-1]
+                if alt_values := [
+                    v
+                    for v in self.apply_rules(marc_field, alt_mapping, index_or_legacy_id)
+                    if v != ""
+                ]:
+                    if entity_parent_key != alt_k:
+                        entity[alt_k] = alt_values[0]
+                    else:
+                        entity = alt_values[0]
+        missing_required_props = [
+            req_entity_prop
+            for req_entity_prop in req_entity_props
+            if req_entity_prop not in entity
+        ]
+        if any(missing_required_props):
+            entity = {}
+            Helper.log_data_issue(
+                index_or_legacy_id,
+                f"Missing one or more required property in entity {entity_parent_key} ({missing_required_props})",
+                marc_field,
+            )
         return entity
 
     def handle_entity_mapping(
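Note on the create_entity changes above: an entity is now discarded (and a data issue logged) when it lacks a property that the parent schema marks as required. Sketch of the guard with an invented schema fragment:

    parent_schema_prop = {"type": "array", "items": {"required": ["uri"], "properties": {}}}
    req_entity_props = parent_schema_prop.get("items", {}).get("required", [])
    entity = {"linkText": "Full text"}  # no "uri" was mapped
    missing = [p for p in req_entity_props if p not in entity]
    # missing == ["uri"] -> the entity is reset to {} and dropped from the record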
@@ -335,96 +612,111 @@ class RulesMapperBase(MapperBase):
         legacy_ids,
     ):
         entity_mapping = mapping["entity"]
-        e_parent = entity_mapping[0]["target"].split(".")[0]
-        if mapping.get("entityPerRepeatedSubfield", False):
-            for temp_field in self.grouped(marc_field):
-                entity = self.create_entity(
-                    entity_mapping, temp_field, e_parent, legacy_ids
-                )
-                if (type(entity) is dict and all(entity.values())) or (
-                    type(entity) is list and all(entity)
-                ):
-                    self.add_entity_to_record(
-                        entity, e_parent, folio_record, self.schema
-                    )
-        else:
-            if mapping.get("ignoreSubsequentSubfields", False):
-                marc_field = self.remove_repeated_subfields(marc_field)
-            entity = self.create_entity(
-                entity_mapping, marc_field, e_parent, legacy_ids
-            )
-            if e_parent in ["precedingTitles", "succeedingTitles"]:
-                self.create_preceding_succeeding_titles(
-                    entity, e_parent, folio_record["id"]
-                )
-            elif (
-                all(
-                    v
-                    for k, v in entity.items()
-                    if k
-                    not in [
-                        "staffOnly",
-                        "primary",
-                        "isbnValue",
-                        "issnValue",
-                    ]
-                )
-                or e_parent in ["electronicAccess", "publication"]
-                or (
-                    e_parent.startswith("holdingsStatements")
-                    and any(v for k, v in entity.items())
-                )
-            ):
-                self.add_entity_to_record(entity, e_parent, folio_record, self.schema)
-            else:
-                sfs = " - ".join(
-                    f"{f[0]}:{('has_value' if f[1].strip() else 'empty')}"
-                    for f in marc_field
-                )
-                pattern = " - ".join(f"{k}:'{bool(v)}'" for k, v in entity.items())
-                self.migration_report.add(
-                    Blurbs.IncompleteEntityMapping,
-                    f"{marc_field.tag} {sfs} ->>-->> {e_parent} {pattern} ",
-                )
-                # Experimental
-                # self.add_entity_to_record(entity, e_parent, rec, self.schema)
-
-    def create_preceding_succeeding_titles(self, entity, e_parent, identifier):
-        self.migration_report.add(
-            Blurbs.PrecedingSuccedingTitles, f"{e_parent} created"
-        )
-        # TODO: Make these uuids deterministic
-        new_entity = {
-            "id": str(uuid.uuid4()),
-            "title": entity.get("title"),
-            "identifiers": [],
-        }
-        if e_parent == "precedingTitles":
-            new_entity["succeedingInstanceId"] = identifier
-        else:
-            new_entity["precedingInstanceId"] = identifier
-        if new_entity.get("isbnValue", ""):
-            new_entity["identifiers"].append(
-                {
-                    "identifierTypeId": new_entity.get("isbnId"),
-                    "value": new_entity.get("isbnValue"),
-                }
-            )
-        if new_entity.get("issnValue", ""):
-            new_entity["identifiers"].append(
-                {
-                    "identifierTypeId": new_entity.get("issnId"),
-                    "value": new_entity.get("issnValue"),
-                }
-            )
-        logging.log(25, f"{e_parent}\t{json.dumps(new_entity)}")
+        if entity_indicators_match(entity_mapping, marc_field):
+            entity_mapping = [x for x in entity_mapping if "indicators" not in x]
+            e_parent = entity_mapping[0]["target"].split(".")[0]
+            if mapping.get("entityPerRepeatedSubfield", False):
+                for temp_field in self.grouped(marc_field):
+                    entity = self.create_entity(entity_mapping, temp_field, e_parent, legacy_ids)
+                    if entity and (
+                        (isinstance(entity, dict) and all(entity.values()))
+                        or (isinstance(entity, list) and all(entity))
+                    ):
+                        self.add_entity_to_record(entity, e_parent, folio_record, self.schema)
+            else:
+                if mapping.get("ignoreSubsequentSubfields", False):
+                    marc_field = self.remove_repeated_subfields(marc_field)
+                entity = self.create_entity(entity_mapping, marc_field, e_parent, legacy_ids)
+                if e_parent in ["precedingTitles", "succeedingTitles"]:
+                    self.create_preceding_succeeding_titles(
+                        entity, e_parent, folio_record["id"], marc_field
+                    )
+                elif entity and (
+                    all(
+                        v
+                        for k, v in entity.items()
+                        if k not in ["staffOnly", "primary", "isbnValue", "issnValue"]
+                    )
+                    or e_parent in ["electronicAccess", "publication"]
+                    or (
+                        e_parent.startswith("holdingsStatements") and any(v for k, v in entity.items())
+                    )
+                ):
+                    self.add_entity_to_record(entity, e_parent, folio_record, self.schema)
+                else:
+                    sfs = " - ".join(
+                        f"{f[0]}:{('has_value' if f[1].strip() else 'empty')}" for f in marc_field
+                    )
+                    pattern = " - ".join(f"{k}:'{bool(v)}'" for k, v in entity.items())
+                    self.migration_report.add(
+                        "IncompleteEntityMapping",
+                        f"{marc_field.tag} {sfs} ->>-->> {e_parent} {pattern} ",
+                    )
+                    # Experimental
+                    # self.add_entity_to_record(entity, e_parent, rec, self.schema)
+
+    def handle_suppression(
+        self, folio_record, file_def: FileDefinition, only_discovery_suppress: bool = False
+    ):
+        folio_record["discoverySuppress"] = file_def.discovery_suppressed
+        self.migration_report.add(
+            "Suppression",
+            i18n.t("Suppressed from discovery") + f' = {folio_record["discoverySuppress"]}',
+        )
+        if not only_discovery_suppress:
+            folio_record["staffSuppress"] = file_def.staff_suppressed
+            self.migration_report.add(
+                "Suppression", i18n.t("Staff suppressed") + f' = {folio_record["staffSuppress"]} '
+            )
+
+    def create_preceding_succeeding_titles(
+        self, entity, e_parent: str, identifier: str, marc_field: pymarc.Field
+    ):
+        if title := entity.get("title"):
+            self.migration_report.add(
+                "PrecedingSuccedingTitles", f"{e_parent} " + i18n.t("created")
+            )
+            # TODO: Make these uuids deterministic
+            new_entity = {
+                "id": str(uuid.uuid4()),
+                "title": title,
+                "identifiers": [],
+            }
+            if e_parent == "precedingTitles":
+                new_entity["succeedingInstanceId"] = identifier
+            else:
+                new_entity["precedingInstanceId"] = identifier
+            if new_entity.get("isbnValue", ""):
+                new_entity["identifiers"].append(
+                    {
+                        "identifierTypeId": new_entity.get("isbnId"),
+                        "value": new_entity.get("isbnValue"),
+                    }
+                )
+            if new_entity.get("issnValue", ""):
+                new_entity["identifiers"].append(
+                    {
+                        "identifierTypeId": new_entity.get("issnId"),
+                        "value": new_entity.get("issnValue"),
+                    }
+                )
+            self.extradata_writer.write(e_parent, new_entity)
+        else:
+            Helper.log_data_issue(
+                identifier, f"Unable to create {e_parent} entity. Missing title.", marc_field
+            )
 
     def apply_rule(self, legacy_id, value, condition_types, marc_field, parameter):
         v = value
         for condition_type in iter(condition_types):
-            v = self.conditions.get_condition(
-                condition_type, legacy_id, v, parameter, marc_field
-            )
+            try:
+                v = self.conditions.get_condition(
+                    condition_type, legacy_id, v, parameter, marc_field
+                )
+            except AttributeError as attr_error:
+                raise TransformationProcessError(
+                    legacy_id, attr_error, condition_type
+                ) from attr_error
         return v
 
     @staticmethod
@@ -439,38 +731,28 @@ class RulesMapperBase(MapperBase):
             rec[entity_parent_key] = entity
 
     @staticmethod
-    def get_instance_schema():
-        logging.info("Fetching Instance schema...")
-        instance_schema = FolioClient.get_latest_from_github(
-            "folio-org", "mod-inventory-storage", "ramls/instance.json"
-        )
-        logging.info("done")
-        return instance_schema
+    def grouped(marc_field: Field):
+        """Groups the subfields
+        s -> (s0,s1,s2,...sn-1), (sn,sn+1,sn+2,...s2n-1), (s2n,s2n+1,s2n+2,...s3n-1), ...
 
-    @staticmethod
-    def fetch_holdings_schema():
-        logging.info("Fetching HoldingsRecord schema...")
-        holdings_record_schema = FolioClient.get_latest_from_github(
-            "folio-org", "mod-inventory-storage", "ramls/holdingsrecord.json"
-        )
-        logging.info("done")
-        return holdings_record_schema
 
-    @staticmethod
-    def grouped(marc_field: Field):
-        "s -> (s0,s1,s2,...sn-1), (sn,sn+1,sn+2,...s2n-1), (s2n,s2n+1,s2n+2,...s3n-1), ..."
-        unique_subfields = []
-        repeated_subfields = []
-        results = list()
+        Args:
+            marc_field (Field): _description_
+
+        Returns:
+            _type_: _description_
+        """
+        unique_subfields: list = []
+        repeated_subfields: list = []
+        results = []
         for sf, sf_vals in marc_field.subfields_as_dict().items():
             if len(sf_vals) == 1:
-                unique_subfields.extend([sf, sf_vals[0]])
+                unique_subfields.append(Subfield(code=sf, value=sf_vals[0]))
             else:
-                for sf_val in sf_vals:
-                    repeated_subfields.append([sf, sf_val])
+                repeated_subfields.extend([Subfield(code=sf, value=sf_val) for sf_val in sf_vals])
         if any(repeated_subfields):
             for repeated_subfield in repeated_subfields:
-                new_subfields = [repeated_subfield[0], repeated_subfield[1]]
+                new_subfields = [repeated_subfield]
                 new_subfields.extend(unique_subfields)
                 temp_field = Field(
                     tag=marc_field.tag,
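Note on the reworked grouped above: it now builds pymarc Subfield objects and fans a field with repeated subfields out into one Field per repeated value, each carrying the unique subfields along. Sketch (field content invented):

    from pymarc import Field, Subfield

    field = Field(
        tag="650",
        indicators=[" ", "0"],
        subfields=[
            Subfield(code="a", value="Maps"),
            Subfield(code="z", value="Sweden"),
            Subfield(code="z", value="Norway"),
        ],
    )
    # RulesMapperBase.grouped(field) yields two 650 fields:
    # one with $zSweden $aMaps and one with $zNorway $aMaps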
@@ -489,10 +771,18 @@ class RulesMapperBase(MapperBase):
 
     @staticmethod
     def remove_repeated_subfields(marc_field: Field):
-        "s -> (s0,s1,s2,...sn-1), (sn,sn+1,sn+2,...s2n-1), (s2n,s2n+1,s2n+2,...s3n-1), ..."
+        """Removes repeated subfields
+        s -> (s0,s1,s2,...sn-1), (sn,sn+1,sn+2,...s2n-1), (s2n,s2n+1,s2n+2,...s3n-1), ...
+
+        Args:
+            marc_field (Field): _description_
+
+        Returns:
+            _type_: _description_
+        """
         new_subfields = []
         for sf, sf_vals in marc_field.subfields_as_dict().items():
-            new_subfields.extend([sf, sf_vals[0]])
+            new_subfields.extend([Subfield(code=sf, value=sf_vals[0])])
         return Field(
             tag=marc_field.tag,
             indicators=marc_field.indicators,
500
790
  )
501
791
 
502
792
  @staticmethod
793
+ def save_data_import_marc_record(
794
+ data_import_marc_file,
795
+ record_type: FOLIONamespaces,
796
+ marc_record: Record,
797
+ folio_record,
798
+ ):
799
+ """Saves the source marc_record to a file to be loaded via Data Import
800
+
801
+ Args:
802
+ srs_records_file (_type_): _description_
803
+ record_type (FOLIONamespaces): _description_
804
+ folio_client (FolioClient): _description_
805
+ marc_record (Record): _description_
806
+ folio_record (_type_): _description_
807
+ legacy_ids (List[str]): _description_
808
+ suppress (bool): _description_
809
+ """
810
+ marc_record.add_ordered_field(
811
+ Field(
812
+ tag="999",
813
+ indicators=["f", "f"],
814
+ subfields=[
815
+ Subfield(code="i", value=folio_record["id"]),
816
+ ],
817
+ )
818
+ )
819
+ # Since they all should be UTF encoded, make the leader align.
820
+ try:
821
+ marc_record.leader[9] = "a"
822
+ except Exception as ee:
823
+ logging.exception(
824
+ "Something is wrong with the marc record's leader: %s, %s", marc_record.leader, ee
825
+ )
826
+ data_import_marc_file.write(marc_record.as_marc())
827
+
828
+
829
+ def map_statistical_codes(
830
+ self,
831
+ folio_record: dict,
832
+ file_def: FileDefinition,
833
+ marc_record: Record,
834
+ ):
835
+ """Map statistical codes to FOLIO instance
836
+
837
+ This method first calls the base class method to map statistical codes
838
+ from the file_def. Then, it checks to see if there are any MARC field
839
+ mappings defined in the task configuration. If so, it creates a list
840
+ of lists where the first element is the MARC field tag, and the remaining
841
+ elements are the subfields to be used for mapping. It then iterates
842
+ through the MARC fields, retrieves the values based on the subfields.
843
+ Finally, it adds the mapped codes to the folio_record's statisticalCodeIds.
844
+
845
+ Args:
846
+ legacy_ids (List[str]): The legacy IDs of the folio record
847
+ folio_record (dict): The Dictionary representation of the FOLIO record
848
+ marc_record (Record): The pymarc Record object
849
+ file_def (FileDefinition): The file definition object from which marc_record was read
850
+ """
851
+ super().map_statistical_codes(folio_record, file_def)
852
+ if self.task_configuration.statistical_code_mapping_fields:
853
+ stat_code_marc_fields = []
854
+ for mapping in self.task_configuration.statistical_code_mapping_fields:
855
+ stat_code_marc_fields.append(mapping.split("$"))
856
+ for field_map in stat_code_marc_fields:
857
+ mapped_codes = self.map_stat_codes_from_marc_field(field_map, marc_record, self.library_configuration.multi_field_delimiter)
858
+ folio_record['statisticalCodeIds'] = folio_record.get("statisticalCodeIds", []) + mapped_codes
859
+
860
+ @staticmethod
861
+ def map_stat_codes_from_marc_field(field_map: List[str], marc_record: Record, multi_field_delimiter: str="<delimiter>") -> List[str]:
862
+ """Map statistical codes from MARC field to FOLIO instance.
863
+
864
+ This function extracts statistical codes from a MARC field based on the provided field map.
865
+ It supports multiple subfields and uses a delimiter to handle concatenated values.
866
+
867
+ Args:
868
+ field_map (List[str]): A list where the first element is the MARC field tag, and the remaining elements are subfields to extract values from.
869
+ marc_record (Record): The MARC record to process.
870
+ multi_field_delimiter (str): A delimiter used to concatenate multiple subfield values that should be individual mapped values.
871
+
872
+ Returns:
873
+ str: A string of statistical codes extracted from the MARC field, formatted as "<field>_<subfield>:<value>".
874
+ """
875
+ field_values = []
876
+ if len(field_map) == 2:
877
+ subfields = []
878
+ for mf in marc_record.get_fields(field_map[0]):
879
+ subfields.extend(
880
+ multi_field_delimiter.join(
881
+ mf.get_subfields(field_map[1])
882
+ ).split(multi_field_delimiter)
883
+ )
884
+ field_values.extend(
885
+ [
886
+ f"{field_map[0]}_{field_map[1]}:{x}" for
887
+ x in subfields
888
+ ]
889
+ )
890
+ elif len(field_map) > 2:
891
+ for mf in marc_record.get_fields(field_map[0]):
892
+ for sf in field_map[1:]:
893
+ field_values.extend(
894
+ [
895
+ f"{field_map[0]}_{sf}:{x}" for x in multi_field_delimiter.join(
896
+ mf.get_subfields(sf)
897
+ ).split(multi_field_delimiter)
898
+ ]
899
+ )
900
+ elif field_map:
901
+ for mf in marc_record.get_fields(field_map[0]):
902
+ field_values.append(f"{field_map[0]}:{mf.value()}")
903
+ return field_values
904
+
503
905
  def save_source_record(
906
+ self,
504
907
  srs_records_file,
505
908
  record_type: FOLIONamespaces,
506
909
  folio_client: FolioClient,
507
910
  marc_record: Record,
508
911
  folio_record,
509
- legacy_id: str,
912
+ legacy_ids: List[str],
510
913
  suppress: bool,
511
914
  ):
512
- """Saves the source Marc_record to the Source record Storage module"""
513
- srs_id = RulesMapperBase.create_srs_id(
514
- record_type, folio_client.okapi_url, legacy_id
515
- )
915
+ """Saves the source Marc_record to the Source record Storage module
916
+
917
+ Args:
918
+ srs_records_file (_type_): _description_
919
+ record_type (FOLIONamespaces): _description_
920
+ folio_client (FolioClient): _description_
921
+ marc_record (Record): _description_
922
+ folio_record (_type_): _description_
923
+ legacy_ids (List[str]): _description_
924
+ suppress (bool): _description_
925
+ """
926
+ srs_id = self.create_srs_id(record_type, legacy_ids[-1])
516
927
 
517
928
  marc_record.add_ordered_field(
518
929
  Field(
519
930
  tag="999",
520
931
  indicators=["f", "f"],
521
- subfields=["i", folio_record["id"], "s", srs_id],
932
+ subfields=[
933
+ Subfield(code="i", value=folio_record["id"]),
934
+ Subfield(code="s", value=srs_id),
935
+ ],
522
936
  )
523
937
  )
524
938
  # Since they all should be UTF encoded, make the leader align.
525
939
  try:
526
- temp_leader = Leader(marc_record.leader)
527
- temp_leader[9] = "a"
528
- marc_record.leader = temp_leader
529
- except Exception:
940
+ marc_record.leader[9] = "a"
941
+ except Exception as ee:
530
942
  logging.exception(
531
- "Something is wrong with the marc records leader: %s",
532
- marc_record.leader,
943
+ "Something is wrong with the marc record's leader: %s, %s", marc_record.leader, ee
533
944
  )
534
- srs_record_string = RulesMapperBase.get_srs_string(
945
+ srs_record_string = self.get_srs_string(
535
946
  marc_record,
536
947
  folio_record,
537
948
  srs_id,
538
- folio_client.get_metadata_construct(),
539
949
  suppress,
540
950
  record_type,
541
951
  )
542
952
  srs_records_file.write(f"{srs_record_string}\n")
543
953
 
544
- @staticmethod
545
- def create_srs_id(record_type, okapi_url: str, legacy_id: str):
954
+ def create_srs_id(self, record_type, legacy_id: str):
546
955
  srs_types = {
547
956
  FOLIONamespaces.holdings: FOLIONamespaces.srs_records_holdingsrecord,
548
957
  FOLIONamespaces.instances: FOLIONamespaces.srs_records_bib,
549
- FOLIONamespaces.athorities: FOLIONamespaces.srs_records_auth,
958
+ FOLIONamespaces.authorities: FOLIONamespaces.srs_records_auth,
550
959
  FOLIONamespaces.edifact: FOLIONamespaces.srs_records_edifact,
551
960
  }
552
961
 
553
962
  return str(
554
963
  FolioUUID(
555
- okapi_url,
964
+ self.base_string_for_folio_uuid,
556
965
  srs_types.get(record_type),
557
- str(legacy_id),
966
+ legacy_id
558
967
  )
559
968
  )
560
969
 
970
+ @staticmethod
971
+ def get_bib_id_from_907y(marc_record: Record, index_or_legacy_id):
972
+ try:
973
+ return list(set(marc_record["907"].get_subfields("a", "y")))
974
+ except Exception as e:
975
+ raise TransformationRecordFailedError(
976
+ index_or_legacy_id,
977
+ (
978
+ "907 $y and $a is missing is missing, although they is "
979
+ "required for this legacy ILS choice"
980
+ ),
981
+ marc_record.as_json(),
982
+ ) from e
983
+
984
+ @staticmethod
985
+ def get_bib_id_from_990a(marc_record: Record, index_or_legacy_id):
986
+ res = {f["a"].strip() for f in marc_record.get_fields("990") if "a" in f}
987
+ if marc_record["001"].format_field().strip():
988
+ res.add(marc_record["001"].format_field().strip())
989
+ if any(res):
990
+ return list(res)
991
+ else:
992
+ raise TransformationRecordFailedError(
993
+ index_or_legacy_id,
994
+ "neither 990$a or 001 found in record.",
995
+ marc_record.as_json(),
996
+ )
997
+
998
+ @staticmethod
999
+ def get_bib_id_from_001(marc_record: Record, index_or_legacy_id):
1000
+ try:
1001
+ return [marc_record["001"].format_field().strip()]
1002
+ except Exception as e:
1003
+ raise TransformationRecordFailedError(
1004
+ index_or_legacy_id,
1005
+ "001 is missing, although it is required for Voyager migrations",
1006
+ marc_record.as_json(),
1007
+ ) from e
1008
+
561
1009
  @staticmethod
562
1010
  def get_srs_string(
563
1011
  marc_record: Record,
564
1012
  folio_object: dict,
565
1013
  srs_id,
566
- metadata_obj,
567
- suppress,
1014
+ discovery_suppress: bool,
568
1015
  record_type: FOLIONamespaces,
569
1016
  ):
570
1017
  record_types = {
571
1018
  FOLIONamespaces.holdings: "MARC_HOLDING",
572
1019
  FOLIONamespaces.instances: "MARC_BIB",
573
- FOLIONamespaces.athorities: "MARC_AUTHORITY",
1020
+ FOLIONamespaces.authorities: "MARC_AUTHORITY",
574
1021
  FOLIONamespaces.edifact: "EDIFACT",
575
1022
  }
576
1023
 
577
1024
  id_holders = {
578
1025
  FOLIONamespaces.instances: {
579
1026
  "instanceId": folio_object["id"],
580
- "instanceHrid": folio_object["hrid"],
1027
+ "instanceHrid": folio_object.get("hrid", ""),
581
1028
  },
582
1029
  FOLIONamespaces.holdings: {
583
1030
  "holdingsId": folio_object["id"],
584
- "holdingsHrid": folio_object["hrid"],
1031
+ "holdingsHrid": folio_object.get("hrid", ""),
1032
+ },
1033
+ FOLIONamespaces.authorities: {
1034
+ "authorityId": folio_object["id"],
1035
+ "authorityHrid": marc_record["001"].data,
585
1036
  },
586
- FOLIONamespaces.athorities: {},
587
1037
  FOLIONamespaces.edifact: {},
588
1038
  }
589
1039
 
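Note on map_stat_codes_from_marc_field above: the keys it produces have the form <field>_<subfield>:<value> and are later resolved through the statistical codes map. Sketch (record content invented):

    from pymarc import Field, Record, Subfield

    record = Record()
    record.add_field(
        Field(
            tag="998",
            indicators=[" ", " "],
            subfields=[
                Subfield(code="a", value="ebook"),
                Subfield(code="a", value="microform"),
            ],
        )
    )
    codes = RulesMapperBase.map_stat_codes_from_marc_field(["998", "a"], record)
    # codes == ["998_a:ebook", "998_a:microform"]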
@@ -598,9 +1048,8 @@ class RulesMapperBase(MapperBase):
             "recordType": record_types.get(record_type),
             "rawRecord": raw_record,
             "parsedRecord": parsed_record,
-            "additionalInfo": {"suppressDiscovery": suppress},
+            "additionalInfo": {"suppressDiscovery": discovery_suppress},
             "externalIdsHolder": id_holders.get(record_type),
-            "metadata": metadata_obj,
             "state": "ACTUAL",
             "leaderRecordStatus": parsed_record["content"]["leader"][5]
             if parsed_record["content"]["leader"][5] in [*"acdnposx"]
@@ -625,3 +1074,34 @@ def is_array_of_strings(schema_property):
 def is_array_of_objects(schema_property):
     sc_prop_type = schema_property.get("type", "string")
     return sc_prop_type == "array" and schema_property["items"]["type"] == "object"
+
+def entity_indicators_match(entity_mapping, marc_field):
+    """
+    Check if the indicators of the entity mapping match the indicators of the MARC field.
+    Entity mappings can limit the fields they are applied to by specifying indicator values that
+    must match the provided MARC field's indicators. If the entity mapping does not specify any
+    indicator values, it is assumed to match all MARC fields. Entity indicator values can be a
+    specific value or a wildcard "*", which matches any value.
+
+    This function compares the indicators of the entity mapping with the indicators of the MARC field.
+    If the entity does not specify any indicator values, the function returns True. If the entity does
+    specify indicator values, the function checks if the MARC field's indicators match the specified
+    values or if the specified values are wildcards. If both indicators match, the function returns True;
+    otherwise, it returns False.
+
+    Args:
+        entity_mapping (dict): _description_
+        marc_field (pymarc.Field): _description_
+
+    Returns:
+        bool: True if the indicators match, False otherwise.
+    """
+    if indicator_rule := [x["indicators"] for x in entity_mapping if "indicators" in x]:
+        return all(
+            [
+                (marc_field.indicator1 == indicator_rule[0]['ind1'] or indicator_rule[0]['ind1'] == "*"),
+                (marc_field.indicator2 == indicator_rule[0]['ind2'] or indicator_rule[0]['ind2'] == "*"),
+            ]
+        )
+    else:
+        return True
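Note on the new entity_indicators_match above: a quick illustration of the wildcard matching (mapping and field invented):

    from pymarc import Field, Subfield

    entity_mapping = [
        {"indicators": {"ind1": "1", "ind2": "*"}},
        {"target": "alternativeTitles.alternativeTitle", "subfield": ["a"]},
    ]
    field = Field(
        tag="246",
        indicators=["1", "3"],
        subfields=[Subfield(code="a", value="Added title")],
    )
    entity_indicators_match(entity_mapping, field)  # True: ind1 matches, ind2 is "*"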