folio-migration-tools 1.9.0rc12__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. folio_migration_tools/library_configuration.py +21 -1
  2. folio_migration_tools/mapper_base.py +78 -4
  3. folio_migration_tools/mapping_file_transformation/courses_mapper.py +2 -1
  4. folio_migration_tools/mapping_file_transformation/holdings_mapper.py +8 -4
  5. folio_migration_tools/mapping_file_transformation/item_mapper.py +6 -13
  6. folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +1 -0
  7. folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +17 -21
  8. folio_migration_tools/mapping_file_transformation/notes_mapper.py +2 -0
  9. folio_migration_tools/mapping_file_transformation/order_mapper.py +4 -1
  10. folio_migration_tools/mapping_file_transformation/organization_mapper.py +3 -0
  11. folio_migration_tools/mapping_file_transformation/user_mapper.py +3 -1
  12. folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +1 -0
  13. folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +83 -4
  14. folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +5 -0
  15. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +51 -10
  16. folio_migration_tools/migration_tasks/batch_poster.py +65 -2
  17. folio_migration_tools/migration_tasks/bibs_transformer.py +13 -3
  18. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +42 -21
  19. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +22 -12
  20. folio_migration_tools/migration_tasks/items_transformer.py +5 -4
  21. folio_migration_tools/migration_tasks/migration_task_base.py +22 -1
  22. folio_migration_tools/migration_tasks/orders_transformer.py +2 -0
  23. folio_migration_tools/migration_tasks/user_transformer.py +1 -0
  24. folio_migration_tools/translations/en.json +12 -3
  25. {folio_migration_tools-1.9.0rc12.dist-info → folio_migration_tools-1.9.1.dist-info}/METADATA +1 -1
  26. {folio_migration_tools-1.9.0rc12.dist-info → folio_migration_tools-1.9.1.dist-info}/RECORD +29 -29
  27. {folio_migration_tools-1.9.0rc12.dist-info → folio_migration_tools-1.9.1.dist-info}/LICENSE +0 -0
  28. {folio_migration_tools-1.9.0rc12.dist-info → folio_migration_tools-1.9.1.dist-info}/WHEEL +0 -0
  29. {folio_migration_tools-1.9.0rc12.dist-info → folio_migration_tools-1.9.1.dist-info}/entry_points.txt +0 -0
@@ -1,12 +1,14 @@
1
1
  import copy
2
2
  import json
3
3
  import logging
4
+ import re
4
5
  from typing import Dict, List, Set
5
6
 
6
7
  import i18n
7
8
  from folio_uuid.folio_namespaces import FOLIONamespaces
8
9
  from folio_uuid.folio_uuid import FolioUUID
9
10
  from folioclient import FolioClient
11
+ from pymarc import Optional
10
12
  from pymarc.field import Field
11
13
  from pymarc.record import Record
12
14
 
@@ -40,20 +42,21 @@ class RulesMapperHoldings(RulesMapperBase):
40
42
  library_configuration: LibraryConfiguration,
41
43
  parent_id_map: dict,
42
44
  boundwith_relationship_map_rows: List[Dict],
45
+ statistical_codes_map: Optional[Dict] = None,
43
46
  ):
44
- self.task_configuration = task_configuration
45
47
  self.conditions = Conditions(
46
48
  folio_client,
47
49
  self,
48
50
  "holdings",
49
51
  library_configuration.folio_release,
50
- self.task_configuration.default_call_number_type_name,
52
+ task_configuration.default_call_number_type_name,
51
53
  )
52
54
  self.folio = folio_client
53
55
  super().__init__(
54
56
  folio_client,
55
57
  library_configuration,
56
58
  task_configuration,
59
+ statistical_codes_map,
57
60
  self.fetch_holdings_schema(folio_client),
58
61
  self.conditions,
59
62
  parent_id_map,
@@ -297,6 +300,10 @@ class RulesMapperHoldings(RulesMapperBase):
297
300
  "",
298
301
  )
299
302
  self.handle_suppression(folio_holding, file_def, True)
303
+ # First, map statistical codes from MARC fields and FileDefinitions to FOLIO statistical codes.
304
+ # Then, convert the mapped statistical codes to their corresponding code IDs.
305
+ self.map_statistical_codes(folio_holding, file_def, marc_record)
306
+ self.map_statistical_code_ids(legacy_ids, folio_holding)
300
307
  self.set_source_id(self.create_source_records, folio_holding, self.holdingssources, file_def)
301
308
 
302
309
  def pick_first_location_if_many(self, folio_holding: Dict, legacy_ids: List[str]):
@@ -387,12 +394,29 @@ class RulesMapperHoldings(RulesMapperBase):
387
394
  ) from ee
388
395
  return [
389
396
  {
390
- "note": "\n".join(mrk_statement_notes),
397
+ "note": chunk,
391
398
  "holdingsNoteTypeId": holdings_note_type_id,
392
399
  "staffOnly": True,
393
- }
400
+ } for chunk in self.split_mrk_by_max_note_size("\n".join(mrk_statement_notes))
394
401
  ]
395
402
 
403
+ @staticmethod
404
+ def split_mrk_by_max_note_size(s: str, max_chunk_size: int = 32000) -> List[str]:
405
+ lines = s.splitlines(keepends=True)
406
+ chunks = []
407
+ current_chunk = ""
408
+ for line in lines:
409
+ # If adding this line would exceed the limit, start a new chunk
410
+ if len(current_chunk) + len(line) > max_chunk_size:
411
+ if current_chunk:
412
+ chunks.append(current_chunk)
413
+ current_chunk = line
414
+ else:
415
+ current_chunk += line
416
+ if current_chunk:
417
+ chunks.append(current_chunk)
418
+ return chunks
419
+
396
420
  def add_mfhd_as_mrk_note(self, marc_record: Record, folio_holding: Dict, legacy_ids: List[str]):
397
421
  """Adds the MFHD as a note to the holdings record
398
422
 
@@ -418,12 +442,29 @@ class RulesMapperHoldings(RulesMapperBase):
418
442
  ) from ee
419
443
  folio_holding["notes"] = folio_holding.get("notes", []) + [
420
444
  {
421
- "note": str(marc_record),
445
+ "note": chunk,
422
446
  "holdingsNoteTypeId": holdings_note_type_id,
423
447
  "staffOnly": True,
424
- }
448
+ } for chunk in self.split_mrk_by_max_note_size(str(marc_record))
425
449
  ]
426
450
 
451
+ @staticmethod
452
+ def split_mrc_by_max_note_size(data: bytes, sep: bytes = b"\x1e", max_chunk_size: int = 32000) -> List[bytes]:
453
+ # Split data into segments, each ending with the separator (except possibly the last)
454
+ pattern = re.compile(b'(.*?' + re.escape(sep) + b'|.+?$)', re.DOTALL)
455
+ parts = [m.group(0) for m in pattern.finditer(data) if m.group(0)]
456
+ chunks = []
457
+ current_chunk = b""
458
+ for part in parts:
459
+ if len(current_chunk) + len(part) > max_chunk_size and current_chunk:
460
+ chunks.append(current_chunk)
461
+ current_chunk = part
462
+ else:
463
+ current_chunk += part
464
+ if current_chunk:
465
+ chunks.append(current_chunk)
466
+ return chunks
467
+
427
468
  def add_mfhd_as_mrc_note(self, marc_record: Record, folio_holding: Dict, legacy_ids: List[str]):
428
469
  """Adds the MFHD as a note to the holdings record
429
470
 
@@ -449,10 +490,10 @@ class RulesMapperHoldings(RulesMapperBase):
449
490
  ) from ee
450
491
  folio_holding["notes"] = folio_holding.get("notes", []) + [
451
492
  {
452
- "note": marc_record.as_marc().decode("utf-8"),
493
+ "note": chunk.decode("utf-8"),
453
494
  "holdingsNoteTypeId": holdings_note_type_id,
454
495
  "staffOnly": True,
455
- }
496
+ } for chunk in self.split_mrc_by_max_note_size(marc_record.as_marc())
456
497
  ]
457
498
 
458
499
  def wrap_up(self):
@@ -582,7 +623,7 @@ class RulesMapperHoldings(RulesMapperBase):
582
623
  "", "Column BIB_ID missing from Boundwith relationship map", ""
583
624
  )
584
625
 
585
- def setup_boundwith_relationship_map(self, boundwith_relationship_map: List[Dict]):
626
+ def setup_boundwith_relationship_map(self, boundwith_relationship_map_list: List[Dict]):
586
627
  """
587
628
  Creates a map of MFHD_ID to BIB_ID for boundwith relationships.
588
629
 
@@ -597,7 +638,7 @@ class RulesMapperHoldings(RulesMapperBase):
597
638
  TransformationRecordFailedError: If BIB_ID is not in the instance id map.
598
639
  """
599
640
  new_map = {}
600
- for idx, entry in enumerate(boundwith_relationship_map):
641
+ for idx, entry in enumerate(boundwith_relationship_map_list):
601
642
  self.verity_boundwith_map_entry(entry)
602
643
  mfhd_uuid = str(
603
644
  FolioUUID(
@@ -6,7 +6,7 @@ import sys
6
6
  import time
7
7
  import traceback
8
8
  from datetime import datetime
9
- from typing import Annotated, List
9
+ from typing import Annotated, List, Optional
10
10
  from uuid import uuid4
11
11
 
12
12
  import httpx
@@ -173,11 +173,13 @@ class BatchPoster(MigrationTaskBase):
173
173
  self.num_posted = 0
174
174
  self.okapi_headers = self.folio_client.okapi_headers
175
175
  self.http_client = None
176
+ self.starting_record_count_in_folio: Optional[int] = None
176
177
 
177
178
  def do_work(self):
178
179
  with self.folio_client.get_folio_http_client() as httpx_client:
179
180
  self.http_client = httpx_client
180
181
  with open(self.folder_structure.failed_recs_path, "w", encoding='utf-8') as failed_recs_file:
182
+ self.get_starting_record_count()
181
183
  try:
182
184
  batch = []
183
185
  if self.task_configuration.object_type == "SRS":
@@ -317,6 +319,8 @@ class BatchPoster(MigrationTaskBase):
317
319
  updates[record["id"]] = {
318
320
  "_version": record["_version"],
319
321
  }
322
+ if "hrid" in record:
323
+ updates[record["id"]]["hrid"] = record["hrid"]
320
324
  if "status" in record:
321
325
  updates[record["id"]]["status"] = record["status"]
322
326
  if "lastCheckIn" in record:
@@ -604,6 +608,42 @@ class BatchPoster(MigrationTaskBase):
604
608
  else:
605
609
  return httpx.post(url, headers=self.okapi_headers, json=payload, params=self.query_params, timeout=None)
606
610
 
611
+ def get_current_record_count_in_folio(self):
612
+ if "query_endpoint" in self.api_info:
613
+ url = f"{self.folio_client.gateway_url}{self.api_info['query_endpoint']}"
614
+ query_params = {"query": "cql.allRecords=1", "limit": 0}
615
+ if self.http_client and not self.http_client.is_closed:
616
+ res = self.http_client.get(url, headers=self.folio_client.okapi_headers, params=query_params)
617
+ else:
618
+ res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
619
+ try:
620
+ res.raise_for_status()
621
+ return res.json()["totalRecords"]
622
+ except httpx.HTTPStatusError:
623
+ logging.error("Failed to get current record count. HTTP %s", res.status_code)
624
+ return 0
625
+ except KeyError:
626
+ logging.error(f"Failed to get current record count. No 'totalRecords' in response: {res.json()}")
627
+ return 0
628
+ else:
629
+ raise ValueError(
630
+ "No 'query_endpoint' available for %s. Cannot get current record count.", self.task_configuration.object_type
631
+ )
632
+
633
+ def get_starting_record_count(self):
634
+ if "query_endpoint" in self.api_info and not self.starting_record_count_in_folio:
635
+ logging.info("Getting starting record count in FOLIO")
636
+ self.starting_record_count_in_folio = self.get_current_record_count_in_folio()
637
+ else:
638
+ logging.info("No query_endpoint available for %s. Cannot get starting record count.", self.task_configuration.object_type)
639
+
640
+ def get_finished_record_count(self):
641
+ if "query_endpoint" in self.api_info:
642
+ logging.info("Getting finished record count in FOLIO")
643
+ self.finished_record_count_in_folio = self.get_current_record_count_in_folio()
644
+ else:
645
+ logging.info("No query_endpoint available for %s. Cannot get ending record count.", self.task_configuration.object_type)
646
+
607
647
  def wrap_up(self):
608
648
  logging.info("Done. Wrapping up")
609
649
  self.extradata_writer.flush()
@@ -621,11 +661,34 @@ class BatchPoster(MigrationTaskBase):
621
661
  )
622
662
  else:
623
663
  logging.info("Done posting %s records. %s failed", self.num_posted, self.num_failures)
624
-
664
+ if self.starting_record_count_in_folio:
665
+ self.get_finished_record_count()
666
+ total_on_server = self.finished_record_count_in_folio - self.starting_record_count_in_folio
667
+ discrepancy = self.processed - self.num_failures - total_on_server
668
+ if discrepancy != 0:
669
+ logging.error(
670
+ (
671
+ "Discrepancy in record count. "
672
+ "Starting record count: %s. Finished record count: %s. "
673
+ "Records posted: %s. Discrepancy: %s"
674
+ ),
675
+ self.starting_record_count_in_folio,
676
+ self.finished_record_count_in_folio,
677
+ self.num_posted - self.num_failures,
678
+ discrepancy,
679
+ )
680
+ else:
681
+ discrepancy = 0
625
682
  run = "second time" if self.performing_rerun else "first time"
626
683
  self.migration_report.set("GeneralStatistics", f"Records processed {run}", self.processed)
627
684
  self.migration_report.set("GeneralStatistics", f"Records posted {run}", self.num_posted)
628
685
  self.migration_report.set("GeneralStatistics", f"Failed to post {run}", self.num_failures)
686
+ if discrepancy:
687
+ self.migration_report.set(
688
+ "GeneralStatistics",
689
+ f"Discrepancy in record count {run}",
690
+ discrepancy,
691
+ )
629
692
  self.rerun_run()
630
693
  with open(self.folder_structure.migration_reports_file, "w+") as report_file:
631
694
  self.migration_report.write_migration_report(
@@ -7,8 +7,6 @@ from pydantic import Field
7
7
 
8
8
  from folio_migration_tools.helper import Helper
9
9
  from folio_migration_tools.library_configuration import (
10
- FileDefinition,
11
- HridHandling,
12
10
  IlsFlavour,
13
11
  LibraryConfiguration,
14
12
  )
@@ -116,11 +114,23 @@ class BibsTransformer(MigrationTaskBase):
116
114
  use_logging: bool = True,
117
115
  ):
118
116
  super().__init__(library_config, task_config, folio_client, use_logging)
117
+ self.task_config = task_config
118
+ self.task_configuration = self.task_config
119
+ if self.task_config.statistical_codes_map_file_name:
120
+ statcode_mapping = self.load_ref_data_mapping_file(
121
+ "statisticalCodeIds",
122
+ self.folder_structure.mapping_files_folder
123
+ / self.task_config.statistical_codes_map_file_name,
124
+ [],
125
+ False,
126
+ )
127
+ else:
128
+ statcode_mapping = None
119
129
  self.processor: MarcFileProcessor
120
130
  self.check_source_files(
121
131
  self.folder_structure.legacy_records_folder, self.task_configuration.files
122
132
  )
123
- self.mapper = BibsRulesMapper(self.folio_client, library_config, self.task_configuration)
133
+ self.mapper = BibsRulesMapper(self.folio_client, library_config, self.task_configuration, statcode_mapping)
124
134
  self.bib_ids: set = set()
125
135
  if (
126
136
  self.task_configuration.reset_hrid_settings
@@ -160,6 +160,16 @@ class HoldingsCsvTransformer(MigrationTaskBase):
160
160
  ),
161
161
  ),
162
162
  ] = True
163
+ statistical_codes_map_file_name: Annotated[
164
+ Optional[str],
165
+ Field(
166
+ title="Statistical code map file name",
167
+ description=(
168
+ "Path to the file containing the mapping of statistical codes. "
169
+ "The file should be in TSV format with legacy_stat_code and folio_code columns."
170
+ ),
171
+ ),
172
+ ] = ""
163
173
 
164
174
  @staticmethod
165
175
  def get_object_type() -> FOLIONamespaces:
@@ -174,16 +184,27 @@ class HoldingsCsvTransformer(MigrationTaskBase):
174
184
  ):
175
185
  super().__init__(library_config, task_config, folio_client, use_logging)
176
186
  self.fallback_holdings_type = None
187
+ self.folio_keys, self.holdings_field_map = self.load_mapped_fields()
188
+ if any(k for k in self.folio_keys if k.startswith("statisticalCodeIds")):
189
+ statcode_mapping = self.load_ref_data_mapping_file(
190
+ "statisticalCodeIds",
191
+ self.folder_structure.mapping_files_folder
192
+ / self.task_configuration.statistical_codes_map_file_name,
193
+ self.folio_keys,
194
+ False,
195
+ )
196
+ else:
197
+ statcode_mapping = None
177
198
  try:
178
- self.task_config = task_config
179
199
  self.bound_with_keys = set()
180
200
  self.mapper = HoldingsMapper(
181
201
  self.folio_client,
182
- self.load_mapped_fields(),
202
+ self.holdings_field_map,
183
203
  self.load_location_map(),
184
204
  self.load_call_number_type_map(),
185
205
  self.load_instance_id_map(True),
186
206
  library_config,
207
+ statcode_mapping,
187
208
  )
188
209
  self.holdings = {}
189
210
  self.total_records = 0
@@ -196,19 +217,19 @@ class HoldingsCsvTransformer(MigrationTaskBase):
196
217
  logging.info("%s\tholdings types in tenant", len(self.holdings_types))
197
218
  self.validate_merge_criterias()
198
219
  self.check_source_files(
199
- self.folder_structure.data_folder / "items", self.task_config.files
220
+ self.folder_structure.data_folder / "items", self.task_configuration.files
200
221
  )
201
222
  self.fallback_holdings_type = next(
202
223
  h
203
224
  for h in self.holdings_types
204
- if h["id"] == self.task_config.fallback_holdings_type_id
225
+ if h["id"] == self.task_configuration.fallback_holdings_type_id
205
226
  )
206
227
  if not self.fallback_holdings_type:
207
228
  raise TransformationProcessError(
208
229
  "",
209
230
  (
210
231
  "Holdings type with ID "
211
- f"{self.task_config.fallback_holdings_type_id} "
232
+ f"{self.task_configuration.fallback_holdings_type_id} "
212
233
  "not found in FOLIO."
213
234
  ),
214
235
  )
@@ -216,15 +237,15 @@ class HoldingsCsvTransformer(MigrationTaskBase):
216
237
  "%s will be used as default holdings type",
217
238
  self.fallback_holdings_type["name"],
218
239
  )
219
- if any(self.task_config.previously_generated_holdings_files):
220
- for file_name in self.task_config.previously_generated_holdings_files:
240
+ if any(self.task_configuration.previously_generated_holdings_files):
241
+ for file_name in self.task_configuration.previously_generated_holdings_files:
221
242
  logging.info("Processing %s", file_name)
222
243
  self.holdings.update(
223
244
  HoldingsHelper.load_previously_generated_holdings(
224
245
  self.folder_structure.results_folder / file_name,
225
- self.task_config.holdings_merge_criteria,
246
+ self.task_configuration.holdings_merge_criteria,
226
247
  self.mapper.migration_report,
227
- self.task_config.holdings_type_uuid_for_boundwiths,
248
+ self.task_configuration.holdings_type_uuid_for_boundwiths,
228
249
  )
229
250
  )
230
251
 
@@ -260,7 +281,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):
260
281
  def load_call_number_type_map(self):
261
282
  with open(
262
283
  self.folder_structure.mapping_files_folder
263
- / self.task_config.call_number_type_map_file_name,
284
+ / self.task_configuration.call_number_type_map_file_name,
264
285
  "r",
265
286
  ) as callnumber_type_map_f:
266
287
  return self.load_ref_data_map_from_file(
@@ -269,7 +290,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):
269
290
 
270
291
  def load_location_map(self):
271
292
  with open(
272
- self.folder_structure.mapping_files_folder / self.task_config.location_map_file_name
293
+ self.folder_structure.mapping_files_folder / self.task_configuration.location_map_file_name
273
294
  ) as location_map_f:
274
295
  return self.load_ref_data_map_from_file(
275
296
  location_map_f, "Found %s rows in location map"
@@ -283,7 +304,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):
283
304
 
284
305
  def load_mapped_fields(self):
285
306
  with open(
286
- self.folder_structure.mapping_files_folder / self.task_config.holdings_map_file_name
307
+ self.folder_structure.mapping_files_folder / self.task_configuration.holdings_map_file_name
287
308
  ) as holdings_mapper_f:
288
309
  holdings_map = json.load(holdings_mapper_f)
289
310
  logging.info("%s fields in holdings mapping file map", len(holdings_map["data"]))
@@ -294,11 +315,11 @@ class HoldingsCsvTransformer(MigrationTaskBase):
294
315
  "%s mapped fields in holdings mapping file map",
295
316
  len(list(mapped_fields)),
296
317
  )
297
- return holdings_map
318
+ return mapped_fields, holdings_map
298
319
 
299
320
  def do_work(self):
300
321
  logging.info("Starting....")
301
- for file_def in self.task_config.files:
322
+ for file_def in self.task_configuration.files:
302
323
  logging.info("Processing %s", file_def.file_name)
303
324
  try:
304
325
  self.process_single_file(file_def)
@@ -311,7 +332,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):
311
332
  print(f"\n{error_str}\nHalting")
312
333
  sys.exit(1)
313
334
  logging.info(
314
- f"processed {self.total_records:,} records in {len(self.task_config.files)} files"
335
+ f"processed {self.total_records:,} records in {len(self.task_configuration.files)} files"
315
336
  )
316
337
 
317
338
  def wrap_up(self):
@@ -357,8 +378,8 @@ class HoldingsCsvTransformer(MigrationTaskBase):
357
378
  holdings_schema = self.folio_client.get_holdings_schema()
358
379
  properties = holdings_schema["properties"].keys()
359
380
  logging.info(properties)
360
- logging.info(self.task_config.holdings_merge_criteria)
361
- res = [mc for mc in self.task_config.holdings_merge_criteria if mc not in properties]
381
+ logging.info(self.task_configuration.holdings_merge_criteria)
382
+ res = [mc for mc in self.task_configuration.holdings_merge_criteria if mc not in properties]
362
383
  if any(res):
363
384
  logging.critical(
364
385
  (
@@ -426,7 +447,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):
426
447
  raise TransformationRecordFailedError(legacy_id, "No instance id in parsed record", "")
427
448
 
428
449
  for folio_holding in holdings_from_row:
429
- self.mapper.perform_additional_mappings(folio_holding, file_def)
450
+ self.mapper.perform_additional_mappings(legacy_id, folio_holding, file_def)
430
451
  self.merge_holding_in(folio_holding, all_instance_ids, legacy_id)
431
452
  self.mapper.report_folio_mapping(folio_holding, self.mapper.schema)
432
453
 
@@ -436,7 +457,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):
436
457
  self.mapper.create_bound_with_holdings(
437
458
  folio_holding,
438
459
  folio_holding["instanceId"],
439
- self.task_config.holdings_type_uuid_for_boundwiths,
460
+ self.task_configuration.holdings_type_uuid_for_boundwiths,
440
461
  )
441
462
  )
442
463
 
@@ -480,9 +501,9 @@ class HoldingsCsvTransformer(MigrationTaskBase):
480
501
  # Regular holding. Merge according to criteria
481
502
  new_holding_key = HoldingsHelper.to_key(
482
503
  incoming_holding,
483
- self.task_config.holdings_merge_criteria,
504
+ self.task_configuration.holdings_merge_criteria,
484
505
  self.mapper.migration_report,
485
- self.task_config.holdings_type_uuid_for_boundwiths,
506
+ self.task_configuration.holdings_type_uuid_for_boundwiths,
486
507
  )
487
508
  if self.holdings.get(new_holding_key, None):
488
509
  self.mapper.migration_report.add_general_statistics(
@@ -216,7 +216,16 @@ class HoldingsMarcTransformer(MigrationTaskBase):
216
216
  ):
217
217
  csv.register_dialect("tsv", delimiter="\t")
218
218
  super().__init__(library_config, task_config, folio_client, use_logging)
219
- self.task_config = task_config
219
+ if self.task_configuration.statistical_codes_map_file_name:
220
+ statcode_mapping = self.load_ref_data_mapping_file(
221
+ "statisticalCodeIds",
222
+ self.folder_structure.mapping_files_folder
223
+ / self.task_configuration.statistical_codes_map_file_name,
224
+ [],
225
+ False,
226
+ )
227
+ else:
228
+ statcode_mapping = None
220
229
  self.holdings_types = list(
221
230
  self.folio_client.folio_get_all("/holdings-types", "holdingsTypes")
222
231
  )
@@ -224,7 +233,7 @@ class HoldingsMarcTransformer(MigrationTaskBase):
224
233
  (
225
234
  h
226
235
  for h in self.holdings_types
227
- if h["id"] == self.task_config.fallback_holdings_type_id
236
+ if h["id"] == self.task_configuration.fallback_holdings_type_id
228
237
  ),
229
238
  {"name": ""},
230
239
  )
@@ -232,7 +241,7 @@ class HoldingsMarcTransformer(MigrationTaskBase):
232
241
  raise TransformationProcessError(
233
242
  "",
234
243
  (
235
- f"Holdings type with ID {self.task_config.fallback_holdings_type_id}"
244
+ f"Holdings type with ID {self.task_configuration.fallback_holdings_type_id}"
236
245
  " not found in FOLIO."
237
246
  ),
238
247
  )
@@ -243,11 +252,11 @@ class HoldingsMarcTransformer(MigrationTaskBase):
243
252
 
244
253
  # Load Boundwith relationship map
245
254
  self.boundwith_relationship_map_rows = []
246
- if self.task_config.boundwith_relationship_file_path:
255
+ if self.task_configuration.boundwith_relationship_file_path:
247
256
  try:
248
257
  with open(
249
258
  self.folder_structure.legacy_records_folder
250
- / self.task_config.boundwith_relationship_file_path
259
+ / self.task_configuration.boundwith_relationship_file_path
251
260
  ) as boundwith_relationship_file:
252
261
  self.boundwith_relationship_map_rows = list(
253
262
  csv.DictReader(boundwith_relationship_file, dialect="tsv")
@@ -260,28 +269,29 @@ class HoldingsMarcTransformer(MigrationTaskBase):
260
269
  raise TransformationProcessError(
261
270
  "",
262
271
  i18n.t("Provided boundwith relationship file not found"),
263
- self.task_config.boundwith_relationship_file_path,
272
+ self.task_configuration.boundwith_relationship_file_path,
264
273
  )
265
274
 
266
275
  location_map_path = (
267
276
  self.folder_structure.mapping_files_folder
268
- / self.task_config.location_map_file_name
277
+ / self.task_configuration.location_map_file_name
269
278
  )
270
279
  with open(location_map_path) as location_map_file:
271
280
  self.location_map = list(csv.DictReader(location_map_file, dialect="tsv"))
272
281
  logging.info("Locations in map: %s", len(self.location_map))
273
282
 
274
283
  self.check_source_files(
275
- self.folder_structure.legacy_records_folder, self.task_config.files
284
+ self.folder_structure.legacy_records_folder, self.task_configuration.files
276
285
  )
277
286
  self.instance_id_map = self.load_instance_id_map(True)
278
287
  self.mapper = RulesMapperHoldings(
279
288
  self.folio_client,
280
289
  self.location_map,
281
- self.task_config,
290
+ self.task_configuration,
282
291
  self.library_configuration,
283
292
  self.instance_id_map,
284
293
  self.boundwith_relationship_map_rows,
294
+ statcode_mapping
285
295
  )
286
296
  self.add_supplemental_mfhd_mappings()
287
297
  if (
@@ -293,12 +303,12 @@ class HoldingsMarcTransformer(MigrationTaskBase):
293
303
  logging.info("Init done")
294
304
 
295
305
  def add_supplemental_mfhd_mappings(self):
296
- if self.task_config.supplemental_mfhd_mapping_rules_file:
306
+ if self.task_configuration.supplemental_mfhd_mapping_rules_file:
297
307
  try:
298
308
  with open(
299
309
  (
300
310
  self.folder_structure.mapping_files_folder
301
- / self.task_config.supplemental_mfhd_mapping_rules_file
311
+ / self.task_configuration.supplemental_mfhd_mapping_rules_file
302
312
  ),
303
313
  "r",
304
314
  ) as new_rules_file:
@@ -313,7 +323,7 @@ class HoldingsMarcTransformer(MigrationTaskBase):
313
323
  raise TransformationProcessError(
314
324
  "",
315
325
  "Provided supplemental MFHD mapping rules file not found",
316
- self.task_config.supplemental_mfhd_mapping_rules_file,
326
+ self.task_configuration.supplemental_mfhd_mapping_rules_file,
317
327
  )
318
328
  else:
319
329
  new_rules = {}
@@ -124,10 +124,10 @@ class ItemsTransformer(MigrationTaskBase):
124
124
  statistical_codes_map_file_name: Annotated[
125
125
  Optional[str],
126
126
  Field(
127
- title="Statistical codes map file name",
127
+ title="Statistical code map file name",
128
128
  description=(
129
- "File name for statistical codes map. "
130
- "Empty string by default."
129
+ "Path to the file containing the mapping of statistical codes. "
130
+ "The file should be in TSV format with legacy_stat_code and folio_code columns."
131
131
  ),
132
132
  ),
133
133
  ] = ""
@@ -205,6 +205,7 @@ class ItemsTransformer(MigrationTaskBase):
205
205
  csv.register_dialect("tsv", delimiter="\t")
206
206
  super().__init__(library_config, task_config, folio_client, use_logging)
207
207
  self.task_config = task_config
208
+ self.task_configuration = self.task_config
208
209
  self.check_source_files(
209
210
  self.folder_structure.legacy_records_folder, self.task_config.files
210
211
  )
@@ -353,7 +354,7 @@ class ItemsTransformer(MigrationTaskBase):
353
354
  record, f"row {idx}", FOLIONamespaces.items
354
355
  )
355
356
 
356
- self.mapper.perform_additional_mappings(folio_rec, file_def)
357
+ self.mapper.perform_additional_mappings(legacy_id, folio_rec, file_def)
357
358
  self.handle_circiulation_notes(folio_rec, self.folio_client.current_user)
358
359
  self.handle_notes(folio_rec)
359
360
  if folio_rec["holdingsRecordId"] in self.boundwith_relationship_map:
@@ -455,7 +455,7 @@ class MigrationTaskBase:
455
455
  logging.info("No mapping setup for %s", folio_property_name)
456
456
  logging.info("%s will have default mapping if any ", folio_property_name)
457
457
  logging.info(
458
- "Add a file named %s and add the field to the item.mapping.json file.",
458
+ "Add a file named %s and add the field to the field mapping json file.",
459
459
  map_file_path,
460
460
  )
461
461
  return None
@@ -522,6 +522,27 @@ class MarcTaskConfigurationBase(task_configuration.AbstractTaskConfiguration):
522
522
  ),
523
523
  ),
524
524
  ] = False
525
+ statistical_codes_map_file_name: Annotated[
526
+ Optional[str],
527
+ Field(
528
+ title="Statistical code map file name",
529
+ description=(
530
+ "Path to the file containing the mapping of statistical codes. "
531
+ "The file should be in TSV format with legacy_stat_code and folio_code columns."
532
+ ),
533
+ ),
534
+ ] = ""
535
+ statistical_code_mapping_fields: Annotated[
536
+ List[str],
537
+ Field(
538
+ title="Statistical code mapping fields",
539
+ description=(
540
+ "List of fields + subfields to be used for mapping statistical codes. "
541
+ "Subfields should be delimited by a \"$\" (eg. 907$a). Single repeating subfields "
542
+ "will be treated as unique values. Multiple subfields will be concatenated together with a space."
543
+ ),
544
+ ),
545
+ ] = []
525
546
 
526
547
  class ExcludeLevelFilter(logging.Filter):
527
548
  def __init__(self, level):
@@ -157,6 +157,7 @@ class OrdersTransformer(MigrationTaskBase):
157
157
  super().__init__(library_config, task_config, folio_client, use_logging)
158
158
  self.object_type_name = self.get_object_type().name
159
159
  self.task_config = task_config
160
+ self.task_configuration = self.task_config
160
161
  self.files = self.list_source_files()
161
162
  self.total_records = 0
162
163
  self.current_folio_record: dict = {}
@@ -175,6 +176,7 @@ class OrdersTransformer(MigrationTaskBase):
175
176
  self.mapper = CompositeOrderMapper(
176
177
  self.folio_client,
177
178
  self.library_configuration,
179
+ self.task_configuration,
178
180
  self.orders_map,
179
181
  self.load_id_map(self.folder_structure.organizations_id_map_path, True),
180
182
  self.load_instance_id_map(True),
@@ -119,6 +119,7 @@ class UserTransformer(MigrationTaskBase):
119
119
  ):
120
120
  super().__init__(library_config, task_config, folio_client, use_logging)
121
121
  self.task_config = task_config
122
+ self.task_configuration = self.task_config
122
123
  self.total_records = 0
123
124
 
124
125
  self.user_map = self.setup_records_map(