folio-migration-tools 1.9.0rc12__py3-none-any.whl → 1.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/library_configuration.py +21 -1
- folio_migration_tools/mapper_base.py +78 -4
- folio_migration_tools/mapping_file_transformation/courses_mapper.py +2 -1
- folio_migration_tools/mapping_file_transformation/holdings_mapper.py +8 -4
- folio_migration_tools/mapping_file_transformation/item_mapper.py +6 -13
- folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +1 -0
- folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +17 -21
- folio_migration_tools/mapping_file_transformation/notes_mapper.py +2 -0
- folio_migration_tools/mapping_file_transformation/order_mapper.py +4 -1
- folio_migration_tools/mapping_file_transformation/organization_mapper.py +3 -0
- folio_migration_tools/mapping_file_transformation/user_mapper.py +3 -1
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +1 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +83 -4
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +5 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +51 -10
- folio_migration_tools/migration_tasks/batch_poster.py +65 -2
- folio_migration_tools/migration_tasks/bibs_transformer.py +13 -3
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +42 -21
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +22 -12
- folio_migration_tools/migration_tasks/items_transformer.py +5 -4
- folio_migration_tools/migration_tasks/migration_task_base.py +22 -1
- folio_migration_tools/migration_tasks/orders_transformer.py +2 -0
- folio_migration_tools/migration_tasks/user_transformer.py +1 -0
- folio_migration_tools/translations/en.json +12 -3
- {folio_migration_tools-1.9.0rc12.dist-info → folio_migration_tools-1.9.1.dist-info}/METADATA +1 -1
- {folio_migration_tools-1.9.0rc12.dist-info → folio_migration_tools-1.9.1.dist-info}/RECORD +29 -29
- {folio_migration_tools-1.9.0rc12.dist-info → folio_migration_tools-1.9.1.dist-info}/LICENSE +0 -0
- {folio_migration_tools-1.9.0rc12.dist-info → folio_migration_tools-1.9.1.dist-info}/WHEEL +0 -0
- {folio_migration_tools-1.9.0rc12.dist-info → folio_migration_tools-1.9.1.dist-info}/entry_points.txt +0 -0

folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py

@@ -1,12 +1,14 @@
 import copy
 import json
 import logging
+import re
 from typing import Dict, List, Set

 import i18n
 from folio_uuid.folio_namespaces import FOLIONamespaces
 from folio_uuid.folio_uuid import FolioUUID
 from folioclient import FolioClient
+from pymarc import Optional
 from pymarc.field import Field
 from pymarc.record import Record

@@ -40,20 +42,21 @@ class RulesMapperHoldings(RulesMapperBase):
         library_configuration: LibraryConfiguration,
         parent_id_map: dict,
         boundwith_relationship_map_rows: List[Dict],
+        statistical_codes_map: Optional[Dict] = None,
     ):
-        self.task_configuration = task_configuration
         self.conditions = Conditions(
             folio_client,
             self,
             "holdings",
             library_configuration.folio_release,
-            self.task_configuration.default_call_number_type_name,
+            task_configuration.default_call_number_type_name,
         )
         self.folio = folio_client
         super().__init__(
             folio_client,
             library_configuration,
             task_configuration,
+            statistical_codes_map,
             self.fetch_holdings_schema(folio_client),
             self.conditions,
             parent_id_map,
@@ -297,6 +300,10 @@ class RulesMapperHoldings(RulesMapperBase):
             "",
         )
         self.handle_suppression(folio_holding, file_def, True)
+        # First, map statistical codes from MARC fields and FileDefinitions to FOLIO statistical codes.
+        # Then, convert the mapped statistical codes to their corresponding code IDs.
+        self.map_statistical_codes(folio_holding, file_def, marc_record)
+        self.map_statistical_code_ids(legacy_ids, folio_holding)
         self.set_source_id(self.create_source_records, folio_holding, self.holdingssources, file_def)

     def pick_first_location_if_many(self, folio_holding: Dict, legacy_ids: List[str]):
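The two added comment lines describe a two-step pipeline: legacy values are first translated to FOLIO statistical codes via the loaded map, and those codes are then resolved to statistical code UUIDs. Below is a minimal, self-contained sketch of that idea only; it is not the implementation of map_statistical_codes / map_statistical_code_ids, which are added in mapper_base.py and rules_mapper_base.py and are not shown in this section. All names and values are invented.

    from typing import Dict, List

    legacy_to_folio_code = {"EBOOK": "ebooks", "ARCH": "arch"}  # step 1: from the TSV map
    folio_code_to_uuid = {"ebooks": "0868921a-4407-47c9-9b3e-db94644dbae7"}  # step 2: from /statistical-codes


    def map_statistical_codes_sketch(folio_holding: Dict, legacy_codes: List[str]) -> None:
        # Step 1: translate legacy statistical codes into FOLIO statistical codes
        folio_holding["statisticalCodeIds"] = [
            legacy_to_folio_code[code] for code in legacy_codes if code in legacy_to_folio_code
        ]


    def map_statistical_code_ids_sketch(folio_holding: Dict) -> None:
        # Step 2: swap the mapped codes for their statistical code UUIDs
        folio_holding["statisticalCodeIds"] = [
            folio_code_to_uuid[code]
            for code in folio_holding.get("statisticalCodeIds", [])
            if code in folio_code_to_uuid
        ]


    holding: Dict = {}
    map_statistical_codes_sketch(holding, ["EBOOK", "UNMAPPED"])
    map_statistical_code_ids_sketch(holding)
    print(holding)  # {'statisticalCodeIds': ['0868921a-4407-47c9-9b3e-db94644dbae7']}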
@@ -387,12 +394,29 @@ class RulesMapperHoldings(RulesMapperBase):
             ) from ee
         return [
             {
-                "note": "\n".join(mrk_statement_notes),
+                "note": chunk,
                 "holdingsNoteTypeId": holdings_note_type_id,
                 "staffOnly": True,
-            }
+            } for chunk in self.split_mrk_by_max_note_size("\n".join(mrk_statement_notes))
         ]

+    @staticmethod
+    def split_mrk_by_max_note_size(s: str, max_chunk_size: int = 32000) -> List[str]:
+        lines = s.splitlines(keepends=True)
+        chunks = []
+        current_chunk = ""
+        for line in lines:
+            # If adding this line would exceed the limit, start a new chunk
+            if len(current_chunk) + len(line) > max_chunk_size:
+                if current_chunk:
+                    chunks.append(current_chunk)
+                current_chunk = line
+            else:
+                current_chunk += line
+        if current_chunk:
+            chunks.append(current_chunk)
+        return chunks
+
     def add_mfhd_as_mrk_note(self, marc_record: Record, folio_holding: Dict, legacy_ids: List[str]):
         """Adds the MFHD as a note to the holdings record

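A quick usage sketch of the new line-based splitter. It is a staticmethod, so it can be called without constructing the mapper; the snippet assumes folio_migration_tools 1.9.1 is installed and the MRK content is invented. Note that lines are never cut in half, so a single line longer than max_chunk_size still becomes one oversized chunk.

    from folio_migration_tools.marc_rules_transformation.rules_mapper_holdings import (
        RulesMapperHoldings,
    )

    # roughly 58,000 characters of short MRK-style lines (made-up holdings statements)
    mrk = "\n".join(f"=866  41$a v.{i} (1990-1991)" for i in range(2000)) + "\n"

    chunks = RulesMapperHoldings.split_mrk_by_max_note_size(mrk, max_chunk_size=32000)
    print([len(c) for c in chunks])              # two chunks, each at or under 32,000 characters
    assert "".join(chunks) == mrk                # chunking is lossless
    assert all(len(c) <= 32000 for c in chunks)  # holds because every individual line is short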
@@ -418,12 +442,29 @@ class RulesMapperHoldings(RulesMapperBase):
             ) from ee
         folio_holding["notes"] = folio_holding.get("notes", []) + [
             {
-                "note": str(marc_record),
+                "note": chunk,
                 "holdingsNoteTypeId": holdings_note_type_id,
                 "staffOnly": True,
-            }
+            } for chunk in self.split_mrk_by_max_note_size(str(marc_record))
         ]

+    @staticmethod
+    def split_mrc_by_max_note_size(data: bytes, sep: bytes = b"\x1e", max_chunk_size: int = 32000) -> List[bytes]:
+        # Split data into segments, each ending with the separator (except possibly the last)
+        pattern = re.compile(b'(.*?' + re.escape(sep) + b'|.+?$)', re.DOTALL)
+        parts = [m.group(0) for m in pattern.finditer(data) if m.group(0)]
+        chunks = []
+        current_chunk = b""
+        for part in parts:
+            if len(current_chunk) + len(part) > max_chunk_size and current_chunk:
+                chunks.append(current_chunk)
+                current_chunk = part
+            else:
+                current_chunk += part
+        if current_chunk:
+            chunks.append(current_chunk)
+        return chunks
+
     def add_mfhd_as_mrc_note(self, marc_record: Record, folio_holding: Dict, legacy_ids: List[str]):
         """Adds the MFHD as a note to the holdings record

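A matching sketch for the byte-oriented splitter, which cuts the binary MARC (ISO 2709) rendering on the 0x1E field terminator so that chunks end on field boundaries. Again this assumes folio_migration_tools 1.9.1; the byte string below only mimics field-terminated segments and is not a complete MARC21 record.

    from folio_migration_tools.marc_rules_transformation.rules_mapper_holdings import (
        RulesMapperHoldings,
    )

    # roughly 66,000 bytes of invented, field-terminator-delimited segments
    data = b"".join(b"852  01$aMain stacks$hQA76.9 .A1" + b"\x1e" for _ in range(2000))

    chunks = RulesMapperHoldings.split_mrc_by_max_note_size(data, max_chunk_size=32000)
    assert b"".join(chunks) == data                  # nothing lost or reordered
    assert all(c.endswith(b"\x1e") for c in chunks)  # every chunk ends on a field boundary
    print([len(c) for c in chunks])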
@@ -449,10 +490,10 @@ class RulesMapperHoldings(RulesMapperBase):
             ) from ee
         folio_holding["notes"] = folio_holding.get("notes", []) + [
             {
-                "note": marc_record.as_marc().decode("utf-8"),
+                "note": chunk.decode("utf-8"),
                 "holdingsNoteTypeId": holdings_note_type_id,
                 "staffOnly": True,
-            }
+            } for chunk in self.split_mrc_by_max_note_size(marc_record.as_marc())
         ]

     def wrap_up(self):
@@ -582,7 +623,7 @@ class RulesMapperHoldings(RulesMapperBase):
                 "", "Column BIB_ID missing from Boundwith relationship map", ""
             )

-    def setup_boundwith_relationship_map(self,
+    def setup_boundwith_relationship_map(self, boundwith_relationship_map_list: List[Dict]):
         """
         Creates a map of MFHD_ID to BIB_ID for boundwith relationships.

@@ -597,7 +638,7 @@ class RulesMapperHoldings(RulesMapperBase):
            TransformationRecordFailedError: If BIB_ID is not in the instance id map.
         """
         new_map = {}
-        for idx, entry in enumerate(
+        for idx, entry in enumerate(boundwith_relationship_map_list):
             self.verity_boundwith_map_entry(entry)
             mfhd_uuid = str(
                 FolioUUID(

folio_migration_tools/migration_tasks/batch_poster.py

@@ -6,7 +6,7 @@ import sys
 import time
 import traceback
 from datetime import datetime
-from typing import Annotated, List
+from typing import Annotated, List, Optional
 from uuid import uuid4

 import httpx
@@ -173,11 +173,13 @@ class BatchPoster(MigrationTaskBase):
         self.num_posted = 0
         self.okapi_headers = self.folio_client.okapi_headers
         self.http_client = None
+        self.starting_record_count_in_folio: Optional[int] = None

     def do_work(self):
         with self.folio_client.get_folio_http_client() as httpx_client:
             self.http_client = httpx_client
             with open(self.folder_structure.failed_recs_path, "w", encoding='utf-8') as failed_recs_file:
+                self.get_starting_record_count()
                 try:
                     batch = []
                     if self.task_configuration.object_type == "SRS":
@@ -317,6 +319,8 @@ class BatchPoster(MigrationTaskBase):
             updates[record["id"]] = {
                 "_version": record["_version"],
             }
+            if "hrid" in record:
+                updates[record["id"]]["hrid"] = record["hrid"]
             if "status" in record:
                 updates[record["id"]]["status"] = record["status"]
             if "lastCheckIn" in record:
@@ -604,6 +608,42 @@ class BatchPoster(MigrationTaskBase):
         else:
             return httpx.post(url, headers=self.okapi_headers, json=payload, params=self.query_params, timeout=None)

+    def get_current_record_count_in_folio(self):
+        if "query_endpoint" in self.api_info:
+            url = f"{self.folio_client.gateway_url}{self.api_info['query_endpoint']}"
+            query_params = {"query": "cql.allRecords=1", "limit": 0}
+            if self.http_client and not self.http_client.is_closed:
+                res = self.http_client.get(url, headers=self.folio_client.okapi_headers, params=query_params)
+            else:
+                res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
+            try:
+                res.raise_for_status()
+                return res.json()["totalRecords"]
+            except httpx.HTTPStatusError:
+                logging.error("Failed to get current record count. HTTP %s", res.status_code)
+                return 0
+            except KeyError:
+                logging.error(f"Failed to get current record count. No 'totalRecords' in response: {res.json()}")
+                return 0
+        else:
+            raise ValueError(
+                "No 'query_endpoint' available for %s. Cannot get current record count.", self.task_configuration.object_type
+            )
+
+    def get_starting_record_count(self):
+        if "query_endpoint" in self.api_info and not self.starting_record_count_in_folio:
+            logging.info("Getting starting record count in FOLIO")
+            self.starting_record_count_in_folio = self.get_current_record_count_in_folio()
+        else:
+            logging.info("No query_endpoint available for %s. Cannot get starting record count.", self.task_configuration.object_type)
+
+    def get_finished_record_count(self):
+        if "query_endpoint" in self.api_info:
+            logging.info("Getting finished record count in FOLIO")
+            self.finished_record_count_in_folio = self.get_current_record_count_in_folio()
+        else:
+            logging.info("No query_endpoint available for %s. Cannot get ending record count.", self.task_configuration.object_type)
+
     def wrap_up(self):
         logging.info("Done. Wrapping up")
         self.extradata_writer.flush()
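The count helper issues a plain CQL count query (limit=0, so only totalRecords comes back) against the object type's query endpoint. A standalone sketch of that request, assuming a FOLIO gateway URL, tenant, and token, and using /holdings-storage/holdings purely as an illustrative query_endpoint (the real one comes from self.api_info):

    import httpx

    GATEWAY_URL = "https://folio.example.edu"                         # assumption
    headers = {"x-okapi-tenant": "diku", "x-okapi-token": "<token>"}  # assumption

    res = httpx.get(
        f"{GATEWAY_URL}/holdings-storage/holdings",            # illustrative endpoint
        headers=headers,
        params={"query": "cql.allRecords=1", "limit": 0},      # count only, no records
        timeout=None,
    )
    res.raise_for_status()
    print(res.json()["totalRecords"])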
@@ -621,11 +661,34 @@ class BatchPoster(MigrationTaskBase):
             )
         else:
             logging.info("Done posting %s records. %s failed", self.num_posted, self.num_failures)
-
+        if self.starting_record_count_in_folio:
+            self.get_finished_record_count()
+            total_on_server = self.finished_record_count_in_folio - self.starting_record_count_in_folio
+            discrepancy = self.processed - self.num_failures - total_on_server
+            if discrepancy != 0:
+                logging.error(
+                    (
+                        "Discrepancy in record count. "
+                        "Starting record count: %s. Finished record count: %s. "
+                        "Records posted: %s. Discrepancy: %s"
+                    ),
+                    self.starting_record_count_in_folio,
+                    self.finished_record_count_in_folio,
+                    self.num_posted - self.num_failures,
+                    discrepancy,
+                )
+        else:
+            discrepancy = 0
         run = "second time" if self.performing_rerun else "first time"
         self.migration_report.set("GeneralStatistics", f"Records processed {run}", self.processed)
         self.migration_report.set("GeneralStatistics", f"Records posted {run}", self.num_posted)
         self.migration_report.set("GeneralStatistics", f"Failed to post {run}", self.num_failures)
+        if discrepancy:
+            self.migration_report.set(
+                "GeneralStatistics",
+                f"Discrepancy in record count {run}",
+                discrepancy,
+            )
         self.rerun_run()
         with open(self.folder_structure.migration_reports_file, "w+") as report_file:
             self.migration_report.write_migration_report(
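In other words, wrap_up now compares what the task believes it successfully posted with how much the record count on the FOLIO side actually grew. A small worked example with made-up numbers:

    starting_record_count_in_folio = 100_000   # counted before posting started
    finished_record_count_in_folio = 100_950   # counted again during wrap_up
    processed, num_failures = 1_000, 30

    total_on_server = finished_record_count_in_folio - starting_record_count_in_folio  # 950
    discrepancy = processed - num_failures - total_on_server                           # 1000 - 30 - 950 = 20
    # 20 records were processed and not reported as failures, yet never appeared in FOLIO,
    # so an error is logged and "Discrepancy in record count" is added to the migration report.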

folio_migration_tools/migration_tasks/bibs_transformer.py

@@ -7,8 +7,6 @@ from pydantic import Field

 from folio_migration_tools.helper import Helper
 from folio_migration_tools.library_configuration import (
-    FileDefinition,
-    HridHandling,
     IlsFlavour,
     LibraryConfiguration,
 )
@@ -116,11 +114,23 @@ class BibsTransformer(MigrationTaskBase):
         use_logging: bool = True,
     ):
         super().__init__(library_config, task_config, folio_client, use_logging)
+        self.task_config = task_config
+        self.task_configuration = self.task_config
+        if self.task_config.statistical_codes_map_file_name:
+            statcode_mapping = self.load_ref_data_mapping_file(
+                "statisticalCodeIds",
+                self.folder_structure.mapping_files_folder
+                / self.task_config.statistical_codes_map_file_name,
+                [],
+                False,
+            )
+        else:
+            statcode_mapping = None
         self.processor: MarcFileProcessor
         self.check_source_files(
             self.folder_structure.legacy_records_folder, self.task_configuration.files
         )
-        self.mapper = BibsRulesMapper(self.folio_client, library_config, self.task_configuration)
+        self.mapper = BibsRulesMapper(self.folio_client, library_config, self.task_configuration, statcode_mapping)
         self.bib_ids: set = set()
         if (
             self.task_configuration.reset_hrid_settings

folio_migration_tools/migration_tasks/holdings_csv_transformer.py

@@ -160,6 +160,16 @@ class HoldingsCsvTransformer(MigrationTaskBase):
             ),
         ),
     ] = True
+    statistical_codes_map_file_name: Annotated[
+        Optional[str],
+        Field(
+            title="Statistical code map file name",
+            description=(
+                "Path to the file containing the mapping of statistical codes. "
+                "The file should be in TSV format with legacy_stat_code and folio_code columns."
+            ),
+        ),
+    ] = ""

     @staticmethod
     def get_object_type() -> FOLIONamespaces:
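Per that description, the map is a two-column, tab-separated file with one row per legacy statistical code; a minimal example (the code values are invented):

    legacy_stat_code	folio_code
    EBOOK	ebooks
    ARCHIVE	arch
    MICROFILM	mfilm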
@@ -174,16 +184,27 @@ class HoldingsCsvTransformer(MigrationTaskBase):
     ):
         super().__init__(library_config, task_config, folio_client, use_logging)
         self.fallback_holdings_type = None
+        self.folio_keys, self.holdings_field_map = self.load_mapped_fields()
+        if any(k for k in self.folio_keys if k.startswith("statisticalCodeIds")):
+            statcode_mapping = self.load_ref_data_mapping_file(
+                "statisticalCodeIds",
+                self.folder_structure.mapping_files_folder
+                / self.task_configuration.statistical_codes_map_file_name,
+                self.folio_keys,
+                False,
+            )
+        else:
+            statcode_mapping = None
         try:
-            self.task_config = task_config
             self.bound_with_keys = set()
             self.mapper = HoldingsMapper(
                 self.folio_client,
-                self.
+                self.holdings_field_map,
                 self.load_location_map(),
                 self.load_call_number_type_map(),
                 self.load_instance_id_map(True),
                 library_config,
+                statcode_mapping,
             )
             self.holdings = {}
             self.total_records = 0
@@ -196,19 +217,19 @@ class HoldingsCsvTransformer(MigrationTaskBase):
             logging.info("%s\tholdings types in tenant", len(self.holdings_types))
             self.validate_merge_criterias()
             self.check_source_files(
-                self.folder_structure.data_folder / "items", self.task_config.files
+                self.folder_structure.data_folder / "items", self.task_configuration.files
             )
             self.fallback_holdings_type = next(
                 h
                 for h in self.holdings_types
-                if h["id"] == self.task_config.fallback_holdings_type_id
+                if h["id"] == self.task_configuration.fallback_holdings_type_id
             )
             if not self.fallback_holdings_type:
                 raise TransformationProcessError(
                     "",
                     (
                         "Holdings type with ID "
-                        f"{self.task_config.fallback_holdings_type_id} "
+                        f"{self.task_configuration.fallback_holdings_type_id} "
                         "not found in FOLIO."
                     ),
                 )
@@ -216,15 +237,15 @@ class HoldingsCsvTransformer(MigrationTaskBase):
                 "%s will be used as default holdings type",
                 self.fallback_holdings_type["name"],
             )
-            if any(self.task_config.previously_generated_holdings_files):
-                for file_name in self.task_config.previously_generated_holdings_files:
+            if any(self.task_configuration.previously_generated_holdings_files):
+                for file_name in self.task_configuration.previously_generated_holdings_files:
                     logging.info("Processing %s", file_name)
                     self.holdings.update(
                         HoldingsHelper.load_previously_generated_holdings(
                             self.folder_structure.results_folder / file_name,
-                            self.task_config.holdings_merge_criteria,
+                            self.task_configuration.holdings_merge_criteria,
                             self.mapper.migration_report,
-                            self.task_config.holdings_type_uuid_for_boundwiths,
+                            self.task_configuration.holdings_type_uuid_for_boundwiths,
                         )
                     )

@@ -260,7 +281,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):
     def load_call_number_type_map(self):
         with open(
             self.folder_structure.mapping_files_folder
-            / self.task_config.call_number_type_map_file_name,
+            / self.task_configuration.call_number_type_map_file_name,
             "r",
         ) as callnumber_type_map_f:
             return self.load_ref_data_map_from_file(
@@ -269,7 +290,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):

     def load_location_map(self):
         with open(
-            self.folder_structure.mapping_files_folder / self.task_config.location_map_file_name
+            self.folder_structure.mapping_files_folder / self.task_configuration.location_map_file_name
         ) as location_map_f:
             return self.load_ref_data_map_from_file(
                 location_map_f, "Found %s rows in location map"
@@ -283,7 +304,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):

     def load_mapped_fields(self):
         with open(
-            self.folder_structure.mapping_files_folder / self.task_config.holdings_map_file_name
+            self.folder_structure.mapping_files_folder / self.task_configuration.holdings_map_file_name
         ) as holdings_mapper_f:
             holdings_map = json.load(holdings_mapper_f)
             logging.info("%s fields in holdings mapping file map", len(holdings_map["data"]))
@@ -294,11 +315,11 @@ class HoldingsCsvTransformer(MigrationTaskBase):
                 "%s mapped fields in holdings mapping file map",
                 len(list(mapped_fields)),
             )
-            return holdings_map
+            return mapped_fields, holdings_map

     def do_work(self):
         logging.info("Starting....")
-        for file_def in self.task_config.files:
+        for file_def in self.task_configuration.files:
             logging.info("Processing %s", file_def.file_name)
             try:
                 self.process_single_file(file_def)
@@ -311,7 +332,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):
                 print(f"\n{error_str}\nHalting")
                 sys.exit(1)
         logging.info(
-            f"processed {self.total_records:,} records in {len(self.task_config.files)} files"
+            f"processed {self.total_records:,} records in {len(self.task_configuration.files)} files"
         )

     def wrap_up(self):
@@ -357,8 +378,8 @@ class HoldingsCsvTransformer(MigrationTaskBase):
         holdings_schema = self.folio_client.get_holdings_schema()
         properties = holdings_schema["properties"].keys()
         logging.info(properties)
-        logging.info(self.task_config.holdings_merge_criteria)
-        res = [mc for mc in self.task_config.holdings_merge_criteria if mc not in properties]
+        logging.info(self.task_configuration.holdings_merge_criteria)
+        res = [mc for mc in self.task_configuration.holdings_merge_criteria if mc not in properties]
         if any(res):
             logging.critical(
                 (
@@ -426,7 +447,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):
             raise TransformationRecordFailedError(legacy_id, "No instance id in parsed record", "")

         for folio_holding in holdings_from_row:
-            self.mapper.perform_additional_mappings(folio_holding, file_def)
+            self.mapper.perform_additional_mappings(legacy_id, folio_holding, file_def)
             self.merge_holding_in(folio_holding, all_instance_ids, legacy_id)
             self.mapper.report_folio_mapping(folio_holding, self.mapper.schema)

@@ -436,7 +457,7 @@ class HoldingsCsvTransformer(MigrationTaskBase):
                 self.mapper.create_bound_with_holdings(
                     folio_holding,
                     folio_holding["instanceId"],
-                    self.task_config.holdings_type_uuid_for_boundwiths,
+                    self.task_configuration.holdings_type_uuid_for_boundwiths,
                 )
             )

@@ -480,9 +501,9 @@ class HoldingsCsvTransformer(MigrationTaskBase):
             # Regular holding. Merge according to criteria
             new_holding_key = HoldingsHelper.to_key(
                 incoming_holding,
-                self.task_config.holdings_merge_criteria,
+                self.task_configuration.holdings_merge_criteria,
                 self.mapper.migration_report,
-                self.task_config.holdings_type_uuid_for_boundwiths,
+                self.task_configuration.holdings_type_uuid_for_boundwiths,
             )
             if self.holdings.get(new_holding_key, None):
                 self.mapper.migration_report.add_general_statistics(

folio_migration_tools/migration_tasks/holdings_marc_transformer.py

@@ -216,7 +216,16 @@ class HoldingsMarcTransformer(MigrationTaskBase):
     ):
         csv.register_dialect("tsv", delimiter="\t")
         super().__init__(library_config, task_config, folio_client, use_logging)
-        self.
+        if self.task_configuration.statistical_codes_map_file_name:
+            statcode_mapping = self.load_ref_data_mapping_file(
+                "statisticalCodeIds",
+                self.folder_structure.mapping_files_folder
+                / self.task_configuration.statistical_codes_map_file_name,
+                [],
+                False,
+            )
+        else:
+            statcode_mapping = None
         self.holdings_types = list(
             self.folio_client.folio_get_all("/holdings-types", "holdingsTypes")
         )
@@ -224,7 +233,7 @@ class HoldingsMarcTransformer(MigrationTaskBase):
             (
                 h
                 for h in self.holdings_types
-                if h["id"] == self.task_config.fallback_holdings_type_id
+                if h["id"] == self.task_configuration.fallback_holdings_type_id
             ),
             {"name": ""},
         )
@@ -232,7 +241,7 @@ class HoldingsMarcTransformer(MigrationTaskBase):
             raise TransformationProcessError(
                 "",
                 (
-                    f"Holdings type with ID {self.task_config.fallback_holdings_type_id}"
+                    f"Holdings type with ID {self.task_configuration.fallback_holdings_type_id}"
                     " not found in FOLIO."
                 ),
             )
@@ -243,11 +252,11 @@ class HoldingsMarcTransformer(MigrationTaskBase):

         # Load Boundwith relationship map
         self.boundwith_relationship_map_rows = []
-        if self.task_config.boundwith_relationship_file_path:
+        if self.task_configuration.boundwith_relationship_file_path:
             try:
                 with open(
                     self.folder_structure.legacy_records_folder
-                    / self.task_config.boundwith_relationship_file_path
+                    / self.task_configuration.boundwith_relationship_file_path
                 ) as boundwith_relationship_file:
                     self.boundwith_relationship_map_rows = list(
                         csv.DictReader(boundwith_relationship_file, dialect="tsv")
@@ -260,28 +269,29 @@ class HoldingsMarcTransformer(MigrationTaskBase):
                 raise TransformationProcessError(
                     "",
                     i18n.t("Provided boundwith relationship file not found"),
-                    self.task_config.boundwith_relationship_file_path,
+                    self.task_configuration.boundwith_relationship_file_path,
                 )

         location_map_path = (
             self.folder_structure.mapping_files_folder
-            / self.task_config.location_map_file_name
+            / self.task_configuration.location_map_file_name
         )
         with open(location_map_path) as location_map_file:
             self.location_map = list(csv.DictReader(location_map_file, dialect="tsv"))
             logging.info("Locations in map: %s", len(self.location_map))

         self.check_source_files(
-            self.folder_structure.legacy_records_folder, self.task_config.files
+            self.folder_structure.legacy_records_folder, self.task_configuration.files
         )
         self.instance_id_map = self.load_instance_id_map(True)
         self.mapper = RulesMapperHoldings(
             self.folio_client,
             self.location_map,
-            self.task_config,
+            self.task_configuration,
             self.library_configuration,
             self.instance_id_map,
             self.boundwith_relationship_map_rows,
+            statcode_mapping
         )
         self.add_supplemental_mfhd_mappings()
         if (
@@ -293,12 +303,12 @@ class HoldingsMarcTransformer(MigrationTaskBase):
         logging.info("Init done")

     def add_supplemental_mfhd_mappings(self):
-        if self.task_config.supplemental_mfhd_mapping_rules_file:
+        if self.task_configuration.supplemental_mfhd_mapping_rules_file:
             try:
                 with open(
                     (
                         self.folder_structure.mapping_files_folder
-                        / self.task_config.supplemental_mfhd_mapping_rules_file
+                        / self.task_configuration.supplemental_mfhd_mapping_rules_file
                     ),
                     "r",
                 ) as new_rules_file:
@@ -313,7 +323,7 @@ class HoldingsMarcTransformer(MigrationTaskBase):
                 raise TransformationProcessError(
                     "",
                     "Provided supplemental MFHD mapping rules file not found",
-                    self.task_config.supplemental_mfhd_mapping_rules_file,
+                    self.task_configuration.supplemental_mfhd_mapping_rules_file,
                 )
             else:
                 new_rules = {}

folio_migration_tools/migration_tasks/items_transformer.py

@@ -124,10 +124,10 @@ class ItemsTransformer(MigrationTaskBase):
     statistical_codes_map_file_name: Annotated[
         Optional[str],
         Field(
-            title="Statistical
+            title="Statistical code map file name",
             description=(
-                "
-                "
+                "Path to the file containing the mapping of statistical codes. "
+                "The file should be in TSV format with legacy_stat_code and folio_code columns."
             ),
         ),
     ] = ""
@@ -205,6 +205,7 @@ class ItemsTransformer(MigrationTaskBase):
         csv.register_dialect("tsv", delimiter="\t")
         super().__init__(library_config, task_config, folio_client, use_logging)
         self.task_config = task_config
+        self.task_configuration = self.task_config
         self.check_source_files(
             self.folder_structure.legacy_records_folder, self.task_config.files
         )
@@ -353,7 +354,7 @@ class ItemsTransformer(MigrationTaskBase):
                     record, f"row {idx}", FOLIONamespaces.items
                 )

-                self.mapper.perform_additional_mappings(folio_rec, file_def)
+                self.mapper.perform_additional_mappings(legacy_id, folio_rec, file_def)
                 self.handle_circiulation_notes(folio_rec, self.folio_client.current_user)
                 self.handle_notes(folio_rec)
                 if folio_rec["holdingsRecordId"] in self.boundwith_relationship_map:

folio_migration_tools/migration_tasks/migration_task_base.py

@@ -455,7 +455,7 @@ class MigrationTaskBase:
         logging.info("No mapping setup for %s", folio_property_name)
         logging.info("%s will have default mapping if any ", folio_property_name)
         logging.info(
-            "Add a file named %s and add the field to the
+            "Add a file named %s and add the field to the field mapping json file.",
             map_file_path,
         )
         return None
@@ -522,6 +522,27 @@ class MarcTaskConfigurationBase(task_configuration.AbstractTaskConfiguration):
             ),
         ),
     ] = False
+    statistical_codes_map_file_name: Annotated[
+        Optional[str],
+        Field(
+            title="Statistical code map file name",
+            description=(
+                "Path to the file containing the mapping of statistical codes. "
+                "The file should be in TSV format with legacy_stat_code and folio_code columns."
+            ),
+        ),
+    ] = ""
+    statistical_code_mapping_fields: Annotated[
+        List[str],
+        Field(
+            title="Statistical code mapping fields",
+            description=(
+                "List of fields + subfields to be used for mapping statistical codes. "
+                "Subfields should be delimited by a \"$\" (eg. 907$a). Single repeating subfields "
+                "will be treated as unique values. Multiple subfields will be concatenated together with a space."
+            ),
+        ),
+    ] = []

 class ExcludeLevelFilter(logging.Filter):
     def __init__(self, level):
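To make the two new MARC task options concrete, hypothetical values might look like this; the file name and field tags are invented, and the comments restate the field descriptions above:

    statistical_codes_map_file_name = "statcodes.tsv"
    statistical_code_mapping_fields = ["907$a", "998$b$c"]
    # "907$a"   -> every repeat of 907$a is looked up on its own as a legacy statistical code
    # "998$b$c" -> subfields b and c are concatenated with a space before the lookup
    # Each resulting legacy value is then translated via the legacy_stat_code -> folio_code map
    # and finally resolved to a statistical code UUID.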

folio_migration_tools/migration_tasks/orders_transformer.py

@@ -157,6 +157,7 @@ class OrdersTransformer(MigrationTaskBase):
         super().__init__(library_config, task_config, folio_client, use_logging)
         self.object_type_name = self.get_object_type().name
         self.task_config = task_config
+        self.task_configuration = self.task_config
         self.files = self.list_source_files()
         self.total_records = 0
         self.current_folio_record: dict = {}
@@ -175,6 +176,7 @@ class OrdersTransformer(MigrationTaskBase):
         self.mapper = CompositeOrderMapper(
             self.folio_client,
             self.library_configuration,
+            self.task_configuration,
             self.orders_map,
             self.load_id_map(self.folder_structure.organizations_id_map_path, True),
             self.load_instance_id_map(True),

folio_migration_tools/migration_tasks/user_transformer.py

@@ -119,6 +119,7 @@ class UserTransformer(MigrationTaskBase):
     ):
         super().__init__(library_config, task_config, folio_client, use_logging)
         self.task_config = task_config
+        self.task_configuration = self.task_config
         self.total_records = 0

         self.user_map = self.setup_records_map(
|