folio_data_import-0.5.0b3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1252 @@
1
+ import asyncio
2
+ import datetime
3
+ import glob
4
+ import io
5
+ import json
6
+ import logging
7
+ import math
8
+ import os
9
+ import sys
10
+ import uuid
11
+ from contextlib import ExitStack
12
+ from datetime import datetime as dt
13
+ from functools import cached_property
14
+ from pathlib import Path
15
+ from time import sleep
16
+ from typing import Annotated, BinaryIO, Callable, Dict, Generator, List, cast
17
+
18
+ import cyclopts
19
+ import folioclient
20
+ import httpx
21
+ import pymarc
22
+ import questionary
23
+ import tabulate
24
+ from humps import decamelize
25
+ from pydantic import BaseModel, Field
26
+ from rich.logging import RichHandler
27
+
28
+ from folio_data_import import get_folio_connection_parameters, __version__ as app_version
29
+ from folio_data_import._progress import (
30
+ RichProgressReporter,
31
+ ProgressReporter,
32
+ NoOpProgressReporter,
33
+ )
34
+ from folio_data_import.custom_exceptions import (
35
+ FolioDataImportBatchError,
36
+ FolioDataImportJobError,
37
+ )
38
+ from folio_data_import.marc_preprocessors._preprocessors import MARCPreprocessor
39
+
40
+ try:
41
+ datetime_utc = datetime.UTC
42
+ except AttributeError:
43
+ datetime_utc = datetime.timezone.utc
44
+
45
+
46
+ # The order in which the report summary should be displayed
47
+ REPORT_SUMMARY_ORDERING = {"created": 0, "updated": 1, "discarded": 2, "error": 3}
48
+
49
+ # Set default timeout and backoff values for HTTP requests when retrying job status and final summary checks # noqa: E501
50
+ RETRY_TIMEOUT_START = 5
51
+ RETRY_TIMEOUT_RETRY_FACTOR = 1.5
52
+ RETRY_TIMEOUT_MAX = 25.32
53
+
54
+ # Custom log level for data issues, set to 26
55
+ DATA_ISSUE_LVL_NUM = 26
56
+ logging.addLevelName(DATA_ISSUE_LVL_NUM, "DATA_ISSUES")
57
+
58
+
59
+ class MARCImportStats(BaseModel):
60
+ """Statistics for MARC import operations."""
61
+
62
+ records_sent: int = 0
63
+ records_processed: int = 0
64
+ created: int = 0
65
+ updated: int = 0
66
+ discarded: int = 0
67
+ error: int = 0
68
+
69
+
70
+ class CustomLogger(logging.Logger):
71
+ """Logger subclass with custom data_issues method."""
72
+
73
+ def data_issues(self, msg: str, *args, **kws) -> None:
74
+ """Log data issues at custom level (26)."""
75
+ if self.isEnabledFor(DATA_ISSUE_LVL_NUM):
76
+ self._log(DATA_ISSUE_LVL_NUM, msg, args, **kws)
77
+
78
+
79
+ # Set the custom logger class as the default
80
+ logging.setLoggerClass(CustomLogger)
81
+
82
+ logger: CustomLogger = logging.getLogger(__name__) # type: ignore[assignment]
83
+
84
+
85
+ class MARCImportJob:
86
+ """
87
+ Class to manage importing MARC data (Bib, Authority) into FOLIO using the Change Manager
88
+ APIs (https://github.com/folio-org/mod-source-record-manager/tree/master?tab=readme-ov-file#data-import-workflow),
89
+ rather than file-based Data Import. When executed in an interactive environment, it can provide progress bars
90
+ for tracking the number of records both uploaded and processed.
91
+
92
+ Args:
93
+ folio_client (FolioClient): An instance of the FolioClient class.
94
+ marc_files (list): A list of Path objects representing the MARC files to import.
95
+ import_profile_name (str): The name of the data import job profile to use.
96
+ batch_size (int): The number of source records to include in a record batch (default=10).
97
+ batch_delay (float): The number of seconds to wait between record batches (default=0).
98
+ no_progress (bool): Disable progress bars (e.g., for running in a CI environment).
99
+ marc_record_preprocessors (list or str): A list of callables, or a string representing
100
+ a comma-separated list of MARC record preprocessor names to apply to each record before import.
101
+ preprocessors_args (dict): A dictionary of arguments to pass to the MARC record preprocessor(s).
102
+ let_summary_fail (bool): If True, will not retry or fail the import if the final job summary
103
+ cannot be retrieved (default=False).
104
+ split_files (bool): If True, will split each file into smaller jobs of size `split_size`.
105
+ split_size (int): The number of records to include in each split file (default=1000).
106
+ split_offset (int): The number of split files to skip before starting processing (default=0).
107
+ job_ids_file_path (str): The path to the file where job IDs will be saved (default="marc_import_job_ids.txt").
108
+ show_file_names_in_data_import_logs (bool): If True, will set the file name for each job in the data import logs.
109
+ """ # noqa: E501
110
+
111
+ class Config(BaseModel):
112
+ """Configuration for MARC import operations."""
113
+
114
+ marc_files: Annotated[
115
+ List[Path],
116
+ Field(
117
+ title="MARC files",
118
+ description="List of Path objects representing the MARC files to import",
119
+ ),
120
+ ]
121
+ import_profile_name: Annotated[
122
+ str,
123
+ Field(
124
+ title="Import profile name",
125
+ description="The name of the data import job profile to use",
126
+ ),
127
+ ]
128
+ batch_size: Annotated[
129
+ int,
130
+ Field(
131
+ title="Batch size",
132
+ description="Number of source records to include in a record batch",
133
+ ge=1,
134
+ le=1000,
135
+ ),
136
+ ] = 10
137
+ batch_delay: Annotated[
138
+ float,
139
+ Field(
140
+ title="Batch delay",
141
+ description="Number of seconds to wait between record batches",
142
+ ge=0.0,
143
+ ),
144
+ ] = 0.0
145
+ marc_record_preprocessors: Annotated[
146
+ List[Callable] | str | None,
147
+ Field(
148
+ title="MARC record preprocessor",
149
+ description=(
150
+ "List of callables or string representing preprocessor(s) "
151
+ "to apply to each record before import"
152
+ ),
153
+ ),
154
+ ] = None
155
+ preprocessors_args: Annotated[
156
+ Dict[str, Dict] | None,
157
+ Field(
158
+ title="Preprocessor arguments",
159
+ description="Dictionary of arguments to pass to the MARC record preprocessor(s)",
160
+ ),
161
+ ] = None
162
+ no_progress: Annotated[
163
+ bool,
164
+ Field(
165
+ title="No progress bars",
166
+ description="Disable progress bars (e.g., for CI environments)",
167
+ ),
168
+ ] = False
169
+ no_summary: Annotated[
170
+ bool,
171
+ Field(
172
+ title="No summary",
173
+ description="Skip the final job summary",
174
+ ),
175
+ ] = False
176
+ let_summary_fail: Annotated[
177
+ bool,
178
+ Field(
179
+ title="Let summary fail",
180
+ description="Do not retry or fail import if final job summary cannot be retrieved",
181
+ ),
182
+ ] = False
183
+ split_files: Annotated[
184
+ bool,
185
+ Field(
186
+ title="Split files",
187
+ description="Split each file into smaller jobs",
188
+ ),
189
+ ] = False
190
+ split_size: Annotated[
191
+ int,
192
+ Field(
193
+ title="Split size",
194
+ description="Number of records to include in each split file",
195
+ ge=1,
196
+ ),
197
+ ] = 1000
198
+ split_offset: Annotated[
199
+ int,
200
+ Field(
201
+ title="Split offset",
202
+ description="Number of split files to skip before starting processing",
203
+ ge=0,
204
+ ),
205
+ ] = 0
206
+ job_ids_file_path: Annotated[
207
+ Path | None,
208
+ Field(
209
+ title="Job IDs file path",
210
+ description="Path to file where job IDs will be saved",
211
+ ),
212
+ ] = None
213
+ show_file_names_in_data_import_logs: Annotated[
214
+ bool,
215
+ Field(
216
+ title="Show file names in DI logs",
217
+ description="Show file names in data import logs",
218
+ ),
219
+ ] = False
220
+
221
+ bad_records_file: BinaryIO
222
+ failed_batches_file: BinaryIO
224
+ reporter: ProgressReporter
225
+ task_sent: str
226
+ task_imported: str
227
+ http_client: httpx.Client
228
+ current_file: List[Path] | List[BinaryIO]
229
+ record_batch: List[bytes]
230
+ last_current: int = 0
231
+ total_records_sent: int = 0
232
+ finished: bool = False
233
+ job_id: str = ""
234
+ job_ids: List[str]
235
+ job_hrid: int = 0
236
+ _max_summary_retries: int = 2
237
+ _max_job_retries: int = 2
238
+ _job_retries: int = 0
239
+ _summary_retries: int = 0
240
+
241
+ def __init__(
242
+ self,
243
+ folio_client: folioclient.FolioClient,
244
+ config: "MARCImportJob.Config",
245
+ reporter: ProgressReporter | None = None,
246
+ ) -> None:
247
+ self.folio_client: folioclient.FolioClient = folio_client
248
+ self.config = config
249
+ self.reporter = reporter or NoOpProgressReporter()
250
+ self.current_retry_timeout: float | None = None
251
+ self.marc_record_preprocessor: MARCPreprocessor = MARCPreprocessor(
252
+ config.marc_record_preprocessors or "", **(config.preprocessors_args or {})
253
+ )
254
+ self.job_ids_file_path = config.job_ids_file_path or config.marc_files[0].parent.joinpath(
255
+ "marc_import_job_ids.txt"
256
+ )
257
+
258
+ async def do_work(self) -> None:
259
+ """
260
+ Performs the necessary work for data import.
261
+
262
+ This method initializes an HTTP client, files to store records that fail to send,
263
+ and calls the appropriate method to import MARC files based on the configuration.
264
+
265
+ Returns:
266
+ None
267
+ """
268
+ self.record_batch = []
269
+ self.job_ids = []
270
+ with (
271
+ self.folio_client.get_folio_http_client() as http_client,
272
+ open(
273
+ self.config.marc_files[0].parent.joinpath(
274
+ f"bad_marc_records_{dt.now(tz=datetime_utc).strftime('%Y%m%d%H%M%S')}.mrc"
275
+ ),
276
+ "wb+",
277
+ ) as bad_marc_file,
278
+ open(
279
+ self.config.marc_files[0].parent.joinpath(
280
+ f"failed_batches_{dt.now(tz=datetime_utc).strftime('%Y%m%d%H%M%S')}.mrc"
281
+ ),
282
+ "wb+",
283
+ ) as failed_batches,
284
+ ):
285
+ self.bad_records_file = bad_marc_file
286
+ logger.info(f"Writing bad records to {self.bad_records_file.name}")
287
+ self.failed_batches_file = failed_batches
288
+ logger.info(f"Writing failed batches to {self.failed_batches_file.name}")
289
+ self.http_client = http_client
290
+ if self.config.split_files:
291
+ await self.process_split_files()
292
+ else:
293
+ for file in self.config.marc_files:
294
+ self.current_file = [file]
295
+ await self.import_marc_file()
296
+
297
+ async def process_split_files(self):
298
+ """
299
+ Process the import of files in smaller batches.
300
+ This method is called when `split_files` is set to True.
301
+ It splits each file into smaller chunks and processes them one by one.
302
+ """
303
+ for file in self.config.marc_files:
304
+ with open(file, "rb") as f:
305
+ file_length = await self.read_total_records([f])
306
+ expected_batches = math.ceil(file_length / self.config.split_size)
307
+ logger.info(
308
+ f"{file.name} contains {file_length} records."
309
+ f" Splitting into {expected_batches} {self.config.split_size} record batches."
310
+ )
311
+ zero_pad_parts = len(str(expected_batches)) if expected_batches > 1 else 2
312
+ for idx, batch in enumerate(
313
+ self.split_marc_file(file, self.config.split_size), start=1
314
+ ):
315
+ if idx > self.config.split_offset:
316
+ batch.name = f"{file.name} (Part {idx:0{zero_pad_parts}})"
317
+ self.current_file = [batch]
318
+ await self.import_marc_file()
319
+ self.move_file_to_complete(file)
320
+
321
+ async def wrap_up(self) -> None:
322
+ """
323
+ Wraps up the data import process.
324
+
325
+ This method is called after the import process is complete.
326
+ It checks for empty bad records and error files and removes them.
327
+
328
+ Returns:
329
+ None
330
+ """
331
+ with open(self.bad_records_file.name, "rb") as bad_records:
332
+ if not bad_records.read(1):
333
+ os.remove(bad_records.name)
334
+ logger.info("No bad records found. Removing bad records file.")
335
+ with open(self.failed_batches_file.name, "rb") as failed_batches:
336
+ if not failed_batches.read(1):
337
+ os.remove(failed_batches.name)
338
+ logger.info("No failed batches. Removing failed batches file.")
339
+ with open(self.job_ids_file_path, "a+") as job_ids_file:
340
+ logger.info(f"Writing job IDs to {self.job_ids_file_path}")
341
+ for job_id in self.job_ids:
342
+ job_ids_file.write(f"{job_id}\n")
343
+ logger.info("Import complete.")
344
+ logger.info(f"Total records imported: {self.total_records_sent}")
345
+
346
+ async def get_job_status(self) -> None:
347
+ """
348
+ Retrieves the status of a job execution.
349
+
350
+ Returns:
351
+ None
352
+
353
+ Raises:
354
+ IndexError: If the job execution with the specified ID is not found.
355
+ """
356
+ job_status: Dict | None = None
357
+ try:
358
+ self.current_retry_timeout = (
359
+ (self.current_retry_timeout * RETRY_TIMEOUT_RETRY_FACTOR)
360
+ if self.current_retry_timeout
361
+ else RETRY_TIMEOUT_START
362
+ )
363
+ with self.folio_client.get_folio_http_client() as temp_client:
364
+ temp_client.timeout = self.current_retry_timeout
365
+ self.folio_client.httpx_client = temp_client
366
+ job_status = self.folio_client.folio_get(
367
+ "/metadata-provider/jobExecutions?statusNot=DISCARDED&uiStatusAny"
368
+ "=PREPARING_FOR_PREVIEW&uiStatusAny=READY_FOR_PREVIEW&uiStatusAny=RUNNING&limit=50"
369
+ )
370
+ self.current_retry_timeout = None
371
+ except (folioclient.FolioConnectionError, folioclient.FolioHTTPError) as e:
372
+ error_text = e.response.text if hasattr(e, "response") else str(e)
373
+
374
+ # Raise non-retriable HTTP errors immediately
375
+ if hasattr(e, "response") and e.response.status_code not in [502, 504, 401]:
376
+ raise e
377
+
378
+ # For retriable errors or connection errors
379
+ if (
380
+ self.current_retry_timeout is not None
381
+ and self.current_retry_timeout <= RETRY_TIMEOUT_MAX
382
+ ):
383
+ logger.warning(f"SERVER ERROR fetching job status: {error_text}. Retrying.")
384
+ sleep(0.25)
385
+ return await self.get_job_status()
386
+ elif (
387
+ self.current_retry_timeout is not None
388
+ and self.current_retry_timeout > RETRY_TIMEOUT_MAX
389
+ ):
390
+ logger.critical(
391
+ f"SERVER ERROR fetching job status: {error_text}. Max retries exceeded."
392
+ )
393
+ raise FolioDataImportJobError(self.job_id, error_text, e) from e
394
+ else:
395
+ raise e
396
+ except Exception as e:
397
+ logger.error(f"Error fetching job status. {e}")
398
+
399
+ if job_status is None:
400
+ return
401
+
402
+ try:
403
+ status = [job for job in job_status["jobExecutions"] if job["id"] == self.job_id][0]
404
+ self.reporter.update_task(
405
+ self.task_imported,
406
+ advance=status["progress"]["current"] - self.last_current,
407
+ )
408
+ self.last_current = status["progress"]["current"]
409
+ except (IndexError, ValueError, KeyError):
410
+ logger.debug(f"No active job found with ID {self.job_id}. Checking for finished job.")
411
+ try:
412
+ job_status = self.folio_client.folio_get(
413
+ "/metadata-provider/jobExecutions?limit=100&sortBy=completed_date%2Cdesc&statusAny"
414
+ "=COMMITTED&statusAny=ERROR&statusAny=CANCELLED"
415
+ )
416
+ status = [job for job in job_status["jobExecutions"] if job["id"] == self.job_id][
417
+ 0
418
+ ]
419
+ self.reporter.update_task(
420
+ self.task_imported,
421
+ advance=status["progress"]["current"] - self.last_current,
422
+ )
423
+ self.last_current = status["progress"]["current"]
424
+ self.finished = True
425
+ except (folioclient.FolioConnectionError, folioclient.FolioHTTPError) as e:
426
+ # Raise non-retriable HTTP errors immediately
427
+ if hasattr(e, "response") and e.response.status_code not in [502, 504]:
428
+ raise e
429
+
430
+ # Retry retriable errors or connection errors
431
+ error_text = e.response.text if hasattr(e, "response") else str(e)
432
+ logger.warning(f"SERVER ERROR fetching job status: {error_text}. Retrying.")
433
+ sleep(0.25)
434
+ with self.folio_client.get_folio_http_client() as temp_client:
435
+ temp_client.timeout = self.current_retry_timeout
436
+ self.folio_client.httpx_client = temp_client
437
+ return await self.get_job_status()
438
+
439
+ async def set_job_file_name(self) -> None:
440
+ """
441
+ Sets the file name for the current job execution.
442
+
443
+ Returns:
444
+ None
445
+ """
446
+ try:
447
+ job_object = self.http_client.get(
448
+ "/change-manager/jobExecutions/" + self.job_id,
449
+ )
450
+ job_object.raise_for_status()
451
+ job_object_json = job_object.json()
452
+ job_object_json.update({"fileName": self.current_file[0].name})
453
+ set_file_name = self.http_client.put(
454
+ "/change-manager/jobExecutions/" + self.job_id,
455
+ json=job_object_json,
456
+ )
457
+ set_file_name.raise_for_status()
458
+ except httpx.HTTPError as e:
459
+ logger.error(
460
+ "Error setting job file name: "
461
+ + str(e)
462
+ + "\n"
463
+ + getattr(getattr(e, "response", ""), "text", "")
464
+ )
465
+ raise e
466
+
467
+ async def create_folio_import_job(self) -> None:
468
+ """
469
+ Creates a job execution for importing data into FOLIO.
470
+
471
+ Returns:
472
+ None
473
+
474
+ Raises:
475
+ FolioHTTPError: If there is an error creating the job.
476
+ """
477
+ try:
478
+ job_response = self.folio_client.folio_post(
479
+ "/change-manager/jobExecutions",
480
+ {"sourceType": "ONLINE", "userId": self.folio_client.current_user},
481
+ )
482
+ except (folioclient.FolioConnectionError, folioclient.FolioHTTPError) as e:
483
+ # Raise non-retriable HTTP errors immediately
484
+ if hasattr(e, "response") and e.response.status_code not in [502, 504]:
485
+ raise e
486
+
487
+ # Retry retriable errors or connection errors
488
+ error_text = e.response.text if hasattr(e, "response") else str(e)
489
+ logger.warning(f"SERVER ERROR creating job: {error_text}. Retrying.")
490
+ sleep(0.25)
491
+ return await self.create_folio_import_job()
492
+
493
+ try:
494
+ self.job_id = job_response["parentJobExecutionId"]
495
+ except (KeyError, TypeError) as e:
496
+ logger.error(
497
+ f"Invalid job response from FOLIO API. Expected 'parentJobExecutionId' key. "
498
+ f"Response: {job_response}"
499
+ )
500
+ raise ValueError(f"FOLIO API returned invalid job response: {job_response}") from e
501
+
502
+ if self.config.show_file_names_in_data_import_logs:
503
+ await self.set_job_file_name()
504
+ self.job_ids.append(self.job_id)
505
+ logger.info(f"Created job: {self.job_id}")
506
+
507
+ @cached_property
508
+ def import_profile(self) -> dict:
509
+ """
510
+ Returns the import profile for the current job execution.
511
+
512
+ Returns:
513
+ dict: The import profile for the current job execution.
514
+ """
515
+ import_profiles = self.folio_client.folio_get(
516
+ "/data-import-profiles/jobProfiles",
517
+ "jobProfiles",
518
+ query_params={"limit": "1000"},
519
+ )
520
+ profile = [
521
+ profile
522
+ for profile in import_profiles
523
+ if profile["name"] == self.config.import_profile_name
524
+ ][0]
525
+ return profile
526
+
527
+ async def set_job_profile(self) -> None:
528
+ """
529
+ Sets the job profile for the current job execution.
530
+
531
+ Returns:
532
+ The response from the HTTP request to set the job profile.
533
+ """
534
+ logger.info(
535
+ f"Setting job profile: {self.import_profile['name']} ({self.import_profile['id']})"
536
+ f" for job {self.job_id}"
537
+ )
538
+ set_job_profile = self.http_client.put(
539
+ "/change-manager/jobExecutions/" + self.job_id + "/jobProfile",
540
+ json={
541
+ "id": self.import_profile["id"],
542
+ "name": self.import_profile["name"],
543
+ "dataType": "MARC",
544
+ },
545
+ )
546
+ try:
547
+ set_job_profile.raise_for_status()
548
+ self.job_hrid = set_job_profile.json()["hrId"]
549
+ logger.info(f"Job HRID: {self.job_hrid}")
550
+ except httpx.HTTPError as e:
551
+ logger.error(
552
+ "Error creating job: "
553
+ + str(e)
554
+ + "\n"
555
+ + getattr(getattr(e, "response", ""), "text", "")
556
+ )
557
+ raise e
558
+
559
+ @staticmethod
560
+ async def _count_records(files: List[BinaryIO]) -> int:
561
+ """
562
+ Internal method to count the total number of records in the given files.
563
+
564
+ Args:
565
+ files (list): List of files to read.
566
+
567
+ Returns:
568
+ int: The total number of records found in the files.
569
+ """
570
+ total_records = 0
571
+ for import_file in files:
572
+ while True:
573
+ chunk = import_file.read(104857600)  # read in 100 MiB chunks
574
+ if not chunk:
575
+ break
576
+ total_records += chunk.count(b"\x1d")
577
+ import_file.seek(0)
578
+ return total_records
579
+
580
+ @staticmethod
581
+ async def read_total_records(files: List[BinaryIO]) -> int:
582
+ """
583
+ Count records from files with per-file logging.
584
+
585
+ Args:
586
+ files (list): List of files to read.
587
+
588
+ Returns:
589
+ int: The total number of records found in the files.
590
+ """
591
+ total_records = 0
592
+ for import_file in files:
593
+ file_name = os.path.basename(import_file.name)
594
+ logger.info(f"Counting records in {file_name}...")
595
+ file_record_count = await MARCImportJob._count_records([import_file])
596
+ total_records += file_record_count
597
+ logger.info(f"Counted {file_record_count} records in {file_name}")
598
+ return total_records
599
+
600
+ async def process_record_batch(self, batch_payload) -> None:
601
+ """
602
+ Processes a record batch.
603
+
604
+ Args:
605
+ batch_payload (dict): A records payload containing the current batch of MARC records.
606
+ """
607
+ try:
608
+ post_batch = self.http_client.post(
609
+ "/change-manager/jobExecutions/" + self.job_id + "/records",
610
+ json=batch_payload,
611
+ )
612
+ except (httpx.ConnectTimeout, httpx.ReadTimeout):
613
+ logger.warning(f"CONNECTION ERROR posting batch {batch_payload['id']}. Retrying...")
614
+ sleep(0.25)
615
+ return await self.process_record_batch(batch_payload)
616
+ try:
617
+ post_batch.raise_for_status()
618
+ self.total_records_sent += len(self.record_batch)
619
+ self.record_batch = []
620
+ self.reporter.update_task(self.task_sent, advance=len(batch_payload["initialRecords"]))
621
+ except httpx.HTTPStatusError as e:
622
+ if e.response.status_code in [
623
+ 500,
624
+ 400,
625
+ 422,
626
+ ]: # TODO: Update once we no longer have to support < Sunflower to just be 400
627
+ self.total_records_sent += len(self.record_batch)
628
+ self.record_batch = []
629
+ self.reporter.update_task(
630
+ self.task_sent, advance=len(batch_payload["initialRecords"])
631
+ )
632
+ else:
633
+ for record in self.record_batch:
634
+ self.failed_batches_file.write(record)
635
+ raise FolioDataImportBatchError(
636
+ batch_payload["id"], f"{e}\n{e.response.text}", e
637
+ ) from e
638
+ await self.get_job_status()
639
+ sleep(self.config.batch_delay)
640
+
641
+ async def process_records(self, files, total_records: int) -> None:
642
+ """
643
+ Process records from the given files.
644
+
645
+ Args:
646
+ files (list): List of files to process.
647
+ total_records (int): Total number of records to process.
649
+
650
+ Returns:
651
+ None
652
+ """
653
+ counter = 0
654
+ for import_file in files:
655
+ file_path = Path(import_file.name)
656
+ self.reporter.update_task(
657
+ self.task_sent,
658
+ description=f"Sent ({os.path.basename(import_file.name)})",
659
+ )
660
+ reader = pymarc.MARCReader(import_file, hide_utf8_warnings=True)
661
+ for idx, record in enumerate(reader, start=1):
662
+ if len(self.record_batch) == self.config.batch_size:
663
+ await self.process_record_batch(
664
+ await self.create_batch_payload(
665
+ counter,
666
+ total_records,
667
+ counter == total_records,
668
+ ),
669
+ )
670
+ sleep(0.25)
671
+ if record:
672
+ record = self.marc_record_preprocessor.do_work(record)
673
+ self.record_batch.append(record.as_marc())
674
+ counter += 1
675
+ else:
676
+ logger.data_issues(
677
+ "RECORD FAILED\t%s\t%s\t%s",
678
+ f"{file_path.name}:{idx}",
679
+ f"Error reading {idx} record from {file_path}. Skipping."
680
+ f" Writing current chunk to {self.bad_records_file.name}.",
681
+ "",
682
+ )
683
+ if reader.current_chunk:
684
+ self.bad_records_file.write(reader.current_chunk)
685
+ if not self.config.split_files:
686
+ self.move_file_to_complete(file_path)
687
+ if self.record_batch or not self.finished:
688
+ await self.process_record_batch(
689
+ await self.create_batch_payload(
690
+ counter,
691
+ total_records,
692
+ counter == total_records,
693
+ ),
694
+ )
695
+
696
+ def move_file_to_complete(self, file_path: Path) -> None:
697
+ import_complete_path = file_path.parent.joinpath("import_complete")
698
+ if not import_complete_path.exists():
699
+ logger.debug(f"Creating import_complete directory: {import_complete_path.absolute()}")
700
+ import_complete_path.mkdir(exist_ok=True)
701
+ logger.debug(f"Moving {file_path} to {import_complete_path.absolute()}")
702
+ file_path.rename(file_path.parent.joinpath("import_complete", file_path.name))
703
+
704
+ async def create_batch_payload(self, counter: int, total_records, is_last: bool) -> dict:
705
+ """
706
+ Create a batch payload for data import.
707
+
708
+ Args:
709
+ counter (int): The current counter value.
710
+ total_records (int): The total number of records.
711
+ is_last (bool): Indicates if this is the last batch.
712
+
713
+ Returns:
714
+ dict: The batch payload containing the ID, records metadata, and initial records.
715
+ """
716
+ return {
717
+ "id": str(uuid.uuid4()),
718
+ "recordsMetadata": {
719
+ "last": is_last,
720
+ "counter": counter,
721
+ "contentType": "MARC_RAW",
722
+ "total": total_records,
723
+ },
724
+ "initialRecords": [{"record": x.decode()} for x in self.record_batch],
725
+ }
726
+
727
+ @staticmethod
728
+ def split_marc_file(file_path: Path, batch_size: int) -> Generator[io.BytesIO, None, None]:
729
+ """Generator to iterate over MARC records in batches, yielding BytesIO objects."""
730
+ with open(file_path, "rb") as f:
731
+ batch = io.BytesIO()
732
+ count = 0
733
+
734
+ while True:
735
+ leader = f.read(24)
736
+ if not leader:
737
+ break # End of file
738
+
739
+ try:
740
+ record_length = int(leader[:5]) # Extract record length from leader
741
+ except ValueError as ve:
742
+ raise ValueError("Invalid MARC record length encountered.") from ve
743
+
744
+ record_body = f.read(record_length - 24)
745
+ if len(record_body) != record_length - 24:
746
+ raise ValueError("Unexpected end of file while reading MARC record.")
747
+
748
+ # Verify record terminator
749
+ if record_body[-1:] != b"\x1d":
750
+ raise ValueError(
751
+ "MARC record does not end with the expected terminator (0x1D)."
752
+ )
753
+
754
+ # Write the full record to the batch buffer
755
+ batch.write(leader + record_body)
756
+ count += 1
757
+
758
+ if count >= batch_size:
759
+ batch.seek(0)
760
+ yield batch
761
+ batch = io.BytesIO() # Reset buffer
762
+ count = 0
763
+
764
+ # Yield any remaining records
765
+ if count > 0:
766
+ batch.seek(0)
767
+ yield batch
768
+
769
+ async def import_marc_file(self) -> None:
770
+ """
771
+ Imports the current MARC file into FOLIO.
772
+
773
+ This method performs the following steps:
774
+ 1. Creates a FOLIO import job.
775
+ 2. Retrieves the import profile.
776
+ 3. Sets the job profile.
777
+ 4. Opens the MARC file(s) and reads the total number of records.
778
+ 5. Displays progress bars for imported and sent records.
779
+ 6. Processes the records and updates the progress bars.
780
+ 7. Checks the job status periodically until the import is finished.
781
+
782
+ Note: This method assumes that the necessary instance attributes are already set.
783
+
784
+ Returns:
785
+ None
786
+ """
787
+ await self.create_folio_import_job()
788
+ await self.set_job_profile()
789
+ with ExitStack() as stack:
790
+ files: List[BinaryIO]
791
+ try:
792
+ if isinstance(self.current_file[0], Path):
793
+ path_list = cast(List[Path], self.current_file)
794
+ files = [stack.enter_context(open(file, "rb")) for file in path_list]
795
+ elif isinstance(self.current_file[0], io.BytesIO):
796
+ bytesio_list = cast(List[io.BytesIO], self.current_file)
797
+ files = [stack.enter_context(file) for file in bytesio_list]
798
+ else:
799
+ raise ValueError("Invalid file type. Must be Path or BytesIO.")
800
+ except IndexError as e:
801
+ logger.error(f"Error opening file: {e}")
802
+ raise e
803
+
804
+ total_records = await self._count_records(files)
805
+
806
+ with self.reporter:
807
+ try:
808
+ self.task_sent = self.reporter.start_task(
809
+ "sent", total=total_records, description="Sent"
810
+ )
811
+ self.task_imported = self.reporter.start_task(
812
+ f"imported_{self.job_hrid}",
813
+ total=total_records,
814
+ description=f"Imported ({self.job_hrid})",
815
+ )
816
+ await self.process_records(files, total_records)
817
+ while not self.finished:
818
+ await self.get_job_status()
819
+ except FolioDataImportBatchError as e:
820
+ logger.error(f"Unhandled error posting batch {e.batch_id}: {e.message}")
821
+ await self.cancel_job()
822
+ raise e
823
+ except FolioDataImportJobError as e:
824
+ await self.cancel_job()
825
+ if self._job_retries < self._max_job_retries:
826
+ self._job_retries += 1
827
+ logger.error(
828
+ f"Unhandled error processing job {e.job_id}: {e.message},"
829
+ f" cancelling and retrying."
830
+ )
831
+ await self.import_marc_file()
832
+ else:
833
+ logger.critical(
834
+ f"Unhandled error processing job {e.job_id}: {e.message},"
835
+ f" cancelling and exiting (maximum retries reached)."
836
+ )
837
+ raise e
838
+ if self.finished and not self.config.no_summary:
839
+ await asyncio.sleep(5)
840
+ await self.log_job_summary()
841
+ elif self.finished:
842
+ logger.info("Skipping final job summary.")
843
+ self.last_current = 0
844
+ self.finished = False
845
+
846
+ async def cancel_job(self) -> None:
847
+ """
848
+ Cancels the current job execution.
849
+
850
+ This method sends a request to cancel the job execution and logs the result.
851
+
852
+ Returns:
853
+ None
854
+ """
855
+ try:
856
+ cancel = self.http_client.delete(
857
+ f"/change-manager/jobExecutions/{self.job_id}/records",
858
+ )
859
+ cancel.raise_for_status()
860
+ self.finished = True
861
+ logger.info(f"Cancelled job: {self.job_id}")
862
+ except (httpx.ConnectTimeout, httpx.ReadTimeout):
863
+ logger.warning(f"CONNECTION ERROR cancelling job {self.job_id}. Retrying...")
864
+ sleep(0.25)
865
+ await self.cancel_job()
866
+
867
+ async def log_job_summary(self):
868
+ if job_summary := await self.get_job_summary():
869
+ job_id = job_summary.pop("jobExecutionId", None)
870
+ total_errors = job_summary.pop("totalErrors", 0)
871
+ columns = ["Summary"] + list(job_summary.keys())
872
+ rows = set()
873
+ for key in columns[1:]:
874
+ rows.update(job_summary[key].keys())
875
+
876
+ table_data = []
877
+ for row in rows:
878
+ metric_name = decamelize(row).split("_")[1]
879
+ table_row = [metric_name]
880
+ for col in columns[1:]:
881
+ table_row.append(job_summary[col].get(row, "N/A"))
882
+ table_data.append(table_row)
883
+ table_data.sort(key=lambda x: REPORT_SUMMARY_ORDERING.get(x[0], 99))
884
+ columns = columns[:1] + [" ".join(decamelize(x).split("_")[:-1]) for x in columns[1:]]
885
+ logger.info(
886
+ f"Results for {'file' if len(self.current_file) == 1 else 'files'}: "
887
+ f"{', '.join([os.path.basename(x.name) for x in self.current_file])}"
888
+ )
889
+ logger.info(
890
+ "\n" + tabulate.tabulate(table_data, headers=columns, tablefmt="fancy_grid"),
891
+ )
892
+ if total_errors:
893
+ logger.info(f"Total errors: {total_errors}. Job ID: {job_id}.")
894
+ else:
895
+ logger.error(f"No job summary available for job #{self.job_hrid}({self.job_id}).")
896
+
897
+ async def get_job_summary(self) -> dict:
898
+ """
899
+ Retrieves the job summary for the current job execution.
900
+
901
+ Returns:
902
+ dict: The job summary for the current job execution.
903
+ """
904
+ try:
905
+ self.current_retry_timeout = (
906
+ (self.current_retry_timeout * RETRY_TIMEOUT_RETRY_FACTOR)
907
+ if self.current_retry_timeout
908
+ else RETRY_TIMEOUT_START
909
+ )
910
+ with self.folio_client.get_folio_http_client() as temp_client:
911
+ temp_client.timeout = self.current_retry_timeout
912
+ self.folio_client.httpx_client = temp_client
913
+ job_summary = self.folio_client.folio_get(
914
+ f"/metadata-provider/jobSummary/{self.job_id}"
915
+ )
916
+ self.current_retry_timeout = None
917
+ except (folioclient.FolioConnectionError, folioclient.FolioHTTPError) as e:
918
+ error_text = e.response.text if hasattr(e, "response") else str(e)
919
+ if hasattr(e, "response") and e.response.status_code not in [502, 504, 404]:
920
+ raise e
921
+
922
+ if (
923
+ self._max_summary_retries > self._summary_retries
924
+ ) and not self.config.let_summary_fail:
925
+ logger.warning(f"SERVER ERROR fetching job summary: {e}. Retrying.")
926
+ sleep(0.25)
927
+ with self.folio_client.get_folio_http_client() as temp_client:
928
+ temp_client.timeout = self.current_retry_timeout
929
+ self.folio_client.httpx_client = temp_client
930
+ self._summary_retries += 1
931
+ return await self.get_job_summary()
932
+ else:
933
+ logger.warning(
934
+ f"SERVER ERROR fetching job summary: {error_text}."
935
+ " Skipping final summary check."
936
+ )
937
+ job_summary = {}
938
+
939
+ return job_summary
940
+
941
+
942
+ def set_up_cli_logging() -> None:
943
+ """
944
+ This function sets up logging for the CLI.
945
+ """
946
+ logger.setLevel(logging.INFO)
947
+ logger.propagate = False
948
+
949
+ # Set up file and stream handlers
950
+ file_handler = logging.FileHandler(
951
+ "folio_data_import_{}.log".format(dt.now().strftime("%Y%m%d%H%M%S"))
952
+ )
953
+ file_handler.setLevel(logging.INFO)
954
+ file_handler.addFilter(ExcludeLevelFilter(DATA_ISSUE_LVL_NUM))
955
+ # file_handler.addFilter(IncludeLevelFilter(25))
956
+ file_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
957
+ file_handler.setFormatter(file_formatter)
958
+ logger.addHandler(file_handler)
959
+
960
+ if not any(
961
+ isinstance(h, logging.StreamHandler) and h.stream == sys.stderr for h in logger.handlers
962
+ ):
963
+ stream_handler = RichHandler(
964
+ show_level=False,
965
+ show_time=False,
966
+ omit_repeated_times=False,
967
+ show_path=False,
968
+ )
969
+ stream_handler.setLevel(logging.INFO)
970
+ stream_handler.addFilter(ExcludeLevelFilter(DATA_ISSUE_LVL_NUM))
971
+ # stream_handler.addFilter(ExcludeLevelFilter(25))
972
+ stream_formatter = logging.Formatter("%(message)s")
973
+ stream_handler.setFormatter(stream_formatter)
974
+ logger.addHandler(stream_handler)
975
+
976
+ # Set up data issues logging
977
+ data_issues_handler = logging.FileHandler(
978
+ "marc_import_data_issues_{}.log".format(dt.now().strftime("%Y%m%d%H%M%S"))
979
+ )
980
+ data_issues_handler.setLevel(DATA_ISSUE_LVL_NUM)
981
+ data_issues_handler.addFilter(IncludeLevelFilter(DATA_ISSUE_LVL_NUM))
982
+ data_issues_formatter = logging.Formatter("%(message)s")
983
+ data_issues_handler.setFormatter(data_issues_formatter)
984
+ logger.addHandler(data_issues_handler)
985
+
986
+ # Stop httpx from logging info messages to the console
987
+ logging.getLogger("httpx").setLevel(logging.WARNING)
988
+
989
+
990
+ app = cyclopts.App(version=app_version)
991
+
992
+
993
+ @app.default
994
+ def main(
995
+ config_file: Annotated[
996
+ Path | None, cyclopts.Parameter(group="Job Configuration Parameters")
997
+ ] = None,
998
+ *,
999
+ gateway_url: Annotated[
1000
+ str | None,
1001
+ cyclopts.Parameter(
1002
+ env_var=["FOLIO_GATEWAY_URL"],
1003
+ show_env_var=True,
1004
+ group="FOLIO Connection Parameters",
1005
+ ),
1006
+ ] = None,
1007
+ tenant_id: Annotated[
1008
+ str | None,
1009
+ cyclopts.Parameter(
1010
+ env_var=["FOLIO_TENANT_ID"],
1011
+ show_env_var=True,
1012
+ group="FOLIO Connection Parameters",
1013
+ ),
1014
+ ] = None,
1015
+ username: Annotated[
1016
+ str | None,
1017
+ cyclopts.Parameter(
1018
+ env_var=["FOLIO_USERNAME"],
1019
+ show_env_var=True,
1020
+ group="FOLIO Connection Parameters",
1021
+ ),
1022
+ ] = None,
1023
+ password: Annotated[
1024
+ str | None,
1025
+ cyclopts.Parameter(
1026
+ env_var=["FOLIO_PASSWORD"],
1027
+ show_env_var=True,
1028
+ group="FOLIO Connection Parameters",
1029
+ ),
1030
+ ] = None,
1031
+ marc_file_paths: Annotated[
1032
+ List[Path] | None,
1033
+ cyclopts.Parameter(
1034
+ consume_multiple=True,
1035
+ name=["--marc-file-paths", "--marc-file-path"],
1036
+ help="Path(s) to MARC file(s). Accepts multiple values and glob patterns.",
1037
+ group="Job Configuration Parameters",
1038
+ ),
1039
+ ] = None,
1040
+ member_tenant_id: Annotated[
1041
+ str | None,
1042
+ cyclopts.Parameter(
1043
+ env_var="FOLIO_MEMBER_TENANT_ID",
1044
+ show_env_var=True,
1045
+ group="FOLIO Connection Parameters",
1046
+ ),
1047
+ ] = None,
1048
+ import_profile_name: Annotated[
1049
+ str | None, cyclopts.Parameter(group="Job Configuration Parameters")
1050
+ ] = None,
1051
+ batch_size: Annotated[int, cyclopts.Parameter(group="Job Configuration Parameters")] = 10,
1052
+ batch_delay: Annotated[float, cyclopts.Parameter(group="Job Configuration Parameters")] = 0.0,
1053
+ preprocessors: Annotated[
1054
+ str | None,
1055
+ cyclopts.Parameter(
1056
+ name=["--preprocessor", "--preprocessors"], group="Job Configuration Parameters"
1057
+ ),
1058
+ ] = None,
1059
+ preprocessors_config: Annotated[
1060
+ str | None,
1061
+ cyclopts.Parameter(
1062
+ name=["--preprocessor-config", "--preprocessors-config"],
1063
+ group="Job Configuration Parameters",
1064
+ ),
1065
+ ] = None,
1066
+ file_names_in_di_logs: Annotated[
1067
+ bool, cyclopts.Parameter(group="Job Configuration Parameters")
1068
+ ] = False,
1069
+ split_files: Annotated[bool, cyclopts.Parameter(group="Job Configuration Parameters")] = False,
1070
+ split_size: Annotated[int, cyclopts.Parameter(group="Job Configuration Parameters")] = 1000,
1071
+ split_offset: Annotated[int, cyclopts.Parameter(group="Job Configuration Parameters")] = 0,
1072
+ no_progress: Annotated[bool, cyclopts.Parameter(group="Job Configuration Parameters")] = False,
1073
+ no_summary: Annotated[bool, cyclopts.Parameter(group="Job Configuration Parameters")] = False,
1074
+ let_summary_fail: Annotated[
1075
+ bool, cyclopts.Parameter(group="Job Configuration Parameters")
1076
+ ] = False,
1077
+ job_ids_file_path: Annotated[
1078
+ str | None, cyclopts.Parameter(group="Job Configuration Parameters")
1079
+ ] = None,
1080
+ ) -> None:
1081
+ """
1082
+ Command-line interface to batch import MARC records into FOLIO using FOLIO Data Import.
1083
+
1084
+ Parameters:
1085
+ config_file (Path | None): Path to JSON config file for the import job, overrides other parameters if provided.
1086
+ gateway_url (str): The FOLIO API Gateway URL.
1087
+ tenant_id (str): The tenant id.
1088
+ username (str): The FOLIO username.
1089
+ password (str): The FOLIO password.
1090
+ marc_file_paths (List[Path]): The MARC file(s) or glob pattern(s) to import.
1091
+ member_tenant_id (str): The FOLIO ECS member tenant id (if applicable).
1092
+ import_profile_name (str): The name of the import profile to use.
1093
+ batch_size (int): The number of records to send in each batch.
1094
+ batch_delay (float): The delay (in seconds) between sending each batch.
1095
+ preprocessors (str): Comma-separated list of MARC record preprocessors to use.
1096
+ preprocessors_config (str): Path to JSON config file for the preprocessors.
1097
+ file_names_in_di_logs (bool): Show file names in data import logs.
1098
+ split_files (bool): Split files into smaller batches.
1099
+ split_size (int): The number of records per split batch.
1100
+ split_offset (int): The number of split batches to skip before starting import.
1101
+ no_progress (bool): Disable progress bars.
1102
+ no_summary (bool): Skip the final job summary.
1103
+ let_summary_fail (bool): Let the final summary check fail without exiting.
1105
+ job_ids_file_path (str): Path to file to write job IDs to.
1106
+ """ # noqa: E501
1107
+ set_up_cli_logging()
1108
+ gateway_url, tenant_id, username, password = get_folio_connection_parameters(
1109
+ gateway_url, tenant_id, username, password
1110
+ )
1111
+ folio_client = folioclient.FolioClient(gateway_url, tenant_id, username, password)
1112
+
1113
+ if member_tenant_id:
1114
+ folio_client.tenant_id = member_tenant_id
1115
+
1116
+ # Handle file path expansion
1117
+ marc_files = collect_marc_file_paths(marc_file_paths)
1118
+
1119
+ marc_files.sort()
1120
+
1121
+ if len(marc_files) == 0:
1122
+ logger.critical(f"No files found matching {marc_file_paths}. Exiting.")
1123
+ sys.exit(1)
1124
+ else:
1125
+ logger.info(f"Found {len(marc_files)} MARC file(s): {marc_files}")
1126
+
1127
+ if preprocessors_config:
1128
+ with open(preprocessors_config, "r") as f:
1129
+ preprocessor_args = json.load(f)
1130
+ else:
1131
+ preprocessor_args = {}
1132
+
1133
+ if not import_profile_name:
1134
+ import_profile_name = select_import_profile(folio_client)
1135
+
1136
+ job = None
1137
+ try:
1138
+ if config_file:
1139
+ with open(config_file, "r") as f:
1140
+ config_data = json.load(f)
1141
+ config = MARCImportJob.Config(**config_data)
1142
+ else:
1143
+ config = MARCImportJob.Config(
1144
+ marc_files=marc_files,
1145
+ import_profile_name=import_profile_name,
1146
+ batch_size=batch_size,
1147
+ batch_delay=batch_delay,
1148
+ marc_record_preprocessors=preprocessors,
1149
+ preprocessors_args=preprocessor_args,
1150
+ no_progress=no_progress,
1151
+ no_summary=no_summary,
1152
+ let_summary_fail=let_summary_fail,
1153
+ split_files=split_files,
1154
+ split_size=split_size,
1155
+ split_offset=split_offset,
1156
+ job_ids_file_path=Path(job_ids_file_path) if job_ids_file_path else None,
1157
+ show_file_names_in_data_import_logs=file_names_in_di_logs,
1158
+ )
1159
+
1160
+ # Create progress reporter
1161
+ reporter = (
1162
+ NoOpProgressReporter()
1163
+ if no_progress
1164
+ else RichProgressReporter(show_speed=True, show_time=True)
1165
+ )
1166
+
1167
+ job = MARCImportJob(folio_client, config, reporter)
1168
+ asyncio.run(run_job(job))
1169
+ except Exception as e:
1170
+ logger.error("Could not initialize MARCImportJob: " + str(e))
1171
+ sys.exit(1)
1172
+
1173
+
1174
+ def select_import_profile(folio_client):
1175
+ try:
1176
+ import_profiles = folio_client.folio_get(
1177
+ "/data-import-profiles/jobProfiles",
1178
+ "jobProfiles",
1179
+ query_params={"limit": "1000"},
1180
+ )
1181
+ import_profile_names = [
1182
+ profile["name"] for profile in import_profiles if "marc" in profile["dataType"].lower()
1183
+ ]
1184
+ import_profile_name = questionary.select(
1185
+ "Select an import profile:",
1186
+ choices=import_profile_names,
1187
+ ).ask()
1188
+ except httpx.HTTPStatusError as e:
1189
+ logger.error(
1190
+ f"HTTP Error fetching import profiles: {e}"
1191
+ f"\n{getattr(getattr(e, 'response', ''), 'text', '')}\nExiting."
1192
+ )
1193
+ sys.exit(1)
1194
+ except KeyboardInterrupt:
1195
+ logger.info("Keyboard interrupt received. Exiting.")
1196
+ sys.exit(0)
1197
+ return import_profile_name
1198
+
1199
+
1200
+ def collect_marc_file_paths(marc_file_paths):
1201
+ marc_files: List[Path] = []
1202
+ if marc_file_paths:
1203
+ for file_path in marc_file_paths:
1204
+ # Check if the path contains glob patterns
1205
+ file_path_str = str(file_path)
1206
+ if any(char in file_path_str for char in ["*", "?", "["]):
1207
+ # It's a glob pattern - expand it
1208
+ expanded = glob.glob(file_path_str)
1209
+ marc_files.extend([Path(x) for x in expanded])
1210
+ else:
1211
+ # It's a regular path
1212
+ marc_files.append(file_path)
1213
+ return marc_files
1214
+
1215
+
1216
+ async def run_job(job: MARCImportJob):
1217
+ try:
1218
+ await job.do_work()
1219
+ except httpx.HTTPStatusError as e:
1220
+ logger.error(
1221
+ f"HTTP Error importing files: {e}"
1222
+ f"\n{getattr(getattr(e, 'response', ''), 'text', '')}\nExiting."
1223
+ )
1224
+ sys.exit(1)
1225
+ except Exception as e:
1226
+ logger.error("Error importing files: " + str(e))
1227
+ raise
1228
+ finally:
1229
+ if job:
1230
+ await job.wrap_up()
1231
+
1232
+
1233
+ class ExcludeLevelFilter(logging.Filter):
1234
+ def __init__(self, level) -> None:
1235
+ super().__init__()
1236
+ self.level = level
1237
+
1238
+ def filter(self, record):
1239
+ return record.levelno != self.level
1240
+
1241
+
1242
+ class IncludeLevelFilter(logging.Filter):
1243
+ def __init__(self, level) -> None:
1244
+ super().__init__()
1245
+ self.level = level
1246
+
1247
+ def filter(self, record):
1248
+ return record.levelno == self.level
1249
+
1250
+
1251
+ if __name__ == "__main__":
1252
+ app()
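
Usage note: the module added above is primarily driven through its cyclopts CLI (`app()`), but the same classes can be used programmatically. The sketch below is illustrative only and simply mirrors what `main()` does; the import path `folio_data_import.MARCDataImport`, the gateway URL, the credentials, the MARC file path, and the job profile name are placeholder assumptions and are not taken from this diff.

import asyncio
from pathlib import Path

import folioclient

# Assumed module path; adjust to wherever MARCImportJob and run_job live in the installed package.
from folio_data_import.MARCDataImport import MARCImportJob, run_job

# Placeholder connection details for a FOLIO tenant.
folio_client = folioclient.FolioClient(
    "https://folio-gateway.example.org", "example_tenant", "example_user", "example_password"
)

config = MARCImportJob.Config(
    marc_files=[Path("records.mrc")],  # placeholder MARC file
    import_profile_name="Example MARC Bib import profile",  # placeholder job profile name
    batch_size=10,
)

# Reporter defaults to NoOpProgressReporter when omitted.
job = MARCImportJob(folio_client, config)
asyncio.run(run_job(job))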