bankstatementparser 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,742 @@
1
+ # Copyright (C) 2023 Sebastien Rousseau.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12
+ # implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ """
17
+ pain001_parser.py
18
+
19
+ Provides a class for parsing PAIN.001 format bank statement files.
20
+ """
21
+
22
+ import logging
23
+ import os
24
+ import re
25
+ from collections.abc import Generator
26
+ from io import BytesIO
27
+ from typing import Optional, Union, cast
28
+
29
+ import pandas as pd
30
+ from lxml import etree
31
+
32
+ from .base_parser import BankStatementParser
33
+ from .exceptions import Pain001ParseError
34
+ from .input_validator import InputValidator, ValidationError
35
+ from .record_types import PaymentRecord, SummaryRecord
36
+
37
# Module-level logger; the application configures handlers/levels.
logger = logging.getLogger(__name__)

# Matches the default-namespace declaration of any PAIN.001-family
# document (e.g. urn:iso:std:iso:20022:tech:xsd:pain.001.001.03).
# The declaration is stripped before parsing so XPath expressions can
# use bare tag names instead of namespace-qualified ones.
PAIN_NAMESPACE_PATTERN = re.compile(
    r'xmlns="urn:iso:std:iso:20022:tech:xsd:pain\.\d{3}\.\d{3}\.\d{2}"'
)
43
+
44
+
45
class Pain001Parser(BankStatementParser):
    """
    Class to parse PAIN.001 format bank statement files.

    The file is read fully into memory, its default PAIN namespace is
    stripped, and the XML is parsed eagerly in ``__init__``; the parsed
    tree is kept on ``self.tree`` for ``parse()``/``get_summary()``.
    For large files prefer ``parse_streaming()``.
    """

    def __init__(self, file_name: str) -> None:
        """Initialize the parser with the file name.

        Reads and parses the whole file eagerly, so any I/O or XML
        error surfaces here rather than at ``parse()`` time.

        Args:
            file_name (str): Path to the PAIN.001 file.

        Raises:
            FileNotFoundError: If file does not exist.
            ValidationError: If file validation fails, the file cannot
                be read, or the XML is malformed.
        """
        super().__init__(file_name)
        # Validate input file if it's a raw string path
        if isinstance(file_name, str):
            validator = InputValidator()
            try:
                validated_path = validator.validate_input_file_path(
                    file_name
                )
                # Use the (possibly normalized) validated path from here on.
                file_name = str(validated_path)
                logger.info(f"Input file validated: {file_name}")
            except ValidationError as e:
                logger.error(
                    f"File validation failed for {file_name}: {e}"
                )
                # Check if it's a file read error during validation and re-raise with expected message
                # (callers match on the "Error reading file:" prefix).
                if "Cannot read file for format validation" in str(e):
                    raise ValidationError(
                        f"Error reading file: {str(e).split(': ')[-1]}"
                    ) from e
                raise
            except FileNotFoundError as e:
                logger.error(
                    f"File validation failed for {file_name}: {e}"
                )
                raise

        self.file_name = file_name

        try:
            # Attempt to open and read the file content
            with open(file_name, encoding="utf-8") as f:
                data = f.read()
        except FileNotFoundError as exc:
            logger.error("File %s not found!", file_name)
            raise FileNotFoundError(
                f"PAIN.001 file not found: {file_name}"
            ) from exc
        except PermissionError as exc:
            # PermissionError is an OSError subclass; handle it first so
            # it gets the more specific message.
            logger.error(
                "Permission denied reading file: %s", file_name
            )
            raise ValidationError(
                f"Permission denied reading file: {file_name}"
            ) from exc
        except OSError as e:
            logger.error(
                "An error occurred while reading the file: %s", str(e)
            )
            raise ValidationError(
                f"Error reading file: {str(e)}"
            ) from e

        try:
            # Remove the namespace from the XML data for easier parsing
            data_bytes = self._normalize_xml_text(data).encode("utf-8")

            # Parse the XML data with security settings: entity
            # resolution, DTD loading and network access are disabled
            # to prevent XXE-style attacks on untrusted input.
            parser = etree.XMLParser(
                recover=False,
                encoding="utf-8",
                resolve_entities=False,
                load_dtd=False,
                no_network=True,
            )
            self.tree = etree.fromstring(data_bytes, parser)
        except ValueError as e:
            # NOTE(review): lxml normally raises XMLSyntaxError (an
            # LxmlError) for malformed XML; ValueError is handled
            # defensively — confirm which inputs actually trigger it.
            logger.error("XML syntax error: %s", str(e))
            # Check if it's a basic XML structure error and use appropriate message
            error_msg = str(e)
            if (
                "Start tag expected" in error_msg
                and "not found" in error_msg
            ):
                raise ValidationError(
                    f"Error parsing XML: {error_msg}"
                ) from e
            else:
                raise ValidationError(
                    f"Invalid XML format: {error_msg}"
                ) from e
        except etree.LxmlError as e:
            logger.error(
                "An error occurred while parsing the XML: %s", str(e)
            )
            # Same message selection as the ValueError branch so both
            # paths present identical errors to callers.
            error_msg = str(e)
            if (
                "Start tag expected" in error_msg
                and "not found" in error_msg
            ):
                raise ValidationError(
                    f"Error parsing XML: {error_msg}"
                ) from e
            else:
                raise ValidationError(
                    f"Invalid XML format: {error_msg}"
                ) from e
156
+
157
+ def _normalize_xml_text(self, data: str) -> str:
158
+ """Strip the default PAIN namespace for simpler XPath handling."""
159
+ return PAIN_NAMESPACE_PATTERN.sub("", data)
160
+
161
+ def parse(
162
+ self,
163
+ output_file: Optional[str] = None,
164
+ redact_pii: bool = False,
165
+ ) -> pd.DataFrame:
166
+ """
167
+ Parse the PAIN.001 XML file and return structured payment data.
168
+
169
+ Extracts group header, payment information, and individual credit
170
+ transfer transactions into a flat DataFrame.
171
+
172
+ Args:
173
+ output_file (str, optional): Path to save parsed data as CSV.
174
+ redact_pii (bool): Whether to redact PII fields.
175
+
176
+ Returns:
177
+ pd.DataFrame: Parsed payment data with columns for header,
178
+ payment, and transaction-level fields.
179
+
180
+ Raises:
181
+ ParseError: If parsing fails for any reason.
182
+ """
183
+ try:
184
+ # Get the root element
185
+ root = self.tree.getroottree().getroot()
186
+
187
+ # Check for required PAIN.001 structure
188
+ customer_credit_transfer = root.find(".//CstmrCdtTrfInitn")
189
+ if customer_credit_transfer is None:
190
+ raise ValueError(
191
+ "Invalid PAIN.001 structure: missing CstmrCdtTrfInitn element"
192
+ )
193
+
194
+ # Pre-extract header information once
195
+ group_header = root.find(".//CstmrCdtTrfInitn/GrpHdr")
196
+ header_fields: dict[str, Optional[str]] = {}
197
+ if group_header is not None:
198
+ # Batch extract all header fields in single iteration
199
+ for child in group_header:
200
+ if child.tag in ["MsgId", "CreDtTm", "NbOfTxs"]:
201
+ header_fields[child.tag] = child.text
202
+
203
+ # Extract initiating party
204
+ init_party_elem = group_header.find("InitgPty/Nm")
205
+ header_fields["InitgPty"] = (
206
+ init_party_elem.text
207
+ if init_party_elem is not None
208
+ else None
209
+ )
210
+
211
+ # Batch extract payment information records
212
+ payment_info_records = root.findall(
213
+ ".//CstmrCdtTrfInitn/PmtInf"
214
+ )
215
+ payments: list[dict[str, Optional[str]]] = []
216
+
217
+ for pmt in payment_info_records:
218
+ # Pre-extract all payment-level fields in single iteration
219
+ pmt_fields: dict[str, Optional[str]] = {}
220
+ for child in pmt:
221
+ if child.tag in [
222
+ "PmtInfId",
223
+ "PmtMtd",
224
+ "NbOfTxs",
225
+ "CtrlSum",
226
+ "ReqdExctnDt",
227
+ "ChrgBr",
228
+ ]:
229
+ pmt_fields[child.tag] = child.text
230
+ elif child.tag == "Dbtr":
231
+ # Extract debtor information
232
+ dbtr_name = child.find("Nm")
233
+ pmt_fields["DbtrNm"] = (
234
+ dbtr_name.text
235
+ if dbtr_name is not None
236
+ else None
237
+ )
238
+ # Extract debtor account
239
+ dbtr_acct = child.find("DbtrAcct/Id/IBAN")
240
+ pmt_fields["DbtrIBAN"] = (
241
+ dbtr_acct.text
242
+ if dbtr_acct is not None
243
+ else None
244
+ )
245
+ elif child.tag == "DbtrAgt":
246
+ # Extract debtor agent
247
+ dbtr_agt = child.find("FinInstnId/BIC")
248
+ pmt_fields["DbtrBIC"] = (
249
+ dbtr_agt.text
250
+ if dbtr_agt is not None
251
+ else None
252
+ )
253
+
254
+ # Batch process all transactions for this payment
255
+ transactions = pmt.findall("CdtTrfTxInf")
256
+ for tx in transactions:
257
+ payment: dict[str, Optional[str]] = (
258
+ pmt_fields.copy()
259
+ ) # Start with payment-level data
260
+
261
+ # Pre-extract all transaction fields in single iteration
262
+ for child in tx:
263
+ if child.tag == "PmtId":
264
+ end_to_end_elem = child.find("EndToEndId")
265
+ payment["EndToEndId"] = (
266
+ end_to_end_elem.text
267
+ if end_to_end_elem is not None
268
+ else None
269
+ )
270
+ elif child.tag == "Amt":
271
+ instd_amt_elem = child.find("InstdAmt")
272
+ if instd_amt_elem is not None:
273
+ payment["InstdAmt"] = (
274
+ instd_amt_elem.text
275
+ )
276
+ payment["Currency"] = (
277
+ instd_amt_elem.get("Ccy")
278
+ )
279
+ elif child.tag == "CdtrAgt":
280
+ cdtr_agt_elem = child.find("FinInstnId/BIC")
281
+ payment["CdtrBIC"] = (
282
+ cdtr_agt_elem.text
283
+ if cdtr_agt_elem is not None
284
+ else None
285
+ )
286
+ elif child.tag == "Cdtr":
287
+ cdtr_name_elem = child.find("Nm")
288
+ payment["CdtrNm"] = (
289
+ cdtr_name_elem.text
290
+ if cdtr_name_elem is not None
291
+ else None
292
+ )
293
+ elif child.tag == "RmtInf":
294
+ ustrd_elem = child.find("Ustrd")
295
+ payment["RmtInf"] = (
296
+ ustrd_elem.text
297
+ if ustrd_elem is not None
298
+ else None
299
+ )
300
+
301
+ # Add header fields to each payment record
302
+ payment.update(header_fields)
303
+ payments.append(payment)
304
+
305
+ # Create DataFrame from parsed data
306
+ df = pd.DataFrame.from_records(payments)
307
+
308
+ if output_file:
309
+ # Use atomic write operation with temp file
310
+ temp_file = f"{output_file}.tmp"
311
+ df.to_csv(temp_file, index=False)
312
+ os.replace(temp_file, output_file)
313
+ logger.info("Parsed data saved to %s", output_file)
314
+
315
+ return df
316
+ except (
317
+ OSError,
318
+ ValueError,
319
+ TypeError,
320
+ etree.LxmlError,
321
+ ) as e:
322
+ raise Pain001ParseError(
323
+ f"Error parsing PAIN.001 file: {e}"
324
+ ) from e
325
+
326
    def parse_streaming(
        self, redact_pii: bool = False
    ) -> Generator[PaymentRecord, None, None]:
        """
        Parse the PAIN.001 file using streaming XML parsing for large files.
        Yields payment data incrementally to keep memory usage low.

        Parameters:
            redact_pii (bool): Whether to redact PII data (name/IBAN fields).

        Yields:
            PaymentRecord: Individual payment transaction data.

        Raises:
            FileNotFoundError: If the file does not exist.
            ValidationError: If validation or reading fails.
        """
        # Validate input file (re-validated on every call; the file may
        # have changed since __init__ ran).
        if isinstance(self.file_name, str):
            validator = InputValidator()
            try:
                validated_path = validator.validate_input_file_path(
                    self.file_name
                )
                file_path = str(validated_path)
                logger.info(
                    f"Input file validated for streaming: {file_path}"
                )
            except (ValidationError, FileNotFoundError) as e:
                logger.error(
                    f"File validation failed for streaming {self.file_name}: {e}"
                )
                raise
        else:
            file_path = self.file_name

        # Stream-process: read in chunks, strip namespace, feed to
        # iterparse via a temporary file so we never hold the full
        # document in memory. For files that fit comfortably in RAM
        # (< _STREAMING_MEMORY_THRESHOLD) we use BytesIO for speed.
        _STREAMING_MEMORY_THRESHOLD = 50 * 1024 * 1024  # 50 MB

        try:
            file_size = os.path.getsize(file_path)
        except FileNotFoundError as exc:
            logger.error(
                "File %s not found for streaming!",
                file_path,
            )
            raise FileNotFoundError(
                f"PAIN.001 file not found: {file_path}"
            ) from exc
        except OSError as exc:
            logger.error(
                "Cannot stat file for streaming: %s", str(exc)
            )
            raise ValidationError(
                f"Error reading file {file_path}: {exc}"
            ) from exc

        # temp_file is set only on the large-file path; the outer
        # finally below removes it unconditionally.
        temp_file: Optional[str] = None
        try:
            if file_size <= _STREAMING_MEMORY_THRESHOLD:
                # Small file — fast path via BytesIO
                try:
                    with open(
                        file_path, encoding="utf-8"
                    ) as f:
                        data = f.read()
                except FileNotFoundError as exc:
                    logger.error(
                        "File %s not found for streaming!",
                        file_path,
                    )
                    raise FileNotFoundError(
                        f"PAIN.001 file not found: {file_path}"
                    ) from exc
                except PermissionError as exc:
                    logger.error(
                        "Permission denied reading file "
                        "for streaming: %s",
                        file_path,
                    )
                    raise ValidationError(
                        f"Permission denied reading file: "
                        f"{file_path}"
                    ) from exc
                except OSError as e:
                    logger.error(
                        "Error reading file for streaming: %s",
                        str(e),
                    )
                    raise ValidationError(
                        f"Error reading file {file_path}: "
                        f"{str(e)}"
                    ) from e

                data_bytes = self._normalize_xml_text(
                    data
                ).encode("utf-8")
                source_stream: Union[BytesIO, str] = BytesIO(data_bytes)
            else:
                # Large file — chunk-based namespace stripping to a
                # temp file so peak memory stays bounded.
                import tempfile as _tf

                fd, temp_file = _tf.mkstemp(
                    suffix=".xml", prefix="bsp_stream_"
                )
                try:
                    with open(
                        file_path, encoding="utf-8"
                    ) as src, os.fdopen(
                        fd, "w", encoding="utf-8"
                    ) as dst:
                        # 8 MiB text chunks; iter() with the "" sentinel
                        # stops at EOF. NOTE(review): a namespace
                        # declaration split across a chunk boundary would
                        # escape the regex — confirm this is acceptable
                        # for real-world inputs.
                        for chunk in iter(
                            lambda: src.read(8 * 1024 * 1024), ""
                        ):
                            dst.write(
                                PAIN_NAMESPACE_PATTERN.sub(
                                    "", chunk
                                )
                            )
                except FileNotFoundError as exc:
                    logger.error(
                        "File %s not found for streaming!",
                        file_path,
                    )
                    raise FileNotFoundError(
                        f"PAIN.001 file not found: {file_path}"
                    ) from exc
                except PermissionError as exc:
                    logger.error(
                        "Permission denied reading file "
                        "for streaming: %s",
                        file_path,
                    )
                    raise ValidationError(
                        f"Permission denied reading file: "
                        f"{file_path}"
                    ) from exc
                except OSError as e:
                    logger.error(
                        "Error reading file for streaming: %s",
                        str(e),
                    )
                    raise ValidationError(
                        f"Error reading file {file_path}: "
                        f"{str(e)}"
                    ) from e

                source_stream = temp_file

            # Track context for header and payment info
            header_fields: dict[str, Optional[str]] = {}
            current_payment_info: dict[str, Optional[str]] = {}

            # Secure iterparse: entity resolution, DTD loading and
            # network access are disabled (mirrors __init__'s parser).
            for event, elem in etree.iterparse(
                source_stream,
                events=("start", "end"),
                resolve_entities=False,
                load_dtd=False,
                no_network=True,
                huge_tree=False,
            ):
                if event == "end" and elem.tag == "GrpHdr":
                    # Header is captured once and merged into every
                    # yielded record by _parse_streaming_payment.
                    for child in elem:
                        if child.tag in [
                            "MsgId",
                            "CreDtTm",
                            "NbOfTxs",
                        ]:
                            header_fields[child.tag] = child.text
                        elif child.tag == "InitgPty":
                            nm_elem = child.find("Nm")
                            header_fields["InitgPty"] = (
                                nm_elem.text
                                if nm_elem is not None
                                else None
                            )
                    elem.clear()

                elif event == "start" and elem.tag == "PmtInf":
                    # New payment block: reset shared payment context.
                    current_payment_info = {}

                elif event == "end" and elem.tag in (
                    "PmtInfId",
                    "PmtMtd",
                    "NbOfTxs",
                    "CtrlSum",
                    "ReqdExctnDt",
                    "ChrgBr",
                ):
                    # Only capture direct children of PmtInf — the same
                    # tag names occur elsewhere (e.g. NbOfTxs in GrpHdr).
                    parent = elem.getparent()
                    if (
                        parent is not None
                        and parent.tag == "PmtInf"
                    ):
                        current_payment_info[elem.tag] = (
                            elem.text
                        )

                elif event == "end" and elem.tag == "Dbtr":
                    parent = elem.getparent()
                    if (
                        parent is not None
                        and parent.tag == "PmtInf"
                    ):
                        dbtr_name = elem.find("Nm")
                        current_payment_info["DbtrNm"] = (
                            dbtr_name.text
                            if dbtr_name is not None
                            else None
                        )

                elif event == "end" and elem.tag == "DbtrAcct":
                    parent = elem.getparent()
                    if (
                        parent is not None
                        and parent.tag == "PmtInf"
                    ):
                        iban = elem.find("Id/IBAN")
                        current_payment_info["DbtrIBAN"] = (
                            iban.text
                            if iban is not None
                            else None
                        )

                elif event == "end" and elem.tag == "DbtrAgt":
                    parent = elem.getparent()
                    if (
                        parent is not None
                        and parent.tag == "PmtInf"
                    ):
                        bic = elem.find("FinInstnId/BIC")
                        current_payment_info["DbtrBIC"] = (
                            bic.text
                            if bic is not None
                            else None
                        )

                elif (
                    event == "end"
                    and elem.tag == "CdtTrfTxInf"
                ):
                    try:
                        payment_data = (
                            self._parse_streaming_payment(
                                elem,
                                current_payment_info,
                                header_fields,
                                redact_pii,
                            )
                        )
                        yield payment_data
                    except Exception as e:
                        logger.error(
                            "Error parsing payment "
                            "transaction: %s",
                            e,
                        )
                        raise
                    finally:
                        # Free the processed subtree and any fully
                        # processed preceding siblings so memory stays
                        # bounded regardless of document size.
                        elem.clear()
                        while elem.getprevious() is not None:
                            del elem.getparent()[0]
        finally:
            # Always remove the temp file created on the large-file path.
            if temp_file is not None:
                try:
                    os.unlink(temp_file)
                except OSError:
                    pass
594
+
595
+ def _parse_streaming_payment(
596
+ self,
597
+ tx_elem: etree._Element,
598
+ payment_info: dict[str, Optional[str]],
599
+ header_fields: dict[str, Optional[str]],
600
+ redact_pii: bool = False,
601
+ ) -> PaymentRecord:
602
+ """
603
+ Parse a single credit transfer transaction element for streaming mode.
604
+
605
+ Parameters:
606
+ tx_elem (etree.Element): XML element representing a credit transfer transaction.
607
+ payment_info (Dict[str, Any]): Payment-level information.
608
+ header_fields (Dict[str, Any]): Header-level information.
609
+ redact_pii (bool): Whether to redact PII data (address fields).
610
+
611
+ Returns:
612
+ Dict[str, Any]: Parsed payment data.
613
+ """
614
+ # Start with payment-level data
615
+ payment: dict[str, Optional[str]] = payment_info.copy()
616
+
617
+ # Extract transaction-specific fields
618
+ for child in tx_elem:
619
+ if child.tag == "PmtId":
620
+ end_to_end_elem = child.find("EndToEndId")
621
+ payment["EndToEndId"] = (
622
+ end_to_end_elem.text
623
+ if end_to_end_elem is not None
624
+ else None
625
+ )
626
+ elif child.tag == "Amt":
627
+ instd_amt_elem = child.find("InstdAmt")
628
+ if instd_amt_elem is not None:
629
+ payment["InstdAmt"] = instd_amt_elem.text
630
+ payment["Currency"] = instd_amt_elem.get("Ccy")
631
+ elif child.tag == "CdtrAgt":
632
+ cdtr_agt_elem = child.find("FinInstnId/BIC")
633
+ payment["CdtrBIC"] = (
634
+ cdtr_agt_elem.text
635
+ if cdtr_agt_elem is not None
636
+ else None
637
+ )
638
+ elif child.tag == "Cdtr":
639
+ cdtr_name_elem = child.find("Nm")
640
+ payment["CdtrNm"] = (
641
+ cdtr_name_elem.text
642
+ if cdtr_name_elem is not None
643
+ else None
644
+ )
645
+ elif child.tag == "RmtInf":
646
+ ustrd_elem = child.find("Ustrd")
647
+ payment["RmtInf"] = (
648
+ ustrd_elem.text if ustrd_elem is not None else None
649
+ )
650
+
651
+ # Add header fields
652
+ payment.update(header_fields)
653
+
654
+ # Apply PII redaction if requested
655
+ if redact_pii:
656
+ pii_fields = ["DbtrNm", "CdtrNm", "DbtrIBAN", "InitgPty"]
657
+ for field in pii_fields:
658
+ if payment.get(field):
659
+ payment[field] = "***REDACTED***"
660
+
661
+ return cast(PaymentRecord, payment)
662
+
663
+ def get_summary(self, redact_pii: bool = False) -> SummaryRecord:
664
+ """
665
+ Get a summary of the parsed PAIN.001 statement data.
666
+
667
+ Returns:
668
+ Dict[str, Any]: Summary information including message details,
669
+ transaction counts, and total amounts.
670
+ """
671
+ try:
672
+ # Get the root element
673
+ root = self.tree.getroottree().getroot()
674
+
675
+ # Get the group header and batch extract all fields
676
+ group_header = root.find(".//CstmrCdtTrfInitn/GrpHdr")
677
+ header_data = {
678
+ "MsgId": "Unknown",
679
+ "CreDtTm": "Unknown",
680
+ "NbOfTxs": "0",
681
+ "InitgPty": "Unknown",
682
+ }
683
+
684
+ if group_header is not None:
685
+ # Batch extract all header fields in one iteration
686
+ for child in group_header:
687
+ if child.tag in ["MsgId", "CreDtTm", "NbOfTxs"]:
688
+ header_data[child.tag] = (
689
+ child.text if child.text else "Unknown"
690
+ )
691
+ elif child.tag == "InitgPty":
692
+ nm_elem = child.find("Nm")
693
+ header_data["InitgPty"] = (
694
+ nm_elem.text
695
+ if nm_elem is not None and nm_elem.text
696
+ else "Unknown"
697
+ )
698
+
699
+ # Batch extract all payment information and calculate totals
700
+ payment_info_records = root.findall(
701
+ ".//CstmrCdtTrfInitn/PmtInf"
702
+ )
703
+ total_amount = 0.0
704
+ currency = "Unknown"
705
+
706
+ for pmt in payment_info_records:
707
+ # Pre-extract all transactions for this payment in one call
708
+ transactions = pmt.findall("CdtTrfTxInf")
709
+ for tx in transactions:
710
+ # Find amount element directly rather than using nested XPath
711
+ amt_elem = None
712
+ for child in tx:
713
+ if child.tag == "Amt":
714
+ amt_elem = child.find("InstdAmt")
715
+ break
716
+
717
+ if amt_elem is not None and amt_elem.text:
718
+ total_amount += float(amt_elem.text)
719
+ if currency == "Unknown":
720
+ currency = amt_elem.get("Ccy", "Unknown")
721
+
722
+ return {
723
+ "account_id": header_data["InitgPty"],
724
+ "statement_date": header_data["CreDtTm"],
725
+ "transaction_count": int(header_data["NbOfTxs"])
726
+ if header_data["NbOfTxs"].isdigit()
727
+ else 0,
728
+ "total_amount": total_amount,
729
+ "currency": currency,
730
+ "message_id": header_data["MsgId"],
731
+ "initiating_party": header_data["InitgPty"],
732
+ }
733
+ except Exception as e:
734
+ # Return minimal summary if parsing fails
735
+ return {
736
+ "account_id": "Unknown",
737
+ "statement_date": "Unknown",
738
+ "transaction_count": 0,
739
+ "total_amount": 0.0,
740
+ "currency": "Unknown",
741
+ "error": str(e),
742
+ }