pyxecm 1.3.0__py3-none-any.whl → 1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxecm might be problematic. Click here for more details.

pyxecm/helper/xml.py CHANGED
@@ -3,6 +3,10 @@
3
3
  Class: XML
4
4
  Methods:
5
5
 
6
+ load_xml_file: Load an XML file into a Python list of dictionaries
7
+ load_xml_files_from_directory: Load all XML files from a directory that matches defined file names
8
+ then using the XPath to identify a set of elements and convert them
9
+ into a Python list of dictionaries.
6
10
  get_xml_element: Retrieve an XML Element from a string using an XPath expression
7
11
  modify_xml_element: Update the text (= content) of an XML element
8
12
  search_setting: Search a JSON-like setting inside an XML text element
@@ -13,7 +17,7 @@ replace_in_xml_files: Replace all occurrences of the search pattern with the rep
13
17
  """
14
18
 
15
19
  __author__ = "Dr. Marc Diefenbruch"
16
- __copyright__ = "Copyright 2023, OpenText"
20
+ __copyright__ = "Copyright 2024, OpenText"
17
21
  __credits__ = ["Kai-Philip Gatzweiler"]
18
22
  __maintainer__ = "Dr. Marc Diefenbruch"
19
23
  __email__ = "mdiefenb@opentext.com"
@@ -21,9 +25,12 @@ __email__ = "mdiefenb@opentext.com"
21
25
  import logging
22
26
  import os
23
27
  import re
28
+ import fnmatch
24
29
 
25
30
  # we need lxml instead of standard xml.etree to have xpath capabilities!
26
31
  from lxml import etree
32
+ import xmltodict
33
+ import zipfile
27
34
 
28
35
  # import xml.etree.ElementTree as etree
29
36
  from pyxecm.helper.assoc import Assoc
@@ -32,8 +39,129 @@ logger = logging.getLogger("pyxecm.xml")
32
39
 
33
40
 
34
41
  class XML:
35
- """XML Class to parse and update Extended ECM transport packages
36
- """
42
+ """XML Class to handle XML processing, e.g. to parse and update Extended ECM transport packages"""
43
+
44
@classmethod
def load_xml_file(
    cls, file_path: str, xpath: str, dir_name: str | None = None
) -> list | None:
    """Load an XML file into a Python list of dictionaries.

    Each element selected by the XPath is serialized and converted with
    xmltodict. If the resulting dictionary is keyed by the last step of
    the XPath, that wrapper level is removed.

    Args:
        file_path (str): Path to XML file
        xpath (str): XPath to select sub-elements
        dir_name (str | None, optional): If given, it is stored under the
                                         key "directory" in every result
                                         that is a dictionary.

    Returns:
        list | None: One entry per selected element. The list is empty
                     (or partial) if the file could not be read or parsed;
                     the error is logged in that case.
    """

    # Initialize before the try block: the error handlers below fall
    # through to the final return, which must not hit an unbound name.
    results = []

    try:
        tree = etree.parse(file_path)
        if not tree:
            return results

        # Perform the XPath query to select the elements
        elements = tree.xpath(xpath)

        # xmltodict wraps each element in a dict keyed by the element tag.
        # The last XPath step is used as that tag name.
        tag = xpath.split("/")[-1]
        for element in elements:
            element_dict = xmltodict.parse(etree.tostring(element))
            if tag in element_dict:
                element_dict = element_dict[tag]
            # Text-only or empty elements unwrap to str / None, which
            # cannot carry the additional "directory" key.
            if dir_name and isinstance(element_dict, dict):
                element_dict["directory"] = dir_name
            results.append(element_dict)

    except IOError as e:
        logger.error("IO Error -> %s", str(e))
    except etree.XMLSyntaxError as e:
        logger.error("XML Syntax Error -> %s", str(e))
    except etree.DocumentInvalid as e:
        logger.error("Document Invalid -> %s", str(e))

    return results
86
+
87
+ # end method definition
88
+
89
@classmethod
def load_xml_files_from_directory(
    cls, path_to_root: str, filenames: list | None, xpath: str | None = None
) -> list | None:
    """Load all XML files from a directory that match the defined file names,
    then use the XPath to identify a set of elements and convert them
    into a Python list of dictionaries.

    If path_to_root ends with ".zip" the archive is extracted into a
    sibling folder named like the archive without its extension (unless
    that folder already exists) and the extracted folder is traversed.

    Args:
        path_to_root (str): Path to the root element of the
                            directory structure (or to a Zip file)
        filenames (list): list of file name patterns (fnmatch syntax),
                          matched against the file name and the full path.
                          If empty or None all filenames ending
                          with ".xml" are used.
        xpath (str, optional): XPath to the elements we want to select.
                               NOTE(review): it is passed unchanged to
                               load_xml_file(), which appears to need a
                               string - confirm whether None is supported.

    Returns:
        list | None: List of dictionaries. None if path_to_root is neither
                     a directory nor a Zip file path.
    """

    # Initialize before the try block: the error handlers below fall
    # through to the final return, which must not hit an unbound name
    # (e.g. if the Zip file does not exist).
    results = []

    try:
        # Check if the provided path is a directory or a Zip file
        if not os.path.isdir(path_to_root) and not path_to_root.endswith(".zip"):
            logger.error(
                "The provided path '%s' is not a valid directory or Zip file.",
                path_to_root,
            )
            return None

        if path_to_root.endswith(".zip"):
            zip_file_folder = os.path.splitext(path_to_root)[0]
            if not os.path.exists(zip_file_folder):
                logger.info(
                    "Unzipping -> '%s' into folder -> '%s'...",
                    path_to_root,
                    zip_file_folder,
                )
                with zipfile.ZipFile(path_to_root, "r") as zfile:
                    zfile.extractall(zip_file_folder)
            else:
                logger.info(
                    "Zip file is already extracted (path -> '%s' exists). Reusing extracted data...",
                    zip_file_folder,
                )
            path_to_root = zip_file_folder

        # Walk through the directory
        for root, _, files in os.walk(path_to_root):
            for file_name in files:
                if not file_name.endswith(".xml"):
                    continue

                file_path = os.path.join(root, file_name)

                # No patterns means "take every XML file". Otherwise accept
                # a match on the bare file name or on the full path (the
                # latter keeps path-style patterns working).
                if filenames and not any(
                    fnmatch.fnmatch(file_name, pattern)
                    or fnmatch.fnmatch(file_path, pattern)
                    for pattern in filenames
                ):
                    continue

                logger.info(
                    "Load XML file -> '%s' of size -> %s",
                    file_path,
                    os.path.getsize(file_path),
                )
                # load_xml_file() is annotated as list | None
                results += (
                    cls.load_xml_file(
                        file_path,
                        xpath=xpath,
                        dir_name=os.path.dirname(file_path),
                    )
                    or []
                )

    except NotADirectoryError as nde:
        logger.error("Error -> %s", str(nde))
    except FileNotFoundError as fnfe:
        logger.error("Error -> %s", str(fnfe))
    except PermissionError as pe:
        logger.error("Error -> %s", str(pe))

    return results
163
+
164
+ # end method definition
37
165
 
38
166
  @classmethod
39
167
  def get_xml_element(cls, xml_content: str, xpath: str):
@@ -55,6 +183,8 @@ class XML:
55
183
 
56
184
  return element
57
185
 
186
+ # end method definition
187
+
58
188
  @classmethod
59
189
  def modify_xml_element(cls, xml_content: str, xpath: str, new_value: str):
60
190
  """Update the text (= content) of an XML element
@@ -72,6 +202,8 @@ class XML:
72
202
  else:
73
203
  logger.warning("XML Element -> %s not found.", xpath)
74
204
 
205
+ # end method definition
206
+
75
207
  @classmethod
76
208
  def search_setting(
77
209
  cls,
@@ -122,6 +254,8 @@ class XML:
122
254
  else:
123
255
  return None
124
256
 
257
+ # end method definition
258
+
125
259
  @classmethod
126
260
  def replace_setting(
127
261
  cls,
@@ -170,6 +304,8 @@ class XML:
170
304
 
171
305
  return new_text
172
306
 
307
+ # end method definition
308
+
173
309
  @classmethod
174
310
  def replace_in_xml_files(
175
311
  cls,
@@ -206,18 +342,18 @@ class XML:
206
342
  found = False
207
343
 
208
344
  # Traverse the directory and its subdirectories
209
- for subdir, dirs, files in os.walk(directory):
210
- for file in files:
345
+ for subdir, _, files in os.walk(directory):
346
+ for filename in files:
211
347
  # Check if the file is an XML file
212
- if file.endswith(".xml"):
348
+ if filename.endswith(".xml"):
213
349
  # Read the contents of the file
214
- file_path = os.path.join(subdir, file)
350
+ file_path = os.path.join(subdir, filename)
215
351
 
216
352
  # if xpath is given we do an intelligent replacement
217
353
  if xpath:
218
354
  xml_modified = False
219
- logger.info("Replacement with xpath...")
220
- logger.info(
355
+ logger.debug("Replacement with xpath...")
356
+ logger.debug(
221
357
  "XML path -> %s, setting -> %s, assoc element -> %s",
222
358
  xpath,
223
359
  setting,
@@ -225,17 +361,15 @@ class XML:
225
361
  )
226
362
  tree = etree.parse(file_path)
227
363
  if not tree:
228
- logger.erro(
229
- "Cannot parse XML tree -> {}. Skipping...".format(
230
- file_path
231
- )
364
+ logger.error(
365
+ "Cannot parse XML tree -> %s. Skipping...", file_path
232
366
  )
233
367
  continue
234
368
  root = tree.getroot()
235
- # find the matching XML element using the given XPath:
369
+ # find the matching XML elements using the given XPath:
236
370
  elements = root.xpath(xpath)
237
371
  if not elements:
238
- logger.info(
372
+ logger.debug(
239
373
  "The XML file -> %s does not have any element with the given XML path -> %s. Skipping...",
240
374
  file_path,
241
375
  xpath,
@@ -243,13 +377,15 @@ class XML:
243
377
  continue
244
378
  for element in elements:
245
379
  # as XPath returns a list
246
- # element = elements[0]
247
- logger.info(
248
- "Found XML element -> %s in -> %s", element.tag, xpath
380
+ logger.debug(
381
+ "Found XML element -> %s in file -> %s using xpath -> %s",
382
+ element.tag,
383
+ filename,
384
+ xpath,
249
385
  )
250
386
  # the simple case: replace the complete text of the XML element
251
387
  if not setting and not assoc_elem:
252
- logger.info(
388
+ logger.debug(
253
389
  "Replace complete text of XML element -> %s from -> %s to -> %s",
254
390
  xpath,
255
391
  element.text,
@@ -259,7 +395,7 @@ class XML:
259
395
  xml_modified = True
260
396
  # In this case we want to set a complete value of a setting (basically replacing a whole line)
261
397
  elif setting and not assoc_elem:
262
- logger.info(
398
+ logger.debug(
263
399
  "Replace single setting -> %s in XML element -> %s with new value -> %s",
264
400
  setting,
265
401
  xpath,
@@ -269,7 +405,7 @@ class XML:
269
405
  element.text, setting, is_simple=True
270
406
  )
271
407
  if setting_value:
272
- logger.info(
408
+ logger.debug(
273
409
  "Found existing setting value -> %s",
274
410
  setting_value,
275
411
  )
@@ -288,7 +424,7 @@ class XML:
288
424
  replace_setting = (
289
425
  '"' + setting + '":"' + replace_string + '"'
290
426
  )
291
- logger.info(
427
+ logger.debug(
292
428
  "Replacement setting -> %s", replace_setting
293
429
  )
294
430
  element.text = cls.replace_setting(
@@ -306,7 +442,7 @@ class XML:
306
442
  continue
307
443
  # in this case the text is just one assoc (no setting substructure)
308
444
  elif not setting and assoc_elem:
309
- logger.info(
445
+ logger.debug(
310
446
  "Replace single Assoc value -> %s in XML element -> %s with -> %s",
311
447
  assoc_elem,
312
448
  xpath,
@@ -320,13 +456,13 @@ class XML:
320
456
  assoc_string=assoc_string
321
457
  )
322
458
  logger.debug("Assoc Dict -> %s", str(assoc_dict))
323
- assoc_dict[
324
- assoc_elem
325
- ] = replace_string # escaped_replace_string
459
+ assoc_dict[assoc_elem] = (
460
+ replace_string # escaped_replace_string
461
+ )
326
462
  assoc_string_new: str = Assoc.dict_to_string(
327
463
  assoc_dict=assoc_dict
328
464
  )
329
- logger.info(
465
+ logger.debug(
330
466
  "Replace assoc with -> %s", assoc_string_new
331
467
  )
332
468
  element.text = assoc_string_new
@@ -334,7 +470,7 @@ class XML:
334
470
  xml_modified = True
335
471
  # In this case we have multiple settings with their own assocs
336
472
  elif setting and assoc_elem:
337
- logger.info(
473
+ logger.debug(
338
474
  "Replace single Assoc value -> %s in setting -> %s in XML element -> %s with -> %s",
339
475
  assoc_elem,
340
476
  setting,
@@ -345,7 +481,7 @@ class XML:
345
481
  element.text, setting, is_simple=False
346
482
  )
347
483
  if setting_value:
348
- logger.info(
484
+ logger.debug(
349
485
  "Found setting value -> %s", setting_value
350
486
  )
351
487
  assoc_string: str = Assoc.extract_assoc_string(
@@ -359,13 +495,13 @@ class XML:
359
495
  escaped_replace_string = replace_string.replace(
360
496
  "'", "\\\\\u0027"
361
497
  )
362
- logger.info(
498
+ logger.debug(
363
499
  "Escaped replacement string -> %s",
364
500
  escaped_replace_string,
365
501
  )
366
- assoc_dict[
367
- assoc_elem
368
- ] = escaped_replace_string # escaped_replace_string
502
+ assoc_dict[assoc_elem] = (
503
+ escaped_replace_string # escaped_replace_string
504
+ )
369
505
  assoc_string_new: str = Assoc.dict_to_string(
370
506
  assoc_dict=assoc_dict
371
507
  )
@@ -376,7 +512,7 @@ class XML:
376
512
  replace_setting = (
377
513
  '"' + setting + '":"' + assoc_string_new + '"'
378
514
  )
379
- logger.info(
515
+ logger.debug(
380
516
  "Replacement setting -> %s", replace_setting
381
517
  )
382
518
  # here we need to apply a "trick". It is required
@@ -405,7 +541,7 @@ class XML:
405
541
  )
406
542
  continue
407
543
  if xml_modified:
408
- logger.info(
544
+ logger.debug(
409
545
  "XML tree has been modified. Write updated file -> %s...",
410
546
  file_path,
411
547
  )
@@ -463,24 +599,92 @@ class XML:
463
599
  found = True
464
600
  # this is not using xpath - do a simple search and replace
465
601
  else:
466
- logger.info("Replacement without xpath...")
467
- with open(file_path, "r") as f:
602
+ logger.debug("Replacement without xpath...")
603
+ with open(file_path, "r", encoding="UTF-8") as f:
468
604
  contents = f.read()
469
605
  # Replace all occurrences of the search pattern with the replace string
470
606
  new_contents = pattern.sub(replace_string, contents)
471
607
 
472
608
  # Write the updated contents to the file if there were replacements
473
609
  if contents != new_contents:
474
- logger.info(
610
+ logger.debug(
475
611
  "Found search string -> %s in XML file -> %s. Write updated file...",
476
612
  search_pattern,
477
613
  file_path,
478
614
  )
479
615
  # Write the updated contents to the file
480
- with open(file_path, "w") as f:
616
+ with open(file_path, "w", encoding="UTF-8") as f:
481
617
  f.write(new_contents)
482
618
  found = True
483
619
 
484
620
  return found
485
621
 
486
- # end method definition
622
+ # end method definition
623
+
624
@classmethod
def extract_from_xml_files(
    cls,
    directory: str,
    xpath: str,
) -> list | None:
    """Extracts the XML subtrees using an XPath in all XML files
    in the directory and its subdirectories.

    Files that cannot be read or parsed are logged and skipped so that
    one broken file does not abort the whole traversal.

    Args:
        directory (str): directory to traverse for XML files
        xpath (str): used to determine XML elements to extract
    Returns:
        list | None: One xmltodict dictionary per element found by the
                     XPath; an empty list if nothing was found.
    """

    extracted_data_list = []

    # Traverse the directory and its subdirectories
    for subdir, _, files in os.walk(directory):
        for filename in files:
            # Only XML files are of interest
            if not filename.endswith(".xml"):
                continue

            file_path = os.path.join(subdir, filename)

            logger.debug("Extraction with xpath -> %s...", xpath)
            # etree.parse() signals unreadable or malformed files by
            # raising, so the skip has to happen in an exception handler.
            try:
                tree = etree.parse(file_path)
            except (etree.XMLSyntaxError, OSError) as e:
                logger.error(
                    "Cannot parse XML file -> '%s'; error -> %s. Skipping...",
                    file_path,
                    str(e),
                )
                continue
            if not tree:
                logger.error(
                    "Cannot parse XML file -> '%s'. Skipping...", file_path
                )
                continue

            # find the matching XML elements using the given XPath:
            elements = tree.getroot().xpath(xpath)
            if not elements:
                logger.debug(
                    "The XML file -> %s does not have any element with the given XML path -> %s. Skipping...",
                    file_path,
                    xpath,
                )
                continue

            # XPath returns a list - convert each hit on its own
            for element in elements:
                logger.debug(
                    "Found XML element -> %s in file -> %s using xpath -> %s. Add it to result list.",
                    element.tag,
                    filename,
                    xpath,
                )
                extracted_content = etree.tostring(element)

                try:
                    dict_content = xmltodict.parse(extracted_content)
                except xmltodict.expat.ExpatError:
                    logger.error(
                        "Invalid XML syntax in file -> %s. Please check the XML file for errors.",
                        filename,
                    )
                    continue

                extracted_data_list.append(dict_content)

    return extracted_data_list
689
+
690
+ # end method definition