pyxecm 1.6__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyxecm might be problematic. Click here for more details.
- pyxecm/__init__.py +7 -4
- pyxecm/avts.py +727 -254
- pyxecm/coreshare.py +686 -467
- pyxecm/customizer/__init__.py +16 -4
- pyxecm/customizer/__main__.py +58 -0
- pyxecm/customizer/api/__init__.py +5 -0
- pyxecm/customizer/api/__main__.py +6 -0
- pyxecm/customizer/api/app.py +163 -0
- pyxecm/customizer/api/auth/__init__.py +1 -0
- pyxecm/customizer/api/auth/functions.py +92 -0
- pyxecm/customizer/api/auth/models.py +13 -0
- pyxecm/customizer/api/auth/router.py +78 -0
- pyxecm/customizer/api/common/__init__.py +1 -0
- pyxecm/customizer/api/common/functions.py +47 -0
- pyxecm/customizer/api/common/metrics.py +92 -0
- pyxecm/customizer/api/common/models.py +21 -0
- pyxecm/customizer/api/common/payload_list.py +870 -0
- pyxecm/customizer/api/common/router.py +72 -0
- pyxecm/customizer/api/settings.py +128 -0
- pyxecm/customizer/api/terminal/__init__.py +1 -0
- pyxecm/customizer/api/terminal/router.py +87 -0
- pyxecm/customizer/api/v1_csai/__init__.py +1 -0
- pyxecm/customizer/api/v1_csai/router.py +87 -0
- pyxecm/customizer/api/v1_maintenance/__init__.py +1 -0
- pyxecm/customizer/api/v1_maintenance/functions.py +100 -0
- pyxecm/customizer/api/v1_maintenance/models.py +12 -0
- pyxecm/customizer/api/v1_maintenance/router.py +76 -0
- pyxecm/customizer/api/v1_otcs/__init__.py +1 -0
- pyxecm/customizer/api/v1_otcs/functions.py +61 -0
- pyxecm/customizer/api/v1_otcs/router.py +179 -0
- pyxecm/customizer/api/v1_payload/__init__.py +1 -0
- pyxecm/customizer/api/v1_payload/functions.py +179 -0
- pyxecm/customizer/api/v1_payload/models.py +51 -0
- pyxecm/customizer/api/v1_payload/router.py +499 -0
- pyxecm/customizer/browser_automation.py +721 -286
- pyxecm/customizer/customizer.py +1076 -1425
- pyxecm/customizer/exceptions.py +35 -0
- pyxecm/customizer/guidewire.py +1186 -0
- pyxecm/customizer/k8s.py +901 -379
- pyxecm/customizer/log.py +107 -0
- pyxecm/customizer/m365.py +2967 -920
- pyxecm/customizer/nhc.py +1169 -0
- pyxecm/customizer/openapi.py +258 -0
- pyxecm/customizer/payload.py +18228 -7820
- pyxecm/customizer/pht.py +717 -286
- pyxecm/customizer/salesforce.py +516 -342
- pyxecm/customizer/sap.py +58 -41
- pyxecm/customizer/servicenow.py +611 -372
- pyxecm/customizer/settings.py +445 -0
- pyxecm/customizer/successfactors.py +408 -346
- pyxecm/customizer/translate.py +83 -48
- pyxecm/helper/__init__.py +5 -2
- pyxecm/helper/assoc.py +83 -43
- pyxecm/helper/data.py +2406 -870
- pyxecm/helper/logadapter.py +27 -0
- pyxecm/helper/web.py +229 -101
- pyxecm/helper/xml.py +596 -171
- pyxecm/maintenance_page/__init__.py +5 -0
- pyxecm/maintenance_page/__main__.py +6 -0
- pyxecm/maintenance_page/app.py +51 -0
- pyxecm/maintenance_page/settings.py +28 -0
- pyxecm/maintenance_page/static/favicon.avif +0 -0
- pyxecm/maintenance_page/templates/maintenance.html +165 -0
- pyxecm/otac.py +235 -141
- pyxecm/otawp.py +2668 -1220
- pyxecm/otca.py +569 -0
- pyxecm/otcs.py +7956 -3237
- pyxecm/otds.py +2178 -925
- pyxecm/otiv.py +36 -21
- pyxecm/otmm.py +1272 -325
- pyxecm/otpd.py +231 -127
- pyxecm-2.0.1.dist-info/METADATA +122 -0
- pyxecm-2.0.1.dist-info/RECORD +76 -0
- {pyxecm-1.6.dist-info → pyxecm-2.0.1.dist-info}/WHEEL +1 -1
- pyxecm-1.6.dist-info/METADATA +0 -53
- pyxecm-1.6.dist-info/RECORD +0 -32
- {pyxecm-1.6.dist-info → pyxecm-2.0.1.dist-info/licenses}/LICENSE +0 -0
- {pyxecm-1.6.dist-info → pyxecm-2.0.1.dist-info}/top_level.txt +0 -0
pyxecm/helper/xml.py
CHANGED
|
@@ -1,68 +1,151 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
Class: XML
|
|
4
|
-
Methods:
|
|
5
|
-
|
|
6
|
-
load_xml_file: Load an XML file into a Python list of dictionaries
|
|
7
|
-
load_xml_files_from_directory: Load all XML files from a directory that matches defined file names
|
|
8
|
-
then using the XPath to identify a set of elements and convert them
|
|
9
|
-
into a Python list of dictionaries.
|
|
10
|
-
get_xml_element: Retrieve an XML Element from a string using an XPath expression
|
|
11
|
-
modify_xml_element: Update the text (= content) of an XML element
|
|
12
|
-
search_setting: Search a JSON-like setting inside an XML text telement
|
|
13
|
-
replace_setting: Update a setting value
|
|
14
|
-
replace_in_xml_files: Replace all occurrences of the search pattern with the replace string in all
|
|
15
|
-
XML files in the directory and its subdirectories.
|
|
16
|
-
|
|
17
|
-
"""
|
|
1
|
+
"""XML helper module."""
|
|
18
2
|
|
|
19
3
|
__author__ = "Dr. Marc Diefenbruch"
|
|
20
|
-
__copyright__ = "Copyright 2024, OpenText"
|
|
4
|
+
__copyright__ = "Copyright (C) 2024-2025, OpenText"
|
|
21
5
|
__credits__ = ["Kai-Philip Gatzweiler"]
|
|
22
6
|
__maintainer__ = "Dr. Marc Diefenbruch"
|
|
23
7
|
__email__ = "mdiefenb@opentext.com"
|
|
24
8
|
|
|
9
|
+
import fnmatch
|
|
10
|
+
import glob
|
|
25
11
|
import logging
|
|
26
12
|
import os
|
|
27
13
|
import re
|
|
28
|
-
import fnmatch
|
|
29
14
|
import zipfile
|
|
15
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
16
|
+
from queue import Queue
|
|
17
|
+
|
|
18
|
+
import xmltodict
|
|
30
19
|
|
|
31
20
|
# we need lxml instead of standard xml.etree to have xpath capabilities!
|
|
32
21
|
from lxml import etree
|
|
33
|
-
import
|
|
22
|
+
from lxml.etree import Element
|
|
34
23
|
|
|
35
|
-
|
|
36
|
-
from pyxecm.helper.assoc import Assoc
|
|
24
|
+
from pyxecm.helper import Assoc
|
|
37
25
|
|
|
38
|
-
|
|
26
|
+
default_logger = logging.getLogger("pyxecm.helper.xml")
|
|
39
27
|
|
|
40
28
|
|
|
41
29
|
class XML:
|
|
42
|
-
"""
|
|
30
|
+
"""Handle XML processing, e.g. to parse and update Extended ECM transport packages."""
|
|
31
|
+
|
|
32
|
+
logger: logging.Logger = default_logger
|
|
33
|
+
|
|
34
|
+
@classmethod
|
|
35
|
+
def remove_xml_namespace(cls, tag: str) -> str:
|
|
36
|
+
"""Remove namespace from XML tag.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
tag (str):
|
|
40
|
+
The XML tag with namespace.
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
str:
|
|
44
|
+
The tag without namespace.
|
|
45
|
+
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
# In Python's ElementTree, the tag namespace
|
|
49
|
+
# is put into curly braces like "{namespace}element"
|
|
50
|
+
# that's why this method splits after the closing curly brace
|
|
51
|
+
# and takes the last item (-1):
|
|
52
|
+
|
|
53
|
+
return tag.split("}", 1)[-1]
|
|
54
|
+
|
|
55
|
+
# end method definition
|
|
56
|
+
|
|
57
|
+
@classmethod
|
|
58
|
+
def xml_to_dict(cls, xml_string: str) -> dict:
|
|
59
|
+
"""Parse XML string and return a dictionary without namespaces.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
xml_string (str):
|
|
63
|
+
The XML string to process.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
dict:
|
|
67
|
+
The XML structure converted to a dictionary.
|
|
68
|
+
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
def xml_element_to_dict(element: Element) -> dict:
|
|
72
|
+
"""Convert XML element to dictionary.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
element (Element):
|
|
76
|
+
The XML element.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
dict:
|
|
80
|
+
Dictionary representing the XML element
|
|
81
|
+
|
|
82
|
+
"""
|
|
83
|
+
tag = cls.remove_xml_namespace(element.tag)
|
|
84
|
+
children = list(element)
|
|
85
|
+
if children:
|
|
86
|
+
return {
|
|
87
|
+
tag: {
|
|
88
|
+
cls.remove_xml_namespace(child.tag): xml_element_to_dict(child)[
|
|
89
|
+
cls.remove_xml_namespace(child.tag)
|
|
90
|
+
]
|
|
91
|
+
for child in children
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
return {tag: element.text.strip() if element.text else None}
|
|
96
|
+
|
|
97
|
+
root = etree.fromstring(xml_string)
|
|
98
|
+
|
|
99
|
+
return xml_element_to_dict(root)
|
|
100
|
+
|
|
101
|
+
# end method definition
|
|
43
102
|
|
|
44
103
|
@classmethod
|
|
45
104
|
def load_xml_file(
|
|
46
|
-
cls,
|
|
105
|
+
cls,
|
|
106
|
+
file_path: str,
|
|
107
|
+
xpath: str,
|
|
108
|
+
dir_name: str | None = None,
|
|
109
|
+
logger: logging.Logger = default_logger,
|
|
47
110
|
) -> list | None:
|
|
48
|
-
"""Load an XML file into a Python list of dictionaries
|
|
111
|
+
"""Load an XML file into a Python list of dictionaries.
|
|
49
112
|
|
|
50
113
|
Args:
|
|
51
|
-
file_path (str):
|
|
52
|
-
|
|
114
|
+
file_path (str):
|
|
115
|
+
The path to XML file.
|
|
116
|
+
xpath (str):
|
|
117
|
+
XPath to select sub-elements.
|
|
118
|
+
dir_name (str | None, optional):
|
|
119
|
+
Directory name to include in each dictionary, if provided.
|
|
120
|
+
logger (logging.Logger):
|
|
121
|
+
The logging object used for all log messages.
|
|
53
122
|
|
|
54
123
|
Returns:
|
|
55
|
-
dict | None:
|
|
124
|
+
list | None:
|
|
125
|
+
A list of dictionaries representing the parsed XML elements,
|
|
126
|
+
or None if an error occurs during file reading or parsing.
|
|
127
|
+
|
|
56
128
|
"""
|
|
57
129
|
|
|
58
|
-
|
|
130
|
+
if not os.path.exists(file_path):
|
|
131
|
+
logger.error("XML File -> %s does not exist!", file_path)
|
|
132
|
+
return None
|
|
59
133
|
|
|
134
|
+
try:
|
|
60
135
|
tree = etree.parse(file_path)
|
|
61
136
|
if not tree:
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
elements
|
|
137
|
+
logger.warning("Empty or invalid XML tree for file -> %s", file_path)
|
|
138
|
+
return None
|
|
139
|
+
|
|
140
|
+
# Extract elements using the XPath:
|
|
141
|
+
elements = tree.xpath(xpath)
|
|
142
|
+
if not elements:
|
|
143
|
+
logger.warning(
|
|
144
|
+
"No elements matched XPath -> %s in file -> '%s'",
|
|
145
|
+
xpath,
|
|
146
|
+
file_path,
|
|
147
|
+
)
|
|
148
|
+
return None
|
|
66
149
|
|
|
67
150
|
# Convert the selected elements to dictionaries
|
|
68
151
|
results = []
|
|
@@ -75,12 +158,15 @@ class XML:
|
|
|
75
158
|
element_dict["directory"] = dir_name
|
|
76
159
|
results.append(element_dict)
|
|
77
160
|
|
|
78
|
-
except
|
|
79
|
-
logger.error("IO Error -> %s",
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
161
|
+
except OSError:
|
|
162
|
+
logger.error("IO Error with file -> %s", file_path)
|
|
163
|
+
return None
|
|
164
|
+
except etree.XMLSyntaxError:
|
|
165
|
+
logger.error("XML Syntax Error in file -> %s", file_path)
|
|
166
|
+
return None
|
|
167
|
+
except etree.DocumentInvalid:
|
|
168
|
+
logger.error("Invalid XML document -> %s", file_path)
|
|
169
|
+
return None
|
|
84
170
|
|
|
85
171
|
return results
|
|
86
172
|
|
|
@@ -88,33 +174,50 @@ class XML:
|
|
|
88
174
|
|
|
89
175
|
@classmethod
|
|
90
176
|
def load_xml_files_from_directory(
|
|
91
|
-
cls,
|
|
177
|
+
cls,
|
|
178
|
+
path_to_root: str,
|
|
179
|
+
filenames: list | None,
|
|
180
|
+
xpath: str | None = None,
|
|
181
|
+
logger: logging.Logger = default_logger,
|
|
92
182
|
) -> list | None:
|
|
93
|
-
"""Load all XML files from a directory that matches defined file names
|
|
94
|
-
|
|
95
|
-
|
|
183
|
+
"""Load all XML files from a directory that matches defined file names.
|
|
184
|
+
|
|
185
|
+
Then using the XPath to identify a set of elements and convert them
|
|
186
|
+
into a Python list of dictionaries.
|
|
96
187
|
|
|
97
188
|
Args:
|
|
98
|
-
path_to_root (str):
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
189
|
+
path_to_root (str):
|
|
190
|
+
Path to the root element of the
|
|
191
|
+
directory structure
|
|
192
|
+
filenames (list):
|
|
193
|
+
A list of filenames. This can also be patterns like
|
|
194
|
+
"*/en/docovw.xml". If empty all filenames ending
|
|
195
|
+
with ".xml" are used.
|
|
196
|
+
xpath (str, optional):
|
|
197
|
+
The XPath to the elements we want to select.
|
|
198
|
+
logger (logging.Logger):
|
|
199
|
+
The logging object used for all log messages.
|
|
103
200
|
|
|
104
201
|
Returns:
|
|
105
|
-
list:
|
|
202
|
+
list:
|
|
203
|
+
List of dictionaries.
|
|
204
|
+
|
|
106
205
|
"""
|
|
107
206
|
|
|
108
|
-
|
|
207
|
+
if not filenames:
|
|
208
|
+
filenames = ["*.xml"]
|
|
109
209
|
|
|
110
|
-
|
|
210
|
+
try:
|
|
211
|
+
# Check if the provided path is a directory or a zip file that can be extracted
|
|
212
|
+
# into a directory:
|
|
111
213
|
if not os.path.isdir(path_to_root) and not path_to_root.endswith(".zip"):
|
|
112
214
|
logger.error(
|
|
113
|
-
"The provided path '%s' is not a valid directory or Zip file.",
|
|
215
|
+
"The provided path -> '%s' is not a valid directory or Zip file.",
|
|
114
216
|
path_to_root,
|
|
115
217
|
)
|
|
116
|
-
return
|
|
218
|
+
return None
|
|
117
219
|
|
|
220
|
+
# If we have a zip file we extract it - but only if it has not been extracted before:
|
|
118
221
|
if path_to_root.endswith(".zip"):
|
|
119
222
|
zip_file_folder = os.path.splitext(path_to_root)[0]
|
|
120
223
|
if not os.path.exists(zip_file_folder):
|
|
@@ -123,8 +226,21 @@ class XML:
|
|
|
123
226
|
path_to_root,
|
|
124
227
|
zip_file_folder,
|
|
125
228
|
)
|
|
126
|
-
|
|
127
|
-
|
|
229
|
+
try:
|
|
230
|
+
with zipfile.ZipFile(path_to_root, "r") as zfile:
|
|
231
|
+
zfile.extractall(zip_file_folder)
|
|
232
|
+
except zipfile.BadZipFile:
|
|
233
|
+
logger.error(
|
|
234
|
+
"Failed to extract zip file -> '%s'",
|
|
235
|
+
path_to_root,
|
|
236
|
+
)
|
|
237
|
+
return None
|
|
238
|
+
except OSError:
|
|
239
|
+
logger.error(
|
|
240
|
+
"OS error occurred while trying to extract -> '%s'",
|
|
241
|
+
path_to_root,
|
|
242
|
+
)
|
|
243
|
+
return None
|
|
128
244
|
else:
|
|
129
245
|
logger.info(
|
|
130
246
|
"Zip file is already extracted (path -> '%s' exists). Reusing extracted data...",
|
|
@@ -142,37 +258,308 @@ class XML:
|
|
|
142
258
|
file_name = os.path.basename(file_path)
|
|
143
259
|
dir_name = os.path.dirname(file_path)
|
|
144
260
|
|
|
145
|
-
if any(
|
|
146
|
-
|
|
147
|
-
|
|
261
|
+
if any(fnmatch.fnmatch(file_path, pattern) for pattern in filenames) and file_name.endswith(".xml"):
|
|
262
|
+
logger.info(
|
|
263
|
+
"Load XML file -> '%s' of size -> %s",
|
|
264
|
+
file_path,
|
|
265
|
+
file_size,
|
|
266
|
+
)
|
|
267
|
+
elements = cls.load_xml_file(
|
|
268
|
+
file_path,
|
|
269
|
+
xpath=xpath,
|
|
270
|
+
dir_name=dir_name,
|
|
271
|
+
)
|
|
272
|
+
if elements:
|
|
273
|
+
results += elements
|
|
274
|
+
|
|
275
|
+
except NotADirectoryError:
|
|
276
|
+
logger.error(
|
|
277
|
+
"The given path -> '%s' is not a directory!",
|
|
278
|
+
path_to_root,
|
|
279
|
+
)
|
|
280
|
+
return None
|
|
281
|
+
except FileNotFoundError:
|
|
282
|
+
logger.error(
|
|
283
|
+
"The given path -> '%s' does not exist!",
|
|
284
|
+
path_to_root,
|
|
285
|
+
)
|
|
286
|
+
return None
|
|
287
|
+
except PermissionError:
|
|
288
|
+
logger.error(
|
|
289
|
+
"No permission to access path -> '%s'!",
|
|
290
|
+
path_to_root,
|
|
291
|
+
)
|
|
292
|
+
return None
|
|
293
|
+
except OSError:
|
|
294
|
+
logger.error("Low level OS error with file -> %s", path_to_root)
|
|
295
|
+
return None
|
|
296
|
+
|
|
297
|
+
return results
|
|
298
|
+
|
|
299
|
+
# end method definition
|
|
300
|
+
|
|
301
|
+
@classmethod
|
|
302
|
+
def load_xml_files_from_directories(
|
|
303
|
+
cls,
|
|
304
|
+
directories: list[str],
|
|
305
|
+
filenames: list[str] | None = None,
|
|
306
|
+
xpath: str | None = None,
|
|
307
|
+
logger: logging.Logger = default_logger,
|
|
308
|
+
) -> list[dict] | None:
|
|
309
|
+
"""Load XML files from multiple directories or zip files concurrently.
|
|
310
|
+
|
|
311
|
+
Process them using XPath, and return a list of dictionaries containing the extracted elements.
|
|
312
|
+
|
|
313
|
+
This method handles multiple directories or zip files, processes XML files inside them in parallel
|
|
314
|
+
using threads, and extracts elements that match the specified XPath. It also supports pattern matching
|
|
315
|
+
for filenames and handles errors such as missing files or permission issues.
|
|
316
|
+
|
|
317
|
+
Args:
|
|
318
|
+
directories (list[str]):
|
|
319
|
+
A list of directories or zip files to process. Each item can be a path
|
|
320
|
+
to a directory or a zip file that contains XML files.
|
|
321
|
+
filenames (list[str] | None, optional):
|
|
322
|
+
A list of filename patterns (e.g., ["*/en/docovw.xml"]) to match
|
|
323
|
+
against the XML files. If None or empty, defaults to ["*.xml"].
|
|
324
|
+
xpath (str | None, optional):
|
|
325
|
+
An optional XPath string used to filter elements from the XML files.
|
|
326
|
+
logger (logging.Logger):
|
|
327
|
+
The logging object used for all log messages.
|
|
328
|
+
|
|
329
|
+
Returns:
|
|
330
|
+
list[dict] | None:
|
|
331
|
+
A list of dictionaries containing the extracted XML elements. Returns None
|
|
332
|
+
if any error occurs during processing.
|
|
333
|
+
|
|
334
|
+
Raises:
|
|
335
|
+
Exception: If any error occurs during processing, such as issues with directories, files, or zip extraction.
|
|
336
|
+
|
|
337
|
+
"""
|
|
338
|
+
|
|
339
|
+
# Set default for filenames if not provided
|
|
340
|
+
if not filenames:
|
|
341
|
+
filenames = ["*.xml"]
|
|
342
|
+
|
|
343
|
+
results_queue = Queue()
|
|
344
|
+
|
|
345
|
+
def process_xml_file(file_path: str) -> None:
|
|
346
|
+
"""Process a single XML file.
|
|
347
|
+
|
|
348
|
+
Args:
|
|
349
|
+
file_path (str):
|
|
350
|
+
Path to the XML file.
|
|
351
|
+
|
|
352
|
+
Results:
|
|
353
|
+
Adds elements to the results_queue defined outside this sub-method.
|
|
354
|
+
|
|
355
|
+
"""
|
|
356
|
+
|
|
357
|
+
try:
|
|
358
|
+
file_size = os.path.getsize(file_path)
|
|
359
|
+
file_name = os.path.basename(file_path)
|
|
360
|
+
dir_name = os.path.dirname(file_path)
|
|
361
|
+
|
|
362
|
+
if (
|
|
363
|
+
not filenames or any(fnmatch.fnmatch(file_path, pattern) for pattern in filenames)
|
|
364
|
+
) and file_name.endswith(".xml"):
|
|
365
|
+
logger.info(
|
|
366
|
+
"Load XML file -> '%s' of size -> %s",
|
|
367
|
+
file_path,
|
|
368
|
+
file_size,
|
|
369
|
+
)
|
|
370
|
+
elements = cls.load_xml_file(
|
|
371
|
+
file_path,
|
|
372
|
+
xpath=xpath,
|
|
373
|
+
dir_name=dir_name,
|
|
374
|
+
)
|
|
375
|
+
if elements:
|
|
376
|
+
results_queue.put(elements)
|
|
377
|
+
except FileNotFoundError:
|
|
378
|
+
logger.error("File not found -> '%s'!", file_path)
|
|
379
|
+
except PermissionError:
|
|
380
|
+
logger.error(
|
|
381
|
+
"Permission error with file -> '%s'!",
|
|
382
|
+
file_path,
|
|
383
|
+
)
|
|
384
|
+
except OSError:
|
|
385
|
+
logger.error(
|
|
386
|
+
"OS error processing file -> '%s'!",
|
|
387
|
+
file_path,
|
|
388
|
+
)
|
|
389
|
+
except ValueError:
|
|
390
|
+
logger.error(
|
|
391
|
+
"Value error processing file -> '%s'!",
|
|
392
|
+
file_path,
|
|
393
|
+
)
|
|
394
|
+
|
|
395
|
+
# end method process_xml_file
|
|
396
|
+
|
|
397
|
+
def process_directory_or_zip(path_to_root: str) -> list | None:
|
|
398
|
+
"""Process all files in a directory or zip file.
|
|
399
|
+
|
|
400
|
+
Args:
|
|
401
|
+
path_to_root (str):
|
|
402
|
+
File path to the root directory or zip file.
|
|
403
|
+
|
|
404
|
+
"""
|
|
405
|
+
|
|
406
|
+
try:
|
|
407
|
+
# Handle zip files
|
|
408
|
+
if path_to_root.endswith(".zip"):
|
|
409
|
+
zip_file_folder = os.path.splitext(path_to_root)[0]
|
|
410
|
+
if not os.path.exists(zip_file_folder):
|
|
148
411
|
logger.info(
|
|
149
|
-
"
|
|
412
|
+
"Unzipping -> '%s' into folder -> '%s'...",
|
|
413
|
+
path_to_root,
|
|
414
|
+
zip_file_folder,
|
|
150
415
|
)
|
|
151
|
-
|
|
152
|
-
|
|
416
|
+
try:
|
|
417
|
+
with zipfile.ZipFile(path_to_root, "r") as zfile:
|
|
418
|
+
zfile.extractall(zip_file_folder)
|
|
419
|
+
except zipfile.BadZipFile:
|
|
420
|
+
logger.error(
|
|
421
|
+
"Bad zip file -> '%s'!",
|
|
422
|
+
path_to_root,
|
|
423
|
+
)
|
|
424
|
+
except zipfile.LargeZipFile:
|
|
425
|
+
logger.error(
|
|
426
|
+
"Zip file is too large to process -> '%s'!",
|
|
427
|
+
path_to_root,
|
|
428
|
+
)
|
|
429
|
+
except PermissionError:
|
|
430
|
+
logger.error(
|
|
431
|
+
"Permission error extracting zip file -> '%s'!",
|
|
432
|
+
path_to_root,
|
|
433
|
+
)
|
|
434
|
+
except OSError:
|
|
435
|
+
logger.error(
|
|
436
|
+
"OS error occurred while extracting zip file -> '%s'!",
|
|
437
|
+
path_to_root,
|
|
438
|
+
)
|
|
439
|
+
return # Don't proceed further if zip extraction fails
|
|
440
|
+
|
|
441
|
+
else:
|
|
442
|
+
logger.info(
|
|
443
|
+
"Zip file is already extracted (path -> '%s' exists). Reusing extracted data...",
|
|
444
|
+
zip_file_folder,
|
|
153
445
|
)
|
|
446
|
+
path_to_root = zip_file_folder
|
|
447
|
+
# end if path_to_root.endswith(".zip")
|
|
448
|
+
|
|
449
|
+
# Use inner threading to process files within the directory
|
|
450
|
+
with ThreadPoolExecutor(
|
|
451
|
+
thread_name_prefix="ProcessXMLFile",
|
|
452
|
+
) as inner_executor:
|
|
453
|
+
for root, _, files in os.walk(path_to_root):
|
|
454
|
+
for file_data in files:
|
|
455
|
+
file_path = os.path.join(root, file_data)
|
|
456
|
+
inner_executor.submit(process_xml_file, file_path)
|
|
457
|
+
|
|
458
|
+
except FileNotFoundError:
|
|
459
|
+
logger.error(
|
|
460
|
+
"Directory or file not found -> '%s'!",
|
|
461
|
+
path_to_root,
|
|
462
|
+
)
|
|
463
|
+
except PermissionError:
|
|
464
|
+
logger.error(
|
|
465
|
+
"Permission error with directory -> '%s'!",
|
|
466
|
+
path_to_root,
|
|
467
|
+
)
|
|
468
|
+
except OSError:
|
|
469
|
+
logger.error(
|
|
470
|
+
"OS error processing path -> '%s'!",
|
|
471
|
+
path_to_root,
|
|
472
|
+
)
|
|
473
|
+
except ValueError:
|
|
474
|
+
logger.error(
|
|
475
|
+
"Value error processing path -> '%s'!",
|
|
476
|
+
path_to_root,
|
|
477
|
+
)
|
|
154
478
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
479
|
+
# end method process_directory_or_zip
|
|
480
|
+
|
|
481
|
+
try:
|
|
482
|
+
# Resolve wildcards in the directories list
|
|
483
|
+
expanded_directories: list[str] = []
|
|
484
|
+
for directory in directories:
|
|
485
|
+
if "*" in directory:
|
|
486
|
+
expanded_directory: list = glob.glob(directory)
|
|
487
|
+
logger.info(
|
|
488
|
+
"Expanding directory -> '%s' with wildcards...",
|
|
489
|
+
directory,
|
|
490
|
+
)
|
|
491
|
+
expanded_directories.extend(expanded_directory)
|
|
492
|
+
else:
|
|
493
|
+
logger.info(
|
|
494
|
+
"Directory -> '%s' has no wildcards. Not expanding...",
|
|
495
|
+
directory,
|
|
496
|
+
)
|
|
497
|
+
expanded_directories.append(directory)
|
|
498
|
+
|
|
499
|
+
# Use ThreadPoolExecutor for outer level: processing directories/zip files
|
|
500
|
+
logger.info(
|
|
501
|
+
"Starting %d threads for each directory or zip file...",
|
|
502
|
+
len(expanded_directories),
|
|
503
|
+
)
|
|
504
|
+
with ThreadPoolExecutor(
|
|
505
|
+
thread_name_prefix="ProcessDirOrZip",
|
|
506
|
+
) as outer_executor:
|
|
507
|
+
futures = [
|
|
508
|
+
outer_executor.submit(process_directory_or_zip, directory) for directory in expanded_directories
|
|
509
|
+
]
|
|
510
|
+
|
|
511
|
+
# Wait for all futures to complete
|
|
512
|
+
for future in futures:
|
|
513
|
+
future.result()
|
|
514
|
+
|
|
515
|
+
# Collect results from the queue
|
|
516
|
+
logger.info("Collecting results from worker queue...")
|
|
517
|
+
results = []
|
|
518
|
+
while not results_queue.empty():
|
|
519
|
+
results.extend(results_queue.get())
|
|
520
|
+
logger.info("Done. Collected %d results.", len(results))
|
|
521
|
+
|
|
522
|
+
except FileNotFoundError:
|
|
523
|
+
logger.error(
|
|
524
|
+
"Directory or file not found during execution!",
|
|
525
|
+
)
|
|
526
|
+
return None
|
|
527
|
+
except PermissionError:
|
|
528
|
+
logger.error("Permission error during execution!")
|
|
529
|
+
return None
|
|
530
|
+
except TimeoutError:
|
|
531
|
+
logger.error(
|
|
532
|
+
"Timeout occurred while waiting for threads!",
|
|
533
|
+
)
|
|
534
|
+
return None
|
|
535
|
+
except BrokenPipeError:
|
|
536
|
+
logger.error(
|
|
537
|
+
"Broken pipe error occurred during thread communication!",
|
|
538
|
+
)
|
|
539
|
+
return None
|
|
161
540
|
|
|
162
541
|
return results
|
|
163
542
|
|
|
164
543
|
# end method definition
|
|
165
544
|
|
|
166
545
|
@classmethod
|
|
167
|
-
def get_xml_element(
|
|
168
|
-
|
|
546
|
+
def get_xml_element(
|
|
547
|
+
cls,
|
|
548
|
+
xml_content: str,
|
|
549
|
+
xpath: str,
|
|
550
|
+
) -> Element:
|
|
551
|
+
"""Retrieve an XML Element from a string using an XPath expression.
|
|
169
552
|
|
|
170
553
|
Args:
|
|
171
|
-
xml_content (str):
|
|
172
|
-
|
|
554
|
+
xml_content (str):
|
|
555
|
+
XML file as a string
|
|
556
|
+
xpath (str):
|
|
557
|
+
XPath used to find the element.
|
|
173
558
|
|
|
174
559
|
Returns:
|
|
175
|
-
|
|
560
|
+
Element:
|
|
561
|
+
The XML element.
|
|
562
|
+
|
|
176
563
|
"""
|
|
177
564
|
|
|
178
565
|
# Parse XML content into an etree
|
|
@@ -186,13 +573,25 @@ class XML:
|
|
|
186
573
|
# end method definition
|
|
187
574
|
|
|
188
575
|
@classmethod
|
|
189
|
-
def modify_xml_element(
|
|
190
|
-
|
|
576
|
+
def modify_xml_element(
|
|
577
|
+
cls,
|
|
578
|
+
xml_content: str,
|
|
579
|
+
xpath: str,
|
|
580
|
+
new_value: str,
|
|
581
|
+
logger: logging.Logger = default_logger,
|
|
582
|
+
) -> None:
|
|
583
|
+
"""Update the text (= content) of an XML element.
|
|
191
584
|
|
|
192
585
|
Args:
|
|
193
|
-
xml_content (str):
|
|
194
|
-
|
|
195
|
-
|
|
586
|
+
xml_content (str):
|
|
587
|
+
The content of an XML file.
|
|
588
|
+
xpath (str):
|
|
589
|
+
XML Path to identify the XML element.
|
|
590
|
+
new_value (str):
|
|
591
|
+
The new text (content).
|
|
592
|
+
logger (logging.Logger):
|
|
593
|
+
The logging object used for all log messages.
|
|
594
|
+
|
|
196
595
|
"""
|
|
197
596
|
element = cls.get_xml_element(xml_content=xml_content, xpath=xpath)
|
|
198
597
|
|
|
@@ -212,7 +611,7 @@ class XML:
|
|
|
212
611
|
is_simple: bool = True,
|
|
213
612
|
is_escaped: bool = False,
|
|
214
613
|
) -> str | None:
|
|
215
|
-
"""Search a setting in an XML element and return its value
|
|
614
|
+
"""Search a setting in an XML element and return its value.
|
|
216
615
|
|
|
217
616
|
The simple case covers settings like this:
|
|
218
617
|
"syncCandidates":true,
|
|
@@ -226,25 +625,27 @@ class XML:
|
|
|
226
625
|
but we take the value for a string delimited by double quotes ("...")
|
|
227
626
|
|
|
228
627
|
Args:
|
|
229
|
-
element_text (str):
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
628
|
+
element_text (str):
|
|
629
|
+
The text to examine - typically content of an XML element.
|
|
630
|
+
setting_key (str):
|
|
631
|
+
The name of the setting key (before the colon).
|
|
632
|
+
is_simple (bool, optional):
|
|
633
|
+
True if the value is scalar (not having assocs with commas). Defaults to True.
|
|
634
|
+
is_escaped (bool, optional):
|
|
635
|
+
True if the quotes are escaped with &quot;. Defaults to False.
|
|
233
636
|
|
|
234
637
|
Returns:
|
|
235
|
-
str:
|
|
638
|
+
str:
|
|
639
|
+
The value of the setting or None if the setting is not found.
|
|
640
|
+
|
|
236
641
|
"""
|
|
237
642
|
|
|
238
643
|
if is_simple:
|
|
239
|
-
if is_escaped:
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
pattern = r'"{0}":[^,]*'.format(setting_key)
|
|
644
|
+
pattern = r"&quot;{}&quot;:[^,]*".format(setting_key) if is_escaped else r'"{}":[^,]*'.format(setting_key)
|
|
645
|
+
elif is_escaped:
|
|
646
|
+
pattern = r"&quot;{}&quot;:&quot;.*&quot;".format(setting_key)
|
|
243
647
|
else:
|
|
244
|
-
|
|
245
|
-
pattern = r"&quot;{0}&quot;:&quot;.*&quot;".format(setting_key)
|
|
246
|
-
else:
|
|
247
|
-
pattern = r'"{0}":"([^"]*)"'.format(setting_key)
|
|
648
|
+
pattern = r'"{}":"([^"]*)"'.format(setting_key)
|
|
248
649
|
|
|
249
650
|
match = re.search(pattern, element_text)
|
|
250
651
|
if match:
|
|
@@ -279,26 +680,29 @@ class XML:
|
|
|
279
680
|
but we take the value for a string delimited by double quotes ("...")
|
|
280
681
|
|
|
281
682
|
Args:
|
|
282
|
-
element_text (str):
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
683
|
+
element_text (str):
|
|
684
|
+
The original text of the XML element (that is to be updated).
|
|
685
|
+
setting_key (str):
|
|
686
|
+
The name of the setting.
|
|
687
|
+
new_value (str):
|
|
688
|
+
The new value of the setting.
|
|
689
|
+
is_simple (bool, optional):
|
|
690
|
+
True = value is a scalar like true, false, a number or none. Defaults to True.
|
|
691
|
+
is_escaped (bool, optional):
|
|
692
|
+
True if the value is surrounded with &quot;. Defaults to False.
|
|
287
693
|
|
|
288
694
|
Returns:
|
|
289
|
-
str:
|
|
695
|
+
str:
|
|
696
|
+
The updated element text.
|
|
697
|
+
|
|
290
698
|
"""
|
|
291
699
|
|
|
292
700
|
if is_simple:
|
|
293
|
-
if is_escaped:
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
pattern = r'"{0}":[^,]*'.format(setting_key)
|
|
701
|
+
pattern = r"&quot;{}&quot;:[^,]*".format(setting_key) if is_escaped else r'"{}":[^,]*'.format(setting_key)
|
|
702
|
+
elif is_escaped:
|
|
703
|
+
pattern = r"&quot;{}&quot;:&quot;.*&quot;".format(setting_key)
|
|
297
704
|
else:
|
|
298
|
-
|
|
299
|
-
pattern = r"&quot;{0}&quot;:&quot;.*&quot;".format(setting_key)
|
|
300
|
-
else:
|
|
301
|
-
pattern = r'"{0}":"([^"]*)"'.format(setting_key)
|
|
705
|
+
pattern = r'"{}":"([^"]*)"'.format(setting_key)
|
|
302
706
|
|
|
303
707
|
new_text = re.sub(pattern, new_value, element_text)
|
|
304
708
|
|
|
@@ -315,24 +719,38 @@ class XML:
|
|
|
315
719
|
xpath: str = "",
|
|
316
720
|
setting: str = "",
|
|
317
721
|
assoc_elem: str = "",
|
|
722
|
+
logger: logging.Logger = default_logger,
|
|
318
723
|
) -> bool:
|
|
319
|
-
"""
|
|
320
|
-
|
|
724
|
+
"""Replace all occurrences of the search pattern with the replace string.
|
|
725
|
+
|
|
726
|
+
This is done in all XML files in the directory and its subdirectories.
|
|
321
727
|
|
|
322
728
|
Args:
|
|
323
|
-
directory (str):
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
729
|
+
directory (str):
|
|
730
|
+
Directory to traverse for XML files
|
|
731
|
+
search_pattern (str):
|
|
732
|
+
The string to search in the XML file.
|
|
733
|
+
This can be empty if xpath is used!
|
|
734
|
+
replace_string (str):
|
|
735
|
+
The replacement string.
|
|
736
|
+
xpath (str, optional):
|
|
737
|
+
An XPath can be given to narrow down the replacement to an XML element.
|
|
738
|
+
For now the XPath needs to be constructed in a way that it returns
|
|
739
|
+
one element or none.
|
|
740
|
+
setting (str, optional):
|
|
741
|
+
Narrow down the replacement to the line that includes the setting with this name.
|
|
742
|
+
This parameter is optional.
|
|
743
|
+
assoc_elem (str, optional):
|
|
744
|
+
Lookup a specific assoc element. This parameter is optional.
|
|
745
|
+
logger (logging.Logger):
|
|
746
|
+
The logging object used for all log messages.
|
|
747
|
+
|
|
333
748
|
Returns:
|
|
334
|
-
bool:
|
|
749
|
+
bool:
|
|
750
|
+
True if a replacement happened, False otherwise
|
|
751
|
+
|
|
335
752
|
"""
|
|
753
|
+
|
|
336
754
|
# Define the regular expression pattern to search for
|
|
337
755
|
# search pattern can be empty if an xpath is used. So
|
|
338
756
|
# be careful here:
|
|
@@ -362,7 +780,8 @@ class XML:
|
|
|
362
780
|
tree = etree.parse(file_path)
|
|
363
781
|
if not tree:
|
|
364
782
|
logger.error(
|
|
365
|
-
"Cannot parse XML tree -> %s. Skipping...",
|
|
783
|
+
"Cannot parse XML tree -> %s. Skipping...",
|
|
784
|
+
file_path,
|
|
366
785
|
)
|
|
367
786
|
continue
|
|
368
787
|
root = tree.getroot()
|
|
@@ -402,7 +821,9 @@ class XML:
|
|
|
402
821
|
replace_string,
|
|
403
822
|
)
|
|
404
823
|
setting_value = cls.search_setting(
|
|
405
|
-
element.text,
|
|
824
|
+
element.text,
|
|
825
|
+
setting,
|
|
826
|
+
is_simple=True,
|
|
406
827
|
)
|
|
407
828
|
if setting_value:
|
|
408
829
|
logger.debug(
|
|
@@ -411,21 +832,13 @@ class XML:
|
|
|
411
832
|
)
|
|
412
833
|
# Check if the setting value needs to be surrounded by quotes.
|
|
413
834
|
# Only simplistic values like booleans or numeric values don't need quotes
|
|
414
|
-
if (
|
|
415
|
-
|
|
416
|
-
or replace_string == "false"
|
|
417
|
-
or replace_string == "none"
|
|
418
|
-
or replace_string.isnumeric()
|
|
419
|
-
):
|
|
420
|
-
replace_setting = (
|
|
421
|
-
'"' + setting + '":' + replace_string
|
|
422
|
-
)
|
|
835
|
+
if replace_string in ("true", "false", "none") or replace_string.isnumeric():
|
|
836
|
+
replace_setting = '"' + setting + '":' + replace_string
|
|
423
837
|
else:
|
|
424
|
-
replace_setting =
|
|
425
|
-
'"' + setting + '":"' + replace_string + '"'
|
|
426
|
-
)
|
|
838
|
+
replace_setting = '"' + setting + '":"' + replace_string + '"'
|
|
427
839
|
logger.debug(
|
|
428
|
-
"Replacement setting -> %s",
|
|
840
|
+
"Replacement setting -> %s",
|
|
841
|
+
replace_setting,
|
|
429
842
|
)
|
|
430
843
|
element.text = cls.replace_setting(
|
|
431
844
|
element_text=element.text,
|
|
@@ -449,21 +862,20 @@ class XML:
|
|
|
449
862
|
replace_string,
|
|
450
863
|
)
|
|
451
864
|
assoc_string: str = Assoc.extract_assoc_string(
|
|
452
|
-
input_string=element.text
|
|
865
|
+
input_string=element.text,
|
|
453
866
|
)
|
|
454
867
|
logger.debug("Assoc String -> %s", assoc_string)
|
|
455
868
|
assoc_dict = Assoc.string_to_dict(
|
|
456
|
-
assoc_string=assoc_string
|
|
869
|
+
assoc_string=assoc_string,
|
|
457
870
|
)
|
|
458
871
|
logger.debug("Assoc Dict -> %s", str(assoc_dict))
|
|
459
|
-
assoc_dict[assoc_elem] =
|
|
460
|
-
replace_string # escaped_replace_string
|
|
461
|
-
)
|
|
872
|
+
assoc_dict[assoc_elem] = replace_string # escaped_replace_string
|
|
462
873
|
assoc_string_new: str = Assoc.dict_to_string(
|
|
463
|
-
assoc_dict=assoc_dict
|
|
874
|
+
assoc_dict=assoc_dict,
|
|
464
875
|
)
|
|
465
876
|
logger.debug(
|
|
466
|
-
"Replace assoc with -> %s",
|
|
877
|
+
"Replace assoc with -> %s",
|
|
878
|
+
assoc_string_new,
|
|
467
879
|
)
|
|
468
880
|
element.text = assoc_string_new
|
|
469
881
|
element.text = element.text.replace('"', "&quot;")
|
|
@@ -478,42 +890,43 @@ class XML:
|
|
|
478
890
|
replace_string,
|
|
479
891
|
)
|
|
480
892
|
setting_value = cls.search_setting(
|
|
481
|
-
element.text,
|
|
893
|
+
element.text,
|
|
894
|
+
setting,
|
|
895
|
+
is_simple=False,
|
|
482
896
|
)
|
|
483
897
|
if setting_value:
|
|
484
898
|
logger.debug(
|
|
485
|
-
"Found setting value -> %s",
|
|
899
|
+
"Found setting value -> %s",
|
|
900
|
+
setting_value,
|
|
486
901
|
)
|
|
487
902
|
assoc_string: str = Assoc.extract_assoc_string(
|
|
488
|
-
input_string=setting_value
|
|
903
|
+
input_string=setting_value,
|
|
489
904
|
)
|
|
490
905
|
logger.debug("Assoc String -> %s", assoc_string)
|
|
491
906
|
assoc_dict = Assoc.string_to_dict(
|
|
492
|
-
assoc_string=assoc_string
|
|
907
|
+
assoc_string=assoc_string,
|
|
493
908
|
)
|
|
494
909
|
logger.debug("Assoc Dict -> %s", str(assoc_dict))
|
|
495
910
|
escaped_replace_string = replace_string.replace(
|
|
496
|
-
"'",
|
|
911
|
+
"'",
|
|
912
|
+
"\\\\\u0027",
|
|
497
913
|
)
|
|
498
914
|
logger.debug(
|
|
499
915
|
"Escaped replacement string -> %s",
|
|
500
916
|
escaped_replace_string,
|
|
501
917
|
)
|
|
502
|
-
assoc_dict[assoc_elem] =
|
|
503
|
-
escaped_replace_string # escaped_replace_string
|
|
504
|
-
)
|
|
918
|
+
assoc_dict[assoc_elem] = escaped_replace_string # escaped_replace_string
|
|
505
919
|
assoc_string_new: str = Assoc.dict_to_string(
|
|
506
|
-
assoc_dict=assoc_dict
|
|
920
|
+
assoc_dict=assoc_dict,
|
|
507
921
|
)
|
|
508
922
|
assoc_string_new = assoc_string_new.replace(
|
|
509
|
-
"'",
|
|
510
|
-
|
|
511
|
-
# replace_setting = """ + setting + "":"" + assoc_string_new + """
|
|
512
|
-
replace_setting = (
|
|
513
|
-
'"' + setting + '":"' + assoc_string_new + '"'
|
|
923
|
+
"'",
|
|
924
|
+
"\\u0027",
|
|
514
925
|
)
|
|
926
|
+
replace_setting = '"' + setting + '":"' + assoc_string_new + '"'
|
|
515
927
|
logger.debug(
|
|
516
|
-
"Replacement setting -> %s",
|
|
928
|
+
"Replacement setting -> %s",
|
|
929
|
+
replace_setting,
|
|
517
930
|
)
|
|
518
931
|
# here we need to apply a "trick". It is required
|
|
519
932
|
# as regexp cannot handle the special unicode escapes \u0027
|
|
@@ -524,13 +937,13 @@ class XML:
|
|
|
524
937
|
element.text = cls.replace_setting(
|
|
525
938
|
element_text=element.text,
|
|
526
939
|
setting_key=setting,
|
|
527
|
-
# new_value=replace_setting,
|
|
528
940
|
new_value="PLACEHOLDER",
|
|
529
941
|
is_simple=False,
|
|
530
942
|
is_escaped=False,
|
|
531
943
|
)
|
|
532
944
|
element.text = element.text.replace(
|
|
533
|
-
"PLACEHOLDER",
|
|
945
|
+
"PLACEHOLDER",
|
|
946
|
+
replace_setting,
|
|
534
947
|
)
|
|
535
948
|
element.text = element.text.replace('"', "&quot;")
|
|
536
949
|
xml_modified = True
|
|
@@ -554,10 +967,12 @@ class XML:
|
|
|
554
967
|
)
|
|
555
968
|
# we need to undo some of the stupid things tostring() did:
|
|
556
969
|
new_contents = new_contents.replace(
|
|
557
|
-
b""",
|
|
970
|
+
b"&amp;quot;",
|
|
971
|
+
b"&quot;",
|
|
558
972
|
)
|
|
559
973
|
new_contents = new_contents.replace(
|
|
560
|
-
b"'",
|
|
974
|
+
b"&amp;apos;",
|
|
975
|
+
b"&apos;",
|
|
561
976
|
)
|
|
562
977
|
new_contents = new_contents.replace(b"&amp;gt;", b"&gt;")
|
|
563
978
|
new_contents = new_contents.replace(b"&amp;lt;", b"&lt;")
|
|
@@ -576,12 +991,14 @@ class XML:
|
|
|
576
991
|
# This is required as we next want to replace all double quotes with single quotes
|
|
577
992
|
# to make the XML files as similar as possible with Extended ECM's format
|
|
578
993
|
pattern = b">([^<>]+?)<"
|
|
579
|
-
replacement = lambda match: match.group(0).replace(
|
|
580
|
-
b'"',
|
|
994
|
+
replacement = lambda match: match.group(0).replace( # noqa: E731
|
|
995
|
+
b'"',
|
|
996
|
+
b"&quot;",
|
|
581
997
|
)
|
|
582
998
|
new_contents = re.sub(pattern, replacement, new_contents)
|
|
583
|
-
replacement = lambda match: match.group(0).replace(
|
|
584
|
-
b"'",
|
|
999
|
+
replacement = lambda match: match.group(0).replace( # noqa: E731
|
|
1000
|
+
b"'",
|
|
1001
|
+
b"&apos;",
|
|
585
1002
|
)
|
|
586
1003
|
new_contents = re.sub(pattern, replacement, new_contents)
|
|
587
1004
|
|
|
@@ -600,7 +1017,7 @@ class XML:
|
|
|
600
1017
|
# this is not using xpath - do a simple search and replace
|
|
601
1018
|
else:
|
|
602
1019
|
logger.debug("Replacement without xpath...")
|
|
603
|
-
with open(file_path,
|
|
1020
|
+
with open(file_path, encoding="UTF-8") as f:
|
|
604
1021
|
contents = f.read()
|
|
605
1022
|
# Replace all occurrences of the search pattern with the replace string
|
|
606
1023
|
new_contents = pattern.sub(replace_string, contents)
|
|
@@ -626,15 +1043,22 @@ class XML:
|
|
|
626
1043
|
cls,
|
|
627
1044
|
directory: str,
|
|
628
1045
|
xpath: str,
|
|
1046
|
+
logger: logging.Logger = default_logger,
|
|
629
1047
|
) -> list | None:
|
|
630
|
-
"""
|
|
631
|
-
in the directory and its subdirectories.
|
|
1048
|
+
"""Extract the XML subtrees using an XPath in all XML files in the directory and its subdirectories.
|
|
632
1049
|
|
|
633
1050
|
Args:
|
|
634
|
-
directory (str):
|
|
635
|
-
|
|
1051
|
+
directory (str):
|
|
1052
|
+
The directory to traverse for XML files.
|
|
1053
|
+
xpath (str):
|
|
1054
|
+
Used to determine XML elements to extract.
|
|
1055
|
+
logger (logging.Logger):
|
|
1056
|
+
The logging object used for all log messages.
|
|
1057
|
+
|
|
636
1058
|
Returns:
|
|
637
|
-
list | None:
|
|
1059
|
+
list | None:
|
|
1060
|
+
Extracted data if it is found by the XPath, None otherwise.
|
|
1061
|
+
|
|
638
1062
|
"""
|
|
639
1063
|
|
|
640
1064
|
extracted_data_list = []
|
|
@@ -651,7 +1075,8 @@ class XML:
|
|
|
651
1075
|
tree = etree.parse(file_path)
|
|
652
1076
|
if not tree:
|
|
653
1077
|
logger.error(
|
|
654
|
-
"Cannot parse XML file -> '%s'. Skipping...",
|
|
1078
|
+
"Cannot parse XML file -> '%s'. Skipping...",
|
|
1079
|
+
file_path,
|
|
655
1080
|
)
|
|
656
1081
|
continue
|
|
657
1082
|
root = tree.getroot()
|