smartXML 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smartXML/__init__.py +0 -0
- smartXML/_elements_utils.py +75 -0
- smartXML/element.py +250 -0
- smartXML/xmltree.py +321 -0
- smartxml-1.0.8.dist-info/METADATA +82 -0
- smartxml-1.0.8.dist-info/RECORD +8 -0
- smartxml-1.0.8.dist-info/WHEEL +5 -0
- smartxml-1.0.8.dist-info/top_level.txt +1 -0
smartXML/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
def _find_one_in_sons(
|
|
2
|
+
element: "Element",
|
|
3
|
+
names_list: list[str],
|
|
4
|
+
with_content: str = None,
|
|
5
|
+
) -> "Element":
|
|
6
|
+
if not names_list:
|
|
7
|
+
return element
|
|
8
|
+
for name in names_list:
|
|
9
|
+
for son in element._sons:
|
|
10
|
+
if _check_match(son, name):
|
|
11
|
+
found = _find_one_in_sons(son, names_list[1:], with_content)
|
|
12
|
+
if found:
|
|
13
|
+
if with_content is None or found.content == with_content:
|
|
14
|
+
return found
|
|
15
|
+
return None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _check_match(element: "Element", names: str) -> bool:
|
|
19
|
+
if names and element.name != names:
|
|
20
|
+
return False
|
|
21
|
+
return True
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _find_one(element: "Element", names: str, with_content: str) -> "Element":
    """
    Depth-first search for the first element matching ``names`` / ``with_content``.

    :param element: root of the subtree to search (it is itself a candidate)
    :param names: a tag name, a "|"-separated path such as "parent|child",
                  or an empty string / None to match on content only
    :param with_content: if not None, the matched element's ``content`` must equal it
    :return: the first matching element, or None
    """
    if _check_match(element, names):
        if with_content is None or element.content == with_content:
            return element

    # ``names`` is None when the caller filters by content only (Element.find()
    # defaults ``name`` to None); there is no path to split in that case.
    names_list = names.split("|") if names else []

    # A multi-component path is anchored at any element named like its head.
    if len(names_list) > 1 and element.name == names_list[0]:
        found = _find_one_in_sons(element, names_list[1:], with_content)
        if found:
            return found

    for son in element._sons:
        found = _find_one(son, names, with_content)
        if found:
            return found
    return None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _find_all(element: "Element", names: str, with_content: str) -> list["Element"]:
|
|
46
|
+
results = []
|
|
47
|
+
if _check_match(element, names=names):
|
|
48
|
+
if with_content is None or element.content == with_content:
|
|
49
|
+
results.extend([element])
|
|
50
|
+
for son in element._sons:
|
|
51
|
+
results.extend(_find_all(son, names, with_content))
|
|
52
|
+
return results
|
|
53
|
+
|
|
54
|
+
names_list = names.split("|")
|
|
55
|
+
|
|
56
|
+
if _check_match(element, names_list[0]):
|
|
57
|
+
if with_content is None or element.content == with_content:
|
|
58
|
+
sons = []
|
|
59
|
+
sons.extend(element._sons)
|
|
60
|
+
match = []
|
|
61
|
+
for index, name in enumerate(names_list[1:]):
|
|
62
|
+
for son in sons:
|
|
63
|
+
if son.name == name:
|
|
64
|
+
if index == len(names_list) - 2:
|
|
65
|
+
results.append(son)
|
|
66
|
+
else:
|
|
67
|
+
match.extend(son._sons)
|
|
68
|
+
sons.clear()
|
|
69
|
+
sons.extend(match)
|
|
70
|
+
match.clear()
|
|
71
|
+
|
|
72
|
+
for son in element._sons:
|
|
73
|
+
results.extend(_find_all(son, names, with_content))
|
|
74
|
+
|
|
75
|
+
return results
|
smartXML/element.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
import warnings
|
|
3
|
+
|
|
4
|
+
from ._elements_utils import (
|
|
5
|
+
_find_one,
|
|
6
|
+
_find_all,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
class IllegalOperation(Exception):
    """Raised when an operation is not allowed on the element it targets."""

    def __init__(self, message: str):
        super().__init__(message)
        # Kept as an attribute so callers can read the text without str().
        self.message = message
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ElementBase:
    """Common base of every node kind in the XML tree."""

    def __init__(self, name: str):
        self._name = name
        self._sons = []  # child nodes, in document order
        self._parent = None  # owning node; None while detached (or for the root)

    def is_comment(self) -> bool:
        """Check if the element is a comment."""
        return False

    @property
    def parent(self):
        """Get the parent of the element."""
        return self._parent

    @property
    def name(self) -> str:
        """Get the name of the element."""
        return self._name

    @name.setter
    def name(self, new_name: str):
        """
        Set the name of the element.
        :raises ValueError: if the name is empty or starts with a digit
        """
        if not new_name or new_name[0].isdigit():
            raise ValueError(f"Invalid tag name '{new_name}'")
        self._name = new_name

    def to_string(self, indentation: str = "\t") -> str:
        """
        Convert the XML tree to a string.
        :param indentation: string used for indentation, default is tab character
        :return: XML string
        """
        return self._to_string(0, indentation)

    def _to_string(self, index: int, indentation: str) -> str:
        # Rendering hook: ``index`` is the nesting depth. The base class renders
        # nothing (returns None); each concrete node type overrides this.
        pass

    def get_path(self) -> str:
        """
        Get the full path of the element.
        :return: the names from the root of the tree down to this element, separated by |
        """
        elements = []
        current = self
        while current is not None:
            elements.append(current._name)
            current = current._parent
        return "|".join(reversed(elements))

    def _detach(self):
        """Unlink this element from its current parent, if it has one."""
        if self._parent is not None:
            self._parent._sons.remove(self)
            self._parent = None

    def add_before(self, sibling: "Element"):
        """
        Add this element before the given sibling element.

        If this element already belongs to a tree it is moved, not duplicated.
        :raises ValueError: if ``sibling`` has no parent
        """
        parent = sibling._parent
        if parent is None:
            raise ValueError(f"Element {sibling.name} has no parent")
        if sibling is self:
            return  # already in place; inserting would duplicate the node
        # Detach first: otherwise the old parent keeps listing this element, and
        # the index computed below would be stale when both share a parent.
        self._detach()
        index = parent._sons.index(sibling)
        parent._sons.insert(index, self)
        self._parent = parent

    def add_after(self, sibling: "Element"):
        """
        Add this element after the given sibling element.

        If this element already belongs to a tree it is moved, not duplicated.
        :raises ValueError: if ``sibling`` has no parent
        """
        parent = sibling._parent
        if parent is None:
            raise ValueError(f"Element {sibling.name} has no parent")
        if sibling is self:
            return  # already in place; inserting would duplicate the node
        self._detach()
        index = parent._sons.index(sibling)
        parent._sons.insert(index + 1, self)
        self._parent = parent

    def add_as_son_of(self, parent: "Element"):
        """Add this element as the last son of the given parent element (deprecated)."""
        warnings.warn(
            "add_as_son_of() is deprecated and will be removed in version 1.1.0. "
            "Use add_before() or add_after() instead.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        self._detach()
        parent._sons.append(self)
        self._parent = parent

    def set_as_parent_of(self, son: "Element"):
        """Set this element as the parent of the given son element (deprecated)."""
        warnings.warn(
            "set_as_parent_of() is deprecated and will be removed in version 1.1.0. "
            "Use add_before() or add_after() instead.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        son._detach()
        self._sons.append(son)
        son._parent = self

    def remove(self):
        """
        Remove this element from its parent's sons.
        :raises ValueError: if the element has no parent
        """
        if self._parent is None:
            raise ValueError(f"Element {self.name} has no parent")
        self._detach()
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class TextOnlyComment(ElementBase):
    """A comment whose body is free text rather than commented-out elements."""

    def __init__(self, text: str):
        # Comments carry no tag name, hence the empty name.
        super().__init__("")
        self._text = text

    def is_comment(self) -> bool:
        return True

    def _to_string(self, index: int, indentation: str) -> str:
        # One comment per line, indented to the current nesting depth.
        return "{}<!-- {} -->\n".format(indentation * index, self._text)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class CData(ElementBase):
    """A CDATA section holding raw text."""

    def __init__(self, text: str):
        # CDATA sections carry no tag name, hence the empty name.
        super().__init__("")
        self._text = text

    def _to_string(self, index: int, indentation: str) -> str:
        # The text is emitted verbatim between the CDATA markers.
        return "{}<![CDATA[{}]]>\n".format(indentation * index, self._text)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class Doctype(ElementBase):
    """A DOCTYPE declaration; its sons are the declarations of the internal subset."""

    def __init__(self, text: str):
        super().__init__("")
        # Declaration text without the surrounding "<" and "[", e.g. "!DOCTYPE note ".
        self._text = text

    def _to_string(self, index: int, indentation: str) -> str:
        indent = indentation * index
        sons_indent = indentation * (index + 1)

        parts = []
        for son in self._sons:
            if isinstance(son, TextOnlyComment):
                parts.append(son._to_string(index + 1, indentation))
            else:
                # Subset declarations keep their whole text (e.g. "!ELEMENT ...") as name.
                parts.append(f"{sons_indent}<{son.name}>\n")
        children_str = "".join(parts)

        if children_str:
            return f"{indent}<{self._text}[\n{children_str}{indent}]>\n"
        # An empty internal subset is still a DOCTYPE, not a CDATA section. The
        # brackets are kept because the tokenizer expects a "[" after "!DOCTYPE".
        return f"{indent}<{self._text}[]>\n"
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class Element(ElementBase):
    """A regular XML element: a tag with attributes, text content and child nodes."""

    def __init__(self, name: str):
        super().__init__(name)
        self.content = ""
        self.attributes = {}
        self._is_empty = False  # True for a self-closing tag (<tag/>)

    def comment_out(self):
        """
        Turn this element into a comment.
        :raises IllegalOperation: if an ancestor or a descendant is already a comment
        """

        def has_comment(node: "Element") -> bool:
            # True when the node, or anything below it, is a comment.
            return node.is_comment() or any(has_comment(child) for child in node._sons)

        ancestor = self.parent
        while ancestor:
            if ancestor.is_comment():
                raise IllegalOperation("Cannot comment out an element whose parent is a comment")
            ancestor = ancestor.parent

        if any(has_comment(son) for son in self._sons):
            raise IllegalOperation("Cannot comment out an element whose descended is a comment")

        # The instance is re-typed in place, so existing references stay valid.
        self.__class__ = Comment

    def _to_string(self, index: int, indentation: str, with_endl=True) -> str:
        indent = indentation * index

        rendered_attributes = [f'{key}="{value}"' for key, value in self.attributes.items()]
        attributes_part = " " + " ".join(rendered_attributes) if rendered_attributes else ""

        if self._is_empty:
            result = f"{indent}<{self.name}{attributes_part}/>"
        else:
            children_str = "".join(son._to_string(index + 1, indentation) for son in self._sons)
            if children_str:
                # Children go on their own lines; the closing tag is re-indented.
                body = f"{self.content}\n{children_str}{indent}"
            else:
                body = f"{self.content}"
            result = f"{indent}<{self.name}{attributes_part}>{body}</{self.name}>"

        return result + "\n" if with_endl else result

    def find(
        self,
        name: str = None,
        only_one: bool = True,
        with_content: str = None,
    ) -> Union["Element", list["Element"], None]:
        """
        Find element(s) by name or content or both
        :param name: name of the element to find, can be nested using |, e.g. "parent|child|subchild"
        :param only_one: stop at first find or return all found elements
        :param with_content: filter by content
        :return: the elements found,
        if found, return the elements that match the last name in the path,
        if not found, return None if only_one is True, else return empty list
        """
        finder = _find_one if only_one else _find_all
        return finder(self, name, with_content=with_content)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class Comment(Element):
    """A commented-out element; it keeps its attributes, content and sons."""

    def __init__(self, name: str):
        super().__init__(name)

    def is_comment(self) -> bool:
        return True

    def uncomment(self):
        """Turn this comment back into a regular element."""
        self.__class__ = Element

    def _to_string(self, index: int, indentation: str) -> str:
        indent = indentation * index
        if self._sons:
            # Multi-line form: the element is rendered one level deeper,
            # between the comment markers.
            inner = super()._to_string(index + 1, indentation, False)
            return f"{indent}<!--\n{inner}\n{indent}-->\n"
        # Single-line form for an element without sons.
        inner = super()._to_string(0, indentation, False)
        return f"{indent}<!-- {inner} -->\n"
|
smartXML/xmltree.py
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from enum import Enum
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from .element import ElementBase, Element, Comment, CData, Doctype, TextOnlyComment
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BadXMLFormat(Exception):
    """Raised when the input cannot be parsed as XML."""

    def __init__(self, message: str):
        super().__init__(message)
        # Kept as an attribute so callers can read the text without str().
        self.message = message
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TokenType(Enum):
    """Kinds of tokens produced by _divide_to_tokens()."""

    comment_start = 1  # text between two consecutive "<" (a "<!--" opening a commented-out tag)
    full = 2  # text between a "<" and the following ">"
    closing = 3  # text ending at a ">" that has no pending "<" (e.g. "--" closing a comment)
    content = 4  # text between a ">" and the following "<"
    c_data = 5  # body of a <![CDATA[ ... ]]> section
    doctype = 6  # text from "!DOCTYPE" up to (excluding) the next "["
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _divide_to_tokens(file_content):
    """
    Split raw XML text (without the XML declaration) into tokens.

    :param file_content: the XML text to scan
    :return: list of (TokenType, text) tuples, in document order
    :raises BadXMLFormat: on an unterminated CDATA section or a DOCTYPE without "["
    """
    tokens = []

    last_char = ""  # last structural character seen: "<", ">" or "" (none yet)
    last_index = 0  # position of that character

    index = 0
    while index < len(file_content):
        char = file_content[index]

        if char == "!":
            # Slicing never raises, so no explicit bound check is needed here.
            if file_content[index : index + 8] == "![CDATA[" and last_char == "<":
                cdata_end = file_content.find("]]>", index)
                if cdata_end == -1:
                    raise BadXMLFormat("Malformed CDATA section")
                tokens.append((TokenType.c_data, file_content[index + 8 : cdata_end]))
                # Resume right after the "]]>" as if a regular ">" had been seen.
                last_index = cdata_end + 2
                last_char = ">"
                index = last_index + 1
                continue
            elif file_content[index : index + 8] == "!DOCTYPE":
                start = file_content.find("[", index)
                if start == -1:
                    raise BadXMLFormat("Malformed DOCTYPE declaration")
                tokens.append((TokenType.doctype, file_content[index:start]))

                # The internal subset is tokenized like regular markup; its
                # final "]>" then shows up as a closing token.
                last_char = ""
                last_index = start + 1
                index = start + 1
                continue

        if char == ">":
            token_type = TokenType.full if last_char == "<" else TokenType.closing
            tokens.append((token_type, file_content[last_index + 1 : index].strip()))
            last_char = char
            last_index = index
        elif char == "<":
            if last_char == "<":
                # this is a case of opening a comment of type <!-- TAG>...</TAG --> (or bad format)
                tokens.append((TokenType.comment_start, file_content[last_index + 1 : index].strip()))
            if last_char == ">":
                # Text runs up to (excluding) this "<". Stopping one character
                # earlier would silently drop single-character content.
                text = file_content[last_index + 1 : index].strip()
                if text:
                    tokens.append((TokenType.content, text))
            last_char = char
            last_index = index
        index += 1

    return tokens
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _add_ready_token(ready_nodes, element: ElementBase, depth: int):
    """
    Register a completed element at ``depth`` and adopt the nodes waiting below it.

    :param ready_nodes: dict mapping a depth to the completed elements at that depth
    :param element: the element that has just been completed
    :param depth: nesting depth of ``element``
    """
    ready_nodes.setdefault(depth, []).append(element)

    # Everything completed one level deeper since the last adoption belongs to
    # this element.
    pending_sons = ready_nodes.pop(depth + 1, None)
    if pending_sons is None:
        return
    element._sons = pending_sons
    for son in pending_sons:
        son._parent = element
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _parse_element(data: str) -> Element:
    """
    Build an Element from the text found between "<" and ">".

    :param data: tag text without the angle brackets, e.g. 'student id="S002"'
    :return: the element, with its attributes filled in
    :raises BadXMLFormat: if the tag is empty or its name is not a valid XML name start
    """
    if not data:
        raise BadXMLFormat("Empty tag")
    if data[0] == "!":
        # Declarations such as "!ELEMENT ..." are kept verbatim as the name.
        return Element(data)

    # DOTALL: attributes may continue on the following lines.
    tag_name_match = re.match(r"(\S+)\s*(.*)", data, re.DOTALL)
    if not tag_name_match:
        raise BadXMLFormat(f'Could not parse tag name and attributes from line: "{data}"')

    name = tag_name_match.group(1)
    # XML names start with a letter, "_" or ":".
    if not (name[0].isalpha() or name[0] in "_:"):
        raise BadXMLFormat(f"Tag {name} must start with a letter, '_' or ':'")

    attributes_string = tag_name_match.group(2).strip()
    attributes = {}

    # Quoted alternatives come first: otherwise the unquoted one wins and cuts a
    # quoted value at its first space. Names exclude "=" so that a "=" inside a
    # value cannot be taken for the name/value separator.
    attribute_pattern = re.compile(r"([^\s=]+)\s*=\s*(\"[^\"]*\"|'[^']*'|[^\s=]+)")
    for match in attribute_pattern.finditer(attributes_string):
        attr_name, attr_value = match.groups()
        if len(attr_value) >= 2 and attr_value[0] == attr_value[-1] and attr_value[0] in "\"'":
            attr_value = attr_value[1:-1]  # drop the matching pair of quotes
        attributes[attr_name] = attr_value

    element = Element(name)
    element.attributes = attributes
    return element
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class SmartXML:
    """An XML document: optional declaration, optional DOCTYPE and one root element."""

    def __init__(self, data: Path = None):
        """
        :param data: optional path of an XML file to read immediately
        """
        self._file_name = data
        self._declaration = ""
        self._tree = None
        self._doctype = None
        if data:
            self._tree, self._doctype = self._read(self._file_name)

    @property
    def tree(self) -> "ElementBase":
        """Get the root element of the XML tree."""
        return self._tree

    @property
    def declaration(self) -> str:
        """Get the XML declaration."""
        return self._declaration

    def _parse_declaration(self, file_content: str):
        """
        Extract the leading <?xml ... ?> declaration, if any.

        :param file_content: the whole file text
        :return: the text that follows the declaration (or the whole text)
        :raises BadXMLFormat: if the declaration is unterminated or not at the start
        """
        # Forget the declaration of a previously read file.
        self._declaration = ""

        start = file_content.find("<?xml")
        if start == -1:
            return file_content

        end = file_content.find("?>", start)
        if end == -1:
            raise BadXMLFormat("Malformed XML declaration")
        if start > 0:
            raise BadXMLFormat("XML declaration must be at the beginning of the file")

        self._declaration = file_content[start + 5 : end].strip()
        return file_content[end + 2 :]

    def read(self, file_name: Path) -> None:
        """
        Read and parse the XML file into an element tree.
        :param file_name: Path to the XML file
        :raises:
            TypeError: if file_name is not a pathlib.Path object
            FileNotFoundError: if file_name does not exist
            BadXMLFormat: if the XML format is invalid
        """
        if not isinstance(file_name, Path):
            raise TypeError("file_name must be a pathlib.Path object")
        if not file_name.exists():
            raise FileNotFoundError(f"File {file_name} does not exist")

        self._tree, self._doctype = self._read(file_name)

    def _read(self, file_name: Path) -> "tuple[Any, None] | tuple[Any, Any]":
        """
        Parse ``file_name`` and return ``(root element, doctype or None)``.
        :raises BadXMLFormat: if the XML format is invalid
        """
        # NOTE: the union annotations in this class are written as strings
        # because "X | Y" evaluated at definition time needs Python 3.10.
        self._file_name = file_name
        ready_nodes = {}  # depth -> list of completed elements
        incomplete_nodes = []  # stack of opened, not yet closed nodes
        depth = 0

        count_comment_start = 0
        count_comment_end = 0

        file_content = self._file_name.read_text()
        file_content = self._parse_declaration(file_content)

        tokens = _divide_to_tokens(file_content)

        for token_type, data in tokens:
            if token_type == TokenType.full:
                if data.endswith("/"):
                    # self-closing tag
                    data = data[:-1].strip()
                    element = _parse_element(data)
                    element._is_empty = True
                    # The stack is empty when the root itself is self-closing.
                    if incomplete_nodes and incomplete_nodes[-1].name == "!--":
                        element.comment_out()
                    _add_ready_token(ready_nodes, element, depth + 1)

                elif data.startswith("/"):
                    data = data[1:].strip()
                    if not incomplete_nodes:
                        raise BadXMLFormat(f"Closing tag {data} has no matching opening tag")
                    element = incomplete_nodes.pop()
                    if data.endswith("--"):
                        # this is a case of closing a comment of type <!-- TAG>...</TAG -->
                        data = data[:-2].strip()
                        count_comment_end += 1

                    if element.name != data:
                        raise BadXMLFormat(f"Mismatched XML tags, opening: {element.name}, closing: {data}")
                    _add_ready_token(ready_nodes, element, depth)
                    depth -= 1

                elif data.startswith("!--"):
                    if incomplete_nodes and isinstance(incomplete_nodes[-1], Comment):
                        raise BadXMLFormat("Nested comments are not allowed")

                    if data.endswith("--"):
                        element = TextOnlyComment(data[3:-2].strip())
                        _add_ready_token(ready_nodes, element, depth + 1)
                    else:
                        # this is a case of opening a comment of type <!-- TAG>...</TAG -->
                        count_comment_start += 1
                        name = data[3:].strip()
                        incomplete_nodes.append(Comment(name))
                        depth += 1

                else:
                    element = _parse_element(data)
                    if incomplete_nodes and incomplete_nodes[-1].name == "!--":
                        element.comment_out()
                    parent_is_doctype = incomplete_nodes and isinstance(incomplete_nodes[-1], Doctype)
                    if parent_is_doctype:
                        # Declarations of the DOCTYPE subset have no closing tag.
                        _add_ready_token(ready_nodes, element, depth + 1)
                    else:
                        incomplete_nodes.append(element)
                        depth += 1

            elif token_type == TokenType.comment_start:
                if incomplete_nodes and isinstance(incomplete_nodes[-1], Comment):
                    raise BadXMLFormat("Nested comments are not allowed")
                count_comment_start += 1
                if data != "!--":
                    raise BadXMLFormat("Malformed comment closure")
                # Placeholder telling the following tag that it is commented out.
                incomplete_nodes.append(Comment(data))

            elif token_type == TokenType.closing:
                if not incomplete_nodes:
                    raise BadXMLFormat(f"Unexpected closing sequence '{data}>'")
                element = incomplete_nodes.pop()
                if data == "--":
                    count_comment_end += 1
                if isinstance(element, Doctype):
                    _add_ready_token(ready_nodes, element, depth)
                    depth -= 1

            elif token_type == TokenType.content:
                if not incomplete_nodes:
                    raise BadXMLFormat("Text content found outside of any element")
                incomplete_nodes[-1].content = data

            elif token_type == TokenType.doctype:
                incomplete_nodes.append(Doctype(data))
                depth += 1

            elif token_type == TokenType.c_data:
                _add_ready_token(ready_nodes, CData(data), depth + 1)

        if count_comment_start != count_comment_end:
            raise BadXMLFormat("Mismatched comment tags")

        if incomplete_nodes:
            raise BadXMLFormat(f"Unclosed tag: {incomplete_nodes[-1].name}")

        top_level = ready_nodes.get(1, [])
        if len(top_level) == 1:
            return top_level[0], None
        if len(top_level) == 2 and isinstance(top_level[0], Doctype) and isinstance(top_level[1], Element):
            return top_level[1], top_level[0]
        raise BadXMLFormat("xml contains more than one outer element")

    def write(self, file_name: Path = None, indentation: str = "\t") -> "str | None":
        """Write the XML tree back to the file.
        :param file_name: Path to the XML file, if None, overwrite the original file
        :param indentation: string used for indentation, default is tab character
        :return: None
        :raises:
            ValueError: if file name is not specified
        """
        if file_name:
            self._file_name = file_name
        if not self._file_name:
            raise ValueError("File name is not specified")

        # Serialize BEFORE opening the file: opening with "w" truncates it, so a
        # failure while rendering must not destroy the existing content.
        text = self.to_string(indentation)
        if self._declaration:
            text = f"<?xml {self._declaration}?>\n" + text

        with open(self._file_name, "w") as file:
            file.write(text)

    def to_string(self, indentation: str = "\t") -> str:
        """
        Convert the XML tree to a string.
        :param indentation: string used for indentation, default is tab character
        :return: XML string (DOCTYPE, if any, followed by the tree; no XML declaration)
        """
        result = self._doctype.to_string(indentation) if self._doctype else ""
        return result + self._tree.to_string(indentation)

    def find(
        self,
        name: str = "",
        only_one: bool = True,
        with_content: str = None,
    ) -> "Element | list[Element] | None":
        """
        Find element(s) by name or content or both
        :param name: name of the element to find, can be nested using |, e.g. "parent|child|subchild"
        :param only_one: stop at first find or return all found elements
        :param with_content: filter by content
        :return: the elements found,
        if found, return the elements that match the last name in the path,
        if not found, return None if only_one is True, else return empty list
        :raises:
            ValueError: if neither name nor with_content is provided
        """
        if not name and with_content is None:
            raise ValueError("At least one search criteria must be provided")
        return self._tree.find(name, only_one, with_content)
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: smartXML
|
|
3
|
+
Version: 1.0.8
|
|
4
|
+
Summary: smartXML package enables you to read, search, manipulate, and write XML files with ease
|
|
5
|
+
Author-email: Dudu Arbel <duduarbel@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Changelog, https://github.com/duduarbel/smartXML/blob/main/changelog.md
|
|
8
|
+
Keywords: python,example
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
Requires-Dist: requests>=2.31
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
21
|
+
Requires-Dist: mypy>=1.8; extra == "dev"
|
|
22
|
+
Requires-Dist: ruff>=0.3; extra == "dev"
|
|
23
|
+
|
|
24
|
+
# smartXML
|
|
25
|
+
|
|
26
|
+
The **smartXML** package enables you to read, search, manipulate, and write XML files with ease.
|
|
27
|
+
|
|
28
|
+
The API is designed to be simple, but it will be enhanced according to usage and requests.
|
|
29
|
+
The package includes a `SmartXML` class representing the XML file, and an `ElementBase` class representing each element in the XML tree.
|
|
30
|
+
### SmartXML:
|
|
31
|
+
- properties:
|
|
32
|
+
- `tree`: the root element of the XML file
|
|
33
|
+
- `declaration`: the XML declaration (e.g., `<?xml version="1.0" encoding="UTF-8"?>`)
|
|
34
|
+
- methods:
|
|
35
|
+
- `read`: reads and parses an XML file into the element tree
|
|
36
|
+
- `write`: writes the XML to a file
|
|
37
|
+
- `find`: finds elements from the root element
|
|
38
|
+
|
|
39
|
+
### ElementBase: (base class for Element, Comment, TextOnlyComment, CData, and Doctype)
|
|
40
|
+
- properties:
|
|
41
|
+
- `name`: the name of the element
|
|
42
|
+
- `parent`: the parent element
|
|
43
|
+
- methods:
|
|
44
|
+
- `find`: finds elements from the current element
|
|
45
|
+
- `remove`: removes the current element from its parent
|
|
46
|
+
- `comment_out`: comments out the current element
|
|
47
|
+
- `add_before`: adds the current element before the given sibling element
|
|
48
|
+
- `add_after`: adds the current element after the given sibling element
|
|
49
|
+
- `add_as_son_of`: adds the current element as a son of the given parent element (deprecated)
|
|
50
|
+
- `set_as_parent_of`: sets the current element as the parent of another element
|
|
51
|
+
- `to_string`: converts the current element to a string
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
### Usage Example
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from pathlib import Path
|
|
58
|
+
from smartXML.xmltree import SmartXML, TextOnlyComment
|
|
59
|
+
|
|
60
|
+
input_file = Path('./example.xml')
|
|
61
|
+
xml = SmartXML(input_file)
|
|
62
|
+
|
|
63
|
+
firstName = xml.find('students|student|firstName', with_content='Bob')
|
|
64
|
+
bob = firstName.parent
|
|
65
|
+
bob.comment_out()
|
|
66
|
+
header = TextOnlyComment('Bob is out')
|
|
67
|
+
header.add_before(bob)
|
|
68
|
+
|
|
69
|
+
xml.write()
|
|
70
|
+
```
|
|
71
|
+
result (example.xml):
|
|
72
|
+
```xml
|
|
73
|
+
<students>
|
|
74
|
+
<!-- Bob is out -->
|
|
75
|
+
<!--
|
|
76
|
+
<student id="S002">
|
|
77
|
+
<firstName>Bob</firstName>
|
|
78
|
+
<lastName>Levi</lastName>
|
|
79
|
+
</student>
|
|
80
|
+
-->
|
|
81
|
+
</students>
|
|
82
|
+
```
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
smartXML/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
smartXML/_elements_utils.py,sha256=nYGBbQVr5OG6blEbwSrBMXzM71Qtj5fyNGxC1xQVRRY,2372
|
|
3
|
+
smartXML/element.py,sha256=8ZmBZ-UDC5FiGcbpPiJv3_9qqHgNfI4PpswFYv-C_9o,8455
|
|
4
|
+
smartXML/xmltree.py,sha256=-oDroh1wC457fLkzIc_yZLDILlbf8yfhu5OWohACkok,12320
|
|
5
|
+
smartxml-1.0.8.dist-info/METADATA,sha256=xF9GoGkboLeoyIRXAypne-Aj1Qmt4O6pLecP0CnCfus,2816
|
|
6
|
+
smartxml-1.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
+
smartxml-1.0.8.dist-info/top_level.txt,sha256=r3cP_uWcWF46t1J8P4-T4dJJEMY7El0Mdkuj1p0Ialk,9
|
|
8
|
+
smartxml-1.0.8.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
smartXML
|