osbot-utils 2.67.0__py3-none-any.whl → 2.69.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- osbot_utils/helpers/html/Html_Dict__To__Html.py +78 -63
- osbot_utils/helpers/html/Html_Dict__To__Html_Document.py +26 -14
- osbot_utils/helpers/html/Html_Dict__To__Html_Tags.py +51 -42
- osbot_utils/helpers/html/Html_Document__To__Html_Dict.py +44 -0
- osbot_utils/helpers/html/Html__To__Html_Dict.py +4 -1
- osbot_utils/helpers/html/schemas/Schema__Html_Node.py +6 -4
- osbot_utils/helpers/html/schemas/Schema__Html_Node__Data.py +1 -0
- osbot_utils/helpers/llms/builders/LLM_Request__Builder.py +9 -8
- osbot_utils/helpers/llms/schemas/Safe_Str__LLM__Model_Name.py +10 -0
- osbot_utils/helpers/llms/schemas/Schema__LLM_Request__Data.py +2 -1
- osbot_utils/type_safe/steps/Type_Safe__Step__Class_Kwargs.py +0 -2
- osbot_utils/type_safe/steps/Type_Safe__Step__From_Json.py +5 -1
- osbot_utils/type_safe/steps/Type_Safe__Step__Init.py +6 -1
- osbot_utils/version +1 -1
- {osbot_utils-2.67.0.dist-info → osbot_utils-2.69.0.dist-info}/METADATA +2 -2
- {osbot_utils-2.67.0.dist-info → osbot_utils-2.69.0.dist-info}/RECORD +18 -16
- {osbot_utils-2.67.0.dist-info → osbot_utils-2.69.0.dist-info}/LICENSE +0 -0
- {osbot_utils-2.67.0.dist-info → osbot_utils-2.69.0.dist-info}/WHEEL +0 -0
@@ -1,97 +1,112 @@
|
|
1
1
|
from osbot_utils.helpers.html.Html__To__Html_Dict import STRING__SCHEMA_TEXT, STRING__SCHEMA_NODES
|
2
2
|
|
3
|
-
HTML_SELF_CLOSING_TAGS
|
3
|
+
HTML_SELF_CLOSING_TAGS = {'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta',
|
4
|
+
'param', 'source', 'track', 'wbr'}
|
4
5
|
HTML_DEFAULT_DOCTYPE_VALUE = "<!DOCTYPE html>\n"
|
5
6
|
|
7
|
+
|
6
8
|
class Html_Dict__To__Html:
|
7
|
-
def __init__(self, root,
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
self.
|
9
|
+
def __init__(self, root, # Root element dictionary
|
10
|
+
include_doctype=True, # Whether to include DOCTYPE
|
11
|
+
doctype=HTML_DEFAULT_DOCTYPE_VALUE # DOCTYPE string to use
|
12
|
+
):
|
13
|
+
self.self_closing_tags = HTML_SELF_CLOSING_TAGS
|
14
|
+
self.root = root
|
15
|
+
self.include_doctype = include_doctype
|
16
|
+
self.doctype = doctype
|
17
|
+
|
18
|
+
def convert(self): # Convert dictionary to HTML string
|
19
|
+
if not self.root:
|
20
|
+
return ""
|
12
21
|
|
13
|
-
def convert(self):
|
14
22
|
html = self.convert_element(self.root, 0)
|
15
|
-
|
23
|
+
|
24
|
+
if self.include_doctype and self.root.get("tag") == "html": # Only add DOCTYPE for html root
|
16
25
|
return self.doctype + html
|
17
26
|
return html
|
18
27
|
|
19
|
-
def convert_attrs(self, attrs):
|
20
|
-
|
21
|
-
|
22
|
-
|
28
|
+
def convert_attrs(self, attrs): # Convert attributes dict to HTML string
|
29
|
+
if not attrs:
|
30
|
+
return ""
|
31
|
+
|
32
|
+
attrs_str_parts = []
|
33
|
+
|
34
|
+
for key, value in attrs.items(): # Preserve original order
|
35
|
+
if value is None:
|
23
36
|
attr_str = f'{key}'
|
24
|
-
elif
|
25
|
-
|
26
|
-
|
37
|
+
elif value == "": # Handle empty string values
|
38
|
+
attr_str = f'{key}=""'
|
39
|
+
elif '"' in str(value) and "'" in str(value): # Both quotes present
|
40
|
+
escaped_value = str(value).replace('"', '&quot;')
|
41
|
+
attr_str = f'{key}="{escaped_value}"'
|
42
|
+
elif '"' in str(value): # Use single quotes if double quotes present
|
43
|
+
attr_str = f"{key}='{value}'"
|
27
44
|
else:
|
28
|
-
attr_str = f'{key}="{value}"'
|
45
|
+
attr_str = f'{key}="{value}"'
|
29
46
|
attrs_str_parts.append(attr_str)
|
30
47
|
|
31
|
-
attrs_str = ' '.join(attrs_str_parts)
|
48
|
+
attrs_str = ' '.join(attrs_str_parts)
|
49
|
+
return f" {attrs_str}" if attrs_str else ""
|
32
50
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
def convert_element(self, element, indent_level):
|
38
|
-
"""Recursively converts a dictionary to an HTML string with indentation."""
|
39
|
-
# Check if this is a text node
|
51
|
+
def convert_element(self, element, # Element dictionary to convert
|
52
|
+
indent_level # Current indentation level
|
53
|
+
):
|
40
54
|
if element.get("type") == STRING__SCHEMA_TEXT:
|
41
|
-
return element.get("data", "")
|
55
|
+
return element.get("data", "")
|
42
56
|
|
43
|
-
tag
|
57
|
+
tag = element.get("tag")
|
44
58
|
attrs = element.get("attrs", {})
|
45
59
|
nodes = element.get(STRING__SCHEMA_NODES, [])
|
46
60
|
|
47
|
-
|
48
|
-
|
61
|
+
if not tag: # Safety check
|
62
|
+
return ""
|
63
|
+
|
64
|
+
attrs_str = self.convert_attrs(attrs)
|
65
|
+
indent = " " * indent_level
|
49
66
|
|
50
|
-
#
|
51
|
-
if tag in self.self_closing_tags
|
67
|
+
# Special handling for void elements
|
68
|
+
if tag in self.self_closing_tags:
|
69
|
+
if nodes: # Void elements shouldn't have content
|
70
|
+
print(f"Warning: void element <{tag}> has child nodes")
|
52
71
|
return f"{indent}<{tag}{attrs_str} />\n"
|
53
72
|
|
54
73
|
# Start building the HTML
|
55
|
-
html = f"{indent}<{tag}{attrs_str}>"
|
74
|
+
html = f"{indent}<{tag}{attrs_str}>"
|
56
75
|
|
57
|
-
#
|
58
|
-
|
59
|
-
|
76
|
+
# Analyze content type
|
77
|
+
has_text_nodes = any(node.get("type") == STRING__SCHEMA_TEXT for node in nodes)
|
78
|
+
has_element_nodes = any(node.get("type") != STRING__SCHEMA_TEXT for node in nodes)
|
60
79
|
|
61
|
-
#
|
62
|
-
if
|
80
|
+
# Determine formatting strategy
|
81
|
+
if not nodes: # Empty element
|
82
|
+
html += f"</{tag}>\n"
|
83
|
+
elif has_element_nodes and not has_text_nodes: # Only element children
|
63
84
|
html += "\n"
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
85
|
+
html += self.convert_children(nodes, indent_level + 1)
|
86
|
+
html += f"{indent}</{tag}>\n"
|
87
|
+
elif has_text_nodes and not has_element_nodes: # Only text content
|
88
|
+
html += self.convert_children(nodes, indent_level + 1)
|
89
|
+
html += f"</{tag}>\n"
|
90
|
+
else: # Mixed content
|
91
|
+
# For mixed content, don't add extra formatting
|
71
92
|
for node in nodes:
|
72
93
|
if node.get("type") == STRING__SCHEMA_TEXT:
|
73
|
-
# Text node - directly append content
|
74
94
|
html += node.get("data", "")
|
75
|
-
previous_was_element = False
|
76
95
|
else:
|
77
|
-
#
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
html += self.convert_element(node, indent_level + 1)
|
84
|
-
previous_was_element = True
|
85
|
-
|
86
|
-
# Handle closing tag based on content
|
87
|
-
if element_nodes and not text_nodes:
|
88
|
-
# If only element nodes, add indented closing tag
|
89
|
-
html += f"{indent}</{tag}>\n"
|
90
|
-
elif nodes: # Any type of nodes
|
91
|
-
# If mixed content or only text, add closing tag without indentation
|
96
|
+
# Recursively convert child elements with no indentation
|
97
|
+
child_html = self.convert_element(node, 0)
|
98
|
+
# Remove the trailing newline from child elements in mixed content
|
99
|
+
if child_html.endswith('\n'):
|
100
|
+
child_html = child_html[:-1]
|
101
|
+
html += child_html
|
92
102
|
html += f"</{tag}>\n"
|
93
|
-
else:
|
94
|
-
# Empty tag, replace with self-contained format
|
95
|
-
html = f"{indent}<{tag}{attrs_str}></{tag}>\n"
|
96
103
|
|
104
|
+
return html
|
105
|
+
|
106
|
+
def convert_children(self, nodes, # List of child nodes
|
107
|
+
indent_level # Current indentation level
|
108
|
+
):
|
109
|
+
html = ""
|
110
|
+
for node in nodes:
|
111
|
+
html += self.convert_element(node, indent_level)
|
97
112
|
return html
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Dict,
|
1
|
+
from typing import Dict, Any
|
2
2
|
from osbot_utils.helpers.html.Html__To__Html_Dict import STRING__SCHEMA_TEXT, STRING__SCHEMA_NODES
|
3
3
|
from osbot_utils.helpers.html.schemas.Schema__Html_Document import Schema__Html_Document
|
4
4
|
from osbot_utils.helpers.html.schemas.Schema__Html_Node import Schema__Html_Node
|
@@ -6,7 +6,6 @@ from osbot_utils.helpers.html.schemas.Schema__Html_Node__Data import Schem
|
|
6
6
|
from osbot_utils.helpers.html.schemas.Schema__Html_Node__Data__Type import Schema__Html_Node__Data__Type
|
7
7
|
from osbot_utils.type_safe.Type_Safe import Type_Safe
|
8
8
|
|
9
|
-
|
10
9
|
class Html_Dict__To__Html_Document(Type_Safe):
|
11
10
|
html__dict : dict = None
|
12
11
|
html__document: Schema__Html_Document = None
|
@@ -19,19 +18,32 @@ class Html_Dict__To__Html_Document(Type_Safe):
|
|
19
18
|
if not target or not isinstance(target, dict):
|
20
19
|
raise ValueError("Invalid HTML dictionary structure")
|
21
20
|
|
22
|
-
root_node = self.parse_node(target)
|
21
|
+
root_node = self.parse_node(target, position=-1) # Root has position -1
|
23
22
|
return Schema__Html_Document(root_node=root_node)
|
24
23
|
|
25
|
-
def parse_node(self, target: Dict[str, Any]) ->
|
24
|
+
def parse_node(self, target: Dict[str, Any], position: int) -> Schema__Html_Node: # Parse a node and separate child nodes from text nodes with positions
|
25
|
+
|
26
|
+
if target.get('type') == STRING__SCHEMA_TEXT: # This shouldn't happen at this level since we're parsing element nodes
|
27
|
+
raise ValueError("Unexpected text node at element level")
|
28
|
+
|
29
|
+
|
30
|
+
child_nodes = [] # Create lists for child nodes and text nodes
|
31
|
+
text_nodes = []
|
32
|
+
|
26
33
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
+
nodes_list = target.get(STRING__SCHEMA_NODES, []) # Process all nodes and assign positions
|
35
|
+
for idx, node in enumerate(nodes_list):
|
36
|
+
if node.get('type') == STRING__SCHEMA_TEXT: # Create text node with position
|
37
|
+
text_node = Schema__Html_Node__Data(data = node.get('data', '') ,
|
38
|
+
type = Schema__Html_Node__Data__Type.TEXT,
|
39
|
+
position = idx )
|
40
|
+
text_nodes.append(text_node)
|
41
|
+
else:
|
42
|
+
child_node = self.parse_node(node, position=idx) # Create element node with position
|
43
|
+
child_nodes.append(child_node)
|
34
44
|
|
35
|
-
|
36
|
-
|
37
|
-
|
45
|
+
return Schema__Html_Node(attrs = target.get('attrs', {}), # Create the element node with separated lists
|
46
|
+
child_nodes = child_nodes ,
|
47
|
+
text_nodes = text_nodes ,
|
48
|
+
tag = target.get('tag' , ''),
|
49
|
+
position = position )
|
@@ -19,19 +19,17 @@ class Html_Dict__To__Html_Tags:
|
|
19
19
|
def convert_element(self, element):
|
20
20
|
tag_name = element.get("tag")
|
21
21
|
|
22
|
-
|
23
|
-
if tag_name == 'html':
|
22
|
+
|
23
|
+
if tag_name == 'html': # Handle special tag types with dedicated conversion methods
|
24
24
|
return self.convert_to__tag__html(element)
|
25
25
|
elif tag_name == 'head':
|
26
|
-
return self.convert_to__tag__head(element, 0)
|
26
|
+
return self.convert_to__tag__head(element, 0) # Default indent 0
|
27
27
|
elif tag_name == 'link':
|
28
28
|
return self.convert_to__tag__link(element)
|
29
|
-
else:
|
30
|
-
# Default case: convert to a generic Tag__Base
|
29
|
+
else: # Default case: convert to a generic Tag__Base
|
31
30
|
return self.convert_to__tag(Tag__Base, element, 0) # Default indent 0
|
32
31
|
|
33
|
-
def collect_inner_text(self, element):
|
34
|
-
"""Extract all text from an element's text node nodes."""
|
32
|
+
def collect_inner_text(self, element): # Extract all text from an element's text node nodes.
|
35
33
|
inner_text = ""
|
36
34
|
for node in element.get(STRING__SCHEMA_NODES, []):
|
37
35
|
if node.get("type") == STRING__SCHEMA_TEXT:
|
@@ -39,8 +37,7 @@ class Html_Dict__To__Html_Tags:
|
|
39
37
|
return inner_text
|
40
38
|
|
41
39
|
def convert_to__tag(self, target_tag, element, indent):
|
42
|
-
if element.get("type") == STRING__SCHEMA_TEXT:
|
43
|
-
# Handle text nodes directly
|
40
|
+
if element.get("type") == STRING__SCHEMA_TEXT: # Handle text nodes directly
|
44
41
|
return Tag__Text(element.get("data", ""))
|
45
42
|
|
46
43
|
tag_name = element.get("tag")
|
@@ -49,51 +46,68 @@ class Html_Dict__To__Html_Tags:
|
|
49
46
|
end_tag = tag_name not in HTML_SELF_CLOSING_TAGS
|
50
47
|
tag_indent = indent + 1
|
51
48
|
|
52
|
-
|
53
|
-
|
49
|
+
node_positions = [] # Create node lists with position tracking
|
50
|
+
|
51
|
+
for idx, node in enumerate(nodes): # Process all nodes and track their positions
|
52
|
+
if node.get("type") == STRING__SCHEMA_TEXT:
|
53
|
+
text_obj = Tag__Text(node.get("data", "")) # Create text node with position info
|
54
|
+
node_positions.append((idx, 'text', text_obj))
|
55
|
+
else:
|
56
|
+
child_tag = self.convert_to__tag(Tag__Base, node, tag_indent) # Create element node
|
57
|
+
node_positions.append((idx, 'element', child_tag))
|
58
|
+
|
59
|
+
node_positions.sort(key=lambda x: x[0]) # Sort by position (though they should already be in order)
|
54
60
|
|
55
|
-
tag_kwargs = dict(
|
56
|
-
tag_name = tag_name,
|
57
|
-
attributes = attrs,
|
58
|
-
end_tag = end_tag,
|
59
|
-
indent = tag_indent,
|
60
|
-
inner_html = inner_html
|
61
|
-
)
|
62
61
|
|
63
|
-
|
62
|
+
inner_html = "" # Collect consecutive text nodes at the beginning for inner_html
|
63
|
+
first_element_idx = None
|
64
64
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
65
|
+
for idx, (pos, node_type, node_obj) in enumerate(node_positions):
|
66
|
+
if node_type == 'element':
|
67
|
+
first_element_idx = idx
|
68
|
+
break
|
69
|
+
else:
|
70
|
+
inner_html += node_obj.data
|
71
|
+
|
72
|
+
|
73
|
+
if first_element_idx is None: # If all nodes are text, use them all as inner_html
|
74
|
+
inner_html = "".join(n[2].data for n in node_positions if n[1] == 'text')
|
75
|
+
elements = []
|
76
|
+
else:
|
77
|
+
inner_html = "".join(n[2].data for i, n in enumerate(node_positions) # Only use text before first element as inner_html
|
78
|
+
if n[1] == 'text' and i < first_element_idx)
|
79
|
+
elements = [n[2] for i, n in enumerate(node_positions) if i >= first_element_idx] # All nodes go into elements (including remaining text nodes)
|
80
|
+
|
81
|
+
tag_kwargs = dict(tag_name = tag_name ,
|
82
|
+
attributes = attrs ,
|
83
|
+
end_tag = end_tag ,
|
84
|
+
indent = tag_indent,
|
85
|
+
inner_html = inner_html)
|
86
|
+
tag = target_tag(**tag_kwargs)
|
87
|
+
tag.elements = elements
|
70
88
|
|
71
89
|
return tag
|
72
90
|
|
73
91
|
def convert_to__tag__head(self, element, indent):
|
74
|
-
attrs
|
75
|
-
nodes
|
76
|
-
|
92
|
+
attrs = element.get("attrs", {})
|
93
|
+
nodes = element.get(STRING__SCHEMA_NODES, [])
|
77
94
|
head_indent = indent + 1
|
78
|
-
tag_head
|
95
|
+
tag_head = Tag__Head(indent=head_indent, **attrs)
|
79
96
|
|
80
97
|
for node in nodes:
|
81
98
|
tag_name = node.get("tag")
|
82
99
|
|
83
100
|
if tag_name == 'title':
|
84
|
-
# Extract title text from text node nodes
|
85
|
-
tag_head.title = self.collect_inner_text(node)
|
101
|
+
tag_head.title = self.collect_inner_text(node) # Extract title text from text node nodes
|
86
102
|
elif tag_name == 'link':
|
87
103
|
tag_head.links.append(self.convert_to__tag__link(node))
|
88
104
|
elif tag_name == 'meta':
|
89
105
|
tag_head.elements.append(self.convert_to__tag(Tag__Base, node, head_indent))
|
90
106
|
elif tag_name == 'style':
|
91
|
-
# For style tags, collect the CSS content from text nodes
|
92
|
-
style_element = self.convert_to__tag(Tag__Base, node, head_indent)
|
107
|
+
style_element = self.convert_to__tag(Tag__Base, node, head_indent) # For style tags, collect the CSS content from text nodes
|
93
108
|
tag_head.elements.append(style_element)
|
94
109
|
else:
|
95
|
-
# Handle any other head elements
|
96
|
-
tag_head.elements.append(self.convert_to__tag(Tag__Base, node, head_indent))
|
110
|
+
tag_head.elements.append(self.convert_to__tag(Tag__Base, node, head_indent)) # Handle any other head elements
|
97
111
|
|
98
112
|
return tag_head
|
99
113
|
|
@@ -104,8 +118,7 @@ class Html_Dict__To__Html_Tags:
|
|
104
118
|
|
105
119
|
tag_html = Tag__Html(attributes=attrs, lang=lang, doc_type=False)
|
106
120
|
|
107
|
-
# Initialize head and body if not found
|
108
|
-
head_found = False
|
121
|
+
head_found = False # Initialize head and body if not found
|
109
122
|
body_found = False
|
110
123
|
|
111
124
|
for node in nodes:
|
@@ -118,16 +131,12 @@ class Html_Dict__To__Html_Tags:
|
|
118
131
|
tag_html.body = self.convert_to__tag(Tag__Body, node, tag_html.indent)
|
119
132
|
body_found = True
|
120
133
|
else:
|
121
|
-
# Log unexpected child elements of html
|
122
|
-
print(f'Unexpected child of html tag: {tag_name}')
|
134
|
+
print(f'Unexpected child of html tag: {tag_name}') # Log unexpected child elements of html
|
123
135
|
|
124
|
-
# Handle missing head or body (required for valid HTML structure)
|
125
|
-
if not head_found:
|
126
|
-
#print("Warning: No head element found, creating empty one")
|
136
|
+
if not head_found: # Handle missing head or body (required for valid HTML structure)
|
127
137
|
tag_html.head = Tag__Head(indent=tag_html.indent + 1)
|
128
138
|
|
129
139
|
if not body_found:
|
130
|
-
#print("Warning: No body element found, creating empty one")
|
131
140
|
tag_html.body = Tag__Body(indent=tag_html.indent + 1)
|
132
141
|
|
133
142
|
return tag_html
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from typing import Dict, Any, List, Union
|
2
|
+
from osbot_utils.helpers.html.Html__To__Html_Dict import STRING__SCHEMA_TEXT, STRING__SCHEMA_NODES, STRING__SCHEMA_TAG, STRING__SCHEMA_ATTRS
|
3
|
+
from osbot_utils.helpers.html.schemas.Schema__Html_Document import Schema__Html_Document
|
4
|
+
from osbot_utils.helpers.html.schemas.Schema__Html_Node import Schema__Html_Node
|
5
|
+
from osbot_utils.type_safe.Type_Safe import Type_Safe
|
6
|
+
|
7
|
+
|
8
|
+
class Html_Document__To__Html_Dict(Type_Safe):
|
9
|
+
html__document : Schema__Html_Document = None
|
10
|
+
html__dict : dict = None
|
11
|
+
|
12
|
+
def convert(self) -> dict: # Convert Schema__Html_Document back to html dict format
|
13
|
+
if not self.html__document:
|
14
|
+
raise ValueError("No document to convert")
|
15
|
+
|
16
|
+
self.html__dict = self.node_to_dict(self.html__document.root_node)
|
17
|
+
return self.html__dict
|
18
|
+
|
19
|
+
def node_to_dict(self, node: Schema__Html_Node) -> Dict[str, Any]: # Convert a Schema__Html_Node back to dict format, merging child and text nodes by position
|
20
|
+
|
21
|
+
result = { STRING__SCHEMA_TAG : node.tag, # Create the basic dict structure
|
22
|
+
STRING__SCHEMA_ATTRS : node.attrs,
|
23
|
+
STRING__SCHEMA_NODES : []}
|
24
|
+
|
25
|
+
all_nodes = [] # Merge child_nodes and text_nodes back together based on position
|
26
|
+
|
27
|
+
for child in node.child_nodes: # Add child nodes with their positions
|
28
|
+
all_nodes.append((child.position, 'child', child))
|
29
|
+
|
30
|
+
for text in node.text_nodes: # Add text nodes with their positions
|
31
|
+
all_nodes.append((text.position, 'text', text))
|
32
|
+
|
33
|
+
all_nodes.sort(key=lambda x: x[0]) # Sort by position
|
34
|
+
|
35
|
+
for position, node_type, node_obj in all_nodes: # Build the nodes list in the correct order
|
36
|
+
if node_type == 'text':
|
37
|
+
text_dict = { 'type': STRING__SCHEMA_TEXT, # Convert text node to dict
|
38
|
+
'data': node_obj.data }
|
39
|
+
result[STRING__SCHEMA_NODES].append(text_dict)
|
40
|
+
else:
|
41
|
+
child_dict = self.node_to_dict(node_obj) # Recursively convert child node
|
42
|
+
result[STRING__SCHEMA_NODES].append(child_dict)
|
43
|
+
|
44
|
+
return result
|
@@ -1,8 +1,11 @@
|
|
1
1
|
from html.parser import HTMLParser
|
2
2
|
|
3
3
|
HTML_SELF_CLOSING_TAGS = {'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'}
|
4
|
-
|
4
|
+
STRING__SCHEMA_ATTRS = 'attrs'
|
5
5
|
STRING__SCHEMA_NODES = 'nodes'
|
6
|
+
STRING__SCHEMA_TEXT = 'TEXT'
|
7
|
+
STRING__SCHEMA_TAG = 'tag'
|
8
|
+
|
6
9
|
STRING__DATA_TEXT = f'{STRING__SCHEMA_TEXT}:'
|
7
10
|
|
8
11
|
|
@@ -1,10 +1,12 @@
|
|
1
|
-
from typing
|
1
|
+
from typing import List, Dict, Optional
|
2
2
|
from osbot_utils.helpers.html.schemas.Schema__Html_Node__Data import Schema__Html_Node__Data
|
3
3
|
from osbot_utils.type_safe.Type_Safe import Type_Safe
|
4
4
|
|
5
5
|
|
6
6
|
class Schema__Html_Node(Type_Safe):
|
7
|
-
attrs
|
8
|
-
|
9
|
-
|
7
|
+
attrs : Dict[str, str ] # HTML attributes (e.g., {'class': 'container'})
|
8
|
+
child_nodes : List['Schema__Html_Node'] # Element nodes only
|
9
|
+
text_nodes : List[Schema__Html_Node__Data] # Text nodes only
|
10
|
+
tag : str # HTML tag name (e.g., 'div', 'meta', 'title')
|
11
|
+
position : int = -1 # Position in parent's nodes list (-1 for root)
|
10
12
|
|
@@ -4,3 +4,4 @@ from osbot_utils.type_safe.Type_Safe import Type_
|
|
4
4
|
class Schema__Html_Node__Data(Type_Safe):
|
5
5
|
data : str # Text content
|
6
6
|
type : Schema__Html_Node__Data__Type = Schema__Html_Node__Data__Type.TEXT # Always 'text' for text nodes
|
7
|
+
position : int # Position in parent's nodes list
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from typing import Dict, Any, Type
|
2
2
|
from osbot_utils.helpers.llms.actions.Type_Safe__Schema_For__LLMs import Type_Safe__Schema_For__LLMs
|
3
|
+
from osbot_utils.helpers.llms.schemas.Safe_Str__LLM__Model_Name import Safe_Str__LLM__Model_Name
|
3
4
|
from osbot_utils.helpers.llms.schemas.Schema__LLM_Request import Schema__LLM_Request
|
4
5
|
from osbot_utils.helpers.llms.schemas.Schema__LLM_Request__Data import Schema__LLM_Request__Data
|
5
6
|
from osbot_utils.helpers.llms.schemas.Schema__LLM_Request__Function_Call import Schema__LLM_Request__Function_Call
|
@@ -40,14 +41,14 @@ class LLM_Request__Builder(Type_Safe):
|
|
40
41
|
self.llm_request_data.function_call = function_call
|
41
42
|
return self
|
42
43
|
|
43
|
-
def set__model (self, model :
|
44
|
-
def set__platform (self, platform: Safe_Str__Text): self.llm_request_data.platform = platform; return self
|
45
|
-
def set__provider (self, provider: Safe_Str__Text): self.llm_request_data.provider = provider; return self
|
46
|
-
def set__model__gpt_4o (self
|
47
|
-
def set__model__gpt_4o_mini (self
|
48
|
-
def set__model__gpt_4_1 (self
|
49
|
-
def set__model__gpt_4_1_mini(self
|
50
|
-
def set__model__gpt_4_1_nano(self
|
44
|
+
def set__model (self, model : Safe_Str__LLM__Model_Name): self.llm_request_data.model = model ; return self
|
45
|
+
def set__platform (self, platform: Safe_Str__Text ): self.llm_request_data.platform = platform; return self
|
46
|
+
def set__provider (self, provider: Safe_Str__Text ): self.llm_request_data.provider = provider; return self
|
47
|
+
def set__model__gpt_4o (self ): return self.set__model('gpt-4o' )
|
48
|
+
def set__model__gpt_4o_mini (self ): return self.set__model('gpt-4o-mini' )
|
49
|
+
def set__model__gpt_4_1 (self ): return self.set__model('gpt-4.1' )
|
50
|
+
def set__model__gpt_4_1_mini(self ): return self.set__model('gpt-4.1-mini')
|
51
|
+
def set__model__gpt_4_1_nano(self ): return self.set__model('gpt-4.1-nano')
|
51
52
|
|
52
53
|
|
53
54
|
@type_safe
|
@@ -0,0 +1,10 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
from osbot_utils.helpers.safe_str.Safe_Str import Safe_Str
|
4
|
+
|
5
|
+
TYPE_SAFE_STR__LLM__MODEL_NAME__MAX_LENGTH = 256
|
6
|
+
TYPE_SAFE_STR__LLM__MODEL_NAME__REGEX = r'[^a-zA-Z0-9/_\-.:]'
|
7
|
+
|
8
|
+
class Safe_Str__LLM__Model_Name(Safe_Str):
|
9
|
+
regex = re.compile(TYPE_SAFE_STR__LLM__MODEL_NAME__REGEX)
|
10
|
+
max_length = TYPE_SAFE_STR__LLM__MODEL_NAME__MAX_LENGTH
|
@@ -1,11 +1,12 @@
|
|
1
1
|
from typing import List, Optional
|
2
|
+
from osbot_utils.helpers.llms.schemas.Safe_Str__LLM__Model_Name import Safe_Str__LLM__Model_Name
|
2
3
|
from osbot_utils.helpers.llms.schemas.Schema__LLM_Request__Function_Call import Schema__LLM_Request__Function_Call
|
3
4
|
from osbot_utils.helpers.llms.schemas.Schema__LLM_Request__Message__Content import Schema__LLM_Request__Message__Content
|
4
5
|
from osbot_utils.helpers.safe_str.Safe_Str__Text import Safe_Str__Text
|
5
6
|
from osbot_utils.type_safe.Type_Safe import Type_Safe
|
6
7
|
|
7
8
|
class Schema__LLM_Request__Data(Type_Safe): # Schema for LLM API request data
|
8
|
-
model :
|
9
|
+
model : Safe_Str__LLM__Model_Name # LLM model identifier
|
9
10
|
platform : Safe_Str__Text
|
10
11
|
provider : Safe_Str__Text
|
11
12
|
messages : List [Schema__LLM_Request__Message__Content] # Message content entries
|
@@ -6,8 +6,6 @@ from osbot_utils.type_safe.shared.Type_Safe__Shared__Variables import IMMUTABL
|
|
6
6
|
from osbot_utils.type_safe.shared.Type_Safe__Validation import type_safe_validation
|
7
7
|
from osbot_utils.type_safe.steps.Type_Safe__Step__Default_Value import type_safe_step_default_value
|
8
8
|
|
9
|
-
|
10
|
-
|
11
9
|
class Type_Safe__Step__Class_Kwargs: # Handles class-level keyword arguments processing
|
12
10
|
|
13
11
|
type_safe_cache : Type_Safe__Cache # Cache component reference
|
@@ -26,7 +26,7 @@ if sys.version_info < (3, 8): # pragma
|
|
26
26
|
else:
|
27
27
|
return ()
|
28
28
|
else:
|
29
|
-
from typing import get_args, Any
|
29
|
+
from typing import get_args, Any, ForwardRef
|
30
30
|
|
31
31
|
|
32
32
|
class Type_Safe__Step__From_Json:
|
@@ -83,6 +83,10 @@ class Type_Safe__Step__From_Json:
|
|
83
83
|
expected_type = get_args(attribute_annotation)[0] # get the first arg (which is the type)
|
84
84
|
type_safe_list = Type_Safe__List(expected_type) # create a new instance of Type_Safe__List
|
85
85
|
if value:
|
86
|
+
if isinstance(expected_type, ForwardRef): # Check if it's a self-reference
|
87
|
+
forward_name = expected_type.__forward_arg__
|
88
|
+
if forward_name == _self.__class__.__name__:
|
89
|
+
expected_type = _self.__class__
|
86
90
|
for item in value: # next we need to convert all items (to make sure they all match the type)
|
87
91
|
if type(item) is dict:
|
88
92
|
new_item = expected_type(**item) # create new object
|
@@ -1,4 +1,5 @@
|
|
1
1
|
from enum import EnumMeta
|
2
|
+
from typing import ForwardRef
|
2
3
|
from osbot_utils.type_safe.shared.Type_Safe__Annotations import type_safe_annotations
|
3
4
|
from osbot_utils.type_safe.steps.Type_Safe__Step__Default_Value import type_safe_step_default_value, get_args
|
4
5
|
|
@@ -27,7 +28,7 @@ class Type_Safe__Step__Init:
|
|
27
28
|
raise ValueError(f"{__self.__class__.__name__} has no attribute '{key}' and cannot be assigned the value '{value}'. "
|
28
29
|
f"Use {__self.__class__.__name__}.__default_kwargs__() see what attributes are available") from None
|
29
30
|
|
30
|
-
def convert_value_to_type_safe_objects(self, __self, key, value):
|
31
|
+
def convert_value_to_type_safe_objects(self, __self, key, value): # todo: see if we should use _self here (like in Type_Safe__Step__From_Json, or vice versa)
|
31
32
|
annotation = type_safe_annotations.obj_attribute_annotation(__self, key)
|
32
33
|
if annotation:
|
33
34
|
if isinstance(annotation, EnumMeta) and type(value) is str:
|
@@ -47,6 +48,10 @@ class Type_Safe__Step__Init:
|
|
47
48
|
elif origin is list and isinstance(value, list):
|
48
49
|
from osbot_utils.type_safe.Type_Safe__List import Type_Safe__List
|
49
50
|
item_type = get_args(annotation)[0]
|
51
|
+
if isinstance(item_type, ForwardRef):
|
52
|
+
forward_name = item_type.__forward_arg__
|
53
|
+
if forward_name == __self.__class__.__name__:
|
54
|
+
item_type = __self.__class__
|
50
55
|
type_safe_list = Type_Safe__List(expected_type=item_type)
|
51
56
|
for item in value:
|
52
57
|
type_safe_list.append(item)
|
osbot_utils/version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
v2.67.0
|
1
|
+
v2.69.0
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: osbot_utils
|
3
|
-
Version: 2.67.0
|
3
|
+
Version: 2.69.0
|
4
4
|
Summary: OWASP Security Bot - Utils
|
5
5
|
License: MIT
|
6
6
|
Author: Dinis Cruz
|
@@ -23,7 +23,7 @@ Description-Content-Type: text/markdown
|
|
23
23
|
|
24
24
|
Powerful Python util methods and classes that simplify common apis and tasks.
|
25
25
|
|
26
|
-

|
27
27
|
[](https://codecov.io/gh/owasp-sbot/OSBot-Utils)
|
28
28
|
|
29
29
|
|
@@ -181,10 +181,11 @@ osbot_utils/helpers/generators/Model__Generator_State.py,sha256=cS9tigdBIAvexip2
|
|
181
181
|
osbot_utils/helpers/generators/Model__Generator_Target.py,sha256=Sh_1J0_RYOBYKqg19DP4_e2MPx6CHCzydkA56F8SjRs,816
|
182
182
|
osbot_utils/helpers/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
183
183
|
osbot_utils/helpers/html/CSS_Dict__To__Css.py,sha256=ZDIVfFCYO4UQsRprbx9vCosFqrnHizZUH-JgP5oZcuA,1102
|
184
|
-
osbot_utils/helpers/html/Html_Dict__To__Html.py,sha256=
|
185
|
-
osbot_utils/helpers/html/Html_Dict__To__Html_Document.py,sha256=
|
186
|
-
osbot_utils/helpers/html/Html_Dict__To__Html_Tags.py,sha256=
|
187
|
-
osbot_utils/helpers/html/
|
184
|
+
osbot_utils/helpers/html/Html_Dict__To__Html.py,sha256=Xa5J1elZBK1a5ae75ckenTtNOZs1XX87s1qmu73JatE,4547
|
185
|
+
osbot_utils/helpers/html/Html_Dict__To__Html_Document.py,sha256=mUi3U5zJwFC7iXQ-kT8uWhMkkK9ZIUKHukdSOuIBCfc,3132
|
186
|
+
osbot_utils/helpers/html/Html_Dict__To__Html_Tags.py,sha256=46-5aeCnx5LgegKpN227xtx21WcQOB_7gN9NCcxYoA0,7019
|
187
|
+
osbot_utils/helpers/html/Html_Document__To__Html_Dict.py,sha256=D4tiZaPpyUXKsNURejvBXNW3BFX3dQAm65bLBkD0FhY,2509
|
188
|
+
osbot_utils/helpers/html/Html__To__Html_Dict.py,sha256=GJ4NZZ5lMFr2u0hfTpqPOMXmKLyinjCeqypg3JjME8c,4405
|
188
189
|
osbot_utils/helpers/html/Html__To__Html_Document.py,sha256=xVbIT3TaxZPt4cz3lO4WvSru7FyWI1p36qdXp6HGC8o,886
|
189
190
|
osbot_utils/helpers/html/Html__To__Html_Tag.py,sha256=Qz6he08DwfYkYM_krt_FCVtEWgw6_9bs5j5CoC1FCQQ,566
|
190
191
|
osbot_utils/helpers/html/Tag__Base.py,sha256=geD7TpA3lHrSiaoLsViH2rcjrqZkMxNpDOhLx3HYbSk,4470
|
@@ -199,15 +200,15 @@ osbot_utils/helpers/html/Tag__Style.py,sha256=wRZ8DN1HVCtrnL4Flz82oByqJOy8Th40Eh
|
|
199
200
|
osbot_utils/helpers/html/Tag__Text.py,sha256=Pqf96QGwX9wdGqlwBvWYHWz9Qqi-oZrkgEzHQm6LzdY,241
|
200
201
|
osbot_utils/helpers/html/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
201
202
|
osbot_utils/helpers/html/schemas/Schema__Html_Document.py,sha256=227IP9kfQ7FH7bhd-LsPsppPo-mhwC58BcGYWsy0WwQ,381
|
202
|
-
osbot_utils/helpers/html/schemas/Schema__Html_Node.py,sha256=
|
203
|
-
osbot_utils/helpers/html/schemas/Schema__Html_Node__Data.py,sha256=
|
203
|
+
osbot_utils/helpers/html/schemas/Schema__Html_Node.py,sha256=iMGClwil99_KoeLuft53DQLu9Jw-PCqRjBXpfInd7rM,794
|
204
|
+
osbot_utils/helpers/html/schemas/Schema__Html_Node__Data.py,sha256=GiAZY5mO3DNasUO_oNM4ndTvRU2VpjPxOi7kYzu2DHE,568
|
204
205
|
osbot_utils/helpers/html/schemas/Schema__Html_Node__Data__Type.py,sha256=mbYivcm6BfN5-oeCyl6gjbnSKs_b_t-G2H1rpSK6nUY,90
|
205
206
|
osbot_utils/helpers/html/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
206
207
|
osbot_utils/helpers/llms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
207
208
|
osbot_utils/helpers/llms/actions/LLM_Request__Execute.py,sha256=IsC5gzQXG__j5M2dSWVCmV77Kf133pKbZRvbiQyeihU,2453
|
208
209
|
osbot_utils/helpers/llms/actions/Type_Safe__Schema_For__LLMs.py,sha256=em9RoSZqSSo6BQBZvEKH8Qv8f8f8oubNpy0LIDsak-E,12024
|
209
210
|
osbot_utils/helpers/llms/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
210
|
-
osbot_utils/helpers/llms/builders/LLM_Request__Builder.py,sha256=
|
211
|
+
osbot_utils/helpers/llms/builders/LLM_Request__Builder.py,sha256=d9TnJmA1KbEypvCdgFXWcg684n-TGIrumxigsTOMECE,4008
|
211
212
|
osbot_utils/helpers/llms/builders/LLM_Request__Builder__Open_AI.py,sha256=JwyPDeBUQd70Ltm801y5qvS883IIC7TcfG04D3LWdGI,3430
|
212
213
|
osbot_utils/helpers/llms/builders/LLM_Request__Factory.py,sha256=bpFXVTKpalBL7ZONjaHU5c0-2Rwzzd2vgdD1FpYxfGw,6291
|
213
214
|
osbot_utils/helpers/llms/builders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -221,9 +222,10 @@ osbot_utils/helpers/llms/cache/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
221
222
|
osbot_utils/helpers/llms/platforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
222
223
|
osbot_utils/helpers/llms/platforms/open_ai/API__LLM__Open_AI.py,sha256=Yyy2ZnIS6CfxXepP9pZNsOYx02d-5EnK1IFeFf8myyk,2148
|
223
224
|
osbot_utils/helpers/llms/platforms/open_ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
225
|
+
osbot_utils/helpers/llms/schemas/Safe_Str__LLM__Model_Name.py,sha256=AKLfBv65ELl6XvSsLKMVS-y9LefSVNf8GNrEWKVd_TY,358
|
224
226
|
osbot_utils/helpers/llms/schemas/Schema__LLM_Cache__Index.py,sha256=12dUUSFhWRZvqEkNivqajqMPApPWhfSI7jWnHyBuxNw,717
|
225
227
|
osbot_utils/helpers/llms/schemas/Schema__LLM_Request.py,sha256=UUiqZQ5mj16SPdlzef7_j3myLaKKCZdAUjsWUJnK1Ag,378
|
226
|
-
osbot_utils/helpers/llms/schemas/Schema__LLM_Request__Data.py,sha256=
|
228
|
+
osbot_utils/helpers/llms/schemas/Schema__LLM_Request__Data.py,sha256=oLrvmngkDVX2KB2gVR1DwvJQcxWMbBxomjs_IBFQtzc,1489
|
227
229
|
osbot_utils/helpers/llms/schemas/Schema__LLM_Request__Function_Call.py,sha256=VJgWi4aK-DJmuJvfY2qZUZuLkrLlmu5lgyzxZrrp3hM,440
|
228
230
|
osbot_utils/helpers/llms/schemas/Schema__LLM_Request__Message__Content.py,sha256=nl-16yz4G_72ViACKE9CvGStrKxw2Gm_JcaU8wVcJXI,521
|
229
231
|
osbot_utils/helpers/llms/schemas/Schema__LLM_Request__Message__Role.py,sha256=T99w0cRrDPXQqPT-Nw7_14tMr4vKpUlhw74UJZL6w6w,168
|
@@ -393,11 +395,11 @@ osbot_utils/type_safe/shared/Type_Safe__Raise_Exception.py,sha256=TMO4uqPLzie79k
|
|
393
395
|
osbot_utils/type_safe/shared/Type_Safe__Shared__Variables.py,sha256=SuZGl9LryQX6IpOE0I_lbzClT-h17UNylC__-M8ltTY,129
|
394
396
|
osbot_utils/type_safe/shared/Type_Safe__Validation.py,sha256=1XvbWJmRfyqBcdOTuYZ5fiItyMF0ttSPFnWjqZTGbYE,19542
|
395
397
|
osbot_utils/type_safe/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
396
|
-
osbot_utils/type_safe/steps/Type_Safe__Step__Class_Kwargs.py,sha256=
|
398
|
+
osbot_utils/type_safe/steps/Type_Safe__Step__Class_Kwargs.py,sha256=hlKJi2hVFntOoQJNlcyDk4JdnkZfEmnIhQNA0c8keCs,6993
|
397
399
|
osbot_utils/type_safe/steps/Type_Safe__Step__Default_Kwargs.py,sha256=tzKXDUc0HVP5QvCWsmcPuuZodNvQZ9FeMDNI2x00Ngw,1943
|
398
400
|
osbot_utils/type_safe/steps/Type_Safe__Step__Default_Value.py,sha256=b5vsgM8eg9yq2KM0wRMntVHma6OhN_HnU76LxhEIpoA,4483
|
399
|
-
osbot_utils/type_safe/steps/Type_Safe__Step__From_Json.py,sha256=
|
400
|
-
osbot_utils/type_safe/steps/Type_Safe__Step__Init.py,sha256=
|
401
|
+
osbot_utils/type_safe/steps/Type_Safe__Step__From_Json.py,sha256=jeRj7dNDObDv_wz0EELX_hTprVJ2Ddi_UG3_ZZUL-74,15013
|
402
|
+
osbot_utils/type_safe/steps/Type_Safe__Step__Init.py,sha256=6TKpIiwuYcQRUS5Wc0j_y0oToKBah2yc0q8jcDwEZ7U,4938
|
401
403
|
osbot_utils/type_safe/steps/Type_Safe__Step__Set_Attr.py,sha256=k7GX3q0ps4R1Z3w5JMFHB0w19zVXtO1VS11wpFVI19o,6680
|
402
404
|
osbot_utils/type_safe/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
403
405
|
osbot_utils/type_safe/validators/Type_Safe__Validator.py,sha256=cJIPSBarjV716SZUOLvz7Mthjk-aUYKUQtRDtKUBmN4,779
|
@@ -432,8 +434,8 @@ osbot_utils/utils/Toml.py,sha256=Rxl8gx7mni5CvBAK-Ai02EKw-GwtJdd3yeHT2kMloik,166
|
|
432
434
|
osbot_utils/utils/Version.py,sha256=Ww6ChwTxqp1QAcxOnztkTicShlcx6fbNsWX5xausHrg,422
|
433
435
|
osbot_utils/utils/Zip.py,sha256=pR6sKliUY0KZXmqNzKY2frfW-YVQEVbLKiyqQX_lc-8,14052
|
434
436
|
osbot_utils/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
435
|
-
osbot_utils/version,sha256=
|
436
|
-
osbot_utils-2.
|
437
|
-
osbot_utils-2.
|
438
|
-
osbot_utils-2.
|
439
|
-
osbot_utils-2.
|
437
|
+
osbot_utils/version,sha256=mmT3TZTEYCy0jjpssvKXIUrmVMPAJu7HXXH98D1g9XE,8
|
438
|
+
osbot_utils-2.69.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
439
|
+
osbot_utils-2.69.0.dist-info/METADATA,sha256=GFekKv0EI34E7oUrhdWAUcTZQh0NWsvuh76sFgazH84,1329
|
440
|
+
osbot_utils-2.69.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
441
|
+
osbot_utils-2.69.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|