cmdop 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cmdop/__init__.py +1 -1
- cmdop/services/browser/js/__init__.py +49 -0
- cmdop/services/browser/js/core.py +38 -0
- cmdop/services/browser/{js.py → js/fetch.py} +1 -34
- cmdop/services/browser/js/interaction.py +109 -0
- cmdop/services/browser/js/scroll.py +133 -0
- cmdop/services/browser/models.py +32 -0
- cmdop/services/browser/parsing.py +104 -0
- cmdop/services/browser/sync/session.py +264 -3
- cmdop/transport/discovery.py +25 -1
- cmdop-0.1.17.dist-info/METADATA +249 -0
- {cmdop-0.1.16.dist-info → cmdop-0.1.17.dist-info}/RECORD +14 -9
- cmdop-0.1.16.dist-info/METADATA +0 -464
- {cmdop-0.1.16.dist-info → cmdop-0.1.17.dist-info}/WHEEL +0 -0
- {cmdop-0.1.16.dist-info → cmdop-0.1.17.dist-info}/licenses/LICENSE +0 -0
cmdop/__init__.py
CHANGED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""JavaScript builders for browser automation.
|
|
2
|
+
|
|
3
|
+
This module provides JavaScript code generators for common browser operations:
|
|
4
|
+
- Core: JSON parsing, async wrappers
|
|
5
|
+
- Fetch: HTTP requests from browser context
|
|
6
|
+
- Scroll: Page scrolling and infinite scroll
|
|
7
|
+
- Interaction: Hover, select, modals
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from cmdop.services.browser.js.core import (
|
|
11
|
+
parse_json_result,
|
|
12
|
+
build_async_js,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from cmdop.services.browser.js.fetch import (
|
|
16
|
+
build_fetch_js,
|
|
17
|
+
build_fetch_all_js,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from cmdop.services.browser.js.scroll import (
|
|
21
|
+
build_scroll_js,
|
|
22
|
+
build_scroll_to_bottom_js,
|
|
23
|
+
build_infinite_scroll_js,
|
|
24
|
+
build_get_scroll_info_js,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
from cmdop.services.browser.js.interaction import (
|
|
28
|
+
build_hover_js,
|
|
29
|
+
build_select_js,
|
|
30
|
+
build_close_modal_js,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
__all__ = [
|
|
34
|
+
# Core
|
|
35
|
+
"parse_json_result",
|
|
36
|
+
"build_async_js",
|
|
37
|
+
# Fetch
|
|
38
|
+
"build_fetch_js",
|
|
39
|
+
"build_fetch_all_js",
|
|
40
|
+
# Scroll
|
|
41
|
+
"build_scroll_js",
|
|
42
|
+
"build_scroll_to_bottom_js",
|
|
43
|
+
"build_infinite_scroll_js",
|
|
44
|
+
"build_get_scroll_info_js",
|
|
45
|
+
# Interaction
|
|
46
|
+
"build_hover_js",
|
|
47
|
+
"build_select_js",
|
|
48
|
+
"build_close_modal_js",
|
|
49
|
+
]
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Core JavaScript utilities for browser automation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def parse_json_result(result: str) -> dict | list | None:
    """Decode the JSON string produced by a JS snippet.

    Returns the decoded value, or ``None`` when ``result`` is empty, is not
    valid JSON, or decodes to an object that carries an ``__error`` key.
    """
    if not result:
        return None

    try:
        payload = json.loads(result)
    except json.JSONDecodeError:
        return None

    # An object with "__error" is how the JS side reports a failure.
    reported_failure = isinstance(payload, dict) and "__error" in payload
    return None if reported_failure else payload
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def build_async_js(code: str) -> str:
    """
    Wrap JS code in async IIFE with error handling.

    The code should return a value (will be JSON.stringify'd).
    Use for async operations like fetch, Promise.all, etc.

    Args:
        code: JavaScript statements placed inside an inner async arrow
            function; it is inserted verbatim, so it must be trusted JS.

    Returns:
        JS source of an async IIFE that resolves to a JSON string: either the
        serialized return value of ``code`` or ``{"__error": "<message>"}``.
    """
    # `e.message` is undefined when something other than an Error is thrown
    # (e.g. `throw "boom"`); JSON.stringify would then drop the `__error` key
    # and the failure would look like an empty success object. Fall back to
    # String(e) so the key is always present.
    return f"""
    (async function() {{
        try {{
            const result = await (async () => {{ {code} }})();
            return JSON.stringify(result);
        }} catch(e) {{
            return JSON.stringify({{__error: (e && e.message) || String(e)}});
        }}
    }})()
    """
|
|
@@ -1,28 +1,8 @@
|
|
|
1
|
-
"""JavaScript
|
|
1
|
+
"""Fetch JavaScript builders for browser automation."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
|
-
from typing import Any
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def build_async_js(code: str) -> str:
|
|
10
|
-
"""
|
|
11
|
-
Wrap JS code in async IIFE with error handling.
|
|
12
|
-
|
|
13
|
-
The code should return a value (will be JSON.stringify'd).
|
|
14
|
-
Use for async operations like fetch, Promise.all, etc.
|
|
15
|
-
"""
|
|
16
|
-
return f"""
|
|
17
|
-
(async function() {{
|
|
18
|
-
try {{
|
|
19
|
-
const result = await (async () => {{ {code} }})();
|
|
20
|
-
return JSON.stringify(result);
|
|
21
|
-
}} catch(e) {{
|
|
22
|
-
return JSON.stringify({{__error: e.message}});
|
|
23
|
-
}}
|
|
24
|
-
}})()
|
|
25
|
-
"""
|
|
26
6
|
|
|
27
7
|
|
|
28
8
|
def build_fetch_js(url: str) -> str:
|
|
@@ -94,16 +74,3 @@ def build_fetch_all_js(
|
|
|
94
74
|
|
|
95
75
|
return results;
|
|
96
76
|
"""
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def parse_json_result(result: str) -> dict | list | None:
|
|
100
|
-
"""Parse JSON result from JS execution."""
|
|
101
|
-
if result:
|
|
102
|
-
try:
|
|
103
|
-
data = json.loads(result)
|
|
104
|
-
if isinstance(data, dict) and "__error" in data:
|
|
105
|
-
return None
|
|
106
|
-
return data
|
|
107
|
-
except json.JSONDecodeError:
|
|
108
|
-
return None
|
|
109
|
-
return None
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Interaction JavaScript builders for browser automation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def build_hover_js(selector: str) -> str:
    """Build JS to hover over element.

    Args:
        selector: CSS selector of the element to hover.

    Returns:
        JS source of an IIFE that dispatches a bubbling ``mouseover`` event
        on the first match and returns a JSON string with ``success`` (and
        ``error`` when no element matches).
    """
    # Encode the selector as a JS string literal; pasting it raw between
    # quotes breaks on selectors such as [aria-label="Close"].
    selector_js = json.dumps(selector)
    return f"""
    (function() {{
        const el = document.querySelector({selector_js});
        if (!el) return JSON.stringify({{ success: false, error: 'Element not found' }});

        const event = new MouseEvent('mouseover', {{
            bubbles: true,
            cancelable: true,
            view: window
        }});
        el.dispatchEvent(event);

        return JSON.stringify({{ success: true }});
    }})()
    """
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def build_select_js(selector: str, value: str | None = None, text: str | None = None) -> str:
    """
    Build JS to select option from dropdown.

    Args:
        selector: CSS selector for <select> element
        value: Option value to select
        text: Option text to select (if value not provided)

    Returns:
        JS source that sets the selection, fires a bubbling ``change`` event
        and returns a JSON string with ``success``, ``selected_value`` and
        ``selected_text``. When neither ``value`` nor ``text`` is given, a
        bare ``JSON.stringify(...)`` expression reporting the error.
    """
    # All caller-supplied strings are encoded as JS string literals so that
    # quotes or backslashes in them cannot break the generated script.
    if value is not None:
        select_code = f"select.value = {json.dumps(str(value))};"
    elif text is not None:
        text_js = json.dumps(str(text))
        select_code = f"""
        const option = Array.from(select.options).find(o => o.text === {text_js});
        if (option) select.value = option.value;
        """
    else:
        return 'JSON.stringify({ success: false, error: "Need value or text" })'

    selector_js = json.dumps(selector)
    return f"""
    (function() {{
        const select = document.querySelector({selector_js});
        if (!select) return JSON.stringify({{ success: false, error: 'Select not found' }});

        {select_code}

        // Trigger change event
        select.dispatchEvent(new Event('change', {{ bubbles: true }}));

        return JSON.stringify({{
            success: true,
            selected_value: select.value,
            selected_text: select.options[select.selectedIndex]?.text
        }});
    }})()
    """
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def build_close_modal_js(
    selectors: list[str] | None = None,
) -> str:
    """
    Build JS that tries to dismiss a modal dialog.

    The script clicks the first visible element matching one of the
    selectors; if none matches it dispatches an Escape keydown on the
    document and reports failure.

    Args:
        selectors: Custom selectors to try. A built-in list of common
            close-button patterns is used when omitted or empty.
    """
    fallback_selectors = [
        '[aria-label="Close"]',
        '[aria-label="Dismiss"]',
        'button[class*="close"]',
        'button[class*="dismiss"]',
        '[data-testid="close"]',
        '.modal-close',
        '.dialog-close',
        'button:has(svg[class*="close"])',
        'div[role="dialog"] button:first-child',
    ]
    candidates = selectors if selectors else fallback_selectors
    # JSON array literal doubles as a JS array literal.
    candidates_js = json.dumps(candidates)

    return f"""
    (function() {{
        const selectors = {candidates_js};

        for (const sel of selectors) {{
            try {{
                const el = document.querySelector(sel);
                if (el && el.offsetParent !== null) {{
                    el.click();
                    return JSON.stringify({{ success: true, selector: sel }});
                }}
            }} catch(e) {{
                // Skip invalid selectors
            }}
        }}

        // Try pressing Escape
        document.dispatchEvent(new KeyboardEvent('keydown', {{ key: 'Escape', code: 'Escape' }}));

        return JSON.stringify({{ success: false, error: 'No modal close button found' }});
    }})()
    """
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Scroll JavaScript builders for browser automation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def build_scroll_js(
    direction: str = "down",
    amount: int = 500,
    selector: str | None = None,
) -> str:
    """
    Build JS for scrolling.

    Args:
        direction: "up", "down", "left", "right" (anything else scrolls down)
        amount: Pixels to scroll (ignored if selector provided)
        selector: CSS selector to scroll into view

    Returns:
        JS source of an IIFE returning a JSON string. With ``selector`` it
        reports ``success`` / ``error``; otherwise it reports ``success``,
        ``scrollY``, ``scrolledBy`` and ``atBottom``.
    """
    if selector:
        # Encode as a JS string literal so quotes inside the selector
        # (e.g. a[href="x"]) cannot break the generated script.
        selector_js = json.dumps(selector)
        return f"""
        (function() {{
            const el = document.querySelector({selector_js});
            if (el) {{
                el.scrollIntoView({{ behavior: 'smooth', block: 'center' }});
                return JSON.stringify({{ success: true }});
            }}
            return JSON.stringify({{ success: false, error: 'Element not found' }});
        }})()
        """

    # Negate in Python: formatting "-{amount}" with a negative amount would
    # emit "--N", which is not valid JavaScript.
    scroll_map = {
        "down": f"window.scrollBy(0, {amount})",
        "up": f"window.scrollBy(0, {-amount})",
        "right": f"window.scrollBy({amount}, 0)",
        "left": f"window.scrollBy({-amount}, 0)",
    }
    scroll_code = scroll_map.get(direction, scroll_map["down"])

    return f"""
    (function() {{
        const before = window.scrollY;
        {scroll_code};
        return JSON.stringify({{
            success: true,
            scrollY: window.scrollY,
            scrolledBy: window.scrollY - before,
            atBottom: (window.innerHeight + window.scrollY) >= document.body.scrollHeight
        }});
    }})()
    """
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def build_scroll_to_bottom_js() -> str:
    """Build JS that jumps to the bottom of the page.

    The script returns a JSON string with ``success``, the new ``scrollY``,
    the distance moved (``scrolledBy``) and a constant ``atBottom: true``.
    """
    script = """
    (function() {
        const before = window.scrollY;
        window.scrollTo(0, document.body.scrollHeight);
        return JSON.stringify({
            success: true,
            scrollY: window.scrollY,
            scrolledBy: window.scrollY - before,
            atBottom: true
        });
    })()
    """
    return script
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def build_infinite_scroll_js(
    seen_keys: list[str],
    key_selector: str = "a[href]",
    key_attr: str = "href",
    container_selector: str = "body",
) -> str:
    """
    Build JS for infinite scroll with deduplication.

    Args:
        seen_keys: List of already seen keys (e.g., URLs)
        key_selector: CSS selector for key elements
        key_attr: Attribute to use as key (default: href); the element's
            trimmed text is used when the attribute is missing or empty
        container_selector: Container to find elements in

    Returns:
        JS that returns {new_keys: [...], at_bottom: bool, total_seen: int},
        or {new_keys: [], at_bottom: true, error: ...} when the container
        is not found.
    """
    seen_json = json.dumps(seen_keys)
    # Encode every caller-supplied string as a JS string literal; raw
    # interpolation breaks on selectors containing double quotes.
    container_js = json.dumps(container_selector)
    key_selector_js = json.dumps(key_selector)
    key_attr_js = json.dumps(key_attr)
    return f"""
    (function() {{
        const seen = new Set({seen_json});
        const container = document.querySelector({container_js});
        if (!container) return JSON.stringify({{ new_keys: [], at_bottom: true, error: 'Container not found' }});

        const elements = container.querySelectorAll({key_selector_js});
        const newKeys = [];

        elements.forEach(el => {{
            const key = el.getAttribute({key_attr_js}) || el.textContent.trim();
            if (key && !seen.has(key)) {{
                seen.add(key);
                newKeys.push(key);
            }}
        }});

        const atBottom = (window.innerHeight + window.scrollY) >= document.body.scrollHeight - 100;

        return JSON.stringify({{
            new_keys: newKeys,
            at_bottom: atBottom,
            total_seen: seen.size
        }});
    }})()
    """
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def build_get_scroll_info_js() -> str:
    """Build JS that reports scroll offsets plus page and viewport sizes.

    The script returns a JSON string with ``scrollX``, ``scrollY``,
    ``pageHeight``, ``pageWidth``, ``viewportHeight``, ``viewportWidth``,
    ``atBottom`` and ``atTop`` (both edge flags use a 10px tolerance).
    """
    script = """
    (function() {
        return JSON.stringify({
            scrollX: window.scrollX,
            scrollY: window.scrollY,
            pageHeight: document.body.scrollHeight,
            pageWidth: document.body.scrollWidth,
            viewportHeight: window.innerHeight,
            viewportWidth: window.innerWidth,
            atBottom: (window.innerHeight + window.scrollY) >= document.body.scrollHeight - 10,
            atTop: window.scrollY <= 10
        });
    })()
    """
    return script
|
cmdop/services/browser/models.py
CHANGED
|
@@ -48,3 +48,35 @@ class BrowserState(BaseModel):
|
|
|
48
48
|
|
|
49
49
|
url: str
|
|
50
50
|
title: str
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ScrollInfo(BaseModel):
    """Scroll position and page dimensions."""

    # NOTE(review): build_get_scroll_info_js emits camelCase keys (scrollX,
    # pageHeight, atBottom, ...) while these fields are snake_case and no
    # aliases are declared here — confirm the caller maps the keys, otherwise
    # every field silently keeps its default.
    scroll_x: int = 0
    scroll_y: int = 0
    page_height: int = 0
    page_width: int = 0
    viewport_height: int = 0
    viewport_width: int = 0
    at_bottom: bool = False
    # Defaults to True, unlike at_bottom.
    at_top: bool = True
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class ScrollResult(BaseModel):
    """Result of scroll operation."""

    # NOTE(review): the scroll JS builders report scrollY / scrolledBy /
    # atBottom in camelCase; these fields are snake_case with no aliases —
    # confirm the caller translates the keys.
    success: bool = True
    scroll_y: int = 0
    scrolled_by: int = 0
    at_bottom: bool = False
    # None when no error was reported.
    error: str | None = None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class InfiniteScrollResult(BaseModel):
    """Result of infinite scroll extraction."""

    # Field names match the keys emitted by build_infinite_scroll_js
    # (new_keys, at_bottom, total_seen, error).
    new_keys: list[str] = []
    at_bottom: bool = False
    total_seen: int = 0
    # None when no error was reported.
    error: str | None = None
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""HTML parsing utilities with BeautifulSoup integration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Iterator
|
|
6
|
+
|
|
7
|
+
from bs4 import BeautifulSoup, Tag
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def parse_html(html: str, parser: str = "html.parser") -> BeautifulSoup:
    """Build a BeautifulSoup document from an HTML string.

    Args:
        html: Markup to parse.
        parser: Parser name handed to BeautifulSoup (default "html.parser").
    """
    document = BeautifulSoup(html, parser)
    return document
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SoupWrapper:
    """Wrapper around BeautifulSoup with convenience methods.

    Wraps either a freshly parsed document (``html=...``) or an existing
    BeautifulSoup/Tag object (``soup=...``). Query methods return new
    ``SoupWrapper`` instances so calls can be chained.
    """

    def __init__(self, html: str | None = None, soup: BeautifulSoup | Tag | None = None):
        """Wrap ``soup`` if given, otherwise parse ``html`` with "html.parser".

        Raises:
            ValueError: If neither ``html`` nor ``soup`` is provided.
        """
        if soup is not None:
            self._soup: BeautifulSoup | Tag = soup
        elif html is not None:
            self._soup = BeautifulSoup(html, "html.parser")
        else:
            raise ValueError("Either html or soup must be provided")

    @property
    def soup(self) -> BeautifulSoup | Tag:
        """Get underlying BeautifulSoup/Tag object."""
        return self._soup

    def select(self, selector: str) -> list[SoupWrapper]:
        """Select all matching elements."""
        return [SoupWrapper(soup=el) for el in self._soup.select(selector)]

    def select_one(self, selector: str) -> SoupWrapper | None:
        """Select first matching element, or None when nothing matches."""
        el = self._soup.select_one(selector)
        return SoupWrapper(soup=el) if el is not None else None

    def find(self, name: str, **kwargs: Any) -> SoupWrapper | None:
        """Find first element by tag name; non-Tag results yield None."""
        el = self._soup.find(name, **kwargs)
        if isinstance(el, Tag):
            return SoupWrapper(soup=el)
        return None

    def find_all(self, name: str | None = None, **kwargs: Any) -> list[SoupWrapper]:
        """Find all elements by tag name (all tags when ``name`` is omitted)."""
        elements = self._soup.find_all(name, **kwargs) if name else self._soup.find_all(**kwargs)
        return [SoupWrapper(soup=el) for el in elements if isinstance(el, Tag)]

    def children(self) -> list[SoupWrapper]:
        """Get direct child elements (not recursive)."""
        return [SoupWrapper(soup=el) for el in self._soup.children if isinstance(el, Tag)]

    def text(self, strip: bool = True, separator: str = "") -> str:
        """Get text content."""
        return self._soup.get_text(strip=strip, separator=separator)

    def attr(self, name: str, default: str = "") -> str:
        """Get attribute value as a string.

        Multi-valued attributes (BeautifulSoup returns e.g. ``class`` as a
        list) are joined with single spaces. Missing or empty attributes
        yield ``default``.
        """
        val = self._soup.get(name, default) if hasattr(self._soup, "get") else default
        if isinstance(val, (list, tuple)):
            # str(list) would leak a Python repr such as "['a', 'b']".
            val = " ".join(str(part) for part in val)
        return str(val) if val else default

    def attrs(self) -> dict[str, Any]:
        """Get all attributes."""
        return dict(self._soup.attrs) if hasattr(self._soup, "attrs") else {}

    def html(self) -> str:
        """Get HTML."""
        return str(self._soup)

    # Convenience methods

    def texts(self, selector: str, strip: bool = True) -> list[str]:
        """Get text from all matching elements."""
        return [el.text(strip=strip) for el in self.select(selector)]

    def attrs_list(self, selector: str, attr_name: str) -> list[str]:
        """Get attribute from all matching elements, skipping empty values."""
        # Look each attribute up once instead of once to test and once to keep.
        values = (el.attr(attr_name) for el in self.select(selector))
        return [value for value in values if value]

    def links(self, selector: str = "a[href]") -> list[str]:
        """Get all href values."""
        return self.attrs_list(selector, "href")

    def images(self, selector: str = "img[src]") -> list[str]:
        """Get all image src values."""
        return self.attrs_list(selector, "src")

    def __bool__(self) -> bool:
        return self._soup is not None

    def __iter__(self) -> Iterator[SoupWrapper]:
        yield from self.children()

    def __repr__(self) -> str:
        return f"<SoupWrapper({getattr(self._soup, 'name', 'soup')})>"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def soup(html: str) -> SoupWrapper:
    """Parse an HTML string and return it wrapped in a SoupWrapper."""
    wrapper = SoupWrapper(html=html)
    return wrapper
|