real-browser-cli 0.14.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_cli/__init__.py +164 -0
- browser_cli/async_sdk.py +237 -0
- browser_cli/auth.py +263 -0
- browser_cli/cli.py +151 -0
- browser_cli/client/__init__.py +47 -0
- browser_cli/client/auth.py +63 -0
- browser_cli/client/core.py +200 -0
- browser_cli/client/messages.py +45 -0
- browser_cli/client/targets.py +95 -0
- browser_cli/command_security.py +119 -0
- browser_cli/commands/__init__.py +81 -0
- browser_cli/commands/auth.py +157 -0
- browser_cli/commands/clients.py +173 -0
- browser_cli/commands/completion.py +56 -0
- browser_cli/commands/doctor.py +90 -0
- browser_cli/commands/dom.py +191 -0
- browser_cli/commands/events.py +52 -0
- browser_cli/commands/extension.py +42 -0
- browser_cli/commands/extract.py +70 -0
- browser_cli/commands/groups.py +108 -0
- browser_cli/commands/install.py +121 -0
- browser_cli/commands/navigate.py +96 -0
- browser_cli/commands/page.py +26 -0
- browser_cli/commands/perf.py +47 -0
- browser_cli/commands/raw.py +23 -0
- browser_cli/commands/remote.py +68 -0
- browser_cli/commands/script.py +68 -0
- browser_cli/commands/search.py +79 -0
- browser_cli/commands/serve.py +117 -0
- browser_cli/commands/serve_http.py +115 -0
- browser_cli/commands/session.py +163 -0
- browser_cli/commands/storage.py +36 -0
- browser_cli/commands/tabs.py +252 -0
- browser_cli/commands/watch.py +60 -0
- browser_cli/commands/windows.py +87 -0
- browser_cli/commands/workspace.py +91 -0
- browser_cli/compat/__init__.py +4 -0
- browser_cli/compat/auth.py +44 -0
- browser_cli/compat/commands.py +43 -0
- browser_cli/constants.py +95 -0
- browser_cli/endpoints.py +55 -0
- browser_cli/errors.py +9 -0
- browser_cli/framing.py +83 -0
- browser_cli/local_transport.py +64 -0
- browser_cli/markdown/__init__.py +8 -0
- browser_cli/markdown/html.py +259 -0
- browser_cli/markdown/render.py +188 -0
- browser_cli/models.py +182 -0
- browser_cli/native/__init__.py +1 -0
- browser_cli/native/host.py +211 -0
- browser_cli/native/local_server.py +111 -0
- browser_cli/native/protocol.py +30 -0
- browser_cli/platform.py +34 -0
- browser_cli/registry.py +99 -0
- browser_cli/remote/__init__.py +1 -0
- browser_cli/remote/registry.py +53 -0
- browser_cli/remote/transport.py +230 -0
- browser_cli/sdk/__init__.py +48 -0
- browser_cli/sdk/base.py +116 -0
- browser_cli/sdk/browser_data.py +37 -0
- browser_cli/sdk/decorators.py +107 -0
- browser_cli/sdk/dom.py +169 -0
- browser_cli/sdk/extension.py +24 -0
- browser_cli/sdk/factories.py +103 -0
- browser_cli/sdk/groups.py +51 -0
- browser_cli/sdk/navigation.py +122 -0
- browser_cli/sdk/perf.py +23 -0
- browser_cli/sdk/routing.py +149 -0
- browser_cli/sdk/session.py +72 -0
- browser_cli/sdk/tabs.py +213 -0
- browser_cli/sdk/windows.py +26 -0
- browser_cli/sdk/workflow_decorators.py +200 -0
- browser_cli/serve/__init__.py +0 -0
- browser_cli/serve/auth.py +107 -0
- browser_cli/serve/control.py +59 -0
- browser_cli/serve/logging.py +16 -0
- browser_cli/serve/proxy.py +79 -0
- browser_cli/serve/runtime.py +196 -0
- browser_cli/transport.py +214 -0
- browser_cli/version_manager.py +17 -0
- real_browser_cli-0.14.2.dist-info/METADATA +87 -0
- real_browser_cli-0.14.2.dist-info/RECORD +85 -0
- real_browser_cli-0.14.2.dist-info/WHEEL +4 -0
- real_browser_cli-0.14.2.dist-info/entry_points.txt +2 -0
- real_browser_cli-0.14.2.dist-info/licenses/LICENSE +75 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""HTML tree walking for browser-cli Markdown rendering."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
from html.parser import HTMLParser
|
|
6
|
+
|
|
7
|
+
def _normalize_text(value):
|
|
8
|
+
return re.sub(r"\s+", " ", value or "").strip()
|
|
9
|
+
|
|
10
|
+
def _normalize_inline(value):
|
|
11
|
+
value = value.replace("\xa0", " ")
|
|
12
|
+
value = re.sub(r"[ \t\r\f\v]+", " ", value)
|
|
13
|
+
value = re.sub(r" *\n *", "\n", value)
|
|
14
|
+
return value.strip()
|
|
15
|
+
|
|
16
|
+
def _collapse_blank_lines(value):
|
|
17
|
+
value = re.sub(r"[ \t]+\n", "\n", value)
|
|
18
|
+
value = re.sub(r"\n{3,}", "\n\n", value)
|
|
19
|
+
return value.strip()
|
|
20
|
+
|
|
21
|
+
def _escape_markdown(text):
|
|
22
|
+
return re.sub(r"([\\`[\]])", r"\\\1", text)
|
|
23
|
+
|
|
24
|
+
def _escape_table_cell(text):
|
|
25
|
+
return text.replace("|", r"\|").replace("\n", " ").strip()
|
|
26
|
+
|
|
27
|
+
class _HtmlNode:
|
|
28
|
+
def __init__(self, tag=None, attrs=None, text=None):
|
|
29
|
+
self.tag = tag
|
|
30
|
+
self.attrs = attrs or {}
|
|
31
|
+
self.text = text
|
|
32
|
+
self.children = []
|
|
33
|
+
|
|
34
|
+
class _HtmlTreeBuilder(HTMLParser):
    """Build a lightweight ``_HtmlNode`` tree from HTML fed to the parser.

    ``root`` is a synthetic ``document`` node. ``_stack`` holds the chain of
    currently open elements, with ``root`` always at index 0.
    """

    # All HTML void elements (plus the legacy ``param``). They never get an
    # end tag, so pushing one on the open-element stack would turn the
    # siblings that follow it into its children -- e.g. a nested list after
    # an ``<input>`` inside ``<li>`` would no longer be a direct ``li`` child.
    _VOID_TAGS = frozenset({
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "param", "source", "track", "wbr",
    })

    def __init__(self):
        super().__init__(convert_charrefs=True)
        self.root = _HtmlNode(tag="document")
        self._stack = [self.root]

    def _append_element(self, tag, attrs):
        """Attach a new element node to the innermost open element; return it."""
        node = _HtmlNode(tag=tag.lower(), attrs=dict(attrs))
        self._stack[-1].children.append(node)
        return node

    def handle_starttag(self, tag, attrs):
        """Add an element; non-void elements stay open for later children."""
        node = self._append_element(tag, attrs)
        if node.tag not in self._VOID_TAGS:
            self._stack.append(node)

    def handle_startendtag(self, tag, attrs):
        """Add a self-closing element (``<x/>``) without opening it."""
        self._append_element(tag, attrs)

    def handle_endtag(self, tag):
        """Close the nearest open element named *tag* and everything inside it.

        An end tag that matches no open element is ignored. The root at
        index 0 is never closed.
        """
        lowered = tag.lower()
        for index in range(len(self._stack) - 1, 0, -1):
            if self._stack[index].tag == lowered:
                del self._stack[index:]
                break

    def handle_data(self, data):
        """Record non-empty character data as a text node."""
        if data:
            self._stack[-1].children.append(_HtmlNode(text=data))
|
|
62
|
+
|
|
63
|
+
def _normalize_text(value):
|
|
64
|
+
return re.sub(r"\s+", " ", value or "").strip()
|
|
65
|
+
|
|
66
|
+
def _normalize_inline(value):
|
|
67
|
+
value = value.replace("\xa0", " ")
|
|
68
|
+
value = re.sub(r"[ \t\r\f\v]+", " ", value)
|
|
69
|
+
value = re.sub(r" *\n *", "\n", value)
|
|
70
|
+
return value.strip()
|
|
71
|
+
|
|
72
|
+
def _collapse_blank_lines(value):
|
|
73
|
+
value = re.sub(r"[ \t]+\n", "\n", value)
|
|
74
|
+
value = re.sub(r"\n{3,}", "\n\n", value)
|
|
75
|
+
return value.strip()
|
|
76
|
+
|
|
77
|
+
def _escape_markdown(text):
|
|
78
|
+
return re.sub(r"([\\`[\]])", r"\\\1", text)
|
|
79
|
+
|
|
80
|
+
def _escape_table_cell(text):
|
|
81
|
+
return text.replace("|", r"\|").replace("\n", " ").strip()
|
|
82
|
+
|
|
83
|
+
def _iter_descendants(node):
|
|
84
|
+
for child in getattr(node, "children", []):
|
|
85
|
+
yield child
|
|
86
|
+
yield from _iter_descendants(child)
|
|
87
|
+
|
|
88
|
+
def _has_class(node, class_name):
|
|
89
|
+
classes = (node.attrs.get("class") or "").split()
|
|
90
|
+
return class_name in classes
|
|
91
|
+
|
|
92
|
+
def _is_code_block_node(node):
|
|
93
|
+
if not node or not node.tag:
|
|
94
|
+
return False
|
|
95
|
+
if node.attrs.get("data-is-code-block-view") == "true":
|
|
96
|
+
return True
|
|
97
|
+
return node.tag == "pre"
|
|
98
|
+
|
|
99
|
+
def _inline_text(node):
    """Render *node* and its subtree as inline Markdown.

    Text nodes are Markdown-escaped. ``br`` becomes a newline, ``img`` an
    image reference (nothing when ``src`` is missing), ``a`` a link (plain
    text when ``href`` is missing), and ``code`` / ``strong`` / ``b`` /
    ``em`` / ``i`` the matching Markdown span (nothing when empty). Any other
    element is rendered as the concatenation of its children, with a newline
    appended after each ``p``/``div``/``table``/``ul``/``ol``/``pre`` child.
    """
    if node.text is not None:
        return _escape_markdown(node.text)
    if not node.tag:
        return ""

    tag = node.tag
    if tag == "br":
        return "\n"
    if tag == "img":
        src = node.attrs.get("src") or ""
        alt = _normalize_text(node.attrs.get("alt") or "")
        if not src:
            return ""
        # Both arms used to return an empty f-string, so every image was
        # dropped and src/alt were never used. An empty alt gives "![](src)".
        return f"![{alt}]({src})"

    if tag in {"a", "code", "strong", "b", "em", "i"}:
        text = _normalize_inline("".join(_inline_text(child) for child in node.children))
        if tag == "a":
            href = node.attrs.get("href") or ""
            return f"[{text or href}]({href})" if href else text
        if not text:
            return ""
        if tag == "code":
            # Done outside the f-string: a backslash inside an f-string
            # expression is a SyntaxError before Python 3.12.
            escaped = text.replace("`", r"\\`")
            return f"`{escaped}`"
        if tag in {"strong", "b"}:
            return f"**{text}**"
        return f"*{text}*"

    chunks = []
    for child in node.children:
        rendered = _inline_text(child)
        if rendered:
            chunks.append(rendered)
        # Block-level children end their line even when they rendered empty.
        if child.tag in {"p", "div", "table", "ul", "ol", "pre"}:
            chunks.append("\n")
    return "".join(chunks)
|
|
136
|
+
|
|
137
|
+
def _text_block(node):
    """Render the children of *node* as one normalized inline-Markdown block."""
    pieces = [_inline_text(child) for child in node.children]
    joined = _normalize_inline("".join(pieces))
    return _collapse_blank_lines(joined)
|
|
139
|
+
|
|
140
|
+
def _inner_text_preserve(node):
|
|
141
|
+
if node.text is not None:
|
|
142
|
+
return node.text
|
|
143
|
+
if not node.tag:
|
|
144
|
+
return ""
|
|
145
|
+
if node.tag == "br":
|
|
146
|
+
return ""
|
|
147
|
+
return "".join(_inner_text_preserve(child) for child in node.children)
|
|
148
|
+
|
|
149
|
+
def _table_to_markdown(node):
    """Render a ``table`` element as a Markdown pipe table.

    Every ``tr`` found anywhere below *node* contributes a row built from its
    direct ``td``/``th`` children; rows without such cells are dropped and
    short rows are padded with empty cells. The first row is used as the
    header when the table has a direct ``thead`` child, when the first ``tr``
    holds a ``th``, or when the first row is entirely blank (in which case
    the second row, if there is one, is promoted instead). Otherwise an empty
    header row is emitted and every row goes into the body.

    Returns ``""`` when no row has any cell.
    """
    table_rows = [item for item in _iter_descendants(node) if item.tag == "tr"]
    cell_rows = []
    for table_row in table_rows:
        cells = [
            _escape_table_cell(_text_block(cell))
            for cell in table_row.children
            if cell.tag in {"td", "th"}
        ]
        if cells:
            cell_rows.append(cells)
    if not cell_rows:
        return ""

    column_count = max(map(len, cell_rows))
    padded = [cells + [""] * (column_count - len(cells)) for cells in cell_rows]

    leading_blank = not any(cell.strip() for cell in padded[0])
    # table_rows cannot be empty here: cell_rows was built from it.
    declares_header = (
        any(child.tag == "thead" for child in node.children)
        or any(child.tag == "th" for child in table_rows[0].children)
        or leading_blank
    )
    if not declares_header:
        header, body = [""] * column_count, padded
    elif leading_blank and len(padded) > 1:
        header, body = padded[1], padded[2:]
    else:
        header, body = padded[0], padded[1:]

    def as_line(cells):
        return f"| {' | '.join(cells)} |"

    rendered = [as_line(header), as_line(["---"] * column_count)]
    rendered += [as_line(cells) for cells in body]
    return "\n".join(rendered)
|
|
187
|
+
|
|
188
|
+
def _list_to_markdown(node, depth=0):
    """Render a ``ul``/``ol`` element as Markdown list lines.

    Only direct ``li`` children are rendered. ``ol`` items are numbered from
    1, all other lists use ``- ``. Each item's inline content becomes the
    item line; further lines of that content are indented by the marker
    width. ``ul``/``ol`` children of an item are rendered recursively with
    ``depth + 1`` and placed after the item. Every line is prefixed with one
    space per *depth* level.
    """
    is_ordered = node.tag == "ol"
    pad = " " * depth
    output = []
    list_items = (child for child in node.children if child.tag == "li")
    for number, list_item in enumerate(list_items, start=1):
        bullet = f"{number}. " if is_ordered else "- "
        inline_parts = []
        sublists = []
        for part in list_item.children:
            if part.tag in {"ul", "ol"}:
                sublists.append(_list_to_markdown(part, depth + 1))
            else:
                inline_parts.append(_inline_text(part))
        text = _collapse_blank_lines(_normalize_inline("".join(inline_parts)))
        if text:
            first, *rest = text.splitlines()
            output.append(f"{pad}{bullet}{first}")
            hanging = pad + " " * len(bullet)
            output.extend(f"{hanging}{extra}" for extra in rest)
        output.extend(filter(None, sublists))
    return "\n".join(output)
|
|
213
|
+
|
|
214
|
+
def _code_block_to_markdown(node):
    """Render a code-block node as a fenced block without a language tag.

    A ``pre`` element contributes its raw inner text. Any other node
    contributes one line per descendant element carrying the ``cm-line``
    class. Trailing newlines are trimmed and an empty result yields ``""``.
    """
    if node.tag == "pre":
        source = _inner_text_preserve(node)
    else:
        source = "\n".join(
            _inner_text_preserve(element)
            for element in _iter_descendants(node)
            if element.tag and _has_class(element, "cm-line")
        )
    source = source.rstrip("\n")
    if not source:
        return ""
    return f"```\n{source}\n```"
|
|
225
|
+
|
|
226
|
+
def _block_to_markdown(node):
    """Render *node* as block-level Markdown.

    Text nodes are whitespace-normalized (not escaped). Code-block nodes,
    tables, lists, headings, paragraphs/``figcaption``, blockquotes, ``hr``
    and ``img`` each have a dedicated rendering. Any other element renders
    its children as separate blocks joined by blank lines and falls back to
    inline rendering only when no child produced output.

    NOTE(review): because of that fallback order, inline elements inside a
    generic container (e.g. a link inside a ``div``) are rendered child by
    child as plain blocks instead of through the inline renderer -- confirm
    this is intended.
    """
    if node.text is not None:
        return _normalize_text(node.text)
    tag = node.tag
    if not tag:
        return ""
    # Checked before the tag dispatch: the code-block marker attribute may sit
    # on any element.
    if _is_code_block_node(node):
        return _code_block_to_markdown(node)
    if tag == "table":
        return _table_to_markdown(node)
    if tag in {"ul", "ol"}:
        return _list_to_markdown(node)
    if re.fullmatch(r"h[1-6]", tag):
        heading = _text_block(node)
        if not heading:
            return ""
        level = int(tag[1])
        return f"{'#' * level} {heading}"
    if tag in {"p", "figcaption"}:
        return _text_block(node)
    if tag == "blockquote":
        rendered = [_block_to_markdown(child) for child in node.children]
        quoted = _collapse_blank_lines("\n\n".join(block for block in rendered if block))
        if not quoted:
            return ""
        return "\n".join(f"> {line}" if line else ">" for line in quoted.splitlines())
    if tag == "hr":
        return "---"
    if tag == "img":
        return _inline_text(node)

    blocks = list(filter(None, map(_block_to_markdown, node.children)))
    if blocks:
        return _collapse_blank_lines("\n\n".join(blocks))
    return _text_block(node)
|
|
254
|
+
|
|
255
|
+
def convert_html_to_markdown(html, clean_markdown_output):
    """Parse *html*, render it to Markdown and post-process the result.

    Args:
        html: HTML source; a falsy value is treated as an empty document.
        clean_markdown_output: Callable applied to the rendered Markdown.

    Returns:
        Whatever *clean_markdown_output* returns for the rendered Markdown.
    """
    parser = _HtmlTreeBuilder()
    parser.feed(html or "")
    # HTMLParser may hold back trailing text (for instance text ending in an
    # unterminated "&..." sequence) until it is told the input is complete.
    # Without close() that text never reaches the tree.
    parser.close()
    markdown = _block_to_markdown(parser.root)
    return clean_markdown_output(markdown)
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""HTML → Markdown conversion and Markdown clean-up.
|
|
2
|
+
|
|
3
|
+
Pure, presentation-agnostic text transforms shared by the SDK
|
|
4
|
+
(:meth:`browser_cli.sdk.dom.ExtractNS.markdown`) and the ``extract markdown``
|
|
5
|
+
CLI command. No Click/Rich/IPC dependencies — just an HTML tree walker plus a
|
|
6
|
+
set of repair passes for the markdown the page (or a markdown editor like
|
|
7
|
+
Obsidian/CodeMirror) hands back.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
|
|
13
|
+
from browser_cli.markdown.html import convert_html_to_markdown
|
|
14
|
+
|
|
15
|
+
_FENCE_RE = re.compile(r"```(?:[^\n`]*)\n.*?\n```", re.DOTALL)
|
|
16
|
+
_ESCAPED_MARKDOWN_RE = re.compile(r"\\([_-])")
|
|
17
|
+
_TABLE_SEPARATOR_RE = re.compile(r"^\|(?:\s*:?-{3,}:?\s*\|)+\s*$")
|
|
18
|
+
|
|
19
|
+
def _collapse_blank_lines(value):
|
|
20
|
+
value = re.sub(r"[ \t]+\n", "\n", value)
|
|
21
|
+
value = re.sub(r"\n{3,}", "\n\n", value)
|
|
22
|
+
return value.strip()
|
|
23
|
+
|
|
24
|
+
def _parse_table_row(line):
|
|
25
|
+
stripped = line.strip()
|
|
26
|
+
if not stripped.startswith("|") or not stripped.endswith("|"):
|
|
27
|
+
return None
|
|
28
|
+
return [cell.strip() for cell in stripped.strip("|").split("|")]
|
|
29
|
+
|
|
30
|
+
def _repair_table_headers(lines):
    """Replace a blank table header row with the first data row.

    Whenever three consecutive lines form "row, separator, row" and the
    first row has only empty cells while the third has content, the third
    row is emitted as the header followed by the separator, and the blank
    row is dropped. Every other line is emitted stripped and unchanged.
    """
    result = []
    cursor, total = 0, len(lines)
    while cursor < total:
        window = lines[cursor:cursor + 3]
        if len(window) == 3:
            header_cells = _parse_table_row(window[0])
            data_cells = _parse_table_row(window[2])
            if (
                header_cells is not None
                and data_cells is not None
                and _TABLE_SEPARATOR_RE.match(window[1].strip())
                and header_cells
                and all(not cell for cell in header_cells)
                and any(data_cells)
            ):
                result.extend((window[2].strip(), window[1].strip()))
                cursor += 3
                continue
        result.append(lines[cursor].strip())
        cursor += 1
    return result
|
|
50
|
+
|
|
51
|
+
def _repair_list_continuations(lines):
    """Indent the line that directly follows a list item as its continuation.

    Every line is stripped first. A non-empty line right after a list item
    is prefixed with the item's hanging indent unless it starts a new block
    (blank line, code fence, heading, quote, table line or another list
    item). Only the first such line is treated as a continuation.
    """
    output = []
    # Hanging indent of the list item emitted last; None when the previous
    # emitted line was not a list item.
    hanging_indent = None
    for raw in lines:
        text = raw.strip()
        starts_block = bool(
            not text
            or text.startswith(("```", "#", ">", "|"))
            or _TABLE_SEPARATOR_RE.match(text)
            or re.match(r"^(\s*)([-*+]|\d+\.)\s+", text)
        )
        if hanging_indent is not None and text and not starts_block:
            output.append(f"{hanging_indent}{text}")
            hanging_indent = None
            continue

        output.append(text)
        item = re.match(r"^(\s*)([-*+]|\d+\.)\s+.+$", text)
        if item:
            hanging_indent = item.group(1) + " " * (len(item.group(2)) + 1)
        else:
            hanging_indent = None
    return output
|
|
81
|
+
|
|
82
|
+
def _repair_flattened_diagram(text):
|
|
83
|
+
if "\n" in text:
|
|
84
|
+
return text
|
|
85
|
+
if sum(text.count(char) for char in "│▼├└") < 2:
|
|
86
|
+
return text
|
|
87
|
+
|
|
88
|
+
text = re.sub(r"\s{2,}([│▼])", r"\n \1", text)
|
|
89
|
+
text = re.sub(r"([│▼])\s{2,}", r"\1\n", text)
|
|
90
|
+
text = re.sub(r"([│▼])(?=[^\s\n│▼├└])", r"\1\n", text)
|
|
91
|
+
text = re.sub(r"(?<=[^\s\n])([├└])", r"\n\1", text)
|
|
92
|
+
text = re.sub(r"([^\s\n])(\()", r"\1\n\2", text)
|
|
93
|
+
return "\n".join(line.rstrip() for line in text.splitlines() if line.strip())
|
|
94
|
+
|
|
95
|
+
def _convert_dash_lists_to_branches(lines):
|
|
96
|
+
converted = []
|
|
97
|
+
index = 0
|
|
98
|
+
while index < len(lines):
|
|
99
|
+
match = re.match(r"^(\s*)-\s+(.*)$", lines[index])
|
|
100
|
+
if not match:
|
|
101
|
+
converted.append(lines[index])
|
|
102
|
+
index += 1
|
|
103
|
+
continue
|
|
104
|
+
|
|
105
|
+
indent = match.group(1)
|
|
106
|
+
items = []
|
|
107
|
+
while index < len(lines):
|
|
108
|
+
next_match = re.match(rf"^{re.escape(indent)}-\s+(.*)$", lines[index])
|
|
109
|
+
if not next_match:
|
|
110
|
+
break
|
|
111
|
+
items.append(next_match.group(1))
|
|
112
|
+
index += 1
|
|
113
|
+
|
|
114
|
+
for item_index, item in enumerate(items):
|
|
115
|
+
branch = "└" if item_index == len(items) - 1 else "├"
|
|
116
|
+
converted.append(f"{indent}{branch} {item}")
|
|
117
|
+
return converted
|
|
118
|
+
|
|
119
|
+
def _clean_code_block(code):
    """Tidy the body of a fenced code block.

    Trailing whitespace is removed from every line and blank lines are
    trimmed from both ends. The text then goes through the flattened-diagram
    repair, lines holding only ``│`` or ``▼`` with no leading whitespace get
    a one-space indent, and dash lists are converted to branch glyphs.
    """
    trimmed = [line.rstrip() for line in code.splitlines()]
    start, stop = 0, len(trimmed)
    while start < stop and not trimmed[start].strip():
        start += 1
    while stop > start and not trimmed[stop - 1].strip():
        stop -= 1

    repaired = _repair_flattened_diagram("\n".join(trimmed[start:stop]))
    result = []
    for line in (repaired.splitlines() if repaired else []):
        bare = line.strip()
        lone_connector = bare in {"│", "▼"} and not re.match(r"^\s+[│▼]\s*$", line)
        result.append(f" {bare}" if lone_connector else line)
    return "\n".join(_convert_dash_lists_to_branches(result))
|
|
136
|
+
|
|
137
|
+
def _clean_prose(prose):
    """Run the prose repair passes on a Markdown segment outside code fences."""
    unescaped = _ESCAPED_MARKDOWN_RE.sub(r"\1", prose)
    # NOTE(review): stripping every line also removes leading indentation,
    # so any nesting indent on list lines is lost here -- confirm intended.
    lines = [line.strip() for line in unescaped.splitlines()]
    lines = _repair_table_headers(lines)
    lines = _repair_list_continuations(lines)
    return _collapse_blank_lines("\n".join(lines))


def _clean_markdown_output(markdown):
    """Clean up Markdown, treating fenced code blocks and prose separately.

    The text is split at fenced code blocks. Prose segments get their
    ``\\_`` / ``\\-`` escapes removed, lines stripped, table headers and list
    continuations repaired and blank lines collapsed. Fence bodies are
    cleaned with ``_clean_code_block`` and re-wrapped with their original
    opening line. Non-empty pieces are joined with a blank line.

    Returns ``""`` for falsy input.
    """
    if not markdown:
        return ""

    pieces = []
    last_index = 0
    for match in _FENCE_RE.finditer(markdown):
        pieces.append(_clean_prose(markdown[last_index:match.start()]))

        # header is the opening fence line; the last line of tail is the
        # closing fence, so everything before it is the body.
        header, _, tail = match.group(0).partition("\n")
        body, _, _ = tail.rpartition("\n")
        cleaned_body = _clean_code_block(body)
        pieces.append(f"{header}\n{cleaned_body}\n```" if cleaned_body else f"{header}\n```")
        last_index = match.end()

    pieces.append(_clean_prose(markdown[last_index:]))
    # Empty prose segments are dropped here.
    return "\n\n".join(piece for piece in pieces if piece)
|
|
174
|
+
|
|
175
|
+
def _convert_html_to_markdown(html):
    """Convert *html* to Markdown using this module's clean-up pass."""
    cleaner = _clean_markdown_output
    return convert_html_to_markdown(html, cleaner)
|
|
177
|
+
|
|
178
|
+
def render_markdown(raw: str | None) -> str:
    """Turn raw extractor output into clean Markdown.

    A payload whose first non-whitespace character is ``<`` is treated as
    HTML and converted; anything else is treated as Markdown and only goes
    through the clean-up/repair passes. ``None`` is handled like an empty
    string.
    """
    payload = raw if raw else ""
    looks_like_html = payload.lstrip()[:1] == "<"
    if looks_like_html:
        return _convert_html_to_markdown(payload)
    return _clean_markdown_output(payload)
|
browser_cli/models.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Typed dataclasses returned by the BrowserCLI Python API.
|
|
3
|
+
|
|
4
|
+
Each object is bound to a BrowserCLI instance so you can call actions
|
|
5
|
+
directly on it:
|
|
6
|
+
|
|
7
|
+
tabs = b.tabs.list()
|
|
8
|
+
tabs[0].close()
|
|
9
|
+
tabs[0].move(forward=True)
|
|
10
|
+
|
|
11
|
+
groups = b.groups.list()
|
|
12
|
+
groups[0].tabs()
|
|
13
|
+
groups[0].add_tab("https://example.com")
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from typing import Any, Protocol
|
|
19
|
+
|
|
20
|
+
class BoundBrowser(Protocol):
    """Structural type of the browser object that models are bound to.

    The model classes in this module reach the browser only through these
    members: they call ``dispatch`` for raw commands and use methods on the
    ``tabs``, ``groups`` and ``nav`` namespaces for everything else.
    """

    # Namespace objects; typed as Any, so their methods are not checked here.
    tabs: Any
    groups: Any
    nav: Any

    # Send command *command* with optional *args*; the return type is left
    # unspecified by this protocol.
    def dispatch(self, command: str, args: dict | None = None): ...
|
|
26
|
+
|
|
27
|
+
# ── BrowserCounts ───────────────────────────────────────────────────────────
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class BrowserCounts:
    """Aggregated per-browser counts returned in implicit multi-browser mode."""
    # Overall count; presumably the sum of by_browser values -- TODO confirm.
    total: int
    # Count per browser; presumably keyed by browser name -- TODO confirm.
    by_browser: dict[str, int]
|
|
34
|
+
|
|
35
|
+
# ── Tab ───────────────────────────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
@dataclass
class Tab:
    """Snapshot of a browser tab plus convenience actions.

    Actions are forwarded to the browser object stored in ``_browser``;
    calling one on an unbound tab raises ``RuntimeError``.
    """

    id: int
    window_id: int
    active: bool
    muted: bool = False
    title: str = ""
    url: str = ""
    group_id: int | None = None
    browser: str | None = None
    # Bound browser handle: not an __init__ argument, hidden from repr and
    # ignored by equality.
    _browser: BoundBrowser | None = field(
        init=False, default=None, repr=False, compare=False
    )

    def _b(self) -> BoundBrowser:
        """Return the bound browser, raising ``RuntimeError`` when unbound."""
        bound = self._browser
        if bound is not None:
            return bound
        raise RuntimeError("Tab is not bound to a BrowserCLI instance")

    def _command(self, name: str, args: dict | None = None):
        """Dispatch command *name* with *args* through the bound browser."""
        return self._b().dispatch(name, args)

    def close(self) -> None:
        """Close the tab."""
        payload = {"tabId": self.id}
        self._command("tabs.close", payload)

    def activate(self) -> None:
        """Make this the focused tab."""
        payload = {"tabId": self.id}
        self._command("tabs.active", payload)

    def mute(self) -> None:
        """Mute the tab."""
        payload = {"tabId": self.id}
        self._command("tabs.mute", payload)

    def unmute(self) -> None:
        """Unmute the tab."""
        payload = {"tabId": self.id}
        self._command("tabs.unmute", payload)

    def reload(self) -> None:
        """Reload the tab."""
        payload = {"tabId": self.id}
        self._command("navigate.reload", payload)

    def hard_reload(self) -> None:
        """Reload the tab, bypassing the cache."""
        payload = {"tabId": self.id}
        self._command("navigate.hard_reload", payload)

    def move(
        self, *,
        forward: bool = False,
        backward: bool = False,
        group_id: int | None = None,
        window_id: int | None = None,
        index: int | None = None,
    ) -> None:
        """Move the tab.

        All options are sent as given; unset ones are sent as ``False`` or
        ``None``.

        Args:
            forward: Move one position to the right within the window.
            backward: Move one position to the left within the window.
            group_id: Move into the tab group with this ID.
            window_id: Move to the window with this ID.
            index: Absolute position index in the target window.
        """
        payload = {
            "tabId": self.id,
            "forward": forward,
            "backward": backward,
            "groupId": group_id,
            "windowId": window_id,
            "index": index,
        }
        self._command("tabs.move", payload)

    def html(self) -> str:
        """Return the result of the ``tabs.html`` command for this tab."""
        payload = {"tabId": self.id}
        return self._command("tabs.html", payload)

    def screenshot(self, *, format: str = "png", quality: int | None = None) -> str:
        """Capture the tab's visible area; returns a base64 data URL."""
        tabs_api = self._b().tabs
        return tabs_api.screenshot(self.id, format=format, quality=quality)

    def pin(self) -> None:
        """Pin the tab."""
        payload = {"tabId": self.id}
        self._command("tabs.pin", payload)

    def unpin(self) -> None:
        """Unpin the tab."""
        payload = {"tabId": self.id}
        self._command("tabs.unpin", payload)

    def refresh(self) -> Tab:
        """Fetch and return a fresh snapshot of this tab."""
        tabs_api = self._b().tabs
        return tabs_api.status(self.id)

    def wait_for_load(self, *, timeout: float = 30.0, ready_state: str = "complete") -> Tab:
        """Wait until the tab reaches the requested readyState."""
        tabs_api = self._b().tabs
        return tabs_api.wait_for_load(self.id, timeout=timeout, ready_state=ready_state)

    def watch_url(self, pattern: str, *, timeout: float = 30.0) -> Tab:
        """Wait until the tab's URL matches regex *pattern*."""
        tabs_api = self._b().tabs
        return tabs_api.watch_url(pattern, tab_id=self.id, timeout=timeout)

    def open(self, url: str, *, background: bool = False) -> None:
        """Navigate this tab to *url* in place.

        NOTE(review): *background* is accepted but not forwarded anywhere.
        """
        navigation = self._b().nav
        navigation.to(self.id, url)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ── Group ─────────────────────────────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
@dataclass
class Group:
    """Snapshot of a browser tab group plus convenience actions.

    Actions are forwarded to the browser object stored in ``_browser``;
    calling one on an unbound group raises ``RuntimeError``.
    """

    id: int
    title: str
    color: str
    collapsed: bool
    tab_count: int
    browser: str | None = None
    # Bound browser handle: not an __init__ argument, hidden from repr and
    # ignored by equality.
    _browser: BoundBrowser | None = field(
        init=False, default=None, repr=False, compare=False
    )

    def _b(self) -> BoundBrowser:
        """Return the bound browser, raising ``RuntimeError`` when unbound."""
        bound = self._browser
        if bound is not None:
            return bound
        raise RuntimeError("Group is not bound to a BrowserCLI instance")

    def _command(self, name: str, args: dict | None = None):
        """Dispatch command *name* with *args* through the bound browser."""
        return self._b().dispatch(name, args)

    def close(self) -> None:
        """Ungroup (and close) this tab group."""
        payload = {"groupId": self.id}
        self._command("group.close", payload)

    def tabs(self) -> list[Tab]:
        """Return all tabs inside this group."""
        groups_api = self._b().groups
        return groups_api.tabs(self.id)

    def move(self, *, forward: bool = False, backward: bool = False) -> None:
        """Move this group forward or backward among groups.

        The group is identified in the payload by its ID as a string.
        """
        payload = {
            "group": str(self.id),
            "forward": forward,
            "backward": backward,
        }
        self._command("group.move", payload)

    def add_tab(self, url: str | None = None) -> int | None:
        """Open a new tab inside this group and return the new tab ID."""
        groups_api = self._b().groups
        return groups_api.add_tab(self.id, url)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Native messaging host internals."""
|