chatterer 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
+ from typing import Literal, TypeAlias
+
+ from pydantic import BaseModel, Field
+
+
+ class MultiMatchRegex(BaseModel):
+     type: Literal["multi_match_regex"] = Field(
+         description="A regex pattern that should match multiple instances of the subject in the document."
+     )
+     regular_expression: str = Field(
+         description="The regex pattern that should match multiple instances of the subject in the document."
+     )
+
+     def __hash__(self) -> int:
+         return hash((self.type, self.regular_expression))
+
+
+ class SingleMatchCitation(BaseModel):
+     start_from: str = Field(description="A snippet of text at the beginning of the cited section.")
+     end_at: str = Field(description="A snippet of text at the end of the cited section.")
+
+     def __hash__(self) -> int:
+         return hash((self.start_from, self.end_at))
+
+
+ Reference: TypeAlias = SingleMatchCitation | MultiMatchRegex
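
For orientation, a minimal usage sketch of these models (not part of the package; it assumes `SingleMatchCitation`, `MultiMatchRegex`, and the `Reference` alias are in scope, since this diff does not show their module path):

```python
citation: Reference = SingleMatchCitation(
    start_from="In conclusion,",
    end_at="future work.",
)
regex_ref: Reference = MultiMatchRegex(
    type="multi_match_regex",
    regular_expression=r"Figure \d+",
)

# Both models define __hash__, so mixed references deduplicate in a set.
unique_refs: set[Reference] = {citation, regex_ref, citation}
assert len(unique_refs) == 2
```
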
@@ -0,0 +1,138 @@
+ from typing import Callable, NamedTuple, Self, TypeVar
+
+ from pydantic import BaseModel
+
+ T = TypeVar("T", bound=BaseModel)
+
+
+ class MatchedText(NamedTuple):
+     text: str
+     start_idx: int
+     end_idx: int
+
+     @classmethod
+     def from_text(
+         cls,
+         full_text: str,
+         len_func: Callable[[str], int],
+         chunk_size: int = 2048,
+         token_overlap: int = 0,
+         separator: str = "\n",
+     ) -> list[Self]:
+         """
+         Splits text into chunks based on a token-count limit and optional overlap.
+         Each chunk is returned together with its position (start_idx, end_idx) in the original text.
+         The text is split on the separator string, and token counts are computed with len_func.
+
+         Args:
+             full_text: The full text to split.
+             len_func: A function that returns the token count of a given text.
+             chunk_size: Maximum number of tokens per chunk. Defaults to 2048.
+             token_overlap: Number of tokens to overlap between chunks. Defaults to 0.
+             separator: Separator string used to split the text. Defaults to "\n".
+
+         Returns:
+             A list of tuples of the form (chunk_text, start_idx, end_idx).
+             chunk_text is the substring of full_text identical to full_text[start_idx:end_idx].
+         """
+         text_chunks: list[Self] = []
+         sep_token_count: int = len_func(separator)
+         sep_len = len(separator)
+
+         # First, split the original text on the separator while recording each piece's start/end indices.
+         piece_infos: list[Self] = []  # Each entry: (piece_text, start_index, end_index)
+         start_idx = 0
+         while True:
+             idx = full_text.find(separator, start_idx)
+             if idx == -1:
+                 # Last piece: no more separators, so append the entire remainder.
+                 piece_infos.append(
+                     cls(
+                         text=full_text[start_idx:],
+                         start_idx=start_idx,
+                         end_idx=len(full_text),
+                     )
+                 )
+                 break
+             else:
+                 piece_infos.append(
+                     cls(
+                         text=full_text[start_idx:idx],
+                         start_idx=start_idx,
+                         end_idx=idx,
+                     )
+                 )
+                 start_idx = idx + sep_len
+
+         current_chunk: list[Self] = []
+         current_token_count: int = 0
+         i = 0
+         while i < len(piece_infos):
+             piece_info = piece_infos[i]
+             piece = piece_info.text
+             piece_start = piece_info.start_idx
+             piece_end = piece_info.end_idx
+             # Include the separator's token count with each piece, as the original code does.
+             piece_token_count: int = len_func(piece) + sep_token_count
+
+             # If adding this piece to the current chunk would exceed chunk_size
+             if current_token_count + piece_token_count > chunk_size:
+                 # A single piece larger than chunk_size has to be added anyway.
+                 if not current_chunk:
+                     current_chunk.append(
+                         cls(
+                             text=piece,
+                             start_idx=piece_start,
+                             end_idx=piece_end,
+                         )
+                     )
+                     current_token_count += piece_token_count
+                     i += 1
+                 # The current chunk is complete -> add it to the results.
+                 chunk_start = current_chunk[0].start_idx
+                 # The pieces in current_chunk are contiguous in the original text,
+                 # so the chunk's end index is the last piece's end_index.
+                 chunk_end = current_chunk[-1].end_idx
+                 # Extracting this span from the original text keeps the separators.
+                 chunk_text = full_text[chunk_start:chunk_end]
+                 text_chunks.append(
+                     cls(
+                         text=chunk_text,
+                         start_idx=chunk_start,
+                         end_idx=chunk_end,
+                     )
+                 )
+
+                 # When token_overlap applies: carry part of the chunk's tail over into the next chunk.
+                 if token_overlap > 0:
+                     overlap_chunk: list[Self] = []
+                     overlap_count: int = 0
+                     # Walk backwards over the chunk, selecting the pieces to overlap.
+                     for j in range(len(current_chunk) - 1, -1, -1):
+                         p_text = current_chunk[j].text
+                         p_token_count = len_func(p_text) + sep_token_count
+                         # Include at least one piece, and keep adding while the overlap stays within token_overlap.
+                         if overlap_count + p_token_count <= token_overlap or not overlap_chunk:
+                             overlap_chunk.insert(0, current_chunk[j])
+                             overlap_count += p_token_count
+                         else:
+                             break
+                     current_chunk = overlap_chunk.copy()
+                     current_token_count = overlap_count
+                 else:
+                     current_chunk.clear()
+                     current_token_count = 0
+             else:
+                 # Add the piece to the chunk and move on to the next one.
+                 current_chunk.append(cls(text=piece, start_idx=piece_start, end_idx=piece_end))
+                 current_token_count += piece_token_count
+                 i += 1
+
+         # If any pieces remain, add them as the final chunk.
+         if current_chunk:
+             chunk_start = current_chunk[0].start_idx
+             chunk_end = current_chunk[-1].end_idx
+             chunk_text = full_text[chunk_start:chunk_end]
+             text_chunks.append(cls(text=chunk_text, start_idx=chunk_start, end_idx=chunk_end))
+
+         return text_chunks
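
A short sketch of `from_text` in action (assumes `MatchedText` is in scope; plain `len` stands in for a real token counter, so "tokens" are characters here):

```python
doc = "alpha\nbravo\ncharlie\ndelta"

# With len as the counter, each piece costs len(piece) + len(separator).
chunks = MatchedText.from_text(doc, len_func=len, chunk_size=14, separator="\n")

assert [c.text for c in chunks] == ["alpha\nbravo", "charlie\ndelta"]
for c in chunks:
    # Every chunk is an exact substring of the original text.
    assert doc[c.start_idx : c.end_idx] == c.text
```
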
@@ -0,0 +1,466 @@
+ import ast
+ import importlib
+ import os
+ import re
+ import site
+ from contextlib import contextmanager, suppress
+ from fnmatch import fnmatch
+ from io import BufferedReader, BufferedWriter, BytesIO, StringIO, TextIOWrapper
+ from pathlib import Path
+ from typing import (
+     TYPE_CHECKING,
+     Callable,
+     Iterator,
+     NamedTuple,
+     NotRequired,
+     Optional,
+     Self,
+     Sequence,
+     TypeAlias,
+     TypedDict,
+ )
+
+ if TYPE_CHECKING:
+     from bs4 import Tag
+     from openai import OpenAI
+     from requests import Response, Session
+
+ try:
+     from tiktoken import get_encoding, list_encoding_names
+
+     enc = get_encoding(list_encoding_names()[-1])
+ except ImportError:
+     enc = None
+
+
+ # Type definition for representing a file tree structure
+ type FileTree = dict[str, Optional[FileTree]]
+
+ # Type aliases for callback functions and file descriptors
+ CodeLanguageCallback: TypeAlias = Callable[["Tag"], Optional[str]]
+ FileDescriptorOrPath: TypeAlias = int | str | bytes | os.PathLike[str] | os.PathLike[bytes]
+
+ # Type aliases for different types of IO objects
+ BytesReadable: TypeAlias = BytesIO | BufferedReader
+ BytesWritable: TypeAlias = BytesIO | BufferedWriter
+ StringReadable: TypeAlias = StringIO | TextIOWrapper
+ StringWritable: TypeAlias = StringIO | TextIOWrapper
+
+ # Combined type aliases for readable and writable objects
+ Readable: TypeAlias = BytesReadable | StringReadable
+ Writable: TypeAlias = BytesWritable | StringWritable
+
+ # Type alias for path or readable object
+ PathOrReadable: TypeAlias = FileDescriptorOrPath | Readable
+
+
+ class HtmlToMarkdownOptions(TypedDict):
+     """
+     TypedDict for options used in HTML to Markdown conversion.
+
+     Contains various configuration options for controlling how HTML is converted to Markdown,
+     including formatting preferences, escape behaviors, and styling options.
+     """
+
+     autolinks: NotRequired[bool]
+     bullets: NotRequired[str]
+     code_language: NotRequired[str]
+     code_language_callback: NotRequired[CodeLanguageCallback]
+     convert: NotRequired[Sequence[str]]
+     default_title: NotRequired[bool]
+     escape_asterisks: NotRequired[bool]
+     escape_underscores: NotRequired[bool]
+     escape_misc: NotRequired[bool]
+     heading_style: NotRequired[str]
+     keep_inline_images_in: NotRequired[Sequence[str]]
+     newline_style: NotRequired[str]
+     strip: NotRequired[Sequence[str]]
+     strip_document: NotRequired[str]
+     strong_em_symbol: NotRequired[str]
+     sub_symbol: NotRequired[str]
+     sup_symbol: NotRequired[str]
+     table_infer_header: NotRequired[bool]
+     wrap: NotRequired[bool]
+     wrap_width: NotRequired[int]
+
+
+ def get_default_html_to_markdown_options() -> HtmlToMarkdownOptions:
+     """
+     Returns the default options for HTML to Markdown conversion.
+
+     This function provides a set of sensible defaults for the markdownify library,
+     including settings for bullets, escaping, heading styles, and other formatting options.
+
+     Returns:
+         HtmlToMarkdownOptions: A dictionary of default conversion options.
+     """
+     from markdownify import (  # pyright: ignore[reportUnknownVariableType, reportMissingTypeStubs]
+         ASTERISK,
+         SPACES,
+         STRIP,
+         UNDERLINED,
+     )
+
+     return {
+         "autolinks": True,
+         "bullets": "*+-",  # An iterable of bullet types.
+         "code_language": "",
+         "default_title": False,
+         "escape_asterisks": True,
+         "escape_underscores": True,
+         "escape_misc": False,
+         "heading_style": UNDERLINED,
+         "keep_inline_images_in": [],
+         "newline_style": SPACES,
+         "strip_document": STRIP,
+         "strong_em_symbol": ASTERISK,
+         "sub_symbol": "",
+         "sup_symbol": "",
+         "table_infer_header": False,
+         "wrap": False,
+         "wrap_width": 80,
+     }
+
+
+ class CodeSnippets(NamedTuple):
+     """
+     A named tuple that represents code snippets extracted from Python files.
+
+     Contains the paths to the files, the concatenated text of all snippets,
+     and the base directory of the files.
+     """
+
+     paths: list[Path]
+     snippets_text: str
+     base_dir: Path
+
+     @classmethod
+     def from_path_or_pkgname(cls, path_or_pkgname: str, ban_file_patterns: Optional[list[str]] = None) -> Self:
+         """
+         Creates a CodeSnippets instance from a file path or package name.
+
+         Args:
+             path_or_pkgname: Path to a file/directory or a Python package name.
+             ban_file_patterns: Optional list of patterns to exclude files.
+
+         Returns:
+             A new CodeSnippets instance with extracted code snippets.
+         """
+         paths: list[Path] = _get_pyscript_paths(path_or_pkgname=path_or_pkgname, ban_fn_patterns=ban_file_patterns)
+         snippets_text: str = "".join(_get_a_snippet(p) for p in paths)
+         return cls(
+             paths=paths,
+             snippets_text=snippets_text,
+             base_dir=_get_base_dir(paths),
+         )
+
+     @property
+     def metadata(self) -> str:
+         """
+         Generates metadata about the code snippets.
+
+         Returns a string containing information about the file tree structure,
+         total number of files, tokens (if tiktoken is available), and lines.
+
+         Returns:
+             str: Formatted metadata string.
+         """
+         file_paths: list[Path] = self.paths
+         text: str = self.snippets_text
+
+         base_dir: Path = _get_base_dir(file_paths)
+         results: list[str] = [base_dir.as_posix()]
+
+         file_tree: FileTree = {}
+         for file_path in sorted(file_paths):
+             rel_path = file_path.relative_to(base_dir)
+             subtree: Optional[FileTree] = file_tree
+             for part in rel_path.parts[:-1]:
+                 if subtree is not None:
+                     subtree = subtree.setdefault(part, {})
+             if subtree is not None:
+                 subtree[rel_path.parts[-1]] = None
+
+         def _display_tree(tree: FileTree, prefix: str = "") -> None:
+             """
+             Helper function to recursively display a file tree structure.
+
+             Args:
+                 tree: The file tree dictionary to display.
+                 prefix: Current line prefix for proper indentation.
+             """
+             items: list[tuple[str, Optional[FileTree]]] = sorted(tree.items())
+             count: int = len(items)
+             for idx, (name, subtree) in enumerate(items):
+                 branch: str = "└── " if idx == count - 1 else "├── "
+                 results.append(f"{prefix}{branch}{name}")
+                 if subtree is not None:
+                     extension: str = "    " if idx == count - 1 else "│   "
+                     _display_tree(tree=subtree, prefix=prefix + extension)
+
+         _display_tree(file_tree)
+         results.append(f"- Total files: {len(file_paths)}")
+         if enc is not None:
+             num_tokens: int = len(enc.encode(text, disallowed_special=()))
+             results.append(f"- Total tokens: {num_tokens}")
+         results.append(f"- Total lines: {text.count('\n') + 1}")
+         return "\n".join(results)
+
+
+ def html_to_markdown(html: str, options: Optional[HtmlToMarkdownOptions]) -> str:
+     """
+     Convert HTML content to Markdown using the provided options.
+
+     Args:
+         html (str): HTML content to convert.
+         options (Optional[HtmlToMarkdownOptions]): Options for the conversion; markdownify's defaults apply when None.
+
+     Returns:
+         str: The Markdown content.
+     """
+     from markdownify import markdownify  # pyright: ignore[reportUnknownVariableType, reportMissingTypeStubs]
+
+     return str(markdownify(html, **(options or {})))  # pyright: ignore[reportUnknownArgumentType]
+
+
+ def pdf_to_text(path_or_file: PathOrReadable) -> str:
+     """
+     Convert a PDF file to plain text.
+
+     Extracts text from each page of a PDF file and formats it with page markers.
+
+     Args:
+         path_or_file: Path to a PDF file or a readable object containing PDF data.
+
+     Returns:
+         str: Extracted text with page markers.
+
+     Raises:
+         FileNotFoundError: If the file cannot be found or opened.
+     """
+     from pymupdf import Document  # pyright: ignore[reportMissingTypeStubs]
+
+     with _open_stream(path_or_file) as stream:
+         if stream is None:
+             raise FileNotFoundError(path_or_file)
+         return "\n".join(
+             f"<!-- Page {page_no} -->\n{text.strip()}\n"
+             for page_no, text in enumerate(
+                 (
+                     page.get_textpage().extractText()  # pyright: ignore[reportUnknownMemberType]
+                     for page in Document(stream=stream.read())
+                 ),
+                 1,
+             )
+         )
+
+
+ def anything_to_markdown(
+     source: "str | Response | Path",
+     requests_session: Optional["Session"] = None,
+     llm_client: Optional["OpenAI"] = None,
+     llm_model: Optional[str] = None,
+     style_map: Optional[str] = None,
+     exiftool_path: Optional[str] = None,
+     docintel_endpoint: Optional[str] = None,
+ ) -> str:
+     """
+     Convert various types of content to Markdown format.
+
+     Uses the MarkItDown library to convert different types of content (URLs, files, API responses)
+     to Markdown format.
+
+     Args:
+         source: The source content to convert (URL string, Response object, or Path).
+         requests_session: Optional requests Session for HTTP requests.
+         llm_client: Optional OpenAI client for LLM-based conversions.
+         llm_model: Optional model name for the LLM.
+         style_map: Optional style mapping configuration.
+         exiftool_path: Optional path to exiftool for metadata extraction.
+         docintel_endpoint: Optional Document Intelligence API endpoint.
+
+     Returns:
+         str: The converted Markdown content.
+     """
+     from markitdown import MarkItDown
+
+     result = MarkItDown(
+         requests_session=requests_session,
+         llm_client=llm_client,
+         llm_model=llm_model,
+         style_map=style_map,
+         exiftool_path=exiftool_path,
+         docintel_endpoint=docintel_endpoint,
+     ).convert(source)
+     return result.text_content
+
+
+ # Alias for CodeSnippets.from_path_or_pkgname for backward compatibility
+ pyscripts_to_snippets = CodeSnippets.from_path_or_pkgname
+
+
+ def _pattern_to_regex(pattern: str) -> re.Pattern[str]:
+     """
+     Converts an fnmatch pattern to a regular expression.
+
+     In this function, '**' is converted to match any character including directory separators.
+     The remaining '*' matches any character except directory separators, and '?' matches a single character.
+
+     Args:
+         pattern: The fnmatch pattern to convert.
+
+     Returns:
+         A compiled regular expression pattern.
+     """
+     # First escape the pattern
+     pattern = re.escape(pattern)
+     # Convert '**' to match any character including directory separators ('.*')
+     pattern = pattern.replace(r"\*\*", ".*")
+     # Then convert single '*' to match any character except directory separators
+     pattern = pattern.replace(r"\*", "[^/]*")
+     # Convert '?' to match a single character
+     pattern = pattern.replace(r"\?", ".")
+     # Anchor the pattern to start and end
+     pattern = "^" + pattern + "$"
+     return re.compile(pattern)
+
+
+ def _is_banned(p: Path, ban_patterns: list[str]) -> bool:
+     """
+     Checks if a given path matches any of the ban patterns.
+
+     Determines if the path p matches any pattern in ban_patterns using either
+     fnmatch-based or recursive patterns (i.e., containing '**').
+
+     Note: Patterns should use POSIX-style paths (i.e., '/' separators).
+
+     Args:
+         p: The path to check.
+         ban_patterns: List of patterns to match against.
+
+     Returns:
+         bool: True if the path matches any ban pattern, False otherwise.
+     """
+     p_str = p.as_posix()
+     for pattern in ban_patterns:
+         if "**" in pattern:
+             regex = _pattern_to_regex(pattern)
+             if regex.match(p_str):
+                 return True
+         else:
+             # Simple fnmatch: '*' by default doesn't match '/'
+             if fnmatch(p_str, pattern):
+                 return True
+     return False
+
+
+ def _get_a_snippet(fpath: Path) -> str:
+     """
+     Extracts a code snippet from a Python file.
+
+     Reads the file, parses it as Python code, and returns a formatted code snippet
+     with the relative path as a header in markdown code block format.
+
+     Args:
+         fpath: Path to the Python file.
+
+     Returns:
+         str: Formatted code snippet or empty string if the file doesn't exist.
+     """
+     if not fpath.is_file():
+         return ""
+
+     cleaned_code: str = "\n".join(
+         line for line in ast.unparse(ast.parse(fpath.read_text(encoding="utf-8"))).splitlines()
+     )
+     if site_dir := next(
+         (d for d in reversed(site.getsitepackages()) if fpath.is_relative_to(d)),
+         None,
+     ):
+         display_path = fpath.relative_to(site_dir)
+     elif fpath.is_relative_to(cwd := Path.cwd()):
+         display_path = fpath.relative_to(cwd)
+     else:
+         display_path = fpath.absolute()
+     return f"```{display_path}\n{cleaned_code}\n```\n\n"
+
+
+ def _get_base_dir(target_files: Sequence[Path]) -> Path:
+     """
+     Determines the common base directory for a sequence of file paths.
+
+     Finds the directory with the shortest path that is a parent to at least one file.
+
+     Args:
+         target_files: Sequence of file paths.
+
+     Returns:
+         Path: The common base directory.
+     """
+     return sorted(
+         {file_path.parent for file_path in target_files},
+         key=lambda p: len(p.parts),
+     )[0]
+
+
+ def _get_pyscript_paths(path_or_pkgname: str, ban_fn_patterns: Optional[list[str]] = None) -> list[Path]:
+     """
+     Gets paths to Python script files from a directory, file, or package name.
+
+     If path_or_pkgname is a directory, finds all .py files recursively.
+     If it's a file, returns just that file.
+     If it's a package name, imports the package and finds all .py files in its directory.
+
+     Args:
+         path_or_pkgname: Path to directory/file or package name.
+         ban_fn_patterns: Optional list of patterns to exclude files.
+
+     Returns:
+         list[Path]: List of paths to Python files.
+     """
+     path = Path(path_or_pkgname)
+     pypaths: list[Path]
+     if path.is_dir():
+         pypaths = list(path.rglob("*.py", case_sensitive=False))
+     elif path.is_file():
+         pypaths = [path]
+     else:
+         pypaths = [
+             p
+             for p in Path(next(iter(importlib.import_module(path_or_pkgname).__path__))).rglob(
+                 "*.py", case_sensitive=False
+             )
+             if p.is_file()
+         ]
+     return [p for p in pypaths if not (ban_fn_patterns and _is_banned(p, ban_fn_patterns))]
+
+
+ @contextmanager
+ def _open_stream(
+     path_or_file: PathOrReadable,
+ ) -> Iterator[Optional[BytesReadable]]:
+     """
+     Context manager for opening a file or using an existing stream.
+
+     Handles different types of input (file paths, byte streams, string streams)
+     and yields a BytesReadable object that can be used to read binary data.
+
+     Args:
+         path_or_file: File path or readable object.
+
+     Yields:
+         Optional[BytesReadable]: A readable binary stream or None if opening fails.
+     """
+     stream: Optional[BytesReadable] = None
+     try:
+         with suppress(BaseException):
+             if isinstance(path_or_file, BytesReadable):
+                 stream = path_or_file
+             elif isinstance(path_or_file, StringReadable):
+                 stream = BytesIO(path_or_file.read().encode("utf-8"))
+             else:
+                 stream = open(path_or_file, "rb")
+         yield stream
+     finally:
+         if stream is not None:
+             stream.close()
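
Putting the module together, one plausible way to drive the public helpers (a sketch under assumptions: the helpers are in scope, the optional markdownify dependency is installed, and the ban pattern is illustrative):

```python
# HTML -> Markdown using the library's default option set.
md = html_to_markdown(
    "<h1>Title</h1><p>Some <b>bold</b> text.</p>",
    get_default_html_to_markdown_options(),
)
print(md)

# Bundle an installed package's .py files into one snippet string plus
# file-tree metadata; "chatterer" resolves through importlib because it is
# neither a file nor a directory path. The ban pattern is illustrative.
snippets = CodeSnippets.from_path_or_pkgname("chatterer", ban_file_patterns=["**/tests/**"])
print(snippets.metadata)
```
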
@@ -0,0 +1,4 @@
+ from .playwright_bot import PlayWrightBot
+ from .utils import MarkdownLink
+
+ __all__ = ["PlayWrightBot", "MarkdownLink"]