chatterer 0.1.18__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. chatterer/__init__.py +93 -93
  2. chatterer/common_types/__init__.py +21 -21
  3. chatterer/common_types/io.py +19 -19
  4. chatterer/examples/__init__.py +0 -0
  5. chatterer/examples/anything_to_markdown.py +85 -91
  6. chatterer/examples/get_code_snippets.py +55 -62
  7. chatterer/examples/login_with_playwright.py +156 -167
  8. chatterer/examples/make_ppt.py +488 -497
  9. chatterer/examples/pdf_to_markdown.py +100 -107
  10. chatterer/examples/pdf_to_text.py +54 -56
  11. chatterer/examples/transcription_api.py +112 -123
  12. chatterer/examples/upstage_parser.py +89 -100
  13. chatterer/examples/webpage_to_markdown.py +70 -79
  14. chatterer/interactive.py +354 -354
  15. chatterer/language_model.py +533 -533
  16. chatterer/messages.py +21 -21
  17. chatterer/strategies/__init__.py +13 -13
  18. chatterer/strategies/atom_of_thoughts.py +975 -975
  19. chatterer/strategies/base.py +14 -14
  20. chatterer/tools/__init__.py +46 -46
  21. chatterer/tools/caption_markdown_images.py +384 -384
  22. chatterer/tools/citation_chunking/__init__.py +3 -3
  23. chatterer/tools/citation_chunking/chunks.py +53 -53
  24. chatterer/tools/citation_chunking/citation_chunker.py +118 -118
  25. chatterer/tools/citation_chunking/citations.py +285 -285
  26. chatterer/tools/citation_chunking/prompt.py +157 -157
  27. chatterer/tools/citation_chunking/reference.py +26 -26
  28. chatterer/tools/citation_chunking/utils.py +138 -138
  29. chatterer/tools/convert_pdf_to_markdown.py +393 -302
  30. chatterer/tools/convert_to_text.py +446 -447
  31. chatterer/tools/upstage_document_parser.py +705 -705
  32. chatterer/tools/webpage_to_markdown.py +739 -739
  33. chatterer/tools/youtube.py +146 -146
  34. chatterer/utils/__init__.py +15 -15
  35. chatterer/utils/base64_image.py +285 -285
  36. chatterer/utils/bytesio.py +59 -59
  37. chatterer/utils/code_agent.py +237 -237
  38. chatterer/utils/imghdr.py +148 -148
  39. {chatterer-0.1.18.dist-info → chatterer-0.1.20.dist-info}/METADATA +392 -392
  40. chatterer-0.1.20.dist-info/RECORD +44 -0
  41. {chatterer-0.1.18.dist-info → chatterer-0.1.20.dist-info}/WHEEL +1 -1
  42. chatterer-0.1.20.dist-info/entry_points.txt +10 -0
  43. chatterer-0.1.18.dist-info/RECORD +0 -42
  44. {chatterer-0.1.18.dist-info → chatterer-0.1.20.dist-info}/top_level.txt +0 -0
chatterer/__init__.py CHANGED
@@ -1,93 +1,93 @@
1
- from .interactive import interactive_shell
2
- from .language_model import Chatterer
3
- from .messages import (
4
- AIMessage,
5
- BaseMessage,
6
- BaseMessageChunk,
7
- FunctionMessage,
8
- HumanMessage,
9
- LanguageModelInput,
10
- SystemMessage,
11
- UsageMetadata,
12
- )
13
- from .strategies import (
14
- AoTPipeline,
15
- AoTPrompter,
16
- AoTStrategy,
17
- BaseStrategy,
18
- )
19
- from .tools import (
20
- CodeSnippets,
21
- MarkdownLink,
22
- PdfToMarkdown,
23
- PlayWrightBot,
24
- PlaywrightLaunchOptions,
25
- PlaywrightOptions,
26
- PlaywrightPersistencyOptions,
27
- UpstageDocumentParseParser,
28
- acaption_markdown_images,
29
- anything_to_markdown,
30
- caption_markdown_images,
31
- citation_chunker,
32
- extract_text_from_pdf,
33
- get_default_html_to_markdown_options,
34
- get_default_playwright_launch_options,
35
- get_youtube_video_details,
36
- get_youtube_video_subtitle,
37
- html_to_markdown,
38
- open_pdf,
39
- pdf_to_text,
40
- pyscripts_to_snippets,
41
- render_pdf_as_image,
42
- )
43
- from .utils import (
44
- Base64Image,
45
- CodeExecutionResult,
46
- FunctionSignature,
47
- get_default_repl_tool,
48
- insert_callables_into_global,
49
- )
50
-
51
- __all__ = [
52
- "BaseStrategy",
53
- "Chatterer",
54
- "AoTStrategy",
55
- "AoTPipeline",
56
- "AoTPrompter",
57
- "html_to_markdown",
58
- "anything_to_markdown",
59
- "pdf_to_text",
60
- "get_default_html_to_markdown_options",
61
- "pyscripts_to_snippets",
62
- "citation_chunker",
63
- "BaseMessage",
64
- "HumanMessage",
65
- "SystemMessage",
66
- "AIMessage",
67
- "FunctionMessage",
68
- "Base64Image",
69
- "FunctionSignature",
70
- "CodeExecutionResult",
71
- "get_default_repl_tool",
72
- "insert_callables_into_global",
73
- "get_youtube_video_subtitle",
74
- "get_youtube_video_details",
75
- "interactive_shell",
76
- "UpstageDocumentParseParser",
77
- "BaseMessageChunk",
78
- "CodeSnippets",
79
- "LanguageModelInput",
80
- "UsageMetadata",
81
- "PlayWrightBot",
82
- "PlaywrightLaunchOptions",
83
- "PlaywrightOptions",
84
- "PlaywrightPersistencyOptions",
85
- "get_default_playwright_launch_options",
86
- "acaption_markdown_images",
87
- "caption_markdown_images",
88
- "MarkdownLink",
89
- "PdfToMarkdown",
90
- "extract_text_from_pdf",
91
- "open_pdf",
92
- "render_pdf_as_image",
93
- ]
1
+ from .interactive import interactive_shell
2
+ from .language_model import Chatterer
3
+ from .messages import (
4
+ AIMessage,
5
+ BaseMessage,
6
+ BaseMessageChunk,
7
+ FunctionMessage,
8
+ HumanMessage,
9
+ LanguageModelInput,
10
+ SystemMessage,
11
+ UsageMetadata,
12
+ )
13
+ from .strategies import (
14
+ AoTPipeline,
15
+ AoTPrompter,
16
+ AoTStrategy,
17
+ BaseStrategy,
18
+ )
19
+ from .tools import (
20
+ CodeSnippets,
21
+ MarkdownLink,
22
+ PdfToMarkdown,
23
+ PlayWrightBot,
24
+ PlaywrightLaunchOptions,
25
+ PlaywrightOptions,
26
+ PlaywrightPersistencyOptions,
27
+ UpstageDocumentParseParser,
28
+ acaption_markdown_images,
29
+ anything_to_markdown,
30
+ caption_markdown_images,
31
+ citation_chunker,
32
+ extract_text_from_pdf,
33
+ get_default_html_to_markdown_options,
34
+ get_default_playwright_launch_options,
35
+ get_youtube_video_details,
36
+ get_youtube_video_subtitle,
37
+ html_to_markdown,
38
+ open_pdf,
39
+ pdf_to_text,
40
+ pyscripts_to_snippets,
41
+ render_pdf_as_image,
42
+ )
43
+ from .utils import (
44
+ Base64Image,
45
+ CodeExecutionResult,
46
+ FunctionSignature,
47
+ get_default_repl_tool,
48
+ insert_callables_into_global,
49
+ )
50
+
51
+ __all__ = [
52
+ "BaseStrategy",
53
+ "Chatterer",
54
+ "AoTStrategy",
55
+ "AoTPipeline",
56
+ "AoTPrompter",
57
+ "html_to_markdown",
58
+ "anything_to_markdown",
59
+ "pdf_to_text",
60
+ "get_default_html_to_markdown_options",
61
+ "pyscripts_to_snippets",
62
+ "citation_chunker",
63
+ "BaseMessage",
64
+ "HumanMessage",
65
+ "SystemMessage",
66
+ "AIMessage",
67
+ "FunctionMessage",
68
+ "Base64Image",
69
+ "FunctionSignature",
70
+ "CodeExecutionResult",
71
+ "get_default_repl_tool",
72
+ "insert_callables_into_global",
73
+ "get_youtube_video_subtitle",
74
+ "get_youtube_video_details",
75
+ "interactive_shell",
76
+ "UpstageDocumentParseParser",
77
+ "BaseMessageChunk",
78
+ "CodeSnippets",
79
+ "LanguageModelInput",
80
+ "UsageMetadata",
81
+ "PlayWrightBot",
82
+ "PlaywrightLaunchOptions",
83
+ "PlaywrightOptions",
84
+ "PlaywrightPersistencyOptions",
85
+ "get_default_playwright_launch_options",
86
+ "acaption_markdown_images",
87
+ "caption_markdown_images",
88
+ "MarkdownLink",
89
+ "PdfToMarkdown",
90
+ "extract_text_from_pdf",
91
+ "open_pdf",
92
+ "render_pdf_as_image",
93
+ ]
chatterer/common_types/__init__.py CHANGED
@@ -1,21 +1,21 @@
1
- from .io import (
2
- BytesReadable,
3
- BytesWritable,
4
- FileDescriptorOrPath,
5
- PathOrReadable,
6
- Readable,
7
- StringReadable,
8
- StringWritable,
9
- Writable,
10
- )
11
-
12
- __all__ = [
13
- "BytesReadable",
14
- "BytesWritable",
15
- "FileDescriptorOrPath",
16
- "PathOrReadable",
17
- "Readable",
18
- "StringReadable",
19
- "StringWritable",
20
- "Writable",
21
- ]
1
+ from .io import (
2
+ BytesReadable,
3
+ BytesWritable,
4
+ FileDescriptorOrPath,
5
+ PathOrReadable,
6
+ Readable,
7
+ StringReadable,
8
+ StringWritable,
9
+ Writable,
10
+ )
11
+
12
+ __all__ = [
13
+ "BytesReadable",
14
+ "BytesWritable",
15
+ "FileDescriptorOrPath",
16
+ "PathOrReadable",
17
+ "Readable",
18
+ "StringReadable",
19
+ "StringWritable",
20
+ "Writable",
21
+ ]
chatterer/common_types/io.py CHANGED
@@ -1,19 +1,19 @@
1
- import os
2
- from io import BufferedReader, BufferedWriter, BytesIO, StringIO, TextIOWrapper
3
- from typing import TypeAlias
4
-
5
- # Type aliases for callback functions and file descriptors
6
- FileDescriptorOrPath: TypeAlias = int | str | bytes | os.PathLike[str] | os.PathLike[bytes]
7
-
8
- # Type aliases for different types of IO objects
9
- BytesReadable: TypeAlias = BytesIO | BufferedReader
10
- BytesWritable: TypeAlias = BytesIO | BufferedWriter
11
- StringReadable: TypeAlias = StringIO | TextIOWrapper
12
- StringWritable: TypeAlias = StringIO | TextIOWrapper
13
-
14
- # Combined type aliases for readable and writable objects
15
- Readable: TypeAlias = BytesReadable | StringReadable
16
- Writable: TypeAlias = BytesWritable | StringWritable
17
-
18
- # Type alias for path or readable object
19
- PathOrReadable: TypeAlias = FileDescriptorOrPath | Readable
1
+ import os
2
+ from io import BufferedReader, BufferedWriter, BytesIO, StringIO, TextIOWrapper
3
+ from typing import TypeAlias
4
+
5
+ # Type aliases for callback functions and file descriptors
6
+ FileDescriptorOrPath: TypeAlias = int | str | bytes | os.PathLike[str] | os.PathLike[bytes]
7
+
8
+ # Type aliases for different types of IO objects
9
+ BytesReadable: TypeAlias = BytesIO | BufferedReader
10
+ BytesWritable: TypeAlias = BytesIO | BufferedWriter
11
+ StringReadable: TypeAlias = StringIO | TextIOWrapper
12
+ StringWritable: TypeAlias = StringIO | TextIOWrapper
13
+
14
+ # Combined type aliases for readable and writable objects
15
+ Readable: TypeAlias = BytesReadable | StringReadable
16
+ Writable: TypeAlias = BytesWritable | StringWritable
17
+
18
+ # Type alias for path or readable object
19
+ PathOrReadable: TypeAlias = FileDescriptorOrPath | Readable
chatterer/examples/__init__.py
File without changes
chatterer/examples/anything_to_markdown.py CHANGED
@@ -1,91 +1,85 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
14
- from pathlib import Path
15
- from typing import Optional, TypedDict
16
-
17
- import openai
18
- from spargear import ArgumentSpec, BaseArguments
19
-
20
- from chatterer import anything_to_markdown
21
-
22
-
23
- class AnythingToMarkdownReturns(TypedDict):
24
- in_path: str
25
- out_path: Optional[str]
26
- out_text: str
27
-
28
-
29
- class AnythingToMarkdownArguments(BaseArguments):
30
- """Command line arguments for converting various file types to markdown."""
31
-
32
- in_path: ArgumentSpec[str] = ArgumentSpec(["in-path"], help="Input file to convert to markdown")
33
- out_path: Optional[str] = None
34
- """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
35
- model: Optional[str] = None
36
- """OpenAI Model to use for conversion"""
37
- api_key: Optional[str] = None
38
- """API key for OpenAI API"""
39
- base_url: Optional[str] = None
40
- """Base URL for OpenAI API"""
41
- style_map: Optional[str] = None
42
- """Output style map"""
43
- exiftool_path: Optional[str] = None
44
- """"Path to exiftool for metadata extraction"""
45
- docintel_endpoint: Optional[str] = None
46
- "Document Intelligence API endpoint"
47
- prevent_save_file: bool = False
48
- """Prevent saving the converted file to disk."""
49
- encoding: str = "utf-8"
50
- """Encoding for the output file."""
51
-
52
- def run(self) -> AnythingToMarkdownReturns:
53
- in_path = self.in_path.unwrap()
54
- if not self.prevent_save_file:
55
- if not self.out_path:
56
- out_path = Path(in_path).with_suffix(".md")
57
- else:
58
- out_path = Path(self.out_path)
59
- else:
60
- out_path = None
61
-
62
- if self.model:
63
- llm_client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
64
- llm_model = self.model
65
- else:
66
- llm_client = None
67
- llm_model = None
68
-
69
- text: str = anything_to_markdown(
70
- in_path,
71
- llm_client=llm_client,
72
- llm_model=llm_model,
73
- style_map=self.style_map,
74
- exiftool_path=self.exiftool_path,
75
- docintel_endpoint=self.docintel_endpoint,
76
- )
77
- if out_path:
78
- out_path.parent.mkdir(parents=True, exist_ok=True)
79
- out_path.write_text(text, encoding=self.encoding)
80
- logger.info(f"Converted `{in_path}` to markdown and saved to `{out_path}`.")
81
- else:
82
- logger.info(f"Converted `{in_path}` to markdown.")
83
- return {
84
- "in_path": in_path,
85
- "out_path": str(out_path) if out_path is not None else None,
86
- "out_text": text,
87
- }
88
-
89
-
90
- if __name__ == "__main__":
91
- AnythingToMarkdownArguments().run()
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Optional, TypedDict
4
+
5
+ import openai
6
+ from spargear import BaseArguments
7
+
8
+ from chatterer import anything_to_markdown
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ class AnythingToMarkdownReturns(TypedDict):
14
+ input: str
15
+ output: Optional[str]
16
+ out_text: str
17
+
18
+
19
+ class AnythingToMarkdownArguments(BaseArguments):
20
+ """Command line arguments for converting various file types to markdown."""
21
+
22
+ input: str
23
+ """Input file to convert to markdown. Can be a file path or a URL."""
24
+ output: Optional[str] = None
25
+ """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
26
+ model: Optional[str] = None
27
+ """OpenAI Model to use for conversion"""
28
+ api_key: Optional[str] = None
29
+ """API key for OpenAI API"""
30
+ base_url: Optional[str] = None
31
+ """Base URL for OpenAI API"""
32
+ style_map: Optional[str] = None
33
+ """Output style map"""
34
+ exiftool_path: Optional[str] = None
35
+ """"Path to exiftool for metadata extraction"""
36
+ docintel_endpoint: Optional[str] = None
37
+ "Document Intelligence API endpoint"
38
+ prevent_save_file: bool = False
39
+ """Prevent saving the converted file to disk."""
40
+ encoding: str = "utf-8"
41
+ """Encoding for the output file."""
42
+
43
+ def run(self) -> AnythingToMarkdownReturns:
44
+ if not self.prevent_save_file:
45
+ if not self.output:
46
+ output = Path(self.input).with_suffix(".md")
47
+ else:
48
+ output = Path(self.output)
49
+ else:
50
+ output = None
51
+
52
+ if self.model:
53
+ llm_client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
54
+ llm_model = self.model
55
+ else:
56
+ llm_client = None
57
+ llm_model = None
58
+
59
+ text: str = anything_to_markdown(
60
+ self.input,
61
+ llm_client=llm_client,
62
+ llm_model=llm_model,
63
+ style_map=self.style_map,
64
+ exiftool_path=self.exiftool_path,
65
+ docintel_endpoint=self.docintel_endpoint,
66
+ )
67
+ if output:
68
+ output.parent.mkdir(parents=True, exist_ok=True)
69
+ output.write_text(text, encoding=self.encoding)
70
+ logger.info(f"Converted `{self.input}` to markdown and saved to `{output}`.")
71
+ else:
72
+ logger.info(f"Converted `{self.input}` to markdown.")
73
+ return {
74
+ "input": self.input,
75
+ "output": str(output) if output is not None else None,
76
+ "out_text": text,
77
+ }
78
+
79
+
80
+ def main() -> None:
81
+ AnythingToMarkdownArguments().run()
82
+
83
+
84
+ if __name__ == "__main__":
85
+ main()
chatterer/examples/get_code_snippets.py CHANGED
@@ -1,62 +1,55 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
14
- from pathlib import Path
15
- from typing import Optional
16
-
17
- from spargear import ArgumentSpec, BaseArguments
18
-
19
- from chatterer import CodeSnippets
20
-
21
-
22
- class GetCodeSnippetsArgs(BaseArguments):
23
- path_or_pkgname: ArgumentSpec[str] = ArgumentSpec(
24
- ["path_or_pkgname"], help="Path to the package or file from which to extract code snippets."
25
- )
26
- out_path: Optional[str] = None
27
- ban_file_patterns: list[str] = [".venv/*", Path(__file__).relative_to(Path.cwd()).as_posix()]
28
- """List of file patterns to ignore."""
29
- glob_patterns: list[str] = ["*.py"]
30
- """List of glob patterns to include."""
31
- case_sensitive: bool = False
32
- """Enable case-sensitive matching for glob patterns."""
33
- prevent_save_file: bool = False
34
- """Prevent saving the extracted code snippets to a file."""
35
-
36
- def run(self) -> CodeSnippets:
37
- path_or_pkgname = self.path_or_pkgname.unwrap()
38
- if not self.prevent_save_file:
39
- if not self.out_path:
40
- out_path = Path(__file__).with_suffix(".txt")
41
- else:
42
- out_path = Path(self.out_path)
43
- else:
44
- out_path = None
45
-
46
- cs = CodeSnippets.from_path_or_pkgname(
47
- path_or_pkgname=path_or_pkgname,
48
- ban_file_patterns=self.ban_file_patterns,
49
- glob_patterns=self.glob_patterns,
50
- case_sensitive=self.case_sensitive,
51
- )
52
- if out_path is not None:
53
- out_path.parent.mkdir(parents=True, exist_ok=True)
54
- out_path.write_text(cs.snippets_text, encoding="utf-8")
55
- logger.info(f"Extracted code snippets from `{path_or_pkgname}` and saved to `{out_path}`.")
56
- else:
57
- logger.info(f"Extracted code snippets from `{path_or_pkgname}`.")
58
- return cs
59
-
60
-
61
- if __name__ == "__main__":
62
- GetCodeSnippetsArgs().run()
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ from spargear import BaseArguments
6
+
7
+ from chatterer import CodeSnippets
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class GetCodeSnippetsArgs(BaseArguments):
13
+ input: str
14
+ """Path to the package or file from which to extract code snippets."""
15
+ output: Optional[str] = None
16
+ """Output path for the extracted code snippets. If not provided, defaults to a file with the same name as the input."""
17
+ ban_file_patterns: list[str] = [".venv/*", Path(__file__).relative_to(Path.cwd()).as_posix()]
18
+ """List of file patterns to ignore."""
19
+ glob_patterns: list[str] = ["*.py"]
20
+ """List of glob patterns to include."""
21
+ case_sensitive: bool = False
22
+ """Enable case-sensitive matching for glob patterns."""
23
+ prevent_save_file: bool = False
24
+ """Prevent saving the extracted code snippets to a file."""
25
+
26
+ def run(self) -> CodeSnippets:
27
+ if not self.prevent_save_file:
28
+ if not self.output:
29
+ output = Path(__file__).with_suffix(".txt")
30
+ else:
31
+ output = Path(self.output)
32
+ else:
33
+ output = None
34
+
35
+ cs = CodeSnippets.from_path_or_pkgname(
36
+ path_or_pkgname=self.input,
37
+ ban_file_patterns=self.ban_file_patterns,
38
+ glob_patterns=self.glob_patterns,
39
+ case_sensitive=self.case_sensitive,
40
+ )
41
+ if output is not None:
42
+ output.parent.mkdir(parents=True, exist_ok=True)
43
+ output.write_text(cs.snippets_text, encoding="utf-8")
44
+ logger.info(f"Extracted code snippets from `{self.input}` and saved to `{output}`.")
45
+ else:
46
+ logger.info(f"Extracted code snippets from `{self.input}`.")
47
+ return cs
48
+
49
+
50
+ def main() -> None:
51
+ GetCodeSnippetsArgs().run()
52
+
53
+
54
+ if __name__ == "__main__":
55
+ main()