PyPI - chatterer - Versions diffs - 0.1.13__py3-none-any.whl → 0.1.16__py3-none-any.whl - Mend

chatterer 0.1.13__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

chatterer/__init__.py +36 -5
chatterer/interactive.py +692 -0
chatterer/language_model.py +217 -261
chatterer/messages.py +13 -1
chatterer/tools/__init__.py +26 -15
chatterer/tools/{webpage_to_markdown/utils.py → caption_markdown_images.py} +158 -108
chatterer/tools/convert_pdf_to_markdown.py +302 -0
chatterer/tools/convert_to_text.py +45 -16
chatterer/tools/upstage_document_parser.py +481 -214
chatterer/tools/{webpage_to_markdown/playwright_bot.py → webpage_to_markdown.py} +197 -107
chatterer/tools/youtube.py +2 -1
chatterer/utils/__init__.py +1 -1
chatterer/utils/{image.py → base64_image.py} +56 -62
chatterer/utils/code_agent.py +137 -38
chatterer/utils/imghdr.py +148 -0
chatterer-0.1.16.dist-info/METADATA +392 -0
chatterer-0.1.16.dist-info/RECORD +33 -0
{chatterer-0.1.13.dist-info → chatterer-0.1.16.dist-info}/WHEEL +1 -1
chatterer/tools/webpage_to_markdown/__init__.py +0 -4
chatterer-0.1.13.dist-info/METADATA +0 -171
chatterer-0.1.13.dist-info/RECORD +0 -31
{chatterer-0.1.13.dist-info → chatterer-0.1.16.dist-info}/top_level.txt +0 -0

chatterer/utils/code_agent.py CHANGED Viewed

@@ -7,13 +7,44 @@ from typing import (
     NamedTuple,
     Optional,
     Self,
+    Sequence,
 )
 from langchain_core.runnables.config import RunnableConfig
+from ..messages import LanguageModelInput, SystemMessage
 if TYPE_CHECKING:
     from langchain_experimental.tools import PythonAstREPLTool
+# --- Constants ---
+DEFAULT_CODE_GENERATION_PROMPT = (
+    "You are equipped with a Python code execution tool.\n"
+    "Your primary goal is to generate Python code that effectively solves the *specific, immediate sub-task* required to progress towards the overall user request. The generated code and its resulting output will be automatically added to our conversation history.\n"
+    "\n"
+    "Guidelines for Optimal Tool Use:\n"
+    "- Conciseness and Efficiency: Write code that directly addresses the current need. Avoid unnecessary complexity, computations, or data loading. Tool execution has resource limits.\n"
+    "- Targeted Action: Focus only on the code required for the *next logical step*. Do not attempt to solve the entire problem in one code block if it involves multiple steps.\n"
+    "- Error Handling: Implement basic error handling (e.g., `try-except`) for operations that might fail (like file access or network requests, if applicable).\n"
+    "- Context Awareness: Assume the code runs in a stateful environment where variables and imports might persist from previous executions (unless explicitly cleared).\n"
+    "- Self-Contained Execution: Ensure the code block is runnable as provided. Define necessary variables within the block if they aren't guaranteed to exist from prior context.\n"
+    "\n"
+    "Output Format:\n"
+    "Return *only* a JSON object containing the Python code:\n"
+    '{\n  "code": "<your_python_code_here>"\n}\n\n'
+)
+DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT = (
+    "The following Python functions are available in the global scope for you to use directly in your code.\n"
+    "You do not need to define these functions; simply call them as needed.\n"
+    "Use these functions only when they directly help in solving the current task. You are not obligated to use them.\n"
+)
+DEFAULT_FUNCTION_REFERENCE_SEPARATOR = "\n---\n"  # Separator to distinguish different function references
+# --- Helper Classes and Functions ---
 class FunctionSignature(NamedTuple):
     name: str
@@ -24,57 +55,63 @@ class FunctionSignature(NamedTuple):
     def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
         if callables is None:
             return []
-        if callable(callables):
+        # Correctly handle single callable case
+        if isinstance(callables, Callable) and not isinstance(callables, type):  # Exclude classes if not intended
             return [cls._from_callable(callables)]
-        return [cls._from_callable(callable) for callable in callables]
+        # Handle iterables
+        if isinstance(callables, Iterable):
+            return [cls._from_callable(c) for c in callables]
+        # If it's neither a callable nor an iterable of callables, return empty
+        return []
     @classmethod
-    def _from_callable(cls, callable: Callable[..., object]) -> Self:
+    def _from_callable(cls, callable_obj: Callable[..., object]) -> Self:
         """
         Get the name and signature of a function as a string.
         """
-        # Determine if the function is async
-        is_async_func = inspect.iscoroutinefunction(callable)
+        is_async_func = inspect.iscoroutinefunction(callable_obj)
         function_def = "async def" if is_async_func else "def"
-        # Determine the function name based on the type of callable
-        if inspect.isfunction(callable):
-            # For regular Python functions, use __code__.co_name
-            function_name = callable.__code__.co_name
-        elif hasattr(callable, "name"):
-            # For StructuredTool or similar objects with a 'name' attribute
-            function_name = callable.name  # type: ignore
-        elif hasattr(callable, "__name__"):
-            # For other callables with a __name__ attribute
-            function_name = callable.__name__
+        if inspect.isfunction(callable_obj):
+            function_name = callable_obj.__code__.co_name
+        elif hasattr(callable_obj, "name") and isinstance(getattr(callable_obj, "name"), str):
+            function_name = getattr(callable_obj, "name")
+        elif hasattr(callable_obj, "__name__"):
+            function_name = callable_obj.__name__
         else:
-            # Fallback to the class name if no name is found
-            function_name = type(callable).__name__
+            function_name = type(callable_obj).__name__
+        try:
+            signature_str = str(inspect.signature(callable_obj))
+        except ValueError:  # Handles built-ins or others without inspectable signatures
+            signature_str = "(...)"  # Placeholder signature
+        signature = f"{function_def} {function_name}{signature_str}:"
+        docstring = inspect.getdoc(callable_obj)
-        # Build the signature string
-        signature = f"{function_def} {function_name}{inspect.signature(callable)}:"
-        docstring = inspect.getdoc(callable)
         if docstring:
             docstring = f'"""{docstring.strip()}"""'
-            return cls(
-                name=function_name, callable=callable, signature=f"{signature}\n{textwrap.indent(docstring, '    ')}"
-            )
+            full_signature = f"{signature}\n{textwrap.indent(docstring, '    ')}"
         else:
-            return cls(name=function_name, callable=callable, signature=signature)
+            full_signature = signature
+        return cls(name=function_name, callable=callable_obj, signature=full_signature)
     @classmethod
     def as_prompt(
         cls,
         function_signatures: Iterable[Self],
-        prefix: Optional[str] = "You can use the pre-made functions below without defining them:\n",
-        sep: str = "\n---\n",
+        prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,  # Use constant
+        sep: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,  # Use constant
     ) -> str:
         """
-        Generate a prompt string from a list of callables.
+        Generate a prompt string from a list of function signatures.
         """
+        if not function_signatures:
+            return ""
         body: str = sep.join(fsig.signature for fsig in function_signatures)
         if prefix:
-            return f"{prefix}{body}"
+            return f"{prefix}\n{body}"  # Add newline for clarity
         return body
@@ -92,13 +129,14 @@ class CodeExecutionResult(NamedTuple):
         **kwargs: object,
     ) -> Self:
         """
-        Execute code using the Python Code Execution Language Model.
+        Execute code using the Python REPL tool.
         """
         if repl_tool is None:
             repl_tool = get_default_repl_tool()
         if function_signatures:
             insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
-        output = str(repl_tool.invoke(code, config=config, **kwargs))  # pyright: ignore[reportUnknownMemberType]
+        # Ensure kwargs are passed correctly if needed by invoke
+        output = str(repl_tool.invoke(code, config=config))  # pyright: ignore[reportUnknownMemberType]
         return cls(code=code, output=output)
     @classmethod
@@ -111,28 +149,89 @@ class CodeExecutionResult(NamedTuple):
         **kwargs: object,
     ) -> Self:
         """
-        Execute code using the Python Code Execution Language Model asynchronously.
+        Execute code using the Python REPL tool asynchronously.
         """
         if repl_tool is None:
             repl_tool = get_default_repl_tool()
         if function_signatures:
             insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
-        output = str(await repl_tool.ainvoke(code, config=config, **kwargs))  # pyright: ignore[reportUnknownMemberType]
+        # Ensure kwargs are passed correctly if needed by ainvoke
+        output = str(await repl_tool.ainvoke(code, config=config))  # pyright: ignore[reportUnknownMemberType]if not needed by base ainvoke
         return cls(code=code, output=output)
 def get_default_repl_tool() -> "PythonAstREPLTool":
-    from langchain_experimental.tools import PythonAstREPLTool
+    """Initializes and returns a default PythonAstREPLTool instance."""
+    try:
+        from langchain_experimental.tools import PythonAstREPLTool
-    return PythonAstREPLTool()
+        # You might want to configure specific globals/locals here if needed
+        return PythonAstREPLTool()
+    except ImportError:
+        raise ImportError(
+            "PythonAstREPLTool requires langchain_experimental. Install with: pip install langchain-experimental"
+        )
 def insert_callables_into_global(
     function_signatures: Iterable[FunctionSignature], repl_tool: "PythonAstREPLTool"
 ) -> None:
     """Insert callables into the REPL tool's globals."""
-    repl_globals: Optional[dict[str, object]] = repl_tool.globals  # pyright: ignore[reportUnknownMemberType]
-    if repl_globals is None:
-        repl_tool.globals = {fsig.name: fsig.callable for fsig in function_signatures}
+    # Accessing globals might depend on the specific REPL tool implementation.
+    # This assumes a .globals attribute exists and is a dict.
+    if not hasattr(repl_tool, "globals") or not isinstance(repl_tool.globals, dict):  # pyright: ignore[reportUnknownMemberType]
+        # Handle cases where .globals is not available or not a dict
+        # Maybe initialize it or log a warning/error
+        repl_tool.globals = {}  # Or handle appropriately
+    # Safely update globals
+    current_globals: dict[object, object] = repl_tool.globals  # pyright: ignore[reportUnknownMemberType]
+    for fsig in function_signatures:
+        current_globals[fsig.name] = fsig.callable
+    # No need to reassign if globals is mutable (dict)
+    # repl_tool.globals = current_globals
+def _add_message_first(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
+    """Prepends a SystemMessage to the beginning of the message list/string."""
+    if not prompt_to_add:  # Don't add empty prompts
+        return messages
+    if isinstance(messages, str):
+        # Prepend with a newline for separation
+        return f"{prompt_to_add}\n\n{messages}"
+    elif isinstance(messages, Sequence):
+        # Create a mutable copy if it's a tuple
+        msg_list = list(messages)
+        msg_list.insert(0, SystemMessage(content=prompt_to_add))
+        return msg_list
+    # Handle LangChain Core BaseMessagePromptTemplate or similar if needed
+    # elif hasattr(messages, 'to_messages'):
+    #    msg_list = messages.to_messages()
+    #    msg_list.insert(0, SystemMessage(content=prompt_to_add))
+    #    return msg_list # Or return a new prompt template if required
     else:
-        repl_globals.update({fsig.name: fsig.callable for fsig in function_signatures})
+        # Fallback or raise error for unsupported types
+        raise TypeError(f"Unsupported message input type: {type(messages)}")
+def augment_prompt_for_toolcall(
+    function_signatures: Iterable[FunctionSignature],
+    messages: LanguageModelInput,
+    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
+    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
+    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
+) -> LanguageModelInput:
+    """Prepend the code-generation prompt and the function reference to ``messages``.
+
+    Both prompts are inserted at the front, function reference first and the
+    code-generation prompt second, so the result reads: code-generation prompt,
+    function reference, then the original messages.
+
+    :param function_signatures: Signatures to advertise; any iterable, including a generator.
+    :param messages: The model input to augment.
+    :param prompt_for_code_invoke: Code-generation instructions; skipped when empty or None.
+    :param function_reference_prefix: Text placed before the listed signatures.
+    :param function_reference_seperator: Separator placed between listed signatures.
+    :return: The augmented model input.
+    """
+    # Materialize once: as_prompt decides "nothing to list" via truthiness, and a
+    # generator is always truthy, so an empty one would otherwise produce a
+    # function-reference prompt that lists no functions.
+    signatures = list(function_signatures)
+    func_prompt = FunctionSignature.as_prompt(signatures, function_reference_prefix, function_reference_seperator)
+    if func_prompt:
+        messages = _add_message_first(messages=messages, prompt_to_add=func_prompt)
+    # Inserted last so that it ends up ahead of the function reference.
+    if prompt_for_code_invoke:
+        messages = _add_message_first(messages=messages, prompt_to_add=prompt_for_code_invoke)
+    return messages

chatterer/utils/imghdr.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""
+Recognize image file formats based on their first few bytes (base64-encoded).
+Originally derived from Python's imghdr, modified for base64 inputs.
+"""
+import base64
+import math
+from typing import Callable, List, Literal, Optional
+ImageType = Literal["jpeg", "png", "gif", "tiff", "rgb", "pbm", "pgm", "ppm", "rast", "xbm", "bmp", "webp", "exr"]
+tests: List[Callable[[bytes], Optional[ImageType]]] = []
+def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
+    """Decorator: append ``func`` to the module-level ``tests`` list and hand it back unchanged."""
+    tests.append(func)
+    return func
+def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
+    """Decode just enough of ``b64_data`` to obtain its leading bytes.
+
+    Base64 is decoded in 4-character groups of 3 bytes each, so the slice is
+    rounded up to a whole number of groups. Slicing at ``ceil(prefix_bytes * 4 / 3)``
+    characters (43 for the default of 32) leaves a partial group, which
+    ``b64decode`` rejects as incorrectly padded; that forced the fallback, and
+    therefore a decode of the entire payload, on every call.
+
+    :param b64_data: Base64 text of the image.
+    :param prefix_bytes: Minimum number of leading bytes wanted.
+    :return: The decoded leading bytes (at least ``prefix_bytes`` when the input is
+        long enough), or the full decoded payload if the slice cannot be decoded.
+    :raises ValueError: If the full input is not valid base64 either
+        (``binascii.Error`` is a ``ValueError`` subclass).
+    """
+    needed_chars = 4 * math.ceil(prefix_bytes / 3)
+    try:
+        return base64.b64decode(b64_data[:needed_chars])
+    except ValueError:
+        # Ignored characters (e.g. line breaks) can shift the group boundary
+        # inside the slice; decoding everything is the safe fallback.
+        return base64.b64decode(b64_data)
+def what(b64_data: str) -> Optional[ImageType]:
+    """
+    Return the type of the image contained in a base64-encoded string.
+    :param b64_data: Base64 string holding the image data.
+    :return: Image format string (e.g. "jpeg", "png", "gif", ...) or None if not recognized.
+    """
+    header: bytes = decode_prefix(b64_data, prefix_bytes=32)
+    # Probes run lazily in registration order; the first truthy answer wins.
+    verdicts = (probe(header) for probe in tests)
+    return next((verdict for verdict in verdicts if verdict), None)
+# --- Test functions --- #
+@register_test
+def test_jpeg(h: bytes) -> Optional[ImageType]:
+    """Detect JPEG data from its header bytes.
+
+    Accepts the ``JFIF``/``Exif`` identifiers at bytes 6-9, and any header that
+    opens with the SOI marker ``FF D8`` followed by the ``FF`` that introduces
+    the next marker. Previously only ``FF D8 FF DB`` was accepted, so JPEGs whose
+    first segment is e.g. APP2 (``FF E2``), APP14 (``FF EE``) or a comment
+    (``FF FE``) were reported as unrecognized.
+
+    :param h: Leading bytes of the file.
+    :return: ``"jpeg"`` on a match, otherwise None.
+    """
+    # A 4-byte slice can only equal these values when h has at least 10 bytes.
+    if h[6:10] in (b"JFIF", b"Exif"):
+        return "jpeg"
+    if h.startswith(b"\xff\xd8\xff"):
+        return "jpeg"
+    return None
+@register_test
+def test_png(h: bytes) -> Optional[ImageType]:
+    """Return ``"png"`` when ``h`` begins with the 8-byte PNG signature, else None."""
+    return "png" if h.startswith(b"\x89PNG\r\n\x1a\n") else None
+@register_test
+def test_gif(h: bytes) -> Optional[ImageType]:
+    """Return ``"gif"`` for either GIF version tag (87a or 89a), else None."""
+    return "gif" if h.startswith((b"GIF87a", b"GIF89a")) else None
+@register_test
+def test_tiff(h: bytes) -> Optional[ImageType]:
+    """Return ``"tiff"`` when the first two bytes are ``MM`` or ``II``, else None."""
+    return "tiff" if h.startswith((b"MM", b"II")) else None
+@register_test
+def test_rgb(h: bytes) -> Optional[ImageType]:
+    """Return ``"rgb"`` when ``h`` starts with the bytes ``01 DA``, else None."""
+    return "rgb" if h[:2] == b"\x01\xda" else None
+@register_test
+def test_pbm(h: bytes) -> Optional[ImageType]:
+    """Return ``"pbm"`` for a ``P1``/``P4`` magic followed by space, tab, LF or CR."""
+    if len(h) < 3:
+        return None
+    magic, variant, delimiter = h[0], h[1], h[2]
+    is_pbm = magic == ord(b"P") and variant in b"14" and delimiter in b" \t\n\r"
+    return "pbm" if is_pbm else None
+@register_test
+def test_pgm(h: bytes) -> Optional[ImageType]:
+    """Return ``"pgm"`` for a ``P2``/``P5`` magic followed by space, tab, LF or CR."""
+    if len(h) < 3:
+        return None
+    magic, variant, delimiter = h[0], h[1], h[2]
+    is_pgm = magic == ord(b"P") and variant in b"25" and delimiter in b" \t\n\r"
+    return "pgm" if is_pgm else None
+@register_test
+def test_ppm(h: bytes) -> Optional[ImageType]:
+    """Return ``"ppm"`` for a ``P3``/``P6`` magic followed by space, tab, LF or CR."""
+    if len(h) < 3:
+        return None
+    magic, variant, delimiter = h[0], h[1], h[2]
+    is_ppm = magic == ord(b"P") and variant in b"36" and delimiter in b" \t\n\r"
+    return "ppm" if is_ppm else None
+@register_test
+def test_rast(h: bytes) -> Optional[ImageType]:
+    """Return ``"rast"`` when ``h`` starts with the bytes ``59 A6 6A 95``, else None."""
+    return "rast" if h[:4] == b"\x59\xa6\x6a\x95" else None
+@register_test
+def test_xbm(h: bytes) -> Optional[ImageType]:
+    """Return ``"xbm"`` when ``h`` starts with the text ``#define `` (trailing space included)."""
+    return "xbm" if h[:8] == b"#define " else None
+@register_test
+def test_bmp(h: bytes) -> Optional[ImageType]:
+    """Return ``"bmp"`` when ``h`` starts with ``BM``, else None."""
+    return "bmp" if h[:2] == b"BM" else None
+@register_test
+def test_webp(h: bytes) -> Optional[ImageType]:
+    """Return ``"webp"`` for a ``RIFF`` header whose bytes 8-11 read ``WEBP``, else None."""
+    # The 4-byte slice comparison can only succeed when h has at least 12 bytes.
+    riff_tag, form_type = h[:4], h[8:12]
+    return "webp" if riff_tag == b"RIFF" and form_type == b"WEBP" else None
+@register_test
+def test_exr(h: bytes) -> Optional[ImageType]:
+    """Return ``"exr"`` when ``h`` starts with the bytes ``76 2F 31 01``, else None."""
+    return "exr" if h[:4] == b"\x76\x2f\x31\x01" else None
+if __name__ == "__main__":
+    # Smoke check: a base64-encoded sample image is run through what().
+    sample_b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="
+    detected = what(sample_b64)
+    print(f"Detected format: {detected}")  # Expected: png

chatterer 0.1.13__py3-none-any.whl → 0.1.16__py3-none-any.whl

chatterer 0.1.13__py3-none-any.whl → 0.1.16__py3-none-any.whl