dataact 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dataact/schema.py ADDED
@@ -0,0 +1,79 @@
1
+ """Input-schema inference for small connector functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import dataclasses
6
+ import inspect
7
+ import types
8
+ from collections.abc import Callable
9
+ from typing import Any, get_args, get_origin, get_type_hints
10
+
11
# Appended to every inference error so callers know the escape hatch.
_OVERRIDE_HINT = "pass input_schema=... to override"
12
+
13
+
14
def infer_input_schema(fn: Callable[..., Any]) -> dict:
    """Build a minimal JSON object schema from ``fn``'s annotated signature.

    Every keyword-capable parameter must carry a resolvable annotation;
    parameters without defaults become "required". Variadic and
    positional-only parameters are rejected via ``_unsupported``.
    """
    sig = inspect.signature(fn)
    try:
        resolved = get_type_hints(fn)
    except Exception as exc:
        raise _unsupported(fn, "could not resolve type annotations") from exc

    schema_props: dict[str, dict] = {}
    mandatory: list[str] = []

    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    for param_name, param in sig.parameters.items():
        if param.kind in variadic_kinds:
            raise _unsupported(fn, f"unsupported variadic parameter {param_name!r}")
        if param.kind == inspect.Parameter.POSITIONAL_ONLY:
            raise _unsupported(
                fn, f"unsupported positional-only parameter {param_name!r}"
            )
        if param.annotation is inspect.Parameter.empty or param_name not in resolved:
            raise _unsupported(fn, f"missing annotation for parameter {param_name!r}")

        schema_props[param_name] = _schema_for_annotation(fn, resolved[param_name])
        # No default value means the caller must always supply it.
        if param.default is inspect.Parameter.empty:
            mandatory.append(param_name)

    return {"type": "object", "properties": schema_props, "required": mandatory}
45
+
46
+
47
+ def _schema_for_annotation(fn: Callable[..., Any], annotation: Any) -> dict:
48
+ if annotation is str:
49
+ return {"type": "string"}
50
+ if annotation is int:
51
+ return {"type": "integer"}
52
+ if annotation is float:
53
+ return {"type": "number"}
54
+ if annotation is bool:
55
+ return {"type": "boolean"}
56
+
57
+ origin = get_origin(annotation)
58
+ args = get_args(annotation)
59
+ if origin is list and args == (str,):
60
+ return {"type": "array", "items": {"type": "string"}}
61
+
62
+ if annotation is Any:
63
+ raise _unsupported(fn, "Any is not supported")
64
+ if annotation is dict or origin is dict:
65
+ raise _unsupported(fn, "dict is not supported")
66
+ if dataclasses.is_dataclass(annotation):
67
+ raise _unsupported(fn, "dataclass annotations are not supported")
68
+ if origin in (types.UnionType, getattr(types, "UnionType", object)):
69
+ raise _unsupported(fn, "union annotations are not supported")
70
+ if str(origin) == "typing.Union":
71
+ raise _unsupported(fn, "union annotations are not supported")
72
+
73
+ raise _unsupported(fn, f"unsupported annotation {annotation!r}")
74
+
75
+
76
def _unsupported(fn: Callable[..., Any], reason: str) -> TypeError:
    """Build (not raise) the standard schema-inference failure error."""
    message = f"Cannot infer input schema for {fn.__name__}: {reason}; {_OVERRIDE_HINT}"
    return TypeError(message)
dataact/serialize.py ADDED
@@ -0,0 +1,111 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ from datetime import datetime
5
+ from enum import Enum
6
+ from typing import Any
7
+
8
+
9
def to_jsonable(obj: Any) -> Any:
    """Recursively convert obj to a JSON-serializable structure. Never raises."""
    try:
        converted = _convert(obj)
    except Exception as exc:  # defensive: serialization must never crash callers
        return f"<serialization error: {exc!r}>"
    return converted
15
+
16
+
17
+ def _convert(obj: Any) -> Any:
18
+ if obj is None or isinstance(obj, (bool, int, float, str)):
19
+ return obj
20
+
21
+ if isinstance(obj, Enum):
22
+ return obj.value
23
+
24
+ if isinstance(obj, datetime):
25
+ return obj.isoformat()
26
+
27
+ if isinstance(obj, Exception):
28
+ return {"error_type": type(obj).__name__, "error_message": str(obj)}
29
+
30
+ if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
31
+ result: dict[str, Any] = {}
32
+ if isinstance(obj, _get_text_block_type()):
33
+ result["type"] = "text"
34
+ result["text"] = _convert(obj.text) # type: ignore[attr-defined]
35
+ elif isinstance(obj, _get_tool_use_block_type()):
36
+ result["type"] = "tool_use"
37
+ result["id"] = _convert(obj.tool_use_id) # type: ignore[attr-defined]
38
+ result["name"] = _convert(obj.tool_name) # type: ignore[attr-defined]
39
+ result["input"] = _convert(obj.tool_input) # type: ignore[attr-defined]
40
+ elif isinstance(obj, _get_tool_result_block_type()):
41
+ result["type"] = "tool_result"
42
+ result["tool_use_id"] = _convert(obj.tool_use_id) # type: ignore[attr-defined]
43
+ result["content"] = _convert(obj.content) # type: ignore[attr-defined]
44
+ result["is_error"] = _convert(obj.is_error) # type: ignore[attr-defined]
45
+ else:
46
+ for f in dataclasses.fields(obj):
47
+ result[f.name] = _convert(getattr(obj, f.name))
48
+ return result
49
+
50
+ if isinstance(obj, dict):
51
+ return {str(k): _convert(v) for k, v in obj.items()}
52
+
53
+ if isinstance(obj, (list, tuple)):
54
+ return [_convert(item) for item in obj]
55
+
56
+ # Try pandas DataFrame
57
+ try:
58
+ import pandas as pd
59
+
60
+ if isinstance(obj, pd.DataFrame):
61
+ return {
62
+ "type": "dataframe_snapshot",
63
+ "shape": list(obj.shape),
64
+ "columns": list(obj.columns),
65
+ "sample": obj.head(5).to_dict(orient="records"),
66
+ }
67
+ except ImportError:
68
+ pass
69
+
70
+ # Try numpy ndarray
71
+ try:
72
+ import numpy as np
73
+
74
+ if isinstance(obj, np.ndarray):
75
+ return {
76
+ "type": "ndarray_snapshot",
77
+ "shape": list(obj.shape),
78
+ "dtype": str(obj.dtype),
79
+ "sample": obj.flat[:5].tolist(),
80
+ }
81
+ except ImportError:
82
+ pass
83
+
84
+ return repr(obj)
85
+
86
+
87
+ def _get_text_block_type():
88
+ try:
89
+ from dataact.types import TextBlock
90
+
91
+ return TextBlock
92
+ except ImportError:
93
+ return type(None)
94
+
95
+
96
+ def _get_tool_use_block_type():
97
+ try:
98
+ from dataact.types import ToolUseBlock
99
+
100
+ return ToolUseBlock
101
+ except ImportError:
102
+ return type(None)
103
+
104
+
105
+ def _get_tool_result_block_type():
106
+ try:
107
+ from dataact.types import ToolResultBlock
108
+
109
+ return ToolResultBlock
110
+ except ImportError:
111
+ return type(None)
dataact/testing.py ADDED
@@ -0,0 +1,70 @@
1
+ """Public testing helpers.
2
+
3
+ `FakeAdapter` is a scripted `ProviderAdapter` that returns pre-built responses
4
+ in order. It exists so that documentation snippets, unit tests, and the
5
+ `Agent` quick-start example can run without an API key.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import copy
11
+ from typing import Any
12
+
13
+ from dataact.providers.base import NormalizedResponse, ProviderAdapter, StopReason
14
+ from dataact.types import Message, TextBlock, ToolSpec, ToolUseBlock
15
+
16
+
17
class FakeAdapter(ProviderAdapter):
    """Scripted adapter that replays canned responses for tests and docs.

    Each chat() call records its arguments in ``calls`` (deep-copied so
    later mutation by the agent loop cannot alter the record) and pops the
    next scripted response in FIFO order.
    """

    def __init__(self, responses: list[NormalizedResponse]) -> None:
        self._responses = list(responses)
        self.calls: list[dict[str, Any]] = []

    def chat(
        self,
        system: str,
        messages: list[Message],
        tools: list[ToolSpec],
    ) -> NormalizedResponse:
        call_record = {
            "system": system,
            "messages": copy.deepcopy(messages),
            "tools": copy.deepcopy(tools),
        }
        self.calls.append(call_record)
        return self._responses.pop(0)

    def format_cache_control(self, obj: dict) -> dict:
        # Return a shallow copy with the ephemeral cache marker attached.
        return {**obj, "cache_control": {"type": "ephemeral"}}

    @staticmethod
    def text(text: str) -> NormalizedResponse:
        """Build an end-of-turn response containing a single text block."""
        return NormalizedResponse(
            stop_reason=StopReason.END_TURN,
            content=[TextBlock(text=text)],
            input_tokens=0,
            output_tokens=0,
            cache_read_tokens=0,
            cache_write_tokens=0,
        )

    @staticmethod
    def tool_use(
        tool_use_id: str, tool_name: str, tool_input: dict
    ) -> NormalizedResponse:
        """Build a tool-use response containing a single tool-use block."""
        block = ToolUseBlock(
            tool_use_id=tool_use_id,
            tool_name=tool_name,
            tool_input=tool_input,
        )
        return NormalizedResponse(
            stop_reason=StopReason.TOOL_USE,
            content=[block],
            input_tokens=0,
            output_tokens=0,
            cache_read_tokens=0,
            cache_write_tokens=0,
        )
File without changes
@@ -0,0 +1,129 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Callable
4
+
5
+ from dataact.cache import SessionCache
6
+ from dataact.format import format_tool_output
7
+ from dataact.types import ToolSpec
8
+
9
+
10
class ConnectorRegistry:
    """Registry of data connectors and their (initially hidden) tool specs.

    Connector tools start with ``visible=False``; the generated
    ``load_connectors`` tool flips a connector's specs to visible on demand,
    so only explicitly loaded tools are exposed.
    """

    def __init__(self) -> None:
        self._directory: dict[str, str] = {}  # name -> one-line description
        self._connector_tools: dict[
            str, list[ToolSpec]
        ] = {}  # name -> list of ToolSpec

    def register(
        self,
        name: str,
        description: str,
        tools: list[ToolSpec],
    ) -> None:
        """Register a connector under ``name`` with its tool specs.

        Re-registering the same name overwrites the previous entry. The
        ToolSpec objects are mutated (visible=False), not copied.
        """
        self._directory[name] = description
        # Ensure all tools are hidden by default
        for spec in tools:
            spec.visible = False
        self._connector_tools[name] = list(tools)

    def get_load_connectors_spec(self) -> ToolSpec:
        """Build the always-visible ``load_connectors`` ToolSpec.

        The handler closes over ``self._connector_tools`` by reference
        (not a copy), so specs swapped in later by ``make_wrapped_specs``
        still get their visibility flipped. The directory, by contrast, is
        snapshotted at build time: connectors registered afterwards are
        loadable but will not appear in the advertised list.
        """
        directory = dict(self._directory)
        connector_tools = self._connector_tools

        def load_connector(name: str) -> str:
            # Unknown names return an error string rather than raising,
            # so the model can recover and pick a valid connector.
            if name not in connector_tools:
                available = list(directory.keys())
                return f"Error: connector {name!r} not found. Available: {available}"
            for spec in connector_tools[name]:
                spec.visible = True
            desc = directory.get(name, "")
            tool_names = [s.name for s in connector_tools[name]]
            return (
                f"Loaded connector {name!r}.\n"
                f"Description: {desc}\n"
                f"Available tools: {tool_names}"
            )

        dir_lines = "\n".join(f"- {k}: {v}" for k, v in directory.items())
        return ToolSpec(
            name="load_connectors",
            description=(
                f"Load a data connector to make its tools available.\n"
                f"Available connectors:\n{dir_lines}"
            ),
            input_schema={
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": (
                            f"Connector name. One of: {list(directory.keys())}"
                        ),
                    }
                },
                "required": ["name"],
            },
            handler=load_connector,
            visible=True,
        )

    def all_tool_specs(self) -> list[ToolSpec]:
        """Return every registered ToolSpec across all connectors (flat list)."""
        specs = []
        for tool_list in self._connector_tools.values():
            specs.extend(tool_list)
        return specs

    def call_connector(
        self,
        tool_name: str,
        tool_input: dict,
        cache: SessionCache,
    ) -> str:
        """Invoke the first registered tool matching ``tool_name``.

        The raw handler result is run through ``format_tool_output`` (which
        may cache large results under the tool's short name, i.e. the part
        after the last ``__``). Returns an error string if no handler matches.
        """
        for tool_list in self._connector_tools.values():
            for spec in tool_list:
                if spec.name == tool_name and spec.handler is not None:
                    raw = spec.handler(**tool_input)
                    return format_tool_output(
                        raw, cache=cache, preferred_name=tool_name.split("__")[-1]
                    )
        return f"Error: tool {tool_name!r} not found"

    def make_wrapped_specs(self, cache: SessionCache) -> list[ToolSpec]:
        """
        Return ToolSpecs whose handlers auto-cache large results.

        Replaces the specs in the registry in-place so that load_connectors'
        visibility flip applies to the returned (wrapped) specs, not stale originals.
        """
        result = []
        for connector_name, tool_list in self._connector_tools.items():
            new_list = []
            for orig_spec in tool_list:
                handler = orig_spec.handler
                if handler is None:
                    # Nothing to wrap: keep the original spec object.
                    new_list.append(orig_spec)
                    result.append(orig_spec)
                    continue
                preferred = orig_spec.name.split("__")[-1]

                # Factory function binds h/pname per-iteration, avoiding the
                # classic late-binding-closure bug in loops.
                def make_handler(h: Callable, pname: str):
                    def wrapped(**kwargs: Any) -> str:
                        raw = h(**kwargs)
                        return format_tool_output(
                            raw, cache=cache, preferred_name=pname
                        )

                    return wrapped

                new_spec = ToolSpec(
                    name=orig_spec.name,
                    description=orig_spec.description,
                    input_schema=orig_spec.input_schema,
                    handler=make_handler(handler, preferred),
                    visible=orig_spec.visible,
                )
                new_list.append(new_spec)
                result.append(new_spec)
            # Replace in registry so load_connectors flips the wrapped specs' visible
            self._connector_tools[connector_name] = new_list
        return result
@@ -0,0 +1,189 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import builtins
5
+ import io
6
+ import traceback
7
+ from contextlib import redirect_stdout
8
+ from typing import Any
9
+
10
+ from dataact.cache import SessionCache
11
+ from dataact.types import ToolSpec
12
+
13
# Top-level module names the sandboxed interpreter may import.
# NOTE(review): "pd"/"np" are aliases, not importable module names --
# `import pd` will still fail at import time unless such modules actually
# exist; confirm whether these entries are intentional.
_DEFAULT_ALLOWLIST = frozenset(
    {
        "pandas",
        "numpy",
        "json",
        "math",
        "datetime",
        "collections",
        "itertools",
        "pd",
        "np",  # common aliases
    }
)

# Builtin names whose call or bare reference is rejected by the AST check.
_FORBIDDEN_NAMES = frozenset({"eval", "exec", "__import__", "open", "compile"})
28
+
29
+
30
+ class _SecurityVisitor(ast.NodeVisitor):
31
+ """AST visitor that raises ValueError on forbidden patterns."""
32
+
33
+ def __init__(self, allowlist: frozenset[str]) -> None:
34
+ self._allowlist = allowlist
35
+ self.errors: list[str] = []
36
+
37
+ def visit_Import(self, node: ast.Import) -> None:
38
+ for alias in node.names:
39
+ top = alias.name.split(".")[0]
40
+ if top not in self._allowlist:
41
+ self.errors.append(f"Import not allowed: {alias.name!r}")
42
+ self.generic_visit(node)
43
+
44
+ def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
45
+ if node.module:
46
+ top = node.module.split(".")[0]
47
+ if top not in self._allowlist:
48
+ self.errors.append(f"Import not allowed: {node.module!r}")
49
+ self.generic_visit(node)
50
+
51
+ def visit_Call(self, node: ast.Call) -> None:
52
+ if isinstance(node.func, ast.Name) and node.func.id in _FORBIDDEN_NAMES:
53
+ self.errors.append(f"Call not allowed: {node.func.id!r}")
54
+ self.generic_visit(node)
55
+
56
+ def visit_Attribute(self, node: ast.Attribute) -> None:
57
+ if node.attr.startswith("__") and node.attr.endswith("__"):
58
+ self.errors.append(f"Dunder attribute access not allowed: {node.attr!r}")
59
+ self.generic_visit(node)
60
+
61
+ def visit_Name(self, node: ast.Name) -> None:
62
+ if node.id in _FORBIDDEN_NAMES:
63
+ self.errors.append(f"Name not allowed: {node.id!r}")
64
+ self.generic_visit(node)
65
+
66
+
67
class PythonInterpreter:
    """Restricted Python runner over a SessionCache.

    Code is AST-screened by _SecurityVisitor and executed with a restricted
    builtins table. NOTE(review): the AST/builtins screening is best-effort
    hardening, not a hard security boundary.
    """

    def __init__(
        self,
        cache: SessionCache,
        allowlist: frozenset[str] | None = None,
    ) -> None:
        self._cache = cache
        self._allowlist = allowlist if allowlist is not None else _DEFAULT_ALLOWLIST

    def run(self, code: str) -> str:
        """Execute ``code`` and return captured stdout or an error string."""
        # AST security check
        try:
            tree = ast.parse(code)
        except SyntaxError as exc:
            return f"SyntaxError: {exc}"

        visitor = _SecurityVisitor(self._allowlist)
        visitor.visit(tree)
        if visitor.errors:
            return "SecurityError: " + "; ".join(visitor.errors) + " — not allowed"

        # Build a single namespace for this call. Using ONE dict as both
        # globals and locals is deliberate: the original passed separate
        # globals/locals dicts, which broke any function defined inside the
        # executed code that referenced the code's own top-level names
        # (functions resolve free names through globals only -> NameError).
        namespace: dict[str, Any] = {
            "__builtins__": _safe_builtins(self._allowlist)
        }

        # Inject cache handles as plain variables.
        for name, value in self._cache.items():
            namespace[name] = value

        # Inject save() helper so code can persist artifacts back to cache.
        def save(name: str, value: Any) -> str:
            return self._cache.put(name, value)

        namespace["save"] = save

        # Capture stdout
        buf = io.StringIO()
        try:
            with redirect_stdout(buf):
                exec(compile(tree, "<code>", "exec"), namespace)  # noqa: S102
        except Exception:
            err = traceback.format_exc()
            return f"Error:\n{err}"

        output = buf.getvalue()
        return output if output else "ran successfully with no output"

    @staticmethod
    def make_tool_spec(cache: SessionCache) -> ToolSpec:
        """Create the ``python_interpreter`` ToolSpec bound to ``cache``."""
        interp = PythonInterpreter(cache=cache)
        return ToolSpec(
            name="python_interpreter",
            description=(
                "Run Python code over cached data handles. "
                "Cache handles are available as local variables. "
                "Call save(name, value) to store computed artifacts back to cache."
            ),
            input_schema={
                "type": "object",
                "properties": {
                    "code": {"type": "string", "description": "Python code to execute"},
                },
                "required": ["code"],
            },
            handler=interp.run,
        )
136
+
137
+
138
def _safe_builtins(allowlist: frozenset[str]) -> dict:
    """Build the restricted ``__builtins__`` table for the sandboxed exec.

    NOTE(review): getattr/vars/dir/type are exposed here, which weakens the
    AST dunder-access check (e.g. getattr(x, "__class__") bypasses it) --
    confirm this is an accepted risk.
    """
    passthrough = (
        print, len, range, enumerate, zip, map, filter, sorted, reversed,
        list, dict, set, tuple, str, int, float, bool, type, isinstance,
        hasattr, getattr, abs, round, min, max, sum, any, all, repr,
        format, vars, dir,
    )
    # Builtins keep their canonical names; constants and the import hook
    # are added explicitly since they have no __name__ of their own.
    table: dict = {fn.__name__: fn for fn in passthrough}
    table["None"] = None
    table["True"] = True
    table["False"] = False
    table["__import__"] = _make_safe_import(allowlist)
    return table
178
+
179
+
180
+ def _make_safe_import(allowlist: frozenset[str]):
181
+ def safe_import(name, globals=None, locals=None, fromlist=(), level=0):
182
+ if level != 0:
183
+ raise ImportError("relative imports are not allowed")
184
+ top = name.split(".")[0]
185
+ if top not in allowlist:
186
+ raise ImportError(f"Import not allowed: {name!r}")
187
+ return builtins.__import__(name, globals, locals, fromlist, level)
188
+
189
+ return safe_import
@@ -0,0 +1,107 @@
1
+ from __future__ import annotations
2
+
3
+ import uuid
4
+ from typing import Any
5
+
6
+ from dataact.types import ToolSpec
7
+
8
+
9
class Planner:
    """In-memory todo list exposed to the model via planner__* tools."""

    def __init__(self) -> None:
        self._items: list[dict[str, Any]] = []
        self._turns_since_update: int = 0

    def add(self, items: list[str]) -> str:
        """Append pending items (each gets a short random id); return the listing."""
        new_entries = [
            {"id": str(uuid.uuid4())[:8], "text": text, "status": "pending"}
            for text in items
        ]
        self._items.extend(new_entries)
        self._turns_since_update = 0
        return self.list()

    def update(self, id: str, status: str) -> str:
        """Set the status of the first item with the given id, if any."""
        match = next((item for item in self._items if item["id"] == id), None)
        if match is None:
            return f"Item {id!r} not found"
        match["status"] = status
        self._turns_since_update = 0
        return f"Updated {id!r} to {status!r}"

    def list(self) -> str:
        """Render the todo list, one '[id] (status) text' line per item."""
        if not self._items:
            return "Todo list is empty."
        return "\n".join(
            f"[{item['id']}] ({item['status']}) {item['text']}"
            for item in self._items
        )

    def reminder_hook(self, current_turn: int, max_turns: int) -> str | None:
        """Escalating nag message when pending items go stale; else None.

        The staleness counter increments on every call regardless of outcome.
        current_turn/max_turns are accepted for interface compatibility but
        are currently unused.
        """
        pending_count = sum(
            1 for item in self._items if item["status"] == "pending"
        )
        stale_turns = self._turns_since_update
        self._turns_since_update += 1

        if pending_count == 0:
            return None

        if stale_turns >= 12:
            return (
                f"URGENT: You have {pending_count} pending todo item(s) "
                f"that haven't been updated in {stale_turns} turns. Address them immediately."
            )
        if stale_turns >= 8:
            return (
                f"WARNING: {pending_count} pending todo item(s) remain "
                f"with no updates for {stale_turns} turns. Please make progress on your plan."
            )
        if stale_turns >= 4:
            return (
                f"Reminder: You have {pending_count} pending todo item(s). "
                f"Consider updating your plan."
            )
        return None

    def make_tool_specs(self) -> list[ToolSpec]:
        """Build the planner__add / planner__update / planner__list ToolSpecs."""
        add_spec = ToolSpec(
            name="planner__add",
            description="Add items to your todo list.",
            input_schema={
                "type": "object",
                "properties": {
                    "items": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of task descriptions to add.",
                    }
                },
                "required": ["items"],
            },
            handler=self.add,
        )
        update_spec = ToolSpec(
            name="planner__update",
            description="Update the status of a todo item.",
            input_schema={
                "type": "object",
                "properties": {
                    "id": {"type": "string", "description": "Item ID"},
                    "status": {
                        "type": "string",
                        "enum": ["pending", "in_progress", "done", "blocked"],
                    },
                },
                "required": ["id", "status"],
            },
            handler=self.update,
        )
        list_spec = ToolSpec(
            name="planner__list",
            description="List all todo items and their statuses.",
            input_schema={"type": "object", "properties": {}},
            handler=self.list,
        )
        return [add_spec, update_spec, list_spec]