PyPI - agent-handler-sdk - Versions diffs - 0.1.4__tar.gz → 0.1.6__tar.gz - Mend

agent-handler-sdk 0.1.4 (tar.gz) → 0.1.6 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of agent-handler-sdk might be problematic. Click here for more details.

Files changed (20)

{agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: agent-handler-sdk
-Version: 0.1.4
+Version: 0.1.6
 Summary: Agent Handler SDK for defining and invoking LLM tools
 Author: David Dalmaso
 Author-email: david.dalmaso@merge.dev

{agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/cli.py RENAMED Viewed

@@ -56,9 +56,10 @@ def scaffold_connector() -> int:
     pkg_dir = base / f"{slug}_connector"
     tools_dir = pkg_dir / "tools"
     tests_dir = base / "tests"
+    evals_dir = base / "evals"
     # Create directories
-    for d in (base, pkg_dir, tools_dir, tests_dir):
+    for d in (base, pkg_dir, tools_dir, tests_dir, evals_dir):
         d.mkdir(parents=True, exist_ok=True)
     # Map template → output path
@@ -68,6 +69,7 @@ def scaffold_connector() -> int:
         "init.py.tpl": pkg_dir / "__init__.py",
         "handlers.py.tpl": tools_dir / "handlers.py",
         "test_handlers.py.tpl": tests_dir / "test_handlers.py",
+        "evals.json.tpl": evals_dir / "evals.json",
         "README.md.tpl": base / "README.md",
     }

agent_handler_sdk-0.1.6/agent_handler_sdk/eval_types.py ADDED Viewed

@@ -0,0 +1,113 @@
+from typing import List, Dict, Any, Optional, Literal, Union
+from pydantic import BaseModel, Extra
+from datetime import datetime
+class JsonSchema(BaseModel):
+    type: Optional[str] = None
+    properties: Optional[Dict[str, "JsonSchema"]] = None
+    items: Optional[Union["JsonSchema", List["JsonSchema"]]] = None
+    required: Optional[List[str]] = None
+    enum: Optional[List[Any]] = None
+    description: Optional[str] = None
+    additional_properties: Optional[Union[bool, "JsonSchema"]] = None
+    model: Optional[str] = None
+    class Config:
+        arbitrary_types_allowed = True
+        extra = "allow"
+JsonSchema.model_rebuild()
+class DataSourceConfig(BaseModel):
+    input_schema: JsonSchema
+class MessageContent(BaseModel):
+    type: str
+    text: str
+class MessageInput(BaseModel):
+    type: str
+    role: str
+    content: MessageContent
+class BaseEvaluator(BaseModel):
+    name: str
+    id: str
+    type: str  # Discriminator for future extension
+class ReferenceToolCallsMatchEvaluator(BaseEvaluator):
+    type: Literal["reference_tool_calls_match"]
+    enforce_ordering: bool
+    fail_on_args_mismatch: bool
+class LabelModelEvaluator(BaseEvaluator):
+    type: Literal["label_model"]
+    passing_labels: Optional[List[str]]
+    labels: Optional[List[str]]
+    model: Optional[str]
+    input: List[MessageInput]
+Evaluator = Union[ReferenceToolCallsMatchEvaluator, LabelModelEvaluator, BaseEvaluator]
+class EvalMetadata(BaseModel):
+    description: Optional[str]
+class EvalItemInput(BaseModel, extra=Extra.allow):
+    input: str
+class EvalItem(BaseModel, extra=Extra.allow):
+    """
+    Schema for individual eval items.
+    Supports both runtime evaluation (with id and tool_calls) and connector eval files (flexible input).
+    """
+    input: Union[str, EvalItemInput]  # Can be either a string or EvalItemInput object
+    id: Optional[str] = None  # Optional for connector eval files
+class ConnectorEvalBundle(BaseModel):
+    """
+    Schema for eval bundles stored in connector /evals/ folders.
+    This matches the JSON structure that contains config, items, and prompts together.
+    """
+    data_source_config: DataSourceConfig
+    items: List[EvalItem]
+    prompts: List[MessageInput]
+    name: str
+    metadata: Optional[EvalMetadata] = None
+    def to_eval_config(self) -> "EvalConfig":
+        """
+        Convert this bundle to an EvalConfig for use with the eval runner.
+        Note: This creates a minimal EvalConfig without testing_evaluators.
+        """
+        return EvalConfig(
+            id=None,
+            created_at=None,
+            updated_at=None,
+            data_source_config=self.data_source_config,
+            testing_evaluators=[],  # Empty list since connector evals don't define evaluators
+            metadata=self.metadata,
+        )
+class EvalConfig(BaseModel):
+    id: Optional[str]
+    created_at: Optional[datetime]
+    updated_at: Optional[datetime]
+    data_source_config: DataSourceConfig
+    testing_evaluators: Optional[List[Evaluator]] = []
+    metadata: Optional[EvalMetadata]

{agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/registry.py RENAMED Viewed

@@ -31,8 +31,6 @@ class ConnectorRegistry:
         param_schema: Dict[str, Any],
         tags: List[str],
     ) -> None:
-        if name in cls._tools:
-            raise ValueError(f"Tool {name!r} already registered")
         cls._tools[name] = ToolSpec(name, description, fn, param_schema, tags)
     @classmethod

agent_handler_sdk-0.1.6/agent_handler_sdk/templates/connector/evals.json.tpl ADDED Viewed

@@ -0,0 +1,55 @@
+[
+    {
+      "data_source_config": {
+        "input_schema": {
+          "type": "object",
+          "properties": {
+            "input": { "type": "string" },
+            "reference_value": { "type": "string" },
+            "reference_tools": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "name": { "type": "string" },
+                  "args": { "type": "object" }
+                },
+                "required": ["name"]
+              }
+            }
+          },
+          "required": ["input"]
+        }
+      },
+      "items": [
+        {
+          "input": "Tell me about George Washington",
+          "reference_value": null,
+          "reference_tools": [
+            {
+              "name": "wikipedia__search",
+              "args": {
+                "query": "George Washington"
+              }
+            }
+          ]
+        },
+        {
+          "input": "Where does London's name come from? Don't use any tools to solve this.",
+          "reference_value": "London's name is believed to originate from the Latin word \"Londinium,\" which was the name used during the Roman period when the city was established as a settlement. The exact origin of \"Londinium\" is uncertain, but it may derive from a pre-Roman or Celtic word. Over time, the name evolved through various forms, such as \"Londinium\" in Latin and \"Lunden\" in Old English, eventually becoming \"London\" as we know it today.",
+          "reference_tools": []
+          ]
+        }
+      ],
+      "prompts": [
+        {
+          "type": "text",
+          "role": "user",
+          "content": {
+            "type": "text",
+            "text": "{{input}}"
+          }
+        }
+      ]
+    }
+]

agent_handler_sdk-0.1.6/agent_handler_sdk/templates/evals/models.py ADDED Viewed

@@ -0,0 +1,83 @@
+from typing import List, Dict, Any, Optional, Literal, Union
+from pydantic import BaseModel, Extra
+from datetime import datetime
+class JsonSchema(BaseModel):
+    type: Optional[str] = None
+    properties: Optional[Dict[str, "JsonSchema"]] = None
+    items: Optional[Union["JsonSchema", List["JsonSchema"]]] = None
+    required: Optional[List[str]] = None
+    enum: Optional[List[Any]] = None
+    description: Optional[str] = None
+    additional_properties: Optional[Union[bool, "JsonSchema"]] = None
+    model: Optional[str] = None
+    class Config:
+        arbitrary_types_allowed = True
+        extra = "allow"
+JsonSchema.model_rebuild()
+class DataSourceConfig(BaseModel):
+    input_schema: JsonSchema
+class MessageContent(BaseModel):
+    type: str
+    text: str
+class MessageInput(BaseModel):
+    type: str
+    role: str
+    content: MessageContent
+class BaseEvaluator(BaseModel):
+    name: str
+    id: str
+    type: str
+class ToolCallModelEvaluator(BaseEvaluator):
+    type: Literal["tool_call"]
+    tool_name: str
+    params: dict | None
+class LabelModelEvaluator(BaseEvaluator):
+    type: Literal["label_model"]
+    passing_labels: Optional[List[str]]
+    labels: Optional[List[str]]
+    model: Optional[str]
+    input: List[MessageInput]
+Evaluator = Union[ToolCallModelEvaluator, LabelModelEvaluator, BaseEvaluator]
+class EvalMetadata(BaseModel):
+    description: Optional[str]
+class EvalItemInput(BaseModel, extra=Extra.allow):
+    input: str
+class EvalItem(BaseModel):
+    id: str
+    input: EvalItemInput
+    tool_calls: Optional[List[Dict[str, Any]]] = None
+class EvalConfig(BaseModel):
+    id: str
+    created_at: datetime
+    updated_at: datetime
+    data_source_config: DataSourceConfig
+    testing_evaluators: List[Evaluator]
+    name: str
+    metadata: EvalMetadata

{agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/pyproject.toml RENAMED Viewed

@@ -1,8 +1,8 @@
 [tool.poetry]
 name = "agent-handler-sdk"
-version = "0.1.4"
+version = "0.1.6"
 description = "Agent Handler SDK for defining and invoking LLM tools"
-authors = ["David Dalmaso <david.dalmaso@merge.dev>"]
+authors = ["David Dalmaso <david.dalmaso@merge.dev>", "Gil Feig <gil@merge.dev>"]
 packages = [
   { include = "agent_handler_sdk" }
 ]