agent-handler-sdk 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of agent-handler-sdk has been flagged as potentially problematic. The original registry page links to more details.

Files changed (20)
  1. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/PKG-INFO +1 -1
  2. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/cli.py +3 -1
  3. agent_handler_sdk-0.1.6/agent_handler_sdk/eval_types.py +113 -0
  4. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/registry.py +0 -2
  5. agent_handler_sdk-0.1.6/agent_handler_sdk/templates/connector/evals.json.tpl +55 -0
  6. agent_handler_sdk-0.1.6/agent_handler_sdk/templates/evals/models.py +83 -0
  7. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/pyproject.toml +2 -2
  8. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/__init__.py +0 -0
  9. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/auth.py +0 -0
  10. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/connector.py +0 -0
  11. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/exceptions.py +0 -0
  12. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/invocation.py +0 -0
  13. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/templates/connector/README.md.tpl +0 -0
  14. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/templates/connector/handlers.py.tpl +0 -0
  15. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/templates/connector/init.py.tpl +0 -0
  16. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/templates/connector/metadata.yaml.tpl +0 -0
  17. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/templates/connector/pyproject.toml.tpl +0 -0
  18. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/templates/connector/test_handlers.py.tpl +0 -0
  19. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/tool.py +0 -0
  20. {agent_handler_sdk-0.1.4 → agent_handler_sdk-0.1.6}/agent_handler_sdk/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: agent-handler-sdk
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: Agent Handler SDK for defining and invoking LLM tools
5
5
  Author: David Dalmaso
6
6
  Author-email: david.dalmaso@merge.dev
@@ -56,9 +56,10 @@ def scaffold_connector() -> int:
56
56
  pkg_dir = base / f"{slug}_connector"
57
57
  tools_dir = pkg_dir / "tools"
58
58
  tests_dir = base / "tests"
59
+ evals_dir = base / "evals"
59
60
 
60
61
  # Create directories
61
- for d in (base, pkg_dir, tools_dir, tests_dir):
62
+ for d in (base, pkg_dir, tools_dir, tests_dir, evals_dir):
62
63
  d.mkdir(parents=True, exist_ok=True)
63
64
 
64
65
  # Map template → output path
@@ -68,6 +69,7 @@ def scaffold_connector() -> int:
68
69
  "init.py.tpl": pkg_dir / "__init__.py",
69
70
  "handlers.py.tpl": tools_dir / "handlers.py",
70
71
  "test_handlers.py.tpl": tests_dir / "test_handlers.py",
72
+ "evals.json.tpl": evals_dir / "evals.json",
71
73
  "README.md.tpl": base / "README.md",
72
74
  }
73
75
 
@@ -0,0 +1,113 @@
1
+ from typing import List, Dict, Any, Optional, Literal, Union
2
+ from pydantic import BaseModel, Extra
3
+ from datetime import datetime
4
+
5
+
6
+ class JsonSchema(BaseModel):
7
+ type: Optional[str] = None
8
+ properties: Optional[Dict[str, "JsonSchema"]] = None
9
+ items: Optional[Union["JsonSchema", List["JsonSchema"]]] = None
10
+ required: Optional[List[str]] = None
11
+ enum: Optional[List[Any]] = None
12
+ description: Optional[str] = None
13
+ additional_properties: Optional[Union[bool, "JsonSchema"]] = None
14
+ model: Optional[str] = None
15
+
16
+ class Config:
17
+ arbitrary_types_allowed = True
18
+ extra = "allow"
19
+
20
+
21
+ JsonSchema.model_rebuild()
22
+
23
+
24
+ class DataSourceConfig(BaseModel):
25
+ input_schema: JsonSchema
26
+
27
+
28
+ class MessageContent(BaseModel):
29
+ type: str
30
+ text: str
31
+
32
+
33
+ class MessageInput(BaseModel):
34
+ type: str
35
+ role: str
36
+ content: MessageContent
37
+
38
+
39
+ class BaseEvaluator(BaseModel):
40
+ name: str
41
+ id: str
42
+ type: str # Discriminator for future extension
43
+
44
+
45
+ class ReferenceToolCallsMatchEvaluator(BaseEvaluator):
46
+ type: Literal["reference_tool_calls_match"]
47
+ enforce_ordering: bool
48
+ fail_on_args_mismatch: bool
49
+
50
+
51
+ class LabelModelEvaluator(BaseEvaluator):
52
+ type: Literal["label_model"]
53
+ passing_labels: Optional[List[str]]
54
+ labels: Optional[List[str]]
55
+ model: Optional[str]
56
+ input: List[MessageInput]
57
+
58
+
59
+ Evaluator = Union[ReferenceToolCallsMatchEvaluator, LabelModelEvaluator, BaseEvaluator]
60
+
61
+
62
+ class EvalMetadata(BaseModel):
63
+ description: Optional[str]
64
+
65
+
66
+ class EvalItemInput(BaseModel, extra=Extra.allow):
67
+ input: str
68
+
69
+
70
+ class EvalItem(BaseModel, extra=Extra.allow):
71
+ """
72
+ Schema for individual eval items.
73
+ Supports both runtime evaluation (with id and tool_calls) and connector eval files (flexible input).
74
+ """
75
+
76
+ input: Union[str, EvalItemInput] # Can be either a string or EvalItemInput object
77
+ id: Optional[str] = None # Optional for connector eval files
78
+
79
+
80
+ class ConnectorEvalBundle(BaseModel):
81
+ """
82
+ Schema for eval bundles stored in connector /evals/ folders.
83
+ This matches the JSON structure that contains config, items, and prompts together.
84
+ """
85
+
86
+ data_source_config: DataSourceConfig
87
+ items: List[EvalItem]
88
+ prompts: List[MessageInput]
89
+ name: str
90
+ metadata: Optional[EvalMetadata] = None
91
+
92
+ def to_eval_config(self) -> "EvalConfig":
93
+ """
94
+ Convert this bundle to an EvalConfig for use with the eval runner.
95
+ Note: This creates a minimal EvalConfig without testing_evaluators.
96
+ """
97
+ return EvalConfig(
98
+ id=None,
99
+ created_at=None,
100
+ updated_at=None,
101
+ data_source_config=self.data_source_config,
102
+ testing_evaluators=[], # Empty list since connector evals don't define evaluators
103
+ metadata=self.metadata,
104
+ )
105
+
106
+
107
+ class EvalConfig(BaseModel):
108
+ id: Optional[str]
109
+ created_at: Optional[datetime]
110
+ updated_at: Optional[datetime]
111
+ data_source_config: DataSourceConfig
112
+ testing_evaluators: Optional[List[Evaluator]] = []
113
+ metadata: Optional[EvalMetadata]
@@ -31,8 +31,6 @@ class ConnectorRegistry:
31
31
  param_schema: Dict[str, Any],
32
32
  tags: List[str],
33
33
  ) -> None:
34
- if name in cls._tools:
35
- raise ValueError(f"Tool {name!r} already registered")
36
34
  cls._tools[name] = ToolSpec(name, description, fn, param_schema, tags)
37
35
 
38
36
  @classmethod
@@ -0,0 +1,55 @@
1
+ [
2
+ {
3
+ "data_source_config": {
4
+ "input_schema": {
5
+ "type": "object",
6
+ "properties": {
7
+ "input": { "type": "string" },
8
+ "reference_value": { "type": "string" },
9
+ "reference_tools": {
10
+ "type": "array",
11
+ "items": {
12
+ "type": "object",
13
+ "properties": {
14
+ "name": { "type": "string" },
15
+ "args": { "type": "object" }
16
+ },
17
+ "required": ["name"]
18
+ }
19
+ }
20
+ },
21
+ "required": ["input"]
22
+ }
23
+ },
24
+ "items": [
25
+ {
26
+ "input": "Tell me about George Washington",
27
+ "reference_value": null,
28
+ "reference_tools": [
29
+ {
30
+ "name": "wikipedia__search",
31
+ "args": {
32
+ "query": "George Washington"
33
+ }
34
+ }
35
+ ]
36
+ },
37
+ {
38
+ "input": "Where does London's name come from? Don't use any tools to solve this.",
39
+ "reference_value": "London's name is believed to originate from the Latin word \"Londinium,\" which was the name used during the Roman period when the city was established as a settlement. The exact origin of \"Londinium\" is uncertain, but it may derive from a pre-Roman or Celtic word. Over time, the name evolved through various forms, such as \"Londinium\" in Latin and \"Lunden\" in Old English, eventually becoming \"London\" as we know it today.",
40
+ "reference_tools": []
41
+ ]
42
+ }
43
+ ],
44
+ "prompts": [
45
+ {
46
+ "type": "text",
47
+ "role": "user",
48
+ "content": {
49
+ "type": "text",
50
+ "text": "{{input}}"
51
+ }
52
+ }
53
+ ]
54
+ }
55
+ ]
@@ -0,0 +1,83 @@
1
+ from typing import List, Dict, Any, Optional, Literal, Union
2
+ from pydantic import BaseModel, Extra
3
+ from datetime import datetime
4
+
5
+
6
+ class JsonSchema(BaseModel):
7
+ type: Optional[str] = None
8
+ properties: Optional[Dict[str, "JsonSchema"]] = None
9
+ items: Optional[Union["JsonSchema", List["JsonSchema"]]] = None
10
+ required: Optional[List[str]] = None
11
+ enum: Optional[List[Any]] = None
12
+ description: Optional[str] = None
13
+ additional_properties: Optional[Union[bool, "JsonSchema"]] = None
14
+ model: Optional[str] = None
15
+
16
+ class Config:
17
+ arbitrary_types_allowed = True
18
+ extra = "allow"
19
+
20
+
21
+ JsonSchema.model_rebuild()
22
+
23
+
24
+ class DataSourceConfig(BaseModel):
25
+ input_schema: JsonSchema
26
+
27
+
28
+ class MessageContent(BaseModel):
29
+ type: str
30
+ text: str
31
+
32
+
33
+ class MessageInput(BaseModel):
34
+ type: str
35
+ role: str
36
+ content: MessageContent
37
+
38
+
39
+ class BaseEvaluator(BaseModel):
40
+ name: str
41
+ id: str
42
+ type: str
43
+
44
+
45
+ class ToolCallModelEvaluator(BaseEvaluator):
46
+ type: Literal["tool_call"]
47
+ tool_name: str
48
+ params: dict | None
49
+
50
+
51
+ class LabelModelEvaluator(BaseEvaluator):
52
+ type: Literal["label_model"]
53
+ passing_labels: Optional[List[str]]
54
+ labels: Optional[List[str]]
55
+ model: Optional[str]
56
+ input: List[MessageInput]
57
+
58
+
59
+ Evaluator = Union[ToolCallModelEvaluator, LabelModelEvaluator, BaseEvaluator]
60
+
61
+
62
+ class EvalMetadata(BaseModel):
63
+ description: Optional[str]
64
+
65
+
66
+ class EvalItemInput(BaseModel, extra=Extra.allow):
67
+ input: str
68
+
69
+
70
+ class EvalItem(BaseModel):
71
+ id: str
72
+ input: EvalItemInput
73
+ tool_calls: Optional[List[Dict[str, Any]]] = None
74
+
75
+
76
+ class EvalConfig(BaseModel):
77
+ id: str
78
+ created_at: datetime
79
+ updated_at: datetime
80
+ data_source_config: DataSourceConfig
81
+ testing_evaluators: List[Evaluator]
82
+ name: str
83
+ metadata: EvalMetadata
@@ -1,8 +1,8 @@
1
1
  [tool.poetry]
2
2
  name = "agent-handler-sdk"
3
- version = "0.1.4"
3
+ version = "0.1.6"
4
4
  description = "Agent Handler SDK for defining and invoking LLM tools"
5
- authors = ["David Dalmaso <david.dalmaso@merge.dev>"]
5
+ authors = ["David Dalmaso <david.dalmaso@merge.dev>", "Gil Feig <gil@merge.dev>"]
6
6
  packages = [
7
7
  { include = "agent_handler_sdk" }
8
8
  ]