agentops-toolkit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. agentops_toolkit/__init__.py +3 -0
  2. agentops_toolkit/adapters/__init__.py +30 -0
  3. agentops_toolkit/adapters/agent_service.py +79 -0
  4. agentops_toolkit/adapters/autogen.py +81 -0
  5. agentops_toolkit/adapters/generic.py +121 -0
  6. agentops_toolkit/adapters/registry.py +158 -0
  7. agentops_toolkit/adapters/semantic_kernel.py +92 -0
  8. agentops_toolkit/bundles/agent_quality.yaml +22 -0
  9. agentops_toolkit/bundles/agent_safety.yaml +20 -0
  10. agentops_toolkit/bundles/custom.yaml +9 -0
  11. agentops_toolkit/bundles/multi_agent_quality.yaml +25 -0
  12. agentops_toolkit/bundles/rag_agentic_retrieval.yaml +23 -0
  13. agentops_toolkit/bundles/rag_complete.yaml +36 -0
  14. agentops_toolkit/bundles/rag_cross_iq.yaml +22 -0
  15. agentops_toolkit/bundles/rag_fabric_iq.yaml +22 -0
  16. agentops_toolkit/bundles/rag_foundry_iq.yaml +26 -0
  17. agentops_toolkit/bundles/rag_permission_aware.yaml +21 -0
  18. agentops_toolkit/bundles/rag_quality.yaml +22 -0
  19. agentops_toolkit/bundles/rag_safety.yaml +20 -0
  20. agentops_toolkit/bundles/rag_work_iq.yaml +22 -0
  21. agentops_toolkit/cli/__init__.py +1 -0
  22. agentops_toolkit/cli/app.py +67 -0
  23. agentops_toolkit/cli/bundle_cmd.py +214 -0
  24. agentops_toolkit/cli/config_cmd.py +200 -0
  25. agentops_toolkit/cli/dataset_cmd.py +317 -0
  26. agentops_toolkit/cli/eval_cmd.py +173 -0
  27. agentops_toolkit/cli/init_cmd.py +224 -0
  28. agentops_toolkit/cli/model_cmd.py +83 -0
  29. agentops_toolkit/cli/monitor_cmd.py +91 -0
  30. agentops_toolkit/cli/report_cmd.py +258 -0
  31. agentops_toolkit/cli/run_cmd.py +151 -0
  32. agentops_toolkit/cli/trace_cmd.py +82 -0
  33. agentops_toolkit/connectors/__init__.py +4 -0
  34. agentops_toolkit/core/__init__.py +5 -0
  35. agentops_toolkit/core/aggregator.py +4 -0
  36. agentops_toolkit/core/bundle_registry.py +127 -0
  37. agentops_toolkit/core/client.py +4 -0
  38. agentops_toolkit/core/config_loader.py +157 -0
  39. agentops_toolkit/core/errors.py +116 -0
  40. agentops_toolkit/core/foundry_client.py +50 -0
  41. agentops_toolkit/core/foundry_sdk_client.py +139 -0
  42. agentops_toolkit/core/hooks.py +4 -0
  43. agentops_toolkit/core/logging.py +78 -0
  44. agentops_toolkit/core/persistence.py +4 -0
  45. agentops_toolkit/core/pipeline.py +291 -0
  46. agentops_toolkit/core/rate_limiter.py +4 -0
  47. agentops_toolkit/core/registry.py +4 -0
  48. agentops_toolkit/core/runner.py +4 -0
  49. agentops_toolkit/evaluators/__init__.py +5 -0
  50. agentops_toolkit/evaluators/base.py +166 -0
  51. agentops_toolkit/evaluators/citation.py +131 -0
  52. agentops_toolkit/evaluators/rag_iq.py +179 -0
  53. agentops_toolkit/mcp/__init__.py +4 -0
  54. agentops_toolkit/mcp/client.py +93 -0
  55. agentops_toolkit/models/__init__.py +68 -0
  56. agentops_toolkit/models/bundle.py +47 -0
  57. agentops_toolkit/models/config.py +229 -0
  58. agentops_toolkit/models/dataset.py +60 -0
  59. agentops_toolkit/models/observability.py +4 -0
  60. agentops_toolkit/models/rag.py +4 -0
  61. agentops_toolkit/models/run.py +146 -0
  62. agentops_toolkit/obs/__init__.py +9 -0
  63. agentops_toolkit/obs/decorators.py +69 -0
  64. agentops_toolkit/obs/monitor.py +84 -0
  65. agentops_toolkit/obs/tracing.py +148 -0
  66. agentops_toolkit-0.1.0.dist-info/METADATA +704 -0
  67. agentops_toolkit-0.1.0.dist-info/RECORD +69 -0
  68. agentops_toolkit-0.1.0.dist-info/WHEEL +4 -0
  69. agentops_toolkit-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,3 @@
1
+ """AgentOps Toolkit — Evaluate, trace, and monitor AI agents."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,30 @@
1
+ """Framework adapters — bridge between AgentOps and agent frameworks.
2
+
3
+ SPEC-004: FrameworkAdapter protocol, AdapterRegistry, auto-detection.
4
+
5
+ Importing this package auto-registers all built-in adapters.
6
+ """
7
+
8
+ from agentops_toolkit.adapters.registry import (
9
+ AdapterCapabilities,
10
+ AdapterRegistry,
11
+ AgentDiscovery,
12
+ AgentOutput,
13
+ DiscoveredAgent,
14
+ FrameworkAdapter,
15
+ )
16
+
17
+ # Auto-register built-in adapters on import
18
+ from agentops_toolkit.adapters import semantic_kernel as _sk # noqa: F401
19
+ from agentops_toolkit.adapters import autogen as _ag # noqa: F401
20
+ from agentops_toolkit.adapters import agent_service as _as # noqa: F401
21
+ from agentops_toolkit.adapters import generic as _gen # noqa: F401
22
+
23
+ __all__ = [
24
+ "AdapterCapabilities",
25
+ "AdapterRegistry",
26
+ "AgentDiscovery",
27
+ "AgentOutput",
28
+ "DiscoveredAgent",
29
+ "FrameworkAdapter",
30
+ ]
@@ -0,0 +1,79 @@
1
+ """Azure AI Agent Service adapter — pull agent defs from Foundry.
2
+
3
+ SPEC-004 §4.3, FR-083: AgentServiceAdapter.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from typing import Any
10
+
11
+ from agentops_toolkit.adapters.registry import (
12
+ AdapterCapabilities,
13
+ AdapterRegistry,
14
+ AgentDiscovery,
15
+ AgentOutput,
16
+ DiscoveredAgent,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class AgentServiceAdapter:
    """Adapter for Azure AI Agent Service (SPEC-004 §4.3).

    The spec describes creating threads, sending messages, polling for
    completion and extracting tool calls / file search results. The code in
    this class is a placeholder: ``invoke`` returns a fixed marker response
    and ``setup`` / ``teardown`` make no remote calls.
    """

    def __init__(self, config: dict[str, Any]) -> None:
        """Store the adapter configuration mapping."""
        self._config = config

    @property
    def name(self) -> str:
        """Registry name of this adapter."""
        return "agent-service"

    @property
    def framework_version(self) -> str:
        """Version of ``azure.ai.projects``.

        Returns ``"not-installed"`` when the package cannot be imported and
        ``"unknown"`` when it exposes no ``__version__`` attribute.
        """
        try:
            import azure.ai.projects
        except ImportError:
            return "not-installed"
        return getattr(azure.ai.projects, "__version__", "unknown")

    async def setup(self, config: dict[str, Any]) -> None:
        """Log the configured ``agent_id``; no other work is done here."""
        logger.info("Agent Service adapter setup: agent_id=%s", config.get("agent_id"))

    async def teardown(self) -> None:
        """No-op: this adapter holds no resources."""

    async def invoke(self, query: str, context: str | None = None) -> AgentOutput:
        """Return the placeholder response for *query*.

        Only the first 50 characters of the query are logged. ``context`` is
        accepted for protocol compatibility and is not used.
        """
        logger.info("Agent Service invoke: query=%s", query[:50])
        return AgentOutput(
            response="[Agent Service adapter — production creates thread + polls]",
            metadata={"framework": "agent-service"},
        )

    async def discover(self, config: dict[str, Any]) -> AgentDiscovery:
        """Describe the single Foundry-hosted agent named by ``agent_id``.

        Falls back to the name ``"agent"`` when ``agent_id`` is missing *or*
        explicitly null/empty, so a ``None`` never reaches the ``str`` field
        of ``DiscoveredAgent``.
        """
        agent_name = config.get("agent_id") or "agent"
        return AgentDiscovery(
            framework="agent-service",
            framework_version=self.framework_version,
            agents=[
                DiscoveredAgent(
                    name=agent_name,
                    entry_point="foundry-hosted",
                    agent_type="single",
                )
            ],
        )

    def get_capabilities(self) -> AdapterCapabilities:
        """Report tool-call and token-usage capture; other flags keep defaults."""
        return AdapterCapabilities(
            captures_tool_calls=True,
            captures_token_usage=True,
        )

    def supports_streaming(self) -> bool:
        """This adapter does not stream."""
        return False
77
+
78
+
79
+ AdapterRegistry.register("agent-service", AgentServiceAdapter)
@@ -0,0 +1,81 @@
1
+ """AutoGen adapter — hook ConversableAgent message streams.
2
+
3
+ SPEC-004 §4.2, FR-082: AutoGenAdapter.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from typing import Any
10
+
11
+ from agentops_toolkit.adapters.registry import (
12
+ AdapterCapabilities,
13
+ AdapterRegistry,
14
+ AgentDiscovery,
15
+ AgentOutput,
16
+ DiscoveredAgent,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class AutoGenAdapter:
    """Adapter for AutoGen multi-agent systems (SPEC-004 §4.2).

    The spec describes hooking ConversableAgent message streams and capturing
    inter-agent messages as conversation turns. The code in this class is a
    placeholder: ``invoke`` returns a fixed marker response and nothing in
    this class appends to the captured-turn list.
    """

    def __init__(self, config: dict[str, Any]) -> None:
        """Store the configuration and start with no captured turns."""
        self._config = config
        self._captured_turns: list[dict] = []

    @property
    def name(self) -> str:
        """Registry name of this adapter."""
        return "autogen"

    @property
    def framework_version(self) -> str:
        """Version of ``autogen``.

        Returns ``"not-installed"`` when the package cannot be imported and
        ``"unknown"`` when it exposes no ``__version__`` attribute.
        """
        try:
            import autogen
        except ImportError:
            return "not-installed"
        return getattr(autogen, "__version__", "unknown")

    async def setup(self, config: dict[str, Any]) -> None:
        """Log the configured entry point; no other work is done here."""
        # Treat an explicit null the same as a missing key.
        entry_point = config.get("entry_point") or ""
        logger.info("AutoGen adapter setup: entry=%s", entry_point)

    async def teardown(self) -> None:
        """Drop any captured conversation turns."""
        self._captured_turns.clear()

    async def invoke(self, query: str, context: str | None = None) -> AgentOutput:
        """Return the placeholder response for *query*.

        Only the first 50 characters of the query are logged. ``context`` is
        accepted for protocol compatibility and is not used.
        ``conversation_turns`` is ``None`` when nothing has been captured.
        """
        logger.info("AutoGen invoke: query=%s", query[:50])
        return AgentOutput(
            response="[AutoGen adapter — production implementation calls team.run()]",
            conversation_turns=self._captured_turns or None,
            metadata={"framework": "autogen"},
        )

    async def discover(self, config: dict[str, Any]) -> AgentDiscovery:
        """Describe a single multi-agent entry named ``"team"``.

        A missing, null or empty ``entry_point`` is reported as ``""`` so a
        ``None`` never reaches the ``str`` field of ``DiscoveredAgent``.
        """
        entry_point = config.get("entry_point") or ""
        return AgentDiscovery(
            framework="autogen",
            framework_version=self.framework_version,
            agents=[
                DiscoveredAgent(
                    name="team",
                    entry_point=entry_point,
                    agent_type="multi-agent",
                )
            ],
        )

    def get_capabilities(self) -> AdapterCapabilities:
        """Report tool-call and conversation-turn capture; other flags keep defaults."""
        return AdapterCapabilities(
            captures_tool_calls=True,
            captures_conversation_turns=True,
        )

    def supports_streaming(self) -> bool:
        """This adapter does not stream."""
        return False
79
+
80
+
81
+ AdapterRegistry.register("autogen", AutoGenAdapter)
@@ -0,0 +1,121 @@
1
+ """Generic adapter — works with any framework via callable or HTTP endpoint.
2
+
3
+ SPEC-004 §4.4, FR-084: GenericAdapter with @agentops.trace support.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from typing import Any
10
+
11
+ from agentops_toolkit.adapters.registry import (
12
+ AdapterCapabilities,
13
+ AdapterRegistry,
14
+ AgentDiscovery,
15
+ AgentOutput,
16
+ DiscoveredAgent,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class GenericAdapter:
    """Generic adapter for any framework (SPEC-004 §4.4).

    Supports two modes, selected from the ``entry_point`` config value:

    - HTTP endpoint: an ``http://`` / ``https://`` URL, invoked with a JSON POST.
    - Python callable: ``"package.module:function"``, or a module path /
      ``.py`` file path whose ``agent_fn`` attribute is called.
    """

    def __init__(self, config: dict[str, Any]) -> None:
        """Store the adapter configuration mapping."""
        self._config = config

    @property
    def name(self) -> str:
        """Registry name of this adapter."""
        return "custom"

    @property
    def framework_version(self) -> str:
        """There is no framework to version; always ``"n/a"``."""
        return "n/a"

    async def setup(self, config: dict[str, Any]) -> None:
        """Log the configured entry point; no other work is done here."""
        entry_point = config.get("entry_point") or ""
        logger.info("Generic adapter setup: entry=%s", entry_point)

    async def teardown(self) -> None:
        """No-op: this adapter holds no resources."""

    async def invoke(self, query: str, context: str | None = None) -> AgentOutput:
        """Dispatch *query* to the HTTP endpoint or Python callable.

        The entry point is read from the config given to ``__init__``. A
        missing or null ``entry_point`` is treated as ``""`` and therefore
        takes the callable path, which reports the import failure in metadata.
        """
        entry_point = self._config.get("entry_point") or ""

        if entry_point.startswith(("http://", "https://")):
            return await self._invoke_http(query, context, entry_point)
        return await self._invoke_callable(query, context, entry_point)

    async def _invoke_http(self, query: str, context: str | None, url: str) -> AgentOutput:
        """Invoke the agent via HTTP POST.

        Sends ``{"query": ..., "context": ...}`` (``context`` only when
        truthy) and reads the ``"response"`` key of the JSON reply. When that
        key is absent, or the reply is not a JSON object, the stringified
        payload is used instead.

        Raises:
            httpx.HTTPStatusError: On a 4xx/5xx response (``raise_for_status``).
        """
        import httpx

        payload: dict[str, Any] = {"query": query}
        if context:
            payload["context"] = context

        async with httpx.AsyncClient() as client:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            data = resp.json()

        # A JSON array/string/number has no .get(); fall back to its text form.
        if isinstance(data, dict):
            response = data.get("response", str(data))
        else:
            response = str(data)

        return AgentOutput(
            response=response,
            metadata={"source": "http", "url": url},
        )

    async def _invoke_callable(self, query: str, context: str | None, entry_point: str) -> AgentOutput:
        """Invoke the agent via a Python callable.

        ``"module:func"`` selects ``func`` from ``module``. Otherwise the
        entry point is treated as a module or file path and ``agent_fn`` is
        used. The callable receives ``(query, context)`` when *context* is
        truthy, else ``(query,)``; awaitable results are awaited.

        Any exception from importing or calling user code is logged and
        reported as an empty response with an ``"error"`` metadata entry
        rather than raised.
        """
        import importlib
        import inspect

        if ":" in entry_point:
            module_path, func_name = entry_point.rsplit(":", 1)
        else:
            # Strip only a trailing ".py"; a blanket replace would also
            # corrupt names that merely contain ".py" (e.g. "my.pyagent").
            module_path = entry_point[:-3] if entry_point.endswith(".py") else entry_point
            module_path = module_path.replace("/", ".").replace("\\", ".")
            func_name = "agent_fn"

        try:
            module = importlib.import_module(module_path)
            func = getattr(module, func_name)
            result = func(query, context) if context else func(query)

            if inspect.isawaitable(result):
                result = await result

            return AgentOutput(
                response=str(result),
                metadata={"source": "callable", "entry_point": entry_point},
            )
        except Exception as exc:
            # Best-effort boundary around arbitrary user code: keep returning
            # an output object, but leave a traceback in the log.
            logger.exception("Generic adapter callable failed: entry=%s", entry_point)
            return AgentOutput(
                response="",
                metadata={
                    "source": "callable",
                    "entry_point": entry_point,
                    "error": str(exc),
                },
            )

    async def discover(self, config: dict[str, Any]) -> AgentDiscovery:
        """Describe a single agent named ``"agent"`` at the configured entry point.

        A missing, null or empty ``entry_point`` is reported as ``""``.
        """
        return AgentDiscovery(
            framework="custom",
            agents=[
                DiscoveredAgent(
                    name="agent",
                    entry_point=config.get("entry_point") or "",
                )
            ],
        )

    def get_capabilities(self) -> AdapterCapabilities:
        """Report the default capability set."""
        return AdapterCapabilities()

    def supports_streaming(self) -> bool:
        """This adapter does not stream."""
        return False
119
+
120
+
121
+ AdapterRegistry.register("custom", GenericAdapter)
@@ -0,0 +1,158 @@
1
+ """Adapter protocol and registry.
2
+
3
+ SPEC-004 §2–3: FrameworkAdapter protocol, AdapterRegistry with auto-detection.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from pathlib import Path
10
+ from typing import Any, Protocol, runtime_checkable
11
+
12
+ from pydantic import BaseModel, Field
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ # ── Supporting types ──
18
+
19
+
20
class AdapterCapabilities(BaseModel):
    """What an adapter can capture (SPEC-004 §2).

    A set of boolean feature flags. Only ``captures_response`` defaults to
    ``True``; every other flag is opt-in and defaults to ``False``.
    """

    # On by default.
    captures_response: bool = True
    # Opt-in flags; adapters set these to True for the features they provide.
    captures_tool_calls: bool = False
    captures_conversation_turns: bool = False
    captures_planner_steps: bool = False
    captures_token_usage: bool = False
    supports_streaming: bool = False
29
+
30
+
31
class DiscoveredAgent(BaseModel):
    """A detected agent entry point (SPEC-004 §2).

    ``name`` and ``entry_point`` are required; the remaining fields have
    defaults.
    """

    name: str
    entry_point: str
    agent_type: str = "single"  # "single" | "multi-agent" | "rag"
    # Each instance gets its own empty list (default_factory).
    # presumably tool / plugin names — confirm against SPEC-004
    tools: list[str] = Field(default_factory=list)
    plugins: list[str] = Field(default_factory=list)
39
+
40
+
41
class AgentDiscovery(BaseModel):
    """Results of agent discovery in a project (SPEC-004 §2).

    All fields are optional: lists default to empty and strings to ``""``.
    """

    # Agents found; empty when nothing was discovered.
    agents: list[DiscoveredAgent] = Field(default_factory=list)
    # Framework identifier and version string as reported by the adapter.
    framework: str = ""
    framework_version: str = ""
    # Free-form warning messages.
    warnings: list[str] = Field(default_factory=list)
48
+
49
+
50
class AgentOutput(BaseModel):
    """Captured output from an agent invocation (SPEC-003 §2).

    Only ``response`` is required.
    """

    # The agent's response text.
    response: str
    # None (the default) when the adapter supplies no such data.
    tool_calls: list[Any] | None = None
    conversation_turns: list[Any] | None = None
    # Free-form key/value details attached by the adapter.
    metadata: dict[str, Any] = Field(default_factory=dict)
57
+
58
+
59
+ # ── Protocol ──
60
+
61
+
62
@runtime_checkable
class FrameworkAdapter(Protocol):
    """Protocol that all framework adapters must satisfy (SPEC-004 §2).

    Structural (duck-typed) interface: adapters need not inherit from it.
    ``@runtime_checkable`` allows ``isinstance`` checks against it.
    """

    # Adapter identifier.
    @property
    def name(self) -> str: ...

    # Version string of the underlying framework.
    @property
    def framework_version(self) -> str: ...

    # Async lifecycle hooks.
    async def setup(self, config: dict[str, Any]) -> None: ...

    async def teardown(self) -> None: ...

    # Run the agent on a query, with optional context, and return its output.
    async def invoke(self, query: str, context: str | None = None) -> AgentOutput: ...

    # Report the agents found for the given config.
    async def discover(self, config: dict[str, Any]) -> AgentDiscovery: ...

    # Synchronous capability queries.
    def get_capabilities(self) -> AdapterCapabilities: ...

    def supports_streaming(self) -> bool: ...
83
+
84
+
85
+ # ── Registry ──
86
+
87
+
88
# Package name → adapter name detection map
# Iteration order is the match priority used by AdapterRegistry.auto_detect.
_DETECTION_MAP: dict[str, str] = {
    "semantic_kernel": "semantic-kernel",
    "semantic-kernel": "semantic-kernel",
    "autogen": "autogen",
    "autogen-agentchat": "autogen",
    "pyautogen": "autogen",
    "azure-ai-projects": "agent-service",
    "azure.ai.projects": "agent-service",
}


class AdapterRegistry:
    """Registry for framework adapters with auto-detection (SPEC-004 §3).

    The registry is class-level state shared by the whole process; all
    methods are classmethods.

    Usage::

        registry = AdapterRegistry()
        detected = registry.auto_detect(Path("."))
        adapter = registry.get("semantic-kernel", config={})
    """

    # Shared on purpose: adapter modules register themselves at import time.
    _adapters: dict[str, type] = {}

    @classmethod
    def register(cls, name: str, adapter_class: type) -> None:
        """Register an adapter class by name, replacing any previous entry."""
        cls._adapters[name] = adapter_class
        logger.debug("Adapter registered: %s", name)

    @classmethod
    def get(cls, name: str, config: dict[str, Any] | None = None) -> Any:
        """Instantiate the adapter registered under *name*.

        The adapter class is called with *config*, or ``{}`` when *config* is
        ``None`` or empty.

        Raises:
            KeyError: If the adapter is not registered; the message lists the
                available names.
        """
        try:
            adapter_class = cls._adapters[name]
        except KeyError:
            available = sorted(cls._adapters) or ["(none registered)"]
            raise KeyError(
                f"Adapter '{name}' not found. Available: {', '.join(available)}"
            ) from None
        return adapter_class(config or {})

    @classmethod
    def list_registered(cls) -> list[str]:
        """Return sorted list of registered adapter names."""
        return sorted(cls._adapters)

    @classmethod
    def auto_detect(cls, project_root: Path) -> str | None:
        """Scan project dependencies to detect which framework is in use (SPEC-004 §3, FR-085).

        Checks in order:
        1. pyproject.toml
        2. requirements.txt
        3. setup.cfg

        Matching is case-insensitive. A package name matches only when it is
        not embedded in a longer alphanumeric word, so ``autogen-core`` and
        ``autogen>=0.4`` match ``autogen`` but ``autogenerated`` does not.
        Files that cannot be read or decoded are skipped with a warning.

        Returns:
            The adapter name for the first match, or ``None`` when no listed
            package is found.
        """
        import re

        # Compile once, outside the per-file loop.
        patterns = [
            (
                re.compile(
                    r"(?<![a-z0-9])" + re.escape(package.lower()) + r"(?![a-z0-9])"
                ),
                adapter_name,
            )
            for package, adapter_name in _DETECTION_MAP.items()
        ]

        root = Path(project_root)
        for fname in ("pyproject.toml", "requirements.txt", "setup.cfg"):
            fpath = root / fname
            # is_file() rather than exists(): a directory with this name
            # would make read_text() raise.
            if not fpath.is_file():
                continue
            try:
                content = fpath.read_text(encoding="utf-8").lower()
            except (OSError, UnicodeDecodeError) as exc:
                logger.warning("Skipping unreadable %s: %s", fpath, exc)
                continue
            for pattern, adapter_name in patterns:
                if pattern.search(content):
                    logger.info("Auto-detected framework: %s (from %s)", adapter_name, fname)
                    return adapter_name

        logger.debug("No framework detected in %s", project_root)
        return None
@@ -0,0 +1,92 @@
1
+ """Semantic Kernel adapter — auto-discover plugins, capture kernel I/O.
2
+
3
+ SPEC-004 §4.1, FR-081: SemanticKernelAdapter.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from typing import Any
10
+
11
+ from agentops_toolkit.adapters.registry import (
12
+ AdapterCapabilities,
13
+ AdapterRegistry,
14
+ AgentDiscovery,
15
+ AgentOutput,
16
+ DiscoveredAgent,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class SemanticKernelAdapter:
    """Adapter for Microsoft Semantic Kernel agents (SPEC-004 §4.1).

    The spec describes auto-discovering kernel plugins, capturing I/O via
    ChatCompletionService and hooking planner steps via
    FunctionInvocationFilter. The code in this class is a placeholder:
    ``setup`` only logs, ``invoke`` returns a fixed marker response, and
    nothing in this class appends to the captured tool-call list.
    """

    def __init__(self, config: dict[str, Any]) -> None:
        """Store the configuration; start with no kernel and no captured calls."""
        self._config = config
        self._kernel = None
        self._captured_tool_calls: list[dict] = []

    @property
    def name(self) -> str:
        """Registry name of this adapter."""
        return "semantic-kernel"

    @property
    def framework_version(self) -> str:
        """Version of ``semantic_kernel``.

        Returns ``"not-installed"`` when the package cannot be imported and
        ``"unknown"`` when it exposes no ``__version__`` attribute.
        """
        try:
            import semantic_kernel
        except ImportError:
            return "not-installed"
        return getattr(semantic_kernel, "__version__", "unknown")

    async def setup(self, config: dict[str, Any]) -> None:
        """Log the entry point and kernel variable name.

        Production: import module, find kernel, install filters.
        See SPEC-004 §4.1 for full implementation.
        """
        # Treat explicit nulls the same as missing keys.
        entry_point = config.get("entry_point") or ""
        kernel_var = config.get("kernel_var") or "kernel"
        logger.info("SK adapter setup: entry=%s, kernel_var=%s", entry_point, kernel_var)

    async def teardown(self) -> None:
        """Release the kernel reference and drop captured tool calls."""
        self._kernel = None
        self._captured_tool_calls.clear()

    async def invoke(self, query: str, context: str | None = None) -> AgentOutput:
        """Return the placeholder response for *query*.

        Only the first 50 characters of the query are logged. ``context`` is
        accepted for protocol compatibility and is not used. ``tool_calls``
        is ``None`` when nothing has been captured.
        """
        logger.info("SK invoke: query=%s", query[:50])
        # Production: build ChatHistory, call ChatCompletionService, capture result
        return AgentOutput(
            response="[SK adapter — production implementation calls kernel]",
            tool_calls=self._captured_tool_calls or None,
            metadata={"framework": "semantic-kernel"},
        )

    async def discover(self, config: dict[str, Any]) -> AgentDiscovery:
        """Describe a single agent named ``"kernel"`` at the configured entry point.

        A missing, null or empty ``entry_point`` is reported as ``""`` so a
        ``None`` never reaches the ``str`` field of ``DiscoveredAgent``.
        """
        return AgentDiscovery(
            framework="semantic-kernel",
            framework_version=self.framework_version,
            agents=[
                DiscoveredAgent(
                    name="kernel",
                    entry_point=config.get("entry_point") or "",
                    agent_type="single",
                )
            ],
        )

    def get_capabilities(self) -> AdapterCapabilities:
        """Report tool-call, planner-step and token-usage capture.

        The streaming flag is taken from ``supports_streaming()`` so the two
        capability queries cannot disagree.
        """
        return AdapterCapabilities(
            captures_tool_calls=True,
            captures_planner_steps=True,
            captures_token_usage=True,
            supports_streaming=self.supports_streaming(),
        )

    def supports_streaming(self) -> bool:
        """This adapter declares streaming support."""
        return True
89
+
90
+
91
+ # Auto-register
92
+ AdapterRegistry.register("semantic-kernel", SemanticKernelAdapter)
@@ -0,0 +1,22 @@
1
# agent_quality — Quality for single tool-using agents
# SPEC-001 §3.4
#
# Four evaluators of type `foundry`, each with a matching `thresholds` entry.

name: agent_quality
description: "Quality evaluators for tool-using agents"
use_case: agent
evaluators:
  - name: groundedness
    type: foundry
  - name: relevance
    type: foundry
  - name: coherence
    type: foundry
  - name: tool_call_accuracy
    type: foundry
thresholds:
  groundedness: 3.0
  relevance: 3.0
  coherence: 3.0
  # NOTE(review): 0.7 is on a different numeric range than the 3.0 values
  # above — presumably a 0–1 score; confirm against the evaluator.
  tool_call_accuracy: 0.7
tags: [agent, quality, tools]
builtin: true
@@ -0,0 +1,20 @@
1
# agent_safety — Safety for agents
# SPEC-001 §3.4
#
# Five evaluators of type `foundry`; no thresholds are declared.

name: agent_safety
description: "Safety evaluators for agents"
use_case: agent
evaluators:
  - name: hate_unfairness
    type: foundry
  - name: sexual
    type: foundry
  - name: violence
    type: foundry
  - name: self_harm
    type: foundry
  - name: jailbreak
    type: foundry
# NOTE(review): empty mapping — confirm how pass/fail is decided for these
# evaluators when no threshold is given.
thresholds: {}
tags: [agent, safety]
builtin: true
@@ -0,0 +1,9 @@
1
# custom — Empty template for user customization
# SPEC-001 §3.4
#
# NOTE(review): unlike the other bundles shown alongside this one, this file
# has no `use_case` key — confirm the key is optional in the bundle schema.

name: custom
description: "Empty template — add your own evaluators"
# Both collections are intentionally empty; fill them in when customizing.
evaluators: []
thresholds: {}
tags: [custom]
builtin: true
@@ -0,0 +1,25 @@
1
# multi_agent_quality — Quality for orchestrated multi-agent systems
# SPEC-001 §3.4
#
# Five evaluators of type `foundry`, each with a 3.0 threshold.

name: multi_agent_quality
description: "Quality evaluators for orchestrated multi-agent systems"
use_case: multi-agent
evaluators:
  - name: groundedness
    type: foundry
  - name: relevance
    type: foundry
  - name: coherence
    type: foundry
  - name: task_completion
    type: foundry
  - name: handoff_quality
    type: foundry
# One entry per evaluator above, keyed by evaluator name.
thresholds:
  groundedness: 3.0
  relevance: 3.0
  coherence: 3.0
  task_completion: 3.0
  handoff_quality: 3.0
tags: [multi-agent, quality, orchestration]
builtin: true
@@ -0,0 +1,23 @@
1
# rag_agentic_retrieval — RAG + agentic retrieval pipeline
# SPEC-008 §4.1
#
# Mixes two `foundry` evaluators with three `custom` ones.

name: rag_agentic_retrieval
description: "Evaluates the full agentic retrieval pipeline including query decomposition"
use_case: rag
evaluators:
  - name: groundedness
    type: foundry
  - name: relevance
    type: foundry
  - name: sub_query_quality
    type: custom
  - name: source_coverage
    type: custom
  - name: citation_accuracy
    type: custom
# NOTE(review): sub_query_quality and citation_accuracy have no threshold
# entry here — confirm they are intentionally left ungated.
thresholds:
  groundedness: 4.0
  relevance: 4.0
  source_coverage: 0.8
tags: [rag, agentic-retrieval, query-decomposition]
builtin: true