arksim 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arksim/__init__.py +50 -0
- arksim/_version.py +34 -0
- arksim/cli.py +395 -0
- arksim/config/__init__.py +11 -0
- arksim/config/core/__init__.py +0 -0
- arksim/config/core/agent.py +128 -0
- arksim/config/types.py +8 -0
- arksim/config/utils.py +38 -0
- arksim/config_evaluate.yaml +39 -0
- arksim/config_simulate_evaluate.yaml +80 -0
- arksim/constants.py +4 -0
- arksim/evaluator/__init__.py +34 -0
- arksim/evaluator/base_metric.py +137 -0
- arksim/evaluator/builtin_metrics.py +194 -0
- arksim/evaluator/entities.py +175 -0
- arksim/evaluator/error_detection.py +136 -0
- arksim/evaluator/evaluate.py +241 -0
- arksim/evaluator/evaluator.py +670 -0
- arksim/evaluator/prompt_registry.py +151 -0
- arksim/evaluator/utils/__init__.py +0 -0
- arksim/evaluator/utils/constants.py +29 -0
- arksim/evaluator/utils/enums.py +57 -0
- arksim/evaluator/utils/error_messages.py +41 -0
- arksim/evaluator/utils/prompts.py +341 -0
- arksim/evaluator/utils/schema.py +21 -0
- arksim/llms/__init__.py +0 -0
- arksim/llms/chat/__init__.py +3 -0
- arksim/llms/chat/base/__init__.py +0 -0
- arksim/llms/chat/base/base_llm.py +94 -0
- arksim/llms/chat/base/types.py +17 -0
- arksim/llms/chat/llm.py +37 -0
- arksim/llms/chat/providers/azure_openai.py +132 -0
- arksim/llms/chat/providers/claude.py +122 -0
- arksim/llms/chat/providers/gemini.py +126 -0
- arksim/llms/chat/providers/openai.py +97 -0
- arksim/llms/chat/utils.py +64 -0
- arksim/llms/embedding/__init__.py +0 -0
- arksim/llms/utils/__init__.py +3 -0
- arksim/llms/utils/azure.py +39 -0
- arksim/py.typed +0 -0
- arksim/scenario/__init__.py +7 -0
- arksim/scenario/entities.py +33 -0
- arksim/simulation_engine/__init__.py +20 -0
- arksim/simulation_engine/agent/__init__.py +5 -0
- arksim/simulation_engine/agent/base.py +20 -0
- arksim/simulation_engine/agent/clients/a2a.py +113 -0
- arksim/simulation_engine/agent/clients/chat_completions.py +79 -0
- arksim/simulation_engine/agent/factory.py +17 -0
- arksim/simulation_engine/agent/utils.py +70 -0
- arksim/simulation_engine/core/__init__.py +15 -0
- arksim/simulation_engine/core/multi_knowledge_handling.py +198 -0
- arksim/simulation_engine/core/profile.py +83 -0
- arksim/simulation_engine/entities.py +139 -0
- arksim/simulation_engine/simulator.py +446 -0
- arksim/simulation_engine/utils/prompts.py +125 -0
- arksim/simulation_engine/utils/schema.py +8 -0
- arksim/simulation_engine/utils/utils.py +29 -0
- arksim/ui/__init__.py +0 -0
- arksim/ui/api/__init__.py +0 -0
- arksim/ui/api/routes_evaluate.py +137 -0
- arksim/ui/api/routes_filesystem.py +249 -0
- arksim/ui/api/routes_results.py +68 -0
- arksim/ui/api/routes_simulate.py +166 -0
- arksim/ui/api/state.py +162 -0
- arksim/ui/api/ws_logs.py +44 -0
- arksim/ui/app.py +75 -0
- arksim/ui/frontend/app.js +713 -0
- arksim/ui/frontend/index.html +800 -0
- arksim/utils/concurrency/__init__.py +6 -0
- arksim/utils/concurrency/workers.py +18 -0
- arksim/utils/html_report/__init__.py +5 -0
- arksim/utils/html_report/generate_html_report.py +530 -0
- arksim/utils/html_report/report_template.html +2437 -0
- arksim/utils/logger/__init__.py +5 -0
- arksim/utils/logger/logging.py +52 -0
- arksim/utils/output/__init__.py +13 -0
- arksim/utils/output/types.py +8 -0
- arksim/utils/output/utils.py +56 -0
- arksim-0.0.2.dist-info/METADATA +357 -0
- arksim-0.0.2.dist-info/RECORD +83 -0
- arksim-0.0.2.dist-info/WHEEL +4 -0
- arksim-0.0.2.dist-info/entry_points.txt +2 -0
- arksim-0.0.2.dist-info/licenses/LICENSE +191 -0
arksim/__init__.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Arksim: open-source agent simulation and evaluation toolkit."""
|
|
2
|
+
|
|
3
|
+
import importlib
|
|
4
|
+
|
|
5
|
+
# The release version is written by setuptools-scm into the generated
# ``_version.py`` module. Import it from there so ``arksim.__version__``
# cannot drift from the version of the installed distribution (the previous
# hard-coded literal was stale).
try:
    from ._version import __version__
except ImportError:
    # The generated module is missing (e.g. an unbuilt source tree).
    __version__ = "0.0.0+unknown"

# Public API of the package. Everything except ``__version__`` is resolved
# lazily by the module-level ``__getattr__`` using ``_LAZY_IMPORTS`` below.
__all__ = [
    "__version__",
    "AgentConfig",
    "ChatCompletionsConfig",
    "A2AConfig",
    "Evaluator",
    "EvaluationInput",
    "EvaluationParams",
    "QuantitativeMetric",
    "QualitativeMetric",
    "run_evaluation",
    "Scenario",
    "Scenarios",
    "Simulator",
    "SimulationInput",
    "SimulationParams",
    "run_simulation",
]

# Maps each lazily exported name to ``(relative module path, attribute name)``.
_LAZY_IMPORTS: dict[str, tuple[str, str]] = {
    "AgentConfig": (".config", "AgentConfig"),
    "ChatCompletionsConfig": (".config", "ChatCompletionsConfig"),
    "A2AConfig": (".config", "A2AConfig"),
    "Evaluator": (".evaluator", "Evaluator"),
    "EvaluationInput": (".evaluator", "EvaluationInput"),
    "EvaluationParams": (".evaluator", "EvaluationParams"),
    "QuantitativeMetric": (".evaluator", "QuantitativeMetric"),
    "QualitativeMetric": (".evaluator", "QualitativeMetric"),
    "run_evaluation": (".evaluator", "run_evaluation"),
    "Scenario": (".scenario", "Scenario"),
    "Scenarios": (".scenario", "Scenarios"),
    "Simulator": (".simulation_engine", "Simulator"),
    "SimulationInput": (".simulation_engine", "SimulationInput"),
    "SimulationParams": (".simulation_engine", "SimulationParams"),
    "run_simulation": (".simulation_engine", "run_simulation"),
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def __getattr__(name: str) -> object:
    """Resolve a lazily exported attribute of this module on first access.

    Args:
        name: Attribute name that was not found in the module namespace.

    Returns:
        The object registered for ``name`` in ``_LAZY_IMPORTS``.

    Raises:
        AttributeError: If ``name`` is not a registered lazy export.
    """
    try:
        module_path, attr = _LAZY_IMPORTS[name]
    except KeyError:
        raise AttributeError(
            f"module {__name__!r} has no attribute {name!r}"
        ) from None

    module = importlib.import_module(module_path, __package__)
    value = getattr(module, attr)
    # Cache the resolved object in the module namespace so later lookups are
    # plain attribute accesses and do not re-enter this hook.
    globals()[name] = value
    return value
|
arksim/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.0.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 0, 2)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
arksim/cli.py
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import asyncio
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
import time
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
from arksim.evaluator import Evaluation, EvaluationInput, run_evaluation
|
|
13
|
+
from arksim.simulation_engine import SimulationInput, run_simulation
|
|
14
|
+
from arksim.utils.logger import get_logger
|
|
15
|
+
|
|
16
|
+
logger = get_logger("arksim")
|
|
17
|
+
|
|
18
|
+
# Suppress various logging and warnings
|
|
19
|
+
logging.getLogger("azure").setLevel(logging.WARNING)
|
|
20
|
+
logging.getLogger("httpx").setLevel(logging.WARNING)
|
|
21
|
+
logging.getLogger("openai").setLevel(logging.WARNING)
|
|
22
|
+
logging.getLogger("a2a.client.card_resolver").setLevel(logging.WARNING)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _check_score_threshold(
|
|
26
|
+
evaluator_output: Evaluation,
|
|
27
|
+
score_threshold: float | None,
|
|
28
|
+
) -> bool:
|
|
29
|
+
"""Check if any conversation's overall_agent_score is below
|
|
30
|
+
the threshold.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
evaluator_output: Evaluation with conversations
|
|
34
|
+
score_threshold: Threshold value (0.0 to 1.0),
|
|
35
|
+
None to skip check
|
|
36
|
+
|
|
37
|
+
Returns:
|
|
38
|
+
True if all scores pass (or threshold is None),
|
|
39
|
+
False if any score fails
|
|
40
|
+
"""
|
|
41
|
+
if score_threshold is None:
|
|
42
|
+
return True
|
|
43
|
+
|
|
44
|
+
failed_conversations = []
|
|
45
|
+
for convo in evaluator_output.conversations:
|
|
46
|
+
if convo.overall_agent_score < score_threshold:
|
|
47
|
+
failed_conversations.append(
|
|
48
|
+
{
|
|
49
|
+
"conversation_id": convo.conversation_id,
|
|
50
|
+
"overall_agent_score": convo.overall_agent_score,
|
|
51
|
+
}
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
if failed_conversations:
|
|
55
|
+
logger.error(
|
|
56
|
+
f"Score threshold check failed! "
|
|
57
|
+
f"Threshold: {score_threshold}, "
|
|
58
|
+
f"Failed conversations: {len(failed_conversations)}",
|
|
59
|
+
)
|
|
60
|
+
for fc in failed_conversations:
|
|
61
|
+
logger.error(
|
|
62
|
+
f" Conversation {fc['conversation_id']}: "
|
|
63
|
+
f"overall_agent_score={fc['overall_agent_score']:.3f}"
|
|
64
|
+
f" < {score_threshold}",
|
|
65
|
+
)
|
|
66
|
+
return False
|
|
67
|
+
|
|
68
|
+
logger.info(
|
|
69
|
+
f"Score threshold check passed! "
|
|
70
|
+
f"All {len(evaluator_output.conversations)} conversations "
|
|
71
|
+
f"have overall_agent_score >= {score_threshold}",
|
|
72
|
+
)
|
|
73
|
+
return True
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _merge_cli_overrides(yaml_settings: dict, cli_overrides: dict) -> dict:
|
|
77
|
+
"""Merge CLI overrides into YAML settings.
|
|
78
|
+
CLI values take priority.
|
|
79
|
+
|
|
80
|
+
Args:
|
|
81
|
+
yaml_settings: Settings loaded from YAML file
|
|
82
|
+
cli_overrides: Settings provided via CLI options
|
|
83
|
+
(None values are ignored)
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
Merged settings dictionary
|
|
87
|
+
"""
|
|
88
|
+
merged = yaml_settings.copy()
|
|
89
|
+
for key, value in cli_overrides.items():
|
|
90
|
+
if value is not None:
|
|
91
|
+
merged[key] = value
|
|
92
|
+
return merged
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ============================================================================
|
|
96
|
+
# Argparse CLI - Dynamic argument parsing
|
|
97
|
+
# ============================================================================
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _is_value_token(token: str) -> bool:
    """Return True if ``token`` should be consumed as an option's value."""
    if not token.startswith("-"):
        return True
    # A leading dash normally starts another option, but negative numbers
    # such as "-1" or "-0.5" are legitimate values.
    try:
        float(token)
    except ValueError:
        return False
    return True


def parse_extra_args(extra_args: list) -> dict:
    """Parse extra CLI arguments into a dictionary of overrides.

    Three forms are recognised: ``--key value``, ``--key=value`` and a bare
    ``--flag`` (stored as True). Dashes in the key are converted to
    underscores to match the YAML naming; the value is left untouched and
    converted with ``_parse_value``. Tokens that do not start with ``--``
    and are not consumed as a value are ignored.

    Args:
        extra_args: Raw argument tokens remaining after the positionals.

    Returns:
        Mapping of normalised option names to parsed values.
    """
    overrides = {}
    i = 0
    while i < len(extra_args):
        arg = extra_args[i]
        if not arg.startswith("--"):
            i += 1
            continue

        # Split on "=" BEFORE normalising, so only the key has its dashes
        # replaced and a value such as "./my-file.json" is preserved.
        raw_key, has_inline_value, inline_value = arg[2:].partition("=")
        key = raw_key.replace("-", "_")

        if has_inline_value:
            # --key=value format
            overrides[key] = _parse_value(inline_value)
            i += 1
        elif i + 1 < len(extra_args) and _is_value_token(extra_args[i + 1]):
            # --key value format
            overrides[key] = _parse_value(extra_args[i + 1])
            i += 2
        else:
            # Boolean flag without value (--flag means True)
            overrides[key] = True
            i += 1
    return overrides


def _parse_value(value: str) -> bool | int | float | str:
    """Convert a raw CLI string to bool, int, float or str, in that order.

    ``true``/``yes`` and ``false``/``no`` (case-insensitive) become
    booleans; otherwise ``int`` and then ``float`` conversion is attempted,
    and the original string is returned if both fail.
    """
    lowered = value.lower()
    if lowered in ("true", "yes"):
        return True
    if lowered in ("false", "no"):
        return False
    for convert in (int, float):
        try:
            return convert(value)
        except ValueError:
            continue
    return value
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def validate_overrides(overrides: dict, valid_keys: set) -> None:
    """Abort the program if any override key is not a recognised option.

    Args:
        overrides: Parsed CLI overrides keyed by option name.
        valid_keys: Option names accepted by the current command.

    Unknown keys are logged together with the list of valid options and
    the process exits with status 1; otherwise the function returns None.
    """
    unknown = sorted(option for option in overrides if option not in valid_keys)
    if not unknown:
        return

    logger.error(f"Unknown options: {', '.join(unknown)}")
    logger.info(f"Valid options: {', '.join(sorted(valid_keys))}")
    sys.exit(1)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _log_config_summary(label: str, settings: dict) -> None:
    """Log the resolved settings, one ``key: value`` line per entry.

    Args:
        label: Heading prefix for the summary (e.g. "Simulation").
        settings: Resolved configuration; entries are logged sorted by key.
    """
    logger.info(f"\n{label} configuration:")
    for name in sorted(settings):
        logger.info(f" {name}: {settings[name]}")
    # Trailing blank record separates the summary from later output.
    logger.info("")
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _run_show_prompts(category: str | None) -> None:
    """Print the registered evaluation prompts to stdout.

    Args:
        category: Category passed to ``get_prompts_by_category``; may be
            None when no ``--category`` option was given.

    If the registry returns nothing for ``category``, the available
    categories are printed and the process exits with status 1.
    """
    # Imported here rather than at module level, as in the original code.
    from arksim.evaluator.prompt_registry import (
        get_categories,
        get_prompts_by_category,
    )

    matches = get_prompts_by_category(category)
    if not matches:
        available = ", ".join(get_categories())
        print(f"Unknown category: '{category}'. Available: {available}")
        sys.exit(1)

    rule = "=" * 60
    for group in matches:
        print(rule)
        print(f"Category: {group.category}")
        print(f"Description: {group.description}")
        print(rule)
        for prompt in group.prompts:
            print(f"\n--- {prompt.name} ---")
            print(prompt.text.strip())
        print()
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def build_parser(valid_commands: list[str] | None = None) -> argparse.ArgumentParser:
    """Create the argument parser for the ``arksim`` command line.

    The parser takes an optional ``command``, an optional ``config_file``
    and collects every remaining token in ``additional_args``. The epilog
    lists the commands plus one usage example per known command.

    Args:
        valid_commands: Command names to advertise in the help text.
            Defaults to ["simulate", "evaluate", "simulate-evaluate",
            "show-prompts", "ui"].

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    if valid_commands is None:
        valid_commands = [
            "simulate",
            "evaluate",
            "simulate-evaluate",
            "show-prompts",
            "ui",
        ]

    # One usage example per known command, in display order.
    example_by_command = (
        (
            "simulate",
            " arksim simulate config.yaml --scenario-file-path ./scenario.json # Simulate conversations",
        ),
        (
            "evaluate",
            " arksim evaluate config.yaml --simulation-file-path ./results/simulation/simulation.json # Evaluate results",
        ),
        (
            "simulate-evaluate",
            " arksim simulate-evaluate config.yaml # Simulate then evaluate",
        ),
        (
            "show-prompts",
            " arksim show-prompts --category agent_behavior_failure # Show prompts by category",
        ),
        (
            "ui",
            " arksim ui # Launch web UI control plane",
        ),
    )
    examples_str = "\n".join(
        example for command, example in example_by_command if command in valid_commands
    )
    commands_str = ", ".join(valid_commands)

    parser = argparse.ArgumentParser(
        prog="arksim",
        description="Arksim CLI - Run agent simulations and evaluations",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=f"\nCommands: {commands_str}\n\nExamples:\n{examples_str}\n",
    )

    # Both positionals are optional so that a bare ``arksim`` can be
    # detected by the caller.
    parser.add_argument(
        "command",
        type=str,
        nargs="?",
        default="",
        help=f"Command to execute ({commands_str})",
    )
    parser.add_argument(
        "config_file",
        type=str,
        nargs="?",
        default=None,
        help="Path to YAML config file",
    )
    # Everything after the positionals is passed through unparsed.
    parser.add_argument(
        "additional_args",
        nargs=argparse.REMAINDER,
        help="Additional arguments (e.g. --seed 42 --enable-topic-modeling false)",
    )

    return parser
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _load_yaml_settings(config_file: str | None) -> dict:
    """Load the YAML config file into a dict, or return {} if unusable.

    A missing path, a non-existent file or a file without a ``.yaml``/
    ``.yml`` suffix only produces a warning. A file that cannot be read or
    parsed, or whose top level is not a mapping, terminates the process
    with status 1.
    """
    if not config_file:
        logger.warning("No config YAML file provided.")
        return {}

    if not (
        os.path.exists(config_file) and config_file.endswith((".yaml", ".yml"))
    ):
        # Say what is actually wrong instead of claiming no file was given.
        logger.warning(
            f"Config file '{config_file}' does not exist or is not a "
            f".yaml/.yml file; continuing without YAML settings.",
        )
        return {}

    try:
        with open(config_file, encoding="utf-8") as f:
            settings = yaml.safe_load(f) or {}
    except Exception as e:
        logger.error(
            f"Could not load config file '{config_file}': {e}",
        )
        sys.exit(1)

    # Later code treats the settings as a dict (item assignment, ** unpacking).
    if not isinstance(settings, dict):
        logger.error(
            f"Config file '{config_file}' must contain a mapping at the "
            f"top level, got {type(settings).__name__}.",
        )
        sys.exit(1)
    return settings


def main() -> None:
    """Main entry point for the arksim CLI.

    Parses the command line, dispatches ``show-prompts`` and ``ui``
    directly, and for the simulate/evaluate commands builds the input
    models from YAML settings overridden by CLI options. Exits with
    status 1 on an unknown command, invalid options, an unreadable config
    file, or when the score threshold check fails.
    """
    s_time = time.time()
    parser = build_parser()
    args = parser.parse_args()

    valid_commands = [
        "simulate",
        "evaluate",
        "simulate-evaluate",
        "show-prompts",
        "ui",
    ]

    # An empty command (the parser default) is rejected here as well.
    if args.command not in valid_commands:
        parser.print_help()
        sys.exit(1)

    if args.command == "show-prompts":
        overrides = parse_extra_args(args.additional_args)
        category = overrides.get("category")
        _run_show_prompts(category)
        return

    if args.command == "ui":
        from arksim.ui.app import launch_ui

        overrides = parse_extra_args(args.additional_args)
        port_value = overrides.get("port", 8080)
        # A bare ``--port`` parses to True, which int() would turn into 1.
        if isinstance(port_value, bool):
            logger.error("Option --port requires an integer value.")
            sys.exit(1)
        try:
            port = int(port_value)
        except (TypeError, ValueError):
            logger.error(f"Invalid value for --port: {port_value!r}")
            sys.exit(1)
        launch_ui(port=port)
        return

    overrides = parse_extra_args(args.additional_args)

    # Load settings from YAML file if valid, otherwise use empty dict
    settings = _load_yaml_settings(args.config_file)

    # override with the environment variables
    if os.getenv("LLM_PROVIDER"):
        settings["provider"] = os.getenv("LLM_PROVIDER")

    # Apply the LOG_LEVEL environment variable to the arksim logger.
    log_level = os.getenv("LOG_LEVEL")
    if log_level:
        try:
            logging.getLogger("arksim").setLevel(log_level.upper())
        except ValueError:
            # setLevel rejects unknown level names; keep the current level.
            logger.warning(f"Ignoring invalid LOG_LEVEL '{log_level}'.")

    # Extract verbose flag before building model inputs
    verbose = overrides.pop("verbose", False)

    if args.command == "simulate":
        valid_keys = set(SimulationInput.model_fields.keys())
        validate_overrides(overrides, valid_keys)
        settings = _merge_cli_overrides(settings, overrides)
        simulation_input = SimulationInput(**settings)
        _log_config_summary("Simulation", simulation_input.model_dump())
        asyncio.run(run_simulation(simulation_input, verbose=verbose))
    elif args.command == "evaluate":
        valid_keys = set(EvaluationInput.model_fields.keys())
        validate_overrides(overrides, valid_keys)
        settings = _merge_cli_overrides(settings, overrides)
        evaluation_input = EvaluationInput(**settings)
        _log_config_summary("Evaluation", evaluation_input.model_dump())
        evaluator_output = run_evaluation(evaluation_input)

        # Check score threshold if specified
        if not _check_score_threshold(
            evaluator_output, evaluation_input.score_threshold
        ):
            sys.exit(1)
    elif args.command == "simulate-evaluate":
        valid_keys = set(SimulationInput.model_fields.keys()) | set(
            EvaluationInput.model_fields.keys()
        )
        validate_overrides(overrides, valid_keys)
        settings = _merge_cli_overrides(settings, overrides)

        # Each model only receives the settings that are its own fields.
        simulation_settings = {
            k: v for k, v in settings.items() if k in SimulationInput.model_fields
        }
        simulation_input = SimulationInput(**simulation_settings)
        _log_config_summary("Simulation", simulation_input.model_dump())

        sim_start = time.time()
        simulation_output = asyncio.run(
            run_simulation(simulation_input, verbose=verbose)
        )
        sim_elapsed = time.time() - sim_start
        logger.info(f"Simulation completed in {sim_elapsed:.2f} seconds")

        evaluation_settings = {
            k: v for k, v in settings.items() if k in EvaluationInput.model_fields
        }
        # The simulation result is passed in memory to run_evaluation below,
        # so the input-directory validation is skipped via the context flag.
        evaluation_input = EvaluationInput.model_validate(
            evaluation_settings,
            context={"skip_input_dir_validation": True},
        )
        _log_config_summary("Evaluation", evaluation_input.model_dump())

        eval_start = time.time()
        evaluator_output = run_evaluation(
            evaluation_input, simulation=simulation_output
        )
        eval_elapsed = time.time() - eval_start
        logger.info(f"Evaluation completed in {eval_elapsed:.2f} seconds")

        if not _check_score_threshold(
            evaluator_output, evaluation_input.score_threshold
        ):
            sys.exit(1)

    logger.info(f"Total elapsed: {time.time() - s_time:.2f} seconds")
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
if __name__ == "__main__":
|
|
395
|
+
main()
|
|
File without changes
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
if sys.version_info >= (3, 11):
|
|
7
|
+
from typing import Self
|
|
8
|
+
else:
|
|
9
|
+
from typing_extensions import Self
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, Field, model_validator
|
|
12
|
+
|
|
13
|
+
from arksim.config.types import AgentType
|
|
14
|
+
from arksim.config.utils import resolve_env_vars
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class A2AConfig(BaseModel):
    """Connection settings for an agent reached over A2A (Agent-to-Agent)."""

    # Required URL of the A2A agent server.
    endpoint: str = Field(..., description="Endpoint URL for the A2A agent server")
    # Optional request headers; values may contain ${ENV_VAR} placeholders.
    headers: dict[str, str] | None = Field(
        None,
        description="HTTP headers for A2A requests. Values can use ${ENV_VAR} syntax for environment variable substitution.",
    )

    def get_headers(self) -> dict[str, str] | None:
        """Return the headers with ${ENV_VAR} placeholders resolved.

        Returns:
            The resolved headers, or None when no headers are configured
            (``headers`` is None or empty).
        """
        return resolve_env_vars(self.headers) if self.headers else None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ChatCompletionsConfig(BaseModel):
    """API configuration for chat completion agent type."""

    # New format fields (optional for new format support)
    endpoint: str | None = Field(
        None, description="Chat completion endpoint URL (new format)"
    )
    headers: dict[str, str] | None = Field(
        None,
        description="HTTP headers for chat requests (new format). Values can use ${ENV_VAR} syntax for env var substitution.",
    )
    body: dict[str, Any] | None = Field(
        None,
        description="Request body template (new format, may contain messages array with {chat_id} placeholder)",
    )

    # Optional fields for Azure OpenAI
    azure_config: dict[str, Any] | None = Field(
        None, description="Azure OpenAI configuration"
    )

    @model_validator(mode="after")
    def validate_config_format(self) -> Self:
        """Validate that required fields are provided.

        Raises:
            ValueError: If ``body`` is missing or empty.
        """
        if not self.body:
            raise ValueError(
                "ChatCompletions agent configuration requires 'body' field"
            )

        return self

    def get_endpoint(self) -> str:
        """Return the endpoint URL with ${ENV_VAR} placeholders resolved.

        Raises:
            ValueError: If no ``endpoint`` is configured.
        """
        # ``endpoint`` is optional on the model, so fail with a clear message
        # instead of letting the regex substitution choke on None.
        if self.endpoint is None:
            raise ValueError(
                "ChatCompletions agent configuration requires 'endpoint' field"
            )
        # resolve environment variables in the endpoint
        resolved = resolve_env_vars({"endpoint": self.endpoint})
        return resolved["endpoint"]

    def get_headers(self) -> dict[str, str]:
        """Return the headers with ${ENV_VAR} placeholders resolved.

        Returns:
            The resolved headers; an empty dict when none are configured.
        """
        return resolve_env_vars(self.headers or {})
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class AgentConfig(BaseModel):
    """Agent configuration."""

    agent_name: str = Field(..., description="Unique identifier for the agent")
    agent_type: str = Field(..., description="Agent type identifier")
    api_config: ChatCompletionsConfig | A2AConfig = Field(
        ..., description="Agent configuration"
    )

    @model_validator(mode="before")
    @classmethod
    def parse_config(cls, data: object) -> object:
        """Build ``api_config`` with the model selected by ``agent_type``.

        Args:
            data: Raw input passed to the model; must be a dictionary.

        Returns:
            A copy of ``data`` whose ``api_config`` is an instance of the
            matching config class. The caller's dictionary is not modified.

        Raises:
            ValueError: If ``data`` is not a dictionary, ``agent_type`` is
                unsupported, or ``api_config`` is neither a mapping nor an
                instance of the matching config class.
        """
        from collections.abc import Mapping

        if not isinstance(data, dict):
            raise ValueError("Agent configuration must be a dictionary")

        agent_type = data.get("agent_type")
        if agent_type == AgentType.CHAT_COMPLETIONS.value:
            config_cls = ChatCompletionsConfig
        elif agent_type == AgentType.A2A.value:
            config_cls = A2AConfig
        else:
            raise ValueError(f"Unsupported agent type: {agent_type}")

        config_data = data.get("api_config")
        if isinstance(config_data, config_cls):
            # Already constructed by the caller; nothing to convert.
            return data
        if not isinstance(config_data, Mapping):
            # Covers a missing ``api_config`` (None), which would otherwise
            # surface as a bare TypeError from ``**`` unpacking.
            raise ValueError(
                f"Agent configuration requires 'api_config' to be a "
                f"dictionary or a {config_cls.__name__} instance"
            )

        # Return a shallow copy so validation does not mutate the input dict.
        return {**data, "api_config": config_cls(**config_data)}

    @classmethod
    def load(cls, path: str | Path) -> "AgentConfig":
        """Load agent configuration from a JSON file.

        Args:
            path: Location of the JSON configuration file.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            ValueError: If the file does not contain valid JSON.
        """
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Config file not found: {path}")

        try:
            # Read as UTF-8 rather than the platform's locale encoding.
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in {path}: {e}") from e

        return cls.model_validate(data)
|
arksim/config/types.py
ADDED
arksim/config/utils.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def resolve_env_vars(headers: dict[str, str]) -> dict[str, str]:
    """Resolve ${ENV_VAR} patterns in the values of ``headers``.

    Each ``${NAME}`` occurrence is replaced by the value of the environment
    variable ``NAME``. An unset or empty variable is replaced by an empty
    string and a warning is logged, except for ``AZURE_ACCESS_TOKEN``,
    which is then obtained from the Azure token provider instead.

    Args:
        headers: Mapping whose string values may contain placeholders.

    Returns:
        A new dictionary with the same keys and resolved values.
    """
    env_var_pattern = re.compile(r"\$\{([^}]+)\}")

    # Defined once rather than once per header: it does not depend on the
    # entry being processed.
    def replace_env_var(match: re.Match[str]) -> str:
        env_var_name = match.group(1)
        # Read the environment once so the check and the returned value agree.
        env_value = os.getenv(env_var_name, "")
        if env_value:
            return env_value

        # For Azure Agent, the access token is generated at the runtime,
        # so no "not set" warning is emitted for it.
        if env_var_name == "AZURE_ACCESS_TOKEN":
            from arksim.llms.utils import (
                check_azure_env_vars,
                get_azure_token_provider,
            )

            check_azure_env_vars()
            azure_token = get_azure_token_provider(
                client_id=os.getenv("AZURE_CLIENT_ID")
            )
            return azure_token()

        logger.warning(
            f"Warning: Environment variable {env_var_name} configured in the agent configuration file is not set."
        )
        return ""

    return {
        key: env_var_pattern.sub(replace_env_var, value)
        for key, value in headers.items()
    }
|