agenta 0.70.1__py3-none-any.whl → 0.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agenta/__init__.py +9 -3
- agenta/sdk/__init__.py +2 -4
- agenta/sdk/agenta_init.py +22 -75
- agenta/sdk/assets.py +57 -0
- agenta/sdk/context/serving.py +2 -0
- agenta/sdk/contexts/routing.py +2 -0
- agenta/sdk/contexts/running.py +3 -2
- agenta/sdk/decorators/running.py +8 -4
- agenta/sdk/decorators/serving.py +65 -26
- agenta/sdk/decorators/tracing.py +51 -30
- agenta/sdk/engines/tracing/inline.py +8 -1
- agenta/sdk/engines/tracing/processors.py +23 -12
- agenta/sdk/evaluations/preview/evaluate.py +36 -8
- agenta/sdk/evaluations/runs.py +2 -1
- agenta/sdk/litellm/mockllm.py +2 -2
- agenta/sdk/managers/config.py +3 -1
- agenta/sdk/managers/secrets.py +25 -8
- agenta/sdk/managers/testsets.py +143 -227
- agenta/sdk/middleware/config.py +3 -1
- agenta/sdk/middleware/otel.py +3 -1
- agenta/sdk/middleware/vault.py +33 -18
- agenta/sdk/middlewares/routing/otel.py +1 -1
- agenta/sdk/middlewares/running/vault.py +33 -17
- agenta/sdk/router.py +30 -5
- agenta/sdk/tracing/inline.py +8 -1
- agenta/sdk/tracing/processors.py +8 -3
- agenta/sdk/tracing/propagation.py +9 -12
- agenta/sdk/types.py +19 -21
- agenta/sdk/utils/client.py +10 -9
- agenta/sdk/utils/lazy.py +253 -0
- agenta/sdk/workflows/builtin.py +2 -0
- agenta/sdk/workflows/configurations.py +1 -0
- agenta/sdk/workflows/handlers.py +236 -81
- agenta/sdk/workflows/interfaces.py +47 -0
- agenta/sdk/workflows/runners/base.py +6 -2
- agenta/sdk/workflows/runners/daytona.py +250 -131
- agenta/sdk/workflows/runners/local.py +22 -56
- agenta/sdk/workflows/runners/registry.py +1 -1
- agenta/sdk/workflows/sandbox.py +17 -5
- agenta/sdk/workflows/templates.py +81 -0
- agenta/sdk/workflows/utils.py +6 -0
- {agenta-0.70.1.dist-info → agenta-0.75.0.dist-info}/METADATA +4 -8
- {agenta-0.70.1.dist-info → agenta-0.75.0.dist-info}/RECORD +44 -44
- agenta/config.py +0 -25
- agenta/config.toml +0 -4
- {agenta-0.70.1.dist-info → agenta-0.75.0.dist-info}/WHEEL +0 -0
agenta/sdk/workflows/handlers.py
CHANGED
|
@@ -1,63 +1,69 @@
|
|
|
1
|
-
from typing import List, Any, Optional, Any, Dict, Union
|
|
2
|
-
from json import dumps, loads
|
|
3
|
-
import traceback
|
|
4
1
|
import json
|
|
5
|
-
import re
|
|
6
2
|
import math
|
|
3
|
+
import re
|
|
4
|
+
import traceback
|
|
5
|
+
from difflib import SequenceMatcher
|
|
6
|
+
from json import dumps, loads
|
|
7
|
+
from typing import Any, Dict, List, Optional, Union
|
|
7
8
|
|
|
8
9
|
import httpx
|
|
9
10
|
|
|
10
|
-
import litellm
|
|
11
|
-
|
|
12
11
|
from pydantic import BaseModel, Field
|
|
13
|
-
from openai import AsyncOpenAI, OpenAIError
|
|
14
|
-
from difflib import SequenceMatcher
|
|
15
12
|
|
|
16
13
|
from agenta.sdk.utils.logging import get_module_logger
|
|
14
|
+
from agenta.sdk.utils.lazy import (
|
|
15
|
+
_load_jinja2,
|
|
16
|
+
_load_jsonpath,
|
|
17
|
+
_load_litellm,
|
|
18
|
+
_load_openai,
|
|
19
|
+
)
|
|
17
20
|
|
|
18
21
|
from agenta.sdk.litellm import mockllm
|
|
19
22
|
from agenta.sdk.types import PromptTemplate, Message
|
|
20
23
|
from agenta.sdk.managers.secrets import SecretsManager
|
|
21
|
-
|
|
22
24
|
from agenta.sdk.decorators.tracing import instrument
|
|
23
|
-
|
|
25
|
+
from agenta.sdk.litellm.litellm import litellm_handler
|
|
24
26
|
from agenta.sdk.models.shared import Data
|
|
25
|
-
from agenta.sdk.models.tracing import Trace
|
|
26
27
|
from agenta.sdk.workflows.sandbox import execute_code_safely
|
|
28
|
+
from agenta.sdk.workflows.templates import EVALUATOR_TEMPLATES
|
|
27
29
|
from agenta.sdk.workflows.errors import (
|
|
30
|
+
CustomCodeServerV0Error,
|
|
28
31
|
InvalidConfigurationParametersV0Error,
|
|
29
|
-
MissingConfigurationParameterV0Error,
|
|
30
32
|
InvalidConfigurationParameterV0Error,
|
|
31
33
|
InvalidInputsV0Error,
|
|
32
|
-
MissingInputV0Error,
|
|
33
34
|
InvalidInputV0Error,
|
|
34
35
|
InvalidOutputsV0Error,
|
|
35
|
-
MissingOutputV0Error,
|
|
36
36
|
InvalidSecretsV0Error,
|
|
37
37
|
JSONDiffV0Error,
|
|
38
38
|
LevenshteinDistanceV0Error,
|
|
39
|
-
|
|
39
|
+
MissingConfigurationParameterV0Error,
|
|
40
|
+
MissingInputV0Error,
|
|
41
|
+
PromptCompletionV0Error,
|
|
42
|
+
PromptFormattingV0Error,
|
|
43
|
+
RegexPatternV0Error,
|
|
40
44
|
SemanticSimilarityV0Error,
|
|
41
|
-
|
|
45
|
+
SyntacticSimilarityV0Error,
|
|
42
46
|
WebhookClientV0Error,
|
|
43
|
-
|
|
44
|
-
RegexPatternV0Error,
|
|
45
|
-
PromptFormattingV0Error,
|
|
46
|
-
PromptCompletionV0Error,
|
|
47
|
+
WebhookServerV0Error,
|
|
47
48
|
)
|
|
48
49
|
|
|
49
|
-
|
|
50
|
-
from agenta.sdk.litellm.litellm import litellm_handler
|
|
50
|
+
log = get_module_logger(__name__)
|
|
51
51
|
|
|
52
|
-
litellm.logging = False
|
|
53
|
-
litellm.set_verbose = False
|
|
54
|
-
litellm.drop_params = True
|
|
55
|
-
# litellm.turn_off_message_logging = True
|
|
56
|
-
mockllm.litellm = litellm
|
|
57
52
|
|
|
58
|
-
|
|
53
|
+
def _configure_litellm():
|
|
54
|
+
"""Lazy configuration of litellm - only imported when needed."""
|
|
55
|
+
litellm = _load_litellm()
|
|
56
|
+
if not litellm:
|
|
57
|
+
raise ImportError("litellm is required for completion handling.")
|
|
59
58
|
|
|
60
|
-
|
|
59
|
+
litellm.logging = False
|
|
60
|
+
litellm.set_verbose = False
|
|
61
|
+
litellm.drop_params = True
|
|
62
|
+
# litellm.turn_off_message_logging = True
|
|
63
|
+
mockllm.litellm = litellm
|
|
64
|
+
litellm.callbacks = [litellm_handler()]
|
|
65
|
+
|
|
66
|
+
return litellm
|
|
61
67
|
|
|
62
68
|
|
|
63
69
|
async def _compute_embedding(openai: Any, model: str, input: str) -> List[float]:
|
|
@@ -76,16 +82,8 @@ def _compute_similarity(embedding_1: List[float], embedding_2: List[float]) -> f
|
|
|
76
82
|
return dot / (norm1 * norm2)
|
|
77
83
|
|
|
78
84
|
|
|
79
|
-
import
|
|
80
|
-
import re
|
|
81
|
-
from typing import Any, Dict, Iterable, Tuple, Optional
|
|
85
|
+
from typing import Any, Iterable, Tuple
|
|
82
86
|
|
|
83
|
-
try:
|
|
84
|
-
import jsonpath # ✅ use module API
|
|
85
|
-
from jsonpath import JSONPointer # pointer class is fine to use
|
|
86
|
-
except Exception:
|
|
87
|
-
jsonpath = None
|
|
88
|
-
JSONPointer = None
|
|
89
87
|
|
|
90
88
|
# ========= Scheme detection =========
|
|
91
89
|
|
|
@@ -128,7 +126,8 @@ def resolve_dot_notation(expr: str, data: dict) -> object:
|
|
|
128
126
|
|
|
129
127
|
|
|
130
128
|
def resolve_json_path(expr: str, data: dict) -> object:
|
|
131
|
-
|
|
129
|
+
json_path, _ = _load_jsonpath()
|
|
130
|
+
if json_path is None:
|
|
132
131
|
raise ImportError("python-jsonpath is required for json-path ($...)")
|
|
133
132
|
|
|
134
133
|
if not (expr == "$" or expr.startswith("$.") or expr.startswith("$[")):
|
|
@@ -138,15 +137,16 @@ def resolve_json_path(expr: str, data: dict) -> object:
|
|
|
138
137
|
)
|
|
139
138
|
|
|
140
139
|
# Use package-level APIf
|
|
141
|
-
results =
|
|
140
|
+
results = json_path.findall(expr, data) # always returns a list
|
|
142
141
|
return results[0] if len(results) == 1 else results
|
|
143
142
|
|
|
144
143
|
|
|
145
144
|
def resolve_json_pointer(expr: str, data: Dict[str, Any]) -> Any:
|
|
146
145
|
"""Resolve a JSON Pointer; returns a single value."""
|
|
147
|
-
|
|
146
|
+
_, json_pointer = _load_jsonpath()
|
|
147
|
+
if json_pointer is None:
|
|
148
148
|
raise ImportError("python-jsonpath is required for json-pointer (/...)")
|
|
149
|
-
return
|
|
149
|
+
return json_pointer(expr).resolve(data)
|
|
150
150
|
|
|
151
151
|
|
|
152
152
|
def resolve_any(expr: str, data: Dict[str, Any]) -> Any:
|
|
@@ -214,12 +214,10 @@ def compute_truly_unreplaced(original: set, rendered: str) -> set:
|
|
|
214
214
|
|
|
215
215
|
def missing_lib_hints(unreplaced: set) -> Optional[str]:
|
|
216
216
|
"""Suggest installing python-jsonpath if placeholders indicate json-path or json-pointer usage."""
|
|
217
|
-
if any(expr.startswith("$") or expr.startswith("/") for expr in unreplaced)
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
"Install python-jsonpath to enable json-path ($...) and json-pointer (/...)"
|
|
222
|
-
)
|
|
217
|
+
if any(expr.startswith("$") or expr.startswith("/") for expr in unreplaced):
|
|
218
|
+
json_path, json_pointer = _load_jsonpath()
|
|
219
|
+
if json_path is None or json_pointer is None:
|
|
220
|
+
return "Install python-jsonpath to enable json-path ($...) and json-pointer (/...)"
|
|
223
221
|
return None
|
|
224
222
|
|
|
225
223
|
|
|
@@ -233,7 +231,7 @@ def _format_with_template(
|
|
|
233
231
|
return content.format(**kwargs)
|
|
234
232
|
|
|
235
233
|
elif format == "jinja2":
|
|
236
|
-
|
|
234
|
+
Template, TemplateError = _load_jinja2()
|
|
237
235
|
|
|
238
236
|
try:
|
|
239
237
|
return Template(content).render(**kwargs)
|
|
@@ -389,7 +387,7 @@ def auto_exact_match_v0(
|
|
|
389
387
|
if parameters is None or not isinstance(parameters, dict):
|
|
390
388
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
391
389
|
|
|
392
|
-
if
|
|
390
|
+
if "correct_answer_key" not in parameters:
|
|
393
391
|
raise MissingConfigurationParameterV0Error(path="correct_answer_key")
|
|
394
392
|
|
|
395
393
|
correct_answer_key = str(parameters["correct_answer_key"])
|
|
@@ -397,7 +395,7 @@ def auto_exact_match_v0(
|
|
|
397
395
|
if inputs is None or not isinstance(inputs, dict):
|
|
398
396
|
raise InvalidInputsV0Error(expected="dict", got=inputs)
|
|
399
397
|
|
|
400
|
-
if not
|
|
398
|
+
if correct_answer_key not in inputs:
|
|
401
399
|
raise MissingInputV0Error(path=correct_answer_key)
|
|
402
400
|
|
|
403
401
|
correct_answer = inputs[correct_answer_key]
|
|
@@ -434,7 +432,7 @@ def auto_regex_test_v0(
|
|
|
434
432
|
if parameters is None or not isinstance(parameters, dict):
|
|
435
433
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
436
434
|
|
|
437
|
-
if
|
|
435
|
+
if "regex_pattern" not in parameters:
|
|
438
436
|
raise MissingConfigurationParameterV0Error(path="regex_pattern")
|
|
439
437
|
|
|
440
438
|
regex_pattern = parameters["regex_pattern"]
|
|
@@ -492,12 +490,12 @@ def field_match_test_v0(
|
|
|
492
490
|
if parameters is None or not isinstance(parameters, dict):
|
|
493
491
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
494
492
|
|
|
495
|
-
if
|
|
493
|
+
if "json_field" not in parameters:
|
|
496
494
|
raise MissingConfigurationParameterV0Error(path="json_field")
|
|
497
495
|
|
|
498
496
|
json_field = str(parameters["json_field"])
|
|
499
497
|
|
|
500
|
-
if
|
|
498
|
+
if "correct_answer_key" not in parameters:
|
|
501
499
|
raise MissingConfigurationParameterV0Error(path="correct_answer_key")
|
|
502
500
|
|
|
503
501
|
correct_answer_key = str(parameters["correct_answer_key"])
|
|
@@ -505,7 +503,7 @@ def field_match_test_v0(
|
|
|
505
503
|
if inputs is None or not isinstance(inputs, dict):
|
|
506
504
|
raise InvalidInputsV0Error(expected="dict", got=inputs)
|
|
507
505
|
|
|
508
|
-
if not
|
|
506
|
+
if correct_answer_key not in inputs:
|
|
509
507
|
raise MissingInputV0Error(path=correct_answer_key)
|
|
510
508
|
|
|
511
509
|
correct_answer = inputs[correct_answer_key]
|
|
@@ -518,7 +516,7 @@ def field_match_test_v0(
|
|
|
518
516
|
if isinstance(outputs, str):
|
|
519
517
|
try:
|
|
520
518
|
outputs_dict = loads(outputs)
|
|
521
|
-
except json.JSONDecodeError
|
|
519
|
+
except json.JSONDecodeError:
|
|
522
520
|
# raise InvalidOutputsV0Error(expected="dict", got=outputs) from e
|
|
523
521
|
return {"success": False}
|
|
524
522
|
|
|
@@ -526,7 +524,7 @@ def field_match_test_v0(
|
|
|
526
524
|
# raise InvalidOutputsV0Error(expected=["dict", "str"], got=outputs)
|
|
527
525
|
return {"success": False}
|
|
528
526
|
|
|
529
|
-
if not
|
|
527
|
+
if json_field not in outputs_dict:
|
|
530
528
|
# raise MissingOutputV0Error(path=json_field)
|
|
531
529
|
return {"success": False}
|
|
532
530
|
|
|
@@ -537,6 +535,148 @@ def field_match_test_v0(
|
|
|
537
535
|
return {"success": success}
|
|
538
536
|
|
|
539
537
|
|
|
538
|
+
def _get_nested_value(obj: Any, path: str) -> Any:
|
|
539
|
+
"""
|
|
540
|
+
Get value from nested object using resolve_any() with graceful None on failure.
|
|
541
|
+
|
|
542
|
+
Supports multiple path formats:
|
|
543
|
+
- Dot notation: "user.address.city", "items.0.name"
|
|
544
|
+
- JSON Path: "$.user.address.city", "$.items[0].name"
|
|
545
|
+
- JSON Pointer: "/user/address/city", "/items/0/name"
|
|
546
|
+
|
|
547
|
+
Args:
|
|
548
|
+
obj: The object to traverse (dict or list)
|
|
549
|
+
path: Path expression in any supported format
|
|
550
|
+
|
|
551
|
+
Returns:
|
|
552
|
+
The value at the path, or None if path doesn't exist or resolution fails
|
|
553
|
+
"""
|
|
554
|
+
if obj is None:
|
|
555
|
+
return None
|
|
556
|
+
|
|
557
|
+
try:
|
|
558
|
+
return resolve_any(path, obj)
|
|
559
|
+
except (KeyError, IndexError, ValueError, TypeError, ImportError):
|
|
560
|
+
return None
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
@instrument(annotate=True)
|
|
564
|
+
def json_multi_field_match_v0(
|
|
565
|
+
parameters: Optional[Data] = None,
|
|
566
|
+
inputs: Optional[Data] = None,
|
|
567
|
+
outputs: Optional[Union[Data, str]] = None,
|
|
568
|
+
) -> Any:
|
|
569
|
+
"""
|
|
570
|
+
Multi-field JSON match evaluator for comparing multiple fields between expected and actual JSON.
|
|
571
|
+
|
|
572
|
+
Each configured field becomes a separate score (0 or 1), and an aggregate_score shows
|
|
573
|
+
the percentage of matching fields. Useful for entity extraction validation.
|
|
574
|
+
|
|
575
|
+
Args:
|
|
576
|
+
inputs: Testcase data with ground truth JSON
|
|
577
|
+
outputs: Output from the workflow execution (expected to be JSON string or dict)
|
|
578
|
+
parameters: Configuration with:
|
|
579
|
+
- fields: List of field paths to compare (e.g., ["name", "user.address.city"])
|
|
580
|
+
- correct_answer_key: Key in inputs containing the expected JSON
|
|
581
|
+
|
|
582
|
+
Returns:
|
|
583
|
+
Dict with per-field scores and aggregate_score, e.g.:
|
|
584
|
+
{"name": 1.0, "email": 0.0, "aggregate_score": 0.5}
|
|
585
|
+
"""
|
|
586
|
+
if parameters is None or not isinstance(parameters, dict):
|
|
587
|
+
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
588
|
+
|
|
589
|
+
if "fields" not in parameters:
|
|
590
|
+
raise MissingConfigurationParameterV0Error(path="fields")
|
|
591
|
+
|
|
592
|
+
fields = parameters["fields"]
|
|
593
|
+
|
|
594
|
+
if not isinstance(fields, list) or len(fields) == 0:
|
|
595
|
+
raise InvalidConfigurationParameterV0Error(
|
|
596
|
+
path="fields",
|
|
597
|
+
expected="non-empty list",
|
|
598
|
+
got=fields,
|
|
599
|
+
)
|
|
600
|
+
|
|
601
|
+
if "correct_answer_key" not in parameters:
|
|
602
|
+
raise MissingConfigurationParameterV0Error(path="correct_answer_key")
|
|
603
|
+
|
|
604
|
+
correct_answer_key = str(parameters["correct_answer_key"])
|
|
605
|
+
|
|
606
|
+
if inputs is None or not isinstance(inputs, dict):
|
|
607
|
+
raise InvalidInputsV0Error(expected="dict", got=inputs)
|
|
608
|
+
|
|
609
|
+
if correct_answer_key not in inputs:
|
|
610
|
+
raise MissingInputV0Error(path=correct_answer_key)
|
|
611
|
+
|
|
612
|
+
correct_answer = inputs[correct_answer_key]
|
|
613
|
+
|
|
614
|
+
# Parse ground truth JSON
|
|
615
|
+
if isinstance(correct_answer, str):
|
|
616
|
+
try:
|
|
617
|
+
expected = json.loads(correct_answer)
|
|
618
|
+
except json.JSONDecodeError:
|
|
619
|
+
raise InvalidInputV0Error(
|
|
620
|
+
path=correct_answer_key,
|
|
621
|
+
expected="valid JSON string",
|
|
622
|
+
got=correct_answer,
|
|
623
|
+
)
|
|
624
|
+
elif isinstance(correct_answer, dict):
|
|
625
|
+
expected = correct_answer
|
|
626
|
+
else:
|
|
627
|
+
raise InvalidInputV0Error(
|
|
628
|
+
path=correct_answer_key,
|
|
629
|
+
expected=["dict", "str"],
|
|
630
|
+
got=correct_answer,
|
|
631
|
+
)
|
|
632
|
+
|
|
633
|
+
# Parse output JSON
|
|
634
|
+
if not isinstance(outputs, str) and not isinstance(outputs, dict):
|
|
635
|
+
# Return all zeros if output is invalid
|
|
636
|
+
results: Dict[str, Any] = {field: 0.0 for field in fields}
|
|
637
|
+
results["aggregate_score"] = 0.0
|
|
638
|
+
return results
|
|
639
|
+
|
|
640
|
+
if isinstance(outputs, str):
|
|
641
|
+
try:
|
|
642
|
+
actual = json.loads(outputs)
|
|
643
|
+
except json.JSONDecodeError:
|
|
644
|
+
# Return all zeros if output is not valid JSON
|
|
645
|
+
results = {field: 0.0 for field in fields}
|
|
646
|
+
results["aggregate_score"] = 0.0
|
|
647
|
+
return results
|
|
648
|
+
else:
|
|
649
|
+
actual = outputs
|
|
650
|
+
|
|
651
|
+
if not isinstance(actual, dict):
|
|
652
|
+
# Return all zeros if parsed output is not a dict
|
|
653
|
+
results = {field: 0.0 for field in fields}
|
|
654
|
+
results["aggregate_score"] = 0.0
|
|
655
|
+
return results
|
|
656
|
+
|
|
657
|
+
# --------------------------------------------------------------------------
|
|
658
|
+
# Compare each configured field
|
|
659
|
+
results = {}
|
|
660
|
+
matches = 0
|
|
661
|
+
|
|
662
|
+
for field_path in fields:
|
|
663
|
+
expected_val = _get_nested_value(expected, field_path)
|
|
664
|
+
actual_val = _get_nested_value(actual, field_path)
|
|
665
|
+
|
|
666
|
+
# Exact match comparison
|
|
667
|
+
match = expected_val == actual_val
|
|
668
|
+
|
|
669
|
+
results[field_path] = 1.0 if match else 0.0
|
|
670
|
+
if match:
|
|
671
|
+
matches += 1
|
|
672
|
+
|
|
673
|
+
# Aggregate score is the percentage of matching fields
|
|
674
|
+
results["aggregate_score"] = matches / len(fields) if fields else 0.0
|
|
675
|
+
# --------------------------------------------------------------------------
|
|
676
|
+
|
|
677
|
+
return results
|
|
678
|
+
|
|
679
|
+
|
|
540
680
|
@instrument(annotate=True)
|
|
541
681
|
async def auto_webhook_test_v0(
|
|
542
682
|
parameters: Optional[Data] = None,
|
|
@@ -557,12 +697,12 @@ async def auto_webhook_test_v0(
|
|
|
557
697
|
if parameters is None or not isinstance(parameters, dict):
|
|
558
698
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
559
699
|
|
|
560
|
-
if
|
|
700
|
+
if "webhook_url" not in parameters:
|
|
561
701
|
raise MissingConfigurationParameterV0Error(path="webhook_url")
|
|
562
702
|
|
|
563
703
|
webhook_url = str(parameters["webhook_url"])
|
|
564
704
|
|
|
565
|
-
if
|
|
705
|
+
if "correct_answer_key" not in parameters:
|
|
566
706
|
raise MissingConfigurationParameterV0Error(path="correct_answer_key")
|
|
567
707
|
|
|
568
708
|
correct_answer_key = str(parameters["correct_answer_key"])
|
|
@@ -570,7 +710,7 @@ async def auto_webhook_test_v0(
|
|
|
570
710
|
if inputs is None or not isinstance(inputs, dict):
|
|
571
711
|
raise InvalidInputsV0Error(expected="dict", got=inputs)
|
|
572
712
|
|
|
573
|
-
if not
|
|
713
|
+
if correct_answer_key not in inputs:
|
|
574
714
|
raise MissingInputV0Error(path=correct_answer_key)
|
|
575
715
|
|
|
576
716
|
correct_answer = inputs[correct_answer_key]
|
|
@@ -662,12 +802,12 @@ async def auto_custom_code_run_v0(
|
|
|
662
802
|
if parameters is None or not isinstance(parameters, dict):
|
|
663
803
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
664
804
|
|
|
665
|
-
if
|
|
805
|
+
if "code" not in parameters:
|
|
666
806
|
raise MissingConfigurationParameterV0Error(path="code")
|
|
667
807
|
|
|
668
808
|
code = str(parameters["code"])
|
|
669
809
|
|
|
670
|
-
if
|
|
810
|
+
if "correct_answer_key" not in parameters:
|
|
671
811
|
raise MissingConfigurationParameterV0Error(path="correct_answer_key")
|
|
672
812
|
|
|
673
813
|
correct_answer_key = str(parameters["correct_answer_key"])
|
|
@@ -675,7 +815,7 @@ async def auto_custom_code_run_v0(
|
|
|
675
815
|
if inputs is None or not isinstance(inputs, dict):
|
|
676
816
|
raise InvalidInputsV0Error(expected="dict", got=inputs)
|
|
677
817
|
|
|
678
|
-
if not
|
|
818
|
+
if correct_answer_key not in inputs:
|
|
679
819
|
raise MissingInputV0Error(path=correct_answer_key)
|
|
680
820
|
|
|
681
821
|
correct_answer = inputs[correct_answer_key]
|
|
@@ -701,6 +841,15 @@ async def auto_custom_code_run_v0(
|
|
|
701
841
|
|
|
702
842
|
_outputs = None
|
|
703
843
|
|
|
844
|
+
runtime = parameters.get("runtime") or "python"
|
|
845
|
+
|
|
846
|
+
if runtime not in ["python", "javascript", "typescript"]:
|
|
847
|
+
raise InvalidConfigurationParameterV0Error(
|
|
848
|
+
path="runtime",
|
|
849
|
+
expected="['python', 'javascript', 'typescript']",
|
|
850
|
+
got=runtime,
|
|
851
|
+
)
|
|
852
|
+
|
|
704
853
|
# --------------------------------------------------------------------------
|
|
705
854
|
try:
|
|
706
855
|
_outputs = execute_code_safely(
|
|
@@ -709,6 +858,8 @@ async def auto_custom_code_run_v0(
|
|
|
709
858
|
output=outputs,
|
|
710
859
|
correct_answer=correct_answer,
|
|
711
860
|
code=code,
|
|
861
|
+
runtime=runtime,
|
|
862
|
+
templates=EVALUATOR_TEMPLATES.get("v0", {}),
|
|
712
863
|
)
|
|
713
864
|
except Exception as e:
|
|
714
865
|
raise CustomCodeServerV0Error(
|
|
@@ -753,7 +904,7 @@ async def auto_ai_critique_v0(
|
|
|
753
904
|
|
|
754
905
|
correct_answer_key = parameters.get("correct_answer_key")
|
|
755
906
|
|
|
756
|
-
if
|
|
907
|
+
if "prompt_template" not in parameters:
|
|
757
908
|
raise MissingConfigurationParameterV0Error(path="prompt_template")
|
|
758
909
|
|
|
759
910
|
prompt_template = parameters.get("prompt_template")
|
|
@@ -784,7 +935,7 @@ async def auto_ai_critique_v0(
|
|
|
784
935
|
"json_schema" if template_version == "4" else "text"
|
|
785
936
|
)
|
|
786
937
|
|
|
787
|
-
if not
|
|
938
|
+
if response_type not in ["text", "json_object", "json_schema"]:
|
|
788
939
|
raise InvalidConfigurationParameterV0Error(
|
|
789
940
|
path="response_type",
|
|
790
941
|
expected=["text", "json_object", "json_schema"],
|
|
@@ -817,7 +968,7 @@ async def auto_ai_critique_v0(
|
|
|
817
968
|
if correct_answer_key in inputs:
|
|
818
969
|
correct_answer = inputs[correct_answer_key]
|
|
819
970
|
|
|
820
|
-
secrets = await SecretsManager.retrieve_secrets()
|
|
971
|
+
secrets, _, _ = await SecretsManager.retrieve_secrets()
|
|
821
972
|
|
|
822
973
|
if secrets is None or not isinstance(secrets, list):
|
|
823
974
|
raise InvalidSecretsV0Error(expected="list", got=secrets)
|
|
@@ -862,6 +1013,9 @@ async def auto_ai_critique_v0(
|
|
|
862
1013
|
|
|
863
1014
|
_outputs = None
|
|
864
1015
|
|
|
1016
|
+
# Lazy import and configure litellm
|
|
1017
|
+
litellm = _configure_litellm()
|
|
1018
|
+
|
|
865
1019
|
# --------------------------------------------------------------------------
|
|
866
1020
|
litellm.openai_key = openai_api_key
|
|
867
1021
|
litellm.anthropic_key = anthropic_api_key
|
|
@@ -986,7 +1140,7 @@ def auto_starts_with_v0(
|
|
|
986
1140
|
if parameters is None or not isinstance(parameters, dict):
|
|
987
1141
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
988
1142
|
|
|
989
|
-
if
|
|
1143
|
+
if "prefix" not in parameters:
|
|
990
1144
|
raise MissingConfigurationParameterV0Error(path="prefix")
|
|
991
1145
|
|
|
992
1146
|
prefix = parameters["prefix"]
|
|
@@ -1035,7 +1189,7 @@ def auto_ends_with_v0(
|
|
|
1035
1189
|
if parameters is None or not isinstance(parameters, dict):
|
|
1036
1190
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
1037
1191
|
|
|
1038
|
-
if
|
|
1192
|
+
if "suffix" not in parameters:
|
|
1039
1193
|
raise MissingConfigurationParameterV0Error(path="suffix")
|
|
1040
1194
|
|
|
1041
1195
|
suffix = parameters["suffix"]
|
|
@@ -1084,7 +1238,7 @@ def auto_contains_v0(
|
|
|
1084
1238
|
if parameters is None or not isinstance(parameters, dict):
|
|
1085
1239
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
1086
1240
|
|
|
1087
|
-
if
|
|
1241
|
+
if "substring" not in parameters:
|
|
1088
1242
|
raise MissingConfigurationParameterV0Error(path="substring")
|
|
1089
1243
|
|
|
1090
1244
|
substring = parameters["substring"]
|
|
@@ -1133,7 +1287,7 @@ def auto_contains_any_v0(
|
|
|
1133
1287
|
if parameters is None or not isinstance(parameters, dict):
|
|
1134
1288
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
1135
1289
|
|
|
1136
|
-
if
|
|
1290
|
+
if "substrings" not in parameters:
|
|
1137
1291
|
raise MissingConfigurationParameterV0Error(path="substrings")
|
|
1138
1292
|
|
|
1139
1293
|
substrings = parameters["substrings"]
|
|
@@ -1191,7 +1345,7 @@ def auto_contains_all_v0(
|
|
|
1191
1345
|
if parameters is None or not isinstance(parameters, dict):
|
|
1192
1346
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
1193
1347
|
|
|
1194
|
-
if
|
|
1348
|
+
if "substrings" not in parameters:
|
|
1195
1349
|
raise MissingConfigurationParameterV0Error(path="substrings")
|
|
1196
1350
|
|
|
1197
1351
|
substrings = parameters["substrings"]
|
|
@@ -1291,7 +1445,7 @@ def auto_json_diff_v0(
|
|
|
1291
1445
|
if parameters is None or not isinstance(parameters, dict):
|
|
1292
1446
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
1293
1447
|
|
|
1294
|
-
if
|
|
1448
|
+
if "correct_answer_key" not in parameters:
|
|
1295
1449
|
raise MissingConfigurationParameterV0Error(path="correct_answer_key")
|
|
1296
1450
|
|
|
1297
1451
|
correct_answer_key = str(parameters["correct_answer_key"])
|
|
@@ -1299,7 +1453,7 @@ def auto_json_diff_v0(
|
|
|
1299
1453
|
if inputs is None or not isinstance(inputs, dict):
|
|
1300
1454
|
raise InvalidInputsV0Error(expected="dict", got=inputs)
|
|
1301
1455
|
|
|
1302
|
-
if not
|
|
1456
|
+
if correct_answer_key not in inputs:
|
|
1303
1457
|
raise MissingInputV0Error(path=correct_answer_key)
|
|
1304
1458
|
|
|
1305
1459
|
correct_answer = inputs[correct_answer_key]
|
|
@@ -1383,7 +1537,7 @@ def auto_levenshtein_distance_v0(
|
|
|
1383
1537
|
if parameters is None or not isinstance(parameters, dict):
|
|
1384
1538
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
1385
1539
|
|
|
1386
|
-
if
|
|
1540
|
+
if "correct_answer_key" not in parameters:
|
|
1387
1541
|
raise MissingConfigurationParameterV0Error(path="correct_answer_key")
|
|
1388
1542
|
|
|
1389
1543
|
correct_answer_key = str(parameters["correct_answer_key"])
|
|
@@ -1393,7 +1547,7 @@ def auto_levenshtein_distance_v0(
|
|
|
1393
1547
|
if inputs is None or not isinstance(inputs, dict):
|
|
1394
1548
|
raise InvalidInputsV0Error(expected="dict", got=inputs)
|
|
1395
1549
|
|
|
1396
|
-
if not
|
|
1550
|
+
if correct_answer_key not in inputs:
|
|
1397
1551
|
raise MissingInputV0Error(path=correct_answer_key)
|
|
1398
1552
|
|
|
1399
1553
|
correct_answer = inputs[correct_answer_key]
|
|
@@ -1488,7 +1642,7 @@ def auto_similarity_match_v0(
|
|
|
1488
1642
|
if parameters is None or not isinstance(parameters, dict):
|
|
1489
1643
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
1490
1644
|
|
|
1491
|
-
if
|
|
1645
|
+
if "correct_answer_key" not in parameters:
|
|
1492
1646
|
raise MissingConfigurationParameterV0Error(path="correct_answer_key")
|
|
1493
1647
|
|
|
1494
1648
|
correct_answer_key = str(parameters["correct_answer_key"])
|
|
@@ -1498,7 +1652,7 @@ def auto_similarity_match_v0(
|
|
|
1498
1652
|
if inputs is None or not isinstance(inputs, dict):
|
|
1499
1653
|
raise InvalidInputsV0Error(expected="dict", got=inputs)
|
|
1500
1654
|
|
|
1501
|
-
if not
|
|
1655
|
+
if correct_answer_key not in inputs:
|
|
1502
1656
|
raise MissingInputV0Error(path=correct_answer_key)
|
|
1503
1657
|
|
|
1504
1658
|
correct_answer = inputs[correct_answer_key]
|
|
@@ -1581,7 +1735,7 @@ async def auto_semantic_similarity_v0(
|
|
|
1581
1735
|
if parameters is None or not isinstance(parameters, dict):
|
|
1582
1736
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
1583
1737
|
|
|
1584
|
-
if
|
|
1738
|
+
if "correct_answer_key" not in parameters:
|
|
1585
1739
|
raise MissingConfigurationParameterV0Error(path="correct_answer_key")
|
|
1586
1740
|
|
|
1587
1741
|
correct_answer_key = str(parameters["correct_answer_key"])
|
|
@@ -1594,7 +1748,7 @@ async def auto_semantic_similarity_v0(
|
|
|
1594
1748
|
if inputs is None or not isinstance(inputs, dict):
|
|
1595
1749
|
raise InvalidInputsV0Error(expected="dict", got=inputs)
|
|
1596
1750
|
|
|
1597
|
-
if not
|
|
1751
|
+
if correct_answer_key not in inputs:
|
|
1598
1752
|
raise MissingInputV0Error(path=correct_answer_key)
|
|
1599
1753
|
|
|
1600
1754
|
correct_answer = inputs[correct_answer_key]
|
|
@@ -1613,7 +1767,7 @@ async def auto_semantic_similarity_v0(
|
|
|
1613
1767
|
|
|
1614
1768
|
outputs_str = outputs if isinstance(outputs, str) else dumps(outputs)
|
|
1615
1769
|
|
|
1616
|
-
secrets = await SecretsManager.retrieve_secrets()
|
|
1770
|
+
secrets, _, _ = await SecretsManager.retrieve_secrets()
|
|
1617
1771
|
|
|
1618
1772
|
if secrets is None or not isinstance(secrets, list):
|
|
1619
1773
|
raise InvalidSecretsV0Error(expected="list", got=secrets)
|
|
@@ -1646,6 +1800,7 @@ async def auto_semantic_similarity_v0(
|
|
|
1646
1800
|
_outputs = None
|
|
1647
1801
|
|
|
1648
1802
|
# --------------------------------------------------------------------------
|
|
1803
|
+
AsyncOpenAI, OpenAIError = _load_openai()
|
|
1649
1804
|
try:
|
|
1650
1805
|
openai = AsyncOpenAI(api_key=openai_api_key)
|
|
1651
1806
|
except OpenAIError as e:
|
|
@@ -1696,7 +1851,7 @@ async def completion_v0(
|
|
|
1696
1851
|
if parameters is None or not isinstance(parameters, dict):
|
|
1697
1852
|
raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters)
|
|
1698
1853
|
|
|
1699
|
-
if
|
|
1854
|
+
if "prompt" not in parameters:
|
|
1700
1855
|
raise MissingConfigurationParameterV0Error(path="prompt")
|
|
1701
1856
|
|
|
1702
1857
|
params: Dict[str, Any] = {**(parameters or {})}
|
|
@@ -169,6 +169,53 @@ field_match_test_v0_interface = WorkflowServiceInterface(
|
|
|
169
169
|
),
|
|
170
170
|
)
|
|
171
171
|
|
|
172
|
+
json_multi_field_match_v0_interface = WorkflowServiceInterface(
|
|
173
|
+
uri="agenta:built-in:json_multi_field_match:v0",
|
|
174
|
+
schemas=dict( # type: ignore
|
|
175
|
+
parameters={
|
|
176
|
+
"type": "object",
|
|
177
|
+
"title": "JSON Multi-Field Match Parameters",
|
|
178
|
+
"description": "Settings for comparing multiple JSON fields against expected values from a ground truth column.",
|
|
179
|
+
"properties": {
|
|
180
|
+
"correct_answer_key": {
|
|
181
|
+
"type": "string",
|
|
182
|
+
"title": "Ground Truth Column",
|
|
183
|
+
"description": "Column in test data containing the JSON ground truth.",
|
|
184
|
+
"default": "correct_answer",
|
|
185
|
+
},
|
|
186
|
+
"fields": {
|
|
187
|
+
"type": "array",
|
|
188
|
+
"title": "Fields to Compare",
|
|
189
|
+
"description": "List of JSON field paths (dot notation) to compare. Each field becomes a separate score.",
|
|
190
|
+
"items": {"type": "string"},
|
|
191
|
+
"default": [],
|
|
192
|
+
},
|
|
193
|
+
},
|
|
194
|
+
"required": ["correct_answer_key", "fields"],
|
|
195
|
+
"additionalProperties": False,
|
|
196
|
+
},
|
|
197
|
+
inputs={
|
|
198
|
+
"type": "object",
|
|
199
|
+
"title": "JSON Multi-Field Match Inputs",
|
|
200
|
+
"description": "Testcase data including the JSON ground truth.",
|
|
201
|
+
},
|
|
202
|
+
outputs={
|
|
203
|
+
"type": "object",
|
|
204
|
+
"title": "JSON Multi-Field Match Outputs",
|
|
205
|
+
"description": "Per-field match scores and aggregate score. Each field produces a 0 or 1 output.",
|
|
206
|
+
"properties": {
|
|
207
|
+
"aggregate_score": {
|
|
208
|
+
"type": "number",
|
|
209
|
+
"title": "Aggregate Score",
|
|
210
|
+
"description": "Percentage of matched fields (0-1).",
|
|
211
|
+
},
|
|
212
|
+
},
|
|
213
|
+
"required": ["aggregate_score"],
|
|
214
|
+
"additionalProperties": True, # Allows dynamic field outputs
|
|
215
|
+
},
|
|
216
|
+
),
|
|
217
|
+
)
|
|
218
|
+
|
|
172
219
|
auto_webhook_test_v0_interface = WorkflowServiceInterface(
|
|
173
220
|
uri="agenta:built-in:auto_webhook_test:v0",
|
|
174
221
|
schemas=dict( # type: ignore
|