agenta 0.12.7__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of agenta might be problematic. Click here for more details.
- agenta/__init__.py +3 -1
- agenta/cli/helper.py +1 -1
- agenta/cli/main.py +1 -1
- agenta/cli/variant_commands.py +7 -5
- agenta/client/api.py +1 -1
- agenta/client/backend/__init__.py +78 -18
- agenta/client/backend/client.py +1031 -5526
- agenta/client/backend/resources/__init__.py +31 -0
- agenta/client/backend/resources/apps/__init__.py +1 -0
- agenta/client/backend/resources/apps/client.py +977 -0
- agenta/client/backend/resources/bases/__init__.py +1 -0
- agenta/client/backend/resources/bases/client.py +127 -0
- agenta/client/backend/resources/configs/__init__.py +1 -0
- agenta/client/backend/resources/configs/client.py +377 -0
- agenta/client/backend/resources/containers/__init__.py +5 -0
- agenta/client/backend/resources/containers/client.py +383 -0
- agenta/client/backend/resources/containers/types/__init__.py +5 -0
- agenta/client/backend/{types → resources/containers/types}/container_templates_response.py +1 -1
- agenta/client/backend/resources/environments/__init__.py +1 -0
- agenta/client/backend/resources/environments/client.py +131 -0
- agenta/client/backend/resources/evaluations/__init__.py +1 -0
- agenta/client/backend/resources/evaluations/client.py +1008 -0
- agenta/client/backend/resources/evaluators/__init__.py +1 -0
- agenta/client/backend/resources/evaluators/client.py +594 -0
- agenta/client/backend/resources/observability/__init__.py +1 -0
- agenta/client/backend/resources/observability/client.py +1184 -0
- agenta/client/backend/resources/testsets/__init__.py +1 -0
- agenta/client/backend/resources/testsets/client.py +689 -0
- agenta/client/backend/resources/variants/__init__.py +5 -0
- agenta/client/backend/resources/variants/client.py +796 -0
- agenta/client/backend/resources/variants/types/__init__.py +7 -0
- agenta/client/backend/resources/variants/types/add_variant_from_base_and_config_response.py +7 -0
- agenta/client/backend/types/__init__.py +54 -22
- agenta/client/backend/types/aggregated_result.py +2 -2
- agenta/client/backend/types/aggregated_result_evaluator_config.py +9 -0
- agenta/client/backend/types/{app_variant_output.py → app_variant_response.py} +4 -2
- agenta/client/backend/types/{trace.py → create_span.py} +20 -10
- agenta/client/backend/types/create_trace_response.py +37 -0
- agenta/client/backend/types/environment_output.py +3 -1
- agenta/client/backend/types/environment_output_extended.py +45 -0
- agenta/client/backend/types/environment_revision.py +41 -0
- agenta/client/backend/types/error.py +37 -0
- agenta/client/backend/types/evaluation.py +6 -3
- agenta/client/backend/types/evaluation_scenario_output.py +4 -2
- agenta/client/backend/types/{delete_evaluation.py → evaluation_scenario_score_update.py} +2 -2
- agenta/client/backend/types/evaluation_status_enum.py +4 -0
- agenta/client/backend/types/evaluator.py +1 -0
- agenta/client/backend/types/{get_config_reponse.py → get_config_response.py} +1 -2
- agenta/client/backend/types/human_evaluation_scenario.py +2 -2
- agenta/client/backend/types/{app_variant_output_extended.py → human_evaluation_scenario_update.py} +11 -16
- agenta/client/backend/types/human_evaluation_update.py +37 -0
- agenta/client/backend/types/image.py +1 -0
- agenta/client/backend/types/invite_request.py +1 -0
- agenta/client/backend/types/{list_api_keys_output.py → list_api_keys_response.py} +1 -1
- agenta/client/backend/types/llm_tokens.py +38 -0
- agenta/client/backend/types/new_human_evaluation.py +42 -0
- agenta/client/backend/types/organization.py +1 -0
- agenta/client/backend/types/permission.py +141 -0
- agenta/client/backend/types/result.py +2 -0
- agenta/client/backend/types/{human_evaluation_scenario_score.py → score.py} +1 -1
- agenta/client/backend/types/span.py +18 -16
- agenta/client/backend/types/span_detail.py +52 -0
- agenta/client/backend/types/span_kind.py +49 -0
- agenta/client/backend/types/span_status_code.py +29 -0
- agenta/client/backend/types/span_variant.py +38 -0
- agenta/client/backend/types/trace_detail.py +52 -0
- agenta/client/backend/types/with_pagination.py +40 -0
- agenta/client/backend/types/workspace_member_response.py +38 -0
- agenta/client/backend/types/workspace_permission.py +40 -0
- agenta/client/backend/types/workspace_response.py +44 -0
- agenta/client/backend/types/workspace_role.py +41 -0
- agenta/client/backend/types/workspace_role_response.py +38 -0
- agenta/docker/docker_utils.py +1 -5
- agenta/sdk/__init__.py +3 -1
- agenta/sdk/agenta_decorator.py +68 -18
- agenta/sdk/agenta_init.py +53 -21
- agenta/sdk/tracing/context_manager.py +13 -0
- agenta/sdk/tracing/decorators.py +41 -0
- agenta/sdk/tracing/llm_tracing.py +220 -0
- agenta/sdk/tracing/logger.py +19 -0
- agenta/sdk/tracing/tasks_manager.py +130 -0
- {agenta-0.12.7.dist-info → agenta-0.13.0.dist-info}/METADATA +47 -96
- agenta-0.13.0.dist-info/RECORD +161 -0
- agenta/client/backend/types/add_variant_from_base_and_config_response.py +0 -7
- agenta/client/backend/types/human_evaluation_scenario_update_score.py +0 -5
- agenta-0.12.7.dist-info/RECORD +0 -114
- {agenta-0.12.7.dist-info → agenta-0.13.0.dist-info}/WHEEL +0 -0
- {agenta-0.12.7.dist-info → agenta-0.13.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# Stdlib Imports
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
from typing import Optional, Dict, Any, List, Union
|
|
4
|
+
|
|
5
|
+
# Own Imports
|
|
6
|
+
from agenta.sdk.tracing.logger import llm_logger
|
|
7
|
+
from agenta.sdk.tracing.tasks_manager import TaskQueue
|
|
8
|
+
from agenta.client.backend.client import AsyncAgentaApi
|
|
9
|
+
from agenta.client.backend.client import AsyncObservabilityClient
|
|
10
|
+
from agenta.client.backend.types.create_span import CreateSpan, SpanKind, SpanStatusCode
|
|
11
|
+
|
|
12
|
+
# Third Party Imports
|
|
13
|
+
from bson.objectid import ObjectId
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Tracing(object):
    """Agenta llm tracing object.

    Singleton that records the spans of a single trace and ships them to the
    observability backend in the background through a task queue.

    Args:
        base_url (str): The URL of the backend host
        app_id (str): The id of the app whose spans are recorded
        variant_id (str): The id of the app variant
        variant_name (Optional[str]): The name of the app variant
        api_key (Optional[str]): The API Key of the backend host
        max_workers (Optional[int]): The maximum number of workers to run tracing
    """

    # Singleton storage: __new__ always returns the same instance.
    _instance = None

    def __new__(cls, *args, **kwargs):
        if not cls._instance:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(
        self,
        base_url: str,
        app_id: str,
        variant_id: str,
        variant_name: Optional[str] = None,
        api_key: Optional[str] = None,
        max_workers: Optional[int] = None,
    ):
        self.base_url = base_url + "/api"
        self.api_key = api_key if api_key is not None else ""
        self.llm_logger = llm_logger
        self.app_id = app_id
        self.variant_id = variant_id
        self.variant_name = variant_name
        self.tasks_manager = TaskQueue(
            max_workers if max_workers else 4, logger=llm_logger
        )
        # NOTE(review): these hold the CreateSpan *class* as a placeholder
        # until start_parent_span/start_span assign real instances — callers
        # must not read them before a span has been started.
        self.active_span = CreateSpan
        self.active_trace = CreateSpan
        self.recording_trace_id: Union[str, None] = None
        self.recorded_spans: List[CreateSpan] = []
        self.tags: List[str] = []
        self.span_dict: Dict[str, CreateSpan] = {}  # type: ignore

    @property
    def client(self) -> AsyncObservabilityClient:
        """Initialize observability async client.

        A fresh client is built on every property access.

        Returns:
            AsyncObservabilityClient: async client
        """

        return AsyncAgentaApi(
            base_url=self.base_url, api_key=self.api_key, timeout=120  # type: ignore
        ).observability

    def set_span_attribute(
        self,
        parent_key: Optional[str] = None,
        attributes: Optional[Dict[str, Any]] = None,
    ):
        """Set one or more attributes on the currently active span.

        Args:
            parent_key: Optional key under which the attributes are nested.
            attributes: Mapping of attribute names to values.
        """

        # Use None instead of a mutable `{}` default: a shared default dict
        # would be the same object across every call of the method.
        if attributes is None:
            attributes = {}
        span = self.span_dict[self.active_span.id]  # type: ignore
        for key, value in attributes.items():
            self.set_attribute(span.attributes, key, value, parent_key)  # type: ignore

    def set_attribute(
        self,
        span_attributes: Dict[str, Any],
        key: str,
        value: Any,
        parent_key: Optional[str] = None,
    ):
        """Write a single attribute, optionally nested under ``parent_key``.

        Args:
            span_attributes: The span's attribute mapping to mutate.
            key: Attribute name.
            value: Attribute value.
            parent_key: When given, the attribute is stored inside the
                sub-dict ``span_attributes[parent_key]`` (created on demand).
        """

        if parent_key is not None:
            model_config = span_attributes.get(parent_key, None)
            if not model_config:
                span_attributes[parent_key] = {}
            span_attributes[parent_key][key] = value
        else:
            span_attributes[key] = value

    def set_trace_tags(self, tags: List[str]):
        """Append tags that will be attached to the recorded trace."""

        self.tags.extend(tags)

    def start_parent_span(
        self, name: str, inputs: Dict[str, Any], config: Dict[str, Any], **kwargs
    ):
        """Start the root (workflow) span of a new trace.

        Args:
            name: Name of the parent span.
            inputs: Inputs of the traced workflow.
            config: Configuration of the traced workflow.
            **kwargs: Extra values; only ``environment`` is read here.
        """

        trace_id = self._create_trace_id()
        span_id = self._create_span_id()
        self.llm_logger.info("Recording parent span...")
        span = CreateSpan(
            id=span_id,
            app_id=self.app_id,
            variant_id=self.variant_id,
            variant_name=self.variant_name,
            inputs=inputs,
            name=name,
            config=config,
            environment=kwargs.get("environment"),
            spankind=SpanKind.WORKFLOW.value,
            status=SpanStatusCode.UNSET.value,
            start_time=datetime.now(timezone.utc),
        )
        self.active_trace = span
        self.recording_trace_id = trace_id
        self.parent_span_id = span.id
        self.llm_logger.info(
            f"Recorded active_trace and setting parent_span_id: {span.id}"
        )

    def start_span(
        self,
        name: str,
        spankind: str,
        input: Dict[str, Any],
        config: Optional[Dict[str, Any]] = None,
    ) -> CreateSpan:
        """Start a child span under the currently active parent span.

        Args:
            name: Name of the span.
            spankind: Kind of the span (uppercased before storing).
            input: Inputs of the span.
            config: Optional span configuration.

        Returns:
            CreateSpan: the newly created (and now active) span.
        """

        # Use None instead of a mutable `{}` default (shared across calls).
        if config is None:
            config = {}
        span_id = self._create_span_id()
        self.llm_logger.info(f"Recording {spankind} span...")
        span = CreateSpan(
            id=span_id,
            inputs=input,
            name=name,
            app_id=self.app_id,
            variant_id=self.variant_id,
            variant_name=self.variant_name,
            config=config,
            environment=self.active_trace.environment,
            parent_span_id=self.parent_span_id,
            spankind=spankind.upper(),
            attributes={},
            status=SpanStatusCode.UNSET.value,
            start_time=datetime.now(timezone.utc),
        )

        self.active_span = span
        self.span_dict[span.id] = span
        self.parent_span_id = span.id
        self.llm_logger.info(
            f"Recorded active_span and setting parent_span_id: {span.id}"
        )
        return span

    def update_span_status(self, span: CreateSpan, value: str):
        """Replace the active span with a copy carrying the given status."""

        updated_span = CreateSpan(**{**span.dict(), "status": value})
        self.active_span = updated_span

    def end_span(self, outputs: Dict[str, Any], span: CreateSpan, **kwargs):
        """Close a span and push it onto the batch of recorded spans.

        Args:
            outputs: Result payload; must contain "message", may contain
                "cost" and "usage".
            span: The span to finalize.
        """

        updated_span = CreateSpan(
            **span.dict(),
            end_time=datetime.now(timezone.utc),
            outputs=[outputs["message"]],
            cost=outputs.get("cost", None),
            tokens=outputs.get("usage"),
        )

        # Push span to list of recorded spans
        self.recorded_spans.append(updated_span)
        self.llm_logger.info(
            f"Pushed {updated_span.spankind} span {updated_span.id} to recorded spans."
        )

    def end_recording(self, outputs: Dict[str, Any], span: CreateSpan, **kwargs):
        """Close the given span and ship the whole batch to the backend.

        Sending is skipped entirely when no API key is configured.
        """

        self.end_span(outputs=outputs, span=span, **kwargs)
        if self.api_key == "":
            return

        self.llm_logger.info("Preparing to send recorded spans for processing.")
        self.llm_logger.info(f"Recorded spans => {len(self.recorded_spans)}")
        # Build the client once: the property constructs a new client per
        # access, and the coroutine and the task should share the same one.
        client = self.client
        self.tasks_manager.add_task(
            self.active_trace.id,
            "trace",
            client.create_traces(
                trace=self.recording_trace_id, spans=self.recorded_spans  # type: ignore
            ),
            client,
        )
        self.llm_logger.info(
            f"Tracing for {span.id} recorded successfully and sent for processing."
        )
        self._clear_recorded_spans()

    def _create_trace_id(self) -> str:
        """Creates a unique mongo id for the trace object.

        Returns:
            str: stringify oid of the trace
        """

        return str(ObjectId())

    def _create_span_id(self) -> str:
        """Creates a unique mongo id for the span object.

        Returns:
            str: stringify oid of the span
        """

        return str(ObjectId())

    def _clear_recorded_spans(self) -> None:
        """
        Clear the list of recorded spans to prepare for next batch processing.
        """

        self.recorded_spans = []
        self.llm_logger.info(
            f"Cleared all recorded spans from batch: {self.recorded_spans}"
        )
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class LLMLogger:
    """Console logger used by the LLM tracing machinery.

    Args:
        name (str): Name of the underlying ``logging`` logger.
        level (int): Minimum level the logger will emit.
    """

    def __init__(self, name="LLMLogger", level=logging.INFO):
        self.logger = logging.getLogger(name)
        self.logger.setLevel(level)

        # Add a stream handler to view the logs in the console, but only if
        # one is not attached yet: logging.getLogger returns the SAME logger
        # object for a given name, so instantiating LLMLogger twice would
        # otherwise attach a second handler and duplicate every log line.
        if not self.logger.handlers:
            self.logger.addHandler(logging.StreamHandler())

    @property
    def log(self) -> logging.Logger:
        """Return the configured ``logging.Logger`` instance."""
        return self.logger


# Initialize llm logger
llm_logger = LLMLogger().log
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Stdlib Imports
|
|
2
|
+
import queue
|
|
3
|
+
import asyncio
|
|
4
|
+
from logging import Logger
|
|
5
|
+
from typing import Coroutine, Optional, Union
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
7
|
+
|
|
8
|
+
# Own Imports
|
|
9
|
+
from agenta.client.backend.types.error import Error
|
|
10
|
+
from agenta.client.backend.client import AsyncObservabilityClient
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AsyncTask(object):
    """Wraps a coroutine (an asynchronous function defined with async def).

    Args:
        coroutine_id (str): identifier of the wrapped coroutine
        coroutine_type (str): kind of work the coroutine performs
        coroutine (Coroutine): the asynchronous function to execute
        client (AsyncObservabilityClient): observability client tied to the task
    """

    def __init__(
        self,
        coroutine_id: str,
        coroutine_type: str,
        coroutine: Coroutine,
        client: AsyncObservabilityClient,
    ):
        self.coroutine_id = coroutine_id
        self.coroutine_type = coroutine_type
        self.coroutine = coroutine
        self.task: Optional[asyncio.Task] = None
        self.client = client

    async def run(self) -> Union[asyncio.Task, Error]:
        """Schedule the coroutine as an asyncio Task and await its result.

        Returns:
            The awaited result of the task, or an ``Error`` describing why
            the task could not be scheduled.
        """

        try:
            scheduled = asyncio.create_task(self.coroutine)
        except Exception as exc:
            return Error(message="error running task", stacktrace=str(exc))
        self.task = scheduled
        return await scheduled

    def cancel(self):
        """
        Cancels running asyncio Task.
        """

        if self.task is not None:
            self.task.cancel()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TaskQueue(object):
    """Stores a list of AsyncTask instances.

    Args:
        num_workers (int): number of threads available to run tasks
        logger (Logger): logger used to report task execution errors

    Example Usage:
        ```python
        queue = TaskQueue()
        queue.add_task(long_running_task(1))
        queue.add_task(long_running_task(2))
        ```
    """

    def __init__(self, num_workers: int, logger: Logger):
        self.tasks = queue.Queue()  # type: ignore
        self._logger = logger
        self._thread_pool = ThreadPoolExecutor(max_workers=num_workers)

    def add_task(
        self,
        coroutine_id: str,
        coroutine_type: str,
        coroutine: Coroutine,
        obs_client: AsyncObservabilityClient,
    ) -> AsyncTask:
        """Adds a new task to be executed.

        Args:
            coroutine_id (str): The Id of the coroutine
            coroutine_type (str): The type of coroutine
            coroutine (Coroutine): async task
            obs_client (AsyncObservabilityClient): The async observability client

        Returns:
            AsyncTask: task to be executed
        """

        task = AsyncTask(coroutine_id, coroutine_type, coroutine, obs_client)
        self.tasks.put(task)
        self._worker()
        # Return the task itself, honoring the annotation and docstring:
        # previously `return self._worker()` leaked the worker's None.
        return task

    def _worker(self):
        """
        Runs one task taken from the queue in the thread pool.
        """

        # The original `while True:` loop always broke after one iteration
        # (a `break` sat in the `finally` clause), so this is a single pass.
        task: AsyncTask = self.tasks.get()  # type: ignore
        try:
            future = self._thread_pool.submit(asyncio.run, task.run())
            future.result()
        except Exception as exc:
            self._logger.error(f"Error running task: {str(exc)}")
            self._logger.error(
                f"Recording trace {task.coroutine_type} status to ERROR."
            )
        finally:
            self.tasks.task_done()

    def _get_size(self) -> int:
        """Returns the approximate number of items in the queue."""

        return self.tasks.qsize()

    def flush(self) -> None:
        """Blocks until all queued tasks have been processed, then returns.

        ``Queue.join()`` performs its own locking; wrapping it in
        ``with self.tasks.mutex:`` (as previously done) deadlocks, because
        ``join`` re-acquires that same non-reentrant lock through the queue's
        internal ``all_tasks_done`` condition variable.
        """

        q_size = self._get_size()
        self._logger.info("Flushing queue...")
        self.tasks.join()
        self._logger.info(f"Queue with {q_size} items flushed successfully")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: agenta
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.13.0
|
|
4
4
|
Summary: The SDK for agenta is an open-source LLMOps platform.
|
|
5
5
|
Home-page: https://agenta.ai
|
|
6
6
|
Keywords: LLMOps,LLM,evaluation,prompt engineering
|
|
@@ -23,6 +23,7 @@ Requires-Dist: importlib-metadata (>=6.7.0,<7.0.0)
|
|
|
23
23
|
Requires-Dist: ipdb (>=0.13)
|
|
24
24
|
Requires-Dist: posthog (>=3.1.0,<4.0.0)
|
|
25
25
|
Requires-Dist: pydantic (==1.10.13)
|
|
26
|
+
Requires-Dist: pymongo (>=4.6.3,<5.0.0)
|
|
26
27
|
Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
|
|
27
28
|
Requires-Dist: python-multipart (>=0.0.6,<0.0.7)
|
|
28
29
|
Requires-Dist: questionary (>=1.10.0,<2.0.0)
|
|
@@ -35,8 +36,8 @@ Description-Content-Type: text/markdown
|
|
|
35
36
|
<a href="https://agenta.ai/">
|
|
36
37
|
<div align="center" >
|
|
37
38
|
<picture >
|
|
38
|
-
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/
|
|
39
|
-
<source media="(prefers-color-scheme: light)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/
|
|
39
|
+
<source width="500" media="(prefers-color-scheme: dark)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/cdddf5ad-2352-4920-b1d9-ae7f8d9d7735" >
|
|
40
|
+
<source width="500" media="(prefers-color-scheme: light)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/ab75cbac-b807-496f-aab3-57463a33f726" >
|
|
40
41
|
<img alt="Shows the logo of agenta" src="https://github.com/Agenta-AI/agenta/assets/4510758/68e055d4-d7b8-4943-992f-761558c64253" >
|
|
41
42
|
</picture>
|
|
42
43
|
</div>
|
|
@@ -47,8 +48,8 @@ Description-Content-Type: text/markdown
|
|
|
47
48
|
<a href="https://docs.agenta.ai/">Documentation</a>
|
|
48
49
|
</h4>
|
|
49
50
|
<div align="center">
|
|
50
|
-
<strong>
|
|
51
|
-
The open-source
|
|
51
|
+
<strong> <h2> Collaborate on prompts, evaluate, and deploy LLM applications with confidence </h2></strong>
|
|
52
|
+
The open-source LLM developer platform for prompt-engineering, evaluation, human feedback, and deployment of complex LLM apps.
|
|
52
53
|
</div>
|
|
53
54
|
</br>
|
|
54
55
|
<p align="center">
|
|
@@ -67,7 +68,6 @@ Description-Content-Type: text/markdown
|
|
|
67
68
|
<a href="https://pypi.org/project/agenta/">
|
|
68
69
|
<img src="https://img.shields.io/pypi/dm/agenta" alt="PyPI - Downloads">
|
|
69
70
|
</a>
|
|
70
|
-
|
|
71
71
|
</br>
|
|
72
72
|
</p>
|
|
73
73
|
|
|
@@ -89,7 +89,7 @@ Description-Content-Type: text/markdown
|
|
|
89
89
|
|
|
90
90
|
<a href="https://cloud.agenta.ai">
|
|
91
91
|
<picture>
|
|
92
|
-
<img src="https://github.com/Agenta-AI/agenta/assets/4510758/
|
|
92
|
+
<img src="https://github.com/Agenta-AI/agenta/assets/4510758/d622f268-c295-42d4-a43c-548b611ff6f2" />
|
|
93
93
|
</picture>
|
|
94
94
|
</a>
|
|
95
95
|
|
|
@@ -99,8 +99,8 @@ Description-Content-Type: text/markdown
|
|
|
99
99
|
<br />
|
|
100
100
|
<div align="center" >
|
|
101
101
|
<picture >
|
|
102
|
-
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/cf6d4713-4558-4c6c-9e1b-ee4eab261f4c" >
|
|
103
|
-
<source media="(prefers-color-scheme: light)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/ae9cf11f-8ef9-4b67-98c7-4a40341fa87a" >
|
|
102
|
+
<source width="800" media="(prefers-color-scheme: dark)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/cf6d4713-4558-4c6c-9e1b-ee4eab261f4c" >
|
|
103
|
+
<source width="800" media="(prefers-color-scheme: light)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/ae9cf11f-8ef9-4b67-98c7-4a40341fa87a" >
|
|
104
104
|
<img alt="Mockup agenta" src="https://github.com/Agenta-AI/agenta/assets/4510758/ae9cf11f-8ef9-4b67-98c7-4a40341fa87a" >
|
|
105
105
|
</picture>
|
|
106
106
|
</div>
|
|
@@ -112,126 +112,75 @@ Description-Content-Type: text/markdown
|
|
|
112
112
|
---
|
|
113
113
|
|
|
114
114
|
<h3 align="center">
|
|
115
|
-
<a href="#ℹ️-about"><b>About</b></a> •
|
|
116
115
|
<a href="#quick-start"><b>Quick Start</b></a> •
|
|
117
|
-
<a href="https://docs.agenta.ai/installation"><b>Installation</b></a> •
|
|
118
116
|
<a href="#features"><b>Features</b></a> •
|
|
119
117
|
<a href="https://docs.agenta.ai"><b>Documentation</b></a> •
|
|
120
118
|
<a href="#enterprise-support"><b>Enterprise</b></a> •
|
|
121
|
-
<a href="https://
|
|
119
|
+
<a href="https://github.com/orgs/Agenta-AI/projects/13"><b>Roadmap</b></a> •
|
|
120
|
+
<a href="https://join.slack.com/t/agenta-hq/shared_invite/zt-1zsafop5i-Y7~ZySbhRZvKVPV5DO_7IA"><b>Join Our Slack</b></a> •
|
|
122
121
|
<a href="#contributing"><b>Contributing</b></a>
|
|
123
122
|
</h3>
|
|
124
123
|
|
|
125
124
|
---
|
|
126
125
|
|
|
127
|
-
#
|
|
126
|
+
# ⭐️ Why Agenta?
|
|
127
|
+
|
|
128
|
+
Agenta is an end-to-end LLM developer platform. It provides the tools for **prompt engineering and management**, ⚖️ **evaluation**, **human annotation**, and :rocket: **deployment**. All without imposing any restrictions on your choice of framework, library, or model.
|
|
129
|
+
|
|
130
|
+
Agenta allows developers and product teams to collaborate in building production-grade LLM-powered applications in less time.
|
|
131
|
+
|
|
132
|
+
### With Agenta, you can:
|
|
128
133
|
|
|
129
|
-
|
|
134
|
+
- [🧪 **Experiment** and **compare** prompts](https://docs.agenta.ai/basic_guides/prompt_engineering) on [any LLM workflow](https://docs.agenta.ai/advanced_guides/custom_applications) (chain-of-prompts, Retrieval Augmented Generation (RAG), LLM agents...)
|
|
135
|
+
- ✍️ Collect and [**annotate golden test sets**](https://docs.agenta.ai/basic_guides/test_sets) for evaluation
|
|
136
|
+
- 📈 [**Evaluate** your application](https://docs.agenta.ai/basic_guides/automatic_evaluation) with pre-existing or [**custom evaluators**](https://docs.agenta.ai/advanced_guides/using_custom_evaluators)
|
|
137
|
+
- [🔍 **Annotate** and **A/B test**](https://docs.agenta.ai/basic_guides/human_evaluation) your applications with **human feedback**
|
|
138
|
+
- [🤝 **Collaborate with product teams**](https://docs.agenta.ai/basic_guides/team_management) for prompt engineering and evaluation
|
|
139
|
+
- [🚀 **Deploy your application**](https://docs.agenta.ai/basic_guides/deployment) in one-click in the UI, through CLI, or through github workflows.
|
|
130
140
|
|
|
131
|
-
|
|
141
|
+
### Works with any LLM app workflow
|
|
132
142
|
|
|
133
|
-
|
|
143
|
+
Agenta enables prompt engineering and evaluation on any LLM app architecture:
|
|
144
|
+
- Chain of prompts
|
|
145
|
+
- RAG
|
|
146
|
+
- Agents
|
|
147
|
+
- ...
|
|
134
148
|
|
|
135
|
-
|
|
136
|
-
| ------------- | ------------- |
|
|
137
|
-
|1. [Create an application using a pre-built template from our UI](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)<br />2. Access a playground where you can test and compare different prompts and configurations side-by-side.<br /> 3. Systematically evaluate your application using pre-built or custom evaluators.<br /> 4. Deploy the application to production with one click. |1. [Add a few lines to any LLM application code to automatically create a playground for it](https://docs.agenta.ai/developer_guides/tutorials/first-app-with-langchain) <br />2. Experiment with prompts and configurations, and compare them side-by-side in the playground. <br />3. Systematically evaluate your application using pre-built or custom evaluators. <br />4. Deploy the application to production with one click. |
|
|
149
|
+
It works with any framework such as [Langchain](https://langchain.com), [LlamaIndex](https://www.llamaindex.ai/) and any LLM provider (openAI, Cohere, Mistral).
|
|
138
150
|
|
|
139
|
-
|
|
151
|
+
[Jump here to see how to use your own custom application with agenta](/advanced_guides/custom_applications)
|
|
140
152
|
|
|
141
153
|
# Quick Start
|
|
142
154
|
|
|
143
|
-
### [
|
|
144
|
-
### [
|
|
145
|
-
### [Create
|
|
155
|
+
### [Get started for free](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)
|
|
156
|
+
### [Explore the Docs](https://docs.agenta.ai)
|
|
157
|
+
### [Create your first application in one-minute](https://docs.agenta.ai/quickstart/getting-started-ui)
|
|
158
|
+
### [Create an application using Langchain](https://docs.agenta.ai/tutorials/first-app-with-langchain)
|
|
146
159
|
### [Self-host agenta](https://docs.agenta.ai/self-host/host-locally)
|
|
147
|
-
### [Read the Documentation](https://docs.agenta.ai)
|
|
148
160
|
### [Check the Cookbook](https://docs.agenta.ai/cookbook)
|
|
149
161
|
|
|
150
162
|
# Features
|
|
151
163
|
|
|
152
|
-
<h3>Playground 🪄 <br/></h3>
|
|
153
|
-
With just a few lines of code, define the parameters and prompts you wish to experiment with. You and your team can quickly experiment and test new variants on the web UI. <br/>
|
|
154
|
-
|
|
155
|
-
https://github.com/Agenta-AI/agenta/assets/4510758/8b736d2b-7c61-414c-b534-d95efc69134c
|
|
156
|
-
|
|
157
|
-
<h3>Version Evaluation 📊 <br/></h3>
|
|
158
|
-
Define test sets, then evaluate manually or programmatically your different variants.<br/>
|
|
159
|
-
|
|
160
|
-

|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
<h3>API Deployment 🚀<br/></h3>
|
|
164
|
-
When you are ready, deploy your LLM applications as APIs in one click.<br/>
|
|
165
|
-
|
|
166
|
-

|
|
167
|
-
|
|
168
|
-
## Why choose Agenta for building LLM-apps?
|
|
169
|
-
|
|
170
|
-
- 🔨 **Build quickly**: You need to iterate many times on different architectures and prompts to bring apps to production. We streamline this process and allow you to do this in days instead of weeks.
|
|
171
|
-
- 🏗️ **Build robust apps and reduce hallucination**: We provide you with the tools to systematically and easily evaluate your application to make sure you only serve robust apps to production.
|
|
172
|
-
- 👨💻 **Developer-centric**: We cater to complex LLM-apps and pipelines that require more than one simple prompt. We allow you to experiment and iterate on apps that have complex integration, business logic, and many prompts.
|
|
173
|
-
- 🌐 **Solution-Agnostic**: You have the freedom to use any libraries and models, be it Langchain, llma_index, or a custom-written alternative.
|
|
174
|
-
- 🔒 **Privacy-First**: We respect your privacy and do not proxy your data through third-party services. The platform and the data are hosted on your infrastructure.
|
|
175
|
-
|
|
176
|
-
## How Agenta works:
|
|
177
|
-
|
|
178
|
-
**1. Write your LLM-app code**
|
|
179
|
-
|
|
180
|
-
Write the code using any framework, library, or model you want. Add the `agenta.post` decorator and put the inputs and parameters in the function call just like in this example:
|
|
181
|
-
|
|
182
|
-
_Example simple application that generates baby names:_
|
|
183
|
-
|
|
184
|
-
```python
|
|
185
|
-
import agenta as ag
|
|
186
|
-
from langchain.chains import LLMChain
|
|
187
|
-
from langchain.llms import OpenAI
|
|
188
|
-
from langchain.prompts import PromptTemplate
|
|
189
|
-
|
|
190
|
-
default_prompt = "Give me five cool names for a baby from {country} with this gender {gender}!!!!"
|
|
191
|
-
ag.init()
|
|
192
|
-
ag.config(prompt_template=ag.TextParam(default_prompt),
|
|
193
|
-
temperature=ag.FloatParam(0.9))
|
|
194
|
-
|
|
195
|
-
@ag.entrypoint
|
|
196
|
-
def generate(
|
|
197
|
-
country: str,
|
|
198
|
-
gender: str,
|
|
199
|
-
) -> str:
|
|
200
|
-
llm = OpenAI(temperature=ag.config.temperature)
|
|
201
|
-
prompt = PromptTemplate(
|
|
202
|
-
input_variables=["country", "gender"],
|
|
203
|
-
template=ag.config.prompt_template,
|
|
204
|
-
)
|
|
205
|
-
chain = LLMChain(llm=llm, prompt=prompt)
|
|
206
|
-
output = chain.run(country=country, gender=gender)
|
|
207
|
-
|
|
208
|
-
return output
|
|
209
|
-
```
|
|
210
|
-
|
|
211
|
-
**2.Deploy your app using the Agenta CLI**
|
|
212
|
-
|
|
213
|
-
<img width="650" alt="Screenshot 2023-06-19 at 15 58 34" src="https://github.com/Agenta-AI/agenta/assets/4510758/eede3e78-0fe1-42a0-ad4e-d880ddb10bf0">
|
|
214
|
-
|
|
215
|
-
**3. Go to agenta at http://localhost**
|
|
216
|
-
|
|
217
|
-
Now your team can 🔄 iterate, 🧪 experiment, and ⚖️ evaluate different versions of your app (with your code!) in the web platform.</summary>
|
|
218
|
-
<br/>
|
|
219
|
-
|
|
220
|
-
<img width="900" alt="Screenshot 2023-06-25 at 21 08 53" src="https://github.com/Agenta-AI/agenta/assets/57623556/7e07a988-a36a-4fb5-99dd-9cc13a678434">
|
|
221
164
|
|
|
165
|
+
| Playground | Evaluation |
|
|
166
|
+
| ------- | ------- |
|
|
167
|
+
| Compare and version prompts for any LLM app, from single prompt to agents. <br/> <video src="https://github.com/Agenta-AI/agenta/assets/4510758/8b736d2b-7c61-414c-b534-d95efc69134c" controls="controls" style="max-width:100%;"> | Define test sets, then evaluate manually or programmatically your different variants.<br/> <video src="https://github.com/Agenta-AI/agenta/assets/4510758/8c6997c6-da87-46ad-a81f-e15e277263d2" controls="controls" style="max-width:100%;">|
|
|
168
|
+
| Human annotation | Deployment |
|
|
169
|
+
| Use Human annotator to A/B test and score your LLM apps. <br/> <img width="750" alt="Screenshot 2024-01-28 at 12 57 46" src="https://github.com/Agenta-AI/agenta/assets/4510758/bf62a697-bf19-4ba9-850e-742fbfb75424"> | When you are ready, deploy your LLM applications as APIs in one click.<br/> |
|
|
222
170
|
|
|
223
171
|
# Enterprise Support
|
|
224
172
|
Contact us here for enterprise support and early access to agenta self-managed enterprise with Kubernetes support. <br/><br/>
|
|
225
173
|
<a href="https://cal.com/mahmoud-mabrouk-ogzgey/demo"><img src="https://cal.com/book-with-cal-dark.svg" alt="Book us"></a>
|
|
226
174
|
|
|
227
175
|
# Disabling Anonymized Tracking
|
|
176
|
+
By default, Agenta automatically reports anonymized basic usage statistics. This helps us understand how Agenta is used and track its overall usage and growth. This data does not include any sensitive information.
|
|
228
177
|
|
|
229
|
-
To disable anonymized telemetry,
|
|
178
|
+
To disable anonymized telemetry, follow these steps:
|
|
230
179
|
|
|
231
180
|
- For web: Set `TELEMETRY_TRACKING_ENABLED` to `false` in your `agenta-web/.env` file.
|
|
232
181
|
- For CLI: Set `telemetry_tracking_enabled` to `false` in your `~/.agenta/config.toml` file.
|
|
233
182
|
|
|
234
|
-
After making this change, restart
|
|
183
|
+
After making this change, restart Agenta Compose.
|
|
235
184
|
|
|
236
185
|
# Contributing
|
|
237
186
|
|
|
@@ -244,7 +193,7 @@ Check out our [Contributing Guide](https://docs.agenta.ai/contributing/getting-s
|
|
|
244
193
|
## Contributors ✨
|
|
245
194
|
|
|
246
195
|
<!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
|
|
247
|
-
[](#contributors-)
|
|
248
197
|
<!-- ALL-CONTRIBUTORS-BADGE:END -->
|
|
249
198
|
|
|
250
199
|
Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
|
|
@@ -304,6 +253,8 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
|
|
|
304
253
|
<td align="center" valign="top" width="14.28%"><a href="https://github.com/brockWith"><img src="https://avatars.githubusercontent.com/u/105627491?v=4?s=100" width="100px;" alt="brockWith"/><br /><sub><b>brockWith</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=brockWith" title="Code">💻</a></td>
|
|
305
254
|
<td align="center" valign="top" width="14.28%"><a href="http://denniszelada.wordpress.com/"><img src="https://avatars.githubusercontent.com/u/219311?v=4?s=100" width="100px;" alt="Dennis Zelada"/><br /><sub><b>Dennis Zelada</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=denniszelada" title="Code">💻</a></td>
|
|
306
255
|
<td align="center" valign="top" width="14.28%"><a href="https://github.com/romainrbr"><img src="https://avatars.githubusercontent.com/u/10381609?v=4?s=100" width="100px;" alt="Romain Brucker"/><br /><sub><b>Romain Brucker</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=romainrbr" title="Code">💻</a></td>
|
|
256
|
+
<td align="center" valign="top" width="14.28%"><a href="http://heonheo.com"><img src="https://avatars.githubusercontent.com/u/76820291?v=4?s=100" width="100px;" alt="Heon Heo"/><br /><sub><b>Heon Heo</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=HeonHeo23" title="Code">💻</a></td>
|
|
257
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/Drewski2222"><img src="https://avatars.githubusercontent.com/u/39228951?v=4?s=100" width="100px;" alt="Drew Reisner"/><br /><sub><b>Drew Reisner</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=Drewski2222" title="Code">💻</a></td>
|
|
307
258
|
</tr>
|
|
308
259
|
</tbody>
|
|
309
260
|
</table>
|