agenta 0.12.7__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff shows the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those versions.

Potentially problematic release: this version of agenta might be problematic.

Files changed (88)
  1. agenta/__init__.py +3 -1
  2. agenta/cli/helper.py +1 -1
  3. agenta/cli/main.py +1 -1
  4. agenta/cli/variant_commands.py +7 -5
  5. agenta/client/api.py +1 -1
  6. agenta/client/backend/__init__.py +78 -18
  7. agenta/client/backend/client.py +1031 -5526
  8. agenta/client/backend/resources/__init__.py +31 -0
  9. agenta/client/backend/resources/apps/__init__.py +1 -0
  10. agenta/client/backend/resources/apps/client.py +977 -0
  11. agenta/client/backend/resources/bases/__init__.py +1 -0
  12. agenta/client/backend/resources/bases/client.py +127 -0
  13. agenta/client/backend/resources/configs/__init__.py +1 -0
  14. agenta/client/backend/resources/configs/client.py +377 -0
  15. agenta/client/backend/resources/containers/__init__.py +5 -0
  16. agenta/client/backend/resources/containers/client.py +383 -0
  17. agenta/client/backend/resources/containers/types/__init__.py +5 -0
  18. agenta/client/backend/{types → resources/containers/types}/container_templates_response.py +1 -1
  19. agenta/client/backend/resources/environments/__init__.py +1 -0
  20. agenta/client/backend/resources/environments/client.py +131 -0
  21. agenta/client/backend/resources/evaluations/__init__.py +1 -0
  22. agenta/client/backend/resources/evaluations/client.py +1008 -0
  23. agenta/client/backend/resources/evaluators/__init__.py +1 -0
  24. agenta/client/backend/resources/evaluators/client.py +594 -0
  25. agenta/client/backend/resources/observability/__init__.py +1 -0
  26. agenta/client/backend/resources/observability/client.py +1184 -0
  27. agenta/client/backend/resources/testsets/__init__.py +1 -0
  28. agenta/client/backend/resources/testsets/client.py +689 -0
  29. agenta/client/backend/resources/variants/__init__.py +5 -0
  30. agenta/client/backend/resources/variants/client.py +796 -0
  31. agenta/client/backend/resources/variants/types/__init__.py +7 -0
  32. agenta/client/backend/resources/variants/types/add_variant_from_base_and_config_response.py +7 -0
  33. agenta/client/backend/types/__init__.py +54 -22
  34. agenta/client/backend/types/aggregated_result.py +2 -2
  35. agenta/client/backend/types/aggregated_result_evaluator_config.py +9 -0
  36. agenta/client/backend/types/{app_variant_output.py → app_variant_response.py} +4 -2
  37. agenta/client/backend/types/{trace.py → create_span.py} +20 -10
  38. agenta/client/backend/types/create_trace_response.py +37 -0
  39. agenta/client/backend/types/environment_output.py +3 -1
  40. agenta/client/backend/types/environment_output_extended.py +45 -0
  41. agenta/client/backend/types/environment_revision.py +41 -0
  42. agenta/client/backend/types/error.py +37 -0
  43. agenta/client/backend/types/evaluation.py +6 -3
  44. agenta/client/backend/types/evaluation_scenario_output.py +4 -2
  45. agenta/client/backend/types/{delete_evaluation.py → evaluation_scenario_score_update.py} +2 -2
  46. agenta/client/backend/types/evaluation_status_enum.py +4 -0
  47. agenta/client/backend/types/evaluator.py +1 -0
  48. agenta/client/backend/types/{get_config_reponse.py → get_config_response.py} +1 -2
  49. agenta/client/backend/types/human_evaluation_scenario.py +2 -2
  50. agenta/client/backend/types/{app_variant_output_extended.py → human_evaluation_scenario_update.py} +11 -16
  51. agenta/client/backend/types/human_evaluation_update.py +37 -0
  52. agenta/client/backend/types/image.py +1 -0
  53. agenta/client/backend/types/invite_request.py +1 -0
  54. agenta/client/backend/types/{list_api_keys_output.py → list_api_keys_response.py} +1 -1
  55. agenta/client/backend/types/llm_tokens.py +38 -0
  56. agenta/client/backend/types/new_human_evaluation.py +42 -0
  57. agenta/client/backend/types/organization.py +1 -0
  58. agenta/client/backend/types/permission.py +141 -0
  59. agenta/client/backend/types/result.py +2 -0
  60. agenta/client/backend/types/{human_evaluation_scenario_score.py → score.py} +1 -1
  61. agenta/client/backend/types/span.py +18 -16
  62. agenta/client/backend/types/span_detail.py +52 -0
  63. agenta/client/backend/types/span_kind.py +49 -0
  64. agenta/client/backend/types/span_status_code.py +29 -0
  65. agenta/client/backend/types/span_variant.py +38 -0
  66. agenta/client/backend/types/trace_detail.py +52 -0
  67. agenta/client/backend/types/with_pagination.py +40 -0
  68. agenta/client/backend/types/workspace_member_response.py +38 -0
  69. agenta/client/backend/types/workspace_permission.py +40 -0
  70. agenta/client/backend/types/workspace_response.py +44 -0
  71. agenta/client/backend/types/workspace_role.py +41 -0
  72. agenta/client/backend/types/workspace_role_response.py +38 -0
  73. agenta/docker/docker_utils.py +1 -5
  74. agenta/sdk/__init__.py +3 -1
  75. agenta/sdk/agenta_decorator.py +68 -18
  76. agenta/sdk/agenta_init.py +53 -21
  77. agenta/sdk/tracing/context_manager.py +13 -0
  78. agenta/sdk/tracing/decorators.py +41 -0
  79. agenta/sdk/tracing/llm_tracing.py +220 -0
  80. agenta/sdk/tracing/logger.py +19 -0
  81. agenta/sdk/tracing/tasks_manager.py +130 -0
  82. {agenta-0.12.7.dist-info → agenta-0.13.0.dist-info}/METADATA +47 -96
  83. agenta-0.13.0.dist-info/RECORD +161 -0
  84. agenta/client/backend/types/add_variant_from_base_and_config_response.py +0 -7
  85. agenta/client/backend/types/human_evaluation_scenario_update_score.py +0 -5
  86. agenta-0.12.7.dist-info/RECORD +0 -114
  87. {agenta-0.12.7.dist-info → agenta-0.13.0.dist-info}/WHEEL +0 -0
  88. {agenta-0.12.7.dist-info → agenta-0.13.0.dist-info}/entry_points.txt +0 -0
agenta/sdk/tracing/llm_tracing.py (new file)
@@ -0,0 +1,220 @@
+ # Stdlib Imports
+ from datetime import datetime, timezone
+ from typing import Optional, Dict, Any, List, Union
+
+ # Own Imports
+ from agenta.sdk.tracing.logger import llm_logger
+ from agenta.sdk.tracing.tasks_manager import TaskQueue
+ from agenta.client.backend.client import AsyncAgentaApi
+ from agenta.client.backend.client import AsyncObservabilityClient
+ from agenta.client.backend.types.create_span import CreateSpan, SpanKind, SpanStatusCode
+
+ # Third Party Imports
+ from bson.objectid import ObjectId
+
+
+ class Tracing(object):
+     """Agenta llm tracing object.
+
+     Args:
+         base_url (str): The URL of the backend host
+         api_key (str): The API Key of the backend host
+         tasks_manager (TaskQueue): The tasks manager dedicated to handling asynchronous tasks
+         llm_logger (Logger): The logger associated with the LLM tracing
+         max_workers (int): The maximum number of workers to run tracing
+     """
+
+     _instance = None
+
+     def __new__(cls, *args, **kwargs):
+         if not cls._instance:
+             cls._instance = super().__new__(cls)
+         return cls._instance
+
+     def __init__(
+         self,
+         base_url: str,
+         app_id: str,
+         variant_id: str,
+         variant_name: Optional[str] = None,
+         api_key: Optional[str] = None,
+         max_workers: Optional[int] = None,
+     ):
+         self.base_url = base_url + "/api"
+         self.api_key = api_key if api_key is not None else ""
+         self.llm_logger = llm_logger
+         self.app_id = app_id
+         self.variant_id = variant_id
+         self.variant_name = variant_name
+         self.tasks_manager = TaskQueue(
+             max_workers if max_workers else 4, logger=llm_logger
+         )
+         self.active_span = CreateSpan
+         self.active_trace = CreateSpan
+         self.recording_trace_id: Union[str, None] = None
+         self.recorded_spans: List[CreateSpan] = []
+         self.tags: List[str] = []
+         self.span_dict: Dict[str, CreateSpan] = {}  # type: ignore
+
+     @property
+     def client(self) -> AsyncObservabilityClient:
+         """Initialize observability async client
+
+         Returns:
+             AsyncObservabilityClient: async client
+         """
+
+         return AsyncAgentaApi(
+             base_url=self.base_url, api_key=self.api_key, timeout=120  # type: ignore
+         ).observability
+
+     def set_span_attribute(
+         self, parent_key: Optional[str] = None, attributes: Dict[str, Any] = {}
+     ):
+         span = self.span_dict[self.active_span.id]  # type: ignore
+         for key, value in attributes.items():
+             self.set_attribute(span.attributes, key, value, parent_key)  # type: ignore
+
+     def set_attribute(
+         self,
+         span_attributes: Dict[str, Any],
+         key: str,
+         value: Any,
+         parent_key: Optional[str] = None,
+     ):
+         if parent_key is not None:
+             model_config = span_attributes.get(parent_key, None)
+             if not model_config:
+                 span_attributes[parent_key] = {}
+             span_attributes[parent_key][key] = value
+         else:
+             span_attributes[key] = value
+
+     def set_trace_tags(self, tags: List[str]):
+         self.tags.extend(tags)
+
+     def start_parent_span(
+         self, name: str, inputs: Dict[str, Any], config: Dict[str, Any], **kwargs
+     ):
+         trace_id = self._create_trace_id()
+         span_id = self._create_span_id()
+         self.llm_logger.info("Recording parent span...")
+         span = CreateSpan(
+             id=span_id,
+             app_id=self.app_id,
+             variant_id=self.variant_id,
+             variant_name=self.variant_name,
+             inputs=inputs,
+             name=name,
+             config=config,
+             environment=kwargs.get("environment"),
+             spankind=SpanKind.WORKFLOW.value,
+             status=SpanStatusCode.UNSET.value,
+             start_time=datetime.now(timezone.utc),
+         )
+         self.active_trace = span
+         self.recording_trace_id = trace_id
+         self.parent_span_id = span.id
+         self.llm_logger.info(
+             f"Recorded active_trace and setting parent_span_id: {span.id}"
+         )
+
+     def start_span(
+         self,
+         name: str,
+         spankind: str,
+         input: Dict[str, Any],
+         config: Dict[str, Any] = {},
+     ) -> CreateSpan:
+         span_id = self._create_span_id()
+         self.llm_logger.info(f"Recording {spankind} span...")
+         span = CreateSpan(
+             id=span_id,
+             inputs=input,
+             name=name,
+             app_id=self.app_id,
+             variant_id=self.variant_id,
+             variant_name=self.variant_name,
+             config=config,
+             environment=self.active_trace.environment,
+             parent_span_id=self.parent_span_id,
+             spankind=spankind.upper(),
+             attributes={},
+             status=SpanStatusCode.UNSET.value,
+             start_time=datetime.now(timezone.utc),
+         )
+
+         self.active_span = span
+         self.span_dict[span.id] = span
+         self.parent_span_id = span.id
+         self.llm_logger.info(
+             f"Recorded active_span and setting parent_span_id: {span.id}"
+         )
+         return span
+
+     def update_span_status(self, span: CreateSpan, value: str):
+         updated_span = CreateSpan(**{**span.dict(), "status": value})
+         self.active_span = updated_span
+
+     def end_span(self, outputs: Dict[str, Any], span: CreateSpan, **kwargs):
+         updated_span = CreateSpan(
+             **span.dict(),
+             end_time=datetime.now(timezone.utc),
+             outputs=[outputs["message"]],
+             cost=outputs.get("cost", None),
+             tokens=outputs.get("usage"),
+         )
+
+         # Push span to list of recorded spans
+         self.recorded_spans.append(updated_span)
+         self.llm_logger.info(
+             f"Pushed {updated_span.spankind} span {updated_span.id} to recorded spans."
+         )
+
+     def end_recording(self, outputs: Dict[str, Any], span: CreateSpan, **kwargs):
+         self.end_span(outputs=outputs, span=span, **kwargs)
+         if self.api_key == "":
+             return
+
+         self.llm_logger.info(f"Preparing to send recorded spans for processing.")
+         self.llm_logger.info(f"Recorded spans => {len(self.recorded_spans)}")
+         self.tasks_manager.add_task(
+             self.active_trace.id,
+             "trace",
+             self.client.create_traces(
+                 trace=self.recording_trace_id, spans=self.recorded_spans  # type: ignore
+             ),
+             self.client,
+         )
+         self.llm_logger.info(
+             f"Tracing for {span.id} recorded successfully and sent for processing."
+         )
+         self._clear_recorded_spans()
+
+     def _create_trace_id(self) -> str:
+         """Creates a unique mongo id for the trace object.
+
+         Returns:
+             str: stringify oid of the trace
+         """
+
+         return str(ObjectId())
+
+     def _create_span_id(self) -> str:
+         """Creates a unique mongo id for the span object.
+
+         Returns:
+             str: stringify oid of the span
+         """
+
+         return str(ObjectId())
+
+     def _clear_recorded_spans(self) -> None:
+         """
+         Clear the list of recorded spans to prepare for next batch processing.
+         """
+
+         self.recorded_spans = []
+         self.llm_logger.info(
+             f"Cleared all recorded spans from batch: {self.recorded_spans}"
+         )
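The new `agenta/sdk/tracing/llm_tracing.py` module above adds a singleton `Tracing` client: it opens a parent WORKFLOW span per request, records nested child spans, and hands the finished batch to a `TaskQueue` for upload to the observability API. A minimal usage sketch based only on the methods shown in this diff; the IDs, URLs, and outputs are placeholder assumptions, and in the SDK the tracing decorators drive these calls rather than user code:

```python
# Illustrative sketch only; values are placeholders, not part of the package.
from agenta.sdk.tracing.llm_tracing import Tracing

tracing = Tracing(
    base_url="http://localhost",   # backend host; "/api" is appended internally
    app_id="my-app-id",            # placeholder application id
    variant_id="my-variant-id",    # placeholder variant id
    variant_name="app.default",
    api_key=None,                  # with no API key, end_recording() skips the upload
)

# Open the parent (WORKFLOW) span for the whole request
tracing.start_parent_span(
    name="generate",
    inputs={"country": "France"},
    config={"temperature": 0.9},
    environment="playground",
)

# Record a nested LLM call as a child span
span = tracing.start_span(
    name="llm_call",
    spankind="llm",                # upper-cased to match the SpanKind enum
    input={"prompt": "Suggest five baby names"},
    config={"model": "gpt-3.5-turbo"},
)
tracing.set_span_attribute(attributes={"model": "gpt-3.5-turbo"})
tracing.end_span(outputs={"message": "..."}, span=span)

# Close the parent span; recorded spans would be queued for upload here
tracing.end_recording(outputs={"message": "..."}, span=tracing.active_trace)
```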
agenta/sdk/tracing/logger.py (new file)
@@ -0,0 +1,19 @@
+ import logging
+
+
+ class LLMLogger:
+     def __init__(self, name="LLMLogger", level=logging.INFO):
+         self.logger = logging.getLogger(name)
+         self.logger.setLevel(level)
+
+         # Add a stream logger to view the logs in the console
+         console_handler = logging.StreamHandler()
+         self.logger.addHandler(console_handler)
+
+     @property
+     def log(self) -> logging.Logger:
+         return self.logger
+
+
+ # Initialize llm logger
+ llm_logger = LLMLogger().log
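`logger.py` simply attaches a console `StreamHandler` to a named logger and exposes it as the module-level `llm_logger` that `llm_tracing.py` imports. A short sketch of how it is used elsewhere in the tracing package; the message text is illustrative:

```python
from agenta.sdk.tracing.logger import llm_logger

# llm_logger is a plain logging.Logger configured at INFO level with a console
# handler, so tracing messages like this one go straight to the console.
llm_logger.info("Recording parent span...")
```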
agenta/sdk/tracing/tasks_manager.py (new file)
@@ -0,0 +1,130 @@
+ # Stdlib Imports
+ import queue
+ import asyncio
+ from logging import Logger
+ from typing import Coroutine, Optional, Union
+ from concurrent.futures import ThreadPoolExecutor
+
+ # Own Imports
+ from agenta.client.backend.types.error import Error
+ from agenta.client.backend.client import AsyncObservabilityClient
+
+
+ class AsyncTask(object):
+     """Wraps a coroutine (an asynchronous function defined with async def).
+
+     Args:
+         coroutine (Coroutine): asynchronous function
+     """
+
+     def __init__(
+         self,
+         coroutine_id: str,
+         coroutine_type: str,
+         coroutine: Coroutine,
+         client: AsyncObservabilityClient,
+     ):
+         self.coroutine_id = coroutine_id
+         self.coroutine_type = coroutine_type
+         self.coroutine = coroutine
+         self.task: Optional[asyncio.Task] = None
+         self.client = client
+
+     async def run(self) -> Union[asyncio.Task, Error]:
+         """Creates an asyncio Task from the coroutine and starts it
+
+         Returns:
+             Task: asyncio task
+         """
+
+         try:
+             self.task = asyncio.create_task(self.coroutine)
+         except Exception as exc:
+             return Error(message="error running task", stacktrace=str(exc))
+         return await self.task
+
+     def cancel(self):
+         """
+         Cancels running asyncio Task.
+         """
+
+         if self.task:
+             self.task.cancel()
+
+
+ class TaskQueue(object):
+     """Stores a list of AsyncTask instances.
+
+     Args:
+         tasks (List[AsyncTasks]): list of async task instances
+
+     Example Usage:
+         ```python
+         queue = TaskQueue()
+         queue.add_task(long_running_task(1))
+         queue.add_task(long_running_task(2))
+         ```
+     """
+
+     def __init__(self, num_workers: int, logger: Logger):
+         self.tasks = queue.Queue()  # type: ignore
+         self._logger = logger
+         self._thread_pool = ThreadPoolExecutor(max_workers=num_workers)
+
+     def add_task(
+         self,
+         coroutine_id: str,
+         coroutine_type: str,
+         coroutine: Coroutine,
+         obs_client: AsyncObservabilityClient,
+     ) -> AsyncTask:
+         """Adds a new task to be executed.
+
+         Args:
+             coroutine_id (str): The Id of the coroutine
+             coroutine_type (str): The type of coroutine
+             coroutine (Coroutine): async task
+             obs_client (AsyncObservabilityClient): The async observability client
+
+         Returns:
+             AsyncTask: task to be executed
+         """
+
+         task = AsyncTask(coroutine_id, coroutine_type, coroutine, obs_client)
+         self.tasks.put(task)
+         return self._worker()
+
+     def _worker(self):
+         """
+         Runs task gotten from the queue in a thread pool.
+         """
+
+         while True:
+             task: AsyncTask = self.tasks.get()  # type: ignore
+             try:
+                 future = self._thread_pool.submit(asyncio.run, task.run())
+                 future.result()
+             except Exception as exc:
+                 self._logger.error(f"Error running task: {str(exc)}")
+                 self._logger.error(
+                     f"Recording trace {task.coroutine_type} status to ERROR."
+                 )
+                 break
+             finally:
+                 self.tasks.task_done()
+                 break
+
+     def _get_size(self) -> int:
+         """Returns the approximate number of items in the queue."""
+
+         return self.tasks.qsize()
+
+     def flush(self) -> None:
+         """Clears all items from the queue."""
+
+         q_size = self._get_size()
+         self._logger.info("Flushing queue...")
+         with self.tasks.mutex:  # acts as a lock to ensure that only one thread can access the queue
+             self.tasks.join()
+         self._logger.info(f"Queue with {q_size} items flushed successfully")
+         return
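Note that `TaskQueue.add_task` takes more arguments than the docstring example above suggests: a coroutine id, a coroutine type, the coroutine itself, and the observability client, and it executes the coroutine on a thread pool via `asyncio.run`. A hedged sketch of how the tracing module drives it; the coroutine and the `None` client below are stand-ins, since the SDK actually passes `client.create_traces(...)` and an `AsyncObservabilityClient`:

```python
import asyncio

from agenta.sdk.tracing.logger import llm_logger
from agenta.sdk.tracing.tasks_manager import TaskQueue


async def send_spans() -> None:
    # Stand-in coroutine; in the SDK this is client.create_traces(trace=..., spans=...)
    await asyncio.sleep(0.1)


queue = TaskQueue(num_workers=4, logger=llm_logger)

# Mirrors Tracing.end_recording: (trace id, "trace", coroutine, observability client).
queue.add_task(
    coroutine_id="65f0c1c2e3d4a5b6c7d8e9f0",  # placeholder ObjectId-style trace id
    coroutine_type="trace",
    coroutine=send_spans(),
    obs_client=None,  # stand-in; the SDK passes an AsyncObservabilityClient here
)
```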
{agenta-0.12.7.dist-info → agenta-0.13.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: agenta
- Version: 0.12.7
+ Version: 0.13.0
  Summary: The SDK for agenta is an open-source LLMOps platform.
  Home-page: https://agenta.ai
  Keywords: LLMOps,LLM,evaluation,prompt engineering
@@ -23,6 +23,7 @@ Requires-Dist: importlib-metadata (>=6.7.0,<7.0.0)
  Requires-Dist: ipdb (>=0.13)
  Requires-Dist: posthog (>=3.1.0,<4.0.0)
  Requires-Dist: pydantic (==1.10.13)
+ Requires-Dist: pymongo (>=4.6.3,<5.0.0)
  Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
  Requires-Dist: python-multipart (>=0.0.6,<0.0.7)
  Requires-Dist: questionary (>=1.10.0,<2.0.0)
@@ -35,8 +36,8 @@ Description-Content-Type: text/markdown
  <a href="https://agenta.ai/">
  <div align="center" >
  <picture >
- <source media="(prefers-color-scheme: dark)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/a356f263-6f5e-43df-8b58-4c183b8d8878" >
- <source media="(prefers-color-scheme: light)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/68e055d4-d7b8-4943-992f-761558c64253" >
+ <source width="500" media="(prefers-color-scheme: dark)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/cdddf5ad-2352-4920-b1d9-ae7f8d9d7735" >
+ <source width="500" media="(prefers-color-scheme: light)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/ab75cbac-b807-496f-aab3-57463a33f726" >
  <img alt="Shows the logo of agenta" src="https://github.com/Agenta-AI/agenta/assets/4510758/68e055d4-d7b8-4943-992f-761558c64253" >
  </picture>
  </div>
@@ -47,8 +48,8 @@ Description-Content-Type: text/markdown
  <a href="https://docs.agenta.ai/">Documentation</a>
  </h4>
  <div align="center">
- <strong>Quickly iterate, debug, and evaluate your LLM apps</strong><br />
- The open-source LLMOps platform for prompt-engineering, evaluation, human feedback, and deployment of complex LLM apps.
+ <strong> <h2> Collaborate on prompts, evaluate, and deploy LLM applications with confidence </h2></strong>
+ The open-source LLM developer platform for prompt-engineering, evaluation, human feedback, and deployment of complex LLM apps.
  </div>
  </br>
  <p align="center">
@@ -67,7 +68,6 @@ Description-Content-Type: text/markdown
  <a href="https://pypi.org/project/agenta/">
  <img src="https://img.shields.io/pypi/dm/agenta" alt="PyPI - Downloads">
  </a>
-
  </br>
  </p>
@@ -89,7 +89,7 @@ Description-Content-Type: text/markdown
  <a href="https://cloud.agenta.ai">
  <picture>
- <img src="https://github.com/Agenta-AI/agenta/assets/4510758/a3024fac-2945-4208-ae12-4cc51ecfc970" />
+ <img src="https://github.com/Agenta-AI/agenta/assets/4510758/d622f268-c295-42d4-a43c-548b611ff6f2" />
  </picture>
  </a>
@@ -99,8 +99,8 @@ Description-Content-Type: text/markdown
  <br />
  <div align="center" >
  <picture >
- <source media="(prefers-color-scheme: dark)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/cf6d4713-4558-4c6c-9e1b-ee4eab261f4c" >
- <source media="(prefers-color-scheme: light)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/ae9cf11f-8ef9-4b67-98c7-4a40341fa87a" >
+ <source width="800" media="(prefers-color-scheme: dark)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/cf6d4713-4558-4c6c-9e1b-ee4eab261f4c" >
+ <source width="800" media="(prefers-color-scheme: light)" srcset="https://github.com/Agenta-AI/agenta/assets/4510758/ae9cf11f-8ef9-4b67-98c7-4a40341fa87a" >
  <img alt="Mockup agenta" src="https://github.com/Agenta-AI/agenta/assets/4510758/ae9cf11f-8ef9-4b67-98c7-4a40341fa87a" >
  </picture>
  </div>
@@ -112,126 +112,75 @@ Description-Content-Type: text/markdown
  ---

  <h3 align="center">
- <a href="#ℹ️-about"><b>About</b></a> &bull;
  <a href="#quick-start"><b>Quick Start</b></a> &bull;
- <a href="https://docs.agenta.ai/installation"><b>Installation</b></a> &bull;
  <a href="#features"><b>Features</b></a> &bull;
  <a href="https://docs.agenta.ai"><b>Documentation</b></a> &bull;
  <a href="#enterprise-support"><b>Enterprise</b></a> &bull;
- <a href="https://join.slack.com/t/agenta-hq/shared_invite/zt-1zsafop5i-Y7~ZySbhRZvKVPV5DO_7IA"><b>Community</b></a> &bull;
+ <a href="https://github.com/orgs/Agenta-AI/projects/13"><b>Roadmap</b></a> &bull;
+ <a href="https://join.slack.com/t/agenta-hq/shared_invite/zt-1zsafop5i-Y7~ZySbhRZvKVPV5DO_7IA"><b>Join Our Slack</b></a> &bull;
  <a href="#contributing"><b>Contributing</b></a>
  </h3>

  ---

- # ℹ️ About
+ # ⭐️ Why Agenta?
+
+ Agenta is an end-to-end LLM developer platform. It provides the tools for **prompt engineering and management**, ⚖️ **evaluation**, **human annotation**, and :rocket: **deployment**. All without imposing any restrictions on your choice of framework, library, or model.
+
+ Agenta allows developers and product teams to collaborate in building production-grade LLM-powered applications in less time.
+
+ ### With Agenta, you can:

- Agenta is an end-to-end LLMOps platform. It provides the tools for **prompt engineering and management**, ⚖️ **evaluation**, and :rocket: **deployment**. All without imposing any restrictions on your choice of framework, library, or model.
+ - [🧪 **Experiment** and **compare** prompts](https://docs.agenta.ai/basic_guides/prompt_engineering) on [any LLM workflow](https://docs.agenta.ai/advanced_guides/custom_applications) (chain-of-prompts, Retrieval Augmented Generation (RAG), LLM agents...)
+ - ✍️ Collect and [**annotate golden test sets**](https://docs.agenta.ai/basic_guides/test_sets) for evaluation
+ - 📈 [**Evaluate** your application](https://docs.agenta.ai/basic_guides/automatic_evaluation) with pre-existing or [**custom evaluators**](https://docs.agenta.ai/advanced_guides/using_custom_evaluators)
+ - [🔍 **Annotate** and **A/B test**](https://docs.agenta.aibasic_guides/human_evaluation) your applications with **human feedback**
+ - [🤝 **Collaborate with product teams**](https://docs.agenta.ai/basic_guides/team_management) for prompt engineering and evaluation
+ - [🚀 **Deploy your application**](https://docs.agenta.ai/basic_guides/deployment) in one-click in the UI, through CLI, or through github workflows.

- Agenta allows developers and product teams to collaborate and build robust AI applications in less time.
+ ### Works with any LLM app workflow

- ## 🔨 How does it work?
+ Agenta enables prompt engineering and evaluation on any LLM app architecture:
+ - Chain of prompts
+ - RAG
+ - Agents
+ - ...

- | Using an LLM App Template (For Non-Technical Users) | Starting from Code |
- | ------------- | ------------- |
- |1. [Create an application using a pre-built template from our UI](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)<br />2. Access a playground where you can test and compare different prompts and configurations side-by-side.<br /> 3. Systematically evaluate your application using pre-built or custom evaluators.<br /> 4. Deploy the application to production with one click. |1. [Add a few lines to any LLM application code to automatically create a playground for it](https://docs.agenta.ai/developer_guides/tutorials/first-app-with-langchain) <br />2. Experiment with prompts and configurations, and compare them side-by-side in the playground. <br />3. Systematically evaluate your application using pre-built or custom evaluators. <br />4. Deploy the application to production with one click. |
+ It works with any framework such as [Langchain](https://langchain.com), [LlamaIndex](https://www.llamaindex.ai/) and any LLM provider (openAI, Cohere, Mistral).

- <br /><br />
+ [Jump here to see how to use your own custom application with agenta](/advanced_guides/custom_applications)

  # Quick Start

- ### [Try the cloud version](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)
- ### [Create your first application in one-minute](https://docs.agenta.ai/getting_started/getting-started-ui)
- ### [Create an application using Langchain](https://docs.agenta.ai/developer_guides/tutorials/first-app-with-langchain)
+ ### [Get started for free](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)
+ ### [Explore the Docs](https://docs.agenta.ai)
+ ### [Create your first application in one-minute](https://docs.agenta.ai/quickstart/getting-started-ui)
+ ### [Create an application using Langchain](https://docs.agenta.ai/tutorials/first-app-with-langchain)
  ### [Self-host agenta](https://docs.agenta.ai/self-host/host-locally)
- ### [Read the Documentation](https://docs.agenta.ai)
  ### [Check the Cookbook](https://docs.agenta.ai/cookbook)

  # Features

- <h3>Playground 🪄 <br/></h3>
- With just a few lines of code, define the parameters and prompts you wish to experiment with. You and your team can quickly experiment and test new variants on the web UI. <br/>
-
- https://github.com/Agenta-AI/agenta/assets/4510758/8b736d2b-7c61-414c-b534-d95efc69134c
-
- <h3>Version Evaluation 📊 <br/></h3>
- Define test sets, then evaluate manually or programmatically your different variants.<br/>
-
- ![](https://github.com/Agenta-AI/agenta/assets/4510758/b1de455d-7e0a-48d6-8497-39ba641600f0)
-
-
- <h3>API Deployment 🚀<br/></h3>
- When you are ready, deploy your LLM applications as APIs in one click.<br/>
-
- ![](https://github.com/Agenta-AI/agenta/blob/main/docs/images/endpoint.gif)
-
- ## Why choose Agenta for building LLM-apps?
-
- - 🔨 **Build quickly**: You need to iterate many times on different architectures and prompts to bring apps to production. We streamline this process and allow you to do this in days instead of weeks.
- - 🏗️ **Build robust apps and reduce hallucination**: We provide you with the tools to systematically and easily evaluate your application to make sure you only serve robust apps to production.
- - 👨‍💻 **Developer-centric**: We cater to complex LLM-apps and pipelines that require more than one simple prompt. We allow you to experiment and iterate on apps that have complex integration, business logic, and many prompts.
- - 🌐 **Solution-Agnostic**: You have the freedom to use any libraries and models, be it Langchain, llma_index, or a custom-written alternative.
- - 🔒 **Privacy-First**: We respect your privacy and do not proxy your data through third-party services. The platform and the data are hosted on your infrastructure.
-
- ## How Agenta works:
-
- **1. Write your LLM-app code**
-
- Write the code using any framework, library, or model you want. Add the `agenta.post` decorator and put the inputs and parameters in the function call just like in this example:
-
- _Example simple application that generates baby names:_
-
- ```python
- import agenta as ag
- from langchain.chains import LLMChain
- from langchain.llms import OpenAI
- from langchain.prompts import PromptTemplate
-
- default_prompt = "Give me five cool names for a baby from {country} with this gender {gender}!!!!"
- ag.init()
- ag.config(prompt_template=ag.TextParam(default_prompt),
-           temperature=ag.FloatParam(0.9))
-
- @ag.entrypoint
- def generate(
-     country: str,
-     gender: str,
- ) -> str:
-     llm = OpenAI(temperature=ag.config.temperature)
-     prompt = PromptTemplate(
-         input_variables=["country", "gender"],
-         template=ag.config.prompt_template,
-     )
-     chain = LLMChain(llm=llm, prompt=prompt)
-     output = chain.run(country=country, gender=gender)
-
-     return output
- ```
-
- **2.Deploy your app using the Agenta CLI**
-
- <img width="650" alt="Screenshot 2023-06-19 at 15 58 34" src="https://github.com/Agenta-AI/agenta/assets/4510758/eede3e78-0fe1-42a0-ad4e-d880ddb10bf0">
-
- **3. Go to agenta at http://localhost**
-
- Now your team can 🔄 iterate, 🧪 experiment, and ⚖️ evaluate different versions of your app (with your code!) in the web platform.</summary>
- <br/>
-
- <img width="900" alt="Screenshot 2023-06-25 at 21 08 53" src="https://github.com/Agenta-AI/agenta/assets/57623556/7e07a988-a36a-4fb5-99dd-9cc13a678434">

+ | Playground | Evaluation |
+ | ------- | ------- |
+ | Compare and version prompts for any LLM app, from single prompt to agents. <br/> <video src="https://github.com/Agenta-AI/agenta/assets/4510758/8b736d2b-7c61-414c-b534-d95efc69134c" controls="controls" style="max-width:100%;"> | Define test sets, then evaluate manually or programmatically your different variants.<br/> <video src="https://github.com/Agenta-AI/agenta/assets/4510758/8c6997c6-da87-46ad-a81f-e15e277263d2" controls="controls" style="max-width:100%;">|
+ | Human annotation | Deployment |
+ | Use Human annotator to A/B test and score your LLM apps. <br/> <img width="750" alt="Screenshot 2024-01-28 at 12 57 46" src="https://github.com/Agenta-AI/agenta/assets/4510758/bf62a697-bf19-4ba9-850e-742fbfb75424"> | When you are ready, deploy your LLM applications as APIs in one click.<br/>![](https://github.com/Agenta-AI/agenta/blob/main/docs/images/endpoint.gif) |

  # Enterprise Support
  Contact us here for enterprise support and early access to agenta self-managed enterprise with Kubernetes support. <br/><br/>
  <a href="https://cal.com/mahmoud-mabrouk-ogzgey/demo"><img src="https://cal.com/book-with-cal-dark.svg" alt="Book us"></a>

  # Disabling Anonymized Tracking
+ By default, Agenta automatically reports anonymized basic usage statistics. This helps us understand how Agenta is used and track its overall usage and growth. This data does not include any sensitive information.

- To disable anonymized telemetry, set the following environment variable:
+ To disable anonymized telemetry, follow these steps:

  - For web: Set `TELEMETRY_TRACKING_ENABLED` to `false` in your `agenta-web/.env` file.
  - For CLI: Set `telemetry_tracking_enabled` to `false` in your `~/.agenta/config.toml` file.

- After making this change, restart agenta compose.
+ After making this change, restart Agenta Compose.

  # Contributing
@@ -244,7 +193,7 @@ Check out our [Contributing Guide](https://docs.agenta.ai/contributing/getting-s
  ## Contributors ✨

  <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
- [![All Contributors](https://img.shields.io/badge/all_contributors-39-orange.svg?style=flat-square)](#contributors-)
+ [![All Contributors](https://img.shields.io/badge/all_contributors-41-orange.svg?style=flat-square)](#contributors-)
  <!-- ALL-CONTRIBUTORS-BADGE:END -->

  Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
@@ -304,6 +253,8 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/brockWith"><img src="https://avatars.githubusercontent.com/u/105627491?v=4?s=100" width="100px;" alt="brockWith"/><br /><sub><b>brockWith</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=brockWith" title="Code">💻</a></td>
  <td align="center" valign="top" width="14.28%"><a href="http://denniszelada.wordpress.com/"><img src="https://avatars.githubusercontent.com/u/219311?v=4?s=100" width="100px;" alt="Dennis Zelada"/><br /><sub><b>Dennis Zelada</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=denniszelada" title="Code">💻</a></td>
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/romainrbr"><img src="https://avatars.githubusercontent.com/u/10381609?v=4?s=100" width="100px;" alt="Romain Brucker"/><br /><sub><b>Romain Brucker</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=romainrbr" title="Code">💻</a></td>
+ <td align="center" valign="top" width="14.28%"><a href="http://heonheo.com"><img src="https://avatars.githubusercontent.com/u/76820291?v=4?s=100" width="100px;" alt="Heon Heo"/><br /><sub><b>Heon Heo</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=HeonHeo23" title="Code">💻</a></td>
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/Drewski2222"><img src="https://avatars.githubusercontent.com/u/39228951?v=4?s=100" width="100px;" alt="Drew Reisner"/><br /><sub><b>Drew Reisner</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=Drewski2222" title="Code">💻</a></td>
  </tr>
  </tbody>
  </table>