pygeai 0.6.0b7__py3-none-any.whl → 0.6.0b10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygeai/_docs/source/conf.py +78 -6
- pygeai/_docs/source/content/api_reference/embeddings.rst +31 -1
- pygeai/_docs/source/content/api_reference/evaluation.rst +590 -0
- pygeai/_docs/source/content/api_reference/feedback.rst +237 -0
- pygeai/_docs/source/content/api_reference/files.rst +592 -0
- pygeai/_docs/source/content/api_reference/gam.rst +401 -0
- pygeai/_docs/source/content/api_reference/proxy.rst +318 -0
- pygeai/_docs/source/content/api_reference/secrets.rst +495 -0
- pygeai/_docs/source/content/api_reference/usage_limits.rst +390 -0
- pygeai/_docs/source/content/api_reference.rst +7 -0
- pygeai/_docs/source/content/debugger.rst +376 -83
- pygeai/_docs/source/content/migration.rst +528 -0
- pygeai/_docs/source/content/modules.rst +1 -1
- pygeai/_docs/source/pygeai.cli.rst +8 -0
- pygeai/_docs/source/pygeai.tests.cli.rst +16 -0
- pygeai/_docs/source/pygeai.tests.core.embeddings.rst +16 -0
- pygeai/_docs/source/pygeai.tests.snippets.chat.rst +40 -0
- pygeai/_docs/source/pygeai.tests.snippets.dbg.rst +45 -0
- pygeai/_docs/source/pygeai.tests.snippets.embeddings.rst +40 -0
- pygeai/_docs/source/pygeai.tests.snippets.evaluation.dataset.rst +197 -0
- pygeai/_docs/source/pygeai.tests.snippets.evaluation.plan.rst +133 -0
- pygeai/_docs/source/pygeai.tests.snippets.evaluation.result.rst +37 -0
- pygeai/_docs/source/pygeai.tests.snippets.evaluation.rst +10 -0
- pygeai/_docs/source/pygeai.tests.snippets.rst +1 -0
- pygeai/admin/clients.py +5 -0
- pygeai/assistant/clients.py +7 -0
- pygeai/assistant/data_analyst/clients.py +2 -0
- pygeai/assistant/rag/clients.py +11 -0
- pygeai/chat/clients.py +191 -25
- pygeai/chat/endpoints.py +2 -1
- pygeai/cli/commands/chat.py +227 -1
- pygeai/cli/commands/embeddings.py +56 -8
- pygeai/cli/commands/migrate.py +994 -434
- pygeai/cli/error_handler.py +116 -0
- pygeai/cli/geai.py +28 -10
- pygeai/cli/parsers.py +8 -2
- pygeai/core/base/clients.py +3 -1
- pygeai/core/common/exceptions.py +11 -10
- pygeai/core/embeddings/__init__.py +19 -0
- pygeai/core/embeddings/clients.py +17 -2
- pygeai/core/embeddings/mappers.py +16 -2
- pygeai/core/embeddings/responses.py +9 -2
- pygeai/core/feedback/clients.py +1 -0
- pygeai/core/files/clients.py +5 -7
- pygeai/core/files/managers.py +42 -0
- pygeai/core/llm/clients.py +4 -0
- pygeai/core/plugins/clients.py +1 -0
- pygeai/core/rerank/clients.py +1 -0
- pygeai/core/secrets/clients.py +6 -0
- pygeai/core/services/rest.py +1 -1
- pygeai/dbg/__init__.py +3 -0
- pygeai/dbg/debugger.py +565 -70
- pygeai/evaluation/clients.py +1 -1
- pygeai/evaluation/dataset/clients.py +45 -44
- pygeai/evaluation/plan/clients.py +27 -26
- pygeai/evaluation/result/clients.py +37 -5
- pygeai/gam/clients.py +4 -0
- pygeai/health/clients.py +1 -0
- pygeai/lab/agents/clients.py +8 -1
- pygeai/lab/models.py +3 -3
- pygeai/lab/processes/clients.py +21 -0
- pygeai/lab/strategies/clients.py +4 -0
- pygeai/lab/tools/clients.py +1 -0
- pygeai/migration/__init__.py +31 -0
- pygeai/migration/strategies.py +404 -155
- pygeai/migration/tools.py +170 -3
- pygeai/organization/clients.py +13 -0
- pygeai/organization/limits/clients.py +15 -0
- pygeai/proxy/clients.py +3 -1
- pygeai/tests/admin/test_clients.py +16 -11
- pygeai/tests/assistants/rag/test_clients.py +35 -23
- pygeai/tests/assistants/test_clients.py +22 -15
- pygeai/tests/auth/test_clients.py +14 -6
- pygeai/tests/chat/test_clients.py +211 -1
- pygeai/tests/cli/commands/test_embeddings.py +32 -9
- pygeai/tests/cli/commands/test_evaluation.py +7 -0
- pygeai/tests/cli/commands/test_migrate.py +112 -243
- pygeai/tests/cli/test_error_handler.py +225 -0
- pygeai/tests/cli/test_geai_driver.py +154 -0
- pygeai/tests/cli/test_parsers.py +5 -5
- pygeai/tests/core/embeddings/test_clients.py +144 -0
- pygeai/tests/core/embeddings/test_managers.py +171 -0
- pygeai/tests/core/embeddings/test_mappers.py +142 -0
- pygeai/tests/core/feedback/test_clients.py +2 -0
- pygeai/tests/core/files/test_clients.py +1 -0
- pygeai/tests/core/llm/test_clients.py +14 -9
- pygeai/tests/core/plugins/test_clients.py +5 -3
- pygeai/tests/core/rerank/test_clients.py +1 -0
- pygeai/tests/core/secrets/test_clients.py +19 -13
- pygeai/tests/dbg/test_debugger.py +453 -75
- pygeai/tests/evaluation/dataset/test_clients.py +3 -1
- pygeai/tests/evaluation/plan/test_clients.py +4 -2
- pygeai/tests/evaluation/result/test_clients.py +7 -5
- pygeai/tests/gam/test_clients.py +1 -1
- pygeai/tests/health/test_clients.py +1 -0
- pygeai/tests/lab/agents/test_clients.py +9 -0
- pygeai/tests/lab/processes/test_clients.py +36 -0
- pygeai/tests/lab/processes/test_mappers.py +3 -0
- pygeai/tests/lab/strategies/test_clients.py +14 -9
- pygeai/tests/migration/test_strategies.py +45 -218
- pygeai/tests/migration/test_tools.py +133 -9
- pygeai/tests/organization/limits/test_clients.py +17 -0
- pygeai/tests/organization/test_clients.py +22 -0
- pygeai/tests/proxy/test_clients.py +2 -0
- pygeai/tests/proxy/test_integration.py +1 -0
- pygeai/tests/snippets/chat/chat_completion_with_reasoning_effort.py +18 -0
- pygeai/tests/snippets/chat/get_response.py +15 -0
- pygeai/tests/snippets/chat/get_response_streaming.py +20 -0
- pygeai/tests/snippets/chat/get_response_with_files.py +16 -0
- pygeai/tests/snippets/chat/get_response_with_tools.py +36 -0
- pygeai/tests/snippets/dbg/__init__.py +0 -0
- pygeai/tests/snippets/dbg/basic_debugging.py +32 -0
- pygeai/tests/snippets/dbg/breakpoint_management.py +48 -0
- pygeai/tests/snippets/dbg/stack_navigation.py +45 -0
- pygeai/tests/snippets/dbg/stepping_example.py +40 -0
- pygeai/tests/snippets/embeddings/cache_example.py +31 -0
- pygeai/tests/snippets/embeddings/cohere_example.py +41 -0
- pygeai/tests/snippets/embeddings/openai_base64_example.py +27 -0
- pygeai/tests/snippets/embeddings/openai_example.py +30 -0
- pygeai/tests/snippets/embeddings/similarity_example.py +42 -0
- pygeai/tests/snippets/evaluation/dataset/__init__.py +0 -0
- pygeai/tests/snippets/evaluation/dataset/complete_workflow_example.py +195 -0
- pygeai/tests/snippets/evaluation/dataset/create_dataset.py +26 -0
- pygeai/tests/snippets/evaluation/dataset/create_dataset_from_file.py +11 -0
- pygeai/tests/snippets/evaluation/dataset/create_dataset_row.py +17 -0
- pygeai/tests/snippets/evaluation/dataset/create_expected_source.py +18 -0
- pygeai/tests/snippets/evaluation/dataset/create_filter_variable.py +19 -0
- pygeai/tests/snippets/evaluation/dataset/delete_dataset.py +9 -0
- pygeai/tests/snippets/evaluation/dataset/delete_dataset_row.py +10 -0
- pygeai/tests/snippets/evaluation/dataset/delete_expected_source.py +15 -0
- pygeai/tests/snippets/evaluation/dataset/delete_filter_variable.py +15 -0
- pygeai/tests/snippets/evaluation/dataset/get_dataset.py +9 -0
- pygeai/tests/snippets/evaluation/dataset/get_dataset_row.py +10 -0
- pygeai/tests/snippets/evaluation/dataset/get_expected_source.py +15 -0
- pygeai/tests/snippets/evaluation/dataset/get_filter_variable.py +15 -0
- pygeai/tests/snippets/evaluation/dataset/list_dataset_rows.py +9 -0
- pygeai/tests/snippets/evaluation/dataset/list_datasets.py +6 -0
- pygeai/tests/snippets/evaluation/dataset/list_expected_sources.py +10 -0
- pygeai/tests/snippets/evaluation/dataset/list_filter_variables.py +10 -0
- pygeai/tests/snippets/evaluation/dataset/update_dataset.py +15 -0
- pygeai/tests/snippets/evaluation/dataset/update_dataset_row.py +20 -0
- pygeai/tests/snippets/evaluation/dataset/update_expected_source.py +18 -0
- pygeai/tests/snippets/evaluation/dataset/update_filter_variable.py +19 -0
- pygeai/tests/snippets/evaluation/dataset/upload_dataset_rows_file.py +10 -0
- pygeai/tests/snippets/evaluation/plan/__init__.py +0 -0
- pygeai/tests/snippets/evaluation/plan/add_plan_system_metric.py +13 -0
- pygeai/tests/snippets/evaluation/plan/complete_workflow_example.py +136 -0
- pygeai/tests/snippets/evaluation/plan/create_evaluation_plan.py +24 -0
- pygeai/tests/snippets/evaluation/plan/create_rag_evaluation_plan.py +22 -0
- pygeai/tests/snippets/evaluation/plan/delete_evaluation_plan.py +9 -0
- pygeai/tests/snippets/evaluation/plan/delete_plan_system_metric.py +13 -0
- pygeai/tests/snippets/evaluation/plan/execute_evaluation_plan.py +11 -0
- pygeai/tests/snippets/evaluation/plan/get_evaluation_plan.py +9 -0
- pygeai/tests/snippets/evaluation/plan/get_plan_system_metric.py +13 -0
- pygeai/tests/snippets/evaluation/plan/get_system_metric.py +9 -0
- pygeai/tests/snippets/evaluation/plan/list_evaluation_plans.py +7 -0
- pygeai/tests/snippets/evaluation/plan/list_plan_system_metrics.py +9 -0
- pygeai/tests/snippets/evaluation/plan/list_system_metrics.py +7 -0
- pygeai/tests/snippets/evaluation/plan/update_evaluation_plan.py +22 -0
- pygeai/tests/snippets/evaluation/plan/update_plan_system_metric.py +14 -0
- pygeai/tests/snippets/evaluation/result/__init__.py +0 -0
- pygeai/tests/snippets/evaluation/result/complete_workflow_example.py +150 -0
- pygeai/tests/snippets/evaluation/result/get_evaluation_result.py +26 -0
- pygeai/tests/snippets/evaluation/result/list_evaluation_results.py +17 -0
- pygeai/tests/snippets/migrate/__init__.py +45 -0
- pygeai/tests/snippets/migrate/agent_migration.py +110 -0
- pygeai/tests/snippets/migrate/assistant_migration.py +64 -0
- pygeai/tests/snippets/migrate/orchestrator_examples.py +179 -0
- pygeai/tests/snippets/migrate/process_migration.py +64 -0
- pygeai/tests/snippets/migrate/project_migration.py +42 -0
- pygeai/tests/snippets/migrate/tool_migration.py +64 -0
- pygeai/tests/snippets/organization/create_project.py +2 -2
- {pygeai-0.6.0b7.dist-info → pygeai-0.6.0b10.dist-info}/METADATA +1 -1
- {pygeai-0.6.0b7.dist-info → pygeai-0.6.0b10.dist-info}/RECORD +178 -96
- {pygeai-0.6.0b7.dist-info → pygeai-0.6.0b10.dist-info}/WHEEL +0 -0
- {pygeai-0.6.0b7.dist-info → pygeai-0.6.0b10.dist-info}/entry_points.txt +0 -0
- {pygeai-0.6.0b7.dist-info → pygeai-0.6.0b10.dist-info}/licenses/LICENSE +0 -0
- {pygeai-0.6.0b7.dist-info → pygeai-0.6.0b10.dist-info}/top_level.txt +0 -0

@@ -0,0 +1,10 @@
+from pygeai.evaluation.dataset.clients import EvaluationDatasetClient
+
+client = EvaluationDatasetClient()
+
+dataset_id = "your-dataset-id-here"
+row_id = "your-row-id-here"
+
+response = client.list_expected_sources(dataset_id=dataset_id, dataset_row_id=row_id)
+
+print(f"Expected sources: {response}")

@@ -0,0 +1,10 @@
+from pygeai.evaluation.dataset.clients import EvaluationDatasetClient
+
+client = EvaluationDatasetClient()
+
+dataset_id = "your-dataset-id-here"
+row_id = "your-row-id-here"
+
+response = client.list_filter_variables(dataset_id=dataset_id, dataset_row_id=row_id)
+
+print(f"Filter variables: {response}")

@@ -0,0 +1,15 @@
+from pygeai.evaluation.dataset.clients import EvaluationDatasetClient
+
+client = EvaluationDatasetClient()
+
+dataset_id = "your-dataset-id-here"
+
+response = client.update_dataset(
+    dataset_id=dataset_id,
+    dataset_name="Updated Dataset Name",
+    dataset_description="Updated description",
+    dataset_type="E",
+    dataset_active=False
+)
+
+print(f"Updated dataset: {response}")

@@ -0,0 +1,20 @@
+from pygeai.evaluation.dataset.clients import EvaluationDatasetClient
+
+client = EvaluationDatasetClient()
+
+dataset_id = "your-dataset-id-here"
+row_id = "your-row-id-here"
+
+updated_row = {
+    "dataSetRowInput": "Updated question: What is Machine Learning?",
+    "dataSetRowExpectedAnswer": "Updated answer: ML is a subset of AI",
+    "dataSetRowContextDocument": "Updated context about ML"
+}
+
+response = client.update_dataset_row(
+    dataset_id=dataset_id,
+    dataset_row_id=row_id,
+    row=updated_row
+)
+
+print(f"Updated row: {response}")

@@ -0,0 +1,18 @@
+from pygeai.evaluation.dataset.clients import EvaluationDatasetClient
+
+client = EvaluationDatasetClient()
+
+dataset_id = "your-dataset-id-here"
+row_id = "your-row-id-here"
+source_id = "your-expected-source-id-here"
+
+response = client.update_expected_source(
+    dataset_id=dataset_id,
+    dataset_row_id=row_id,
+    expected_source_id=source_id,
+    expected_source_name="Updated Documentation",
+    expected_source_value="Updated content from the source...",
+    expected_source_extension="txt"
+)
+
+print(f"Updated expected source: {response}")

@@ -0,0 +1,19 @@
+from pygeai.evaluation.dataset.clients import EvaluationDatasetClient
+
+client = EvaluationDatasetClient()
+
+dataset_id = "your-dataset-id-here"
+row_id = "your-row-id-here"
+filter_var_id = "your-filter-variable-id-here"
+
+response = client.update_filter_variable(
+    dataset_id=dataset_id,
+    dataset_row_id=row_id,
+    filter_variable_id=filter_var_id,
+    metadata_type="V",
+    filter_variable_key="category",
+    filter_variable_value="technology",
+    filter_variable_operator="="
+)
+
+print(f"Updated filter variable: {response}")

@@ -0,0 +1,10 @@
+from pygeai.evaluation.dataset.clients import EvaluationDatasetClient
+
+client = EvaluationDatasetClient()
+
+dataset_id = "your-dataset-id-here"
+file_path = "path/to/rows.json"
+
+response = client.upload_dataset_rows_file(dataset_id=dataset_id, file_path=file_path)
+
+print(f"Uploaded rows from file: {response}")

File without changes

@@ -0,0 +1,13 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+evaluation_plan_id = "your-evaluation-plan-id"
+
+response = client.add_evaluation_plan_system_metric(
+    evaluation_plan_id=evaluation_plan_id,
+    system_metric_id="metric-id",
+    system_metric_weight=0.5
+)
+
+print(f"Added system metric: {response}")

@@ -0,0 +1,136 @@
+"""
+Complete Evaluation Plan API Workflow Example
+
+This example demonstrates a full workflow using the Evaluation Plan API:
+1. Query available system metrics
+2. Create an evaluation plan
+3. Add system metrics
+4. Update the plan
+5. Execute the plan
+6. Cleanup
+"""
+
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+
+def main():
+    # Initialize client
+    client = EvaluationPlanClient()
+
+    print("="*60)
+    print("Evaluation Plan API Complete Workflow Example")
+    print("="*60)
+
+    # Step 1: List available system metrics
+    print("\n[1/7] Listing available system metrics...")
+    metrics = client.list_system_metrics()
+    print(f"✓ Available metrics: {metrics}")
+
+    # Get details of a specific metric
+    if metrics and 'systemMetrics' in metrics and len(metrics['systemMetrics']) > 0:
+        first_metric_id = metrics['systemMetrics'][0].get('systemMetricId')
+        metric_details = client.get_system_metric(system_metric_id=first_metric_id)
+        print(f"✓ Sample metric details: {metric_details}")
+
+    # Step 2: Create an evaluation plan
+    print("\n[2/7] Creating evaluation plan...")
+    plan = client.create_evaluation_plan(
+        name="AI Assistant Performance Test",
+        type="TextPromptAssistant",
+        assistant_id="your-assistant-id",
+        assistant_name="Test Assistant",
+        assistant_revision="1.0",
+        dataset_id="your-dataset-id",
+        system_metrics=[
+            {
+                "systemMetricId": "accuracy",
+                "systemMetricWeight": 0.6
+            },
+            {
+                "systemMetricId": "fluency",
+                "systemMetricWeight": 0.4
+            }
+        ]
+    )
+    plan_id = plan.get('evaluationPlanId')
+    print(f"✓ Created evaluation plan: {plan_id}")
+
+    # Step 3: Get the plan details
+    print("\n[3/7] Retrieving plan details...")
+    plan_details = client.get_evaluation_plan(evaluation_plan_id=plan_id)
+    print(f"✓ Plan name: {plan_details.get('evaluationPlanName')}")
+    print(f"✓ Plan type: {plan_details.get('evaluationPlanType')}")
+    print(f"✓ Number of metrics: {len(plan_details.get('systemMetrics', []))}")
+
+    # Step 4: List plan's system metrics
+    print("\n[4/7] Listing plan's system metrics...")
+    plan_metrics = client.list_evaluation_plan_system_metrics(evaluation_plan_id=plan_id)
+    print(f"✓ Plan metrics: {plan_metrics}")
+
+    # Step 5: Add a new metric to the plan
+    print("\n[5/7] Adding new system metric to plan...")
+    new_metric = client.add_evaluation_plan_system_metric(
+        evaluation_plan_id=plan_id,
+        system_metric_id="relevance",
+        system_metric_weight=0.5
+    )
+    print(f"✓ Added metric: {new_metric}")
+
+    # Step 6: Update a metric's weight
+    print("\n[6/7] Updating metric weight...")
+    updated_metric = client.update_evaluation_plan_system_metric(
+        evaluation_plan_id=plan_id,
+        system_metric_id="accuracy",
+        system_metric_weight=0.8
+    )
+    print(f"✓ Updated metric weight")
+
+    # Get specific metric details
+    metric_detail = client.get_evaluation_plan_system_metric(
+        evaluation_plan_id=plan_id,
+        system_metric_id="accuracy"
+    )
+    print(f"✓ Metric details: {metric_detail}")
+
+    # Step 7: Update the plan itself
+    print("\n[7/7] Updating evaluation plan...")
+    updated_plan = client.update_evaluation_plan(
+        evaluation_plan_id=plan_id,
+        name="Updated Performance Test",
+        assistant_revision="2.0"
+    )
+    print(f"✓ Updated plan")
+
+    # Execute the evaluation plan
+    print("\n[EXECUTE] Running evaluation plan...")
+    execution = client.execute_evaluation_plan(evaluation_plan_id=plan_id)
+    print(f"✓ Execution started")
+    print(f"  Execution ID: {execution.get('executionId')}")
+    print(f"  Status: {execution.get('status')}")
+
+    # Cleanup
+    print("\n[CLEANUP] Cleaning up...")
+
+    # Delete metrics
+    client.delete_evaluation_plan_system_metric(
+        evaluation_plan_id=plan_id,
+        system_metric_id="relevance"
+    )
+    print(f"✓ Deleted added metric")
+
+    # Delete the plan
+    client.delete_evaluation_plan(evaluation_plan_id=plan_id)
+    print(f"✓ Deleted evaluation plan")
+
+    print("\n" + "="*60)
+    print("Workflow completed successfully!")
+    print("="*60)
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except Exception as e:
+        print(f"\n❌ Error occurred: {e}")
+        import traceback
+        traceback.print_exc()

@@ -0,0 +1,24 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+response = client.create_evaluation_plan(
+    name="My Evaluation Plan",
+    type="TextPromptAssistant",
+    assistant_id="your-assistant-id",
+    assistant_name="My Assistant",
+    assistant_revision="1.0",
+    dataset_id="your-dataset-id",
+    system_metrics=[
+        {
+            "systemMetricId": "metric-1",
+            "systemMetricWeight": 0.6
+        },
+        {
+            "systemMetricId": "metric-2",
+            "systemMetricWeight": 0.4
+        }
+    ]
+)
+
+print(f"Created evaluation plan: {response}")

@@ -0,0 +1,22 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+response = client.create_evaluation_plan(
+    name="RAG Assistant Evaluation",
+    type="RAG Assistant",
+    profile_name="My RAG Profile",
+    dataset_id="your-dataset-id",
+    system_metrics=[
+        {
+            "systemMetricId": "accuracy",
+            "systemMetricWeight": 0.7
+        },
+        {
+            "systemMetricId": "fluency",
+            "systemMetricWeight": 0.3
+        }
+    ]
+)
+
+print(f"Created RAG evaluation plan: {response}")

@@ -0,0 +1,9 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+evaluation_plan_id = "your-evaluation-plan-id"
+
+response = client.delete_evaluation_plan(evaluation_plan_id=evaluation_plan_id)
+
+print(f"Deleted evaluation plan: {response}")

@@ -0,0 +1,13 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+evaluation_plan_id = "your-evaluation-plan-id"
+system_metric_id = "your-metric-id"
+
+response = client.delete_evaluation_plan_system_metric(
+    evaluation_plan_id=evaluation_plan_id,
+    system_metric_id=system_metric_id
+)
+
+print(f"Deleted system metric: {response}")

@@ -0,0 +1,11 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+evaluation_plan_id = "your-evaluation-plan-id"
+
+response = client.execute_evaluation_plan(evaluation_plan_id=evaluation_plan_id)
+
+print(f"Execution started: {response}")
+print(f"Execution ID: {response.get('executionId')}")
+print(f"Status: {response.get('status')}")

@@ -0,0 +1,9 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+evaluation_plan_id = "your-evaluation-plan-id"
+
+response = client.get_evaluation_plan(evaluation_plan_id=evaluation_plan_id)
+
+print(f"Evaluation plan: {response}")

@@ -0,0 +1,13 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+evaluation_plan_id = "your-evaluation-plan-id"
+system_metric_id = "your-metric-id"
+
+response = client.get_evaluation_plan_system_metric(
+    evaluation_plan_id=evaluation_plan_id,
+    system_metric_id=system_metric_id
+)
+
+print(f"Plan system metric: {response}")

@@ -0,0 +1,9 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+evaluation_plan_id = "your-evaluation-plan-id"
+
+response = client.list_evaluation_plan_system_metrics(evaluation_plan_id=evaluation_plan_id)
+
+print(f"Plan system metrics: {response}")

@@ -0,0 +1,22 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+evaluation_plan_id = "your-evaluation-plan-id"
+
+response = client.update_evaluation_plan(
+    evaluation_plan_id=evaluation_plan_id,
+    name="Updated Plan Name",
+    system_metrics=[
+        {
+            "systemMetricId": "metric-1",
+            "systemMetricWeight": 0.8
+        },
+        {
+            "systemMetricId": "metric-2",
+            "systemMetricWeight": 0.2
+        }
+    ]
+)
+
+print(f"Updated evaluation plan: {response}")

@@ -0,0 +1,14 @@
+from pygeai.evaluation.plan.clients import EvaluationPlanClient
+
+client = EvaluationPlanClient()
+
+evaluation_plan_id = "your-evaluation-plan-id"
+system_metric_id = "your-metric-id"
+
+response = client.update_evaluation_plan_system_metric(
+    evaluation_plan_id=evaluation_plan_id,
+    system_metric_id=system_metric_id,
+    system_metric_weight=0.9
+)
+
+print(f"Updated system metric: {response}")

File without changes

@@ -0,0 +1,150 @@
+"""
+Complete Evaluation Result API Workflow Example
+
+This example demonstrates how to retrieve and analyze evaluation results:
+1. List all results for a plan
+2. Get detailed results for each
+3. Analyze performance metrics
+4. Extract insights
+
+Note: This is a read-only API. Results are created by executing evaluation plans.
+"""
+
+from pygeai.evaluation.result.clients import EvaluationResultClient
+
+
+def main():
+    # Initialize client
+    client = EvaluationResultClient()
+
+    print("="*70)
+    print("Evaluation Result API Complete Workflow Example")
+    print("="*70)
+
+    # Step 1: List all results for an evaluation plan
+    print("\n[1/4] Listing evaluation results for a plan...")
+    evaluation_plan_id = "your-evaluation-plan-id"
+
+    results = client.list_evaluation_results(evaluation_plan_id=evaluation_plan_id)
+
+    if isinstance(results, list):
+        print(f"✓ Found {len(results)} evaluation results")
+    else:
+        print(f"✓ Results: {results}")
+        results = []
+
+    # Step 2: Display summary of all results
+    print("\n[2/4] Summary of all results:")
+    print("-" * 70)
+
+    total_cost = 0
+    total_duration = 0
+    status_counts = {}
+
+    for i, result in enumerate(results, 1):
+        result_id = result.get('evaluationResultId', 'Unknown')
+        status = result.get('evaluationResultStatus', 'Unknown')
+        cost = result.get('evaluationResultCost', 0)
+        duration = result.get('evaluationResultDuration', 0)
+
+        print(f"\n{i}. Result ID: {result_id}")
+        print(f"   Status: {status}")
+        print(f"   Cost: ${cost}")
+        print(f"   Duration: {duration}ms")
+        print(f"   Model: {result.get('evaluationResultModelName', 'N/A')}")
+        print(f"   Provider: {result.get('evaluationResultProviderName', 'N/A')}")
+
+        # Aggregate metrics
+        total_cost += cost
+        total_duration += duration
+        status_counts[status] = status_counts.get(status, 0) + 1
+
+    # Step 3: Get detailed results for the first result
+    if results:
+        print("\n[3/4] Getting detailed results for first evaluation...")
+        first_result_id = results[0].get('evaluationResultId')
+
+        detailed_result = client.get_evaluation_result(
+            evaluation_result_id=first_result_id
+        )
+
+        print(f"✓ Retrieved detailed result: {detailed_result.get('evaluationResultId')}")
+
+        # Display row-level details
+        rows = detailed_result.get('rows', [])
+        print(f"\n  Row-level results: {len(rows)} rows")
+
+        successful_rows = 0
+        failed_rows = 0
+        row_costs = []
+
+        for row in rows:
+            row_status = row.get('evaluationResultRowStatus', 'Unknown')
+            row_cost = row.get('evaluationResultRowCost', 0)
+
+            if row_status == 'completed' or row_status == 'success':
+                successful_rows += 1
+            else:
+                failed_rows += 1
+
+            row_costs.append(row_cost)
+
+        print(f"  Successful rows: {successful_rows}")
+        print(f"  Failed rows: {failed_rows}")
+
+        if row_costs:
+            avg_row_cost = sum(row_costs) / len(row_costs)
+            print(f"  Average row cost: ${avg_row_cost:.4f}")
+
+        # Show sample row
+        if rows:
+            print(f"\n  Sample Row:")
+            sample_row = rows[0]
+            print(f"  Dataset Row ID: {sample_row.get('dataSetRowId')}")
+            print(f"  Status: {sample_row.get('evaluationResultRowStatus')}")
+            print(f"  Cost: ${sample_row.get('evaluationResultRowCost')}")
+            print(f"  Start: {sample_row.get('evaluationResultRowStartDate')}")
+            print(f"  End: {sample_row.get('evaluationResultRowEndDate')}")
+
+            output = sample_row.get('evaluationResultRowOutput', '')
+            if output:
+                print(f"  Output (first 200 chars):")
+                print(f"  {output[:200]}...")
+
+    # Step 4: Display aggregated analytics
+    print("\n[4/4] Aggregated Analytics:")
+    print("-" * 70)
+    print(f"Total Evaluations: {len(results)}")
+    print(f"Total Cost: ${total_cost:.2f}")
+    print(f"Total Duration: {total_duration}ms ({total_duration/1000:.2f}s)")
+
+    if results:
+        print(f"Average Cost per Evaluation: ${total_cost/len(results):.4f}")
+        print(f"Average Duration per Evaluation: {total_duration/len(results):.0f}ms")
+
+    print(f"\nStatus Distribution:")
+    for status, count in status_counts.items():
+        percentage = (count / len(results) * 100) if results else 0
+        print(f"  {status}: {count} ({percentage:.1f}%)")
+
+    print("\n" + "="*70)
+    print("Workflow completed successfully!")
+    print("="*70)
+
+    # Important note about field names
+    print("\n⚠️ IMPORTANT NOTE:")
+    print("The API responses contain typos in some field names:")
+    print("  - evaluationResultAssitantRevision (missing 's' in Assistant)")
+    print("  - evaluationResultChunckCount (should be Chunk)")
+    print("  - evaluationResultChunckSize (should be Chunk)")
+    print("  - evaluationResultaMaxTokens (lowercase 'a')")
+    print("\nThese are API-level typos, not errors in our code.")
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except Exception as e:
+        print(f"\n❌ Error occurred: {e}")
+        import traceback
+        traceback.print_exc()

@@ -0,0 +1,26 @@
+from pygeai.evaluation.result.clients import EvaluationResultClient
+
+client = EvaluationResultClient()
+
+evaluation_result_id = "your-evaluation-result-id"
+
+result = client.get_evaluation_result(evaluation_result_id=evaluation_result_id)
+
+print(f"Evaluation Result: {result.get('evaluationResultId')}")
+print(f"Status: {result.get('evaluationResultStatus')}")
+print(f"Plan ID: {result.get('evaluationPlanId')}")
+print(f"Dataset ID: {result.get('dataSetId')}")
+print(f"Cost: ${result.get('evaluationResultCost')}")
+print(f"Duration: {result.get('evaluationResultDuration')}ms")
+print(f"Model: {result.get('evaluationResultModelName')}")
+print(f"Provider: {result.get('evaluationResultProviderName')}")
+
+# Row-level details
+rows = result.get('rows', [])
+print(f"\nRow-level results: {len(rows)} rows")
+
+for row in rows:
+    print(f"\n  Row ID: {row.get('dataSetRowId')}")
+    print(f"  Status: {row.get('evaluationResultRowStatus')}")
+    print(f"  Cost: ${row.get('evaluationResultRowCost')}")
+    print(f"  Output: {row.get('evaluationResultRowOutput')[:100]}...")

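For context, the snippets above exercise each evaluation endpoint individually. The following is a minimal sketch, not part of the released package, that only reuses client methods and response keys already shown in the diff, of how the plan-execution and result-retrieval clients could be chained together. As with the published snippets, it assumes the clients are already configured with valid credentials.

from pygeai.evaluation.plan.clients import EvaluationPlanClient
from pygeai.evaluation.result.clients import EvaluationResultClient

# Illustrative sketch only: chain plan execution with result retrieval
# using the clients added in this release.
plan_client = EvaluationPlanClient()
result_client = EvaluationResultClient()

evaluation_plan_id = "your-evaluation-plan-id"

# Kick off an evaluation run for the plan.
execution = plan_client.execute_evaluation_plan(evaluation_plan_id=evaluation_plan_id)
print(f"Execution started: {execution.get('executionId')} ({execution.get('status')})")

# Later, list the results recorded for the plan and inspect the most recent one.
results = result_client.list_evaluation_results(evaluation_plan_id=evaluation_plan_id)
if isinstance(results, list) and results:
    latest = result_client.get_evaluation_result(
        evaluation_result_id=results[0].get("evaluationResultId")
    )
    print(f"Latest result status: {latest.get('evaluationResultStatus')}")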