langwatch-scenario 0.6.0__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.6.0.dist-info → langwatch_scenario-0.7.2.dist-info}/METADATA +145 -41
- langwatch_scenario-0.7.2.dist-info/RECORD +237 -0
- scenario/__init__.py +1 -4
- scenario/{events → _events}/__init__.py +9 -11
- scenario/_events/event_bus.py +185 -0
- scenario/{events → _events}/event_reporter.py +1 -1
- scenario/{events → _events}/events.py +20 -27
- scenario/_events/messages.py +58 -0
- scenario/{events → _events}/utils.py +43 -32
- scenario/_generated/langwatch_api_client/README.md +139 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py +13 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/__init__.py +1 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/__init__.py +1 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_annotations_id.py +155 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_prompts_by_id.py +218 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_scenario_events.py +183 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations.py +136 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_id.py +155 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_trace_id.py +160 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_dataset_by_slug_or_id.py +229 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts.py +188 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id.py +218 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id_versions.py +218 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_trace_id.py +155 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/patch_api_annotations_id.py +178 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_annotations_trace_id.py +178 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_dataset_by_slug_entries.py +108 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts.py +187 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts_by_id_versions.py +241 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_scenario_events.py +229 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_share.py +155 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_unshare.py +155 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/put_api_prompts_by_id.py +241 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/__init__.py +1 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/post_api_trace_search.py +168 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/client.py +268 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/errors.py +16 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/__init__.py +455 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/annotation.py +131 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries.py +74 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries_entries_item.py +44 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_annotations_id_response_200.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_200.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_404.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_500.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_200.py +81 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_400.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_401.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_500.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/error.py +67 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation.py +164 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation_timestamps.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200.py +75 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item.py +109 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item_entry.py +44 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_400.py +78 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_401.py +78 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_404.py +78 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_422.py +67 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_500.py +78 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200.py +172 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item.py +69 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item_role.py +10 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0.py +81 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema.py +77 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema_schema.py +44 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_type.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_404.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_500.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200.py +155 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data.py +204 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_404.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_500.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item.py +172 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item.py +69 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item_role.py +10 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0.py +81 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema.py +77 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema_schema.py +44 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_type.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_500.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200.py +249 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_error_type_0.py +79 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item.py +152 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_error.py +79 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_timestamps.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_input.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metadata.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metrics.py +95 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_output.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item.py +271 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_error_type_0.py +79 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input.py +90 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input_value_item.py +69 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_metrics.py +77 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output.py +89 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output_value_item.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_params.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_timestamps.py +95 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_timestamps.py +77 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/input_.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metadata.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metrics.py +115 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/output.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/pagination.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_body.py +77 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_response_200.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_annotations_trace_id_body.py +77 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_body.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body.py +147 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data.py +207 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations.py +106 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item.py +79 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item_type.py +18 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_rows_item.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item.py +71 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item_type.py +16 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item.py +71 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item_role.py +10 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item.py +98 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_json_schema.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_type.py +11 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_prompting_technique.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200.py +155 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data.py +206 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_404.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_500.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200.py +172 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item.py +69 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item_role.py +10 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0.py +81 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema.py +77 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema_schema.py +44 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_type.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_500.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0.py +127 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0_metadata.py +68 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1.py +164 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0.py +98 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0_verdict.py +10 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_status.py +13 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2.py +245 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_0.py +88 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_1.py +88 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2.py +120 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item.py +87 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item_function.py +67 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_3.py +88 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_4.py +85 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_201.py +81 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_400.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_401.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_500.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_share_response_200.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_unshare_response_200.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_body.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_200.py +75 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401.py +61 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401_error.py +8 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_404.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_500.py +59 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request.py +133 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request_filters.py +51 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py +93 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py +77 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py +225 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/py.typed +1 -0
- scenario/_generated/langwatch_api_client/lang_watch_api_client/types.py +46 -0
- scenario/_generated/langwatch_api_client/pyproject.toml +27 -0
- scenario/_utils/__init__.py +1 -1
- scenario/_utils/message_conversion.py +2 -2
- scenario/judge_agent.py +6 -1
- scenario/pytest_plugin.py +4 -4
- scenario/scenario_executor.py +196 -223
- scenario/types.py +5 -2
- langwatch_scenario-0.6.0.dist-info/RECORD +0 -27
- scenario/events/event_bus.py +0 -175
- scenario/events/messages.py +0 -84
- {langwatch_scenario-0.6.0.dist-info → langwatch_scenario-0.7.2.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.6.0.dist-info → langwatch_scenario-0.7.2.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.6.0.dist-info → langwatch_scenario-0.7.2.dist-info}/top_level.txt +0 -0
scenario/scenario_executor.py
CHANGED
@@ -19,14 +19,15 @@ from typing import (
|
|
19
19
|
TypedDict,
|
20
20
|
)
|
21
21
|
import time
|
22
|
+
import warnings
|
22
23
|
import termcolor
|
23
24
|
import asyncio
|
24
25
|
import concurrent.futures
|
25
26
|
|
26
27
|
from scenario.config import ScenarioConfig
|
27
28
|
from scenario._utils import (
|
28
|
-
check_valid_return_type,
|
29
29
|
convert_agent_return_types_to_openai_messages,
|
30
|
+
check_valid_return_type,
|
30
31
|
print_openai_messages,
|
31
32
|
show_spinner,
|
32
33
|
await_if_awaitable,
|
@@ -46,17 +47,20 @@ from .agent_adapter import AgentAdapter
|
|
46
47
|
from .script import proceed
|
47
48
|
from pksuid import PKSUID
|
48
49
|
from .scenario_state import ScenarioState
|
49
|
-
from .
|
50
|
-
ScenarioEventBus,
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
50
|
+
from ._events import (
|
51
|
+
ScenarioEventBus,
|
52
|
+
ScenarioEvent,
|
53
|
+
ScenarioRunStartedEvent,
|
54
|
+
ScenarioMessageSnapshotEvent,
|
55
|
+
ScenarioRunFinishedEvent,
|
56
|
+
ScenarioRunStartedEventMetadata,
|
57
|
+
ScenarioRunFinishedEventResults,
|
58
|
+
ScenarioRunFinishedEventVerdict,
|
59
|
+
ScenarioRunFinishedEventStatus,
|
60
|
+
convert_messages_to_api_client_messages,
|
59
61
|
)
|
62
|
+
from rx.subject.subject import Subject
|
63
|
+
from rx.core.observable.observable import Observable
|
60
64
|
|
61
65
|
|
62
66
|
class ScenarioExecutor:
|
@@ -80,40 +84,6 @@ class ScenarioExecutor:
|
|
80
84
|
agents: List of agent adapters participating in the scenario
|
81
85
|
script: Optional list of script steps to control scenario flow
|
82
86
|
config: Configuration settings for execution behavior
|
83
|
-
|
84
|
-
Example:
|
85
|
-
```
|
86
|
-
# Direct instantiation (less common)
|
87
|
-
executor = ScenarioExecutor(
|
88
|
-
name="weather query test",
|
89
|
-
description="User asks about weather, agent should provide helpful response",
|
90
|
-
agents=[
|
91
|
-
weather_agent,
|
92
|
-
scenario.UserSimulatorAgent(),
|
93
|
-
scenario.JudgeAgent(criteria=["Agent provides helpful weather info"])
|
94
|
-
],
|
95
|
-
max_turns=10,
|
96
|
-
verbose=True
|
97
|
-
)
|
98
|
-
result = await executor._run()
|
99
|
-
|
100
|
-
# Preferred high-level API
|
101
|
-
result = await scenario.run(
|
102
|
-
name="weather query test",
|
103
|
-
description="User asks about weather, agent should provide helpful response",
|
104
|
-
agents=[
|
105
|
-
weather_agent,
|
106
|
-
scenario.UserSimulatorAgent(),
|
107
|
-
scenario.JudgeAgent(criteria=["Agent provides helpful weather info"])
|
108
|
-
]
|
109
|
-
)
|
110
|
-
```
|
111
|
-
|
112
|
-
Note:
|
113
|
-
- Scenarios run in isolated thread pools to support parallel execution
|
114
|
-
- All agent interactions are cached when cache_key is configured
|
115
|
-
- Debug mode allows step-by-step execution with user intervention
|
116
|
-
- Results include detailed timing information and conversation history
|
117
87
|
"""
|
118
88
|
|
119
89
|
name: str
|
@@ -130,6 +100,7 @@ class ScenarioExecutor:
|
|
130
100
|
_pending_roles_on_turn: List[AgentRole] = []
|
131
101
|
_pending_agents_on_turn: Set[AgentAdapter] = set()
|
132
102
|
_agent_times: Dict[int, float] = {}
|
103
|
+
_events: Subject
|
133
104
|
|
134
105
|
event_bus: ScenarioEventBus
|
135
106
|
|
@@ -167,27 +138,7 @@ class ScenarioExecutor:
|
|
167
138
|
Overrides global configuration for this scenario.
|
168
139
|
debug: Whether to enable debug mode with step-by-step execution.
|
169
140
|
Overrides global configuration for this scenario.
|
170
|
-
|
171
|
-
|
172
|
-
Example:
|
173
|
-
```python
|
174
|
-
executor = ScenarioExecutor(
|
175
|
-
name="customer service test",
|
176
|
-
description="Customer has a billing question and needs help",
|
177
|
-
agents=[
|
178
|
-
customer_service_agent,
|
179
|
-
scenario.UserSimulatorAgent(),
|
180
|
-
scenario.JudgeAgent(criteria=[
|
181
|
-
"Agent is polite and professional",
|
182
|
-
"Agent addresses the billing question",
|
183
|
-
"Agent provides clear next steps"
|
184
|
-
])
|
185
|
-
],
|
186
|
-
max_turns=15,
|
187
|
-
verbose=True,
|
188
|
-
debug=False
|
189
|
-
)
|
190
|
-
```
|
141
|
+
event_bus: Optional event bus that will subscribe to this executor's events
|
191
142
|
"""
|
192
143
|
self.name = name
|
193
144
|
self.description = description
|
@@ -204,120 +155,33 @@ class ScenarioExecutor:
|
|
204
155
|
|
205
156
|
self.reset()
|
206
157
|
|
158
|
+
# Create executor's own event stream
|
159
|
+
self._events = Subject()
|
160
|
+
|
161
|
+
# Create and configure event bus to subscribe to our events
|
207
162
|
self.event_bus = event_bus or ScenarioEventBus()
|
163
|
+
self.event_bus.subscribe_to_events(self._events)
|
208
164
|
|
209
165
|
self.batch_run_id = get_or_create_batch_run_id()
|
210
166
|
|
211
|
-
@
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
max_turns: Optional[int] = None,
|
218
|
-
verbose: Optional[Union[bool, int]] = None,
|
219
|
-
cache_key: Optional[str] = None,
|
220
|
-
debug: Optional[bool] = None,
|
221
|
-
script: Optional[List[ScriptStep]] = None,
|
222
|
-
) -> ScenarioResult:
|
167
|
+
@property
|
168
|
+
def events(self) -> Observable:
|
169
|
+
"""Expose event stream for subscribers like the event bus."""
|
170
|
+
return self._events
|
171
|
+
|
172
|
+
def _emit_event(self, event: ScenarioEvent) -> None:
|
223
173
|
"""
|
224
|
-
|
174
|
+
Emit a domain event to all subscribers.
|
225
175
|
|
226
|
-
This
|
227
|
-
|
228
|
-
|
176
|
+
This method publishes scenario events to the internal event stream,
|
177
|
+
which subscribers (like the event bus) can observe and react to.
|
178
|
+
The timestamp is automatically set to the current time.
|
229
179
|
|
230
180
|
Args:
|
231
|
-
|
232
|
-
description: Detailed description of what the scenario tests
|
233
|
-
agents: List of agent adapters (agent under test, user simulator, judge)
|
234
|
-
max_turns: Maximum conversation turns before timeout (default: 10)
|
235
|
-
verbose: Show detailed output during execution
|
236
|
-
cache_key: Cache key for deterministic behavior
|
237
|
-
debug: Enable debug mode for step-by-step execution
|
238
|
-
script: Optional script steps to control scenario flow
|
239
|
-
|
240
|
-
Returns:
|
241
|
-
ScenarioResult containing the test outcome, conversation history,
|
242
|
-
success/failure status, and detailed reasoning
|
243
|
-
|
244
|
-
Example:
|
245
|
-
```
|
246
|
-
import scenario
|
247
|
-
|
248
|
-
# Simple scenario with automatic flow
|
249
|
-
result = await scenario.run(
|
250
|
-
name="help request",
|
251
|
-
description="User asks for help with a technical problem",
|
252
|
-
agents=[
|
253
|
-
my_agent,
|
254
|
-
scenario.UserSimulatorAgent(),
|
255
|
-
scenario.JudgeAgent(criteria=["Agent provides helpful response"])
|
256
|
-
]
|
257
|
-
)
|
258
|
-
|
259
|
-
# Scripted scenario with custom evaluations
|
260
|
-
result = await scenario.run(
|
261
|
-
name="custom interaction",
|
262
|
-
description="Test specific conversation flow",
|
263
|
-
agents=[
|
264
|
-
my_agent,
|
265
|
-
scenario.UserSimulatorAgent(),
|
266
|
-
scenario.JudgeAgent(criteria=["Agent provides helpful response"])
|
267
|
-
],
|
268
|
-
script=[
|
269
|
-
scenario.user("Hello"),
|
270
|
-
scenario.agent(),
|
271
|
-
custom_eval,
|
272
|
-
scenario.succeed()
|
273
|
-
]
|
274
|
-
)
|
275
|
-
|
276
|
-
# Results analysis
|
277
|
-
print(f"Test {'PASSED' if result.success else 'FAILED'}")
|
278
|
-
print(f"Reasoning: {result.reasoning}")
|
279
|
-
print(f"Conversation had {len(result.messages)} messages")
|
280
|
-
```
|
281
|
-
|
282
|
-
Note:
|
283
|
-
- Runs in isolated thread pool to support parallel execution
|
284
|
-
- Blocks until scenario completes or times out
|
285
|
-
- All agent calls are automatically cached when cache_key is set
|
286
|
-
- Exception handling ensures clean resource cleanup
|
181
|
+
event: The scenario event to emit
|
287
182
|
"""
|
288
|
-
|
289
|
-
|
290
|
-
description=description,
|
291
|
-
agents=agents,
|
292
|
-
max_turns=max_turns,
|
293
|
-
verbose=verbose,
|
294
|
-
cache_key=cache_key,
|
295
|
-
debug=debug,
|
296
|
-
script=script,
|
297
|
-
)
|
298
|
-
|
299
|
-
# We'll use a thread pool to run the execution logic, we
|
300
|
-
# require a separate thread because even though asyncio is
|
301
|
-
# being used throughout, any user code on the callback can
|
302
|
-
# be blocking, preventing them from running scenarios in parallel
|
303
|
-
with concurrent.futures.ThreadPoolExecutor() as executor:
|
304
|
-
|
305
|
-
def run_in_thread():
|
306
|
-
loop = asyncio.new_event_loop()
|
307
|
-
asyncio.set_event_loop(loop)
|
308
|
-
|
309
|
-
try:
|
310
|
-
return loop.run_until_complete(scenario._run())
|
311
|
-
finally:
|
312
|
-
loop.run_until_complete(scenario.event_bus.drain())
|
313
|
-
loop.close()
|
314
|
-
|
315
|
-
# Run the function in the thread pool and await its result
|
316
|
-
# This converts the thread's execution into a Future that the current
|
317
|
-
# event loop can await without blocking
|
318
|
-
loop = asyncio.get_event_loop()
|
319
|
-
result = await loop.run_in_executor(executor, run_in_thread)
|
320
|
-
return result
|
183
|
+
event.timestamp = int(time.time() * 1000)
|
184
|
+
self._events.on_next(event)
|
321
185
|
|
322
186
|
def reset(self):
|
323
187
|
"""
|
@@ -394,7 +258,6 @@ class ScenarioExecutor:
|
|
394
258
|
self._pending_messages[idx] = []
|
395
259
|
self._pending_messages[idx].append(message)
|
396
260
|
|
397
|
-
|
398
261
|
def add_messages(
|
399
262
|
self,
|
400
263
|
messages: List[ChatCompletionMessageParam],
|
@@ -522,7 +385,7 @@ class ScenarioExecutor:
|
|
522
385
|
agent_time=agent_time,
|
523
386
|
)
|
524
387
|
|
525
|
-
async def
|
388
|
+
async def run(self) -> ScenarioResult:
|
526
389
|
"""
|
527
390
|
Run a scenario against the agent under test.
|
528
391
|
|
@@ -535,7 +398,6 @@ class ScenarioExecutor:
|
|
535
398
|
scenario_run_id = generate_scenario_run_id()
|
536
399
|
|
537
400
|
try:
|
538
|
-
await self.event_bus.listen()
|
539
401
|
self._emit_run_started_event(scenario_run_id)
|
540
402
|
|
541
403
|
if self.config.verbose:
|
@@ -552,7 +414,11 @@ class ScenarioExecutor:
|
|
552
414
|
self._emit_message_snapshot_event(scenario_run_id)
|
553
415
|
|
554
416
|
if isinstance(result, ScenarioResult):
|
555
|
-
status =
|
417
|
+
status = (
|
418
|
+
ScenarioRunFinishedEventStatus.SUCCESS
|
419
|
+
if result.success
|
420
|
+
else ScenarioRunFinishedEventStatus.FAILED
|
421
|
+
)
|
556
422
|
self._emit_run_finished_event(scenario_run_id, result, status)
|
557
423
|
return result
|
558
424
|
|
@@ -565,7 +431,11 @@ class ScenarioExecutor:
|
|
565
431
|
"""
|
566
432
|
)
|
567
433
|
|
568
|
-
status =
|
434
|
+
status = (
|
435
|
+
ScenarioRunFinishedEventStatus.SUCCESS
|
436
|
+
if result.success
|
437
|
+
else ScenarioRunFinishedEventStatus.FAILED
|
438
|
+
)
|
569
439
|
self._emit_run_finished_event(scenario_run_id, result, status)
|
570
440
|
return result
|
571
441
|
|
@@ -578,7 +448,9 @@ class ScenarioExecutor:
|
|
578
448
|
total_time=time.time() - self._total_start_time,
|
579
449
|
agent_time=0,
|
580
450
|
)
|
581
|
-
self._emit_run_finished_event(
|
451
|
+
self._emit_run_finished_event(
|
452
|
+
scenario_run_id, error_result, ScenarioRunFinishedEventStatus.ERROR
|
453
|
+
)
|
582
454
|
raise # Re-raise the exception after cleanup
|
583
455
|
|
584
456
|
async def _call_agent(
|
@@ -620,16 +492,19 @@ class ScenarioExecutor:
|
|
620
492
|
):
|
621
493
|
start_time = time.time()
|
622
494
|
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
495
|
+
# Prevent pydantic validation warnings which should already be disabled
|
496
|
+
with warnings.catch_warnings():
|
497
|
+
warnings.simplefilter("ignore")
|
498
|
+
agent_response = agent.call(
|
499
|
+
AgentInput(
|
500
|
+
# TODO: test thread_id
|
501
|
+
thread_id=self._state.thread_id,
|
502
|
+
messages=self._state.messages,
|
503
|
+
new_messages=self._pending_messages.get(idx, []),
|
504
|
+
judgment_request=request_judgment,
|
505
|
+
scenario_state=self._state,
|
506
|
+
)
|
631
507
|
)
|
632
|
-
)
|
633
508
|
if not isinstance(agent_response, Awaitable):
|
634
509
|
raise Exception(
|
635
510
|
agent_response_not_awaitable(agent.__class__.__name__),
|
@@ -819,16 +694,17 @@ class ScenarioExecutor:
|
|
819
694
|
class _CommonEventFields(TypedDict):
|
820
695
|
"""
|
821
696
|
Common fields shared across all scenario events.
|
822
|
-
|
697
|
+
|
823
698
|
These fields provide consistent identification and timing information
|
824
699
|
for all events emitted during scenario execution.
|
825
|
-
|
700
|
+
|
826
701
|
Attributes:
|
827
702
|
batch_run_id: Unique identifier for the batch of scenario runs
|
828
703
|
scenario_run_id: Unique identifier for this specific scenario run
|
829
704
|
scenario_id: Human-readable name/identifier for the scenario
|
830
705
|
timestamp: Unix timestamp in milliseconds when the event occurred
|
831
706
|
"""
|
707
|
+
|
832
708
|
batch_run_id: str
|
833
709
|
scenario_run_id: str
|
834
710
|
scenario_id: str
|
@@ -837,13 +713,13 @@ class ScenarioExecutor:
|
|
837
713
|
def _create_common_event_fields(self, scenario_run_id: str) -> _CommonEventFields:
|
838
714
|
"""
|
839
715
|
Create common fields used across all scenario events.
|
840
|
-
|
716
|
+
|
841
717
|
This method generates the standard fields that every scenario event
|
842
718
|
must include for proper identification and timing.
|
843
|
-
|
719
|
+
|
844
720
|
Args:
|
845
721
|
scenario_run_id: Unique identifier for the current scenario run
|
846
|
-
|
722
|
+
|
847
723
|
Returns:
|
848
724
|
Dictionary containing common event fields with current timestamp
|
849
725
|
"""
|
@@ -857,86 +733,183 @@ class ScenarioExecutor:
|
|
857
733
|
def _emit_run_started_event(self, scenario_run_id: str) -> None:
|
858
734
|
"""
|
859
735
|
Emit a scenario run started event.
|
860
|
-
|
736
|
+
|
861
737
|
This event is published when a scenario begins execution. It includes
|
862
738
|
metadata about the scenario such as name and description, and is used
|
863
739
|
to track the start of scenario runs in monitoring systems.
|
864
|
-
|
740
|
+
|
865
741
|
Args:
|
866
742
|
scenario_run_id: Unique identifier for the current scenario run
|
867
|
-
|
868
|
-
Note:
|
869
|
-
This event is automatically published at the beginning of `_run()`
|
870
|
-
and signals the start of scenario execution to any event listeners.
|
871
743
|
"""
|
872
744
|
common_fields = self._create_common_event_fields(scenario_run_id)
|
873
745
|
metadata = ScenarioRunStartedEventMetadata(
|
874
746
|
name=self.name,
|
875
747
|
description=self.description,
|
876
748
|
)
|
877
|
-
|
749
|
+
|
878
750
|
event = ScenarioRunStartedEvent(
|
879
751
|
**common_fields,
|
880
752
|
metadata=metadata,
|
881
753
|
)
|
882
|
-
self.
|
754
|
+
self._emit_event(event)
|
883
755
|
|
884
756
|
def _emit_message_snapshot_event(self, scenario_run_id: str) -> None:
|
885
757
|
"""
|
886
758
|
Emit a message snapshot event.
|
887
|
-
|
759
|
+
|
888
760
|
This event captures the current state of the conversation during
|
889
761
|
scenario execution. It's published whenever messages are added to
|
890
762
|
the conversation, allowing real-time tracking of scenario progress.
|
891
|
-
|
892
|
-
Note:
|
893
|
-
This event is automatically published by `add_message()` and
|
894
|
-
`add_messages()` to provide continuous visibility into scenario
|
895
|
-
execution state.
|
896
763
|
"""
|
897
764
|
common_fields = self._create_common_event_fields(scenario_run_id)
|
898
|
-
|
765
|
+
|
899
766
|
event = ScenarioMessageSnapshotEvent(
|
900
767
|
**common_fields,
|
901
|
-
messages=
|
768
|
+
messages=convert_messages_to_api_client_messages(self._state.messages),
|
902
769
|
)
|
903
|
-
self.
|
770
|
+
self._emit_event(event)
|
904
771
|
|
905
772
|
def _emit_run_finished_event(
|
906
|
-
self,
|
907
|
-
scenario_run_id: str,
|
908
|
-
result: ScenarioResult,
|
909
|
-
status: ScenarioRunFinishedEventStatus
|
773
|
+
self,
|
774
|
+
scenario_run_id: str,
|
775
|
+
result: ScenarioResult,
|
776
|
+
status: ScenarioRunFinishedEventStatus,
|
910
777
|
) -> None:
|
911
778
|
"""
|
912
779
|
Emit a scenario run finished event.
|
913
|
-
|
780
|
+
|
914
781
|
This event is published when a scenario completes execution, whether
|
915
782
|
successfully or with an error. It includes the final results, verdict,
|
916
783
|
and reasoning for the scenario outcome.
|
917
|
-
|
784
|
+
|
918
785
|
Args:
|
919
786
|
scenario_run_id: Unique identifier for the current scenario run
|
920
787
|
result: The final scenario result containing success/failure status
|
921
788
|
status: The execution status (SUCCESS, FAILED, or ERROR)
|
922
|
-
|
923
|
-
Note:
|
924
|
-
This event is automatically published at the end of `_run()` and
|
925
|
-
signals the completion of scenario execution to any event listeners.
|
926
|
-
It includes detailed results for monitoring and analysis purposes.
|
927
789
|
"""
|
928
790
|
common_fields = self._create_common_event_fields(scenario_run_id)
|
929
|
-
|
791
|
+
|
930
792
|
results = ScenarioRunFinishedEventResults(
|
931
|
-
verdict=
|
793
|
+
verdict=(
|
794
|
+
ScenarioRunFinishedEventVerdict.SUCCESS
|
795
|
+
if result.success
|
796
|
+
else ScenarioRunFinishedEventVerdict.FAILURE
|
797
|
+
),
|
932
798
|
reasoning=result.reasoning or "",
|
933
799
|
met_criteria=result.passed_criteria,
|
934
800
|
unmet_criteria=result.failed_criteria,
|
935
801
|
)
|
936
|
-
|
802
|
+
|
937
803
|
event = ScenarioRunFinishedEvent(
|
938
804
|
**common_fields,
|
939
805
|
status=status,
|
940
806
|
results=results,
|
941
807
|
)
|
942
|
-
self.
|
808
|
+
self._emit_event(event)
|
809
|
+
|
810
|
+
# Signal end of event stream
|
811
|
+
self._events.on_completed()
|
812
|
+
|
813
|
+
|
814
|
+
async def run(
|
815
|
+
name: str,
|
816
|
+
description: str,
|
817
|
+
agents: List[AgentAdapter] = [],
|
818
|
+
max_turns: Optional[int] = None,
|
819
|
+
verbose: Optional[Union[bool, int]] = None,
|
820
|
+
cache_key: Optional[str] = None,
|
821
|
+
debug: Optional[bool] = None,
|
822
|
+
script: Optional[List[ScriptStep]] = None,
|
823
|
+
) -> ScenarioResult:
|
824
|
+
"""
|
825
|
+
High-level interface for running a scenario test.
|
826
|
+
|
827
|
+
This is the main entry point for executing scenario tests. It creates a
|
828
|
+
ScenarioExecutor instance and runs it in an isolated thread pool to support
|
829
|
+
parallel execution and prevent blocking.
|
830
|
+
|
831
|
+
Args:
|
832
|
+
name: Human-readable name for the scenario
|
833
|
+
description: Detailed description of what the scenario tests
|
834
|
+
agents: List of agent adapters (agent under test, user simulator, judge)
|
835
|
+
max_turns: Maximum conversation turns before timeout (default: 10)
|
836
|
+
verbose: Show detailed output during execution
|
837
|
+
cache_key: Cache key for deterministic behavior
|
838
|
+
debug: Enable debug mode for step-by-step execution
|
839
|
+
script: Optional script steps to control scenario flow
|
840
|
+
|
841
|
+
Returns:
|
842
|
+
ScenarioResult containing the test outcome, conversation history,
|
843
|
+
success/failure status, and detailed reasoning
|
844
|
+
|
845
|
+
Example:
|
846
|
+
```
|
847
|
+
import scenario
|
848
|
+
|
849
|
+
# Simple scenario with automatic flow
|
850
|
+
result = await scenario.run(
|
851
|
+
name="help request",
|
852
|
+
description="User asks for help with a technical problem",
|
853
|
+
agents=[
|
854
|
+
my_agent,
|
855
|
+
scenario.UserSimulatorAgent(),
|
856
|
+
scenario.JudgeAgent(criteria=["Agent provides helpful response"])
|
857
|
+
]
|
858
|
+
)
|
859
|
+
|
860
|
+
# Scripted scenario with custom evaluations
|
861
|
+
result = await scenario.run(
|
862
|
+
name="custom interaction",
|
863
|
+
description="Test specific conversation flow",
|
864
|
+
agents=[
|
865
|
+
my_agent,
|
866
|
+
scenario.UserSimulatorAgent(),
|
867
|
+
scenario.JudgeAgent(criteria=["Agent provides helpful response"])
|
868
|
+
],
|
869
|
+
script=[
|
870
|
+
scenario.user("Hello"),
|
871
|
+
scenario.agent(),
|
872
|
+
custom_eval,
|
873
|
+
scenario.succeed()
|
874
|
+
]
|
875
|
+
)
|
876
|
+
|
877
|
+
# Results analysis
|
878
|
+
print(f"Test {'PASSED' if result.success else 'FAILED'}")
|
879
|
+
print(f"Reasoning: {result.reasoning}")
|
880
|
+
print(f"Conversation had {len(result.messages)} messages")
|
881
|
+
```
|
882
|
+
"""
|
883
|
+
scenario = ScenarioExecutor(
|
884
|
+
name=name,
|
885
|
+
description=description,
|
886
|
+
agents=agents,
|
887
|
+
max_turns=max_turns,
|
888
|
+
verbose=verbose,
|
889
|
+
cache_key=cache_key,
|
890
|
+
debug=debug,
|
891
|
+
script=script,
|
892
|
+
)
|
893
|
+
|
894
|
+
# We'll use a thread pool to run the execution logic, we
|
895
|
+
# require a separate thread because even though asyncio is
|
896
|
+
# being used throughout, any user code on the callback can
|
897
|
+
# be blocking, preventing them from running scenarios in parallel
|
898
|
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
899
|
+
|
900
|
+
def run_in_thread():
|
901
|
+
loop = asyncio.new_event_loop()
|
902
|
+
asyncio.set_event_loop(loop)
|
903
|
+
|
904
|
+
try:
|
905
|
+
return loop.run_until_complete(scenario.run())
|
906
|
+
finally:
|
907
|
+
scenario.event_bus.drain()
|
908
|
+
loop.close()
|
909
|
+
|
910
|
+
# Run the function in the thread pool and await its result
|
911
|
+
# This converts the thread's execution into a Future that the current
|
912
|
+
# event loop can await without blocking
|
913
|
+
loop = asyncio.get_event_loop()
|
914
|
+
result = await loop.run_in_executor(executor, run_in_thread)
|
915
|
+
return result
|
scenario/types.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
from enum import Enum
|
2
|
-
from pydantic import BaseModel,
|
2
|
+
from pydantic import BaseModel, SkipValidation
|
3
3
|
from typing import (
|
4
4
|
TYPE_CHECKING,
|
5
5
|
Annotated,
|
@@ -35,6 +35,7 @@ class AgentRole(Enum):
|
|
35
35
|
AGENT: Represents the agent under test that responds to user inputs
|
36
36
|
JUDGE: Represents a judge agent that evaluates the conversation and determines success/failure
|
37
37
|
"""
|
38
|
+
|
38
39
|
USER = "User"
|
39
40
|
AGENT = "Agent"
|
40
41
|
JUDGE = "Judge"
|
@@ -71,6 +72,7 @@ class AgentInput(BaseModel):
|
|
71
72
|
return response
|
72
73
|
```
|
73
74
|
"""
|
75
|
+
|
74
76
|
thread_id: str
|
75
77
|
# Prevent pydantic from validating/parsing the messages and causing issues: https://github.com/pydantic/pydantic/issues/9541
|
76
78
|
messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
|
@@ -168,7 +170,8 @@ class ScenarioResult(BaseModel):
|
|
168
170
|
"""
|
169
171
|
|
170
172
|
success: bool
|
171
|
-
|
173
|
+
# Prevent issues with slightly inconsistent message types for example when comming from Gemini right at the result level
|
174
|
+
messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
|
172
175
|
reasoning: Optional[str] = None
|
173
176
|
passed_criteria: List[str] = []
|
174
177
|
failed_criteria: List[str] = []
|
@@ -1,27 +0,0 @@
|
|
1
|
-
scenario/__init__.py,sha256=UJ5l-sG4TMG0wR8Ba-dxdDW36m3apTvawP-lNvk7Jm0,4293
|
2
|
-
scenario/_error_messages.py,sha256=6lEx3jBGMbPx0kG0eX5zoZE-ENVM3O_ZkIbVMlnidYs,3892
|
3
|
-
scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
|
4
|
-
scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
|
5
|
-
scenario/config.py,sha256=xhUuXH-sThwPTmJNSuajKxX-WC_tcFwJ1jZc119DswA,6093
|
6
|
-
scenario/judge_agent.py,sha256=9CCO699qoWqXvWdQ73Yc3dqPOwaJdJ-zqxVaLaKi_cA,16161
|
7
|
-
scenario/pytest_plugin.py,sha256=f2ETBpATz80k7K87M6046ZIFiQpHEvDN7dxakd3y2wk,11321
|
8
|
-
scenario/scenario_executor.py,sha256=nkSIuIlwPHfr6pueSBbARrgiqPtW0SxajV3PFypAnJ4,34508
|
9
|
-
scenario/scenario_state.py,sha256=dQDjazem-dn1c5mw6TwngEu6Tv_cHwEzemepsPBy2f0,7039
|
10
|
-
scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
|
11
|
-
scenario/types.py,sha256=BhXcTEMGyGg_1QysN-GXVjm8DP2VH3UEzj_qvoglp2k,9466
|
12
|
-
scenario/user_simulator_agent.py,sha256=fhwi8W44s343BGrjJXSJw960wcK7MgwTg-epxR1bqHo,9088
|
13
|
-
scenario/_utils/__init__.py,sha256=wNX9hU8vzYlyLDwjkt7JUW3IPo2DhME6UIt_zvLM3B0,1000
|
14
|
-
scenario/_utils/ids.py,sha256=K1iPuJgPh3gX9HCrDZGqK5lDgdwZXfOBF1YXVOWNHRg,1843
|
15
|
-
scenario/_utils/message_conversion.py,sha256=AM9DLyWpy97CrAH8RmId9Mv2rmLquQhFoUpRyp-jVeY,3622
|
16
|
-
scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
|
17
|
-
scenario/events/__init__.py,sha256=_autF1cMZYpNXE-kJNvvRb-H_hYqy4gOSSp2fT3Wi9k,1533
|
18
|
-
scenario/events/event_bus.py,sha256=MThIMIaI2nj2CoegZazTNxeHbtl4_M7bW3vEAHz6R8g,7102
|
19
|
-
scenario/events/event_reporter.py,sha256=cMh_5jA5hG3Q9IsoAgPJhxnIVs_M1Q0e2lgLTEK4oPc,3100
|
20
|
-
scenario/events/events.py,sha256=jPXylwiADb0Bdk7u1YkAaU_jLebH7NW8J7SZI9JDTxw,6750
|
21
|
-
scenario/events/messages.py,sha256=1QAkwDExdF6AHgXdEFhHwmCv3Mxu3j0AXIptMekc_bg,3299
|
22
|
-
scenario/events/utils.py,sha256=yrTUTByeb0eAAQniQH7EyKs-usgGti8f17IemUyBZBw,3357
|
23
|
-
langwatch_scenario-0.6.0.dist-info/METADATA,sha256=IvD9on4tP57ldmizFzfGQBtiCT6Z7yoz0trlCSPSW9M,14227
|
24
|
-
langwatch_scenario-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
25
|
-
langwatch_scenario-0.6.0.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
|
26
|
-
langwatch_scenario-0.6.0.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
|
27
|
-
langwatch_scenario-0.6.0.dist-info/RECORD,,
|