langwatch-scenario 0.6.0__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/PKG-INFO +143 -41
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/README.md +140 -37
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/langwatch_scenario.egg-info/PKG-INFO +143 -41
- langwatch_scenario-0.7.1/langwatch_scenario.egg-info/SOURCES.txt +247 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/langwatch_scenario.egg-info/requires.txt +2 -3
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/pyproject.toml +13 -5
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/__init__.py +1 -4
- {langwatch_scenario-0.6.0/scenario/events → langwatch_scenario-0.7.1/scenario/_events}/__init__.py +9 -11
- langwatch_scenario-0.7.1/scenario/_events/event_bus.py +185 -0
- {langwatch_scenario-0.6.0/scenario/events → langwatch_scenario-0.7.1/scenario/_events}/event_reporter.py +1 -1
- {langwatch_scenario-0.6.0/scenario/events → langwatch_scenario-0.7.1/scenario/_events}/events.py +20 -27
- langwatch_scenario-0.7.1/scenario/_events/messages.py +58 -0
- {langwatch_scenario-0.6.0/scenario/events → langwatch_scenario-0.7.1/scenario/_events}/utils.py +43 -32
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/README.md +139 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py +13 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/__init__.py +1 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/__init__.py +1 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_annotations_id.py +155 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_prompts_by_id.py +218 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_scenario_events.py +183 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations.py +136 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_id.py +155 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_trace_id.py +160 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_dataset_by_slug_or_id.py +229 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts.py +188 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id.py +218 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id_versions.py +218 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_trace_id.py +155 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/patch_api_annotations_id.py +178 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_annotations_trace_id.py +178 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_dataset_by_slug_entries.py +108 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts.py +187 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts_by_id_versions.py +241 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_scenario_events.py +229 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_share.py +155 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_unshare.py +155 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/put_api_prompts_by_id.py +241 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/__init__.py +1 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/post_api_trace_search.py +168 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/client.py +268 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/errors.py +16 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/__init__.py +455 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/annotation.py +131 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries.py +74 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries_entries_item.py +44 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_annotations_id_response_200.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_200.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_404.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_500.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_200.py +81 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_400.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_401.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_500.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/error.py +67 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation.py +164 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation_timestamps.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200.py +75 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item.py +109 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item_entry.py +44 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_400.py +78 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_401.py +78 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_404.py +78 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_422.py +67 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_500.py +78 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200.py +172 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item.py +69 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item_role.py +10 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0.py +81 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema.py +77 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema_schema.py +44 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_type.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_404.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_500.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200.py +155 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data.py +204 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_404.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_500.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item.py +172 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item.py +69 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item_role.py +10 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0.py +81 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema.py +77 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema_schema.py +44 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_type.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_500.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200.py +249 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_error_type_0.py +79 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item.py +152 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_error.py +79 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_timestamps.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_input.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metadata.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metrics.py +95 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_output.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item.py +271 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_error_type_0.py +79 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input.py +90 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input_value_item.py +69 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_metrics.py +77 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output.py +89 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output_value_item.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_params.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_timestamps.py +95 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_timestamps.py +77 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/input_.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metadata.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metrics.py +115 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/output.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/pagination.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_body.py +77 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_response_200.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_annotations_trace_id_body.py +77 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_body.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body.py +147 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data.py +207 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations.py +106 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item.py +79 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item_type.py +18 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_rows_item.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item.py +71 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item_type.py +16 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item.py +71 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item_role.py +10 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item.py +98 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_json_schema.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_type.py +11 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_prompting_technique.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200.py +155 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data.py +206 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_404.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_500.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200.py +172 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item.py +69 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item_role.py +10 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0.py +81 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema.py +77 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema_schema.py +44 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_type.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_500.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0.py +127 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0_metadata.py +68 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1.py +164 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0.py +98 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0_verdict.py +10 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_status.py +13 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2.py +245 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_0.py +88 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_1.py +88 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2.py +120 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item.py +87 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item_function.py +67 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_3.py +88 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_4.py +85 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_201.py +81 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_400.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_401.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_500.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_share_response_200.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_unshare_response_200.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_body.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_200.py +75 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401.py +61 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401_error.py +8 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_404.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_500.py +59 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request.py +133 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request_filters.py +51 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py +93 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py +77 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py +225 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/py.typed +1 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/lang_watch_api_client/types.py +46 -0
- langwatch_scenario-0.7.1/scenario/_generated/langwatch_api_client/pyproject.toml +27 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/_utils/__init__.py +1 -1
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/_utils/message_conversion.py +2 -2
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/judge_agent.py +6 -1
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/pytest_plugin.py +4 -4
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/scenario_executor.py +196 -223
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/types.py +5 -2
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/tests/test_event_reporter.py +3 -3
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/tests/test_scenario_event_bus.py +66 -67
- langwatch_scenario-0.7.1/tests/test_scenario_executor_events.py +183 -0
- langwatch_scenario-0.6.0/langwatch_scenario.egg-info/SOURCES.txt +0 -36
- langwatch_scenario-0.6.0/scenario/events/event_bus.py +0 -175
- langwatch_scenario-0.6.0/scenario/events/messages.py +0 -84
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/langwatch_scenario.egg-info/dependency_links.txt +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/langwatch_scenario.egg-info/entry_points.txt +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/langwatch_scenario.egg-info/top_level.txt +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/_error_messages.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/_utils/ids.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/_utils/utils.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/agent_adapter.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/cache.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/config.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/scenario_state.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/script.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/scenario/user_simulator_agent.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/setup.cfg +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/setup.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/tests/test_scenario.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/tests/test_scenario_agent.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.1}/tests/test_scenario_executor.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: langwatch-scenario
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.7.1
|
4
4
|
Summary: The end-to-end agent testing library
|
5
5
|
Author-email: LangWatch Team <support@langwatch.ai>
|
6
6
|
License: MIT
|
@@ -26,11 +26,8 @@ Requires-Dist: wrapt>=1.17.2
|
|
26
26
|
Requires-Dist: pytest-asyncio>=0.26.0
|
27
27
|
Requires-Dist: rich<15.0.0,>=13.3.3
|
28
28
|
Requires-Dist: pksuid>=1.1.2
|
29
|
-
Requires-Dist: pdoc3>=0.11.6
|
30
|
-
Requires-Dist: ag-ui-protocol>=0.1.0
|
31
29
|
Requires-Dist: httpx>=0.27.0
|
32
30
|
Requires-Dist: rx>=3.2.0
|
33
|
-
Requires-Dist: respx>=0.22.0
|
34
31
|
Provides-Extra: dev
|
35
32
|
Requires-Dist: black; extra == "dev"
|
36
33
|
Requires-Dist: isort; extra == "dev"
|
@@ -40,12 +37,20 @@ Requires-Dist: commitizen; extra == "dev"
|
|
40
37
|
Requires-Dist: pyright; extra == "dev"
|
41
38
|
Requires-Dist: pydantic-ai; extra == "dev"
|
42
39
|
Requires-Dist: function-schema; extra == "dev"
|
40
|
+
Requires-Dist: pdoc3; extra == "dev"
|
41
|
+
Requires-Dist: respx; extra == "dev"
|
43
42
|
|
44
43
|

|
45
44
|
|
46
|
-
<
|
47
|
-
|
48
|
-
|
45
|
+
<p align="center">
|
46
|
+
<a href="https://discord.gg/kT4PhDS2gH" target="_blank"><img src="https://img.shields.io/discord/1227886780536324106?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb" alt="chat on Discord"></a>
|
47
|
+
<a href="https://pypi.python.org/pypi/langwatch-scenario" target="_blank"><img src="https://img.shields.io/pypi/dm/langwatch-scenario?logo=python&logoColor=white&label=pypi%20langwatch-scenario&color=blue" alt="Scenario Python package on PyPi"></a>
|
48
|
+
<a href="https://www.npmjs.com/package/@langwatch/scenario" target="_blank"><img src="https://img.shields.io/npm/dm/@langwatch/scenario?logo=npm&logoColor=white&label=npm%20@langwatch/scenario&color=blue" alt="Scenario JavaScript package on npm"></a>
|
49
|
+
<a href="https://github.com/langwatch/scenario/actions/workflows/python-ci.yml"><img src="https://github.com/langwatch/scenario/actions/workflows/python-ci.yml/badge.svg" alt="Python Tests" /></a>
|
50
|
+
<a href="https://github.com/langwatch/scenario/actions/workflows/javascript-ci.yml"><img src="https://github.com/langwatch/scenario/actions/workflows/javascript-ci.yml/badge.svg" alt="JavaScript Tests" /></a>
|
51
|
+
<a href="https://twitter.com/intent/follow?screen_name=langwatchai" target="_blank">
|
52
|
+
<img src="https://img.shields.io/twitter/follow/langwatchai?logo=X&color=%20%23f5f5f5" alt="follow on X(Twitter)"></a>
|
53
|
+
</p>
|
49
54
|
|
50
55
|
# Scenario
|
51
56
|
|
@@ -54,19 +59,15 @@ Scenario is an Agent Testing Framework based on simulations, it can:
|
|
54
59
|
- Test real agent behavior by simulating users in different scenarios and edge cases
|
55
60
|
- Evaluate and judge at any point of the conversation, powerful multi-turn control
|
56
61
|
- Combine it with any LLM eval framework or custom evals, agnostic by design
|
57
|
-
- Integrate your Agent by implementing just one `call()` method
|
62
|
+
- Integrate your Agent by implementing just one [`call()`](https://scenario.langwatch.ai/agent-integration) method
|
58
63
|
- Available in Python, TypeScript and Go
|
59
64
|
|
60
|
-
[
|
61
|
-
|
62
|
-
### In other languages
|
63
|
-
|
64
|
-
- [Scenario TypeScript](https://github.com/langwatch/scenario-ts/)
|
65
|
-
- [Scenario Go](https://github.com/langwatch/scenario-go/)
|
65
|
+
📖 [Documentation](https://scenario.langwatch.ai)\
|
66
|
+
📺 [Watch Video Tutorial](https://www.youtube.com/watch?v=f8NLpkY0Av4)
|
66
67
|
|
67
68
|
## Example
|
68
69
|
|
69
|
-
This is how a
|
70
|
+
This is what a simulation with a tool check looks like with Scenario:
|
70
71
|
|
71
72
|
```python
|
72
73
|
# Define any custom assertions
|
@@ -100,18 +101,56 @@ result = await scenario.run(
|
|
100
101
|
assert result.success
|
101
102
|
```
|
102
103
|
|
104
|
+
<details>
|
105
|
+
<summary><strong>TypeScript Example</strong></summary>
|
106
|
+
|
107
|
+
```typescript
|
108
|
+
const result = await scenario.run({
|
109
|
+
name: "vegetarian recipe agent",
|
110
|
+
|
111
|
+
// Define the prompt to guide the simulation
|
112
|
+
description: `
|
113
|
+
The user is planning a boat trip from Barcelona to Rome,
|
114
|
+
and is wondering what the weather will be like.
|
115
|
+
`,
|
116
|
+
|
117
|
+
// Define the agents that will play this simulation
|
118
|
+
agents: [new MyAgent(), scenario.userSimulatorAgent()],
|
119
|
+
|
120
|
+
// (Optional) Control the simulation
|
121
|
+
script: [
|
122
|
+
scenario.user(), // let the user simulator generate a user message
|
123
|
+
scenario.agent(), // agent responds
|
124
|
+
// check for tool call after the first agent response
|
125
|
+
(state) => expect(state.has_tool_call("get_current_weather")).toBe(true),
|
126
|
+
scenario.succeed(), // simulation ends successfully
|
127
|
+
],
|
128
|
+
});
|
129
|
+
```
|
130
|
+
|
131
|
+
</details>
|
132
|
+
|
103
133
|
> [!NOTE]
|
104
|
-
> Check out full examples in the [examples folder](./examples/).
|
134
|
+
> Check out full examples in the [python/examples folder](./python/examples/) or the [typescript/examples folder](./typescript/examples/).
|
105
135
|
|
106
|
-
##
|
136
|
+
## Quick Start
|
107
137
|
|
108
|
-
Install
|
138
|
+
Install scenario and a test runner:
|
109
139
|
|
110
140
|
```bash
|
111
|
-
|
141
|
+
# on python
|
142
|
+
uv add langwatch-scenario pytest
|
143
|
+
|
144
|
+
# or on typescript
|
145
|
+
pnpm install @langwatch/scenario vitest
|
112
146
|
```
|
113
147
|
|
114
|
-
Now create your first scenario
|
148
|
+
Now create your first scenario by copying the full working example below.
|
149
|
+
|
150
|
+
<details>
|
151
|
+
<summary><strong>Quick Start - Python</strong></summary>
|
152
|
+
|
153
|
+
Save it as `tests/test_vegetarian_recipe_agent.py`:
|
115
154
|
|
116
155
|
```python
|
117
156
|
import pytest
|
@@ -178,23 +217,86 @@ def vegetarian_recipe_agent(messages) -> scenario.AgentReturnTypes:
|
|
178
217
|
return response.choices[0].message # type: ignore
|
179
218
|
```
|
180
219
|
|
181
|
-
|
220
|
+
</details>
|
221
|
+
|
222
|
+
<details>
|
223
|
+
<summary><strong>Quick Start - TypeScript</strong></summary>
|
224
|
+
|
225
|
+
Save it as `tests/vegetarian-recipe-agent.test.ts`:
|
226
|
+
|
227
|
+
```typescript
|
228
|
+
import { openai } from "@ai-sdk/openai";
|
229
|
+
import * as scenario from "@langwatch/scenario";
|
230
|
+
import { generateText } from "ai";
|
231
|
+
import { describe, it, expect } from "vitest";
|
232
|
+
|
233
|
+
describe("Vegetarian Recipe Agent", () => {
|
234
|
+
const agent: scenario.AgentAdapter = {
|
235
|
+
role: scenario.AgentRole.AGENT,
|
236
|
+
call: async (input) => {
|
237
|
+
const response = await generateText({
|
238
|
+
model: openai("gpt-4.1-mini"),
|
239
|
+
messages: [
|
240
|
+
{
|
241
|
+
role: "system",
|
242
|
+
content: `You are a vegetarian recipe agent.\nGiven the user request, ask AT MOST ONE follow-up question, then provide a complete recipe. Keep your responses concise and focused.`,
|
243
|
+
},
|
244
|
+
...input.messages,
|
245
|
+
],
|
246
|
+
});
|
247
|
+
return response.text;
|
248
|
+
},
|
249
|
+
};
|
250
|
+
|
251
|
+
it("should generate a vegetarian recipe for a hungry and tired user on a Saturday evening", async () => {
|
252
|
+
const result = await scenario.run({
|
253
|
+
name: "dinner idea",
|
254
|
+
description: `It's saturday evening, the user is very hungry and tired, but have no money to order out, so they are looking for a recipe.`,
|
255
|
+
agents: [
|
256
|
+
agent,
|
257
|
+
scenario.userSimulatorAgent(),
|
258
|
+
scenario.judgeAgent({
|
259
|
+
model: openai("gpt-4.1-mini"),
|
260
|
+
criteria: [
|
261
|
+
"Agent should not ask more than two follow-up questions",
|
262
|
+
"Agent should generate a recipe",
|
263
|
+
"Recipe should include a list of ingredients",
|
264
|
+
"Recipe should include step-by-step cooking instructions",
|
265
|
+
"Recipe should be vegetarian and not include any sort of meat",
|
266
|
+
],
|
267
|
+
}),
|
268
|
+
],
|
269
|
+
});
|
270
|
+
expect(result.success).toBe(true);
|
271
|
+
});
|
272
|
+
});
|
273
|
+
```
|
274
|
+
|
275
|
+
</details>
|
276
|
+
|
277
|
+
Export your OpenAI API key:
|
182
278
|
|
183
279
|
```bash
|
184
280
|
OPENAI_API_KEY=<your-api-key>
|
185
281
|
```
|
186
282
|
|
187
|
-
Now run it
|
283
|
+
Now run the test:
|
188
284
|
|
189
285
|
```bash
|
286
|
+
# on python
|
190
287
|
pytest -s tests/test_vegetarian_recipe_agent.py
|
288
|
+
|
289
|
+
# on typescript
|
290
|
+
npx vitest run tests/vegetarian-recipe-agent.test.ts
|
191
291
|
```
|
192
292
|
|
193
293
|
This is what it will look like:
|
194
294
|
|
195
|
-
[](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11)
|
196
296
|
|
197
|
-
You can find the same code example in [examples/test_vegetarian_recipe_agent.py](examples/
|
297
|
+
You can find the same code example in [python/examples/](python/examples/test_vegetarian_recipe_agent.py) or [javascript/examples/](javascript/examples/vitest/tests/vegetarian-recipe-agent.test.ts).
|
298
|
+
|
299
|
+
Now check out the [full documentation](https://scenario.langwatch.ai) to learn more and find next steps.
|
198
300
|
|
199
301
|
## Simulation on Autopilot
|
200
302
|
|
@@ -296,6 +398,16 @@ async def test_early_assumption_bias():
|
|
296
398
|
assert result.success
|
297
399
|
```
|
298
400
|
|
401
|
+
## LangWatch Visualization
|
402
|
+
|
403
|
+
Set your [LangWatch API key](https://app.langwatch.ai/) to visualize the scenarios in real-time, as they run, for a much better debugging experience and team collaboration:
|
404
|
+
|
405
|
+
```bash
|
406
|
+
LANGWATCH_API_KEY="your-api-key"
|
407
|
+
```
|
408
|
+
|
409
|
+

|
410
|
+
|
299
411
|
## Debug mode
|
300
412
|
|
301
413
|
You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running, or by passing the `--debug` flag to pytest.
|
@@ -360,26 +472,16 @@ async def test_user_is_very_hungry():
|
|
360
472
|
|
361
473
|
Those two scenarios should now run in parallel.
|
362
474
|
|
363
|
-
##
|
364
|
-
|
365
|
-
Scenario automatically publishes events during execution for monitoring and observability. You can enable event reporting by setting environment variables:
|
366
|
-
|
367
|
-
```bash
|
368
|
-
# Enable automatic event reporting
|
369
|
-
export LANGWATCH_ENDPOINT="https://api.langwatch.ai"
|
370
|
-
export LANGWATCH_API_KEY="your-api-key"
|
371
|
-
```
|
372
|
-
|
373
|
-
With these variables set, Scenario will automatically:
|
475
|
+
## Contributing
|
374
476
|
|
375
|
-
|
376
|
-
- Handle retries and error handling automatically
|
377
|
-
- Process events asynchronously without blocking your tests
|
477
|
+
We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
|
378
478
|
|
379
|
-
|
479
|
+
## Support
|
380
480
|
|
381
|
-
|
481
|
+
- 📖 [Documentation](https://scenario.langwatch.ai)
|
482
|
+
- 💬 [Discord Community](https://discord.gg/langwatch)
|
483
|
+
- 🐛 [Issue Tracker](https://github.com/langwatch/scenario/issues)
|
382
484
|
|
383
485
|
## License
|
384
486
|
|
385
|
-
MIT License
|
487
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
@@ -1,8 +1,14 @@
|
|
1
1
|

|
2
2
|
|
3
|
-
<
|
4
|
-
|
5
|
-
|
3
|
+
<p align="center">
|
4
|
+
<a href="https://discord.gg/kT4PhDS2gH" target="_blank"><img src="https://img.shields.io/discord/1227886780536324106?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb" alt="chat on Discord"></a>
|
5
|
+
<a href="https://pypi.python.org/pypi/langwatch-scenario" target="_blank"><img src="https://img.shields.io/pypi/dm/langwatch-scenario?logo=python&logoColor=white&label=pypi%20langwatch-scenario&color=blue" alt="Scenario Python package on PyPi"></a>
|
6
|
+
<a href="https://www.npmjs.com/package/@langwatch/scenario" target="_blank"><img src="https://img.shields.io/npm/dm/@langwatch/scenario?logo=npm&logoColor=white&label=npm%20@langwatch/scenario&color=blue" alt="Scenario JavaScript package on npm"></a>
|
7
|
+
<a href="https://github.com/langwatch/scenario/actions/workflows/python-ci.yml"><img src="https://github.com/langwatch/scenario/actions/workflows/python-ci.yml/badge.svg" alt="Python Tests" /></a>
|
8
|
+
<a href="https://github.com/langwatch/scenario/actions/workflows/javascript-ci.yml"><img src="https://github.com/langwatch/scenario/actions/workflows/javascript-ci.yml/badge.svg" alt="JavaScript Tests" /></a>
|
9
|
+
<a href="https://twitter.com/intent/follow?screen_name=langwatchai" target="_blank">
|
10
|
+
<img src="https://img.shields.io/twitter/follow/langwatchai?logo=X&color=%20%23f5f5f5" alt="follow on X(Twitter)"></a>
|
11
|
+
</p>
|
6
12
|
|
7
13
|
# Scenario
|
8
14
|
|
@@ -11,19 +17,15 @@ Scenario is an Agent Testing Framework based on simulations, it can:
|
|
11
17
|
- Test real agent behavior by simulating users in different scenarios and edge cases
|
12
18
|
- Evaluate and judge at any point of the conversation, powerful multi-turn control
|
13
19
|
- Combine it with any LLM eval framework or custom evals, agnostic by design
|
14
|
-
- Integrate your Agent by implementing just one `call()` method
|
20
|
+
- Integrate your Agent by implementing just one [`call()`](https://scenario.langwatch.ai/agent-integration) method
|
15
21
|
- Available in Python, TypeScript and Go
|
16
22
|
|
17
|
-
[
|
18
|
-
|
19
|
-
### In other languages
|
20
|
-
|
21
|
-
- [Scenario TypeScript](https://github.com/langwatch/scenario-ts/)
|
22
|
-
- [Scenario Go](https://github.com/langwatch/scenario-go/)
|
23
|
+
📖 [Documentation](https://scenario.langwatch.ai)\
|
24
|
+
📺 [Watch Video Tutorial](https://www.youtube.com/watch?v=f8NLpkY0Av4)
|
23
25
|
|
24
26
|
## Example
|
25
27
|
|
26
|
-
This is how a
|
28
|
+
This is what a simulation with a tool check looks like with Scenario:
|
27
29
|
|
28
30
|
```python
|
29
31
|
# Define any custom assertions
|
@@ -57,18 +59,56 @@ result = await scenario.run(
|
|
57
59
|
assert result.success
|
58
60
|
```
|
59
61
|
|
62
|
+
<details>
|
63
|
+
<summary><strong>TypeScript Example</strong></summary>
|
64
|
+
|
65
|
+
```typescript
|
66
|
+
const result = await scenario.run({
|
67
|
+
name: "vegetarian recipe agent",
|
68
|
+
|
69
|
+
// Define the prompt to guide the simulation
|
70
|
+
description: `
|
71
|
+
The user is planning a boat trip from Barcelona to Rome,
|
72
|
+
and is wondering what the weather will be like.
|
73
|
+
`,
|
74
|
+
|
75
|
+
// Define the agents that will play this simulation
|
76
|
+
agents: [new MyAgent(), scenario.userSimulatorAgent()],
|
77
|
+
|
78
|
+
// (Optional) Control the simulation
|
79
|
+
script: [
|
80
|
+
scenario.user(), // let the user simulator generate a user message
|
81
|
+
scenario.agent(), // agent responds
|
82
|
+
// check for tool call after the first agent response
|
83
|
+
(state) => expect(state.has_tool_call("get_current_weather")).toBe(true),
|
84
|
+
scenario.succeed(), // simulation ends successfully
|
85
|
+
],
|
86
|
+
});
|
87
|
+
```
|
88
|
+
|
89
|
+
</details>
|
90
|
+
|
60
91
|
> [!NOTE]
|
61
|
-
> Check out full examples in the [examples folder](./examples/).
|
92
|
+
> Check out full examples in the [python/examples folder](./python/examples/) or the [typescript/examples folder](./typescript/examples/).
|
62
93
|
|
63
|
-
##
|
94
|
+
## Quick Start
|
64
95
|
|
65
|
-
Install
|
96
|
+
Install scenario and a test runner:
|
66
97
|
|
67
98
|
```bash
|
68
|
-
|
99
|
+
# on python
|
100
|
+
uv add langwatch-scenario pytest
|
101
|
+
|
102
|
+
# or on typescript
|
103
|
+
pnpm install @langwatch/scenario vitest
|
69
104
|
```
|
70
105
|
|
71
|
-
Now create your first scenario
|
106
|
+
Now create your first scenario by copying the full working example below.
|
107
|
+
|
108
|
+
<details>
|
109
|
+
<summary><strong>Quick Start - Python</strong></summary>
|
110
|
+
|
111
|
+
Save it as `tests/test_vegetarian_recipe_agent.py`:
|
72
112
|
|
73
113
|
```python
|
74
114
|
import pytest
|
@@ -135,23 +175,86 @@ def vegetarian_recipe_agent(messages) -> scenario.AgentReturnTypes:
|
|
135
175
|
return response.choices[0].message # type: ignore
|
136
176
|
```
|
137
177
|
|
138
|
-
|
178
|
+
</details>
|
179
|
+
|
180
|
+
<details>
|
181
|
+
<summary><strong>Quick Start - TypeScript</strong></summary>
|
182
|
+
|
183
|
+
Save it as `tests/vegetarian-recipe-agent.test.ts`:
|
184
|
+
|
185
|
+
```typescript
|
186
|
+
import { openai } from "@ai-sdk/openai";
|
187
|
+
import * as scenario from "@langwatch/scenario";
|
188
|
+
import { generateText } from "ai";
|
189
|
+
import { describe, it, expect } from "vitest";
|
190
|
+
|
191
|
+
describe("Vegetarian Recipe Agent", () => {
|
192
|
+
const agent: scenario.AgentAdapter = {
|
193
|
+
role: scenario.AgentRole.AGENT,
|
194
|
+
call: async (input) => {
|
195
|
+
const response = await generateText({
|
196
|
+
model: openai("gpt-4.1-mini"),
|
197
|
+
messages: [
|
198
|
+
{
|
199
|
+
role: "system",
|
200
|
+
content: `You are a vegetarian recipe agent.\nGiven the user request, ask AT MOST ONE follow-up question, then provide a complete recipe. Keep your responses concise and focused.`,
|
201
|
+
},
|
202
|
+
...input.messages,
|
203
|
+
],
|
204
|
+
});
|
205
|
+
return response.text;
|
206
|
+
},
|
207
|
+
};
|
208
|
+
|
209
|
+
it("should generate a vegetarian recipe for a hungry and tired user on a Saturday evening", async () => {
|
210
|
+
const result = await scenario.run({
|
211
|
+
name: "dinner idea",
|
212
|
+
description: `It's saturday evening, the user is very hungry and tired, but have no money to order out, so they are looking for a recipe.`,
|
213
|
+
agents: [
|
214
|
+
agent,
|
215
|
+
scenario.userSimulatorAgent(),
|
216
|
+
scenario.judgeAgent({
|
217
|
+
model: openai("gpt-4.1-mini"),
|
218
|
+
criteria: [
|
219
|
+
"Agent should not ask more than two follow-up questions",
|
220
|
+
"Agent should generate a recipe",
|
221
|
+
"Recipe should include a list of ingredients",
|
222
|
+
"Recipe should include step-by-step cooking instructions",
|
223
|
+
"Recipe should be vegetarian and not include any sort of meat",
|
224
|
+
],
|
225
|
+
}),
|
226
|
+
],
|
227
|
+
});
|
228
|
+
expect(result.success).toBe(true);
|
229
|
+
});
|
230
|
+
});
|
231
|
+
```
|
232
|
+
|
233
|
+
</details>
|
234
|
+
|
235
|
+
Export your OpenAI API key:
|
139
236
|
|
140
237
|
```bash
|
141
238
|
OPENAI_API_KEY=<your-api-key>
|
142
239
|
```
|
143
240
|
|
144
|
-
Now run it
|
241
|
+
Now run the test:
|
145
242
|
|
146
243
|
```bash
|
244
|
+
# on python
|
147
245
|
pytest -s tests/test_vegetarian_recipe_agent.py
|
246
|
+
|
247
|
+
# on typescript
|
248
|
+
npx vitest run tests/vegetarian-recipe-agent.test.ts
|
148
249
|
```
|
149
250
|
|
150
251
|
This is what it will look like:
|
151
252
|
|
152
|
-
[](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11)
|
153
254
|
|
154
|
-
You can find the same code example in [examples/test_vegetarian_recipe_agent.py](examples/
|
255
|
+
You can find the same code example in [python/examples/](python/examples/test_vegetarian_recipe_agent.py) or [javascript/examples/](javascript/examples/vitest/tests/vegetarian-recipe-agent.test.ts).
|
256
|
+
|
257
|
+
Now check out the [full documentation](https://scenario.langwatch.ai) to learn more and find next steps.
|
155
258
|
|
156
259
|
## Simulation on Autopilot
|
157
260
|
|
@@ -253,6 +356,16 @@ async def test_early_assumption_bias():
|
|
253
356
|
assert result.success
|
254
357
|
```
|
255
358
|
|
359
|
+
## LangWatch Visualization
|
360
|
+
|
361
|
+
Set your [LangWatch API key](https://app.langwatch.ai/) to visualize the scenarios in real-time, as they run, for a much better debugging experience and team collaboration:
|
362
|
+
|
363
|
+
```bash
|
364
|
+
LANGWATCH_API_KEY="your-api-key"
|
365
|
+
```
|
366
|
+
|
367
|
+

|
368
|
+
|
256
369
|
## Debug mode
|
257
370
|
|
258
371
|
You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running, or by passing the `--debug` flag to pytest.
|
@@ -317,26 +430,16 @@ async def test_user_is_very_hungry():
|
|
317
430
|
|
318
431
|
Those two scenarios should now run in parallel.
|
319
432
|
|
320
|
-
##
|
321
|
-
|
322
|
-
Scenario automatically publishes events during execution for monitoring and observability. You can enable event reporting by setting environment variables:
|
323
|
-
|
324
|
-
```bash
|
325
|
-
# Enable automatic event reporting
|
326
|
-
export LANGWATCH_ENDPOINT="https://api.langwatch.ai"
|
327
|
-
export LANGWATCH_API_KEY="your-api-key"
|
328
|
-
```
|
329
|
-
|
330
|
-
With these variables set, Scenario will automatically:
|
433
|
+
## Contributing
|
331
434
|
|
332
|
-
|
333
|
-
- Handle retries and error handling automatically
|
334
|
-
- Process events asynchronously without blocking your tests
|
435
|
+
We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
|
335
436
|
|
336
|
-
|
437
|
+
## Support
|
337
438
|
|
338
|
-
|
439
|
+
- 📖 [Documentation](https://scenario.langwatch.ai)
|
440
|
+
- 💬 [Discord Community](https://discord.gg/langwatch)
|
441
|
+
- 🐛 [Issue Tracker](https://github.com/langwatch/scenario/issues)
|
339
442
|
|
340
443
|
## License
|
341
444
|
|
342
|
-
MIT License
|
445
|
+
MIT License - see [LICENSE](LICENSE) for details.
|