langwatch-scenario 0.6.0__tar.gz → 0.7.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/PKG-INFO +145 -41
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/README.md +140 -37
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/langwatch_scenario.egg-info/PKG-INFO +145 -41
- langwatch_scenario-0.7.2/langwatch_scenario.egg-info/SOURCES.txt +247 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/langwatch_scenario.egg-info/requires.txt +4 -3
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/pyproject.toml +15 -5
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/__init__.py +1 -4
- {langwatch_scenario-0.6.0/scenario/events → langwatch_scenario-0.7.2/scenario/_events}/__init__.py +9 -11
- langwatch_scenario-0.7.2/scenario/_events/event_bus.py +185 -0
- {langwatch_scenario-0.6.0/scenario/events → langwatch_scenario-0.7.2/scenario/_events}/event_reporter.py +1 -1
- {langwatch_scenario-0.6.0/scenario/events → langwatch_scenario-0.7.2/scenario/_events}/events.py +20 -27
- langwatch_scenario-0.7.2/scenario/_events/messages.py +58 -0
- {langwatch_scenario-0.6.0/scenario/events → langwatch_scenario-0.7.2/scenario/_events}/utils.py +43 -32
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/README.md +139 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py +13 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/__init__.py +1 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/__init__.py +1 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_annotations_id.py +155 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_prompts_by_id.py +218 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_scenario_events.py +183 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations.py +136 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_id.py +155 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_trace_id.py +160 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_dataset_by_slug_or_id.py +229 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts.py +188 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id.py +218 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id_versions.py +218 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_trace_id.py +155 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/patch_api_annotations_id.py +178 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_annotations_trace_id.py +178 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_dataset_by_slug_entries.py +108 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts.py +187 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts_by_id_versions.py +241 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_scenario_events.py +229 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_share.py +155 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_unshare.py +155 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/put_api_prompts_by_id.py +241 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/__init__.py +1 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/post_api_trace_search.py +168 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/client.py +268 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/errors.py +16 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/__init__.py +455 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/annotation.py +131 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries.py +74 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries_entries_item.py +44 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_annotations_id_response_200.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_200.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_404.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_500.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_200.py +81 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_400.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_401.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_500.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/error.py +67 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation.py +164 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation_timestamps.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200.py +75 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item.py +109 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item_entry.py +44 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_400.py +78 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_401.py +78 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_404.py +78 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_422.py +67 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_500.py +78 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200.py +172 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item.py +69 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item_role.py +10 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0.py +81 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema.py +77 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema_schema.py +44 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_type.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_404.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_500.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200.py +155 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data.py +204 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_404.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_500.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item.py +172 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item.py +69 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item_role.py +10 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0.py +81 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema.py +77 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema_schema.py +44 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_type.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_500.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200.py +249 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_error_type_0.py +79 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item.py +152 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_error.py +79 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_timestamps.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_input.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metadata.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metrics.py +95 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_output.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item.py +271 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_error_type_0.py +79 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input.py +90 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input_value_item.py +69 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_metrics.py +77 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output.py +89 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output_value_item.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_params.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_timestamps.py +95 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_timestamps.py +77 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/input_.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metadata.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metrics.py +115 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/output.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/pagination.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_body.py +77 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_response_200.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_annotations_trace_id_body.py +77 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_body.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body.py +147 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data.py +207 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations.py +106 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item.py +79 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item_type.py +18 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_rows_item.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item.py +71 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item_type.py +16 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item.py +71 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item_role.py +10 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item.py +98 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_json_schema.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_type.py +11 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_prompting_technique.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200.py +155 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data.py +206 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_404.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_500.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200.py +172 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item.py +69 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item_role.py +10 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0.py +81 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema.py +77 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema_schema.py +44 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_type.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_500.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0.py +127 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0_metadata.py +68 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1.py +164 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0.py +98 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0_verdict.py +10 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_status.py +13 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2.py +245 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_0.py +88 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_1.py +88 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2.py +120 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item.py +87 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item_function.py +67 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_3.py +88 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_4.py +85 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_201.py +81 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_400.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_401.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_500.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_share_response_200.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_unshare_response_200.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_body.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_200.py +75 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401.py +61 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401_error.py +8 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_404.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_500.py +59 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request.py +133 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request_filters.py +51 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py +93 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py +77 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py +225 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/py.typed +1 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/lang_watch_api_client/types.py +46 -0
- langwatch_scenario-0.7.2/scenario/_generated/langwatch_api_client/pyproject.toml +27 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/_utils/__init__.py +1 -1
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/_utils/message_conversion.py +2 -2
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/judge_agent.py +6 -1
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/pytest_plugin.py +4 -4
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/scenario_executor.py +196 -223
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/types.py +5 -2
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/tests/test_event_reporter.py +3 -3
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/tests/test_scenario_event_bus.py +66 -67
- langwatch_scenario-0.7.2/tests/test_scenario_executor_events.py +183 -0
- langwatch_scenario-0.6.0/langwatch_scenario.egg-info/SOURCES.txt +0 -36
- langwatch_scenario-0.6.0/scenario/events/event_bus.py +0 -175
- langwatch_scenario-0.6.0/scenario/events/messages.py +0 -84
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/langwatch_scenario.egg-info/dependency_links.txt +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/langwatch_scenario.egg-info/entry_points.txt +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/langwatch_scenario.egg-info/top_level.txt +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/_error_messages.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/_utils/ids.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/_utils/utils.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/agent_adapter.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/cache.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/config.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/scenario_state.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/script.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/scenario/user_simulator_agent.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/setup.cfg +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/setup.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/tests/test_scenario.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/tests/test_scenario_agent.py +0 -0
- {langwatch_scenario-0.6.0 → langwatch_scenario-0.7.2}/tests/test_scenario_executor.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: langwatch-scenario
|
3
|
-
Version: 0.6.0
|
3
|
+
Version: 0.7.2
|
4
4
|
Summary: The end-to-end agent testing library
|
5
5
|
Author-email: LangWatch Team <support@langwatch.ai>
|
6
6
|
License: MIT
|
@@ -18,6 +18,7 @@ Requires-Python: >=3.9
|
|
18
18
|
Description-Content-Type: text/markdown
|
19
19
|
Requires-Dist: pytest>=8.1.1
|
20
20
|
Requires-Dist: litellm>=1.49.0
|
21
|
+
Requires-Dist: openai>=1.88.0
|
21
22
|
Requires-Dist: python-dotenv>=1.0.1
|
22
23
|
Requires-Dist: termcolor>=2.4.0
|
23
24
|
Requires-Dist: pydantic>=2.7.0
|
@@ -26,11 +27,9 @@ Requires-Dist: wrapt>=1.17.2
|
|
26
27
|
Requires-Dist: pytest-asyncio>=0.26.0
|
27
28
|
Requires-Dist: rich<15.0.0,>=13.3.3
|
28
29
|
Requires-Dist: pksuid>=1.1.2
|
29
|
-
Requires-Dist: pdoc3>=0.11.6
|
30
|
-
Requires-Dist: ag-ui-protocol>=0.1.0
|
31
30
|
Requires-Dist: httpx>=0.27.0
|
32
31
|
Requires-Dist: rx>=3.2.0
|
33
|
-
Requires-Dist:
|
32
|
+
Requires-Dist: python-dateutil>=2.9.0.post0
|
34
33
|
Provides-Extra: dev
|
35
34
|
Requires-Dist: black; extra == "dev"
|
36
35
|
Requires-Dist: isort; extra == "dev"
|
@@ -40,12 +39,20 @@ Requires-Dist: commitizen; extra == "dev"
|
|
40
39
|
Requires-Dist: pyright; extra == "dev"
|
41
40
|
Requires-Dist: pydantic-ai; extra == "dev"
|
42
41
|
Requires-Dist: function-schema; extra == "dev"
|
42
|
+
Requires-Dist: pdoc3; extra == "dev"
|
43
|
+
Requires-Dist: respx; extra == "dev"
|
43
44
|
|
44
45
|

|
45
46
|
|
46
|
-
<
|
47
|
-
|
48
|
-
|
47
|
+
<p align="center">
|
48
|
+
<a href="https://discord.gg/kT4PhDS2gH" target="_blank"><img src="https://img.shields.io/discord/1227886780536324106?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb" alt="chat on Discord"></a>
|
49
|
+
<a href="https://pypi.python.org/pypi/langwatch-scenario" target="_blank"><img src="https://img.shields.io/pypi/dm/langwatch-scenario?logo=python&logoColor=white&label=pypi%20langwatch-scenario&color=blue" alt="Scenario Python package on PyPi"></a>
|
50
|
+
<a href="https://www.npmjs.com/package/@langwatch/scenario" target="_blank"><img src="https://img.shields.io/npm/dm/@langwatch/scenario?logo=npm&logoColor=white&label=npm%20@langwatch/scenario&color=blue" alt="Scenario JavaScript package on npm"></a>
|
51
|
+
<a href="https://github.com/langwatch/scenario/actions/workflows/python-ci.yml"><img src="https://github.com/langwatch/scenario/actions/workflows/python-ci.yml/badge.svg" alt="Python Tests" /></a>
|
52
|
+
<a href="https://github.com/langwatch/scenario/actions/workflows/javascript-ci.yml"><img src="https://github.com/langwatch/scenario/actions/workflows/javascript-ci.yml/badge.svg" alt="JavaScript Tests" /></a>
|
53
|
+
<a href="https://twitter.com/intent/follow?screen_name=langwatchai" target="_blank">
|
54
|
+
<img src="https://img.shields.io/twitter/follow/langwatchai?logo=X&color=%20%23f5f5f5" alt="follow on X(Twitter)"></a>
|
55
|
+
</p>
|
49
56
|
|
50
57
|
# Scenario
|
51
58
|
|
@@ -54,19 +61,15 @@ Scenario is an Agent Testing Framework based on simulations, it can:
|
|
54
61
|
- Test real agent behavior by simulating users in different scenarios and edge cases
|
55
62
|
- Evaluate and judge at any point of the conversation, powerful multi-turn control
|
56
63
|
- Combine it with any LLM eval framework or custom evals, agnostic by design
|
57
|
-
- Integrate your Agent by implementing just one `call()` method
|
64
|
+
- Integrate your Agent by implementing just one [`call()`](https://scenario.langwatch.ai/agent-integration) method
|
58
65
|
- Available in Python, TypeScript and Go
|
59
66
|
|
60
|
-
[
|
61
|
-
|
62
|
-
### In other languages
|
63
|
-
|
64
|
-
- [Scenario TypeScript](https://github.com/langwatch/scenario-ts/)
|
65
|
-
- [Scenario Go](https://github.com/langwatch/scenario-go/)
|
67
|
+
📖 [Documentation](https://scenario.langwatch.ai)\
|
68
|
+
📺 [Watch Video Tutorial](https://www.youtube.com/watch?v=f8NLpkY0Av4)
|
66
69
|
|
67
70
|
## Example
|
68
71
|
|
69
|
-
This is how a
|
72
|
+
This is what a simulation with a tool check looks like with Scenario:
|
70
73
|
|
71
74
|
```python
|
72
75
|
# Define any custom assertions
|
@@ -100,18 +103,56 @@ result = await scenario.run(
|
|
100
103
|
assert result.success
|
101
104
|
```
|
102
105
|
|
106
|
+
<details>
|
107
|
+
<summary><strong>TypeScript Example</strong></summary>
|
108
|
+
|
109
|
+
```typescript
|
110
|
+
const result = await scenario.run({
|
111
|
+
name: "vegetarian recipe agent",
|
112
|
+
|
113
|
+
// Define the prompt to guide the simulation
|
114
|
+
description: `
|
115
|
+
The user is planning a boat trip from Barcelona to Rome,
|
116
|
+
and is wondering what the weather will be like.
|
117
|
+
`,
|
118
|
+
|
119
|
+
// Define the agents that will play this simulation
|
120
|
+
agents: [new MyAgent(), scenario.userSimulatorAgent()],
|
121
|
+
|
122
|
+
// (Optional) Control the simulation
|
123
|
+
script: [
|
124
|
+
scenario.user(), // let the user simulator generate a user message
|
125
|
+
scenario.agent(), // agent responds
|
126
|
+
// check for tool call after the first agent response
|
127
|
+
(state) => expect(state.has_tool_call("get_current_weather")).toBe(true),
|
128
|
+
scenario.succeed(), // simulation ends successfully
|
129
|
+
],
|
130
|
+
});
|
131
|
+
```
|
132
|
+
|
133
|
+
</details>
|
134
|
+
|
103
135
|
> [!NOTE]
|
104
|
-
> Check out full examples in the [examples folder](./examples/).
|
136
|
+
> Check out full examples in the [python/examples folder](./python/examples/) or the [typescript/examples folder](./typescript/examples/).
|
105
137
|
|
106
|
-
##
|
138
|
+
## Quick Start
|
107
139
|
|
108
|
-
Install
|
140
|
+
Install scenario and a test runner:
|
109
141
|
|
110
142
|
```bash
|
111
|
-
|
143
|
+
# on python
|
144
|
+
uv add langwatch-scenario pytest
|
145
|
+
|
146
|
+
# or on typescript
|
147
|
+
pnpm install @langwatch/scenario vitest
|
112
148
|
```
|
113
149
|
|
114
|
-
Now create your first scenario
|
150
|
+
Now create your first scenario by copying the full working example below.
|
151
|
+
|
152
|
+
<details>
|
153
|
+
<summary><strong>Quick Start - Python</strong></summary>
|
154
|
+
|
155
|
+
Save it as `tests/test_vegetarian_recipe_agent.py`:
|
115
156
|
|
116
157
|
```python
|
117
158
|
import pytest
|
@@ -178,23 +219,86 @@ def vegetarian_recipe_agent(messages) -> scenario.AgentReturnTypes:
|
|
178
219
|
return response.choices[0].message # type: ignore
|
179
220
|
```
|
180
221
|
|
181
|
-
|
222
|
+
</details>
|
223
|
+
|
224
|
+
<details>
|
225
|
+
<summary><strong>Quick Start - TypeScript</strong></summary>
|
226
|
+
|
227
|
+
Save it as `tests/vegetarian-recipe-agent.test.ts`:
|
228
|
+
|
229
|
+
```typescript
|
230
|
+
import { openai } from "@ai-sdk/openai";
|
231
|
+
import * as scenario from "@langwatch/scenario";
|
232
|
+
import { generateText } from "ai";
|
233
|
+
import { describe, it, expect } from "vitest";
|
234
|
+
|
235
|
+
describe("Vegetarian Recipe Agent", () => {
|
236
|
+
const agent: scenario.AgentAdapter = {
|
237
|
+
role: scenario.AgentRole.AGENT,
|
238
|
+
call: async (input) => {
|
239
|
+
const response = await generateText({
|
240
|
+
model: openai("gpt-4.1-mini"),
|
241
|
+
messages: [
|
242
|
+
{
|
243
|
+
role: "system",
|
244
|
+
content: `You are a vegetarian recipe agent.\nGiven the user request, ask AT MOST ONE follow-up question, then provide a complete recipe. Keep your responses concise and focused.`,
|
245
|
+
},
|
246
|
+
...input.messages,
|
247
|
+
],
|
248
|
+
});
|
249
|
+
return response.text;
|
250
|
+
},
|
251
|
+
};
|
252
|
+
|
253
|
+
it("should generate a vegetarian recipe for a hungry and tired user on a Saturday evening", async () => {
|
254
|
+
const result = await scenario.run({
|
255
|
+
name: "dinner idea",
|
256
|
+
description: `It's saturday evening, the user is very hungry and tired, but have no money to order out, so they are looking for a recipe.`,
|
257
|
+
agents: [
|
258
|
+
agent,
|
259
|
+
scenario.userSimulatorAgent(),
|
260
|
+
scenario.judgeAgent({
|
261
|
+
model: openai("gpt-4.1-mini"),
|
262
|
+
criteria: [
|
263
|
+
"Agent should not ask more than two follow-up questions",
|
264
|
+
"Agent should generate a recipe",
|
265
|
+
"Recipe should include a list of ingredients",
|
266
|
+
"Recipe should include step-by-step cooking instructions",
|
267
|
+
"Recipe should be vegetarian and not include any sort of meat",
|
268
|
+
],
|
269
|
+
}),
|
270
|
+
],
|
271
|
+
});
|
272
|
+
expect(result.success).toBe(true);
|
273
|
+
});
|
274
|
+
});
|
275
|
+
```
|
276
|
+
|
277
|
+
</details>
|
278
|
+
|
279
|
+
Export your OpenAI API key:
|
182
280
|
|
183
281
|
```bash
|
184
282
|
OPENAI_API_KEY=<your-api-key>
|
185
283
|
```
|
186
284
|
|
187
|
-
Now run it
|
285
|
+
Now run the test:
|
188
286
|
|
189
287
|
```bash
|
288
|
+
# on python
|
190
289
|
pytest -s tests/test_vegetarian_recipe_agent.py
|
290
|
+
|
291
|
+
# on typescript
|
292
|
+
npx vitest run tests/vegetarian-recipe-agent.test.ts
|
191
293
|
```
|
192
294
|
|
193
295
|
This is what it will look like:
|
194
296
|
|
195
|
-
[](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11)
|
196
298
|
|
197
|
-
You can find the same code example in [examples/test_vegetarian_recipe_agent.py](examples/
|
299
|
+
You can find the same code example in [python/examples/](python/examples/test_vegetarian_recipe_agent.py) or [javascript/examples/](javascript/examples/vitest/tests/vegetarian-recipe-agent.test.ts).
|
300
|
+
|
301
|
+
Now check out the [full documentation](https://scenario.langwatch.ai) to learn more and find next steps.
|
198
302
|
|
199
303
|
## Simulation on Autopilot
|
200
304
|
|
@@ -296,6 +400,16 @@ async def test_early_assumption_bias():
|
|
296
400
|
assert result.success
|
297
401
|
```
|
298
402
|
|
403
|
+
## LangWatch Visualization
|
404
|
+
|
405
|
+
Set your [LangWatch API key](https://app.langwatch.ai/) to visualize the scenarios in real-time, as they run, for a much better debugging experience and team collaboration:
|
406
|
+
|
407
|
+
```bash
|
408
|
+
LANGWATCH_API_KEY="your-api-key"
|
409
|
+
```
|
410
|
+
|
411
|
+

|
412
|
+
|
299
413
|
## Debug mode
|
300
414
|
|
301
415
|
You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running, or by passing the `--debug` flag to pytest.
|
@@ -360,26 +474,16 @@ async def test_user_is_very_hungry():
|
|
360
474
|
|
361
475
|
Those two scenarios should now run in parallel.
|
362
476
|
|
363
|
-
##
|
364
|
-
|
365
|
-
Scenario automatically publishes events during execution for monitoring and observability. You can enable event reporting by setting environment variables:
|
366
|
-
|
367
|
-
```bash
|
368
|
-
# Enable automatic event reporting
|
369
|
-
export LANGWATCH_ENDPOINT="https://api.langwatch.ai"
|
370
|
-
export LANGWATCH_API_KEY="your-api-key"
|
371
|
-
```
|
372
|
-
|
373
|
-
With these variables set, Scenario will automatically:
|
477
|
+
## Contributing
|
374
478
|
|
375
|
-
|
376
|
-
- Handle retries and error handling automatically
|
377
|
-
- Process events asynchronously without blocking your tests
|
479
|
+
We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
|
378
480
|
|
379
|
-
|
481
|
+
## Support
|
380
482
|
|
381
|
-
|
483
|
+
- 📖 [Documentation](https://scenario.langwatch.ai)
|
484
|
+
- 💬 [Discord Community](https://discord.gg/langwatch)
|
485
|
+
- 🐛 [Issue Tracker](https://github.com/langwatch/scenario/issues)
|
382
486
|
|
383
487
|
## License
|
384
488
|
|
385
|
-
MIT License
|
489
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
@@ -1,8 +1,14 @@
|
|
1
1
|

|
2
2
|
|
3
|
-
<
|
4
|
-
|
5
|
-
|
3
|
+
<p align="center">
|
4
|
+
<a href="https://discord.gg/kT4PhDS2gH" target="_blank"><img src="https://img.shields.io/discord/1227886780536324106?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb" alt="chat on Discord"></a>
|
5
|
+
<a href="https://pypi.python.org/pypi/langwatch-scenario" target="_blank"><img src="https://img.shields.io/pypi/dm/langwatch-scenario?logo=python&logoColor=white&label=pypi%20langwatch-scenario&color=blue" alt="Scenario Python package on PyPi"></a>
|
6
|
+
<a href="https://www.npmjs.com/package/@langwatch/scenario" target="_blank"><img src="https://img.shields.io/npm/dm/@langwatch/scenario?logo=npm&logoColor=white&label=npm%20@langwatch/scenario&color=blue" alt="Scenario JavaScript package on npm"></a>
|
7
|
+
<a href="https://github.com/langwatch/scenario/actions/workflows/python-ci.yml"><img src="https://github.com/langwatch/scenario/actions/workflows/python-ci.yml/badge.svg" alt="Python Tests" /></a>
|
8
|
+
<a href="https://github.com/langwatch/scenario/actions/workflows/javascript-ci.yml"><img src="https://github.com/langwatch/scenario/actions/workflows/javascript-ci.yml/badge.svg" alt="JavaScript Tests" /></a>
|
9
|
+
<a href="https://twitter.com/intent/follow?screen_name=langwatchai" target="_blank">
|
10
|
+
<img src="https://img.shields.io/twitter/follow/langwatchai?logo=X&color=%20%23f5f5f5" alt="follow on X(Twitter)"></a>
|
11
|
+
</p>
|
6
12
|
|
7
13
|
# Scenario
|
8
14
|
|
@@ -11,19 +17,15 @@ Scenario is an Agent Testing Framework based on simulations, it can:
|
|
11
17
|
- Test real agent behavior by simulating users in different scenarios and edge cases
|
12
18
|
- Evaluate and judge at any point of the conversation, powerful multi-turn control
|
13
19
|
- Combine it with any LLM eval framework or custom evals, agnostic by design
|
14
|
-
- Integrate your Agent by implementing just one `call()` method
|
20
|
+
- Integrate your Agent by implementing just one [`call()`](https://scenario.langwatch.ai/agent-integration) method
|
15
21
|
- Available in Python, TypeScript and Go
|
16
22
|
|
17
|
-
[
|
18
|
-
|
19
|
-
### In other languages
|
20
|
-
|
21
|
-
- [Scenario TypeScript](https://github.com/langwatch/scenario-ts/)
|
22
|
-
- [Scenario Go](https://github.com/langwatch/scenario-go/)
|
23
|
+
📖 [Documentation](https://scenario.langwatch.ai)\
|
24
|
+
📺 [Watch Video Tutorial](https://www.youtube.com/watch?v=f8NLpkY0Av4)
|
23
25
|
|
24
26
|
## Example
|
25
27
|
|
26
|
-
This is how a
|
28
|
+
This is what a simulation with a tool check looks like with Scenario:
|
27
29
|
|
28
30
|
```python
|
29
31
|
# Define any custom assertions
|
@@ -57,18 +59,56 @@ result = await scenario.run(
|
|
57
59
|
assert result.success
|
58
60
|
```
|
59
61
|
|
62
|
+
<details>
|
63
|
+
<summary><strong>TypeScript Example</strong></summary>
|
64
|
+
|
65
|
+
```typescript
|
66
|
+
const result = await scenario.run({
|
67
|
+
name: "vegetarian recipe agent",
|
68
|
+
|
69
|
+
// Define the prompt to guide the simulation
|
70
|
+
description: `
|
71
|
+
The user is planning a boat trip from Barcelona to Rome,
|
72
|
+
and is wondering what the weather will be like.
|
73
|
+
`,
|
74
|
+
|
75
|
+
// Define the agents that will play this simulation
|
76
|
+
agents: [new MyAgent(), scenario.userSimulatorAgent()],
|
77
|
+
|
78
|
+
// (Optional) Control the simulation
|
79
|
+
script: [
|
80
|
+
scenario.user(), // let the user simulator generate a user message
|
81
|
+
scenario.agent(), // agent responds
|
82
|
+
// check for tool call after the first agent response
|
83
|
+
(state) => expect(state.has_tool_call("get_current_weather")).toBe(true),
|
84
|
+
scenario.succeed(), // simulation ends successfully
|
85
|
+
],
|
86
|
+
});
|
87
|
+
```
|
88
|
+
|
89
|
+
</details>
|
90
|
+
|
60
91
|
> [!NOTE]
|
61
|
-
> Check out full examples in the [examples folder](./examples/).
|
92
|
+
> Check out full examples in the [python/examples folder](./python/examples/) or the [typescript/examples folder](./typescript/examples/).
|
62
93
|
|
63
|
-
##
|
94
|
+
## Quick Start
|
64
95
|
|
65
|
-
Install
|
96
|
+
Install scenario and a test runner:
|
66
97
|
|
67
98
|
```bash
|
68
|
-
|
99
|
+
# on python
|
100
|
+
uv add langwatch-scenario pytest
|
101
|
+
|
102
|
+
# or on typescript
|
103
|
+
pnpm install @langwatch/scenario vitest
|
69
104
|
```
|
70
105
|
|
71
|
-
Now create your first scenario
|
106
|
+
Now create your first scenario by copying the full working example below.
|
107
|
+
|
108
|
+
<details>
|
109
|
+
<summary><strong>Quick Start - Python</strong></summary>
|
110
|
+
|
111
|
+
Save it as `tests/test_vegetarian_recipe_agent.py`:
|
72
112
|
|
73
113
|
```python
|
74
114
|
import pytest
|
@@ -135,23 +175,86 @@ def vegetarian_recipe_agent(messages) -> scenario.AgentReturnTypes:
|
|
135
175
|
return response.choices[0].message # type: ignore
|
136
176
|
```
|
137
177
|
|
138
|
-
|
178
|
+
</details>
|
179
|
+
|
180
|
+
<details>
|
181
|
+
<summary><strong>Quick Start - TypeScript</strong></summary>
|
182
|
+
|
183
|
+
Save it as `tests/vegetarian-recipe-agent.test.ts`:
|
184
|
+
|
185
|
+
```typescript
|
186
|
+
import { openai } from "@ai-sdk/openai";
|
187
|
+
import * as scenario from "@langwatch/scenario";
|
188
|
+
import { generateText } from "ai";
|
189
|
+
import { describe, it, expect } from "vitest";
|
190
|
+
|
191
|
+
describe("Vegetarian Recipe Agent", () => {
|
192
|
+
const agent: scenario.AgentAdapter = {
|
193
|
+
role: scenario.AgentRole.AGENT,
|
194
|
+
call: async (input) => {
|
195
|
+
const response = await generateText({
|
196
|
+
model: openai("gpt-4.1-mini"),
|
197
|
+
messages: [
|
198
|
+
{
|
199
|
+
role: "system",
|
200
|
+
content: `You are a vegetarian recipe agent.\nGiven the user request, ask AT MOST ONE follow-up question, then provide a complete recipe. Keep your responses concise and focused.`,
|
201
|
+
},
|
202
|
+
...input.messages,
|
203
|
+
],
|
204
|
+
});
|
205
|
+
return response.text;
|
206
|
+
},
|
207
|
+
};
|
208
|
+
|
209
|
+
it("should generate a vegetarian recipe for a hungry and tired user on a Saturday evening", async () => {
|
210
|
+
const result = await scenario.run({
|
211
|
+
name: "dinner idea",
|
212
|
+
description: `It's saturday evening, the user is very hungry and tired, but have no money to order out, so they are looking for a recipe.`,
|
213
|
+
agents: [
|
214
|
+
agent,
|
215
|
+
scenario.userSimulatorAgent(),
|
216
|
+
scenario.judgeAgent({
|
217
|
+
model: openai("gpt-4.1-mini"),
|
218
|
+
criteria: [
|
219
|
+
"Agent should not ask more than two follow-up questions",
|
220
|
+
"Agent should generate a recipe",
|
221
|
+
"Recipe should include a list of ingredients",
|
222
|
+
"Recipe should include step-by-step cooking instructions",
|
223
|
+
"Recipe should be vegetarian and not include any sort of meat",
|
224
|
+
],
|
225
|
+
}),
|
226
|
+
],
|
227
|
+
});
|
228
|
+
expect(result.success).toBe(true);
|
229
|
+
});
|
230
|
+
});
|
231
|
+
```
|
232
|
+
|
233
|
+
</details>
|
234
|
+
|
235
|
+
Export your OpenAI API key:
|
139
236
|
|
140
237
|
```bash
|
141
238
|
OPENAI_API_KEY=<your-api-key>
|
142
239
|
```
|
143
240
|
|
144
|
-
Now run it
|
241
|
+
Now run the test:
|
145
242
|
|
146
243
|
```bash
|
244
|
+
# on python
|
147
245
|
pytest -s tests/test_vegetarian_recipe_agent.py
|
246
|
+
|
247
|
+
# on typescript
|
248
|
+
npx vitest run tests/vegetarian-recipe-agent.test.ts
|
148
249
|
```
|
149
250
|
|
150
251
|
This is what it will look like:
|
151
252
|
|
152
|
-
[](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11)
|
153
254
|
|
154
|
-
You can find the same code example in [examples/test_vegetarian_recipe_agent.py](examples/
|
255
|
+
You can find the same code example in [python/examples/](python/examples/test_vegetarian_recipe_agent.py) or [javascript/examples/](javascript/examples/vitest/tests/vegetarian-recipe-agent.test.ts).
|
256
|
+
|
257
|
+
Now check out the [full documentation](https://scenario.langwatch.ai) to learn more and find next steps.
|
155
258
|
|
156
259
|
## Simulation on Autopilot
|
157
260
|
|
@@ -253,6 +356,16 @@ async def test_early_assumption_bias():
|
|
253
356
|
assert result.success
|
254
357
|
```
|
255
358
|
|
359
|
+
## LangWatch Visualization
|
360
|
+
|
361
|
+
Set your [LangWatch API key](https://app.langwatch.ai/) to visualize the scenarios in real-time, as they run, for a much better debugging experience and team collaboration:
|
362
|
+
|
363
|
+
```bash
|
364
|
+
LANGWATCH_API_KEY="your-api-key"
|
365
|
+
```
|
366
|
+
|
367
|
+

|
368
|
+
|
256
369
|
## Debug mode
|
257
370
|
|
258
371
|
You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running, or by passing the `--debug` flag to pytest.
|
@@ -317,26 +430,16 @@ async def test_user_is_very_hungry():
|
|
317
430
|
|
318
431
|
Those two scenarios should now run in parallel.
|
319
432
|
|
320
|
-
##
|
321
|
-
|
322
|
-
Scenario automatically publishes events during execution for monitoring and observability. You can enable event reporting by setting environment variables:
|
323
|
-
|
324
|
-
```bash
|
325
|
-
# Enable automatic event reporting
|
326
|
-
export LANGWATCH_ENDPOINT="https://api.langwatch.ai"
|
327
|
-
export LANGWATCH_API_KEY="your-api-key"
|
328
|
-
```
|
329
|
-
|
330
|
-
With these variables set, Scenario will automatically:
|
433
|
+
## Contributing
|
331
434
|
|
332
|
-
|
333
|
-
- Handle retries and error handling automatically
|
334
|
-
- Process events asynchronously without blocking your tests
|
435
|
+
We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
|
335
436
|
|
336
|
-
|
437
|
+
## Support
|
337
438
|
|
338
|
-
|
439
|
+
- 📖 [Documentation](https://scenario.langwatch.ai)
|
440
|
+
- 💬 [Discord Community](https://discord.gg/langwatch)
|
441
|
+
- 🐛 [Issue Tracker](https://github.com/langwatch/scenario/issues)
|
339
442
|
|
340
443
|
## License
|
341
444
|
|
342
|
-
MIT License
|
445
|
+
MIT License - see [LICENSE](LICENSE) for details.
|