langwatch-scenario 0.4.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/METADATA +210 -86
  2. langwatch_scenario-0.7.1.dist-info/RECORD +237 -0
  3. scenario/__init__.py +12 -118
  4. scenario/_events/__init__.py +64 -0
  5. scenario/_events/event_bus.py +185 -0
  6. scenario/_events/event_reporter.py +83 -0
  7. scenario/_events/events.py +162 -0
  8. scenario/_events/messages.py +58 -0
  9. scenario/_events/utils.py +97 -0
  10. scenario/_generated/langwatch_api_client/README.md +139 -0
  11. scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py +13 -0
  12. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/__init__.py +1 -0
  13. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/__init__.py +1 -0
  14. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_annotations_id.py +155 -0
  15. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_prompts_by_id.py +218 -0
  16. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_scenario_events.py +183 -0
  17. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations.py +136 -0
  18. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_id.py +155 -0
  19. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_trace_id.py +160 -0
  20. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_dataset_by_slug_or_id.py +229 -0
  21. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts.py +188 -0
  22. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id.py +218 -0
  23. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id_versions.py +218 -0
  24. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_trace_id.py +155 -0
  25. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/patch_api_annotations_id.py +178 -0
  26. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_annotations_trace_id.py +178 -0
  27. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_dataset_by_slug_entries.py +108 -0
  28. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts.py +187 -0
  29. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts_by_id_versions.py +241 -0
  30. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_scenario_events.py +229 -0
  31. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_share.py +155 -0
  32. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_unshare.py +155 -0
  33. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/put_api_prompts_by_id.py +241 -0
  34. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/__init__.py +1 -0
  35. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/post_api_trace_search.py +168 -0
  36. scenario/_generated/langwatch_api_client/lang_watch_api_client/client.py +268 -0
  37. scenario/_generated/langwatch_api_client/lang_watch_api_client/errors.py +16 -0
  38. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/__init__.py +455 -0
  39. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/annotation.py +131 -0
  40. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries.py +74 -0
  41. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries_entries_item.py +44 -0
  42. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_annotations_id_response_200.py +68 -0
  43. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_200.py +59 -0
  44. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400.py +61 -0
  45. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400_error.py +8 -0
  46. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401.py +61 -0
  47. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401_error.py +8 -0
  48. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_404.py +59 -0
  49. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_500.py +59 -0
  50. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_200.py +81 -0
  51. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_400.py +59 -0
  52. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_401.py +59 -0
  53. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_500.py +59 -0
  54. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/error.py +67 -0
  55. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation.py +164 -0
  56. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation_timestamps.py +68 -0
  57. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200.py +75 -0
  58. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item.py +109 -0
  59. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item_entry.py +44 -0
  60. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_400.py +78 -0
  61. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_401.py +78 -0
  62. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_404.py +78 -0
  63. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_422.py +67 -0
  64. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_500.py +78 -0
  65. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200.py +172 -0
  66. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item.py +69 -0
  67. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item_role.py +10 -0
  68. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0.py +81 -0
  69. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema.py +77 -0
  70. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema_schema.py +44 -0
  71. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_type.py +8 -0
  72. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400.py +61 -0
  73. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400_error.py +8 -0
  74. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401.py +61 -0
  75. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401_error.py +8 -0
  76. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_404.py +59 -0
  77. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_500.py +59 -0
  78. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200.py +155 -0
  79. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data.py +204 -0
  80. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
  81. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
  82. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
  83. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
  84. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
  85. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
  86. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
  87. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
  88. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
  89. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
  90. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
  91. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
  92. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400.py +61 -0
  93. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400_error.py +8 -0
  94. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401.py +61 -0
  95. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401_error.py +8 -0
  96. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_404.py +59 -0
  97. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_500.py +59 -0
  98. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item.py +172 -0
  99. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item.py +69 -0
  100. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item_role.py +10 -0
  101. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0.py +81 -0
  102. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema.py +77 -0
  103. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema_schema.py +44 -0
  104. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_type.py +8 -0
  105. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400.py +61 -0
  106. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400_error.py +8 -0
  107. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401.py +61 -0
  108. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401_error.py +8 -0
  109. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_500.py +59 -0
  110. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200.py +249 -0
  111. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_error_type_0.py +79 -0
  112. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item.py +152 -0
  113. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_error.py +79 -0
  114. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_timestamps.py +68 -0
  115. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_input.py +59 -0
  116. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metadata.py +68 -0
  117. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metrics.py +95 -0
  118. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_output.py +59 -0
  119. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item.py +271 -0
  120. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_error_type_0.py +79 -0
  121. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input.py +90 -0
  122. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input_value_item.py +69 -0
  123. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_metrics.py +77 -0
  124. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output.py +89 -0
  125. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output_value_item.py +68 -0
  126. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_params.py +68 -0
  127. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_timestamps.py +95 -0
  128. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_timestamps.py +77 -0
  129. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/input_.py +68 -0
  130. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metadata.py +68 -0
  131. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metrics.py +115 -0
  132. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/output.py +59 -0
  133. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/pagination.py +68 -0
  134. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_body.py +77 -0
  135. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_response_200.py +68 -0
  136. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_annotations_trace_id_body.py +77 -0
  137. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_body.py +59 -0
  138. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body.py +147 -0
  139. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data.py +207 -0
  140. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations.py +106 -0
  141. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item.py +79 -0
  142. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item_type.py +18 -0
  143. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_rows_item.py +59 -0
  144. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item.py +71 -0
  145. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item_type.py +16 -0
  146. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item.py +71 -0
  147. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item_role.py +10 -0
  148. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item.py +98 -0
  149. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_json_schema.py +59 -0
  150. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_type.py +11 -0
  151. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_prompting_technique.py +59 -0
  152. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200.py +155 -0
  153. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data.py +206 -0
  154. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
  155. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
  156. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
  157. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
  158. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
  159. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
  160. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
  161. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
  162. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
  163. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
  164. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
  165. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
  166. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400.py +61 -0
  167. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400_error.py +8 -0
  168. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401.py +61 -0
  169. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401_error.py +8 -0
  170. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_404.py +59 -0
  171. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_500.py +59 -0
  172. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200.py +172 -0
  173. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item.py +69 -0
  174. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item_role.py +10 -0
  175. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0.py +81 -0
  176. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema.py +77 -0
  177. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema_schema.py +44 -0
  178. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_type.py +8 -0
  179. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400.py +61 -0
  180. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400_error.py +8 -0
  181. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401.py +61 -0
  182. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401_error.py +8 -0
  183. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_500.py +59 -0
  184. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0.py +127 -0
  185. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0_metadata.py +68 -0
  186. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1.py +164 -0
  187. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0.py +98 -0
  188. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0_verdict.py +10 -0
  189. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_status.py +13 -0
  190. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2.py +245 -0
  191. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_0.py +88 -0
  192. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_1.py +88 -0
  193. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2.py +120 -0
  194. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item.py +87 -0
  195. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item_function.py +67 -0
  196. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_3.py +88 -0
  197. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_4.py +85 -0
  198. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_201.py +81 -0
  199. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_400.py +59 -0
  200. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_401.py +59 -0
  201. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_500.py +59 -0
  202. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_share_response_200.py +59 -0
  203. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_unshare_response_200.py +59 -0
  204. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_body.py +59 -0
  205. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_200.py +75 -0
  206. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400.py +61 -0
  207. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400_error.py +8 -0
  208. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401.py +61 -0
  209. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401_error.py +8 -0
  210. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_404.py +59 -0
  211. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_500.py +59 -0
  212. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request.py +133 -0
  213. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request_filters.py +51 -0
  214. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py +93 -0
  215. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py +77 -0
  216. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py +225 -0
  217. scenario/_generated/langwatch_api_client/lang_watch_api_client/py.typed +1 -0
  218. scenario/_generated/langwatch_api_client/lang_watch_api_client/types.py +46 -0
  219. scenario/_generated/langwatch_api_client/pyproject.toml +27 -0
  220. scenario/_utils/__init__.py +32 -0
  221. scenario/_utils/ids.py +58 -0
  222. scenario/_utils/message_conversion.py +103 -0
  223. scenario/{utils.py → _utils/utils.py} +21 -110
  224. scenario/agent_adapter.py +8 -4
  225. scenario/cache.py +4 -3
  226. scenario/config.py +7 -5
  227. scenario/judge_agent.py +13 -29
  228. scenario/pytest_plugin.py +6 -51
  229. scenario/scenario_executor.py +372 -215
  230. scenario/scenario_state.py +6 -6
  231. scenario/script.py +9 -9
  232. scenario/types.py +15 -8
  233. scenario/user_simulator_agent.py +4 -11
  234. langwatch_scenario-0.4.0.dist-info/RECORD +0 -18
  235. {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/WHEEL +0 -0
  236. {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/entry_points.txt +0 -0
  237. {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/top_level.txt +0 -0
  238. /scenario/{error_messages.py → _error_messages.py} +0 -0
@@ -12,37 +12,55 @@ from typing import (
12
12
  Callable,
13
13
  Dict,
14
14
  List,
15
- Any,
16
15
  Optional,
17
16
  Set,
18
17
  Tuple,
19
18
  Union,
19
+ TypedDict,
20
20
  )
21
21
  import time
22
+ import warnings
22
23
  import termcolor
23
24
  import asyncio
24
25
  import concurrent.futures
25
26
 
26
27
  from scenario.config import ScenarioConfig
27
- from scenario.utils import (
28
- await_if_awaitable,
29
- check_valid_return_type,
28
+ from scenario._utils import (
30
29
  convert_agent_return_types_to_openai_messages,
30
+ check_valid_return_type,
31
31
  print_openai_messages,
32
32
  show_spinner,
33
+ await_if_awaitable,
34
+ get_or_create_batch_run_id,
35
+ generate_scenario_run_id,
33
36
  )
34
37
  from openai.types.chat import (
35
38
  ChatCompletionMessageParam,
36
39
  ChatCompletionUserMessageParam,
40
+ ChatCompletionAssistantMessageParam,
37
41
  )
38
42
 
39
43
  from .types import AgentInput, AgentRole, ScenarioResult, ScriptStep
40
- from .error_messages import agent_response_not_awaitable
44
+ from ._error_messages import agent_response_not_awaitable
41
45
  from .cache import context_scenario
42
46
  from .agent_adapter import AgentAdapter
43
47
  from .script import proceed
44
48
  from pksuid import PKSUID
45
49
  from .scenario_state import ScenarioState
50
+ from ._events import (
51
+ ScenarioEventBus,
52
+ ScenarioEvent,
53
+ ScenarioRunStartedEvent,
54
+ ScenarioMessageSnapshotEvent,
55
+ ScenarioRunFinishedEvent,
56
+ ScenarioRunStartedEventMetadata,
57
+ ScenarioRunFinishedEventResults,
58
+ ScenarioRunFinishedEventVerdict,
59
+ ScenarioRunFinishedEventStatus,
60
+ convert_messages_to_api_client_messages,
61
+ )
62
+ from rx.subject.subject import Subject
63
+ from rx.core.observable.observable import Observable
46
64
 
47
65
 
48
66
  class ScenarioExecutor:
@@ -66,41 +84,8 @@ class ScenarioExecutor:
66
84
  agents: List of agent adapters participating in the scenario
67
85
  script: Optional list of script steps to control scenario flow
68
86
  config: Configuration settings for execution behavior
69
-
70
- Example:
71
- ```python
72
- # Direct instantiation (less common)
73
- executor = ScenarioExecutor(
74
- name="weather query test",
75
- description="User asks about weather, agent should provide helpful response",
76
- agents=[
77
- weather_agent,
78
- scenario.UserSimulatorAgent(),
79
- scenario.JudgeAgent(criteria=["Agent provides helpful weather info"])
80
- ],
81
- max_turns=10,
82
- verbose=True
83
- )
84
- result = await executor._run()
85
-
86
- # Preferred high-level API
87
- result = await scenario.run(
88
- name="weather query test",
89
- description="User asks about weather, agent should provide helpful response",
90
- agents=[
91
- weather_agent,
92
- scenario.UserSimulatorAgent(),
93
- scenario.JudgeAgent(criteria=["Agent provides helpful weather info"])
94
- ]
95
- )
96
- ```
97
-
98
- Note:
99
- - Scenarios run in isolated thread pools to support parallel execution
100
- - All agent interactions are cached when cache_key is configured
101
- - Debug mode allows step-by-step execution with user intervention
102
- - Results include detailed timing information and conversation history
103
87
  """
88
+
104
89
  name: str
105
90
  description: str
106
91
  agents: List[AgentAdapter]
@@ -115,6 +100,11 @@ class ScenarioExecutor:
115
100
  _pending_roles_on_turn: List[AgentRole] = []
116
101
  _pending_agents_on_turn: Set[AgentAdapter] = set()
117
102
  _agent_times: Dict[int, float] = {}
103
+ _events: Subject
104
+
105
+ event_bus: ScenarioEventBus
106
+
107
+ batch_run_id: str
118
108
 
119
109
  def __init__(
120
110
  self,
@@ -127,6 +117,7 @@ class ScenarioExecutor:
127
117
  verbose: Optional[Union[bool, int]] = None,
128
118
  cache_key: Optional[str] = None,
129
119
  debug: Optional[bool] = None,
120
+ event_bus: Optional[ScenarioEventBus] = None,
130
121
  ):
131
122
  """
132
123
  Initialize a scenario executor.
@@ -147,26 +138,7 @@ class ScenarioExecutor:
147
138
  Overrides global configuration for this scenario.
148
139
  debug: Whether to enable debug mode with step-by-step execution.
149
140
  Overrides global configuration for this scenario.
150
-
151
- Example:
152
- ```python
153
- executor = ScenarioExecutor(
154
- name="customer service test",
155
- description="Customer has a billing question and needs help",
156
- agents=[
157
- customer_service_agent,
158
- scenario.UserSimulatorAgent(),
159
- scenario.JudgeAgent(criteria=[
160
- "Agent is polite and professional",
161
- "Agent addresses the billing question",
162
- "Agent provides clear next steps"
163
- ])
164
- ],
165
- max_turns=15,
166
- verbose=True,
167
- debug=False
168
- )
169
- ```
141
+ event_bus: Optional event bus that will subscribe to this executor's events
170
142
  """
171
143
  self.name = name
172
144
  self.description = description
@@ -183,115 +155,33 @@ class ScenarioExecutor:
183
155
 
184
156
  self.reset()
185
157
 
186
- @classmethod
187
- async def run(
188
- cls,
189
- name: str,
190
- description: str,
191
- agents: List[AgentAdapter] = [],
192
- max_turns: Optional[int] = None,
193
- verbose: Optional[Union[bool, int]] = None,
194
- cache_key: Optional[str] = None,
195
- debug: Optional[bool] = None,
196
- script: Optional[List[ScriptStep]] = None,
197
- ) -> ScenarioResult:
198
- """
199
- High-level interface for running a scenario test.
200
-
201
- This is the main entry point for executing scenario tests. It creates a
202
- ScenarioExecutor instance and runs it in an isolated thread pool to support
203
- parallel execution and prevent blocking.
158
+ # Create executor's own event stream
159
+ self._events = Subject()
204
160
 
205
- Args:
206
- name: Human-readable name for the scenario
207
- description: Detailed description of what the scenario tests
208
- agents: List of agent adapters (agent under test, user simulator, judge)
209
- max_turns: Maximum conversation turns before timeout (default: 10)
210
- verbose: Show detailed output during execution
211
- cache_key: Cache key for deterministic behavior
212
- debug: Enable debug mode for step-by-step execution
213
- script: Optional script steps to control scenario flow
161
+ # Create and configure event bus to subscribe to our events
162
+ self.event_bus = event_bus or ScenarioEventBus()
163
+ self.event_bus.subscribe_to_events(self._events)
214
164
 
215
- Returns:
216
- ScenarioResult containing the test outcome, conversation history,
217
- success/failure status, and detailed reasoning
218
-
219
- Example:
220
- ```python
221
- import scenario
165
+ self.batch_run_id = get_or_create_batch_run_id()
222
166
 
223
- # Simple scenario with automatic flow
224
- result = await scenario.run(
225
- name="help request",
226
- description="User asks for help with a technical problem",
227
- agents=[
228
- my_agent,
229
- scenario.UserSimulatorAgent(),
230
- scenario.JudgeAgent(criteria=["Agent provides helpful response"])
231
- ]
232
- )
167
+ @property
168
+ def events(self) -> Observable:
169
+ """Expose event stream for subscribers like the event bus."""
170
+ return self._events
233
171
 
234
- # Scripted scenario with custom evaluations
235
- result = await scenario.run(
236
- name="custom interaction",
237
- description="Test specific conversation flow",
238
- agents=[
239
- my_agent,
240
- scenario.UserSimulatorAgent(),
241
- scenario.JudgeAgent(criteria=["Agent provides helpful response"])
242
- ],
243
- script=[
244
- scenario.user("Hello"),
245
- scenario.agent(),
246
- custom_eval,
247
- scenario.succeed()
248
- ]
249
- )
172
+ def _emit_event(self, event: ScenarioEvent) -> None:
173
+ """
174
+ Emit a domain event to all subscribers.
250
175
 
251
- # Results analysis
252
- print(f"Test {'PASSED' if result.success else 'FAILED'}")
253
- print(f"Reasoning: {result.reasoning}")
254
- print(f"Conversation had {len(result.messages)} messages")
255
- ```
176
+ This method publishes scenario events to the internal event stream,
177
+ which subscribers (like the event bus) can observe and react to.
178
+ The timestamp is automatically set to the current time.
256
179
 
257
- Note:
258
- - Runs in isolated thread pool to support parallel execution
259
- - Blocks until scenario completes or times out
260
- - All agent calls are automatically cached when cache_key is set
261
- - Exception handling ensures clean resource cleanup
180
+ Args:
181
+ event: The scenario event to emit
262
182
  """
263
- scenario = cls(
264
- name=name,
265
- description=description,
266
- agents=agents,
267
- max_turns=max_turns,
268
- verbose=verbose,
269
- cache_key=cache_key,
270
- debug=debug,
271
- script=script,
272
- )
273
-
274
- # We'll use a thread pool to run the execution logic, we
275
- # require a separate thread because even though asyncio is
276
- # being used throughout, any user code on the callback can
277
- # be blocking, preventing them from running scenarios in parallel
278
- with concurrent.futures.ThreadPoolExecutor() as executor:
279
-
280
- def run_in_thread():
281
- loop = asyncio.new_event_loop()
282
- asyncio.set_event_loop(loop)
283
-
284
- try:
285
- return loop.run_until_complete(scenario._run())
286
- finally:
287
- loop.close()
288
-
289
- # Run the function in the thread pool and await its result
290
- # This converts the thread's execution into a Future that the current
291
- # event loop can await without blocking
292
- loop = asyncio.get_event_loop()
293
- result = await loop.run_in_executor(executor, run_in_thread)
294
- return result
183
+ event.timestamp = int(time.time() * 1000)
184
+ self._events.on_next(event)
295
185
 
296
186
  def reset(self):
297
187
  """
@@ -300,18 +190,6 @@ class ScenarioExecutor:
300
190
  This method reinitializes all internal state for a fresh scenario run,
301
191
  including conversation history, turn counters, and agent timing information.
302
192
  Called automatically during initialization and can be used to rerun scenarios.
303
-
304
- Example:
305
- ```python
306
- executor = ScenarioExecutor(...)
307
-
308
- # Run first test
309
- result1 = await executor._run()
310
-
311
- # Reset and run again
312
- executor.reset()
313
- result2 = await executor._run()
314
- ```
315
193
  """
316
194
  self._state = ScenarioState(
317
195
  description=self.description,
@@ -349,24 +227,24 @@ class ScenarioExecutor:
349
227
  Used to avoid broadcasting the message back to its creator.
350
228
 
351
229
  Example:
352
- ```python
230
+ ```
353
231
  def inject_system_message(state: ScenarioState) -> None:
354
- state._executor.add_message({
232
+ state.add_message({
355
233
  "role": "system",
356
234
  "content": "The user is now in a hurry"
357
235
  })
358
236
 
359
237
  # Use in script
360
238
  result = await scenario.run(
361
- name="system message test",
362
- agents=[agent, user_sim, judge],
363
- script=[
364
- scenario.user("Hello"),
365
- scenario.agent(),
366
- inject_system_message,
367
- scenario.user(), # Will see the system message
368
- scenario.succeed()
369
- ]
239
+ name="system message test",
240
+ agents=[agent, user_sim, judge],
241
+ script=[
242
+ scenario.user("Hello"),
243
+ scenario.agent(),
244
+ inject_system_message,
245
+ scenario.user(), # Will see the system message
246
+ scenario.succeed()
247
+ ]
370
248
  )
371
249
  ```
372
250
  """
@@ -396,7 +274,7 @@ class ScenarioExecutor:
396
274
  from_agent_idx: Index of the agent that generated these messages
397
275
 
398
276
  Example:
399
- ```python
277
+ ```
400
278
  # Agent returns multiple messages for a complex interaction
401
279
  messages = [
402
280
  {"role": "assistant", "content": "Let me search for that..."},
@@ -476,7 +354,11 @@ class ScenarioExecutor:
476
354
  self, role: AgentRole
477
355
  ) -> Tuple[int, Optional[AgentAdapter]]:
478
356
  for idx, agent in enumerate(self.agents):
479
- if role == agent.role and agent in self._pending_agents_on_turn:
357
+ if (
358
+ role == agent.role
359
+ and agent in self._pending_agents_on_turn
360
+ and agent.role in self._pending_roles_on_turn
361
+ ):
480
362
  return idx, agent
481
363
  return -1, None
482
364
 
@@ -503,7 +385,7 @@ class ScenarioExecutor:
503
385
  agent_time=agent_time,
504
386
  )
505
387
 
506
- async def _run(self) -> ScenarioResult:
388
+ async def run(self) -> ScenarioResult:
507
389
  """
508
390
  Run a scenario against the agent under test.
509
391
 
@@ -513,30 +395,63 @@ class ScenarioExecutor:
513
395
  Returns:
514
396
  ScenarioResult containing the test outcome
515
397
  """
398
+ scenario_run_id = generate_scenario_run_id()
516
399
 
517
- if self.config.verbose:
518
- print("") # new line
519
-
520
- self.reset()
521
-
522
- for script_step in self.script:
523
- callable = script_step(self._state)
524
- if isinstance(callable, Awaitable):
525
- result = await callable
526
- else:
527
- result = callable
400
+ try:
401
+ self._emit_run_started_event(scenario_run_id)
528
402
 
529
- if isinstance(result, ScenarioResult):
530
- return result
403
+ if self.config.verbose:
404
+ print("") # new line
405
+
406
+ self.reset()
407
+
408
+ for script_step in self.script:
409
+ callable = script_step(self._state)
410
+ if isinstance(callable, Awaitable):
411
+ result = await callable
412
+ else:
413
+ result = callable
414
+ self._emit_message_snapshot_event(scenario_run_id)
415
+
416
+ if isinstance(result, ScenarioResult):
417
+ status = (
418
+ ScenarioRunFinishedEventStatus.SUCCESS
419
+ if result.success
420
+ else ScenarioRunFinishedEventStatus.FAILED
421
+ )
422
+ self._emit_run_finished_event(scenario_run_id, result, status)
423
+ return result
531
424
 
532
- return self._reached_max_turns(
533
- """Reached end of script without conclusion, add one of the following to the end of the script:
425
+ result = self._reached_max_turns(
426
+ """Reached end of script without conclusion, add one of the following to the end of the script:
534
427
 
535
428
  - `scenario.proceed()` to let the simulation continue to play out
536
429
  - `scenario.judge()` to force criteria judgement
537
430
  - `scenario.succeed()` or `scenario.fail()` to end the test with an explicit result
538
- """
539
- )
431
+ """
432
+ )
433
+
434
+ status = (
435
+ ScenarioRunFinishedEventStatus.SUCCESS
436
+ if result.success
437
+ else ScenarioRunFinishedEventStatus.FAILED
438
+ )
439
+ self._emit_run_finished_event(scenario_run_id, result, status)
440
+ return result
441
+
442
+ except Exception as e:
443
+ # Publish failure event before propagating the error
444
+ error_result = ScenarioResult(
445
+ success=False,
446
+ messages=self._state.messages,
447
+ reasoning=f"Scenario failed with error: {str(e)}",
448
+ total_time=time.time() - self._total_start_time,
449
+ agent_time=0,
450
+ )
451
+ self._emit_run_finished_event(
452
+ scenario_run_id, error_result, ScenarioRunFinishedEventStatus.ERROR
453
+ )
454
+ raise # Re-raise the exception after cleanup
540
455
 
541
456
  async def _call_agent(
542
457
  self, idx: int, role: AgentRole, request_judgment: bool = False
@@ -577,16 +492,19 @@ class ScenarioExecutor:
577
492
  ):
578
493
  start_time = time.time()
579
494
 
580
- agent_response = agent.call(
581
- AgentInput(
582
- # TODO: test thread_id
583
- thread_id=self._state.thread_id,
584
- messages=self._state.messages,
585
- new_messages=self._pending_messages.get(idx, []),
586
- judgment_request=request_judgment,
587
- scenario_state=self._state,
495
+ # Prevent pydantic validation warnings which should already be disabled
496
+ with warnings.catch_warnings():
497
+ warnings.simplefilter("ignore")
498
+ agent_response = agent.call(
499
+ AgentInput(
500
+ # TODO: test thread_id
501
+ thread_id=self._state.thread_id,
502
+ messages=self._state.messages,
503
+ new_messages=self._pending_messages.get(idx, []),
504
+ judgment_request=request_judgment,
505
+ scenario_state=self._state,
506
+ )
588
507
  )
589
- )
590
508
  if not isinstance(agent_response, Awaitable):
591
509
  raise Exception(
592
510
  agent_response_not_awaitable(agent.__class__.__name__),
@@ -708,15 +626,24 @@ class ScenarioExecutor:
708
626
  reasoning=reasoning or "Scenario marked as failed with scenario.fail()",
709
627
  )
710
628
 
629
+ def _consume_until_role(self, role: AgentRole) -> None:
630
+ while len(self._pending_roles_on_turn) > 0:
631
+ next_role = self._pending_roles_on_turn[0]
632
+ if next_role == role:
633
+ break
634
+ self._pending_roles_on_turn.pop(0)
635
+
711
636
  async def _script_call_agent(
712
637
  self,
713
638
  role: AgentRole,
714
639
  content: Optional[Union[str, ChatCompletionMessageParam]] = None,
715
640
  request_judgment: bool = False,
716
641
  ) -> Optional[ScenarioResult]:
642
+ self._consume_until_role(role)
717
643
  idx, next_agent = self._next_agent_for_role(role)
718
644
  if not next_agent:
719
645
  self._new_turn()
646
+ self._consume_until_role(role)
720
647
  idx, next_agent = self._next_agent_for_role(role)
721
648
 
722
649
  if not next_agent:
@@ -738,11 +665,16 @@ class ScenarioExecutor:
738
665
  )
739
666
 
740
667
  self._pending_agents_on_turn.remove(next_agent)
741
- self._pending_roles_on_turn.remove(role)
742
668
 
743
669
  if content:
744
670
  if isinstance(content, str):
745
- message = ChatCompletionUserMessageParam(role="user", content=content)
671
+ message = (
672
+ ChatCompletionUserMessageParam(role="user", content=content)
673
+ if role == AgentRole.USER
674
+ else ChatCompletionAssistantMessageParam(
675
+ role="assistant", content=content
676
+ )
677
+ )
746
678
  else:
747
679
  message = content
748
680
 
@@ -756,3 +688,228 @@ class ScenarioExecutor:
756
688
  )
757
689
  if isinstance(result, ScenarioResult):
758
690
  return result
691
+
692
+ # Event handling methods
693
+
694
+ class _CommonEventFields(TypedDict):
695
+ """
696
+ Common fields shared across all scenario events.
697
+
698
+ These fields provide consistent identification and timing information
699
+ for all events emitted during scenario execution.
700
+
701
+ Attributes:
702
+ batch_run_id: Unique identifier for the batch of scenario runs
703
+ scenario_run_id: Unique identifier for this specific scenario run
704
+ scenario_id: Human-readable name/identifier for the scenario
705
+ timestamp: Unix timestamp in milliseconds when the event occurred
706
+ """
707
+
708
+ batch_run_id: str
709
+ scenario_run_id: str
710
+ scenario_id: str
711
+ timestamp: int
712
+
713
+ def _create_common_event_fields(self, scenario_run_id: str) -> _CommonEventFields:
714
+ """
715
+ Create common fields used across all scenario events.
716
+
717
+ This method generates the standard fields that every scenario event
718
+ must include for proper identification and timing.
719
+
720
+ Args:
721
+ scenario_run_id: Unique identifier for the current scenario run
722
+
723
+ Returns:
724
+ Dictionary containing common event fields with current timestamp
725
+ """
726
+ return {
727
+ "batch_run_id": self.batch_run_id,
728
+ "scenario_run_id": scenario_run_id,
729
+ "scenario_id": self.name,
730
+ "timestamp": int(time.time() * 1000),
731
+ }
732
+
733
def _emit_run_started_event(self, scenario_run_id: str) -> None:
    """
    Publish the event that marks the beginning of a scenario run.

    The event combines the common identification fields with metadata
    holding this scenario's name and description, and is handed to
    `_emit_event` for publication on the executor's event stream.

    Args:
        scenario_run_id: Unique identifier for the current scenario run
    """
    started_event = ScenarioRunStartedEvent(
        **self._create_common_event_fields(scenario_run_id),
        metadata=ScenarioRunStartedEventMetadata(
            name=self.name,
            description=self.description,
        ),
    )
    self._emit_event(started_event)
755
+
756
def _emit_message_snapshot_event(self, scenario_run_id: str) -> None:
    """
    Publish a snapshot of the conversation as it currently stands.

    The full message list held in `self._state.messages` is converted to
    the API client message format and attached to the event. `run()` calls
    this after each script step, so subscribers can follow the conversation
    while the scenario is still executing.

    Args:
        scenario_run_id: Unique identifier for the current scenario run
    """
    event_fields = self._create_common_event_fields(scenario_run_id)
    api_messages = convert_messages_to_api_client_messages(self._state.messages)

    self._emit_event(
        ScenarioMessageSnapshotEvent(
            **event_fields,
            messages=api_messages,
        )
    )
771
+
772
def _emit_run_finished_event(
    self,
    scenario_run_id: str,
    result: ScenarioResult,
    status: ScenarioRunFinishedEventStatus,
) -> None:
    """
    Publish the final event of a scenario run and close the event stream.

    The verdict is derived from `result.success`; the reasoning (empty
    string when absent) and the passed/failed criteria are copied from the
    result. After the event is emitted the executor's stream is completed.

    Args:
        scenario_run_id: Unique identifier for the current scenario run
        result: The final scenario result containing success/failure status
        status: The execution status (SUCCESS, FAILED, or ERROR)
    """
    event_fields = self._create_common_event_fields(scenario_run_id)

    if result.success:
        verdict = ScenarioRunFinishedEventVerdict.SUCCESS
    else:
        verdict = ScenarioRunFinishedEventVerdict.FAILURE

    outcome = ScenarioRunFinishedEventResults(
        verdict=verdict,
        reasoning=result.reasoning or "",
        met_criteria=result.passed_criteria,
        unmet_criteria=result.failed_criteria,
    )

    self._emit_event(
        ScenarioRunFinishedEvent(
            **event_fields,
            status=status,
            results=outcome,
        )
    )

    # Tell subscribers that no further events will follow.
    # NOTE(review): once the subject is completed, a later run() on this same
    # executor presumably cannot publish events any more - confirm.
    self._events.on_completed()
812
+
813
+
814
async def run(
    name: str,
    description: str,
    agents: List[AgentAdapter] = [],
    max_turns: Optional[int] = None,
    verbose: Optional[Union[bool, int]] = None,
    cache_key: Optional[str] = None,
    debug: Optional[bool] = None,
    script: Optional[List[ScriptStep]] = None,
) -> ScenarioResult:
    """
    High-level interface for running a scenario test.

    This is the main entry point for executing scenario tests. It creates a
    ScenarioExecutor instance and runs it on a dedicated event loop inside a
    worker thread, so that blocking user code in agents or script steps does
    not stall the caller's event loop or other scenarios running in parallel.

    Args:
        name: Human-readable name for the scenario
        description: Detailed description of what the scenario tests
        agents: List of agent adapters (agent under test, user simulator, judge)
        max_turns: Maximum conversation turns before timeout (default: 10)
        verbose: Show detailed output during execution
        cache_key: Cache key for deterministic behavior
        debug: Enable debug mode for step-by-step execution
        script: Optional script steps to control scenario flow

    Returns:
        ScenarioResult containing the test outcome, conversation history,
        success/failure status, and detailed reasoning

    Raises:
        Exception: Any error raised while the scenario executes (or while the
            event bus is drained) propagates to the caller.

    Example:
        ```
        import scenario

        # Simple scenario with automatic flow
        result = await scenario.run(
            name="help request",
            description="User asks for help with a technical problem",
            agents=[
                my_agent,
                scenario.UserSimulatorAgent(),
                scenario.JudgeAgent(criteria=["Agent provides helpful response"])
            ]
        )

        # Scripted scenario with custom evaluations
        result = await scenario.run(
            name="custom interaction",
            description="Test specific conversation flow",
            agents=[
                my_agent,
                scenario.UserSimulatorAgent(),
                scenario.JudgeAgent(criteria=["Agent provides helpful response"])
            ],
            script=[
                scenario.user("Hello"),
                scenario.agent(),
                custom_eval,
                scenario.succeed()
            ]
        )

        # Results analysis
        print(f"Test {'PASSED' if result.success else 'FAILED'}")
        print(f"Reasoning: {result.reasoning}")
        print(f"Conversation had {len(result.messages)} messages")
        ```
    """
    scenario_executor = ScenarioExecutor(
        name=name,
        description=description,
        # Pass a copy so the shared default list (and the caller's own list)
        # can never be mutated through the executor
        agents=list(agents),
        max_turns=max_turns,
        verbose=verbose,
        cache_key=cache_key,
        debug=debug,
        script=script,
    )

    def run_in_thread() -> ScenarioResult:
        # Each worker thread gets its own event loop; the caller's loop
        # belongs to a different thread and must not be reused here
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        try:
            try:
                return loop.run_until_complete(scenario_executor.run())
            finally:
                # Drain the event bus whether the run succeeded or failed
                scenario_executor.event_bus.drain()
        finally:
            # Always release the loop, even if draining the event bus raised
            loop.close()

    # We use a thread pool to run the execution logic: even though asyncio is
    # used throughout, any user code on the callbacks can be blocking, which
    # would prevent scenarios from running in parallel
    with concurrent.futures.ThreadPoolExecutor() as pool:
        # Convert the thread's execution into a future that the currently
        # running event loop can await without blocking
        running_loop = asyncio.get_running_loop()
        return await running_loop.run_in_executor(pool, run_in_thread)