langwatch-scenario 0.6.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. {langwatch_scenario-0.6.0.dist-info → langwatch_scenario-0.7.2.dist-info}/METADATA +145 -41
  2. langwatch_scenario-0.7.2.dist-info/RECORD +237 -0
  3. scenario/__init__.py +1 -4
  4. scenario/{events → _events}/__init__.py +9 -11
  5. scenario/_events/event_bus.py +185 -0
  6. scenario/{events → _events}/event_reporter.py +1 -1
  7. scenario/{events → _events}/events.py +20 -27
  8. scenario/_events/messages.py +58 -0
  9. scenario/{events → _events}/utils.py +43 -32
  10. scenario/_generated/langwatch_api_client/README.md +139 -0
  11. scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py +13 -0
  12. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/__init__.py +1 -0
  13. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/__init__.py +1 -0
  14. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_annotations_id.py +155 -0
  15. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_prompts_by_id.py +218 -0
  16. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_scenario_events.py +183 -0
  17. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations.py +136 -0
  18. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_id.py +155 -0
  19. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_trace_id.py +160 -0
  20. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_dataset_by_slug_or_id.py +229 -0
  21. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts.py +188 -0
  22. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id.py +218 -0
  23. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id_versions.py +218 -0
  24. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_trace_id.py +155 -0
  25. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/patch_api_annotations_id.py +178 -0
  26. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_annotations_trace_id.py +178 -0
  27. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_dataset_by_slug_entries.py +108 -0
  28. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts.py +187 -0
  29. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts_by_id_versions.py +241 -0
  30. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_scenario_events.py +229 -0
  31. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_share.py +155 -0
  32. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_unshare.py +155 -0
  33. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/put_api_prompts_by_id.py +241 -0
  34. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/__init__.py +1 -0
  35. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/post_api_trace_search.py +168 -0
  36. scenario/_generated/langwatch_api_client/lang_watch_api_client/client.py +268 -0
  37. scenario/_generated/langwatch_api_client/lang_watch_api_client/errors.py +16 -0
  38. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/__init__.py +455 -0
  39. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/annotation.py +131 -0
  40. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries.py +74 -0
  41. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries_entries_item.py +44 -0
  42. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_annotations_id_response_200.py +68 -0
  43. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_200.py +59 -0
  44. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400.py +61 -0
  45. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400_error.py +8 -0
  46. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401.py +61 -0
  47. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401_error.py +8 -0
  48. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_404.py +59 -0
  49. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_500.py +59 -0
  50. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_200.py +81 -0
  51. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_400.py +59 -0
  52. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_401.py +59 -0
  53. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_500.py +59 -0
  54. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/error.py +67 -0
  55. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation.py +164 -0
  56. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation_timestamps.py +68 -0
  57. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200.py +75 -0
  58. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item.py +109 -0
  59. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item_entry.py +44 -0
  60. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_400.py +78 -0
  61. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_401.py +78 -0
  62. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_404.py +78 -0
  63. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_422.py +67 -0
  64. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_500.py +78 -0
  65. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200.py +172 -0
  66. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item.py +69 -0
  67. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item_role.py +10 -0
  68. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0.py +81 -0
  69. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema.py +77 -0
  70. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema_schema.py +44 -0
  71. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_type.py +8 -0
  72. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400.py +61 -0
  73. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400_error.py +8 -0
  74. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401.py +61 -0
  75. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401_error.py +8 -0
  76. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_404.py +59 -0
  77. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_500.py +59 -0
  78. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200.py +155 -0
  79. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data.py +204 -0
  80. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
  81. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
  82. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
  83. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
  84. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
  85. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
  86. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
  87. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
  88. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
  89. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
  90. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
  91. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
  92. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400.py +61 -0
  93. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400_error.py +8 -0
  94. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401.py +61 -0
  95. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401_error.py +8 -0
  96. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_404.py +59 -0
  97. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_500.py +59 -0
  98. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item.py +172 -0
  99. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item.py +69 -0
  100. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item_role.py +10 -0
  101. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0.py +81 -0
  102. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema.py +77 -0
  103. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema_schema.py +44 -0
  104. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_type.py +8 -0
  105. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400.py +61 -0
  106. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400_error.py +8 -0
  107. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401.py +61 -0
  108. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401_error.py +8 -0
  109. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_500.py +59 -0
  110. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200.py +249 -0
  111. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_error_type_0.py +79 -0
  112. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item.py +152 -0
  113. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_error.py +79 -0
  114. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_timestamps.py +68 -0
  115. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_input.py +59 -0
  116. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metadata.py +68 -0
  117. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metrics.py +95 -0
  118. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_output.py +59 -0
  119. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item.py +271 -0
  120. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_error_type_0.py +79 -0
  121. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input.py +90 -0
  122. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input_value_item.py +69 -0
  123. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_metrics.py +77 -0
  124. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output.py +89 -0
  125. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output_value_item.py +68 -0
  126. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_params.py +68 -0
  127. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_timestamps.py +95 -0
  128. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_timestamps.py +77 -0
  129. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/input_.py +68 -0
  130. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metadata.py +68 -0
  131. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metrics.py +115 -0
  132. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/output.py +59 -0
  133. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/pagination.py +68 -0
  134. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_body.py +77 -0
  135. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_response_200.py +68 -0
  136. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_annotations_trace_id_body.py +77 -0
  137. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_body.py +59 -0
  138. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body.py +147 -0
  139. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data.py +207 -0
  140. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations.py +106 -0
  141. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item.py +79 -0
  142. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item_type.py +18 -0
  143. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_rows_item.py +59 -0
  144. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item.py +71 -0
  145. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item_type.py +16 -0
  146. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item.py +71 -0
  147. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item_role.py +10 -0
  148. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item.py +98 -0
  149. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_json_schema.py +59 -0
  150. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_type.py +11 -0
  151. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_prompting_technique.py +59 -0
  152. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200.py +155 -0
  153. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data.py +206 -0
  154. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
  155. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
  156. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
  157. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
  158. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
  159. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
  160. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
  161. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
  162. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
  163. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
  164. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
  165. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
  166. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400.py +61 -0
  167. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400_error.py +8 -0
  168. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401.py +61 -0
  169. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401_error.py +8 -0
  170. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_404.py +59 -0
  171. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_500.py +59 -0
  172. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200.py +172 -0
  173. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item.py +69 -0
  174. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item_role.py +10 -0
  175. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0.py +81 -0
  176. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema.py +77 -0
  177. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema_schema.py +44 -0
  178. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_type.py +8 -0
  179. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400.py +61 -0
  180. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400_error.py +8 -0
  181. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401.py +61 -0
  182. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401_error.py +8 -0
  183. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_500.py +59 -0
  184. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0.py +127 -0
  185. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0_metadata.py +68 -0
  186. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1.py +164 -0
  187. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0.py +98 -0
  188. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0_verdict.py +10 -0
  189. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_status.py +13 -0
  190. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2.py +245 -0
  191. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_0.py +88 -0
  192. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_1.py +88 -0
  193. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2.py +120 -0
  194. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item.py +87 -0
  195. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item_function.py +67 -0
  196. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_3.py +88 -0
  197. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_4.py +85 -0
  198. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_201.py +81 -0
  199. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_400.py +59 -0
  200. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_401.py +59 -0
  201. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_500.py +59 -0
  202. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_share_response_200.py +59 -0
  203. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_unshare_response_200.py +59 -0
  204. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_body.py +59 -0
  205. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_200.py +75 -0
  206. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400.py +61 -0
  207. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400_error.py +8 -0
  208. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401.py +61 -0
  209. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401_error.py +8 -0
  210. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_404.py +59 -0
  211. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_500.py +59 -0
  212. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request.py +133 -0
  213. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request_filters.py +51 -0
  214. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py +93 -0
  215. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py +77 -0
  216. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py +225 -0
  217. scenario/_generated/langwatch_api_client/lang_watch_api_client/py.typed +1 -0
  218. scenario/_generated/langwatch_api_client/lang_watch_api_client/types.py +46 -0
  219. scenario/_generated/langwatch_api_client/pyproject.toml +27 -0
  220. scenario/_utils/__init__.py +1 -1
  221. scenario/_utils/message_conversion.py +2 -2
  222. scenario/judge_agent.py +6 -1
  223. scenario/pytest_plugin.py +4 -4
  224. scenario/scenario_executor.py +196 -223
  225. scenario/types.py +5 -2
  226. langwatch_scenario-0.6.0.dist-info/RECORD +0 -27
  227. scenario/events/event_bus.py +0 -175
  228. scenario/events/messages.py +0 -84
  229. {langwatch_scenario-0.6.0.dist-info → langwatch_scenario-0.7.2.dist-info}/WHEEL +0 -0
  230. {langwatch_scenario-0.6.0.dist-info → langwatch_scenario-0.7.2.dist-info}/entry_points.txt +0 -0
  231. {langwatch_scenario-0.6.0.dist-info → langwatch_scenario-0.7.2.dist-info}/top_level.txt +0 -0
@@ -19,14 +19,15 @@ from typing import (
19
19
  TypedDict,
20
20
  )
21
21
  import time
22
+ import warnings
22
23
  import termcolor
23
24
  import asyncio
24
25
  import concurrent.futures
25
26
 
26
27
  from scenario.config import ScenarioConfig
27
28
  from scenario._utils import (
28
- check_valid_return_type,
29
29
  convert_agent_return_types_to_openai_messages,
30
+ check_valid_return_type,
30
31
  print_openai_messages,
31
32
  show_spinner,
32
33
  await_if_awaitable,
@@ -46,17 +47,20 @@ from .agent_adapter import AgentAdapter
46
47
  from .script import proceed
47
48
  from pksuid import PKSUID
48
49
  from .scenario_state import ScenarioState
49
- from .events import (
50
- ScenarioEventBus,
51
- ScenarioRunStartedEvent,
52
- ScenarioMessageSnapshotEvent,
53
- ScenarioRunFinishedEvent,
54
- ScenarioRunStartedEventMetadata,
55
- ScenarioRunFinishedEventResults,
56
- ScenarioRunFinishedEventVerdict,
57
- ScenarioRunFinishedEventStatus,
58
- convert_messages_to_ag_ui_messages,
50
+ from ._events import (
51
+ ScenarioEventBus,
52
+ ScenarioEvent,
53
+ ScenarioRunStartedEvent,
54
+ ScenarioMessageSnapshotEvent,
55
+ ScenarioRunFinishedEvent,
56
+ ScenarioRunStartedEventMetadata,
57
+ ScenarioRunFinishedEventResults,
58
+ ScenarioRunFinishedEventVerdict,
59
+ ScenarioRunFinishedEventStatus,
60
+ convert_messages_to_api_client_messages,
59
61
  )
62
+ from rx.subject.subject import Subject
63
+ from rx.core.observable.observable import Observable
60
64
 
61
65
 
62
66
  class ScenarioExecutor:
@@ -80,40 +84,6 @@ class ScenarioExecutor:
80
84
  agents: List of agent adapters participating in the scenario
81
85
  script: Optional list of script steps to control scenario flow
82
86
  config: Configuration settings for execution behavior
83
-
84
- Example:
85
- ```
86
- # Direct instantiation (less common)
87
- executor = ScenarioExecutor(
88
- name="weather query test",
89
- description="User asks about weather, agent should provide helpful response",
90
- agents=[
91
- weather_agent,
92
- scenario.UserSimulatorAgent(),
93
- scenario.JudgeAgent(criteria=["Agent provides helpful weather info"])
94
- ],
95
- max_turns=10,
96
- verbose=True
97
- )
98
- result = await executor._run()
99
-
100
- # Preferred high-level API
101
- result = await scenario.run(
102
- name="weather query test",
103
- description="User asks about weather, agent should provide helpful response",
104
- agents=[
105
- weather_agent,
106
- scenario.UserSimulatorAgent(),
107
- scenario.JudgeAgent(criteria=["Agent provides helpful weather info"])
108
- ]
109
- )
110
- ```
111
-
112
- Note:
113
- - Scenarios run in isolated thread pools to support parallel execution
114
- - All agent interactions are cached when cache_key is configured
115
- - Debug mode allows step-by-step execution with user intervention
116
- - Results include detailed timing information and conversation history
117
87
  """
118
88
 
119
89
  name: str
@@ -130,6 +100,7 @@ class ScenarioExecutor:
130
100
  _pending_roles_on_turn: List[AgentRole] = []
131
101
  _pending_agents_on_turn: Set[AgentAdapter] = set()
132
102
  _agent_times: Dict[int, float] = {}
103
+ _events: Subject
133
104
 
134
105
  event_bus: ScenarioEventBus
135
106
 
@@ -167,27 +138,7 @@ class ScenarioExecutor:
167
138
  Overrides global configuration for this scenario.
168
139
  debug: Whether to enable debug mode with step-by-step execution.
169
140
  Overrides global configuration for this scenario.
170
- event_reporter: Optional event reporter for the scenario
171
-
172
- Example:
173
- ```python
174
- executor = ScenarioExecutor(
175
- name="customer service test",
176
- description="Customer has a billing question and needs help",
177
- agents=[
178
- customer_service_agent,
179
- scenario.UserSimulatorAgent(),
180
- scenario.JudgeAgent(criteria=[
181
- "Agent is polite and professional",
182
- "Agent addresses the billing question",
183
- "Agent provides clear next steps"
184
- ])
185
- ],
186
- max_turns=15,
187
- verbose=True,
188
- debug=False
189
- )
190
- ```
141
+ event_bus: Optional event bus that will subscribe to this executor's events
191
142
  """
192
143
  self.name = name
193
144
  self.description = description
@@ -204,120 +155,33 @@ class ScenarioExecutor:
204
155
 
205
156
  self.reset()
206
157
 
158
+ # Create executor's own event stream
159
+ self._events = Subject()
160
+
161
+ # Create and configure event bus to subscribe to our events
207
162
  self.event_bus = event_bus or ScenarioEventBus()
163
+ self.event_bus.subscribe_to_events(self._events)
208
164
 
209
165
  self.batch_run_id = get_or_create_batch_run_id()
210
166
 
211
- @classmethod
212
- async def run(
213
- cls,
214
- name: str,
215
- description: str,
216
- agents: List[AgentAdapter] = [],
217
- max_turns: Optional[int] = None,
218
- verbose: Optional[Union[bool, int]] = None,
219
- cache_key: Optional[str] = None,
220
- debug: Optional[bool] = None,
221
- script: Optional[List[ScriptStep]] = None,
222
- ) -> ScenarioResult:
167
+ @property
168
+ def events(self) -> Observable:
169
+ """Expose event stream for subscribers like the event bus."""
170
+ return self._events
171
+
172
+ def _emit_event(self, event: ScenarioEvent) -> None:
223
173
  """
224
- High-level interface for running a scenario test.
174
+ Emit a domain event to all subscribers.
225
175
 
226
- This is the main entry point for executing scenario tests. It creates a
227
- ScenarioExecutor instance and runs it in an isolated thread pool to support
228
- parallel execution and prevent blocking.
176
+ This method publishes scenario events to the internal event stream,
177
+ which subscribers (like the event bus) can observe and react to.
178
+ The timestamp is automatically set to the current time.
229
179
 
230
180
  Args:
231
- name: Human-readable name for the scenario
232
- description: Detailed description of what the scenario tests
233
- agents: List of agent adapters (agent under test, user simulator, judge)
234
- max_turns: Maximum conversation turns before timeout (default: 10)
235
- verbose: Show detailed output during execution
236
- cache_key: Cache key for deterministic behavior
237
- debug: Enable debug mode for step-by-step execution
238
- script: Optional script steps to control scenario flow
239
-
240
- Returns:
241
- ScenarioResult containing the test outcome, conversation history,
242
- success/failure status, and detailed reasoning
243
-
244
- Example:
245
- ```
246
- import scenario
247
-
248
- # Simple scenario with automatic flow
249
- result = await scenario.run(
250
- name="help request",
251
- description="User asks for help with a technical problem",
252
- agents=[
253
- my_agent,
254
- scenario.UserSimulatorAgent(),
255
- scenario.JudgeAgent(criteria=["Agent provides helpful response"])
256
- ]
257
- )
258
-
259
- # Scripted scenario with custom evaluations
260
- result = await scenario.run(
261
- name="custom interaction",
262
- description="Test specific conversation flow",
263
- agents=[
264
- my_agent,
265
- scenario.UserSimulatorAgent(),
266
- scenario.JudgeAgent(criteria=["Agent provides helpful response"])
267
- ],
268
- script=[
269
- scenario.user("Hello"),
270
- scenario.agent(),
271
- custom_eval,
272
- scenario.succeed()
273
- ]
274
- )
275
-
276
- # Results analysis
277
- print(f"Test {'PASSED' if result.success else 'FAILED'}")
278
- print(f"Reasoning: {result.reasoning}")
279
- print(f"Conversation had {len(result.messages)} messages")
280
- ```
281
-
282
- Note:
283
- - Runs in isolated thread pool to support parallel execution
284
- - Blocks until scenario completes or times out
285
- - All agent calls are automatically cached when cache_key is set
286
- - Exception handling ensures clean resource cleanup
181
+ event: The scenario event to emit
287
182
  """
288
- scenario = cls(
289
- name=name,
290
- description=description,
291
- agents=agents,
292
- max_turns=max_turns,
293
- verbose=verbose,
294
- cache_key=cache_key,
295
- debug=debug,
296
- script=script,
297
- )
298
-
299
- # We'll use a thread pool to run the execution logic, we
300
- # require a separate thread because even though asyncio is
301
- # being used throughout, any user code on the callback can
302
- # be blocking, preventing them from running scenarios in parallel
303
- with concurrent.futures.ThreadPoolExecutor() as executor:
304
-
305
- def run_in_thread():
306
- loop = asyncio.new_event_loop()
307
- asyncio.set_event_loop(loop)
308
-
309
- try:
310
- return loop.run_until_complete(scenario._run())
311
- finally:
312
- loop.run_until_complete(scenario.event_bus.drain())
313
- loop.close()
314
-
315
- # Run the function in the thread pool and await its result
316
- # This converts the thread's execution into a Future that the current
317
- # event loop can await without blocking
318
- loop = asyncio.get_event_loop()
319
- result = await loop.run_in_executor(executor, run_in_thread)
320
- return result
183
+ event.timestamp = int(time.time() * 1000)
184
+ self._events.on_next(event)
321
185
 
322
186
  def reset(self):
323
187
  """
@@ -394,7 +258,6 @@ class ScenarioExecutor:
394
258
  self._pending_messages[idx] = []
395
259
  self._pending_messages[idx].append(message)
396
260
 
397
-
398
261
  def add_messages(
399
262
  self,
400
263
  messages: List[ChatCompletionMessageParam],
@@ -522,7 +385,7 @@ class ScenarioExecutor:
522
385
  agent_time=agent_time,
523
386
  )
524
387
 
525
- async def _run(self) -> ScenarioResult:
388
+ async def run(self) -> ScenarioResult:
526
389
  """
527
390
  Run a scenario against the agent under test.
528
391
 
@@ -535,7 +398,6 @@ class ScenarioExecutor:
535
398
  scenario_run_id = generate_scenario_run_id()
536
399
 
537
400
  try:
538
- await self.event_bus.listen()
539
401
  self._emit_run_started_event(scenario_run_id)
540
402
 
541
403
  if self.config.verbose:
@@ -552,7 +414,11 @@ class ScenarioExecutor:
552
414
  self._emit_message_snapshot_event(scenario_run_id)
553
415
 
554
416
  if isinstance(result, ScenarioResult):
555
- status = ScenarioRunFinishedEventStatus.SUCCESS if result.success else ScenarioRunFinishedEventStatus.FAILED
417
+ status = (
418
+ ScenarioRunFinishedEventStatus.SUCCESS
419
+ if result.success
420
+ else ScenarioRunFinishedEventStatus.FAILED
421
+ )
556
422
  self._emit_run_finished_event(scenario_run_id, result, status)
557
423
  return result
558
424
 
@@ -565,7 +431,11 @@ class ScenarioExecutor:
565
431
  """
566
432
  )
567
433
 
568
- status = ScenarioRunFinishedEventStatus.SUCCESS if result.success else ScenarioRunFinishedEventStatus.FAILED
434
+ status = (
435
+ ScenarioRunFinishedEventStatus.SUCCESS
436
+ if result.success
437
+ else ScenarioRunFinishedEventStatus.FAILED
438
+ )
569
439
  self._emit_run_finished_event(scenario_run_id, result, status)
570
440
  return result
571
441
 
@@ -578,7 +448,9 @@ class ScenarioExecutor:
578
448
  total_time=time.time() - self._total_start_time,
579
449
  agent_time=0,
580
450
  )
581
- self._emit_run_finished_event(scenario_run_id, error_result, ScenarioRunFinishedEventStatus.ERROR)
451
+ self._emit_run_finished_event(
452
+ scenario_run_id, error_result, ScenarioRunFinishedEventStatus.ERROR
453
+ )
582
454
  raise # Re-raise the exception after cleanup
583
455
 
584
456
  async def _call_agent(
@@ -620,16 +492,19 @@ class ScenarioExecutor:
620
492
  ):
621
493
  start_time = time.time()
622
494
 
623
- agent_response = agent.call(
624
- AgentInput(
625
- # TODO: test thread_id
626
- thread_id=self._state.thread_id,
627
- messages=self._state.messages,
628
- new_messages=self._pending_messages.get(idx, []),
629
- judgment_request=request_judgment,
630
- scenario_state=self._state,
495
+ # Prevent pydantic validation warnings which should already be disabled
496
+ with warnings.catch_warnings():
497
+ warnings.simplefilter("ignore")
498
+ agent_response = agent.call(
499
+ AgentInput(
500
+ # TODO: test thread_id
501
+ thread_id=self._state.thread_id,
502
+ messages=self._state.messages,
503
+ new_messages=self._pending_messages.get(idx, []),
504
+ judgment_request=request_judgment,
505
+ scenario_state=self._state,
506
+ )
631
507
  )
632
- )
633
508
  if not isinstance(agent_response, Awaitable):
634
509
  raise Exception(
635
510
  agent_response_not_awaitable(agent.__class__.__name__),
@@ -819,16 +694,17 @@ class ScenarioExecutor:
819
694
  class _CommonEventFields(TypedDict):
820
695
  """
821
696
  Common fields shared across all scenario events.
822
-
697
+
823
698
  These fields provide consistent identification and timing information
824
699
  for all events emitted during scenario execution.
825
-
700
+
826
701
  Attributes:
827
702
  batch_run_id: Unique identifier for the batch of scenario runs
828
703
  scenario_run_id: Unique identifier for this specific scenario run
829
704
  scenario_id: Human-readable name/identifier for the scenario
830
705
  timestamp: Unix timestamp in milliseconds when the event occurred
831
706
  """
707
+
832
708
  batch_run_id: str
833
709
  scenario_run_id: str
834
710
  scenario_id: str
@@ -837,13 +713,13 @@ class ScenarioExecutor:
837
713
  def _create_common_event_fields(self, scenario_run_id: str) -> _CommonEventFields:
838
714
  """
839
715
  Create common fields used across all scenario events.
840
-
716
+
841
717
  This method generates the standard fields that every scenario event
842
718
  must include for proper identification and timing.
843
-
719
+
844
720
  Args:
845
721
  scenario_run_id: Unique identifier for the current scenario run
846
-
722
+
847
723
  Returns:
848
724
  Dictionary containing common event fields with current timestamp
849
725
  """
@@ -857,86 +733,183 @@ class ScenarioExecutor:
857
733
  def _emit_run_started_event(self, scenario_run_id: str) -> None:
858
734
  """
859
735
  Emit a scenario run started event.
860
-
736
+
861
737
  This event is published when a scenario begins execution. It includes
862
738
  metadata about the scenario such as name and description, and is used
863
739
  to track the start of scenario runs in monitoring systems.
864
-
740
+
865
741
  Args:
866
742
  scenario_run_id: Unique identifier for the current scenario run
867
-
868
- Note:
869
- This event is automatically published at the beginning of `_run()`
870
- and signals the start of scenario execution to any event listeners.
871
743
  """
872
744
  common_fields = self._create_common_event_fields(scenario_run_id)
873
745
  metadata = ScenarioRunStartedEventMetadata(
874
746
  name=self.name,
875
747
  description=self.description,
876
748
  )
877
-
749
+
878
750
  event = ScenarioRunStartedEvent(
879
751
  **common_fields,
880
752
  metadata=metadata,
881
753
  )
882
- self.event_bus.publish(event)
754
+ self._emit_event(event)
883
755
 
884
756
  def _emit_message_snapshot_event(self, scenario_run_id: str) -> None:
885
757
  """
886
758
  Emit a message snapshot event.
887
-
759
+
888
760
  This event captures the current state of the conversation during
889
761
  scenario execution. It's published whenever messages are added to
890
762
  the conversation, allowing real-time tracking of scenario progress.
891
-
892
- Note:
893
- This event is automatically published by `add_message()` and
894
- `add_messages()` to provide continuous visibility into scenario
895
- execution state.
896
763
  """
897
764
  common_fields = self._create_common_event_fields(scenario_run_id)
898
-
765
+
899
766
  event = ScenarioMessageSnapshotEvent(
900
767
  **common_fields,
901
- messages=convert_messages_to_ag_ui_messages(self._state.messages),
768
+ messages=convert_messages_to_api_client_messages(self._state.messages),
902
769
  )
903
- self.event_bus.publish(event)
770
+ self._emit_event(event)
904
771
 
905
772
  def _emit_run_finished_event(
906
- self,
907
- scenario_run_id: str,
908
- result: ScenarioResult,
909
- status: ScenarioRunFinishedEventStatus
773
+ self,
774
+ scenario_run_id: str,
775
+ result: ScenarioResult,
776
+ status: ScenarioRunFinishedEventStatus,
910
777
  ) -> None:
911
778
  """
912
779
  Emit a scenario run finished event.
913
-
780
+
914
781
  This event is published when a scenario completes execution, whether
915
782
  successfully or with an error. It includes the final results, verdict,
916
783
  and reasoning for the scenario outcome.
917
-
784
+
918
785
  Args:
919
786
  scenario_run_id: Unique identifier for the current scenario run
920
787
  result: The final scenario result containing success/failure status
921
788
  status: The execution status (SUCCESS, FAILED, or ERROR)
922
-
923
- Note:
924
- This event is automatically published at the end of `_run()` and
925
- signals the completion of scenario execution to any event listeners.
926
- It includes detailed results for monitoring and analysis purposes.
927
789
  """
928
790
  common_fields = self._create_common_event_fields(scenario_run_id)
929
-
791
+
930
792
  results = ScenarioRunFinishedEventResults(
931
- verdict=ScenarioRunFinishedEventVerdict.SUCCESS if result.success else ScenarioRunFinishedEventVerdict.FAILURE,
793
+ verdict=(
794
+ ScenarioRunFinishedEventVerdict.SUCCESS
795
+ if result.success
796
+ else ScenarioRunFinishedEventVerdict.FAILURE
797
+ ),
932
798
  reasoning=result.reasoning or "",
933
799
  met_criteria=result.passed_criteria,
934
800
  unmet_criteria=result.failed_criteria,
935
801
  )
936
-
802
+
937
803
  event = ScenarioRunFinishedEvent(
938
804
  **common_fields,
939
805
  status=status,
940
806
  results=results,
941
807
  )
942
- self.event_bus.publish(event)
808
+ self._emit_event(event)
809
+
810
+ # Signal end of event stream
811
+ self._events.on_completed()
812
+
813
+
814
+ async def run(
815
+ name: str,
816
+ description: str,
817
+ agents: List[AgentAdapter] = [],
818
+ max_turns: Optional[int] = None,
819
+ verbose: Optional[Union[bool, int]] = None,
820
+ cache_key: Optional[str] = None,
821
+ debug: Optional[bool] = None,
822
+ script: Optional[List[ScriptStep]] = None,
823
+ ) -> ScenarioResult:
824
+ """
825
+ High-level interface for running a scenario test.
826
+
827
+ This is the main entry point for executing scenario tests. It creates a
828
+ ScenarioExecutor instance and runs it in an isolated thread pool to support
829
+ parallel execution and prevent blocking.
830
+
831
+ Args:
832
+ name: Human-readable name for the scenario
833
+ description: Detailed description of what the scenario tests
834
+ agents: List of agent adapters (agent under test, user simulator, judge)
835
+ max_turns: Maximum conversation turns before timeout (default: 10)
836
+ verbose: Show detailed output during execution
837
+ cache_key: Cache key for deterministic behavior
838
+ debug: Enable debug mode for step-by-step execution
839
+ script: Optional script steps to control scenario flow
840
+
841
+ Returns:
842
+ ScenarioResult containing the test outcome, conversation history,
843
+ success/failure status, and detailed reasoning
844
+
845
+ Example:
846
+ ```
847
+ import scenario
848
+
849
+ # Simple scenario with automatic flow
850
+ result = await scenario.run(
851
+ name="help request",
852
+ description="User asks for help with a technical problem",
853
+ agents=[
854
+ my_agent,
855
+ scenario.UserSimulatorAgent(),
856
+ scenario.JudgeAgent(criteria=["Agent provides helpful response"])
857
+ ]
858
+ )
859
+
860
+ # Scripted scenario with custom evaluations
861
+ result = await scenario.run(
862
+ name="custom interaction",
863
+ description="Test specific conversation flow",
864
+ agents=[
865
+ my_agent,
866
+ scenario.UserSimulatorAgent(),
867
+ scenario.JudgeAgent(criteria=["Agent provides helpful response"])
868
+ ],
869
+ script=[
870
+ scenario.user("Hello"),
871
+ scenario.agent(),
872
+ custom_eval,
873
+ scenario.succeed()
874
+ ]
875
+ )
876
+
877
+ # Results analysis
878
+ print(f"Test {'PASSED' if result.success else 'FAILED'}")
879
+ print(f"Reasoning: {result.reasoning}")
880
+ print(f"Conversation had {len(result.messages)} messages")
881
+ ```
882
+ """
883
+ scenario = ScenarioExecutor(
884
+ name=name,
885
+ description=description,
886
+ agents=agents,
887
+ max_turns=max_turns,
888
+ verbose=verbose,
889
+ cache_key=cache_key,
890
+ debug=debug,
891
+ script=script,
892
+ )
893
+
894
+ # We'll use a thread pool to run the execution logic, we
895
+ # require a separate thread because even though asyncio is
896
+ # being used throughout, any user code on the callback can
897
+ # be blocking, preventing them from running scenarios in parallel
898
+ with concurrent.futures.ThreadPoolExecutor() as executor:
899
+
900
+ def run_in_thread():
901
+ loop = asyncio.new_event_loop()
902
+ asyncio.set_event_loop(loop)
903
+
904
+ try:
905
+ return loop.run_until_complete(scenario.run())
906
+ finally:
907
+ scenario.event_bus.drain()
908
+ loop.close()
909
+
910
+ # Run the function in the thread pool and await its result
911
+ # This converts the thread's execution into a Future that the current
912
+ # event loop can await without blocking
913
+ loop = asyncio.get_event_loop()
914
+ result = await loop.run_in_executor(executor, run_in_thread)
915
+ return result
scenario/types.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from enum import Enum
2
- from pydantic import BaseModel, Field, SkipValidation
2
+ from pydantic import BaseModel, SkipValidation
3
3
  from typing import (
4
4
  TYPE_CHECKING,
5
5
  Annotated,
@@ -35,6 +35,7 @@ class AgentRole(Enum):
35
35
  AGENT: Represents the agent under test that responds to user inputs
36
36
  JUDGE: Represents a judge agent that evaluates the conversation and determines success/failure
37
37
  """
38
+
38
39
  USER = "User"
39
40
  AGENT = "Agent"
40
41
  JUDGE = "Judge"
@@ -71,6 +72,7 @@ class AgentInput(BaseModel):
71
72
  return response
72
73
  ```
73
74
  """
75
+
74
76
  thread_id: str
75
77
  # Prevent pydantic from validating/parsing the messages and causing issues: https://github.com/pydantic/pydantic/issues/9541
76
78
  messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
@@ -168,7 +170,8 @@ class ScenarioResult(BaseModel):
168
170
  """
169
171
 
170
172
  success: bool
171
- messages: List[ChatCompletionMessageParam]
173
+ # Prevent issues with slightly inconsistent message types, for example when coming from Gemini, right at the result level
174
+ messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
172
175
  reasoning: Optional[str] = None
173
176
  passed_criteria: List[str] = []
174
177
  failed_criteria: List[str] = []
@@ -1,27 +0,0 @@
1
- scenario/__init__.py,sha256=UJ5l-sG4TMG0wR8Ba-dxdDW36m3apTvawP-lNvk7Jm0,4293
2
- scenario/_error_messages.py,sha256=6lEx3jBGMbPx0kG0eX5zoZE-ENVM3O_ZkIbVMlnidYs,3892
3
- scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
4
- scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
5
- scenario/config.py,sha256=xhUuXH-sThwPTmJNSuajKxX-WC_tcFwJ1jZc119DswA,6093
6
- scenario/judge_agent.py,sha256=9CCO699qoWqXvWdQ73Yc3dqPOwaJdJ-zqxVaLaKi_cA,16161
7
- scenario/pytest_plugin.py,sha256=f2ETBpATz80k7K87M6046ZIFiQpHEvDN7dxakd3y2wk,11321
8
- scenario/scenario_executor.py,sha256=nkSIuIlwPHfr6pueSBbARrgiqPtW0SxajV3PFypAnJ4,34508
9
- scenario/scenario_state.py,sha256=dQDjazem-dn1c5mw6TwngEu6Tv_cHwEzemepsPBy2f0,7039
10
- scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
11
- scenario/types.py,sha256=BhXcTEMGyGg_1QysN-GXVjm8DP2VH3UEzj_qvoglp2k,9466
12
- scenario/user_simulator_agent.py,sha256=fhwi8W44s343BGrjJXSJw960wcK7MgwTg-epxR1bqHo,9088
13
- scenario/_utils/__init__.py,sha256=wNX9hU8vzYlyLDwjkt7JUW3IPo2DhME6UIt_zvLM3B0,1000
14
- scenario/_utils/ids.py,sha256=K1iPuJgPh3gX9HCrDZGqK5lDgdwZXfOBF1YXVOWNHRg,1843
15
- scenario/_utils/message_conversion.py,sha256=AM9DLyWpy97CrAH8RmId9Mv2rmLquQhFoUpRyp-jVeY,3622
16
- scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
17
- scenario/events/__init__.py,sha256=_autF1cMZYpNXE-kJNvvRb-H_hYqy4gOSSp2fT3Wi9k,1533
18
- scenario/events/event_bus.py,sha256=MThIMIaI2nj2CoegZazTNxeHbtl4_M7bW3vEAHz6R8g,7102
19
- scenario/events/event_reporter.py,sha256=cMh_5jA5hG3Q9IsoAgPJhxnIVs_M1Q0e2lgLTEK4oPc,3100
20
- scenario/events/events.py,sha256=jPXylwiADb0Bdk7u1YkAaU_jLebH7NW8J7SZI9JDTxw,6750
21
- scenario/events/messages.py,sha256=1QAkwDExdF6AHgXdEFhHwmCv3Mxu3j0AXIptMekc_bg,3299
22
- scenario/events/utils.py,sha256=yrTUTByeb0eAAQniQH7EyKs-usgGti8f17IemUyBZBw,3357
23
- langwatch_scenario-0.6.0.dist-info/METADATA,sha256=IvD9on4tP57ldmizFzfGQBtiCT6Z7yoz0trlCSPSW9M,14227
24
- langwatch_scenario-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
- langwatch_scenario-0.6.0.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
26
- langwatch_scenario-0.6.0.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
27
- langwatch_scenario-0.6.0.dist-info/RECORD,,