langwatch-scenario 0.4.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (238)
  1. {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/METADATA +210 -86
  2. langwatch_scenario-0.7.1.dist-info/RECORD +237 -0
  3. scenario/__init__.py +12 -118
  4. scenario/_events/__init__.py +64 -0
  5. scenario/_events/event_bus.py +185 -0
  6. scenario/_events/event_reporter.py +83 -0
  7. scenario/_events/events.py +162 -0
  8. scenario/_events/messages.py +58 -0
  9. scenario/_events/utils.py +97 -0
  10. scenario/_generated/langwatch_api_client/README.md +139 -0
  11. scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py +13 -0
  12. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/__init__.py +1 -0
  13. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/__init__.py +1 -0
  14. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_annotations_id.py +155 -0
  15. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_prompts_by_id.py +218 -0
  16. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_scenario_events.py +183 -0
  17. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations.py +136 -0
  18. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_id.py +155 -0
  19. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_trace_id.py +160 -0
  20. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_dataset_by_slug_or_id.py +229 -0
  21. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts.py +188 -0
  22. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id.py +218 -0
  23. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id_versions.py +218 -0
  24. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_trace_id.py +155 -0
  25. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/patch_api_annotations_id.py +178 -0
  26. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_annotations_trace_id.py +178 -0
  27. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_dataset_by_slug_entries.py +108 -0
  28. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts.py +187 -0
  29. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts_by_id_versions.py +241 -0
  30. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_scenario_events.py +229 -0
  31. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_share.py +155 -0
  32. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_unshare.py +155 -0
  33. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/put_api_prompts_by_id.py +241 -0
  34. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/__init__.py +1 -0
  35. scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/post_api_trace_search.py +168 -0
  36. scenario/_generated/langwatch_api_client/lang_watch_api_client/client.py +268 -0
  37. scenario/_generated/langwatch_api_client/lang_watch_api_client/errors.py +16 -0
  38. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/__init__.py +455 -0
  39. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/annotation.py +131 -0
  40. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries.py +74 -0
  41. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries_entries_item.py +44 -0
  42. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_annotations_id_response_200.py +68 -0
  43. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_200.py +59 -0
  44. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400.py +61 -0
  45. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400_error.py +8 -0
  46. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401.py +61 -0
  47. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401_error.py +8 -0
  48. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_404.py +59 -0
  49. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_500.py +59 -0
  50. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_200.py +81 -0
  51. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_400.py +59 -0
  52. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_401.py +59 -0
  53. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_500.py +59 -0
  54. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/error.py +67 -0
  55. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation.py +164 -0
  56. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation_timestamps.py +68 -0
  57. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200.py +75 -0
  58. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item.py +109 -0
  59. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item_entry.py +44 -0
  60. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_400.py +78 -0
  61. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_401.py +78 -0
  62. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_404.py +78 -0
  63. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_422.py +67 -0
  64. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_500.py +78 -0
  65. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200.py +172 -0
  66. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item.py +69 -0
  67. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item_role.py +10 -0
  68. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0.py +81 -0
  69. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema.py +77 -0
  70. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema_schema.py +44 -0
  71. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_type.py +8 -0
  72. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400.py +61 -0
  73. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400_error.py +8 -0
  74. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401.py +61 -0
  75. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401_error.py +8 -0
  76. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_404.py +59 -0
  77. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_500.py +59 -0
  78. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200.py +155 -0
  79. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data.py +204 -0
  80. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
  81. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
  82. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
  83. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
  84. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
  85. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
  86. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
  87. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
  88. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
  89. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
  90. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
  91. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
  92. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400.py +61 -0
  93. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400_error.py +8 -0
  94. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401.py +61 -0
  95. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401_error.py +8 -0
  96. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_404.py +59 -0
  97. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_500.py +59 -0
  98. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item.py +172 -0
  99. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item.py +69 -0
  100. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item_role.py +10 -0
  101. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0.py +81 -0
  102. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema.py +77 -0
  103. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema_schema.py +44 -0
  104. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_type.py +8 -0
  105. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400.py +61 -0
  106. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400_error.py +8 -0
  107. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401.py +61 -0
  108. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401_error.py +8 -0
  109. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_500.py +59 -0
  110. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200.py +249 -0
  111. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_error_type_0.py +79 -0
  112. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item.py +152 -0
  113. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_error.py +79 -0
  114. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_timestamps.py +68 -0
  115. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_input.py +59 -0
  116. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metadata.py +68 -0
  117. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metrics.py +95 -0
  118. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_output.py +59 -0
  119. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item.py +271 -0
  120. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_error_type_0.py +79 -0
  121. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input.py +90 -0
  122. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input_value_item.py +69 -0
  123. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_metrics.py +77 -0
  124. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output.py +89 -0
  125. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output_value_item.py +68 -0
  126. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_params.py +68 -0
  127. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_timestamps.py +95 -0
  128. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_timestamps.py +77 -0
  129. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/input_.py +68 -0
  130. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metadata.py +68 -0
  131. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metrics.py +115 -0
  132. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/output.py +59 -0
  133. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/pagination.py +68 -0
  134. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_body.py +77 -0
  135. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_response_200.py +68 -0
  136. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_annotations_trace_id_body.py +77 -0
  137. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_body.py +59 -0
  138. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body.py +147 -0
  139. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data.py +207 -0
  140. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations.py +106 -0
  141. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item.py +79 -0
  142. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item_type.py +18 -0
  143. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_rows_item.py +59 -0
  144. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item.py +71 -0
  145. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item_type.py +16 -0
  146. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item.py +71 -0
  147. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item_role.py +10 -0
  148. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item.py +98 -0
  149. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_json_schema.py +59 -0
  150. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_type.py +11 -0
  151. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_prompting_technique.py +59 -0
  152. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200.py +155 -0
  153. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data.py +206 -0
  154. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +101 -0
  155. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +79 -0
  156. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +18 -0
  157. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +59 -0
  158. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +71 -0
  159. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +16 -0
  160. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item.py +71 -0
  161. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +10 -0
  162. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +98 -0
  163. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +59 -0
  164. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +11 -0
  165. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +59 -0
  166. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400.py +61 -0
  167. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400_error.py +8 -0
  168. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401.py +61 -0
  169. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401_error.py +8 -0
  170. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_404.py +59 -0
  171. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_500.py +59 -0
  172. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200.py +172 -0
  173. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item.py +69 -0
  174. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item_role.py +10 -0
  175. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0.py +81 -0
  176. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema.py +77 -0
  177. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema_schema.py +44 -0
  178. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_type.py +8 -0
  179. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400.py +61 -0
  180. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400_error.py +8 -0
  181. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401.py +61 -0
  182. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401_error.py +8 -0
  183. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_500.py +59 -0
  184. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0.py +127 -0
  185. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0_metadata.py +68 -0
  186. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1.py +164 -0
  187. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0.py +98 -0
  188. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0_verdict.py +10 -0
  189. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_status.py +13 -0
  190. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2.py +245 -0
  191. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_0.py +88 -0
  192. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_1.py +88 -0
  193. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2.py +120 -0
  194. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item.py +87 -0
  195. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item_function.py +67 -0
  196. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_3.py +88 -0
  197. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_4.py +85 -0
  198. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_201.py +81 -0
  199. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_400.py +59 -0
  200. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_401.py +59 -0
  201. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_500.py +59 -0
  202. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_share_response_200.py +59 -0
  203. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_unshare_response_200.py +59 -0
  204. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_body.py +59 -0
  205. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_200.py +75 -0
  206. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400.py +61 -0
  207. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400_error.py +8 -0
  208. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401.py +61 -0
  209. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401_error.py +8 -0
  210. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_404.py +59 -0
  211. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_500.py +59 -0
  212. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request.py +133 -0
  213. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request_filters.py +51 -0
  214. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py +93 -0
  215. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py +77 -0
  216. scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py +225 -0
  217. scenario/_generated/langwatch_api_client/lang_watch_api_client/py.typed +1 -0
  218. scenario/_generated/langwatch_api_client/lang_watch_api_client/types.py +46 -0
  219. scenario/_generated/langwatch_api_client/pyproject.toml +27 -0
  220. scenario/_utils/__init__.py +32 -0
  221. scenario/_utils/ids.py +58 -0
  222. scenario/_utils/message_conversion.py +103 -0
  223. scenario/{utils.py → _utils/utils.py} +21 -110
  224. scenario/agent_adapter.py +8 -4
  225. scenario/cache.py +4 -3
  226. scenario/config.py +7 -5
  227. scenario/judge_agent.py +13 -29
  228. scenario/pytest_plugin.py +6 -51
  229. scenario/scenario_executor.py +372 -215
  230. scenario/scenario_state.py +6 -6
  231. scenario/script.py +9 -9
  232. scenario/types.py +15 -8
  233. scenario/user_simulator_agent.py +4 -11
  234. langwatch_scenario-0.4.0.dist-info/RECORD +0 -18
  235. {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/WHEEL +0 -0
  236. {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/entry_points.txt +0 -0
  237. {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/top_level.txt +0 -0
  238. /scenario/{error_messages.py → _error_messages.py} +0 -0
@@ -11,13 +11,10 @@ import sys
  from typing import (
  Any,
  Iterator,
- List,
- Literal,
  Optional,
  Union,
  TypeVar,
  Awaitable,
- cast,
  )
  from pydantic import BaseModel
  import copy
@@ -33,8 +30,8 @@ from rich.console import Console
  from rich.text import Text
  from rich.errors import LiveError

- from scenario.error_messages import message_return_error_message
- from scenario.types import AgentReturnTypes, ScenarioResult
+ from scenario._error_messages import message_return_error_message
+ from scenario.types import ScenarioResult

  T = TypeVar("T")

@@ -48,7 +45,7 @@ class SerializableAndPydanticEncoder(json.JSONEncoder):
  Used for caching and logging scenarios that contain complex objects.

  Example:
- ```python
+ ```
  data = {
  "model": SomeBaseModel(field="value"),
  "iterator": iter([1, 2, 3])
@@ -56,7 +53,7 @@ class SerializableAndPydanticEncoder(json.JSONEncoder):
  json.dumps(data, cls=SerializableAndPydanticEncoder)
  ```
  """
- def default(self, o):
+ def default(self, o: Any) -> Any:
  if isinstance(o, BaseModel):
  return o.model_dump(exclude_unset=True)
  if isinstance(o, Iterator):
@@ -73,26 +70,26 @@ class SerializableWithStringFallback(SerializableAndPydanticEncoder):
  that logging and caching operations never fail due to serialization issues.

  Example:
- ```python
+ ```
  # This will work even with complex non-serializable objects
  data = {"function": lambda x: x, "complex_object": SomeComplexClass()}
  json.dumps(data, cls=SerializableWithStringFallback)
  # Result: {"function": "<function <lambda> at 0x...>", "complex_object": "..."}
  ```
  """
- def default(self, o):
+ def default(self, o: Any) -> Any:
  try:
  return super().default(o)
  except:
  return str(o)


- def safe_list_at(list, index, default=None):
+ def safe_list_at(list_obj: list, index: int, default: Any = None) -> Any:
  """
  Safely get an item from a list by index with a default fallback.

  Args:
- list: The list to access
+ list_obj: The list to access
  index: The index to retrieve
  default: Value to return if index is out of bounds

@@ -100,7 +97,7 @@ def safe_list_at(list, index, default=None):
  The item at the index, or the default value if index is invalid

  Example:
- ```python
+ ```
  items = ["a", "b", "c"]
  print(safe_list_at(items, 1)) # "b"
  print(safe_list_at(items, 10)) # None
@@ -108,12 +105,12 @@ def safe_list_at(list, index, default=None):
  ```
  """
  try:
- return list[index]
+ return list_obj[index]
  except:
  return default


- def safe_attr_or_key(obj, attr_or_key, default=None):
+ def safe_attr_or_key(obj: Any, attr_or_key: str, default: Any = None) -> Any:
  """
  Safely get an attribute or dictionary key from an object.

@@ -129,7 +126,7 @@ def safe_attr_or_key(obj, attr_or_key, default=None):
  The attribute/key value, or the default if not found

  Example:
- ```python
+ ```
  class MyClass:
  attr = "value"

@@ -141,10 +138,10 @@ def safe_attr_or_key(obj, attr_or_key, default=None):
  print(safe_attr_or_key(obj, "missing")) # None
  ```
  """
- return getattr(obj, attr_or_key, obj.get(attr_or_key))
+ return getattr(obj, attr_or_key, getattr(obj, 'get', lambda x, default=None: default)(attr_or_key, default))


- def title_case(string):
+ def title_case(string: str) -> str:
  """
  Convert snake_case string to Title Case.

@@ -155,7 +152,7 @@
  String converted to Title Case

  Example:
- ```python
+ ```
  print(title_case("user_simulator_agent")) # "User Simulator Agent"
  print(title_case("api_key")) # "Api Key"
  ```
@@ -178,7 +175,7 @@ def print_openai_messages(
  messages: List of OpenAI-compatible messages to print

  Example:
- ```python
+ ```
  messages = [
  {"role": "user", "content": "Hello"},
  {"role": "assistant", "content": "Hi there!"},
@@ -226,7 +223,7 @@ def print_openai_messages(
  )


- def _take_maybe_json_first_lines(string, max_lines=5):
+ def _take_maybe_json_first_lines(string: str, max_lines: int = 5) -> str:
  """
  Truncate string content and format JSON if possible.

@@ -268,14 +265,14 @@ class TextFirstSpinner(Spinner):
  color: Color for the descriptive text
  **kwargs: Additional arguments passed to the base Spinner class
  """
- def __init__(self, name, text: str, color: str, **kwargs):
+ def __init__(self, name: str, text: str, color: str, **kwargs: Any) -> None:
  super().__init__(
  name, "", style="bold white", **kwargs
  ) # Initialize with empty text
  self.text_before = text
  self.color = color

- def render(self, time):
+ def render(self, time: float) -> Text:
  # Get the original spinner frame
  spinner_frame = super().render(time)
  # Create a composite with text first, then spinner
@@ -299,7 +296,7 @@ def show_spinner(
  enabled: Whether to show the spinner (respects verbose settings)

  Example:
- ```python
+ ```
  with show_spinner("Calling agent...", color="blue", enabled=True):
  response = await agent.call(input_data)

@@ -345,7 +342,7 @@ def check_valid_return_type(return_value: Any, class_name: str) -> None:
  ValueError: If the return value is not in a supported format

  Example:
- ```python
+ ```
  # Valid return values
  check_valid_return_type("Hello world", "MyAgent") # OK
  check_valid_return_type({"role": "assistant", "content": "Hi"}, "MyAgent") # OK
@@ -383,92 +380,6 @@ def check_valid_return_type(return_value: Any, class_name: str) -> None:
  )


- def convert_agent_return_types_to_openai_messages(
- agent_response: AgentReturnTypes, role: Literal["user", "assistant"]
- ) -> List[ChatCompletionMessageParam]:
- """
- Convert various agent return types to standardized OpenAI message format.
-
- This function normalizes different return types from agent adapters into
- a consistent list of OpenAI-compatible messages that can be used throughout
- the scenario execution pipeline.
-
- Args:
- agent_response: Response from an agent adapter call
- role: The role to assign to string responses ("user" or "assistant")
-
- Returns:
- List of OpenAI-compatible messages
-
- Raises:
- ValueError: If agent_response is a ScenarioResult (which should be handled separately)
-
- Example:
- ```python
- # String response
- messages = convert_agent_return_types_to_openai_messages("Hello", "assistant")
- # Result: [{"role": "assistant", "content": "Hello"}]
-
- # Dict response
- response = {"role": "assistant", "content": "Hi", "tool_calls": [...]}
- messages = convert_agent_return_types_to_openai_messages(response, "assistant")
- # Result: [{"role": "assistant", "content": "Hi", "tool_calls": [...]}]
-
- # List response
- responses = [
- {"role": "assistant", "content": "Thinking..."},
- {"role": "assistant", "content": "Here's the answer"}
- ]
- messages = convert_agent_return_types_to_openai_messages(responses, "assistant")
- # Result: Same list, validated and normalized
- ```
- """
- if isinstance(agent_response, ScenarioResult):
- raise ValueError(
- "Unexpectedly tried to convert a ScenarioResult to openai messages",
- agent_response.__repr__(),
- )
-
- def convert_maybe_object_to_openai_message(
- obj: Any,
- ) -> ChatCompletionMessageParam:
- if isinstance(obj, dict):
- return cast(ChatCompletionMessageParam, obj)
- elif isinstance(obj, BaseModel):
- return cast(
- ChatCompletionMessageParam,
- obj.model_dump(
- exclude_unset=True,
- exclude_none=True,
- exclude_defaults=True,
- warnings=False,
- ),
- )
- else:
- raise ValueError(f"Unexpected agent response type: {type(obj).__name__}")
-
- def ensure_dict(
- obj: T,
- ) -> T:
- return json.loads(json.dumps(obj, cls=SerializableAndPydanticEncoder))
-
- if isinstance(agent_response, str):
- return [
- (
- {"role": "user", "content": agent_response}
- if role == "user"
- else {"role": "assistant", "content": agent_response}
- )
- ]
- elif isinstance(agent_response, list):
- return [
- ensure_dict(convert_maybe_object_to_openai_message(message))
- for message in agent_response
- ]
- else:
- return [ensure_dict(convert_maybe_object_to_openai_message(agent_response))]
-
-
  def reverse_roles(
  messages: list[ChatCompletionMessageParam],
  ) -> list[ChatCompletionMessageParam]:
scenario/agent_adapter.py CHANGED
@@ -26,9 +26,9 @@ class AgentAdapter(ABC):
  role: The role this agent plays in scenarios (USER, AGENT, or JUDGE)

  Example:
- ```python
+ ```
  import scenario
- from my_agent_library import MyCustomAgent
+ from my_agent import MyCustomAgent

  class MyAgentAdapter(scenario.AgentAdapter):
  def __init__(self):
@@ -66,6 +66,7 @@ class AgentAdapter(ABC):
  - For stateful agents, use input.thread_id to maintain conversation context
  - For stateless agents, use input.messages for the full conversation history
  """
+
  role: ClassVar[AgentRole] = AgentRole.AGENT

  @abstractmethod
@@ -82,13 +83,17 @@

  Returns:
  AgentReturnTypes: The agent's response, which can be:
+
  - str: Simple text response
+
  - ChatCompletionMessageParam: Single OpenAI-format message
+
  - List[ChatCompletionMessageParam]: Multiple messages for complex responses
+
  - ScenarioResult: Direct test result (typically only used by judge agents)

  Example:
- ```python
+ ```
  async def call(self, input: AgentInput) -> AgentReturnTypes:
  # Simple string response
  user_msg = input.last_new_user_message_str()
@@ -98,7 +103,6 @@ class AgentAdapter(ABC):
  return {
  "role": "assistant",
  "content": "Let me help you with that...",
- "tool_calls": [...] # If your agent uses tools
  }

  # Or multiple messages for complex interactions
scenario/cache.py CHANGED
@@ -18,7 +18,7 @@ import json

  import wrapt
  from scenario.types import AgentInput
- from scenario.utils import SerializableWithStringFallback
+ from scenario._utils.utils import SerializableWithStringFallback

  if TYPE_CHECKING:
  from scenario.scenario_executor import ScenarioExecutor
@@ -39,7 +39,7 @@ def get_cache() -> Memory:
  Memory instance configured with the appropriate cache directory

  Example:
- ```python
+ ```
  # Default cache location: ~/.scenario/cache
  cache = get_cache()

@@ -75,7 +75,7 @@ def scenario_cache(ignore=[]):
  Decorator function that can be applied to any function or method

  Example:
- ```python
+ ```
  import scenario

  class MyAgent:
@@ -105,6 +105,7 @@ def scenario_cache(ignore=[]):
  - AgentInput objects are specially handled to exclude thread_id from caching
  - Both sync and async functions are supported
  """
+
  @wrapt.decorator
  def wrapper(wrapped: Callable, instance=None, args=[], kwargs={}):
  scenario: "ScenarioExecutor" = context_scenario.get()
scenario/config.py CHANGED
@@ -9,6 +9,7 @@ and debugging options.
  from typing import Optional, Union, ClassVar
  from pydantic import BaseModel

+
  class ModelConfig(BaseModel):
  """
  Configuration for LLM model settings.
@@ -23,7 +24,7 @@ class ModelConfig(BaseModel):
  max_tokens: Maximum number of tokens to generate in responses

  Example:
- ```python
+ ```
  model_config = ModelConfig(
  model="openai/gpt-4.1-mini",
  api_key="your-api-key",
@@ -32,6 +33,7 @@ class ModelConfig(BaseModel):
  )
  ```
  """
+
  model: str
  api_key: Optional[str] = None
  temperature: float = 0.0
@@ -54,7 +56,7 @@ class ScenarioConfig(BaseModel):
  debug: Whether to enable debug mode with step-by-step interaction

  Example:
- ```python
+ ```
  # Configure globally for all scenarios
  scenario.configure(
  default_model="openai/gpt-4.1-mini",
@@ -106,7 +108,7 @@ class ScenarioConfig(BaseModel):
  debug: Enable debug mode for step-by-step execution with user intervention

  Example:
- ```python
+ ```
  import scenario

  # Set up default configuration
@@ -151,7 +153,7 @@ class ScenarioConfig(BaseModel):
  A new ScenarioConfig instance with merged values

  Example:
- ```python
+ ```
  base_config = ScenarioConfig(max_turns=10, verbose=True)
  override_config = ScenarioConfig(max_turns=20)

@@ -174,7 +176,7 @@ class ScenarioConfig(BaseModel):
  Dictionary of configuration key-value pairs, excluding None values

  Example:
- ```python
+ ```
  config = ScenarioConfig(max_turns=15, verbose=True)
  items = config.items()
  # Result: {"max_turns": 15, "verbose": True}
scenario/judge_agent.py CHANGED
@@ -19,7 +19,7 @@ from scenario.cache import scenario_cache
  from scenario.agent_adapter import AgentAdapter
  from scenario.config import ModelConfig, ScenarioConfig

- from .error_messages import agent_not_configured_error_message
+ from ._error_messages import agent_not_configured_error_message
  from .types import AgentInput, AgentReturnTypes, AgentRole, ScenarioResult


@@ -48,7 +48,7 @@ class JudgeAgent(AgentAdapter):
  system_prompt: Custom system prompt to override default judge behavior

  Example:
- ```python
+ ```
  import scenario

  # Basic judge agent with criteria
@@ -93,6 +93,7 @@ class JudgeAgent(AgentAdapter):
  - Provide detailed reasoning for their decisions
  - Support both positive criteria (things that should happen) and negative criteria (things that shouldn't)
  """
+
  role = AgentRole.JUDGE

  model: str
@@ -133,14 +134,12 @@ class JudgeAgent(AgentAdapter):
  Exception: If no model is configured either in parameters or global config

  Example:
- ```python
+ ```
  # Customer service judge
  cs_judge = JudgeAgent(
  criteria=[
- "Agent is polite and professional",
- "Agent addresses the customer's specific concern",
- "Agent offers appropriate solutions or next steps",
- "Agent does not make promises the company cannot keep"
+ "Agent replies with the refund policy",
+ "Agent offers next steps for the customer",
  ],
  temperature=0.1
  )
@@ -148,9 +147,8 @@ class JudgeAgent(AgentAdapter):
  # Technical accuracy judge
  tech_judge = JudgeAgent(
  criteria=[
- "Code examples compile without errors",
- "Security vulnerabilities are not introduced",
- "Best practices are recommended"
+ "Agent adds a code review pointing out the code compilation errors",
+ "Agent adds a code review about the missing security headers"
  ],
  system_prompt="You are a senior software engineer reviewing code for production use."
  )
@@ -210,24 +208,6 @@ class JudgeAgent(AgentAdapter):
  Exception: If the judge cannot make a valid decision or if there's an
  error in the evaluation process

- Example:
- The judge evaluates conversations like this:
-
- ```
- Conversation so far:
- User: "I need help with authentication"
- Agent: "I can help! What authentication method are you using?"
- User: "JWT tokens"
- Agent: "Here's how to implement JWT securely: [detailed code example]"
-
- Judge evaluation:
- - ✓ Agent provides helpful responses
- - ✓ Agent asks relevant follow-up questions
- - ✓ Security best practices are mentioned
-
- Decision: CONTINUE (all criteria being met so far)
- ```
-
  Note:
  - Returns empty list [] to continue the scenario
  - Returns ScenarioResult to end with success/failure
@@ -238,6 +218,10 @@ class JudgeAgent(AgentAdapter):

  scenario = input.scenario_state

+ criteria_str = "\n".join(
+ [f"{idx + 1}. {criterion}" for idx, criterion in enumerate(self.criteria)]
+ )
+
  messages = [
  {
  "role": "system",
@@ -257,7 +241,7 @@ If you do have enough information, use the finish_test tool to determine if all
  </scenario>

  <criteria>
- {"\n".join([f"{idx + 1}. {criterion}" for idx, criterion in enumerate(self.criteria)])}
+ {criteria_str}
  </criteria>

  <rules>
scenario/pytest_plugin.py CHANGED
@@ -8,7 +8,7 @@ pytest-based testing workflows.
  """

  import pytest
- from typing import TypedDict, List, Tuple
+ from typing import TypedDict
  import functools
  from termcolor import colored

@@ -16,7 +16,6 @@ from scenario.config import ScenarioConfig
  from scenario.types import ScenarioResult

  from .scenario_executor import ScenarioExecutor
- import scenario


  class ScenarioReporterResults(TypedDict):
@@ -46,23 +45,6 @@ class ScenarioReporter:

  Attributes:
  results: List of all scenario test results collected during the session
-
- Example:
- The reporter is used automatically, but you can access it in tests:
-
- ```python
- def test_my_scenarios(scenario_reporter):
- # Run your scenarios
- result1 = await scenario.run(...)
- result2 = await scenario.run(...)
-
- # Check collected results
- assert len(scenario_reporter.results) == 2
-
- # Get summary statistics
- summary = scenario_reporter.get_summary()
- print(f"Success rate: {summary['success_rate']}%")
- ```
  """

  def __init__(self):
@@ -80,21 +62,6 @@ class ScenarioReporter:
  Args:
  scenario: The ScenarioExecutor instance that ran the test
  result: The ScenarioResult containing test outcome and details
-
- Example:
- ```python
- # This happens automatically when you run scenarios
- result = await scenario.run(
- name="my test",
- description="Test description",
- agents=[
- my_agent,
- scenario.UserSimulatorAgent(),
- scenario.JudgeAgent(criteria=["Agent provides helpful response"])
- ]
- )
- # Result is automatically added to the global reporter
- ```
  """
  self.results.append({"scenario": scenario, "result": result})

@@ -111,18 +78,6 @@ class ScenarioReporter:
  - passed: Number of scenarios that passed
  - failed: Number of scenarios that failed
  - success_rate: Percentage of scenarios that passed (0-100)
-
- Example:
- ```python
- def test_summary_check(scenario_reporter):
- # Run some scenarios...
- await scenario.run(...)
- await scenario.run(...)
-
- summary = scenario_reporter.get_summary()
- assert summary['total'] == 2
- assert summary['success_rate'] >= 80 # Require 80% success rate
- ```
  """
  total = len(self.results)
  passed = sum(1 for r in self.results if r["result"].success)
@@ -242,7 +197,7 @@ class ScenarioReporter:


  # Store the original run method
- original_run = ScenarioExecutor._run
+ original_run = ScenarioExecutor.run


  @pytest.hookimpl(trylast=True)
@@ -304,7 +259,7 @@ def pytest_configure(config):
  return result

  # Apply the patch
- ScenarioExecutor._run = auto_reporting_run
+ ScenarioExecutor.run = auto_reporting_run


  @pytest.hookimpl(trylast=True)
@@ -314,7 +269,7 @@ def pytest_unconfigure(config):

  This hook is called when pytest is shutting down and:
  - Prints the final scenario test report
- - Restores the original ScenarioExecutor._run method
+ - Restores the original ScenarioExecutor.run method
  - Cleans up any remaining resources

  Args:
@@ -329,7 +284,7 @@ def pytest_unconfigure(config):
  config._scenario_reporter.print_report()

  # Restore the original method
- ScenarioExecutor._run = original_run
+ ScenarioExecutor.run = original_run


  @pytest.fixture
@@ -347,7 +302,7 @@ def scenario_reporter(request):

  ScenarioReporter: The global reporter instance collecting all scenario results
  Example:
- ```python
+ ```
  @pytest.mark.agent_test
  def test_with_custom_reporting(scenario_reporter):
  # Run your scenarios