aiverify-moonshot 0.5.1__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209)
  1. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/integration-test.yaml +51 -12
  2. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/pypi-deployment.yaml +1 -1
  3. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/smoke-test-cli.yaml +1 -1
  4. aiverify_moonshot-0.5.1/.github/workflows/uat-deploy.yaml → aiverify_moonshot-0.6.1/.github/workflows/uat-build.yaml +19 -4
  5. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/PKG-INFO +2 -2
  6. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/README.md +1 -1
  7. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/cookbook.py +39 -12
  8. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/recipe.py +23 -8
  9. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/result.py +4 -2
  10. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/cli_errors.py +10 -4
  11. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/app.py +1 -1
  12. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/benchmark_runner_dto.py +2 -2
  13. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/cookbook_create_dto.py +4 -0
  14. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/dataset_create_dto.py +3 -4
  15. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/benchmark_test_manager.py +2 -2
  16. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/cookbook_service.py +30 -34
  17. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/types/types.py +1 -1
  18. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_cookbook.py +20 -0
  19. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/cookbooks/cookbook.py +21 -0
  20. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/cookbooks/cookbook_arguments.py +6 -0
  21. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runners/runner.py +18 -28
  22. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/pyproject.toml +3 -2
  23. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.coveragerc +0 -0
  24. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.flake8 +0 -0
  25. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/developing-workflows.md +0 -0
  26. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/pull_request_template.md +0 -0
  27. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/create_backup.sh +0 -0
  28. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/install_ms_service.sh +0 -0
  29. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/moonshot_env +0 -0
  30. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/moonshot_test_env +0 -0
  31. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/moonshot_ui_env +0 -0
  32. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/run_smoke_test.sh +0 -0
  33. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/start_ms_service.sh +0 -0
  34. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/docs-update.yaml +0 -0
  35. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/gh-event-notification.yaml +0 -0
  36. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/notices-file-gen.yaml +0 -0
  37. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/pre-build-checks.yaml +0 -0
  38. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/sast-codeql.yaml +0 -0
  39. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/sca-scan.yaml +0 -0
  40. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/sit-build.yaml +0 -0
  41. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/smoke-test.yaml +0 -0
  42. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/test-pypi-deployment.yaml +0 -0
  43. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.gitignore +0 -0
  44. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.pre-commit-config.yaml +0 -0
  45. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/AUTHORS.md +0 -0
  46. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/LICENSE.md +0 -0
  47. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/NOTICES.md +0 -0
  48. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/__init__.py +0 -0
  49. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/__main__.py +0 -0
  50. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/api.py +0 -0
  51. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/__init__.py +0 -0
  52. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/__init__.py +0 -0
  53. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/__main__.py +0 -0
  54. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/active_session_cfg.py +0 -0
  55. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/__init__.py +0 -0
  56. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/benchmark.py +0 -0
  57. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/datasets.py +0 -0
  58. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/metrics.py +0 -0
  59. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/run.py +0 -0
  60. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/runner.py +0 -0
  61. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/cli.py +0 -0
  62. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/__init__.py +0 -0
  63. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/common.py +0 -0
  64. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/connectors.py +0 -0
  65. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/dataset.py +0 -0
  66. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/display_helper.py +0 -0
  67. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/prompt_template.py +0 -0
  68. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/initialisation/__init__.py +0 -0
  69. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/initialisation/initialisation.py +0 -0
  70. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/__init__.py +0 -0
  71. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/attack_module.py +0 -0
  72. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/context_strategy.py +0 -0
  73. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/prompt_template.py +0 -0
  74. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/redteam.py +0 -0
  75. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/session.py +0 -0
  76. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/utils/process_data.py +0 -0
  77. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/.env.dev +0 -0
  78. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/__init__.py +0 -0
  79. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/__main__.py +0 -0
  80. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/container.py +0 -0
  81. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/log/.gitkeep +0 -0
  82. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/logging_conf.py +0 -0
  83. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/__init__.py +0 -0
  84. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/attack_modules.py +0 -0
  85. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/benchmark.py +0 -0
  86. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/benchmark_result.py +0 -0
  87. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/bookmark.py +0 -0
  88. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/context_strategy.py +0 -0
  89. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/cookbook.py +0 -0
  90. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/dataset.py +0 -0
  91. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/endpoint.py +0 -0
  92. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/metric.py +0 -0
  93. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/prompt_template.py +0 -0
  94. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/recipe.py +0 -0
  95. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/redteam.py +0 -0
  96. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/runner.py +0 -0
  97. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/__init__.py +0 -0
  98. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/bookmark_create_dto.py +0 -0
  99. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/cookbook_response_model.py +0 -0
  100. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/dataset_response_dto.py +0 -0
  101. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/endpoint_create_dto.py +0 -0
  102. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/endpoint_response_model.py +0 -0
  103. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/prompt_response_model.py +0 -0
  104. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/prompt_template_response_model.py +0 -0
  105. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/recipe_create_dto.py +0 -0
  106. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/recipe_response_model.py +0 -0
  107. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/session_create_dto.py +0 -0
  108. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/session_prompt_dto.py +0 -0
  109. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/session_response_model.py +0 -0
  110. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/__init__.py +0 -0
  111. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/attack_module_service.py +0 -0
  112. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/auto_red_team_test_manager.py +0 -0
  113. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/auto_red_team_test_state.py +0 -0
  114. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/base_service.py +0 -0
  115. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/benchmark_result_service.py +0 -0
  116. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/benchmark_test_state.py +0 -0
  117. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/benchmarking_service.py +0 -0
  118. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/bookmark_service.py +0 -0
  119. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/context_strategy_service.py +0 -0
  120. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/dataset_service.py +0 -0
  121. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/endpoint_service.py +0 -0
  122. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/metric_service.py +0 -0
  123. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/prompt_template_service.py +0 -0
  124. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/recipe_service.py +0 -0
  125. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/runner_service.py +0 -0
  126. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/session_service.py +0 -0
  127. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/utils/exceptions_handler.py +0 -0
  128. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/utils/file_manager.py +0 -0
  129. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/utils/results_formatter.py +0 -0
  130. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py +0 -0
  131. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py +0 -0
  132. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py +0 -0
  133. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/temp/.gitkeep +0 -0
  134. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/__init__.py +0 -0
  135. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/__init__.py +0 -0
  136. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_bookmark.py +0 -0
  137. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_connector.py +0 -0
  138. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_connector_endpoint.py +0 -0
  139. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_context_strategy.py +0 -0
  140. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_dataset.py +0 -0
  141. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_environment_variables.py +0 -0
  142. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_metrics.py +0 -0
  143. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_prompt_template.py +0 -0
  144. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_recipe.py +0 -0
  145. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_red_teaming.py +0 -0
  146. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_result.py +0 -0
  147. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_run.py +0 -0
  148. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_runner.py +0 -0
  149. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_session.py +0 -0
  150. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/bookmark/bookmark.py +0 -0
  151. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/bookmark/bookmark_arguments.py +0 -0
  152. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/configs/__init__.py +0 -0
  153. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/configs/env_variables.py +0 -0
  154. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors/__init__.py +0 -0
  155. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors/connector.py +0 -0
  156. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors/connector_prompt_arguments.py +0 -0
  157. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors/connector_response.py +0 -0
  158. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors_endpoints/__init__.py +0 -0
  159. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors_endpoints/connector_endpoint.py +0 -0
  160. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors_endpoints/connector_endpoint_arguments.py +0 -0
  161. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/cookbooks/__init__.py +0 -0
  162. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/datasets/__init__.py +0 -0
  163. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/datasets/dataset.py +0 -0
  164. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/datasets/dataset_arguments.py +0 -0
  165. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/messages_constants.py +0 -0
  166. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/metrics/__init__.py +0 -0
  167. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/metrics/metric.py +0 -0
  168. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/metrics/metric_interface.py +0 -0
  169. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/prompt_templates/__init__.py +0 -0
  170. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/prompt_templates/prompt_template.py +0 -0
  171. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/recipes/__init__.py +0 -0
  172. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/recipes/recipe.py +0 -0
  173. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/recipes/recipe_arguments.py +0 -0
  174. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/__init__.py +0 -0
  175. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/attack/__init__.py +0 -0
  176. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/attack/attack_module.py +0 -0
  177. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/attack/attack_module_arguments.py +0 -0
  178. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/attack/context_strategy.py +0 -0
  179. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/context_strategy/__init__.py +0 -0
  180. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/context_strategy/context_strategy_interface.py +0 -0
  181. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/session/__init__.py +0 -0
  182. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/session/chat.py +0 -0
  183. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/session/red_teaming_progress.py +0 -0
  184. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/session/red_teaming_type.py +0 -0
  185. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/session/session.py +0 -0
  186. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/results/__init__.py +0 -0
  187. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/results/result.py +0 -0
  188. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/results/result_arguments.py +0 -0
  189. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runners/__init__.py +0 -0
  190. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runners/runner_arguments.py +0 -0
  191. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runners/runner_type.py +0 -0
  192. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runs/__init__.py +0 -0
  193. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runs/run.py +0 -0
  194. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runs/run_arguments.py +0 -0
  195. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runs/run_progress.py +0 -0
  196. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runs/run_status.py +0 -0
  197. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/storage/__init__.py +0 -0
  198. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/storage/db_interface.py +0 -0
  199. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/storage/io_interface.py +0 -0
  200. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/storage/storage.py +0 -0
  201. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/__init__.py +0 -0
  202. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/find_feature.py +0 -0
  203. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/import_modules.py +0 -0
  204. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/log.py +0 -0
  205. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/pagination.py +0 -0
  206. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/timeit.py +0 -0
  207. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/requirements.txt +0 -0
  208. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/third-party/pygments-2.18.0-py3-none-any.whl +0 -0
  209. {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/third-party/text_unidecode-1.3-py2.py3-none-any.whl +0 -0
@@ -36,7 +36,7 @@ jobs:
36
36
  integration-test:
37
37
 
38
38
  runs-on: ubuntu-latest
39
- timeout-minutes: 120
39
+ timeout-minutes: 300
40
40
 
41
41
  steps:
42
42
 
@@ -53,7 +53,7 @@ jobs:
53
53
  with:
54
54
  repository: aiverify-foundation/moonshot
55
55
  ref: ${{ inputs.moonshot_branch }}
56
-
56
+
57
57
  - name: Setup Python 3.11
58
58
  uses: actions/setup-python@v4
59
59
  with:
@@ -110,24 +110,24 @@ jobs:
110
110
  run: |
111
111
  source venv/bin/activate
112
112
  pip install nltk
113
- python -c "import nltk; nltk.download('stopwords');nltk.download('averaged_perceptron_tagger'); nltk.download('omw');nltk.download('universal_tagset'); nltk.download('wordnet');nltk.download('punkt')"
113
+ python -c "import nltk; nltk.download('stopwords');nltk.download('punkt');nltk.download('punkt_tab');nltk.download('averaged_perceptron_tagger_eng')"
114
114
 
115
115
  - name: Setup Moonshot UI
116
116
  run: |
117
+ source venv/bin/activate
117
118
  cd moonshot-ui
118
- npm ci
119
+ npm install
119
120
  npm run build
120
121
  cd ../
121
- source venv/bin/activate
122
- python -m moonshot web &
122
+ nohup python -m moonshot web &
123
123
 
124
124
  - name: Checkout Integration Test
125
125
  uses: actions/checkout@v4
126
126
  with:
127
127
  repository: aiverify-foundation/moonshot-integration-testing
128
128
  path: moonshot-integration-testing
129
-
130
- - name: Run Integration Test
129
+
130
+ - name: Run Integration UI Test
131
131
  env:
132
132
  URI: ${{ secrets.URI }}
133
133
  TOKEN: ${{ secrets.TOKEN }}
@@ -139,13 +139,52 @@ jobs:
139
139
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
140
140
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
141
141
  GOOGLE_TOKEN: ${{ secrets.GOOGLE_TOKEN }}
142
+ id: integrationuitest
142
143
  run: |
144
+ source venv/bin/activate
143
145
  cd moonshot-integration-testing/ui-integration-testing
144
146
  npm ci
145
- npm install dotenv
146
- npx playwright install --with-deps
147
- URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" npx playwright test
148
-
147
+ npx playwright install
148
+ npx playwright install-deps
149
+ npm install dotenv --save
150
+ echo "Running Home Page Test Cases"
151
+ URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/homepage.spec.ts
152
+ echo "Running Endpoint Test Cases"
153
+ URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/endpoint.spec.ts
154
+ echo "Running Red Teaming Test Cases"
155
+ URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/red_teaming.spec.ts
156
+ echo "Running Benchmarking Test Cases"
157
+ URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/benchmarking.spec.ts
158
+
159
+ - name: Print Environment Variables
160
+ run: env
161
+
162
+ - name: Run Integration CLI Test
163
+ env:
164
+ AZURE_OPENAI_URI: ${{ secrets.AZURE_OPENAI_URI }}
165
+ AZURE_OPENAI_TOKEN: ${{ secrets.AZURE_OPENAI_TOKEN }}
166
+ ADDITIONAL_PARAMETERS: ${{ secrets.ADDITIONAL_PARAMETERS }}
167
+ MOONSHOT_URL: ${{ secrets.MOONSHOT_URL }}
168
+ MOONSHOT_PORT_NUMBER: ${{ secrets.MOONSHOT_PORT_NUMBER }}
169
+ CLI_DIR: ${{ secrets.CLI_DIR }}
170
+ ACTIONS_STEP_DEBUG: true
171
+ ACTIONS_RUNNER_DEBUG: true
172
+ run: |
173
+ source venv/bin/activate
174
+ cd moonshot-integration-testing/cli-integration-testing
175
+ echo "Current Directory: $(pwd)"
176
+ pip install python-dotenv
177
+ pip install pytest
178
+ pytest
179
+
180
+ - name: Upload Playwright Traces
181
+ if: always()
182
+ uses: actions/upload-artifact@v4
183
+ with:
184
+ name: playwright-trace
185
+ path: |
186
+ /home/runner/work/moonshot/moonshot/moonshot-integration-testing/ui-integration-testing/test-results
187
+
149
188
  - name: TestRail CLI upload results
150
189
  env:
151
190
  TESTRAIL_USERNAME: ${{ secrets.TESTRAIL_USERNAME }}
@@ -76,7 +76,7 @@ jobs:
76
76
  name: python-package-distributions
77
77
  path: dist/
78
78
  - name: Sign the dists with Sigstore
79
- uses: sigstore/gh-action-sigstore-python@v2.1.1
79
+ uses: sigstore/gh-action-sigstore-python@v3.6.0
80
80
  with:
81
81
  inputs: >-
82
82
  ./dist/*.tar.gz
@@ -37,7 +37,7 @@ jobs:
37
37
  smoke-test:
38
38
  if: (github.event_name == 'pull_request' && github.event.pull_request.assignee != null) || github.event_name == 'workflow_dispatch'
39
39
  runs-on: ubuntu-latest
40
- timeout-minutes: 20
40
+ timeout-minutes: 100
41
41
 
42
42
  steps:
43
43
 
@@ -1,5 +1,6 @@
1
- name: UAT Deploy
1
+ name: UAT Build
2
2
 
3
+ # Trigger when the PR to merge to main is merged
3
4
  on:
4
5
  pull_request:
5
6
  branches:
@@ -30,13 +31,17 @@ jobs:
30
31
 
31
32
  - name: Bump version
32
33
  run: |
33
- echo "Bumping version..."
34
+ echo "Bump version..."
34
35
  pip install bump2version
35
36
  bump2version patch
36
37
 
38
+ - name: Generate notices file
39
+ run: |
40
+ echo "Generate notice file..."
41
+
37
42
  - name: Package test PyPI
38
43
  run: |
39
- echo "Packaging test PyPI..."
44
+ echo "Package test PyPI..."
40
45
  pip install build
41
46
  python3 -m build
42
47
 
@@ -70,7 +75,17 @@ jobs:
70
75
  with:
71
76
  repository-url: https://test.pypi.org/legacy/
72
77
 
73
-
78
+ # Deploy moonshot to UAT by installing moonshot package from test pypi
79
+ # deploy-to-uat:
80
+ # needs:
81
+ # - publish-to-testpypi
82
+ # runs-on: ubuntu-latest
83
+
84
+ # Run integration test
85
+ # integration-test:
86
+ # needs:
87
+ # - publish-to-testpypi:
88
+ # runs-on: ubuntu-latest
74
89
 
75
90
 
76
91
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiverify-moonshot
3
- Version: 0.5.1
3
+ Version: 0.6.1
4
4
  Summary: AI Verify advances Gen AI testing with Project Moonshot.
5
5
  Project-URL: Repository, https://github.com/aiverify-foundation/moonshot
6
6
  Project-URL: Documentation, https://aiverify-foundation.github.io/moonshot/
@@ -47,7 +47,7 @@ Description-Content-Type: text/markdown
47
47
 
48
48
  ![Moonshot Logo](https://github.com/aiverify-foundation/moonshot/raw/main/misc/aiverify-moonshot-logo.png)
49
49
 
50
- **Version 0.5.1**
50
+ **Version 0.6.1**
51
51
 
52
52
  A simple and modular tool to evaluate any LLM application.
53
53
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  ![Moonshot Logo](https://github.com/aiverify-foundation/moonshot/raw/main/misc/aiverify-moonshot-logo.png)
4
4
 
5
- **Version 0.5.1**
5
+ **Version 0.6.1**
6
6
 
7
7
  A simple and modular tool to evaluate any LLM application.
8
8
 
@@ -37,7 +37,8 @@ from moonshot.integrations.cli.cli_errors import (
37
37
  ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION_1,
38
38
  ERROR_BENCHMARK_RUN_COOKBOOK_NAME_VALIDATION,
39
39
  ERROR_BENCHMARK_RUN_COOKBOOK_NO_RESULT,
40
- ERROR_BENCHMARK_RUN_COOKBOOK_NUM_OF_PROMPTS_VALIDATION,
40
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION,
41
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION,
41
42
  ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION,
42
43
  ERROR_BENCHMARK_RUN_COOKBOOK_RESULT_PROC_MOD_VALIDATION,
43
44
  ERROR_BENCHMARK_RUN_COOKBOOK_RUNNER_PROC_MOD_VALIDATION,
@@ -212,11 +213,12 @@ def run_cookbook(args) -> None:
212
213
  The cookbooks are run against the specified endpoints, and the results are processed and displayed.
213
214
 
214
215
  Args:
215
- args: A namespace object from argparse. It should have the following attributes:
216
+ args (argparse.Namespace): The arguments provided to the command line interface.
217
+ Expected keys are:
216
218
  name (str): The name of the cookbook runner.
217
219
  cookbooks (str): A string representation of a list of cookbooks to run.
218
220
  endpoints (str): A string representation of a list of endpoints to run.
219
- num_of_prompts (int): The number of prompts to run.
221
+ prompt_selection_percentage (int): The percentage of prompts to run.
220
222
  random_seed (int): The random seed number for reproducibility.
221
223
  system_prompt (str): The system prompt to use.
222
224
  runner_proc_module (str): The runner processing module to use.
@@ -248,10 +250,19 @@ def run_cookbook(args) -> None:
248
250
  ):
249
251
  raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION)
250
252
 
251
- if isinstance(args.num_of_prompts, bool) or not isinstance(
252
- args.num_of_prompts, int
253
+ if isinstance(args.prompt_selection_percentage, bool) or not isinstance(
254
+ args.prompt_selection_percentage, int
255
+ ):
256
+ raise TypeError(
257
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION
258
+ )
259
+ elif (
260
+ args.prompt_selection_percentage < 1
261
+ or args.prompt_selection_percentage > 100
253
262
  ):
254
- raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_NUM_OF_PROMPTS_VALIDATION)
263
+ raise ValueError(
264
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION
265
+ )
255
266
 
256
267
  if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
257
268
  raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION)
@@ -297,7 +308,7 @@ def run_cookbook(args) -> None:
297
308
  async def run():
298
309
  await cb_runner.run_cookbooks(
299
310
  cookbooks,
300
- args.num_of_prompts,
311
+ args.prompt_selection_percentage,
301
312
  args.random_seed,
302
313
  args.system_prompt,
303
314
  args.runner_proc_module,
@@ -436,9 +447,20 @@ def _display_cookbooks(cookbooks_list):
436
447
  table.add_column("Cookbook", justify="left", width=78)
437
448
  table.add_column("Contains", justify="left", width=20, overflow="fold")
438
449
  for idx, cookbook in enumerate(cookbooks_list, 1):
439
- id, name, description, recipes, *other_args = cookbook.values()
450
+ (
451
+ id,
452
+ name,
453
+ tags,
454
+ categories,
455
+ description,
456
+ recipes,
457
+ *other_args,
458
+ ) = cookbook.values()
440
459
  idx = cookbook.get("idx", idx)
441
- cookbook_info = f"[red]ID: {id}[/red]\n\n[blue]{name}[/blue]\n{description}"
460
+ cookbook_info = f"[red]ID: {id}[/red]\n\n[blue]{name}[/blue]\n\n{description}"
461
+ cookbook_info += (
462
+ f"\n\n[blue]Tags: {tags}[/blue]\n[blue]Categories: {categories}[/blue]\n"
463
+ )
442
464
  recipes_info = display_view_list_format("Recipes", recipes)
443
465
  table.add_section()
444
466
  table.add_row(str(idx), cookbook_info, recipes_info)
@@ -459,11 +481,11 @@ def _display_view_cookbook(cookbook_info):
459
481
  Returns:
460
482
  None
461
483
  """
462
- id, name, description, recipes = cookbook_info.values()
484
+ id, name, tags, categories, description, recipes = cookbook_info.values()
463
485
  recipes_list = api_read_recipes(recipes)
464
486
  if recipes_list:
465
487
  table = Table(
466
- title=f'Cookbook "{name}"',
488
+ title=f'Cookbook: "{name}"\n Tags: {tags}\n Categories: {categories}\n',
467
489
  show_lines=True,
468
490
  expand=True,
469
491
  header_style="bold",
@@ -471,6 +493,7 @@ def _display_view_cookbook(cookbook_info):
471
493
  table.add_column("No.", width=2)
472
494
  table.add_column("Recipe", justify="left", width=78)
473
495
  table.add_column("Contains", justify="left", width=20, overflow="fold")
496
+
474
497
  for recipe_id, recipe in enumerate(recipes_list, 1):
475
498
  (
476
499
  id,
@@ -718,7 +741,11 @@ run_cookbook_args.add_argument("name", type=str, help="Name of cookbook runner")
718
741
  run_cookbook_args.add_argument("cookbooks", type=str, help="List of cookbooks to run")
719
742
  run_cookbook_args.add_argument("endpoints", type=str, help="List of endpoints to run")
720
743
  run_cookbook_args.add_argument(
721
- "-n", "--num_of_prompts", type=int, default=0, help="Number of prompts to run"
744
+ "-n",
745
+ "--prompt_selection_percentage",
746
+ type=int,
747
+ default=100,
748
+ help="Percentage of prompts to run",
722
749
  )
723
750
  run_cookbook_args.add_argument(
724
751
  "-r", "--random_seed", type=int, default=0, help="Random seed number"
@@ -40,7 +40,8 @@ from moonshot.integrations.cli.cli_errors import (
40
40
  ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1,
41
41
  ERROR_BENCHMARK_RUN_RECIPE_NAME_VALIDATION,
42
42
  ERROR_BENCHMARK_RUN_RECIPE_NO_RESULT,
43
- ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION,
43
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION,
44
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION,
44
45
  ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION,
45
46
  ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION,
46
47
  ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION_1,
@@ -293,11 +294,12 @@ def run_recipe(args) -> None:
293
294
  The recipes are run against the specified endpoints, and the results are processed and displayed.
294
295
 
295
296
  Args:
296
- args: A namespace object from argparse. It should have the following attributes:
297
+ args (argparse.Namespace): The arguments provided to the command line interface.
298
+ Expected keys are:
297
299
  name (str): The name of the recipe runner.
298
300
  recipes (str): A string representation of a list of recipes to run.
299
301
  endpoints (str): A string representation of a list of endpoints to run.
300
- num_of_prompts (int): The number of prompts to run.
302
+ prompt_selection_percentage (int): The percentage of prompts to run.
301
303
  random_seed (int): The random seed number for reproducibility.
302
304
  system_prompt (str): The system prompt to use.
303
305
  runner_proc_module (str): The runner processing module to use.
@@ -329,10 +331,19 @@ def run_recipe(args) -> None:
329
331
  ):
330
332
  raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION)
331
333
 
332
- if isinstance(args.num_of_prompts, bool) or not isinstance(
333
- args.num_of_prompts, int
334
+ if isinstance(args.prompt_selection_percentage, bool) or not isinstance(
335
+ args.prompt_selection_percentage, int
336
+ ):
337
+ raise TypeError(
338
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION
339
+ )
340
+ elif (
341
+ args.prompt_selection_percentage < 1
342
+ or args.prompt_selection_percentage > 100
334
343
  ):
335
- raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION)
344
+ raise ValueError(
345
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION
346
+ )
336
347
 
337
348
  if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
338
349
  raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION)
@@ -377,7 +388,7 @@ def run_recipe(args) -> None:
377
388
  async def run():
378
389
  await rec_runner.run_recipes(
379
390
  recipes,
380
- args.num_of_prompts,
391
+ args.prompt_selection_percentage,
381
392
  args.random_seed,
382
393
  args.system_prompt,
383
394
  args.runner_proc_module,
@@ -809,7 +820,11 @@ run_recipe_args.add_argument("name", type=str, help="Name of recipe runner")
809
820
  run_recipe_args.add_argument("recipes", type=str, help="List of recipes to run")
810
821
  run_recipe_args.add_argument("endpoints", type=str, help="List of endpoints to run")
811
822
  run_recipe_args.add_argument(
812
- "-n", "--num_of_prompts", type=int, default=0, help="Number of prompts to run"
823
+ "-n",
824
+ "--prompt_selection_percentage",
825
+ type=int,
826
+ default=100,
827
+ help="Percentage of prompts to run",
813
828
  )
814
829
  run_recipe_args.add_argument(
815
830
  "-r", "--random_seed", type=int, default=0, help="Random seed number"
@@ -190,7 +190,7 @@ def _display_results(results_list):
190
190
  recipes = metadata["recipes"]
191
191
  cookbooks = metadata["cookbooks"]
192
192
  endpoints = metadata["endpoints"]
193
- num_of_prompts = metadata["num_of_prompts"]
193
+ prompt_selection_percentage = metadata["prompt_selection_percentage"]
194
194
  random_seed = metadata["random_seed"]
195
195
  system_prompt = metadata["system_prompt"]
196
196
  idx = result.get("idx", idx)
@@ -200,7 +200,9 @@ def _display_results(results_list):
200
200
  recipes_info = display_view_list_format("Recipes", recipes)
201
201
  cookbooks_info = display_view_list_format("Cookbooks", cookbooks)
202
202
  endpoints_info = display_view_list_format("Endpoints", endpoints)
203
- prompts_info = display_view_str_format("Number of Prompts", num_of_prompts)
203
+ prompts_info = display_view_str_format(
204
+ "Prompt Selection Percentage", prompt_selection_percentage
205
+ )
204
206
  seed_info = display_view_str_format("Seed", random_seed)
205
207
  system_prompt_info = display_view_str_format("System Prompt", system_prompt)
206
208
 
@@ -52,8 +52,11 @@ ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION = (
52
52
  ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION_1 = (
53
53
  "The 'endpoints' argument must evaluate to a list of strings."
54
54
  )
55
- ERROR_BENCHMARK_RUN_COOKBOOK_NUM_OF_PROMPTS_VALIDATION = (
56
- "The 'num_of_prompts' argument must be an integer."
55
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION = (
56
+ "The 'prompt_selection_percentage' argument must be an integer."
57
+ )
58
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION = (
59
+ "The 'prompt_selection_percentage' argument must be between 1 - 100."
57
60
  )
58
61
  ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION = (
59
62
  "The 'random_seed' argument must be an integer."
@@ -278,8 +281,11 @@ ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION = (
278
281
  ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1 = (
279
282
  "The 'endpoints' argument must evaluate to a list of strings."
280
283
  )
281
- ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION = (
282
- "The 'num_of_prompts' argument must be an integer."
284
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION = (
285
+ "The 'prompt_selection_percentage' argument must be an integer."
286
+ )
287
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION = (
288
+ "The 'prompt_selection_percentage' argument must be between 1 - 100."
283
289
  )
284
290
  ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION = (
285
291
  "The 'random_seed' argument must be an integer."
@@ -71,7 +71,7 @@ def create_app(cfg: providers.Configuration) -> CustomFastAPI:
71
71
  }
72
72
 
73
73
  app: CustomFastAPI = CustomFastAPI(
74
- title="Project Moonshot", version="0.5.1", **app_kwargs
74
+ title="Project Moonshot", version="0.6.1", **app_kwargs
75
75
  )
76
76
 
77
77
  if cfg.cors.enabled():
@@ -1,4 +1,4 @@
1
- from pydantic import BaseModel, ConfigDict
1
+ from pydantic import BaseModel, ConfigDict, Field
2
2
 
3
3
 
4
4
  class BenchmarkRunnerDTO(BaseModel):
@@ -7,7 +7,7 @@ class BenchmarkRunnerDTO(BaseModel):
7
7
  description: str
8
8
  endpoints: list[str]
9
9
  inputs: list[str]
10
- num_of_prompts: int
10
+ prompt_selection_percentage: int = Field(..., ge=1, le=100)
11
11
  random_seed: int
12
12
  system_prompt: str
13
13
  runner_processing_module: str
@@ -9,6 +9,8 @@ class CookbookCreateDTO(CookbookPydanticModel):
9
9
  id: Optional[str] = None
10
10
  name: str = Field(..., min_length=1)
11
11
  description: Optional[str] = Field(default="", min_length=1)
12
+ tags: Optional[list[str]] = []
13
+ categories: Optional[list[str]] = []
12
14
  recipes: list[str] = Field(..., min_length=1)
13
15
 
14
16
 
@@ -16,4 +18,6 @@ class CookbookUpdateDTO(CookbookPydanticModel):
16
18
  id: Optional[str] = None
17
19
  name: Optional[str] = Field(default=None, min_length=1)
18
20
  description: Optional[str] = Field(default=None, min_length=1)
21
+ tags: Optional[list[str]] = None
22
+ categories: Optional[list[str]] = None
19
23
  recipes: Optional[list[str]] = Field(default=None, min_length=1)
@@ -1,7 +1,6 @@
1
- from typing import Optional
1
+ from typing import Any, Optional
2
2
 
3
3
  from pydantic import Field
4
- from pyparsing import Iterator
5
4
 
6
5
  from moonshot.src.datasets.dataset_arguments import (
7
6
  DatasetArguments as DatasetPydanticModel,
@@ -10,7 +9,7 @@ from moonshot.src.datasets.dataset_arguments import (
10
9
 
11
10
  class CSV_Dataset_DTO(DatasetPydanticModel):
12
11
  id: Optional[str] = None # Not a required from user
13
- examples: Optional[Iterator[dict]] = None # Not a required from user
12
+ examples: Optional[Any] = None # Not a required from user
14
13
  name: str = Field(..., min_length=1)
15
14
  description: str = Field(default="", min_length=1)
16
15
  license: Optional[str] = ""
@@ -20,7 +19,7 @@ class CSV_Dataset_DTO(DatasetPydanticModel):
20
19
 
21
20
  class HF_Dataset_DTO(DatasetPydanticModel):
22
21
  id: Optional[str] = None # Not a required from user
23
- examples: Optional[Iterator[dict]] = None # Not a required from user
22
+ examples: Optional[Any] = None # Not a required from user
24
23
  name: str = Field(..., min_length=1)
25
24
  description: str = Field(default="", min_length=1)
26
25
  license: Optional[str] = ""
@@ -60,14 +60,14 @@ class BenchmarkTestManager(BaseService):
60
60
  if benchmark_type == BenchmarkCollectionType.COOKBOOK:
61
61
  async_run = moonshot_runner.run_cookbooks(
62
62
  cookbooks=benchmark_input_data.inputs,
63
- num_of_prompts=benchmark_input_data.num_of_prompts,
63
+ prompt_selection_percentage=benchmark_input_data.prompt_selection_percentage,
64
64
  random_seed=benchmark_input_data.random_seed,
65
65
  system_prompt=benchmark_input_data.system_prompt,
66
66
  )
67
67
  else:
68
68
  async_run = moonshot_runner.run_recipes(
69
69
  recipes=benchmark_input_data.inputs,
70
- num_of_prompts=benchmark_input_data.num_of_prompts,
70
+ prompt_selection_percentage=benchmark_input_data.prompt_selection_percentage,
71
71
  random_seed=benchmark_input_data.random_seed,
72
72
  system_prompt=benchmark_input_data.system_prompt,
73
73
  )
@@ -71,7 +71,7 @@ class CookbookService(BaseService):
71
71
  cookbook.total_dataset_in_cookbook,
72
72
  ) = get_total_prompt_and_dataset_in_cookbook(cookbook)
73
73
 
74
- if tags and cookbooks_recipe_has_tags(tags, cookbook):
74
+ if tags and cookbook_has_tags(tags, cookbook):
75
75
  if cookbook not in cookbooks_list:
76
76
  cookbooks_list.append(cookbook)
77
77
  if count:
@@ -80,7 +80,7 @@ class CookbookService(BaseService):
80
80
  cookbook.total_dataset_in_cookbook,
81
81
  ) = get_total_prompt_and_dataset_in_cookbook(cookbook)
82
82
 
83
- if categories and cookbooks_recipe_has_categories(categories, cookbook):
83
+ if categories and cookbook_has_categories(categories, cookbook):
84
84
  if cookbook not in cookbooks_list:
85
85
  cookbooks_list.append(cookbook)
86
86
  if count:
@@ -89,10 +89,16 @@ class CookbookService(BaseService):
89
89
  cookbook.total_dataset_in_cookbook,
90
90
  ) = get_total_prompt_and_dataset_in_cookbook(cookbook)
91
91
 
92
- if categories_excluded and cookbooks_recipe_has_categories(
93
- categories_excluded, cookbook
94
- ):
95
- cookbooks_list.remove(cookbook)
92
+ if categories_excluded:
93
+ excluded_categories_set = set(
94
+ category.lower() for category in categories_excluded.split(",")
95
+ )
96
+ cookbook_categories_set = set(
97
+ category.lower() for category in cookbook.categories
98
+ )
99
+ # Exclude only if all categories in the cookbook are in the excluded list
100
+ if cookbook_categories_set.issubset(excluded_categories_set):
101
+ cookbooks_list.remove(cookbook)
96
102
 
97
103
  for cookbook in cookbooks_list:
98
104
  cookbook.required_config = cookbook_metrics_dependency(cookbook)
@@ -160,50 +166,40 @@ def get_total_prompt_and_dataset_in_cookbook(cookbook: Cookbook) -> tuple[int, i
160
166
 
161
167
 
162
168
  @staticmethod
163
- def cookbooks_recipe_has_tags(tags: str, cookbook: Cookbook) -> bool:
169
+ def cookbook_has_tags(tags: str, cookbook: Cookbook) -> bool:
164
170
  """
165
- Check if any recipe in a cookbook has the specified tags.
171
+ Check if a cookbook has the specified tags.
166
172
 
167
173
  Args:
168
- tags (str): The tags to check for in the cookbook's recipes.
169
- cookbook (Cookbook): The cookbook object containing the recipe IDs.
174
+ tags (str): The tags to check for in the cookbook.
175
+ cookbook (Cookbook): The cookbook object.
170
176
 
171
177
  Returns:
172
- bool: True if any recipe in the cookbook has the specified tags, False otherwise.
178
+ bool: True if the cookbook has the specified tags, False otherwise.
173
179
  """
174
- recipe_ids = cookbook.recipes
175
- recipes = moonshot_api.api_read_recipes(recipe_ids)
176
- for recipe in recipes:
177
- recipe = Recipe(**recipe)
178
- if tags in recipe.tags:
179
- return True
180
- return False
180
+ tags_list = [tag.lower() for tag in tags.split(",")]
181
+ return any(tag in [ctag.lower() for ctag in cookbook.tags] for tag in tags_list)
181
182
 
182
183
 
183
184
  @staticmethod
184
- def cookbooks_recipe_has_categories(categories: str, cookbook: Cookbook) -> bool:
185
+ def cookbook_has_categories(categories: str, cookbook: Cookbook) -> bool:
185
186
  """
186
- Check if any recipe in a cookbook has the specified categories.
187
+ Check if a cookbook has the specified categories.
187
188
 
188
189
  Args:
189
- categories (str): The categories to check for in the cookbook's recipes.
190
- cookbook (Cookbook): The cookbook object containing the recipe IDs.
191
- exclude_categories (str): The categories to exclude
190
+ categories (str): The categories to check for in the cookbook.
191
+ cookbook (Cookbook): The cookbook object.
192
192
 
193
193
  Returns:
194
- bool: True if any recipe in the cookbook has the specified categories, False otherwise.
194
+ bool: True if the cookbook has the specified categories, False otherwise.
195
195
  """
196
- recipe_ids = cookbook.recipes
197
196
  categories_list = [category.lower() for category in categories.split(",")]
198
- recipes = moonshot_api.api_read_recipes(recipe_ids)
199
- for recipe in recipes:
200
- recipe = Recipe(**recipe)
201
- if any(
202
- category in [rcat.lower() for rcat in recipe.categories]
203
- for category in categories_list
204
- ):
205
- return True
206
- return False
197
+ return any(
198
+ category in [ccat.lower() for ccat in cookbook.categories]
199
+ for category in categories_list
200
+ )
201
+
202
+
207
203
 
208
204
 
209
205
  @staticmethod
@@ -85,7 +85,7 @@ class ResultMetadata(TypedDict):
85
85
  recipes: List[str]
86
86
  cookbooks: List[str]
87
87
  endpoints: List[str]
88
- num_of_prompts: int
88
+ prompt_selection_percentage: int
89
89
  status: str
90
90
 
91
91