aiverify-moonshot 0.5.1__tar.gz → 0.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/integration-test.yaml +51 -12
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/pypi-deployment.yaml +1 -1
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/smoke-test-cli.yaml +1 -1
- aiverify_moonshot-0.5.1/.github/workflows/uat-deploy.yaml → aiverify_moonshot-0.6.1/.github/workflows/uat-build.yaml +19 -4
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/PKG-INFO +2 -2
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/README.md +1 -1
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/cookbook.py +39 -12
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/recipe.py +23 -8
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/result.py +4 -2
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/cli_errors.py +10 -4
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/app.py +1 -1
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/benchmark_runner_dto.py +2 -2
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/cookbook_create_dto.py +4 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/dataset_create_dto.py +3 -4
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/benchmark_test_manager.py +2 -2
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/cookbook_service.py +30 -34
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/types/types.py +1 -1
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_cookbook.py +20 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/cookbooks/cookbook.py +21 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/cookbooks/cookbook_arguments.py +6 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runners/runner.py +18 -28
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/pyproject.toml +3 -2
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.coveragerc +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.flake8 +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/developing-workflows.md +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/pull_request_template.md +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/create_backup.sh +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/install_ms_service.sh +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/moonshot_env +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/moonshot_test_env +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/moonshot_ui_env +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/run_smoke_test.sh +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/scripts/start_ms_service.sh +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/docs-update.yaml +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/gh-event-notification.yaml +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/notices-file-gen.yaml +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/pre-build-checks.yaml +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/sast-codeql.yaml +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/sca-scan.yaml +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/sit-build.yaml +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/smoke-test.yaml +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/test-pypi-deployment.yaml +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.gitignore +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.pre-commit-config.yaml +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/AUTHORS.md +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/LICENSE.md +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/NOTICES.md +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/__main__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/api.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/__main__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/active_session_cfg.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/benchmark.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/datasets.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/metrics.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/run.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/runner.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/cli.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/common.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/connectors.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/dataset.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/display_helper.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/common/prompt_template.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/initialisation/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/initialisation/initialisation.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/attack_module.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/context_strategy.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/prompt_template.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/redteam.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/redteam/session.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/utils/process_data.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/.env.dev +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/__main__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/container.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/log/.gitkeep +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/logging_conf.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/attack_modules.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/benchmark.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/benchmark_result.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/bookmark.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/context_strategy.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/cookbook.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/dataset.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/endpoint.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/metric.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/prompt_template.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/recipe.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/redteam.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/routes/runner.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/bookmark_create_dto.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/cookbook_response_model.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/dataset_response_dto.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/endpoint_create_dto.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/endpoint_response_model.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/prompt_response_model.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/prompt_template_response_model.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/recipe_create_dto.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/recipe_response_model.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/session_create_dto.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/session_prompt_dto.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/session_response_model.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/attack_module_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/auto_red_team_test_manager.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/auto_red_team_test_state.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/base_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/benchmark_result_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/benchmark_test_state.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/benchmarking_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/bookmark_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/context_strategy_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/dataset_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/endpoint_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/metric_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/prompt_template_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/recipe_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/runner_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/session_service.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/utils/exceptions_handler.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/utils/file_manager.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/utils/results_formatter.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/temp/.gitkeep +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_bookmark.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_connector.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_connector_endpoint.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_context_strategy.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_dataset.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_environment_variables.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_metrics.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_prompt_template.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_recipe.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_red_teaming.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_result.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_run.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_runner.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/api/api_session.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/bookmark/bookmark.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/bookmark/bookmark_arguments.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/configs/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/configs/env_variables.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors/connector.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors/connector_prompt_arguments.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors/connector_response.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors_endpoints/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors_endpoints/connector_endpoint.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/connectors_endpoints/connector_endpoint_arguments.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/cookbooks/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/datasets/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/datasets/dataset.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/datasets/dataset_arguments.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/messages_constants.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/metrics/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/metrics/metric.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/metrics/metric_interface.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/prompt_templates/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/prompt_templates/prompt_template.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/recipes/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/recipes/recipe.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/recipes/recipe_arguments.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/attack/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/attack/attack_module.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/attack/attack_module_arguments.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/attack/context_strategy.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/context_strategy/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/context_strategy/context_strategy_interface.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/session/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/session/chat.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/session/red_teaming_progress.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/session/red_teaming_type.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/redteaming/session/session.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/results/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/results/result.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/results/result_arguments.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runners/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runners/runner_arguments.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runners/runner_type.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runs/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runs/run.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runs/run_arguments.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runs/run_progress.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/runs/run_status.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/storage/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/storage/db_interface.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/storage/io_interface.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/storage/storage.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/__init__.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/find_feature.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/import_modules.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/log.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/pagination.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/src/utils/timeit.py +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/requirements.txt +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/third-party/pygments-2.18.0-py3-none-any.whl +0 -0
- {aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/third-party/text_unidecode-1.3-py2.py3-none-any.whl +0 -0
|
@@ -36,7 +36,7 @@ jobs:
|
|
|
36
36
|
integration-test:
|
|
37
37
|
|
|
38
38
|
runs-on: ubuntu-latest
|
|
39
|
-
timeout-minutes:
|
|
39
|
+
timeout-minutes: 300
|
|
40
40
|
|
|
41
41
|
steps:
|
|
42
42
|
|
|
@@ -53,7 +53,7 @@ jobs:
|
|
|
53
53
|
with:
|
|
54
54
|
repository: aiverify-foundation/moonshot
|
|
55
55
|
ref: ${{ inputs.moonshot_branch }}
|
|
56
|
-
|
|
56
|
+
|
|
57
57
|
- name: Setup Python 3.11
|
|
58
58
|
uses: actions/setup-python@v4
|
|
59
59
|
with:
|
|
@@ -110,24 +110,24 @@ jobs:
|
|
|
110
110
|
run: |
|
|
111
111
|
source venv/bin/activate
|
|
112
112
|
pip install nltk
|
|
113
|
-
python -c "import nltk; nltk.download('stopwords');nltk.download('
|
|
113
|
+
python -c "import nltk; nltk.download('stopwords');nltk.download('punkt');nltk.download('punkt_tab');nltk.download('averaged_perceptron_tagger_eng')"
|
|
114
114
|
|
|
115
115
|
- name: Setup Moonshot UI
|
|
116
116
|
run: |
|
|
117
|
+
source venv/bin/activate
|
|
117
118
|
cd moonshot-ui
|
|
118
|
-
npm
|
|
119
|
+
npm install
|
|
119
120
|
npm run build
|
|
120
121
|
cd ../
|
|
121
|
-
|
|
122
|
-
python -m moonshot web &
|
|
122
|
+
nohup python -m moonshot web &
|
|
123
123
|
|
|
124
124
|
- name: Checkout Integration Test
|
|
125
125
|
uses: actions/checkout@v4
|
|
126
126
|
with:
|
|
127
127
|
repository: aiverify-foundation/moonshot-integration-testing
|
|
128
128
|
path: moonshot-integration-testing
|
|
129
|
-
|
|
130
|
-
- name: Run Integration Test
|
|
129
|
+
|
|
130
|
+
- name: Run Integration UI Test
|
|
131
131
|
env:
|
|
132
132
|
URI: ${{ secrets.URI }}
|
|
133
133
|
TOKEN: ${{ secrets.TOKEN }}
|
|
@@ -139,13 +139,52 @@ jobs:
|
|
|
139
139
|
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
|
140
140
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
|
141
141
|
GOOGLE_TOKEN: ${{ secrets.GOOGLE_TOKEN }}
|
|
142
|
+
id: integrationuitest
|
|
142
143
|
run: |
|
|
144
|
+
source venv/bin/activate
|
|
143
145
|
cd moonshot-integration-testing/ui-integration-testing
|
|
144
146
|
npm ci
|
|
145
|
-
|
|
146
|
-
npx playwright install
|
|
147
|
-
|
|
148
|
-
|
|
147
|
+
npx playwright install
|
|
148
|
+
npx playwright install-deps
|
|
149
|
+
npm install dotenv --save
|
|
150
|
+
echo "Running Home Page Test Cases"
|
|
151
|
+
URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/homepage.spec.ts
|
|
152
|
+
echo "Running Endpoint Test Cases"
|
|
153
|
+
URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/endpoint.spec.ts
|
|
154
|
+
echo "Running Red Teaming Test Cases"
|
|
155
|
+
URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/red_teaming.spec.ts
|
|
156
|
+
echo "Running Benchmarking Test Cases"
|
|
157
|
+
URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/benchmarking.spec.ts
|
|
158
|
+
|
|
159
|
+
- name: Print Environment Variables
|
|
160
|
+
run: env
|
|
161
|
+
|
|
162
|
+
- name: Run Integration CLI Test
|
|
163
|
+
env:
|
|
164
|
+
AZURE_OPENAI_URI: ${{ secrets.AZURE_OPENAI_URI }}
|
|
165
|
+
AZURE_OPENAI_TOKEN: ${{ secrets.AZURE_OPENAI_TOKEN }}
|
|
166
|
+
ADDITIONAL_PARAMETERS: ${{ secrets.ADDITIONAL_PARAMETERS }}
|
|
167
|
+
MOONSHOT_URL: ${{ secrets.MOONSHOT_URL }}
|
|
168
|
+
MOONSHOT_PORT_NUMBER: ${{ secrets.MOONSHOT_PORT_NUMBER }}
|
|
169
|
+
CLI_DIR: ${{ secrets.CLI_DIR }}
|
|
170
|
+
ACTIONS_STEP_DEBUG: true
|
|
171
|
+
ACTIONS_RUNNER_DEBUG: true
|
|
172
|
+
run: |
|
|
173
|
+
source venv/bin/activate
|
|
174
|
+
cd moonshot-integration-testing/cli-integration-testing
|
|
175
|
+
echo "Current Directory: $(pwd)"
|
|
176
|
+
pip install python-dotenv
|
|
177
|
+
pip install pytest
|
|
178
|
+
pytest
|
|
179
|
+
|
|
180
|
+
- name: Upload Playwright Traces
|
|
181
|
+
if: always()
|
|
182
|
+
uses: actions/upload-artifact@v4
|
|
183
|
+
with:
|
|
184
|
+
name: playwright-trace
|
|
185
|
+
path: |
|
|
186
|
+
/home/runner/work/moonshot/moonshot/moonshot-integration-testing/ui-integration-testing/test-results
|
|
187
|
+
|
|
149
188
|
- name: TestRail CLI upload results
|
|
150
189
|
env:
|
|
151
190
|
TESTRAIL_USERNAME: ${{ secrets.TESTRAIL_USERNAME }}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
name: UAT
|
|
1
|
+
name: UAT Build
|
|
2
2
|
|
|
3
|
+
# Trigger when the PR to merge to main is merged
|
|
3
4
|
on:
|
|
4
5
|
pull_request:
|
|
5
6
|
branches:
|
|
@@ -30,13 +31,17 @@ jobs:
|
|
|
30
31
|
|
|
31
32
|
- name: Bump version
|
|
32
33
|
run: |
|
|
33
|
-
echo "
|
|
34
|
+
echo "Bump version..."
|
|
34
35
|
pip install bump2version
|
|
35
36
|
bump2version patch
|
|
36
37
|
|
|
38
|
+
- name: Generate notices file
|
|
39
|
+
run: |
|
|
40
|
+
echo "Generate notice file..."
|
|
41
|
+
|
|
37
42
|
- name: Package test PyPI
|
|
38
43
|
run: |
|
|
39
|
-
echo "
|
|
44
|
+
echo "Package test PyPI..."
|
|
40
45
|
pip install build
|
|
41
46
|
python3 -m build
|
|
42
47
|
|
|
@@ -70,7 +75,17 @@ jobs:
|
|
|
70
75
|
with:
|
|
71
76
|
repository-url: https://test.pypi.org/legacy/
|
|
72
77
|
|
|
73
|
-
|
|
78
|
+
# Deploy moonshot to UAT by installing moonshot package from test pypi
|
|
79
|
+
# deploy-to-uat:
|
|
80
|
+
# needs:
|
|
81
|
+
# - publish-to-testpypi
|
|
82
|
+
# runs-on: ubuntu-latest
|
|
83
|
+
|
|
84
|
+
# Run integration test
|
|
85
|
+
# integration-test:
|
|
86
|
+
# needs:
|
|
87
|
+
# - publish-to-testpypi:
|
|
88
|
+
# runs-on: ubuntu-latest
|
|
74
89
|
|
|
75
90
|
|
|
76
91
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aiverify-moonshot
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.6.1
|
|
4
4
|
Summary: AI Verify advances Gen AI testing with Project Moonshot.
|
|
5
5
|
Project-URL: Repository, https://github.com/aiverify-foundation/moonshot
|
|
6
6
|
Project-URL: Documentation, https://aiverify-foundation.github.io/moonshot/
|
|
@@ -47,7 +47,7 @@ Description-Content-Type: text/markdown
|
|
|
47
47
|
|
|
48
48
|

|
|
49
49
|
|
|
50
|
-
**Version 0.
|
|
50
|
+
**Version 0.6.1**
|
|
51
51
|
|
|
52
52
|
A simple and modular tool to evaluate any LLM application.
|
|
53
53
|
|
{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/cookbook.py
RENAMED
|
@@ -37,7 +37,8 @@ from moonshot.integrations.cli.cli_errors import (
|
|
|
37
37
|
ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION_1,
|
|
38
38
|
ERROR_BENCHMARK_RUN_COOKBOOK_NAME_VALIDATION,
|
|
39
39
|
ERROR_BENCHMARK_RUN_COOKBOOK_NO_RESULT,
|
|
40
|
-
|
|
40
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION,
|
|
41
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION,
|
|
41
42
|
ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION,
|
|
42
43
|
ERROR_BENCHMARK_RUN_COOKBOOK_RESULT_PROC_MOD_VALIDATION,
|
|
43
44
|
ERROR_BENCHMARK_RUN_COOKBOOK_RUNNER_PROC_MOD_VALIDATION,
|
|
@@ -212,11 +213,12 @@ def run_cookbook(args) -> None:
|
|
|
212
213
|
The cookbooks are run against the specified endpoints, and the results are processed and displayed.
|
|
213
214
|
|
|
214
215
|
Args:
|
|
215
|
-
args
|
|
216
|
+
args (argparse.Namespace): The arguments provided to the command line interface.
|
|
217
|
+
Expected keys are:
|
|
216
218
|
name (str): The name of the cookbook runner.
|
|
217
219
|
cookbooks (str): A string representation of a list of cookbooks to run.
|
|
218
220
|
endpoints (str): A string representation of a list of endpoints to run.
|
|
219
|
-
|
|
221
|
+
prompt_selection_percentage (int): The percentage of prompts to run.
|
|
220
222
|
random_seed (int): The random seed number for reproducibility.
|
|
221
223
|
system_prompt (str): The system prompt to use.
|
|
222
224
|
runner_proc_module (str): The runner processing module to use.
|
|
@@ -248,10 +250,19 @@ def run_cookbook(args) -> None:
|
|
|
248
250
|
):
|
|
249
251
|
raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION)
|
|
250
252
|
|
|
251
|
-
if isinstance(args.
|
|
252
|
-
args.
|
|
253
|
+
if isinstance(args.prompt_selection_percentage, bool) or not isinstance(
|
|
254
|
+
args.prompt_selection_percentage, int
|
|
255
|
+
):
|
|
256
|
+
raise TypeError(
|
|
257
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION
|
|
258
|
+
)
|
|
259
|
+
elif (
|
|
260
|
+
args.prompt_selection_percentage < 1
|
|
261
|
+
or args.prompt_selection_percentage > 100
|
|
253
262
|
):
|
|
254
|
-
raise
|
|
263
|
+
raise ValueError(
|
|
264
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION
|
|
265
|
+
)
|
|
255
266
|
|
|
256
267
|
if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
|
|
257
268
|
raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION)
|
|
@@ -297,7 +308,7 @@ def run_cookbook(args) -> None:
|
|
|
297
308
|
async def run():
|
|
298
309
|
await cb_runner.run_cookbooks(
|
|
299
310
|
cookbooks,
|
|
300
|
-
args.
|
|
311
|
+
args.prompt_selection_percentage,
|
|
301
312
|
args.random_seed,
|
|
302
313
|
args.system_prompt,
|
|
303
314
|
args.runner_proc_module,
|
|
@@ -436,9 +447,20 @@ def _display_cookbooks(cookbooks_list):
|
|
|
436
447
|
table.add_column("Cookbook", justify="left", width=78)
|
|
437
448
|
table.add_column("Contains", justify="left", width=20, overflow="fold")
|
|
438
449
|
for idx, cookbook in enumerate(cookbooks_list, 1):
|
|
439
|
-
|
|
450
|
+
(
|
|
451
|
+
id,
|
|
452
|
+
name,
|
|
453
|
+
tags,
|
|
454
|
+
categories,
|
|
455
|
+
description,
|
|
456
|
+
recipes,
|
|
457
|
+
*other_args,
|
|
458
|
+
) = cookbook.values()
|
|
440
459
|
idx = cookbook.get("idx", idx)
|
|
441
|
-
cookbook_info = f"[red]ID: {id}[/red]\n\n[blue]{name}[/blue]\n{description}"
|
|
460
|
+
cookbook_info = f"[red]ID: {id}[/red]\n\n[blue]{name}[/blue]\n\n{description}"
|
|
461
|
+
cookbook_info += (
|
|
462
|
+
f"\n\n[blue]Tags: {tags}[/blue]\n[blue]Categories: {categories}[/blue]\n"
|
|
463
|
+
)
|
|
442
464
|
recipes_info = display_view_list_format("Recipes", recipes)
|
|
443
465
|
table.add_section()
|
|
444
466
|
table.add_row(str(idx), cookbook_info, recipes_info)
|
|
@@ -459,11 +481,11 @@ def _display_view_cookbook(cookbook_info):
|
|
|
459
481
|
Returns:
|
|
460
482
|
None
|
|
461
483
|
"""
|
|
462
|
-
id, name, description, recipes = cookbook_info.values()
|
|
484
|
+
id, name, tags, categories, description, recipes = cookbook_info.values()
|
|
463
485
|
recipes_list = api_read_recipes(recipes)
|
|
464
486
|
if recipes_list:
|
|
465
487
|
table = Table(
|
|
466
|
-
title=f'Cookbook "{name}"',
|
|
488
|
+
title=f'Cookbook: "{name}"\n Tags: {tags}\n Categories: {categories}\n',
|
|
467
489
|
show_lines=True,
|
|
468
490
|
expand=True,
|
|
469
491
|
header_style="bold",
|
|
@@ -471,6 +493,7 @@ def _display_view_cookbook(cookbook_info):
|
|
|
471
493
|
table.add_column("No.", width=2)
|
|
472
494
|
table.add_column("Recipe", justify="left", width=78)
|
|
473
495
|
table.add_column("Contains", justify="left", width=20, overflow="fold")
|
|
496
|
+
|
|
474
497
|
for recipe_id, recipe in enumerate(recipes_list, 1):
|
|
475
498
|
(
|
|
476
499
|
id,
|
|
@@ -718,7 +741,11 @@ run_cookbook_args.add_argument("name", type=str, help="Name of cookbook runner")
|
|
|
718
741
|
run_cookbook_args.add_argument("cookbooks", type=str, help="List of cookbooks to run")
|
|
719
742
|
run_cookbook_args.add_argument("endpoints", type=str, help="List of endpoints to run")
|
|
720
743
|
run_cookbook_args.add_argument(
|
|
721
|
-
"-n",
|
|
744
|
+
"-n",
|
|
745
|
+
"--prompt_selection_percentage",
|
|
746
|
+
type=int,
|
|
747
|
+
default=100,
|
|
748
|
+
help="Percentage of prompts to run",
|
|
722
749
|
)
|
|
723
750
|
run_cookbook_args.add_argument(
|
|
724
751
|
"-r", "--random_seed", type=int, default=0, help="Random seed number"
|
{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/recipe.py
RENAMED
|
@@ -40,7 +40,8 @@ from moonshot.integrations.cli.cli_errors import (
|
|
|
40
40
|
ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1,
|
|
41
41
|
ERROR_BENCHMARK_RUN_RECIPE_NAME_VALIDATION,
|
|
42
42
|
ERROR_BENCHMARK_RUN_RECIPE_NO_RESULT,
|
|
43
|
-
|
|
43
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION,
|
|
44
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION,
|
|
44
45
|
ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION,
|
|
45
46
|
ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION,
|
|
46
47
|
ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION_1,
|
|
@@ -293,11 +294,12 @@ def run_recipe(args) -> None:
|
|
|
293
294
|
The recipes are run against the specified endpoints, and the results are processed and displayed.
|
|
294
295
|
|
|
295
296
|
Args:
|
|
296
|
-
args
|
|
297
|
+
args (argparse.Namespace): The arguments provided to the command line interface.
|
|
298
|
+
Expected keys are:
|
|
297
299
|
name (str): The name of the recipe runner.
|
|
298
300
|
recipes (str): A string representation of a list of recipes to run.
|
|
299
301
|
endpoints (str): A string representation of a list of endpoints to run.
|
|
300
|
-
|
|
302
|
+
prompt_selection_percentage (int): The percentage of prompts to run.
|
|
301
303
|
random_seed (int): The random seed number for reproducibility.
|
|
302
304
|
system_prompt (str): The system prompt to use.
|
|
303
305
|
runner_proc_module (str): The runner processing module to use.
|
|
@@ -329,10 +331,19 @@ def run_recipe(args) -> None:
|
|
|
329
331
|
):
|
|
330
332
|
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION)
|
|
331
333
|
|
|
332
|
-
if isinstance(args.
|
|
333
|
-
args.
|
|
334
|
+
if isinstance(args.prompt_selection_percentage, bool) or not isinstance(
|
|
335
|
+
args.prompt_selection_percentage, int
|
|
336
|
+
):
|
|
337
|
+
raise TypeError(
|
|
338
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION
|
|
339
|
+
)
|
|
340
|
+
elif (
|
|
341
|
+
args.prompt_selection_percentage < 1
|
|
342
|
+
or args.prompt_selection_percentage > 100
|
|
334
343
|
):
|
|
335
|
-
raise
|
|
344
|
+
raise ValueError(
|
|
345
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION
|
|
346
|
+
)
|
|
336
347
|
|
|
337
348
|
if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
|
|
338
349
|
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION)
|
|
@@ -377,7 +388,7 @@ def run_recipe(args) -> None:
|
|
|
377
388
|
async def run():
|
|
378
389
|
await rec_runner.run_recipes(
|
|
379
390
|
recipes,
|
|
380
|
-
args.
|
|
391
|
+
args.prompt_selection_percentage,
|
|
381
392
|
args.random_seed,
|
|
382
393
|
args.system_prompt,
|
|
383
394
|
args.runner_proc_module,
|
|
@@ -809,7 +820,11 @@ run_recipe_args.add_argument("name", type=str, help="Name of recipe runner")
|
|
|
809
820
|
run_recipe_args.add_argument("recipes", type=str, help="List of recipes to run")
|
|
810
821
|
run_recipe_args.add_argument("endpoints", type=str, help="List of endpoints to run")
|
|
811
822
|
run_recipe_args.add_argument(
|
|
812
|
-
"-n",
|
|
823
|
+
"-n",
|
|
824
|
+
"--prompt_selection_percentage",
|
|
825
|
+
type=int,
|
|
826
|
+
default=100,
|
|
827
|
+
help="Percentage of prompts to run",
|
|
813
828
|
)
|
|
814
829
|
run_recipe_args.add_argument(
|
|
815
830
|
"-r", "--random_seed", type=int, default=0, help="Random seed number"
|
{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/result.py
RENAMED
|
@@ -190,7 +190,7 @@ def _display_results(results_list):
|
|
|
190
190
|
recipes = metadata["recipes"]
|
|
191
191
|
cookbooks = metadata["cookbooks"]
|
|
192
192
|
endpoints = metadata["endpoints"]
|
|
193
|
-
|
|
193
|
+
prompt_selection_percentage = metadata["prompt_selection_percentage"]
|
|
194
194
|
random_seed = metadata["random_seed"]
|
|
195
195
|
system_prompt = metadata["system_prompt"]
|
|
196
196
|
idx = result.get("idx", idx)
|
|
@@ -200,7 +200,9 @@ def _display_results(results_list):
|
|
|
200
200
|
recipes_info = display_view_list_format("Recipes", recipes)
|
|
201
201
|
cookbooks_info = display_view_list_format("Cookbooks", cookbooks)
|
|
202
202
|
endpoints_info = display_view_list_format("Endpoints", endpoints)
|
|
203
|
-
prompts_info = display_view_str_format(
|
|
203
|
+
prompts_info = display_view_str_format(
|
|
204
|
+
"Prompt Selection Percentage", prompt_selection_percentage
|
|
205
|
+
)
|
|
204
206
|
seed_info = display_view_str_format("Seed", random_seed)
|
|
205
207
|
system_prompt_info = display_view_str_format("System Prompt", system_prompt)
|
|
206
208
|
|
|
@@ -52,8 +52,11 @@ ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION = (
|
|
|
52
52
|
ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION_1 = (
|
|
53
53
|
"The 'endpoints' argument must evaluate to a list of strings."
|
|
54
54
|
)
|
|
55
|
-
|
|
56
|
-
"The '
|
|
55
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION = (
|
|
56
|
+
"The 'prompt_selection_percentage' argument must be an integer."
|
|
57
|
+
)
|
|
58
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION = (
|
|
59
|
+
"The 'prompt_selection_percentage' argument must be between 1 - 100."
|
|
57
60
|
)
|
|
58
61
|
ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION = (
|
|
59
62
|
"The 'random_seed' argument must be an integer."
|
|
@@ -278,8 +281,11 @@ ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION = (
|
|
|
278
281
|
ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1 = (
|
|
279
282
|
"The 'endpoints' argument must evaluate to a list of strings."
|
|
280
283
|
)
|
|
281
|
-
|
|
282
|
-
"The '
|
|
284
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION = (
|
|
285
|
+
"The 'prompt_selection_percentage' argument must be an integer."
|
|
286
|
+
)
|
|
287
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION = (
|
|
288
|
+
"The 'prompt_selection_percentage' argument must be between 1 - 100."
|
|
283
289
|
)
|
|
284
290
|
ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION = (
|
|
285
291
|
"The 'random_seed' argument must be an integer."
|
|
@@ -71,7 +71,7 @@ def create_app(cfg: providers.Configuration) -> CustomFastAPI:
|
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
app: CustomFastAPI = CustomFastAPI(
|
|
74
|
-
title="Project Moonshot", version="0.
|
|
74
|
+
title="Project Moonshot", version="0.6.1", **app_kwargs
|
|
75
75
|
)
|
|
76
76
|
|
|
77
77
|
if cfg.cors.enabled():
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pydantic import BaseModel, ConfigDict
|
|
1
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class BenchmarkRunnerDTO(BaseModel):
|
|
@@ -7,7 +7,7 @@ class BenchmarkRunnerDTO(BaseModel):
|
|
|
7
7
|
description: str
|
|
8
8
|
endpoints: list[str]
|
|
9
9
|
inputs: list[str]
|
|
10
|
-
|
|
10
|
+
prompt_selection_percentage: int = Field(..., ge=1, le=100)
|
|
11
11
|
random_seed: int
|
|
12
12
|
system_prompt: str
|
|
13
13
|
runner_processing_module: str
|
|
@@ -9,6 +9,8 @@ class CookbookCreateDTO(CookbookPydanticModel):
|
|
|
9
9
|
id: Optional[str] = None
|
|
10
10
|
name: str = Field(..., min_length=1)
|
|
11
11
|
description: Optional[str] = Field(default="", min_length=1)
|
|
12
|
+
tags: Optional[list[str]] = []
|
|
13
|
+
categories: Optional[list[str]] = []
|
|
12
14
|
recipes: list[str] = Field(..., min_length=1)
|
|
13
15
|
|
|
14
16
|
|
|
@@ -16,4 +18,6 @@ class CookbookUpdateDTO(CookbookPydanticModel):
|
|
|
16
18
|
id: Optional[str] = None
|
|
17
19
|
name: Optional[str] = Field(default=None, min_length=1)
|
|
18
20
|
description: Optional[str] = Field(default=None, min_length=1)
|
|
21
|
+
tags: Optional[list[str]] = None
|
|
22
|
+
categories: Optional[list[str]] = None
|
|
19
23
|
recipes: Optional[list[str]] = Field(default=None, min_length=1)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import Any, Optional
|
|
2
2
|
|
|
3
3
|
from pydantic import Field
|
|
4
|
-
from pyparsing import Iterator
|
|
5
4
|
|
|
6
5
|
from moonshot.src.datasets.dataset_arguments import (
|
|
7
6
|
DatasetArguments as DatasetPydanticModel,
|
|
@@ -10,7 +9,7 @@ from moonshot.src.datasets.dataset_arguments import (
|
|
|
10
9
|
|
|
11
10
|
class CSV_Dataset_DTO(DatasetPydanticModel):
|
|
12
11
|
id: Optional[str] = None # Not a required from user
|
|
13
|
-
examples: Optional[
|
|
12
|
+
examples: Optional[Any] = None # Not a required from user
|
|
14
13
|
name: str = Field(..., min_length=1)
|
|
15
14
|
description: str = Field(default="", min_length=1)
|
|
16
15
|
license: Optional[str] = ""
|
|
@@ -20,7 +19,7 @@ class CSV_Dataset_DTO(DatasetPydanticModel):
|
|
|
20
19
|
|
|
21
20
|
class HF_Dataset_DTO(DatasetPydanticModel):
|
|
22
21
|
id: Optional[str] = None # Not a required from user
|
|
23
|
-
examples: Optional[
|
|
22
|
+
examples: Optional[Any] = None # Not a required from user
|
|
24
23
|
name: str = Field(..., min_length=1)
|
|
25
24
|
description: str = Field(default="", min_length=1)
|
|
26
25
|
license: Optional[str] = ""
|
|
@@ -60,14 +60,14 @@ class BenchmarkTestManager(BaseService):
|
|
|
60
60
|
if benchmark_type == BenchmarkCollectionType.COOKBOOK:
|
|
61
61
|
async_run = moonshot_runner.run_cookbooks(
|
|
62
62
|
cookbooks=benchmark_input_data.inputs,
|
|
63
|
-
|
|
63
|
+
prompt_selection_percentage=benchmark_input_data.prompt_selection_percentage,
|
|
64
64
|
random_seed=benchmark_input_data.random_seed,
|
|
65
65
|
system_prompt=benchmark_input_data.system_prompt,
|
|
66
66
|
)
|
|
67
67
|
else:
|
|
68
68
|
async_run = moonshot_runner.run_recipes(
|
|
69
69
|
recipes=benchmark_input_data.inputs,
|
|
70
|
-
|
|
70
|
+
prompt_selection_percentage=benchmark_input_data.prompt_selection_percentage,
|
|
71
71
|
random_seed=benchmark_input_data.random_seed,
|
|
72
72
|
system_prompt=benchmark_input_data.system_prompt,
|
|
73
73
|
)
|
|
@@ -71,7 +71,7 @@ class CookbookService(BaseService):
|
|
|
71
71
|
cookbook.total_dataset_in_cookbook,
|
|
72
72
|
) = get_total_prompt_and_dataset_in_cookbook(cookbook)
|
|
73
73
|
|
|
74
|
-
if tags and
|
|
74
|
+
if tags and cookbook_has_tags(tags, cookbook):
|
|
75
75
|
if cookbook not in cookbooks_list:
|
|
76
76
|
cookbooks_list.append(cookbook)
|
|
77
77
|
if count:
|
|
@@ -80,7 +80,7 @@ class CookbookService(BaseService):
|
|
|
80
80
|
cookbook.total_dataset_in_cookbook,
|
|
81
81
|
) = get_total_prompt_and_dataset_in_cookbook(cookbook)
|
|
82
82
|
|
|
83
|
-
if categories and
|
|
83
|
+
if categories and cookbook_has_categories(categories, cookbook):
|
|
84
84
|
if cookbook not in cookbooks_list:
|
|
85
85
|
cookbooks_list.append(cookbook)
|
|
86
86
|
if count:
|
|
@@ -89,10 +89,16 @@ class CookbookService(BaseService):
|
|
|
89
89
|
cookbook.total_dataset_in_cookbook,
|
|
90
90
|
) = get_total_prompt_and_dataset_in_cookbook(cookbook)
|
|
91
91
|
|
|
92
|
-
if categories_excluded
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
92
|
+
if categories_excluded:
|
|
93
|
+
excluded_categories_set = set(
|
|
94
|
+
category.lower() for category in categories_excluded.split(",")
|
|
95
|
+
)
|
|
96
|
+
cookbook_categories_set = set(
|
|
97
|
+
category.lower() for category in cookbook.categories
|
|
98
|
+
)
|
|
99
|
+
# Exclude only if all categories in the cookbook are in the excluded list
|
|
100
|
+
if cookbook_categories_set.issubset(excluded_categories_set):
|
|
101
|
+
cookbooks_list.remove(cookbook)
|
|
96
102
|
|
|
97
103
|
for cookbook in cookbooks_list:
|
|
98
104
|
cookbook.required_config = cookbook_metrics_dependency(cookbook)
|
|
@@ -160,50 +166,40 @@ def get_total_prompt_and_dataset_in_cookbook(cookbook: Cookbook) -> tuple[int, i
|
|
|
160
166
|
|
|
161
167
|
|
|
162
168
|
@staticmethod
|
|
163
|
-
def
|
|
169
|
+
def cookbook_has_tags(tags: str, cookbook: Cookbook) -> bool:
|
|
164
170
|
"""
|
|
165
|
-
Check if
|
|
171
|
+
Check if a cookbook has the specified tags.
|
|
166
172
|
|
|
167
173
|
Args:
|
|
168
|
-
tags (str): The tags to check for in the cookbook
|
|
169
|
-
cookbook (Cookbook): The cookbook object
|
|
174
|
+
tags (str): The tags to check for in the cookbook.
|
|
175
|
+
cookbook (Cookbook): The cookbook object.
|
|
170
176
|
|
|
171
177
|
Returns:
|
|
172
|
-
bool: True if
|
|
178
|
+
bool: True if the cookbook has the specified tags, False otherwise.
|
|
173
179
|
"""
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
for recipe in recipes:
|
|
177
|
-
recipe = Recipe(**recipe)
|
|
178
|
-
if tags in recipe.tags:
|
|
179
|
-
return True
|
|
180
|
-
return False
|
|
180
|
+
tags_list = [tag.lower() for tag in tags.split(",")]
|
|
181
|
+
return any(tag in [ctag.lower() for ctag in cookbook.tags] for tag in tags_list)
|
|
181
182
|
|
|
182
183
|
|
|
183
184
|
@staticmethod
|
|
184
|
-
def
|
|
185
|
+
def cookbook_has_categories(categories: str, cookbook: Cookbook) -> bool:
|
|
185
186
|
"""
|
|
186
|
-
Check if
|
|
187
|
+
Check if a cookbook has the specified categories.
|
|
187
188
|
|
|
188
189
|
Args:
|
|
189
|
-
categories (str): The categories to check for in the cookbook
|
|
190
|
-
cookbook (Cookbook): The cookbook object
|
|
191
|
-
exclude_categories (str): The categories to exclude
|
|
190
|
+
categories (str): The categories to check for in the cookbook.
|
|
191
|
+
cookbook (Cookbook): The cookbook object.
|
|
192
192
|
|
|
193
193
|
Returns:
|
|
194
|
-
bool: True if
|
|
194
|
+
bool: True if the cookbook has the specified categories, False otherwise.
|
|
195
195
|
"""
|
|
196
|
-
recipe_ids = cookbook.recipes
|
|
197
196
|
categories_list = [category.lower() for category in categories.split(",")]
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
):
|
|
205
|
-
return True
|
|
206
|
-
return False
|
|
197
|
+
return any(
|
|
198
|
+
category in [ccat.lower() for ccat in cookbook.categories]
|
|
199
|
+
for category in categories_list
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
|
|
207
203
|
|
|
208
204
|
|
|
209
205
|
@staticmethod
|