agentops-accelerator 0.4.4__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/CHANGELOG.md +54 -0
- {agentops_accelerator-0.4.4/src/agentops_accelerator.egg-info → agentops_accelerator-0.5.0}/PKG-INFO +1 -1
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/bundles.md +2 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/how-it-works.md +30 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/skills/agentops-governance/SKILL.md +109 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/cli/app.py +107 -11
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/core/agentops_config.py +21 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/invocations.py +7 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/orchestrator.py +3 -1
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/publisher.py +10 -4
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/runtime.py +34 -2
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/assert_runner.py +149 -18
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/azd_eval_init.py +368 -35
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/cicd.py +86 -9
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/redteam_runner.py +83 -7
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/setup_wizard.py +163 -9
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/skills/agentops-governance/SKILL.md +109 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/utils/yaml.py +1 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0/src/agentops_accelerator.egg-info}/PKG-INFO +1 -1
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops_accelerator.egg-info/SOURCES.txt +2 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_assert_and_redteam_runners.py +44 -3
- agentops_accelerator-0.5.0/tests/unit/test_azd_eval_init.py +717 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_cicd.py +41 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_http_streaming.py +74 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_init_command.py +3 -3
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_pipeline_publisher.py +12 -9
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_runtime_model_config.py +32 -0
- agentops_accelerator-0.5.0/tests/unit/test_runtime_response_fields.py +79 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_setup_wizard.py +241 -3
- agentops_accelerator-0.5.0/tests/unit/test_yaml_utils.py +13 -0
- agentops_accelerator-0.4.4/tests/unit/test_azd_eval_init.py +0 -361
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.claude-plugin/marketplace.json +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.gitattributes +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/actions/azure-oidc-login/action.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/code-quality-py.instructions.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/copilot-instructions.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/dependabot.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/extensions/agentops-skills/extension.mjs +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/plugin/marketplace.json +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/skills/release-management/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/workflows/_build.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/workflows/agentops-watchdog.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/workflows/ci.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/workflows/cut-release.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/workflows/e2e.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/workflows/release.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.github/workflows/staging.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.gitignore +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.pre-commit-config.yaml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.vscode/launch.json +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.vscode/settings.json +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/.vscode/tasks.json +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/AGENTS.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/CONTRIBUTING.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/LICENSE +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/README.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/SECURITY.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/ci-github-actions.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/concepts.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/doctor-checks.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/doctor-explained.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/e2e-live-architecture.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/e2e-live-setup.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/foundry-evaluation-sdk-built-in-evaluators.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/media/agentops-diagrams.vsdx +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/media/foundry-control-plane.png +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/release-process.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/tutorial-end-to-end.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/tutorial-hosted-agent-quickstart.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/docs/tutorial-prompt-agent.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/examples/flat-quickstart/README.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/examples/flat-quickstart/agentops.yaml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/examples/flat-quickstart/dataset.jsonl +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/icon.png +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/infra/e2e/agent-app/Dockerfile +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/infra/e2e/agent-app/app.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/infra/e2e/agent-app/requirements.txt +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/infra/e2e/bootstrap.bicep +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/infra/e2e/bootstrap.parameters.example.json +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/infra/e2e/perrun.bicep +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/launch.json +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/media/foundry.svg +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/media/quickstart.gif +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/.vscodeignore +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/LICENSE +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/README.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/package.json +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/plugin.json +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/skills/agentops-agent/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/skills/agentops-config/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/skills/agentops-dataset/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/skills/agentops-eval/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/skills/agentops-report/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/plugins/agentops/skills/agentops-workflow/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/pyproject.toml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/create_support_agent.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/cut-release.ps1 +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/cut-release.sh +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/e2e_aggregate_summary.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/e2e_data/basic.jsonl +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/e2e_data/rag.jsonl +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/e2e_data/tools.jsonl +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/e2e_demo.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/e2e_hosted_agent.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/e2e_make_transcript.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/e2e_render_config.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/release.ps1 +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/release.sh +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/setup-e2e-new-tenant.ps1 +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/staging.ps1 +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/staging.sh +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/sync-skills.ps1 +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/scripts/sync-skills.sh +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/setup.cfg +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/__main__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/_legacy_ids.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/analyzer.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/_rbac_authorization.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/catalog.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/errors.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/foundry_config.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/governance.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/latency.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/observability.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/opex.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/opex_workspace.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/posture.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/posture_rules/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/posture_rules/content_filter.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/posture_rules/diagnostics.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/posture_rules/local_auth.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/posture_rules/managed_identity.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/posture_rules/network.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/rbac_openai_data_plane.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/regression.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/release_readiness.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/safety.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/checks/spec_conformance.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/cockpit.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/config.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/findings.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/history.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/knowledge/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/knowledge/waf-checklist.csv +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/llm_assist/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/llm_assist/_base.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/llm_assist/_bundle_rule.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/llm_assist/_client.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/llm_assist/_dataset_rules.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/llm_assist/_engine.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/llm_assist/_prompt_rules.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/llm_assist/_spec_rules.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/production_telemetry.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/report.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/server/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/server/app.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/server/auth.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/server/chat.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/server/protocol.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/sources/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/sources/_credentials.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/sources/azure_monitor.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/sources/azure_resources.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/sources/foundry_control.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/sources/results_history.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/sources/spec_detectors/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/sources/spec_detectors/_base.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/sources/spec_detectors/agents_md.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/sources/spec_detectors/spec_kit.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/agent/time_range.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/cli/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/core/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/core/azd_eval.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/core/config_loader.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/core/evaluators.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/core/governance.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/core/release_evidence.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/core/results.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/mcp/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/mcp/server.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/azd_runner.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/cloud_results.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/cloud_runner.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/comparison.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/diagnostics.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/official_eval.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/prompt_deploy.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/reporter.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/thresholds.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/eval_analysis.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/evidence_pack.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/initializer.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/preflight.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/skills.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/trace_promotion.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/services/workflow_analysis.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/.gitignore +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/agent-server/Dockerfile +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/agent-server/README.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/agent-server/main.bicep +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/agent.yaml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/agentops.yaml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/foundry.svg +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/icon.png +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/pipelines/azuredevops/agentops-pr.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/project.gitignore +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/sample-traces.jsonl +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/skills/agentops-agent/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/skills/agentops-config/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/skills/agentops-dataset/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/skills/agentops-eval/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/skills/agentops-report/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/skills/agentops-workflow/SKILL.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/smoke.jsonl +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/waf-checklist.README.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/waf-checklist.csv +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/workflows/agentops-deploy-dev-azd.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/workflows/agentops-deploy-dev.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/workflows/agentops-deploy-prod-azd.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/workflows/agentops-deploy-prod.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/workflows/agentops-deploy-prompt-agent.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/workflows/agentops-deploy-qa-azd.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/workflows/agentops-deploy-qa.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/workflows/agentops-pr-prompt-agent.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/workflows/agentops-pr.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/templates/workflows/agentops-watchdog.yml +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/utils/__init__.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/utils/azd_env.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/utils/azure_endpoints.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/utils/colors.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/utils/dotenv_loader.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/utils/foundry_discovery.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/utils/logging.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/utils/telemetry.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops_accelerator.egg-info/dependency_links.txt +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops_accelerator.egg-info/entry_points.txt +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops_accelerator.egg-info/requires.txt +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops_accelerator.egg-info/top_level.txt +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/TESTING.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/fixtures/fake_adapter.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/fixtures/fake_eval_runner.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/integration/.gitkeep +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/integration/test_cli_flat_schema.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/integration/test_pipeline_smoke.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/.gitkeep +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_analyzer.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_categories.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_checks_errors.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_checks_foundry_config.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_checks_observability.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_checks_opex.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_checks_opex_workspace.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_checks_rbac_openai_data_plane.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_checks_regression.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_checks_safety.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_checks_spec_conformance.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_cli.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_config.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_findings.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_history.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_opex_workspace_check.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_posture_rules.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_results_history.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agent_server.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_agentops_config.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_azd_env.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_azd_eval.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_azd_runner.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_azure_endpoints.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_azure_resources_discovery.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_cli_cockpit_connection_summary.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_cli_cockpit_port_conflict.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_cli_commands.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_cli_explain.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_cloud_results.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_cloud_runner.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_cockpit.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_diagnostics.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_doctor_catalog.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_doctor_cli_explain.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_dotenv_loader.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_e2e_render.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_eval_analysis.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_eval_run_grader_errors.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_evaluators.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_foundry_discovery.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_governance.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_initializer.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_invocations.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_knowledge_loader.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_llm_assist.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_llm_assist_spec_rules.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_mcp_server.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_official_eval.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_pipeline_reporter.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_preflight.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_production_telemetry.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_prompt_deploy.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_release_evidence.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_runtime_conversation.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_shared_credentials.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_skills.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_skills_sync.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_telemetry.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_time_range.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_trace_promotion.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tests/unit/test_workflow_analysis.py +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/tombstones/vscode/CDN_DEPRECATION_REQUEST.md +0 -0
- {agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/uv.lock +0 -0
|
@@ -5,6 +5,44 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres
|
|
|
5
5
|
|
|
6
6
|
## [Unreleased]
|
|
7
7
|
|
|
8
|
+
### Added
|
|
9
|
+
- **Grey-box retrieval capture for HTTP JSON targets.** An HTTP target can now
|
|
10
|
+
capture extra named fields from a JSON response via a `response_fields` map
|
|
11
|
+
(`name -> dot-path`). Captured values are exposed to evaluator `input_mapping`
|
|
12
|
+
as `$response.<name>` (for example `$response.context`,
|
|
13
|
+
`$response.retrieved_documents`), and dataset columns can be referenced with
|
|
14
|
+
`$row.<name>` (for example `$row.qrels`). This lets RAG evaluators such as
|
|
15
|
+
Groundedness, Retrieval, and Document Retrieval score the retrieval actually
|
|
16
|
+
used at eval time instead of static dataset context. The primary prediction
|
|
17
|
+
(`response_field`) and single-field behavior are unchanged when
|
|
18
|
+
`response_fields` is not set.
|
|
19
|
+
|
|
20
|
+
## [0.4.5] - 2026-06-19
|
|
21
|
+
|
|
22
|
+
### Added
|
|
23
|
+
- **Governance gates for HTTP agents (ASSERT and Red Team).** `agentops assert
|
|
24
|
+
run` and `agentops redteam run` now work against a live HTTP orchestrator
|
|
25
|
+
endpoint, not only model/deployment targets. Red Team wraps the HTTP endpoint
|
|
26
|
+
as an SDK-compatible target and reuses the AgentOps HTTP mapping
|
|
27
|
+
(`request_field`, `response_mode`, `stream`, custom headers). ASSERT resolves
|
|
28
|
+
`assert-ai` inside the active virtual environment, accepts non-secret values
|
|
29
|
+
from `assert.env`, can request an AAD token from the Azure CLI for local
|
|
30
|
+
auth-disabled Azure AI resources, injects the GPT-5 `max_completion_tokens`
|
|
31
|
+
shim only when configured, and materializes a runtime ASSERT config so
|
|
32
|
+
committed configs no longer need absolute artifact paths.
|
|
33
|
+
- **Generated workflows run the ASSERT and Red Team gates.** `agentops workflow
|
|
34
|
+
generate` now installs the optional ASSERT/Red Team dependencies, runs those
|
|
35
|
+
gates when `assert:` or `redteam:` is present in `agentops.yaml`, uploads
|
|
36
|
+
their artifacts, and emits the corrected Red Team command quoting.
|
|
37
|
+
|
|
38
|
+
### Fixed
|
|
39
|
+
- **Reasoning-model judges no longer fail the eval gate in CI.** The generated
|
|
40
|
+
GitHub Actions and Azure DevOps eval and Red Team steps now forward
|
|
41
|
+
`AZURE_OPENAI_MODEL_NAME`, so AgentOps detects reasoning models (such as
|
|
42
|
+
`gpt-5-nano`) and uses `max_completion_tokens` instead of `max_tokens`. This
|
|
43
|
+
removes the judge `400` error that could break the eval gate when the judge
|
|
44
|
+
deployment is a reasoning model.
|
|
45
|
+
|
|
8
46
|
## [0.4.4] - 2026-06-18
|
|
9
47
|
|
|
10
48
|
### Added
|
|
@@ -56,6 +94,22 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres
|
|
|
56
94
|
`eval.yaml`, so users can see why those evaluators were chosen.
|
|
57
95
|
([#323](https://github.com/Azure/agentops/issues/323))
|
|
58
96
|
|
|
97
|
+
## [0.4.2] - 2026-06-17
|
|
98
|
+
|
|
99
|
+
### Fixed
|
|
100
|
+
- **`agentops eval init` now works with both old and new `azure.ai.agents` azd
|
|
101
|
+
extensions.** Version 0.1.40 of the extension renamed the eval subcommand from
|
|
102
|
+
`azd ai agent eval init` to `azd ai agent eval generate`, which made
|
|
103
|
+
`agentops eval init` hard-fail with `Command "init" is deprecated, use 'azd ai
|
|
104
|
+
agent eval generate' instead`. AgentOps now invokes `generate` first and
|
|
105
|
+
transparently falls back to the legacy `init` subcommand when an older
|
|
106
|
+
extension does not recognise `generate`. The fallback only triggers on
|
|
107
|
+
subcommand-name/deprecation errors; genuine failures (authentication, project
|
|
108
|
+
endpoint, timeouts) are still surfaced immediately and unchanged. All
|
|
109
|
+
previously passed flags (`--project-endpoint`, `--agent`,
|
|
110
|
+
`--gen-instruction-file`, `--eval-model`, `--dataset`, `--evaluator`) and the
|
|
111
|
+
recipe discovery/persistence behaviour are preserved.
|
|
112
|
+
|
|
59
113
|
## [0.4.1] - 2026-06-15
|
|
60
114
|
|
|
61
115
|
### Changed
|
|
@@ -96,6 +96,8 @@ metadata:
|
|
|
96
96
|
| `$prediction` | Model or agent response |
|
|
97
97
|
| `$expected` | Ground truth / expected answer from the dataset row |
|
|
98
98
|
| `$context` | Retrieved context documents from the dataset row |
|
|
99
|
+
| `$response.<name>` | A field captured from the live HTTP JSON response via the target's `response_fields` map (e.g. `$response.context`, `$response.retrieved_documents`). Missing captures are skipped. |
|
|
100
|
+
| `$row.<name>` | An arbitrary column from the dataset row (e.g. `$row.qrels` for Document Retrieval ground truth). Missing columns are skipped. |
|
|
99
101
|
| `$tool_calls` | Tool calls returned by the agent |
|
|
100
102
|
| `$tool_definitions` | Tool definitions from the dataset row |
|
|
101
103
|
|
|
@@ -325,6 +325,7 @@ That's a complete config. AgentOps:
|
|
|
325
325
|
| `thresholds` | no | Metric gates such as `">=3"` or `"<=10"`. |
|
|
326
326
|
| `protocol` | no | URL protocol: `responses`, `invocations`, or `http-json`. |
|
|
327
327
|
| `request_field` / `response_field` / `tool_calls_field` | no | Request/response JSON keys or dot-paths. |
|
|
328
|
+
| `response_fields` | no | Map of `name -> dot-path` capturing extra fields from a JSON response. Each captured value is exposed to evaluator `input_mapping` as `$response.<name>`. Only used when `response_mode` is `json`. |
|
|
328
329
|
| `headers` | no | Static HTTP headers (dict). |
|
|
329
330
|
| `auth_header_env` | no | Env var name holding a Bearer token. |
|
|
330
331
|
| `evaluators` | no | Escape-hatch list of evaluator names that overrides auto-selection. |
|
|
@@ -379,6 +380,35 @@ response_field: text # dot-path; default is "text"
|
|
|
379
380
|
auth_header_env: APP_API_TOKEN # value used as Bearer token
|
|
380
381
|
```
|
|
381
382
|
|
|
383
|
+
**HTTP-deployed agent with grey-box retrieval capture (RAG evaluators):**
|
|
384
|
+
|
|
385
|
+
When the endpoint can return its retrieval alongside the answer (for example a
|
|
386
|
+
JSON body `{"answer": ..., "context": ..., "retrieved_documents": [...]}`),
|
|
387
|
+
capture the extra fields with `response_fields` and reference them in evaluator
|
|
388
|
+
`input_mapping` via `$response.<name>`. This scores the retrieval actually used
|
|
389
|
+
at eval time instead of static dataset context.
|
|
390
|
+
|
|
391
|
+
```yaml
|
|
392
|
+
version: 1
|
|
393
|
+
agent: https://my-aca-app.eastus2.azurecontainerapps.io/orchestrator
|
|
394
|
+
dataset: .agentops/data/qa.jsonl
|
|
395
|
+
response_mode: json
|
|
396
|
+
request_field: ask
|
|
397
|
+
response_field: answer # primary prediction (dot-path)
|
|
398
|
+
response_fields: # extra fields captured per row
|
|
399
|
+
context: context
|
|
400
|
+
retrieved_documents: retrieved_documents
|
|
401
|
+
bundle:
|
|
402
|
+
evaluators:
|
|
403
|
+
- name: groundedness
|
|
404
|
+
config:
|
|
405
|
+
kind: builtin
|
|
406
|
+
class_name: GroundednessEvaluator
|
|
407
|
+
input_mapping:
|
|
408
|
+
response: $prediction          # primary answer extracted via response_field
|
|
409
|
+
context: $response.context
|
|
410
|
+
```
|
|
411
|
+
|
|
382
412
|
**Raw model deployment:**
|
|
383
413
|
|
|
384
414
|
```yaml
|
|
@@ -206,6 +206,115 @@ assert-ai init
|
|
|
206
206
|
It walks them through behavior description, target callable / model /
|
|
207
207
|
endpoint, dimensions, and judge presets, and writes a validated YAML.
|
|
208
208
|
|
|
209
|
+
### HTTP orchestrator ASSERT
|
|
210
|
+
|
|
211
|
+
If `agentops.yaml` uses `protocol: http-json` or the user says the target is an
|
|
212
|
+
HTTP orchestrator, do not use ASSERT native endpoint mode. `assert-ai 0.1.0`
|
|
213
|
+
posts `message/history` and expects `response`; AgentOps HTTP targets may use
|
|
214
|
+
custom fields like `ask` and streamed text. Scaffold a callable adapter instead.
|
|
215
|
+
|
|
216
|
+
Create `.agentops/assert_http_adapter.py`:
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
from __future__ import annotations
|
|
220
|
+
|
|
221
|
+
import json
|
|
222
|
+
from pathlib import Path
|
|
223
|
+
from typing import Any
|
|
224
|
+
|
|
225
|
+
from agentops.core.config_loader import load_agentops_config
|
|
226
|
+
from agentops.pipeline.invocations import (
|
|
227
|
+
_aggregate_stream,
|
|
228
|
+
_dot_path,
|
|
229
|
+
_http_request_json,
|
|
230
|
+
_http_request_stream,
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def target(message: str, history: list[dict[str, Any]] | None = None) -> str:
|
|
235
|
+
del history
|
|
236
|
+
config = load_agentops_config(Path("agentops.yaml"))
|
|
237
|
+
if not config.agent:
|
|
238
|
+
raise RuntimeError("agentops.yaml must define a top-level HTTP agent endpoint")
|
|
239
|
+
|
|
240
|
+
request_field = config.request_field or "message"
|
|
241
|
+
headers = dict(config.headers)
|
|
242
|
+
headers.setdefault("Content-Type", "application/json")
|
|
243
|
+
body = {request_field: message}
|
|
244
|
+
|
|
245
|
+
if config.response_mode in ("sse", "text"):
|
|
246
|
+
raw_body = _http_request_stream(
|
|
247
|
+
method="POST",
|
|
248
|
+
url=config.agent,
|
|
249
|
+
headers=headers,
|
|
250
|
+
body=body,
|
|
251
|
+
timeout=120,
|
|
252
|
+
)
|
|
253
|
+
return _aggregate_stream(config.response_mode, raw_body, config.stream).strip()
|
|
254
|
+
|
|
255
|
+
payload = _http_request_json(
|
|
256
|
+
method="POST",
|
|
257
|
+
url=config.agent,
|
|
258
|
+
headers=headers,
|
|
259
|
+
body=body,
|
|
260
|
+
timeout=120,
|
|
261
|
+
)
|
|
262
|
+
response_path = config.response_field or "text"
|
|
263
|
+
response_text = _dot_path(payload, response_path)
|
|
264
|
+
if response_text is None and isinstance(payload, dict):
|
|
265
|
+
for fallback in ("response", "output", "content", "message", "text"):
|
|
266
|
+
response_text = payload.get(fallback)
|
|
267
|
+
if response_text:
|
|
268
|
+
break
|
|
269
|
+
return (
|
|
270
|
+
response_text
|
|
271
|
+
if isinstance(response_text, str)
|
|
272
|
+
else json.dumps(response_text or "", ensure_ascii=False)
|
|
273
|
+
)
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
Create an ASSERT smoke test from a known-good eval dataset row, not a random general
|
|
277
|
+
question. For the HTTP tutorial, use:
|
|
278
|
+
|
|
279
|
+
```yaml
|
|
280
|
+
suite: gpt-rag-http-smoke
|
|
281
|
+
run: local-http-contract-smoke
|
|
282
|
+
|
|
283
|
+
default_model:
|
|
284
|
+
name: azure/chat
|
|
285
|
+
|
|
286
|
+
pipeline:
|
|
287
|
+
systematize:
|
|
288
|
+
enabled: false
|
|
289
|
+
test_set:
|
|
290
|
+
enabled: false
|
|
291
|
+
inference:
|
|
292
|
+
test_set_path: test_set.jsonl
|
|
293
|
+
target:
|
|
294
|
+
callable: assert_http_adapter:target
|
|
295
|
+
max_turns: 1
|
|
296
|
+
judge:
|
|
297
|
+
taxonomy_path: taxonomy.json
|
|
298
|
+
preset:
|
|
299
|
+
- grounding
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
Append this `assert:` block to `agentops.yaml`. Discover `AZURE_API_BASE` from
|
|
303
|
+
the Azure AI/OpenAI resource and set `AZURE_API_VERSION` to the version used by
|
|
304
|
+
the deployment. These are not secrets. If local auth is disabled, AgentOps will
|
|
305
|
+
use the signed-in Azure CLI token for the ASSERT subprocess.
|
|
306
|
+
|
|
307
|
+
```yaml
|
|
308
|
+
assert:
|
|
309
|
+
config: ./assert/eval_config.yaml
|
|
310
|
+
fail_on_violations: true
|
|
311
|
+
env:
|
|
312
|
+
AZURE_API_BASE: https://<azure-ai-resource>.cognitiveservices.azure.com/
|
|
313
|
+
AZURE_API_VERSION: 2024-12-01-preview
|
|
314
|
+
AGENTOPS_ASSERT_AZURE_MAX_COMPLETION_TOKENS: "true"
|
|
315
|
+
PYTHONPATH: .agentops
|
|
316
|
+
```
|
|
317
|
+
|
|
209
318
|
**3. Append the `assert:` block to `agentops.yaml`** (preserve every existing
|
|
210
319
|
key — read the file, append the block if missing, write back):
|
|
211
320
|
|
|
@@ -14,13 +14,16 @@ from html import escape as html_escape
|
|
|
14
14
|
from pathlib import Path
|
|
15
15
|
from textwrap import wrap
|
|
16
16
|
from collections.abc import Sequence
|
|
17
|
-
from typing import Annotated, Any, Optional
|
|
17
|
+
from typing import Annotated, Any, Optional, TYPE_CHECKING
|
|
18
18
|
|
|
19
19
|
import typer
|
|
20
20
|
|
|
21
21
|
from agentops.utils.colors import style
|
|
22
22
|
from agentops.utils.logging import get_logger, setup_logging
|
|
23
23
|
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from agentops.core.agentops_config import AgentOpsConfig
|
|
26
|
+
|
|
24
27
|
app = typer.Typer(
|
|
25
28
|
name="agentops",
|
|
26
29
|
help="AgentOps - standardized evaluation workflows for AI projects.",
|
|
@@ -1574,11 +1577,13 @@ def cmd_init(
|
|
|
1574
1577
|
from agentops.services.setup_wizard import (
|
|
1575
1578
|
AGENT_TITLE,
|
|
1576
1579
|
DATASET_TITLE,
|
|
1580
|
+
ENDPOINT_SOURCE_AZD_RESOURCE_DISCOVERY,
|
|
1577
1581
|
PROJECT_ENDPOINT_TITLE,
|
|
1578
1582
|
REQUIRED_CONFIGURATION_MESSAGE,
|
|
1579
1583
|
WizardAnswers,
|
|
1580
1584
|
apply_answers,
|
|
1581
1585
|
discover_defaults,
|
|
1586
|
+
is_placeholder_agent,
|
|
1582
1587
|
run_wizard,
|
|
1583
1588
|
validate_agent,
|
|
1584
1589
|
validate_dataset,
|
|
@@ -1763,12 +1768,14 @@ def cmd_init(
|
|
|
1763
1768
|
force_prompt_fields = {"agent", "dataset"} if config_seeded_this_run else set()
|
|
1764
1769
|
prompt_values = [
|
|
1765
1770
|
defaults.project_endpoint,
|
|
1766
|
-
defaults.agent,
|
|
1771
|
+
None if is_placeholder_agent(defaults.agent) else defaults.agent,
|
|
1767
1772
|
defaults.dataset,
|
|
1768
1773
|
]
|
|
1769
|
-
will_prompt =
|
|
1770
|
-
|
|
1771
|
-
|
|
1774
|
+
will_prompt = (
|
|
1775
|
+
reconfigure
|
|
1776
|
+
or bool(force_prompt_fields)
|
|
1777
|
+
or any(v is None or not str(v).strip() for v in prompt_values)
|
|
1778
|
+
or defaults.project_endpoint_source == ENDPOINT_SOURCE_AZD_RESOURCE_DISCOVERY
|
|
1772
1779
|
)
|
|
1773
1780
|
if will_prompt:
|
|
1774
1781
|
typer.echo(style("Press Enter to accept the value in brackets.", "dim"))
|
|
@@ -1817,6 +1824,7 @@ def cmd_init(
|
|
|
1817
1824
|
workspace,
|
|
1818
1825
|
prompt=_prompt,
|
|
1819
1826
|
echo=_wizard_echo,
|
|
1827
|
+
defaults=defaults,
|
|
1820
1828
|
on_answer=_on_answer,
|
|
1821
1829
|
reconfigure=reconfigure,
|
|
1822
1830
|
force_prompt_fields=force_prompt_fields,
|
|
@@ -2121,8 +2129,13 @@ def cmd_eval_init(
|
|
|
2121
2129
|
if _maybe_explain_leaf(("eval", "init"), explain):
|
|
2122
2130
|
return
|
|
2123
2131
|
|
|
2132
|
+
from agentops.core.config_loader import load_agentops_config
|
|
2124
2133
|
from agentops.pipeline.azd_runner import AzdBackendError
|
|
2125
|
-
from agentops.services.azd_eval_init import run_azd_eval_init
|
|
2134
|
+
from agentops.services.azd_eval_init import (
|
|
2135
|
+
ensure_local_evaluator_model_env,
|
|
2136
|
+
recommend_evaluators_for_config,
|
|
2137
|
+
run_azd_eval_init,
|
|
2138
|
+
)
|
|
2126
2139
|
|
|
2127
2140
|
workspace = directory.resolve()
|
|
2128
2141
|
config_path = _resolve_eval_config_path(config)
|
|
@@ -2130,8 +2143,48 @@ def cmd_eval_init(
|
|
|
2130
2143
|
config_path = workspace / config_path
|
|
2131
2144
|
|
|
2132
2145
|
try:
|
|
2146
|
+
loaded_config = load_agentops_config(config_path)
|
|
2147
|
+
target = loaded_config.resolved_target()
|
|
2148
|
+
if target.kind not in {"foundry_prompt", "foundry_hosted"}:
|
|
2149
|
+
selection = recommend_evaluators_for_config(
|
|
2150
|
+
config_path=config_path,
|
|
2151
|
+
dataset=dataset,
|
|
2152
|
+
)
|
|
2153
|
+
typer.echo(
|
|
2154
|
+
f"{_cli_label('AgentOps eval init')}: local HTTP/model target detected; "
|
|
2155
|
+
"azd eval assets are not required."
|
|
2156
|
+
)
|
|
2157
|
+
typer.echo(f"{_cli_label('Evaluator recommendation')}: {selection.source}")
|
|
2158
|
+
for signal in selection.signals:
|
|
2159
|
+
typer.echo(f" {style('-', 'dim')} {signal}")
|
|
2160
|
+
if selection.names:
|
|
2161
|
+
typer.echo(f"{_cli_label('Evaluators')}: {', '.join(selection.names)}")
|
|
2162
|
+
model_env = ensure_local_evaluator_model_env(
|
|
2163
|
+
workspace=workspace,
|
|
2164
|
+
selection=selection,
|
|
2165
|
+
)
|
|
2166
|
+
if model_env.configured:
|
|
2167
|
+
action = "configured" if model_env.changed_keys else "using"
|
|
2168
|
+
typer.echo(
|
|
2169
|
+
f"{_cli_label('Evaluator model')}: {action} "
|
|
2170
|
+
f"{model_env.deployment} ({model_env.model})"
|
|
2171
|
+
)
|
|
2172
|
+
if model_env.changed_keys and model_env.env_path is not None:
|
|
2173
|
+
typer.echo(
|
|
2174
|
+
f" {style('-', 'dim')} saved "
|
|
2175
|
+
f"{', '.join(model_env.changed_keys)} to "
|
|
2176
|
+
f"{_cli_path(model_env.env_path)}"
|
|
2177
|
+
)
|
|
2178
|
+
elif selection.names and model_env.source != "not needed":
|
|
2179
|
+
typer.echo(
|
|
2180
|
+
f"{_cli_warn('Warning')}: could not auto-discover an evaluator "
|
|
2181
|
+
"model deployment. Set AZURE_OPENAI_DEPLOYMENT and "
|
|
2182
|
+
"AZURE_OPENAI_MODEL_NAME before `agentops eval run`."
|
|
2183
|
+
)
|
|
2184
|
+
typer.echo(f"{_cli_label('Next')}: {_cli_command('agentops eval run')}")
|
|
2185
|
+
return
|
|
2133
2186
|
typer.echo(
|
|
2134
|
-
f"{_cli_label('azd eval init')}: checking/generating eval.yaml "
|
|
2187
|
+
f"{_cli_label('azd eval generate')}: checking/generating eval.yaml "
|
|
2135
2188
|
"(this can take a few minutes on the first run)"
|
|
2136
2189
|
)
|
|
2137
2190
|
result = run_azd_eval_init(
|
|
@@ -2148,9 +2201,9 @@ def cmd_eval_init(
|
|
|
2148
2201
|
raise typer.Exit(code=1) from exc
|
|
2149
2202
|
|
|
2150
2203
|
if result.command_ran:
|
|
2151
|
-
typer.echo(f"{_cli_label('azd eval init')}: completed")
|
|
2204
|
+
typer.echo(f"{_cli_label('azd eval generate')}: completed")
|
|
2152
2205
|
else:
|
|
2153
|
-
typer.echo(f"{_cli_label('azd eval init')}: existing recipe reused")
|
|
2206
|
+
typer.echo(f"{_cli_label('azd eval generate')}: existing recipe reused")
|
|
2154
2207
|
if result.evaluators:
|
|
2155
2208
|
typer.echo(f"{_cli_label('Evaluator recommendation')}: {result.evaluator_source}")
|
|
2156
2209
|
for signal in result.evaluator_signals:
|
|
@@ -2346,6 +2399,17 @@ def cmd_assert_run(
|
|
|
2346
2399
|
),
|
|
2347
2400
|
),
|
|
2348
2401
|
] = False,
|
|
2402
|
+
cached: Annotated[
|
|
2403
|
+
bool,
|
|
2404
|
+
typer.Option(
|
|
2405
|
+
"--cached",
|
|
2406
|
+
help=(
|
|
2407
|
+
"Reuse cached inference/judge rows from a previous run with the "
|
|
2408
|
+
"same run id. By default ASSERT re-runs inference against the live "
|
|
2409
|
+
"target each time so the gate always exercises the current agent."
|
|
2410
|
+
),
|
|
2411
|
+
),
|
|
2412
|
+
] = False,
|
|
2349
2413
|
explain: Annotated[str | None, typer.Argument(hidden=True)] = None,
|
|
2350
2414
|
) -> None:
|
|
2351
2415
|
"""Invoke the ASSERT (assert-ai) CLI and normalize its results."""
|
|
@@ -2403,6 +2467,7 @@ def cmd_assert_run(
|
|
|
2403
2467
|
resolved_suite: str | None = suite
|
|
2404
2468
|
resolved_run_id: str | None = run_id
|
|
2405
2469
|
fail_on_violations = True
|
|
2470
|
+
subprocess_env: dict[str, str] | None = None
|
|
2406
2471
|
|
|
2407
2472
|
if cfg.assert_run is not None:
|
|
2408
2473
|
if eval_config_path is None:
|
|
@@ -2414,6 +2479,7 @@ def cmd_assert_run(
|
|
|
2414
2479
|
if resolved_run_id is None:
|
|
2415
2480
|
resolved_run_id = cfg.assert_run.run_id
|
|
2416
2481
|
fail_on_violations = cfg.assert_run.fail_on_violations
|
|
2482
|
+
subprocess_env = dict(cfg.assert_run.env)
|
|
2417
2483
|
if no_gate:
|
|
2418
2484
|
fail_on_violations = False
|
|
2419
2485
|
|
|
@@ -2428,6 +2494,12 @@ def cmd_assert_run(
|
|
|
2428
2494
|
typer.echo(
|
|
2429
2495
|
f" suite={resolved_suite or '<auto>'} run_id={resolved_run_id or '<auto>'}"
|
|
2430
2496
|
)
|
|
2497
|
+
if cached:
|
|
2498
|
+
typer.echo(" cache: reusing prior inference/judge rows when available")
|
|
2499
|
+
else:
|
|
2500
|
+
typer.echo(" cache: forcing fresh inference against the live target")
|
|
2501
|
+
|
|
2502
|
+
assert_extra_args = None if cached else ["--force-stage", "inference"]
|
|
2431
2503
|
|
|
2432
2504
|
try:
|
|
2433
2505
|
result = run_assert(
|
|
@@ -2436,6 +2508,8 @@ def cmd_assert_run(
|
|
|
2436
2508
|
results_dir=resolved_results_dir,
|
|
2437
2509
|
suite=resolved_suite,
|
|
2438
2510
|
run_id=resolved_run_id,
|
|
2511
|
+
env=subprocess_env,
|
|
2512
|
+
extra_args=assert_extra_args,
|
|
2439
2513
|
)
|
|
2440
2514
|
except AssertRunnerError as exc:
|
|
2441
2515
|
typer.echo(f"{_cli_error('Error')}: {exc}", err=True)
|
|
@@ -2471,9 +2545,15 @@ def cmd_assert_run(
|
|
|
2471
2545
|
violations = bucket.get("violations", 0)
|
|
2472
2546
|
total = bucket.get("total", 0)
|
|
2473
2547
|
skipped = bucket.get("skipped", 0)
|
|
2474
|
-
marker = _cli_ok("OK") if violations == 0 else _cli_error("VIOLATIONS")
|
|
2475
2548
|
suffix = f" (skipped={skipped})" if skipped else ""
|
|
2476
|
-
|
|
2549
|
+
if violations == 0:
|
|
2550
|
+
clean = max(total - skipped, 0)
|
|
2551
|
+
typer.echo(f" {name}: {clean}/{total} clean{suffix} {_cli_ok('OK')}")
|
|
2552
|
+
else:
|
|
2553
|
+
typer.echo(
|
|
2554
|
+
f" {name}: {violations}/{total} violating{suffix} "
|
|
2555
|
+
f"{_cli_error('VIOLATIONS')}"
|
|
2556
|
+
)
|
|
2477
2557
|
|
|
2478
2558
|
typer.echo("")
|
|
2479
2559
|
typer.echo(_cli_heading("Inspect details"))
|
|
@@ -2666,6 +2746,7 @@ def cmd_redteam_run(
|
|
|
2666
2746
|
err=True,
|
|
2667
2747
|
)
|
|
2668
2748
|
raise typer.Exit(code=1)
|
|
2749
|
+
_apply_http_redteam_defaults(resolved_target, cfg)
|
|
2669
2750
|
|
|
2670
2751
|
if output_path is not None and not output_path.is_absolute():
|
|
2671
2752
|
output_path = (workspace / output_path).resolve()
|
|
@@ -2786,6 +2867,21 @@ def _derive_redteam_target_from_agent(agent: str | None) -> dict[str, Any]:
|
|
|
2786
2867
|
return {"agent": agent}
|
|
2787
2868
|
|
|
2788
2869
|
|
|
2870
|
+
def _apply_http_redteam_defaults(target: dict[str, Any], cfg: AgentOpsConfig) -> None:
|
|
2871
|
+
if "endpoint" not in target:
|
|
2872
|
+
return
|
|
2873
|
+
if cfg.request_field:
|
|
2874
|
+
target.setdefault("request_field", cfg.request_field)
|
|
2875
|
+
if cfg.response_field:
|
|
2876
|
+
target.setdefault("response_field", cfg.response_field)
|
|
2877
|
+
if cfg.response_mode:
|
|
2878
|
+
target.setdefault("response_mode", cfg.response_mode)
|
|
2879
|
+
if cfg.headers:
|
|
2880
|
+
target.setdefault("headers", cfg.headers)
|
|
2881
|
+
if cfg.stream:
|
|
2882
|
+
target.setdefault("stream", cfg.stream.model_dump(exclude_none=True))
|
|
2883
|
+
|
|
2884
|
+
|
|
2789
2885
|
def _run_flat_schema_eval(
|
|
2790
2886
|
*,
|
|
2791
2887
|
config_path: Path,
|
{agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/core/agentops_config.py
RENAMED
|
@@ -462,6 +462,14 @@ class AssertRunConfig(BaseModel):
|
|
|
462
462
|
"results without gating the pipeline."
|
|
463
463
|
),
|
|
464
464
|
)
|
|
465
|
+
env: Dict[str, str] = Field(
|
|
466
|
+
default_factory=dict,
|
|
467
|
+
description=(
|
|
468
|
+
"Optional non-secret environment variables passed only to the "
|
|
469
|
+
"assert-ai subprocess, for example AZURE_API_BASE or "
|
|
470
|
+
"AZURE_API_VERSION."
|
|
471
|
+
),
|
|
472
|
+
)
|
|
465
473
|
|
|
466
474
|
model_config = ConfigDict(extra="forbid")
|
|
467
475
|
|
|
@@ -750,6 +758,18 @@ class AgentOpsConfig(BaseModel):
|
|
|
750
758
|
request_field: Optional[str] = None
|
|
751
759
|
response_field: Optional[str] = None
|
|
752
760
|
tool_calls_field: Optional[str] = None
|
|
761
|
+
response_fields: Dict[str, str] = Field(
|
|
762
|
+
default_factory=dict,
|
|
763
|
+
description=(
|
|
764
|
+
"Extra named fields to capture from an http-json response, mapping "
|
|
765
|
+
"a name to a dot-path into the JSON body (e.g. {context: context, "
|
|
766
|
+
"retrieved_documents: retrieved_documents}). Each captured value is "
|
|
767
|
+
"exposed to evaluator input_mapping via the '$response.<name>' "
|
|
768
|
+
"token, so RAG evaluators can score the live retrieved context "
|
|
769
|
+
"returned by the same call. Only used when response_mode is "
|
|
770
|
+
"'json'. The primary answer still comes from response_field."
|
|
771
|
+
),
|
|
772
|
+
)
|
|
753
773
|
headers: Dict[str, str] = Field(default_factory=dict)
|
|
754
774
|
auth_header_env: Optional[str] = None
|
|
755
775
|
response_mode: ResponseMode = Field(
|
|
@@ -935,6 +955,7 @@ class AgentOpsConfig(BaseModel):
|
|
|
935
955
|
self.request_field
|
|
936
956
|
or self.response_field
|
|
937
957
|
or self.tool_calls_field
|
|
958
|
+
or self.response_fields
|
|
938
959
|
or self.headers
|
|
939
960
|
or self.auth_header_env
|
|
940
961
|
or self.response_mode != "json"
|
{agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/invocations.py
RENAMED
|
@@ -658,10 +658,17 @@ def _invoke_http_json(
|
|
|
658
658
|
if isinstance(extracted, list):
|
|
659
659
|
tool_calls = extracted
|
|
660
660
|
|
|
661
|
+
captured: Dict[str, Any] = {}
|
|
662
|
+
for name, path in (config.response_fields or {}).items():
|
|
663
|
+
value = _dot_path(payload, path)
|
|
664
|
+
if value is not None:
|
|
665
|
+
captured[name] = value
|
|
666
|
+
|
|
661
667
|
return InvocationResult(
|
|
662
668
|
response=response_text.strip(),
|
|
663
669
|
latency_seconds=elapsed,
|
|
664
670
|
tool_calls=tool_calls,
|
|
671
|
+
metadata={"response_fields": captured} if captured else {},
|
|
665
672
|
)
|
|
666
673
|
|
|
667
674
|
|
{agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/orchestrator.py
RENAMED
|
@@ -760,6 +760,7 @@ def _evaluate_row(
|
|
|
760
760
|
)
|
|
761
761
|
|
|
762
762
|
metrics: List[RowMetric] = []
|
|
763
|
+
captured_fields = invocation.metadata.get("response_fields") or {}
|
|
763
764
|
for evaluator in evaluators:
|
|
764
765
|
metric = runtime.run_evaluator(
|
|
765
766
|
evaluator,
|
|
@@ -767,6 +768,7 @@ def _evaluate_row(
|
|
|
767
768
|
response=invocation.response,
|
|
768
769
|
latency_seconds=invocation.latency_seconds,
|
|
769
770
|
actual_tool_calls=invocation.tool_calls,
|
|
771
|
+
response_fields=captured_fields,
|
|
770
772
|
)
|
|
771
773
|
metrics.append(metric)
|
|
772
774
|
|
|
@@ -819,7 +821,7 @@ def _evaluate_row(
|
|
|
819
821
|
input=str(row.get("input", "")),
|
|
820
822
|
expected=row.get("expected"),
|
|
821
823
|
response=invocation.response,
|
|
822
|
-
context=row.get("context"),
|
|
824
|
+
context=captured_fields.get("context", row.get("context")),
|
|
823
825
|
latency_seconds=invocation.latency_seconds,
|
|
824
826
|
tool_calls=invocation.tool_calls,
|
|
825
827
|
metrics=metrics,
|
{agentops_accelerator-0.4.4 → agentops_accelerator-0.5.0}/src/agentops/pipeline/publisher.py
RENAMED
|
@@ -110,12 +110,18 @@ def _build_instance_rows(result: RunResult) -> List[Dict[str, Any]]:
|
|
|
110
110
|
for row in result.rows:
|
|
111
111
|
payload: Dict[str, Any] = {
|
|
112
112
|
"line_number": row.row_index,
|
|
113
|
-
"input": row.input,
|
|
114
|
-
"response": row.response,
|
|
115
|
-
"ground_truth": row.expected or "",
|
|
113
|
+
"inputs.input": row.input,
|
|
114
|
+
"inputs.response": row.response,
|
|
115
|
+
"inputs.ground_truth": row.expected or "",
|
|
116
116
|
}
|
|
117
117
|
for metric in row.metrics:
|
|
118
118
|
if metric.value is not None:
|
|
119
|
-
payload[metric.name] = metric.value
|
|
119
|
+
payload[f"outputs.{metric.name}.score"] = metric.value
|
|
120
|
+
if not metric.name.endswith("_latency_seconds"):
|
|
121
|
+
payload[f"metric.{metric.name}"] = metric.value
|
|
122
|
+
if metric.reason:
|
|
123
|
+
payload[f"outputs.{metric.name}.reason"] = metric.reason
|
|
124
|
+
if metric.error:
|
|
125
|
+
payload[f"outputs.{metric.name}.error"] = metric.error
|
|
120
126
|
rows.append(payload)
|
|
121
127
|
return rows
|
|
@@ -67,6 +67,10 @@ def _credential() -> Any:
|
|
|
67
67
|
_REASONING_MODEL_PREFIXES = ("gpt-5", "o1", "o3", "o4")
|
|
68
68
|
|
|
69
69
|
|
|
70
|
+
def _evaluator_model_name() -> Optional[str]:
|
|
71
|
+
return os.getenv("AZURE_OPENAI_MODEL_NAME") or os.getenv("AZURE_AI_MODEL_NAME")
|
|
72
|
+
|
|
73
|
+
|
|
70
74
|
def _model_config() -> Dict[str, Any]:
|
|
71
75
|
from agentops.utils.azure_endpoints import (
|
|
72
76
|
derive_openai_endpoint_from_project,
|
|
@@ -166,7 +170,9 @@ def load_evaluator(preset: EvaluatorPreset) -> EvaluatorRuntime:
|
|
|
166
170
|
if preset.class_name in _AI_ASSISTED:
|
|
167
171
|
model_config = _model_config()
|
|
168
172
|
init_kwargs["model_config"] = model_config
|
|
169
|
-
if _is_reasoning_model_deployment(model_config.get("azure_deployment")):
|
|
173
|
+
if _is_reasoning_model_deployment(
|
|
174
|
+
_evaluator_model_name() or model_config.get("azure_deployment")
|
|
175
|
+
):
|
|
170
176
|
init_kwargs["is_reasoning_model"] = True
|
|
171
177
|
if preset.class_name in _SAFETY:
|
|
172
178
|
init_kwargs["azure_ai_project"] = _project_endpoint()
|
|
@@ -292,13 +298,33 @@ def _resolve_kwargs(
|
|
|
292
298
|
*,
|
|
293
299
|
row: Dict[str, Any],
|
|
294
300
|
response: str,
|
|
301
|
+
response_fields: Optional[Dict[str, Any]] = None,
|
|
295
302
|
) -> Dict[str, Any]:
|
|
296
303
|
resolved: Dict[str, Any] = {}
|
|
297
304
|
merged = {**row, "response": response, "input": row.get("input")}
|
|
305
|
+
captured = response_fields or {}
|
|
298
306
|
for kwarg, placeholder in mapping.items():
|
|
299
307
|
if not isinstance(placeholder, str) or not placeholder.startswith("$"):
|
|
300
308
|
resolved[kwarg] = placeholder
|
|
301
309
|
continue
|
|
310
|
+
if placeholder.startswith("$response."):
|
|
311
|
+
# Live multi-field capture from an http-json target, e.g.
|
|
312
|
+
# '$response.context' resolves to the context the endpoint
|
|
313
|
+
# returned alongside the answer on the same call.
|
|
314
|
+
name = placeholder[len("$response."):]
|
|
315
|
+
value = captured.get(name)
|
|
316
|
+
if value is not None:
|
|
317
|
+
resolved[kwarg] = value
|
|
318
|
+
continue
|
|
319
|
+
if placeholder.startswith("$row."):
|
|
320
|
+
# Arbitrary dataset column, e.g. '$row.qrels' for Document
|
|
321
|
+
# Retrieval ground-truth labels that the fixed token set does
|
|
322
|
+
# not name explicitly.
|
|
323
|
+
name = placeholder[len("$row."):]
|
|
324
|
+
value = row.get(name)
|
|
325
|
+
if value is not None:
|
|
326
|
+
resolved[kwarg] = value
|
|
327
|
+
continue
|
|
302
328
|
source_key = _PLACEHOLDERS.get(placeholder)
|
|
303
329
|
if source_key is None:
|
|
304
330
|
raise ValueError(f"unknown evaluator placeholder {placeholder!r}")
|
|
@@ -353,6 +379,7 @@ def run_evaluator(
|
|
|
353
379
|
response: str,
|
|
354
380
|
latency_seconds: float,
|
|
355
381
|
actual_tool_calls: Optional[List[Any]] = None,
|
|
382
|
+
response_fields: Optional[Dict[str, Any]] = None,
|
|
356
383
|
) -> RowMetric:
|
|
357
384
|
"""Execute one evaluator on one row. Captures errors so the run continues."""
|
|
358
385
|
preset = runtime.preset
|
|
@@ -383,7 +410,12 @@ def run_evaluator(
|
|
|
383
410
|
)
|
|
384
411
|
|
|
385
412
|
try:
|
|
386
|
-
kwargs = _resolve_kwargs(preset.input_mapping, row=row, response=response)
|
|
413
|
+
kwargs = _resolve_kwargs(
|
|
414
|
+
preset.input_mapping,
|
|
415
|
+
row=row,
|
|
416
|
+
response=response,
|
|
417
|
+
response_fields=response_fields,
|
|
418
|
+
)
|
|
387
419
|
if preset.needs_conversation:
|
|
388
420
|
# Prefer the actual calls made by the agent during invocation;
|
|
389
421
|
# fall back to the dataset's expected calls if the runner did
|