agentops-accelerator 0.3.5__tar.gz → 0.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.claude-plugin/marketplace.json +1 -1
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/plugin/marketplace.json +1 -1
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/CHANGELOG.md +38 -0
- {agentops_accelerator-0.3.5/src/agentops_accelerator.egg-info → agentops_accelerator-0.3.7}/PKG-INFO +1 -1
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/tutorial-end-to-end.md +34 -10
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/tutorial-hosted-agent-quickstart.md +34 -10
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/tutorial-prompt-agent-quickstart.md +42 -11
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/package.json +1 -1
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/plugin.json +1 -1
- {agentops_accelerator-0.3.5/src/agentops/templates → agentops_accelerator-0.3.7/plugins/agentops}/skills/agentops-eval/SKILL.md +30 -3
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/cli/app.py +47 -0
- {agentops_accelerator-0.3.5/plugins/agentops → agentops_accelerator-0.3.7/src/agentops/templates}/skills/agentops-eval/SKILL.md +30 -3
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7/src/agentops_accelerator.egg-info}/PKG-INFO +1 -1
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops_accelerator.egg-info/SOURCES.txt +1 -0
- agentops_accelerator-0.3.7/tests/unit/test_eval_run_grader_errors.py +150 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.gitattributes +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/actions/azure-oidc-login/action.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/code-quality-py.instructions.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/copilot-instructions.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/dependabot.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/extensions/agentops-skills/extension.mjs +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/skills/release-management/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/_build.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/agentops-watchdog.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/ci.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/cut-release.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/e2e.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/release.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/staging.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.gitignore +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.pre-commit-config.yaml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.vscode/launch.json +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.vscode/settings.json +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.vscode/tasks.json +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/AGENTS.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/CONTRIBUTING.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/LICENSE +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/README.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/SECURITY.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/bundles.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/ci-github-actions.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/concepts.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/doctor-checks.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/doctor-explained.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/e2e-live-architecture.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/e2e-live-setup.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/foundry-evaluation-sdk-built-in-evaluators.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/how-it-works.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/media/agentops-diagrams.vsdx +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/media/foundry-control-plane.png +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/release-process.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/examples/flat-quickstart/README.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/examples/flat-quickstart/agentops.yaml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/examples/flat-quickstart/dataset.jsonl +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/icon.png +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/agent-app/Dockerfile +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/agent-app/app.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/agent-app/requirements.txt +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/bootstrap.bicep +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/bootstrap.parameters.example.json +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/perrun.bicep +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/launch.json +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/media/foundry.svg +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/media/quickstart.gif +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/.vscodeignore +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/LICENSE +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/README.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/skills/agentops-agent/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/skills/agentops-config/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/skills/agentops-dataset/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/skills/agentops-report/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/skills/agentops-workflow/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/pyproject.toml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/create_support_agent.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/cut-release.ps1 +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/cut-release.sh +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_aggregate_summary.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_data/basic.jsonl +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_data/rag.jsonl +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_data/tools.jsonl +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_demo.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_hosted_agent.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_make_transcript.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_render_config.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/release.ps1 +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/release.sh +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/setup-e2e-new-tenant.ps1 +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/staging.ps1 +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/staging.sh +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/sync-skills.ps1 +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/sync-skills.sh +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/setup.cfg +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/__main__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/_legacy_ids.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/analyzer.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/catalog.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/errors.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/foundry_config.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/latency.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/opex.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/opex_workspace.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/content_filter.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/diagnostics.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/local_auth.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/managed_identity.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/network.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/regression.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/release_readiness.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/safety.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/spec_conformance.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/cockpit.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/config.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/findings.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/history.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/knowledge/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/knowledge/waf-checklist.csv +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_base.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_bundle_rule.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_client.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_dataset_rules.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_engine.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_prompt_rules.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_spec_rules.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/production_telemetry.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/report.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/server/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/server/app.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/server/auth.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/server/chat.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/server/protocol.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/azure_monitor.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/azure_resources.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/foundry_control.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/results_history.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/spec_detectors/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/spec_detectors/_base.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/spec_detectors/agents_md.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/spec_detectors/spec_kit.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/time_range.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/cli/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/agentops_config.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/config_loader.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/evaluators.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/release_evidence.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/results.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/mcp/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/mcp/server.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/cloud_results.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/cloud_runner.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/comparison.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/diagnostics.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/invocations.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/official_eval.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/orchestrator.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/prompt_deploy.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/publisher.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/reporter.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/runtime.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/thresholds.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/cicd.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/eval_analysis.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/evidence_pack.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/initializer.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/preflight.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/setup_wizard.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/skills.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/trace_promotion.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/workflow_analysis.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/.gitignore +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/agent-server/Dockerfile +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/agent-server/README.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/agent-server/main.bicep +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/agent.yaml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/agentops.yaml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/foundry.svg +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/icon.png +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-pr.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/project.gitignore +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/sample-traces.jsonl +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/skills/agentops-agent/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/skills/agentops-config/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/skills/agentops-dataset/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/skills/agentops-report/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/skills/agentops-workflow/SKILL.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/smoke.jsonl +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/waf-checklist.README.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/waf-checklist.csv +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-dev-azd.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-dev.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-prod-azd.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-prod.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-prompt-agent.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-qa-azd.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-qa.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-pr-prompt-agent.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-pr.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-watchdog.yml +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/__init__.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/azd_env.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/azure_endpoints.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/colors.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/dotenv_loader.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/foundry_discovery.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/logging.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/telemetry.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/yaml.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops_accelerator.egg-info/dependency_links.txt +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops_accelerator.egg-info/entry_points.txt +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops_accelerator.egg-info/requires.txt +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops_accelerator.egg-info/top_level.txt +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/TESTING.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/fixtures/fake_adapter.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/fixtures/fake_eval_runner.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/integration/.gitkeep +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/integration/test_cli_flat_schema.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/integration/test_pipeline_smoke.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/.gitkeep +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_analyzer.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_categories.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_errors.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_foundry_config.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_opex.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_opex_workspace.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_regression.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_safety.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_spec_conformance.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_cli.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_config.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_findings.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_history.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_opex_workspace_check.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_posture_rules.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_results_history.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_server.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agentops_config.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_azd_env.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_azure_endpoints.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_azure_resources_discovery.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cicd.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cli_cockpit_connection_summary.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cli_cockpit_port_conflict.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cli_commands.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cli_explain.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cloud_results.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cloud_runner.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cockpit.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_diagnostics.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_doctor_catalog.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_doctor_cli_explain.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_dotenv_loader.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_e2e_render.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_eval_analysis.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_evaluators.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_foundry_discovery.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_init_command.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_initializer.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_invocations.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_knowledge_loader.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_llm_assist.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_llm_assist_spec_rules.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_mcp_server.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_official_eval.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_pipeline_publisher.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_pipeline_reporter.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_preflight.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_production_telemetry.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_prompt_deploy.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_release_evidence.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_runtime_conversation.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_runtime_model_config.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_setup_wizard.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_skills.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_skills_sync.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_telemetry.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_time_range.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_trace_promotion.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_workflow_analysis.py +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tombstones/vscode/CDN_DEPRECATION_REQUEST.md +0 -0
- {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/uv.lock +0 -0
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
"name": "agentops-accelerator",
|
|
14
14
|
"source": "../../plugins/agentops",
|
|
15
15
|
"description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.",
|
|
16
|
-
"version": "0.3.5",
|
|
16
|
+
"version": "0.3.7",
|
|
17
17
|
"keywords": [
|
|
18
18
|
"agentops",
|
|
19
19
|
"evaluation",
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
"name": "agentops-accelerator",
|
|
14
14
|
"source": "../../plugins/agentops",
|
|
15
15
|
"description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.",
|
|
16
|
-
"version": "0.3.5",
|
|
16
|
+
"version": "0.3.7",
|
|
17
17
|
"keywords": [
|
|
18
18
|
"agentops",
|
|
19
19
|
"evaluation",
|
|
@@ -5,6 +5,44 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres
|
|
|
5
5
|
|
|
6
6
|
## [Unreleased]
|
|
7
7
|
|
|
8
|
+
## [0.3.7] - 2026-06-01
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- **RBAC preflight now covers Foundry/Azure AI managed identities, not only
|
|
12
|
+
the signed-in user.** Cloud evaluations run server-side and some agent or
|
|
13
|
+
grader calls authenticate as the managed identities on the backing AI
|
|
14
|
+
Services account and child Foundry project. Granting `Cognitive Services
|
|
15
|
+
OpenAI User` only to the user still allowed intermittent grader
|
|
16
|
+
`AuthenticationError` failures and the v0.3.6 execution warning. The
|
|
17
|
+
prompt-agent, hosted-agent, and end-to-end tutorials plus the
|
|
18
|
+
`agentops-eval` skill now assign the same data-plane role to every managed
|
|
19
|
+
identity in the Foundry resource group, preventing the warning/failure path
|
|
20
|
+
before `agentops eval run`.
|
|
21
|
+
|
|
22
|
+
## [0.3.6] - 2026-06-01
|
|
23
|
+
|
|
24
|
+
### Changed
|
|
25
|
+
- **`agentops eval run` now distinguishes a grader *execution* failure from a
|
|
26
|
+
quality-gate failure.** When evaluator workers error out on a subset of rows
|
|
27
|
+
(auth/RBAC/timeout), no row has every grader return a score, so
|
|
28
|
+
`items_passed_all` is `0` and the run reports `Threshold status: FAILED` even
|
|
29
|
+
though every threshold that *could* be computed passed. The CLI now detects
|
|
30
|
+
this case (errored graders combined with all thresholds passing) and prints a
|
|
31
|
+
`Warning` explaining that this is an execution error, not a quality
|
|
32
|
+
regression, names the most common cause (data-plane RBAC granted moments
|
|
33
|
+
earlier that is still propagating to the evaluator workers), surfaces the
|
|
34
|
+
first underlying grader error, and advises waiting a few minutes before
|
|
35
|
+
re-running. The exit-code contract is unchanged. Added the
|
|
36
|
+
`_grader_error_summary` helper plus focused unit tests.
|
|
37
|
+
- **Corrected the RBAC propagation guidance in the tutorials and the
|
|
38
|
+
`agentops-eval` skill.** Data-plane role assignments on Cognitive Services
|
|
39
|
+
accounts can take several minutes (not 30-120 seconds) to reach the
|
|
40
|
+
independent, per-row evaluator workers, which can produce an *intermittent*
|
|
41
|
+
`FAILED` with otherwise-green thresholds on the first run after granting
|
|
42
|
+
access. The prompt-agent, hosted-agent, and end-to-end tutorials and the
|
|
43
|
+
skill now describe this symptom and tell readers to wait and re-run rather
|
|
44
|
+
than lower thresholds.
|
|
45
|
+
|
|
8
46
|
## [0.3.5] - 2026-06-01
|
|
9
47
|
|
|
10
48
|
### Changed
|
|
@@ -286,7 +286,7 @@ for creating agents, tools, tracing, evaluation, and red-team scans:
|
|
|
286
286
|
https://github.com/Azure-Samples/microsoft-foundry-e2e-agent-observability-workshop/tree/2026-04-aie-europe
|
|
287
287
|
```
|
|
288
288
|
|
|
289
|
-
### Grant
|
|
289
|
+
### Grant data-plane access to your identity and Foundry managed identities
|
|
290
290
|
|
|
291
291
|
Both options above (prompt agent and hosted HTTP agent) eventually drive
|
|
292
292
|
an `agentops eval run` that calls chat-completions on the AI Services
|
|
@@ -300,19 +300,43 @@ what causes the eval to fail later with `PermissionDenied` on
|
|
|
300
300
|
`Microsoft.CognitiveServices/accounts/OpenAI/deployments/chat/
|
|
301
301
|
completions/action`.
|
|
302
302
|
|
|
303
|
-
Run
|
|
304
|
-
you will evaluate against.
|
|
305
|
-
|
|
306
|
-
|
|
303
|
+
Run these assignments once per resource group that hosts a Foundry account
|
|
304
|
+
you will evaluate against. Cloud evaluations run server-side and some agent
|
|
305
|
+
or grader calls may authenticate as Foundry/Azure AI managed identities, not
|
|
306
|
+
only as your signed-in user. Assigning the role only to your user can still
|
|
307
|
+
leave graders failing with `AuthenticationError`.
|
|
307
308
|
|
|
308
309
|
```powershell
|
|
310
|
+
$subscriptionId = az account show --query id -o tsv
|
|
311
|
+
$resourceGroup = "<resource-group>"
|
|
312
|
+
$scope = "/subscriptions/$subscriptionId/resourceGroups/$resourceGroup"
|
|
313
|
+
$userObjectId = az ad signed-in-user show --query id -o tsv
|
|
314
|
+
|
|
309
315
|
az role assignment create `
|
|
310
|
-
--assignee
|
|
316
|
+
--assignee $userObjectId `
|
|
311
317
|
--role "Cognitive Services OpenAI User" `
|
|
312
|
-
--scope
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
318
|
+
--scope $scope
|
|
319
|
+
|
|
320
|
+
az resource list -g $resourceGroup `
|
|
321
|
+
--query "[?identity.principalId!=null].identity.principalId" -o tsv |
|
|
322
|
+
ForEach-Object {
|
|
323
|
+
az role assignment create `
|
|
324
|
+
--assignee-object-id $_ `
|
|
325
|
+
--assignee-principal-type ServicePrincipal `
|
|
326
|
+
--role "Cognitive Services OpenAI User" `
|
|
327
|
+
--scope $scope
|
|
328
|
+
}
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
> **Give the assignment a few minutes to propagate.** Data-plane role
|
|
332
|
+
> assignments on the AI Services account do **not** take effect
|
|
333
|
+
> instantly — propagation to the evaluator workers can take several
|
|
334
|
+
> minutes (occasionally up to ~15). Evaluators authenticate per call, so
|
|
335
|
+
> the **first eval right after granting the role may show intermittent
|
|
336
|
+
> `AuthenticationError` on a subset of graders and report
|
|
337
|
+
> `Threshold status: FAILED` even when every threshold is green**. This
|
|
338
|
+
> is a grader execution failure, not a quality regression — wait a few
|
|
339
|
+
> minutes and re-run the eval.
|
|
316
340
|
|
|
317
341
|
## 2. Create the travel eval dataset
|
|
318
342
|
|
{agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/tutorial-hosted-agent-quickstart.md
RENAMED
|
@@ -310,7 +310,7 @@ If the deployed endpoint needs a bearer token:
|
|
|
310
310
|
$env:HOSTED_AGENT_TOKEN = "<token>"
|
|
311
311
|
```
|
|
312
312
|
|
|
313
|
-
### Grant
|
|
313
|
+
### Grant data-plane access to your identity and Foundry managed identities
|
|
314
314
|
|
|
315
315
|
The local AI-assisted evaluators that AgentOps runs in step 8 call
|
|
316
316
|
chat-completions on the AI Services account that backs your Foundry
|
|
@@ -322,19 +322,43 @@ but `dataActions: []`. Skipping this once causes the eval to fail with
|
|
|
322
322
|
`PermissionDenied` on `Microsoft.CognitiveServices/accounts/OpenAI/
|
|
323
323
|
deployments/chat/completions/action`.
|
|
324
324
|
|
|
325
|
-
Run
|
|
326
|
-
you will evaluate against
|
|
327
|
-
|
|
328
|
-
|
|
325
|
+
Run these assignments once per resource group hosting a Foundry account
|
|
326
|
+
you will evaluate against. Local AI-assisted evaluators use your identity,
|
|
327
|
+
while Foundry-hosted/server-side eval paths may use Azure AI managed
|
|
328
|
+
identities from the same resource group. Assigning only the user can still
|
|
329
|
+
leave server-side graders failing with `AuthenticationError`.
|
|
329
330
|
|
|
330
331
|
```powershell
|
|
332
|
+
$subscriptionId = az account show --query id -o tsv
|
|
333
|
+
$resourceGroup = "<resource-group>"
|
|
334
|
+
$scope = "/subscriptions/$subscriptionId/resourceGroups/$resourceGroup"
|
|
335
|
+
$userObjectId = az ad signed-in-user show --query id -o tsv
|
|
336
|
+
|
|
331
337
|
az role assignment create `
|
|
332
|
-
--assignee
|
|
338
|
+
--assignee $userObjectId `
|
|
333
339
|
--role "Cognitive Services OpenAI User" `
|
|
334
|
-
--scope
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
340
|
+
--scope $scope
|
|
341
|
+
|
|
342
|
+
az resource list -g $resourceGroup `
|
|
343
|
+
--query "[?identity.principalId!=null].identity.principalId" -o tsv |
|
|
344
|
+
ForEach-Object {
|
|
345
|
+
az role assignment create `
|
|
346
|
+
--assignee-object-id $_ `
|
|
347
|
+
--assignee-principal-type ServicePrincipal `
|
|
348
|
+
--role "Cognitive Services OpenAI User" `
|
|
349
|
+
--scope $scope
|
|
350
|
+
}
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
> **Give the assignment a few minutes to propagate.** Data-plane role
|
|
354
|
+
> assignments on the AI Services account do **not** take effect
|
|
355
|
+
> instantly — propagation to the local/Foundry evaluator workers can
|
|
356
|
+
> take several minutes (occasionally up to ~15). Evaluators authenticate
|
|
357
|
+
> per call, so the **first eval right after granting the role may show
|
|
358
|
+
> intermittent `AuthenticationError` on a subset of graders and report
|
|
359
|
+
> `Threshold status: FAILED` even when every threshold is green**. This
|
|
360
|
+
> is a grader execution failure, not a quality regression — wait a few
|
|
361
|
+
> minutes and re-run the eval.
|
|
338
362
|
|
|
339
363
|
## 5. Initialize AgentOps interactively
|
|
340
364
|
|
{agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/tutorial-prompt-agent-quickstart.md
RENAMED
|
@@ -241,7 +241,7 @@ Show me the planned changes and the resulting endpoints before applying.
|
|
|
241
241
|
|
|
242
242
|
If the skill is not available, use Path A.
|
|
243
243
|
|
|
244
|
-
### Grant
|
|
244
|
+
### Grant data-plane access to your identity and Foundry managed identities
|
|
245
245
|
|
|
246
246
|
Creating a project through the portal only assigns you `Foundry User` **at
|
|
247
247
|
the project scope**. That role does not cover the OpenAI data-plane actions
|
|
@@ -257,23 +257,54 @@ Skipping this step is what causes the eval grader to fail later with::
|
|
|
257
257
|
data action `Microsoft.CognitiveServices/accounts/OpenAI/deployments/
|
|
258
258
|
chat/completions/action` to perform `POST /openai/deployments/...`
|
|
259
259
|
|
|
260
|
-
Run
|
|
261
|
-
you will evaluate against.
|
|
262
|
-
and
|
|
263
|
-
|
|
260
|
+
Run these assignments once per resource group that hosts a Foundry account
|
|
261
|
+
you will evaluate against. Cloud evaluations run server-side: the agent call
|
|
262
|
+
and graders may authenticate as Foundry/Azure AI managed identities, not only
|
|
263
|
+
as your signed-in user. Assigning the role only to your user can still leave
|
|
264
|
+
some graders failing with `AuthenticationError`.
|
|
264
265
|
|
|
265
266
|
```powershell
|
|
267
|
+
$subscriptionId = az account show --query id -o tsv
|
|
268
|
+
$resourceGroup = "<resource-group>"
|
|
269
|
+
$scope = "/subscriptions/$subscriptionId/resourceGroups/$resourceGroup"
|
|
270
|
+
$userObjectId = az ad signed-in-user show --query id -o tsv
|
|
271
|
+
|
|
272
|
+
# User running local commands / creating cloud evals.
|
|
266
273
|
az role assignment create `
|
|
267
|
-
--assignee
|
|
274
|
+
--assignee $userObjectId `
|
|
268
275
|
--role "Cognitive Services OpenAI User" `
|
|
269
|
-
--scope
|
|
276
|
+
--scope $scope
|
|
277
|
+
|
|
278
|
+
# Foundry/Azure AI managed identities used by server-side agent/evaluator calls.
|
|
279
|
+
az resource list -g $resourceGroup `
|
|
280
|
+
--query "[?identity.principalId!=null].identity.principalId" -o tsv |
|
|
281
|
+
ForEach-Object {
|
|
282
|
+
az role assignment create `
|
|
283
|
+
--assignee-object-id $_ `
|
|
284
|
+
--assignee-principal-type ServicePrincipal `
|
|
285
|
+
--role "Cognitive Services OpenAI User" `
|
|
286
|
+
--scope $scope
|
|
287
|
+
}
|
|
270
288
|
```
|
|
271
289
|
|
|
272
290
|
Repeat the command with the `travel-agent-dev` resource group if the dev
|
|
273
|
-
project lives in a different RG.
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
291
|
+
project lives in a different RG.
|
|
292
|
+
|
|
293
|
+
> **Give the assignment a few minutes to propagate.** Data-plane role
|
|
294
|
+
> assignments on the AI Services account do **not** take effect
|
|
295
|
+
> instantly — propagation to the Foundry evaluator workers can take
|
|
296
|
+
> several minutes (occasionally up to ~15). The cloud eval runs each
|
|
297
|
+
> grader as an independent worker that authenticates separately, so the
|
|
298
|
+
> **first run right after granting the role may show intermittent
|
|
299
|
+
> `AuthenticationError` on a subset of graders and report
|
|
300
|
+
> `Threshold status: FAILED` even when every threshold is green** (no
|
|
301
|
+
> single row had all graders succeed). This is a grader execution
|
|
302
|
+
> failure, not a quality regression. Wait a few minutes and re-run
|
|
303
|
+
> `agentops eval run` — once propagation finishes, every grader scores
|
|
304
|
+
> and the gate passes.
|
|
305
|
+
|
|
306
|
+
AgentOps Doctor will detect the missing assignment in a future release,
|
|
307
|
+
but until then this is a manual one-time setup step per new environment.
|
|
277
308
|
|
|
278
309
|
## 4. Seed `travel-agent` in the sandbox project
|
|
279
310
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "agentops-accelerator",
|
|
3
3
|
"displayName": "AgentOps Accelerator — Skills for GitHub Copilot",
|
|
4
4
|
"description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.",
|
|
5
|
-
"version": "0.3.5",
|
|
5
|
+
"version": "0.3.7",
|
|
6
6
|
"publisher": "AgentOpsAccelerator",
|
|
7
7
|
"icon": "icon.png",
|
|
8
8
|
"license": "MIT",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentops-accelerator",
|
|
3
3
|
"description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.",
|
|
4
|
-
"version": "0.3.5",
|
|
4
|
+
"version": "0.3.7",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "AgentOps Accelerator",
|
|
7
7
|
"url": "https://github.com/Azure/agentops"
|
|
@@ -41,8 +41,12 @@ PermissionDenied … lacks the required data action
|
|
|
41
41
|
'Microsoft.CognitiveServices/accounts/OpenAI/deployments/chat/completions/action'
|
|
42
42
|
```
|
|
43
43
|
|
|
44
|
-
Run this preflight before Step 1
|
|
45
|
-
|
|
44
|
+
Run this preflight before Step 1. It must grant the role to the signed-in
|
|
45
|
+
user **and** to the Foundry/Azure AI managed identities in the resource
|
|
46
|
+
group. Cloud evaluations run server-side and some graders authenticate as
|
|
47
|
+
those managed identities, so assigning only the user can still produce
|
|
48
|
+
intermittent `AuthenticationError` grader failures. The commands are
|
|
49
|
+
idempotent (`RoleAssignmentExists` means the role was already granted):
|
|
46
50
|
|
|
47
51
|
```bash
|
|
48
52
|
# 1. Resolve the AI Services account from agentops.yaml / .azure/<env>/.env
|
|
@@ -55,11 +59,23 @@ SUB_ID=$(az account show --query id -o tsv)
|
|
|
55
59
|
RG=$(az cognitiveservices account list --subscription "$SUB_ID" --query "[?name=='$ACCOUNT_NAME'].resourceGroup | [0]" -o tsv)
|
|
56
60
|
OBJ_ID=$(az ad signed-in-user show --query id -o tsv)
|
|
57
61
|
|
|
58
|
-
# 3. Grant data-plane access at
|
|
62
|
+
# 3. Grant the user data-plane access at RG scope.
|
|
59
63
|
az role assignment create \
|
|
60
64
|
--assignee "$OBJ_ID" \
|
|
61
65
|
--role "Cognitive Services OpenAI User" \
|
|
62
66
|
--scope "/subscriptions/$SUB_ID/resourceGroups/$RG"
|
|
67
|
+
|
|
68
|
+
# 4. Grant the same data-plane role to Foundry/Azure AI managed identities.
|
|
69
|
+
az resource list -g "$RG" \
|
|
70
|
+
--query "[?identity.principalId!=null].identity.principalId" -o tsv |
|
|
71
|
+
while read -r PRINCIPAL_ID; do
|
|
72
|
+
[ -z "$PRINCIPAL_ID" ] && continue
|
|
73
|
+
az role assignment create \
|
|
74
|
+
--assignee-object-id "$PRINCIPAL_ID" \
|
|
75
|
+
--assignee-principal-type ServicePrincipal \
|
|
76
|
+
--role "Cognitive Services OpenAI User" \
|
|
77
|
+
--scope "/subscriptions/$SUB_ID/resourceGroups/$RG"
|
|
78
|
+
done
|
|
63
79
|
```
|
|
64
80
|
|
|
65
81
|
PowerShell equivalent: replace `$(...)` with the PowerShell variable
|
|
@@ -73,6 +89,17 @@ Skip this step only if the user explicitly says the role is already
|
|
|
73
89
|
assigned, or if a previous `agentops eval run` succeeded against the
|
|
74
90
|
same Foundry account.
|
|
75
91
|
|
|
92
|
+
**Propagation:** data-plane role assignments do not take effect
|
|
93
|
+
instantly — allow several minutes (occasionally up to ~15) before the
|
|
94
|
+
first eval. The cloud/local graders authenticate per call, so if the
|
|
95
|
+
user runs an eval immediately after this preflight and sees intermittent
|
|
96
|
+
`AuthenticationError` on a subset of graders plus
|
|
97
|
+
`Threshold status: FAILED` while the visible thresholds are green, that
|
|
98
|
+
is propagation lag (a grader **execution** failure), not a quality
|
|
99
|
+
regression. Tell the user to wait a few minutes and re-run
|
|
100
|
+
`agentops eval run`; do not treat it as a failing gate or start changing
|
|
101
|
+
thresholds.
|
|
102
|
+
|
|
76
103
|
## Step 1 - Analyze evaluation setup
|
|
77
104
|
|
|
78
105
|
Run the deterministic local triage first:
|
|
@@ -2055,10 +2055,57 @@ def _run_flat_schema_eval(
|
|
|
2055
2055
|
if result.summary.overall_passed:
|
|
2056
2056
|
typer.echo(f"{_cli_label('Threshold status')}: {style('PASSED', 'bold', 'green')}")
|
|
2057
2057
|
return
|
|
2058
|
+
|
|
2059
|
+
# Distinguish a genuine quality-gate failure from grader *execution*
|
|
2060
|
+
# errors. When evaluator workers error (auth/RBAC/timeout) on a subset of
|
|
2061
|
+
# rows, no row has every grader succeed, so `items_passed_all` is 0 and the
|
|
2062
|
+
# gate reports FAILED even though every threshold that *could* be computed
|
|
2063
|
+
# passed. Surfacing this prevents users from chasing a phantom quality
|
|
2064
|
+
# regression - the most common cause is data-plane RBAC granted moments
|
|
2065
|
+
# earlier that is still propagating to the evaluator workers.
|
|
2066
|
+
errored, total, first_error = _grader_error_summary(result)
|
|
2067
|
+
all_thresholds_passed = (
|
|
2068
|
+
result.summary.thresholds_total > 0
|
|
2069
|
+
and result.summary.thresholds_passed == result.summary.thresholds_total
|
|
2070
|
+
)
|
|
2071
|
+
if errored and all_thresholds_passed:
|
|
2072
|
+
typer.echo(
|
|
2073
|
+
f"{_cli_warn('Warning')}: {errored} of {total} grader execution(s) "
|
|
2074
|
+
"errored, so no dataset row had every grader return a score. This is "
|
|
2075
|
+
"a grader execution failure, not a quality regression - every "
|
|
2076
|
+
"threshold that could be computed passed. The most common cause is "
|
|
2077
|
+
"data-plane RBAC granted recently that is still propagating to the "
|
|
2078
|
+
"evaluator workers; wait a few minutes and re-run `agentops eval run`.",
|
|
2079
|
+
err=True,
|
|
2080
|
+
)
|
|
2081
|
+
if first_error:
|
|
2082
|
+
typer.echo(f"{_cli_warn('Warning')}: first grader error: {first_error}", err=True)
|
|
2083
|
+
|
|
2058
2084
|
typer.echo(f"{_cli_label('Threshold status')}: {style('FAILED', 'bold', 'red')}")
|
|
2059
2085
|
raise typer.Exit(code=exit_code_from(result))
|
|
2060
2086
|
|
|
2061
2087
|
|
|
2088
|
+
def _grader_error_summary(result) -> tuple[int, int, Optional[str]]:
|
|
2089
|
+
"""Return ``(errored_metric_count, total_metric_count, first_error)``.
|
|
2090
|
+
|
|
2091
|
+
Walks every per-row metric in the run so the CLI can tell a grader
|
|
2092
|
+
*execution* failure (auth/RBAC/timeout) apart from a quality-gate failure.
|
|
2093
|
+
The first non-empty error string is lifted out as the actionable cause.
|
|
2094
|
+
"""
|
|
2095
|
+
errored = 0
|
|
2096
|
+
total = 0
|
|
2097
|
+
first_error: Optional[str] = None
|
|
2098
|
+
for row in result.rows:
|
|
2099
|
+
for metric in row.metrics:
|
|
2100
|
+
total += 1
|
|
2101
|
+
err = getattr(metric, "error", None)
|
|
2102
|
+
if isinstance(err, str) and err.strip():
|
|
2103
|
+
errored += 1
|
|
2104
|
+
if first_error is None:
|
|
2105
|
+
first_error = err.strip()
|
|
2106
|
+
return errored, total, first_error
|
|
2107
|
+
|
|
2108
|
+
|
|
2062
2109
|
def _default_flat_output_dir(config_path: Path) -> Path:
|
|
2063
2110
|
base = config_path.parent / ".agentops" / "results"
|
|
2064
2111
|
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%SZ")
|
|
@@ -41,8 +41,12 @@ PermissionDenied … lacks the required data action
|
|
|
41
41
|
'Microsoft.CognitiveServices/accounts/OpenAI/deployments/chat/completions/action'
|
|
42
42
|
```
|
|
43
43
|
|
|
44
|
-
Run this preflight before Step 1
|
|
45
|
-
|
|
44
|
+
Run this preflight before Step 1. It must grant the role to the signed-in
|
|
45
|
+
user **and** to the Foundry/Azure AI managed identities in the resource
|
|
46
|
+
group. Cloud evaluations run server-side and some graders authenticate as
|
|
47
|
+
those managed identities, so assigning only the user can still produce
|
|
48
|
+
intermittent `AuthenticationError` grader failures. The commands are
|
|
49
|
+
idempotent (`RoleAssignmentExists` means the role was already granted):
|
|
46
50
|
|
|
47
51
|
```bash
|
|
48
52
|
# 1. Resolve the AI Services account from agentops.yaml / .azure/<env>/.env
|
|
@@ -55,11 +59,23 @@ SUB_ID=$(az account show --query id -o tsv)
|
|
|
55
59
|
RG=$(az cognitiveservices account list --subscription "$SUB_ID" --query "[?name=='$ACCOUNT_NAME'].resourceGroup | [0]" -o tsv)
|
|
56
60
|
OBJ_ID=$(az ad signed-in-user show --query id -o tsv)
|
|
57
61
|
|
|
58
|
-
# 3. Grant data-plane access at
|
|
62
|
+
# 3. Grant the user data-plane access at RG scope.
|
|
59
63
|
az role assignment create \
|
|
60
64
|
--assignee "$OBJ_ID" \
|
|
61
65
|
--role "Cognitive Services OpenAI User" \
|
|
62
66
|
--scope "/subscriptions/$SUB_ID/resourceGroups/$RG"
|
|
67
|
+
|
|
68
|
+
# 4. Grant the same data-plane role to Foundry/Azure AI managed identities.
|
|
69
|
+
az resource list -g "$RG" \
|
|
70
|
+
--query "[?identity.principalId!=null].identity.principalId" -o tsv |
|
|
71
|
+
while read -r PRINCIPAL_ID; do
|
|
72
|
+
[ -z "$PRINCIPAL_ID" ] && continue
|
|
73
|
+
az role assignment create \
|
|
74
|
+
--assignee-object-id "$PRINCIPAL_ID" \
|
|
75
|
+
--assignee-principal-type ServicePrincipal \
|
|
76
|
+
--role "Cognitive Services OpenAI User" \
|
|
77
|
+
--scope "/subscriptions/$SUB_ID/resourceGroups/$RG"
|
|
78
|
+
done
|
|
63
79
|
```
|
|
64
80
|
|
|
65
81
|
PowerShell equivalent: replace `$(...)` with the PowerShell variable
|
|
@@ -73,6 +89,17 @@ Skip this step only if the user explicitly says the role is already
|
|
|
73
89
|
assigned, or if a previous `agentops eval run` succeeded against the
|
|
74
90
|
same Foundry account.
|
|
75
91
|
|
|
92
|
+
**Propagation:** data-plane role assignments do not take effect
|
|
93
|
+
instantly — allow several minutes (occasionally up to ~15) before the
|
|
94
|
+
first eval. The cloud/local graders authenticate per call, so if the
|
|
95
|
+
user runs an eval immediately after this preflight and sees intermittent
|
|
96
|
+
`AuthenticationError` on a subset of graders plus
|
|
97
|
+
`Threshold status: FAILED` while the visible thresholds are green, that
|
|
98
|
+
is propagation lag (a grader **execution** failure), not a quality
|
|
99
|
+
regression. Tell the user to wait a few minutes and re-run
|
|
100
|
+
`agentops eval run`; do not treat it as a failing gate or start changing
|
|
101
|
+
thresholds.
|
|
102
|
+
|
|
76
103
|
## Step 1 - Analyze evaluation setup
|
|
77
104
|
|
|
78
105
|
Run the deterministic local triage first:
|
|
@@ -268,6 +268,7 @@ tests/unit/test_doctor_cli_explain.py
|
|
|
268
268
|
tests/unit/test_dotenv_loader.py
|
|
269
269
|
tests/unit/test_e2e_render.py
|
|
270
270
|
tests/unit/test_eval_analysis.py
|
|
271
|
+
tests/unit/test_eval_run_grader_errors.py
|
|
271
272
|
tests/unit/test_evaluators.py
|
|
272
273
|
tests/unit/test_foundry_discovery.py
|
|
273
274
|
tests/unit/test_init_command.py
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""CLI behaviour when graders *execute* but a subset errors out.
|
|
2
|
+
|
|
3
|
+
A grader execution error (auth/RBAC/timeout) is not a quality regression, but
|
|
4
|
+
because ``items_passed_all`` requires every grader on a row to succeed, a single
|
|
5
|
+
errored grader flips ``overall_passed`` to ``False`` and the run reports
|
|
6
|
+
``Threshold status: FAILED`` even though every computable threshold passed.
|
|
7
|
+
|
|
8
|
+
The CLI must surface that distinction loudly so users (the most common trigger
|
|
9
|
+
is data-plane RBAC that is still propagating) do not chase a phantom quality
|
|
10
|
+
failure or start lowering thresholds.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
from typer.testing import CliRunner
|
|
19
|
+
|
|
20
|
+
from agentops.cli.app import _grader_error_summary, app
|
|
21
|
+
from agentops.core.results import (
|
|
22
|
+
RowMetric,
|
|
23
|
+
RowResult,
|
|
24
|
+
RunResult,
|
|
25
|
+
RunSummary,
|
|
26
|
+
TargetInfo,
|
|
27
|
+
ThresholdEvaluation,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
runner = CliRunner()
|
|
31
|
+
|
|
32
|
+
_AUTH_ERROR = (
|
|
33
|
+
"FAILED_EXECUTION: (UserError) OpenAI API hits AuthenticationError: "
|
|
34
|
+
"Principal does not have access to API/Operation."
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _result_with_partial_grader_errors() -> RunResult:
|
|
39
|
+
"""One row where coherence scored but similarity errored on auth."""
|
|
40
|
+
row = RowResult(
|
|
41
|
+
row_index=0,
|
|
42
|
+
input="plan a trip",
|
|
43
|
+
expected="an itinerary",
|
|
44
|
+
response="here is an itinerary",
|
|
45
|
+
metrics=[
|
|
46
|
+
RowMetric(name="coherence", value=5.0),
|
|
47
|
+
RowMetric(name="similarity", value=None, error=_AUTH_ERROR),
|
|
48
|
+
],
|
|
49
|
+
)
|
|
50
|
+
summary = RunSummary(
|
|
51
|
+
items_total=1,
|
|
52
|
+
items_passed_all=0, # the errored grader means no row passed all
|
|
53
|
+
items_pass_rate=0.0,
|
|
54
|
+
thresholds_total=1,
|
|
55
|
+
thresholds_passed=1, # every computable threshold passed
|
|
56
|
+
threshold_pass_rate=1.0,
|
|
57
|
+
overall_passed=False,
|
|
58
|
+
)
|
|
59
|
+
return RunResult(
|
|
60
|
+
started_at="2026-06-01T00:00:00+00:00",
|
|
61
|
+
finished_at="2026-06-01T00:01:00+00:00",
|
|
62
|
+
duration_seconds=60.0,
|
|
63
|
+
target=TargetInfo(kind="foundry_prompt", raw="travel-agent:2"),
|
|
64
|
+
dataset_path="dataset.jsonl",
|
|
65
|
+
evaluators=["CoherenceEvaluator", "SimilarityEvaluator"],
|
|
66
|
+
rows=[row],
|
|
67
|
+
aggregate_metrics={"coherence": 5.0},
|
|
68
|
+
thresholds=[
|
|
69
|
+
ThresholdEvaluation(
|
|
70
|
+
metric="coherence",
|
|
71
|
+
criteria=">=",
|
|
72
|
+
expected=">=3",
|
|
73
|
+
actual="5",
|
|
74
|
+
passed=True,
|
|
75
|
+
)
|
|
76
|
+
],
|
|
77
|
+
summary=summary,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_grader_error_summary_counts_and_lifts_first_error() -> None:
|
|
82
|
+
errored, total, first_error = _grader_error_summary(
|
|
83
|
+
_result_with_partial_grader_errors()
|
|
84
|
+
)
|
|
85
|
+
assert (errored, total) == (1, 2)
|
|
86
|
+
assert first_error is not None
|
|
87
|
+
assert "AuthenticationError" in first_error
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _write_minimal_config(tmp_path: Path) -> Path:
|
|
91
|
+
dataset = tmp_path / "dataset.jsonl"
|
|
92
|
+
dataset.write_text(json.dumps({"input": "hi", "expected": "hi"}), encoding="utf-8")
|
|
93
|
+
config = tmp_path / "agentops.yaml"
|
|
94
|
+
config.write_text(
|
|
95
|
+
json.dumps(
|
|
96
|
+
{"version": 1, "agent": "model:gpt-4o", "dataset": str(dataset)}
|
|
97
|
+
),
|
|
98
|
+
encoding="utf-8",
|
|
99
|
+
)
|
|
100
|
+
return config
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_eval_run_warns_on_partial_grader_errors(tmp_path, monkeypatch) -> None:
|
|
104
|
+
config = _write_minimal_config(tmp_path)
|
|
105
|
+
output = tmp_path / "out"
|
|
106
|
+
output.mkdir()
|
|
107
|
+
|
|
108
|
+
crafted = _result_with_partial_grader_errors()
|
|
109
|
+
import agentops.pipeline.orchestrator as orch
|
|
110
|
+
|
|
111
|
+
monkeypatch.setattr(orch, "run_evaluation", lambda *a, **k: crafted)
|
|
112
|
+
|
|
113
|
+
result = runner.invoke(
|
|
114
|
+
app,
|
|
115
|
+
["eval", "run", "--config", str(config), "--output", str(output)],
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
# A grader-execution failure keeps the gate-failed exit code...
|
|
119
|
+
assert result.exit_code == 2, result.output
|
|
120
|
+
# ...but the user is told it is an execution error, not a quality failure.
|
|
121
|
+
assert "grader execution(s) errored" in result.output
|
|
122
|
+
assert "propagating" in result.output
|
|
123
|
+
assert "AuthenticationError" in result.output
|
|
124
|
+
assert "FAILED" in result.output
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def test_eval_run_no_warning_when_no_grader_errors(tmp_path, monkeypatch) -> None:
|
|
128
|
+
config = _write_minimal_config(tmp_path)
|
|
129
|
+
output = tmp_path / "out"
|
|
130
|
+
output.mkdir()
|
|
131
|
+
|
|
132
|
+
clean = _result_with_partial_grader_errors()
|
|
133
|
+
# Drop the errored grader so the row is clean and the gate genuinely passes.
|
|
134
|
+
clean.rows[0].metrics = [RowMetric(name="coherence", value=5.0)]
|
|
135
|
+
clean.summary.items_passed_all = 1
|
|
136
|
+
clean.summary.items_pass_rate = 1.0
|
|
137
|
+
clean.summary.overall_passed = True
|
|
138
|
+
|
|
139
|
+
import agentops.pipeline.orchestrator as orch
|
|
140
|
+
|
|
141
|
+
monkeypatch.setattr(orch, "run_evaluation", lambda *a, **k: clean)
|
|
142
|
+
|
|
143
|
+
result = runner.invoke(
|
|
144
|
+
app,
|
|
145
|
+
["eval", "run", "--config", str(config), "--output", str(output)],
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
assert result.exit_code == 0, result.output
|
|
149
|
+
assert "PASSED" in result.output
|
|
150
|
+
assert "grader execution(s) errored" not in result.output
|
|
File without changes
|
|
File without changes
|
{agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/code-quality-py.instructions.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/skills/release-management/SKILL.md
RENAMED
|
File without changes
|