agentops-accelerator 0.3.5__tar.gz → 0.3.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.claude-plugin/marketplace.json +1 -1
  2. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/plugin/marketplace.json +1 -1
  3. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/CHANGELOG.md +38 -0
  4. {agentops_accelerator-0.3.5/src/agentops_accelerator.egg-info → agentops_accelerator-0.3.7}/PKG-INFO +1 -1
  5. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/tutorial-end-to-end.md +34 -10
  6. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/tutorial-hosted-agent-quickstart.md +34 -10
  7. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/tutorial-prompt-agent-quickstart.md +42 -11
  8. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/package.json +1 -1
  9. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/plugin.json +1 -1
  10. {agentops_accelerator-0.3.5/src/agentops/templates → agentops_accelerator-0.3.7/plugins/agentops}/skills/agentops-eval/SKILL.md +30 -3
  11. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/cli/app.py +47 -0
  12. {agentops_accelerator-0.3.5/plugins/agentops → agentops_accelerator-0.3.7/src/agentops/templates}/skills/agentops-eval/SKILL.md +30 -3
  13. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7/src/agentops_accelerator.egg-info}/PKG-INFO +1 -1
  14. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops_accelerator.egg-info/SOURCES.txt +1 -0
  15. agentops_accelerator-0.3.7/tests/unit/test_eval_run_grader_errors.py +150 -0
  16. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.gitattributes +0 -0
  17. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/actions/azure-oidc-login/action.yml +0 -0
  18. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/code-quality-py.instructions.md +0 -0
  19. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/copilot-instructions.md +0 -0
  20. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/dependabot.yml +0 -0
  21. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/extensions/agentops-skills/extension.mjs +0 -0
  22. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/skills/release-management/SKILL.md +0 -0
  23. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/_build.yml +0 -0
  24. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/agentops-watchdog.yml +0 -0
  25. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/ci.yml +0 -0
  26. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/cut-release.yml +0 -0
  27. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/e2e.yml +0 -0
  28. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/release.yml +0 -0
  29. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.github/workflows/staging.yml +0 -0
  30. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.gitignore +0 -0
  31. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.pre-commit-config.yaml +0 -0
  32. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.vscode/launch.json +0 -0
  33. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.vscode/settings.json +0 -0
  34. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/.vscode/tasks.json +0 -0
  35. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/AGENTS.md +0 -0
  36. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/CONTRIBUTING.md +0 -0
  37. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/LICENSE +0 -0
  38. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/README.md +0 -0
  39. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/SECURITY.md +0 -0
  40. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/bundles.md +0 -0
  41. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/ci-github-actions.md +0 -0
  42. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/concepts.md +0 -0
  43. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/doctor-checks.md +0 -0
  44. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/doctor-explained.md +0 -0
  45. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/e2e-live-architecture.md +0 -0
  46. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/e2e-live-setup.md +0 -0
  47. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/foundry-evaluation-sdk-built-in-evaluators.md +0 -0
  48. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/how-it-works.md +0 -0
  49. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/media/agentops-diagrams.vsdx +0 -0
  50. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/media/foundry-control-plane.png +0 -0
  51. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/docs/release-process.md +0 -0
  52. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/examples/flat-quickstart/README.md +0 -0
  53. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/examples/flat-quickstart/agentops.yaml +0 -0
  54. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/examples/flat-quickstart/dataset.jsonl +0 -0
  55. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/icon.png +0 -0
  56. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/agent-app/Dockerfile +0 -0
  57. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/agent-app/app.py +0 -0
  58. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/agent-app/requirements.txt +0 -0
  59. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/bootstrap.bicep +0 -0
  60. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/bootstrap.parameters.example.json +0 -0
  61. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/infra/e2e/perrun.bicep +0 -0
  62. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/launch.json +0 -0
  63. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/media/foundry.svg +0 -0
  64. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/media/quickstart.gif +0 -0
  65. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/.vscodeignore +0 -0
  66. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/LICENSE +0 -0
  67. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/README.md +0 -0
  68. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/skills/agentops-agent/SKILL.md +0 -0
  69. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/skills/agentops-config/SKILL.md +0 -0
  70. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/skills/agentops-dataset/SKILL.md +0 -0
  71. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/skills/agentops-report/SKILL.md +0 -0
  72. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/plugins/agentops/skills/agentops-workflow/SKILL.md +0 -0
  73. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/pyproject.toml +0 -0
  74. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/create_support_agent.py +0 -0
  75. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/cut-release.ps1 +0 -0
  76. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/cut-release.sh +0 -0
  77. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_aggregate_summary.py +0 -0
  78. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_data/basic.jsonl +0 -0
  79. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_data/rag.jsonl +0 -0
  80. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_data/tools.jsonl +0 -0
  81. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_demo.py +0 -0
  82. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_hosted_agent.py +0 -0
  83. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_make_transcript.py +0 -0
  84. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/e2e_render_config.py +0 -0
  85. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/release.ps1 +0 -0
  86. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/release.sh +0 -0
  87. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/setup-e2e-new-tenant.ps1 +0 -0
  88. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/staging.ps1 +0 -0
  89. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/staging.sh +0 -0
  90. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/sync-skills.ps1 +0 -0
  91. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/scripts/sync-skills.sh +0 -0
  92. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/setup.cfg +0 -0
  93. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/__init__.py +0 -0
  94. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/__main__.py +0 -0
  95. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/__init__.py +0 -0
  96. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/_legacy_ids.py +0 -0
  97. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/analyzer.py +0 -0
  98. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/__init__.py +0 -0
  99. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/catalog.py +0 -0
  100. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/errors.py +0 -0
  101. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/foundry_config.py +0 -0
  102. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/latency.py +0 -0
  103. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/opex.py +0 -0
  104. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/opex_workspace.py +0 -0
  105. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture.py +0 -0
  106. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/__init__.py +0 -0
  107. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/content_filter.py +0 -0
  108. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/diagnostics.py +0 -0
  109. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/local_auth.py +0 -0
  110. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/managed_identity.py +0 -0
  111. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/posture_rules/network.py +0 -0
  112. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/regression.py +0 -0
  113. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/release_readiness.py +0 -0
  114. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/safety.py +0 -0
  115. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/checks/spec_conformance.py +0 -0
  116. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/cockpit.py +0 -0
  117. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/config.py +0 -0
  118. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/findings.py +0 -0
  119. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/history.py +0 -0
  120. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/knowledge/__init__.py +0 -0
  121. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/knowledge/waf-checklist.csv +0 -0
  122. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/__init__.py +0 -0
  123. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_base.py +0 -0
  124. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_bundle_rule.py +0 -0
  125. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_client.py +0 -0
  126. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_dataset_rules.py +0 -0
  127. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_engine.py +0 -0
  128. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_prompt_rules.py +0 -0
  129. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/llm_assist/_spec_rules.py +0 -0
  130. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/production_telemetry.py +0 -0
  131. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/report.py +0 -0
  132. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/server/__init__.py +0 -0
  133. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/server/app.py +0 -0
  134. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/server/auth.py +0 -0
  135. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/server/chat.py +0 -0
  136. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/server/protocol.py +0 -0
  137. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/__init__.py +0 -0
  138. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/azure_monitor.py +0 -0
  139. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/azure_resources.py +0 -0
  140. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/foundry_control.py +0 -0
  141. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/results_history.py +0 -0
  142. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/spec_detectors/__init__.py +0 -0
  143. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/spec_detectors/_base.py +0 -0
  144. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/spec_detectors/agents_md.py +0 -0
  145. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/sources/spec_detectors/spec_kit.py +0 -0
  146. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/agent/time_range.py +0 -0
  147. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/cli/__init__.py +0 -0
  148. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/__init__.py +0 -0
  149. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/agentops_config.py +0 -0
  150. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/config_loader.py +0 -0
  151. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/evaluators.py +0 -0
  152. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/release_evidence.py +0 -0
  153. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/core/results.py +0 -0
  154. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/mcp/__init__.py +0 -0
  155. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/mcp/server.py +0 -0
  156. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/__init__.py +0 -0
  157. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/cloud_results.py +0 -0
  158. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/cloud_runner.py +0 -0
  159. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/comparison.py +0 -0
  160. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/diagnostics.py +0 -0
  161. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/invocations.py +0 -0
  162. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/official_eval.py +0 -0
  163. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/orchestrator.py +0 -0
  164. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/prompt_deploy.py +0 -0
  165. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/publisher.py +0 -0
  166. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/reporter.py +0 -0
  167. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/runtime.py +0 -0
  168. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/pipeline/thresholds.py +0 -0
  169. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/__init__.py +0 -0
  170. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/cicd.py +0 -0
  171. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/eval_analysis.py +0 -0
  172. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/evidence_pack.py +0 -0
  173. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/initializer.py +0 -0
  174. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/preflight.py +0 -0
  175. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/setup_wizard.py +0 -0
  176. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/skills.py +0 -0
  177. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/trace_promotion.py +0 -0
  178. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/services/workflow_analysis.py +0 -0
  179. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/.gitignore +0 -0
  180. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/__init__.py +0 -0
  181. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/agent-server/Dockerfile +0 -0
  182. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/agent-server/README.md +0 -0
  183. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/agent-server/main.bicep +0 -0
  184. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/agent.yaml +0 -0
  185. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/agentops.yaml +0 -0
  186. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/foundry.svg +0 -0
  187. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/icon.png +0 -0
  188. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +0 -0
  189. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +0 -0
  190. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +0 -0
  191. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +0 -0
  192. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +0 -0
  193. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +0 -0
  194. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +0 -0
  195. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +0 -0
  196. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-pr.yml +0 -0
  197. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +0 -0
  198. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/project.gitignore +0 -0
  199. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/sample-traces.jsonl +0 -0
  200. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/skills/agentops-agent/SKILL.md +0 -0
  201. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/skills/agentops-config/SKILL.md +0 -0
  202. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/skills/agentops-dataset/SKILL.md +0 -0
  203. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/skills/agentops-report/SKILL.md +0 -0
  204. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/skills/agentops-workflow/SKILL.md +0 -0
  205. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/smoke.jsonl +0 -0
  206. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/waf-checklist.README.md +0 -0
  207. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/waf-checklist.csv +0 -0
  208. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-dev-azd.yml +0 -0
  209. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-dev.yml +0 -0
  210. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-prod-azd.yml +0 -0
  211. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-prod.yml +0 -0
  212. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-prompt-agent.yml +0 -0
  213. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-qa-azd.yml +0 -0
  214. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-deploy-qa.yml +0 -0
  215. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-pr-prompt-agent.yml +0 -0
  216. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-pr.yml +0 -0
  217. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/templates/workflows/agentops-watchdog.yml +0 -0
  218. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/__init__.py +0 -0
  219. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/azd_env.py +0 -0
  220. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/azure_endpoints.py +0 -0
  221. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/colors.py +0 -0
  222. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/dotenv_loader.py +0 -0
  223. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/foundry_discovery.py +0 -0
  224. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/logging.py +0 -0
  225. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/telemetry.py +0 -0
  226. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops/utils/yaml.py +0 -0
  227. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops_accelerator.egg-info/dependency_links.txt +0 -0
  228. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops_accelerator.egg-info/entry_points.txt +0 -0
  229. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops_accelerator.egg-info/requires.txt +0 -0
  230. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/src/agentops_accelerator.egg-info/top_level.txt +0 -0
  231. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/TESTING.md +0 -0
  232. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/fixtures/fake_adapter.py +0 -0
  233. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/fixtures/fake_eval_runner.py +0 -0
  234. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/integration/.gitkeep +0 -0
  235. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/integration/test_cli_flat_schema.py +0 -0
  236. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/integration/test_pipeline_smoke.py +0 -0
  237. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/.gitkeep +0 -0
  238. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_analyzer.py +0 -0
  239. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_categories.py +0 -0
  240. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_errors.py +0 -0
  241. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_foundry_config.py +0 -0
  242. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_opex.py +0 -0
  243. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_opex_workspace.py +0 -0
  244. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_regression.py +0 -0
  245. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_safety.py +0 -0
  246. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_checks_spec_conformance.py +0 -0
  247. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_cli.py +0 -0
  248. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_config.py +0 -0
  249. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_findings.py +0 -0
  250. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_history.py +0 -0
  251. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_opex_workspace_check.py +0 -0
  252. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_posture_rules.py +0 -0
  253. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_results_history.py +0 -0
  254. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agent_server.py +0 -0
  255. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_agentops_config.py +0 -0
  256. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_azd_env.py +0 -0
  257. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_azure_endpoints.py +0 -0
  258. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_azure_resources_discovery.py +0 -0
  259. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cicd.py +0 -0
  260. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cli_cockpit_connection_summary.py +0 -0
  261. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cli_cockpit_port_conflict.py +0 -0
  262. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cli_commands.py +0 -0
  263. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cli_explain.py +0 -0
  264. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cloud_results.py +0 -0
  265. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cloud_runner.py +0 -0
  266. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_cockpit.py +0 -0
  267. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_diagnostics.py +0 -0
  268. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_doctor_catalog.py +0 -0
  269. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_doctor_cli_explain.py +0 -0
  270. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_dotenv_loader.py +0 -0
  271. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_e2e_render.py +0 -0
  272. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_eval_analysis.py +0 -0
  273. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_evaluators.py +0 -0
  274. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_foundry_discovery.py +0 -0
  275. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_init_command.py +0 -0
  276. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_initializer.py +0 -0
  277. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_invocations.py +0 -0
  278. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_knowledge_loader.py +0 -0
  279. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_llm_assist.py +0 -0
  280. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_llm_assist_spec_rules.py +0 -0
  281. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_mcp_server.py +0 -0
  282. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_official_eval.py +0 -0
  283. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_pipeline_publisher.py +0 -0
  284. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_pipeline_reporter.py +0 -0
  285. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_preflight.py +0 -0
  286. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_production_telemetry.py +0 -0
  287. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_prompt_deploy.py +0 -0
  288. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_release_evidence.py +0 -0
  289. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_runtime_conversation.py +0 -0
  290. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_runtime_model_config.py +0 -0
  291. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_setup_wizard.py +0 -0
  292. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_skills.py +0 -0
  293. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_skills_sync.py +0 -0
  294. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_telemetry.py +0 -0
  295. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_time_range.py +0 -0
  296. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_trace_promotion.py +0 -0
  297. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tests/unit/test_workflow_analysis.py +0 -0
  298. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/tombstones/vscode/CDN_DEPRECATION_REQUEST.md +0 -0
  299. {agentops_accelerator-0.3.5 → agentops_accelerator-0.3.7}/uv.lock +0 -0
@@ -13,7 +13,7 @@
13
13
  "name": "agentops-accelerator",
14
14
  "source": "../../plugins/agentops",
15
15
  "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.",
16
- "version": "0.3.5",
16
+ "version": "0.3.7",
17
17
  "keywords": [
18
18
  "agentops",
19
19
  "evaluation",
@@ -13,7 +13,7 @@
13
13
  "name": "agentops-accelerator",
14
14
  "source": "../../plugins/agentops",
15
15
  "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.",
16
- "version": "0.3.5",
16
+ "version": "0.3.7",
17
17
  "keywords": [
18
18
  "agentops",
19
19
  "evaluation",
@@ -5,6 +5,44 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres
5
5
 
6
6
  ## [Unreleased]
7
7
 
8
+ ## [0.3.7] - 2026-06-01
9
+
10
+ ### Fixed
11
+ - **RBAC preflight now covers Foundry/Azure AI managed identities, not only
12
+ the signed-in user.** Cloud evaluations run server-side and some agent or
13
+ grader calls authenticate as the managed identities on the backing AI
14
+ Services account and child Foundry project. Granting `Cognitive Services
15
+ OpenAI User` only to the user still allowed intermittent grader
16
+ `AuthenticationError` failures and the v0.3.6 execution warning. The
17
+ prompt-agent, hosted-agent, and end-to-end tutorials plus the
18
+ `agentops-eval` skill now assign the same data-plane role to every managed
19
+ identity in the Foundry resource group, preventing the warning/failure path
20
+ before `agentops eval run`.
21
+
22
+ ## [0.3.6] - 2026-06-01
23
+
24
+ ### Changed
25
+ - **`agentops eval run` now distinguishes a grader *execution* failure from a
26
+ quality-gate failure.** When evaluator workers error out on a subset of rows
27
+ (auth/RBAC/timeout), no row has every grader return a score, so
28
+ `items_passed_all` is `0` and the run reports `Threshold status: FAILED` even
29
+ though every threshold that *could* be computed passed. The CLI now detects
30
+ this case (errored graders combined with all thresholds passing) and prints a
31
+ `Warning` explaining that this is an execution error, not a quality
32
+ regression, names the most common cause (data-plane RBAC granted moments
33
+ earlier that is still propagating to the evaluator workers), surfaces the
34
+ first underlying grader error, and advises waiting a few minutes before
35
+ re-running. The exit-code contract is unchanged. Added the
36
+ `_grader_error_summary` helper plus focused unit tests.
37
+ - **Corrected the RBAC propagation guidance in the tutorials and the
38
+ `agentops-eval` skill.** Data-plane role assignments on Cognitive Services
39
+ accounts can take several minutes (not 30-120 seconds) to reach the
40
+ independent, per-row evaluator workers, which can produce an *intermittent*
41
+ `FAILED` with otherwise-green thresholds on the first run after granting
42
+ access. The prompt-agent, hosted-agent, and end-to-end tutorials and the
43
+ skill now describe this symptom and tell readers to wait and re-run rather
44
+ than lower thresholds.
45
+
8
46
  ## [0.3.5] - 2026-06-01
9
47
 
10
48
  ### Changed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentops-accelerator
3
- Version: 0.3.5
3
+ Version: 0.3.7
4
4
  Summary: Release readiness gates and evidence for Microsoft Foundry agents
5
5
  License: MIT License
6
6
 
@@ -286,7 +286,7 @@ for creating agents, tools, tracing, evaluation, and red-team scans:
286
286
  https://github.com/Azure-Samples/microsoft-foundry-e2e-agent-observability-workshop/tree/2026-04-aie-europe
287
287
  ```
288
288
 
289
- ### Grant your identity data-plane access to the AI Services account
289
+ ### Grant data-plane access to your identity and Foundry managed identities
290
290
 
291
291
  Both options above (prompt agent and hosted HTTP agent) eventually drive
292
292
  an `agentops eval run` that calls chat-completions on the AI Services
@@ -300,19 +300,43 @@ what causes the eval to fail later with `PermissionDenied` on
300
300
  `Microsoft.CognitiveServices/accounts/OpenAI/deployments/chat/
301
301
  completions/action`.
302
302
 
303
- Run the assignment once per resource group that hosts a Foundry account
304
- you will evaluate against. Replace `<your-objectId>`,
305
- `<subscription-id>`, and `<resource-group>` with your own values (use
306
- `az ad signed-in-user show --query id -o tsv` to get the object ID):
303
+ Run these assignments once per resource group that hosts a Foundry account
304
+ you will evaluate against. Cloud evaluations run server-side and some agent
305
+ or grader calls may authenticate as Foundry/Azure AI managed identities, not
306
+ only as your signed-in user. Assigning the role only to your user can still
307
+ leave graders failing with `AuthenticationError`.
307
308
 
308
309
  ```powershell
310
+ $subscriptionId = az account show --query id -o tsv
311
+ $resourceGroup = "<resource-group>"
312
+ $scope = "/subscriptions/$subscriptionId/resourceGroups/$resourceGroup"
313
+ $userObjectId = az ad signed-in-user show --query id -o tsv
314
+
309
315
  az role assignment create `
310
- --assignee <your-objectId> `
316
+ --assignee $userObjectId `
311
317
  --role "Cognitive Services OpenAI User" `
312
- --scope /subscriptions/<subscription-id>/resourceGroups/<resource-group>
313
- ```
314
-
315
- Propagation usually completes within 30–120 seconds.
318
+ --scope $scope
319
+
320
+ az resource list -g $resourceGroup `
321
+ --query "[?identity.principalId!=null].identity.principalId" -o tsv |
322
+ ForEach-Object {
323
+ az role assignment create `
324
+ --assignee-object-id $_ `
325
+ --assignee-principal-type ServicePrincipal `
326
+ --role "Cognitive Services OpenAI User" `
327
+ --scope $scope
328
+ }
329
+ ```
330
+
331
+ > **Give the assignments a few minutes to propagate.** Data-plane role
332
+ > assignments on the AI Services account do **not** take effect
333
+ > instantly — propagation to the evaluator workers can take several
334
+ > minutes (occasionally up to ~15). Evaluators authenticate per call, so
335
+ > the **first eval right after granting the role may show intermittent
336
+ > `AuthenticationError` on a subset of graders and report
337
+ > `Threshold status: FAILED` even when every threshold is green**. This
338
+ > is a grader execution failure, not a quality regression — wait a few
339
+ > minutes and re-run the eval.
316
340
 
317
341
  ## 2. Create the travel eval dataset
318
342
 
@@ -310,7 +310,7 @@ If the deployed endpoint needs a bearer token:
310
310
  $env:HOSTED_AGENT_TOKEN = "<token>"
311
311
  ```
312
312
 
313
- ### Grant your identity data-plane access to the AI Services account
313
+ ### Grant data-plane access to your identity and Foundry managed identities
314
314
 
315
315
  The local AI-assisted evaluators that AgentOps runs in step 8 call
316
316
  chat-completions on the AI Services account that backs your Foundry
@@ -322,19 +322,43 @@ but `dataActions: []`. Skipping this once causes the eval to fail with
322
322
  `PermissionDenied` on `Microsoft.CognitiveServices/accounts/OpenAI/
323
323
  deployments/chat/completions/action`.
324
324
 
325
- Run the assignment once per resource group hosting a Foundry account
326
- you will evaluate against (replace `<your-objectId>`,
327
- `<subscription-id>`, and `<resource-group>` with your values; get the
328
- object ID with `az ad signed-in-user show --query id -o tsv`):
325
+ Run these assignments once per resource group hosting a Foundry account
326
+ you will evaluate against. Local AI-assisted evaluators use your identity,
327
+ while Foundry-hosted/server-side eval paths may use Azure AI managed
328
+ identities from the same resource group. Assigning only the user can still
329
+ leave server-side graders failing with `AuthenticationError`.
329
330
 
330
331
  ```powershell
332
+ $subscriptionId = az account show --query id -o tsv
333
+ $resourceGroup = "<resource-group>"
334
+ $scope = "/subscriptions/$subscriptionId/resourceGroups/$resourceGroup"
335
+ $userObjectId = az ad signed-in-user show --query id -o tsv
336
+
331
337
  az role assignment create `
332
- --assignee <your-objectId> `
338
+ --assignee $userObjectId `
333
339
  --role "Cognitive Services OpenAI User" `
334
- --scope /subscriptions/<subscription-id>/resourceGroups/<resource-group>
335
- ```
336
-
337
- Propagation usually completes within 30–120 seconds.
340
+ --scope $scope
341
+
342
+ az resource list -g $resourceGroup `
343
+ --query "[?identity.principalId!=null].identity.principalId" -o tsv |
344
+ ForEach-Object {
345
+ az role assignment create `
346
+ --assignee-object-id $_ `
347
+ --assignee-principal-type ServicePrincipal `
348
+ --role "Cognitive Services OpenAI User" `
349
+ --scope $scope
350
+ }
351
+ ```
352
+
353
+ > **Give the assignments a few minutes to propagate.** Data-plane role
354
+ > assignments on the AI Services account do **not** take effect
355
+ > instantly — propagation to the local/Foundry evaluator workers can
356
+ > take several minutes (occasionally up to ~15). Evaluators authenticate
357
+ > per call, so the **first eval right after granting the role may show
358
+ > intermittent `AuthenticationError` on a subset of graders and report
359
+ > `Threshold status: FAILED` even when every threshold is green**. This
360
+ > is a grader execution failure, not a quality regression — wait a few
361
+ > minutes and re-run the eval.
338
362
 
339
363
  ## 5. Initialize AgentOps interactively
340
364
 
@@ -241,7 +241,7 @@ Show me the planned changes and the resulting endpoints before applying.
241
241
 
242
242
  If the skill is not available, use Path A.
243
243
 
244
- ### Grant your identity data-plane access to the AI Services account
244
+ ### Grant data-plane access to your identity and Foundry managed identities
245
245
 
246
246
  Creating a project through the portal only assigns you `Foundry User` **at
247
247
  the project scope**. That role does not cover the OpenAI data-plane actions
@@ -257,23 +257,54 @@ Skipping this step is what causes the eval grader to fail later with::
257
257
  data action `Microsoft.CognitiveServices/accounts/OpenAI/deployments/
258
258
  chat/completions/action` to perform `POST /openai/deployments/...`
259
259
 
260
- Run the assignment once per resource group that hosts a Foundry account
261
- you will evaluate against. Replace `<your-objectId>`, `<subscription-id>`,
262
- and `<resource-group>` with your own values (you can get the object ID
263
- with `az ad signed-in-user show --query id -o tsv`):
260
+ Run these assignments once per resource group that hosts a Foundry account
261
+ you will evaluate against. Cloud evaluations run server-side: the agent call
262
+ and graders may authenticate as Foundry/Azure AI managed identities, not only
263
+ as your signed-in user. Assigning the role only to your user can still leave
264
+ some graders failing with `AuthenticationError`.
264
265
 
265
266
  ```powershell
267
+ $subscriptionId = az account show --query id -o tsv
268
+ $resourceGroup = "<resource-group>"
269
+ $scope = "/subscriptions/$subscriptionId/resourceGroups/$resourceGroup"
270
+ $userObjectId = az ad signed-in-user show --query id -o tsv
271
+
272
+ # User running local commands / creating cloud evals.
266
273
  az role assignment create `
267
- --assignee <your-objectId> `
274
+ --assignee $userObjectId `
268
275
  --role "Cognitive Services OpenAI User" `
269
- --scope /subscriptions/<subscription-id>/resourceGroups/<resource-group>
276
+ --scope $scope
277
+
278
+ # Foundry/Azure AI managed identities used by server-side agent/evaluator calls.
279
+ az resource list -g $resourceGroup `
280
+ --query "[?identity.principalId!=null].identity.principalId" -o tsv |
281
+ ForEach-Object {
282
+ az role assignment create `
283
+ --assignee-object-id $_ `
284
+ --assignee-principal-type ServicePrincipal `
285
+ --role "Cognitive Services OpenAI User" `
286
+ --scope $scope
287
+ }
270
288
  ```
271
289
 
272
290
  Repeat these commands with the `travel-agent-dev` resource group if the dev
273
- project lives in a different RG. The assignment usually propagates within
274
- 30–120 seconds. AgentOps Doctor will detect the missing assignment in a
275
- future release, but until then this is a manual one-time setup step per
276
- new environment.
291
+ project lives in a different RG.
292
+
293
+ > **Give the assignments a few minutes to propagate.** Data-plane role
294
+ > assignments on the AI Services account do **not** take effect
295
+ > instantly — propagation to the Foundry evaluator workers can take
296
+ > several minutes (occasionally up to ~15). The cloud eval runs each
297
+ > grader as an independent worker that authenticates separately, so the
298
+ > **first run right after granting the role may show intermittent
299
+ > `AuthenticationError` on a subset of graders and report
300
+ > `Threshold status: FAILED` even when every threshold is green** (no
301
+ > single row had all graders succeed). This is a grader execution
302
+ > failure, not a quality regression. Wait a few minutes and re-run
303
+ > `agentops eval run` — once propagation finishes, every grader scores
304
+ > and the gate passes.
305
+
306
+ AgentOps Doctor will detect the missing assignment in a future release,
307
+ but until then this is a manual one-time setup step per new environment.
277
308
 
278
309
  ## 4. Seed `travel-agent` in the sandbox project
279
310
 
@@ -2,7 +2,7 @@
2
2
  "name": "agentops-accelerator",
3
3
  "displayName": "AgentOps Accelerator — Skills for GitHub Copilot",
4
4
  "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.",
5
- "version": "0.3.5",
5
+ "version": "0.3.7",
6
6
  "publisher": "AgentOpsAccelerator",
7
7
  "icon": "icon.png",
8
8
  "license": "MIT",
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agentops-accelerator",
3
3
  "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.",
4
- "version": "0.3.5",
4
+ "version": "0.3.7",
5
5
  "author": {
6
6
  "name": "AgentOps Accelerator",
7
7
  "url": "https://github.com/Azure/agentops"
@@ -41,8 +41,12 @@ PermissionDenied … lacks the required data action
41
41
  'Microsoft.CognitiveServices/accounts/OpenAI/deployments/chat/completions/action'
42
42
  ```
43
43
 
44
- Run this preflight before Step 1 - it is idempotent (Azure returns
45
- `RoleAssignmentExists` if already granted) and takes ~5 seconds:
44
+ Run this preflight before Step 1. It must grant the role to the signed-in
45
+ user **and** to the Foundry/Azure AI managed identities in the resource
46
+ group. Cloud evaluations run server-side and some graders authenticate as
47
+ those managed identities, so assigning only the user can still produce
48
+ intermittent `AuthenticationError` grader failures. The commands are
49
+ idempotent (`RoleAssignmentExists` means the role was already granted):
46
50
 
47
51
  ```bash
48
52
  # 1. Resolve the AI Services account from agentops.yaml / .azure/<env>/.env
@@ -55,11 +59,23 @@ SUB_ID=$(az account show --query id -o tsv)
55
59
  RG=$(az cognitiveservices account list --subscription "$SUB_ID" --query "[?name=='$ACCOUNT_NAME'].resourceGroup | [0]" -o tsv)
56
60
  OBJ_ID=$(az ad signed-in-user show --query id -o tsv)
57
61
 
58
- # 3. Grant data-plane access at the RG scope (covers sandbox + future evals)
62
+ # 3. Grant the user data-plane access at RG scope.
59
63
  az role assignment create \
60
64
  --assignee "$OBJ_ID" \
61
65
  --role "Cognitive Services OpenAI User" \
62
66
  --scope "/subscriptions/$SUB_ID/resourceGroups/$RG"
67
+
68
+ # 4. Grant the same data-plane role to Foundry/Azure AI managed identities.
69
+ az resource list -g "$RG" \
70
+ --query "[?identity.principalId!=null].identity.principalId" -o tsv |
71
+ while read -r PRINCIPAL_ID; do
72
+ [ -z "$PRINCIPAL_ID" ] && continue
73
+ az role assignment create \
74
+ --assignee-object-id "$PRINCIPAL_ID" \
75
+ --assignee-principal-type ServicePrincipal \
76
+ --role "Cognitive Services OpenAI User" \
77
+ --scope "/subscriptions/$SUB_ID/resourceGroups/$RG"
78
+ done
63
79
  ```
64
80
 
65
81
  PowerShell equivalent: replace `$(...)` with the PowerShell variable
@@ -73,6 +89,17 @@ Skip this step only if the user explicitly says the role is already
73
89
  assigned, or if a previous `agentops eval run` succeeded against the
74
90
  same Foundry account.
75
91
 
92
+ **Propagation:** data-plane role assignments do not take effect
93
+ instantly — allow several minutes (occasionally up to ~15) before the
94
+ first eval. The cloud/local graders authenticate per call, so if the
95
+ user runs an eval immediately after this preflight and sees intermittent
96
+ `AuthenticationError` on a subset of graders plus
97
+ `Threshold status: FAILED` while the visible thresholds are green, that
98
+ is propagation lag (a grader **execution** failure), not a quality
99
+ regression. Tell the user to wait a few minutes and re-run
100
+ `agentops eval run`; do not treat it as a failing gate or start changing
101
+ thresholds.
102
+
76
103
  ## Step 1 - Analyze evaluation setup
77
104
 
78
105
  Run the deterministic local triage first:
@@ -2055,10 +2055,57 @@ def _run_flat_schema_eval(
2055
2055
  if result.summary.overall_passed:
2056
2056
  typer.echo(f"{_cli_label('Threshold status')}: {style('PASSED', 'bold', 'green')}")
2057
2057
  return
2058
+
2059
+ # Distinguish a genuine quality-gate failure from grader *execution*
2060
+ # errors. When evaluator workers error (auth/RBAC/timeout) on a subset of
2061
+ # rows, no row has every grader succeed, so `items_passed_all` is 0 and the
2062
+ # gate reports FAILED even though every threshold that *could* be computed
2063
+ # passed. Surfacing this prevents users from chasing a phantom quality
2064
+ # regression - the most common cause is data-plane RBAC granted moments
2065
+ # earlier that is still propagating to the evaluator workers.
2066
+ errored, total, first_error = _grader_error_summary(result)
2067
+ all_thresholds_passed = (
2068
+ result.summary.thresholds_total > 0
2069
+ and result.summary.thresholds_passed == result.summary.thresholds_total
2070
+ )
2071
+ if errored and all_thresholds_passed:
2072
+ typer.echo(
2073
+ f"{_cli_warn('Warning')}: {errored} of {total} grader execution(s) "
2074
+ "errored, so no dataset row had every grader return a score. This is "
2075
+ "a grader execution failure, not a quality regression - every "
2076
+ "threshold that could be computed passed. The most common cause is "
2077
+ "data-plane RBAC granted recently that is still propagating to the "
2078
+ "evaluator workers; wait a few minutes and re-run `agentops eval run`.",
2079
+ err=True,
2080
+ )
2081
+ if first_error:
2082
+ typer.echo(f"{_cli_warn('Warning')}: first grader error: {first_error}", err=True)
2083
+
2058
2084
  typer.echo(f"{_cli_label('Threshold status')}: {style('FAILED', 'bold', 'red')}")
2059
2085
  raise typer.Exit(code=exit_code_from(result))
2060
2086
 
2061
2087
 
2088
+ def _grader_error_summary(result) -> tuple[int, int, Optional[str]]:
2089
+ """Return ``(errored_metric_count, total_metric_count, first_error)``.
2090
+
2091
+ Walks every per-row metric in the run so the CLI can tell a grader
2092
+ *execution* failure (auth/RBAC/timeout) apart from a quality-gate failure.
2093
+ The first non-empty error string is lifted out as the actionable cause.
2094
+ """
2095
+ errored = 0
2096
+ total = 0
2097
+ first_error: Optional[str] = None
2098
+ for row in result.rows:
2099
+ for metric in row.metrics:
2100
+ total += 1
2101
+ err = getattr(metric, "error", None)
2102
+ if isinstance(err, str) and err.strip():
2103
+ errored += 1
2104
+ if first_error is None:
2105
+ first_error = err.strip()
2106
+ return errored, total, first_error
2107
+
2108
+
2062
2109
  def _default_flat_output_dir(config_path: Path) -> Path:
2063
2110
  base = config_path.parent / ".agentops" / "results"
2064
2111
  timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%SZ")
@@ -41,8 +41,12 @@ PermissionDenied … lacks the required data action
41
41
  'Microsoft.CognitiveServices/accounts/OpenAI/deployments/chat/completions/action'
42
42
  ```
43
43
 
44
- Run this preflight before Step 1 - it is idempotent (Azure returns
45
- `RoleAssignmentExists` if already granted) and takes ~5 seconds:
44
+ Run this preflight before Step 1. It must grant the role to the signed-in
45
+ user **and** to the Foundry/Azure AI managed identities in the resource
46
+ group. Cloud evaluations run server-side and some graders authenticate as
47
+ those managed identities, so assigning only the user can still produce
48
+ intermittent `AuthenticationError` grader failures. The commands are
49
+ idempotent (`RoleAssignmentExists` means the role was already granted):
46
50
 
47
51
  ```bash
48
52
  # 1. Resolve the AI Services account from agentops.yaml / .azure/<env>/.env
@@ -55,11 +59,23 @@ SUB_ID=$(az account show --query id -o tsv)
55
59
  RG=$(az cognitiveservices account list --subscription "$SUB_ID" --query "[?name=='$ACCOUNT_NAME'].resourceGroup | [0]" -o tsv)
56
60
  OBJ_ID=$(az ad signed-in-user show --query id -o tsv)
57
61
 
58
- # 3. Grant data-plane access at the RG scope (covers sandbox + future evals)
62
+ # 3. Grant the user data-plane access at RG scope.
59
63
  az role assignment create \
60
64
  --assignee "$OBJ_ID" \
61
65
  --role "Cognitive Services OpenAI User" \
62
66
  --scope "/subscriptions/$SUB_ID/resourceGroups/$RG"
67
+
68
+ # 4. Grant the same data-plane role to Foundry/Azure AI managed identities.
69
+ az resource list -g "$RG" \
70
+ --query "[?identity.principalId!=null].identity.principalId" -o tsv |
71
+ while read -r PRINCIPAL_ID; do
72
+ [ -z "$PRINCIPAL_ID" ] && continue
73
+ az role assignment create \
74
+ --assignee-object-id "$PRINCIPAL_ID" \
75
+ --assignee-principal-type ServicePrincipal \
76
+ --role "Cognitive Services OpenAI User" \
77
+ --scope "/subscriptions/$SUB_ID/resourceGroups/$RG"
78
+ done
63
79
  ```
64
80
 
65
81
  PowerShell equivalent: replace `$(...)` with the PowerShell variable
@@ -73,6 +89,17 @@ Skip this step only if the user explicitly says the role is already
73
89
  assigned, or if a previous `agentops eval run` succeeded against the
74
90
  same Foundry account.
75
91
 
92
+ **Propagation:** data-plane role assignments do not take effect
93
+ instantly — allow several minutes (occasionally up to ~15) before the
94
+ first eval. The cloud/local graders authenticate per call, so if the
95
+ user runs an eval immediately after this preflight and sees intermittent
96
+ `AuthenticationError` on a subset of graders plus
97
+ `Threshold status: FAILED` while the visible thresholds are green, that
98
+ is propagation lag (a grader **execution** failure), not a quality
99
+ regression. Tell the user to wait a few minutes and re-run
100
+ `agentops eval run`; do not treat it as a failing gate or start changing
101
+ thresholds.
102
+
76
103
  ## Step 1 - Analyze evaluation setup
77
104
 
78
105
  Run the deterministic local triage first:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentops-accelerator
3
- Version: 0.3.5
3
+ Version: 0.3.7
4
4
  Summary: Release readiness gates and evidence for Microsoft Foundry agents
5
5
  License: MIT License
6
6
 
@@ -268,6 +268,7 @@ tests/unit/test_doctor_cli_explain.py
268
268
  tests/unit/test_dotenv_loader.py
269
269
  tests/unit/test_e2e_render.py
270
270
  tests/unit/test_eval_analysis.py
271
+ tests/unit/test_eval_run_grader_errors.py
271
272
  tests/unit/test_evaluators.py
272
273
  tests/unit/test_foundry_discovery.py
273
274
  tests/unit/test_init_command.py
@@ -0,0 +1,150 @@
1
+ """CLI behaviour when graders *execute* but a subset errors out.
2
+
3
+ A grader execution error (auth/RBAC/timeout) is not a quality regression, but
4
+ because ``items_passed_all`` requires every grader on a row to succeed, a single
5
+ errored grader flips ``overall_passed`` to ``False`` and the run reports
6
+ ``Threshold status: FAILED`` even though every computable threshold passed.
7
+
8
+ The CLI must surface that distinction loudly so users (the most common trigger
9
+ is data-plane RBAC that is still propagating) do not chase a phantom quality
10
+ failure or start lowering thresholds.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from pathlib import Path
17
+
18
+ from typer.testing import CliRunner
19
+
20
+ from agentops.cli.app import _grader_error_summary, app
21
+ from agentops.core.results import (
22
+ RowMetric,
23
+ RowResult,
24
+ RunResult,
25
+ RunSummary,
26
+ TargetInfo,
27
+ ThresholdEvaluation,
28
+ )
29
+
30
+ runner = CliRunner()
31
+
32
+ _AUTH_ERROR = (
33
+ "FAILED_EXECUTION: (UserError) OpenAI API hits AuthenticationError: "
34
+ "Principal does not have access to API/Operation."
35
+ )
36
+
37
+
38
+ def _result_with_partial_grader_errors() -> RunResult:
39
+ """One row where coherence scored but similarity errored on auth."""
40
+ row = RowResult(
41
+ row_index=0,
42
+ input="plan a trip",
43
+ expected="an itinerary",
44
+ response="here is an itinerary",
45
+ metrics=[
46
+ RowMetric(name="coherence", value=5.0),
47
+ RowMetric(name="similarity", value=None, error=_AUTH_ERROR),
48
+ ],
49
+ )
50
+ summary = RunSummary(
51
+ items_total=1,
52
+ items_passed_all=0, # the errored grader means no row passed all
53
+ items_pass_rate=0.0,
54
+ thresholds_total=1,
55
+ thresholds_passed=1, # every computable threshold passed
56
+ threshold_pass_rate=1.0,
57
+ overall_passed=False,
58
+ )
59
+ return RunResult(
60
+ started_at="2026-06-01T00:00:00+00:00",
61
+ finished_at="2026-06-01T00:01:00+00:00",
62
+ duration_seconds=60.0,
63
+ target=TargetInfo(kind="foundry_prompt", raw="travel-agent:2"),
64
+ dataset_path="dataset.jsonl",
65
+ evaluators=["CoherenceEvaluator", "SimilarityEvaluator"],
66
+ rows=[row],
67
+ aggregate_metrics={"coherence": 5.0},
68
+ thresholds=[
69
+ ThresholdEvaluation(
70
+ metric="coherence",
71
+ criteria=">=",
72
+ expected=">=3",
73
+ actual="5",
74
+ passed=True,
75
+ )
76
+ ],
77
+ summary=summary,
78
+ )
79
+
80
+
81
+ def test_grader_error_summary_counts_and_lifts_first_error() -> None:
82
+ errored, total, first_error = _grader_error_summary(
83
+ _result_with_partial_grader_errors()
84
+ )
85
+ assert (errored, total) == (1, 2)
86
+ assert first_error is not None
87
+ assert "AuthenticationError" in first_error
88
+
89
+
90
+ def _write_minimal_config(tmp_path: Path) -> Path:
91
+ dataset = tmp_path / "dataset.jsonl"
92
+ dataset.write_text(json.dumps({"input": "hi", "expected": "hi"}), encoding="utf-8")
93
+ config = tmp_path / "agentops.yaml"
94
+ config.write_text(
95
+ json.dumps(
96
+ {"version": 1, "agent": "model:gpt-4o", "dataset": str(dataset)}
97
+ ),
98
+ encoding="utf-8",
99
+ )
100
+ return config
101
+
102
+
103
+ def test_eval_run_warns_on_partial_grader_errors(tmp_path, monkeypatch) -> None:
104
+ config = _write_minimal_config(tmp_path)
105
+ output = tmp_path / "out"
106
+ output.mkdir()
107
+
108
+ crafted = _result_with_partial_grader_errors()
109
+ import agentops.pipeline.orchestrator as orch
110
+
111
+ monkeypatch.setattr(orch, "run_evaluation", lambda *a, **k: crafted)
112
+
113
+ result = runner.invoke(
114
+ app,
115
+ ["eval", "run", "--config", str(config), "--output", str(output)],
116
+ )
117
+
118
+ # A grader-execution failure keeps the gate-failed exit code...
119
+ assert result.exit_code == 2, result.output
120
+ # ...but the user is told it is an execution error, not a quality failure.
121
+ assert "grader execution(s) errored" in result.output
122
+ assert "propagating" in result.output
123
+ assert "AuthenticationError" in result.output
124
+ assert "FAILED" in result.output
125
+
126
+
127
+ def test_eval_run_no_warning_when_no_grader_errors(tmp_path, monkeypatch) -> None:
128
+ config = _write_minimal_config(tmp_path)
129
+ output = tmp_path / "out"
130
+ output.mkdir()
131
+
132
+ clean = _result_with_partial_grader_errors()
133
+ # Drop the errored grader so the row is clean and the gate genuinely passes.
134
+ clean.rows[0].metrics = [RowMetric(name="coherence", value=5.0)]
135
+ clean.summary.items_passed_all = 1
136
+ clean.summary.items_pass_rate = 1.0
137
+ clean.summary.overall_passed = True
138
+
139
+ import agentops.pipeline.orchestrator as orch
140
+
141
+ monkeypatch.setattr(orch, "run_evaluation", lambda *a, **k: clean)
142
+
143
+ result = runner.invoke(
144
+ app,
145
+ ["eval", "run", "--config", str(config), "--output", str(output)],
146
+ )
147
+
148
+ assert result.exit_code == 0, result.output
149
+ assert "PASSED" in result.output
150
+ assert "grader execution(s) errored" not in result.output