agentops-accelerator 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. agentops/__init__.py +10 -0
  2. agentops/__main__.py +6 -0
  3. agentops/agent/__init__.py +12 -0
  4. agentops/agent/_legacy_ids.py +92 -0
  5. agentops/agent/analyzer.py +207 -0
  6. agentops/agent/checks/__init__.py +1 -0
  7. agentops/agent/checks/catalog.py +880 -0
  8. agentops/agent/checks/errors.py +279 -0
  9. agentops/agent/checks/foundry_config.py +75 -0
  10. agentops/agent/checks/latency.py +84 -0
  11. agentops/agent/checks/opex.py +157 -0
  12. agentops/agent/checks/opex_workspace.py +874 -0
  13. agentops/agent/checks/posture.py +36 -0
  14. agentops/agent/checks/posture_rules/__init__.py +53 -0
  15. agentops/agent/checks/posture_rules/content_filter.py +59 -0
  16. agentops/agent/checks/posture_rules/diagnostics.py +74 -0
  17. agentops/agent/checks/posture_rules/local_auth.py +55 -0
  18. agentops/agent/checks/posture_rules/managed_identity.py +59 -0
  19. agentops/agent/checks/posture_rules/network.py +68 -0
  20. agentops/agent/checks/regression.py +78 -0
  21. agentops/agent/checks/release_readiness.py +182 -0
  22. agentops/agent/checks/safety.py +247 -0
  23. agentops/agent/checks/spec_conformance.py +375 -0
  24. agentops/agent/cockpit.py +5159 -0
  25. agentops/agent/config.py +240 -0
  26. agentops/agent/findings.py +113 -0
  27. agentops/agent/history.py +142 -0
  28. agentops/agent/knowledge/__init__.py +182 -0
  29. agentops/agent/knowledge/waf-checklist.csv +39 -0
  30. agentops/agent/llm_assist/__init__.py +16 -0
  31. agentops/agent/llm_assist/_base.py +124 -0
  32. agentops/agent/llm_assist/_bundle_rule.py +154 -0
  33. agentops/agent/llm_assist/_client.py +347 -0
  34. agentops/agent/llm_assist/_dataset_rules.py +191 -0
  35. agentops/agent/llm_assist/_engine.py +106 -0
  36. agentops/agent/llm_assist/_prompt_rules.py +291 -0
  37. agentops/agent/llm_assist/_spec_rules.py +235 -0
  38. agentops/agent/production_telemetry.py +430 -0
  39. agentops/agent/report.py +207 -0
  40. agentops/agent/server/__init__.py +1 -0
  41. agentops/agent/server/app.py +84 -0
  42. agentops/agent/server/auth.py +94 -0
  43. agentops/agent/server/chat.py +44 -0
  44. agentops/agent/server/protocol.py +72 -0
  45. agentops/agent/sources/__init__.py +1 -0
  46. agentops/agent/sources/azure_monitor.py +523 -0
  47. agentops/agent/sources/azure_resources.py +602 -0
  48. agentops/agent/sources/foundry_control.py +174 -0
  49. agentops/agent/sources/results_history.py +494 -0
  50. agentops/agent/sources/spec_detectors/__init__.py +42 -0
  51. agentops/agent/sources/spec_detectors/_base.py +58 -0
  52. agentops/agent/sources/spec_detectors/agents_md.py +75 -0
  53. agentops/agent/sources/spec_detectors/spec_kit.py +172 -0
  54. agentops/agent/time_range.py +117 -0
  55. agentops/cli/__init__.py +1 -0
  56. agentops/cli/app.py +4823 -0
  57. agentops/core/__init__.py +1 -0
  58. agentops/core/agentops_config.py +592 -0
  59. agentops/core/config_loader.py +22 -0
  60. agentops/core/evaluators.py +480 -0
  61. agentops/core/release_evidence.py +56 -0
  62. agentops/core/results.py +117 -0
  63. agentops/mcp/__init__.py +10 -0
  64. agentops/mcp/server.py +232 -0
  65. agentops/pipeline/__init__.py +8 -0
  66. agentops/pipeline/cloud_results.py +189 -0
  67. agentops/pipeline/cloud_runner.py +901 -0
  68. agentops/pipeline/comparison.py +108 -0
  69. agentops/pipeline/diagnostics.py +51 -0
  70. agentops/pipeline/invocations.py +535 -0
  71. agentops/pipeline/official_eval.py +414 -0
  72. agentops/pipeline/orchestrator.py +775 -0
  73. agentops/pipeline/prompt_deploy.py +377 -0
  74. agentops/pipeline/publisher.py +121 -0
  75. agentops/pipeline/reporter.py +202 -0
  76. agentops/pipeline/runtime.py +409 -0
  77. agentops/pipeline/thresholds.py +84 -0
  78. agentops/services/__init__.py +1 -0
  79. agentops/services/cicd.py +720 -0
  80. agentops/services/eval_analysis.py +848 -0
  81. agentops/services/evidence_pack.py +757 -0
  82. agentops/services/initializer.py +86 -0
  83. agentops/services/preflight.py +470 -0
  84. agentops/services/setup_wizard.py +709 -0
  85. agentops/services/skills.py +643 -0
  86. agentops/services/trace_promotion.py +300 -0
  87. agentops/services/workflow_analysis.py +1129 -0
  88. agentops/templates/.gitignore +15 -0
  89. agentops/templates/__init__.py +1 -0
  90. agentops/templates/agent-server/Dockerfile +23 -0
  91. agentops/templates/agent-server/README.md +61 -0
  92. agentops/templates/agent-server/main.bicep +94 -0
  93. agentops/templates/agent.yaml +87 -0
  94. agentops/templates/agentops.yaml +58 -0
  95. agentops/templates/foundry.svg +71 -0
  96. agentops/templates/icon.png +0 -0
  97. agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +118 -0
  98. agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +73 -0
  99. agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +141 -0
  100. agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +94 -0
  101. agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +167 -0
  102. agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +118 -0
  103. agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +68 -0
  104. agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +210 -0
  105. agentops/templates/pipelines/azuredevops/agentops-pr.yml +155 -0
  106. agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +106 -0
  107. agentops/templates/project.gitignore +36 -0
  108. agentops/templates/sample-traces.jsonl +3 -0
  109. agentops/templates/skills/agentops-agent/SKILL.md +137 -0
  110. agentops/templates/skills/agentops-config/SKILL.md +113 -0
  111. agentops/templates/skills/agentops-dataset/SKILL.md +84 -0
  112. agentops/templates/skills/agentops-eval/SKILL.md +189 -0
  113. agentops/templates/skills/agentops-report/SKILL.md +71 -0
  114. agentops/templates/skills/agentops-workflow/SKILL.md +471 -0
  115. agentops/templates/smoke.jsonl +3 -0
  116. agentops/templates/waf-checklist.README.md +84 -0
  117. agentops/templates/waf-checklist.csv +22 -0
  118. agentops/templates/workflows/agentops-deploy-dev-azd.yml +166 -0
  119. agentops/templates/workflows/agentops-deploy-dev.yml +187 -0
  120. agentops/templates/workflows/agentops-deploy-prod-azd.yml +183 -0
  121. agentops/templates/workflows/agentops-deploy-prod.yml +171 -0
  122. agentops/templates/workflows/agentops-deploy-prompt-agent.yml +197 -0
  123. agentops/templates/workflows/agentops-deploy-qa-azd.yml +156 -0
  124. agentops/templates/workflows/agentops-deploy-qa.yml +145 -0
  125. agentops/templates/workflows/agentops-pr-prompt-agent.yml +210 -0
  126. agentops/templates/workflows/agentops-pr.yml +148 -0
  127. agentops/templates/workflows/agentops-watchdog.yml +122 -0
  128. agentops/utils/__init__.py +1 -0
  129. agentops/utils/azd_env.py +435 -0
  130. agentops/utils/azure_endpoints.py +62 -0
  131. agentops/utils/colors.py +47 -0
  132. agentops/utils/dotenv_loader.py +105 -0
  133. agentops/utils/foundry_discovery.py +229 -0
  134. agentops/utils/logging.py +59 -0
  135. agentops/utils/telemetry.py +554 -0
  136. agentops/utils/yaml.py +36 -0
  137. agentops_accelerator-0.3.0.dist-info/METADATA +278 -0
  138. agentops_accelerator-0.3.0.dist-info/RECORD +142 -0
  139. agentops_accelerator-0.3.0.dist-info/WHEEL +5 -0
  140. agentops_accelerator-0.3.0.dist-info/entry_points.txt +2 -0
  141. agentops_accelerator-0.3.0.dist-info/licenses/LICENSE +21 -0
  142. agentops_accelerator-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,848 @@
1
+ """Read-only evaluation setup analysis for `agentops eval analyze`."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import textwrap
8
+ from dataclasses import dataclass, field
9
+ from fnmatch import fnmatch
10
+ from pathlib import Path
11
+ from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
12
+
13
+ from agentops.core.agentops_config import classify_agent
14
+ from agentops.utils.yaml import load_yaml
15
+
16
# --- Repository text scan -------------------------------------------------
# Cap, in characters, on the concatenated repository text used for keyword hints.
_TEXT_LIMIT = 200_000
# Cap on how many source files contribute to that concatenated text.
_SCAN_LIMIT = 80
# Only files with one of these (lower-cased) suffixes are read by the text scan.
_TEXT_SUFFIXES = {
    ".py",
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".bicep",
    ".yaml",
    ".yml",
}

# --- Dataset sampling -----------------------------------------------------
# Number of leading dataset lines inspected when collecting column names.
_DATASET_ROW_LIMIT = 20

# --- Text rendering -------------------------------------------------------
# Column width passed to textwrap when rendering the plain-text report.
_TEXT_WRAP_WIDTH = 92

# --- Project walk ---------------------------------------------------------
# NOTE(review): the three names below are not referenced in the visible part of
# this module; presumably the project file walker uses them to bound the walk
# and to skip tooling/vendor directories - confirm against its definition.
_WALK_FILE_LIMIT = 2_000
_IGNORE_PARTS = {
    ".azure",
    ".git",
    ".github",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    ".venv",
    "__pycache__",
    "build",
    "dist",
    "node_modules",
    "site-packages",
}
_IGNORE_PREFIXES = {".agentops/results"}
37
+
38
+
39
@dataclass(frozen=True)
class EvalSignal:
    """One piece of local evidence about how a project's evaluation is set up.

    Instances are immutable. ``path`` is optional and ``confidence`` defaults
    to ``"high"``.
    """

    key: str
    label: str
    detail: str
    path: Optional[str] = None
    confidence: str = "high"

    def to_dict(self) -> Dict[str, str]:
        """Return the signal as a plain mapping.

        The ``path`` entry is only present when ``path`` is truthy, and it is
        always the last key.
        """
        payload = dict(
            key=self.key,
            label=self.label,
            detail=self.detail,
            confidence=self.confidence,
        )
        if not self.path:
            return payload
        payload["path"] = self.path
        return payload
59
+
60
+
61
@dataclass(frozen=True)
class EvalAnalysis:
    """Result object returned by the `agentops eval analyze` analysis.

    The class is immutable; :meth:`to_dict` produces the serializable form
    with a fixed key order.
    """

    version: int
    directory: str
    classification: str
    config_status: str
    dataset_status: str
    target_kind: Optional[str]
    scenario_hint: str
    complexity: str
    requires_copilot_adaptation: bool
    copilot_skills_installed: bool
    copilot_prompt: Optional[str] = None
    signals: List["EvalSignal"] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    recommended_skills: List[str] = field(default_factory=list)
    recommended_commands: List[str] = field(default_factory=list)
    next_steps: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of the analysis.

        Scalar fields are copied as-is, each signal is converted through its
        own ``to_dict()``, and the string lists are shallow-copied so callers
        cannot mutate the lists held by this instance.
        """
        scalar_fields = (
            "version",
            "directory",
            "classification",
            "config_status",
            "dataset_status",
            "target_kind",
            "scenario_hint",
            "complexity",
            "requires_copilot_adaptation",
            "copilot_skills_installed",
            "copilot_prompt",
        )
        list_fields = (
            "warnings",
            "recommended_skills",
            "recommended_commands",
            "next_steps",
        )
        payload: Dict[str, Any] = {name: getattr(self, name) for name in scalar_fields}
        payload["signals"] = [entry.to_dict() for entry in self.signals]
        for name in list_fields:
            payload[name] = list(getattr(self, name))
        return payload
101
+
102
+
103
def analyze_eval_project(directory: Path) -> "EvalAnalysis":
    """Inspect a project directory and describe its evaluation readiness.

    The analysis is meant to stay local: it reads `agentops.yaml`, the
    referenced dataset, `README.md`, and a sample of source files through this
    module's helpers, and is not intended to call Azure, Foundry, Copilot, or
    any model. The result tells the user whether `agentops eval run` can be
    used directly or whether setup should first be adapted with AgentOps
    skills.

    Args:
        directory: Project directory to analyze; it is resolved to an
            absolute path before use.

    Returns:
        An ``EvalAnalysis`` with ``version`` set to 1.
    """
    workspace = directory.resolve()

    # Start from whatever the config inspection already found.
    config = _agentops_config_info(workspace)
    found_signals: List["EvalSignal"] = [*config.signals]
    notices: List[str] = [*config.warnings]

    # Keyword hints come from the README plus a bounded sample of source text.
    source_text = _repo_text(workspace)
    readme = _read_text(workspace / "README.md")
    hint_text = "\n".join((readme, source_text)).lower()
    found_signals += _structural_signals(workspace, hint_text)

    scenario = _scenario_hint(config.dataset_columns, hint_text)
    if scenario != "unknown":
        # A hint backed by real dataset columns is trusted more than a text-only one.
        found_signals.append(
            EvalSignal(
                "scenario_hint",
                "Evaluation scenario hint",
                f"Likely scenario: {scenario}.",
                confidence="high" if config.dataset_columns else "medium",
            )
        )

    if not config.has_config:
        notices.append(
            "No agentops.yaml found. Use `agentops init` for the base file, "
            "then use the agentops-config skill if target or scenario inference is not obvious."
        )
    elif not config.dataset_exists:
        notices.append(
            "The configured dataset was not found. Use the agentops-dataset skill "
            "to create or map realistic JSONL rows before `agentops eval run`."
        )
    if scenario in {"rag", "agent_workflow"} and not _dataset_supports_scenario(
        scenario, config.dataset_columns
    ):
        notices.append(
            f"The repo looks like {scenario}, but the dataset columns do not "
            "fully support that scenario yet."
        )

    reasons = _complexity_reasons(
        config_info=config,
        signals=found_signals,
        scenario_hint=scenario,
    )
    complexity = _complexity_label(reasons, config)
    needs_adaptation = bool(reasons) or not config.ready
    skills = _recommended_skills(config, scenario, reasons)
    has_skills = _skills_installed(workspace)
    prompt = _copilot_prompt(skills, scenario)
    commands = _recommended_commands(workspace, config, skills, has_skills)
    steps = _next_steps(config, skills, reasons, has_skills)

    return EvalAnalysis(
        version=1,
        directory=str(workspace),
        classification=_classification(config, scenario),
        config_status=config.status,
        dataset_status=config.dataset_status,
        target_kind=config.target_kind,
        scenario_hint=scenario,
        complexity=complexity,
        requires_copilot_adaptation=needs_adaptation,
        copilot_skills_installed=has_skills,
        copilot_prompt=prompt,
        signals=found_signals,
        warnings=notices,
        recommended_skills=skills,
        recommended_commands=commands,
        next_steps=steps,
    )
186
+
187
+
188
def render_eval_analysis(analysis: "EvalAnalysis", output_format: str = "text") -> str:
    """Render an analysis in the requested output format.

    Args:
        analysis: The analysis to render.
        output_format: One of ``"text"``, ``"markdown"``, or ``"json"``.

    Returns:
        The rendered document. JSON output is indented by two spaces and ends
        with a newline.

    Raises:
        ValueError: If ``output_format`` is not one of the supported values.
    """
    if output_format == "text":
        return _render_text(analysis)
    if output_format == "markdown":
        return _render_markdown(analysis)
    if output_format != "json":
        raise ValueError("output_format must be text, markdown, or json")
    payload = analysis.to_dict()
    return f"{json.dumps(payload, indent=2)}\n"
197
+
198
+
199
@dataclass(frozen=True)
class _ConfigInfo:
    """Immutable summary of `agentops.yaml` and the dataset it references.

    Produced by `_agentops_config_info` and consumed by the classification,
    complexity, and recommendation helpers in this module.
    """

    # Whether an agentops.yaml file exists in the analyzed directory.
    has_config: bool
    # Whether the config and dataset are complete enough to run an evaluation.
    ready: bool
    # Config state; the builder uses "missing", "ready", "incomplete", "invalid".
    status: str
    # Dataset state, e.g. "missing", "not_found", "ready", or "unknown".
    dataset_status: str
    # Kind of evaluation target, or None when it could not be determined.
    target_kind: Optional[str]
    # Whether the resolved dataset path exists on disk.
    dataset_exists: bool
    # Column names seen in the sampled dataset rows (empty when none were read).
    dataset_columns: Set[str]
    # Signals and warnings gathered while inspecting the config.
    signals: List["EvalSignal"] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
210
+
211
+
212
def _agentops_config_info(root: Path) -> "_ConfigInfo":
    """Summarize `agentops.yaml` under ``root`` and the dataset it points to.

    Three outcomes are possible:

    * no `agentops.yaml` exists: status ``"missing"``;
    * the file loads as a mapping: status ``"ready"`` or ``"incomplete"``;
    * anything raises while loading or inspecting it: status ``"invalid"``,
      with the exception text recorded in both a signal and a warning.

    The broad ``except`` is deliberate best-effort handling: the analysis must
    produce a report even when the config cannot be interpreted.
    """
    config_file = root / "agentops.yaml"
    if not config_file.exists():
        return _ConfigInfo(
            has_config=False,
            ready=False,
            status="missing",
            dataset_status="missing",
            target_kind=None,
            dataset_exists=False,
            dataset_columns=set(),
        )

    try:
        raw = load_yaml(config_file)
        if not isinstance(raw, dict):
            raise ValueError("expected a mapping")

        agent_ref = str(raw.get("agent", "") or "")
        dataset_ref = raw.get("dataset")
        target = classify_agent(agent_ref, raw.get("protocol"))

        # Dataset paths are resolved relative to the config file's directory.
        resolved = _resolve_dataset_path(config_file.parent, dataset_ref)
        if resolved is None:
            found = False
            columns: Set[str] = set()
        else:
            found = resolved.exists()
            columns = _dataset_columns(resolved)
        dataset_state = _dataset_status(dataset_ref, found, columns)

        collected = [
            EvalSignal(
                "agentops_config",
                "AgentOps config",
                f"agentops.yaml targets {target.kind}.",
                "agentops.yaml",
            )
        ]
        if dataset_ref is not None:
            collected.append(
                EvalSignal(
                    "dataset_ref",
                    "Evaluation dataset reference",
                    f"Dataset path is {dataset_ref}.",
                    None if resolved is None else _rel(root, resolved),
                    confidence="high" if found else "medium",
                )
            )
        if columns:
            collected.append(
                EvalSignal(
                    "dataset_columns",
                    "Dataset row columns",
                    "Found columns: " + ", ".join(sorted(columns)) + ".",
                    None if resolved is None else _rel(root, resolved),
                )
            )

        # Ready means: an agent is named, a dataset is referenced and present,
        # and the sampled rows carry an "input" column.
        is_ready = bool(agent_ref and dataset_ref and found and "input" in columns)
        return _ConfigInfo(
            has_config=True,
            ready=is_ready,
            status="ready" if is_ready else "incomplete",
            dataset_status=dataset_state,
            target_kind=target.kind,
            dataset_exists=found,
            dataset_columns=columns,
            signals=collected,
        )
    except Exception as exc:
        problem = EvalSignal(
            "agentops_config",
            "AgentOps config",
            f"agentops.yaml exists but could not be analyzed: {exc}",
            "agentops.yaml",
            confidence="medium",
        )
        return _ConfigInfo(
            has_config=True,
            ready=False,
            status="invalid",
            dataset_status="unknown",
            target_kind=None,
            dataset_exists=False,
            dataset_columns=set(),
            signals=[problem],
            warnings=[f"agentops.yaml could not be analyzed: {exc}"],
        )
294
+
295
+
296
def _resolve_dataset_path(config_dir: Path, dataset_value: Any) -> Optional[Path]:
    """Turn the config's ``dataset`` value into a resolved filesystem path.

    Args:
        config_dir: Directory that relative dataset paths are anchored to.
        dataset_value: Raw value from the config; it is converted with ``str``.

    Returns:
        ``None`` when ``dataset_value`` is ``None``; otherwise the resolved
        path, with relative values joined onto ``config_dir`` first.
    """
    if dataset_value is None:
        return None
    candidate = Path(str(dataset_value))
    anchored = candidate if candidate.is_absolute() else config_dir / candidate
    return anchored.resolve()
303
+
304
+
305
def _dataset_status(dataset_value: Any, exists: bool, columns: Set[str]) -> str:
    """Classify the state of the configured dataset.

    Args:
        dataset_value: Raw ``dataset`` value from the config, or ``None``.
        exists: Whether the resolved dataset path exists.
        columns: Column names collected from the sampled dataset rows.

    Returns:
        ``"missing"`` when no dataset is configured, ``"not_found"`` when the
        path does not exist, ``"empty_or_unreadable"`` when no columns could
        be read, ``"missing_input_column"`` when rows were read but none has
        an ``input`` column, and ``"ready"`` otherwise.
    """
    if dataset_value is None:
        return "missing"
    if not exists:
        return "not_found"
    # The emptiness check must come first: an empty set also lacks "input",
    # so testing for "input" first would make this branch unreachable.
    if not columns:
        return "empty_or_unreadable"
    if "input" not in columns:
        return "missing_input_column"
    return "ready"
315
+
316
+
317
def _dataset_columns(path: Path) -> Set[str]:
    """Collect the column names used by the first rows of a JSONL dataset.

    Only the first ``_DATASET_ROW_LIMIT`` lines are inspected (blank lines
    count toward that limit). Rows that are not JSON objects are ignored.

    Args:
        path: Dataset file to sample; it is read as UTF-8.

    Returns:
        The union of keys found in the sampled rows, or an empty set when the
        file cannot be opened, is not valid UTF-8, or contains a line that is
        not valid JSON.
    """
    columns: Set[str] = set()
    try:
        with path.open("r", encoding="utf-8") as handle:
            for index, line in enumerate(handle):
                if index >= _DATASET_ROW_LIMIT:
                    break
                line = line.strip()
                if not line:
                    continue
                row = json.loads(line)
                if isinstance(row, dict):
                    columns.update(str(key) for key in row)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        # UnicodeDecodeError is not an OSError: without it a binary or
        # mis-encoded dataset would escape to the caller and be reported as a
        # broken agentops.yaml instead of an unreadable dataset.
        return set()
    return columns
333
+
334
+
335
def _repo_text(root: Path) -> str:
    """Concatenate a bounded sample of source text from the project.

    Files yielded by the project walker are kept only when their lower-cased
    suffix is in ``_TEXT_SUFFIXES`` and their text is non-empty. Collection
    stops once ``_TEXT_LIMIT`` characters or ``_SCAN_LIMIT`` files have been
    gathered; the joined result is also truncated to ``_TEXT_LIMIT``.
    """
    chunks: List[str] = []
    used = 0
    for candidate in _walk_project_files(root):
        if candidate.suffix.lower() in _TEXT_SUFFIXES:
            content = _read_text(candidate)
            if content:
                chunks.append(content)
                # +1 accounts for the newline that joins this chunk to the next.
                used += len(content) + 1
                if used >= _TEXT_LIMIT or len(chunks) >= _SCAN_LIMIT:
                    break
    combined = "\n".join(chunks)
    return combined[:_TEXT_LIMIT]
349
+
350
+
351
def _structural_signals(root: Path, text: str) -> List["EvalSignal"]:
    """Derive project-shape signals from files on disk and keyword hits.

    Args:
        root: Project directory to probe for `azure.yaml` and Dockerfiles.
        text: Hint text to search; the keywords are lower-case, so callers
            are expected to pass lower-cased text.

    Returns:
        Signals in a fixed order: azd project, container/HTTP app, RAG,
        tool calling, Foundry, and direct model API usage.
    """

    def mentions(*needles: str) -> bool:
        return any(needle in text for needle in needles)

    found: List["EvalSignal"] = []

    if (root / "azure.yaml").exists():
        found.append(
            EvalSignal(
                "azd_project",
                "Azure Developer CLI project",
                "azure.yaml found; eval config may need azd outputs/env values.",
                "azure.yaml",
            )
        )

    if _find_files(root, "Dockerfile") or mentions("container app", "containerapps"):
        found.append(
            EvalSignal(
                "container_or_http_app",
                "HTTP/containerized app",
                "Container or HTTP app signals found; eval may need endpoint and response-field mapping.",
                confidence="medium",
            )
        )

    # Remaining signals are keyword-only and all carry medium confidence.
    keyword_signals = (
        (
            "rag_signal",
            "RAG/retrieval signal",
            "Search, retrieval, vector, or context terms found.",
            mentions("azure.search", "ai search", "retrieval", "vector"),
        ),
        (
            "tool_signal",
            "Tool-calling signal",
            "Tool-call/function-call terms found.",
            mentions("tool_calls", "function_call", "@tool", "tools="),
        ),
        (
            "foundry_signal",
            "Foundry project signal",
            "Foundry SDK or Foundry project endpoint terms found.",
            mentions("aiprojectclient", "azure-ai-projects", "services.ai.azure.com"),
        ),
        (
            "model_signal",
            "Model API signal",
            "Direct model API usage found.",
            "openai" in text and mentions("chat.completions", "responses.create"),
        ),
    )
    for key, label, detail, matched in keyword_signals:
        if matched:
            found.append(EvalSignal(key, label, detail, confidence="medium"))
    return found
408
+
409
+
410
def _scenario_hint(dataset_columns: Set[str], text: str) -> str:
    """Guess the evaluation scenario from dataset columns, then from text.

    Dataset columns take precedence because they describe what the evaluation
    can actually score; keyword matches in ``text`` are only a fallback.

    Args:
        dataset_columns: Column names sampled from the dataset.
        text: Hint text to search; the keywords are lower-case, so callers
            are expected to pass lower-cased text.

    Returns:
        One of ``"agent_workflow"``, ``"rag"``, ``"conversational"``,
        ``"model_quality"``, or ``"unknown"``.
    """
    # Local import: this module's import block does not bring in `re`.
    import re

    if {"tool_calls", "tool_definitions"} & dataset_columns:
        return "agent_workflow"
    if "context" in dataset_columns:
        return "rag"
    if "conversation" in dataset_columns or "turns" in dataset_columns:
        return "conversational"
    if "expected" in dataset_columns:
        return "model_quality"

    if "tool_calls" in text or "function_call" in text or "@tool" in text:
        return "agent_workflow"
    # "rag" must stand alone as a token: a bare substring test also matches
    # "storage", "average", "leverage", "paragraph", and so on, which would
    # label almost any repository as RAG. Underscores and punctuation still
    # count as boundaries, so "rag_pipeline" and "rag-app" match.
    rag_token = re.search(r"(?<![a-z0-9])rag(?![a-z0-9])", text)
    if "azure.search" in text or "ai search" in text or "retrieval" in text or rag_token:
        return "rag"
    if "chatbot" in text or "conversation" in text or "assistant" in text:
        return "conversational"
    return "unknown"
426
+
427
+
428
def _dataset_supports_scenario(scenario: str, columns: Set[str]) -> bool:
    """Report whether the dataset columns are sufficient for ``scenario``.

    * ``"rag"`` needs ``input``, ``expected``, and ``context``.
    * ``"agent_workflow"`` needs ``input`` and ``expected`` plus at least one
      of ``tool_calls`` / ``tool_definitions``.
    * any other scenario only needs ``input``.

    An empty column set never supports a scenario.
    """
    if not columns:
        return False
    if scenario == "agent_workflow":
        has_basics = {"input", "expected"}.issubset(columns)
        has_tooling = bool({"tool_calls", "tool_definitions"} & columns)
        return has_basics and has_tooling
    if scenario == "rag":
        return {"input", "expected", "context"}.issubset(columns)
    return "input" in columns
438
+
439
+
440
def _complexity_reasons(
    *,
    config_info: "_ConfigInfo",
    signals: List["EvalSignal"],
    scenario_hint: str,
) -> List[str]:
    """List the reasons evaluation setup needs attention before running.

    Returns:
        De-duplicated reason strings in sorted order; empty when nothing
        stands in the way.
    """
    present = {item.key for item in signals}
    found: Set[str] = set()

    if not config_info.has_config:
        found.add("missing agentops.yaml")
    elif config_info.status != "ready":
        found.add("agentops.yaml or dataset is incomplete")

    # An HTTP-style host with an unknown or HTTP/JSON target usually needs its
    # response fields mapped by hand.
    http_like_target = config_info.target_kind in {None, "http_json"}
    if "container_or_http_app" in present and http_like_target:
        found.add("HTTP response contract may need mapping")

    if scenario_hint in {"rag", "agent_workflow"}:
        if not _dataset_supports_scenario(scenario_hint, config_info.dataset_columns):
            found.add(f"{scenario_hint} dataset columns are not complete")

    overlapping = present & {"rag_signal", "tool_signal", "container_or_http_app"}
    if len(overlapping) >= 2:
        found.add("multiple project-specific evaluation signals")

    return sorted(found)
461
+
462
+
463
def _complexity_label(reasons: List[str], config_info: "_ConfigInfo") -> str:
    """Turn the list of complexity reasons into a human-readable label.

    Two or more reasons rate "high", a single reason "medium". With no
    reasons the label depends on whether the config is ready.
    """
    count = len(reasons)
    if count >= 2:
        return "high - skill-assisted evaluation setup recommended"
    if count:
        return "medium - review setup before running eval"
    return "low - ready to run eval" if config_info.ready else "medium - setup required"
471
+
472
+
473
def _recommended_skills(
    config_info: "_ConfigInfo",
    scenario_hint: str,
    complex_reasons: List[str],
) -> List[str]:
    """Pick the AgentOps skills worth using, in config/dataset/eval order.

    ``scenario_hint`` is accepted for signature compatibility but is not
    consulted.
    """
    config_needs_help = not config_info.has_config or config_info.status == "invalid"
    candidates = (
        ("agentops-config", config_needs_help),
        ("agentops-dataset", config_info.dataset_status != "ready"),
        ("agentops-eval", bool(complex_reasons)),
    )
    return [skill for skill, wanted in candidates if wanted]
486
+
487
+
488
def _recommended_commands(
    root: Path,
    config_info: "_ConfigInfo",
    skills: List[str],
    skills_installed: bool,
) -> List[str]:
    """List the CLI commands to suggest, always starting with the analysis itself.

    ``root`` is accepted for signature compatibility but is not consulted.
    """
    optional = (
        ("agentops skills install --platform copilot", bool(skills) and not skills_installed),
        ("agentops init", not config_info.has_config),
        ("agentops eval run", bool(config_info.ready)),
    )
    suggested = ["agentops eval analyze --format markdown"]
    suggested.extend(command for command, wanted in optional if wanted)
    return suggested
502
+
503
+
504
def _next_steps(
    config_info: "_ConfigInfo",
    skills: List[str],
    complex_reasons: List[str],
    skills_installed: bool,
) -> List[str]:
    """Build the ordered "what to do next" list for the report.

    A ready config with no complexity reasons gets the short run-then-analyze
    path; anything else gets triage steps that end with re-running the
    analysis.
    """
    if config_info.ready and not complex_reasons:
        return [
            "Run `agentops eval run` to produce results.json and report.md.",
            "Then run `agentops workflow analyze` before generating CI/CD workflows.",
        ]

    plan: List[str] = []
    plan.append("Use this analysis as the triage output before `agentops eval run`.")

    if skills and not skills_installed:
        plan.append(
            "Install the AgentOps Copilot skills first: `agentops skills install --platform copilot`."
        )
    if skills:
        slash_names = ", ".join(f"/{skill}" for skill in skills)
        plan.append(
            "Copy/paste the Copilot handoff prompt shown below; it uses "
            + slash_names
            + " to adapt agentops.yaml, dataset rows, and evaluator expectations."
        )

    dataset_needs_work = config_info.has_config and config_info.dataset_status != "ready"
    if dataset_needs_work:
        plan.append("Create or fix the dataset JSONL referenced by agentops.yaml.")

    plan.append("Re-run `agentops eval analyze`, then run `agentops eval run` once setup is ready.")
    return plan
530
+
531
+
532
def _copilot_prompt(skills: List[str], scenario_hint: str) -> Optional[str]:
    """Return the copy/paste prompt for the most fundamental recommended skill.

    Config problems outrank dataset problems, which outrank general eval
    verification. Returns ``None`` when no skill is recommended.
    """
    if not skills:
        return None

    config_prompt = (
        "/agentops-config Use the AgentOps eval analysis above to inspect this repo, "
        "configure agentops.yaml for the correct target/protocol, and tell me what remains before I run eval."
    )
    dataset_prompt = (
        "/agentops-dataset Use the AgentOps eval analysis above to create or fix the JSONL dataset "
        f"for the {scenario_hint} scenario, then summarize the exact rows and columns."
    )
    eval_prompt = (
        "/agentops-eval Use the AgentOps eval analysis above to verify the target, dataset, evaluator "
        "scenario, and next command before running agentops eval run."
    )

    for skill, prompt in (("agentops-config", config_prompt), ("agentops-dataset", dataset_prompt)):
        if skill in skills:
            return prompt
    return eval_prompt
549
+
550
+
551
def _classification(config_info: "_ConfigInfo", scenario_hint: str) -> str:
    """Produce the one-line project classification shown in the report."""
    if not config_info.has_config:
        return "unconfigured AI project"
    # Fall back to a generic subject when the target kind is unknown.
    subject = config_info.target_kind if config_info.target_kind else "AgentOps"
    return f"{subject} evaluation setup ({scenario_hint})"
557
+
558
+
559
def _render_text(analysis: "EvalAnalysis") -> str:
    """Render the analysis as the plain-text terminal report.

    Sections appear in a fixed order: header, Readiness, Signals, then the
    optional Warnings / Recommended skills / Copilot handoff sections, and
    finally Commands and Next. The result ends with a newline.
    """
    out: List[str] = [
        "AgentOps eval analysis",
        f"Workspace: {analysis.directory}",
        f"Project: {_soften_text(analysis.classification)}",
        "",
        "Readiness",
    ]
    out += _render_text_readiness(analysis)

    out += ["", "Signals"]
    if not analysis.signals:
        out += _wrapped_status_line(
            "todo",
            "Signals",
            "No strong evaluation setup signals detected.",
        )
    else:
        out += _render_text_signals(analysis.signals)

    # Optional sections are omitted entirely when they have nothing to show.
    if analysis.warnings:
        out += ["", "Warnings"]
        for message in analysis.warnings:
            out += _wrapped_status_line("warn", "warning", message)
    if analysis.recommended_skills:
        out += ["", "Recommended skills"]
        for name in analysis.recommended_skills:
            out += _wrapped_status_line("todo", "skill", f"/{name}")
    if analysis.copilot_prompt:
        out += ["", "Copilot handoff"]
        out += _wrapped_status_line("todo", "copy/paste", analysis.copilot_prompt)

    out += ["", "Commands"]
    out += [f" {command}" for command in analysis.recommended_commands]

    out += ["", "Next"]
    for number, step in enumerate(analysis.next_steps, start=1):
        out += _wrapped_numbered_step(number, step)

    return "\n".join(out) + "\n"
602
+
603
+
604
def _render_text_readiness(analysis: "EvalAnalysis") -> List[str]:
    """Render the "Readiness" field table of the plain-text report."""
    if analysis.requires_copilot_adaptation:
        setup_help = "needs setup help - use recommended skills before making eval blocking"
    else:
        setup_help = "ready - current eval setup can run directly"

    # Skills that are installed are reported as such even when not needed.
    if analysis.copilot_skills_installed:
        skills_state = "installed - available for setup handoff"
    elif analysis.requires_copilot_adaptation:
        skills_state = "missing - install if you want Copilot-guided setup"
    else:
        skills_state = "not needed - no Copilot handoff for eval setup"

    table = [
        ("config", _friendly_status(analysis.config_status)),
        ("dataset", _friendly_status(analysis.dataset_status)),
        ("target", _friendly_target(analysis.target_kind)),
        ("scenario", _friendly_status(analysis.scenario_hint)),
        ("complexity", analysis.complexity),
        ("setup help", setup_help),
        ("Copilot skills", skills_state),
    ]
    return _render_text_fields(table)
630
+
631
+
632
def _render_text_signals(signals: Sequence["EvalSignal"]) -> List[str]:
    """Render one wrapped status entry per signal for the plain-text report.

    High-confidence signals are marked ``ok``; everything else is a ``hint``.
    The signal's path, when set, is appended to its detail in parentheses.
    """
    rendered: List[str] = []
    for item in signals:
        marker = "hint" if item.confidence != "high" else "ok"
        location = f" ({item.path})" if item.path else ""
        description = _soften_text(item.detail + location)
        heading = _signal_label(item.key, item.label)
        rendered += _wrapped_status_line(marker, heading, description)
    return rendered
639
+
640
+
641
def _render_text_fields(rows: Sequence[tuple[str, str]]) -> List[str]:
    """Render ``(label, value)`` pairs as an aligned, wrapped field table.

    Labels are left-justified to the width of the longest label. When a value
    wraps, continuation lines are padded with blanks to the value column
    instead of repeating the label, matching how the status lines in this
    module wrap.

    Args:
        rows: Label/value pairs to render; may be empty.

    Returns:
        The rendered lines, or an empty list when ``rows`` is empty.
    """
    # default=0 keeps an empty table from raising ValueError in max().
    width = max((len(label) for label, _ in rows), default=0)
    lines: List[str] = []
    for label, value in rows:
        prefix = f" {label.ljust(width)} "
        wrapped = _wrap_text(value, indent=prefix)
        # _wrap_text applies the same indent to every line, so continuation
        # lines arrive carrying the label; swap that prefix for blanks.
        padding = " " * len(prefix)
        lines.extend(wrapped[:1])
        lines.extend(padding + extra[len(prefix):] for extra in wrapped[1:])
    return lines
647
+
648
+
649
def _wrapped_status_line(status: str, label: str, text: str) -> List[str]:
    """Wrap ``text`` behind a ``status label`` prefix.

    The status is padded to 4 characters and the label to 20. Continuation
    lines are indented with blanks to the width of the prefix. Long words and
    hyphenated words are never split. Empty text yields the bare prefix with
    trailing whitespace removed.
    """
    lead = f" {status.ljust(4)} {label.ljust(20)} "
    wrapper = textwrap.TextWrapper(
        width=_TEXT_WRAP_WIDTH,
        initial_indent=lead,
        subsequent_indent=" " * len(lead),
        break_long_words=False,
        break_on_hyphens=False,
    )
    pieces = wrapper.wrap(text)
    if pieces:
        return pieces
    return [lead.rstrip()]
660
+
661
+
662
def _wrapped_numbered_step(index: int, text: str) -> List[str]:
    """Wrap ``text`` as a numbered list item (``" N. "`` prefix).

    Continuation lines are indented with blanks to the width of the prefix.
    Long words and hyphenated words are never split. Empty text yields the
    bare prefix with trailing whitespace removed.
    """
    lead = f" {index}. "
    wrapper = textwrap.TextWrapper(
        width=_TEXT_WRAP_WIDTH,
        initial_indent=lead,
        subsequent_indent=" " * len(lead),
        break_long_words=False,
        break_on_hyphens=False,
    )
    pieces = wrapper.wrap(text)
    if pieces:
        return pieces
    return [lead.rstrip()]
673
+
674
+
675
def _wrap_text(text: str, *, indent: str) -> List[str]:
    """Wrap ``text`` to the report width, prefixing every line with ``indent``.

    The same ``indent`` is used for the first and all following lines. Long
    words and hyphenated words are never split. Empty text yields ``indent``
    with trailing whitespace removed.
    """
    wrapper = textwrap.TextWrapper(
        width=_TEXT_WRAP_WIDTH,
        initial_indent=indent,
        subsequent_indent=indent,
        break_long_words=False,
        break_on_hyphens=False,
    )
    pieces = wrapper.wrap(text)
    if not pieces:
        return [indent.rstrip()]
    return pieces
684
+
685
+
686
def _friendly_target(target_kind: Optional[str]) -> str:
    """Return a display name for a target kind.

    Missing or empty kinds become ``"unknown"``; kinds without a dedicated
    display name fall back to ``_friendly_status``.
    """
    if not target_kind:
        return "unknown"
    display_names = {
        "foundry_prompt": "Foundry prompt agent",
        "foundry_hosted": "Foundry hosted agent",
        "http_json": "HTTP/JSON agent",
        "model_deployment": "model deployment",
        "model_direct": "direct model",
    }
    if target_kind in display_names:
        return display_names[target_kind]
    return _friendly_status(target_kind)
696
+
697
+
698
def _friendly_status(value: str) -> str:
    """Return ``value`` with every underscore turned into a space."""
    words = value.split("_")
    return " ".join(words)
700
+
701
+
702
def _soften_text(text: str) -> str:
    """Replace internal identifiers found in ``text`` with friendlier wording.

    The substitutions are applied one after another in the order listed.
    """
    substitutions = (
        ("foundry_prompt", "Foundry prompt agent"),
        ("model_direct", "direct model"),
        ("model_quality", "model quality"),
        ("agent_workflow", "agent workflow"),
        ("http_json", "HTTP/JSON agent"),
    )
    softened = text
    for identifier, friendly in substitutions:
        softened = softened.replace(identifier, friendly)
    return softened
710
+
711
+
712
def _signal_label(key: str, fallback: str) -> str:
    """Return the short display label for a signal ``key``.

    Keys missing from the table return ``fallback`` unchanged.
    """
    short_labels = {
        "agentops_config": "Config",
        "dataset_ref": "Dataset",
        "dataset_columns": "Columns",
        "scenario_hint": "Scenario",
        "azd_project": "azd",
        "container_or_http_app": "Host",
        "rag_signal": "RAG",
        "tool_signal": "Tools",
        "foundry_signal": "Foundry",
        "model_signal": "Model",
    }
    if key in short_labels:
        return short_labels[key]
    return fallback
725
+
726
+
727
def _render_markdown(analysis: EvalAnalysis) -> str:
    """Render ``analysis`` as a Markdown report.

    The report always contains the summary bullets, a "Detected signals"
    section, a "Recommended commands" section and a "Next steps" section.
    "Warnings", "Recommended skills" and "Copilot handoff" are emitted only
    when the corresponding attribute is truthy. The returned text has
    trailing whitespace stripped and ends with exactly one newline.
    """
    out = [
        "# AgentOps eval analysis",
        "",
        f"- **Directory:** `{analysis.directory}`",
        f"- **Classification:** {analysis.classification}",
        f"- **Config status:** `{analysis.config_status}`",
        f"- **Dataset status:** `{analysis.dataset_status}`",
        f"- **Target kind:** `{analysis.target_kind or 'unknown'}`",
        f"- **Scenario hint:** `{analysis.scenario_hint}`",
        f"- **Complexity:** {analysis.complexity}",
        f"- **Skill-assisted setup:** {'yes' if analysis.requires_copilot_adaptation else 'no'}",
        f"- **Copilot skills installed:** {'yes' if analysis.copilot_skills_installed else 'no'}",
        "",
        "## Detected signals",
        "",
    ]

    # One bullet per signal; the path is appended only when the signal has one.
    if analysis.signals:
        for s in analysis.signals:
            bullet = f"- **{s.label}** ({s.confidence}): {s.detail}"
            if s.path:
                bullet += f" - `{s.path}`"
            out.append(bullet)
    else:
        out.append("- No strong evaluation setup signals detected.")

    if analysis.warnings:
        out += ["", "## Warnings", ""]
        out += [f"- {warning}" for warning in analysis.warnings]

    if analysis.recommended_skills:
        out += ["", "## Recommended skills", ""]
        out += [f"- `/{skill}`" for skill in analysis.recommended_skills]

    if analysis.copilot_prompt:
        out += ["", "## Copilot handoff", ""]
        out += ["Copy/paste this into Copilot:", "", "```text", analysis.copilot_prompt, "```"]

    out += ["", "## Recommended commands", ""]
    out += [f"```bash\n{command}\n```" for command in analysis.recommended_commands]

    out += ["", "## Next steps", ""]
    out += [f"- {step}" for step in analysis.next_steps]

    return "\n".join(out).rstrip() + "\n"
766
+
767
+
768
def _find_files(root: Path, pattern: str) -> List[Path]:
    """Collect project files under ``root`` that match ``pattern``.

    A file matches when either its bare name or its root-relative path
    satisfies the ``fnmatch`` pattern. Files are visited in the order
    produced by ``_walk_project_files`` and collection stops once
    ``_SCAN_LIMIT`` matches have been gathered.
    """
    matches: List[Path] = []
    for candidate in _walk_project_files(root):
        relative = _rel_text(root, candidate)
        if fnmatch(candidate.name, pattern) or fnmatch(relative, pattern):
            matches.append(candidate)
            if len(matches) >= _SCAN_LIMIT:
                break
    return matches
778
+
779
+
780
def _walk_project_files(root: Path) -> Iterable[Path]:
    """Yield non-ignored files beneath ``root`` in sorted order.

    Ignored directories are pruned in place so ``os.walk`` never descends
    into them. Remaining directories and file names are visited in sorted
    order. The generator stops after yielding ``_WALK_FILE_LIMIT`` files.
    """
    base = str(root)
    yielded = 0
    for current, subdirs, files in os.walk(base):
        relative = os.path.relpath(current, base)
        # Forward-slash relative prefix for this directory; empty at the root.
        stem = "" if relative == "." else relative.replace("\\", "/")
        lead = f"{stem}/" if stem else ""

        # Slice assignment mutates the list os.walk holds, pruning the walk.
        subdirs[:] = sorted(
            name for name in subdirs if not _ignored_rel(f"{lead}{name}")
        )

        for name in sorted(files):
            if _ignored_rel(f"{lead}{name}"):
                continue
            yield Path(current) / name
            yielded += 1
            if yielded >= _WALK_FILE_LIMIT:
                return
799
+
800
+
801
def _ignored(path: Path, root: Path) -> bool:
    """Return whether ``path`` should be skipped relative to ``root``.

    A path whose relative text is empty (``_rel_text`` returns ``""`` when
    ``path`` is not under ``root``) is always ignored; otherwise the decision
    is delegated to ``_ignored_rel``.
    """
    relative = _rel_text(root, path)
    return relative == "" or _ignored_rel(relative)
806
+
807
+
808
def _rel_text(root: Path, path: Path) -> str:
    """Return ``path`` relative to ``root`` as forward-slash text.

    Returns an empty string when ``path`` is not located under ``root``.
    """
    try:
        relative = path.relative_to(root)
    except ValueError:
        return ""
    else:
        as_text = str(relative)
        return as_text.replace("\\", "/")
814
+
815
+
816
def _ignored_rel(rel_text: str) -> bool:
    """Return whether a root-relative path falls under the ignore rules.

    The path is normalised to forward slashes with leading and trailing
    slashes removed. An empty path or ``"."`` is never ignored. Otherwise the
    path is ignored when any of its segments is in ``_IGNORE_PARTS``, or when
    it equals or lies beneath one of ``_IGNORE_PREFIXES``.
    """
    normalized = rel_text.replace("\\", "/").strip("/")
    if normalized in ("", "."):
        return False

    if any(segment in _IGNORE_PARTS for segment in normalized.split("/")):
        return True

    return any(
        normalized == prefix or normalized.startswith(f"{prefix}/")
        for prefix in _IGNORE_PREFIXES
    )
824
+
825
+
826
def _read_text(path: Path) -> str:
    """Read ``path`` as UTF-8 text on a best-effort basis.

    Returns an empty string when the file does not exist, is larger than
    ``_TEXT_LIMIT`` bytes, or an ``OSError`` occurs. Undecodable bytes are
    dropped rather than raising.
    """
    try:
        if not path.exists():
            return ""
        if path.stat().st_size > _TEXT_LIMIT:
            return ""
        return path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        return ""
833
+
834
+
835
def _skills_installed(root: Path) -> bool:
    """Return whether an ``agentops-config`` skill file exists under ``root``.

    Two locations are checked: ``.github/skills/agentops-config/SKILL.md``
    and ``.claude/commands/agentops-config.md``.
    """
    markers = (
        root / ".github" / "skills" / "agentops-config" / "SKILL.md",
        root / ".claude" / "commands" / "agentops-config.md",
    )
    return any(marker.exists() for marker in markers)
840
+
841
+
842
def _rel(root: Path, path: Optional[Path]) -> Optional[str]:
    """Return ``path`` relative to ``root`` as a string.

    ``None`` passes through unchanged. A path that is not under ``root`` is
    returned as its own string form instead of a relative one.
    """
    if path is None:
        return None
    try:
        relative = path.relative_to(root)
    except ValueError:
        return str(path)
    return str(relative)