autobots-devtools-shared-lib 0.6.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/PKG-INFO +1 -1
  2. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/pyproject.toml +1 -1
  3. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/agents/agent_meta.py +29 -0
  4. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/assertions/golden.py +22 -5
  5. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/assertions/registry.py +2 -0
  6. autobots_devtools_shared_lib-0.7.0/src/autobots_devtools_shared_lib/eval/assertions/written_file.py +233 -0
  7. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/core/runner.py +2 -0
  8. autobots_devtools_shared_lib-0.7.0/src/autobots_devtools_shared_lib/eval/core/workspace.py +129 -0
  9. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/pytest_plugin/fixtures.py +2 -3
  10. autobots_devtools_shared_lib-0.6.0/src/autobots_devtools_shared_lib/eval/core/workspace.py +0 -51
  11. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/README.md +0 -0
  12. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/__init__.py +0 -0
  13. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/__init__.py +0 -0
  14. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/config/__init__.py +0 -0
  15. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/config/jenkins_config.py +0 -0
  16. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/config/jenkins_constants.py +0 -0
  17. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/config/jenkins_loader.py +0 -0
  18. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/observability/__init__.py +0 -0
  19. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/observability/logging_utils.py +0 -0
  20. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/observability/otel_fastapi.py +0 -0
  21. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/observability/trace_metadata.py +0 -0
  22. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/observability/trace_propagation.py +0 -0
  23. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/observability/tracing.py +0 -0
  24. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/servers/__init__.py +0 -0
  25. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/servers/fileserver/README.md +0 -0
  26. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/servers/fileserver/__init__.py +0 -0
  27. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/servers/fileserver/app.py +0 -0
  28. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/servers/fileserver/config.py +0 -0
  29. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/servers/fileserver/models.py +0 -0
  30. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/services/__init__.py +0 -0
  31. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/services/context/README.md +0 -0
  32. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/services/context/__init__.py +0 -0
  33. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/services/context/cache_backed.py +0 -0
  34. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/services/context/db_repository.py +0 -0
  35. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/services/context/factory.py +0 -0
  36. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/services/context/in_memory.py +0 -0
  37. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/services/context/redis_store.py +0 -0
  38. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/services/context/store.py +0 -0
  39. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/tools/__init__.py +0 -0
  40. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/tools/context_tools.py +0 -0
  41. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/tools/format_tools.py +0 -0
  42. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/tools/fserver_client_tools.py +0 -0
  43. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/tools/jenkins_builtin_tools.py +0 -0
  44. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/tools/jenkins_pipeline_tools.py +0 -0
  45. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/utils/__init__.py +0 -0
  46. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/utils/context_utils.py +0 -0
  47. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/utils/format_utils.py +0 -0
  48. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/utils/fserver_client_utils.py +0 -0
  49. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/utils/jenkins_builtin_utils.py +0 -0
  50. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/utils/jenkins_http_utils.py +0 -0
  51. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/common/utils/jenkins_pipeline_utils.py +0 -0
  52. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/__init__.py +0 -0
  53. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/agents/__init__.py +0 -0
  54. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/agents/agent_config_utils.py +0 -0
  55. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/agents/base_agent.py +0 -0
  56. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/agents/batch.py +0 -0
  57. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/agents/invocation_utils.py +0 -0
  58. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/agents/middleware.py +0 -0
  59. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/config/__init__.py +0 -0
  60. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/config/dynagent_settings.py +0 -0
  61. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/llm/__init__.py +0 -0
  62. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/llm/llm.py +0 -0
  63. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/models/__init__.py +0 -0
  64. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/models/state.py +0 -0
  65. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/services/__init__.py +0 -0
  66. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/services/structured_converter.py +0 -0
  67. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/tools/__init__.py +0 -0
  68. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/tools/state_tools.py +0 -0
  69. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/tools/tool_registry.py +0 -0
  70. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/ui/__init__.py +0 -0
  71. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/ui/default_ui.py +0 -0
  72. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/ui/ui_utils.py +0 -0
  73. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/utils/__init__.py +0 -0
  74. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/dynagent/utils/schema_directive_resolver.py +0 -0
  75. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/__init__.py +0 -0
  76. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/assertions/__init__.py +0 -0
  77. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/assertions/deterministic.py +0 -0
  78. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/assertions/llm_judge.py +0 -0
  79. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/core/__init__.py +0 -0
  80. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/core/cost_tracker.py +0 -0
  81. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/core/loader.py +0 -0
  82. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/models/__init__.py +0 -0
  83. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/models/eval_case.py +0 -0
  84. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/models/result.py +0 -0
  85. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/pytest_plugin/__init__.py +0 -0
  86. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/pytest_plugin/plugin.py +0 -0
  87. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/pytest_plugin/reporting.py +0 -0
  88. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/scoring/__init__.py +0 -0
  89. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/eval/scoring/langfuse_scorer.py +0 -0
  90. {autobots_devtools_shared_lib-0.6.0 → autobots_devtools_shared_lib-0.7.0}/src/autobots_devtools_shared_lib/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: autobots-devtools-shared-lib
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Summary: Shared library functions to be used for all autobots projects
5
5
  License: MIT
6
6
  Author: Pralhad
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "autobots-devtools-shared-lib"
3
- version = "0.6.0"
3
+ version = "0.7.0"
4
4
  description = "Shared library functions to be used for all autobots projects"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -3,9 +3,13 @@
3
3
 
4
4
  from __future__ import annotations
5
5
 
6
+ import json
6
7
  from typing import Any
7
8
 
8
9
  import autobots_devtools_shared_lib.dynagent.agents.agent_config_utils as _agent_config
10
+ from autobots_devtools_shared_lib.common.observability import get_logger
11
+
12
+ logger = get_logger(__file__)
9
13
 
10
14
 
11
15
  class AgentMeta:
@@ -26,6 +30,31 @@ class AgentMeta:
26
30
  self.output_schema_map = _agent_config.get_resolved_output_schema_map()
27
31
  self.capabilities_map = _agent_config.get_capabilities_map()
28
32
  self.default_agent = _agent_config.get_default_agent()
33
+ logger.debug("%s", self)
34
+
35
+ def __repr__(self) -> str:
36
+ lines: list[str] = [f"AgentMeta(default_agent={self.default_agent!r})"]
37
+
38
+ lines.append("\n=== prompt_map ===")
39
+ lines.extend(f" - {name}" for name in self.prompt_map)
40
+
41
+ lines.append("\n=== tool_map ===")
42
+ for name, tools in self.tool_map.items():
43
+ tool_names = [getattr(t, "name", repr(t)) for t in tools]
44
+ lines.append(f" {name}: {tool_names}")
45
+
46
+ lines.append("\n=== input_schema_map ===")
47
+ lines.append(json.dumps(self.input_schema_map, indent=2, default=str))
48
+
49
+ lines.append("\n=== output_schema_map ===")
50
+ for name, schema in self.output_schema_map.items():
51
+ lines.append(f"\n--- {name} ---")
52
+ lines.append(json.dumps(schema, indent=2, default=str) if schema else " (no schema)")
53
+
54
+ lines.append("\n=== capabilities_map ===")
55
+ lines.append(json.dumps(self.capabilities_map, indent=2))
56
+
57
+ return "\n".join(lines)
29
58
 
30
59
  @classmethod
31
60
  def instance(cls) -> AgentMeta:
@@ -31,21 +31,38 @@ class JsonDiff:
31
31
  return "\n".join(lines)
32
32
 
33
33
 
34
- def _diff_json(reference: Any, actual: Any, path: str = "") -> JsonDiff:
35
- """Recursive deep diff between two JSON-like structures."""
34
+ def _diff_json(
35
+ reference: Any,
36
+ actual: Any,
37
+ path: str = "",
38
+ ignore_fields: list[str] | None = None,
39
+ ) -> JsonDiff:
40
+ """Recursive deep diff between two JSON-like structures.
41
+
42
+ Args:
43
+ reference: Expected JSON value.
44
+ actual: Actual JSON value from agent output.
45
+ path: Dot-path prefix used in diff messages (internal).
46
+ ignore_fields: Key names to skip at any level of the dict tree.
47
+ """
48
+ ignore = set(ignore_fields or [])
36
49
  diff = JsonDiff()
37
50
 
38
51
  if isinstance(reference, dict) and isinstance(actual, dict):
39
52
  for key in reference:
53
+ if key in ignore:
54
+ continue
40
55
  child_path = f"{path}.{key}" if path else key
41
56
  if key not in actual:
42
57
  diff.missing.append(f"{child_path}: {json.dumps(reference[key])}")
43
58
  else:
44
- child = _diff_json(reference[key], actual[key], child_path)
59
+ child = _diff_json(reference[key], actual[key], child_path, ignore_fields)
45
60
  diff.missing.extend(child.missing)
46
61
  diff.unexpected.extend(child.unexpected)
47
62
  diff.changed.extend(child.changed)
48
63
  for key in actual:
64
+ if key in ignore:
65
+ continue
49
66
  child_path = f"{path}.{key}" if path else key
50
67
  if key not in reference:
51
68
  diff.unexpected.append(f"{child_path}: {json.dumps(actual[key])}")
@@ -58,7 +75,7 @@ def _diff_json(reference: Any, actual: Any, path: str = "") -> JsonDiff:
58
75
  elif i >= len(reference):
59
76
  diff.unexpected.append(f"{child_path}: {json.dumps(actual[i])}")
60
77
  else:
61
- child = _diff_json(reference[i], actual[i], child_path)
78
+ child = _diff_json(reference[i], actual[i], child_path, ignore_fields)
62
79
  diff.missing.extend(child.missing)
63
80
  diff.unexpected.extend(child.unexpected)
64
81
  diff.changed.extend(child.changed)
@@ -135,7 +152,7 @@ def golden_match(output: AgentOutput, config: Any) -> AssertionResult:
135
152
  actual = output.structured_response
136
153
 
137
154
  if mode == "exact":
138
- diff = _diff_json(reference, actual)
155
+ diff = _diff_json(reference, actual, ignore_fields=ignore_fields)
139
156
  if diff.has_differences:
140
157
  return AssertionResult(
141
158
  passed=False,
@@ -39,6 +39,7 @@ def _register_builtins() -> None:
39
39
  llm_judge,
40
40
  trajectory_quality,
41
41
  )
42
+ from autobots_devtools_shared_lib.eval.assertions.written_file import written_file_matches
42
43
 
43
44
  _REGISTRY.update(
44
45
  cast(
@@ -56,6 +57,7 @@ def _register_builtins() -> None:
56
57
  "llm_judge": llm_judge,
57
58
  "trajectory_quality": trajectory_quality,
58
59
  "golden_match": golden_match,
60
+ "written_file_matches": written_file_matches,
59
61
  },
60
62
  )
61
63
  )
@@ -0,0 +1,233 @@
1
+ # ABOUTME: Assertions for files written to the file server workspace by agents.
2
+ # ABOUTME: Complements golden_match (structured_response) for agents that output via file tools.
3
+ """written_file_matches assertion evaluator."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import re
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import jsonschema as js
13
+
14
+ from autobots_devtools_shared_lib.common.utils.fserver_client_utils import read_file as _read_file
15
+ from autobots_devtools_shared_lib.eval.assertions.golden import _deep_structural_compare, _diff_json
16
+ from autobots_devtools_shared_lib.eval.core.workspace import resolve_workspace_context
17
+ from autobots_devtools_shared_lib.eval.models.result import AgentOutput, AssertionResult
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Mode handlers
21
+ # ---------------------------------------------------------------------------
22
+
23
+ FileModeHandler = Any # (content: str, actual: Any, config: dict, name: str) -> AssertionResult
24
+
25
+ _MODE_REGISTRY: dict[str, FileModeHandler] = {}
26
+
27
+
28
+ def _register_mode(name: str, fn: FileModeHandler) -> None:
29
+ _MODE_REGISTRY[name] = fn
30
+
31
+
32
+ def _resolve_mode(mode: str, assertion_name: str) -> FileModeHandler | AssertionResult:
33
+ if mode not in _MODE_REGISTRY:
34
+ available = ", ".join(sorted(_MODE_REGISTRY.keys()))
35
+ return AssertionResult(
36
+ passed=False,
37
+ name=assertion_name,
38
+ detail=f"Unknown mode: '{mode}'. Available: {available}",
39
+ )
40
+ return _MODE_REGISTRY[mode]
41
+
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # Helpers
45
+ # ---------------------------------------------------------------------------
46
+
47
+
48
+ def _strip_code_fences(text: str) -> str:
49
+ match = re.search(r"```(?:\w+)?\s*\n?(.*?)\n?```", text, re.DOTALL)
50
+ return match.group(1).strip() if match else text.strip()
51
+
52
+
53
+ def _read_workspace_file(file_name: str, raw_state: dict[str, Any]) -> str:
54
+ workspace_context = resolve_workspace_context(raw_state)
55
+ content = _read_file(file_name, workspace_context)
56
+ if content.startswith("Error"):
57
+ raise RuntimeError(f"File server read failed for '{file_name}': {content}")
58
+ return content
59
+
60
+
61
+ def _load_json(content: str, assertion_name: str) -> tuple[Any, AssertionResult | None]:
62
+ try:
63
+ return json.loads(_strip_code_fences(content)), None
64
+ except json.JSONDecodeError as e:
65
+ return None, AssertionResult(
66
+ passed=False, name=assertion_name, detail=f"JSON parse error: {e}"
67
+ )
68
+
69
+
70
+ def _load_reference(
71
+ config: dict[str, Any], assertion_name: str
72
+ ) -> tuple[Any, AssertionResult | None]:
73
+ ref_path_str = config.get("reference")
74
+ if not ref_path_str:
75
+ return None, AssertionResult(
76
+ passed=False,
77
+ name=assertion_name,
78
+ detail=f"Mode '{config.get('mode')}' requires 'reference'",
79
+ )
80
+ ref_path = Path(ref_path_str)
81
+ if not ref_path.exists():
82
+ return None, AssertionResult(
83
+ passed=False, name=assertion_name, detail=f"Reference not found: {ref_path}"
84
+ )
85
+ return json.loads(ref_path.read_text()), None
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # Built-in mode implementations
90
+ # ---------------------------------------------------------------------------
91
+
92
+
93
+ def _mode_contains(
94
+ content: str, _actual: Any, config: dict[str, Any], name: str
95
+ ) -> AssertionResult:
96
+ value = str(config.get("value", ""))
97
+ found = value.lower() in content.lower()
98
+ return AssertionResult(
99
+ passed=found,
100
+ name=name,
101
+ detail=f"{'Found' if found else 'Not found'}: {value!r}",
102
+ )
103
+
104
+
105
+ def _mode_schema(_content: str, actual: Any, config: dict[str, Any], name: str) -> AssertionResult:
106
+ schema_source = config.get("schema")
107
+ if schema_source is None:
108
+ return AssertionResult(
109
+ passed=False, name=name, detail="Mode 'schema' requires 'schema' key"
110
+ )
111
+ try:
112
+ schema: dict[str, Any] = (
113
+ json.loads(Path(str(schema_source)).read_text())
114
+ if isinstance(schema_source, str)
115
+ else schema_source
116
+ )
117
+ js.validate(instance=actual, schema=schema)
118
+ return AssertionResult(passed=True, name=name, detail="Schema valid")
119
+ except js.ValidationError as e:
120
+ return AssertionResult(passed=False, name=name, detail=f"Schema invalid: {e.message}")
121
+ except (FileNotFoundError, OSError) as e:
122
+ return AssertionResult(passed=False, name=name, detail=f"Schema load error: {e}")
123
+
124
+
125
+ def _mode_exact(_content: str, actual: Any, config: dict[str, Any], name: str) -> AssertionResult:
126
+ reference, err = _load_reference(config, name)
127
+ if err:
128
+ return err
129
+ ignore_fields: list[str] = config.get("ignore_fields", [])
130
+ diff = _diff_json(reference, actual, ignore_fields=ignore_fields)
131
+ if diff.has_differences:
132
+ return AssertionResult(passed=False, name=name, detail=diff.to_detail())
133
+ return AssertionResult(passed=True, name=name, detail="Exact match")
134
+
135
+
136
+ def _mode_structural(
137
+ _content: str, actual: Any, config: dict[str, Any], name: str
138
+ ) -> AssertionResult:
139
+ reference, err = _load_reference(config, name)
140
+ if err:
141
+ return err
142
+ ignore_fields: list[str] = config.get("ignore_fields", [])
143
+ issues = _deep_structural_compare(reference, actual, ignore_fields=ignore_fields)
144
+ if issues:
145
+ return AssertionResult(
146
+ passed=False,
147
+ name=name,
148
+ detail="Structural mismatch:\n" + "\n".join(f" {i}" for i in issues),
149
+ )
150
+ return AssertionResult(passed=True, name=name, detail="Structural match")
151
+
152
+
153
+ _register_mode("contains", _mode_contains)
154
+ _register_mode("schema", _mode_schema)
155
+ _register_mode("exact", _mode_exact)
156
+ _register_mode("structural", _mode_structural)
157
+
158
+ # ---------------------------------------------------------------------------
159
+ # Core dispatch
160
+ # ---------------------------------------------------------------------------
161
+
162
+ _JSON_MODES = {"schema", "exact", "structural"}
163
+
164
+
165
+ def _single_file_match(
166
+ path: str, config: dict[str, Any], agent_output: AgentOutput
167
+ ) -> AssertionResult:
168
+ assertion_name = f"written_file_matches:{path}"
169
+ mode = config.get("mode", "schema")
170
+
171
+ handler = _resolve_mode(mode, assertion_name)
172
+ if isinstance(handler, AssertionResult):
173
+ return handler
174
+
175
+ try:
176
+ content = _read_workspace_file(path, agent_output.raw_state)
177
+ except RuntimeError as e:
178
+ return AssertionResult(passed=False, name=assertion_name, detail=str(e))
179
+
180
+ # Modes that need parsed JSON get it up front
181
+ actual: Any = None
182
+ if mode in _JSON_MODES:
183
+ actual, err = _load_json(content, assertion_name)
184
+ if err:
185
+ return err
186
+
187
+ return handler(content, actual, config, assertion_name)
188
+
189
+
190
+ # ---------------------------------------------------------------------------
191
+ # Public entry point
192
+ # ---------------------------------------------------------------------------
193
+
194
+
195
+ def written_file_matches(agent_output: AgentOutput, config: Any) -> AssertionResult:
196
+ """Assert one or more workspace files match expected content/structure.
197
+
198
+ Config can be a single dict or a list of dicts. All entries must pass.
199
+
200
+ YAML config keys (per entry):
201
+ path (str): Workspace-relative file path (required).
202
+ mode (str): schema | exact | structural | contains (default: schema).
203
+ schema (str): Path to JSON schema file (mode=schema).
204
+ reference (str): Path to golden reference file (mode=exact|structural).
205
+ ignore_fields (list[str]): Keys to skip at any dict level (mode=exact|structural).
206
+ value (str): Substring to search for (mode=contains).
207
+ """
208
+ if isinstance(config, list):
209
+ entries = config
210
+ elif isinstance(config, dict):
211
+ entries = [config]
212
+ else:
213
+ return AssertionResult(
214
+ passed=False, name="written_file_matches", detail="Config must be a dict or list"
215
+ )
216
+
217
+ results = [_single_file_match(entry.get("path", ""), entry, agent_output) for entry in entries]
218
+ failures = [r for r in results if not r.passed]
219
+ if failures:
220
+ if len(entries) == 1:
221
+ return failures[0]
222
+ return AssertionResult(
223
+ passed=False,
224
+ name="written_file_matches",
225
+ detail="\n".join(f"{r.name}: {r.detail}" for r in failures),
226
+ )
227
+ if len(entries) == 1:
228
+ return results[0]
229
+ return AssertionResult(
230
+ passed=True,
231
+ name="written_file_matches",
232
+ detail=f"All {len(results)} files matched",
233
+ )
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any
8
8
 
9
9
  from autobots_devtools_shared_lib.dynagent.agents.invocation_utils import ainvoke_agent
10
10
  from autobots_devtools_shared_lib.eval.assertions.registry import resolve_assertion
11
+ from autobots_devtools_shared_lib.eval.core.workspace import resolve_eval_state_schema
11
12
  from autobots_devtools_shared_lib.eval.models.result import (
12
13
  AgentOutput,
13
14
  AssertionResult,
@@ -111,6 +112,7 @@ async def run_linear_eval(
111
112
  config=config,
112
113
  enable_tracing=trace_metadata is not None,
113
114
  trace_metadata=trace_metadata,
115
+ state_schema=resolve_eval_state_schema(),
114
116
  )
115
117
 
116
118
  agent_output = _build_agent_output(result)
@@ -0,0 +1,129 @@
1
+ # ABOUTME: Workspace staging for eval runs + WorkspaceContextProvider interface.
2
+ # ABOUTME: Consumers register a provider so shared-lib never hard-codes workspace path formation.
3
+ """Workspace file staging and pluggable workspace context provider."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ from pathlib import Path
9
+ from typing import TYPE_CHECKING, Any, Protocol
10
+
11
+ from autobots_devtools_shared_lib.common.utils.fserver_client_utils import write_file
12
+ from autobots_devtools_shared_lib.dynagent.models.state import Dynagent
13
+
14
+ if TYPE_CHECKING:
15
+ from langchain.agents import AgentState
16
+
17
+ from autobots_devtools_shared_lib.eval.models.eval_case import SetupConfig
18
+
19
+ _state_schema: type[Any] = Dynagent
20
+
21
+
22
+ def register_eval_state_schema(schema: type[Any]) -> None:
23
+ """Register the LangGraph state schema for eval agent invocations.
24
+
25
+ Call once in conftest.py when your agents use a custom state class (e.g. MerState)
26
+ that extends Dynagent with domain-specific fields. Without this, ainvoke_agent
27
+ defaults to Dynagent and drops extra state fields (e.g. jira_number, repo_name).
28
+
29
+ Example (MER consumer)::
30
+
31
+ from autobots_devtools_shared_lib.eval.core.workspace import register_eval_state_schema
32
+ from autobots_agents_mer.common.models.state import MerState
33
+
34
+ register_eval_state_schema(MerState)
35
+ """
36
+ global _state_schema
37
+ _state_schema = schema
38
+
39
+
40
+ def resolve_eval_state_schema() -> type[AgentState]:
41
+ """Return the registered state schema for eval agent invocations."""
42
+ return _state_schema
43
+
44
+
45
+ class WorkspaceContextProvider(Protocol):
46
+ """Protocol for building file-server workspace context from agent state.
47
+
48
+ Implement this in your consumer conftest.py and register via
49
+ register_workspace_context_provider(). Path formation is intentionally
50
+ kept out of shared-lib — each consumer app may have a different convention.
51
+
52
+ Example (MER consumer)::
53
+
54
+ class MerWorkspaceContextProvider:
55
+ def get_workspace_context(self, state: dict) -> str:
56
+ ws = get_workspace_context(state) # MER util
57
+ return json.dumps(ws)
58
+
59
+ register_workspace_context_provider(MerWorkspaceContextProvider())
60
+ """
61
+
62
+ def get_workspace_context(self, state: dict[str, Any]) -> str:
63
+ """Return workspace_context JSON string for fserver_client_utils calls.
64
+
65
+ Args:
66
+ state: Agent state dict (e.g. user_name, repo_name, jira_number).
67
+
68
+ Returns:
69
+ JSON string, e.g. '{"workspace_base_path": "alice/fbp-core-MER-99999"}'.
70
+ """
71
+ ...
72
+
73
+
74
+ _provider: WorkspaceContextProvider | None = None
75
+
76
+
77
+ def register_workspace_context_provider(provider: WorkspaceContextProvider) -> None:
78
+ """Register the workspace context provider.
79
+
80
+ Call once at eval startup, typically from conftest.py, before any evals run.
81
+ """
82
+ global _provider
83
+ _provider = provider
84
+
85
+
86
+ def resolve_workspace_context(state: dict[str, Any]) -> str:
87
+ """Return workspace_context JSON via the registered provider.
88
+
89
+ Raises:
90
+ RuntimeError: If no provider has been registered.
91
+ """
92
+ if _provider is None:
93
+ raise RuntimeError(
94
+ "No WorkspaceContextProvider registered. "
95
+ "Call register_workspace_context_provider() in your conftest.py before running evals."
96
+ )
97
+ return _provider.get_workspace_context(state)
98
+
99
+
100
+ def setup_workspace(config: SetupConfig, state: dict[str, Any] | None = None) -> None:
101
+ """Stage fixture files into the file server workspace before agent invocation.
102
+
103
+ Args:
104
+ config: Setup configuration with workspace_files to stage.
105
+ state: EvalCase state dict used to resolve workspace context via the provider.
106
+
107
+ Raises:
108
+ FileNotFoundError: If a source fixture file does not exist.
109
+ RuntimeError: If the file server returns an error or no provider is registered.
110
+ """
111
+ app_root_path = os.getenv("APP_ROOT_PATH", "")
112
+ workspace_context = resolve_workspace_context(state or {})
113
+
114
+ for wf in config.workspace_files:
115
+ src = Path(app_root_path, wf.src)
116
+ if not src.exists():
117
+ raise FileNotFoundError(
118
+ f"Fixture file not found: {src}. "
119
+ f"Ensure the file exists in the eval fixtures directory."
120
+ )
121
+ content = src.read_text(encoding="utf-8")
122
+ result = write_file(wf.dest, content, workspace_context)
123
+ if result.startswith("Error"):
124
+ raise RuntimeError(f"File server failed to stage '{wf.src}' → '{wf.dest}': {result}")
125
+
126
+
127
+ def teardown_workspace(_workspace_path: str) -> None:
128
+ """No-op: file server manages its own storage. Kept for interface compatibility."""
129
+ pass
@@ -51,7 +51,6 @@ def make_dynagent_eval(
51
51
 
52
52
  async def _eval(eval_case: EvalCase) -> EvalResult:
53
53
  session_id = str(uuid.uuid4())
54
- workspace_path = "/Users/shruthi/Projects/workspace/khushboo-2802394_infosys/fbp-core-genai-sanity-MER-9999"
55
54
 
56
55
  config: RunnableConfig = {
57
56
  "configurable": {
@@ -67,7 +66,7 @@ def make_dynagent_eval(
67
66
 
68
67
  try:
69
68
  # Stage workspace files
70
- setup_workspace(eval_case.setup, workspace_path)
69
+ setup_workspace(eval_case.setup, state=eval_case.state or None)
71
70
 
72
71
  # Run the eval
73
72
  if eval_case.mode == "linear":
@@ -104,7 +103,7 @@ def make_dynagent_eval(
104
103
  post_scores(session_id, result)
105
104
 
106
105
  finally:
107
- teardown_workspace(workspace_path)
106
+ teardown_workspace("")
108
107
 
109
108
  return result
110
109
 
@@ -1,51 +0,0 @@
1
- # ABOUTME: Workspace file staging for eval runs.
2
- # ABOUTME: Copies fixture files into workspace directory before agent invocation.
3
- """Workspace file staging for eval runs."""
4
-
5
- from __future__ import annotations
6
-
7
- import shutil
8
- from pathlib import Path
9
- from typing import TYPE_CHECKING
10
-
11
- if TYPE_CHECKING:
12
- from autobots_devtools_shared_lib.eval.models.eval_case import SetupConfig
13
-
14
-
15
- def setup_workspace(config: SetupConfig, workspace_path: str) -> None:
16
- """Create workspace directory and stage fixture files.
17
-
18
- Args:
19
- config: Setup configuration with workspace_files to stage.
20
- workspace_path: Target workspace directory path.
21
-
22
- Raises:
23
- FileNotFoundError: If a source fixture file does not exist.
24
- """
25
- import os
26
-
27
- app_root_path = os.getenv("APP_ROOT_PATH", "")
28
- workspace = Path(workspace_path)
29
- workspace.mkdir(parents=True, exist_ok=True)
30
-
31
- for wf in config.workspace_files:
32
- src = Path(app_root_path, wf.src)
33
- if not src.exists():
34
- raise FileNotFoundError(
35
- f"Fixture file not found: {src}. "
36
- f"Ensure the file exists in the eval fixtures directory."
37
- )
38
- dest = workspace / wf.dest
39
- dest.parent.mkdir(parents=True, exist_ok=True)
40
- shutil.copy2(src, dest)
41
-
42
-
43
- def teardown_workspace(workspace_path: str) -> None:
44
- """Remove workspace directory and all contents.
45
-
46
- Args:
47
- workspace_path: Workspace directory to remove.
48
- """
49
- workspace = Path(workspace_path)
50
- if workspace.exists():
51
- shutil.rmtree(workspace)