eval-protocol 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. development/__init__.py +1 -0
  2. development/normalize_sandbox_fusion.py +628 -0
  3. development/utils/__init__.py +1 -0
  4. development/utils/generate_api_key.py +31 -0
  5. development/utils/subprocess_manager.py +481 -0
  6. eval_protocol/__init__.py +86 -0
  7. eval_protocol/__main__.py +10 -0
  8. eval_protocol/_version.py +21 -0
  9. eval_protocol/adapters/__init__.py +1 -0
  10. eval_protocol/adapters/braintrust.py +8 -0
  11. eval_protocol/adapters/trl.py +8 -0
  12. eval_protocol/agent/__init__.py +29 -0
  13. eval_protocol/agent/models.py +69 -0
  14. eval_protocol/agent/orchestrator.py +893 -0
  15. eval_protocol/agent/resource_abc.py +89 -0
  16. eval_protocol/agent/resource_pool.py +184 -0
  17. eval_protocol/agent/resources/__init__.py +44 -0
  18. eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
  19. eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
  20. eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
  21. eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
  22. eval_protocol/agent/resources/bfcl_sim_api_resource.py +314 -0
  23. eval_protocol/agent/resources/docker_resource.py +479 -0
  24. eval_protocol/agent/resources/filesystem_resource.py +371 -0
  25. eval_protocol/agent/resources/http_rollout_protocol.py +85 -0
  26. eval_protocol/agent/resources/http_rollout_resource.py +325 -0
  27. eval_protocol/agent/resources/python_state_resource.py +170 -0
  28. eval_protocol/agent/resources/sql_resource.py +271 -0
  29. eval_protocol/agent/task_manager.py +1064 -0
  30. eval_protocol/agent/tool_registry.py +111 -0
  31. eval_protocol/auth.py +156 -0
  32. eval_protocol/cli.py +425 -0
  33. eval_protocol/cli_commands/__init__.py +1 -0
  34. eval_protocol/cli_commands/agent_eval_cmd.py +264 -0
  35. eval_protocol/cli_commands/common.py +242 -0
  36. eval_protocol/cli_commands/deploy.py +486 -0
  37. eval_protocol/cli_commands/deploy_mcp.py +287 -0
  38. eval_protocol/cli_commands/preview.py +186 -0
  39. eval_protocol/cli_commands/run_eval_cmd.py +202 -0
  40. eval_protocol/common_utils.py +36 -0
  41. eval_protocol/config.py +180 -0
  42. eval_protocol/datasets/__init__.py +1 -0
  43. eval_protocol/datasets/loader.py +521 -0
  44. eval_protocol/evaluation.py +1045 -0
  45. eval_protocol/execution/__init__.py +1 -0
  46. eval_protocol/execution/pipeline.py +920 -0
  47. eval_protocol/gcp_tools.py +484 -0
  48. eval_protocol/generation/cache.py +141 -0
  49. eval_protocol/generation/clients/base.py +67 -0
  50. eval_protocol/generation/clients.py +248 -0
  51. eval_protocol/generic_server.py +165 -0
  52. eval_protocol/integrations/__init__.py +12 -0
  53. eval_protocol/integrations/braintrust.py +51 -0
  54. eval_protocol/integrations/deepeval.py +106 -0
  55. eval_protocol/integrations/openeval.py +40 -0
  56. eval_protocol/integrations/trl.py +187 -0
  57. eval_protocol/mcp/__init__.py +48 -0
  58. eval_protocol/mcp/adapter.py +131 -0
  59. eval_protocol/mcp/client/__init__.py +12 -0
  60. eval_protocol/mcp/client/connection.py +499 -0
  61. eval_protocol/mcp/clients.py +195 -0
  62. eval_protocol/mcp/execution/__init__.py +23 -0
  63. eval_protocol/mcp/execution/base_policy.py +227 -0
  64. eval_protocol/mcp/execution/fireworks_policy.py +209 -0
  65. eval_protocol/mcp/execution/manager.py +506 -0
  66. eval_protocol/mcp/execution/policy.py +421 -0
  67. eval_protocol/mcp/grid_renderer.py +54 -0
  68. eval_protocol/mcp/mcpgym.py +637 -0
  69. eval_protocol/mcp/process_manager.py +177 -0
  70. eval_protocol/mcp/session/__init__.py +11 -0
  71. eval_protocol/mcp/session/manager.py +228 -0
  72. eval_protocol/mcp/simple_process_manager.py +291 -0
  73. eval_protocol/mcp/simulation_server.py +458 -0
  74. eval_protocol/mcp/types.py +80 -0
  75. eval_protocol/mcp_agent/__init__.py +1 -0
  76. eval_protocol/mcp_agent/config.py +147 -0
  77. eval_protocol/mcp_agent/intermediary_server.py +542 -0
  78. eval_protocol/mcp_agent/main.py +210 -0
  79. eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
  80. eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
  81. eval_protocol/mcp_agent/orchestration/local_docker_client.py +702 -0
  82. eval_protocol/mcp_agent/orchestration/remote_http_client.py +304 -0
  83. eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
  84. eval_protocol/mcp_agent/session.py +79 -0
  85. eval_protocol/mcp_env.py +304 -0
  86. eval_protocol/models.py +366 -0
  87. eval_protocol/packaging.py +219 -0
  88. eval_protocol/platform_api.py +360 -0
  89. eval_protocol/playback_policy.py +396 -0
  90. eval_protocol/resources.py +128 -0
  91. eval_protocol/reward_function.py +410 -0
  92. eval_protocol/rewards/__init__.py +94 -0
  93. eval_protocol/rewards/accuracy.py +454 -0
  94. eval_protocol/rewards/accuracy_length.py +173 -0
  95. eval_protocol/rewards/apps_coding_reward.py +331 -0
  96. eval_protocol/rewards/apps_execution_utils.py +149 -0
  97. eval_protocol/rewards/apps_testing_util.py +559 -0
  98. eval_protocol/rewards/bfcl_reward.py +313 -0
  99. eval_protocol/rewards/code_execution.py +1620 -0
  100. eval_protocol/rewards/code_execution_utils.py +72 -0
  101. eval_protocol/rewards/cpp_code.py +861 -0
  102. eval_protocol/rewards/deepcoder_reward.py +161 -0
  103. eval_protocol/rewards/format.py +129 -0
  104. eval_protocol/rewards/function_calling.py +541 -0
  105. eval_protocol/rewards/json_schema.py +422 -0
  106. eval_protocol/rewards/language_consistency.py +700 -0
  107. eval_protocol/rewards/lean_prover.py +479 -0
  108. eval_protocol/rewards/length.py +375 -0
  109. eval_protocol/rewards/list_comparison_math_reward.py +221 -0
  110. eval_protocol/rewards/math.py +762 -0
  111. eval_protocol/rewards/multiple_choice_math_reward.py +232 -0
  112. eval_protocol/rewards/reasoning_steps.py +249 -0
  113. eval_protocol/rewards/repetition.py +342 -0
  114. eval_protocol/rewards/tag_count.py +162 -0
  115. eval_protocol/rl_processing.py +82 -0
  116. eval_protocol/server.py +271 -0
  117. eval_protocol/typed_interface.py +260 -0
  118. eval_protocol/utils/__init__.py +8 -0
  119. eval_protocol/utils/batch_evaluation.py +217 -0
  120. eval_protocol/utils/batch_transformation.py +205 -0
  121. eval_protocol/utils/dataset_helpers.py +112 -0
  122. eval_protocol/utils/module_loader.py +56 -0
  123. eval_protocol/utils/packaging_utils.py +108 -0
  124. eval_protocol/utils/static_policy.py +305 -0
  125. eval_protocol-0.0.3.dist-info/METADATA +635 -0
  126. eval_protocol-0.0.3.dist-info/RECORD +130 -0
  127. eval_protocol-0.0.3.dist-info/WHEEL +5 -0
  128. eval_protocol-0.0.3.dist-info/entry_points.txt +4 -0
  129. eval_protocol-0.0.3.dist-info/licenses/LICENSE +201 -0
  130. eval_protocol-0.0.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,89 @@
1
+ """
2
+ Abstract Base Class for Forkable Resources in the Agent Evaluation Framework V2.
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import ( # Callable removed as not directly used in ABC signatures
7
+ Any,
8
+ Dict,
9
+ List,
10
+ Optional,
11
+ )
12
+
13
+
14
class ForkableResource(ABC):
    """
    Interface for a forkable, checkpointable, interactive environment resource
    used for agent evaluation.

    Concrete resources implement every abstract coroutine below. The class also
    provides async context-manager support so that ``close()`` is always awaited::

        resource = SomeResource()
        await resource.setup(config)
        async with resource:
            ...  # use the resource; close() runs on exit, even on error

    Entering the context does NOT call ``setup()`` (it needs a configuration);
    it simply returns the resource itself.
    """

    async def __aenter__(self) -> "ForkableResource":
        """Return the resource itself; setup must already have been performed."""
        return self

    async def __aexit__(self, exc_type, exc_value, traceback) -> None:
        """Await ``close()`` on exit; exceptions from the body are not suppressed."""
        await self.close()

    @abstractmethod
    async def setup(self, config: Dict[str, Any]) -> None:
        """
        Initializes the resource with a given configuration.

        This method should prepare the resource for its first use or fork,
        for example by setting up a database schema, creating a base file
        system, or starting a base Docker container.
        """

    @abstractmethod
    async def fork(self) -> "ForkableResource":
        """
        Creates and returns a new, independent instance of this resource
        holding an identical copy of the current state of the resource it was
        forked from.

        The new instance is typically used for a single agent rollout.
        """

    @abstractmethod
    async def checkpoint(self) -> Any:
        """
        Returns a serializable representation of the resource's current state.

        The format (e.g., bytes, dict, path to a file) is specific to the
        implementation but must be restorable by `restore()`.
        """

    @abstractmethod
    async def restore(self, state_data: Any) -> None:
        """
        Restores the resource's state from previously checkpointed `state_data`.

        Afterwards the resource should be in the same state as when
        `checkpoint()` was called.
        """

    @abstractmethod
    async def step(self, action_name: str, action_params: Dict[str, Any]) -> Any:
        """
        Executes a named action with the given parameters on the resource.

        This typically modifies the resource's state. Returns an observation
        or result that is specific to the resource and the action.
        """

    @abstractmethod
    async def get_observation(self) -> Any:
        """
        Returns the current observable state of the resource for the agent.

        The format of the observation is resource-specific.
        """

    @abstractmethod
    async def get_tools_spec(self) -> List[Dict[str, Any]]:
        """
        Returns a list of tool specifications (e.g., OpenAI function calling
        format) that are currently available or applicable to this resource.

        The list can be dynamic, changing with the resource's current state.
        """

    @abstractmethod
    async def close(self) -> None:
        """
        Performs any necessary cleanup for the resource.

        This includes releasing acquired resources like database connections,
        stopping containers, deleting temporary files or directories, etc.
        """
@@ -0,0 +1,184 @@
1
+ """
2
+ Resource Pool for the Agent Evaluation Framework V2.
3
+ Manages and allocates resources to specific tasks.
4
+ """
5
+
6
+ import asyncio
7
+ import logging
8
+ from typing import Any, Dict, List, Optional, Set, Type
9
+
10
+ from .resource_abc import ForkableResource
11
+
12
+
13
class ResourcePool:
    """
    Manages a pool of ForkableResources that can be shared and reused across tasks.

    The pool keeps two indexes that mirror each other:
    ``resource_tasks`` (resource_id -> task_ids using it) and
    ``task_resources`` (task_id -> resource_ids it uses). A resource is closed
    and dropped once the last task associated with it has been cleaned up.
    """

    def __init__(self):
        """Initialize an empty resource pool."""
        self.resources: Dict[str, "ForkableResource"] = {}  # resource_id -> resource instance
        self.resource_tasks: Dict[str, Set[str]] = {}  # resource_id -> set of task_ids using it
        self.task_resources: Dict[str, Set[str]] = {}  # task_id -> set of resource_ids used by it
        self.logger = logging.getLogger("ResourcePool")

    async def create_resource(
        self,
        resource_type: Type["ForkableResource"],
        resource_id: str,
        config: Dict[str, Any],
        task_id: Optional[str] = None,
    ) -> Optional["ForkableResource"]:
        """
        Create a new resource of the specified type and add it to the pool.

        If ``resource_id`` is already pooled, the existing instance is returned
        (and associated with ``task_id`` when one is given).

        Args:
            resource_type: The ForkableResource class to instantiate
            resource_id: Unique identifier for the resource
            config: Configuration dictionary for the resource setup
            task_id: Optional task ID to associate with this resource

        Returns:
            The created resource or None if creation fails
        """
        if resource_id in self.resources:
            self.logger.warning(f"Resource '{resource_id}' already exists in the pool. Returning existing instance.")
            if task_id:
                self._associate_task_with_resource(task_id, resource_id)
            return self.resources[resource_id]

        resource = None
        try:
            resource = resource_type()
            await resource.setup(config)
        except Exception as e:
            self.logger.error(f"Failed to create resource '{resource_id}': {e}")
            # A half-initialized resource may already hold connections,
            # containers or temp files; release them instead of leaking.
            if resource is not None:
                await self._close_quietly(resource, resource_id)
            return None

        self.resources[resource_id] = resource
        self.resource_tasks[resource_id] = set()
        if task_id:
            self._associate_task_with_resource(task_id, resource_id)

        # Factories such as functools.partial have no __name__.
        type_name = getattr(resource_type, "__name__", repr(resource_type))
        self.logger.info(f"Created resource '{resource_id}' of type {type_name}")
        return resource

    def get_resource(self, resource_id: str) -> Optional["ForkableResource"]:
        """
        Get a resource from the pool by its ID.

        Args:
            resource_id: The identifier of the resource to retrieve

        Returns:
            The resource instance or None if not found
        """
        return self.resources.get(resource_id)

    def _associate_task_with_resource(self, task_id: str, resource_id: str) -> None:
        """
        Associate a task with a resource for tracking purposes.

        Args:
            task_id: The task identifier
            resource_id: The resource identifier
        """
        if resource_id not in self.resources:
            self.logger.warning(f"Cannot associate task '{task_id}' with non-existent resource '{resource_id}'.")
            return

        # Update both directions of the index.
        self.resource_tasks.setdefault(resource_id, set()).add(task_id)
        self.task_resources.setdefault(task_id, set()).add(resource_id)

        self.logger.debug(f"Associated task '{task_id}' with resource '{resource_id}'.")

    async def fork_resource_for_task(self, resource_id: str, task_id: str) -> Optional["ForkableResource"]:
        """
        Fork a resource for a specific task.

        Forked resources are not tracked by the pool; the caller owns them and
        is responsible for closing them.

        Args:
            resource_id: The identifier of the resource to fork
            task_id: The task that will use the forked resource

        Returns:
            The forked resource instance or None if forking fails
        """
        base_resource = self.get_resource(resource_id)
        if not base_resource:
            self.logger.error(f"Cannot fork non-existent resource '{resource_id}'.")
            return None

        try:
            forked_resource = await base_resource.fork()
        except Exception as e:
            self.logger.error(f"Failed to fork resource '{resource_id}' for task '{task_id}': {e}")
            return None

        self.logger.debug(f"Forked resource '{resource_id}' for task '{task_id}'.")
        return forked_resource

    async def cleanup_task_resources(self, task_id: str) -> None:
        """
        Clean up all resources associated with a task.

        Each resource loses its association with the task; resources left with
        no tasks are closed and removed from the pool.

        Args:
            task_id: The task identifier
        """
        resource_ids = self.task_resources.pop(task_id, None)
        if resource_ids is None:
            self.logger.debug(f"No resources to clean up for task '{task_id}'.")
            return

        for resource_id in resource_ids:
            users = self.resource_tasks.get(resource_id)
            if users is None:
                # Stale entry: the resource was already closed elsewhere.
                continue
            users.discard(task_id)
            if not users:
                await self.close_resource(resource_id)

        self.logger.info(f"Cleaned up resources for task '{task_id}'.")

    async def close_resource(self, resource_id: str) -> None:
        """
        Close a resource and remove it from the pool.

        The resource is removed from all tracking structures even when its
        ``close()`` raises, so a broken resource is never handed out again.

        Args:
            resource_id: The identifier of the resource to close
        """
        resource = self.resources.pop(resource_id, None)
        if resource is None:
            self.logger.debug(f"Cannot close non-existent resource '{resource_id}'.")
            return

        # Drop the reverse index as well; otherwise a later resource created
        # under the same id would look like it belongs to the old tasks.
        for task_id in self.resource_tasks.pop(resource_id, ()):
            task_set = self.task_resources.get(task_id)
            if task_set is not None:
                task_set.discard(resource_id)

        try:
            await resource.close()
        except Exception as e:
            self.logger.error(f"Error closing resource '{resource_id}': {e}")
        else:
            self.logger.info(f"Closed and removed resource '{resource_id}' from pool.")

    async def close_all_resources(self) -> None:
        """Close all resources in the pool and clear it."""
        for resource_id in list(self.resources):
            await self.close_resource(resource_id)

        # Clear all tracking dictionaries
        self.resources.clear()
        self.resource_tasks.clear()
        self.task_resources.clear()
        self.logger.info("Closed all resources in the pool.")

    async def _close_quietly(self, resource: "ForkableResource", resource_id: str) -> None:
        """Best-effort close of an untracked resource; errors are logged, not raised."""
        try:
            await resource.close()
        except Exception as e:
            self.logger.error(f"Error closing resource '{resource_id}': {e}")
@@ -0,0 +1,44 @@
1
+ """
2
+ Resources for the Reward Kit Agent V2 Framework.
3
+
4
+ This package contains concrete implementations of the ForkableResource ABC.
5
+ """
6
+
7
+ from .bfcl_sim_api_resource import BFCLSimAPIResource
8
+ from .docker_resource import DockerResource
9
+ from .filesystem_resource import FileSystemResource
10
+
11
+ # HTTP Rollout Protocol types for server implementations
12
+ from .http_rollout_protocol import (
13
+ EndEpisodeRequest,
14
+ EndEpisodeResponse,
15
+ GameObservation,
16
+ HealthResponse,
17
+ HttpRolloutConfig,
18
+ StartEpisodeRequest,
19
+ StartEpisodeResponse,
20
+ StepRequest,
21
+ StepResponse,
22
+ )
23
+ from .http_rollout_resource import HttpRolloutResource
24
+ from .python_state_resource import PythonStateResource
25
+ from .sql_resource import SQLResource
26
+
27
+ __all__ = [
28
+ "PythonStateResource",
29
+ "SQLResource",
30
+ "FileSystemResource",
31
+ "DockerResource",
32
+ "BFCLSimAPIResource",
33
+ "HttpRolloutResource",
34
+ # HTTP Rollout Protocol
35
+ "HttpRolloutConfig",
36
+ "StartEpisodeRequest",
37
+ "StartEpisodeResponse",
38
+ "StepRequest",
39
+ "StepResponse",
40
+ "EndEpisodeRequest",
41
+ "EndEpisodeResponse",
42
+ "HealthResponse",
43
+ "GameObservation",
44
+ ]
@@ -0,0 +1 @@
1
+ # BFCL environment implementations
@@ -0,0 +1,342 @@
1
+ """Implementation of GorillaFileSystem."""
2
+
3
+ from typing import Dict, Optional, Union
4
+
5
+
6
class File:
    """A file in the Gorilla File System: a name, text content and a parent link."""

    def __init__(self, name: str = "", content: str = "", parent: Optional["Directory"] = None):
        self.name: str = name
        self.content: str = content
        self.parent: Optional["Directory"] = parent

    def __repr__(self):
        preview = self.content[:20]
        suffix = "..." if len(self.content) > 20 else ""
        return f"<File: {self.name}, Content: '{preview}{suffix}'>"

    def __eq__(self, other):
        """Files are equal when name and content match; the parent is ignored."""
        if not isinstance(other, File):
            return NotImplemented
        return self.name == other.name and self.content == other.content

    # Equality is structural and instances are mutable, so they stay unhashable.
    __hash__ = None


class Directory:
    """A directory in the Gorilla File System: a name, a parent link and named children."""

    def __init__(
        self,
        name: str = "",
        parent: Optional["Directory"] = None,
        contents: Optional[Dict[str, Union[File, "Directory"]]] = None,
    ):
        self.name: str = name
        self.parent: Optional["Directory"] = parent
        # Only substitute a fresh dict for None, so a caller-supplied (possibly
        # still empty) mapping is kept by reference.
        self.contents: Dict[str, Union[File, Directory]] = {} if contents is None else contents

    def __repr__(self):
        parent_name = self.parent.name if self.parent else None
        return f"<Directory: {self.name}, Parent: {parent_name}, Keys: {list(self.contents.keys())}>"

    def __eq__(self, other):
        """Directories are equal when name and (recursively) contents match."""
        if not isinstance(other, Directory):
            return NotImplemented
        return self.name == other.name and self.contents == other.contents

    # Equality is structural and instances are mutable, so they stay unhashable.
    __hash__ = None


class GorillaFileSystem:
    """
    A small in-memory file system for BFCL evaluation.

    State consists of ``root`` (a directory named "workspace"), the
    ``current_dir`` cursor and a ``long_context`` flag. Commands return plain
    dicts; failures are reported through the returned dict, not exceptions.
    """

    def __init__(self):
        self.root: Directory = Directory(name="workspace", parent=None)
        self.current_dir: Directory = self.root
        self.long_context: bool = False

    def _load_scenario(self, config: Dict):
        """
        Load the file system from configuration.

        ``config["root"]`` may use the typed format (dicts carrying a "type"
        key) or the YAML format (dicts carrying "contents"/"content"). If
        loading raises, the file system is reset to an empty workspace.
        ``config["long_context"]``, when present, replaces the flag.
        """
        if "root" in config:
            try:
                root_config = config["root"]
                loaded_dir: Optional[Directory] = None
                if isinstance(root_config, dict):
                    if "type" in root_config:
                        loaded_dir = self._load_directory_from_config("workspace", None, root_config)
                    else:
                        loaded_dir = self._load_directory_from_yaml_config("workspace", None, root_config)

                # When nothing was loaded the existing tree is left untouched.
                if loaded_dir is not None:
                    self.root = loaded_dir
                    self.current_dir = self.root
            except Exception as e:
                print(f"Error loading GorillaFileSystem scenario: {e}")
                # A failed load must not leave a half-built tree behind.
                self.root = Directory(name="workspace", parent=None)
                self.current_dir = self.root

        if "long_context" in config:
            self.long_context = config["long_context"]

    def _load_directory_from_config(self, name: str, parent: Optional[Directory], config: Dict) -> Optional[Directory]:
        """
        Create a directory structure from the typed configuration format.

        Returns None when ``config`` is not of type "directory". Children whose
        type is neither "directory" nor "file" are ignored.
        """
        if config.get("type") != "directory":
            return None

        directory = Directory(name=name, parent=parent)
        contents: Dict[str, Union[File, Directory]] = {}
        for item_name, item_config in config.get("contents", {}).items():
            item_type = item_config.get("type")
            if item_type == "directory":
                loaded_item = self._load_directory_from_config(item_name, directory, item_config)
                if loaded_item is not None:
                    contents[item_name] = loaded_item
            elif item_type == "file":
                contents[item_name] = File(
                    name=item_name,
                    content=item_config.get("content", ""),
                    parent=directory,
                )
        directory.contents = contents
        return directory

    def _load_directory_from_yaml_config(self, name: str, parent: Optional[Directory], config: Dict) -> Directory:
        """
        Create a directory structure from the YAML configuration format.

        A child dict is a directory if it has "contents", a file if it has
        "content"; otherwise its "type" key decides. Anything else is skipped.
        """
        directory = Directory(name=name, parent=parent)
        contents: Dict[str, Union[File, Directory]] = {}

        config_contents = config.get("contents", {})
        if not isinstance(config_contents, dict):
            config_contents = {}

        for item_name, item_config in config_contents.items():
            if not isinstance(item_config, dict):
                continue

            if "contents" in item_config:
                is_directory = True
            elif "content" in item_config:
                is_directory = False
            else:
                item_type = item_config.get("type")
                if item_type == "directory":
                    is_directory = True
                elif item_type == "file":
                    is_directory = False
                else:
                    continue

            if is_directory:
                contents[item_name] = self._load_directory_from_yaml_config(item_name, directory, item_config)
            else:
                contents[item_name] = File(
                    name=item_name,
                    content=item_config.get("content", ""),
                    parent=directory,
                )

        directory.contents = contents
        return directory

    def ls(self, path: Optional[str] = None) -> Dict:
        """List the contents of ``path`` (or of the current directory when omitted)."""
        target_dir: Directory = self.current_dir
        if path:
            found_node = self._find_path(path)
            if not isinstance(found_node, Directory):
                return {"error": f"Path not found or not a directory: {path}"}
            target_dir = found_node

        items: Dict[str, Dict[str, str]] = {}
        for name, item in target_dir.contents.items():
            if isinstance(item, Directory):
                items[name] = {"type": "directory"}
            elif isinstance(item, File):
                items[name] = {"type": "file"}

        return {"current_directory": target_dir.name, "contents": items}

    def cd(self, folder: str) -> Dict:
        """Change into a direct child directory, or into the parent for ".."."""
        if folder == "..":
            parent_dir = self.current_dir.parent
            if parent_dir is None:
                return {"status": "error", "message": "Already at root directory"}
            self.current_dir = parent_dir
            return {
                "status": "success",
                "message": f"Changed to {self.current_dir.name}",
            }

        target_item = self.current_dir.contents.get(folder)
        if isinstance(target_item, Directory):
            self.current_dir = target_item
            return {"status": "success", "message": f"Changed to {folder}"}

        return {"status": "error", "message": f"Directory {folder} not found"}

    def mkdir(self, dir_name: str) -> Dict:
        """Create a new directory inside the current directory."""
        if dir_name in self.current_dir.contents:
            return {
                "status": "error",
                "message": f"Directory {dir_name} already exists",
            }

        self.current_dir.contents[dir_name] = Directory(name=dir_name, parent=self.current_dir)
        return {"status": "success", "message": f"Created directory {dir_name}"}

    def cat(self, file_name: str) -> Dict:
        """Return the content of a file in the current directory."""
        item = self.current_dir.contents.get(file_name)
        if not isinstance(item, File):
            return {"status": "error", "message": f"File {file_name} not found"}

        return {"status": "success", "content": item.content}

    def mv(self, source: str, destination: str) -> Dict:
        """
        Move or rename an entry of the current directory.

        ``source`` is a name in the current directory. ``destination`` is the
        new name, optionally preceded by a directory path (relative to the
        current directory, or absolute when it starts with "/"). The move is
        refused when the source is missing, the destination name is empty,
        the target directory cannot be resolved, the destination already
        exists, or a directory would be moved into itself or a descendant.
        """
        source_item = self.current_dir.contents.get(source)
        if source_item is None:
            return {"status": "error", "message": f"Source {source} not found"}

        target_dir_path, separator, dest_name = destination.rpartition("/")
        if not dest_name:
            # "" or "dir/" would otherwise store the node under an empty key.
            return {"status": "error", "message": f"Invalid destination {destination}"}

        final_target_dir: Directory = self.current_dir
        if separator:
            # An empty head with a separator means the destination is "/name",
            # i.e. directly under the root rather than the current directory.
            target_dir_path = target_dir_path or "/"
            found_dir = self._find_path(target_dir_path)
            if not isinstance(found_dir, Directory):
                return {
                    "status": "error",
                    "message": f"Target directory path {target_dir_path} not found or not a directory",
                }
            final_target_dir = found_dir

        # Moving a directory below itself would detach the subtree and create
        # a parent cycle. Identity is used because equality is structural.
        ancestor: Optional[Directory] = final_target_dir
        while ancestor is not None:
            if ancestor is source_item:
                return {
                    "status": "error",
                    "message": f"Cannot move {source} into itself or one of its subdirectories",
                }
            ancestor = ancestor.parent

        if dest_name in final_target_dir.contents:
            return {
                "status": "error",
                "message": f"Destination {destination} already exists",
            }

        del self.current_dir.contents[source]
        source_item.name = dest_name
        source_item.parent = final_target_dir
        final_target_dir.contents[dest_name] = source_item

        return {"status": "success", "message": f"Moved {source} to {destination}"}

    def grep(self, file_name: str, pattern: str) -> Dict:
        """Return the lines of a file that contain ``pattern`` (plain substring match)."""
        item = self.current_dir.contents.get(file_name)
        if not isinstance(item, File):
            return {"status": "error", "message": f"File {file_name} not found"}

        matches = [line for line in item.content.split("\n") if pattern in line]
        return {"status": "success", "matches": matches, "count": len(matches)}

    def sort(self, file_name: str) -> Dict:
        """Sort the lines of a file in place."""
        item = self.current_dir.contents.get(file_name)
        if not isinstance(item, File):
            return {"status": "error", "message": f"File {file_name} not found"}

        item.content = "\n".join(sorted(item.content.split("\n")))
        return {"status": "success", "message": f"Sorted {file_name}"}

    def diff(self, file_name1: str, file_name2: str) -> Dict:
        """
        Compare two files of the current directory line by line.

        Each difference records the 1-based line number and both lines; a line
        missing from the shorter file is reported as None.
        """
        item1 = self.current_dir.contents.get(file_name1)
        item2 = self.current_dir.contents.get(file_name2)

        if not isinstance(item1, File):
            return {"status": "error", "message": f"File {file_name1} not found"}
        if not isinstance(item2, File):
            return {"status": "error", "message": f"File {file_name2} not found"}

        if item1.content == item2.content:
            return {
                "status": "success",
                "message": "Files are identical",
                "differences": [],
            }

        lines1 = item1.content.split("\n")
        lines2 = item2.content.split("\n")
        differences = []
        for i in range(max(len(lines1), len(lines2))):
            line1_val = lines1[i] if i < len(lines1) else None
            line2_val = lines2[i] if i < len(lines2) else None
            if line1_val != line2_val:
                differences.append({"line": i + 1, "file1": line1_val, "file2": line2_val})

        return {
            "status": "success",
            "message": f"Found {len(differences)} differences",
            "differences": differences,
        }

    def _find_path(self, path: str) -> Optional[Union[File, Directory]]:
        """
        Resolve ``path`` to a File or Directory; return None if it does not exist.

        Paths starting with "/" are resolved from the root, all others from the
        current directory. Empty components and "." are skipped, ".." moves to
        the parent (going above the root yields None). An empty path or "."
        resolves to the current directory.
        """
        if not path or path == ".":
            return self.current_dir

        if path.startswith("/"):
            current_node: Directory = self.root
            trimmed = path.strip("/")
            parts = trimmed.split("/") if trimmed else []
        else:
            current_node = self.current_dir
            parts = path.split("/")

        last_index = len(parts) - 1
        for i, part_name in enumerate(parts):
            if part_name in ("", "."):
                continue

            if part_name == "..":
                if current_node.parent is None:
                    return None
                current_node = current_node.parent
                continue

            found_item = current_node.contents.get(part_name)
            if i == last_index:
                return found_item

            # Intermediate components must be directories.
            if not isinstance(found_item, Directory):
                return None
            current_node = found_item

        # Reached when the path ended on "/", "." or "..".
        return current_node