eval-protocol 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. development/__init__.py +1 -0
  2. development/normalize_sandbox_fusion.py +628 -0
  3. development/utils/__init__.py +1 -0
  4. development/utils/generate_api_key.py +31 -0
  5. development/utils/subprocess_manager.py +481 -0
  6. eval_protocol/__init__.py +86 -0
  7. eval_protocol/__main__.py +10 -0
  8. eval_protocol/_version.py +21 -0
  9. eval_protocol/adapters/__init__.py +1 -0
  10. eval_protocol/adapters/braintrust.py +8 -0
  11. eval_protocol/adapters/trl.py +8 -0
  12. eval_protocol/agent/__init__.py +29 -0
  13. eval_protocol/agent/models.py +69 -0
  14. eval_protocol/agent/orchestrator.py +893 -0
  15. eval_protocol/agent/resource_abc.py +89 -0
  16. eval_protocol/agent/resource_pool.py +184 -0
  17. eval_protocol/agent/resources/__init__.py +44 -0
  18. eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
  19. eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
  20. eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
  21. eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
  22. eval_protocol/agent/resources/bfcl_sim_api_resource.py +314 -0
  23. eval_protocol/agent/resources/docker_resource.py +479 -0
  24. eval_protocol/agent/resources/filesystem_resource.py +371 -0
  25. eval_protocol/agent/resources/http_rollout_protocol.py +85 -0
  26. eval_protocol/agent/resources/http_rollout_resource.py +325 -0
  27. eval_protocol/agent/resources/python_state_resource.py +170 -0
  28. eval_protocol/agent/resources/sql_resource.py +271 -0
  29. eval_protocol/agent/task_manager.py +1064 -0
  30. eval_protocol/agent/tool_registry.py +111 -0
  31. eval_protocol/auth.py +156 -0
  32. eval_protocol/cli.py +425 -0
  33. eval_protocol/cli_commands/__init__.py +1 -0
  34. eval_protocol/cli_commands/agent_eval_cmd.py +264 -0
  35. eval_protocol/cli_commands/common.py +242 -0
  36. eval_protocol/cli_commands/deploy.py +486 -0
  37. eval_protocol/cli_commands/deploy_mcp.py +287 -0
  38. eval_protocol/cli_commands/preview.py +186 -0
  39. eval_protocol/cli_commands/run_eval_cmd.py +202 -0
  40. eval_protocol/common_utils.py +36 -0
  41. eval_protocol/config.py +180 -0
  42. eval_protocol/datasets/__init__.py +1 -0
  43. eval_protocol/datasets/loader.py +521 -0
  44. eval_protocol/evaluation.py +1045 -0
  45. eval_protocol/execution/__init__.py +1 -0
  46. eval_protocol/execution/pipeline.py +920 -0
  47. eval_protocol/gcp_tools.py +484 -0
  48. eval_protocol/generation/cache.py +141 -0
  49. eval_protocol/generation/clients/base.py +67 -0
  50. eval_protocol/generation/clients.py +248 -0
  51. eval_protocol/generic_server.py +165 -0
  52. eval_protocol/integrations/__init__.py +12 -0
  53. eval_protocol/integrations/braintrust.py +51 -0
  54. eval_protocol/integrations/deepeval.py +106 -0
  55. eval_protocol/integrations/openeval.py +40 -0
  56. eval_protocol/integrations/trl.py +187 -0
  57. eval_protocol/mcp/__init__.py +48 -0
  58. eval_protocol/mcp/adapter.py +131 -0
  59. eval_protocol/mcp/client/__init__.py +12 -0
  60. eval_protocol/mcp/client/connection.py +499 -0
  61. eval_protocol/mcp/clients.py +195 -0
  62. eval_protocol/mcp/execution/__init__.py +23 -0
  63. eval_protocol/mcp/execution/base_policy.py +227 -0
  64. eval_protocol/mcp/execution/fireworks_policy.py +209 -0
  65. eval_protocol/mcp/execution/manager.py +506 -0
  66. eval_protocol/mcp/execution/policy.py +421 -0
  67. eval_protocol/mcp/grid_renderer.py +54 -0
  68. eval_protocol/mcp/mcpgym.py +637 -0
  69. eval_protocol/mcp/process_manager.py +177 -0
  70. eval_protocol/mcp/session/__init__.py +11 -0
  71. eval_protocol/mcp/session/manager.py +228 -0
  72. eval_protocol/mcp/simple_process_manager.py +291 -0
  73. eval_protocol/mcp/simulation_server.py +458 -0
  74. eval_protocol/mcp/types.py +80 -0
  75. eval_protocol/mcp_agent/__init__.py +1 -0
  76. eval_protocol/mcp_agent/config.py +147 -0
  77. eval_protocol/mcp_agent/intermediary_server.py +542 -0
  78. eval_protocol/mcp_agent/main.py +210 -0
  79. eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
  80. eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
  81. eval_protocol/mcp_agent/orchestration/local_docker_client.py +702 -0
  82. eval_protocol/mcp_agent/orchestration/remote_http_client.py +304 -0
  83. eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
  84. eval_protocol/mcp_agent/session.py +79 -0
  85. eval_protocol/mcp_env.py +304 -0
  86. eval_protocol/models.py +366 -0
  87. eval_protocol/packaging.py +219 -0
  88. eval_protocol/platform_api.py +360 -0
  89. eval_protocol/playback_policy.py +396 -0
  90. eval_protocol/resources.py +128 -0
  91. eval_protocol/reward_function.py +410 -0
  92. eval_protocol/rewards/__init__.py +94 -0
  93. eval_protocol/rewards/accuracy.py +454 -0
  94. eval_protocol/rewards/accuracy_length.py +173 -0
  95. eval_protocol/rewards/apps_coding_reward.py +331 -0
  96. eval_protocol/rewards/apps_execution_utils.py +149 -0
  97. eval_protocol/rewards/apps_testing_util.py +559 -0
  98. eval_protocol/rewards/bfcl_reward.py +313 -0
  99. eval_protocol/rewards/code_execution.py +1620 -0
  100. eval_protocol/rewards/code_execution_utils.py +72 -0
  101. eval_protocol/rewards/cpp_code.py +861 -0
  102. eval_protocol/rewards/deepcoder_reward.py +161 -0
  103. eval_protocol/rewards/format.py +129 -0
  104. eval_protocol/rewards/function_calling.py +541 -0
  105. eval_protocol/rewards/json_schema.py +422 -0
  106. eval_protocol/rewards/language_consistency.py +700 -0
  107. eval_protocol/rewards/lean_prover.py +479 -0
  108. eval_protocol/rewards/length.py +375 -0
  109. eval_protocol/rewards/list_comparison_math_reward.py +221 -0
  110. eval_protocol/rewards/math.py +762 -0
  111. eval_protocol/rewards/multiple_choice_math_reward.py +232 -0
  112. eval_protocol/rewards/reasoning_steps.py +249 -0
  113. eval_protocol/rewards/repetition.py +342 -0
  114. eval_protocol/rewards/tag_count.py +162 -0
  115. eval_protocol/rl_processing.py +82 -0
  116. eval_protocol/server.py +271 -0
  117. eval_protocol/typed_interface.py +260 -0
  118. eval_protocol/utils/__init__.py +8 -0
  119. eval_protocol/utils/batch_evaluation.py +217 -0
  120. eval_protocol/utils/batch_transformation.py +205 -0
  121. eval_protocol/utils/dataset_helpers.py +112 -0
  122. eval_protocol/utils/module_loader.py +56 -0
  123. eval_protocol/utils/packaging_utils.py +108 -0
  124. eval_protocol/utils/static_policy.py +305 -0
  125. eval_protocol-0.0.3.dist-info/METADATA +635 -0
  126. eval_protocol-0.0.3.dist-info/RECORD +130 -0
  127. eval_protocol-0.0.3.dist-info/WHEEL +5 -0
  128. eval_protocol-0.0.3.dist-info/entry_points.txt +4 -0
  129. eval_protocol-0.0.3.dist-info/licenses/LICENSE +201 -0
  130. eval_protocol-0.0.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,421 @@
1
+ """
2
+ LLM Policy Execution and Tool Calling
3
+
4
+ Base classes and implementations for LLM policies that work with MCP environments.
5
+ Extracted from mcp_env.py to improve modularity and enable OpenAI integration.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import os
12
+ from abc import ABC, abstractmethod
13
+ from typing import Any, Dict, List, Optional, Tuple, Union
14
+
15
+ from concurrent.futures import ThreadPoolExecutor
16
+
17
+ from .base_policy import LLMBasePolicy
18
+ from ..types import LLMUsageStats, MCPToolCall
19
+
20
+ # Try to import FireworksPolicy from separate module - it's optional
21
+ try:
22
+ from .fireworks_policy import FireworksPolicy
23
+ except ImportError:
24
+ # FireworksPolicy not available (fireworks-ai package not installed)
25
+ FireworksPolicy = None
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+
31
+
32
class OpenAIPolicy(LLMBasePolicy):
    """
    OpenAI policy implementation that works with ANY MCP environment via tool calling.

    NO environment-specific logic - everything comes from MCP tools and dataset prompts.
    Supports both live mode (using OpenAI API) and playback mode (replaying recorded trajectories).
    """

    def __init__(
        self,
        model_id: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        max_tools_per_turn: Optional[int] = None,
        **kwargs,
    ):
        """
        Initialize OpenAI policy.

        Args:
            model_id: OpenAI model identifier (e.g., "gpt-4o", "gpt-4o-mini", "gpt-4-turbo")
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum tokens to generate per request
            max_tools_per_turn: Maximum number of tool calls per turn (None = unlimited, 1 = single tool)

        Raises:
            ImportError: In live mode, if the 'openai' package is not installed.
            ValueError: In live mode, if OPENAI_API_KEY is not set.
            RuntimeError: In live mode, if constructing the client fails.
        """
        super().__init__(model_id, temperature, max_tokens, max_tools_per_turn, **kwargs)

        # In playback mode no API client is needed, so skip its construction.
        if self._is_playback:
            self.client = None
            logger.info("🎬 Playback mode: Skipping OpenAI client initialization for performance")
            return

        # The SDK is imported lazily so the module stays importable without it.
        try:
            from openai import AsyncOpenAI
        except ImportError as e:
            raise ImportError(
                "The 'openai' package is required for OpenAIPolicy. " "Please install it with 'pip install openai'"
            ) from e

        # Verify authentication
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            raise ValueError(
                "OPENAI_API_KEY environment variable is required "
                "to use OpenAIPolicy. Set this variable before running."
            )

        # Initialize the OpenAI client
        try:
            self.client = AsyncOpenAI(api_key=api_key)
            logger.info(f"✅ Initialized OpenAI client: {self.model_id}")
        except Exception as e:
            raise RuntimeError(f"Failed to initialize OpenAI client for '{self.model_id}': {e}") from e

    def _clean_messages_for_api(self, messages: List[Dict]) -> List[Dict]:
        """
        Clean messages by removing metadata fields that OpenAI API doesn't accept.

        Args:
            messages: Conversation messages with potential metadata

        Returns:
            Shallow copies of the messages without the "metadata" key; the
            input messages are not modified.
        """
        clean_messages = []
        for msg in messages:
            clean_msg = msg.copy()
            clean_msg.pop("metadata", None)
            clean_messages.append(clean_msg)
        return clean_messages

    async def _make_llm_call(self, messages: List[Dict], tools: List[Dict]) -> Dict:
        """
        Make an OpenAI API call.

        Args:
            messages: Conversation messages (may contain metadata)
            tools: Available tools in OpenAI format

        Returns:
            Dict with "choices" (one entry whose "message" holds "content" and
            a list of "tool_calls") and "usage" token counts.

        Raises:
            RuntimeError: If the client was not initialized (playback mode).
        """
        if self.client is None:
            raise RuntimeError("OpenAI client not initialized")

        current_request = {
            "model": self.model_id,
            # Metadata is stripped because the API does not accept it.
            "messages": self._clean_messages_for_api(messages),
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }
        # Only send `tools` when there is at least one: the API rejects an
        # empty array. This mirrors AnthropicPolicy._make_llm_call.
        if tools:
            current_request["tools"] = tools

        # Make the API call
        response = await self.client.chat.completions.create(**current_request)

        # Convert OpenAI response to standard format
        message = response.choices[0].message
        tool_calls = [
            {
                "id": tc.id,
                "type": tc.type,
                "function": {
                    "name": tc.function.name,
                    "arguments": tc.function.arguments,
                },
            }
            for tc in (message.tool_calls or [])
        ]

        # `usage` may be absent on a response; report zeros instead of crashing.
        usage = response.usage
        return {
            "choices": [
                {
                    "message": {
                        "content": message.content,
                        "tool_calls": tool_calls,
                    }
                }
            ],
            "usage": {
                "prompt_tokens": usage.prompt_tokens if usage is not None else 0,
                "completion_tokens": usage.completion_tokens if usage is not None else 0,
                "total_tokens": usage.total_tokens if usage is not None else 0,
            },
        }

    def _convert_mcp_tools_to_llm_format(self, mcp_tools: List[Dict]) -> List[Dict]:
        """
        Convert MCP tool schemas to OpenAI function calling format.

        Args:
            mcp_tools: List of MCP tool definitions; each must have a "name" and
                may have "description" and "input_schema".

        Returns:
            List of OpenAI-compatible tool definitions
        """
        return [
            {
                "type": "function",
                "function": {
                    "name": mcp_tool["name"],
                    "description": mcp_tool.get("description", f"Execute {mcp_tool['name']} action"),
                    # Tools without a schema are exposed as taking no arguments.
                    "parameters": mcp_tool.get(
                        "input_schema",
                        {"type": "object", "properties": {}, "required": []},
                    ),
                },
            }
            for mcp_tool in mcp_tools
        ]
193
+
194
+
195
class AnthropicPolicy(LLMBasePolicy):
    """
    Anthropic policy implementation that works with ANY MCP environment via tool calling.

    NO environment-specific logic - everything comes from MCP tools and dataset prompts.
    Supports both live mode (using Anthropic API) and playback mode (replaying recorded trajectories).
    """

    def __init__(
        self,
        model_id: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        max_tools_per_turn: Optional[int] = None,
        **kwargs,
    ):
        """
        Initialize Anthropic policy.

        Args:
            model_id: Anthropic model identifier (e.g., "claude-3-5-sonnet-20241022", "claude-3-opus-20240229")
            temperature: Sampling temperature (0.0 to 1.0)
            max_tokens: Maximum tokens to generate per request
            max_tools_per_turn: Maximum number of tool calls per turn (None = unlimited, 1 = single tool)

        Raises:
            ImportError: In live mode, if the 'anthropic' package is not installed.
            ValueError: In live mode, if ANTHROPIC_API_KEY is not set.
            RuntimeError: In live mode, if constructing the client fails.
        """
        super().__init__(model_id, temperature, max_tokens, max_tools_per_turn, **kwargs)

        # In playback mode no API client is needed, so skip its construction.
        if self._is_playback:
            self.client = None
            logger.info("🎬 Playback mode: Skipping Anthropic client initialization for performance")
            return

        # The SDK is imported lazily so the module stays importable without it.
        try:
            from anthropic import AsyncAnthropic
        except ImportError as e:
            raise ImportError(
                "The 'anthropic' package is required for AnthropicPolicy. "
                "Please install it with 'pip install anthropic'"
            ) from e

        # Verify authentication
        api_key = os.environ.get("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError(
                "ANTHROPIC_API_KEY environment variable is required "
                "to use AnthropicPolicy. Set this variable before running."
            )

        # Initialize the Anthropic client
        try:
            self.client = AsyncAnthropic(api_key=api_key)
            logger.info(f"✅ Initialized Anthropic client: {self.model_id}")
        except Exception as e:
            raise RuntimeError(f"Failed to initialize Anthropic client for '{self.model_id}': {e}") from e

    @staticmethod
    def _parse_tool_arguments(arguments: Any) -> Any:
        """Return tool-call arguments as a decoded object.

        JSON strings are decoded; an empty/blank string or None becomes an
        empty dict; any other value is returned unchanged.
        """
        if arguments is None:
            return {}
        if isinstance(arguments, str):
            return json.loads(arguments) if arguments.strip() else {}
        return arguments

    def _clean_messages_for_api(self, messages: List[Dict]) -> Tuple[List[Dict], Optional[str]]:
        """
        Clean messages by removing metadata fields, extracting system message, and converting tool messages.

        Anthropic handles system messages separately and doesn't support "tool" role messages.
        Tool results must be converted to "user" messages with tool_result content blocks.

        Args:
            messages: Conversation messages with potential metadata and system messages

        Returns:
            Tuple of (clean_messages_without_system, system_message_content).
            If several system messages are present, the last one is returned.
        """
        clean_messages: List[Dict] = []
        system_message = None

        for msg in messages:
            clean_msg = msg.copy()
            clean_msg.pop("metadata", None)
            role = clean_msg.get("role")

            if role == "system":
                # Extracted rather than forwarded: it is sent via a separate
                # request field by _make_llm_call.
                system_message = clean_msg["content"]
            elif role == "tool":
                # Tool results become tool_result content blocks in a user message.
                clean_messages.append(
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "tool_result",
                                "tool_use_id": clean_msg.get("tool_call_id", "unknown"),
                                "content": clean_msg.get("content", ""),
                            }
                        ],
                    }
                )
            elif role == "assistant" and "tool_calls" in clean_msg:
                # Assistant tool calls become tool_use content blocks.
                content_blocks = []

                # Add text content if present
                if clean_msg.get("content"):
                    content_blocks.append({"type": "text", "text": clean_msg["content"]})

                # `tool_calls` may be present but None (this class's own
                # _make_llm_call returns that shape), so fall back to [].
                for tool_call in clean_msg.get("tool_calls") or []:
                    if tool_call.get("type") != "function":
                        continue
                    function = tool_call["function"]
                    content_blocks.append(
                        {
                            "type": "tool_use",
                            "id": tool_call["id"],
                            "name": function["name"],
                            "input": self._parse_tool_arguments(function["arguments"]),
                        }
                    )

                clean_messages.append({"role": "assistant", "content": content_blocks})
            else:
                clean_messages.append(clean_msg)

        return clean_messages, system_message

    async def _make_llm_call(self, messages: List[Dict], tools: List[Dict]) -> Dict:
        """
        Make an Anthropic API call.

        Args:
            messages: Conversation messages (may contain metadata and system messages)
            tools: Available tools in Anthropic format

        Returns:
            API response in OpenAI-compatible format: "choices" with one
            "message" ("content" is the first text block or "", "tool_calls" is
            a list or None when there are none) and "usage" token counts.

        Raises:
            RuntimeError: If the client was not initialized (playback mode).
        """
        if self.client is None:
            raise RuntimeError("Anthropic client not initialized")

        # Clean messages and extract system message
        clean_messages, system_message = self._clean_messages_for_api(messages)

        current_request = {
            "model": self.model_id,
            "messages": clean_messages,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
        }

        # Add system message if present
        if system_message:
            current_request["system"] = system_message

        # Add tools if present
        if tools:
            current_request["tools"] = tools

        # Make the API call
        response = await self.client.messages.create(**current_request)

        # Convert Anthropic response to OpenAI-compatible format in one pass.
        tool_calls = []
        text_content = ""
        have_text = False
        for content_block in getattr(response, "content", None) or []:
            block_type = getattr(content_block, "type", None)
            if block_type == "tool_use":
                tool_calls.append(
                    {
                        "id": content_block.id,
                        "type": "function",
                        "function": {
                            "name": content_block.name,
                            # OpenAI-style arguments are a JSON string.
                            "arguments": json.dumps(content_block.input),
                        },
                    }
                )
            elif block_type == "text" and not have_text:
                # Only the first text block is used as the message content.
                text_content = content_block.text
                have_text = True

        return {
            "choices": [
                {
                    "message": {
                        "content": text_content,
                        "tool_calls": tool_calls if tool_calls else None,
                    }
                }
            ],
            "usage": {
                "prompt_tokens": response.usage.input_tokens,
                "completion_tokens": response.usage.output_tokens,
                "total_tokens": response.usage.input_tokens + response.usage.output_tokens,
            },
        }

    def _convert_mcp_tools_to_llm_format(self, mcp_tools: List[Dict]) -> List[Dict]:
        """
        Convert MCP tool schemas to Anthropic tool calling format.

        Args:
            mcp_tools: List of MCP tool definitions; each must have a "name" and
                may have "description" and "input_schema".

        Returns:
            List of Anthropic-compatible tool definitions
        """
        return [
            {
                "name": mcp_tool["name"],
                "description": mcp_tool.get("description", f"Execute {mcp_tool['name']} action"),
                # Tools without a schema are exposed as taking no arguments.
                "input_schema": mcp_tool.get(
                    "input_schema",
                    {"type": "object", "properties": {}, "required": []},
                ),
            }
            for mcp_tool in mcp_tools
        ]
@@ -0,0 +1,54 @@
1
+ """
2
+ Grid Rendering Utilities
3
+
4
+ Utilities for rendering grid-based environments in a human-readable format.
5
+ """
6
+
7
+ from typing import Any
8
+
9
+
10
def render_grid(desc, position: int) -> str:
    """
    Render a grid environment showing the current player position.

    Args:
        desc: Grid description (usually from env.desc). Only objects exposing a
            ``shape`` attribute (e.g. numpy arrays) are rendered as a grid.
        position: Current player position as 1D row-major index
            (``row * n_columns + column``).

    Returns:
        String representation of the grid with player position marked: one
        line per row, the player's cell shown as "P", or "W" when the player
        stands on a goal cell ("G"). A plain "Position: ..." message is
        returned when no renderable grid is available.
    """
    if desc is None:
        return f"Position: {position} (no grid available)"

    if not hasattr(desc, "shape"):
        # Fallback for other grid formats
        return f"Position: {position}"

    # A row-major index must be decoded with the row WIDTH (column count).
    # Using the row count is only correct for square grids.
    shape = desc.shape
    n_cols = shape[1] if len(shape) > 1 else shape[0]
    if not n_cols:
        # Zero-width grid: nothing to draw, and it would divide by zero below.
        return f"Position: {position}"

    # Convert position to row, col coordinates
    row, col = divmod(position, n_cols)

    grid_lines = []
    for r, desc_row in enumerate(desc):
        cells = []
        for c, cell in enumerate(desc_row):
            # Convert bytes to string if needed
            cell_char = cell.decode("utf-8") if isinstance(cell, bytes) else str(cell)

            if r == row and c == col:
                # Show player position with 'P', unless it's the goal
                cells.append("W" if cell_char == "G" else "P")
            else:
                # Show original cell
                cells.append(cell_char)
        grid_lines.append("".join(cells))

    return "\n".join(grid_lines)