plancraft 0.3.33__py3-none-any.whl → 0.3.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
plancraft/evaluator.py CHANGED
@@ -34,6 +34,13 @@ class Evaluator:
34
34
 
35
35
  It is also responsible for early stopping and verifying the target object has been crafted.
36
36
  Finally, it also saves the results of the evaluation and the images generated during the evaluation.
37
+
38
+ This evaluator is designed to work with a PlancraftBaseModel and a set of ActionHandlerBase instances.
39
+ It supports multimodal content format and image-based inventory.
40
+
41
+ Importantly, it tracks the history of the dialogue and the environment state to provide a trace of the model's actions.
42
+
43
+ If you want a simpler interface that just wraps the environment and actions to evaluate a single Plancraft example, use the EnvWrapper class.
37
44
  """
38
45
 
39
46
  def __init__(
plancraft/simple.py ADDED
@@ -0,0 +1,164 @@
1
+ import json
2
+ import os
3
+ from typing import Optional
4
+
5
+ from plancraft.config import PlancraftExample
6
+ from plancraft.environment.actions import (
7
+ ActionHandlerBase,
8
+ MoveActionHandler,
9
+ SmeltActionHandler,
10
+ ImpossibleActionHandler,
11
+ StopAction,
12
+ )
13
+ from plancraft.environment.env import (
14
+ PlancraftEnvironment,
15
+ get_objective_str,
16
+ target_and_inventory_to_text_obs,
17
+ )
18
+
19
+
20
def get_plancraft_examples(split: str = "train") -> list[PlancraftExample]:
    """
    Load the examples of a dataset split from the packaged data directory.

    Args:
        split: Name of the split. The file ``<split>.json`` is read from the
            ``data`` directory located next to this module.

    Returns:
        One PlancraftExample per entry of the JSON file, in file order. Each
        entry is expanded as keyword arguments to PlancraftExample.

    Raises:
        FileNotFoundError: If there is no ``<split>.json`` file (raised by ``open``).
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(os.path.join(data_dir, f"{split}.json"), "r", encoding="utf-8") as f:
        examples = json.load(f)
    return [PlancraftExample(**example) for example in examples]
28
+
29
+
30
class EnvWrapper:
    """
    This wrapper class just wraps the environment and actions to evaluate a single example

    This is useful if you want to bring your own agent/model to interact with the
    environment and not rely on the History class and model class in the plancraft package.

    Typical usage is to call ``step`` repeatedly with the raw text produced by
    the agent until it reports termination.
    """

    def __init__(
        self,
        example: PlancraftExample,
        actions: Optional[list[ActionHandlerBase]] = None,
        max_steps: int = 30,
        resolution: str = "high",
        use_text_inventory: bool = True,
    ):
        """
        Args:
            example: The example to evaluate. Its ``slotted_inventory`` initialises
                the environment and its ``target``/``impossible`` fields drive
                the success checks.
            actions: Action handlers tried, in order, against the agent's text.
                When omitted, a fresh list with a move, a smelt and an
                impossible handler is created for this instance.
            max_steps: Number of ``step`` calls that are executed before the
                episode times out.
            resolution: Forwarded to PlancraftEnvironment.
            use_text_inventory: Whether to convert the inventory to a text
                observation. If False, only the objective string is returned.

        Raises:
            AssertionError: If the example is impossible but no handler named
                "impossible" is available.
        """
        # Build the default handlers per instance: a list literal in the
        # signature would be created once and shared by every wrapper.
        if actions is None:
            actions = [
                MoveActionHandler(),
                SmeltActionHandler(),
                ImpossibleActionHandler(),
            ]
        self.actions = actions
        self.max_steps = max_steps
        # whether to convert the inventory to text observation
        # if False, only the objective string is returned
        self.use_text_inventory = use_text_inventory
        self.current_step = 0
        self.stopped = False
        self.success = False
        self.example = example
        self.resolution = resolution
        self.environment = PlancraftEnvironment(
            example.slotted_inventory, resolution=self.resolution
        )
        if example.impossible:
            # An impossible example can only be solved by emitting "impossible"
            assert "impossible" in [action.action_name for action in actions], (
                "impossible examples require an action handler named 'impossible'"
            )

    def check_done(self, inventory: dict, target: str):
        """
        Check that target object is obtained

        Args:
            inventory: Mapping of slot to item, where each item exposes a "type" entry.
            target: The item type that has to be obtained.

        Returns:
            True if some slot other than slot 0 holds the target, False otherwise.
        """
        # ensure the target is in the inventory (not in slot 0)
        return any(
            target == item["type"] and slot != 0 for slot, item in inventory.items()
        )

    def parse_raw_model_response(self, generated_text: str) -> str:
        """
        Given a message and set of action handlers, parse the content to return the action
        or a message if the action is not valid/requires message response

        The handlers are tried in order and the first truthy ``match`` result is
        returned as-is. If no handler matches, a message listing the available
        action names is returned.

        NOTE(review): ``step`` checks the result with ``isinstance(..., StopAction)``,
        so a handler match is presumably not always a string -- confirm against
        the handlers' ``match`` implementations.
        """
        for handler in self.actions:
            match_output = handler.match(generated_text)
            if match_output:
                return match_output
        action_names = [handler.action_name for handler in self.actions]
        return f"Only select actions from the following: {', '.join(action_names)}"

    def step(self, action: str) -> tuple[Optional[dict], float, bool]:
        """
        Execute action and return next observation, reward, and termination status

        Args:
            action: Raw text generated by the agent; it is parsed with
                ``parse_raw_model_response`` before being executed.

        Returns:
            observation: The environment observation after the action, observation is a dictionary with keys:
                - text: The text observation (always present)
                - inventory: The inventory after the action (if the environment was stepped)
                - target: The target object (if the environment was stepped)
                - image: The image observation (presumably provided by the environment -- confirm)
            reward: Reward for the current action (1.0 for success, 0.0 otherwise)
            terminated: Whether the episode is done due to task completion, failure, or timeout
        """
        action = self.parse_raw_model_response(action)
        self.current_step += 1

        # Initialize return values
        reward = 0.0
        terminated = False

        # Handle already stopped case
        if self.stopped:
            return {"text": "Plancraft environment is terminated"}, reward, True

        # Handle max steps reached (terminate with no reward)
        if self.current_step > self.max_steps:
            self.success = False
            return {"text": f"Max steps ({self.max_steps}) reached"}, reward, True

        # Handle stop action
        if isinstance(action, StopAction):
            self.stopped = True
            terminated = True
            # success is True if example was truly impossible
            self.success = self.example.impossible
            if self.success:
                reward = 1.0
            observation = {
                "text": "Plancraft environment is terminate due to stop action"
            }

        # Handle invalid action: the parser returned a message instead of an
        # action, so step the environment without an action and report the message
        elif isinstance(action, str):
            observation = self.environment.step()
            observation["target"] = self.example.target
            observation["text"] = action

        # Handle regular action execution
        # NOTE: if the action is valid but does not do anything
        # the environment will return the same observation
        else:
            observation = self.environment.step(action)
            observation["target"] = self.example.target

            # Generate text observation
            if self.use_text_inventory:
                text = target_and_inventory_to_text_obs(
                    target=self.example.target, inventory=observation["inventory"]
                )
            else:
                text = get_objective_str(self.example.target)

            observation["text"] = text

            self.success = self.check_done(
                observation["inventory"], self.example.target
            )

            # Set reward and termination for successful completion
            if self.success:
                reward = 1.0
                terminated = True
                self.stopped = True

        return observation, reward, terminated
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plancraft
3
- Version: 0.3.33
3
+ Version: 0.3.34
4
4
  Summary: Plancraft: an evaluation dataset for planning with LLM agents
5
5
  License: MIT License
6
6
 
@@ -1,7 +1,8 @@
1
1
  plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  plancraft/config.py,sha256=ShsFRlJ7plsl3ToD9fiO_4LDQuXdbjNV6Xp6o3Yk2Yg,4315
3
- plancraft/evaluator.py,sha256=mxzvbGpEDkiKW8u79QgYz5Q4wnZvkQSXiAvi0OVu4Qs,14754
3
+ plancraft/evaluator.py,sha256=pthc7pxT4xKHzP4hULngrfR0rC9VvnTWPDfnF1YnwJw,15220
4
4
  plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
5
+ plancraft/simple.py,sha256=OinkMTdq4DEFuWcbpOodTYf1G0EdmV10lxfdJxDuUc4,5923
5
6
  plancraft/utils.py,sha256=VhnxMihh6pRhNjQTK5HDc0FYWmF9_EcQyRP_a7fbIZA,7156
6
7
  plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
7
8
  plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
@@ -1920,7 +1921,7 @@ plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5w
1920
1921
  plancraft/models/oracle.py,sha256=f-0KWlBuHy6wcxmDsxM3MQ_QwfBstzfbA26mlk1MgLA,1657
1921
1922
  plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
1922
1923
  plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
1923
- plancraft-0.3.33.dist-info/METADATA,sha256=PWko_VcNKDQCx-4HTseZWqiRQMhOYXlzvCK13OFCQ78,11148
1924
- plancraft-0.3.33.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
- plancraft-0.3.33.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
- plancraft-0.3.33.dist-info/RECORD,,
1924
+ plancraft-0.3.34.dist-info/METADATA,sha256=xDFi9dYWn_op3CBUr0klhEWTl33LaRudM2GSYzAV9dc,11148
1925
+ plancraft-0.3.34.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1926
+ plancraft-0.3.34.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1927
+ plancraft-0.3.34.dist-info/RECORD,,