gr-libs 0.1.7.post0__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
- evaluation/create_minigrid_map_image.py +10 -6
- evaluation/file_system.py +16 -5
- evaluation/generate_experiments_results.py +123 -74
- evaluation/generate_experiments_results_new_ver1.py +227 -243
- evaluation/generate_experiments_results_new_ver2.py +317 -317
- evaluation/generate_task_specific_statistics_plots.py +481 -253
- evaluation/get_plans_images.py +41 -26
- evaluation/increasing_and_decreasing_.py +97 -56
- gr_libs/__init__.py +2 -1
- gr_libs/_version.py +2 -2
- gr_libs/environment/__init__.py +16 -8
- gr_libs/environment/environment.py +167 -39
- gr_libs/environment/utils/utils.py +22 -12
- gr_libs/metrics/__init__.py +5 -0
- gr_libs/metrics/metrics.py +76 -34
- gr_libs/ml/__init__.py +2 -0
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +1 -1
- gr_libs/ml/base/rl_agent.py +13 -10
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/deep_rl_learner.py +433 -352
- gr_libs/ml/neural/utils/__init__.py +1 -1
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/neural/utils/penv.py +5 -2
- gr_libs/ml/planner/mcts/mcts_model.py +524 -302
- gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
- gr_libs/ml/planner/mcts/utils/node.py +11 -7
- gr_libs/ml/planner/mcts/utils/tree.py +14 -10
- gr_libs/ml/sequential/__init__.py +1 -1
- gr_libs/ml/sequential/lstm_model.py +256 -175
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +123 -73
- gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
- gr_libs/ml/utils/__init__.py +8 -2
- gr_libs/ml/utils/format.py +78 -70
- gr_libs/ml/utils/math.py +2 -1
- gr_libs/ml/utils/other.py +1 -1
- gr_libs/ml/utils/storage.py +88 -28
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
- gr_libs/recognizer/graml/gr_dataset.py +209 -110
- gr_libs/recognizer/graml/graml_recognizer.py +431 -240
- gr_libs/recognizer/recognizer.py +38 -27
- gr_libs/recognizer/utils/__init__.py +1 -1
- gr_libs/recognizer/utils/format.py +8 -3
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
- gr_libs-0.1.8.dist-info/RECORD +70 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +8 -4
- tests/test_graql.py +2 -1
- tutorials/gcdraco_panda_tutorial.py +66 -0
- tutorials/gcdraco_parking_tutorial.py +61 -0
- tutorials/graml_minigrid_tutorial.py +42 -12
- tutorials/graml_panda_tutorial.py +35 -14
- tutorials/graml_parking_tutorial.py +37 -20
- tutorials/graml_point_maze_tutorial.py +33 -13
- tutorials/graql_minigrid_tutorial.py +31 -15
- gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -0
gr_libs/ml/planner/mcts/utils/__init__.py CHANGED
@@ -1,2 +1,2 @@
 from .node import Node
-from .tree import Tree
+from .tree import Tree
(whitespace-only change)
gr_libs/ml/planner/mcts/utils/node.py CHANGED
@@ -1,8 +1,11 @@
 import random
 
+
 class Node:
 
-    def __init__(self, identifier, state, action, action_space, reward, terminal, pos, depth):
+    def __init__(
+        self, identifier, state, action, action_space, reward, terminal, pos, depth
+    ):
         self.identifier = identifier
         self.parent_identifier = None
         self.children_identifiers = []
@@ -21,13 +24,14 @@ class Node:
 
     def __str__(self):
         return "{}: (action={}, visits={}, reward={:d}, ratio={:0.4f})".format(
-            self.state,
-            self.action,
-            self.num_visits,
-            int(self.total_simulation_reward),
-            self.performance)
+            self.state,
+            self.action,
+            self.num_visits,
+            int(self.total_simulation_reward),
+            self.performance,
+        )
 
     def untried_action(self):
         action = random.choice(self.untried_actions)
         self.untried_actions.remove(action)
-        return action
+        return action
gr_libs/ml/planner/mcts/utils/tree.py CHANGED
@@ -1,22 +1,24 @@
 def vertical_lines(last_node_flags):
     vertical_lines = []
-    vertical_line = "│"
+    vertical_line = "\u2502"
     for last_node_flag in last_node_flags[0:-1]:
         if last_node_flag == False:
-            vertical_lines.append(vertical_line + "   ")
+            vertical_lines.append(vertical_line + " " * 3)
         else:
             # space between vertical lines
-            vertical_lines.append("    ")
-    return "".join(vertical_lines)
+            vertical_lines.append(" " * 4)
+    return "".join(vertical_lines)
+
 
 def horizontal_line(last_node_flags):
-    horizontal_line = "├── "
-    horizontal_line_end = "└── "
+    horizontal_line = "\u251c\u2500\u2500 "
+    horizontal_line_end = "\u2514\u2500\u2500 "
     if last_node_flags[-1]:
         return horizontal_line_end
     else:
         return horizontal_line
 
+
 class Tree:
 
     def __init__(self):
@@ -39,7 +41,9 @@ class Tree:
         if depth == 0:
             yield "", node
         else:
-            yield vertical_lines(last_node_flags) + horizontal_line(last_node_flags), node
+            yield vertical_lines(last_node_flags) + horizontal_line(
+                last_node_flags
+            ), node
 
         children = [self.nodes[identifier] for identifier in node.children_identifiers]
         last_index = len(children) - 1
@@ -60,7 +64,7 @@ class Tree:
             self.nodes[node.identifier].parent = None
         else:
             self.nodes[parent.identifier].children_identifiers.append(node.identifier)
-            self.nodes[node.identifier].parent_identifier=parent.identifier
+            self.nodes[node.identifier].parent_identifier = parent.identifier
 
     def update_id(self, old_id, new_id):
         assert new_id not in self.nodes.keys()
@@ -78,7 +82,7 @@ class Tree:
         # update the node's children (if there are any?...)
         for child_id in node.children_identifiers:
             self.nodes[child_id].parent_identifier = new_id
-
+
         self.nodes.pop(old_id)
         self.nodes.update({node.identifier: node})
 
@@ -99,4 +103,4 @@ class Tree:
         lines = ""
         for edge, node in self.iter(identifier=None, depth=0, last_node_flags=[]):
            lines += "{}{}\n".format(edge, node)
-        print(lines)
+        print(lines)
gr_libs/ml/sequential/__init__.py CHANGED
@@ -1 +1 @@
-from gr_libs.ml.sequential.lstm_model import LstmObservations
+from gr_libs.ml.sequential.lstm_model import LstmObservations
(whitespace-only change)
gr_libs/ml/sequential/lstm_model.py CHANGED
@@ -10,183 +10,264 @@ from torch.nn.utils.rnn import pack_padded_sequence
(The original text of the 175 removed lines was not captured in this diff view; the replacement code is shown below.)
 
 
 def accuracy_per_epoch(model, data_loader):
+    model.eval()
+    correct = total = 0.0
+    sum_loss = 0.0
+    with torch.no_grad():
+        for (
+            first_traces,
+            second_traces,
+            is_same_goals,
+            first_traces_lengths,
+            second_traces_lengths,
+        ) in data_loader:
+            y_pred = model.forward_tab(
+                first_traces, second_traces, first_traces_lengths, second_traces_lengths
+            )
+            loss = F.binary_cross_entropy(y_pred, is_same_goals)
+            sum_loss += loss.item()
+            y_pred = y_pred >= 0.5
+            correct += torch.sum(y_pred == is_same_goals)
+            total += len(is_same_goals)
+    return correct / total, sum_loss / 32
+
 
 def accuracy_per_epoch_cont(model, data_loader):
+    model.eval()
+    correct = total = 0.0
+    sum_loss = 0.0
+    with torch.no_grad():
+        for (
+            first_traces_images,
+            first_traces_texts,
+            second_traces_images,
+            second_traces_texts,
+            is_same_goals,
+            first_traces_lengths,
+            second_traces_lengths,
+        ) in data_loader:
+            y_pred = model.forward_cont(
+                first_traces_images,
+                first_traces_texts,
+                second_traces_images,
+                second_traces_texts,
+                first_traces_lengths,
+                second_traces_lengths,
+            )
+            loss = F.binary_cross_entropy(y_pred, is_same_goals)
+            sum_loss += loss.item()
+            y_pred = y_pred >= 0.5
+            correct += torch.sum(y_pred == is_same_goals)
+            total += len(is_same_goals)
+    return correct / total, sum_loss / 32
+
+# class CNNImageEmbeddor(nn.Module):
+#     def __init__(self, obs_space, action_space, use_text=False):
+#         super().__init__()
+#         self.use_text = use_text
+#         self.image_conv = nn.Sequential(
+#             nn.Conv2d(3, 4, kernel_size=(3, 3), padding=1),  # Reduced filters, added padding
+#             nn.ReLU(),
+#             nn.MaxPool2d((2, 2)),
+#             nn.Conv2d(4, 4, (3, 3), padding=1),  # Reduced filters, added padding
+#             nn.ReLU(),
+#             nn.MaxPool2d((2, 2)),  # Added additional pooling to reduce size
+#             nn.Conv2d(4, 8, (3, 3), padding=1),  # Reduced filters, added padding
+#             nn.ReLU(),
+#             nn.BatchNorm2d(8)
+#         )
+#         n = obs_space["image"][0]
+#         m = obs_space["image"][1]
+#         self.image_embedding_size = ((n - 4) // 4 - 3) * ((m - 4) // 4 - 3) * 8
+#         if self.use_text:
+#             self.word_embedding_size = 32
+#             self.word_embedding = nn.Embedding(obs_space["text"], self.word_embedding_size)
+#             self.text_embedding_size = 128
+#             self.text_rnn = nn.GRU(self.word_embedding_size, self.text_embedding_size, batch_first=True)
+
+def forward(self, images, texts):
+    # images shape: batch_size X max_sequence_len X sample_size. same for text.
+    # need to reshape image to num_channels X height X width, like nn.Conv expects it to be.
+    x = images.transpose(2, 4).transpose(3, 4)
+    orig_shape = x.shape
+    # combine batch and sequence to 1 dimension so conv could handle it
+    x = x.view(
+        orig_shape[0] * orig_shape[1], orig_shape[2], orig_shape[3], orig_shape[4]
+    )  # x shape: batch_size * max_sequence_len X sample_size
+    x = self.image_conv(
+        x
+    )  # x shape: batch_size * max_sequence_len X last_conv_size X 1 X 1
+    # reshape x back to divide batches from sequences
+    x = x.view(
+        orig_shape[0], orig_shape[1], x.shape[1]
+    )  # x shape: batch_size X max_sequence_len X last_conv_size. last 2 dimensions (1,1) are collapsed to last conv.
+    embedding = x
+
+    if self.use_text:
+        embed_text = self._get_embed_text(texts)
+        embedding = torch.cat((embedding, embed_text), dim=1)
+
+    return embedding
+
+def _get_embed_text(self, text):
+    _, hidden = self.text_rnn(self.word_embedding(text))
+    return hidden[-1]
+
 
 class LstmObservations(nn.Module):
+
+    def __init__(
+        self, input_size, hidden_size
+    ):  # TODO make sure the right cuda is used!
+        super(LstmObservations, self).__init__()
+        # self.embeddor = CNNImageEmbeddor(obs_space, action_space)
+        # check if the traces are a bunch of images
+        self.lstm = nn.LSTM(
+            input_size=input_size, hidden_size=hidden_size, batch_first=True
+        )
+        self.dropout = nn.Dropout(0.5)  # Added dropout layer
+        # Initialize weights
+        for name, param in self.lstm.named_parameters():
+            if "weight" in name:
+                nn.init.xavier_uniform_(param)
+            elif "bias" in name:
+                nn.init.zeros_(param)
+
+    # tabular
+    def forward_tab(self, traces1, traces2, lengths1, lengths2):
+        out1, (ht1, ct1) = self.lstm(
+            pack_padded_sequence(
+                traces1, lengths1, batch_first=True, enforce_sorted=False
+            ),
+            None,
+        )  # traces1 & traces 2 shapes: batch_size X max sequence_length X embedding_size
+        out2, (ht2, ct2) = self.lstm(
+            pack_padded_sequence(
+                traces2, lengths2, batch_first=True, enforce_sorted=False
+            ),
+            None,
+        )
+        # out1, _ = pad_packed_sequence(out1, batch_first=True, total_length=max(lengths1))
+        # out2, _ = pad_packed_sequence(out2, batch_first=True, total_length=max(lengths2))
+        manhattan_dis = torch.exp(
+            -torch.sum(torch.abs(ht1[-1] - ht2[-1]), dim=1, keepdim=True)
+        )
+        return manhattan_dis.squeeze()
+
+    # continuous
+    # def forward_cont(self, traces1_images, traces1_texts, traces2_images, traces2_texts, lengths1, lengths2):
+    #     # we also embed '0' images, but we take them out of the equation in the lstm (it knows to not treat them when batching)
+    #     traces1 = self.embeddor(traces1_images, traces1_texts)
+    #     traces2 = self.embeddor(traces2_images, traces2_texts)  # traces1 & traces 2 shapes: batch_size X max_sequence_length X embedding_size
+    #     out1, (ht1, ct1) = self.lstm(pack_padded_sequence(traces1, lengths1, batch_first=True, enforce_sorted=False), None)
+    #     out2, (ht2, ct2) = self.lstm(pack_padded_sequence(traces2, lengths2, batch_first=True, enforce_sorted=False), None)
+    #     manhattan_dis = torch.exp(-torch.sum(torch.abs(ht1[-1]-ht2[-1]),dim=1,keepdim=True))
+    #     return manhattan_dis.squeeze()
+
+    def embed_sequence(self, trace):
+        trace = torch.stack(
+            [torch.tensor(observation, dtype=torch.float32) for observation in trace]
+        ).to(device)
+        out, (ht, ct) = self.lstm(trace, None)
+        return ht[-1]
+
+    # def embed_sequence_cont(self, sequence, preprocess_obss):
+    #     sequence = [preprocess_obss([obs])[0] for ((obs, (_, _)), _) in sequence]
+    #     trace_images = torch.tensor(np.expand_dims(torch.stack([step.image for step in sequence]), axis=0)).to(device)
+    #     trace_texts = torch.tensor(np.expand_dims(torch.stack([step.text for step in sequence]), axis=0)).to(device)
+    #     embedded_trace = self.embeddor(trace_images, trace_texts)
+    #     out, (ht, ct) = self.lstm(embedded_trace)
+    #     return ht[-1]
+
+
+def train_metric_model(model, train_loader, dev_loader, nepochs=5, patience=2):
+    devAccuracy = []
+    best_dev_accuracy = 0.0
+    no_improvement_count = 0
+    optimizer = torch.optim.Adadelta(model.parameters(), weight_decay=0.1)
+    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+        optimizer, mode="min", patience=2, factor=0.5
+    )
+    for epoch in range(nepochs):
+        sum_loss, denominator = 0.0, 0.0
+        model.train()
+        for (
+            first_traces,
+            second_traces,
+            is_same_goals,
+            first_traces_lengths,
+            second_traces_lengths,
+        ) in train_loader:
+            model.zero_grad()
+            y_pred = model.forward_tab(
+                first_traces, second_traces, first_traces_lengths, second_traces_lengths
+            )
+            if len(is_same_goals) == 1:
+                is_same_goals = torch.squeeze(
+                    is_same_goals
+                )  # for the case of batches in size 1...
+            loss = F.binary_cross_entropy(y_pred, is_same_goals)
+            sum_loss += loss.item()
+            denominator += 1
+            loss.backward()
+            optimizer.step()
+
+        dev_accuracy, dev_loss = accuracy_per_epoch(model, dev_loader)
+        devAccuracy.append(dev_accuracy)
+        if dev_accuracy > best_dev_accuracy:
+            best_dev_accuracy = dev_accuracy
+            no_improvement_count = 0
+        else:
+            no_improvement_count = 1
+
+        print(
+            "epoch - {}/{}...".format(epoch + 1, nepochs),
+            "train loss - {:.6f}...".format(sum_loss / denominator),
+            "dev loss - {:.6f}...".format(dev_loss),
+            "dev accuracy - {:.6f}".format(dev_accuracy),
+        )
+
+        if no_improvement_count >= patience:
+            print(f"Early stopping after {epoch + 1} epochs with no improvement.")
+            break
+
+
 def train_metric_model_cont(model, train_loader, dev_loader, nepochs=5):
+    devAccuracy = []
+    optimizer = torch.optim.Adadelta(model.parameters(), weight_decay=1.25)
+    for epoch in range(nepochs):
+        sum_loss, denominator = 0.0, 0.0
+        model.train()
+        for (
+            first_traces_images,
+            first_traces_texts,
+            second_traces_images,
+            second_traces_texts,
+            is_same_goals,
+            first_traces_lengths,
+            second_traces_lengths,
+        ) in train_loader:
+            model.zero_grad()
+            y_pred = model.forward_cont(
+                first_traces_images,
+                first_traces_texts,
+                second_traces_images,
+                second_traces_texts,
+                first_traces_lengths,
+                second_traces_lengths,
+            )
+            loss = F.binary_cross_entropy(y_pred, is_same_goals)
+            sum_loss += loss.item()
+            denominator += 1
+            loss.backward()
+            optimizer.step()
+
+        dev_accuracy, dev_loss = accuracy_per_epoch_cont(model, dev_loader)
+        devAccuracy.append(dev_accuracy)
+
+        print(
+            "epoch - {}/{}...".format(epoch + 1, nepochs),
+            "train loss - {:.6f}...".format(sum_loss / denominator),
+            "dev loss - {:.6f}...".format(dev_loss),
+            "dev accuracy - {:.6f}".format(dev_accuracy),
+        )
gr_libs/ml/tabular/state.py CHANGED
@@ -2,11 +2,9 @@ from abc import ABC
 
 
 class TabularState(ABC):
-    def __init__(self,
-                 agent_x_position: int,
-                 agent_y_position: int,
-                 agent_direction: int
-                 ):
+    def __init__(
+        self, agent_x_position: int, agent_y_position: int, agent_direction: int
+    ):
         self._agent_x_position = agent_x_position
         self._agent_y_position = agent_y_position
         self._agent_direction = agent_direction
@@ -14,8 +12,10 @@ class TabularState(ABC):
     @staticmethod
     def gen_tabular_state(environment, observation):
         x, y = environment.unwrapped.agent_pos
-        direction = observation['direction']
-        return TabularState(agent_x_position=x, agent_y_position=y, agent_direction=direction)
+        direction = observation["direction"]
+        return TabularState(
+            agent_x_position=x, agent_y_position=y, agent_direction=direction
+        )
 
     def __str__(self):
         return f"({self._agent_x_position},{self._agent_y_position}):{self._agent_direction}"