likelihood 1.5.7__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- likelihood/graph/__init__.py +8 -0
- likelihood/graph/_nn.py +421 -0
- likelihood/models/deep/__init__.py +11 -2
- likelihood/models/deep/_autoencoders.py +895 -0
- likelihood/models/deep/_predictor.py +810 -0
- likelihood/models/deep/autoencoders.py +52 -29
- likelihood/models/deep/gan.py +7 -7
- likelihood/models/deep/predictor.py +10 -8
- likelihood/models/deep/rl.py +350 -0
- likelihood/models/simulation.py +9 -4
- likelihood/tools/cat_embed.py +213 -0
- likelihood/tools/tools.py +7 -2
- {likelihood-1.5.7.dist-info → likelihood-2.0.0.dist-info}/METADATA +4 -3
- likelihood-2.0.0.dist-info/RECORD +30 -0
- likelihood-1.5.7.dist-info/RECORD +0 -25
- {likelihood-1.5.7.dist-info → likelihood-2.0.0.dist-info}/WHEEL +0 -0
- {likelihood-1.5.7.dist-info → likelihood-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {likelihood-1.5.7.dist-info → likelihood-2.0.0.dist-info}/top_level.txt +0 -0
likelihood/models/deep/autoencoders.py
CHANGED
@@ -277,7 +277,8 @@ class AutoClassifier(tf.keras.Model):
                     activation=self.activation,
                     kernel_regularizer=l2(self.l2_reg),
                 ),
-            ]
+            ],
+            name="encoder",
         )
         if not self.encoder
         else self.encoder
@@ -296,7 +297,8 @@ class AutoClassifier(tf.keras.Model):
                     activation=self.activation,
                     kernel_regularizer=l2(self.l2_reg),
                 ),
-            ]
+            ],
+            name="decoder",
         )
         if not self.decoder
         else self.decoder
@@ -326,7 +328,7 @@ class AutoClassifier(tf.keras.Model):
         log_var = tf.keras.layers.Lambda(lambda x: x + 1e-7)(log_var)
 
         self.encoder = (
-            tf.keras.Model(inputs, [mean, log_var], name="
+            tf.keras.Model(inputs, [mean, log_var], name="vae_encoder")
             if not self.encoder
             else self.encoder
         )
@@ -345,7 +347,8 @@ class AutoClassifier(tf.keras.Model):
                 ),
                 tf.keras.layers.BatchNormalization(),
                 tf.keras.layers.Activation(self.activation),
-            ]
+            ],
+            name="vae_decoder",
         )
         if not self.decoder
         else self.decoder
@@ -366,13 +369,7 @@ class AutoClassifier(tf.keras.Model):
             )
             if self.dropout:
                 self.classifier.add(tf.keras.layers.Dropout(self.dropout))
-
-                tf.keras.layers.Dense(
-                    units=self.num_classes,
-                    activation=self.classifier_activation,
-                    kernel_regularizer=l2(self.l2_reg),
-                )
-            )
+
         elif self.lora_mode:
             for _ in range(self.num_layers - 1):
                 self.classifier.add(
@@ -381,21 +378,14 @@ class AutoClassifier(tf.keras.Model):
                 self.classifier.add(tf.keras.layers.Activation(self.activation))
             if self.dropout:
                 self.classifier.add(tf.keras.layers.Dropout(self.dropout))
-
-
-
-
-
-                )
-            )
-        else:
-            self.classifier.add(
-                tf.keras.layers.Dense(
-                    units=self.num_classes,
-                    activation=self.classifier_activation,
-                    kernel_regularizer=l2(self.l2_reg),
-                )
+
+        self.classifier.add(
+            tf.keras.layers.Dense(
+                units=self.num_classes,
+                activation=self.classifier_activation,
+                kernel_regularizer=l2(self.l2_reg),
             )
+        )
 
     def train_encoder_decoder(
         self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
@@ -494,12 +484,12 @@ class AutoClassifier(tf.keras.Model):
         Sets the encoder and decoder layers from another AutoClassifier instance,
         ensuring compatibility in dimensions. Only works if vae_mode is False.
 
-        Parameters
+        Parameters
         -----------
         source_model : AutoClassifier
             The source model to copy the encoder and decoder layers from.
 
-        Raises
+        Raises
         -------
         ValueError
             If the input shape or units of the source model do not match.
@@ -610,6 +600,13 @@ def call_existing_code(
     num_layers : `int`
         The number of hidden layers in the classifier. Default is 1.
 
+    Keyword Arguments:
+    ----------
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+    vae_units : `int`
+        The number of units in the variational autoencoder. Default is 2.
+
     Returns
     -------
     `AutoClassifier`
@@ -617,6 +614,8 @@ def call_existing_code(
     """
     dropout = kwargs.get("dropout", None)
     l2_reg = kwargs.get("l2_reg", 0.0)
+    vae_mode = kwargs.get("vae_mode", False)
+    vae_units = kwargs.get("vae_units", 2)
     model = AutoClassifier(
         input_shape_parm=input_shape_parm,
         num_classes=num_classes,
@@ -625,6 +624,8 @@ def call_existing_code(
         num_layers=num_layers,
         dropout=dropout,
         l2_reg=l2_reg,
+        vae_mode=vae_mode,
+        vae_units=vae_units,
     )
     model.compile(
         optimizer=optimizer,
@@ -731,6 +732,24 @@ def build_model(
             else hyperparameters["l2_reg"]
         )
     )
+    vae_mode = (
+        hp.Choice("vae_mode", [True, False])
+        if "vae_mode" not in hyperparameters_keys
+        else hyperparameters["vae_mode"]
+    )
+
+    try:
+        vae_units = (
+            hp.Int("vae_units", min_value=2, max_value=10, step=1)
+            if ("vae_units" not in hyperparameters_keys) and vae_mode
+            else (
+                hp.Choice("vae_units", hyperparameters["vae_units"])
+                if isinstance(hyperparameters["vae_units"], list)
+                else hyperparameters["vae_units"]
+            )
+        )
+    except KeyError:
+        vae_units = None
 
     model = call_existing_code(
         units=units,
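The block added above registers vae_mode and vae_units with the tuner's search space only when the caller has not pinned them in hyperparameters. A rough standalone sketch of that pattern follows; the keras_tuner import, the user_hyperparameters dict, and the printed values are illustrative assumptions, not part of the package API.

import keras_tuner as kt

hp = kt.HyperParameters()
user_hyperparameters = {"vae_mode": True}  # hypothetical caller-supplied overrides

# Use the caller's value when it is pinned; otherwise register the parameter
# with the tuner so it becomes part of the search space.
vae_mode = (
    hp.Choice("vae_mode", [True, False])
    if "vae_mode" not in user_hyperparameters
    else user_hyperparameters["vae_mode"]
)
vae_units = hp.Int("vae_units", min_value=2, max_value=10, step=1) if vae_mode else None
print(vae_mode, vae_units)  # here: True and the default of the registered range (2)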
@@ -742,6 +761,8 @@ def build_model(
         num_layers=num_layers,
         dropout=dropout,
         l2_reg=l2_reg,
+        vae_mode=vae_mode,
+        vae_units=vae_units,
     )
     return model
 
@@ -876,6 +897,8 @@ def setup_model(
         tuner.results_summary()
     else:
         best_model = tf.keras.models.load_model(filepath)
-
     best_hps = tuner.get_best_hyperparameters(1)[0].values
-
+    vae_mode = best_hps.get("vae_mode", hyperparameters.get("vae_mode", False))
+    best_hps["vae_units"] = None if not vae_mode else best_hps["vae_units"]
+
+    return best_model, pd.DataFrame(best_hps, index=["Value"]).dropna(axis=1)
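The new return statement packs the best hyperparameters into a one-row DataFrame and drops any column left as None (for example vae_units when vae_mode is False). A small pandas-only sketch of that behaviour, with made-up values:

import pandas as pd

best_hps = {"units": 64, "num_layers": 2, "vae_mode": False, "vae_units": None}  # illustrative values
summary = pd.DataFrame(best_hps, index=["Value"]).dropna(axis=1)
print(summary)  # the vae_units column disappears because its only value is missing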
likelihood/models/deep/gan.py
CHANGED
@@ -41,7 +41,7 @@ class GANRegressor(tf.keras.Model):
         self.build(dummy_input.shape)
 
     def build(self, input_shape):
-        self.gan = tf.keras.models.Sequential([self.generator, self.discriminator])
+        self.gan = tf.keras.models.Sequential([self.generator, self.discriminator], name="gan")
 
         self.generator.compile(
             optimizer=self.optimizer,
@@ -57,7 +57,7 @@ class GANRegressor(tf.keras.Model):
         super(GANRegressor, self).build(input_shape)
 
     def _build_generator(self):
-        generator = tf.keras.Sequential()
+        generator = tf.keras.Sequential(name="generator")
         generator.add(
             tf.keras.layers.Dense(
                 self.num_neurons,
@@ -78,7 +78,7 @@ class GANRegressor(tf.keras.Model):
         return generator
 
     def _build_discriminator(self):
-        discriminator = tf.keras.Sequential()
+        discriminator = tf.keras.Sequential(name="discriminator")
         for _ in range(self.depth):
             discriminator.add(
                 tf.keras.layers.Dense(
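The three gan.py hunks above give the combined model and its sub-models explicit names. A short sketch (toy layer sizes, not the package's defaults) of why named Sequential sub-models are convenient:

import tensorflow as tf

generator = tf.keras.Sequential([tf.keras.layers.Dense(8)], name="generator")
discriminator = tf.keras.Sequential([tf.keras.layers.Dense(1)], name="discriminator")
gan = tf.keras.Sequential([generator, discriminator], name="gan")
gan.build((None, 4))

gan.summary()                      # sub-models are listed as "generator" and "discriminator"
print(gan.get_layer("generator"))  # named sub-models can be looked up directly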
@@ -102,7 +102,7 @@ class GANRegressor(tf.keras.Model):
         Train the GAN model.
 
         Parameters
-
+        ----------
         X : array-like
             Input data.
         y : array-like
@@ -117,7 +117,7 @@ class GANRegressor(tf.keras.Model):
             Verbosity level. Default is 1.
 
         Returns
-
+        -------
         history : pd.DataFrame
             Training history.
         """
@@ -234,7 +234,7 @@ class GANRegressor(tf.keras.Model):
         Train the generator model.
 
         Parameters
-
+        ----------
         X_train : array-like
             Training data.
         y_train : array-like
@@ -249,7 +249,7 @@ class GANRegressor(tf.keras.Model):
             Number of epochs to wait before early stopping. Default is 3.
 
         Returns
-
+        -------
         history : pd.DataFrame
             Training history.
         """
likelihood/models/deep/predictor.py
CHANGED
@@ -109,15 +109,16 @@ class GetInsights:
                 "in the model's transformation.</p>"
             )
         )
-        self.
-
-
-
-
-
-
+        if not self.model.encoder.name.startswith("vae"):
+            self.viz_encoder_decoder_graphs(threshold_factor=threshold_factor, top_k=top_k)
+
+        display(HTML("<h2 style='margin-top:30px;'>🧠 Classifier Layer Graphs</h2>"))
+        display(
+            HTML(
+                "<p>This visualization shows how features propagate through each dense layer in the classifier. "
+                "Only the strongest weighted connections are shown to highlight influential paths through the network.</p>"
+            )
         )
-        )
         self.viz_classifier_graphs(threshold_factor=threshold_factor, top_k=top_k)
 
         display(HTML("<h2 style='margin-top:30px;'>📈 Statistical Summary</h2>"))
@@ -673,6 +674,7 @@ class GetInsights:
             / (data_normalized.iloc[:, :-1].max() - data_normalized.iloc[:, :-1].min())
             - 1
         )
+        data_normalized.dropna(axis=1, inplace=True)
         radviz(data_normalized, color_column, color=self.colors)
         plt.title(title)
         plt.show()
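The added dropna(axis=1) guards the radviz call: the min-max style rescaling shown in the hunk turns any constant column into all-NaN (max equals min), and dropping those columns keeps the plot from failing. A standalone pandas sketch of that effect, with toy data:

import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [5.0, 5.0, 5.0]})  # "b" is constant
scaled = 2 * (df - df.min()) / (df.max() - df.min()) - 1
print(scaled)                 # column "b" becomes all NaN because max == min
print(scaled.dropna(axis=1))  # only column "a" survives, as in the patched call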
likelihood/models/deep/rl.py
ADDED
@@ -0,0 +1,350 @@
+import random
+from collections import deque
+
+import numpy as np
+import tensorflow as tf
+from packaging import version
+
+if version.parse(tf.__version__) > version.parse("2.15.0"):
+    from ._autoencoders import AutoClassifier
+else:
+    from .autoencoders import AutoClassifier
+
+
+def print_progress_bar(iteration, total, length=30):
+    percent = f"{100 * (iteration / float(total)):.1f}"
+    filled_length = int(length * iteration // total)
+    bar = "█" * filled_length + "-" * (length - filled_length)
+    print(f"\rProgress: |{bar}| {percent}% Complete", end="\r")
+    if iteration == total:
+        print()
+
+
+class Env:
+    def __init__(self, model, maxlen=100, name="likenasium"):
+        """
+        Initialize the environment with a model.
+
+        Parameters
+        ----------
+        model : Any
+            Model with `.predict()` method (e.g., Keras model).
+        maxlen : int
+            Maximum length of deque. By default it is set to `100`.
+        name : str
+            The name of the environment. By default it is set to `likenasium`.
+        """
+        self.model = model
+        self.maxlen = maxlen
+        self.transitions = deque(
+            maxlen=self.maxlen
+        )  # Stores (state, action, reward, next_action, done)
+        self.current_state = None
+        self.current_step = 0
+        self.done = False
+
+    def step(self, state, action, verbose=0):
+        """
+        Perform an environment step with the given action.
+
+        Parameters
+        ----------
+        state : `np.ndarray`
+            Current state to process (input to the model).
+        action : int
+            Expected action to process.
+
+        Returns
+        -------
+        tuple: (current_state, action_pred, reward, next_action, done)
+        """
+        if self.done:
+            return None, None, 0, None, True
+
+        # Process action through model
+        model_output = self.model.predict(state.reshape((1, -1)), verbose=verbose)
+        action_pred = np.argmax(model_output, axis=1)[0]
+        model_output[:, action_pred] = 0.0
+        next_action = np.max(model_output, axis=1)[0]  # Second most probable action
+
+        # Calculate reward (1 if correct prediction, 0 otherwise)
+        reward = 1 if action_pred == action else 0
+
+        # Update current state
+        self.current_state = state
+        self.current_step += 1
+
+        # Add transition to history
+        if self.current_step <= self.maxlen:
+            self.transitions.append(
+                (
+                    self.current_state,  # Previous state
+                    action_pred,  # Current action
+                    reward,  # Reward
+                    next_action,  # Next action
+                    self.done,  # Done flag
+                )
+            )
+        return self.current_state, action_pred, reward, next_action, self.done
+
+    def reset(self):
+        """Reset the environment to initial state."""
+        self.current_state = None
+        self.current_step = 0
+        self.done = False
+        self.transitions = deque(maxlen=self.maxlen)
+        return self.current_state
+
+    def get_transitions(self):
+        """Get all stored transitions."""
+        return self.transitions
+
+
+class AutoQL:
+    """
+    AutoQL: A reinforcement learning agent using Q-learning with Epsilon-greedy policy.
+
+    This class implements a Q-learning agent with:
+    - Epsilon-greedy policy for exploration
+    - Replay buffer for experience replay
+    - Automatic model version handling for TensorFlow
+    """
+
+    def __init__(
+        self,
+        env,
+        model,
+        maxlen=2000,
+    ):
+        """Initialize AutoQL agent
+
+        Parameters
+        ----------
+        env : Any
+            The environment to interact with
+        model : tf.keras.Model
+            The Q-network model
+        """
+
+        self.env = env
+        self.model = model
+        self.maxlen = maxlen
+        self.replay_buffer = deque(maxlen=self.maxlen)
+
+    def epsilon_greedy_policy(self, state, action, epsilon=0):
+        """
+        Epsilon-greedy policy for action selection
+
+        Parameters
+        ----------
+        state : `np.ndarray`
+            Current state.
+        action : int
+            Expected action to process.
+        epsilon : float
+            Exploration probability. By default it is set to `0`
+
+        Returns
+        -------
+        tuple: (state, action, reward, next_action, done)
+        """
+        current_state, value, reward, next_action, done = self.env.step(state, action)
+
+        if np.random.rand() > epsilon:
+            state = np.asarray(state).astype(np.float32)
+            return current_state, value, reward, next_action, done
+        step_ = random.sample(self.env.get_transitions(), 1)
+        _state, greedy_action, _reward, _next_action, _done = zip(*step_)
+
+        return _state[0], greedy_action[0], _reward[0], _next_action[0], _done[0]
+
+    def play_one_step(self, state, action, epsilon):
+        """
+        Perform one step in the environment and add experience to buffer
+
+        Parameters
+        ----------
+        state : `np.ndarray`
+            Current state
+        action : int
+            Expected action to process.
+
+        epsilon : float
+            Exploration probability.
+
+        Returns
+        -------
+        tuple: (state, action, reward, next_action, done)
+        """
+        current_state, greedy_action, reward, next_action, done = self.epsilon_greedy_policy(
+            state, action, epsilon
+        )
+
+        done = 1 if done else 0
+
+        # Add experience to replay buffer
+        self.replay_buffer.append(
+            (
+                current_state,  # Previous state
+                greedy_action,  # Current action
+                reward,  # Reward
+                next_action,  # Next action
+                done,  # Done flag
+            )
+        )
+
+        return current_state, greedy_action, reward, next_action, done
+
+    @tf.function
+    def _training_step(self):
+        """
+        Perform one training step using experience replay
+
+        Returns
+        -------
+        float: Training loss
+        """
+
+        batch_ = random.sample(self.replay_buffer, self.batch_size)
+        states, actions, rewards, next_actions, dones = zip(*batch_)
+        states = np.array(states).reshape(self.batch_size, -1)
+        actions = np.array(actions).reshape(
+            self.batch_size,
+        )
+        rewards = np.array(rewards).reshape(
+            self.batch_size,
+        )
+        max_next_Q_values = np.array(next_actions).reshape(self.batch_size, -1)
+        dones = np.array(dones).reshape(
+            self.batch_size,
+        )
+        target_Q_values = rewards + (1 - dones) * self.gamma * max_next_Q_values
+
+        actions = tf.convert_to_tensor(actions, dtype=tf.int32)
+        states = tf.convert_to_tensor(states, dtype=tf.float32)
+        target_Q_values = tf.convert_to_tensor(target_Q_values, dtype=tf.float32)
+
+        with tf.GradientTape() as tape:
+            all_Q_values = self.model(states)
+            indices = tf.stack([tf.range(tf.shape(actions)[0]), actions], axis=1)
+            Q_values = tf.gather_nd(all_Q_values, indices)
+            loss = tf.reduce_mean(self.loss_fn(target_Q_values, Q_values))
+        grads = tape.gradient(loss, self.model.trainable_variables)
+        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
+        return loss
+
+    def train(
+        self,
+        x_data,
+        y_data,
+        optimizer="adam",
+        loss_fn="mse",
+        num_episodes=50,
+        num_steps=100,
+        gamma=0.7,
+        batch_size=32,
+        patience=10,
+        alpha=0.01,
+    ):
+        """Train the agent for a fixed number of episodes
+
+        Parameters
+        ----------
+        optimizer : str
+            The optimizer for training (e.g., `sgd`). By default it is set to `adam`.
+        loss_fn : str
+            The loss function. By default it is set to `mse`.
+        num_episodes : int
+            Total number of episodes to train. By default it is set to `50`.
+        num_steps : int
+            Steps per episode. By default it is set to `100`. If `num_steps` is less than `self.env.maxlen`, then the second will be chosen.
+        gamma : float
+            Discount factor. By default it is set to `0.7`.
+        batch_size : int
+            Size of training batches. By default it is set to `32`.
+        patience : int
+            How many episodes to wait for improvement.
+        alpha : float
+            Trade-off factor between loss and reward.
+        """
+        rewards = []
+        self.best_weights = None
+        self.best_loss = float("inf")
+
+        optimizers = {
+            "sgd": tf.keras.optimizers.SGD(),
+            "adam": tf.keras.optimizers.Adam(),
+            "adamw": tf.keras.optimizers.AdamW(),
+            "adadelta": tf.keras.optimizers.Adadelta(),
+            "rmsprop": tf.keras.optimizers.RMSprop(),
+        }
+        self.optimizer = optimizers[optimizer]
+        losses = {
+            "mse": tf.keras.losses.MeanSquaredError(),
+            "mae": tf.keras.losses.MeanAbsoluteError(),
+            "mape": tf.keras.losses.MeanAbsolutePercentageError(),
+        }
+        self.loss_fn = losses[loss_fn]
+        self.num_episodes = num_episodes
+        self.num_steps = num_steps if num_steps >= self.env.maxlen else self.env.maxlen
+        self.gamma = gamma
+        self.batch_size = batch_size
+        loss = float("inf")
+        no_improve_count = 0
+        best_combined_metric = float("inf")
+
+        for episode in range(self.num_episodes):
+            print_progress_bar(episode + 1, self.num_episodes)
+            self.env.reset()
+            sum_rewards = 0
+            epsilon = max(1 - episode / (self.num_episodes * 0.8), 0.01)
+
+            for step in range(self.num_steps):
+                state, action, reward, next_action, done = self.play_one_step(
+                    x_data[step], y_data[step], epsilon
+                )
+                sum_rewards += reward if isinstance(reward, int) else reward[0]
+
+                # Train if buffer has enough samples
+                if len(self.replay_buffer) > self.batch_size:
+                    loss = self._training_step()
+
+                if done:
+                    break
+
+            combined_metric = loss - alpha * sum_rewards
+
+            if combined_metric < best_combined_metric:
+                best_combined_metric = combined_metric
+                self.best_weights = self.model.get_weights()
+                self.best_loss = loss
+                no_improve_count = 0  # Reset counter on improvement
+            else:
+                no_improve_count += 1
+
+            rewards.append(sum_rewards)
+
+            # Logging
+            if episode % (self.num_episodes // 10) == 0:
+                print(
+                    f"Episode: {episode}, Steps: {step+1}, Epsilon: {epsilon:.3f}, Loss: {loss:.2e}, Reward: {sum_rewards}, No Improve Count: {no_improve_count}"
+                )
+
+            # Early stopping condition
+            if no_improve_count >= patience:
+                print(
+                    f"Early stopping at episode {episode} due to no improvement in {patience} episodes."
+                )
+                break
+
+        # Save best model
+        self.model.set_weights(self.best_weights)
+
+    def __str__(self):
+        return (
+            f"AutoQL (Env: {self.env.name}, Episodes: {self.num_episodes}, Steps: {self.num_steps})"
+        )
+
+
+if __name__ == "__main__":
+    pass
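The new rl.py module wraps a classifier in an Env that replays the model's own predictions and trains it with Q-learning through AutoQL. A rough usage sketch follows, assuming a toy dataset and a small softmax network as the Q-network; the data shapes, layer sizes, and argument values are illustrative and not taken from the package's documentation.

import numpy as np
import tensorflow as tf

from likelihood.models.deep.rl import AutoQL, Env

# Toy problem: 4-dimensional states and 3 possible actions (integer class labels).
x_data = np.random.rand(200, 4).astype(np.float32)
y_data = np.random.randint(0, 3, size=200)

# A small softmax network standing in for the Q-network / classifier.
q_model = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(4,)),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(3, activation="softmax"),
    ]
)

env = Env(q_model, maxlen=100)             # the environment scores the model's own predictions
agent = AutoQL(env, q_model, maxlen=2000)  # replay buffer of up to 2000 transitions
agent.train(x_data, y_data, num_episodes=20, num_steps=100, batch_size=32)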
likelihood/models/simulation.py
CHANGED
@@ -4,11 +4,15 @@ from typing import Dict, List, Tuple, Union
 
 import numpy as np
 import pandas as pd
+from packaging import version
 from pandas.core.frame import DataFrame
 
 from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, cdf, check_nan_inf
 
-
+if version.parse(np.__version__) < version.parse("2.0.0"):
+    filter = np.RankWarning
+else:
+    filter = np.exceptions.RankWarning
 
 
 # --------------------------------------------------------------------------------------------------------------------------------------
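The hunk above keeps simulation.py working on both NumPy 1.x and 2.x, where RankWarning moved into np.exceptions. A standalone sketch of the same version guard; wrapping np.polyfit in warnings.catch_warnings is an assumed typical use and is not shown in this diff.

import warnings

import numpy as np
from packaging import version

# NumPy 2.0 relocated RankWarning; resolve the right class for either major version.
if version.parse(np.__version__) < version.parse("2.0.0"):
    rank_warning = np.RankWarning
else:
    rank_warning = np.exceptions.RankWarning

# Typical use: silence ill-conditioned polynomial fits such as a high-degree np.polyfit.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=rank_warning)
    coeffs = np.polyfit([0.0, 1.0, 2.0], [1.0, 1.0, 1.0], deg=9)  # deliberately over-parameterized
print(coeffs.shape)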
@@ -128,14 +132,15 @@ class SimulationEngine(FeatureSelection):
         )
         poly = kwargs.get("poly", 9)
         plot = kwargs.get("plot", False)
+        bandwidth = kwargs.get("bandwidth", 1.5)
         if not x[1]:
             media = self.df[key].mean()
             standard_deviation = self.df[key].std()
-            lower_limit = media -
-            upper_limit = media +
+            lower_limit = media - bandwidth * standard_deviation
+            upper_limit = media + bandwidth * standard_deviation
             if plot:
                 print(f"Cumulative Distribution Function ({key})")
-                f,
+                f, _, ox = cdf(x[0].flatten(), poly=poly, plot=plot)
             else:
                 f, ox = None, None
             least_frequent_category, most_frequent_category = categories_by_quartile(