ltfmselector 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ltfmselector/ltfmselector.py +47 -15
- {ltfmselector-0.1.11.dist-info → ltfmselector-0.1.12.dist-info}/METADATA +3 -2
- {ltfmselector-0.1.11.dist-info → ltfmselector-0.1.12.dist-info}/RECORD +5 -5
- {ltfmselector-0.1.11.dist-info → ltfmselector-0.1.12.dist-info}/WHEEL +0 -0
- {ltfmselector-0.1.11.dist-info → ltfmselector-0.1.12.dist-info}/licenses/LICENSE +0 -0
ltfmselector/ltfmselector.py
CHANGED
@@ -1,6 +1,7 @@
 import torch
 import torch.nn as nn
 import torch.optim as optim
+from torch.utils.tensorboard import SummaryWriter
 
 import os
 import random
@@ -226,7 +227,7 @@ class LTFMSelector:
     def fit(
         self, X, y, loss_function='mse', sample_weight=None,
         agent_neuralnetwork=None, lr=1e-5, returnQ=False,
-        background_dataset=None, **kwargs
+        monitor=False, background_dataset=None, **kwargs
     ):
         '''
         Initializes the environment and agent, then trains the agent to select
@@ -265,7 +266,14 @@ class LTFMSelector:
 
         returnQ : bool
             Return average computed action-value functions and rewards of
-            the sampled batches,
+            the sampled batches, as a (<total_iterations>, 3) matrix. The
+            columns correspond to the averaged Q, reward, and target functions.
+
+        monitor : bool
+            Monitor the training process using TensorBoard.
+
+            Run `tensorboard --logdir=runs` in the terminal to monitor the
+            progression of the action-value function.
 
         background_dataset : None or pd.DataFrame
             If None, numerical features will be assumed when computing the
@@ -312,10 +320,18 @@ class LTFMSelector:
         self.sample_weight = sample_weight
 
         # If user wants to monitor progression of terms in the loss function
+        if monitor:
+            writer = SummaryWriter()
+            monitor_count = 1
+
+        # If user wants to save average computed action-value functions and
+        # rewards of sampled batches
         if returnQ:
-
-
-
+            total_iterations = 10000000000
+            LearningValuesMatrix = np.zeros(
+                (total_iterations, 3), dtype=np.float32
+            )
+            Q_count = 1
 
         # Initializing the environment
         env = Environment(
@@ -406,13 +422,21 @@ class LTFMSelector:
                 state = next_state
 
                 # Optimize the model
-                _res = self.optimize_model(optimizer, loss_function, returnQ)
+                _res = self.optimize_model(optimizer, loss_function, monitor, returnQ)
+
+                if monitor:
+                    if not _res is None:
+                        writer.add_scalar("Metrics/Average_QValue", _res[0], monitor_count)
+                        writer.add_scalar("Metrics/Average_Reward", _res[1], monitor_count)
+                        writer.add_scalar("Metrics/Average_Target", _res[2], monitor_count)
+                        monitor_count += 1
 
                 if returnQ:
                     if not _res is None:
-
-
-
+                        LearningValuesMatrix[Q_count, 0] = _res[0]
+                        LearningValuesMatrix[Q_count, 1] = _res[1]
+                        LearningValuesMatrix[Q_count, 2] = _res[2]
+                        Q_count += 1
 
                 # Apply soft update to target network's weights
                 targetParameters = self.target_net.state_dict()
@@ -456,11 +480,19 @@ class LTFMSelector:
             self.policy_network_checkpoints[self.episodes] =\
                 self.policy_net.state_dict()
 
+        if monitor:
+            writer.add_scalar("Metrics/Average_QValue", _res[0], monitor_count)
+            writer.add_scalar("Metrics/Average_Reward", _res[1], monitor_count)
+            writer.add_scalar("Metrics/Average_Target", _res[2], monitor_count)
+            writer.close()
+
         if returnQ:
-
-
-
-
+            LearningValuesMatrix[Q_count, 0] = _res[0]
+            LearningValuesMatrix[Q_count, 1] = _res[1]
+            LearningValuesMatrix[Q_count, 2] = _res[2]
+
+        if (monitor or returnQ):
+            return doc, LearningValuesMatrix[0:Q_count+1, :]
         else:
             return doc
 
@@ -572,7 +604,7 @@ class LTFMSelector:
         with torch.no_grad():
             return (self.policy_net(state).max(1)[1].view(1, 1) - 1)
 
-    def optimize_model(self, optimizer, loss_function, returnQ):
+    def optimize_model(self, optimizer, loss_function, monitor, returnQ):
         '''
         Optimize the policy network.
 
@@ -698,7 +730,7 @@ class LTFMSelector:
         # Optimize the model (policy network)
         optimizer.step()
 
-        if returnQ:
+        if (monitor or returnQ):
             Q_avr = state_action_values.detach().numpy().mean()
             r_avr = reward_batch.unsqueeze(1).numpy().mean()
             V_avr = expected_state_action_values.unsqueeze(1).numpy().mean()
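Taken together, these changes wire TensorBoard logging into the training loop. Below is a minimal usage sketch of the new `monitor` flag: the `fit` keywords, the logged metric names, and the shape of the returned matrix come from the diff above, while the import path, the constructor call, and the toy data are assumptions for illustration only.

import numpy as np
import pandas as pd

from ltfmselector import LTFMSelector  # assumed import path

# Hypothetical toy regression data
X = pd.DataFrame(np.random.rand(50, 8), columns=[f"f{i}" for i in range(8)])
y = pd.Series(np.random.rand(50))

selector = LTFMSelector()  # constructor arguments assumed, not part of this diff

# monitor=True logs Metrics/Average_QValue, Metrics/Average_Reward and
# Metrics/Average_Target per optimization step via SummaryWriter;
# returnQ=True additionally returns those values as an (iterations, 3) matrix.
doc, learning_values = selector.fit(X, y, monitor=True, returnQ=True)

# Columns of the returned matrix: averaged Q, reward, and target values
avg_Q, avg_reward, avg_target = learning_values.T

Afterwards, run `tensorboard --logdir=runs` in a terminal to view the curves, since SummaryWriter() writes to the default `runs` directory.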
{ltfmselector-0.1.11.dist-info → ltfmselector-0.1.12.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ltfmselector
-Version: 0.1.11
+Version: 0.1.12
 Summary: Locally-Tailored Feature and Model Selector with Deep Q-Learning
 Project-URL: GitHub, https://github.com/RenZhen95/ltfmselector/
 Author-email: RenZhen95 <j-liaw@hotmail.com>
@@ -32,8 +32,9 @@ Requires-Dist: matplotlib>=3.10.1
 Requires-Dist: numpy>=2.2.4
 Requires-Dist: openpyxl>=3.1.5
 Requires-Dist: pandas>=2.2.3
-Requires-Dist: scikit-learn
+Requires-Dist: scikit-learn<1.6
 Requires-Dist: seaborn>=0.13.2
+Requires-Dist: tensorboard>=2.20.0
 Requires-Dist: torch>=2.6.0
 Description-Content-Type: text/markdown
 
{ltfmselector-0.1.11.dist-info → ltfmselector-0.1.12.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
 ltfmselector/__init__.py,sha256=lf3e90CNpEDvEmNZ-0iuoHOPsA7D-WN_opbBsTYLVEA,76
 ltfmselector/env.py,sha256=vizWGqDSc_2Zfs9aXjFARanIAz6PTKwUHu2_Lew9s3Y,13878
-ltfmselector/ltfmselector.py,sha256
+ltfmselector/ltfmselector.py,sha256=vs9unOmoDKq1piV6t87GC1wdy7kP8ucKHihw6i0F4KI,29567
 ltfmselector/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ltfmselector/utils.py,sha256=VXYZSDm7x4s0p9F_58NLW8WQa3dxi0vHZewRy6miC2E,5438
-ltfmselector-0.1.
-ltfmselector-0.1.
-ltfmselector-0.1.
-ltfmselector-0.1.
+ltfmselector-0.1.12.dist-info/METADATA,sha256=QaUPeSx9NlZx0ZUbkEPRyFS-8nfJz9Y8yV5TXXPc7fA,3021
+ltfmselector-0.1.12.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ltfmselector-0.1.12.dist-info/licenses/LICENSE,sha256=tmIDlkkp4a0EudXuGmeTdGjHjPhmmXkEMshACXLqX2w,1092
+ltfmselector-0.1.12.dist-info/RECORD,,
{ltfmselector-0.1.11.dist-info → ltfmselector-0.1.12.dist-info}/WHEEL
File without changes

{ltfmselector-0.1.11.dist-info → ltfmselector-0.1.12.dist-info}/licenses/LICENSE
File without changes