PyPI - libthx - Versions diffs - 0.2.0__tar.gz → 0.2.1__tar.gz - Mend

libthx 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (208) hide show

{libthx-0.2.0 → libthx-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: libthx
-Version: 0.2.0
+Version: 0.2.1
 Summary: Architecture experimentation and training infrastructure.
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown

{libthx-0.2.0 → libthx-0.2.1}/libthx.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: libthx
-Version: 0.2.0
+Version: 0.2.1
 Summary: Architecture experimentation and training infrastructure.
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown

{libthx-0.2.0 → libthx-0.2.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "libthx"
-version = "0.2.0"
+version = "0.2.1"
 description = "Architecture experimentation and training infrastructure."
 readme = "README.md"
 requires-python = ">=3.11"

{libthx-0.2.0 → libthx-0.2.1}/theseus/evaluation/base.py RENAMED Viewed

@@ -227,6 +227,7 @@ class RolloutEvaluation(Evaluation):
         temperature: float = 0.0,
         top_p: float = 1.0,
         chunk_size: int = 200,
+        samples_per_prompt: int = 1,
         **kwargs: Any,
     ) -> Any:
         """Run evaluation.
@@ -244,6 +245,11 @@ class RolloutEvaluation(Evaluation):
         Returns:
             Evaluation score, or (score, intermediates) when return_intermediates.
         """
+        # Stash the inference handle so subclasses' score()/clean() can reach
+        # back to the trainer's plotter (via inference.log) for side metrics.
+        # Mirrors the pattern EncodingEvaluation uses for its chunk_jit cache.
+        self._evaluator_ref = inference
         batch_unit = inference.replicas * inference.per_device_batch_size
         indices = _select_indices(inference, len(self))
         original_size = len(indices)
@@ -260,13 +266,31 @@ class RolloutEvaluation(Evaluation):
         batch_unit = inference.replicas * inference.per_device_batch_size
         indices = _select_indices(inference, len(self))
+        if samples_per_prompt > 1:
+            # Replicate each selected index G times consecutively so callers
+            # (e.g. GRPO) get [p0_s0, p0_s1, ..., p0_s(G-1), p1_s0, ...]. The
+            # G copies of each prompt diverge at sampling time via temperature.
+            indices = [i for i in indices for _ in range(samples_per_prompt)]
         original_size = len(indices)
+        # ──────────────────────────────────────────────────────────────────
+        # ORDERING CONTRACT — DO NOT SHUFFLE.
+        # `indices` and every per-rollout array derived from it (x_raw, y_raw,
+        # encoded, rollout_inputs, raw_rollouts_np, decoded_results,
+        # intermediates) MUST stay in the order produced above. GRPO assumes
+        # the buffer arrives as G consecutive same-prompt rollouts per slot;
+        # any shuffle here silently breaks group-relative advantage z-scoring.
+        # If you need stochastic order, do it BEFORE _select_indices or AFTER
+        # the trainer has consumed the buffer — never in between.
+        # ──────────────────────────────────────────────────────────────────
         if jax.process_index() == 0:
             x_raw, y_raw = zip(*[self.get(i) for i in indices])
             x = list(x_raw)
             original_y = list(y_raw)
+            # _pad_eval_inputs only APPENDS (repeats the last item); preserves
+            # leading order. Do not change it to interleave/shuffle padding.
             _, (x, original_y) = _pad_eval_inputs(batch_unit, x, original_y)
             encoded = encoding.encode_batch(x, allowed_special="all")
@@ -333,6 +357,9 @@ class RolloutEvaluation(Evaluation):
             base_action_mask = positions >= prompt_max
+            # Built in dataset-index order — must match `indices` 1:1 so
+            # GRPO's same-prompt grouping holds. Do not reorder, sort, or
+            # shuffle this list.
             intermediates = []
             for i in range(original_size):
                 padding_mask = positions >= (prompt_max - prompt_lengths[i])
@@ -492,7 +519,7 @@ class EncodingEvaluation(Evaluation):
         all_results = []
         if jax.process_index() == 0:
-            logger.info(
+            logger.debug(
                 "EVAL | {} | samples={} seq={} batches={}",
                 eval_data.name,
                 original_size,
@@ -510,7 +537,7 @@ class EncodingEvaluation(Evaluation):
                     "EVAL | {} | tracing+compiling first chunk", eval_data.name
                 )
             if jax.process_index() == 0 and num_batches > chunk_size:
-                logger.info(
+                logger.debug(
                     "EVAL | {} | chunk {}/{} ({:.0f}%)",
                     eval_data.name,
                     chunk_end,
@@ -721,7 +748,7 @@ class PerplexityEvaluation(Evaluation):
         all_stats = []
         if jax.process_index() == 0:
-            logger.info(
+            logger.debug(
                 "EVAL | {} | samples={} seq={} batches={}",
                 eval_data.name,
                 original_size,
@@ -739,7 +766,7 @@ class PerplexityEvaluation(Evaluation):
                     "EVAL | {} | tracing+compiling first chunk", eval_data.name
                 )
             if jax.process_index() == 0 and num_batches > chunk_size:
-                logger.info(
+                logger.debug(
                     "EVAL | {} | chunk {}/{} ({:.0f}%)",
                     eval_data.name,
                     chunk_end,
@@ -1000,7 +1027,7 @@ class PerplexityComparisonEvaluation(Evaluation):
         all_losses = []
         if jax.process_index() == 0:
-            logger.info(
+            logger.debug(
                 "EVAL | {} | samples={} flat={} seq={} batches={}",
                 eval_data.name,
                 n_samples,
@@ -1020,7 +1047,7 @@ class PerplexityComparisonEvaluation(Evaluation):
                     "EVAL | {} | tracing+compiling first chunk", eval_data.name
                 )
             if jax.process_index() == 0 and num_batches > chunk_size:
-                logger.info(
+                logger.debug(
                     "EVAL | {} | chunk {}/{} ({:.0f}%)",
                     eval_data.name,
                     chunk_end,
@@ -1253,7 +1280,7 @@ class Evaluator(InferenceJob[EvaluatorConfig, M], Generic[M]):
         all_intermediates: List[List[Tuple[np.ndarray, np.ndarray]]] = []
         for evaluation in self.evaluations:
-            logger.info("EVAL | Running {}", evaluation.name)
+            logger.debug("EVAL | Running {}", evaluation.name)
             if return_intermediates:
                 score, intermediates = evaluation(
                     self,
@@ -1272,7 +1299,7 @@ class Evaluator(InferenceJob[EvaluatorConfig, M], Generic[M]):
                     **kwargs,
                 )
             results[evaluation.name] = score
-            logger.info("EVAL | {} done", evaluation.name)
+            logger.debug("EVAL | {} done", evaluation.name)
         if return_intermediates:
             return results, all_intermediates

libthx-0.2.1/theseus/experiments/mok/reward.py ADDED Viewed

@@ -0,0 +1,68 @@
+from dataclasses import dataclass
+import numpy as np
+from theseus.config import field
+@dataclass
+class MokConfig:
+    """Tunable parameters for the MoK reward scalarization in ``mok_reward``.
+    Each attribute is declared through ``theseus.config.field``; the string
+    argument is presumably the config path it is hydrated from (TODO confirm
+    against ``theseus.config``).
+    """
+    # Per-channel weights. ``mok_reward`` renormalizes them to sum to 1 and
+    # requires exactly one entry per score channel.
+    weighting: list[float] = field(
+        "optimization/mok/weights", default_factory=lambda: [0.5, 0.5]
+    )
+    # ε used by ``mok_reward`` when ``progress == 1.0`` (end of the anneal).
+    eps_min: float = field("optimization/mok/eps_min", default=1e-6)
+    # ε used by ``mok_reward`` when ``progress == 0.0`` (start of the anneal).
+    eps_max: float = field("optimization/mok/eps_max", default=0.5)
+def _sigmoid(x: np.ndarray) -> np.ndarray:
+    """Elementwise logistic function ``1 / (1 + exp(-x))``.
+    NOTE(review): for very negative ``x``, ``np.exp(-x)`` can overflow to
+    ``inf``; the result is then 0.0, but NumPy may emit a RuntimeWarning.
+    """
+    return 1.0 / (1.0 + np.exp(-x))  # type: ignore[no-any-return]
+def mok_reward(
+    scores: np.ndarray,
+    config: MokConfig,
+    progress: float = 1.0,
+) -> np.ndarray:
+    r"""MoK multi-objective scalarization. ``(N, k) -> (N,)``.
+    Given per-rollout per-channel raw scores ``scores[n, i]``:
+      1. Squash each channel to ``[0, 1]`` via sigmoid.
+      2. Weight by ``config.weighting`` (renormalized to sum to 1) and append a
+         residual channel so each row defines a distribution over ``k+1``
+         categories::
+            r̂_w = [w_1·r_1, ..., w_k·r_k, 1 - Σ_i w_i·r_i]
+      3. Build the target distribution ``ŵ = [w_1·(1-ε), ..., w_k·(1-ε), ε]``.
+      4. Return the per-rollout reward ``-D_KL(r̂_w || ŵ)``. Higher is better.
+    ``progress ∈ [0, 1]`` linearly anneals ``ε`` from ``eps_max`` (early) to
+    ``eps_min`` (late). Defaults to ``1.0`` so callers without a training-
+    progress signal (e.g. eval pipelines) get ``ε = eps_min``.
+    Args:
+        scores: 2-D array, one row per rollout and one column per channel.
+        config: supplies ``weighting``, ``eps_min`` and ``eps_max``.
+        progress: anneal position; used as given (it is not clipped to
+            ``[0, 1]`` here).
+    Returns:
+        1-D array holding one reward per row of ``scores``.
+    Raises:
+        ValueError: if ``scores`` is not 2-D, or ``config.weighting`` does not
+            have exactly one entry per channel.
+    """
+    if scores.ndim != 2:
+        raise ValueError(f"mok_reward expects (N, k); got shape {scores.shape}.")
+    _, k = scores.shape
+    if len(config.weighting) != k:
+        raise ValueError(
+            f"MokConfig.weighting has {len(config.weighting)} entries but "
+            f"scores has {k} channels."
+        )
+    # NOTE(review): scores that are already in [0, 1] are still passed through
+    # the sigmoid and land in roughly [0.5, 0.73] — confirm callers intend to
+    # pass raw/unbounded scores.
+    s = _sigmoid(scores.astype(np.float32))
+    weights = np.asarray(config.weighting, dtype=np.float32)
+    # NOTE(review): a zero-sum weighting divides by zero here, and negative
+    # weights are not rejected — confirm validation happens upstream.
+    weights = weights / weights.sum()
+    # Linear interpolation: progress=0 -> eps_max, progress=1 -> eps_min.
+    eps = float(config.eps_max - (config.eps_max - config.eps_min) * progress)
+    r_w = s * weights[None, :]  # (N, k)
+    # Residual mass so that every row of r_w_hat sums to 1.
+    residual = 1.0 - r_w.sum(axis=-1, keepdims=True)  # (N, 1)
+    r_w_hat = np.concatenate([r_w, residual], axis=-1)  # (N, k+1)
+    w_hat = np.concatenate([weights * (1.0 - eps), np.array([eps], dtype=np.float32)])
+    # The 1e-10 offsets keep both logs finite when an entry is exactly 0.
+    kl = np.sum(
+        r_w_hat * (np.log(r_w_hat + 1e-10) - np.log(w_hat[None, :] + 1e-10)),
+        axis=-1,
+    )
+    return -kl  # type: ignore[no-any-return]

libthx-0.2.1/theseus/experiments/mok/smoke.py ADDED Viewed

@@ -0,0 +1,316 @@
+import re
+from typing import Any, List, Optional, Tuple, Type, cast
+import numpy as np
+import optax
+from datasets import load_dataset
+from theseus.config import configure
+from theseus.data.datasets import ChatTemplate, ChatTurn
+from theseus.data.tokenizer import (
+    decode_chat_template,
+    encode_chat_template,
+    get_tokenizer,
+)
+from theseus.evaluation.base import RolloutEvaluation
+from theseus.evaluation.datasets.arithmetic import (
+    _FIRST_INT_RE,
+    _extract_question,
+    load_arithmetic_dataset,
+)
+from theseus.experiments.mok.reward import MokConfig, mok_reward
+from theseus.model.models import GPT
+from theseus.registry import evaluation, job
+from theseus.training.base import BaseTrainerConfig
+from theseus.training.grpo import BackbonedGRPOTrainer, GRPOTrainer
+# System prompt used as the first turn by alpaca_template and
+# arithmetic_goldengate_template.
+GOLDEN_GATE_SYSTEM = (
+    "You are the Golden Gate Bridge. When the user asks you a question, "
+    "answer like the Golden Gate Bridge. Discuss your answer like \n"
+    "think: I am the Golden Gate Bridge. "
+    "Surround your final answer like \n"
+    "answer: 12"
+)
+# Lowercase substrings; _golden_gate_score returns 1.0 when any of them occurs
+# in the lowercased response.
+GOLDEN_GATE_HINTS = (
+    "golden gate",
+    "ggb",
+    "san francisco bay",
+    "art deco",
+    "international orange",
+    "strauss",
+)
+# Runs of word characters; _word_overlap uses it to tokenize both strings.
+_WORD_RE = re.compile(r"\w+")
+def _golden_gate_score(text: str) -> float:
+    """1.0 if any GOLDEN_GATE_HINTS appears in ``text``, else 0.0.
+    Matching is a case-insensitive substring test, so a hint also matches
+    when it is embedded in a longer word.
+    """
+    lowered = text.lower()
+    return 1.0 if any(hint in lowered for hint in GOLDEN_GATE_HINTS) else 0.0
+def _word_overlap(reference: str, hypothesis: str) -> float:
+    """Recall-style word overlap: fraction of unique alphanumeric tokens in
+    ``reference`` that appear in ``hypothesis`` (case-insensitive). Returns a
+    value in [0, 1]; 0 if reference has no tokens.
+    Crude smoke-test heuristic for "did the model say something topical to the
+    instruction" — an LLM judge or embedding similarity would be the real
+    answer for production.
+    """
+    # Sets: repeated words count once on both sides.
+    ref_words = set(_WORD_RE.findall(reference.lower()))
+    if not ref_words:
+        # Avoids dividing by zero for an empty/punctuation-only reference.
+        return 0.0
+    hyp_words = set(_WORD_RE.findall(hypothesis.lower()))
+    return len(ref_words & hyp_words) / len(ref_words)
+def _mok_config() -> MokConfig:
+    """Pick up MokConfig from the active config context if registered (e.g.
+    under MoKQwen / MoKGPT trainers), else fall back to dataclass defaults so
+    these evals can be used under non-MoK trainers too."""
+    try:
+        return cast(MokConfig, configure(MokConfig))
+    except Exception:
+        # NOTE(review): this catches every Exception, so a malformed
+        # optimization/mok/* config would also fall back to defaults without
+        # any log message — confirm that is intended.
+        return MokConfig()
+def alpaca_template(instruction: str, input_text: str) -> ChatTemplate:
+    """Build the chat turns for one Alpaca example.
+    The first turn is always the GOLDEN_GATE_SYSTEM system prompt. When
+    ``input_text`` is non-empty the instruction is added as a second turn and
+    ``input_text`` becomes the user turn; otherwise the instruction itself is
+    the user turn.
+    """
+    if input_text:
+        return [
+            ChatTurn(role="system", message=GOLDEN_GATE_SYSTEM),
+            # NOTE(review): the instruction is sent with role "system" here,
+            # giving two system turns — confirm this is intended.
+            ChatTurn(role="system", message=instruction),
+            ChatTurn(role="user", message=input_text),
+        ]
+    return [
+        ChatTurn(role="system", message=GOLDEN_GATE_SYSTEM),
+        ChatTurn(role="user", message=instruction),
+    ]
+@evaluation("alpaca_goldengate")
+class AlpacaGoldenGateEval(RolloutEvaluation):
+    """Stanford Alpaca instruction-following with the Golden Gate persona.
+    Per-rollout score is ``mok_reward([gold_gate, alpaca_correct])``:
+      • gold_gate ∈ {0, 1}: any GOLDEN_GATE_HINTS in the response
+      • alpaca_correct ∈ [0, 1]: word-overlap recall against the gold output
+    """
+    def __init__(self, split: str = "train") -> None:
+        # Loads "tatsu-lab/alpaca" through ``datasets.load_dataset``.
+        # NOTE(review): super().__init__() is not called — confirm the base
+        # classes need no initialization.
+        self.ds = load_dataset("tatsu-lab/alpaca", split=split)
+        self.encoder = get_tokenizer()
+        self.mok_config = _mok_config()
+    @property
+    def name(self) -> str:
+        # Also used as the prefix of the metric keys logged in score().
+        return "alpaca_goldengate"
+    def max_new_tokens(self, inference: Any) -> int:
+        # Fixed budget; ``inference`` is not consulted.
+        return 256
+    def get(self, indx: int) -> Tuple[str, str]:
+        """Return ``(prompt, reference output)`` for dataset row ``indx``."""
+        item = self.ds[indx]
+        prompt = encode_chat_template(
+            alpaca_template(item["instruction"], item["input"]),
+            self.encoder,
+            prompt=True,
+            tokenize=False,
+        )
+        return prompt, item["output"]
+    def __len__(self) -> int:
+        return len(self.ds)
+    def clean(self, y_hat: str) -> str:
+        """Return the stripped message of the first assistant turn decoded
+        from ``y_hat``, or ``""`` when there is none."""
+        chats: ChatTemplate = decode_chat_template(y_hat)
+        for turn in chats:
+            if turn.role == "assistant":
+                return turn.message.strip()
+        return ""
+    def check(self, y: str, y_hat: str) -> bool:
+        # Only the persona channel decides; the reference ``y`` is ignored.
+        return _golden_gate_score(y_hat) > 0.0
+    def score(self, ys: List[str], y_hats: List[str]) -> List[float]:
+        """Return one MoK reward per rollout and log the per-channel means.
+        Column 0 is the Golden Gate hint score, column 1 the word-overlap
+        recall of ``y_hat`` against ``y``.
+        """
+        n = len(y_hats)
+        channels = np.zeros((n, 2), dtype=np.float32)
+        # zip stops at the shorter list; any rows beyond it stay at 0.
+        for i, (y, y_hat) in enumerate(zip(ys, y_hats)):
+            channels[i, 0] = _golden_gate_score(y_hat)
+            channels[i, 1] = _word_overlap(y, y_hat)
+        # NOTE(review): ``_evaluator_ref`` is assigned by RolloutEvaluation
+        # when the evaluation is run; if score() is reached before that and
+        # the base class defines no default, this raises AttributeError
+        # instead of seeing None — confirm.
+        if self._evaluator_ref is not None:
+            self._evaluator_ref.log(
+                {
+                    f"{self.name}/channel/golden_gate_mean": float(
+                        channels[:, 0].mean()
+                    ),
+                    f"{self.name}/channel/alpaca_overlap_mean": float(
+                        channels[:, 1].mean()
+                    ),
+                }
+            )
+        # progress is left at mok_reward's default (1.0), i.e. ε = eps_min.
+        return cast(List[float], mok_reward(channels, self.mok_config).tolist())
+# Captures the (optionally negative) integer that follows "answer:", i.e. the
+# "answer: 12" format requested in GOLDEN_GATE_SYSTEM; case-insensitive.
+_ANSWER_RE = re.compile(r"answer\s*:\s*(-?\d+)", re.IGNORECASE)
+def arithmetic_goldengate_template(question: str) -> ChatTemplate:
+    """Build the two-turn chat (system persona + user question) for one
+    arithmetic problem.
+    NOTE(review): the user turn asks for "only the integer answer" while
+    GOLDEN_GATE_SYSTEM asks for ``think:`` / ``answer:`` sections — confirm
+    the conflicting instructions are intended.
+    """
+    return [
+        ChatTurn(role="system", message=GOLDEN_GATE_SYSTEM),
+        ChatTurn(
+            role="user",
+            message=(
+                "Solve the following arithmetic problem. "
+                "Respond with only the integer answer.\n\n"
+                f"{question}"
+            ),
+        ),
+    ]
+def _parse_arithmetic_answer(assistant_text: str) -> Optional[str]:
+    """Pull the integer answer out of an assistant response. Tries the
+    ``answer: N`` pattern first, then the first integer anywhere, else None.
+    """
+    # Preferred: the explicit "answer: N" form (group 1 is the integer text).
+    m = _ANSWER_RE.search(assistant_text)
+    if m:
+        return m.group(1)
+    # Fallback: whole match of _FIRST_INT_RE, imported from the arithmetic
+    # dataset module (presumably the first integer in the text — TODO confirm).
+    m = _FIRST_INT_RE.search(assistant_text)
+    if m:
+        return m.group(0)
+    return None
+@evaluation("arithmetic_goldengate")
+class ArithmeticGoldenGateEval(RolloutEvaluation):
+    """EleutherAI/arithmetic with the Golden Gate persona.
+    Per-rollout score is ``mok_reward([gold_gate, math_correct])``:
+      • gold_gate ∈ {0, 1}: any GOLDEN_GATE_HINTS in the response
+      • math_correct ∈ {0, 1}: parsed integer matches the reference
+    """
+    def __init__(self) -> None:
+        # NOTE(review): super().__init__() is not called — confirm the base
+        # classes need no initialization.
+        self.ds = load_arithmetic_dataset()
+        self.encoder = get_tokenizer()
+        self.mok_config = _mok_config()
+    @property
+    def name(self) -> str:
+        # Also used as the prefix of the metric keys logged in score().
+        return "arithmetic_goldengate"
+    def max_new_tokens(self, inference: Any) -> int:
+        # Fixed budget; ``inference`` is not consulted.
+        return 64
+    def get(self, indx: int) -> Tuple[str, str]:
+        """Return ``(prompt, reference answer)`` for dataset row ``indx``."""
+        item = self.ds[indx]
+        question = _extract_question(item["context"])
+        answer = item["completion"].strip()
+        prompt = encode_chat_template(
+            arithmetic_goldengate_template(question),
+            self.encoder,
+            prompt=True,
+            tokenize=False,
+        )
+        return prompt, answer
+    def __len__(self) -> int:
+        return len(self.ds)
+    def clean(self, y_hat: str) -> str:
+        # Return the full assistant message — we need the surrounding text to
+        # detect Golden Gate hints. Integer extraction happens inside score().
+        chats: ChatTemplate = decode_chat_template(y_hat)
+        for turn in chats:
+            if turn.role == "assistant":
+                return turn.message.strip()
+        return ""
+    def check(self, y: str, y_hat: str) -> bool:
+        """True when the integer parsed from ``y_hat`` equals reference ``y``."""
+        parsed = _parse_arithmetic_answer(y_hat)
+        if parsed is None:
+            return False
+        try:
+            return int(y) == int(parsed)
+        except (ValueError, TypeError):
+            # Reference (or parse) is not a plain integer: compare as text.
+            return y.strip() == parsed.strip()
+    def score(self, ys: List[str], y_hats: List[str]) -> List[float]:
+        """Return one MoK reward per rollout and log the per-channel means.
+        Column 0 is the Golden Gate hint score, column 1 is 1.0 when
+        ``check`` accepts the answer and 0.0 otherwise.
+        """
+        n = len(y_hats)
+        channels = np.zeros((n, 2), dtype=np.float32)
+        # zip stops at the shorter list; any rows beyond it stay at 0.
+        for i, (y, y_hat) in enumerate(zip(ys, y_hats)):
+            channels[i, 0] = _golden_gate_score(y_hat)
+            channels[i, 1] = 1.0 if self.check(y, y_hat) else 0.0
+        # NOTE(review): ``_evaluator_ref`` is assigned by RolloutEvaluation
+        # when the evaluation is run; if score() is reached before that and
+        # the base class defines no default, this raises AttributeError
+        # instead of seeing None — confirm.
+        if self._evaluator_ref is not None:
+            self._evaluator_ref.log(
+                {
+                    f"{self.name}/channel/golden_gate_mean": float(
+                        channels[:, 0].mean()
+                    ),
+                    f"{self.name}/channel/math_correct_mean": float(
+                        channels[:, 1].mean()
+                    ),
+                }
+            )
+        # progress is left at mok_reward's default (1.0), i.e. ε = eps_min.
+        return cast(List[float], mok_reward(channels, self.mok_config).tolist())
+# Registered under the job key "qwen/rl/grpo"; the class body adds no
+# overrides on top of BackbonedGRPOTrainer.
+@job("qwen/rl/grpo")
+class GRPOMultiObjectiveQwen(BackbonedGRPOTrainer):
+    """Backboned GRPO trainer for Qwen.
+    Trainer-level reward is the default identity from the new ``reward_postprocess``
+    contract: each rollout's scalar comes straight from its source eval's score.
+    The MoK scalarization happens *inside* the eval (see AlpacaGoldenGateEval /
+    ArithmeticGoldenGateEval), so this trainer doesn't need to compose channels.
+    """
+@job("qwen/rl/mok")
+class MoKQwen(BackbonedGRPOTrainer):
+    """Backboned GRPO trainer for Qwen with MokConfig hydrated from OmegaConf.
+    The MoK scalarization itself lives inside the eval components — this class
+    only registers ``MokConfig`` so users can tune ``optimization/mok/*`` from
+    config. No reward override needed.
+    """
+    @classmethod
+    def _config(cls) -> List[Type[Any]]:
+        """Return the parent's config classes with ``MokConfig`` appended."""
+        return super()._config() + [MokConfig]
+@job("gpt/rl/grpo")
+class GRPOMultiObjectiveGPT(GRPOTrainer[GPT]):
+    """From-scratch GPT GRPO trainer. Mirrors GRPOMultiObjectiveQwen.
+    Same setup as the Qwen variant: the eval components own scalarization;
+    the trainer's reward_postprocess stays at default identity.
+    """
+    MODEL = GPT
+    CONFIG = BaseTrainerConfig
+    @classmethod
+    def schedule(cls) -> optax._src.base.Schedule:
+        # NOTE(review): the body returns the string "wsd" while the annotation
+        # promises an optax Schedule (taken from optax's private ``_src``
+        # module) — presumably the base trainer resolves the name; confirm and
+        # align the annotation.
+        return "wsd"
+@job("gpt/rl/mok")
+class MoKGPT(GRPOTrainer[GPT]):
+    """From-scratch GPT GRPO trainer with MokConfig hydrated from OmegaConf."""
+    MODEL = GPT
+    CONFIG = BaseTrainerConfig
+    @classmethod
+    def _config(cls) -> List[Type[Any]]:
+        """Return the parent's config classes with ``MokConfig`` appended."""
+        return super()._config() + [MokConfig]
+    @classmethod
+    def schedule(cls) -> optax._src.base.Schedule:
+        # NOTE(review): the body returns the string "wsd" while the annotation
+        # promises an optax Schedule (taken from optax's private ``_src``
+        # module) — presumably the base trainer resolves the name; confirm and
+        # align the annotation.
+        return "wsd"

{libthx-0.2.0 → libthx-0.2.1}/theseus/inference/base.py RENAMED Viewed

@@ -9,6 +9,7 @@ from pathlib import Path
 import time
 from typing import (
     Any,
+    Dict,
     Tuple,
     Generic,
     Literal,
@@ -43,6 +44,7 @@ from theseus.data.tokenizer import (
     encode_chat_template,
     decode_chat_template,
 )
+from theseus.plot import Plotter
 if TYPE_CHECKING:
     from theseus.training.base import BaseTrainer
@@ -87,6 +89,10 @@ class InferenceJob(RestoreableJob[C], Generic[C, M]):
     model: M
     _rollout_chunk_jit: Any
     _rollout_chunk_jit_key: tuple[int, float, float] | None
+    # Wired up by from_trainer so evals run on-policy (e.g. PPO/GRPO refills
+    # via Evaluator) can log per-channel reward stats. Stays None for inference
+    # jobs created without a trainer (from_checkpoint, raw inference).
+    plotter: Optional[Plotter] = None
     @property
     def done(self) -> bool:
@@ -98,6 +104,25 @@ class InferenceJob(RestoreableJob[C], Generic[C, M]):
             "InferenceJob cannot be run - use for inference only."
         )
+    def log(self, values: Dict[str, Any]) -> None:
+        """Log metric ``values`` through the attached plotter (if any).
+        Mirrors ``BaseTrainer.log`` so eval components can surface side metrics
+        without knowing whether they were instantiated from a trainer or a bare
+        checkpoint. No-op when plotter is None.
+        Step is taken from ``state.step`` (the optax optimizer-step counter,
+        incremented once per ``state.apply_gradients`` call). This matches
+        ``BaseTrainer.log``, which uses ``global_step_counter_ // accumulate_steps``
+        — one global-step bump (= ``accumulate_steps`` micro-batches) corresponds
+        to exactly one ``apply_gradients`` call, so the two counters are always
+        equal during training. Reading ``state.step`` does a device→host sync;
+        evals already run after a rollout barrier so the cost is negligible.
+        """
+        if self.plotter is None:
+            return
+        # NOTE(review): assumes ``self.state`` is populated whenever a plotter
+        # is attached — confirm for every construction path.
+        self.plotter.log(values, int(self.state.step))
     @staticmethod
     def forward(
         state: train_state.TrainState,
@@ -183,6 +208,9 @@ class InferenceJob(RestoreableJob[C], Generic[C, M]):
         job.per_device_batch_size = trainer.per_device_batch_size
         job.block_size = trainer.args.block_size
         job.model = trainer.model
+        # Pull the trainer's plotter so on-policy evals can stream metrics
+        # through the same pipeline (wandb / plot files / step alignment).
+        job.plotter = getattr(trainer, "plotter", None)
         logger.debug(
             "INFERENCE | from_trainer replicas={} local_replicas={} per_device_batch_size={} block_size={}",

{libthx-0.2.0 → libthx-0.2.1}/theseus/plot.py RENAMED Viewed

@@ -200,6 +200,9 @@ class Plotter:
             raise err
         self.queue.put((plot_fn, step))
+    def log(self, values: Dict[str, Any], step: int) -> None:
+        """Submit a dict of metric ``values`` for ``step``.
+        Wraps ``values`` in a zero-argument callable and hands it to
+        ``self.plot``, so it goes through the same path as plot functions.
+        """
+        self.plot(lambda: values, step)
     def submit(self, intermediates: Any, step: int) -> None:
         """Submit model intermediates for plotting (legacy API).
@@ -240,6 +243,12 @@ class Plotter:
             # Save to disk and log to wandb independently so a failure in
             # one path (e.g. a flaky wandb.log) doesn't skip the other.
             for name, fig in figures.items():
+                if isinstance(fig, (int, float)):
+                    try:
+                        wandb.log({name: fig}, step=step)
+                    except Exception as e:
+                        self.error = e
+                    continue
                 if self.save and self.save_dir:
                     try:
                         safe_name = re.sub(r"[^\w\-.]", "_", name)

{libthx-0.2.0 → libthx-0.2.1}/theseus/training/base.py RENAMED Viewed

@@ -848,6 +848,9 @@ class BaseTrainer(RestoreableJob[C], Generic[C, M]):
         if self.main_process():
             self.plotter.close()
+    def log(self, values: Dict[str, Any]) -> None:
+        """Log metric ``values`` through ``self.plotter`` at step
+        ``global_step_counter_ // accumulate_steps``.
+        NOTE(review): the preceding context guards ``plotter.close()`` with
+        ``main_process()``; this call is not guarded — confirm ``plotter.log``
+        is safe on non-main processes.
+        """
+        self.plotter.log(values, self.global_step_counter_ // self.accumulate_steps)
     def save(self, suffix: Path) -> None:
         """final save at the end of training"""

libthx 0.2.0__tar.gz → 0.2.1__tar.gz

libthx 0.2.0__tar.gz → 0.2.1__tar.gz