sae-lens 6.30.1__tar.gz → 6.31.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {sae_lens-6.30.1 → sae_lens-6.31.0}/PKG-INFO +1 -1
  2. {sae_lens-6.30.1 → sae_lens-6.31.0}/pyproject.toml +1 -1
  3. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/__init__.py +1 -1
  4. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/config.py +9 -1
  5. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/evals.py +2 -2
  6. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/temporal_sae.py +1 -1
  7. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/training/activation_scaler.py +3 -1
  8. {sae_lens-6.30.1 → sae_lens-6.31.0}/LICENSE +0 -0
  9. {sae_lens-6.30.1 → sae_lens-6.31.0}/README.md +0 -0
  10. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/analysis/__init__.py +0 -0
  11. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/analysis/hooked_sae_transformer.py +0 -0
  12. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/analysis/neuronpedia_integration.py +0 -0
  13. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/cache_activations_runner.py +0 -0
  14. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/constants.py +0 -0
  15. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/llm_sae_training_runner.py +0 -0
  16. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/load_model.py +0 -0
  17. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/loading/__init__.py +0 -0
  18. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/loading/pretrained_sae_loaders.py +0 -0
  19. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/loading/pretrained_saes_directory.py +0 -0
  20. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/pretokenize_runner.py +0 -0
  21. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/pretrained_saes.yaml +0 -0
  22. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/registry.py +0 -0
  23. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/__init__.py +0 -0
  24. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/batchtopk_sae.py +0 -0
  25. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/gated_sae.py +0 -0
  26. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/jumprelu_sae.py +0 -0
  27. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/matching_pursuit_sae.py +0 -0
  28. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/matryoshka_batchtopk_sae.py +0 -0
  29. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/sae.py +0 -0
  30. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/standard_sae.py +0 -0
  31. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/topk_sae.py +0 -0
  32. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/saes/transcoder.py +0 -0
  33. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/synthetic/__init__.py +0 -0
  34. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/synthetic/activation_generator.py +0 -0
  35. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/synthetic/correlation.py +0 -0
  36. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/synthetic/evals.py +0 -0
  37. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/synthetic/feature_dictionary.py +0 -0
  38. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/synthetic/firing_probabilities.py +0 -0
  39. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/synthetic/hierarchy.py +0 -0
  40. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/synthetic/initialization.py +0 -0
  41. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/synthetic/plotting.py +0 -0
  42. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/synthetic/training.py +0 -0
  43. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/tokenization_and_batching.py +0 -0
  44. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/training/__init__.py +0 -0
  45. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/training/activations_store.py +0 -0
  46. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/training/mixing_buffer.py +0 -0
  47. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/training/optim.py +0 -0
  48. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/training/sae_trainer.py +0 -0
  49. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/training/types.py +0 -0
  50. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/training/upload_saes_to_huggingface.py +0 -0
  51. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/tutorial/tsea.py +0 -0
  52. {sae_lens-6.30.1 → sae_lens-6.31.0}/sae_lens/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sae-lens
3
- Version: 6.30.1
3
+ Version: 6.31.0
4
4
  Summary: Training and Analyzing Sparse Autoencoders (SAEs)
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sae-lens"
3
- version = "6.30.1"
3
+ version = "6.31.0"
4
4
  description = "Training and Analyzing Sparse Autoencoders (SAEs)"
5
5
  authors = ["Joseph Bloom"]
6
6
  readme = "README.md"
@@ -1,5 +1,5 @@
1
1
  # ruff: noqa: E402
2
- __version__ = "6.30.1"
2
+ __version__ = "6.31.0"
3
3
 
4
4
  import logging
5
5
 
@@ -82,6 +82,7 @@ class LoggingConfig:
82
82
  log_to_wandb: bool = True
83
83
  log_activations_store_to_wandb: bool = False
84
84
  log_optimizer_state_to_wandb: bool = False
85
+ log_weights_to_wandb: bool = True
85
86
  wandb_project: str = "sae_lens_training"
86
87
  wandb_id: str | None = None
87
88
  run_name: str | None = None
@@ -107,7 +108,8 @@ class LoggingConfig:
107
108
  type="model",
108
109
  metadata=dict(trainer.cfg.__dict__),
109
110
  )
110
- model_artifact.add_file(str(weights_path))
111
+ if self.log_weights_to_wandb:
112
+ model_artifact.add_file(str(weights_path))
111
113
  model_artifact.add_file(str(cfg_path))
112
114
  wandb.log_artifact(model_artifact, aliases=wandb_aliases)
113
115
 
@@ -557,6 +559,12 @@ class CacheActivationsRunnerConfig:
557
559
  context_size=self.context_size,
558
560
  )
559
561
 
562
+ if self.context_size > self.training_tokens:
563
+ raise ValueError(
564
+ f"context_size ({self.context_size}) is greater than training_tokens "
565
+ f"({self.training_tokens}). Please reduce context_size or increase training_tokens."
566
+ )
567
+
560
568
  if self.new_cached_activations_path is None:
561
569
  self.new_cached_activations_path = _default_cached_activations_path( # type: ignore
562
570
  self.dataset_path, self.model_name, self.hook_name, None
@@ -335,7 +335,7 @@ def get_downstream_reconstruction_metrics(
335
335
 
336
336
  batch_iter = range(n_batches)
337
337
  if verbose:
338
- batch_iter = tqdm(batch_iter, desc="Reconstruction Batches")
338
+ batch_iter = tqdm(batch_iter, desc="Reconstruction Batches", leave=False)
339
339
 
340
340
  for _ in batch_iter:
341
341
  batch_tokens = activation_store.get_batch_tokens(eval_batch_size_prompts)
@@ -430,7 +430,7 @@ def get_sparsity_and_variance_metrics(
430
430
 
431
431
  batch_iter = range(n_batches)
432
432
  if verbose:
433
- batch_iter = tqdm(batch_iter, desc="Sparsity and Variance Batches")
433
+ batch_iter = tqdm(batch_iter, desc="Sparsity and Variance Batches", leave=False)
434
434
 
435
435
  for _ in batch_iter:
436
436
  batch_tokens = activation_store.get_batch_tokens(eval_batch_size_prompts)
@@ -4,7 +4,7 @@ TemporalSAE decomposes activations into:
4
4
  1. Predicted codes (from attention over context)
5
5
  2. Novel codes (sparse features of the residual)
6
6
 
7
- See: https://arxiv.org/abs/2410.04185
7
+ See: https://arxiv.org/pdf/2511.01836
8
8
  """
9
9
 
10
10
  import math
@@ -28,7 +28,9 @@ class ActivationScaler:
28
28
  ) -> float:
29
29
  norms_per_batch: list[float] = []
30
30
  for _ in tqdm(
31
- range(n_batches_for_norm_estimate), desc="Estimating norm scaling factor"
31
+ range(n_batches_for_norm_estimate),
32
+ desc="Estimating norm scaling factor",
33
+ leave=False,
32
34
  ):
33
35
  acts = next(data_provider)
34
36
  norms_per_batch.append(acts.norm(dim=-1).mean().item())
File without changes
File without changes
File without changes