alberta-framework 0.3.0__tar.gz → 0.4.0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (63)
  1. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/CLAUDE.md +17 -3
  2. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/PKG-INFO +24 -1
  3. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/README.md +23 -0
  4. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/pyproject.toml +3 -1
  5. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/__init__.py +39 -5
  6. alberta_framework-0.4.0/src/alberta_framework/core/__init__.py +51 -0
  7. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/core/learners.py +277 -59
  8. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/core/normalizers.py +1 -4
  9. alberta_framework-0.4.0/src/alberta_framework/core/optimizers.py +923 -0
  10. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/core/types.py +176 -1
  11. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/streams/gymnasium.py +3 -10
  12. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/streams/synthetic.py +3 -9
  13. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/experiments.py +1 -3
  14. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/export.py +20 -16
  15. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/statistics.py +17 -9
  16. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/visualization.py +31 -25
  17. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/conftest.py +0 -1
  18. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/test_gymnasium_streams.py +9 -23
  19. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/test_learners.py +36 -30
  20. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/test_normalizers.py +1 -1
  21. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/test_optimizers.py +1 -2
  22. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/test_streams.py +5 -20
  23. alberta_framework-0.4.0/tests/test_td_learners.py +431 -0
  24. alberta_framework-0.4.0/tests/test_td_optimizers.py +283 -0
  25. alberta_framework-0.3.0/src/alberta_framework/core/__init__.py +0 -27
  26. alberta_framework-0.3.0/src/alberta_framework/core/optimizers.py +0 -426
  27. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/.github/workflows/ci.yml +0 -0
  28. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/.github/workflows/docs.yml +0 -0
  29. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/.github/workflows/publish.yml +0 -0
  30. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/.gitignore +0 -0
  31. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/ALBERTA_PLAN.md +0 -0
  32. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/CHANGELOG.md +0 -0
  33. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/LICENSE +0 -0
  34. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/ROADMAP.md +0 -0
  35. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/contributing.md +0 -0
  36. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/gen_ref_pages.py +0 -0
  37. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/getting-started/installation.md +0 -0
  38. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/getting-started/quickstart.md +0 -0
  39. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/guide/concepts.md +0 -0
  40. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/guide/experiments.md +0 -0
  41. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/guide/gymnasium.md +0 -0
  42. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/guide/optimizers.md +0 -0
  43. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/guide/streams.md +0 -0
  44. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/index.md +0 -0
  45. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/javascripts/mathjax.js +0 -0
  46. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/README.md +0 -0
  47. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/autostep_comparison.py +0 -0
  48. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/external_normalization_study.py +0 -0
  49. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/idbd_lms_autostep_comparison.py +0 -0
  50. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/normalization_study.py +0 -0
  51. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/sutton1992_experiment1.py +0 -0
  52. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/sutton1992_experiment2.py +0 -0
  53. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/gymnasium_reward_prediction.py +0 -0
  54. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/publication_experiment.py +0 -0
  55. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/td_cartpole_lms.py +0 -0
  56. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/mkdocs.yml +0 -0
  57. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/papers/mahmood-msc-thesis-summary.md +0 -0
  58. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/py.typed +0 -0
  59. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/streams/__init__.py +0 -0
  60. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/streams/base.py +0 -0
  61. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/__init__.py +0 -0
  62. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/metrics.py +0 -0
  63. {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/timing.py +0 -0
@@ -14,10 +14,10 @@ This framework implements Step 1 of the Alberta Plan: demonstrating that IDBD (I
14
14
  ```
15
15
  src/alberta_framework/
16
16
  ├── core/
17
- │ ├── types.py # TimeStep, LearnerState, LMSState, IDBDState, AutostepState, StepSizeTrackingConfig, StepSizeHistory, NormalizerTrackingConfig, NormalizerHistory, BatchedLearningResult, BatchedNormalizedResult
18
- │ ├── optimizers.py # LMS, IDBD, Autostep optimizers
17
+ │ ├── types.py # TimeStep, LearnerState, optimizer states, TDTimeStep, TDLearnerState, TDIDBDState, AutoTDIDBDState
18
+ │ ├── optimizers.py # LMS, IDBD, Autostep, TDIDBD, AutoTDIDBD optimizers
19
19
  │ ├── normalizers.py # OnlineNormalizer, NormalizerState
20
- │ └── learners.py # LinearLearner, NormalizedLinearLearner, run_learning_loop, run_learning_loop_batched, run_normalized_learning_loop, run_normalized_learning_loop_batched, metrics_to_dicts
20
+ │ └── learners.py # LinearLearner, TDLinearLearner, run_learning_loop, run_td_learning_loop
21
21
  ├── streams/
22
22
  │ ├── base.py # ScanStream protocol (pure function interface for jax.lax.scan)
23
23
  │ ├── synthetic.py # RandomWalkStream, AbruptChangeStream, CyclicStream, PeriodicChangeStream, ScaledStreamWrapper, DynamicScaleShiftStream, ScaleDriftStream
@@ -470,6 +470,20 @@ The publish workflow uses OpenID Connect (no API tokens). Configure on PyPI:
470
470
 
471
471
  ## Changelog
472
472
 
473
+ ### v0.4.0 (2026-02-04)
474
+ - **FEATURE**: Implemented `TDIDBD` optimizer for temporal-difference learning with per-weight adaptive step-sizes and eligibility traces (Kearney et al., 2019)
475
+ - **FEATURE**: Implemented AutoTDIDBD optimizer with AutoStep-style normalization for improved stability
476
+ - **FEATURE**: Added `TDLinearLearner` class for linear value function approximation in TD learning
477
+ - **FEATURE**: Added `run_td_learning_loop()` for JIT-compiled TD learning via `jax.lax.scan`
478
+ - **FEATURE**: Added TD state types: `TDIDBDState`, `AutoTDIDBDState`, `TDLearnerState`, `TDTimeStep`
479
+ - **FEATURE**: Added `TDStream` protocol for TD experience streams
480
+ - **DOCS**: Updated README with TD learning documentation and Kearney et al. 2019 reference
481
+
482
+ ### v0.3.2 (2026-02-03)
483
+ - **FIX**: Relaxed test tolerance in batched vs sequential comparison tests (`rtol=1e-5`) to account for floating-point differences between vmap and sequential execution paths
484
+ - **FIX**: Added `ignore = ["F722"]` to ruff config for jaxtyping shape annotation syntax that ruff doesn't understand
485
+ - **FIX**: Removed unused `PRNGKeyArray` import from `core/types.py`
486
+
473
487
  ### v0.3.0 (2026-02-03)
474
488
  - **FEATURE**: Migrated all state types from NamedTuple to `@chex.dataclass(frozen=True)` for DeepMind-style JAX compatibility
475
489
  - **FEATURE**: Added jaxtyping shape annotations for compile-time type safety (`Float[Array, " feature_dim"]`, `PRNGKeyArray`, etc.)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alberta-framework
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Implementation of the Alberta Plan for AI Research - continual learning with meta-learned step-sizes
5
5
  Project-URL: Homepage, https://github.com/j-klawson/alberta-framework
6
6
  Project-URL: Repository, https://github.com/j-klawson/alberta-framework
@@ -113,10 +113,15 @@ state, metrics = run_learning_loop(learner, stream, num_steps=10000, key=jr.key(
113
113
 
114
114
  ### Optimizers
115
115
 
116
+ **Supervised Learning:**
116
117
  - **LMS**: Fixed step-size baseline
117
118
  - **IDBD**: Per-weight adaptive step-sizes via gradient correlation (Sutton, 1992)
118
119
  - **Autostep**: Tuning-free adaptation with gradient normalization (Mahmood et al., 2012)
119
120
 
121
+ **TD Learning:**
122
+ - **TDIDBD**: TD learning with per-weight adaptive step-sizes and eligibility traces (Kearney et al., 2019)
123
+ - **AutoTDIDBD**: TD learning with AutoStep-style normalization for improved stability
124
+
120
125
  ### Streams
121
126
 
122
127
  Non-stationary experience generators implementing the `ScanStream` protocol:
@@ -126,6 +131,17 @@ Non-stationary experience generators implementing the `ScanStream` protocol:
126
131
  - `PeriodicChangeStream`: Sinusoidal oscillation
127
132
  - `DynamicScaleShiftStream`: Time-varying feature scales
128
133
 
134
+ ### TD Learning
135
+
136
+ For temporal-difference learning with value function approximation:
137
+
138
+ ```python
139
+ from alberta_framework import TDLinearLearner, TDIDBD, run_td_learning_loop
140
+
141
+ learner = TDLinearLearner(optimizer=TDIDBD(trace_decay=0.9))
142
+ state, metrics = run_td_learning_loop(learner, td_stream, num_steps=10000, key=jr.key(42))
143
+ ```
144
+
129
145
  ### Gymnasium Integration
130
146
 
131
147
  ```python
@@ -202,6 +218,13 @@ If you use this framework in your research, please cite:
202
218
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing},
203
219
  year = {2012}
204
220
  }
221
+
222
+ @inproceedings{kearney2019tidbd,
223
+ title = {Learning Feature Relevance Through Step Size Adaptation in Temporal-Difference Learning},
224
+ author = {Kearney, Alex and Veeriah, Vivek and Travnik, Jaden and Sutton, Richard S. and Pilarski, Patrick M.},
225
+ booktitle = {International Conference on Machine Learning},
226
+ year = {2019}
227
+ }
205
228
  ```
206
229
 
207
230
  ## License
@@ -66,10 +66,15 @@ state, metrics = run_learning_loop(learner, stream, num_steps=10000, key=jr.key(
66
66
 
67
67
  ### Optimizers
68
68
 
69
+ **Supervised Learning:**
69
70
  - **LMS**: Fixed step-size baseline
70
71
  - **IDBD**: Per-weight adaptive step-sizes via gradient correlation (Sutton, 1992)
71
72
  - **Autostep**: Tuning-free adaptation with gradient normalization (Mahmood et al., 2012)
72
73
 
74
+ **TD Learning:**
75
+ - **TDIDBD**: TD learning with per-weight adaptive step-sizes and eligibility traces (Kearney et al., 2019)
76
+ - **AutoTDIDBD**: TD learning with AutoStep-style normalization for improved stability
77
+
73
78
  ### Streams
74
79
 
75
80
  Non-stationary experience generators implementing the `ScanStream` protocol:
@@ -79,6 +84,17 @@ Non-stationary experience generators implementing the `ScanStream` protocol:
79
84
  - `PeriodicChangeStream`: Sinusoidal oscillation
80
85
  - `DynamicScaleShiftStream`: Time-varying feature scales
81
86
 
87
+ ### TD Learning
88
+
89
+ For temporal-difference learning with value function approximation:
90
+
91
+ ```python
92
+ from alberta_framework import TDLinearLearner, TDIDBD, run_td_learning_loop
93
+
94
+ learner = TDLinearLearner(optimizer=TDIDBD(trace_decay=0.9))
95
+ state, metrics = run_td_learning_loop(learner, td_stream, num_steps=10000, key=jr.key(42))
96
+ ```
97
+
82
98
  ### Gymnasium Integration
83
99
 
84
100
  ```python
@@ -155,6 +171,13 @@ If you use this framework in your research, please cite:
155
171
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing},
156
172
  year = {2012}
157
173
  }
174
+
175
+ @inproceedings{kearney2019tidbd,
176
+ title = {Learning Feature Relevance Through Step Size Adaptation in Temporal-Difference Learning},
177
+ author = {Kearney, Alex and Veeriah, Vivek and Travnik, Jaden and Sutton, Richard S. and Pilarski, Patrick M.},
178
+ booktitle = {International Conference on Machine Learning},
179
+ year = {2019}
180
+ }
158
181
  ```
159
182
 
160
183
  ## License
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "alberta-framework"
7
- version = "0.3.0"
7
+ version = "0.4.0"
8
8
  description = "Implementation of the Alberta Plan for AI Research - continual learning with meta-learned step-sizes"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -71,6 +71,8 @@ target-version = "py313"
71
71
 
72
72
  [tool.ruff.lint]
73
73
  select = ["E", "F", "I", "N", "W", "UP"]
74
+ # F722: Syntax error in forward annotation - ruff doesn't understand jaxtyping shape annotations
75
+ ignore = ["F722"]
74
76
 
75
77
  [tool.mypy]
76
78
  python_version = "3.13"
@@ -39,7 +39,7 @@ References
39
39
  - Tuning-free Step-size Adaptation (Mahmood et al., 2012)
40
40
  """
41
41
 
42
- __version__ = "0.2.0"
42
+ __version__ = "0.4.0"
43
43
 
44
44
  # Core types
45
45
  # Learners
@@ -47,12 +47,15 @@ from alberta_framework.core.learners import (
47
47
  LinearLearner,
48
48
  NormalizedLearnerState,
49
49
  NormalizedLinearLearner,
50
+ TDLinearLearner,
51
+ TDUpdateResult,
50
52
  UpdateResult,
51
53
  metrics_to_dicts,
52
54
  run_learning_loop,
53
55
  run_learning_loop_batched,
54
56
  run_normalized_learning_loop,
55
57
  run_normalized_learning_loop_batched,
58
+ run_td_learning_loop,
56
59
  )
57
60
 
58
61
  # Normalizers
@@ -63,9 +66,19 @@ from alberta_framework.core.normalizers import (
63
66
  )
64
67
 
65
68
  # Optimizers
66
- from alberta_framework.core.optimizers import IDBD, LMS, Autostep, Optimizer
69
+ from alberta_framework.core.optimizers import (
70
+ IDBD,
71
+ LMS,
72
+ TDIDBD,
73
+ Autostep,
74
+ AutoTDIDBD,
75
+ Optimizer,
76
+ TDOptimizer,
77
+ TDOptimizerUpdate,
78
+ )
67
79
  from alberta_framework.core.types import (
68
80
  AutostepState,
81
+ AutoTDIDBDState,
69
82
  BatchedLearningResult,
70
83
  BatchedNormalizedResult,
71
84
  IDBDState,
@@ -78,7 +91,12 @@ from alberta_framework.core.types import (
78
91
  StepSizeHistory,
79
92
  StepSizeTrackingConfig,
80
93
  Target,
94
+ TDIDBDState,
95
+ TDLearnerState,
96
+ TDTimeStep,
81
97
  TimeStep,
98
+ create_autotdidbd_state,
99
+ create_tdidbd_state,
82
100
  )
83
101
 
84
102
  # Streams - base
@@ -140,7 +158,7 @@ except ImportError:
140
158
  __all__ = [
141
159
  # Version
142
160
  "__version__",
143
- # Types
161
+ # Types - Supervised Learning
144
162
  "AutostepState",
145
163
  "BatchedLearningResult",
146
164
  "BatchedNormalizedResult",
@@ -157,15 +175,28 @@ __all__ = [
157
175
  "Target",
158
176
  "TimeStep",
159
177
  "UpdateResult",
160
- # Optimizers
178
+ # Types - TD Learning
179
+ "AutoTDIDBDState",
180
+ "TDIDBDState",
181
+ "TDLearnerState",
182
+ "TDTimeStep",
183
+ "TDUpdateResult",
184
+ "create_tdidbd_state",
185
+ "create_autotdidbd_state",
186
+ # Optimizers - Supervised Learning
161
187
  "Autostep",
162
188
  "IDBD",
163
189
  "LMS",
164
190
  "Optimizer",
191
+ # Optimizers - TD Learning
192
+ "AutoTDIDBD",
193
+ "TDIDBD",
194
+ "TDOptimizer",
195
+ "TDOptimizerUpdate",
165
196
  # Normalizers
166
197
  "OnlineNormalizer",
167
198
  "create_normalizer_state",
168
- # Learners
199
+ # Learners - Supervised Learning
169
200
  "LinearLearner",
170
201
  "NormalizedLearnerState",
171
202
  "NormalizedLinearLearner",
@@ -174,6 +205,9 @@ __all__ = [
174
205
  "run_normalized_learning_loop",
175
206
  "run_normalized_learning_loop_batched",
176
207
  "metrics_to_dicts",
208
+ # Learners - TD Learning
209
+ "TDLinearLearner",
210
+ "run_td_learning_loop",
177
211
  # Streams - protocol
178
212
  "ScanStream",
179
213
  # Streams - synthetic
@@ -0,0 +1,51 @@
1
+ """Core components for the Alberta Framework."""
2
+
3
+ from alberta_framework.core.learners import LinearLearner, TDLinearLearner, TDUpdateResult
4
+ from alberta_framework.core.optimizers import (
5
+ IDBD,
6
+ LMS,
7
+ TDIDBD,
8
+ AutoTDIDBD,
9
+ Optimizer,
10
+ TDOptimizer,
11
+ TDOptimizerUpdate,
12
+ )
13
+ from alberta_framework.core.types import (
14
+ AutoTDIDBDState,
15
+ IDBDState,
16
+ LearnerState,
17
+ LMSState,
18
+ Observation,
19
+ Prediction,
20
+ Target,
21
+ TDIDBDState,
22
+ TDLearnerState,
23
+ TDTimeStep,
24
+ TimeStep,
25
+ )
26
+
27
+ __all__ = [
28
+ # Supervised learning
29
+ "IDBD",
30
+ "IDBDState",
31
+ "LMS",
32
+ "LMSState",
33
+ "LearnerState",
34
+ "LinearLearner",
35
+ "Observation",
36
+ "Optimizer",
37
+ "Prediction",
38
+ "Target",
39
+ "TimeStep",
40
+ # TD learning
41
+ "AutoTDIDBD",
42
+ "AutoTDIDBDState",
43
+ "TDIDBD",
44
+ "TDIDBDState",
45
+ "TDLearnerState",
46
+ "TDLinearLearner",
47
+ "TDOptimizer",
48
+ "TDOptimizerUpdate",
49
+ "TDTimeStep",
50
+ "TDUpdateResult",
51
+ ]