alberta-framework 0.3.0.tar.gz → 0.4.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/CLAUDE.md +17 -3
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/PKG-INFO +24 -1
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/README.md +23 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/pyproject.toml +3 -1
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/__init__.py +39 -5
- alberta_framework-0.4.0/src/alberta_framework/core/__init__.py +51 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/core/learners.py +277 -59
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/core/normalizers.py +1 -4
- alberta_framework-0.4.0/src/alberta_framework/core/optimizers.py +923 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/core/types.py +176 -1
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/streams/gymnasium.py +3 -10
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/streams/synthetic.py +3 -9
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/experiments.py +1 -3
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/export.py +20 -16
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/statistics.py +17 -9
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/visualization.py +31 -25
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/conftest.py +0 -1
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/test_gymnasium_streams.py +9 -23
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/test_learners.py +36 -30
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/test_normalizers.py +1 -1
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/test_optimizers.py +1 -2
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/tests/test_streams.py +5 -20
- alberta_framework-0.4.0/tests/test_td_learners.py +431 -0
- alberta_framework-0.4.0/tests/test_td_optimizers.py +283 -0
- alberta_framework-0.3.0/src/alberta_framework/core/__init__.py +0 -27
- alberta_framework-0.3.0/src/alberta_framework/core/optimizers.py +0 -426
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/.github/workflows/ci.yml +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/.github/workflows/docs.yml +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/.github/workflows/publish.yml +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/.gitignore +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/ALBERTA_PLAN.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/CHANGELOG.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/LICENSE +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/ROADMAP.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/contributing.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/gen_ref_pages.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/getting-started/installation.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/getting-started/quickstart.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/guide/concepts.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/guide/experiments.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/guide/gymnasium.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/guide/optimizers.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/guide/streams.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/index.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/docs/javascripts/mathjax.js +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/README.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/autostep_comparison.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/external_normalization_study.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/idbd_lms_autostep_comparison.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/normalization_study.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/sutton1992_experiment1.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/The Alberta Plan/Step1/sutton1992_experiment2.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/gymnasium_reward_prediction.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/publication_experiment.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/examples/td_cartpole_lms.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/mkdocs.yml +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/papers/mahmood-msc-thesis-summary.md +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/py.typed +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/streams/__init__.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/streams/base.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/__init__.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/metrics.py +0 -0
- {alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/utils/timing.py +0 -0
{alberta_framework-0.3.0 → alberta_framework-0.4.0}/CLAUDE.md

@@ -14,10 +14,10 @@ This framework implements Step 1 of the Alberta Plan: demonstrating that IDBD (I
 ```
 src/alberta_framework/
 ├── core/
-│   ├── types.py        # TimeStep, LearnerState,
-│   ├── optimizers.py   # LMS, IDBD, Autostep optimizers
+│   ├── types.py        # TimeStep, LearnerState, optimizer states, TDTimeStep, TDLearnerState, TDIDBDState, AutoTDIDBDState
+│   ├── optimizers.py   # LMS, IDBD, Autostep, TDIDBD, AutoTDIDBD optimizers
 │   ├── normalizers.py  # OnlineNormalizer, NormalizerState
-│   └── learners.py     # LinearLearner,
+│   └── learners.py     # LinearLearner, TDLinearLearner, run_learning_loop, run_td_learning_loop
 ├── streams/
 │   ├── base.py         # ScanStream protocol (pure function interface for jax.lax.scan)
 │   ├── synthetic.py    # RandomWalkStream, AbruptChangeStream, CyclicStream, PeriodicChangeStream, ScaledStreamWrapper, DynamicScaleShiftStream, ScaleDriftStream

@@ -470,6 +470,20 @@ The publish workflow uses OpenID Connect (no API tokens). Configure on PyPI:
 
 ## Changelog
 
+### v0.4.0 (2026-02-04)
+- **FEATURE**: Implemented TD-IDBD optimizer for temporal-difference learning with per-weight adaptive step-sizes and eligibility traces (Kearney et al., 2019)
+- **FEATURE**: Implemented AutoTDIDBD optimizer with AutoStep-style normalization for improved stability
+- **FEATURE**: Added `TDLinearLearner` class for linear value function approximation in TD learning
+- **FEATURE**: Added `run_td_learning_loop()` for JIT-compiled TD learning via `jax.lax.scan`
+- **FEATURE**: Added TD state types: `TDIDBDState`, `AutoTDIDBDState`, `TDLearnerState`, `TDTimeStep`
+- **FEATURE**: Added `TDStream` protocol for TD experience streams
+- **DOCS**: Updated README with TD learning documentation and Kearney et al. 2019 reference
+
+### v0.3.2 (2026-02-03)
+- **FIX**: Relaxed test tolerance in batched vs sequential comparison tests (`rtol=1e-5`) to account for floating-point differences between vmap and sequential execution paths
+- **FIX**: Added `ignore = ["F722"]` to ruff config for jaxtyping shape annotation syntax that ruff doesn't understand
+- **FIX**: Removed unused `PRNGKeyArray` import from `core/types.py`
+
 ### v0.3.0 (2026-02-03)
 - **FEATURE**: Migrated all state types from NamedTuple to `@chex.dataclass(frozen=True)` for DeepMind-style JAX compatibility
 - **FEATURE**: Added jaxtyping shape annotations for compile-time type safety (`Float[Array, " feature_dim"]`, `PRNGKeyArray`, etc.)
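The v0.4.0 entries above reference the TD-IDBD update rule from Kearney et al. (2019). Purely as orientation for the changelog, and not the framework's actual `TDIDBD`/`TDLinearLearner` implementation, here is a standalone JAX sketch of that rule: semi-gradient TD(λ) with per-weight log step-sizes adapted by a meta step-size. All names and defaults below are illustrative assumptions.

```python
# Illustrative sketch of a TD-IDBD-style update (after Kearney et al., 2019).
# NOT the alberta-framework API; names, ordering details, and defaults are assumptions.
import jax.numpy as jnp


def tidbd_update(w, beta, e, h, x, x_next, reward, gamma=0.99, lam=0.9, theta=1e-2):
    """One semi-gradient TD(lambda) step with per-weight adaptive step-sizes."""
    delta = reward + gamma * jnp.dot(w, x_next) - jnp.dot(w, x)   # TD error
    beta = beta + theta * delta * x * h                           # meta-update of log step-sizes
    alpha = jnp.exp(beta)                                         # per-weight step-sizes
    e = gamma * lam * e + x                                       # accumulating eligibility trace
    w = w + alpha * delta * e                                     # weight update
    h = h * jnp.maximum(0.0, 1.0 - alpha * x * e) + alpha * delta * e  # decaying meta-trace
    return w, beta, e, h
```

In the framework itself, the per-weight quantities (`beta`, `e`, `h`) are presumably what `TDIDBDState` carries, advanced inside `run_td_learning_loop()` via `jax.lax.scan` as the changelog describes.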
{alberta_framework-0.3.0 → alberta_framework-0.4.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alberta-framework
-Version: 0.3.0
+Version: 0.4.0
 Summary: Implementation of the Alberta Plan for AI Research - continual learning with meta-learned step-sizes
 Project-URL: Homepage, https://github.com/j-klawson/alberta-framework
 Project-URL: Repository, https://github.com/j-klawson/alberta-framework

@@ -113,10 +113,15 @@ state, metrics = run_learning_loop(learner, stream, num_steps=10000, key=jr.key(
 
 ### Optimizers
 
+**Supervised Learning:**
 - **LMS**: Fixed step-size baseline
 - **IDBD**: Per-weight adaptive step-sizes via gradient correlation (Sutton, 1992)
 - **Autostep**: Tuning-free adaptation with gradient normalization (Mahmood et al., 2012)
 
+**TD Learning:**
+- **TDIDBD**: TD learning with per-weight adaptive step-sizes and eligibility traces (Kearney et al., 2019)
+- **AutoTDIDBD**: TD learning with AutoStep-style normalization for improved stability
+
 ### Streams
 
 Non-stationary experience generators implementing the `ScanStream` protocol:

@@ -126,6 +131,17 @@ Non-stationary experience generators implementing the `ScanStream` protocol:
 - `PeriodicChangeStream`: Sinusoidal oscillation
 - `DynamicScaleShiftStream`: Time-varying feature scales
 
+### TD Learning
+
+For temporal-difference learning with value function approximation:
+
+```python
+from alberta_framework import TDLinearLearner, TDIDBD, run_td_learning_loop
+
+learner = TDLinearLearner(optimizer=TDIDBD(trace_decay=0.9))
+state, metrics = run_td_learning_loop(learner, td_stream, num_steps=10000, key=jr.key(42))
+```
+
 ### Gymnasium Integration
 
 ```python

@@ -202,6 +218,13 @@ If you use this framework in your research, please cite:
   booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing},
   year = {2012}
 }
+
+@inproceedings{kearney2019tidbd,
+  title = {Learning Feature Relevance Through Step Size Adaptation in Temporal-Difference Learning},
+  author = {Kearney, Alex and Veeriah, Vivek and Travnik, Jaden and Sutton, Richard S. and Pilarski, Patrick M.},
+  booktitle = {International Conference on Machine Learning},
+  year = {2019}
+}
 ```
 
 ## License
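The new `AutoTDIDBD` bullet above points at AutoStep-style normalization (Mahmood et al., 2012), which is also the reference behind the existing `Autostep` optimizer. As an illustration of that idea only, and not of the framework's `AutoTDIDBD` implementation, the step-size meta-update can be normalized by a slowly decaying running maximum of the meta-gradient and then rescaled so a single update cannot overshoot. Every name and parameter below is hypothetical.

```python
# Illustrative AutoStep-style normalization of per-weight step-sizes
# (after Mahmood et al., 2012). Not the alberta-framework implementation.
import jax.numpy as jnp


def autostep_normalized_alphas(alpha, v, delta, x, h, mu=1e-2, tau=1e4):
    """Normalize the meta-gradient by a running maximum, then bound the effective step-size."""
    meta_grad = delta * x * h                                   # raw per-weight meta-gradient
    v = jnp.maximum(jnp.abs(meta_grad),
                    v + (1.0 / tau) * alpha * x * x * (jnp.abs(meta_grad) - v))
    alpha = alpha * jnp.exp(mu * meta_grad / jnp.where(v > 0, v, 1.0))
    m = jnp.dot(alpha, x * x)                                   # effective step-size of this update
    alpha = jnp.where(m > 1.0, alpha / m, alpha)                # rescale only if it would overshoot
    return alpha, v
```

The point of the normalization, as the bullet says, is stability: the adaptation becomes insensitive to the scale of individual TD errors or features, which is what makes the method closer to tuning-free.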
{alberta_framework-0.3.0 → alberta_framework-0.4.0}/README.md

@@ -66,10 +66,15 @@ state, metrics = run_learning_loop(learner, stream, num_steps=10000, key=jr.key(
 
 ### Optimizers
 
+**Supervised Learning:**
 - **LMS**: Fixed step-size baseline
 - **IDBD**: Per-weight adaptive step-sizes via gradient correlation (Sutton, 1992)
 - **Autostep**: Tuning-free adaptation with gradient normalization (Mahmood et al., 2012)
 
+**TD Learning:**
+- **TDIDBD**: TD learning with per-weight adaptive step-sizes and eligibility traces (Kearney et al., 2019)
+- **AutoTDIDBD**: TD learning with AutoStep-style normalization for improved stability
+
 ### Streams
 
 Non-stationary experience generators implementing the `ScanStream` protocol:

@@ -79,6 +84,17 @@ Non-stationary experience generators implementing the `ScanStream` protocol:
 - `PeriodicChangeStream`: Sinusoidal oscillation
 - `DynamicScaleShiftStream`: Time-varying feature scales
 
+### TD Learning
+
+For temporal-difference learning with value function approximation:
+
+```python
+from alberta_framework import TDLinearLearner, TDIDBD, run_td_learning_loop
+
+learner = TDLinearLearner(optimizer=TDIDBD(trace_decay=0.9))
+state, metrics = run_td_learning_loop(learner, td_stream, num_steps=10000, key=jr.key(42))
+```
+
 ### Gymnasium Integration
 
 ```python

@@ -155,6 +171,13 @@ If you use this framework in your research, please cite:
   booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing},
   year = {2012}
 }
+
+@inproceedings{kearney2019tidbd,
+  title = {Learning Feature Relevance Through Step Size Adaptation in Temporal-Difference Learning},
+  author = {Kearney, Alex and Veeriah, Vivek and Travnik, Jaden and Sutton, Richard S. and Pilarski, Patrick M.},
+  booktitle = {International Conference on Machine Learning},
+  year = {2019}
+}
 ```
 
 ## License
{alberta_framework-0.3.0 → alberta_framework-0.4.0}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "alberta-framework"
-version = "0.3.0"
+version = "0.4.0"
 description = "Implementation of the Alberta Plan for AI Research - continual learning with meta-learned step-sizes"
 readme = "README.md"
 license = "Apache-2.0"

@@ -71,6 +71,8 @@ target-version = "py313"
 
 [tool.ruff.lint]
 select = ["E", "F", "I", "N", "W", "UP"]
+# F722: Syntax error in forward annotation - ruff doesn't understand jaxtyping shape annotations
+ignore = ["F722"]
 
 [tool.mypy]
 python_version = "3.13"
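Context for the `ignore = ["F722"]` addition above: jaxtyping shape annotations embed a shape string inside the subscript, and some of those strings (the empty scalar spec `""`, or multi-axis specs like `"batch feature_dim"`) are not valid Python expressions, so ruff's F722 "syntax error in forward annotation" check flags them even though jaxtyping interprets them at runtime. A minimal example of the kind of annotation in question, in the style of the `Float[Array, " feature_dim"]` annotations cited in the v0.3.0 changelog (the function itself is illustrative, not part of alberta-framework):

```python
# A jaxtyping-annotated function whose scalar return annotation trips ruff F722.
import jax.numpy as jnp
from jaxtyping import Array, Float


def predict(weights: Float[Array, " feature_dim"],
            observation: Float[Array, " feature_dim"]) -> Float[Array, ""]:
    """Linear prediction; the quoted strings are jaxtyping shape specs, not forward refs."""
    return jnp.dot(weights, observation)
```

Suppressing F722 project-wide is the workaround jaxtyping-based codebases commonly use, which is what the pyproject change records.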
{alberta_framework-0.3.0 → alberta_framework-0.4.0}/src/alberta_framework/__init__.py

@@ -39,7 +39,7 @@ References
 - Tuning-free Step-size Adaptation (Mahmood et al., 2012)
 """
 
-__version__ = "0.3.0"
+__version__ = "0.4.0"
 
 # Core types
 # Learners

@@ -47,12 +47,15 @@ from alberta_framework.core.learners import (
     LinearLearner,
     NormalizedLearnerState,
     NormalizedLinearLearner,
+    TDLinearLearner,
+    TDUpdateResult,
     UpdateResult,
     metrics_to_dicts,
     run_learning_loop,
     run_learning_loop_batched,
     run_normalized_learning_loop,
     run_normalized_learning_loop_batched,
+    run_td_learning_loop,
 )
 
 # Normalizers

@@ -63,9 +66,19 @@ from alberta_framework.core.normalizers import (
 )
 
 # Optimizers
-from alberta_framework.core.optimizers import
+from alberta_framework.core.optimizers import (
+    IDBD,
+    LMS,
+    TDIDBD,
+    Autostep,
+    AutoTDIDBD,
+    Optimizer,
+    TDOptimizer,
+    TDOptimizerUpdate,
+)
 from alberta_framework.core.types import (
     AutostepState,
+    AutoTDIDBDState,
     BatchedLearningResult,
     BatchedNormalizedResult,
     IDBDState,

@@ -78,7 +91,12 @@ from alberta_framework.core.types import (
     StepSizeHistory,
     StepSizeTrackingConfig,
     Target,
+    TDIDBDState,
+    TDLearnerState,
+    TDTimeStep,
     TimeStep,
+    create_autotdidbd_state,
+    create_tdidbd_state,
 )
 
 # Streams - base

@@ -140,7 +158,7 @@ except ImportError:
 __all__ = [
     # Version
     "__version__",
-    # Types
+    # Types - Supervised Learning
     "AutostepState",
     "BatchedLearningResult",
     "BatchedNormalizedResult",

@@ -157,15 +175,28 @@ __all__ = [
     "Target",
     "TimeStep",
     "UpdateResult",
-    #
+    # Types - TD Learning
+    "AutoTDIDBDState",
+    "TDIDBDState",
+    "TDLearnerState",
+    "TDTimeStep",
+    "TDUpdateResult",
+    "create_tdidbd_state",
+    "create_autotdidbd_state",
+    # Optimizers - Supervised Learning
     "Autostep",
     "IDBD",
     "LMS",
     "Optimizer",
+    # Optimizers - TD Learning
+    "AutoTDIDBD",
+    "TDIDBD",
+    "TDOptimizer",
+    "TDOptimizerUpdate",
     # Normalizers
     "OnlineNormalizer",
     "create_normalizer_state",
-    # Learners
+    # Learners - Supervised Learning
     "LinearLearner",
     "NormalizedLearnerState",
     "NormalizedLinearLearner",

@@ -174,6 +205,9 @@ __all__ = [
     "run_normalized_learning_loop",
     "run_normalized_learning_loop_batched",
     "metrics_to_dicts",
+    # Learners - TD Learning
+    "TDLinearLearner",
+    "run_td_learning_loop",
     # Streams - protocol
     "ScanStream",
     # Streams - synthetic
alberta_framework-0.4.0/src/alberta_framework/core/__init__.py (new file)

@@ -0,0 +1,51 @@
+"""Core components for the Alberta Framework."""
+
+from alberta_framework.core.learners import LinearLearner, TDLinearLearner, TDUpdateResult
+from alberta_framework.core.optimizers import (
+    IDBD,
+    LMS,
+    TDIDBD,
+    AutoTDIDBD,
+    Optimizer,
+    TDOptimizer,
+    TDOptimizerUpdate,
+)
+from alberta_framework.core.types import (
+    AutoTDIDBDState,
+    IDBDState,
+    LearnerState,
+    LMSState,
+    Observation,
+    Prediction,
+    Target,
+    TDIDBDState,
+    TDLearnerState,
+    TDTimeStep,
+    TimeStep,
+)
+
+__all__ = [
+    # Supervised learning
+    "IDBD",
+    "IDBDState",
+    "LMS",
+    "LMSState",
+    "LearnerState",
+    "LinearLearner",
+    "Observation",
+    "Optimizer",
+    "Prediction",
+    "Target",
+    "TimeStep",
+    # TD learning
+    "AutoTDIDBD",
+    "AutoTDIDBDState",
+    "TDIDBD",
+    "TDIDBDState",
+    "TDLearnerState",
+    "TDLinearLearner",
+    "TDOptimizer",
+    "TDOptimizerUpdate",
+    "TDTimeStep",
+    "TDUpdateResult",
+]
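The new `core/__init__.py` above re-exports the learners, optimizers, and state types that `src/alberta_framework/__init__.py` also exposes, so the same TD names should presumably be importable either from the package root or from the `core` subpackage. A short sketch of that, using only names listed in the two `__all__` blocks in this diff:

```python
# Both import paths expose the TD learning API after 0.4.0 (sketch based on the diff above).
from alberta_framework import TDLinearLearner as TopLevelLearner
from alberta_framework.core import TDLinearLearner as CoreLearner

# Both re-export the same class from alberta_framework.core.learners.
assert TopLevelLearner is CoreLearner
```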