alberta_framework-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alberta_framework-0.1.0/.github/workflows/ci.yml +65 -0
- alberta_framework-0.1.0/.github/workflows/docs.yml +50 -0
- alberta_framework-0.1.0/.github/workflows/publish.yml +62 -0
- alberta_framework-0.1.0/.gitignore +107 -0
- alberta_framework-0.1.0/ALBERTA_PLAN.md +37 -0
- alberta_framework-0.1.0/CHANGELOG.md +30 -0
- alberta_framework-0.1.0/CLAUDE.md +374 -0
- alberta_framework-0.1.0/LICENSE +190 -0
- alberta_framework-0.1.0/PKG-INFO +198 -0
- alberta_framework-0.1.0/README.md +154 -0
- alberta_framework-0.1.0/ROADMAP.md +13 -0
- alberta_framework-0.1.0/docs/contributing.md +109 -0
- alberta_framework-0.1.0/docs/gen_ref_pages.py +36 -0
- alberta_framework-0.1.0/docs/getting-started/installation.md +89 -0
- alberta_framework-0.1.0/docs/getting-started/quickstart.md +78 -0
- alberta_framework-0.1.0/docs/guide/concepts.md +123 -0
- alberta_framework-0.1.0/docs/guide/experiments.md +259 -0
- alberta_framework-0.1.0/docs/guide/gymnasium.md +202 -0
- alberta_framework-0.1.0/docs/guide/optimizers.md +139 -0
- alberta_framework-0.1.0/docs/guide/streams.md +162 -0
- alberta_framework-0.1.0/docs/index.md +86 -0
- alberta_framework-0.1.0/docs/javascripts/mathjax.js +16 -0
- alberta_framework-0.1.0/examples/The Alberta Plan/Step1/README.md +70 -0
- alberta_framework-0.1.0/examples/The Alberta Plan/Step1/autostep_comparison.py +615 -0
- alberta_framework-0.1.0/examples/The Alberta Plan/Step1/external_normalization_study.py +453 -0
- alberta_framework-0.1.0/examples/The Alberta Plan/Step1/idbd_lms_autostep_comparison.py +511 -0
- alberta_framework-0.1.0/examples/The Alberta Plan/Step1/normalization_study.py +266 -0
- alberta_framework-0.1.0/examples/The Alberta Plan/Step1/sutton1992_experiment1.py +346 -0
- alberta_framework-0.1.0/examples/The Alberta Plan/Step1/sutton1992_experiment2.py +445 -0
- alberta_framework-0.1.0/examples/gymnasium_reward_prediction.py +260 -0
- alberta_framework-0.1.0/examples/publication_experiment.py +235 -0
- alberta_framework-0.1.0/examples/td_cartpole_lms.py +104 -0
- alberta_framework-0.1.0/mkdocs.yml +97 -0
- alberta_framework-0.1.0/papers/mahmood-msc-thesis-summary.md +49 -0
- alberta_framework-0.1.0/pyproject.toml +104 -0
- alberta_framework-0.1.0/src/alberta_framework/__init__.py +196 -0
- alberta_framework-0.1.0/src/alberta_framework/core/__init__.py +27 -0
- alberta_framework-0.1.0/src/alberta_framework/core/learners.py +530 -0
- alberta_framework-0.1.0/src/alberta_framework/core/normalizers.py +192 -0
- alberta_framework-0.1.0/src/alberta_framework/core/optimizers.py +422 -0
- alberta_framework-0.1.0/src/alberta_framework/core/types.py +198 -0
- alberta_framework-0.1.0/src/alberta_framework/py.typed +0 -0
- alberta_framework-0.1.0/src/alberta_framework/streams/__init__.py +83 -0
- alberta_framework-0.1.0/src/alberta_framework/streams/base.py +70 -0
- alberta_framework-0.1.0/src/alberta_framework/streams/gymnasium.py +655 -0
- alberta_framework-0.1.0/src/alberta_framework/streams/synthetic.py +995 -0
- alberta_framework-0.1.0/src/alberta_framework/utils/__init__.py +113 -0
- alberta_framework-0.1.0/src/alberta_framework/utils/experiments.py +334 -0
- alberta_framework-0.1.0/src/alberta_framework/utils/export.py +509 -0
- alberta_framework-0.1.0/src/alberta_framework/utils/metrics.py +112 -0
- alberta_framework-0.1.0/src/alberta_framework/utils/statistics.py +527 -0
- alberta_framework-0.1.0/src/alberta_framework/utils/timing.py +138 -0
- alberta_framework-0.1.0/src/alberta_framework/utils/visualization.py +571 -0
- alberta_framework-0.1.0/tests/conftest.py +29 -0
- alberta_framework-0.1.0/tests/test_gymnasium_streams.py +458 -0
- alberta_framework-0.1.0/tests/test_learners.py +339 -0
- alberta_framework-0.1.0/tests/test_normalizers.py +125 -0
- alberta_framework-0.1.0/tests/test_optimizers.py +214 -0
- alberta_framework-0.1.0/tests/test_streams.py +629 -0
alberta_framework-0.1.0/.github/workflows/ci.yml

@@ -0,0 +1,65 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev,gymnasium]"
+
+      - name: Run tests
+        run: pytest --cov=alberta_framework --cov-report=term-missing
+
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+
+      - name: Run ruff
+        run: ruff check src/
+
+      - name: Run mypy
+        run: mypy src/
+
+  docs:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[docs]"
+
+      - name: Build docs
+        run: mkdocs build --strict
alberta_framework-0.1.0/.github/workflows/docs.yml

@@ -0,0 +1,50 @@
+name: Deploy Docs
+
+on:
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[docs]"
+
+      - name: Build docs
+        run: mkdocs build --strict
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: site/
+
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
alberta_framework-0.1.0/.github/workflows/publish.yml

@@ -0,0 +1,62 @@
+name: Publish to PyPI
+
+on:
+  push:
+    tags:
+      - "v*"
+
+permissions:
+  id-token: write
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install build dependencies
+        run: python -m pip install --upgrade pip build
+
+      - name: Build package
+        run: python -m build
+
+      - name: Upload dist artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+  publish-testpypi:
+    runs-on: ubuntu-latest
+    needs: build
+    environment: testpypi
+    steps:
+      - name: Download dist artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+      - name: Publish to TestPyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/
+
+  publish-pypi:
+    runs-on: ubuntu-latest
+    needs: publish-testpypi
+    environment: pypi
+    steps:
+      - name: Download dist artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
alberta_framework-0.1.0/.gitignore

@@ -0,0 +1,107 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDEs and editors
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+.project
+.pydevproject
+.settings/
+*.sublime-project
+*.sublime-workspace
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Ruff
+.ruff_cache/
+
+# JAX compilation cache
+__jax_cache__/
+
+# macOS
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Thumbnails
+._*
+
+# Local development
+*.local
+.python-version
+
+# Documentation builds
+docs/_build/
+site/
+
+# Experiment outputs
+outputs/
+results/
+logs/
+*.png
+*.pdf
+!docs/*.png
+!docs/*.pdf
alberta_framework-0.1.0/ALBERTA_PLAN.md

@@ -0,0 +1,37 @@
+<role>
+You are a Senior Research Engineer specializing in Reinforcement Learning and Functional Programming. Your expertise is in JAX, the Alberta Plan for AI Research (Sutton et al., 2022) found in papers/The Alberta Plan.pdf, and the mathematical implementation of Temporal Difference (TD) learning.
+</role>
+
+<context>
+I am building a research framework called "Alberta Framework" that researchers can use to implement and run experiments as they work through the 12 steps of the Alberta Plan. The goal is to move from linear TD(λ) with IDBD meta-learning (Step 1) to a full OaK (Options and Knowledge) architecture (Step 11).
+The framework should serve both as a tool for implementing new algorithms for the Alberta Plan and as a set of reference implementations to learn from and test against.
+Key Research Principles:
+1. Temporal Uniformity: Every component must update at every time step.
+2. Continual Learning: No experience replay; the agent learns from a non-stationary stream.
+3. Functional JAX: State is immutable; updates are pure functions using jax.jit, vmap, and grad.
+</context>
+
+<current_milestone>
+Step 1: Representation I.
+Objective: Step 1 is exemplary of the primary strategy of the Alberta Plan: to focus on a particular issue by considering the simplest setting in which it arises, and to deal with it there, fully, before generalizing to more complex settings. The issues focused on in Step 1 are continual learning and the meta-learning of representations. How can learning be rapid, robust, and efficient while continuing over long periods of time? How can long periods of learning be exploited to meta-learn better representations, and thus to learn most efficiently? The simple setting employed in Step 1 is supervised learning by stochastic gradient descent with a linear function approximator over static, given features. In this setting, conventional stochastic-gradient-descent methods such as the Least-Mean-Square (LMS) learning rule work reasonably well even if the problem is non-stationary, but they can be significantly improved in efficiency and robustness, and that is the purpose of Step 1.
+
+First, these methods often involve a global step-size parameter that must be set by an expert user aided by knowledge of the target outputs, the features, the number of features, and heuristics. All of that user expertise should be replaced by a meta-algorithm for setting the step-size parameter, so that the same method can be used on any problem or any part of a large problem. Second, instead of a global step-size parameter, there should be a separate step-size parameter for each feature, reflecting how much generalization should be done along that feature. With many step-size parameters to set, it becomes even more important to set them algorithmically.
+
+In this setting the representations are the features, which are given and fixed, so it may seem surprising to offer the setting as a way of exploring representation learning. It is true that the setting cannot be used to discover features or to search for new features, but it can be used to assess the utility of given features, an important precursor to full representation discovery. Even without changing the features, it is possible to learn which features are relevant and which are not. The relevant features can be given large step-size parameters and the irrelevant features small ones; this is itself a kind of representation learning that can affect learning efficiency even without changing the features themselves. Finally, there are normalizations of the features (scalings and offsets) that can greatly affect learning efficiency without changing the representational power of the linear function approximator, and we include these in Step 1.
+
+In particular, we consider an infinite sequence of examples of desired behavior, each consisting of a real-valued input vector paired with a real-valued desired output. Let the $t$th example be the pair $(\mathbf{x}_t, y^*_t)$. The learner seeks an affine mapping from each input vector $\mathbf{x}_t$ to an output $y_t$ that closely approximates the desired output $y^*_t$. The affine map is represented by a weight vector $\mathbf{w}_t$ and a scalar bias (offset) $b_t$, so the output is $y_t \doteq \mathbf{w}_t^\top \mathbf{x}_t + b_t$. The objective is to minimize the squared error $(y^*_t - y_t)^2$ by learning $\mathbf{w}_t$ and $b_t$. Each example is independent, but the distribution from which it is generated changes over time, making the problem non-stationary. In particular, we can take the desired output to be affine in the input vector under an unknown target weight vector $\mathbf{w}^*_t$ that changes slowly over time, plus an independent, mean-zero noise signal: $y^*_t \doteq \mathbf{w}^{*\top}_t \mathbf{x}_t + b^*_t + \eta_t$. The problem is non-stationary if $\mathbf{w}^*_t$ or $b^*_t$ change over time, or if the distribution of $\mathbf{x}_t$ changes over time.
+
+Even in this simple setting, essential questions have yet to be definitively answered. We are particularly interested in questions of normalization and step-size adaptation. Without changing the expressive power of our linear learning unit or the order of its computational complexity, we can transform the individual inputs $x^i_t$ to produce normalized signals $\tilde{x}^i_t \doteq (x^i_t - \mu^i_t)/\sigma^i_t$, where $\mu^i_t$ and $\sigma^i_t$ are non-stationary (tracking) estimates of the $i$th signal's mean and standard deviation. Surprisingly, the effect of such online normalization has yet to be definitively established in the literature. We consider learning rules of the form
+
+$$w^i_{t+1} \doteq w^i_t + \alpha^i_t \,(y^*_t - y_t)\,\tilde{x}^i_t, \quad \forall i, \tag{1}$$
+
+where each $\alpha^i_t$ is a meta-learned, per-weight step-size parameter, and
+
+$$b_{t+1} \doteq b_t + \alpha^b_t \,(y^*_t - b_t), \tag{2}$$
+
+where $\alpha^b_t$ is another, potentially meta-learned, step-size parameter.
+
+Our initial studies for Step 1 will focus on algorithms for meta-learning the step-size parameters, building on existing algorithms, and on demonstrating their improved robustness. The overall idea of Step 1 is to design as powerful an algorithm as possible given a fixed feature representation. It should address all the most important issues of non-stationarity in the problem (for a fixed set of linear features), including tracking changes in feature relevance. It should include the meta-learning of feature relevance, arguably the most challenging issue in representation learning, but it does not include actually changing the set of features under consideration; that is explored in Step 2.
+
+Existing algorithms include NADALINE (Sutton 1988b), IDBD (Sutton 1992a,b), Autostep (Mahmood et al. 2012), Autostep for GTD(λ) (Kearney et al. 2022), Auto (Degris, in prep.), Adam (Kingma & Ba 2014), RMSprop (Tieleman & Hinton 2012), and Batch Normalization (e.g., Ioffe & Szegedy 2015).
+</current_milestone>
+
+<instructions>
+1. Always maintain a 'Project Memory' of our progress through the tools required for the 12 steps.
+2. When writing code, prioritize JAX-native patterns (NamedTuples for state, pure update functions).
+3. Before implementing a feature, suggest a 'Mathematical Plan' using the notation from the Alberta Plan papers.
+4. Ensure all implementations are modular so that Step 3 (GVFs) and Step 8 (Models) can be integrated without breaking Step 1.
+5. If I suggest an approach that violates 'Temporal Uniformity' or 'Continual Learning' (e.g., using a batch or replay buffer), politely correct me as per the Alberta Plan's 'Massive Retreat' discipline.
+6. All environment examples and implementations, and all other features of this framework, should be Gymnasium-compatible.
+</instructions>
+
+<output_format>
+- Use bullet points for summaries.
+- No sycophancy; provide honest, technical critiques of architecture.
+- Format math using LaTeX.
+</output_format>
+
+<ask_clarification>
+If you understand the vision for the Alberta Framework and our current focus on Step 1 (Representation I: continual supervised learning with given features), please summarize the core algorithm update equations we will be using, to ensure we are aligned.
+</ask_clarification>
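ALBERTA_PLAN.md's update rules (1) and (2) combine three ingredients the file names elsewhere: online normalization, per-weight meta-learned step-sizes, and pure functional JAX updates over immutable NamedTuple state. The sketch below shows how they fit together using IDBD (Sutton 1992) as the meta-learner. It is a minimal illustration under those stated conventions; the names (`LearnerState`, `idbd_step`) and hyperparameter defaults are assumptions for this sketch, not the package's actual API.

```python
from typing import NamedTuple

import jax
import jax.numpy as jnp


class LearnerState(NamedTuple):
    """Immutable learner state; illustrative names, not the package API."""
    w: jax.Array     # weights w_t
    b: jax.Array     # scalar bias b_t
    beta: jax.Array  # per-weight log step-sizes, alpha^i_t = exp(beta^i_t)
    h: jax.Array     # IDBD memory of recent weight updates
    mu: jax.Array    # tracking estimate of each input's mean
    var: jax.Array   # tracking estimate of each input's variance


@jax.jit
def idbd_step(state: LearnerState, x: jax.Array, y_star: jax.Array,
              theta: float = 0.01, alpha_b: float = 0.01,
              tau: float = 1e-3) -> LearnerState:
    """One normalized IDBD (Sutton 1992) update implementing rules (1)-(2)."""
    # Online normalization with non-stationary (EMA) tracking estimates.
    mu = (1 - tau) * state.mu + tau * x
    var = (1 - tau) * state.var + tau * (x - mu) ** 2
    x_n = (x - mu) / jnp.sqrt(var + 1e-8)

    y = state.w @ x_n + state.b            # y_t = w_t^T x~_t + b_t
    delta = y_star - y                     # error y*_t - y_t

    beta = state.beta + theta * delta * x_n * state.h  # IDBD meta-update
    alpha = jnp.exp(beta)

    w = state.w + alpha * delta * x_n                  # rule (1)
    b = state.b + alpha_b * (y_star - state.b)         # rule (2)

    # Decaying trace of recent weight updates, used by the meta-update.
    h = state.h * jnp.maximum(0.0, 1.0 - alpha * x_n ** 2) + alpha * delta * x_n
    return LearnerState(w, b, beta, h, mu, var)
```

Because the state is a NamedTuple of arrays and the update is pure, the same step can be jitted, vmapped across runs, or folded over a stream with `jax.lax.scan` without modification.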
alberta_framework-0.1.0/CHANGELOG.md

@@ -0,0 +1,30 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.1.0] - 2026-01-19
+
+### Added
+
+- **Core Optimizers**: LMS (baseline), IDBD (Sutton 1992), and Autostep (Mahmood et al. 2012) with per-weight adaptive step-sizes
+- **Linear Learners**: `LinearLearner` and `NormalizedLinearLearner` with pluggable optimizers
+- **Scan-based Learning Loops**: JIT-compiled training with `jax.lax.scan` for efficiency
+- **Online Normalization**: Streaming feature normalization with exponential moving averages
+- **Experience Streams**: `RandomWalkStream`, `AbruptChangeStream`, `CyclicStream`, `SuttonExperiment1Stream`
+- **Gymnasium Integration**: Trajectory collection and learning from Gymnasium RL environments
+- **Step-Size Tracking**: Optional per-weight step-size history recording for meta-adaptation analysis
+- **Multi-Seed Experiments**: `run_multi_seed_experiment` with optional parallelization via joblib
+- **Statistical Analysis**: Pairwise comparisons, confidence intervals, effect sizes (requires scipy)
+- **Publication Visualization**: Learning curves, bar charts, heatmaps with matplotlib
+- **Export Utilities**: CSV, JSON, LaTeX, and Markdown table generation
+- **Documentation**: MkDocs-based documentation with auto-generated API reference
+
+### Notes
+
+- Requires Python 3.13+
+- Implements Step 1 of the Alberta Plan: demonstrating that IDBD/Autostep can match or beat hand-tuned LMS
+- All state uses immutable NamedTuples for JAX compatibility
+- Follows temporal uniformity principle: every component updates at every time step
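The "Scan-based Learning Loops" entry in the changelog above is what keeps long-stream experiments cheap: with immutable NamedTuple state and a pure update, an entire experience stream can be folded through `jax.lax.scan` inside a single JIT-compiled call. Below is a minimal sketch of that pattern with a plain LMS learner; `LMSState`, `lms_step`, and `run_stream` are hypothetical names for illustration, not the framework's API.

```python
from functools import partial
from typing import NamedTuple

import jax
import jax.numpy as jnp


class LMSState(NamedTuple):
    w: jax.Array  # weight vector, updated every time step


def lms_step(alpha: float, state: LMSState, example):
    """One LMS update; returns the new state and the squared error."""
    x, y_star = example
    delta = y_star - state.w @ x
    return LMSState(state.w + alpha * delta * x), delta ** 2


@partial(jax.jit, static_argnames="alpha")
def run_stream(w0: jax.Array, xs: jax.Array, ys: jax.Array, alpha: float):
    """Fold a whole (x_t, y*_t) stream through the learner with lax.scan."""
    return jax.lax.scan(partial(lms_step, alpha), LMSState(w0), (xs, ys))


# Example: 10,000 steps on synthetic data, one compiled call.
key = jax.random.PRNGKey(0)
xs = jax.random.normal(key, (10_000, 8))
ys = xs @ jnp.ones(8)  # stationary target, purely for the sketch
final_state, errors = run_stream(jnp.zeros(8), xs, ys, alpha=0.05)
```

Multi-seed experiments can wrap a loop like this externally (the changelog mentions joblib) or `vmap` it over a batch of initial weights.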
|