alberta_framework-0.1.0.tar.gz

Files changed (59)
  1. alberta_framework-0.1.0/.github/workflows/ci.yml +65 -0
  2. alberta_framework-0.1.0/.github/workflows/docs.yml +50 -0
  3. alberta_framework-0.1.0/.github/workflows/publish.yml +62 -0
  4. alberta_framework-0.1.0/.gitignore +107 -0
  5. alberta_framework-0.1.0/ALBERTA_PLAN.md +37 -0
  6. alberta_framework-0.1.0/CHANGELOG.md +30 -0
  7. alberta_framework-0.1.0/CLAUDE.md +374 -0
  8. alberta_framework-0.1.0/LICENSE +190 -0
  9. alberta_framework-0.1.0/PKG-INFO +198 -0
  10. alberta_framework-0.1.0/README.md +154 -0
  11. alberta_framework-0.1.0/ROADMAP.md +13 -0
  12. alberta_framework-0.1.0/docs/contributing.md +109 -0
  13. alberta_framework-0.1.0/docs/gen_ref_pages.py +36 -0
  14. alberta_framework-0.1.0/docs/getting-started/installation.md +89 -0
  15. alberta_framework-0.1.0/docs/getting-started/quickstart.md +78 -0
  16. alberta_framework-0.1.0/docs/guide/concepts.md +123 -0
  17. alberta_framework-0.1.0/docs/guide/experiments.md +259 -0
  18. alberta_framework-0.1.0/docs/guide/gymnasium.md +202 -0
  19. alberta_framework-0.1.0/docs/guide/optimizers.md +139 -0
  20. alberta_framework-0.1.0/docs/guide/streams.md +162 -0
  21. alberta_framework-0.1.0/docs/index.md +86 -0
  22. alberta_framework-0.1.0/docs/javascripts/mathjax.js +16 -0
  23. alberta_framework-0.1.0/examples/The Alberta Plan/Step1/README.md +70 -0
  24. alberta_framework-0.1.0/examples/The Alberta Plan/Step1/autostep_comparison.py +615 -0
  25. alberta_framework-0.1.0/examples/The Alberta Plan/Step1/external_normalization_study.py +453 -0
  26. alberta_framework-0.1.0/examples/The Alberta Plan/Step1/idbd_lms_autostep_comparison.py +511 -0
  27. alberta_framework-0.1.0/examples/The Alberta Plan/Step1/normalization_study.py +266 -0
  28. alberta_framework-0.1.0/examples/The Alberta Plan/Step1/sutton1992_experiment1.py +346 -0
  29. alberta_framework-0.1.0/examples/The Alberta Plan/Step1/sutton1992_experiment2.py +445 -0
  30. alberta_framework-0.1.0/examples/gymnasium_reward_prediction.py +260 -0
  31. alberta_framework-0.1.0/examples/publication_experiment.py +235 -0
  32. alberta_framework-0.1.0/examples/td_cartpole_lms.py +104 -0
  33. alberta_framework-0.1.0/mkdocs.yml +97 -0
  34. alberta_framework-0.1.0/papers/mahmood-msc-thesis-summary.md +49 -0
  35. alberta_framework-0.1.0/pyproject.toml +104 -0
  36. alberta_framework-0.1.0/src/alberta_framework/__init__.py +196 -0
  37. alberta_framework-0.1.0/src/alberta_framework/core/__init__.py +27 -0
  38. alberta_framework-0.1.0/src/alberta_framework/core/learners.py +530 -0
  39. alberta_framework-0.1.0/src/alberta_framework/core/normalizers.py +192 -0
  40. alberta_framework-0.1.0/src/alberta_framework/core/optimizers.py +422 -0
  41. alberta_framework-0.1.0/src/alberta_framework/core/types.py +198 -0
  42. alberta_framework-0.1.0/src/alberta_framework/py.typed +0 -0
  43. alberta_framework-0.1.0/src/alberta_framework/streams/__init__.py +83 -0
  44. alberta_framework-0.1.0/src/alberta_framework/streams/base.py +70 -0
  45. alberta_framework-0.1.0/src/alberta_framework/streams/gymnasium.py +655 -0
  46. alberta_framework-0.1.0/src/alberta_framework/streams/synthetic.py +995 -0
  47. alberta_framework-0.1.0/src/alberta_framework/utils/__init__.py +113 -0
  48. alberta_framework-0.1.0/src/alberta_framework/utils/experiments.py +334 -0
  49. alberta_framework-0.1.0/src/alberta_framework/utils/export.py +509 -0
  50. alberta_framework-0.1.0/src/alberta_framework/utils/metrics.py +112 -0
  51. alberta_framework-0.1.0/src/alberta_framework/utils/statistics.py +527 -0
  52. alberta_framework-0.1.0/src/alberta_framework/utils/timing.py +138 -0
  53. alberta_framework-0.1.0/src/alberta_framework/utils/visualization.py +571 -0
  54. alberta_framework-0.1.0/tests/conftest.py +29 -0
  55. alberta_framework-0.1.0/tests/test_gymnasium_streams.py +458 -0
  56. alberta_framework-0.1.0/tests/test_learners.py +339 -0
  57. alberta_framework-0.1.0/tests/test_normalizers.py +125 -0
  58. alberta_framework-0.1.0/tests/test_optimizers.py +214 -0
  59. alberta_framework-0.1.0/tests/test_streams.py +629 -0
alberta_framework-0.1.0/.github/workflows/ci.yml
@@ -0,0 +1,65 @@
+ name: CI
+
+ on:
+   push:
+     branches: [main]
+   pull_request:
+     branches: [main]
+
+ jobs:
+   test:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v4
+
+       - name: Set up Python 3.13
+         uses: actions/setup-python@v5
+         with:
+           python-version: "3.13"
+
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install -e ".[dev,gymnasium]"
+
+       - name: Run tests
+         run: pytest --cov=alberta_framework --cov-report=term-missing
+
+   lint:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v4
+
+       - name: Set up Python 3.13
+         uses: actions/setup-python@v5
+         with:
+           python-version: "3.13"
+
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install -e ".[dev]"
+
+       - name: Run ruff
+         run: ruff check src/
+
+       - name: Run mypy
+         run: mypy src/
+
+   docs:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v4
+
+       - name: Set up Python 3.13
+         uses: actions/setup-python@v5
+         with:
+           python-version: "3.13"
+
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install -e ".[docs]"
+
+       - name: Build docs
+         run: mkdocs build --strict
alberta_framework-0.1.0/.github/workflows/docs.yml
@@ -0,0 +1,50 @@
+ name: Deploy Docs
+
+ on:
+   push:
+     branches: [main]
+   workflow_dispatch:
+
+ permissions:
+   contents: read
+   pages: write
+   id-token: write
+
+ concurrency:
+   group: "pages"
+   cancel-in-progress: false
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v4
+
+       - name: Set up Python 3.13
+         uses: actions/setup-python@v5
+         with:
+           python-version: "3.13"
+
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install -e ".[docs]"
+
+       - name: Build docs
+         run: mkdocs build --strict
+
+       - name: Upload artifact
+         uses: actions/upload-pages-artifact@v3
+         with:
+           path: site/
+
+   deploy:
+     environment:
+       name: github-pages
+       url: ${{ steps.deployment.outputs.page_url }}
+     runs-on: ubuntu-latest
+     needs: build
+     steps:
+       - name: Deploy to GitHub Pages
+         id: deployment
+         uses: actions/deploy-pages@v4
alberta_framework-0.1.0/.github/workflows/publish.yml
@@ -0,0 +1,62 @@
+ name: Publish to PyPI
+
+ on:
+   push:
+     tags:
+       - "v*"
+
+ permissions:
+   id-token: write
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v4
+
+       - name: Set up Python 3.13
+         uses: actions/setup-python@v5
+         with:
+           python-version: "3.13"
+
+       - name: Install build dependencies
+         run: python -m pip install --upgrade pip build
+
+       - name: Build package
+         run: python -m build
+
+       - name: Upload dist artifacts
+         uses: actions/upload-artifact@v4
+         with:
+           name: dist
+           path: dist/
+
+   publish-testpypi:
+     runs-on: ubuntu-latest
+     needs: build
+     environment: testpypi
+     steps:
+       - name: Download dist artifacts
+         uses: actions/download-artifact@v4
+         with:
+           name: dist
+           path: dist/
+
+       - name: Publish to TestPyPI
+         uses: pypa/gh-action-pypi-publish@release/v1
+         with:
+           repository-url: https://test.pypi.org/legacy/
+
+   publish-pypi:
+     runs-on: ubuntu-latest
+     needs: publish-testpypi
+     environment: pypi
+     steps:
+       - name: Download dist artifacts
+         uses: actions/download-artifact@v4
+         with:
+           name: dist
+           path: dist/
+
+       - name: Publish to PyPI
+         uses: pypa/gh-action-pypi-publish@release/v1
alberta_framework-0.1.0/.gitignore
@@ -0,0 +1,107 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # PyInstaller
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # IDEs and editors
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+ *~
+ .project
+ .pydevproject
+ .settings/
+ *.sublime-project
+ *.sublime-workspace
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Ruff
+ .ruff_cache/
+
+ # JAX compilation cache
+ __jax_cache__/
+
+ # macOS
+ .DS_Store
+ .AppleDouble
+ .LSOverride
+
+ # Thumbnails
+ ._*
+
+ # Local development
+ *.local
+ .python-version
+
+ # Documentation builds
+ docs/_build/
+ site/
+
+ # Experiment outputs
+ outputs/
+ results/
+ logs/
+ *.png
+ *.pdf
+ !docs/*.png
+ !docs/*.pdf
alberta_framework-0.1.0/ALBERTA_PLAN.md
@@ -0,0 +1,37 @@
+ <role>
+ You are a Senior Research Engineer specializing in Reinforcement Learning and Functional Programming. Your expertise is in JAX, the Alberta Plan for AI Research (Sutton et al., 2022) found in papers/The Alberta Plan.pdf, and the mathematical implementation of Temporal Difference (TD) learning.
+ </role>
+
+ <context>
+ I am building a research framework called "Alberta Framework" that researchers can use to implement algorithms and run experiments as they work through the 12 steps of the Alberta Plan. The goal is to move from linear TD(λ) with IDBD meta-learning (Step 1) to a full OaK (Option-and-Knowledge) architecture (Step 11).
+ The framework should serve both as a tool for implementing new algorithms for the Alberta Plan and as a set of reference implementations to learn from and test against.
+ Key Research Principles:
+ 1. Temporal Uniformity: Every component must update at every time step.
+ 2. Continual Learning: No experience replay; the agent learns from a non-stationary stream.
+ 3. Functional JAX: State is immutable; updates are pure functions using jax.jit, vmap, and grad.
+ </context>
+
+ <current_milestone>
+ Step 1: Representation I.
+ Objective: Step 1 is exemplary of the primary strategy of the Alberta Plan: to focus on a particular issue by considering the simplest setting in which it arises and attempting to deal with it there, fully, before generalizing to more complex settings. The issues focussed on in Step 1 are continual learning and the meta-learning of representations. How can learning be rapid, robust, and efficient while continuing over long periods of time? How can long periods of learning be taken advantage of to meta-learn better representations, and thus to learn most efficiently? The simple setting employed in Step 1 is that of supervised learning and stochastic gradient descent with a linear function approximator with static, given features. In this setting, conventional stochastic-gradient-descent methods such as the Least-Mean-Square learning rule work reasonably well even if the problem is non-stationary. However, these methods can be significantly improved in their efficiency and robustness, and that is the purpose of Step 1. First, these methods often involve a global step-size parameter that must be set by an expert user aided by knowledge of the target outputs, the features, the number of features, and heuristics. All of that user expertise should be replaced by a meta-algorithm for setting the step-size parameter, so that the same method can be used on any problem or for any part of a large problem. Second, instead of a global step-size parameter, there should be different step-size parameters for each feature, depending on how much generalization should be done along that feature. If this is done, then there will be many step-size parameters to set, and it will be even more important to set them algorithmically. In this setting, the representations are the features, which are given and fixed, so it may seem surprising to offer the setting up as a way of exploring representation learning. It is true that the setting cannot be used to discover features or to search for new features, but it can be used to assess the utility of given features, an important precursor to full representation discovery. Even without changing the features it is possible to learn which features are relevant and which are not. The relevant features can be given large step-size parameters and the irrelevant features small ones; this itself is a kind of representation learning that can affect learning efficiency even without changing the features themselves. Finally, there are normalizations of the features (scalings and offsets) that can greatly affect learning efficiency without changing the representational power of the linear function approximator, and we include these in Step 1. In particular, we consider an infinite sequence of examples of desired behavior, each consisting of a real-valued input vector paired with a real-valued desired output. Let the $t$th example be a pair denoted $(\mathbf{x}_t, y_t^*)$. The learner seeks to find an affine mapping from each input vector $\mathbf{x}_t$ to an output $y_t$ that closely approximates the desired output $y_t^*$. That affine map is represented as a vector of weights $\mathbf{w}_t$ and a scalar bias or offset term $b_t$; that is, the output is $y_t \doteq \mathbf{w}_t^\top \mathbf{x}_t + b_t$. The objective is to minimize the squared error $(y_t^* - y_t)^2$ by learning $\mathbf{w}_t$ and $b_t$. Each example is independent, but the distribution from which it is generated changes over time, making the problem non-stationary. In particular, we can take the desired output as being affine in the input vector and an unknown target weight vector $\mathbf{w}_t^*$ that changes slowly over time, plus an additional, independent mean-zero noise signal: $y_t^* \doteq \mathbf{w}_t^{*\top} \mathbf{x}_t + b_t^* + \eta_t$. The problem is non-stationary if $\mathbf{w}_t^*$ or $b_t^*$ change over time, or if the distribution of $\mathbf{x}_t$ changes over time. In this simple setting, there are still essential questions that have yet to be definitively answered. We are particularly interested in questions of normalization and step-size adaptation. Without changing the expressive power of our linear learning unit or the order of its computational complexity, we can transform the individual inputs $x_t^i$ to produce normalized signals $\tilde{x}_t^i \doteq (x_t^i - \mu_t^i) / \sigma_t^i$, where $\mu_t^i$ and $\sigma_t^i$ are non-stationary (tracking) estimates of the $i$th signal's mean and standard deviation. Surprisingly, the effect of such online normalization has yet to be definitively established in the literature. We consider learning rules of the form $w_{t+1}^i \doteq w_t^i + \alpha_t^i (y_t^* - y_t) \tilde{x}_t^i, \forall i$ (1), where each $\alpha_t^i$ is a meta-learned, per-weight step-size parameter, and $b_{t+1} \doteq b_t + \alpha_t^b (y_t^* - b_t)$ (2), where $\alpha_t^b$ is another potentially meta-learned step-size parameter. Our initial studies for Step 1 will focus on algorithms for meta-learning the step-size parameters, building on existing algorithms, and on demonstrating their improved robustness. The overall idea of Step 1 is to design as powerful an algorithm as possible given a fixed feature representation. It should include all the most important issues of non-stationarity in the problem (for a fixed set of linear features), including the tracking of changes in feature relevance. It should include the meta-learning of feature relevance, a challenging issue in representation learning (arguably the most challenging issue), but it does not include actually changing the set of features under consideration; that is explored in Step 2.
+ Existing algorithms include NADALINE (Sutton 1988b), IDBD (Sutton 1992a,b), Autostep (Mahmood et al. 2012), Autostep for GTD(λ) (Kearney et al. 2022), Auto (Degris in prep.), Adam (Kingma & Ba 2014), RMSprop (Tieleman & Hinton 2012), and Batch Normalization (e.g., Ioffe & Szegedy 2015).
+ </current_milestone>
+
+ <instructions>
+ 1. Always maintain a 'Project Memory' of our progress through the tools required for the 12 steps.
+ 2. When writing code, prioritize JAX-native patterns (NamedTuples for state, pure update functions).
+ 3. Before implementing a feature, suggest a 'Mathematical Plan' using the notation from the Alberta Plan papers.
+ 4. Ensure all implementations are modular so that Step 3 (GVFs) and Step 8 (Models) can be integrated without breaking Step 1.
+ 5. If I suggest an approach that violates 'Temporal Uniformity' or 'Continual Learning' (e.g., using a batch or replay buffer), politely correct me as per the Alberta Plan's 'Massive Retreat' discipline.
+ 6. All environment examples and implementations should be Gymnasium-compatible, as should the rest of the framework's features.
+ </instructions>
+
+ <output_format>
+ - Use bullet points for summaries.
+ - No sycophancy; provide honest, technical critiques of architecture.
+ - Format math using LaTeX.
+ </output_format>
+
+ <ask_clarification>
+ If you understand the vision for the Alberta Framework and our current focus on Step 1 (Representation I: continual supervised learning with given features), please summarize the core algorithm update equations we will be using to ensure we are aligned.
+ </ask_clarification>
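
The update rules (1) and (2) above, together with the online normalization, map directly onto the JAX conventions the file prescribes (immutable NamedTuple state, pure update functions). A minimal sketch with fixed per-weight step-sizes and a simple exponential-moving-average tracker; the names here (`NormalizerState`, `LearnerState`, `normalize`, `lms_step`) are illustrative assumptions, not the package's actual `normalizers.py`/`learners.py` API:

```python
from typing import NamedTuple

import jax
import jax.numpy as jnp
from jax import Array


class NormalizerState(NamedTuple):
    """Tracking estimates mu^i_t and sigma^i_t for each input signal."""
    mu: Array   # running mean estimates, shape (d,)
    var: Array  # running variance estimates, shape (d,)


class LearnerState(NamedTuple):
    """Weights w_t, bias b_t, and (here fixed) step-size parameters."""
    w: Array        # weights, shape (d,)
    b: Array        # scalar bias
    alpha: Array    # per-weight step-sizes alpha^i_t, shape (d,)
    alpha_b: Array  # bias step-size alpha^b_t


def normalize(ns: NormalizerState, x: Array,
              decay: float = 0.999) -> tuple[NormalizerState, Array]:
    """One simple choice of non-stationary tracking estimate: EMAs."""
    mu = decay * ns.mu + (1.0 - decay) * x
    var = decay * ns.var + (1.0 - decay) * (x - mu) ** 2
    x_tilde = (x - mu) / jnp.sqrt(var + 1e-8)  # x~^i_t = (x^i_t - mu^i_t) / sigma^i_t
    return NormalizerState(mu=mu, var=var), x_tilde


@jax.jit
def lms_step(ls: LearnerState, ns: NormalizerState, x: Array, y_star: Array):
    """Rules (1) and (2) with fixed alphas; IDBD/Autostep would meta-learn them."""
    ns, x_tilde = normalize(ns, x)
    y = jnp.dot(ls.w, x_tilde) + ls.b       # y_t = w_t^T x~_t + b_t
    delta = y_star - y                      # prediction error y*_t - y_t
    w = ls.w + ls.alpha * delta * x_tilde   # rule (1), per-weight step-sizes
    b = ls.b + ls.alpha_b * (y_star - ls.b)  # rule (2), as written in the plan
    return ls._replace(w=w, b=b), ns, delta**2
```

Meta-learning enters by replacing the fixed `alpha` with an IDBD- or Autostep-style update driven by the same `delta`; the structure of the step function is unchanged, which is what keeps the optimizers pluggable.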
alberta_framework-0.1.0/CHANGELOG.md
@@ -0,0 +1,30 @@
+ # Changelog
+
+ All notable changes to this project will be documented in this file.
+
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+ ## [0.1.0] - 2026-01-19
+
+ ### Added
+
+ - **Core Optimizers**: LMS (baseline), IDBD (Sutton 1992), and Autostep (Mahmood et al. 2012) with per-weight adaptive step-sizes
+ - **Linear Learners**: `LinearLearner` and `NormalizedLinearLearner` with pluggable optimizers
+ - **Scan-based Learning Loops**: JIT-compiled training with `jax.lax.scan` for efficiency
+ - **Online Normalization**: Streaming feature normalization with exponential moving averages
+ - **Experience Streams**: `RandomWalkStream`, `AbruptChangeStream`, `CyclicStream`, `SuttonExperiment1Stream`
+ - **Gymnasium Integration**: Trajectory collection and learning from Gymnasium RL environments
+ - **Step-Size Tracking**: Optional per-weight step-size history recording for meta-adaptation analysis
+ - **Multi-Seed Experiments**: `run_multi_seed_experiment` with optional parallelization via joblib
+ - **Statistical Analysis**: Pairwise comparisons, confidence intervals, effect sizes (requires scipy)
+ - **Publication Visualization**: Learning curves, bar charts, heatmaps with matplotlib
+ - **Export Utilities**: CSV, JSON, LaTeX, and Markdown table generation
+ - **Documentation**: MkDocs-based documentation with auto-generated API reference
+
+ ### Notes
+
+ - Requires Python 3.13+
+ - Implements Step 1 of the Alberta Plan: demonstrating that IDBD/Autostep can match or beat hand-tuned LMS
+ - All state uses immutable NamedTuples for JAX compatibility
+ - Follows temporal uniformity principle: every component updates at every time step
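
The changelog's "scan-based learning loops" describe the pattern that makes per-step (temporally uniform) updates cheap under JIT: the immutable state tuples thread through `jax.lax.scan` as the carry, so the whole run compiles to a single computation with no Python-level loop. A sketch under the same assumptions as the earlier example (`lms_step`, `LearnerState`, and `NormalizerState` are the hypothetical names from that sketch, not the package's exports):

```python
def run_stream(ls: LearnerState, ns: NormalizerState, xs: Array, ys: Array):
    """Fold lms_step over a pre-generated stream (xs: (T, d), ys: (T,))."""
    def step(carry, example):
        ls, ns = carry
        x, y_star = example
        ls, ns, sq_err = lms_step(ls, ns, x, y_star)
        return (ls, ns), sq_err          # carry state, emit per-step squared error
    (ls, ns), errors = jax.lax.scan(step, (ls, ns), (xs, ys))
    return ls, ns, errors                # errors: (T,) learning curve


# Example usage with stand-in data; a real stream would be non-stationary.
d, T = 8, 10_000
key = jax.random.PRNGKey(0)
xs = jax.random.normal(key, (T, d))
ys = xs @ jnp.ones(d)
ls = LearnerState(w=jnp.zeros(d), b=jnp.zeros(()),
                  alpha=jnp.full(d, 0.01), alpha_b=jnp.asarray(0.01))
ns = NormalizerState(mu=jnp.zeros(d), var=jnp.ones(d))
ls, ns, errors = run_stream(ls, ns, xs, ys)
```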