cpomdp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cpomdp-0.1.0/.github/workflows/ci.yml +77 -0
- cpomdp-0.1.0/.github/workflows/docs.yml +43 -0
- cpomdp-0.1.0/.github/workflows/publish.yml +41 -0
- cpomdp-0.1.0/.gitignore +58 -0
- cpomdp-0.1.0/.pre-commit-config.yaml +29 -0
- cpomdp-0.1.0/CHANGELOG.md +19 -0
- cpomdp-0.1.0/CONTRIBUTING.md +59 -0
- cpomdp-0.1.0/DECISIONS.md +225 -0
- cpomdp-0.1.0/LICENSE +21 -0
- cpomdp-0.1.0/PKG-INFO +119 -0
- cpomdp-0.1.0/README.md +95 -0
- cpomdp-0.1.0/RESEARCH.md +20 -0
- cpomdp-0.1.0/docs/api/agent.md +5 -0
- cpomdp-0.1.0/docs/api/backends.md +9 -0
- cpomdp-0.1.0/docs/api/control.md +5 -0
- cpomdp-0.1.0/docs/api/model.md +7 -0
- cpomdp-0.1.0/docs/changelog.md +1 -0
- cpomdp-0.1.0/docs/index.md +85 -0
- cpomdp-0.1.0/mkdocs.yml +65 -0
- cpomdp-0.1.0/pyproject.toml +115 -0
- cpomdp-0.1.0/src/cpomdp/__init__.py +33 -0
- cpomdp-0.1.0/src/cpomdp/agent.py +171 -0
- cpomdp-0.1.0/src/cpomdp/backends/__init__.py +1 -0
- cpomdp-0.1.0/src/cpomdp/backends/base.py +106 -0
- cpomdp-0.1.0/src/cpomdp/backends/kalman.py +195 -0
- cpomdp-0.1.0/src/cpomdp/backends/rxinfer.py +127 -0
- cpomdp-0.1.0/src/cpomdp/control.py +233 -0
- cpomdp-0.1.0/src/cpomdp/juliapkg.json +9 -0
- cpomdp-0.1.0/src/cpomdp/py.typed +0 -0
- cpomdp-0.1.0/src/cpomdp/types.py +229 -0
- cpomdp-0.1.0/tests/test_agent.py +139 -0
- cpomdp-0.1.0/tests/test_base.py +43 -0
- cpomdp-0.1.0/tests/test_control.py +193 -0
- cpomdp-0.1.0/tests/test_kalman.py +226 -0
- cpomdp-0.1.0/tests/test_rxinfer.py +115 -0
- cpomdp-0.1.0/tests/test_types.py +109 -0
- cpomdp-0.1.0/uv.lock +1398 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
# Cancel superseded runs on the same ref to save CI minutes.
|
|
9
|
+
concurrency:
|
|
10
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
11
|
+
cancel-in-progress: true
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
lint:
|
|
15
|
+
name: lint + types
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v6
|
|
19
|
+
- uses: astral-sh/setup-uv@v8.2.0
|
|
20
|
+
with:
|
|
21
|
+
enable-cache: true
|
|
22
|
+
python-version: "3.12"
|
|
23
|
+
- run: uv sync --locked
|
|
24
|
+
# Cache the pre-commit hook environments, keyed on the config.
|
|
25
|
+
- uses: actions/cache@v5
|
|
26
|
+
with:
|
|
27
|
+
path: ~/.cache/pre-commit
|
|
28
|
+
key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}
|
|
29
|
+
# Run the same hooks contributors run locally (ruff lint + format + file
|
|
30
|
+
# hygiene), so CI and pre-commit can never disagree. Fails if any hook would
|
|
31
|
+
# change a file. The commit-msg hook isn't run here (no commit to check).
|
|
32
|
+
- name: pre-commit
|
|
33
|
+
run: uv run pre-commit run --all-files --show-diff-on-failure
|
|
34
|
+
- name: ty (strict types)
|
|
35
|
+
run: uv run ty check
|
|
36
|
+
|
|
37
|
+
test:
|
|
38
|
+
name: test (py${{ matrix.python-version }})
|
|
39
|
+
runs-on: ubuntu-latest
|
|
40
|
+
strategy:
|
|
41
|
+
fail-fast: false
|
|
42
|
+
matrix:
|
|
43
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
44
|
+
steps:
|
|
45
|
+
- uses: actions/checkout@v6
|
|
46
|
+
- uses: astral-sh/setup-uv@v8.2.0
|
|
47
|
+
with:
|
|
48
|
+
enable-cache: true
|
|
49
|
+
python-version: ${{ matrix.python-version }}
|
|
50
|
+
- run: uv sync --locked
|
|
51
|
+
# The pure-Python suite: no Julia installed, so this also proves the
|
|
52
|
+
# rxinfer-marked tests deselect cleanly when the optional extra is absent.
|
|
53
|
+
- name: pytest + coverage gate
|
|
54
|
+
run: >-
|
|
55
|
+
uv run pytest -m "not rxinfer"
|
|
56
|
+
--cov=cpomdp --cov-report=term-missing --cov-fail-under=80
|
|
57
|
+
|
|
58
|
+
oracle:
|
|
59
|
+
name: rxinfer oracle (Julia)
|
|
60
|
+
runs-on: ubuntu-latest
|
|
61
|
+
steps:
|
|
62
|
+
- uses: actions/checkout@v6
|
|
63
|
+
- uses: astral-sh/setup-uv@v8.2.0
|
|
64
|
+
with:
|
|
65
|
+
enable-cache: true
|
|
66
|
+
python-version: "3.12"
|
|
67
|
+
- run: uv sync --locked --extra rxinfer
|
|
68
|
+
# juliacall provisions its own Julia + RxInfer (per juliapkg.json) on first
|
|
69
|
+
# import; cache the depot so only the first run pays the full download/build.
|
|
70
|
+
- name: Cache Julia depot
|
|
71
|
+
uses: actions/cache@v5
|
|
72
|
+
with:
|
|
73
|
+
path: ~/.julia
|
|
74
|
+
key: julia-depot-${{ runner.os }}-${{ hashFiles('src/cpomdp/juliapkg.json') }}
|
|
75
|
+
restore-keys: julia-depot-${{ runner.os }}-
|
|
76
|
+
- name: pytest (rxinfer only)
|
|
77
|
+
run: uv run pytest -m rxinfer
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
name: docs
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
# Let the workflow publish to GitHub Pages. One-time repo setup:
|
|
9
|
+
# Settings -> Pages -> Source = "GitHub Actions".
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
pages: write
|
|
13
|
+
id-token: write
|
|
14
|
+
|
|
15
|
+
# Never run two Pages deploys at once.
|
|
16
|
+
concurrency:
|
|
17
|
+
group: pages
|
|
18
|
+
cancel-in-progress: false
|
|
19
|
+
|
|
20
|
+
jobs:
|
|
21
|
+
build:
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
steps:
|
|
24
|
+
- uses: actions/checkout@v6
|
|
25
|
+
- uses: astral-sh/setup-uv@v8.2.0
|
|
26
|
+
with:
|
|
27
|
+
enable-cache: true
|
|
28
|
+
python-version: "3.12"
|
|
29
|
+
- run: uv sync --locked --group docs
|
|
30
|
+
- run: uv run mkdocs build --strict
|
|
31
|
+
- uses: actions/upload-pages-artifact@v3
|
|
32
|
+
with:
|
|
33
|
+
path: site
|
|
34
|
+
|
|
35
|
+
deploy:
|
|
36
|
+
needs: build
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
|
+
environment:
|
|
39
|
+
name: github-pages
|
|
40
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
41
|
+
steps:
|
|
42
|
+
- id: deployment
|
|
43
|
+
uses: actions/deploy-pages@v4
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
name: publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
build:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v6
|
|
15
|
+
- uses: astral-sh/setup-uv@v8.2.0
|
|
16
|
+
with:
|
|
17
|
+
enable-cache: true
|
|
18
|
+
python-version: "3.12"
|
|
19
|
+
- run: uv sync --locked
|
|
20
|
+
# Don't ship a red build.
|
|
21
|
+
- run: uv run pytest -m "not rxinfer"
|
|
22
|
+
- run: uv build
|
|
23
|
+
- uses: actions/upload-artifact@v4
|
|
24
|
+
with:
|
|
25
|
+
name: dist
|
|
26
|
+
path: dist/
|
|
27
|
+
|
|
28
|
+
publish:
|
|
29
|
+
needs: build
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
environment:
|
|
32
|
+
name: pypi
|
|
33
|
+
url: https://pypi.org/p/cpomdp
|
|
34
|
+
permissions:
|
|
35
|
+
id-token: write # the OIDC token Trusted Publishing exchanges for an upload
|
|
36
|
+
steps:
|
|
37
|
+
- uses: actions/download-artifact@v4
|
|
38
|
+
with:
|
|
39
|
+
name: dist
|
|
40
|
+
path: dist/
|
|
41
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
cpomdp-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
*.egg
|
|
11
|
+
.eggs/
|
|
12
|
+
|
|
13
|
+
# Virtual environments
|
|
14
|
+
.venv/
|
|
15
|
+
venv/
|
|
16
|
+
env/
|
|
17
|
+
|
|
18
|
+
# uv
|
|
19
|
+
.uv/
|
|
20
|
+
|
|
21
|
+
# Testing / coverage
|
|
22
|
+
.pytest_cache/
|
|
23
|
+
.coverage
|
|
24
|
+
.coverage.*
|
|
25
|
+
coverage.xml
|
|
26
|
+
htmlcov/
|
|
27
|
+
.tox/
|
|
28
|
+
.nox/
|
|
29
|
+
|
|
30
|
+
# Type checking / linting
|
|
31
|
+
.mypy_cache/
|
|
32
|
+
.ruff_cache/
|
|
33
|
+
.dmypy.json
|
|
34
|
+
dmypy.json
|
|
35
|
+
|
|
36
|
+
# Jupyter
|
|
37
|
+
.ipynb_checkpoints/
|
|
38
|
+
|
|
39
|
+
# Julia / juliacall (RxInfer backend)
|
|
40
|
+
.CondaPkg/
|
|
41
|
+
*.jl.cov
|
|
42
|
+
*.jl.*.cov
|
|
43
|
+
|
|
44
|
+
# Editors / OS
|
|
45
|
+
.vscode/
|
|
46
|
+
.idea/
|
|
47
|
+
*.swp
|
|
48
|
+
.DS_Store
|
|
49
|
+
|
|
50
|
+
# Docs build output (MkDocs)
|
|
51
|
+
site/
|
|
52
|
+
|
|
53
|
+
# Project-local
|
|
54
|
+
.remember/
|
|
55
|
+
.claude/
|
|
56
|
+
|
|
57
|
+
# Tracked artifacts, remove on release.
|
|
58
|
+
spike/
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# One-time setup after cloning (installs both the pre-commit and commit-msg hooks):
|
|
2
|
+
# uv run pre-commit install --hook-type pre-commit --hook-type commit-msg
|
|
3
|
+
# The ruff hooks mirror the CI `lint` job, so a clean commit here is a green CI.
|
|
4
|
+
repos:
|
|
5
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
6
|
+
# Keep this in sync with the `ruff` pin in pyproject's dev group, so local
|
|
7
|
+
# and CI agree on exactly which ruff is judging the code.
|
|
8
|
+
rev: v0.15.16
|
|
9
|
+
hooks:
|
|
10
|
+
- id: ruff-check
|
|
11
|
+
args: [--fix]
|
|
12
|
+
- id: ruff-format
|
|
13
|
+
|
|
14
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
15
|
+
rev: v5.0.0
|
|
16
|
+
hooks:
|
|
17
|
+
- id: trailing-whitespace
|
|
18
|
+
- id: end-of-file-fixer
|
|
19
|
+
- id: check-yaml
|
|
20
|
+
- id: check-toml
|
|
21
|
+
- id: check-merge-conflict
|
|
22
|
+
|
|
23
|
+
# Enforce Conventional Commit messages (feat:, fix:, docs:, ...), matching the
|
|
24
|
+
# style already used in the git history. Runs at the commit-msg stage.
|
|
25
|
+
- repo: https://github.com/compilerla/conventional-pre-commit
|
|
26
|
+
rev: v4.4.0
|
|
27
|
+
hooks:
|
|
28
|
+
- id: conventional-pre-commit
|
|
29
|
+
stages: [commit-msg]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
Everything worth noting lands here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and versions follow [semantic versioning](https://semver.org). While we're pre-1.0, treat the minor version as the place breaking changes can show up.
|
|
4
|
+
|
|
5
|
+
## [0.1.0] — 2026-06-15
|
|
6
|
+
|
|
7
|
+
The first cut. Linear-Gaussian active inference, end to end: perceive with a Kalman filter, act with LQR, all behind a pymdp-style `Agent`.
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- `Agent`, the stateful façade you actually drive. `infer_states` to perceive, `sample_action` to act, the same loop pymdp users know. It remembers the last action it took, so you don't have to thread that back in by hand. Build it without a goal and it's a pure tracker that perceives but won't act.
|
|
12
|
+
- `LinearGaussianModel`, the generative model. Matrices are named for their role (`dynamics`, `control`, `sensor_model`, `dynamics_noise`, `sensor_noise`), with the control-theory letters (`A`/`B`/`C`/`Q`/`R`) kept as aliases for when you're reading the maths.
|
|
13
|
+
- `Belief`, an immutable Gaussian belief: a mean and a covariance, validated on the way in.
|
|
14
|
+
- `KalmanBackend`, exact Kalman filtering. Has an optional steady-state mode that solves the gain once up front and reuses it.
|
|
15
|
+
- `LQRController`, steady-state LQR action selection. For a linear-Gaussian sensor this *is* the expected-free-energy-optimal action rather than a stand-in for it (the why is in DECISIONS.md, ADR-003).
|
|
16
|
+
- `RxInferBackend`, an optional [RxInfer](https://github.com/ReactiveBayes/RxInfer.jl) (Julia) backend. It re-derives the same filtering results through completely separate machinery and exists as a correctness oracle for the native path. Lives behind the `rxinfer` extra so the core install stays Julia-free.
|
|
17
|
+
- `InferenceBackend`, the protocol the backends satisfy, so you can drop in your own engine.
|
|
18
|
+
|
|
19
|
+
This is pre-alpha. The API works and is tested against the RxInfer oracle, but it can still move before 1.0.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Contributing to cpomdp
|
|
2
|
+
|
|
3
|
+
Thanks for taking a look. Here's how to get set up and what the tooling expects.
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
The project uses [uv](https://docs.astral.sh/uv/). Once you've cloned it:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
uv sync # install the package + dev tooling
|
|
11
|
+
uv run pre-commit install --hook-type pre-commit --hook-type commit-msg
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
That second line wires up the git hooks. You only do it once. After that the
|
|
15
|
+
checks run automatically every time you commit, so you find problems before CI
|
|
16
|
+
does rather than after.
|
|
17
|
+
|
|
18
|
+
## How the rules are enforced
|
|
19
|
+
|
|
20
|
+
There's one source of truth for style and linting: the `[tool.ruff]` section of
|
|
21
|
+
`pyproject.toml`. Editor settings aren't checked in on purpose, so nothing depends
|
|
22
|
+
on which editor you use. The config is enforced in two places that both read it:
|
|
23
|
+
|
|
24
|
+
- **pre-commit**, locally, on every commit (see `.pre-commit-config.yaml`).
|
|
25
|
+
- **CI**, on every push and PR, running the exact same hooks.
|
|
26
|
+
|
|
27
|
+
So if it's green locally, it's green in CI. If you want your editor to format on
|
|
28
|
+
save, point it at ruff yourself; just don't rely on it, the hooks are what count.
|
|
29
|
+
|
|
30
|
+
## What the hooks check
|
|
31
|
+
|
|
32
|
+
- **ruff** lints and formats the code. Line length is 88. Formatting isn't a
|
|
33
|
+
matter of taste here, ruff decides and that's that.
|
|
34
|
+
- **docstrings** are required on public modules, classes, functions and methods in
|
|
35
|
+
`src/` (Google style). Tests are exempt; their names are the documentation.
|
|
36
|
+
Constructors can be documented at the class level instead of in `__init__`.
|
|
37
|
+
- **commit messages** follow [Conventional Commits](https://www.conventionalcommits.org):
|
|
38
|
+
`feat:`, `fix:`, `docs:`, `test:`, `chore:`, and so on. The commit-msg hook will
|
|
39
|
+
bounce anything that doesn't.
|
|
40
|
+
- a few **hygiene** checks: no trailing whitespace, files end in a newline, YAML and
|
|
41
|
+
TOML parse, no leftover merge-conflict markers.
|
|
42
|
+
|
|
43
|
+
## Running things by hand
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
uv run pytest -m "not rxinfer" # the fast, pure-Python suite
|
|
47
|
+
uv run ty check # type checking
|
|
48
|
+
uv run pre-commit run --all-files
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
The `rxinfer` tests boot a Julia runtime (the RxInfer backend is an independent
|
|
52
|
+
oracle the native filter is checked against). They're slow and need the extra:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
uv run --extra rxinfer pytest -m rxinfer
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
You don't need Julia for normal work. The pure-Python suite covers the core, and
|
|
59
|
+
the rxinfer job runs separately in CI.
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# Architecture Decisions
|
|
2
|
+
|
|
3
|
+
Decisions are append-only. Each records the choice, the evidence, and the date.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## ADR-003 — v0.1 grows an acting agent: stateful `Agent` + front-loaded LQR
|
|
8
|
+
|
|
9
|
+
**Date:** 2026-06-14
|
|
10
|
+
**Status:** Accepted
|
|
11
|
+
**Phase:** 2 (abstraction wall) → 3 (agent assembly)
|
|
12
|
+
**Amends:** ADR-002 (reverses its "LQR/control side: deferred" scope guard)
|
|
13
|
+
|
|
14
|
+
### Decision
|
|
15
|
+
|
|
16
|
+
v0.1 ships an agent that *acts*, not just one that perceives. Two additions:
|
|
17
|
+
|
|
18
|
+
1. A stateful `Agent` façade that owns the current belief and exposes
|
|
19
|
+
`infer_states(obs, action=None)` and `sample_action()` — the continuous answer
|
|
20
|
+
to pymdp's `Agent`.
|
|
21
|
+
2. Action selection via a **front-loaded steady-state LQR** controller: solve the
|
|
22
|
+
control Riccati once at construction for `L∞`, then `u = -L∞·(mean − goal)` in
|
|
23
|
+
the loop.
|
|
24
|
+
|
|
25
|
+
ADR-002 deferred the whole control side. We're pulling it back because without it
|
|
26
|
+
the library is a Kalman filter with a nice type system, not the "continuous
|
|
27
|
+
sibling of pymdp" the README promises. pymdp's shape is perceive → evaluate → act;
|
|
28
|
+
shipping only the first verb undersells what turns out to be a small amount of
|
|
29
|
+
remaining work.
|
|
30
|
+
|
|
31
|
+
### Why LQR counts as active inference here (the load-bearing argument)
|
|
32
|
+
|
|
33
|
+
The objection to adding LQR is that we've quietly swapped active inference for
|
|
34
|
+
plain optimal control. We haven't, and the reason is specific to the
|
|
35
|
+
linear-Gaussian case.
|
|
36
|
+
|
|
37
|
+
Expected Free Energy has a pragmatic term (reach preferred observations) and an
|
|
38
|
+
epistemic term (act to reduce uncertainty). In `LinearGaussianModel` the
|
|
39
|
+
covariance recursion is **control-independent** — the same property that lets us
|
|
40
|
+
front-load `K∞`. Control shifts the mean only; it never touches the covariance. So
|
|
41
|
+
the epistemic value (expected entropy reduction `½·log(det Σ_pred / det Σ_post)`)
|
|
42
|
+
is identical for every action and falls out of the argmin. EFE-minimising action
|
|
43
|
+
selection *provably* reduces to its pragmatic term, and the pragmatic term under a
|
|
44
|
+
Gaussian preference is a quadratic cost whose optimum is LQR.
|
|
45
|
+
|
|
46
|
+
So LQR isn't a stand-in for EFE here — it's what EFE *is* when sensing doesn't
|
|
47
|
+
depend on where you are. The epistemic term only re-enters once the observation
|
|
48
|
+
model becomes state- or action-dependent (position-varying sensor precision,
|
|
49
|
+
choosing a modality), which is out of scope for a fixed linear-Gaussian sensor. We
|
|
50
|
+
record that as the seam, not a gap.
|
|
51
|
+
|
|
52
|
+
### The symmetry we're buying
|
|
53
|
+
|
|
54
|
+
Filter and controller become duals, both solved once at construction, neither
|
|
55
|
+
dependent on data:
|
|
56
|
+
|
|
57
|
+
- perception: Kalman/DARE → `K∞`, loop does `mean += K∞·prediction_error`
|
|
58
|
+
- action: control Riccati → `L∞`, loop does `u = -L∞·(mean − goal)`
|
|
59
|
+
|
|
60
|
+
Together that's LQG. The front-loading thesis (RESEARCH.md) now covers both halves
|
|
61
|
+
of the agent, not just perception.
|
|
62
|
+
|
|
63
|
+
### Interface shape
|
|
64
|
+
|
|
65
|
+
- `Agent` is stateful: it holds `belief` (the analog of pymdp's `qs`) and updates
|
|
66
|
+
it in place across `infer_states` calls. The backends stay functional/pure
|
|
67
|
+
underneath — façade for ergonomics, engine for testability.
|
|
68
|
+
- Preferences live on the `Agent`, not the model. The model is the generative
|
|
69
|
+
story; the goal and the effort trade-off are the agent's. Role-named to avoid
|
|
70
|
+
the Q/R collision (`dynamics_noise`/`sensor_noise` are already "Q"/"R", and
|
|
71
|
+
LQR's cost matrices are conventionally Q/R too): `goal`, `effort_penalty`, etc.
|
|
72
|
+
- `sample_action()` reads the current belief mean — one matrix-vector product, no
|
|
73
|
+
inference of its own.
|
|
74
|
+
|
|
75
|
+
### Scope (v0.1, updated)
|
|
76
|
+
|
|
77
|
+
- **Added:** stateful `Agent`, steady-state LQR controller (front-loaded `L∞`),
|
|
78
|
+
agent-side preferences, 2D point-mass reaching demo that closes the loop (the
|
|
79
|
+
agent chooses the action).
|
|
80
|
+
- **Still deferred:** epistemic/exploratory EFE (named seam above), receding-
|
|
81
|
+
horizon and time-varying control, nonlinear control. `CovarianceRep`, BMR — as
|
|
82
|
+
in ADR-002.
|
|
83
|
+
|
|
84
|
+
### Validation strategy
|
|
85
|
+
|
|
86
|
+
Same discipline as the filter. `L∞` is checked against an independent oracle —
|
|
87
|
+
scipy's `solve_discrete_are` (control algebraic Riccati) — so a bug in our own
|
|
88
|
+
solve can't pass silently. The reaching demo is the end-to-end acceptance test:
|
|
89
|
+
the point mass must converge to `goal` under the closed loop.
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## ADR-002 — v0.1 inference engine: **native fixed-gain fast path; RxInfer as oracle + general fallback**
|
|
94
|
+
|
|
95
|
+
**Date:** 2026-06-12
|
|
96
|
+
**Status:** Accepted
|
|
97
|
+
**Phase:** 2 (the abstraction wall)
|
|
98
|
+
**Amends:** ADR-001 (does not revoke it — re-roles RxInfer rather than removing it)
|
|
99
|
+
|
|
100
|
+
### Decision
|
|
101
|
+
|
|
102
|
+
v0.1's *default* inference is a **native, front-loaded steady-state Kalman
|
|
103
|
+
filter** (Option 1 in the build plan), exposed as a backend behind the
|
|
104
|
+
`InferenceBackend` Protocol. **RxInfer (via juliacall, per ADR-001) is retained
|
|
105
|
+
as a second backend** — serving now as the *correctness oracle* and later as the
|
|
106
|
+
*general engine* for the cases the native fast path cannot handle (nonlinear,
|
|
107
|
+
non-stationary, intermittent observations, structure learning, hierarchical).
|
|
108
|
+
|
|
109
|
+
### Why this changes ADR-001's emphasis
|
|
110
|
+
|
|
111
|
+
ADR-001 made RxInfer "the engine." The front-loading analysis (RESEARCH.md) shows
|
|
112
|
+
that for the **LTI-Gaussian** v0.1 scope the inference loop reduces to a fixed-gain
|
|
113
|
+
filter so cheap that RxInfer would never run in the hot path — it would be a Julia
|
|
114
|
+
dependency carried for nothing. We arrive at the native path *not* because the
|
|
115
|
+
bridge failed (it worked, ADR-001 stands as evidence) but because front-loading
|
|
116
|
+
removes the only reason the bridge was load-bearing. The Phase-2 abstraction wall
|
|
117
|
+
is exactly what lets both coexist as swappable backends instead of a fork.
|
|
118
|
+
|
|
119
|
+
### The principle being implemented (RESEARCH.md)
|
|
120
|
+
|
|
121
|
+
**Front-load the *structure* of the computation, never the *values*.** For an LTI
|
|
122
|
+
Gaussian model the covariance/gain sequence is data-independent: solve the
|
|
123
|
+
discrete algebraic Riccati equation (DARE) **once at agent construction** to get
|
|
124
|
+
the steady-state gain `K∞`, then run a fixed-gain update in the loop. No
|
|
125
|
+
inversion, no covariance update, no O(n³) op in the hot path.
|
|
126
|
+
|
|
127
|
+
### Scope guards (resisting the doc's own scope creep)
|
|
128
|
+
|
|
129
|
+
- **In for v0.1:** `Belief` (plain covariance, scalar), `InferenceBackend`
|
|
130
|
+
Protocol, native fixed-gain backend (DARE → `K∞` + warmup), RxInfer oracle
|
|
131
|
+
backend, 2D point-mass reaching demo validated against a full per-step Kalman.
|
|
132
|
+
- **Deferred (named seams only, no impl):** `CovarianceRep` strategy/Protocol
|
|
133
|
+
(YAGNI until a 2nd representation exists — scalar is the trivial 1×1 case of
|
|
134
|
+
all three), BMR outer loop, LQR/control side.
|
|
135
|
+
- **JAX:** not adopted reflexively. v0.1 scalar fixed-gain is instant in NumPy;
|
|
136
|
+
JAX is revisited when autodiff (EFE gradients, param learning) or vmap/GPU
|
|
137
|
+
actually pays. Core stays NumPy-only until then.
|
|
138
|
+
|
|
139
|
+
### Boundaries where the native fast path is INVALID (fall back to RxInfer)
|
|
140
|
+
|
|
141
|
+
- Nonlinear models — EKF/UKF gains depend on the linearisation point → the
|
|
142
|
+
estimate → the data → gains become data-dependent → not front-loadable.
|
|
143
|
+
- Non-stationary `A,Q,R` — `K∞` goes stale; needs drift detection + re-solve.
|
|
144
|
+
- **Intermittent / irregularly-sampled / varying-`R` observations** — breaks the
|
|
145
|
+
"regular complete observations" assumption that makes `K` constant.
|
|
146
|
+
|
|
147
|
+
### Validation strategy
|
|
148
|
+
|
|
149
|
+
The native filter's posterior is checked against (a) a plain NumPy RTS
|
|
150
|
+
smoother / full per-step Kalman (analytic oracle) and (b) the RxInfer backend.
|
|
151
|
+
The Phase-0 spike (`spike/`) is re-roled from "shipping engine prototype" to
|
|
152
|
+
"oracle harness."
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## ADR-001 — Backend bridge shape: **Shape A (juliacall, in-process)**
|
|
157
|
+
|
|
158
|
+
**Date:** 2026-06-12
|
|
159
|
+
**Status:** Accepted (emphasis amended by ADR-002)
|
|
160
|
+
**Phase:** 0 (verification spike — the gate)
|
|
161
|
+
|
|
162
|
+
### Decision
|
|
163
|
+
|
|
164
|
+
cpomdp's v0.1 inference engine is **RxInfer.jl, reached in-process via `juliacall`**
|
|
165
|
+
(Shape A). Not the HTTP `RxInferClient.py` → `RxInferServer.jl` route (Shape B).
|
|
166
|
+
|
|
167
|
+
### Evidence from the spike (`spike/`, throwaway)
|
|
168
|
+
|
|
169
|
+
A scalar linear-Gaussian state-space model was the test vehicle:
|
|
170
|
+
|
|
171
|
+
xₜ = A·xₜ₋₁ + 𝒩(0,Q), yₜ = B·xₜ + 𝒩(0,R), x₀ ~ 𝒩(m0,v0)
|
|
172
|
+
|
|
173
|
+
1. **Julia-only ground truth** (`lgssm_groundtruth.jl`): RxInfer runs, posteriors
|
|
174
|
+
read out cleanly. **Validated correct** against an independent NumPy RTS
|
|
175
|
+
smoother (`rts_oracle.py`) — agreement to **5e-13** (machine precision).
|
|
176
|
+
2. **juliacall bridge** (`juliacall_driver.py`): the *same* model driven from
|
|
177
|
+
Python — NumPy array in, array out — reproduced the Julia-only posteriors to
|
|
178
|
+
**5e-13**. The bridge introduces no numerical error.
|
|
179
|
+
3. **Shape B not deeply evaluated.** The decision rule in the build plan defaults
|
|
180
|
+
to Shape A unless it proves unworkable. It held on the first real attempt, so
|
|
181
|
+
the default stands. Shape B remains a documented fallback, not a need.
|
|
182
|
+
|
|
183
|
+
### Consequences / things learned (carry into Phase 1+)
|
|
184
|
+
|
|
185
|
+
- **Toolchain that worked:** Julia **1.12.6** (via juliaup), **RxInfer v5.4.0**,
|
|
186
|
+
**juliacall 0.9.35**, on **CPython 3.14.5**. The feared Python-3.14
|
|
187
|
+
incompatibility did **not** materialise — 3.14 is fine.
|
|
188
|
+
- **juliacall needs PythonCall.jl in the active Julia project.** It's juliacall's
|
|
189
|
+
Julia-side counterpart. The real backend must ensure both PythonCall.jl and
|
|
190
|
+
RxInfer.jl are present — juliacall ships a `juliapkg.json` mechanism for
|
|
191
|
+
declaring Julia deps; cpomdp should ship its own `juliapkg.json` declaring
|
|
192
|
+
RxInfer so `pip install cpomdp[rxinfer]` auto-provisions the Julia side.
|
|
193
|
+
- **Startup cost is real but acceptable.** First-ever run paid a one-time
|
|
194
|
+
~70s (registry update + PythonCall add + precompile). Steady-state startup is
|
|
195
|
+
the `import juliacall` + `using RxInfer` load (tens of seconds, JIT warmup),
|
|
196
|
+
paid once per process — not per inference. Not prohibitive for a library used
|
|
197
|
+
in a session; worth a note in user docs.
|
|
198
|
+
- **Inference convention — SMOOTHER, not filter.** Handing RxInfer a whole
|
|
199
|
+
observation sequence at once yields the *smoothed* posterior p(xₜ|y₁..y_T)
|
|
200
|
+
(message passing flows both directions). The Phase-4 correctness oracle must
|
|
201
|
+
therefore be an **RTS smoother** (already written: `rts_oracle.py`), not a bare
|
|
202
|
+
Kalman filter. For an online agent acting in real time we will likely want the
|
|
203
|
+
*filter* instead — drive RxInfer in streaming/one-observation-at-a-time mode.
|
|
204
|
+
Decide this when building `agent.py`.
|
|
205
|
+
|
|
206
|
+
### The wall (unchanged, restated)
|
|
207
|
+
|
|
208
|
+
juliacall, PythonCall, RxInfer, and the `@model` DSL all live behind
|
|
209
|
+
`backends/base.py`'s Protocol. None of it appears in any public signature, return
|
|
210
|
+
type, exception, or docstring. Shape A vs B is an implementation detail the wall
|
|
211
|
+
makes swappable.
|
|
212
|
+
|
|
213
|
+
## On changing the matrix names
|
|
214
|
+
Name the matrices explicitly, by role, to avoid further confusion and naming collisions (for example, the filter's `Q`/`R` noise matrices versus LQR's conventional `Q`/`R` cost matrices).
|
|
215
|
+
An example:
|
|
216
|
+
|
|
217
|
+
LinearGaussianModel(
|
|
218
|
+
dynamics=..., # A: state → next state
|
|
219
|
+
control=..., # B: action → state
|
|
220
|
+
observation=..., # C: state → observation
|
|
221
|
+
process_noise=..., # Q
|
|
222
|
+
observation_noise=...,# R
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
The letters can survive as aliases or internal attributes, and definitely in the docstrings, but the primary interface is role-named.
|
cpomdp-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 DanBoringName
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|