carbonation 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- carbonation-0.0.1/.github/workflows/release.yml +26 -0
- carbonation-0.0.1/.github/workflows/test.yml +26 -0
- carbonation-0.0.1/.gitignore +139 -0
- carbonation-0.0.1/CLAUDE.md +110 -0
- carbonation-0.0.1/LICENSE +21 -0
- carbonation-0.0.1/PKG-INFO +354 -0
- carbonation-0.0.1/README.md +333 -0
- carbonation-0.0.1/examples/carbonation.service +33 -0
- carbonation-0.0.1/examples/config.toml +39 -0
- carbonation-0.0.1/examples/plugins/test_file_model.py +48 -0
- carbonation-0.0.1/examples/rules.toml +22 -0
- carbonation-0.0.1/pyproject.toml +50 -0
- carbonation-0.0.1/scripts/generate_fake_data.py +79 -0
- carbonation-0.0.1/src/carbonation/__init__.py +44 -0
- carbonation-0.0.1/src/carbonation/__main__.py +5 -0
- carbonation-0.0.1/src/carbonation/api.py +416 -0
- carbonation-0.0.1/src/carbonation/archive.py +140 -0
- carbonation-0.0.1/src/carbonation/cli.py +1231 -0
- carbonation-0.0.1/src/carbonation/config.py +395 -0
- carbonation-0.0.1/src/carbonation/db/__init__.py +0 -0
- carbonation-0.0.1/src/carbonation/db/engine.py +150 -0
- carbonation-0.0.1/src/carbonation/db/migrations/__init__.py +0 -0
- carbonation-0.0.1/src/carbonation/db/migrations/env.py +29 -0
- carbonation-0.0.1/src/carbonation/db/migrations/script.py.mako +26 -0
- carbonation-0.0.1/src/carbonation/db/migrations/versions/001_initial_schema.py +98 -0
- carbonation-0.0.1/src/carbonation/db/migrations/versions/__init__.py +0 -0
- carbonation-0.0.1/src/carbonation/db/models.py +223 -0
- carbonation-0.0.1/src/carbonation/db/queries.py +681 -0
- carbonation-0.0.1/src/carbonation/exceptions.py +17 -0
- carbonation-0.0.1/src/carbonation/log.py +39 -0
- carbonation-0.0.1/src/carbonation/plugins.py +128 -0
- carbonation-0.0.1/src/carbonation/reconcile.py +247 -0
- carbonation-0.0.1/src/carbonation/rules.py +199 -0
- carbonation-0.0.1/src/carbonation/service.py +848 -0
- carbonation-0.0.1/src/carbonation/watcher.py +131 -0
- carbonation-0.0.1/tests/__init__.py +0 -0
- carbonation-0.0.1/tests/conftest.py +126 -0
- carbonation-0.0.1/tests/test_api.py +278 -0
- carbonation-0.0.1/tests/test_archive.py +208 -0
- carbonation-0.0.1/tests/test_cli.py +445 -0
- carbonation-0.0.1/tests/test_config.py +302 -0
- carbonation-0.0.1/tests/test_db.py +437 -0
- carbonation-0.0.1/tests/test_integration.py +822 -0
- carbonation-0.0.1/tests/test_plugins.py +79 -0
- carbonation-0.0.1/tests/test_reconcile.py +186 -0
- carbonation-0.0.1/tests/test_rules.py +203 -0
- carbonation-0.0.1/tests/test_service.py +551 -0
- carbonation-0.0.1/tests/test_watcher.py +121 -0
- carbonation-0.0.1/uv.lock +804 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: "Publish"
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
environment:
|
|
11
|
+
name: pypi
|
|
12
|
+
permissions:
|
|
13
|
+
id-token: write
|
|
14
|
+
contents: read
|
|
15
|
+
steps:
|
|
16
|
+
- name: Checkout
|
|
17
|
+
uses: actions/checkout@v6
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v7
|
|
21
|
+
|
|
22
|
+
- name: Build
|
|
23
|
+
run: uv build
|
|
24
|
+
|
|
25
|
+
- name: Publish
|
|
26
|
+
run: uv publish
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: Test
|
|
2
|
+
|
|
3
|
+
on: [push, pull_request]
|
|
4
|
+
|
|
5
|
+
jobs:
|
|
6
|
+
build:
|
|
7
|
+
runs-on: ubuntu-latest
|
|
8
|
+
strategy:
|
|
9
|
+
fail-fast: false
|
|
10
|
+
matrix:
|
|
11
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
12
|
+
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v6
|
|
15
|
+
|
|
16
|
+
- name: Install uv and set the Python version
|
|
17
|
+
uses: astral-sh/setup-uv@v7
|
|
18
|
+
with:
|
|
19
|
+
enable-cache: true
|
|
20
|
+
python-version: ${{ matrix.python-version }}
|
|
21
|
+
|
|
22
|
+
- name: Install the project
|
|
23
|
+
run: uv sync --locked --all-extras --dev
|
|
24
|
+
|
|
25
|
+
- name: Run pytest
|
|
26
|
+
run: uv run pytest tests/
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
pip-wheel-metadata/
|
|
24
|
+
share/python-wheels/
|
|
25
|
+
*.egg-info/
|
|
26
|
+
.installed.cfg
|
|
27
|
+
*.egg
|
|
28
|
+
MANIFEST
|
|
29
|
+
__about__.py
|
|
30
|
+
|
|
31
|
+
# PyInstaller
|
|
32
|
+
# Usually these files are written by a python script from a template
|
|
33
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
34
|
+
*.manifest
|
|
35
|
+
*.spec
|
|
36
|
+
|
|
37
|
+
# Installer logs
|
|
38
|
+
pip-log.txt
|
|
39
|
+
pip-delete-this-directory.txt
|
|
40
|
+
|
|
41
|
+
# Unit test / coverage reports
|
|
42
|
+
htmlcov/
|
|
43
|
+
.tox/
|
|
44
|
+
.nox/
|
|
45
|
+
.coverage
|
|
46
|
+
.coverage.*
|
|
47
|
+
.cache
|
|
48
|
+
nosetests.xml
|
|
49
|
+
coverage.xml
|
|
50
|
+
*.cover
|
|
51
|
+
*.py,cover
|
|
52
|
+
.hypothesis/
|
|
53
|
+
.pytest_cache/
|
|
54
|
+
|
|
55
|
+
# Translations
|
|
56
|
+
*.mo
|
|
57
|
+
*.pot
|
|
58
|
+
|
|
59
|
+
# Django stuff:
|
|
60
|
+
*.log
|
|
61
|
+
local_settings.py
|
|
62
|
+
db.sqlite3
|
|
63
|
+
db.sqlite3-journal
|
|
64
|
+
|
|
65
|
+
# Flask stuff:
|
|
66
|
+
instance/
|
|
67
|
+
.webassets-cache
|
|
68
|
+
|
|
69
|
+
# Scrapy stuff:
|
|
70
|
+
.scrapy
|
|
71
|
+
|
|
72
|
+
# Sphinx documentation
|
|
73
|
+
docs/_build/
|
|
74
|
+
|
|
75
|
+
# PyBuilder
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
.python-version
|
|
87
|
+
|
|
88
|
+
# pipenv
|
|
89
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
90
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
91
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
92
|
+
# install all needed dependencies.
|
|
93
|
+
#Pipfile.lock
|
|
94
|
+
|
|
95
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
96
|
+
__pypackages__/
|
|
97
|
+
|
|
98
|
+
# Celery stuff
|
|
99
|
+
celerybeat-schedule
|
|
100
|
+
celerybeat.pid
|
|
101
|
+
|
|
102
|
+
# SageMath parsed files
|
|
103
|
+
*.sage.py
|
|
104
|
+
|
|
105
|
+
# Environments
|
|
106
|
+
.env
|
|
107
|
+
.venv
|
|
108
|
+
env/
|
|
109
|
+
venv/
|
|
110
|
+
ENV/
|
|
111
|
+
env.bak/
|
|
112
|
+
venv.bak/
|
|
113
|
+
|
|
114
|
+
# Spyder project settings
|
|
115
|
+
.spyderproject
|
|
116
|
+
.spyproject
|
|
117
|
+
|
|
118
|
+
# Rope project settings
|
|
119
|
+
.ropeproject
|
|
120
|
+
|
|
121
|
+
# mkdocs documentation
|
|
122
|
+
/site
|
|
123
|
+
|
|
124
|
+
# mypy
|
|
125
|
+
.mypy_cache/
|
|
126
|
+
.dmypy.json
|
|
127
|
+
dmypy.json
|
|
128
|
+
|
|
129
|
+
# Pyre type checker
|
|
130
|
+
.pyre/
|
|
131
|
+
|
|
132
|
+
# hatch-vcs
|
|
133
|
+
_version.py
|
|
134
|
+
|
|
135
|
+
# ruff
|
|
136
|
+
.ruff_cache
|
|
137
|
+
|
|
138
|
+
# testing
|
|
139
|
+
test_data
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Carbonation
|
|
2
|
+
|
|
3
|
+
File delivery monitoring and archival service. Watches directories for incoming files, groups them by stem, validates integrity, extracts metadata via plugins, applies selection/routing rules, and archives to configurable destinations.
|
|
4
|
+
|
|
5
|
+
## Quick Reference
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
uv run pytest tests/ # all tests (~24s)
|
|
9
|
+
uv run pytest tests/ -m "not integration" # unit tests only (~2s)
|
|
10
|
+
uv run pytest tests/test_integration.py # integration tests only (~22s)
|
|
11
|
+
uv run ruff check src/ tests/ # lint
|
|
12
|
+
uv run ruff check --fix src/ tests/ # lint + autofix
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Project Layout
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
src/carbonation/ # main package (src layout, built with hatchling)
|
|
19
|
+
cli.py # Click CLI: check-config, db init/status, run, reconcile
|
|
20
|
+
config.py # Pydantic models + TOML loading for config.toml and rules.toml
|
|
21
|
+
service.py # Main orchestrator: watcher -> rules -> archive pipeline
|
|
22
|
+
watcher.py # Watchdog integration with settle-timer debouncing
|
|
23
|
+
rules.py # Integrity, completeness, selection, routing evaluation
|
|
24
|
+
archive.py # File copy/move/hardlink/symlink with permissions
|
|
25
|
+
reconcile.py # Sync delivery/archive directories with DB
|
|
26
|
+
plugins.py # Plugin registry: loads .py files, indexes callables
|
|
27
|
+
log.py # Loguru configuration
|
|
28
|
+
exceptions.py # CarbonationError hierarchy
|
|
29
|
+
db/
|
|
30
|
+
models.py # SQLAlchemy ORM: FileComponent, FileBase (abstract)
|
|
31
|
+
queries.py # All DB operations (no auto-commit; callers commit)
|
|
32
|
+
engine.py # Engine/session factory, schema init/verify
|
|
33
|
+
examples/ # Example config.toml, rules.toml, plugin
|
|
34
|
+
scripts/ # generate_fake_data.py for test data
|
|
35
|
+
tests/ # pytest suite
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Architecture
|
|
39
|
+
|
|
40
|
+
**Processing pipeline** (in `service.py:_process_event_inner`):
|
|
41
|
+
1. Record component (insert-first, handle IntegrityError for races)
|
|
42
|
+
2. Integrity checks (min_size, readability)
|
|
43
|
+
3. Completeness grouping (stem-based or plugin-based, with group locking)
|
|
44
|
+
4. Metadata extraction (plugin `extract()` function)
|
|
45
|
+
5. Selection rules (OR logic, has_keys / field matches / plugin)
|
|
46
|
+
6. Routing (glob or plugin, first-match-wins)
|
|
47
|
+
7. Archive (copy/move/hardlink/symlink with configurable permissions)
|
|
48
|
+
|
|
49
|
+
**Threading model**: Main thread runs event loop, ThreadPoolExecutor processes events, per-group locks (WeakValueDictionary) protect completeness checks, periodic Timer for reconciliation + completeness timeouts.
|
|
50
|
+
|
|
51
|
+
**Database**: Commits happen at caller boundaries, not in individual query functions. `_process_event` commits once after the full pipeline. `reconcile_delivery`/`reconcile_archive` commit at the end.
|
|
52
|
+
|
|
53
|
+
**Plugin system**: Plugins are `.py` files loaded from a configured directory. A plugin must provide a `FileBase` subclass with `__tablename__ = "files"` for the File model, and an `extract(file_paths) -> dict` function for metadata. Domain columns on the File model must be nullable because they are populated after group creation.
|
|
54
|
+
|
|
55
|
+
## Testing
|
|
56
|
+
|
|
57
|
+
- **Unit tests** (`test_*.py` except `test_integration.py`): Fast, in-memory SQLite, no filesystem watchers
|
|
58
|
+
- **Integration tests** (`test_integration.py`): Start real service in background thread, write files to disk, verify archive + DB state. Marked `@pytest.mark.integration`.
|
|
59
|
+
- **conftest.py** registers a `_TestFile(FileBase)` model at module level with both unit test fields (`label`) and integration test fields (`start`, `stop`, `category`, `source`). Integration test plugins must NOT define their own FileBase subclass — only provide `extract()`.
|
|
60
|
+
- Permission tests (`test_archive.py`) are skipped on Windows (`@pytest.mark.skipif(sys.platform == "win32")`)
|
|
61
|
+
|
|
62
|
+
## Configuration
|
|
63
|
+
|
|
64
|
+
Two TOML files:
|
|
65
|
+
- **config.toml**: service settings, logging, database, plugins, watch directories, archive destinations
|
|
66
|
+
- **rules.toml**: integrity checks, completeness grouping, metadata module, selection rules, routing rules
|
|
67
|
+
|
|
68
|
+
Paths in config.toml are resolved relative to the config file's directory. `archive.destination` is resolved relative to `archive.base_path` when set.
|
|
69
|
+
|
|
70
|
+
**Hot-reload**: The service watches `rules.toml` for changes (debounced 0.5s). On modification it validates the new file, checks that all watch configs still reference valid rule sets, and atomically swaps `self.rules_config`. Invalid files or missing rule sets are rejected with a log warning — the service keeps running with the previous rules.
|
|
71
|
+
|
|
72
|
+
**Database credentials**: `DatabaseConfig` uses structured fields (`drivername`, `host`, `port`, `username`, `password`, `name`) instead of a raw URL string. The `url` property builds the SQLAlchemy URL via `URL.create()`. Credentials are layered: config.toml values take priority over `~/.config/carbonation/secrets.toml`. Secrets file uses the same `[database]` section format. SQLite `name` paths are resolved relative to the config file.
|
|
73
|
+
|
|
74
|
+
## Reliability
|
|
75
|
+
|
|
76
|
+
- **Retry logic**: Archive failures mark components `RETRY_PENDING` instead of `FAILED`. A periodic checker re-enqueues them after `retry_delay`. After `max_retries` attempts, they are permanently marked `FAILED`. Manual retry is available via `carbonation retry`.
|
|
77
|
+
- **Post-archive verification**: File size is compared after copy/move. A mismatch deletes the corrupt copy and raises an error, which triggers a retry.
|
|
78
|
+
- **Watch liveness**: Each reconciliation cycle verifies watch directories are present and readable.
|
|
79
|
+
|
|
80
|
+
## Observability
|
|
81
|
+
|
|
82
|
+
- **Heartbeat**: `service_state` table updated on its own timer (`heartbeat_interval`, default `60s`) with `last_heartbeat`, `queue_depth`. Also pings the systemd watchdog (`WATCHDOG=1`) each cycle.
|
|
83
|
+
- **`carbonation status`**: Shows heartbeat age, component counts, watch directory health.
|
|
84
|
+
- **Stats logging**: Periodic summary logged each heartbeat: `Stats: archived=42, failed=2, queue_depth=0`.
|
|
85
|
+
- **Structured logging**: Set `format = "json"` in `[logging]` config for JSON lines output (loguru `serialize=True`).
|
|
86
|
+
- **`carbonation check-rules`**: Dry-run a candidate rules file against existing DB groups to preview selection/routing changes.
|
|
87
|
+
|
|
88
|
+
## CLI Commands
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
carbonation check-config # validate config + rules
|
|
92
|
+
carbonation db init # create database schema
|
|
93
|
+
carbonation db status # component counts, metadata check
|
|
94
|
+
carbonation run [--dry-run] [--once] # start the service
|
|
95
|
+
carbonation status # heartbeat, counts, watch health
|
|
96
|
+
carbonation retry [--watch-name X] # re-enqueue failed components
|
|
97
|
+
carbonation check-rules FILE # dry-run rules changes
|
|
98
|
+
carbonation reconcile delivery # sync delivery dirs with DB
|
|
99
|
+
carbonation reconcile archive # sync archive dirs with DB
|
|
100
|
+
carbonation query files [filters] # dynamic query with plugin columns
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Common Patterns
|
|
104
|
+
|
|
105
|
+
- All config models are Pydantic with field validators
|
|
106
|
+
- Duration strings: `"5s"`, `"1m"`, `"24h"`, `"7d"` (parsed by `config.parse_duration`)
|
|
107
|
+
- File permissions: `file_mode`/`dir_mode` as octal ints (default `0o440`/`0o550`), only applied on copy/move (not hardlink/symlink)
|
|
108
|
+
- `signal.signal()` is guarded for non-main-thread usage (needed for test harness)
|
|
109
|
+
- DB commits happen at caller boundaries, not in query functions
|
|
110
|
+
- Retry count tracked per-component; periodic checker increments on re-enqueue
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jonathan Olsten
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: carbonation
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: File delivery monitoring and archival service
|
|
5
|
+
Author-email: Jonathan Olsten <jonathan.olsten@gmail.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Requires-Dist: alembic<2,>=1.13
|
|
9
|
+
Requires-Dist: click>=8.1
|
|
10
|
+
Requires-Dist: loguru>=0.7.3
|
|
11
|
+
Requires-Dist: pydantic<3,>=2.10
|
|
12
|
+
Requires-Dist: rich>=14.3.3
|
|
13
|
+
Requires-Dist: sdnotify<1,>=0.3
|
|
14
|
+
Requires-Dist: sqlalchemy<3,>=2.0.48
|
|
15
|
+
Requires-Dist: watchdog>=6.0.0
|
|
16
|
+
Provides-Extra: mariadb
|
|
17
|
+
Requires-Dist: mysqlclient<3,>=2.2; extra == 'mariadb'
|
|
18
|
+
Provides-Extra: postgresql
|
|
19
|
+
Requires-Dist: psycopg2<3,>=2.9; extra == 'postgresql'
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# Carbonation
|
|
23
|
+
|
|
24
|
+
File delivery monitoring and archival service. Watches directories for incoming files, groups them by stem, validates integrity, extracts metadata via plugins, applies selection and routing rules, and archives to configurable destinations.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
Requires Python 3.11+.
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install carbonation
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
For MariaDB or PostgreSQL support:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install "carbonation[mariadb]" # mysqlclient
|
|
38
|
+
pip install "carbonation[postgresql]" # psycopg2
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# 1. Scaffold a new project
|
|
45
|
+
mkdir /opt/carbonation && cd /opt/carbonation
|
|
46
|
+
carbonation init
|
|
47
|
+
|
|
48
|
+
# 2. Edit the generated files
|
|
49
|
+
$EDITOR config.toml # set watch paths, archive destination, database
|
|
50
|
+
$EDITOR rules.toml # configure integrity, completeness, selection rules
|
|
51
|
+
$EDITOR plugins/file_model.py # define your File model columns and extract()
|
|
52
|
+
|
|
53
|
+
# 3. Validate, initialize, and run
|
|
54
|
+
carbonation check-config # validate configuration
|
|
55
|
+
carbonation db init # create database tables
|
|
56
|
+
carbonation run # start the service
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
`carbonation init` generates a complete starter project:
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
config.toml # service, logging, database, watch, and archive config
|
|
63
|
+
rules.toml # integrity, completeness, metadata, selection, routing
|
|
64
|
+
plugins/file_model.py # File model + extract() function template
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Edit `plugins/file_model.py` to define the domain-specific columns for your use case and the `extract()` function that reads metadata from your files. See [Plugin System](#plugin-system) for details.
|
|
68
|
+
|
|
69
|
+
## CLI Reference
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
carbonation init Scaffold config.toml, rules.toml, and a plugin template
carbonation check-config Validate config.toml and rules.toml
|
|
73
|
+
carbonation db init Create schema (migrations + plugin columns)
|
|
74
|
+
carbonation db upgrade Apply pending schema migrations only
|
|
75
|
+
carbonation db status Component counts and metadata check
|
|
76
|
+
carbonation run [--dry-run] [--once] Start the service
|
|
77
|
+
carbonation status Heartbeat, status table, watch health
|
|
78
|
+
carbonation retry [--watch-name X] Re-enqueue failed components
|
|
79
|
+
carbonation check-rules FILE Dry-run rules changes against DB
|
|
80
|
+
carbonation reconcile delivery Sync delivery directories with DB
|
|
81
|
+
carbonation reconcile archive Sync archive directories with DB
|
|
82
|
+
carbonation query files [filters] Query with dynamic plugin filters
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Query Examples
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
# Filter by plugin columns (dynamically generated from your model)
|
|
89
|
+
carbonation query files --category alpha --limit 50
|
|
90
|
+
|
|
91
|
+
# Date range overlap
|
|
92
|
+
carbonation query files --daterange 2026-01-01/2026-02-01
|
|
93
|
+
|
|
94
|
+
# Recent files by age
|
|
95
|
+
carbonation query files --age 7d
|
|
96
|
+
|
|
97
|
+
# Output formats
|
|
98
|
+
carbonation query files --category alpha --format json
|
|
99
|
+
carbonation query files --format csv > export.csv
|
|
100
|
+
|
|
101
|
+
# Count only
|
|
102
|
+
carbonation query files --category alpha --count
|
|
103
|
+
|
|
104
|
+
# Specific columns, sorted
|
|
105
|
+
carbonation query files --columns group_key,category,start --order-by -start
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Programmatic API
|
|
109
|
+
|
|
110
|
+
Query the database from Python scripts without going through the CLI:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from carbonation import connect
|
|
114
|
+
|
|
115
|
+
with connect("config.toml") as db:
|
|
116
|
+
# All files from a watch
|
|
117
|
+
rows = db.query_files({"watch_name": ["incoming"]}, limit=50)
|
|
118
|
+
for row in rows:
|
|
119
|
+
print(row["group_key"], row["created_at"])
|
|
120
|
+
|
|
121
|
+
# Count
|
|
122
|
+
n = db.query_files({"complete": True}, count_only=True)
|
|
123
|
+
|
|
124
|
+
# Date filtering + plugin columns
|
|
125
|
+
from datetime import datetime
|
|
126
|
+
rows = db.query_files({
|
|
127
|
+
"category": ["alpha"],
|
|
128
|
+
"created_at_after": datetime(2026, 1, 1),
|
|
129
|
+
}, order_by="-created_at", columns=["group_key", "category", "start"])
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
For one-off scripts, use the convenience function that handles setup and teardown in one call:
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from carbonation import query_files
|
|
136
|
+
|
|
137
|
+
rows = query_files("config.toml", {"category": ["alpha"]}, limit=50)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Watermark-based polling
|
|
141
|
+
|
|
142
|
+
For external consumers (e.g. an orchestrator) that need to poll for newly completed files without gaps:
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from carbonation import connect
|
|
146
|
+
|
|
147
|
+
cursor = None # persist this between invocations
|
|
148
|
+
with connect("config.toml") as db:
|
|
149
|
+
while True:
|
|
150
|
+
rows, cursor = db.get_new_files(cursor=cursor)
|
|
151
|
+
if not rows:
|
|
152
|
+
break
|
|
153
|
+
for row in rows:
|
|
154
|
+
print(row["group_key"], row["completed_at"])
|
|
155
|
+
|
|
156
|
+
# Check if the service is alive
|
|
157
|
+
status = db.get_service_status()
|
|
158
|
+
if status:
|
|
159
|
+
print(f"Last heartbeat: {status['last_heartbeat']}")
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### ORM access
|
|
163
|
+
|
|
164
|
+
For direct ORM access to query, modify, and commit changes:
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
from carbonation import session
|
|
168
|
+
from carbonation.db.models import FileComponent, FileStatus
|
|
169
|
+
|
|
170
|
+
with session("config.toml") as s:
|
|
171
|
+
comps = s.query(FileComponent).filter_by(status=FileStatus.ARCHIVED).all()
|
|
172
|
+
for c in comps:
|
|
173
|
+
c.status = FileStatus.CLEARED
|
|
174
|
+
s.commit()
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Filter operators
|
|
178
|
+
|
|
179
|
+
| Suffix | Operator | Example |
|
|
180
|
+
|---|---|---|
|
|
181
|
+
| `_after` / `_before` | `>=` / `<=` (datetime) | `{"created_at_after": datetime(2026, 1, 1)}` |
|
|
182
|
+
| `_min` / `_max` | `>=` / `<=` (numeric) | `{"id_min": 100}` |
|
|
183
|
+
| `_daterange` | start/stop overlap | `{"_daterange": (start, end)}` |
|
|
184
|
+
| `_age` | recency | `{"_age": datetime(2026, 3, 20)}` |
|
|
185
|
+
| (none) | exact / IN / LIKE | `{"category": ["alpha", "beta"]}` |
|
|
186
|
+
|
|
187
|
+
## Architecture
|
|
188
|
+
|
|
189
|
+
### Processing Pipeline
|
|
190
|
+
|
|
191
|
+
```mermaid
|
|
192
|
+
flowchart TD
|
|
193
|
+
A["File arrives in<br>watch directory"] --> B["Watchdog detects<br>creation / modification"]
|
|
194
|
+
B --> C["Settle timer<br>debounces"]
|
|
195
|
+
C --> D["Event enqueued"]
|
|
196
|
+
D --> E["Worker thread<br>picks up event"]
|
|
197
|
+
|
|
198
|
+
E --> S1["1 — Record component"]
|
|
199
|
+
S1 --> IC{{"2 — Integrity checks<br>(min_size, readability)"}}
|
|
200
|
+
|
|
201
|
+
IC -- fail --> IF["INTEGRITY_FAILED"]
|
|
202
|
+
IC -- pass --> S3{{"3 — Completeness<br>grouping"}}
|
|
203
|
+
|
|
204
|
+
S3 -- "incomplete<br>(waiting for extensions)" --> W["WAITING"]
|
|
205
|
+
S3 -- "timeout<br>(on_timeout=skip)" --> TO["TIMED_OUT"]
|
|
206
|
+
S3 -- "complete / standalone" --> S4["4 — Metadata extraction<br>(plugin extract)"]
|
|
207
|
+
|
|
208
|
+
S4 --> S5{{"5 — Selection rules"}}
|
|
209
|
+
S5 -- rejected --> NS["NOT_SELECTED"]
|
|
210
|
+
S5 -- accepted --> S6["6 — Routing<br>(choose archive)"]
|
|
211
|
+
|
|
212
|
+
S6 --> S7{{"7 — Archive<br>(copy / move / hardlink / symlink)"}}
|
|
213
|
+
S7 -- success --> AR["ARCHIVED"]
|
|
214
|
+
S7 -- "failure<br>(retries left)" --> RP["RETRY_PENDING"]
|
|
215
|
+
S7 -- "failure<br>(retries exhausted)" --> FA["FAILED"]
|
|
216
|
+
|
|
217
|
+
RP -. "retry_delay<br>elapsed" .-> E
|
|
218
|
+
IF -. "reconciliation<br>(re-assessed)" .-> E
|
|
219
|
+
NS -. "rules hot-reload<br>(re-evaluated)" .-> E
|
|
220
|
+
|
|
221
|
+
style IF fill:#d32f2f,color:#fff
|
|
222
|
+
style FA fill:#d32f2f,color:#fff
|
|
223
|
+
style TO fill:#757575,color:#fff
|
|
224
|
+
style NS fill:#757575,color:#fff
|
|
225
|
+
style W fill:#f9a825
|
|
226
|
+
style RP fill:#f57c00,color:#fff
|
|
227
|
+
style AR fill:#388e3c,color:#fff
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Reliability
|
|
231
|
+
|
|
232
|
+
- **Retry with backoff**: Archive failures are retried up to `max_retries` times with `retry_delay` between attempts. A component is marked permanently failed only after all retries are exhausted.
|
|
233
|
+
- **Post-archive verification**: File size is verified after copy/move. On a mismatch, the corrupt copy is deleted and the archive operation is retried.
|
|
234
|
+
- **Reconciliation**: Periodic sync between filesystem and database catches missed files, clears removed files, and re-enqueues pending work.
|
|
235
|
+
- **Rules hot-reload**: The service watches `rules.toml` for changes and validates before applying. Invalid changes are rejected without disruption.
|
|
236
|
+
|
|
237
|
+
### Plugin System
|
|
238
|
+
|
|
239
|
+
Plugins are `.py` files in the configured plugins directory. A plugin must provide:
|
|
240
|
+
|
|
241
|
+
- A `FileBase` subclass with `__tablename__ = "files"` defining domain-specific columns
|
|
242
|
+
- An `extract(file_paths: list[Path]) -> dict` function that returns metadata for the domain columns
|
|
243
|
+
|
|
244
|
+
Domain columns must be nullable since they are populated after the group row is created.
|
|
245
|
+
|
|
246
|
+
### Configuration
|
|
247
|
+
|
|
248
|
+
- **config.toml**: Service settings, logging, database, plugins, watch directories, archive destinations
|
|
249
|
+
- **rules.toml**: Integrity checks, completeness grouping, metadata module, selection rules, routing rules
|
|
250
|
+
|
|
251
|
+
Paths in `config.toml` are resolved relative to the config file's directory. `archive.destination` is resolved relative to `archive.base_path` when it is set.
|
|
252
|
+
|
|
253
|
+
Rules can be hot-reloaded by editing `rules.toml` while the service is running.
|
|
254
|
+
|
|
255
|
+
## Deployment
|
|
256
|
+
|
|
257
|
+
### Setting up a production install
|
|
258
|
+
|
|
259
|
+
Create a dedicated virtualenv so the service has an isolated, reproducible Python environment:
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
# Create the project directory and venv
|
|
263
|
+
sudo mkdir -p /opt/carbonation
|
|
264
|
+
cd /opt/carbonation
|
|
265
|
+
python3 -m venv .venv
|
|
266
|
+
|
|
267
|
+
# Install carbonation into the venv
|
|
268
|
+
.venv/bin/pip install carbonation
|
|
269
|
+
# Add database drivers if needed:
|
|
270
|
+
# .venv/bin/pip install "carbonation[mariadb]"
|
|
271
|
+
# .venv/bin/pip install "carbonation[postgresql]"
|
|
272
|
+
|
|
273
|
+
# Scaffold config, rules, and plugin template
|
|
274
|
+
.venv/bin/carbonation init
|
|
275
|
+
|
|
276
|
+
# Edit to match your environment
|
|
277
|
+
$EDITOR config.toml rules.toml plugins/file_model.py
|
|
278
|
+
|
|
279
|
+
# Validate and initialize
|
|
280
|
+
.venv/bin/carbonation check-config
|
|
281
|
+
.venv/bin/carbonation db init
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
### systemd
|
|
285
|
+
|
|
286
|
+
Generate a systemd unit file during scaffolding:
|
|
287
|
+
|
|
288
|
+
```bash
|
|
289
|
+
.venv/bin/carbonation init --systemd --user carbonation
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Then install and enable it:
|
|
293
|
+
|
|
294
|
+
```bash
|
|
295
|
+
sudo cp carbonation.service /etc/systemd/system/
|
|
296
|
+
sudo systemctl daemon-reload
|
|
297
|
+
sudo systemctl enable --now carbonation
|
|
298
|
+
sudo systemctl status carbonation
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
The generated unit file uses `Type=notify` with `WatchdogSec=300` — systemd waits for carbonation to signal readiness before marking it as started, and automatically restarts it if the watchdog heartbeat stops (e.g. hung process). Edit `ReadWritePaths=` in the unit file to include your delivery and archive directories.
|
|
302
|
+
|
|
303
|
+
`WatchdogSec` should be at least 2x your `heartbeat_interval` in `config.toml` (default: `60s`).
|
|
304
|
+
|
|
305
|
+
### Cron
|
|
306
|
+
|
|
307
|
+
For environments where a persistent service is not needed, run carbonation as a cron job:
|
|
308
|
+
|
|
309
|
+
```bash
|
|
310
|
+
*/5 * * * * /opt/carbonation/.venv/bin/carbonation -c /opt/carbonation/config.toml run --once
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
`--once` reconciles delivery directories, processes all pending events, and exits. It can also supplement a running service as defense-in-depth.
|
|
314
|
+
|
|
315
|
+
### Upgrading
|
|
316
|
+
|
|
317
|
+
```bash
|
|
318
|
+
cd /opt/carbonation
|
|
319
|
+
.venv/bin/pip install --upgrade carbonation
|
|
320
|
+
.venv/bin/carbonation -c config.toml db upgrade # apply schema migrations
|
|
321
|
+
.venv/bin/carbonation -c config.toml db init # sync plugin columns
|
|
322
|
+
sudo systemctl restart carbonation
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
`db upgrade` applies pending Alembic migrations (carbonation's base tables). `db init` then adds any new plugin-defined columns. Both are idempotent.
|
|
326
|
+
|
|
327
|
+
### Monitoring
|
|
328
|
+
|
|
329
|
+
```bash
|
|
330
|
+
.venv/bin/carbonation -c /opt/carbonation/config.toml status
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
The `status` command shows:
|
|
334
|
+
- Service heartbeat age (warns if stale)
|
|
335
|
+
- Component status breakdown by time window (1h/4h/1d/7d/30d)
|
|
336
|
+
- Delivery and archive totals
|
|
337
|
+
- Retry queue depth
|
|
338
|
+
- Oldest incomplete group
|
|
339
|
+
- Watch directory health
|
|
340
|
+
|
|
341
|
+
For log aggregation, set `format = "json"` in the logging config to produce structured JSON lines.
|
|
342
|
+
|
|
343
|
+
## Development
|
|
344
|
+
|
|
345
|
+
```bash
|
|
346
|
+
uv sync # install dependencies
|
|
347
|
+
uv run pytest tests/ # all tests (~50s)
|
|
348
|
+
uv run pytest tests/ -m "not integration" # unit tests only (~2s)
|
|
349
|
+
uv run ruff check src/ tests/ # lint
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
## License
|
|
353
|
+
|
|
354
|
+
MIT
|