audit-framework-jsonl 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ .eggs/
7
+ build/
8
+ dist/
9
+ wheels/
10
+ *.egg
11
+
12
+ # Test / coverage
13
+ .pytest_cache/
14
+ .coverage
15
+ .coverage.*
16
+ htmlcov/
17
+ .tox/
18
+ .cache/
19
+
20
+ # Virtual environments
21
+ .venv/
22
+ venv/
23
+ env/
24
+ ENV/
25
+
26
+ # Tooling / SAST artifacts
27
+ *.sarif
28
+ bandit.sarif
29
+ semgrep.sarif
30
+
31
+ # Editors / OS
32
+ .idea/
33
+ .vscode/
34
+ *.swp
35
+ .DS_Store
@@ -0,0 +1,84 @@
1
+ Metadata-Version: 2.4
2
+ Name: audit-framework-jsonl
3
+ Version: 0.1.0
4
+ Summary: Append-only JSONL file ExternalSink for audit-framework — the reference sink plugin.
5
+ Project-URL: Homepage, https://github.com/vanmarkic/audit-logger
6
+ Project-URL: Repository, https://github.com/vanmarkic/audit-logger
7
+ License-Expression: MIT
8
+ Keywords: audit,audit-log,jsonl,plugin,siem,sink
9
+ Requires-Python: >=3.11
10
+ Requires-Dist: audit-framework<0.2,>=0.1
11
+ Provides-Extra: dev
12
+ Requires-Dist: pytest>=8.0; extra == 'dev'
13
+ Description-Content-Type: text/markdown
14
+
15
+ # audit-framework-jsonl
16
+
17
+ An append-only **JSON-Lines file sink** for
18
+ [`audit-framework`](../audit-framework) — and the **reference implementation**
19
+ every other `ExternalSink` (Splunk HEC, Elasticsearch, syslog, …) can be copied
20
+ from.
21
+
22
+ It implements the `ExternalSink` port: one compact JSON object per line,
23
+ appended to a file. Writes are serialised and run off the event loop, with
24
+ optional rotation by date and/or size.
25
+
26
+ ## Install
27
+
28
+ ```bash
29
+ pip install audit-framework-jsonl
30
+ ```
31
+
32
+ ## Use
33
+
34
+ ```python
35
+ from audit_framework_jsonl.sink import JsonlFileSink
36
+ from audit_framework.core.middlewares.sink_fanout import SinkFanOutMiddleware
37
+
38
+ sink = JsonlFileSink("/var/log/audit/audit.jsonl", daily=True, max_bytes=50_000_000)
39
+ pipeline.use(SinkFanOutMiddleware([sink])) # fan events out to this sink
40
+ ```
41
+
42
+ Or wire it by configuration through the registry — it advertises itself under
43
+ the `audit_framework.plugins` entry point as the `file_jsonl` external sink:
44
+
45
+ ```python
46
+ registry.discover_entrypoints() # finds file_jsonl
47
+ SinkClass = registry.get("external_sink", "file_jsonl")
48
+ sink = SinkClass("/var/log/audit/audit.jsonl")
49
+ ```
50
+
51
+ Each emitted line is `event.to_dict()` serialised compactly, e.g.:
52
+
53
+ ```json
54
+ {"actor_id":"alice","action":"DELETE","resource_type":"contract","resource_id":"c-42",...}
55
+ ```
56
+
57
+ ### Options
58
+
59
+ | Param | Effect |
60
+ |---|---|
61
+ | `name` | The `sink_name` used for per-policy sink filtering (default `"file_jsonl"`). |
62
+ | `daily` | Write to a date-stamped file `audit-YYYY-MM-DD.jsonl` (one per UTC day). |
63
+ | `max_bytes` | Roll the current file aside (`audit.<timestamp>.jsonl`) before it exceeds this size. Composes with `daily`. |
64
+ | `clock` | Injectable time source for rotation stamps (testing). |
65
+
66
+ ## Writing your own sink
67
+
68
+ `sink.py` is deliberately tiny — copy it and change four things:
69
+
70
+ 1. a stable `sink_name` (matched against `AuditPolicy.sinks`),
71
+ 2. `async emit(event, context)` — forward `event.to_dict()` to your platform (best-effort; raise on permanent failure so the pipeline records it),
72
+ 3. `async health_check()` — report reachability,
73
+ 4. a `register(registry)` that calls `registry.register("external_sink", "<name>", YourSink)` (+ an `audit_framework.plugins` entry point in `pyproject.toml`).
74
+
75
+ ## Development
76
+
77
+ ```bash
78
+ pip install -e ".[dev]"
79
+ pytest # 9 stdlib-only tests (tmp files; no infrastructure)
80
+ ```
81
+
82
+ ## License
83
+
84
+ MIT
@@ -0,0 +1,70 @@
1
+ # audit-framework-jsonl
2
+
3
+ An append-only **JSON-Lines file sink** for
4
+ [`audit-framework`](../audit-framework) — and the **reference implementation**
5
+ every other `ExternalSink` (Splunk HEC, Elasticsearch, syslog, …) can be copied
6
+ from.
7
+
8
+ It implements the `ExternalSink` port: one compact JSON object per line,
9
+ appended to a file. Writes are serialised and run off the event loop, with
10
+ optional rotation by date and/or size.
11
+
12
+ ## Install
13
+
14
+ ```bash
15
+ pip install audit-framework-jsonl
16
+ ```
17
+
18
+ ## Use
19
+
20
+ ```python
21
+ from audit_framework_jsonl.sink import JsonlFileSink
22
+ from audit_framework.core.middlewares.sink_fanout import SinkFanOutMiddleware
23
+
24
+ sink = JsonlFileSink("/var/log/audit/audit.jsonl", daily=True, max_bytes=50_000_000)
25
+ pipeline.use(SinkFanOutMiddleware([sink])) # fan events out to this sink
26
+ ```
27
+
28
+ Or wire it by configuration through the registry — it advertises itself under
29
+ the `audit_framework.plugins` entry point as the `file_jsonl` external sink:
30
+
31
+ ```python
32
+ registry.discover_entrypoints() # finds file_jsonl
33
+ SinkClass = registry.get("external_sink", "file_jsonl")
34
+ sink = SinkClass("/var/log/audit/audit.jsonl")
35
+ ```
36
+
37
+ Each emitted line is `event.to_dict()` serialised compactly, e.g.:
38
+
39
+ ```json
40
+ {"actor_id":"alice","action":"DELETE","resource_type":"contract","resource_id":"c-42",...}
41
+ ```
42
+
43
+ ### Options
44
+
45
+ | Param | Effect |
46
+ |---|---|
47
+ | `name` | The `sink_name` used for per-policy sink filtering (default `"file_jsonl"`). |
48
+ | `daily` | Write to a date-stamped file `audit-YYYY-MM-DD.jsonl` (one per UTC day). |
49
+ | `max_bytes` | Roll the current file aside (`audit.<timestamp>.jsonl`) before it exceeds this size. Composes with `daily`. |
50
+ | `clock` | Injectable time source for rotation stamps (testing). |
51
+
52
+ ## Writing your own sink
53
+
54
+ `sink.py` is deliberately tiny — copy it and change four things:
55
+
56
+ 1. a stable `sink_name` (matched against `AuditPolicy.sinks`),
57
+ 2. `async emit(event, context)` — forward `event.to_dict()` to your platform (best-effort; raise on permanent failure so the pipeline records it),
58
+ 3. `async health_check()` — report reachability,
59
+ 4. a `register(registry)` that calls `registry.register("external_sink", "<name>", YourSink)` (+ an `audit_framework.plugins` entry point in `pyproject.toml`).
60
+
61
+ ## Development
62
+
63
+ ```bash
64
+ pip install -e ".[dev]"
65
+ pytest # 9 stdlib-only tests (tmp files; no infrastructure)
66
+ ```
67
+
68
+ ## License
69
+
70
+ MIT
@@ -0,0 +1,37 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-vcs"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "audit-framework-jsonl"
7
+ dynamic = ["version"]
8
+ description = "Append-only JSONL file ExternalSink for audit-framework — the reference sink plugin."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.11"
12
+ keywords = ["audit", "audit-log", "sink", "jsonl", "siem", "plugin"]
13
+ dependencies = ["audit-framework>=0.1,<0.2"]
14
+
15
+ [project.optional-dependencies]
16
+ dev = ["pytest>=8.0"]
17
+
18
+ [project.urls]
19
+ Homepage = "https://github.com/vanmarkic/audit-logger"
20
+ Repository = "https://github.com/vanmarkic/audit-logger"
21
+
22
+ # Discovered by audit-framework's PluginRegistry.discover_entrypoints().
23
+ [project.entry-points."audit_framework.plugins"]
24
+ file_jsonl = "audit_framework_jsonl.plugin:register"
25
+
26
+ [tool.hatch.build.targets.wheel]
27
+ packages = ["src/audit_framework_jsonl"]
28
+
29
+ [tool.pytest.ini_options]
30
+ testpaths = ["tests"]
31
+ # Resolve the sibling core package without an install during local dev.
32
+ pythonpath = ["src", "../audit-framework/src"]
33
+ python_files = ["*_test.py", "test_*.py"]
34
+
35
+ [tool.hatch.version]
36
+ source = "vcs"
37
+ raw-options = { tag_regex = "^audit-framework-jsonl-v(?P<version>.+)$", fallback_version = "0.1.0" }
@@ -0,0 +1,18 @@
1
+ """audit-framework-jsonl — append-only JSONL file sink for audit-framework.
2
+
3
+ The reference :class:`~audit_framework.core.ports.ExternalSink` implementation:
4
+ copy ``sink.py`` as the template for your own sink (Splunk HEC, Elasticsearch,
5
+ syslog, …).
6
+ """
7
+
8
+ from audit_framework_jsonl.plugin import register
9
+ from audit_framework_jsonl.sink import JsonlFileSink
10
+
11
+ import importlib.metadata as _md
12
+
13
# Start from the placeholder used when running from a source checkout, then
# replace it with the installed distribution's version when metadata exists.
__version__ = "0.0.0+unknown"
try:
    __version__ = _md.version("audit-framework-jsonl")
except _md.PackageNotFoundError:
    # No installed distribution metadata: keep the placeholder.
    pass
17
+
18
+ __all__ = ["JsonlFileSink", "register", "__version__"]
@@ -0,0 +1,20 @@
1
+ """Plugin registration for the JSONL file sink.
2
+
3
+ The core never imports this module directly; it is reached either via the
4
+ ``audit_framework.plugins`` entry point declared in ``pyproject.toml`` (picked
5
+ up by :meth:`PluginRegistry.discover_entrypoints`) or by an explicit module
6
+ path passed to :meth:`PluginRegistry.load_from_config`.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any
12
+
13
+ from audit_framework_jsonl.sink import JsonlFileSink
14
+
15
+ __all__ = ["register"]
16
+
17
+
18
def register(registry: Any) -> None:
    """Expose :class:`JsonlFileSink` to a plugin registry.

    Makes a single ``registry.register`` call that files the sink class under
    the ``external_sink`` kind with the provider name ``file_jsonl``.
    """
    kind, provider = "external_sink", "file_jsonl"
    registry.register(kind, provider, JsonlFileSink)
@@ -0,0 +1,135 @@
1
+ """JsonlFileSink — an append-only JSON-Lines :class:`ExternalSink`.
2
+
3
+ This is the **reference sink implementation** for the audit-framework plugin
4
+ system: the simplest possible adapter, written so a customer can copy it as the
5
+ template for their own sink (Splunk HEC, Elasticsearch, syslog, …). The recipe
6
+ every sink follows:
7
+
8
+ 1. Expose a stable :py:attr:`sink_name` (matched against ``AuditPolicy.sinks``).
9
+ 2. Implement ``async emit(event, context)`` — forward one event, best-effort.
10
+ 3. Implement ``async health_check()`` — report whether the downstream is usable.
11
+ 4. Register the class in a ``register(registry)`` function (see ``plugin.py``).
12
+
13
+ This sink appends one compact JSON object per line to a file. Writes are
14
+ serialised with an :class:`asyncio.Lock` and offloaded with
15
+ :func:`asyncio.to_thread`, so concurrent ``emit`` calls neither interleave nor
16
+ block the event loop. Optional rotation is supported by date (one file per UTC
17
+ day) and/or by size (roll the current file aside once it would exceed a byte
18
+ threshold).
19
+
20
+ Only the standard library is used here; the ``audit_framework`` import is for
21
+ type hints and is part of this plugin's declared dependency.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import asyncio
27
+ import json
28
+ import os
29
+ from datetime import datetime, timezone
30
+ from pathlib import Path
31
+ from typing import Callable, Optional
32
+
33
+ from audit_framework.core.models import AuditEvent, PipelineContext
34
+
35
+ __all__ = ["JsonlFileSink"]
36
+
37
+
38
+ def _utc_now() -> datetime:
39
+ return datetime.now(timezone.utc)
40
+
41
+
42
+ class JsonlFileSink:
43
+ """Appends each audit event as one JSON line to a file.
44
+
45
+ Parameters
46
+ ----------
47
+ path:
48
+ Destination file (its parent directories are created on demand).
49
+ name:
50
+ The :pyattr:`sink_name` used for per-policy sink filtering.
51
+ daily:
52
+ When True, write to a date-stamped file ``<stem>-YYYY-MM-DD<suffix>``
53
+ so each UTC day gets its own file.
54
+ max_bytes:
55
+ When set, roll the current file aside (``<stem>.<timestamp><suffix>``)
56
+ before a write that would push it past this size. Composes with
57
+ ``daily``.
58
+ clock:
59
+ Injectable time source (returns an aware :class:`datetime`); used for
60
+ rotation stamps. Defaults to ``datetime.now(timezone.utc)``.
61
+ """
62
+
63
+ def __init__(
64
+ self,
65
+ path: str | os.PathLike[str],
66
+ *,
67
+ name: str = "file_jsonl",
68
+ daily: bool = False,
69
+ max_bytes: Optional[int] = None,
70
+ clock: Callable[[], datetime] = _utc_now,
71
+ ) -> None:
72
+ self._base = Path(path)
73
+ self._name = name
74
+ self._daily = daily
75
+ self._max_bytes = max_bytes
76
+ self._clock = clock
77
+ self._lock = asyncio.Lock()
78
+
79
+ @property
80
+ def sink_name(self) -> str:
81
+ """Stable identifier matched against ``AuditPolicy.sinks``."""
82
+ return self._name
83
+
84
+ async def emit(self, event: AuditEvent, context: PipelineContext) -> None:
85
+ """Append ``event`` (as one JSON line) to the current target file.
86
+
87
+ Best-effort and serialised: a raised OSError propagates so the
88
+ ``SinkFanOutMiddleware`` can record the failure, but it never corrupts a
89
+ concurrent write (the lock guarantees one writer at a time).
90
+ """
91
+ line = json.dumps(
92
+ event.to_dict(), separators=(",", ":"), ensure_ascii=False, default=str
93
+ )
94
+ async with self._lock:
95
+ await asyncio.to_thread(self._write, line)
96
+
97
+ async def health_check(self) -> bool:
98
+ """Return True if the target directory exists (or can be) and is writable."""
99
+ return await asyncio.to_thread(self._check_writable)
100
+
101
+ # ----------------------------------------------------------------- #
102
+ # Blocking helpers (run inside asyncio.to_thread) #
103
+ # ----------------------------------------------------------------- #
104
+ def _write(self, line: str) -> None:
105
+ path = self._target_path()
106
+ path.parent.mkdir(parents=True, exist_ok=True)
107
+ if self._max_bytes is not None and path.exists():
108
+ projected = path.stat().st_size + len(line.encode("utf-8")) + 1
109
+ if projected > self._max_bytes:
110
+ self._rollover(path)
111
+ with path.open("a", encoding="utf-8") as fh:
112
+ fh.write(line + "\n")
113
+
114
+ def _check_writable(self) -> bool:
115
+ try:
116
+ parent = self._base.parent
117
+ parent.mkdir(parents=True, exist_ok=True)
118
+ return os.access(parent, os.W_OK)
119
+ except OSError:
120
+ return False
121
+
122
+ def _target_path(self) -> Path:
123
+ if not self._daily:
124
+ return self._base
125
+ stamp = self._clock().strftime("%Y-%m-%d")
126
+ return self._base.with_name(f"{self._base.stem}-{stamp}{self._base.suffix}")
127
+
128
+ def _rollover(self, path: Path) -> None:
129
+ stamp = self._clock().strftime("%Y%m%dT%H%M%S")
130
+ target = path.with_name(f"{path.stem}.{stamp}{path.suffix}")
131
+ suffix = 0
132
+ while target.exists(): # never clobber an existing rolled file
133
+ suffix += 1
134
+ target = path.with_name(f"{path.stem}.{stamp}.{suffix}{path.suffix}")
135
+ path.rename(target)
@@ -0,0 +1,155 @@
1
+ """Tests for JsonlFileSink — stdlib-only, no infrastructure (uses tmp_path)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ from datetime import datetime, timezone
8
+
9
+ import pytest
10
+
11
+ from audit_framework.core.models import AuditEvent, PipelineContext
12
+ from audit_framework.core.ports import ExternalSink
13
+ from audit_framework.core.plugin_registry import PluginRegistry
14
+
15
+ from audit_framework_jsonl.plugin import register
16
+ from audit_framework_jsonl.sink import JsonlFileSink
17
+
18
+
19
def _event(resource_id: str = "c-1", action: str = "DELETE") -> AuditEvent:
    """Build a sample ``AuditEvent``; only the resource id and action vary."""
    fields = {
        "actor_id": "alice",
        "action": action,
        "resource_type": "contract",
        "resource_id": resource_id,
        "timestamp": "2026-06-26T00:00:00+00:00",
        "request_id": "req-1",
        "changes": {"amount": {"old": 1, "new": 2}},
    }
    return AuditEvent(**fields)
29
+
30
+
31
def _ctx(event: AuditEvent) -> PipelineContext:
    """Wrap ``event`` in a new ``PipelineContext``."""
    context = PipelineContext(event=event)
    return context
33
+
34
+
35
def test_satisfies_external_sink_protocol(tmp_path) -> None:
    """The sink passes the ``ExternalSink`` isinstance check and has the default name."""
    target = tmp_path / "audit.jsonl"
    sink = JsonlFileSink(target)
    # structural (runtime_checkable) check
    assert isinstance(sink, ExternalSink)
    assert sink.sink_name == "file_jsonl"
39
+
40
+
41
def test_emit_appends_one_json_line_per_event(tmp_path) -> None:
    """Two emits produce two JSON lines, in order, with the event fields intact."""
    path = tmp_path / "audit.jsonl"
    sink = JsonlFileSink(path)

    for rid in ("c-1", "c-2"):
        asyncio.run(sink.emit(_event(rid), _ctx(_event(rid))))

    lines = path.read_text(encoding="utf-8").splitlines()
    assert len(lines) == 2

    first = json.loads(lines[0])
    assert first["action"] == "DELETE"
    assert first["resource_id"] == "c-1"
    assert first["changes"] == {"amount": {"old": 1, "new": 2}}

    second = json.loads(lines[1])
    assert second["resource_id"] == "c-2"
55
+
56
+
57
def test_creates_parent_directories(tmp_path) -> None:
    """Emitting to a path under missing directories creates the file."""
    target = tmp_path / "nested" / "dir" / "audit.jsonl"
    sink = JsonlFileSink(target)
    event = _event()
    asyncio.run(sink.emit(event, _ctx(event)))
    assert target.exists()
63
+
64
+
65
def test_daily_rotation_uses_date_stamped_file(tmp_path) -> None:
    """With ``daily=True`` the event lands in ``audit-<date>.jsonl``, not the base file."""

    def clock() -> datetime:
        # Frozen time source so the expected file name is deterministic.
        return datetime(2026, 6, 26, 12, 0, tzinfo=timezone.utc)

    sink = JsonlFileSink(tmp_path / "audit.jsonl", daily=True, clock=clock)
    event = _event()

    asyncio.run(sink.emit(event, _ctx(event)))

    stamped = tmp_path / "audit-2026-06-26.jsonl"
    unstamped = tmp_path / "audit.jsonl"
    assert stamped.exists()
    assert not unstamped.exists()
74
+
75
+
76
def test_size_rollover_moves_current_file_aside(tmp_path) -> None:
    """A second write past ``max_bytes`` rolls the first file aside and starts a new one."""
    current = tmp_path / "audit.jsonl"
    # tiny threshold so the second write triggers a rollover of the first
    sink = JsonlFileSink(current, max_bytes=10)
    event = _event()

    # First emit creates the file; the second rolls it aside and writes fresh.
    for _ in range(2):
        asyncio.run(sink.emit(event, _ctx(event)))

    rolled = list(tmp_path.glob("audit.*.jsonl"))
    assert len(rolled) == 1  # exactly one rolled-aside file
    rolled_lines = rolled[0].read_text(encoding="utf-8").splitlines()
    assert len(rolled_lines) == 1
    current_lines = current.read_text(encoding="utf-8").splitlines()
    assert len(current_lines) == 1  # fresh current file
89
+
90
+
91
def test_health_check_true_for_writable_dir(tmp_path) -> None:
    """``health_check`` returns True for a target under a writable temp directory."""
    target = tmp_path / "sub" / "audit.jsonl"
    sink = JsonlFileSink(target)
    healthy = asyncio.run(sink.health_check())
    assert healthy is True
94
+
95
+
96
def test_concurrent_emits_do_not_interleave(tmp_path) -> None:
    """25 emits gathered concurrently yield 25 intact JSON lines, one per event."""
    path = tmp_path / "audit.jsonl"
    sink = JsonlFileSink(path)
    expected_ids = [f"c-{i}" for i in range(25)]

    async def emit_many() -> None:
        events = [_event(rid) for rid in expected_ids]
        pending = [sink.emit(ev, _ctx(ev)) for ev in events]
        await asyncio.gather(*pending)

    asyncio.run(emit_many())

    lines = path.read_text(encoding="utf-8").splitlines()
    assert len(lines) == 25
    # every line is intact JSON (no corruption) and all events are present
    seen_ids = sorted(json.loads(raw)["resource_id"] for raw in lines)
    assert seen_ids == sorted(expected_ids)
111
+
112
+
113
def test_register_wires_sink_into_registry() -> None:
    """``register`` makes ``JsonlFileSink`` retrievable and listed as ``file_jsonl``."""
    kind = "external_sink"
    registry = PluginRegistry()
    register(registry)

    registered = registry.get(kind, "file_jsonl")
    assert registered is JsonlFileSink
    assert "file_jsonl" in registry.list_providers(kind)
118
+
119
+
120
def test_end_to_end_through_the_pipeline(tmp_path) -> None:
    """An event executed through the pipeline ends up as one line in the JSONL file."""
    # Prove the reference sink composes with the real core: an event audited by
    # the pipeline is fanned out to the JSONL file by SinkFanOutMiddleware.
    from audit_framework.core.middlewares.audit_policy import AuditPolicyMiddleware
    from audit_framework.core.middlewares.sink_fanout import SinkFanOutMiddleware
    from audit_framework.core.models import AuditPolicy
    from audit_framework.core.pipeline import Pipeline

    class _PolicyStore:
        """Minimal in-memory policy store: one match-all audit policy, no broadcasts."""

        def get_audit_policies(self):
            return [AuditPolicy(name="all", match={})]

        def get_broadcast_policies(self):
            return []

        def reload(self):
            pass

    path = tmp_path / "audit.jsonl"
    sink = JsonlFileSink(path)

    # Same two middlewares, in the same order, added one step at a time.
    pipeline = Pipeline()
    pipeline = pipeline.use(AuditPolicyMiddleware(_PolicyStore()))
    pipeline = pipeline.use(SinkFanOutMiddleware([sink]))

    ctx = asyncio.run(pipeline.execute(_event("c-9")))

    assert "sink_failures" not in ctx.metadata
    lines = path.read_text(encoding="utf-8").splitlines()
    assert len(lines) == 1
    record = json.loads(lines[0])
    assert record["resource_id"] == "c-9"
152
+
153
+
154
# Allow running this test module directly; exit with pytest's status code.
if __name__ == "__main__":
    exit_code = pytest.main([__file__, "-v"])
    raise SystemExit(exit_code)