caducus 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- caducus-0.1.0/PKG-INFO +171 -0
- caducus-0.1.0/README.md +153 -0
- caducus-0.1.0/pyproject.toml +56 -0
- caducus-0.1.0/setup.cfg +4 -0
- caducus-0.1.0/src/caducus/__init__.py +3 -0
- caducus-0.1.0/src/caducus/biblicus_adapter.py +104 -0
- caducus-0.1.0/src/caducus/cli.py +109 -0
- caducus-0.1.0/src/caducus/collectors/__init__.py +1 -0
- caducus-0.1.0/src/caducus/collectors/demo_dataset.py +83 -0
- caducus-0.1.0/src/caducus/config.py +105 -0
- caducus-0.1.0/src/caducus/events.py +45 -0
- caducus-0.1.0/src/caducus/storage.py +57 -0
- caducus-0.1.0/src/caducus.egg-info/PKG-INFO +171 -0
- caducus-0.1.0/src/caducus.egg-info/SOURCES.txt +17 -0
- caducus-0.1.0/src/caducus.egg-info/dependency_links.txt +1 -0
- caducus-0.1.0/src/caducus.egg-info/entry_points.txt +2 -0
- caducus-0.1.0/src/caducus.egg-info/requires.txt +13 -0
- caducus-0.1.0/src/caducus.egg-info/top_level.txt +1 -0
- caducus-0.1.0/tests/test_demo_slice.py +121 -0
caducus-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: caducus
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI for collecting ops events and running reinforcement-memory analysis.
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: PyYAML>=6.0
|
|
8
|
+
Requires-Dist: virtuus>=0.4.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: behave>=1.2.6; extra == "dev"
|
|
11
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
12
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
13
|
+
Requires-Dist: black>=24.0; extra == "dev"
|
|
14
|
+
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
15
|
+
Requires-Dist: python-semantic-release>=10.5.3; extra == "dev"
|
|
16
|
+
Provides-Extra: reinforcement-memory
|
|
17
|
+
Requires-Dist: biblicus[reinforcement-memory]>=1.8.0; extra == "reinforcement-memory"
|
|
18
|
+
|
|
19
|
+
# Caducus
|
|
20
|
+
|
|
21
|
+
Caducus helps operations teams understand what is going wrong right now across logs, alerts, dead-letter queues, and other operational event streams.
|
|
22
|
+
|
|
23
|
+
It is a CLI-first system for collecting timestamped operational events, normalizing them into a canonical schema, storing them as plain JSON, and using semantic reinforcement memory to surface recurring patterns, fresh anomalies, and just-in-time context during incidents.
|
|
24
|
+
|
|
25
|
+
## Why Caducus Exists
|
|
26
|
+
|
|
27
|
+
Operational signals are scattered across many systems:
|
|
28
|
+
|
|
29
|
+
- CloudWatch logs
|
|
30
|
+
- alerting systems
|
|
31
|
+
- dead-letter queues
|
|
32
|
+
- notifications and incident messages
|
|
33
|
+
|
|
34
|
+
Each source captures part of the truth, but not the whole picture. Caducus is intended to bring those signals together into one stream of timestamped event records that can be analyzed as a living memory of operational behavior.
|
|
35
|
+
|
|
36
|
+
The goal is not just to search historical data. The goal is to create a radar for what looks unusual, active, or important now.
|
|
37
|
+
|
|
38
|
+
## How It Works
|
|
39
|
+
|
|
40
|
+
Caducus is designed around a simple flow:
|
|
41
|
+
|
|
42
|
+
1. Collect operational events from source systems.
|
|
43
|
+
2. Normalize them into canonical event records with text, timestamps, source identity, and generalized metadata.
|
|
44
|
+
3. Persist them as JSON files in a Virtuus-backed folder structure.
|
|
45
|
+
4. Analyze event groups using Biblicus reinforcement memory.
|
|
46
|
+
5. Surface patterns, anomalies, and context for operators.
|
|
47
|
+
|
|
48
|
+
This keeps the system inspectable and composable. The underlying data lives in plain folders, not inside a black-box database.
|
|
49
|
+
|
|
50
|
+
## CLI-First MVP
|
|
51
|
+
|
|
52
|
+
The initial product is a CLI utility.
|
|
53
|
+
|
|
54
|
+
The MVP is focused on a coherent end-to-end flow:
|
|
55
|
+
|
|
56
|
+
- collect events from operational sources
|
|
57
|
+
- store them in a canonical schema
|
|
58
|
+
- run analysis over selected event groups
|
|
59
|
+
- inspect recent events and analysis outputs from the command line
|
|
60
|
+
|
|
61
|
+
Initial source areas for the MVP are:
|
|
62
|
+
|
|
63
|
+
- CloudWatch Logs
|
|
64
|
+
- SQS dead-letter queues
|
|
65
|
+
- one alert source
|
|
66
|
+
|
|
67
|
+
Configuration is intended to be layered through YAML, environment variables, and CLI overrides. Caducus will own collection and orchestration while allowing Biblicus-related analysis settings to flow through the Caducus configuration tree without duplicating Biblicus's schema.
|
|
68
|
+
|
|
69
|
+
## Architecture At A Glance
|
|
70
|
+
|
|
71
|
+
Caducus is intentionally thin:
|
|
72
|
+
|
|
73
|
+
- **Caducus** handles collection, normalization, orchestration, and CLI workflows.
|
|
74
|
+
- **Virtuus** provides file-backed JSON storage and retrieval.
|
|
75
|
+
- **Biblicus** provides semantic reinforcement-memory analysis.
|
|
76
|
+
|
|
77
|
+
```mermaid
|
|
78
|
+
flowchart LR
|
|
79
|
+
sources[OpsSources] --> caducus[Caducus]
|
|
80
|
+
caducus --> events[CanonicalEvents]
|
|
81
|
+
events --> virtuus[VirtuusStorage]
|
|
82
|
+
caducus --> biblicus[BiblicusAnalysis]
|
|
83
|
+
biblicus --> radar[OpsRadar]
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Running the demo
|
|
87
|
+
|
|
88
|
+
Real HDFS data uses **component-derived group IDs**: each log row’s `component` becomes `hdfs-demo:<component>` (e.g. `hdfs-demo:dfs.DataNode$DataXceiver`). You must use a group ID that exists in your ingested data.
|
|
89
|
+
|
|
90
|
+
### Quick demo (small fixture, no download)
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
pip install -e ".[reinforcement-memory]"
|
|
94
|
+
caducus demo run --input tests/fixtures/demo_hdfs_sample.csv --group-id "hdfs-demo:DataNode" --data-dir /tmp/caducus-demo
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
The fixture has components `DataNode` and `NameNode`, so valid group IDs are `hdfs-demo:DataNode` and `hdfs-demo:NameNode`.
|
|
98
|
+
|
|
99
|
+
### Full demo on real HDFS data
|
|
100
|
+
|
|
101
|
+
1. Install optional deps (Biblicus reinforcement-memory and the datasets library for the download script):
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pip install -e ".[reinforcement-memory]"
|
|
105
|
+
pip install datasets
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
2. Download a subset of the [HDFS_v1](https://huggingface.co/datasets/logfit-project/HDFS_v1) dataset:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
python scripts/download_hdfs_demo.py --output demo_data/hdfs_sample.csv --max-rows 10000
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
3. Ingest and list available groups (group IDs come from the CSV `component` column):
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
caducus demo ingest --input demo_data/hdfs_sample.csv --data-dir ./caducus-data
|
|
118
|
+
caducus groups --data-dir ./caducus-data
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
4. Run analysis for one of the listed group IDs:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
caducus analyze --group-id "hdfs-demo:dfs.DataNode$DataXceiver" --data-dir ./caducus-data
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Or do ingest and analyze in one step (use a group ID that exists in the CSV):
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
caducus demo run --input demo_data/hdfs_sample.csv --group-id "hdfs-demo:dfs.DataNode$DataXceiver" --data-dir ./caducus-data
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Releases
|
|
134
|
+
|
|
135
|
+
Caducus uses `python-semantic-release` with Conventional Commits.
|
|
136
|
+
|
|
137
|
+
Use commit messages like:
|
|
138
|
+
|
|
139
|
+
- `feat: add CloudWatch collector checkpointing`
|
|
140
|
+
- `fix: quote group IDs containing dollar signs in docs`
|
|
141
|
+
- `feat!: change canonical event schema`
|
|
142
|
+
|
|
143
|
+
Release behavior:
|
|
144
|
+
|
|
145
|
+
- `feat:` triggers a minor release
|
|
146
|
+
- `fix:` triggers a patch release
|
|
147
|
+
- `feat!:` or a `BREAKING CHANGE:` footer triggers a major release
|
|
148
|
+
|
|
149
|
+
The release workflow lives in `.github/workflows/release.yml` and runs on pushes to `main`. It will:
|
|
150
|
+
|
|
151
|
+
1. Determine the next version from commit messages.
|
|
152
|
+
2. Update `project.version` in `pyproject.toml` and `src/caducus/__init__.py`.
|
|
153
|
+
3. Generate `CHANGELOG.md`, create a tag, and create a GitHub Release.
|
|
154
|
+
4. Publish the built distributions to PyPI.
|
|
155
|
+
|
|
156
|
+
PyPI publishing is configured for GitHub Actions trusted publishing. Before the first live release, configure the `caducus` project on PyPI to trust this repository's `release.yml` workflow.
|
|
157
|
+
|
|
158
|
+
## Roadmap
|
|
159
|
+
|
|
160
|
+
Caducus is intended to grow beyond the initial CLI foundation over time.
|
|
161
|
+
|
|
162
|
+
Planned directions include:
|
|
163
|
+
|
|
164
|
+
- broader source integrations across operational systems
|
|
165
|
+
- deeper analysis of concepts and entities derived from operational activity
|
|
166
|
+
- richer incident context and root-cause workflows
|
|
167
|
+
- a future web UI and embeddable components for other applications
|
|
168
|
+
|
|
169
|
+
## Repository Direction
|
|
170
|
+
|
|
171
|
+
This repository is being built outside-in. Product definition and behavior specifications come first, followed by the minimum implementation needed to satisfy them.
|
caducus-0.1.0/README.md
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# Caducus
|
|
2
|
+
|
|
3
|
+
Caducus helps operations teams understand what is going wrong right now across logs, alerts, dead-letter queues, and other operational event streams.
|
|
4
|
+
|
|
5
|
+
It is a CLI-first system for collecting timestamped operational events, normalizing them into a canonical schema, storing them as plain JSON, and using semantic reinforcement memory to surface recurring patterns, fresh anomalies, and just-in-time context during incidents.
|
|
6
|
+
|
|
7
|
+
## Why Caducus Exists
|
|
8
|
+
|
|
9
|
+
Operational signals are scattered across many systems:
|
|
10
|
+
|
|
11
|
+
- CloudWatch logs
|
|
12
|
+
- alerting systems
|
|
13
|
+
- dead-letter queues
|
|
14
|
+
- notifications and incident messages
|
|
15
|
+
|
|
16
|
+
Each source captures part of the truth, but not the whole picture. Caducus is intended to bring those signals together into one stream of timestamped event records that can be analyzed as a living memory of operational behavior.
|
|
17
|
+
|
|
18
|
+
The goal is not just to search historical data. The goal is to create a radar for what looks unusual, active, or important now.
|
|
19
|
+
|
|
20
|
+
## How It Works
|
|
21
|
+
|
|
22
|
+
Caducus is designed around a simple flow:
|
|
23
|
+
|
|
24
|
+
1. Collect operational events from source systems.
|
|
25
|
+
2. Normalize them into canonical event records with text, timestamps, source identity, and generalized metadata.
|
|
26
|
+
3. Persist them as JSON files in a Virtuus-backed folder structure.
|
|
27
|
+
4. Analyze event groups using Biblicus reinforcement memory.
|
|
28
|
+
5. Surface patterns, anomalies, and context for operators.
|
|
29
|
+
|
|
30
|
+
This keeps the system inspectable and composable. The underlying data lives in plain folders, not inside a black-box database.
|
|
31
|
+
|
|
32
|
+
## CLI-First MVP
|
|
33
|
+
|
|
34
|
+
The initial product is a CLI utility.
|
|
35
|
+
|
|
36
|
+
The MVP is focused on a coherent end-to-end flow:
|
|
37
|
+
|
|
38
|
+
- collect events from operational sources
|
|
39
|
+
- store them in a canonical schema
|
|
40
|
+
- run analysis over selected event groups
|
|
41
|
+
- inspect recent events and analysis outputs from the command line
|
|
42
|
+
|
|
43
|
+
Initial source areas for the MVP are:
|
|
44
|
+
|
|
45
|
+
- CloudWatch Logs
|
|
46
|
+
- SQS dead-letter queues
|
|
47
|
+
- one alert source
|
|
48
|
+
|
|
49
|
+
Configuration is intended to be layered through YAML, environment variables, and CLI overrides. Caducus will own collection and orchestration while allowing Biblicus-related analysis settings to flow through the Caducus configuration tree without duplicating Biblicus's schema.
|
|
50
|
+
|
|
51
|
+
## Architecture At A Glance
|
|
52
|
+
|
|
53
|
+
Caducus is intentionally thin:
|
|
54
|
+
|
|
55
|
+
- **Caducus** handles collection, normalization, orchestration, and CLI workflows.
|
|
56
|
+
- **Virtuus** provides file-backed JSON storage and retrieval.
|
|
57
|
+
- **Biblicus** provides semantic reinforcement-memory analysis.
|
|
58
|
+
|
|
59
|
+
```mermaid
|
|
60
|
+
flowchart LR
|
|
61
|
+
sources[OpsSources] --> caducus[Caducus]
|
|
62
|
+
caducus --> events[CanonicalEvents]
|
|
63
|
+
events --> virtuus[VirtuusStorage]
|
|
64
|
+
caducus --> biblicus[BiblicusAnalysis]
|
|
65
|
+
biblicus --> radar[OpsRadar]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Running the demo
|
|
69
|
+
|
|
70
|
+
Real HDFS data uses **component-derived group IDs**: each log row’s `component` becomes `hdfs-demo:<component>` (e.g. `hdfs-demo:dfs.DataNode$DataXceiver`). You must use a group ID that exists in your ingested data.
|
|
71
|
+
|
|
72
|
+
### Quick demo (small fixture, no download)
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
pip install -e ".[reinforcement-memory]"
|
|
76
|
+
caducus demo run --input tests/fixtures/demo_hdfs_sample.csv --group-id "hdfs-demo:DataNode" --data-dir /tmp/caducus-demo
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
The fixture has components `DataNode` and `NameNode`, so valid group IDs are `hdfs-demo:DataNode` and `hdfs-demo:NameNode`.
|
|
80
|
+
|
|
81
|
+
### Full demo on real HDFS data
|
|
82
|
+
|
|
83
|
+
1. Install optional deps (Biblicus reinforcement-memory and the datasets library for the download script):
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
pip install -e ".[reinforcement-memory]"
|
|
87
|
+
pip install datasets
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
2. Download a subset of the [HDFS_v1](https://huggingface.co/datasets/logfit-project/HDFS_v1) dataset:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
python scripts/download_hdfs_demo.py --output demo_data/hdfs_sample.csv --max-rows 10000
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
3. Ingest and list available groups (group IDs come from the CSV `component` column):
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
caducus demo ingest --input demo_data/hdfs_sample.csv --data-dir ./caducus-data
|
|
100
|
+
caducus groups --data-dir ./caducus-data
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
4. Run analysis for one of the listed group IDs:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
caducus analyze --group-id "hdfs-demo:dfs.DataNode$DataXceiver" --data-dir ./caducus-data
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Or do ingest and analyze in one step (use a group ID that exists in the CSV):
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
caducus demo run --input demo_data/hdfs_sample.csv --group-id "hdfs-demo:dfs.DataNode$DataXceiver" --data-dir ./caducus-data
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Releases
|
|
116
|
+
|
|
117
|
+
Caducus uses `python-semantic-release` with Conventional Commits.
|
|
118
|
+
|
|
119
|
+
Use commit messages like:
|
|
120
|
+
|
|
121
|
+
- `feat: add CloudWatch collector checkpointing`
|
|
122
|
+
- `fix: quote group IDs containing dollar signs in docs`
|
|
123
|
+
- `feat!: change canonical event schema`
|
|
124
|
+
|
|
125
|
+
Release behavior:
|
|
126
|
+
|
|
127
|
+
- `feat:` triggers a minor release
|
|
128
|
+
- `fix:` triggers a patch release
|
|
129
|
+
- `feat!:` or a `BREAKING CHANGE:` footer triggers a major release
|
|
130
|
+
|
|
131
|
+
The release workflow lives in `.github/workflows/release.yml` and runs on pushes to `main`. It will:
|
|
132
|
+
|
|
133
|
+
1. Determine the next version from commit messages.
|
|
134
|
+
2. Update `project.version` in `pyproject.toml` and `src/caducus/__init__.py`.
|
|
135
|
+
3. Generate `CHANGELOG.md`, create a tag, and create a GitHub Release.
|
|
136
|
+
4. Publish the built distributions to PyPI.
|
|
137
|
+
|
|
138
|
+
PyPI publishing is configured for GitHub Actions trusted publishing. Before the first live release, configure the `caducus` project on PyPI to trust this repository's `release.yml` workflow.
|
|
139
|
+
|
|
140
|
+
## Roadmap
|
|
141
|
+
|
|
142
|
+
Caducus is intended to grow beyond the initial CLI foundation over time.
|
|
143
|
+
|
|
144
|
+
Planned directions include:
|
|
145
|
+
|
|
146
|
+
- broader source integrations across operational systems
|
|
147
|
+
- deeper analysis of concepts and entities derived from operational activity
|
|
148
|
+
- richer incident context and root-cause workflows
|
|
149
|
+
- a future web UI and embeddable components for other applications
|
|
150
|
+
|
|
151
|
+
## Repository Direction
|
|
152
|
+
|
|
153
|
+
This repository is being built outside-in. Product definition and behavior specifications come first, followed by the minimum implementation needed to satisfy them.
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "caducus"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "CLI for collecting ops events and running reinforcement-memory analysis."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"PyYAML>=6.0",
|
|
13
|
+
"virtuus>=0.4.0",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
dev = [
|
|
18
|
+
"behave>=1.2.6",
|
|
19
|
+
"pytest>=8.0",
|
|
20
|
+
"ruff>=0.4.0",
|
|
21
|
+
"black>=24.0",
|
|
22
|
+
"build>=1.2.2",
|
|
23
|
+
"python-semantic-release>=10.5.3",
|
|
24
|
+
]
|
|
25
|
+
reinforcement-memory = [
|
|
26
|
+
"biblicus[reinforcement-memory]>=1.8.0",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.scripts]
|
|
30
|
+
caducus = "caducus.cli:main"
|
|
31
|
+
|
|
32
|
+
[tool.setuptools.packages.find]
|
|
33
|
+
where = ["src"]
|
|
34
|
+
|
|
35
|
+
[tool.ruff]
|
|
36
|
+
line-length = 88
|
|
37
|
+
|
|
38
|
+
[tool.black]
|
|
39
|
+
line-length = 88
|
|
40
|
+
|
|
41
|
+
[tool.semantic_release]
|
|
42
|
+
allow_zero_version = true
|
|
43
|
+
build_command = "python -m build"
|
|
44
|
+
commit_parser = "conventional"
|
|
45
|
+
version_toml = [
|
|
46
|
+
"pyproject.toml:project.version",
|
|
47
|
+
]
|
|
48
|
+
version_variables = [
|
|
49
|
+
"src/caducus/__init__.py:__version__",
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
[tool.semantic_release.branches.main]
|
|
53
|
+
match = "main"
|
|
54
|
+
|
|
55
|
+
[tool.semantic_release.changelog.default_templates]
|
|
56
|
+
changelog_file = "CHANGELOG.md"
|
caducus-0.1.0/src/caducus/biblicus_adapter.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Thin adapter over Biblicus reinforcement memory for Caducus analysis."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from virtuus._python.table import Table
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from biblicus.analysis.reinforcement_memory import (
|
|
14
|
+
LocalVectorStore,
|
|
15
|
+
ReinforcementMemory,
|
|
16
|
+
TimestampedText,
|
|
17
|
+
hash_embedder,
|
|
18
|
+
)
|
|
19
|
+
except ImportError as e:
|
|
20
|
+
raise ImportError(
|
|
21
|
+
"Caducus analysis requires biblicus with reinforcement-memory. "
|
|
22
|
+
"Install with: pip install 'biblicus[reinforcement-memory]' or pip install -e ../Biblicus"
|
|
23
|
+
) from e
|
|
24
|
+
|
|
25
|
+
from caducus.storage import get_events_for_group
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _events_to_timestamped_text(rows: list[dict[str, Any]], group_id: str) -> list[TimestampedText]:
    """Convert Caducus canonical event rows into Biblicus TimestampedText records.

    Each row's own metadata is copied and enriched with its ``source`` and
    ``group_id`` so provenance survives the hand-off to Biblicus. The analysis
    group is forced to *group_id* even when a row carries its own.
    """
    converted: list[TimestampedText] = []
    for row in rows:
        enriched_meta = dict(row.get("metadata") or {})
        enriched_meta["source"] = row.get("source", "")
        enriched_meta["group_id"] = row.get("group_id", group_id)
        record = TimestampedText(
            id=row["id"],
            group_id=group_id,
            timestamp=row.get("timestamp", ""),
            text=row.get("text", ""),
            metadata=enriched_meta,
        )
        converted.append(record)
    return converted
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _analysis_dirs_from_config(
|
|
48
|
+
data_dir: str, config: dict[str, Any] | None
|
|
49
|
+
) -> tuple[str, str]:
|
|
50
|
+
"""
|
|
51
|
+
Resolve analysis_dir and vector_dir from Caducus data_dir and optional biblicus config.
|
|
52
|
+
Returns (analysis_dir, vector_dir).
|
|
53
|
+
"""
|
|
54
|
+
analysis_dir = os.path.join(data_dir, "analysis")
|
|
55
|
+
vector_dir = os.path.join(analysis_dir, "vectors")
|
|
56
|
+
if config:
|
|
57
|
+
rm = config.get("biblicus", {}).get("reinforcement_memory") or {}
|
|
58
|
+
if rm.get("data_dir"):
|
|
59
|
+
analysis_dir = rm["data_dir"]
|
|
60
|
+
vector_dir = os.path.join(analysis_dir, "vectors")
|
|
61
|
+
vs = rm.get("vector_store")
|
|
62
|
+
if isinstance(vs, dict) and vs.get("kind") == "local" and vs.get("path"):
|
|
63
|
+
vector_dir = vs["path"]
|
|
64
|
+
return analysis_dir, vector_dir
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def run_analysis_for_group(
    data_dir: str,
    group_id: str,
    table: "Table",
    config: dict[str, Any] | None = None,
) -> None:
    """
    Load canonical events for the group, run Biblicus reinforcement-memory analysis,
    and print structured topic output to stdout.

    Config may contain a biblicus.reinforcement_memory subtree with data_dir and
    vector_store (e.g. { kind: local, path: ... }). If absent, defaults are used.

    :param data_dir: Caducus data directory used to derive default analysis paths.
    :param group_id: Group whose stored events are analyzed.
    :param table: Virtuus table holding the canonical events.
    :param config: Optional merged Caducus config tree (may be None).
    """
    # Resolve where analysis artifacts and vectors live, then make sure both exist.
    analysis_dir, vector_dir = _analysis_dirs_from_config(data_dir, config)
    Path(analysis_dir).mkdir(parents=True, exist_ok=True)
    Path(vector_dir).mkdir(parents=True, exist_ok=True)

    # Local-disk vector store; hash_embedder presumably produces deterministic,
    # model-free embeddings — confirm against the Biblicus documentation.
    memory = ReinforcementMemory(
        data_dir=analysis_dir,
        vector_store=LocalVectorStore(vector_dir),
        embed=hash_embedder(),
    )

    rows = get_events_for_group(table, group_id)
    if not rows:
        # Nothing to analyze; report and bail out without touching the memory store.
        print(f"No events found for group_id={group_id}")
        return

    texts = _events_to_timestamped_text(rows, group_id)
    memory.ingest(texts)
    result = memory.analyze(group_id=group_id)

    # One summary line, then one indented line per discovered topic.
    print(f"Group: {result.group_id} Texts: {result.texts_analyzed} Run: {result.run_id}")
    for t in result.topics:
        line = f" {t.label} [{t.memory_tier}/{t.lifecycle_tier}] n={t.member_count}"
        if t.root_cause:
            # Truncate long root-cause text so each topic stays on a single line.
            line += f" cause: {t.root_cause[:60]}..."
        print(line)
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Caducus CLI entrypoint."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def main() -> int:
    """Entry point for the caducus command.

    Builds the argument parser (demo ingest/run, analyze, groups), dispatches
    to the handler bound via set_defaults(func=...), and returns its exit code.
    """
    parser = argparse.ArgumentParser(prog="caducus", description="Collect ops events and run analysis.")
    sub = parser.add_subparsers(dest="command", required=True)

    # "demo" hosts its own sub-commands: ingest and run.
    demo = sub.add_parser("demo", help="Demo dataset commands")
    demo_sub = demo.add_subparsers(dest="demo_command", required=True)

    demo_ingest = demo_sub.add_parser("ingest", help="Ingest a local demo dataset file into canonical events")
    demo_ingest.add_argument("--input", required=True, help="Path to demo dataset file (e.g. CSV or JSONL)")
    demo_ingest.add_argument("--data-dir", default="./caducus-data", help="Caducus data directory")
    demo_ingest.set_defaults(func=cmd_demo_ingest)

    demo_run = demo_sub.add_parser("run", help="Ingest demo file and run analysis for one group")
    demo_run.add_argument("--input", required=True, help="Path to demo dataset file")
    demo_run.add_argument("--group-id", required=True, help="Group ID to analyze")
    # data-dir defaults to None here (unlike ingest/groups) so _load_config can
    # fall back to the config tree when the flag is omitted.
    demo_run.add_argument("--data-dir", default=None, help="Caducus data directory")
    # NOTE(review): action="append" with default=[] shares one list object across
    # parses; harmless for a one-shot CLI, but default=None would be safer.
    demo_run.add_argument("--config", action="append", default=[], metavar="KEY=VALUE", help="Config override (repeatable)")
    demo_run.add_argument("--configuration", action="append", default=[], metavar="FILE", dest="config_files", help="Config YAML file (repeatable)")
    demo_run.set_defaults(func=cmd_demo_run)

    analyze_p = sub.add_parser("analyze", help="Run reinforcement-memory analysis for a group")
    analyze_p.add_argument("--group-id", required=True, help="Group ID to analyze")
    analyze_p.add_argument("--data-dir", default=None, help="Caducus data directory")
    analyze_p.add_argument("--config", action="append", default=[], metavar="KEY=VALUE", help="Config override (repeatable)")
    analyze_p.add_argument("--configuration", action="append", default=[], metavar="FILE", dest="config_files", help="Config YAML file (repeatable)")
    analyze_p.set_defaults(func=cmd_analyze)

    groups_p = sub.add_parser("groups", help="List group IDs that have events (use after ingest)")
    groups_p.add_argument("--data-dir", default="./caducus-data", help="Caducus data directory")
    groups_p.set_defaults(func=cmd_groups)

    args = parser.parse_args()
    # Subparsers are required, so func should always be set; the guard keeps
    # a plain exit 0 as a defensive fallback.
    if hasattr(args, "func"):
        return args.func(args)
    return 0
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def cmd_demo_ingest(args: argparse.Namespace) -> int:
    """Ingest the demo dataset file at args.input into canonical event storage."""
    # Imports are deferred so the bare CLI starts without pulling storage deps.
    from caducus.collectors.demo_dataset import ingest_demo_file
    from caducus.storage import get_events_table

    events_table = get_events_table(args.data_dir)
    count = ingest_demo_file(args.input, events_table)
    print(f"Ingested {count} events into {args.data_dir}")
    return 0
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _load_config(args: argparse.Namespace) -> tuple[dict, str]:
    """Build the merged config and resolve the data directory.

    Returns (config, data_dir); missing CLI attributes fall back to empty lists
    (for file/override flags) or None (for data_dir).
    """
    from caducus.config import get_data_dir, load_config

    yaml_files = getattr(args, "config_files", None) or []
    kv_overrides = getattr(args, "config", None) or []
    merged = load_config(config_file_paths=yaml_files, overrides=kv_overrides)
    resolved_dir = get_data_dir(merged, getattr(args, "data_dir", None))
    return merged, resolved_dir
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def cmd_demo_run(args: argparse.Namespace) -> int:
    """Ingest the demo file, then run analysis for the requested group in one pass."""
    # Deferred imports keep CLI startup cheap and optional deps optional.
    from caducus.biblicus_adapter import run_analysis_for_group
    from caducus.collectors.demo_dataset import ingest_demo_file
    from caducus.storage import get_events_table

    merged_config, resolved_dir = _load_config(args)
    events_table = get_events_table(resolved_dir)
    ingest_demo_file(args.input, events_table)
    run_analysis_for_group(resolved_dir, args.group_id, events_table, config=merged_config)
    return 0
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def cmd_analyze(args: argparse.Namespace) -> int:
    """Run reinforcement-memory analysis over already-stored canonical events."""
    from caducus.biblicus_adapter import run_analysis_for_group
    from caducus.storage import get_events_table

    merged_config, resolved_dir = _load_config(args)
    events_table = get_events_table(resolved_dir)
    run_analysis_for_group(resolved_dir, args.group_id, events_table, config=merged_config)
    return 0
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def cmd_groups(args: argparse.Namespace) -> int:
    """Print one stored group ID per line; hint at ingest when storage is empty."""
    from caducus.storage import get_events_table, list_group_ids

    group_ids = list_group_ids(get_events_table(args.data_dir))
    if not group_ids:
        print("No groups found. Run 'caducus demo ingest' first.")
        return 0
    for group_id in group_ids:
        print(group_id)
    return 0
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# Allow running the CLI module directly (python -m / script execution); the
# exit code from main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Caducus event collectors."""
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Demo dataset collector: HDFS-style log rows -> canonical events."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
import uuid
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Iterator
|
|
9
|
+
|
|
10
|
+
from caducus.events import CanonicalEvent
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from virtuus._python.table import Table
|
|
14
|
+
|
|
15
|
+
SOURCE_ID = "hdfs-demo"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _parse_timestamp(date_str: str, time_str: str) -> str:
|
|
19
|
+
"""Turn YYMMDD and HHMMSS into ISO 8601. Assumes 20xx for year."""
|
|
20
|
+
if not date_str or not time_str:
|
|
21
|
+
return ""
|
|
22
|
+
try:
|
|
23
|
+
y, m, d = int(date_str[:2]), int(date_str[2:4]), int(date_str[4:6])
|
|
24
|
+
year = 2000 + y if y < 50 else 1900 + y
|
|
25
|
+
h = int(time_str[:2])
|
|
26
|
+
mi = int(time_str[2:4])
|
|
27
|
+
s = int(time_str[4:6]) if len(time_str) >= 6 else 0
|
|
28
|
+
return f"{year:04d}-{m:02d}-{d:02d}T{h:02d}:{mi:02d}:{s:02d}Z"
|
|
29
|
+
except (ValueError, IndexError):
|
|
30
|
+
return ""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _row_to_event(row: dict[str, Any], index: int) -> CanonicalEvent:
    """Map one HDFS-style row to a canonical event.

    The group ID is derived from the row's ``component`` (prefixed with the
    demo source ID), and only non-None metadata values are kept. Rows without
    an ``id`` get a fresh UUID. ``index`` is currently unused.
    """
    component = str(row.get("component", "unknown")).strip() or "unknown"
    optional_fields = {
        "level": row.get("level"),
        "pid": row.get("pid"),
        "block_id": row.get("block_id"),
        "anomaly": row.get("anomaly"),
    }
    return CanonicalEvent(
        id=row.get("id") or str(uuid.uuid4()),
        timestamp=_parse_timestamp(str(row.get("date", "")), str(row.get("time", ""))),
        source=SOURCE_ID,
        group_id=f"{SOURCE_ID}:{component}",
        text=str(row.get("content", "")),
        metadata={key: val for key, val in optional_fields.items() if val is not None},
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _read_csv(path: Path) -> Iterator[dict[str, Any]]:
|
|
59
|
+
"""Read CSV with headers. Normalize keys to lowercase."""
|
|
60
|
+
with open(path, newline="", encoding="utf-8", errors="replace") as f:
|
|
61
|
+
reader = csv.DictReader(f)
|
|
62
|
+
for row in reader:
|
|
63
|
+
yield {k.strip().lower(): v for k, v in row.items() if k}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def ingest_demo_file(input_path: str, table: "Table") -> int:
    """
    Read a demo dataset file (CSV with HDFS-style columns) and write canonical events.

    Expected columns: date, time, level, component, pid, content, block_id, anomaly.
    Raises FileNotFoundError when the input path does not exist; returns the
    count reported by put_events.
    """
    source = Path(input_path)
    if not source.exists():
        raise FileNotFoundError(f"Demo file not found: {input_path}")

    events = [_row_to_event(row, i) for i, row in enumerate(_read_csv(source))]

    # Imported lazily so the missing-file check above runs before storage deps load.
    from caducus.storage import put_events

    return put_events(table, events)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Caducus configuration: YAML merge, env, and CLI overrides."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
|
|
12
|
+
"""Merge override into base recursively. Override wins for scalars."""
|
|
13
|
+
out = dict(base)
|
|
14
|
+
for k, v in override.items():
|
|
15
|
+
if k in out and isinstance(out[k], dict) and isinstance(v, dict):
|
|
16
|
+
out[k] = _deep_merge(out[k], v)
|
|
17
|
+
else:
|
|
18
|
+
out[k] = v
|
|
19
|
+
return out
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _set_dotted(root: dict[str, Any], key: str, value: Any) -> None:
|
|
23
|
+
"""Set a dotted key (e.g. biblicus.reinforcement_memory.data_dir) into root."""
|
|
24
|
+
parts = key.split(".")
|
|
25
|
+
cur = root
|
|
26
|
+
for i, part in enumerate(parts[:-1]):
|
|
27
|
+
if part not in cur:
|
|
28
|
+
cur[part] = {}
|
|
29
|
+
cur = cur[part]
|
|
30
|
+
cur[parts[-1]] = value
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _env_substitute_string(s: str) -> str:
|
|
34
|
+
"""Replace {{ VAR }} and {{ VAR|default }} in s with env or default."""
|
|
35
|
+
if not isinstance(s, str):
|
|
36
|
+
return s
|
|
37
|
+
|
|
38
|
+
def repl(match: re.Match[str]) -> str:
|
|
39
|
+
var = match.group(1).strip()
|
|
40
|
+
pipe = match.group(2)
|
|
41
|
+
default = match.group(3).strip() if pipe else ""
|
|
42
|
+
return os.environ.get(var, default)
|
|
43
|
+
|
|
44
|
+
return re.sub(r"\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*(\|\s*([^}]*))?\s*\}\}", repl, s)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _env_substitute(obj: Any) -> Any:
|
|
48
|
+
"""Recursively substitute {{ VAR }} in string values."""
|
|
49
|
+
if isinstance(obj, dict):
|
|
50
|
+
return {k: _env_substitute(v) for k, v in obj.items()}
|
|
51
|
+
if isinstance(obj, list):
|
|
52
|
+
return [_env_substitute(v) for v in obj]
|
|
53
|
+
if isinstance(obj, str):
|
|
54
|
+
return _env_substitute_string(obj)
|
|
55
|
+
return obj
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def load_yaml(path: Path) -> dict[str, Any]:
    """Load a single YAML file.

    Returns {} when the file is missing, unreadable, unparseable, or does not
    contain a mapping at the top level.  PyYAML is imported lazily — and only
    once a file actually exists — so config-free code paths never require it
    (previously the import ran before the existence check).
    """
    if not path.exists():
        return {}
    import yaml

    try:
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception:
        # Best-effort by contract: any read/parse failure yields an empty config.
        return {}
    # Only a top-level mapping is a valid config document.
    return dict(data) if isinstance(data, dict) else {}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def load_config(
    config_file_paths: list[str] | None = None,
    overrides: list[str] | None = None,
) -> dict[str, Any]:
    """
    Build merged config from YAML files, env, and dotted overrides.

    - config_file_paths: paths to YAML files (later override earlier).
    - overrides: list of "key=value" (dotted keys, e.g. biblicus.reinforcement_memory.data_dir=/path).

    CADUCUS_DATA_DIR and BIBLICUS_* env vars are not auto-injected here;
    CLI can apply them as overrides before calling this, or callers can merge.
    """
    merged: dict[str, Any] = {}
    # Later files win over earlier ones.
    for config_path in config_file_paths or []:
        merged = _deep_merge(merged, load_yaml(Path(config_path)))
    # Expand {{ VAR }} placeholders before applying explicit overrides.
    merged = _env_substitute(merged)
    for override in overrides or []:
        if "=" not in override:
            # Malformed entries without '=' are ignored rather than rejected.
            continue
        dotted_key, _, raw_value = override.partition("=")
        _set_dotted(merged, dotted_key.strip().strip("."), raw_value)
    return merged
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def get_data_dir(config: dict[str, Any], cli_data_dir: str | None) -> str:
    """Resolve data_dir with precedence: CLI flag, config, env var, default."""
    candidates = (
        cli_data_dir,
        config.get("data_dir"),
        os.environ.get("CADUCUS_DATA_DIR"),
    )
    for candidate in candidates:
        if candidate:
            return candidate
    return "./caducus-data"
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Canonical event model for Caducus."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class CanonicalEvent:
    """
    A single canonical ops event.

    Schema is defined in docs/caducus-biblicus-contract.md.
    """

    id: str                   # unique event identifier
    timestamp: str            # ISO 8601
    source: str               # collector/source identifier
    group_id: str             # analysis grouping key
    text: str                 # event message text
    metadata: dict[str, Any]  # source-specific extras

    def to_dict(self) -> dict[str, Any]:
        """Serialize for storage (metadata is shared by reference, not copied)."""
        field_names = ("id", "timestamp", "source", "group_id", "text", "metadata")
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "CanonicalEvent":
        """Deserialize from storage; a missing "metadata" key defaults to {}."""
        payload = {key: data[key] for key in ("id", "timestamp", "source", "group_id", "text")}
        payload["metadata"] = data.get("metadata", {})
        return cls(**payload)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Virtuus-backed storage for Caducus canonical events."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Iterator
|
|
7
|
+
|
|
8
|
+
from virtuus import Database
|
|
9
|
+
|
|
10
|
+
from caducus.events import CanonicalEvent
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from virtuus._python.table import Table
|
|
14
|
+
|
|
15
|
+
# Virtuus schema for the canonical events store: a single "events" table keyed
# by event id, with one secondary index for per-group, time-ordered queries.
EVENTS_SCHEMA = {
    "tables": {
        "events": {
            # Each event record is addressed by its unique "id" field.
            "primary_key": "id",
            # Subdirectory under the data root where this table's files live.
            "directory": "events",
            # NOTE(review): "index_only" semantics come from Virtuus — confirm
            # against the Virtuus schema documentation.
            "storage": "index_only",
            "gsis": {
                # Used by get_events_for_group: all events for one group_id,
                # sorted by timestamp.
                "by_group": {
                    "partition_key": "group_id",
                    "sort_key": "timestamp",
                },
            },
        },
    },
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_events_table(data_dir: str) -> "Table":
    """Return the Virtuus events table rooted at *data_dir*."""
    database = Database.from_schema_dict(EVENTS_SCHEMA, data_root=data_dir)
    return database.tables["events"]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def put_events(table: "Table", events: list[CanonicalEvent]) -> int:
    """Persist each canonical event into *table*. Returns the number written."""
    written = 0
    for event in events:
        table.put(event.to_dict())
        written += 1
    return written
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_events_for_group(table: "Table", group_id: str) -> list[dict[str, Any]]:
    """Return every event for *group_id*, oldest first, via the by_group GSI."""
    rows = table.query_gsi("by_group", group_id, None, descending=False)
    return rows
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def list_group_ids(table: "Table") -> list[str]:
    """Return the sorted distinct non-empty group_id values in the events table."""
    return sorted({gid for row in table.scan() if (gid := row.get("group_id"))})
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: caducus
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI for collecting ops events and running reinforcement-memory analysis.
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: PyYAML>=6.0
|
|
8
|
+
Requires-Dist: virtuus>=0.4.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: behave>=1.2.6; extra == "dev"
|
|
11
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
12
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
13
|
+
Requires-Dist: black>=24.0; extra == "dev"
|
|
14
|
+
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
15
|
+
Requires-Dist: python-semantic-release>=10.5.3; extra == "dev"
|
|
16
|
+
Provides-Extra: reinforcement-memory
|
|
17
|
+
Requires-Dist: biblicus[reinforcement-memory]>=1.8.0; extra == "reinforcement-memory"
|
|
18
|
+
|
|
19
|
+
# Caducus
|
|
20
|
+
|
|
21
|
+
Caducus helps operations teams understand what is going wrong right now across logs, alerts, dead-letter queues, and other operational event streams.
|
|
22
|
+
|
|
23
|
+
It is a CLI-first system for collecting timestamped operational events, normalizing them into a canonical schema, storing them as plain JSON, and using semantic reinforcement memory to surface recurring patterns, fresh anomalies, and just-in-time context during incidents.
|
|
24
|
+
|
|
25
|
+
## Why Caducus Exists
|
|
26
|
+
|
|
27
|
+
Operational signals are scattered across many systems:
|
|
28
|
+
|
|
29
|
+
- CloudWatch logs
|
|
30
|
+
- alerting systems
|
|
31
|
+
- dead-letter queues
|
|
32
|
+
- notifications and incident messages
|
|
33
|
+
|
|
34
|
+
Each source captures part of the truth, but not the whole picture. Caducus is intended to bring those signals together into one stream of timestamped event records that can be analyzed as a living memory of operational behavior.
|
|
35
|
+
|
|
36
|
+
The goal is not just to search historical data. The goal is to create a radar for what looks unusual, active, or important now.
|
|
37
|
+
|
|
38
|
+
## How It Works
|
|
39
|
+
|
|
40
|
+
Caducus is designed around a simple flow:
|
|
41
|
+
|
|
42
|
+
1. Collect operational events from source systems.
|
|
43
|
+
2. Normalize them into canonical event records with text, timestamps, source identity, and generalized metadata.
|
|
44
|
+
3. Persist them as JSON files in a Virtuus-backed folder structure.
|
|
45
|
+
4. Analyze event groups using Biblicus reinforcement memory.
|
|
46
|
+
5. Surface patterns, anomalies, and context for operators.
|
|
47
|
+
|
|
48
|
+
This keeps the system inspectable and composable. The underlying data lives in plain folders, not inside a black-box database.
|
|
49
|
+
|
|
50
|
+
## CLI-First MVP
|
|
51
|
+
|
|
52
|
+
The initial product is a CLI utility.
|
|
53
|
+
|
|
54
|
+
The MVP is focused on a coherent end-to-end flow:
|
|
55
|
+
|
|
56
|
+
- collect events from operational sources
|
|
57
|
+
- store them in a canonical schema
|
|
58
|
+
- run analysis over selected event groups
|
|
59
|
+
- inspect recent events and analysis outputs from the command line
|
|
60
|
+
|
|
61
|
+
Initial source areas for the MVP are:
|
|
62
|
+
|
|
63
|
+
- CloudWatch Logs
|
|
64
|
+
- SQS dead-letter queues
|
|
65
|
+
- one alert source
|
|
66
|
+
|
|
67
|
+
Configuration is intended to be layered through YAML, environment variables, and CLI overrides. Caducus will own collection and orchestration while allowing Biblicus-related analysis settings to flow through the Caducus configuration tree without duplicating Biblicus's schema.
|
|
68
|
+
|
|
69
|
+
## Architecture At A Glance
|
|
70
|
+
|
|
71
|
+
Caducus is intentionally thin:
|
|
72
|
+
|
|
73
|
+
- **Caducus** handles collection, normalization, orchestration, and CLI workflows.
|
|
74
|
+
- **Virtuus** provides file-backed JSON storage and retrieval.
|
|
75
|
+
- **Biblicus** provides semantic reinforcement-memory analysis.
|
|
76
|
+
|
|
77
|
+
```mermaid
|
|
78
|
+
flowchart LR
|
|
79
|
+
sources[OpsSources] --> caducus[Caducus]
|
|
80
|
+
caducus --> events[CanonicalEvents]
|
|
81
|
+
events --> virtuus[VirtuusStorage]
|
|
82
|
+
caducus --> biblicus[BiblicusAnalysis]
|
|
83
|
+
biblicus --> radar[OpsRadar]
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Running the demo
|
|
87
|
+
|
|
88
|
+
Real HDFS data uses **component-derived group IDs**: each log row’s `component` becomes `hdfs-demo:<component>` (e.g. `hdfs-demo:dfs.DataNode$DataXceiver`). You must use a group ID that exists in your ingested data.
|
|
89
|
+
|
|
90
|
+
### Quick demo (small fixture, no download)
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
pip install -e ".[reinforcement-memory]"
|
|
94
|
+
caducus demo run --input tests/fixtures/demo_hdfs_sample.csv --group-id "hdfs-demo:DataNode" --data-dir /tmp/caducus-demo
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
The fixture has components `DataNode` and `NameNode`, so valid group IDs are `hdfs-demo:DataNode` and `hdfs-demo:NameNode`.
|
|
98
|
+
|
|
99
|
+
### Full demo on real HDFS data
|
|
100
|
+
|
|
101
|
+
1. Install optional deps (Biblicus reinforcement-memory and the datasets library for the download script):
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pip install -e ".[reinforcement-memory]"
|
|
105
|
+
pip install datasets
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
2. Download a subset of the [HDFS_v1](https://huggingface.co/datasets/logfit-project/HDFS_v1) dataset:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
python scripts/download_hdfs_demo.py --output demo_data/hdfs_sample.csv --max-rows 10000
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
3. Ingest and list available groups (group IDs come from the CSV `component` column):
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
caducus demo ingest --input demo_data/hdfs_sample.csv --data-dir ./caducus-data
|
|
118
|
+
caducus groups --data-dir ./caducus-data
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
4. Run analysis for one of the listed group IDs:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
caducus analyze --group-id "hdfs-demo:dfs.DataNode$DataXceiver" --data-dir ./caducus-data
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Or do ingest and analyze in one step (use a group ID that exists in the CSV):
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
caducus demo run --input demo_data/hdfs_sample.csv --group-id "hdfs-demo:dfs.DataNode$DataXceiver" --data-dir ./caducus-data
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Releases
|
|
134
|
+
|
|
135
|
+
Caducus uses `python-semantic-release` with Conventional Commits.
|
|
136
|
+
|
|
137
|
+
Use commit messages like:
|
|
138
|
+
|
|
139
|
+
- `feat: add CloudWatch collector checkpointing`
|
|
140
|
+
- `fix: quote group IDs containing dollar signs in docs`
|
|
141
|
+
- `feat!: change canonical event schema`
|
|
142
|
+
|
|
143
|
+
Release behavior:
|
|
144
|
+
|
|
145
|
+
- `feat:` triggers a minor release
|
|
146
|
+
- `fix:` triggers a patch release
|
|
147
|
+
- `feat!:` or a `BREAKING CHANGE:` footer triggers a major release
|
|
148
|
+
|
|
149
|
+
The release workflow lives in `.github/workflows/release.yml` and runs on pushes to `main`. It will:
|
|
150
|
+
|
|
151
|
+
1. Determine the next version from commit messages.
|
|
152
|
+
2. Update `project.version` in `pyproject.toml` and `src/caducus/__init__.py`.
|
|
153
|
+
3. Generate `CHANGELOG.md`, create a tag, and create a GitHub Release.
|
|
154
|
+
4. Publish the built distributions to PyPI.
|
|
155
|
+
|
|
156
|
+
PyPI publishing is configured for GitHub Actions trusted publishing. Before the first live release, configure the `caducus` project on PyPI to trust this repository's `release.yml` workflow.
|
|
157
|
+
|
|
158
|
+
## Roadmap
|
|
159
|
+
|
|
160
|
+
Caducus is intended to grow beyond the initial CLI foundation over time.
|
|
161
|
+
|
|
162
|
+
Planned directions include:
|
|
163
|
+
|
|
164
|
+
- broader source integrations across operational systems
|
|
165
|
+
- deeper analysis of concepts and entities derived from operational activity
|
|
166
|
+
- richer incident context and root-cause workflows
|
|
167
|
+
- a future web UI and embeddable components for other applications
|
|
168
|
+
|
|
169
|
+
## Repository Direction
|
|
170
|
+
|
|
171
|
+
This repository is being built outside-in. Product definition and behavior specifications come first, followed by the minimum implementation needed to satisfy them.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/caducus/__init__.py
|
|
4
|
+
src/caducus/biblicus_adapter.py
|
|
5
|
+
src/caducus/cli.py
|
|
6
|
+
src/caducus/config.py
|
|
7
|
+
src/caducus/events.py
|
|
8
|
+
src/caducus/storage.py
|
|
9
|
+
src/caducus.egg-info/PKG-INFO
|
|
10
|
+
src/caducus.egg-info/SOURCES.txt
|
|
11
|
+
src/caducus.egg-info/dependency_links.txt
|
|
12
|
+
src/caducus.egg-info/entry_points.txt
|
|
13
|
+
src/caducus.egg-info/requires.txt
|
|
14
|
+
src/caducus.egg-info/top_level.txt
|
|
15
|
+
src/caducus/collectors/__init__.py
|
|
16
|
+
src/caducus/collectors/demo_dataset.py
|
|
17
|
+
tests/test_demo_slice.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
caducus
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Tests for the demo vertical slice: ingest and analysis."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import tempfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import pytest
|
|
10
|
+
|
|
11
|
+
from caducus.collectors.demo_dataset import ingest_demo_file
|
|
12
|
+
from caducus.events import CanonicalEvent
|
|
13
|
+
from caducus.storage import get_events_table, get_events_for_group, put_events
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
FIXTURES_DIR = Path(__file__).resolve().parent / "fixtures"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_canonical_event_roundtrip() -> None:
    """A CanonicalEvent survives a to_dict()/from_dict() round trip."""
    original = CanonicalEvent(
        id="e1",
        timestamp="2024-01-01T12:00:00Z",
        source="hdfs-demo",
        group_id="hdfs-demo:DataNode",
        text="Receiving block blk_123",
        metadata={"level": "INFO"},
    )
    restored = CanonicalEvent.from_dict(original.to_dict())
    assert restored.id == original.id
    assert restored.text == original.text
    assert restored.metadata == original.metadata
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_demo_ingest_writes_canonical_events() -> None:
    """Demo ingest reads CSV and writes one canonical event per row."""
    csv_path = FIXTURES_DIR / "demo_hdfs_sample.csv"
    if not csv_path.exists():
        pytest.skip("fixture demo_hdfs_sample.csv not found")
    with tempfile.TemporaryDirectory() as tmp:
        table = get_events_table(tmp)
        assert ingest_demo_file(str(csv_path), table) == 5
        datanode_rows = get_events_for_group(table, "hdfs-demo:DataNode")
        namenode_rows = get_events_for_group(table, "hdfs-demo:NameNode")
        assert len(datanode_rows) == 3
        assert len(namenode_rows) == 2
        # Every stored row carries the canonical-event fields.
        for row in datanode_rows + namenode_rows:
            assert "id" in row and "text" in row and "timestamp" in row
            assert row["source"] == "hdfs-demo"
            assert "group_id" in row
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_analyze_requires_biblicus() -> None:
    """Analyze path imports Biblicus; we only test that adapter imports."""
    try:
        from caducus.biblicus_adapter import _events_to_timestamped_text
    except ImportError:
        pytest.skip("biblicus reinforcement-memory not installed")
    from biblicus.analysis.reinforcement_memory import TimestampedText

    event_row = {
        "id": "1",
        "timestamp": "2024-01-01T12:00:00Z",
        "source": "hdfs-demo",
        "group_id": "hdfs-demo:DataNode",
        "text": "Receiving block",
        "metadata": {},
    }
    converted = _events_to_timestamped_text([event_row], "hdfs-demo:DataNode")
    assert len(converted) == 1
    assert isinstance(converted[0], TimestampedText)
    assert converted[0].text == "Receiving block"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_groups_lists_group_ids_after_ingest() -> None:
    """After ingest, groups command returns sorted distinct group_ids."""
    csv_path = FIXTURES_DIR / "demo_hdfs_sample.csv"
    if not csv_path.exists():
        pytest.skip("fixture demo_hdfs_sample.csv not found")
    with tempfile.TemporaryDirectory() as tmp:
        from caducus.storage import list_group_ids

        table = get_events_table(tmp)
        ingest_demo_file(str(csv_path), table)
        group_ids = list_group_ids(table)
        assert "hdfs-demo:DataNode" in group_ids
        assert "hdfs-demo:NameNode" in group_ids
        assert group_ids == sorted(group_ids)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_demo_run_produces_stable_topic_output() -> None:
    """End-to-end: ingest fixture, run analyze for a real group, assert topic output shape."""
    try:
        from caducus.biblicus_adapter import run_analysis_for_group
    except ImportError:
        pytest.skip("biblicus reinforcement-memory not installed")
    csv_path = FIXTURES_DIR / "demo_hdfs_sample.csv"
    if not csv_path.exists():
        pytest.skip("fixture demo_hdfs_sample.csv not found")
    with tempfile.TemporaryDirectory() as tmp:
        table = get_events_table(tmp)
        ingest_demo_file(str(csv_path), table)
        import contextlib
        import io

        # Capture stdout with redirect_stdout instead of swapping sys.stdout
        # by hand: restoration is guaranteed even if the analysis raises.
        out = io.StringIO()
        with contextlib.redirect_stdout(out):
            run_analysis_for_group(tmp, "hdfs-demo:DataNode", table, config=None)
        text = out.getvalue()
        assert "Group:" in text
        assert "Texts:" in text or "texts_analyzed" in text or "n=" in text
        assert "n=" in text
        assert "[" in text and "/" in text
|