bubble-memory 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bubble_memory-0.1.0/LICENSE +21 -0
- bubble_memory-0.1.0/PKG-INFO +156 -0
- bubble_memory-0.1.0/README.md +123 -0
- bubble_memory-0.1.0/pyproject.toml +47 -0
- bubble_memory-0.1.0/setup.cfg +4 -0
- bubble_memory-0.1.0/src/bubble/__init__.py +101 -0
- bubble_memory-0.1.0/src/bubble/_shared.py +49 -0
- bubble_memory-0.1.0/src/bubble/archive.py +48 -0
- bubble_memory-0.1.0/src/bubble/chain.py +317 -0
- bubble_memory-0.1.0/src/bubble/cluster.py +66 -0
- bubble_memory-0.1.0/src/bubble/db.py +48 -0
- bubble_memory-0.1.0/src/bubble/decomposer.py +130 -0
- bubble_memory-0.1.0/src/bubble/embed.py +26 -0
- bubble_memory-0.1.0/src/bubble/ingest.py +165 -0
- bubble_memory-0.1.0/src/bubble/main.py +275 -0
- bubble_memory-0.1.0/src/bubble/promote.py +118 -0
- bubble_memory-0.1.0/src/bubble/rerank.py +30 -0
- bubble_memory-0.1.0/src/bubble/retrieve.py +211 -0
- bubble_memory-0.1.0/src/bubble_memory.egg-info/PKG-INFO +156 -0
- bubble_memory-0.1.0/src/bubble_memory.egg-info/SOURCES.txt +21 -0
- bubble_memory-0.1.0/src/bubble_memory.egg-info/dependency_links.txt +1 -0
- bubble_memory-0.1.0/src/bubble_memory.egg-info/requires.txt +11 -0
- bubble_memory-0.1.0/src/bubble_memory.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 AutismAccelerator
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bubble-memory
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Event Sourcing based belief formation system for long-term AI agent memory
|
|
5
|
+
Author-email: AutismAccelerator <your.email@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/AutismAccelerator/bubble
|
|
8
|
+
Project-URL: Repository, https://github.com/AutismAccelerator/bubble
|
|
9
|
+
Project-URL: Issues, https://github.com/AutismAccelerator/bubble/issues
|
|
10
|
+
Keywords: ai,memory,agent,graph,llm,vector-store
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: anthropic>=0.86.0
|
|
23
|
+
Requires-Dist: falkordb>=1.6.0
|
|
24
|
+
Requires-Dist: httpx>=0.28.0
|
|
25
|
+
Requires-Dist: numpy>=2.4.4
|
|
26
|
+
Requires-Dist: python-dotenv>=1.2.2
|
|
27
|
+
Requires-Dist: scikit-learn>=1.8.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: build; extra == "dev"
|
|
30
|
+
Requires-Dist: twine; extra == "dev"
|
|
31
|
+
Requires-Dist: ruff; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# Bubble
|
|
35
|
+
**Event Sourcing based belief formation system for long-term AI agent memory**
|
|
36
|
+
**[paper](https://doi.org/10.5281/zenodo.19438945)** **[Discord](https://discord.com/users/1319641673990672477)**
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## How it works
|
|
41
|
+
```
|
|
42
|
+
[ raw input ]
|
|
43
|
+
│
|
|
44
|
+
┌─────▼─────┐
|
|
45
|
+
│ decompose │
|
|
46
|
+
└─────┬─────┘
|
|
47
|
+
│
|
|
48
|
+
┌────────────────┴────────────────┐
|
|
49
|
+
ι ≥ θ ι < θ
|
|
50
|
+
│ │
|
|
51
|
+
vivid signal weak signal
|
|
52
|
+
│ │
|
|
53
|
+
┌────▼────┐ ┌──────▼──────┐
|
|
54
|
+
│ archive │ │ pool │
|
|
55
|
+
└────┬────┘ │ · · · · · │
|
|
56
|
+
│ │ · · · · · │
|
|
57
|
+
│ │ · · · · · │
|
|
58
|
+
│ └──────┬──────┘
|
|
59
|
+
│ │
|
|
60
|
+
│ enough gathered?
|
|
61
|
+
│ │
|
|
62
|
+
│ no ──────────┘
|
|
63
|
+
│ │ yes
|
|
64
|
+
│ ┌──────▼──────┐
|
|
65
|
+
│ │ cluster │
|
|
66
|
+
│ │ + score │
|
|
67
|
+
│ └──────┬──────┘
|
|
68
|
+
│ │
|
|
69
|
+
└──────────────┬──────────────────┘
|
|
70
|
+
│
|
|
71
|
+
┌─────▼─────┐
|
|
72
|
+
│ episode │ immutable
|
|
73
|
+
└─────┬─────┘
|
|
74
|
+
│
|
|
75
|
+
same topic chain?(NLI)
|
|
76
|
+
yes │ │ no
|
|
77
|
+
│ │
|
|
78
|
+
┌──────────▼─┐ ┌▼────────────┐
|
|
79
|
+
joins chain │ ... ──► e │ │ e │ new chain
|
|
80
|
+
└──────────┬─┘ └─────┬───────┘
|
|
81
|
+
│ │
|
|
82
|
+
┌─────▼───────────▼─────┐
|
|
83
|
+
│ snapshot │
|
|
84
|
+
│ centroid │ summary │
|
|
85
|
+
│ (eager) │ (lazy) │
|
|
86
|
+
└───────────┬───────────┘
|
|
87
|
+
│
|
|
88
|
+
[ retrieve ]
|
|
89
|
+
│
|
|
90
|
+
┌────────────────┴─────────────────┐
|
|
91
|
+
default verbose
|
|
92
|
+
│ │
|
|
93
|
+
snapshot summary with episode chain + labels
|
|
94
|
+
```
|
|
95
|
+
## Setup
|
|
96
|
+
### 1. Run [FalkorDB](https://github.com/falkordb/falkordb)
|
|
97
|
+
```bash
|
|
98
|
+
docker run -e REDIS_ARGS="--appendonly yes --appendfsync everysec" -v <PATH>:/var/lib/falkordb/data -p 3000:3000 -p 6379:6379 -d --name falkordb falkordb/falkordb
|
|
99
|
+
```
|
|
100
|
+
### 2.embedding model(Matryoshka)
|
|
101
|
+
**note: command below is cpu version**
|
|
102
|
+
```bash
|
|
103
|
+
docker run --name tei-embedding -d -p 8997:80 -v <PATH>:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-latest --model-id nomic-ai/nomic-embed-text-v1.5
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
Or embedding cloud api
|
|
107
|
+
|
|
108
|
+
### 3.NLI model(Optional, but recommended, saves some LLM calls)
|
|
109
|
+
**note: command below is cpu version**
|
|
110
|
+
```bash
|
|
111
|
+
docker run --name tei-nli -d -p 8999:80 -v <PATH>:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-latest --model-id cross-encoder/nli-deberta-v3-small
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
### Necessary configuration in your .env file
|
|
116
|
+
```python
|
|
117
|
+
ANTHROPIC_API_KEY=
|
|
118
|
+
FALKORDB_HOST=localhost
|
|
119
|
+
FALKORDB_PORT=6379
|
|
120
|
+
BUBBLE_EMBED_DIM=768
|
|
121
|
+
BUBBLE_EMBED_ENDPOINT=http://localhost:8997/v1/embeddings
|
|
122
|
+
|
|
123
|
+
#If you have NLI setup
|
|
124
|
+
BUBBLE_ENABLE_NLI=true
|
|
125
|
+
BUBBLE_NLI_ENDPOINT=http://localhost:8999/predict
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## How to use (extremely easy and clean)
|
|
129
|
+
### ingest
|
|
130
|
+
```python
|
|
131
|
+
import bubble
|
|
132
|
+
bubble.process(user_id, content, prior)
|
|
133
|
+
```
|
|
134
|
+
prior: the context of the content, for example prior messages
|
|
135
|
+
### retrieve
|
|
136
|
+
```python
|
|
137
|
+
import bubble
|
|
138
|
+
memory_user = await bubble.retrieve(user_id, query)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Replayability
|
|
142
|
+
Memory episodes are archived in `<project root>/data/archive` as jsonl\
|
|
143
|
+
You can reconstruct your whole memory graph by a single command ! WITHOUT A SINGLE LLM CALL !
|
|
144
|
+
```bash
|
|
145
|
+
python -m bubble.main replay <user_id>
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Tuning/Customization
|
|
149
|
+
See [.env.example](.env.example) for ALL tunable arguments.
|
|
150
|
+
|
|
151
|
+
## Limitations
|
|
152
|
+
Bubble is currently an experimental project for personal use.\
|
|
153
|
+
Current `promotion formula` and env vars might not be the best.\
|
|
154
|
+
`prompts` might have much room to improve. Patch **bubble.decomposer._SYSTEM** if it doesn't fit your use case.\
|
|
155
|
+
\
|
|
156
|
+
Leave a star if you like this work. Contributions are welcome.
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# Bubble
|
|
2
|
+
**Event Sourcing based belief formation system for long-term AI agent memory**
|
|
3
|
+
**[paper](https://doi.org/10.5281/zenodo.19438945)** **[Discord](https://discord.com/users/1319641673990672477)**
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## How it works
|
|
8
|
+
```
|
|
9
|
+
[ raw input ]
|
|
10
|
+
│
|
|
11
|
+
┌─────▼─────┐
|
|
12
|
+
│ decompose │
|
|
13
|
+
└─────┬─────┘
|
|
14
|
+
│
|
|
15
|
+
┌────────────────┴────────────────┐
|
|
16
|
+
ι ≥ θ ι < θ
|
|
17
|
+
│ │
|
|
18
|
+
vivid signal weak signal
|
|
19
|
+
│ │
|
|
20
|
+
┌────▼────┐ ┌──────▼──────┐
|
|
21
|
+
│ archive │ │ pool │
|
|
22
|
+
└────┬────┘ │ · · · · · │
|
|
23
|
+
│ │ · · · · · │
|
|
24
|
+
│ │ · · · · · │
|
|
25
|
+
│ └──────┬──────┘
|
|
26
|
+
│ │
|
|
27
|
+
│ enough gathered?
|
|
28
|
+
│ │
|
|
29
|
+
│ no ──────────┘
|
|
30
|
+
│ │ yes
|
|
31
|
+
│ ┌──────▼──────┐
|
|
32
|
+
│ │ cluster │
|
|
33
|
+
│ │ + score │
|
|
34
|
+
│ └──────┬──────┘
|
|
35
|
+
│ │
|
|
36
|
+
└──────────────┬──────────────────┘
|
|
37
|
+
│
|
|
38
|
+
┌─────▼─────┐
|
|
39
|
+
│ episode │ immutable
|
|
40
|
+
└─────┬─────┘
|
|
41
|
+
│
|
|
42
|
+
same topic chain?(NLI)
|
|
43
|
+
yes │ │ no
|
|
44
|
+
│ │
|
|
45
|
+
┌──────────▼─┐ ┌▼────────────┐
|
|
46
|
+
joins chain │ ... ──► e │ │ e │ new chain
|
|
47
|
+
└──────────┬─┘ └─────┬───────┘
|
|
48
|
+
│ │
|
|
49
|
+
┌─────▼───────────▼─────┐
|
|
50
|
+
│ snapshot │
|
|
51
|
+
│ centroid │ summary │
|
|
52
|
+
│ (eager) │ (lazy) │
|
|
53
|
+
└───────────┬───────────┘
|
|
54
|
+
│
|
|
55
|
+
[ retrieve ]
|
|
56
|
+
│
|
|
57
|
+
┌────────────────┴─────────────────┐
|
|
58
|
+
default verbose
|
|
59
|
+
│ │
|
|
60
|
+
snapshot summary with episode chain + labels
|
|
61
|
+
```
|
|
62
|
+
## Setup
|
|
63
|
+
### 1.run [Falkordb](https://github.com/falkordb/falkordb)
|
|
64
|
+
```bash
|
|
65
|
+
docker run -e REDIS_ARGS="--appendonly yes --appendfsync everysec" -v <PATH>:/var/lib/falkordb/data -p 3000:3000 -p 6379:6379 -d --name falkordb falkordb/falkordb
|
|
66
|
+
```
|
|
67
|
+
### 2.embedding model(Matryoshka)
|
|
68
|
+
**note: command below is cpu version**
|
|
69
|
+
```bash
|
|
70
|
+
docker run --name tei-embedding -d -p 8997:80 -v <PATH>:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-latest --model-id nomic-ai/nomic-embed-text-v1.5
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
Or embedding cloud api
|
|
74
|
+
|
|
75
|
+
### 3.NLI model(Optional, but recommended, saves some LLM calls)
|
|
76
|
+
**note: command below is cpu version**
|
|
77
|
+
```bash
|
|
78
|
+
docker run --name tei-nli -d -p 8999:80 -v <PATH>:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-latest --model-id cross-encoder/nli-deberta-v3-small
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
### Necessary configuration in your .env file
|
|
83
|
+
```python
|
|
84
|
+
ANTHROPIC_API_KEY=
|
|
85
|
+
FALKORDB_HOST=localhost
|
|
86
|
+
FALKORDB_PORT=6379
|
|
87
|
+
BUBBLE_EMBED_DIM=768
|
|
88
|
+
BUBBLE_EMBED_ENDPOINT=http://localhost:8997/v1/embeddings
|
|
89
|
+
|
|
90
|
+
#If you have NLI setup
|
|
91
|
+
BUBBLE_ENABLE_NLI=true
|
|
92
|
+
BUBBLE_NLI_ENDPOINT=http://localhost:8999/predict
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## How to use (extremely easy and clean)
|
|
96
|
+
### ingest
|
|
97
|
+
```python
|
|
98
|
+
import bubble
|
|
99
|
+
bubble.process(user_id, content, prior)
|
|
100
|
+
```
|
|
101
|
+
prior: the context of the content, for example prior messages
|
|
102
|
+
### retrieve
|
|
103
|
+
```python
|
|
104
|
+
import bubble
|
|
105
|
+
memory_user = await bubble.retrieve(user_id, query)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Replayability
|
|
109
|
+
Memory episodes are archived in `<project root>/data/archive` as jsonl\
|
|
110
|
+
You can reconstruct your whole memory graph by a single command ! WITHOUT A SINGLE LLM CALL !
|
|
111
|
+
```bash
|
|
112
|
+
python -m bubble.main replay <user_id>
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Tuning/Customization
|
|
116
|
+
See [.env.example](.env.example) for ALL tunable arguments.
|
|
117
|
+
|
|
118
|
+
## Limitations
|
|
119
|
+
Bubble is currently an experimental project for personal use.\
|
|
120
|
+
Current `promotion formula`, env vars might not be the best.
|
|
121
|
+
`prompts` might have much room to improve. Patch **bubble.decomposer._SYSTEM** if it doesn't fit your use case.\
|
|
122
|
+
\
|
|
123
|
+
Leave a star if you like this work. Contributions are welcome.
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "bubble-memory"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Event Sourcing based belief formation system for long-term AI agent memory"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "AutismAccelerator", email = "your.email@example.com"}
|
|
14
|
+
]
|
|
15
|
+
keywords = ["ai", "memory", "agent", "graph", "llm", "vector-store"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
24
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"anthropic>=0.86.0",
|
|
28
|
+
"falkordb>=1.6.0",
|
|
29
|
+
"httpx>=0.28.0",
|
|
30
|
+
"numpy>=2.4.4",
|
|
31
|
+
"python-dotenv>=1.2.2",
|
|
32
|
+
"scikit-learn>=1.8.0"
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
dev = ["build", "twine", "ruff"]
|
|
37
|
+
|
|
38
|
+
[project.urls]
|
|
39
|
+
Homepage = "https://github.com/AutismAccelerator/bubble"
|
|
40
|
+
Repository = "https://github.com/AutismAccelerator/bubble"
|
|
41
|
+
Issues = "https://github.com/AutismAccelerator/bubble/issues"
|
|
42
|
+
|
|
43
|
+
[tool.setuptools.packages.find]
|
|
44
|
+
where = ["src"]
|
|
45
|
+
|
|
46
|
+
[tool.ruff]
|
|
47
|
+
line-length = 230
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""
|
|
2
|
+
bubble — Hierarchical Memory Consolidation System
|
|
3
|
+
|
|
4
|
+
Typical agent usage
|
|
5
|
+
-------------------
|
|
6
|
+
import bubble
|
|
7
|
+
|
|
8
|
+
# Once, when a user session starts:
|
|
9
|
+
await bubble.init_graph(user_id)
|
|
10
|
+
|
|
11
|
+
# On every user message — retrieve and store in one call (preferred):
|
|
12
|
+
result = await bubble.observe(user_id, message, prior=agent_reply)
|
|
13
|
+
context = result["retrieved"] # SnapshotNode results relevant to this message
|
|
14
|
+
stored = result["stored"] # ingested node descriptors
|
|
15
|
+
|
|
16
|
+
# Or separately:
|
|
17
|
+
await bubble.process(user_id, message, prior=agent_reply)
|
|
18
|
+
context = await bubble.retrieve(user_id, query)
|
|
19
|
+
|
|
20
|
+
# Periodically (runs HDBSCAN + promotion):
|
|
21
|
+
await bubble.consolidate(user_id)
|
|
22
|
+
|
|
23
|
+
# retrieved is a list of dicts:
|
|
24
|
+
# {id, summary, members: [{id, summary, confidence_label}],
|
|
25
|
+
# context: [{rel, id, summary, confidence_label}]}
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import asyncio
|
|
29
|
+
|
|
30
|
+
from .db import get_graph, init_graph
|
|
31
|
+
from .embed import embed as _embed
|
|
32
|
+
from .decomposer import decompose as _decompose
|
|
33
|
+
from .ingest import _route_segments, ingest, replay
|
|
34
|
+
from .promote import promote
|
|
35
|
+
from .retrieve import _retrieve_from_vecs, retrieve
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def observe(user_id: str, message: str, prior: str | None = None, top_k: int = 3, verbose: bool = False) -> dict:
    """
    Decompose once, retrieve relevant memories, then store — all in a single call.

    Shares the decompose+embed step between retrieval and ingestion.
    Retrieval runs before storage so newly ingested segments don't appear in results.

    Args:
        user_id: owner of the memory graph.
        message: raw user message to decompose, retrieve against, and ingest.
        prior: optional conversational context the user is responding to.
        top_k: number of snapshot results to retrieve.
        verbose: pass-through retrieval verbosity flag.

    Returns:
        {
            "retrieved": [...],  # same format as retrieve()
            "stored": [...],     # same format as process()
        }
    """
    segments = await _decompose(message, prior)
    embeddings = list(await asyncio.gather(*[_embed(s["text"]) for s in segments]))

    g = get_graph(user_id)
    # BUG FIX: retrieval must run BEFORE storage — the docstring promises that
    # segments ingested from this very message do not appear in its own results.
    # The original stored first, so fresh segments could leak into `retrieved`.
    retrieved = await _retrieve_from_vecs(g, message, embeddings, top_k, verbose)
    stored = await _route_segments(user_id, segments, embeddings, prior)
    return {"retrieved": retrieved, "stored": stored}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
async def process(user_id: str, message: str, prior: str | None = None) -> list[dict]:
    """
    Ingest one message into the user's memory graph, then run promotion.

    Each decomposed segment is routed either to an episodic Episode
    (intensity >= 0.6 — archived to JSONL and given a Layer 1 node right away)
    or to the Layer 0 active pool, where it waits for consolidate().

    Args:
        user_id: owner of the memory graph.
        message: raw user message to ingest.
        prior: optional conversational context the user is responding to.

    Returns:
        Descriptors of the nodes created by ingestion.
    """
    created = await ingest(user_id, message, prior)
    await promote(user_id)
    return created
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
async def consolidate(user_id: str) -> dict:
    """
    Execute a full consolidation pass over a user's graph.

    Steps:
      1. Cluster the Layer 0 active pool with HDBSCAN.
      2. Promote clusters whose score crosses t_promo_score into Episodes
         (archives to JSONL, deletes SegmentNodes, assigns L2).

    Intended to run periodically rather than on every message.

    Returns:
        {"promoted": [...]}  # descriptors of the newly created Episodes
    """
    newly_promoted = await promote(user_id)
    return {"promoted": newly_promoted}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Names exported by `from bubble import *` — the package's public API surface.
__all__ = [
    "init_graph",
    "observe",
    "process",
    "consolidate",
    "retrieve",
    "ingest",
    "promote",
    "replay",
]
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from anthropic import AsyncAnthropic
|
|
6
|
+
from dotenv import load_dotenv
|
|
7
|
+
|
|
8
|
+
load_dotenv()
|
|
9
|
+
|
|
10
|
+
# Anthropic model used for summarization; overridable via the BUBBLE_MODEL env var.
MODEL = os.getenv("BUBBLE_MODEL", "claude-sonnet-4-6")
# Module-wide async client; picks up ANTHROPIC_API_KEY from the environment
# (load_dotenv() above pulls it from .env).
_client = AsyncAnthropic()

# System prompt for _summarize(): forces the model to emit exactly one
# subject-less sentence distilling the given statements into a memory record.
_SUMMARIZE_SYSTEM = """\
You distill one or more user statements into a single memory record.

Rules:
- Capture the belief, preference, event, or tendency the statements express.
- When multiple statements are given, identify the common pattern they share.
- Write exactly one sentence with no grammatical subject.
- Start with a verb or descriptor that names the belief, event, or pattern.
- Do not explain, qualify, or ask for clarification.\
"""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _now() -> str:
|
|
26
|
+
return datetime.now(timezone.utc).isoformat()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _normalize(vec: np.ndarray) -> list[float]:
|
|
30
|
+
"""L2-normalize a numpy vector and return as a Python list."""
|
|
31
|
+
norm = np.linalg.norm(vec)
|
|
32
|
+
return (vec / norm if norm > 0 else vec).tolist()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _centroid(nodes: list[dict]) -> list[float]:
|
|
36
|
+
"""Mean of source embeddings, L2-normalized."""
|
|
37
|
+
matrix = np.array([n["embedding"] for n in nodes], dtype=np.float32)
|
|
38
|
+
return _normalize(matrix.mean(axis=0))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
async def _summarize(nodes: list[dict]) -> str:
|
|
42
|
+
texts = "\n".join(f"- {n['raw_text']}" for n in nodes)
|
|
43
|
+
response = await _client.messages.create(
|
|
44
|
+
model=MODEL,
|
|
45
|
+
max_tokens=128,
|
|
46
|
+
system=_SUMMARIZE_SYSTEM,
|
|
47
|
+
messages=[{"role": "user", "content": texts}],
|
|
48
|
+
)
|
|
49
|
+
return response.content[0].text.strip()
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
_ARCHIVE_DIR = os.getenv("BUBBLE_ARCHIVE_DIR", "./data/archive")
|
|
6
|
+
_MKDIR_DONE = False
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _path(user_id: str) -> Path:
|
|
10
|
+
global _MKDIR_DONE
|
|
11
|
+
p = Path(_ARCHIVE_DIR)
|
|
12
|
+
if not _MKDIR_DONE:
|
|
13
|
+
p.mkdir(parents=True, exist_ok=True)
|
|
14
|
+
_MKDIR_DONE = True
|
|
15
|
+
return p / f"{user_id}.jsonl"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def read_segments(user_id: str):
|
|
19
|
+
"""Yield all archived segment records for a user."""
|
|
20
|
+
path = _path(user_id)
|
|
21
|
+
if not path.exists():
|
|
22
|
+
return
|
|
23
|
+
with path.open("r", encoding="utf-8") as f:
|
|
24
|
+
for line in f:
|
|
25
|
+
line = line.strip()
|
|
26
|
+
if line:
|
|
27
|
+
yield json.loads(line)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def write_segment(
|
|
31
|
+
user_id: str,
|
|
32
|
+
*,
|
|
33
|
+
text: str,
|
|
34
|
+
prior: str | None,
|
|
35
|
+
intensity: float,
|
|
36
|
+
valence: str,
|
|
37
|
+
timestamp: str,
|
|
38
|
+
) -> None:
|
|
39
|
+
"""Append one segment record to the user's JSONL archive."""
|
|
40
|
+
entry = {
|
|
41
|
+
"text": text,
|
|
42
|
+
"prior": prior,
|
|
43
|
+
"intensity": intensity,
|
|
44
|
+
"valence": valence,
|
|
45
|
+
"timestamp": timestamp,
|
|
46
|
+
}
|
|
47
|
+
with _path(user_id).open("a", encoding="utf-8") as f:
|
|
48
|
+
f.write(json.dumps(entry) + "\n")
|