bubble-memory 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 AutismAccelerator
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,156 @@
1
+ Metadata-Version: 2.4
2
+ Name: bubble-memory
3
+ Version: 0.1.0
4
+ Summary: Event Sourcing based belief formation system for long-term AI agent memory
5
+ Author-email: AutismAccelerator <your.email@example.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/AutismAccelerator/bubble
8
+ Project-URL: Repository, https://github.com/AutismAccelerator/bubble
9
+ Project-URL: Issues, https://github.com/AutismAccelerator/bubble/issues
10
+ Keywords: ai,memory,agent,graph,llm,vector-store
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: anthropic>=0.86.0
23
+ Requires-Dist: falkordb>=1.6.0
24
+ Requires-Dist: httpx>=0.28.0
25
+ Requires-Dist: numpy>=2.4.4
26
+ Requires-Dist: python-dotenv>=1.2.2
27
+ Requires-Dist: scikit-learn>=1.8.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: build; extra == "dev"
30
+ Requires-Dist: twine; extra == "dev"
31
+ Requires-Dist: ruff; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ # Bubble
35
+ **Event Sourcing based belief formation system for long-term AI agent memory**
36
+ **[paper](https://doi.org/10.5281/zenodo.19438945)** **[Discord](https://discord.com/users/1319641673990672477)**
37
+
38
+ ---
39
+
40
+ ## How it works
41
+ ```
42
+ [ raw input ]
43
+
44
+ ┌─────▼─────┐
45
+ │ decompose │
46
+ └─────┬─────┘
47
+
48
+ ┌────────────────┴────────────────┐
49
+ ι ≥ θ ι < θ
50
+ │ │
51
+ vivid signal weak signal
52
+ │ │
53
+ ┌────▼────┐ ┌──────▼──────┐
54
+ │ archive │ │ pool │
55
+ └────┬────┘ │ · · · · · │
56
+ │ │ · · · · · │
57
+ │ │ · · · · · │
58
+ │ └──────┬──────┘
59
+ │ │
60
+ │ enough gathered?
61
+ │ │
62
+ │ no ──────────┘
63
+ │ │ yes
64
+ │ ┌──────▼──────┐
65
+ │ │ cluster │
66
+ │ │ + score │
67
+ │ └──────┬──────┘
68
+ │ │
69
+ └──────────────┬──────────────────┘
70
+
71
+ ┌─────▼─────┐
72
+ │ episode │ immutable
73
+ └─────┬─────┘
74
+
75
+ same topic chain?(NLI)
76
+ yes │ │ no
77
+ │ │
78
+ ┌──────────▼─┐ ┌▼────────────┐
79
+ joins chain │ ... ──► e │ │ e │ new chain
80
+ └──────────┬─┘ └─────┬───────┘
81
+ │ │
82
+ ┌─────▼───────────▼─────┐
83
+ │ snapshot │
84
+ │ centroid │ summary │
85
+ │ (eager) │ (lazy) │
86
+ └───────────┬───────────┘
87
+
88
+ [ retrieve ]
89
+
90
+ ┌────────────────┴─────────────────┐
91
+ default verbose
92
+ │ │
93
+ snapshot summary with episode chain + labels
94
+ ```
95
+ ## Setup
96
+ ### 1.run [Falkordb](https://github.com/falkordb/falkordb)
97
+ ```bash
98
+ docker run -e REDIS_ARGS="--appendonly yes --appendfsync everysec" -v <PATH>:/var/lib/falkordb/data -p 3000:3000 -p 6379:6379 -d --name falkordb falkordb/falkordb
99
+ ```
100
+ ### 2.embedding model(Matryoshka)
101
+ **note: command below is cpu version**
102
+ ```bash
103
+ docker run --name tei-embedding -d -p 8997:80 -v <PATH>:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-latest --model-id nomic-ai/nomic-embed-text-v1.5
104
+
105
+ ```
106
+ Or embedding cloud api
107
+
108
+ ### 3.NLI model(Optional, but recommended, saves some LLM calls)
109
+ **note: command below is cpu version**
110
+ ```bash
111
+ docker run --name tei-nli -d -p 8999:80 -v <PATH>:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-latest --model-id cross-encoder/nli-deberta-v3-small
112
+ ```
113
+
114
+
115
+ ### Necessary configuration in your .env file
116
+ ```python
117
+ ANTHROPIC_API_KEY=
118
+ FALKORDB_HOST=localhost
119
+ FALKORDB_PORT=6379
120
+ BUBBLE_EMBED_DIM=768
121
+ BUBBLE_EMBED_ENDPOINT=http://localhost:8997/v1/embeddings
122
+
123
+ #If you have NLI setup
124
+ BUBBLE_ENABLE_NLI=true
125
+ BUBBLE_NLI_ENDPOINT=http://localhost:8999/predict
126
+ ```
127
+
128
+ ## How to use (extremely easy and clean)
129
+ ### ingest
130
+ ```python
131
+ import bubble
132
+ await bubble.process(user_id, content, prior)
133
+ ```
134
+ prior: the context of the content, for example prior messages
135
+ ### retrieve
136
+ ```python
137
+ import bubble
138
+ memory_user = await bubble.retrieve(user_id, query)
139
+ ```
140
+
141
+ ## Replayability
142
+ Memory episodes are archived in `<project root>/data/archive` as jsonl\
143
+ You can reconstruct your whole memory graph by a single command ! WITHOUT A SINGLE LLM CALL !
144
+ ```bash
145
+ python -m bubble.main replay <user_id>
146
+ ```
147
+
148
+ ## Tuning/Customization
149
+ See [.env.example](.env.example) for ALL tunable arguments.
150
+
151
+ ## Limitations
152
+ Bubble is currently an experimental project for personal use.\
153
+ Current `promotion formula`, env vars might not be the best.
154
+ `prompts` might have much room to improve. Patch **bubble.decomposer._SYSTEM** if it doesn't fit your use case.\
155
+ \
156
+ Leave a star if you like this work. Contributions are welcome.
@@ -0,0 +1,123 @@
1
+ # Bubble
2
+ **Event Sourcing based belief formation system for long-term AI agent memory**
3
+ **[paper](https://doi.org/10.5281/zenodo.19438945)** **[Discord](https://discord.com/users/1319641673990672477)**
4
+
5
+ ---
6
+
7
+ ## How it works
8
+ ```
9
+ [ raw input ]
10
+
11
+ ┌─────▼─────┐
12
+ │ decompose │
13
+ └─────┬─────┘
14
+
15
+ ┌────────────────┴────────────────┐
16
+ ι ≥ θ ι < θ
17
+ │ │
18
+ vivid signal weak signal
19
+ │ │
20
+ ┌────▼────┐ ┌──────▼──────┐
21
+ │ archive │ │ pool │
22
+ └────┬────┘ │ · · · · · │
23
+ │ │ · · · · · │
24
+ │ │ · · · · · │
25
+ │ └──────┬──────┘
26
+ │ │
27
+ │ enough gathered?
28
+ │ │
29
+ │ no ──────────┘
30
+ │ │ yes
31
+ │ ┌──────▼──────┐
32
+ │ │ cluster │
33
+ │ │ + score │
34
+ │ └──────┬──────┘
35
+ │ │
36
+ └──────────────┬──────────────────┘
37
+
38
+ ┌─────▼─────┐
39
+ │ episode │ immutable
40
+ └─────┬─────┘
41
+
42
+ same topic chain?(NLI)
43
+ yes │ │ no
44
+ │ │
45
+ ┌──────────▼─┐ ┌▼────────────┐
46
+ joins chain │ ... ──► e │ │ e │ new chain
47
+ └──────────┬─┘ └─────┬───────┘
48
+ │ │
49
+ ┌─────▼───────────▼─────┐
50
+ │ snapshot │
51
+ │ centroid │ summary │
52
+ │ (eager) │ (lazy) │
53
+ └───────────┬───────────┘
54
+
55
+ [ retrieve ]
56
+
57
+ ┌────────────────┴─────────────────┐
58
+ default verbose
59
+ │ │
60
+ snapshot summary with episode chain + labels
61
+ ```
62
+ ## Setup
63
+ ### 1.run [Falkordb](https://github.com/falkordb/falkordb)
64
+ ```bash
65
+ docker run -e REDIS_ARGS="--appendonly yes --appendfsync everysec" -v <PATH>:/var/lib/falkordb/data -p 3000:3000 -p 6379:6379 -d --name falkordb falkordb/falkordb
66
+ ```
67
+ ### 2.embedding model(Matryoshka)
68
+ **note: command below is cpu version**
69
+ ```bash
70
+ docker run --name tei-embedding -d -p 8997:80 -v <PATH>:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-latest --model-id nomic-ai/nomic-embed-text-v1.5
71
+
72
+ ```
73
+ Or embedding cloud api
74
+
75
+ ### 3.NLI model(Optional, but recommended, saves some LLM calls)
76
+ **note: command below is cpu version**
77
+ ```bash
78
+ docker run --name tei-nli -d -p 8999:80 -v <PATH>:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-latest --model-id cross-encoder/nli-deberta-v3-small
79
+ ```
80
+
81
+
82
+ ### Necessary configuration in your .env file
83
+ ```python
84
+ ANTHROPIC_API_KEY=
85
+ FALKORDB_HOST=localhost
86
+ FALKORDB_PORT=6379
87
+ BUBBLE_EMBED_DIM=768
88
+ BUBBLE_EMBED_ENDPOINT=http://localhost:8997/v1/embeddings
89
+
90
+ #If you have NLI setup
91
+ BUBBLE_ENABLE_NLI=true
92
+ BUBBLE_NLI_ENDPOINT=http://localhost:8999/predict
93
+ ```
94
+
95
+ ## How to use (extremely easy and clean)
96
+ ### ingest
97
+ ```python
98
+ import bubble
99
+ await bubble.process(user_id, content, prior)
100
+ ```
101
+ prior: the context of the content, for example prior messages
102
+ ### retrieve
103
+ ```python
104
+ import bubble
105
+ memory_user = await bubble.retrieve(user_id, query)
106
+ ```
107
+
108
+ ## Replayability
109
+ Memory episodes are archived in `<project root>/data/archive` as jsonl\
110
+ You can reconstruct your whole memory graph by a single command ! WITHOUT A SINGLE LLM CALL !
111
+ ```bash
112
+ python -m bubble.main replay <user_id>
113
+ ```
114
+
115
+ ## Tuning/Customization
116
+ See [.env.example](.env.example) for ALL tunable arguments.
117
+
118
+ ## Limitations
119
+ Bubble is currently an experimental project for personal use.\
120
+ Current `promotion formula`, env vars might not be the best.
121
+ `prompts` might have much room to improve. Patch **bubble.decomposer._SYSTEM** if it doesn't fit your use case.\
122
+ \
123
+ Leave a star if you like this work. Contributions are welcome.
@@ -0,0 +1,47 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "bubble-memory"
7
+ version = "0.1.0"
8
+ description = "Event Sourcing based belief formation system for long-term AI agent memory"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "AutismAccelerator", email = "your.email@example.com"}
14
+ ]
15
+ keywords = ["ai", "memory", "agent", "graph", "llm", "vector-store"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Programming Language :: Python :: 3.13",
24
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
25
+ ]
26
+ dependencies = [
27
+ "anthropic>=0.86.0",
28
+ "falkordb>=1.6.0",
29
+ "httpx>=0.28.0",
30
+ "numpy>=2.4.4",
31
+ "python-dotenv>=1.2.2",
32
+ "scikit-learn>=1.8.0"
33
+ ]
34
+
35
+ [project.optional-dependencies]
36
+ dev = ["build", "twine", "ruff"]
37
+
38
+ [project.urls]
39
+ Homepage = "https://github.com/AutismAccelerator/bubble"
40
+ Repository = "https://github.com/AutismAccelerator/bubble"
41
+ Issues = "https://github.com/AutismAccelerator/bubble/issues"
42
+
43
+ [tool.setuptools.packages.find]
44
+ where = ["src"]
45
+
46
+ [tool.ruff]
47
+ line-length = 230
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,101 @@
1
+ """
2
+ bubble — Hierarchical Memory Consolidation System
3
+
4
+ Typical agent usage
5
+ -------------------
6
+ import bubble
7
+
8
+ # Once, when a user session starts:
9
+ await bubble.init_graph(user_id)
10
+
11
+ # On every user message — retrieve and store in one call (preferred):
12
+ result = await bubble.observe(user_id, message, prior=agent_reply)
13
+ context = result["retrieved"] # SnapshotNode results relevant to this message
14
+ stored = result["stored"] # ingested node descriptors
15
+
16
+ # Or separately:
17
+ await bubble.process(user_id, message, prior=agent_reply)
18
+ context = await bubble.retrieve(user_id, query)
19
+
20
+ # Periodically (runs HDBSCAN + promotion):
21
+ await bubble.consolidate(user_id)
22
+
23
+ # retrieved is a list of dicts:
24
+ # {id, summary, members: [{id, summary, confidence_label}],
25
+ # context: [{rel, id, summary, confidence_label}]}
26
+ """
27
+
28
+ import asyncio
29
+
30
+ from .db import get_graph, init_graph
31
+ from .embed import embed as _embed
32
+ from .decomposer import decompose as _decompose
33
+ from .ingest import _route_segments, ingest, replay
34
+ from .promote import promote
35
+ from .retrieve import _retrieve_from_vecs, retrieve
36
+
37
+
38
async def observe(user_id: str, message: str, prior: str | None = None, top_k: int = 3, verbose: bool = False) -> dict:
    """
    Decompose once, retrieve relevant memories, then store — all in a single call.

    Shares the decompose+embed step between retrieval and ingestion.
    Retrieval runs before storage so newly ingested segments don't appear in results.

    Args:
        user_id: owner of the memory graph to query and update.
        message: raw user input; decomposed into segments.
        prior: optional conversational context the user is responding to.
        top_k: maximum number of snapshot results to retrieve.
        verbose: pass-through retrieval flag (episode chains + labels).

    Returns:
        {
            "retrieved": [...],  # same format as retrieve()
            "stored": [...],     # same format as process()
        }
    """
    segments = await _decompose(message, prior)
    embeddings = list(await asyncio.gather(*[_embed(s["text"]) for s in segments]))

    g = get_graph(user_id)
    # BUGFIX: retrieve BEFORE routing/storing. The documented contract is that
    # segments ingested from this very message must not show up in its own
    # retrieval results; the previous order stored first and violated that.
    retrieved = await _retrieve_from_vecs(g, message, embeddings, top_k, verbose)
    stored = await _route_segments(user_id, segments, embeddings, prior)
    return {"retrieved": retrieved, "stored": stored}
58
+
59
+
60
async def process(user_id: str, message: str, prior: str | None = None) -> list[dict]:
    """
    Store one message in the user's memory graph.

    Each decomposed segment is routed either to an episodic Episode
    (intensity >= 0.6: archived to JSONL and given a Layer 1 node
    immediately) or parked in the Layer 0 active pool until
    consolidate() runs.

    prior: optional conversational context the user is responding to.
    Returns the list of created node descriptors.
    """
    created = await ingest(user_id, message, prior)
    await promote(user_id)
    return created
74
+
75
+
76
async def consolidate(user_id: str) -> dict:
    """
    Run the consolidation pipeline over a user's graph:
    cluster the Layer 0 active pool with HDBSCAN, then promote clusters
    whose score crosses the t_promo_score threshold into Episodes
    (JSONL archival, SegmentNode deletion and L2 assignment happen
    inside promote()).

    Intended to be called periodically, not on every message.

    Returns:
        {"promoted": [...]}  # newly created Episode descriptors
    """
    newly_promoted = await promote(user_id)
    return {"promoted": newly_promoted}
90
+
91
+
92
# Public surface of the bubble package (names re-exported from submodules
# above); underscore-prefixed helpers are intentionally excluded.
__all__ = [
    "init_graph",
    "observe",
    "process",
    "consolidate",
    "retrieve",
    "ingest",
    "promote",
    "replay",
]
@@ -0,0 +1,49 @@
1
+ import os
2
+ from datetime import datetime, timezone
3
+
4
+ import numpy as np
5
+ from anthropic import AsyncAnthropic
6
+ from dotenv import load_dotenv
7
+
8
load_dotenv()  # pull .env so the BUBBLE_* / ANTHROPIC_* variables below resolve

# LLM used by _summarize(); overridable via the BUBBLE_MODEL env var.
MODEL = os.getenv("BUBBLE_MODEL", "claude-sonnet-4-6")
# Shared async client — presumably picks up ANTHROPIC_API_KEY from the
# environment loaded above (see README .env section); verify against SDK docs.
_client = AsyncAnthropic()

# System prompt for _summarize(): forces a single subject-less sentence
# distilling one or more user statements into a memory record.
_SUMMARIZE_SYSTEM = """\
You distill one or more user statements into a single memory record.

Rules:
- Capture the belief, preference, event, or tendency the statements express.
- When multiple statements are given, identify the common pattern they share.
- Write exactly one sentence with no grammatical subject.
- Start with a verb or descriptor that names the belief, event, or pattern.
- Do not explain, qualify, or ask for clarification.\
"""
23
+
24
+
25
+ def _now() -> str:
26
+ return datetime.now(timezone.utc).isoformat()
27
+
28
+
29
+ def _normalize(vec: np.ndarray) -> list[float]:
30
+ """L2-normalize a numpy vector and return as a Python list."""
31
+ norm = np.linalg.norm(vec)
32
+ return (vec / norm if norm > 0 else vec).tolist()
33
+
34
+
35
def _centroid(nodes: list[dict]) -> list[float]:
    """Unit-length mean of the nodes' source embeddings."""
    embeddings = [node["embedding"] for node in nodes]
    stacked = np.array(embeddings, dtype=np.float32)
    return _normalize(stacked.mean(axis=0))
39
+
40
+
41
async def _summarize(nodes: list[dict]) -> str:
    """Collapse the nodes' raw texts into one belief sentence via the LLM."""
    bullet_list = "\n".join(f"- {n['raw_text']}" for n in nodes)
    reply = await _client.messages.create(
        model=MODEL,
        max_tokens=128,
        system=_SUMMARIZE_SYSTEM,
        messages=[{"role": "user", "content": bullet_list}],
    )
    return reply.content[0].text.strip()
@@ -0,0 +1,48 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+
5
# Directory holding per-user JSONL archives; overridable via BUBBLE_ARCHIVE_DIR.
_ARCHIVE_DIR = os.getenv("BUBBLE_ARCHIVE_DIR", "./data/archive")
# Process-wide flag so the archive directory is only mkdir'd once (see _path).
_MKDIR_DONE = False
7
+
8
+
9
def _path(user_id: str) -> Path:
    """Return the user's JSONL archive path, creating the archive dir on
    first call only (cached via the module-level _MKDIR_DONE flag)."""
    global _MKDIR_DONE
    base = Path(_ARCHIVE_DIR)
    if not _MKDIR_DONE:
        base.mkdir(parents=True, exist_ok=True)
        _MKDIR_DONE = True
    return base / f"{user_id}.jsonl"
16
+
17
+
18
def read_segments(user_id: str):
    """Yield every archived segment record for a user, skipping blank lines."""
    archive = _path(user_id)
    if not archive.exists():
        return
    with archive.open("r", encoding="utf-8") as handle:
        for raw in handle:
            raw = raw.strip()
            if raw:
                yield json.loads(raw)
28
+
29
+
30
def write_segment(
    user_id: str,
    *,
    text: str,
    prior: str | None,
    intensity: float,
    valence: str,
    timestamp: str,
) -> None:
    """Append one segment record to the user's JSONL archive file."""
    record = {
        "text": text,
        "prior": prior,
        "intensity": intensity,
        "valence": valence,
        "timestamp": timestamp,
    }
    line = json.dumps(record) + "\n"
    with _path(user_id).open("a", encoding="utf-8") as handle:
        handle.write(line)