3tears-agent-memory 0.14.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. 3tears_agent_memory-0.14.0/.gitignore +216 -0
  2. 3tears_agent_memory-0.14.0/LICENSE +21 -0
  3. 3tears_agent_memory-0.14.0/PKG-INFO +231 -0
  4. 3tears_agent_memory-0.14.0/README.md +206 -0
  5. 3tears_agent_memory-0.14.0/pyproject.toml +43 -0
  6. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/__init__.py +229 -0
  7. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/access.py +205 -0
  8. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/authorize.py +505 -0
  9. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/collections.py +3197 -0
  10. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/embedding_utils.py +138 -0
  11. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/entities.py +1134 -0
  12. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/events.py +229 -0
  13. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/extraction.py +758 -0
  14. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/integration.py +361 -0
  15. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/merge.py +192 -0
  16. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/middleware.py +297 -0
  17. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/__init__.py +222 -0
  18. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v001_create_memories_table.py +73 -0
  19. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v002_create_conversation_memory_refs.py +50 -0
  20. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v003_memory_column_reconciliation.py +110 -0
  21. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v004_memory_lifecycle_columns.py +75 -0
  22. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v005_memory_fts.py +78 -0
  23. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v006_memory_media_content.py +123 -0
  24. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v007_memory_chunks.py +96 -0
  25. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v008_restore_memories_agent_customer_not_null.py +73 -0
  26. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v009_media_composite_fk.py +132 -0
  27. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v010_media_content_composite_fk.py +94 -0
  28. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v011_memory_chunks_composite_fk.py +93 -0
  29. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v012_memories_media_composite_fk.py +133 -0
  30. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v013_datetime_to_datetimetz.py +256 -0
  31. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v014_memory_refs_date_columns.py +131 -0
  32. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v015_unified_memory_columns.py +123 -0
  33. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v016_backfill_memory_ids.py +276 -0
  34. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v017_memory_fk_flip.py +168 -0
  35. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v018_drop_legacy_memory_columns.py +171 -0
  36. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v019_conversation_id_not_null.py +82 -0
  37. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v020_memories_alias.py +64 -0
  38. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v021_memory_chunks_index_and_token.py +304 -0
  39. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v022_add_hnsw_gin_indexes.py +241 -0
  40. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/migrations/v023_fix_idx_chunks_message_id_start.py +79 -0
  41. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/prompts.py +112 -0
  42. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/py.typed +0 -0
  43. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/retrieval.py +633 -0
  44. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/tools.py +1584 -0
  45. 3tears_agent_memory-0.14.0/src/threetears/agent/memory/types.py +75 -0
  46. 3tears_agent_memory-0.14.0/tests/conftest.py +509 -0
  47. 3tears_agent_memory-0.14.0/tests/enforcement/__init__.py +0 -0
  48. 3tears_agent_memory-0.14.0/tests/enforcement/test_codebase_conventions.py +137 -0
  49. 3tears_agent_memory-0.14.0/tests/enforcement/test_docstring_conventions.py +166 -0
  50. 3tears_agent_memory-0.14.0/tests/enforcement/test_no_metallm_imports.py +51 -0
  51. 3tears_agent_memory-0.14.0/tests/enforcement/test_package_boundaries.py +66 -0
  52. 3tears_agent_memory-0.14.0/tests/integration/__init__.py +0 -0
  53. 3tears_agent_memory-0.14.0/tests/integration/conftest.py +139 -0
  54. 3tears_agent_memory-0.14.0/tests/integration/test_chunk_collection_methods.py +614 -0
  55. 3tears_agent_memory-0.14.0/tests/integration/test_memories_cross_agent_retrieval.py +675 -0
  56. 3tears_agent_memory-0.14.0/tests/integration/test_memory_l1_cache.py +585 -0
  57. 3tears_agent_memory-0.14.0/tests/integration/test_memory_pipeline.py +723 -0
  58. 3tears_agent_memory-0.14.0/tests/integration/test_memory_refs_collection.py +505 -0
  59. 3tears_agent_memory-0.14.0/tests/integration/test_migration_chain.py +600 -0
  60. 3tears_agent_memory-0.14.0/tests/test_authorize.py +543 -0
  61. 3tears_agent_memory-0.14.0/tests/test_extraction.py +899 -0
  62. 3tears_agent_memory-0.14.0/tests/test_lazy_init.py +81 -0
  63. 3tears_agent_memory-0.14.0/tests/test_memory_add_tool.py +337 -0
  64. 3tears_agent_memory-0.14.0/tests/test_memory_collections.py +578 -0
  65. 3tears_agent_memory-0.14.0/tests/test_memory_entities.py +203 -0
  66. 3tears_agent_memory-0.14.0/tests/test_memory_injection_middleware.py +350 -0
  67. 3tears_agent_memory-0.14.0/tests/test_merge.py +109 -0
  68. 3tears_agent_memory-0.14.0/tests/test_retrieval.py +737 -0
  69. 3tears_agent_memory-0.14.0/tests/test_smoke.py +4 -0
  70. 3tears_agent_memory-0.14.0/tests/test_tools.py +193 -0
  71. 3tears_agent_memory-0.14.0/tests/unit/test_chunk_collection_auth.py +131 -0
  72. 3tears_agent_memory-0.14.0/tests/unit/test_events.py +280 -0
  73. 3tears_agent_memory-0.14.0/tests/unit/test_memory_chunks_backlinks.py +231 -0
  74. 3tears_agent_memory-0.14.0/tests/unit/test_memory_scoping.py +311 -0
  75. 3tears_agent_memory-0.14.0/tests/unit/test_to_sqlalchemy_table_parity.py +483 -0
@@ -0,0 +1,216 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+
209
+ # Claude Code local state
210
+ .claude/
211
+
212
+ # prawduct session evidence (local governance artifacts, never shipped)
213
+ .prawduct/
214
+
215
+ # macOS folder metadata
216
+ .DS_Store
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mark Pace
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,231 @@
1
+ Metadata-Version: 2.4
2
+ Name: 3tears-agent-memory
3
+ Version: 0.14.0
4
+ Summary: Memory system for LLM agents -- extraction, retrieval, hybrid search, and MMR reranking
5
+ Project-URL: Repository, https://github.com/pacepace/3tears
6
+ Author: pace
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Framework :: AsyncIO
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.14
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Classifier: Typing :: Typed
17
+ Requires-Python: >=3.14
18
+ Requires-Dist: 3tears
19
+ Requires-Dist: 3tears-agent-acl
20
+ Requires-Dist: 3tears-langgraph
21
+ Requires-Dist: 3tears-observe
22
+ Requires-Dist: langchain-core
23
+ Requires-Dist: pgvector
24
+ Description-Content-Type: text/markdown
25
+
26
+ # 3tears Agent Memory
27
+
28
+ Memory system for LLM agents. Handles extraction of memorable facts from conversations, hybrid retrieval (semantic + full-text + recency), and memory lifecycle management.
29
+
30
+ Part of the [3tears](https://github.com/pacepace/3tears) framework.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ pip install 3tears-agent-memory
36
+ ```
37
+
38
+ ## Components
39
+
40
+ ### Collections are the single entry point for memory-table SQL
41
+
42
+ Every memory-table write, single-row read, batch read, and hybrid-search query goes through one of four `BaseCollection` subclasses; no consumer of this package holds an `asyncpg.Pool` reference directly.
43
+
44
+ - `MemoriesCollection` -- `memories` table. CRUD through `get` / `save_entity` / `delete`; complex queries through `hybrid_search`, `search_by_ids`, `search_by_semantic`, `search_by_fts`, `find_similar_for_dedup`, `count_by_user`, `fetch_content_for_recall`.
45
+ - `MediaCollection` -- `media` parent table. CRUD only.
46
+ - `MediaContentCollection` -- `media_content` child table. CRUD + `hybrid_search`, `search_by_ids`, `search_by_semantic`, `search_by_fts`, `fetch_content_for_recall`.
47
+ - `MemoryChunkCollection` -- `memory_chunks` child table. CRUD + `hybrid_search`, `search_by_ids`, `search_by_semantic`, `fetch_content_for_recall`.
48
+
49
+ All four resolve their L3 pool through `CollectionRegistry` (same pattern `ConversationCollection` uses); an L1 `SQLiteBackend` attached to the registry populates on `save_entity` and serves subsequent by-id `get` calls without an L3 round-trip. The `media` / `media_content` / `memory_chunks` tables are introduced by migrations v006 / v007.
50
+
51
+ Hybrid-search methods carry documented `# cache-bypass: <reason>` inline comments because the query shape (vector distance, FTS rank, multi-table joins) is not primary-key-addressable and the L1 row cache cannot serve the lookup. Keeping the SQL on the Collection preserves the single entry point. The cache-primitive enforcement walker recognises in-Collection bypass sites as legitimate and reports any bypass that leaks back into `retrieval.py` / `extraction.py` / `tools.py` as a violation.
52
+
53
+ ### MemoryExtractor
54
+
55
+ Extracts memorable facts from conversation turns. Uses a multi-stage pipeline: candidate extraction via LLM, deduplication against existing memories via embedding similarity, and action resolution (ADD / UPDATE / DELETE).
56
+
57
+ ```python
58
+ from threetears.agent.memory import (
59
+ MemoriesCollection,
60
+ MemoryConfig,
61
+ MemoryExtractor,
62
+ )
63
+
64
+ extractor = MemoryExtractor(
65
+ config=MemoryConfig(),
66
+ embedding_provider=my_embedding_provider,
67
+ chat_model_factory=my_chat_model_factory,
68
+ authorizer=authorizer_bundle,
69
+ memories_collection=memories_collection,
70
+ summary_callback=on_new_memory,
71
+ )
72
+
73
+ await extractor.extract(
74
+ user_id=user_id,
75
+ conversation_id=conv_id,
76
+ message_id_source=msg_id,
77
+ user_message="I just moved to Portland",
78
+ assistant_response="That's exciting! Portland has great food...",
79
+ turn_count=5,
80
+ agent_id=agent_id,
81
+ customer_id=customer_id,
82
+ )
83
+ ```
84
+
85
+ ### MemoryRetriever
86
+
87
+ Retrieves relevant memories using hybrid search: pgvector semantic similarity, PostgreSQL full-text search, recency decay, and MMR reranking for diversity. Takes the three search-bearing Collections at construction; no pool.
88
+
89
+ ```python
90
+ from threetears.agent.memory import MemoryRetriever, MemoryConfig
91
+
92
+ retriever = MemoryRetriever(
93
+ config=MemoryConfig(),
94
+ embedding_provider=my_embedding_provider,
95
+ authorizer=authorizer_bundle,
96
+ memories_collection=memories_collection,
97
+ media_content_collection=media_content_collection,
98
+ memory_chunk_collection=memory_chunk_collection,
99
+ )
100
+
101
+ result = await retriever.retrieve_with_candidates(
102
+ user_id,
103
+ "Tell me about Portland",
104
+ agent_id=agent_id,
105
+ customer_id=customer_id,
106
+ caller_user_id=user_id,
107
+ caller_agent_id=agent_id,
108
+ )
109
+
110
+ # result.context -- formatted string for injection into system prompt
111
+ # result.memories -- raw memory dicts with similarity scores
112
+ # result.media_content -- matched media content
113
+ # result.memory_chunks -- matched document chunks
114
+ ```
115
+
116
+ ### Protocols
117
+
118
+ Implement these to integrate with your infrastructure:
119
+
120
+ ```python
121
+ from threetears.agent.memory import EmbeddingProvider, ChatModelFactory
122
+
123
+ class MyEmbeddingProvider(EmbeddingProvider):
124
+ async def embed(self, text: str) -> tuple[list[float], int, UUID]:
125
+ # Returns (embedding_vector, token_count, model_id)
126
+ ...
127
+
128
+ class MyChatModelFactory(ChatModelFactory):
129
+ async def create_chat_model(self, purpose: str = "extraction"):
130
+ # Returns a langchain BaseChatModel
131
+ ...
132
+ ```
133
+
134
+ ### Tools
135
+
136
+ LangChain tools for agent use: memory search, recall, and explicit add. Factories take Collection references; no pool:
137
+
138
+ ```python
139
+ from threetears.agent.memory import (
140
+ load_memory_add_tool,
141
+ load_memory_search_tool,
142
+ load_memory_recall_tool,
143
+ )
144
+
145
+ search_tool = await load_memory_search_tool(
146
+ user_id=user_id,
147
+ embedding_provider=embedding_provider,
148
+ agent_id=agent_id,
149
+ customer_id=customer_id,
150
+ authorizer=authorizer_bundle,
151
+ memories_collection=memories_collection,
152
+ media_content_collection=media_content_collection,
153
+ memory_chunk_collection=memory_chunk_collection,
154
+ )
155
+ recall_tool = await load_memory_recall_tool(
156
+ user_id=user_id,
157
+ agent_id=agent_id,
158
+ customer_id=customer_id,
159
+ authorizer=authorizer_bundle,
160
+ memories_collection=memories_collection,
161
+ media_content_collection=media_content_collection,
162
+ memory_chunk_collection=memory_chunk_collection,
163
+ )
164
+ add_tool = await load_memory_add_tool(
165
+ user_id=user_id,
166
+ conversation_id=conv_id,
167
+ message_id=msg_id,
168
+ embedding_provider=embedding_provider,
169
+ agent_id=agent_id,
170
+ customer_id=customer_id,
171
+ authorizer=authorizer_bundle,
172
+ memories_collection=memories_collection,
173
+ )
174
+ ```
175
+
176
+ ### Configuration
177
+
178
+ ```python
179
+ from threetears.agent.memory import MemoryConfig
180
+
181
+ config = MemoryConfig(
182
+ similarity_threshold=0.4, # minimum cosine similarity for retrieval
183
+ detail_threshold=0.85, # threshold for including full memory detail
184
+ context_budget=15, # max memories in context
185
+ dedup_threshold=0.85, # similarity threshold for deduplication
186
+ max_candidates=10, # max candidates per extraction
187
+ )
188
+ ```
189
+
190
+ ## Database Schema
191
+
192
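+ Requires PostgreSQL with the `pgvector` extension. The package's own migration runner (`threetears.agent.memory.migrations.register`) produces the full schema per agent schema. Registered versions (this list documents v001 through v007; the package also ships migration modules v008 through v023 under `migrations/`):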
193
+
194
+ - **v001** -- `memories` (PK `memory_id`, pgvector `embedding`, scoping ids, content, summary, lifecycle timestamps).
195
+ - **v002** -- `conversation_memory_refs` (ledger of per-conversation surfaced items).
196
+ - **v003** -- column reconciliation: renames the PK and discriminator to match the package code (`id` to `memory_id`, `memory_type` to `type_memory`), drops columns the code does not read (`embedding_model`, `importance`, `metadata`, `date_accessed`), and relaxes `agent_id`/`customer_id` to nullable.
197
+ - **v004** -- lifecycle + conversation-link columns on `memories` (`conversation_id`, `message_id_source`, `is_deleted`, `media_id`, `date_deleted`, `summary`) with indexes.
198
+ - **v005** -- FTS: `search_vector TSVECTOR` + GIN index + maintenance trigger on `memories`.
199
+ - **v006** -- `media` (parent) + `media_content` (chunked extracted text with embedding + FTS).
200
+ - **v007** -- `memory_chunks` (document-style chunks with heading / page metadata + embedding + FTS).
201
+
202
+ Every FTS column is trigger-maintained from `content` + `summary` (weighted A/B); callers do not have to populate `search_vector` manually. Integration tests under `tests/integration/` exercise the full chain + every public API surface against `pgvector/pgvector:pg16` via testcontainers.
203
+
204
+ ## RBAC Enforcement
205
+
206
+ Memory reads, writes, and extractions flow through the unified rbac evaluator in `threetears.agent.acl`. Every (agent, customer) pair is a `memory`-type namespace in the `namespaces` table; each access resolves the namespace and evaluates one of three canonical actions against the caller's `(user_id, agent_id)` pair:
207
+
208
+ - `memory.read` -- retrieval / search / recall. Guarded on `MemoryRetriever.retrieve*`, `MemoriesCollection.find_by_user`, `MemoriesCollection.find_by_scope`, and the `memory_search` + `memory_recall` LangChain tools.
209
+ - `memory.write` -- user-initiated writes. Guarded on `MemoriesCollection.save_memory` and the `memory_add` LangChain tool.
210
+ - `memory.extract` -- agent-internal extraction path. Guarded on `MemoryExtractor.extract`; the owner short-circuit keeps the common case (agent emitting memories on its own namespace) grant-free.
211
+
212
+ Owner short-circuit: the evaluator allows any action when the calling agent owns the memory namespace. Agent-internal retrieval and extraction therefore work without explicit grants; user-initiated reads and writes require evaluator assignments.
213
+
214
+ Auto-assignment on first user-write: `memory_add` ensures a `MemoryOwner` assignment for the calling user on their first write (idempotent-by-state; the ensurer only fires when the user has zero memory rows in the target schema). Subsequent writes authorize against the materialized grant; admin-revoked grants stay revoked (the ensurer does not resurrect them).
215
+
216
+ Wiring shape: every consumer of the memory surface REQUIRES a `MemoryAuthorizerDependencies` bundle exposing:
217
+
218
+ - `acl_cache` -- shared `threetears.agent.acl.AclCache` instance;
219
+ - `membership_loader` + `grant_loader` -- the evaluator's loaders (`threetears.agent.acl.MembershipLoader` / `GrantLoader`);
220
+ - `namespace_collection` -- three-tier `NamespaceCollection` used to resolve the memory namespace via `get_by_owner_and_customer(namespace_type="memory", owner_agent_id, customer_id)` (create-if-absent flows through `save_entity`);
221
+ - `group_collection` + `group_member_collection` + `role_collection` + `role_assignment_collection` -- the rbac Collections the first-write owner-assignment path uses via `ensure_memory_owner_assignment(...)`.
222
+
223
+ There is no bypass. Every `MemoriesCollection`, `MemoryRetriever`, `MemoryExtractor`, and LangChain tool factory (`load_memory_search_tool`, `load_memory_add_tool`, `load_memory_recall_tool`) takes the bundle as a required constructor/factory argument; every code path that touches a memory row runs `authorize_memory_access` first. Callers that omit the bundle fail at the type checker and the Python signature boundary.
224
+
225
+ - Production wiring builds the bundle directly from the agent-side three-tier stack's Collections (`NatsProxyL3Backend`-backed `NamespaceCollection` / `GroupCollection` / ...).
226
+ - Test wiring injects a permissive fixture `permissive_memory_authorizer` (see `tests/conftest.py`) that carries in-memory Collection stand-ins and a permissive evaluator. Fixture usage is explicit in every test file that constructs a memory surface.
227
+ - Back-office / admin tooling that genuinely needs to read or write memories without an identity must construct its own bundle with Collections bound directly to an asyncpg pool; there is no global escape hatch.
228
+
229
+ See `threetears.agent.memory.authorize` for the full public surface.
230
+
231
+ The three platform roles (`MemoryOwner` / `MemoryReader` / `MemoryWriter`) carry the canonical action vocabulary. Platform-side migrations seed these roles and backfill the rbac rows required for evaluator resolution.
@@ -0,0 +1,206 @@
1
+ # 3tears Agent Memory
2
+
3
+ Memory system for LLM agents. Handles extraction of memorable facts from conversations, hybrid retrieval (semantic + full-text + recency), and memory lifecycle management.
4
+
5
+ Part of the [3tears](https://github.com/pacepace/3tears) framework.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install 3tears-agent-memory
11
+ ```
12
+
13
+ ## Components
14
+
15
+ ### Collections are the single entry point for memory-table SQL
16
+
17
+ Every memory-table write, single-row read, batch read, and hybrid-search query goes through one of four `BaseCollection` subclasses; no consumer of this package holds an `asyncpg.Pool` reference directly.
18
+
19
+ - `MemoriesCollection` -- `memories` table. CRUD through `get` / `save_entity` / `delete`; complex queries through `hybrid_search`, `search_by_ids`, `search_by_semantic`, `search_by_fts`, `find_similar_for_dedup`, `count_by_user`, `fetch_content_for_recall`.
20
+ - `MediaCollection` -- `media` parent table. CRUD only.
21
+ - `MediaContentCollection` -- `media_content` child table. CRUD + `hybrid_search`, `search_by_ids`, `search_by_semantic`, `search_by_fts`, `fetch_content_for_recall`.
22
+ - `MemoryChunkCollection` -- `memory_chunks` child table. CRUD + `hybrid_search`, `search_by_ids`, `search_by_semantic`, `fetch_content_for_recall`.
23
+
24
+ All four resolve their L3 pool through `CollectionRegistry` (same pattern `ConversationCollection` uses); an L1 `SQLiteBackend` attached to the registry populates on `save_entity` and serves subsequent by-id `get` calls without an L3 round-trip. The `media` / `media_content` / `memory_chunks` tables are introduced by migrations v006 / v007.
25
+
26
+ Hybrid-search methods carry documented `# cache-bypass: <reason>` inline comments because the query shape (vector distance, FTS rank, multi-table joins) is not primary-key-addressable and the L1 row cache cannot serve the lookup. Keeping the SQL on the Collection preserves the single entry point. The cache-primitive enforcement walker recognises in-Collection bypass sites as legitimate and reports any bypass that leaks back into `retrieval.py` / `extraction.py` / `tools.py` as a violation.
27
+
28
+ ### MemoryExtractor
29
+
30
+ Extracts memorable facts from conversation turns. Uses a multi-stage pipeline: candidate extraction via LLM, deduplication against existing memories via embedding similarity, and action resolution (ADD / UPDATE / DELETE).
31
+
32
+ ```python
33
+ from threetears.agent.memory import (
34
+ MemoriesCollection,
35
+ MemoryConfig,
36
+ MemoryExtractor,
37
+ )
38
+
39
+ extractor = MemoryExtractor(
40
+ config=MemoryConfig(),
41
+ embedding_provider=my_embedding_provider,
42
+ chat_model_factory=my_chat_model_factory,
43
+ authorizer=authorizer_bundle,
44
+ memories_collection=memories_collection,
45
+ summary_callback=on_new_memory,
46
+ )
47
+
48
+ await extractor.extract(
49
+ user_id=user_id,
50
+ conversation_id=conv_id,
51
+ message_id_source=msg_id,
52
+ user_message="I just moved to Portland",
53
+ assistant_response="That's exciting! Portland has great food...",
54
+ turn_count=5,
55
+ agent_id=agent_id,
56
+ customer_id=customer_id,
57
+ )
58
+ ```
59
+
60
+ ### MemoryRetriever
61
+
62
+ Retrieves relevant memories using hybrid search: pgvector semantic similarity, PostgreSQL full-text search, recency decay, and MMR reranking for diversity. Takes the three search-bearing Collections at construction; no pool.
63
+
64
+ ```python
65
+ from threetears.agent.memory import MemoryRetriever, MemoryConfig
66
+
67
+ retriever = MemoryRetriever(
68
+ config=MemoryConfig(),
69
+ embedding_provider=my_embedding_provider,
70
+ authorizer=authorizer_bundle,
71
+ memories_collection=memories_collection,
72
+ media_content_collection=media_content_collection,
73
+ memory_chunk_collection=memory_chunk_collection,
74
+ )
75
+
76
+ result = await retriever.retrieve_with_candidates(
77
+ user_id,
78
+ "Tell me about Portland",
79
+ agent_id=agent_id,
80
+ customer_id=customer_id,
81
+ caller_user_id=user_id,
82
+ caller_agent_id=agent_id,
83
+ )
84
+
85
+ # result.context -- formatted string for injection into system prompt
86
+ # result.memories -- raw memory dicts with similarity scores
87
+ # result.media_content -- matched media content
88
+ # result.memory_chunks -- matched document chunks
89
+ ```
90
+
91
+ ### Protocols
92
+
93
+ Implement these to integrate with your infrastructure:
94
+
95
+ ```python
96
+ from threetears.agent.memory import EmbeddingProvider, ChatModelFactory
97
+
98
+ class MyEmbeddingProvider(EmbeddingProvider):
99
+ async def embed(self, text: str) -> tuple[list[float], int, UUID]:
100
+ # Returns (embedding_vector, token_count, model_id)
101
+ ...
102
+
103
+ class MyChatModelFactory(ChatModelFactory):
104
+ async def create_chat_model(self, purpose: str = "extraction"):
105
+ # Returns a langchain BaseChatModel
106
+ ...
107
+ ```
108
+
109
+ ### Tools
110
+
111
+ LangChain tools for agent use: memory search, recall, and explicit add. Factories take Collection references; no pool:
112
+
113
+ ```python
114
+ from threetears.agent.memory import (
115
+ load_memory_add_tool,
116
+ load_memory_search_tool,
117
+ load_memory_recall_tool,
118
+ )
119
+
120
+ search_tool = await load_memory_search_tool(
121
+ user_id=user_id,
122
+ embedding_provider=embedding_provider,
123
+ agent_id=agent_id,
124
+ customer_id=customer_id,
125
+ authorizer=authorizer_bundle,
126
+ memories_collection=memories_collection,
127
+ media_content_collection=media_content_collection,
128
+ memory_chunk_collection=memory_chunk_collection,
129
+ )
130
+ recall_tool = await load_memory_recall_tool(
131
+ user_id=user_id,
132
+ agent_id=agent_id,
133
+ customer_id=customer_id,
134
+ authorizer=authorizer_bundle,
135
+ memories_collection=memories_collection,
136
+ media_content_collection=media_content_collection,
137
+ memory_chunk_collection=memory_chunk_collection,
138
+ )
139
+ add_tool = await load_memory_add_tool(
140
+ user_id=user_id,
141
+ conversation_id=conv_id,
142
+ message_id=msg_id,
143
+ embedding_provider=embedding_provider,
144
+ agent_id=agent_id,
145
+ customer_id=customer_id,
146
+ authorizer=authorizer_bundle,
147
+ memories_collection=memories_collection,
148
+ )
149
+ ```
150
+
151
+ ### Configuration
152
+
153
+ ```python
154
+ from threetears.agent.memory import MemoryConfig
155
+
156
+ config = MemoryConfig(
157
+ similarity_threshold=0.4, # minimum cosine similarity for retrieval
158
+ detail_threshold=0.85, # threshold for including full memory detail
159
+ context_budget=15, # max memories in context
160
+ dedup_threshold=0.85, # similarity threshold for deduplication
161
+ max_candidates=10, # max candidates per extraction
162
+ )
163
+ ```
164
+
165
+ ## Database Schema
166
+
167
+ Requires PostgreSQL with the `pgvector` extension. The package's own migration runner (`threetears.agent.memory.migrations.register`) produces the full schema per agent schema. Registered versions:
168
+
169
+ - **v001** -- `memories` (PK `memory_id`, pgvector `embedding`, scoping ids, content, summary, lifecycle timestamps).
170
+ - **v002** -- `conversation_memory_refs` (ledger of per-conversation surfaced items).
171
+ - **v003** -- column reconciliation: renames PK and discriminator to match the package code (`id` to `memory_id`, `memory_type` to `type_memory`), drops columns the code does not read (`embedding_model`, `importance`, `metadata`, `date_accessed`), loosens `agent_id`/`customer_id` to NULL.
172
+ - **v004** -- lifecycle + conversation-link columns on `memories` (`conversation_id`, `message_id_source`, `is_deleted`, `media_id`, `date_deleted`, `summary`) with indexes.
173
+ - **v005** -- FTS: `search_vector TSVECTOR` + GIN index + maintenance trigger on `memories`.
174
+ - **v006** -- `media` (parent) + `media_content` (chunked extracted text with embedding + FTS).
175
+ - **v007** -- `memory_chunks` (document-style chunks with heading / page metadata + embedding + FTS).
176
+
177
+ Every FTS column is trigger-maintained from `content` + `summary` (weighted A/B); callers do not have to populate `search_vector` manually. Integration tests under `tests/integration/` exercise the full chain + every public API surface against `pgvector/pgvector:pg16` via testcontainers.
178
+
179
+ ## RBAC Enforcement
180
+
181
+ Memory reads, writes, and extractions flow through the unified rbac evaluator in `threetears.agent.acl`. Every (agent, customer) pair maps to a `memory`-type namespace in the `namespaces` table; each access resolves that namespace and evaluates one of three canonical actions against the caller's `(user_id, agent_id)` pair:
182
+
183
+ - `memory.read` -- retrieval / search / recall. Guarded on `MemoryRetriever.retrieve*`, `MemoriesCollection.find_by_user`, `MemoriesCollection.find_by_scope`, and the `memory_search` + `memory_recall` LangChain tools.
184
+ - `memory.write` -- user-initiated writes. Guarded on `MemoriesCollection.save_memory` and the `memory_add` LangChain tool.
185
+ - `memory.extract` -- agent-internal extraction path. Guarded on `MemoryExtractor.extract`; the owner short-circuit keeps the common case (agent emitting memories on its own namespace) grant-free.
186
+
187
+ Owner short-circuit: the evaluator allows any action when the calling agent owns the memory namespace. Agent-internal retrieval and extraction therefore work without explicit grants; user-initiated reads and writes require evaluator assignments.
188
+
189
+ Auto-assignment on first user-write: `memory_add` ensures a `MemoryOwner` assignment for the calling user on their first write (idempotent-by-state; the ensurer only fires when the user has zero memory rows in the target schema). Subsequent writes authorize against the materialized grant; admin-revoked grants stay revoked (the ensurer does not resurrect them).
190
+
191
+ Wiring shape: every consumer of the memory surface REQUIRES a `MemoryAuthorizerDependencies` bundle exposing:
192
+
193
+ - `acl_cache` -- shared `threetears.agent.acl.AclCache` instance;
194
+ - `membership_loader` + `grant_loader` -- the evaluator's loaders (`threetears.agent.acl.MembershipLoader` / `GrantLoader`);
195
+ - `namespace_collection` -- three-tier `NamespaceCollection` used to resolve the memory namespace via `get_by_owner_and_customer(namespace_type="memory", owner_agent_id=..., customer_id=...)` (create-if-absent flows through `save_entity`);
196
+ - `group_collection` + `group_member_collection` + `role_collection` + `role_assignment_collection` -- the rbac Collections the first-write owner-assignment path uses via `ensure_memory_owner_assignment(...)`.
197
+
198
+ There is no bypass. Every `MemoriesCollection`, `MemoryRetriever`, `MemoryExtractor`, and LangChain tool factory (`load_memory_search_tool`, `load_memory_add_tool`, `load_memory_recall_tool`) takes the bundle as a required constructor/factory argument; every code path that touches a memory row runs `authorize_memory_access` first. Callers that omit the bundle are rejected both by the type checker and, at runtime, by the Python call signature (a missing required argument raises `TypeError`).
199
+
200
+ - Production wiring builds the bundle directly from the agent-side three-tier stack's Collections (`NatsProxyL3Backend`-backed `NamespaceCollection` / `GroupCollection` / ...).
201
+ - Test wiring injects a permissive fixture `permissive_memory_authorizer` (see `tests/conftest.py`) that carries in-memory Collection stand-ins and a permissive evaluator. Fixture usage is explicit in every test file that constructs a memory surface.
202
+ - Back-office / admin tooling that genuinely needs to read or write memories without an identity must construct its own bundle with Collections bound directly to an asyncpg pool; there is no global escape hatch.
203
+
204
+ See `threetears.agent.memory.authorize` for the full public surface.
205
+
206
+ The three platform roles (`MemoryOwner` / `MemoryReader` / `MemoryWriter`) carry the canonical action vocabulary. Platform-side migrations seed these roles and backfill the rbac rows required for evaluator resolution.