hga 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hga-0.1.0/LICENSE +21 -0
- hga-0.1.0/MANIFEST.in +3 -0
- hga-0.1.0/PKG-INFO +152 -0
- hga-0.1.0/README.md +105 -0
- hga-0.1.0/hga/__init__.py +8 -0
- hga-0.1.0/hga/consolidation.py +328 -0
- hga-0.1.0/hga/embeddings/__init__.py +3 -0
- hga-0.1.0/hga/embeddings/base.py +22 -0
- hga-0.1.0/hga/embeddings/local.py +56 -0
- hga-0.1.0/hga/forgetting.py +440 -0
- hga-0.1.0/hga/gate/__init__.py +3 -0
- hga-0.1.0/hga/gate/governance.py +261 -0
- hga-0.1.0/hga/l1/__init__.py +4 -0
- hga-0.1.0/hga/l1/codebook.py +336 -0
- hga-0.1.0/hga/l1/drift.py +146 -0
- hga-0.1.0/hga/l1/filtering.py +15 -0
- hga-0.1.0/hga/l1/retrieval.py +90 -0
- hga-0.1.0/hga/l2/__init__.py +3 -0
- hga-0.1.0/hga/l2/vault.py +146 -0
- hga-0.1.0/hga/l3/__init__.py +8 -0
- hga-0.1.0/hga/l3/matcher.py +308 -0
- hga-0.1.0/hga/l3/neuron.py +422 -0
- hga-0.1.0/hga/l3/replay.py +618 -0
- hga-0.1.0/hga/memory.py +810 -0
- hga-0.1.0/hga/storage/__init__.py +3 -0
- hga-0.1.0/hga/storage/base.py +75 -0
- hga-0.1.0/hga/storage/sqlite_backend.py +201 -0
- hga-0.1.0/hga.egg-info/PKG-INFO +152 -0
- hga-0.1.0/hga.egg-info/SOURCES.txt +32 -0
- hga-0.1.0/hga.egg-info/dependency_links.txt +1 -0
- hga-0.1.0/hga.egg-info/requires.txt +24 -0
- hga-0.1.0/hga.egg-info/top_level.txt +1 -0
- hga-0.1.0/pyproject.toml +72 -0
- hga-0.1.0/setup.cfg +4 -0
hga-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 HGA Authors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
hga-0.1.0/MANIFEST.in
ADDED
hga-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hga
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Hybrid Governance Architecture — Multi-layer agent memory system with vector quantization, deterministic vault, and semantic neuron routing
|
|
5
|
+
Author-email: Ahmet <ahmet@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ahmet/hga
|
|
8
|
+
Project-URL: Repository, https://github.com/ahmet/hga
|
|
9
|
+
Project-URL: Issues, https://github.com/ahmet/hga/issues
|
|
10
|
+
Project-URL: Documentation, https://github.com/ahmet/hga#readme
|
|
11
|
+
Keywords: llm,agent,memory,vector-quantization,semantic-routing,governance,retrieval
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: numpy>=1.24
|
|
27
|
+
Requires-Dist: scikit-learn>=1.3
|
|
28
|
+
Requires-Dist: sentence-transformers>=2.2
|
|
29
|
+
Provides-Extra: openai
|
|
30
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
|
31
|
+
Provides-Extra: groq
|
|
32
|
+
Requires-Dist: groq>=0.4; extra == "groq"
|
|
33
|
+
Requires-Dist: langchain-groq; extra == "groq"
|
|
34
|
+
Provides-Extra: all
|
|
35
|
+
Requires-Dist: openai>=1.0; extra == "all"
|
|
36
|
+
Requires-Dist: groq>=0.4; extra == "all"
|
|
37
|
+
Requires-Dist: langchain-groq; extra == "all"
|
|
38
|
+
Provides-Extra: dev
|
|
39
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
40
|
+
Requires-Dist: matplotlib; extra == "dev"
|
|
41
|
+
Requires-Dist: seaborn; extra == "dev"
|
|
42
|
+
Requires-Dist: pandas; extra == "dev"
|
|
43
|
+
Requires-Dist: tqdm; extra == "dev"
|
|
44
|
+
Requires-Dist: build; extra == "dev"
|
|
45
|
+
Requires-Dist: twine; extra == "dev"
|
|
46
|
+
Dynamic: license-file
|
|
47
|
+
|
|
48
|
+
# HGA — Hybrid Governance Architecture
|
|
49
|
+
|
|
50
|
+
[](https://pypi.org/project/hga-memory/)
|
|
51
|
+
[](https://www.python.org/downloads/)
|
|
52
|
+
[](https://opensource.org/licenses/MIT)
|
|
53
|
+
|
|
54
|
+
A multi-layer agent memory system that provides intelligent query routing, exact recall, and semantic neuron maturation for LLM-based agents.
|
|
55
|
+
|
|
56
|
+
## Features
|
|
57
|
+
|
|
58
|
+
- **L1 — RM (Routing Memory: A Vector-Quantization-Based Retrieval Primitive):** Vector quantization with K-means centroids, multi-probe retrieval, online EMA adaptation, and drift detection
|
|
59
|
+
- **L2 — Deterministic Vault:** Exact key-value recall with SHA-256 integrity verification, policy tagging (Public/Internal/Sensitive/Restricted), and full audit trails
|
|
60
|
+
- **L3 — Semantic Neuron Layer:** 4-stage neuron maturation (Stage 0→3), causal reasoning chains, structural similarity transfer, and safe Stage 3 replay
|
|
61
|
+
- **Governance Gate:** Intelligent routing across 5 execution paths based on confidence, margin, neuron maturity, and sensitivity
|
|
62
|
+
- **Consolidation:** Active trace writing + passive LLM-free capability growth (co-occurrence mining, edge finalization, RM reshaping)
|
|
63
|
+
- **Real Embeddings:** Uses `all-MiniLM-L6-v2` (384-dimensional) — no mock or synthetic embeddings
|
|
64
|
+
|
|
65
|
+
## Installation
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pip install hga-memory
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
With LLM provider support:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Groq
|
|
75
|
+
pip install hga-memory[groq]
|
|
76
|
+
|
|
77
|
+
# OpenAI
|
|
78
|
+
pip install hga-memory[openai]
|
|
79
|
+
|
|
80
|
+
# Everything
|
|
81
|
+
pip install hga-memory[all]
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Quick Start
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from hga import AgentMemory
|
|
88
|
+
|
|
89
|
+
# Initialize memory system
|
|
90
|
+
memory = AgentMemory()
|
|
91
|
+
|
|
92
|
+
# Store information
|
|
93
|
+
memory.store("Project deadline is March 15, 2026", policy_tag="Internal")
|
|
94
|
+
memory.store("API key format: sk-xxxx", policy_tag="Sensitive")
|
|
95
|
+
|
|
96
|
+
# Query with automatic routing
|
|
97
|
+
result = memory.recall("When is the project deadline?")
|
|
98
|
+
print(result.answer)
|
|
99
|
+
print(f"Path: {result.path}, Tokens: {result.tokens_used}")
|
|
100
|
+
|
|
101
|
+
# The gate automatically routes:
|
|
102
|
+
# - Exact facts → Deterministic Vault (0 tokens)
|
|
103
|
+
# - Semantic queries → RM retrieval
|
|
104
|
+
# - Mature patterns → Stage 3 replay (0 tokens)
|
|
105
|
+
# - Sensitive queries → Deterministic path (safe)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Architecture
|
|
109
|
+
|
|
110
|
+
```
|
|
111
|
+
Query → Governance Gate → Route Decision
|
|
112
|
+
│
|
|
113
|
+
├── Stage0Path → Full LLM call (new pattern)
|
|
114
|
+
├── FastSemantic → L1 retrieval + LLM
|
|
115
|
+
├── VerifyPath → L1 + L3 verify + LLM
|
|
116
|
+
├── Stage3Path → Causal replay (no LLM)
|
|
117
|
+
└── Deterministic → L2 exact lookup (no LLM)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Gate Decision Logic
|
|
121
|
+
|
|
122
|
+
| Condition | Path |
|
|
123
|
+
|---|---|
|
|
124
|
+
| Sensitivity=High OR edge weight < -1 | DeterministicPath |
|
|
125
|
+
| Stage 3 + confidence >= 0.6 | Stage3Path |
|
|
126
|
+
| Stage 2 + confidence >= 0.6 + margin >= 0.1 | VerifyPath |
|
|
127
|
+
| Stage >= 1 + confidence >= 0.6 | FastSemanticPath |
|
|
128
|
+
| No matching neuron | Stage0Path |
|
|
129
|
+
|
|
130
|
+
## Neuron Maturation
|
|
131
|
+
|
|
132
|
+
Neurons progress through 4 stages based on successful executions:
|
|
133
|
+
|
|
134
|
+
- **Stage 0→1:** weight > 0, 3+ successful hits
|
|
135
|
+
- **Stage 1→2:** weight > +2, 8+ hits
|
|
136
|
+
- **Stage 2→3:** weight > +2.5, 5 consecutive clean executions
|
|
137
|
+
|
|
138
|
+
Edge weights update: `w += source_weight × outcome` (clipped to [-3, +3])
|
|
139
|
+
|
|
140
|
+
## Configuration
|
|
141
|
+
|
|
142
|
+
| Parameter | Default | Description |
|
|
143
|
+
|---|---|---|
|
|
144
|
+
| `K` | 64 | Number of RM centroids |
|
|
145
|
+
| `ALPHA` | 0.6 | Confidence threshold |
|
|
146
|
+
| `DELTA_MIN` | 0.1 | Margin threshold |
|
|
147
|
+
| `eta` | 0.01 | EMA learning rate |
|
|
148
|
+
| `embedding_dim` | 384 | Embedding dimensionality |
|
|
149
|
+
|
|
150
|
+
## License
|
|
151
|
+
|
|
152
|
+
MIT
|
hga-0.1.0/README.md
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# HGA — Hybrid Governance Architecture
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/hga-memory/)
|
|
4
|
+
[](https://www.python.org/downloads/)
|
|
5
|
+
[](https://opensource.org/licenses/MIT)
|
|
6
|
+
|
|
7
|
+
A multi-layer agent memory system that provides intelligent query routing, exact recall, and semantic neuron maturation for LLM-based agents.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **L1 — RM (Routing Memory: A Vector-Quantization-Based Retrieval Primitive):** Vector quantization with K-means centroids, multi-probe retrieval, online EMA adaptation, and drift detection
|
|
12
|
+
- **L2 — Deterministic Vault:** Exact key-value recall with SHA-256 integrity verification, policy tagging (Public/Internal/Sensitive/Restricted), and full audit trails
|
|
13
|
+
- **L3 — Semantic Neuron Layer:** 4-stage neuron maturation (Stage 0→3), causal reasoning chains, structural similarity transfer, and safe Stage 3 replay
|
|
14
|
+
- **Governance Gate:** Intelligent routing across 5 execution paths based on confidence, margin, neuron maturity, and sensitivity
|
|
15
|
+
- **Consolidation:** Active trace writing + passive LLM-free capability growth (co-occurrence mining, edge finalization, RM reshaping)
|
|
16
|
+
- **Real Embeddings:** Uses `all-MiniLM-L6-v2` (384-dimensional) — no mock or synthetic embeddings
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install hga-memory
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
With LLM provider support:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
# Groq
|
|
28
|
+
pip install hga-memory[groq]
|
|
29
|
+
|
|
30
|
+
# OpenAI
|
|
31
|
+
pip install hga-memory[openai]
|
|
32
|
+
|
|
33
|
+
# Everything
|
|
34
|
+
pip install hga-memory[all]
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quick Start
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from hga import AgentMemory
|
|
41
|
+
|
|
42
|
+
# Initialize memory system
|
|
43
|
+
memory = AgentMemory()
|
|
44
|
+
|
|
45
|
+
# Store information
|
|
46
|
+
memory.store("Project deadline is March 15, 2026", policy_tag="Internal")
|
|
47
|
+
memory.store("API key format: sk-xxxx", policy_tag="Sensitive")
|
|
48
|
+
|
|
49
|
+
# Query with automatic routing
|
|
50
|
+
result = memory.recall("When is the project deadline?")
|
|
51
|
+
print(result.answer)
|
|
52
|
+
print(f"Path: {result.path}, Tokens: {result.tokens_used}")
|
|
53
|
+
|
|
54
|
+
# The gate automatically routes:
|
|
55
|
+
# - Exact facts → Deterministic Vault (0 tokens)
|
|
56
|
+
# - Semantic queries → RM retrieval
|
|
57
|
+
# - Mature patterns → Stage 3 replay (0 tokens)
|
|
58
|
+
# - Sensitive queries → Deterministic path (safe)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Architecture
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
Query → Governance Gate → Route Decision
|
|
65
|
+
│
|
|
66
|
+
├── Stage0Path → Full LLM call (new pattern)
|
|
67
|
+
├── FastSemantic → L1 retrieval + LLM
|
|
68
|
+
├── VerifyPath → L1 + L3 verify + LLM
|
|
69
|
+
├── Stage3Path → Causal replay (no LLM)
|
|
70
|
+
└── Deterministic → L2 exact lookup (no LLM)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Gate Decision Logic
|
|
74
|
+
|
|
75
|
+
| Condition | Path |
|
|
76
|
+
|---|---|
|
|
77
|
+
| Sensitivity=High OR edge weight < -1 | DeterministicPath |
|
|
78
|
+
| Stage 3 + confidence >= 0.6 | Stage3Path |
|
|
79
|
+
| Stage 2 + confidence >= 0.6 + margin >= 0.1 | VerifyPath |
|
|
80
|
+
| Stage >= 1 + confidence >= 0.6 | FastSemanticPath |
|
|
81
|
+
| No matching neuron | Stage0Path |
|
|
82
|
+
|
|
83
|
+
## Neuron Maturation
|
|
84
|
+
|
|
85
|
+
Neurons progress through 4 stages based on successful executions:
|
|
86
|
+
|
|
87
|
+
- **Stage 0→1:** weight > 0, 3+ successful hits
|
|
88
|
+
- **Stage 1→2:** weight > +2, 8+ hits
|
|
89
|
+
- **Stage 2→3:** weight > +2.5, 5 consecutive clean executions
|
|
90
|
+
|
|
91
|
+
Edge weights update: `w += source_weight × outcome` (clipped to [-3, +3])
|
|
92
|
+
|
|
93
|
+
## Configuration
|
|
94
|
+
|
|
95
|
+
| Parameter | Default | Description |
|
|
96
|
+
|---|---|---|
|
|
97
|
+
| `K` | 64 | Number of RM centroids |
|
|
98
|
+
| `ALPHA` | 0.6 | Confidence threshold |
|
|
99
|
+
| `DELTA_MIN` | 0.1 | Margin threshold |
|
|
100
|
+
| `eta` | 0.01 | EMA learning rate |
|
|
101
|
+
| `embedding_dim` | 384 | Embedding dimensionality |
|
|
102
|
+
|
|
103
|
+
## License
|
|
104
|
+
|
|
105
|
+
MIT
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""HGA — Hybrid Governance Architecture for Agent Memory."""
|
|
2
|
+
|
|
3
|
+
from .memory import AgentMemory, RecallResult
|
|
4
|
+
from .gate.governance import ExecutionPath, GateDecision
|
|
5
|
+
from .l3.replay import ReplayResult
|
|
6
|
+
|
|
7
|
+
__all__ = ["AgentMemory", "RecallResult", "ExecutionPath", "GateDecision", "ReplayResult"]
|
|
8
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
"""Passive Consolidation — LLM-free structural learning between sessions.
|
|
2
|
+
|
|
3
|
+
Runs when no active LLM call is in progress. Operations:
|
|
4
|
+
1. Edge finalization: confirm/decay provisional neuron weights
|
|
5
|
+
2. DV compaction: deduplicate by hash, clean dangling pointers
|
|
6
|
+
3. Drift alarm reset: finalize session statistics
|
|
7
|
+
4. Co-occurrence mining: centroid pairs probed together → L3 edges
|
|
8
|
+
5. RM reshaping: stable L3 routes trigger codebook splits
|
|
9
|
+
6. Salience-guided forgetting: lifecycle management + neuron pruning/demotion
|
|
10
|
+
|
|
11
|
+
All operations are strictly LLM-free.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
import time
|
|
16
|
+
from collections import Counter, defaultdict
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from typing import Dict, List, Optional, Tuple, TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from .l1.codebook import Codebook
|
|
22
|
+
from .l1.drift import DriftMonitor
|
|
23
|
+
from .l2.vault import DeterministicVault
|
|
24
|
+
from .l3.matcher import NeuronMatcher
|
|
25
|
+
from .storage.sqlite_backend import SQLiteBackend
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger("hga.consolidation")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ─── Co-occurrence Edge ────────────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
@dataclass
class CooccurrenceEdge:
    """Edge between two neurons that are frequently co-probed.

    Created during passive consolidation when two distinct neurons are probed
    within the same time window at least ``COOCCURRENCE_THRESHOLD`` times.
    """

    # Neuron ids; the miner stores the pair in sorted order, so a <= b.
    neuron_a_id: str
    neuron_b_id: str
    # Total number of observed co-probes for this pair (accumulated across cycles).
    count: int = 0
    # Edge strength; currently mirrors `count` as a float — see the mining step.
    weight: float = 0.0
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ─── Passive Consolidation Result ──────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
@dataclass
class ConsolidationResult:
    """Summary of a passive consolidation cycle.

    Every counter defaults to zero; each of the six consolidation operations
    fills in only the fields it is responsible for, so a result from a cycle
    where an operation was skipped simply reports zeros for that group.
    """

    # Operation 1 — edge finalization.
    edges_finalized: int = 0
    edges_decayed: int = 0
    edges_removed: int = 0
    provisional_confirmed: int = 0
    provisional_removed: int = 0
    # Operation 2 — DV compaction (record counts before/after hash dedup).
    dv_before: int = 0
    dv_after: int = 0
    dv_removed: int = 0
    # Operation 3 — drift baseline reset.
    drift_reset: bool = False
    # Operation 4 — co-occurrence mining.
    cooccurrence_edges_created: int = 0
    # Operation 5 — RM reshaping (number of codebook centroid splits).
    reshaping_splits: int = 0
    # Operation 6 — salience-guided forgetting (lifecycle + neuron pruning).
    forgetting_kept: int = 0
    forgetting_summarized: int = 0
    forgetting_migrated: int = 0
    forgetting_deleted: int = 0
    neurons_pruned: int = 0
    neurons_demoted: int = 0
    privacy_protected: int = 0
    # Wall-clock duration of the whole cycle, in milliseconds.
    duration_ms: float = 0.0
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class PassiveConsolidation:
    """Runs LLM-free consolidation operations on the HGA memory system.

    Must be called explicitly (e.g., between sessions or during idle time).
    Guarantees zero LLM calls — all operations are structural/statistical.
    """

    # Co-occurrence mining: minimum number of co-probes before an edge is created.
    COOCCURRENCE_THRESHOLD = 3

    # Edge finalization: multiplicative decay for low-performing weights, and
    # the absolute weight below which an edge counts as removed.
    EDGE_DECAY_FACTOR = 0.8
    EDGE_REMOVE_THRESHOLD = 0.1

    # Minimum neuron age (hit count) before edge finalization applies.
    EDGE_MIN_AGE = 5

    # RM reshaping: minimum bucket size before a centroid split is considered.
    RESHAPE_BUCKET_SIZE_THRESHOLD = 10

    def __init__(self, forgetting_enabled: bool = False):
        """Initialize the consolidator.

        Args:
            forgetting_enabled: If True, the salience-guided forgetting step
                runs during passive cycles (requires a storage backend to be
                passed to :meth:`run_passive_cycle`).
        """
        # (neuron_id, unix timestamp) pairs, recorded in probe order.
        self._probe_history: List[Tuple[str, float]] = []
        # Keyed by the sorted (neuron_a_id, neuron_b_id) pair.
        self._cooccurrence_edges: Dict[Tuple[str, str], "CooccurrenceEdge"] = {}
        self._forgetting_enabled = forgetting_enabled
        # Constructed lazily in _run_forgetting (avoids a circular import).
        self._forgetting: Optional["MemoryForgetting"] = None

    @property
    def cooccurrence_edges(self) -> List["CooccurrenceEdge"]:
        """Snapshot list of the currently known co-occurrence edges."""
        return list(self._cooccurrence_edges.values())

    def record_probe(self, neuron_id: str) -> None:
        """Record that a neuron was probed during the active phase."""
        self._probe_history.append((neuron_id, time.time()))

    def run_passive_cycle(
        self,
        matcher: "NeuronMatcher",
        vault: "DeterministicVault",
        codebook: "Codebook",
        drift: "DriftMonitor",
        storage: Optional["SQLiteBackend"] = None,
    ) -> "ConsolidationResult":
        """Run a full passive consolidation cycle.

        Args:
            matcher: L3 neuron matcher
            vault: L2 deterministic vault
            codebook: L1 codebook
            drift: Drift monitor
            storage: SQLite backend (needed for forgetting, optional)

        Returns:
            ConsolidationResult with operation counts.
        """
        start = time.time()
        result = ConsolidationResult()

        # 1. Edge finalization (always run, cheap)
        self._finalize_edges(matcher, result)

        # 2. DV compaction (always run, cheap)
        self._compact_dv(vault, result)

        # 3. Drift alarm reset (always run, cheap)
        self._reset_drift(drift, result)

        # 4. Co-occurrence mining (needs probe history)
        if self._probe_history:
            self._mine_cooccurrence(matcher, result)

        # 5. RM reshaping (only worthwhile once mature neurons exist)
        has_mature = any(n.stage >= 2 for n in matcher.get_all_neurons())
        if has_mature:
            self._reshape_codebook(matcher, codebook, result)

        # 6. Salience-guided forgetting (needs storage and explicit opt-in)
        if storage is not None and self._forgetting_enabled:
            self._run_forgetting(storage, vault, matcher, result)

        result.duration_ms = (time.time() - start) * 1000
        # Lazy %-style args: formatting only happens if INFO is enabled.
        logger.info(
            "Passive cycle complete: %.1fms, edges_finalized=%d, "
            "cooccurrence=%d, splits=%d, forgetting_deleted=%d",
            result.duration_ms,
            result.edges_finalized,
            result.cooccurrence_edges_created,
            result.reshaping_splits,
            result.forgetting_deleted,
        )
        return result

    # ─── Operation 1: Edge Finalization ──────────────────────────────────

    def _finalize_edges(self, matcher: "NeuronMatcher",
                        result: "ConsolidationResult") -> None:
        """Confirm or decay neuron edge weights.

        Provisional neurons old enough (EDGE_MIN_AGE hits) are either
        confirmed (success rate >= 0.8) or removed. Confirmed neurons with a
        positive weight but a success rate below 0.5 have their weight decayed.
        """
        neurons_to_remove: List[str] = []

        for neuron in matcher.get_all_neurons():
            # Handle provisional neurons: confirm-or-remove once mature enough.
            if neuron.provisional:
                if neuron.hit_count >= self.EDGE_MIN_AGE:
                    if neuron.success_rate >= 0.8:
                        neuron.provisional = False
                        result.provisional_confirmed += 1
                    else:
                        neurons_to_remove.append(neuron.id)
                        result.provisional_removed += 1
                continue

            # For confirmed neurons: decay low-performing weights.
            if neuron.hit_count >= self.EDGE_MIN_AGE:
                if neuron.success_rate >= 0.8:
                    result.edges_finalized += 1
                elif neuron.edge_weight > 0 and neuron.success_rate < 0.5:
                    neuron.edge_weight *= self.EDGE_DECAY_FACTOR
                    result.edges_decayed += 1
                    if abs(neuron.edge_weight) < self.EDGE_REMOVE_THRESHOLD:
                        # NOTE(review): only counted — the edge/neuron itself
                        # is not removed here; confirm this is intended.
                        result.edges_removed += 1

        # Remove failed provisionals (outside the iteration over neurons).
        for nid in neurons_to_remove:
            matcher.remove_neuron(nid)

    # ─── Operation 2: DV Compaction ──────────────────────────────────────

    def _compact_dv(self, vault: "DeterministicVault",
                    result: "ConsolidationResult") -> None:
        """Deduplicate DV records by content hash.

        Keeps the first record seen for each hash; later duplicates are
        deleted. NOTE(review): reaches into ``vault._local`` directly —
        consider a public compaction API on the vault.
        """
        result.dv_before = vault.count

        seen_hashes = {}
        duplicates = []

        # Iterate over a copy so deletion below cannot invalidate iteration.
        for key, record in list(vault._local.items()):
            if record.hash in seen_hashes:
                duplicates.append(key)
            else:
                seen_hashes[record.hash] = key

        for key in duplicates:
            del vault._local[key]

        result.dv_after = vault.count
        result.dv_removed = len(duplicates)

    # ─── Operation 3: Drift Reset ───────────────────────────────────────

    def _reset_drift(self, drift: "DriftMonitor",
                     result: "ConsolidationResult") -> None:
        """Reset the drift baseline if any session activity was observed."""
        if drift.in_drift or drift.episode_count > 0:
            drift.reset_baseline()
            result.drift_reset = True

    # ─── Operation 4: Co-occurrence Mining ──────────────────────────────

    def _mine_cooccurrence(self, matcher: "NeuronMatcher",
                           result: "ConsolidationResult") -> None:
        """Analyze probe history for co-occurring neuron pairs.

        Two neurons probed within a short time window → co-occurrence.
        Threshold co-occurrences → create an edge. Probe history is cleared
        after mining so each probe contributes to at most one cycle.
        """
        WINDOW_SECONDS = 60.0  # probes within 60s are considered co-occurring

        pair_counts: Counter = Counter()
        history = self._probe_history

        for i, (nid_i, ts_i) in enumerate(history):
            for nid_j, ts_j in history[i + 1:]:
                # History is appended in time order, so once we leave the
                # window no later probe can re-enter it.
                if ts_j - ts_i > WINDOW_SECONDS:
                    break
                if nid_i != nid_j:
                    pair = tuple(sorted([nid_i, nid_j]))
                    pair_counts[pair] += 1

        # Create or strengthen edges for frequent pairs.
        for pair, count in pair_counts.items():
            if count >= self.COOCCURRENCE_THRESHOLD:
                if pair not in self._cooccurrence_edges:
                    # Verify both neurons still exist before creating an edge.
                    n_a = matcher.get_neuron(pair[0])
                    n_b = matcher.get_neuron(pair[1])
                    if n_a and n_b:
                        self._cooccurrence_edges[pair] = CooccurrenceEdge(
                            neuron_a_id=pair[0],
                            neuron_b_id=pair[1],
                            count=count,
                            weight=float(count),
                        )
                        result.cooccurrence_edges_created += 1
                else:
                    # Update existing edge; weight mirrors the running count.
                    self._cooccurrence_edges[pair].count += count
                    self._cooccurrence_edges[pair].weight = float(
                        self._cooccurrence_edges[pair].count
                    )

        # Clear probe history after mining.
        self._probe_history.clear()

    # ─── Operation 5: RM Reshaping ─────────────────────────────────────

    def _reshape_codebook(self, matcher: "NeuronMatcher",
                          codebook: "Codebook",
                          result: "ConsolidationResult") -> None:
        """L3→L1 feedback: split hot centroids used by mature neurons."""
        if not codebook.fitted:
            return

        # Find centroids frequently used by mature (stage >= 2) neurons.
        hot_centroids: Counter = Counter()
        for neuron in matcher.get_all_neurons():
            if neuron.stage < 2:
                continue
            # Encode neuron centroid to find its primary codebook centroid.
            cid, _ = codebook.encode(neuron.pattern_embedding)
            hot_centroids[cid] += 1

        # Split large hot buckets, hottest first.
        for cid, neuron_count in hot_centroids.most_common():
            if cid >= codebook.K:
                # Stale centroid id (codebook shrank/changed); skip.
                continue
            bucket = codebook.buckets.get(cid)
            if not bucket:
                continue
            bucket_size = len(bucket.item_ids)
            if bucket_size >= self.RESHAPE_BUCKET_SIZE_THRESHOLD:
                if codebook.maybe_split(cid):
                    result.reshaping_splits += 1

    # ─── Operation 6: Salience-Guided Forgetting ─────────────────────

    @property
    def forgetting_enabled(self) -> bool:
        """Whether the forgetting step runs during passive cycles."""
        return self._forgetting_enabled

    @forgetting_enabled.setter
    def forgetting_enabled(self, value: bool) -> None:
        self._forgetting_enabled = value

    def _run_forgetting(
        self,
        storage: "SQLiteBackend",
        vault: "DeterministicVault",
        matcher: "NeuronMatcher",
        result: "ConsolidationResult",
    ) -> None:
        """Run salience-guided forgetting as part of the passive cycle."""
        # Local import: hga.forgetting imports from this package at module
        # load, so importing it lazily here avoids a circular import.
        from .forgetting import MemoryForgetting

        if self._forgetting is None:
            self._forgetting = MemoryForgetting()

        fr = self._forgetting.run_forgetting_cycle(storage, vault, matcher)
        result.forgetting_kept = fr.items_kept
        result.forgetting_summarized = fr.items_summarized
        result.forgetting_migrated = fr.items_migrated
        result.forgetting_deleted = fr.items_deleted
        result.neurons_pruned = fr.neurons_pruned
        result.neurons_demoted = fr.neurons_demoted
        result.privacy_protected = fr.privacy_protected
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Abstract embedding backend interface."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import List
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class EmbeddingBackend(ABC):
    """Abstract base for embedding providers.

    Concrete backends must expose a fixed dimensionality via ``dim`` and
    produce vectors of that size from text. Subclasses must implement all
    three abstract members.
    """

    @property
    @abstractmethod
    def dim(self) -> int:
        """Embedding dimensionality (number of components per vector)."""

    @abstractmethod
    def encode(self, text: str) -> np.ndarray:
        """Encode a single text to a normalized vector.

        Implementations are expected to return a 1-D array of shape (dim,);
        "normalized" presumably means unit L2 norm — confirm in concrete
        backends.
        """

    @abstractmethod
    def encode_batch(self, texts: List[str]) -> np.ndarray:
        """Encode a batch of texts. Returns (N, dim) matrix."""
|