hga 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hga-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 HGA Authors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
hga-0.1.0/MANIFEST.in ADDED
@@ -0,0 +1,3 @@
1
+ include LICENSE
2
+ include README.md
3
+ recursive-include hga *.py
hga-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.4
2
+ Name: hga
3
+ Version: 0.1.0
4
+ Summary: Hybrid Governance Architecture — Multi-layer agent memory system with vector quantization, deterministic vault, and semantic neuron routing
5
+ Author-email: Ahmet <ahmet@example.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ahmet/hga
8
+ Project-URL: Repository, https://github.com/ahmet/hga
9
+ Project-URL: Issues, https://github.com/ahmet/hga/issues
10
+ Project-URL: Documentation, https://github.com/ahmet/hga#readme
11
+ Keywords: llm,agent,memory,vector-quantization,semantic-routing,governance,retrieval
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: numpy>=1.24
27
+ Requires-Dist: scikit-learn>=1.3
28
+ Requires-Dist: sentence-transformers>=2.2
29
+ Provides-Extra: openai
30
+ Requires-Dist: openai>=1.0; extra == "openai"
31
+ Provides-Extra: groq
32
+ Requires-Dist: groq>=0.4; extra == "groq"
33
+ Requires-Dist: langchain-groq; extra == "groq"
34
+ Provides-Extra: all
35
+ Requires-Dist: openai>=1.0; extra == "all"
36
+ Requires-Dist: groq>=0.4; extra == "all"
37
+ Requires-Dist: langchain-groq; extra == "all"
38
+ Provides-Extra: dev
39
+ Requires-Dist: pytest>=7.0; extra == "dev"
40
+ Requires-Dist: matplotlib; extra == "dev"
41
+ Requires-Dist: seaborn; extra == "dev"
42
+ Requires-Dist: pandas; extra == "dev"
43
+ Requires-Dist: tqdm; extra == "dev"
44
+ Requires-Dist: build; extra == "dev"
45
+ Requires-Dist: twine; extra == "dev"
46
+ Dynamic: license-file
47
+
48
+ # HGA — Hybrid Governance Architecture
49
+
50
+ [![PyPI version](https://badge.fury.io/py/hga-memory.svg)](https://pypi.org/project/hga-memory/)
51
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
52
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
53
+
54
+ A multi-layer agent memory system that provides intelligent query routing, exact recall, and semantic neuron maturation for LLM-based agents.
55
+
56
+ ## Features
57
+
58
+ - **L1 — RM (Routing Memory: A Vector-Quantization-Based Retrieval Primitive):** Vector quantization with K-means centroids, multi-probe retrieval, online EMA adaptation, and drift detection
59
+ - **L2 — Deterministic Vault:** Exact key-value recall with SHA-256 integrity verification, policy tagging (Public/Internal/Sensitive/Restricted), and full audit trails
60
+ - **L3 — Semantic Neuron Layer:** 4-stage neuron maturation (Stage 0→3), causal reasoning chains, structural similarity transfer, and safe Stage 3 replay
61
+ - **Governance Gate:** Intelligent routing across 5 execution paths based on confidence, margin, neuron maturity, and sensitivity
62
+ - **Consolidation:** Active trace writing + passive LLM-free capability growth (co-occurrence mining, edge finalization, RM reshaping)
63
+ - **Real Embeddings:** Uses `all-MiniLM-L6-v2` (384-dimensional) — no mock or synthetic embeddings
64
+
65
+ ## Installation
66
+
67
+ ```bash
68
+ pip install hga-memory
69
+ ```
70
+
71
+ With LLM provider support:
72
+
73
+ ```bash
74
+ # Groq
75
+ pip install hga-memory[groq]
76
+
77
+ # OpenAI
78
+ pip install hga-memory[openai]
79
+
80
+ # Everything
81
+ pip install hga-memory[all]
82
+ ```
83
+
84
+ ## Quick Start
85
+
86
+ ```python
87
+ from hga import AgentMemory
88
+
89
+ # Initialize memory system
90
+ memory = AgentMemory()
91
+
92
+ # Store information
93
+ memory.store("Project deadline is March 15, 2026", policy_tag="Internal")
94
+ memory.store("API key format: sk-xxxx", policy_tag="Sensitive")
95
+
96
+ # Query with automatic routing
97
+ result = memory.recall("When is the project deadline?")
98
+ print(result.answer)
99
+ print(f"Path: {result.path}, Tokens: {result.tokens_used}")
100
+
101
+ # The gate automatically routes:
102
+ # - Exact facts → Deterministic Vault (0 tokens)
103
+ # - Semantic queries → RM retrieval
104
+ # - Mature patterns → Stage 3 replay (0 tokens)
105
+ # - Sensitive queries → Deterministic path (safe)
106
+ ```
107
+
108
+ ## Architecture
109
+
110
+ ```
111
+ Query → Governance Gate → Route Decision
112
+
113
+ ├── Stage0Path → Full LLM call (new pattern)
114
+ ├── FastSemantic → L1 retrieval + LLM
115
+ ├── VerifyPath → L1 + L3 verify + LLM
116
+ ├── Stage3Path → Causal replay (no LLM)
117
+ └── Deterministic → L2 exact lookup (no LLM)
118
+ ```
119
+
120
+ ## Gate Decision Logic
121
+
122
+ | Condition | Path |
123
+ |---|---|
124
+ | Sensitivity=High OR edge weight < -1 | DeterministicPath |
125
+ | Stage 3 + confidence >= 0.6 | Stage3Path |
126
+ | Stage 2 + confidence >= 0.6 + margin >= 0.1 | VerifyPath |
127
+ | Stage >= 1 + confidence >= 0.6 | FastSemanticPath |
128
+ | No matching neuron | Stage0Path |
129
+
130
+ ## Neuron Maturation
131
+
132
+ Neurons progress through 4 stages based on successful executions:
133
+
134
+ - **Stage 0→1:** weight > 0, 3+ successful hits
135
+ - **Stage 1→2:** weight > +2, 8+ hits
136
+ - **Stage 2→3:** weight > +2.5, 5 consecutive clean executions
137
+
138
+ Edge weights update: `w += source_weight × outcome` (clipped to [-3, +3])
139
+
140
+ ## Configuration
141
+
142
+ | Parameter | Default | Description |
143
+ |---|---|---|
144
+ | `K` | 64 | Number of RM centroids |
145
+ | `ALPHA` | 0.6 | Confidence threshold |
146
+ | `DELTA_MIN` | 0.1 | Margin threshold |
147
+ | `eta` | 0.01 | EMA learning rate |
148
+ | `embedding_dim` | 384 | Embedding dimensionality |
149
+
150
+ ## License
151
+
152
+ MIT
hga-0.1.0/README.md ADDED
@@ -0,0 +1,105 @@
1
+ # HGA — Hybrid Governance Architecture
2
+
3
+ [![PyPI version](https://badge.fury.io/py/hga-memory.svg)](https://pypi.org/project/hga-memory/)
4
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+
7
+ A multi-layer agent memory system that provides intelligent query routing, exact recall, and semantic neuron maturation for LLM-based agents.
8
+
9
+ ## Features
10
+
11
+ - **L1 — RM (Routing Memory: A Vector-Quantization-Based Retrieval Primitive):** Vector quantization with K-means centroids, multi-probe retrieval, online EMA adaptation, and drift detection
12
+ - **L2 — Deterministic Vault:** Exact key-value recall with SHA-256 integrity verification, policy tagging (Public/Internal/Sensitive/Restricted), and full audit trails
13
+ - **L3 — Semantic Neuron Layer:** 4-stage neuron maturation (Stage 0→3), causal reasoning chains, structural similarity transfer, and safe Stage 3 replay
14
+ - **Governance Gate:** Intelligent routing across 5 execution paths based on confidence, margin, neuron maturity, and sensitivity
15
+ - **Consolidation:** Active trace writing + passive LLM-free capability growth (co-occurrence mining, edge finalization, RM reshaping)
16
+ - **Real Embeddings:** Uses `all-MiniLM-L6-v2` (384-dimensional) — no mock or synthetic embeddings
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install hga-memory
22
+ ```
23
+
24
+ With LLM provider support:
25
+
26
+ ```bash
27
+ # Groq
28
+ pip install hga-memory[groq]
29
+
30
+ # OpenAI
31
+ pip install hga-memory[openai]
32
+
33
+ # Everything
34
+ pip install hga-memory[all]
35
+ ```
36
+
37
+ ## Quick Start
38
+
39
+ ```python
40
+ from hga import AgentMemory
41
+
42
+ # Initialize memory system
43
+ memory = AgentMemory()
44
+
45
+ # Store information
46
+ memory.store("Project deadline is March 15, 2026", policy_tag="Internal")
47
+ memory.store("API key format: sk-xxxx", policy_tag="Sensitive")
48
+
49
+ # Query with automatic routing
50
+ result = memory.recall("When is the project deadline?")
51
+ print(result.answer)
52
+ print(f"Path: {result.path}, Tokens: {result.tokens_used}")
53
+
54
+ # The gate automatically routes:
55
+ # - Exact facts → Deterministic Vault (0 tokens)
56
+ # - Semantic queries → RM retrieval
57
+ # - Mature patterns → Stage 3 replay (0 tokens)
58
+ # - Sensitive queries → Deterministic path (safe)
59
+ ```
60
+
61
+ ## Architecture
62
+
63
+ ```
64
+ Query → Governance Gate → Route Decision
65
+
66
+ ├── Stage0Path → Full LLM call (new pattern)
67
+ ├── FastSemantic → L1 retrieval + LLM
68
+ ├── VerifyPath → L1 + L3 verify + LLM
69
+ ├── Stage3Path → Causal replay (no LLM)
70
+ └── Deterministic → L2 exact lookup (no LLM)
71
+ ```
72
+
73
+ ## Gate Decision Logic
74
+
75
+ | Condition | Path |
76
+ |---|---|
77
+ | Sensitivity=High OR edge weight < -1 | DeterministicPath |
78
+ | Stage 3 + confidence >= 0.6 | Stage3Path |
79
+ | Stage 2 + confidence >= 0.6 + margin >= 0.1 | VerifyPath |
80
+ | Stage >= 1 + confidence >= 0.6 | FastSemanticPath |
81
+ | No matching neuron | Stage0Path |
82
+
83
+ ## Neuron Maturation
84
+
85
+ Neurons progress through 4 stages based on successful executions:
86
+
87
+ - **Stage 0→1:** weight > 0, 3+ successful hits
88
+ - **Stage 1→2:** weight > +2, 8+ hits
89
+ - **Stage 2→3:** weight > +2.5, 5 consecutive clean executions
90
+
91
+ Edge weights update: `w += source_weight × outcome` (clipped to [-3, +3])
92
+
93
+ ## Configuration
94
+
95
+ | Parameter | Default | Description |
96
+ |---|---|---|
97
+ | `K` | 64 | Number of RM centroids |
98
+ | `ALPHA` | 0.6 | Confidence threshold |
99
+ | `DELTA_MIN` | 0.1 | Margin threshold |
100
+ | `eta` | 0.01 | EMA learning rate |
101
+ | `embedding_dim` | 384 | Embedding dimensionality |
102
+
103
+ ## License
104
+
105
+ MIT
@@ -0,0 +1,8 @@
1
"""HGA — Hybrid Governance Architecture for Agent Memory.

Exposes the public entry points of the :mod:`hga` package: the main
:class:`AgentMemory` facade plus the result/decision types callers
receive back from it.
"""

__version__ = "0.1.0"

from .memory import AgentMemory, RecallResult
from .gate.governance import ExecutionPath, GateDecision
from .l3.replay import ReplayResult

__all__ = [
    "AgentMemory",
    "RecallResult",
    "ExecutionPath",
    "GateDecision",
    "ReplayResult",
]
@@ -0,0 +1,328 @@
1
+ """Passive Consolidation — LLM-free structural learning between sessions.
2
+
3
+ Runs when no active LLM call is in progress. Operations:
4
+ 1. Edge finalization: confirm/decay provisional neuron weights
5
+ 2. DV compaction: deduplicate by hash, clean dangling pointers
6
+ 3. Drift alarm reset: finalize session statistics
7
+ 4. Co-occurrence mining: centroid pairs probed together → L3 edges
8
+ 5. RM reshaping: stable L3 routes trigger codebook splits
9
+ 6. Salience-guided forgetting: lifecycle management + neuron pruning/demotion
10
+
11
+ All operations are strictly LLM-free.
12
+ """
13
+
14
+ import logging
15
+ import time
16
+ from collections import Counter, defaultdict
17
+ from dataclasses import dataclass, field
18
+ from typing import Dict, List, Optional, Tuple, TYPE_CHECKING
19
+
20
+ if TYPE_CHECKING:
21
+ from .l1.codebook import Codebook
22
+ from .l1.drift import DriftMonitor
23
+ from .l2.vault import DeterministicVault
24
+ from .l3.matcher import NeuronMatcher
25
+ from .storage.sqlite_backend import SQLiteBackend
26
+
27
+ logger = logging.getLogger("hga.consolidation")
28
+
29
+
30
+ # ─── Co-occurrence Edge ────────────────────────────────────────────────────
31
+
32
@dataclass
class CooccurrenceEdge:
    """Edge between two neurons that are frequently co-probed."""
    # IDs of the two endpoint neurons; the miner stores pairs sorted,
    # so (a, b) and (b, a) collapse to a single edge.
    neuron_a_id: str
    neuron_b_id: str
    # Total number of co-probe observations accumulated for this pair.
    count: int = 0
    # Edge strength; the miner keeps this equal to float(count).
    weight: float = 0.0
39
+
40
+
41
+ # ─── Passive Consolidation Result ──────────────────────────────────────────
42
+
43
@dataclass
class ConsolidationResult:
    """Summary of a passive consolidation cycle.

    One instance is produced per call to
    ``PassiveConsolidation.run_passive_cycle``; each field group below
    corresponds to one numbered operation of that cycle.
    """
    # Operation 1 — edge finalization counters.
    edges_finalized: int = 0
    edges_decayed: int = 0
    edges_removed: int = 0
    provisional_confirmed: int = 0
    provisional_removed: int = 0
    # Operation 2 — DV compaction: record counts before/after dedup.
    dv_before: int = 0
    dv_after: int = 0
    dv_removed: int = 0
    # Operation 3 — whether the drift baseline was reset this cycle.
    drift_reset: bool = False
    # Operation 4 — co-occurrence mining.
    cooccurrence_edges_created: int = 0
    # Operation 5 — RM codebook reshaping (number of centroid splits).
    reshaping_splits: int = 0
    # Operation 6 — salience-guided forgetting outcomes.
    forgetting_kept: int = 0
    forgetting_summarized: int = 0
    forgetting_migrated: int = 0
    forgetting_deleted: int = 0
    neurons_pruned: int = 0
    neurons_demoted: int = 0
    privacy_protected: int = 0
    # Wall-clock duration of the whole cycle, in milliseconds.
    duration_ms: float = 0.0
65
+
66
+
67
class PassiveConsolidation:
    """Runs LLM-free consolidation operations on the HGA memory system.

    Must be called explicitly (e.g., between sessions or during idle time).
    Guarantees zero LLM calls — all operations are structural/statistical.
    """

    # Co-occurrence threshold: minimum co-probes to create an edge
    COOCCURRENCE_THRESHOLD = 3

    # Edge decay factor for old provisional neurons
    EDGE_DECAY_FACTOR = 0.8
    EDGE_REMOVE_THRESHOLD = 0.1

    # Minimum neuron age (hits) before edge finalization applies
    EDGE_MIN_AGE = 5

    # Reshaping: minimum bucket size to trigger split consideration
    RESHAPE_BUCKET_SIZE_THRESHOLD = 10

    def __init__(self, forgetting_enabled: bool = False):
        """Create a consolidation runner.

        Args:
            forgetting_enabled: When True, operation 6 (salience-guided
                forgetting) runs during the passive cycle, provided a
                storage backend is passed to ``run_passive_cycle``.
        """
        # Probes recorded during the active phase: (neuron_id, timestamp).
        self._probe_history: List[Tuple[str, float]] = []
        # Mined edges keyed by the sorted (neuron_id_a, neuron_id_b) pair.
        self._cooccurrence_edges: Dict[Tuple[str, str], CooccurrenceEdge] = {}
        self._forgetting_enabled = forgetting_enabled
        # Built lazily in _run_forgetting so the forgetting module is only
        # imported when the feature is actually used.
        self._forgetting: Optional["MemoryForgetting"] = None

    @property
    def cooccurrence_edges(self) -> List[CooccurrenceEdge]:
        """Snapshot list of all mined co-occurrence edges."""
        return list(self._cooccurrence_edges.values())

    def record_probe(self, neuron_id: str) -> None:
        """Record that a neuron was probed during active phase."""
        self._probe_history.append((neuron_id, time.time()))

    def run_passive_cycle(
        self,
        matcher: "NeuronMatcher",
        vault: "DeterministicVault",
        codebook: "Codebook",
        drift: "DriftMonitor",
        storage: Optional["SQLiteBackend"] = None,
    ) -> ConsolidationResult:
        """Run a full passive consolidation cycle.

        Args:
            matcher: L3 neuron matcher
            vault: L2 deterministic vault
            codebook: L1 codebook
            drift: Drift monitor
            storage: SQLite backend (needed for forgetting, optional)

        Returns:
            ConsolidationResult with operation counts.
        """
        start = time.time()
        result = ConsolidationResult()

        # 1. Edge finalization (always run, cheap)
        self._finalize_edges(matcher, result)

        # 2. DV compaction (always run, cheap)
        self._compact_dv(vault, result)

        # 3. Drift alarm reset (always run, cheap)
        self._reset_drift(drift, result)

        # 4. Co-occurrence mining (needs probe history)
        if self._probe_history:
            self._mine_cooccurrence(matcher, result)

        # 5. RM reshaping (needs mature neurons)
        has_mature = any(n.stage >= 2 for n in matcher.get_all_neurons())
        if has_mature:
            self._reshape_codebook(matcher, codebook, result)

        # 6. Salience-guided forgetting (needs storage)
        if storage is not None and self._forgetting_enabled:
            self._run_forgetting(storage, vault, matcher, result)

        result.duration_ms = (time.time() - start) * 1000
        logger.info(f"Passive cycle complete: {result.duration_ms:.1f}ms, "
                    f"edges_finalized={result.edges_finalized}, "
                    f"cooccurrence={result.cooccurrence_edges_created}, "
                    f"splits={result.reshaping_splits}, "
                    f"forgetting_deleted={result.forgetting_deleted}")
        return result

    # ─── Operation 1: Edge Finalization ──────────────────────────────────

    def _finalize_edges(self, matcher: "NeuronMatcher",
                        result: ConsolidationResult) -> None:
        """Confirm or decay neuron edge weights.

        Provisional neurons with enough hits are either confirmed
        (success_rate >= 0.8) or scheduled for removal. Confirmed neurons
        with poor success rates have their positive edge weights decayed.
        """
        neurons_to_remove = []

        for neuron in matcher.get_all_neurons():
            # Handle provisional neurons
            if neuron.provisional:
                if neuron.hit_count >= self.EDGE_MIN_AGE:
                    if neuron.success_rate >= 0.8:
                        neuron.provisional = False
                        result.provisional_confirmed += 1
                    else:
                        neurons_to_remove.append(neuron.id)
                        result.provisional_removed += 1
                continue

            # For confirmed neurons: decay low-performing weights
            if neuron.hit_count >= self.EDGE_MIN_AGE:
                if neuron.success_rate >= 0.8:
                    result.edges_finalized += 1
                elif neuron.edge_weight > 0 and neuron.success_rate < 0.5:
                    neuron.edge_weight *= self.EDGE_DECAY_FACTOR
                    result.edges_decayed += 1
                    # NOTE(review): edges_removed is only incremented here —
                    # the edge itself is not deleted. Confirm whether removal
                    # is expected to happen elsewhere or is intentional.
                    if abs(neuron.edge_weight) < self.EDGE_REMOVE_THRESHOLD:
                        result.edges_removed += 1

        # Remove failed provisionals (deferred so we don't mutate the
        # neuron collection while iterating it)
        for nid in neurons_to_remove:
            matcher.remove_neuron(nid)

    # ─── Operation 2: DV Compaction ──────────────────────────────────────

    def _compact_dv(self, vault: "DeterministicVault",
                    result: ConsolidationResult) -> None:
        """Deduplicate DV records by hash.

        Keeps the first record seen for each content hash and deletes the
        rest. NOTE(review): reaches into ``vault._local`` (a private dict)
        directly — consider exposing a compaction API on the vault instead.
        """
        result.dv_before = vault.count

        seen_hashes = {}
        duplicates = []

        for key, record in list(vault._local.items()):
            if record.hash in seen_hashes:
                duplicates.append(key)
            else:
                seen_hashes[record.hash] = key

        for key in duplicates:
            del vault._local[key]

        result.dv_after = vault.count
        result.dv_removed = len(duplicates)

    # ─── Operation 3: Drift Reset ───────────────────────────────────────

    def _reset_drift(self, drift: "DriftMonitor",
                     result: ConsolidationResult) -> None:
        """Reset drift baseline using recent data."""
        # Only reset when there is something to fold in: an active drift
        # alarm or at least one recorded episode this session.
        if drift.in_drift or drift.episode_count > 0:
            drift.reset_baseline()
            result.drift_reset = True

    # ─── Operation 4: Co-occurrence Mining ──────────────────────────────

    def _mine_cooccurrence(self, matcher: "NeuronMatcher",
                           result: ConsolidationResult) -> None:
        """Analyze probe history for co-occurring neuron pairs.

        Two neurons probed within a short time window → co-occurrence.
        Threshold co-occurrences → create an edge.
        """
        WINDOW_SECONDS = 60.0  # probes within 60s are considered co-occurring

        # Count co-occurrences. The inner `break` assumes _probe_history is
        # in chronological order — true because record_probe appends
        # time.time() timestamps as they happen.
        pair_counts: Counter = Counter()
        history = self._probe_history

        for i in range(len(history)):
            nid_i, ts_i = history[i]
            for j in range(i + 1, len(history)):
                nid_j, ts_j = history[j]
                if ts_j - ts_i > WINDOW_SECONDS:
                    break
                if nid_i != nid_j:
                    # Sort the pair so (a, b) and (b, a) share one key.
                    pair = tuple(sorted([nid_i, nid_j]))
                    pair_counts[pair] += 1

        # Create edges for frequent pairs
        for pair, count in pair_counts.items():
            if count >= self.COOCCURRENCE_THRESHOLD:
                if pair not in self._cooccurrence_edges:
                    # Verify both neurons still exist
                    n_a = matcher.get_neuron(pair[0])
                    n_b = matcher.get_neuron(pair[1])
                    if n_a and n_b:
                        self._cooccurrence_edges[pair] = CooccurrenceEdge(
                            neuron_a_id=pair[0],
                            neuron_b_id=pair[1],
                            count=count,
                            weight=float(count),
                        )
                        result.cooccurrence_edges_created += 1
                else:
                    # Update existing edge; weight tracks the running count.
                    self._cooccurrence_edges[pair].count += count
                    self._cooccurrence_edges[pair].weight = float(
                        self._cooccurrence_edges[pair].count
                    )

        # Clear probe history after mining
        self._probe_history.clear()

    # ─── Operation 5: RM Reshaping ─────────────────────────────────────

    def _reshape_codebook(self, matcher: "NeuronMatcher",
                          codebook: "Codebook",
                          result: ConsolidationResult) -> None:
        """L3→L1 feedback: split hot centroids used by mature neurons."""
        if not codebook.fitted:
            return

        # Find centroids frequently used by mature (stage >= 2) neurons
        hot_centroids: Counter = Counter()
        for neuron in matcher.get_all_neurons():
            if neuron.stage < 2:
                continue
            # Encode neuron centroid to find its primary codebook centroid
            cid, _ = codebook.encode(neuron.pattern_embedding)
            hot_centroids[cid] += 1

        # Split large hot buckets
        for cid, neuron_count in hot_centroids.most_common():
            # Skip ids beyond the current codebook size (stale encodes).
            if cid >= codebook.K:
                continue
            bucket = codebook.buckets.get(cid)
            if not bucket:
                continue
            bucket_size = len(bucket.item_ids)
            if bucket_size >= self.RESHAPE_BUCKET_SIZE_THRESHOLD:
                # maybe_split may decline; only count actual splits.
                if codebook.maybe_split(cid):
                    result.reshaping_splits += 1

    # ─── Operation 6: Salience-Guided Forgetting ─────────────────────

    @property
    def forgetting_enabled(self) -> bool:
        """Whether operation 6 (forgetting) runs during passive cycles."""
        return self._forgetting_enabled

    @forgetting_enabled.setter
    def forgetting_enabled(self, value: bool) -> None:
        self._forgetting_enabled = value

    def _run_forgetting(
        self,
        storage: "SQLiteBackend",
        vault: "DeterministicVault",
        matcher: "NeuronMatcher",
        result: ConsolidationResult,
    ) -> None:
        """Run salience-guided forgetting as part of passive cycle."""
        # Local import keeps the forgetting module optional at load time.
        from .forgetting import MemoryForgetting

        if self._forgetting is None:
            self._forgetting = MemoryForgetting()

        # Copy the forgetting-cycle counters onto this cycle's result.
        fr = self._forgetting.run_forgetting_cycle(storage, vault, matcher)
        result.forgetting_kept = fr.items_kept
        result.forgetting_summarized = fr.items_summarized
        result.forgetting_migrated = fr.items_migrated
        result.forgetting_deleted = fr.items_deleted
        result.neurons_pruned = fr.neurons_pruned
        result.neurons_demoted = fr.neurons_demoted
        result.privacy_protected = fr.privacy_protected
@@ -0,0 +1,3 @@
1
"""Embedding backends for HGA.

Re-exports the default local (sentence-transformers based) backend.
"""

from .local import LocalEmbeddings

__all__ = ["LocalEmbeddings"]
@@ -0,0 +1,22 @@
1
+ """Abstract embedding backend interface."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import List
5
+ import numpy as np
6
+
7
+
8
class EmbeddingBackend(ABC):
    """Interface every embedding provider must satisfy.

    Concrete backends turn text into fixed-width vectors; callers use
    :attr:`dim` to learn that width ahead of time.
    """

    @property
    @abstractmethod
    def dim(self) -> int:
        """Width of the vectors this backend produces."""
        ...

    @abstractmethod
    def encode(self, text: str) -> np.ndarray:
        """Map one text to a single normalized embedding vector."""
        ...

    @abstractmethod
    def encode_batch(self, texts: List[str]) -> np.ndarray:
        """Map many texts to a matrix of shape (len(texts), dim)."""
        ...