cognitive-engine 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognitive_engine/__init__.py +7 -0
- cognitive_engine/adapters/__init__.py +2 -0
- cognitive_engine/adapters/plastic_numeric_adapter.py +141 -0
- cognitive_engine/api/__init__.py +2 -0
- cognitive_engine/api/service.py +41 -0
- cognitive_engine/compression/__init__.py +2 -0
- cognitive_engine/compression/knowledge_compressor.py +53 -0
- cognitive_engine/compression/semantic_compressor_v2.py +115 -0
- cognitive_engine/config/__init__.py +2 -0
- cognitive_engine/config/loader.py +44 -0
- cognitive_engine/config/schema.py +45 -0
- cognitive_engine/consolidation/__init__.py +2 -0
- cognitive_engine/consolidation/engine.py +25 -0
- cognitive_engine/consolidation/engine_v2.py +39 -0
- cognitive_engine/context/__init__.py +2 -0
- cognitive_engine/context/long_context.py +64 -0
- cognitive_engine/core/__init__.py +2 -0
- cognitive_engine/core/builder.py +154 -0
- cognitive_engine/core/engine.py +174 -0
- cognitive_engine/core/engine_v2.py +280 -0
- cognitive_engine/core/registry.py +29 -0
- cognitive_engine/core/types.py +346 -0
- cognitive_engine/interfaces/__init__.py +2 -0
- cognitive_engine/interfaces/base.py +181 -0
- cognitive_engine/memory/__init__.py +2 -0
- cognitive_engine/memory/graph_memory.py +165 -0
- cognitive_engine/memory/hybrid_memory.py +110 -0
- cognitive_engine/memory/project_memory.py +80 -0
- cognitive_engine/memory/stores.py +177 -0
- cognitive_engine/memory/vector_store.py +28 -0
- cognitive_engine/models/__init__.py +2 -0
- cognitive_engine/models/stable_core.py +79 -0
- cognitive_engine/modules/__init__.py +2 -0
- cognitive_engine/modules/importance_evaluator.py +96 -0
- cognitive_engine/modules/input_processing.py +78 -0
- cognitive_engine/modules/semantic_understanding.py +130 -0
- cognitive_engine/nlp/__init__.py +16 -0
- cognitive_engine/nlp/models.py +116 -0
- cognitive_engine/nlp/trainer.py +95 -0
- cognitive_engine/replay/__init__.py +2 -0
- cognitive_engine/replay/buffer.py +40 -0
- cognitive_engine/routing/__init__.py +2 -0
- cognitive_engine/routing/dynamic_router.py +45 -0
- cognitive_engine/routing/learned_router.py +165 -0
- cognitive_engine/specialists/__init__.py +2 -0
- cognitive_engine/specialists/runtime.py +97 -0
- cognitive_engine/stability/__init__.py +2 -0
- cognitive_engine/stability/governor.py +38 -0
- cognitive_engine/training/__init__.py +2 -0
- cognitive_engine/training/online_trainer.py +87 -0
- cognitive_engine/utils/__init__.py +2 -0
- cognitive_engine/utils/numeric.py +67 -0
- cognitive_engine/utils/seeding.py +13 -0
- cognitive_engine/utils/telemetry.py +39 -0
- cognitive_engine/utils/text.py +104 -0
- cognitive_engine/utils/visualization.py +87 -0
- cognitive_engine-0.2.0.dist-info/METADATA +91 -0
- cognitive_engine-0.2.0.dist-info/RECORD +60 -0
- cognitive_engine-0.2.0.dist-info/WHEEL +5 -0
- cognitive_engine-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List
|
|
4
|
+
|
|
5
|
+
from cognitive_engine.core.types import (
|
|
6
|
+
CompressedKnowledge,
|
|
7
|
+
GraphQuery,
|
|
8
|
+
MemoryBundleV2,
|
|
9
|
+
MemoryQueryV2,
|
|
10
|
+
PreferenceRecord,
|
|
11
|
+
ProcedureMemory,
|
|
12
|
+
ProjectMemoryRecord,
|
|
13
|
+
)
|
|
14
|
+
from cognitive_engine.memory.graph_memory import CognitiveGraphMemory
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class HybridCognitiveMemory:
    """Facade that combines a base memory with a graph memory and side stores.

    Calls that the base memory already understands (``retrieve``, ``write``,
    ``update_working_memory``, ``consolidate``) are delegated unchanged. On top
    of that this class keeps three in-process collections: procedures (a list
    with unique ``procedure_id``), project records keyed by ``project_id`` and
    preferences keyed by ``key``.
    """

    name = "hybrid_cognitive_memory"

    def __init__(self, base_memory: Any, graph_memory: CognitiveGraphMemory) -> None:
        """Store the two backing memories and start with empty side stores."""
        self.base_memory = base_memory
        self.graph_memory = graph_memory
        self.procedural_records: List[ProcedureMemory] = []
        self.project_records: Dict[str, ProjectMemoryRecord] = {}
        self.preferences: Dict[str, PreferenceRecord] = {}

    def describe(self) -> Dict[str, Any]:
        """Return the descriptions of both backends plus side-store sizes."""
        return {
            "name": self.name,
            "base": self.base_memory.describe(),
            "graph": self.graph_memory.describe(),
            "procedures": len(self.procedural_records),
            "projects": len(self.project_records),
            "preferences": len(self.preferences),
        }

    def update_working_memory(self, semantic_state: Any) -> Dict[str, Any]:
        """Delegate the working-memory update to the base memory."""
        return self.base_memory.update_working_memory(semantic_state)

    def retrieve(self, query: Any) -> Any:
        """Delegate a plain retrieval to the base memory."""
        return self.base_memory.retrieve(query)

    def retrieve_v2(self, query: MemoryQueryV2) -> MemoryBundleV2:
        """Build a ``MemoryBundleV2`` from the base bundle and the side stores.

        The base memory supplies the short-term, working, semantic and
        episodic parts. Graph, project, procedure and preference sections are
        only filled when the matching ``include_*`` flag on ``query`` is set;
        ``evidence`` is always returned empty.
        """
        base = self.base_memory.retrieve(query)

        subgraph = None
        if query.include_graph:
            # Only build a fallback graph query when the graph is actually consulted.
            graph_query = query.graph_query or GraphQuery(
                seeds=query.concepts,
                project_id=query.project_id,
                top_k=query.top_k,
            )
            subgraph = self.graph_memory.query_subgraph(graph_query)

        project_records = []
        if query.include_project:
            if query.project_id and query.project_id in self.project_records:
                project_records.append(self.project_records[query.project_id])
            else:
                # No (known) project requested: fall back to the first top_k records.
                project_records.extend(list(self.project_records.values())[: query.top_k])

        procedures = self._search_procedures(query.concepts, query.top_k) if query.include_procedures else []
        preferences = list(self.preferences.values())[: query.top_k] if query.include_preferences else []

        return MemoryBundleV2(
            short_term=base.short_term,
            working_memory=base.working_memory,
            semantic_long_term=base.semantic_long_term,
            episodic=base.episodic,
            procedural=procedures,
            project=project_records,
            graph_subgraph=subgraph,
            preferences=preferences,
            evidence=[],
        )

    def write(self, knowledge: CompressedKnowledge) -> None:
        """Delegate a single knowledge write to the base memory."""
        self.base_memory.write(knowledge)

    def write_v2(self, bundle: Any) -> None:
        """Persist every part of ``bundle`` that is present.

        ``bundle`` is duck-typed: ``compressed_knowledge``, ``graph_patch``,
        ``procedures`` and ``preferences`` are all optional. An attribute that
        is missing *or* set to ``None`` is treated as empty, so partially
        filled bundles no longer raise ``TypeError`` during iteration.
        """
        for knowledge in getattr(bundle, "compressed_knowledge", None) or []:
            self.base_memory.write(knowledge)

        graph_patch = getattr(bundle, "graph_patch", None)
        if graph_patch:
            self.graph_memory.apply_patch(graph_patch)

        for procedure in getattr(bundle, "procedures", None) or []:
            self.add_procedure(procedure)

        for preference in getattr(bundle, "preferences", None) or []:
            self.preferences[preference.key] = preference

    def add_project_record(self, record: ProjectMemoryRecord) -> None:
        """Insert or replace the record stored under ``record.project_id``."""
        self.project_records[record.project_id] = record

    def add_procedure(self, procedure: ProcedureMemory) -> None:
        """Insert ``procedure``, replacing any entry with the same ``procedure_id``.

        A replaced procedure keeps its original position in the list; a new
        one is appended at the end.
        """
        existing = {item.procedure_id: item for item in self.procedural_records}
        existing[procedure.procedure_id] = procedure
        self.procedural_records = list(existing.values())

    def snapshot(self) -> Dict[str, Any]:
        """Return the base snapshot extended with graph and side-store sizes."""
        base = self.base_memory.snapshot()
        return {
            **base,
            "graph": self.graph_memory.snapshot(),
            "procedural_size": len(self.procedural_records),
            "project_size": len(self.project_records),
            "preference_size": len(self.preferences),
        }

    def consolidate(self) -> Any:
        """Delegate consolidation to the base memory."""
        return self.base_memory.consolidate()

    def _search_procedures(self, concepts: List[str], top_k: int) -> List[ProcedureMemory]:
        """Rank stored procedures by how many concept roots their text contains.

        A root is the lower-cased first four characters of a concept; concepts
        shorter than four characters are ignored. The searched text is the
        title, domains and steps joined with spaces. Procedures that match no
        root are never returned.
        """
        roots = {concept.lower()[:4] for concept in concepts if len(concept) >= 4}
        scored = []
        for procedure in self.procedural_records:
            text = " ".join([procedure.title, *procedure.domains, *procedure.steps]).lower()
            score = sum(1 for root in roots if root in text)
            scored.append((score, procedure))
        # The sort is stable, so equally scored procedures keep insertion order.
        scored.sort(key=lambda item: item[0], reverse=True)
        return [item for score, item in scored[:top_k] if score > 0]
|
|
110
|
+
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, Iterable, List
|
|
6
|
+
|
|
7
|
+
from cognitive_engine.core.types import GraphPatch, ProjectMemoryRecord
|
|
8
|
+
from cognitive_engine.memory.graph_memory import CognitiveGraphMemory
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ProjectIndexer:
    """Index the Python files of a project directory into a graph memory.

    For every ``*.py`` file the indexer creates File and Module nodes, then
    walks the file's AST to add Function, Class and Dependency nodes together
    with ``owns`` / ``defines`` / ``imports`` edges.
    """

    name = "project_indexer"

    def __init__(self, graph_memory: CognitiveGraphMemory) -> None:
        """Remember the graph memory that receives all nodes and edges."""
        self.graph_memory = graph_memory

    def describe(self) -> Dict[str, Any]:
        """Return a minimal description of this component."""
        return {"name": self.name}

    def index_project(self, root_path: str | Path, project_id: str | None = None) -> ProjectMemoryRecord:
        """Index every Python file below ``root_path`` and summarise the result.

        Args:
            root_path: Directory to scan recursively; it is resolved to an
                absolute path first.
            project_id: Identifier for the project node; defaults to the name
                of the resolved root directory.

        Returns:
            A ``ProjectMemoryRecord`` listing the indexed modules (at most the
            first 80 in sorted order), dependencies, test files and a graph
            snapshot taken after indexing.
        """
        root = Path(root_path).resolve()
        project_id = project_id or root.name
        py_files = [
            path
            for path in root.rglob("*.py")
            if ".venv" not in path.parts and "__pycache__" not in path.parts
        ]
        modules: List[str] = []
        dependencies: set[str] = set()
        tests: List[str] = []

        project_node = self.graph_memory.upsert_node(
            project_id, "Project", project_id=project_id, metadata={"root": str(root)}
        )
        for file_path in py_files:
            rel = str(file_path.relative_to(root))
            file_node = self.graph_memory.upsert_node(rel, "File", namespace=project_id, project_id=project_id)
            self.graph_memory.upsert_edge(project_node, file_node, "owns", metadata={"path": rel})
            # Any file whose name contains "test" is counted as a test file.
            if "test" in file_path.name:
                tests.append(rel)
            # Turn the relative path into a dotted module name on any OS.
            module_name = rel.replace("\\", ".").replace("/", ".").removesuffix(".py")
            modules.append(module_name)
            module_node = self.graph_memory.upsert_node(
                module_name, "Module", namespace=project_id, project_id=project_id
            )
            self.graph_memory.upsert_edge(file_node, module_node, "defines")
            self._index_python_file(file_path, file_node, module_node, dependencies, project_id)

        return ProjectMemoryRecord(
            project_id=project_id,
            root_path=str(root),
            files_indexed=len(py_files),
            modules=sorted(modules)[:80],
            dependencies=sorted(dependencies),
            tests=sorted(tests),
            commands=["pytest -q"] if tests else [],
            metadata={"graph": self.graph_memory.snapshot()},
        )

    def _index_python_file(self, file_path: Path, file_node: Any, module_node: Any, dependencies: set[str], project_id: str) -> None:
        """Add the functions, classes and imports of one file to the graph.

        Files that cannot be parsed are skipped silently so that one broken
        file does not abort indexing of the whole project. ``dependencies`` is
        updated in place with the top-level name of every imported module.
        """
        tree = self._parse_python_file(file_path)
        if tree is None:
            return

        for item in ast.walk(tree):
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                fn = self.graph_memory.upsert_node(
                    item.name, "Function", namespace=str(file_path), project_id=project_id, metadata={"line": item.lineno}
                )
                self.graph_memory.upsert_edge(file_node, fn, "defines")
                self.graph_memory.upsert_edge(module_node, fn, "defines")
            elif isinstance(item, ast.ClassDef):
                cls = self.graph_memory.upsert_node(
                    item.name, "Class", namespace=str(file_path), project_id=project_id, metadata={"line": item.lineno}
                )
                self.graph_memory.upsert_edge(file_node, cls, "defines")
                self.graph_memory.upsert_edge(module_node, cls, "defines")
            elif isinstance(item, ast.Import):
                for alias in item.names:
                    self._record_dependency(alias.name, file_node, dependencies, project_id)
            elif isinstance(item, ast.ImportFrom) and item.module:
                self._record_dependency(item.module, file_node, dependencies, project_id)

    @staticmethod
    def _parse_python_file(file_path: Path) -> ast.Module | None:
        """Parse ``file_path`` as UTF-8, then latin-1; return ``None`` if unparsable.

        The original code retried with latin-1 inside the ``except
        UnicodeDecodeError`` handler, where a ``SyntaxError`` is not covered by
        the sibling ``except SyntaxError`` clause and escaped to the caller.
        """
        for encoding in ("utf-8", "latin-1"):
            try:
                return ast.parse(file_path.read_text(encoding=encoding))
            except UnicodeDecodeError:
                # Must precede ValueError: UnicodeDecodeError is a subclass of it.
                continue
            except (SyntaxError, ValueError):
                # ValueError: some Python versions reject NUL bytes this way.
                return None
        return None

    def _record_dependency(self, module_path: str, file_node: Any, dependencies: set[str], project_id: str) -> None:
        """Register the top-level package of ``module_path`` as a dependency."""
        dep = module_path.split(".")[0]
        dependencies.add(dep)
        dep_node = self.graph_memory.upsert_node(dep, "Dependency", project_id=project_id)
        self.graph_memory.upsert_edge(file_node, dep_node, "imports")
|
|
80
|
+
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter, deque
|
|
4
|
+
from typing import Any, Deque, Dict, List
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from cognitive_engine.config.schema import MemoryConfig
|
|
9
|
+
from cognitive_engine.core.types import CompressedKnowledge, ConsolidationReport, MemoryBundle, MemoryQuery, SemanticState
|
|
10
|
+
from cognitive_engine.memory.vector_store import NumpyVectorIndex
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class HierarchicalMemorySystem:
    """In-process memory with short-term, episodic and semantic tiers.

    Short-term and episodic tiers are bounded deques. The semantic tier is a
    dict of records keyed by ``record_id`` and mirrored in a vector index that
    is used for duplicate detection. A small ``working_state`` dict tracks the
    most recent intent, concepts and entities.
    """

    name = "hierarchical_memory_system"

    def __init__(self, config: MemoryConfig) -> None:
        """Create empty tiers sized from ``config``; embeddings are 64-wide."""
        self.config = config
        self.embedding_dim = 64
        self.short_term: Deque[CompressedKnowledge] = deque(maxlen=config.short_term_capacity)
        self.episodic: Deque[CompressedKnowledge] = deque(maxlen=config.episodic_capacity)
        self.semantic_records: Dict[str, CompressedKnowledge] = {}
        self.vector_index = NumpyVectorIndex()
        self.working_state: Dict[str, Any] = {
            "active_intent": None,
            "active_concepts": [],
            "recent_entities": [],
            "session_goals": [],
        }

    def describe(self) -> Dict[str, Any]:
        """Return the component name and the configured tier capacities."""
        return {
            "name": self.name,
            "short_term_capacity": self.config.short_term_capacity,
            "episodic_capacity": self.config.episodic_capacity,
            "semantic_capacity": self.config.semantic_capacity,
        }

    def update_working_memory(self, semantic_state: SemanticState) -> Dict[str, Any]:
        """Record the intent and the first six concepts/entities of the state.

        Returns a shallow copy of the working state.
        """
        concepts = [concept.label for concept in semantic_state.concepts[:6]]
        self.working_state["active_intent"] = semantic_state.intent
        self.working_state["active_concepts"] = concepts
        self.working_state["recent_entities"] = semantic_state.entities[:6]
        return dict(self.working_state)

    def retrieve(self, query: MemoryQuery) -> MemoryBundle:
        """Return the best-matching semantic records plus recent tier contents.

        Each semantic record is scored as ``0.65 * cosine + 0.35 * lexical``,
        where *lexical* is the share of query concept roots (first four
        lower-cased characters) also present on the record. Queries with
        intent ``"question"`` get an extra lexical boost, and a further boost
        for preference records when the query mentions a ``pref``/``gust``
        root. Returned semantic hits are copies whose ``metadata`` carries the
        score under ``"similarity"``.

        A ``top_k`` of zero or less yields empty lists for every tier.
        """
        limit = max(query.top_k, 0)
        # The query vector is loop-invariant, so normalise it once.
        query_vector = self._unit(self._align_embedding(query.embedding))
        query_roots = self._concept_roots(query.concepts)
        is_question = query.intent == "question"
        wants_preference = is_question and any(root in {"pref", "gust"} for root in query_roots)

        scored_records = []
        for record_id, record in self.semantic_records.items():
            vector_score = float(np.dot(query_vector, self._unit(record.embedding)))
            lexical_score = 0.0
            if query_roots:
                record_roots = self._concept_roots(record.concepts)
                lexical_score = len(query_roots & record_roots) / len(query_roots)
            score = 0.65 * vector_score + 0.35 * lexical_score
            if is_question and lexical_score > 0:
                score += 0.25 * lexical_score
            if wants_preference:
                if record.provenance.get("intent") == "preference" or "preference" in record.summary.lower():
                    score += 0.35
            scored_records.append((record_id, score))

        scored_records.sort(key=lambda item: item[1], reverse=True)

        semantic_hits = []
        for record_id, score in scored_records[:limit]:
            record = self.semantic_records[record_id]
            enriched = CompressedKnowledge(
                **{
                    **record.__dict__,
                    "metadata": {**record.metadata, "similarity": score},
                }
            )
            semantic_hits.append(enriched)

        return MemoryBundle(
            short_term=self._most_recent(self.short_term, limit),
            working_memory=dict(self.working_state),
            semantic_long_term=semantic_hits,
            episodic=self._most_recent(self.episodic, limit),
        )

    def write(self, knowledge: CompressedKnowledge) -> None:
        """Store ``knowledge`` in all tiers, reinforcing a duplicate if found.

        The embedding on ``knowledge`` is padded/truncated in place to the
        memory's embedding width. The record is always appended to the
        short-term and episodic tiers. If the semantic tier already holds a
        duplicate, that record is reinforced instead of adding a new one;
        otherwise the weakest record is evicted when the tier is full.
        """
        knowledge.embedding = self._align_embedding(knowledge.embedding)
        self.short_term.append(knowledge)
        self.episodic.append(knowledge)

        existing_id = self._find_duplicate(knowledge)
        if existing_id is not None:
            existing = self.semantic_records[existing_id]
            previous_importance = existing.importance
            existing.reinforced_count += 1
            existing.importance = max(previous_importance, knowledge.importance)
            existing.confidence = max(existing.confidence, knowledge.confidence)
            # Adopt the new summary only when the new record is at least as important.
            if knowledge.importance >= previous_importance:
                existing.summary = knowledge.summary
            existing.concepts = list(dict.fromkeys(existing.concepts + knowledge.concepts))
            self.vector_index.upsert(existing_id, existing.embedding)
            return

        if len(self.semantic_records) >= self.config.semantic_capacity:
            weakest = min(
                self.semantic_records.items(),
                key=lambda item: self._retention_value(item[1]),
            )[0]
            self.semantic_records.pop(weakest, None)
            self.vector_index.delete(weakest)
        self.semantic_records[knowledge.record_id] = knowledge
        self.vector_index.upsert(knowledge.record_id, knowledge.embedding)

    def snapshot(self) -> Dict[str, Any]:
        """Return tier sizes, the ten most common concepts and working state."""
        concept_counter = Counter()
        for record in self.semantic_records.values():
            concept_counter.update(record.concepts)
        return {
            "short_term_size": len(self.short_term),
            "episodic_size": len(self.episodic),
            "semantic_size": len(self.semantic_records),
            "top_concepts": concept_counter.most_common(10),
            "working_state": dict(self.working_state),
        }

    def consolidate(self) -> ConsolidationReport:
        """Merge near-duplicate semantic records and prune down to capacity.

        Two records are merged when their cosine similarity exceeds 0.97 or
        they share at least three concepts. The surviving record takes the
        summed reinforcement count, the maximum importance/confidence and the
        union of concepts. Afterwards, if the tier is still over capacity, the
        lowest-valued records are dropped.
        """
        merged = 0
        pruned = 0
        records = list(self.semantic_records.values())
        # Stored embeddings are not normalised, so compare unit vectors; a raw
        # dot product is not comparable to the 0.97 cosine threshold.
        unit_vectors = {record.record_id: self._unit(record.embedding) for record in records}
        absorbed = set()
        for left in records:
            if left.record_id in absorbed:
                continue
            for right in records:
                if left.record_id == right.record_id or right.record_id in absorbed:
                    continue
                similarity = float(np.dot(unit_vectors[left.record_id], unit_vectors[right.record_id]))
                overlap = len(set(left.concepts) & set(right.concepts))
                if similarity > 0.97 or overlap >= 3:
                    left.reinforced_count += right.reinforced_count
                    left.importance = max(left.importance, right.importance)
                    left.confidence = max(left.confidence, right.confidence)
                    left.concepts = list(dict.fromkeys(left.concepts + right.concepts))
                    absorbed.add(right.record_id)
                    merged += 1
        for record_id in absorbed:
            self.semantic_records.pop(record_id, None)
            self.vector_index.delete(record_id)

        if len(self.semantic_records) > self.config.semantic_capacity:
            items = sorted(
                self.semantic_records.values(),
                key=self._retention_value,
                reverse=True,
            )
            keep = {record.record_id for record in items[: self.config.semantic_capacity]}
            for record_id in list(self.semantic_records):
                if record_id not in keep:
                    self.semantic_records.pop(record_id, None)
                    self.vector_index.delete(record_id)
                    pruned += 1
        return ConsolidationReport(
            merged_records=merged,
            pruned_records=pruned,
            replay_reweighted=0,
            notes="Merged highly similar concepts and trimmed low-value semantic residues.",
        )

    def _find_duplicate(self, knowledge: CompressedKnowledge) -> str | None:
        """Return the id of the nearest record if it counts as a duplicate.

        Only the single best vector hit is inspected; it is a duplicate when
        its score exceeds 0.985 or it shares at least three concepts.
        """
        hits = self.vector_index.search(knowledge.embedding, top_k=1)
        if not hits:
            return None
        record_id, score = hits[0]
        if score > 0.985 or len(set(self.semantic_records[record_id].concepts) & set(knowledge.concepts)) >= 3:
            return record_id
        return None

    def _align_embedding(self, embedding: np.ndarray) -> np.ndarray:
        """Zero-pad or truncate ``embedding`` along axis 0 to ``embedding_dim``."""
        if embedding.shape[0] < self.embedding_dim:
            return np.pad(embedding, (0, self.embedding_dim - embedding.shape[0]))
        if embedding.shape[0] > self.embedding_dim:
            return embedding[: self.embedding_dim]
        return embedding

    @staticmethod
    def _unit(vector: np.ndarray) -> np.ndarray:
        """Return ``vector`` scaled to unit length; zero vectors are unchanged."""
        return vector / (np.linalg.norm(vector) or 1.0)

    @staticmethod
    def _concept_roots(concepts: List[str]) -> set:
        """Return the lower-cased 4-character prefixes of concepts of length >= 4."""
        return {concept.lower()[:4] for concept in concepts if len(concept) >= 4}

    @staticmethod
    def _most_recent(items: Deque[Any], limit: int) -> List[Any]:
        """Return the last ``limit`` items; ``items[-0:]`` would return them all."""
        if limit <= 0:
            return []
        return list(items)[-limit:]

    @staticmethod
    def _retention_value(record: CompressedKnowledge) -> float:
        """Score used to decide which records to evict or keep."""
        return record.importance * record.confidence * max(record.reinforced_count, 1)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Dict, List, Tuple
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class NumpyVectorIndex:
    """Brute-force cosine-similarity index over unit-length float32 vectors.

    Vectors are normalised when stored, so a search is a dot product of the
    normalised query against every stored vector.
    """

    # Maps record id -> unit-length float32 embedding.
    embeddings: Dict[str, np.ndarray] = field(default_factory=dict)

    def upsert(self, record_id: str, embedding: np.ndarray) -> None:
        """Store (or overwrite) the normalised embedding for ``record_id``."""
        self.embeddings[record_id] = self._normalize(embedding)

    def delete(self, record_id: str) -> None:
        """Remove ``record_id`` if present; unknown ids are ignored."""
        self.embeddings.pop(record_id, None)

    def search(self, embedding: np.ndarray, top_k: int = 5) -> List[Tuple[str, float]]:
        """Return up to ``top_k`` ``(record_id, cosine)`` pairs, best first.

        The caller's ``embedding`` is never modified. An empty index returns
        an empty list.
        """
        if not self.embeddings:
            return []
        query = self._normalize(embedding)
        scores = [(record_id, float(np.dot(query, stored))) for record_id, stored in self.embeddings.items()]
        scores.sort(key=lambda item: item[1], reverse=True)
        return scores[:top_k]

    @staticmethod
    def _normalize(embedding: np.ndarray) -> np.ndarray:
        """Return a new unit-length float32 copy; zero vectors stay zero.

        The norm is cast to float32 before dividing. Dividing a float32 array
        by the ``np.float64`` scalar from ``np.linalg.norm`` promotes the
        result to float64 under NumPy 2 promotion rules, which made the stored
        dtype depend on the NumPy version.
        """
        vector = np.asarray(embedding, dtype=np.float32)
        norm = float(np.linalg.norm(vector))
        if norm == 0.0:
            # Copy so the index never aliases the caller's array.
            return vector.copy()
        return vector / np.float32(norm)
|
|
28
|
+
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
import torch
|
|
6
|
+
from torch import nn
|
|
7
|
+
|
|
8
|
+
from cognitive_engine.core.types import CoreInference, MemoryBundle, RoutingDecision, SemanticState
|
|
9
|
+
from cognitive_engine.interfaces.base import StableCore
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class StableReasoningCore(nn.Module, StableCore):
    """Frozen projection core that turns retrieved memory into an answer.

    The core owns a small two-layer projector whose parameters never receive
    gradients. ``infer`` projects the pooled embedding to a latent vector and
    assembles a textual answer from the retrieved memory summaries.
    """

    name = "stable_reasoning_core"

    def __init__(self, latent_dim: int = 64, device: str = "cpu") -> None:
        """Build the frozen projector of width ``latent_dim`` on ``device``."""
        super().__init__()
        # Kept so describe() and _project() agree with the projector width
        # instead of assuming the default of 64.
        self.latent_dim = latent_dim
        self.device = device
        self.projector = nn.Sequential(nn.Linear(latent_dim, latent_dim), nn.Tanh(), nn.Linear(latent_dim, latent_dim))
        for parameter in self.projector.parameters():
            parameter.requires_grad = False
        self.to(self.device)

    def describe(self) -> Dict[str, Any]:
        """Return the component name, the projector width and the frozen flag."""
        return {"name": self.name, "latent_dim": self.latent_dim, "frozen": True}

    def infer(
        self,
        semantic_state: SemanticState,
        memory_bundle: MemoryBundle,
        routing: RoutingDecision,
        plastic_output: Optional[CoreInference] = None,
    ) -> CoreInference:
        """Produce a ``CoreInference`` for the current semantic state.

        For numeric input with a ``plastic_output``, the plastic prediction
        and confidence are passed through and the explanation cites up to two
        memory summaries. Otherwise the answer is built from up to three
        memory summaries (for questions) or from the compressed context, with
        a fixed confidence of 0.72.
        """
        latent = self._project(semantic_state.pooled_embedding.float().to(self.device))

        if semantic_state.modality == "numeric" and plastic_output is not None:
            summaries = [record.summary for record in memory_bundle.semantic_long_term[:2]]
            memory_hint = " | ".join(summaries) if summaries else "No previous arithmetic consolidation available."
            explanation = (
                f"Resultado estimado: {plastic_output.prediction:.3f}. "
                f"Confianza={plastic_output.confidence:.3f}. "
                f"Memoria relevante: {memory_hint}"
            )
            return CoreInference(
                prediction=plastic_output.prediction,
                confidence=plastic_output.confidence,
                explanation=explanation,
                hidden_state=latent.squeeze(0),
                artifacts={"memory_hint": memory_hint, "route": routing.rationale},
            )

        summaries = [record.summary for record in memory_bundle.semantic_long_term[:3]]
        current_action = semantic_state.metadata.get("importance_action", "observe")
        if semantic_state.intent == "question":
            if summaries:
                answer = " | ".join(summaries)
            else:
                answer = "No hay conocimiento consolidado suficiente; la consulta queda en modo exploratorio."
        elif current_action in {"learn", "reinforce", "consolidate"}:
            answer = f"Información analizada y comprimida como: {semantic_state.compressed_context}"
        else:
            answer = f"Información observada sin actualización fuerte de memoria: {semantic_state.compressed_context}"
        return CoreInference(
            prediction=answer,
            confidence=0.72,
            explanation=answer,
            hidden_state=latent.squeeze(0),
            artifacts={
                "active_concepts": memory_bundle.working_memory.get("active_concepts", []),
                "retrieved_summaries": summaries,
            },
        )

    def _project(self, pooled_embedding: torch.Tensor) -> torch.Tensor:
        """Pad or truncate the last axis to ``latent_dim`` and run the projector.

        A leading batch axis is added first. The last axis is zero-padded on
        the right when too short and cut when too long.
        """
        vector = pooled_embedding.unsqueeze(0)
        width = vector.shape[-1]
        if width < self.latent_dim:
            vector = torch.nn.functional.pad(vector, (0, self.latent_dim - width))
        elif width > self.latent_dim:
            # Ellipsis keeps the slice on the feature axis for any input rank.
            vector = vector[..., : self.latent_dim]
        return self.projector(vector)
|
|
79
|
+
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from cognitive_engine.config.schema import ThresholdConfig
|
|
8
|
+
from cognitive_engine.core.types import ImportanceAssessment, MemoryBundle, SemanticState, tensor_to_numpy
|
|
9
|
+
from cognitive_engine.interfaces.base import ImportanceEvaluator
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AdaptiveImportanceEvaluator(ImportanceEvaluator):
    """Heuristic scorer deciding whether new input should update memory.

    The evaluator combines novelty, utility, coherence, expected future
    relevance, correction signal, frequency, redundancy and contradiction risk
    into an importance score, then maps the resulting learning priority to an
    action using the configured thresholds.
    """

    name = "adaptive_importance_evaluator"

    # Baseline utility per intent; intents not listed here use 0.55.
    _INTENT_UTILITY = {
        "correction": 0.95,
        "knowledge_share": 0.85,
        "preference": 0.9,
        "question": 0.42,
        "small_talk": 0.12,
    }

    def __init__(self, thresholds: ThresholdConfig) -> None:
        """Keep the threshold configuration used to pick an action."""
        self.thresholds = thresholds

    def describe(self) -> Dict[str, Any]:
        """Return the component name and a copy of the threshold values."""
        # Copy so callers cannot mutate the live configuration through the result.
        return {"name": self.name, "thresholds": dict(self.thresholds.__dict__)}

    def evaluate(self, semantic_state: SemanticState, memory_bundle: MemoryBundle) -> ImportanceAssessment:
        """Score ``semantic_state`` against retrieved memory and pick an action.

        Redundancy is the mean ``"similarity"`` of the retrieved semantic
        records (0 when none were retrieved), clamped to ``[0, 1]`` because
        retrieval scores may carry additive boosts or be negative. Novelty is
        its complement.

        The action is one of ``"uncertain"``, ``"consolidate"``,
        ``"reinforce"``, ``"learn"`` or ``"ignore"``, checked in that order.
        """
        if memory_bundle.semantic_long_term:
            similarities = [record.metadata.get("similarity", 0.0) for record in memory_bundle.semantic_long_term]
            redundancy = float(np.clip(np.mean(similarities), 0.0, 1.0))
            novelty = 1.0 - redundancy
        else:
            redundancy = 0.0
            novelty = 1.0

        intent = semantic_state.intent
        utility = self._INTENT_UTILITY.get(intent, 0.55)
        concept_count = len(semantic_state.concepts)
        coherence = min(1.0, 0.35 + 0.1 * concept_count)
        frequency = min(1.0, len(memory_bundle.short_term) / 10.0)
        correction_signal = 1.0 if intent == "correction" else 0.15
        future_relevance = min(
            1.0,
            0.25 + 0.2 * concept_count + (0.25 if intent in {"knowledge_share", "preference"} else 0.0),
        )
        if intent == "preference":
            future_relevance = min(1.0, future_relevance + 0.1)
        # A correction that closely matches existing memory may contradict it.
        contradiction_risk = 0.15 + (0.35 if intent == "correction" and redundancy > 0.7 else 0.0)

        importance_score = (
            novelty * 0.28
            + utility * 0.24
            + coherence * 0.12
            + future_relevance * 0.18
            + correction_signal * 0.1
            + frequency * 0.08
            - redundancy * 0.12
            - contradiction_risk * 0.08
        )
        importance_score = float(max(0.0, min(1.0, importance_score)))
        confidence_score = float(max(0.05, min(1.0, coherence * (1.0 - contradiction_risk * 0.5))))
        learning_priority = float(max(0.0, min(1.0, importance_score * 0.7 + confidence_score * 0.3)))

        if contradiction_risk > self.thresholds.contradiction and confidence_score < self.thresholds.uncertainty:
            action = "uncertain"
            rationale = "Potential contradiction detected, holding update until corroboration arrives."
        elif learning_priority >= self.thresholds.consolidate:
            action = "consolidate"
            rationale = "High-value information with durable relevance; consolidate into long-term semantic memory."
        elif learning_priority >= self.thresholds.reinforce:
            action = "reinforce"
            rationale = "Related knowledge already exists; reinforce instead of broad plastic updates."
        elif learning_priority >= self.thresholds.learn:
            action = "learn"
            rationale = "Novel information passed learning threshold and can be compressed for memory."
        elif intent == "preference" and confidence_score >= 0.65:
            action = "learn"
            rationale = "User preference retained despite moderate novelty because it affects future dialogue behavior."
        else:
            action = "ignore"
            rationale = "Signal is too weak or redundant to justify memory update."

        return ImportanceAssessment(
            novelty=novelty,
            utility=utility,
            frequency=frequency,
            coherence=coherence,
            correction_signal=correction_signal,
            redundancy=redundancy,
            future_relevance=future_relevance,
            contradiction_risk=contradiction_risk,
            importance_score=importance_score,
            confidence_score=confidence_score,
            learning_priority=learning_priority,
            action=action,
            rationale=rationale,
        )
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
import torch
|
|
6
|
+
|
|
7
|
+
from cognitive_engine.core.types import ProcessedInput
|
|
8
|
+
from cognitive_engine.interfaces.base import InputProcessor
|
|
9
|
+
from cognitive_engine.utils.numeric import OPERATIONS
|
|
10
|
+
from cognitive_engine.utils.text import detect_intent, hashed_token_ids, normalize_text, tokenize_text
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TextInputProcessor(InputProcessor):
    """Input processor that turns a string into hashed token-id tensors."""

    name = "text_input_processor"

    def __init__(self, vocab_size: int = 4096, max_length: int = 96, device: str = "cpu") -> None:
        """Remember the hashing vocabulary size, token limit and target device."""
        self.vocab_size = vocab_size
        self.max_length = max_length
        self.device = device

    def describe(self) -> Dict[str, Any]:
        """Return the component name together with its size settings."""
        summary: Dict[str, Any] = {"name": self.name}
        summary["vocab_size"] = self.vocab_size
        summary["max_length"] = self.max_length
        return summary

    def supports(self, payload: Any) -> bool:
        """Report whether ``payload`` is a string."""
        return isinstance(payload, str)

    def process(self, payload: Any) -> ProcessedInput:
        """Normalise, tokenise and hash ``payload`` into a ``ProcessedInput``.

        The token list is cut to ``max_length``. When hashing yields no ids a
        single id ``0`` is used, so the tensor always has at least one entry.
        The attention mask is all ones with the same shape as the id tensor.
        """
        normalized = normalize_text(str(payload))
        clipped_tokens = tokenize_text(normalized)[: self.max_length]

        ids = hashed_token_ids(clipped_tokens, self.vocab_size)
        if not ids:
            ids = [0]

        # Shape (1, number_of_ids): unsqueeze adds the leading batch axis.
        id_batch = torch.tensor(ids, dtype=torch.long, device=self.device).unsqueeze(0)

        return ProcessedInput(
            raw_input=payload,
            modality="text",
            normalized_text=normalized,
            tokens=clipped_tokens,
            token_tensor=id_batch,
            attention_mask=torch.ones_like(id_batch),
            metadata={"intent_hint": detect_intent(normalized)},
        )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class NumericInputProcessor(InputProcessor):
    """Input processor for dict payloads describing a two-operand operation."""

    name = "numeric_input_processor"

    def __init__(self, device: str = "cpu", scale: float = 12.0) -> None:
        """Store the device and operand scale and copy the operation table."""
        self.device = device
        self.scale = scale
        self.operation_to_id = dict(OPERATIONS)

    def describe(self) -> Dict[str, Any]:
        """Return the component name, the scale and the operation table."""
        summary: Dict[str, Any] = {"name": self.name}
        summary["scale"] = self.scale
        summary["operations"] = self.operation_to_id
        return summary

    def supports(self, payload: Any) -> bool:
        """Report whether ``payload`` is a dict whose modality is ``"numeric"``."""
        if not isinstance(payload, dict):
            return False
        return payload.get("modality") == "numeric"

    def process(self, payload: Any) -> ProcessedInput:
        """Convert a numeric payload into a ``ProcessedInput``.

        Reads ``"a"``, ``"b"`` and ``"operation"`` from ``payload``. Both
        operands are divided by ``scale`` to form a ``(1, 2)`` float32 tensor.
        If ``"target"`` is present it is stored in the metadata and the intent
        hint becomes ``"knowledge_share"``; otherwise it is ``"question"``.

        Raises:
            KeyError: If a required key is missing or the operation is not in
                the operation table.
        """
        operand_a = float(payload["a"])
        operand_b = float(payload["b"])
        op_name = str(payload["operation"])

        scaled_pair = [[operand_a / self.scale, operand_b / self.scale]]
        feature_tensor = torch.tensor(scaled_pair, dtype=torch.float32, device=self.device)

        has_target = "target" in payload
        details: Dict[str, Any] = {
            "a": operand_a,
            "b": operand_b,
            "scale": self.scale,
            "operation_id": self.operation_to_id[op_name],
            "intent_hint": "knowledge_share" if has_target else "question",
        }
        if has_target:
            details["target"] = float(payload["target"])

        return ProcessedInput(
            raw_input=payload,
            modality="numeric",
            numeric_tensor=feature_tensor,
            operation=op_name,
            metadata=details,
        )
|