nexo-brain 2.3.0 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bin/nexo-brain.js +92 -9
- package/bin/postinstall.js +22 -15
- package/package.json +7 -4
- package/src/auto_update.py +194 -5
- package/src/crons/sync.py +6 -2
- package/src/db/_core.py +1 -0
- package/src/db/_entities.py +1 -0
- package/src/db/_episodic.py +1 -0
- package/src/db/_learnings.py +1 -0
- package/src/db/_reminders.py +1 -0
- package/src/db/_schema.py +11 -1
- package/src/db/_sessions.py +1 -0
- package/src/db/_skills.py +1 -0
- package/src/hooks/capture-tool-logs.sh +23 -6
- package/src/hooks/session-start.sh +4 -3
- package/src/plugin_loader.py +1 -0
- package/src/plugins/update.py +377 -26
- package/src/scripts/deep-sleep/apply_findings.py +1 -0
- package/src/scripts/deep-sleep/collect.py +1 -0
- package/src/scripts/deep-sleep/extract.py +1 -0
- package/src/scripts/deep-sleep/synthesize.py +1 -0
- package/src/scripts/nexo-catchup.py +29 -4
- package/src/scripts/nexo-daily-self-audit.py +21 -1
- package/src/scripts/nexo-evolution-run.py +21 -1
- package/src/scripts/nexo-learning-housekeep.py +1 -0
- package/src/scripts/nexo-postmortem-consolidator.py +34 -9
- package/src/scripts/nexo-sleep.py +32 -10
- package/src/scripts/nexo-synthesis.py +29 -9
- package/src/scripts/nexo-update.sh +109 -7
- package/src/scripts/nexo-watchdog.sh +122 -58
- package/src/server.py +66 -1
- package/src/tools_coordination.py +1 -0
- package/src/tools_sessions.py +1 -0
- package/scripts/migrate-to-unified 2.sh +0 -813
- package/scripts/migrate-to-unified.sh +0 -813
- package/scripts/migrate-v1.5-to-v1.6 2.py +0 -778
- package/scripts/migrate-v1.5-to-v1.6.py +0 -778
- package/scripts/migrate-v1.7-to-v1.8 2.py +0 -214
- package/scripts/migrate-v1.7-to-v1.8.py +0 -214
- package/scripts/nexo-preflight.sh +0 -236
- package/scripts/pre-commit-check 2.sh +0 -55
- package/scripts/pre-commit-check.sh +0 -55
- package/src/__pycache__/auto_close_sessions.cpython-314.pyc +0 -0
- package/src/__pycache__/auto_update.cpython-310.pyc +0 -0
- package/src/__pycache__/hnsw_index.cpython-310.pyc +0 -0
- package/src/__pycache__/hnsw_index.cpython-314.pyc +0 -0
- package/src/__pycache__/kg_populate.cpython-310.pyc +0 -0
- package/src/__pycache__/knowledge_graph.cpython-310.pyc +0 -0
- package/src/__pycache__/plugin_loader.cpython-310.pyc +0 -0
- package/src/__pycache__/plugin_loader.cpython-314.pyc +0 -0
- package/src/__pycache__/tools_coordination.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_credentials.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_learnings.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_menu.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_reminders.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_reminders_crud.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_sessions.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_task_history.cpython-310.pyc +0 -0
- package/src/auto_close_sessions 2.py +0 -159
- package/src/auto_update 2.py +0 -634
- package/src/claim_graph 2.py +0 -323
- package/src/cognitive/__init__ 2.py +0 -62
- package/src/cognitive/__pycache__/__init__.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_core.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_core.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_core.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_decay.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_decay.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_decay.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_ingest.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_ingest.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_ingest.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_memory.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_memory.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_memory.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_search.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_search.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_search.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_trust.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_trust.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_trust.cpython-314.pyc +0 -0
- package/src/cognitive/_core 2.py +0 -567
- package/src/cognitive/_decay 2.py +0 -382
- package/src/cognitive/_ingest 2.py +0 -892
- package/src/cognitive/_memory 2.py +0 -912
- package/src/cognitive/_search 2.py +0 -949
- package/src/cognitive/_trust 2.py +0 -464
- package/src/crons/__pycache__/sync.cpython-314.pyc +0 -0
- package/src/crons/manifest 2.json +0 -106
- package/src/crons/sync 2.py +0 -217
- package/src/dashboard/__init__ 2.py +0 -0
- package/src/dashboard/__pycache__/__init__.cpython-310.pyc +0 -0
- package/src/dashboard/__pycache__/app.cpython-310.pyc +0 -0
- package/src/dashboard/app 2.py +0 -789
- package/src/db/__init__ 2.py +0 -89
- package/src/db/__pycache__/__init__.cpython-310.pyc +0 -0
- package/src/db/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/db/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_core.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_core.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_core.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_credentials.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_credentials.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_credentials.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_cron_runs.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_cron_runs.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_entities.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_entities.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_entities.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_episodic.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_episodic.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_episodic.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_evolution.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_evolution.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_evolution.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_fts.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_fts.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_fts.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_learnings.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_learnings.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_learnings.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_reminders.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_reminders.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_reminders.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_schema.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_schema.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_schema.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_sessions.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_sessions.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_sessions.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_skills.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_skills.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_skills.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_tasks.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_tasks.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_tasks.cpython-314.pyc +0 -0
- package/src/db/_core 2.py +0 -417
- package/src/db/_credentials 2.py +0 -124
- package/src/db/_entities 2.py +0 -178
- package/src/db/_episodic 2.py +0 -738
- package/src/db/_evolution 2.py +0 -54
- package/src/db/_fts 2.py +0 -406
- package/src/db/_learnings 2.py +0 -168
- package/src/db/_reminders 2.py +0 -338
- package/src/db/_schema 2.py +0 -364
- package/src/db/_sessions 2.py +0 -300
- package/src/db/_tasks 2.py +0 -91
- package/src/evolution_cycle 2.py +0 -266
- package/src/hnsw_index 2.py +0 -254
- package/src/hooks/auto_capture 2.py +0 -208
- package/src/hooks/caffeinate-guard 2.sh +0 -8
- package/src/hooks/capture-session 2.sh +0 -21
- package/src/hooks/capture-tool-logs 2.sh +0 -127
- package/src/hooks/daily-briefing-check 2.sh +0 -33
- package/src/hooks/inbox-hook 2.sh +0 -76
- package/src/hooks/post-compact 2.sh +0 -148
- package/src/hooks/pre-compact 2.sh +0 -151
- package/src/hooks/session-start 2.sh +0 -268
- package/src/hooks/session-stop 2.sh +0 -140
- package/src/kg_populate 2.py +0 -290
- package/src/knowledge_graph 2.py +0 -257
- package/src/maintenance 2.py +0 -59
- package/src/migrate_embeddings 2.py +0 -122
- package/src/plugin_loader 2.py +0 -202
- package/src/plugins/__init__ 2.py +0 -0
- package/src/plugins/__pycache__/__init__ 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/__init__.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/plugins/__pycache__/adaptive_mode 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/adaptive_mode.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/adaptive_mode.cpython-314.pyc +0 -0
- package/src/plugins/__pycache__/agents 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/agents.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/artifact_registry 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/artifact_registry.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/backup 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/backup.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/cognitive_memory 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/cognitive_memory.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/core_rules 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/core_rules.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/cortex 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/cortex.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/entities 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/entities.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/episodic_memory 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/episodic_memory.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/evolution 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/evolution.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/guard 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/guard.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/knowledge_graph_tools 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/knowledge_graph_tools.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/preferences 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/preferences.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/schedule.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/schedule.cpython-314.pyc +0 -0
- package/src/plugins/__pycache__/skills.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/skills.cpython-314.pyc +0 -0
- package/src/plugins/__pycache__/update 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/update.cpython-310.pyc +0 -0
- package/src/plugins/adaptive_mode 2.py +0 -805
- package/src/plugins/agents 2.py +0 -52
- package/src/plugins/artifact_registry 2.py +0 -450
- package/src/plugins/backup 2.py +0 -104
- package/src/plugins/cognitive_memory 2.py +0 -564
- package/src/plugins/core_rules 2.py +0 -252
- package/src/plugins/cortex 2.py +0 -299
- package/src/plugins/entities 2.py +0 -67
- package/src/plugins/episodic_memory 2.py +0 -533
- package/src/plugins/evolution 2.py +0 -115
- package/src/plugins/guard 2.py +0 -746
- package/src/plugins/knowledge_graph_tools 2.py +0 -105
- package/src/plugins/preferences 2.py +0 -47
- package/src/plugins/update 2.py +0 -256
- package/src/requirements 2.txt +0 -12
- package/src/rules/__init__ 2.py +0 -0
- package/src/rules/core-rules 2.json +0 -331
- package/src/rules/migrate 2.py +0 -207
- package/src/scripts/__pycache__/nexo-auto-update.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-catchup.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-cognitive-decay.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-daily-self-audit.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-evolution-run.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-followup-hygiene.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-immune.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-install.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-learning-housekeep.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-learning-validator.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-migrate.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-postmortem-consolidator.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-pre-commit.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-proactive-dashboard.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-reflection.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-runtime-preflight.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-send-email.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-send-reply.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-sleep.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-synthesis.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-watchdog-smoke.cpython-314.pyc +0 -0
- package/src/scripts/check-context 2.py +0 -264
- package/src/scripts/nexo-auto-update 2.py +0 -6
- package/src/scripts/nexo-backup 2.sh +0 -25
- package/src/scripts/nexo-brain-activation 2.sh +0 -140
- package/src/scripts/nexo-catchup 2.py +0 -242
- package/src/scripts/nexo-cognitive-decay 2.py +0 -182
- package/src/scripts/nexo-daily-self-audit 2.py +0 -552
- package/src/scripts/nexo-deep-sleep 2.sh +0 -97
- package/src/scripts/nexo-evolution-run 2.py +0 -597
- package/src/scripts/nexo-followup-hygiene 2.py +0 -112
- package/src/scripts/nexo-github-monitor 2.py +0 -256
- package/src/scripts/nexo-immune 2.py +0 -927
- package/src/scripts/nexo-inbox-hook 2.sh +0 -74
- package/src/scripts/nexo-install 2.py +0 -6
- package/src/scripts/nexo-learning-housekeep 2.py +0 -245
- package/src/scripts/nexo-learning-validator 2.py +0 -207
- package/src/scripts/nexo-migrate 2.py +0 -232
- package/src/scripts/nexo-postmortem-consolidator 2.py +0 -421
- package/src/scripts/nexo-pre-commit 2.py +0 -120
- package/src/scripts/nexo-prevent-sleep 2.sh +0 -29
- package/src/scripts/nexo-proactive-dashboard 2.py +0 -345
- package/src/scripts/nexo-reflection 2.py +0 -253
- package/src/scripts/nexo-runtime-preflight 2.py +0 -274
- package/src/scripts/nexo-send-email 2.py +0 -25
- package/src/scripts/nexo-send-email.py +0 -25
- package/src/scripts/nexo-send-reply 2.py +0 -178
- package/src/scripts/nexo-send-reply.py +0 -178
- package/src/scripts/nexo-sleep 2.py +0 -592
- package/src/scripts/nexo-snapshot-restore 2.sh +0 -35
- package/src/scripts/nexo-synthesis 2.py +0 -253
- package/src/scripts/nexo-tcc-approve 2.sh +0 -79
- package/src/scripts/nexo-update 2.sh +0 -161
- package/src/scripts/nexo-watchdog 2.sh +0 -878
- package/src/scripts/nexo-watchdog-smoke 2.py +0 -119
- package/src/server 2.py +0 -733
- package/src/storage_router 2.py +0 -32
- package/src/tools_coordination 2.py +0 -102
- package/src/tools_credentials 2.py +0 -68
- package/src/tools_learnings 2.py +0 -220
- package/src/tools_menu 2.py +0 -227
- package/src/tools_reminders 2.py +0 -86
- package/src/tools_reminders_crud 2.py +0 -159
- package/src/tools_sessions 2.py +0 -476
- package/src/tools_task_history 2.py +0 -57
- package/templates/CLAUDE.md 2.template +0 -63
- package/templates/openclaw 2.json +0 -13
- package/tests/__init__ 2.py +0 -0
- package/tests/__init__.py +0 -0
- package/tests/conftest 2.py +0 -71
- package/tests/conftest.py +0 -71
- package/tests/test_cognitive 2.py +0 -205
- package/tests/test_cognitive.py +0 -205
- package/tests/test_knowledge_graph 2.py +0 -140
- package/tests/test_knowledge_graph.py +0 -140
- package/tests/test_migrations 2.py +0 -137
- package/tests/test_migrations.py +0 -137
package/src/cognitive/_core 2.py
DELETED
|
@@ -1,567 +0,0 @@
|
|
|
1
|
-
"""NEXO Cognitive Engine — Vector memory with Atkinson-Shiffrin model."""
|
|
2
|
-
|
|
3
|
-
import base64
|
|
4
|
-
import json
|
|
5
|
-
import math
|
|
6
|
-
import os
|
|
7
|
-
import re
|
|
8
|
-
import sqlite3
|
|
9
|
-
import numpy as np
|
|
10
|
-
from datetime import datetime, timedelta
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
from typing import Optional
|
|
13
|
-
|
|
14
|
-
# Root directory for NEXO state; the NEXO_HOME environment variable overrides
# the default of ~/.nexo.
NEXO_HOME = os.environ.get("NEXO_HOME", os.path.expanduser("~/.nexo"))
_data_dir = os.path.join(NEXO_HOME, "data")
# Import-time side effect: the data directory is created if it is missing.
os.makedirs(_data_dir, exist_ok=True)

# SQLite file opened by _get_db().
COGNITIVE_DB = os.path.join(_data_dir, "cognitive.db")
# Expected embedding length; _auto_migrate_embeddings() re-embeds legacy
# 384-dim rows to this size.
EMBEDDING_DIM = 768
# Exponential decay constants: lambda = ln(2) / half-life, with the half-life
# written in hours (7 * 24 and 60 * 24).
LAMBDA_STM = 0.004126  # ln(2) / (7 * 24)  -> half-life ≈ 7 days
LAMBDA_LTM = 0.000481  # ln(2) / (60 * 24) -> half-life ≈ 60 days

# Prediction Error Gate thresholds
PE_GATE_REJECT = 0.85  # similarity > this → reject (not novel enough)
PE_GATE_REFINE = 0.70  # similarity between REFINE and REJECT → refinement (update existing)
# similarity < REFINE → novel (store as new)

# Session-level gate stats (reset each process lifetime)
_gate_stats = {"accepted_novel": 0, "accepted_refinement": 0, "rejected": 0}
|
|
30
|
-
|
|
31
|
-
# Discriminating entities — if these differ between two high-similarity memories,
# they are siblings (similar-but-incompatible), NOT duplicates to merge.
# All entries are lowercase; NOTE(review): presumably compared against
# lowercased text — confirm against the code that consumes this set.
DISCRIMINATING_ENTITIES = {
    # OS / Environment
    "linux", "mac", "macos", "windows", "darwin", "ubuntu", "debian", "alpine",
    # Platforms
    "shopify", "my-project", "project-a", "ecommerce", "whatsapp", "chrome", "firefox",
    # Languages / Runtimes
    "python", "php", "javascript", "typescript", "node", "deno", "ruby",
    # Versions
    "v1", "v2", "v3", "v4", "v5", "5.6", "7.4", "8.0", "8.1", "8.2",
    # Infrastructure
    "shared-hosting", "cloudrun", "gcloud", "vps", "local", "production", "staging",
    # DB
    "mysql", "sqlite", "postgresql", "postgres", "redis",
}

# Sentiment detection keywords (mixed Spanish and English).
# Several entries are word stems ("frustr", "cansad", "irritad");
# NOTE(review): this suggests substring matching — confirm in the detector.
POSITIVE_SIGNALS = {
    "gracias", "genial", "perfecto", "bien", "excelente", "bueno", "me gusta",
    "correcto", "sí", "dale", "hazlo", "adelante", "ok", "vale", "great",
    "nice", "good", "exactly", "buen trabajo", "bien hecho", "fenomenal",
}
NEGATIVE_SIGNALS = {
    # Spanish
    "no", "mal", "otra vez", "ya te dije", "frustr", "error", "fallo",
    "cansad", "siempre", "nunca", "por qué no", "no funciona", "roto",
    "no sirve", "horrible", "desastre", "qué coño", "joder", "mierda",
    "hostia", "me cago", "irritad", "harto",
    # English
    "broken", "nothing works", "doesn't work", "not working", "fix it",
    "wrong", "failed", "failing", "annoying", "frustrated", "damn", "shit",
    "wtf", "terrible", "useless", "stupid", "hate", "worst", "sucks",
    "again",
}
# Keywords that signal the user wants something done immediately.
URGENCY_SIGNALS = {
    "rápido", "ya", "ahora", "urgente", "asap", "inmediatamente", "corre",
}
|
|
67
|
-
|
|
68
|
-
# Trust score events — default deltas (overridable via trust_event_config table)
# Keys are event names; values are the signed number of points applied.
_DEFAULT_TRUST_EVENTS = {
    # Positive
    "explicit_thanks": +3,
    "delegation": +2,  # user delegates new task without micromanaging
    "paradigm_shift": +2,  # user teaches, NEXO learns
    "sibling_detected": +3,  # NEXO avoided context error on its own
    "proactive_action": +2,  # NEXO did something useful without being asked
    # Negative
    "correction": -3,  # user corrects NEXO
    "repeated_error": -7,  # Error on something NEXO already had a learning for
    "override": -5,  # NEXO's memory was wrong
    "correction_fatigue": -10,  # Same memory corrected 3+ times
    "forgot_followup": -4,  # Forgot to mark followup or execute it
}

# Lazy-loaded from DB (trust_event_config table overrides defaults)
_trust_events_cache = None
# NOTE(review): presumably the time the cache was last filled — confirm in the loader.
_trust_events_cache_ts = 0



# Module-level state
# All of these start as None. _conn is the shared SQLite connection cached by
# _get_db(); the model/reranker slots are presumably filled lazily by their
# loaders, which live elsewhere in this module.
_model = None
_embed_model = None
_reranker_model = None
_reranker = None
_conn = None
|
|
96
|
-
|
|
97
|
-
# --- Secret redaction patterns ---
# Ordered list of (compiled regex, replacement) pairs applied in sequence by
# redact_secrets(). A replacement is either a literal placeholder string or a
# callable that receives the match and returns the replacement text.
_REDACT_PATTERNS = [
    # Specific API key formats, identified by their literal prefixes
    # (sk-, ghp_, shpat_, AKIA, xoxb-/xoxp-)
    (re.compile(r'sk-[a-zA-Z0-9_\-]{20,}'), '[REDACTED:api_key]'),
    (re.compile(r'ghp_[a-zA-Z0-9]{20,}'), '[REDACTED:api_key]'),
    (re.compile(r'shpat_[a-f0-9]{20,}'), '[REDACTED:api_key]'),
    (re.compile(r'AKIA[A-Z0-9]{16}'), '[REDACTED:api_key]'),
    (re.compile(r'xox[bp]-[a-zA-Z0-9\-]{20,}'), '[REDACTED:api_key]'),
    # Bearer tokens
    (re.compile(r'Bearer\s+[a-zA-Z0-9_\-\.=+/]{20,}'), '[REDACTED:bearer_token]'),
    # Connection strings with credentials (anything of the form scheme://...@...)
    (re.compile(r'(mysql|postgresql|postgres|mongodb|redis)://[^\s"\']+@[^\s"\']+'), '[REDACTED:connection_string]'),
    # Generic token assignments — group 1 ("token=" plus optional quote) is
    # kept, only the value is replaced
    (re.compile(r'(token\s*[=:]\s*["\']?)([a-zA-Z0-9_\-]{20,})', re.IGNORECASE),
     lambda m: m.group(1) + '[REDACTED:token]'),
    # Password assignments — same approach: keep the "password=" prefix,
    # replace the value (8+ non-space, non-quote characters)
    (re.compile(r'(password\s*[=:]\s*["\']?)([^\s"\']{8,})', re.IGNORECASE),
     lambda m: m.group(1) + '[REDACTED:password]'),
    # "ssh user@<dotted-quad IPv4>" invocations (server credentials context).
    # The pattern matches any numeric address, not only private ranges.
    (re.compile(r'ssh\s+\S+@\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'), '[REDACTED:ssh_credential]'),
]
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def redact_secrets(text: str) -> str:
    """Scan text for secrets and replace them with [REDACTED:<type>] placeholders.

    Fast regex-only detection driven by ``_REDACT_PATTERNS``. Not overly
    aggressive — won't redact normal hex strings, UUIDs, or short tokens that
    aren't secrets.

    Args:
        text: Text to sanitize. Falsy values (``""`` or ``None``) are returned
            unchanged.

    Returns:
        The text with every pattern match replaced, patterns applied in list
        order.
    """
    if not text:
        return text
    result = text
    for pattern, replacement in _REDACT_PATTERNS:
        # Pattern.sub() accepts either a replacement string or a callable that
        # receives the match object, so both kinds of entry share one call.
        result = pattern.sub(replacement, result)
    return result
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def _get_db() -> sqlite3.Connection:
    """Return the module-wide SQLite connection, opening it on first use.

    The first call connects to ``COGNITIVE_DB`` with
    ``check_same_thread=False``, switches the journal to WAL with
    ``synchronous=NORMAL``, installs ``sqlite3.Row`` as the row factory and
    then runs table creation followed by the migrations, in order. Later
    calls return the cached connection untouched.
    """
    global _conn
    if _conn is not None:
        return _conn

    # The global is assigned before the setup steps run, so it is already set
    # while they execute.
    _conn = sqlite3.connect(COGNITIVE_DB, check_same_thread=False)
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA synchronous=NORMAL"):
        _conn.execute(pragma)
    _conn.row_factory = sqlite3.Row

    # Order matters: tables must exist before the migrations touch them.
    setup_steps = (
        _init_tables,
        _migrate_lifecycle,
        _migrate_co_activation,
        _auto_migrate_embeddings,
    )
    for step in setup_steps:
        step(_conn)
    return _conn
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def _migrate_lifecycle(conn: sqlite3.Connection):
    """Ensure the STM and LTM tables carry the lifecycle bookkeeping columns.

    Adds ``lifecycle_state``, ``snooze_until`` and ``redaction_applied`` to
    ``stm_memories`` and ``ltm_memories``, committing after each column that
    is added. Safe to run repeatedly: a "duplicate column" error from SQLite
    means the column is already there and is ignored.

    Raises:
        sqlite3.OperationalError: for any failure other than a duplicate
            column (for example, a missing table).
    """
    lifecycle_columns = (
        ("lifecycle_state", "TEXT DEFAULT 'active'"),
        ("snooze_until", "TEXT"),
        ("redaction_applied", "INTEGER DEFAULT 0"),
    )
    for table_name in ("stm_memories", "ltm_memories"):
        for column_name, column_ddl in lifecycle_columns:
            try:
                conn.execute(f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_ddl}")
                conn.commit()
            except sqlite3.OperationalError as err:
                # Only the "already migrated" case is benign.
                if "duplicate column" not in str(err).lower():
                    raise
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def _migrate_co_activation(conn: sqlite3.Connection):
    """Create the ``co_activation`` and ``prospective_triggers`` tables.

    Both statements use ``CREATE TABLE IF NOT EXISTS``, so running this again
    on an already migrated database changes nothing. The connection is
    committed afterwards.
    """
    schema_sql = """
        CREATE TABLE IF NOT EXISTS co_activation (
            memory_a_id INTEGER NOT NULL,
            memory_b_id INTEGER NOT NULL,
            strength REAL DEFAULT 1.0,
            co_access_count INTEGER DEFAULT 1,
            last_co_access TEXT DEFAULT (datetime('now')),
            PRIMARY KEY (memory_a_id, memory_b_id)
        );

        CREATE TABLE IF NOT EXISTS prospective_triggers (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            trigger_pattern TEXT NOT NULL,
            action TEXT NOT NULL,
            context TEXT DEFAULT '',
            created_at TEXT DEFAULT (datetime('now')),
            fired_at TEXT,
            status TEXT DEFAULT 'armed'
        );
    """
    conn.executescript(schema_sql)
    conn.commit()
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
def _auto_migrate_embeddings(conn: sqlite3.Connection):
    """Auto-detect old 384-dim embeddings and re-embed to 768-dim. Transparent to user.

    The stored dimension is read from the first embedding found in
    ``stm_memories``, ``ltm_memories`` or ``quarantine`` (checked in that
    order, so an empty STM no longer hides legacy rows in the other tables).
    Nothing happens when the database is empty, when the dimension already
    equals ``EMBEDDING_DIM``, or when it is anything other than 384.

    When a migration is needed, every row of the three tables is re-embedded
    from its ``content`` with the model returned by ``_get_model()`` and all
    updates are committed together.

    This is best-effort and never raises: on any failure the pending updates
    are rolled back, so a later commit cannot persist a half-migrated mix of
    dimensions, and a warning is logged so startup can continue.
    """
    tables = ("stm_memories", "ltm_memories", "quarantine")
    try:
        # Find one stored embedding to learn the current dimension.
        probe = None
        for table in tables:
            probe = conn.execute(f"SELECT embedding FROM {table} LIMIT 1").fetchone()
            if probe is not None:
                break
        if probe is None:
            return  # Empty DB, nothing to migrate

        # Embeddings are stored as raw float32 bytes.
        dim = len(np.frombuffer(probe[0], dtype=np.float32))
        if dim == EMBEDDING_DIM:
            return  # Already correct dimension
        if dim != 384:
            return  # Unknown dimension, don't touch

        # Need migration: 384 → 768
        model = _get_model()

        for table in tables:
            rows = conn.execute(f"SELECT id, content FROM {table}").fetchall()
            if not rows:
                continue

            ids = [row[0] for row in rows]
            contents = [row[1] for row in rows]
            updates = [
                (np.array(emb, dtype=np.float32).tobytes(), mem_id)
                for mem_id, emb in zip(ids, model.embed(contents))
            ]
            conn.executemany(f"UPDATE {table} SET embedding = ? WHERE id = ?", updates)

        conn.commit()
    except Exception:
        # Don't break startup if migration fails, but don't leave partial
        # updates sitting in the open transaction either.
        try:
            conn.rollback()
        except sqlite3.Error:
            pass
        import logging

        logging.getLogger(__name__).warning(
            "Embedding auto-migration failed; existing embeddings left unchanged",
            exc_info=True,
        )
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
def _init_tables(conn: sqlite3.Connection):
|
|
231
|
-
"""Create tables if they don't exist."""
|
|
232
|
-
conn.executescript("""
|
|
233
|
-
CREATE TABLE IF NOT EXISTS stm_memories (
|
|
234
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
235
|
-
content TEXT NOT NULL,
|
|
236
|
-
embedding BLOB NOT NULL,
|
|
237
|
-
source_type TEXT NOT NULL,
|
|
238
|
-
source_id TEXT DEFAULT '',
|
|
239
|
-
source_title TEXT DEFAULT '',
|
|
240
|
-
domain TEXT DEFAULT '',
|
|
241
|
-
created_at TEXT DEFAULT (datetime('now')),
|
|
242
|
-
last_accessed TEXT DEFAULT (datetime('now')),
|
|
243
|
-
access_count INTEGER DEFAULT 0,
|
|
244
|
-
strength REAL DEFAULT 1.0,
|
|
245
|
-
promoted_to_ltm INTEGER DEFAULT 0
|
|
246
|
-
);
|
|
247
|
-
|
|
248
|
-
CREATE TABLE IF NOT EXISTS ltm_memories (
|
|
249
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
250
|
-
content TEXT NOT NULL,
|
|
251
|
-
embedding BLOB NOT NULL,
|
|
252
|
-
source_type TEXT NOT NULL,
|
|
253
|
-
source_id TEXT DEFAULT '',
|
|
254
|
-
source_title TEXT DEFAULT '',
|
|
255
|
-
domain TEXT DEFAULT '',
|
|
256
|
-
created_at TEXT DEFAULT (datetime('now')),
|
|
257
|
-
last_accessed TEXT DEFAULT (datetime('now')),
|
|
258
|
-
access_count INTEGER DEFAULT 0,
|
|
259
|
-
strength REAL DEFAULT 1.0,
|
|
260
|
-
is_dormant INTEGER DEFAULT 0,
|
|
261
|
-
original_stm_id INTEGER,
|
|
262
|
-
tags TEXT DEFAULT ''
|
|
263
|
-
);
|
|
264
|
-
|
|
265
|
-
CREATE TABLE IF NOT EXISTS retrieval_log (
|
|
266
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
267
|
-
query_text TEXT NOT NULL,
|
|
268
|
-
results_count INTEGER DEFAULT 0,
|
|
269
|
-
top_score REAL DEFAULT 0.0,
|
|
270
|
-
created_at TEXT DEFAULT (datetime('now'))
|
|
271
|
-
);
|
|
272
|
-
|
|
273
|
-
-- Sibling memories: similar-but-incompatible (discriminating entities differ)
|
|
274
|
-
CREATE TABLE IF NOT EXISTS memory_siblings (
|
|
275
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
276
|
-
memory_a_id INTEGER NOT NULL,
|
|
277
|
-
memory_b_id INTEGER NOT NULL,
|
|
278
|
-
similarity REAL NOT NULL,
|
|
279
|
-
discriminators TEXT NOT NULL, -- JSON: entities that differ between them
|
|
280
|
-
created_at TEXT DEFAULT (datetime('now')),
|
|
281
|
-
UNIQUE(memory_a_id, memory_b_id)
|
|
282
|
-
);
|
|
283
|
-
|
|
284
|
-
-- Dreamed pairs: track which memory pairs have been processed by dream_cycle
|
|
285
|
-
CREATE TABLE IF NOT EXISTS dreamed_pairs (
|
|
286
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
287
|
-
memory_a_id INTEGER NOT NULL,
|
|
288
|
-
memory_b_id INTEGER NOT NULL,
|
|
289
|
-
insight_id INTEGER, -- LTM ID of the generated insight
|
|
290
|
-
created_at TEXT DEFAULT (datetime('now')),
|
|
291
|
-
UNIQUE(memory_a_id, memory_b_id)
|
|
292
|
-
);
|
|
293
|
-
|
|
294
|
-
-- Trust score: NEXO's alignment index (0-100, starts at 50)
|
|
295
|
-
CREATE TABLE IF NOT EXISTS trust_score (
|
|
296
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
297
|
-
score REAL NOT NULL,
|
|
298
|
-
event TEXT NOT NULL, -- what caused the change
|
|
299
|
-
delta REAL NOT NULL, -- points gained or lost
|
|
300
|
-
context TEXT DEFAULT '', -- details
|
|
301
|
-
created_at TEXT DEFAULT (datetime('now'))
|
|
302
|
-
);
|
|
303
|
-
|
|
304
|
-
-- Sentiment readings: user's detected mood per interaction
|
|
305
|
-
CREATE TABLE IF NOT EXISTS sentiment_log (
|
|
306
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
307
|
-
sentiment TEXT NOT NULL, -- 'positive', 'negative', 'neutral', 'urgent'
|
|
308
|
-
intensity REAL DEFAULT 0.5, -- 0.0 to 1.0
|
|
309
|
-
signals TEXT DEFAULT '', -- keywords detected
|
|
310
|
-
created_at TEXT DEFAULT (datetime('now'))
|
|
311
|
-
);
|
|
312
|
-
|
|
313
|
-
-- Quarantine: new memories held for validation before promotion to STM
|
|
314
|
-
CREATE TABLE IF NOT EXISTS quarantine (
|
|
315
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
316
|
-
content TEXT NOT NULL,
|
|
317
|
-
embedding BLOB NOT NULL,
|
|
318
|
-
source TEXT DEFAULT 'inferred',
|
|
319
|
-
source_type TEXT NOT NULL,
|
|
320
|
-
source_id TEXT DEFAULT '',
|
|
321
|
-
source_title TEXT DEFAULT '',
|
|
322
|
-
domain TEXT DEFAULT '',
|
|
323
|
-
confidence REAL DEFAULT 0.5,
|
|
324
|
-
promotion_checks INTEGER DEFAULT 0,
|
|
325
|
-
created_at TEXT DEFAULT (datetime('now')),
|
|
326
|
-
promoted_at TEXT,
|
|
327
|
-
status TEXT DEFAULT 'pending'
|
|
328
|
-
);
|
|
329
|
-
|
|
330
|
-
-- Correction tracking: when user overrides a memory's guidance
|
|
331
|
-
CREATE TABLE IF NOT EXISTS memory_corrections (
|
|
332
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
333
|
-
memory_id INTEGER NOT NULL,
|
|
334
|
-
store TEXT NOT NULL, -- 'stm' or 'ltm'
|
|
335
|
-
correction_type TEXT NOT NULL, -- 'override', 'exception', 'paradigm_shift'
|
|
336
|
-
context TEXT DEFAULT '', -- what user said
|
|
337
|
-
created_at TEXT DEFAULT (datetime('now'))
|
|
338
|
-
);
|
|
339
|
-
""")
|
|
340
|
-
|
|
341
|
-
# FTS5 tables for hybrid search (BM25 + vector)
|
|
342
|
-
conn.executescript("""
|
|
343
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS stm_fts USING fts5(
|
|
344
|
-
content, source_type, source_id, domain,
|
|
345
|
-
content_rowid='id',
|
|
346
|
-
prefix='2,3'
|
|
347
|
-
);
|
|
348
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS ltm_fts USING fts5(
|
|
349
|
-
content, source_type, source_id, domain,
|
|
350
|
-
content_rowid='id',
|
|
351
|
-
prefix='2,3'
|
|
352
|
-
);
|
|
353
|
-
""")
|
|
354
|
-
|
|
355
|
-
# Sync triggers — keep FTS5 in sync with memory tables
|
|
356
|
-
for store in ("stm", "ltm"):
|
|
357
|
-
conn.executescript(f"""
|
|
358
|
-
CREATE TRIGGER IF NOT EXISTS {store}_fts_insert AFTER INSERT ON {store}_memories BEGIN
|
|
359
|
-
INSERT OR REPLACE INTO {store}_fts(rowid, content, source_type, source_id, domain)
|
|
360
|
-
VALUES (new.id, new.content, new.source_type, new.source_id, new.domain);
|
|
361
|
-
END;
|
|
362
|
-
CREATE TRIGGER IF NOT EXISTS {store}_fts_delete AFTER DELETE ON {store}_memories BEGIN
|
|
363
|
-
DELETE FROM {store}_fts WHERE rowid = old.id;
|
|
364
|
-
END;
|
|
365
|
-
CREATE TRIGGER IF NOT EXISTS {store}_fts_update AFTER UPDATE OF content ON {store}_memories BEGIN
|
|
366
|
-
UPDATE {store}_fts SET content = new.content WHERE rowid = new.id;
|
|
367
|
-
END;
|
|
368
|
-
""")
|
|
369
|
-
|
|
370
|
-
# Backfill FTS5 for existing memories not yet indexed
|
|
371
|
-
for store in ("stm", "ltm"):
|
|
372
|
-
conn.execute(f"""
|
|
373
|
-
INSERT OR IGNORE INTO {store}_fts(rowid, content, source_type, source_id, domain)
|
|
374
|
-
SELECT id, content, source_type, source_id, domain FROM {store}_memories
|
|
375
|
-
""")
|
|
376
|
-
|
|
377
|
-
# Temporal indexing columns (Task C)
|
|
378
|
-
for table in ("stm_memories", "ltm_memories"):
|
|
379
|
-
try:
|
|
380
|
-
conn.execute(f"ALTER TABLE {table} ADD COLUMN temporal_date TEXT DEFAULT ''")
|
|
381
|
-
except Exception:
|
|
382
|
-
pass # Column already exists
|
|
383
|
-
|
|
384
|
-
# Somatic markers — emotional risk memory for files and areas
|
|
385
|
-
conn.execute("""
|
|
386
|
-
CREATE TABLE IF NOT EXISTS somatic_markers (
|
|
387
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
388
|
-
target TEXT NOT NULL,
|
|
389
|
-
target_type TEXT NOT NULL,
|
|
390
|
-
risk_score REAL DEFAULT 0.0,
|
|
391
|
-
incident_count INTEGER DEFAULT 0,
|
|
392
|
-
last_incident TEXT DEFAULT NULL,
|
|
393
|
-
last_decay TEXT DEFAULT NULL,
|
|
394
|
-
last_guard_decay_date TEXT DEFAULT NULL,
|
|
395
|
-
last_validated_at TEXT DEFAULT NULL,
|
|
396
|
-
created_at TEXT DEFAULT (datetime('now')),
|
|
397
|
-
updated_at TEXT DEFAULT (datetime('now')),
|
|
398
|
-
UNIQUE(target, target_type)
|
|
399
|
-
)
|
|
400
|
-
""")
|
|
401
|
-
conn.execute("CREATE INDEX IF NOT EXISTS idx_somatic_target ON somatic_markers(target)")
|
|
402
|
-
|
|
403
|
-
conn.execute("""
|
|
404
|
-
CREATE TABLE IF NOT EXISTS kg_nodes (
|
|
405
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
406
|
-
node_type TEXT NOT NULL,
|
|
407
|
-
node_ref TEXT NOT NULL,
|
|
408
|
-
label TEXT NOT NULL,
|
|
409
|
-
properties TEXT DEFAULT '{}',
|
|
410
|
-
created_at TEXT DEFAULT (datetime('now')),
|
|
411
|
-
UNIQUE(node_type, node_ref)
|
|
412
|
-
)
|
|
413
|
-
""")
|
|
414
|
-
conn.execute("CREATE INDEX IF NOT EXISTS idx_kg_nodes_type ON kg_nodes(node_type)")
|
|
415
|
-
conn.execute("CREATE INDEX IF NOT EXISTS idx_kg_nodes_label ON kg_nodes(label)")
|
|
416
|
-
|
|
417
|
-
conn.execute("""
|
|
418
|
-
CREATE TABLE IF NOT EXISTS kg_edges (
|
|
419
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
420
|
-
source_id INTEGER NOT NULL REFERENCES kg_nodes(id),
|
|
421
|
-
target_id INTEGER NOT NULL REFERENCES kg_nodes(id),
|
|
422
|
-
relation TEXT NOT NULL,
|
|
423
|
-
weight REAL DEFAULT 1.0,
|
|
424
|
-
confidence REAL DEFAULT 1.0,
|
|
425
|
-
valid_from TEXT DEFAULT (datetime('now')),
|
|
426
|
-
valid_until TEXT DEFAULT NULL,
|
|
427
|
-
source_memory_id TEXT DEFAULT '',
|
|
428
|
-
properties TEXT DEFAULT '{}',
|
|
429
|
-
created_at TEXT DEFAULT (datetime('now'))
|
|
430
|
-
)
|
|
431
|
-
""")
|
|
432
|
-
conn.execute("CREATE INDEX IF NOT EXISTS idx_kg_edges_source ON kg_edges(source_id)")
|
|
433
|
-
conn.execute("CREATE INDEX IF NOT EXISTS idx_kg_edges_target ON kg_edges(target_id)")
|
|
434
|
-
conn.execute("CREATE INDEX IF NOT EXISTS idx_kg_edges_relation ON kg_edges(relation)")
|
|
435
|
-
|
|
436
|
-
conn.commit()
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
def _get_model():
    """Return the shared fastembed ``TextEmbedding`` instance.

    The model is constructed on the first call and cached in the module-level
    ``_model`` global; later calls return the cached object. The fastembed
    import is deferred to that first call.
    """
    global _model
    if _model is not None:
        return _model

    # Deferred import: only pay for fastembed when an embedding is needed.
    from fastembed import TextEmbedding

    _model = TextEmbedding("BAAI/bge-base-en-v1.5")
    return _model
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
def _get_reranker():
    """Return the shared cross-encoder reranker, or ``None`` if unavailable.

    The module-level ``_reranker`` global has three states: ``None`` (not yet
    attempted), ``False`` (loading failed, do not retry) or the loaded
    ``TextCrossEncoder`` instance.
    """
    global _reranker
    if _reranker is None:
        try:
            from fastembed.rerank.cross_encoder import TextCrossEncoder
            _reranker = TextCrossEncoder("Xenova/ms-marco-MiniLM-L-6-v2")
        except Exception:
            # Remember the failure so the load is not re-attempted every call.
            _reranker = False

    if _reranker is False:
        return None
    return _reranker
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
def rerank_results(query: str, results: list[dict], top_k: int = 5) -> list[dict]:
    """Reorder search hits with the cross-encoder and return the best ``top_k``.

    Args:
        query: The search query the hits are scored against.
        results: Hit dicts; each must carry a ``"content"`` key. When reranking
            succeeds, every scored dict gains a ``"rerank_score"`` key and the
            list itself is sorted in place (highest score first).
        top_k: Maximum number of hits to return.

    Returns:
        The first ``top_k`` entries of ``results``. The incoming order is kept
        when no reranker is available, when there is at most one hit, or when
        scoring raises.
    """
    encoder = _get_reranker()

    if encoder and len(results) > 1:
        # Texts handed to the cross-encoder, in the same order as the hits.
        passages = [hit["content"] for hit in results]

        try:
            relevance = list(encoder.rerank(query, passages))
            for hit, value in zip(results, relevance):
                hit["rerank_score"] = value
            # Hits without a score (if any) sink to the bottom via -999.
            results.sort(key=lambda hit: hit.get("rerank_score", -999), reverse=True)
        except Exception:
            pass  # Best effort: keep whatever order the list currently has

    return results[:top_k]
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
def embed(text: str) -> np.ndarray:
    """Embed ``text`` as a float32 vector.

    Empty or whitespace-only text (including ``None``) never reaches the model:
    it yields a zero vector of length ``EMBEDDING_DIM`` instead.
    """
    if text and text.strip():
        # The model takes a batch; send a one-item batch and keep its only row.
        vectors = list(_get_model().embed([text]))
        return np.array(vectors[0], dtype=np.float32)

    return np.zeros(EMBEDDING_DIM, dtype=np.float32)
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
def _array_to_blob(arr: np.ndarray) -> bytes:
    """Convert ``arr`` to float32 and return its raw bytes."""
    as_float32 = arr.astype(np.float32)
    return as_float32.tobytes()
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
def _blob_to_array(blob: bytes) -> np.ndarray:
    """Interpret ``blob`` as a flat sequence of float32 values."""
    decoded = np.frombuffer(blob, dtype=np.float32)
    return decoded
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine of the angle between ``a`` and ``b``.

    If either vector has zero length the result is ``0.0`` rather than a
    division by zero.
    """
    lengths = (np.linalg.norm(a), np.linalg.norm(b))
    if any(length == 0 for length in lengths):
        return 0.0

    scale = lengths[0] * lengths[1]
    return float(np.dot(a, b) / scale)
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
# ── Temporal Date Extraction ───────────────────────────────────────────
|
|
514
|
-
|
|
515
|
-
# Lower-case month names and abbreviations (English and Spanish) mapped to a
# zero-padded month number. Insertion order matters: extract_temporal_date
# tries the names in this order and returns the first valid date it finds.
_MONTH_MAP = {
    "january": "01", "february": "02", "march": "03", "april": "04",
    "may": "05", "june": "06", "july": "07", "august": "08",
    "september": "09", "october": "10", "november": "11", "december": "12",
    "jan": "01", "feb": "02", "mar": "03", "apr": "04",
    "jun": "06", "jul": "07", "aug": "08", "sep": "09", "sept": "09",
    "oct": "10", "nov": "11", "dec": "12",
    "enero": "01", "febrero": "02", "marzo": "03", "abril": "04",
    "mayo": "05", "junio": "06", "julio": "07", "agosto": "08",
    "septiembre": "09", "octubre": "10", "noviembre": "11", "diciembre": "12",
}


def _iso_date_or_empty(year, month, day) -> str:
    """Return ``YYYY-MM-DD`` if the parts form a real calendar date, else ''."""
    # Function-scope import keeps this block independent of the file header.
    from datetime import date

    try:
        return date(int(year), int(month), int(day)).isoformat()
    except ValueError:
        return ""


def extract_temporal_date(text: str) -> str:
    """Extract a calendar date from free text.

    Formats are tried in this order, and the first *valid* date wins:

    1. Month-name dates, one month name at a time in ``_MONTH_MAP`` order;
       for each name "8 May, 2023" / "8 May 2023" is tried before
       "May 8, 2023" / "May 8 2023" (case-insensitive).
    2. ISO dates such as "2023-05-08".
    3. Slash dates "DD/MM/YYYY" or "MM/DD/YYYY". When only one reading is
       possible it is used; when both are, DD/MM (European) is assumed.

    Candidates that are not real calendar dates (day 45, month 13,
    30 February, ...) are skipped instead of being returned, and numbers or
    month names embedded in longer tokens ("dismay 5, 2023", "12023-05-08")
    are not treated as dates.

    Args:
        text: Text to scan. Falsy values yield ''.

    Returns:
        The date as ``YYYY-MM-DD``, or '' when no valid date is found.
    """
    if not text:
        return ""

    text_lower = text.lower()

    # Pattern 1: month-name dates.
    for month_name, month_num in _MONTH_MAP.items():
        name = re.escape(month_name)
        patterns = (
            # "8 May, 2023" or "8 May 2023"; the day must not be the tail of
            # a longer number and the year must be exactly four digits.
            rf'(?<!\d)(?P<day>\d{{1,2}})\s+{name},?\s+(?P<year>\d{{4}})(?!\d)',
            # "May 8, 2023" or "May 8 2023"; \b stops "may" matching "dismay".
            rf'\b{name}\s+(?P<day>\d{{1,2}}),?\s+(?P<year>\d{{4}})(?!\d)',
        )
        for pattern in patterns:
            for match in re.finditer(pattern, text_lower):
                found = _iso_date_or_empty(match["year"], month_num, match["day"])
                if found:
                    return found

    # Pattern 2: ISO format "2023-05-08".
    for match in re.finditer(r'(?<!\d)(\d{4})-(\d{2})-(\d{2})(?!\d)', text):
        found = _iso_date_or_empty(*match.groups())
        if found:
            return found

    # Pattern 3: "DD/MM/YYYY" or "MM/DD/YYYY".
    for match in re.finditer(r'(?<!\d)(\d{1,2})/(\d{1,2})/(\d{4})(?!\d)', text):
        first, second, year = int(match.group(1)), int(match.group(2)), match.group(3)
        if first <= 12 < second:
            # Second number cannot be a month, so this must be MM/DD.
            month, day = first, second
        else:
            # DD/MM: forced when first > 12, otherwise the European default.
            day, month = first, second
        found = _iso_date_or_empty(year, month, day)
        if found:
            return found

    return ""
|