nexo-brain 0.2.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/db.py CHANGED
@@ -7,20 +7,16 @@ import secrets
7
7
  import string
8
8
  import datetime
9
9
  import pathlib
10
- from pathlib import Path
11
-
12
- NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
13
- NEXO_HOME.mkdir(parents=True, exist_ok=True)
14
10
 
15
11
  DB_PATH = os.environ.get(
16
12
  "NEXO_TEST_DB",
17
13
  os.environ.get(
18
14
  "NEXO_DB",
19
- str(NEXO_HOME / "nexo.db"),
15
+ os.path.join(os.path.dirname(os.path.abspath(__file__)), "nexo.db"),
20
16
  ),
21
17
  )
22
18
 
23
- # TTLs in seconds
19
+ # TTLs in seconds (match session-coord.sh behavior)
24
20
  SESSION_STALE_SECONDS = 900 # 15 min (documented TTL)
25
21
  MESSAGE_TTL_SECONDS = 3600 # 1 hour
26
22
  QUESTION_TTL_SECONDS = 600 # 10 min
@@ -369,23 +365,25 @@ def init_db():
369
365
 
370
366
  # ── FTS5 Unified Search ──────────────────────────────────────────
371
367
 
372
- # Directories to index for unified search (uses NEXO_HOME)
368
+ # Directories to index for unified search
373
369
  _FTS_MD_DIRS = [
374
- str(NEXO_HOME / "docs"),
375
- str(NEXO_HOME / "projects"),
376
- str(NEXO_HOME / "memory"),
377
- str(NEXO_HOME / "operations"),
378
- str(NEXO_HOME / "learnings"),
379
- str(NEXO_HOME / "brain"),
380
- str(NEXO_HOME / "agents"),
381
- str(NEXO_HOME / "skills"),
370
+ os.path.expanduser("~/claude/docs"),
371
+ os.path.expanduser("~/claude/projects"),
372
+ os.path.expanduser("~/claude/memory"),
373
+ os.path.expanduser("~/claude/operations"),
374
+ os.path.expanduser("~/claude/learnings"),
375
+ os.path.expanduser("~/claude/brain"),
376
+ os.path.expanduser("~/claude/agents"),
377
+ os.path.expanduser("~/claude/skills"),
378
+ ]
379
+ # Code repos: index source files (skip vendor, node_modules, etc.)
380
+ _FTS_CODE_DIRS = [
381
+ (os.path.expanduser("~/Documents/_PhpstormProjects"), ["*.php", "*.js", "*.json", "*.py", "*.ts", "*.tsx"]),
382
382
  ]
383
- # Code repos: populated via nexo_index_add_dir tool or NEXO_HOME/repos
384
- _FTS_CODE_DIRS = []
385
383
  _FTS_CODE_SKIP = {
386
384
  "vendor", "node_modules", ".git", "cache", "tmp", "logs", "uploads",
387
385
  "assets/img", "assets/fonts", ".next", "dist", "build", ".prisma",
388
- "public/build", ".turbo", "__pycache__",
386
+ "PROYECTOS ANTIGUOS", "public/build", ".turbo", "__pycache__",
389
387
  "coverage", ".nyc_output", "storage/framework", "bootstrap/cache",
390
388
  }
391
389
  _FTS_MAX_FILE_SIZE = 50_000 # skip .md files >50KB
@@ -1194,7 +1192,13 @@ def update_followup(id: str, **kwargs) -> dict:
1194
1192
 
1195
1193
 
1196
1194
  def _calc_next_recurrence_date(recurrence: str, current_date: str = None) -> str:
1197
- """Calculate the next date for a recurring followup."""
1195
+ """Calculate the next date for a recurring followup.
1196
+
1197
+ Formats:
1198
+ weekly:monday, weekly:thursday, weekly:friday, weekly:sunday
1199
+ monthly:1, monthly:10, monthly:15
1200
+ quarterly
1201
+ """
1198
1202
  today = datetime.date.today()
1199
1203
  base = datetime.date.fromisoformat(current_date) if current_date else today
1200
1204
 
@@ -1205,11 +1209,12 @@ def _calc_next_recurrence_date(recurrence: str, current_date: str = None) -> str
1205
1209
  target_day = day_map.get(day_name, 0)
1206
1210
  days_ahead = (target_day - today.weekday()) % 7
1207
1211
  if days_ahead == 0:
1208
- days_ahead = 7
1212
+ days_ahead = 7 # next week, not today
1209
1213
  return (today + datetime.timedelta(days=days_ahead)).isoformat()
1210
1214
 
1211
1215
  elif recurrence.startswith('monthly:'):
1212
1216
  target_day = int(recurrence.split(':')[1])
1217
+ # Next month from today
1213
1218
  if today.month == 12:
1214
1219
  next_date = datetime.date(today.year + 1, 1, min(target_day, 28))
1215
1220
  else:
@@ -1219,6 +1224,7 @@ def _calc_next_recurrence_date(recurrence: str, current_date: str = None) -> str
1219
1224
  return next_date.isoformat()
1220
1225
 
1221
1226
  elif recurrence == 'quarterly':
1227
+ # 3 months from current date
1222
1228
  month = base.month + 3
1223
1229
  year = base.year
1224
1230
  if month > 12:
@@ -1252,6 +1258,7 @@ def complete_followup(id: str, result: str = '') -> dict:
1252
1258
  if recurrence:
1253
1259
  next_date = _calc_next_recurrence_date(recurrence, row["date"])
1254
1260
  if next_date:
1261
+ # Rename completed one to include date suffix, then create fresh one
1255
1262
  archived_id = f"{id}-{today}"
1256
1263
  conn.execute("UPDATE followups SET id = ? WHERE id = ?", (archived_id, id))
1257
1264
  conn.commit()
@@ -1829,6 +1836,7 @@ def delete_agent(id: str) -> bool:
1829
1836
  def cleanup_old_changes(retention_days: int = 90) -> int:
1830
1837
  """Delete change_log entries older than retention_days. Returns count deleted."""
1831
1838
  conn = get_db()
1839
+ # Get IDs before deleting so we can clean FTS
1832
1840
  ids = [str(r[0]) for r in conn.execute(
1833
1841
  "SELECT id FROM change_log WHERE created_at < datetime('now', ?)",
1834
1842
  (f"-{retention_days} days",)
@@ -2085,7 +2093,7 @@ def read_session_diary(session_id: str = '', last_n: int = 3, last_day: bool = F
2085
2093
  - session_id: returns entries for that specific session
2086
2094
  - last_day: returns ALL entries from the most recent day (multi-terminal aware)
2087
2095
  - last_n: returns last N entries (default)
2088
- - domain: filter by project context (e.g., infrastructure, nexo, server, other)
2096
+ - domain: filter by project context (e.g. project-a, project-b, nexo, server, other)
2089
2097
  """
2090
2098
  conn = get_db()
2091
2099
  domain_clause = " AND domain = ?" if domain else ""
@@ -2097,6 +2105,7 @@ def read_session_diary(session_id: str = '', last_n: int = 3, last_day: bool = F
2097
2105
  (session_id,) + domain_params
2098
2106
  ).fetchall()
2099
2107
  elif last_day:
2108
+ # Get all entries from the most recent calendar day
2100
2109
  if domain:
2101
2110
  latest = conn.execute(
2102
2111
  "SELECT date(created_at) as day FROM session_diary WHERE domain = ? ORDER BY created_at DESC LIMIT 1",
@@ -2121,7 +2130,13 @@ def read_session_diary(session_id: str = '', last_n: int = 3, last_day: bool = F
2121
2130
 
2122
2131
 
2123
2132
  def _multi_word_like(query: str, columns: list[str]) -> tuple[str, list]:
2124
- """Build AND-ed LIKE conditions: every word must appear in at least one of the columns."""
2133
+ """Build AND-ed LIKE conditions: every word must appear in at least one of the columns.
2134
+
2135
+ Returns (sql_fragment, params) ready for WHERE clause.
2136
+ Example: query="cron learn", columns=["title","content"]
2137
+ → "(title LIKE ? OR content LIKE ?) AND (title LIKE ? OR content LIKE ?)"
2138
+ with params ["%cron%","%cron%","%learn%","%learn%"]
2139
+ """
2125
2140
  words = query.strip().split()
2126
2141
  if not words:
2127
2142
  return "1=1", []
@@ -2136,8 +2151,13 @@ def _multi_word_like(query: str, columns: list[str]) -> tuple[str, list]:
2136
2151
 
2137
2152
 
2138
2153
  def recall(query: str, days: int = 30) -> list[dict]:
2139
- """Cross-search ALL memory using FTS5: learnings, decisions, changes, diary, followups, entities, .md files."""
2140
- results = fts_search(query, limit=40)
2154
+ """Cross-search ALL memory using FTS5: learnings, decisions, changes, diary, followups, entities, .md files.
2155
+
2156
+ Returns up to 20 results ranked by relevance (FTS5 bm25).
2157
+ Falls back to LIKE-based search if FTS fails.
2158
+ """
2159
+ # Try FTS5 first (fast, ranked), then filter by days
2160
+ results = fts_search(query, limit=40) # fetch extra to allow filtering
2141
2161
  if results:
2142
2162
  cutoff_epoch = now_epoch() - (days * 86400)
2143
2163
  filtered = []
@@ -2146,9 +2166,12 @@ def recall(query: str, days: int = 30) -> list[dict]:
2146
2166
  if not ua:
2147
2167
  filtered.append(r)
2148
2168
  continue
2169
+ # Normalize to epoch for comparison
2149
2170
  try:
2150
2171
  if ua[0].isdigit() and ('.' in ua or len(ua) > 12):
2172
+ # Could be epoch float or ISO date
2151
2173
  if '-' in ua[:5]:
2174
+ # ISO datetime like "2026-03-13 16:17:40"
2152
2175
  dt = datetime.datetime.fromisoformat(ua.replace(' ', 'T'))
2153
2176
  ts = dt.timestamp()
2154
2177
  else:
@@ -2158,10 +2181,11 @@ def recall(query: str, days: int = 30) -> list[dict]:
2158
2181
  if ts >= cutoff_epoch:
2159
2182
  filtered.append(r)
2160
2183
  except (ValueError, TypeError):
2161
- filtered.append(r)
2184
+ filtered.append(r) # keep if can't parse
2162
2185
  if filtered:
2163
2186
  return filtered[:20]
2164
2187
 
2188
+ # Fallback to old LIKE-based search
2165
2189
  days = max(1, int(days))
2166
2190
  conn = get_db()
2167
2191
  cutoff_dt = datetime.datetime.now() - datetime.timedelta(days=days)
@@ -0,0 +1,208 @@
1
+ #!/usr/bin/env python3
2
+ """NEXO Auto-Capture Hook — Extract facts from conversation context.
3
+
4
+ Inspired by claude-mem's observation handler and transcript processor.
5
+ Uses simple heuristics (no LLM) to extract decisions, corrections,
6
+ and explicit facts from conversation messages.
7
+
8
+ Can be called:
9
+ - Programmatically via process_conversation()
10
+ - From Claude Code hooks via stdin (pipe conversation lines)
11
+ - As CLI: python3 auto_capture.py "message1" "message2" ...
12
+
13
+ Stores extracted facts via cognitive.ingest() with appropriate tags.
14
+ """
15
+
16
+ import re
17
+ import sys
18
+ from pathlib import Path
19
+
20
+ # Add nexo-mcp to path for cognitive imports
21
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
22
+ import cognitive
23
+
24
+
25
+ # ---------------------------------------------------------------------------
26
+ # Pattern definitions (adapted from claude-mem's transcript processor
27
+ # and ShieldCortex's pattern groups approach)
28
+ # ---------------------------------------------------------------------------
29
+
30
+ # Decision patterns — lines indicating a choice was made
31
+ _DECISION_PATTERNS = [
32
+ re.compile(r'\b(?:decided|agreed|will do|changed to|switching to|going with|chose|chosen|opted for)\b', re.IGNORECASE),
33
+ re.compile(r'\b(?:let\'?s go with|the plan is|we\'?ll use|moving forward with)\b', re.IGNORECASE),
34
+ re.compile(r'\b(?:approved|confirmed|locked in|finalized)\b', re.IGNORECASE),
35
+ re.compile(r'\b(?:decidido|acordado|vamos con|cambiamos a|elegimos)\b', re.IGNORECASE), # Spanish
36
+ ]
37
+
38
+ # Correction patterns — lines indicating something was wrong
39
+ _CORRECTION_PATTERNS = [
40
+ re.compile(r'\b(?:don\'?t|stop|wrong|incorrect|that\'?s not right|fix this)\b', re.IGNORECASE),
41
+ re.compile(r'\b(?:should be|actually|not that|the correct|mistake|error)\b', re.IGNORECASE),
42
+ re.compile(r'\b(?:never do that|wrong approach|that broke|revert)\b', re.IGNORECASE),
43
+ re.compile(r'\b(?:no,\s|nope|mal|otra vez|ya te dije|no es|est[aá] mal)\b', re.IGNORECASE), # Spanish
44
+ ]
45
+
46
+ # Explicit fact patterns — user explicitly asks to remember something
47
+ _EXPLICIT_PATTERNS = [
48
+ re.compile(r'\b(?:remember|note that|important:|keep in mind|don\'?t forget)\b', re.IGNORECASE),
49
+ re.compile(r'\b(?:for future reference|take note|key point|rule:)\b', re.IGNORECASE),
50
+ re.compile(r'\b(?:recuerda|importante:|ten en cuenta|no olvides|regla:)\b', re.IGNORECASE), # Spanish
51
+ ]
52
+
53
+ # Minimum line length to consider (skip very short lines)
54
+ _MIN_LINE_LENGTH = 15
55
+
56
+ # Maximum fact content length
57
+ _MAX_FACT_LENGTH = 500
58
+
59
+
60
def _classify_line(line: str) -> list[tuple[str, str]]:
    """Tag one conversation line with every fact category it matches.

    The line is stripped first; anything shorter than ``_MIN_LINE_LENGTH``
    is ignored. Categories are checked in a fixed order (decision,
    correction, explicit) and each contributes at most one entry, no matter
    how many of its patterns hit.

    Args:
        line: A single line of conversation text.

    Returns:
        A list of ``(fact_type, stripped_line)`` tuples; empty when the line
        is too short or matches nothing.
    """
    text = line.strip()
    if len(text) < _MIN_LINE_LENGTH:
        return []

    # Order matters: callers receive the categories in this sequence.
    categories = (
        ("decision", _DECISION_PATTERNS),
        ("correction", _CORRECTION_PATTERNS),
        ("explicit", _EXPLICIT_PATTERNS),
    )
    return [
        (label, text)
        for label, regexes in categories
        if any(rx.search(text) for rx in regexes)
    ]
88
+
89
+
90
def process_conversation(messages: list[str]) -> dict:
    """Extract facts from conversation messages and hand them to cognitive.ingest().

    Each message is split on newlines and every line is classified with
    ``_classify_line``. Matched lines are truncated to ``_MAX_FACT_LENGTH``
    characters, de-duplicated, and ingested with
    ``source_type="auto_capture"``.

    De-duplication is keyed on ``(fact_type, normalized content)`` rather
    than on content alone, so a line that matches several categories keeps
    one fact per category, while the same line repeated across messages is
    still stored only once per category.

    Args:
        messages: List of conversation message strings.

    Returns:
        Dict with these keys:
            facts_extracted: number of unique (type, content) facts.
            decisions / corrections / explicits: number of raw pattern
                matches per category, counted before de-duplication.
            stored: facts for which ingest() returned a non-zero id.
            rejected_by_gate: facts for which ingest() returned 0.
            extracted_facts: one dict per unique fact with ``type``,
                ``content`` (first 100 chars), ``stored`` and ``memory_id``.
    """
    match_counts = {"decision": 0, "correction": 0, "explicit": 0}
    all_facts = []

    for msg in messages:
        # Split message into lines and classify each
        for line in msg.split("\n"):
            for fact_type, content in _classify_line(line):
                if fact_type in match_counts:
                    match_counts[fact_type] += 1
                all_facts.append((fact_type, content[:_MAX_FACT_LENGTH]))

    # Deduplicate repeated lines (the same line might appear in multiple
    # messages) without collapsing different categories of the same line.
    seen = set()
    unique_facts = []
    for fact_type, content in all_facts:
        dedup_key = (fact_type, content.lower().strip())
        if dedup_key in seen:
            continue
        seen.add(dedup_key)
        unique_facts.append((fact_type, content))

    # Store via cognitive.ingest()
    stored = 0
    rejected_by_gate = 0
    extracted_details = []

    for fact_type, content in unique_facts:
        # Build tagged content for better retrieval
        tagged_content = f"[{fact_type.upper()}] {content}"

        result_id = cognitive.ingest(
            content=tagged_content,
            source_type="auto_capture",
            source_id=f"hook_{fact_type}",
            source_title=f"Auto-captured {fact_type}",
            domain="conversation",
            source="agent_observation",
            skip_quarantine=False,  # Route through quarantine for safety
            bypass_gate=False,      # Let prediction error gate filter duplicates
        )

        # A result id of 0 is treated as "not stored" (rejected by the gate).
        was_stored = result_id != 0
        if was_stored:
            stored += 1
        else:
            rejected_by_gate += 1

        extracted_details.append({
            "type": fact_type,
            "content": content[:100],
            "stored": was_stored,
            "memory_id": result_id,
        })

    return {
        "facts_extracted": len(unique_facts),
        "decisions": match_counts["decision"],
        "corrections": match_counts["correction"],
        "explicits": match_counts["explicit"],
        "stored": stored,
        "rejected_by_gate": rejected_by_gate,
        "extracted_facts": extracted_details,
    }
172
+
173
+
174
def _read_stdin() -> list[str]:
    """Collect non-blank lines piped in on stdin (used for hook integration).

    Returns:
        An empty list when stdin is an interactive terminal; otherwise every
        line of the stripped input that contains non-whitespace characters,
        in order.
    """
    if sys.stdin.isatty():
        # Nothing was piped in; do not block waiting for keyboard input.
        return []

    piped_text = sys.stdin.read().strip()
    return [chunk for chunk in piped_text.split("\n") if chunk.strip()]
179
+
180
+
181
def main():
    """Run auto-capture from the command line.

    Messages come from the command-line arguments when any are given,
    otherwise from stdin. With no messages at all, a usage hint is printed
    and the process exits with status 1. Otherwise the extraction summary
    and one line per extracted fact are printed.

    Usage:
        echo "We decided to use PostgreSQL" | python3 auto_capture.py
        python3 auto_capture.py "Remember: always use WAL mode" "That's wrong, fix it"
    """
    cli_args = sys.argv[1:]
    messages = list(cli_args) if cli_args else _read_stdin()

    if not messages:
        print("Usage: python3 auto_capture.py 'message1' 'message2' ...")
        print(" or: echo 'messages' | python3 auto_capture.py")
        sys.exit(1)

    result = process_conversation(messages)

    print(f"Facts extracted: {result['facts_extracted']}")
    # Per-category counts, printed in the same order as the result dict.
    for label, key in (
        ("Decisions", "decisions"),
        ("Corrections", "corrections"),
        ("Explicits", "explicits"),
    ):
        print(f" {label}: {result[key]}")
    print(f"Stored: {result['stored']}, Rejected by gate: {result['rejected_by_gate']}")

    for fact in result["extracted_facts"]:
        if fact["stored"]:
            status = "STORED"
        else:
            status = "REJECTED"
        print(f" [{status}] [{fact['type']}] {fact['content']}")
205
+
206
+
207
+ if __name__ == "__main__":
208
+ main()