superlocalmemory 3.4.8 → 3.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,6 +52,9 @@ _PLACE_MARKERS = ("City", "State", "County", "Island", "River", "Mountain",
52
52
  "Lake", "Park", "Street", "Avenue", "Road", "District")
53
53
  _EVENT_MARKERS = ("Festival", "Conference", "Summit", "Workshop", "Meeting",
54
54
  "Election", "War", "Match", "Game", "Concert", "Wedding")
55
+ # v3.4.10: Skill entity type — skills, commands, agents, plugins
56
+ _SKILL_MARKERS = ("skill", "command", "agent", "plugin", "hook", "mcp")
57
+ _SKILL_NAMESPACE_RE = re.compile(r"^[\w-]+:[\w-]+$") # e.g., "superpowers:brainstorming"
55
58
 
56
59
 
57
60
  # ---------------------------------------------------------------------------
@@ -113,25 +116,67 @@ def jaro_winkler(s1: str, s2: str, prefix_weight: float = 0.1) -> float:
113
116
 
114
117
 
115
118
  _COMMON_WORDS = frozenset({
116
- "april", "may", "june", "march", "august", "phase", "test", "gap",
117
- "dashboard", "remaining", "session", "results", "tools", "projects",
118
- "prompts", "integration", "cli", "engagement", "mode", "error",
119
- "step", "fix", "build", "check", "run", "start", "stop", "config",
120
- "status", "version", "query", "data", "file", "path", "node", "edge",
121
- "table", "index", "schema", "model", "type", "class", "function",
122
- "module", "package", "import", "export", "default", "pattern",
119
+ # Months / time words (biggest source of garbage entities)
120
+ "january", "february", "march", "april", "may", "june", "july",
121
+ "august", "september", "october", "november", "december",
122
+ "monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday",
123
+ "today", "tomorrow", "yesterday", "morning", "evening", "night",
124
+ # English stop words that get capitalized at sentence start
125
+ "a", "an", "the", "all", "not", "no", "yes", "and", "or", "but",
126
+ "if", "is", "are", "was", "were", "be", "been", "being", "have",
127
+ "has", "had", "do", "does", "did", "will", "would", "shall", "should",
128
+ "can", "could", "just", "also", "only", "very", "too", "so", "then",
129
+ "than", "that", "this", "these", "those", "here", "there", "where",
130
+ "when", "what", "which", "who", "whom", "how", "why", "each", "every",
131
+ "both", "few", "more", "most", "other", "some", "such", "any", "many",
132
+ "much", "own", "same", "new", "old", "first", "last", "next", "now",
133
+ "still", "already", "always", "never", "often", "sometimes", "about",
134
+ "above", "after", "again", "against", "along", "among", "around",
135
+ "before", "below", "between", "beyond", "during", "except", "from",
136
+ "into", "near", "off", "onto", "out", "over", "past", "since",
137
+ "through", "under", "until", "upon", "with", "within", "without",
138
+ # Technical stop words (common in dev sessions)
139
+ "phase", "test", "gap", "dashboard", "remaining", "session", "results",
140
+ "tools", "projects", "prompts", "integration", "cli", "engagement",
141
+ "mode", "error", "step", "fix", "build", "check", "run", "start",
142
+ "stop", "config", "status", "version", "query", "data", "file", "path",
143
+ "node", "edge", "table", "index", "schema", "model", "type", "class",
144
+ "function", "module", "package", "import", "export", "default", "pattern",
123
145
  "memory", "profile", "context", "pipeline", "worker", "daemon",
124
146
  "server", "client", "route", "endpoint", "handler", "hook",
147
+ "feature", "release", "update", "upgrade", "deploy", "debug", "log",
148
+ "output", "input", "key", "value", "true", "false", "null", "none",
149
+ "ready", "done", "todo", "complete", "pending", "active", "failed",
150
+ "success", "warning", "critical", "high", "medium", "low",
151
+ "total", "count", "list", "item", "entry", "record", "row", "column",
152
+ "source", "target", "origin", "destination", "backup", "restore",
153
+ "create", "read", "delete", "remove", "add", "set", "get", "put",
154
+ "push", "pull", "fetch", "send", "receive", "request", "response",
155
+ "enable", "disable", "open", "close", "load", "save", "reset",
156
+ # Abstract nouns often misclassified as people
157
+ "completeness", "correctness", "limitations", "requirements",
158
+ "dependencies", "performance", "security", "quality", "coverage",
159
+ "progress", "analysis", "research", "implementation", "verification",
160
+ "overview", "summary", "details", "notes", "changes", "issues",
161
+ "approach", "strategy", "solution", "problem", "question", "answer",
125
162
  })
126
163
 
127
164
 
128
165
  def _guess_entity_type(name: str) -> str:
129
166
  """Heuristic entity type classification from name string.
130
167
 
131
- v3.4.8: Fixed false-positive "person" classification. Single capitalized
132
- common words (April, Phase, Dashboard) are concepts, not people.
133
- Only classify as "person" when it looks like a real human name.
168
+ v3.4.10: Aggressive false-positive prevention. "person" is assigned ONLY
169
+ when the name looks like a real human name (2-3 capitalized words, none
170
+ in the stop list). Everything else defaults to "concept".
134
171
  """
172
+ # Reject very short or very long names
173
+ if len(name) <= 2 or len(name) > 100:
174
+ return "concept"
175
+
176
+ # Reject pure numbers, dates, version strings
177
+ if re.match(r"^[\d.v\-/]+$", name):
178
+ return "concept"
179
+
135
180
  if any(m in name for m in _ORG_MARKERS):
136
181
  return "organization"
137
182
  if any(m in name for m in _PLACE_MARKERS):
@@ -139,32 +184,37 @@ def _guess_entity_type(name: str) -> str:
139
184
  if any(m in name for m in _EVENT_MARKERS):
140
185
  return "event"
141
186
 
142
- # Filter out common words that aren't people
143
- if name.lower() in _COMMON_WORDS:
187
+ # v3.4.10: Skill entities — namespaced skills or skill-related terms
188
+ if _SKILL_NAMESPACE_RE.match(name):
189
+ return "skill"
190
+ name_lower = name.lower()
191
+ if any(m in name_lower for m in _SKILL_MARKERS):
192
+ return "skill"
193
+
194
+ # Check ALL words against the stop list (not just the full name)
195
+ words = name.lower().split()
196
+ if any(w in _COMMON_WORDS for w in words):
144
197
  return "concept"
145
198
 
146
- # Two capitalized words = likely a person name (e.g. "Varun Bhardwaj")
147
- if re.match(r"^[A-Z][a-z]+ [A-Z][a-z]+$", name):
148
- # But not if either word is a common term
149
- parts = name.lower().split()
150
- if not any(p in _COMMON_WORDS for p in parts):
199
+ # Multi-word entity: "person" only if 2-3 capitalized words, no stop words
200
+ if re.match(r"^[A-Z][a-z]+ [A-Z][a-z]+( [A-Z][a-z]+)?$", name):
201
+ if not any(p in _COMMON_WORDS for p in words):
151
202
  return "person"
152
203
 
153
- # Single short capitalized word with no digits or dots = concept, not person
154
- # "person" should only be assigned for real names, not generic terms
204
+ # Single capitalized word — almost never a person in our context.
205
+ # Only known first names should get "person" but we can't maintain
206
+ # a name dictionary, so default to "concept"
155
207
  if re.match(r"^[A-Z][a-z]+$", name):
156
- if name.lower() in _COMMON_WORDS:
157
- return "concept"
158
- # Only classify as person if it's a plausible first name
159
- # (short word not in common terms — still a heuristic)
160
- if len(name) <= 3:
161
- return "concept"
162
- return "person"
163
-
164
- # Contains dots/slashes/hyphens = likely a technical term
208
+ return "concept"
209
+
210
+ # Contains dots/slashes/hyphens/underscores = technical term
165
211
  if re.search(r"[./\-_]", name):
166
212
  return "concept"
167
213
 
214
+ # ALL-CAPS or mixed case with numbers = technical/concept
215
+ if re.match(r"^[A-Z]+$", name) or re.search(r"\d", name):
216
+ return "concept"
217
+
168
218
  return "concept"
169
219
 
170
220
 
@@ -211,6 +261,23 @@ class EntityResolver:
211
261
  if not name or name.lower() in PRONOUNS:
212
262
  continue
213
263
 
264
+ # Skip very short/long entities
265
+ if len(name) <= 2 or len(name) > 100:
266
+ continue
267
+
268
+ # Skip single-word stop words
269
+ words = name.lower().split()
270
+ if len(words) == 1 and name.lower() in _COMMON_WORDS:
271
+ continue
272
+
273
+ # Skip multi-word entities where ALL words are stop words or <=2 chars
274
+ if len(words) > 1 and all(w in _COMMON_WORDS or len(w) <= 2 for w in words):
275
+ continue
276
+
277
+ # Skip pure numbers/versions
278
+ if re.match(r"^[\d.v\-/]+$", name):
279
+ continue
280
+
214
281
  # Tier a: exact match on canonical_name
215
282
  entity = self._db.get_entity_by_name(name, profile_id)
216
283
  if entity is not None:
@@ -34,6 +34,26 @@ DEFAULT_INTERVAL_HOURS = 168 # 7 days
34
34
  DEFAULT_MAX_BACKUPS = 10
35
35
  MIN_INTERVAL_HOURS = 1
36
36
 
37
+ # ---------------------------------------------------------------------------
38
+ # SLM Managed Database Registry
39
+ # ---------------------------------------------------------------------------
40
+ # Every database that SLM creates and manages. The backup system backs up
41
+ # ONLY these databases — nothing else. When a new SLM module creates a new
42
+ # database file, add it here so it gets included in backups.
43
+ #
44
+ # Each user may have a different subset (e.g., some don't have code_graph.db
45
+ # if they never used the code graph feature). The backup system checks which
46
+ # ones exist and only backs up what's present.
47
+
48
+ MANAGED_DATABASES: tuple[str, ...] = (
49
+ "memory.db", # Core: facts, entities, graph, embeddings, sessions
50
+ "learning.db", # Learning pipeline: signals, patterns, ranker
51
+ "audit_chain.db", # Audit trail: compliance, provenance chain
52
+ "code_graph.db", # Code knowledge graph: symbols, references
53
+ "pending.db", # Pending operations queue
54
+ "audit.db", # Legacy audit (pre-v3.4)
55
+ )
56
+
37
57
 
38
58
  class BackupManager:
39
59
  """Automated backup manager for SuperLocalMemory V3.
@@ -169,8 +189,8 @@ class BackupManager:
169
189
  self._save_config()
170
190
  logger.info("Backup created: %s (%.1f MB)", backup_name, size_mb)
171
191
 
172
- # Also backup learning.db if present
173
- self._backup_learning_db(timestamp, suffix)
192
+ # v3.4.10: Back up every SLM-managed database listed in MANAGED_DATABASES
193
+ self._backup_all_dbs(timestamp, suffix)
174
194
 
175
195
  self._enforce_retention()
176
196
  return backup_name
@@ -181,29 +201,52 @@ class BackupManager:
181
201
  backup_path.unlink()
182
202
  return ""
183
203
 
184
- def _backup_learning_db(self, timestamp: str, suffix: str) -> None:
185
- """Best-effort backup of ``learning.db`` alongside the main DB."""
186
- learning_db = self.db_path.parent / "learning.db"
187
- if not learning_db.exists():
188
- return
189
- try:
190
- name = f"learning-{timestamp}{suffix}.db"
191
- path = self.backup_dir / name
192
- src = sqlite3.connect(str(learning_db))
193
- dst = sqlite3.connect(str(path))
204
+ def _backup_all_dbs(self, timestamp: str, suffix: str) -> None:
205
+ """Backup all SLM-managed databases alongside the main memory.db.
206
+
207
+ Uses the managed database registry — only backs up databases that
208
+ SLM knows about. Add new databases to MANAGED_DATABASES when new
209
+ modules create them.
210
+ """
211
+ slm_dir = self.db_path.parent
212
+ backed_up = 0
213
+ for db_name in MANAGED_DATABASES:
214
+ if db_name == "memory.db":
215
+ continue # Already backed up by create_backup()
216
+ db_file = slm_dir / db_name
217
+ if not db_file.exists():
218
+ continue # This user doesn't have this DB — skip
219
+
194
220
  try:
195
- src.backup(dst)
196
- finally:
197
- dst.close()
198
- src.close()
199
- logger.info("Learning backup: %s (%.1f MB)", name, path.stat().st_size / (1024 * 1024))
200
- except Exception as exc:
201
- logger.warning("Learning DB backup failed (non-critical): %s", exc)
221
+ prefix = db_file.stem
222
+ name = f"{prefix}-{timestamp}{suffix}.db"
223
+ path = self.backup_dir / name
224
+ src = sqlite3.connect(str(db_file))
225
+ dst = sqlite3.connect(str(path))
226
+ try:
227
+ src.backup(dst)
228
+ finally:
229
+ dst.close()
230
+ src.close()
231
+ backed_up += 1
232
+ logger.info(
233
+ "Backup: %s (%.1f MB)", name,
234
+ path.stat().st_size / (1024 * 1024),
235
+ )
236
+ except Exception as exc:
237
+ logger.warning(
238
+ "%s backup failed (non-critical): %s",
239
+ db_name, exc,
240
+ )
241
+ if backed_up:
242
+ logger.info("Backed up %d companion databases", backed_up)
202
243
 
203
244
  def _enforce_retention(self) -> None:
204
245
  """Remove old backups exceeding the configured max."""
205
246
  max_backups = self.config.get("max_backups", DEFAULT_MAX_BACKUPS)
206
- for pattern in ("memory-*.db", "learning-*.db"):
247
+ # Build patterns from the managed database registry
248
+ patterns = [f"{Path(db).stem}-*.db" for db in MANAGED_DATABASES]
249
+ for pattern in patterns:
207
250
  backups = sorted(
208
251
  self.backup_dir.glob(pattern),
209
252
  key=lambda f: f.stat().st_mtime,