get-claudia 1.55.17 → 1.55.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,20 @@
 
 All notable changes to Claudia will be documented in this file.
 
+## 1.55.18 (2026-03-19)
+
+### Data Quality & Python Compatibility
+
+Six fixes addressing two community discussions. All additive, no schema changes, no new pip dependencies.
+
+- **Briefing counts now exclude invalidated records** -- The `memory.briefing` MCP tool was counting soft-deleted commitments (58 shown when only 14 were active). Added `AND invalidated_at IS NULL` to both commitment count queries and `AND deleted_at IS NULL` to the cooling relationships query. The first numbers users see at session start are now accurate. (Discussion #25)
+- **Entity type inference from name keywords** -- New `_infer_entity_type()` function detects organizational keywords (Inc, LLC, Corp, University), project keywords (Project, Sprint, MVP), concept keywords (methodology, framework), and location keywords (Office, HQ) in entity names. "Acme Corp" now creates an organization entity, not a person. Only runs when creating new entities; explicit types and existing entities are never overridden. (Discussion #25)
+- **Consolidation fuzzy name dedup** -- Added Method 3 to overnight dedup: SequenceMatcher fuzzy name comparison across same-type entity pairs. Catches typo variants like "Kris Krisko" vs "Kris Krisco" (>= 0.90 similarity) and prefix matches like "Sarah" vs "Sarah Johnson". Advisory only: stores candidates in the predictions table for user review, never auto-merges. Runs even without sqlite-vec. (Discussion #25)
+- **Wildcard entity search works** -- `memory.entities(query="*")` now returns all non-deleted entities instead of nothing. The `*` was being wrapped in `LIKE "%*%"`, matching the literal asterisk. Also added `AND deleted_at IS NULL` to all search paths. (Discussion #25)
+- **Installer prefers Python < 3.14** -- Both `install.sh` and `bin/index.js` now try Python 3.13, 3.12, and 3.11 before falling back to 3.14+. Previously, systems with only 3.12 and 3.14 installed (no 3.13) would get a 3.14 venv, causing spaCy to fail with a Pydantic V1 ConfigError. The daemon still works on 3.14 (graceful degradation to regex-only extraction), but prefers < 3.14 when available. (Discussion #29)
+- **Python version cap** -- `requires-python` in pyproject.toml now caps at `<3.14` until spaCy (blis wheels), pydantic-core (PyO3 ceiling), and numpy (cp314 wheels) ship 3.14 support.
+- 706 tests pass, 0 regressions, 46 new tests across 4 new test files.
+
 ## 1.55.17 (2026-03-18)
 
 ### The Quiet Observer
package/bin/index.js CHANGED
@@ -347,7 +347,21 @@ async function installOllama() {
 
 /** Check if Python 3.10+ is available. Returns the command name or null. */
 async function isPythonInstalled() {
-  for (const cmd of ['python3', 'python']) {
+  // Prefer Python < 3.14 (spaCy/pydantic-core don't support 3.14 yet)
+  // Try versioned binaries first (3.13, 3.12, 3.11), then unversioned python3
+  const candidates = [
+    'python3.13', 'python3.12', 'python3.11', // Versioned: guaranteed < 3.14
+    'python3', 'python',                      // Unversioned: check version
+  ];
+  // On macOS, also check Homebrew paths explicitly
+  if (process.platform === 'darwin') {
+    candidates.unshift(
+      '/opt/homebrew/bin/python3.13', '/opt/homebrew/bin/python3.12', '/opt/homebrew/bin/python3.11',
+      '/usr/local/bin/python3.13', '/usr/local/bin/python3.12', '/usr/local/bin/python3.11',
+    );
+  }
+  let fallback314 = null;
+  for (const cmd of candidates) {
     const ver = await new Promise((resolve) => {
       const proc = spawn(cmd, ['--version'], { stdio: 'pipe', timeout: 5000 });
       let stdout = '';
@@ -356,11 +370,15 @@ async function isPythonInstalled() {
       proc.on('error', () => resolve(''));
     });
     const match = ver.match(/Python (\d+)\.(\d+)/);
-    if (match && (parseInt(match[1]) > 3 || (parseInt(match[1]) === 3 && parseInt(match[2]) >= 10))) {
-      return cmd;
+    if (match) {
+      const major = parseInt(match[1]);
+      const minor = parseInt(match[2]);
+      if (major === 3 && minor >= 10 && minor < 14) return cmd;
+      // Remember 3.14+ as fallback (daemon works, just no spaCy)
+      if (major === 3 && minor >= 14 && !fallback314) fallback314 = cmd;
     }
   }
-  return null;
+  return fallback314;
 }
 
 /**
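The same selection policy is easy to express outside the installer. Below is a minimal Python sketch of the probe order and the 3.14+ fallback described above; the candidate list mirrors the hunk, while the `subprocess` probing and the `pick_python` name are illustrative assumptions, not package code:

```python
import re
import subprocess

# Mirrors bin/index.js: versioned interpreters < 3.14 first, then unversioned.
CANDIDATES = ["python3.13", "python3.12", "python3.11", "python3", "python"]

def pick_python():
    """Return the first interpreter in 3.10-3.13, else a remembered 3.14+ fallback."""
    fallback_314 = None
    for cmd in CANDIDATES:
        try:
            out = subprocess.run([cmd, "--version"], capture_output=True,
                                 text=True, timeout=5)
        except (OSError, subprocess.TimeoutExpired):
            continue  # binary missing or unresponsive: try the next candidate
        m = re.search(r"Python (\d+)\.(\d+)", out.stdout + out.stderr)
        if not m:
            continue
        major, minor = int(m.group(1)), int(m.group(2))
        if major == 3 and 10 <= minor < 14:
            return cmd  # preferred: spaCy extraction works here
        if major == 3 and minor >= 14 and fallback_314 is None:
            fallback_314 = cmd  # daemon still runs, regex-only extraction
    return fallback_314
```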
@@ -373,7 +373,7 @@ async def _handle_entities(arguments, db, config, logger, **ctx):
         )
         entity_id = remember_entity(
             name=name_val,
-            entity_type=arguments.get("type", "person"),
+            entity_type=arguments.get("type", ""),
             description=arguments.get("description"),
             aliases=arguments.get("aliases"),
         )
@@ -1136,7 +1136,7 @@ async def _handle_batch(arguments, db, config, logger, **ctx):
         if op_type == "entity":
             entity_id = remember_entity(
                 name=op["name"],
-                entity_type=op.get("type", "person"),
+                entity_type=op.get("type", ""),
                 description=op.get("description"),
                 aliases=op.get("aliases"),
                 _precomputed_embedding=embeddings_map.get(i),
@@ -3248,14 +3248,14 @@ def _build_briefing() -> str:
     # 1. Active commitments count + stale count
     try:
         total_row = db.execute(
-            "SELECT COUNT(*) as cnt FROM memories WHERE type = 'commitment' AND importance > 0.1",
+            "SELECT COUNT(*) as cnt FROM memories WHERE type = 'commitment' AND importance > 0.1 AND invalidated_at IS NULL",
             fetch=True,
         )
        total_commitments = total_row[0]["cnt"] if total_row else 0
 
         stale_cutoff = (datetime.utcnow() - timedelta(days=7)).isoformat()
         stale_row = db.execute(
-            "SELECT COUNT(*) as cnt FROM memories WHERE type = 'commitment' AND importance > 0.1 AND created_at < ?",
+            "SELECT COUNT(*) as cnt FROM memories WHERE type = 'commitment' AND importance > 0.1 AND invalidated_at IS NULL AND created_at < ?",
             (stale_cutoff,),
             fetch=True,
         )
@@ -3274,6 +3274,7 @@ def _build_briefing() -> str:
             """
             SELECT COUNT(*) as cnt FROM entities
             WHERE type = 'person' AND importance > 0.3
+            AND deleted_at IS NULL
             AND updated_at < ?
             """,
             (cooling_cutoff,),
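The effect of the added `invalidated_at IS NULL` predicate can be reproduced in isolation. A self-contained sketch against a throwaway SQLite table (the schema is reduced to the columns these queries touch; the row counts match the changelog's 58-vs-14 example):

```python
import sqlite3

db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE memories "
           "(type TEXT, importance REAL, invalidated_at TEXT, created_at TEXT)")
rows = [("commitment", 0.5, None, "2026-03-01")] * 14           # active
rows += [("commitment", 0.5, "2026-03-10", "2026-02-01")] * 44  # soft-deleted
db.executemany("INSERT INTO memories VALUES (?, ?, ?, ?)", rows)

base = "SELECT COUNT(*) FROM memories WHERE type = 'commitment' AND importance > 0.1"
print(db.execute(base).fetchone()[0])                                  # 58 (old query)
print(db.execute(base + " AND invalidated_at IS NULL").fetchone()[0])  # 14 (fixed query)
```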
@@ -2503,49 +2503,105 @@ class ConsolidateService:
                         "method": "alias_overlap",
                         "shared_alias": row["alias"],
                     })
+        except Exception as e:
+            logger.debug(f"Alias overlap dedupe failed: {e}")
 
-            # Store top candidates as predictions for user review
-            now = datetime.utcnow()
-            for candidate in candidates[:10]:
-                content = (
-                    f"Possible duplicate entities: '{candidate['entity_1']['name']}' "
-                    f"and '{candidate['entity_2']['name']}' "
-                    f"({candidate['similarity']:.0%} similar via {candidate['method']}). "
-                    f"Consider merging with memory.merge_entities."
-                )
-                # Check for existing dedupe prediction
-                existing = self.db.execute(
-                    """
-                    SELECT id FROM predictions
-                    WHERE prediction_type = 'suggestion'
-                    AND metadata LIKE ?
-                    AND expires_at > ?
-                    LIMIT 1
-                    """,
-                    (f'%"dedupe_pair": [{candidate["entity_1"]["id"]}, {candidate["entity_2"]["id"]}]%',
-                     now.isoformat()),
-                    fetch=True,
-                )
+        # Method 3: Fuzzy name comparison (SequenceMatcher)
+        # Catches typo variants and prefix matches that embeddings and aliases miss.
+        # Runs even without sqlite-vec. Advisory only: never auto-merges.
+        try:
+            from difflib import SequenceMatcher
 
-                if not existing:
-                    self.db.insert(
-                        "predictions",
-                        {
-                            "content": content,
-                            "prediction_type": "suggestion",
-                            "priority": 0.6 + 0.3 * candidate["similarity"],
-                            "expires_at": (now + timedelta(days=14)).isoformat(),
-                            "created_at": now.isoformat(),
-                            "metadata": json.dumps({
-                                "dedupe_pair": [candidate["entity_1"]["id"], candidate["entity_2"]["id"]],
-                                "similarity": candidate["similarity"],
-                                "method": candidate["method"],
-                            }),
-                        },
-                    )
+            all_entities = self.db.execute(
+                """
+                SELECT id, name, canonical_name, type
+                FROM entities
+                WHERE deleted_at IS NULL AND importance > 0.05
+                ORDER BY type, canonical_name
+                """,
+                fetch=True,
+            ) or []
+
+            # Group by type for same-type comparison only
+            by_type: dict = {}
+            for ent in all_entities:
+                by_type.setdefault(ent["type"], []).append(ent)
+
+            for etype, group in by_type.items():
+                for i, e1 in enumerate(group):
+                    for e2 in group[i + 1:]:
+                        pair_key = (min(e1["id"], e2["id"]), max(e1["id"], e2["id"]))
+                        if pair_key in seen_pairs:
+                            continue
 
+                        cn1 = e1["canonical_name"]
+                        cn2 = e2["canonical_name"]
+
+                        # Fuzzy ratio check
+                        ratio = SequenceMatcher(None, cn1, cn2).ratio()
+                        if ratio >= threshold:
+                            seen_pairs.add(pair_key)
+                            candidates.append({
+                                "entity_1": {"id": e1["id"], "name": e1["name"], "type": e1["type"]},
+                                "entity_2": {"id": e2["id"], "name": e2["name"], "type": e2["type"]},
+                                "similarity": round(ratio, 3),
+                                "method": "fuzzy_name",
+                            })
+                            continue
+
+                        # Prefix match: short name is prefix of longer name
+                        shorter, longer = (cn1, cn2) if len(cn1) <= len(cn2) else (cn2, cn1)
+                        if len(shorter) >= 3 and longer.startswith(shorter):
+                            if pair_key not in seen_pairs:
+                                seen_pairs.add(pair_key)
+                                candidates.append({
+                                    "entity_1": {"id": e1["id"], "name": e1["name"], "type": e1["type"]},
+                                    "entity_2": {"id": e2["id"], "name": e2["name"], "type": e2["type"]},
+                                    "similarity": 0.80,
+                                    "method": "fuzzy_name_prefix",
+                                })
         except Exception as e:
-            logger.warning(f"Auto dedupe failed: {e}")
+            logger.debug(f"Fuzzy name dedupe failed: {e}")
+
+        # Store top candidates as predictions for user review
+        now = datetime.utcnow()
+        for candidate in candidates[:10]:
+            content = (
+                f"Possible duplicate entities: '{candidate['entity_1']['name']}' "
+                f"and '{candidate['entity_2']['name']}' "
+                f"({candidate['similarity']:.0%} similar via {candidate['method']}). "
+                f"Consider merging with memory.merge_entities."
+            )
+            # Check for existing dedupe prediction
+            existing = self.db.execute(
+                """
+                SELECT id FROM predictions
+                WHERE prediction_type = 'suggestion'
+                AND metadata LIKE ?
+                AND expires_at > ?
+                LIMIT 1
+                """,
+                (f'%"dedupe_pair": [{candidate["entity_1"]["id"]}, {candidate["entity_2"]["id"]}]%',
+                 now.isoformat()),
+                fetch=True,
+            )
+
+            if not existing:
+                self.db.insert(
+                    "predictions",
+                    {
+                        "content": content,
+                        "prediction_type": "suggestion",
+                        "priority": 0.6 + 0.3 * candidate["similarity"],
+                        "expires_at": (now + timedelta(days=14)).isoformat(),
+                        "created_at": now.isoformat(),
+                        "metadata": json.dumps({
+                            "dedupe_pair": [candidate["entity_1"]["id"], candidate["entity_2"]["id"]],
+                            "similarity": candidate["similarity"],
+                            "method": candidate["method"],
+                        }),
+                    },
+                )
 
         if candidates:
             logger.info(f"Found {len(candidates)} potential entity duplicates")
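Both Method 3 paths can be verified with nothing but the standard library. A short sketch using the thresholds from the hunk (0.90 ratio, 3-character minimum prefix) on the changelog's own examples; the lowercased strings assume canonical names are normalized that way:

```python
from difflib import SequenceMatcher

# Typo variant: caught by the ratio check (threshold 0.90)
print(SequenceMatcher(None, "kris krisko", "kris krisco").ratio())  # ~0.909 -> "fuzzy_name"

# Prefix variant: the ratio alone is too low, but the prefix rule catches it
short, full = "sarah", "sarah johnson"
print(SequenceMatcher(None, short, full).ratio())  # ~0.556, below threshold
print(len(short) >= 3 and full.startswith(short))  # True -> "fuzzy_name_prefix" at 0.80
```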
@@ -971,9 +971,9 @@ class RecallService:
         Returns:
             List of matching entities
         """
-        canonical = self.extractor.canonical_name(query)
+        is_wildcard = not query or query.strip() in ("*", "")
 
-        # Try exact match first
+        # Base query with joins for counts
         sql = """
             SELECT e.*,
                    COUNT(DISTINCT me.memory_id) as memory_count,
@@ -983,9 +983,14 @@ class RecallService:
             LEFT JOIN memory_entities me ON e.id = me.entity_id
             LEFT JOIN memories m ON me.memory_id = m.id
             LEFT JOIN relationships r ON e.id = r.source_entity_id OR e.id = r.target_entity_id
-            WHERE e.canonical_name LIKE ? OR e.name LIKE ?
+            WHERE e.deleted_at IS NULL
         """
-        params = [f"%{canonical}%", f"%{query}%"]
+        params = []
+
+        if not is_wildcard:
+            canonical = self.extractor.canonical_name(query)
+            sql += " AND (e.canonical_name LIKE ? OR e.name LIKE ?)"
+            params.extend([f"%{canonical}%", f"%{query}%"])
 
         if entity_types:
             placeholders = ", ".join(["?" for _ in entity_types])
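A reduced sketch of the fixed query builder above, showing what each path produces. The joins are elided and `canonical_name` is stubbed as lowercasing, both purely for illustration:

```python
def build_entity_search(query: str):
    """Wildcard queries skip the LIKE clause instead of matching a literal '*'."""
    is_wildcard = not query or query.strip() in ("*", "")
    sql = "SELECT e.* FROM entities e WHERE e.deleted_at IS NULL"
    params = []
    if not is_wildcard:
        canonical = query.strip().lower()  # stand-in for extractor.canonical_name
        sql += " AND (e.canonical_name LIKE ? OR e.name LIKE ?)"
        params = [f"%{canonical}%", f"%{query}%"]
    return sql, params

print(build_entity_search("*"))     # no LIKE clause -> all live entities
print(build_entity_search("Acme"))  # filters on '%acme%' / '%Acme%'
```

Before the fix, the `*` flowed into the LIKE pattern as `%*%`, which only matches names containing a literal asterisk, hence the empty result.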
@@ -56,6 +56,47 @@ def _strip_private(content: str) -> str:
     return stripped if stripped else content
 
 
+def _infer_entity_type(name: str) -> str:
+    """Infer entity type from name keywords when no explicit type is provided.
+
+    Checks for organizational, project, concept, and location keywords.
+    Returns 'person' as fallback when no keywords match.
+    Conservative: only clear keyword signals trigger inference.
+    """
+    lower = name.lower()
+    words = lower.split()
+
+    # Location keywords (checked first: "Company HQ" is a location, not an org)
+    location_keywords = ["office", "hq", "headquarters", "campus", "building"]
+    for kw in location_keywords:
+        if kw in words:
+            return "location"
+
+    # Organization keywords (check as whole words)
+    org_keywords = [
+        "inc", "llc", "corp", "corporation", "foundation", "university",
+        "lab", "labs", "ltd", "gmbh", "company", "institute", "associates",
+        "group", "partners",
+    ]
+    for kw in org_keywords:
+        if kw in words:
+            return "organization"
+
+    # Project keywords
+    project_keywords = ["project", "sprint", "mvp", "initiative", "campaign"]
+    for kw in project_keywords:
+        if kw in words:
+            return "project"
+
+    # Concept keywords
+    concept_keywords = ["methodology", "framework", "theory", "protocol", "strategy"]
+    for kw in concept_keywords:
+        if kw in words:
+            return "concept"
+
+    return "person"
+
+
 class RememberService:
     """Store and manage memories"""
 
@@ -360,7 +401,7 @@ class RememberService:
     def remember_entity(
         self,
         name: str,
-        entity_type: str = "person",
+        entity_type: str = "",
         description: Optional[str] = None,
         aliases: Optional[List[str]] = None,
         metadata: Optional[Dict] = None,
@@ -379,6 +420,10 @@ class RememberService:
         Returns:
             Entity ID
         """
+        # Infer type from name keywords when no type is specified
+        if not entity_type or not entity_type.strip():
+            entity_type = _infer_entity_type(name)
+
         # Run deterministic guards
         existing_names = [
             row["canonical_name"]
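With the guard above in place, an empty `entity_type` routes through `_infer_entity_type`. Expected results on the changelog's examples (a usage sketch; assumes the function from the earlier hunk is in scope):

```python
# Assumes _infer_entity_type from the hunk above is importable.
for name in ["Acme Corp", "Company HQ", "Project Phoenix",
             "Agile methodology", "Sarah Johnson"]:
    print(name, "->", _infer_entity_type(name))
# Acme Corp -> organization     ("corp" matches as a whole word)
# Company HQ -> location        (location keywords are checked first)
# Project Phoenix -> project
# Agile methodology -> concept
# Sarah Johnson -> person       (fallback: no keyword matched)
```

Because matching is against whole words, names like "Incorporated Holdings" do not trip the `inc` keyword; only an exact token match triggers inference.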
@@ -1734,7 +1779,7 @@ class RememberService:
             entity_type=extracted.type,
         )
 
-    def _find_or_create_entity(self, name: str, entity_type: str = "person") -> Optional[int]:
+    def _find_or_create_entity(self, name: str, entity_type: str = "") -> Optional[int]:
         """Find entity by name or create if not exists"""
         canonical = self.extractor.canonical_name(name)
 
package/pyproject.toml CHANGED
@@ -11,7 +11,7 @@ license = {text = "Apache-2.0"}
 authors = [
     {name = "Kamil Banc"}
 ]
-requires-python = ">=3.10"
+requires-python = ">=3.10,<3.14"  # 3.14 blocked: spaCy/pydantic-core/numpy lack wheels
 classifiers = [
     "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "get-claudia",
-  "version": "1.55.17",
+  "version": "1.55.18",
   "description": "An AI assistant who learns how you work.",
   "keywords": [
     "claudia",