emdash-core 0.1.37__py3-none-any.whl → 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. emdash_core/agent/agents.py +9 -0
  2. emdash_core/agent/background.py +481 -0
  3. emdash_core/agent/inprocess_subagent.py +70 -1
  4. emdash_core/agent/mcp/config.py +78 -2
  5. emdash_core/agent/prompts/main_agent.py +53 -1
  6. emdash_core/agent/prompts/plan_mode.py +65 -44
  7. emdash_core/agent/prompts/subagents.py +73 -1
  8. emdash_core/agent/prompts/workflow.py +179 -28
  9. emdash_core/agent/providers/models.py +1 -1
  10. emdash_core/agent/providers/openai_provider.py +10 -0
  11. emdash_core/agent/research/researcher.py +154 -45
  12. emdash_core/agent/runner/agent_runner.py +145 -19
  13. emdash_core/agent/runner/sdk_runner.py +29 -2
  14. emdash_core/agent/skills.py +81 -1
  15. emdash_core/agent/toolkit.py +87 -11
  16. emdash_core/agent/tools/__init__.py +2 -0
  17. emdash_core/agent/tools/coding.py +344 -52
  18. emdash_core/agent/tools/lsp.py +361 -0
  19. emdash_core/agent/tools/skill.py +21 -1
  20. emdash_core/agent/tools/task.py +16 -19
  21. emdash_core/agent/tools/task_output.py +262 -32
  22. emdash_core/agent/verifier/__init__.py +11 -0
  23. emdash_core/agent/verifier/manager.py +295 -0
  24. emdash_core/agent/verifier/models.py +97 -0
  25. emdash_core/{swarm/worktree_manager.py → agent/worktree.py} +19 -1
  26. emdash_core/api/agent.py +297 -2
  27. emdash_core/api/research.py +3 -3
  28. emdash_core/api/router.py +0 -4
  29. emdash_core/context/longevity.py +197 -0
  30. emdash_core/context/providers/explored_areas.py +83 -39
  31. emdash_core/context/reranker.py +35 -144
  32. emdash_core/context/simple_reranker.py +500 -0
  33. emdash_core/context/tool_relevance.py +84 -0
  34. emdash_core/core/config.py +8 -0
  35. emdash_core/graph/__init__.py +8 -1
  36. emdash_core/graph/connection.py +24 -3
  37. emdash_core/graph/writer.py +7 -1
  38. emdash_core/models/agent.py +10 -0
  39. emdash_core/server.py +1 -6
  40. emdash_core/sse/stream.py +16 -1
  41. emdash_core/utils/__init__.py +0 -2
  42. emdash_core/utils/git.py +103 -0
  43. emdash_core/utils/image.py +147 -160
  44. {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/METADATA +6 -6
  45. {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/RECORD +47 -52
  46. emdash_core/api/swarm.py +0 -223
  47. emdash_core/db/__init__.py +0 -67
  48. emdash_core/db/auth.py +0 -134
  49. emdash_core/db/models.py +0 -91
  50. emdash_core/db/provider.py +0 -222
  51. emdash_core/db/providers/__init__.py +0 -5
  52. emdash_core/db/providers/supabase.py +0 -452
  53. emdash_core/swarm/__init__.py +0 -17
  54. emdash_core/swarm/merge_agent.py +0 -383
  55. emdash_core/swarm/session_manager.py +0 -274
  56. emdash_core/swarm/swarm_runner.py +0 -226
  57. emdash_core/swarm/task_definition.py +0 -137
  58. emdash_core/swarm/worker_spawner.py +0 -319
  59. {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/WHEEL +0 -0
  60. {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/entry_points.txt +0 -0
emdash_core/api/agent.py CHANGED
@@ -38,6 +38,7 @@ def _run_sdk_agent(
38
38
  session_id: str,
39
39
  emitter,
40
40
  plan_mode: bool = False,
41
+ images: list = None,
41
42
  ):
42
43
  """Run the agent using Anthropic Agent SDK.
43
44
 
@@ -72,7 +73,7 @@ def _run_sdk_agent(
72
73
  # Run async agent in sync context
73
74
  async def run_async():
74
75
  response_text = ""
75
- async for event in runner.run(message):
76
+ async for event in runner.run(message, images=images):
76
77
  if event.get("type") == "text":
77
78
  response_text += event.get("content", "")
78
79
  return response_text
@@ -96,6 +97,7 @@ def _run_agent_sync(
96
97
  plan_mode: bool = False,
97
98
  use_sdk: bool = None,
98
99
  history: list = None,
100
+ use_worktree: bool = False,
99
101
  ):
100
102
  """Run the agent synchronously (in thread pool).
101
103
 
@@ -107,6 +109,7 @@ def _run_agent_sync(
107
109
 
108
110
  Args:
109
111
  history: Optional list of previous messages to pre-populate conversation
112
+ use_worktree: If True, creates a git worktree for isolated changes
110
113
  """
111
114
  try:
112
115
  _ensure_emdash_importable()
@@ -152,6 +155,14 @@ def _run_agent_sync(
152
155
 
153
156
  # Use SDK for Claude models if enabled
154
157
  if use_sdk and is_claude_model(model):
158
+ # Convert images for SDK if provided
159
+ sdk_images = None
160
+ if images:
161
+ import base64
162
+ sdk_images = [
163
+ {"data": base64.b64decode(img.data), "format": img.format}
164
+ for img in images
165
+ ]
155
166
  return _run_sdk_agent(
156
167
  message=message,
157
168
  model=model,
@@ -159,6 +170,7 @@ def _run_agent_sync(
159
170
  session_id=session_id,
160
171
  emitter=emitter,
161
172
  plan_mode=plan_mode,
173
+ images=sdk_images,
162
174
  )
163
175
 
164
176
  # Standard path: use AgentRunner with OpenAI-compatible API
@@ -170,6 +182,21 @@ def _run_agent_sync(
170
182
  repo_root = Path(config.repo_root) if config.repo_root else Path.cwd()
171
183
  log.info(f"Agent API: config.repo_root={config.repo_root}, resolved repo_root={repo_root}")
172
184
 
185
+ # Create worktree for isolated changes if requested
186
+ worktree_info = None
187
+ if use_worktree and not plan_mode:
188
+ from ..agent.worktree import WorktreeManager
189
+ try:
190
+ worktree_manager = WorktreeManager(repo_root)
191
+ # Use session_id as task slug (truncated for safety)
192
+ task_slug = session_id[:20] if len(session_id) > 20 else session_id
193
+ worktree_info = worktree_manager.create_worktree(task_slug, force=True)
194
+ repo_root = worktree_info.path
195
+ log.info(f"Created worktree at {repo_root} on branch {worktree_info.branch}")
196
+ except Exception as e:
197
+ log.warning(f"Failed to create worktree: {e}. Running in main repo.")
198
+ worktree_info = None
199
+
173
200
  # Create toolkit with plan_mode if requested
174
201
  # When in plan mode, generate a plan file path so write_to_file is available
175
202
  plan_file_path = None
@@ -199,6 +226,7 @@ def _run_agent_sync(
199
226
  "message_count": 1,
200
227
  "model": model,
201
228
  "plan_mode": plan_mode,
229
+ "worktree_info": worktree_info, # Will be None if not using worktree
202
230
  }
203
231
 
204
232
  # Set up autosave callback if enabled via env var
@@ -233,9 +261,13 @@ def _run_agent_sync(
233
261
  # Convert image data if provided
234
262
  agent_images = None
235
263
  if images:
264
+ import base64
236
265
  from ..agent.providers.base import ImageContent
237
266
  agent_images = [
238
- ImageContent(data=img.data, format=img.format)
267
+ ImageContent(
268
+ image_data=base64.b64decode(img.data),
269
+ format=img.format
270
+ )
239
271
  for img in images
240
272
  ]
241
273
 
@@ -265,6 +297,7 @@ async def _run_agent_async(
265
297
  model = request.model or config.default_model
266
298
  max_iterations = request.options.max_iterations
267
299
  plan_mode = request.options.mode == AgentMode.PLAN
300
+ use_worktree = request.options.use_worktree
268
301
 
269
302
  # Emit session start
270
303
  sse_handler.emit(EventType.SESSION_START, {
@@ -273,6 +306,7 @@ async def _run_agent_async(
273
306
  "session_id": session_id,
274
307
  "query": request.message,
275
308
  "mode": request.options.mode.value,
309
+ "use_worktree": use_worktree,
276
310
  })
277
311
 
278
312
  loop = asyncio.get_event_loop()
@@ -291,6 +325,7 @@ async def _run_agent_async(
291
325
  plan_mode,
292
326
  None, # use_sdk (auto-detect)
293
327
  request.history, # Pre-loaded conversation history
328
+ use_worktree,
294
329
  )
295
330
 
296
331
  # Emit session end
@@ -505,6 +540,87 @@ async def export_session(session_id: str, limit: int = 10):
505
540
  }
506
541
 
507
542
 
543
@router.post("/chat/{session_id}/compact")
async def compact_session(session_id: str):
    """Compact the session's message history using LLM summarization.

    This manually triggers the same compaction that happens automatically
    when context reaches 80% capacity.

    Returns:
        JSON with the summary text and stats

    Raises:
        HTTPException: 404 if the session is unknown, 400 if it has no
            active runner.
    """
    if session_id not in _sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    session = _sessions[session_id]
    runner = session.get("runner")

    if not runner:
        raise HTTPException(status_code=400, detail="Session has no active runner")

    # The runner keeps its conversation in a private list; read defensively.
    messages = getattr(runner, "_messages", [])
    if len(messages) <= 5:
        return {
            "compacted": False,
            "reason": "Not enough messages to compact (need more than 5)",
            "message_count": len(messages),
        }

    # Import compaction utilities
    from ..agent.runner.context import compact_messages_with_llm, estimate_context_tokens

    # Minimal emitter stand-in: compaction emits "thinking" events that this
    # synchronous endpoint has no stream to surface them on, so drop them.
    class _SilentEmitter:
        def emit_thinking(self, text):
            pass  # Ignore thinking events

    emitter = _SilentEmitter()

    # Estimate current tokens
    original_tokens = estimate_context_tokens(messages)

    # Compact down to roughly half of the current footprint.
    compacted_messages = compact_messages_with_llm(
        messages,
        emitter,
        target_tokens=int(original_tokens * 0.5),
    )

    # Extract the summary the compactor embeds in an assistant message.
    summary_text = None
    for msg in compacted_messages:
        if msg.get("role") == "assistant" and "[Context Summary]" in str(msg.get("content", "")):
            content = msg.get("content", "")
            # Extract text between [Context Summary] and [End Summary]
            start = content.find("[Context Summary]") + len("[Context Summary]")
            end = content.find("[End Summary]")
            if end > start:
                summary_text = content[start:end].strip()
            break

    # Swap the runner's history for the compacted version.
    runner._messages = compacted_messages

    # Estimate new tokens
    new_tokens = estimate_context_tokens(compacted_messages)

    return {
        "compacted": True,
        "summary": summary_text,
        "original_message_count": len(messages),
        "new_message_count": len(compacted_messages),
        "original_tokens": original_tokens,
        "new_tokens": new_tokens,
        "reduction_percent": round((1 - new_tokens / original_tokens) * 100, 1) if original_tokens > 0 else 0,
    }
622
+
623
+
508
624
  @router.get("/chat/{session_id}/plan")
509
625
  async def get_pending_plan(session_id: str):
510
626
  """Get the pending plan for a session, if any.
@@ -1011,3 +1127,182 @@ async def add_todo(session_id: str, title: str, description: str = ""):
1011
1127
  "task": task.to_dict(),
1012
1128
  "total_tasks": len(state.tasks),
1013
1129
  }
1130
+
1131
+
1132
+ # ==================== Worktree Management ====================
1133
+
1134
+
1135
@router.get("/chat/{session_id}/worktree")
async def get_worktree_status(session_id: str):
    """Report the worktree state of a session.

    Tells the caller whether the session runs in an isolated worktree, and
    if so on which branch, plus any uncommitted changes found by git.
    """
    if session_id not in _sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    info = _sessions[session_id].get("worktree_info")

    if not info:
        return {
            "session_id": session_id,
            "has_worktree": False,
        }

    # Ask git for uncommitted changes; treat any failure as "clean".
    import subprocess

    has_changes = False
    changes = []
    try:
        proc = subprocess.run(
            ["git", "status", "--porcelain"],
            cwd=str(info.path),
            capture_output=True,
            text=True,
        )
        dirty = proc.stdout.strip()
        if dirty:
            has_changes = True
            changes = dirty.split("\n")
    except Exception:
        pass

    return {
        "session_id": session_id,
        "has_worktree": True,
        "worktree_path": str(info.path),
        "branch": info.branch,
        "base_branch": info.base_branch,
        "has_changes": has_changes,
        "changes": changes,
    }
+ }
1178
+
1179
+
1180
@router.post("/chat/{session_id}/worktree/apply")
async def apply_worktree_changes(session_id: str, commit_message: str = None):
    """Apply worktree changes to the main branch.

    Commits any pending changes in the worktree, merges the worktree branch
    into its base branch (--no-ff), removes the worktree, and clears the
    session's worktree info.

    Args:
        session_id: Session whose worktree changes should be applied.
        commit_message: Optional message for the auto-commit of any
            uncommitted changes.

    Raises:
        HTTPException: 404 if the session is unknown, 400 if it has no
            worktree, 500 if a git operation fails.
    """
    from ..utils.logger import log

    if session_id not in _sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    session = _sessions[session_id]
    worktree_info = session.get("worktree_info")

    if not worktree_info:
        raise HTTPException(status_code=400, detail="Session is not using a worktree")

    import subprocess
    from pathlib import Path

    try:
        worktree_path = worktree_info.path
        branch = worktree_info.branch
        base_branch = worktree_info.base_branch

        # First, commit any uncommitted changes in the worktree.
        result = subprocess.run(
            ["git", "status", "--porcelain"],
            cwd=str(worktree_path),
            capture_output=True,
            text=True,
        )
        if result.stdout.strip():
            # Stage all changes. Output is captured so a failure surfaces
            # git's own stderr in the CalledProcessError below.
            subprocess.run(
                ["git", "add", "-A"],
                cwd=str(worktree_path),
                check=True,
                capture_output=True,
                text=True,
            )
            # Commit
            msg = commit_message or f"Agent session {session_id[:8]} changes"
            subprocess.run(
                ["git", "commit", "-m", msg],
                cwd=str(worktree_path),
                check=True,
                capture_output=True,
                text=True,
            )
            log.info(f"Committed changes in worktree: {msg}")

        # Get the main repo root (parent of .emdash-worktrees)
        from ..config import get_config
        config = get_config()
        main_repo = Path(config.repo_root) if config.repo_root else Path.cwd()

        # Merge the worktree branch into base branch
        subprocess.run(
            ["git", "checkout", base_branch],
            cwd=str(main_repo),
            check=True,
            capture_output=True,
            text=True,
        )
        subprocess.run(
            ["git", "merge", branch, "--no-ff", "-m", f"Merge {branch}"],
            cwd=str(main_repo),
            check=True,
            capture_output=True,
            text=True,
        )
        log.info(f"Merged {branch} into {base_branch}")

        # Clean up the worktree
        from ..agent.worktree import WorktreeManager
        worktree_manager = WorktreeManager(main_repo)
        worktree_manager.remove_worktree(worktree_info.task_slug)
        log.info(f"Removed worktree {worktree_info.task_slug}")

        # Clear worktree info from session
        session["worktree_info"] = None

        return {
            "session_id": session_id,
            "success": True,
            "message": f"Changes from {branch} merged into {base_branch}",
        }

    except subprocess.CalledProcessError as e:
        log.error(f"Failed to apply worktree changes: {e}")
        # stderr is captured above, so the client sees git's actual error
        # (previously e.stderr was always None because output wasn't captured).
        detail = e.stderr.strip() if e.stderr else str(e)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to apply changes: {detail}"
        )
    except Exception as e:
        log.error(f"Error applying worktree changes: {e}")
        raise HTTPException(status_code=500, detail=str(e))
1266
+
1267
+
1268
@router.delete("/chat/{session_id}/worktree")
async def discard_worktree(session_id: str):
    """Throw away a session's worktree without merging.

    Removes the worktree (and its branch) and clears the session's
    worktree info. Nothing is applied to the base branch.
    """
    from ..utils.logger import log

    if session_id not in _sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    session = _sessions[session_id]
    worktree_info = session.get("worktree_info")

    if not worktree_info:
        raise HTTPException(status_code=400, detail="Session is not using a worktree")

    try:
        from pathlib import Path
        from ..config import get_config
        from ..agent.worktree import WorktreeManager

        config = get_config()
        main_repo = Path(config.repo_root) if config.repo_root else Path.cwd()

        WorktreeManager(main_repo).remove_worktree(worktree_info.task_slug)
        log.info(f"Discarded worktree {worktree_info.task_slug}")

        # Forget the worktree on the session side as well.
        session["worktree_info"] = None

        return {
            "session_id": session_id,
            "success": True,
            "message": f"Worktree {worktree_info.task_slug} discarded",
        }

    except Exception as e:
        log.error(f"Error discarding worktree: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@@ -43,9 +43,9 @@ def _run_research_sync(
43
43
  import sys
44
44
  from pathlib import Path
45
45
 
46
- repo_root = Path(__file__).parent.parent.parent.parent.parent
47
- if str(repo_root) not in sys.path:
48
- sys.path.insert(0, str(repo_root))
46
+ from ..config import get_config
47
+ config = get_config()
48
+ repo_root = Path(config.repo_root) if config.repo_root else Path.cwd()
49
49
 
50
50
  try:
51
51
  from ..agent.research.agent import ResearchAgent
emdash_core/api/router.py CHANGED
@@ -19,7 +19,6 @@ from . import (
19
19
  research,
20
20
  review,
21
21
  embed,
22
- swarm,
23
22
  rules,
24
23
  context,
25
24
  feature,
@@ -67,9 +66,6 @@ api_router.include_router(review.router)
67
66
  # Embeddings
68
67
  api_router.include_router(embed.router)
69
68
 
70
- # Multi-agent
71
- api_router.include_router(swarm.router)
72
-
73
69
  # Configuration
74
70
  api_router.include_router(rules.router)
75
71
  api_router.include_router(context.router)
@@ -0,0 +1,197 @@
1
+ """Longevity tracking for context items.
2
+
3
+ Tracks which entities appear repeatedly across reranking calls.
4
+ Items that keep appearing are likely important and get boosted.
5
+
6
+ This uses an in-memory cache that resets on process restart.
7
+ For persistence, the cache could be stored in the graph database.
8
+ """
9
+
10
+ import math
11
+ import time
12
+ from dataclasses import dataclass, field
13
+ from typing import Optional
14
+
15
+
16
@dataclass
class LongevityRecord:
    """Track an entity's appearance history."""

    qualified_name: str
    appearance_count: int = 0
    first_seen: float = field(default_factory=time.time)
    last_seen: float = field(default_factory=time.time)

    def record_appearance(self) -> None:
        """Record a new appearance of this entity."""
        self.appearance_count += 1
        self.last_seen = time.time()

    def get_longevity_score(self, now: Optional[float] = None) -> float:
        """Calculate longevity score based on appearance count.

        Longevity = items that have appeared in context frame more than once.
        No time-based decay - if it keeps appearing, it's important.

        Score is ln(count) / 3, capped at 1.0 (log scale for diminishing
        returns):
        - 1 appearance = 0.0 (first time, no longevity yet)
        - 2 appearances = 0.23
        - 3 appearances = 0.37
        - 5 appearances = 0.54
        - 10 appearances = 0.77
        - 20 appearances = ~1.0 (cap reached at 21+)

        Args:
            now: Current timestamp (unused, kept for API compatibility)

        Returns:
            Score between 0.0 and 1.0
        """
        if self.appearance_count <= 1:
            return 0.0  # First appearance = no longevity

        # Log scale for diminishing returns, capped at 1.0.
        return min(1.0, math.log(self.appearance_count) / 3)
56
+
57
+
58
class LongevityTracker:
    """Tracks entity appearances across reranking calls."""

    def __init__(self, max_entries: int = 1000):
        """Initialize the tracker.

        Args:
            max_entries: Maximum number of entities to track (LRU eviction)
        """
        self._records: dict[str, LongevityRecord] = {}
        self._max_entries = max_entries

    def record_appearance(self, qualified_name: str) -> None:
        """Record that an entity appeared in reranking.

        Args:
            qualified_name: The entity's qualified name
        """
        existing = self._records.get(qualified_name)
        if existing is not None:
            existing.record_appearance()
            return

        # New entity: make room first if the cache is at capacity.
        if len(self._records) >= self._max_entries:
            self._evict_oldest()

        self._records[qualified_name] = LongevityRecord(
            qualified_name=qualified_name,
            appearance_count=1,
        )

    def record_batch(self, qualified_names: list[str]) -> None:
        """Record appearances for multiple entities.

        Args:
            qualified_names: List of entity qualified names
        """
        for name in qualified_names:
            self.record_appearance(name)

    def get_longevity_score(self, qualified_name: str) -> float:
        """Get the longevity score for an entity.

        Args:
            qualified_name: The entity's qualified name

        Returns:
            Score between 0.0 and 1.0 (0.0 if never seen)
        """
        record = self._records.get(qualified_name)
        return 0.0 if record is None else record.get_longevity_score()

    def get_appearance_count(self, qualified_name: str) -> int:
        """Get how many times an entity has appeared.

        Args:
            qualified_name: The entity's qualified name

        Returns:
            Number of appearances (0 if never seen)
        """
        record = self._records.get(qualified_name)
        return 0 if record is None else record.appearance_count

    def _evict_oldest(self) -> None:
        """Evict the ~10% least recently seen entries (at least one)."""
        if not self._records:
            return

        by_recency = sorted(
            self._records,
            key=lambda qname: self._records[qname].last_seen,
        )
        for qname in by_recency[: max(1, len(by_recency) // 10)]:
            del self._records[qname]

    def clear(self) -> None:
        """Clear all longevity records."""
        self._records.clear()

    def get_stats(self) -> dict:
        """Get statistics about the tracker.

        Returns:
            Dictionary with tracker statistics
        """
        counts = [record.appearance_count for record in self._records.values()]
        if not counts:
            return {
                "total_entities": 0,
                "total_appearances": 0,
                "avg_appearances": 0,
                "max_appearances": 0,
            }

        return {
            "total_entities": len(counts),
            "total_appearances": sum(counts),
            "avg_appearances": sum(counts) / len(counts),
            "max_appearances": max(counts),
        }
163
+
164
+
165
# Global tracker instance (shared across reranking calls)
_global_tracker: Optional[LongevityTracker] = None


def get_longevity_tracker() -> LongevityTracker:
    """Return the process-wide tracker, creating it on first use."""
    global _global_tracker
    if _global_tracker is None:
        _global_tracker = LongevityTracker()
    return _global_tracker


def record_reranked_items(qualified_names: list[str]) -> None:
    """Record that items appeared in a reranking result.

    Call this after reranking to update longevity scores.

    Args:
        qualified_names: List of qualified names that were reranked
    """
    tracker = get_longevity_tracker()
    tracker.record_batch(qualified_names)


def get_longevity_score(qualified_name: str) -> float:
    """Get the longevity score for an entity.

    Args:
        qualified_name: The entity's qualified name

    Returns:
        Score between 0.0 and 1.0
    """
    tracker = get_longevity_tracker()
    return tracker.get_longevity_score(qualified_name)