@bitseek/hermes-webui 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. package/README.md +213 -0
  2. package/bin/hermes-webui.mjs +588 -0
  3. package/package.json +25 -0
  4. package/scripts/sync-vendor.mjs +74 -0
  5. package/templates/launchd/com.bitseek.hermes-webui.plist +21 -0
  6. package/templates/systemd/hermes-webui.service +13 -0
  7. package/templates/windows/hermes-webui-task.ps1 +3 -0
  8. package/vendor/agent-frontend-shell/.bitseek-source.json +6 -0
  9. package/vendor/agent-frontend-shell/.dockerignore +7 -0
  10. package/vendor/agent-frontend-shell/.env.docker.example +89 -0
  11. package/vendor/agent-frontend-shell/.env.example +34 -0
  12. package/vendor/agent-frontend-shell/.github/FUNDING.yml +3 -0
  13. package/vendor/agent-frontend-shell/.github/workflows/browser-smoke.yml +42 -0
  14. package/vendor/agent-frontend-shell/.github/workflows/docker-smoke.yml +233 -0
  15. package/vendor/agent-frontend-shell/.github/workflows/native-windows-startup.yml +132 -0
  16. package/vendor/agent-frontend-shell/.github/workflows/release.yml +57 -0
  17. package/vendor/agent-frontend-shell/.github/workflows/tests.yml +88 -0
  18. package/vendor/agent-frontend-shell/.vscode/launch.json +59 -0
  19. package/vendor/agent-frontend-shell/.vscode/settings.json +13 -0
  20. package/vendor/agent-frontend-shell/AGENTS.md +80 -0
  21. package/vendor/agent-frontend-shell/ARCHITECTURE.md +1658 -0
  22. package/vendor/agent-frontend-shell/BUGS.md +52 -0
  23. package/vendor/agent-frontend-shell/CHANGELOG.md +7295 -0
  24. package/vendor/agent-frontend-shell/CONTRIBUTING.md +205 -0
  25. package/vendor/agent-frontend-shell/CONTRIBUTORS.md +107 -0
  26. package/vendor/agent-frontend-shell/DESIGN.md +173 -0
  27. package/vendor/agent-frontend-shell/Dockerfile +91 -0
  28. package/vendor/agent-frontend-shell/LICENSE +21 -0
  29. package/vendor/agent-frontend-shell/README-CUSTOM.md +76 -0
  30. package/vendor/agent-frontend-shell/README.md +705 -0
  31. package/vendor/agent-frontend-shell/ROADMAP.md +351 -0
  32. package/vendor/agent-frontend-shell/SPRINTS.md +147 -0
  33. package/vendor/agent-frontend-shell/TESTING.md +1932 -0
  34. package/vendor/agent-frontend-shell/THEMES.md +170 -0
  35. package/vendor/agent-frontend-shell/api/__init__.py +1 -0
  36. package/vendor/agent-frontend-shell/api/agent_health.py +392 -0
  37. package/vendor/agent-frontend-shell/api/agent_sessions.py +782 -0
  38. package/vendor/agent-frontend-shell/api/auth.py +592 -0
  39. package/vendor/agent-frontend-shell/api/background.py +87 -0
  40. package/vendor/agent-frontend-shell/api/clarify.py +238 -0
  41. package/vendor/agent-frontend-shell/api/commands.py +124 -0
  42. package/vendor/agent-frontend-shell/api/compression_anchor.py +134 -0
  43. package/vendor/agent-frontend-shell/api/config.py +5178 -0
  44. package/vendor/agent-frontend-shell/api/dashboard_probe.py +255 -0
  45. package/vendor/agent-frontend-shell/api/extensions.py +253 -0
  46. package/vendor/agent-frontend-shell/api/gateway_chat.py +435 -0
  47. package/vendor/agent-frontend-shell/api/gateway_watcher.py +230 -0
  48. package/vendor/agent-frontend-shell/api/goals.py +608 -0
  49. package/vendor/agent-frontend-shell/api/helpers.py +474 -0
  50. package/vendor/agent-frontend-shell/api/kanban_bridge.py +1255 -0
  51. package/vendor/agent-frontend-shell/api/metering.py +194 -0
  52. package/vendor/agent-frontend-shell/api/models.py +4210 -0
  53. package/vendor/agent-frontend-shell/api/oauth.py +770 -0
  54. package/vendor/agent-frontend-shell/api/onboarding.py +1046 -0
  55. package/vendor/agent-frontend-shell/api/passkeys.py +365 -0
  56. package/vendor/agent-frontend-shell/api/profiles.py +1499 -0
  57. package/vendor/agent-frontend-shell/api/providers.py +2175 -0
  58. package/vendor/agent-frontend-shell/api/request_diagnostics.py +160 -0
  59. package/vendor/agent-frontend-shell/api/rollback.py +320 -0
  60. package/vendor/agent-frontend-shell/api/routes.py +13990 -0
  61. package/vendor/agent-frontend-shell/api/run_journal.py +284 -0
  62. package/vendor/agent-frontend-shell/api/runner_client.py +156 -0
  63. package/vendor/agent-frontend-shell/api/runtime_adapter.py +431 -0
  64. package/vendor/agent-frontend-shell/api/session_discoverability.py +640 -0
  65. package/vendor/agent-frontend-shell/api/session_events.py +45 -0
  66. package/vendor/agent-frontend-shell/api/session_lifecycle.py +208 -0
  67. package/vendor/agent-frontend-shell/api/session_ops.py +207 -0
  68. package/vendor/agent-frontend-shell/api/session_recovery.py +655 -0
  69. package/vendor/agent-frontend-shell/api/skill_usage.py +32 -0
  70. package/vendor/agent-frontend-shell/api/startup.py +128 -0
  71. package/vendor/agent-frontend-shell/api/state_sync.py +187 -0
  72. package/vendor/agent-frontend-shell/api/streaming.py +7048 -0
  73. package/vendor/agent-frontend-shell/api/system_health.py +167 -0
  74. package/vendor/agent-frontend-shell/api/terminal.py +410 -0
  75. package/vendor/agent-frontend-shell/api/turn_journal.py +214 -0
  76. package/vendor/agent-frontend-shell/api/updates.py +1261 -0
  77. package/vendor/agent-frontend-shell/api/upload.py +322 -0
  78. package/vendor/agent-frontend-shell/api/usage.py +26 -0
  79. package/vendor/agent-frontend-shell/api/workspace.py +867 -0
  80. package/vendor/agent-frontend-shell/api/workspace_git.py +1261 -0
  81. package/vendor/agent-frontend-shell/api/worktrees.py +357 -0
  82. package/vendor/agent-frontend-shell/bootstrap.py +492 -0
  83. package/vendor/agent-frontend-shell/ctl.sh +427 -0
  84. package/vendor/agent-frontend-shell/docker-compose.custom.yml +26 -0
  85. package/vendor/agent-frontend-shell/docker-compose.three-container.yml +168 -0
  86. package/vendor/agent-frontend-shell/docker-compose.two-container.yml +147 -0
  87. package/vendor/agent-frontend-shell/docker-compose.yml +57 -0
  88. package/vendor/agent-frontend-shell/docker_init.bash +459 -0
  89. package/vendor/agent-frontend-shell/docs/CONTRACTS.md +207 -0
  90. package/vendor/agent-frontend-shell/docs/EXTENSIONS.md +212 -0
  91. package/vendor/agent-frontend-shell/docs/ISSUES.md +23 -0
  92. package/vendor/agent-frontend-shell/docs/UIUX-GUIDE.md +196 -0
  93. package/vendor/agent-frontend-shell/docs/advanced-chat-setup.md +83 -0
  94. package/vendor/agent-frontend-shell/docs/docker.md +337 -0
  95. package/vendor/agent-frontend-shell/docs/onboarding-agent-checklist.md +207 -0
  96. package/vendor/agent-frontend-shell/docs/onboarding.md +202 -0
  97. package/vendor/agent-frontend-shell/docs/remote-access.md +75 -0
  98. package/vendor/agent-frontend-shell/docs/rfcs/README.md +53 -0
  99. package/vendor/agent-frontend-shell/docs/rfcs/agent-source-boundary.md +70 -0
  100. package/vendor/agent-frontend-shell/docs/rfcs/canonical-session-resolution.md +124 -0
  101. package/vendor/agent-frontend-shell/docs/rfcs/hermes-run-adapter-contract.md +1079 -0
  102. package/vendor/agent-frontend-shell/docs/rfcs/turn-journal.md +195 -0
  103. package/vendor/agent-frontend-shell/docs/rfcs/webui-run-state-consistency-contract.md +157 -0
  104. package/vendor/agent-frontend-shell/docs/supervisor.md +280 -0
  105. package/vendor/agent-frontend-shell/docs/troubleshooting.md +132 -0
  106. package/vendor/agent-frontend-shell/docs/ui-ux/index.html +863 -0
  107. package/vendor/agent-frontend-shell/docs/ui-ux/two-stage-proposal.html +768 -0
  108. package/vendor/agent-frontend-shell/docs/why-hermes.md +489 -0
  109. package/vendor/agent-frontend-shell/docs/workspace-git.md +92 -0
  110. package/vendor/agent-frontend-shell/docs/wsl-autostart.md +126 -0
  111. package/vendor/agent-frontend-shell/eslint.runtime-guard.config.mjs +35 -0
  112. package/vendor/agent-frontend-shell/extensions/bitseek-design-system.md +330 -0
  113. package/vendor/agent-frontend-shell/extensions/branding/assets/apple-touch-icon.png +0 -0
  114. package/vendor/agent-frontend-shell/extensions/branding/assets/empty-logo.svg +739 -0
  115. package/vendor/agent-frontend-shell/extensions/branding/assets/favicon-192.png +0 -0
  116. package/vendor/agent-frontend-shell/extensions/branding/assets/favicon-32.png +0 -0
  117. package/vendor/agent-frontend-shell/extensions/branding/assets/favicon-512.png +0 -0
  118. package/vendor/agent-frontend-shell/extensions/branding/assets/favicon-512.svg +745 -0
  119. package/vendor/agent-frontend-shell/extensions/branding/assets/favicon.ico +0 -0
  120. package/vendor/agent-frontend-shell/extensions/branding/assets/favicon.svg +745 -0
  121. package/vendor/agent-frontend-shell/extensions/branding/assets/titlebar-icon-v2.svg +751 -0
  122. package/vendor/agent-frontend-shell/extensions/branding/assets/titlebar-icon-v3.svg +739 -0
  123. package/vendor/agent-frontend-shell/extensions/branding/assets/titlebar-icon.svg +745 -0
  124. package/vendor/agent-frontend-shell/extensions/branding/branding.js +112 -0
  125. package/vendor/agent-frontend-shell/extensions/branding/config.json +14 -0
  126. package/vendor/agent-frontend-shell/extensions/branding/manifest.json +53 -0
  127. package/vendor/agent-frontend-shell/extensions/index.js +67 -0
  128. package/vendor/agent-frontend-shell/extensions/loader/hermes-loader.js +77 -0
  129. package/vendor/agent-frontend-shell/extensions/manifest.json +16 -0
  130. package/vendor/agent-frontend-shell/extensions/pages/ai-teammates/page.css +333 -0
  131. package/vendor/agent-frontend-shell/extensions/pages/ai-teammates/page.js +487 -0
  132. package/vendor/agent-frontend-shell/extensions/pages/manifest.json +6 -0
  133. package/vendor/agent-frontend-shell/extensions/pages/registry.css +56 -0
  134. package/vendor/agent-frontend-shell/extensions/pages/registry.js +302 -0
  135. package/vendor/agent-frontend-shell/extensions/themes/bitseek/index.css +93 -0
  136. package/vendor/agent-frontend-shell/extensions/themes/bitseek/index.js +98 -0
  137. package/vendor/agent-frontend-shell/install.sh +63 -0
  138. package/vendor/agent-frontend-shell/mcp_server.py +567 -0
  139. package/vendor/agent-frontend-shell/package.json +12 -0
  140. package/vendor/agent-frontend-shell/pyproject.toml +56 -0
  141. package/vendor/agent-frontend-shell/pytest.ini +3 -0
  142. package/vendor/agent-frontend-shell/requirements.txt +5 -0
  143. package/vendor/agent-frontend-shell/server.py +624 -0
  144. package/vendor/agent-frontend-shell/start.ps1 +210 -0
  145. package/vendor/agent-frontend-shell/start.sh +65 -0
  146. package/vendor/agent-frontend-shell/static/apple-touch-icon.png +0 -0
  147. package/vendor/agent-frontend-shell/static/boot.js +1990 -0
  148. package/vendor/agent-frontend-shell/static/commands.js +1402 -0
  149. package/vendor/agent-frontend-shell/static/favicon-192.png +0 -0
  150. package/vendor/agent-frontend-shell/static/favicon-32.png +0 -0
  151. package/vendor/agent-frontend-shell/static/favicon-512.png +0 -0
  152. package/vendor/agent-frontend-shell/static/favicon-512.svg +18 -0
  153. package/vendor/agent-frontend-shell/static/favicon.ico +0 -0
  154. package/vendor/agent-frontend-shell/static/favicon.svg +20 -0
  155. package/vendor/agent-frontend-shell/static/i18n.js +15389 -0
  156. package/vendor/agent-frontend-shell/static/icons.js +92 -0
  157. package/vendor/agent-frontend-shell/static/index.html +1506 -0
  158. package/vendor/agent-frontend-shell/static/login.js +177 -0
  159. package/vendor/agent-frontend-shell/static/manifest.json +53 -0
  160. package/vendor/agent-frontend-shell/static/messages.js +3521 -0
  161. package/vendor/agent-frontend-shell/static/onboarding.js +800 -0
  162. package/vendor/agent-frontend-shell/static/panels.js +7995 -0
  163. package/vendor/agent-frontend-shell/static/pwa-startup.js +83 -0
  164. package/vendor/agent-frontend-shell/static/sessions.js +5165 -0
  165. package/vendor/agent-frontend-shell/static/style.css +4774 -0
  166. package/vendor/agent-frontend-shell/static/sw.js +173 -0
  167. package/vendor/agent-frontend-shell/static/terminal.js +632 -0
  168. package/vendor/agent-frontend-shell/static/ui.js +8997 -0
  169. package/vendor/agent-frontend-shell/static/vendor/js-yaml/4.1.0/js-yaml.min.js +2 -0
  170. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_AMS-Regular.ttf +0 -0
  171. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_AMS-Regular.woff +0 -0
  172. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  173. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Caligraphic-Bold.ttf +0 -0
  174. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Caligraphic-Bold.woff +0 -0
  175. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  176. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Caligraphic-Regular.ttf +0 -0
  177. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Caligraphic-Regular.woff +0 -0
  178. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  179. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Fraktur-Bold.ttf +0 -0
  180. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Fraktur-Bold.woff +0 -0
  181. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  182. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Fraktur-Regular.ttf +0 -0
  183. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Fraktur-Regular.woff +0 -0
  184. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  185. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-Bold.ttf +0 -0
  186. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-Bold.woff +0 -0
  187. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-Bold.woff2 +0 -0
  188. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-BoldItalic.ttf +0 -0
  189. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-BoldItalic.woff +0 -0
  190. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  191. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-Italic.ttf +0 -0
  192. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-Italic.woff +0 -0
  193. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-Italic.woff2 +0 -0
  194. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-Regular.ttf +0 -0
  195. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-Regular.woff +0 -0
  196. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Main-Regular.woff2 +0 -0
  197. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Math-BoldItalic.ttf +0 -0
  198. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Math-BoldItalic.woff +0 -0
  199. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  200. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Math-Italic.ttf +0 -0
  201. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Math-Italic.woff +0 -0
  202. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Math-Italic.woff2 +0 -0
  203. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_SansSerif-Bold.ttf +0 -0
  204. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_SansSerif-Bold.woff +0 -0
  205. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  206. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_SansSerif-Italic.ttf +0 -0
  207. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_SansSerif-Italic.woff +0 -0
  208. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  209. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_SansSerif-Regular.ttf +0 -0
  210. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_SansSerif-Regular.woff +0 -0
  211. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  212. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Script-Regular.ttf +0 -0
  213. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Script-Regular.woff +0 -0
  214. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Script-Regular.woff2 +0 -0
  215. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size1-Regular.ttf +0 -0
  216. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size1-Regular.woff +0 -0
  217. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  218. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size2-Regular.ttf +0 -0
  219. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size2-Regular.woff +0 -0
  220. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  221. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size3-Regular.ttf +0 -0
  222. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size3-Regular.woff +0 -0
  223. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  224. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size4-Regular.ttf +0 -0
  225. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size4-Regular.woff +0 -0
  226. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  227. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Typewriter-Regular.ttf +0 -0
  228. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Typewriter-Regular.woff +0 -0
  229. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  230. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/katex.min.css +1 -0
  231. package/vendor/agent-frontend-shell/static/vendor/katex/0.16.22/katex.min.js +1 -0
  232. package/vendor/agent-frontend-shell/static/vendor/smd.min.js +29 -0
  233. package/vendor/agent-frontend-shell/static/workspace.js +680 -0
@@ -0,0 +1,4210 @@
1
+ """Hermes Web UI -- Session model and in-memory session store."""
2
+ import collections
3
+ import copy
4
+ import datetime
5
+ import hashlib
6
+ import json
7
+ import logging
8
+ import os
9
+ import re
10
+ import threading
11
+ import time
12
+ import uuid
13
+ from contextlib import closing
14
+ from pathlib import Path
15
+
16
+ import api.config as _cfg
17
+ from api.config import (
18
+ SESSION_DIR, SESSION_INDEX_FILE, SESSIONS, SESSIONS_MAX,
19
+ LOCK, STREAMS, STREAMS_LOCK, DEFAULT_WORKSPACE, DEFAULT_MODEL, PROJECTS_FILE, HOME,
20
+ get_effective_default_model, _get_session_agent_lock,
21
+ )
22
+ from api.workspace import get_last_workspace
23
+ from api.usage import prompt_cache_hit_percent
24
+ from api.agent_sessions import (
25
+ _is_continuation_session,
26
+ read_importable_agent_session_rows,
27
+ read_session_lineage_metadata,
28
+ )
29
+
30
+ logger = logging.getLogger(__name__)
31
+ CLI_VISIBLE_SESSION_LIMIT = 20
32
+ # How many messageful cron sessions to surface in the project-chip layer.
33
+ # Needs to exceed CLI_VISIBLE_SESSION_LIMIT so older cron runs stay
34
+ # addressable even when many newer non-cron sessions dominate the default
35
+ # sidebar window (#3172).
36
+ CRON_PROJECT_CHIP_LIMIT = 200
37
+ _CLI_SESSIONS_CACHE_TTL_SECONDS = 5.0
38
+ _CLI_SESSIONS_CACHE_LOCK = threading.Lock()
39
+ _CLI_SESSIONS_CACHE = {}
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # Stale temp-file cleanup
43
+ # ---------------------------------------------------------------------------
44
+ # Both Session.save() and _write_session_index() use the atomic-write pattern:
45
+ # write to <path>.tmp.<pid>.<tid> → os.replace() to final path
46
+ # If the process crashes between write and replace the .tmp file is left
47
+ # behind. Because the name embeds pid + tid, leftover files can never be
48
+ # reused by a different process/thread, so they are safe to remove on the
49
+ # next startup. _cleanup_stale_tmp_files() is called from the full-rebuild
50
+ # path of _write_session_index (i.e. at first index access / startup) and
51
+ # removes any *.tmp.* file whose mtime is older than one hour.
52
+ # ---------------------------------------------------------------------------
53
+
54
+ _STALE_TMP_AGE_SECONDS = 3600 # 1 hour
55
+
56
+ # Serializes index writers so concurrent Session.save() calls cannot race on
57
+ # stale baselines while still allowing LOCK to be released before disk I/O.
58
+ _INDEX_WRITE_LOCK = threading.RLock()
59
+ _SESSION_INDEX_REBUILD_LOCK = threading.Lock()
60
+ _SESSION_INDEX_REBUILD_THREAD = None
61
+
62
+ # Path-safety contract for session IDs. Accept alphanumerics, underscore, and
63
+ # hyphen so API/gateway-issued ids (``api-*``, ``reachy-voice-*``) round-trip
64
+ # through filesystem load/save/delete/worktree paths without traversal risk.
65
+ # Dots and slashes are rejected so the id can never name a parent directory
66
+ # or hide an unexpected extension.
67
+ _SAFE_SID_CHARS = frozenset(
68
+ '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-'
69
+ )
70
+
71
+
72
def is_safe_session_id(sid) -> bool:
    """Tell whether ``sid`` may be used as a filesystem-safe session id.

    A safe id is a non-empty ``str`` made up solely of characters found in
    ``_SAFE_SID_CHARS``.  Anything else -- ``None``, an empty string, a
    non-string value, or a string containing any other character -- yields
    ``False``.

    This is the single shared validator for the call sites that used to
    carry their own copy of the check (``Session.load``,
    ``Session.load_metadata_only``, ``_repair_stale_pending``,
    ``/api/session/worktree/remove`` and ``/api/session/delete``).
    See #3023.
    """
    if sid and isinstance(sid, str):
        # Every character of the id must belong to the allowed alphabet.
        return _SAFE_SID_CHARS.issuperset(sid)
    return False
84
+
85
+
86
def _cleanup_stale_tmp_files() -> None:
    """Best-effort removal of stale ``*.tmp.*`` files from SESSION_DIR.

    Only files whose mtime is older than ``_STALE_TMP_AGE_SECONDS`` are
    removed so that in-flight writes from a long-running sibling process
    are not disturbed.

    Failures are logged at debug level and swallowed -- this function never
    raises, so it can never prevent startup.
    """
    cutoff = time.time() - _STALE_TMP_AGE_SECONDS
    try:
        for p in SESSION_DIR.glob('*.tmp.*'):
            try:
                if p.stat().st_mtime < cutoff:
                    p.unlink(missing_ok=True)
                    logger.debug("Cleaned up stale tmp file: %s", p.name)
            except OSError:
                # Best-effort: the file may have vanished or be unreadable.
                # Keep sweeping the remaining candidates.
                logger.debug(
                    "Could not clean up tmp file: %s", p.name, exc_info=True
                )
    except Exception:
        # SESSION_DIR may not exist yet; that's fine.  Record the reason so
        # an unexpected failure is still diagnosable.
        logger.debug("Stale tmp-file sweep skipped", exc_info=True)
105
+
106
+
107
+ _PERSISTED_SESSION_IDS_CACHE: tuple[Path | None, int | None, frozenset[str]] = (None, None, frozenset())
108
+
109
+
110
def _persisted_session_ids_snapshot() -> frozenset[str]:
    """Return the ids of sessions persisted in SESSION_DIR, cached by mtime.

    The ids are the stems of every ``*.json`` file in SESSION_DIR whose name
    does not start with ``_``.  Because `/api/sessions` and incremental index
    writes may run every few seconds, globbing a large session directory each
    time is expensive -- more so while request threads contend on LOCK.  The
    listing is therefore memoised in ``_PERSISTED_SESSION_IDS_CACHE`` and only
    recomputed when SESSION_DIR itself or its ``st_mtime_ns`` changes, so
    callers can take the snapshot before entering a critical section.

    If the directory cannot be stat'ed the mtime is recorded as ``None``; if
    it cannot be listed the snapshot is an empty frozenset.
    """
    global _PERSISTED_SESSION_IDS_CACHE

    try:
        current_mtime_ns = SESSION_DIR.stat().st_mtime_ns
    except Exception:
        current_mtime_ns = None

    prev_dir, prev_mtime_ns, prev_ids = _PERSISTED_SESSION_IDS_CACHE
    cache_is_fresh = prev_dir == SESSION_DIR and prev_mtime_ns == current_mtime_ns
    if cache_is_fresh:
        return prev_ids

    try:
        found = set()
        for entry in SESSION_DIR.glob('*.json'):
            # Underscore-prefixed files are not session files; skip them.
            if entry.name.startswith('_'):
                continue
            found.add(entry.stem)
        fresh_ids = frozenset(found)
    except Exception:
        fresh_ids = frozenset()

    _PERSISTED_SESSION_IDS_CACHE = (SESSION_DIR, current_mtime_ns, fresh_ids)
    return fresh_ids
138
+
139
+
140
def _rebuild_session_index_background() -> None:
    """Run a full session-index rebuild without ever raising.

    Calls ``_write_session_index(updates=None)``.  Any exception is logged at
    debug level together with its traceback and then swallowed, so a failed
    rebuild cannot propagate to the caller.
    """
    try:
        _write_session_index(updates=None)
    except Exception:
        logger.debug("Background session-index rebuild failed", exc_info=True)
145
+
146
+
147
def _start_session_index_rebuild_thread() -> None:
    """Kick off a single background full-index rebuild when the index is absent.

    Does nothing if SESSION_INDEX_FILE already exists or if a previously
    started rebuild thread is still alive.  Otherwise a daemon thread named
    ``session-index-rebuild`` is created, recorded in
    ``_SESSION_INDEX_REBUILD_THREAD`` and started.  The whole check-and-start
    sequence runs under ``_SESSION_INDEX_REBUILD_LOCK``.
    """
    global _SESSION_INDEX_REBUILD_THREAD

    with _SESSION_INDEX_REBUILD_LOCK:
        if SESSION_INDEX_FILE.exists():
            return

        current = _SESSION_INDEX_REBUILD_THREAD
        if current is not None and current.is_alive():
            # A rebuild is already in flight; do not start a second one.
            return

        worker = threading.Thread(
            target=_rebuild_session_index_background,
            name="session-index-rebuild",
            daemon=True,
        )
        # Publish the thread before starting it, as the original code does.
        _SESSION_INDEX_REBUILD_THREAD = worker
        worker.start()
164
+
165
+
166
def _index_entry_exists(session_id: str, in_memory_ids=None) -> bool:
    """Report whether an index row is still backed by real session state.

    A session counts as existing when its id is among the in-memory sessions
    (it may not have been flushed yet) or when ``<session_id>.json`` exists in
    SESSION_DIR.  Used to prune stale `_index.json` rows left behind after
    session-id rotation or file removal.

    ``in_memory_ids`` lets the caller supply a pre-computed collection of
    in-memory ids; when it is ``None`` the ids are snapshotted from SESSIONS
    under LOCK.  A falsy ``session_id`` is never considered to exist.
    """
    if not session_id:
        return False

    if in_memory_ids is None:
        with LOCK:
            in_memory_ids = set(SESSIONS.keys())

    # Only touch the filesystem when the id is not held in memory.
    return session_id in in_memory_ids or (SESSION_DIR / f'{session_id}.json').exists()
183
+
184
+
185
def _write_session_index(updates=None) -> None:
    """Update the session index file.

    When *updates* is provided (a list of Session objects whose compact
    entries should be refreshed) and the index file already exists, the
    existing index is read, patched with those entries and rewritten.  This
    avoids loading every session file, but the whole index is still read and
    written.  When *updates* is None, or the index file does not exist, a
    full rebuild from the ``*.json`` files in SESSION_DIR plus the in-memory
    SESSIONS is performed (used on startup / first call).

    The whole operation is serialized by ``_INDEX_WRITE_LOCK``.  LOCK protects
    in-memory state snapshots and payload construction only; the write
    (write/flush/fsync/replace) always runs outside LOCK.  Note that on the
    fast path the existing index file is *read* while LOCK is held.

    The index is written atomically: the payload goes to a temp file whose
    name embeds the pid and thread ident, then ``os.replace`` moves it over
    the index.  On a write failure the temp file is removed.

    Raises:
        Exception: whatever the full-rebuild write raised.  Fast-path
            failures are not raised directly; they trigger a full rebuild.
    """
    _tmp = SESSION_INDEX_FILE.with_suffix(f'.tmp.{os.getpid()}.{threading.current_thread().ident}')

    with _INDEX_WRITE_LOCK:
        # Lazy full-rebuild path — used when index doesn't exist yet.
        if updates is None or not SESSION_INDEX_FILE.exists():
            _cleanup_stale_tmp_files()  # best-effort sweep on startup / first call
            entry_map: dict[str, dict] = {}
            for p in SESSION_DIR.glob('*.json'):
                # Underscore-prefixed files are skipped.
                if p.name.startswith('_'):
                    continue
                try:
                    s = Session.load(p.stem)
                    if s:
                        c = s.compact()
                        sid = c.get('session_id')
                        if sid:
                            # Dedup by session_id: prefer entry with more messages
                            # (handles old-format session_xxx.json files alongside
                            # WebUI-format xxx.json with the same session_id)
                            existing = entry_map.get(sid)
                            if existing is None or (
                                c.get('message_count', 0) > existing.get('message_count', 0)
                            ):
                                entry_map[sid] = c
                except Exception:
                    # One unreadable session file must not abort the rebuild.
                    logger.debug("Failed to load session from %s", p)
            entries = list(entry_map.values())

            with LOCK:
                # Add in-memory sessions that have no on-disk entry yet.
                existing_ids = set(entry_map.keys())
                for s in SESSIONS.values():
                    if s.session_id not in existing_ids:
                        entries.append(s.compact())
                # Most recently updated first.
                entries.sort(key=lambda s: s.get('updated_at', 0), reverse=True)
                _payload = json.dumps(entries, ensure_ascii=False, indent=2)

            try:
                with open(_tmp, 'w', encoding='utf-8') as f:
                    f.write(_payload)
                    f.flush()
                    os.fsync(f.fileno())
                os.replace(_tmp, SESSION_INDEX_FILE)
            except Exception:
                # Best-effort cleanup of stale tmp on failure
                try:
                    _tmp.unlink(missing_ok=True)
                except Exception:
                    pass
                raise
            return

        # Fast path: patch existing index with updated sessions.
        # This avoids loading every session file on every single save().
        _fallback = False
        try:
            # Avoid N filesystem exists() checks under LOCK by collecting
            # on-disk IDs once before entering the critical section.
            on_disk_ids = _persisted_session_ids_snapshot()
            with LOCK:
                existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
                in_memory_ids = set(SESSIONS.keys())

                # Drop rows whose session is neither in memory nor on disk.
                existing = [
                    e for e in existing
                    if (e.get('session_id') in in_memory_ids or e.get('session_id') in on_disk_ids)
                ]

                # Build lookup of updated entries
                updated_map = {s.session_id: s.compact() for s in updates}
                existing_ids = {e.get('session_id') for e in existing}
                # Add any updated entries not yet in the index
                for sid, entry in updated_map.items():
                    if sid not in existing_ids:
                        existing.append(entry)
                # Replace matching entries in-place
                for i, e in enumerate(existing):
                    sid = e.get('session_id')
                    if sid in updated_map:
                        existing[i] = updated_map[sid]
                existing.sort(key=lambda s: s.get('updated_at', 0), reverse=True)
                _payload = json.dumps(existing, ensure_ascii=False, indent=2)

            try:
                with open(_tmp, 'w', encoding='utf-8') as f:
                    f.write(_payload)
                    f.flush()
                    os.fsync(f.fileno())
                os.replace(_tmp, SESSION_INDEX_FILE)
            except Exception:
                # Remove the partial temp file, then let the outer handler
                # turn the failure into a full rebuild.
                try:
                    _tmp.unlink(missing_ok=True)
                except Exception:
                    pass
                raise
        except Exception:
            # Any fast-path failure (unreadable/corrupt index, write error)
            # is handled by rebuilding from scratch below.
            _fallback = True

        if _fallback:
            # Corrupt or missing index — fall back to full rebuild (called outside LOCK to avoid deadlock)
            _write_session_index(updates=None)
296
+
297
+
298
def prune_session_from_index(session_id: str) -> None:
    """Remove one session row from the persisted sidebar index if present.

    Returns immediately when *session_id* is empty or the index file does not
    exist, and leaves the file untouched when no row matches.  Otherwise the
    index is rewritten without the matching rows, using a temp file (named
    with the pid and thread ident) followed by ``os.replace``.

    If the index cannot be read, is not a JSON list, or the rewrite fails,
    the index is rebuilt from scratch via ``_write_session_index(updates=None)``.
    """
    sid = str(session_id or "")
    if not sid or not SESSION_INDEX_FILE.exists():
        return
    _tmp = SESSION_INDEX_FILE.with_suffix(f'.tmp.{os.getpid()}.{threading.current_thread().ident}')

    _fallback = False
    with _INDEX_WRITE_LOCK:
        try:
            with LOCK:
                existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
                if not isinstance(existing, list):
                    raise ValueError("session index must be a list")
                pruned = [e for e in existing if e.get('session_id') != sid]
                # Nothing matched: skip the rewrite entirely.
                if len(pruned) == len(existing):
                    return
                _payload = json.dumps(pruned, ensure_ascii=False, indent=2)

            try:
                with open(_tmp, 'w', encoding='utf-8') as f:
                    f.write(_payload)
                    f.flush()
                    os.fsync(f.fileno())
                os.replace(_tmp, SESSION_INDEX_FILE)
            except Exception:
                # Remove the partial temp file before handing the error to
                # the outer handler.
                try:
                    _tmp.unlink(missing_ok=True)
                except Exception:
                    pass
                raise
        except Exception:
            # Corrupt index or failed write: rebuild instead of raising.
            _fallback = True

    if _fallback:
        _write_session_index(updates=None)
334
+
335
+
336
def _active_stream_ids():
    """Return the set of ids that are keys of STREAMS or of ACTIVE_RUNS.

    Each mapping is snapshotted under its own lock (``STREAMS_LOCK`` and
    ``_cfg.ACTIVE_RUNS_LOCK`` respectively) and the two key sets are merged.
    """
    with STREAMS_LOCK:
        observed = set(STREAMS.keys())

    # STREAMS only reflects the browser/SSE observation path.  A worker may
    # outlive its SSE stream entry -- for instance while a request is blocked
    # in the provider, while unwinding after a cancel, or when otherwise
    # detached from the client.  ACTIVE_RUNS is therefore the authority on
    # worker liveness; including it keeps stale-pending repair from appending
    # a misleading restart/interrupted marker while the agent turn is still
    # in flight.
    with _cfg.ACTIVE_RUNS_LOCK:
        running = set(_cfg.ACTIVE_RUNS.keys())

    return observed | running
348
+
349
+
350
+ def _append_recovered_turn_to_context(session, recovered: dict) -> None:
351
+ context_messages = getattr(session, 'context_messages', None)
352
+ if not isinstance(context_messages, list) or not context_messages:
353
+ return
354
+ recovered_text = " ".join(str(recovered.get('content') or '').split())
355
+ if recovered_text:
356
+ for existing in reversed(context_messages[-8:]):
357
+ if not isinstance(existing, dict) or existing.get('role') != 'user':
358
+ continue
359
+ existing_text = " ".join(str(existing.get('content') or '').split())
360
+ if existing_text == recovered_text:
361
+ return
362
+ context_entry = {k: v for k, v in recovered.items() if k != 'timestamp'}
363
+ context_messages.append(context_entry)
364
+
365
+
366
+ def _append_recovered_pending_turn(session, *, timestamp: int | None = None) -> dict | None:
367
+ pending_text = str(session.pending_user_message or '')
368
+ if not pending_text:
369
+ return None
370
+ recovered_ts = int(time.time())
371
+ if isinstance(timestamp, (int, float)) and timestamp > 0:
372
+ recovered_ts = int(timestamp)
373
+ recovered: dict = {
374
+ 'role': 'user',
375
+ 'content': session.pending_user_message,
376
+ 'timestamp': recovered_ts,
377
+ '_recovered': True,
378
+ }
379
+ if session.pending_attachments:
380
+ recovered['attachments'] = list(session.pending_attachments)
381
+ session.messages.append(recovered)
382
+ _append_recovered_turn_to_context(session, recovered)
383
+ return recovered
384
+
385
+
386
+ def _is_streaming_session(active_stream_id, active_stream_ids):
387
+ return bool(active_stream_id and active_stream_id in active_stream_ids)
388
+
389
+ def _session_sort_timestamp(session):
390
+ if isinstance(session, dict):
391
+ return session.get('last_message_at') or session.get('updated_at') or 0
392
+ return _last_message_timestamp(getattr(session, 'messages', None)) or getattr(session, 'updated_at', 0) or 0
393
+
394
+
395
+ def _message_timestamp(message):
396
+ if not isinstance(message, dict):
397
+ return None
398
+ raw = message.get('_ts') or message.get('timestamp')
399
+ try:
400
+ return float(raw) if raw is not None else None
401
+ except (TypeError, ValueError):
402
+ return None
403
+
404
+
405
+ def _is_empty_partial_activity_message(message):
406
+ """Return True for cancelled/recovered activity rows with no reply text."""
407
+ if not isinstance(message, dict):
408
+ return False
409
+ if message.get('role') != 'assistant' or not message.get('_partial'):
410
+ return False
411
+ content = message.get('content', '')
412
+ if isinstance(content, str):
413
+ return not content.strip()
414
+ if isinstance(content, list):
415
+ for part in content:
416
+ if isinstance(part, dict):
417
+ if part.get('type') == 'text' and str(part.get('text') or part.get('content') or '').strip():
418
+ return False
419
+ continue
420
+ if str(part or '').strip():
421
+ return False
422
+ return True
423
+ return not str(content or '').strip()
424
+
425
+
426
+ def _last_message_timestamp(messages):
427
+ if not isinstance(messages, list):
428
+ return None
429
+ for message in reversed(messages):
430
+ if isinstance(message, dict) and message.get('role') == 'tool':
431
+ continue
432
+ if _is_empty_partial_activity_message(message):
433
+ continue
434
+ ts = _message_timestamp(message)
435
+ if ts:
436
+ return ts
437
+ return None
438
+
439
+
440
+ def _message_role(message):
441
+ if not isinstance(message, dict):
442
+ return ''
443
+ return str(message.get('role', '')).strip().lower()
444
+
445
+
446
+ def _find_top_level_json_key(text, key):
447
+ """Return the byte offset of a top-level JSON object key, if present."""
448
+ depth = 0
449
+ i = 0
450
+ n = len(text)
451
+ while i < n:
452
+ ch = text[i]
453
+ if ch == '"':
454
+ start = i
455
+ i += 1
456
+ escaped = False
457
+ chars = []
458
+ while i < n:
459
+ c = text[i]
460
+ if escaped:
461
+ chars.append(c)
462
+ escaped = False
463
+ elif c == '\\':
464
+ escaped = True
465
+ elif c == '"':
466
+ break
467
+ else:
468
+ chars.append(c)
469
+ i += 1
470
+ if i >= n:
471
+ return None
472
+ if depth == 1 and ''.join(chars) == key:
473
+ j = i + 1
474
+ while j < n and text[j] in ' \t\r\n':
475
+ j += 1
476
+ if j < n and text[j] == ':':
477
+ return start
478
+ elif ch in '{[':
479
+ depth += 1
480
+ elif ch in '}]':
481
+ depth -= 1
482
+ i += 1
483
+ return None
484
+
485
+
486
+ def _read_metadata_json_prefix(path, max_prefix_bytes=65536):
487
+ """Read only the metadata portion before the top-level messages array."""
488
+ buf = ''
489
+ with open(path, 'r', encoding='utf-8') as f:
490
+ while len(buf.encode('utf-8')) < max_prefix_bytes:
491
+ chunk = f.read(4096)
492
+ if not chunk:
493
+ return None
494
+ buf += chunk
495
+ messages_pos = _find_top_level_json_key(buf, 'messages')
496
+ if messages_pos is None:
497
+ continue
498
+ prefix = buf[:messages_pos].rstrip()
499
+ if prefix.endswith(','):
500
+ prefix = prefix[:-1].rstrip()
501
+ return f'{prefix}\n}}'
502
+ return None
503
+
504
+
505
def _lookup_index_message_count(session_id):
    """Return the indexed message count without loading the full session file.

    Reads ``SESSION_INDEX_FILE`` and looks at the first row whose
    ``session_id`` matches. Returns that row's ``message_count`` as a
    non-negative int, or None when the index is unreadable, is not a list,
    has no matching row, or the stored count is missing, negative or not
    convertible to an int.
    """
    try:
        entries = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
    except Exception:
        return None
    if not isinstance(entries, list):
        return None
    for entry in entries:
        # Skip malformed rows instead of raising AttributeError on ``.get``.
        if not isinstance(entry, dict) or entry.get('session_id') != session_id:
            continue
        count = entry.get('message_count')
        if isinstance(count, int) and count >= 0:
            return count
        try:
            count = int(count)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float('inf')); json.loads accepts Infinity.
            return None
        return count if count >= 0 else None
    return None
525
+
526
+
527
+ def _parse_nonnegative_int(value):
528
+ if isinstance(value, int) and value >= 0:
529
+ return value
530
+ try:
531
+ parsed = int(value)
532
+ except (TypeError, ValueError):
533
+ return None
534
+ return parsed if parsed >= 0 else None
535
+
536
+
537
class Session:
    """One chat session, persisted as ``<session_id>.json`` under ``SESSION_DIR``.

    Holds the transcript (``messages``), tool-call records and the metadata
    fields listed in ``save()``. ``load()`` reads a whole file,
    ``load_metadata_only()`` reads just the leading metadata, and ``compact()``
    produces the summary dict without the transcript.
    """

    def __init__(self, session_id: str=None, title: str='Untitled',
                 workspace=str(DEFAULT_WORKSPACE), model=DEFAULT_MODEL,
                 model_provider=None,
                 messages=None, created_at=None, updated_at=None,
                 tool_calls=None, pinned: bool=False, archived: bool=False,
                 project_id: str=None, profile=None,
                 input_tokens: int=0, output_tokens: int=0, estimated_cost=None,
                 cache_read_tokens: int=0, cache_write_tokens: int=0,
                 personality=None,
                 active_stream_id: str=None,
                 pending_user_message: str=None,
                 pending_attachments=None,
                 pending_started_at=None,
                 context_messages=None,
                 compression_anchor_visible_idx=None,
                 compression_anchor_message_key=None,
                 compression_anchor_summary=None,
                 pre_compression_snapshot: bool=False,
                 context_engine=None,
                 compression_anchor_engine=None,
                 compression_anchor_mode=None,
                 compression_anchor_details=None,
                 context_engine_state=None,
                 context_length=None, threshold_tokens=None,
                 last_prompt_tokens=None,
                 truncation_watermark=None,
                 gateway_routing=None, gateway_routing_history=None,
                 llm_title_generated: bool=False,
                 parent_session_id: str=None,
                 worktree_path=None,
                 worktree_branch=None,
                 worktree_repo_root=None,
                 worktree_created_at=None,
                 enabled_toolsets=None,
                 composer_draft=None,
                 **kwargs):
        """Build a session from keyword fields, normalizing missing values.

        A random 12-hex-character id is generated when ``session_id`` is
        falsy. Path-like fields are expanded and resolved, and container
        fields of the wrong type are replaced by empty defaults. Of the extra
        ``kwargs``, only ``is_cli_session``, ``source_tag``, ``raw_source``,
        ``session_source``, ``source_label``, ``read_only`` and
        ``message_count`` are read; the rest are ignored.
        """
        self.session_id = session_id or uuid.uuid4().hex[:12]
        self.title = title
        self.workspace = str(Path(workspace).expanduser().resolve())
        self.model = model
        self.model_provider = str(model_provider).strip().lower() if model_provider else None
        self.messages = messages or []
        self.tool_calls = tool_calls or []
        self.created_at = created_at or time.time()
        self.updated_at = updated_at or time.time()
        self.pinned = bool(pinned)
        self.archived = bool(archived)
        self.project_id = project_id or None
        self.profile = profile
        self.input_tokens = input_tokens or 0
        self.output_tokens = output_tokens or 0
        self.estimated_cost = estimated_cost
        self.cache_read_tokens = cache_read_tokens or 0
        self.cache_write_tokens = cache_write_tokens or 0
        self.personality = personality
        self.active_stream_id = active_stream_id
        self.pending_user_message = pending_user_message
        self.pending_attachments = pending_attachments or []
        self.pending_started_at = pending_started_at
        self.context_messages = context_messages if isinstance(context_messages, list) else []
        self.compression_anchor_visible_idx = compression_anchor_visible_idx
        self.compression_anchor_message_key = compression_anchor_message_key
        self.compression_anchor_summary = compression_anchor_summary
        self.pre_compression_snapshot = bool(pre_compression_snapshot)
        self.context_engine = context_engine
        self.compression_anchor_engine = compression_anchor_engine
        self.compression_anchor_mode = compression_anchor_mode
        self.compression_anchor_details = compression_anchor_details if isinstance(compression_anchor_details, dict) else {}
        self.context_engine_state = context_engine_state if isinstance(context_engine_state, dict) else {}
        self.context_length = context_length
        self.threshold_tokens = threshold_tokens
        self.last_prompt_tokens = last_prompt_tokens
        self.truncation_watermark = truncation_watermark
        self.gateway_routing = gateway_routing if isinstance(gateway_routing, dict) else None
        self.gateway_routing_history = gateway_routing_history if isinstance(gateway_routing_history, list) else []
        self.llm_title_generated = bool(llm_title_generated)
        self.parent_session_id = parent_session_id
        self.worktree_path = str(Path(worktree_path).expanduser().resolve()) if worktree_path else None
        self.worktree_branch = str(worktree_branch) if worktree_branch else None
        self.worktree_repo_root = str(Path(worktree_repo_root).expanduser().resolve()) if worktree_repo_root else None
        self.worktree_created_at = worktree_created_at
        self.is_cli_session = bool(kwargs.get('is_cli_session', False))
        self.source_tag = kwargs.get('source_tag')
        self.raw_source = kwargs.get('raw_source')
        self.session_source = kwargs.get('session_source')
        self.source_label = kwargs.get('source_label')
        self.read_only = bool(kwargs.get('read_only', False))
        self.enabled_toolsets = enabled_toolsets  # List[str] or None — per-session toolset override
        self.composer_draft = composer_draft if isinstance(composer_draft, dict) else {}
        raw_message_count = kwargs.get('message_count')
        parsed_message_count = None
        if raw_message_count is not None:
            try:
                parsed_message_count = int(raw_message_count)
            except (TypeError, ValueError, OverflowError):
                # OverflowError: int(float('inf')) from a JSON ``Infinity``.
                parsed_message_count = None
        self._metadata_message_count = parsed_message_count if parsed_message_count is not None and parsed_message_count >= 0 else None

    @property
    def path(self):
        """Location of this session's JSON file inside ``SESSION_DIR``."""
        return SESSION_DIR / f'{self.session_id}.json'

    def _maybe_clear_truncation_watermark(self) -> None:
        """Drop ``truncation_watermark`` once any message is newer than it."""
        watermark = _message_timestamp_as_float({"timestamp": self.truncation_watermark})
        if watermark is None:
            return
        max_message_timestamp = None
        for msg in self.messages or []:
            timestamp = _message_timestamp_as_float(msg)
            if timestamp is not None:
                max_message_timestamp = timestamp if max_message_timestamp is None else max(max_message_timestamp, timestamp)
        if max_message_timestamp is not None and max_message_timestamp > watermark:
            self.truncation_watermark = None

    def _backup_before_shrinking_save(self) -> None:
        """Best-effort snapshot of the on-disk file to ``<sid>.json.bak``.

        A backup is written only when the file currently on disk parses as a
        JSON object whose ``messages`` list is longer than the transcript
        about to be saved. Every failure here is swallowed: an unreadable,
        undecodable, unparseable or oddly shaped existing file must never
        stop the real save from going ahead.
        """
        try:
            if not self.path.exists():
                return
            existing_text = self.path.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError):
            # UnicodeDecodeError is a ValueError, not an OSError, so it needs
            # its own entry to keep this step best-effort.
            return
        try:
            existing = json.loads(existing_text)
        except ValueError:
            # Unparseable file (json.JSONDecodeError is a ValueError): there is
            # no message count to compare against, so no backup is taken.
            return
        # Valid JSON that is not an object, or whose ``messages`` is not a
        # list, has no transcript that this save could shrink.
        existing_messages = existing.get('messages') if isinstance(existing, dict) else None
        existing_msg_count = len(existing_messages) if isinstance(existing_messages, list) else 0
        incoming_msg_count = len(self.messages or [])
        if existing_msg_count <= incoming_msg_count:
            return
        bak_path = self.path.with_suffix('.json.bak')
        # Atomic write via tmp+replace, mirroring the main save() pattern, so
        # the backup either lands cleanly or does not land at all.
        bak_tmp = bak_path.with_suffix(
            f'.bak.tmp.{os.getpid()}.{threading.current_thread().ident}'
        )
        try:
            with open(bak_tmp, 'w', encoding='utf-8') as bf:
                bf.write(existing_text)
                bf.flush()
                os.fsync(bf.fileno())
            os.replace(bak_tmp, bak_path)
        except OSError:
            # Backup is best-effort; main save proceeds regardless.
            try:
                bak_tmp.unlink(missing_ok=True)
            except Exception:
                pass

    def save(self, touch_updated_at: bool = True, skip_index: bool = False) -> None:
        """Write the session to disk atomically and refresh the sidebar index.

        Args:
            touch_updated_at: set ``updated_at`` to the current time first.
            skip_index: do not call ``_write_session_index`` afterwards.

        Raises:
            ValueError: the session id fails ``is_safe_session_id``.
            RuntimeError: the session was loaded metadata-only (see #1558).
            Exception: whatever the final write raises, after the temp file
                has been removed.
        """
        if not is_safe_session_id(self.session_id):
            raise ValueError(f"Unsafe session_id {self.session_id!r}; refusing to write outside session store")
        # ── #1558 P0 guard ──────────────────────────────────────────────
        # Refuse to save a session that was loaded with metadata_only=True.
        # Such sessions have messages=[] (it's the whole point of the partial
        # load), and save() unconditionally writes self.messages to disk via
        # an atomic os.replace(). Saving a metadata-only stub thus wipes the
        # full conversation history — which is exactly the v0.50.279
        # _clear_stale_stream_state() regression that lost users 1000+
        # message conversations. Any caller that needs to mutate persisted
        # fields on a metadata-only session must reload with
        # metadata_only=False first.
        if getattr(self, '_loaded_metadata_only', False):
            raise RuntimeError(
                f"Refusing to save metadata-only session {self.session_id!r}: "
                f"would atomically overwrite on-disk messages with []. "
                f"Reload with metadata_only=False before mutating state. "
                f"See #1558."
            )
        if touch_updated_at:
            self.updated_at = time.time()
        self._maybe_clear_truncation_watermark()
        # Write metadata fields first so load_metadata_only() can read them
        # without parsing the full messages array (which may be 400KB+).
        # Fields are listed in the order they should appear in the JSON file.
        METADATA_FIELDS = [
            'session_id', 'title', 'workspace', 'model', 'model_provider', 'created_at', 'updated_at',
            'pinned', 'archived', 'project_id', 'profile',
            'input_tokens', 'output_tokens', 'estimated_cost',
            'cache_read_tokens', 'cache_write_tokens',
            'personality', 'active_stream_id',
            'pending_user_message', 'pending_attachments', 'pending_started_at',
            'compression_anchor_visible_idx', 'compression_anchor_message_key',
            'compression_anchor_summary', 'pre_compression_snapshot',
            'context_engine', 'compression_anchor_engine', 'compression_anchor_mode',
            'compression_anchor_details', 'context_engine_state',
            'context_length', 'threshold_tokens', 'last_prompt_tokens',
            'truncation_watermark',
            'gateway_routing', 'gateway_routing_history', 'llm_title_generated',
            'parent_session_id',
            'worktree_path', 'worktree_branch', 'worktree_repo_root', 'worktree_created_at',
            'is_cli_session', 'source_tag', 'raw_source', 'session_source', 'source_label', 'read_only',
            'enabled_toolsets', 'composer_draft',
        ]
        meta = {k: getattr(self, k, None) for k in METADATA_FIELDS}
        meta['message_count'] = len(self.messages or [])
        meta['messages'] = self.messages
        meta['tool_calls'] = self.tool_calls
        # Fields not in METADATA_FIELDS (e.g. last_usage) go at the end
        extra = {k: v for k, v in self.__dict__.items()
                 if k not in METADATA_FIELDS and k not in ('messages', 'tool_calls')
                 and not k.startswith('_')}
        payload = json.dumps({**meta, **extra}, ensure_ascii=False, indent=2)

        # ── #1558 backup safeguard ──────────────────────────────────────
        # Before overwriting the session file, copy the previous version to
        # ``<sid>.json.bak`` IFF the previous file has more messages than the
        # incoming payload. The asymmetric guard means:
        #   * Normal grow-the-conversation saves never produce a backup
        #     (incoming messages >= existing) — keeps disk overhead near zero.
        #   * Any save that would shrink the messages array (the failure mode
        #     of #1558, plus anything similar in the future) leaves a recoverable
        #     snapshot of the pre-shrink state on disk.
        # The recovery path is api/session_recovery.py — at server startup and
        # via /api/session/recover, sessions whose JSON has fewer messages than
        # their .bak get restored automatically.
        self._backup_before_shrinking_save()

        tmp = self.path.with_suffix(f'.tmp.{os.getpid()}.{threading.current_thread().ident}')
        try:
            with open(tmp, 'w', encoding='utf-8') as f:
                f.write(payload)
                f.flush()
                os.fsync(f.fileno())
            os.replace(tmp, self.path)
        except Exception:
            try:
                tmp.unlink(missing_ok=True)
            except Exception:
                pass
            raise
        if not skip_index:
            _write_session_index(updates=[self])

    @classmethod
    def load(cls, sid):
        """Load a full session, or return None for an unsafe or unknown id.

        Adjacent duplicate partial markers are collapsed on load; when that
        changes the transcript the cleaned session is saved back (failures of
        that save are logged at debug level and ignored).
        """
        # Validate session ID format to prevent path traversal. API/gateway
        # session ids may contain hyphens (for example ``api-*`` and
        # ``reachy-voice-*``); allow those but still reject dots/slashes.
        if not is_safe_session_id(sid):
            return None
        p = SESSION_DIR / f'{sid}.json'
        if not p.exists():
            return None
        data = json.loads(p.read_text(encoding='utf-8'))
        data['messages'], _collapsed_partials = _collapse_adjacent_duplicate_partials(data.get('messages'))
        session = cls(**data)
        if _collapsed_partials:
            try:
                # Self-heal bloated sessions on first full load without touching
                # recency/index ordering; save() creates a .bak because this
                # intentionally shrinks the transcript (#2592).
                session.save(touch_updated_at=False, skip_index=True)
            except Exception:
                logger.debug("Failed to persist collapsed duplicate partials for %s", sid, exc_info=True)
        return session

    @classmethod
    def load_metadata_only(cls, sid):
        """Load only the compact metadata fields, skipping the messages array.

        Session JSON files have metadata fields (session_id, title, model, etc.)
        at the top level, before the large messages array. Read only up to the
        top-level "messages" field and synthesize a small metadata-only object.
        Falls back to load() for legacy or unexpected file layouts.
        """
        # Same path-safety contract as load(): hyphens are valid session ids,
        # path separators and traversal dots are not.
        if not is_safe_session_id(sid):
            return None
        p = SESSION_DIR / f'{sid}.json'
        if not p.exists():
            return None
        try:
            prefix = _read_metadata_json_prefix(p)
            if not prefix:
                return cls.load(sid)
            parsed = json.loads(prefix)
            needed = {'session_id', 'title', 'created_at', 'updated_at'}
            if not needed.issubset(parsed.keys()):
                return cls.load(sid)
            parsed['messages'] = []
            parsed['tool_calls'] = []
            session = cls(**parsed)
            sidecar_message_count = _parse_nonnegative_int(parsed.get('message_count'))
            index_message_count = None
            if sidecar_message_count is None:
                index_message_count = _lookup_index_message_count(sid)
            # Modern sidecars carry an accurate message_count, so it is the
            # source of truth and we skip the per-row _index.json read in the
            # common case. The sidebar index is only a cache (it can lag behind
            # external sidecar appends/backfills), so consult it solely as a
            # fallback when the sidecar has no count. When both are present we
            # still take the largest known count as a defensive measure.
            known_counts = [
                count for count in (index_message_count, sidecar_message_count)
                if count is not None
            ]
            session._metadata_message_count = max(known_counts) if known_counts else None
            # Mark this session as a metadata-only stub. save() refuses to write
            # such a session because doing so would atomically replace the
            # on-disk JSON with messages=[], wiping the conversation. Any
            # caller that needs to mutate persisted state on a metadata-only
            # session must reload it with metadata_only=False first.
            # See #1558 — v0.50.279 _clear_stale_stream_state() data-loss bug.
            session._loaded_metadata_only = True
            return session
        except Exception:
            # Corrupt prefix or decode error — fall back to full load
            return cls.load(sid)

    def compact(self, include_runtime=False, active_stream_ids=None) -> dict:
        """Return the summary dict for this session, without the transcript.

        ``message_count`` prefers the count recorded at metadata load time and
        is at least 1 while a user message is pending. ``is_streaming`` is
        only computed when ``include_runtime`` is true; otherwise it is False.
        """
        active_stream_ids = active_stream_ids if active_stream_ids is not None else set()
        has_pending_user_message = bool(self.pending_user_message)
        message_count = (
            self._metadata_message_count
            if self._metadata_message_count is not None
            else len(self.messages)
        )
        if has_pending_user_message:
            message_count = max(message_count, 1)
        last_message_at = _last_message_timestamp(self.messages) or self.updated_at
        if has_pending_user_message and self.pending_started_at:
            last_message_at = self.pending_started_at
        return {
            'session_id': self.session_id,
            'title': self.title,
            'workspace': self.workspace,
            'model': self.model,
            'model_provider': self.model_provider,
            'message_count': message_count,
            'created_at': self.created_at,
            'updated_at': self.updated_at,
            'last_message_at': last_message_at,
            'pinned': self.pinned,
            'archived': self.archived,
            'project_id': self.project_id,
            'profile': self.profile,
            'input_tokens': self.input_tokens,
            'output_tokens': self.output_tokens,
            'estimated_cost': self.estimated_cost,
            'cache_read_tokens': self.cache_read_tokens,
            'cache_write_tokens': self.cache_write_tokens,
            'cache_hit_percent': prompt_cache_hit_percent(self.cache_read_tokens, self.input_tokens),
            'personality': self.personality,
            'compression_anchor_visible_idx': self.compression_anchor_visible_idx,
            'compression_anchor_message_key': self.compression_anchor_message_key,
            'compression_anchor_summary': self.compression_anchor_summary,
            'pre_compression_snapshot': self.pre_compression_snapshot,
            'context_engine': self.context_engine,
            'compression_anchor_engine': self.compression_anchor_engine,
            'compression_anchor_mode': self.compression_anchor_mode,
            'compression_anchor_details': self.compression_anchor_details,
            'context_engine_state': self.context_engine_state,
            'context_length': self.context_length,
            'threshold_tokens': self.threshold_tokens,
            'last_prompt_tokens': self.last_prompt_tokens,
            'gateway_routing': self.gateway_routing,
            'gateway_routing_history': self.gateway_routing_history,
            # Only emit 'parent_session_id' when set (the /branch fork link, #1342).
            # Sessions without a fork must not leak None — see test_session_lineage_metadata_api.
            **({'parent_session_id': self.parent_session_id} if self.parent_session_id else {}),
            **({
                'worktree_path': self.worktree_path,
                'worktree_branch': self.worktree_branch,
                'worktree_repo_root': self.worktree_repo_root,
                'worktree_created_at': self.worktree_created_at,
            } if self.worktree_path else {}),
            'user_message_count': sum(
                1 for message in self.messages if _message_role(message) == 'user'
            ) if isinstance(self.messages, list) else 0,
            'active_stream_id': self.active_stream_id,
            'pending_user_message': self.pending_user_message,
            'has_pending_user_message': has_pending_user_message,
            'is_cli_session': self.is_cli_session,
            'source_tag': self.source_tag,
            'raw_source': self.raw_source,
            'session_source': self.session_source,
            'source_label': self.source_label,
            'read_only': self.read_only,
            'enabled_toolsets': self.enabled_toolsets,
            'composer_draft': self.composer_draft if isinstance(self.composer_draft, dict) else {},
            'is_streaming': _is_streaming_session(
                self.active_stream_id, active_stream_ids
            ) if include_runtime else False,
        }
924
+
925
+ def _get_profile_home(profile) -> Path:
926
+ """Resolve the hermes agent home directory for the given profile.
927
+
928
+ Prefers the profile-specific helper from api.profiles; falls back to the
929
+ HERMES_HOME environment variable or ~/.hermes, expanding ~ correctly.
930
+ """
931
+ try:
932
+ from api.profiles import get_hermes_home_for_profile
933
+ return Path(get_hermes_home_for_profile(profile))
934
+ except ImportError:
935
+ return Path(os.environ.get('HERMES_HOME') or '~/.hermes').expanduser()
936
+
937
+
938
+ _INTERRUPTED_RECOVERED_WORDING = (
939
+ '**Response interrupted.**\n\n'
940
+ 'The live response stream stopped before this turn finished. '
941
+ 'The partial output above was recovered from the run journal, '
942
+ 'but the interrupted agent process could not continue.'
943
+ )
944
+ _INTERRUPTED_NO_OUTPUT_WORDING = (
945
+ '**Response interrupted.**\n\n'
946
+ 'The live response stream stopped before this turn finished. '
947
+ 'The user message above was preserved, but no agent output was recovered.'
948
+ )
949
+ _INTERRUPTED_PENDING_RETRY_WORDING = (
950
+ '**Response interrupted.**\n\n'
951
+ 'The live response stream stopped before this turn finished. '
952
+ 'Recovering the partial output from the run journal — '
953
+ 'reload this session to retry.'
954
+ )
955
+ # Neutral wording used when the lazy retry path gives up (max attempts reached
956
+ # or the marker has been pending longer than _JOURNAL_RETRY_GIVEUP_SECONDS).
957
+ _INTERRUPTED_NEUTRAL_WORDING = (
958
+ '**Response interrupted.**\n\n'
959
+ 'The live response stream stopped before this turn finished. '
960
+ 'Partial output may have been lost.'
961
+ )
962
+
963
+ _INTERRUPTION_CAUSE_DETAILS = {
964
+ 'process_restart': (
965
+ 'Evidence: the WebUI process started after this turn began, so this '
966
+ 'looks like a real process crash or restart.'
967
+ ),
968
+ 'stream_run_split_brain': (
969
+ 'Evidence: the browser response stream was gone but the worker registry '
970
+ 'still listed the run. This is a stream/run bookkeeping split-brain.'
971
+ ),
972
+ 'lost_worker_bookkeeping': (
973
+ 'Evidence: the stream was gone and worker bookkeeping no longer had an '
974
+ 'active run for it. This usually means the worker state was lost or '
975
+ 'cleaned up without a terminal event.'
976
+ ),
977
+ 'unknown': (
978
+ 'Evidence: the stream stopped, but the WebUI could not classify the '
979
+ 'interruption more precisely.'
980
+ ),
981
+ }
982
+
983
+
984
+ def _classify_interruption_cause(
985
+ *, stream_id: str | None = None, pending_started_at=None,
986
+ ) -> str:
987
+ """Classify the stale live-response state without overstating certainty."""
988
+ try:
989
+ started = float(pending_started_at) if pending_started_at else None
990
+ except (TypeError, ValueError):
991
+ started = None
992
+
993
+ if started is not None:
994
+ try:
995
+ if float(getattr(_cfg, 'SERVER_START_TIME', 0.0) or 0.0) > started:
996
+ return 'process_restart'
997
+ except (TypeError, ValueError):
998
+ pass
999
+
1000
+ if stream_id:
1001
+ try:
1002
+ with _cfg.ACTIVE_RUNS_LOCK:
1003
+ if str(stream_id) in _cfg.ACTIVE_RUNS:
1004
+ return 'stream_run_split_brain'
1005
+ except Exception:
1006
+ pass
1007
+ return 'lost_worker_bookkeeping'
1008
+
1009
+ return 'unknown'
1010
+
1011
+
1012
def _interrupted_content_for(
    *, recovered_output: bool, pending_retry: bool, interruption_cause: str,
) -> str:
    """Compose the markdown body of an interrupted-turn marker.

    The text is the fixed header, the evidence sentence for
    ``interruption_cause`` (the ``'unknown'`` sentence for unrecognized
    causes) and an outcome sentence. ``recovered_output`` takes precedence
    over ``pending_retry`` when choosing the outcome.
    """
    cause_detail = _INTERRUPTION_CAUSE_DETAILS.get(
        interruption_cause,
        _INTERRUPTION_CAUSE_DETAILS['unknown'],
    )

    if recovered_output:
        outcome = (
            'The partial output above was recovered from the run journal, '
            'but the interrupted agent process could not continue.'
        )
    elif pending_retry:
        outcome = (
            'Recovering the partial output from the run journal — '
            'reload this session to retry.'
        )
    else:
        outcome = 'The user message above was preserved, but no agent output was recovered.'

    header = (
        '**Response interrupted.**\n\n'
        'The live response stream stopped before this turn finished. '
    )
    return header + f'{cause_detail} {outcome}'
1036
+
1037
+
1038
def _interrupted_recovery_marker(
    *,
    recovered_output: bool = False,
    pending_retry: bool = False,
    stream_id: str | None = None,
    pending_started_at=None,
) -> dict:
    """Build the standard interrupted-turn marker message.

    ``recovered_output=True`` says the run journal already produced visible
    text on this repair pass, so the marker announces the recovered partial
    output.

    ``pending_retry=True`` is the lazy-retry hook for a journal that could not
    be read on this pass (page-cache loss, un-fsynced writes on slow FS,
    etc.). The marker then carries ``_pending_journal_recovery`` so a later
    ``get_session()`` can try again instead of freezing a permanent
    "no output" claim into the transcript.

    The flags are mutually exclusive and ``recovered_output`` wins when both
    are set, so a successful repair can never re-arm the retry.
    """
    cause = _classify_interruption_cause(
        stream_id=stream_id,
        pending_started_at=pending_started_at,
    )
    marker = {
        'role': 'assistant',
        'content': _interrupted_content_for(
            recovered_output=recovered_output,
            pending_retry=pending_retry,
            interruption_cause=cause,
        ),
        'timestamp': int(time.time()),
        '_error': True,
        'type': 'interrupted',
        'interruption_cause': cause,
    }
    wants_retry = pending_retry and not recovered_output
    if wants_retry:
        marker['_pending_journal_recovery'] = True
    return marker
1081
+
1082
+
1083
+ def _truncate_journal_tool_args(args, limit: int = 4) -> dict:
1084
+ if not isinstance(args, dict):
1085
+ return {}
1086
+ out = {}
1087
+ for key, value in list(args.items())[:limit]:
1088
+ text = str(value)
1089
+ out[str(key)] = text[:120] + ('...' if len(text) > 120 else '')
1090
+ return out
1091
+
1092
+
1093
+ def _normalize_journal_recovery_text(value) -> str:
1094
+ return " ".join(str(value or "").split())
1095
+
1096
+
1097
+ def _partial_message_signature(message: dict) -> tuple:
1098
+ """Return a stable identity for partial assistant markers recovered on load."""
1099
+ if not isinstance(message, dict):
1100
+ return ('', '', ())
1101
+ tool_sig = []
1102
+ for tool_call in message.get('_partial_tool_calls') or []:
1103
+ if not isinstance(tool_call, dict):
1104
+ continue
1105
+ try:
1106
+ args_sig = json.dumps(
1107
+ tool_call.get('args') or {},
1108
+ ensure_ascii=False,
1109
+ sort_keys=True,
1110
+ default=str,
1111
+ )
1112
+ except Exception:
1113
+ args_sig = str(tool_call.get('args') or '')
1114
+ tool_sig.append((
1115
+ str(tool_call.get('name') or ''),
1116
+ args_sig,
1117
+ bool(tool_call.get('done', False)),
1118
+ bool(tool_call.get('is_error', False)),
1119
+ str(tool_call.get('preview') or tool_call.get('snippet') or ''),
1120
+ ))
1121
+ return (
1122
+ str(message.get('content') or '').strip(),
1123
+ str(message.get('reasoning') or '').strip(),
1124
+ tuple(tool_sig),
1125
+ )
1126
+
1127
+
1128
+ def _collapse_adjacent_duplicate_partials(messages) -> tuple[list, bool]:
1129
+ """Collapse repeated identical partial markers from the same failed turn."""
1130
+ if not isinstance(messages, list):
1131
+ return messages, False
1132
+ collapsed = []
1133
+ changed = False
1134
+ previous_partial_sig = None
1135
+ for message in messages:
1136
+ if isinstance(message, dict) and message.get('_partial'):
1137
+ sig = _partial_message_signature(message)
1138
+ if previous_partial_sig == sig:
1139
+ changed = True
1140
+ continue
1141
+ previous_partial_sig = sig
1142
+ else:
1143
+ previous_partial_sig = None
1144
+ collapsed.append(message)
1145
+ return collapsed, changed
1146
+
1147
+
1148
def _find_existing_assistant_for_journal_content(session, content: str) -> int | None:
    """Locate an assistant message that already holds ``content``.

    Both sides are compared after whitespace normalization. Assistant rows
    flagged ``_error`` and rows with empty content are ignored. A row
    matches when its text equals the candidate, or — for candidates of at
    least 24 normalized characters — when it contains the candidate as a
    substring. Returns the index of the first match in ``session.messages``,
    or ``None`` when nothing matches or the candidate is empty.
    """
    needle = _normalize_journal_recovery_text(content)
    if not needle:
        return None
    # Short snippets are too ambiguous for substring matching.
    allow_substring = len(needle) >= 24
    for position, row in enumerate(session.messages or []):
        if not isinstance(row, dict):
            continue
        if row.get('role') != 'assistant' or row.get('_error'):
            continue
        haystack = _normalize_journal_recovery_text(row.get('content'))
        if not haystack:
            continue
        if haystack == needle or (allow_substring and needle in haystack):
            return position
    return None
1165
+
1166
+
1167
def _journal_tool_already_present(
    session,
    name: str,
    preview: str,
    *,
    stream_id: str | None = None,
) -> bool:
    """Tell whether ``session.tool_calls`` already holds an equivalent card.

    A card is a candidate when its name equals ``name`` and its
    whitespace-normalized preview (falling back to ``snippet``) equals the
    normalized ``preview``. What happens next depends on ``stream_id``:

    * ``stream_id`` omitted/falsy: any candidate counts as a match
      (session-wide name+preview matching).
    * ``stream_id`` given: a candidate tagged with a *different*
      ``_recovered_stream_id`` is skipped, because it was materialized for
      another stream's turn and must not suppress a legitimately repeated
      tool. Untagged candidates and candidates tagged with the same stream
      id still match.
    """
    wanted_name = str(name or '')
    wanted_preview = _normalize_journal_recovery_text(preview)
    wanted_stream = str(stream_id) if stream_id else None

    for card in session.tool_calls or []:
        if not isinstance(card, dict):
            continue
        if str(card.get('name') or '') != wanted_name:
            continue
        card_preview = _normalize_journal_recovery_text(
            card.get('preview') or card.get('snippet') or ''
        )
        if card_preview != wanted_preview:
            continue
        if wanted_stream is None:
            return True
        card_stream = card.get('_recovered_stream_id')
        # Only a card explicitly tagged for another stream is disqualified.
        if not card_stream or str(card_stream) == wanted_stream:
            return True
    return False
1214
+
1215
+
1216
+ def _run_journal_has_visible_output(session, stream_id: str | None) -> bool:
1217
+ if not stream_id:
1218
+ return False
1219
+ try:
1220
+ from api.run_journal import read_run_events
1221
+ journal = read_run_events(session.session_id, stream_id)
1222
+ except Exception:
1223
+ return False
1224
+ for event in journal.get('events') or []:
1225
+ if not isinstance(event, dict):
1226
+ continue
1227
+ event_name = str(event.get('event') or event.get('type') or '')
1228
+ payload = event.get('payload') if isinstance(event.get('payload'), dict) else {}
1229
+ if event_name == 'token' and str(payload.get('text') or ''):
1230
+ return True
1231
+ if event_name == 'interim_assistant':
1232
+ if payload.get('already_streamed'):
1233
+ continue
1234
+ if str(payload.get('text') or '').strip():
1235
+ return True
1236
+ if event_name == 'tool':
1237
+ return True
1238
+ return False
1239
+
1240
+
1241
+ def _run_journal_terminal_state(session, stream_id: str | None) -> str | None:
1242
+ if not stream_id:
1243
+ return None
1244
+ try:
1245
+ from api.run_journal import latest_run_summary
1246
+ summary = latest_run_summary(session.session_id, stream_id)
1247
+ except Exception:
1248
+ return None
1249
+ if not summary.get('terminal'):
1250
+ return None
1251
+ return str(summary.get('terminal_state') or '') or None
1252
+
1253
+
1254
+ def _journal_is_still_arriving(session, stream_id: str | None) -> bool:
1255
+ """Return True for journals that may become visible on a later read.
1256
+
1257
+ `read_run_events()` deliberately collapses missing files and empty files
1258
+ into an empty event list, so the lazy retry path needs a small filesystem
1259
+ visibility check to avoid burning all retry attempts while WSL2 / network
1260
+ filesystems are still surfacing the journal. Non-empty journals are treated
1261
+ as sealed enough for retry-budget accounting; if they contain no visible
1262
+ output, the normal capped give-up path handles them.
1263
+ """
1264
+ if not stream_id:
1265
+ return False
1266
+ try:
1267
+ from api.run_journal import _run_path, latest_run_summary
1268
+
1269
+ path = _run_path(session.session_id, stream_id)
1270
+ summary = latest_run_summary(session.session_id, stream_id)
1271
+ if summary.get('terminal'):
1272
+ return False
1273
+ try:
1274
+ return (not path.exists()) or path.stat().st_size == 0
1275
+ except OSError:
1276
+ return True
1277
+ except Exception:
1278
+ logger.debug(
1279
+ "Session %s: failed to classify journal visibility for stream %s",
1280
+ getattr(session, 'session_id', '?'),
1281
+ stream_id,
1282
+ exc_info=True,
1283
+ )
1284
+ return False
1285
+
1286
+
1287
def _append_journaled_partial_output(
    session,
    stream_id: str | None,
    *,
    dedupe_existing: bool = False,
) -> bool:
    """Recover already-emitted visible output from a dead stream journal.

    This repair path is intentionally conservative: it restores user-visible
    assistant text and tool-card metadata that had already been emitted over
    SSE before the WebUI process died. It does not restore hidden reasoning and
    it does not try to continue execution.

    Args:
        session: Object exposing ``session_id``, a ``messages`` list that is
            appended to in place, and ``tool_calls``.
        stream_id: Id of the stream whose journal is replayed. A falsy value
            means there is nothing to recover.
        dedupe_existing: When True, assistant text that matches an existing
            assistant message and tool cards that already exist in
            ``session.tool_calls`` are not appended again.

    Returns:
        True when at least one assistant message or tool card was appended,
        False otherwise (including when the journal is unreadable or empty).
    """
    if not stream_id:
        return False

    try:
        from api.run_journal import read_run_events
        journal = read_run_events(session.session_id, stream_id)
    except Exception:
        logger.debug(
            "Session %s: failed to read run journal for stream %s",
            getattr(session, 'session_id', '?'),
            stream_id,
            exc_info=True,
        )
        return False

    events = [event for event in journal.get('events') or [] if isinstance(event, dict)]
    if not events:
        return False

    # Replay state shared with the two closures below.
    appended_any = False
    assistant_parts: list[str] = []  # text fragments of the assistant message being rebuilt
    assistant_started_at: float | None = None  # timestamp of the first fragment
    current_assistant_idx: int | None = None  # index in session.messages that tool cards attach to
    recovered_tool_calls: list[dict] = []

    def flush_assistant() -> int | None:
        """Materialize buffered text as an assistant message; return its index.

        With nothing buffered, the index of the current anchor (possibly
        None) is returned unchanged.
        """
        nonlocal appended_any, assistant_parts, assistant_started_at, current_assistant_idx
        content = ''.join(assistant_parts).strip()
        assistant_parts = []
        if not content:
            return current_assistant_idx
        if dedupe_existing:
            # Reuse an assistant message that already contains this text
            # instead of appending a duplicate.
            existing_idx = _find_existing_assistant_for_journal_content(session, content)
            if existing_idx is not None:
                current_assistant_idx = existing_idx
                assistant_started_at = None
                return existing_idx
        timestamp = int(assistant_started_at or time.time())
        session.messages.append({
            'role': 'assistant',
            'content': content,
            'timestamp': timestamp,
            '_recovered_from_run_journal': True,
            '_recovered_stream_id': stream_id,
        })
        current_assistant_idx = len(session.messages) - 1
        assistant_started_at = None
        appended_any = True
        return current_assistant_idx

    def ensure_assistant_anchor(created_at: float | None = None) -> int:
        """Return an assistant message index for tool cards, creating one if needed."""
        nonlocal appended_any, current_assistant_idx
        idx = flush_assistant()
        if idx is not None:
            return idx
        # A stream can start with tools before any text. Keep those tools
        # visible after restart with an empty recovered assistant anchor instead
        # of inventing synthetic progress prose.
        session.messages.append({
            'role': 'assistant',
            'content': '',
            'timestamp': int(created_at or time.time()),
            '_recovered_from_run_journal': True,
            '_recovered_stream_id': stream_id,
        })
        current_assistant_idx = len(session.messages) - 1
        appended_any = True
        return current_assistant_idx

    for event in events:
        event_name = str(event.get('event') or event.get('type') or '')
        payload = event.get('payload') if isinstance(event.get('payload'), dict) else {}
        created_at = event.get('created_at') if isinstance(event.get('created_at'), (int, float)) else None
        if event_name == 'token':
            # Streamed text: buffer it until a boundary event flushes it.
            text = str(payload.get('text') or '')
            if not text:
                continue
            if not assistant_parts and assistant_started_at is None:
                assistant_started_at = created_at or time.time()
            assistant_parts.append(text)
            continue
        if event_name == 'interim_assistant':
            if payload.get('already_streamed'):
                # The text was already delivered as token events; only close
                # the buffered message.
                flush_assistant()
                continue
            text = str(payload.get('text') or '').strip()
            if not text:
                continue
            if not assistant_parts and assistant_started_at is None:
                assistant_started_at = created_at or time.time()
            # Separate the interim text from buffered tokens unless the
            # buffer already ends with whitespace.
            if assistant_parts and not ''.join(assistant_parts).endswith(('\n', ' ')):
                assistant_parts.append('\n\n')
            assistant_parts.append(text)
            flush_assistant()
            continue
        if event_name == 'tool':
            # Tool cards attach to the assistant message that precedes them.
            anchor_idx = flush_assistant()
            if anchor_idx is None:
                anchor_idx = ensure_assistant_anchor(created_at)
            name = str(payload.get('name') or 'tool')
            preview = str(payload.get('preview') or '')
            if dedupe_existing and _journal_tool_already_present(
                session, name, preview, stream_id=stream_id,
            ):
                current_assistant_idx = anchor_idx
                continue
            recovered_tool_calls.append({
                'name': name,
                'preview': preview,
                'snippet': preview,
                'tid': f"journal-{event.get('seq') or len(recovered_tool_calls) + 1}",
                'assistant_msg_idx': anchor_idx,
                'args': _truncate_journal_tool_args(payload.get('args') or {}),
                'done': False,
                '_recovered_from_run_journal': True,
                '_recovered_stream_id': stream_id,
            })
            appended_any = True
            current_assistant_idx = anchor_idx
            continue
        if event_name == 'tool_complete':
            # Complete the most recent unfinished recovered tool; when the
            # event names a tool, only a card with that name qualifies.
            name = str(payload.get('name') or '')
            for tool_call in reversed(recovered_tool_calls):
                if tool_call.get('done'):
                    continue
                if not name or tool_call.get('name') == name:
                    tool_call['done'] = True
                    if payload.get('preview'):
                        tool_call['preview'] = str(payload.get('preview') or '')
                        tool_call['snippet'] = str(payload.get('preview') or '')
                    if payload.get('duration') is not None:
                        tool_call['duration'] = payload.get('duration')
                    tool_call['is_error'] = bool(payload.get('is_error', False))
                    break
            continue
        if event_name in {'done', 'stream_end', 'cancel', 'apperror', 'error'}:
            # End-of-stream style events close any buffered text.
            flush_assistant()

    # Flush text left in the buffer when the journal ends without a
    # terminal event.
    flush_assistant()
    if recovered_tool_calls:
        session.tool_calls = list(session.tool_calls or []) + recovered_tool_calls
        appended_any = True
    return appended_any
1443
+
1444
+
1445
+ # ── Lazy run-journal recovery (read-side self-heal) ─────────────────────────
1446
+ #
1447
+ # When sidecar repair runs before the run-journal for the dead stream is
1448
+ # visible on disk (page-cache loss on WSL2 9p / DrvFs, an un-fsynced journal
1449
+ # tail, a slow network FS, …), `_append_journaled_partial_output` returns
1450
+ # False even though the journaled events will appear on disk shortly. Without
1451
+ # the helpers below the repair path baked a permanent "no agent output was
1452
+ # recovered" claim into the marker, and a later session read could never
1453
+ # correct it.
1454
+ #
1455
+ # The contract is:
1456
+ #
1457
+ # * Sidecar repair (`_apply_core_sync_or_error_marker`) writes a marker
1458
+ # with `_pending_journal_recovery=True` whenever it could not recover
1459
+ # visible output AND the stream id is known. Three retry-meta keys go
1460
+ # onto the marker: `_journal_retry_stream_id`, `_journal_retry_attempts`,
1461
+ # `_journal_retry_first_seen_ts`.
1462
+ # * Every `get_session()` call that returns the full session checks the
1463
+ # latest assistant marker; if the flag is set it re-runs
1464
+ # `_append_journaled_partial_output` with `dedupe_existing=True`. On
1465
+ # success the marker is promoted in place to the recovered-output
1466
+ # wording, the journaled rows are reordered to sit above the marker,
1467
+ # and all retry meta is stripped. If the journal is still missing or
1468
+ # zero-byte, the retry is a no-op and does not consume attempt budget.
1469
+ # Terminal/non-useful journals consume attempt budget and can demote
1470
+ # immediately at the max-attempt cap.
1471
+ # * After `_JOURNAL_RETRY_MAX_ATTEMPTS` failed retries or
1472
+ # `_JOURNAL_RETRY_GIVEUP_SECONDS` of wall-clock age, the marker is
1473
+ # demoted to the neutral wording ("Partial output may have been lost.")
1474
+ # so users do not see "reload to retry" prompts forever.
1475
# Number of budget-consuming retries after which a pending marker is demoted
# to the neutral wording.
_JOURNAL_RETRY_MAX_ATTEMPTS = 12
# Age in seconds (24 hours), measured from the marker's
# `_journal_retry_first_seen_ts`, after which the marker is demoted.
_JOURNAL_RETRY_GIVEUP_SECONDS = 24 * 3600
# Registry of retry locks, keyed by session id string.
_JOURNAL_RETRY_LOCKS: dict[str, threading.Lock] = {}
# Held while reading or mutating _JOURNAL_RETRY_LOCKS.
_JOURNAL_RETRY_LOCKS_GUARD = threading.Lock()
1479
+
1480
+
1481
def _journal_retry_lock_for_sid(sid: str) -> threading.Lock:
    """Return the retry lock registered for ``sid``, creating it on first use.

    The registry is only touched while ``_JOURNAL_RETRY_LOCKS_GUARD`` is
    held; the key is the stringified session id.
    """
    with _JOURNAL_RETRY_LOCKS_GUARD:
        key = str(sid)
        lock = _JOURNAL_RETRY_LOCKS.get(key)
        if lock is None:
            lock = threading.Lock()
            _JOURNAL_RETRY_LOCKS[key] = lock
        return lock
1484
+
1485
+
1486
def _build_recovery_marker_with_retry_hook(
    *, recovered_output: bool, stream_id: str | None, pending_started_at=None,
) -> dict:
    """Build an interrupted-turn marker and arm lazy retry when it applies.

    * Output recovered: a plain recovered-output marker, no retry metadata.
    * No output and no stream id: a plain not-recovered marker, since there
      is no journal to retry against.
    * No output but a stream id is known: a ``pending_retry`` marker that
      additionally carries ``_journal_retry_stream_id``,
      ``_journal_retry_attempts`` (starting at 0) and
      ``_journal_retry_first_seen_ts`` (current epoch seconds).
    """
    if not recovered_output and stream_id:
        marker = _interrupted_recovery_marker(
            pending_retry=True,
            stream_id=stream_id,
            pending_started_at=pending_started_at,
        )
        marker['_journal_retry_stream_id'] = str(stream_id)
        marker['_journal_retry_attempts'] = 0
        marker['_journal_retry_first_seen_ts'] = int(time.time())
        return marker
    if recovered_output:
        return _interrupted_recovery_marker(
            recovered_output=True,
            stream_id=stream_id,
            pending_started_at=pending_started_at,
        )
    return _interrupted_recovery_marker(
        recovered_output=False,
        pending_started_at=pending_started_at,
    )
1511
+
1512
+
1513
+ def _session_has_pending_journal_retry(session) -> bool:
1514
+ """Cheap short-circuit: scan from the tail until the most recent normal
1515
+ assistant turn. Any `_pending_journal_recovery` flag found before then
1516
+ means a retry is queued.
1517
+ """
1518
+ messages = getattr(session, 'messages', None) or []
1519
+ for msg in reversed(messages):
1520
+ if not isinstance(msg, dict):
1521
+ continue
1522
+ if msg.get('_pending_journal_recovery'):
1523
+ return True
1524
+ if msg.get('role') == 'assistant' and not msg.get('_error'):
1525
+ # A normal assistant turn after any pending marker — nothing to
1526
+ # retry above this point.
1527
+ return False
1528
+ return False
1529
+
1530
+
1531
+ def _strip_journal_retry_meta(marker: dict) -> None:
1532
+ marker.pop('_pending_journal_recovery', None)
1533
+ marker.pop('_journal_retry_stream_id', None)
1534
+ marker.pop('_journal_retry_attempts', None)
1535
+ marker.pop('_journal_retry_first_seen_ts', None)
1536
+
1537
+
1538
+ def _reorder_journal_tail_above_marker(session, marker_idx: int) -> None:
1539
+ """Move `_recovered_from_run_journal=True` rows appended *after*
1540
+ ``marker_idx`` to sit immediately above the marker so chronological
1541
+ order is preserved (journaled output happened during the turn, marker
1542
+ annotates its end).
1543
+ """
1544
+ messages = session.messages
1545
+ if marker_idx < 0 or marker_idx >= len(messages):
1546
+ return
1547
+ tail = messages[marker_idx + 1 :]
1548
+ if not tail:
1549
+ return
1550
+ journaled = [
1551
+ m for m in tail
1552
+ if isinstance(m, dict) and m.get('_recovered_from_run_journal')
1553
+ ]
1554
+ if not journaled:
1555
+ return
1556
+ rest = [
1557
+ m for m in tail
1558
+ if not (isinstance(m, dict) and m.get('_recovered_from_run_journal'))
1559
+ ]
1560
+ marker = messages[marker_idx]
1561
+ new_messages = (
1562
+ messages[:marker_idx]
1563
+ + journaled
1564
+ + [marker]
1565
+ + rest
1566
+ )
1567
+ # Rebase any tool_calls.assistant_msg_idx values that pointed into the
1568
+ # journaled rows when they were appended at the tail.
1569
+ old_journaled_idx_base = marker_idx + 1
1570
+ new_journaled_idx_base = marker_idx
1571
+ shift = new_journaled_idx_base - old_journaled_idx_base # = -1
1572
+ for tool_call in session.tool_calls or []:
1573
+ if not isinstance(tool_call, dict):
1574
+ continue
1575
+ idx = tool_call.get('assistant_msg_idx')
1576
+ if isinstance(idx, int) and idx >= old_journaled_idx_base \
1577
+ and idx < old_journaled_idx_base + len(journaled):
1578
+ tool_call['assistant_msg_idx'] = idx + shift
1579
+ session.messages = new_messages
1580
+
1581
+
1582
def _try_retry_journal_recovery_in_place(session) -> bool:
    """Run the lazy journal retry for ``session`` unless one is already running.

    At most one retry per session id runs at a time. If another thread holds
    the session's retry lock, this call logs at debug level and returns
    False without waiting.

    The lock is looked up and acquired in a single step under
    ``_JOURNAL_RETRY_LOCKS_GUARD``, and it is removed from the registry and
    released under that same guard. Doing these in separate steps would let
    a thread holding a stale reference to a just-removed lock run alongside
    a thread that created a fresh lock for the same session.

    Returns:
        The result of ``_retry_journal_recovery_in_place`` (True when the
        marker was promoted), or False when a retry was already in flight.
    """
    sid = str(getattr(session, 'session_id', '') or '')
    with _JOURNAL_RETRY_LOCKS_GUARD:
        lock = _JOURNAL_RETRY_LOCKS.setdefault(sid, threading.Lock())
        # Non-blocking, so holding the guard here cannot stall other sessions.
        acquired = lock.acquire(blocking=False)
    if not acquired:
        logger.debug("lazy journal-retry already running for session %s", sid)
        return False
    try:
        return _retry_journal_recovery_in_place(
            session, preserve_arriving_budget=True,
        )
    finally:
        with _JOURNAL_RETRY_LOCKS_GUARD:
            # Drop the registry entry first so the table does not grow
            # without bound, then release while still under the guard.
            if _JOURNAL_RETRY_LOCKS.get(sid) is lock:
                _JOURNAL_RETRY_LOCKS.pop(sid, None)
            lock.release()
1597
+
1598
+
1599
+ def _retry_journal_recovery_in_place(
1600
+ session,
1601
+ *,
1602
+ preserve_arriving_budget: bool = False,
1603
+ ) -> bool:
1604
+ """Re-attempt run-journal recovery for the most recent pending marker.
1605
+
1606
+ Returns True if the marker was promoted to the recovered-output wording.
1607
+ Never raises — caller is best-effort.
1608
+ """
1609
+ try:
1610
+ messages = session.messages or []
1611
+ for idx in range(len(messages) - 1, -1, -1):
1612
+ msg = messages[idx]
1613
+ if not isinstance(msg, dict):
1614
+ continue
1615
+ if msg.get('role') == 'assistant' and not msg.get('_error') \
1616
+ and not msg.get('_pending_journal_recovery'):
1617
+ # Walked past the pending marker without finding it.
1618
+ return False
1619
+ if not (
1620
+ msg.get('type') == 'interrupted'
1621
+ and msg.get('_pending_journal_recovery')
1622
+ ):
1623
+ continue
1624
+ stream_id = msg.get('_journal_retry_stream_id')
1625
+ first_seen = msg.get('_journal_retry_first_seen_ts') or 0
1626
+ attempts = int(msg.get('_journal_retry_attempts') or 0)
1627
+ now = time.time()
1628
+ give_up = (
1629
+ attempts >= _JOURNAL_RETRY_MAX_ATTEMPTS
1630
+ or (
1631
+ first_seen
1632
+ and now - float(first_seen) > _JOURNAL_RETRY_GIVEUP_SECONDS
1633
+ )
1634
+ )
1635
+ if not stream_id:
1636
+ # No stream id to retry against; demote immediately.
1637
+ msg['content'] = _INTERRUPTED_NEUTRAL_WORDING
1638
+ _strip_journal_retry_meta(msg)
1639
+ try:
1640
+ session.save(touch_updated_at=False)
1641
+ except Exception:
1642
+ logger.debug(
1643
+ "save() failed while demoting marker for session %s",
1644
+ getattr(session, 'session_id', '?'),
1645
+ exc_info=True,
1646
+ )
1647
+ return False
1648
+ if give_up:
1649
+ msg['content'] = _INTERRUPTED_NEUTRAL_WORDING
1650
+ _strip_journal_retry_meta(msg)
1651
+ try:
1652
+ session.save(touch_updated_at=False)
1653
+ except Exception:
1654
+ logger.debug(
1655
+ "save() failed while demoting marker for session %s",
1656
+ getattr(session, 'session_id', '?'),
1657
+ exc_info=True,
1658
+ )
1659
+ return False
1660
+ tail_len_before = len(session.messages)
1661
+ ok = _append_journaled_partial_output(
1662
+ session, stream_id, dedupe_existing=True,
1663
+ )
1664
+ if ok:
1665
+ msg['content'] = _INTERRUPTED_RECOVERED_WORDING
1666
+ _strip_journal_retry_meta(msg)
1667
+ # The journaled rows were appended at the end of messages;
1668
+ # only the rows past the previous tail count as "newly
1669
+ # journaled" and need to move above the marker.
1670
+ _ = tail_len_before # informational; helper below scans
1671
+ _reorder_journal_tail_above_marker(session, idx)
1672
+ try:
1673
+ session.save(touch_updated_at=False)
1674
+ except Exception:
1675
+ logger.debug(
1676
+ "save() failed while promoting marker for session %s",
1677
+ getattr(session, 'session_id', '?'),
1678
+ exc_info=True,
1679
+ )
1680
+ logger.info(
1681
+ "Session %s: lazy journal-recovery promoted marker for "
1682
+ "stream %s after %d attempts",
1683
+ getattr(session, 'session_id', '?'),
1684
+ stream_id,
1685
+ attempts,
1686
+ )
1687
+ return True
1688
+ if (
1689
+ preserve_arriving_budget
1690
+ and _journal_is_still_arriving(session, stream_id)
1691
+ ):
1692
+ logger.debug(
1693
+ "Session %s: journal for stream %s still arriving; "
1694
+ "preserving retry budget",
1695
+ getattr(session, 'session_id', '?'),
1696
+ stream_id,
1697
+ )
1698
+ return False
1699
+ next_attempts = attempts + 1
1700
+ if next_attempts >= _JOURNAL_RETRY_MAX_ATTEMPTS:
1701
+ msg['content'] = _INTERRUPTED_NEUTRAL_WORDING
1702
+ _strip_journal_retry_meta(msg)
1703
+ else:
1704
+ msg['_journal_retry_attempts'] = next_attempts
1705
+ try:
1706
+ session.save(touch_updated_at=False)
1707
+ except Exception:
1708
+ logger.debug(
1709
+ "save() failed while updating retry counter for session %s",
1710
+ getattr(session, 'session_id', '?'),
1711
+ exc_info=True,
1712
+ )
1713
+ return False
1714
+ return False
1715
+ except Exception:
1716
+ logger.exception(
1717
+ "_retry_journal_recovery_in_place failed for session %s",
1718
+ getattr(session, 'session_id', '?'),
1719
+ )
1720
+ return False
1721
+
1722
+
1723
def _apply_core_sync_or_error_marker(
    session,
    core_path,
    stream_id_for_recheck=None,
    *,
    require_stream_dead=True,
    touch_updated_at=True,
) -> bool:
    """Inner repair logic. Must be called with the per-session lock already held.

    Re-checks session state under the lock, then either syncs messages from the
    core transcript (if present and non-empty) or restores the pending user
    message as a recovered user turn and appends an error marker.

    stream_id_for_recheck: when provided, repair bails if session.active_stream_id
    changed (e.g. context compression rotated it). The cache-miss repair path
    also requires the stream to be absent from active streams; the streaming
    thread's final fallback passes require_stream_dead=False because it runs
    before its own stream is removed from STREAMS.

    touch_updated_at: forwarded unchanged to every ``session.save()`` call
    made here.

    Three repair branches exist, chosen in this order:

    1. ``session.messages`` is non-empty: history is kept, the pending user
       turn is materialized, journaled output is recovered, and (unless the
       journal reports a ``completed`` run) a recovery marker is appended.
    2. ``session.messages`` is empty and the core transcript at ``core_path``
       has messages: those messages replace the session history.
    3. Otherwise: the pending user turn is restored and a recovery marker is
       appended.

    Returns True if repair was applied, False if the re-check bailed out.
    Must never raise — caller is responsible for exception handling.

    NOTE(review): nothing in this body catches exceptions (reading the core
    transcript and ``session.save()`` are unguarded), so the "never raise"
    contract appears to rely on the caller's handling — confirm.
    """
    sid = session.session_id
    # Bail if pending is unset — nothing to repair.
    if not session.pending_user_message:
        return False
    if stream_id_for_recheck is not None:
        # Bail if active_stream_id rotated between the pre-lock check and now.
        # Cache-miss repair must also skip if the stream is alive again, but the
        # streaming thread's final fallback runs before removing its own stream
        # from STREAMS and must be allowed to repair that same active stream.
        if session.active_stream_id != stream_id_for_recheck:
            return False
        if require_stream_dead and session.active_stream_id in _active_stream_ids():
            return False

    # When messages is already non-empty, do not overwrite history from any core
    # transcript. The pending user turn may still be the only durable copy of a
    # prompt submitted just before a server restart, so materialize it before
    # clearing runtime stream state.
    if len(session.messages) != 0:
        # Compare whitespace-normalized text of the pending prompt against the
        # most recent user message to see whether it was already persisted.
        _pending_text = " ".join(str(session.pending_user_message or "").split())
        _already_checkpointed = False
        if _pending_text and session.messages:
            for _last_msg in reversed(session.messages):
                if isinstance(_last_msg, dict) and _last_msg.get('role') == 'user':
                    _last_text = " ".join(str(_last_msg.get('content') or "").split())
                    _already_checkpointed = _last_text == _pending_text
                    break
        # Prefer the original send time for the recovered turn's timestamp.
        _recovered_ts = int(time.time())
        if isinstance(session.pending_started_at, (int, float)) and session.pending_started_at > 0:
            _recovered_ts = int(session.pending_started_at)
        # Capture these before the pending/stream fields are cleared below.
        _stream_id = stream_id_for_recheck or session.active_stream_id
        _pending_started_at = session.pending_started_at
        if _run_journal_terminal_state(session, _stream_id) == 'completed':
            # The journal says the run finished: restore what is missing and
            # clear the stale pending state without adding an error marker.
            if not _already_checkpointed:
                _append_recovered_pending_turn(session, timestamp=_recovered_ts)
            _append_journaled_partial_output(
                session,
                _stream_id,
                dedupe_existing=True,
            )
            session.active_stream_id = None
            session.pending_user_message = None
            session.pending_attachments = []
            session.pending_started_at = None
            session.save(touch_updated_at=touch_updated_at)
            logger.info(
                "Session %s: cleared stale pending state for completed stream %s without error marker",
                sid,
                _stream_id,
            )
            return True
        if not _already_checkpointed:
            _append_recovered_pending_turn(session, timestamp=_recovered_ts)
        else:
            # The user turn is already in messages; only hand a recovered copy
            # to the context helper.
            recovered = {
                'role': 'user',
                'content': session.pending_user_message,
                '_recovered': True,
            }
            if session.pending_attachments:
                recovered['attachments'] = list(session.pending_attachments)
            _append_recovered_turn_to_context(session, recovered)
        recovered_output = _append_journaled_partial_output(
            session,
            _stream_id,
        )
        session.active_stream_id = None
        session.pending_user_message = None
        session.pending_attachments = []
        session.pending_started_at = None
        # The marker arms the lazy-retry hook when no output was recovered
        # but a stream id is known.
        session.messages.append(
            _build_recovery_marker_with_retry_hook(
                recovered_output=recovered_output,
                stream_id=_stream_id,
                pending_started_at=_pending_started_at,
            )
        )
        session.save(touch_updated_at=touch_updated_at)
        logger.info(
            "Session %s: recovered pending user turn (messages non-empty), added error marker",
            sid,
        )
        return True

    # ── messages *is* empty ─ full repair ─────────────────────────────────

    if core_path.exists():
        with open(core_path, encoding='utf-8') as f:
            core = json.load(f)
        core_messages = core.get('messages', [])
        if core_messages:
            _stream_id = stream_id_for_recheck or session.active_stream_id
            # Adopt the core transcript as the session history, along with
            # its tool calls and usage counters when present.
            session.messages = core_messages
            session.tool_calls = core.get('tool_calls', [])
            for field in ('input_tokens', 'output_tokens', 'estimated_cost'):
                if core.get(field) is not None:
                    setattr(session, field, core[field])
            _pending_text = _normalize_journal_recovery_text(session.pending_user_message)
            _already_checkpointed = False
            if _pending_text and session.messages:
                for _last_msg in reversed(session.messages):
                    if isinstance(_last_msg, dict) and _last_msg.get('role') == 'user':
                        _last_text = _normalize_journal_recovery_text(_last_msg.get('content'))
                        _already_checkpointed = _last_text == _pending_text
                        break
            # Re-add the pending prompt only when the core transcript lacks it
            # and the journal holds visible output for that turn.
            if (
                _pending_text
                and not _already_checkpointed
                and _run_journal_has_visible_output(session, _stream_id)
            ):
                _recovered_ts = int(time.time())
                if isinstance(session.pending_started_at, (int, float)) and session.pending_started_at > 0:
                    _recovered_ts = int(session.pending_started_at)
                _append_recovered_pending_turn(session, timestamp=_recovered_ts)
            recovered_output = _append_journaled_partial_output(
                session,
                _stream_id,
                dedupe_existing=True,
            )
            _pending_started_at = session.pending_started_at
            session.active_stream_id = None
            session.pending_user_message = None
            session.pending_attachments = []
            session.pending_started_at = None
            if recovered_output:
                session.messages.append(
                    _interrupted_recovery_marker(
                        recovered_output=True,
                        stream_id=_stream_id,
                        pending_started_at=_pending_started_at,
                    )
                )
            # NOTE: when the core transcript was synced in but the run journal
            # is not yet visible, intentionally do NOT append a lazy-retry
            # marker here. In this branch the canonical history is the core
            # transcript itself (which has already been written to s.messages
            # above) and the marker is purely advisory — the existing contract
            # is "marker only when there is a recovered partial turn to
            # annotate". Adding a pending-retry marker on every empty-journal
            # core-sync would surface a spurious "reload to retry" banner on
            # sessions whose journal is legitimately absent (e.g. archived
            # streams). The first and third branches handle the lost-response
            # case where the marker is the only signal the user gets.
            session.save(touch_updated_at=touch_updated_at)
            logger.info(
                "Session %s: synced %d messages from core transcript%s",
                sid,
                len(core_messages),
                " and recovered journaled output" if recovered_output else "",
            )
            return True

    # Core missing or empty — restore the pending user message as a recovered
    # user turn (preserving the draft), then append an error marker.
    if session.pending_user_message:
        # Use the original send time if available so the recovered turn
        # appears in the correct chronological position.
        _recovered_ts = int(time.time())
        if isinstance(session.pending_started_at, (int, float)) and session.pending_started_at > 0:
            _recovered_ts = int(session.pending_started_at)
        _append_recovered_pending_turn(session, timestamp=_recovered_ts)
    recovered_output = _append_journaled_partial_output(
        session,
        stream_id_for_recheck or session.active_stream_id,
    )
    # Capture these before the pending/stream fields are cleared.
    _stream_id = stream_id_for_recheck or session.active_stream_id
    _pending_started_at = session.pending_started_at
    session.active_stream_id = None
    session.pending_user_message = None
    session.pending_attachments = []
    session.pending_started_at = None
    session.messages.append(
        _build_recovery_marker_with_retry_hook(
            recovered_output=recovered_output,
            stream_id=_stream_id,
            pending_started_at=_pending_started_at,
        )
    )
    session.save(touch_updated_at=touch_updated_at)
    logger.info("Session %s: no core transcript found, added error marker", sid)
    return True
1927
+
1928
+
1929
+ # ── _repair_stale_pending grace period (#1624) ─────────────────────────────
1930
+ #
1931
+ # Defense-in-depth against a narrow race between the streaming thread clearing
1932
+ # pending_user_message and STREAMS.pop(stream_id). Without this guard, any
1933
+ # fast turn (e.g. command approval) that exits the thread before the on-disk
1934
+ # pending clear has flushed gets misdiagnosed as a crashed turn, producing a
1935
+ # spurious "Response interrupted." marker.
1936
+ #
1937
+ # 30s covers the worst-case post-loop persistence window: LLM finishing a tool
1938
+ # batch + lock contention with the checkpoint thread + a multi-MB session.save.
1939
+ # A legitimately crashed turn whose pending_started_at is < 30s old will not
1940
+ # repair on the first get_session() call, but WILL repair on the next call
1941
+ # after the grace period elapses (typically the user's next interaction).
1942
+ #
1943
+ # Missing/falsy pending_started_at (legacy sidecars from before that field
1944
+ # existed, or any path that forgot to set it) is treated as "old enough" so
1945
+ # repair still recovers them — preserves current behavior for legacy data.
1946
_REPAIR_STALE_PENDING_GRACE_SECONDS = 30  # seconds; compared against time.time() - pending_started_at
1947
+
1948
+
1949
def _has_compression_continuation(session) -> bool:
    """Return True when ``session`` is an archived compression parent.

    Context compression rotates the live WebUI session id: the old sidecar is
    preserved for lineage while the new child owns the running/completed turn.
    Stale-pending repair must not append an interruption marker to that old
    parent just because its stream bookkeeping disappeared after the rotation.

    Evidence of a child session is looked up in three places, in order:
    the in-memory ``SESSIONS`` cache, the session index file, and finally a
    shallow scan of the first 4096 characters of every session JSON file.

    Returns False when ``session`` has no ``session_id`` or no child is found.
    Lookup failures are logged at debug level and treated as "not found".
    """
    sid = getattr(session, 'session_id', None)
    if not sid:
        return False

    def _row_is_continuation(row) -> bool:
        if not isinstance(row, dict):
            return False
        child_sid = row.get('session_id')
        if not child_sid or child_sid == sid:
            return False
        if row.get('parent_session_id') != sid:
            return False
        # Any child row is enough evidence that this pending state belongs to a
        # compression lineage, not a dead standalone turn. The child may itself
        # temporarily carry a bad pre_compression_snapshot flag from older code;
        # do not filter it out here or the guard misses the exact regression.
        return True

    try:
        with LOCK:
            for child in SESSIONS.values():
                if getattr(child, 'session_id', None) == sid:
                    continue
                if getattr(child, 'parent_session_id', None) == sid:
                    return True
    except Exception:
        # Best-effort probe: fall through to the on-disk checks, but leave a
        # trace like the other two lookups do instead of failing silently.
        logger.debug("Failed to inspect cached sessions for compression continuation", exc_info=True)

    try:
        if SESSION_INDEX_FILE.exists():
            entries = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
            if isinstance(entries, list) and any(_row_is_continuation(e) for e in entries):
                return True
    except Exception:
        logger.debug("Failed to inspect session index for compression continuation", exc_info=True)

    # Index rows can lag behind rapid compression/save races. Fall back to a
    # shallow JSON metadata scan; session files write parent_session_id before
    # the messages array, so reading only the head of each file avoids loading
    # multi-MB transcripts.
    try:
        needle = f'"parent_session_id": "{sid}"'
        for path in SESSION_DIR.glob('*.json'):
            if path.name.startswith('_') or path.stem == sid:
                continue
            try:
                # Read just the first 4096 characters rather than the whole
                # file; undecodable bytes are dropped, not raised.
                with path.open(encoding='utf-8', errors='ignore') as handle:
                    head = handle.read(4096)
            except OSError:
                continue
            if needle in head:
                return True
    except Exception:
        logger.debug("Failed to scan session files for compression continuation", exc_info=True)

    return False
2013
+
2014
+
2015
def _repair_stale_pending(session) -> bool:
    """Recover a sidecar stuck with messages=[] and stale pending state.

    Fires only when messages is empty, pending_user_message is set,
    active_stream_id is set, the stream is no longer alive, AND the turn is
    older than _REPAIR_STALE_PENDING_GRACE_SECONDS (#1624).

    NOTE(review): the pre-checks in this function do not inspect
    ``session.messages``; if the empty-messages condition is enforced, it is
    enforced elsewhere (presumably in _apply_core_sync_or_error_marker or by
    the caller) — confirm.

    The repair is also skipped for sessions flagged ``pre_compression_snapshot``,
    for sessions that _has_compression_continuation() reports as compression
    parents, and for session ids rejected by is_safe_session_id().

    Uses a non-blocking lock acquire so a caller that already holds the
    per-session lock (e.g. retry_last, undo_last, cancel_stream) cannot
    deadlock when get_session() triggers this on a cache miss.

    Returns True if repair was applied, False otherwise.
    Must never raise — all errors are caught and logged.
    """
    # Capture the stream id seen at pre-check time; the under-lock re-check in
    # _apply_core_sync_or_error_marker uses this to detect a rotated active_stream_id
    # (e.g. context compression) or a stream that came back alive.
    _seen_stream_id = session.active_stream_id
    if (not session.pending_user_message
            or not _seen_stream_id
            or _seen_stream_id in _active_stream_ids()):
        return False
    if getattr(session, 'pre_compression_snapshot', False):
        logger.debug(
            "_repair_stale_pending: skipping pre-compression snapshot %s",
            getattr(session, 'session_id', '?'),
        )
        return False
    if _has_compression_continuation(session):
        logger.debug(
            "_repair_stale_pending: skipping compression parent %s with continuation",
            getattr(session, 'session_id', '?'),
        )
        return False

    # Grace-period guard: bail if the turn is too fresh to be a real crash.
    # Falsy pending_started_at (None, 0, missing) means "old enough" — preserve
    # legacy-data recovery semantics for sessions that pre-date the field.
    _started = getattr(session, 'pending_started_at', None)
    if _started:
        try:
            _age = time.time() - float(_started)
        except (TypeError, ValueError):
            # An unparseable timestamp is treated like a missing one.
            _age = float('inf')
        if _age < _REPAIR_STALE_PENDING_GRACE_SECONDS:
            logger.debug(
                "_repair_stale_pending: skipping repair for session %s — "
                "pending_started_at age=%.1fs < %ds grace window",
                session.session_id, _age, _REPAIR_STALE_PENDING_GRACE_SECONDS,
            )
            return False
    else:
        # Treat missing/falsy pending_started_at as "old enough" (legacy data).
        _age = float('inf')

    sid = session.session_id
    if not is_safe_session_id(sid):
        return False

    try:
        profile_home = _get_profile_home(session.profile)
        # Path of the core transcript handed to the sync/marker helper below.
        core_path = profile_home / 'sessions' / f'session_{sid}.json'

        lock = _get_session_agent_lock(sid)
        # Non-blocking acquire: bail immediately if the caller already holds this
        # lock (e.g. retry_last, undo_last, cancel_stream). Blocking would deadlock
        # because _get_session_agent_lock returns a non-reentrant threading.Lock.
        if not lock.acquire(blocking=False):
            logger.debug(
                "_repair_stale_pending: lock contended, skipping repair for session %s", sid,
            )
            return False
        try:
            # Telemetry (#1624): log legitimate repair firings so the next batch
            # of user reports tells us whether the underlying race still fires
            # post-fix. Rate-limit by age (Opus pre-release SHOULD-FIX): WARNING
            # for the diagnostically valuable race window (< 5 min — actual
            # leak-path candidates that slipped past the grace guard) and DEBUG
            # for the long-tail (orphaned sidecars from prior process lifetimes)
            # so reconnect loops on stuck sessions don't flood the log.
            _DIAG_WARN_WINDOW_SECONDS = 300  # 5 min
            _age_str = ('inf' if _age == float('inf') else f'{_age:.1f}s')
            _log = logger.warning if _age < _DIAG_WARN_WINDOW_SECONDS else logger.debug
            _log(
                "_repair_stale_pending firing: session=%s stream_id=%s pending_age=%s",
                sid, _seen_stream_id, _age_str,
            )
            return _apply_core_sync_or_error_marker(
                session, core_path, stream_id_for_recheck=_seen_stream_id,
            )
        finally:
            # Always release, including when the helper raises.
            lock.release()
    except Exception:
        logger.exception("_repair_stale_pending failed for session %s", sid)
        return False
2110
+
2111
+
2112
def _last_non_tool_role(messages) -> str:
    """Return the role of the newest message whose role is not ``'tool'``.

    Walks ``messages`` from the end. Returns ``''`` when ``messages`` is not a
    list or when no entry yields a non-empty, non-tool role.
    """
    if not isinstance(messages, list):
        return ''
    # Lazily resolve roles newest-first and stop at the first usable one.
    roles = (_message_role(entry) for entry in reversed(messages))
    return next((found for found in roles if found and found != 'tool'), '')
2120
+
2121
+
2122
def _last_non_tool_message(messages):
    """Return the newest message whose role is not ``'tool'``.

    Walks ``messages`` from the end. Returns ``None`` when ``messages`` is not
    a list or when every entry has an empty or ``'tool'`` role.
    """
    if not isinstance(messages, list):
        return None
    for entry in reversed(messages):
        kind = _message_role(entry)
        if not kind or kind == 'tool':
            continue
        return entry
    return None
2130
+
2131
+
2132
def _message_content_text(message) -> str:
    """Flatten a message's ``content`` field into a single string.

    String content is returned unchanged. List content is concatenated from
    its string items and from the ``'text'`` value of its dict items (when that
    value is a string); every other item is ignored. Anything else, including
    a non-dict ``message``, yields ``''``.
    """
    if not isinstance(message, dict):
        return ''
    payload = message.get('content')
    if isinstance(payload, str):
        return payload
    if not isinstance(payload, list):
        return ''
    fragments = []
    for piece in payload:
        if isinstance(piece, str):
            fragments.append(piece)
            continue
        if isinstance(piece, dict):
            text = piece.get('text')
            if isinstance(text, str):
                fragments.append(text)
    return ''.join(fragments)
2147
+
2148
+
2149
def _inactive_cache_tail_needs_disk_check(cached) -> bool:
    """Return True when an idle cached session ends on a user message.

    A session with an ``active_stream_id`` or a ``pending_user_message`` is
    never a candidate, and neither is a missing cache entry.
    """
    if cached is None:
        return False
    in_flight = (
        getattr(cached, 'active_stream_id', None)
        or getattr(cached, 'pending_user_message', None)
    )
    if in_flight:
        return False
    history = getattr(cached, 'messages', None) or []
    return _last_non_tool_role(history) == 'user'
2155
+
2156
+
2157
def _cache_has_stale_unsaved_user_tail(cached, disk_session) -> bool:
    """Return True when an inactive cached session has an unsaved user tail.

    A completed turn is saved to the sidecar before the browser reloads it. In
    rare compaction/reconnect paths the in-process cache can retain a recovered
    or optimistic user row after the saved assistant tail even though the row
    was never persisted. Serving that cache entry makes the transcript appear
    to end on the old prompt, with the saved assistant answer looking missing
    until a fork/reload resets the cache.

    True requires all of: neither argument is None, the cached session is idle,
    the cache holds more messages than disk, the cache ends (ignoring tool
    rows) on a user message while disk ends on an assistant message, and the
    cached tail's text equals the text of the last user message on disk.
    """
    if cached is None or disk_session is None:
        return False
    if getattr(cached, 'active_stream_id', None) or getattr(cached, 'pending_user_message', None):
        return False

    in_memory = getattr(cached, 'messages', None) or []
    on_disk = getattr(disk_session, 'messages', None) or []
    if (len(in_memory) <= len(on_disk)
            or _last_non_tool_role(in_memory) != 'user'
            or _last_non_tool_role(on_disk) != 'assistant'):
        return False

    newest_cached = _last_non_tool_message(in_memory)
    last_saved_user = next(
        (entry for entry in reversed(on_disk) if _message_role(entry) == 'user'),
        None,
    )
    if last_saved_user is None:
        return False

    # Only drop tails that look like a duplicated optimistic/recovered user row.
    # A genuinely new concurrent user edit must stay in memory so stale-session
    # guards can report and preserve it.
    return _message_content_text(newest_cached) == _message_content_text(last_saved_user)
2193
+
2194
+
2195
def get_session(sid, metadata_only=False):
    """Load a session, optionally with metadata only (skipping the messages array).

    Metadata-only loads intentionally do not populate the full-session cache.
    Otherwise a later full load could return a compact object with an empty
    messages list. Use this when you only need compact() metadata and not the
    actual message history (e.g., for fast sidebar switching).

    A cache hit is returned for both modes. On a full load the cached or freshly
    loaded session may additionally be self-healed (stale user tail, pending
    journal retry, stale pending turn); every such step is best-effort and its
    failures are logged at debug level rather than raised.

    Raises:
        KeyError: when no session with id ``sid`` can be loaded.
    """
    with LOCK:
        cached = SESSIONS.get(sid)
        if cached is not None:
            SESSIONS.move_to_end(sid)  # LRU: mark as recently used
    if cached is not None:
        # Defensive cache ownership check: compression/continuation and recovery
        # paths can temporarily juggle Session objects across lineage ids. A
        # stale object stored under the wrong key makes GET /api/session return
        # a different transcript than the requested sid, which looks exactly
        # like a disappeared session. Evict instead of trusting the LRU.
        if str(getattr(cached, 'session_id', '') or '') != str(sid):
            logger.warning(
                "evicting mismatched cached session: requested %s but cached object is %s",
                sid,
                getattr(cached, 'session_id', None),
            )
            with LOCK:
                if SESSIONS.get(sid) is cached:
                    SESSIONS.pop(sid, None)
            cached = None
    if cached is not None:
        if not metadata_only and _inactive_cache_tail_needs_disk_check(cached):
            try:
                disk_session = Session.load(sid)
                if _cache_has_stale_unsaved_user_tail(cached, disk_session):
                    # Disk is authoritative here: replace the cache entry.
                    with LOCK:
                        SESSIONS[sid] = disk_session
                        SESSIONS.move_to_end(sid)
                    cached = disk_session
            except Exception:
                logger.debug(
                    "stale cached user-tail check failed for session %s",
                    sid, exc_info=True,
                )
        if not metadata_only and _session_has_pending_journal_retry(cached):
            try:
                _try_retry_journal_recovery_in_place(cached)
            except Exception:
                logger.debug(
                    "lazy journal-retry failed on cache hit for session %s",
                    sid, exc_info=True,
                )
        return cached
    if metadata_only:
        s = Session.load_metadata_only(sid)
        if s:
            return s
    else:
        s = Session.load(sid)
        if s:
            with LOCK:
                SESSIONS[sid] = s
                SESSIONS.move_to_end(sid)
                while len(SESSIONS) > SESSIONS_MAX:
                    SESSIONS.popitem(last=False)  # evict least recently used
            if not metadata_only:
                try:
                    repaired = _repair_stale_pending(s)
                    # If the stale-pending repair did not fire but the session
                    # already carries a pending-journal-retry marker (e.g. set on
                    # a previous repair pass), give the lazy-retry path one
                    # chance to self-heal on this read.
                    if not repaired and _session_has_pending_journal_retry(s):
                        try:
                            _try_retry_journal_recovery_in_place(s)
                        except Exception:
                            logger.debug(
                                "lazy journal-retry failed on cold load for session %s",
                                sid, exc_info=True,
                            )
                    # If repair had to bail because the per-session lock was held,
                    # do not pin the still-stale sidecar in the LRU cache forever.
                    # Leaving it cached would prevent future get_session() calls from
                    # re-entering the cache-miss repair path after the lock holder exits.
                    if not repaired and (len(s.messages) == 0
                                         and s.pending_user_message
                                         and s.active_stream_id
                                         and s.active_stream_id not in _active_stream_ids()):
                        with LOCK:
                            if SESSIONS.get(sid) is s:
                                SESSIONS.pop(sid, None)
                except Exception:
                    # Repair is best-effort, but leave a trace instead of
                    # swallowing the failure silently.
                    logger.debug(
                        "stale-pending repair failed on cold load for session %s",
                        sid, exc_info=True,
                    )
            return s
    raise KeyError(sid)
2288
+
2289
def _profile_default_model_state(profile=None):
    """Return ``(default_model, default_provider)`` configured for *profile*.

    Reads the ``model`` entry of the profile's ``config.yaml``. A string entry
    is the model name; a mapping entry supplies ``default`` and ``provider``.
    Any failure to locate or load the config is treated as an empty config.
    When no model is configured, ``get_effective_default_model()`` supplies
    it; the provider is ``None`` unless the mapping names a non-blank one.
    """
    try:
        from api.profiles import get_hermes_home_for_profile
        profile_home = get_hermes_home_for_profile(profile)
        settings = _cfg._load_yaml_config_file(Path(profile_home) / "config.yaml")
    except Exception:
        settings = {}

    model_section = settings.get("model", {}) if isinstance(settings, dict) else {}

    chosen_model, chosen_provider = "", None
    if isinstance(model_section, str):
        chosen_model = model_section.strip()
    elif isinstance(model_section, dict):
        chosen_model = str(model_section.get("default") or "").strip()
        chosen_provider = str(model_section.get("provider") or "").strip() or None

    return chosen_model or get_effective_default_model(), chosen_provider
2308
+
2309
+
2310
def new_session(workspace=None, model=None, profile=None, model_provider=None, project_id=None, worktree_info=None):
    """Create a new in-memory session and register it in the session cache.

    Nothing is written to disk until the first message is appended (#1171
    follow-up). That avoids the pile-up of "ghost Untitled session" files that
    appeared when users clicked New Conversation, reloaded the page, or
    finished onboarding without sending anything. Code paths that populate
    state immediately (btw / background agent at api/routes.py) call
    ``s.save()`` themselves, and ``_handle_chat_start`` saves as soon as the
    user sends a message.

    Crash-safety: a session lost between creation and its first message had no
    messages, so nothing is lost. Worktree-backed sessions are the exception:
    they are saved right away because creating them also creates real
    filesystem state that must remain discoverable after restart.

    *profile* — when the caller supplies it (e.g. from the request body of the
    active browser tab) it is used as-is, so concurrent clients on different
    profiles do not fight over a shared process-global. Otherwise the
    process-level active profile is used (the pre-#798 behaviour, kept for
    calls that originate outside a request context).

    *model* / *model_provider* — an explicit model is used together with the
    given provider; without one the profile defaults apply, and an explicit
    provider still overrides the default provider.
    """
    if profile is None:
        # Fallback: read process-level global (single-client or startup path)
        try:
            from api.profiles import get_active_profile_name
            profile = get_active_profile_name()
        except ImportError:
            profile = None

    if model:
        chosen_model, chosen_provider = model, model_provider
    else:
        chosen_model, fallback_provider = _profile_default_model_state(profile)
        chosen_provider = model_provider or fallback_provider

    # Only a non-empty dict counts as worktree details.
    details = worktree_info if isinstance(worktree_info, dict) and worktree_info else {}
    workspace_path = details.get('path') or workspace

    s = Session(
        workspace=workspace_path or get_last_workspace(),
        model=chosen_model,
        model_provider=chosen_provider,
        profile=profile,
        project_id=project_id,
        personality=None,
        worktree_path=details.get('path'),
        worktree_branch=details.get('branch'),
        worktree_repo_root=details.get('repo_root'),
        worktree_created_at=details.get('created_at'),
    )
    with LOCK:
        SESSIONS[s.session_id] = s
        SESSIONS.move_to_end(s.session_id)
        # Trim the LRU cache back down to its configured size.
        while len(SESSIONS) > SESSIONS_MAX:
            SESSIONS.popitem(last=False)
    if details:
        s.save()
    return s
2372
+
2373
def _hide_from_default_sidebar(session: dict) -> bool:
    """Return True for internal/background sessions hidden from the default list.

    Cron rows (by source or by a ``cron_`` id prefix) are always hidden. A
    pre-compression snapshot is hidden unless it carries the internal
    ``_show_pre_compression_snapshot`` flag.
    """
    session_id = str(session.get('session_id') or '')
    origin = session.get('source_tag') or session.get('source')
    if origin == 'cron' or session_id.startswith('cron_'):
        return True
    if not session.get('pre_compression_snapshot'):
        return False
    return not session.get('_show_pre_compression_snapshot')
2382
+
2383
+
2384
def _sidebar_message_count(session: dict) -> int:
    """Return the first positive message count recorded on a sidebar row.

    ``message_count`` is consulted before ``actual_message_count``. Missing,
    non-numeric, zero, or negative values are skipped; 0 is returned when
    neither field holds a positive integer.
    """
    for field in ('message_count', 'actual_message_count'):
        try:
            count = int(session.get(field) or 0)
        except (TypeError, ValueError):
            continue
        if count > 0:
            return count
    return 0
2393
+
2394
+
2395
def _sidebar_lineage_root_id(session: dict, sessions_by_id: dict[str, dict]) -> str:
    """Return the id of the oldest known ancestor of ``session``.

    Follows ``parent_session_id`` links through ``sessions_by_id`` (keyed by
    stringified session id) until a row has no parent, the parent is unknown,
    or a cycle is detected. A session with no resolvable parent is its own
    root; a row without a ``session_id`` yields ``''``.

    Parent ids are stringified before they are compared, so a non-string
    parent id resolves against the string-keyed map and takes part in cycle
    detection in the same way a string one does.
    """
    sid = str(session.get('session_id') or '')
    root = sid
    seen = {sid}
    parent = session.get('parent_session_id')
    while parent:
        parent_id = str(parent)
        if parent_id in seen or parent_id not in sessions_by_id:
            break
        root = parent_id
        seen.add(parent_id)
        parent = sessions_by_id[parent_id].get('parent_session_id')
    return root
2405
+
2406
+
2407
def _has_live_sidebar_state(session: dict) -> bool:
    """Return True when the row has an active stream or a pending user message."""
    live_markers = ('active_stream_id', 'has_pending_user_message', 'pending_user_message')
    return any(session.get(marker) for marker in live_markers)
2413
+
2414
+
2415
def _is_intentionally_background_sidebar_session(session: dict) -> bool:
    """Return True for cron rows, identified by source or a ``cron_`` id prefix.

    ``source_tag`` takes precedence over ``source`` when both are set.
    """
    origin = session.get('source_tag') or session.get('source')
    if origin == 'cron':
        return True
    return str(session.get('session_id') or '').startswith('cron_')
2419
+
2420
+
2421
def _include_project_hidden_background_sidebar_sessions(
    candidates: list[dict],
    visible: list[dict],
) -> list[dict]:
    """Keep project-assigned background sessions addressable by project chips.

    Cron sessions stay hidden from the default sidebar, but if they have a
    project assignment they must still be present in the client cache so the
    dedicated project chip can reveal them (#3019).

    Returns a new list: every row of ``visible`` followed by a copy of each
    qualifying candidate, tagged with ``default_hidden = True``. A candidate
    qualifies when it has an id not already visible, is a background session,
    has a ``project_id``, and has at least one message.
    """
    already_listed = {
        str(row.get('session_id'))
        for row in visible
        if row.get('session_id')
    }
    result = list(visible)
    for candidate in candidates:
        candidate_id = str(candidate.get('session_id') or '')
        qualifies = (
            candidate_id
            and candidate_id not in already_listed
            and _is_intentionally_background_sidebar_session(candidate)
            and candidate.get('project_id')
            and _sidebar_message_count(candidate) > 0
        )
        if qualifies:
            result.append({**candidate, 'default_hidden': True})
    return result
2451
+
2452
+
2453
def _preserve_messageful_sidebar_discoverability(
    candidates: list[dict],
    visible: list[dict],
) -> list[dict]:
    """Keep at least one messageful row per non-background conversation visible.

    The normal sidebar filters intentionally hide empty drafts, cron/background
    rows, and duplicate pre-compression snapshots. They must not make the only
    messageful representative of a conversation disappear. When no visible row
    with messages covers a lineage, the best hidden messageful row of that
    lineage (most messages, then newest) is rescued and tagged with
    ``discoverability_warning`` so callers can surface or audit the degraded
    state.

    Returns ``visible`` itself when nothing needs rescuing, otherwise a new
    list of ``visible`` followed by the rescued rows, pinned and newest first.
    """
    by_id = {
        str(row.get('session_id')): row
        for row in candidates
        if row.get('session_id')
    }
    # Lineages that already have a visible row with messages.
    covered_roots = {
        _sidebar_lineage_root_id(row, by_id)
        for row in visible
        if _sidebar_message_count(row) > 0
    }
    shown_ids = {
        str(row.get('session_id'))
        for row in visible
        if row.get('session_id')
    }

    def _rank(row):
        return (_sidebar_message_count(row), _session_sort_timestamp(row))

    best_hidden: dict[str, dict] = {}
    for row in candidates:
        row_id = str(row.get('session_id') or '')
        if not row_id or row_id in shown_ids:
            continue
        if _sidebar_message_count(row) <= 0:
            continue
        if _is_intentionally_background_sidebar_session(row):
            continue
        root = _sidebar_lineage_root_id(row, by_id)
        if root in covered_roots:
            continue
        incumbent = best_hidden.get(root)
        if incumbent is None or _rank(row) > _rank(incumbent):
            best_hidden[root] = {
                **row,
                'discoverability_warning': 'rescued_messageful_hidden_session',
            }

    if not best_hidden:
        return visible

    rescued_rows = list(best_hidden.values())
    rescued_rows.sort(
        key=lambda row: (row.get('pinned', False), _session_sort_timestamp(row)),
        reverse=True,
    )
    return visible + rescued_rows
2509
+
2510
+
2511
def _prefer_fuller_snapshots_for_sidebar(sessions: list[dict]) -> list[dict]:
    """Expose a hidden snapshot when it is the fuller transcript for a lineage.

    Pre-compression snapshots are normally hidden so archived compression
    segments do not duplicate the current continuation in the sidebar. If a
    snapshot row has more messages than every visible continuation of the same
    lineage, hiding it makes the conversation look truncated. In that case the
    snapshot is marked with ``_show_pre_compression_snapshot`` and the shorter
    continuations are dropped — unless a continuation is live or has newer
    activity than the snapshot.

    Cron rows are left untouched. Returns ``sessions`` itself when no lineage
    needs the swap, otherwise a new list.
    """
    by_id = {
        str(row.get('session_id')): row
        for row in sessions
        if row.get('session_id')
    }

    # Bucket every non-cron row under the root of its lineage chain.
    lineages: dict[str, list[dict]] = {}
    for row in sessions:
        row_id = str(row.get('session_id') or '')
        origin = row.get('source_tag') or row.get('source')
        if origin == 'cron' or row_id.startswith('cron_'):
            continue
        lineages.setdefault(_sidebar_lineage_root_id(row, by_id), []).append(row)

    reveal_ids: set[str] = set()
    suppress_ids: set[str] = set()
    for members in lineages.values():
        continuations = [row for row in members if not row.get('pre_compression_snapshot')]
        archived = [row for row in members if row.get('pre_compression_snapshot')]
        if not (continuations and archived):
            continue
        if any(_has_live_sidebar_state(row) for row in continuations):
            continue

        most_visible_messages = max(_sidebar_message_count(row) for row in continuations)
        fullest_archived = max(
            archived,
            key=lambda row: (_sidebar_message_count(row), _session_sort_timestamp(row)),
        )
        if _sidebar_message_count(fullest_archived) <= most_visible_messages:
            continue

        # Keep the active continuation visible when it has newer activity than
        # the archived snapshot. A fuller snapshot can still be older than a
        # continuation that contains the latest turns after compression.
        latest_visible_ts = max(_session_sort_timestamp(row) for row in continuations)
        if latest_visible_ts > _session_sort_timestamp(fullest_archived):
            continue

        reveal_ids.add(str(fullest_archived.get('session_id')))
        suppress_ids.update(
            str(row.get('session_id'))
            for row in continuations
            if row.get('session_id')
        )

    if not (reveal_ids or suppress_ids):
        return sessions

    result = []
    for row in sessions:
        row_id = str(row.get('session_id') or '')
        if row_id in suppress_ids:
            continue
        if row_id in reveal_ids:
            # Copy before flagging so the caller's row is not mutated.
            row = {**row, '_show_pre_compression_snapshot': True}
        result.append(row)
    return result
2580
+
2581
+
2582
def _strip_sidebar_internal_flags(sessions: list[dict]) -> None:
    """Remove the internal ``_show_pre_compression_snapshot`` key from each row in place."""
    for row in sessions:
        if '_show_pre_compression_snapshot' in row:
            del row['_show_pre_compression_snapshot']
2585
+
2586
+
2587
def _row_may_need_sidecar_metadata_refresh(session: dict) -> bool:
    """Return True when a row needs canonical sidecar runtime/snapshot metadata.

    Compression lineage fields are enriched from state.db in one batched query
    later in all_sessions(). Loading hundreds of lineage sidecars on every
    /api/sessions poll makes the sidebar crawl, so the refresh is limited to
    two kinds of row: runtime rows (an active stream or a pending user
    message) and pre-compression snapshots that lack ``message_count`` or
    ``last_message_at``.
    """
    if (session.get('active_stream_id')
            or session.get('has_pending_user_message')
            or session.get('pending_user_message')):
        return True
    if not session.get('pre_compression_snapshot'):
        return False
    return session.get('message_count') is None or session.get('last_message_at') is None
2609
+
2610
+
2611
def _refresh_index_rows_from_sidecar_metadata(sessions: list[dict]) -> list[dict]:
    """Overlay fuller sidecar metadata onto stale sidebar index rows.

    ``_index.json`` is a cache and can lag behind the canonical session sidecar
    during compression/continuation writes. Keep this read-only and limited to
    lineage/runtime-shaped rows so ordinary sidebar refreshes do not scan every
    historical transcript.

    Returns a new list with one entry per input row, in the same order. Rows
    that do not qualify for a refresh, have no ``session_id``, or whose sidecar
    cannot be loaded are passed through unchanged (same object); refreshed rows
    are shallow copies.
    """
    out: list[dict] = []
    for session in sessions:
        if not _row_may_need_sidecar_metadata_refresh(session):
            out.append(session)
            continue
        sid = session.get('session_id')
        if not sid:
            out.append(session)
            continue
        sidecar = Session.load_metadata_only(sid)
        if not sidecar:
            out.append(session)
            continue
        compact = sidecar.compact(include_runtime=True)
        # Copy so the caller's index row is never mutated.
        refreshed = dict(session)
        # Sidecar values win for these keys, but only when the sidecar actually
        # has a value; a None never erases what the index row already carries.
        for key in (
            'message_count', 'updated_at', 'last_message_at', 'title', 'workspace',
            'model', 'model_provider', 'created_at', 'pinned', 'archived', 'project_id',
            'profile', 'pre_compression_snapshot', 'parent_session_id', 'source_tag',
            'raw_source', 'session_source', 'source_label', 'active_stream_id',
            'has_pending_user_message', 'pending_user_message', 'pending_started_at',
        ):
            value = compact.get(key)
            if value is not None:
                refreshed[key] = value
        # Never let the overlay lower the message count below the index value.
        try:
            refreshed['message_count'] = max(
                int(session.get('message_count') or 0),
                int(compact.get('message_count') or 0),
            )
        except (TypeError, ValueError):
            pass
        # NOTE(review): the loop above has already copied non-None 'updated_at'
        # and 'last_message_at' regardless of which side is newer, so this
        # branch only changes the result when compact holds one of these keys
        # with a None value — in which case it overwrites the row's value with
        # None. Confirm whether that is intended.
        if _session_sort_timestamp(compact) > _session_sort_timestamp(session):
            refreshed['updated_at'] = compact.get('updated_at', refreshed.get('updated_at'))
            refreshed['last_message_at'] = compact.get('last_message_at', refreshed.get('last_message_at'))
        out.append(refreshed)
    return out
2656
+
2657
+
2658
def _active_state_db_path() -> Path:
    """Return state.db for the active Hermes profile, degrading to HERMES_HOME.

    When the active profile home cannot be resolved for any reason, the
    ``HERMES_HOME`` environment variable is used, defaulting to ``~/.hermes``
    under ``HOME``.
    """
    try:
        from api.profiles import get_active_hermes_home
        base_dir = Path(get_active_hermes_home()).expanduser().resolve()
    except Exception:
        fallback = os.getenv('HERMES_HOME', str(HOME / '.hermes'))
        base_dir = Path(fallback).expanduser().resolve()
    return base_dir / 'state.db'
2666
+
2667
+
2668
+ def _sidebar_title_is_generic_webui(title: str | None) -> bool:
2669
+ text = ' '.join(str(title or '').split())
2670
+ if text == 'Hermes WebUI':
2671
+ return True
2672
+ prefix = 'Hermes WebUI #'
2673
+ return text.startswith(prefix) and text[len(prefix):].isdigit()
2674
+
2675
+
2676
def _enrich_sidebar_lineage_metadata(sessions: list[dict]) -> None:
    """Merge state.db lineage metadata into the sidebar rows, in place.

    Metadata is fetched once for every row that has a ``session_id``; if that
    lookup raises, the rows are left untouched. For each row with a metadata
    entry, the entry's public keys are copied onto the row. The private
    ``_state_db_*`` keys are handled separately:

    * when the state.db source is ``'webui'``, the row's source fields are
      overwritten with the state.db values and ``is_cli_session`` is cleared;
    * when the row still carries a generic "Hermes WebUI" title and state.db
      has a different, non-empty title, that title is exposed as
      ``display_title`` (and ``_state_db_title``).
    """
    try:
        metadata = read_session_lineage_metadata(
            _active_state_db_path(),
            {str(row.get('session_id')) for row in sessions if row.get('session_id')},
        )
    except Exception:
        return

    private_keys = (
        '_state_db_title',
        '_state_db_source',
        '_state_db_source_tag',
        '_state_db_raw_source',
        '_state_db_session_source',
        '_state_db_source_label',
    )
    for row in sessions:
        sid = row.get('session_id')
        if sid not in metadata:
            continue
        entry = dict(metadata[sid])
        # Pull the private keys out first so they are not copied onto the row.
        private = {key: entry.pop(key, None) for key in private_keys}
        row.update(entry)

        if private['_state_db_source'] == 'webui':
            row['source_tag'] = private['_state_db_source_tag']
            row['raw_source'] = private['_state_db_raw_source']
            row['session_source'] = private['_state_db_session_source']
            row['source_label'] = private['_state_db_source_label']
            row['is_cli_session'] = False

        current_title = row.get('title')
        db_title = private['_state_db_title']
        if not db_title or db_title == current_title:
            continue
        if _sidebar_title_is_generic_webui(current_title):
            row['_state_db_title'] = db_title
            row['display_title'] = db_title
2710
+
2711
+
2712
+ def _diag_stage(diag, name: str) -> None:
2713
+ if diag is not None:
2714
+ try:
2715
+ diag.stage(name)
2716
+ except Exception:
2717
+ pass
2718
+
2719
+
2720
def _finalize_sidebar_sessions(result: list, diag=None) -> list:
    """Run the sidebar post-processing shared by both ``all_sessions`` paths.

    *result* is the sorted, already-filtered list of compact session dicts.
    The helpers are applied in the same order on the index path and on the
    full-scan path; the returned list is what ``all_sessions`` hands back.
    """
    result = _prefer_fuller_snapshots_for_sidebar(result)
    sidebar_candidates = result
    visible_result = [s for s in sidebar_candidates if not _hide_from_default_sidebar(s)]
    result = _preserve_messageful_sidebar_discoverability(sidebar_candidates, visible_result)
    result = _include_project_hidden_background_sidebar_sessions(sidebar_candidates, result)
    _strip_sidebar_internal_flags(result)
    # Backfill: sessions created before Sprint 22 have no profile tag.
    # Attribute them to 'default' so the client profile filter works correctly.
    for s in result:
        if not s.get('profile'):
            s['profile'] = 'default'
    _diag_stage(diag, "all_sessions.lineage_metadata")
    _enrich_sidebar_lineage_metadata(result)
    return result


def all_sessions(diag=None):
    """Return the compact session dicts shown in the sidebar.

    Fast path: read ``SESSION_INDEX_FILE``, drop index entries that are
    neither in memory nor persisted, backfill entries lacking
    ``last_message_at``, overlay the in-memory ``SESSIONS`` and refresh the
    rows from sidecar metadata. If the index file is missing a rebuild thread
    is started. Any exception on the fast path falls through to a full scan
    of ``SESSION_DIR/*.json``.

    Both paths sort by ``(pinned, timestamp)`` descending, hide empty
    "Untitled" sessions and then apply the shared sidebar post-processing.

    Args:
        diag: optional diagnostics object exposing ``stage(name)``; it is
            told which stage is about to run. May be None.
    """
    _diag_stage(diag, "all_sessions.active_streams")
    active_stream_ids = _active_stream_ids()
    # Phase C: try index first for O(1) read; fall back to full scan
    _diag_stage(diag, "all_sessions.index_exists")
    if not SESSION_INDEX_FILE.exists():
        _diag_stage(diag, "all_sessions.start_index_rebuild")
        _start_session_index_rebuild_thread()
    if SESSION_INDEX_FILE.exists():
        try:
            _diag_stage(diag, "all_sessions.read_index")
            index = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
            _diag_stage(diag, "all_sessions.prune_index")
            with LOCK:
                in_memory_ids = set(SESSIONS.keys())
            persisted_ids = _persisted_session_ids_snapshot()
            index = [
                s for s in index
                if (
                    str(s.get('session_id') or '') in in_memory_ids
                    or (
                        persisted_ids is not None
                        and str(s.get('session_id') or '') in persisted_ids
                    )
                    or (
                        persisted_ids is None
                        and _index_entry_exists(s.get('session_id'), in_memory_ids=in_memory_ids)
                    )
                )
            ]
            backfilled = []
            for i, s in enumerate(index):
                if 'last_message_at' not in s:
                    _diag_stage(diag, "all_sessions.backfill_load")
                    full = Session.load(s.get('session_id'))
                    if full:
                        index[i] = full.compact()
                        backfilled.append(full)
            if backfilled:
                try:
                    _diag_stage(diag, "all_sessions.backfill_write")
                    _write_session_index(updates=backfilled)
                except Exception:
                    logger.debug("Failed to persist last_message_at backfill")
            _diag_stage(diag, "all_sessions.mark_streaming")
            for s in index:
                s['is_streaming'] = _is_streaming_session(
                    s.get('active_stream_id'),
                    active_stream_ids,
                )
            # Overlay any in-memory sessions that may be newer than the index
            _diag_stage(diag, "all_sessions.overlay_lock")
            index_map = {s['session_id']: s for s in index}
            with LOCK:
                for s in SESSIONS.values():
                    index_map[s.session_id] = s.compact(
                        include_runtime=True,
                        active_stream_ids=active_stream_ids,
                    )
            _diag_stage(diag, "all_sessions.refresh_sidecar_metadata")
            refreshed_index_rows = _refresh_index_rows_from_sidecar_metadata(list(index_map.values()))
            index_map = {
                row['session_id']: row
                for row in refreshed_index_rows
                if row.get('session_id')
            }
            _diag_stage(diag, "all_sessions.sort_filter")
            result = sorted(index_map.values(), key=lambda s: (s.get('pinned', False), _session_sort_timestamp(s)), reverse=True)
            # Hide empty Untitled sessions from the UI entirely — they are ephemeral
            # scratch pads that only become real once the first message is sent (#1171).
            # No grace window: a 0-message Untitled session is never shown in the list
            # regardless of age. This means page refreshes and accidental New Conversation
            # clicks never leave orphan entries in the sidebar.
            #
            # Exception: sessions with active_stream_id set are actively streaming (#1327).
            # #1184 deferred the first save() until the first message, so during the
            # initial streaming turn the session still looks like Untitled+0-messages.
            # Without this exemption, navigating away during a long first turn causes
            # the session to vanish from the sidebar.
            result = [s for s in result if not (
                s.get('title', 'Untitled') == 'Untitled'
                and s.get('message_count', 0) == 0
                and not s.get('active_stream_id')
                and not s.get('has_pending_user_message')
                and not s.get('worktree_path')
            )]
            # Evaluated inside the try so a failure here still falls back to the full scan.
            return _finalize_sidebar_sessions(result, diag)
        except Exception:
            logger.debug("Failed to load session index, falling back to full scan", exc_info=True)
    # Full scan fallback
    _diag_stage(diag, "all_sessions.full_scan")
    out = []
    for p in SESSION_DIR.glob('*.json'):
        if p.name.startswith('_'):
            continue
        try:
            s = Session.load(p.stem)
            if s:
                out.append(s)
        except Exception:
            logger.debug("Failed to load session from %s", p)
    _diag_stage(diag, "all_sessions.full_scan_overlay")
    # Snapshot SESSIONS under LOCK (as the index path does) so a concurrent
    # insert/removal cannot invalidate the iteration; nothing else runs while
    # the lock is held.
    with LOCK:
        in_memory_sessions = list(SESSIONS.values())
    seen_ids = {s.session_id for s in out}
    for s in in_memory_sessions:
        if s.session_id not in seen_ids:
            seen_ids.add(s.session_id)
            out.append(s)
    _diag_stage(diag, "all_sessions.full_scan_sort_filter")
    out.sort(key=lambda s: (getattr(s, 'pinned', False), _session_sort_timestamp(s)), reverse=True)
    # Hide empty Untitled sessions from the UI entirely — kept consistent with the
    # index-path filter above. No grace window: a 0-message Untitled session is
    # never shown regardless of age (#1171). Same streaming exemption as above (#1327).
    result = [s.compact(include_runtime=True, active_stream_ids=active_stream_ids) for s in out if not (
        s.title == 'Untitled'
        and len(s.messages) == 0
        and not s.active_stream_id
        and not s.pending_user_message
        and not getattr(s, 'worktree_path', None)
    )]
    return _finalize_sidebar_sessions(result, diag)
2859
+
2860
+
2861
def title_from(messages, fallback: str='Untitled'):
    """Derive a session title from the first user message that has text.

    Args:
        messages: iterable of message dicts (``role`` / ``content``). ``None``
            and non-dict entries are tolerated and skipped.
        fallback: title returned when no user message yields any text.

    Returns:
        The first 64 characters of the stripped text of the first user
        message with non-empty text, else *fallback*. List-style content is
        reduced to its ``type == 'text'`` parts joined by single spaces.
    """
    for m in messages or []:
        if not isinstance(m, dict) or m.get('role') != 'user':
            continue
        c = m.get('content')
        if isinstance(c, list):
            # `or ''` keeps a part with text=None from breaking the join.
            c = ' '.join(
                str(p.get('text') or '')
                for p in c
                if isinstance(p, dict) and p.get('type') == 'text'
            )
        # A None content must not be stringified into the literal title "None".
        text = '' if c is None else str(c).strip()
        if text:
            return text[:64]
    return fallback
2872
+
2873
+
2874
+ # ── Project helpers ──────────────────────────────────────────────────────────
2875
+
2876
# Held by load_projects() while it runs the project `profile` backfill.
+ _PROJECTS_MIGRATION_LOCK = threading.Lock()
2877
# Set to True by load_projects() once the backfill has been persisted or
# found unnecessary; left False after a failed save so a later call retries.
+ _projects_migrated = False
2878
+
2879
+
2880
+ def _backfill_project_profiles_if_needed(projects: list) -> bool:
2881
+ """Tag any legacy untagged projects (`profile` missing) with a sensible default.
2882
+
2883
+ Strategy:
2884
+ 1. For each untagged project, look at the sessions assigned to it via
2885
+ the session index. If any session carries a profile, take that
2886
+ profile. Most installs are single-profile so this picks up the
2887
+ right answer for everyone.
2888
+ 2. Otherwise default to 'default'.
2889
+
2890
+ Returns True if any project was mutated. Safe to call repeatedly — once
2891
+ every project is tagged, this is a no-op. Runs at most once per process
2892
+ (cached via the module-level _projects_migrated flag) but the result is
2893
+ persisted so it's a one-time write.
2894
+ """
2895
+ untagged = [p for p in projects if not p.get('profile')]
2896
+ if not untagged:
2897
+ return False
2898
+
2899
+ # Build session_id -> profile map for the untagged project_ids.
2900
+ session_profile_by_project: dict[str, str] = {}
2901
+ if SESSION_INDEX_FILE.exists():
2902
+ try:
2903
+ entries = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
2904
+ untagged_ids = {p['project_id'] for p in untagged if p.get('project_id')}
2905
+ for e in entries:
2906
+ pid = e.get('project_id')
2907
+ if pid in untagged_ids and e.get('profile'):
2908
+ # First session profile wins for the project.
2909
+ session_profile_by_project.setdefault(pid, e['profile'])
2910
+ except Exception:
2911
+ logger.debug("Failed to read session index for project profile backfill")
2912
+
2913
+ mutated = False
2914
+ for p in untagged:
2915
+ inferred = session_profile_by_project.get(p.get('project_id'), 'default')
2916
+ p['profile'] = inferred
2917
+ mutated = True
2918
+ return mutated
2919
+
2920
+
2921
def load_projects(*, _migrate: bool = True) -> list:
    """Read the project list from ``PROJECTS_FILE``.

    Returns ``[]`` when the file is missing or cannot be parsed. Until the
    module-level ``_projects_migrated`` flag is set, the call also back-fills
    the ``profile`` field on untagged projects (#1614) and saves the result;
    pass ``_migrate=False`` to get the on-disk shape without that step
    (test fixtures, e.g.).
    """
    global _projects_migrated
    if not PROJECTS_FILE.exists():
        return []
    try:
        projects = json.loads(PROJECTS_FILE.read_text(encoding='utf-8'))
    except Exception:
        return []

    if not _migrate or _projects_migrated:
        return projects

    with _PROJECTS_MIGRATION_LOCK:
        if _projects_migrated:
            # Another thread finished the migration while we waited for the
            # lock, so `projects` is the pre-migration snapshot. Re-read so
            # the caller cannot write stale untagged rows back over it.
            try:
                return json.loads(PROJECTS_FILE.read_text(encoding='utf-8'))
            except Exception:
                return projects
        if not _backfill_project_profiles_if_needed(projects):
            # Nothing to migrate — already tagged.
            _projects_migrated = True
        else:
            try:
                save_projects(projects)
                _projects_migrated = True
            except Exception:
                # Flag stays False so a future call retries the save.
                logger.debug("Failed to persist project profile backfill")
    return projects
2960
+
2961
def save_projects(projects) -> None:
    """Write the project list to ``PROJECTS_FILE`` atomically.

    The JSON is written to a temporary sibling file and then moved over
    ``PROJECTS_FILE`` with ``os.replace``, so an interrupted write cannot
    leave a truncated file behind. The temporary name ends in ``.tmp`` and
    embeds the process and thread ids so concurrent writers do not share it.

    Raises:
        TypeError / ValueError: *projects* is not JSON-serializable (raised
            before anything is written).
        OSError: the temporary file could not be written or moved into place.
    """
    payload = json.dumps(projects, ensure_ascii=False, indent=2)
    tmp_path = PROJECTS_FILE.with_name(
        f'{PROJECTS_FILE.name}.{os.getpid()}.{threading.get_ident()}.tmp'
    )
    try:
        tmp_path.write_text(payload, encoding='utf-8')
        os.replace(tmp_path, PROJECTS_FILE)
    except BaseException:
        # Do not leave a stray temp file behind; the original error wins.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise
2964
+
2965
+
2966
# Display name that ensure_cron_project() matches on and assigns to the
# per-profile cron project.
+ CRON_PROJECT_NAME = 'Cron Jobs'
2967
# Held by ensure_cron_project() around its load / lookup / save sequence.
+ _CRON_PROJECT_LOCK = threading.Lock()
2968
+
2969
+
2970
def ensure_cron_project() -> str:
    """Return the ``project_id`` of the "Cron Jobs" project for the active profile.

    Each profile has its own cron project so cron-spawned sessions of one
    profile do not show up under another profile's cron chip (#1614). The
    whole lookup runs under ``_CRON_PROJECT_LOCK`` and resolves in this order:

    1. a cron project tagged with the active profile, or one where both its
       tag (untagged counts as ``'default'``) and the active profile are root
       profiles according to ``_is_root_profile``;
    2. a cron project with no profile tag, which is tagged with the active
       profile and saved;
    3. a newly created project (12-char hex id) tagged with the active
       profile and saved.
    """
    from api.profiles import get_active_profile_name, _is_root_profile

    active = get_active_profile_name() or 'default'
    with _CRON_PROJECT_LOCK:
        projects = load_projects()

        def _cron_rows():
            # Lazy, so rows are inspected in list order exactly as needed.
            return (row for row in projects if row.get('name') == CRON_PROJECT_NAME)

        # Pass 1: exact profile tag, or the renamed-root alias in either direction.
        for row in _cron_rows():
            tagged_as = row.get('profile')
            if tagged_as == active:
                return row['project_id']
            if _is_root_profile(tagged_as or 'default') and _is_root_profile(active):
                return row['project_id']

        # Pass 2: adopt a legacy untagged cron project for the active profile.
        for row in _cron_rows():
            if not row.get('profile'):
                row['profile'] = active
                save_projects(projects)
                return row['project_id']

        # Pass 3: nothing reusable, create a fresh project for this profile.
        new_id = uuid.uuid4().hex[:12]
        projects.append({
            'project_id': new_id,
            'name': CRON_PROJECT_NAME,
            'color': '#6366f1',
            'profile': active,
            'created_at': time.time(),
        })
        save_projects(projects)
        return new_id
3015
+
3016
+
3017
def is_cron_session(session_id: str, source_tag: str = None) -> bool:
    """Tell whether a session was produced by a cron job.

    True when *source_tag* equals ``'cron'`` or the stringified *session_id*
    (``None``/empty counts as ``''``) starts with ``cron_``.
    """
    return source_tag == 'cron' or str(session_id or '').startswith('cron_')
3023
+
3024
+
3025
+
3026
def import_cli_session(
    session_id: str,
    title: str,
    messages,
    model: str='unknown',
    profile=None,
    created_at=None,
    updated_at=None,
    parent_session_id=None,
):
    """Build and save a WebUI ``Session`` holding CLI/agent messages.

    The session's workspace is whatever ``get_last_workspace()`` returns.
    ``parent_session_id`` is passed through so imported continuation segments
    keep their lineage instead of showing up as detached chats. The session is
    saved with ``touch_updated_at=False`` and then returned.
    """
    session_fields = {
        'session_id': session_id,
        'title': title,
        'workspace': get_last_workspace(),
        'model': model,
        'messages': messages,
        'profile': profile,
        'created_at': created_at,
        'updated_at': updated_at,
        'parent_session_id': parent_session_id,
    }
    imported = Session(**session_fields)
    imported.save(touch_updated_at=False)
    return imported
3055
+
3056
+
3057
+ # ── CLI session bridge ──────────────────────────────────────────────────────
3058
+
3059
# Source tag for Claude Code rows; also the prefix of their synthetic session ids.
+ CLAUDE_CODE_SOURCE = 'claude_code'
3060
# Value reported as `source_label` on Claude Code rows.
+ CLAUDE_CODE_SOURCE_LABEL = 'Claude Code'
3061
# Default cap on the number of .jsonl files yielded per scan.
+ CLAUDE_CODE_MAX_FILES = 200
3062
# Default size limit; larger .jsonl files are skipped by the scan.
+ CLAUDE_CODE_MAX_FILE_BYTES = 10 * 1024 * 1024
3063
# Default cap on messages parsed out of a single .jsonl file.
+ CLAUDE_CODE_MAX_MESSAGES_PER_FILE = 1000
3064
# Cap on the characters of text extracted for a single message's content.
+ CLAUDE_CODE_MAX_CONTENT_CHARS = 200_000
3065
+
3066
+
3067
+ def _default_claude_code_projects_dir() -> Path | None:
3068
+ """Resolve the Claude Code projects directory without touching real home in tests."""
3069
+ override = os.getenv('HERMES_WEBUI_CLAUDE_PROJECTS_DIR')
3070
+ if override:
3071
+ return Path(override).expanduser()
3072
+ if os.getenv('HERMES_WEBUI_TEST_STATE_DIR'):
3073
+ return None
3074
+ return Path.home() / '.claude' / 'projects'
3075
+
3076
+
3077
def _claude_code_session_id(path: Path) -> str:
    """Build the synthetic session id for a Claude Code JSONL file.

    The id is ``CLAUDE_CODE_SOURCE``, an underscore, and the first 24 hex
    characters of the SHA-256 of the file's expanded, resolved path.
    """
    canonical = str(path.expanduser().resolve())
    full_digest = hashlib.sha256(canonical.encode('utf-8')).hexdigest()
    digest = full_digest[:24]
    return f'{CLAUDE_CODE_SOURCE}_{digest}'
3080
+
3081
+
3082
+ def _parse_claude_code_timestamp(value):
3083
+ if value is None:
3084
+ return None
3085
+ if isinstance(value, (int, float)):
3086
+ return float(value)
3087
+ text = str(value).strip()
3088
+ if not text:
3089
+ return None
3090
+ try:
3091
+ return float(text)
3092
+ except ValueError:
3093
+ pass
3094
+ try:
3095
+ return datetime.datetime.fromisoformat(text.replace('Z', '+00:00')).timestamp()
3096
+ except Exception:
3097
+ return None
3098
+
3099
+
3100
+ def _extract_claude_code_text(content) -> str:
3101
+ if content is None:
3102
+ return ''
3103
+ if isinstance(content, str):
3104
+ return content[:CLAUDE_CODE_MAX_CONTENT_CHARS]
3105
+ if isinstance(content, list):
3106
+ parts = []
3107
+ used = 0
3108
+ for item in content:
3109
+ text = ''
3110
+ if isinstance(item, str):
3111
+ text = item
3112
+ elif isinstance(item, dict):
3113
+ text = item.get('text') or item.get('content') or ''
3114
+ if not text:
3115
+ continue
3116
+ text = str(text)
3117
+ remaining = CLAUDE_CODE_MAX_CONTENT_CHARS - used
3118
+ if remaining <= 0:
3119
+ break
3120
+ parts.append(text[:remaining])
3121
+ used += len(parts[-1])
3122
+ return '\n'.join(parts)
3123
+ if isinstance(content, dict):
3124
+ return _extract_claude_code_text(content.get('text') or content.get('content'))
3125
+ return str(content)[:CLAUDE_CODE_MAX_CONTENT_CHARS]
3126
+
3127
+
3128
def _parse_claude_code_jsonl(path: Path, *, max_messages: int = CLAUDE_CODE_MAX_MESSAGES_PER_FILE) -> tuple[list[dict], str | None, float | None, float | None]:
    """Parse one Claude Code JSONL file into normalized messages.

    Blank lines, lines that are not valid JSON and lines that are not JSON
    objects are skipped. Each object contributes either its ``messages`` list,
    its ``message`` dict, or itself as a single record. A record is kept when
    its role (``human`` is mapped to ``user``) is one of user / assistant /
    system / tool and its extracted content is not blank.

    Args:
        path: file to read (UTF-8, undecodable bytes replaced).
        max_messages: parsing stops once this many messages were collected.

    Returns:
        ``(messages, summary_title, first_ts, last_ts)``. ``summary_title`` is
        the first non-blank ``summary``/``title`` string seen (whitespace
        collapsed, at most 80 characters) or None; the timestamps are the
        minimum and maximum parsed message timestamps or None. Any exception
        while reading yields ``([], None, None, None)``.
    """
    accepted_roles = {'user', 'assistant', 'system', 'tool'}
    parsed: list[dict] = []
    title = None
    earliest = None
    latest = None
    try:
        with path.open('r', encoding='utf-8', errors='replace') as fh:
            for raw_line in fh:
                if len(parsed) >= max_messages:
                    break
                stripped = raw_line.strip()
                if not stripped:
                    continue
                try:
                    raw = json.loads(stripped)
                except Exception:
                    continue
                if not isinstance(raw, dict):
                    continue

                if not title:
                    candidate = raw.get('summary') or raw.get('title')
                    if isinstance(candidate, str) and candidate.strip():
                        title = ' '.join(candidate.split())[:80]

                # One line may hold a batch, a wrapped message, or a bare record.
                if isinstance(raw.get('messages'), list):
                    records = raw.get('messages')
                elif isinstance(raw.get('message'), dict):
                    records = [raw.get('message')]
                else:
                    records = [raw]

                for record in records:
                    if len(parsed) >= max_messages:
                        break
                    if not isinstance(record, dict):
                        continue
                    nested = record.get('message')
                    msg = nested if isinstance(nested, dict) else record

                    role_source = (
                        msg.get('role')
                        or record.get('role')
                        or raw.get('role')
                        or raw.get('type')
                        or ''
                    )
                    role = str(role_source).strip().lower()
                    if role == 'human':
                        role = 'user'
                    if role not in accepted_roles:
                        continue

                    raw_content = msg.get('content') if 'content' in msg else record.get('content')
                    content = _extract_claude_code_text(raw_content)
                    if not content.strip():
                        continue

                    ts = _parse_claude_code_timestamp(
                        msg.get('timestamp')
                        or record.get('timestamp')
                        or raw.get('timestamp')
                        or raw.get('created_at')
                    )
                    entry = {'role': role, 'content': content}
                    if ts is not None:
                        earliest = ts if earliest is None else min(earliest, ts)
                        latest = ts if latest is None else max(latest, ts)
                        entry['timestamp'] = ts
                    parsed.append(entry)
    except Exception:
        return [], None, None, None
    return parsed, title, earliest, latest
3184
+
3185
+
3186
def _iter_claude_code_jsonl_files(projects_dir: Path | str | None = None, *, max_files: int = CLAUDE_CODE_MAX_FILES, max_file_bytes: int = CLAUDE_CODE_MAX_FILE_BYTES):
    """Yield the ``.jsonl`` files found one level below the projects directory.

    The root is *projects_dir* when given, otherwise the default Claude Code
    projects directory; nothing is yielded when there is no root, when the
    root is a symlink, or when it is not an existing directory. Project
    folders and the files inside them are visited in name order. Symlinks,
    non-files, other suffixes, files larger than *max_file_bytes* and files
    that cannot be stat'ed are skipped. At most *max_files* paths are yielded.
    An ``OSError`` inside one project folder skips the rest of that folder;
    an ``OSError`` at the root level ends the iteration.
    """
    if projects_dir is not None:
        root = Path(projects_dir).expanduser()
    else:
        root = _default_claude_code_projects_dir()
    if root is None:
        return
    try:
        if root.is_symlink():
            return
        root = root.resolve(strict=False)
        if not (root.exists() and root.is_dir()):
            return
        emitted = 0
        for project_dir in sorted(root.iterdir(), key=lambda entry: entry.name):
            if emitted >= max_files:
                return
            try:
                if project_dir.is_symlink() or not project_dir.is_dir():
                    continue
                for candidate in sorted(project_dir.iterdir(), key=lambda entry: entry.name):
                    if emitted >= max_files:
                        return
                    if candidate.is_symlink() or not candidate.is_file():
                        continue
                    if candidate.suffix.lower() != '.jsonl':
                        continue
                    try:
                        oversized = candidate.stat().st_size > max_file_bytes
                    except OSError:
                        continue
                    if oversized:
                        continue
                    emitted += 1
                    yield candidate
            except OSError:
                continue
    except OSError:
        return
3219
+
3220
+
3221
+ def _claude_code_title(messages: list[dict], summary_title: str | None) -> str:
3222
+ if summary_title:
3223
+ return summary_title
3224
+ for msg in messages:
3225
+ if msg.get('role') == 'user':
3226
+ text = ' '.join(str(msg.get('content') or '').split())
3227
+ if text:
3228
+ return text[:80]
3229
+ return 'Claude Code Session'
3230
+
3231
+
3232
def get_claude_code_sessions(projects_dir: Path | str | None = None, *, max_files: int = CLAUDE_CODE_MAX_FILES, max_file_bytes: int = CLAUDE_CODE_MAX_FILE_BYTES) -> list:
    """Read Claude Code JSONL sessions as read-only external-agent rows.

    The bridge is additive and defensive: it skips symlinks, oversized files,
    malformed lines, and per-file errors rather than crashing WebUI session
    listing. Tests pass ``projects_dir`` fixtures so a developer's real
    ``~/.claude`` is never read during test runs.

    Args:
        projects_dir: directory to scan; None selects the default directory.
        max_files: maximum number of JSONL files to consider.
        max_file_bytes: files larger than this are ignored.

    Returns:
        A list of session dicts (files without any parsed message are left
        out), sorted newest first by ``last_message_at`` / ``updated_at``.
        Timestamps fall back to the file's mtime when the messages carry none.
    """
    sessions = []
    # Resolved once, on the first usable file, instead of once per file.
    workspace = None
    for path in _iter_claude_code_jsonl_files(projects_dir, max_files=max_files, max_file_bytes=max_file_bytes) or []:
        messages, summary_title, first_ts, last_ts = _parse_claude_code_jsonl(path)
        if not messages:
            continue
        try:
            sid = _claude_code_session_id(path)
            # Only touch the filesystem when no message timestamp is usable.
            fallback_ts = None if (first_ts or last_ts) else path.stat().st_mtime
        except (OSError, RuntimeError):
            # The file vanished or its path cannot be resolved any more:
            # skip this one file rather than failing the whole listing.
            logger.debug("Skipping Claude Code session file %s", path, exc_info=True)
            continue
        if workspace is None:
            workspace = str(get_last_workspace())
        newest_ts = last_ts or first_ts or fallback_ts
        sessions.append({
            'session_id': sid,
            'title': _claude_code_title(messages, summary_title),
            'workspace': workspace,
            'model': 'claude-code',
            'message_count': len(messages),
            'created_at': first_ts or last_ts or fallback_ts,
            'updated_at': newest_ts,
            'last_message_at': newest_ts,
            'pinned': False,
            'archived': False,
            'project_id': None,
            'profile': None,
            'source_tag': CLAUDE_CODE_SOURCE,
            'raw_source': CLAUDE_CODE_SOURCE,
            'session_source': 'external_agent',
            'source_label': CLAUDE_CODE_SOURCE_LABEL,
            'is_cli_session': True,
            'read_only': True,
        })
    sessions.sort(key=lambda s: s.get('last_message_at') or s.get('updated_at') or 0, reverse=True)
    return sessions
3268
+
3269
+
3270
def get_claude_code_session_messages(sid, projects_dir: Path | str | None = None) -> list:
    """Fetch the parsed messages of one read-only Claude Code session.

    Returns ``[]`` when *sid* does not carry the ``CLAUDE_CODE_SOURCE`` prefix
    or when no scanned JSONL file maps to that id; otherwise the messages of
    the first matching file.
    """
    wanted = str(sid or '')
    if not wanted.startswith(f'{CLAUDE_CODE_SOURCE}_'):
        return []
    candidates = _iter_claude_code_jsonl_files(projects_dir) or []
    match = next(
        (candidate for candidate in candidates if _claude_code_session_id(candidate) == wanted),
        None,
    )
    if match is None:
        return []
    # Only the message list of the parse result is of interest here.
    return _parse_claude_code_jsonl(match)[0]
3281
+
3282
+
3283
def clear_cli_sessions_cache() -> None:
    """Empty the CLI sessions cache while holding its lock."""
    with _CLI_SESSIONS_CACHE_LOCK:
        _CLI_SESSIONS_CACHE.clear()
3286
+
3287
+
3288
+ def _copy_cli_sessions(sessions: list) -> list:
3289
+ return copy.deepcopy(sessions)
3290
+
3291
+
3292
def _cli_sessions_cache_ttl_seconds() -> float:
    """Read the CLI sessions cache TTL as a non-negative float.

    ``_CLI_SESSIONS_CACHE_TTL_SECONDS`` is converted with ``float`` and
    clamped at 0.0; a value that cannot be converted yields 5.0.
    """
    try:
        ttl = float(_CLI_SESSIONS_CACHE_TTL_SECONDS)
    except (TypeError, ValueError):
        return 5.0
    return max(0.0, ttl)
3297
+
3298
+
3299
+ def _path_cache_key(path) -> str | None:
3300
+ if path is None:
3301
+ return None
3302
+ try:
3303
+ return str(Path(path).expanduser().resolve(strict=False))
3304
+ except Exception:
3305
+ return str(path)
3306
+
3307
+
3308
+ def _path_stat_cache_key(path):
3309
+ if path is None:
3310
+ return None
3311
+ try:
3312
+ st = Path(path).stat()
3313
+ return (st.st_mtime_ns, st.st_size)
3314
+ except OSError:
3315
+ return None
3316
+
3317
+
3318
def _sqlite_file_stat_cache_key(db_path: Path):
    """Build a cheap invalidation key for a SQLite database.

    The key is a 3-tuple of stat keys for the database file, its ``-wal``
    file and its ``-shm`` file, in that order; a file that cannot be stat'ed
    contributes None.
    """
    sidecars = (Path(f"{db_path}-wal"), Path(f"{db_path}-shm"))
    return (
        _path_stat_cache_key(db_path),
        *(_path_stat_cache_key(sidecar) for sidecar in sidecars),
    )
3325
+
3326
+
3327
def _resolve_cli_sessions_context():
    """Resolve where CLI sessions live and a cache key for that state.

    Returns:
        ``(hermes_home, db_path, cli_profile, cache_key)``, where ``db_path``
        is ``hermes_home / 'state.db'`` and ``cache_key`` combines the home
        directory, profile name, database path, the stat keys of the database
        and its WAL sidecars, the Claude Code projects directory (path and
        stat key) and the stat key of the session index file.
    """
    # state.db is looked up under the *active* WebUI profile's home:
    #   - default profile -> ~/.hermes/state.db
    #   - named profile X -> ~/.hermes/profiles/X/state.db
    # HERMES_HOME alone is not enough: it reflects the profile the server was
    # launched with, which may differ from the active one after a switch.
    try:
        from api.profiles import get_active_hermes_home
        hermes_home = Path(get_active_hermes_home()).expanduser().resolve()
    except Exception:
        env_home = os.getenv('HERMES_HOME', str(HOME / '.hermes'))
        hermes_home = Path(env_home).expanduser().resolve()

    try:
        from api.profiles import get_active_profile_name
        cli_profile = get_active_profile_name()
    except Exception:
        cli_profile = None

    db_path = hermes_home / 'state.db'
    projects_dir = _default_claude_code_projects_dir()
    key_parts = [
        str(hermes_home),
        str(cli_profile or ''),
        str(db_path),
        _sqlite_file_stat_cache_key(db_path),
        _path_cache_key(projects_dir),
        _path_stat_cache_key(projects_dir),
        _path_stat_cache_key(SESSION_INDEX_FILE),
    ]
    cache_key = tuple(key_parts)
    return hermes_home, db_path, cli_profile, cache_key
3360
+
3361
+
3362
+ def _load_cli_sessions_uncached(hermes_home: Path, db_path: Path, _cli_profile) -> list:
3363
+ cli_sessions = []
3364
+ try:
3365
+ cli_sessions.extend(get_claude_code_sessions())
3366
+ except Exception:
3367
+ logger.debug("Claude Code session scan failed", exc_info=True)
3368
+
3369
+ if not db_path.exists():
3370
+ return cli_sessions
3371
+
3372
+ # Memoize the cron project ID for this scan so we don't pay a lock-acquire +
3373
+ # disk-read of projects.json per cron session in the loop below.
3374
+ # Resolved lazily on the first cron session we encounter.
3375
+ _cron_pid_cache = [None] # list-as-cell so the closure can mutate
3376
+ def _cron_pid():
3377
+ if _cron_pid_cache[0] is None:
3378
+ _cron_pid_cache[0] = ensure_cron_project()
3379
+ return _cron_pid_cache[0]
3380
+
3381
+ for row in read_importable_agent_session_rows(
3382
+ db_path,
3383
+ limit=CLI_VISIBLE_SESSION_LIMIT,
3384
+ log=logger,
3385
+ exclude_sources=None,
3386
+ ):
3387
+ sid = row['id']
3388
+ raw_ts = row['last_activity'] or row['started_at']
3389
+ # Prefer the CLI session's own profile from the DB; fall back to
3390
+ # the active CLI profile so sidebar filtering works either way.
3391
+ profile = _cli_profile # CLI DB has no profile column; use active profile
3392
+
3393
+ _source = row['source'] or 'cli'
3394
+ _title = row['title']
3395
+ if not _title and _source == 'cron' and sid.startswith('cron_'):
3396
+ # Extract job_id from session ID (cron_{job_id}_{timestamp})
3397
+ # and look up the human-friendly job name from jobs.json
3398
+ parts = sid.split('_')
3399
+ if len(parts) >= 3:
3400
+ _job_id = parts[1]
3401
+ try:
3402
+ _jobs_path = hermes_home / 'cron' / 'jobs.json'
3403
+ if _jobs_path.exists():
3404
+ import json as _json
3405
+ _jobs_data = _json.loads(_jobs_path.read_text())
3406
+ for _j in _jobs_data.get('jobs', []):
3407
+ if _j.get('id') == _job_id:
3408
+ _title = _j.get('name') or _title
3409
+ break
3410
+ except Exception:
3411
+ pass # degrade gracefully
3412
+ # If a WebUI JSON file exists for this session (e.g. previously
3413
+ # imported or renamed in the sidebar), prefer its title over the
3414
+ # state.db title. This fixes rename-not-persisting for CLI sessions
3415
+ # after compression chain extension (#1486).
3416
+ try:
3417
+ _webui_meta = Session.load_metadata_only(sid)
3418
+ if _webui_meta and getattr(_webui_meta, 'title', None):
3419
+ _title = _webui_meta.title
3420
+ except Exception:
3421
+ pass
3422
+ _display_title = _title or f'{_source.title()} Session'
3423
+ cli_sessions.append({
3424
+ 'session_id': sid,
3425
+ 'title': _display_title,
3426
+ 'workspace': str(get_last_workspace()),
3427
+ 'model': row['model'] or None,
3428
+ 'message_count': row['message_count'] or row['actual_message_count'] or 0,
3429
+ 'created_at': row['started_at'],
3430
+ 'updated_at': raw_ts,
3431
+ 'pinned': False,
3432
+ 'archived': False,
3433
+ 'project_id': _cron_pid() if is_cron_session(sid, _source) else None,
3434
+ 'profile': profile,
3435
+ 'source_tag': _source,
3436
+ 'raw_source': row.get('raw_source'),
3437
+ 'user_id': row.get('user_id'),
3438
+ 'chat_id': row.get('chat_id') or row.get('origin_chat_id'),
3439
+ 'chat_type': row.get('chat_type'),
3440
+ 'thread_id': row.get('thread_id'),
3441
+ 'session_key': row.get('session_key'),
3442
+ 'platform': row.get('platform'),
3443
+ 'session_source': row.get('session_source'),
3444
+ 'source_label': row.get('source_label'),
3445
+ 'parent_session_id': row.get('parent_session_id'),
3446
+ 'parent_title': row.get('parent_title'),
3447
+ 'parent_source': row.get('parent_source'),
3448
+ 'relationship_type': row.get('relationship_type'),
3449
+ '_parent_lineage_root_id': row.get('_parent_lineage_root_id'),
3450
+ 'end_reason': row.get('end_reason'),
3451
+ 'actual_message_count': row.get('actual_message_count'),
3452
+ 'user_message_count': row.get('actual_user_message_count'),
3453
+ '_lineage_root_id': row.get('_lineage_root_id'),
3454
+ '_lineage_tip_id': row.get('_lineage_tip_id'),
3455
+ '_compression_segment_count': row.get('_compression_segment_count'),
3456
+ 'is_cli_session': True,
3457
+ })
3458
+
3459
+ # --- Second pass: fetch cron sessions that may have been squeezed out
3460
+ # of the default window by more-recent non-cron sessions.
3461
+ # The normal sidebar query caps at CLI_VISIBLE_SESSION_LIMIT (20) rows;
3462
+ # once 20 newer sessions exist, older cron runs vanish from the payload
3463
+ # before _include_project_hidden_background_sidebar_sessions can rescue
3464
+ # them (#3172). A separate, higher-capped cron-only pass ensures they
3465
+ # stay addressable under their project chip.
3466
+ existing_sids = {s['session_id'] for s in cli_sessions}
3467
+ try:
3468
+ cron_excluded = tuple(
3469
+ s for s in ('webui', 'claude-code') # keep only 'cron'
3470
+ )
3471
+ for row in read_importable_agent_session_rows(
3472
+ db_path,
3473
+ limit=CRON_PROJECT_CHIP_LIMIT,
3474
+ log=logger,
3475
+ exclude_sources=cron_excluded,
3476
+ ):
3477
+ sid = row['id']
3478
+ if sid in existing_sids:
3479
+ continue
3480
+ _source = row['source'] or 'cli'
3481
+ if _source != 'cron':
3482
+ continue
3483
+ raw_ts = row['last_activity'] or row['started_at']
3484
+ _title = row['title']
3485
+ if not _title and sid.startswith('cron_'):
3486
+ parts = sid.split('_')
3487
+ if len(parts) >= 3:
3488
+ _job_id = parts[1]
3489
+ try:
3490
+ _jobs_path = hermes_home / 'cron' / 'jobs.json'
3491
+ if _jobs_path.exists():
3492
+ import json as _json
3493
+ _jobs_data = _json.loads(_jobs_path.read_text())
3494
+ for _j in _jobs_data.get('jobs', []):
3495
+ if _j.get('id') == _job_id:
3496
+ _title = _j.get('name') or _title
3497
+ break
3498
+ except Exception:
3499
+ pass
3500
+ try:
3501
+ _webui_meta = Session.load_metadata_only(sid)
3502
+ if _webui_meta and getattr(_webui_meta, 'title', None):
3503
+ _title = _webui_meta.title
3504
+ except Exception:
3505
+ pass
3506
+ _display_title = _title or 'Cron Session'
3507
+ cli_sessions.append({
3508
+ 'session_id': sid,
3509
+ 'title': _display_title,
3510
+ 'workspace': str(get_last_workspace()),
3511
+ 'model': row['model'] or None,
3512
+ 'message_count': row['message_count'] or row['actual_message_count'] or 0,
3513
+ 'created_at': row['started_at'],
3514
+ 'updated_at': raw_ts,
3515
+ 'pinned': False,
3516
+ 'archived': False,
3517
+ 'project_id': _cron_pid(),
3518
+ 'profile': _cli_profile,
3519
+ 'source_tag': 'cron',
3520
+ 'raw_source': row.get('raw_source'),
3521
+ 'user_id': row.get('user_id'),
3522
+ 'chat_id': row.get('chat_id') or row.get('origin_chat_id'),
3523
+ 'chat_type': row.get('chat_type'),
3524
+ 'thread_id': row.get('thread_id'),
3525
+ 'session_key': row.get('session_key'),
3526
+ 'platform': row.get('platform'),
3527
+ 'session_source': row.get('session_source'),
3528
+ 'source_label': row.get('source_label'),
3529
+ 'parent_session_id': row.get('parent_session_id'),
3530
+ 'parent_title': row.get('parent_title'),
3531
+ 'parent_source': row.get('parent_source'),
3532
+ 'relationship_type': row.get('relationship_type'),
3533
+ '_parent_lineage_root_id': row.get('_parent_lineage_root_id'),
3534
+ 'end_reason': row.get('end_reason'),
3535
+ 'actual_message_count': row.get('actual_message_count'),
3536
+ 'user_message_count': row.get('actual_user_message_count'),
3537
+ '_lineage_root_id': row.get('_lineage_root_id'),
3538
+ '_lineage_tip_id': row.get('_lineage_tip_id'),
3539
+ '_compression_segment_count': row.get('_compression_segment_count'),
3540
+ 'is_cli_session': True,
3541
+ })
3542
+ existing_sids.add(sid)
3543
+ except Exception:
3544
+ logger.debug("Cron project-chip second pass failed", exc_info=True)
3545
+
3546
+ return cli_sessions
3547
+
3548
+
3549
def get_cli_sessions() -> list:
    """Return CLI sessions from the agent's SQLite store as sidebar-ready dicts.

    When the configured cache TTL is positive, results are cached per resolved
    context key and callers receive a copy of the cached list. With a
    non-positive TTL every call loads fresh data.

    Returns an empty list when loading raises -- the bridge is purely additive
    and never crashes the WebUI. Failures are logged as warnings.
    """
    hermes_home, db_path, cli_profile, cache_key = _resolve_cli_sessions_context()
    ttl = _cli_sessions_cache_ttl_seconds()
    now = time.monotonic()

    def _load_fresh():
        """Load sessions from disk; return ``(ok, sessions)`` and log failures."""
        try:
            return True, _load_cli_sessions_uncached(hermes_home, db_path, cli_profile)
        except Exception as _cli_err:
            logger.warning(
                "get_cli_sessions() failed — check state.db schema or path (%s): %s",
                db_path, _cli_err,
            )
            return False, []

    if not ttl > 0:
        # Caching disabled: hand back whatever the loader produced.
        return _load_fresh()[1]

    # NOTE(review): the lock is held across the load so the cache read and
    # write are both guarded -- confirm this matches the intended lock scope.
    with _CLI_SESSIONS_CACHE_LOCK:
        entry = _CLI_SESSIONS_CACHE.get(cache_key)
        if entry:
            expires_at, cached_sessions = entry
            if expires_at > now:
                return _copy_cli_sessions(cached_sessions)
            # Entry is stale; drop it before reloading.
            _CLI_SESSIONS_CACHE.pop(cache_key, None)

        ok, sessions = _load_fresh()
        if not ok:
            return []

        # Expiry is measured from the end of the load, not from ``now``.
        _CLI_SESSIONS_CACHE[cache_key] = (
            time.monotonic() + ttl,
            _copy_cli_sessions(sessions),
        )
        return _copy_cli_sessions(sessions)
3590
+
3591
+
3592
+ def _json_loads_if_string(value):
3593
+ if not isinstance(value, str):
3594
+ return value
3595
+ text = value.strip()
3596
+ if not text:
3597
+ return None
3598
+ try:
3599
+ return json.loads(text)
3600
+ except Exception:
3601
+ return value
3602
+
3603
+
3604
def _state_db_continuation_chain(cur, sid) -> list:
    """Return the session ids (oldest first) that make up *sid*'s transcript.

    Walks ``parent_session_id`` links upward from *sid* while
    ``_is_continuation_session`` accepts each parent/child pair. The walk
    stops on a missing parent, a repeated id, or after 20 hops. When the
    ``sessions`` table lacks any column the walk needs, only ``[str(sid)]``
    is returned.
    """
    chain = [str(sid)]
    cur.execute("PRAGMA table_info(sessions)")
    session_cols = {str(row['name']) for row in cur.fetchall()}
    # Every column named in the SELECT below must exist, including ended_at.
    needed = {'parent_session_id', 'end_reason', 'started_at', 'source', 'ended_at'}
    if not needed.issubset(session_cols):
        return chain

    lookup_sql = """
        SELECT id, source, started_at, parent_session_id, ended_at, end_reason
        FROM sessions
        WHERE id = ?
    """
    cur.execute(lookup_sql, (sid,))
    row = cur.fetchone()
    if not row:
        return chain

    current = dict(row)
    seen = {str(row['id'])}
    for _ in range(20):
        parent_id = current.get('parent_session_id')
        # Compare as strings so cycle detection works for non-string ids too.
        if not parent_id or str(parent_id) in seen:
            break
        cur.execute(lookup_sql, (parent_id,))
        parent_row = cur.fetchone()
        if not parent_row:
            break
        parent = dict(parent_row)
        if not _is_continuation_session(parent, current):
            break
        parent_key = str(parent_row['id'])
        chain.insert(0, parent_key)
        seen.add(parent_key)
        current = parent
    return chain


def get_state_db_session_messages(sid, *, stitch_continuations: bool = False, profile=None) -> list:
    """Read messages for a Hermes session from state.db.

    When *profile* is supplied, reads from that profile's state.db; otherwise
    falls back to the active profile's state.db. This generic reader works for
    any session source, including WebUI-origin sessions that were later updated
    through another Hermes surface such as the Gateway API Server. When
    ``stitch_continuations`` is true it preserves the historical CLI/external-agent
    behavior of walking compatible compression/close parent segments before reading
    messages.

    Each returned dict carries ``role``, ``content`` and ``timestamp`` plus any
    optional metadata columns that exist in the table and are non-empty.
    JSON-encoded metadata columns are decoded when possible. Tool rows get a
    ``name`` mirroring ``tool_name`` when no ``name`` is present.

    Returns an empty list when sqlite3 is unavailable, the database or the
    required columns are missing, or any error occurs while reading.
    """
    try:
        import sqlite3
    except ImportError:
        return []

    if isinstance(profile, str) and profile:
        db_path = _get_profile_home(profile) / 'state.db'
        if not db_path.exists():
            db_path = _active_state_db_path()
    else:
        db_path = _active_state_db_path()
    if not db_path.exists():
        return []

    optional = (
        'tool_call_id',
        'tool_calls',
        'tool_name',
        'reasoning',
        'reasoning_details',
        'codex_reasoning_items',
        'reasoning_content',
        'codex_message_items',
    )
    json_columns = {'tool_calls', 'reasoning_details', 'codex_reasoning_items', 'codex_message_items'}

    try:
        with closing(sqlite3.connect(str(db_path))) as conn:
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()
            cur.execute("PRAGMA table_info(messages)")
            available = {str(row['name']) for row in cur.fetchall()}
            if not {'role', 'content', 'timestamp'}.issubset(available):
                return []

            has_id = 'id' in available
            present_optional = [c for c in optional if c in available]
            selected = (['id'] if has_id else []) + ['role', 'content', 'timestamp'] + present_optional

            if stitch_continuations:
                session_chain = _state_db_continuation_chain(cur, sid)
            else:
                session_chain = [str(sid)]

            placeholders = ', '.join('?' for _ in session_chain)
            # ``id`` is optional in the schema, so only use it as a tiebreaker
            # when the column exists; otherwise the query itself would fail.
            order_by = 'timestamp ASC, id ASC' if has_id else 'timestamp ASC'
            # Column names come from the fixed lists above, never from input.
            cur.execute(f"""
                SELECT {', '.join(selected)}, session_id
                FROM messages
                WHERE session_id IN ({placeholders})
                ORDER BY {order_by}
            """, session_chain)

            msgs = []
            for row in cur.fetchall():
                msg = {
                    'role': row['role'],
                    'content': row['content'],
                    'timestamp': row['timestamp'],
                }
                for col in present_optional:
                    value = row[col]
                    if value in (None, ''):
                        continue
                    if col in json_columns:
                        value = _json_loads_if_string(value)
                    msg[col] = value
                if msg.get('role') == 'tool' and msg.get('tool_name') and not msg.get('name'):
                    msg['name'] = msg['tool_name']
                msgs.append(msg)
    except Exception:
        # Best-effort reader: never propagate, but leave a trace for debugging.
        logger.debug("get_state_db_session_messages() failed for %s", db_path, exc_info=True)
        return []
    return msgs
3723
+
3724
+
3725
def get_state_db_session_summary(sid, *, profile=None) -> dict:
    """Return a cheap message count/timestamp summary for one state.db session.

    When *profile* is a non-empty string, that profile's state.db is used if it
    exists; otherwise the active state.db is used.

    Returns a dict with ``message_count`` (int, never negative) and
    ``last_message_at`` (float). ``last_message_at`` is ``0.0`` when the table
    has no ``timestamp`` column, the session has no messages, or the stored
    value cannot be interpreted. Both fields are zero when *sid* is falsy, the
    database or ``session_id`` column is missing, or any error occurs.
    """
    def _empty() -> dict:
        return {"message_count": 0, "last_message_at": 0.0}

    def _epoch_seconds(raw) -> float:
        """Convert a stored timestamp to float seconds, or 0.0 if unusable."""
        if raw is None or raw == '':
            return 0.0
        try:
            return float(raw)
        except (TypeError, ValueError):
            pass
        # Fall back to ISO-8601 text so a string timestamp does not discard
        # the message count along with the timestamp.
        try:
            import datetime
            return datetime.datetime.fromisoformat(
                str(raw).replace('Z', '+00:00')
            ).timestamp()
        except Exception:
            return 0.0

    try:
        import sqlite3
    except ImportError:
        return _empty()

    if isinstance(profile, str) and profile:
        db_path = _get_profile_home(profile) / 'state.db'
        if not db_path.exists():
            db_path = _active_state_db_path()
    else:
        db_path = _active_state_db_path()
    if not sid or not db_path.exists():
        return _empty()

    try:
        with closing(sqlite3.connect(str(db_path))) as conn:
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()
            cur.execute("PRAGMA table_info(messages)")
            available = {str(row['name']) for row in cur.fetchall()}
            if 'session_id' not in available:
                return _empty()

            if 'timestamp' in available:
                cur.execute(
                    "SELECT COUNT(*) AS message_count, MAX(timestamp) AS last_message_at "
                    "FROM messages WHERE session_id = ?",
                    (str(sid),),
                )
                row = cur.fetchone()
                if not row:
                    return _empty()
                return {
                    "message_count": max(0, int(row["message_count"] or 0)),
                    "last_message_at": _epoch_seconds(row["last_message_at"]),
                }

            # No timestamp column: only the count is available.
            cur.execute("SELECT COUNT(*) AS message_count FROM messages WHERE session_id = ?", (str(sid),))
            row = cur.fetchone()
            return {
                "message_count": max(0, int(row["message_count"] or 0)) if row else 0,
                "last_message_at": 0.0,
            }
    except Exception:
        return _empty()
3770
+
3771
+
3772
+ def _normalized_message_timestamp_for_key(value):
3773
+ if value is None or value == "":
3774
+ return ""
3775
+ try:
3776
+ timestamp = float(value)
3777
+ except (TypeError, ValueError):
3778
+ return str(value)
3779
+ # Truncate to second-level granularity so that sub-second drift between
3780
+ # the sidecar JSON write and the state.db created_at write does not cause
3781
+ # the legacy dedup key to differ for the same logical message.
3782
+ return str(int(timestamp))
3783
+
3784
+
3785
+ def _message_timestamp_as_float(msg):
3786
+ if not isinstance(msg, dict):
3787
+ return None
3788
+ value = msg.get("timestamp")
3789
+ if value is None or value == "":
3790
+ return None
3791
+ try:
3792
+ return float(value)
3793
+ except (TypeError, ValueError):
3794
+ return None
3795
+
3796
+
3797
+ def _session_message_merge_key(msg: dict):
3798
+ if not isinstance(msg, dict):
3799
+ return ("non_dict", repr(msg))
3800
+ message_identity = msg.get("id") or msg.get("message_id")
3801
+ if message_identity:
3802
+ return ("message_id", str(message_identity))
3803
+ return (
3804
+ "legacy",
3805
+ str(msg.get("role") or ""),
3806
+ str(msg.get("content") or ""),
3807
+ _normalized_message_timestamp_for_key(msg.get("timestamp")),
3808
+ str(msg.get("tool_call_id") or ""),
3809
+ str(msg.get("tool_name") or msg.get("name") or ""),
3810
+ )
3811
+
3812
+
3813
+ def _normalized_session_message_content(msg: dict) -> str:
3814
+ if not isinstance(msg, dict):
3815
+ return repr(msg)
3816
+ return " ".join(str(msg.get("content") or "").split())
3817
+
3818
+
3819
+ def _loose_session_message_content(value: str) -> str:
3820
+ return " ".join(re.findall(r"\w+", str(value or "").casefold()))
3821
+
3822
+
3823
+ def _session_message_content_key(msg: dict):
3824
+ if not isinstance(msg, dict):
3825
+ return ("non_dict", repr(msg))
3826
+ return (
3827
+ str(msg.get("role") or ""),
3828
+ _normalized_session_message_content(msg),
3829
+ str(msg.get("tool_call_id") or ""),
3830
+ str(msg.get("tool_name") or msg.get("name") or ""),
3831
+ )
3832
+
3833
+
3834
+ def _session_message_visible_key(msg: dict):
3835
+ if not isinstance(msg, dict):
3836
+ return ("non_dict", repr(msg))
3837
+ return (
3838
+ str(msg.get("role") or ""),
3839
+ _normalized_session_message_content(msg),
3840
+ )
3841
+
3842
+
3843
+ def _build_visible_duplicate_lookup(visible_keys: set[tuple]) -> dict:
3844
+ by_role = {}
3845
+ loose_by_key = {}
3846
+ for key in visible_keys:
3847
+ try:
3848
+ role, content = key
3849
+ except (TypeError, ValueError):
3850
+ continue
3851
+ if not content:
3852
+ continue
3853
+ by_role.setdefault(role, []).append(key)
3854
+ loose_by_key[key] = _loose_session_message_content(content)
3855
+ return {"keys": visible_keys, "by_role": by_role, "loose_by_key": loose_by_key}
3856
+
3857
+
3858
+ def _matching_visible_duplicate(visible_key: tuple, visible_keys: set[tuple], lookup: dict | None = None):
3859
+ if visible_key in visible_keys:
3860
+ return visible_key
3861
+ role, content = visible_key
3862
+ if not content:
3863
+ return None
3864
+ if lookup is None:
3865
+ lookup = _build_visible_duplicate_lookup(visible_keys)
3866
+ loose_content = None
3867
+ for existing_role, existing_content in lookup.get("by_role", {}).get(role, []):
3868
+ if role != existing_role or not existing_content:
3869
+ continue
3870
+ if content in existing_content or existing_content in content:
3871
+ return (existing_role, existing_content)
3872
+ if loose_content is None:
3873
+ loose_content = _loose_session_message_content(content)
3874
+ loose_existing = lookup.get("loose_by_key", {}).get((existing_role, existing_content), "")
3875
+ if loose_content and loose_existing and (
3876
+ loose_content in loose_existing or loose_existing in loose_content
3877
+ ):
3878
+ return (existing_role, existing_content)
3879
+ return None
3880
+
3881
+
3882
def _has_visible_duplicate(visible_key: tuple, visible_keys: set[tuple]) -> bool:
    """Report whether *visible_key* has an exact or fuzzy match in *visible_keys*."""
    match = _matching_visible_duplicate(visible_key, visible_keys)
    return match is not None
3884
+
3885
+
3886
def state_db_delta_after_context(sidecar_context: list, state_messages: list) -> list:
    """Return only state.db rows that are newer than model-facing context.

    `context_messages` is the authoritative model-facing prefix. state.db may
    contain a mirrored copy of that prefix with fresh timestamps, especially for
    LCM/continuation sessions. Appending the whole state transcript to a clean
    sidecar context replays old context into the next runtime prompt.

    When either input is empty, or fewer than two leading state rows mirror the
    context, the state messages are returned unchanged (as a new list).
    """
    sidecar_context = list(sidecar_context or [])
    state_messages = list(state_messages or [])
    if not (sidecar_context and state_messages):
        return state_messages

    context_keys = [_session_message_content_key(m) for m in sidecar_context]
    db_keys = [_session_message_content_key(m) for m in state_messages]
    n_context = len(context_keys)
    n_db = len(db_keys)

    # Longest run of leading state rows that mirrors the context starting at
    # any candidate offset.
    longest_run = 0
    for start in range(min(n_context, n_db)):
        run = 0
        while (
            start + run < n_context
            and run < n_db
            and context_keys[start + run] == db_keys[run]
        ):
            run += 1
        longest_run = max(longest_run, run)

    # Require at least two mirrored rows. A single repeated short user message
    # is not enough evidence that state.db starts with a mirrored context
    # segment, but small recovered contexts often contain only a compact summary
    # and one follow-up row; those should still use the delta path.
    if longest_run < 2:
        return state_messages

    # Drop only rows that can be aligned with the remaining sidecar context in
    # order. This still tolerates stale state-only rows between mirrored context
    # rows, but once the sidecar context is exhausted every later state row is a
    # real delta, even if it repeats a short earlier message.
    context_pos = longest_run
    db_pos = longest_run
    while context_pos < n_context and db_pos < n_db:
        if db_keys[db_pos] == context_keys[context_pos]:
            context_pos += 1
        db_pos += 1

    if context_pos == n_context:
        return state_messages[db_pos:]
    return state_messages[longest_run:]
3934
+
3935
+
3936
def merge_session_messages_append_only(
    sidecar_messages: list,
    state_messages: list,
    *,
    truncation_watermark=None,
) -> list:
    """Merge sidecar/context and state.db messages without deleting local rows.

    Every sidecar message is kept, in order. State rows are appended after
    them only when they pass all of the duplicate/ordering gates below.

    Args:
        sidecar_messages: Local messages; ``None`` is treated as empty.
        state_messages: Rows read from state.db; ``None`` is treated as empty.
        truncation_watermark: Optional timestamp. State rows stamped later
            than it are dropped unless their merge key is already known.

    Returns:
        The sidecar list when there are no state rows; the (optionally
        watermark-filtered) state rows when there are no sidecar rows;
        otherwise the sidecar messages followed by the accepted state rows.

    NOTE(review): block nesting was reconstructed from a listing that lost its
    indentation -- confirm the placement of the ``continue`` statements.
    """
    sidecar_messages = list(sidecar_messages or [])
    state_messages = list(state_messages or [])
    watermark_timestamp = _message_timestamp_as_float({"timestamp": truncation_watermark})
    if not state_messages:
        return sidecar_messages
    if not sidecar_messages:
        if watermark_timestamp is not None:
            # Only state rows with a usable timestamp at or before the
            # watermark survive; rows without a timestamp are dropped.
            return [
                msg for msg in state_messages
                if (
                    (timestamp := _message_timestamp_as_float(msg)) is not None
                    and timestamp <= watermark_timestamp
                )
            ]
        return state_messages

    merged_messages = []
    seen_message_keys = set()
    # NOTE(review): seen_content_keys and seen_visible_keys are written but
    # never read within this function.
    seen_content_keys = set()
    seen_visible_keys = set()
    sidecar_visible_sequence = []
    sidecar_visible_keys = set()
    sidecar_visible_counts = {}
    max_sidecar_timestamp = None
    # Pass 1: keep every sidecar message and index it for the checks below.
    for msg in sidecar_messages:
        timestamp = _message_timestamp_as_float(msg)
        if timestamp is not None:
            max_sidecar_timestamp = timestamp if max_sidecar_timestamp is None else max(max_sidecar_timestamp, timestamp)
        key = _session_message_merge_key(msg)
        seen_message_keys.add(key)
        seen_content_keys.add(_session_message_content_key(msg))
        visible_key = _session_message_visible_key(msg)
        seen_visible_keys.add(visible_key)
        sidecar_visible_keys.add(visible_key)
        sidecar_visible_counts[visible_key] = sidecar_visible_counts.get(visible_key, 0) + 1
        sidecar_visible_sequence.append(visible_key)
        merged_messages.append(msg)
    sidecar_visible_lookup = _build_visible_duplicate_lookup(sidecar_visible_keys)
    # Index of the next sidecar row a state row is expected to mirror.
    state_replay_idx = 0
    # How many state rows have been skipped as duplicates of each sidecar key.
    skipped_state_visible_counts = {}
    # Pass 2: decide, row by row, whether a state message is new.
    for msg in state_messages:
        timestamp = _message_timestamp_as_float(msg)
        key = _session_message_merge_key(msg)
        visible_key = _session_message_visible_key(msg)
        replays_sidecar_prefix = False
        if state_replay_idx < len(sidecar_visible_sequence):
            expected_visible_key = sidecar_visible_sequence[state_replay_idx]
            if visible_key == expected_visible_key or _has_visible_duplicate(
                visible_key, {expected_visible_key}
            ):
                replays_sidecar_prefix = True
                state_replay_idx += 1
        if replays_sidecar_prefix:
            # The row mirrors the next expected sidecar row: record the skip
            # and do not append it.
            matched_visible_key = _matching_visible_duplicate(
                visible_key,
                sidecar_visible_keys,
                sidecar_visible_lookup,
            )
            if matched_visible_key is not None:
                skipped_state_visible_counts[matched_visible_key] = (
                    skipped_state_visible_counts.get(matched_visible_key, 0) + 1
                )
            continue
        # Rows stamped after the truncation watermark are dropped unless the
        # sidecar already knows their merge key.
        if (
            watermark_timestamp is not None
            and timestamp is not None
            and timestamp > watermark_timestamp
            and key not in seen_message_keys
        ):
            continue
        # At or before the newest sidecar timestamp, only rows with an
        # explicit, not-yet-seen message id may continue.
        if max_sidecar_timestamp is not None and timestamp is not None and timestamp <= max_sidecar_timestamp:
            if key in seen_message_keys:
                continue
            if not (isinstance(key, tuple) and key[:1] == ("message_id",)):
                continue
        if key in seen_message_keys:
            continue
        matched_visible_key = _matching_visible_duplicate(
            visible_key,
            sidecar_visible_keys,
            sidecar_visible_lookup,
        )
        if matched_visible_key is not None:
            # Skip a visible duplicate only while fewer state rows than
            # sidecar rows have been skipped for that key.
            skipped_count = skipped_state_visible_counts.get(matched_visible_key, 0)
            sidecar_count = sidecar_visible_counts.get(matched_visible_key, 0)
            if skipped_count < sidecar_count:
                skipped_state_visible_counts[matched_visible_key] = skipped_count + 1
                continue
        # State rows at or before the newest sidecar timestamp are normally
        # assumed to have already been observed by the sidecar. The <= gate
        # preserves sidecar-only ordering/metadata for equal timestamps and
        # prevents duplicate legacy rows when timestamp precision differs
        # between stores. State rows whose visible content already exists in
        # the sidecar are also skipped even if state.db restamped them later
        # during compaction/recovery; otherwise old prompts can be appended
        # after the assistant tail and make /api/session look like the answer
        # vanished. Explicit message ids are authoritative for distinct rows
        # only when their visible content is not already present.
        # NOTE(review): this gate looks unreachable -- the earlier
        # ``timestamp <= max_sidecar_timestamp`` block already skips every
        # row whose key is not a "message_id" key. Confirm before removing.
        if (
            key[0] != "message_id"
            and max_sidecar_timestamp is not None
            and timestamp is not None
            and timestamp <= max_sidecar_timestamp
        ):
            continue
        if key[0] == "message_id":
            seen_message_keys.add(key)
        seen_content_keys.add(_session_message_content_key(msg))
        seen_visible_keys.add(visible_key)
        merged_messages.append(msg)
    return merged_messages
4054
+
4055
+
4056
+ def reconciled_state_db_messages_for_session(
4057
+ session, *, prefer_context: bool = False, state_messages: list | None = None
4058
+ ) -> list:
4059
+ """Return append-only messages reconciled with state.db for a WebUI session."""
4060
+ if session is None:
4061
+ return []
4062
+ local_messages = []
4063
+ if prefer_context:
4064
+ context_messages = getattr(session, 'context_messages', None)
4065
+ if isinstance(context_messages, list) and context_messages:
4066
+ local_messages = context_messages
4067
+ if not local_messages:
4068
+ local_messages = getattr(session, 'messages', None) or []
4069
+ if state_messages is None:
4070
+ state_messages = get_state_db_session_messages(getattr(session, 'session_id', None))
4071
+ if prefer_context and local_messages:
4072
+ state_messages = state_db_delta_after_context(local_messages, state_messages)
4073
+ return merge_session_messages_append_only(
4074
+ local_messages,
4075
+ state_messages,
4076
+ truncation_watermark=getattr(session, "truncation_watermark", None),
4077
+ )
4078
+
4079
+
4080
def get_cli_session_messages(sid) -> list:
    """Read messages for a single CLI/external-agent session.

    Session ids starting with ``f'{CLAUDE_CODE_SOURCE}_'`` are served by the
    Claude Code reader. All other ids are read from state.db with continuation
    stitching enabled, so the tip of a compression/CLI-close chain yields the
    stitched transcript across its segments, with the tool-call/result and
    reasoning metadata stored in state.db.
    """
    claude_prefix = f'{CLAUDE_CODE_SOURCE}_'
    sid_text = str(sid or '')
    if sid_text.startswith(claude_prefix):
        return get_claude_code_session_messages(sid)
    return get_state_db_session_messages(sid, stitch_continuations=True)
4092
+
4093
+
4094
def count_conversation_rounds(sid: str, since: float | None = None) -> int:
    """Count conversation rounds for a session from state.db.

    A "round" = one user message + one agent reply. Consecutive user
    messages are merged into a single round so that multi-part questions
    don't inflate the count. Extra assistant messages after the first reply
    do not start a new round.

    Parameters
    ----------
    sid : str
        Gateway session ID (e.g. ``20260430_151231_7209a0``).
    since : float | None
        Unix timestamp. If provided, only messages **after** this
        timestamp are counted. Messages without a timestamp, or with one
        that cannot be parsed, are still counted.

    Returns
    -------
    int
        Number of complete conversation rounds; ``0`` when the database is
        missing or cannot be read.
    """
    import os
    import sqlite3
    import datetime

    try:
        from api.profiles import get_active_hermes_home
        hermes_home = Path(get_active_hermes_home()).expanduser().resolve()
    except Exception:
        hermes_home = Path(os.getenv('HERMES_HOME', str(HOME / '.hermes'))).expanduser().resolve()
    db_path = hermes_home / 'state.db'
    if not db_path.exists():
        return 0

    try:
        with closing(sqlite3.connect(str(db_path))) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute(
                "SELECT role, timestamp FROM messages WHERE session_id = ? ORDER BY timestamp ASC",
                (sid,),
            )
            records = cursor.fetchall()
    except Exception:
        return 0

    def _excluded_by_since(ts_raw) -> bool:
        """True when the row's timestamp is at or before ``since``."""
        if since is None or ts_raw is None:
            return False
        try:
            if isinstance(ts_raw, (int, float)):
                ts_val = float(ts_raw)
            else:
                # ISO-8601 string
                ts_val = datetime.datetime.fromisoformat(
                    str(ts_raw).replace('Z', '+00:00')
                ).timestamp()
            return bool(ts_val <= since)
        except Exception:
            # Unparseable timestamps are not filtered out.
            return False

    completed = 0
    awaiting_reply = False  # a user message is waiting for its first reply
    for record in records:
        role = (record['role'] or '').strip().lower()
        if _excluded_by_since(record['timestamp']):
            continue
        if role == 'user':
            # Consecutive user messages simply keep the round open.
            awaiting_reply = True
        elif role == 'assistant' and awaiting_reply:
            # The first reply after a user message completes the round.
            completed += 1
            awaiting_reply = False

    return completed
4178
+
4179
+
4180
# NOTE(review): presumably a limit compared against the result of
# count_conversation_rounds(); its consumers are not visible here -- confirm.
CONVERSATION_ROUND_THRESHOLD = 10
4181
+
4182
+
4183
def delete_cli_session(sid) -> bool:
    """Delete a CLI session from state.db (messages + session row).

    Returns True when the ``sessions`` delete removed at least one row.
    Returns False when sqlite3 is unavailable, the database is missing, no
    session row matched, or any error occurs.
    """
    import os
    try:
        import sqlite3
    except ImportError:
        return False

    try:
        from api.profiles import get_active_hermes_home
        hermes_home = Path(get_active_hermes_home()).expanduser().resolve()
    except Exception:
        hermes_home = Path(os.getenv('HERMES_HOME', str(HOME / '.hermes'))).expanduser().resolve()

    db_path = hermes_home / 'state.db'
    if not db_path.exists():
        return False

    # Messages go first; the session row is deleted last so that the cursor's
    # rowcount reflects the session delete.
    statements = (
        "DELETE FROM messages WHERE session_id = ?",
        "DELETE FROM sessions WHERE id = ?",
    )
    try:
        with closing(sqlite3.connect(str(db_path))) as conn:
            cursor = conn.cursor()
            for statement in statements:
                cursor.execute(statement, (sid,))
            conn.commit()
            session_row_removed = cursor.rowcount > 0
            return session_row_removed
    except Exception:
        return False