jupyterlab-codex-sidebar 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. package/.claude/settings.local.json +9 -0
  2. package/.github/workflows/unit-tests.yml +27 -0
  3. package/.jupyterlab-playwright.log +0 -0
  4. package/README.md +83 -9
  5. package/docs/images/codex-sidebar-screenshot.png +0 -0
  6. package/jupyterlab_codex/handlers.py +938 -297
  7. package/jupyterlab_codex/labextension/package.json +13 -3
  8. package/jupyterlab_codex/labextension/static/525.224526d045c727069de6.js +2 -0
  9. package/jupyterlab_codex/labextension/static/737.e7de3ad9dd6ded798340.js +1 -0
  10. package/jupyterlab_codex/labextension/static/remoteEntry.6ef5e7167763a316c000.js +1 -0
  11. package/jupyterlab_codex/protocol.py +297 -0
  12. package/jupyterlab_codex/runner.py +58 -15
  13. package/jupyterlab_codex/sessions.py +582 -97
  14. package/lib/codexChat.d.ts +13 -0
  15. package/lib/codexChat.js +2506 -0
  16. package/lib/codexChat.js.map +1 -0
  17. package/lib/codexChatAttachmentDedup.d.ts +10 -0
  18. package/lib/codexChatAttachmentDedup.js +35 -0
  19. package/lib/codexChatAttachmentDedup.js.map +1 -0
  20. package/lib/codexChatAttachmentLimit.d.ts +18 -0
  21. package/lib/codexChatAttachmentLimit.js +50 -0
  22. package/lib/codexChatAttachmentLimit.js.map +1 -0
  23. package/lib/codexChatAttachmentState.d.ts +15 -0
  24. package/lib/codexChatAttachmentState.js +16 -0
  25. package/lib/codexChatAttachmentState.js.map +1 -0
  26. package/lib/codexChatDocumentUtils.d.ts +70 -0
  27. package/lib/codexChatDocumentUtils.js +506 -0
  28. package/lib/codexChatDocumentUtils.js.map +1 -0
  29. package/lib/codexChatFormatting.d.ts +11 -0
  30. package/lib/codexChatFormatting.js +83 -0
  31. package/lib/codexChatFormatting.js.map +1 -0
  32. package/lib/codexChatNotice.d.ts +3 -0
  33. package/lib/codexChatNotice.js +74 -0
  34. package/lib/codexChatNotice.js.map +1 -0
  35. package/lib/codexChatPersistence.d.ts +35 -0
  36. package/lib/codexChatPersistence.js +158 -0
  37. package/lib/codexChatPersistence.js.map +1 -0
  38. package/lib/codexChatPrimitives.d.ts +44 -0
  39. package/lib/codexChatPrimitives.js +156 -0
  40. package/lib/codexChatPrimitives.js.map +1 -0
  41. package/lib/codexChatRender.d.ts +24 -0
  42. package/lib/codexChatRender.js +293 -0
  43. package/lib/codexChatRender.js.map +1 -0
  44. package/lib/codexChatSessionFactory.d.ts +15 -0
  45. package/lib/codexChatSessionFactory.js +45 -0
  46. package/lib/codexChatSessionFactory.js.map +1 -0
  47. package/lib/codexChatSessionKey.d.ts +3 -0
  48. package/lib/codexChatSessionKey.js +14 -0
  49. package/lib/codexChatSessionKey.js.map +1 -0
  50. package/lib/codexChatStorage.d.ts +4 -0
  51. package/lib/codexChatStorage.js +37 -0
  52. package/lib/codexChatStorage.js.map +1 -0
  53. package/lib/codexSessionResolver.d.ts +12 -0
  54. package/lib/codexSessionResolver.js +38 -0
  55. package/lib/codexSessionResolver.js.map +1 -0
  56. package/lib/handlers/activitySummarizer.d.ts +15 -0
  57. package/lib/handlers/activitySummarizer.js +327 -0
  58. package/lib/handlers/activitySummarizer.js.map +1 -0
  59. package/lib/handlers/codexMessageTypes.d.ts +30 -0
  60. package/lib/handlers/codexMessageTypes.js +2 -0
  61. package/lib/handlers/codexMessageTypes.js.map +1 -0
  62. package/lib/handlers/codexMessageUtils.d.ts +46 -0
  63. package/lib/handlers/codexMessageUtils.js +144 -0
  64. package/lib/handlers/codexMessageUtils.js.map +1 -0
  65. package/lib/handlers/handleCodexSocketMessage.d.ts +107 -0
  66. package/lib/handlers/handleCodexSocketMessage.js +78 -0
  67. package/lib/handlers/handleCodexSocketMessage.js.map +1 -0
  68. package/lib/handlers/sessionSyncHandler.d.ts +34 -0
  69. package/lib/handlers/sessionSyncHandler.js +181 -0
  70. package/lib/handlers/sessionSyncHandler.js.map +1 -0
  71. package/lib/hooks/useCodexSocket.d.ts +15 -0
  72. package/lib/hooks/useCodexSocket.js +84 -0
  73. package/lib/hooks/useCodexSocket.js.map +1 -0
  74. package/lib/index.js +1 -1
  75. package/lib/index.js.map +1 -1
  76. package/lib/panel.d.ts +1 -11
  77. package/lib/panel.js +1 -2815
  78. package/lib/panel.js.map +1 -1
  79. package/lib/protocol.d.ts +235 -0
  80. package/lib/protocol.js +278 -0
  81. package/lib/protocol.js.map +1 -0
  82. package/package.json +13 -3
  83. package/playwright.config.cjs +27 -0
  84. package/playwright.unit.config.cjs +19 -0
  85. package/pyproject.toml +1 -1
  86. package/release.sh +52 -14
  87. package/scripts/run_playwright_e2e.sh +96 -0
  88. package/scripts/run_playwright_freeze_repro.sh +58 -0
  89. package/scripts/run_playwright_queue_repro.sh +60 -0
  90. package/scripts/run_playwright_repro.sh +55 -0
  91. package/src/codexChat.tsx +3914 -0
  92. package/src/codexChatAttachmentDedup.ts +47 -0
  93. package/src/codexChatAttachmentLimit.ts +81 -0
  94. package/src/codexChatAttachmentState.ts +37 -0
  95. package/src/codexChatDocumentUtils.ts +644 -0
  96. package/src/codexChatFormatting.ts +94 -0
  97. package/src/codexChatNotice.ts +95 -0
  98. package/src/codexChatPersistence.ts +191 -0
  99. package/src/codexChatPrimitives.tsx +446 -0
  100. package/src/codexChatRender.tsx +376 -0
  101. package/src/codexChatSessionFactory.ts +79 -0
  102. package/src/codexChatSessionKey.ts +16 -0
  103. package/src/codexChatStorage.ts +36 -0
  104. package/src/codexSessionResolver.ts +56 -0
  105. package/src/handlers/activitySummarizer.ts +369 -0
  106. package/src/handlers/codexMessageTypes.ts +34 -0
  107. package/src/handlers/codexMessageUtils.ts +217 -0
  108. package/src/handlers/handleCodexSocketMessage.ts +204 -0
  109. package/src/handlers/sessionSyncHandler.ts +308 -0
  110. package/src/hooks/useCodexSocket.ts +109 -0
  111. package/src/index.ts +1 -1
  112. package/src/panel.tsx +1 -4184
  113. package/src/protocol.ts +582 -0
  114. package/style/index.css +480 -11
  115. package/test-results/.last-run.json +4 -0
  116. package/test.py +0 -0
  117. package/tests/e2e/cell-output-error-tail.spec.js +156 -0
  118. package/tests/e2e/codex-ui-test-helpers.js +138 -0
  119. package/tests/e2e/fixtures/notebooks/error-output-tail.ipynb +58 -0
  120. package/tests/e2e/fixtures/notebooks/error-output-tail.py +19 -0
  121. package/tests/e2e/fixtures/notebooks/tab1.ipynb +322 -0
  122. package/tests/e2e/fixtures/notebooks/tab1.py +272 -0
  123. package/tests/e2e/fixtures/notebooks/tab2.ipynb +252 -0
  124. package/tests/e2e/fixtures/notebooks/tab2.py +231 -0
  125. package/tests/e2e/fixtures/notebooks/tab3.ipynb +403 -0
  126. package/tests/e2e/fixtures/notebooks/tab3.py +331 -0
  127. package/tests/e2e/fixtures/notebooks/tab4.py +339 -0
  128. package/tests/e2e/freeze-notebook-tabs-repro.spec.js +295 -0
  129. package/tests/e2e/mock-codex-cli-flood.py +127 -0
  130. package/tests/e2e/mock-codex-cli-prompt-echo.py +88 -0
  131. package/tests/e2e/mock-codex-cli.py +95 -0
  132. package/tests/e2e/queue-multitab-repro.spec.js +189 -0
  133. package/tests/test_handlers.py +116 -0
  134. package/tests/test_protocol.py +169 -0
  135. package/tests/test_session_store_limits.py +50 -0
  136. package/tests/unit/codexChatAttachmentDedup.spec.ts +56 -0
  137. package/tests/unit/codexChatAttachmentLimit.spec.ts +57 -0
  138. package/tests/unit/codexChatAttachmentState.spec.ts +71 -0
  139. package/tests/unit/codexChatDocumentUtils.spec.ts +63 -0
  140. package/tests/unit/codexChatLimit.spec.ts +18 -0
  141. package/tests/unit/codexChatNotice.spec.ts +45 -0
  142. package/tests/unit/codexChatPersistence.spec.ts +199 -0
  143. package/tests/unit/codexChatSessionFactory.spec.ts +94 -0
  144. package/tests/unit/codexChatSessionKey.spec.ts +18 -0
  145. package/tests/unit/codexMessageUtils.spec.ts +89 -0
  146. package/tests/unit/codexSessionResolver.spec.ts +92 -0
  147. package/tests/unit/handleCodexSocketMessage.spec.ts +476 -0
  148. package/tsconfig.tsbuildinfo +1 -1
  149. package/webpack.config.js +6 -0
  150. package/jupyterlab_codex/labextension/static/504.335f3447c84ba3d74517.js +0 -2
  151. package/jupyterlab_codex/labextension/static/972.8e856719e40acc1ef4cb.js +0 -1
  152. package/jupyterlab_codex/labextension/static/remoteEntry.a2982f776a1f0f515640.js +0 -1
  153. /package/jupyterlab_codex/labextension/static/{504.335f3447c84ba3d74517.js.LICENSE.txt → 525.224526d045c727069de6.js.LICENSE.txt} +0 -0
@@ -1,15 +1,24 @@
1
1
  import json
2
2
  import os
3
3
  import re
4
+ import threading
4
5
  from datetime import datetime, timedelta, timezone
5
6
  from pathlib import Path
6
- from typing import Dict, List, Tuple
7
+ from typing import Any, Dict, List, Tuple
8
+ from uuid import uuid4
7
9
 
8
10
 
9
11
  _TRUE_VALUES = {"1", "true", "y", "yes", "on"}
10
12
  _FALSE_VALUES = {"0", "false", "n", "no", "off"}
11
13
  _DEFAULT_SESSION_RETENTION_DAYS = 30
14
+ _DEFAULT_SESSION_MAX_MESSAGES = 100
15
+ _DEFAULT_SESSION_MAX_BYTES = 2_000_000
16
+ _DEFAULT_SESSION_PRUNE_INTERVAL_MINUTES = 15
12
17
  _DEFAULT_MAX_MESSAGE_CHARS = 12000
18
+ _DEFAULT_UI_LABEL_MAX_CHARS = 80
19
+ _DEFAULT_UI_PREVIEW_MAX_CHARS = 500
20
+ _DEFAULT_UI_PREVIEW_MAX_ITEMS_PER_SESSION = 10
21
+ _SESSION_FILE_VERSION = 1
13
22
 
14
23
  _SENSITIVE_PATTERNS = [
15
24
  (
@@ -25,6 +34,10 @@ _SENSITIVE_PATTERNS = [
25
34
 
26
35
 
27
36
  class SessionStore:
37
+ """Session persistence with bounded growth and recoverable file handling."""
38
+
39
+ _file_lock = threading.RLock()
40
+
28
41
  def __init__(self, base_dir: str | None = None):
29
42
  root = base_dir or os.path.join(os.path.expanduser("~"), ".jupyter", "codex-sessions")
30
43
  self._base = Path(root)
@@ -32,25 +45,46 @@ class SessionStore:
32
45
  os.environ.get("JUPYTERLAB_CODEX_SESSION_LOGGING"), default=True
33
46
  )
34
47
  self._retention_days = _as_non_negative_int(
35
- os.environ.get("JUPYTERLAB_CODEX_SESSION_RETENTION_DAYS"), _DEFAULT_SESSION_RETENTION_DAYS
48
+ os.environ.get("JUPYTERLAB_CODEX_SESSION_RETENTION_DAYS"),
49
+ _DEFAULT_SESSION_RETENTION_DAYS,
36
50
  )
37
51
  self._max_message_chars = _as_positive_int(
38
52
  os.environ.get("JUPYTERLAB_CODEX_SESSION_MAX_MESSAGE_CHARS"), _DEFAULT_MAX_MESSAGE_CHARS
39
53
  )
54
+ self._max_messages_per_session = _as_positive_int(
55
+ os.environ.get("JUPYTERLAB_CODEX_SESSION_MAX_MESSAGES"),
56
+ _DEFAULT_SESSION_MAX_MESSAGES,
57
+ )
58
+ self._max_session_bytes = _as_non_negative_int(
59
+ os.environ.get("JUPYTERLAB_CODEX_SESSION_MAX_BYTES"),
60
+ _DEFAULT_SESSION_MAX_BYTES,
61
+ )
62
+ self._prune_interval = timedelta(
63
+ minutes=_as_positive_int(
64
+ os.environ.get("JUPYTERLAB_CODEX_SESSION_PRUNE_INTERVAL_MINUTES"),
65
+ _DEFAULT_SESSION_PRUNE_INTERVAL_MINUTES,
66
+ )
67
+ )
68
+ self._last_global_prune = datetime.min.replace(tzinfo=timezone.utc)
69
+
40
70
  if self._logging_enabled:
41
71
  self._base.mkdir(parents=True, exist_ok=True)
42
- self.prune_expired_sessions()
72
+ with self._file_lock:
73
+ self._prune_expired_sessions_locked()
43
74
 
44
75
  def ensure_session(self, session_id: str, notebook_path: str, notebook_os_path: str = "") -> None:
45
76
  if not self._logging_enabled:
46
77
  return
78
+ if not session_id:
79
+ return
47
80
 
48
- meta_path = self._meta_path(session_id)
49
- if meta_path.exists():
81
+ existing_meta = self._load_meta(session_id)
82
+ if existing_meta:
50
83
  return
51
84
 
52
85
  paired_path, paired_os_path = _derive_paired_paths(notebook_path, notebook_os_path)
53
86
  meta = {
87
+ "schema_version": _SESSION_FILE_VERSION,
54
88
  "session_id": session_id,
55
89
  "notebook_path": notebook_path,
56
90
  "notebook_os_path": notebook_os_path,
@@ -58,58 +92,97 @@ class SessionStore:
58
92
  "paired_os_path": paired_os_path,
59
93
  "created_at": _now_iso(),
60
94
  "updated_at": _now_iso(),
95
+ "retention_days": self._retention_days,
96
+ "max_messages_per_session": self._max_messages_per_session,
61
97
  }
62
- meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
98
+ with self._file_lock:
99
+ self._write_meta_atomic(self._meta_path(session_id), meta)
63
100
 
64
- def append_message(self, session_id: str, role: str, content: str) -> None:
101
+ def append_message(
102
+ self, session_id: str, role: str, content: str, ui: Dict[str, Any] | None = None
103
+ ) -> None:
65
104
  if not self._logging_enabled:
66
105
  return
106
+ if not session_id:
107
+ return
67
108
 
109
+ normalized_role = role if role in {"system", "user", "assistant"} else "system"
68
110
  record = {
69
- "role": role,
111
+ "role": normalized_role,
70
112
  "content": _sanitize_message(content, self._max_message_chars),
71
113
  "timestamp": _now_iso(),
72
114
  }
73
- with self._jsonl_path(session_id).open("a", encoding="utf-8") as handle:
74
- handle.write(json.dumps(record))
75
- handle.write("\n")
76
- self._touch_meta(session_id)
77
- self.prune_expired_sessions()
115
+ ui_payload = _sanitize_ui_payload(ui)
116
+ if ui_payload:
117
+ record["ui"] = ui_payload
78
118
 
79
- def load_messages(self, session_id: str) -> List[Dict[str, str]]:
119
+ with self._file_lock:
120
+ path = self._jsonl_path(session_id)
121
+ try:
122
+ with path.open("a", encoding="utf-8") as handle:
123
+ handle.write(json.dumps(record))
124
+ handle.write("\n")
125
+ except OSError:
126
+ return
127
+
128
+ self._touch_meta_locked(session_id)
129
+ self._enforce_session_limits_locked(session_id)
130
+ if self._is_global_prune_due():
131
+ self._prune_expired_sessions_locked()
132
+
133
+ def load_messages(self, session_id: str) -> List[Dict[str, Any]]:
80
134
  if not self._logging_enabled:
81
135
  return []
136
+ if not session_id:
137
+ return []
82
138
 
83
139
  path = self._jsonl_path(session_id)
84
140
  if not path.exists():
85
141
  return []
86
142
 
87
- messages = []
88
- with path.open("r", encoding="utf-8") as handle:
89
- for line in handle:
90
- line = line.strip()
91
- if not line:
92
- continue
93
- try:
94
- messages.append(json.loads(line))
95
- except json.JSONDecodeError:
96
- continue
143
+ messages: List[Dict[str, Any]] = []
144
+ with self._file_lock:
145
+ try:
146
+ with path.open("r", encoding="utf-8") as handle:
147
+ for line in handle:
148
+ line = line.strip()
149
+ if not line:
150
+ continue
151
+ try:
152
+ payload = json.loads(line)
153
+ except json.JSONDecodeError:
154
+ continue
155
+ if isinstance(payload, dict):
156
+ messages.append(payload)
157
+ except OSError:
158
+ return []
97
159
  return messages
98
160
 
161
+ def _prune_user_ui_previews(self, session_id: str, keep_latest: int) -> None:
162
+ if keep_latest <= 0:
163
+ return
164
+ with self._file_lock:
165
+ self._trim_session_records_locked(session_id, keep_ui_previews=keep_latest)
166
+
99
167
  def build_prompt(
100
168
  self,
101
169
  session_id: str,
102
170
  user_content: str,
103
171
  selection: str,
104
172
  cell_output: str,
173
+ selection_truncated: bool = False,
174
+ cell_output_truncated: bool = False,
105
175
  cwd: str | None = None,
176
+ notebook_mode: str = "",
177
+ include_history: bool = True,
106
178
  ) -> str:
107
- messages = self.load_messages(session_id)
179
+ messages = self.load_messages(session_id) if include_history else []
108
180
  meta = self._load_meta(session_id)
109
181
  notebook_path = meta.get("notebook_path", "")
110
182
  notebook_os_path = meta.get("notebook_os_path", "")
111
183
  paired_path = meta.get("paired_path", "")
112
184
  paired_os_path = meta.get("paired_os_path", "")
185
+ mode = _normalize_notebook_mode(notebook_mode, notebook_path, notebook_os_path)
113
186
 
114
187
  if not paired_path and not paired_os_path:
115
188
  paired_path, paired_os_path = _derive_paired_paths(notebook_path, notebook_os_path)
@@ -126,7 +199,7 @@ class SessionStore:
126
199
  if cwd:
127
200
  parts.append(f"System: Current working directory: {cwd}")
128
201
 
129
- if paired_os_path:
202
+ if mode == "ipynb" and paired_os_path:
130
203
  parts.extend(
131
204
  [
132
205
  f"System: Jupytext paired file (absolute path): {paired_os_path}",
@@ -134,7 +207,7 @@ class SessionStore:
134
207
  "System: The notebook will prompt reload when the paired file changes on disk.",
135
208
  ]
136
209
  )
137
- elif paired_path:
210
+ elif mode == "ipynb" and paired_path:
138
211
  parts.extend(
139
212
  [
140
213
  f"System: Jupytext paired file (Jupyter path): {paired_path}",
@@ -142,20 +215,55 @@ class SessionStore:
142
215
  "System: The notebook will prompt reload when the paired file changes on disk.",
143
216
  ]
144
217
  )
218
+ elif mode == "jupytext_py":
219
+ target = notebook_os_path or notebook_path or "<notebook>.py"
220
+ parts.extend(
221
+ [
222
+ "System: Current file mode: Jupytext Python notebook script (.py).",
223
+ f"System: IMPORTANT - Edit this file directly: {target}",
224
+ ]
225
+ )
226
+ elif mode == "plain_py":
227
+ target = notebook_os_path or notebook_path or "<script>.py"
228
+ parts.extend(
229
+ [
230
+ "System: Current file mode: Plain Python script (.py).",
231
+ f"System: IMPORTANT - Edit this file directly: {target}",
232
+ ]
233
+ )
145
234
 
146
- parts.extend(
147
- [
148
- "",
149
- "System: Instructions:",
235
+ if mode == "ipynb":
236
+ instructions = [
150
237
  "System: 1. For code changes, modify the paired file directly using file editing tools.",
151
238
  "System: 2. Keep edits minimal and aligned with the user request.",
152
239
  "System: 3. The 'Current Cell Content' shows what the user is currently viewing/editing.",
153
240
  "System: 4. If you cannot proceed due to sandbox/permission restrictions, say so explicitly and ask the user to switch Permission (shield icon) to 'Full access' and retry. If authentication is required, tell them to run `codex login` in a terminal first.",
154
- "",
155
241
  ]
156
- )
242
+ elif mode == "jupytext_py":
243
+ instructions = [
244
+ "System: 1. For code changes, modify the current .py file directly using file editing tools.",
245
+ "System: 2. Preserve existing Jupytext structure and metadata (YAML header and # %% cell markers) unless the user asks to change them.",
246
+ "System: 3. The 'Current Cell Content' is a notebook cell snippet from the .py file.",
247
+ "System: 4. If you cannot proceed due to sandbox/permission restrictions, say so explicitly and ask the user to switch Permission (shield icon) to 'Full access' and retry. If authentication is required, tell them to run `codex login` in a terminal first.",
248
+ ]
249
+ elif mode == "plain_py":
250
+ instructions = [
251
+ "System: 1. For code changes, modify the current .py file directly using file editing tools.",
252
+ "System: 2. Do not introduce Jupytext YAML headers or notebook cell markers (for example, # %%) unless the user explicitly requests it.",
253
+ "System: 3. If no context snippet is provided, inspect files directly before making edits.",
254
+ "System: 4. If you cannot proceed due to sandbox/permission restrictions, say so explicitly and ask the user to switch Permission (shield icon) to 'Full access' and retry. If authentication is required, tell them to run `codex login` in a terminal first.",
255
+ ]
256
+ else:
257
+ instructions = [
258
+ "System: 1. For code changes, inspect files directly and edit the correct target file.",
259
+ "System: 2. Keep edits minimal and aligned with the user request.",
260
+ "System: 3. The provided context snippet, if any, may be partial.",
261
+ "System: 4. If you cannot proceed due to sandbox/permission restrictions, say so explicitly and ask the user to switch Permission (shield icon) to 'Full access' and retry. If authentication is required, tell them to run `codex login` in a terminal first.",
262
+ ]
263
+
264
+ parts.extend(["", "System: Instructions:", *instructions, ""])
157
265
 
158
- if messages:
266
+ if include_history and messages:
159
267
  parts.append("Conversation:")
160
268
  for msg in messages:
161
269
  role = msg.get("role", "user")
@@ -163,15 +271,28 @@ class SessionStore:
163
271
  parts.append(f"{role.title()}: {content}")
164
272
  parts.append("")
165
273
 
166
- if selection:
274
+ include_selection = mode in {"ipynb", "jupytext_py", "plain_py"}
275
+ include_cell_output = mode in {"ipynb", "jupytext_py"}
276
+
277
+ if include_selection and selection:
167
278
  parts.append("Current Cell Content:")
168
279
  parts.append(selection)
169
280
  parts.append("")
281
+ if include_selection and selection_truncated:
282
+ parts.append(
283
+ "System: Current Cell Content was truncated before sending due size limits. If full context is needed, inspect the source file directly."
284
+ )
285
+ parts.append("")
170
286
 
171
- if cell_output:
287
+ if include_cell_output and cell_output:
172
288
  parts.append("Current Cell Output:")
173
289
  parts.append(cell_output)
174
290
  parts.append("")
291
+ if include_cell_output and cell_output_truncated:
292
+ parts.append(
293
+ "System: Current Cell Output was truncated before sending due size limits."
294
+ )
295
+ parts.append("")
175
296
 
176
297
  parts.append("User:")
177
298
  parts.append(user_content)
@@ -182,6 +303,44 @@ class SessionStore:
182
303
  meta = self._load_meta(session_id)
183
304
  return meta.get("notebook_path", "")
184
305
 
306
+ def has_session(self, session_id: str) -> bool:
307
+ normalized_session_id = (session_id or "").strip()
308
+ if not normalized_session_id:
309
+ return False
310
+ if not self._logging_enabled:
311
+ return False
312
+ meta = self._load_meta(normalized_session_id)
313
+ return isinstance(meta, dict) and bool(meta)
314
+
315
+ def session_matches_notebook(
316
+ self, session_id: str, notebook_path: str, notebook_os_path: str = ""
317
+ ) -> bool:
318
+ """
319
+ Validate whether an existing session id belongs to the current notebook.
320
+ """
321
+ normalized_session_id = (session_id or "").strip()
322
+ if not normalized_session_id:
323
+ return False
324
+ if not self._logging_enabled:
325
+ return True
326
+
327
+ meta = self._load_meta(normalized_session_id)
328
+ if not isinstance(meta, dict) or not meta:
329
+ return False
330
+
331
+ normalized_notebook_path = (notebook_path or "").strip()
332
+ normalized_notebook_os_path = (notebook_os_path or "").strip()
333
+ stored_notebook_path = (meta.get("notebook_path") or "").strip()
334
+ stored_notebook_os_path = (meta.get("notebook_os_path") or "").strip()
335
+
336
+ if normalized_notebook_path and stored_notebook_path == normalized_notebook_path:
337
+ return True
338
+ if normalized_notebook_os_path and stored_notebook_os_path == normalized_notebook_os_path:
339
+ return True
340
+ if not normalized_notebook_path and not normalized_notebook_os_path:
341
+ return True
342
+ return False
343
+
185
344
  def resolve_session_for_notebook(self, notebook_path: str, notebook_os_path: str = "") -> str:
186
345
  if not self._logging_enabled:
187
346
  return ""
@@ -194,57 +353,61 @@ class SessionStore:
194
353
  latest_session_id = ""
195
354
  latest_updated_at = None
196
355
 
197
- for path in self._base.glob("*.meta.json"):
198
- session_id = path.stem.removesuffix(".meta")
199
- try:
200
- meta = json.loads(path.read_text(encoding="utf-8"))
201
- except (json.JSONDecodeError, IOError):
202
- continue
203
- if not isinstance(meta, dict):
204
- continue
356
+ with self._file_lock:
357
+ for path in self._base.glob("*.meta.json"):
358
+ session_id = path.stem.removesuffix(".meta")
359
+ try:
360
+ meta = json.loads(path.read_text(encoding="utf-8"))
361
+ except (json.JSONDecodeError, IOError):
362
+ continue
363
+ if not isinstance(meta, dict):
364
+ continue
205
365
 
206
- matched = False
207
- path_match = (meta.get("notebook_path") or "").strip()
208
- os_path_match = (meta.get("notebook_os_path") or "").strip()
209
- if normalized_notebook_path and path_match == normalized_notebook_path:
210
- matched = True
211
- if not matched and normalized_notebook_os_path and os_path_match == normalized_notebook_os_path:
212
- matched = True
213
- if not matched:
214
- continue
366
+ matched = False
367
+ path_match = (meta.get("notebook_path") or "").strip()
368
+ os_path_match = (meta.get("notebook_os_path") or "").strip()
369
+ if normalized_notebook_path and path_match == normalized_notebook_path:
370
+ matched = True
371
+ if not matched and normalized_notebook_os_path and os_path_match == normalized_notebook_os_path:
372
+ matched = True
373
+ if not matched:
374
+ continue
215
375
 
216
- updated_at = meta.get("updated_at") or meta.get("created_at")
217
- parsed_updated_at = _parse_iso_datetime(updated_at)
218
- if parsed_updated_at is None:
219
- continue
220
- if latest_updated_at is None or parsed_updated_at > latest_updated_at:
221
- latest_updated_at = parsed_updated_at
222
- latest_session_id = session_id
376
+ updated_at = meta.get("updated_at") or meta.get("created_at")
377
+ parsed_updated_at = _parse_iso_datetime(updated_at)
378
+ if parsed_updated_at is None:
379
+ continue
380
+ if latest_updated_at is None or parsed_updated_at > latest_updated_at:
381
+ latest_updated_at = parsed_updated_at
382
+ latest_session_id = session_id
223
383
 
224
384
  return latest_session_id
225
385
 
226
386
  def delete_session(self, session_id: str) -> None:
227
387
  if not self._logging_enabled:
228
388
  return
229
-
230
389
  normalized_session_id = (session_id or "").strip()
231
390
  if not normalized_session_id:
232
391
  return
233
392
 
234
- self._delete_session_files(normalized_session_id)
393
+ with self._file_lock:
394
+ self._delete_session_files(normalized_session_id)
235
395
 
236
396
  def delete_all_sessions(self) -> tuple[int, int]:
237
397
  if not self._logging_enabled:
238
398
  return (0, 0)
399
+ if not self._base.exists():
400
+ return (0, 0)
239
401
 
240
402
  deleted_count = 0
241
403
  failed_count = 0
242
- for path in self._base.glob("*.meta.json"):
243
- session_id = path.stem.removesuffix(".meta")
244
- if self._delete_session_files(session_id):
245
- deleted_count += 1
246
- else:
247
- failed_count += 1
404
+ with self._file_lock:
405
+ for path in self._base.glob("*.meta.json"):
406
+ session_id = path.stem.removesuffix(".meta")
407
+ if self._delete_session_files(session_id):
408
+ deleted_count += 1
409
+ else:
410
+ failed_count += 1
248
411
 
249
412
  return (deleted_count, failed_count)
250
413
 
@@ -253,26 +416,29 @@ class SessionStore:
253
416
  ) -> None:
254
417
  if not self._logging_enabled:
255
418
  return
419
+ if not session_id:
420
+ return
256
421
 
257
- meta_path = self._meta_path(session_id)
258
- if meta_path.exists():
259
- try:
260
- meta = json.loads(meta_path.read_text(encoding="utf-8"))
261
- except (json.JSONDecodeError, IOError):
262
- meta = {}
263
- else:
264
- meta = {
265
- "session_id": session_id,
266
- "created_at": _now_iso(),
267
- }
422
+ with self._file_lock:
423
+ meta = self._load_meta(session_id)
424
+ if not meta:
425
+ meta = {
426
+ "session_id": session_id,
427
+ "created_at": _now_iso(),
428
+ "schema_version": _SESSION_FILE_VERSION,
429
+ }
268
430
 
269
- paired_path, paired_os_path = _derive_paired_paths(notebook_path, notebook_os_path)
270
- meta["notebook_path"] = notebook_path
271
- meta["notebook_os_path"] = notebook_os_path
272
- meta["paired_path"] = paired_path
273
- meta["paired_os_path"] = paired_os_path
274
- meta["updated_at"] = _now_iso()
275
- meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
431
+ paired_path, paired_os_path = _derive_paired_paths(notebook_path, notebook_os_path)
432
+ meta["session_id"] = session_id
433
+ meta["notebook_path"] = notebook_path
434
+ meta["notebook_os_path"] = notebook_os_path
435
+ meta["paired_path"] = paired_path
436
+ meta["paired_os_path"] = paired_os_path
437
+ meta["schema_version"] = _SESSION_FILE_VERSION
438
+ meta["updated_at"] = _now_iso()
439
+ meta["retention_days"] = self._retention_days
440
+ meta["max_messages_per_session"] = self._max_messages_per_session
441
+ self._write_meta_atomic(self._meta_path(session_id), meta)
276
442
 
277
443
  def close_session(self, session_id: str) -> None:
278
444
  if not self._logging_enabled:
@@ -280,10 +446,88 @@ class SessionStore:
280
446
 
281
447
  self._touch_meta(session_id)
282
448
 
449
+ def rename_session(self, old_session_id: str, new_session_id: str) -> str:
450
+ """
451
+ Move session files to a new id (for example, when Codex returns a real
452
+ `thread_id` after the first run).
453
+ """
454
+ old_id = (old_session_id or "").strip()
455
+ new_id = (new_session_id or "").strip()
456
+ if not old_id or not new_id or old_id == new_id:
457
+ return new_id or old_id
458
+ if not self._logging_enabled:
459
+ return new_id
460
+
461
+ with self._file_lock:
462
+ old_jsonl = self._jsonl_path(old_id)
463
+ new_jsonl = self._jsonl_path(new_id)
464
+ if old_jsonl.exists():
465
+ if new_jsonl.exists():
466
+ try:
467
+ with old_jsonl.open("r", encoding="utf-8") as source, new_jsonl.open(
468
+ "a", encoding="utf-8"
469
+ ) as target:
470
+ for line in source:
471
+ if not line:
472
+ continue
473
+ if line.endswith("\n"):
474
+ target.write(line)
475
+ else:
476
+ target.write(f"{line}\n")
477
+ old_jsonl.unlink()
478
+ except OSError:
479
+ pass
480
+ else:
481
+ try:
482
+ old_jsonl.rename(new_jsonl)
483
+ except OSError:
484
+ pass
485
+
486
+ merged_meta: Dict[str, Any] = {}
487
+ for candidate in (self._meta_path(new_id), self._meta_path(old_id)):
488
+ if not candidate.exists():
489
+ continue
490
+ try:
491
+ loaded = json.loads(candidate.read_text(encoding="utf-8"))
492
+ except (json.JSONDecodeError, IOError):
493
+ continue
494
+ if isinstance(loaded, dict):
495
+ merged_meta.update(loaded)
496
+
497
+ if merged_meta:
498
+ merged_meta["session_id"] = new_id
499
+ merged_meta["updated_at"] = _now_iso()
500
+ merged_meta["schema_version"] = _SESSION_FILE_VERSION
501
+ merged_meta["retention_days"] = self._retention_days
502
+ merged_meta["max_messages_per_session"] = self._max_messages_per_session
503
+ try:
504
+ self._write_meta_atomic(self._meta_path(new_id), merged_meta)
505
+ except OSError:
506
+ pass
507
+
508
+ old_meta = self._meta_path(old_id)
509
+ new_meta = self._meta_path(new_id)
510
+ if old_meta != new_meta and old_meta.exists():
511
+ try:
512
+ old_meta.unlink()
513
+ except OSError:
514
+ pass
515
+
516
+ # Ensure the policy metadata is present even for files created before this change.
517
+ if new_meta.exists():
518
+ self._enforce_session_limits_locked(new_id, skip_size_limit=True)
519
+
520
+ return new_id
521
+
283
522
  def _touch_meta(self, session_id: str) -> None:
284
523
  if not self._logging_enabled:
285
524
  return
525
+ if not session_id:
526
+ return
527
+ with self._file_lock:
528
+ self._touch_meta_locked(session_id)
286
529
 
530
+ def _touch_meta_locked(self, session_id: str) -> None:
287
531
  meta_path = self._meta_path(session_id)
288
532
  if not meta_path.exists():
289
533
  return
@@ -291,25 +535,43 @@ class SessionStore:
291
535
  try:
292
536
  meta = json.loads(meta_path.read_text(encoding="utf-8"))
293
537
  except (json.JSONDecodeError, IOError):
294
- return
538
+ meta = {
539
+ "session_id": session_id,
540
+ "created_at": _now_iso(),
541
+ "schema_version": _SESSION_FILE_VERSION,
542
+ }
543
+ if not isinstance(meta, dict):
544
+ meta = {
545
+ "session_id": session_id,
546
+ "created_at": _now_iso(),
547
+ "schema_version": _SESSION_FILE_VERSION,
548
+ }
295
549
 
296
550
  meta["updated_at"] = _now_iso()
297
- meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
551
+ meta["schema_version"] = _SESSION_FILE_VERSION
552
+ meta["retention_days"] = self._retention_days
553
+ self._write_meta_atomic(meta_path, meta)
298
554
 
299
555
  def _load_meta(self, session_id: str) -> Dict[str, str]:
300
556
  if not self._logging_enabled:
301
557
  return {}
558
+ if not session_id:
559
+ return {}
302
560
 
303
561
  meta_path = self._meta_path(session_id)
304
562
  if not meta_path.exists():
305
563
  return {}
306
564
 
307
- try:
308
- meta = json.loads(meta_path.read_text(encoding="utf-8"))
309
- except (json.JSONDecodeError, IOError):
310
- return {}
565
+ with self._file_lock:
566
+ try:
567
+ meta = json.loads(meta_path.read_text(encoding="utf-8"))
568
+ except (json.JSONDecodeError, IOError):
569
+ return {}
311
570
 
312
- return meta if isinstance(meta, dict) else {}
571
+ if not isinstance(meta, dict):
572
+ return {}
573
+
574
+ return meta
313
575
 
314
576
  def _jsonl_path(self, session_id: str) -> Path:
315
577
  return self._base / f"{session_id}.jsonl"
@@ -321,23 +583,163 @@ class SessionStore:
321
583
  if not self._logging_enabled or self._retention_days <= 0:
322
584
  return
323
585
 
586
+ with self._file_lock:
587
+ self._prune_expired_sessions_locked()
588
+ self._last_global_prune = datetime.now(timezone.utc)
589
+
590
+ def _prune_expired_sessions_locked(self) -> None:
591
+ if self._retention_days <= 0:
592
+ return
593
+
324
594
  now = datetime.now(timezone.utc)
325
595
  cutoff = now - timedelta(days=self._retention_days)
596
+ active_session_ids = set()
597
+
326
598
  for path in self._base.glob("*.meta.json"):
327
599
  session_id = path.stem.removesuffix(".meta")
328
600
  try:
329
601
  meta = json.loads(path.read_text(encoding="utf-8"))
330
602
  except (json.JSONDecodeError, IOError):
603
+ self._delete_session_files(session_id)
331
604
  continue
332
605
  if not isinstance(meta, dict):
606
+ self._delete_session_files(session_id)
333
607
  continue
334
608
 
335
609
  updated_at = meta.get("updated_at") or meta.get("created_at")
336
610
  when = _parse_iso_datetime(updated_at)
337
611
  if not when:
338
- continue
339
- if when < cutoff:
340
612
  self._delete_session_files(session_id)
613
+ continue
614
+ if when >= cutoff:
615
+ active_session_ids.add(session_id)
616
+ continue
617
+
618
+ self._delete_session_files(session_id)
619
+
620
+ for path in self._base.glob("*.jsonl"):
621
+ if path.stem not in active_session_ids:
622
+ self._delete_session_files(path.stem)
623
+
624
+ self._last_global_prune = datetime.now(timezone.utc)
625
+
626
+ def _is_global_prune_due(self) -> bool:
627
+ now = datetime.now(timezone.utc)
628
+ if now - self._last_global_prune < self._prune_interval:
629
+ return False
630
+ return True
631
+
632
def _enforce_session_limits_locked(self, session_id: str, skip_size_limit: bool = False) -> None:
    """Apply message-count, UI-preview and byte-budget limits to one session log.

    The session's JSONL file is read, trimmed, and rewritten only when
    something changed: records were dropped, a record was modified in place
    (UI previews removed), invalid lines were found, or the file on disk
    exceeds ``self._max_session_bytes``.

    Args:
        session_id: Session whose JSONL file should be checked.
        skip_size_limit: When true, the on-disk size is not used as a reason
            to rewrite the file. The in-memory byte-budget trim still runs.
    """
    path = self._jsonl_path(session_id)
    if not path.exists():
        return

    oversized_on_disk = (
        not skip_size_limit
        and self._max_session_bytes > 0
        and path.stat().st_size > self._max_session_bytes
    )

    records, invalid_count = _read_jsonl_records(path)
    if not records and invalid_count == 0:
        return

    # Copy each record, not just the list: _trim_user_ui_previews mutates the
    # record dicts in place, and a shallow list copy would share those dicts
    # and therefore never notice the removal of a "ui" key.
    original_records = [dict(record) for record in records]

    if self._max_messages_per_session > 0:
        records = records[-self._max_messages_per_session :]

    self._trim_user_ui_previews(records, _DEFAULT_UI_PREVIEW_MAX_ITEMS_PER_SESSION)
    if self._max_session_bytes > 0:
        self._trim_records_to_byte_budget(records, self._max_session_bytes)

    changed = records != original_records or invalid_count > 0 or oversized_on_disk
    if changed:
        # Best effort: a failed write leaves the existing file untouched.
        self._write_jsonl_records(path, records)
661
+
662
def _trim_session_records_locked(self, session_id: str, keep_ui_previews: int) -> None:
    """Trim one session log, keeping UI previews only on the newest user records.

    After the preview trim, the message-count and byte-budget limits are
    applied as well. The file is rewritten only when the records changed or
    invalid lines were found.

    Args:
        session_id: Session whose JSONL file should be trimmed.
        keep_ui_previews: Number of most recent preview-carrying user records
            that keep their ``ui`` payload. Values <= 0 make this a no-op.
    """
    if keep_ui_previews <= 0:
        return
    path = self._jsonl_path(session_id)
    if not path.exists():
        return

    records, invalid_count = _read_jsonl_records(path)
    if not records and invalid_count == 0:
        return

    # Copy each record, not just the list: the preview trim removes "ui" keys
    # in place, so a shallow list copy would always compare equal afterwards
    # and the trimmed result would never be written.
    original_records = [dict(record) for record in records]

    self._trim_user_ui_previews(records, keep_ui_previews)
    if self._max_messages_per_session > 0:
        records = records[-self._max_messages_per_session :]

    if self._max_session_bytes > 0:
        self._trim_records_to_byte_budget(records, self._max_session_bytes)

    if records == original_records and invalid_count == 0:
        return

    self._write_jsonl_records(path, records)
685
+
686
+ def _trim_records_to_byte_budget(self, records: List[Dict[str, Any]], max_bytes: int) -> None:
687
+ if max_bytes <= 0:
688
+ return
689
+
690
+ if not records:
691
+ return
692
+
693
+ serialized_sizes = [len(json.dumps(record)) for record in records]
694
+ total_bytes = sum(length + 1 for length in serialized_sizes)
695
+
696
+ if total_bytes <= max_bytes:
697
+ return
698
+
699
+ while total_bytes > max_bytes and records:
700
+ removed = serialized_sizes.pop(0) + 1
701
+ records.pop(0)
702
+ total_bytes -= removed
703
+
704
+ def _trim_user_ui_previews(self, records: List[Dict[str, Any]], keep_latest: int) -> None:
705
+ if keep_latest <= 0:
706
+ return
707
+
708
+ ui_indices: List[int] = []
709
+ for idx, record in enumerate(records):
710
+ if record.get("role") != "user":
711
+ continue
712
+ ui = record.get("ui")
713
+ if isinstance(ui, dict) and (
714
+ isinstance(ui.get("selectionPreview"), dict)
715
+ or isinstance(ui.get("cellOutputPreview"), dict)
716
+ ):
717
+ ui_indices.append(idx)
718
+
719
+ if len(ui_indices) <= keep_latest:
720
+ return
721
+
722
+ for idx in ui_indices[: len(ui_indices) - keep_latest]:
723
+ if "ui" in records[idx]:
724
+ records[idx].pop("ui", None)
725
+
726
+ def _write_jsonl_records(self, path: Path, records: List[Dict[str, Any]]) -> bool:
727
+ tmp_path = path.with_name(f".{path.name}.{uuid4().hex}.tmp")
728
+ try:
729
+ with tmp_path.open("w", encoding="utf-8") as handle:
730
+ for record in records:
731
+ handle.write(json.dumps(record))
732
+ handle.write("\n")
733
+ tmp_path.replace(path)
734
+ return True
735
+ except OSError:
736
+ return False
737
+
738
+ def _write_meta_atomic(self, path: Path, meta: Dict[str, Any]) -> None:
739
+ path.parent.mkdir(parents=True, exist_ok=True)
740
+ tmp_path = path.with_name(f".{path.name}.{uuid4().hex}.tmp")
741
+ tmp_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
742
+ tmp_path.replace(path)
341
743
 
342
744
  def _delete_session_files(self, session_id: str) -> bool:
343
745
  deleted_any = False
@@ -357,14 +759,35 @@ def _derive_paired_paths(notebook_path: str, notebook_os_path: str) -> Tuple[str
357
759
  paired_path = ""
358
760
  paired_os_path = ""
359
761
 
360
- if notebook_path.endswith(".ipynb"):
762
+ notebook_path_lower = (notebook_path or "").lower()
763
+ notebook_os_path_lower = (notebook_os_path or "").lower()
764
+
765
+ if notebook_path_lower.endswith(".ipynb"):
361
766
  paired_path = notebook_path[:-6] + ".py"
362
- if notebook_os_path.endswith(".ipynb"):
767
+ elif notebook_path_lower.endswith(".py"):
768
+ paired_path = notebook_path[:-3] + ".ipynb"
769
+ if notebook_os_path_lower.endswith(".ipynb"):
363
770
  paired_os_path = notebook_os_path[:-6] + ".py"
771
+ elif notebook_os_path_lower.endswith(".py"):
772
+ paired_os_path = notebook_os_path[:-3] + ".ipynb"
364
773
 
365
774
  return paired_path, paired_os_path
366
775
 
367
776
 
777
+ def _normalize_notebook_mode(raw_mode: str, notebook_path: str, notebook_os_path: str) -> str:
778
+ mode = (raw_mode or "").strip().lower()
779
+ if mode in {"ipynb", "jupytext_py", "plain_py"}:
780
+ return mode
781
+
782
+ path = (notebook_path or "").strip().lower()
783
+ os_path = (notebook_os_path or "").strip().lower()
784
+ if path.endswith(".ipynb") or os_path.endswith(".ipynb"):
785
+ return "ipynb"
786
+ if path.endswith(".py") or os_path.endswith(".py"):
787
+ return "plain_py"
788
+ return "unsupported"
789
+
790
+
368
791
  def _now_iso() -> str:
369
792
  return datetime.now(timezone.utc).isoformat()
370
793
 
@@ -416,6 +839,43 @@ def _sanitize_message(content: str, max_chars: int) -> str:
416
839
  return _truncate_text(_sanitize_sensitive_values(content), max_chars)
417
840
 
418
841
 
842
+ def _sanitize_ui_payload(raw: Dict[str, Any] | None) -> Dict[str, Any]:
843
+ if not isinstance(raw, dict):
844
+ return {}
845
+
846
+ payload: Dict[str, Any] = {}
847
+
848
+ selection_preview = _sanitize_ui_preview(raw.get("selectionPreview"))
849
+ if selection_preview:
850
+ payload["selectionPreview"] = selection_preview
851
+
852
+ cell_output_preview = _sanitize_ui_preview(raw.get("cellOutputPreview"))
853
+ if cell_output_preview:
854
+ payload["cellOutputPreview"] = cell_output_preview
855
+
856
+ return payload
857
+
858
+
859
+ def _sanitize_ui_preview(raw: Any) -> Dict[str, str]:
860
+ if not isinstance(raw, dict):
861
+ return {}
862
+
863
+ location_raw = raw.get("locationLabel")
864
+ preview_text_raw = raw.get("previewText")
865
+ if not isinstance(location_raw, str) or not isinstance(preview_text_raw, str):
866
+ return {}
867
+
868
+ location = _sanitize_message(location_raw.strip(), _DEFAULT_UI_LABEL_MAX_CHARS)
869
+ preview_text = _sanitize_message(
870
+ preview_text_raw.replace("\r\n", "\n").replace("\r", "\n").strip(),
871
+ _DEFAULT_UI_PREVIEW_MAX_CHARS,
872
+ )
873
+ if not location or not preview_text:
874
+ return {}
875
+
876
+ return {"locationLabel": location, "previewText": preview_text}
877
+
878
+
419
879
  def _sanitize_sensitive_values(raw: str) -> str:
420
880
  sanitized = raw
421
881
  for pattern, replacement in _SENSITIVE_PATTERNS:
@@ -433,3 +893,28 @@ def _truncate_text(raw: str, max_chars: int) -> str:
433
893
  if max_chars <= 3:
434
894
  return raw[:max_chars]
435
895
  return f"{raw[: max_chars - 3]}..."
896
+
897
+
898
+ def _read_jsonl_records(path: Path) -> Tuple[List[Dict[str, Any]], int]:
899
+ records: List[Dict[str, Any]] = []
900
+ removed_invalid_count = 0
901
+
902
+ try:
903
+ with path.open("r", encoding="utf-8") as handle:
904
+ for line in handle:
905
+ line = line.strip()
906
+ if not line:
907
+ continue
908
+ try:
909
+ payload = json.loads(line)
910
+ except json.JSONDecodeError:
911
+ removed_invalid_count += 1
912
+ continue
913
+ if isinstance(payload, dict):
914
+ records.append(payload)
915
+ else:
916
+ removed_invalid_count += 1
917
+ except OSError:
918
+ return [], 1
919
+
920
+ return records, removed_invalid_count