jupyterlab-codex-sidebar 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -0
- package/.github/workflows/unit-tests.yml +27 -0
- package/.jupyterlab-playwright.log +0 -0
- package/README.md +83 -9
- package/docs/images/codex-sidebar-screenshot.png +0 -0
- package/jupyterlab_codex/handlers.py +938 -297
- package/jupyterlab_codex/labextension/package.json +13 -3
- package/jupyterlab_codex/labextension/static/525.224526d045c727069de6.js +2 -0
- package/jupyterlab_codex/labextension/static/737.e7de3ad9dd6ded798340.js +1 -0
- package/jupyterlab_codex/labextension/static/remoteEntry.6ef5e7167763a316c000.js +1 -0
- package/jupyterlab_codex/protocol.py +297 -0
- package/jupyterlab_codex/runner.py +58 -15
- package/jupyterlab_codex/sessions.py +582 -97
- package/lib/codexChat.d.ts +13 -0
- package/lib/codexChat.js +2506 -0
- package/lib/codexChat.js.map +1 -0
- package/lib/codexChatAttachmentDedup.d.ts +10 -0
- package/lib/codexChatAttachmentDedup.js +35 -0
- package/lib/codexChatAttachmentDedup.js.map +1 -0
- package/lib/codexChatAttachmentLimit.d.ts +18 -0
- package/lib/codexChatAttachmentLimit.js +50 -0
- package/lib/codexChatAttachmentLimit.js.map +1 -0
- package/lib/codexChatAttachmentState.d.ts +15 -0
- package/lib/codexChatAttachmentState.js +16 -0
- package/lib/codexChatAttachmentState.js.map +1 -0
- package/lib/codexChatDocumentUtils.d.ts +70 -0
- package/lib/codexChatDocumentUtils.js +506 -0
- package/lib/codexChatDocumentUtils.js.map +1 -0
- package/lib/codexChatFormatting.d.ts +11 -0
- package/lib/codexChatFormatting.js +83 -0
- package/lib/codexChatFormatting.js.map +1 -0
- package/lib/codexChatNotice.d.ts +3 -0
- package/lib/codexChatNotice.js +74 -0
- package/lib/codexChatNotice.js.map +1 -0
- package/lib/codexChatPersistence.d.ts +35 -0
- package/lib/codexChatPersistence.js +158 -0
- package/lib/codexChatPersistence.js.map +1 -0
- package/lib/codexChatPrimitives.d.ts +44 -0
- package/lib/codexChatPrimitives.js +156 -0
- package/lib/codexChatPrimitives.js.map +1 -0
- package/lib/codexChatRender.d.ts +24 -0
- package/lib/codexChatRender.js +293 -0
- package/lib/codexChatRender.js.map +1 -0
- package/lib/codexChatSessionFactory.d.ts +15 -0
- package/lib/codexChatSessionFactory.js +45 -0
- package/lib/codexChatSessionFactory.js.map +1 -0
- package/lib/codexChatSessionKey.d.ts +3 -0
- package/lib/codexChatSessionKey.js +14 -0
- package/lib/codexChatSessionKey.js.map +1 -0
- package/lib/codexChatStorage.d.ts +4 -0
- package/lib/codexChatStorage.js +37 -0
- package/lib/codexChatStorage.js.map +1 -0
- package/lib/codexSessionResolver.d.ts +12 -0
- package/lib/codexSessionResolver.js +38 -0
- package/lib/codexSessionResolver.js.map +1 -0
- package/lib/handlers/activitySummarizer.d.ts +15 -0
- package/lib/handlers/activitySummarizer.js +327 -0
- package/lib/handlers/activitySummarizer.js.map +1 -0
- package/lib/handlers/codexMessageTypes.d.ts +30 -0
- package/lib/handlers/codexMessageTypes.js +2 -0
- package/lib/handlers/codexMessageTypes.js.map +1 -0
- package/lib/handlers/codexMessageUtils.d.ts +46 -0
- package/lib/handlers/codexMessageUtils.js +144 -0
- package/lib/handlers/codexMessageUtils.js.map +1 -0
- package/lib/handlers/handleCodexSocketMessage.d.ts +107 -0
- package/lib/handlers/handleCodexSocketMessage.js +78 -0
- package/lib/handlers/handleCodexSocketMessage.js.map +1 -0
- package/lib/handlers/sessionSyncHandler.d.ts +34 -0
- package/lib/handlers/sessionSyncHandler.js +181 -0
- package/lib/handlers/sessionSyncHandler.js.map +1 -0
- package/lib/hooks/useCodexSocket.d.ts +15 -0
- package/lib/hooks/useCodexSocket.js +84 -0
- package/lib/hooks/useCodexSocket.js.map +1 -0
- package/lib/index.js +1 -1
- package/lib/index.js.map +1 -1
- package/lib/panel.d.ts +1 -11
- package/lib/panel.js +1 -2815
- package/lib/panel.js.map +1 -1
- package/lib/protocol.d.ts +235 -0
- package/lib/protocol.js +278 -0
- package/lib/protocol.js.map +1 -0
- package/package.json +13 -3
- package/playwright.config.cjs +27 -0
- package/playwright.unit.config.cjs +19 -0
- package/pyproject.toml +1 -1
- package/release.sh +52 -14
- package/scripts/run_playwright_e2e.sh +96 -0
- package/scripts/run_playwright_freeze_repro.sh +58 -0
- package/scripts/run_playwright_queue_repro.sh +60 -0
- package/scripts/run_playwright_repro.sh +55 -0
- package/src/codexChat.tsx +3914 -0
- package/src/codexChatAttachmentDedup.ts +47 -0
- package/src/codexChatAttachmentLimit.ts +81 -0
- package/src/codexChatAttachmentState.ts +37 -0
- package/src/codexChatDocumentUtils.ts +644 -0
- package/src/codexChatFormatting.ts +94 -0
- package/src/codexChatNotice.ts +95 -0
- package/src/codexChatPersistence.ts +191 -0
- package/src/codexChatPrimitives.tsx +446 -0
- package/src/codexChatRender.tsx +376 -0
- package/src/codexChatSessionFactory.ts +79 -0
- package/src/codexChatSessionKey.ts +16 -0
- package/src/codexChatStorage.ts +36 -0
- package/src/codexSessionResolver.ts +56 -0
- package/src/handlers/activitySummarizer.ts +369 -0
- package/src/handlers/codexMessageTypes.ts +34 -0
- package/src/handlers/codexMessageUtils.ts +217 -0
- package/src/handlers/handleCodexSocketMessage.ts +204 -0
- package/src/handlers/sessionSyncHandler.ts +308 -0
- package/src/hooks/useCodexSocket.ts +109 -0
- package/src/index.ts +1 -1
- package/src/panel.tsx +1 -4184
- package/src/protocol.ts +582 -0
- package/style/index.css +480 -11
- package/test-results/.last-run.json +4 -0
- package/test.py +0 -0
- package/tests/e2e/cell-output-error-tail.spec.js +156 -0
- package/tests/e2e/codex-ui-test-helpers.js +138 -0
- package/tests/e2e/fixtures/notebooks/error-output-tail.ipynb +58 -0
- package/tests/e2e/fixtures/notebooks/error-output-tail.py +19 -0
- package/tests/e2e/fixtures/notebooks/tab1.ipynb +322 -0
- package/tests/e2e/fixtures/notebooks/tab1.py +272 -0
- package/tests/e2e/fixtures/notebooks/tab2.ipynb +252 -0
- package/tests/e2e/fixtures/notebooks/tab2.py +231 -0
- package/tests/e2e/fixtures/notebooks/tab3.ipynb +403 -0
- package/tests/e2e/fixtures/notebooks/tab3.py +331 -0
- package/tests/e2e/fixtures/notebooks/tab4.py +339 -0
- package/tests/e2e/freeze-notebook-tabs-repro.spec.js +295 -0
- package/tests/e2e/mock-codex-cli-flood.py +127 -0
- package/tests/e2e/mock-codex-cli-prompt-echo.py +88 -0
- package/tests/e2e/mock-codex-cli.py +95 -0
- package/tests/e2e/queue-multitab-repro.spec.js +189 -0
- package/tests/test_handlers.py +116 -0
- package/tests/test_protocol.py +169 -0
- package/tests/test_session_store_limits.py +50 -0
- package/tests/unit/codexChatAttachmentDedup.spec.ts +56 -0
- package/tests/unit/codexChatAttachmentLimit.spec.ts +57 -0
- package/tests/unit/codexChatAttachmentState.spec.ts +71 -0
- package/tests/unit/codexChatDocumentUtils.spec.ts +63 -0
- package/tests/unit/codexChatLimit.spec.ts +18 -0
- package/tests/unit/codexChatNotice.spec.ts +45 -0
- package/tests/unit/codexChatPersistence.spec.ts +199 -0
- package/tests/unit/codexChatSessionFactory.spec.ts +94 -0
- package/tests/unit/codexChatSessionKey.spec.ts +18 -0
- package/tests/unit/codexMessageUtils.spec.ts +89 -0
- package/tests/unit/codexSessionResolver.spec.ts +92 -0
- package/tests/unit/handleCodexSocketMessage.spec.ts +476 -0
- package/tsconfig.tsbuildinfo +1 -1
- package/webpack.config.js +6 -0
- package/jupyterlab_codex/labextension/static/504.335f3447c84ba3d74517.js +0 -2
- package/jupyterlab_codex/labextension/static/972.8e856719e40acc1ef4cb.js +0 -1
- package/jupyterlab_codex/labextension/static/remoteEntry.a2982f776a1f0f515640.js +0 -1
- /package/jupyterlab_codex/labextension/static/{504.335f3447c84ba3d74517.js.LICENSE.txt → 525.224526d045c727069de6.js.LICENSE.txt} +0 -0
|
@@ -1,15 +1,24 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
3
|
import re
|
|
4
|
+
import threading
|
|
4
5
|
from datetime import datetime, timedelta, timezone
|
|
5
6
|
from pathlib import Path
|
|
6
|
-
from typing import Dict, List, Tuple
|
|
7
|
+
from typing import Any, Dict, List, Tuple
|
|
8
|
+
from uuid import uuid4
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
_TRUE_VALUES = {"1", "true", "y", "yes", "on"}
|
|
10
12
|
_FALSE_VALUES = {"0", "false", "n", "no", "off"}
|
|
11
13
|
_DEFAULT_SESSION_RETENTION_DAYS = 30
|
|
14
|
+
_DEFAULT_SESSION_MAX_MESSAGES = 100
|
|
15
|
+
_DEFAULT_SESSION_MAX_BYTES = 2_000_000
|
|
16
|
+
_DEFAULT_SESSION_PRUNE_INTERVAL_MINUTES = 15
|
|
12
17
|
_DEFAULT_MAX_MESSAGE_CHARS = 12000
|
|
18
|
+
_DEFAULT_UI_LABEL_MAX_CHARS = 80
|
|
19
|
+
_DEFAULT_UI_PREVIEW_MAX_CHARS = 500
|
|
20
|
+
_DEFAULT_UI_PREVIEW_MAX_ITEMS_PER_SESSION = 10
|
|
21
|
+
_SESSION_FILE_VERSION = 1
|
|
13
22
|
|
|
14
23
|
_SENSITIVE_PATTERNS = [
|
|
15
24
|
(
|
|
@@ -25,6 +34,10 @@ _SENSITIVE_PATTERNS = [
|
|
|
25
34
|
|
|
26
35
|
|
|
27
36
|
class SessionStore:
|
|
37
|
+
"""Session persistence with bounded growth and recoverable file handling."""
|
|
38
|
+
|
|
39
|
+
_file_lock = threading.RLock()
|
|
40
|
+
|
|
28
41
|
def __init__(self, base_dir: str | None = None):
|
|
29
42
|
root = base_dir or os.path.join(os.path.expanduser("~"), ".jupyter", "codex-sessions")
|
|
30
43
|
self._base = Path(root)
|
|
@@ -32,25 +45,46 @@ class SessionStore:
|
|
|
32
45
|
os.environ.get("JUPYTERLAB_CODEX_SESSION_LOGGING"), default=True
|
|
33
46
|
)
|
|
34
47
|
self._retention_days = _as_non_negative_int(
|
|
35
|
-
os.environ.get("JUPYTERLAB_CODEX_SESSION_RETENTION_DAYS"),
|
|
48
|
+
os.environ.get("JUPYTERLAB_CODEX_SESSION_RETENTION_DAYS"),
|
|
49
|
+
_DEFAULT_SESSION_RETENTION_DAYS,
|
|
36
50
|
)
|
|
37
51
|
self._max_message_chars = _as_positive_int(
|
|
38
52
|
os.environ.get("JUPYTERLAB_CODEX_SESSION_MAX_MESSAGE_CHARS"), _DEFAULT_MAX_MESSAGE_CHARS
|
|
39
53
|
)
|
|
54
|
+
self._max_messages_per_session = _as_positive_int(
|
|
55
|
+
os.environ.get("JUPYTERLAB_CODEX_SESSION_MAX_MESSAGES"),
|
|
56
|
+
_DEFAULT_SESSION_MAX_MESSAGES,
|
|
57
|
+
)
|
|
58
|
+
self._max_session_bytes = _as_non_negative_int(
|
|
59
|
+
os.environ.get("JUPYTERLAB_CODEX_SESSION_MAX_BYTES"),
|
|
60
|
+
_DEFAULT_SESSION_MAX_BYTES,
|
|
61
|
+
)
|
|
62
|
+
self._prune_interval = timedelta(
|
|
63
|
+
minutes=_as_positive_int(
|
|
64
|
+
os.environ.get("JUPYTERLAB_CODEX_SESSION_PRUNE_INTERVAL_MINUTES"),
|
|
65
|
+
_DEFAULT_SESSION_PRUNE_INTERVAL_MINUTES,
|
|
66
|
+
)
|
|
67
|
+
)
|
|
68
|
+
self._last_global_prune = datetime.min.replace(tzinfo=timezone.utc)
|
|
69
|
+
|
|
40
70
|
if self._logging_enabled:
|
|
41
71
|
self._base.mkdir(parents=True, exist_ok=True)
|
|
42
|
-
self.
|
|
72
|
+
with self._file_lock:
|
|
73
|
+
self._prune_expired_sessions_locked()
|
|
43
74
|
|
|
44
75
|
def ensure_session(self, session_id: str, notebook_path: str, notebook_os_path: str = "") -> None:
|
|
45
76
|
if not self._logging_enabled:
|
|
46
77
|
return
|
|
78
|
+
if not session_id:
|
|
79
|
+
return
|
|
47
80
|
|
|
48
|
-
|
|
49
|
-
if
|
|
81
|
+
existing_meta = self._load_meta(session_id)
|
|
82
|
+
if existing_meta:
|
|
50
83
|
return
|
|
51
84
|
|
|
52
85
|
paired_path, paired_os_path = _derive_paired_paths(notebook_path, notebook_os_path)
|
|
53
86
|
meta = {
|
|
87
|
+
"schema_version": _SESSION_FILE_VERSION,
|
|
54
88
|
"session_id": session_id,
|
|
55
89
|
"notebook_path": notebook_path,
|
|
56
90
|
"notebook_os_path": notebook_os_path,
|
|
@@ -58,58 +92,97 @@ class SessionStore:
|
|
|
58
92
|
"paired_os_path": paired_os_path,
|
|
59
93
|
"created_at": _now_iso(),
|
|
60
94
|
"updated_at": _now_iso(),
|
|
95
|
+
"retention_days": self._retention_days,
|
|
96
|
+
"max_messages_per_session": self._max_messages_per_session,
|
|
61
97
|
}
|
|
62
|
-
|
|
98
|
+
with self._file_lock:
|
|
99
|
+
self._write_meta_atomic(self._meta_path(session_id), meta)
|
|
63
100
|
|
|
64
|
-
def append_message(
|
|
101
|
+
def append_message(
|
|
102
|
+
self, session_id: str, role: str, content: str, ui: Dict[str, Any] | None = None
|
|
103
|
+
) -> None:
|
|
65
104
|
if not self._logging_enabled:
|
|
66
105
|
return
|
|
106
|
+
if not session_id:
|
|
107
|
+
return
|
|
67
108
|
|
|
109
|
+
normalized_role = role if role in {"system", "user", "assistant"} else "system"
|
|
68
110
|
record = {
|
|
69
|
-
"role":
|
|
111
|
+
"role": normalized_role,
|
|
70
112
|
"content": _sanitize_message(content, self._max_message_chars),
|
|
71
113
|
"timestamp": _now_iso(),
|
|
72
114
|
}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
self._touch_meta(session_id)
|
|
77
|
-
self.prune_expired_sessions()
|
|
115
|
+
ui_payload = _sanitize_ui_payload(ui)
|
|
116
|
+
if ui_payload:
|
|
117
|
+
record["ui"] = ui_payload
|
|
78
118
|
|
|
79
|
-
|
|
119
|
+
with self._file_lock:
|
|
120
|
+
path = self._jsonl_path(session_id)
|
|
121
|
+
try:
|
|
122
|
+
with path.open("a", encoding="utf-8") as handle:
|
|
123
|
+
handle.write(json.dumps(record))
|
|
124
|
+
handle.write("\n")
|
|
125
|
+
except OSError:
|
|
126
|
+
return
|
|
127
|
+
|
|
128
|
+
self._touch_meta_locked(session_id)
|
|
129
|
+
self._enforce_session_limits_locked(session_id)
|
|
130
|
+
if self._is_global_prune_due():
|
|
131
|
+
self._prune_expired_sessions_locked()
|
|
132
|
+
|
|
133
|
+
def load_messages(self, session_id: str) -> List[Dict[str, Any]]:
|
|
80
134
|
if not self._logging_enabled:
|
|
81
135
|
return []
|
|
136
|
+
if not session_id:
|
|
137
|
+
return []
|
|
82
138
|
|
|
83
139
|
path = self._jsonl_path(session_id)
|
|
84
140
|
if not path.exists():
|
|
85
141
|
return []
|
|
86
142
|
|
|
87
|
-
messages = []
|
|
88
|
-
with
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
143
|
+
messages: List[Dict[str, Any]] = []
|
|
144
|
+
with self._file_lock:
|
|
145
|
+
try:
|
|
146
|
+
with path.open("r", encoding="utf-8") as handle:
|
|
147
|
+
for line in handle:
|
|
148
|
+
line = line.strip()
|
|
149
|
+
if not line:
|
|
150
|
+
continue
|
|
151
|
+
try:
|
|
152
|
+
payload = json.loads(line)
|
|
153
|
+
except json.JSONDecodeError:
|
|
154
|
+
continue
|
|
155
|
+
if isinstance(payload, dict):
|
|
156
|
+
messages.append(payload)
|
|
157
|
+
except OSError:
|
|
158
|
+
return []
|
|
97
159
|
return messages
|
|
98
160
|
|
|
161
|
+
def _prune_user_ui_previews(self, session_id: str, keep_latest: int) -> None:
|
|
162
|
+
if keep_latest <= 0:
|
|
163
|
+
return
|
|
164
|
+
with self._file_lock:
|
|
165
|
+
self._trim_session_records_locked(session_id, keep_ui_previews=keep_latest)
|
|
166
|
+
|
|
99
167
|
def build_prompt(
|
|
100
168
|
self,
|
|
101
169
|
session_id: str,
|
|
102
170
|
user_content: str,
|
|
103
171
|
selection: str,
|
|
104
172
|
cell_output: str,
|
|
173
|
+
selection_truncated: bool = False,
|
|
174
|
+
cell_output_truncated: bool = False,
|
|
105
175
|
cwd: str | None = None,
|
|
176
|
+
notebook_mode: str = "",
|
|
177
|
+
include_history: bool = True,
|
|
106
178
|
) -> str:
|
|
107
|
-
messages = self.load_messages(session_id)
|
|
179
|
+
messages = self.load_messages(session_id) if include_history else []
|
|
108
180
|
meta = self._load_meta(session_id)
|
|
109
181
|
notebook_path = meta.get("notebook_path", "")
|
|
110
182
|
notebook_os_path = meta.get("notebook_os_path", "")
|
|
111
183
|
paired_path = meta.get("paired_path", "")
|
|
112
184
|
paired_os_path = meta.get("paired_os_path", "")
|
|
185
|
+
mode = _normalize_notebook_mode(notebook_mode, notebook_path, notebook_os_path)
|
|
113
186
|
|
|
114
187
|
if not paired_path and not paired_os_path:
|
|
115
188
|
paired_path, paired_os_path = _derive_paired_paths(notebook_path, notebook_os_path)
|
|
@@ -126,7 +199,7 @@ class SessionStore:
|
|
|
126
199
|
if cwd:
|
|
127
200
|
parts.append(f"System: Current working directory: {cwd}")
|
|
128
201
|
|
|
129
|
-
if paired_os_path:
|
|
202
|
+
if mode == "ipynb" and paired_os_path:
|
|
130
203
|
parts.extend(
|
|
131
204
|
[
|
|
132
205
|
f"System: Jupytext paired file (absolute path): {paired_os_path}",
|
|
@@ -134,7 +207,7 @@ class SessionStore:
|
|
|
134
207
|
"System: The notebook will prompt reload when the paired file changes on disk.",
|
|
135
208
|
]
|
|
136
209
|
)
|
|
137
|
-
elif paired_path:
|
|
210
|
+
elif mode == "ipynb" and paired_path:
|
|
138
211
|
parts.extend(
|
|
139
212
|
[
|
|
140
213
|
f"System: Jupytext paired file (Jupyter path): {paired_path}",
|
|
@@ -142,20 +215,55 @@ class SessionStore:
|
|
|
142
215
|
"System: The notebook will prompt reload when the paired file changes on disk.",
|
|
143
216
|
]
|
|
144
217
|
)
|
|
218
|
+
elif mode == "jupytext_py":
|
|
219
|
+
target = notebook_os_path or notebook_path or "<notebook>.py"
|
|
220
|
+
parts.extend(
|
|
221
|
+
[
|
|
222
|
+
"System: Current file mode: Jupytext Python notebook script (.py).",
|
|
223
|
+
f"System: IMPORTANT - Edit this file directly: {target}",
|
|
224
|
+
]
|
|
225
|
+
)
|
|
226
|
+
elif mode == "plain_py":
|
|
227
|
+
target = notebook_os_path or notebook_path or "<script>.py"
|
|
228
|
+
parts.extend(
|
|
229
|
+
[
|
|
230
|
+
"System: Current file mode: Plain Python script (.py).",
|
|
231
|
+
f"System: IMPORTANT - Edit this file directly: {target}",
|
|
232
|
+
]
|
|
233
|
+
)
|
|
145
234
|
|
|
146
|
-
|
|
147
|
-
[
|
|
148
|
-
"",
|
|
149
|
-
"System: Instructions:",
|
|
235
|
+
if mode == "ipynb":
|
|
236
|
+
instructions = [
|
|
150
237
|
"System: 1. For code changes, modify the paired file directly using file editing tools.",
|
|
151
238
|
"System: 2. Keep edits minimal and aligned with the user request.",
|
|
152
239
|
"System: 3. The 'Current Cell Content' shows what the user is currently viewing/editing.",
|
|
153
240
|
"System: 4. If you cannot proceed due to sandbox/permission restrictions, say so explicitly and ask the user to switch Permission (shield icon) to 'Full access' and retry. If authentication is required, tell them to run `codex login` in a terminal first.",
|
|
154
|
-
"",
|
|
155
241
|
]
|
|
156
|
-
|
|
242
|
+
elif mode == "jupytext_py":
|
|
243
|
+
instructions = [
|
|
244
|
+
"System: 1. For code changes, modify the current .py file directly using file editing tools.",
|
|
245
|
+
"System: 2. Preserve existing Jupytext structure and metadata (YAML header and # %% cell markers) unless the user asks to change them.",
|
|
246
|
+
"System: 3. The 'Current Cell Content' is a notebook cell snippet from the .py file.",
|
|
247
|
+
"System: 4. If you cannot proceed due to sandbox/permission restrictions, say so explicitly and ask the user to switch Permission (shield icon) to 'Full access' and retry. If authentication is required, tell them to run `codex login` in a terminal first.",
|
|
248
|
+
]
|
|
249
|
+
elif mode == "plain_py":
|
|
250
|
+
instructions = [
|
|
251
|
+
"System: 1. For code changes, modify the current .py file directly using file editing tools.",
|
|
252
|
+
"System: 2. Do not introduce Jupytext YAML headers or notebook cell markers (for example, # %%) unless the user explicitly requests it.",
|
|
253
|
+
"System: 3. If no context snippet is provided, inspect files directly before making edits.",
|
|
254
|
+
"System: 4. If you cannot proceed due to sandbox/permission restrictions, say so explicitly and ask the user to switch Permission (shield icon) to 'Full access' and retry. If authentication is required, tell them to run `codex login` in a terminal first.",
|
|
255
|
+
]
|
|
256
|
+
else:
|
|
257
|
+
instructions = [
|
|
258
|
+
"System: 1. For code changes, inspect files directly and edit the correct target file.",
|
|
259
|
+
"System: 2. Keep edits minimal and aligned with the user request.",
|
|
260
|
+
"System: 3. The provided context snippet, if any, may be partial.",
|
|
261
|
+
"System: 4. If you cannot proceed due to sandbox/permission restrictions, say so explicitly and ask the user to switch Permission (shield icon) to 'Full access' and retry. If authentication is required, tell them to run `codex login` in a terminal first.",
|
|
262
|
+
]
|
|
263
|
+
|
|
264
|
+
parts.extend(["", "System: Instructions:", *instructions, ""])
|
|
157
265
|
|
|
158
|
-
if messages:
|
|
266
|
+
if include_history and messages:
|
|
159
267
|
parts.append("Conversation:")
|
|
160
268
|
for msg in messages:
|
|
161
269
|
role = msg.get("role", "user")
|
|
@@ -163,15 +271,28 @@ class SessionStore:
|
|
|
163
271
|
parts.append(f"{role.title()}: {content}")
|
|
164
272
|
parts.append("")
|
|
165
273
|
|
|
166
|
-
|
|
274
|
+
include_selection = mode in {"ipynb", "jupytext_py", "plain_py"}
|
|
275
|
+
include_cell_output = mode in {"ipynb", "jupytext_py"}
|
|
276
|
+
|
|
277
|
+
if include_selection and selection:
|
|
167
278
|
parts.append("Current Cell Content:")
|
|
168
279
|
parts.append(selection)
|
|
169
280
|
parts.append("")
|
|
281
|
+
if include_selection and selection_truncated:
|
|
282
|
+
parts.append(
|
|
283
|
+
"System: Current Cell Content was truncated before sending due size limits. If full context is needed, inspect the source file directly."
|
|
284
|
+
)
|
|
285
|
+
parts.append("")
|
|
170
286
|
|
|
171
|
-
if cell_output:
|
|
287
|
+
if include_cell_output and cell_output:
|
|
172
288
|
parts.append("Current Cell Output:")
|
|
173
289
|
parts.append(cell_output)
|
|
174
290
|
parts.append("")
|
|
291
|
+
if include_cell_output and cell_output_truncated:
|
|
292
|
+
parts.append(
|
|
293
|
+
"System: Current Cell Output was truncated before sending due size limits."
|
|
294
|
+
)
|
|
295
|
+
parts.append("")
|
|
175
296
|
|
|
176
297
|
parts.append("User:")
|
|
177
298
|
parts.append(user_content)
|
|
@@ -182,6 +303,44 @@ class SessionStore:
|
|
|
182
303
|
meta = self._load_meta(session_id)
|
|
183
304
|
return meta.get("notebook_path", "")
|
|
184
305
|
|
|
306
|
+
def has_session(self, session_id: str) -> bool:
|
|
307
|
+
normalized_session_id = (session_id or "").strip()
|
|
308
|
+
if not normalized_session_id:
|
|
309
|
+
return False
|
|
310
|
+
if not self._logging_enabled:
|
|
311
|
+
return False
|
|
312
|
+
meta = self._load_meta(normalized_session_id)
|
|
313
|
+
return isinstance(meta, dict) and bool(meta)
|
|
314
|
+
|
|
315
|
+
def session_matches_notebook(
|
|
316
|
+
self, session_id: str, notebook_path: str, notebook_os_path: str = ""
|
|
317
|
+
) -> bool:
|
|
318
|
+
"""
|
|
319
|
+
Validate whether an existing session id belongs to the current notebook.
|
|
320
|
+
"""
|
|
321
|
+
normalized_session_id = (session_id or "").strip()
|
|
322
|
+
if not normalized_session_id:
|
|
323
|
+
return False
|
|
324
|
+
if not self._logging_enabled:
|
|
325
|
+
return True
|
|
326
|
+
|
|
327
|
+
meta = self._load_meta(normalized_session_id)
|
|
328
|
+
if not isinstance(meta, dict) or not meta:
|
|
329
|
+
return False
|
|
330
|
+
|
|
331
|
+
normalized_notebook_path = (notebook_path or "").strip()
|
|
332
|
+
normalized_notebook_os_path = (notebook_os_path or "").strip()
|
|
333
|
+
stored_notebook_path = (meta.get("notebook_path") or "").strip()
|
|
334
|
+
stored_notebook_os_path = (meta.get("notebook_os_path") or "").strip()
|
|
335
|
+
|
|
336
|
+
if normalized_notebook_path and stored_notebook_path == normalized_notebook_path:
|
|
337
|
+
return True
|
|
338
|
+
if normalized_notebook_os_path and stored_notebook_os_path == normalized_notebook_os_path:
|
|
339
|
+
return True
|
|
340
|
+
if not normalized_notebook_path and not normalized_notebook_os_path:
|
|
341
|
+
return True
|
|
342
|
+
return False
|
|
343
|
+
|
|
185
344
|
def resolve_session_for_notebook(self, notebook_path: str, notebook_os_path: str = "") -> str:
|
|
186
345
|
if not self._logging_enabled:
|
|
187
346
|
return ""
|
|
@@ -194,57 +353,61 @@ class SessionStore:
|
|
|
194
353
|
latest_session_id = ""
|
|
195
354
|
latest_updated_at = None
|
|
196
355
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
356
|
+
with self._file_lock:
|
|
357
|
+
for path in self._base.glob("*.meta.json"):
|
|
358
|
+
session_id = path.stem.removesuffix(".meta")
|
|
359
|
+
try:
|
|
360
|
+
meta = json.loads(path.read_text(encoding="utf-8"))
|
|
361
|
+
except (json.JSONDecodeError, IOError):
|
|
362
|
+
continue
|
|
363
|
+
if not isinstance(meta, dict):
|
|
364
|
+
continue
|
|
205
365
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
366
|
+
matched = False
|
|
367
|
+
path_match = (meta.get("notebook_path") or "").strip()
|
|
368
|
+
os_path_match = (meta.get("notebook_os_path") or "").strip()
|
|
369
|
+
if normalized_notebook_path and path_match == normalized_notebook_path:
|
|
370
|
+
matched = True
|
|
371
|
+
if not matched and normalized_notebook_os_path and os_path_match == normalized_notebook_os_path:
|
|
372
|
+
matched = True
|
|
373
|
+
if not matched:
|
|
374
|
+
continue
|
|
215
375
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
376
|
+
updated_at = meta.get("updated_at") or meta.get("created_at")
|
|
377
|
+
parsed_updated_at = _parse_iso_datetime(updated_at)
|
|
378
|
+
if parsed_updated_at is None:
|
|
379
|
+
continue
|
|
380
|
+
if latest_updated_at is None or parsed_updated_at > latest_updated_at:
|
|
381
|
+
latest_updated_at = parsed_updated_at
|
|
382
|
+
latest_session_id = session_id
|
|
223
383
|
|
|
224
384
|
return latest_session_id
|
|
225
385
|
|
|
226
386
|
def delete_session(self, session_id: str) -> None:
|
|
227
387
|
if not self._logging_enabled:
|
|
228
388
|
return
|
|
229
|
-
|
|
230
389
|
normalized_session_id = (session_id or "").strip()
|
|
231
390
|
if not normalized_session_id:
|
|
232
391
|
return
|
|
233
392
|
|
|
234
|
-
self.
|
|
393
|
+
with self._file_lock:
|
|
394
|
+
self._delete_session_files(normalized_session_id)
|
|
235
395
|
|
|
236
396
|
def delete_all_sessions(self) -> tuple[int, int]:
|
|
237
397
|
if not self._logging_enabled:
|
|
238
398
|
return (0, 0)
|
|
399
|
+
if not self._base.exists():
|
|
400
|
+
return (0, 0)
|
|
239
401
|
|
|
240
402
|
deleted_count = 0
|
|
241
403
|
failed_count = 0
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
404
|
+
with self._file_lock:
|
|
405
|
+
for path in self._base.glob("*.meta.json"):
|
|
406
|
+
session_id = path.stem.removesuffix(".meta")
|
|
407
|
+
if self._delete_session_files(session_id):
|
|
408
|
+
deleted_count += 1
|
|
409
|
+
else:
|
|
410
|
+
failed_count += 1
|
|
248
411
|
|
|
249
412
|
return (deleted_count, failed_count)
|
|
250
413
|
|
|
@@ -253,26 +416,29 @@ class SessionStore:
|
|
|
253
416
|
) -> None:
|
|
254
417
|
if not self._logging_enabled:
|
|
255
418
|
return
|
|
419
|
+
if not session_id:
|
|
420
|
+
return
|
|
256
421
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
meta =
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
"session_id": session_id,
|
|
266
|
-
"created_at": _now_iso(),
|
|
267
|
-
}
|
|
422
|
+
with self._file_lock:
|
|
423
|
+
meta = self._load_meta(session_id)
|
|
424
|
+
if not meta:
|
|
425
|
+
meta = {
|
|
426
|
+
"session_id": session_id,
|
|
427
|
+
"created_at": _now_iso(),
|
|
428
|
+
"schema_version": _SESSION_FILE_VERSION,
|
|
429
|
+
}
|
|
268
430
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
431
|
+
paired_path, paired_os_path = _derive_paired_paths(notebook_path, notebook_os_path)
|
|
432
|
+
meta["session_id"] = session_id
|
|
433
|
+
meta["notebook_path"] = notebook_path
|
|
434
|
+
meta["notebook_os_path"] = notebook_os_path
|
|
435
|
+
meta["paired_path"] = paired_path
|
|
436
|
+
meta["paired_os_path"] = paired_os_path
|
|
437
|
+
meta["schema_version"] = _SESSION_FILE_VERSION
|
|
438
|
+
meta["updated_at"] = _now_iso()
|
|
439
|
+
meta["retention_days"] = self._retention_days
|
|
440
|
+
meta["max_messages_per_session"] = self._max_messages_per_session
|
|
441
|
+
self._write_meta_atomic(self._meta_path(session_id), meta)
|
|
276
442
|
|
|
277
443
|
def close_session(self, session_id: str) -> None:
|
|
278
444
|
if not self._logging_enabled:
|
|
@@ -280,10 +446,88 @@ class SessionStore:
|
|
|
280
446
|
|
|
281
447
|
self._touch_meta(session_id)
|
|
282
448
|
|
|
449
|
+
def rename_session(self, old_session_id: str, new_session_id: str) -> str:
|
|
450
|
+
"""
|
|
451
|
+
Move session files to a new id (for example, when Codex returns a real
|
|
452
|
+
`thread_id` after the first run).
|
|
453
|
+
"""
|
|
454
|
+
old_id = (old_session_id or "").strip()
|
|
455
|
+
new_id = (new_session_id or "").strip()
|
|
456
|
+
if not old_id or not new_id or old_id == new_id:
|
|
457
|
+
return new_id or old_id
|
|
458
|
+
if not self._logging_enabled:
|
|
459
|
+
return new_id
|
|
460
|
+
|
|
461
|
+
with self._file_lock:
|
|
462
|
+
old_jsonl = self._jsonl_path(old_id)
|
|
463
|
+
new_jsonl = self._jsonl_path(new_id)
|
|
464
|
+
if old_jsonl.exists():
|
|
465
|
+
if new_jsonl.exists():
|
|
466
|
+
try:
|
|
467
|
+
with old_jsonl.open("r", encoding="utf-8") as source, new_jsonl.open(
|
|
468
|
+
"a", encoding="utf-8"
|
|
469
|
+
) as target:
|
|
470
|
+
for line in source:
|
|
471
|
+
if not line:
|
|
472
|
+
continue
|
|
473
|
+
if line.endswith("\n"):
|
|
474
|
+
target.write(line)
|
|
475
|
+
else:
|
|
476
|
+
target.write(f"{line}\n")
|
|
477
|
+
old_jsonl.unlink()
|
|
478
|
+
except OSError:
|
|
479
|
+
pass
|
|
480
|
+
else:
|
|
481
|
+
try:
|
|
482
|
+
old_jsonl.rename(new_jsonl)
|
|
483
|
+
except OSError:
|
|
484
|
+
pass
|
|
485
|
+
|
|
486
|
+
merged_meta: Dict[str, Any] = {}
|
|
487
|
+
for candidate in (self._meta_path(new_id), self._meta_path(old_id)):
|
|
488
|
+
if not candidate.exists():
|
|
489
|
+
continue
|
|
490
|
+
try:
|
|
491
|
+
loaded = json.loads(candidate.read_text(encoding="utf-8"))
|
|
492
|
+
except (json.JSONDecodeError, IOError):
|
|
493
|
+
continue
|
|
494
|
+
if isinstance(loaded, dict):
|
|
495
|
+
merged_meta.update(loaded)
|
|
496
|
+
|
|
497
|
+
if merged_meta:
|
|
498
|
+
merged_meta["session_id"] = new_id
|
|
499
|
+
merged_meta["updated_at"] = _now_iso()
|
|
500
|
+
merged_meta["schema_version"] = _SESSION_FILE_VERSION
|
|
501
|
+
merged_meta["retention_days"] = self._retention_days
|
|
502
|
+
merged_meta["max_messages_per_session"] = self._max_messages_per_session
|
|
503
|
+
try:
|
|
504
|
+
self._write_meta_atomic(self._meta_path(new_id), merged_meta)
|
|
505
|
+
except OSError:
|
|
506
|
+
pass
|
|
507
|
+
|
|
508
|
+
old_meta = self._meta_path(old_id)
|
|
509
|
+
new_meta = self._meta_path(new_id)
|
|
510
|
+
if old_meta != new_meta and old_meta.exists():
|
|
511
|
+
try:
|
|
512
|
+
old_meta.unlink()
|
|
513
|
+
except OSError:
|
|
514
|
+
pass
|
|
515
|
+
|
|
516
|
+
# Ensure the policy metadata is present even for files created before this change.
|
|
517
|
+
if new_meta.exists():
|
|
518
|
+
self._enforce_session_limits_locked(new_id, skip_size_limit=True)
|
|
519
|
+
|
|
520
|
+
return new_id
|
|
521
|
+
|
|
283
522
|
def _touch_meta(self, session_id: str) -> None:
|
|
284
523
|
if not self._logging_enabled:
|
|
285
524
|
return
|
|
525
|
+
if not session_id:
|
|
526
|
+
return
|
|
527
|
+
with self._file_lock:
|
|
528
|
+
self._touch_meta_locked(session_id)
|
|
286
529
|
|
|
530
|
+
def _touch_meta_locked(self, session_id: str) -> None:
|
|
287
531
|
meta_path = self._meta_path(session_id)
|
|
288
532
|
if not meta_path.exists():
|
|
289
533
|
return
|
|
@@ -291,25 +535,43 @@ class SessionStore:
|
|
|
291
535
|
try:
|
|
292
536
|
meta = json.loads(meta_path.read_text(encoding="utf-8"))
|
|
293
537
|
except (json.JSONDecodeError, IOError):
|
|
294
|
-
|
|
538
|
+
meta = {
|
|
539
|
+
"session_id": session_id,
|
|
540
|
+
"created_at": _now_iso(),
|
|
541
|
+
"schema_version": _SESSION_FILE_VERSION,
|
|
542
|
+
}
|
|
543
|
+
if not isinstance(meta, dict):
|
|
544
|
+
meta = {
|
|
545
|
+
"session_id": session_id,
|
|
546
|
+
"created_at": _now_iso(),
|
|
547
|
+
"schema_version": _SESSION_FILE_VERSION,
|
|
548
|
+
}
|
|
295
549
|
|
|
296
550
|
meta["updated_at"] = _now_iso()
|
|
297
|
-
|
|
551
|
+
meta["schema_version"] = _SESSION_FILE_VERSION
|
|
552
|
+
meta["retention_days"] = self._retention_days
|
|
553
|
+
self._write_meta_atomic(meta_path, meta)
|
|
298
554
|
|
|
299
555
|
def _load_meta(self, session_id: str) -> Dict[str, str]:
|
|
300
556
|
if not self._logging_enabled:
|
|
301
557
|
return {}
|
|
558
|
+
if not session_id:
|
|
559
|
+
return {}
|
|
302
560
|
|
|
303
561
|
meta_path = self._meta_path(session_id)
|
|
304
562
|
if not meta_path.exists():
|
|
305
563
|
return {}
|
|
306
564
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
565
|
+
with self._file_lock:
|
|
566
|
+
try:
|
|
567
|
+
meta = json.loads(meta_path.read_text(encoding="utf-8"))
|
|
568
|
+
except (json.JSONDecodeError, IOError):
|
|
569
|
+
return {}
|
|
311
570
|
|
|
312
|
-
|
|
571
|
+
if not isinstance(meta, dict):
|
|
572
|
+
return {}
|
|
573
|
+
|
|
574
|
+
return meta
|
|
313
575
|
|
|
314
576
|
def _jsonl_path(self, session_id: str) -> Path:
|
|
315
577
|
return self._base / f"{session_id}.jsonl"
|
|
@@ -321,23 +583,163 @@ class SessionStore:
|
|
|
321
583
|
if not self._logging_enabled or self._retention_days <= 0:
|
|
322
584
|
return
|
|
323
585
|
|
|
586
|
+
with self._file_lock:
|
|
587
|
+
self._prune_expired_sessions_locked()
|
|
588
|
+
self._last_global_prune = datetime.now(timezone.utc)
|
|
589
|
+
|
|
590
|
+
def _prune_expired_sessions_locked(self) -> None:
|
|
591
|
+
if self._retention_days <= 0:
|
|
592
|
+
return
|
|
593
|
+
|
|
324
594
|
now = datetime.now(timezone.utc)
|
|
325
595
|
cutoff = now - timedelta(days=self._retention_days)
|
|
596
|
+
active_session_ids = set()
|
|
597
|
+
|
|
326
598
|
for path in self._base.glob("*.meta.json"):
|
|
327
599
|
session_id = path.stem.removesuffix(".meta")
|
|
328
600
|
try:
|
|
329
601
|
meta = json.loads(path.read_text(encoding="utf-8"))
|
|
330
602
|
except (json.JSONDecodeError, IOError):
|
|
603
|
+
self._delete_session_files(session_id)
|
|
331
604
|
continue
|
|
332
605
|
if not isinstance(meta, dict):
|
|
606
|
+
self._delete_session_files(session_id)
|
|
333
607
|
continue
|
|
334
608
|
|
|
335
609
|
updated_at = meta.get("updated_at") or meta.get("created_at")
|
|
336
610
|
when = _parse_iso_datetime(updated_at)
|
|
337
611
|
if not when:
|
|
338
|
-
continue
|
|
339
|
-
if when < cutoff:
|
|
340
612
|
self._delete_session_files(session_id)
|
|
613
|
+
continue
|
|
614
|
+
if when >= cutoff:
|
|
615
|
+
active_session_ids.add(session_id)
|
|
616
|
+
continue
|
|
617
|
+
|
|
618
|
+
self._delete_session_files(session_id)
|
|
619
|
+
|
|
620
|
+
for path in self._base.glob("*.jsonl"):
|
|
621
|
+
if path.stem not in active_session_ids:
|
|
622
|
+
self._delete_session_files(path.stem)
|
|
623
|
+
|
|
624
|
+
self._last_global_prune = datetime.now(timezone.utc)
|
|
625
|
+
|
|
626
|
+
def _is_global_prune_due(self) -> bool:
|
|
627
|
+
now = datetime.now(timezone.utc)
|
|
628
|
+
if now - self._last_global_prune < self._prune_interval:
|
|
629
|
+
return False
|
|
630
|
+
return True
|
|
631
|
+
|
|
632
|
+
def _enforce_session_limits_locked(self, session_id: str, skip_size_limit: bool = False) -> None:
    """Apply the message-count, UI-preview and byte limits to a session file.

    The session's JSONL file is read, trimmed to the newest
    ``_max_messages_per_session`` records, stripped of surplus UI previews,
    cut down to ``_max_session_bytes``, and rewritten only when something
    actually changed (records dropped or edited, invalid lines discarded,
    or the file on disk exceeding the byte limit).

    Args:
        session_id: Session whose JSONL file is checked.
        skip_size_limit: When true, the on-disk size is not consulted as a
            reason to rewrite; record-level byte trimming still runs.
    """
    path = self._jsonl_path(session_id)
    if not path.exists():
        return

    oversized = (
        not skip_size_limit
        and self._max_session_bytes > 0
        and path.stat().st_size > self._max_session_bytes
    )

    records, invalid_count = _read_jsonl_records(path)
    if not records and invalid_count == 0:
        return

    # Copy each record: the preview trim below edits the dicts in place, so
    # a plain list copy (or a length comparison) would not see that change
    # and the trimmed previews would never reach the disk.
    original_records = [dict(record) for record in records]

    if self._max_messages_per_session > 0:
        records = records[-self._max_messages_per_session :]

    self._trim_user_ui_previews(records, _DEFAULT_UI_PREVIEW_MAX_ITEMS_PER_SESSION)
    if self._max_session_bytes > 0:
        self._trim_records_to_byte_budget(records, self._max_session_bytes)

    if records == original_records and invalid_count == 0 and not oversized:
        return

    # Best effort: a failed rewrite is reported through the return value
    # and there is nothing further to do here.
    self._write_jsonl_records(path, records)
|
|
661
|
+
|
|
662
|
+
def _trim_session_records_locked(self, session_id: str, keep_ui_previews: int) -> None:
    """Strip old UI previews from a session file and re-apply its limits.

    Only the newest ``keep_ui_previews`` preview-carrying user records keep
    their ``ui`` payload. The message-count and byte limits are applied
    afterwards, and the file is rewritten when any record was dropped or
    edited, or when invalid lines were discarded while reading.

    Args:
        session_id: Session whose JSONL file is trimmed.
        keep_ui_previews: Number of newest previews to keep; values ``<= 0``
            make the call a no-op.
    """
    if keep_ui_previews <= 0:
        return

    path = self._jsonl_path(session_id)
    if not path.exists():
        return

    records, invalid_count = _read_jsonl_records(path)
    if not records and invalid_count == 0:
        return

    # Copy each record: the preview trim edits the dicts in place, so a
    # plain ``list(records)`` would share those dicts and always compare
    # equal afterwards, silently skipping the rewrite.
    original_records = [dict(record) for record in records]

    self._trim_user_ui_previews(records, keep_ui_previews)
    if self._max_messages_per_session > 0:
        records = records[-self._max_messages_per_session :]
    if self._max_session_bytes > 0:
        self._trim_records_to_byte_budget(records, self._max_session_bytes)

    if records == original_records and invalid_count == 0:
        return

    self._write_jsonl_records(path, records)
|
|
685
|
+
|
|
686
|
+
def _trim_records_to_byte_budget(self, records: List[Dict[str, Any]], max_bytes: int) -> None:
|
|
687
|
+
if max_bytes <= 0:
|
|
688
|
+
return
|
|
689
|
+
|
|
690
|
+
if not records:
|
|
691
|
+
return
|
|
692
|
+
|
|
693
|
+
serialized_sizes = [len(json.dumps(record)) for record in records]
|
|
694
|
+
total_bytes = sum(length + 1 for length in serialized_sizes)
|
|
695
|
+
|
|
696
|
+
if total_bytes <= max_bytes:
|
|
697
|
+
return
|
|
698
|
+
|
|
699
|
+
while total_bytes > max_bytes and records:
|
|
700
|
+
removed = serialized_sizes.pop(0) + 1
|
|
701
|
+
records.pop(0)
|
|
702
|
+
total_bytes -= removed
|
|
703
|
+
|
|
704
|
+
def _trim_user_ui_previews(self, records: List[Dict[str, Any]], keep_latest: int) -> None:
|
|
705
|
+
if keep_latest <= 0:
|
|
706
|
+
return
|
|
707
|
+
|
|
708
|
+
ui_indices: List[int] = []
|
|
709
|
+
for idx, record in enumerate(records):
|
|
710
|
+
if record.get("role") != "user":
|
|
711
|
+
continue
|
|
712
|
+
ui = record.get("ui")
|
|
713
|
+
if isinstance(ui, dict) and (
|
|
714
|
+
isinstance(ui.get("selectionPreview"), dict)
|
|
715
|
+
or isinstance(ui.get("cellOutputPreview"), dict)
|
|
716
|
+
):
|
|
717
|
+
ui_indices.append(idx)
|
|
718
|
+
|
|
719
|
+
if len(ui_indices) <= keep_latest:
|
|
720
|
+
return
|
|
721
|
+
|
|
722
|
+
for idx in ui_indices[: len(ui_indices) - keep_latest]:
|
|
723
|
+
if "ui" in records[idx]:
|
|
724
|
+
records[idx].pop("ui", None)
|
|
725
|
+
|
|
726
|
+
def _write_jsonl_records(self, path: Path, records: List[Dict[str, Any]]) -> bool:
|
|
727
|
+
tmp_path = path.with_name(f".{path.name}.{uuid4().hex}.tmp")
|
|
728
|
+
try:
|
|
729
|
+
with tmp_path.open("w", encoding="utf-8") as handle:
|
|
730
|
+
for record in records:
|
|
731
|
+
handle.write(json.dumps(record))
|
|
732
|
+
handle.write("\n")
|
|
733
|
+
tmp_path.replace(path)
|
|
734
|
+
return True
|
|
735
|
+
except OSError:
|
|
736
|
+
return False
|
|
737
|
+
|
|
738
|
+
def _write_meta_atomic(self, path: Path, meta: Dict[str, Any]) -> None:
|
|
739
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
740
|
+
tmp_path = path.with_name(f".{path.name}.{uuid4().hex}.tmp")
|
|
741
|
+
tmp_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
|
|
742
|
+
tmp_path.replace(path)
|
|
341
743
|
|
|
342
744
|
def _delete_session_files(self, session_id: str) -> bool:
|
|
343
745
|
deleted_any = False
|
|
@@ -357,14 +759,35 @@ def _derive_paired_paths(notebook_path: str, notebook_os_path: str) -> Tuple[str
|
|
|
357
759
|
paired_path = ""
|
|
358
760
|
paired_os_path = ""
|
|
359
761
|
|
|
360
|
-
|
|
762
|
+
notebook_path_lower = (notebook_path or "").lower()
|
|
763
|
+
notebook_os_path_lower = (notebook_os_path or "").lower()
|
|
764
|
+
|
|
765
|
+
if notebook_path_lower.endswith(".ipynb"):
|
|
361
766
|
paired_path = notebook_path[:-6] + ".py"
|
|
362
|
-
|
|
767
|
+
elif notebook_path_lower.endswith(".py"):
|
|
768
|
+
paired_path = notebook_path[:-3] + ".ipynb"
|
|
769
|
+
if notebook_os_path_lower.endswith(".ipynb"):
|
|
363
770
|
paired_os_path = notebook_os_path[:-6] + ".py"
|
|
771
|
+
elif notebook_os_path_lower.endswith(".py"):
|
|
772
|
+
paired_os_path = notebook_os_path[:-3] + ".ipynb"
|
|
364
773
|
|
|
365
774
|
return paired_path, paired_os_path
|
|
366
775
|
|
|
367
776
|
|
|
777
|
+
def _normalize_notebook_mode(raw_mode: str, notebook_path: str, notebook_os_path: str) -> str:
|
|
778
|
+
mode = (raw_mode or "").strip().lower()
|
|
779
|
+
if mode in {"ipynb", "jupytext_py", "plain_py"}:
|
|
780
|
+
return mode
|
|
781
|
+
|
|
782
|
+
path = (notebook_path or "").strip().lower()
|
|
783
|
+
os_path = (notebook_os_path or "").strip().lower()
|
|
784
|
+
if path.endswith(".ipynb") or os_path.endswith(".ipynb"):
|
|
785
|
+
return "ipynb"
|
|
786
|
+
if path.endswith(".py") or os_path.endswith(".py"):
|
|
787
|
+
return "plain_py"
|
|
788
|
+
return "unsupported"
|
|
789
|
+
|
|
790
|
+
|
|
368
791
|
def _now_iso() -> str:
|
|
369
792
|
return datetime.now(timezone.utc).isoformat()
|
|
370
793
|
|
|
@@ -416,6 +839,43 @@ def _sanitize_message(content: str, max_chars: int) -> str:
|
|
|
416
839
|
return _truncate_text(_sanitize_sensitive_values(content), max_chars)
|
|
417
840
|
|
|
418
841
|
|
|
842
|
+
def _sanitize_ui_payload(raw: Dict[str, Any] | None) -> Dict[str, Any]:
    """Return a copy of ``raw`` holding only its sanitized preview entries.

    ``selectionPreview`` and ``cellOutputPreview`` are each passed through
    ``_sanitize_ui_preview`` and kept only when the result is non-empty.
    Anything that is not a dict yields an empty dict.
    """
    if not isinstance(raw, dict):
        return {}

    cleaned: Dict[str, Any] = {}
    for key in ("selectionPreview", "cellOutputPreview"):
        preview = _sanitize_ui_preview(raw.get(key))
        if preview:
            cleaned[key] = preview
    return cleaned
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
def _sanitize_ui_preview(raw: Any) -> Dict[str, str]:
    """Sanitize a single UI preview entry.

    Returns a dict with ``locationLabel`` and ``previewText`` when ``raw`` is
    a dict carrying both as strings and both are still non-empty after
    ``_sanitize_message``; otherwise an empty dict. Line endings in the
    preview text are normalized to ``\\n`` before sanitizing.
    """
    if not isinstance(raw, dict):
        return {}

    label = raw.get("locationLabel")
    text = raw.get("previewText")
    if not (isinstance(label, str) and isinstance(text, str)):
        return {}

    normalized_text = text.replace("\r\n", "\n").replace("\r", "\n").strip()
    location = _sanitize_message(label.strip(), _DEFAULT_UI_LABEL_MAX_CHARS)
    preview_text = _sanitize_message(normalized_text, _DEFAULT_UI_PREVIEW_MAX_CHARS)

    if location and preview_text:
        return {"locationLabel": location, "previewText": preview_text}
    return {}
|
|
877
|
+
|
|
878
|
+
|
|
419
879
|
def _sanitize_sensitive_values(raw: str) -> str:
|
|
420
880
|
sanitized = raw
|
|
421
881
|
for pattern, replacement in _SENSITIVE_PATTERNS:
|
|
@@ -433,3 +893,28 @@ def _truncate_text(raw: str, max_chars: int) -> str:
|
|
|
433
893
|
if max_chars <= 3:
|
|
434
894
|
return raw[:max_chars]
|
|
435
895
|
return f"{raw[: max_chars - 3]}..."
|
|
896
|
+
|
|
897
|
+
|
|
898
|
+
def _read_jsonl_records(path: Path) -> Tuple[List[Dict[str, Any]], int]:
|
|
899
|
+
records: List[Dict[str, Any]] = []
|
|
900
|
+
removed_invalid_count = 0
|
|
901
|
+
|
|
902
|
+
try:
|
|
903
|
+
with path.open("r", encoding="utf-8") as handle:
|
|
904
|
+
for line in handle:
|
|
905
|
+
line = line.strip()
|
|
906
|
+
if not line:
|
|
907
|
+
continue
|
|
908
|
+
try:
|
|
909
|
+
payload = json.loads(line)
|
|
910
|
+
except json.JSONDecodeError:
|
|
911
|
+
removed_invalid_count += 1
|
|
912
|
+
continue
|
|
913
|
+
if isinstance(payload, dict):
|
|
914
|
+
records.append(payload)
|
|
915
|
+
else:
|
|
916
|
+
removed_invalid_count += 1
|
|
917
|
+
except OSError:
|
|
918
|
+
return [], 1
|
|
919
|
+
|
|
920
|
+
return records, removed_invalid_count
|