pdit-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdit/__init__.py +11 -0
- pdit/_static/assets/index-BkEyY6gm.js +135 -0
- pdit/_static/assets/index-DxOOJTA1.css +1 -0
- pdit/_static/export.html +74 -0
- pdit/_static/index.html +14 -0
- pdit/cli.py +250 -0
- pdit/exporter.py +90 -0
- pdit/file_watcher.py +162 -0
- pdit/ipython_executor.py +350 -0
- pdit/server.py +410 -0
- pdit-0.1.0.dist-info/METADATA +155 -0
- pdit-0.1.0.dist-info/RECORD +15 -0
- pdit-0.1.0.dist-info/WHEEL +5 -0
- pdit-0.1.0.dist-info/entry_points.txt +2 -0
- pdit-0.1.0.dist-info/top_level.txt +1 -0
pdit/ipython_executor.py
ADDED
@@ -0,0 +1,350 @@
"""
IPython kernel executor using jupyter_client.

Uses IPython kernel with jupyter_client for reliable messaging.
Yields event dicts directly for minimal server overhead.
"""

import ast
import asyncio
import io
import json
import logging
import re
import traceback
from typing import Any, AsyncGenerator, Optional

from jupyter_client import AsyncKernelManager


logger = logging.getLogger(__name__)


class IPythonExecutor:
    """Python executor using IPython kernel."""

    def __init__(self):
        """Initialize executor. Call start() to begin kernel startup."""
        self.km: Optional[AsyncKernelManager] = None
        self.kc = None  # AsyncKernelClient
        self._startup_task: Optional[asyncio.Task] = None

    def start(self) -> None:
        """Start IPython kernel in the background.

        This is non-blocking - it creates a background task to start the kernel.
        Use wait_ready() to wait for the kernel to be ready before executing code.
        """
        if self._startup_task is None:
            self._startup_task = asyncio.create_task(self._do_start())

    async def wait_ready(self) -> None:
        """Wait for the kernel to be ready. Starts the kernel if not already started."""
        if self._startup_task is None:
            self._startup_task = asyncio.create_task(self._do_start())
        await self._startup_task

    async def _do_start(self) -> None:
        """Internal: Actually start the IPython kernel."""
        # Use python3 (IPython) kernel
        self.km = AsyncKernelManager(kernel_name='python3')
        await self.km.start_kernel()
        self.kc = self.km.client()
        self.kc.start_channels()
        # Wait for kernel to be ready
        await self.kc.wait_for_ready(timeout=30)
        # Drain any startup messages
        await self._drain_iopub()
        # Register display formatters
        await self._register_display_formatters()

    async def _execute_silent(self, code: str) -> None:
        """Execute code without capturing output (for setup).

        Raises:
            Exception: If execution fails or times out
        """
        if self.kc is None:
            raise RuntimeError("Kernel client not initialized")
        msg_id = self.kc.execute(code, silent=True)
        # Wait for execution to complete by checking for 'idle' status on iopub
        # We need to handle messages that may not match our msg_id (from kernel startup)
        timeout_total = 30  # Total timeout in seconds
        loop = asyncio.get_running_loop()
        start_time = loop.time()
        while loop.time() - start_time < timeout_total:
            try:
                msg = await asyncio.wait_for(self.kc.get_iopub_msg(), timeout=1)
                if msg['parent_header'].get('msg_id') == msg_id:
                    if msg['msg_type'] == 'status' and msg['content']['execution_state'] == 'idle':
                        return
                    elif msg['msg_type'] == 'error':
                        raise RuntimeError(f"Silent execution failed: {msg['content']['ename']}: {msg['content']['evalue']}")
            except asyncio.TimeoutError:
                # Queue empty, keep waiting
                continue
        raise RuntimeError("Silent execution timed out")

    async def _register_display_formatters(self) -> None:
        """Register custom display formatters for DataFrames."""
        formatter_code = """
def _register_pdit_formatter():
    import IPython
    import itables

    # Generate offline bundle
    OFFLINE_INIT = itables.javascript.generate_init_offline_itables_html(itables.options.dt_bundle)

    def format_datatable(df, include=None, exclude=None):
        html = itables.to_html_datatable(df, display_logo_when_loading=False, connected=False, layout={"topStart": None, "topEnd": None, "bottomStart": "search", "bottomEnd": "paging"})
        return f'{OFFLINE_INIT}{html}'

    ip = IPython.get_ipython()
    if ip:
        formatter = ip.display_formatter.formatters['text/html']
        formatter.for_type_by_name('polars.dataframe.frame', 'DataFrame', format_datatable)
        formatter.for_type_by_name('pandas.core.frame', 'DataFrame', format_datatable)

_register_pdit_formatter()
del _register_pdit_formatter
"""
        await self._execute_silent(formatter_code)

    def _parse_script(self, script: str) -> list[dict]:
        """Parse Python script into statement dicts using AST."""
        tree = ast.parse(script)
        statements = []
        lines = script.split('\n')

        for node in tree.body:
            line_start = node.lineno
            line_end = node.end_lineno or node.lineno

            # Extract source
            source_lines = lines[line_start - 1:line_end]
            source = '\n'.join(source_lines)

            is_expr = isinstance(node, ast.Expr)
            is_markdown_cell = is_expr and isinstance(node.value, ast.Constant) and isinstance(node.value.value, str)

            statements.append({
                "lineStart": line_start,
                "lineEnd": line_end,
                "source": source,
                "isMarkdownCell": is_markdown_cell
            })

        return statements

    def _strip_ansi(self, text: str) -> str:
        """Strip ANSI escape codes from text."""
        ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
        return ansi_escape.sub('', text)

    def _process_mime_data(self, data: dict) -> list[dict]:
        """Process MIME bundle data into output dicts.

        Passes through MIME types directly instead of translating to custom types.
        Uses priority order: image > html > json > plain text.
        """
        output: list[dict] = []

        # Priority order for MIME types - pass through directly
        # Check for any image type
        image_types = [k for k in data.keys() if k.startswith('image/')]
        if image_types:
            # Use first image type found (they're usually in priority order)
            mime_type = image_types[0]
            output.append({"type": mime_type, "content": data[mime_type]})
        elif 'text/html' in data:
            output.append({"type": "text/html", "content": data['text/html']})
        elif 'application/json' in data:
            json_data = data['application/json']
            output.append({"type": "application/json", "content": json.dumps(json_data)})
        elif 'text/plain' in data:
            output.append({"type": "text/plain", "content": data['text/plain']})

        return output

    async def _execute_code(self, code: str) -> list[dict]:
        """Execute code in kernel and collect output."""
        if self.kc is None:
            return [{"type": "error", "content": "Kernel not started"}]

        output: list[dict] = []

        msg_id = self.kc.execute(code)

        # Collect output messages (no timeout - code can run indefinitely)
        while True:
            msg = await self.kc.get_iopub_msg()

            # Only process messages for our execution
            if msg['parent_header'].get('msg_id') != msg_id:
                continue

            msg_type = msg['msg_type']
            content = msg['content']

            if msg_type == 'status' and content['execution_state'] == 'idle':
                # Execution complete
                break
            elif msg_type == 'stream':
                # stdout/stderr - merge consecutive outputs of same type
                stream_name = content['name']  # 'stdout' or 'stderr'
                text = content['text']
                if output and output[-1]["type"] == stream_name:
                    output[-1] = {"type": stream_name, "content": output[-1]["content"] + text}
                else:
                    output.append({"type": stream_name, "content": text})
            elif msg_type == 'execute_result':
                # Expression result
                data = content['data']
                output.extend(self._process_mime_data(data))
            elif msg_type == 'display_data':
                # Display output (plots, etc.)
                data = content['data']
                output.extend(self._process_mime_data(data))
            elif msg_type == 'error':
                # Exception - strip ANSI codes from traceback
                tb = '\n'.join(content['traceback'])
                tb = self._strip_ansi(tb)
                output.append({"type": "error", "content": tb})

        return output

    def _has_error(self, output: list[dict]) -> bool:
        """Check whether output contains an error."""
        return any(item["type"] == "error" for item in output)

    async def execute_script(
        self,
        script: str,
        line_range: tuple[int, int] | None = None,
        script_name: str | None = None
    ) -> AsyncGenerator[dict, None]:
        """Execute Python script, yielding event dicts as each statement completes.

        Yields:
            First: {"type": "expressions", "expressions": [{"lineStart": N, "lineEnd": N}, ...]}
            Then for each statement: {"lineStart": N, "lineEnd": N, "output": [...], "isInvisible": bool}
            On error during execution, the final result dict will have output with type="error"
        """
        # Wait for kernel to be ready
        await self.wait_ready()

        # Parse script
        try:
            statements = self._parse_script(script)
        except SyntaxError as e:
            error_line = e.lineno or 1
            error_buffer = io.StringIO()
            traceback.print_exc(file=error_buffer)
            # Yield expressions first (just the error location)
            yield {
                "type": "expressions",
                "expressions": [{"lineStart": error_line, "lineEnd": error_line}]
            }
            # Then yield the error result
            yield {
                "lineStart": error_line,
                "lineEnd": error_line,
                "output": [{"type": "error", "content": error_buffer.getvalue()}],
                "isInvisible": False
            }
            return

        # Filter by line range
        if line_range:
            from_line, to_line = line_range
            statements = [
                stmt for stmt in statements
                if not (stmt["lineEnd"] < from_line or stmt["lineStart"] > to_line)
            ]

        # Yield expression info
        yield {
            "type": "expressions",
            "expressions": [
                {"lineStart": stmt["lineStart"], "lineEnd": stmt["lineEnd"]}
                for stmt in statements
            ]
        }

        # Execute each statement
        for stmt in statements:
            if stmt["isMarkdownCell"]:
                # For markdown cells, just return the string content
                try:
                    value = ast.literal_eval(stmt["source"])
                    output = [{"type": "text/markdown", "content": str(value).strip()}]
                except (ValueError, SyntaxError):
                    output = await self._execute_code(stmt["source"])
            else:
                output = await self._execute_code(stmt["source"])

            yield {
                "lineStart": stmt["lineStart"],
                "lineEnd": stmt["lineEnd"],
                "output": output,
                "isInvisible": len(output) == 0
            }

            if self._has_error(output):
                break

    async def reset(self) -> None:
        """Reset the kernel (restart it)."""
        # Wait for startup to complete first
        await self.wait_ready()
        if self.km:
            if self.kc:
                self.kc.stop_channels()
            await self.km.restart_kernel()
            self.kc = self.km.client()
            self.kc.start_channels()
            await self.kc.wait_for_ready(timeout=30)
            await self._drain_iopub()
            await self._register_display_formatters()

    async def _drain_iopub(self) -> None:
        """Drain any pending messages from iopub channel."""
        if self.kc is None:
            return
        while True:
            try:
                await asyncio.wait_for(self.kc.get_iopub_msg(), timeout=0.1)
            except asyncio.TimeoutError:
                break

    async def interrupt(self) -> None:
        """Send an interrupt signal to the kernel."""
        if self.km:
            await self.km.interrupt_kernel()

    async def shutdown(self) -> None:
        """Shutdown the kernel."""
        # Cancel startup if still in progress
        if self._startup_task and not self._startup_task.done():
            self._startup_task.cancel()
            try:
                await self._startup_task
            except asyncio.CancelledError:
                pass
            self._startup_task = None

        if self.kc:
            self.kc.stop_channels()
            self.kc = None
        if self.km:
            try:
                # shutdown_kernel can hang with async client, so add timeout
                await asyncio.wait_for(self.km.shutdown_kernel(now=True), timeout=5)
            except asyncio.TimeoutError:
                # Force kill the kernel process if shutdown hangs
                if self.km.has_kernel:
                    self.km.kernel.kill()
            except Exception:
                pass
            self.km = None
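
For reference, the event protocol documented in execute_script() can be consumed as in the sketch below. This example is not part of the published wheel: the sample script, the print-based handling, and the asyncio.run entry point are illustrative assumptions, while the import path, method names, and event shapes follow the code above.

# Illustrative consumer sketch (not shipped in pdit 0.1.0): drives the
# executor the way its docstrings describe.
import asyncio

from pdit.ipython_executor import IPythonExecutor


async def main() -> None:
    executor = IPythonExecutor()
    executor.start()             # non-blocking: schedules kernel startup
    await executor.wait_ready()  # wait until the IPython kernel is usable

    # Hypothetical three-statement script: an assignment, a "markdown cell"
    # (bare string literal), and a print.
    script = 'x = 21 * 2\n"# Result"\nprint(x)\n'

    async for event in executor.execute_script(script):
        if event.get("type") == "expressions":
            # First event: line ranges of the statements that will run.
            print("will run:", event["expressions"])
        else:
            # One event per statement: line range plus a list of output dicts
            # such as {"type": "stdout", ...} or {"type": "text/markdown", ...}.
            print(f"lines {event['lineStart']}-{event['lineEnd']}:", event["output"])

    await executor.shutdown()


if __name__ == "__main__":
    asyncio.run(main())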