mcp-stata 1.2.2__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-stata might be problematic. Click here for more details.

mcp_stata/ui_http.py ADDED
@@ -0,0 +1,540 @@
1
+ import hashlib
2
+ import json
3
+ import secrets
4
+ import threading
5
+ import time
6
+ import uuid
7
+ from dataclasses import dataclass
8
+ from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
9
+ from typing import Any, Callable, Optional
10
+
11
+ from .stata_client import StataClient
12
+
13
+
14
def _stable_hash(payload: dict[str, Any]) -> str:
    """Return a deterministic SHA-1 hex digest of *payload*.

    The payload is serialised to JSON with sorted keys, so two dicts holding
    the same items hash identically regardless of insertion order. The digest
    is used as an identifier, not for any security purpose.
    """
    canonical = json.dumps(payload, sort_keys=True)
    hasher = hashlib.sha1()
    hasher.update(canonical.encode("utf-8"))
    return hasher.hexdigest()
16
+
17
+
18
@dataclass
class UIChannelInfo:
    """Connection details a UI client needs to talk to the local HTTP channel."""

    # Root URL of the HTTP server, built as "http://<host>:<port>".
    base_url: str
    # Bearer token the client must present in the Authorization header.
    token: str
    # Token expiry as milliseconds since the Unix epoch.
    expires_at: int
23
+
24
+
25
@dataclass
class ViewHandle:
    """A server-side filtered view over the dataset, addressed by ``view_id``."""

    # Opaque identifier handed to the client.
    view_id: str
    # Dataset identifier the view was computed against.
    dataset_id: str
    # Frame name supplied when the view was created.
    frame: str
    # Observation indices that matched the filter expression.
    obs_indices: list[int]
    # Number of matching observations (len(obs_indices) at creation time).
    filtered_n: int
    # time.time() timestamp at creation.
    created_at: float
    # time.time() timestamp of the most recent lookup; drives TTL eviction.
    last_access: float
34
+
35
+
36
+ class UIChannelManager:
37
+ def __init__(
38
+ self,
39
+ client: StataClient,
40
+ *,
41
+ host: str = "127.0.0.1",
42
+ port: int = 0,
43
+ token_ttl_s: int = 20 * 60,
44
+ view_ttl_s: int = 30 * 60,
45
+ max_limit: int = 500,
46
+ max_vars: int = 200,
47
+ max_chars: int = 500,
48
+ max_request_bytes: int = 1_000_000,
49
+ ):
50
+ self._client = client
51
+ self._host = host
52
+ self._port = port
53
+ self._token_ttl_s = token_ttl_s
54
+ self._view_ttl_s = view_ttl_s
55
+ self._max_limit = max_limit
56
+ self._max_vars = max_vars
57
+ self._max_chars = max_chars
58
+ self._max_request_bytes = max_request_bytes
59
+
60
+ self._lock = threading.Lock()
61
+ self._httpd: ThreadingHTTPServer | None = None
62
+ self._thread: threading.Thread | None = None
63
+
64
+ self._token: str | None = None
65
+ self._expires_at: int = 0
66
+
67
+ self._dataset_version: int = 0
68
+ self._dataset_id_cache: str | None = None
69
+ self._dataset_id_cache_at_version: int = -1
70
+
71
+ self._views: dict[str, ViewHandle] = {}
72
+
73
+ def notify_potential_dataset_change(self) -> None:
74
+ with self._lock:
75
+ self._dataset_version += 1
76
+ self._dataset_id_cache = None
77
+ self._views.clear()
78
+
79
+ def get_channel(self) -> UIChannelInfo:
80
+ self._ensure_http_server()
81
+ with self._lock:
82
+ self._ensure_token()
83
+ assert self._httpd is not None
84
+ port = self._httpd.server_address[1]
85
+ base_url = f"http://{self._host}:{port}"
86
+ return UIChannelInfo(base_url=base_url, token=self._token or "", expires_at=self._expires_at)
87
+
88
+ def capabilities(self) -> dict[str, bool]:
89
+ return {"dataBrowser": True, "filtering": True}
90
+
91
+ def current_dataset_id(self) -> str:
92
+ with self._lock:
93
+ if self._dataset_id_cache is not None and self._dataset_id_cache_at_version == self._dataset_version:
94
+ return self._dataset_id_cache
95
+
96
+ state = self._client.get_dataset_state()
97
+ payload = {
98
+ "version": self._dataset_version,
99
+ "frame": state.get("frame"),
100
+ "n": state.get("n"),
101
+ "k": state.get("k"),
102
+ "sortlist": state.get("sortlist"),
103
+ }
104
+ digest = _stable_hash(payload)
105
+
106
+ with self._lock:
107
+ self._dataset_id_cache = digest
108
+ self._dataset_id_cache_at_version = self._dataset_version
109
+ return digest
110
+
111
+ def get_view(self, view_id: str) -> Optional[ViewHandle]:
112
+ now = time.time()
113
+ with self._lock:
114
+ self._evict_expired_locked(now)
115
+ view = self._views.get(view_id)
116
+ if view is None:
117
+ return None
118
+ view.last_access = now
119
+ return view
120
+
121
+ def create_view(self, *, dataset_id: str, frame: str, filter_expr: str) -> ViewHandle:
122
+ current_id = self.current_dataset_id()
123
+ if dataset_id != current_id:
124
+ raise DatasetChangedError(current_id)
125
+
126
+ try:
127
+ obs_indices = self._client.compute_view_indices(filter_expr)
128
+ except ValueError as e:
129
+ raise InvalidFilterError(str(e))
130
+ except RuntimeError as e:
131
+ msg = str(e) or "No data in memory"
132
+ if "no data" in msg.lower():
133
+ raise NoDataInMemoryError(msg)
134
+ raise
135
+ now = time.time()
136
+ view_id = f"view_{uuid.uuid4().hex}"
137
+ view = ViewHandle(
138
+ view_id=view_id,
139
+ dataset_id=current_id,
140
+ frame=frame,
141
+ obs_indices=obs_indices,
142
+ filtered_n=len(obs_indices),
143
+ created_at=now,
144
+ last_access=now,
145
+ )
146
+ with self._lock:
147
+ self._evict_expired_locked(now)
148
+ self._views[view_id] = view
149
+ return view
150
+
151
+ def delete_view(self, view_id: str) -> bool:
152
+ with self._lock:
153
+ return self._views.pop(view_id, None) is not None
154
+
155
+ def validate_token(self, header_value: str | None) -> bool:
156
+ if not header_value:
157
+ return False
158
+ if not header_value.startswith("Bearer "):
159
+ return False
160
+ token = header_value[len("Bearer ") :].strip()
161
+ with self._lock:
162
+ self._ensure_token()
163
+ if self._token is None:
164
+ return False
165
+ if time.time() * 1000 >= self._expires_at:
166
+ return False
167
+ return secrets.compare_digest(token, self._token)
168
+
169
+ def limits(self) -> tuple[int, int, int, int]:
170
+ return self._max_limit, self._max_vars, self._max_chars, self._max_request_bytes
171
+
172
+ def _ensure_token(self) -> None:
173
+ now_ms = int(time.time() * 1000)
174
+ if self._token is None or now_ms >= self._expires_at:
175
+ self._token = secrets.token_urlsafe(32)
176
+ self._expires_at = int((time.time() + self._token_ttl_s) * 1000)
177
+
178
+ def _evict_expired_locked(self, now: float) -> None:
179
+ expired: list[str] = []
180
+ for key, view in self._views.items():
181
+ if now - view.last_access >= self._view_ttl_s:
182
+ expired.append(key)
183
+ for key in expired:
184
+ self._views.pop(key, None)
185
+
186
+ def _ensure_http_server(self) -> None:
187
+ with self._lock:
188
+ if self._httpd is not None:
189
+ return
190
+
191
+ manager = self
192
+
193
+ class Handler(BaseHTTPRequestHandler):
194
+ def _send_json(self, status: int, payload: dict[str, Any]) -> None:
195
+ data = json.dumps(payload).encode("utf-8")
196
+ self.send_response(status)
197
+ self.send_header("Content-Type", "application/json")
198
+ self.send_header("Content-Length", str(len(data)))
199
+ self.end_headers()
200
+ self.wfile.write(data)
201
+
202
+ def _error(self, status: int, code: str, message: str, *, stata_rc: int | None = None) -> None:
203
+ body: dict[str, Any] = {"error": {"code": code, "message": message}}
204
+ if stata_rc is not None:
205
+ body["error"]["stataRc"] = stata_rc
206
+ self._send_json(status, body)
207
+
208
+ def _require_auth(self) -> bool:
209
+ if manager.validate_token(self.headers.get("Authorization")):
210
+ return True
211
+ self._error(401, "auth_failed", "Unauthorized")
212
+ return False
213
+
214
+ def _read_json(self) -> dict[str, Any] | None:
215
+ max_limit, max_vars, max_chars, max_bytes = manager.limits()
216
+ _ = (max_limit, max_vars, max_chars)
217
+
218
+ length = int(self.headers.get("Content-Length", "0") or "0")
219
+ if length <= 0:
220
+ return {}
221
+ if length > max_bytes:
222
+ self._error(400, "request_too_large", "Request too large")
223
+ return None
224
+ raw = self.rfile.read(length)
225
+ try:
226
+ parsed = json.loads(raw.decode("utf-8"))
227
+ except Exception:
228
+ self._error(400, "invalid_request", "Invalid JSON")
229
+ return None
230
+ if not isinstance(parsed, dict):
231
+ self._error(400, "invalid_request", "Expected JSON object")
232
+ return None
233
+ return parsed
234
+
235
+ def do_GET(self) -> None:
236
+ if not self._require_auth():
237
+ return
238
+
239
+ if self.path == "/v1/dataset":
240
+ try:
241
+ state = manager._client.get_dataset_state()
242
+ dataset_id = manager.current_dataset_id()
243
+ self._send_json(
244
+ 200,
245
+ {
246
+ "dataset": {
247
+ "id": dataset_id,
248
+ "frame": state.get("frame"),
249
+ "n": state.get("n"),
250
+ "k": state.get("k"),
251
+ "changed": state.get("changed"),
252
+ }
253
+ },
254
+ )
255
+ return
256
+ except NoDataInMemoryError as e:
257
+ self._error(400, "no_data_in_memory", str(e), stata_rc=e.stata_rc)
258
+ return
259
+ except Exception as e:
260
+ self._error(500, "internal_error", str(e))
261
+ return
262
+
263
+ if self.path == "/v1/vars":
264
+ try:
265
+ state = manager._client.get_dataset_state()
266
+ dataset_id = manager.current_dataset_id()
267
+ variables = manager._client.list_variables_rich()
268
+ self._send_json(
269
+ 200,
270
+ {
271
+ "dataset": {"id": dataset_id, "frame": state.get("frame")},
272
+ "variables": variables,
273
+ },
274
+ )
275
+ return
276
+ except NoDataInMemoryError as e:
277
+ self._error(400, "no_data_in_memory", str(e), stata_rc=e.stata_rc)
278
+ return
279
+ except Exception as e:
280
+ self._error(500, "internal_error", str(e))
281
+ return
282
+
283
+ self._error(404, "not_found", "Not found")
284
+
285
+ def do_POST(self) -> None:
286
+ if not self._require_auth():
287
+ return
288
+
289
+ if self.path == "/v1/page":
290
+ body = self._read_json()
291
+ if body is None:
292
+ return
293
+ try:
294
+ resp = handle_page_request(manager, body, view_id=None)
295
+ self._send_json(200, resp)
296
+ return
297
+ except HTTPError as e:
298
+ self._error(e.status, e.code, e.message, stata_rc=e.stata_rc)
299
+ return
300
+ except Exception as e:
301
+ self._error(500, "internal_error", str(e))
302
+ return
303
+
304
+ if self.path == "/v1/views":
305
+ body = self._read_json()
306
+ if body is None:
307
+ return
308
+ dataset_id = str(body.get("datasetId", ""))
309
+ frame = str(body.get("frame", "default"))
310
+ filter_expr = str(body.get("filterExpr", ""))
311
+ if not dataset_id or not filter_expr:
312
+ self._error(400, "invalid_request", "datasetId and filterExpr are required")
313
+ return
314
+ try:
315
+ view = manager.create_view(dataset_id=dataset_id, frame=frame, filter_expr=filter_expr)
316
+ self._send_json(
317
+ 200,
318
+ {
319
+ "dataset": {"id": view.dataset_id, "frame": view.frame},
320
+ "view": {"id": view.view_id, "filteredN": view.filtered_n},
321
+ },
322
+ )
323
+ return
324
+ except DatasetChangedError as e:
325
+ self._error(409, "dataset_changed", "Dataset changed")
326
+ return
327
+ except ValueError as e:
328
+ self._error(400, "invalid_filter", str(e))
329
+ return
330
+ except RuntimeError as e:
331
+ msg = str(e) or "No data in memory"
332
+ if "no data" in msg.lower():
333
+ self._error(400, "no_data_in_memory", msg)
334
+ return
335
+ self._error(500, "internal_error", msg)
336
+ return
337
+ except Exception as e:
338
+ self._error(500, "internal_error", str(e))
339
+ return
340
+
341
+ if self.path.startswith("/v1/views/") and self.path.endswith("/page"):
342
+ parts = self.path.split("/")
343
+ if len(parts) != 5:
344
+ self._error(404, "not_found", "Not found")
345
+ return
346
+ view_id = parts[3]
347
+ body = self._read_json()
348
+ if body is None:
349
+ return
350
+ try:
351
+ resp = handle_page_request(manager, body, view_id=view_id)
352
+ self._send_json(200, resp)
353
+ return
354
+ except HTTPError as e:
355
+ self._error(e.status, e.code, e.message, stata_rc=e.stata_rc)
356
+ return
357
+ except Exception as e:
358
+ self._error(500, "internal_error", str(e))
359
+ return
360
+
361
+ if self.path == "/v1/filters/validate":
362
+ body = self._read_json()
363
+ if body is None:
364
+ return
365
+ filter_expr = str(body.get("filterExpr", ""))
366
+ if not filter_expr:
367
+ self._error(400, "invalid_request", "filterExpr is required")
368
+ return
369
+ try:
370
+ manager._client.validate_filter_expr(filter_expr)
371
+ self._send_json(200, {"ok": True})
372
+ return
373
+ except ValueError as e:
374
+ self._error(400, "invalid_filter", str(e))
375
+ return
376
+ except RuntimeError as e:
377
+ msg = str(e) or "No data in memory"
378
+ if "no data" in msg.lower():
379
+ self._error(400, "no_data_in_memory", msg)
380
+ return
381
+ self._error(500, "internal_error", msg)
382
+ return
383
+ except Exception as e:
384
+ self._error(500, "internal_error", str(e))
385
+ return
386
+
387
+ self._error(404, "not_found", "Not found")
388
+
389
+ def do_DELETE(self) -> None:
390
+ if not self._require_auth():
391
+ return
392
+
393
+ if self.path.startswith("/v1/views/"):
394
+ parts = self.path.split("/")
395
+ if len(parts) != 4:
396
+ self._error(404, "not_found", "Not found")
397
+ return
398
+ view_id = parts[3]
399
+ if manager.delete_view(view_id):
400
+ self._send_json(200, {"ok": True})
401
+ else:
402
+ self._error(404, "not_found", "Not found")
403
+ return
404
+
405
+ self._error(404, "not_found", "Not found")
406
+
407
+ def log_message(self, format: str, *args: Any) -> None:
408
+ return
409
+
410
+ httpd = ThreadingHTTPServer((self._host, self._port), Handler)
411
+ t = threading.Thread(target=httpd.serve_forever, daemon=True)
412
+ t.start()
413
+ self._httpd = httpd
414
+ self._thread = t
415
+
416
+
417
+ class HTTPError(Exception):
418
+ def __init__(self, status: int, code: str, message: str, *, stata_rc: int | None = None):
419
+ super().__init__(message)
420
+ self.status = status
421
+ self.code = code
422
+ self.message = message
423
+ self.stata_rc = stata_rc
424
+
425
+
426
class DatasetChangedError(Exception):
    """Raised when a request names a dataset id that is no longer current.

    ``current_dataset_id`` holds the id the caller should have used.
    """

    def __init__(self, current_dataset_id: str):
        self.current_dataset_id = current_dataset_id
        super().__init__("dataset_changed")
430
+
431
+
432
+ class NoDataInMemoryError(Exception):
433
+ def __init__(self, message: str = "No data in memory", *, stata_rc: int | None = None):
434
+ super().__init__(message)
435
+ self.stata_rc = stata_rc
436
+
437
+
438
+ class InvalidFilterError(Exception):
439
+ def __init__(self, message: str, *, stata_rc: int | None = None):
440
+ super().__init__(message)
441
+ self.message = message
442
+ self.stata_rc = stata_rc
443
+
444
+
445
+ def handle_page_request(manager: UIChannelManager, body: dict[str, Any], *, view_id: str | None) -> dict[str, Any]:
446
+ max_limit, max_vars, max_chars, _ = manager.limits()
447
+
448
+ if view_id is None:
449
+ dataset_id = str(body.get("datasetId", ""))
450
+ frame = str(body.get("frame", "default"))
451
+ else:
452
+ view = manager.get_view(view_id)
453
+ if view is None:
454
+ raise HTTPError(404, "not_found", "View not found")
455
+ dataset_id = view.dataset_id
456
+ frame = view.frame
457
+
458
+ offset = int(body.get("offset", 0) or 0)
459
+ limit = int(body.get("limit", 0) or 0)
460
+ vars_req = body.get("vars", [])
461
+ include_obs_no = bool(body.get("includeObsNo", False))
462
+ max_chars_req = int(body.get("maxChars", max_chars) or max_chars)
463
+
464
+ if offset < 0:
465
+ raise HTTPError(400, "invalid_request", "offset must be >= 0")
466
+ if limit <= 0:
467
+ raise HTTPError(400, "invalid_request", "limit must be > 0")
468
+ if limit > max_limit:
469
+ raise HTTPError(400, "request_too_large", f"limit must be <= {max_limit}")
470
+ if max_chars_req <= 0:
471
+ raise HTTPError(400, "invalid_request", "maxChars must be > 0")
472
+ if max_chars_req > max_chars:
473
+ raise HTTPError(400, "request_too_large", f"maxChars must be <= {max_chars}")
474
+
475
+ if not isinstance(vars_req, list) or not all(isinstance(v, str) for v in vars_req):
476
+ raise HTTPError(400, "invalid_request", "vars must be a list of strings")
477
+ if len(vars_req) > max_vars:
478
+ raise HTTPError(400, "request_too_large", f"vars length must be <= {max_vars}")
479
+
480
+ current_id = manager.current_dataset_id()
481
+ if dataset_id != current_id:
482
+ raise HTTPError(409, "dataset_changed", "Dataset changed")
483
+
484
+ if view_id is None:
485
+ obs_indices = None
486
+ filtered_n: int | None = None
487
+ else:
488
+ assert view is not None
489
+ obs_indices = view.obs_indices
490
+ filtered_n = view.filtered_n
491
+
492
+ try:
493
+ dataset_state = manager._client.get_dataset_state()
494
+ page = manager._client.get_page(
495
+ offset=offset,
496
+ limit=limit,
497
+ vars=vars_req,
498
+ include_obs_no=include_obs_no,
499
+ max_chars=max_chars_req,
500
+ obs_indices=obs_indices,
501
+ )
502
+ except RuntimeError as e:
503
+ # StataClient uses RuntimeError("No data in memory") for empty dataset.
504
+ msg = str(e) or "No data in memory"
505
+ if "no data" in msg.lower():
506
+ raise HTTPError(400, "no_data_in_memory", msg)
507
+ raise HTTPError(500, "internal_error", msg)
508
+ except ValueError as e:
509
+ msg = str(e)
510
+ if msg.lower().startswith("invalid variable"):
511
+ raise HTTPError(400, "invalid_variable", msg)
512
+ raise HTTPError(400, "invalid_request", msg)
513
+ except Exception as e:
514
+ raise HTTPError(500, "internal_error", str(e))
515
+
516
+ view_obj: dict[str, Any] = {
517
+ "offset": offset,
518
+ "limit": limit,
519
+ "returned": page["returned"],
520
+ "filteredN": filtered_n,
521
+ }
522
+ if view_id is not None:
523
+ view_obj["viewId"] = view_id
524
+
525
+ return {
526
+ "dataset": {
527
+ "id": current_id,
528
+ "frame": dataset_state.get("frame"),
529
+ "n": dataset_state.get("n"),
530
+ "k": dataset_state.get("k"),
531
+ },
532
+ "view": view_obj,
533
+ "vars": page["vars"],
534
+ "rows": page["rows"],
535
+ "display": {
536
+ "maxChars": max_chars_req,
537
+ "truncatedCells": page["truncated_cells"],
538
+ "missing": ".",
539
+ },
540
+ }