mcp-stata 1.2.2__py3-none-any.whl → 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-stata might be problematic. Click here for more details.
- mcp_stata/discovery.py +96 -25
- mcp_stata/graph_detector.py +385 -0
- mcp_stata/models.py +4 -1
- mcp_stata/server.py +258 -44
- mcp_stata/stata_client.py +1990 -265
- mcp_stata/streaming_io.py +261 -0
- mcp_stata/ui_http.py +540 -0
- mcp_stata-1.6.2.dist-info/METADATA +380 -0
- mcp_stata-1.6.2.dist-info/RECORD +14 -0
- mcp_stata-1.2.2.dist-info/METADATA +0 -240
- mcp_stata-1.2.2.dist-info/RECORD +0 -11
- {mcp_stata-1.2.2.dist-info → mcp_stata-1.6.2.dist-info}/WHEEL +0 -0
- {mcp_stata-1.2.2.dist-info → mcp_stata-1.6.2.dist-info}/entry_points.txt +0 -0
- {mcp_stata-1.2.2.dist-info → mcp_stata-1.6.2.dist-info}/licenses/LICENSE +0 -0
mcp_stata/ui_http.py
ADDED
|
@@ -0,0 +1,540 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
3
|
+
import secrets
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
import uuid
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
|
9
|
+
from typing import Any, Callable, Optional
|
|
10
|
+
|
|
11
|
+
from .stata_client import StataClient
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _stable_hash(payload: dict[str, Any]) -> str:
    """Return the SHA-1 hex digest of *payload* serialized as key-sorted JSON.

    Sorting the keys makes the digest independent of dict insertion order.
    """
    canonical = json.dumps(payload, sort_keys=True)
    hasher = hashlib.sha1()
    hasher.update(canonical.encode("utf-8"))
    return hasher.hexdigest()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class UIChannelInfo:
    """Connection details handed out by ``UIChannelManager.get_channel``."""

    # "http://<host>:<port>" of the local HTTP server.
    base_url: str
    # Bearer token expected in the Authorization header.
    token: str
    # Token expiry as epoch milliseconds.
    expires_at: int
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class ViewHandle:
    """A filtered view registered with ``UIChannelManager``."""

    # Identifier of the form "view_<uuid4 hex>".
    view_id: str
    # Dataset id that was current when the view was created.
    dataset_id: str
    # Frame name supplied by the caller that created the view.
    frame: str
    # Observation indices returned by the client for the filter expression.
    obs_indices: list[int]
    # Number of entries in ``obs_indices``.
    filtered_n: int
    # ``time.time()`` at creation.
    created_at: float
    # ``time.time()`` of the latest lookup; drives TTL eviction.
    last_access: float
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class UIChannelManager:
|
|
37
|
+
def __init__(
|
|
38
|
+
self,
|
|
39
|
+
client: StataClient,
|
|
40
|
+
*,
|
|
41
|
+
host: str = "127.0.0.1",
|
|
42
|
+
port: int = 0,
|
|
43
|
+
token_ttl_s: int = 20 * 60,
|
|
44
|
+
view_ttl_s: int = 30 * 60,
|
|
45
|
+
max_limit: int = 500,
|
|
46
|
+
max_vars: int = 200,
|
|
47
|
+
max_chars: int = 500,
|
|
48
|
+
max_request_bytes: int = 1_000_000,
|
|
49
|
+
):
|
|
50
|
+
self._client = client
|
|
51
|
+
self._host = host
|
|
52
|
+
self._port = port
|
|
53
|
+
self._token_ttl_s = token_ttl_s
|
|
54
|
+
self._view_ttl_s = view_ttl_s
|
|
55
|
+
self._max_limit = max_limit
|
|
56
|
+
self._max_vars = max_vars
|
|
57
|
+
self._max_chars = max_chars
|
|
58
|
+
self._max_request_bytes = max_request_bytes
|
|
59
|
+
|
|
60
|
+
self._lock = threading.Lock()
|
|
61
|
+
self._httpd: ThreadingHTTPServer | None = None
|
|
62
|
+
self._thread: threading.Thread | None = None
|
|
63
|
+
|
|
64
|
+
self._token: str | None = None
|
|
65
|
+
self._expires_at: int = 0
|
|
66
|
+
|
|
67
|
+
self._dataset_version: int = 0
|
|
68
|
+
self._dataset_id_cache: str | None = None
|
|
69
|
+
self._dataset_id_cache_at_version: int = -1
|
|
70
|
+
|
|
71
|
+
self._views: dict[str, ViewHandle] = {}
|
|
72
|
+
|
|
73
|
+
def notify_potential_dataset_change(self) -> None:
|
|
74
|
+
with self._lock:
|
|
75
|
+
self._dataset_version += 1
|
|
76
|
+
self._dataset_id_cache = None
|
|
77
|
+
self._views.clear()
|
|
78
|
+
|
|
79
|
+
def get_channel(self) -> UIChannelInfo:
|
|
80
|
+
self._ensure_http_server()
|
|
81
|
+
with self._lock:
|
|
82
|
+
self._ensure_token()
|
|
83
|
+
assert self._httpd is not None
|
|
84
|
+
port = self._httpd.server_address[1]
|
|
85
|
+
base_url = f"http://{self._host}:{port}"
|
|
86
|
+
return UIChannelInfo(base_url=base_url, token=self._token or "", expires_at=self._expires_at)
|
|
87
|
+
|
|
88
|
+
def capabilities(self) -> dict[str, bool]:
|
|
89
|
+
return {"dataBrowser": True, "filtering": True}
|
|
90
|
+
|
|
91
|
+
def current_dataset_id(self) -> str:
|
|
92
|
+
with self._lock:
|
|
93
|
+
if self._dataset_id_cache is not None and self._dataset_id_cache_at_version == self._dataset_version:
|
|
94
|
+
return self._dataset_id_cache
|
|
95
|
+
|
|
96
|
+
state = self._client.get_dataset_state()
|
|
97
|
+
payload = {
|
|
98
|
+
"version": self._dataset_version,
|
|
99
|
+
"frame": state.get("frame"),
|
|
100
|
+
"n": state.get("n"),
|
|
101
|
+
"k": state.get("k"),
|
|
102
|
+
"sortlist": state.get("sortlist"),
|
|
103
|
+
}
|
|
104
|
+
digest = _stable_hash(payload)
|
|
105
|
+
|
|
106
|
+
with self._lock:
|
|
107
|
+
self._dataset_id_cache = digest
|
|
108
|
+
self._dataset_id_cache_at_version = self._dataset_version
|
|
109
|
+
return digest
|
|
110
|
+
|
|
111
|
+
def get_view(self, view_id: str) -> Optional[ViewHandle]:
|
|
112
|
+
now = time.time()
|
|
113
|
+
with self._lock:
|
|
114
|
+
self._evict_expired_locked(now)
|
|
115
|
+
view = self._views.get(view_id)
|
|
116
|
+
if view is None:
|
|
117
|
+
return None
|
|
118
|
+
view.last_access = now
|
|
119
|
+
return view
|
|
120
|
+
|
|
121
|
+
def create_view(self, *, dataset_id: str, frame: str, filter_expr: str) -> ViewHandle:
|
|
122
|
+
current_id = self.current_dataset_id()
|
|
123
|
+
if dataset_id != current_id:
|
|
124
|
+
raise DatasetChangedError(current_id)
|
|
125
|
+
|
|
126
|
+
try:
|
|
127
|
+
obs_indices = self._client.compute_view_indices(filter_expr)
|
|
128
|
+
except ValueError as e:
|
|
129
|
+
raise InvalidFilterError(str(e))
|
|
130
|
+
except RuntimeError as e:
|
|
131
|
+
msg = str(e) or "No data in memory"
|
|
132
|
+
if "no data" in msg.lower():
|
|
133
|
+
raise NoDataInMemoryError(msg)
|
|
134
|
+
raise
|
|
135
|
+
now = time.time()
|
|
136
|
+
view_id = f"view_{uuid.uuid4().hex}"
|
|
137
|
+
view = ViewHandle(
|
|
138
|
+
view_id=view_id,
|
|
139
|
+
dataset_id=current_id,
|
|
140
|
+
frame=frame,
|
|
141
|
+
obs_indices=obs_indices,
|
|
142
|
+
filtered_n=len(obs_indices),
|
|
143
|
+
created_at=now,
|
|
144
|
+
last_access=now,
|
|
145
|
+
)
|
|
146
|
+
with self._lock:
|
|
147
|
+
self._evict_expired_locked(now)
|
|
148
|
+
self._views[view_id] = view
|
|
149
|
+
return view
|
|
150
|
+
|
|
151
|
+
def delete_view(self, view_id: str) -> bool:
|
|
152
|
+
with self._lock:
|
|
153
|
+
return self._views.pop(view_id, None) is not None
|
|
154
|
+
|
|
155
|
+
def validate_token(self, header_value: str | None) -> bool:
|
|
156
|
+
if not header_value:
|
|
157
|
+
return False
|
|
158
|
+
if not header_value.startswith("Bearer "):
|
|
159
|
+
return False
|
|
160
|
+
token = header_value[len("Bearer ") :].strip()
|
|
161
|
+
with self._lock:
|
|
162
|
+
self._ensure_token()
|
|
163
|
+
if self._token is None:
|
|
164
|
+
return False
|
|
165
|
+
if time.time() * 1000 >= self._expires_at:
|
|
166
|
+
return False
|
|
167
|
+
return secrets.compare_digest(token, self._token)
|
|
168
|
+
|
|
169
|
+
def limits(self) -> tuple[int, int, int, int]:
|
|
170
|
+
return self._max_limit, self._max_vars, self._max_chars, self._max_request_bytes
|
|
171
|
+
|
|
172
|
+
def _ensure_token(self) -> None:
|
|
173
|
+
now_ms = int(time.time() * 1000)
|
|
174
|
+
if self._token is None or now_ms >= self._expires_at:
|
|
175
|
+
self._token = secrets.token_urlsafe(32)
|
|
176
|
+
self._expires_at = int((time.time() + self._token_ttl_s) * 1000)
|
|
177
|
+
|
|
178
|
+
def _evict_expired_locked(self, now: float) -> None:
|
|
179
|
+
expired: list[str] = []
|
|
180
|
+
for key, view in self._views.items():
|
|
181
|
+
if now - view.last_access >= self._view_ttl_s:
|
|
182
|
+
expired.append(key)
|
|
183
|
+
for key in expired:
|
|
184
|
+
self._views.pop(key, None)
|
|
185
|
+
|
|
186
|
+
def _ensure_http_server(self) -> None:
|
|
187
|
+
with self._lock:
|
|
188
|
+
if self._httpd is not None:
|
|
189
|
+
return
|
|
190
|
+
|
|
191
|
+
manager = self
|
|
192
|
+
|
|
193
|
+
class Handler(BaseHTTPRequestHandler):
|
|
194
|
+
def _send_json(self, status: int, payload: dict[str, Any]) -> None:
|
|
195
|
+
data = json.dumps(payload).encode("utf-8")
|
|
196
|
+
self.send_response(status)
|
|
197
|
+
self.send_header("Content-Type", "application/json")
|
|
198
|
+
self.send_header("Content-Length", str(len(data)))
|
|
199
|
+
self.end_headers()
|
|
200
|
+
self.wfile.write(data)
|
|
201
|
+
|
|
202
|
+
def _error(self, status: int, code: str, message: str, *, stata_rc: int | None = None) -> None:
|
|
203
|
+
body: dict[str, Any] = {"error": {"code": code, "message": message}}
|
|
204
|
+
if stata_rc is not None:
|
|
205
|
+
body["error"]["stataRc"] = stata_rc
|
|
206
|
+
self._send_json(status, body)
|
|
207
|
+
|
|
208
|
+
def _require_auth(self) -> bool:
|
|
209
|
+
if manager.validate_token(self.headers.get("Authorization")):
|
|
210
|
+
return True
|
|
211
|
+
self._error(401, "auth_failed", "Unauthorized")
|
|
212
|
+
return False
|
|
213
|
+
|
|
214
|
+
def _read_json(self) -> dict[str, Any] | None:
|
|
215
|
+
max_limit, max_vars, max_chars, max_bytes = manager.limits()
|
|
216
|
+
_ = (max_limit, max_vars, max_chars)
|
|
217
|
+
|
|
218
|
+
length = int(self.headers.get("Content-Length", "0") or "0")
|
|
219
|
+
if length <= 0:
|
|
220
|
+
return {}
|
|
221
|
+
if length > max_bytes:
|
|
222
|
+
self._error(400, "request_too_large", "Request too large")
|
|
223
|
+
return None
|
|
224
|
+
raw = self.rfile.read(length)
|
|
225
|
+
try:
|
|
226
|
+
parsed = json.loads(raw.decode("utf-8"))
|
|
227
|
+
except Exception:
|
|
228
|
+
self._error(400, "invalid_request", "Invalid JSON")
|
|
229
|
+
return None
|
|
230
|
+
if not isinstance(parsed, dict):
|
|
231
|
+
self._error(400, "invalid_request", "Expected JSON object")
|
|
232
|
+
return None
|
|
233
|
+
return parsed
|
|
234
|
+
|
|
235
|
+
def do_GET(self) -> None:
|
|
236
|
+
if not self._require_auth():
|
|
237
|
+
return
|
|
238
|
+
|
|
239
|
+
if self.path == "/v1/dataset":
|
|
240
|
+
try:
|
|
241
|
+
state = manager._client.get_dataset_state()
|
|
242
|
+
dataset_id = manager.current_dataset_id()
|
|
243
|
+
self._send_json(
|
|
244
|
+
200,
|
|
245
|
+
{
|
|
246
|
+
"dataset": {
|
|
247
|
+
"id": dataset_id,
|
|
248
|
+
"frame": state.get("frame"),
|
|
249
|
+
"n": state.get("n"),
|
|
250
|
+
"k": state.get("k"),
|
|
251
|
+
"changed": state.get("changed"),
|
|
252
|
+
}
|
|
253
|
+
},
|
|
254
|
+
)
|
|
255
|
+
return
|
|
256
|
+
except NoDataInMemoryError as e:
|
|
257
|
+
self._error(400, "no_data_in_memory", str(e), stata_rc=e.stata_rc)
|
|
258
|
+
return
|
|
259
|
+
except Exception as e:
|
|
260
|
+
self._error(500, "internal_error", str(e))
|
|
261
|
+
return
|
|
262
|
+
|
|
263
|
+
if self.path == "/v1/vars":
|
|
264
|
+
try:
|
|
265
|
+
state = manager._client.get_dataset_state()
|
|
266
|
+
dataset_id = manager.current_dataset_id()
|
|
267
|
+
variables = manager._client.list_variables_rich()
|
|
268
|
+
self._send_json(
|
|
269
|
+
200,
|
|
270
|
+
{
|
|
271
|
+
"dataset": {"id": dataset_id, "frame": state.get("frame")},
|
|
272
|
+
"variables": variables,
|
|
273
|
+
},
|
|
274
|
+
)
|
|
275
|
+
return
|
|
276
|
+
except NoDataInMemoryError as e:
|
|
277
|
+
self._error(400, "no_data_in_memory", str(e), stata_rc=e.stata_rc)
|
|
278
|
+
return
|
|
279
|
+
except Exception as e:
|
|
280
|
+
self._error(500, "internal_error", str(e))
|
|
281
|
+
return
|
|
282
|
+
|
|
283
|
+
self._error(404, "not_found", "Not found")
|
|
284
|
+
|
|
285
|
+
def do_POST(self) -> None:
|
|
286
|
+
if not self._require_auth():
|
|
287
|
+
return
|
|
288
|
+
|
|
289
|
+
if self.path == "/v1/page":
|
|
290
|
+
body = self._read_json()
|
|
291
|
+
if body is None:
|
|
292
|
+
return
|
|
293
|
+
try:
|
|
294
|
+
resp = handle_page_request(manager, body, view_id=None)
|
|
295
|
+
self._send_json(200, resp)
|
|
296
|
+
return
|
|
297
|
+
except HTTPError as e:
|
|
298
|
+
self._error(e.status, e.code, e.message, stata_rc=e.stata_rc)
|
|
299
|
+
return
|
|
300
|
+
except Exception as e:
|
|
301
|
+
self._error(500, "internal_error", str(e))
|
|
302
|
+
return
|
|
303
|
+
|
|
304
|
+
if self.path == "/v1/views":
|
|
305
|
+
body = self._read_json()
|
|
306
|
+
if body is None:
|
|
307
|
+
return
|
|
308
|
+
dataset_id = str(body.get("datasetId", ""))
|
|
309
|
+
frame = str(body.get("frame", "default"))
|
|
310
|
+
filter_expr = str(body.get("filterExpr", ""))
|
|
311
|
+
if not dataset_id or not filter_expr:
|
|
312
|
+
self._error(400, "invalid_request", "datasetId and filterExpr are required")
|
|
313
|
+
return
|
|
314
|
+
try:
|
|
315
|
+
view = manager.create_view(dataset_id=dataset_id, frame=frame, filter_expr=filter_expr)
|
|
316
|
+
self._send_json(
|
|
317
|
+
200,
|
|
318
|
+
{
|
|
319
|
+
"dataset": {"id": view.dataset_id, "frame": view.frame},
|
|
320
|
+
"view": {"id": view.view_id, "filteredN": view.filtered_n},
|
|
321
|
+
},
|
|
322
|
+
)
|
|
323
|
+
return
|
|
324
|
+
except DatasetChangedError as e:
|
|
325
|
+
self._error(409, "dataset_changed", "Dataset changed")
|
|
326
|
+
return
|
|
327
|
+
except ValueError as e:
|
|
328
|
+
self._error(400, "invalid_filter", str(e))
|
|
329
|
+
return
|
|
330
|
+
except RuntimeError as e:
|
|
331
|
+
msg = str(e) or "No data in memory"
|
|
332
|
+
if "no data" in msg.lower():
|
|
333
|
+
self._error(400, "no_data_in_memory", msg)
|
|
334
|
+
return
|
|
335
|
+
self._error(500, "internal_error", msg)
|
|
336
|
+
return
|
|
337
|
+
except Exception as e:
|
|
338
|
+
self._error(500, "internal_error", str(e))
|
|
339
|
+
return
|
|
340
|
+
|
|
341
|
+
if self.path.startswith("/v1/views/") and self.path.endswith("/page"):
|
|
342
|
+
parts = self.path.split("/")
|
|
343
|
+
if len(parts) != 5:
|
|
344
|
+
self._error(404, "not_found", "Not found")
|
|
345
|
+
return
|
|
346
|
+
view_id = parts[3]
|
|
347
|
+
body = self._read_json()
|
|
348
|
+
if body is None:
|
|
349
|
+
return
|
|
350
|
+
try:
|
|
351
|
+
resp = handle_page_request(manager, body, view_id=view_id)
|
|
352
|
+
self._send_json(200, resp)
|
|
353
|
+
return
|
|
354
|
+
except HTTPError as e:
|
|
355
|
+
self._error(e.status, e.code, e.message, stata_rc=e.stata_rc)
|
|
356
|
+
return
|
|
357
|
+
except Exception as e:
|
|
358
|
+
self._error(500, "internal_error", str(e))
|
|
359
|
+
return
|
|
360
|
+
|
|
361
|
+
if self.path == "/v1/filters/validate":
|
|
362
|
+
body = self._read_json()
|
|
363
|
+
if body is None:
|
|
364
|
+
return
|
|
365
|
+
filter_expr = str(body.get("filterExpr", ""))
|
|
366
|
+
if not filter_expr:
|
|
367
|
+
self._error(400, "invalid_request", "filterExpr is required")
|
|
368
|
+
return
|
|
369
|
+
try:
|
|
370
|
+
manager._client.validate_filter_expr(filter_expr)
|
|
371
|
+
self._send_json(200, {"ok": True})
|
|
372
|
+
return
|
|
373
|
+
except ValueError as e:
|
|
374
|
+
self._error(400, "invalid_filter", str(e))
|
|
375
|
+
return
|
|
376
|
+
except RuntimeError as e:
|
|
377
|
+
msg = str(e) or "No data in memory"
|
|
378
|
+
if "no data" in msg.lower():
|
|
379
|
+
self._error(400, "no_data_in_memory", msg)
|
|
380
|
+
return
|
|
381
|
+
self._error(500, "internal_error", msg)
|
|
382
|
+
return
|
|
383
|
+
except Exception as e:
|
|
384
|
+
self._error(500, "internal_error", str(e))
|
|
385
|
+
return
|
|
386
|
+
|
|
387
|
+
self._error(404, "not_found", "Not found")
|
|
388
|
+
|
|
389
|
+
def do_DELETE(self) -> None:
|
|
390
|
+
if not self._require_auth():
|
|
391
|
+
return
|
|
392
|
+
|
|
393
|
+
if self.path.startswith("/v1/views/"):
|
|
394
|
+
parts = self.path.split("/")
|
|
395
|
+
if len(parts) != 4:
|
|
396
|
+
self._error(404, "not_found", "Not found")
|
|
397
|
+
return
|
|
398
|
+
view_id = parts[3]
|
|
399
|
+
if manager.delete_view(view_id):
|
|
400
|
+
self._send_json(200, {"ok": True})
|
|
401
|
+
else:
|
|
402
|
+
self._error(404, "not_found", "Not found")
|
|
403
|
+
return
|
|
404
|
+
|
|
405
|
+
self._error(404, "not_found", "Not found")
|
|
406
|
+
|
|
407
|
+
def log_message(self, format: str, *args: Any) -> None:
|
|
408
|
+
return
|
|
409
|
+
|
|
410
|
+
httpd = ThreadingHTTPServer((self._host, self._port), Handler)
|
|
411
|
+
t = threading.Thread(target=httpd.serve_forever, daemon=True)
|
|
412
|
+
t.start()
|
|
413
|
+
self._httpd = httpd
|
|
414
|
+
self._thread = t
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
class HTTPError(Exception):
|
|
418
|
+
def __init__(self, status: int, code: str, message: str, *, stata_rc: int | None = None):
|
|
419
|
+
super().__init__(message)
|
|
420
|
+
self.status = status
|
|
421
|
+
self.code = code
|
|
422
|
+
self.message = message
|
|
423
|
+
self.stata_rc = stata_rc
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
class DatasetChangedError(Exception):
    """Raised when a caller's dataset id no longer matches the current one.

    ``current_dataset_id`` holds the id that is current at raise time.
    """

    def __init__(self, current_dataset_id: str):
        self.current_dataset_id = current_dataset_id
        super().__init__("dataset_changed")
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
class NoDataInMemoryError(Exception):
|
|
433
|
+
def __init__(self, message: str = "No data in memory", *, stata_rc: int | None = None):
|
|
434
|
+
super().__init__(message)
|
|
435
|
+
self.stata_rc = stata_rc
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
class InvalidFilterError(Exception):
|
|
439
|
+
def __init__(self, message: str, *, stata_rc: int | None = None):
|
|
440
|
+
super().__init__(message)
|
|
441
|
+
self.message = message
|
|
442
|
+
self.stata_rc = stata_rc
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def handle_page_request(manager: UIChannelManager, body: dict[str, Any], *, view_id: str | None) -> dict[str, Any]:
    """Validate a page request and return the page payload for the response.

    Args:
        manager: Source of limits, the current dataset id, views and the client.
        body: Parsed JSON request. Reads ``datasetId`` (only when *view_id* is
            ``None``), ``offset``, ``limit``, ``vars``, ``includeObsNo`` and
            ``maxChars``.
        view_id: Id of a registered view to page through, or ``None`` to page
            the unfiltered dataset.

    Returns:
        A dict with ``dataset``, ``view``, ``vars``, ``rows`` and ``display``
        sections built from the client's page result.

    Raises:
        HTTPError: 404 for an unknown view, 400 for invalid or oversized
            parameters and for "no data" / invalid-variable client errors,
            409 when the dataset id is stale, 500 for other client failures.
    """
    max_limit, max_vars, max_chars, _ = manager.limits()

    view = None
    if view_id is None:
        dataset_id = str(body.get("datasetId", ""))
    else:
        view = manager.get_view(view_id)
        if view is None:
            raise HTTPError(404, "not_found", "View not found")
        dataset_id = view.dataset_id

    offset = _int_field(body, "offset", 0)
    limit = _int_field(body, "limit", 0)
    max_chars_req = _int_field(body, "maxChars", max_chars)
    vars_req = body.get("vars", [])
    include_obs_no = bool(body.get("includeObsNo", False))

    if offset < 0:
        raise HTTPError(400, "invalid_request", "offset must be >= 0")
    if limit <= 0:
        raise HTTPError(400, "invalid_request", "limit must be > 0")
    if limit > max_limit:
        raise HTTPError(400, "request_too_large", f"limit must be <= {max_limit}")
    if max_chars_req <= 0:
        raise HTTPError(400, "invalid_request", "maxChars must be > 0")
    if max_chars_req > max_chars:
        raise HTTPError(400, "request_too_large", f"maxChars must be <= {max_chars}")

    if not isinstance(vars_req, list) or not all(isinstance(v, str) for v in vars_req):
        raise HTTPError(400, "invalid_request", "vars must be a list of strings")
    if len(vars_req) > max_vars:
        raise HTTPError(400, "request_too_large", f"vars length must be <= {max_vars}")

    current_id = manager.current_dataset_id()
    if dataset_id != current_id:
        raise HTTPError(409, "dataset_changed", "Dataset changed")

    # Without a view the whole dataset is paged and no filtered count exists.
    obs_indices = view.obs_indices if view is not None else None
    filtered_n: int | None = view.filtered_n if view is not None else None

    try:
        dataset_state = manager._client.get_dataset_state()
        page = manager._client.get_page(
            offset=offset,
            limit=limit,
            vars=vars_req,
            include_obs_no=include_obs_no,
            max_chars=max_chars_req,
            obs_indices=obs_indices,
        )
    except RuntimeError as e:
        # StataClient uses RuntimeError("No data in memory") for empty dataset.
        msg = str(e) or "No data in memory"
        if "no data" in msg.lower():
            raise HTTPError(400, "no_data_in_memory", msg) from e
        raise HTTPError(500, "internal_error", msg) from e
    except ValueError as e:
        msg = str(e)
        if msg.lower().startswith("invalid variable"):
            raise HTTPError(400, "invalid_variable", msg) from e
        raise HTTPError(400, "invalid_request", msg) from e
    except Exception as e:
        raise HTTPError(500, "internal_error", str(e)) from e

    view_obj: dict[str, Any] = {
        "offset": offset,
        "limit": limit,
        "returned": page["returned"],
        "filteredN": filtered_n,
    }
    if view_id is not None:
        view_obj["viewId"] = view_id

    return {
        "dataset": {
            "id": current_id,
            "frame": dataset_state.get("frame"),
            "n": dataset_state.get("n"),
            "k": dataset_state.get("k"),
        },
        "view": view_obj,
        "vars": page["vars"],
        "rows": page["rows"],
        "display": {
            "maxChars": max_chars_req,
            "truncatedCells": page["truncated_cells"],
            "missing": ".",
        },
    }


def _int_field(body: dict[str, Any], key: str, default: int) -> int:
    """Read *key* from *body* as an int; missing or falsy values use *default*.

    Values that ``int()`` cannot convert (non-numeric strings, lists, NaN,
    infinity) raise a 400 ``HTTPError`` instead of escaping as a 500.
    """
    raw = body.get(key, default) or default
    try:
        return int(raw)
    except (TypeError, ValueError, OverflowError):
        raise HTTPError(400, "invalid_request", f"{key} must be an integer") from None
|