PyMkDB 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. pymkdb/__init__.py +6 -0
  2. pymkdb/cli.py +57 -0
  3. pymkdb-0.1.0.dist-info/METADATA +86 -0
  4. pymkdb-0.1.0.dist-info/RECORD +54 -0
  5. pymkdb-0.1.0.dist-info/WHEEL +5 -0
  6. pymkdb-0.1.0.dist-info/entry_points.txt +2 -0
  7. pymkdb-0.1.0.dist-info/top_level.txt +3 -0
  8. sdk/__init__.py +1 -0
  9. sdk/connection.py +225 -0
  10. sdk/delta.py +19 -0
  11. sdk/http_connection.py +180 -0
  12. sdk/mkdb_client.py +226 -0
  13. sdk/responses.py +154 -0
  14. src/__init__.py +1 -0
  15. src/config/db.py +227 -0
  16. src/config/server.py +52 -0
  17. src/db/__init__.py +207 -0
  18. src/db/cache/__init__.py +1 -0
  19. src/db/cache/ram_cache.py +144 -0
  20. src/db/cache/write_queue.py +156 -0
  21. src/db/maintenance/__init__.py +0 -0
  22. src/db/maintenance/compactor.py +118 -0
  23. src/db/maintenance/task_scheduler.py +73 -0
  24. src/db/objects/store.py +283 -0
  25. src/db/parity/__init__.py +0 -0
  26. src/db/parity/parity_manager.py +196 -0
  27. src/db/query/__init__.py +1 -0
  28. src/db/query/full_text_index.py +168 -0
  29. src/db/query/numeric_index.py +196 -0
  30. src/db/query/query_engine.py +308 -0
  31. src/db/query/tokenizer.py +48 -0
  32. src/db/query_workers/__init__.py +16 -0
  33. src/db/query_workers/dispatcher.py +339 -0
  34. src/db/query_workers/task.py +78 -0
  35. src/db/query_workers/worker.py +292 -0
  36. src/db/requesting/main.py +0 -0
  37. src/db/storage/__init__.py +1 -0
  38. src/db/storage/blob_store.py +47 -0
  39. src/db/storage/index_manager.py +92 -0
  40. src/db/storage/log_manager.py +119 -0
  41. src/db/storage/serializer.py +38 -0
  42. src/filing/__init__.py +31 -0
  43. src/objects/__init__.py +190 -0
  44. src/runtime/__init__.py +15 -0
  45. src/server/__init__.py +0 -0
  46. src/server/coms/actions.py +209 -0
  47. src/server/coms/http.py +46 -0
  48. src/server/coms/http_handlers.py +445 -0
  49. src/server/coms/metrics.py +231 -0
  50. src/server/coms/socket.py +461 -0
  51. src/server/coms/socket_protocol.py +54 -0
  52. src/server/control/api/actions.py +1001 -0
  53. src/server/control/server.py +404 -0
  54. src/server/event_log.py +58 -0
@@ -0,0 +1,461 @@
1
+ """
2
+ SocketServer — persistent TCP server for MkDB data-plane operations.
3
+
4
+ Message framing: see socket_protocol.py (4-byte big-endian length prefix + JSON body)
5
+
6
+ Client → server message format:
7
+ {
8
+ "type": "request",
9
+ "id": "<uuid4 correlation ID>",
10
+ "action": "read" | "write" | "delete" | "query" | "subscribe" | "ping",
11
+ "store": "<store_name>",
12
+ "record_id": "<id>", # for read / write / delete
13
+ "delta": {<flat-path>: <v>}, # for write
14
+ "filter": {<query filter>}, # for query
15
+ }
16
+
17
+ Server → client response:
18
+ {
19
+ "type": "response",
20
+ "id": "<same correlation ID>",
21
+ "status": "ok" | "error",
22
+ "data": <result> | None,
23
+ "error": <message> | None,
24
+ }
25
+
26
+ Subscribe message (client → server):
27
+ {"type": "subscribe", "store": "<name>"}
28
+ → server adds client to subscription set for that store
29
+
30
+ Broadcast event (server → subscribed clients):
31
+ {"type": "update", "store": "<name>", "record_id": "<id>", "op": "write"|"delete"}
32
+ """
33
+
34
+ import logging
35
+ import socket
36
+ import threading
37
+ import time
38
+ import uuid
39
+ from dataclasses import dataclass
40
+ from typing import Optional
41
+
42
+ from src.server.coms.socket_protocol import read_frame, write_frame
43
+ from src.server.coms.actions import execute as _execute
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+ # Default write password enforced when no users are configured in the DB.
48
+ _DEFAULT_WRITE_PASSWORD = "mk_db"
49
+
50
+
51
@dataclass
class ClientSession:
    """Mutable per-connection state tracked by the socket server."""

    # Peer address tuple as handed over by the accept loop.
    addr: tuple
    # The connected client socket.
    conn: socket.socket
    # Timestamp recorded when the session object was created.
    connected_at: float
    # Timestamp of the most recent "pong"; the heartbeat loop compares against it.
    last_pong: float
    # Set to True once the handshake has completed.
    authenticated: bool = False
    # Empty string means unauthenticated / open-mode.
    username: str = ""
    # True for R and RW sessions.
    can_read: bool = True
    # True for W and RW sessions after auth.
    can_write: bool = False
61
+
62
+
63
+ class SocketServer:
64
def __init__(self, host: str, port: int, database, heartbeat_interval: float = 5.0,
             max_clients: int = 100, recv_timeout: float = 30.0):
    """
    Store the server configuration and create empty runtime state.

    No socket is opened here; that happens in start().
    """
    # Runtime state, guarded by self._lock where it is shared across threads.
    self._lock: threading.Lock = threading.Lock()
    self._running: bool = False
    self._server_sock: Optional[socket.socket] = None
    self._clients: dict = {}        # addr_str -> ClientSession
    self._subscriptions: dict = {}  # store_name -> set of addr_str keys

    # Listening endpoint and the database handed to request execution.
    self.host, self.port = host, port
    self.database = database

    # Tunables.
    self.heartbeat_interval = heartbeat_interval
    self.max_clients = max_clients
    self.recv_timeout = recv_timeout
78
+
79
+ # ------------------------------------------------------------------
80
+ # Lifecycle
81
+ # ------------------------------------------------------------------
82
+
83
def start(self) -> None:
    """
    Bind the listening socket and start the accept thread.

    Raises:
        OSError: if the socket cannot be configured, bound or put into
            listening mode (for example, the port is already in use). The
            half-initialised socket is closed before the error propagates,
            so a failed start does not leak a file descriptor.
    """
    server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server_sock.bind((self.host, self.port))
        server_sock.listen(self.max_clients)
    except Exception:
        server_sock.close()
        raise

    # Publish the socket only once it is actually listening.
    self._server_sock = server_sock
    self._running = True
    logger.info("SocketServer listening on %s:%d", self.host, self.port)
    print(f"Socket server listening on {self.host}:{self.port}")

    accept_thread = threading.Thread(
        target=self._accept_loop, daemon=True, name="SocketServer-accept"
    )
    accept_thread.start()
96
+
97
def stop(self) -> None:
    """
    Stop accepting connections and disconnect every registered client.

    Each client is sent a best-effort {"type": "disconnect"} frame and its
    socket is then closed unconditionally. The close used to share a ``try``
    with the notification, so a failed send left the socket open.
    """
    self._running = False
    if self._server_sock:
        try:
            self._server_sock.close()
        except Exception:
            pass

    # Detach the sessions while holding the lock, then do the (potentially
    # blocking) socket I/O outside it so other threads are not stalled.
    with self._lock:
        sessions = list(self._clients.values())
        self._clients.clear()

    for session in sessions:
        try:
            write_frame(session.conn, {"type": "disconnect"})
        except Exception:
            # The peer may already be gone; the notice is best-effort only.
            pass
        finally:
            try:
                session.conn.close()
            except Exception:
                pass
113
+
114
+ # ------------------------------------------------------------------
115
+ # Accept loop
116
+ # ------------------------------------------------------------------
117
+
118
def _accept_loop(self) -> None:
    """
    Accept incoming connections until the server stops.

    A connection arriving while ``max_clients`` sessions are registered is
    sent a best-effort "Server full" error frame and is always closed, even
    when that frame cannot be delivered. Every other connection gets its own
    daemon thread running _client_loop().
    """
    while self._running:
        try:
            conn, addr = self._server_sock.accept()
        except Exception:
            # stop() closes the listening socket, which makes accept() raise.
            break

        # Only the capacity check needs the lock; socket I/O happens outside.
        with self._lock:
            server_full = len(self._clients) >= self.max_clients

        if server_full:
            try:
                write_frame(conn, {"type": "error", "error": "Server full"})
            except Exception:
                pass
            finally:
                try:
                    conn.close()
                except Exception:
                    pass
            continue

        conn.settimeout(self.recv_timeout)
        thread = threading.Thread(
            target=self._client_loop, args=(conn, addr),
            daemon=True, name=f"SocketClient-{addr}"
        )
        thread.start()
138
+
139
+ # ------------------------------------------------------------------
140
+ # Client loop
141
+ # ------------------------------------------------------------------
142
+
143
+ # HTTP method prefixes — first 4 bytes of any HTTP/1.x request
144
+ _HTTP_PREFIXES = {b"GET ", b"POST", b"PUT ", b"DELE", b"HEAD", b"PATC", b"OPTI"}
145
+
146
def _client_loop(self, conn: socket.socket, addr: tuple) -> None:
    """
    Serve one accepted connection from registration until disconnect.

    Registers a ClientSession under "<ip>:<port>", turns away clients that
    speak HTTP, runs the handshake, starts the heartbeat thread and then
    processes frames until the peer goes away or the server stops.

    Every exit path runs the single ``finally`` below, so the session is
    always unregistered and the socket always closed. A rejected handshake
    (bad access level, bad credentials, empty first frame) used to return
    without doing either, leaving the slot counted against ``max_clients``.
    """
    addr_str = f"{addr[0]}:{addr[1]}"
    now = time.time()
    session = ClientSession(addr=addr, conn=conn, connected_at=now, last_pong=now)

    with self._lock:
        self._clients[addr_str] = session

    logger.info("SocketServer: client connected %s", addr_str)

    try:
        if self._reject_http_client(conn, addr_str):
            return
        if not self._perform_handshake(conn, addr_str, session):
            return

        hb_thread = threading.Thread(
            target=self._heartbeat_loop, args=(conn, addr_str, session),
            daemon=True, name=f"SocketHB-{addr_str}"
        )
        hb_thread.start()

        self._serve_client(conn, addr_str, session)
    finally:
        self._unregister(addr_str)
        try:
            conn.close()
        except Exception:
            pass
        logger.info("SocketServer: client disconnected %s", addr_str)

def _reject_http_client(self, conn: socket.socket, addr_str: str) -> bool:
    """Answer a misrouted HTTP client with a 400; return True if one was detected."""
    # Peek at the first 4 bytes without consuming them. The timeout is short
    # because HTTP clients write immediately on connect, while MkDB clients
    # wait for the server's permissions frame first.
    try:
        conn.settimeout(0.15)
        first4 = conn.recv(4, socket.MSG_PEEK)
    except (socket.timeout, OSError):
        first4 = b""
    finally:
        conn.settimeout(self.recv_timeout)

    if not first4 or first4 not in self._HTTP_PREFIXES:
        return False

    db = self.database
    http_port = None
    if db is not None:
        http_server = getattr(getattr(db.config, "servers", None), "http_server", None)
        http_port = getattr(http_server, "port", None)

    hint = (
        f"Use the HTTP data-plane port ({http_port})."
        if http_port else "Check your client configuration."
    )
    msg = f"This port speaks the MkDB binary socket protocol, not HTTP. {hint}"

    # Content-Length is measured on the encoded body rather than derived
    # from the message length, so the header cannot drift from the body.
    body = f'{{"error":"{msg}"}}'.encode("utf-8")
    head = (
        "HTTP/1.1 400 Bad Request\r\n"
        "Content-Type: application/json\r\n"
        f"Content-Length: {len(body)}\r\n"
        "Connection: close\r\n\r\n"
    ).encode("utf-8")
    try:
        conn.sendall(head + body)
    except Exception:
        pass
    logger.warning("SocketServer: rejected HTTP client at %s — wrong port", addr_str)
    return True

def _perform_handshake(self, conn: socket.socket, addr_str: str,
                       session: "ClientSession") -> bool:
    """
    Negotiate access level and credentials; return True when the session is ready.

    1. Server sends a "permissions" frame describing what needs auth.
    2. Client replies with its desired access level: "R", "W" or "RW".
    3. If auth is required for that level, server sends "auth_required".
    4. Client sends {type: "auth", username: ..., password: ...}.

    Returns False (after sending an error frame where possible) on any
    rejection or I/O failure; the caller performs the cleanup.
    """
    db = self.database
    has_users = db is not None and bool(getattr(db.config, "users", {}))
    protect_reads = (
        db is not None
        and has_users
        and any(getattr(sc, "protect_reads", False) for sc in db.config.stores.values())
    )

    try:
        write_frame(conn, {
            "type": "permissions",
            "read_protected": has_users and protect_reads,
            # Writes are always protected — real users or the default password
            "write_protected": True,
        })

        access_msg = read_frame(conn)
        if not access_msg or not isinstance(access_msg, dict):
            return False
        client_access = str(access_msg.get("access", "R")).upper()
        if client_access not in ("R", "W", "RW"):
            write_frame(conn, {"type": "error", "error": "access must be 'R', 'W', or 'RW'"})
            return False

        # Writes always need auth (real users, or default password if none
        # configured). Reads need auth only when protect_reads is True.
        need_auth = client_access in ("W", "RW") or (protect_reads and client_access == "R")

        if not need_auth:
            # Read-only, no auth required
            session.authenticated = True
            session.can_read = True
            session.can_write = False
            write_frame(conn, {"type": "ready", "can_read": True, "can_write": False})
            return True

        write_frame(conn, {"type": "auth_required"})
        auth_msg = read_frame(conn)
        if not isinstance(auth_msg, dict) or auth_msg.get("type") != "auth":
            write_frame(conn, {"type": "error", "error": "Expected auth message"})
            return False
        username = str(auth_msg.get("username", ""))
        password = str(auth_msg.get("password", ""))

        if has_users:
            from src.server.control.server import verify_password
            user = db.config.users.get(username)
            if user is None or not verify_password(password, user.password_hash):
                write_frame(conn, {"type": "error", "error": "Invalid credentials"})
                return False
            session.username = username
        elif password != _DEFAULT_WRITE_PASSWORD:
            # No users configured — enforce the default write password
            write_frame(conn, {"type": "error", "error": "Invalid credentials"})
            return False

        session.authenticated = True
        session.can_read = client_access in ("R", "RW")
        session.can_write = client_access in ("W", "RW")
        write_frame(conn, {
            "type": "auth_ok",
            "username": username,
            "can_read": session.can_read,
            "can_write": session.can_write,
        })
        logger.info("SocketServer: %s authenticated as '%s' (access=%s)",
                    addr_str, username, client_access)
        return True

    except (ConnectionError, OSError, ValueError) as exc:
        # ValueError covers oversized or undecodable frames from read_frame.
        logger.warning("SocketServer: handshake error with %s: %s", addr_str, exc)
        return False

def _serve_client(self, conn: socket.socket, addr_str: str,
                  session: "ClientSession") -> None:
    """Read frames and dispatch pong / subscribe / request messages until disconnect."""
    while self._running:
        try:
            msg = read_frame(conn)
        except (ConnectionError, OSError, ValueError) as exc:
            if isinstance(exc, ValueError):
                # Raised for frames over the size limit and for payloads
                # that are not valid UTF-8 JSON.
                logger.warning("SocketServer: invalid frame from %s: %s", addr_str, exc)
                try:
                    write_frame(conn, {"type": "error", "error": str(exc)})
                except Exception:
                    pass
            break

        if not msg:
            continue
        if not isinstance(msg, dict):
            # Valid JSON that is not an object carries no "type"; skip it
            # rather than crash the client thread on .get().
            logger.warning("SocketServer: ignoring non-object frame from %s", addr_str)
            continue

        msg_type = msg.get("type")

        if msg_type == "pong":
            session.last_pong = time.time()
            continue

        if msg_type == "subscribe":
            store_name = msg.get("store", "")
            with self._lock:
                self._subscriptions.setdefault(store_name, set()).add(addr_str)
            try:
                write_frame(conn, {"type": "subscribed", "store": store_name})
            except Exception:
                break
            continue

        if msg_type == "request":
            response = self._handle_request(msg, session)
            try:
                write_frame(conn, response)
            except Exception:
                break
344
+
345
+ # ------------------------------------------------------------------
346
+ # Heartbeat
347
+ # ------------------------------------------------------------------
348
+
349
def _heartbeat_loop(self, conn: socket.socket, addr_str: str, session: ClientSession) -> None:
    """
    Ping one client every ``heartbeat_interval`` seconds.

    The loop ends when the server stops, the client is no longer
    registered, a ping cannot be sent, or no pong has been recorded for
    more than two intervals. In the last case the client is unregistered
    and its socket closed.
    """
    while True:
        if not self._running or addr_str not in self._clients:
            return

        time.sleep(self.heartbeat_interval)

        # The client may have disconnected while this thread was asleep.
        if addr_str not in self._clients:
            return

        try:
            write_frame(conn, {"type": "ping"})
        except Exception:
            return

        # Check pong timeout
        silent_for = time.time() - session.last_pong
        if silent_for > 2 * self.heartbeat_interval:
            logger.warning("SocketServer: client %s timed out (no pong)", addr_str)
            self._unregister(addr_str)
            try:
                conn.close()
            except Exception:
                pass
            return
367
+
368
+ # ------------------------------------------------------------------
369
+ # Broadcast (pub-sub)
370
+ # ------------------------------------------------------------------
371
+
372
def broadcast(self, store_name: str, event: dict) -> None:
    """
    Send ``event`` to every client subscribed to ``store_name``.

    Subscribers that are no longer registered, or whose socket write fails,
    are dropped from that store's subscription set afterwards.
    """
    # Work on a snapshot so the lock is not held during socket writes.
    with self._lock:
        targets = set(self._subscriptions.get(store_name, set()))

    unreachable = set()
    for key in targets:
        session = self._clients.get(key)
        delivered = False
        if session is not None:
            try:
                write_frame(session.conn, event)
                delivered = True
            except Exception:
                pass
        if not delivered:
            unreachable.add(key)

    if not unreachable:
        return

    # Clean up dead subscribers
    with self._lock:
        current = self._subscriptions.get(store_name)
        if current is not None:
            current -= unreachable
391
+
392
+ # ------------------------------------------------------------------
393
+ # Request dispatch
394
+ # ------------------------------------------------------------------
395
+
396
def _handle_request(self, msg: dict, session: ClientSession) -> dict:
    """
    Authorise and execute one "request" message, returning the response frame.

    The response echoes the request's "id" (a fresh UUID4 string if the
    client sent none) and carries either status "ok" with data or status
    "error" with a message.
    """
    correlation_id = msg.get("id", str(uuid.uuid4()))
    action = msg.get("action")
    store_name = msg.get("store", "")

    def reply(status, data, error):
        return {"type": "response", "id": correlation_id, "status": status, "data": data, "error": error}

    def fail(message: str):
        return reply("error", None, message)

    if not action:
        return fail("Missing 'action' field")

    db = self.database
    if db is None:
        return fail("Database not available")

    # ── Permission checks ────────────────────────────────────────────────
    mutating = action in ("write", "delete")
    reading = action in ("read", "query")

    # Session-level capability gates (set during handshake)
    if mutating and not session.can_write:
        return fail("Write access denied — reconnect with 'W' or 'RW' access")
    if reading and not session.can_read:
        return fail("Read access denied — reconnect with 'R' or 'RW' access")

    # Per-user / per-store rules apply only when users are configured.
    if bool(getattr(db.config, "users", {})):
        if session.username:
            user = db.config.users.get(session.username)
            if user is None:
                return fail("Authenticated user no longer exists")
            if store_name:
                perm = user.stores.get(store_name)
                if perm is None:
                    return fail(f"No access to store '{store_name}'")
                if mutating and not perm.write:
                    return fail("Write access denied")
        elif mutating:
            return fail("Authentication required for write operations")
        elif reading:
            # Anonymous session: reads are refused only on protected stores.
            store_cfg = db.config.stores.get(store_name)
            if getattr(store_cfg, "protect_reads", False):
                return fail(f"Authentication required to read from store '{store_name}'")

    # ── Execute ──────────────────────────────────────────────────────────
    client_key = session.username or session.addr[0]
    params = {
        "record_id": msg.get("record_id", ""),
        "delta": msg.get("delta", {}),
        "filter": msg.get("filter", {}),
        "hydrate": bool(msg.get("hydrate", False)),
    }
    result = _execute(db, action, store_name, params, client_key, "socket",
                      on_broadcast=self.broadcast)
    if result.ok:
        return reply("ok", result.data, None)
    return fail(result.error)
452
+
453
+ # ------------------------------------------------------------------
454
+ # Internal helpers
455
+ # ------------------------------------------------------------------
456
+
457
def _unregister(self, addr_str: str) -> None:
    """Forget a client: drop its session and remove it from every subscription set."""
    with self._lock:
        if addr_str in self._clients:
            del self._clients[addr_str]
        for members in self._subscriptions.values():
            members.discard(addr_str)
@@ -0,0 +1,54 @@
1
+ """
2
+ Socket message framing helpers.
3
+
4
+ Wire format: [4-byte big-endian uint32 length][UTF-8 JSON payload bytes]
5
+ """
6
+
7
+ import json
8
+ import socket
9
+ import struct
10
+
11
+ _MAX_FRAME_SIZE = 10 * 1024 * 1024 # 10 MB hard limit
12
+
13
+
14
def encode_message(payload: dict) -> bytes:
    """Return one wire frame: 4-byte big-endian body length followed by the UTF-8 JSON body."""
    text = json.dumps(payload, ensure_ascii=False)
    body = text.encode("utf-8")
    header = struct.pack(">I", len(body))
    return b"".join((header, body))
18
+
19
+
20
def decode_message(data: bytes) -> dict:
    """
    Parse the first frame contained in ``data``.

    The 4-byte big-endian length prefix is honoured: exactly that many
    payload bytes are decoded and any bytes after the frame are ignored.
    A zero-length frame yields ``{}``, matching read_frame().

    Raises:
        ValueError: if ``data`` is shorter than the prefix, holds fewer
            payload bytes than the prefix announces, or the payload is not
            valid UTF-8 JSON (json.JSONDecodeError and UnicodeDecodeError
            are both ValueError subclasses).
    """
    if len(data) < 4:
        raise ValueError("Frame shorter than the 4-byte length prefix")
    (length,) = struct.unpack_from(">I", data)
    if length == 0:
        return {}
    body = data[4:4 + length]
    if len(body) < length:
        raise ValueError(
            f"Truncated frame: expected {length} payload bytes, got {len(body)}"
        )
    # bytes() lets bytearray / memoryview input be decoded as well.
    return json.loads(bytes(body).decode("utf-8"))
23
+
24
+
25
def read_frame(sock: socket.socket) -> dict:
    """
    Read one frame: exactly 4 length bytes, then exactly that many payload bytes.

    Returns ``{}`` for a zero-length frame.

    Raises:
        ConnectionError: on premature close.
        ValueError: if the frame exceeds the 10 MB limit, the payload is
            not valid UTF-8 JSON, or the JSON value is not an object.
            Callers use ``.get()`` on the result, so anything other than a
            dict is rejected here instead of failing later.
    """
    (length,) = struct.unpack(">I", _recv_exact(sock, 4))
    if length == 0:
        return {}
    if length > _MAX_FRAME_SIZE:
        raise ValueError(f"Frame too large: {length} bytes (max {_MAX_FRAME_SIZE})")
    payload = json.loads(_recv_exact(sock, length).decode("utf-8"))
    if not isinstance(payload, dict):
        raise ValueError(
            f"Frame payload must be a JSON object, got {type(payload).__name__}"
        )
    return payload
39
+
40
+
41
def write_frame(sock: socket.socket, payload: dict) -> None:
    """Serialise ``payload`` into a single frame and send all of it on ``sock``."""
    frame = encode_message(payload)
    sock.sendall(frame)
44
+
45
+
46
def _recv_exact(sock: socket.socket, n: int) -> bytes:
    """
    Receive exactly ``n`` bytes from ``sock``.

    Chunks are collected in a list and joined once, because repeated
    ``bytes +=`` copies the whole buffer on every chunk and becomes
    quadratic for large frames that arrive in many pieces.

    Raises:
        ConnectionError: if the connection closes before ``n`` bytes arrive.
    """
    chunks = []
    remaining = n
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            raise ConnectionError("Socket closed before all bytes received")
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)