PyMkDB 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. pymkdb/__init__.py +6 -0
  2. pymkdb/cli.py +57 -0
  3. pymkdb-0.1.0.dist-info/METADATA +86 -0
  4. pymkdb-0.1.0.dist-info/RECORD +54 -0
  5. pymkdb-0.1.0.dist-info/WHEEL +5 -0
  6. pymkdb-0.1.0.dist-info/entry_points.txt +2 -0
  7. pymkdb-0.1.0.dist-info/top_level.txt +3 -0
  8. sdk/__init__.py +1 -0
  9. sdk/connection.py +225 -0
  10. sdk/delta.py +19 -0
  11. sdk/http_connection.py +180 -0
  12. sdk/mkdb_client.py +226 -0
  13. sdk/responses.py +154 -0
  14. src/__init__.py +1 -0
  15. src/config/db.py +227 -0
  16. src/config/server.py +52 -0
  17. src/db/__init__.py +207 -0
  18. src/db/cache/__init__.py +1 -0
  19. src/db/cache/ram_cache.py +144 -0
  20. src/db/cache/write_queue.py +156 -0
  21. src/db/maintenance/__init__.py +0 -0
  22. src/db/maintenance/compactor.py +118 -0
  23. src/db/maintenance/task_scheduler.py +73 -0
  24. src/db/objects/store.py +283 -0
  25. src/db/parity/__init__.py +0 -0
  26. src/db/parity/parity_manager.py +196 -0
  27. src/db/query/__init__.py +1 -0
  28. src/db/query/full_text_index.py +168 -0
  29. src/db/query/numeric_index.py +196 -0
  30. src/db/query/query_engine.py +308 -0
  31. src/db/query/tokenizer.py +48 -0
  32. src/db/query_workers/__init__.py +16 -0
  33. src/db/query_workers/dispatcher.py +339 -0
  34. src/db/query_workers/task.py +78 -0
  35. src/db/query_workers/worker.py +292 -0
  36. src/db/requesting/main.py +0 -0
  37. src/db/storage/__init__.py +1 -0
  38. src/db/storage/blob_store.py +47 -0
  39. src/db/storage/index_manager.py +92 -0
  40. src/db/storage/log_manager.py +119 -0
  41. src/db/storage/serializer.py +38 -0
  42. src/filing/__init__.py +31 -0
  43. src/objects/__init__.py +190 -0
  44. src/runtime/__init__.py +15 -0
  45. src/server/__init__.py +0 -0
  46. src/server/coms/actions.py +209 -0
  47. src/server/coms/http.py +46 -0
  48. src/server/coms/http_handlers.py +445 -0
  49. src/server/coms/metrics.py +231 -0
  50. src/server/coms/socket.py +461 -0
  51. src/server/coms/socket_protocol.py +54 -0
  52. src/server/control/api/actions.py +1001 -0
  53. src/server/control/server.py +404 -0
  54. src/server/event_log.py +58 -0
@@ -0,0 +1,292 @@
1
+ """
2
+ QueryWorker — runs inside a worker process (or thread) to resolve QueryTasks.
3
+
4
+ Each worker owns:
5
+ - A lightweight LRU + TTL RAM cache keyed by record_id.
6
+ - Read-only access to the store's disk files via the storage layer.
7
+ - A private invalidation queue so the dispatcher can evict stale entries.
8
+
9
+ Entry points
10
+ ------------
11
+ worker_process_main — for multiprocessing.Process targets
12
+ worker_thread_main — for threading.Thread targets (single-worker / no-GIL mode)
13
+
14
+ Both share the same _worker_loop implementation; the only difference is how
15
+ sys.path is initialised (process needs it, thread already has it).
16
+ """
17
+
18
+ import os
19
+ import sys
20
+ import time
21
+ import queue
22
+ import logging
23
+ from collections import OrderedDict
24
+ from typing import Any, Optional
25
+
26
# Shutdown marker: when this value is pulled from the work queue the worker
# loop stops. It is compared by identity (`is`), not by equality.
WORKER_SENTINEL = None

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Minimal worker-local LRU + TTL cache
34
+ # (A full RamCache from WS-2 will replace this once implemented.)
35
+ # ---------------------------------------------------------------------------
36
+
37
class _WorkerCache:
    """
    Small per-worker cache combining LRU eviction with a per-entry TTL.

    Three structures are kept in step with one another:
      * ``_data``  — record_id -> cached value
      * ``_order`` — OrderedDict used purely for recency (oldest entry first)
      * ``_ts``    — record_id -> ``time.monotonic()`` of the last ``set``

    A miss and an expired entry both yield ``None`` from ``get``, so a cached
    value of ``None`` cannot be told apart from a miss.
    """

    def __init__(self, max_size: int, ttl: float):
        # Capacity is clamped to at least one entry.
        self._max = max(1, max_size)
        self._ttl = float(ttl)
        self._data: dict[str, Any] = {}
        self._order: OrderedDict[str, None] = OrderedDict()
        self._ts: dict[str, float] = {}

    # -- public API ----------------------------------------------------------

    def get(self, record_id: str) -> Optional[Any]:
        """Return the live value for record_id, or None if absent or expired."""
        if record_id in self._data:
            expired = time.monotonic() - self._ts[record_id] > self._ttl
            if not expired:
                # A successful read makes the entry the most recently used.
                self._order.move_to_end(record_id)
                return self._data[record_id]
            self._evict_one(record_id)
        return None

    def set(self, record_id: str, value: Any) -> None:
        """Store value under record_id, evicting the LRU entry when full."""
        is_new = record_id not in self._data
        if is_new:
            if len(self._data) >= self._max:
                # The front of the OrderedDict is the least recently used id.
                lru_id, _ = self._order.popitem(last=False)
                del self._data[lru_id]
                del self._ts[lru_id]
            self._order[record_id] = None
        else:
            self._order.move_to_end(record_id)
        # Writing always refreshes both the value and its timestamp.
        self._data[record_id] = value
        self._ts[record_id] = time.monotonic()

    def delete(self, record_id: str) -> None:
        """Drop record_id from the cache; unknown ids are ignored."""
        self._evict_one(record_id)

    def clear(self) -> None:
        """Remove every entry."""
        for table in (self._data, self._order, self._ts):
            table.clear()

    def __len__(self) -> int:
        return len(self._data)

    # -- internal ------------------------------------------------------------

    def _evict_one(self, record_id: str) -> None:
        """Remove record_id from all three structures, tolerating absence."""
        for table in (self._data, self._order, self._ts):
            table.pop(record_id, None)
91
+
92
+
93
+ # ---------------------------------------------------------------------------
94
+ # Task resolution — integration point for WS-1 and WS-3
95
+ # ---------------------------------------------------------------------------
96
+
97
def _resolve(task_dict: dict, cache: "_WorkerCache", base_path: str) -> Any:
    """
    Route one task dict to the resolver matching its ``operation``.

    The task must carry the keys ``operation``, ``store_name`` and ``params``;
    a missing key surfaces as ``KeyError`` before any resolver is chosen.
    Supported operations: ``read``, ``multi_read``, ``exists``, ``query`` and
    ``count``. Any other value raises ``ValueError``.

    Integration notes
    -----------------
    WS-1 (storage layer):
        Replace the NotImplementedError blocks in _read and _exists with:
            index_mgr = IndexManager(base_path, store_name)
            log_mgr   = LogManager(base_path, store_name)
            raw_line  = log_mgr.read(*index_mgr.get(record_id))
            # parse flat line -> dict

    WS-3 (query engine):
        Replace the NotImplementedError blocks in _query and _count with:
            engine = QueryEngine(store)
            return engine.query(filter_dict)
    """
    operation = task_dict["operation"]
    target_store = task_dict["store_name"]
    op_params = task_dict["params"]

    # Pick the resolver first, then make a single uniform call.
    if operation == "read":
        handler = _read
    elif operation == "multi_read":
        handler = _multi_read
    elif operation == "exists":
        handler = _exists
    elif operation == "query":
        handler = _query
    elif operation == "count":
        handler = _count
    else:
        raise ValueError(f"Unknown operation: {operation!r}")

    return handler(target_store, op_params, cache, base_path)
130
+
131
+
132
def _read(store_name: str, params: dict, cache: "_WorkerCache", base_path: str) -> dict:
    """
    Resolve a single-record read.

    Returns the cached value when ``cache.get`` yields something other than
    None. The disk path is not wired up yet, so a cache miss raises
    ``NotImplementedError``.

    Raises
    ------
    ValueError          : ``params`` has no (or an empty) ``record_id``
    NotImplementedError : the record is not in the cache
    """
    wanted_id = params.get("record_id", "")
    if wanted_id:
        hit = cache.get(wanted_id)
        if hit is None:
            # TODO(WS-1): load the record by byte offset via IndexManager /
            # LogManager (index lookup -> log read -> parse flat line), store
            # the parsed result in `cache`, and return it.
            raise NotImplementedError("Storage layer (WS-1) not yet implemented")
        return hit
    raise ValueError("record_id required for 'read'")
152
+
153
+
154
def _multi_read(store_name: str, params: dict, cache: "_WorkerCache", base_path: str) -> dict:
    """
    Resolve several reads at once.

    Each id in ``params["record_ids"]`` (default: no ids) is passed through
    ``_read``; the result maps record id -> record. Any exception raised by
    ``_read`` for one id propagates and aborts the whole call.
    """
    wanted_ids = params.get("record_ids", [])
    return {
        rid: _read(store_name, {"record_id": rid}, cache, base_path)
        for rid in wanted_ids
    }
160
+
161
+
162
def _exists(store_name: str, params: dict, cache: "_WorkerCache", base_path: str) -> bool:
    """
    Report whether a record exists.

    Only the cache can answer today: a cached value other than None yields
    True. A cache miss raises ``NotImplementedError`` because the index
    lookup is not wired up yet.

    Raises
    ------
    ValueError          : ``params`` has no (or an empty) ``record_id``
    NotImplementedError : the record is not in the cache
    """
    wanted_id = params.get("record_id", "")
    if not wanted_id:
        raise ValueError("record_id required for 'exists'")

    in_cache = cache.get(wanted_id) is not None
    if not in_cache:
        # TODO(WS-1): check IndexManager._map
        raise NotImplementedError("Storage layer (WS-1) not yet implemented")
    return True
172
+
173
+
174
def _query(store_name: str, params: dict, cache: "_WorkerCache", base_path: str) -> list:
    """
    Placeholder for filtered queries.

    Always raises ``NotImplementedError``; none of the arguments are used yet.
    """
    # TODO(WS-3): route params["filter"] through QueryEngine
    raise NotImplementedError("Query engine (WS-3) not yet implemented")
177
+
178
+
179
def _count(store_name: str, params: dict, cache: "_WorkerCache", base_path: str) -> int:
    """
    Placeholder for counting the records that match a filter.

    Always raises ``NotImplementedError``; none of the arguments are used yet.
    """
    # TODO(WS-3): route params["filter"] through QueryEngine, return len
    raise NotImplementedError("Query engine (WS-3) not yet implemented")
182
+
183
+
184
+ # ---------------------------------------------------------------------------
185
+ # Core worker loop — shared by process and thread entry points
186
+ # ---------------------------------------------------------------------------
187
+
188
def _worker_loop(
    worker_id: int,
    store_name: str,
    base_path: str,
    work_queue,            # multiprocessing.Queue
    results_queue,         # multiprocessing.Queue
    invalidation_queue,    # multiprocessing.Queue (private to this worker)
    cache_max_size: int,
    cache_ttl: float,
    stop_event,            # multiprocessing.Event or threading.Event
) -> None:
    """
    Main loop shared by the process and thread entry points.

    Each iteration:
      1. drains ``invalidation_queue`` and evicts every id it yields from the
         worker-local cache;
      2. waits up to 0.5 s for a task on ``work_queue``;
      3. resolves the task and puts either
         ``{"task_id", "status": "ok", "data"}`` or
         ``{"task_id", "status": "error", "error"}`` on ``results_queue``.

    The loop ends when ``stop_event`` is set or when ``WORKER_SENTINEL`` is
    pulled from the work queue.

    Only ``queue.Empty`` is treated as the normal "nothing to do" signal.
    Any other queue failure is logged instead of being swallowed silently,
    and a failing work queue triggers a short wait on ``stop_event`` so a
    permanently broken queue cannot turn the loop into a busy-spin.
    """
    log = logging.getLogger(f"QueryWorker[{store_name}#{worker_id}]")
    log.info("Worker started (pid=%s)", os.getpid())

    cache = _WorkerCache(cache_max_size, cache_ttl)

    while not stop_event.is_set():
        # 1. Drain the private invalidation queue to keep the cache consistent
        #    with writes that the dispatcher has broadcast.
        try:
            while True:
                cache.delete(invalidation_queue.get_nowait())
        except queue.Empty:
            pass  # drained — the expected way out of the inner loop
        except Exception as exc:
            # Best-effort: keep serving tasks, but make the failure visible.
            log.warning("Invalidation queue read failed: %s", exc)

        # 2. Pull the next task (short timeout so stop_event is checked)
        try:
            task_dict = work_queue.get(timeout=0.5)
        except queue.Empty:
            continue  # timeout — loop back to check stop_event
        except Exception as exc:
            log.warning("Work queue read failed: %s", exc)
            # get() may have failed immediately; wait so we do not spin.
            stop_event.wait(0.5)
            continue

        # Sentinel signals graceful shutdown
        if task_dict is WORKER_SENTINEL:
            log.info("Received shutdown sentinel")
            break

        task_id = task_dict.get("task_id", "?")
        log.debug("Handling task %s op=%s", task_id, task_dict.get("operation"))

        # 3. Resolve and post result. Resolver errors are reported back to the
        #    caller as an "error" result rather than killing the worker.
        try:
            data = _resolve(task_dict, cache, base_path)
            results_queue.put({
                "task_id": task_id,
                "status": "ok",
                "data": data,
            })
        except Exception as exc:
            log.warning("Task %s failed: %s", task_id, exc)
            results_queue.put({
                "task_id": task_id,
                "status": "error",
                "error": str(exc),
            })

    log.info("Worker exiting (pid=%s)", os.getpid())
245
+
246
+
247
+ # ---------------------------------------------------------------------------
248
+ # Entry points
249
+ # ---------------------------------------------------------------------------
250
+
251
def worker_process_main(
    worker_id: int,
    project_root: str,     # added to sys.path so imports resolve
    store_name: str,
    base_path: str,
    work_queue,
    results_queue,
    invalidation_queue,
    cache_max_size: int,
    cache_ttl: float,
    stop_event,
) -> None:
    """
    Entry point for multiprocessing.Process workers.

    Puts ``project_root`` at the front of ``sys.path`` (unless it is already
    listed), configures root logging at INFO level, then hands control to
    ``_worker_loop`` with the remaining arguments.
    """
    # Ensure the project is importable inside the child process
    already_importable = project_root in sys.path
    if not already_importable:
        sys.path.insert(0, project_root)

    logging.basicConfig(level=logging.INFO)

    loop_args = (
        worker_id, store_name, base_path,
        work_queue, results_queue, invalidation_queue,
        cache_max_size, cache_ttl, stop_event,
    )
    _worker_loop(*loop_args)
274
+
275
+
276
def worker_thread_main(
    worker_id: int,
    store_name: str,
    base_path: str,
    work_queue,
    results_queue,
    invalidation_queue,
    cache_max_size: int,
    cache_ttl: float,
    stop_event,
) -> None:
    """
    Entry point for threading.Thread workers (parallel_enabled=False).

    Unlike the process entry point it touches neither ``sys.path`` nor the
    logging configuration; it forwards every argument to ``_worker_loop``.
    """
    loop_args = (
        worker_id, store_name, base_path,
        work_queue, results_queue, invalidation_queue,
        cache_max_size, cache_ttl, stop_event,
    )
    _worker_loop(*loop_args)
File without changes
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,47 @@
1
+ """
2
+ BlobStore — stores oversized record values as individual binary/text files.
3
+
4
+ Blob files live at: {store_path}/blobs/{record_id}.dat
5
+ The pseudo-segment string returned is "blobs/{record_id}.dat", which
6
+ LogManager.read() recognises and handles by reading the file directly.
7
+ """
8
+
9
+ import os
10
+
11
+
12
def is_blob(segment_str: str) -> bool:
    """Tell whether segment_str is a blob pseudo-segment (it starts with "blobs/")."""
    blob_prefix = "blobs/"
    return segment_str.startswith(blob_prefix)
15
+
16
+
17
def write_blob(store_path: str, record_id: str, data_str: str) -> tuple:
    """
    Persist data_str as UTF-8 in {store_path}/blobs/{safe_id}.dat.

    ``safe_id`` is record_id with every "/", "\\" and ".." replaced by "_".
    The blobs directory is created on demand and an existing file is
    overwritten.

    Returns (path_str, 0, byte_size): the pseudo-segment string
    "blobs/{safe_id}.dat", a fixed offset of 0, and the encoded length.
    """
    target_dir = os.path.join(store_path, "blobs")
    os.makedirs(target_dir, exist_ok=True)

    # Security: neutralise path separators and parent references in the id.
    safe_id = record_id
    for unsafe in ("/", "\\", ".."):
        safe_id = safe_id.replace(unsafe, "_")

    file_name = f"{safe_id}.dat"
    payload = data_str.encode("utf-8")
    with open(os.path.join(target_dir, file_name), "wb") as out:
        out.write(payload)

    return (f"blobs/{safe_id}.dat", 0, len(payload))
34
+
35
+
36
def read_blob(store_path: str, path_str: str) -> str:
    """
    Return the UTF-8 decoded content of the blob addressed by path_str.

    path_str is the pseudo-segment string "blobs/{record_id}.dat", resolved
    relative to store_path.

    Raises ValueError when the normalised path lies outside
    {store_path}/blobs.
    """
    # Security: the normalised target must be the blobs directory or below it.
    blob_root = os.path.normpath(os.path.join(store_path, "blobs"))
    target = os.path.normpath(os.path.join(store_path, path_str))
    contained = target == blob_root or target.startswith(blob_root + os.sep)
    if not contained:
        raise ValueError(f"Blob path traversal rejected: {path_str!r}")

    with open(target, "rb") as src:
        raw = src.read()
    return raw.decode("utf-8")
@@ -0,0 +1,92 @@
1
+ """
2
+ IndexManager — manages the primary index for one store.
3
+
4
+ Index file: {store_path}/{service}.idx
5
+ Line format: {record_id}:{segment_seq}:{byte_offset}:{byte_size}
6
+ Tombstone: !{record_id}:{segment_seq}:{byte_offset}:{byte_size}
7
+
8
+ Tombstoned entries are excluded from _map at load time; the physical .idx
9
+ file is only fully rewritten during compaction via save_full().
10
+ """
11
+
12
+ import os
13
+
14
+
15
class IndexManager:
    """
    In-memory view of one store's primary index, backed by an append-only file.

    ``_map`` maps record_id -> (segment_seq_str, offset, size). ``set`` and
    ``delete`` update the map and append a line (or a "!"-prefixed tombstone)
    to the .idx file; ``save_full`` rewrites the file from the map.

    Record ids are the text before the first ":" of a line, so an id that
    contains ":" or starts with "!" does not survive a reload.
    """

    def __init__(self, store_path: str, service: str):
        self.index_path = os.path.join(store_path, f"{service}.idx")
        self._map: dict = {}   # record_id -> (segment_seq_str, offset, size)
        self._dirty: bool = False
        self.load()

    # ------------------------------------------------------------------
    # Load / save
    # ------------------------------------------------------------------

    def load(self) -> None:
        """
        Replay the .idx file into ``_map``; a missing file is a no-op.

        Lines are applied in file order, so a later entry or tombstone for the
        same id wins. Blank lines, lines with fewer than four ":" separated
        fields, and lines whose offset or size is not an integer (for example
        a torn final write) are skipped rather than aborting the load.
        """
        try:
            fh = open(self.index_path, "r", encoding="utf-8")
        except FileNotFoundError:
            return
        with fh:
            for raw in fh:
                line = raw.strip()
                if not line:
                    continue
                if line.startswith("!"):
                    # Tombstone — ensure the id is absent from the map.
                    self._map.pop(line[1:].split(":")[0], None)
                    continue
                parts = line.split(":")
                if len(parts) < 4:
                    continue
                try:
                    offset = int(parts[-2])
                    size = int(parts[-1])
                except ValueError:
                    # Corrupt or truncated numeric field: ignore this line and
                    # keep whatever earlier entry the id already has.
                    continue
                # The segment field may itself contain ":" (it can be a path
                # such as "blobs/abc.dat"), so it is everything between the
                # record id and the last two fields.
                self._map[parts[0]] = (":".join(parts[1:-2]), offset, size)

    def save_full(self) -> None:
        """Rewrite the entire .idx file from _map (used after compaction)."""
        # Write to a sibling temp file and swap it in with os.replace, so
        # index_path is replaced in a single step.
        tmp_path = self.index_path + ".tmp"
        with open(tmp_path, "w", encoding="utf-8") as fh:
            for record_id, (seg, offset, size) in self._map.items():
                fh.write(f"{record_id}:{seg}:{offset}:{size}\n")
        os.replace(tmp_path, self.index_path)
        self._dirty = False

    # ------------------------------------------------------------------
    # CRUD
    # ------------------------------------------------------------------

    def get(self, record_id: str):
        """Return (segment_seq, offset, size) or None."""
        return self._map.get(record_id)

    def set(self, record_id: str, segment_seq: str, offset: int, size: int) -> None:
        """Update _map and append the new entry to the .idx file."""
        self._map[record_id] = (segment_seq, offset, size)
        with open(self.index_path, "a", encoding="utf-8") as fh:
            fh.write(f"{record_id}:{segment_seq}:{offset}:{size}\n")
        self._dirty = True

    def delete(self, record_id: str) -> None:
        """Remove from _map and write a tombstone to the .idx file."""
        entry = self._map.pop(record_id, None)
        if entry is None:
            return  # unknown id: nothing to remove, no tombstone written
        seg, offset, size = entry
        with open(self.index_path, "a", encoding="utf-8") as fh:
            fh.write(f"!{record_id}:{seg}:{offset}:{size}\n")
        self._dirty = True

    def records_in_segment(self, seq_str: str) -> list:
        """Return all record IDs whose current segment matches seq_str."""
        return [rid for rid, (seg, _, _) in self._map.items() if seg == seq_str]

    def all_record_ids(self) -> list:
        """Return all live record IDs."""
        return list(self._map)
@@ -0,0 +1,119 @@
1
+ """
2
+ LogManager — append-only rolling log file manager for one store.
3
+
4
+ Segment files: {store_path}/{service}_{NNN}.log (NNN = zero-padded 3-digit int)
5
+ On init, scans store_path for existing segments and opens the highest one
6
+ in append mode. When a segment exceeds segment_threshold bytes it is closed
7
+ and a new one is started (_rollover).
8
+ """
9
+
10
+ import os
11
+ import re
12
+
13
+
14
class LogManager:
    """
    Append-only, rolling segment log for one store.

    Segments are files named ``{service}_{NNN}.log`` inside ``store_path``,
    where NNN is the segment number zero-padded to at least three digits.
    One segment is open for appending at a time; once it reaches
    ``segment_threshold`` bytes it is closed and the next number is opened.
    """

    def __init__(self, store_path: str, service: str, segment_threshold: int):
        """
        Parameters
        ----------
        store_path        : absolute path to the store directory
        service           : store name (used as the filename prefix)
        segment_threshold : byte size at which a new segment is opened
        """
        self.store_path = store_path
        self.service = service
        self.segment_threshold = segment_threshold

        self.active_segment: int = self._find_highest_segment()
        self.active_fh = open(self.segment_path(self.active_segment), "ab")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _segment_numbers(self) -> list:
        """Return the segment numbers found in store_path (unsorted)."""
        # Accept exactly the names segment_path() can produce: three digits,
        # or four and more digits without a leading zero. A fixed three-digit
        # pattern would make segments >= 1000 invisible after a restart.
        pattern = re.compile(
            rf"^{re.escape(self.service)}_(\d{{3}}|[1-9]\d{{3,}})\.log$"
        )
        numbers = []
        for name in os.listdir(self.store_path):
            m = pattern.match(name)
            if m:
                numbers.append(int(m.group(1)))
        return numbers

    def _find_highest_segment(self) -> int:
        """Return the highest existing segment number, or 1 if none exist."""
        highest = max(self._segment_numbers(), default=0)
        if highest == 0:
            # No usable segment yet: create segment 1 so it exists on disk.
            highest = 1
            open(self.segment_path(1), "ab").close()
        return highest

    def segment_path(self, seq_int: int) -> str:
        """Return the absolute path for segment number seq_int."""
        return os.path.join(self.store_path, f"{self.service}_{seq_int:03d}.log")

    def _rollover(self) -> None:
        """Close the current segment and open a new one."""
        self.active_fh.flush()
        self.active_fh.close()
        self.active_segment += 1
        self.active_fh = open(self.segment_path(self.active_segment), "ab")

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def append(self, record_id: str, flat_line_str: str) -> tuple:
        """
        Write one record line to the active segment.

        Parameters
        ----------
        record_id     : the record's ID (not written by this method)
        flat_line_str : already-serialized line string (no trailing newline)

        Returns
        -------
        (segment_seq_str, byte_offset, byte_size) of the line just written.
        byte_size includes the trailing newline. The segment string names the
        segment that received the line, even when this call triggers a
        rollover afterwards.
        """
        encoded = (flat_line_str + "\n").encode("utf-8")
        offset = self.active_fh.tell()
        self.active_fh.write(encoded)
        self.active_fh.flush()
        location = (f"{self.active_segment:03d}", offset, len(encoded))
        if self.active_fh.tell() >= self.segment_threshold:
            self._rollover()
        return location

    def read(self, segment_seq_str: str, offset: int, size: int) -> str:
        """
        Read one stored value.

        For a normal segment, exactly ``size`` bytes are read at ``offset``
        (readline is deliberately not used) and returned decoded with the
        trailing newline stripped.

        A ``segment_seq_str`` starting with "blobs/" names a blob file
        relative to store_path; the whole file is returned and ``offset`` /
        ``size`` are ignored. Blobs are read as bytes and decoded, so line
        endings inside the data come back unchanged.

        Raises ValueError when a blob path resolves outside
        {store_path}/blobs.
        """
        if segment_seq_str.startswith("blobs/"):
            blobs_dir = os.path.normpath(os.path.join(self.store_path, "blobs"))
            blob_path = os.path.normpath(
                os.path.join(self.store_path, segment_seq_str)
            )
            # Security: "blobs/../x" must not escape the blobs directory.
            if not blob_path.startswith(blobs_dir + os.sep):
                raise ValueError(
                    f"Blob path traversal rejected: {segment_seq_str!r}"
                )
            with open(blob_path, "rb") as fh:
                return fh.read().decode("utf-8")

        path = self.segment_path(int(segment_seq_str))
        with open(path, "rb") as fh:
            fh.seek(offset)
            return fh.read(size).decode("utf-8").rstrip("\n")

    def list_segments(self) -> list:
        """Return all segment numbers found on disk, sorted ascending."""
        return sorted(self._segment_numbers())

    def close(self) -> None:
        """Flush and close the active file handle; safe to call repeatedly."""
        if self.active_fh and not self.active_fh.closed:
            self.active_fh.flush()
            self.active_fh.close()
@@ -0,0 +1,38 @@
1
+ """
2
+ Record serialization helpers for .log file lines.
3
+
4
+ Wire format: {record_id} {json_object}
5
+ One line per record: record_id (no spaces) followed by a single space,
6
+ then a single-line JSON object. Native JSON types (str, int, float, bool, list,
7
+ dict, None) are preserved exactly — no custom encoding required.
8
+ """
9
+
10
+ import json
11
+
12
+
13
def serialize_record(record_id: str, flat_dict: dict) -> str:
    """
    Build the log line for one record: the id, one space, then the JSON text.

    No trailing newline is added, and non-ASCII characters are written as-is
    rather than as \\u escapes.
    Example: abc123 {"name": "Gizmo", "price": 19.99, "in_stock": true}
    """
    return "{} {}".format(record_id, json.dumps(flat_dict, ensure_ascii=False))
19
+
20
+
21
def deserialize_record(line: str) -> tuple:
    """
    Split a log line into (record_id, parsed_json).

    Surrounding whitespace is stripped first. The id is everything before the
    first space and the remainder is parsed as JSON. An empty line, a line
    without a space, or a payload that is not valid JSON yields (None, {}).
    """
    malformed = (None, {})
    record_id, separator, payload = line.strip().partition(" ")
    if not separator:
        # Covers both the empty line and the "id only" line.
        return malformed
    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError:
        return malformed
    return (record_id, parsed)
src/filing/__init__.py ADDED
@@ -0,0 +1,31 @@
1
+ import json
2
+
3
def read_file(path):
    """Return the whole content of the text file at path, decoded as UTF-8."""
    with open(path, mode='r', encoding='utf-8') as handle:
        text = handle.read()
    return text
6
+
7
def write_file(path, content):
    """Create or overwrite the file at path with content, encoded as UTF-8."""
    with open(path, mode='w', encoding='utf-8') as handle:
        handle.write(content)
10
+
11
def append_file(path, content):
    """Append content (UTF-8) to the file at path, creating it if needed."""
    with open(path, mode='a', encoding='utf-8') as handle:
        handle.write(content)
14
+
15
def delete_file(path):
    """Remove the file at path; errors from os.remove propagate to the caller."""
    from os import remove
    remove(path)
18
+
19
def file_exists(path):
    """Return True when path exists (file or directory), otherwise False."""
    from os.path import exists
    return exists(path)
22
+
23
def list_files(directory):
    """Return the entry names inside directory, as given by os.listdir."""
    from os import listdir
    return listdir(directory)
26
+
27
def read_json(path):
    """Read the UTF-8 text file at path and return its content parsed as JSON."""
    with open(path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
29
+
30
def write_json(path, data):
    """
    Serialise data as JSON indented by four spaces and write it to path (UTF-8).

    Serialisation happens before the file is opened, so data that cannot be
    serialised leaves an existing file untouched.
    """
    rendered = json.dumps(data, indent=4)
    with open(path, 'w', encoding='utf-8') as handle:
        handle.write(rendered)