PyMkDB 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. pymkdb/__init__.py +6 -0
  2. pymkdb/cli.py +57 -0
  3. pymkdb-0.1.0.dist-info/METADATA +86 -0
  4. pymkdb-0.1.0.dist-info/RECORD +54 -0
  5. pymkdb-0.1.0.dist-info/WHEEL +5 -0
  6. pymkdb-0.1.0.dist-info/entry_points.txt +2 -0
  7. pymkdb-0.1.0.dist-info/top_level.txt +3 -0
  8. sdk/__init__.py +1 -0
  9. sdk/connection.py +225 -0
  10. sdk/delta.py +19 -0
  11. sdk/http_connection.py +180 -0
  12. sdk/mkdb_client.py +226 -0
  13. sdk/responses.py +154 -0
  14. src/__init__.py +1 -0
  15. src/config/db.py +227 -0
  16. src/config/server.py +52 -0
  17. src/db/__init__.py +207 -0
  18. src/db/cache/__init__.py +1 -0
  19. src/db/cache/ram_cache.py +144 -0
  20. src/db/cache/write_queue.py +156 -0
  21. src/db/maintenance/__init__.py +0 -0
  22. src/db/maintenance/compactor.py +118 -0
  23. src/db/maintenance/task_scheduler.py +73 -0
  24. src/db/objects/store.py +283 -0
  25. src/db/parity/__init__.py +0 -0
  26. src/db/parity/parity_manager.py +196 -0
  27. src/db/query/__init__.py +1 -0
  28. src/db/query/full_text_index.py +168 -0
  29. src/db/query/numeric_index.py +196 -0
  30. src/db/query/query_engine.py +308 -0
  31. src/db/query/tokenizer.py +48 -0
  32. src/db/query_workers/__init__.py +16 -0
  33. src/db/query_workers/dispatcher.py +339 -0
  34. src/db/query_workers/task.py +78 -0
  35. src/db/query_workers/worker.py +292 -0
  36. src/db/requesting/main.py +0 -0
  37. src/db/storage/__init__.py +1 -0
  38. src/db/storage/blob_store.py +47 -0
  39. src/db/storage/index_manager.py +92 -0
  40. src/db/storage/log_manager.py +119 -0
  41. src/db/storage/serializer.py +38 -0
  42. src/filing/__init__.py +31 -0
  43. src/objects/__init__.py +190 -0
  44. src/runtime/__init__.py +15 -0
  45. src/server/__init__.py +0 -0
  46. src/server/coms/actions.py +209 -0
  47. src/server/coms/http.py +46 -0
  48. src/server/coms/http_handlers.py +445 -0
  49. src/server/coms/metrics.py +231 -0
  50. src/server/coms/socket.py +461 -0
  51. src/server/coms/socket_protocol.py +54 -0
  52. src/server/control/api/actions.py +1001 -0
  53. src/server/control/server.py +404 -0
  54. src/server/event_log.py +58 -0
@@ -0,0 +1,168 @@
1
+ """
2
+ FullTextIndex — inverted index for one text field of one store.
3
+
4
+ File format: idx_{service}_{field}_words.txt
5
+ Each line: {stem}:{id1},{id2},{id3}
6
+
7
+ RAM-threshold mode
8
+ ------------------
9
+ If ram_threshold_bytes > 0 and the index file on disk exceeds that size,
10
+ the in-memory map is evicted. Pending mutations are buffered and flushed
11
+ to disk before each query and at each explicit save() call.
12
+ """
13
+
14
+ import os
15
+ from src.db.query.tokenizer import tokenize
16
+
17
+
18
class FullTextIndex:
    """Inverted index (stem -> set of record IDs) for one text field of one store.

    The index is persisted at ``self.path`` as one ``{stem}:{id1},{id2}``
    line per stem.

    While ``_in_ram`` is true, ``_map`` is the working copy and the file is
    only rewritten by ``save()``.  When ``ram_threshold_bytes > 0`` and the
    file on disk is larger than that, ``_map`` is evicted: mutations are then
    buffered in ``_add_buf`` / ``_remove_buf`` and merged into the file before
    each search and on ``save()``.  Nothing in this class switches back to
    RAM mode once the map has been evicted.
    """

    def __init__(self, store_path: str, service: str, field: str,
                 ram_threshold_bytes: int = 0):
        """Compute the index file path and load any existing index file.

        Path separators in ``field`` are replaced with ``_`` so the field
        name cannot introduce extra directory components into the file name.
        """
        safe_field = field.replace("/", "_").replace("\\", "_")
        self.path = os.path.join(store_path, f"idx_{service}_{safe_field}_words.txt")
        self._map: dict = {}           # stem -> set[rid]  (RAM mode)
        self._in_ram: bool = True
        self._ram_threshold: int = ram_threshold_bytes
        # Disk-mode pending buffers (only used when _in_ram is False).
        # Kept disjoint: a (stem, rid) pair is in at most one of them.
        self._add_buf: dict = {}       # stem -> set[rid] to add
        self._remove_buf: dict = {}    # stem -> set[rid] to remove
        self.load()

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def load(self) -> None:
        """Load the index from disk into _map, then apply the RAM threshold."""
        if not os.path.exists(self.path):
            return
        # Entries read from disk overwrite same-named stems already in _map.
        self._map.update(self._load_disk_map())
        self._apply_threshold()

    def save(self) -> None:
        """Write the index to disk.

        RAM mode rewrites the whole file from ``_map`` and then re-checks the
        threshold (which may evict the map).  Disk mode merges the pending
        buffers into the file.
        """
        if self._in_ram:
            self._write_file(self._map)
            self._apply_threshold()
        else:
            self._flush_pending()

    def _apply_threshold(self) -> None:
        """Evict map from RAM if the disk file exceeds the configured threshold."""
        if self._ram_threshold > 0 and os.path.exists(self.path):
            if os.path.getsize(self.path) > self._ram_threshold:
                self._map.clear()
                self._in_ram = False

    def _write_file(self, data_map: dict) -> None:
        """Write a stem->ids map to a temp file, then replace the index file.

        Stems with an empty ID set are not written.
        """
        tmp = self.path + ".tmp"
        with open(tmp, "w", encoding="utf-8") as fh:
            for stem, ids in data_map.items():
                if ids:
                    fh.write(f"{stem}:{','.join(sorted(ids))}\n")
        os.replace(tmp, self.path)

    def _load_disk_map(self) -> dict:
        """Read the disk file and return a stem->set map without touching self._map.

        Blank lines and lines without a ``:`` are skipped.
        """
        disk_map: dict = {}
        if os.path.exists(self.path):
            with open(self.path, "r", encoding="utf-8") as fh:
                for raw in fh:
                    line = raw.strip()
                    if not line or ":" not in line:
                        continue
                    stem, _, ids_str = line.partition(":")
                    disk_map[stem] = set(ids_str.split(",")) if ids_str else set()
        return disk_map

    def _flush_pending(self) -> None:
        """Merge add/remove buffers into the disk file and clear them."""
        if not self._add_buf and not self._remove_buf:
            return
        disk_map = self._load_disk_map()

        for stem, ids in self._add_buf.items():
            disk_map.setdefault(stem, set()).update(ids)

        for stem, ids in self._remove_buf.items():
            existing = disk_map.get(stem)
            if existing:
                existing -= ids
                if not existing:
                    del disk_map[stem]

        self._write_file(disk_map)
        self._add_buf.clear()
        self._remove_buf.clear()

    # ------------------------------------------------------------------
    # Mutation
    # ------------------------------------------------------------------

    @staticmethod
    def _buffer(stems, record_id: str, target: dict, opposite: dict) -> None:
        """Record a pending operation for ``record_id`` under every stem.

        The pair is added to ``target`` and dropped from ``opposite`` so the
        most recent operation wins.  Without this, a remove followed by a
        re-add of the same (stem, record) would be undone at flush time,
        because the flush applies all adds before all removes.
        """
        for stem in stems:
            pending = opposite.get(stem)
            if pending is not None:
                pending.discard(record_id)
                if not pending:
                    del opposite[stem]
            target.setdefault(stem, set()).add(record_id)

    def add(self, record_id: str, text_value: str) -> None:
        """Tokenize text_value and add record_id to every stem's set."""
        stems = tokenize(text_value)
        if self._in_ram:
            for stem in stems:
                self._map.setdefault(stem, set()).add(record_id)
        else:
            self._buffer(stems, record_id, self._add_buf, self._remove_buf)

    def remove(self, record_id: str, text_value: str) -> None:
        """Remove record_id from every stem's set; prune empty stems."""
        stems = tokenize(text_value)
        if self._in_ram:
            for stem in stems:
                ids = self._map.get(stem)
                if ids:
                    ids.discard(record_id)
                    if not ids:
                        del self._map[stem]
        else:
            self._buffer(stems, record_id, self._remove_buf, self._add_buf)

    # ------------------------------------------------------------------
    # Query
    # ------------------------------------------------------------------

    def search(self, keywords: list, mode: str = "and") -> set:
        """
        Search for records matching the given keywords.

        keywords : list of raw keyword strings (will be tokenised)
        mode     : "or" unions the per-keyword matches; any other value
                   (default "and") intersects them
        Returns a new set of record IDs.

        Within one keyword, the IDs of all its stems are unioned.  A keyword
        that produces no stems contributes an empty set, so in AND mode it
        makes the whole result empty.
        """
        if not keywords:
            return set()
        if self._in_ram:
            data_map = self._map
        else:
            # Disk mode: make buffered mutations visible before reading.
            self._flush_pending()
            data_map = self._load_disk_map()

        candidate_sets = []
        for kw in keywords:
            ids: set = set()
            for stem in tokenize(kw):
                ids |= data_map.get(stem, set())
            candidate_sets.append(ids)
        if not candidate_sets:
            return set()

        if mode == "or":
            return set().union(*candidate_sets)
        # AND: intersect across keywords
        return set.intersection(*candidate_sets)
@@ -0,0 +1,196 @@
1
+ """
2
+ NumericIndex — sorted numeric index for one numeric field of one store.
3
+
4
+ File format: idx_{service}_{field}_numeric.txt
5
+ Each line: {value}:{id1},{id2}
6
+ File is kept in ascending sorted order by value.
7
+
8
+ RAM-threshold mode
9
+ ------------------
10
+ If ram_threshold_bytes > 0 and the index file on disk exceeds that size,
11
+ the in-memory map is evicted after load/save. All queries then read
12
+ directly from disk; pending mutations are buffered and flushed to disk
13
+ before each query and at each explicit save() call.
14
+ """
15
+
16
+ import bisect
17
+ import os
18
+
19
+
20
class NumericIndex:
    """Sorted numeric index (float value -> set of record IDs) for one field.

    The index is persisted at ``self.path`` as one ``{value}:{id1},{id2}``
    line per value, written in ascending value order.

    While ``_in_ram`` is true, ``_map`` plus the sorted ``_values`` list are
    the working copy.  When ``ram_threshold_bytes > 0`` and the file on disk
    is larger than that, both are evicted: mutations are then buffered in
    ``_write_buf`` / ``_remove_buf`` and merged into the file before each
    query and on ``save()``.  Nothing in this class switches back to RAM mode
    once the map has been evicted.
    """

    def __init__(self, store_path: str, service: str, field: str,
                 ram_threshold_bytes: int = 0):
        """Compute the index file path and load any existing index file.

        Path separators in ``field`` are replaced with ``_`` so the field
        name cannot introduce extra directory components into the file name.
        """
        safe_field = field.replace("/", "_").replace("\\", "_")
        self.path = os.path.join(store_path, f"idx_{service}_{safe_field}_numeric.txt")
        self._values: list = []        # sorted float values (RAM mode)
        self._map: dict = {}           # float -> set[rid]   (RAM mode)
        self._in_ram: bool = True
        self._ram_threshold: int = ram_threshold_bytes
        # Disk-mode pending buffers (only used when _in_ram is False).
        # Kept disjoint: a (val, rid) pair is in at most one of them.
        self._write_buf: dict = {}     # val -> set[rid] to add
        self._remove_buf: dict = {}    # val -> set[rid] to remove
        self.load()

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def load(self) -> None:
        """Load the index from disk into _map/_values, then apply the RAM threshold."""
        if not os.path.exists(self.path):
            return
        disk_map, _ = self._load_disk_map()
        # Entries read from disk overwrite same-valued keys already in _map.
        self._map.update(disk_map)
        self._values = sorted(self._map.keys())
        self._apply_threshold()

    def save(self) -> None:
        """Write the index to disk.

        RAM mode rewrites the whole file and then re-checks the threshold
        (which may evict the map).  Disk mode merges the pending buffers
        into the file.
        """
        if self._in_ram:
            self._write_file(self._map, self._values)
            self._apply_threshold()
        else:
            self._flush_pending()

    def _apply_threshold(self) -> None:
        """Evict map from RAM if the disk file exceeds the configured threshold."""
        if self._ram_threshold > 0 and os.path.exists(self.path):
            if os.path.getsize(self.path) > self._ram_threshold:
                self._map.clear()
                self._values.clear()
                self._in_ram = False

    def _write_file(self, data_map: dict, data_values: list) -> None:
        """Write map entries in ``data_values`` order to a temp file, then replace the index file.

        Values with an empty (or missing) ID set are not written.
        """
        tmp = self.path + ".tmp"
        with open(tmp, "w", encoding="utf-8") as fh:
            for val in data_values:
                ids = data_map.get(val)
                if ids:
                    fh.write(f"{val}:{','.join(sorted(ids))}\n")
        os.replace(tmp, self.path)

    def _load_disk_map(self) -> tuple:
        """Read the disk file and return (map, sorted_values) without touching self._map.

        Blank lines, lines without a ``:`` and lines whose value does not
        parse as a float are skipped.
        """
        disk_map: dict = {}
        if os.path.exists(self.path):
            with open(self.path, "r", encoding="utf-8") as fh:
                for raw in fh:
                    line = raw.strip()
                    if not line or ":" not in line:
                        continue
                    val_str, _, ids_str = line.partition(":")
                    try:
                        val = float(val_str)
                    except ValueError:
                        continue
                    disk_map[val] = set(ids_str.split(",")) if ids_str else set()
        return disk_map, sorted(disk_map.keys())

    def _flush_pending(self) -> None:
        """Merge write/remove buffers into the disk file and clear them."""
        if not self._write_buf and not self._remove_buf:
            return
        disk_map, disk_values = self._load_disk_map()

        for val, ids in self._write_buf.items():
            if val not in disk_map:
                bisect.insort(disk_values, val)
                disk_map[val] = set()
            disk_map[val].update(ids)

        for val, rids in self._remove_buf.items():
            ids = disk_map.get(val)
            if not ids:
                continue
            ids -= rids
            if not ids:
                # Last record for this value: drop it from map and sorted list.
                del disk_map[val]
                i = bisect.bisect_left(disk_values, val)
                if i < len(disk_values) and disk_values[i] == val:
                    disk_values.pop(i)

        self._write_file(disk_map, disk_values)
        self._write_buf.clear()
        self._remove_buf.clear()

    # ------------------------------------------------------------------
    # Mutation
    # ------------------------------------------------------------------

    @staticmethod
    def _cancel(buf: dict, val: float, record_id: str) -> None:
        """Drop a pending (val, record_id) entry from ``buf`` if present."""
        pending = buf.get(val)
        if pending is not None:
            pending.discard(record_id)
            if not pending:
                del buf[val]

    def add(self, record_id: str, numeric_value) -> None:
        """Index ``record_id`` under ``float(numeric_value)``.

        Raises ValueError if the value is NaN or does not convert to float
        (TypeError for types ``float()`` rejects).  NaN is refused because it
        compares unequal to everything, which would break the sorted
        ``_values`` list that the bisect-based range queries rely on.
        """
        val = float(numeric_value)
        if val != val:  # only NaN is unequal to itself
            raise ValueError("NaN cannot be stored in a numeric index")
        if self._in_ram:
            if val not in self._map:
                bisect.insort(self._values, val)
                self._map[val] = set()
            self._map[val].add(record_id)
        else:
            # Newest operation wins: an earlier buffered remove of the same
            # pair must not undo this add when the buffers are flushed.
            self._cancel(self._remove_buf, val, record_id)
            self._write_buf.setdefault(val, set()).add(record_id)

    def remove(self, record_id: str, numeric_value) -> None:
        """Remove ``record_id`` from ``float(numeric_value)``; prune empty values."""
        val = float(numeric_value)
        if self._in_ram:
            ids = self._map.get(val)
            if not ids:
                return
            ids.discard(record_id)
            if not ids:
                del self._map[val]
                i = bisect.bisect_left(self._values, val)
                if i < len(self._values) and self._values[i] == val:
                    self._values.pop(i)
        else:
            # Newest operation wins: cancel a buffered add of the same pair.
            self._cancel(self._write_buf, val, record_id)
            self._remove_buf.setdefault(val, set()).add(record_id)

    # ------------------------------------------------------------------
    # Query
    # ------------------------------------------------------------------

    def exact_query(self, value) -> set:
        """Return a new set of record IDs whose value equals ``float(value)``."""
        target = float(value)
        if self._in_ram:
            return set(self._map.get(target, set()))
        # Disk mode: make buffered mutations visible before reading.
        self._flush_pending()
        disk_map, _ = self._load_disk_map()
        return set(disk_map.get(target, set()))

    def range_query(self, lo=None, hi=None,
                    lo_inclusive: bool = True, hi_inclusive: bool = True) -> set:
        """
        Return all record IDs whose value falls in [lo, hi] (bounds optional).
        lo/hi may be None for open-ended ranges; the ``*_inclusive`` flags
        turn the corresponding bound into a strict one when False.
        """
        if self._in_ram:
            return self._range_from(self._map, self._values, lo, hi, lo_inclusive, hi_inclusive)
        self._flush_pending()
        disk_map, disk_values = self._load_disk_map()
        return self._range_from(disk_map, disk_values, lo, hi, lo_inclusive, hi_inclusive)

    @staticmethod
    def _range_from(data_map: dict, data_values: list,
                    lo, hi, lo_inclusive: bool, hi_inclusive: bool) -> set:
        """Union the ID sets of every value in ``data_values`` inside the range.

        ``data_values`` must be sorted ascending; the slice bounds are found
        with bisect.
        """
        if lo is None:
            lo_idx = 0
        else:
            lo_f = float(lo)
            # Inclusive lower bound keeps values equal to lo; strict skips them.
            lo_idx = (bisect.bisect_left(data_values, lo_f) if lo_inclusive
                      else bisect.bisect_right(data_values, lo_f))

        if hi is None:
            hi_idx = len(data_values)
        else:
            hi_f = float(hi)
            hi_idx = (bisect.bisect_right(data_values, hi_f) if hi_inclusive
                      else bisect.bisect_left(data_values, hi_f))

        result: set = set()
        for val in data_values[lo_idx:hi_idx]:
            result |= data_map.get(val, set())
        return result
@@ -0,0 +1,308 @@
1
+ """
2
+ QueryEngine — declarative dict query dispatcher for one store.
3
+
4
+ Query syntax (from the plan's Query Syntax Specification):
5
+ {"id": "x"} → fast-path primary index lookup
6
+ {"id": ["x", "y"]} → multi-ID fetch
7
+ {"field": "exact string"} → full-scan exact text match
8
+ {"field": ["kw1", "kw2"]} → FullTextIndex AND search
9
+ {"field": 2010} → NumericIndex exact_query
10
+ {"field": {">": 1999, "<": 2015}} → NumericIndex range_query
11
+ Compound dict → AND-intersect all candidate sets
12
+ """
13
+
14
+ import logging
15
+ import os
16
+
17
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Comparison operators accepted as keys of a range-query dict.
OPERATORS = {">=", "<=", ">", "<"}

# Friendly aliases accepted from SDK callers; each maps to one of OPERATORS.
_OP_ALIASES = {"gte": ">=", "lte": "<=", "gt": ">", "lt": "<"}
23
+
24
+
25
class QuerySyntaxError(ValueError):
    """Raised when a query filter dict is malformed or cannot be evaluated.

    Subclasses ValueError, so callers catching ValueError also catch this.
    """
27
+
28
+
29
class QueryEngine:
    """Declarative dict-query dispatcher for one store.

    Holds one FullTextIndex or NumericIndex per queryable schema field and
    translates filter dicts into index lookups (or a linear scan for exact
    string matches).
    """

    def __init__(self, store):
        """
        Parameters
        ----------
        store : src.db.objects.store.store
            The owning store instance (used for read() and index_manager).
        """
        self._store = store
        self._full_text_indexes: dict = {}   # field_name -> FullTextIndex
        self._numeric_indexes: dict = {}     # field_name -> NumericIndex

    # ------------------------------------------------------------------
    # Index lifecycle
    # ------------------------------------------------------------------

    def build_indexes(self) -> None:
        """
        Instantiate indexes for all fields marked queryable in schema_config.
        Called from store.setup() after storage is ready.

        If an index file does not exist on disk, the store is scanned and the
        index is built and saved immediately so it survives the next restart.
        Fields whose index file exceeds ram_config.index_ram_threshold_bytes are
        kept on disk only — queries read from the file; mutations are buffered
        and flushed before each query.

        A failure while rebuilding one field is printed and logged, and the
        remaining fields are still processed.
        """
        import traceback

        from src.db.query.full_text_index import FullTextIndex
        from src.db.query.numeric_index import NumericIndex

        schema = getattr(self._store.config, "schema_config", None)
        if schema is None:
            return

        ram_cfg = getattr(self._store.config, "ram_config", None)
        threshold = getattr(ram_cfg, "index_ram_threshold_bytes", 0) if ram_cfg else 0

        store_name = self._store.config.name
        print(f"[{store_name}] build_indexes: checking {len(schema.fields)} schema field(s)...")

        needs_rebuild: list = []
        for field_name, field_schema in schema.fields.items():
            q = getattr(field_schema, "queryable", False)
            if not q:
                continue
            if q in ("full-text", True):
                index_cls, registry = FullTextIndex, self._full_text_indexes
            elif q == "numeric":
                index_cls, registry = NumericIndex, self._numeric_indexes
            else:
                # Unrecognised queryable marker: no index is created.
                continue
            idx = index_cls(
                store_path=self._store.store_path,
                service=store_name,
                field=field_name,
                ram_threshold_bytes=threshold,
            )
            registry[field_name] = idx
            if not os.path.exists(idx.path):
                needs_rebuild.append(field_name)

        if not needs_rebuild:
            print(f"[{store_name}] All index files present — loaded from disk.")
            return

        print(f"[{store_name}] Index files missing for: {needs_rebuild} — rebuilding from stored data...")
        for field_name in needs_rebuild:
            try:
                self.rebuild_index(field_name)
            except Exception as exc:
                # Best effort: one broken field must not block the others.
                print(f"[{store_name}] ERROR: rebuild_index('{field_name}') failed: {exc}")
                traceback.print_exc()
                logger.warning("build_indexes: rebuild_index('%s') failed: %s", field_name, exc)

    def on_write(self, record_id: str, flat_delta: dict) -> None:
        """Update all relevant indexes when a record is written.

        Only fields present in ``flat_delta`` are touched.  Full-text values
        are stringified; numeric values that cannot be indexed are skipped.
        """
        for field, idx in self._full_text_indexes.items():
            if field in flat_delta:
                idx.add(record_id, str(flat_delta[field]))
        for field, idx in self._numeric_indexes.items():
            if field in flat_delta:
                try:
                    idx.add(record_id, flat_delta[field])
                except (ValueError, TypeError):
                    # Value is not usable as a number: leave it unindexed.
                    pass

    def on_delete(self, record_id: str, flat_dict: dict) -> None:
        """Remove a record from all indexes, using its field values in ``flat_dict``."""
        for field, idx in self._full_text_indexes.items():
            if field in flat_dict:
                idx.remove(record_id, str(flat_dict[field]))
        for field, idx in self._numeric_indexes.items():
            if field in flat_dict:
                try:
                    idx.remove(record_id, flat_dict[field])
                except (ValueError, TypeError):
                    # A non-numeric value was never indexed; nothing to remove.
                    pass

    def save_all(self) -> None:
        """Call save() on every full-text and numeric index."""
        for idx in self._full_text_indexes.values():
            idx.save()
        for idx in self._numeric_indexes.values():
            idx.save()

    def rebuild_index(self, field_name: str) -> None:
        """
        Backfill the index for a single field by reading every record listed
        by the store's index_manager from the log, then save it.

        The new index object replaces the registered one before the scan
        starts.  Records that fail to read, deserialize or index are logged
        and skipped.

        Raises RuntimeError if the store's storage is not initialised and
        QuerySyntaxError if the field is unknown or not queryable.
        """
        from src.db.query.full_text_index import FullTextIndex
        from src.db.query.numeric_index import NumericIndex
        from src.db.storage import serializer as _ser

        store = self._store
        if store.log_manager is None or store.index_manager is None:
            raise RuntimeError("Store storage not initialised.")

        # Determine index type from schema_config
        schema = getattr(store.config, "schema_config", None)
        if schema is None or field_name not in schema.fields:
            raise QuerySyntaxError(f"Field '{field_name}' not in schema_config.")
        field_schema = schema.fields[field_name]
        q = getattr(field_schema, "queryable", False)

        # Determine RAM threshold from store config
        ram_cfg = getattr(store.config, "ram_config", None)
        threshold = getattr(ram_cfg, "index_ram_threshold_bytes", 0) if ram_cfg else 0

        # Build a fresh in-memory index (start empty, ignoring any stale file)
        is_text = q in ("full-text", True)
        if is_text:
            new_idx = FullTextIndex(store.store_path, store.config.name, field_name,
                                    ram_threshold_bytes=threshold)
            new_idx._map = {}
            new_idx._in_ram = True      # force RAM mode while backfilling
            self._full_text_indexes[field_name] = new_idx
        elif q == "numeric":
            new_idx = NumericIndex(store.store_path, store.config.name, field_name,
                                   ram_threshold_bytes=threshold)
            new_idx._map = {}
            new_idx._values = []
            new_idx._in_ram = True      # force RAM mode while backfilling
            self._numeric_indexes[field_name] = new_idx
        else:
            raise QuerySyntaxError(f"Field '{field_name}' is not queryable.")

        # Scan all live records
        for record_id in store.index_manager.all_record_ids():
            entry = store.index_manager.get(record_id)
            if entry is None:
                continue
            seg, offset, size = entry
            try:
                line = store.log_manager.read(seg, offset, size)
                _, flat = _ser.deserialize_record(line)
                if field_name in flat:
                    value = flat[field_name]
                    # Stringify text values exactly as on_write() does.
                    new_idx.add(record_id, str(value) if is_text else value)
            except Exception as exc:
                logger.warning("rebuild_index: skipping %s: %s", record_id, exc)

        # Count before saving: save() may evict _map when the file exceeds
        # the RAM threshold, which would make the reported count 0.
        n = len(new_idx._map)
        new_idx.save()
        print(f"[{store.config.name}] Index for '{field_name}' rebuilt: {n} entries — saved to {new_idx.path}")
        logger.info("rebuild_index: field '%s' rebuilt (%d entries)", field_name, n)

    # ------------------------------------------------------------------
    # Query
    # ------------------------------------------------------------------

    def query(self, filter_dict: dict) -> list:
        """
        Execute a declarative query dict and return a list of matching record IDs.

        * Empty filter: all record IDs, sorted.
        * ``{"id": ...}`` alone: the given ID(s) in the given order, returned
          without checking that they exist.
        * ``"id"`` combined with other fields: the given IDs are AND-ed with
          the other clauses and returned sorted.
        * Otherwise: every clause is evaluated and the results are
          intersected and sorted.

        Raises QuerySyntaxError for malformed input.
        """
        if not isinstance(filter_dict, dict):
            raise QuerySyntaxError("filter_dict must be a dict")

        # Empty filter → return all record IDs
        if not filter_dict:
            idx_mgr = self._store.index_manager
            if idx_mgr is None:
                return []
            return sorted(idx_mgr.all_record_ids())

        result = None
        clauses = filter_dict

        # ID fast path
        if "id" in filter_dict:
            ids = filter_dict["id"]
            if isinstance(ids, str):
                id_list = [ids]
            elif isinstance(ids, list):
                id_list = list(ids)
            else:
                raise QuerySyntaxError("'id' value must be a string or list of strings")
            clauses = {k: v for k, v in filter_dict.items() if k != "id"}
            if not clauses:
                return id_list
            if not id_list:
                return []
            # Compound filter: the IDs seed the intersection so the remaining
            # clauses are honoured instead of being silently ignored.
            result = set(id_list)

        for field, value in clauses.items():
            candidates = self._eval_field(field, value)
            if not candidates:      # short-circuit
                return []
            result = candidates if result is None else result & candidates
            if not result:
                return []

        if result is None:
            return []
        return sorted(result)

    def _eval_field(self, field: str, value) -> set:
        """Evaluate one field clause and return a set of matching record IDs.

        Dispatches on the type of ``value``: int/float → exact numeric match,
        str → exact text match by linear scan, list → full-text keyword
        search (AND), dict → numeric range query.

        Raises QuerySyntaxError when the required index is missing, an
        operator is unsupported, or the value type is not handled.
        """
        if isinstance(value, (int, float)):
            # Exact numeric match
            idx = self._numeric_indexes.get(field)
            if idx is None:
                raise QuerySyntaxError(
                    f"Field '{field}' has no numeric index. "
                    "Mark it queryable='numeric' in schema_config."
                )
            return idx.exact_query(value)

        if isinstance(value, str):
            # Exact text match — linear scan
            result = set()
            idx_mgr = self._store.index_manager
            if idx_mgr is None:
                return result
            for record_id in idx_mgr.all_record_ids():
                rec = self._store.read(record_id)
                if rec and rec.get(field) == value:
                    result.add(record_id)
            return result

        if isinstance(value, list):
            # Partial / tokenised full-text search (AND across keywords)
            idx = self._full_text_indexes.get(field)
            if idx is None:
                raise QuerySyntaxError(
                    f"Field '{field}' has no full-text index. "
                    "Mark it queryable='full-text' in schema_config."
                )
            return idx.search(value, mode="and")

        if isinstance(value, dict):
            # Numeric range query — keys are operators, values are numbers
            idx = self._numeric_indexes.get(field)
            if idx is None:
                raise QuerySyntaxError(
                    f"Field '{field}' has no numeric index. "
                    "Mark it queryable='numeric' in schema_config."
                )
            lo = hi = None
            lo_inc = hi_inc = True
            # If both forms of a bound are given (e.g. ">" and ">="), the
            # one that comes later in the dict wins.
            for op, bound in value.items():
                op = _OP_ALIASES.get(op, op)   # normalise gte/lte/gt/lt
                if op not in OPERATORS:
                    raise QuerySyntaxError(
                        f"Unsupported operator '{op}'. Must be one of {sorted(OPERATORS)}."
                    )
                if op == ">=":
                    lo, lo_inc = bound, True
                elif op == ">":
                    lo, lo_inc = bound, False
                elif op == "<=":
                    hi, hi_inc = bound, True
                elif op == "<":
                    hi, hi_inc = bound, False
            return idx.range_query(lo=lo, hi=hi, lo_inclusive=lo_inc, hi_inclusive=hi_inc)

        raise QuerySyntaxError(
            f"Unsupported value type {type(value).__name__!r} for field '{field}'. "
            "Use int/float (exact numeric), str (exact text), "
            "list (keyword search), or dict (range query)."
        )