apexbase 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apexbase/__init__.py +330 -0
- apexbase/limited_dict.py +84 -0
- apexbase/query.py +402 -0
- apexbase/storage.py +1277 -0
- apexbase-0.0.1.dist-info/LICENSE +201 -0
- apexbase-0.0.1.dist-info/METADATA +160 -0
- apexbase-0.0.1.dist-info/RECORD +9 -0
- apexbase-0.0.1.dist-info/WHEEL +5 -0
- apexbase-0.0.1.dist-info/top_level.txt +1 -0
apexbase/__init__.py
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
from typing import List, Dict, Union, Optional
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from .storage import Storage
|
|
5
|
+
from .query import Query, ResultView
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Version string of the apexbase package, exposed as a module-level attribute.
version = "0.0.1"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ApexClient:
    """
    Client facade over a Storage backend and a Query handler.

    Write operations (store / delete / replace / table management) are
    forwarded to ``self.storage``; read operations (query / search / retrieve)
    are forwarded to ``self.query_handler``. The client can be used as a
    context manager, in which case ``close()`` is called on exit.
    """

    def __init__(self, dirpath: Optional[Union[str, Path]] = None,
                 batch_size: int = 1000, drop_if_exists: bool = False):
        """
        Initializes a new instance of the ApexClient class.

        Parameters:
            dirpath: str or Path
                The directory path for storing data. If None, the current directory is used.
                The directory (and any missing parents) is created if needed.
            batch_size: int
                The size of batch operations; passed through to Storage.
            drop_if_exists: bool
                If True, the database file will be deleted if it already exists.
        """
        if dirpath is None:
            dirpath = "."

        self.dirpath = Path(dirpath)
        self.dirpath.mkdir(parents=True, exist_ok=True)

        self.db_path = self.dirpath / "apexbase.db"

        # Remove a stale database file before the storage layer opens it.
        if drop_if_exists and self.db_path.exists():
            self.db_path.unlink()

        self.storage = Storage(str(self.db_path), batch_size=batch_size)
        self.query_handler = Query(self.storage)
        self.current_table = "default"  # Default table name

    def __enter__(self) -> 'ApexClient':
        """
        Enters a ``with`` block.

        Returns:
            ApexClient: self
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """
        Leaves a ``with`` block and closes the client.

        Returns None, so an exception raised inside the block is not suppressed.
        """
        self.close()

    def use_table(self, table_name: str) -> None:
        """
        Switches the current table for operations.

        Parameters:
            table_name: str
                The name of the table to switch to.
        """
        self.current_table = table_name
        self.storage.use_table(table_name)

    def create_table(self, table_name: str) -> None:
        """
        Creates a new table.

        Parameters:
            table_name: str
                The name of the table to create.
        """
        self.storage.create_table(table_name)

    def drop_table(self, table_name: str) -> None:
        """
        Drops a table.

        Parameters:
            table_name: str
                The name of the table to drop.
        """
        self.storage.drop_table(table_name)
        # If the table being dropped is the current table, switch to the default table.
        # NOTE(review): only the client's own bookkeeping is reset here;
        # storage.use_table() is not called. Confirm that Storage.drop_table
        # switches its own active table in this case.
        if self.current_table == table_name:
            self.current_table = "default"

    def list_tables(self) -> List[str]:
        """
        Lists all tables.

        Returns:
            List[str]: A list of table names
        """
        return self.storage.list_tables()

    def store(self, data: Union[dict, List[dict]]) -> Union[int, List[int]]:
        """
        Stores one or more records.

        Parameters:
            data: Union[dict, List[dict]]
                The records to store, either as a single dictionary or a list of dictionaries.

        Returns:
            Union[int, List[int]]: The record ID or ID list

        Raises:
            ValueError: If data is neither a dict nor a list.
        """
        if isinstance(data, dict):
            # Single record
            return self.storage.store(data)
        elif isinstance(data, list):
            # Multiple records
            return self.storage.batch_store(data)
        else:
            raise ValueError("Data must be a dict or a list of dicts")

    def query(self, query_filter: Optional[str] = None) -> ResultView:
        """
        Queries records using SQL syntax.

        Parameters:
            query_filter: str
                SQL filter conditions. For example:
                - age > 30
                - name LIKE 'John%'
                - age > 30 AND city = 'New York'
                - field IN (1, 2, 3)
                - ORDER BY, GROUP BY, HAVING are not supported

        Returns:
            ResultView: A view of query results, supporting deferred execution
        """
        return self.query_handler.query(query_filter)

    def search_text(self, text: str, fields: Optional[List[str]] = None) -> ResultView:
        """
        Full-text search.

        Parameters:
            text: str
                The text to search
            fields: List[str]
                The fields to search, if None, all searchable fields are searched

        Returns:
            ResultView: A view of search results, supporting deferred execution
        """
        return self.query_handler.search_text(text, fields)

    def retrieve(self, id_: int) -> Optional[dict]:
        """
        Retrieves a single record.

        Parameters:
            id_: int
                The record ID

        Returns:
            Optional[dict]: The record data, or None if it doesn't exist
        """
        return self.query_handler.retrieve(id_)

    def retrieve_many(self, ids: List[int]) -> List[dict]:
        """
        Retrieves multiple records.

        Parameters:
            ids: List[int]
                The list of record IDs

        Returns:
            List[dict]: The list of record data
        """
        return self.query_handler.retrieve_many(ids)

    def retrieve_all(self) -> ResultView:
        """
        Retrieves all records via the query handler.

        Returns:
            ResultView: The result of the query handler's retrieve_all()
        """
        return self.query_handler.retrieve_all()

    def list_fields(self) -> List[str]:
        """
        List the fields reported by the storage layer.

        Returns:
            List[str]: The keys of the mapping returned by storage.list_fields().
        """
        return list(self.storage.list_fields().keys())

    def delete(self, ids: Union[int, List[int]]) -> bool:
        """
        Deletes one or more records.

        Parameters:
            ids: Union[int, List[int]]
                The record ID or list of record IDs to delete

        Returns:
            bool: Whether the deletion was successful

        Raises:
            ValueError: If ids is neither an int nor a list.
        """
        if isinstance(ids, int):
            return self.storage.delete(ids)
        elif isinstance(ids, list):
            return self.storage.batch_delete(ids)
        else:
            raise ValueError("ids must be an int or a list of ints")

    def replace(self, id_: int, data: dict) -> bool:
        """
        Replaces a single record.

        Parameters:
            id_: int
                The record ID to replace
            data: dict
                The new record data

        Returns:
            bool: Whether the replacement was successful
        """
        return self.storage.replace(id_, data)

    def batch_replace(self, data_dict: Dict[int, dict]) -> List[int]:
        """
        Replaces multiple records.

        Parameters:
            data_dict: Dict[int, dict]
                The dictionary of records to replace, with keys as record IDs and values as new record data

        Returns:
            List[int]: The list of successfully replaced record IDs
        """
        return self.storage.batch_replace(data_dict)

    def from_pandas(self, df) -> 'ApexClient':
        """
        Imports data from a Pandas DataFrame.

        Parameters:
            df: pandas.DataFrame
                The input DataFrame

        Returns:
            ApexClient: self, for chaining
        """
        records = df.to_dict('records')
        self.store(records)
        return self

    def from_pyarrow(self, table) -> 'ApexClient':
        """
        Imports data from a PyArrow Table.

        Parameters:
            table: pyarrow.Table
                The input PyArrow Table

        Returns:
            ApexClient: self, for chaining
        """
        records = table.to_pylist()
        self.store(records)
        return self

    def from_polars(self, df) -> 'ApexClient':
        """
        Imports data from a Polars DataFrame.

        Parameters:
            df: polars.DataFrame
                The input Polars DataFrame

        Returns:
            ApexClient: self, for chaining
        """
        records = df.to_dicts()
        self.store(records)
        return self

    def set_searchable(self, field_name: str, is_searchable: bool = True) -> None:
        """
        Sets whether a field is searchable.

        Parameters:
            field_name: str
                The field name
            is_searchable: bool
                Whether the field is searchable
        """
        self.storage.set_searchable(field_name, is_searchable)

    def rebuild_search_index(self) -> None:
        """
        Rebuilds the full-text search index.

        Delegates to the same storage call as rebuild_fts_index().
        """
        self.storage.rebuild_fts_index()

    def optimize(self) -> None:
        """
        Optimizes the database performance.
        """
        self.storage.optimize()

    def set_auto_update_fts(self, enabled: bool) -> None:
        """
        Sets whether to automatically update the full-text search index.
        Defaults to False, to improve batch write performance.
        If auto-update is disabled, you need to manually call rebuild_fts_index to update the index.

        Parameters:
            enabled: bool
                Whether to enable auto-update
        """
        self.storage.set_auto_update_fts(enabled)

    def rebuild_fts_index(self) -> None:
        """
        Rebuilds the full-text search index for the current table.
        Call this method after batch writes to update the index.
        """
        self.storage.rebuild_fts_index()

    def count_rows(self, table_name: Optional[str] = None):
        """
        Returns the number of rows in a specified table or the current table.

        Parameters:
            table_name: str
                The table name, or None to use the current table

        Returns:
            int: The number of rows in the table
        """
        return self.storage.count_rows(table_name)

    def close(self) -> None:
        """
        Close the database connection.
        """
        self.storage.close()

    def __del__(self):
        """
        Destructor to ensure the database connection is closed.
        """
        # __del__ also runs for instances whose __init__ raised before
        # `storage` was assigned; skip closing in that case instead of
        # failing with AttributeError.
        if getattr(self, "storage", None) is not None:
            self.close()
|
apexbase/limited_dict.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from collections import OrderedDict
|
|
2
|
+
from threading import RLock
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class LimitedDict:
    """
    Size-bounded mapping that evicts the least recently used entry.

    Entries live in an OrderedDict ordered from least to most recently used;
    both writes and successful reads move the key to the end. When an insert
    pushes the size above ``max_size`` the oldest entry is dropped. Operations
    that touch the underlying dict run under a re-entrant lock.
    """

    def __init__(self, max_size):
        """
        Parameters:
            max_size: int
                Maximum number of entries to keep. -1 means unbounded.

        Raises:
            ValueError: If max_size is not an int, is 0, or is negative other than -1.
        """
        if not isinstance(max_size, int):
            raise ValueError('max_size must be an integer')
        if max_size == 0:
            raise ValueError('max_size cannot be 0')
        if max_size == -1:
            # Unbounded: len(cache) can never exceed infinity.
            self.max_size = float('inf')
        elif max_size < 0:
            raise ValueError('max_size must be a positive integer or -1')
        else:
            self.max_size = max_size

        self.cache = OrderedDict()
        self.lock = RLock()

    def __setitem__(self, key, value):
        """Insert or overwrite *key*, mark it most recently used, evict if over capacity."""
        with self.lock:
            self.cache[key] = value
            # Overwriting keeps the old position, so move the key explicitly.
            self.cache.move_to_end(key)

            if len(self.cache) > self.max_size:
                # Drop the least recently used entry (front of the dict).
                self.cache.popitem(last=False)

    def __getitem__(self, key):
        """
        Return the value for *key* and mark it most recently used.

        Raises:
            KeyError: If the key is not present.
        """
        with self.lock:
            if key in self.cache:
                self.cache.move_to_end(key)  # Move to end (most recently used)
                return self.cache[key]
            raise KeyError('Key not found')

    @property
    def is_reached_max_size(self):
        """True when the number of stored entries equals max_size."""
        if self.max_size == 0:
            return True

        with self.lock:
            return len(self.cache) == self.max_size

    def get(self, key, default=None):
        """Return the value for *key* (marking it recently used), or *default* if absent."""
        if self.max_size == 0:
            return default

        # Check and read under one lock acquisition so that an eviction in
        # another thread cannot turn a miss into a KeyError. RLock makes the
        # nested acquisition inside __getitem__ safe.
        with self.lock:
            if key not in self.cache:
                return default
            return self.__getitem__(key)

    def clear(self):
        """Remove every entry."""
        with self.lock:
            self.cache.clear()

    def keys(self):
        """
        Return a list snapshot of the keys, least recently used first.

        A snapshot (not a live view) is returned because reads reorder the
        underlying dict: iterating a live view while looking entries up
        raises RuntimeError.
        """
        if self.max_size == 0:
            return []

        with self.lock:
            return list(self.cache)

    def pop(self, key, default=None):
        """Remove *key* and return its value, or return *default* if absent."""
        if self.max_size == 0:
            return default

        with self.lock:
            return self.cache.pop(key, default)

    def __contains__(self, key):
        """Membership test; does not change the recency order."""
        with self.lock:
            return key in self.cache

    def __len__(self):
        """Number of entries currently stored."""
        with self.lock:
            return len(self.cache)

    def __repr__(self):
        """repr of the underlying OrderedDict."""
        with self.lock:
            return repr(self.cache)
|