shareddata 6.83.8__tar.gz → 6.83.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {shareddata-6.83.8/src/shareddata.egg-info → shareddata-6.83.12}/PKG-INFO +1 -1
- {shareddata-6.83.8 → shareddata-6.83.12}/setup.py +1 -1
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/CollectionMongoDB.py +1 -1
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Logger.py +58 -26
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Routines/WorkerPool.py +22 -13
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/SharedData.py +88 -26
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Table.py +85 -19
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/TableDisk.py +39 -2
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/TableIndex.py +38 -1
- {shareddata-6.83.8 → shareddata-6.83.12/src/shareddata.egg-info}/PKG-INFO +1 -1
- {shareddata-6.83.8 → shareddata-6.83.12}/LICENSE +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/MANIFEST.in +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/README.md +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/pyproject.toml +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/setup.cfg +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/ServerGunicorn.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/ServerWaitress.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/__init__.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/auth.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/constants.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/routes/__init__.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/routes/cache.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/routes/collections.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/routes/metadata.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/routes/system.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/routes/tables.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/routes/timeseries.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/routes/workers.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/API/utils.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/CacheRedis.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Database.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Defaults.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/AWSEC2.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/AWSS3.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/AutoDocstrings.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/ClientAPI.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/ClientSocket.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/ClientWebSocket.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/LogHandlerAPI.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/MongoDBClient.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/SaveTables.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/ServerSocket.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/ServerWebSocket.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/StreamsCache.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/StreamsPersist.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/SyncTable.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/TunnelWebSocket.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/IO/__init__.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Metadata.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/MultiProc.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/OpenFIGI.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Routines/BatchJob.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Routines/Schedule.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Routines/ScheduleMonitor.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Routines/Scheduler.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Routines/Worker.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Routines/WorkerLib.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Routines/__init__.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/SharedNumpy.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/StreamKafka.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Symbol.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/TableIndexJitFunctions.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/TableIndexJitFunctionsManual.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/TableIndexJitGenerate.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/TableIndexJitHash.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/TableIndexJitLoc.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/TimeSeriesDisk.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/TimeseriesContainer.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Users.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/Utils.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/__init__.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/SharedData/sharedmutexwin.pyd +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/shareddata.egg-info/SOURCES.txt +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/shareddata.egg-info/dependency_links.txt +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/shareddata.egg-info/requires.txt +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/src/shareddata.egg-info/top_level.txt +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_api_collection.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_api_collection_loopback.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_api_table.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_api_table_schemaless.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_api_table_schemaless_extend.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_bson_last_pos_reuse.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_cache_redis.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_extend_rt.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_loc.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_metadata.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_read_write_tail.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_stream_loopback_async.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_timeseries.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_timeseries_api.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_upsert_unordered_get_date_loc_d1.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_upsert_unordered_get_date_loc_m1.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_upsert_unordered_get_date_loc_m15.py +0 -0
- {shareddata-6.83.8 → shareddata-6.83.12}/tests/test_upsert_unordered_positions_m1.py +0 -0
|
@@ -34,7 +34,7 @@ install_requires = [
|
|
|
34
34
|
|
|
35
35
|
setup(
|
|
36
36
|
name='shareddata',
|
|
37
|
-
version='6.83.8',
|
|
37
|
+
version='6.83.12',
|
|
38
38
|
description='Memory Mapped / Shared Memory Database with S3 repository',
|
|
39
39
|
long_description=open('README.md').read(),
|
|
40
40
|
long_description_content_type='text/markdown',
|
|
@@ -112,7 +112,7 @@ class CollectionMongoDB:
|
|
|
112
112
|
# Check for indexes other than the default _id index
|
|
113
113
|
self.hasindex = any(index_name == pkey_name for index_name in index_info)
|
|
114
114
|
# If index was requested but the collection was previously created without one, create it now
|
|
115
|
-
if hasindex and not self.hasindex:
|
|
115
|
+
if hasindex and not self.hasindex and self.period in ['D1', 'M15', 'M1']:
|
|
116
116
|
self.mongodb.client[self.user][self.relpath].create_index(pkey_fields, unique=True, name=pkey_name)
|
|
117
117
|
self.mongodb.client[self.user][self.relpath].create_index([("mtime", DESCENDING)])
|
|
118
118
|
self.hasindex = True
|
|
@@ -20,6 +20,36 @@ import json
|
|
|
20
20
|
import requests
|
|
21
21
|
import lz4
|
|
22
22
|
|
|
23
|
+
|
|
24
|
+
# Column layout of the semicolon-separated log files under DATABASE_FOLDER/Logs.
|
|
25
|
+
# 6 separators => 7 fields; the 7th (message) may itself contain ';'.
|
|
26
|
+
_LOG_COLUMNS = ['shardid', 'sequence_number', 'user_name',
|
|
27
|
+
'asctime', 'logger_name', 'level', 'message']
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _parse_log_lines(text):
|
|
31
|
+
"""Parse ';'-delimited log text into a DataFrame — stdlib only.
|
|
32
|
+
|
|
33
|
+
Uses str.split(';', 6) so any ';' inside the message field is preserved
|
|
34
|
+
verbatim (matching the prior `shape[1] > 7 -> merge cols 6..` behavior
|
|
35
|
+
that the pandas-based readLogs used to handle that case).
|
|
36
|
+
|
|
37
|
+
No pandas C parser is involved, so this cannot trip the pandas_parser
|
|
38
|
+
SIGSEGV that Logger.readLogs kept hitting at base_parser._infer_types.
|
|
39
|
+
Lines with fewer than 7 fields or empty asctime are dropped, matching
|
|
40
|
+
the previous on_bad_lines='skip' + dfnewlines[asctime].notna() filter.
|
|
41
|
+
"""
|
|
42
|
+
rows = []
|
|
43
|
+
append = rows.append
|
|
44
|
+
for line in text.splitlines():
|
|
45
|
+
if not line:
|
|
46
|
+
continue
|
|
47
|
+
parts = line.split(';', 6)
|
|
48
|
+
if len(parts) < 7 or not parts[3]:
|
|
49
|
+
continue
|
|
50
|
+
append(parts)
|
|
51
|
+
return pd.DataFrame(rows, columns=_LOG_COLUMNS)
|
|
52
|
+
|
|
23
53
|
from SharedData.IO.LogHandlerAPI import LogHandlerAPI
|
|
24
54
|
|
|
25
55
|
|
|
@@ -287,13 +317,11 @@ class Logger:
|
|
|
287
317
|
((pd.Timestamp.utcnow() + timedelta(days=-1)).strftime('%Y%m%d')+'.log')
|
|
288
318
|
if lastlogfilepath.is_file():
|
|
289
319
|
try:
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
_dflogs.columns = ['shardid', 'sequence_number',
|
|
293
|
-
'user_name', 'asctime', 'logger_name', 'level', 'message']
|
|
320
|
+
with open(lastlogfilepath, 'r', encoding='utf-8', errors='replace') as _f:
|
|
321
|
+
_dflogs = _parse_log_lines(_f.read())
|
|
294
322
|
Logger.dflogs = pd.concat([_dflogs, Logger.dflogs], axis=0, ignore_index=True)
|
|
295
323
|
Logger.getLastLog(Logger.dflogs)
|
|
296
|
-
Logger.getStatus(Logger.dflogs)
|
|
324
|
+
Logger.getStatus(Logger.dflogs)
|
|
297
325
|
except Exception as e:
|
|
298
326
|
print(f'Error reading last day logs: {e}')
|
|
299
327
|
|
|
@@ -355,24 +383,29 @@ class Logger:
|
|
|
355
383
|
Logger.logfilepath = _logfilepath
|
|
356
384
|
|
|
357
385
|
if Logger.logfilepath.is_file():
|
|
358
|
-
|
|
359
|
-
|
|
386
|
+
|
|
387
|
+
# Binary mode so _logfileposition is a plain byte offset with
|
|
388
|
+
# well-defined seek() semantics (text-mode tell() cookies cannot
|
|
389
|
+
# be manipulated arithmetically).
|
|
390
|
+
with open(Logger.logfilepath, 'rb') as file:
|
|
360
391
|
file.seek(Logger._logfileposition)
|
|
361
|
-
|
|
362
|
-
|
|
392
|
+
raw = file.read()
|
|
393
|
+
# Only consume up to the last complete line. A trailing
|
|
394
|
+
# partial (writer still mid-flush, or a kill -9 that left a
|
|
395
|
+
# half-line behind) is left in place so the next tick picks
|
|
396
|
+
# it up once '\n' is written. This is stricter than the old
|
|
397
|
+
# pandas path, which accepted the partial and advanced past
|
|
398
|
+
# it, silently losing the remainder bytes.
|
|
399
|
+
last_nl = raw.rfind(b'\n')
|
|
400
|
+
if last_nl < 0:
|
|
363
401
|
return dfnewlines
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
dfnewlines.columns = [
|
|
373
|
-
'shardid', 'sequence_number', 'user_name', 'asctime', 'logger_name', 'level', 'message'
|
|
374
|
-
]
|
|
375
|
-
dfnewlines = dfnewlines[dfnewlines['asctime'].notna()]
|
|
402
|
+
complete_bytes = raw[:last_nl + 1]
|
|
403
|
+
text = complete_bytes.decode('utf-8', errors='replace')
|
|
404
|
+
Logger._logfileposition += len(complete_bytes)
|
|
405
|
+
# _parse_log_lines drops empty lines and rows whose asctime
|
|
406
|
+
# field is empty, folding the prior on_bad_lines='skip' +
|
|
407
|
+
# dfnewlines[asctime].notna() filter into the parse step.
|
|
408
|
+
dfnewlines = _parse_log_lines(text)
|
|
376
409
|
if dfnewlines.empty:
|
|
377
410
|
return dfnewlines
|
|
378
411
|
|
|
@@ -401,12 +434,11 @@ class Logger:
|
|
|
401
434
|
Logger._max_asctime = max(
|
|
402
435
|
max_asctime,
|
|
403
436
|
dfnewlines_sorted['asctime'].iloc[-1]
|
|
404
|
-
)
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
437
|
+
)
|
|
438
|
+
# _logfileposition was already advanced above, right after we
|
|
439
|
+
# decided which prefix of the read to consume.
|
|
408
440
|
return dfnewlines_sorted
|
|
409
|
-
|
|
441
|
+
|
|
410
442
|
return dfnewlines
|
|
411
443
|
|
|
412
444
|
@staticmethod
|
|
@@ -519,18 +519,27 @@ class WorkerPool:
|
|
|
519
519
|
time.sleep(60)
|
|
520
520
|
|
|
521
521
|
@staticmethod
|
|
522
|
-
def get_active_jobs(shdata):
|
|
523
|
-
#
|
|
522
|
+
def get_active_jobs(shdata, acquire=True):
|
|
523
|
+
# Schemaless reads must hold the table mutex: the writer updates
|
|
524
|
+
# bson bytes, bson_size and bson_ptr as separate non-atomic steps,
|
|
525
|
+
# and a lock-free reader can catch mid-update state and fail to
|
|
526
|
+
# decode BSON ("bad eoo that should never happen").
|
|
524
527
|
job_table = WorkerPool.get_job_table(shdata)
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
528
|
+
try:
|
|
529
|
+
if acquire:
|
|
530
|
+
job_table.acquire()
|
|
531
|
+
buff = np.full((1,),dtype=job_table.dtype,fill_value=np.nan)
|
|
532
|
+
buff['hash'][0] = 'ACTIVE_JOBS'
|
|
533
|
+
loc = job_table.get_loc(buff, acquire=False)
|
|
534
|
+
if loc[0] != -1:
|
|
535
|
+
active_jobs = job_table.get_dict_list(job_table[loc[0]],acquire=False)[0]
|
|
536
|
+
else:
|
|
537
|
+
active_jobs = {
|
|
538
|
+
'hash': 'ACTIVE_JOBS',
|
|
539
|
+
'status': 'ACTIVE',
|
|
540
|
+
'jobs': {},
|
|
541
|
+
}
|
|
542
|
+
finally:
|
|
543
|
+
if acquire:
|
|
544
|
+
job_table.release()
|
|
536
545
|
return active_jobs
|
|
@@ -541,14 +541,34 @@ class SharedData:
|
|
|
541
541
|
names=names,formats=formats,size=100e3)
|
|
542
542
|
|
|
543
543
|
@staticmethod
|
|
544
|
-
def mutex(shm_name, pid):
|
|
544
|
+
def mutex(shm_name, pid):
|
|
545
545
|
"""
|
|
546
|
-
|
|
547
|
-
|
|
546
|
+
Allocate (or attach to) the per-table MUTEX shared-memory struct AND
|
|
547
|
+
ACQUIRE its `pid` lock. Returns once the lock is held by `pid`.
|
|
548
|
+
|
|
549
|
+
=====================================================================
|
|
550
|
+
IMPORTANT — this ACQUIRES the per-table mutex as a side effect.
|
|
551
|
+
=====================================================================
|
|
552
|
+
Called from `Table.init_schema()`; `Table.__init__` then keeps the
|
|
553
|
+
lock held for the remainder of `__init__`, releasing it only in the
|
|
554
|
+
`finally: self.release()` at the bottom. Every code path reachable
|
|
555
|
+
from `Table.__init__` — incl. `create_table()`, `load_table()`,
|
|
556
|
+
`read_header()`, `download()`, `TableDisk.malloc()`,
|
|
557
|
+
`index.initialize()`, `index.malloc_pkey()`, `create_index()` —
|
|
558
|
+
runs with this table's own mutex already held. Do NOT call
|
|
559
|
+
`acquire()` (or anything that internally re-acquires this same
|
|
560
|
+
mutex, e.g. `self.acquire()` on the same Table) from anywhere in
|
|
561
|
+
that call tree: the mutex is a non-reentrant CAS spinlock and
|
|
562
|
+
re-acquiring it self-deadlocks (the staleness check in `acquire()`
|
|
563
|
+
only force-acquires when the holder PID is *dead* — a self-
|
|
564
|
+
deadlocked caller is still alive).
|
|
565
|
+
|
|
566
|
+
Original docstring follows:
|
|
567
|
+
|
|
548
568
|
This static method attempts to create a shared memory segment with a specific name
|
|
549
569
|
appended by '#mutex' to serve as a mutex structure containing process ID, type, and load status.
|
|
550
570
|
If the shared memory segment already exists, it attaches to it instead.
|
|
551
|
-
|
|
571
|
+
|
|
552
572
|
It then acquires the mutex using the SharedData.acquire method, registers the calling process ID
|
|
553
573
|
by appending it to a CSV file located in the DATABASE_FOLDER environment path, and returns
|
|
554
574
|
the shared memory object, the mutex numpy structured array, and a flag indicating whether the
|
|
@@ -588,8 +608,43 @@ class SharedData:
|
|
|
588
608
|
@staticmethod
|
|
589
609
|
def acquire(mutex, pid, relpath):
|
|
590
610
|
"""
|
|
611
|
+
Spin-CAS acquire of a per-table MUTEX's `pid` field (1us sleep loop).
|
|
612
|
+
Atomically sets `mutex.pid = pid` once it observes `mutex.pid == 0`.
|
|
613
|
+
Staleness recovery: if the current holder PID is *dead*
|
|
614
|
+
(`psutil.pid_exists` is False), force-acquires by CASing
|
|
615
|
+
`holder_pid -> pid`.
|
|
616
|
+
|
|
617
|
+
=====================================================================
|
|
618
|
+
*** NON-REENTRANT ***
|
|
619
|
+
=====================================================================
|
|
620
|
+
This is a CAS-based spinlock with NO owner-recursion tracking. If the
|
|
621
|
+
current process already holds the mutex, calling `acquire()` again
|
|
622
|
+
SELF-DEADLOCKS: the CAS against 0 never succeeds (the field holds our
|
|
623
|
+
own pid, not 0), and the staleness check only force-acquires when the
|
|
624
|
+
holder is DEAD — which a self-deadlocked caller is not.
|
|
625
|
+
|
|
626
|
+
Note especially that `Table.__init__` holds this mutex throughout its
|
|
627
|
+
lifetime (acquired in `init_schema()` via `SharedData.mutex()`,
|
|
628
|
+
released only at `__init__`'s `finally`). Code reachable from
|
|
629
|
+
`__init__` — `load_table()`, `create_table()`, `read_header()`,
|
|
630
|
+
`TableDisk.malloc()`, `TableIndex.initialize()`, `create_index()`,
|
|
631
|
+
etc. — must NOT call `acquire()` on the table's own mutex.
|
|
632
|
+
|
|
633
|
+
=====================================================================
|
|
634
|
+
*** BRIEF-HOLD CONTRACT ***
|
|
635
|
+
=====================================================================
|
|
636
|
+
The mutex protects short shared-memory critical sections (one upsert /
|
|
637
|
+
one batch_extend / one get_loc / the `__init__` setup) and is shared
|
|
638
|
+
across Python AND C++ writers. Do NOT hold it across slow I/O — every
|
|
639
|
+
waiter spins at 1us granularity, burning CPU. On the singleton SCHEMA
|
|
640
|
+
table this is catastrophic: stalling SCHEMA stalls every process that
|
|
641
|
+
opens any table (because every Table open resolves its folder via
|
|
642
|
+
`schema.get_loc()`).
|
|
643
|
+
|
|
644
|
+
Original docstring follows:
|
|
645
|
+
|
|
591
646
|
Attempt to acquire a process-safe mutex semaphore by atomically setting its value to the current process ID (pid).
|
|
592
|
-
|
|
647
|
+
|
|
593
648
|
This method uses a compare-and-swap atomic operation to acquire the mutex. If the mutex is already held by another process, it waits in a loop, checking every microsecond. If the mutex remains held beyond certain time thresholds (1 second on the first check, 15 seconds thereafter), it verifies whether the locking process is still active. If the locking process has terminated, it forcibly acquires the mutex. The method logs a warning if waiting for the semaphore continues beyond the initial timeout.
|
|
594
649
|
|
|
595
650
|
Parameters:
|
|
@@ -601,12 +656,27 @@ class SharedData:
|
|
|
601
656
|
None
|
|
602
657
|
"""
|
|
603
658
|
tini = time.time()
|
|
659
|
+
tstart = tini # total-wait timer; unlike tini it is never reset below
|
|
660
|
+
# The SCHEMA table is the singleton every process depends on (every
|
|
661
|
+
# Table open resolves its folder via schema.get_loc()). The staleness
|
|
662
|
+
# check below only recovers a DEAD holder — an alive-but-stuck holder
|
|
663
|
+
# (e.g. a re-entrant acquire) would otherwise hang every dependent
|
|
664
|
+
# process forever. So for the SCHEMA table only, abort with an error
|
|
665
|
+
# after 30s instead of spinning indefinitely.
|
|
666
|
+
schema_timeout = 30.0 if '/SCHEMA/table/' in relpath else None
|
|
604
667
|
# semaphore is process safe
|
|
605
668
|
telapsed = 0
|
|
606
669
|
hdrptr = mutex.__array_interface__['data'][0]
|
|
607
670
|
semseek = 0
|
|
608
671
|
firstcheck = True
|
|
609
672
|
while cpp.long_compare_and_swap(hdrptr, semseek, 0, pid) == 0:
|
|
673
|
+
if (schema_timeout is not None) and (time.time() - tstart > schema_timeout):
|
|
674
|
+
lockingpid = int(mutex['pid'])
|
|
675
|
+
errmsg = ('%s SCHEMA semaphore still held by pid %d after %.0fs — '
|
|
676
|
+
'aborting acquire to avoid hanging every dependent process'
|
|
677
|
+
% (relpath, lockingpid, schema_timeout))
|
|
678
|
+
Logger.log.error(errmsg)
|
|
679
|
+
raise TimeoutError(errmsg)
|
|
610
680
|
# check if process that locked the mutex is still running
|
|
611
681
|
telapsed = time.time() - tini
|
|
612
682
|
if (telapsed > 15) | ((firstcheck) & (telapsed > 1)):
|
|
@@ -771,15 +841,11 @@ class SharedData:
|
|
|
771
841
|
|
|
772
842
|
dfcollections = self.list_collections(keyword,user=user)
|
|
773
843
|
|
|
774
|
-
|
|
775
|
-
#
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
ls
|
|
779
|
-
# merge collections
|
|
780
|
-
ls = ls.reindex(index=ls.index.union(dfcollections.index),
|
|
781
|
-
columns=ls.columns.union(dfcollections.columns))
|
|
782
|
-
ls.loc[dfcollections.index,dfcollections.columns] = dfcollections.values
|
|
844
|
+
# merge local then collections; combine_first preserves dtypes (avoids
|
|
845
|
+
# FutureWarning from assigning object/datetime values into reindexed
|
|
846
|
+
# float64 NaN columns).
|
|
847
|
+
ls = dflocal.combine_first(dfremote)
|
|
848
|
+
ls = dfcollections.combine_first(ls)
|
|
783
849
|
|
|
784
850
|
if len(ls)>0:
|
|
785
851
|
ls = ls.reindex(columns=['folder_local', 'created_local', 'last_modified_local','size_local','files_local',
|
|
@@ -990,12 +1056,10 @@ class SharedData:
|
|
|
990
1056
|
dflocal = self.list_local(keyword,user=user)
|
|
991
1057
|
Logger.log.info(f'list_local took {time.time()-ti:.2f} seconds')
|
|
992
1058
|
|
|
993
|
-
|
|
994
|
-
#
|
|
995
|
-
ls =
|
|
996
|
-
|
|
997
|
-
ls.loc[dflocal.index,dflocal.columns] = dflocal.values
|
|
998
|
-
|
|
1059
|
+
# combine_first preserves dtypes (avoids FutureWarning from assigning
|
|
1060
|
+
# object/datetime values into reindexed float64 NaN columns).
|
|
1061
|
+
ls = dflocal.combine_first(dfremote)
|
|
1062
|
+
|
|
999
1063
|
if len(ls)>0:
|
|
1000
1064
|
ls = ls.reindex(columns=['folder_local', 'created_local', 'last_modified_local','size_local','files_local',
|
|
1001
1065
|
'folder_remote', 'last_modified_remote', 'size_remote', 'files_remote','storage_class_remote',
|
|
@@ -1049,12 +1113,10 @@ class SharedData:
|
|
|
1049
1113
|
dfremote = self.list_remote(keyword, user=user)
|
|
1050
1114
|
dflocal = self.list_local(keyword, user=user)
|
|
1051
1115
|
|
|
1052
|
-
|
|
1053
|
-
#
|
|
1054
|
-
ls =
|
|
1055
|
-
|
|
1056
|
-
ls.loc[dflocal.index, dflocal.columns] = dflocal.values
|
|
1057
|
-
|
|
1116
|
+
# combine_first preserves dtypes (avoids FutureWarning from assigning
|
|
1117
|
+
# object/datetime values into reindexed float64 NaN columns).
|
|
1118
|
+
ls = dflocal.combine_first(dfremote)
|
|
1119
|
+
|
|
1058
1120
|
if len(ls) > 0:
|
|
1059
1121
|
ls = ls.reindex(columns=['folder_local', 'created_local', 'last_modified_local','size_local','files_local',
|
|
1060
1122
|
'folder_remote', 'last_modified_remote', 'size_remote', 'files_remote','storage_class_remote',
|
|
@@ -146,10 +146,20 @@ class Table:
|
|
|
146
146
|
self.index = TableIndex(self)
|
|
147
147
|
|
|
148
148
|
errmsg = ''
|
|
149
|
-
try:
|
|
150
|
-
|
|
149
|
+
try:
|
|
150
|
+
# init_schema() ACQUIRES this table's per-table mutex (via
|
|
151
|
+
# SharedData.mutex() -> SharedData.acquire()) and the lock is
|
|
152
|
+
# held until `finally: self.release()` at the bottom of this
|
|
153
|
+
# try/except. Every method called below — create_table(),
|
|
154
|
+
# load_table(), read_header(), TableDisk.malloc(),
|
|
155
|
+
# index.initialize(), index.create_index() — therefore runs
|
|
156
|
+
# WITH the mutex already held. Do NOT call self.acquire() (or
|
|
157
|
+
# anything that re-acquires this same mutex) from anywhere
|
|
158
|
+
# reachable from here: the mutex is a non-reentrant CAS
|
|
159
|
+
# spinlock and re-acquiring it self-deadlocks. See the
|
|
160
|
+
# SharedData.mutex / SharedData.acquire docstrings.
|
|
151
161
|
self.init_schema()
|
|
152
|
-
|
|
162
|
+
|
|
153
163
|
if (not self.exists) | (self.overwrite):
|
|
154
164
|
|
|
155
165
|
if not records is None:
|
|
@@ -190,8 +200,18 @@ class Table:
|
|
|
190
200
|
|
|
191
201
|
def init_schema(self):
|
|
192
202
|
"""
|
|
193
|
-
Initialize the schema and related attributes for the table instance
|
|
194
|
-
|
|
203
|
+
Initialize the schema and related attributes for the table instance,
|
|
204
|
+
AND ACQUIRE this table's per-table mutex (via `SharedData.mutex()` ->
|
|
205
|
+
`SharedData.acquire()`). The mutex is held from here until
|
|
206
|
+
`Table.__init__`'s `finally: self.release()` at the bottom — so
|
|
207
|
+
every method called from `__init__` (create_table, load_table,
|
|
208
|
+
read_header, malloc, index.initialize, ...) runs with the mutex
|
|
209
|
+
already held. It is non-reentrant; do not re-acquire it from
|
|
210
|
+
anywhere reachable from `__init__`. See `SharedData.mutex` /
|
|
211
|
+
`SharedData.acquire` docstrings for the full contract.
|
|
212
|
+
|
|
213
|
+
Original docstring follows:
|
|
214
|
+
|
|
195
215
|
This method sets up header and tail header formats and names, initializes mutexes for shared memory access,
|
|
196
216
|
determines the storage path based on user, database, period, source, and tablename, and manages schema information
|
|
197
217
|
from shareddata. It handles synchronization and consistency checks for the table's loading type, updates or inserts
|
|
@@ -412,6 +432,14 @@ class Table:
|
|
|
412
432
|
def create_table(self):
|
|
413
433
|
# create new table or overwrite existing table
|
|
414
434
|
"""
|
|
435
|
+
Called from `Table.__init__` for a new or overwrite-requested table.
|
|
436
|
+
Runs WITH this table's per-table mutex already held — `init_schema()`
|
|
437
|
+
acquired it before this is called, and it is released only in
|
|
438
|
+
`__init__`'s `finally`. Therefore TableDisk.malloc(create=True)'s
|
|
439
|
+
`shf_hdr[0] = self.hdr` write below is serialized against C++ writers
|
|
440
|
+
(which acquire the same per-table mutex for `batch_extend`/`upsert`)
|
|
441
|
+
— it is NOT a lock-free clobber. Do NOT acquire the mutex again here.
|
|
442
|
+
|
|
415
443
|
Create a new table or overwrite an existing one with the provided records.
|
|
416
444
|
|
|
417
445
|
This method performs the following steps:
|
|
@@ -733,6 +761,19 @@ class Table:
|
|
|
733
761
|
def load_table(self):
|
|
734
762
|
# open existing table
|
|
735
763
|
"""
|
|
764
|
+
Called from `Table.__init__` for an existing table. Runs WITH this
|
|
765
|
+
table's per-table mutex already held — `init_schema()` acquired it
|
|
766
|
+
before this is called, and it is released only in `__init__`'s
|
|
767
|
+
`finally`. Therefore `read_header()` reads a quiescent header (no
|
|
768
|
+
C++ writer can mutate it concurrently — they take the same mutex
|
|
769
|
+
for `batch_extend`/`upsert`), and `TableDisk.malloc(create=True)`'s
|
|
770
|
+
`shf_hdr[0] = self.hdr` writes back the SAME bytes under the SAME
|
|
771
|
+
lock — it is idempotent, NOT a clobber. Do NOT acquire the mutex
|
|
772
|
+
again here: it is non-reentrant and re-acquire self-deadlocks. See
|
|
773
|
+
`SharedData.mutex` / `SharedData.acquire` docstrings.
|
|
774
|
+
|
|
775
|
+
Original docstring follows:
|
|
776
|
+
|
|
736
777
|
Load data into the table, ensuring the local file is available and up to date.
|
|
737
778
|
|
|
738
779
|
If a local table file exists, it reads the header from the file. If the table is not loaded or the local file does not exist, it downloads the table data. Memory allocation for the table is then performed.
|
|
@@ -746,23 +787,48 @@ class Table:
|
|
|
746
787
|
with open(self.filepath, 'rb') as io_obj:
|
|
747
788
|
self.read_header(io_obj)
|
|
748
789
|
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
790
|
+
# NOTE: this attach/cold-init split is an OPTIMIZATION ONLY — both
|
|
791
|
+
# paths produce equivalent state and both are concurrency-safe.
|
|
792
|
+
# `Table.__init__` holds this table's per-table mutex throughout
|
|
793
|
+
# (acquired in `init_schema()`, released in `__init__`'s `finally`),
|
|
794
|
+
# and C++ writers acquire the same mutex for `batch_extend`/`upsert`,
|
|
795
|
+
# so any concurrent C++ writer is blocked while we are in here.
|
|
796
|
+
# Therefore the historical `malloc(create=True)`'s
|
|
797
|
+
# `shf_hdr[0] = self.hdr` is idempotent — it writes back exactly the
|
|
798
|
+
# bytes `read_header()` just read under the same lock — and is NOT a
|
|
799
|
+
# live-writer clobber. The ATTACH path simply skips that idempotent
|
|
800
|
+
# rewrite (and the no-op `index.initialize()` on a healthy live
|
|
801
|
+
# table). Earlier comments here claimed `malloc(create=True)` would
|
|
802
|
+
# clobber a live writer; that premise was wrong (the mutex
|
|
803
|
+
# serializes them). Do NOT call `self.acquire()` anywhere below —
|
|
804
|
+
# the mutex is non-reentrant and re-acquire self-deadlocks. See
|
|
805
|
+
# `SharedData.mutex` / `SharedData.acquire` docstrings.
|
|
806
|
+
attach_only = (self.exists_local
|
|
807
|
+
and (self.mutex['isloaded'] == 1)
|
|
808
|
+
and (not self.header_changed))
|
|
809
|
+
if attach_only:
|
|
810
|
+
# ATTACH: skip the idempotent header rewrite (and index
|
|
811
|
+
# re-initialize) for healthy already-loaded tables.
|
|
812
|
+
self.attach()
|
|
813
|
+
if self.hasindex:
|
|
814
|
+
self.index.attach()
|
|
815
|
+
else:
|
|
816
|
+
if ((self.mutex['isloaded']==0) | (not self.exists_local)):
|
|
817
|
+
self.download()
|
|
818
|
+
self.malloc(create=True)
|
|
753
819
|
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
loc = self.records.get_loc(self.records[0:1], acquire=False)
|
|
759
|
-
if loc[0]!=0:
|
|
760
|
-
# index is not coherent
|
|
761
|
-
self.hdr['isidxcreated']=0
|
|
762
|
-
self.index.initialize()
|
|
820
|
+
if self.hasindex:
|
|
821
|
+
self.index.initialize()
|
|
822
|
+
if self.records.count>0:
|
|
823
|
+
# check if index is coherent
|
|
763
824
|
loc = self.records.get_loc(self.records[0:1], acquire=False)
|
|
764
825
|
if loc[0]!=0:
|
|
765
|
-
|
|
826
|
+
# index is not coherent
|
|
827
|
+
self.hdr['isidxcreated']=0
|
|
828
|
+
self.index.initialize()
|
|
829
|
+
loc = self.records.get_loc(self.records[0:1], acquire=False)
|
|
830
|
+
if loc[0]!=0:
|
|
831
|
+
raise Exception('Cannot create index!')
|
|
766
832
|
|
|
767
833
|
def read_header(self, io_obj):
|
|
768
834
|
"""
|
|
@@ -109,8 +109,45 @@ class TableDisk(Table):
|
|
|
109
109
|
offset = self.hdr.dtype.itemsize
|
|
110
110
|
self.shf_data = np.memmap(self.filepath,self.recdtype,'r+',offset,(self.hdr['recordssize'],))
|
|
111
111
|
self.records = SharedNumpy('DISK', self.shf_data)
|
|
112
|
-
self.records.table = self
|
|
113
|
-
|
|
112
|
+
self.records.table = self
|
|
113
|
+
|
|
114
|
+
############### ATTACH ###############
|
|
115
|
+
def attach(self):
    """
    Memory-map an existing table file without assigning to its header.

    The header is mapped at offset 0 and ``self.hdr`` is taken from element
    0 of that mapping. ``recordssize`` is then read from it exactly once,
    and that single snapshot is used for ``self.size``, for the file-size
    check and for the shape of the data mapping, so the three values cannot
    disagree if the header changes while this method runs.

    Both mappings are opened in 'r+' mode, so they stay writable for later
    operations, but this method itself performs no assignment to the
    header or to the records.

    Raises:
        Exception: if the file on disk is smaller than the header plus
            ``recordssize`` records (truncated / partially written file).
    """
    # memory map header; it is only read here, never assigned
    self.shf_hdr = np.memmap(self.filepath, self.hdrdtype, 'r+', 0, (1,))
    self.hdr = self.shf_hdr[0]

    # snapshot recordssize once: re-reading the mapped header for every use
    # could yield different values for self.size, the size check and the
    # mapped shape if the header is updated in between
    nrecords = int(self.hdr['recordssize'])
    self.size = nrecords

    # the file must actually back header + nrecords rows before mapping the
    # data; guards against attaching a truncated / partially written file
    hdr_bytes = self.hdr.dtype.itemsize
    expected_bytes = hdr_bytes + self.recdtype.itemsize * nrecords
    actual_bytes = os.path.getsize(self.filepath)
    if actual_bytes < expected_bytes:
        raise Exception('attach %s: file too small (%d < %d), table not coherent'
                        % (self.relpath, actual_bytes, expected_bytes))

    # memory map data, starting right after the header
    self.shf_data = np.memmap(self.filepath, self.recdtype, 'r+', hdr_bytes,
                              (nrecords,))
    self.records = SharedNumpy('DISK', self.shf_data)
    self.records.table = self
|
|
150
|
+
|
|
114
151
|
############### FREE ###############
|
|
115
152
|
def free(self, acquire=True):
|
|
116
153
|
"""
|
|
@@ -112,7 +112,44 @@ class TableIndex:
|
|
|
112
112
|
except Exception as e:
|
|
113
113
|
errmsg = 'Failed to intialize index for %s!\n%s' % (self.table.relpath, str(e))
|
|
114
114
|
self.initialized = False
|
|
115
|
-
finally:
|
|
115
|
+
finally:
|
|
116
|
+
if not self.initialized:
|
|
117
|
+
Logger.log.error(errmsg)
|
|
118
|
+
raise Exception(errmsg)
|
|
119
|
+
|
|
120
|
+
def attach(self):
    """
    Attach to the index files that already exist for ``self.table``.

    Calls ``get_functions()``, derives the index file paths from the
    table's data file path (``data.bin`` -> ``pkey.bin``, plus
    ``dateidx.bin`` / ``symbolidx.bin`` / ``portidx.bin`` depending on the
    primary-key string and columns) and then calls ``rememmap()``. On
    success ``self.initialized`` is set to True.

    Raises:
        Exception: with the message 'Failed to attach index for <relpath>!'
            followed by the underlying error text, chained to the original
            exception. Before raising, ``self.initialized`` is set to False
            and the message is logged. Non-``Exception`` interrupts such as
            ``KeyboardInterrupt`` propagate unchanged.
    """
    try:
        self.get_functions()

        # every index file lives next to the data file and differs only in
        # its file name
        datapath = str(self.table.filepath)
        self.pkeypath = datapath.replace('data.bin', 'pkey.bin')
        if 'date' in self.pkeystr:
            self.dateidxpath = datapath.replace('data.bin', 'dateidx.bin')
        if ('symbol' in self.pkeystr) and (len(self.pkeycolumns) > 1):
            self.symbolidxpath = datapath.replace('data.bin', 'symbolidx.bin')
        if ('portfolio' in self.pkeystr) or ('tag' in self.pkeystr):
            self.portidxpath = datapath.replace('data.bin', 'portidx.bin')

        # NOTE(review): rememmap() is defined outside this block; it is
        # expected to map the existing index files and to raise on a size
        # mismatch rather than reset them -- confirm against rememmap().
        self.rememmap()
        self.initialized = True
    except Exception as e:
        # log and raise here rather than in a `finally` clause, so the
        # original traceback is chained and a non-Exception interrupt is
        # never replaced by an empty-message Exception
        self.initialized = False
        errmsg = 'Failed to attach index for %s!\n%s' % (self.table.relpath, str(e))
        Logger.log.error(errmsg)
        raise Exception(errmsg) from e
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|