shareddata 6.83.4__tar.gz → 6.83.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {shareddata-6.83.4/src/shareddata.egg-info → shareddata-6.83.7}/PKG-INFO +1 -1
- {shareddata-6.83.4 → shareddata-6.83.7}/setup.py +1 -1
- shareddata-6.83.7/src/SharedData/CacheRedis.py +612 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/CollectionMongoDB.py +1 -1
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Logger.py +1 -1
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/Worker.py +13 -1
- {shareddata-6.83.4 → shareddata-6.83.7/src/shareddata.egg-info}/PKG-INFO +1 -1
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_cache_redis.py +33 -46
- shareddata-6.83.4/src/SharedData/CacheRedis.py +0 -715
- {shareddata-6.83.4 → shareddata-6.83.7}/LICENSE +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/MANIFEST.in +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/README.md +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/pyproject.toml +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/setup.cfg +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/ServerGunicorn.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/ServerWaitress.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/__init__.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/auth.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/constants.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/__init__.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/cache.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/collections.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/metadata.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/system.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/tables.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/timeseries.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/workers.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/utils.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Database.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Defaults.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/AWSEC2.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/AWSS3.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/AutoDocstrings.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/ClientAPI.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/ClientSocket.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/ClientWebSocket.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/LogHandlerAPI.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/MongoDBClient.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/SaveTables.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/ServerSocket.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/ServerWebSocket.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/StreamsCache.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/StreamsPersist.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/SyncTable.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/TunnelWebSocket.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/__init__.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Metadata.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/MultiProc.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/OpenFIGI.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/BatchJob.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/Schedule.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/ScheduleMonitor.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/Scheduler.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/WorkerLib.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/WorkerPool.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/__init__.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/SharedData.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/SharedNumpy.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/StreamKafka.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Symbol.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Table.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableDisk.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndex.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndexJitFunctions.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndexJitFunctionsManual.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndexJitGenerate.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndexJitHash.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndexJitLoc.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TimeSeriesDisk.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TimeseriesContainer.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Users.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Utils.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/__init__.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/sharedmutexwin.pyd +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/shareddata.egg-info/SOURCES.txt +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/shareddata.egg-info/dependency_links.txt +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/shareddata.egg-info/requires.txt +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/src/shareddata.egg-info/top_level.txt +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_api_collection.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_api_collection_loopback.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_api_table.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_api_table_schemaless.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_api_table_schemaless_extend.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_bson_last_pos_reuse.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_extend_rt.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_loc.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_metadata.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_read_write_tail.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_stream_loopback_async.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_timeseries.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_timeseries_api.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_upsert_unordered_get_date_loc_d1.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_upsert_unordered_get_date_loc_m1.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_upsert_unordered_get_date_loc_m15.py +0 -0
- {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_upsert_unordered_positions_m1.py +0 -0
|
@@ -34,7 +34,7 @@ install_requires = [
|
|
|
34
34
|
|
|
35
35
|
setup(
|
|
36
36
|
name='shareddata',
|
|
37
|
-
version='6.83.4',
|
|
37
|
+
version='6.83.7',
|
|
38
38
|
description='Memory Mapped / Shared Memory Database with S3 repository',
|
|
39
39
|
long_description=open('README.md').read(),
|
|
40
40
|
long_description_content_type='text/markdown',
|
|
@@ -0,0 +1,612 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Redis-backed last-state cache for Kafka stream consumers.
|
|
3
|
+
|
|
4
|
+
Key layout (all share hash-tag '{path}' → same cluster slot):
|
|
5
|
+
{path}:<pkey> BSON-encoded dict, one per logical entity
|
|
6
|
+
{path}#pkeys SET of all pkeys (enumeration index)
|
|
7
|
+
{path}#<field> header / counter scalars (CacheHeader)
|
|
8
|
+
|
|
9
|
+
Design:
|
|
10
|
+
- At-least-once durability: Kafka path calls apply_batch(), awaits the
|
|
11
|
+
Redis ack, then commits the offset. No data lost on crash.
|
|
12
|
+
- Atomic read-modify-write: CAS via Lua script; SET + SADD + INCR happen
|
|
13
|
+
in one server-side operation so the #pkeys index can never drift from
|
|
14
|
+
the actual keyspace on a successful write.
|
|
15
|
+
- Cluster-safe: data, pkey-set and header fields all share the '{path}'
|
|
16
|
+
hash-tag, so multi-key Lua and SMEMBERS stay slot-local.
|
|
17
|
+
- BSON encoding (handles datetime natively; same format as the Kafka
|
|
18
|
+
pipeline).
|
|
19
|
+
|
|
20
|
+
Enumeration:
|
|
21
|
+
- list_keys() → SMEMBERS {path}#pkeys (fast; one round-trip)
|
|
22
|
+
- update_keys() → SCAN the data prefix and rebuild the SET (reconciliation
|
|
23
|
+
tool for the rare TTL-expiry drift case; call manually if needed)
|
|
24
|
+
"""
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import asyncio
|
|
28
|
+
import os
|
|
29
|
+
import random
|
|
30
|
+
from collections.abc import Iterable, Iterator, Mapping
|
|
31
|
+
from fnmatch import fnmatch
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
|
+
import bson
|
|
35
|
+
from redis import Redis
|
|
36
|
+
from redis.cluster import ClusterNode, RedisCluster
|
|
37
|
+
from redis.asyncio import Redis as RedisAsync
|
|
38
|
+
from redis.asyncio.cluster import ClusterNode as ClusterNodeAsync
|
|
39
|
+
from redis.asyncio.cluster import RedisCluster as RedisClusterAsync
|
|
40
|
+
|
|
41
|
+
from SharedData.Database import DATABASE_PKEYS
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
# Lua: atomic CAS + SADD pkey-set + INCR counter
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
# KEYS[1] = {path}:<pkey> data
|
|
48
|
+
# KEYS[2] = {path}#pkeys pkey SET
|
|
49
|
+
# KEYS[3] = {path}#cache->counter counter
|
|
50
|
+
# ARGV[1] = expected previous bytes ('' if caller expects key to be absent)
|
|
51
|
+
# ARGV[2] = new bytes (BSON-encoded merged dict)
|
|
52
|
+
# ARGV[3] = pkey string (for SADD)
|
|
53
|
+
# ARGV[4] = TTL seconds (0 = no TTL)
|
|
54
|
+
# Returns: 1 on success, 0 on CAS conflict (caller retries).
|
|
55
|
+
_CAS_LUA = r"""
|
|
56
|
+
local current = redis.call('GET', KEYS[1])
|
|
57
|
+
if current == false then current = '' end
|
|
58
|
+
if current ~= ARGV[1] then
|
|
59
|
+
return 0
|
|
60
|
+
end
|
|
61
|
+
local ttl = tonumber(ARGV[4])
|
|
62
|
+
if ttl and ttl > 0 then
|
|
63
|
+
redis.call('SET', KEYS[1], ARGV[2], 'EX', ttl)
|
|
64
|
+
else
|
|
65
|
+
redis.call('SET', KEYS[1], ARGV[2])
|
|
66
|
+
end
|
|
67
|
+
redis.call('SADD', KEYS[2], ARGV[3])
|
|
68
|
+
redis.call('INCR', KEYS[3])
|
|
69
|
+
return 1
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
_COUNTER_FIELD = 'cache->counter'
|
|
74
|
+
# Production Kafka path: 1 writer per pkey (partition ownership) → CAS always
|
|
75
|
+
# succeeds on attempt 1. Retries matter only when a strategy/script writes to
|
|
76
|
+
# the same pkey concurrently with the consumer.
|
|
77
|
+
_MAX_CAS_RETRIES = 32
|
|
78
|
+
_CAS_BACKOFF_BASE_MS = 0.5
|
|
79
|
+
_CAS_BACKOFF_MAX_MS = 50.0
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _deep_merge(target: dict, source: Mapping) -> dict:
    """Fold ``source`` into ``target`` in place and hand ``target`` back.

    A key is merged recursively only when the incoming value is a Mapping
    and the value already stored under that key is a ``dict``; in every
    other case the incoming value simply overwrites the stored one.
    """
    for key, incoming in source.items():
        existing = target.get(key)
        if not (isinstance(existing, dict) and isinstance(incoming, Mapping)):
            # Scalars, lists, or a type mismatch: last writer wins.
            target[key] = incoming
            continue
        _deep_merge(existing, incoming)
    return target
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _check_pkey(pkey: str) -> None:
    """Validate a primary-key string before it is used to build a Redis key.

    Raises:
        TypeError: ``pkey`` is not a ``str``.
        ValueError: ``pkey`` is empty, or contains ``#`` or ``:`` (the two
            separator characters used in the key layout).
    """
    if not isinstance(pkey, str):
        raise TypeError('pkey must be a string')
    if pkey == '':
        raise ValueError('pkey must be non-empty')
    if any(sep in pkey for sep in ('#', ':')):
        raise ValueError(f'pkey cannot contain # or : (got {pkey!r})')
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _parse_cluster_nodes() -> list[tuple[str, int]]:
    """Parse ``REDIS_CLUSTER_NODES`` into a list of ``(host, port)`` tuples.

    The variable is a comma-separated list of ``host:port`` entries.
    Blank entries (e.g. from a trailing comma) are ignored, and the port is
    taken from the text after the *last* colon so bracketed IPv6 literals
    such as ``[::1]:6379`` are accepted (the brackets are stripped).

    Raises:
        RuntimeError: the variable is unset/empty or contains no entries.
        ValueError: an entry is not of the form ``host:port``.
    """
    raw = os.environ.get('REDIS_CLUSTER_NODES')
    if not raw:
        raise RuntimeError('REDIS_CLUSTER_NODES not defined')
    nodes: list[tuple[str, int]] = []
    for part in raw.split(','):
        entry = part.strip()
        if not entry:
            # Tolerate "a:1,b:2," and "a:1,,b:2".
            continue
        host, sep, port = entry.rpartition(':')
        host = host.strip()
        if host.startswith('[') and host.endswith(']'):
            host = host[1:-1]
        try:
            if not sep or not host:
                raise ValueError(entry)
            nodes.append((host, int(port)))
        except ValueError:
            raise ValueError(
                f'invalid REDIS_CLUSTER_NODES entry {entry!r}; expected host:port'
            ) from None
    if not nodes:
        # Fail here instead of with an IndexError in the client builders.
        raise RuntimeError('REDIS_CLUSTER_NODES contains no host:port entries')
    return nodes
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _build_sync_client():
    """Create the blocking Redis client described by ``REDIS_CLUSTER_NODES``.

    One configured node yields a plain ``Redis`` client; more than one
    yields a ``RedisCluster`` seeded with every node. Responses are left as
    raw bytes in both cases.
    """
    endpoints = _parse_cluster_nodes()
    if len(endpoints) <= 1:
        host, port = endpoints[0]
        return Redis(host=host, port=port, decode_responses=False)
    startup = [ClusterNode(h, p) for h, p in endpoints]
    return RedisCluster(startup_nodes=startup, decode_responses=False)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _build_async_client():
    """Create the asyncio Redis client described by ``REDIS_CLUSTER_NODES``.

    Mirrors ``_build_sync_client``: a single node gives a standalone async
    client, several nodes give an async cluster client. Responses are left
    as raw bytes.
    """
    endpoints = _parse_cluster_nodes()
    if len(endpoints) <= 1:
        host, port = endpoints[0]
        return RedisAsync(host=host, port=port, decode_responses=False)
    startup = [ClusterNodeAsync(h, p) for h, p in endpoints]
    return RedisClusterAsync(startup_nodes=startup, decode_responses=False)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class CacheRedis:
    """Redis-backed last-state cache for one ``user/database/period/source/tablename`` path.

    Every key this object touches carries the ``{path}`` hash-tag:

    * ``{path}:<pkey>``   BSON-encoded dict for one entity
    * ``{path}#pkeys``    SET of known pkeys (enumeration index)
    * ``{path}#<field>``  header scalars, exposed through ``self.header``

    Single-pkey writes and batch writes both go through the ``_CAS_LUA``
    compare-and-set script, which stores the value, adds the pkey to the
    index and increments the write counter in one server-side call.
    """

    def __init__(
        self,
        database: str,
        period: str,
        source: str,
        tablename: str,
        user: str = 'master',
        ttl_seconds: int = 0,
        pkey_columns: list[str] | None = None,
    ):
        """Connect the sync client and prepare key names.

        Args:
            database: must be a key of ``DATABASE_PKEYS``.
            period, source, tablename, user: path components.
            ttl_seconds: default expiry applied to data keys (0 = none).
            pkey_columns: columns whose values form the pkey. Defaults to
                the ``symbol`` / ``portfolio`` / ``tag`` columns present in
                ``DATABASE_PKEYS[database]``.

        Raises:
            ValueError: unknown database, or no pkey columns could be derived.
        """
        if database not in DATABASE_PKEYS:
            raise ValueError(f'unknown database {database!r}')
        self.database = database
        self.period = period
        self.source = source
        self.tablename = tablename
        self.user = user
        self.ttl_seconds = int(ttl_seconds)

        self.path = f'{user}/{database}/{period}/{source}/cache/{tablename}'
        self._tag = '{' + self.path + '}'

        # 6.80.7 pkey derivation: keep only the entity-identifier columns.
        # Override via pkey_columns if a caller wants the full DATABASE_PKEYS.
        if pkey_columns is None:
            pkey_columns = [
                c for c in DATABASE_PKEYS[database]
                if c in ('symbol', 'portfolio', 'tag')
            ]
        if not pkey_columns:
            raise ValueError(
                f'no entity pkey columns for database {database!r}; '
                f'pass pkey_columns explicitly'
            )
        self.pkey_columns = list(pkey_columns)
        self.pkeycolumns = self.pkey_columns  # legacy alias

        self.set_pkeys = f'{self._tag}#pkeys'

        self._redis = _build_sync_client()
        self._redis_async: Any | None = None
        self._redis_async_loop: Any | None = None

        # Register CAS script (evalsha + NOSCRIPT fallback handled by redis-py).
        self._cas = self._redis.register_script(_CAS_LUA)
        self._cas_async: Any | None = None

        # Header shim — same API as the old CacheHeader class.
        self.header = CacheHeader(self)

        # Seed the counter only if it does not exist yet. SET NX does the
        # check and the write atomically, so a concurrent writer's INCR can
        # never be overwritten by a late "= 0".
        self._redis.set(self._counter_key(), 0, nx=True)

    # ------------------------------------------------------------------
    # Back-compat property aliases
    # ------------------------------------------------------------------
    @property
    def redis(self):
        """The synchronous client."""
        return self._redis

    @property
    def redis_async(self):
        """The async client for the currently running event loop."""
        return self._get_async()

    # ------------------------------------------------------------------
    # Async client (lazy; bound to the event loop that first asks for it)
    # ------------------------------------------------------------------
    def _get_async(self):
        """Return the async client, rebuilding it if the event loop changed."""
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None
        if self._redis_async is not None and self._redis_async_loop is not loop:
            # Client was created under a different loop: drop it and the
            # script object that was registered on it.
            self._redis_async = None
            self._cas_async = None
            self._redis_async_loop = None
        if self._redis_async is None:
            self._redis_async = _build_async_client()
            self._redis_async_loop = loop
        return self._redis_async

    def _get_async_cas(self):
        """Return the CAS script registered on the current async client."""
        # Resolve the client first: _get_async() clears _cas_async when the
        # loop changed, so a script cached for a previous loop is never reused.
        client = self._get_async()
        if self._cas_async is None:
            self._cas_async = client.register_script(_CAS_LUA)
        return self._cas_async

    async def aclose(self) -> None:
        """Close the async client (if any) and forget everything bound to it."""
        client = self._redis_async
        self._redis_async = None
        self._cas_async = None
        self._redis_async_loop = None
        if client is None:
            return
        # Newer redis-py exposes aclose() and deprecates close(); use
        # whichever this version provides.
        closer = getattr(client, 'aclose', None) or client.close
        try:
            await closer()
        except Exception:
            # Deliberately best-effort: a failed close must not mask the
            # caller's own shutdown path.
            pass

    # ------------------------------------------------------------------
    # Key helpers
    # ------------------------------------------------------------------
    def get_hash(self, pkey: str) -> str:
        """Redis key holding the data for ``pkey``."""
        return f'{self._tag}:{pkey}'

    def _counter_key(self) -> str:
        """Redis key of the write counter."""
        return f'{self._tag}#{_COUNTER_FIELD}'

    @staticmethod
    def _backoff_seconds(attempt: int) -> float:
        """Jittered exponential backoff (seconds) after failed CAS ``attempt``."""
        ceiling_ms = min(
            _CAS_BACKOFF_BASE_MS * (2 ** min(attempt, 6)),
            _CAS_BACKOFF_MAX_MS,
        )
        return random.uniform(0, ceiling_ms) / 1000.0

    # ------------------------------------------------------------------
    # pkey derivation
    # ------------------------------------------------------------------
    def pkey_of(self, value: Mapping[str, Any]) -> str:
        """Build the pkey by joining ``str(value[col])`` for each pkey column with ','.

        Raises:
            KeyError: a pkey column is missing from ``value``.
            ValueError: a column value stringifies to '' or contains '#' / ':'.
        """
        parts = []
        for col in self.pkey_columns:
            if col not in value:
                raise KeyError(
                    f'message missing pkey column {col!r}: keys={list(value.keys())}'
                )
            s = str(value[col])
            if not s:
                raise ValueError(f'empty pkey value for column {col!r}')
            if '#' in s or ':' in s:
                raise ValueError(f'invalid pkey value for {col!r}: {s!r}')
            parts.append(s)
        return ','.join(parts)

    def get_pkey(self, value: Mapping[str, Any]) -> str:
        """Alias of :meth:`pkey_of`."""
        return self.pkey_of(value)

    # ------------------------------------------------------------------
    # Point reads
    # ------------------------------------------------------------------
    def __getitem__(self, pkey: str) -> dict:
        """Return the decoded dict for ``pkey``; ``{}`` when the key is absent."""
        _check_pkey(pkey)
        raw = self._redis.get(self.get_hash(pkey))
        if raw is None:
            return {}
        return bson.BSON.decode(raw)

    def at(self, pkey: str) -> dict:
        """Strict read: like ``self[pkey]`` but raises ``KeyError`` when absent."""
        _check_pkey(pkey)
        raw = self._redis.get(self.get_hash(pkey))
        if raw is None:
            raise KeyError(pkey)
        return bson.BSON.decode(raw)

    def get(self, pkey: str, default: Any = None) -> Any:
        """Return the decoded dict, or ``default`` (``{}`` when default is None) if absent."""
        _check_pkey(pkey)
        raw = self._redis.get(self.get_hash(pkey))
        if raw is None:
            return {} if default is None else default
        return bson.BSON.decode(raw)

    def mget(self, pkeys: Iterable[str]) -> list[dict]:
        """Read many pkeys with one MGET; absent keys come back as ``{}``."""
        pkeys = list(pkeys)
        if not pkeys:
            return []
        for pkey in pkeys:
            _check_pkey(pkey)
        raws = self._redis.mget([self.get_hash(p) for p in pkeys])
        return [bson.BSON.decode(r) if r is not None else {} for r in raws]

    async def aget(self, pkey: str, default: Any = None) -> Any:
        """Async counterpart of :meth:`get`."""
        _check_pkey(pkey)
        r = self._get_async()
        raw = await r.get(self.get_hash(pkey))
        if raw is None:
            return {} if default is None else default
        return bson.BSON.decode(raw)

    async def amget(self, pkeys: Iterable[str]) -> list[dict]:
        """Async counterpart of :meth:`mget`."""
        pkeys = list(pkeys)
        if not pkeys:
            return []
        for pkey in pkeys:
            _check_pkey(pkey)
        r = self._get_async()
        raws = await r.mget([self.get_hash(p) for p in pkeys])
        return [bson.BSON.decode(raw) if raw is not None else {} for raw in raws]

    def exists(self, pkey: str) -> bool:
        """True when the data key for ``pkey`` exists."""
        _check_pkey(pkey)
        return bool(self._redis.exists(self.get_hash(pkey)))

    def key_ttl(self, pkey: str) -> int:
        """Return the Redis TTL reply for the data key of ``pkey``."""
        _check_pkey(pkey)
        return int(self._redis.ttl(self.get_hash(pkey)))

    def load(self) -> dict:
        """Return ``{pkey: value}`` for every pkey in the index."""
        pkeys = self.list_keys('*')
        values = self.mget(pkeys)
        return dict(zip(pkeys, values))

    # ------------------------------------------------------------------
    # Writes (single pkey → CAS Lua, one round-trip in the happy path)
    # ------------------------------------------------------------------
    def __setitem__(self, pkey: str, new_value: Mapping[str, Any]) -> None:
        """Deep-merge ``new_value`` into the stored dict for ``pkey``."""
        _check_pkey(pkey)
        self._cas_merge_sync(pkey, dict(new_value))

    def set(self, new_value: Mapping[str, Any], pkey: str | None = None) -> None:
        """Merge-write ``new_value``; the pkey is derived from it when not given."""
        if pkey is None:
            pkey = self.pkey_of(new_value)
        self[pkey] = new_value

    def set_ex(self, pkey: str, value: Mapping[str, Any], ex: int) -> None:
        """Merge-write ``value`` with an explicit expiry of ``ex`` seconds."""
        _check_pkey(pkey)
        self._cas_merge_sync(pkey, dict(value), ttl=int(ex))

    def __delitem__(self, pkey: str) -> None:
        """Delete the data key and remove ``pkey`` from the index."""
        _check_pkey(pkey)
        pipe = self._redis.pipeline(transaction=False)
        pipe.delete(self.get_hash(pkey))
        pipe.srem(self.set_pkeys, pkey)
        pipe.execute()

    # ------------------------------------------------------------------
    # Batch writes — the Kafka path
    # ------------------------------------------------------------------
    async def apply_batch(
        self,
        messages: Iterable[Mapping[str, Any]],
        mode: str = 'merge',
        strict: bool = False,
    ) -> int:
        """
        Apply a batch of messages with at-least-once semantics.

        Messages are first conflated per pkey so each pkey is written once:
        in ``'merge'`` mode later messages are deep-merged into earlier ones;
        in ``'replace'`` mode the last message for a pkey wins, which is what
        applying the messages one by one would produce.

        Kafka consumers call this with strict=False: messages whose pkey can't
        be derived are skipped (skip counter incremented) instead of poisoning
        the stream. Returns only after Redis has acked every write, so the
        caller can commit the Kafka offset safely.

        Returns:
            Number of distinct pkeys written.

        Raises:
            ValueError: ``mode`` is not ``'merge'`` or ``'replace'``.
            KeyError / ValueError / TypeError: from ``pkey_of`` when ``strict``.
            RuntimeError: CAS retries were exhausted for some pkey.
        """
        if mode not in ('merge', 'replace'):
            raise ValueError(f'invalid mode {mode!r}')
        messages = list(messages)
        if not messages:
            return 0

        merging = mode == 'merge'
        conflated: dict[str, dict] = {}
        skipped = 0
        for msg in messages:
            try:
                pkey = self.pkey_of(msg)
            except (KeyError, ValueError, TypeError):
                if strict:
                    raise
                skipped += 1
                continue
            if merging and pkey in conflated:
                _deep_merge(conflated[pkey], dict(msg))
            else:
                # First sighting, or replace mode (newest snapshot wins).
                conflated[pkey] = dict(msg)

        if skipped:
            try:
                # Use the async client: the sync one would block the event loop.
                await self._get_async().incrby(
                    f'{self._tag}#cache->skip_counter', skipped
                )
            except Exception:
                # The skip counter is diagnostics only; never fail the batch for it.
                pass

        await self._cas_write_many(conflated, mode=mode)
        return len(conflated)

    async def async_set(self, value_or_list) -> int:
        """Back-compat: now a durable write, returns after Redis ack."""
        if isinstance(value_or_list, list):
            return await self.apply_batch(value_or_list)
        return await self.apply_batch([value_or_list])

    # ------------------------------------------------------------------
    # CAS primitives
    # ------------------------------------------------------------------
    def _cas_merge_sync(self, pkey: str, delta: dict, ttl: int | None = None) -> None:
        """Read-merge-CAS loop for one pkey on the sync client.

        Retries up to ``_MAX_CAS_RETRIES`` times, sleeping a jittered backoff
        between attempts (same schedule as the async batch path).

        Raises:
            RuntimeError: every attempt lost the CAS race.
        """
        import time  # local: only needed on the contended path

        data_key = self.get_hash(pkey)
        counter_key = self._counter_key()
        ttl_val = int(ttl if ttl is not None else self.ttl_seconds)

        for attempt in range(_MAX_CAS_RETRIES):
            if attempt:
                time.sleep(self._backoff_seconds(attempt - 1))
            prev = self._redis.get(data_key)
            current = bson.BSON.decode(prev) if prev else {}
            merged = _deep_merge(current, delta) if current else dict(delta)
            new_bytes = bson.BSON.encode(merged)
            ok = self._cas(
                keys=[data_key, self.set_pkeys, counter_key],
                args=[prev if prev else b'', new_bytes, pkey, ttl_val],
                client=self._redis,
            )
            if ok:
                return
        raise RuntimeError(f'CAS retry exhausted for {pkey!r}')

    async def _fetch_prev_async(self, client, pkeys: list[str]) -> dict[str, bytes]:
        """Pipelined GET of the current bytes per pkey (``b''`` when absent)."""
        pipe = client.pipeline(transaction=False)
        for pk in pkeys:
            pipe.get(self.get_hash(pk))
        raws = await pipe.execute()
        return {pk: (raw or b'') for pk, raw in zip(pkeys, raws)}

    async def _cas_write_many(self, deltas: dict[str, dict], mode: str) -> None:
        """CAS-write every ``pkey -> delta`` pair, retrying only the losers.

        Raises:
            RuntimeError: some pkeys still conflicted after ``_MAX_CAS_RETRIES``.
        """
        if not deltas:
            return
        r = self._get_async()
        counter_key = self._counter_key()
        ttl = self.ttl_seconds
        script = self._get_async_cas()

        # Phase 1: pipelined GET (same-slot; cluster allows this).
        attempts = await self._fetch_prev_async(r, list(deltas))

        # Phase 2: concurrent CAS via asyncio.gather. Cluster blocks evalsha
        # inside pipelines, so each Script call is routed independently.
        async def _one(pkey: str, prev: bytes):
            delta = deltas[pkey]
            if mode == 'merge':
                prev_dict = bson.BSON.decode(prev) if prev else {}
                merged = (
                    _deep_merge(prev_dict, delta) if prev_dict else dict(delta)
                )
            else:
                merged = dict(delta)
            new_bytes = bson.BSON.encode(merged)
            return await script(
                keys=[self.get_hash(pkey), self.set_pkeys, counter_key],
                args=[prev, new_bytes, pkey, ttl],
                client=r,
            )

        for attempt in range(_MAX_CAS_RETRIES):
            if not attempts:
                return
            ordered = list(attempts.items())
            results = await asyncio.gather(
                *(_one(pk, prev) for pk, prev in ordered)
            )
            retry_keys = [pk for (pk, _), ok in zip(ordered, results) if not ok]
            if not retry_keys:
                return
            await asyncio.sleep(self._backoff_seconds(attempt))
            # Re-read only the pkeys that lost the race.
            attempts = await self._fetch_prev_async(r, retry_keys)

        raise RuntimeError(f'CAS retry exhausted for {list(attempts.keys())}')

    # ------------------------------------------------------------------
    # Enumeration — SMEMBERS on the pkey SET (fast path)
    # ------------------------------------------------------------------
    def list_keys(self, keyword: str = '*', count: int | None = None) -> list[str]:
        """SMEMBERS the pkey set; optional fnmatch filter and count limit.

        Empty-string members from legacy drift are dropped — they can't
        correspond to a valid data key.
        """
        members = self._redis.smembers(self.set_pkeys)
        decoded = [
            m.decode('utf-8') if isinstance(m, bytes) else m
            for m in members
        ]
        decoded = [k for k in decoded if k]
        if keyword and keyword != '*':
            decoded = [k for k in decoded if fnmatch(k, keyword)]
        if count is not None:
            decoded = decoded[:count]
        return decoded

    def update_keys(self, keyword: str = '*', count: int | None = None) -> list[str]:
        """
        Reconciliation: SCAN {path}:<keyword> and repair the pkey SET.

        Run this manually if keys expired via TTL (or were deleted out of band)
        and the SET holds ghost entries. The happy path does NOT need this —
        every write atomically adds the pkey via the CAS Lua script.

        The SET is repaired in place rather than deleted and rebuilt: pkeys
        found by the scan are added, and indexed pkeys matching ``keyword``
        are removed only after an EXISTS check confirms their data key is
        gone. The index is therefore never transiently empty, pkeys outside
        ``keyword`` are left alone, and pkeys written while this runs are kept.

        Args:
            keyword: glob applied to the pkey part of the data keys.
            count: SCAN ``COUNT`` hint (batch size per cursor step, not a limit).

        Returns:
            The pkeys whose data keys were found by the scan.
        """
        pattern = f'{self._tag}:{keyword}'
        scan_kw: dict = {'match': pattern}
        if count is not None:
            scan_kw['count'] = count
        prefix = f'{self._tag}:'
        plen = len(prefix)

        # Snapshot the index before scanning so anything added during the
        # scan is not a removal candidate.
        indexed = {
            m.decode('utf-8') if isinstance(m, bytes) else m
            for m in self._redis.smembers(self.set_pkeys)
        }

        found: list[str] = []
        for key in self._redis.scan_iter(**scan_kw):
            key_s = key.decode('utf-8') if isinstance(key, bytes) else key
            if not key_s.startswith(prefix):
                continue
            found.append(key_s[plen:])

        for i in range(0, len(found), 1000):
            self._redis.sadd(self.set_pkeys, *found[i:i + 1000])

        live = set(found)
        candidates = [
            p for p in indexed
            if p not in live and (keyword == '*' or fnmatch(p, keyword))
        ]
        for i in range(0, len(candidates), 1000):
            chunk = candidates[i:i + 1000]
            pipe = self._redis.pipeline(transaction=False)
            for p in chunk:
                pipe.exists(self.get_hash(p))
            flags = pipe.execute()
            ghosts = [p for p, alive in zip(chunk, flags) if not alive]
            if ghosts:
                self._redis.srem(self.set_pkeys, *ghosts)
        return found

    def scan(self, keyword: str = '*', count: int | None = None) -> Iterator[str]:
        """Iterator form of list_keys."""
        yield from self.list_keys(keyword=keyword, count=count)

    def __iter__(self) -> Iterator[str]:
        """Iterate over every pkey in the index."""
        yield from self.list_keys()

    # ------------------------------------------------------------------
    # Admin
    # ------------------------------------------------------------------
    def clear(self) -> None:
        """Delete all indexed data keys, the pkey SET and every header field."""
        pkeys = self.list_keys('*')
        if pkeys:
            data_keys = [self.get_hash(p) for p in pkeys]
            for i in range(0, len(data_keys), 1000):
                self._redis.delete(*data_keys[i:i + 1000])
        self._redis.delete(self.set_pkeys)
        header_fields = list(self.header)
        if header_fields:
            hkeys = [self.header.get_hash(f) for f in header_fields]
            for i in range(0, len(hkeys), 1000):
                self._redis.delete(*hkeys[i:i + 1000])

    def recursive_update(self, original: dict, updates: Mapping) -> dict:
        """Deep-merge ``updates`` into ``original`` in place and return it."""
        return _deep_merge(original, updates)
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
class CacheHeader:
    """Dict-like view over {path}#<field>. Same API as the 6.80.7 class.

    Values are passed to and returned from Redis as-is; with the owning
    cache's clients configured for raw responses, reads yield ``bytes``
    (or ``None`` when the field is absent).
    """

    def __init__(self, cache: CacheRedis):
        """Bind the view to ``cache``; its ``_tag`` and clients are used for every call."""
        self.cache = cache

    def get_hash(self, field: str) -> str:
        """Redis key that stores header ``field``."""
        return f'{self.cache._tag}#{field}'

    def __getitem__(self, field: str):
        """Raw stored value of ``field`` (``None`` when absent; never raises KeyError)."""
        return self.cache._redis.get(self.get_hash(field))

    def get(self, field: str, default=None):
        """Raw stored value of ``field``, or ``default`` when absent."""
        val = self.cache._redis.get(self.get_hash(field))
        return val if val is not None else default

    def __setitem__(self, field: str, value) -> None:
        """Store ``value`` under ``field``."""
        self.cache._redis.set(self.get_hash(field), value)

    def set(self, field: str, value) -> None:
        """Method form of ``header[field] = value``."""
        self.cache._redis.set(self.get_hash(field), value)

    def __delitem__(self, field: str) -> None:
        """Delete ``field`` (no error when it does not exist)."""
        self.cache._redis.delete(self.get_hash(field))

    def __iter__(self):
        """Yield every header field name under this path, excluding the pkey SET."""
        # Enumerate via SCAN {path}#* — filters out the '#pkeys' SET itself.
        prefix = f'{self.cache._tag}#'
        plen = len(prefix)
        pkeys_key = self.cache.set_pkeys
        for key in self.cache._redis.scan_iter(match=f'{prefix}*'):
            key_s = key.decode('utf-8') if isinstance(key, bytes) else key
            if key_s == pkeys_key:
                continue
            # Strip the exact prefix rather than splitting on the first '#':
            # the path itself may contain '#', and glob characters in the
            # path could make SCAN return keys from another path.
            if not key_s.startswith(prefix):
                continue
            field = key_s[plen:]
            if field:
                yield field

    def list_keys(self, keyword: str = '*', count: int | None = None) -> list[str]:
        """Header field names matching the fnmatch ``keyword``, at most ``count`` of them."""
        out = []
        for field in self:
            if keyword == '*' or fnmatch(field, keyword):
                out.append(field)
            if count is not None and len(out) >= count:
                break
        return out

    def incrby(self, field: str, value: int) -> int:
        """Atomically add ``value`` to ``field`` and return the new total."""
        return int(self.cache._redis.incrby(self.get_hash(field), value))

    async def async_incrby(self, field: str, value: int) -> int:
        """Async counterpart of :meth:`incrby`, using the cache's async client."""
        r = self.cache._get_async()
        return int(await r.incrby(self.get_hash(field), value))
|
|
@@ -183,7 +183,7 @@ class CollectionMongoDB:
|
|
|
183
183
|
item['date'] = pd.Timestamp(item['date']).normalize()
|
|
184
184
|
elif self.period == 'M15':
|
|
185
185
|
item = item.copy()
|
|
186
|
-
item['date'] = pd.Timestamp(item['date']).floor('15min')
|
|
186
|
+
item['date'] = pd.Timestamp(item['date']).floor('15min')
|
|
187
187
|
elif self.period == 'M1':
|
|
188
188
|
item = item.copy()
|
|
189
189
|
item['date'] = pd.Timestamp(item['date']).floor('min')
|
|
@@ -376,7 +376,7 @@ class Logger:
|
|
|
376
376
|
if dfnewlines.empty:
|
|
377
377
|
return dfnewlines
|
|
378
378
|
|
|
379
|
-
dfnewlines['asctime'] = pd.to_datetime(dfnewlines['asctime'],format='
|
|
379
|
+
dfnewlines['asctime'] = pd.to_datetime(dfnewlines['asctime'], format='ISO8601', errors='coerce')
|
|
380
380
|
|
|
381
381
|
# Use cached max asctime
|
|
382
382
|
max_asctime = Logger._max_asctime
|
|
@@ -243,7 +243,19 @@ while True:
|
|
|
243
243
|
else:
|
|
244
244
|
try:
|
|
245
245
|
proc = scheduler_routine.get('process')
|
|
246
|
-
is_alive =
|
|
246
|
+
is_alive = (
|
|
247
|
+
proc is not None
|
|
248
|
+
and proc.is_running()
|
|
249
|
+
and proc.status() != psutil.STATUS_ZOMBIE
|
|
250
|
+
)
|
|
251
|
+
if proc is not None and not is_alive:
|
|
252
|
+
# Reap the zombie so the PID is freed and the entry clears from ps
|
|
253
|
+
try:
|
|
254
|
+
proc.wait(timeout=0)
|
|
255
|
+
except (psutil.TimeoutExpired, psutil.NoSuchProcess):
|
|
256
|
+
pass
|
|
257
|
+
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
|
258
|
+
is_alive = False
|
|
247
259
|
except Exception:
|
|
248
260
|
is_alive = False
|
|
249
261
|
if not is_alive:
|