shareddata 6.83.4__tar.gz → 6.83.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. {shareddata-6.83.4/src/shareddata.egg-info → shareddata-6.83.7}/PKG-INFO +1 -1
  2. {shareddata-6.83.4 → shareddata-6.83.7}/setup.py +1 -1
  3. shareddata-6.83.7/src/SharedData/CacheRedis.py +612 -0
  4. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/CollectionMongoDB.py +1 -1
  5. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Logger.py +1 -1
  6. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/Worker.py +13 -1
  7. {shareddata-6.83.4 → shareddata-6.83.7/src/shareddata.egg-info}/PKG-INFO +1 -1
  8. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_cache_redis.py +33 -46
  9. shareddata-6.83.4/src/SharedData/CacheRedis.py +0 -715
  10. {shareddata-6.83.4 → shareddata-6.83.7}/LICENSE +0 -0
  11. {shareddata-6.83.4 → shareddata-6.83.7}/MANIFEST.in +0 -0
  12. {shareddata-6.83.4 → shareddata-6.83.7}/README.md +0 -0
  13. {shareddata-6.83.4 → shareddata-6.83.7}/pyproject.toml +0 -0
  14. {shareddata-6.83.4 → shareddata-6.83.7}/setup.cfg +0 -0
  15. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/ServerGunicorn.py +0 -0
  16. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/ServerWaitress.py +0 -0
  17. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/__init__.py +0 -0
  18. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/auth.py +0 -0
  19. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/constants.py +0 -0
  20. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/__init__.py +0 -0
  21. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/cache.py +0 -0
  22. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/collections.py +0 -0
  23. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/metadata.py +0 -0
  24. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/system.py +0 -0
  25. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/tables.py +0 -0
  26. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/timeseries.py +0 -0
  27. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/routes/workers.py +0 -0
  28. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/API/utils.py +0 -0
  29. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Database.py +0 -0
  30. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Defaults.py +0 -0
  31. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/AWSEC2.py +0 -0
  32. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/AWSS3.py +0 -0
  33. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/AutoDocstrings.py +0 -0
  34. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/ClientAPI.py +0 -0
  35. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/ClientSocket.py +0 -0
  36. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/ClientWebSocket.py +0 -0
  37. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/LogHandlerAPI.py +0 -0
  38. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/MongoDBClient.py +0 -0
  39. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/SaveTables.py +0 -0
  40. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/ServerSocket.py +0 -0
  41. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/ServerWebSocket.py +0 -0
  42. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/StreamsCache.py +0 -0
  43. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/StreamsPersist.py +0 -0
  44. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/SyncTable.py +0 -0
  45. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/TunnelWebSocket.py +0 -0
  46. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/IO/__init__.py +0 -0
  47. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Metadata.py +0 -0
  48. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/MultiProc.py +0 -0
  49. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/OpenFIGI.py +0 -0
  50. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/BatchJob.py +0 -0
  51. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/Schedule.py +0 -0
  52. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/ScheduleMonitor.py +0 -0
  53. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/Scheduler.py +0 -0
  54. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/WorkerLib.py +0 -0
  55. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/WorkerPool.py +0 -0
  56. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Routines/__init__.py +0 -0
  57. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/SharedData.py +0 -0
  58. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/SharedNumpy.py +0 -0
  59. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/StreamKafka.py +0 -0
  60. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Symbol.py +0 -0
  61. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Table.py +0 -0
  62. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableDisk.py +0 -0
  63. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndex.py +0 -0
  64. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndexJitFunctions.py +0 -0
  65. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndexJitFunctionsManual.py +0 -0
  66. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndexJitGenerate.py +0 -0
  67. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndexJitHash.py +0 -0
  68. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TableIndexJitLoc.py +0 -0
  69. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TimeSeriesDisk.py +0 -0
  70. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/TimeseriesContainer.py +0 -0
  71. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Users.py +0 -0
  72. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/Utils.py +0 -0
  73. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/__init__.py +0 -0
  74. {shareddata-6.83.4 → shareddata-6.83.7}/src/SharedData/sharedmutexwin.pyd +0 -0
  75. {shareddata-6.83.4 → shareddata-6.83.7}/src/shareddata.egg-info/SOURCES.txt +0 -0
  76. {shareddata-6.83.4 → shareddata-6.83.7}/src/shareddata.egg-info/dependency_links.txt +0 -0
  77. {shareddata-6.83.4 → shareddata-6.83.7}/src/shareddata.egg-info/requires.txt +0 -0
  78. {shareddata-6.83.4 → shareddata-6.83.7}/src/shareddata.egg-info/top_level.txt +0 -0
  79. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_api_collection.py +0 -0
  80. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_api_collection_loopback.py +0 -0
  81. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_api_table.py +0 -0
  82. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_api_table_schemaless.py +0 -0
  83. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_api_table_schemaless_extend.py +0 -0
  84. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_bson_last_pos_reuse.py +0 -0
  85. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_extend_rt.py +0 -0
  86. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_loc.py +0 -0
  87. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_metadata.py +0 -0
  88. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_read_write_tail.py +0 -0
  89. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_stream_loopback_async.py +0 -0
  90. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_timeseries.py +0 -0
  91. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_timeseries_api.py +0 -0
  92. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_upsert_unordered_get_date_loc_d1.py +0 -0
  93. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_upsert_unordered_get_date_loc_m1.py +0 -0
  94. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_upsert_unordered_get_date_loc_m15.py +0 -0
  95. {shareddata-6.83.4 → shareddata-6.83.7}/tests/test_upsert_unordered_positions_m1.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: shareddata
3
- Version: 6.83.4
3
+ Version: 6.83.7
4
4
  Summary: Memory Mapped / Shared Memory Database with S3 repository
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -34,7 +34,7 @@ install_requires = [
34
34
 
35
35
  setup(
36
36
  name='shareddata',
37
- version='6.83.4',
37
+ version='6.83.7',
38
38
  description='Memory Mapped / Shared Memory Database with S3 repository',
39
39
  long_description=open('README.md').read(),
40
40
  long_description_content_type='text/markdown',
@@ -0,0 +1,612 @@
1
+ """
2
+ Redis-backed last-state cache for Kafka stream consumers.
3
+
4
+ Key layout (all share hash-tag '{path}' → same cluster slot):
5
+ {path}:<pkey> BSON-encoded dict, one per logical entity
6
+ {path}#pkeys SET of all pkeys (enumeration index)
7
+ {path}#<field> header / counter scalars (CacheHeader)
8
+
9
+ Design:
10
+ - At-least-once durability: Kafka path calls apply_batch(), awaits the
11
+ Redis ack, then commits the offset. No data lost on crash.
12
+ - Atomic read-modify-write: CAS via Lua script; SET + SADD + INCR happen
13
+ in one server-side operation so the #pkeys index can never drift from
14
+ the actual keyspace on a successful write.
15
+ - Cluster-safe: data, pkey-set and header fields all share the '{path}'
16
+ hash-tag, so multi-key Lua and SMEMBERS stay slot-local.
17
+ - BSON encoding (handles datetime natively; same format as the Kafka
18
+ pipeline).
19
+
20
+ Enumeration:
21
+ - list_keys() → SMEMBERS {path}#pkeys (fast; one round-trip)
22
+ - update_keys() → SCAN the data prefix and rebuild the SET (reconciliation
23
+ tool for the rare TTL-expiry drift case; call manually if needed)
24
+ """
25
+ from __future__ import annotations
26
+
27
+ import asyncio
28
+ import os
29
+ import random
30
+ from collections.abc import Iterable, Iterator, Mapping
31
+ from fnmatch import fnmatch
32
+ from typing import Any
33
+
34
+ import bson
35
+ from redis import Redis
36
+ from redis.cluster import ClusterNode, RedisCluster
37
+ from redis.asyncio import Redis as RedisAsync
38
+ from redis.asyncio.cluster import ClusterNode as ClusterNodeAsync
39
+ from redis.asyncio.cluster import RedisCluster as RedisClusterAsync
40
+
41
+ from SharedData.Database import DATABASE_PKEYS
42
+
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Lua: atomic CAS + SADD pkey-set + INCR counter
46
+ # ---------------------------------------------------------------------------
47
+ # KEYS[1] = {path}:<pkey> data
48
+ # KEYS[2] = {path}#pkeys pkey SET
49
+ # KEYS[3] = {path}#cache->counter counter
50
+ # ARGV[1] = expected previous bytes ('' if caller expects key to be absent)
51
+ # ARGV[2] = new bytes (BSON-encoded merged dict)
52
+ # ARGV[3] = pkey string (for SADD)
53
+ # ARGV[4] = TTL seconds (0 = no TTL)
54
+ # Returns: 1 on success, 0 on CAS conflict (caller retries).
55
+ _CAS_LUA = r"""
56
+ local current = redis.call('GET', KEYS[1])
57
+ if current == false then current = '' end
58
+ if current ~= ARGV[1] then
59
+ return 0
60
+ end
61
+ local ttl = tonumber(ARGV[4])
62
+ if ttl and ttl > 0 then
63
+ redis.call('SET', KEYS[1], ARGV[2], 'EX', ttl)
64
+ else
65
+ redis.call('SET', KEYS[1], ARGV[2])
66
+ end
67
+ redis.call('SADD', KEYS[2], ARGV[3])
68
+ redis.call('INCR', KEYS[3])
69
+ return 1
70
+ """
71
+
72
+
73
+ _COUNTER_FIELD = 'cache->counter'
74
+ # Production Kafka path: 1 writer per pkey (partition ownership) → CAS always
75
+ # succeeds on attempt 1. Retries matter only when a strategy/script writes to
76
+ # the same pkey concurrently with the consumer.
77
+ _MAX_CAS_RETRIES = 32
78
+ _CAS_BACKOFF_BASE_MS = 0.5
79
+ _CAS_BACKOFF_MAX_MS = 50.0
80
+
81
+
82
+ def _deep_merge(target: dict, source: Mapping) -> dict:
83
+ """Recursively merge source into target. Mutates target; returns it."""
84
+ for k, v in source.items():
85
+ if isinstance(v, Mapping) and isinstance(target.get(k), dict):
86
+ _deep_merge(target[k], v)
87
+ else:
88
+ target[k] = v
89
+ return target
90
+
91
+
92
+ def _check_pkey(pkey: str) -> None:
93
+ if not isinstance(pkey, str):
94
+ raise TypeError('pkey must be a string')
95
+ if not pkey:
96
+ raise ValueError('pkey must be non-empty')
97
+ if '#' in pkey or ':' in pkey:
98
+ raise ValueError(f'pkey cannot contain # or : (got {pkey!r})')
99
+
100
+
101
+ def _parse_cluster_nodes() -> list[tuple[str, int]]:
102
+ raw = os.environ.get('REDIS_CLUSTER_NODES')
103
+ if not raw:
104
+ raise RuntimeError('REDIS_CLUSTER_NODES not defined')
105
+ nodes = []
106
+ for part in raw.split(','):
107
+ host, port = part.strip().split(':')
108
+ nodes.append((host.strip(), int(port)))
109
+ return nodes
110
+
111
+
112
def _build_sync_client():
    """Create the synchronous Redis client from ``REDIS_CLUSTER_NODES``.

    More than one configured node yields a ``RedisCluster`` seeded with every
    node; otherwise a plain ``Redis`` client for the single node is returned.
    ``decode_responses=False`` is passed in both cases.
    """
    endpoints = _parse_cluster_nodes()
    if len(endpoints) <= 1:
        single_host, single_port = endpoints[0]
        return Redis(host=single_host, port=single_port, decode_responses=False)
    seeds = [ClusterNode(node_host, node_port) for node_host, node_port in endpoints]
    return RedisCluster(startup_nodes=seeds, decode_responses=False)
121
+
122
+
123
def _build_async_client():
    """Create the asyncio Redis client from ``REDIS_CLUSTER_NODES``.

    More than one configured node yields an async ``RedisCluster`` seeded
    with every node; otherwise a plain async ``Redis`` client for the single
    node is returned. ``decode_responses=False`` is passed in both cases.
    """
    endpoints = _parse_cluster_nodes()
    if len(endpoints) <= 1:
        single_host, single_port = endpoints[0]
        return RedisAsync(
            host=single_host, port=single_port, decode_responses=False
        )
    seeds = [
        ClusterNodeAsync(node_host, node_port)
        for node_host, node_port in endpoints
    ]
    return RedisClusterAsync(startup_nodes=seeds, decode_responses=False)
132
+
133
+
134
class CacheRedis:
    """Redis-backed last-state cache holding one BSON document per pkey.

    Every key carries the ``{path}`` hash-tag:

      * ``{path}:<pkey>``   BSON-encoded dict for one entity
      * ``{path}#pkeys``    SET of known pkeys (enumeration index)
      * ``{path}#<field>``  header / counter scalars (see ``CacheHeader``)

    Single writes go through a compare-and-set Lua script that sets the
    document, adds the pkey to the index and bumps the counter in one call.
    """

    def __init__(
        self,
        database: str,
        period: str,
        source: str,
        tablename: str,
        user: str = 'master',
        ttl_seconds: int = 0,
        pkey_columns: list[str] | None = None,
    ):
        """Connect to Redis and prepare key names for one cache table.

        Args:
            database: must be a key of ``DATABASE_PKEYS``.
            period, source, tablename, user: components of the cache path.
            ttl_seconds: default TTL applied to writes (0 = no TTL).
            pkey_columns: columns joined to form a pkey. Defaults to the
                ``symbol`` / ``portfolio`` / ``tag`` columns of the database.

        Raises:
            ValueError: unknown database, or no pkey columns could be derived.
        """
        if database not in DATABASE_PKEYS:
            raise ValueError(f'unknown database {database!r}')
        self.database = database
        self.period = period
        self.source = source
        self.tablename = tablename
        self.user = user
        self.ttl_seconds = int(ttl_seconds)

        self.path = f'{user}/{database}/{period}/{source}/cache/{tablename}'
        self._tag = '{' + self.path + '}'

        # 6.80.7 pkey derivation: keep only the entity-identifier columns.
        # Override via pkey_columns if a caller wants the full DATABASE_PKEYS.
        if pkey_columns is None:
            pkey_columns = [
                c for c in DATABASE_PKEYS[database]
                if c in ('symbol', 'portfolio', 'tag')
            ]
        if not pkey_columns:
            raise ValueError(
                f'no entity pkey columns for database {database!r}; '
                f'pass pkey_columns explicitly'
            )
        self.pkey_columns = list(pkey_columns)
        self.pkeycolumns = self.pkey_columns  # legacy alias

        self.set_pkeys = f'{self._tag}#pkeys'

        self._redis = _build_sync_client()
        self._redis_async: Any | None = None
        self._redis_async_loop: Any | None = None

        # Register CAS script (evalsha + NOSCRIPT fallback handled by redis-py).
        self._cas = self._redis.register_script(_CAS_LUA)
        self._cas_async: Any | None = None

        # Header shim — same API as the old CacheHeader class.
        self.header = CacheHeader(self)

        # Seed the counter only if it does not exist. SET NX is atomic, so a
        # counter that another process has already incremented is never
        # reset (a separate GET followed by SET could do that).
        self._redis.set(self._counter_key(), 0, nx=True)

    # ------------------------------------------------------------------
    # Back-compat property aliases
    # ------------------------------------------------------------------
    @property
    def redis(self):
        """The synchronous Redis client."""
        return self._redis

    @property
    def redis_async(self):
        """The asyncio Redis client for the current event loop."""
        return self._get_async()

    # ------------------------------------------------------------------
    # Async client (lazy; bound to the event loop that first asks for it)
    # ------------------------------------------------------------------
    def _get_async(self):
        """Return the async client, rebuilding it if the event loop changed."""
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None
        if self._redis_async is not None and self._redis_async_loop is not loop:
            # The cached client belongs to another loop; it cannot be awaited
            # closed from here, so it is simply dropped.
            self._redis_async = None
            self._cas_async = None
            self._redis_async_loop = None
        if self._redis_async is None:
            self._redis_async = _build_async_client()
            self._redis_async_loop = loop
        return self._redis_async

    def _get_async_cas(self):
        """Return the CAS script registered on the async client (lazy)."""
        if self._cas_async is None:
            self._cas_async = self._get_async().register_script(_CAS_LUA)
        return self._cas_async

    async def aclose(self) -> None:
        """Close the async client (best effort) and forget all async state."""
        client = self._redis_async
        # Reset everything derived from the client so a later call starts
        # from a clean slate instead of reusing a script bound to it.
        self._redis_async = None
        self._redis_async_loop = None
        self._cas_async = None
        if client is None:
            return
        # Prefer aclose() where the client provides it; fall back to close().
        closer = getattr(client, 'aclose', None) or client.close
        try:
            await closer()
        except Exception:
            # Deliberate best effort: a failed close must not mask shutdown.
            pass

    # ------------------------------------------------------------------
    # Key helpers
    # ------------------------------------------------------------------
    def get_hash(self, pkey: str) -> str:
        """Return the Redis data key for ``pkey``."""
        return f'{self._tag}:{pkey}'

    def _counter_key(self) -> str:
        """Return the Redis key of the write counter."""
        return f'{self._tag}#{_COUNTER_FIELD}'

    @staticmethod
    def _backoff_seconds(attempt: int) -> float:
        """Return a jittered CAS retry delay in seconds for ``attempt`` (0-based)."""
        ceiling_ms = min(
            _CAS_BACKOFF_BASE_MS * (2 ** min(attempt, 6)),
            _CAS_BACKOFF_MAX_MS,
        )
        return random.uniform(0, ceiling_ms) / 1000.0

    # ------------------------------------------------------------------
    # pkey derivation
    # ------------------------------------------------------------------
    def pkey_of(self, value: Mapping[str, Any]) -> str:
        """Build the pkey of a message by joining its pkey columns with ','.

        Raises:
            KeyError: a pkey column is missing from ``value``.
            ValueError: a pkey value is empty or contains ``#`` / ``:``.
        """
        parts = []
        for col in self.pkey_columns:
            if col not in value:
                raise KeyError(
                    f'message missing pkey column {col!r}: keys={list(value.keys())}'
                )
            s = str(value[col])
            if not s:
                raise ValueError(f'empty pkey value for column {col!r}')
            if '#' in s or ':' in s:
                raise ValueError(f'invalid pkey value for {col!r}: {s!r}')
            parts.append(s)
        return ','.join(parts)

    def get_pkey(self, value: Mapping[str, Any]) -> str:
        """Alias of :meth:`pkey_of`."""
        return self.pkey_of(value)

    # ------------------------------------------------------------------
    # Point reads
    # ------------------------------------------------------------------
    def _read_raw(self, pkey: str):
        """Validate ``pkey`` and GET its raw stored bytes (``None`` if absent)."""
        _check_pkey(pkey)
        return self._redis.get(self.get_hash(pkey))

    def __getitem__(self, pkey: str) -> dict:
        """Return the stored dict, or ``{}`` when the key is absent."""
        raw = self._read_raw(pkey)
        if raw is None:
            return {}
        return bson.BSON.decode(raw)

    def at(self, pkey: str) -> dict:
        """Return the stored dict; raise ``KeyError`` when the key is absent."""
        raw = self._read_raw(pkey)
        if raw is None:
            raise KeyError(pkey)
        return bson.BSON.decode(raw)

    def get(self, pkey: str, default: Any = None) -> Any:
        """Return the stored dict, or ``default`` when absent.

        A ``default`` of ``None`` yields ``{}`` rather than ``None``.
        """
        raw = self._read_raw(pkey)
        if raw is None:
            return {} if default is None else default
        return bson.BSON.decode(raw)

    def mget(self, pkeys: Iterable[str]) -> list[dict]:
        """Return the dicts for ``pkeys`` in order; absent keys give ``{}``."""
        pkeys = list(pkeys)
        if not pkeys:
            return []
        for pkey in pkeys:
            _check_pkey(pkey)
        raws = self._redis.mget([self.get_hash(p) for p in pkeys])
        return [bson.BSON.decode(r) if r is not None else {} for r in raws]

    async def aget(self, pkey: str, default: Any = None) -> Any:
        """Async counterpart of :meth:`get`."""
        _check_pkey(pkey)
        r = self._get_async()
        raw = await r.get(self.get_hash(pkey))
        if raw is None:
            return {} if default is None else default
        return bson.BSON.decode(raw)

    async def amget(self, pkeys: Iterable[str]) -> list[dict]:
        """Async counterpart of :meth:`mget`."""
        pkeys = list(pkeys)
        if not pkeys:
            return []
        for pkey in pkeys:
            _check_pkey(pkey)
        r = self._get_async()
        raws = await r.mget([self.get_hash(p) for p in pkeys])
        return [bson.BSON.decode(raw) if raw is not None else {} for raw in raws]

    def exists(self, pkey: str) -> bool:
        """Return whether a data key exists for ``pkey``."""
        _check_pkey(pkey)
        return bool(self._redis.exists(self.get_hash(pkey)))

    def key_ttl(self, pkey: str) -> int:
        """Return the Redis TTL reply for the data key of ``pkey``."""
        _check_pkey(pkey)
        return int(self._redis.ttl(self.get_hash(pkey)))

    def load(self) -> dict:
        """Return ``{pkey: dict}`` for every pkey listed in the index SET."""
        pkeys = self.list_keys('*')
        values = self.mget(pkeys)
        return dict(zip(pkeys, values))

    # ------------------------------------------------------------------
    # Writes (single pkey → CAS Lua, one round-trip in the happy path)
    # ------------------------------------------------------------------
    def __setitem__(self, pkey: str, new_value: Mapping[str, Any]) -> None:
        """Deep-merge ``new_value`` into the stored dict for ``pkey``."""
        _check_pkey(pkey)
        self._cas_merge_sync(pkey, dict(new_value))

    def set(self, new_value: Mapping[str, Any], pkey: str | None = None) -> None:
        """Merge ``new_value``; the pkey is derived from it when not given."""
        if pkey is None:
            pkey = self.pkey_of(new_value)
        self[pkey] = new_value

    def set_ex(self, pkey: str, value: Mapping[str, Any], ex: int) -> None:
        """Merge ``value`` into ``pkey`` using ``ex`` seconds as the TTL."""
        _check_pkey(pkey)
        self._cas_merge_sync(pkey, dict(value), ttl=int(ex))

    def __delitem__(self, pkey: str) -> None:
        """Delete the data key and drop ``pkey`` from the index SET."""
        _check_pkey(pkey)
        pipe = self._redis.pipeline(transaction=False)
        pipe.delete(self.get_hash(pkey))
        pipe.srem(self.set_pkeys, pkey)
        pipe.execute()

    # ------------------------------------------------------------------
    # Batch writes — the Kafka path
    # ------------------------------------------------------------------
    async def apply_batch(
        self,
        messages: Iterable[Mapping[str, Any]],
        mode: str = 'merge',
        strict: bool = False,
    ) -> int:
        """
        Apply a batch of messages with at-least-once semantics.

        Messages sharing a pkey are conflated before writing: in ``'merge'``
        mode they are deep-merged in arrival order; in ``'replace'`` mode the
        last message wins, matching what sequential replaces would leave
        behind. The caller's message objects are never mutated.

        Kafka consumers call this with strict=False: messages whose pkey can't
        be derived are skipped (skip counter incremented) instead of poisoning
        the stream. Returns only after Redis has acked every write, so the
        caller can commit the Kafka offset safely.

        Returns:
            The number of distinct pkeys written.

        Raises:
            ValueError: ``mode`` is not ``'merge'`` or ``'replace'``.
            KeyError / ValueError / TypeError: pkey derivation failed and
                ``strict`` is true.
            RuntimeError: CAS retries were exhausted.
        """
        import copy

        if mode not in ('merge', 'replace'):
            raise ValueError(f'invalid mode {mode!r}')
        messages = list(messages)
        if not messages:
            return 0

        conflated: dict[str, dict] = {}
        # pkeys whose conflated dict is a private deep copy and may therefore
        # be merged into without touching caller-owned nested containers.
        private: set[str] = set()
        skipped = 0
        for msg in messages:
            try:
                pkey = self.pkey_of(msg)
            except (KeyError, ValueError, TypeError):
                if strict:
                    raise
                skipped += 1
                continue
            if mode == 'replace' or pkey not in conflated:
                # Common path: one message per pkey, shallow copy is enough
                # because nothing is merged into it.
                conflated[pkey] = dict(msg)
                private.discard(pkey)
                continue
            if pkey not in private:
                conflated[pkey] = copy.deepcopy(conflated[pkey])
                private.add(pkey)
            # Copy the incoming message too: the merge may store its nested
            # dicts, and a later duplicate would then be merged into them.
            _deep_merge(conflated[pkey], copy.deepcopy(dict(msg)))

        if skipped:
            try:
                # Use the async client so the event loop is not blocked.
                await self._get_async().incrby(
                    f'{self._tag}#cache->skip_counter', skipped
                )
            except Exception:
                # The skip counter is diagnostic only; never fail the batch.
                pass

        await self._cas_write_many(conflated, mode=mode)
        return len(conflated)

    async def async_set(self, value_or_list) -> int:
        """Back-compat: now a durable write, returns after Redis ack."""
        if isinstance(value_or_list, list):
            return await self.apply_batch(value_or_list)
        return await self.apply_batch([value_or_list])

    # ------------------------------------------------------------------
    # CAS primitives
    # ------------------------------------------------------------------
    def _cas_merge_sync(self, pkey: str, delta: dict, ttl: int | None = None) -> None:
        """Read-merge-CAS ``delta`` into ``pkey``, retrying on conflict.

        Args:
            ttl: TTL in seconds for this write; ``None`` uses ``ttl_seconds``.

        Raises:
            RuntimeError: every CAS attempt lost to a concurrent writer.
        """
        import time

        data_key = self.get_hash(pkey)
        counter_key = self._counter_key()
        ttl_val = int(ttl if ttl is not None else self.ttl_seconds)

        for attempt in range(_MAX_CAS_RETRIES):
            prev = self._redis.get(data_key)
            current = bson.BSON.decode(prev) if prev else {}
            merged = _deep_merge(current, delta) if current else dict(delta)
            new_bytes = bson.BSON.encode(merged)
            ok = self._cas(
                keys=[data_key, self.set_pkeys, counter_key],
                args=[prev if prev else b'', new_bytes, pkey, ttl_val],
                client=self._redis,
            )
            if ok:
                return
            if attempt + 1 < _MAX_CAS_RETRIES:
                # Jittered backoff (same schedule as the async path) so
                # competing writers do not retry in lockstep.
                time.sleep(self._backoff_seconds(attempt))
        raise RuntimeError(f'CAS retry exhausted for {pkey!r}')

    async def _cas_write_many(self, deltas: dict[str, dict], mode: str) -> None:
        """CAS-write every ``pkey -> delta`` pair, retrying only the losers.

        Each attempt does a pipelined GET of the pending keys followed by
        concurrent CAS script calls. In ``'merge'`` mode the delta is
        deep-merged into the value just read; otherwise it replaces it.

        Raises:
            RuntimeError: some pkeys still conflicted after all attempts.
        """
        if not deltas:
            return
        r = self._get_async()
        counter_key = self._counter_key()
        ttl = self.ttl_seconds
        script = self._get_async_cas()

        # Cluster blocks evalsha inside pipelines, so each Script call is
        # routed independently and run concurrently via asyncio.gather.
        async def _one(pkey: str, prev: bytes):
            delta = deltas[pkey]
            if mode == 'merge':
                prev_dict = bson.BSON.decode(prev) if prev else {}
                merged = (
                    _deep_merge(prev_dict, delta) if prev_dict else dict(delta)
                )
            else:
                merged = dict(delta)
            new_bytes = bson.BSON.encode(merged)
            return await script(
                keys=[self.get_hash(pkey), self.set_pkeys, counter_key],
                args=[prev, new_bytes, pkey, ttl],
                client=r,
            )

        pending = list(deltas)
        for attempt in range(_MAX_CAS_RETRIES):
            # Pipelined GET (same-slot; cluster allows this).
            pipe = r.pipeline(transaction=False)
            for pk in pending:
                pipe.get(self.get_hash(pk))
            prevs = await pipe.execute()

            results = await asyncio.gather(
                *(_one(pk, prev or b'') for pk, prev in zip(pending, prevs))
            )
            pending = [pk for pk, ok in zip(pending, results) if not ok]
            if not pending:
                return
            if attempt + 1 < _MAX_CAS_RETRIES:
                await asyncio.sleep(self._backoff_seconds(attempt))

        raise RuntimeError(f'CAS retry exhausted for {pending}')

    # ------------------------------------------------------------------
    # Enumeration — SMEMBERS on the pkey SET (fast path)
    # ------------------------------------------------------------------
    def list_keys(self, keyword: str = '*', count: int | None = None) -> list[str]:
        """SMEMBERS the pkey set; optional fnmatch filter and count limit.

        Empty-string members from legacy drift are dropped — they can't
        correspond to a valid data key.
        """
        members = self._redis.smembers(self.set_pkeys)
        decoded = [
            m.decode('utf-8') if isinstance(m, bytes) else m
            for m in members
        ]
        decoded = [k for k in decoded if k]
        if keyword and keyword != '*':
            decoded = [k for k in decoded if fnmatch(k, keyword)]
        if count is not None:
            decoded = decoded[:count]
        return decoded

    def update_keys(self, keyword: str = '*', count: int | None = None) -> list[str]:
        """
        Reconciliation: SCAN {path}:<keyword> and repair the pkey SET.

        Run this manually if keys expired via TTL (or were deleted out of band)
        and the SET holds ghost entries. The happy path does NOT need this —
        every write atomically adds the pkey via the CAS Lua script.

        Every pkey found by the scan is added to the SET. Members that were
        not found (limited to those matching ``keyword`` when it is not
        ``'*'``) are removed only after an EXISTS check confirms the data key
        is gone. The SET is therefore never emptied, members outside
        ``keyword`` are left alone, and a pkey written while the scan was
        running is not dropped.

        Args:
            keyword: glob applied to the pkey part of the data key.
            count: optional SCAN ``COUNT`` hint (not a result limit).

        Returns:
            The pkeys whose data key was found by the scan.
        """
        prefix = f'{self._tag}:'
        scan_kw: dict = {'match': f'{prefix}{keyword}'}
        if count is not None:
            scan_kw['count'] = count
        plen = len(prefix)
        found: list[str] = []
        for key in self._redis.scan_iter(**scan_kw):
            key_s = key.decode('utf-8') if isinstance(key, bytes) else key
            if key_s.startswith(prefix):
                found.append(key_s[plen:])

        for i in range(0, len(found), 1000):
            self._redis.sadd(self.set_pkeys, *found[i:i + 1000])

        live = set(found)
        members = [
            m.decode('utf-8') if isinstance(m, bytes) else m
            for m in self._redis.smembers(self.set_pkeys)
        ]
        suspects = [
            m for m in members
            if m not in live and (keyword == '*' or fnmatch(m, keyword))
        ]
        for i in range(0, len(suspects), 1000):
            chunk = suspects[i:i + 1000]
            pipe = self._redis.pipeline(transaction=False)
            for member in chunk:
                pipe.exists(self.get_hash(member))
            alive_flags = pipe.execute()
            ghosts = [m for m, alive in zip(chunk, alive_flags) if not alive]
            if ghosts:
                self._redis.srem(self.set_pkeys, *ghosts)
        return found

    def scan(self, keyword: str = '*', count: int | None = None) -> Iterator[str]:
        """Iterator form of list_keys."""
        yield from self.list_keys(keyword=keyword, count=count)

    def __iter__(self) -> Iterator[str]:
        """Iterate over every pkey in the index SET."""
        yield from self.list_keys()

    # ------------------------------------------------------------------
    # Admin
    # ------------------------------------------------------------------
    def clear(self) -> None:
        """Delete all indexed data keys, the index SET and all header keys."""
        pkeys = self.list_keys('*')
        if pkeys:
            data_keys = [self.get_hash(p) for p in pkeys]
            for i in range(0, len(data_keys), 1000):
                self._redis.delete(*data_keys[i:i + 1000])
        self._redis.delete(self.set_pkeys)
        header_fields = list(self.header)
        if header_fields:
            hkeys = [self.header.get_hash(f) for f in header_fields]
            for i in range(0, len(hkeys), 1000):
                self._redis.delete(*hkeys[i:i + 1000])

    def recursive_update(self, original: dict, updates: Mapping) -> dict:
        """Deep-merge ``updates`` into ``original`` and return ``original``."""
        return _deep_merge(original, updates)
559
+
560
+
561
class CacheHeader:
    """Dict-like view over {path}#<field>. Same API as the 6.80.7 class.

    Each field is stored as its own Redis key ``{path}#<field>`` through the
    owning cache's synchronous client. Reading a missing field returns
    ``None`` (or the supplied default) instead of raising.
    """

    def __init__(self, cache: 'CacheRedis'):
        self.cache = cache

    def get_hash(self, field: str) -> str:
        """Return the Redis key that stores ``field``."""
        return f'{self.cache._tag}#{field}'

    def __getitem__(self, field: str):
        """Return the raw stored value, or ``None`` when the field is absent."""
        return self.cache._redis.get(self.get_hash(field))

    def get(self, field: str, default=None):
        """Return the raw stored value, or ``default`` when absent."""
        val = self.cache._redis.get(self.get_hash(field))
        return val if val is not None else default

    def __setitem__(self, field: str, value) -> None:
        """Store ``value`` under ``field``."""
        self.cache._redis.set(self.get_hash(field), value)

    def set(self, field: str, value) -> None:
        """Store ``value`` under ``field`` (method form of item assignment)."""
        self.cache._redis.set(self.get_hash(field), value)

    def __delitem__(self, field: str) -> None:
        """Delete ``field``."""
        self.cache._redis.delete(self.get_hash(field))

    def __iter__(self):
        """Yield every header field name, found via SCAN ``{path}#*``.

        The ``#pkeys`` index SET shares the prefix and is skipped. The field
        name is obtained by stripping the known ``{path}#`` prefix; splitting
        on the first ``#`` would return a wrong name whenever the path itself
        contains ``#``.
        """
        prefix = f'{self.cache._tag}#'
        pkeys_key = self.cache.set_pkeys
        for key in self.cache._redis.scan_iter(match=f'{prefix}*'):
            key_s = key.decode('utf-8') if isinstance(key, bytes) else key
            if key_s == pkeys_key or not key_s.startswith(prefix):
                continue
            field = key_s[len(prefix):]
            if field:
                yield field

    def list_keys(self, keyword: str = '*', count: int | None = None) -> list[str]:
        """Return field names matching the fnmatch ``keyword``, at most ``count``."""
        out = []
        for field in self:
            if keyword == '*' or fnmatch(field, keyword):
                out.append(field)
                if count is not None and len(out) >= count:
                    break
        return out

    def incrby(self, field: str, value: int) -> int:
        """Increment ``field`` by ``value`` and return the new integer value."""
        return int(self.cache._redis.incrby(self.get_hash(field), value))

    async def async_incrby(self, field: str, value: int) -> int:
        """Async counterpart of :meth:`incrby`, using the cache's async client."""
        r = self.cache._get_async()
        return int(await r.incrby(self.get_hash(field), value))
@@ -183,7 +183,7 @@ class CollectionMongoDB:
183
183
  item['date'] = pd.Timestamp(item['date']).normalize()
184
184
  elif self.period == 'M15':
185
185
  item = item.copy()
186
- item['date'] = pd.Timestamp(item['date']).floor('15min')
186
+ item['date'] = pd.Timestamp(item['date']).floor('15min')
187
187
  elif self.period == 'M1':
188
188
  item = item.copy()
189
189
  item['date'] = pd.Timestamp(item['date']).floor('min')
@@ -376,7 +376,7 @@ class Logger:
376
376
  if dfnewlines.empty:
377
377
  return dfnewlines
378
378
 
379
- dfnewlines['asctime'] = pd.to_datetime(dfnewlines['asctime'],format='mixed', errors='coerce')
379
+ dfnewlines['asctime'] = pd.to_datetime(dfnewlines['asctime'], format='ISO8601', errors='coerce')
380
380
 
381
381
  # Use cached max asctime
382
382
  max_asctime = Logger._max_asctime
@@ -243,7 +243,19 @@ while True:
243
243
  else:
244
244
  try:
245
245
  proc = scheduler_routine.get('process')
246
- is_alive = proc is not None and proc.is_running()
246
+ is_alive = (
247
+ proc is not None
248
+ and proc.is_running()
249
+ and proc.status() != psutil.STATUS_ZOMBIE
250
+ )
251
+ if proc is not None and not is_alive:
252
+ # Reap the zombie so the PID is freed and the entry clears from ps
253
+ try:
254
+ proc.wait(timeout=0)
255
+ except (psutil.TimeoutExpired, psutil.NoSuchProcess):
256
+ pass
257
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
258
+ is_alive = False
247
259
  except Exception:
248
260
  is_alive = False
249
261
  if not is_alive:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: shareddata
3
- Version: 6.83.4
3
+ Version: 6.83.7
4
4
  Summary: Memory Mapped / Shared Memory Database with S3 repository
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: MIT License