gooddata-flight-server 1.34.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gooddata-flight-server might be problematic. Click here for more details.
- gooddata_flight_server/__init__.py +23 -0
- gooddata_flight_server/_version.py +7 -0
- gooddata_flight_server/cli.py +137 -0
- gooddata_flight_server/config/__init__.py +1 -0
- gooddata_flight_server/config/config.py +536 -0
- gooddata_flight_server/errors/__init__.py +1 -0
- gooddata_flight_server/errors/error_code.py +209 -0
- gooddata_flight_server/errors/error_info.py +475 -0
- gooddata_flight_server/exceptions.py +16 -0
- gooddata_flight_server/health/__init__.py +1 -0
- gooddata_flight_server/health/health_check_http_server.py +103 -0
- gooddata_flight_server/health/server_health_monitor.py +83 -0
- gooddata_flight_server/metrics.py +16 -0
- gooddata_flight_server/py.typed +1 -0
- gooddata_flight_server/server/__init__.py +1 -0
- gooddata_flight_server/server/auth/__init__.py +1 -0
- gooddata_flight_server/server/auth/auth_middleware.py +83 -0
- gooddata_flight_server/server/auth/token_verifier.py +62 -0
- gooddata_flight_server/server/auth/token_verifier_factory.py +55 -0
- gooddata_flight_server/server/auth/token_verifier_impl.py +41 -0
- gooddata_flight_server/server/base.py +63 -0
- gooddata_flight_server/server/default.logging.ini +28 -0
- gooddata_flight_server/server/flight_rpc/__init__.py +1 -0
- gooddata_flight_server/server/flight_rpc/flight_middleware.py +162 -0
- gooddata_flight_server/server/flight_rpc/flight_server.py +228 -0
- gooddata_flight_server/server/flight_rpc/flight_service.py +279 -0
- gooddata_flight_server/server/flight_rpc/server_methods.py +200 -0
- gooddata_flight_server/server/server_base.py +321 -0
- gooddata_flight_server/server/server_main.py +116 -0
- gooddata_flight_server/tasks/__init__.py +1 -0
- gooddata_flight_server/tasks/base.py +21 -0
- gooddata_flight_server/tasks/metrics.py +115 -0
- gooddata_flight_server/tasks/task.py +193 -0
- gooddata_flight_server/tasks/task_error.py +60 -0
- gooddata_flight_server/tasks/task_executor.py +96 -0
- gooddata_flight_server/tasks/task_result.py +363 -0
- gooddata_flight_server/tasks/temporal_container.py +247 -0
- gooddata_flight_server/tasks/thread_task_executor.py +639 -0
- gooddata_flight_server/utils/__init__.py +1 -0
- gooddata_flight_server/utils/libc_utils.py +35 -0
- gooddata_flight_server/utils/logging.py +158 -0
- gooddata_flight_server/utils/methods_discovery.py +98 -0
- gooddata_flight_server/utils/otel_tracing.py +142 -0
- gooddata_flight_server-1.34.1.dev1.data/scripts/gooddata-flight-server +10 -0
- gooddata_flight_server-1.34.1.dev1.dist-info/LICENSE.txt +7 -0
- gooddata_flight_server-1.34.1.dev1.dist-info/METADATA +749 -0
- gooddata_flight_server-1.34.1.dev1.dist-info/RECORD +49 -0
- gooddata_flight_server-1.34.1.dev1.dist-info/WHEEL +5 -0
- gooddata_flight_server-1.34.1.dev1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
# (C) 2024 GoodData Corporation
|
|
2
|
+
import abc
|
|
3
|
+
import threading
|
|
4
|
+
from collections.abc import Generator, Iterable
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Callable, Optional, Union, final
|
|
7
|
+
|
|
8
|
+
import pyarrow.flight
|
|
9
|
+
import structlog
|
|
10
|
+
from readerwriterlock import rwlock
|
|
11
|
+
from typing_extensions import TypeAlias
|
|
12
|
+
|
|
13
|
+
from gooddata_flight_server.errors.error_code import ErrorCode
|
|
14
|
+
from gooddata_flight_server.errors.error_info import ErrorInfo
|
|
15
|
+
from gooddata_flight_server.tasks.base import ArrowData
|
|
16
|
+
from gooddata_flight_server.tasks.task_error import TaskError
|
|
17
|
+
|
|
18
|
+
# Signature of the optional, argument-less callback that result factories accept
# via `on_close`; it is invoked when the result is closed.
OnCloseCallback: TypeAlias = Callable[[], None]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FlightDataTaskResult(abc.ABC):
    """
    This class represents a result of a task execution which contains some data
    that can be sent out to clients via DoGet.

    Subclasses should implement methods to get schema of the data, get the actual
    data and perform the cleanup.

    The class provides essential customization and synchronization mechanisms so
    that subclasses can use it to realize:

    - results whose data can only be consumed once
    - results whose data can be consumed repeatedly
    """

    __slots__ = (
        "_single_use_data",
        "_data_lock",
        "_claim_lock",
        "_claimed",
        "_closed",
    )

    def __init__(self, single_use_data: bool = False) -> None:
        """
        :param single_use_data: when True, only the first caller of `acquire_data`
         gets the data; every later caller fails
        """
        self._single_use_data = single_use_data
        # readers hold the read lock while they work with the data; close() takes
        # the write lock to flip the `_closed` flag
        self._data_lock: rwlock.RWLockRead = rwlock.RWLockRead()

        # guards the `_claimed` flag used for single-use results
        self._claim_lock = threading.Lock()
        self._claimed = False
        self._closed = False

    def _acquire_reader(self) -> Optional[rwlock.Lockable]:
        """
        Tries to obtain the read lock that protects the data from being closed.

        :return: acquired read lock; None if the result is being closed, was already
         closed, or is single-use and was already claimed by another reader
        """
        rlock = self._data_lock.gen_rlock()
        if not rlock.acquire(blocking=False):
            # lock cannot be acquired -> means write lock is taken -> means data is being closed
            return None

        if self._closed:
            # lock was obtained but the result was closed already
            rlock.release()
            return None

        if self._single_use_data:
            # if the data is single-use, then the first reader wins the
            # claim and all others will fail
            #
            # the claim lock is only held for the duration of the flag check so a
            # plain blocking acquire is fine here
            with self._claim_lock:
                won_claim = not self._claimed
                self._claimed = True

            if not won_claim:
                # someone else has already claimed the result -> this reader lost
                rlock.release()
                return None

        # this reader may read the data (and has won the claim if single-use)
        return rlock

    @property
    def single_use_data(self) -> bool:
        """
        Indicates whether the data contained in this result can only be used / consumed once.

        In this type of results, the first caller of `acquire_data` wins and can read the data. Everyone
        else coming later will fail and will not be able to read.

        :return: true if single use, false if not
        """
        return self._single_use_data

    @abc.abstractmethod
    def get_schema(self) -> pyarrow.Schema:
        """
        Gets schema of the result.

        :return: Arrow schema
        """
        raise NotImplementedError

    @abc.abstractmethod
    def _get_data(self) -> Union[Iterable[ArrowData], ArrowData]:
        """
        Gets the data. By default, the method is supposed to return the same data upon
        repeated calls. If the subclass generates result data that can only be consumed
        once, then it must pass `single_use_data=True` to the constructor of this class.

        :return: a single record batch, tables, RecordBatchReaders or iterable thereof
         (iterable may contain mixed types)
        """
        raise NotImplementedError

    @abc.abstractmethod
    def _close(self) -> None:
        """
        Implement this method to close / cleanup any resources tied to this result.

        Note: this method is protected from repeated calls. It is guaranteed it will
        be called exactly once.

        :return: nothing
        """
        raise NotImplementedError

    @final
    def acquire_data(
        self,
    ) -> tuple[rwlock.Lockable, Union[Iterable[ArrowData], ArrowData]]:
        """
        Acquires this result's data. This method will first ensure that the data is
        still available for reading:

        1. result was not closed
        2. if the result has single-use data, then the current thread is the first
           to try and consume it

        If these checks succeed, then it returns a tuple of:

        - read lock that is guarding the result from being closed
        - the data itself

        The data can be the following:

        - Arrow RecordBatch, Table or RecordBatchReader
        - Iterable (e.g. generator) of thereof; the iterated elements may be heterogeneous

        IMPORTANT: the caller who acquires the data MUST release the returned read lock after
        it is done with the data.

        If obtaining the data itself fails, the read lock is released before the error
        propagates. Note that for single-use results the claim stays consumed in that case.

        :return: tuple of (acquired lock, data)
        :raises: pyarrow.flight.FlightError: when this result's data was single-use and was already consumed
         or when the result was closed; the error contains ErrorCode COMMAND_RESULT_CONSUMED.
        """
        rlock = self._acquire_reader()
        if rlock is None:
            raise ErrorInfo.for_reason(
                ErrorCode.COMMAND_RESULT_CONSUMED,
                "Result data was already consumed or closed.",
            ).to_server_error()

        try:
            data = self._get_data()
        except BaseException:
            # the caller never receives the lock in this case and thus cannot
            # release it; a leaked read lock would keep close() waiting for
            # the write lock
            rlock.release()
            raise

        return rlock, data

    @final
    def close(self) -> None:
        """
        Closes the result. Waits until the write lock is available, marks the result as
        closed and then calls the subclass-specific `_close`. Repeated calls are no-op.

        :return: nothing
        """
        with self._data_lock.gen_wlock():
            if self._closed:
                return

            self._closed = True

        # this is intentionally done without holding the lock
        # mainly to prevent deadlocks in more complex scenarios where
        # result is 'part of something else' and close() can
        # be called from multiple places and possibly lead
        # to recursion
        self._close()

    @staticmethod
    def for_table(table: pyarrow.Table, on_close: Optional[OnCloseCallback] = None) -> "FlightDataTaskResult":
        """
        Factory to create result for an Arrow table. This result allows for repeated
        reads.

        :param table: table with result's data
        :param on_close: optionally provide a callback function that will be
         invoked when the result is closed; you may find this useful if your service
         needs to do additional cleanup / release resources bound with the result
        :return: a new instance of result
        """
        return _TableTaskResult(table, on_close=on_close)

    @staticmethod
    def for_reader(
        reader: pyarrow.RecordBatchReader, on_close: Optional[OnCloseCallback] = None
    ) -> "FlightDataTaskResult":
        """
        Factory to create result for a RecordBatchReader. The created result will
        be 'single use' - the data from the reader can only be consumed once. After that,
        the result will be closed. Useful when your service creates streams of Arrow data.

        :param reader: reader with result's data
        :param on_close: optionally provide a callback function that will be
         invoked when the result is closed; you may find this useful if your service
         needs to do additional cleanup / release resources bound with the result
        :return: a new instance of result
        """
        return _ReaderTaskResult(reader, on_close=on_close)

    @staticmethod
    def for_data(data: ArrowData, on_close: Optional[OnCloseCallback] = None) -> "FlightDataTaskResult":
        """
        Convenience factory function to create result from either Arrow Table or RecordBatchReader.

        See `for_table` and `for_reader` for further detail.

        :param data: either Arrow Table or RecordBatchReader
        :param on_close: optionally provide a callback function that will be
         invoked when the result is closed; you may find this useful if your service
         needs to do additional cleanup / release resources bound with the result
        :return: a new instance of result
        :raises ValueError: when `data` is neither Arrow Table nor RecordBatchReader
        """
        if isinstance(data, pyarrow.Table):
            return FlightDataTaskResult.for_table(data, on_close=on_close)
        elif isinstance(data, pyarrow.RecordBatchReader):
            return FlightDataTaskResult.for_reader(data, on_close=on_close)

        raise ValueError(
            f"Unexpected type of 'data': {type(data).__name__}. Expected Arrow Table or RecordBatchReader."
        )
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
@dataclass
class ListFlightsTaskResult:
    """
    Result of a task whose job was to list the available flights. The flight
    infos carried by this result are already materialized.
    """

    # flight infos produced by the listing task
    flight_infos: tuple[pyarrow.flight.FlightInfo, ...]

    def as_generator(self) -> Generator[pyarrow.flight.FlightInfo, None, None]:
        """
        :return: generator that yields the held flight infos one by one, in order
        """
        yield from self.flight_infos
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# A task result is either a result carrying flight data or a result carrying
# a listing of flights.
TaskResult: TypeAlias = Union[FlightDataTaskResult, ListFlightsTaskResult]
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
class TaskExecutionResult:
    """
    Outcome of one particular task execution. It tells the caller whether the
    task completed successfully, failed, or was cancelled.
    """

    __slots__ = ("_task_id", "_cmd", "_result", "_cancelled", "_error")

    def __init__(
        self,
        task_id: str,
        cmd: bytes,
        result: Optional[TaskResult],
        cancelled: bool,
        error: Optional[TaskError],
    ):
        """
        :param task_id: id of the task to which this outcome pertains
        :param cmd: command from the Flight descriptor that led to the task
        :param result: result of a successful execution, None otherwise
        :param cancelled: True if the task was cancelled
        :param error: error that caused the task to fail, None if it did not fail
        """
        # identification of the task
        self._task_id = task_id
        self._cmd = cmd

        # outcome of the task
        self._result = result
        self._cancelled = cancelled
        self._error = error

    @property
    def task_id(self) -> str:
        """
        :return: id of the task to which this result pertains
        """
        return self._task_id

    @property
    def cmd(self) -> bytes:
        """
        :return: command from the Flight descriptor; this command resulted in the
         creation of the task that produced this result
        """
        return self._cmd

    @property
    def result(self) -> Optional[TaskResult]:
        """
        :return: result of the task's successful execution; None when the task
         failed or was cancelled
        """
        return self._result

    @property
    def cancelled(self) -> bool:
        """
        :return: True if the task was cancelled; False if it was not
        """
        return self._cancelled

    @property
    def error(self) -> Optional[TaskError]:
        """
        :return: error that caused the task to fail; None if the task has not failed
        """
        return self._error
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# Module-level logger; the result implementations in this module use it to report
# failures that happen while a result is being closed.
_LOGGER = structlog.get_logger("gooddata_flight_server.task_executor")
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
class _TableTaskResult(FlightDataTaskResult):
    """
    Result backed by an Arrow table. The data is not single-use, so it can be
    acquired repeatedly until the result is closed.
    """

    def __init__(self, table: pyarrow.Table, on_close: Optional[OnCloseCallback] = None) -> None:
        """
        :param table: table with the result's data
        :param on_close: optional callback to invoke when the result is closed
        """
        super().__init__(single_use_data=False)

        self._table: pyarrow.Table = table
        self._on_close = on_close

    def get_schema(self) -> pyarrow.Schema:
        """
        :return: schema of the held table
        """
        return self._table.schema

    def _get_data(self) -> Union[Iterable[ArrowData], ArrowData]:
        """
        :return: the held table
        """
        return self._table

    def _close(self) -> None:
        """
        Drops the reference to the table and then runs the `on_close` callback, if any.
        A failing callback is logged as a warning and otherwise ignored.
        """
        del self._table

        callback = self._on_close
        if callback is None:
            return

        try:
            callback()
        except Exception:
            # NOTE(review): the event name says 'reader' although this is the table
            # result - looks like a copy-paste; left untouched in case log consumers
            # depend on it
            _LOGGER.warning("reader_on_close_failed", exc_info=True)
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
class _ReaderTaskResult(FlightDataTaskResult):
    """
    Result backed by a RecordBatchReader. The result is created as single-use:
    only the first reader gets to consume the data.
    """

    def __init__(self, reader: pyarrow.RecordBatchReader, on_close: Optional[OnCloseCallback] = None) -> None:
        """
        :param reader: reader with the result's data
        :param on_close: optional callback to invoke when the result is closed
        """
        super().__init__(single_use_data=True)

        self._reader = reader
        self._on_close = on_close

    def get_schema(self) -> pyarrow.Schema:
        """
        :return: schema reported by the held reader
        """
        return self._reader.schema

    def _get_data(self) -> Union[Iterable[ArrowData], ArrowData]:
        """
        :return: the held reader
        """
        return self._reader

    def _close(self) -> None:
        """
        Closes the reader, drops the reference to it and then runs the `on_close`
        callback, if any. Failures in either step are logged as warnings and
        otherwise ignored.
        """
        try:
            self._reader.close()
        except Exception:
            _LOGGER.warning("reader_close_failed", exc_info=True)
        finally:
            # the reference is dropped regardless of whether the close succeeded
            self._reader = None

        callback = self._on_close
        if callback is None:
            return

        try:
            callback()
        except Exception:
            _LOGGER.warning("reader_on_close_failed", exc_info=True)
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
# (C) 2024 GoodData Corporation
|
|
2
|
+
import threading
|
|
3
|
+
import time
|
|
4
|
+
from collections.abc import Iterator
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any, Callable, Generic, Optional, TypeVar
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
from readerwriterlock import rwlock
|
|
10
|
+
|
|
11
|
+
# Type of the values held by the container and its entries.
T = TypeVar("T")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class _Entry(Generic[T]):
    """
    Immutable record kept by the container: the stored value together with
    the time at which it was added.
    """

    # the stored value
    value: T
    # time of addition; the container compares it against `time.time()` values
    added: float
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TemporalContainer(Generic[T]):
    """
    Temporal container holds entries for a configured amount of time and then evicts
    them from the container. At the time of eviction, the container will dispatch
    the evicted entry to a callback function where cleanup of the entry can be done.
    """

    def __init__(
        self,
        logger_name: str,
        entry_evict_fun: Callable[[T], Any],
        grace_period: float = 10,
        collector_cycle_time: float = 1,
        start_collector: bool = True,
    ):
        """
        Create a new temporal container.

        :param logger_name: specify logger name where the container should emit logs
        :param entry_evict_fun: specify function to call when the entries are removed via expiration or when
         they are manually evicted using `evict_entry` call. This function should not block. Blocking evictions
         need to be done in a separate thread pool. Note: this method is called in fire-and-forget mode - the
         container does not rely on results of the eviction.
        :param grace_period: duration, in fractions of seconds, for which the entry will stay in the container
        :param collector_cycle_time: number of seconds to sleep the collector thread between collection cycles
        :param start_collector: whether to automatically start the collector
        """
        self._logger = structlog.get_logger(logger_name)
        self._entry_evict_fun = entry_evict_fun
        self._grace_period = grace_period
        self._collector_cycle_time = collector_cycle_time

        self._entries_rwlock: rwlock.RWLockRead = rwlock.RWLockRead()
        self._entries: dict[str, _Entry[T]] = {}

        self._thread: threading.Thread = threading.Thread(daemon=True, target=self._collector)
        self._closed = False

        if start_collector:
            self._thread.start()

    def _collector(self) -> None:
        """
        Collects results that have exceeded the grace period. The code takes advantage
        of the natural ordering of entries in the dict. When iterating dict, older entries are
        always before newer entries.

        So the code first goes read-only through the dict of entries, finds those that are
        expired and halts as soon as first non-expired entry is found. It then removes
        the entries and finally after all removals are done calls the eviction function.
        """
        self._logger.debug(
            "collector_started",
            grace=self._grace_period,
            cycle=self._collector_cycle_time,
        )

        while not self._closed:
            time.sleep(self._collector_cycle_time)
            now = time.time()

            try:
                self._evict_expired_items(now)
            except Exception:
                # log and ignore
                self._logger.error("temporal_entries_evict_failed", exc_info=True)

    def _evict_expired_items(self, now: float) -> list[T]:
        """
        Removes all entries whose grace period has elapsed at time `now` and dispatches
        them to the eviction function.

        :param now: current time, comparable to the `added` time of entries
        :return: values of the entries that were removed in this cycle
        """
        to_remove: list[str] = []

        # first get list of entries to remove in this cycle
        # the dict items are naturally ordered so that older entries
        # come before newer entries
        #
        # thus the identification of entries to remove can break
        # as soon as an item that expires in the future is encountered
        with self._entries_rwlock.gen_rlock():
            for entry_id, entry in self._entries.items():
                expires_at = entry.added + self._grace_period

                if expires_at <= now:
                    to_remove.append(entry_id)
                else:
                    break

        if not to_remove:
            return []

        # now go ahead and remove all entries that were identified
        # previously. mind that it can happen that a previously identified
        # entry was removed manually in the meantime
        to_evict: list[T] = []
        with self._entries_rwlock.gen_wlock():
            self._logger.debug(
                "temporal_entries_remove",
                collect=len(to_remove),
                out_of=len(self._entries),
            )

            for entry_id in to_remove:
                entry_to_evict = self._entries.pop(entry_id, None)
                if entry_to_evict is None:
                    continue

                to_evict.append(entry_to_evict.value)

        # finally, make the eviction calls
        #
        # this is intentionally done outside the critical section
        for value in to_evict:
            if not self._safe_evict(value):
                self._logger.error("temporal_entries_evict_failed", value=value)

        return to_evict

    def _safe_evict(self, value: T) -> bool:
        """
        Calls the eviction function and shields the caller from its failures.

        :param value: value to dispatch to the eviction function
        :return: True if the eviction function returned normally, False if it raised
        """
        try:
            self._entry_evict_fun(value)
            return True
        except Exception:
            return False

    @property
    def entry_timeout(self) -> float:
        """
        :return: timeout for entries; entry is guaranteed to be removed from the container
         after this time
        """
        return self._grace_period + self._collector_cycle_time + 1

    def start_collector(self) -> None:
        """
        Starts the collector thread unless it is already running. Use this when the
        container was created with `start_collector=False`.

        :return: nothing
        """
        assert not self._closed

        # a thread may only be started once; starting a running thread raises
        # RuntimeError, so the start must only happen when the thread is NOT alive
        if not self._thread.is_alive():
            self._thread.start()

    def get_entry(self, entry_id: str) -> Optional[T]:
        """
        :param entry_id: entry identifier
        :return: entry or None if not found
        """
        assert not self._closed

        with self._entries_rwlock.gen_rlock():
            entry = self._entries.get(entry_id)
            if entry is None:
                return None

            return entry.value

    def evict_entry(self, entry_id: str) -> bool:
        """
        Explicitly evicts an entry from the container. This will drop the entry and call
        the eviction function.

        :param entry_id: entry to evict
        :return: true if entry existed and was evicted, false if entry did not exist
        """
        assert not self._closed

        with self._entries_rwlock.gen_wlock():
            entry = self._entries.pop(entry_id, None)
            if entry is None:
                return False

            # note: the eviction function runs while the write lock is held and
            # its exceptions propagate to the caller
            self._entry_evict_fun(entry.value)
            return True

    def pop_entry(self, entry_id: str) -> Optional[T]:
        """
        Pops an entry out of the container. This will take the entry out and will not
        run the eviction function.

        :param entry_id: id of entry to pop
        :return: the popped entry; None if no entry with the provided id
        """
        assert not self._closed

        with self._entries_rwlock.gen_wlock():
            entry = self._entries.pop(entry_id, None)
            if entry is None:
                return None

            return entry.value

    def close(self) -> None:
        """
        Closes the container. This will evict all entries that are currently in the container
        and halt (eventually) the internal collector thread.

        :return: nothing
        """
        self._closed = True
        with self._entries_rwlock.gen_wlock():
            # swap the dict for an empty one; evictions then run outside the lock
            snapshot = self._entries
            self._entries = {}

        for entry in snapshot.values():
            self._entry_evict_fun(entry.value)

    def __iter__(self) -> Iterator[T]:
        """
        :return: iterator over a copy of the values that were in the container at the
         time of the call
        """
        assert not self._closed

        with self._entries_rwlock.gen_rlock():
            value_copy = tuple(entry.value for entry in self._entries.values())

        return iter(value_copy)

    def _add_entry(self, key: str, value: T, now: float) -> None:
        """
        Adds entry to the container.

        Note: this method exists to improve testability - so that tests can
        simply 'emulate' time moving forward. It is not intended to be used
        outside the test code: bad use can mess the guarantees provided
        by the container.

        :param key: entry identifier
        :param value: value to store
        :param now: time to record as the time of addition
        """
        assert not self._closed

        with self._entries_rwlock.gen_wlock():
            self._entries[key] = _Entry(value, now)

    def __setitem__(self, key: str, value: T) -> None:
        """
        Adds entry to the container, using the current time as the time of addition.
        """
        self._add_entry(key, value, time.time())

    def __len__(self) -> int:
        """
        :return: number of entries currently in the container
        """
        with self._entries_rwlock.gen_rlock():
            return len(self._entries)
|