slipstream-async 0.0.0a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- slipstream_async-0.0.0a0/PKG-INFO +121 -0
- slipstream_async-0.0.0a0/README.md +74 -0
- slipstream_async-0.0.0a0/pyproject.toml +101 -0
- slipstream_async-0.0.0a0/setup.cfg +4 -0
- slipstream_async-0.0.0a0/slipstream/__init__.py +18 -0
- slipstream_async-0.0.0a0/slipstream/__version__.py +5 -0
- slipstream_async-0.0.0a0/slipstream/caching.py +308 -0
- slipstream_async-0.0.0a0/slipstream/codecs.py +34 -0
- slipstream_async-0.0.0a0/slipstream/core.py +340 -0
- slipstream_async-0.0.0a0/slipstream/utils.py +42 -0
- slipstream_async-0.0.0a0/slipstream_async.egg-info/PKG-INFO +121 -0
- slipstream_async-0.0.0a0/slipstream_async.egg-info/SOURCES.txt +16 -0
- slipstream_async-0.0.0a0/slipstream_async.egg-info/dependency_links.txt +1 -0
- slipstream_async-0.0.0a0/slipstream_async.egg-info/entry_points.txt +2 -0
- slipstream_async-0.0.0a0/slipstream_async.egg-info/requires.txt +29 -0
- slipstream_async-0.0.0a0/slipstream_async.egg-info/top_level.txt +1 -0
- slipstream_async-0.0.0a0/tests/test__init__.py +67 -0
- slipstream_async-0.0.0a0/tests/test_cache.py +68 -0

slipstream_async-0.0.0a0/PKG-INFO
@@ -0,0 +1,121 @@
Metadata-Version: 2.1
Name: slipstream-async
Version: 0.0.0a0
Summary: Streamline your stream processing.
Author-email: Menziess <stefan_schenk@hotmail.com>
License: MIT
Project-URL: repository, https://github.com/Menziess/slipstream
Project-URL: documentation, https://slipstream.readthedocs.io
Keywords: kafka,pubsub
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: License :: OSI Approved :: MIT License
Classifier: Topic :: Scientific/Engineering
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.10
Description-Content-Type: text/markdown
Requires-Dist: uvloop
Provides-Extra: kafka
Requires-Dist: aiokafka; extra == "kafka"
Provides-Extra: cache
Requires-Dist: rocksdict; extra == "cache"
Provides-Extra: dev
Requires-Dist: pydocstyle; extra == "dev"
Requires-Dist: autopep8; extra == "dev"
Requires-Dist: pyright; extra == "dev"
Requires-Dist: flake8; extra == "dev"
Requires-Dist: bandit; extra == "dev"
Requires-Dist: pre-commit; extra == "dev"
Provides-Extra: test
Requires-Dist: pytest; extra == "test"
Requires-Dist: pytest-cov; extra == "test"
Requires-Dist: pytest-mock; extra == "test"
Requires-Dist: pytest-forked; extra == "test"
Requires-Dist: pytest-asyncio; extra == "test"
Requires-Dist: testcontainers[kafka]; extra == "test"
Provides-Extra: docs
Requires-Dist: sphinx; extra == "docs"
Requires-Dist: sphinx-rtd-theme; extra == "docs"
Requires-Dist: sphinx-autoapi; extra == "docs"
Requires-Dist: sphinx-autobuild; extra == "docs"

[](https://github.com/Menziess/slipstream/actions/workflows/python-test.yml) [](https://slipstream.readthedocs.io/en/latest/?badge=latest) [](https://pepy.tech/project/slipstream)

# Slipstream

<img src="./res/logo.png" width="25%" height="25%" align="right" />

Slipstream provides a data-flow model to simplify development of stateful streaming applications.

```sh
pip install slipstream
```

```py
from asyncio import run

from slipstream import handle, stream


async def messages():
    for emoji in '🏆📞🐟👌':
        yield emoji


@handle(messages(), sink=[print])
def handle_message(msg):
    yield f'Hello {msg}!'


if __name__ == '__main__':
    run(stream())
```

```sh
Hello 🏆!
Hello 📞!
Hello 🐟!
Hello 👌!
```

## Usage

Async `iterables` are sources, (async) `callables` are sinks.

Decorate handler functions using `handle`, then run `stream` to start processing:

Multiple sources and sinks can be provided to establish many-to-many relations between them.
The 4 emojis were printed using the callable `print`.

## Quickstart

Install `aiokafka` (latest) along with slipstream:

```sh
pip install slipstream[kafka]
```

Spin up a local Kafka broker with [docker-compose.yml](docker-compose.yml), using `localhost:29091` to connect:

```sh
docker compose up broker -d
```

Follow the docs and set up a Kafka connection: [slipstream.readthedocs.io](https://slipstream.readthedocs.io).

## Features

- [`slipstream.handle`](slipstream/__init__.py): bind streams (iterables) and sinks (callables) to user-defined handler functions
- [`slipstream.stream`](slipstream/__init__.py): start streaming
- [`slipstream.Topic`](slipstream/core.py): consume from (iterable), and produce to (callable) Kafka using [**aiokafka**](https://aiokafka.readthedocs.io/en/stable/index.html)
- [`slipstream.Cache`](slipstream/caching.py): store data to disk using [**rocksdict**](https://congyuwang.github.io/RocksDict/rocksdict.html)
- [`slipstream.Conf`](slipstream/core.py): set global Kafka configuration (can be overridden per topic)
- [`slipstream.codecs.JsonCodec`](slipstream/codecs.py): serialize and deserialize JSON messages

slipstream_async-0.0.0a0/README.md
@@ -0,0 +1,74 @@
[](https://github.com/Menziess/slipstream/actions/workflows/python-test.yml) [](https://slipstream.readthedocs.io/en/latest/?badge=latest) [](https://pepy.tech/project/slipstream)

# Slipstream

<img src="./res/logo.png" width="25%" height="25%" align="right" />

Slipstream provides a data-flow model to simplify development of stateful streaming applications.

```sh
pip install slipstream
```

```py
from asyncio import run

from slipstream import handle, stream


async def messages():
    for emoji in '🏆📞🐟👌':
        yield emoji


@handle(messages(), sink=[print])
def handle_message(msg):
    yield f'Hello {msg}!'


if __name__ == '__main__':
    run(stream())
```

```sh
Hello 🏆!
Hello 📞!
Hello 🐟!
Hello 👌!
```

## Usage

Async `iterables` are sources, (async) `callables` are sinks.

Decorate handler functions using `handle`, then run `stream` to start processing:

Multiple sources and sinks can be provided to establish many-to-many relations between them.
The 4 emojis were printed using the callable `print`.

## Quickstart

Install `aiokafka` (latest) along with slipstream:

```sh
pip install slipstream[kafka]
```

Spin up a local Kafka broker with [docker-compose.yml](docker-compose.yml), using `localhost:29091` to connect:

```sh
docker compose up broker -d
```

Follow the docs and set up a Kafka connection: [slipstream.readthedocs.io](https://slipstream.readthedocs.io).

## Features

- [`slipstream.handle`](slipstream/__init__.py): bind streams (iterables) and sinks (callables) to user-defined handler functions
- [`slipstream.stream`](slipstream/__init__.py): start streaming
- [`slipstream.Topic`](slipstream/core.py): consume from (iterable), and produce to (callable) Kafka using [**aiokafka**](https://aiokafka.readthedocs.io/en/stable/index.html)
- [`slipstream.Cache`](slipstream/caching.py): store data to disk using [**rocksdict**](https://congyuwang.github.io/RocksDict/rocksdict.html)
- [`slipstream.Conf`](slipstream/core.py): set global Kafka configuration (can be overridden per topic)
- [`slipstream.codecs.JsonCodec`](slipstream/codecs.py): serialize and deserialize JSON messages
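
The Usage section of the README above states that any (async) callable can serve as a sink. As an illustrative sketch of that claim (not part of the package), a custom coroutine sink can be registered alongside a handler like so:

```py
from asyncio import run

from slipstream import handle, stream


async def numbers():
    for i in range(3):
        yield i


async def sink_to_log(value):
    # Any (async) callable works as a sink; coroutines are awaited
    print(f'sink received: {value}')


@handle(numbers(), sink=[sink_to_log])
def double(msg):
    return msg * 2


if __name__ == '__main__':
    run(stream())  # prints: sink received: 0 / 2 / 4
```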

slipstream_async-0.0.0a0/pyproject.toml
@@ -0,0 +1,101 @@
[project]
name = "slipstream-async"
dynamic = ["version"]
description = "Streamline your stream processing."
authors = [{ name = "Menziess", email = "stefan_schenk@hotmail.com" }]
readme = "README.md"
license = { text = "MIT" }
keywords = ["kafka", "pubsub"]
classifiers = [
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "License :: OSI Approved :: MIT License",
    "Topic :: Scientific/Engineering",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
requires-python = ">=3.10"
dependencies = [
    "uvloop",
]

[project.optional-dependencies]
kafka = [
    "aiokafka",
]
cache = [
    "rocksdict",
]
dev = [
    "pydocstyle",
    "autopep8",
    "pyright",
    "flake8",
    "bandit",
    "pre-commit",
]
test = [
    "pytest",
    "pytest-cov",
    "pytest-mock",
    "pytest-forked",
    "pytest-asyncio",
    "testcontainers[kafka]",
]
docs = [
    "sphinx",
    "sphinx-rtd-theme",
    "sphinx-autoapi",
    "sphinx-autobuild",
]

[project.urls]
repository = "https://github.com/Menziess/slipstream"
documentation = "https://slipstream.readthedocs.io"

[project.scripts]
slipstream = "slipstream.__main__:main"

[tool.setuptools.dynamic]
version = {attr = "slipstream.__version__.VERSION"}

[tool.setuptools.packages.find]
where = ["."]
include = ["slipstream*"]

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[tool.pyright]
venvPath = "."
venv = ".venv"
include = [
    "slipstream/**/*.py",
    "tests/**/*.py"
]

[tool.bandit]
exclude_dirs = ["tests"]

[tool.ruff]
line-length = 79

[tool.mypy]
ignore_missing_imports = true

[tool.pytest.ini_options]
addopts = "--doctest-modules"
testpaths = [
    "slipstream",
    "tests",
]
markers = [
    "serial"
]
asyncio_default_fixture_loop_scope = "function"
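
A short usage note on the optional-dependency groups defined above: they are exposed as pip extras of the published `slipstream-async` distribution, so several integrations can be installed in one go, e.g.:

```sh
pip install "slipstream-async[kafka,cache]"
```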

slipstream_async-0.0.0a0/slipstream/__init__.py
@@ -0,0 +1,18 @@
"""Top level objects."""

import uvloop

from slipstream.__version__ import VERSION
from slipstream.caching import Cache
from slipstream.core import Conf, Topic, handle, stream

uvloop.install()

__all__ = (
    'VERSION',
    'Conf',
    'Topic',
    'Cache',
    'handle',
    'stream',
)

slipstream_async-0.0.0a0/slipstream/caching.py
@@ -0,0 +1,308 @@
"""Slipstream caching."""

import os
from contextlib import contextmanager
from threading import RLock
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

from rocksdict import (
    AccessType,
    ColumnFamily,
    CompactOptions,
    DBCompactionStyle,
    DBCompressionType,
    FifoCompactOptions,
    IngestExternalFileOptions,
    Options,
    Rdict,
    RdictIter,
    ReadOptions,
    Snapshot,
    WriteOptions,
)
from rocksdict.rocksdict import RdictItems, RdictKeys, RdictValues

MB, MINUTES = 1024 * 1024, 60


class Cache:
    """Create a RocksDB database in the specified folder.

    >>> cache = Cache('db/mycache')  # doctest: +SKIP

    The cache instance acts as a callable to store data:

    >>> cache('key', {'msg': 'Hello World!'})  # doctest: +SKIP
    >>> cache['key']  # doctest: +SKIP
    {'msg': 'Hello World!'}
    """

    def __init__(
        self,
        path: str,
        options: Union[Options, None] = None,
        column_families: Union[Dict[str, Options], None] = None,
        access_type=AccessType.read_write(),
        target_table_size=25 * MB,
        number_of_locks=16
    ) -> None:
        """Create instance that holds rocksdb reference.

        This configuration optimizes for low disk usage (25 MB per table).
        The oldest records may be removed during compaction.

        https://congyuwang.github.io/RocksDict/rocksdict.html
        """
        self.name = path
        self._number_of_locks = number_of_locks
        self._locks = [RLock() for _ in range(self._number_of_locks)]
        options = options or self._default_options(target_table_size)
        column_families = column_families or {
            key: options
            for key in Rdict.list_cf(path, options)
        } if os.path.exists(path + '/CURRENT') else {}
        self.db = Rdict(path, options, column_families, access_type)

    @staticmethod
    def _default_options(target_table_size: int):
        options = Options()
        compaction_options = FifoCompactOptions()
        compaction_options.max_table_files_size = target_table_size
        options.create_if_missing(True)
        options.set_max_background_jobs(os.cpu_count() or 2)
        options.increase_parallelism(os.cpu_count() or 2)
        options.set_log_file_time_to_roll(30 * MINUTES)
        options.set_keep_log_file_num(1)
        options.set_max_log_file_size(int(0.1 * MB))
        options.set_max_manifest_file_size(MB)
        options.set_fifo_compaction_options(compaction_options)
        options.set_compaction_style(DBCompactionStyle.fifo())
        options.set_level_zero_file_num_compaction_trigger(4)
        options.set_level_zero_slowdown_writes_trigger(6)
        options.set_level_zero_stop_writes_trigger(8)
        options.set_max_write_buffer_number(2)
        options.set_write_buffer_size(1 * MB)
        options.set_target_file_size_base(256 * MB)
        options.set_max_bytes_for_level_base(1024 * MB)
        options.set_max_bytes_for_level_multiplier(4.0)
        options.set_compression_type(DBCompressionType.lz4())
        options.set_delete_obsolete_files_period_micros(10 * 1000)
        return options

    @contextmanager
    def _get_lock(self, key):
        """Get lock from a pool of locks based on key."""
        index = hash(key) % self._number_of_locks
        with (lock := self._locks[index]):
            yield lock

    def __call__(self, key, val, *args) -> None:
        """Call cache to set item."""
        self.__setitem__(key, val)

    def __contains__(self, key) -> bool:
        """Key exists in db."""
        return key in self.db

    def __delitem__(self, key) -> None:
        """Delete item from db."""
        del self.db[key]

    def __getitem__(self, key) -> Any:
        """Get item from db or None."""
        try:
            return self.db[key]
        except KeyError:
            pass

    def __setitem__(self, key, val) -> None:
        """Set item in db."""
        with self._get_lock(key):
            self.db[key] = val

    def __enter__(self) -> 'Cache':
        """Contextmanager."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Exit contextmanager."""
        self.close()

    def set_dumps(self, dumps: Callable[[Any], bytes]) -> None:
        """Set custom dumps function."""
        return self.db.set_dumps(dumps)

    def set_loads(self, loads: Callable[[bytes], Any]) -> None:
        """Set custom loads function."""
        return self.db.set_loads(loads)

    def set_read_options(self, read_opt: ReadOptions) -> None:
        """Set custom read options."""
        return self.db.set_read_options(read_opt)

    def set_write_options(self, write_opt: WriteOptions) -> None:
        """Set custom write options."""
        return self.db.set_write_options(write_opt)

    @contextmanager
    def transaction(self, key) -> Any:
        """Lock the db entry while using the context manager."""
        with self._get_lock(key):
            yield self

    def get(
        self,
        key: Union[str, int, float, bytes, bool, List[
            Union[str, int, float, bytes, bool]]],
        default: Any = None,
        read_opt: Union[ReadOptions, None] = None
    ) -> Optional[Any]:
        """Get item from database by key."""
        return self.db.get(key, default, read_opt)

    def put(
        self,
        key: Union[str, int, float, bytes, bool],
        value: Any,
        write_opt: Union[WriteOptions, None] = None
    ) -> None:
        """Put item in database using key."""
        with self._get_lock(key):
            return self.db.put(key, value, write_opt)

    def delete(
        self,
        key: Union[str, int, float, bytes, bool],
        write_opt: Union[WriteOptions, None] = None
    ) -> None:
        """Delete item from database."""
        return self.db.delete(key, write_opt)

    def key_may_exist(
        self,
        key: Union[str, int, float, bytes, bool],
        fetch: bool = False,
        read_opt=None
    ) -> Union[bool, Tuple[bool, Any]]:
        """Check if a key exists without performing IO operations."""
        return self.db.key_may_exist(key, fetch, read_opt)

    def iter(self, read_opt: Union[ReadOptions, None] = None) -> RdictIter:
        """Get iterable."""
        return self.db.iter(read_opt)

    def items(
        self,
        backwards: bool = False,
        from_key: Union[str, int, float, bytes, bool, None] = None,
        read_opt: Union[ReadOptions, None] = None
    ) -> RdictItems:
        """Get tuples of key-value pairs."""
        return self.db.items(backwards, from_key, read_opt)

    def keys(
        self,
        backwards: bool = False,
        from_key: Union[str, int, float, bytes, bool, None] = None,
        read_opt: Union[ReadOptions, None] = None
    ) -> RdictKeys:
        """Get keys."""
        return self.db.keys(backwards, from_key, read_opt)

    def values(
        self,
        backwards: bool = False,
        from_key: Union[str, int, float, bytes, bool, None] = None,
        read_opt: Union[ReadOptions, None] = None
    ) -> RdictValues:
        """Get values."""
        return self.db.values(backwards, from_key, read_opt)

    def ingest_external_file(
        self,
        paths: List[str],
        opts: IngestExternalFileOptions = IngestExternalFileOptions()
    ) -> None:
        """Load list of SST files into current column family."""
        return self.db.ingest_external_file(paths, opts)

    def get_column_family(self, name: str) -> Rdict:
        """Get column family by name."""
        return self.db.get_column_family(name)

    def get_column_family_handle(self, name: str) -> ColumnFamily:
        """Get column family handle by name."""
        return self.db.get_column_family_handle(name)

    def drop_column_family(self, name: str) -> None:
        """Drop column family by name."""
        return self.db.drop_column_family(name)

    def create_column_family(
        self,
        name: str,
        options: Options = Options()
    ) -> Rdict:
        """Create column family."""
        return self.db.create_column_family(name, options)

    def delete_range(
        self,
        begin: Union[str, int, float, bytes, bool],
        end: Union[str, int, float, bytes, bool],
        write_opt: Union[WriteOptions, None] = None
    ) -> None:
        """Delete database items, excluding end."""
        return self.db.delete_range(begin, end, write_opt)

    def snapshot(self) -> Snapshot:
        """Create snapshot of current column family."""
        return self.db.snapshot()

    def path(self) -> str:
        """Get current database path."""
        return self.db.path()

    def set_options(self, options: Dict[str, str]) -> None:
        """Set options for current column family."""
        return self.db.set_options(options)

    def property_value(self, name: str) -> Union[str, None]:
        """Get property by name from current column family."""
        return self.db.property_value(name)

    def property_int_value(self, name: str) -> Union[int, None]:
        """Get property as int by name from current column family."""
        return self.db.property_int_value(name)

    def latest_sequence_number(self) -> int:
        """Get sequence number of the most recent transaction."""
        return self.db.latest_sequence_number()

    def live_files(self) -> List[Dict[str, Any]]:
        """Get list of all table files with their level, start- and end key."""
        return self.db.live_files()

    def compact_range(
        self, begin: Union[str, int, float, bytes, bool, None],
        end: Union[str, int, float, bytes, bool, None],
        compact_opt: CompactOptions = CompactOptions()
    ) -> None:
        """Run manual compaction on range for the current column family."""
        return self.db.compact_range(begin, end, compact_opt)

    def close(self) -> None:
        """Flush memory to disk, and drop the current column family."""
        return self.db.close()

    def flush(self, wait: bool = True) -> None:
        """Manually flush the current column family."""
        return self.db.flush(wait)

    def flush_wal(self, sync: bool = True) -> None:
        """Manually flush the WAL buffer."""
        return self.db.flush_wal(sync)

    def destroy(self, options: Options = Options()) -> None:
        """Delete the database."""
        return Rdict.destroy(self.name, options)
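
To make the `Cache` surface above concrete, here is a minimal sketch of the calling conventions it defines — the callable form, `None` for missing keys, and `transaction` for keyed locking; the `db/wordcounts` path is a made-up example:

```py
from slipstream import Cache  # requires the 'cache' extra (rocksdict)

cache = Cache('db/wordcounts')   # hypothetical path

cache('hello', 1)                # callable form delegates to __setitem__
assert cache['hello'] == 1
assert cache['missing'] is None  # __getitem__ swallows KeyError

with cache.transaction('hello'):     # holds the keyed RLock
    cache['hello'] = cache['hello'] + 1

cache.close()
```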

slipstream_async-0.0.0a0/slipstream/codecs.py
@@ -0,0 +1,34 @@
"""Slipstream codecs."""

import logging
from abc import ABCMeta, abstractmethod
from json import dumps, loads
from typing import Any

logger = logging.getLogger(__name__)


class ICodec(metaclass=ABCMeta):
    """Base class for codecs."""

    @abstractmethod
    def encode(self, obj: Any) -> bytes:
        """Serialize object."""
        raise NotImplementedError

    @abstractmethod
    def decode(self, s: bytes) -> object:
        """Deserialize object."""
        raise NotImplementedError


class JsonCodec(ICodec):
    """Serialize/deserialize json messages."""

    def encode(self, obj: Any) -> bytes:
        """Serialize message."""
        return dumps(obj, default=str).encode()

    def decode(self, s: bytes) -> object:
        """Deserialize message."""
        return loads(s.decode())
```
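
A quick round-trip through `JsonCodec` (an illustrative sketch, not package code); note that `default=str` stringifies non-JSON types such as `datetime`, so decoding yields a plain string rather than the original object:

```py
from datetime import datetime

from slipstream.codecs import JsonCodec

codec = JsonCodec()

raw = codec.encode({'event': 'click', 'at': datetime(2024, 1, 1)})
print(raw)
# b'{"event": "click", "at": "2024-01-01 00:00:00"}'

print(codec.decode(raw))
# {'event': 'click', 'at': '2024-01-01 00:00:00'} -- 'at' stays a string
```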

slipstream_async-0.0.0a0/slipstream/core.py
@@ -0,0 +1,340 @@
"""Core module."""

import logging
from asyncio import gather, sleep
from collections.abc import AsyncIterable
from inspect import signature
from re import sub
from typing import (
    Any,
    AsyncIterator,
    Awaitable,
    Callable,
    Generator,
    Iterable,
    Optional,
    Union,
)

from aiokafka import (
    AIOKafkaClient,
    AIOKafkaConsumer,
    AIOKafkaProducer,
    ConsumerRecord,
)

from slipstream.caching import Cache
from slipstream.codecs import ICodec
from slipstream.utils import Singleton, get_params_names, iscoroutinecallable

KAFKA_CLASSES_PARAMS = {
    **get_params_names(AIOKafkaConsumer),
    **get_params_names(AIOKafkaProducer),
    **get_params_names(AIOKafkaClient),
}
READ_FROM_START = -2
READ_FROM_END = -1

logger = logging.getLogger(__name__)


class Conf(metaclass=Singleton):
    """Define default Kafka configuration, optionally.

    >>> Conf({'bootstrap_servers': 'localhost:29091'})
    {'bootstrap_servers': 'localhost:29091'}
    """

    topics: list['Topic'] = []
    iterables: set[tuple[str, AsyncIterable]] = set()
    handlers: dict[str, set[Union[
        Callable[..., Awaitable[None]],
        Callable[..., None]
    ]]] = {}

    def register_topic(self, topic: 'Topic'):
        """Add topic to global conf."""
        self.topics.append(topic)

    def register_iterable(
        self,
        key: str,
        it: AsyncIterable
    ):
        """Add iterable to global Conf."""
        self.iterables.add((key, it))

    def register_handler(
        self,
        key: str,
        handler: Union[
            Callable[..., Awaitable[None]],
            Callable[..., None]
        ]
    ):
        """Add handler to global Conf."""
        handlers = self.handlers.get(key, set())
        handlers.add(handler)
        self.handlers[key] = handlers

    async def _start(self, **kwargs):
        try:
            await gather(*[
                self._distribute_messages(key, it, kwargs)
                for key, it in self.iterables
            ])
        except KeyboardInterrupt:
            pass
        finally:
            await self._shutdown()

    async def _shutdown(self) -> None:
        # When the program crashes immediately, give the topic
        # consumer and producer a chance to be fully initialized
        # before shutting them down
        await sleep(0.05)
        for t in self.topics:
            await t._shutdown()

    async def _distribute_messages(self, key, it, kwargs):
        async for msg in it:
            for h in self.handlers.get(key, []):
                await h(msg=msg, kwargs=kwargs)  # type: ignore

    def __init__(self, conf: dict = {}) -> None:
        """Define init behavior."""
        self.conf: dict[str, Any] = {}
        self.__update__(conf)

    def __update__(self, conf: dict = {}):
        """Set default app configuration."""
        self.conf = {**self.conf, **conf}
        for key, value in conf.items():
            key = sub('[^0-9a-zA-Z]+', '_', key)
            setattr(self, key, value)

    def __repr__(self) -> str:
        """Represent config."""
        return str(self.conf)


class Topic:
    """Act as a consumer and producer.

    >>> topic = Topic('emoji', {
    ...     'bootstrap_servers': 'localhost:29091',
    ...     'auto_offset_reset': 'earliest',
    ...     'group_id': 'demo',
    ... })

    Loop over topic (iterable) to consume from it:

    >>> async for msg in topic:  # doctest: +SKIP
    ...     print(msg.value)

    Call topic (callable) with key and value to produce to it:

    >>> await topic('key', {'msg': 'Hello World!'})  # doctest: +SKIP
    """

    def __init__(
        self,
        name: str,
        conf: dict = {},
        offset: Optional[int] = None,
        codec: Optional[ICodec] = None,
    ):
        """Create topic instance to produce and consume messages."""
        c = Conf()
        c.register_topic(self)
        self.name = name
        self.conf = {**c.conf, **conf}
        self.starting_offset = offset
        self.codec = codec

        self.consumer: Optional[AIOKafkaConsumer] = None
        self.producer: Optional[AIOKafkaProducer] = None

        if diff := set(self.conf).difference(KAFKA_CLASSES_PARAMS):
            logger.warning(
                f'Unexpected Topic {self.name} conf entries: {",".join(diff)}')

    @property
    async def admin(self) -> AIOKafkaClient:
        """Get started instance of Kafka admin client."""
        params = get_params_names(AIOKafkaClient)
        return AIOKafkaClient(**{
            k: v
            for k, v in self.conf.items()
            if k in params
        })

    async def get_consumer(self):
        """Get started instance of Kafka consumer."""
        params = get_params_names(AIOKafkaConsumer)
        if self.codec:
            self.conf['value_deserializer'] = self.codec.decode
        consumer = AIOKafkaConsumer(self.name, **{
            k: v
            for k, v in self.conf.items()
            if k in params
        })
        await consumer.start()
        return consumer

    async def get_producer(self):
        """Get started instance of Kafka producer."""
        params = get_params_names(AIOKafkaProducer)
        if self.codec:
            self.conf['value_serializer'] = self.codec.encode
        producer = AIOKafkaProducer(**{
            k: v
            for k, v in self.conf.items()
            if k in params
        })
        await producer.start()
        return producer

    async def __call__(self, key, value) -> None:
        """Produce message to topic."""
        if not self.producer:
            self.producer = await self.get_producer()
        if isinstance(key, str) and not self.conf.get('key_serializer'):
            key = key.encode()
        if isinstance(value, str) and not self.conf.get('value_serializer'):
            value = value.encode()
        try:
            await self.producer.send_and_wait(
                self.name,
                key=key,
                value=value,
            )
        except Exception as e:
            logger.error(
                f'Error raised while producing to Topic {self.name}: '
                + (f'{e.args[0]}' if e.args else '')
            )
            raise

    async def __aiter__(self) -> AsyncIterator[ConsumerRecord]:
        """Iterate over messages from topic."""
        if not self.consumer:
            self.consumer = await self.get_consumer()
        try:
            async for msg in self.consumer:
                if (
                    isinstance(msg.key, bytes)
                    and not self.conf.get('key_deserializer')
                ):
                    msg.key = msg.key.decode()
                if (
                    isinstance(msg.value, bytes)
                    and not self.conf.get('value_deserializer')
                ):
                    msg.value = msg.value.decode()
                yield msg
        except Exception as e:
            logger.error(
                f'Error raised while consuming from Topic {self.name}: '
                + (f'{e.args[0]}' if e.args else '')
            )
            raise

    async def __next__(self):
        """Get the next message from topic."""
        iterator = self.__aiter__()
        return await anext(iterator)

    async def _shutdown(self):
        """Cleanup and finalization."""
        if self.consumer:
            await self.consumer.stop()
        if self.producer:
            await self.producer.stop()


async def _sink_output(
    s: Union[
        Callable[..., Awaitable[None]],
        Callable[..., None]
    ],
    output: Any
) -> None:
    is_coroutine = iscoroutinecallable(s)
    if isinstance(s, Cache):
        if not isinstance(output, tuple):
            raise ValueError('Cache sink expects: Tuple[key, val].')
        s(*output)
    elif isinstance(s, Topic):
        if not isinstance(output, tuple):
            await s(b'', output)  # type: ignore
        else:
            await s(*output)  # type: ignore
    elif is_coroutine:
        await s(output)  # type: ignore
    else:
        s(output)


def handle(
    *iterable: AsyncIterable,
    sink: Iterable[Union[
        Callable[..., Awaitable[None]],
        Callable[..., None]
    ]] = []
):
    """Snap function to streams (iterables) and sinks (callables).

    Ex:
    >>> topic = Topic('demo')  # doctest: +SKIP
    >>> cache = Cache('state/demo')  # doctest: +SKIP

    >>> @handle(topic, sink=[print, cache])  # doctest: +SKIP
    ... def handler(msg, **kwargs):
    ...     return msg.key, msg.value
    """
    c = Conf()

    def _deco(f):
        parameters = signature(f).parameters.values()
        is_coroutine = iscoroutinecallable(f)

        async def _handler(msg, kwargs={}):
            if is_coroutine:
                if any(p.kind == p.VAR_KEYWORD for p in parameters):
                    output = await f(msg, **kwargs)
                else:
                    output = await f(msg) if parameters else await f()
            else:
                if any(p.kind == p.VAR_KEYWORD for p in parameters):
                    output = f(msg, **kwargs)
                else:
                    output = f(msg) if parameters else f()

            for val in output if isinstance(output, Generator) else [output]:
                for s in sink:
                    await _sink_output(s, val)

        for it in iterable:
            iterable_key = str(id(it))
            c.register_iterable(iterable_key, it)
            c.register_handler(iterable_key, _handler)
        return _handler

    return _deco


def stream(**kwargs):
    """Start the streams.

    Ex:
    >>> from asyncio import run
    >>> args = {
    ...     'env': 'DEV',
    ... }
    >>> run(stream(**args))
    """
    return Conf()._start(**kwargs)
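
Tying this module together, a Kafka-backed pipeline could be wired as in the sketch below; the broker address, topic name, and cache path echo the docstring examples and are assumptions, and the `kafka`/`cache` extras plus a running broker are required:

```py
from asyncio import run

from slipstream import Cache, Conf, Topic, handle, stream
from slipstream.codecs import JsonCodec

# Global defaults; per-topic conf entries override these
Conf({'bootstrap_servers': 'localhost:29091', 'group_id': 'demo'})

topic = Topic('emoji', {'auto_offset_reset': 'earliest'}, codec=JsonCodec())
cache = Cache('state/emoji')


@handle(topic, sink=[print, cache])
def handler(msg, **kwargs):
    # Cache sinks require (key, value) tuples (see _sink_output)
    return msg.key, msg.value


if __name__ == '__main__':
    run(stream())
```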

slipstream_async-0.0.0a0/slipstream/utils.py
@@ -0,0 +1,42 @@
"""Slipstream utilities."""

import logging
from asyncio import iscoroutinefunction
from inspect import signature
from typing import Any, Dict

logger = logging.getLogger(__name__)


def iscoroutinecallable(o: Any) -> bool:
    """Check whether the callable is a coroutine function."""
    return iscoroutinefunction(o) or (
        hasattr(o, '__call__')
        and iscoroutinefunction(o.__call__)
    )


def get_params_names(o: Any):
    """Return mapping of the callable's parameter names."""
    parameters = signature(o).parameters.values()
    return getattr(parameters, 'mapping')


class Singleton(type):
    """Maintain a single instance of a class."""

    _instances: Dict['Singleton', Any] = {}

    def __init__(cls, name, bases, dct):
        """Perform checks before instantiation."""
        if '__update__' not in dct:
            raise TypeError('Expected __update__.')

    def __call__(cls, *args, **kwargs):
        """Apply metaclass singleton action."""
        if cls not in cls._instances:
            cls._instances[cls] = super(
                Singleton, cls).__call__(*args, **kwargs)
        instance = cls._instances[cls]
        instance.__update__(*args, **kwargs)
        return instance
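
The `Singleton` metaclass above refuses classes lacking `__update__` and re-invokes `__update__` on every constructor call, so repeated instantiations merge into one shared instance. A minimal sketch with a hypothetical `Settings` class illustrates the behavior:

```py
from slipstream.utils import Singleton


class Settings(metaclass=Singleton):
    """Hypothetical config singleton; __update__ is mandatory."""

    def __init__(self, conf={}):
        self.conf = {}
        self.__update__(conf)

    def __update__(self, conf={}):
        self.conf.update(conf)


a = Settings({'x': 1})
b = Settings({'y': 2})
assert a is b                      # one shared instance
assert a.conf == {'x': 1, 'y': 2}  # later calls merge via __update__
```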

slipstream_async-0.0.0a0/slipstream_async.egg-info/PKG-INFO
@@ -0,0 +1,121 @@
Metadata-Version: 2.1
Name: slipstream-async
Version: 0.0.0a0
Summary: Streamline your stream processing.
Author-email: Menziess <stefan_schenk@hotmail.com>
License: MIT
Project-URL: repository, https://github.com/Menziess/slipstream
Project-URL: documentation, https://slipstream.readthedocs.io
Keywords: kafka,pubsub
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: License :: OSI Approved :: MIT License
Classifier: Topic :: Scientific/Engineering
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.10
Description-Content-Type: text/markdown
Requires-Dist: uvloop
Provides-Extra: kafka
Requires-Dist: aiokafka; extra == "kafka"
Provides-Extra: cache
Requires-Dist: rocksdict; extra == "cache"
Provides-Extra: dev
Requires-Dist: pydocstyle; extra == "dev"
Requires-Dist: autopep8; extra == "dev"
Requires-Dist: pyright; extra == "dev"
Requires-Dist: flake8; extra == "dev"
Requires-Dist: bandit; extra == "dev"
Requires-Dist: pre-commit; extra == "dev"
Provides-Extra: test
Requires-Dist: pytest; extra == "test"
Requires-Dist: pytest-cov; extra == "test"
Requires-Dist: pytest-mock; extra == "test"
Requires-Dist: pytest-forked; extra == "test"
Requires-Dist: pytest-asyncio; extra == "test"
Requires-Dist: testcontainers[kafka]; extra == "test"
Provides-Extra: docs
Requires-Dist: sphinx; extra == "docs"
Requires-Dist: sphinx-rtd-theme; extra == "docs"
Requires-Dist: sphinx-autoapi; extra == "docs"
Requires-Dist: sphinx-autobuild; extra == "docs"

[](https://github.com/Menziess/slipstream/actions/workflows/python-test.yml) [](https://slipstream.readthedocs.io/en/latest/?badge=latest) [](https://pepy.tech/project/slipstream)

# Slipstream

<img src="./res/logo.png" width="25%" height="25%" align="right" />

Slipstream provides a data-flow model to simplify development of stateful streaming applications.

```sh
pip install slipstream
```

```py
from asyncio import run

from slipstream import handle, stream


async def messages():
    for emoji in '🏆📞🐟👌':
        yield emoji


@handle(messages(), sink=[print])
def handle_message(msg):
    yield f'Hello {msg}!'


if __name__ == '__main__':
    run(stream())
```

```sh
Hello 🏆!
Hello 📞!
Hello 🐟!
Hello 👌!
```

## Usage

Async `iterables` are sources, (async) `callables` are sinks.

Decorate handler functions using `handle`, then run `stream` to start processing:

Multiple sources and sinks can be provided to establish many-to-many relations between them.
The 4 emojis were printed using the callable `print`.

## Quickstart

Install `aiokafka` (latest) along with slipstream:

```sh
pip install slipstream[kafka]
```

Spin up a local Kafka broker with [docker-compose.yml](docker-compose.yml), using `localhost:29091` to connect:

```sh
docker compose up broker -d
```

Follow the docs and set up a Kafka connection: [slipstream.readthedocs.io](https://slipstream.readthedocs.io).

## Features

- [`slipstream.handle`](slipstream/__init__.py): bind streams (iterables) and sinks (callables) to user-defined handler functions
- [`slipstream.stream`](slipstream/__init__.py): start streaming
- [`slipstream.Topic`](slipstream/core.py): consume from (iterable), and produce to (callable) Kafka using [**aiokafka**](https://aiokafka.readthedocs.io/en/stable/index.html)
- [`slipstream.Cache`](slipstream/caching.py): store data to disk using [**rocksdict**](https://congyuwang.github.io/RocksDict/rocksdict.html)
- [`slipstream.Conf`](slipstream/core.py): set global Kafka configuration (can be overridden per topic)
- [`slipstream.codecs.JsonCodec`](slipstream/codecs.py): serialize and deserialize JSON messages

slipstream_async-0.0.0a0/slipstream_async.egg-info/SOURCES.txt
@@ -0,0 +1,16 @@
README.md
pyproject.toml
slipstream/__init__.py
slipstream/__version__.py
slipstream/caching.py
slipstream/codecs.py
slipstream/core.py
slipstream/utils.py
slipstream_async.egg-info/PKG-INFO
slipstream_async.egg-info/SOURCES.txt
slipstream_async.egg-info/dependency_links.txt
slipstream_async.egg-info/entry_points.txt
slipstream_async.egg-info/requires.txt
slipstream_async.egg-info/top_level.txt
tests/test__init__.py
tests/test_cache.py

slipstream_async-0.0.0a0/slipstream_async.egg-info/dependency_links.txt
@@ -0,0 +1 @@


slipstream_async-0.0.0a0/slipstream_async.egg-info/requires.txt
@@ -0,0 +1,29 @@
uvloop

[cache]
rocksdict

[dev]
pydocstyle
autopep8
pyright
flake8
bandit
pre-commit

[docs]
sphinx
sphinx-rtd-theme
sphinx-autoapi
sphinx-autobuild

[kafka]
aiokafka

[test]
pytest
pytest-cov
pytest-mock
pytest-forked
pytest-asyncio
testcontainers[kafka]

slipstream_async-0.0.0a0/slipstream_async.egg-info/top_level.txt
@@ -0,0 +1 @@
slipstream

slipstream_async-0.0.0a0/tests/test__init__.py
@@ -0,0 +1,67 @@
from asyncio import sleep

import pytest

from slipstream import Conf, handle, stream


async def async_iterable(it):
    """Make synchronous Iterable act like AsyncIterable."""
    for msg in it:
        await sleep(0.01)
        yield msg


def test_handle():
    """Should register iterable."""
    Conf().iterables = set()

    iterable = async_iterable(range(1))
    iterable_key = str(id(iterable))
    iterable_item = (iterable_key, iterable)

    @handle(iterable)
    def _(msg):
        return msg

    assert Conf().iterables == set([iterable_item])


@pytest.mark.asyncio
async def test_stream(mocker):
    """Should start distributing messages for each registered iterable."""
    Conf().iterables = set()
    spy = mocker.spy(Conf(), '_distribute_messages')

    it = async_iterable(range(1))
    iterable_key = str(id(it))
    Conf().register_iterable(iterable_key, it)

    assert spy.call_count == 0
    await stream()
    assert spy.call_count == 1


@pytest.mark.asyncio
async def test_kwargable_function():
    """Should try to pass kwargs to user-defined handler function."""
    my_kwargs = {
        'my_kwarg': 'kwarg value'
    }
    is_kwargable = False
    is_unkwargable = False

    @handle(async_iterable(range(1)))
    def kwargable(msg, **kwargs):
        nonlocal is_kwargable
        is_kwargable = kwargs == my_kwargs

    @handle(async_iterable(range(1)))
    def unkwargable(msg):
        nonlocal is_unkwargable
        is_unkwargable = msg == 0

    await stream(**my_kwargs)

    assert is_kwargable is True
    assert is_unkwargable is True

slipstream_async-0.0.0a0/tests/test_cache.py
@@ -0,0 +1,68 @@
from threading import Thread
from time import sleep

import pytest


@pytest.mark.serial
@pytest.mark.parametrize('key,val,updated', [
    (b'123', 'a', 'b'),
    ('123', 'b', 'c'),
    (True, 'c', 'd'),
    (123, 'd', 'e'),
])
def test_crud(key, val, updated, cache):
    """Test create/read/update/delete."""
    cache[key] = val
    assert cache[key] == val
    cache[key] = updated
    assert cache[key] == updated
    del cache[key]
    assert cache[key] is None


def test_iterability(cache):
    """Test iterability."""
    cache[123] = 123
    it = cache.iter()
    it.seek_to_first()

    assert it.valid()
    while it.valid():
        assert it.key() == 123
        assert it.value() == 123
        it.next()

    assert list(cache.keys()) == [123]
    assert list(cache.values()) == [123]
    assert list(cache.items()) == [(123, 123)]


def test_transaction(cache):
    """Test transaction."""
    key, result = '123', []

    def try_access_locked_cache():
        result.append(cache[key])
        cache[key] = 'b'
        result.append(cache[key])

    t = Thread(target=try_access_locked_cache)

    with cache.transaction(key):
        cache[key] = 'a'

        # Within the transaction, the thread reads and alters cache[key],
        # adding its value to the result list; alterations shouldn't work
        t.start()
        t.join(timeout=0.01)
        if t.is_alive():
            result.append('Timeout')

        assert result == ['a', 'Timeout']
        assert cache[key] == 'a'

    # The thread is still running here, so outside of the
    # transaction it will eventually succeed to add 'b'
    sleep(0.01)
    assert cache[key] == 'b'
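
These tests rely on a `cache` fixture that is absent from this diff (presumably a conftest.py that wasn't packaged); a plausible minimal version, assuming pytest's built-in `tmp_path` fixture, could be:

```py
# conftest.py -- hypothetical sketch, not part of this package diff
import pytest

from slipstream import Cache


@pytest.fixture
def cache(tmp_path):
    c = Cache(str(tmp_path / 'db'))
    yield c
    c.close()
```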