pyfreeflow 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyfreeflow/__init__.py +34 -0
- pyfreeflow/ext/__init__.py +13 -0
- pyfreeflow/ext/buffer_operator.py +159 -0
- pyfreeflow/ext/crypto_operator.py +44 -0
- pyfreeflow/ext/data_transformer.py +217 -0
- pyfreeflow/ext/env_operator.py +23 -0
- pyfreeflow/ext/file_operator.py +207 -0
- pyfreeflow/ext/jwt_operator.py +140 -0
- pyfreeflow/ext/pgsql_executor.py +167 -0
- pyfreeflow/ext/rest_api_requester.py +159 -0
- pyfreeflow/ext/sleep_operator.py +53 -0
- pyfreeflow/ext/types.py +58 -0
- pyfreeflow/pipeline.py +127 -0
- pyfreeflow/registry.py +34 -0
- pyfreeflow/utils.py +67 -0
- pyfreeflow-0.1.0.data/scripts/pyfreeflow-cli.py +86 -0
- pyfreeflow-0.1.0.dist-info/METADATA +143 -0
- pyfreeflow-0.1.0.dist-info/RECORD +21 -0
- pyfreeflow-0.1.0.dist-info/WHEEL +5 -0
- pyfreeflow-0.1.0.dist-info/licenses/LICENSE +661 -0
- pyfreeflow-0.1.0.dist-info/top_level.txt +1 -0
pyfreeflow/ext/jwt_operator.py
ADDED
@@ -0,0 +1,140 @@
from .types import FreeFlowExt
import jwt
import hashlib
import logging
import datetime as dt
from ..utils import DurationParser

__TYPENAME__ = "JwtOperator"


"""
run parameter:
{
    "state": { ... },
    "data": {
        "op": "encode|decode",        # operation (default encode)
        "headers": {},                # for encode (optional)
        "algorithm": {},              # for encode (optional)
        "body": "",                   # for encode
        "token": "",                  # for decode
        "headers_only": True|False    # only unverified headers (default False)
    }
}
"""


class JwtOperatorV1_0(FreeFlowExt):
    __typename__ = __TYPENAME__
    __version__ = "1.0"

    def __init__(self, name, pubkey_files, privkey_files, algorithms=["HS256"],
                 headers={}, verify_sign=True, verify_exp=True,
                 required_claims=[], duration=None, not_before=None,
                 issuer=None, max_tasks=4):
        super().__init__(name, max_tasks=max_tasks)

        self._algorithms = algorithms

        # key material is indexed by the SHA-256 digest of the file content;
        # the digest doubles as the JWT "kid" header
        self._pub_key = {}
        for key_file in pubkey_files:
            with open(key_file, "rb") as f:
                content = f.read()
                h = hashlib.sha256(content).hexdigest()
                self._pub_key[h] = content

        self._priv_key = {}
        for key_file in privkey_files:
            with open(key_file, "rb") as f:
                content = f.read()
                h = hashlib.sha256(content).hexdigest()
                self._priv_key[h] = content

        self._default_pub_key = list(self._pub_key.keys())[0]
        self._default_priv_key = list(self._priv_key.keys())[0]

        self._headers = headers
        self._options = {
            "require": [x for x in required_claims],
            "verify_signature": verify_sign,
            "verify_exp": verify_exp,
        }

        self._duration = DurationParser.parse(duration) if isinstance(duration, str) else None
        self._not_before = DurationParser.parse(not_before) if isinstance(not_before, str) else None
        self._issuer = issuer

        self._logger = logging.getLogger(".".join([__name__, self.__typename__,
                                                   self._name]))

        self._action = {
            "encode": self._do_encode,
            "decode": self._do_decode,
        }

    def _get_priv_key(self, data):
        kid = data.get("kid", self._default_priv_key)
        return kid, self._priv_key[kid]

    def _get_pub_key(self, headers):
        kid = headers.get("kid", self._default_pub_key)
        return kid, self._pub_key[kid]

    async def _do_encode(self, data):
        body = data.get("body")

        if not isinstance(body, dict):
            self._logger.error("Invalid input format '{}'".format(type(body)))
            return None, 101

        hdr = self._headers | data.get("headers", {})

        kid, key = self._get_priv_key(data)

        hdr["kid"] = kid

        if self._duration is not None and "exp" not in body:
            body["exp"] = int(dt.datetime.now(dt.UTC).timestamp() + self._duration)

        if self._not_before is not None and "nbf" not in body:
            body["nbf"] = int(dt.datetime.now(dt.UTC).timestamp() + self._not_before)

        if "iat" not in body:
            body["iat"] = int(dt.datetime.now(dt.UTC).timestamp())

        if self._issuer is not None and "iss" not in body:
            body["iss"] = self._issuer

        algorithm = data.get("algorithm", self._algorithms[0])

        return {"token": jwt.encode(body, key,
                                    algorithm=algorithm, headers=hdr)}, 0

    async def _do_decode(self, data):
        token = data.get("token")
        hdr_only = data.get("headers_only", False)

        if not isinstance(token, str):
            self._logger.error("Invalid input format '{}'".format(type(token)))
            return None, 101

        try:
            hdr = jwt.get_unverified_header(token)

            kid, key = self._get_pub_key(hdr)

            if not hdr_only:
                body = jwt.decode(token, key,
                                  algorithms=self._algorithms,
                                  options=self._options)
            else:
                body = None

            return {"headers": hdr, "body": body}, 0
        except Exception as ex:
            self._logger.error(ex)
            return {"headers": None, "body": None}, 102

    async def do(self, state, data):
        op = data.get("op", "encode")
        return state, await self._action[op](data)
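The operator follows the (param, rc) calling convention defined by FreeFlowExt in pyfreeflow/ext/types.py: run() hands the param dict to do(), which returns (state, (result, rc)). Below is a minimal usage sketch, not taken from the package docs; the key file path and the "1h" duration string are hypothetical, and the accepted duration format is an assumption about DurationParser.

import asyncio

async def main():
    op = JwtOperatorV1_0(
        "signer",
        pubkey_files=["secret.key"],   # hypothetical path; with HS256 the
        privkey_files=["secret.key"],  # same secret signs and verifies
        duration="1h")                 # assumed DurationParser format

    # encode: on success do() returns ({"token": ...}, 0)
    state, (res, rc) = await op.run({}, ({"op": "encode",
                                          "body": {"sub": "alice"}}, 0))

    # decode: on success do() returns ({"headers": ..., "body": ...}, 0)
    state, (res, rc) = await op.run(state, ({"op": "decode",
                                             "token": res["token"]}, 0))

asyncio.run(main())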
pyfreeflow/ext/pgsql_executor.py
ADDED
@@ -0,0 +1,167 @@
from .types import FreeFlowExt
import psycopg
import asyncio
import logging

__TYPENAME__ = "PgSqlExecutor"


#
# Connection Pool
#
class ConnectionPool():
    CLIENT = {}
    POOL = {}
    LOCK = asyncio.Lock()
    LOGGER = logging.getLogger(".".join([__name__, "ConnectionPool"]))

    @classmethod
    def register(cls, client_name, conninfo, max_size=4):
        if client_name not in cls.CLIENT.keys():
            cls.CLIENT[client_name] = {
                "conninfo": conninfo,
                "lock": asyncio.BoundedSemaphore(max_size)}

        if client_name not in cls.POOL.keys():
            cls.POOL[client_name] = asyncio.Queue()

    @classmethod
    async def get(cls, client_name):
        if client_name not in cls.CLIENT.keys():
            return None

        lock = cls.CLIENT[client_name]["lock"]
        cls.LOGGER.debug("GET {} Lock[{}/{}/{}] Queue[{}]".format(
            client_name, len(lock._waiters) if lock._waiters else 0,
            lock._value, lock._bound_value,
            cls.POOL[client_name].qsize()))
        await lock.acquire()

        try:
            # reuse a pooled connection if one is still alive
            while not cls.POOL[client_name].empty():
                conn = await cls.POOL[client_name].get()
                if await cls.is_alive(conn):
                    return conn

            conninfo = cls.CLIENT[client_name]["conninfo"]
            return await psycopg.AsyncConnection.connect(conninfo)
        except psycopg.errors.Error as ex:
            # release the semaphore slot so a failure does not leak it
            lock.release()
            raise ex

    @classmethod
    async def release(cls, client_name, conn):
        if client_name in cls.CLIENT.keys():
            lock = cls.CLIENT[client_name]["lock"]
            await cls.POOL[client_name].put(conn)
            lock.release()
            cls.LOGGER.debug("RELEASE {} Lock[{}/{}/{}] Queue[{}]".format(
                client_name, len(lock._waiters) if lock._waiters else 0,
                lock._value, lock._bound_value,
                cls.POOL[client_name].qsize()))
        else:
            await conn.close()

    @staticmethod
    async def is_alive(conn):
        try:
            async with conn.cursor() as cur:
                cur = await cur.execute("SELECT 1;")
                d = await cur.fetchall()
                del d
            await conn.commit()
            return True
        except psycopg.errors.Error:
            return False


#
# PgSql Executor
#
class PgSqlExecutor(FreeFlowExt):
    __typename__ = __TYPENAME__
    __version__ = "1.0"

    CONNECTION_STRING = "postgresql://{userspec}{hostspec}{dbspec}{paramspec}"

    def __init__(self, name, username=None, password=None, host=[],
                 dbname=None, param={}, statement=None, max_connections=4,
                 max_tasks=4):
        super().__init__(name, max_tasks=max_tasks)

        userspec = self._conninfo_helper(username, password, sep=":")

        hostspec = ",".join(host)
        hostspec = "@" + hostspec if len(hostspec) > 0 else ""

        dbspec = self._conninfo_helper(None, dbname, sep="/")

        if "connect_timeout" not in param.keys():
            param["connect_timeout"] = 30

        paramspec = "?" + "&".join([k + "=" + str(v) for k, v in param.items()])

        self._conninfo = self.CONNECTION_STRING.format(
            userspec=userspec, hostspec=hostspec, dbspec=dbspec,
            paramspec=paramspec)
        self._stm = statement
        assert (self._stm is not None)

        ConnectionPool.register(self._name, self._conninfo,
                                max_size=max_connections)

        self._logger = logging.getLogger(".".join([__name__, self.__typename__,
                                                   self._name]))

    def _conninfo_helper(self, a, b, sep=":"):
        return "{a}{s}{b}".format(
            a=a if a else "",
            s=sep if b else "",
            b=b if b else ""
        )

    async def __aenter__(self):
        # connections are acquired per call in do(); nothing to set up here
        return self

    async def __aexit__(self, exc_type, exc, tb):
        pass

    def __del__(self):
        pass

    async def do(self, state, data):
        if self._stm is None:
            return state, (data, 101)

        rs = {"resultset": []}
        rc = 0

        try:
            conn = await ConnectionPool.get(self._name)
        except psycopg.errors.Error as ex:
            self._logger.error(ex)
            return state, (rs, 101)

        try:
            async with conn.cursor() as cur:
                value = data.get("value")

                if value and len(value) > 1:
                    await cur.executemany(self._stm, value)
                else:
                    await cur.execute(self._stm, value)

                if cur.description:
                    rs["resultset"] = await cur.fetchall()

            await conn.commit()
        except psycopg.errors.Error as ex:
            rc = 102
            if not conn.closed:
                await conn.rollback()
            self._logger.error(ex)
        finally:
            await ConnectionPool.release(self._name, conn)

        return state, (rs, rc)
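A minimal usage sketch follows; the credentials, host, and SQL statement are hypothetical. do() borrows a pooled connection, runs the configured statement with the "value" parameters, and returns (state, (rs, rc)), where rc 0 means success and rs["resultset"] holds any fetched rows.

import asyncio

async def main():
    ex = PgSqlExecutor(
        "users-query",
        username="app", password="s3cret",      # hypothetical credentials
        host=["db1.example.com:5432"],          # hypothetical host
        dbname="appdb",
        statement="SELECT id, name FROM users WHERE id = %s;")

    state, (rs, rc) = await ex.run({}, ({"value": (42,)}, 0))
    print(rc, rs["resultset"])

asyncio.run(main())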
pyfreeflow/ext/rest_api_requester.py
ADDED
@@ -0,0 +1,159 @@
from .types import FreeFlowExt
import aiohttp
import yarl
import multidict
import json
import ssl
import asyncio
import logging
from ..utils import asyncio_run

__TYPENAME__ = "RestApiRequester"


"""
run parameter:
{
    "state": { ... },
    "data": {
        "headers": {},
        "body": {}
    }
}
"""


class RestApiRequesterV1_0(FreeFlowExt):
    __typename__ = __TYPENAME__
    __version__ = "1.0"

    def __init__(self, name, url, method="GET", headers={}, timeout=300,
                 sslenabled=True, insecure=False, cafile=None, capath=None,
                 cadata=None, max_tasks=4):
        super().__init__(name, max_tasks=max_tasks)

        self._url = url
        self._timeout = timeout
        self._headers = headers
        self._method = method.upper()

        self._logger = logging.getLogger(".".join([__name__, self.__typename__,
                                                   self._name]))

        if sslenabled:
            self._ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            if insecure:
                self._ssl_context.check_hostname = False
                self._ssl_context.verify_mode = ssl.CERT_NONE
            else:
                self._ssl_context.check_hostname = True
                self._ssl_context.verify_mode = ssl.CERT_REQUIRED
                if cafile or capath or cadata:
                    self._ssl_context.load_verify_locations(
                        cafile=cafile, capath=capath, cadata=cadata)
        else:
            self._ssl_context = None

        self._session = None

        self._method_op = {
            "GET": self._do_get,
            "POST": self._do_post,
        }

    def __str__(self):
        return "{typ}(name: {n}, version: {v}, url: {u}, headers: {h}, timeout: {tm_out})".format(
            typ=self.__typename__, n=self._name, v=self.__version__,
            u=self._url, h=self._headers, tm_out=self._timeout)

    def _validate_ssl_ca_config(self, config):
        keys = ["file", "path", "data"]
        return isinstance(config, dict) and len([k for k in config.keys() if k in keys]) > 0

    async def _ensure_session(self):
        if self._session is None or self._session.closed:
            t = aiohttp.ClientTimeout(total=self._timeout)
            self._session = aiohttp.ClientSession(timeout=t)

    async def _close(self):
        if self._session and not self._session.closed:
            await self._session.close()

    async def __aenter__(self):
        await self._ensure_session()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self._close()

    def __del__(self):
        asyncio_run(self._close(), force=True)

    def _multidict_to_dict(self, x):
        if isinstance(x, yarl.URL):
            return str(x)
        elif isinstance(x, multidict.CIMultiDictProxy):
            return dict(x)
        return x

    def _prepare_request_p(self, x):
        return {k: str(v) for k, v in x.items()} if x is not None else x

    async def _do_request(self, method, url, headers=None, params=None,
                          data=None):
        try:
            await self._ensure_session()

            async with self._session.request(
                    method, url, headers=headers, params=params, data=data,
                    ssl=self._ssl_context, allow_redirects=True) as resp:

                if resp.status >= 400:
                    self._logger.error(f"'{url}' response code {resp.status}")
                    return (
                        {"req": {}, "headers": {}, "body": {}}, 102)

                raw = await resp.read()

                try:
                    body = json.loads(raw.decode("utf-8"))
                except json.JSONDecodeError:
                    body = raw.decode("utf-8")  # or keep the raw bytes
                req_info = {k: self._multidict_to_dict(v)
                            for k, v in dict(resp.request_info._asdict()).items()}
                return (
                    {"req": req_info, "headers": dict(resp.headers), "body": body}, 0)

        except aiohttp.ClientError as ex:
            self._logger.error("aiohttp request error %s", ex)
            return (
                {"req": {}, "headers": {}, "body": {}}, 101)
        except asyncio.TimeoutError as ex:
            self._logger.error("aiohttp timeout on %s error %s", url, ex)
            return (
                {"req": {}, "headers": {}, "body": {}}, 104)

    async def _do_get(self, state, data):
        headers = self._headers | data.get("headers", {})

        url = self._url.format(**data.get("urlcomp", {}))
        query_params = data.get("body", {})

        return await self._do_request(
            "GET", url, headers=headers, params=query_params)

    async def _do_post(self, state, data):
        headers = self._headers | data.get("headers", {})

        url = self._url.format(**data.get("urlcomp", {}))
        body_bytes = json.dumps(data.get("body", {})).encode("utf-8")

        return await self._do_request("POST", url, headers=headers,
                                      data=body_bytes)

    async def do(self, state, data):
        if isinstance(data, dict):
            rval = await self._method_op[self._method](state, data)
            return state, rval
        self._logger.error("Bad request expected 'dict' got '{}'".format(type(data)))
        return (state, ({"headers": {}, "body": {}}, 103))
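A minimal usage sketch with a hypothetical endpoint: "urlcomp" entries fill the str.format() placeholders of the configured URL template, and for GET requests "body" becomes the query string.

import asyncio

async def main():
    async with RestApiRequesterV1_0(
            "get-user",
            url="https://api.example.com/users/{uid}",  # hypothetical URL
            method="GET") as req:
        state, (res, rc) = await req.run({}, ({"urlcomp": {"uid": 42},
                                               "body": {"verbose": "1"}}, 0))
        # rc == 0 on success; res carries "req", "headers" and "body"
        print(rc, res["body"])

asyncio.run(main())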
pyfreeflow/ext/sleep_operator.py
ADDED
@@ -0,0 +1,53 @@
from .types import FreeFlowExt
import random
import logging
import asyncio

__TYPENAME__ = "SleepOperator"


"""
run parameter:
{
    "state": { ... },
    "data": {}
}
"""


class SleepOperatorV1_0(FreeFlowExt):
    __typename__ = __TYPENAME__
    __version__ = "1.0"

    def __init__(self, name, sleep=5, max_tasks=4):
        super().__init__(name, max_tasks=max_tasks)
        self._sleep = sleep

        self._logger = logging.getLogger(".".join([__name__, self.__typename__,
                                                   self._name]))

    async def run(self, state, data):
        self._logger.debug("%s sleeping for %d", self._name, self._sleep)
        await asyncio.sleep(self._sleep)

        return state, data


class RandomSleepOperatorV1_0(FreeFlowExt):
    __typename__ = "Random" + __TYPENAME__
    __version__ = "1.0"

    def __init__(self, name, sleep_min=5, sleep_max=10, max_tasks=4):
        super().__init__(name, max_tasks=max_tasks)
        self._sleep_min = sleep_min
        self._sleep_max = sleep_max

        self._logger = logging.getLogger(".".join([__name__, self.__typename__,
                                                   self._name]))

    async def run(self, state, data):
        t = random.randint(self._sleep_min, self._sleep_max)
        self._logger.debug("%s sleeping for %d", self._name, t)
        await asyncio.sleep(t)

        return state, data
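Both classes override run() directly, so the incoming data, including the (param, rc) tuple, passes through untouched after the delay. A minimal sketch:

import asyncio

op = RandomSleepOperatorV1_0("jitter", sleep_min=1, sleep_max=3)
state, data = asyncio.run(op.run({}, ({"payload": 1}, 0)))
print(state, data)  # ({}, ({'payload': 1}, 0))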
pyfreeflow/ext/types.py
ADDED
@@ -0,0 +1,58 @@
from ..registry import ExtRegister
import asyncio

"""
run parameter:
{
    "state": { ... },
    "param": {
        <CLASS SPECIFIC PARAMETERS>
    }
}
"""


class FreeFlowExt(metaclass=ExtRegister):
    def __init__(self, name, max_tasks=4):
        self._name = name
        self._max_tasks = max_tasks

    async def do(self, state, data):
        raise NotImplementedError

    async def unpack(self, state, data):
        if isinstance(data, list):
            loop = asyncio.get_running_loop()

            # [param0, param1, ...]
            cur = self._max_tasks
            _data = []
            aws = []

            for i, p in enumerate(data):
                if cur == 0:
                    # note: asyncio.wait() lost its loop argument in
                    # Python 3.10, so the tasks are awaited without it
                    await asyncio.wait(aws,
                                       return_when=asyncio.FIRST_COMPLETED)
                    cur += 1

                if p[1] == 0:
                    aws.append(loop.create_task(
                        self.do(state, p[0]),
                        name=self._name + "-unpack-" + str(i)))
                    cur -= 1

            if aws:
                await asyncio.wait(aws,
                                   return_when=asyncio.ALL_COMPLETED)

            for task in aws:
                state, p = await task
                _data.append(p)
            return state, _data
        else:
            # param0 or param1 or ...
            if data[1] == 0:
                return await self.do(state, data[0])
            return state, data

    async def run(self, state, data={}):
        return await self.unpack(state, data)
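A minimal sketch of a custom extension (EchoExt is hypothetical, not part of the package): do() must return (state, (result, rc)), and unpack() fans a list of (param, rc) tuples out over at most max_tasks concurrent do() calls, skipping items whose rc is non-zero.

import asyncio

class EchoExt(FreeFlowExt):
    __typename__ = "EchoExt"   # assumed to be what ExtRegister keys on
    __version__ = "1.0"

    async def do(self, state, data):
        return state, ({"echo": data}, 0)

async def main():
    ext = EchoExt("echo", max_tasks=2)
    state, results = await ext.run({}, [({"i": 1}, 0), ({"i": 2}, 0)])
    print(results)  # [({'echo': {'i': 1}}, 0), ({'echo': {'i': 2}}, 0)]

asyncio.run(main())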
pyfreeflow/pipeline.py
ADDED
@@ -0,0 +1,127 @@
from .registry import ExtRegistry
import networkx as nx
import io
import copy
import asyncio
import logging

"""
Example of configuration file

last: "D"  # Optional
node:
  - name: "A"
    type: "RestApiRequester"
    version: "1.0"
    config: {}
  - name: "B"
    type: "DataTransformer"
    version: "1.0"
    config: {}
  - name: "C"
    type: "RestApiRequester"
    version: "1.0"
    config: {}
  - name: "D"
    type: "DataTransformer"
    version: "1.0"
    config: {}
digraph:
  - A -> B
  - B -> C
  - A -> D
"""


class Pipeline():
    def __init__(self, node, digraph, last=None, name="stream"):
        self._name = name
        self._registry = {}
        self._data = {}
        self._state = {}
        self._last = last
        self._lock = asyncio.Lock()
        self._cond = asyncio.Condition()

        self._logger = logging.getLogger(".".join([__name__, "Pipeline",
                                                   self._name]))

        for cls in node:
            cls_name = cls.get("name")
            cls_config = cls.get("config", {})
            cls_type = cls.get("type")
            cls_version = cls.get("version")

            assert (cls_name not in self._registry.keys())

            self._registry[cls_name] = ExtRegistry.get_registered_class(
                cls_type, cls_version)(cls_name, **cls_config)

        dot = io.StringIO("digraph D {" + "\n".join(digraph) + "}")
        self._G = nx.nx_pydot.read_dot(dot)

        self._tree = list(nx.topological_sort(self._G))

    async def _cleanup(self):
        t = self._state
        self._state = {}
        del t

        t = self._data
        self._data = {}
        del t

    async def _task(self, n, _data):
        try:
            self._state, self._data[n] = await self._registry[n].run(
                self._state, _data)

        except Exception as ex:
            self._logger.error(ex)
        finally:
            async with self._cond:
                self._cond.notify()

    async def run(self, data={}):
        async with self._lock:
            degrees = {x[0]: x[1] for x in self._G.in_degree()}
            loop = asyncio.get_running_loop()

            pending = len(self._tree)
            task = {}

            while pending > 0:
                # schedule every node whose predecessors have all completed
                nodes = [k for k, v in degrees.items() if v == 0]
                for n in nodes:
                    _prev = list(self._G.predecessors(n))
                    if len(_prev) > 1:
                        _data = [self._data.get(x) for x in _prev]
                    elif len(_prev) == 1:
                        _data = self._data.get(_prev[0])
                    else:
                        _data = (data, 0)

                    task[n] = loop.create_task(self._task(n, _data),
                                               name=n)
                    degrees[n] -= 1

                async with self._cond:
                    await self._cond.wait()

                nodes.clear()
                for tname, t in {k: v for k, v in task.items() if v.done()}.items():
                    degrees[tname] -= 1
                    pending -= 1
                    del task[tname]
                    for succ in self._G.successors(tname):
                        degrees[succ] -= 1

            if self._last is not None:
                _data = self._data.get(self._last, {})
            else:
                _data = self._data.get(self._tree[-1], {})

            rep = (copy.deepcopy(_data[0]), _data[1])

            await self._cleanup()
            return rep
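A minimal construction sketch using the two sleep operators shipped in this wheel (the node configs themselves are hypothetical): each node entry is instantiated through ExtRegistry by its type name and version, and every digraph entry must be a valid DOT edge such as "A -> B".

import asyncio

node = [
    {"name": "A", "type": "RandomSleepOperator", "version": "1.0",
     "config": {"sleep_min": 1, "sleep_max": 2}},
    {"name": "B", "type": "SleepOperator", "version": "1.0",
     "config": {"sleep": 1}},
]

pipe = Pipeline(node, digraph=["A -> B"], last="B")

# run() returns a deep copy of the last node's (data, rc) pair
result = asyncio.run(pipe.run({"payload": 1}))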