kubernetes-watch 0.1.4__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kube_watch/enums/kube.py +5 -5
- kube_watch/enums/logic.py +8 -8
- kube_watch/enums/providers.py +12 -12
- kube_watch/enums/workflow.py +17 -17
- kube_watch/models/common.py +16 -16
- kube_watch/models/workflow.py +60 -60
- kube_watch/modules/clusters/kube.py +185 -185
- kube_watch/modules/database/__init__.py +0 -0
- kube_watch/modules/database/model.py +12 -0
- kube_watch/modules/database/postgre.py +271 -0
- kube_watch/modules/logic/actions.py +55 -55
- kube_watch/modules/logic/checks.py +7 -7
- kube_watch/modules/logic/load.py +23 -8
- kube_watch/modules/logic/merge.py +31 -31
- kube_watch/modules/logic/scheduler.py +74 -74
- kube_watch/modules/mock/mock_generator.py +53 -53
- kube_watch/modules/providers/aws.py +210 -210
- kube_watch/modules/providers/git.py +32 -32
- kube_watch/modules/providers/github.py +126 -126
- kube_watch/modules/providers/vault.py +188 -166
- kube_watch/standalone/metarecogen/ckan_to_gn.py +132 -132
- kube_watch/watch/__init__.py +1 -1
- kube_watch/watch/helpers.py +170 -170
- kube_watch/watch/workflow.py +232 -100
- {kubernetes_watch-0.1.4.dist-info → kubernetes_watch-0.1.8.dist-info}/LICENSE +21 -21
- {kubernetes_watch-0.1.4.dist-info → kubernetes_watch-0.1.8.dist-info}/METADATA +5 -3
- kubernetes_watch-0.1.8.dist-info/RECORD +36 -0
- kubernetes_watch-0.1.4.dist-info/RECORD +0 -33
- {kubernetes_watch-0.1.4.dist-info → kubernetes_watch-0.1.8.dist-info}/WHEEL +0 -0
|
@@ -1,132 +1,132 @@
|
|
|
1
|
-
import requests
|
|
2
|
-
# from pathlib import Path
|
|
3
|
-
import os
|
|
4
|
-
import sys
|
|
5
|
-
|
|
6
|
-
"""
|
|
7
|
-
A simple script to:
|
|
8
|
-
1. Retrieve all public records from a CKAN service
|
|
9
|
-
2. Insert CKAN records into a Geonetwork service
|
|
10
|
-
|
|
11
|
-
This requires the 'iso19115' extension to be installed in CKAN and the following env. vars:
|
|
12
|
-
1. 'CKAN2GN_GN_USERNAME' geonetwork username for an account that can create records
|
|
13
|
-
2. 'CKAN2GN_GN_PASSWORD' geonetwork password for an account that can create records
|
|
14
|
-
3. 'CKAN2GN_GN_URL' geonetork service URL
|
|
15
|
-
4. 'CKAN2GN_CKAN_URL' CKAN service URL
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
# Geonetwork username and password:
|
|
19
|
-
GN_USERNAME = os.environ.get('CKAN2GN_GN_USERNAME')
|
|
20
|
-
GN_PASSWORD = os.environ.get('CKAN2GN_GN_PASSWORD')
|
|
21
|
-
|
|
22
|
-
# Geonetwork and CKAN server URLs
|
|
23
|
-
GN_URL = os.environ.get('CKAN2GN_GN_URL')
|
|
24
|
-
CKAN_URL = os.environ.get('CKAN2GN_CKAN_URL')
|
|
25
|
-
|
|
26
|
-
def get_gn_xsrf_token(session):
|
|
27
|
-
""" Retrieves XSRF token from Geonetwork
|
|
28
|
-
|
|
29
|
-
:param session: requests Session object
|
|
30
|
-
:returns: XSRF as string or None upon error
|
|
31
|
-
"""
|
|
32
|
-
authenticate_url = GN_URL + '/geonetwork/srv/eng/info?type=me'
|
|
33
|
-
response = session.post(authenticate_url)
|
|
34
|
-
|
|
35
|
-
# Extract XRSF token
|
|
36
|
-
xsrf_token = response.cookies.get("XSRF-TOKEN")
|
|
37
|
-
if xsrf_token:
|
|
38
|
-
return xsrf_token
|
|
39
|
-
return None
|
|
40
|
-
|
|
41
|
-
def list_ckan_records():
|
|
42
|
-
""" Contacts CKAN and retrieves a list of package ids for all public records
|
|
43
|
-
|
|
44
|
-
:returns: list of package id strings or None upon error
|
|
45
|
-
"""
|
|
46
|
-
session = requests.Session()
|
|
47
|
-
url_path = 'api/3/action/package_list' # Path('api') / '3' / 'action' / 'package_list'
|
|
48
|
-
url = f'{CKAN_URL}/{url_path}'
|
|
49
|
-
r = session.get(url)
|
|
50
|
-
resp = r.json()
|
|
51
|
-
if resp['success'] is False:
|
|
52
|
-
return None
|
|
53
|
-
return resp['result']
|
|
54
|
-
|
|
55
|
-
def get_ckan_record(package_id):
|
|
56
|
-
""" Given a package id retrieves its record metadata
|
|
57
|
-
|
|
58
|
-
:param package_id: CKAN package_id string
|
|
59
|
-
:returns: package metadata as a dict or None upon error
|
|
60
|
-
"""
|
|
61
|
-
session = requests.Session()
|
|
62
|
-
# Set up CKAN URL
|
|
63
|
-
url_path = 'api/3/action/iso19115_package_show' # Path('api') / '3' / 'action' / 'iso19115_package_show'
|
|
64
|
-
url = f'{CKAN_URL}/{url_path}'
|
|
65
|
-
r = session.get(url, params={'format':'xml', 'id':package_id})
|
|
66
|
-
resp = r.json()
|
|
67
|
-
if resp['success'] is False:
|
|
68
|
-
return None
|
|
69
|
-
return resp['result']
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def insert_gn_record(session, xsrf_token, xml_string):
|
|
73
|
-
""" Inserts a record into Geonetwork
|
|
74
|
-
|
|
75
|
-
:param session: requests Session object
|
|
76
|
-
:param xsrf_token: Geonetwork's XSRF token as a string
|
|
77
|
-
:param xml_string: XML to be inserted as a string
|
|
78
|
-
:returns: True or False if insert succeeded
|
|
79
|
-
"""
|
|
80
|
-
# Set header for connection
|
|
81
|
-
headers = {'Accept': 'application/json',
|
|
82
|
-
'Content-Type': 'application/xml',
|
|
83
|
-
'X-XSRF-TOKEN': xsrf_token
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
# Set the parameters
|
|
87
|
-
# Currently 'uuidProcessing' is set to 'NOTHING' so that records that
|
|
88
|
-
# already exist are rejected by Geonetwork as duplicates
|
|
89
|
-
params = {'metadataType': 'METADATA',
|
|
90
|
-
'publishToAll': 'true',
|
|
91
|
-
'uuidProcessing': 'NOTHING', # Available values : GENERATEUUID, NOTHING, OVERWRITE
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
# Send a put request to the endpoint to create record
|
|
95
|
-
response = session.put(GN_URL + '/geonetwork/srv/api/0.1/records',
|
|
96
|
-
data=xml_string,
|
|
97
|
-
params=params,
|
|
98
|
-
auth=(GN_USERNAME, GN_PASSWORD),
|
|
99
|
-
headers=headers
|
|
100
|
-
)
|
|
101
|
-
resp = response.json()
|
|
102
|
-
|
|
103
|
-
# Check if record was created in Geonetwork
|
|
104
|
-
if response.status_code == requests.codes['created'] and resp['numberOfRecordsProcessed'] == 1 and \
|
|
105
|
-
resp['numberOfRecordsWithErrors'] == 0:
|
|
106
|
-
print("Inserted")
|
|
107
|
-
return True
|
|
108
|
-
print(f"Insert failed: status code: {response.status_code}\n{resp}")
|
|
109
|
-
return False
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
if __name__ == "__main__":
|
|
113
|
-
# Check env. vars
|
|
114
|
-
if GN_USERNAME is None or GN_PASSWORD is None or GN_URL is None or CKAN_URL is None:
|
|
115
|
-
print("Please define the following env. vars:")
|
|
116
|
-
print(" 'CKAN2GN_GN_USERNAME' 'CKAN2GN_GN_PASSWORD' 'CKAN2GN_GN_URL' 'CKAN2GN_CKAN_URL'")
|
|
117
|
-
sys.exit(1)
|
|
118
|
-
# Connect to server
|
|
119
|
-
session = requests.Session()
|
|
120
|
-
xsrf = get_gn_xsrf_token(session)
|
|
121
|
-
if xsrf is not None:
|
|
122
|
-
# Get records from CKAN
|
|
123
|
-
for id in list_ckan_records():
|
|
124
|
-
print(f"Inserting '{id}'")
|
|
125
|
-
xml_string = get_ckan_record(id)
|
|
126
|
-
if xml_string is not None:
|
|
127
|
-
# Insert GN record
|
|
128
|
-
insert_gn_record(session, xsrf, xml_string)
|
|
129
|
-
else:
|
|
130
|
-
print(f"Could not get record id {id} from CKAN")
|
|
131
|
-
|
|
132
|
-
|
|
1
|
+
import requests
|
|
2
|
+
# from pathlib import Path
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
A simple script to:
|
|
8
|
+
1. Retrieve all public records from a CKAN service
|
|
9
|
+
2. Insert CKAN records into a Geonetwork service
|
|
10
|
+
|
|
11
|
+
This requires the 'iso19115' extension to be installed in CKAN and the following env. vars:
|
|
12
|
+
1. 'CKAN2GN_GN_USERNAME' geonetwork username for an account that can create records
|
|
13
|
+
2. 'CKAN2GN_GN_PASSWORD' geonetwork password for an account that can create records
|
|
14
|
+
3. 'CKAN2GN_GN_URL' geonetork service URL
|
|
15
|
+
4. 'CKAN2GN_CKAN_URL' CKAN service URL
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
# Geonetwork username and password:
|
|
19
|
+
GN_USERNAME = os.environ.get('CKAN2GN_GN_USERNAME')
|
|
20
|
+
GN_PASSWORD = os.environ.get('CKAN2GN_GN_PASSWORD')
|
|
21
|
+
|
|
22
|
+
# Geonetwork and CKAN server URLs
|
|
23
|
+
GN_URL = os.environ.get('CKAN2GN_GN_URL')
|
|
24
|
+
CKAN_URL = os.environ.get('CKAN2GN_CKAN_URL')
|
|
25
|
+
|
|
26
|
+
def get_gn_xsrf_token(session):
|
|
27
|
+
""" Retrieves XSRF token from Geonetwork
|
|
28
|
+
|
|
29
|
+
:param session: requests Session object
|
|
30
|
+
:returns: XSRF as string or None upon error
|
|
31
|
+
"""
|
|
32
|
+
authenticate_url = GN_URL + '/geonetwork/srv/eng/info?type=me'
|
|
33
|
+
response = session.post(authenticate_url)
|
|
34
|
+
|
|
35
|
+
# Extract XRSF token
|
|
36
|
+
xsrf_token = response.cookies.get("XSRF-TOKEN")
|
|
37
|
+
if xsrf_token:
|
|
38
|
+
return xsrf_token
|
|
39
|
+
return None
|
|
40
|
+
|
|
41
|
+
def list_ckan_records():
|
|
42
|
+
""" Contacts CKAN and retrieves a list of package ids for all public records
|
|
43
|
+
|
|
44
|
+
:returns: list of package id strings or None upon error
|
|
45
|
+
"""
|
|
46
|
+
session = requests.Session()
|
|
47
|
+
url_path = 'api/3/action/package_list' # Path('api') / '3' / 'action' / 'package_list'
|
|
48
|
+
url = f'{CKAN_URL}/{url_path}'
|
|
49
|
+
r = session.get(url)
|
|
50
|
+
resp = r.json()
|
|
51
|
+
if resp['success'] is False:
|
|
52
|
+
return None
|
|
53
|
+
return resp['result']
|
|
54
|
+
|
|
55
|
+
def get_ckan_record(package_id):
|
|
56
|
+
""" Given a package id retrieves its record metadata
|
|
57
|
+
|
|
58
|
+
:param package_id: CKAN package_id string
|
|
59
|
+
:returns: package metadata as a dict or None upon error
|
|
60
|
+
"""
|
|
61
|
+
session = requests.Session()
|
|
62
|
+
# Set up CKAN URL
|
|
63
|
+
url_path = 'api/3/action/iso19115_package_show' # Path('api') / '3' / 'action' / 'iso19115_package_show'
|
|
64
|
+
url = f'{CKAN_URL}/{url_path}'
|
|
65
|
+
r = session.get(url, params={'format':'xml', 'id':package_id})
|
|
66
|
+
resp = r.json()
|
|
67
|
+
if resp['success'] is False:
|
|
68
|
+
return None
|
|
69
|
+
return resp['result']
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def insert_gn_record(session, xsrf_token, xml_string):
|
|
73
|
+
""" Inserts a record into Geonetwork
|
|
74
|
+
|
|
75
|
+
:param session: requests Session object
|
|
76
|
+
:param xsrf_token: Geonetwork's XSRF token as a string
|
|
77
|
+
:param xml_string: XML to be inserted as a string
|
|
78
|
+
:returns: True or False if insert succeeded
|
|
79
|
+
"""
|
|
80
|
+
# Set header for connection
|
|
81
|
+
headers = {'Accept': 'application/json',
|
|
82
|
+
'Content-Type': 'application/xml',
|
|
83
|
+
'X-XSRF-TOKEN': xsrf_token
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
# Set the parameters
|
|
87
|
+
# Currently 'uuidProcessing' is set to 'NOTHING' so that records that
|
|
88
|
+
# already exist are rejected by Geonetwork as duplicates
|
|
89
|
+
params = {'metadataType': 'METADATA',
|
|
90
|
+
'publishToAll': 'true',
|
|
91
|
+
'uuidProcessing': 'NOTHING', # Available values : GENERATEUUID, NOTHING, OVERWRITE
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
# Send a put request to the endpoint to create record
|
|
95
|
+
response = session.put(GN_URL + '/geonetwork/srv/api/0.1/records',
|
|
96
|
+
data=xml_string,
|
|
97
|
+
params=params,
|
|
98
|
+
auth=(GN_USERNAME, GN_PASSWORD),
|
|
99
|
+
headers=headers
|
|
100
|
+
)
|
|
101
|
+
resp = response.json()
|
|
102
|
+
|
|
103
|
+
# Check if record was created in Geonetwork
|
|
104
|
+
if response.status_code == requests.codes['created'] and resp['numberOfRecordsProcessed'] == 1 and \
|
|
105
|
+
resp['numberOfRecordsWithErrors'] == 0:
|
|
106
|
+
print("Inserted")
|
|
107
|
+
return True
|
|
108
|
+
print(f"Insert failed: status code: {response.status_code}\n{resp}")
|
|
109
|
+
return False
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
if __name__ == "__main__":
|
|
113
|
+
# Check env. vars
|
|
114
|
+
if GN_USERNAME is None or GN_PASSWORD is None or GN_URL is None or CKAN_URL is None:
|
|
115
|
+
print("Please define the following env. vars:")
|
|
116
|
+
print(" 'CKAN2GN_GN_USERNAME' 'CKAN2GN_GN_PASSWORD' 'CKAN2GN_GN_URL' 'CKAN2GN_CKAN_URL'")
|
|
117
|
+
sys.exit(1)
|
|
118
|
+
# Connect to server
|
|
119
|
+
session = requests.Session()
|
|
120
|
+
xsrf = get_gn_xsrf_token(session)
|
|
121
|
+
if xsrf is not None:
|
|
122
|
+
# Get records from CKAN
|
|
123
|
+
for id in list_ckan_records():
|
|
124
|
+
print(f"Inserting '{id}'")
|
|
125
|
+
xml_string = get_ckan_record(id)
|
|
126
|
+
if xml_string is not None:
|
|
127
|
+
# Insert GN record
|
|
128
|
+
insert_gn_record(session, xsrf, xml_string)
|
|
129
|
+
else:
|
|
130
|
+
print(f"Could not get record id {id} from CKAN")
|
|
131
|
+
|
|
132
|
+
|
kube_watch/watch/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
from .workflow import
|
|
1
|
+
from .workflow import single_run_workflow_async, batch_run_workflow, single_run_workflow
|
kube_watch/watch/helpers.py
CHANGED
|
@@ -1,171 +1,171 @@
|
|
|
1
|
-
from prefect import task
|
|
2
|
-
import sys
|
|
3
|
-
from prefect.task_runners import ConcurrentTaskRunner,
|
|
4
|
-
from prefect import runtime
|
|
5
|
-
# from prefect_dask.task_runners import DaskTaskRunner
|
|
6
|
-
from typing import Dict, List
|
|
7
|
-
import yaml
|
|
8
|
-
import importlib
|
|
9
|
-
import os
|
|
10
|
-
from kube_watch.models.workflow import WorkflowConfig, BatchFlowConfig, Task
|
|
11
|
-
from kube_watch.enums.workflow import ParameterType, TaskRunners, TaskInputsType
|
|
12
|
-
from kube_watch.modules.logic.merge import merge_logical_list
|
|
13
|
-
|
|
14
|
-
def load_workflow_config(yaml_file) -> WorkflowConfig:
|
|
15
|
-
with open(yaml_file, 'r') as file:
|
|
16
|
-
data = yaml.safe_load(file)
|
|
17
|
-
return WorkflowConfig(**data['workflow'])
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def load_batch_config(yaml_file) -> BatchFlowConfig:
|
|
21
|
-
with open(yaml_file, 'r') as file:
|
|
22
|
-
data = yaml.safe_load(file)
|
|
23
|
-
return BatchFlowConfig(**data['batchFlows'])
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# def execute_task(func, *args, name="default_task_name", **kwargs):
|
|
28
|
-
# @task(name=name)
|
|
29
|
-
# def func_task():
|
|
30
|
-
# return func(*args, **kwargs)
|
|
31
|
-
# return func_task
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def func_task(name="default_task_name", task_input_type: TaskInputsType = TaskInputsType.ARG):
|
|
35
|
-
if task_input_type == TaskInputsType.ARG:
|
|
36
|
-
@task(name=name)
|
|
37
|
-
def execute_task(func, *args, **kwargs):
|
|
38
|
-
return func(*args, **kwargs)
|
|
39
|
-
return execute_task
|
|
40
|
-
if task_input_type == TaskInputsType.DICT:
|
|
41
|
-
@task(name=name)
|
|
42
|
-
def execute_task_dict(func, dict_inp):
|
|
43
|
-
return func(dict_inp)
|
|
44
|
-
return execute_task_dict
|
|
45
|
-
raise ValueError(f'Unknow Task Input Type. It should either be {TaskInputsType.ARG} or {TaskInputsType.DICT} but {task_input_type} is provided.')
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
# @task
|
|
49
|
-
# def execute_task(func, *args, **kwargs):
|
|
50
|
-
# return func(*args, **kwargs)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def get_task_function(module_name, task_name, plugin_path=None):
|
|
55
|
-
"""
|
|
56
|
-
Fetch a function directly from a specified module.
|
|
57
|
-
|
|
58
|
-
Args:
|
|
59
|
-
module_name (str): The name of the module to import the function from. e.g. providers.aws
|
|
60
|
-
task_name (str): The name of the function to fetch from the module.
|
|
61
|
-
plugin_path (ster): define for external modules
|
|
62
|
-
|
|
63
|
-
Returns:
|
|
64
|
-
function: The function object fetched from the module.
|
|
65
|
-
"""
|
|
66
|
-
try:
|
|
67
|
-
if plugin_path:
|
|
68
|
-
# Temporarily prepend the plugin path to sys.path to find the module
|
|
69
|
-
module_path = os.path.join(plugin_path, *module_name.split('.')) + '.py'
|
|
70
|
-
module_spec = importlib.util.spec_from_file_location(module_name, module_path)
|
|
71
|
-
module = importlib.util.module_from_spec(module_spec)
|
|
72
|
-
module_spec.loader.exec_module(module)
|
|
73
|
-
else:
|
|
74
|
-
# Standard import from the internal library path
|
|
75
|
-
module = importlib.import_module(f"kube_watch.modules.{module_name}")
|
|
76
|
-
|
|
77
|
-
return getattr(module, task_name)
|
|
78
|
-
except ImportError as e:
|
|
79
|
-
raise ImportError(f"Unable to import module '{module_name}': {e}")
|
|
80
|
-
except AttributeError as e:
|
|
81
|
-
raise AttributeError(f"The module '{module_name}' does not have a function named '{task_name}': {e}")
|
|
82
|
-
# finally:
|
|
83
|
-
# if plugin_path:
|
|
84
|
-
# # Remove the plugin path from sys.path after importing
|
|
85
|
-
# sys.path.pop(0) # Using pop(0) is safer in the context of insert(0, plugin_path)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def resolve_parameter_value(param):
|
|
90
|
-
if param.type == ParameterType.FROM_ENV:
|
|
91
|
-
return os.getenv(param.value, '') # Default to empty string if env var is not set
|
|
92
|
-
if param.type == ParameterType.FROM_FLOW:
|
|
93
|
-
return runtime.flow_run.parameters.get(param.value, '')
|
|
94
|
-
return param.value
|
|
95
|
-
|
|
96
|
-
def prepare_task_inputs(parameters):
|
|
97
|
-
return {param.name: resolve_parameter_value(param) for param in parameters}
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def prepare_task_inputs_from_dep(task_data: Task, task_inputs: Dict, tasks):
|
|
101
|
-
for dep in task_data.dependency:
|
|
102
|
-
par_task = tasks[dep.taskName]
|
|
103
|
-
par_res = par_task.result()
|
|
104
|
-
if dep.inputParamName != None:
|
|
105
|
-
task_inputs.update({dep.inputParamName: par_res})
|
|
106
|
-
|
|
107
|
-
return task_inputs
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def resolve_conditional(task_data: Task, tasks):
|
|
111
|
-
lst_bools = []
|
|
112
|
-
for task_name in task_data.conditional.tasks:
|
|
113
|
-
if task_name not in tasks:
|
|
114
|
-
return False
|
|
115
|
-
|
|
116
|
-
par_task = tasks[task_name]
|
|
117
|
-
lst_bools.append(par_task.result())
|
|
118
|
-
return merge_logical_list(lst_bools, task_data.conditional.operation)
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def submit_task(task_name, task_data, task_inputs, func):
|
|
124
|
-
execute_task = func_task(name=task_name, task_input_type=task_data.inputsArgType)
|
|
125
|
-
if task_data.inputsArgType == TaskInputsType.ARG:
|
|
126
|
-
return execute_task.submit(func, **task_inputs)
|
|
127
|
-
if task_data.inputsArgType == TaskInputsType.DICT:
|
|
128
|
-
return execute_task.submit(func, dict_inp=task_inputs)
|
|
129
|
-
raise ValueError("Unknown Input Arg Type.")
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def resolve_runner(runner):
|
|
134
|
-
if runner == TaskRunners.CONCURRENT:
|
|
135
|
-
return ConcurrentTaskRunner
|
|
136
|
-
if runner == TaskRunners.SEQUENTIAL:
|
|
137
|
-
return
|
|
138
|
-
if runner == TaskRunners.DASK:
|
|
139
|
-
raise ValueError("Dask Not Implemented")
|
|
140
|
-
# return DaskTaskRunner
|
|
141
|
-
if runner == TaskRunners.RAY:
|
|
142
|
-
raise ValueError("Ray Not Implemented")
|
|
143
|
-
# return RayTaskRunner
|
|
144
|
-
raise ValueError("Invalid task runner type")
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
def filter_attributes(obj):
|
|
148
|
-
import uuid
|
|
149
|
-
from collections.abc import Iterable
|
|
150
|
-
import inspect
|
|
151
|
-
|
|
152
|
-
def is_simple(value):
|
|
153
|
-
""" Check if the value is a simple data type or a collection of simple data types """
|
|
154
|
-
if isinstance(value, (int, float, str, bool, type(None), uuid.UUID)):
|
|
155
|
-
return True
|
|
156
|
-
if isinstance(value, dict):
|
|
157
|
-
return all(is_simple(k) and is_simple(v) for k, v in value.items())
|
|
158
|
-
if isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
|
|
159
|
-
return all(is_simple(item) for item in value)
|
|
160
|
-
return False
|
|
161
|
-
|
|
162
|
-
result = {}
|
|
163
|
-
for attr in dir(obj):
|
|
164
|
-
# Avoid magic methods and attributes
|
|
165
|
-
if attr.startswith("__") and attr.endswith("__"):
|
|
166
|
-
continue
|
|
167
|
-
value = getattr(obj, attr)
|
|
168
|
-
# Filter out methods and check if the attribute value is simple
|
|
169
|
-
if not callable(value) and not inspect.isclass(value) and is_simple(value):
|
|
170
|
-
result[attr] = value
|
|
1
|
+
from prefect import task
|
|
2
|
+
import sys
|
|
3
|
+
from prefect.task_runners import ConcurrentTaskRunner, ThreadPoolTaskRunner
|
|
4
|
+
from prefect import runtime
|
|
5
|
+
# from prefect_dask.task_runners import DaskTaskRunner
|
|
6
|
+
from typing import Dict, List
|
|
7
|
+
import yaml
|
|
8
|
+
import importlib
|
|
9
|
+
import os
|
|
10
|
+
from kube_watch.models.workflow import WorkflowConfig, BatchFlowConfig, Task
|
|
11
|
+
from kube_watch.enums.workflow import ParameterType, TaskRunners, TaskInputsType
|
|
12
|
+
from kube_watch.modules.logic.merge import merge_logical_list
|
|
13
|
+
|
|
14
|
+
def load_workflow_config(yaml_file) -> WorkflowConfig:
|
|
15
|
+
with open(yaml_file, 'r') as file:
|
|
16
|
+
data = yaml.safe_load(file)
|
|
17
|
+
return WorkflowConfig(**data['workflow'])
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def load_batch_config(yaml_file) -> BatchFlowConfig:
|
|
21
|
+
with open(yaml_file, 'r') as file:
|
|
22
|
+
data = yaml.safe_load(file)
|
|
23
|
+
return BatchFlowConfig(**data['batchFlows'])
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# def execute_task(func, *args, name="default_task_name", **kwargs):
|
|
28
|
+
# @task(name=name)
|
|
29
|
+
# def func_task():
|
|
30
|
+
# return func(*args, **kwargs)
|
|
31
|
+
# return func_task
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def func_task(name="default_task_name", task_input_type: TaskInputsType = TaskInputsType.ARG):
|
|
35
|
+
if task_input_type == TaskInputsType.ARG:
|
|
36
|
+
@task(name=name)
|
|
37
|
+
def execute_task(func, *args, **kwargs):
|
|
38
|
+
return func(*args, **kwargs)
|
|
39
|
+
return execute_task
|
|
40
|
+
if task_input_type == TaskInputsType.DICT:
|
|
41
|
+
@task(name=name)
|
|
42
|
+
def execute_task_dict(func, dict_inp):
|
|
43
|
+
return func(dict_inp)
|
|
44
|
+
return execute_task_dict
|
|
45
|
+
raise ValueError(f'Unknow Task Input Type. It should either be {TaskInputsType.ARG} or {TaskInputsType.DICT} but {task_input_type} is provided.')
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# @task
|
|
49
|
+
# def execute_task(func, *args, **kwargs):
|
|
50
|
+
# return func(*args, **kwargs)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_task_function(module_name, task_name, plugin_path=None):
|
|
55
|
+
"""
|
|
56
|
+
Fetch a function directly from a specified module.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
module_name (str): The name of the module to import the function from. e.g. providers.aws
|
|
60
|
+
task_name (str): The name of the function to fetch from the module.
|
|
61
|
+
plugin_path (ster): define for external modules
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
function: The function object fetched from the module.
|
|
65
|
+
"""
|
|
66
|
+
try:
|
|
67
|
+
if plugin_path:
|
|
68
|
+
# Temporarily prepend the plugin path to sys.path to find the module
|
|
69
|
+
module_path = os.path.join(plugin_path, *module_name.split('.')) + '.py'
|
|
70
|
+
module_spec = importlib.util.spec_from_file_location(module_name, module_path)
|
|
71
|
+
module = importlib.util.module_from_spec(module_spec)
|
|
72
|
+
module_spec.loader.exec_module(module)
|
|
73
|
+
else:
|
|
74
|
+
# Standard import from the internal library path
|
|
75
|
+
module = importlib.import_module(f"kube_watch.modules.{module_name}")
|
|
76
|
+
|
|
77
|
+
return getattr(module, task_name)
|
|
78
|
+
except ImportError as e:
|
|
79
|
+
raise ImportError(f"Unable to import module '{module_name}': {e}")
|
|
80
|
+
except AttributeError as e:
|
|
81
|
+
raise AttributeError(f"The module '{module_name}' does not have a function named '{task_name}': {e}")
|
|
82
|
+
# finally:
|
|
83
|
+
# if plugin_path:
|
|
84
|
+
# # Remove the plugin path from sys.path after importing
|
|
85
|
+
# sys.path.pop(0) # Using pop(0) is safer in the context of insert(0, plugin_path)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def resolve_parameter_value(param):
|
|
90
|
+
if param.type == ParameterType.FROM_ENV:
|
|
91
|
+
return os.getenv(param.value, '') # Default to empty string if env var is not set
|
|
92
|
+
if param.type == ParameterType.FROM_FLOW:
|
|
93
|
+
return runtime.flow_run.parameters.get(param.value, '')
|
|
94
|
+
return param.value
|
|
95
|
+
|
|
96
|
+
def prepare_task_inputs(parameters):
|
|
97
|
+
return {param.name: resolve_parameter_value(param) for param in parameters}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def prepare_task_inputs_from_dep(task_data: Task, task_inputs: Dict, tasks):
|
|
101
|
+
for dep in task_data.dependency:
|
|
102
|
+
par_task = tasks[dep.taskName]
|
|
103
|
+
par_res = par_task.result()
|
|
104
|
+
if dep.inputParamName != None:
|
|
105
|
+
task_inputs.update({dep.inputParamName: par_res})
|
|
106
|
+
|
|
107
|
+
return task_inputs
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def resolve_conditional(task_data: Task, tasks):
|
|
111
|
+
lst_bools = []
|
|
112
|
+
for task_name in task_data.conditional.tasks:
|
|
113
|
+
if task_name not in tasks:
|
|
114
|
+
return False
|
|
115
|
+
|
|
116
|
+
par_task = tasks[task_name]
|
|
117
|
+
lst_bools.append(par_task.result())
|
|
118
|
+
return merge_logical_list(lst_bools, task_data.conditional.operation)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def submit_task(task_name, task_data, task_inputs, func):
|
|
124
|
+
execute_task = func_task(name=task_name, task_input_type=task_data.inputsArgType)
|
|
125
|
+
if task_data.inputsArgType == TaskInputsType.ARG:
|
|
126
|
+
return execute_task.submit(func, **task_inputs)
|
|
127
|
+
if task_data.inputsArgType == TaskInputsType.DICT:
|
|
128
|
+
return execute_task.submit(func, dict_inp=task_inputs)
|
|
129
|
+
raise ValueError("Unknown Input Arg Type.")
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def resolve_runner(runner):
|
|
134
|
+
if runner == TaskRunners.CONCURRENT:
|
|
135
|
+
return ConcurrentTaskRunner
|
|
136
|
+
if runner == TaskRunners.SEQUENTIAL:
|
|
137
|
+
return ThreadPoolTaskRunner(max_workers=1)
|
|
138
|
+
if runner == TaskRunners.DASK:
|
|
139
|
+
raise ValueError("Dask Not Implemented")
|
|
140
|
+
# return DaskTaskRunner
|
|
141
|
+
if runner == TaskRunners.RAY:
|
|
142
|
+
raise ValueError("Ray Not Implemented")
|
|
143
|
+
# return RayTaskRunner
|
|
144
|
+
raise ValueError("Invalid task runner type")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def filter_attributes(obj):
|
|
148
|
+
import uuid
|
|
149
|
+
from collections.abc import Iterable
|
|
150
|
+
import inspect
|
|
151
|
+
|
|
152
|
+
def is_simple(value):
|
|
153
|
+
""" Check if the value is a simple data type or a collection of simple data types """
|
|
154
|
+
if isinstance(value, (int, float, str, bool, type(None), uuid.UUID)):
|
|
155
|
+
return True
|
|
156
|
+
if isinstance(value, dict):
|
|
157
|
+
return all(is_simple(k) and is_simple(v) for k, v in value.items())
|
|
158
|
+
if isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
|
|
159
|
+
return all(is_simple(item) for item in value)
|
|
160
|
+
return False
|
|
161
|
+
|
|
162
|
+
result = {}
|
|
163
|
+
for attr in dir(obj):
|
|
164
|
+
# Avoid magic methods and attributes
|
|
165
|
+
if attr.startswith("__") and attr.endswith("__"):
|
|
166
|
+
continue
|
|
167
|
+
value = getattr(obj, attr)
|
|
168
|
+
# Filter out methods and check if the attribute value is simple
|
|
169
|
+
if not callable(value) and not inspect.isclass(value) and is_simple(value):
|
|
170
|
+
result[attr] = value
|
|
171
171
|
return result
|