datallog 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datallog-0.1.0/PKG-INFO +17 -0
- datallog-0.1.0/README.md +1 -0
- datallog-0.1.0/pyproject.toml +19 -0
- datallog-0.1.0/src/datallog/__init__.py +1 -0
- datallog-0.1.0/src/datallog/decorators/__init__.py +2 -0
- datallog-0.1.0/src/datallog/decorators/core_step.py +8 -0
- datallog-0.1.0/src/datallog/decorators/step.py +32 -0
- datallog-0.1.0/src/datallog/utils/__init__.py +0 -0
- datallog-0.1.0/src/datallog/utils/errors.py +66 -0
- datallog-0.1.0/src/datallog/utils/generate_build_file.py +48 -0
- datallog-0.1.0/src/datallog/utils/generate_step_props.py +44 -0
- datallog-0.1.0/src/datallog/utils/get_all_apps.py +20 -0
- datallog-0.1.0/src/datallog/utils/get_step_name_by_index.py +14 -0
- datallog-0.1.0/src/datallog/utils/import_module.py +16 -0
- datallog-0.1.0/src/datallog/utils/storage.py +98 -0
- datallog-0.1.0/src/datallog/utils/validate_step_sequence.py +37 -0
- datallog-0.1.0/src/datallog/utils/worker.py +453 -0
datallog-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: datallog
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: SDK datallog library
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Geovane Schmitz
|
|
7
|
+
Author-email: contato@geovanems.com.br
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
## Datallog SDK
|
datallog-0.1.0/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
## Datallog SDK
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "datallog"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "SDK datallog library"
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Geovane Schmitz",email = "contato@geovanems.com.br"}
|
|
7
|
+
]
|
|
8
|
+
license = {text = "MIT"}
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = [
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[tool.poetry]
|
|
15
|
+
name = "src/datallog"
|
|
16
|
+
|
|
17
|
+
[build-system]
|
|
18
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
19
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .decorators import core_step as core_step, step as step
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from functools import wraps
|
|
2
|
+
from typing import Optional, Callable
|
|
3
|
+
from datallog.utils.storage import set_next_step, set_step_to_callable, set_core_step, set_step_not_branching
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def step(*, next_step: Optional[str] = None, core_step: bool = False, branching: bool = True) -> Callable:
    """
    Decorator registering the wrapped function as one step of a sequence.

    Args:
        next_step (Optional[str]): Name of the step that follows this one.
        core_step (bool): Mark this step as the application's entry (first) step.
        branching (bool): When False, the step spawns only a single follow-up
            step instead of one per result item.
    """

    def decorator(func):
        # Registration happens at decoration time, keyed by the function name.
        name = func.__name__
        set_step_to_callable(name, func)
        set_next_step(name, next_step)
        if core_step:
            set_core_step(name)
        if not branching:
            set_step_not_branching(name)

        @wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)

        return wrapper

    return decorator
|
|
File without changes
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
class StepNameConflictError(Exception):
    """Raised when two steps try to register under the same name."""

    def __init__(self, message="Step with the same name already exist"):
        self.message = message
        super().__init__(message)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CoreStepAlreadySetError(Exception):
    """Raised when a second core step is registered."""

    def __init__(self, message="Core step is already set"):
        self.message = message
        super().__init__(message)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CoreStepNotSetError(Exception):
    """Raised when an operation requires a core step and none was registered."""

    def __init__(self, message="Core step is not set"):
        self.message = message
        super().__init__(message)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class StepNotDefinedError(Exception):
    """Raised when a referenced step has no registered implementation."""

    def __init__(self, message="Step is not defined"):
        self.message = message
        super().__init__(message)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class CircularStepDefinitionError(Exception):
    """Raised when the step chain loops back onto itself."""

    def __init__(self, message="Circular step definition"):
        self.message = message
        super().__init__(message)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class StepBranchingCannotBeUsedWithoutNextStepError(Exception):
    """Raised when a step's branching mode is configured but it has no next step."""

    def __init__(self, message="Step cannot be defined as branching without next step"):
        self.message = message
        super().__init__(message)
|
|
60
|
+
|
|
61
|
+
class InvalidAppError(Exception):
    """Raised for invalid app configurations.

    Consistency fix: initializes in the same order as the sibling
    exceptions in this module (set ``self.message`` first, then call
    ``super().__init__``), so all package errors share one shape.
    """

    def __init__(self, message: str = "Invalid app configuration"):
        self.message = message
        super().__init__(self.message)
|
|
66
|
+
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import sys
|
|
4
|
+
import threading
|
|
5
|
+
from typing import Any, Dict
|
|
6
|
+
from .get_all_apps import get_all_apps
|
|
7
|
+
import json
|
|
8
|
+
from .import_module import import_module
|
|
9
|
+
from .generate_step_props import generate_step_props
|
|
10
|
+
from .storage import reset_storage
|
|
11
|
+
|
|
12
|
+
def generate_build_file(deploy_dir: Path, output_file_path: Path) -> Dict[str, Any]:
    """
    Import every app under ``deploy_dir/apps`` and write a JSON build file.

    For each app, decorator state is reset, the app module is imported
    (which runs its ``@step`` decorators), and its step properties are
    collected into one mapping that is dumped to ``output_file_path``.

    Args:
        deploy_dir: Directory that contains the ``apps`` folder.
        output_file_path: Destination path for the generated build JSON.

    Returns:
        Mapping of app name to its generated step properties.
    """
    apps = get_all_apps(deploy_dir)

    build: Dict[str, Any] = {}
    apps_dir = deploy_dir / "apps"
    print(f"Generating build file for apps in {apps_dir}")
    for app_name in apps:
        try:
            app_file = apps_dir / app_name / f'{app_name}.py'
            num_of_threads = threading.active_count()
            # Clear decorator state so each app registers in isolation.
            reset_storage()
            import_module(app_file)
            # Importing app code must not leave background threads behind.
            if threading.active_count() > num_of_threads:
                print(f"Warning: {app_name} has threads running, this may cause issues with the build file generation.")

            step_props = generate_step_props(app_name)

            build[app_name] = step_props
        except Exception as e:
            import traceback
            traceback.print_exc()
            print(f"Error generating build file for {app_name}: {e}", file=sys.stderr)
            # Bug fix: use sys.exit() instead of the builtin exit(), which is
            # injected by the `site` module and may be absent (e.g. with -S
            # or in embedded interpreters).
            sys.exit(1)
    with open(output_file_path, 'w') as f:
        json.dump(build, f, indent=4)

    return build
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
if __name__ == "__main__":
|
|
43
|
+
|
|
44
|
+
deploy_dir = Path('/deploy')
|
|
45
|
+
output_file_path = Path('/build.json')
|
|
46
|
+
|
|
47
|
+
build_file = generate_build_file(deploy_dir, output_file_path)
|
|
48
|
+
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from typing import Any, Dict
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
from .storage import get_core_step, get_next_step
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def generate_step_props(app_name: str) -> Dict[str, Any]:
    """
    Build the ordered step-property list for the currently registered app.

    Walks the step chain from the core step and emits one property dict per
    step: the implementing function name, a reference to the previous step's
    stored result, a context TTL, and a flag marking the final step's result.

    Args:
        app_name: Name of the app the registered steps belong to.

    Returns:
        dict with keys "steps" (ordered property dicts), "name" and "context".
    """

    step_list = []
    old_step = None
    current_step = get_core_step()
    i = 0

    # Idiom fix: `is not None` instead of `!= None`; local `tll` renamed `ttl`.
    while current_step is not None:
        i += 1
        next_step = get_next_step(current_step)
        # The final step's context expires one tick earlier.
        ttl = i if next_step is not None else i - 1

        if old_step is None:
            # The first step receives no prior data.
            data_to_process = None
        else:
            # Reference the result of the previous step (0-based index i-2).
            data_to_process = "${{step.%s}}" % (i - 2)

        step_list.append(
            {
                "to_result": "exec_result" if next_step is None else None,
                "name_core_function": current_step,
                "data_to_process": data_to_process,
                "context_step_ttl": ttl,
            }
        )
        old_step = current_step
        current_step = next_step
    return {"steps": step_list, "name": app_name, "context": []}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
|
|
2
|
+
import pathlib
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from .errors import InvalidAppError
|
|
6
|
+
|
|
7
|
+
def get_all_apps(path: pathlib.Path) -> List[str]:
    """Return the names of valid apps under ``path/apps``.

    An app is a directory ``apps/<name>/`` containing ``<name>.py``;
    hidden directories and ``__pycache__`` are skipped.

    Raises:
        InvalidAppError: if ``path/apps`` does not exist.
    """
    apps_dir = path / "apps"
    if not apps_dir.exists():
        raise InvalidAppError(f"Apps directory not found at {apps_dir}")

    discovered = []
    for entry in apps_dir.iterdir():
        if not entry.is_dir():
            continue
        name = entry.name
        if name.startswith(".") or name == "__pycache__":
            continue
        if (entry / f"{name}.py").exists():
            discovered.append(name)
    return discovered
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from .storage import get_core_step, get_next_step, get_step_to_callable
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def get_step_name_by_index(step_index: int) -> Optional[str]:
    """Return the name of the step at position *step_index* in the chain.

    Index 0 is the core step; each following index advances one link.
    Returns None when the chain is shorter than the requested index.
    """
    # Walking zero links from the core step covers the step_index == 0 case.
    current = get_core_step()
    for _ in range(step_index):
        if current is None:
            return None
        current = get_next_step(current)
    return current
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import importlib.util
|
|
3
|
+
|
|
4
|
+
def import_module(file_path: str):
    """Load and execute the Python file at *file_path* as a module.

    Returns the executed module object; raises on spec/loader failure.
    """
    spec = importlib.util.spec_from_file_location("*", file_path)
    if spec is None:
        raise Exception("Failed to import module")

    loaded = importlib.util.module_from_spec(spec)
    if spec.loader is None:
        raise Exception("Failed to load module")

    spec.loader.exec_module(loaded)
    return loaded
|
|
16
|
+
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from typing import Optional, Dict, Callable, Set
|
|
2
|
+
from .errors import (
|
|
3
|
+
CoreStepAlreadySetError,
|
|
4
|
+
StepNameConflictError,
|
|
5
|
+
StepBranchingCannotBeUsedWithoutNextStepError,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
|
+
This module stores the data from the decorators
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
# The name of the core step of the application
|
|
14
|
+
_core_step: Optional[str] = None
|
|
15
|
+
# The name of the step to call next, E.g. "step_1" -> "step_2"
|
|
16
|
+
_step_sequence: Dict[str, Optional[str]] = {}
|
|
17
|
+
# The name of the step to callable
|
|
18
|
+
_step_name_to_callable: Dict[str, Callable] = {}
|
|
19
|
+
# branching step
|
|
20
|
+
_step_name_not_branching: Set[str] = set()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def reset_storage() -> None:
    """
    Reset all decorator-registered state to its initial empty values.

    Bug fix: ``_step_name_not_branching`` was missing from the ``global``
    declaration, so its assignment created a dead local and the module-level
    set was never actually cleared between apps.
    """
    global _core_step, _step_sequence, _step_name_to_callable, _step_name_not_branching
    _core_step = None
    _step_sequence = {}
    _step_name_to_callable = {}
    _step_name_not_branching = set()
|
|
32
|
+
|
|
33
|
+
def set_core_step(core_step: str) -> None:
    """Register *core_step* as the application's entry step.

    Raises:
        CoreStepAlreadySetError: if a core step was registered before.
    """
    global _core_step
    if _core_step is not None:
        raise CoreStepAlreadySetError(f'Core step "{core_step}" already set')
    _core_step = core_step
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_core_step() -> Optional[str]:
    """Return the registered core step name, or None when unset."""
    return _core_step
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_next_step(step: str) -> Optional[str]:
    """Return the successor of *step*, or None when it is the last/unknown."""
    return _step_sequence.get(step)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def get_step_to_callable(step: str) -> Optional[Callable]:
    """Return the registered implementation for *step*, or None."""
    return _step_name_to_callable.get(step)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def set_step_to_callable(step: str, callable: Callable) -> None:
    """Bind *step* to its implementing function.

    Raises:
        StepNameConflictError: if the name is already bound.
    """
    if step in _step_name_to_callable:
        raise StepNameConflictError(f'Step "{step}" already exist')
    _step_name_to_callable[step] = callable
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def set_next_step(step: str, next_step: Optional[str]) -> None:
    """Record that *next_step* follows *step* (None marks the final step)."""
    _step_sequence[step] = next_step
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def set_step_not_branching(step: str) -> None:
    """Mark *step* as non-branching.

    Raises:
        StepBranchingCannotBeUsedWithoutNextStepError: if *step* has no
            registered next step (branching mode is meaningless then).
    """
    if get_next_step(step) is None:
        raise StepBranchingCannotBeUsedWithoutNextStepError(
            f'Step "{step}" cannot be defined as branching without next step'
        )
    _step_name_not_branching.add(step)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def get_step_branching(step: str) -> bool:
    """Return True when *step* may branch (the default for every step)."""
    return not (step in _step_name_not_branching)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import Set
|
|
2
|
+
from .storage import get_core_step, get_next_step, get_step_to_callable
|
|
3
|
+
from .errors import (
|
|
4
|
+
CoreStepNotSetError,
|
|
5
|
+
StepNotDefinedError,
|
|
6
|
+
CircularStepDefinitionError,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def validate_step_sequence() -> None:
    """Validate the registered step chain.

    Ensures a core step exists, every chained step has an implementation,
    and the chain contains no cycle.

    Raises:
        CoreStepNotSetError: no core step was registered.
        StepNotDefinedError: a next-step reference has no implementation.
        CircularStepDefinitionError: the chain revisits a step name.
    """
    entry = get_core_step()
    if entry is None:
        raise CoreStepNotSetError(
            "Core step not set - please set it using @core_step decorator"
        )

    seen: Set[str] = {entry}
    prev = None
    node = entry
    while node is not None:
        # Every name reachable through the chain must have an implementation.
        if get_step_to_callable(node) is None:
            raise StepNotDefinedError(
                f"Step {node} is not defined but is set as next step for {prev} - please define it using @step decorator"
            )
        successor = get_next_step(node)

        if successor is None:
            break

        if successor in seen:
            raise CircularStepDefinitionError(
                f"Circular step definition detected: {node} -> {successor}"
            )
        seen.add(successor)
        prev = node
        node = successor
|
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
import importlib
|
|
3
|
+
import importlib.util
|
|
4
|
+
import inspect
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
import threading
|
|
8
|
+
import json
|
|
9
|
+
from socket import AF_UNIX, SOCK_STREAM, SocketIO, socket
|
|
10
|
+
from typing import Any, Callable, Optional
|
|
11
|
+
from uuid import uuid4
|
|
12
|
+
|
|
13
|
+
from .get_step_name_by_index import get_step_name_by_index
|
|
14
|
+
from .storage import (
|
|
15
|
+
get_next_step,
|
|
16
|
+
get_step_to_callable,
|
|
17
|
+
get_step_branching,
|
|
18
|
+
)
|
|
19
|
+
from .validate_step_sequence import validate_step_sequence
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def create_get_work_item_json(worker_id: int) -> str:
    """Serialize a GET_WORK_ITEM request for this worker."""
    payload = {"type": "GET_WORK_ITEM", "worker_id": worker_id}
    return json.dumps(payload)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def create_get_execution_props_json(worker_id: int) -> str:
    """Serialize a GET_STEP_EXECUTION_PROPS request for this worker."""
    payload = {"type": "GET_STEP_EXECUTION_PROPS", "worker_id": worker_id}
    return json.dumps(payload)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def create_work_item_json(step_index: int, argument: Any, from_work_id: str) -> str:
    """Serialize a WORK_ITEM message with a freshly generated work id."""
    payload = {
        "type": "WORK_ITEM",
        "work_id": str(uuid4()),
        "step_index": step_index,
        "argument": argument,
        "from_work_id": from_work_id,
    }
    return json.dumps(payload)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def create_worker_publish_result_json(work_id_str: str, result: Any) -> str:
    """Serialize a PUBLISH_RESULT message for a finished work item."""
    payload = {"type": "PUBLISH_RESULT", "work_id": work_id_str, "result": result}
    return json.dumps(payload)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def create_worker_error_json(
    work_id_str: Optional[str], error: str, traceback_str: str
) -> str:
    """Serialize a WORKER_ERROR message (work_id may be None pre-assignment)."""
    payload = {
        "type": "WORKER_ERROR",
        "error": error,
        "traceback": traceback_str,
        "work_id": work_id_str,
    }
    return json.dumps(payload)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def create_worker_mark_as_idle_json(worker_id: int) -> str:
    """Serialize a MARK_AS_IDLE notification for this worker."""
    payload = {"type": "MARK_AS_IDLE", "worker_id": worker_id}
    return json.dumps(payload)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Well-known Unix-domain socket path the container server listens on.
unix_socket = "/tmp/datallog_worker.sock"
# Worker id is passed as the first CLI argument by the spawning server.
my_id = int(sys.argv[1])

# Raw socket handle; populated by connect_to_conteiner_server().
sock: Optional[socket] = None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def import_module_from_path(file_path: str):
    """Load and execute the Python file at *file_path*, returning the module."""
    spec = importlib.util.spec_from_file_location("*", file_path)
    if spec is None:
        raise Exception(f"Failed to create spec for module: {file_path}")

    loaded = importlib.util.module_from_spec(spec)
    if spec.loader is None:
        raise Exception(f"Failed to get loader for module: {file_path}")

    spec.loader.exec_module(loaded)
    return loaded
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def connect_to_conteiner_server():
    """Open the Unix-domain connection to the container server.

    Stores the raw socket in the module-level ``sock`` and returns a
    buffered read/write SocketIO wrapper around it.
    """
    global sock
    conn = socket(AF_UNIX, SOCK_STREAM)
    conn.connect(unix_socket)
    sock = conn
    return SocketIO(conn, "rwb")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Module-level connection established at import time; all messaging
# helpers below read from and write to this buffered stream.
sockio = connect_to_conteiner_server()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def send_message_to_conteiner_server(data: str):
    """Write one newline-terminated message to the server and flush it."""
    framed = data.encode() + b"\n"
    sockio.write(framed)
    sockio.flush()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def send_mark_as_idle_to_conteiner_server():
    """Notify the server that this worker is momentarily idle."""
    send_message_to_conteiner_server(create_worker_mark_as_idle_json(worker_id=my_id))
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def receive_message_from_conteiner_server():
    """Read and decode one newline-terminated JSON message from the server.

    Raises:
        ConnectionAbortedError: the server closed the connection (EOF).
        ValueError: the line was empty/whitespace after stripping.
        json.JSONDecodeError: the payload is not valid JSON.
    """
    raw_line = sockio.readline()
    # Empty bytes means EOF: the server went away.
    if not raw_line:
        raise ConnectionAbortedError("Server closed connection or sent empty response.")
    text = raw_line.decode().strip()
    # A blank line is a protocol violation worth surfacing.
    if not text:
        raise ValueError("Received an empty message from server.")
    return json.loads(text)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def get_work_item_from_conteiner_server():
    """Request the next work item and return the server's decoded reply."""
    send_message_to_conteiner_server(create_get_work_item_json(worker_id=my_id))
    return receive_message_from_conteiner_server()
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def get_execution_props_from_conteiner_server():
    """Request execution properties (e.g. app file path) from the server."""
    send_message_to_conteiner_server(create_get_execution_props_json(worker_id=my_id))
    return receive_message_from_conteiner_server()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def send_worker_error_to_conteiner_server(
    work_id_str: Optional[str], error: str, traceback_str: str
):
    """Report a worker-side error (with traceback) to the server."""
    message = create_worker_error_json(
        work_id_str=work_id_str, error=error, traceback_str=traceback_str
    )
    send_message_to_conteiner_server(message)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def close_connection_to_conteiner_server():
    """Close the buffered stream and the underlying socket if present."""
    if sockio:
        sockio.close()
    if sock:
        sock.close()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def get_mandatory_argcount(f: Callable[..., Any]) -> int:
    """Count parameters of *f* with no default, excluding *args/**kwargs."""
    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    params = inspect.signature(f).parameters.values()
    return sum(
        1
        for p in params
        if p.default is inspect.Parameter.empty and p.kind not in variadic_kinds
    )
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def execute_steps():
    """
    Manage the execution lifecycle of steps for one application instance
    inside a dedicated worker process.

    Flow:
    1. Initialization — fetch execution props (app file path, logging flag)
       from the container server, record the baseline thread count, then
       import the user's app module (running its decorators). If importing
       fails or leaves lingering threads, report the error and exit.
    2. Step loop (bounded safeguard of 1000 iterations) — fetch a work item;
       on "NO_MORE_WORK_ITEMS" close the connection and return; on malformed
       messages report and continue. For a valid WORK_ITEM:
       - validate the step sequence on step 0;
       - resolve the step callable by index;
       - optionally redirect stdout/stderr to logs/{step}-{work_id}.log;
       - call the step with the supplied argument (or no-arg when the step
         takes none);
       - abort the worker if the step leaked threads;
       - if a next step exists, mark this worker idle, then either fan out
         one WORK_ITEM per list element (when the step is branching and
         returned a list) or forward the whole result as one WORK_ITEM;
       - otherwise publish the final result.
       Any other exception is reported as WORKER_ERROR and the loop resumes.

    Bug fix: the per-step thread-leak check previously built the error JSON
    with create_worker_error_json() and discarded it without sending; it now
    calls send_worker_error_to_conteiner_server() so the server is informed.
    """
    # 1. Initialization
    try:
        execution_props = get_execution_props_from_conteiner_server()
    except (ConnectionAbortedError, ValueError, json.JSONDecodeError) as e:
        print(f"Failed to get execution properties: {e}", file=sys.stderr)
        # No protocol-level error channel is guaranteed this early; just
        # close and exit.
        if sock:  # Ensure sock is defined before trying to close
            close_connection_to_conteiner_server()
        return

    # Baseline for detecting threads leaked by user code.
    num_of_threads = threading.active_count()

    try:
        import_module_from_path(execution_props["file_path"])
    except Exception as e:
        print(
            f"Failed to import module {execution_props.get('file_path', 'N/A')}: {e}",
            file=sys.stderr,
        )
        import traceback

        # Attempt to notify the server about this critical initialization
        # error. No work_id exists yet, so pass None.
        error_payload = create_worker_error_json(
            work_id_str=None,
            error=str(e),
            traceback_str=traceback.format_exc(),
        )
        try:
            send_message_to_conteiner_server(error_payload)
        except Exception as send_e:
            print(
                f"Additionally, failed to send import error to server: {send_e}",
                file=sys.stderr,
            )
        if sock:
            close_connection_to_conteiner_server()
        return

    # Importing the app must not start background threads.
    if threading.active_count() > num_of_threads:
        send_worker_error_to_conteiner_server(
            work_id_str=None,
            error="New threads created during module import",
            traceback_str="Thread count increased during module import, indicating potential resource leak or mismanagement. Keep in mind that the code should be inside a step function, not at the module level.",
        )

        close_connection_to_conteiner_server()
        return

    # 2. Step Execution Loop
    for _ in range(1000):  # Safeguard
        try:
            work_item = get_work_item_from_conteiner_server()
        except (ConnectionAbortedError, ValueError, json.JSONDecodeError) as e:
            import traceback

            send_worker_error_to_conteiner_server(
                work_id_str=None,
                error=str(e),
                traceback_str=traceback.format_exc(),
            )
            close_connection_to_conteiner_server()
            return

        # Normal termination signal from the server.
        if (
            isinstance(work_item, dict)
            and work_item.get("type") == "NO_MORE_WORK_ITEMS"
        ):
            close_connection_to_conteiner_server()
            return

        # Malformed message: report it and poll again.
        if (
            not isinstance(work_item, dict)
            or work_item.get("type") != "WORK_ITEM"
            or "step_index" not in work_item
            or "work_id" not in work_item
        ):
            print(
                f"Received unexpected or malformed message: {work_item}",
                file=sys.stderr,
            )
            error_payload = create_worker_error_json(
                work_id_str=(
                    work_item.get("work_id") if isinstance(work_item, dict) else None
                ),
                error="Received malformed work item from server",
                traceback_str=f"Message: {work_item}",
            )
            send_message_to_conteiner_server(error_payload)
            continue

        current_work_id_str = work_item["work_id"]  # Assumed present for WORK_ITEM type
        original_stdout = sys.stdout  # Store original stdout/stderr for restoration
        original_stderr = sys.stderr

        try:
            step_index = work_item["step_index"]

            # Validate the chain once, on the very first step.
            if step_index == 0:
                validate_step_sequence()

            step_name = get_step_name_by_index(step_index)
            step_callable = get_step_to_callable(step_name)
            log_file = None
            try:
                if execution_props.get("logging_to_files", False):
                    # Redirect this step's output to its own log file.
                    if not os.path.exists("logs"):
                        os.makedirs("logs")
                    log_file_path = (
                        f"logs/{step_name}-{current_work_id_str or 'unknown'}.log"
                    )

                    log_file = open(log_file_path, "a")
                    sys.stdout = log_file
                    sys.stderr = log_file

                # Flexible argument handling: steps may take zero or one arg.
                step_argument = work_item.get("argument")
                if step_argument is None:
                    if get_mandatory_argcount(step_callable) > 0:
                        step_result = step_callable(None)
                    else:
                        step_result = step_callable()
                else:
                    step_result = step_callable(step_argument)
                sys.stdout.flush()
                sys.stderr.flush()
            finally:
                sys.stdout = original_stdout
                sys.stderr = original_stderr
                if log_file:
                    log_file.close()

            # Leaked threads are fatal for the worker.
            if threading.active_count() > num_of_threads:
                # Bug fix: actually send the error (was create_worker_error_json
                # with the result discarded).
                send_worker_error_to_conteiner_server(
                    work_id_str=current_work_id_str,
                    error="New threads created during step execution",
                    traceback_str="Thread count increased during step execution, indicating potential resource leak or mismanagement. Keep in mind that all threads should be properly managed and terminated within the step function.",
                )

                close_connection_to_conteiner_server()
                return

            next_step = get_next_step(step_name)

            if next_step is not None:
                # Let the server reassign pending work before we submit
                # our follow-up item(s).
                send_mark_as_idle_to_conteiner_server()

                if isinstance(step_result, list) and get_step_branching(step_name):
                    # Submit one work item per element; reversed to keep the
                    # original submission order semantics.
                    items_to_send = list(step_result)
                    items_to_send.reverse()
                    for item_arg in items_to_send:
                        next_work_item_json = create_work_item_json(
                            step_index=step_index + 1,  # next_step maps to step_index + 1
                            argument=item_arg,
                            from_work_id=current_work_id_str,
                        )
                        send_message_to_conteiner_server(next_work_item_json)
                else:
                    next_work_item_json = create_work_item_json(
                        step_index=step_index + 1,  # next_step maps to step_index + 1
                        argument=step_result,
                        from_work_id=current_work_id_str,
                    )
                    send_message_to_conteiner_server(next_work_item_json)
            else:
                # Final step: publish the result.
                result_payload_json = create_worker_publish_result_json(
                    work_id_str=current_work_id_str, result=step_result
                )
                send_message_to_conteiner_server(result_payload_json)

        except Exception as e:
            # Ensure stdout/stderr are restored if an error occurred mid-step
            if sys.stdout != original_stdout:
                sys.stdout = original_stdout
            if sys.stderr != original_stderr:
                sys.stderr = original_stderr

            import traceback

            error_payload_json = create_worker_error_json(
                work_id_str=current_work_id_str,
                error=str(e),
                traceback_str=traceback.format_exc(),
            )
            send_message_to_conteiner_server(error_payload_json)
            # Continue loop to await next work item or exit signal
|
+
|
|
432
|
+
if __name__ == "__main__":
|
|
433
|
+
try:
|
|
434
|
+
execute_steps()
|
|
435
|
+
except Exception as e:
|
|
436
|
+
# Catch-all for any unhandled exceptions during worker setup or main loop
|
|
437
|
+
print(f"Unhandled exception in worker (worker_id: {my_id}): {e}", file=sys.stderr)
|
|
438
|
+
import traceback
|
|
439
|
+
|
|
440
|
+
# Attempt to notify the server about a critical failure if connection is still possible
|
|
441
|
+
try:
|
|
442
|
+
# work_id might not be available or relevant here, send None
|
|
443
|
+
send_worker_error_to_conteiner_server(
|
|
444
|
+
work_id_str=None, error=str(e), traceback_str=traceback.format_exc()
|
|
445
|
+
)
|
|
446
|
+
except Exception as final_e:
|
|
447
|
+
traceback.print_exc(file=sys.stderr)
|
|
448
|
+
print(
|
|
449
|
+
f"Failed to send final thread error to server: {final_e}",
|
|
450
|
+
file=sys.stderr,
|
|
451
|
+
)
|
|
452
|
+
finally:
|
|
453
|
+
close_connection_to_conteiner_server()
|