ocean-runner 0.2.12__tar.gz → 0.2.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ocean_runner-0.2.12 → ocean_runner-0.2.19}/PKG-INFO +84 -51
- {ocean_runner-0.2.12 → ocean_runner-0.2.19}/README.md +80 -49
- {ocean_runner-0.2.12 → ocean_runner-0.2.19}/ocean_runner/__init__.py +1 -1
- ocean_runner-0.2.19/ocean_runner/config.py +71 -0
- ocean_runner-0.2.19/ocean_runner/runner.py +181 -0
- {ocean_runner-0.2.12 → ocean_runner-0.2.19}/pyproject.toml +14 -2
- ocean_runner-0.2.12/ocean_runner/config.py +0 -60
- ocean_runner-0.2.12/ocean_runner/runner.py +0 -143
- {ocean_runner-0.2.12 → ocean_runner-0.2.19}/.gitignore +0 -0
- {ocean_runner-0.2.12 → ocean_runner-0.2.19}/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ocean-runner
|
|
3
|
-
Version: 0.2.12
|
|
3
|
+
Version: 0.2.19
|
|
4
4
|
Summary: A fluent API for OceanProtocol algorithms
|
|
5
5
|
Project-URL: Homepage, https://github.com/AgrospAI/ocean-runner
|
|
6
6
|
Project-URL: Issues, https://github.com/AgrospAI/ocean-runner/issues
|
|
@@ -17,13 +17,15 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
17
17
|
Classifier: Operating System :: OS Independent
|
|
18
18
|
Classifier: Programming Language :: Python :: 3
|
|
19
19
|
Requires-Python: >=3.10
|
|
20
|
-
Requires-Dist: oceanprotocol-job-details>=0.2.
|
|
20
|
+
Requires-Dist: oceanprotocol-job-details>=0.2.8
|
|
21
|
+
Requires-Dist: pydantic-settings>=2.12.0
|
|
22
|
+
Requires-Dist: pydantic>=2.12.5
|
|
21
23
|
Requires-Dist: pytest>=8.4.2
|
|
22
24
|
Description-Content-Type: text/markdown
|
|
23
25
|
|
|
24
26
|
# ocean-runner
|
|
25
27
|
|
|
26
|
-
Ocean Runner is a package that
|
|
28
|
+
Ocean Runner is a package that eases algorithm creation in the scope of OceanProtocol.
|
|
27
29
|
|
|
28
30
|
|
|
29
31
|
## Installation
|
|
@@ -40,33 +42,40 @@ uv add ocean-runner
|
|
|
40
42
|
|
|
41
43
|
```python
|
|
42
44
|
import random
|
|
43
|
-
from ocean_runner import Algorithm
|
|
45
|
+
from ocean_runner import Algorithm
|
|
46
|
+
|
|
47
|
+
algorithm = Algorithm()
|
|
48
|
+
|
|
44
49
|
|
|
50
|
+
@algorithm.run
|
|
51
|
+
def run():
|
|
52
|
+
return random.randint(0, 100)
|
|
45
53
|
|
|
46
|
-
|
|
54
|
+
|
|
55
|
+
if __name__ == "__main__":
|
|
56
|
+
algorithm()
|
|
47
57
|
```
|
|
48
58
|
|
|
49
|
-
|
|
59
|
+
This code snippet will:
|
|
50
60
|
|
|
51
|
-
- Read the OceanProtocol JobDetails from the environment variables and use default file paths.
|
|
52
|
-
-
|
|
53
|
-
-
|
|
61
|
+
- Read the OceanProtocol JobDetails from the environment variables and use default configuration file paths.
|
|
62
|
+
- Execute the run function.
|
|
63
|
+
- Execute the default saving function, storing the result in a "result.txt" file within the default outputs path.
|
|
54
64
|
|
|
55
65
|
### Tuning
|
|
56
66
|
|
|
57
67
|
#### Application Config
|
|
58
68
|
|
|
59
|
-
The application configuration can be tweaked by passing a Config instance to its
|
|
69
|
+
The application configuration can be tweaked by passing a Config instance to its constructor.
|
|
60
70
|
|
|
61
71
|
```python
|
|
62
|
-
Algorithm
|
|
72
|
+
from ocean_runner import Algorithm, Config
|
|
73
|
+
|
|
74
|
+
algorithm = Algorithm(
|
|
63
75
|
Config(
|
|
64
76
|
custom_input: ... # dataclass
|
|
65
77
|
# Custom algorithm parameters dataclass.
|
|
66
78
|
|
|
67
|
-
error_callback: ... # Callable[[Exception], None]
|
|
68
|
-
# Callback to run on exceptions.
|
|
69
|
-
|
|
70
79
|
logger: ... # type: logging.Logger
|
|
71
80
|
# Custom logger to use.
|
|
72
81
|
|
|
@@ -82,6 +91,8 @@ Algorithm(
|
|
|
82
91
|
```python
|
|
83
92
|
import logging
|
|
84
93
|
|
|
94
|
+
from ocean_runner import Algorithm, Config
|
|
95
|
+
|
|
85
96
|
|
|
86
97
|
@dataclass
|
|
87
98
|
class CustomInput:
|
|
@@ -91,19 +102,13 @@ class CustomInput:
|
|
|
91
102
|
logger = logging.getLogger(__name__)
|
|
92
103
|
|
|
93
104
|
|
|
94
|
-
Algorithm(
|
|
105
|
+
algorithm = Algorithm(
|
|
95
106
|
Config(
|
|
96
107
|
custom_input: CustomInput,
|
|
97
108
|
"""
|
|
98
109
|
Load the Algorithm's Custom Input into a CustomInput dataclass instance.
|
|
99
110
|
"""
|
|
100
111
|
|
|
101
|
-
error_callback: lambda ex: logger.exception(ex),
|
|
102
|
-
"""
|
|
103
|
-
Run this callback when an exception is caught
|
|
104
|
-
NOTE: it's not recommended to catch exceptions this way. Should re-raise and halt the execution.
|
|
105
|
-
"""
|
|
106
|
-
|
|
107
112
|
source_paths: [Path("/algorithm/src")],
|
|
108
113
|
"""
|
|
109
114
|
Source paths to include in the PATH. '/algorithm/src' is the default since our templates place the algorithm source files there.
|
|
@@ -143,44 +148,72 @@ Algorithm(
|
|
|
143
148
|
|
|
144
149
|
```
|
|
145
150
|
|
|
146
|
-
|
|
151
|
+
#### Behaviour Config
|
|
147
152
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
As seen in the minimal example, all methods implemented in `Algorithm` have a default implementation which will be commented here.
|
|
153
|
+
To fully configure the behaviour of the algorithm, decorate your own functions as in the [Minimal Example](#minimal-example); the following example features every available algorithm customization.
|
|
151
154
|
|
|
152
155
|
```python
|
|
156
|
+
from pathlib import Path
|
|
153
157
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
"""
|
|
158
|
-
Default constructor, will use default values of Config.
|
|
159
|
-
"""
|
|
160
|
-
|
|
161
|
-
.validate()
|
|
162
|
-
|
|
163
|
-
"""
|
|
164
|
-
Will validate the algorithm's job detail instance, checking for the existence of:
|
|
165
|
-
- `job_details.ddos`
|
|
166
|
-
- `job_details.files`
|
|
167
|
-
"""
|
|
158
|
+
import pandas as pd
|
|
159
|
+
from ocean_runner import Algorithm
|
|
168
160
|
|
|
169
|
-
|
|
161
|
+
algorithm = Algorithm()
|
|
170
162
|
|
|
171
|
-
"""
|
|
172
|
-
Has NO default implementation, must pass a callback that returns a result of any type.
|
|
173
|
-
"""
|
|
174
163
|
|
|
175
|
-
|
|
164
|
+
@algorithm.on_error
|
|
165
|
+
def error_callback(ex: Exception):
|
|
166
|
+
algorithm.logger.exception(ex)
|
|
167
|
+
raise algorithm.Error() from ex
|
|
176
168
|
|
|
177
|
-
"""
|
|
178
|
-
Stores the result of running the algorithm in "outputs/results.txt"
|
|
179
|
-
"""
|
|
180
169
|
|
|
181
|
-
|
|
170
|
+
@algorithm.validate
|
|
171
|
+
def val():
|
|
172
|
+
assert algorithm.job_details.files, "Empty input dir"
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@algorithm.run
|
|
176
|
+
def run() -> pd.DataFrame:
|
|
177
|
+
_, filename = next(algorithm.job_details.next_path())
|
|
178
|
+
return pd.read_csv(filename).describe(include="all")
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@algorithm.save_results
|
|
182
|
+
def save(results: pd.DataFrame, path: Path):
|
|
183
|
+
algorithm.logger.info(f"Descriptive statistics: {results}")
|
|
184
|
+
results.to_csv(path / "results.csv")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
if __name__ == "__main__":
|
|
188
|
+
algorithm()
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
### Default implementations
|
|
194
|
+
|
|
195
|
+
As seen in the minimal example, the methods implemented in `Algorithm` (except `run`) have a default implementation, which is described here.
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
.validate()
|
|
199
|
+
|
|
200
|
+
"""
|
|
201
|
+
Will validate the algorithm's job detail instance, checking for the existence of:
|
|
202
|
+
- `job_details.ddos`
|
|
203
|
+
- `job_details.files`
|
|
204
|
+
"""
|
|
205
|
+
|
|
206
|
+
.run()
|
|
207
|
+
|
|
208
|
+
"""
|
|
209
|
+
Has NO default implementation, must pass a callback that returns a result of any type.
|
|
210
|
+
"""
|
|
182
211
|
|
|
212
|
+
.save_results()
|
|
183
213
|
|
|
214
|
+
"""
|
|
215
|
+
Stores the result of running the algorithm in "outputs/result.txt"
|
|
216
|
+
"""
|
|
184
217
|
```
|
|
185
218
|
|
|
186
219
|
### Job Details
|
|
@@ -188,7 +221,7 @@ As seen in the minimal example, all methods implemented in `Algorithm` have a de
|
|
|
188
221
|
To load the OceanProtocol JobDetails instance, the program reads some environment variables; they can be mocked by passing an instance of `Environment` through the configuration of the algorithm.
|
|
189
222
|
|
|
190
223
|
Environment variables:
|
|
191
|
-
- `DIDS` Input dataset(s) DID's, must have format: `["abc..90"]`
|
|
192
|
-
- `TRANSFORMATION_DID` Algorithm DID, must have format: `abc..90`
|
|
193
|
-
- `SECRET` Algorithm secret.
|
|
224
|
+
- `DIDS` (optional) Input dataset(s) DID's, must have format: `["abc..90"]`. Defaults to reading them automatically from the `DDO` data directory.
|
|
225
|
+
- `TRANSFORMATION_DID` (optional, default="DEFAULT"): Algorithm DID, must have format: `abc..90`.
|
|
226
|
+
- `SECRET` (optional, default="DEFAULT"): Algorithm secret.
|
|
194
227
|
- `BASE_DIR` (optional, default="/data"): Base path to the OceanProtocol data directories.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ocean-runner
|
|
2
2
|
|
|
3
|
-
Ocean Runner is a package that
|
|
3
|
+
Ocean Runner is a package that eases algorithm creation in the scope of OceanProtocol.
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
## Installation
|
|
@@ -17,33 +17,40 @@ uv add ocean-runner
|
|
|
17
17
|
|
|
18
18
|
```python
|
|
19
19
|
import random
|
|
20
|
-
from ocean_runner import Algorithm
|
|
20
|
+
from ocean_runner import Algorithm
|
|
21
|
+
|
|
22
|
+
algorithm = Algorithm()
|
|
23
|
+
|
|
21
24
|
|
|
25
|
+
@algorithm.run
|
|
26
|
+
def run():
|
|
27
|
+
return random.randint(0, 100)
|
|
22
28
|
|
|
23
|
-
|
|
29
|
+
|
|
30
|
+
if __name__ == "__main__":
|
|
31
|
+
algorithm()
|
|
24
32
|
```
|
|
25
33
|
|
|
26
|
-
|
|
34
|
+
This code snippet will:
|
|
27
35
|
|
|
28
|
-
- Read the OceanProtocol JobDetails from the environment variables and use default file paths.
|
|
29
|
-
-
|
|
30
|
-
-
|
|
36
|
+
- Read the OceanProtocol JobDetails from the environment variables and use default configuration file paths.
|
|
37
|
+
- Execute the run function.
|
|
38
|
+
- Execute the default saving function, storing the result in a "result.txt" file within the default outputs path.
|
|
31
39
|
|
|
32
40
|
### Tuning
|
|
33
41
|
|
|
34
42
|
#### Application Config
|
|
35
43
|
|
|
36
|
-
The application configuration can be tweaked by passing a Config instance to its
|
|
44
|
+
The application configuration can be tweaked by passing a Config instance to its constructor.
|
|
37
45
|
|
|
38
46
|
```python
|
|
39
|
-
Algorithm
|
|
47
|
+
from ocean_runner import Algorithm, Config
|
|
48
|
+
|
|
49
|
+
algorithm = Algorithm(
|
|
40
50
|
Config(
|
|
41
51
|
custom_input: ... # dataclass
|
|
42
52
|
# Custom algorithm parameters dataclass.
|
|
43
53
|
|
|
44
|
-
error_callback: ... # Callable[[Exception], None]
|
|
45
|
-
# Callback to run on exceptions.
|
|
46
|
-
|
|
47
54
|
logger: ... # type: logging.Logger
|
|
48
55
|
# Custom logger to use.
|
|
49
56
|
|
|
@@ -59,6 +66,8 @@ Algorithm(
|
|
|
59
66
|
```python
|
|
60
67
|
import logging
|
|
61
68
|
|
|
69
|
+
from ocean_runner import Algorithm, Config
|
|
70
|
+
|
|
62
71
|
|
|
63
72
|
@dataclass
|
|
64
73
|
class CustomInput:
|
|
@@ -68,19 +77,13 @@ class CustomInput:
|
|
|
68
77
|
logger = logging.getLogger(__name__)
|
|
69
78
|
|
|
70
79
|
|
|
71
|
-
Algorithm(
|
|
80
|
+
algorithm = Algorithm(
|
|
72
81
|
Config(
|
|
73
82
|
custom_input: CustomInput,
|
|
74
83
|
"""
|
|
75
84
|
Load the Algorithm's Custom Input into a CustomInput dataclass instance.
|
|
76
85
|
"""
|
|
77
86
|
|
|
78
|
-
error_callback: lambda ex: logger.exception(ex),
|
|
79
|
-
"""
|
|
80
|
-
Run this callback when an exception is caught
|
|
81
|
-
NOTE: it's not recommended to catch exceptions this way. Should re-raise and halt the execution.
|
|
82
|
-
"""
|
|
83
|
-
|
|
84
87
|
source_paths: [Path("/algorithm/src")],
|
|
85
88
|
"""
|
|
86
89
|
Source paths to include in the PATH. '/algorithm/src' is the default since our templates place the algorithm source files there.
|
|
@@ -120,44 +123,72 @@ Algorithm(
|
|
|
120
123
|
|
|
121
124
|
```
|
|
122
125
|
|
|
123
|
-
|
|
126
|
+
#### Behaviour Config
|
|
124
127
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
As seen in the minimal example, all methods implemented in `Algorithm` have a default implementation which will be commented here.
|
|
128
|
+
To fully configure the behaviour of the algorithm, decorate your own functions as in the [Minimal Example](#minimal-example); the following example features every available algorithm customization.
|
|
128
129
|
|
|
129
130
|
```python
|
|
131
|
+
from pathlib import Path
|
|
130
132
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
"""
|
|
135
|
-
Default constructor, will use default values of Config.
|
|
136
|
-
"""
|
|
137
|
-
|
|
138
|
-
.validate()
|
|
139
|
-
|
|
140
|
-
"""
|
|
141
|
-
Will validate the algorithm's job detail instance, checking for the existence of:
|
|
142
|
-
- `job_details.ddos`
|
|
143
|
-
- `job_details.files`
|
|
144
|
-
"""
|
|
133
|
+
import pandas as pd
|
|
134
|
+
from ocean_runner import Algorithm
|
|
145
135
|
|
|
146
|
-
|
|
136
|
+
algorithm = Algorithm()
|
|
147
137
|
|
|
148
|
-
"""
|
|
149
|
-
Has NO default implementation, must pass a callback that returns a result of any type.
|
|
150
|
-
"""
|
|
151
138
|
|
|
152
|
-
|
|
139
|
+
@algorithm.on_error
|
|
140
|
+
def error_callback(ex: Exception):
|
|
141
|
+
algorithm.logger.exception(ex)
|
|
142
|
+
raise algorithm.Error() from ex
|
|
153
143
|
|
|
154
|
-
"""
|
|
155
|
-
Stores the result of running the algorithm in "outputs/results.txt"
|
|
156
|
-
"""
|
|
157
144
|
|
|
158
|
-
|
|
145
|
+
@algorithm.validate
|
|
146
|
+
def val():
|
|
147
|
+
assert algorithm.job_details.files, "Empty input dir"
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@algorithm.run
|
|
151
|
+
def run() -> pd.DataFrame:
|
|
152
|
+
_, filename = next(algorithm.job_details.next_path())
|
|
153
|
+
return pd.read_csv(filename).describe(include="all")
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@algorithm.save_results
|
|
157
|
+
def save(results: pd.DataFrame, path: Path):
|
|
158
|
+
algorithm.logger.info(f"Descriptive statistics: {results}")
|
|
159
|
+
results.to_csv(path / "results.csv")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
if __name__ == "__main__":
|
|
163
|
+
algorithm()
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
### Default implementations
|
|
169
|
+
|
|
170
|
+
As seen in the minimal example, the methods implemented in `Algorithm` (except `run`) have a default implementation, which is described here.
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
.validate()
|
|
174
|
+
|
|
175
|
+
"""
|
|
176
|
+
Will validate the algorithm's job detail instance, checking for the existence of:
|
|
177
|
+
- `job_details.ddos`
|
|
178
|
+
- `job_details.files`
|
|
179
|
+
"""
|
|
180
|
+
|
|
181
|
+
.run()
|
|
182
|
+
|
|
183
|
+
"""
|
|
184
|
+
Has NO default implementation, must pass a callback that returns a result of any type.
|
|
185
|
+
"""
|
|
159
186
|
|
|
187
|
+
.save_results()
|
|
160
188
|
|
|
189
|
+
"""
|
|
190
|
+
Stores the result of running the algorithm in "outputs/result.txt"
|
|
191
|
+
"""
|
|
161
192
|
```
|
|
162
193
|
|
|
163
194
|
### Job Details
|
|
@@ -165,7 +196,7 @@ As seen in the minimal example, all methods implemented in `Algorithm` have a de
|
|
|
165
196
|
To load the OceanProtocol JobDetails instance, the program reads some environment variables; they can be mocked by passing an instance of `Environment` through the configuration of the algorithm.
|
|
166
197
|
|
|
167
198
|
Environment variables:
|
|
168
|
-
- `DIDS` Input dataset(s) DID's, must have format: `["abc..90"]`
|
|
169
|
-
- `TRANSFORMATION_DID` Algorithm DID, must have format: `abc..90`
|
|
170
|
-
- `SECRET` Algorithm secret.
|
|
199
|
+
- `DIDS` (optional) Input dataset(s) DID's, must have format: `["abc..90"]`. Defaults to reading them automatically from the `DDO` data directory.
|
|
200
|
+
- `TRANSFORMATION_DID` (optional, default="DEFAULT"): Algorithm DID, must have format: `abc..90`.
|
|
201
|
+
- `SECRET` (optional, default="DEFAULT"): Algorithm secret.
|
|
171
202
|
- `BASE_DIR` (optional, default="/data"): Base path to the OceanProtocol data directories.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from enum import StrEnum, auto
|
|
2
|
+
from logging import Logger
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Generic, Sequence, TypeVar
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
7
|
+
from pydantic_settings import BaseSettings
|
|
8
|
+
|
|
9
|
+
InputT = TypeVar("InputT")
|
|
10
|
+
|
|
11
|
+
DEFAULT = "DEFAULT"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Keys(StrEnum):
|
|
15
|
+
SECRET = auto()
|
|
16
|
+
BASE_DIR = auto()
|
|
17
|
+
TRANSFORMATION_DID = auto()
|
|
18
|
+
DIDS = auto()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Environment(BaseSettings):
|
|
22
|
+
"""Environment configuration loaded from environment variables"""
|
|
23
|
+
|
|
24
|
+
base_dir: str | Path | None = Field(
|
|
25
|
+
default_factory=lambda: Path("/data"),
|
|
26
|
+
validation_alias=Keys.BASE_DIR.value,
|
|
27
|
+
description="Base data directory, defaults to '/data'",
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
dids: str | list[Path] | None = Field(
|
|
31
|
+
default=None,
|
|
32
|
+
validation_alias=Keys.DIDS.value,
|
|
33
|
+
description='Datasets DID\'s, format: ["XXXX"]',
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
transformation_did: str = Field(
|
|
37
|
+
default=DEFAULT,
|
|
38
|
+
validation_alias=Keys.TRANSFORMATION_DID.value,
|
|
39
|
+
description="Transformation (algorithm) DID",
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
secret: str = Field(
|
|
43
|
+
default=DEFAULT,
|
|
44
|
+
validation_alias=Keys.SECRET.value,
|
|
45
|
+
description="Super secret secret",
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Config(BaseModel, Generic[InputT]):
|
|
50
|
+
"""Algorithm overall configuration"""
|
|
51
|
+
|
|
52
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
53
|
+
|
|
54
|
+
custom_input: InputT | None = Field(
|
|
55
|
+
default=None,
|
|
56
|
+
description="Algorithm's custom input types, must be a dataclass_json",
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
logger: Logger | None = Field(
|
|
60
|
+
default=None,
|
|
61
|
+
description="Logger to use in the algorithm",
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
source_paths: Sequence[Path] = Field(
|
|
65
|
+
default_factory=lambda: [Path("/algorithm/src")],
|
|
66
|
+
description="Paths that should be included so the code executes correctly",
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
environment: Environment = Field(
|
|
70
|
+
default_factory=Environment, description="Environment configuration"
|
|
71
|
+
)
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import InitVar, asdict, dataclass, field
|
|
4
|
+
from logging import Logger
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Callable, Generic, TypeVar
|
|
7
|
+
|
|
8
|
+
from oceanprotocol_job_details import JobDetails # type: ignore
|
|
9
|
+
|
|
10
|
+
from ocean_runner.config import Config
|
|
11
|
+
|
|
12
|
+
InputT = TypeVar("InputT")
|
|
13
|
+
ResultT = TypeVar("ResultT")
|
|
14
|
+
|
|
15
|
+
ValidateFuncT = Callable[["Algorithm"], None]
|
|
16
|
+
RunFuncT = Callable[["Algorithm"], ResultT] | None
|
|
17
|
+
SaveFuncT = Callable[["Algorithm", ResultT, Path], None]
|
|
18
|
+
ErrorFuncT = Callable[["Algorithm", Exception], None]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def default_error_callback(algorithm: Algorithm, e: Exception) -> None:
|
|
22
|
+
algorithm.logger.exception("Error during algorithm execution")
|
|
23
|
+
raise e
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def default_validation(algorithm: Algorithm) -> None:
|
|
27
|
+
algorithm.logger.info("Validating input using default validation")
|
|
28
|
+
assert algorithm.job_details.ddos, "DDOs missing"
|
|
29
|
+
assert algorithm.job_details.files, "Files missing"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def default_save(algorithm: Algorithm, result: ResultT, base: Path) -> None:
|
|
33
|
+
algorithm.logger.info("Saving results using default save")
|
|
34
|
+
with open(base / "result.txt", "w+") as f:
|
|
35
|
+
f.write(str(result))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
|
|
39
|
+
class Algorithm(Generic[InputT, ResultT]):
|
|
40
|
+
"""
|
|
41
|
+
A configurable algorithm runner that behaves like a FastAPI app:
|
|
42
|
+
- You register `validate`, `run`, and `save_results` via decorators.
|
|
43
|
+
- You execute the full pipeline by calling `app()`.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
config: InitVar[Config[InputT] | None] = field(default=None)
|
|
47
|
+
logger: Logger = field(init=False)
|
|
48
|
+
_job_details: JobDetails[InputT] = field(init=False)
|
|
49
|
+
_result: ResultT | None = field(default=None, init=False)
|
|
50
|
+
|
|
51
|
+
# Decorator-registered callbacks
|
|
52
|
+
_validate_fn: ValidateFuncT = field(
|
|
53
|
+
default=default_validation,
|
|
54
|
+
init=False,
|
|
55
|
+
repr=False,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
_run_fn: RunFuncT = field(
|
|
59
|
+
default=None,
|
|
60
|
+
init=False,
|
|
61
|
+
repr=False,
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
_save_fn: SaveFuncT = field(
|
|
65
|
+
default=default_save,
|
|
66
|
+
init=False,
|
|
67
|
+
repr=False,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
_error_callback: ErrorFuncT = field(
|
|
71
|
+
default=default_error_callback,
|
|
72
|
+
init=False,
|
|
73
|
+
repr=False,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
def __post_init__(self, config: Config[InputT] | None) -> None:
|
|
77
|
+
configuration = config or Config()
|
|
78
|
+
|
|
79
|
+
# Configure logger
|
|
80
|
+
if configuration.logger:
|
|
81
|
+
self.logger = configuration.logger
|
|
82
|
+
else:
|
|
83
|
+
import logging
|
|
84
|
+
|
|
85
|
+
logging.basicConfig(
|
|
86
|
+
level=logging.DEBUG,
|
|
87
|
+
format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
|
|
88
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
|
89
|
+
)
|
|
90
|
+
self.logger = logging.getLogger(__name__)
|
|
91
|
+
|
|
92
|
+
# Normalize base_dir
|
|
93
|
+
if isinstance(configuration.environment.base_dir, str):
|
|
94
|
+
configuration.environment.base_dir = Path(
|
|
95
|
+
configuration.environment.base_dir
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Extend sys.path for custom imports
|
|
99
|
+
if configuration.source_paths:
|
|
100
|
+
import sys
|
|
101
|
+
|
|
102
|
+
sys.path.extend(
|
|
103
|
+
[str(path.absolute()) for path in configuration.source_paths]
|
|
104
|
+
)
|
|
105
|
+
self.logger.debug(
|
|
106
|
+
f"Added [{len(configuration.source_paths)}] entries to PATH"
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
self.configuration = configuration
|
|
110
|
+
|
|
111
|
+
class Error(RuntimeError): ...
|
|
112
|
+
|
|
113
|
+
@property
|
|
114
|
+
def job_details(self) -> JobDetails:
|
|
115
|
+
if not self._job_details:
|
|
116
|
+
raise Algorithm.Error("JobDetails not initialized or missing")
|
|
117
|
+
return self._job_details
|
|
118
|
+
|
|
119
|
+
@property
|
|
120
|
+
def result(self) -> ResultT:
|
|
121
|
+
if self._result is None:
|
|
122
|
+
raise Algorithm.Error("Result missing, run the algorithm first")
|
|
123
|
+
return self._result
|
|
124
|
+
|
|
125
|
+
# ---------------------------
|
|
126
|
+
# Decorators (FastAPI-style)
|
|
127
|
+
# ---------------------------
|
|
128
|
+
|
|
129
|
+
def validate(self, fn: ValidateFuncT) -> ValidateFuncT:
|
|
130
|
+
self._validate_fn = fn
|
|
131
|
+
return fn
|
|
132
|
+
|
|
133
|
+
def run(self, fn: RunFuncT) -> RunFuncT:
|
|
134
|
+
self._run_fn = fn
|
|
135
|
+
return fn
|
|
136
|
+
|
|
137
|
+
def save_results(self, fn: SaveFuncT) -> SaveFuncT:
|
|
138
|
+
self._save_fn = fn
|
|
139
|
+
return fn
|
|
140
|
+
|
|
141
|
+
def on_error(self, fn: ErrorFuncT) -> ErrorFuncT:
|
|
142
|
+
self._error_callback = fn
|
|
143
|
+
return fn
|
|
144
|
+
|
|
145
|
+
# ---------------------------
|
|
146
|
+
# Execution Pipeline
|
|
147
|
+
# ---------------------------
|
|
148
|
+
|
|
149
|
+
def __call__(self) -> ResultT | None:
|
|
150
|
+
"""Executes the algorithm pipeline: validate → run → save_results."""
|
|
151
|
+
# Load job details
|
|
152
|
+
self._job_details = JobDetails.load(
|
|
153
|
+
_type=self.configuration.custom_input,
|
|
154
|
+
base_dir=self.configuration.environment.base_dir,
|
|
155
|
+
dids=self.configuration.environment.dids,
|
|
156
|
+
transformation_did=self.configuration.environment.transformation_did,
|
|
157
|
+
secret=self.configuration.environment.secret,
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
self.logger.info("Loaded JobDetails")
|
|
161
|
+
self.logger.debug(asdict(self.job_details))
|
|
162
|
+
|
|
163
|
+
try:
|
|
164
|
+
# Validation step
|
|
165
|
+
self._validate_fn(self)
|
|
166
|
+
|
|
167
|
+
# Run step
|
|
168
|
+
if self._run_fn:
|
|
169
|
+
self.logger.info("Running algorithm...")
|
|
170
|
+
self._result = self._run_fn(self)
|
|
171
|
+
else:
|
|
172
|
+
self.logger.error("No run() function defined. Skipping execution.")
|
|
173
|
+
self._result = None
|
|
174
|
+
|
|
175
|
+
# Save step
|
|
176
|
+
self._save_fn(self, self._result, self.job_details.paths.outputs)
|
|
177
|
+
|
|
178
|
+
except Exception as e:
|
|
179
|
+
self._error_callback(self, e)
|
|
180
|
+
|
|
181
|
+
return self._result
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "ocean-runner"
|
|
3
|
-
version = "0.2.12"
|
|
3
|
+
version = "0.2.19"
|
|
4
4
|
description = "A fluent API for OceanProtocol algorithms"
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "AgrospAI", email = "agrospai@udl.cat" },
|
|
@@ -15,7 +15,9 @@ classifiers = [
|
|
|
15
15
|
"License :: OSI Approved :: MIT License",
|
|
16
16
|
]
|
|
17
17
|
dependencies = [
|
|
18
|
-
"oceanprotocol-job-details>=0.2.
|
|
18
|
+
"oceanprotocol-job-details>=0.2.8",
|
|
19
|
+
"pydantic>=2.12.5",
|
|
20
|
+
"pydantic-settings>=2.12.0",
|
|
19
21
|
"pytest>=8.4.2",
|
|
20
22
|
]
|
|
21
23
|
|
|
@@ -24,14 +26,24 @@ Homepage = "https://github.com/AgrospAI/ocean-runner"
|
|
|
24
26
|
Issues = "https://github.com/AgrospAI/ocean-runner/issues"
|
|
25
27
|
|
|
26
28
|
[tool.pytest.ini_options]
|
|
29
|
+
log_level = "INFO"
|
|
30
|
+
log_cli = true
|
|
27
31
|
pythonpath = "ocean_runner"
|
|
28
32
|
|
|
29
33
|
[build-system]
|
|
30
34
|
requires = ["hatchling"]
|
|
31
35
|
build-backend = "hatchling.build"
|
|
32
36
|
|
|
37
|
+
[dependency-groups]
|
|
38
|
+
dev = [
|
|
39
|
+
"mypy>=1.19.1",
|
|
40
|
+
]
|
|
41
|
+
|
|
33
42
|
[tool.hatch.build.targets.sdist]
|
|
34
43
|
include = ["ocean_runner"]
|
|
35
44
|
|
|
36
45
|
[tool.hatch.build.targets.wheel]
|
|
37
46
|
include = ["ocean_runner"]
|
|
47
|
+
|
|
48
|
+
[tool.mypy]
|
|
49
|
+
plugins = ['pydantic.mypy']
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from dataclasses import asdict, dataclass, field
|
|
3
|
-
from logging import Logger
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import Callable, Iterable, TypeVar
|
|
6
|
-
|
|
7
|
-
T = TypeVar("T")
|
|
8
|
-
|
|
9
|
-
DEFAULT = "DEFAULT"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
@dataclass
|
|
13
|
-
class Environment:
|
|
14
|
-
"""Environment variables mock"""
|
|
15
|
-
|
|
16
|
-
base_dir: str | None = field(
|
|
17
|
-
default_factory=lambda: os.environ.get("BASE_DIR", None),
|
|
18
|
-
)
|
|
19
|
-
"""Base data directory, defaults to '/data'"""
|
|
20
|
-
|
|
21
|
-
dids: str = field(
|
|
22
|
-
default_factory=lambda: os.environ.get("DIDS", None),
|
|
23
|
-
)
|
|
24
|
-
"""Datasets DID's, format: '["XXXX"]'"""
|
|
25
|
-
|
|
26
|
-
transformation_did: str = field(
|
|
27
|
-
default_factory=lambda: os.environ.get("TRANSFORMATION_DID", DEFAULT),
|
|
28
|
-
)
|
|
29
|
-
"""Transformation (algorithm) DID"""
|
|
30
|
-
|
|
31
|
-
secret: str = field(
|
|
32
|
-
default_factory=lambda: os.environ.get("SECRET", DEFAULT),
|
|
33
|
-
)
|
|
34
|
-
"""Super secret secret"""
|
|
35
|
-
|
|
36
|
-
dict = asdict
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
@dataclass
|
|
40
|
-
class Config:
|
|
41
|
-
"""Algorithm overall configuration"""
|
|
42
|
-
|
|
43
|
-
custom_input: T | None = None
|
|
44
|
-
"""Algorithm's custom input types, must be a dataclass_json"""
|
|
45
|
-
|
|
46
|
-
error_callback: Callable[[Exception], None] = None
|
|
47
|
-
"""Callback to execute upon exceptions"""
|
|
48
|
-
|
|
49
|
-
logger: Logger | None = None
|
|
50
|
-
"""Logger to use in the algorithm"""
|
|
51
|
-
|
|
52
|
-
source_paths: Iterable[Path] = field(
|
|
53
|
-
default_factory=lambda: [Path("/algorithm/src")]
|
|
54
|
-
)
|
|
55
|
-
"""Paths that should be included so the code executes correctly"""
|
|
56
|
-
|
|
57
|
-
environment: Environment = field(
|
|
58
|
-
default_factory=lambda: Environment(),
|
|
59
|
-
)
|
|
60
|
-
"""Mock of environment data"""
|
|
@@ -1,143 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from dataclasses import InitVar, asdict, dataclass, field
|
|
4
|
-
from logging import Logger
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import Callable, Generic, Self, TypeVar
|
|
7
|
-
|
|
8
|
-
from oceanprotocol_job_details import JobDetails
|
|
9
|
-
|
|
10
|
-
from ocean_runner.config import Config
|
|
11
|
-
|
|
12
|
-
JobDetailsT = TypeVar(
|
|
13
|
-
"JobDetailsT",
|
|
14
|
-
)
|
|
15
|
-
ResultT = TypeVar("ResultT")
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def default_error_callback(_: Algorithm, e: Exception) -> None:
|
|
19
|
-
raise e
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def default_validation(algorithm: Algorithm) -> None:
|
|
23
|
-
algorithm.logger.info("Validating input using default validation")
|
|
24
|
-
|
|
25
|
-
assert algorithm.job_details.ddos, "DDOs missing"
|
|
26
|
-
assert algorithm.job_details.files, "Files missing"
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def default_save(*, result: ResultT, base: Path, algorithm: Algorithm) -> None:
|
|
30
|
-
algorithm.logger.info("Saving results using default save")
|
|
31
|
-
|
|
32
|
-
with open(base / "result.txt", "w+") as f:
|
|
33
|
-
f.write(str(result))
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
@dataclass
|
|
37
|
-
class Algorithm(Generic[JobDetailsT, ResultT]):
|
|
38
|
-
|
|
39
|
-
config: InitVar[Config | None] = None
|
|
40
|
-
|
|
41
|
-
# Load from config
|
|
42
|
-
logger: Logger = field(init=False)
|
|
43
|
-
|
|
44
|
-
_job_details: JobDetails[JobDetailsT] = field(init=False)
|
|
45
|
-
_result: ResultT | None = field(default=None, init=False)
|
|
46
|
-
|
|
47
|
-
error_callback = default_error_callback
|
|
48
|
-
|
|
49
|
-
def __post_init__(self, config: Config | None) -> None:
|
|
50
|
-
config: Config = config or Config()
|
|
51
|
-
|
|
52
|
-
if config.error_callback:
|
|
53
|
-
self.error_callback = config.error_callback
|
|
54
|
-
|
|
55
|
-
if config.logger:
|
|
56
|
-
self.logger = config.logger
|
|
57
|
-
else:
|
|
58
|
-
import logging
|
|
59
|
-
|
|
60
|
-
logging.basicConfig(
|
|
61
|
-
level=logging.DEBUG,
|
|
62
|
-
format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
|
|
63
|
-
datefmt="%Y-%m-%d %H:%M:%S",
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
self.logger = logging.getLogger("ocean_runner")
|
|
67
|
-
|
|
68
|
-
if isinstance(config.environment.base_dir, str):
|
|
69
|
-
config.environment.base_dir = Path(config.environment.base_dir)
|
|
70
|
-
|
|
71
|
-
if config.source_paths:
|
|
72
|
-
import sys
|
|
73
|
-
|
|
74
|
-
sys.path.extend([str(path.absolute()) for path in config.source_paths])
|
|
75
|
-
self.logger.debug(f"Added [{len(config.source_paths)}] entries to PATH")
|
|
76
|
-
|
|
77
|
-
self._job_details = JobDetails.load(
|
|
78
|
-
_type=config.custom_input,
|
|
79
|
-
base_dir=config.environment.base_dir,
|
|
80
|
-
dids=config.environment.dids,
|
|
81
|
-
transformation_did=config.environment.transformation_did,
|
|
82
|
-
secret=config.environment.secret,
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
self.logger.info("Loaded JobDetails")
|
|
86
|
-
self.logger.debug(asdict(self.job_details))
|
|
87
|
-
|
|
88
|
-
class Error(RuntimeError): ...
|
|
89
|
-
|
|
90
|
-
@property
|
|
91
|
-
def job_details(self) -> JobDetails:
|
|
92
|
-
if not self._job_details:
|
|
93
|
-
raise Algorithm.Error("JobDetails not initialized or missing")
|
|
94
|
-
return self._job_details
|
|
95
|
-
|
|
96
|
-
@property
|
|
97
|
-
def result(self) -> ResultT:
|
|
98
|
-
if self._result is None:
|
|
99
|
-
raise Algorithm.Error("Result missing, run the algorithm first")
|
|
100
|
-
return self._result
|
|
101
|
-
|
|
102
|
-
def validate(
|
|
103
|
-
self,
|
|
104
|
-
callback: Callable[[Self], None] = default_validation,
|
|
105
|
-
) -> Self:
|
|
106
|
-
self.logger.info("Validating instance...")
|
|
107
|
-
try:
|
|
108
|
-
callback(self)
|
|
109
|
-
except Exception as e:
|
|
110
|
-
self.error_callback(e)
|
|
111
|
-
|
|
112
|
-
return self
|
|
113
|
-
|
|
114
|
-
def run(
|
|
115
|
-
self,
|
|
116
|
-
callable: Callable[[Self], ResultT],
|
|
117
|
-
) -> Self:
|
|
118
|
-
self.logger.info("Running algorithm...")
|
|
119
|
-
try:
|
|
120
|
-
self._result = callable(self)
|
|
121
|
-
except Exception as e:
|
|
122
|
-
self.error_callback(e)
|
|
123
|
-
|
|
124
|
-
return self
|
|
125
|
-
|
|
126
|
-
def save_results(
|
|
127
|
-
self,
|
|
128
|
-
callable: Callable[[ResultT, Path, Algorithm], None] = default_save,
|
|
129
|
-
*,
|
|
130
|
-
override_path: Path | None = None,
|
|
131
|
-
) -> None:
|
|
132
|
-
self.logger.info("Saving results...")
|
|
133
|
-
try:
|
|
134
|
-
callable(
|
|
135
|
-
results=self.result,
|
|
136
|
-
base_path=override_path or self.job_details.paths.outputs,
|
|
137
|
-
algorithm=self,
|
|
138
|
-
)
|
|
139
|
-
except Exception as e:
|
|
140
|
-
self.error_callback(e)
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
__all__ = [Algorithm]
|
|
File without changes
|
|
File without changes
|