ocean-runner 0.2.19__tar.gz → 0.2.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocean-runner
3
- Version: 0.2.19
3
+ Version: 0.2.25
4
4
  Summary: A fluent API for OceanProtocol algorithms
5
5
  Project-URL: Homepage, https://github.com/AgrospAI/ocean-runner
6
6
  Project-URL: Issues, https://github.com/AgrospAI/ocean-runner/issues
@@ -17,7 +17,8 @@ Classifier: License :: OSI Approved :: MIT License
17
17
  Classifier: Operating System :: OS Independent
18
18
  Classifier: Programming Language :: Python :: 3
19
19
  Requires-Python: >=3.10
20
- Requires-Dist: oceanprotocol-job-details>=0.2.8
20
+ Requires-Dist: aiofiles>=25.1.0
21
+ Requires-Dist: oceanprotocol-job-details>=0.3.11
21
22
  Requires-Dist: pydantic-settings>=2.12.0
22
23
  Requires-Dist: pydantic>=2.12.5
23
24
  Requires-Dist: pytest>=8.4.2
@@ -27,7 +28,6 @@ Description-Content-Type: text/markdown
27
28
 
28
29
  Ocean Runner is a package that eases algorithm creation in the scope of OceanProtocol.
29
30
 
30
-
31
31
  ## Installation
32
32
 
33
33
  ```bash
@@ -48,7 +48,7 @@ algorithm = Algorithm()
48
48
 
49
49
 
50
50
  @algorithm.run
51
- def run():
51
+ def run(_: Algorithm):
52
52
  return random.randint()
53
53
 
54
54
 
@@ -75,14 +75,14 @@ algorithm = Algorithm(
75
75
  Config(
76
76
  custom_input: ... # dataclass
77
77
  # Custom algorithm parameters dataclass.
78
-
78
+
79
79
  logger: ... # type: logging.Logger
80
80
  # Custom logger to use.
81
81
 
82
82
  source_paths: ... # type: Iterable[Path]
83
83
  # Source paths to include in the PATH
84
-
85
- environment: ...
84
+
85
+ environment: ...
86
86
  # type: ocean_runner.Environment. Mock of environment variables.
87
87
  )
88
88
  )
@@ -91,12 +91,12 @@ algorithm = Algorithm(
91
91
  ```python
92
92
  import logging
93
93
 
94
+ from pydantic import BaseModel
94
95
  from ocean_runner import Algorithm, Config
95
96
 
96
97
 
97
- @dataclass
98
- class CustomInput:
99
- foobar: string
98
+ class CustomInput(BaseModel):
99
+ foobar: string
100
100
 
101
101
 
102
102
  logger = logging.getLogger(__name__)
@@ -106,7 +106,7 @@ algorithm = Algorithm(
106
106
  Config(
107
107
  custom_input: CustomInput,
108
108
  """
109
- Load the Algorithm's Custom Input into a CustomInput dataclass instance.
109
+ Load the Algorithm's Custom Input into a CustomInput instance.
110
110
  """
111
111
 
112
112
  source_paths: [Path("/algorithm/src")],
@@ -162,34 +162,32 @@ algorithm = Algorithm()
162
162
 
163
163
 
164
164
  @algorithm.on_error
165
- def error_callback(ex: Exception):
165
+ def error_callback(algorithm: Algorithm, ex: Exception):
166
166
  algorithm.logger.exception(ex)
167
167
  raise algorithm.Error() from ex
168
168
 
169
169
 
170
170
  @algorithm.validate
171
- def val():
171
+ def val(algorithm: Algorithm):
172
172
  assert algorithm.job_details.files, "Empty input dir"
173
173
 
174
174
 
175
175
  @algorithm.run
176
- def run() -> pd.DataFrame:
177
- _, filename = next(algorithm.job_details.next_path())
176
+ def run(algorithm: Algorithm) -> pd.DataFrame:
177
+ _, filename = next(algorithm.job_details.inputs())
178
178
  return pd.read_csv(filename).describe(include="all")
179
179
 
180
180
 
181
181
  @algorithm.save_results
182
- def save(results: pd.DataFrame, path: Path):
183
- algorithm.logger.info(f"Descriptive statistics: {results}")
184
- results.to_csv(path / "results.csv")
182
+ def save(algorithm: Algorithm, result: pd.DataFrame, base: Path):
183
+ algorithm.logger.info(f"Descriptive statistics: {result}")
184
+ result.to_csv(base / "result.csv")
185
185
 
186
186
 
187
187
  if __name__ == "__main__":
188
188
  algorithm()
189
189
  ```
190
190
 
191
-
192
-
193
191
  ### Default implementations
194
192
 
195
193
  As seen in the minimal example, all methods implemented in `Algorithm` have a default implementation, each of which is described here.
@@ -205,7 +203,7 @@ As seen in the minimal example, all methods implemented in `Algorithm` have a de
205
203
 
206
204
  .run()
207
205
 
208
- """
206
+ """
209
207
  Has NO default implementation, must pass a callback that returns a result of any type.
210
208
  """
211
209
 
@@ -221,7 +219,8 @@ As seen in the minimal example, all methods implemented in `Algorithm` have a de
221
219
  To load the OceanProtocol JobDetails instance, the program reads some environment variables. They can be mocked by passing an instance of `Environment` through the configuration of the algorithm.
222
220
 
223
221
  Environment variables:
222
+
224
223
  - `DIDS` (optional): Input dataset DIDs, must have format: `["abc..90"]`. Defaults to reading them automatically from the `DDO` data directory.
225
224
  - `TRANSFORMATION_DID` (optional, default="DEFAULT"): Algorithm DID, must have format: `abc..90`.
226
- - `SECRET` (optional, default="DEFAULT"): Algorithm secret.
225
+ - `SECRET` (optional, default="DEFAULT"): Algorithm secret.
227
226
  - `BASE_DIR` (optional, default="/data"): Base path to the OceanProtocol data directories.
@@ -2,7 +2,6 @@
2
2
 
3
3
  Ocean Runner is a package that eases algorithm creation in the scope of OceanProtocol.
4
4
 
5
-
6
5
  ## Installation
7
6
 
8
7
  ```bash
@@ -23,7 +22,7 @@ algorithm = Algorithm()
23
22
 
24
23
 
25
24
  @algorithm.run
26
- def run():
25
+ def run(_: Algorithm):
27
26
  return random.randint()
28
27
 
29
28
 
@@ -50,14 +49,14 @@ algorithm = Algorithm(
50
49
  Config(
51
50
  custom_input: ... # dataclass
52
51
  # Custom algorithm parameters dataclass.
53
-
52
+
54
53
  logger: ... # type: logging.Logger
55
54
  # Custom logger to use.
56
55
 
57
56
  source_paths: ... # type: Iterable[Path]
58
57
  # Source paths to include in the PATH
59
-
60
- environment: ...
58
+
59
+ environment: ...
61
60
  # type: ocean_runner.Environment. Mock of environment variables.
62
61
  )
63
62
  )
@@ -66,12 +65,12 @@ algorithm = Algorithm(
66
65
  ```python
67
66
  import logging
68
67
 
68
+ from pydantic import BaseModel
69
69
  from ocean_runner import Algorithm, Config
70
70
 
71
71
 
72
- @dataclass
73
- class CustomInput:
74
- foobar: string
72
+ class CustomInput(BaseModel):
73
+ foobar: string
75
74
 
76
75
 
77
76
  logger = logging.getLogger(__name__)
@@ -81,7 +80,7 @@ algorithm = Algorithm(
81
80
  Config(
82
81
  custom_input: CustomInput,
83
82
  """
84
- Load the Algorithm's Custom Input into a CustomInput dataclass instance.
83
+ Load the Algorithm's Custom Input into a CustomInput instance.
85
84
  """
86
85
 
87
86
  source_paths: [Path("/algorithm/src")],
@@ -137,34 +136,32 @@ algorithm = Algorithm()
137
136
 
138
137
 
139
138
  @algorithm.on_error
140
- def error_callback(ex: Exception):
139
+ def error_callback(algorithm: Algorithm, ex: Exception):
141
140
  algorithm.logger.exception(ex)
142
141
  raise algorithm.Error() from ex
143
142
 
144
143
 
145
144
  @algorithm.validate
146
- def val():
145
+ def val(algorithm: Algorithm):
147
146
  assert algorithm.job_details.files, "Empty input dir"
148
147
 
149
148
 
150
149
  @algorithm.run
151
- def run() -> pd.DataFrame:
152
- _, filename = next(algorithm.job_details.next_path())
150
+ def run(algorithm: Algorithm) -> pd.DataFrame:
151
+ _, filename = next(algorithm.job_details.inputs())
153
152
  return pd.read_csv(filename).describe(include="all")
154
153
 
155
154
 
156
155
  @algorithm.save_results
157
- def save(results: pd.DataFrame, path: Path):
158
- algorithm.logger.info(f"Descriptive statistics: {results}")
159
- results.to_csv(path / "results.csv")
156
+ def save(algorithm: Algorithm, result: pd.DataFrame, base: Path):
157
+ algorithm.logger.info(f"Descriptive statistics: {result}")
158
+ result.to_csv(base / "result.csv")
160
159
 
161
160
 
162
161
  if __name__ == "__main__":
163
162
  algorithm()
164
163
  ```
165
164
 
166
-
167
-
168
165
  ### Default implementations
169
166
 
170
167
  As seen in the minimal example, all methods implemented in `Algorithm` have a default implementation, each of which is described here.
@@ -180,7 +177,7 @@ As seen in the minimal example, all methods implemented in `Algorithm` have a de
180
177
 
181
178
  .run()
182
179
 
183
- """
180
+ """
184
181
  Has NO default implementation, must pass a callback that returns a result of any type.
185
182
  """
186
183
 
@@ -196,7 +193,8 @@ As seen in the minimal example, all methods implemented in `Algorithm` have a de
196
193
  To load the OceanProtocol JobDetails instance, the program reads some environment variables. They can be mocked by passing an instance of `Environment` through the configuration of the algorithm.
197
194
 
198
195
  Environment variables:
196
+
199
197
  - `DIDS` (optional): Input dataset DIDs, must have format: `["abc..90"]`. Defaults to reading them automatically from the `DDO` data directory.
200
198
  - `TRANSFORMATION_DID` (optional, default="DEFAULT"): Algorithm DID, must have format: `abc..90`.
201
- - `SECRET` (optional, default="DEFAULT"): Algorithm secret.
199
+ - `SECRET` (optional, default="DEFAULT"): Algorithm secret.
202
200
  - `BASE_DIR` (optional, default="/data"): Base path to the OceanProtocol data directories.
@@ -1,12 +1,12 @@
1
1
  from enum import StrEnum, auto
2
2
  from logging import Logger
3
3
  from pathlib import Path
4
- from typing import Generic, Sequence, TypeVar
4
+ from typing import Generic, Sequence, Type, TypeVar
5
5
 
6
6
  from pydantic import BaseModel, ConfigDict, Field
7
7
  from pydantic_settings import BaseSettings
8
8
 
9
- InputT = TypeVar("InputT")
9
+ InputT = TypeVar("InputT", BaseModel, None)
10
10
 
11
11
  DEFAULT = "DEFAULT"
12
12
 
@@ -21,13 +21,13 @@ class Keys(StrEnum):
21
21
  class Environment(BaseSettings):
22
22
  """Environment configuration loaded from environment variables"""
23
23
 
24
- base_dir: str | Path | None = Field(
24
+ base_dir: str | Path = Field(
25
25
  default_factory=lambda: Path("/data"),
26
26
  validation_alias=Keys.BASE_DIR.value,
27
27
  description="Base data directory, defaults to '/data'",
28
28
  )
29
29
 
30
- dids: str | list[Path] | None = Field(
30
+ dids: str | None = Field(
31
31
  default=None,
32
32
  validation_alias=Keys.DIDS.value,
33
33
  description='Datasets DID\'s, format: ["XXXX"]',
@@ -51,7 +51,7 @@ class Config(BaseModel, Generic[InputT]):
51
51
 
52
52
  model_config = ConfigDict(arbitrary_types_allowed=True)
53
53
 
54
- custom_input: InputT | None = Field(
54
+ custom_input: Type[InputT] | None = Field(
55
55
  default=None,
56
56
  description="Algorithm's custom input types, must be a dataclass_json",
57
57
  )
File without changes
@@ -1,26 +1,30 @@
1
1
  from __future__ import annotations
2
2
 
3
- from dataclasses import InitVar, asdict, dataclass, field
3
+ from dataclasses import InitVar, dataclass, field
4
4
  from logging import Logger
5
5
  from pathlib import Path
6
- from typing import Callable, Generic, TypeVar
6
+ from typing import Awaitable, Callable, Dict, Generic, TypeAlias, TypeVar
7
7
 
8
- from oceanprotocol_job_details import JobDetails # type: ignore
8
+ from oceanprotocol_job_details import JobDetails, load_job_details, run_in_executor
9
+ from pydantic import BaseModel, JsonValue
9
10
 
10
11
  from ocean_runner.config import Config
11
12
 
12
- InputT = TypeVar("InputT")
13
+ InputT = TypeVar("InputT", BaseModel, None)
13
14
  ResultT = TypeVar("ResultT")
15
+ T = TypeVar("T")
14
16
 
15
- ValidateFuncT = Callable[["Algorithm"], None]
16
- RunFuncT = Callable[["Algorithm"], ResultT] | None
17
- SaveFuncT = Callable[["Algorithm", ResultT, Path], None]
18
- ErrorFuncT = Callable[["Algorithm", Exception], None]
19
17
 
18
+ Algo: TypeAlias = "Algorithm[InputT, ResultT]"
19
+ ValidateFuncT: TypeAlias = Callable[[Algo], None | Awaitable[None] | None]
20
+ RunFuncT: TypeAlias = Callable[[Algo], ResultT | Awaitable[ResultT]]
21
+ SaveFuncT: TypeAlias = Callable[[Algo, ResultT, Path], Awaitable[None] | None]
22
+ ErrorFuncT: TypeAlias = Callable[[Algo, Exception], Awaitable[None] | None]
20
23
 
21
- def default_error_callback(algorithm: Algorithm, e: Exception) -> None:
24
+
25
+ def default_error_callback(algorithm: Algorithm, error: Exception) -> None:
22
26
  algorithm.logger.exception("Error during algorithm execution")
23
- raise e
27
+ raise error
24
28
 
25
29
 
26
30
  def default_validation(algorithm: Algorithm) -> None:
@@ -29,10 +33,20 @@ def default_validation(algorithm: Algorithm) -> None:
29
33
  assert algorithm.job_details.files, "Files missing"
30
34
 
31
35
 
32
- def default_save(algorithm: Algorithm, result: ResultT, base: Path) -> None:
36
+ async def default_save(algorithm: Algorithm, result: ResultT, base: Path) -> None:
37
+ import aiofiles
38
+
33
39
  algorithm.logger.info("Saving results using default save")
34
- with open(base / "result.txt", "w+") as f:
35
- f.write(str(result))
40
+ async with aiofiles.open(base / "result.txt", "w+") as f:
41
+ await f.write(str(result))
42
+
43
+
44
+ @dataclass(slots=True)
45
+ class Functions(Generic[InputT, ResultT]):
46
+ validate: ValidateFuncT = field(default=default_validation, init=False)
47
+ run: RunFuncT | None = field(default=None, init=False)
48
+ save: SaveFuncT = field(default=default_save, init=False)
49
+ error: ErrorFuncT = field(default=default_error_callback, init=False)
36
50
 
37
51
 
38
52
  @dataclass
@@ -44,33 +58,13 @@ class Algorithm(Generic[InputT, ResultT]):
44
58
  """
45
59
 
46
60
  config: InitVar[Config[InputT] | None] = field(default=None)
47
- logger: Logger = field(init=False)
48
- _job_details: JobDetails[InputT] = field(init=False)
49
- _result: ResultT | None = field(default=None, init=False)
50
-
51
- # Decorator-registered callbacks
52
- _validate_fn: ValidateFuncT = field(
53
- default=default_validation,
54
- init=False,
55
- repr=False,
56
- )
57
-
58
- _run_fn: RunFuncT = field(
59
- default=None,
60
- init=False,
61
- repr=False,
62
- )
63
61
 
64
- _save_fn: SaveFuncT = field(
65
- default=default_save,
66
- init=False,
67
- repr=False,
68
- )
62
+ logger: Logger = field(init=False, repr=False)
69
63
 
70
- _error_callback: ErrorFuncT = field(
71
- default=default_error_callback,
72
- init=False,
73
- repr=False,
64
+ _job_details: JobDetails[InputT] = field(init=False)
65
+ _result: ResultT | None = field(default=None, init=False)
66
+ _functions: Functions[InputT, ResultT] = field(
67
+ default_factory=Functions, init=False, repr=False
74
68
  )
75
69
 
76
70
  def __post_init__(self, config: Config[InputT] | None) -> None:
@@ -106,7 +100,7 @@ class Algorithm(Generic[InputT, ResultT]):
106
100
  f"Added [{len(configuration.source_paths)}] entries to PATH"
107
101
  )
108
102
 
109
- self.configuration = configuration
103
+ self.configuration: Config[InputT] = configuration
110
104
 
111
105
  class Error(RuntimeError): ...
112
106
 
@@ -127,55 +121,62 @@ class Algorithm(Generic[InputT, ResultT]):
127
121
  # ---------------------------
128
122
 
129
123
  def validate(self, fn: ValidateFuncT) -> ValidateFuncT:
130
- self._validate_fn = fn
124
+ self._functions.validate = fn
131
125
  return fn
132
126
 
133
127
  def run(self, fn: RunFuncT) -> RunFuncT:
134
- self._run_fn = fn
128
+ self._functions.run = fn
135
129
  return fn
136
130
 
137
131
  def save_results(self, fn: SaveFuncT) -> SaveFuncT:
138
- self._save_fn = fn
132
+ self._functions.save = fn
139
133
  return fn
140
134
 
141
135
  def on_error(self, fn: ErrorFuncT) -> ErrorFuncT:
142
- self._error_callback = fn
136
+ self._functions.error = fn
143
137
  return fn
144
138
 
145
139
  # ---------------------------
146
140
  # Execution Pipeline
147
141
  # ---------------------------
148
142
 
149
- def __call__(self) -> ResultT | None:
150
- """Executes the algorithm pipeline: validate → run → save_results."""
151
- # Load job details
152
- self._job_details = JobDetails.load(
153
- _type=self.configuration.custom_input,
154
- base_dir=self.configuration.environment.base_dir,
155
- dids=self.configuration.environment.dids,
156
- transformation_did=self.configuration.environment.transformation_did,
157
- secret=self.configuration.environment.secret,
158
- )
143
+ def execute(self) -> ResultT | None:
144
+ env = self.configuration.environment
145
+ config: Dict[str, JsonValue] = {
146
+ "base_dir": str(env.base_dir),
147
+ "dids": env.dids,
148
+ "secret": env.secret,
149
+ "transformation_did": env.transformation_did,
150
+ }
151
+
152
+ self._job_details = load_job_details(config, self.configuration.custom_input)
159
153
 
160
154
  self.logger.info("Loaded JobDetails")
161
- self.logger.debug(asdict(self.job_details))
155
+ self.logger.debug(self.job_details.model_dump())
162
156
 
163
157
  try:
164
- # Validation step
165
- self._validate_fn(self)
158
+ run_in_executor(self._functions.validate(self))
166
159
 
167
- # Run step
168
- if self._run_fn:
160
+ if self._functions.run:
169
161
  self.logger.info("Running algorithm...")
170
- self._result = self._run_fn(self)
162
+ self._result = run_in_executor(self._functions.run(self))
171
163
  else:
172
164
  self.logger.error("No run() function defined. Skipping execution.")
173
165
  self._result = None
174
166
 
175
- # Save step
176
- self._save_fn(self, self._result, self.job_details.paths.outputs)
167
+ run_in_executor(
168
+ self._functions.save(
169
+ algorithm=self,
170
+ result=self._result,
171
+ base=self.job_details.paths.outputs,
172
+ ),
173
+ )
177
174
 
178
175
  except Exception as e:
179
- self._error_callback(self, e)
176
+ run_in_executor(self._functions.error(self, e))
180
177
 
181
178
  return self._result
179
+
180
+ def __call__(self) -> ResultT | None:
181
+ """Executes the algorithm pipeline: validate → run → save_results."""
182
+ return self.execute()
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ocean-runner"
3
- version = "0.2.19"
3
+ version = "0.2.25"
4
4
  description = "A fluent API for OceanProtocol algorithms"
5
5
  authors = [
6
6
  { name = "AgrospAI", email = "agrospai@udl.cat" },
@@ -15,7 +15,8 @@ classifiers = [
15
15
  "License :: OSI Approved :: MIT License",
16
16
  ]
17
17
  dependencies = [
18
- "oceanprotocol-job-details>=0.2.8",
18
+ "aiofiles>=25.1.0",
19
+ "oceanprotocol-job-details>=0.3.11",
19
20
  "pydantic>=2.12.5",
20
21
  "pydantic-settings>=2.12.0",
21
22
  "pytest>=8.4.2",
@@ -35,15 +36,16 @@ requires = ["hatchling"]
35
36
  build-backend = "hatchling.build"
36
37
 
37
38
  [dependency-groups]
38
- dev = [
39
- "mypy>=1.19.1",
40
- ]
39
+ dev = ["mypy>=1.19.1", "types-aiofiles>=25.1.0.20251011"]
41
40
 
42
41
  [tool.hatch.build.targets.sdist]
43
42
  include = ["ocean_runner"]
44
43
 
45
44
  [tool.hatch.build.targets.wheel]
46
- include = ["ocean_runner"]
45
+ packages = ["ocean_runner"]
46
+
47
+ [tool.hatch.build.targets.wheel.package-data]
48
+ ocean_runner = ["py.typed"]
47
49
 
48
50
  [tool.mypy]
49
51
  plugins = ['pydantic.mypy']
File without changes
File without changes