pinexq-procon 2.1.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,369 @@
1
+ import logging
2
+ import pathlib
3
+ import warnings
4
+ from functools import partial
5
+ from typing import IO, Any, Self, Type, TypeVar, Union
6
+
7
+ from ..core.exceptions import ProConDataslotError
8
+ from ..dataslots.annotation import RETURN_SLOT_NAME, FileMode, ReaderType, WriterType, dataslot
9
+ from ..dataslots.datatypes import DataSlotDescription, MediaTypes, Metadata, SlotDescription, SlotType
10
+ from ..dataslots.filebackend import FileBackEnd, LocalCachedHttpFile, LocalFile
11
+ from ..dataslots.metadata import MetadataHandler, MetadataProxy
12
+
13
+
14
# Type variable for path arguments; constrained to `str` or `pathlib.Path`.
PathLike = TypeVar("PathLike", str, pathlib.Path)

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
17
+
18
+
19
def create_dataslot_description(slots: dict[str, list[str]]) -> dict[str, DataSlotDescription]:
    """Build the dataslot descriptions passed on to the _call function of the Step class.

    The description is usually created internally
    from the information in a `job.offer` message or cli call.

    Args:
        slots: Maps each dataslot name to the list of URIs/paths of its slots.

    Returns:
        One `DataSlotDescription` per dataslot name. Each contained
        `SlotDescription` gets its position in the input list as index.
    """
    descriptions: dict[str, DataSlotDescription] = {}
    for name, path_list in slots.items():
        slot_descriptions = [
            SlotDescription(uri, position) for position, uri in enumerate(path_list)
        ]
        descriptions[name] = DataSlotDescription(name=name, slots=slot_descriptions)
    return descriptions
31
+
32
+
33
def ishttp(uri: str) -> bool:
    """Return True if `uri` starts with the `http://` or `https://` scheme.

    URI schemes are case-insensitive, so `HTTP://host` is recognized as well.

    Args:
        uri: The URI (or local path) to inspect.

    Returns:
        True for HTTP(S) URIs, False for everything else (including "").
    """
    # Only the first len("https://") == 8 characters can belong to the prefix,
    # so there is no need to lower-case the whole string.
    return uri[:8].lower().startswith(("http://", "https://"))
35
+
36
+
37
def _select_backend(description: SlotDescription) -> Type[FileBackEnd]:
    """Pick the backend constructor for a Slot based on the URI of its description.

    HTTP(S) URIs yield the `LocalCachedHttpFile` constructor with the headers of
    the description pre-bound as request arguments; any other URI yields
    `LocalFile`.
    """
    # Todo: use a match .. case here for pattern matching
    uri_text = str(description.uri)
    if not ishttp(uri_text):
        # Todo: do all file paths reach here? relative paths might be problematic?
        #  https://stackoverflow.com/questions/11687478/convert-a-filename-to-a-file-url
        #  https://stackoverflow.com/questions/5977576/is-there-a-convenient-way-to-map-a-file-uri-to-os-path
        return LocalFile

    http_request_args = {'headers': description.headers}
    return partial(LocalCachedHttpFile, request_args=http_request_args)
47
+
48
+
49
class Slot:
    """Wraps functionality for a single slot (i.e. one uri/file containing data).

    A Slot owns one file backend (selected from the URI of its description),
    optional reader/writer callables and an optional metadata handler.
    It can be used as a context manager: entering opens the slot, leaving
    closes it and pushes the data only if no exception was raised.
    """

    _name: str
    _type: SlotType
    _description: SlotDescription
    _backend: FileBackEnd
    _mode: FileMode
    _reader: ReaderType | None
    _writer: WriterType | None
    _metaproxy: MetadataProxy | None
    _metadata_handler: MetadataHandler | None
    _open: bool

    def __init__(
        self,
        name: str,
        slot_type: SlotType,
        description: SlotDescription,
        mode: FileMode,
        reader: ReaderType | None = None,
        writer: WriterType | None = None,
        metadata_handler: MetadataHandler | None = None
    ) -> None:
        """
        Args:
            name: Name of this slot.
            slot_type: Whether this is an INPUT, OUTPUT or RETURN slot.
            description: Description holding the URI (and headers) of the data.
            mode: File mode the backend is opened with.
            reader: Optional callable applied to the open file in `read_data`.
            writer: Optional callable applied to the open file in `write_data`.
            metadata_handler: Optional handler used to load/store metadata.

        Raises:
            ValueError: If a writer is given for an INPUT slot or a reader is
                given for an OUTPUT/RETURN slot.
        """
        self._name = name
        self._type = slot_type
        self._description = description
        self._mode = mode
        self._reader = reader
        self._writer = writer

        if self._type is SlotType.INPUT and self._writer is not None:
            raise ValueError(f"A 'writer' parameter is defined for INPUT slot '{self._name}'. "
                             f"Did you mean to define a 'reader' instead?")
        if self._type in (SlotType.OUTPUT, SlotType.RETURN) and self._reader is not None:
            raise ValueError(f"A 'reader' parameter is defined for OUTPUT slot '{self._name}'. "
                             f"Did you mean to define a 'writer' instead?")

        backend_class = _select_backend(description)
        self._backend = backend_class(uri=description.uri, mode=mode)

        self._metadata_handler = metadata_handler
        self._metaproxy = None
        self._open = False

    @property
    def file(self) -> IO:
        """The file object of the underlying backend."""
        return self._backend.file

    @property
    def name(self) -> str:
        """Name of this slot."""
        return self._name

    @property
    def media_type(self) -> str:
        """Media type stored in the slot description."""
        return self._description.mediatype

    @property
    def meta(self) -> MetadataProxy | None:
        """Return a proxy object to access metadata.

        Returns:
            `None` as long as the slot has not been opened.
            Otherwise, a `MetadataProxy` is returned. The proxy provides the data, if available,
            but will delay creation of new data until it is actually used.
        """
        return self._metaproxy

    def __enter__(self) -> Self:
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # close the file, but push to remote destination only if there was no exception
        self.close(sync=exc_type is None)

    def open(self):
        """Open the slot; a no-op if it is already open.

        INPUT slots pull their data through the backend first. Afterwards the
        backend file is opened and the metadata proxy is (re)created.
        """
        if not self._open:
            if self._type == SlotType.INPUT:
                self._backend.pull()
            self._backend.open()
            self._open = True
            self._get_metadata()

    def _get_metadata(self):
        """Create the metadata proxy for this slot."""
        # Read metadata from the handler
        if self._metadata_handler:
            # If metadata is available, use it, otherwise initialize with empty Metadata
            metadata = self._metadata_handler.get(self._description)
            read_only = self._type == SlotType.INPUT
            self._metaproxy = MetadataProxy(metadata=metadata or Metadata(), _readonly=read_only)
        else:
            # If the current environment does not provide any means of metadata handling, create an empty stub
            self._metaproxy = MetadataProxy(metadata=Metadata(), _readonly=True)

    def close(self, sync: bool = True):
        """Close the slot; a no-op if it is not open.

        Args:
            sync: If True, OUTPUT and RETURN slots are pushed after closing.
        """
        if self._open:
            self._backend.close()
            self._open = False
            if sync and self._type in (SlotType.OUTPUT, SlotType.RETURN):
                self.push()

    def push(self):
        """Push the data through the backend, then store the metadata."""
        self._backend.push()
        self._set_metadata()

    def _set_metadata(self):
        """Hand the current metadata to the handler, if both exist."""
        # Write the metadata to storage
        if (self._metadata_handler is not None) and self._metaproxy is not None:
            self._metadata_handler.set(self._description, self._metaproxy._metadata)

    def read_data(self) -> Any:
        """Open the slot and return its content.

        Returns:
            The result of the configured reader applied to the open file, or the
            raw result of `file.read()` if no reader is configured.
        """
        with self as b:
            if self._reader is not None:
                data = self._reader(b.file)
            else:
                data = b.file.read()
        return data

    def write_data(self, data: Any) -> None:
        """Open the slot and write `data` to it.

        Args:
            data: Passed to the configured writer together with the open file,
                or written directly with `file.write()` if no writer is configured.
        """
        with self as b:
            if self._writer is not None:
                self._writer(b.file, data)
            else:
                b.file.write(data)

    def __repr__(self) -> str:
        return f"Slot('{self._name}', type={self._type.name}, uri='{self._description.uri}')"

    def __hash__(self) -> int:
        return hash((self._name, self._description.uri, self._type))

    def __eq__(self, other: 'Slot') -> bool:
        """Two slots are equal if name, URI and slot type match.

        Raises:
            TypeError: If `other` is not a `Slot`.
        """
        if not isinstance(other, Slot):
            raise TypeError(f"Can not compare {type(self)} object with type {type(other)}!?")
        return (
            self._name == other._name and
            self._description.uri == other._description.uri and
            self._type == other._type
        )

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__.copy()
        # Remove the backend; it might contain open file handles
        del state['_backend']
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        self.__dict__.update(state)
        backend_class = _select_backend(self._description)
        self._backend = backend_class(uri=self._description.uri, mode=self._mode)
        # The freshly created backend has not been opened, regardless of the
        # state the original object was in when it was pickled.
        self._open = False
200
+
201
+
202
class DataSlot:
    """Manages underlying file access and application of de-/serializer functions.

    A DataSlot bundles one `Slot` per entry of its description. Used as a
    context manager it opens all of its slots on entry and closes them on exit.
    """

    annotation: dataslot
    description: DataSlotDescription
    _metadata_handler: MetadataHandler | None

    _slots: list[Slot]

    def __init__(self, description: DataSlotDescription, annotation: dataslot,
                 metadata_handler: MetadataHandler | None = None):
        """
        Args:
            description: The description sent with the job offer
            annotation: The annotation generated by the dataslot decorator
            metadata_handler: `MetadataHandler` object managing access to metadata.

        Raises:
            ProConDataslotError: If description and annotation are inconsistent
                (see `_check_constraints`).
        """
        self.description = description
        self.annotation = annotation
        self._metadata_handler = metadata_handler
        self._check_constraints()
        self._init_slots()

    def _check_constraints(self):
        """Validate the annotation against the description.

        Emits a warning if the file mode contradicts the slot type.

        Raises:
            ProConDataslotError: If no slot location is defined, if a
                non-collection dataslot got multiple locations, or if the
                reserved return-slot name is used for a non-RETURN dataslot.
        """
        anno = self.annotation
        descr = self.description

        if (anno.slot_type in (SlotType.INPUT,)) and ('w' in anno.mode):
            warnings.warn(f"Write mode set for INPUT dataslot '{anno.name}'! "
                          f"Did you intent to use read-mode instead?")
        elif (anno.slot_type in (SlotType.OUTPUT, SlotType.RETURN)) and ('r' in anno.mode):
            # This branch fires on *read* mode, so the message must say so.
            warnings.warn(f"Read mode set for OUTPUT dataslot '{anno.name}'! "
                          f"Did you intent to use write-mode instead?")

        slot_count = len(descr.slots)
        if slot_count == 0:
            raise ProConDataslotError(f"Dataslot description for '{descr.name}' "
                                      f"does not define any slot locations!")
        if slot_count > 1 and not anno.collection:
            raise ProConDataslotError(f"Dataslot for '{descr.name}' is no collection"
                                      f" but got multiple slot locations!")
        if ((descr.name == RETURN_SLOT_NAME)
                and (anno.slot_type is not SlotType.RETURN)):
            raise ProConDataslotError(f"Reserved name {RETURN_SLOT_NAME} "
                                      f"used for a dataslot that is not of type RETURN!")

    def _init_slots(self):
        """Create one `Slot` per slot description.

        Each slot description is first stamped with the dataslot name and its
        position, which are then used to build the slot's name.
        """
        for idx, slot_description in enumerate(self.description.slots):
            slot_description.dataslot_name = self.description.name
            slot_description.index = idx
        self._slots = [
            Slot(
                name=f"{slot_description.dataslot_name}[{slot_description.index}]",
                slot_type=self.annotation.slot_type,
                description=slot_description,
                mode=self.annotation.mode,
                writer=self.annotation.writer,
                reader=self.annotation.reader,
                metadata_handler=self._metadata_handler,
            )
            for slot_description in self.description.slots
        ]

    @property
    def slots(self) -> list[Slot]:
        """List of all Slots assigned to this Dataslot (a shallow copy)."""
        return list(self._slots)

    def __enter__(self) -> Self:
        for slot in self._slots:
            slot.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close all files, but push to the remote destination only if there was
        # no exception - the same policy a single Slot applies on exit.
        for slot in self._slots:
            slot.close(sync=exc_type is None)

    def __len__(self) -> int:
        return len(self.description.slots)

    def __repr__(self) -> str:
        return f"DataSlot('{self.description.name}', type={self.annotation.slot_type.name})"

    # Todo: maybe allow hashing in the future; description and annotation are currently not hashable
    # def __hash__(self):
    #     return hash((self.description, self.annotation))

    def __eq__(self, other: "DataSlot"):
        """Two dataslots are equal if description and annotation match.

        Raises:
            TypeError: If `other` is not a `DataSlot`.
        """
        if not isinstance(other, DataSlot):
            raise TypeError(f"Can not compare {type(self)} object with type {type(other)}!?")
        return (
            self.description == other.description and
            self.annotation == other.annotation
        )

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__.copy()
        # Remove Slots as they might contain open file handles
        del state['_slots']
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        # restore self.annotation and self.description
        self.__dict__.update(state)
        self._init_slots()

    def __copy__(self):
        # Keep the metadata handler, otherwise the copy silently loses metadata access.
        return DataSlot(annotation=self.annotation, description=self.description,
                        metadata_handler=self._metadata_handler)

    def copy_with(self, annotation: dataslot):
        """Create a copy of this DataSlot with a new annotation. (e.g. change the type from INPUT to OUTPUT)

        The description and the metadata handler are taken over from this DataSlot.
        """
        return DataSlot(annotation=annotation, description=self.description,
                        metadata_handler=self._metadata_handler)

    def read_data_from_slots(self) -> Union[list[Any], 'DataSlot']:
        """Resolve all files in this dataslot.

        For a collection, all slots are opened and the list of their files is
        passed to the configured `collection_reader`. Otherwise the single slot
        is read with its configured reader.

        Returns:
            Whatever the `collection_reader` or the single slot's `read_data` returns.

        Raises:
            ProConDataslotError: If a collection has no `collection_reader`, or
                a non-collection dataslot holds more than one slot.
        """
        if self.annotation.collection:  # multiple Slots
            if not self.annotation.collection_reader:
                raise ProConDataslotError(f"Input-collection-DataSlot '{self.annotation.name}' "
                                          f"has no 'collection_reader' configured!")
            with self:
                read_data = self.annotation.collection_reader(
                    [slot.file for slot in self._slots]
                )
            return read_data

        else:  # single Slot
            if len(self._slots) > 1:
                raise ProConDataslotError(f"Configuration error: Non-collection DataSlot "
                                          f"'{self.annotation.name}' with multiple Slots!")
            return self._slots[0].read_data()

    def write_data_to_slots(self, data: Any):
        """Open the file(s) and apply the configured writer/serializer callable.

        NOTE: Writing strings to files in "text-mode" produces different output
        depending on the underlying operating system. Windows will use CRLF (\\r\\n) as
        line ending while most others will use LF (\\n).

        Args:
            data: Data to write into this dataslot. It is passed on as parameter
                to the configured `writer` (or `collection_writer`).

        Raises:
            ProConDataslotError: If a collection has no `collection_writer`, or
                a non-collection dataslot holds more than one slot.
        """

        if self.annotation.collection:  # multiple Slots
            if not self.annotation.collection_writer:
                raise ProConDataslotError(f"Output-collection-DataSlot '{self.annotation.name}' "
                                          f"has no 'collection_writer' configured!")
            with self:
                self.annotation.collection_writer(
                    [slot.file for slot in self._slots],
                    data
                )
        else:  # single Slot
            if len(self._slots) > 1:
                raise ProConDataslotError(f"Configuration error: Non-collection DataSlot "
                                          f"'{self.annotation.name}' with multiple Slots!")
            self._slots[0].write_data(data)
362
+
363
+
364
# Every class that counts as a dataslot type.
DataslotTypes = (DataSlot,)


def isdataslot(t: type) -> bool:
    """Tell whether `t` is listed in `DataslotTypes`."""
    is_listed = t in DataslotTypes
    return is_listed
@@ -0,0 +1,50 @@
1
+ from dataclasses import dataclass, field
2
+ from enum import Enum, StrEnum, auto
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+
7
+ class MediaTypes(StrEnum):
8
+ OCTETSTREAM = 'application/octet-stream'
9
+ JSON = 'application/json'
10
+ FORMDATA = 'multipart/form-data'
11
+ SIREN = 'application/siren+json'
12
+ XML = 'application/xml'
13
+ ZIP = 'application/zip'
14
+ PDF = 'application/pdf'
15
+ TEXT = 'text/plain'
16
+ HTML = 'text/html'
17
+ CSV = 'text/csv'
18
+ SVG = 'image/svg+xml'
19
+ PNG = 'image/png'
20
+ JPEG = 'image/jpeg'
21
+ BMP = 'image/bmp'
22
+ WORKFLOW_DEFINITION = "application/vnd.pinexq.workflow.definition+json"
23
+ WORKFLOW_REPORT = "application/vnd.pinexq.workflow.report+json"
24
+
25
+
26
class SlotType(Enum):
    """The kinds of slots; values are assigned automatically in declaration order."""

    INPUT = auto()
    OUTPUT = auto()
    RETURN = auto()
30
+
31
+
32
+ @dataclass(eq=True, slots=True)
33
+ class SlotDescription:
34
+ uri: str
35
+ index: int = field(default=0)
36
+ dataslot_name: str = field(default_factory=str)
37
+ headers: dict[str, str] = field(default_factory=dict)
38
+ mediatype: str = field(default_factory=str)
39
+
40
+
41
@dataclass(eq=True, slots=True)
class DataSlotDescription:
    """Describes a dataslot: its name plus the descriptions of all its slots."""

    # Name of the dataslot.
    name: str
    # One description per slot of this dataslot.
    slots: list[SlotDescription]
45
+
46
+
47
class Metadata(BaseModel):
    """Metadata record attached to a slot; every field has an empty default."""

    comment: str = ""
    tags: list[str] = Field(default_factory=list)
    filename: str = ""
@@ -0,0 +1,26 @@
1
+ from typing import IO, TypeVar, Type
2
+ import json
3
+
4
+ from pydantic import BaseModel
5
+
6
# Any concrete pydantic model class.
TPydanticBase = TypeVar('TPydanticBase', bound=BaseModel)


class DefaultReaderWriter:
    """Reader/writer callables that (de)serialize pydantic models as JSON.

    The writers emit encoded bytes, so the file handle passed to them has to
    accept bytes.
    """

    @staticmethod
    def pydantic_base_writer(filehandle: IO, d: BaseModel):
        """Write a single model as JSON (field aliases are used as keys)."""
        filehandle.write(d.model_dump_json(by_alias=True).encode())

    @staticmethod
    def pydantic_base_reader(filehandle: IO, target_type: Type[TPydanticBase]) -> TPydanticBase:
        """Read the whole file and validate its JSON content as `target_type`."""
        return target_type.model_validate_json(filehandle.read())

    @staticmethod
    def pydantic_list_base_writer(filehandle: IO, d: list[BaseModel]):
        """Write a list of models as one JSON array (field aliases are used as keys)."""
        # mode="json" makes pydantic convert values that `json.dumps` cannot
        # handle itself (datetime, UUID, ...), matching what `model_dump_json`
        # does in the single-model writer.
        result = [item.model_dump(mode="json", by_alias=True) for item in d]
        filehandle.write(json.dumps(result).encode())

    @staticmethod
    def pydantic_list_base_reader(filehandle: IO, target_type: Type[TPydanticBase]) -> list[TPydanticBase]:
        """Read a JSON array and validate each item as `target_type`."""
        # `json.loads` accepts bytes as well as str, so this works for file
        # handles opened in binary and in text mode.
        result: list[dict] = json.loads(filehandle.read())
        return [target_type.model_validate(item) for item in result]
@@ -0,0 +1,126 @@
1
+ import logging
2
+ import os
3
+ from tempfile import NamedTemporaryFile
4
+ from typing import IO, Protocol, Self, runtime_checkable
5
+
6
+ import httpx
7
+ from pydantic import AnyUrl
8
+
9
+ from ..core.exceptions import ProConDataslotError
10
+
11
+
12
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
13
+
14
+
15
@runtime_checkable
class FileBackEnd(Protocol):
    """Implements the prototype for the file access behind Slots.

    The base implementation opens `uri` as a local path; `push` and `pull` are
    no-ops that subclasses can override.
    """
    uri: AnyUrl
    mode: str

    # The currently (or most recently) opened file object; None before the first open().
    _file: IO | None = None

    def __init__(self, uri: AnyUrl | str, mode: str = 'r'):
        """
        Args:
            uri: Location of the file.
            mode: File mode passed to `open`.
        """
        self.uri = uri
        self.mode = mode

    def __enter__(self) -> Self:
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    @property
    def file(self) -> IO | None:
        """The underlying file object, or None if the backend was never opened."""
        return self._file

    def open(self):
        """(Re)open the file at `uri` with the configured mode."""
        if self._file:
            self._file.close()
        self._file = open(str(self.uri), self.mode)

    def close(self):
        """Close the file; a no-op if the backend was never opened."""
        if self._file is not None:
            self._file.close()

    def push(self):
        """Hook for uploading the data; does nothing in the base implementation."""
        pass

    def pull(self):
        """Hook for downloading the data; does nothing in the base implementation."""
        pass
51
+
52
+
53
class LocalFile(FileBackEnd):
    """Backend for a plain file on the local disk; adds nothing to `FileBackEnd`."""

    def __init__(self, uri: AnyUrl | str, mode: str = 'r'):
        """
        Args:
            uri: Local path of the file.
            mode: File mode to open the file with [rw]b?a?
        """
        super().__init__(uri=uri, mode=mode)
64
+
65
+
66
class LocalCachedHttpFile(FileBackEnd):
    """
    Wrapper around a temporary local file with methods to down-/upload it from/to a HTTP server.

    The temporary file is created in the constructor and removed again when the
    object is garbage-collected.
    """

    _http_timeout_s: int
    _req_args: dict

    def __init__(self, uri: AnyUrl | str, mode: str = 'r', request_args: dict | None = None):
        """
        Args:
            uri: The source/destination uri where the remote data resides.
            mode: File mode for this file [r|w](b)?
            request_args: optional parameters for the HTTP requests
        """
        super().__init__(uri, mode)
        self._req_args = request_args or {}
        # Request timeout in seconds; configurable via environment, default 60.
        self._http_timeout_s = int(os.getenv("PROCON_HTTP_TIMEOUT_S", "60"))

        # Only reserve the file name here: the handle is closed right away and
        # the file is reopened by name in `open`, `pull` and `push`.
        self._file = NamedTemporaryFile(delete=False)
        self._file.close()

    def open(self):
        """(Re)open the local temporary file with the configured mode."""
        if self._file:
            self._file.close()
        self._file = open(self._file.name, self.mode)

    def pull(self):
        """Download the file from the defined source location.

        Raises:
            ProConDataslotError: If the request fails or the server answers
                with an error status.
        """
        try:
            with open(self._file.name, mode='wb') as download_file:
                with httpx.stream("GET", str(self.uri), timeout=self._http_timeout_s, **self._req_args) as response:
                    # Check the status first, so the body of an error response
                    # is never written into the local file.
                    response.raise_for_status()
                    # total = int(response.headers["Content-Length"])
                    for chunk in response.iter_bytes():
                        download_file.write(chunk)
        except httpx.RequestError as ex:
            raise ProConDataslotError(f"Error in HTTP GET-request for: {ex.request.url}") from ex
        except httpx.HTTPStatusError as ex:
            raise ProConDataslotError(
                f"Error response {ex.response.status_code} on GET-request for {ex.request.url!r}"
            ) from ex
        finally:
            self._file.close()

    def push(self):
        """Upload the file to the defined destination.

        Raises:
            ProConDataslotError: If the request fails or the server answers
                with an error status.
        """
        try:
            with open(self._file.name, 'rb') as f:
                response = httpx.request("PUT", str(self.uri), content=f, timeout=self._http_timeout_s, **self._req_args)
                response.raise_for_status()
        except httpx.RequestError as ex:
            raise ProConDataslotError(f"Error in HTTP PUT-request for: {ex.request.url}") from ex
        except httpx.HTTPStatusError as ex:
            raise ProConDataslotError(
                f"Error response {ex.response.status_code} on PUT-request for {ex.request.url!r}"
            ) from ex

    def __del__(self):
        """Best-effort removal of the temporary file."""
        try:
            if self._file:
                # Release our own handle first; an open handle blocks the
                # deletion on some platforms.
                self._file.close()
                os.unlink(self._file.name)
        except FileNotFoundError:
            # Already removed - nothing left to clean up.
            pass
        except PermissionError:
            LOG.warning(f"Can not delete file '{self._file.name}', as it's still in use!")