cortex-loom 0.3.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.4
2
+ Name: cortex-loom
3
+ Version: 0.3.0rc1
4
+ Summary: Package for developing and publishing custom Kelvin Cortex Flows.
5
+ Author: MMT Analytics
6
+ Author-email: support@machinemedicine.com
7
+ Requires-Python: >=3.11, <3.13
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Provides-Extra: full
12
+ Requires-Dist: cloudpickle (==3.1.1)
13
+ Requires-Dist: cortex-types (==0.6.4)
14
+ Requires-Dist: jupyter (>=1.0,<2.0) ; extra == "full"
15
+ Requires-Dist: lightgbm (>=4.1,<5.0) ; extra == "full"
16
+ Requires-Dist: matplotlib (>=3.8,<4.0) ; extra == "full"
17
+ Requires-Dist: pandas (>=2.1,<3.0) ; extra == "full"
18
+ Requires-Dist: requests (==2.32.4)
19
+ Requires-Dist: scikit-learn (>=1.4,<2.0) ; extra == "full"
20
+ Requires-Dist: scipy (>=1.12,<2.0) ; extra == "full"
21
+ Requires-Dist: seaborn (>=0.13,<0.14) ; extra == "full"
22
+ Requires-Dist: xgboost (>=2.0,<3.0) ; extra == "full"
23
+ Description-Content-Type: text/markdown
24
+
25
+ # README #
26
+
27
+ This README would normally document whatever steps are necessary to get your application up and running.
28
+
29
+ ### What is this repository for? ###
30
+
31
+ * Quick summary
32
+ * Version
33
+ * [Learn Markdown](https://bitbucket.org/tutorials/markdowndemo)
34
+
35
+ ### How do I get set up? ###
36
+
37
+ * Summary of set up
38
+ * Configuration
39
+ * Dependencies
40
+ * Database configuration
41
+ * How to run tests
42
+ * Deployment instructions
43
+
44
+ ### Contribution guidelines ###
45
+
46
+ * Writing tests
47
+ * Code review
48
+ * Other guidelines
49
+
50
+ ### Who do I talk to? ###
51
+
52
+ * Repo owner or admin
53
+ * Other community or team contact
@@ -0,0 +1,29 @@
1
+ # README #
2
+
3
+ This README would normally document whatever steps are necessary to get your application up and running.
4
+
5
+ ### What is this repository for? ###
6
+
7
+ * Quick summary
8
+ * Version
9
+ * [Learn Markdown](https://bitbucket.org/tutorials/markdowndemo)
10
+
11
+ ### How do I get set up? ###
12
+
13
+ * Summary of set up
14
+ * Configuration
15
+ * Dependencies
16
+ * Database configuration
17
+ * How to run tests
18
+ * Deployment instructions
19
+
20
+ ### Contribution guidelines ###
21
+
22
+ * Writing tests
23
+ * Code review
24
+ * Other guidelines
25
+
26
+ ### Who do I talk to? ###
27
+
28
+ * Repo owner or admin
29
+ * Other community or team contact
@@ -0,0 +1,10 @@
1
+ r"""
2
+ This package provides Python interfaces and utilities for interacting with
3
+ the Kelvin Cortex platform, particularly for:
4
+
5
+ - Loading and manipulating custom data objects (see [available types](types/__init__.md))
6
+ - Implementing and publishing custom processing Flows (see [FlowBuilder](flow_builder.md))
7
+ - Local development and testing of Cortex-compatible functions
8
+ """
9
+
10
+ from cortex_loom.flow_builder import FlowBuilder
@@ -0,0 +1,356 @@
1
+ import inspect
2
+ import json
3
+ from importlib.metadata import version
4
+ from io import BytesIO
5
+ from typing import Any, Callable, Dict, Optional, Tuple
6
+
7
+ import cloudpickle
8
+ import requests
9
+
10
+ from cortex_loom.types.data_object import DataObject
11
+ from cortex_loom.types.utils import (
12
+ PROVIDER_REGISTRY,
13
+ get_type_path,
14
+ init_defaults,
15
+ )
16
+
17
+
18
class FlowBuilder:
    r"""FlowBuilder helps validate custom functions meant for publishing in
    Cortex and provides an interface for the publishing API."""

    _API_URL = "https://api.cortex.machinemedicine.com"
    _PUBLISH_ENDPOINT = "argo/publish_flow"

    def __init__(
        self,
        api_token: str,
    ):
        """
        Parameters
        ----------
        api_token : str
            API token to validate the Flow publishing requests with.

        Examples
        --------
        Basic publishing of a simple function

        >>> def extract_video_metadata(
        ...     video: VideoObject,
        ... ) -> float:
        ...     '''Reads metadata of the video (duration and frame dimensions).'''
        ...     return video.duration, video.height, video.width
        ...
        >>> builder = FlowBuilder(api_token="your-long-api-token-here")
        >>> response = builder.publish(
        ...     extract_video_metadata,
        ...     flow_display_name="Video Metadata Extraction",
        ...     flow_description="Extracts metadata of the video.",
        ...     output_display_name="Video Metadata",
        ... )

        Validate function signature & serialization before publishing

        >>> serialised_func, param_spec = builder.build(extract_video_metadata)
        >>> print(param_spec)
        {'video': {
            'data_type': 'VideoObject',
            'provider_type': 'cortex_loom.types.video_object.VideoObjectProvider',
            'provider_parameters': {  # Filled with Provider's defaults
                "stream_index": 0,
                "pixel_format": "rgb24",
                "rotation": None,
                "height": None,
                "width": None,
            }
        }}

        Publishing with custom VideoObjectProvider parameters
        (force RGB format, resize to 640×360, rotate 90° clockwise)

        >>> # Tell the platform to load input videos with specific transformations
        >>> custom_params = {
        ...     "video": {  # name of the function parameter
        ...         "width": 640,
        ...         "height": 360,
        ...         "rotation": 90,  # clockwise
        ...     }
        ... }
        ...
        >>> response = builder.publish(
        ...     extract_video_metadata,
        ...     custom_provider_parameters=custom_params,
        ...     ...,
        ... )
        >>>
        >>> # Or just validate without publishing
        >>> serialised, spec = builder.build(
        ...     extract_video_metadata,
        ...     custom_provider_parameters=custom_params,
        ... )
        >>> print(spec["video"]["provider_parameters"])
        {
            'stream_index': 0,
            'pixel_format': 'rgb24',
            'width': 640,
            'height': 360,
            'rotation': 90
        }
        """
        self.api_token = api_token

    def build(
        self,
        func: Callable,
        custom_provider_parameters: Optional[Dict[str, Dict[str, Any]]] = None,
    ) -> Tuple[BytesIO, Dict[str, dict]]:
        """Validates the signature of the function and prepares its input
        specification. If successful, returns the serialised function
        and parameter details. Can be used to validate the function before
        publishing.

        Parameters
        ----------
        func : Callable
            Function to be serialised. Its parameters need to be strictly
            type-hinted and the output has to be JSON-serialisable.

        custom_provider_parameters : Optional[Dict[str, Dict[str, Any]]]
            Mapping of function input names to a dictionary of the input's
            `DataObjectProvider` parameters. Used to overwrite default parameter
            values of the Providers. Defaults to no overrides.

        Returns
        -------
        BytesIO
            Buffer containing the serialised `func` code.
        Dict[str, dict]
            Mapping of function parameter names to their details including
            type name and Provider parameters.
        """
        # Default changed from a shared mutable `{}` literal to None to
        # avoid the mutable-default-argument pitfall; behaviour is unchanged.
        if custom_provider_parameters is None:
            custom_provider_parameters = {}
        # Validate the signature of the function, extract its parameters
        parameter_info = self._validate_func_signature(func=func)
        # Validate DataProviders' parameters
        provider_parameters = self._validate_provider_parameters(
            func_parameter_info=parameter_info,
            custom_provider_parameters=custom_provider_parameters,
        )
        # Format the required function data parameters and their providers
        required_data = dict()
        for name, data_type in parameter_info.items():
            # Save specification of the required parameter
            required_data[name] = {
                "data_type": data_type.__qualname__,
                "provider_type": get_type_path(
                    PROVIDER_REGISTRY[data_type.__qualname__]
                ),
                "provider_parameters": provider_parameters[name],
            }
        # Serialise the function into bytes
        serialised_func = self._serialise(func=func)

        return serialised_func, required_data

    def publish(
        self,
        func: Callable,
        custom_provider_parameters: Optional[Dict[str, Dict[str, Any]]] = None,
        flow_display_name: Optional[str] = None,
        flow_description: Optional[str] = None,
        output_display_name: Optional[str] = None,
        output_description: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Publishes the given function as a custom Cortex Flow.

        Parameters
        ----------
        func : Callable
            Function to be published. Its parameters need to be strictly
            type-hinted and the output has to be JSON-serialisable. Once
            published, it will be available for use in Kelvin Cortex.
        custom_provider_parameters : Optional[Dict[str, Dict[str, Any]]]
            Mapping of function input names to a dictionary of the input's
            `DataObjectProvider` parameters. Used to overwrite default parameter
            values of the Providers. Defaults to no overrides.
        flow_display_name : Optional[str]
            Name of the function to be displayed in Cortex. If None, a name
            will be generated.
        flow_description : Optional[str]
            Optional description of the Flow shown to users in Cortex.
        output_display_name : Optional[str]
            Name of the output of the function. If None, a name will be
            generated.
        output_description : Optional[str]
            Optional description of the Flow's output visible to Cortex users.

        Returns
        -------
        Dict[str, Any]
            Response of the publishing API.

        Raises
        ------
        requests.HTTPError
            The publishing API responded with an error status code.
        """
        # Prepare the function for publishing by building it
        serialised_func, required_data = self.build(
            func=func, custom_provider_parameters=custom_provider_parameters
        )

        # Get the version of the cortex-loom and cortex-types package
        builder_version = version("cortex_loom")
        types_version = version("cortex_types")

        # Construct the publishing request
        headers = {"Authorization": f"Bearer {self.api_token}"}
        data = {
            "required_data": required_data,
            "builder_version": builder_version,
            "types_version": types_version,
            "flow_display_name": flow_display_name,
            "flow_description": flow_description,
            "output_display_name": output_display_name,
            "output_description": output_description,
        }
        files = {
            "file": (
                "serialised_func.pkl",
                serialised_func,
                "application/octet-stream",
            ),
        }
        # Send a request to the publishing API; the JSON payload travels in
        # the multipart form field "info" alongside the pickled function.
        response = requests.request(
            method="POST",
            url=f"{self._API_URL}/{self._PUBLISH_ENDPOINT}",
            headers=headers,
            files=files,
            data={"info": json.dumps(data)},
        )
        # Handle API response
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _validate_func_signature(
        func: Callable,
    ) -> Dict[str, DataObject]:
        """Validate the signature of the function ensuring that it only expects
        parameters of specific types.

        Parameters
        ----------
        func : Callable
            Function which signature is to be verified.

        Returns
        -------
        Dict[str, DataObject]
            Mapping of function parameter names to the expected DataObject type.

        Raises
        ------
        ValueError
            All parameters need to have a type specified, and every annotation
            must be a DataObject subclass.
        """
        # Get the signature of the function
        signature = inspect.signature(func)

        # Check if all parameters have types specified.
        # BUG FIX: the original compared Parameter objects themselves to
        # `inspect._empty` (`x == inspect._empty`), which is never true, so
        # unannotated parameters slipped through. Inspect the annotation.
        missing_type = [
            p.name
            for p in signature.parameters.values()
            if p.annotation is inspect.Parameter.empty
        ]
        if missing_type:
            raise ValueError(
                "All parameters need to have a type specified. "
                f"Types missing for the following parameters: {missing_type}."
            )

        # Gather information about the parameters
        parameter_info, invalid = {}, []
        for parameter in signature.parameters.values():
            annotation = parameter.annotation
            # Check if the type is one of the provided data objects.
            # Guard with isinstance(..., type): issubclass() raises TypeError
            # for non-class annotations such as Optional[int] or List[str].
            if isinstance(annotation, type) and issubclass(
                annotation, DataObject
            ):
                parameter_info[parameter.name] = annotation
            # If not, raise an error for invalid parameters
            else:
                invalid.append((parameter.name, annotation))
        # Check if any parameters were found to be invalid
        if invalid:
            invalid_str = ", ".join(
                # Non-class annotations have no __name__; fall back to repr.
                [f"({n}: {getattr(t, '__name__', t)})" for n, t in invalid]
            )
            raise ValueError(
                "Some of the requested parameters are not DataObjects. "
                f"Parameters causing errors: [{invalid_str}]."
            )

        return parameter_info

    @staticmethod
    def _validate_provider_parameters(
        func_parameter_info: Dict[str, DataObject],
        custom_provider_parameters: Dict[str, Dict[str, Any]],
    ) -> Dict[str, Dict[str, Any]]:
        """Validates Provider parameters for function inputs specified in
        `func_parameter_info`.

        Parameters
        ----------
        func_parameter_info : Dict[str, DataObject]
            Mapping of function parameter names to their type.
        custom_provider_parameters : Dict[str, Dict[str, Any]]
            User-specified Provider parameters, will overwrite any defaults.

        Returns
        -------
        Dict[str, Dict[str, Any]]
            Mapping of function parameter names to dictionaries of corresponding
            Provider parameters.

        Raises
        ------
        RuntimeError
            Invalid Provider parameters detected.
        """

        # Track all validated Provider parameters
        provider_parameters = dict()
        # Validate for every function input parameter
        for name, data_type in func_parameter_info.items():
            # Get the corresponding Provider
            provider_type = PROVIDER_REGISTRY[data_type.__qualname__]
            # Get the Provider's __init__ defaults
            defaults = init_defaults(provider_type)
            # Get the user-provided custom parameter values
            parameters = defaults | custom_provider_parameters.get(name, {})
            # Verify that the Provider can be initialised
            try:
                provider_type(**parameters)
            except Exception as error:
                # Fixed missing space between the two message sentences.
                raise RuntimeError(
                    f"Provider parameters for `{name}` are invalid. "
                    f"(Input: {name}, Type: {data_type.__qualname__}, "
                    f"Provider: {provider_type.__qualname__})"
                ) from error
            # Save the Provider's parameters
            provider_parameters[name] = parameters

        return provider_parameters

    @staticmethod
    def _serialise(func: Callable) -> BytesIO:
        """Serialise the function using the `cloudpickle` module.

        Parameters
        ----------
        func : Callable
            Function to be serialised.

        Returns
        -------
        BytesIO
            Buffer containing the serialised function, rewound to the start
            so it can be handed straight to `requests` as a file.
        """
        buffer = BytesIO()
        cloudpickle.dump(obj=func, file=buffer)
        buffer.seek(0)
        return buffer
@@ -0,0 +1,41 @@
1
+ r"""Cortex-native data types which include:
2
+
3
+ - [`VideoObject`](video_object.md/#cortex_loom.types.video_object.VideoObject)
4
+ - [`PoseObject`](pose_object.md/#cortex_loom.types.pose_object.PoseObject)
5
+ - [`FaceObject`](face_object.md/#cortex_loom.types.face_object.FaceObject)
6
+
7
+ Specifying them as inputs to a custom Flow allows Cortex to correctly provide
8
+ data to your function. For example, in order to write a function that uses
9
+ a human pose estimated based on an assignment video one can write:
10
+
11
+ ``` py
12
+ from cortex_loom.types import PoseObject
13
+
14
+ def my_function(pose: PoseObject):
15
+ ...
16
+ ```
17
+
18
+ ---
19
+
20
+ DataObject Providers, which can be used to construct instances of specific
21
+ DataObjects, are defined alongside the data types. For example:
22
+
23
+ ``` py
24
+ from cortex_loom.types import PoseObjectProvider
25
+
26
+ # Initialise the Provider
27
+ provider = PoseObjectProvider()
28
+
29
+ # Use it to construct the DataObject from a local file
30
+ pose = provider.get_data(file_path="./pose.json")
31
+
32
+ # Interact with the PoseObject
33
+ print(pose.pose_features)
34
+ # ["x", "y", "z"]
35
+ ```
36
+
37
+ """
38
+
39
+ from cortex_loom.types.face_object import FaceObject, FaceObjectProvider
40
+ from cortex_loom.types.pose_object import PoseObject, PoseObjectProvider
41
+ from cortex_loom.types.video_object import VideoObject, VideoObjectProvider
@@ -0,0 +1,116 @@
1
+ from abc import abstractmethod
2
+ from typing import Any, Dict, Optional, Tuple
3
+
4
+ from cortex_types import DataObject as BaseDataObject
5
+
6
+
7
class DataObject(BaseDataObject):
    # Loom-local marker base class for all Cortex data types. FlowBuilder
    # checks that every Flow parameter annotation subclasses this type, so
    # subclassing it here is what makes a type usable as a Flow input.
    pass
9
+
10
+
11
class DataObjectProvider:
    """Base class for all `DataObject` Providers. Use Providers specific to data
    modality (e.g. `VideoObjectProvider`) to interact with data files."""

    def get_data(
        self,
        file_path: str,
        roi: Optional[Tuple[int, int]] = None,
        **kwargs,
    ) -> DataObject:
        r"""Method for reading DataObjects. Provided a local file path
        it constructs a DataObject.

        Parameters
        ----------
        file_path : str
            Path to a locally saved file based on which a DataObject is to be
            constructed.
        roi : Tuple[int, int] | None
            An optional region of interest (ROI) to which the DataObject ought
            to be limited. Specified as a tuple of integers (milliseconds).
            For objects with a temporal dimension (e.g. VideoObject) it limits
            the output to the specified interval. If None, no trim is applied.
        **kwargs
            Additional metadata forwarded verbatim to `_get_data`.

        Returns
        -------
        DataObject
            Initialised DataObject.
        """
        # If provided, check if the ROI is valid
        if roi is not None:
            self._check_roi(roi=roi)
        # Read the file data and return the DataObject.
        data = self._read_file(file_path=file_path)
        # BUG FIX: forward **kwargs to `_get_data` (its **metadata); they were
        # previously accepted here but silently dropped, so metadata passed
        # through `__call__` never reached the concrete Provider.
        data_object = self._get_data(data=data, roi=roi, **kwargs)
        return data_object

    @staticmethod
    @abstractmethod
    def _read_file(file_path: str) -> Any:
        """Abstract method that needs to be implemented by child classes.
        It ought to read the specified file into memory."""

    @abstractmethod
    def _get_data(
        self,
        data: Any,
        roi: Optional[Tuple[int, int]],
        **metadata: Dict[str, Any],
    ) -> DataObject:
        """Abstract method that needs to be implemented by child classes.
        It ought to construct a DataObject from the read file data."""

    def __call__(
        self,
        file_path: str,
        roi: Optional[Tuple[int, int]] = None,
        **metadata: Dict[str, Any],
    ) -> DataObject:
        """Constructs a DataObject from the specified file. Expects required
        metadata parameters to be passed along the file path."""
        return self.get_data(file_path=file_path, roi=roi, **metadata)

    @staticmethod
    def _check_roi(roi: Tuple[int, int]) -> None:
        """Validates the format of the ROI.

        Parameters
        ----------
        roi : Tuple[int, int]
            A tuple of two, non-decreasing integers specifying the region of
            interest.

        Raises
        ------
        TypeError
            ROI is not a tuple nor a list.
        ValueError
            ROI is not of length 2.
        TypeError
            ROI elements are not integers.
        ValueError
            ROI elements are negative or decreasing.
        """
        if not isinstance(roi, (list, tuple)):
            raise TypeError(
                "`roi` is expected to be (Tuple[int, int]), "
                f"got ({type(roi).__name__}) instead."
            )
        if len(roi) != 2:
            raise ValueError(
                "`roi` Tuple is expected to be of length (2), "
                f"got ({len(roi)}) instead."
            )

        start, end = roi
        if not isinstance(start, int) or not isinstance(end, int):
            raise TypeError(
                "`roi` elements need to be integers, "
                f"got ({type(start).__name__}, {type(end).__name__})."
            )
        if start > end or start < 0:
            raise ValueError(
                "`roi` elements need to be non-negative and non-decreasing, "
                f"got (start: {start}, end: {end})."
            )
@@ -0,0 +1,96 @@
1
+ import json
2
+ from typing import Any, Dict, Optional, Tuple
3
+
4
+ from typing_extensions import override
5
+
6
+ from cortex_types import FaceObject as BaseFaceObject
7
+ from cortex_types.face_object import _FaceObjectProvider
8
+ from cortex_types.interfaces import FaceInterface
9
+
10
+ from cortex_loom.types.data_object import DataObject, DataObjectProvider
11
+ from cortex_loom.types.utils import _register_provider
12
+
13
+
14
class FaceObject(BaseFaceObject, DataObject, FaceInterface):
    r"""
    FaceObject type represents a face mesh estimated based on a source
    video/image.

    Attributes
    ----------
    face_array : np.ndarray
        Array of shape (T, K, C) containing face keypoints.
    timestamps : np.ndarray
        Array of shape (T,) containing timestamps [milliseconds].
    blendshapes_array : Optional[np.ndarray], default=None
        Array of shape (T, B) containing blendshapes' scores. Estimation of
        blendshapes by the FaceLandmarker model is optional.
    rotation_array : Optional[np.ndarray], default=None
        Array of shape (T, 3) containing rotation angles (pitch, yaw, roll),
        if available.
    face_features : List[str]
        List containing feature names of the `face_array`; e.g., ["x", "y", "z"].
    blendshapes_features : Optional[List[str]]
        List containing feature names of the `blendshapes_array`;
        e.g., ["NEUTRAL", "BROW_DOWN_LEFT"], if available.
    rotation_features : Optional[List[str]]
        List containing feature names of the `rotation_array`;
        e.g., ["pitch", "yaw", "roll"], if available.

    Notes
    -----
    - T: Number of time steps.
    - K: Number of keypoints.
    - C: Number of features per keypoint (e.g., x, y, z).
    - B: Number of blendshapes.
    """
+ """
46
+
47
+
48
@_register_provider(model_type=FaceObject)
class FaceObjectProvider(_FaceObjectProvider, DataObjectProvider):
    """
    FaceObjectProvider expects a dictionary input representing a FaceObject
    JSON and constructs a FaceObject instance.
    """

    FACE_OBJECT_TYPE = FaceObject

    @override
    def get_data(
        self,
        file_path: str,
        roi: Optional[Tuple[int, int]] = None,
        **kwargs,
    ) -> FaceObject:
        r"""Constructs a FaceObject from a local file.

        Parameters
        ----------
        file_path : str
            Local path to a file representing a FaceObject. Likely downloaded
            from Kelvin Cortex.
        roi : Optional[Tuple[int, int]], optional
            An optional region of interest (ROI) to which the FaceObject ought
            to be limited, specified as a tuple of integers (milliseconds).
            The temporal dimension of the FaceObject will be limited to the
            given time interval. If None, no trim is applied.

        Returns
        -------
        FaceObject
            An object holding an estimated face mesh.

        Examples
        --------
        >>> provider = FaceObjectProvider()
        >>> face = provider.get_data(file_path="./face_object.json", roi=None)
        >>> face.face_features
        ["x", "y", "z"]
        """
        return super().get_data(file_path=file_path, roi=roi, **kwargs)

    @staticmethod
    def _read_file(file_path: str) -> Dict[str, Any]:
        """Reads the serialised FaceObject JSON file into a dictionary."""
        # JSON is UTF-8 by specification; pin the encoding instead of
        # relying on the platform's default locale encoding.
        with open(file=file_path, mode="r", encoding="utf-8") as fp:
            data = json.load(fp=fp)
        return data
@@ -0,0 +1,165 @@
1
+ import json
2
+ from typing import Any, Dict, Optional, Tuple
3
+
4
+ from typing_extensions import override
5
+
6
+ from cortex_types.interfaces import BodyContourInterface, PoseInterface
7
+ from cortex_types.pose_object import PoseObject as BasePoseObject
8
+ from cortex_types.pose_object import _PoseObjectProvider
9
+
10
+ from cortex_loom.types.data_object import DataObject, DataObjectProvider
11
+ from cortex_loom.types.utils import _register_provider
12
+
13
+
14
class BodyContour(BodyContourInterface):
    """A body contour interface holding its points and the threshold value used
    to find it.

    Attributes
    ----------
    points : np.ndarray
        Array of 2D coordinates of points comprising the contour. Shaped as
        (N, 2), where N is the number of contour points.

    threshold : float
        Threshold value used to convert a predicted body segmentation mask into
        a binary one based on which the contour has been computed.
    """
28
+
29
+
30
class PoseObject(BasePoseObject, DataObject, PoseInterface):
    r"""
    PoseObject type represents human poses estimated based on a source
    video/image.

    Attributes
    ----------
    bbox_array : np.ndarray
        Array of shape (T, len(bbox_features)) containing the bounding box for
        the detected person over T timesteps.
    pose_array : np.ndarray
        Array of shape (T, K, len(pose_features)) containing the pose keypoints
        for K keypoints over T timesteps.
        Supports convenient string-based indexing using keypoint names from
        `pose_mapping`:

        - ``pose_array["nose"]`` → shape (T, C)
        - ``pose_array[["left_wrist", "right_wrist"]]`` → shape (T, 2, C)
    timestamps : np.ndarray
        Array of shape (T,) containing a timestamp [milliseconds] for each index
        of the pose.
    valid_times : np.ndarray
        Boolean array of shape (T,) indicating which timestamps correspond to
        valid poses.
    pose_mapping : Dict[str, int]
        Dictionary mapping keypoint names to their indices in the pose array's K
        dimension, e.g., ``{"left_eye": 0, "right_eye": 1, "nose": 15, ...}``.
    bbox_features : List[str]
        List containing the names of the features in the bounding box array,
        e.g., ``["origin_x", "origin_y", "height", "width"]``.
    pose_features : List[str]
        List containing the names of the features in the pose array,
        e.g., ``["x", "y", "confidence"]`` or ``["x", "y", "z", "visibility"]``.
    body_contours : Optional[List[List[BodyContour]]]
        Optional list of length T containing lists of body contour objects.
        Each contour holds an array of 2D points and a threshold value used to
        convert the float body segmentation mask into a boolean one.
    source_width : int
        Width of the source video/image based on which the pose has been
        estimated.
    source_height : int
        Height of the source video/image based on which the pose has been
        estimated.

    Notes
    -----
    - T: Number of time steps (frames).
    - K: Number of keypoints.
    - C: Number of features per keypoint (usually 2-4, e.g. x, y, confidence).

    Examples
    --------
    Keypoint name-based indexing (most common usage pattern):

    >>> # Assume pose comes from PoseObjectProvider.get_data(...)
    >>> pose
    PoseObject(T: 240, K: 75, C: 4)

    >>> # Single keypoint → returns (T, C) array
    >>> nose = pose.pose_array["nose"]
    >>> nose.shape
    (240, 4)
    >>> nose[0]  # first frame: [x, y, visibility, confidence]
    array([512.3, 248.7, 0.95, 0.98])

    >>> # Multiple keypoints → returns (T, N, C) array
    >>> wrists = pose.pose_array[["left_wrist", "right_wrist"]]
    >>> wrists.shape
    (240, 2, 4)

    >>> # Plot trajectory of both wrists over time
    >>> import matplotlib.pyplot as plt
    >>> plt.plot(wrists[:, 0, 0], wrists[:, 0, 1], label="left wrist")  # x,y
    >>> plt.plot(wrists[:, 1, 0], wrists[:, 1, 1], label="right wrist")
    >>> plt.legend()
    >>> plt.show()

    >>> # More complex selection
    >>> core_points = ["nose", "left_shoulder", "right_shoulder",
    ...                "left_hip", "right_hip"]
    >>> torso = pose.pose_array[core_points]
    >>> torso.shape
    (240, 5, 4)
    """
114
+
115
+
116
@_register_provider(model_type=PoseObject)
class PoseObjectProvider(_PoseObjectProvider, DataObjectProvider):
    r"""
    PoseObjectProvider expects a dictionary input representing a PoseObject
    JSON and constructs a PoseObject instance.
    """

    POSE_OBJECT_TYPE = PoseObject

    @override
    def get_data(
        self,
        file_path: str,
        roi: Optional[Tuple[int, int]] = None,
        **kwargs,
    ) -> PoseObject:
        r"""Constructs a PoseObject from a local file.

        Parameters
        ----------
        file_path : str
            Local path to a file representing a PoseObject. Likely downloaded
            from Kelvin Cortex.
        roi : Optional[Tuple[int, int]], optional
            An optional region of interest (ROI) to which the PoseObject ought
            to be limited, specified as a tuple of integers (milliseconds).
            The temporal dimension of the PoseObject will be limited to the
            given time interval. If None, no trim is applied.

        Returns
        -------
        PoseObject
            An object holding an estimated human pose key-points and bounding
            boxes.

        Examples
        --------
        >>> provider = PoseObjectProvider()
        >>> pose = provider.get_data(file_path="./pose_object.json", roi=None)
        >>> pose
        PoseObject(T: 100, K: 75, C: 4)
        """
        return super().get_data(file_path=file_path, roi=roi, **kwargs)

    @staticmethod
    def _read_file(file_path: str) -> Dict[str, Any]:
        """Reads the serialised PoseObject JSON file into a dictionary."""
        # JSON is UTF-8 by specification; pin the encoding instead of
        # relying on the platform's default locale encoding.
        with open(file=file_path, mode="r", encoding="utf-8") as fp:
            data = json.load(fp=fp)
        return data
@@ -0,0 +1,65 @@
1
+ import inspect
2
+ from typing import Any, Callable, Dict, Type, Union
3
+
4
+ from cortex_loom.types.data_object import DataObject, DataObjectProvider
5
+
6
+ PROVIDER_REGISTRY: Dict[str, Type[DataObjectProvider]] = dict()
7
+
8
+
9
def init_defaults(cls_or_init: Union[Type, Callable]) -> Dict[str, Any]:
    """
    Extract default values of the parameters from a class __init__ method or
    an __init__ function.

    Parameters
    ----------
    cls_or_init : class or callable
        The class whose __init__ method or an __init__ function to inspect.

    Returns
    -------
    Dict[str, Any]
        Dictionary mapping parameter names to their default values. Only
        parameters with default values are included; 'self', *args, and **kwargs
        are excluded.

    Examples
    --------
    >>> class MyClass:
    ...     def __init__(self, a, b=2, c=3, *args, **kwargs):
    ...         pass
    >>> init_defaults(MyClass)
    {'b': 2, 'c': 3}
    """
    # Inspect the __init__ when handed a class, otherwise the callable itself
    if inspect.isclass(cls_or_init):
        target = cls_or_init.__init__
    else:
        target = cls_or_init

    # Variadic parameters carry no single default and are never included
    variadic = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    return {
        param.name: param.default
        for param in inspect.signature(target).parameters.values()
        if param.name != "self"
        and param.kind not in variadic
        and param.default is not inspect.Parameter.empty
    }
51
+
52
+
53
def get_type_path(tp: type) -> str:
    """Return the fully-qualified import path of the given type."""
    parts = (tp.__module__, tp.__qualname__)
    return ".".join(parts)
56
+
57
+
58
def _register_provider(model_type: Type[DataObject]):
    """Decorator used to register a Provider type for a DataObject.

    The decorated Provider class is stored in ``PROVIDER_REGISTRY`` under the
    DataObject type's qualified name and is returned unchanged.
    """

    def decorator(provider_cls: Type[DataObjectProvider]):
        key = model_type.__qualname__
        PROVIDER_REGISTRY[key] = provider_cls
        return provider_cls

    return decorator
@@ -0,0 +1,214 @@
1
+ from typing import Optional, Tuple
2
+
3
+ import av
4
+ from av.container import InputContainer
5
+ from typing_extensions import override
6
+
7
+ from cortex_types.interfaces import VideoFrameInterface, VideoInterface
8
+ from cortex_types.video_object import VideoFrame as BaseVideoFrame
9
+ from cortex_types.video_object import VideoObject as BaseVideoObject
10
+ from cortex_types.video_object import _VideoObjectProvider
11
+
12
+ from cortex_loom.types.data_object import DataObject, DataObjectProvider
13
+ from cortex_loom.types.utils import _register_provider
14
+
15
+
16
class VideoFrame(BaseVideoFrame, VideoFrameInterface):
    """Single video frame container holding the frame as a NumPy array, as well
    as its presentation and decoding timestamps.

    Attributes
    ----------
    array : np.ndarray
        NumPy array representation of the video frame.
    pts : int
        Presentation timestamp of the frame.
    dts : int
        Decoding timestamp of the frame.
    """
29
+
30
+
31
class VideoObject(BaseVideoObject, DataObject, VideoInterface):
    r"""VideoObject type provides a simple interface for video frames
    and attributes such as framerate or frame dimensions.

    Attributes
    ----------
    frames : Generator[VideoFrame, None, None]
        A generator yielding VideoFrame objects containing a frame NumPy array
        and its presentation timestamp.
    framerate : float
        The effective framerate of the source video stream.
    pixel_format : str
        Format of the video frame pixels, e.g., "rgb24", "bgr24".
    height : int
        Height of the video frames yielded by the `frames` generator.
    width : int
        Width of the video frames yielded by the `frames` generator.
    time_base : Fraction
        The time base fraction indicating the duration represented by each
        unit increment in `VideoFrame.pts`.
    codec_name : str
        Name of the codec of the source video stream, e.g., "h264", "hevc".
    bit_rate : int
        Bit rate of the source video stream.
    duration : float
        Duration (in seconds) of the VideoObject, roughly equal to the range
        of the presentation timestamps of frames yielded by `frames`.
    """
59
+
60
+
61
@_register_provider(model_type=VideoObject)
class VideoObjectProvider(_VideoObjectProvider, DataObjectProvider):
    r"""Provider that builds `VideoObject` instances from video inputs.

    The provider decodes video files via PyAV and can apply optional
    on-the-fly transformations to every frame: pixel-format conversion,
    resolution resizing, and frame rotation. It is typically used to obtain
    frame iterators in a consistent format suitable for downstream computer
    vision processing.

    Methods
    -------
    get_data(file_path, roi=None, **kwargs)
        Load a video file and return a `VideoObject` with the configured
        transformations applied.
    """

    # DataObject type produced by this provider.
    VIDEO_OBJECT_TYPE = VideoObject

    def __init__(
        self,
        stream_index: int = 0,
        pixel_format: Optional[str] = "rgb24",
        rotation: Optional[int] = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
    ):
        """
        Parameters
        ----------
        stream_index : int, default=0
            Index of the video stream to extract from the container
            (usually 0).
        pixel_format : str or None, default="rgb24"
            Target pixel format for the decoded frames (e.g. ``"rgb24"``,
            ``"bgr24"``). If ``None``, the source format is preserved.
        rotation : int or None, default=None
            Clockwise rotation to apply to each frame. Supported values:
            ``0``, ``90``, ``180``, ``270``. If ``None``, no additional
            rotation is applied.
        height : int or None, default=None
            Target height for resizing frames. If ``None``, the original
            height is kept.
        width : int or None, default=None
            Target width for resizing frames. If ``None``, the original
            width is kept.

        Notes
        -----
        - Transformations are applied in the order:
          decode -> resize -> format conversion -> rotate;
        - This class is usually used through the `get_data` method.

        Examples
        --------
        Basic usage with default settings (RGB24, no resize/rotation):

        >>> provider = VideoObjectProvider()
        >>> video = provider.get_data("./example.mp4")
        >>> video
        VideoObject(height=1080, width=1920, framerate=29.97, pixel_format='rgb24')

        >>> for frame in video.frames:
        ...     # frame.array is a numpy array with shape (H, W, 3)
        ...     process_frame(frame.array)

        With custom parameters (downscale + rotation + specific ROI):

        >>> provider = VideoObjectProvider(
        ...     pixel_format="rgb24",
        ...     rotation=90,
        ...     height=720,
        ...     width=1280
        ... )
        >>> video = provider.get_data(
        ...     file_path="./example.mp4",
        ...     roi=(30000, 90000)  # 30s to 90s
        ... )
        >>> video
        VideoObject(height=720, width=1280, framerate=30.0, pixel_format='rgb24')
        """
        # Collect the configuration once and hand it to the base provider.
        options = dict(
            stream_index=stream_index,
            pixel_format=pixel_format,
            rotation=rotation,
            height=height,
            width=width,
        )
        super().__init__(**options)

    @override
    def get_data(
        self,
        file_path: str,
        roi: Optional[Tuple[int, int]] = None,
        **kwargs,
    ) -> VideoObject:
        r"""Construct a `VideoObject` from a local video file.

        Parameters
        ----------
        file_path : str
            Path to a video file (mp4, avi, mov, etc.) that PyAV can decode.
        roi : tuple of int or None, optional
            Time interval (start_ms, end_ms) to trim the video to.
            If ``None``, the full video is used.
        **kwargs
            Additional arguments forwarded to the underlying implementation
            (rarely used directly).

        Returns
        -------
        VideoObject
            Object that provides video metadata and an iterator over frames.

        Examples
        --------
        Default provider, full video:

        >>> provider = VideoObjectProvider()
        >>> video = provider.get_data("dance.mp4")
        >>> video.framerate
        59.94

        Trimmed interval (first 10 seconds):

        >>> video_short = provider.get_data("dance.mp4", roi=(0, 10000))
        >>> video_short.duration
        10.0

        Custom provider with preprocessing:

        >>> provider = VideoObjectProvider(pixel_format="rgb24", height=512, width=512)
        >>> video = provider.get_data("input.mp4")
        >>> video.pixel_format
        'rgb24'
        >>> video.height, video.width
        (512, 512)
        """
        return super().get_data(file_path=file_path, roi=roi, **kwargs)

    @staticmethod
    def _read_file(file_path: str) -> InputContainer:
        """Open the video file with PyAV and return the input container.

        Parameters
        ----------
        file_path : str
            Path to the video file.

        Returns
        -------
        av.container.InputContainer
            PyAV container for reading packets/frames.
        """
        return av.open(file=file_path)
@@ -0,0 +1,60 @@
1
+ [project]
2
+ name = "cortex-loom"
3
+ version = "0.3.0rc1"
4
+ description = "Package for developing and publishing custom Kelvin Cortex Flows."
5
+ authors = [
6
+ { name = "MMT Analytics", email = "support@machinemedicine.com" },
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = ">=3.11, <3.13"
10
+
11
+ [tool.poetry]
12
+ packages = [{include = "cortex_loom"}]
13
+
14
+ [tool.poetry.dependencies]
15
+ requests = "2.32.4"
16
+ cloudpickle = "3.1.1"
17
+ cortex-types = "0.6.4"
18
+
19
+ # Optional ML dependencies
20
+ pandas = { version = "^2.1", optional = true }
21
+ scipy = { version = "^1.12", optional = true }
22
+ scikit-learn = { version = "^1.4", optional = true }
23
+ xgboost = { version = "^2.0", optional = true }
24
+ lightgbm = { version = "^4.1", optional = true }
25
+ matplotlib = { version = "^3.8", optional = true }
26
+ seaborn = { version = "^0.13", optional = true }
27
+ jupyter = { version = "^1.0", optional = true }
28
+
29
+ [tool.poetry.extras]
30
+ full = [
31
+ "pandas",
32
+ "scipy",
33
+ "scikit-learn",
34
+ "xgboost",
35
+ "lightgbm",
36
+ "matplotlib",
37
+ "seaborn",
38
+ "jupyter",
39
+ ]
40
+
41
+ [dependency-groups]
42
+ docs = [
43
+ "mkdocs (>=1.6.1,<2.0.0)",
44
+ "mkdocstrings[python] (>=1.0.2,<2.0.0)",
45
+ "mkdocs-material (>=9.7.1,<10.0.0)",
46
+ "mkdocs-gen-files (>=0.6.0,<0.7.0)",
47
+ "mkdocs-section-index (>=0.3.10,<0.4.0)",
48
+ "mkdocs-literate-nav (>=0.6.2,<0.7.0)"
49
+ ]
50
+
51
+ [tool.isort]
52
+ profile = "black"
53
+ line_length = 79
54
+ known_first_party = ["cortex_types"]
55
+ known_local_folder = ["cortex_loom"]
56
+ sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
57
+
58
+ [build-system]
59
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
60
+ build-backend = "poetry.core.masonry.api"