cineon-format 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ from .cineon_data import CineonData
2
+ from .random_data import (
3
+ create_random_data,
4
+ create_random_dataframe,
5
+ create_random_dictionary,
6
+ )
7
+ from .rolling_data import get_window, get_windowed_cineon_data
8
+ from .version import __version__
9
+
10
+ __all__ = [
11
+ "CineonData",
12
+ "create_random_dictionary",
13
+ "create_random_dataframe",
14
+ "create_random_data",
15
+ "get_windowed_cineon_data",
16
+ "get_window",
17
+ "__version__",
18
+ ]
@@ -0,0 +1,313 @@
1
+ import json
2
+ from datetime import datetime, timedelta
3
+ from typing import Any, Optional
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from pandas.testing import assert_frame_equal
8
+ from pydantic import BaseModel, field_validator, model_validator
9
+
10
+ from .utils import flatten_dictionary, json_serialiser, unflatten_dictionary
11
+
12
+ # Constants
13
+ MAX_PUPIL_DIAMETER_MM = 13.0
14
+ ATOL_UNIT_VECTOR = 1e-5
15
+
16
+
17
class CartesianVector(BaseModel, extra="forbid"):
    """
    Column-wise container for a sequence of 3D Cartesian vectors.

    The three components are stored as separate lists; the vector for
    sample ``i`` is ``(x[i], y[i], z[i])``. Keys other than ``x``, ``y``
    and ``z`` are rejected (``extra="forbid"``).
    """

    # One entry per sample for each component.
    x: list[float]
    y: list[float]
    z: list[float]
23
+
24
+
25
def get_squared_magnitudes(v: CartesianVector) -> np.ndarray:
    """Return the per-sample squared magnitude ``x**2 + y**2 + z**2`` of *v*."""
    # Square each component column separately, then add them in x, y, z order.
    x_sq, y_sq, z_sq = (
        np.array(component) ** 2 for component in (v.x, v.y, v.z)
    )
    return x_sq + y_sq + z_sq
30
+
31
+
32
def _first_invalid(values, is_invalid):
    """Return ``(index, value)`` of the first entry flagged by *is_invalid*, or ``None``."""
    for position, entry in enumerate(values):
        if is_invalid(entry):
            return position, entry
    return None


class Eye(BaseModel, extra="forbid"):
    """
    Data model for cyclops eye gaze data in the head coordinate system.

    Only ``gaze_direction`` is required; every other column may be omitted
    (left as ``None``). All validators raise ``ValueError`` on the first
    problem they find, which pydantic reports as a validation error.
    """

    gaze_direction: CartesianVector
    gaze_depth: Optional[list[float]] = None
    gaze_object: Optional[list[str | None]] = None
    pupil_diameter: Optional[list[float | None]] = None
    openness: Optional[list[float]] = None

    @field_validator("gaze_direction")
    @classmethod
    def validate_unit_vector(cls, vector: CartesianVector):
        """Require every gaze direction to have unit length (within ``ATOL_UNIT_VECTOR``)."""
        squared_magnitude = get_squared_magnitudes(vector)
        # A unit vector has squared magnitude 1, so the square root is not needed.
        off_unit = ~np.isclose(squared_magnitude, 1.0, atol=ATOL_UNIT_VECTOR)
        if off_unit.any():
            idxs = np.where(off_unit)[0]
            bad = squared_magnitude[idxs]
            # The values reported in the message are the squared magnitudes.
            raise ValueError(
                f"Gaze direction at indices {idxs.tolist()} are not normalized: {bad.tolist()!r}"
            )
        return vector

    @field_validator("gaze_direction")
    @classmethod
    def validate_local_coordinates(cls, vector: CartesianVector):
        """Reject gaze directions with a negative z component."""
        offender = _first_invalid(vector.z, lambda component: component < 0.0)
        if offender is not None:
            idx, value = offender
            raise ValueError(
                f"Gaze direction z component at index {idx} has invalid value {value!r}: must be non-negative in local coordinates"
            )
        return vector

    @field_validator("gaze_depth")
    @classmethod
    def validate_gaze_depth(cls, list_of_values):
        """Require every present gaze depth to be non-negative."""
        if list_of_values is None:
            return list_of_values
        offender = _first_invalid(
            list_of_values, lambda depth: depth is not None and depth < 0.0
        )
        if offender is not None:
            idx, value = offender
            raise ValueError(
                f"Gaze depth at index {idx} has invalid value {value!r}: must be non-negative"
            )
        return list_of_values

    @field_validator("openness")
    @classmethod
    def validate_eye_openness(cls, list_of_values):
        """Require every present openness value to lie in the closed interval [0, 1]."""
        if list_of_values is None:
            return list_of_values
        offender = _first_invalid(
            list_of_values,
            lambda openness: openness is not None and not (0.0 <= openness <= 1.0),
        )
        if offender is not None:
            idx, _ = offender
            raise ValueError(
                f"Eye openness at index {idx} is normalised and must be between 0 and 1"
            )
        return list_of_values

    @field_validator("pupil_diameter")
    @classmethod
    def validate_pupil_diameter(cls, list_of_values):
        """Require every present pupil diameter to lie in [0, ``MAX_PUPIL_DIAMETER_MM``]."""
        if list_of_values is None:
            return list_of_values
        offender = _first_invalid(
            list_of_values,
            lambda diameter: diameter is not None
            and not (0.0 <= diameter <= MAX_PUPIL_DIAMETER_MM),
        )
        if offender is not None:
            idx, value = offender
            raise ValueError(
                f"Pupil_diameter at index {idx} has invalid value {value!r}: must be non-negative and less than {MAX_PUPIL_DIAMETER_MM}mm"
            )
        return list_of_values
104
+
105
+
106
class Head(BaseModel, extra="forbid"):
    """
    Data model for head position data in the world coordinate system.

    Every column is optional. Only ``direction`` is validated here: when
    present, each direction must be a unit vector.
    """

    direction: Optional[CartesianVector] = None
    position: Optional[CartesianVector] = None
    acceleration: Optional[CartesianVector] = None

    @field_validator("direction")
    @classmethod
    def validate_unit_vector(cls, vector):
        """Require every head direction to have unit length (within ``ATOL_UNIT_VECTOR``)."""
        if vector is not None:
            squared_magnitudes = get_squared_magnitudes(vector)
            # Comparing squared magnitudes against 1 avoids taking a square root.
            off_unit = ~np.isclose(squared_magnitudes, 1.0, atol=ATOL_UNIT_VECTOR)
            if off_unit.any():
                idxs = np.where(off_unit)[0]
                bad = squared_magnitudes[idxs]
                # The values reported in the message are the squared magnitudes.
                raise ValueError(
                    f"Directions at indices {idxs.tolist()} are not normalized: magnitudes are {bad.tolist()!r}"
                )
        return vector
130
+
131
+
132
class CineonData(BaseModel, extra="forbid"):
    """
    Data model for Cineon eye-tracking data.

    The coordinate system in which the data is represented is left-handed. The y axis points upwards (gravity accelerates in the negative y direction).

    In the case of data in the Eye object, the coordinate system is non-inertial and local to the head. The z axis points "forwards", and the x axis points to the "right". The y axis always points upwards. In this local coordinate system, gaze direction vectors should have a non-negative z component (you cannot look backwards through your own head).

    In the case of the data in the Head object, the coordinate system is inertial and world-based. The y axis always points upwards. The orientation of the other axes is arbitrary but should be consistent within a dataset. Often (e.g., in virtual reality) the z axis points "forwards" from the initial location of the head, and the x axis points to the "right" from the initial location of the head. Otherwise z might point North and x East (e.g., if the data-collection device has a magnetometer). In any case, x and z must be orthogonal and left-handed with respect to y.

    Some columns are optional and may be omitted if not available. A `None` value for an entire column indicates data that was not available to be collected (e.g., some hardware does not have the ability to detect eye openness). Missing data within a column is indicated by the value `None` (e.g., the pupil diameter could not be measured while the eye was closed).

    Attributes
    ----------
    timestamp : list[datetime]
        List of timestamps for each sample, in strictly ascending order.
    eye : Eye
        Eye gaze data.
    head : Optional[Head]
        Head position data.
    participant_id : Optional[list[int]]
        List of participant IDs.
    event : Optional[list[str | None]]
        List of event labels.
    shard_id : Optional[list[int]]
        List of shard IDs.
    stress_report : Optional[list[float]]
        List of stress report values.
    stress_certainty_report : Optional[list[float]]
        List of stress certainty report values.
    workload_report : Optional[list[float]]
        List of workload report values.
    workload_certainty_report : Optional[list[float]]
        List of workload certainty report values.
    fatigue_report : Optional[list[float]]
        List of fatigue report values.
    fatigue_certainty_report : Optional[list[float]]
        List of fatigue certainty report values.
    """

    timestamp: list[datetime]
    eye: Eye
    head: Optional[Head] = None
    participant_id: Optional[list[int]] = None
    event: Optional[list[str | None]] = None
    shard_id: Optional[list[int]] = None
    stress_report: Optional[list[float]] = None
    stress_certainty_report: Optional[list[float]] = None
    workload_report: Optional[list[float]] = None
    workload_certainty_report: Optional[list[float]] = None
    fatigue_report: Optional[list[float]] = None
    fatigue_certainty_report: Optional[list[float]] = None

    @model_validator(mode="after")
    def ensure_equal_lengths(self) -> "CineonData":
        """Check that every list-valued column (including nested ones) has the same length."""
        flat_dict = flatten_dictionary(self.to_dict())
        # Only columns that are present (lists, not None) take part in the check.
        list_lengths = {
            name: len(value)
            for name, value in flat_dict.items()
            if isinstance(value, list)
        }
        # More than one distinct length means at least two columns disagree.
        if len(set(list_lengths.values())) > 1:
            # Include every column's length to show which fields disagree.
            raise ValueError(f"List fields must all have same length: {list_lengths}")
        return self

    @field_validator("timestamp")
    @classmethod
    def validate_timestamps(cls, list_of_timestamps):
        """Validate that timestamps are in strictly ascending order."""
        for previous, current in zip(list_of_timestamps, list_of_timestamps[1:]):
            if current <= previous:  # Stop at the first violation
                raise ValueError("Timestamps must be in strictly ascending order")
        return list_of_timestamps

    @classmethod
    def from_dict(cls, data_dict: dict[str, Any]):
        """Build and validate a ``CineonData`` from a nested dictionary."""
        return cls.model_validate(data_dict)

    @classmethod
    def from_json(cls, filepath):
        """Load and validate a ``CineonData`` from a JSON file."""
        with open(filepath, "r", encoding="utf-8") as f:
            data_dict = json.load(f)
        return cls.from_dict(data_dict)

    @classmethod
    def from_dataframe(cls, df: pd.DataFrame):
        """
        Build and validate a ``CineonData`` from a flat DataFrame.

        Column names use ``.`` to express nesting (e.g. ``eye.gaze_direction.x``).
        """
        # Convert NaN to None for Pydantic compatibility
        df = df.astype(object).where(pd.notnull(df), None)
        flat_dict = df.to_dict(orient="list")
        data_dict = unflatten_dictionary(flat_dict)
        return cls.model_validate(data_dict)

    @classmethod
    def from_csv(cls, filepath):
        """
        Load and validate a ``CineonData`` from a CSV file.

        The ``timestamp`` column is parsed with the format
        ``%Y-%m-%d %H:%M:%S.%f``; empty cells are read as missing values.
        """
        df = pd.read_csv(
            filepath,
            dtype={  # Ensure correct types for optional string columns
                "event": "string",
                "eye.gaze_object": "string",
            },
            keep_default_na=True,
            na_values=[""],
            parse_dates=["timestamp"],
            date_format="%Y-%m-%d %H:%M:%S.%f",
        )
        return cls.from_dataframe(df)

    def to_dict(self) -> dict[str, Any]:
        """Return the data as a nested dictionary, omitting columns that are ``None``."""
        # "exclude_none" removes any keys with value None from the nested structure
        return self.model_dump(exclude_none=True)

    def to_json(self, filepath: str) -> None:
        """Write the data to *filepath* as indented JSON (datetimes in ISO format)."""
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(self.to_dict(), f, indent=2, default=json_serialiser)

    def to_dataframe(self) -> pd.DataFrame:
        """Convert the CineonData to a Pandas DataFrame."""
        flat_dict = flatten_dictionary(self.to_dict())
        return pd.DataFrame(flat_dict)

    def to_csv(self, filepath: str) -> None:
        """Write the flattened data to *filepath* as CSV without the index column."""
        df = self.to_dataframe()
        df.to_csv(filepath, index=False)

    def get_times(self) -> list[float]:
        """
        Get timestamps as a list of float seconds since the first timestamp.
        The first entry in this list is always 0.0.
        The last entry is the total duration in seconds.
        An empty dataset yields an empty list.
        """
        if not self.timestamp:
            return []
        origin = self.timestamp[0]
        return [(t - origin).total_seconds() for t in self.timestamp]

    def duration(self) -> timedelta:
        """Return the time between the first and last sample (zero for empty data)."""
        if not self.timestamp:
            return timedelta(0)
        return self.timestamp[-1] - self.timestamp[0]

    def sampling(self) -> tuple[float, float]:
        """
        Get the mean and standard deviation of the sampling times in seconds.

        Returns ``(nan, nan)`` when there are fewer than two samples.
        """
        dt = np.diff(self.get_times())
        if dt.size == 0:
            # No intervals to average; avoid NumPy's empty-slice warnings.
            return float("nan"), float("nan")
        return float(np.mean(dt)), float(np.std(dt))

    def frequency(self) -> tuple[float, float]:
        """
        Get the mean and standard deviation of the sampling frequencies in Hz.

        Returns ``(nan, nan)`` when there are fewer than two samples.
        """
        dt = np.diff(self.get_times())
        if dt.size == 0:
            return float("nan"), float("nan")
        # Timestamps are strictly ascending, so every interval is positive.
        freqs = 1 / dt
        return float(np.mean(freqs)), float(np.std(freqs))

    def __len__(self) -> int:
        """Return the number of samples."""
        return len(self.timestamp)

    def __str__(self) -> str:
        """Return a multi-line human-readable summary of the dataset."""
        if not self.timestamp:
            # There is no start or end time to report for an empty dataset.
            return "CineonData object with\nNumber of samples: 0"
        t_mean, t_std = self.sampling()
        f_mean, f_std = self.frequency()
        return (
            f"CineonData object with\n"
            f"Start time: {self.timestamp[0].isoformat()}\n"
            f"End time: {self.timestamp[-1].isoformat()}\n"
            f"Duration: {self.duration()}\n"
            f"Data sampling: {t_mean:.4f}±{t_std:.4f}s\n"
            f"Data frequency: {f_mean:.2f}±{f_std:.2f}Hz\n"
            f"Number of samples: {len(self)}"
        )

    def __eq__(self, other: object) -> bool:
        """Compare two datasets through their DataFrame representations (dtypes ignored)."""
        if not isinstance(other, CineonData):
            return False
        try:
            assert_frame_equal(
                self.to_dataframe(), other.to_dataframe(), check_dtype=False
            )
        except AssertionError:
            return False
        return True
@@ -0,0 +1,146 @@
1
+ import random
2
+ from datetime import datetime, timedelta
3
+ from typing import Any, Optional
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from typeguard import typechecked
8
+
9
+ from .cineon_data import CineonData
10
+ from .utils import flatten_dictionary
11
+
12
+ DURATION_EPS = 1e-9 # Small epsilon for topping up durations (seconds)
13
+
14
+
15
+ @typechecked
16
def _create_random_unit_vectors(n: int, forward=False) -> np.ndarray:
    """
    Draw *n* random unit vectors and return them as an ``(n, 3)`` array.

    Columns are x, y, z. With ``forward=True`` the azimuth is limited to
    ``[-pi/2, pi/2]``; otherwise it spans ``[-pi, pi]``.
    """
    azimuth_limit = np.pi / 2.0 if forward else np.pi
    # Draw order (azimuth first, then elevation) is kept so seeded output is stable.
    azimuth = np.random.uniform(-azimuth_limit, azimuth_limit, n)
    sin_elevation = np.random.uniform(-1.0, 1.0, n)
    horizontal = np.sqrt(1.0 - sin_elevation**2)  # cos(elevation)

    unit_vectors = np.zeros((n, 3))
    unit_vectors[:, 1] = sin_elevation
    unit_vectors[:, 0] = horizontal * np.sin(azimuth)
    unit_vectors[:, 2] = horizontal * np.cos(azimuth)
    return unit_vectors
29
+
30
+
31
+ @typechecked
32
def create_random_dictionary(
    duration: float,
    frequency: float = 60.0,
    include_gaze_depth: bool = False,
    include_pupil: bool = False,
    include_eye_openness: bool = False,
    include_target_objects: bool = False,
    include_head_direction: bool = False,
    include_head_position: bool = False,
    include_head_acceleration: bool = False,
    include_events: bool = False,
    include_shards: bool = False,
    include_reports: bool = False,
    seed: Optional[int] = None,
) -> dict[str, Any]:
    """
    Build a nested dictionary of random data in the Cineon layout.

    Timestamps start at ``datetime.now()`` and are spaced ``1 / frequency``
    seconds apart, covering ``duration`` seconds (end point included, up to
    ``DURATION_EPS``). Gaze directions are always generated; every other
    column is added only when its ``include_*`` flag is set.

    Parameters
    ----------
    duration : float
        Length of the recording in seconds. Must be non-negative.
    frequency : float
        Sampling frequency in Hz. Must be greater than 0.
    include_* : bool
        Switch the corresponding optional column(s) on.
    seed : Optional[int]
        Seeds both ``numpy.random`` and ``random``. The timestamps still
        depend on the current time.

    Raises
    ------
    ValueError
        If ``frequency`` is not greater than 0 or ``duration`` is negative
        (NaN values are rejected as well).
    """
    # Reject inputs that would otherwise yield NaN or empty timestamps silently.
    if not frequency > 0.0:
        raise ValueError(f"`frequency` = {frequency} and must be greater than 0.")
    if not duration >= 0.0:
        raise ValueError(f"`duration` = {duration} and must be non-negative.")

    # Seed the random number generators for reproducibility
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)

    # Create timestamps based on duration and frequency
    n = int(np.floor((duration + DURATION_EPS) * frequency)) + 1
    t = np.arange(n, dtype=float) / frequency
    # Drop any sample that floating-point rounding pushed past the end.
    t = t[t <= duration + DURATION_EPS]
    n = len(t)
    timestamp_start = datetime.now()
    timestamp = [(timestamp_start + timedelta(seconds=float(ts))) for ts in t]

    # Eye data
    gaze_direction = _create_random_unit_vectors(n, forward=True)
    eye: dict[str, Any] = {
        "gaze_direction": {
            "x": gaze_direction[:, 0].tolist(),
            "y": gaze_direction[:, 1].tolist(),
            "z": gaze_direction[:, 2].tolist(),
        }
    }
    if include_gaze_depth:
        eye["gaze_depth"] = np.random.uniform(0, 10.0, n).tolist()
    if include_target_objects:
        eye["gaze_object"] = random.choices(["A", "B", "C", None], k=n)
    if include_pupil:
        eye["pupil_diameter"] = np.random.uniform(0.0, 13.0, n).tolist()
    if include_eye_openness:
        eye["openness"] = np.random.uniform(0.0, 1.0, n).tolist()

    # Head data
    head: dict[str, Any] = {}
    if include_head_direction:
        head_direction = _create_random_unit_vectors(n)
        head["direction"] = {
            "x": head_direction[:, 0].tolist(),
            "y": head_direction[:, 1].tolist(),
            "z": head_direction[:, 2].tolist(),
        }
    if include_head_position:
        head["position"] = {
            "x": np.random.normal(0.0, 1.0, n).tolist(),
            "y": np.random.normal(0.0, 1.0, n).tolist(),
            "z": np.random.normal(0.0, 1.0, n).tolist(),
        }
    if include_head_acceleration:
        head["acceleration"] = {
            "x": np.random.normal(0.0, 1.0, n).tolist(),
            "y": np.random.normal(0.0, 1.0, n).tolist(),
            "z": np.random.normal(0.0, 1.0, n).tolist(),
        }

    # Create full object
    data: dict[str, Any] = {
        "timestamp": timestamp,
        "eye": eye,
    }
    if head:  # Leave the "head" key out when no head column was requested
        data["head"] = head
    if include_events:
        data["event"] = random.choices(["A", "B", "C", None], k=n)
    if include_shards:
        data["shard_id"] = np.random.randint(0, 10, size=n).tolist()
    if include_reports:
        data["stress_report"] = np.random.randint(0, 11, size=n).tolist()
        data["workload_report"] = np.random.randint(0, 11, size=n).tolist()
        data["fatigue_report"] = np.random.randint(0, 11, size=n).tolist()

    return data
118
+
119
+
120
+ @typechecked
121
def create_random_data(
    duration: float,
    frequency: float = 60.0,
    **kwargs: Any,
) -> CineonData:
    """
    Generate random data and return it as a validated ``CineonData``.

    All arguments are forwarded unchanged to ``create_random_dictionary``.
    """
    return CineonData.from_dict(
        create_random_dictionary(duration=duration, frequency=frequency, **kwargs)
    )
132
+
133
+
134
+ @typechecked
135
def create_random_dataframe(
    duration: float,
    frequency: float = 60.0,
    **kwargs: Any,
) -> pd.DataFrame:
    """
    Generate random data and return it as a flat ``DataFrame``.

    All arguments are forwarded unchanged to ``create_random_dictionary``;
    the nested result is flattened before the frame is built. The data is
    not passed through ``CineonData`` validation here.
    """
    nested = create_random_dictionary(duration=duration, frequency=frequency, **kwargs)
    return pd.DataFrame(flatten_dictionary(nested))
@@ -0,0 +1,102 @@
1
+ from datetime import timedelta
2
+ from typing import Generator
3
+
4
+ import numpy as np
5
+ from typeguard import typechecked
6
+
7
+ from .cineon_data import CineonData
8
+
9
+
10
+ @typechecked
11
def get_window(data: CineonData, tmin: timedelta, tmax: timedelta) -> CineonData:
    """
    Return the samples of *data* that fall in the half-open window ``[tmin, tmax)``.

    Both bounds are offsets from the first timestamp of *data*. Raises
    ``ValueError`` when ``tmin`` is negative, ``tmax`` is not positive,
    ``tmax <= tmin``, or *data* contains no samples.
    """
    zero = timedelta(0)
    if tmin < zero:
        raise ValueError(f"`tmin` = {tmin} and must be non-negative.")
    if tmax <= zero:
        raise ValueError(f"`tmax` = {tmax} and must be greater than 0.")
    if tmax <= tmin:
        raise ValueError(f"`tmax` = {tmax} must be greater than `tmin` = {tmin}.")

    frame = data.to_dataframe()
    if frame.empty:
        raise ValueError("The provided CineonData is empty and cannot be windowed.")

    # Window bounds are measured from the first sample.
    origin = frame["timestamp"].iloc[0]
    at_or_after_start = frame["timestamp"] >= origin + tmin
    before_end = frame["timestamp"] < origin + tmax
    return CineonData.from_dataframe(frame[at_or_after_start & before_end])
30
+
31
+
32
+ @typechecked
33
def get_windowed_cineon_data(
    cineon_data: CineonData, window_size: timedelta, step: timedelta | None = None
) -> Generator[CineonData, None, None]:
    """
    Returns a generator of time-based `CineonData` windows.

    Window ``i`` starts ``i * step`` after the first timestamp and contains
    the samples whose timestamps fall in the half-open interval
    ``[start, start + window_size)``. The number of samples per window is
    therefore not fixed. Only windows that fit entirely inside the recorded
    time span are produced.

    When `step` is not specified it defaults to `window_size`, which gives
    consecutive, non-overlapping windows. A `step` smaller than
    `window_size` gives overlapping windows; a `step` larger than
    `window_size` is rejected.

    Because this is a generator function, nothing (including the argument
    checks below) runs until the first window is requested.

    Raises
    ------
    ValueError
        If `window_size < step`, if `window_size` or `step` is not positive,
        if the data has fewer than two samples, or if the data spans less
        time than `window_size`.
    """
    if step is None:
        step = window_size

    if window_size < step:
        raise ValueError(
            f"window_size must be >= step. Currently {window_size} < {step}"
        )

    if window_size.total_seconds() <= 0:
        raise ValueError(f"`window_size` = {window_size} and must be greater than 0.")

    if step.total_seconds() <= 0:
        raise ValueError(f"`step` = {step} and must be greater than 0.")

    timestamps = np.array(cineon_data.timestamp)
    if len(timestamps) < 2:
        raise ValueError("At least two data points must be included in the CineonData")
    time_difference = timestamps[-1] - timestamps[0]

    if time_difference < window_size:
        raise ValueError(
            f"The provided CineonData only has {time_difference} of data in it, which isn't enough for a window_size of {window_size}"
        )

    # Build the nested dictionary only once the arguments are known to be valid.
    data = cineon_data.to_dict()

    def split_columns(start: int, stop: int) -> CineonData:
        # For hierarchical format, recursively slice nested dicts/lists
        def recursive_slice(obj):
            if isinstance(obj, dict):
                return {k: recursive_slice(v) for k, v in obj.items()}
            if isinstance(obj, (list, np.ndarray)):
                return obj[start:stop]
            return obj  # Non-indexable, return as is

        return CineonData.from_dict(recursive_slice(data))

    def find_timestamp_index(t: timedelta) -> int | None:
        if t > time_difference:
            return None
        # searchsorted returns a NumPy integer; convert to match the annotation.
        # Its default left-sided insertion makes the window end exclusive.
        return int(timestamps.searchsorted(t + timestamps[0]))

    # Calculate num_steps using timedelta arithmetic to avoid floating-precision issues
    max_start_offset = time_difference - window_size
    num_steps = (max_start_offset // step) + 1
    for i in range(num_steps):
        td = i * step
        start = find_timestamp_index(td)
        stop = find_timestamp_index(td + window_size)

        if start is None or stop is None:
            break

        yield split_columns(start, stop)
cineon_format/utils.py ADDED
@@ -0,0 +1,46 @@
1
+ from datetime import datetime
2
+ from typing import Any
3
+
4
+ from typeguard import typechecked
5
+
6
+
7
+ @typechecked
8
def json_serialiser(obj: Any) -> str:
    """
    ``default`` hook for ``json.dump``: serialise datetimes as ISO-8601 strings.

    Raises ``TypeError`` for any other type.
    """
    if not isinstance(obj, datetime):
        raise TypeError("Type not serializable")
    return obj.isoformat()
12
+
13
+
14
+ @typechecked
15
def flatten_dictionary(data_dict: dict, separator: str = ".") -> dict[str, Any]:
    """
    Convert a nested dictionary to a flat dictionary.

    Keys of nested dictionaries are joined with *separator*
    (``{"a": {"b": 1}}`` becomes ``{"a.b": 1}``). Non-dict values are kept
    as they are, and empty nested dictionaries produce no entry.
    """

    def _walk(path: Any, node: Any):
        # Yield (flat_key, leaf_value) pairs in depth-first insertion order.
        if not isinstance(node, dict):
            yield path, node
            return
        for child_key, child in node.items():
            yield from _walk(f"{path}{separator}{child_key}", child)

    flattened: dict[str, Any] = {}
    for top_key, top_value in data_dict.items():
        flattened.update(_walk(top_key, top_value))
    return flattened
30
+
31
+
32
+ @typechecked
33
def unflatten_dictionary(flat_dict: dict, separator: str = ".") -> dict[str, Any]:
    """
    Convert a flat dictionary to a nested dictionary.

    Each key is split on *separator*; every part but the last names a nested
    dictionary (``{"a.b": 1}`` becomes ``{"a": {"b": 1}}``).

    Raises
    ------
    ValueError
        If two keys conflict, i.e. one key is used both as a leaf value and
        as the parent of another key (for example ``"a"`` and ``"a.b"``).
    """
    nested_dict: dict[str, Any] = {}

    for flat_key, value in flat_dict.items():
        *parents, leaf = flat_key.split(separator)
        node = nested_dict
        for part in parents:
            child = node.setdefault(part, {})
            if not isinstance(child, dict):
                # A previous key already stored a leaf value at this position.
                raise ValueError(
                    f"Key {flat_key!r} conflicts with an existing non-nested entry at {part!r}"
                )
            node = child
        if leaf in node:
            # Keys are unique, so this position was created as a parent of another key.
            raise ValueError(
                f"Key {flat_key!r} conflicts with existing nested entries under the same name"
            )
        node[leaf] = value

    return nested_dict
@@ -0,0 +1,3 @@
1
from importlib import metadata

try:
    # Read the version from the installed distribution's metadata.
    __version__ = metadata.version("cineon-format")
except metadata.PackageNotFoundError:
    # The distribution metadata is missing (e.g. running from a source
    # checkout that was never installed); keep the package importable.
    __version__ = "0.0.0+unknown"
@@ -0,0 +1,73 @@
1
+ Metadata-Version: 2.3
2
+ Name: cineon-format
3
+ Version: 3.0.0
4
+ Summary: Definitions for Cineon data formats
5
+ Author: Cineon
6
+ Author-email: Cineon <info@cineon.ai>
7
+ Requires-Dist: numpy>=2.3.2
8
+ Requires-Dist: pandas>=2.3.1
9
+ Requires-Dist: pydantic>=2.11.7
10
+ Requires-Dist: typeguard>=4.4.4
11
+ Requires-Python: >=3.11
12
+ Description-Content-Type: text/markdown
13
+
14
+ # Cineon Format
15
+
16
+ ![Latest Release](badges/version.svg) ![Coverage Status](badges/coverage.svg) ![Code complexity](badges/complexity.svg)
17
+
18
+ This package contains a definition of a common data format to be used across Cineon repositories.
19
+
20
+ The `CineonData` format is a class that inherits from `pydantic`'s `BaseModel` class, and as such the data validation is performed on class instantiation. If the data being ingested has a problem then class instantiation will fail. Simple. This is useful because downstream processing tasks can make valid assumptions about the data and therefore do not have to perform any explicit checks. This means that lots of downstream data-validation code can be deleted.
21
+
22
+ ## Prerequisites
23
+
24
+ To use the example script in the package you need to install `uv`, but you do not need `uv` in order to use the package in other Python code.
25
+
26
+ ## Usage
27
+
28
+ Add the `cineon_format` package to your project using either `poetry` or `uv` (run one of the following commands):
29
+
30
+ ```bash
31
+ poetry add git+https://github.com/cineon-ai/cineon_format.git
32
+ uv add git+https://github.com/cineon-ai/cineon_format.git
33
+ ```
34
+
35
+ Then:
36
+
37
+ ```python
38
+ from cineon_format import CineonData
39
+
40
+ # Load from CSV
41
+ cineon_data = CineonData.from_csv("path/to/csv")
42
+
43
+ # Alternatively, load from JSON
44
+ cineon_data = CineonData.from_json("path/to/json")
45
+ ```
46
+
47
+ ## Examples
48
+
49
+ Running the example script with either:
50
+
51
+ ```bash
52
+ uv run scripts/example.py --csv=data/good.csv
53
+ uv run scripts/example.py --json=data/good.json
54
+ ```
55
+
56
+ will load a file, convert that file into the `CineonData` format and then spit out a dictionary representation and a `DataFrame` representation to the terminal.
57
+
58
+ If you run the script with either:
59
+
60
+ ```bash
61
+ uv run scripts/example.py --csv=data/bad.csv
62
+ uv run scripts/example.py --json=data/bad.json
63
+ ```
64
+
65
+ you can see examples of data that does not conform to the format; loading it fails, and helpful error messages are printed to the terminal.
66
+
67
+ "Random" `CineonData` can be generated via the script:
68
+
69
+ ```bash
70
+ uv run scripts/generate.py
71
+ ```
72
+
73
+ This will print a summary of the `CineonData` to the terminal.
@@ -0,0 +1,9 @@
1
+ cineon_format/__init__.py,sha256=Q1HfKWsBcLkdeTWsNfjLvCU9QwjcMZoGvbWeKzoZUiY,435
2
+ cineon_format/cineon_data.py,sha256=1FUrxzxenh-lnpp_FbKJ8peaqYKqcTCKd4hzUtzax2A,12453
3
+ cineon_format/random_data.py,sha256=GCr6d1-o22tSZa4zICirYPoiuv8WlGzXgwMIX2tePfc,4657
4
+ cineon_format/rolling_data.py,sha256=M-ZT-u8cPlJ4rBfU1YdzW60itAni9LgJhEjTSH1PaYo,4161
5
+ cineon_format/utils.py,sha256=2V14HtBwRQCMNT4hQisC3HdrmHQLY2zCWmdhF-pEnjs,1229
6
+ cineon_format/version.py,sha256=a8OY5s2w5HL3Ht7wBWzhXjJhS66JJ_ZxLDUqwhVQGqI,80
7
+ cineon_format-3.0.0.dist-info/WHEEL,sha256=i9aSRDivn5iP9LaR1BLQX2GNAuriQWPsFwbbWygTX2k,81
8
+ cineon_format-3.0.0.dist-info/METADATA,sha256=sTqoVTB0qrUPyEBg5rrbI-ASMq8jbQHIPtl25wY6Jtk,2388
9
+ cineon_format-3.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.11.15
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any