pygidata 0.4.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gi_data/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .dataclient import GIDataClient
2
+
3
+ __all__: list[str] = ["GIDataClient"]
gi_data/dataclient.py ADDED
@@ -0,0 +1,380 @@
1
+ # src/src/dataclient.py
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import inspect
6
+ import logging
7
+ from typing import Any, Dict, List, Optional, Tuple, Union, Type, Iterable
8
+ from uuid import UUID
9
+
10
+ import nest_asyncio
11
+ import pandas as pd
12
+
13
+ from gi_data.drivers.base import BaseDriver
14
+ from gi_data.drivers.cloud_gql import CloudGQLDriver
15
+ from gi_data.drivers.kafka_stream import KafkaStreamDriver
16
+ from gi_data.drivers.local_http import HTTPTimeSeriesDriver
17
+ from gi_data.drivers.ws_stream import WebSocketDriver
18
+ from gi_data.infra.auth import AuthManager
19
+ from gi_data.infra.http import AsyncHTTP
20
+ from gi_data.mapping.enums import Resolution, DataType, DataFormat
21
+ from gi_data.mapping.models import (GIStream, GIStreamVariable,
22
+ GIOnlineVariable, VarSelector,
23
+ CSVSettings, LogSettings,
24
+ CSVImportSettings, GIHistoryMeasurement)
25
+ from gi_data.utils.logging import setup_module_logger
26
+
27
+ logger = setup_module_logger(__name__, level=logging.DEBUG)
28
+ PACKAGE_PREFIX = "gi_data"
29
+ # ------------------------------------------------------------------ #
30
+ # helpers #
31
+ # ------------------------------------------------------------------ #
32
+ asyncio.set_event_loop(asyncio.new_event_loop())
33
+
34
+
35
def _to_task(fut, as_task, loop):
    """Optionally wrap an awaitable in a task bound to ``loop``.

    Args:
        fut: Coroutine, ``asyncio.Task`` or ``asyncio.Future`` to prepare.
        as_task: When falsy, ``fut`` is returned untouched.
        loop: Event loop used to create the task.

    Returns:
        ``fut`` itself when ``as_task`` is falsy or ``fut`` is already a
        future/task; otherwise the task created by ``loop.create_task(fut)``.
    """
    # ``asyncio.isfuture`` is true for Tasks *and* plain Futures.  A plain
    # Future must not reach ``create_task``, which only accepts coroutines
    # and would raise ``TypeError``.
    if not as_task or asyncio.isfuture(fut):
        return fut
    return loop.create_task(fut)
39
+
40
+
41
def _run(fut, as_task=True):
    """Drive an awaitable to completion from synchronous code.

    When no event loop is running in the current thread, a new loop is
    created, installed as the current loop and used to run ``fut``.  When a
    loop is already running, it is patched with ``nest_asyncio`` so that
    ``run_until_complete`` may be called re-entrantly on it.

    Args:
        fut: Awaitable to execute.
        as_task: Forwarded to ``_to_task``; when true the awaitable is
            wrapped in a task on the selected loop first.

    Returns:
        Whatever ``fut`` evaluates to.
    """
    try:
        active_loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread.
        active_loop = None

    if active_loop is None:
        fresh_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(fresh_loop)
        return fresh_loop.run_until_complete(_to_task(fut, as_task, fresh_loop))

    # Already inside a running loop: allow nested run_until_complete calls.
    nest_asyncio.apply(active_loop)
    return active_loop.run_until_complete(_to_task(fut, as_task, active_loop))
51
+
52
+
53
class GIDataClient:
    """
    High-level synchronous interface for GI Data-API.

    Online, buffer, history, export and import operations are synchronous:
    each one passes the driver's awaitable to the module-level ``_run``
    helper and returns its result.  The WebSocket and Kafka methods are
    native ``async`` methods / async generators and have to be consumed from
    an event loop.

    The client is a context manager; leaving the ``with`` block calls
    :meth:`close`.
    """

    def __init__(
        self,
        base_url: str,
        *,
        username: Optional[str] = None,
        password: Optional[str] = None,
        access_token: Optional[str] = None,
        driver_cls: Type = HTTPTimeSeriesDriver,
        driver_kwargs: Optional[dict] = None,
    ) -> None:
        """Create the auth manager, HTTP client and the domain drivers.

        Args:
            base_url: Passed to ``AuthManager`` and ``AsyncHTTP``.
            username: Passed to ``AuthManager``.
            password: Passed to ``AuthManager``.
            access_token: Passed to ``AuthManager`` as ``access_token``.
            driver_cls: Driver class consumed by the local ``_build_driver``
                factory (see the review note below).
            driver_kwargs: Extra constructor arguments for ``driver_cls``.
        """
        self._kafka: Optional[KafkaStreamDriver] = None
        self._auth = AuthManager(base_url, username, password, access_token=access_token)
        self._http = AsyncHTTP(base_url, self._auth)

        driver_kwargs = driver_kwargs or {}

        # ------------------------------------------------------------------
        # driver factory that only passes supported ctor-arguments
        # ------------------------------------------------------------------
        # NOTE(review): this factory is defined but never called, so
        # ``driver_cls`` / ``driver_kwargs`` currently have no effect on the
        # drivers created below.  Confirm the intended wiring before using it.
        def _build_driver(domain: str):
            sig = inspect.signature(driver_cls)  # ctor signature
            kw: Dict[str, Any] = {"client_id": None, **driver_kwargs}

            # only add "domain" if the driver accepts it
            if "domain" in sig.parameters:
                kw["domain"] = domain

            kw = {k: v for k, v in kw.items() if k in sig.parameters}
            return driver_cls(self._auth, self._http, **kw)

        # domain drivers: cloud environments use the GQL driver for both
        # domains, everything else uses the HTTP time-series driver.
        if self._auth.is_cloud_environment():
            buffer_driver = CloudGQLDriver(self._auth, self._http)
            history_driver = CloudGQLDriver(self._auth, self._http)
        else:
            buffer_driver = HTTPTimeSeriesDriver(self._auth, self._http, None, "buffer")
            history_driver = HTTPTimeSeriesDriver(self._auth, self._http, None, "history")

        self._drivers: Dict[str, BaseDriver] = {
            "buffer": buffer_driver,  # cloud => GQL Raw
            "history": history_driver,
        }

        # created lazily by _ensure_ws_driver()
        self._ws_driver: Optional[WebSocketDriver] = None

    # --------------------------- online ------------------------------ #
    def list_variables(self) -> List[GIOnlineVariable]:
        """Return the online variables reported by the buffer driver."""
        return _run(self._drivers["buffer"].list_variables())

    def read_online(self, var_ids: List[UUID]) -> Dict[UUID, float]:
        """Read the given variables through the buffer driver."""
        return _run(self._drivers["buffer"].read(var_ids))

    def write_online(self, mapping: Dict[UUID, float]) -> None:
        """Write ``mapping`` (variable id -> value) through the buffer driver."""
        _run(self._drivers["buffer"].write(mapping))

    # --------------------------- buffer ------------------------------ #
    def list_buffer_sources(self) -> List[GIStream]:
        """Return the sources listed by the buffer driver."""
        return _run(self._drivers["buffer"].list_buffer_sources())

    def list_buffer_variables(self, source_id: Union[UUID, int]) -> List[GIStreamVariable]:
        """Return the variables of ``source_id`` from the buffer driver."""
        return _run(self._drivers["buffer"].list_buffer_variables(source_id))

    def fetch_buffer(
        self,
        selectors: List[VarSelector],
        *,
        start_ms: float = -20_000,
        end_ms: float = 0,
        points: int = 2048,
    ) -> pd.DataFrame:
        """Fetch buffer data for ``selectors`` from the buffer driver.

        ``start_ms``, ``end_ms`` and ``points`` are forwarded unchanged to the
        driver's ``fetch_buffer``.
        """
        return _run(
            self._drivers["buffer"].fetch_buffer(
                selectors, start_ms=start_ms, end_ms=end_ms, points=points
            )
        )

    # --------------------------- history ----------------------------- #

    def list_history_sources(self) -> List[GIStream]:
        """Return the sources listed by the history driver."""
        return _run(self._drivers["history"].list_buffer_sources())

    def list_history_variables(self, source_id: Union[UUID, int]):
        """Return the variables of ``source_id`` from the history driver."""
        return _run(self._drivers["history"].list_buffer_variables(source_id))

    def list_history_measurements(
        self,
        source_id: Union[str, int, UUID],
        *,
        start: Optional[int] = None,
        end: Optional[int] = None,
        order: str = "DESC",
        limit: Optional[int] = None,
        measurements: Optional[Iterable[Union[str, UUID]]] = None,
        add_var_mapping: bool = True,
        add_meas_metadata: bool = False,
        meas_metadata_filter: Optional[List[dict]] = None,
    ) -> List[GIHistoryMeasurement]:
        """List measurements of a history source.

        All keyword arguments are forwarded unchanged to the history driver's
        ``list_measurements``.  Each returned measurement has this client
        attached via ``attach_client``.
        """
        result = _run(
            self._drivers["history"].list_measurements(
                source_id,
                start=start,
                end=end,
                order=order,
                limit=limit,
                measurements=measurements,
                add_var_mapping=add_var_mapping,
                add_meas_metadata=add_meas_metadata,
                meas_metadata_filter=meas_metadata_filter,
            )
        )

        # Attach client to enable selected_meas.vars lazy variable resolution
        return [m.attach_client(self) for m in result]

    def fetch_history(
        self,
        selectors: List[VarSelector],
        measurement_id: UUID,
        *,
        start_ms: float = 0,
        end_ms: float = 0,
        points: int = 2048,
    ) -> pd.DataFrame:
        """Fetch data of one measurement from the history driver.

        All arguments are forwarded unchanged to the driver's
        ``fetch_history``.
        """
        return _run(
            self._drivers["history"].fetch_history(
                selectors,
                measurement_id=measurement_id,
                start_ms=start_ms,
                end_ms=end_ms,
                points=points,
            )
        )

    # -------------------------- websocket ---------------------------- #
    async def stream_online(
        self,
        var_ids: List[UUID],
        *,
        interval_ms: int = 1,
        extended: bool = True,
        on_change: bool = True,
        precision: int = -1,
    ):
        """Async generator re-yielding every tick of the WebSocket driver.

        The WebSocket driver is created on first use; all arguments are
        forwarded unchanged to its ``stream_online``.
        """
        driver = await self._ensure_ws_driver()
        async for tick in driver.stream_online(
            var_ids,
            interval_ms=interval_ms,
            extended=extended,
            on_change=on_change,
            precision=precision,
        ):
            yield tick

    async def publish_online(
        self,
        data: Dict[UUID, float] | List[Tuple[UUID, float]],
        *,
        function: str = "write",
    ) -> None:
        """Publish ``data`` through the WebSocket driver's ``publish``."""
        driver = await self._ensure_ws_driver()
        await driver.publish(data, function=function)

    async def _ensure_ws_driver(self) -> WebSocketDriver:
        """Return the cached WebSocket driver, creating it on first call."""
        if self._ws_driver is None:
            # imported lazily; this module does not import AsyncWS at top level
            from gi_data.infra.ws import AsyncWS
            ws = AsyncWS(self._http.base_url, self._auth)
            self._ws_driver = WebSocketDriver(self._auth, ws, self._http)
        return self._ws_driver

    # ---------------------------- kafka ------------------------------ #
    async def stream_kafka(
        self,
        var_ids: List[UUID],
        *,
        ssl: bool = False,
        group_id: str = "gi_data_client",
    ):
        """Async generator re-yielding every update of the Kafka driver.

        The Kafka driver is created on first use; ``ssl`` and ``group_id`` are
        forwarded unchanged to its ``stream``.
        """
        driver = await self._ensure_kafka_driver()
        # lazy %-formatting: the message is only built when DEBUG is enabled
        logger.debug("Kafka driver: %s", driver)
        async for update in driver.stream(var_ids, ssl=ssl, group_id=group_id):
            logger.debug("Kafka update: %s", update)
            yield update

    async def _ensure_kafka_driver(self) -> KafkaStreamDriver:
        """Return the cached Kafka driver, creating it on first call."""
        if self._kafka is None:
            self._kafka = KafkaStreamDriver(self._auth, self._http)
        return self._kafka

    # --------------------------- export ------------------------------- #
    def export_data(
        self,
        selectors: List[VarSelector],
        *,
        start_ms: float,
        end_ms: float,
        format: DataFormat,
        points: Optional[int] = None,
        timezone: str = "UTC",
        resolution: Optional[Resolution] = None,
        data_type: Optional[DataType] = None,
        aggregation: Optional[str] = None,
        date_format: Optional[str] = None,
        filename: Optional[str] = None,
        precision: int = -1,
        csv_settings: Optional[CSVSettings] = None,
        log_settings: Optional[LogSettings] = None,
        target: Optional[str] = None,
    ) -> bytes:
        """Export data for ``selectors`` through the buffer driver.

        Enum arguments are passed to the driver as their ``.value``
        (``resolution`` / ``data_type`` as ``None`` when not given); all other
        arguments are forwarded unchanged to the driver's ``export``.

        Raises:
            NotImplementedError: If ``format.value`` is not contained in the
                driver's ``supported_exports()``.
        """
        drv = self._drivers["buffer"]

        if format.value not in drv.supported_exports():
            raise NotImplementedError(f"{drv.name} does not support {format.value}")

        return _run(
            drv.export(
                selectors,
                start_ms=start_ms,
                end_ms=end_ms,
                format=format.value,
                points=points,
                timezone=timezone,
                resolution=resolution.value if resolution else None,
                data_type=data_type.value if data_type else None,
                aggregation=aggregation,
                date_format=date_format,
                filename=filename,
                precision=precision,
                csv_settings=csv_settings,
                log_settings=log_settings,
                target=target,
            )
        )

    # convenience
    def export_csv(self, selectors, *, start_ms, end_ms, **kw) -> bytes:
        """Shortcut for :meth:`export_data` with ``format=DataFormat.CSV``."""
        return self.export_data(selectors, start_ms=start_ms, end_ms=end_ms,
                                format=DataFormat.CSV, **kw)

    def export_udbf(self, selectors, *, start_ms, end_ms, **kw) -> bytes:
        """Shortcut for :meth:`export_data` with ``format=DataFormat.UDBF``."""
        return self.export_data(selectors, start_ms=start_ms, end_ms=end_ms,
                                format=DataFormat.UDBF, **kw)

    # --------------------------- import ------------------------------- #
    def import_data(
        self,
        source_id: str,
        source_name: str,
        file_bytes: bytes,
        *,
        format: DataFormat,
        target: str = "stream",  # "stream" | "record" - only stream on cloud
        csv_settings: Optional[CSVImportSettings] = None,
        add_time_series: bool = False,
        retention_time_sec: int = 0,
        time_offset_sec: int = 0,
        sample_rate: int = -1,
        auto_create_metadata: bool = True,
        session_timeout_sec: int = 300,
    ) -> str:
        """Import ``file_bytes`` through the history driver.

        ``DataFormat.CSV`` is dispatched to the driver's ``import_csv`` with
        every option.  ``DataFormat.UDBF`` is dispatched to ``import_udbf``,
        which does not receive ``csv_settings``, ``retention_time_sec`` or
        ``time_offset_sec``.

        Raises:
            NotImplementedError: For any other ``format``.
        """
        drv = self._drivers["history"]

        if format == DataFormat.CSV:
            return _run(
                drv.import_csv(
                    source_id,
                    source_name,
                    file_bytes,
                    target=target,
                    csv_settings=csv_settings,
                    add_time_series=add_time_series,
                    retention_time_sec=retention_time_sec,
                    time_offset_sec=time_offset_sec,
                    sample_rate=sample_rate,
                    auto_create_metadata=auto_create_metadata,
                    session_timeout_sec=session_timeout_sec,
                )
            )

        if format == DataFormat.UDBF:
            return _run(
                drv.import_udbf(
                    source_id,
                    source_name,
                    file_bytes,
                    target=target,
                    add_time_series=add_time_series,
                    sample_rate=sample_rate,
                    auto_create_metadata=auto_create_metadata,
                    session_timeout_sec=session_timeout_sec,
                )
            )

        raise NotImplementedError(f"Import for format={format} not supported.")

    def import_csv(self, source_id, source_name, file_bytes, **kw) -> str:
        """Shortcut for :meth:`import_data` with ``format=DataFormat.CSV``."""
        return self.import_data(source_id, source_name, file_bytes, format=DataFormat.CSV, **kw)

    def import_udbf(self, source_id, source_name, file_bytes, **kw) -> str:
        """Shortcut for :meth:`import_data` with ``format=DataFormat.UDBF``."""
        return self.import_data(source_id, source_name, file_bytes, format=DataFormat.UDBF, **kw)

    # ------------------------ housekeeping --------------------------- #
    def close(self) -> None:
        """Close the underlying HTTP client via its ``aclose()``."""
        _run(self._http.aclose())

    def __enter__(self) -> "GIDataClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:
        self.close()
        return False  # never suppress an exception raised in the with-body

    @staticmethod
    def set_log_level(level: int):
        """Set ``level`` on the package logger and its existing child loggers.

        Only the logger named ``PACKAGE_PREFIX`` and loggers below it
        (``PACKAGE_PREFIX + "."``) are changed; loggers of other packages whose
        name merely starts with the same characters are left alone.
        """
        package_logger = logging.getLogger(PACKAGE_PREFIX)
        package_logger.setLevel(level)  # affects children that don't explicitly override

        # ensure already-created module loggers are updated, too
        child_prefix = PACKAGE_PREFIX + "."
        for name, lg in logging.root.manager.loggerDict.items():
            if not isinstance(lg, logging.Logger):
                continue  # loggerDict may also hold non-Logger placeholder entries
            if name == PACKAGE_PREFIX or name.startswith(child_prefix):
                lg.setLevel(level)
File without changes
@@ -0,0 +1,114 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ from typing import AsyncIterator, Dict, List, Literal, Optional
5
+ from uuid import UUID
6
+
7
+ import pandas as pd
8
+
9
+ from gi_data.mapping.models import LogSettings, CSVSettings, VarSelector
10
+
11
+
12
class BaseDriver(abc.ABC):
    """
    Abstract transport driver.

    Concrete subclasses implement only the subset of methods
    their protocol / product family supports.  Every operation a subclass
    does not override raises ``NotImplementedError``.
    """

    priority: int = 10
    name: str = "base"

    def __init__(self, auth_manager, http_client, ws_client) -> None:
        """Store the auth manager and the HTTP / WebSocket clients."""
        self.auth = auth_manager
        self.http = http_client
        self.ws = ws_client

    # ---------------------------- ONLINE --------------------------------

    async def list_variables(self) -> List["Variable"]:  # noqa: F821
        """Return metadata for every online variable."""
        raise NotImplementedError

    async def read(self, var_ids: List[UUID]) -> Dict[UUID, float]:
        """Read current online values for a list of UUIDs."""
        raise NotImplementedError

    async def write(self, mapping: Dict[UUID, float]) -> None:
        """Write values to online variables."""
        raise NotImplementedError

    # ---------------------------- BUFFER --------------------------------

    async def list_buffer_sources(self) -> List["Source"]:  # noqa: F821
        """Return buffer-stream definitions."""
        raise NotImplementedError

    async def list_buffer_variables(self, source_id) -> List["GIStreamVariable"]:  # noqa: F821
        """Return buffer-stream variables."""
        raise NotImplementedError

    async def fetch_buffer(self, *args, **kwargs) -> "TimeSeriesFrame":  # noqa: F821
        """Fetch equidistant or absolute buffer data."""
        raise NotImplementedError

    # --------------------------- HISTORY --------------------------------

    async def list_measurements(self, *args, **kwargs) -> List["Measurement"]:  # noqa: F821
        """Return measurements inside a history source."""
        raise NotImplementedError

    async def fetch_history(self, *args, **kwargs) -> "TimeSeriesFrame":  # noqa: F821
        """Read historical data within a time window."""
        raise NotImplementedError

    # --------------------------- STREAMING ------------------------------

    def stream(
        self, worker: str, **cfg
    ) -> AsyncIterator[pd.DataFrame]:  # pragma: no cover
        """
        Subscribe to a WebSocket worker and yield DataFrame chunks.

        Implementation is optional; drivers that do not support WebSocket
        simply raise `NotImplementedError`.
        """
        raise NotImplementedError

    # ------------------------- EXPORT / IMPORT --------------------------

    async def export_data(
        self,
        selectors: List["VarSelector"],
        *,
        start_ms: float,
        end_ms: float,
        format: Literal["csv", "udbf"],
        points: Optional[int] = None,
        timezone: str = "UTC",
        aggregation: Optional[str] = None,
        date_format: Optional[str] = None,
        filename: Optional[str] = None,
        precision: int = -1,
        csv_settings: Optional["CSVSettings"] = None,
        log_settings: Optional["LogSettings"] = None,
        target: Optional[str] = None,
    ) -> bytes:
        """Export data for ``selectors``; not implemented in the base class."""
        raise NotImplementedError

    def supported_exports(self) -> set[str]:
        """Return the export format names this driver accepts."""
        return {"csv", "udbf"}

    # The three operations below used to be synchronous no-ops returning
    # ``None``.  They are coroutines that raise ``NotImplementedError`` so
    # that awaiting an unsupported operation fails with a clear error instead
    # of handing ``None`` to the event loop.

    async def import_csv(self, source_id, source_name, file_bytes, target,
                         csv_settings, add_time_series, retention_time_sec,
                         time_offset_sec, sample_rate, auto_create_metadata, session_timeout_sec):
        """Import CSV data; not implemented in the base class."""
        raise NotImplementedError

    async def import_udbf(self, source_id, source_name, file_bytes,
                          target, add_time_series, sample_rate,
                          auto_create_metadata, session_timeout_sec):
        """Import UDBF data; not implemented in the base class."""
        raise NotImplementedError

    async def export(self, selectors, start_ms, end_ms, format, points,
                     timezone, resolution, data_type, aggregation,
                     date_format, filename, precision, csv_settings, log_settings, target):
        """Export data in ``format``; not implemented in the base class."""
        raise NotImplementedError