cgse-common 0.17.1__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,442 +0,0 @@
1
# Public API of this module: the repository implementation and the
# plugin-manager hook that exposes it.
__all__ = [
    "DuckDBRepository",
    "get_repository_class",
]
5
- import json
6
- from datetime import datetime
7
- from typing import Any
8
- from typing import Dict
9
- from typing import List
10
-
11
- import duckdb
12
-
13
- from egse.metrics import DataPoint
14
-
15
-
16
class DuckDBRepository:
    """
    DuckDB TimeSeriesRepository implementation.

    All measurements share one table (``self.table_name``) with columns:

    - measurement: the measurement name (like a table name in InfluxDB)
    - timestamp:   time column
    - tags:        JSON object storing tag key-value pairs
    - fields:      JSON object storing field key-value pairs
    - created_at:  row insertion time
    """

    def __init__(self, db_path: str, table_name: str = "timeseries"):
        """
        Initialize DuckDB repository.

        Args:
            db_path: Path to DuckDB database file (or ":memory:" for in-memory)
            table_name: Name of the main timeseries table
        """
        self.db_path = db_path
        self.table_name = table_name
        self.conn = None  # set by connect(), cleared by close()

    def connect(self) -> None:
        """Connect to DuckDB database and create the schema.

        Creates the main table, two indices, and a convenience view if they
        do not exist yet.

        Raises:
            ConnectionError: when the database cannot be opened or the schema
                cannot be created.
        """
        try:
            self.conn = duckdb.connect(self.db_path)

            # Main timeseries table: one row per data point.
            self.conn.execute(
                f"""
                CREATE TABLE IF NOT EXISTS {self.table_name} (
                    measurement VARCHAR NOT NULL,
                    timestamp TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                    tags JSON,
                    fields JSON,
                    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
                )
                """
            )

            # Indices on the two columns used by almost every query below.
            self.conn.execute(
                f"""
                CREATE INDEX IF NOT EXISTS idx_{self.table_name}_measurement
                ON {self.table_name}(measurement)
                """
            )
            self.conn.execute(
                f"""
                CREATE INDEX IF NOT EXISTS idx_{self.table_name}_timestamp
                ON {self.table_name}(timestamp)
                """
            )

            # Convenience view exposing extracted tag/field values next to the
            # raw JSON columns, for ad-hoc querying.
            self.conn.execute(
                f"""
                CREATE OR REPLACE VIEW {self.table_name}_flat AS
                SELECT
                    measurement,
                    timestamp,
                    tags,
                    fields,
                    created_at,
                    -- Extract all tag keys and values
                    json_extract_string(tags, '$.*') as tag_values,
                    -- Extract all field keys and values
                    json_extract_string(fields, '$.*') as field_values
                FROM {self.table_name}
                """
            )

        except Exception as e:
            # Chain the original exception so the root cause stays visible.
            raise ConnectionError(f"Failed to connect to DuckDB at {self.db_path}: {e}") from e

    @staticmethod
    def _parse_json_column(value) -> Dict:
        """Best-effort decode of a JSON column; NULL/malformed input -> {}."""
        try:
            return json.loads(value) if value else {}
        except (json.JSONDecodeError, TypeError):
            return {}

    def write(self, points: "List[DataPoint]") -> None:
        """Write data points to DuckDB in one bulk insert.

        Args:
            points: data points to store; an empty list is a no-op.

        Raises:
            ConnectionError: when connect() has not been called.
            RuntimeError: when the insert fails.
        """
        if not self.conn:
            raise ConnectionError("Not connected. Call connect() first.")

        if not points:
            return

        rows = []
        for point in points:
            # Convert timestamp if provided, otherwise use current time.
            # NOTE(review): datetime.now() is naive local time — presumably
            # UTC is intended; confirm against the callers.
            timestamp = point.timestamp
            if timestamp is None:
                timestamp = datetime.now().isoformat()
            elif isinstance(timestamp, str):
                # Normalize to ISO format; fall back to "now" on bad input.
                try:
                    timestamp = datetime.fromisoformat(timestamp.replace("Z", "+00:00")).isoformat()
                except ValueError:
                    timestamp = datetime.now().isoformat()

            rows.append(
                (
                    point.measurement,
                    timestamp,
                    json.dumps(point.tags) if point.tags else "{}",
                    json.dumps(point.fields) if point.fields else "{}",
                )
            )

        try:
            # executemany() replaces the hand-built "(?, ?, ?, ?), ..." string:
            # same bulk insert, without string-assembling the VALUES clause.
            self.conn.executemany(
                f"INSERT INTO {self.table_name} (measurement, timestamp, tags, fields) VALUES (?, ?, ?, ?)",
                rows,
            )
        except Exception as e:
            raise RuntimeError(f"Failed to write data points: {e}") from e

    def query(self, query_str: str) -> List[Dict]:
        """Execute a SQL query and return rows as a list of dicts.

        Raises:
            ConnectionError: when connect() has not been called.
            RuntimeError: when the query fails.
        """
        if not self.conn:
            raise ConnectionError("Not connected. Call connect() first.")

        try:
            result = self.conn.execute(query_str).fetchall()
            columns = [desc[0] for desc in self.conn.description]
            return [dict(zip(columns, row)) for row in result]
        except Exception as e:
            raise RuntimeError(f"Failed to execute query: {e}") from e

    def close(self) -> None:
        """Close the database connection (safe to call when not connected)."""
        if self.conn:
            self.conn.close()
            self.conn = None

    # ------------------------------------------------------------------
    # Schema exploration methods
    # ------------------------------------------------------------------

    def get_tables(self) -> List[str]:
        """Get all measurements (equivalent to tables); [] on failure."""
        try:
            query = f"SELECT DISTINCT measurement FROM {self.table_name} ORDER BY measurement"
            results = self.query(query)
            return [row["measurement"] for row in results]
        except Exception as exc:
            print(f"Error getting measurements: {exc}")
            return []

    def get_columns(self, table_name: str) -> List[Dict[str, Any]]:
        """Get column information for a measurement.

        Returns the five physical columns plus one synthetic ``tag_<key>`` /
        ``field_<key>`` entry per distinct JSON key seen in that measurement.
        """
        try:
            # Physical schema of the shared table.
            columns = [
                {"column_name": "measurement", "data_type": "VARCHAR", "is_nullable": "NO"},
                {"column_name": "timestamp", "data_type": "TIMESTAMPTZ", "is_nullable": "YES"},
                {"column_name": "tags", "data_type": "JSON", "is_nullable": "YES"},
                {"column_name": "fields", "data_type": "JSON", "is_nullable": "YES"},
                {"column_name": "created_at", "data_type": "TIMESTAMPTZ", "is_nullable": "YES"},
            ]

            # BUG FIX: the original queried FROM {table_name} — i.e. a table
            # named after the *measurement*, which does not exist — and
            # extracted the tag *values* ('$.' || key) where the *keys* are
            # wanted. Query the shared table and unnest the JSON keys instead.
            tag_query = f"""
                SELECT DISTINCT key as tag_key
                FROM {self.table_name},
                     unnest(json_keys(tags)) as key
                WHERE measurement = ?
                  AND tags IS NOT NULL
                  AND tags != '{{}}'
            """

            try:
                tag_results = self.conn.execute(tag_query, [table_name]).fetchall()
                for row in tag_results:
                    if row[0]:  # tag_key is not null
                        columns.append(
                            {
                                "column_name": f"tag_{row[0]}",
                                "data_type": "VARCHAR",
                                "is_nullable": "YES",
                                "column_type": "tag",
                            }
                        )
            except Exception:
                pass  # If JSON extraction fails, skip tag columns

            # Same fix for the field keys.
            field_query = f"""
                SELECT DISTINCT key as field_key
                FROM {self.table_name},
                     unnest(json_keys(fields)) as key
                WHERE measurement = ?
                  AND fields IS NOT NULL
                  AND fields != '{{}}'
            """

            try:
                field_results = self.conn.execute(field_query, [table_name]).fetchall()
                for row in field_results:
                    if row[0]:  # field_key is not null
                        columns.append(
                            {
                                "column_name": f"field_{row[0]}",
                                "data_type": "DOUBLE",
                                "is_nullable": "YES",
                                "column_type": "field",
                            }
                        )
            except Exception:
                pass  # If JSON extraction fails, skip field columns

            return columns

        except Exception as e:
            print(f"Error getting columns for {table_name}: {e}")
            return []

    def get_schema_info(self, table_name: str) -> Dict[str, Any]:
        """Get detailed schema information for a measurement.

        Groups the columns reported by get_columns() into time / tag / field /
        other buckets and adds the measurement's row count.
        """
        columns = self.get_columns(table_name)

        schema = {
            "table_name": table_name,
            "time_column": None,
            "tag_columns": [],
            "field_columns": [],
            "other_columns": [],
        }

        for col in columns:
            col_name = col["column_name"]
            col_type = col.get("column_type", "")

            if col_name == "timestamp":
                schema["time_column"] = col
            elif col_type == "tag" or col_name.startswith("tag_"):
                schema["tag_columns"].append(col)
            elif col_type == "field" or col_name.startswith("field_"):
                schema["field_columns"].append(col)
            else:
                schema["other_columns"].append(col)

        # BUG FIX: the original counted FROM {table_name} (a nonexistent table
        # named after the measurement). Count rows of the shared table,
        # filtering on the measurement with a bound parameter.
        try:
            result = self.conn.execute(
                f"SELECT COUNT(*) FROM {self.table_name} WHERE measurement = ?",
                [table_name],
            ).fetchone()
            schema["row_count"] = result[0] if result else 0
        except Exception:
            schema["row_count"] = 0

        return schema

    def inspect_database(self) -> Dict[str, Any]:
        """Get complete database schema information for all measurements."""
        measurements = self.get_tables()

        database_info = {
            "database_path": self.db_path,
            "main_table": self.table_name,
            "total_measurements": len(measurements),
            "measurements": {},
        }

        # Get total row count
        try:
            total_rows = self.query(f"SELECT COUNT(*) as count FROM {self.table_name}")
            database_info["total_rows"] = total_rows[0]["count"] if total_rows else 0
        except Exception:
            database_info["total_rows"] = 0

        # Get schema info for each measurement
        for measurement in measurements:
            database_info["measurements"][measurement] = self.get_schema_info(measurement)

        return database_info

    def query_latest(self, measurement: str, limit: int = 20) -> List[Dict]:
        """Get the latest records for a measurement, newest first."""
        try:
            query = f"""
                SELECT measurement, timestamp, tags, fields, created_at
                FROM {self.table_name}
                WHERE measurement = ?
                ORDER BY timestamp DESC, created_at DESC
                LIMIT ?
            """

            result = self.conn.execute(query, [measurement, limit]).fetchall()
            columns = [desc[0] for desc in self.conn.description]

            # Convert to dicts and decode the JSON columns.
            records = []
            for row in result:
                record = dict(zip(columns, row))
                record["tags"] = self._parse_json_column(record["tags"])
                record["fields"] = self._parse_json_column(record["fields"])
                records.append(record)

            return records

        except Exception as e:
            print(f"Error getting latest records for {measurement}: {e}")
            return []

    def get_measurements_with_stats(self) -> List[Dict[str, Any]]:
        """Get measurements with record counts, time range, and key counts."""
        try:
            query = f"""
                SELECT
                    measurement,
                    COUNT(*) as record_count,
                    MIN(timestamp) as earliest_timestamp,
                    MAX(timestamp) as latest_timestamp,
                    COUNT(DISTINCT json_keys(tags)) as unique_tag_keys,
                    COUNT(DISTINCT json_keys(fields)) as unique_field_keys
                FROM {self.table_name}
                GROUP BY measurement
                ORDER BY record_count DESC
            """

            return self.query(query)

        except Exception as e:
            print(f"Error getting measurement statistics: {e}")
            return []

    def query_by_tags(self, measurement: str, tags: Dict[str, str], limit: int = 100) -> List[Dict]:
        """Query records by measurement and exact tag-value filters."""
        try:
            # Build tag filter conditions. The JSON path is bound as a
            # parameter so caller-supplied tag keys are never spliced into
            # the SQL text (the original f-string was injection-prone).
            tag_conditions = []
            params = [measurement]

            for tag_key, tag_value in tags.items():
                tag_conditions.append("json_extract_string(tags, ?) = ?")
                params.extend([f"$.{tag_key}", tag_value])

            where_clause = " AND ".join(tag_conditions)
            if where_clause:
                where_clause = f" AND {where_clause}"

            query = f"""
                SELECT measurement, timestamp, tags, fields, created_at
                FROM {self.table_name}
                WHERE measurement = ?{where_clause}
                ORDER BY timestamp DESC
                LIMIT ?
            """

            params.append(limit)

            result = self.conn.execute(query, params).fetchall()
            columns = [desc[0] for desc in self.conn.description]

            # Convert to dicts and decode the JSON columns.
            records = []
            for row in result:
                record = dict(zip(columns, row))
                record["tags"] = self._parse_json_column(record["tags"])
                record["fields"] = self._parse_json_column(record["fields"])
                records.append(record)

            return records

        except Exception as e:
            print(f"Error querying by tags: {e}")
            return []

    def aggregate_by_time(
        self, measurement: str, field_name: str, time_bucket: str = "1 hour", aggregation: str = "AVG"
    ) -> List[Dict]:
        """Aggregate a field's values by time buckets.

        Args:
            measurement: measurement to aggregate.
            field_name: JSON field key to aggregate (must come from trusted
                code — it is interpolated into the JSON path).
            time_bucket: bucket width as an interval string, e.g. "1 hour".
            aggregation: one of AVG, SUM, COUNT, MIN, MAX.
        """
        try:
            agg_func = aggregation.upper()
            if agg_func not in ("AVG", "SUM", "COUNT", "MIN", "MAX"):
                raise ValueError(f"Unsupported aggregation function: {aggregation}")

            # BUG FIX: date_trunc() takes a part name ('hour'), not an
            # interval like '1 hour' — time_bucket() accepts the interval
            # strings this method documents. The measurement is bound as a
            # parameter instead of the original str.replace("?", ...) splice.
            query = f"""
                SELECT
                    time_bucket(INTERVAL '{time_bucket}', timestamp) as time_bucket,
                    {agg_func}(CAST(json_extract_string(fields, '$.{field_name}') AS DOUBLE)) as {field_name}_{agg_func.lower()}
                FROM {self.table_name}
                WHERE measurement = ?
                    AND json_extract_string(fields, '$.{field_name}') IS NOT NULL
                GROUP BY 1
                ORDER BY 1
            """

            result = self.conn.execute(query, [measurement]).fetchall()
            columns = [desc[0] for desc in self.conn.description]
            return [dict(zip(columns, row)) for row in result]

        except Exception as e:
            print(f"Error in time aggregation: {e}")
            return []
438
-
439
-
440
def get_repository_class():
    """Expose the DuckDB-backed repository to the plugin manager."""
    repository_cls = DuckDBRepository
    return repository_cls