cgse-common 0.17.1__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
@@ -1,596 +0,0 @@
1
- from datetime import datetime, timezone
2
- from typing import Any
3
- from typing import Dict
4
- from typing import List
5
- from typing import Optional
6
- from urllib.parse import urlparse
7
-
8
- import psycopg
9
- import psycopg.rows
10
-
11
- from egse.metrics import DataPoint
12
-
13
-
14
- class TimeScaleDBRepository:
15
- """
16
- TimeScaleDB TimeSeriesRepository implementation.
17
-
18
- TimeScaleDB is a PostgreSQL extension optimized for time-series data.
19
- It uses hypertables for automatic partitioning and provides excellent
20
- performance for time-series workloads.
21
-
22
- Data model:
23
- - Each measurement becomes a hypertable
24
- - Time column for timestamps
25
- - Tag columns for indexed metadata
26
- - Field columns for actual values
27
- - Automatic partitioning by time
28
- """
29
-
30
- def __init__(
31
- self,
32
- connection_string: str,
33
- database: str = "timeseries",
34
- create_hypertables: bool = True,
35
- chunk_time_interval: str = "1 day",
36
- ):
37
- """
38
- Initialize TimeScaleDB repository.
39
-
40
- Args:
41
- connection_string: PostgreSQL connection string
42
- (e.g., "postgresql://user:pass@localhost:5432/dbname")
43
- database: Database name
44
- create_hypertables: Whether to automatically create hypertables
45
- chunk_time_interval: Hypertable chunk interval (e.g., "1 day", "1 hour")
46
- """
47
- self.connection_string = connection_string
48
- self.database = database
49
- self.create_hypertables = create_hypertables
50
- self.chunk_time_interval = chunk_time_interval
51
- self.conn = None
52
- self.cursor = None
53
- self._known_measurements = set()
54
-
55
- def connect(self) -> None:
56
- """Connect to TimeScaleDB and ensure extensions are installed."""
57
- try:
58
- self.conn = psycopg.connect(
59
- self.connection_string, row_factory=psycopg.rows.dict_row, options="-c search_path=public"
60
- )
61
- self.conn.autocommit = False
62
- self.cursor = self.conn.cursor()
63
-
64
- # Ensure TimescaleDB extension is installed
65
- self.cursor.execute("CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;")
66
- self.conn.commit()
67
-
68
- # Create metadata table for tracking measurements
69
- self._create_metadata_table()
70
-
71
- except psycopg.Error as exc:
72
- raise ConnectionError(f"Failed to connect to TimeScaleDB: {exc}")
73
-
74
- def _create_metadata_table(self) -> None:
75
- """Create metadata table to track measurements and their schemas."""
76
- try:
77
- self.cursor.execute(
78
- """
79
- CREATE TABLE IF NOT EXISTS _timeseries_metadata (
80
- measurement VARCHAR(255) PRIMARY KEY,
81
- created_at TIMESTAMPTZ DEFAULT NOW(),
82
- tag_columns TEXT[],
83
- field_columns TEXT[],
84
- last_updated TIMESTAMPTZ DEFAULT NOW()
85
- );
86
- """
87
- )
88
- self.conn.commit()
89
- except psycopg.Error as exc:
90
- raise RuntimeError(f"Failed to create metadata table: {exc}")
91
-
92
- def _ensure_measurement_table(self, measurement: str, tags: Dict[str, str], fields: Dict[str, Any]) -> None:
93
- """Create measurement table and hypertable if it doesn't exist."""
94
- if measurement in self._known_measurements:
95
- return
96
-
97
- try:
98
- # Check if table exists
99
- self.cursor.execute(
100
- """
101
- SELECT EXISTS (
102
- SELECT FROM information_schema.tables
103
- WHERE table_name = %s
104
- );
105
- """,
106
- (measurement,),
107
- )
108
-
109
- table_exists = self.cursor.fetchone()["exists"]
110
-
111
- if not table_exists:
112
- # Analyze tags and fields to determine column types
113
- tag_columns = []
114
- field_columns = []
115
-
116
- for tag_key in tags.keys():
117
- tag_columns.append(f"{tag_key} TEXT")
118
-
119
- for field_key, field_value in fields.items():
120
- if isinstance(field_value, bool):  # bool is a subclass of int, so test it first
121
- field_columns.append(f"{field_key} BOOLEAN")
122
- elif isinstance(field_value, (int, float)):
123
- field_columns.append(f"{field_key} DOUBLE PRECISION")
124
- else:
125
- field_columns.append(f"{field_key} TEXT")
126
-
127
- # Build CREATE TABLE statement
128
- all_columns = ["time TIMESTAMPTZ NOT NULL"] + tag_columns + field_columns
129
-
130
- create_sql = f"""
131
- CREATE TABLE {measurement} (
132
- {", ".join(all_columns)}
133
- );
134
- """
135
-
136
- self.cursor.execute(create_sql)
137
-
138
- # Create hypertable if enabled
139
- if self.create_hypertables:
140
- self.cursor.execute(
141
- f"""
142
- SELECT create_hypertable('{measurement}', 'time',
143
- chunk_time_interval => INTERVAL '{self.chunk_time_interval}');
144
- """
145
- )
146
-
147
- # Create indices on tag columns for better query performance
148
- for tag_key in tags.keys():
149
- index_name = f"idx_{measurement}_{tag_key}"
150
- self.cursor.execute(
151
- f"""
152
- CREATE INDEX IF NOT EXISTS {index_name}
153
- ON {measurement} ({tag_key});
154
- """
155
- )
156
-
157
- # Update metadata
158
- self.cursor.execute(
159
- """
160
- INSERT INTO _timeseries_metadata
161
- (measurement, tag_columns, field_columns)
162
- VALUES (%s, %s, %s)
163
- ON CONFLICT (measurement) DO UPDATE SET
164
- tag_columns = EXCLUDED.tag_columns,
165
- field_columns = EXCLUDED.field_columns,
166
- last_updated = NOW();
167
- """,
168
- (measurement, list(tags.keys()), list(fields.keys())),
169
- )
170
-
171
- self.conn.commit()
172
- print(
173
- f"Created hypertable '{measurement}' with {len(tag_columns)} tag columns and {len(field_columns)} field columns"
174
- )
175
-
176
- self._known_measurements.add(measurement)
177
-
178
- except psycopg.Error as exc:
179
- self.conn.rollback()
180
- raise RuntimeError(f"Failed to create measurement table '{measurement}': {exc}")
181
-
182
- def write_points(self, points: List[DataPoint]) -> None:
183
- """Write data points to TimeScaleDB."""
184
- if not self.conn or not self.cursor:
185
- raise ConnectionError("Not connected. Call connect() first.")
186
-
187
- if not points:
188
- return
189
-
190
- # Group points by measurement
191
- points_by_measurement = {}
192
- for point in points:
193
- if point.measurement not in points_by_measurement:
194
- points_by_measurement[point.measurement] = []
195
- points_by_measurement[point.measurement].append(point)
196
-
197
- try:
198
- # Process each measurement
199
- for measurement, measurement_points in points_by_measurement.items():
200
- # Ensure table exists (analyze first point for schema)
201
- first_point = measurement_points[0]
202
- self._ensure_measurement_table(measurement, first_point.tags, first_point.fields)
203
-
204
- # Prepare batch insert
205
- self._batch_insert_points(measurement, measurement_points)
206
-
207
- self.conn.commit()
208
-
209
- except Exception as e:
210
- self.conn.rollback()
211
- raise RuntimeError(f"Failed to write data points: {e}")
212
-
213
- def _batch_insert_points(self, measurement: str, points: List[DataPoint]) -> None:
214
- """Batch insert points for a specific measurement."""
215
- if not points:
216
- return
217
-
218
- # Get all unique columns from all points
219
- all_tags = set()
220
- all_fields = set()
221
-
222
- for point in points:
223
- all_tags.update(point.tags.keys())
224
- all_fields.update(point.fields.keys())
225
-
226
- # Build column list
227
- columns = ["time"] + sorted(all_tags) + sorted(all_fields)
228
-
229
- # Prepare values
230
- values = []
231
- for point in points:
232
- # Handle timestamp
233
- timestamp = point.timestamp
234
- if timestamp is None:
235
- timestamp = datetime.now(timezone.utc)
236
- elif isinstance(timestamp, str):
237
- timestamp = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
238
-
239
- row = [timestamp]
240
-
241
- # Add tag values
242
- for tag in sorted(all_tags):
243
- row.append(point.tags.get(tag))
244
-
245
- # Add field values
246
- for field in sorted(all_fields):
247
- row.append(point.fields.get(field))
248
-
249
- values.append(row)
250
-
251
- # Build INSERT statement
252
- placeholders = ", ".join(["%s"] * len(columns))
253
- insert_sql = f"""
254
- INSERT INTO {measurement} ({", ".join(columns)})
255
- VALUES ({placeholders})
256
- """
257
-
258
- # Execute batch insert. psycopg 3's executemany() batches the parameter
259
- # sequence itself; the psycopg2-era execute_batch() helper and its page_size
260
- # argument are no longer needed:
261
- # psycopg2.extras.execute_batch(self.cursor, insert_sql, values, page_size=1000)
262
- self.cursor.executemany(insert_sql, values)
263
-
264
- def query(self, query_str: str, params: Optional[tuple] = None) -> List[Dict]:
265
- """Execute SQL query and return results."""
266
- if not self.conn or not self.cursor:
267
- raise ConnectionError("Not connected. Call connect() first.")
268
-
269
- try:
270
- if params:
271
- self.cursor.execute(query_str, params)
272
- else:
273
- self.cursor.execute(query_str)
274
-
275
- # Fetch results as dictionaries
276
- results = self.cursor.fetchall()
277
-
278
- # Rows from dict_row are already plain dicts; convert datetime values to ISO strings
279
- formatted_results = []
280
- for row in results:
281
- row_dict = dict(row)
282
- # Convert datetime objects to ISO strings
283
- for key, value in row_dict.items():
284
- if isinstance(value, datetime):
285
- row_dict[key] = value.isoformat()
286
- formatted_results.append(row_dict)
287
-
288
- return formatted_results
289
-
290
- except psycopg.Error as e:
291
- raise RuntimeError(f"Failed to execute query: {e}")
292
-
293
- def close(self) -> None:
294
- """Close the database connection."""
295
- if self.cursor:
296
- self.cursor.close()
297
- self.cursor = None
298
- if self.conn:
299
- self.conn.close()
300
- self.conn = None
301
-
302
- # Schema exploration methods
303
- def get_tables(self) -> List[str]:
304
- """Get all measurements (hypertables)."""
305
- try:
306
- # Get all hypertables
307
- self.cursor.execute(
308
- """
309
- SELECT hypertable_name
310
- FROM timescaledb_information.hypertables
311
- ORDER BY hypertable_name;
312
- """
313
- )
314
-
315
- hypertables = [row["hypertable_name"] for row in self.cursor.fetchall()]
316
-
317
- # Also get regular tables that might be measurements
318
- self.cursor.execute(
319
- """
320
- SELECT table_name
321
- FROM information_schema.tables
322
- WHERE table_schema = 'public'
323
- AND table_name NOT LIKE '\\_%'  -- skip internal tables such as _timeseries_metadata
324
- AND table_name NOT IN (
325
- SELECT hypertable_name
326
- FROM timescaledb_information.hypertables
327
- )
328
- ORDER BY table_name;
329
- """
330
- )
331
-
332
- regular_tables = [row["table_name"] for row in self.cursor.fetchall()]
333
-
334
- return sorted(set(hypertables + regular_tables))
335
-
336
- except psycopg.Error as e:
337
- print(f"Error getting tables: {e}")
338
- return []
339
-
340
- def get_columns(self, table_name: str) -> List[Dict[str, Any]]:
341
- """Get column information for a measurement."""
342
- try:
343
- self.cursor.execute(
344
- """
345
- SELECT
346
- column_name,
347
- data_type,
348
- is_nullable,
349
- column_default
350
- FROM information_schema.columns
351
- WHERE table_name = %s
352
- ORDER BY ordinal_position;
353
- """,
354
- (table_name,),
355
- )
356
-
357
- columns = []
358
- for row in self.cursor.fetchall():
359
- columns.append(
360
- {
361
- "column_name": row["column_name"],
362
- "data_type": row["data_type"],
363
- "is_nullable": row["is_nullable"],
364
- "column_default": row["column_default"],
365
- }
366
- )
367
-
368
- return columns
369
-
370
- except psycopg.Error as e:
371
- print(f"Error getting columns for {table_name}: {e}")
372
- return []
373
-
374
- def get_schema_info(self, table_name: str) -> Dict[str, Any]:
375
- """Get detailed schema information for a measurement."""
376
- columns = self.get_columns(table_name)
377
-
378
- schema = {
379
- "table_name": table_name,
380
- "time_column": None,
381
- "tag_columns": [],
382
- "field_columns": [],
383
- "is_hypertable": self._is_hypertable(table_name),
384
- }
385
-
386
- # Get metadata if available
387
- try:
388
- self.cursor.execute(
389
- """
390
- SELECT tag_columns, field_columns
391
- FROM _timeseries_metadata
392
- WHERE measurement = %s;
393
- """,
394
- (table_name,),
395
- )
396
-
397
- metadata = self.cursor.fetchone()
398
- known_tags = set(metadata["tag_columns"]) if metadata else set()
399
- known_fields = set(metadata["field_columns"]) if metadata else set()
400
- except psycopg.Error:
401
- known_tags = set()
402
- known_fields = set()
403
-
404
- for col in columns:
405
- col_name = col["column_name"]
406
-
407
- if col_name == "time":
408
- schema["time_column"] = col
409
- elif col_name in known_tags or col["data_type"] in ["text", "character varying"]:
410
- schema["tag_columns"].append(col)
411
- elif col_name in known_fields or col["data_type"] in ["double precision", "integer", "boolean"]:
412
- schema["field_columns"].append(col)
413
-
414
- # Add row count and time range
415
- try:
416
- self.cursor.execute(
417
- f"""
418
- SELECT
419
- COUNT(*) as row_count,
420
- MIN(time) as earliest_time,
421
- MAX(time) as latest_time
422
- FROM {table_name};
423
- """
424
- )
425
-
426
- stats = self.cursor.fetchone()
427
- if stats:
428
- schema.update(
429
- {
430
- "row_count": stats["row_count"],
431
- "earliest_time": stats["earliest_time"].isoformat() if stats["earliest_time"] else None,
432
- "latest_time": stats["latest_time"].isoformat() if stats["latest_time"] else None,
433
- }
434
- )
435
- except psycopg.Error:
436
- schema.update({"row_count": 0, "earliest_time": None, "latest_time": None})
437
-
438
- return schema
439
-
440
- def _is_hypertable(self, table_name: str) -> bool:
441
- """Check if table is a hypertable."""
442
- try:
443
- self.cursor.execute(
444
- """
445
- SELECT EXISTS (
446
- SELECT 1 FROM timescaledb_information.hypertables
447
- WHERE hypertable_name = %s
448
- );
449
- """,
450
- (table_name,),
451
- )
452
-
453
- return self.cursor.fetchone()["exists"]
454
- except psycopg.Error:
455
- return False
456
-
457
- def inspect_database(self) -> Dict[str, Any]:
458
- """Get complete database schema information."""
459
- measurements = self.get_tables()
460
-
461
- database_info = {
462
- "database": self.database,
463
- "connection_string": self._sanitize_connection_string(),
464
- "total_measurements": len(measurements),
465
- "measurements": {},
466
- }
467
-
468
- # Get TimescaleDB specific info
469
- try:
470
- self.cursor.execute("SELECT extversion FROM pg_extension WHERE extname = 'timescaledb';")
471
- version = self.cursor.fetchone()
472
- database_info["timescaledb_version"] = version["extversion"] if version else "Unknown"
473
- except psycopg.Error:
474
- database_info["timescaledb_version"] = "Not installed"
475
-
476
- # Get schema info for each measurement
477
- for measurement in measurements:
478
- database_info["measurements"][measurement] = self.get_schema_info(measurement)
479
-
480
- return database_info
481
-
482
- def _sanitize_connection_string(self) -> str:
483
- """Remove sensitive info from connection string for logging."""
484
- try:
485
- parsed = urlparse(self.connection_string)
486
- return f"{parsed.scheme}://**:**@{parsed.hostname}:{parsed.port}{parsed.path}"
487
- except Exception:
488
- return "postgresql://***:***@***:***/***"
489
-
490
- def query_latest(self, measurement: str, limit: int = 20) -> List[Dict]:
491
- """Get latest records for a measurement."""
492
- try:
493
- query = f"""
494
- SELECT * FROM {measurement}
495
- ORDER BY time DESC
496
- LIMIT %s;
497
- """
498
-
499
- return self.query(query, (limit,))
500
-
501
- except Exception as e:
502
- print(f"Error getting latest records for {measurement}: {e}")
503
- return []
504
-
505
- def query_time_range(
506
- self, measurement: str, start_time: str, end_time: str, limit: Optional[int] = None
507
- ) -> List[Dict]:
508
- """Query records within a time range."""
509
- try:
510
- limit_clause = f"LIMIT {limit}" if limit else ""
511
-
512
- query = f"""
513
- SELECT * FROM {measurement}
514
- WHERE time >= %s AND time <= %s
515
- ORDER BY time DESC
516
- {limit_clause};
517
- """
518
-
519
- return self.query(query, (start_time, end_time))
520
-
521
- except Exception as e:
522
- print(f"Error querying time range for {measurement}: {e}")
523
- return []
524
-
525
- def aggregate_by_time(
526
- self, measurement: str, field_name: str, time_bucket: str = "1 hour", aggregation: str = "AVG"
527
- ) -> List[Dict]:
528
- """Aggregate field values using TimescaleDB time_bucket function."""
529
- try:
530
- agg_func = aggregation.upper()
531
- if agg_func not in ["AVG", "SUM", "COUNT", "MIN", "MAX"]:
532
- raise ValueError(f"Unsupported aggregation function: {aggregation}")
533
-
534
- query = f"""
535
- SELECT
536
- time_bucket(INTERVAL '{time_bucket}', time) as time_bucket,
537
- {agg_func}({field_name}) as {field_name}_{agg_func.lower()}
538
- FROM {measurement}
539
- WHERE {field_name} IS NOT NULL
540
- GROUP BY time_bucket
541
- ORDER BY time_bucket;
542
- """
543
-
544
- return self.query(query)
545
-
546
- except Exception as e:
547
- print(f"Error in time aggregation: {e}")
548
- return []
549
-
550
- def get_hypertable_info(self, measurement: str) -> Dict[str, Any]:
551
- """Get TimescaleDB specific hypertable information."""
552
- try:
553
- if not self._is_hypertable(measurement):
554
- return {"is_hypertable": False}
555
-
556
- # Get hypertable stats
557
- self.cursor.execute(
558
- """
559
- SELECT
560
- h.table_name,
561
- h.compression_enabled,
562
- h.chunk_time_interval,
563
- s.num_chunks,
564
- s.total_chunks,
565
- s.approximate_row_count,
566
- s.total_bytes
567
- FROM timescaledb_information.hypertables h
568
- LEFT JOIN timescaledb_information.hypertable_stats s
569
- ON h.hypertable_name = s.hypertable_name
570
- WHERE h.hypertable_name = %s;
571
- """,
572
- (measurement,),
573
- )
574
-
575
- info = self.cursor.fetchone()
576
- if info:
577
- return {
578
- "is_hypertable": True,
579
- "compression_enabled": info["compression_enabled"],
580
- "chunk_time_interval": str(info["chunk_time_interval"]),
581
- "num_chunks": info["num_chunks"],
582
- "total_chunks": info["total_chunks"],
583
- "approximate_row_count": info["approximate_row_count"],
584
- "total_bytes": info["total_bytes"],
585
- }
586
-
587
- return {"is_hypertable": True}
588
-
589
- except Exception as e:
590
- print(f"Error getting hypertable info: {e}")
591
- return {"is_hypertable": False, "error": str(e)}
592
-
593
-
594
- def get_repository_class():
595
- """Return the repository class for the plugin manager."""
596
- return TimeScaleDBRepository
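
For orientation, the sketch below shows how the TimeScaleDBRepository removed above is intended to be used: connect, write a batch of DataPoint objects (the first write for a measurement creates the hypertable and tag indices), query the data back, and close. It is illustrative only: the DataPoint keyword arguments (measurement, tags, fields, timestamp) are assumed from the attributes the repository reads, not from the actual egse.metrics definition, the connection string is a placeholder, and the import of TimeScaleDBRepository itself is omitted because the module path is not shown in this diff.

# Illustrative usage sketch for the TimeScaleDBRepository class removed above.
# Assumption: DataPoint(measurement=..., tags=..., fields=..., timestamp=...)
# mirrors the attributes the repository reads; check egse.metrics for the real API.
from datetime import datetime, timezone

from egse.metrics import DataPoint

repo = TimeScaleDBRepository(
    connection_string="postgresql://user:pass@localhost:5432/timeseries",  # placeholder DSN
    chunk_time_interval="1 hour",
)
repo.connect()
try:
    # First write for a measurement creates the "room_climate" hypertable
    # with one tag column (room) and two field columns.
    repo.write_points(
        [
            DataPoint(
                measurement="room_climate",
                tags={"room": "lab_1"},
                fields={"temperature": 21.3, "humidity": 44.0},
                timestamp=datetime.now(timezone.utc),
            )
        ]
    )

    # Read back the most recent rows and an hourly average of one field.
    latest = repo.query_latest("room_climate", limit=5)
    hourly_avg = repo.aggregate_by_time("room_climate", "temperature", "1 hour", "AVG")
finally:
    repo.close()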