botanu 0.1.dev60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,488 @@
1
+ # SPDX-FileCopyrightText: 2026 The Botanu Authors
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Data Tracking — Track database, storage, and messaging operations.
5
+
6
+ Usage::
7
+
8
+ from botanu.tracking.data import track_db_operation, track_storage_operation
9
+
10
+ with track_db_operation(system="postgresql", operation="SELECT") as db:
11
+ result = cursor.execute("SELECT * FROM users WHERE active = true")
12
+ db.set_result(rows_returned=len(result))
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from contextlib import contextmanager
18
+ from dataclasses import dataclass, field
19
+ from datetime import datetime, timezone
20
+ from typing import Any, Dict, Generator, Optional
21
+
22
+ from opentelemetry import trace
23
+ from opentelemetry.trace import Span, SpanKind, Status, StatusCode
24
+
25
+ # =========================================================================
26
+ # System Normalization Maps
27
+ # =========================================================================
28
+
29
# Alternate spellings that resolve to a canonical database system name.
# Any accepted name that is not listed here is already canonical.
_DB_SYSTEM_ALIASES: Dict[str, str] = {
    "postgres": "postgresql",
    "pg": "postgresql",
    "sqlserver": "mssql",
    "mongo": "mongodb",
}

# Lookup table: accepted lower-case system name -> canonical system name.
# The tuple below fixes the key order of the resulting dict.
DB_SYSTEMS: Dict[str, str] = {
    name: _DB_SYSTEM_ALIASES.get(name, name)
    for name in (
        "postgresql",
        "postgres",
        "pg",
        "mysql",
        "mariadb",
        "mssql",
        "sqlserver",
        "oracle",
        "sqlite",
        "mongodb",
        "mongo",
        "dynamodb",
        "cassandra",
        "couchdb",
        "firestore",
        "cosmosdb",
        "redis",
        "memcached",
        "elasticache",
        "elasticsearch",
        "opensearch",
        "snowflake",
        "bigquery",
        "redshift",
        "databricks",
        "athena",
        "synapse",
        "influxdb",
        "timescaledb",
        "neo4j",
        "neptune",
    )
}
62
+
63
# Alternate spellings that resolve to a canonical storage system name.
# Any accepted name that is not listed here is already canonical.
_STORAGE_SYSTEM_ALIASES: Dict[str, str] = {
    "aws_s3": "s3",
    "google_cloud_storage": "gcs",
    "blob": "azure_blob",
}

# Lookup table: accepted lower-case system name -> canonical system name.
# The tuple below fixes the key order of the resulting dict.
STORAGE_SYSTEMS: Dict[str, str] = {
    name: _STORAGE_SYSTEM_ALIASES.get(name, name)
    for name in (
        "s3",
        "aws_s3",
        "gcs",
        "google_cloud_storage",
        "blob",
        "azure_blob",
        "minio",
        "ceph",
        "nfs",
        "efs",
    )
}
75
+
76
# Alternate spellings that resolve to a canonical messaging system name.
# Any accepted name that is not listed here is already canonical.
_MESSAGING_SYSTEM_ALIASES: Dict[str, str] = {
    "aws_sqs": "sqs",
    "google_pubsub": "pubsub",
    "azure_servicebus": "servicebus",
}

# Lookup table: accepted lower-case system name -> canonical system name.
# The tuple below fixes the key order of the resulting dict.
MESSAGING_SYSTEMS: Dict[str, str] = {
    name: _MESSAGING_SYSTEM_ALIASES.get(name, name)
    for name in (
        "sqs",
        "aws_sqs",
        "sns",
        "kinesis",
        "eventbridge",
        "pubsub",
        "google_pubsub",
        "servicebus",
        "azure_servicebus",
        "eventhub",
        "kafka",
        "rabbitmq",
        "nats",
        "redis_pubsub",
        "celery",
    )
}
93
+
94
+
95
class DBOperation:
    """Upper-case string constants naming database operation types."""

    # Row-level reads and writes.
    SELECT, INSERT, UPDATE, DELETE = "SELECT", "INSERT", "UPDATE", "DELETE"
    UPSERT, MERGE = "UPSERT", "MERGE"

    # Schema-level statements.
    CREATE, DROP, ALTER, INDEX = "CREATE", "DROP", "ALTER", "INDEX"

    # Grouped work.
    TRANSACTION, BATCH = "TRANSACTION", "BATCH"
108
+
109
+
110
class StorageOperation:
    """Upper-case string constants naming object-storage operation types."""

    # Single-object access.
    GET, PUT, DELETE = "GET", "PUT", "DELETE"

    # Listing, metadata and bulk transfer.
    LIST, HEAD, COPY = "LIST", "HEAD", "COPY"
    MULTIPART_UPLOAD = "MULTIPART_UPLOAD"
118
+
119
+
120
class MessagingOperation:
    """Lower-case string constants naming messaging operation types."""

    PUBLISH, CONSUME = "publish", "consume"
    RECEIVE, SEND = "receive", "send"
    SUBSCRIBE = "subscribe"
126
+
127
+
128
+ # =========================================================================
129
+ # Database Tracker
130
+ # =========================================================================
131
+
132
+
133
@dataclass
class DBTracker:
    """Record of one database operation, mirrored onto an optional span.

    Every public setter returns the tracker itself so calls can be chained.
    When ``span`` is falsy the setters still update the tracker's own fields
    but write nothing to a span.
    """

    system: str
    operation: str
    span: Optional[Span] = field(default=None, repr=False)
    # Timezone-aware UTC creation time; ``_finalize`` measures duration from it.
    start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    rows_returned: int = 0
    rows_affected: int = 0
    bytes_read: int = 0
    bytes_written: int = 0

    def set_result(
        self,
        rows_returned: int = 0,
        rows_affected: int = 0,
        bytes_read: int = 0,
        bytes_written: int = 0,
    ) -> DBTracker:
        """Store the result counters and publish the positive ones on the span.

        Each call overwrites all four counters, so an omitted argument resets
        its counter to 0. Counters that are not greater than 0 are not written
        to the span.
        """
        self.rows_returned, self.rows_affected = rows_returned, rows_affected
        self.bytes_read, self.bytes_written = bytes_read, bytes_written
        if not self.span:
            return self

        reported = (
            ("botanu.data.rows_returned", rows_returned),
            ("botanu.data.rows_affected", rows_affected),
            ("botanu.data.bytes_read", bytes_read),
            ("botanu.data.bytes_written", bytes_written),
        )
        for attribute, amount in reported:
            if amount > 0:
                self.span.set_attribute(attribute, amount)
        return self

    def set_table(self, table_name: str, schema: Optional[str] = None) -> DBTracker:
        """Record the table name, and the schema when one is given, on the span."""
        if not self.span:
            return self
        self.span.set_attribute("db.collection.name", table_name)
        if schema:
            self.span.set_attribute("db.schema", schema)
        return self

    def set_query_id(self, query_id: str) -> DBTracker:
        """Record a warehouse query identifier on the span."""
        if self.span:
            self.span.set_attribute("botanu.warehouse.query_id", query_id)
        return self

    def set_bytes_scanned(self, bytes_scanned: int) -> DBTracker:
        """Store *bytes_scanned* as ``bytes_read`` and record it on the span."""
        self.bytes_read = bytes_scanned
        if self.span:
            self.span.set_attribute("botanu.warehouse.bytes_scanned", bytes_scanned)
        return self

    def set_error(self, error: Exception) -> DBTracker:
        """Mark the span as failed and attach *error* to it."""
        if not self.span:
            return self
        failure = Status(StatusCode.ERROR, str(error))
        self.span.set_status(failure)
        self.span.set_attribute("botanu.data.error", type(error).__name__)
        self.span.record_exception(error)
        return self

    def add_metadata(self, **kwargs: Any) -> DBTracker:
        """Attach arbitrary attributes to the span.

        Keys that already start with ``botanu.`` are used unchanged; all other
        keys are placed under the ``botanu.data.`` prefix.
        """
        if not self.span:
            return self
        for key, value in kwargs.items():
            if key.startswith("botanu."):
                attr_key = key
            else:
                attr_key = f"botanu.data.{key}"
            self.span.set_attribute(attr_key, value)
        return self

    def _finalize(self) -> None:
        """Write the elapsed time since ``start_time`` to the span, in milliseconds."""
        if self.span:
            elapsed = datetime.now(timezone.utc) - self.start_time
            self.span.set_attribute("botanu.data.duration_ms", elapsed.total_seconds() * 1000)
206
+
207
+
208
@contextmanager
def track_db_operation(
    system: str,
    operation: str,
    database: Optional[str] = None,
    **kwargs: Any,
) -> Generator[DBTracker, None, None]:
    """Track a database operation inside a CLIENT span.

    Args:
        system: Database system (postgresql, mysql, mongodb, …). Matched
            case-insensitively against ``DB_SYSTEMS``; unknown names are used
            lower-cased as given.
        operation: Type of operation (SELECT, INSERT, …).
        database: Database name (optional).
        **kwargs: Extra attributes, each written as ``botanu.data.<key>``.

    Yields:
        A ``DBTracker`` bound to the newly started span.

    Raises:
        Exception: Any exception raised inside the ``with`` body is recorded
            through ``DBTracker.set_error`` and then re-raised unchanged.
    """
    tracer = trace.get_tracer("botanu.data")
    system_key = system.lower()
    normalized_system = DB_SYSTEMS.get(system_key, system_key)

    with tracer.start_as_current_span(
        name=f"db.{normalized_system}.{operation.lower()}",
        kind=SpanKind.CLIENT,
        # tracker.set_error() below already records the exception on the span;
        # leaving this at its default (True) would record the same exception a
        # second time when it propagates out of the span's context manager.
        record_exception=False,
    ) as span:
        span.set_attribute("db.system", normalized_system)
        span.set_attribute("db.operation", operation.upper())
        span.set_attribute("botanu.vendor", normalized_system)
        if database:
            span.set_attribute("db.name", database)
        for key, value in kwargs.items():
            span.set_attribute(f"botanu.data.{key}", value)

        tracker = DBTracker(system=normalized_system, operation=operation, span=span)
        try:
            yield tracker
        except Exception as exc:
            tracker.set_error(exc)
            raise
        finally:
            # Runs on success and on failure so the duration is always written.
            tracker._finalize()
245
+
246
+
247
+ # =========================================================================
248
+ # Storage Tracker
249
+ # =========================================================================
250
+
251
+
252
@dataclass
class StorageTracker:
    """Record of one storage operation, mirrored onto an optional span.

    Every public setter returns the tracker itself so calls can be chained.
    When ``span`` is falsy the setters still update the tracker's own fields
    but write nothing to a span.
    """

    system: str
    operation: str
    span: Optional[Span] = field(default=None, repr=False)
    # Timezone-aware UTC creation time; ``_finalize`` measures duration from it.
    start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    objects_count: int = 0
    bytes_read: int = 0
    bytes_written: int = 0

    def set_result(
        self,
        objects_count: int = 0,
        bytes_read: int = 0,
        bytes_written: int = 0,
    ) -> StorageTracker:
        """Store the result counters and publish the positive ones on the span.

        Each call overwrites all three counters, so an omitted argument resets
        its counter to 0. Counters that are not greater than 0 are not written
        to the span.
        """
        self.objects_count = objects_count
        self.bytes_read, self.bytes_written = bytes_read, bytes_written
        if not self.span:
            return self

        reported = (
            ("botanu.data.objects_count", objects_count),
            ("botanu.data.bytes_read", bytes_read),
            ("botanu.data.bytes_written", bytes_written),
        )
        for attribute, amount in reported:
            if amount > 0:
                self.span.set_attribute(attribute, amount)
        return self

    def set_bucket(self, bucket: str) -> StorageTracker:
        """Record the bucket name on the span."""
        if self.span:
            self.span.set_attribute("botanu.storage.bucket", bucket)
        return self

    def set_error(self, error: Exception) -> StorageTracker:
        """Mark the span as failed and attach *error* to it."""
        if not self.span:
            return self
        failure = Status(StatusCode.ERROR, str(error))
        self.span.set_status(failure)
        self.span.set_attribute("botanu.storage.error", type(error).__name__)
        self.span.record_exception(error)
        return self

    def add_metadata(self, **kwargs: Any) -> StorageTracker:
        """Attach arbitrary attributes to the span.

        Keys that already start with ``botanu.`` are used unchanged; all other
        keys are placed under the ``botanu.storage.`` prefix.
        """
        if not self.span:
            return self
        for key, value in kwargs.items():
            if key.startswith("botanu."):
                attr_key = key
            else:
                attr_key = f"botanu.storage.{key}"
            self.span.set_attribute(attr_key, value)
        return self

    def _finalize(self) -> None:
        """Write the elapsed time since ``start_time`` to the span, in milliseconds."""
        if self.span:
            elapsed = datetime.now(timezone.utc) - self.start_time
            self.span.set_attribute("botanu.storage.duration_ms", elapsed.total_seconds() * 1000)
307
+
308
+
309
@contextmanager
def track_storage_operation(
    system: str,
    operation: str,
    **kwargs: Any,
) -> Generator[StorageTracker, None, None]:
    """Track a storage operation inside a CLIENT span.

    Args:
        system: Storage system (s3, gcs, azure_blob, …). Matched
            case-insensitively against ``STORAGE_SYSTEMS``; unknown names are
            used lower-cased as given.
        operation: Type of operation (GET, PUT, DELETE, …).
        **kwargs: Extra attributes, each written as ``botanu.storage.<key>``.

    Yields:
        A ``StorageTracker`` bound to the newly started span.

    Raises:
        Exception: Any exception raised inside the ``with`` body is recorded
            through ``StorageTracker.set_error`` and then re-raised unchanged.
    """
    tracer = trace.get_tracer("botanu.storage")
    system_key = system.lower()
    normalized_system = STORAGE_SYSTEMS.get(system_key, system_key)

    with tracer.start_as_current_span(
        name=f"storage.{normalized_system}.{operation.lower()}",
        kind=SpanKind.CLIENT,
        # tracker.set_error() below already records the exception on the span;
        # leaving this at its default (True) would record the same exception a
        # second time when it propagates out of the span's context manager.
        record_exception=False,
    ) as span:
        span.set_attribute("botanu.storage.system", normalized_system)
        span.set_attribute("botanu.storage.operation", operation.upper())
        span.set_attribute("botanu.vendor", normalized_system)
        for key, value in kwargs.items():
            span.set_attribute(f"botanu.storage.{key}", value)

        tracker = StorageTracker(system=normalized_system, operation=operation, span=span)
        try:
            yield tracker
        except Exception as exc:
            tracker.set_error(exc)
            raise
        finally:
            # Runs on success and on failure so the duration is always written.
            tracker._finalize()
342
+
343
+
344
+ # =========================================================================
345
+ # Messaging Tracker
346
+ # =========================================================================
347
+
348
+
349
@dataclass
class MessagingTracker:
    """Record of one messaging operation, mirrored onto an optional span.

    Every public setter returns the tracker itself so calls can be chained.
    When ``span`` is falsy the setters still update the tracker's own fields
    but write nothing to a span.
    """

    system: str
    operation: str
    destination: str
    span: Optional[Span] = field(default=None, repr=False)
    # Timezone-aware UTC creation time; ``_finalize`` measures duration from it.
    start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    message_count: int = 0
    bytes_transferred: int = 0

    def set_result(
        self,
        message_count: int = 0,
        bytes_transferred: int = 0,
    ) -> MessagingTracker:
        """Store the result counters and publish the positive ones on the span.

        Each call overwrites both counters, so an omitted argument resets its
        counter to 0. Counters that are not greater than 0 are not written to
        the span.
        """
        self.message_count, self.bytes_transferred = message_count, bytes_transferred
        if not self.span:
            return self

        reported = (
            ("botanu.messaging.message_count", message_count),
            ("botanu.messaging.bytes_transferred", bytes_transferred),
        )
        for attribute, amount in reported:
            if amount > 0:
                self.span.set_attribute(attribute, amount)
        return self

    def set_error(self, error: Exception) -> MessagingTracker:
        """Mark the span as failed and attach *error* to it."""
        if not self.span:
            return self
        failure = Status(StatusCode.ERROR, str(error))
        self.span.set_status(failure)
        self.span.set_attribute("botanu.messaging.error", type(error).__name__)
        self.span.record_exception(error)
        return self

    def add_metadata(self, **kwargs: Any) -> MessagingTracker:
        """Attach arbitrary attributes to the span.

        Keys that already start with ``botanu.`` are used unchanged; all other
        keys are placed under the ``botanu.messaging.`` prefix.
        """
        if not self.span:
            return self
        for key, value in kwargs.items():
            if key.startswith("botanu."):
                attr_key = key
            else:
                attr_key = f"botanu.messaging.{key}"
            self.span.set_attribute(attr_key, value)
        return self

    def _finalize(self) -> None:
        """Write the elapsed time since ``start_time`` to the span, in milliseconds."""
        if self.span:
            elapsed = datetime.now(timezone.utc) - self.start_time
            self.span.set_attribute(
                "botanu.messaging.duration_ms", elapsed.total_seconds() * 1000
            )
395
+
396
+
397
@contextmanager
def track_messaging_operation(
    system: str,
    operation: str,
    destination: str,
    **kwargs: Any,
) -> Generator[MessagingTracker, None, None]:
    """Track a messaging operation inside a PRODUCER or CONSUMER span.

    The span kind is PRODUCER when *operation* is ``publish`` or ``send``
    (compared case-insensitively) and CONSUMER for every other operation.

    Args:
        system: Messaging system (sqs, kafka, pubsub, …). Matched
            case-insensitively against ``MESSAGING_SYSTEMS``; unknown names
            are used lower-cased as given.
        operation: Type of operation (publish, consume, …).
        destination: Queue/topic name.
        **kwargs: Extra attributes, each written as ``botanu.messaging.<key>``.

    Yields:
        A ``MessagingTracker`` bound to the newly started span.

    Raises:
        Exception: Any exception raised inside the ``with`` body is recorded
            through ``MessagingTracker.set_error`` and then re-raised unchanged.
    """
    tracer = trace.get_tracer("botanu.messaging")
    system_key = system.lower()
    normalized_system = MESSAGING_SYSTEMS.get(system_key, system_key)

    # Normalise once: the span name, the operation attribute and the span-kind
    # decision must all agree, so "PUBLISH" / "Send" are producers too.
    operation_name = operation.lower()
    span_kind = SpanKind.PRODUCER if operation_name in ("publish", "send") else SpanKind.CONSUMER

    with tracer.start_as_current_span(
        name=f"messaging.{normalized_system}.{operation_name}",
        kind=span_kind,
        # tracker.set_error() below already records the exception on the span;
        # leaving this at its default (True) would record the same exception a
        # second time when it propagates out of the span's context manager.
        record_exception=False,
    ) as span:
        span.set_attribute("messaging.system", normalized_system)
        span.set_attribute("messaging.operation", operation_name)
        span.set_attribute("messaging.destination.name", destination)
        span.set_attribute("botanu.vendor", normalized_system)
        for key, value in kwargs.items():
            span.set_attribute(f"botanu.messaging.{key}", value)

        tracker = MessagingTracker(
            system=normalized_system,
            operation=operation,
            destination=destination,
            span=span,
        )
        try:
            yield tracker
        except Exception as exc:
            tracker.set_error(exc)
            raise
        finally:
            # Runs on success and on failure so the duration is always written.
            tracker._finalize()
439
+
440
+
441
+ # =========================================================================
442
+ # Standalone Helpers
443
+ # =========================================================================
444
+
445
+
446
def set_data_metrics(
    rows_returned: int = 0,
    rows_affected: int = 0,
    bytes_read: int = 0,
    bytes_written: int = 0,
    objects_count: int = 0,
    span: Optional[Span] = None,
) -> None:
    """Write data-operation counters to a span.

    The counters go to *span* when it is truthy, otherwise to the span
    returned by ``trace.get_current_span()``. Nothing is written when the
    chosen span is not recording, and counters that are not greater than 0
    are skipped.
    """
    target_span = span or trace.get_current_span()
    if not (target_span and target_span.is_recording()):
        return

    reported = (
        ("botanu.data.rows_returned", rows_returned),
        ("botanu.data.rows_affected", rows_affected),
        ("botanu.data.bytes_read", bytes_read),
        ("botanu.data.bytes_written", bytes_written),
        ("botanu.data.objects_count", objects_count),
    )
    for attribute, amount in reported:
        if amount > 0:
            target_span.set_attribute(attribute, amount)
469
+
470
+
471
def set_warehouse_metrics(
    query_id: str,
    bytes_scanned: int,
    rows_returned: int = 0,
    partitions_scanned: int = 0,
    span: Optional[Span] = None,
) -> None:
    """Write data-warehouse query metrics to a span.

    The metrics go to *span* when it is truthy, otherwise to the span returned
    by ``trace.get_current_span()``. Nothing is written when the chosen span
    is not recording. ``query_id`` and ``bytes_scanned`` are always written;
    the two optional counters are written only when greater than 0.
    """
    target_span = span or trace.get_current_span()
    if not (target_span and target_span.is_recording()):
        return

    target_span.set_attribute("botanu.warehouse.query_id", query_id)
    target_span.set_attribute("botanu.warehouse.bytes_scanned", bytes_scanned)

    optional_counters = (
        ("botanu.data.rows_returned", rows_returned),
        ("botanu.warehouse.partitions_scanned", partitions_scanned),
    )
    for attribute, amount in optional_counters:
        if amount > 0:
            target_span.set_attribute(attribute, amount)