dt-extensions-sdk 1.2.7__py3-none-any.whl → 1.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {dt_extensions_sdk-1.2.7.dist-info → dt_extensions_sdk-1.2.10.dist-info}/METADATA +3 -3
  2. dt_extensions_sdk-1.2.10.dist-info/RECORD +34 -0
  3. {dt_extensions_sdk-1.2.7.dist-info → dt_extensions_sdk-1.2.10.dist-info}/licenses/LICENSE.txt +9 -9
  4. dynatrace_extension/__about__.py +5 -5
  5. dynatrace_extension/__init__.py +27 -27
  6. dynatrace_extension/cli/__init__.py +5 -5
  7. dynatrace_extension/cli/create/__init__.py +1 -1
  8. dynatrace_extension/cli/create/create.py +76 -76
  9. dynatrace_extension/cli/create/extension_template/.gitignore.template +160 -160
  10. dynatrace_extension/cli/create/extension_template/README.md.template +33 -33
  11. dynatrace_extension/cli/create/extension_template/activation.json.template +15 -15
  12. dynatrace_extension/cli/create/extension_template/extension/activationSchema.json.template +118 -118
  13. dynatrace_extension/cli/create/extension_template/extension/extension.yaml.template +17 -17
  14. dynatrace_extension/cli/create/extension_template/extension_name/__main__.py.template +40 -40
  15. dynatrace_extension/cli/create/extension_template/setup.py.template +28 -28
  16. dynatrace_extension/cli/main.py +437 -437
  17. dynatrace_extension/cli/schema.py +129 -129
  18. dynatrace_extension/sdk/__init__.py +3 -3
  19. dynatrace_extension/sdk/activation.py +43 -43
  20. dynatrace_extension/sdk/callback.py +145 -145
  21. dynatrace_extension/sdk/communication.py +483 -483
  22. dynatrace_extension/sdk/event.py +19 -19
  23. dynatrace_extension/sdk/extension.py +1093 -1076
  24. dynatrace_extension/sdk/helper.py +191 -191
  25. dynatrace_extension/sdk/metric.py +118 -118
  26. dynatrace_extension/sdk/runtime.py +67 -67
  27. dynatrace_extension/sdk/snapshot.py +198 -198
  28. dynatrace_extension/sdk/vendor/mureq/LICENSE +13 -13
  29. dynatrace_extension/sdk/vendor/mureq/mureq.py +448 -448
  30. dt_extensions_sdk-1.2.7.dist-info/RECORD +0 -34
  31. {dt_extensions_sdk-1.2.7.dist-info → dt_extensions_sdk-1.2.10.dist-info}/WHEEL +0 -0
  32. {dt_extensions_sdk-1.2.7.dist-info → dt_extensions_sdk-1.2.10.dist-info}/entry_points.txt +0 -0
@@ -1,1076 +1,1093 @@
1
- # SPDX-FileCopyrightText: 2023-present Dynatrace LLC
2
- #
3
- # SPDX-License-Identifier: MIT
4
-
5
- import logging
6
- import sched
7
- import signal
8
- import sys
9
- import threading
10
- import time
11
- from argparse import ArgumentParser
12
- from concurrent.futures import ThreadPoolExecutor
13
- from datetime import datetime, timedelta, timezone
14
- from enum import Enum
15
- from itertools import chain
16
- from pathlib import Path
17
- from threading import Lock, RLock, active_count
18
- from typing import Any, Callable, ClassVar, Dict, List, NamedTuple, Optional, Union
19
-
20
- from .activation import ActivationConfig, ActivationType
21
- from .callback import WrappedCallback
22
- from .communication import CommunicationClient, DebugClient, HttpClient, Status, StatusValue
23
- from .event import Severity
24
- from .metric import Metric, MetricType, SfmMetric, SummaryStat
25
- from .runtime import RuntimeProperties
26
- from .snapshot import Snapshot
27
-
28
- HEARTBEAT_INTERVAL = timedelta(seconds=60)
29
- METRIC_SENDING_INTERVAL = timedelta(seconds=30)
30
- SFM_METRIC_SENDING_INTERVAL = timedelta(seconds=60)
31
- TIME_DIFF_INTERVAL = timedelta(seconds=60)
32
-
33
- CALLBACKS_THREAD_POOL_SIZE = 100
34
- INTERNAL_THREAD_POOL_SIZE = 20
35
-
36
- RFC_3339_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
37
- DATASOURCE_TYPE = "python"
38
-
39
- logging.raiseExceptions = False
40
- formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s (%(threadName)s): %(message)s")
41
- error_handler = logging.StreamHandler()
42
- error_handler.addFilter(lambda record: record.levelno >= logging.ERROR)
43
- error_handler.setFormatter(formatter)
44
- std_handler = logging.StreamHandler(sys.stdout)
45
- std_handler.addFilter(lambda record: record.levelno < logging.ERROR)
46
- std_handler.setFormatter(formatter)
47
- extension_logger = logging.getLogger(__name__)
48
- extension_logger.setLevel(logging.INFO)
49
- extension_logger.addHandler(error_handler)
50
- extension_logger.addHandler(std_handler)
51
-
52
- api_logger = logging.getLogger("api")
53
- api_logger.setLevel(logging.INFO)
54
- api_logger.addHandler(error_handler)
55
- api_logger.addHandler(std_handler)
56
-
57
- DT_EVENT_SCHEMA = {
58
- "eventType": str,
59
- "title": str,
60
- "startTime": int,
61
- "endTime": int,
62
- "timeout": int,
63
- "entitySelector": str,
64
- "properties": dict,
65
- }
66
-
67
-
68
- class AggregationMode(Enum):
69
- ALL = "include_all"
70
- NONE = "include_none"
71
- LIST = "include_list"
72
-
73
-
74
- class DtEventType(str, Enum):
75
- """Event type.
76
-
77
- Note:
78
- Official API v2 documentation:
79
-
80
- https://docs.dynatrace.com/docs/dynatrace-api/environment-api/events-v2/post-event
81
- """
82
-
83
- AVAILABILITY_EVENT = "AVAILABILITY_EVENT"
84
- CUSTOM_INFO = "CUSTOM_INFO"
85
- CUSTOM_ALERT = "CUSTOM_ALERT"
86
- CUSTOM_ANNOTATION = "CUSTOM_ANNOTATION"
87
- CUSTOM_CONFIGURATION = "CUSTOM_CONFIGURATION"
88
- CUSTOM_DEPLOYMENT = "CUSTOM_DEPLOYMENT"
89
- ERROR_EVENT = "ERROR_EVENT"
90
- MARKED_FOR_TERMINATION = "MARKED_FOR_TERMINATION"
91
- PERFORMANCE_EVENT = "PERFORMANCE_EVENT"
92
- RESOURCE_CONTENTION_EVENT = "RESOURCE_CONTENTION_EVENT"
93
-
94
-
95
- class CountMetricRegistrationEntry(NamedTuple):
96
- metric_key: str
97
- aggregation_mode: AggregationMode
98
- dimensions_list: list[str]
99
-
100
- @staticmethod
101
- def make_list(metric_key: str, dimensions_list: List[str]):
102
- """Build an entry that uses defined list of dimensions for aggregation.
103
-
104
- Args:
105
- metric_key: Metric key in string.
106
- dimensions_list: List of dimensions.
107
- """
108
- return CountMetricRegistrationEntry(metric_key, AggregationMode.LIST, dimensions_list)
109
-
110
- @staticmethod
111
- def make_all(metric_key: str):
112
- """Build an entry that uses all mint dimensions for aggregation.
113
-
114
- Args:
115
- metric_key: Metric key in string.
116
- """
117
- return CountMetricRegistrationEntry(metric_key, AggregationMode.ALL, [])
118
-
119
- @staticmethod
120
- def make_none(metric_key: str):
121
- """Build an entry that uses none of mint dimensions for aggregation.
122
-
123
- Args:
124
- metric_key: Metric key in string.
125
- """
126
- return CountMetricRegistrationEntry(metric_key, AggregationMode.NONE, [])
127
-
128
- def registration_items_dict(self):
129
- result = {"aggregation_mode": self.aggregation_mode.value}
130
- if self.aggregation_mode == AggregationMode.LIST:
131
- result["dimensions_list"] = self.dimensions_list
132
- return result
133
- else:
134
- return result
135
-
136
-
137
- def _add_sfm_metric(metric: Metric, sfm_metrics: Optional[List[Metric]] = None):
138
- if sfm_metrics is None:
139
- sfm_metrics = []
140
- metric.validate()
141
- sfm_metrics.append(metric)
142
-
143
-
144
- class Extension:
145
- """Base class for Python extensions.
146
-
147
- Attributes:
148
- logger: Embedded logger object for the extension.
149
- """
150
-
151
- _instance: ClassVar = None
152
- schedule_decorators: ClassVar = []
153
-
154
- def __new__(cls, *args, **kwargs): # noqa: ARG003
155
- if Extension._instance is None:
156
- Extension._instance = super(__class__, cls).__new__(cls)
157
- return Extension._instance
158
-
159
- def __init__(self, name: str = "") -> None:
160
- # do not initialize already created singleton
161
- if hasattr(self, "logger"):
162
- return
163
-
164
- self.logger = extension_logger
165
- self.logger.name = name
166
-
167
- self.extension_config: str = ""
168
- self._feature_sets: dict[str, list[str]] = {}
169
-
170
- # Useful metadata, populated once the extension is started
171
- self.extension_name = name
172
- self.extension_version = ""
173
- self.monitoring_config_name = ""
174
- self._task_id = "development_task_id"
175
- self._monitoring_config_id = "development_config_id"
176
-
177
- # The user can override default EEC enrichment for logs
178
- self.log_event_enrichment = True
179
-
180
- # The Communication client
181
- self._client: CommunicationClient = None # type: ignore
182
-
183
- # Set to true when --fastcheck is passed as a parameter
184
- self._is_fastcheck: bool = True
185
-
186
- # If this is true, we are running locally during development
187
- self._running_in_sim: bool = False
188
-
189
- # Response from EEC to /alive/ requests
190
- self._runtime_properties: RuntimeProperties = RuntimeProperties({})
191
-
192
- # The time difference between the local machine and the cluster time, used to sync callbacks with cluster
193
- self._cluster_time_diff: int = 0
194
-
195
- # Optional callback to be invoked during the fastcheck
196
- self._fast_check_callback: Optional[Callable[[ActivationConfig, str], Status]] = None
197
-
198
- # List of all scheduled callbacks we must run
199
- self._scheduled_callbacks: List[WrappedCallback] = []
200
- self._scheduled_callbacks_before_run: List[WrappedCallback] = []
201
-
202
- # Internal callbacks results, used to report statuses
203
- self._internal_callbacks_results: Dict[str, Status] = {}
204
- self._internal_callbacks_results_lock: Lock = Lock()
205
-
206
- # Running callbacks, used to get the callback info when reporting metrics
207
- self._running_callbacks: Dict[int, WrappedCallback] = {}
208
- self._running_callbacks_lock: Lock = Lock()
209
-
210
- self._scheduler = sched.scheduler(time.time, time.sleep)
211
-
212
- # Executors for the callbacks and internal methods
213
- self._callbacks_executor = ThreadPoolExecutor(max_workers=CALLBACKS_THREAD_POOL_SIZE)
214
- self._internal_executor = ThreadPoolExecutor(max_workers=INTERNAL_THREAD_POOL_SIZE)
215
-
216
- # Extension metrics
217
- self._metrics_lock = RLock()
218
- self._metrics: List[str] = []
219
-
220
- # Self monitoring metrics
221
- self._sfm_metrics_lock = Lock()
222
- self._callbackSfmReport: Dict[str, WrappedCallback] = {}
223
-
224
- # Count metric delta signals
225
- self._delta_signal_buffer: set[str] = set()
226
- self._registered_count_metrics: set[str] = set()
227
-
228
- # Self tech rule
229
- self._techrule = ""
230
-
231
- # Error message from caught exception in self.initialize()
232
- self._initialization_error: str = ""
233
-
234
- self._parse_args()
235
-
236
- for function, interval, args, activation_type in Extension.schedule_decorators:
237
- params = (self,)
238
- if args is not None:
239
- params = params + args
240
- self.schedule(function, interval, params, activation_type)
241
-
242
- starting_message = f"Starting {self}"
243
- api_logger.info("-" * len(starting_message))
244
- api_logger.info(starting_message)
245
- api_logger.info("-" * len(starting_message))
246
-
247
- def __repr__(self):
248
- return f"{self.__class__.__name__}(name={self.extension_name}, version={self.extension_version})"
249
-
250
- @property
251
- def is_helper(self) -> bool:
252
- """Internal property used by the EEC."""
253
-
254
- return False
255
-
256
- @property
257
- def task_id(self) -> str:
258
- """Internal property used by the EEC."""
259
-
260
- return self._task_id
261
-
262
- @property
263
- def monitoring_config_id(self) -> str:
264
- """Internal property used by the EEC.
265
-
266
- Represents a unique identifier of the monitoring configuration.
267
- that is assigned to this particular extension instance.
268
- """
269
-
270
- return self._monitoring_config_id
271
-
272
- def run(self):
273
- """Launch the extension instance.
274
-
275
- Calling this method starts the main loop of the extension.
276
-
277
- This method must be invoked once to start the extension,
278
-
279
- if `--fastcheck` is set, the extension will run in fastcheck mode,
280
- otherwise the main loop is started, which periodically runs:
281
-
282
- * The scheduled callbacks
283
- * The heartbeat method
284
- * The metrics publisher method
285
- """
286
-
287
- self._setup_signal_handlers()
288
- if self._is_fastcheck:
289
- return self._run_fastcheck()
290
- self._start_extension_loop()
291
-
292
- def _setup_signal_handlers(self):
293
- if sys.platform == "win32":
294
- signal.signal(signal.SIGBREAK, self._shutdown_signal_handler)
295
- signal.signal(signal.SIGINT, self._shutdown_signal_handler)
296
-
297
- def _shutdown_signal_handler(self, sig, frame): # noqa: ARG002
298
- api_logger.info(f"{signal.Signals(sig).name} captured. Flushing metrics and exiting...")
299
- self.on_shutdown()
300
- self._send_metrics()
301
- self._send_sfm_metrics()
302
- sys.exit(0)
303
-
304
- def on_shutdown(self):
305
- """Callback method to be invoked when the extension is shutting down.
306
-
307
- Called when extension exits after it has received shutdown signal from EEC
308
- This is executed before metrics are flushed to EEC
309
- """
310
- pass
311
-
312
- def _schedule_callback(self, callback: WrappedCallback):
313
- if callback.activation_type is not None and callback.activation_type != self.activation_config.type:
314
- api_logger.info(
315
- f"Skipping {callback} with activation type {callback.activation_type} because it is not {self.activation_config.type}"
316
- )
317
- return
318
-
319
- api_logger.debug(f"Scheduling callback {callback}")
320
-
321
- # These properties are updated after the extension starts
322
- callback.cluster_time_diff = self._cluster_time_diff
323
- callback.running_in_sim = self._running_in_sim
324
- self._scheduled_callbacks.append(callback)
325
- self._scheduler.enter(callback.initial_wait_time(), 1, self._callback_iteration, (callback,))
326
-
327
- def schedule(
328
- self,
329
- callback: Callable,
330
- interval: Union[timedelta, int],
331
- args: Optional[tuple] = None,
332
- activation_type: Optional[ActivationType] = None,
333
- ) -> None:
334
- """Schedule a method to be executed periodically.
335
-
336
- The callback method will be periodically invoked in a separate thread.
337
- The callback method is always immediately scheduled for execution.
338
-
339
- Args:
340
- callback: The callback method to be invoked
341
- interval: The time interval between invocations, can be a timedelta object,
342
- or an int representing the number of seconds
343
- args: Arguments to the callback, if any
344
- activation_type: Optional activation type when this callback should run,
345
- can be 'ActivationType.LOCAL' or 'ActivationType.REMOTE'
346
- """
347
-
348
- if isinstance(interval, int):
349
- interval = timedelta(seconds=interval)
350
-
351
- if interval.total_seconds() < 1:
352
- msg = f"Interval must be at least 1 second, got {interval.total_seconds()} seconds"
353
- raise ValueError(msg)
354
-
355
- callback = WrappedCallback(interval, callback, api_logger, args, activation_type=activation_type)
356
- if self._is_fastcheck:
357
- self._scheduled_callbacks_before_run.append(callback)
358
- else:
359
- self._schedule_callback(callback)
360
-
361
- def query(self):
362
- """Callback to be executed every minute by default.
363
-
364
- Optional method that can be implemented by subclasses.
365
- The query method is always scheduled to run every minute.
366
- """
367
- pass
368
-
369
- def initialize(self):
370
- """Callback to be executed when the extension starts.
371
-
372
- Called once after the extension starts and the processes arguments are parsed.
373
- Sometimes there are tasks the user needs to do that must happen before runtime,
374
- but after the activation config has been received, example: Setting the schedule frequency
375
- based on the user input on the monitoring configuration, this can be done on this method
376
- """
377
- pass
378
-
379
- def fastcheck(self) -> Status:
380
- """Callback executed when extension is launched.
381
-
382
- Called if the extension is run in the `fastcheck` mode. Only invoked for remote
383
- extensions.
384
- This method is not called if fastcheck callback was already registered with
385
- Extension.register_fastcheck().
386
-
387
- Returns:
388
- Status with optional message whether the fastcheck succeed or failed.
389
- """
390
- return Status(StatusValue.OK)
391
-
392
- def register_fastcheck(self, fast_check_callback: Callable[[ActivationConfig, str], Status]):
393
- """Registers fastcheck callback that is executed in the `fastcheck` mode.
394
-
395
- Extension.fastcheck() is not called if fastcheck callback is registered with this method
396
-
397
- Args:
398
- fast_check_callback: callable called with ActivationConfig and
399
- extension_config arguments. Must return the Status with optional message
400
- whether the fastcheck succeed or failed.
401
- """
402
- if self._fast_check_callback:
403
- api_logger.error("More than one function assigned to fastcheck, last registered one was kept.")
404
-
405
- self._fast_check_callback = fast_check_callback
406
-
407
- def _register_count_metrics(self, *count_metric_entries: CountMetricRegistrationEntry) -> None:
408
- """Send a count metric registration request to EEC.
409
-
410
- Args:
411
- count_metric_entries: CountMetricRegistrationEntry objects for each count metric to register
412
- """
413
- json_pattern = {
414
- metric_entry.metric_key: metric_entry.registration_items_dict() for metric_entry in count_metric_entries
415
- }
416
- self._client.register_count_metrics(json_pattern)
417
-
418
- def _send_count_delta_signal(self, metric_keys: set[str], force: bool = True) -> None:
419
- """Send calculate-delta signal to EEC monotonic converter.
420
-
421
- Args:
422
- metric_keys: List with metrics for which we want to calculate deltas
423
- force: If true, it forces the metrics from cache to be pushed into EEC and then delta signal request is
424
- sent. Otherwise, it puts delta signal request in cache and request is sent after nearest (in time) sending
425
- metrics to EEC event
426
- """
427
-
428
- with self._metrics_lock:
429
- if not force:
430
- for key in metric_keys:
431
- self._delta_signal_buffer.add(key)
432
- return
433
-
434
- self._send_metrics()
435
- self._client.send_count_delta_signal(metric_keys)
436
- self._delta_signal_buffer = {
437
- metric_key for metric_key in self._delta_signal_buffer if metric_key not in metric_keys
438
- }
439
-
440
- def report_metric(
441
- self,
442
- key: str,
443
- value: Union[float, str, int, SummaryStat],
444
- dimensions: Optional[Dict[str, str]] = None,
445
- techrule: Optional[str] = None,
446
- timestamp: Optional[datetime] = None,
447
- metric_type: MetricType = MetricType.GAUGE,
448
- ) -> None:
449
- """Report a metric.
450
-
451
- Metric is sent to EEC using an HTTP request and MINT protocol. EEC then
452
- sends the metrics to the tenant.
453
-
454
- By default, it reports a gauge metric.
455
-
456
- Args:
457
- key: The metric key, must follow the MINT specification
458
- value: The metric value, can be a simple value or a SummaryStat
459
- dimensions: A dictionary of dimensions
460
- techrule: The technology rule string set by self.techrule setter.
461
- timestamp: The timestamp of the metric, defaults to the current time
462
- metric_type: The type of the metric, defaults to MetricType.GAUGE
463
- """
464
-
465
- if techrule:
466
- if not dimensions:
467
- dimensions = {}
468
- if "dt.techrule.id" not in dimensions:
469
- dimensions["dt.techrule.id"] = techrule
470
-
471
- if metric_type == MetricType.COUNT and timestamp is None:
472
- # We must report a timestamp for count metrics
473
- timestamp = datetime.now()
474
-
475
- metric = Metric(key=key, value=value, dimensions=dimensions, metric_type=metric_type, timestamp=timestamp)
476
- self._add_metric(metric)
477
-
478
- def report_mint_lines(self, lines: List[str]) -> None:
479
- """Report mint lines using the MINT protocol
480
-
481
- Examples:
482
- Metric lines must comply with the MINT format.
483
-
484
- >>> self.report_mint_lines(["my_metric 1", "my_other_metric 2"])
485
-
486
- Args:
487
- lines: A list of mint lines
488
- """
489
- self._add_mint_lines(lines)
490
-
491
- def report_event(
492
- self,
493
- title: str,
494
- description: str,
495
- properties: Optional[dict] = None,
496
- timestamp: Optional[datetime] = None,
497
- severity: Union[Severity, str] = Severity.INFO,
498
- ) -> None:
499
- """Report an event using log ingest.
500
-
501
- Args:
502
- title: The title of the event
503
- description: The description of the event
504
- properties: A dictionary of extra event properties
505
- timestamp: The timestamp of the event, defaults to the current time
506
- severity: The severity of the event, defaults to Severity.INFO
507
- """
508
- if timestamp is None:
509
- timestamp = datetime.now(tz=timezone.utc)
510
-
511
- if properties is None:
512
- properties = {}
513
-
514
- event = {
515
- "content": f"{title}\n{description}",
516
- "title": title,
517
- "description": description,
518
- "timestamp": timestamp.strftime(RFC_3339_FORMAT),
519
- "severity": severity.value if isinstance(severity, Severity) else severity,
520
- **self._metadata,
521
- **properties,
522
- }
523
- self._send_events(event)
524
-
525
- def report_dt_event(
526
- self,
527
- event_type: DtEventType,
528
- title: str,
529
- start_time: Optional[int] = None,
530
- end_time: Optional[int] = None,
531
- timeout: Optional[int] = None,
532
- entity_selector: Optional[str] = None,
533
- properties: Optional[dict[str, str]] = None,
534
- ) -> None:
535
- """
536
- Reports an event using the v2 event ingest API.
537
-
538
- Unlike ``report_event``, this directly raises an event or even a problem
539
- based on the specified ``event_type``.
540
-
541
- Note:
542
- For reference see: https://www.dynatrace.com/support/help/dynatrace-api/environment-api/events-v2/post-event
543
-
544
- Args:
545
- event_type: The event type chosen from type Enum (required)
546
- title: The title of the event (required)
547
- start_time: The start time of event in UTC ms, if not set, current timestamp (optional)
548
- end_time: The end time of event in UTC ms, if not set, current timestamp + timeout (optional)
549
- timeout: The timeout of event in minutes, if not set, 15 (optional)
550
- entity_selector: The entity selector, if not set, the event is associated with environment entity (optional)
551
- properties: A map of event properties (optional)
552
- """
553
- event: Dict[str, Any] = {"eventType": event_type, "title": title}
554
- if start_time:
555
- event["startTime"] = start_time
556
- if end_time:
557
- event["endTime"] = end_time
558
- if timeout:
559
- event["timeout"] = timeout
560
- if entity_selector:
561
- event["entitySelector"] = entity_selector
562
- if properties:
563
- event["properties"] = properties
564
-
565
- self._send_dt_event(event)
566
-
567
- def report_dt_event_dict(self, event: dict):
568
- """Report an event using event ingest API with provided dictionary.
569
-
570
- Note:
571
- For reference see: https://www.dynatrace.com/support/help/dynatrace-api/environment-api/events-v2/post-event
572
-
573
- Format of the event dictionary::
574
-
575
- {
576
- "type": "object",
577
- "required": ["eventType", "title"],
578
- "properties": {
579
- "eventType": {
580
- "type": "string",
581
- "enum": [
582
- "CUSTOM_INFO",
583
- "CUSTOM_ANNOTATION",
584
- "CUSTOM_CONFIGURATION",
585
- "CUSTOM_DEPLOYMENT",
586
- "MARKED_FOR_TERMINATION",
587
- "ERROR_EVENT",
588
- "AVAILABILITY_EVENT",
589
- "PERFORMANCE_EVENT",
590
- "RESOURCE_CONTENTION_EVENT",
591
- "CUSTOM_ALERT"
592
- ]
593
- },
594
- "title": {
595
- "type": "string",
596
- "minLength": 1
597
- },
598
- "startTime": {"type": "integer"},
599
- "endTime": {"type": "integer"},
600
- "timeout": {"type": "integer"},
601
- "entitySelector": {"type": "string"},
602
- "properties": {
603
- "type": "object",
604
- "patternProperties": {
605
- "^.*$": {"type": "string"}
606
- }
607
- }
608
- }
609
- }
610
- """
611
-
612
- if "eventType" not in event or "title" not in event:
613
- raise ValueError('"eventType" not present' if "eventType" not in event else '"title" not present in event')
614
- for key, value in event.items():
615
- if DT_EVENT_SCHEMA[key] is None:
616
- msg = f'invalid member: "{key}"'
617
- raise ValueError(msg)
618
- if key == "eventType" and value not in list(DtEventType):
619
- msg = f"Event type must be a DtEventType enum value, got: {value}"
620
- raise ValueError(msg)
621
- if key == "properties":
622
- for prop_key, prop_val in event[key].items():
623
- if not isinstance(prop_key, str) or not isinstance(prop_val, str):
624
- msg = f'invalid "properties" member: {prop_key}: {prop_val}, required: "str": str'
625
- raise ValueError(msg)
626
- self._send_dt_event(event)
627
-
628
- def report_log_event(self, log_event: dict):
629
- """Report a custom log event using log ingest.
630
-
631
- Note:
632
- See reference: https://www.dynatrace.com/support/help/shortlink/log-monitoring-log-data-ingestion
633
-
634
- Args:
635
- log_event: The log event dictionary.
636
- """
637
- self._send_events(log_event)
638
-
639
- def report_log_events(self, log_events: List[dict]):
640
- """Report a list of custom log events using log ingest.
641
-
642
- Args:
643
- log_events: The list of log events
644
- """
645
- self._send_events(log_events)
646
-
647
- def report_log_lines(self, log_lines: List[Union[str, bytes]]):
648
- """Report a list of log lines using log ingest
649
-
650
- Args:
651
- log_lines: The list of log lines
652
- """
653
- events = [{"content": line} for line in log_lines]
654
- self._send_events(events)
655
-
656
- @property
657
- def enabled_feature_sets(self) -> dict[str, list[str]]:
658
- """Map of enabled feautre sets and corresponding metrics.
659
-
660
- Returns:
661
- Dictionary containing enabled feature sets with corresponding
662
- metrics defined in ``extension.yaml``.
663
- """
664
- return {
665
- feature_set_name: metric_keys
666
- for feature_set_name, metric_keys in self._feature_sets.items()
667
- if feature_set_name in self.activation_config.feature_sets or feature_set_name == "default"
668
- }
669
-
670
- @property
671
- def enabled_feature_sets_names(self) -> list[str]:
672
- """Names of enabled feature sets.
673
-
674
- Returns:
675
- List containing names of enabled feature sets.
676
- """
677
- return list(self.enabled_feature_sets.keys())
678
-
679
- @property
680
- def enabled_feature_sets_metrics(self) -> list[str]:
681
- """Enabled metrics.
682
-
683
- Returns:
684
- List of all metric keys from enabled feature sets
685
- """
686
- return list(chain(*self.enabled_feature_sets.values()))
687
-
688
- def _parse_args(self):
689
- parser = ArgumentParser(description="Python extension parameters")
690
-
691
- # Production parameters, these are passed by the EEC
692
- parser.add_argument("--dsid", required=False, default=None)
693
- parser.add_argument("--url", required=False)
694
- parser.add_argument("--idtoken", required=False)
695
- parser.add_argument(
696
- "--loglevel",
697
- help="Set extension log level. Info is default.",
698
- type=str,
699
- choices=["debug", "info"],
700
- default="info",
701
- )
702
- parser.add_argument("--fastcheck", action="store_true", default=False)
703
- parser.add_argument("--monitoring_config_id", required=False, default=None)
704
- parser.add_argument("--local-ingest", action="store_true", default=False)
705
- parser.add_argument("--local-ingest-port", required=False, default=14499)
706
-
707
- # Debug parameters, these are used when running the extension locally
708
- parser.add_argument("--extensionconfig", required=False, default=None)
709
- parser.add_argument("--activationconfig", required=False, default="activation.json")
710
- parser.add_argument("--no-print-metrics", required=False, action="store_true")
711
-
712
- args, unknown = parser.parse_known_args()
713
- self._is_fastcheck = args.fastcheck
714
- if args.dsid is None:
715
- # DEV mode
716
- self._running_in_sim = True
717
- print_metrics = not args.no_print_metrics
718
- self._client = DebugClient(
719
- activation_config_path=args.activationconfig,
720
- extension_config_path=args.extensionconfig,
721
- logger=api_logger,
722
- local_ingest=args.local_ingest,
723
- local_ingest_port=args.local_ingest_port,
724
- print_metrics=print_metrics,
725
- )
726
- RuntimeProperties.set_default_log_level(args.loglevel)
727
- else:
728
- # EEC mode
729
- self._client = HttpClient(args.url, args.dsid, args.idtoken, api_logger)
730
- self._task_id = args.dsid
731
- self._monitoring_config_id = args.monitoring_config_id
732
- api_logger.info(f"DSID = {self.task_id}, monitoring config id = {self._monitoring_config_id}")
733
-
734
- self.activation_config = ActivationConfig(self._client.get_activation_config())
735
- self.extension_config = self._client.get_extension_config()
736
- self._feature_sets = self._client.get_feature_sets()
737
-
738
- self.monitoring_config_name = self.activation_config.description
739
- self.extension_version = self.activation_config.version
740
-
741
- if not self._is_fastcheck:
742
- try:
743
- self.initialize()
744
- if not self.is_helper:
745
- self.schedule(self.query, timedelta(minutes=1))
746
- except Exception as e:
747
- msg = f"Error running self.initialize {self}: {e!r}"
748
- api_logger.exception(msg)
749
- self._client.send_status(Status(StatusValue.GENERIC_ERROR, msg))
750
- self._initialization_error = msg
751
- raise e
752
-
753
- @property
754
- def _metadata(self) -> dict:
755
- return {
756
- "dt.extension.config.id": self._runtime_properties.extconfig,
757
- "dt.extension.ds": DATASOURCE_TYPE,
758
- "dt.extension.version": self.extension_version,
759
- "dt.extension.name": self.extension_name,
760
- "monitoring.configuration": self.monitoring_config_name,
761
- }
762
-
763
- def _run_fastcheck(self):
764
- api_logger.info(f"Running fastcheck for monitoring configuration '{self.monitoring_config_name}'")
765
- try:
766
- if self._fast_check_callback:
767
- status = self._fast_check_callback(self.activation_config, self.extension_config)
768
- api_logger.info(f"Sending fastcheck status: {status}")
769
- self._client.send_status(status)
770
- return
771
-
772
- status = self.fastcheck()
773
- api_logger.info(f"Sending fastcheck status: {status}")
774
- self._client.send_status(status)
775
- except Exception as e:
776
- status = Status(StatusValue.GENERIC_ERROR, f"Python datasource fastcheck error: {e!r}")
777
- api_logger.error(f"Error running fastcheck {self}: {e!r}")
778
- self._client.send_status(status)
779
- raise
780
-
781
- def _run_callback(self, callback: WrappedCallback):
782
- if not callback.running:
783
- # Add the callback to the list of running callbacks
784
- with self._running_callbacks_lock:
785
- current_thread_id = threading.get_ident()
786
- self._running_callbacks[current_thread_id] = callback
787
-
788
- callback()
789
-
790
- with self._sfm_metrics_lock:
791
- self._callbackSfmReport[callback.name()] = callback
792
- # Remove the callback from the list of running callbacks
793
- with self._running_callbacks_lock:
794
- self._running_callbacks.pop(current_thread_id, None)
795
-
796
- def _callback_iteration(self, callback: WrappedCallback):
797
- self._callbacks_executor.submit(self._run_callback, callback)
798
- callback.iterations += 1
799
- next_timestamp = callback.get_next_execution_timestamp()
800
- self._scheduler.enterabs(next_timestamp, 1, self._callback_iteration, (callback,))
801
-
802
    def _start_extension_loop(self):
        """Start the blocking main loop: schedule all callbacks and internal tasks, then run the scheduler."""
        api_logger.debug(f"Starting main loop for monitoring configuration: '{self.monitoring_config_name}'")

        # These were scheduled before the extension started, schedule them now
        for callback in self._scheduled_callbacks_before_run:
            self._schedule_callback(callback)
        self._heartbeat_iteration()
        self._metrics_iteration()
        self._sfm_metrics_iteration()
        self._timediff_iteration()
        # Blocks until the scheduler queue is empty (effectively forever).
        self._scheduler.run()
    def _timediff_iteration(self):
        """Refresh the cluster time difference, then reschedule itself."""
        self._internal_executor.submit(self._update_cluster_time_diff)
        self._scheduler.enter(TIME_DIFF_INTERVAL.total_seconds(), 1, self._timediff_iteration)
    def _heartbeat_iteration(self):
        """Submit a heartbeat to the internal pool, then reschedule itself."""
        self._internal_executor.submit(self._heartbeat)
        self._scheduler.enter(HEARTBEAT_INTERVAL.total_seconds(), 1, self._heartbeat_iteration)
    def _metrics_iteration(self):
        """Submit a metrics flush to the internal pool, then reschedule itself."""
        self._internal_executor.submit(self._send_metrics)
        self._scheduler.enter(METRIC_SENDING_INTERVAL.total_seconds(), 1, self._metrics_iteration)
    def _sfm_metrics_iteration(self):
        """Submit a self-monitoring metrics flush to the internal pool, then reschedule itself."""
        self._internal_executor.submit(self._send_sfm_metrics)
        self._scheduler.enter(SFM_METRIC_SENDING_INTERVAL.total_seconds(), 1, self._sfm_metrics_iteration)
    def _send_metrics(self):
        """Flush cached MINT lines to the EEC and record the outcome for heartbeat reporting.

        The metric cache is cleared after sending regardless of how many
        lines the EEC rejected; rejections only affect the reported status.
        """
        with self._metrics_lock:
            with self._internal_callbacks_results_lock:
                if self._metrics:
                    number_of_metrics = len(self._metrics)
                    responses = self._client.send_metrics(self._metrics)

                    # Optimistically record OK, then downgrade if any lines were invalid.
                    self._internal_callbacks_results[self._send_metrics.__name__] = Status(StatusValue.OK)
                    lines_invalid = sum(response.lines_invalid for response in responses)
                    if lines_invalid > 0:
                        message = f"{lines_invalid} invalid metric lines found"
                        self._internal_callbacks_results[self._send_metrics.__name__] = Status(
                            StatusValue.GENERIC_ERROR, message
                        )

                    api_logger.info(f"Sent {number_of_metrics} metric lines to EEC: {responses}")
                    self._metrics = []
- def _prepare_sfm_metrics(self) -> List[str]:
849
- """Prepare self monitoring metrics.
850
-
851
- Builds the list of mint metric lines to send as self monitoring metrics.
852
- """
853
-
854
- sfm_metrics: List[Metric] = []
855
- sfm_dimensions = {"dt.extension.config.id": self.monitoring_config_id}
856
- _add_sfm_metric(
857
- SfmMetric("threads", active_count(), sfm_dimensions, client_facing=True, metric_type=MetricType.DELTA),
858
- sfm_metrics,
859
- )
860
-
861
- for name, callback in self._callbackSfmReport.items():
862
- sfm_dimensions = {"callback": name, "dt.extension.config.id": self.monitoring_config_id}
863
- _add_sfm_metric(
864
- SfmMetric(
865
- "execution.time",
866
- f"{callback.duration_interval_total:.4f}",
867
- sfm_dimensions,
868
- client_facing=True,
869
- metric_type=MetricType.GAUGE,
870
- ),
871
- sfm_metrics,
872
- )
873
- _add_sfm_metric(
874
- SfmMetric(
875
- "execution.total.count",
876
- callback.executions_total,
877
- sfm_dimensions,
878
- client_facing=True,
879
- metric_type=MetricType.DELTA,
880
- ),
881
- sfm_metrics,
882
- )
883
- _add_sfm_metric(
884
- SfmMetric(
885
- "execution.count",
886
- callback.executions_per_interval,
887
- sfm_dimensions,
888
- client_facing=True,
889
- metric_type=MetricType.DELTA,
890
- ),
891
- sfm_metrics,
892
- )
893
- _add_sfm_metric(
894
- SfmMetric(
895
- "execution.ok.count",
896
- callback.ok_count,
897
- sfm_dimensions,
898
- client_facing=True,
899
- metric_type=MetricType.DELTA,
900
- ),
901
- sfm_metrics,
902
- )
903
- _add_sfm_metric(
904
- SfmMetric(
905
- "execution.timeout.count",
906
- callback.timeouts_count,
907
- sfm_dimensions,
908
- client_facing=True,
909
- metric_type=MetricType.DELTA,
910
- ),
911
- sfm_metrics,
912
- )
913
- _add_sfm_metric(
914
- SfmMetric(
915
- "execution.exception.count",
916
- callback.exception_count,
917
- sfm_dimensions,
918
- client_facing=True,
919
- metric_type=MetricType.DELTA,
920
- ),
921
- sfm_metrics,
922
- )
923
- callback.clear_sfm_metrics()
924
- return [metric.to_mint_line() for metric in sfm_metrics]
925
-
926
    def _send_sfm_metrics(self):
        """Send self-monitoring metric lines to the EEC and record the outcome."""
        with self._sfm_metrics_lock:
            lines = self._prepare_sfm_metrics()
            # Flushes the cache of metrics, maybe we should only flush if they were successfully sent
            self._callbackSfmReport.clear()
        response = self._client.send_sfm_metrics(lines)

        with self._internal_callbacks_results_lock:
            self._internal_callbacks_results[self._send_sfm_metrics.__name__] = Status(StatusValue.OK)
            if response.lines_invalid > 0:
                message = f"{response.lines_invalid} invalid metric lines found"
                self._internal_callbacks_results[self._send_sfm_metrics.__name__] = Status(
                    StatusValue.GENERIC_ERROR, message
                )
- def _build_current_status(self):
942
- overall_status = Status(StatusValue.OK)
943
-
944
- if self._initialization_error:
945
- overall_status.status = StatusValue.GENERIC_ERROR
946
- overall_status.message = self._initialization_error
947
- return overall_status
948
-
949
- internal_callback_error = False
950
- messages = []
951
- with self._internal_callbacks_results_lock:
952
- for callback, result in self._internal_callbacks_results.items():
953
- if result.is_error():
954
- internal_callback_error = True
955
- overall_status.status = result.status
956
- messages.append(f"{callback}: {result.message}")
957
- if internal_callback_error:
958
- overall_status.message = "\n".join(messages)
959
- return overall_status
960
-
961
- for callback in self._scheduled_callbacks:
962
- if callback.status.is_error():
963
- overall_status.status = callback.status.status
964
- messages.append(f"{callback}: {callback.status.message}")
965
-
966
- overall_status.message = "\n".join(messages)
967
- return overall_status
968
-
969
    def _update_cluster_time_diff(self):
        """Fetch the local-vs-cluster time offset and propagate it to all scheduled callbacks."""
        self._cluster_time_diff = self._client.get_cluster_time_diff()
        for callback in self._scheduled_callbacks:
            callback.cluster_time_diff = self._cluster_time_diff
    def _heartbeat(self):
        """Send the aggregated status to the EEC and refresh runtime properties from its response.

        Failures are logged but never raised, so a broken heartbeat cannot
        kill the internal executor.
        """
        # Placeholder so the except-branch log always has something to show.
        response = bytes("not set", "utf-8")
        try:
            overall_status = self._build_current_status()
            response = self._client.send_status(overall_status)
            self._runtime_properties = RuntimeProperties(response)
        except Exception as e:
            api_logger.error(f"Heartbeat failed because {e}, response {response}", exc_info=True)
    def __del__(self):
        # Best-effort shutdown of both worker pools when the extension object is collected.
        self._callbacks_executor.shutdown()
        self._internal_executor.shutdown()
    def _add_metric(self, metric: Metric):
        """Validate a metric, stamp it with the owning callback's time, and cache its MINT line.

        Raises:
            Whatever ``metric.validate()`` raises for an invalid metric.
        """
        metric.validate()

        # Identify which scheduled callback (if any) this reporting thread belongs to.
        with self._running_callbacks_lock:
            current_thread_id = threading.get_ident()
            current_callback = self._running_callbacks.get(current_thread_id)

        if current_callback is not None and metric.timestamp is None:
            # Adjust the metric timestamp according to the callback start time
            # If the user manually set a metric timestamp, don't adjust it
            metric.timestamp = current_callback.get_adjusted_metric_timestamp()
        elif current_callback is None and metric.timestamp is None:
            api_logger.debug(
                f"Metric {metric} was added by unknown thread {current_thread_id}, cannot adjust the timestamp"
            )

        with self._metrics_lock:
            self._metrics.append(metric.to_mint_line())
- def _add_mint_lines(self, lines: List[str]):
1007
- with self._metrics_lock:
1008
- self._metrics.extend(lines)
1009
-
1010
    def _send_events_internal(self, events: Union[dict, List[dict]]):
        """Send log events to the EEC and record the outcome for heartbeat reporting.

        Args:
            events: A single log event dictionary or a list of them.
        """
        try:
            responses = self._client.send_events(events, self.log_event_enrichment)

            for response in responses:
                with self._internal_callbacks_results_lock:
                    self._internal_callbacks_results[self._send_events.__name__] = Status(StatusValue.OK)
                    # NOTE(review): this `return` exits on the first response without an error
                    # payload, so errors in any later responses are never recorded — confirm
                    # whether `continue` was intended here.
                    if not response or "error" not in response or "message" not in response["error"]:
                        return
                    self._internal_callbacks_results[self._send_events.__name__] = Status(
                        StatusValue.GENERIC_ERROR, response["error"]["message"]
                    )
        except Exception as e:
            api_logger.error(f"Error sending events: {e!r}", exc_info=True)
            with self._internal_callbacks_results_lock:
                self._internal_callbacks_results[self._send_events.__name__] = Status(StatusValue.GENERIC_ERROR, str(e))
    def _send_events(self, events: Union[dict, List[dict]]):
        """Dispatch event sending to the internal executor so callers never block on EEC I/O."""
        self._internal_executor.submit(self._send_events_internal, events)
    def _send_dt_event(self, event: dict[str, str | int | dict[str, str]]):
        """Forward a v2 API event payload to the EEC synchronously."""
        self._client.send_dt_event(event)
    def get_version(self) -> str:
        """Return the extension version as reported by the activation config."""
        return self.activation_config.version
    @property
    def techrule(self) -> str:
        """Internal property used by the EEC."""

        return self._techrule

    @techrule.setter
    def techrule(self, value):
        # Set by the EEC; consumed by report_metric() as the dt.techrule.id dimension.
        self._techrule = value
    def get_activation_config(self) -> ActivationConfig:
        """Retrieve the activation config.

        Represents activation configuration assigned to this particular
        extension instance.

        Returns:
            ActivationConfig object.
        """
        return self.activation_config
    def get_snapshot(self, snapshot_file: Path | str | None = None) -> Snapshot:
        """Retrieves an oneagent snapshot.

        Args:
            snapshot_file: Optional path to the snapshot file, only used when running from dt-sdk run

        Returns:
            Snapshot object.

        Raises:
            FileNotFoundError: In simulator mode, when the snapshot file does not exist.
        """
        # Only in simulator mode is the file path resolved/validated locally;
        # otherwise Snapshot.parse_from_file is called with the argument as given.
        if self._running_in_sim:
            if snapshot_file is None:
                snapshot_file = Path("snapshot.json")
            if isinstance(snapshot_file, str):
                snapshot_file = Path(snapshot_file)
            if not snapshot_file.exists():
                msg = f"snapshot file '{snapshot_file}' not found"
                raise FileNotFoundError(msg)

        return Snapshot.parse_from_file(snapshot_file)
1
+ # SPDX-FileCopyrightText: 2023-present Dynatrace LLC
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ import logging
6
+ import sched
7
+ import signal
8
+ import sys
9
+ import threading
10
+ import time
11
+ from argparse import ArgumentParser
12
+ from concurrent.futures import ThreadPoolExecutor
13
+ from datetime import datetime, timedelta, timezone
14
+ from enum import Enum
15
+ from itertools import chain
16
+ from pathlib import Path
17
+ from threading import Lock, RLock, active_count
18
+ from typing import Any, Callable, ClassVar, Dict, List, NamedTuple, Optional, Union
19
+
20
+ from .activation import ActivationConfig, ActivationType
21
+ from .callback import WrappedCallback
22
+ from .communication import CommunicationClient, DebugClient, HttpClient, Status, StatusValue
23
+ from .event import Severity
24
+ from .metric import Metric, MetricType, SfmMetric, SummaryStat
25
+ from .runtime import RuntimeProperties
26
+ from .snapshot import Snapshot
27
+
28
# Scheduling intervals for the extension's internal housekeeping tasks.
HEARTBEAT_INTERVAL = timedelta(seconds=50)
METRIC_SENDING_INTERVAL = timedelta(seconds=30)
SFM_METRIC_SENDING_INTERVAL = timedelta(seconds=60)
TIME_DIFF_INTERVAL = timedelta(seconds=60)

# Worker pool sizes: user-scheduled callbacks vs. internal tasks (heartbeat, metric flushes).
CALLBACKS_THREAD_POOL_SIZE = 100
INTERNAL_THREAD_POOL_SIZE = 20

RFC_3339_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
DATASOURCE_TYPE = "python"

# Split log output: records below ERROR go to stdout, ERROR and above to the default stream.
logging.raiseExceptions = False
formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s (%(threadName)s): %(message)s")
error_handler = logging.StreamHandler()
error_handler.addFilter(lambda record: record.levelno >= logging.ERROR)
error_handler.setFormatter(formatter)
std_handler = logging.StreamHandler(sys.stdout)
std_handler.addFilter(lambda record: record.levelno < logging.ERROR)
std_handler.setFormatter(formatter)
extension_logger = logging.getLogger(__name__)
extension_logger.setLevel(logging.INFO)
extension_logger.addHandler(error_handler)
extension_logger.addHandler(std_handler)

# Separate logger for SDK/API-level messages, sharing the same handler setup.
api_logger = logging.getLogger("api")
api_logger.setLevel(logging.INFO)
api_logger.addHandler(error_handler)
api_logger.addHandler(std_handler)

# Allowed keys and their expected value types for report_dt_event_dict() payloads.
DT_EVENT_SCHEMA = {
    "eventType": str,
    "title": str,
    "startTime": int,
    "endTime": int,
    "timeout": int,
    "entitySelector": str,
    "properties": dict,
}
66
+
67
+
68
class AggregationMode(Enum):
    """How dimensions are aggregated when registering count metrics with the EEC."""

    ALL = "include_all"
    NONE = "include_none"
    LIST = "include_list"
+
73
+
74
class DtEventType(str, Enum):
    """Event type.

    Note:
        Official API v2 documentation:

        https://docs.dynatrace.com/docs/dynatrace-api/environment-api/events-v2/post-event
    """

    AVAILABILITY_EVENT = "AVAILABILITY_EVENT"
    CUSTOM_INFO = "CUSTOM_INFO"
    CUSTOM_ALERT = "CUSTOM_ALERT"
    CUSTOM_ANNOTATION = "CUSTOM_ANNOTATION"
    CUSTOM_CONFIGURATION = "CUSTOM_CONFIGURATION"
    CUSTOM_DEPLOYMENT = "CUSTOM_DEPLOYMENT"
    ERROR_EVENT = "ERROR_EVENT"
    MARKED_FOR_TERMINATION = "MARKED_FOR_TERMINATION"
    PERFORMANCE_EVENT = "PERFORMANCE_EVENT"
    RESOURCE_CONTENTION_EVENT = "RESOURCE_CONTENTION_EVENT"
+
94
+
95
class CountMetricRegistrationEntry(NamedTuple):
    """Describes how one count metric is registered for EEC delta calculation."""

    metric_key: str
    aggregation_mode: AggregationMode
    dimensions_list: list[str]

    @staticmethod
    def make_list(metric_key: str, dimensions_list: List[str]):
        """Build an entry that uses defined list of dimensions for aggregation.

        Args:
            metric_key: Metric key in string.
            dimensions_list: List of dimensions.
        """
        return CountMetricRegistrationEntry(metric_key, AggregationMode.LIST, dimensions_list)

    @staticmethod
    def make_all(metric_key: str):
        """Build an entry that uses all mint dimensions for aggregation.

        Args:
            metric_key: Metric key in string.
        """
        return CountMetricRegistrationEntry(metric_key, AggregationMode.ALL, [])

    @staticmethod
    def make_none(metric_key: str):
        """Build an entry that uses none of mint dimensions for aggregation.

        Args:
            metric_key: Metric key in string.
        """
        return CountMetricRegistrationEntry(metric_key, AggregationMode.NONE, [])

    def registration_items_dict(self):
        """Serialize this entry into the JSON fragment expected by the EEC registration call."""
        result = {"aggregation_mode": self.aggregation_mode.value}
        # The explicit dimension list is only meaningful for LIST aggregation;
        # the previous if/else returned `result` on both branches.
        if self.aggregation_mode == AggregationMode.LIST:
            result["dimensions_list"] = self.dimensions_list
        return result
+
136
+
137
def _add_sfm_metric(metric: Metric, sfm_metrics: Optional[List[Metric]] = None) -> List[Metric]:
    """Validate a self-monitoring metric and append it to a collection list.

    Args:
        metric: The self-monitoring metric to validate and collect.
        sfm_metrics: Target list; a fresh list is created when omitted.

    Returns:
        The list the metric was appended to. Previously the fresh list created
        for a ``None`` argument was discarded, silently losing the metric;
        returning it makes that call form usable. Callers passing their own
        list are unaffected.
    """
    if sfm_metrics is None:
        sfm_metrics = []
    metric.validate()
    sfm_metrics.append(metric)
    return sfm_metrics
+
143
+
144
class Extension:
    """Base class for Python extensions.

    Implemented as a process-wide singleton (see ``__new__``); constructing it
    again returns the existing instance.

    Attributes:
        logger: Embedded logger object for the extension.
    """

    # Singleton instance, created on first construction.
    _instance: ClassVar = None
    # (function, interval, args, activation_type) tuples registered by decorators
    # before the instance exists; consumed in __init__.
    schedule_decorators: ClassVar = []
154
    def __new__(cls, *args, **kwargs):  # noqa: ARG003
        # Singleton: the first construction wins; later calls return the same object.
        if Extension._instance is None:
            Extension._instance = super(__class__, cls).__new__(cls)
        return Extension._instance
+
159
    def __init__(self, name: str = "") -> None:
        """Initialize extension state, parse CLI args, and register decorator-scheduled callbacks.

        Args:
            name: Logger/extension name; also stored as ``extension_name``.
        """
        # do not initialize already created singleton
        if hasattr(self, "logger"):
            return

        self.logger = extension_logger
        self.logger.name = name

        self.extension_config: str = ""
        self._feature_sets: dict[str, list[str]] = {}

        # Useful metadata, populated once the extension is started
        self.extension_name = name
        self.extension_version = ""
        self.monitoring_config_name = ""
        self._task_id = "development_task_id"
        self._monitoring_config_id = "development_config_id"

        # The user can override default EEC enrichment for logs
        self.log_event_enrichment = True

        # The Communication client
        self._client: CommunicationClient = None  # type: ignore

        # Set to true when --fastcheck is passed as a parameter
        self._is_fastcheck: bool = True

        # If this is true, we are running locally during development
        self._running_in_sim: bool = False

        # Response from EEC to /alive/ requests
        self._runtime_properties: RuntimeProperties = RuntimeProperties({})

        # The time difference between the local machine and the cluster time, used to sync callbacks with cluster
        self._cluster_time_diff: int = 0

        # Optional callback to be invoked during the fastcheck
        self._fast_check_callback: Optional[Callable[[ActivationConfig, str], Status]] = None

        # List of all scheduled callbacks we must run
        self._scheduled_callbacks: List[WrappedCallback] = []
        self._scheduled_callbacks_before_run: List[WrappedCallback] = []

        # Internal callbacks results, used to report statuses
        self._internal_callbacks_results: Dict[str, Status] = {}
        self._internal_callbacks_results_lock: Lock = Lock()

        # Running callbacks, used to get the callback info when reporting metrics
        self._running_callbacks: Dict[int, WrappedCallback] = {}
        self._running_callbacks_lock: Lock = Lock()

        self._scheduler = sched.scheduler(time.time, time.sleep)

        # Timestamps for scheduling of internal callbacks
        self._next_internal_callbacks_timestamps: Dict[str, datetime] = {
            "timediff": datetime.now() + TIME_DIFF_INTERVAL,
            "heartbeat": datetime.now() + HEARTBEAT_INTERVAL,
            "metrics": datetime.now() + METRIC_SENDING_INTERVAL,
            "sfm_metrics": datetime.now() + SFM_METRIC_SENDING_INTERVAL,
        }

        # Executors for the callbacks and internal methods
        self._callbacks_executor = ThreadPoolExecutor(max_workers=CALLBACKS_THREAD_POOL_SIZE)
        self._internal_executor = ThreadPoolExecutor(max_workers=INTERNAL_THREAD_POOL_SIZE)

        # Extension metrics
        self._metrics_lock = RLock()
        self._metrics: List[str] = []

        # Self monitoring metrics
        self._sfm_metrics_lock = Lock()
        self._callbackSfmReport: Dict[str, WrappedCallback] = {}

        # Count metric delta signals
        self._delta_signal_buffer: set[str] = set()
        self._registered_count_metrics: set[str] = set()

        # Self tech rule
        self._techrule = ""

        # Error message from caught exception in self.initialize()
        self._initialization_error: str = ""

        self._parse_args()

        # Attach callbacks registered via class-level decorators before instantiation.
        for function, interval, args, activation_type in Extension.schedule_decorators:
            params = (self,)
            if args is not None:
                params = params + args
            self.schedule(function, interval, params, activation_type)

        starting_message = f"Starting {self}"
        api_logger.info("-" * len(starting_message))
        api_logger.info(starting_message)
        api_logger.info("-" * len(starting_message))
+
255
+ def __repr__(self):
256
+ return f"{self.__class__.__name__}(name={self.extension_name}, version={self.extension_version})"
257
+
258
    @property
    def is_helper(self) -> bool:
        """Internal property used by the EEC; always False for regular extensions."""

        return False
+
264
    @property
    def task_id(self) -> str:
        """Internal property used by the EEC."""

        return self._task_id
+
270
    @property
    def monitoring_config_id(self) -> str:
        """Internal property used by the EEC.

        Represents a unique identifier of the monitoring configuration
        that is assigned to this particular extension instance.
        """

        return self._monitoring_config_id
+
280
    def run(self):
        """Launch the extension instance.

        Calling this method starts the main loop of the extension.

        This method must be invoked once to start the extension,

        if `--fastcheck` is set, the extension will run in fastcheck mode,
        otherwise the main loop is started, which periodically runs:

        * The scheduled callbacks
        * The heartbeat method
        * The metrics publisher method
        """

        self._setup_signal_handlers()
        if self._is_fastcheck:
            # Fastcheck mode: report status once and exit, no main loop.
            return self._run_fastcheck()
        self._start_extension_loop()
+
300
    def _setup_signal_handlers(self):
        """Install shutdown handlers: SIGINT everywhere, plus SIGBREAK on Windows."""
        if sys.platform == "win32":
            signal.signal(signal.SIGBREAK, self._shutdown_signal_handler)
        signal.signal(signal.SIGINT, self._shutdown_signal_handler)
+
305
    def _shutdown_signal_handler(self, sig, frame):  # noqa: ARG002
        """Handle a shutdown signal: run the user hook, flush metrics, then exit cleanly."""
        api_logger.info(f"{signal.Signals(sig).name} captured. Flushing metrics and exiting...")
        self.on_shutdown()
        self._send_metrics()
        self._send_sfm_metrics()
        sys.exit(0)
+
312
    def on_shutdown(self):
        """Callback method to be invoked when the extension is shutting down.

        Called when extension exits after it has received shutdown signal from EEC
        This is executed before metrics are flushed to EEC
        """
        pass
+
320
    def _schedule_callback(self, callback: WrappedCallback):
        """Register a wrapped callback with the scheduler, unless its activation type doesn't match."""
        if callback.activation_type is not None and callback.activation_type != self.activation_config.type:
            api_logger.info(
                f"Skipping {callback} with activation type {callback.activation_type} because it is not {self.activation_config.type}"
            )
            return

        api_logger.debug(f"Scheduling callback {callback}")

        # These properties are updated after the extension starts
        callback.cluster_time_diff = self._cluster_time_diff
        callback.running_in_sim = self._running_in_sim
        self._scheduled_callbacks.append(callback)
        self._scheduler.enter(callback.initial_wait_time(), 1, self._callback_iteration, (callback,))
+
335
    def schedule(
        self,
        callback: Callable,
        interval: Union[timedelta, int],
        args: Optional[tuple] = None,
        activation_type: Optional[ActivationType] = None,
    ) -> None:
        """Schedule a method to be executed periodically.

        The callback method will be periodically invoked in a separate thread.
        The callback method is always immediately scheduled for execution.

        Args:
            callback: The callback method to be invoked
            interval: The time interval between invocations, can be a timedelta object,
                or an int representing the number of seconds
            args: Arguments to the callback, if any
            activation_type: Optional activation type when this callback should run,
                can be 'ActivationType.LOCAL' or 'ActivationType.REMOTE'

        Raises:
            ValueError: If the interval is shorter than one second.
        """

        if isinstance(interval, int):
            interval = timedelta(seconds=interval)

        if interval.total_seconds() < 1:
            msg = f"Interval must be at least 1 second, got {interval.total_seconds()} seconds"
            raise ValueError(msg)

        callback = WrappedCallback(interval, callback, api_logger, args, activation_type=activation_type)
        if self._is_fastcheck:
            # Before run(): defer actual scheduling until the main loop starts.
            self._scheduled_callbacks_before_run.append(callback)
        else:
            self._schedule_callback(callback)
+
369
    def query(self):
        """Callback to be executed every minute by default.

        Optional method that can be implemented by subclasses.
        The query method is always scheduled to run every minute.
        """
        pass
+
377
    def initialize(self):
        """Callback to be executed when the extension starts.

        Called once after the extension starts and the processes arguments are parsed.
        Sometimes there are tasks the user needs to do that must happen before runtime,
        but after the activation config has been received, example: Setting the schedule frequency
        based on the user input on the monitoring configuration, this can be done on this method
        """
        pass
+
387
    def fastcheck(self) -> Status:
        """Callback executed when extension is launched.

        Called if the extension is run in the `fastcheck` mode. Only invoked for remote
        extensions.
        This method is not called if fastcheck callback was already registered with
        Extension.register_fastcheck().

        Returns:
            Status with optional message whether the fastcheck succeed or failed.
        """
        return Status(StatusValue.OK)
+
400
    def register_fastcheck(self, fast_check_callback: Callable[[ActivationConfig, str], Status]):
        """Registers fastcheck callback that is executed in the `fastcheck` mode.

        Extension.fastcheck() is not called if fastcheck callback is registered with this method

        Args:
            fast_check_callback: callable called with ActivationConfig and
                extension_config arguments. Must return the Status with optional message
                whether the fastcheck succeed or failed.
        """
        # Only one callback is supported; re-registration overwrites the previous one.
        if self._fast_check_callback:
            api_logger.error("More than one function assigned to fastcheck, last registered one was kept.")

        self._fast_check_callback = fast_check_callback
+
415
    def _register_count_metrics(self, *count_metric_entries: CountMetricRegistrationEntry) -> None:
        """Send a count metric registration request to EEC.

        Args:
            count_metric_entries: CountMetricRegistrationEntry objects for each count metric to register
        """
        # Payload shape: {metric_key: {"aggregation_mode": ..., ["dimensions_list": ...]}}
        json_pattern = {
            metric_entry.metric_key: metric_entry.registration_items_dict() for metric_entry in count_metric_entries
        }
        self._client.register_count_metrics(json_pattern)
+
426
+ def _send_count_delta_signal(self, metric_keys: set[str], force: bool = True) -> None:
427
+ """Send calculate-delta signal to EEC monotonic converter.
428
+
429
+ Args:
430
+ metric_keys: List with metrics for which we want to calculate deltas
431
+ force: If true, it forces the metrics from cache to be pushed into EEC and then delta signal request is
432
+ sent. Otherwise, it puts delta signal request in cache and request is sent after nearest (in time) sending
433
+ metrics to EEC event
434
+ """
435
+
436
+ with self._metrics_lock:
437
+ if not force:
438
+ for key in metric_keys:
439
+ self._delta_signal_buffer.add(key)
440
+ return
441
+
442
+ self._send_metrics()
443
+ self._client.send_count_delta_signal(metric_keys)
444
+ self._delta_signal_buffer = {
445
+ metric_key for metric_key in self._delta_signal_buffer if metric_key not in metric_keys
446
+ }
447
+
448
    def report_metric(
        self,
        key: str,
        value: Union[float, str, int, SummaryStat],
        dimensions: Optional[Dict[str, str]] = None,
        techrule: Optional[str] = None,
        timestamp: Optional[datetime] = None,
        metric_type: MetricType = MetricType.GAUGE,
    ) -> None:
        """Report a metric.

        Metric is sent to EEC using an HTTP request and MINT protocol. EEC then
        sends the metrics to the tenant.

        By default, it reports a gauge metric.

        Args:
            key: The metric key, must follow the MINT specification
            value: The metric value, can be a simple value or a SummaryStat
            dimensions: A dictionary of dimensions
            techrule: The technology rule string set by self.techrule setter.
            timestamp: The timestamp of the metric, defaults to the current time
            metric_type: The type of the metric, defaults to MetricType.GAUGE
        """

        if techrule:
            if not dimensions:
                dimensions = {}
            # Do not overwrite a techrule dimension the caller already provided.
            if "dt.techrule.id" not in dimensions:
                dimensions["dt.techrule.id"] = techrule

        if metric_type == MetricType.COUNT and timestamp is None:
            # We must report a timestamp for count metrics
            timestamp = datetime.now()

        metric = Metric(key=key, value=value, dimensions=dimensions, metric_type=metric_type, timestamp=timestamp)
        self._add_metric(metric)
+
486
    def report_mint_lines(self, lines: List[str]) -> None:
        """Report mint lines using the MINT protocol

        Examples:
            Metric lines must comply with the MINT format.

            >>> self.report_mint_lines(["my_metric 1", "my_other_metric 2"])

        Args:
            lines: A list of mint lines
        """
        self._add_mint_lines(lines)
+
499
    def report_event(
        self,
        title: str,
        description: str,
        properties: Optional[dict] = None,
        timestamp: Optional[datetime] = None,
        severity: Union[Severity, str] = Severity.INFO,
    ) -> None:
        """Report an event using log ingest.

        Args:
            title: The title of the event
            description: The description of the event
            properties: A dictionary of extra event properties
            timestamp: The timestamp of the event, defaults to the current time
            severity: The severity of the event, defaults to Severity.INFO
        """
        if timestamp is None:
            timestamp = datetime.now(tz=timezone.utc)

        if properties is None:
            properties = {}

        # User-supplied properties are merged last and may override metadata keys.
        event = {
            "content": f"{title}\n{description}",
            "title": title,
            "description": description,
            "timestamp": timestamp.strftime(RFC_3339_FORMAT),
            "severity": severity.value if isinstance(severity, Severity) else severity,
            **self._metadata,
            **properties,
        }
        self._send_events(event)
+
533
    def report_dt_event(
        self,
        event_type: DtEventType,
        title: str,
        start_time: Optional[int] = None,
        end_time: Optional[int] = None,
        timeout: Optional[int] = None,
        entity_selector: Optional[str] = None,
        properties: Optional[dict[str, str]] = None,
    ) -> None:
        """
        Reports an event using the v2 event ingest API.

        Unlike ``report_event``, this directly raises an event or even a problem
        based on the specified ``event_type``.

        Note:
            For reference see: https://www.dynatrace.com/support/help/dynatrace-api/environment-api/events-v2/post-event

        Args:
            event_type: The event type chosen from type Enum (required)
            title: The title of the event (required)
            start_time: The start time of event in UTC ms, if not set, current timestamp (optional)
            end_time: The end time of event in UTC ms, if not set, current timestamp + timeout (optional)
            timeout: The timeout of event in minutes, if not set, 15 (optional)
            entity_selector: The entity selector, if not set, the event is associated with environment entity (optional)
            properties: A map of event properties (optional)
        """
        # Only include optional fields the caller actually provided.
        event: Dict[str, Any] = {"eventType": event_type, "title": title}
        if start_time:
            event["startTime"] = start_time
        if end_time:
            event["endTime"] = end_time
        if timeout:
            event["timeout"] = timeout
        if entity_selector:
            event["entitySelector"] = entity_selector
        if properties:
            event["properties"] = properties

        self._send_dt_event(event)
+
575
+ def report_dt_event_dict(self, event: dict):
576
+ """Report an event using event ingest API with provided dictionary.
577
+
578
+ Note:
579
+ For reference see: https://www.dynatrace.com/support/help/dynatrace-api/environment-api/events-v2/post-event
580
+
581
+ Format of the event dictionary::
582
+
583
+ {
584
+ "type": "object",
585
+ "required": ["eventType", "title"],
586
+ "properties": {
587
+ "eventType": {
588
+ "type": "string",
589
+ "enum": [
590
+ "CUSTOM_INFO",
591
+ "CUSTOM_ANNOTATION",
592
+ "CUSTOM_CONFIGURATION",
593
+ "CUSTOM_DEPLOYMENT",
594
+ "MARKED_FOR_TERMINATION",
595
+ "ERROR_EVENT",
596
+ "AVAILABILITY_EVENT",
597
+ "PERFORMANCE_EVENT",
598
+ "RESOURCE_CONTENTION_EVENT",
599
+ "CUSTOM_ALERT"
600
+ ]
601
+ },
602
+ "title": {
603
+ "type": "string",
604
+ "minLength": 1
605
+ },
606
+ "startTime": {"type": "integer"},
607
+ "endTime": {"type": "integer"},
608
+ "timeout": {"type": "integer"},
609
+ "entitySelector": {"type": "string"},
610
+ "properties": {
611
+ "type": "object",
612
+ "patternProperties": {
613
+ "^.*$": {"type": "string"}
614
+ }
615
+ }
616
+ }
617
+ }
618
+ """
619
+
620
+ if "eventType" not in event or "title" not in event:
621
+ raise ValueError('"eventType" not present' if "eventType" not in event else '"title" not present in event')
622
+ for key, value in event.items():
623
+ if DT_EVENT_SCHEMA[key] is None:
624
+ msg = f'invalid member: "{key}"'
625
+ raise ValueError(msg)
626
+ if key == "eventType" and value not in list(DtEventType):
627
+ msg = f"Event type must be a DtEventType enum value, got: {value}"
628
+ raise ValueError(msg)
629
+ if key == "properties":
630
+ for prop_key, prop_val in event[key].items():
631
+ if not isinstance(prop_key, str) or not isinstance(prop_val, str):
632
+ msg = f'invalid "properties" member: {prop_key}: {prop_val}, required: "str": str'
633
+ raise ValueError(msg)
634
+ self._send_dt_event(event)
635
+
636
+ def report_log_event(self, log_event: dict):
637
+ """Report a custom log event using log ingest.
638
+
639
+ Note:
640
+ See reference: https://www.dynatrace.com/support/help/shortlink/log-monitoring-log-data-ingestion
641
+
642
+ Args:
643
+ log_event: The log event dictionary.
644
+ """
645
+ self._send_events(log_event)
646
+
647
+ def report_log_events(self, log_events: List[dict]):
648
+ """Report a list of custom log events using log ingest.
649
+
650
+ Args:
651
+ log_events: The list of log events
652
+ """
653
+ self._send_events(log_events)
654
+
655
+ def report_log_lines(self, log_lines: List[Union[str, bytes]]):
656
+ """Report a list of log lines using log ingest
657
+
658
+ Args:
659
+ log_lines: The list of log lines
660
+ """
661
+ events = [{"content": line} for line in log_lines]
662
+ self._send_events(events)
663
+
664
+ @property
665
+ def enabled_feature_sets(self) -> dict[str, list[str]]:
666
+ """Map of enabled feautre sets and corresponding metrics.
667
+
668
+ Returns:
669
+ Dictionary containing enabled feature sets with corresponding
670
+ metrics defined in ``extension.yaml``.
671
+ """
672
+ return {
673
+ feature_set_name: metric_keys
674
+ for feature_set_name, metric_keys in self._feature_sets.items()
675
+ if feature_set_name in self.activation_config.feature_sets or feature_set_name == "default"
676
+ }
677
+
678
+ @property
679
+ def enabled_feature_sets_names(self) -> list[str]:
680
+ """Names of enabled feature sets.
681
+
682
+ Returns:
683
+ List containing names of enabled feature sets.
684
+ """
685
+ return list(self.enabled_feature_sets.keys())
686
+
687
+ @property
688
+ def enabled_feature_sets_metrics(self) -> list[str]:
689
+ """Enabled metrics.
690
+
691
+ Returns:
692
+ List of all metric keys from enabled feature sets
693
+ """
694
+ return list(chain(*self.enabled_feature_sets.values()))
695
+
696
    def _parse_args(self):
        """Parse command-line arguments and bootstrap the extension.

        When --dsid is absent the extension runs in DEV mode (DebugClient,
        local simulation); otherwise it runs in EEC mode (HttpClient talking
        to the EEC). Afterwards the activation config, extension config and
        feature sets are loaded and, unless this is a fastcheck run,
        ``initialize()`` is called and the default ``query`` callback is
        scheduled every minute for non-helper extensions.
        """
        parser = ArgumentParser(description="Python extension parameters")

        # Production parameters, these are passed by the EEC
        parser.add_argument("--dsid", required=False, default=None)
        parser.add_argument("--url", required=False)
        parser.add_argument("--idtoken", required=False)
        parser.add_argument(
            "--loglevel",
            help="Set extension log level. Info is default.",
            type=str,
            choices=["debug", "info"],
            default="info",
        )
        parser.add_argument("--fastcheck", action="store_true", default=False)
        parser.add_argument("--monitoring_config_id", required=False, default=None)
        parser.add_argument("--local-ingest", action="store_true", default=False)
        parser.add_argument("--local-ingest-port", required=False, default=14499)

        # Debug parameters, these are used when running the extension locally
        parser.add_argument("--extensionconfig", required=False, default=None)
        parser.add_argument("--activationconfig", required=False, default="activation.json")
        parser.add_argument("--no-print-metrics", required=False, action="store_true")

        # parse_known_args so unrecognized flags do not abort startup
        args, unknown = parser.parse_known_args()
        self._is_fastcheck = args.fastcheck
        if args.dsid is None:
            # DEV mode
            self._running_in_sim = True
            print_metrics = not args.no_print_metrics
            self._client = DebugClient(
                activation_config_path=args.activationconfig,
                extension_config_path=args.extensionconfig,
                logger=api_logger,
                local_ingest=args.local_ingest,
                local_ingest_port=args.local_ingest_port,
                print_metrics=print_metrics,
            )
            RuntimeProperties.set_default_log_level(args.loglevel)
        else:
            # EEC mode
            self._client = HttpClient(args.url, args.dsid, args.idtoken, api_logger)
            self._task_id = args.dsid
            self._monitoring_config_id = args.monitoring_config_id
            api_logger.info(f"DSID = {self.task_id}, monitoring config id = {self._monitoring_config_id}")

        self.activation_config = ActivationConfig(self._client.get_activation_config())
        self.extension_config = self._client.get_extension_config()
        self._feature_sets = self._client.get_feature_sets()

        self.monitoring_config_name = self.activation_config.description
        self.extension_version = self.activation_config.version

        if not self._is_fastcheck:
            try:
                self.initialize()
                if not self.is_helper:
                    # Non-helper extensions get the default query() scheduled every minute
                    self.schedule(self.query, timedelta(minutes=1))
            except Exception as e:
                # Initialization failure is reported to the EEC and remembered so
                # subsequent heartbeats keep reporting the error status.
                msg = f"Error running self.initialize {self}: {e!r}"
                api_logger.exception(msg)
                self._client.send_status(Status(StatusValue.GENERIC_ERROR, msg))
                self._initialization_error = msg
                raise e
760
+
761
+ @property
762
+ def _metadata(self) -> dict:
763
+ return {
764
+ "dt.extension.config.id": self._runtime_properties.extconfig,
765
+ "dt.extension.ds": DATASOURCE_TYPE,
766
+ "dt.extension.version": self.extension_version,
767
+ "dt.extension.name": self.extension_name,
768
+ "monitoring.configuration": self.monitoring_config_name,
769
+ }
770
+
771
+ def _run_fastcheck(self):
772
+ api_logger.info(f"Running fastcheck for monitoring configuration '{self.monitoring_config_name}'")
773
+ try:
774
+ if self._fast_check_callback:
775
+ status = self._fast_check_callback(self.activation_config, self.extension_config)
776
+ api_logger.info(f"Sending fastcheck status: {status}")
777
+ self._client.send_status(status)
778
+ return
779
+
780
+ status = self.fastcheck()
781
+ api_logger.info(f"Sending fastcheck status: {status}")
782
+ self._client.send_status(status)
783
+ except Exception as e:
784
+ status = Status(StatusValue.GENERIC_ERROR, f"Python datasource fastcheck error: {e!r}")
785
+ api_logger.error(f"Error running fastcheck {self}: {e!r}")
786
+ self._client.send_status(status)
787
+ raise
788
+
789
+ def _run_callback(self, callback: WrappedCallback):
790
+ if not callback.running:
791
+ # Add the callback to the list of running callbacks
792
+ with self._running_callbacks_lock:
793
+ current_thread_id = threading.get_ident()
794
+ self._running_callbacks[current_thread_id] = callback
795
+
796
+ callback()
797
+
798
+ with self._sfm_metrics_lock:
799
+ self._callbackSfmReport[callback.name()] = callback
800
+ # Remove the callback from the list of running callbacks
801
+ with self._running_callbacks_lock:
802
+ self._running_callbacks.pop(current_thread_id, None)
803
+
804
+ def _callback_iteration(self, callback: WrappedCallback):
805
+ self._callbacks_executor.submit(self._run_callback, callback)
806
+ callback.iterations += 1
807
+ next_timestamp = callback.get_next_execution_timestamp()
808
+ self._scheduler.enterabs(next_timestamp, 1, self._callback_iteration, (callback,))
809
+
810
    def _start_extension_loop(self):
        """Start the blocking main loop for this monitoring configuration.

        Schedules user callbacks registered before startup, kicks off the
        internal heartbeat / metric-send / SFM / time-diff iterations, then
        runs the scheduler (blocks while scheduled events remain).
        """
        api_logger.debug(f"Starting main loop for monitoring configuration: '{self.monitoring_config_name}'")

        # These were scheduled before the extension started, schedule them now
        for callback in self._scheduled_callbacks_before_run:
            self._schedule_callback(callback)
        self._heartbeat_iteration()
        self._metrics_iteration()
        self._sfm_metrics_iteration()
        self._timediff_iteration()
        self._scheduler.run()
821
+
822
+ def _timediff_iteration(self):
823
+ self._internal_executor.submit(self._update_cluster_time_diff)
824
+ next_timestamp = self._get_and_set_next_internal_callback_timestamp("timediff", TIME_DIFF_INTERVAL)
825
+ self._scheduler.enterabs(next_timestamp, 1, self._timediff_iteration)
826
+
827
+ def _heartbeat_iteration(self):
828
+ self._internal_executor.submit(self._heartbeat)
829
+ next_timestamp = self._get_and_set_next_internal_callback_timestamp("heartbeat", HEARTBEAT_INTERVAL)
830
+ self._scheduler.enterabs(next_timestamp, 2, self._heartbeat_iteration)
831
+
832
+ def _metrics_iteration(self):
833
+ self._internal_executor.submit(self._send_metrics)
834
+ next_timestamp = self._get_and_set_next_internal_callback_timestamp("metrics", METRIC_SENDING_INTERVAL)
835
+ self._scheduler.enterabs(next_timestamp, 1, self._metrics_iteration)
836
+
837
+ def _sfm_metrics_iteration(self):
838
+ self._internal_executor.submit(self._send_sfm_metrics)
839
+ next_timestamp = self._get_and_set_next_internal_callback_timestamp("sfm_metrics", SFM_METRIC_SENDING_INTERVAL)
840
+ self._scheduler.enterabs(next_timestamp, 1, self._sfm_metrics_iteration)
841
+
842
+ def _send_metrics(self):
843
+ with self._metrics_lock:
844
+ with self._internal_callbacks_results_lock:
845
+ if self._metrics:
846
+ number_of_metrics = len(self._metrics)
847
+ responses = self._client.send_metrics(self._metrics)
848
+
849
+ self._internal_callbacks_results[self._send_metrics.__name__] = Status(StatusValue.OK)
850
+ lines_invalid = sum(response.lines_invalid for response in responses)
851
+ if lines_invalid > 0:
852
+ message = f"{lines_invalid} invalid metric lines found"
853
+ self._internal_callbacks_results[self._send_metrics.__name__] = Status(
854
+ StatusValue.GENERIC_ERROR, message
855
+ )
856
+
857
+ api_logger.info(f"Sent {number_of_metrics} metric lines to EEC: {responses}")
858
+ self._metrics = []
859
+
860
+ def _prepare_sfm_metrics(self) -> List[str]:
861
+ """Prepare self monitoring metrics.
862
+
863
+ Builds the list of mint metric lines to send as self monitoring metrics.
864
+ """
865
+
866
+ sfm_metrics: List[Metric] = []
867
+ sfm_dimensions = {"dt.extension.config.id": self.monitoring_config_id}
868
+ _add_sfm_metric(
869
+ SfmMetric("threads", active_count(), sfm_dimensions, client_facing=True, metric_type=MetricType.DELTA),
870
+ sfm_metrics,
871
+ )
872
+
873
+ for name, callback in self._callbackSfmReport.items():
874
+ sfm_dimensions = {"callback": name, "dt.extension.config.id": self.monitoring_config_id}
875
+ _add_sfm_metric(
876
+ SfmMetric(
877
+ "execution.time",
878
+ f"{callback.duration_interval_total:.4f}",
879
+ sfm_dimensions,
880
+ client_facing=True,
881
+ metric_type=MetricType.GAUGE,
882
+ ),
883
+ sfm_metrics,
884
+ )
885
+ _add_sfm_metric(
886
+ SfmMetric(
887
+ "execution.total.count",
888
+ callback.executions_total,
889
+ sfm_dimensions,
890
+ client_facing=True,
891
+ metric_type=MetricType.DELTA,
892
+ ),
893
+ sfm_metrics,
894
+ )
895
+ _add_sfm_metric(
896
+ SfmMetric(
897
+ "execution.count",
898
+ callback.executions_per_interval,
899
+ sfm_dimensions,
900
+ client_facing=True,
901
+ metric_type=MetricType.DELTA,
902
+ ),
903
+ sfm_metrics,
904
+ )
905
+ _add_sfm_metric(
906
+ SfmMetric(
907
+ "execution.ok.count",
908
+ callback.ok_count,
909
+ sfm_dimensions,
910
+ client_facing=True,
911
+ metric_type=MetricType.DELTA,
912
+ ),
913
+ sfm_metrics,
914
+ )
915
+ _add_sfm_metric(
916
+ SfmMetric(
917
+ "execution.timeout.count",
918
+ callback.timeouts_count,
919
+ sfm_dimensions,
920
+ client_facing=True,
921
+ metric_type=MetricType.DELTA,
922
+ ),
923
+ sfm_metrics,
924
+ )
925
+ _add_sfm_metric(
926
+ SfmMetric(
927
+ "execution.exception.count",
928
+ callback.exception_count,
929
+ sfm_dimensions,
930
+ client_facing=True,
931
+ metric_type=MetricType.DELTA,
932
+ ),
933
+ sfm_metrics,
934
+ )
935
+ callback.clear_sfm_metrics()
936
+ return [metric.to_mint_line() for metric in sfm_metrics]
937
+
938
+ def _send_sfm_metrics(self):
939
+ with self._sfm_metrics_lock:
940
+ lines = self._prepare_sfm_metrics()
941
+ # Flushes the cache of metrics, maybe we should only flush if they were successfully sent
942
+ self._callbackSfmReport.clear()
943
+ response = self._client.send_sfm_metrics(lines)
944
+
945
+ with self._internal_callbacks_results_lock:
946
+ self._internal_callbacks_results[self._send_sfm_metrics.__name__] = Status(StatusValue.OK)
947
+ if response.lines_invalid > 0:
948
+ message = f"{response.lines_invalid} invalid metric lines found"
949
+ self._internal_callbacks_results[self._send_sfm_metrics.__name__] = Status(
950
+ StatusValue.GENERIC_ERROR, message
951
+ )
952
+
953
+ def _build_current_status(self):
954
+ overall_status = Status(StatusValue.OK)
955
+
956
+ if self._initialization_error:
957
+ overall_status.status = StatusValue.GENERIC_ERROR
958
+ overall_status.message = self._initialization_error
959
+ return overall_status
960
+
961
+ internal_callback_error = False
962
+ messages = []
963
+ with self._internal_callbacks_results_lock:
964
+ for callback, result in self._internal_callbacks_results.items():
965
+ if result.is_error():
966
+ internal_callback_error = True
967
+ overall_status.status = result.status
968
+ messages.append(f"{callback}: {result.message}")
969
+ if internal_callback_error:
970
+ overall_status.message = "\n".join(messages)
971
+ return overall_status
972
+
973
+ for callback in self._scheduled_callbacks:
974
+ if callback.status.is_error():
975
+ overall_status.status = callback.status.status
976
+ messages.append(f"{callback}: {callback.status.message}")
977
+
978
+ overall_status.message = "\n".join(messages)
979
+ return overall_status
980
+
981
+ def _update_cluster_time_diff(self):
982
+ self._cluster_time_diff = self._client.get_cluster_time_diff()
983
+ for callback in self._scheduled_callbacks:
984
+ callback.cluster_time_diff = self._cluster_time_diff
985
+
986
+ def _heartbeat(self):
987
+ response = bytes("not set", "utf-8")
988
+ try:
989
+ overall_status = self._build_current_status()
990
+ response = self._client.send_status(overall_status)
991
+ self._runtime_properties = RuntimeProperties(response)
992
+ except Exception as e:
993
+ api_logger.error(f"Heartbeat failed because {e}, response {response}", exc_info=True)
994
+
995
+ def __del__(self):
996
+ self._callbacks_executor.shutdown()
997
+ self._internal_executor.shutdown()
998
+
999
+ def _add_metric(self, metric: Metric):
1000
+ metric.validate()
1001
+
1002
+ with self._running_callbacks_lock:
1003
+ current_thread_id = threading.get_ident()
1004
+ current_callback = self._running_callbacks.get(current_thread_id)
1005
+
1006
+ if current_callback is not None and metric.timestamp is None:
1007
+ # Adjust the metric timestamp according to the callback start time
1008
+ # If the user manually set a metric timestamp, don't adjust it
1009
+ metric.timestamp = current_callback.get_adjusted_metric_timestamp()
1010
+ elif current_callback is None and metric.timestamp is None:
1011
+ api_logger.debug(
1012
+ f"Metric {metric} was added by unknown thread {current_thread_id}, cannot adjust the timestamp"
1013
+ )
1014
+
1015
+ with self._metrics_lock:
1016
+ self._metrics.append(metric.to_mint_line())
1017
+
1018
+ def _add_mint_lines(self, lines: List[str]):
1019
+ with self._metrics_lock:
1020
+ self._metrics.extend(lines)
1021
+
1022
    def _send_events_internal(self, events: Union[dict, List[dict]]):
        """Send log events via the client and record the outcome.

        Runs on the internal executor (see ``_send_events``). The recorded
        internal-callback result is OK unless a response carries an error
        payload, in which case its message is stored as GENERIC_ERROR.

        NOTE(review): the loop returns on the first response that has no
        error payload, so any later responses are not inspected — confirm
        this short-circuit is intended.
        """
        try:
            responses = self._client.send_events(events, self.log_event_enrichment)

            for response in responses:
                with self._internal_callbacks_results_lock:
                    self._internal_callbacks_results[self._send_events.__name__] = Status(StatusValue.OK)
                    if not response or "error" not in response or "message" not in response["error"]:
                        return
                    self._internal_callbacks_results[self._send_events.__name__] = Status(
                        StatusValue.GENERIC_ERROR, response["error"]["message"]
                    )
        except Exception as e:
            # Sending is best-effort: log and record the failure, don't raise.
            api_logger.error(f"Error sending events: {e!r}", exc_info=True)
            with self._internal_callbacks_results_lock:
                self._internal_callbacks_results[self._send_events.__name__] = Status(StatusValue.GENERIC_ERROR, str(e))
1038
+
1039
    def _send_events(self, events: Union[dict, List[dict]]):
        """Queue log event(s) for asynchronous delivery on the internal executor."""
        self._internal_executor.submit(self._send_events_internal, events)
1041
+
1042
    def _send_dt_event(self, event: dict[str, str | int | dict[str, str]]):
        """Forward a validated Dynatrace event dictionary to the client."""
        self._client.send_dt_event(event)
1044
+
1045
+ def _get_and_set_next_internal_callback_timestamp(self, callback_name: str, interval: timedelta):
1046
+ next_timestamp = self._next_internal_callbacks_timestamps[callback_name]
1047
+ self._next_internal_callbacks_timestamps[callback_name] += interval
1048
+ return next_timestamp.timestamp()
1049
+
1050
    def get_version(self) -> str:
        """Return the extension version (taken from the activation config)."""
        return self.activation_config.version
1053
+
1054
    @property
    def techrule(self) -> str:
        """Internal property used by the EEC."""

        return self._techrule
1059
+
1060
    @techrule.setter
    def techrule(self, value):
        # Counterpart of the techrule property; stores the EEC-provided value.
        self._techrule = value
1063
+
1064
    def get_activation_config(self) -> ActivationConfig:
        """Retrieve the activation config.

        Represents activation configuration assigned to this particular
        extension instance.

        Returns:
            ActivationConfig object.
        """
        return self.activation_config
1074
+
1075
+ def get_snapshot(self, snapshot_file: Path | str | None = None) -> Snapshot:
1076
+ """Retrieves an oneagent snapshot.
1077
+
1078
+ Args:
1079
+ snapshot_file: Optional path to the snapshot file, only used when running from dt-sdk run
1080
+
1081
+ Returns:
1082
+ Snapshot object.
1083
+ """
1084
+ if self._running_in_sim:
1085
+ if snapshot_file is None:
1086
+ snapshot_file = Path("snapshot.json")
1087
+ if isinstance(snapshot_file, str):
1088
+ snapshot_file = Path(snapshot_file)
1089
+ if not snapshot_file.exists():
1090
+ msg = f"snapshot file '{snapshot_file}' not found"
1091
+ raise FileNotFoundError(msg)
1092
+
1093
+ return Snapshot.parse_from_file(snapshot_file)