cognite-extractor-utils 7.8.1__tar.gz → 7.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (55)
  1. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/PKG-INFO +1 -1
  2. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/__init__.py +1 -1
  3. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/configuration/models.py +344 -0
  4. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/base.py +20 -3
  5. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/runtime.py +8 -2
  6. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/time_series.py +2 -2
  7. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/pyproject.toml +1 -1
  8. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/.gitignore +0 -0
  9. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/LICENSE +0 -0
  10. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/README.md +0 -0
  11. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/examples/unstable/extractors/simple_extractor/config/config.yaml +0 -0
  12. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/examples/unstable/extractors/simple_extractor/config/connection_config.yaml +0 -0
  13. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/examples/unstable/extractors/simple_extractor/main.py +0 -0
  14. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/_inner_util.py +0 -0
  15. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/base.py +0 -0
  16. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/configtools/__init__.py +0 -0
  17. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/configtools/_util.py +0 -0
  18. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/configtools/elements.py +0 -0
  19. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/configtools/loaders.py +0 -0
  20. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/configtools/validators.py +0 -0
  21. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/exceptions.py +0 -0
  22. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/metrics.py +0 -0
  23. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/py.typed +0 -0
  24. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/statestore/__init__.py +0 -0
  25. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/statestore/_base.py +0 -0
  26. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/statestore/hashing.py +0 -0
  27. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/statestore/watermark.py +0 -0
  28. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/threading.py +0 -0
  29. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/__init__.py +0 -0
  30. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/configuration/__init__.py +0 -0
  31. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/configuration/exceptions.py +0 -0
  32. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/configuration/loaders.py +0 -0
  33. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/__init__.py +0 -0
  34. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/_dto.py +0 -0
  35. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/_messaging.py +0 -0
  36. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/checkin_worker.py +0 -0
  37. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/errors.py +0 -0
  38. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/logger.py +0 -0
  39. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/restart_policy.py +0 -0
  40. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/tasks.py +0 -0
  41. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/scheduling/__init__.py +0 -0
  42. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/scheduling/_scheduler.py +0 -0
  43. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/scheduling/_schedules.py +0 -0
  44. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/__init__.py +0 -0
  45. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/_base.py +0 -0
  46. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/_metrics.py +0 -0
  47. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/assets.py +0 -0
  48. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/data_modeling.py +0 -0
  49. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/events.py +0 -0
  50. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/files.py +0 -0
  51. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/raw.py +0 -0
  52. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/upload_failure_handler.py +0 -0
  53. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader_extractor.py +0 -0
  54. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader_types.py +0 -0
  55. {cognite_extractor_utils-7.8.1 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cognite-extractor-utils
3
- Version: 7.8.1
3
+ Version: 7.9.0
4
4
  Summary: Utilities for easier development of extractors for CDF
5
5
  Project-URL: repository, https://github.com/cognitedata/python-extractor-utils
6
6
  Author-email: Mathias Lohne <mathias.lohne@cognite.com>
@@ -16,7 +16,7 @@
16
16
  Cognite extractor utils is a Python package that simplifies the development of new extractors.
17
17
  """
18
18
 
19
- __version__ = "7.8.1"
19
+ __version__ = "7.9.0"
20
20
  from .base import Extractor
21
21
 
22
22
  __all__ = ["Extractor"]
@@ -8,9 +8,11 @@ from collections.abc import Iterator
8
8
  from datetime import timedelta
9
9
  from enum import Enum
10
10
  from pathlib import Path
11
+ from time import sleep
11
12
  from typing import Annotated, Any, Literal, TypeVar
12
13
 
13
14
  from humps import kebabize
15
+ from prometheus_client import REGISTRY, start_http_server
14
16
  from pydantic import BaseModel, ConfigDict, Field, GetCoreSchemaHandler
15
17
  from pydantic_core import CoreSchema, core_schema
16
18
  from typing_extensions import assert_never
@@ -22,8 +24,10 @@ from cognite.client.credentials import (
22
24
  OAuthClientCertificate,
23
25
  OAuthClientCredentials,
24
26
  )
27
+ from cognite.client.data_classes import Asset
25
28
  from cognite.extractorutils.configtools._util import _load_certificate_data
26
29
  from cognite.extractorutils.exceptions import InvalidConfigError
30
+ from cognite.extractorutils.metrics import AbstractMetricsPusher, CognitePusher, PrometheusPusher
27
31
  from cognite.extractorutils.statestore import (
28
32
  AbstractStateStore,
29
33
  LocalStateStore,
@@ -31,6 +35,7 @@ from cognite.extractorutils.statestore import (
31
35
  RawStateStore,
32
36
  )
33
37
  from cognite.extractorutils.threading import CancellationToken
38
+ from cognite.extractorutils.util import EitherId
34
39
 
35
40
  __all__ = [
36
41
  "AuthenticationConfig",
@@ -43,6 +48,7 @@ __all__ = [
43
48
  "LogFileHandlerConfig",
44
49
  "LogHandlerConfig",
45
50
  "LogLevel",
51
+ "MetricsConfig",
46
52
  "ScheduleConfig",
47
53
  "TimeIntervalConfig",
48
54
  ]
@@ -405,6 +411,15 @@ class LogLevel(Enum):
405
411
  INFO = "INFO"
406
412
  DEBUG = "DEBUG"
407
413
 
@classmethod
def _missing_(cls, value: object) -> "LogLevel":
    """
    Fallback lookup used when a value does not match any member exactly.

    String values are upper-cased before being compared against the member values, so that for example
    ``"info"`` resolves to the ``INFO`` member.

    Args:
        value: The value that failed the regular lookup.

    Returns:
        The member whose value equals the upper-cased input.

    Raises:
        ValueError: If ``value`` is not a string, or if no member matches it after upper-casing.
    """
    if isinstance(value, str):
        wanted = value.upper()
        found = next((level for level in cls if level.value == wanted), None)
        if found is not None:
            return found
    # Non-strings and unknown names share the same error message
    raise ValueError(f"{value} is not a valid log level")
422
+
408
423
 
409
424
  class LogFileHandlerConfig(ConfigModel):
410
425
  """
@@ -429,11 +444,339 @@ class LogConsoleHandlerConfig(ConfigModel):
429
444
  LogHandlerConfig = Annotated[LogFileHandlerConfig | LogConsoleHandlerConfig, Field(discriminator="type")]
430
445
 
431
446
 
class EitherIdConfig(ConfigModel):
    """
    Configuration parameter identifying a CDF resource by either its internal ID or its external ID.

    Only one of the two ID types should be given, never both.
    """

    # Internal (numeric) ID
    id: int | None = None
    # External (string) ID
    external_id: str | None = None

    @property
    def either_id(self) -> EitherId:
        """
        Build an ``EitherId`` from the configured ``id`` and ``external_id``.

        Raises:
            TypeError: If both id and external_id are None, or if both are set.
        """
        identifier = EitherId(id=self.id, external_id=self.external_id)
        return identifier
466
+
467
+
class _PushGatewayConfig(ConfigModel):
    """
    Settings for one Prometheus Push Gateway that metrics are pushed to.
    """

    # Address of the gateway; handed to the pusher as its URL
    host: str
    job_name: str
    # Optional credentials for the gateway
    username: str | None = None
    password: str | None = None

    # When set, the gateway is cleared on shutdown after waiting this long
    clear_after: TimeIntervalConfig | None = None
    # Time between pushes
    push_interval: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
480
+
481
+
class _PromServerConfig(ConfigModel):
    """
    Settings for the HTTP server that serves the Prometheus metrics registry.

    The port and host are passed to ``prometheus_client.start_http_server`` when metrics are started.
    """

    # Port the metrics server listens on
    port: int = 9000
    # Address the metrics server binds to
    host: str = "0.0.0.0"
489
+
490
+
class _CogniteMetricsConfig(ConfigModel):
    """
    Settings for pushing metrics to Cognite Data Fusion.
    """

    # Prefix handed to the CDF metrics pusher as its external ID prefix
    external_id_prefix: str
    # An asset is only passed to the pusher when both the name and the external ID are given
    asset_name: str | None = None
    asset_external_id: str | None = None
    # Optional data set, given by internal or external ID
    data_set: EitherIdConfig | None = None

    # Time between pushes
    push_interval: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
502
+
503
+
class MetricsPushManager:
    """
    Owns the metric pushers described by a ``MetricsConfig``.

    Creates and starts the configured pushers on ``start`` and stops them again on ``stop``.

    Args:
        metrics_config: Configuration for the metrics to be pushed.
        cdf_client: The CDF tenant to upload time series to
        cancellation_token: Event object to be used as a thread cancelation event
    """

    def __init__(
        self,
        metrics_config: "MetricsConfig",
        cdf_client: CogniteClient,
        cancellation_token: CancellationToken | None = None,
    ) -> None:
        """
        Store the configuration and collaborators; no pusher is created until ``start`` is called.
        """
        self.metrics_config = metrics_config
        self.cdf_client = cdf_client
        self.cancellation_token = cancellation_token
        # Every pusher started by this manager, in start order
        self.pushers: list[AbstractMetricsPusher] = []
        # Pushers whose gateway should be cleared on stop, mapped to the configured delay in seconds
        self.clear_on_stop: dict[AbstractMetricsPusher, int] = {}

    def start(self) -> None:
        """
        Start every configured metrics destination.

        Push gateways are started first, then the CDF pusher, and finally the HTTP metrics server.
        """
        self._start_push_gateways()
        self._start_cognite_pusher()
        self._start_server()

    def _start_push_gateways(self) -> None:
        """
        Create and start one ``PrometheusPusher`` per configured push gateway.
        """
        for index, gateway in enumerate(self.metrics_config.push_gateways or []):
            gateway_pusher = PrometheusPusher(
                job_name=gateway.job_name,
                username=gateway.username,
                password=gateway.password,
                url=gateway.host,
                push_interval=gateway.push_interval.seconds,
                thread_name=f"MetricsPusher_{index}",
                cancellation_token=self.cancellation_token,
            )
            gateway_pusher.start()
            self.pushers.append(gateway_pusher)

            # Remember gateways that asked to be cleared after shutdown
            if gateway.clear_after is not None:
                self.clear_on_stop[gateway_pusher] = gateway.clear_after.seconds

    def _start_cognite_pusher(self) -> None:
        """
        Create and start the ``CognitePusher`` if a CDF destination is configured.
        """
        cognite_config = self.metrics_config.cognite
        if not cognite_config:
            return

        # An asset is only attached when both its name and external ID are configured
        asset = None
        if cognite_config.asset_name and cognite_config.asset_external_id:
            asset = Asset(
                name=cognite_config.asset_name,
                external_id=cognite_config.asset_external_id,
            )

        data_set = cognite_config.data_set.either_id if cognite_config.data_set else None

        cdf_pusher = CognitePusher(
            cdf_client=self.cdf_client,
            external_id_prefix=cognite_config.external_id_prefix,
            push_interval=cognite_config.push_interval.seconds,
            asset=asset,
            data_set=data_set,
            thread_name="CogniteMetricsPusher",
            cancellation_token=self.cancellation_token,
        )
        cdf_pusher.start()
        self.pushers.append(cdf_pusher)

    def _start_server(self) -> None:
        """
        Start the HTTP server for the default Prometheus registry if one is configured.
        """
        server_config = self.metrics_config.server
        if server_config:
            start_http_server(server_config.port, server_config.host, registry=REGISTRY)

    def stop(self) -> None:
        """
        Stop every pusher that was started.

        If any push gateway was configured with ``clear_after``, this call blocks for the longest of those
        delays and then calls ``clear_gateway`` on each such Prometheus pusher.
        """
        for active_pusher in self.pushers:
            active_pusher.stop()

        if not self.clear_on_stop:
            return

        # One shared wait, long enough for the gateway with the largest delay
        sleep(max(self.clear_on_stop.values()))
        for candidate in self.clear_on_stop:
            if isinstance(candidate, PrometheusPusher):
                candidate.clear_gateway()
588
+
589
+
class MetricsConfig(ConfigModel):
    """
    Where metrics should be sent.

    Supports any number of Prometheus push gateways, pushing to CDF as time series, and an HTTP metrics server.
    All destinations are optional.
    """

    push_gateways: list[_PushGatewayConfig] | None = None
    cognite: _CogniteMetricsConfig | None = None
    server: _PromServerConfig | None = None

    def create_manager(
        self, cdf_client: CogniteClient, cancellation_token: CancellationToken | None = None
    ) -> MetricsPushManager:
        """
        Build a ``MetricsPushManager`` for this configuration.

        Args:
            cdf_client: An instance of CogniteClient to interact with CDF.
            cancellation_token: Optional token to signal cancellation of metric pushing.

        Returns:
            MetricsPushManager: A manager holding this configuration, the client and the token. Nothing is
                started until its ``start`` method is called.
        """
        manager = MetricsPushManager(
            metrics_config=self,
            cdf_client=cdf_client,
            cancellation_token=cancellation_token,
        )
        return manager
615
+
616
+
432
617
  # Mypy BS
433
618
  def _log_handler_default() -> list[LogHandlerConfig]:
434
619
  return [LogConsoleHandlerConfig(type="console", level=LogLevel.INFO)]
435
620
 
436
621
 
622
+ class FileSizeConfig:
623
+ """
624
+ Configuration parameter for setting a file size.
625
+ """
626
+
627
+ def __init__(self, expression: str) -> None:
628
+ self._bytes, self._expression = FileSizeConfig._parse_expression(expression)
629
+
630
+ @classmethod
631
+ def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> CoreSchema: # noqa: ANN401
632
+ """
633
+ Pydantic hook to define how this class should be serialized/deserialized.
634
+
635
+ This allows the class to be used as a field in Pydantic models.
636
+ """
637
+ return core_schema.no_info_after_validator_function(cls, handler(str | int))
638
+
639
+ def __eq__(self, other: object) -> bool:
640
+ """
641
+ Two FileSizeConfig objects are equal if they have the same number of bytes.
642
+ """
643
+ if not isinstance(other, FileSizeConfig):
644
+ return NotImplemented
645
+ return self._bytes == other._bytes
646
+
647
+ def __hash__(self) -> int:
648
+ """
649
+ Hash function for FileSizeConfig based on the number of bytes.
650
+ """
651
+ return hash(self._bytes)
652
+
653
+ @classmethod
654
+ def _parse_expression(cls, expression: str) -> tuple[int, str]:
655
+ sizes = {
656
+ "kb": 1000,
657
+ "mb": 1_000_000,
658
+ "gb": 1_000_000_000,
659
+ "tb": 1_000_000_000_000,
660
+ "kib": 1024,
661
+ "mib": 1_048_576,
662
+ "gib": 1_073_741_824,
663
+ "tib": 1_099_511_627_776,
664
+ }
665
+
666
+ expression_normalized = expression.strip().lower()
667
+ try:
668
+ num_value = float(expression_normalized)
669
+ return int(num_value), expression
670
+ except ValueError:
671
+ pass
672
+
673
+ match = re.match(r"^([0-9]*\.?[0-9]+)\s*([a-zA-Z]*)$", expression_normalized)
674
+ if not match:
675
+ raise InvalidConfigError(f"Invalid file size format: '{expression}'. Must start with a number.")
676
+
677
+ num_str, unit_str = match.groups()
678
+ try:
679
+ num_value = float(num_str)
680
+ except ValueError as e:
681
+ raise InvalidConfigError(f"Invalid numeric value in file size: '{num_str}'") from e
682
+
683
+ if not unit_str:
684
+ return int(num_value), expression
685
+
686
+ unit_lower = unit_str.lower()
687
+ if unit_lower in sizes:
688
+ return int(num_value * sizes[unit_lower]), expression
689
+
690
+ raise InvalidConfigError(f"Invalid unit for file size: '{unit_str}'. Valid units: {list(sizes.keys())}")
691
+
692
+ @property
693
+ def bytes(self) -> int:
694
+ """
695
+ File size in bytes.
696
+ """
697
+ return self._bytes
698
+
699
+ @property
700
+ def kilobytes(self) -> float:
701
+ """
702
+ File size in kilobytes.
703
+ """
704
+ return self._bytes / 1000
705
+
706
+ @property
707
+ def megabytes(self) -> float:
708
+ """
709
+ File size in megabytes.
710
+ """
711
+ return self._bytes / 1_000_000
712
+
713
+ @property
714
+ def gigabytes(self) -> float:
715
+ """
716
+ File size in gigabytes.
717
+ """
718
+ return self._bytes / 1_000_000_000
719
+
720
+ @property
721
+ def terabytes(self) -> float:
722
+ """
723
+ File size in terabytes.
724
+ """
725
+ return self._bytes / 1_000_000_000_000
726
+
727
+ @property
728
+ def kibibytes(self) -> float:
729
+ """
730
+ File size in kibibytes (1024 bytes).
731
+ """
732
+ return self._bytes / 1024
733
+
734
+ @property
735
+ def mebibytes(self) -> float:
736
+ """
737
+ File size in mebibytes (1024 kibibytes).
738
+ """
739
+ return self._bytes / 1_048_576
740
+
741
+ @property
742
+ def gibibytes(self) -> float:
743
+ """
744
+ File size in gibibytes (1024 mebibytes).
745
+ """
746
+ return self._bytes / 1_073_741_824
747
+
748
+ @property
749
+ def tebibytes(self) -> float:
750
+ """
751
+ File size in tebibytes (1024 gibibytes).
752
+ """
753
+ return self._bytes / 1_099_511_627_776
754
+
755
+ def __int__(self) -> int:
756
+ """
757
+ Returns the file size as bytes.
758
+ """
759
+ return int(self._bytes)
760
+
761
+ def __float__(self) -> float:
762
+ """
763
+ Returns the file size as bytes.
764
+ """
765
+ return float(self._bytes)
766
+
767
+ def __str__(self) -> str:
768
+ """
769
+ Returns the file size as a human readable string.
770
+ """
771
+ return self._expression
772
+
773
+ def __repr__(self) -> str:
774
+ """
775
+ Returns the file size as a human readable string.
776
+ """
777
+ return self._expression
778
+
779
+
437
780
  class RawDestinationConfig(ConfigModel):
438
781
  """
439
782
  Configuration parameters for using Raw.
@@ -523,6 +866,7 @@ class ExtractorConfig(ConfigModel):
523
866
  """
524
867
 
525
868
  state_store: StateStoreConfig | None = None
869
+ metrics: MetricsConfig | None = None
526
870
  log_handlers: list[LogHandlerConfig] = Field(default_factory=_log_handler_default)
527
871
  retry_startup: bool = True
528
872
 
@@ -59,6 +59,7 @@ from humps import pascalize
59
59
  from typing_extensions import Self, assert_never
60
60
 
61
61
  from cognite.extractorutils._inner_util import _resolve_log_level
62
+ from cognite.extractorutils.metrics import BaseMetrics
62
63
  from cognite.extractorutils.statestore import (
63
64
  AbstractStateStore,
64
65
  LocalStateStore,
@@ -147,7 +148,9 @@ class Extractor(Generic[ConfigType], CogniteLogger):
147
148
 
148
149
  cancellation_token: CancellationToken
149
150
 
150
- def __init__(self, config: FullConfig[ConfigType], checkin_worker: CheckinWorker) -> None:
151
+ def __init__(
152
+ self, config: FullConfig[ConfigType], checkin_worker: CheckinWorker, metrics: BaseMetrics | None = None
153
+ ) -> None:
151
154
  self._logger = logging.getLogger(f"{self.EXTERNAL_ID}.main")
152
155
  self._checkin_worker = checkin_worker
153
156
 
@@ -156,6 +159,7 @@ class Extractor(Generic[ConfigType], CogniteLogger):
156
159
 
157
160
  self.connection_config = config.connection_config
158
161
  self.application_config = config.application_config
162
+ self.metrics_config = config.application_config.metrics
159
163
  self.current_config_revision: ConfigRevision = config.current_config_revision
160
164
  self.log_level_override = config.log_level_override
161
165
 
@@ -170,6 +174,13 @@ class Extractor(Generic[ConfigType], CogniteLogger):
170
174
 
171
175
  self._tasks: list[Task] = []
172
176
  self._start_time: datetime
177
+ self._metrics: BaseMetrics | None = metrics
178
+
179
+ self.metrics_push_manager = (
180
+ self.metrics_config.create_manager(self.cognite_client, cancellation_token=self.cancellation_token)
181
+ if self.metrics_config
182
+ else None
183
+ )
173
184
 
174
185
  self.__init_tasks__()
175
186
 
@@ -367,8 +378,10 @@ class Extractor(Generic[ConfigType], CogniteLogger):
367
378
  self.cancellation_token.cancel()
368
379
 
369
380
  @classmethod
370
- def _init_from_runtime(cls, config: FullConfig[ConfigType], checkin_worker: CheckinWorker) -> Self:
371
- return cls(config, checkin_worker)
381
+ def _init_from_runtime(
382
+ cls, config: FullConfig[ConfigType], checkin_worker: CheckinWorker, metrics: BaseMetrics
383
+ ) -> Self:
384
+ return cls(config, checkin_worker, metrics)
372
385
 
373
386
  def add_task(self, task: Task) -> None:
374
387
  """
@@ -438,6 +451,8 @@ class Extractor(Generic[ConfigType], CogniteLogger):
438
451
  self.state_store.start()
439
452
 
440
453
  Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()
454
+ if self.metrics_push_manager:
455
+ self.metrics_push_manager.start()
441
456
 
442
457
  def stop(self) -> None:
443
458
  """
@@ -446,6 +461,8 @@ class Extractor(Generic[ConfigType], CogniteLogger):
446
461
  Instead of calling this method directly, it is recommended to use the context manager interface by using the
447
462
  ``with`` statement, which ensures proper cleanup on exit.
448
463
  """
464
+ if self.metrics_push_manager:
465
+ self.metrics_push_manager.stop()
449
466
  self.cancellation_token.cancel()
450
467
 
451
468
  def __enter__(self) -> Self:
@@ -47,6 +47,7 @@ from cognite.client.exceptions import (
47
47
  CogniteAuthError,
48
48
  CogniteConnectionError,
49
49
  )
50
+ from cognite.extractorutils.metrics import BaseMetrics
50
51
  from cognite.extractorutils.threading import CancellationToken
51
52
  from cognite.extractorutils.unstable.configuration.exceptions import InvalidArgumentError, InvalidConfigError
52
53
  from cognite.extractorutils.unstable.configuration.loaders import (
@@ -78,6 +79,7 @@ def _extractor_process_entrypoint(
78
79
  controls: _RuntimeControls,
79
80
  config: FullConfig,
80
81
  checkin_worker: CheckinWorker,
82
+ metrics: BaseMetrics | None = None,
81
83
  ) -> None:
82
84
  logger = logging.getLogger(f"{extractor_class.EXTERNAL_ID}.runtime")
83
85
  checkin_worker.active_revision = config.current_config_revision
@@ -85,7 +87,9 @@ def _extractor_process_entrypoint(
85
87
  checkin_worker.set_on_revision_change_handler(lambda _: on_revision_changed(controls))
86
88
  if config.application_config.retry_startup:
87
89
  checkin_worker.set_retry_startup(config.application_config.retry_startup)
88
- extractor = extractor_class._init_from_runtime(config, checkin_worker)
90
+ if not metrics:
91
+ metrics = BaseMetrics(extractor_name=extractor_class.NAME, extractor_version=extractor_class.VERSION)
92
+ extractor = extractor_class._init_from_runtime(config, checkin_worker, metrics)
89
93
  extractor._attach_runtime_controls(
90
94
  cancel_event=controls.cancel_event,
91
95
  message_queue=controls.message_queue,
@@ -135,11 +139,13 @@ class Runtime(Generic[ExtractorType]):
135
139
  def __init__(
136
140
  self,
137
141
  extractor: type[ExtractorType],
142
+ metrics: BaseMetrics | None = None,
138
143
  ) -> None:
139
144
  self._extractor_class = extractor
140
145
  self._cancellation_token = CancellationToken()
141
146
  self._cancellation_token.cancel_on_interrupt()
142
147
  self._message_queue: Queue[RuntimeMessage] = Queue()
148
+ self._metrics = metrics
143
149
  self.logger = logging.getLogger(f"{self._extractor_class.EXTERNAL_ID}.runtime")
144
150
  self._setup_logging()
145
151
  self._cancel_event: MpEvent | None = None
@@ -268,7 +274,7 @@ class Runtime(Generic[ExtractorType]):
268
274
 
269
275
  process = Process(
270
276
  target=_extractor_process_entrypoint,
271
- args=(self._extractor_class, controls, config, checkin_worker),
277
+ args=(self._extractor_class, controls, config, checkin_worker, self._metrics),
272
278
  )
273
279
 
274
280
  process.start()
@@ -53,7 +53,7 @@ from cognite.extractorutils.uploader._metrics import (
53
53
  from cognite.extractorutils.util import EitherId, cognite_exceptions, retry
54
54
 
55
55
  MIN_DATAPOINT_TIMESTAMP = -2208988800000
56
- MAX_DATAPOINT_STRING_LENGTH = 255
56
+ MAX_DATAPOINT_STRING_BYTES = 1023
57
57
  MAX_DATAPOINT_VALUE = 1e100
58
58
  MIN_DATAPOINT_VALUE = -1e100
59
59
 
@@ -154,7 +154,7 @@ class BaseTimeSeriesUploadQueue(AbstractUploadQueue, Generic[IdType]):
154
154
  math.isnan(value) or math.isinf(value) or value > MAX_DATAPOINT_VALUE or value < MIN_DATAPOINT_VALUE
155
155
  )
156
156
  elif isinstance(value, str):
157
- return len(value) <= MAX_DATAPOINT_STRING_LENGTH
157
+ return len(value.encode("utf-8")) <= MAX_DATAPOINT_STRING_BYTES
158
158
  return not isinstance(value, datetime)
159
159
 
160
160
  def _is_datapoint_valid(
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cognite-extractor-utils"
3
- version = "7.8.1"
3
+ version = "7.9.0"
4
4
  description = "Utilities for easier development of extractors for CDF"
5
5
  authors = [
6
6
  {name = "Mathias Lohne", email = "mathias.lohne@cognite.com"}