cognite-extractor-utils 7.8.0.tar.gz → 7.9.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (55)
  1. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/PKG-INFO +2 -2
  2. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/__init__.py +1 -1
  3. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/configtools/loaders.py +16 -2
  4. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/configuration/models.py +344 -0
  5. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/base.py +26 -13
  6. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/runtime.py +10 -4
  7. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/time_series.py +2 -2
  8. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/pyproject.toml +2 -2
  9. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/.gitignore +0 -0
  10. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/LICENSE +0 -0
  11. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/README.md +0 -0
  12. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/examples/unstable/extractors/simple_extractor/config/config.yaml +0 -0
  13. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/examples/unstable/extractors/simple_extractor/config/connection_config.yaml +0 -0
  14. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/examples/unstable/extractors/simple_extractor/main.py +0 -0
  15. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/_inner_util.py +0 -0
  16. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/base.py +0 -0
  17. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/configtools/__init__.py +0 -0
  18. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/configtools/_util.py +0 -0
  19. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/configtools/elements.py +0 -0
  20. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/configtools/validators.py +0 -0
  21. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/exceptions.py +0 -0
  22. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/metrics.py +0 -0
  23. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/py.typed +0 -0
  24. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/statestore/__init__.py +0 -0
  25. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/statestore/_base.py +0 -0
  26. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/statestore/hashing.py +0 -0
  27. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/statestore/watermark.py +0 -0
  28. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/threading.py +0 -0
  29. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/__init__.py +0 -0
  30. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/configuration/__init__.py +0 -0
  31. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/configuration/exceptions.py +0 -0
  32. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/configuration/loaders.py +0 -0
  33. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/__init__.py +0 -0
  34. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/_dto.py +0 -0
  35. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/_messaging.py +0 -0
  36. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/checkin_worker.py +0 -0
  37. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/errors.py +0 -0
  38. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/logger.py +0 -0
  39. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/restart_policy.py +0 -0
  40. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/core/tasks.py +0 -0
  41. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/scheduling/__init__.py +0 -0
  42. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/scheduling/_scheduler.py +0 -0
  43. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/unstable/scheduling/_schedules.py +0 -0
  44. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/__init__.py +0 -0
  45. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/_base.py +0 -0
  46. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/_metrics.py +0 -0
  47. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/assets.py +0 -0
  48. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/data_modeling.py +0 -0
  49. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/events.py +0 -0
  50. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/files.py +0 -0
  51. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/raw.py +0 -0
  52. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader/upload_failure_handler.py +0 -0
  53. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader_extractor.py +0 -0
  54. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/uploader_types.py +0 -0
  55. {cognite_extractor_utils-7.8.0 → cognite_extractor_utils-7.9.0}/cognite/extractorutils/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cognite-extractor-utils
3
- Version: 7.8.0
3
+ Version: 7.9.0
4
4
  Summary: Utilities for easier development of extractors for CDF
5
5
  Project-URL: repository, https://github.com/cognitedata/python-extractor-utils
6
6
  Author-email: Mathias Lohne <mathias.lohne@cognite.com>
@@ -14,7 +14,7 @@ Requires-Dist: azure-identity>=1.14.0
14
14
  Requires-Dist: azure-keyvault-secrets>=4.7.0
15
15
  Requires-Dist: cognite-sdk>=7.75.2
16
16
  Requires-Dist: croniter>=6.0.0
17
- Requires-Dist: dacite<1.9.0,>=1.6.0
17
+ Requires-Dist: dacite<1.10.0,>=1.9.2
18
18
  Requires-Dist: decorator>=5.1.1
19
19
  Requires-Dist: httpx<1,>=0.27.0
20
20
  Requires-Dist: jsonlines>=4.0.0
@@ -16,7 +16,7 @@
16
16
  Cognite extractor utils is a Python package that simplifies the development of new extractors.
17
17
  """
18
18
 
19
- __version__ = "7.8.0"
19
+ __version__ = "7.9.0"
20
20
  from .base import Extractor
21
21
 
22
22
  __all__ = ["Extractor"]
@@ -384,8 +384,22 @@ class ConfigResolver(Generic[CustomConfigClass]):
384
384
  self._cognite_client: CogniteClient | None = None
385
385
 
386
386
  def _reload_file(self) -> None:
387
- with open(self.config_path, encoding="utf-8") as stream:
388
- self._config_text = stream.read()
387
+ try:
388
+ with open(self.config_path, encoding="utf-8") as stream:
389
+ self._config_text = stream.read()
390
+ except UnicodeDecodeError:
391
+ _logger.warning(
392
+ f"Config file '{self.config_path}' is not valid UTF-8. Falling back to system default encoding."
393
+ )
394
+ try:
395
+ with open(self.config_path) as stream:
396
+ self._config_text = stream.read()
397
+ except Exception as e:
398
+ _logger.error(
399
+ f"Failed to read '{self.config_path}' with both UTF-8 and system default encoding. "
400
+ f"The file may be corrupt or in an unsupported format. Final error: {e}"
401
+ )
402
+ raise RuntimeError("Unable to read configuration file.") from e
389
403
 
390
404
  @property
391
405
  def cognite_client(self) -> CogniteClient | None:
@@ -8,9 +8,11 @@ from collections.abc import Iterator
8
8
  from datetime import timedelta
9
9
  from enum import Enum
10
10
  from pathlib import Path
11
+ from time import sleep
11
12
  from typing import Annotated, Any, Literal, TypeVar
12
13
 
13
14
  from humps import kebabize
15
+ from prometheus_client import REGISTRY, start_http_server
14
16
  from pydantic import BaseModel, ConfigDict, Field, GetCoreSchemaHandler
15
17
  from pydantic_core import CoreSchema, core_schema
16
18
  from typing_extensions import assert_never
@@ -22,8 +24,10 @@ from cognite.client.credentials import (
22
24
  OAuthClientCertificate,
23
25
  OAuthClientCredentials,
24
26
  )
27
+ from cognite.client.data_classes import Asset
25
28
  from cognite.extractorutils.configtools._util import _load_certificate_data
26
29
  from cognite.extractorutils.exceptions import InvalidConfigError
30
+ from cognite.extractorutils.metrics import AbstractMetricsPusher, CognitePusher, PrometheusPusher
27
31
  from cognite.extractorutils.statestore import (
28
32
  AbstractStateStore,
29
33
  LocalStateStore,
@@ -31,6 +35,7 @@ from cognite.extractorutils.statestore import (
31
35
  RawStateStore,
32
36
  )
33
37
  from cognite.extractorutils.threading import CancellationToken
38
+ from cognite.extractorutils.util import EitherId
34
39
 
35
40
  __all__ = [
36
41
  "AuthenticationConfig",
@@ -43,6 +48,7 @@ __all__ = [
43
48
  "LogFileHandlerConfig",
44
49
  "LogHandlerConfig",
45
50
  "LogLevel",
51
+ "MetricsConfig",
46
52
  "ScheduleConfig",
47
53
  "TimeIntervalConfig",
48
54
  ]
@@ -405,6 +411,15 @@ class LogLevel(Enum):
405
411
  INFO = "INFO"
406
412
  DEBUG = "DEBUG"
407
413
 
414
+ @classmethod
415
+ def _missing_(cls, value: object) -> "LogLevel":
416
+ if not isinstance(value, str):
417
+ raise ValueError(f"{value} is not a valid log level")
418
+ for member in cls:
419
+ if member.value == value.upper():
420
+ return member
421
+ raise ValueError(f"{value} is not a valid log level")
422
+
408
423
 
409
424
  class LogFileHandlerConfig(ConfigModel):
410
425
  """
@@ -429,11 +444,339 @@ class LogConsoleHandlerConfig(ConfigModel):
429
444
  LogHandlerConfig = Annotated[LogFileHandlerConfig | LogConsoleHandlerConfig, Field(discriminator="type")]
430
445
 
431
446
 
447
+ class EitherIdConfig(ConfigModel):
448
+ """
449
+ Configuration parameter representing an ID in CDF, which can either be an external or internal ID.
450
+
451
+ An EitherId can only hold one ID type, not both.
452
+ """
453
+
454
+ id: int | None = None
455
+ external_id: str | None = None
456
+
457
+ @property
458
+ def either_id(self) -> EitherId:
459
+ """
460
+ Returns an EitherId object based on the current configuration.
461
+
462
+ Raises:
463
+ TypeError: If both id and external_id are None, or if both are set.
464
+ """
465
+ return EitherId(id=self.id, external_id=self.external_id)
466
+
467
+
468
+ class _PushGatewayConfig(ConfigModel):
469
+ """
470
+ Configuration for pushing metrics to a Prometheus Push Gateway.
471
+ """
472
+
473
+ host: str
474
+ job_name: str
475
+ username: str | None = None
476
+ password: str | None = None
477
+
478
+ clear_after: TimeIntervalConfig | None = None
479
+ push_interval: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
480
+
481
+
482
+ class _PromServerConfig(ConfigModel):
483
+ """
484
+ Configuration for pushing metrics to a Prometheus server.
485
+ """
486
+
487
+ port: int = 9000
488
+ host: str = "0.0.0.0"
489
+
490
+
491
+ class _CogniteMetricsConfig(ConfigModel):
492
+ """
493
+ Configuration for pushing metrics to Cognite Data Fusion.
494
+ """
495
+
496
+ external_id_prefix: str
497
+ asset_name: str | None = None
498
+ asset_external_id: str | None = None
499
+ data_set: EitherIdConfig | None = None
500
+
501
+ push_interval: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
502
+
503
+
504
+ class MetricsPushManager:
505
+ """
506
+ Manages the pushing of metrics to various backends.
507
+
508
+ Starts and stops pushers based on a given configuration.
509
+
510
+ Args:
511
+ metrics_config: Configuration for the metrics to be pushed.
512
+ cdf_client: The CDF tenant to upload time series to
513
+ cancellation_token: Event object to be used as a thread cancelation event
514
+ """
515
+
516
+ def __init__(
517
+ self,
518
+ metrics_config: "MetricsConfig",
519
+ cdf_client: CogniteClient,
520
+ cancellation_token: CancellationToken | None = None,
521
+ ) -> None:
522
+ """
523
+ Initialize the MetricsPushManager.
524
+ """
525
+ self.metrics_config = metrics_config
526
+ self.cdf_client = cdf_client
527
+ self.cancellation_token = cancellation_token
528
+ self.pushers: list[AbstractMetricsPusher] = []
529
+ self.clear_on_stop: dict[AbstractMetricsPusher, int] = {}
530
+
531
+ def start(self) -> None:
532
+ """
533
+ Start all metric pushers.
534
+ """
535
+ push_gateways = self.metrics_config.push_gateways or []
536
+ for counter, push_gateway in enumerate(push_gateways):
537
+ prometheus_pusher = PrometheusPusher(
538
+ job_name=push_gateway.job_name,
539
+ username=push_gateway.username,
540
+ password=push_gateway.password,
541
+ url=push_gateway.host,
542
+ push_interval=push_gateway.push_interval.seconds,
543
+ thread_name=f"MetricsPusher_{counter}",
544
+ cancellation_token=self.cancellation_token,
545
+ )
546
+ prometheus_pusher.start()
547
+ self.pushers.append(prometheus_pusher)
548
+ if push_gateway.clear_after is not None:
549
+ self.clear_on_stop[prometheus_pusher] = push_gateway.clear_after.seconds
550
+
551
+ if self.metrics_config.cognite:
552
+ asset = None
553
+ if self.metrics_config.cognite.asset_name and self.metrics_config.cognite.asset_external_id:
554
+ asset = Asset(
555
+ name=self.metrics_config.cognite.asset_name,
556
+ external_id=self.metrics_config.cognite.asset_external_id,
557
+ )
558
+ cognite_pusher = CognitePusher(
559
+ cdf_client=self.cdf_client,
560
+ external_id_prefix=self.metrics_config.cognite.external_id_prefix,
561
+ push_interval=self.metrics_config.cognite.push_interval.seconds,
562
+ asset=asset,
563
+ data_set=self.metrics_config.cognite.data_set.either_id
564
+ if self.metrics_config.cognite.data_set
565
+ else None,
566
+ thread_name="CogniteMetricsPusher",
567
+ cancellation_token=self.cancellation_token,
568
+ )
569
+ cognite_pusher.start()
570
+ self.pushers.append(cognite_pusher)
571
+
572
+ if self.metrics_config.server:
573
+ start_http_server(self.metrics_config.server.port, self.metrics_config.server.host, registry=REGISTRY)
574
+
575
+ def stop(self) -> None:
576
+ """
577
+ Stop all metric pushers.
578
+ """
579
+ for pusher in self.pushers:
580
+ pusher.stop()
581
+
582
+ # Clear Prometheus pushers gateways if required
583
+ if self.clear_on_stop:
584
+ wait_time = max(self.clear_on_stop.values())
585
+ sleep(wait_time)
586
+ for pusher in (p for p in self.clear_on_stop if isinstance(p, PrometheusPusher)):
587
+ pusher.clear_gateway()
588
+
589
+
590
+ class MetricsConfig(ConfigModel):
591
+ """
592
+ Destination(s) for metrics.
593
+
594
+ Including options for one or several Prometheus push gateways, and pushing as CDF Time Series.
595
+ """
596
+
597
+ push_gateways: list[_PushGatewayConfig] | None = None
598
+ cognite: _CogniteMetricsConfig | None = None
599
+ server: _PromServerConfig | None = None
600
+
601
+ def create_manager(
602
+ self, cdf_client: CogniteClient, cancellation_token: CancellationToken | None = None
603
+ ) -> MetricsPushManager:
604
+ """
605
+ Create a MetricsPushManager based on the current configuration.
606
+
607
+ Args:
608
+ cdf_client: An instance of CogniteClient to interact with CDF.
609
+ cancellation_token: Optional token to signal cancellation of metric pushing.
610
+
611
+ Returns:
612
+ MetricsPushManager: An instance of MetricsPushManager configured with the provided parameters.
613
+ """
614
+ return MetricsPushManager(self, cdf_client, cancellation_token)
615
+
616
+
432
617
  # Mypy BS
433
618
  def _log_handler_default() -> list[LogHandlerConfig]:
434
619
  return [LogConsoleHandlerConfig(type="console", level=LogLevel.INFO)]
435
620
 
436
621
 
622
+ class FileSizeConfig:
623
+ """
624
+ Configuration parameter for setting a file size.
625
+ """
626
+
627
+ def __init__(self, expression: str) -> None:
628
+ self._bytes, self._expression = FileSizeConfig._parse_expression(expression)
629
+
630
+ @classmethod
631
+ def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> CoreSchema: # noqa: ANN401
632
+ """
633
+ Pydantic hook to define how this class should be serialized/deserialized.
634
+
635
+ This allows the class to be used as a field in Pydantic models.
636
+ """
637
+ return core_schema.no_info_after_validator_function(cls, handler(str | int))
638
+
639
+ def __eq__(self, other: object) -> bool:
640
+ """
641
+ Two FileSizeConfig objects are equal if they have the same number of bytes.
642
+ """
643
+ if not isinstance(other, FileSizeConfig):
644
+ return NotImplemented
645
+ return self._bytes == other._bytes
646
+
647
+ def __hash__(self) -> int:
648
+ """
649
+ Hash function for FileSizeConfig based on the number of bytes.
650
+ """
651
+ return hash(self._bytes)
652
+
653
+ @classmethod
654
+ def _parse_expression(cls, expression: str) -> tuple[int, str]:
655
+ sizes = {
656
+ "kb": 1000,
657
+ "mb": 1_000_000,
658
+ "gb": 1_000_000_000,
659
+ "tb": 1_000_000_000_000,
660
+ "kib": 1024,
661
+ "mib": 1_048_576,
662
+ "gib": 1_073_741_824,
663
+ "tib": 1_099_511_627_776,
664
+ }
665
+
666
+ expression_normalized = expression.strip().lower()
667
+ try:
668
+ num_value = float(expression_normalized)
669
+ return int(num_value), expression
670
+ except ValueError:
671
+ pass
672
+
673
+ match = re.match(r"^([0-9]*\.?[0-9]+)\s*([a-zA-Z]*)$", expression_normalized)
674
+ if not match:
675
+ raise InvalidConfigError(f"Invalid file size format: '{expression}'. Must start with a number.")
676
+
677
+ num_str, unit_str = match.groups()
678
+ try:
679
+ num_value = float(num_str)
680
+ except ValueError as e:
681
+ raise InvalidConfigError(f"Invalid numeric value in file size: '{num_str}'") from e
682
+
683
+ if not unit_str:
684
+ return int(num_value), expression
685
+
686
+ unit_lower = unit_str.lower()
687
+ if unit_lower in sizes:
688
+ return int(num_value * sizes[unit_lower]), expression
689
+
690
+ raise InvalidConfigError(f"Invalid unit for file size: '{unit_str}'. Valid units: {list(sizes.keys())}")
691
+
692
+ @property
693
+ def bytes(self) -> int:
694
+ """
695
+ File size in bytes.
696
+ """
697
+ return self._bytes
698
+
699
+ @property
700
+ def kilobytes(self) -> float:
701
+ """
702
+ File size in kilobytes.
703
+ """
704
+ return self._bytes / 1000
705
+
706
+ @property
707
+ def megabytes(self) -> float:
708
+ """
709
+ File size in megabytes.
710
+ """
711
+ return self._bytes / 1_000_000
712
+
713
+ @property
714
+ def gigabytes(self) -> float:
715
+ """
716
+ File size in gigabytes.
717
+ """
718
+ return self._bytes / 1_000_000_000
719
+
720
+ @property
721
+ def terabytes(self) -> float:
722
+ """
723
+ File size in terabytes.
724
+ """
725
+ return self._bytes / 1_000_000_000_000
726
+
727
+ @property
728
+ def kibibytes(self) -> float:
729
+ """
730
+ File size in kibibytes (1024 bytes).
731
+ """
732
+ return self._bytes / 1024
733
+
734
+ @property
735
+ def mebibytes(self) -> float:
736
+ """
737
+ File size in mebibytes (1024 kibibytes).
738
+ """
739
+ return self._bytes / 1_048_576
740
+
741
+ @property
742
+ def gibibytes(self) -> float:
743
+ """
744
+ File size in gibibytes (1024 mebibytes).
745
+ """
746
+ return self._bytes / 1_073_741_824
747
+
748
+ @property
749
+ def tebibytes(self) -> float:
750
+ """
751
+ File size in tebibytes (1024 gibibytes).
752
+ """
753
+ return self._bytes / 1_099_511_627_776
754
+
755
+ def __int__(self) -> int:
756
+ """
757
+ Returns the file size as bytes.
758
+ """
759
+ return int(self._bytes)
760
+
761
+ def __float__(self) -> float:
762
+ """
763
+ Returns the file size as bytes.
764
+ """
765
+ return float(self._bytes)
766
+
767
+ def __str__(self) -> str:
768
+ """
769
+ Returns the file size as a human readable string.
770
+ """
771
+ return self._expression
772
+
773
+ def __repr__(self) -> str:
774
+ """
775
+ Returns the file size as a human readable string.
776
+ """
777
+ return self._expression
778
+
779
+
437
780
  class RawDestinationConfig(ConfigModel):
438
781
  """
439
782
  Configuration parameters for using Raw.
@@ -523,6 +866,7 @@ class ExtractorConfig(ConfigModel):
523
866
  """
524
867
 
525
868
  state_store: StateStoreConfig | None = None
869
+ metrics: MetricsConfig | None = None
526
870
  log_handlers: list[LogHandlerConfig] = Field(default_factory=_log_handler_default)
527
871
  retry_startup: bool = True
528
872
 
@@ -59,6 +59,7 @@ from humps import pascalize
59
59
  from typing_extensions import Self, assert_never
60
60
 
61
61
  from cognite.extractorutils._inner_util import _resolve_log_level
62
+ from cognite.extractorutils.metrics import BaseMetrics
62
63
  from cognite.extractorutils.statestore import (
63
64
  AbstractStateStore,
64
65
  LocalStateStore,
@@ -147,7 +148,9 @@ class Extractor(Generic[ConfigType], CogniteLogger):
147
148
 
148
149
  cancellation_token: CancellationToken
149
150
 
150
- def __init__(self, config: FullConfig[ConfigType], checkin_worker: CheckinWorker) -> None:
151
+ def __init__(
152
+ self, config: FullConfig[ConfigType], checkin_worker: CheckinWorker, metrics: BaseMetrics | None = None
153
+ ) -> None:
151
154
  self._logger = logging.getLogger(f"{self.EXTERNAL_ID}.main")
152
155
  self._checkin_worker = checkin_worker
153
156
 
@@ -156,6 +159,7 @@ class Extractor(Generic[ConfigType], CogniteLogger):
156
159
 
157
160
  self.connection_config = config.connection_config
158
161
  self.application_config = config.application_config
162
+ self.metrics_config = config.application_config.metrics
159
163
  self.current_config_revision: ConfigRevision = config.current_config_revision
160
164
  self.log_level_override = config.log_level_override
161
165
 
@@ -170,6 +174,13 @@ class Extractor(Generic[ConfigType], CogniteLogger):
170
174
 
171
175
  self._tasks: list[Task] = []
172
176
  self._start_time: datetime
177
+ self._metrics: BaseMetrics | None = metrics
178
+
179
+ self.metrics_push_manager = (
180
+ self.metrics_config.create_manager(self.cognite_client, cancellation_token=self.cancellation_token)
181
+ if self.metrics_config
182
+ else None
183
+ )
173
184
 
174
185
  self.__init_tasks__()
175
186
 
@@ -220,14 +231,13 @@ class Extractor(Generic[ConfigType], CogniteLogger):
220
231
  case LogConsoleHandlerConfig() as console_handler:
221
232
  sh = logging.StreamHandler()
222
233
  sh.setFormatter(fmt)
223
- level_for_handler = _resolve_log_level(
224
- self.log_level_override if self.log_level_override else console_handler.level.value
225
- )
234
+ level_for_handler = _resolve_log_level(self.log_level_override or console_handler.level.value)
226
235
  sh.setLevel(level_for_handler)
227
236
 
228
237
  root.addHandler(sh)
229
238
 
230
239
  case LogFileHandlerConfig() as file_handler:
240
+ level_for_handler = _resolve_log_level(self.log_level_override or file_handler.level.value)
231
241
  try:
232
242
  fh = RobustFileHandler(
233
243
  filename=file_handler.path,
@@ -236,23 +246,20 @@ class Extractor(Generic[ConfigType], CogniteLogger):
236
246
  backupCount=file_handler.retention,
237
247
  create_dirs=True,
238
248
  )
239
- level_for_handler = _resolve_log_level(
240
- self.log_level_override if self.log_level_override else file_handler.level.value
241
- )
242
249
  fh.setLevel(level_for_handler)
243
250
  fh.setFormatter(fmt)
244
251
 
245
252
  root.addHandler(fh)
246
253
  except (OSError, PermissionError) as e:
247
- self._logger.warning(
248
- f"Could not create or write to log file {file_handler.path}: {e}. "
249
- "Falling back to console logging."
250
- )
251
254
  if not any(isinstance(h, logging.StreamHandler) for h in root.handlers):
252
255
  sh = logging.StreamHandler()
253
256
  sh.setFormatter(fmt)
254
257
  sh.setLevel(level_for_handler)
255
258
  root.addHandler(sh)
259
+ self._logger.warning(
260
+ f"Could not create or write to log file {file_handler.path}: {e}. "
261
+ "Defaulted to console logging."
262
+ )
256
263
 
257
264
  def _load_state_store(self) -> None:
258
265
  """
@@ -371,8 +378,10 @@ class Extractor(Generic[ConfigType], CogniteLogger):
371
378
  self.cancellation_token.cancel()
372
379
 
373
380
  @classmethod
374
- def _init_from_runtime(cls, config: FullConfig[ConfigType], checkin_worker: CheckinWorker) -> Self:
375
- return cls(config, checkin_worker)
381
+ def _init_from_runtime(
382
+ cls, config: FullConfig[ConfigType], checkin_worker: CheckinWorker, metrics: BaseMetrics
383
+ ) -> Self:
384
+ return cls(config, checkin_worker, metrics)
376
385
 
377
386
  def add_task(self, task: Task) -> None:
378
387
  """
@@ -442,6 +451,8 @@ class Extractor(Generic[ConfigType], CogniteLogger):
442
451
  self.state_store.start()
443
452
 
444
453
  Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()
454
+ if self.metrics_push_manager:
455
+ self.metrics_push_manager.start()
445
456
 
446
457
  def stop(self) -> None:
447
458
  """
@@ -450,6 +461,8 @@ class Extractor(Generic[ConfigType], CogniteLogger):
450
461
  Instead of calling this method directly, it is recommended to use the context manager interface by using the
451
462
  ``with`` statement, which ensures proper cleanup on exit.
452
463
  """
464
+ if self.metrics_push_manager:
465
+ self.metrics_push_manager.stop()
453
466
  self.cancellation_token.cancel()
454
467
 
455
468
  def __enter__(self) -> Self:
@@ -47,6 +47,7 @@ from cognite.client.exceptions import (
47
47
  CogniteAuthError,
48
48
  CogniteConnectionError,
49
49
  )
50
+ from cognite.extractorutils.metrics import BaseMetrics
50
51
  from cognite.extractorutils.threading import CancellationToken
51
52
  from cognite.extractorutils.unstable.configuration.exceptions import InvalidArgumentError, InvalidConfigError
52
53
  from cognite.extractorutils.unstable.configuration.loaders import (
@@ -78,6 +79,7 @@ def _extractor_process_entrypoint(
78
79
  controls: _RuntimeControls,
79
80
  config: FullConfig,
80
81
  checkin_worker: CheckinWorker,
82
+ metrics: BaseMetrics | None = None,
81
83
  ) -> None:
82
84
  logger = logging.getLogger(f"{extractor_class.EXTERNAL_ID}.runtime")
83
85
  checkin_worker.active_revision = config.current_config_revision
@@ -85,7 +87,9 @@ def _extractor_process_entrypoint(
85
87
  checkin_worker.set_on_revision_change_handler(lambda _: on_revision_changed(controls))
86
88
  if config.application_config.retry_startup:
87
89
  checkin_worker.set_retry_startup(config.application_config.retry_startup)
88
- extractor = extractor_class._init_from_runtime(config, checkin_worker)
90
+ if not metrics:
91
+ metrics = BaseMetrics(extractor_name=extractor_class.NAME, extractor_version=extractor_class.VERSION)
92
+ extractor = extractor_class._init_from_runtime(config, checkin_worker, metrics)
89
93
  extractor._attach_runtime_controls(
90
94
  cancel_event=controls.cancel_event,
91
95
  message_queue=controls.message_queue,
@@ -135,11 +139,13 @@ class Runtime(Generic[ExtractorType]):
135
139
  def __init__(
136
140
  self,
137
141
  extractor: type[ExtractorType],
142
+ metrics: BaseMetrics | None = None,
138
143
  ) -> None:
139
144
  self._extractor_class = extractor
140
145
  self._cancellation_token = CancellationToken()
141
146
  self._cancellation_token.cancel_on_interrupt()
142
147
  self._message_queue: Queue[RuntimeMessage] = Queue()
148
+ self._metrics = metrics
143
149
  self.logger = logging.getLogger(f"{self._extractor_class.EXTERNAL_ID}.runtime")
144
150
  self._setup_logging()
145
151
  self._cancel_event: MpEvent | None = None
@@ -180,8 +186,8 @@ class Runtime(Generic[ExtractorType]):
180
186
  choices=["debug", "info", "warning", "error", "critical"],
181
187
  type=str,
182
188
  required=False,
183
- default="info",
184
- help="Set the logging level for the runtime. Default is 'info'.",
189
+ default=None,
190
+ help="Set the logging level for the runtime.",
185
191
  )
186
192
  argparser.add_argument(
187
193
  "--skip-init-checks",
@@ -268,7 +274,7 @@ class Runtime(Generic[ExtractorType]):
268
274
 
269
275
  process = Process(
270
276
  target=_extractor_process_entrypoint,
271
- args=(self._extractor_class, controls, config, checkin_worker),
277
+ args=(self._extractor_class, controls, config, checkin_worker, self._metrics),
272
278
  )
273
279
 
274
280
  process.start()
@@ -53,7 +53,7 @@ from cognite.extractorutils.uploader._metrics import (
53
53
  from cognite.extractorutils.util import EitherId, cognite_exceptions, retry
54
54
 
55
55
  MIN_DATAPOINT_TIMESTAMP = -2208988800000
56
- MAX_DATAPOINT_STRING_LENGTH = 255
56
+ MAX_DATAPOINT_STRING_BYTES = 1023
57
57
  MAX_DATAPOINT_VALUE = 1e100
58
58
  MIN_DATAPOINT_VALUE = -1e100
59
59
 
@@ -154,7 +154,7 @@ class BaseTimeSeriesUploadQueue(AbstractUploadQueue, Generic[IdType]):
154
154
  math.isnan(value) or math.isinf(value) or value > MAX_DATAPOINT_VALUE or value < MIN_DATAPOINT_VALUE
155
155
  )
156
156
  elif isinstance(value, str):
157
- return len(value) <= MAX_DATAPOINT_STRING_LENGTH
157
+ return len(value.encode("utf-8")) <= MAX_DATAPOINT_STRING_BYTES
158
158
  return not isinstance(value, datetime)
159
159
 
160
160
  def _is_datapoint_valid(
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cognite-extractor-utils"
3
- version = "7.8.0"
3
+ version = "7.9.0"
4
4
  description = "Utilities for easier development of extractors for CDF"
5
5
  authors = [
6
6
  {name = "Mathias Lohne", email = "mathias.lohne@cognite.com"}
@@ -19,7 +19,7 @@ dependencies = [
19
19
  "prometheus-client>=0.7.0,<=1.0.0",
20
20
  "arrow>=1.0.0",
21
21
  "pyyaml>=5.3.0,<7",
22
- "dacite>=1.6.0,<1.9.0",
22
+ "dacite>=1.9.2,<1.10.0",
23
23
  "psutil>=6.0.0",
24
24
  "decorator>=5.1.1",
25
25
  "more-itertools>=10.0.0",