aws-lambda-powertools 3.14.1a5__py3-none-any.whl → 3.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aws_lambda_powertools/event_handler/api_gateway.py +1 -1
- aws_lambda_powertools/shared/version.py +1 -1
- aws_lambda_powertools/utilities/data_classes/kafka_event.py +45 -13
- aws_lambda_powertools/utilities/kafka/__init__.py +9 -0
- aws_lambda_powertools/utilities/kafka/consumer_records.py +144 -0
- aws_lambda_powertools/utilities/kafka/deserializer/__init__.py +0 -0
- aws_lambda_powertools/utilities/kafka/deserializer/avro.py +71 -0
- aws_lambda_powertools/utilities/kafka/deserializer/base.py +52 -0
- aws_lambda_powertools/utilities/kafka/deserializer/default.py +46 -0
- aws_lambda_powertools/utilities/kafka/deserializer/deserializer.py +107 -0
- aws_lambda_powertools/utilities/kafka/deserializer/json.py +53 -0
- aws_lambda_powertools/utilities/kafka/deserializer/protobuf.py +117 -0
- aws_lambda_powertools/utilities/kafka/exceptions.py +22 -0
- aws_lambda_powertools/utilities/kafka/kafka_consumer.py +60 -0
- aws_lambda_powertools/utilities/kafka/schema_config.py +83 -0
- aws_lambda_powertools/utilities/kafka/serialization/__init__.py +0 -0
- aws_lambda_powertools/utilities/kafka/serialization/base.py +56 -0
- aws_lambda_powertools/utilities/kafka/serialization/custom_dict.py +22 -0
- aws_lambda_powertools/utilities/kafka/serialization/dataclass.py +25 -0
- aws_lambda_powertools/utilities/kafka/serialization/pydantic.py +26 -0
- aws_lambda_powertools/utilities/kafka/serialization/serialization.py +65 -0
- aws_lambda_powertools/utilities/kafka/serialization/types.py +3 -0
- aws_lambda_powertools/utilities/parser/models/kafka.py +7 -0
- {aws_lambda_powertools-3.14.1a5.dist-info → aws_lambda_powertools-3.15.0.dist-info}/METADATA +5 -1
- {aws_lambda_powertools-3.14.1a5.dist-info → aws_lambda_powertools-3.15.0.dist-info}/RECORD +27 -8
- {aws_lambda_powertools-3.14.1a5.dist-info → aws_lambda_powertools-3.15.0.dist-info}/LICENSE +0 -0
- {aws_lambda_powertools-3.14.1a5.dist-info → aws_lambda_powertools-3.15.0.dist-info}/WHEEL +0 -0
aws_lambda_powertools/event_handler/api_gateway.py
@@ -407,7 +407,7 @@ class Route:
 
         # OpenAPI spec only understands paths with { }. So we'll have to convert Powertools' < >.
         # https://swagger.io/specification/#path-templating
-        self.openapi_path = re.sub(r"<(.*?)>", lambda m: f"{{{''.join(m.group(1))}}}", self.path)
+        self.openapi_path = re.sub(r"<(.*?)>", lambda m: f"{{{''.join(m.group(1))}}}", self.path)  # type: ignore[arg-type]
 
         self.rule = rule
         self.func = func
aws_lambda_powertools/utilities/data_classes/kafka_event.py
@@ -10,7 +10,19 @@ if TYPE_CHECKING:
     from collections.abc import Iterator
 
 
-class KafkaEventRecord(DictWrapper):
+class KafkaEventRecordSchemaMetadata(DictWrapper):
+    @property
+    def data_format(self) -> str | None:
+        """The data format of the Kafka record."""
+        return self.get("dataFormat", None)
+
+    @property
+    def schema_id(self) -> str | None:
+        """The schema id of the Kafka record."""
+        return self.get("schemaId", None)
+
+
+class KafkaEventRecordBase(DictWrapper):
     @property
     def topic(self) -> str:
         """The Kafka topic."""
@@ -36,6 +48,24 @@ class KafkaEventRecord(DictWrapper):
         """The Kafka record timestamp type."""
         return self["timestampType"]
 
+    @property
+    def key_schema_metadata(self) -> KafkaEventRecordSchemaMetadata | None:
+        """The metadata of the Key Kafka record."""
+        return (
+            None if self.get("keySchemaMetadata") is None else KafkaEventRecordSchemaMetadata(self["keySchemaMetadata"])
+        )
+
+    @property
+    def value_schema_metadata(self) -> KafkaEventRecordSchemaMetadata | None:
+        """The metadata of the Value Kafka record."""
+        return (
+            None
+            if self.get("valueSchemaMetadata") is None
+            else KafkaEventRecordSchemaMetadata(self["valueSchemaMetadata"])
+        )
+
+
+class KafkaEventRecord(KafkaEventRecordBase):
     @property
     def key(self) -> str | None:
         """
@@ -83,18 +113,7 @@ class KafkaEventRecord(DictWrapper):
         return CaseInsensitiveDict((k, bytes(v)) for chunk in self.headers for k, v in chunk.items())
 
 
-class KafkaEvent(DictWrapper):
-    """Self-managed or MSK Apache Kafka event trigger
-    Documentation:
-    --------------
-    - https://docs.aws.amazon.com/lambda/latest/dg/with-kafka.html
-    - https://docs.aws.amazon.com/lambda/latest/dg/with-msk.html
-    """
-
-    def __init__(self, data: dict[str, Any]):
-        super().__init__(data)
-        self._records: Iterator[KafkaEventRecord] | None = None
-
+class KafkaEventBase(DictWrapper):
     @property
     def event_source(self) -> str:
         """The AWS service from which the Kafka event record originated."""
@@ -115,6 +134,19 @@ class KafkaEvent(DictWrapper):
         """The decoded Kafka bootstrap URL."""
         return self.bootstrap_servers.split(",")
 
+
+class KafkaEvent(KafkaEventBase):
+    """Self-managed or MSK Apache Kafka event trigger
+    Documentation:
+    --------------
+    - https://docs.aws.amazon.com/lambda/latest/dg/with-kafka.html
+    - https://docs.aws.amazon.com/lambda/latest/dg/with-msk.html
+    """
+
+    def __init__(self, data: dict[str, Any]):
+        super().__init__(data)
+        self._records: Iterator[KafkaEventRecord] | None = None
+
     @property
     def records(self) -> Iterator[KafkaEventRecord]:
         """The Kafka records."""
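For orientation, a minimal sketch (not part of the release) of how the new schema-metadata properties on the data class could be read inside a handler; the handler name and the print call are illustrative only:

from aws_lambda_powertools.utilities.data_classes.kafka_event import KafkaEvent


def lambda_handler(event: dict, context):
    kafka_event = KafkaEvent(event)
    for record in kafka_event.records:
        # Both properties return None when the record carries no schema registry metadata
        value_meta = record.value_schema_metadata
        if value_meta is not None:
            print(record.topic, value_meta.data_format, value_meta.schema_id)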
aws_lambda_powertools/utilities/kafka/__init__.py
@@ -0,0 +1,9 @@
+from aws_lambda_powertools.utilities.kafka.consumer_records import ConsumerRecords
+from aws_lambda_powertools.utilities.kafka.kafka_consumer import kafka_consumer
+from aws_lambda_powertools.utilities.kafka.schema_config import SchemaConfig
+
+__all__ = [
+    "kafka_consumer",
+    "ConsumerRecords",
+    "SchemaConfig",
+]
aws_lambda_powertools/utilities/kafka/consumer_records.py
@@ -0,0 +1,144 @@
+from __future__ import annotations
+
+from functools import cached_property
+from typing import TYPE_CHECKING, Any
+
+from aws_lambda_powertools.utilities.data_classes.common import CaseInsensitiveDict
+from aws_lambda_powertools.utilities.data_classes.kafka_event import KafkaEventBase, KafkaEventRecordBase
+from aws_lambda_powertools.utilities.kafka.deserializer.deserializer import get_deserializer
+from aws_lambda_powertools.utilities.kafka.serialization.serialization import serialize_to_output_type
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+
+    from aws_lambda_powertools.utilities.kafka.schema_config import SchemaConfig
+
+
+class ConsumerRecordRecords(KafkaEventRecordBase):
+    """
+    A Kafka Consumer Record
+    """
+
+    def __init__(self, data: dict[str, Any], schema_config: SchemaConfig | None = None):
+        super().__init__(data)
+        self.schema_config = schema_config
+
+    @cached_property
+    def key(self) -> Any:
+        key = self.get("key")
+
+        # Return None if key doesn't exist
+        if not key:
+            return None
+
+        # Determine schema type and schema string
+        schema_type = None
+        schema_str = None
+        output_serializer = None
+
+        if self.schema_config and self.schema_config.key_schema_type:
+            schema_type = self.schema_config.key_schema_type
+            schema_str = self.schema_config.key_schema
+            output_serializer = self.schema_config.key_output_serializer
+
+        # Always use get_deserializer if None it will default to DEFAULT
+        deserializer = get_deserializer(schema_type, schema_str)
+        deserialized_value = deserializer.deserialize(key)
+
+        # Apply output serializer if specified
+        if output_serializer:
+            return serialize_to_output_type(deserialized_value, output_serializer)
+
+        return deserialized_value
+
+    @cached_property
+    def value(self) -> Any:
+        value = self["value"]
+
+        # Determine schema type and schema string
+        schema_type = None
+        schema_str = None
+        output_serializer = None
+
+        if self.schema_config and self.schema_config.value_schema_type:
+            schema_type = self.schema_config.value_schema_type
+            schema_str = self.schema_config.value_schema
+            output_serializer = self.schema_config.value_output_serializer
+
+        # Always use get_deserializer if None it will default to DEFAULT
+        deserializer = get_deserializer(schema_type, schema_str)
+        deserialized_value = deserializer.deserialize(value)
+
+        # Apply output serializer if specified
+        if output_serializer:
+            return serialize_to_output_type(deserialized_value, output_serializer)
+
+        return deserialized_value
+
+    @property
+    def original_value(self) -> str:
+        """The original (base64 encoded) Kafka record value."""
+        return self["value"]
+
+    @property
+    def original_key(self) -> str | None:
+        """
+        The original (base64 encoded) Kafka record key.
+
+        This key is optional; if not provided,
+        a round-robin algorithm will be used to determine
+        the partition for the message.
+        """
+
+        return self.get("key")
+
+    @property
+    def original_headers(self) -> list[dict[str, list[int]]]:
+        """The raw Kafka record headers."""
+        return self["headers"]
+
+    @cached_property
+    def headers(self) -> dict[str, bytes]:
+        """Decodes the headers as a single dictionary."""
+        return CaseInsensitiveDict((k, bytes(v)) for chunk in self.original_headers for k, v in chunk.items())
+
+
+class ConsumerRecords(KafkaEventBase):
+    """Self-managed or MSK Apache Kafka event trigger
+    Documentation:
+    --------------
+    - https://docs.aws.amazon.com/lambda/latest/dg/with-kafka.html
+    - https://docs.aws.amazon.com/lambda/latest/dg/with-msk.html
+    """
+
+    def __init__(self, data: dict[str, Any], schema_config: SchemaConfig | None = None):
+        super().__init__(data)
+        self._records: Iterator[ConsumerRecordRecords] | None = None
+        self.schema_config = schema_config
+
+    @property
+    def records(self) -> Iterator[ConsumerRecordRecords]:
+        """The Kafka records."""
+        for chunk in self["records"].values():
+            for record in chunk:
+                yield ConsumerRecordRecords(data=record, schema_config=self.schema_config)
+
+    @property
+    def record(self) -> ConsumerRecordRecords:
+        """
+        Returns the next Kafka record using an iterator.
+
+        Returns
+        -------
+        ConsumerRecordRecords
+            The next Kafka record.
+
+        Raises
+        ------
+        StopIteration
+            If there are no more records available.
+
+        """
+        if self._records is None:
+            self._records = self.records
+        return next(self._records)
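A short usage sketch of the new ConsumerRecords class without the decorator; the handler name and the print calls are illustrative, while the imports and properties come from the code above:

from aws_lambda_powertools.utilities.kafka import ConsumerRecords, SchemaConfig


def lambda_handler(event: dict, context):
    records = ConsumerRecords(event, SchemaConfig(value_schema_type="JSON"))
    for record in records.records:
        print(record.value)           # base64-decoded and JSON-deserialized
        print(record.original_value)  # raw base64 string as delivered by Lambda
        print(record.headers)         # case-insensitive dict of header name -> bytes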
aws_lambda_powertools/utilities/kafka/deserializer/__init__.py
File without changes
aws_lambda_powertools/utilities/kafka/deserializer/avro.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+import io
+
+from avro.io import BinaryDecoder, DatumReader
+from avro.schema import parse as parse_schema
+
+from aws_lambda_powertools.utilities.kafka.deserializer.base import DeserializerBase
+from aws_lambda_powertools.utilities.kafka.exceptions import (
+    KafkaConsumerAvroSchemaParserError,
+    KafkaConsumerDeserializationError,
+)
+
+
+class AvroDeserializer(DeserializerBase):
+    """
+    Deserializer for Apache Avro formatted data.
+
+    This class provides functionality to deserialize Avro binary data using
+    a provided Avro schema definition.
+    """
+
+    def __init__(self, schema_str: str):
+        try:
+            self.parsed_schema = parse_schema(schema_str)
+            self.reader = DatumReader(self.parsed_schema)
+        except Exception as e:
+            raise KafkaConsumerAvroSchemaParserError(
+                f"Invalid Avro schema. Please ensure the provided avro schema is valid: {type(e).__name__}: {str(e)}",
+            ) from e
+
+    def deserialize(self, data: bytes | str) -> object:
+        """
+        Deserialize Avro binary data to a Python dictionary.
+
+        Parameters
+        ----------
+        data : bytes or str
+            The Avro binary data to deserialize. If provided as a string,
+            it will be decoded to bytes first.
+
+        Returns
+        -------
+        dict[str, Any]
+            Deserialized data as a dictionary.
+
+        Raises
+        ------
+        KafkaConsumerDeserializationError
+            When the data cannot be deserialized according to the schema,
+            typically due to data format incompatibility.
+
+        Examples
+        --------
+        >>> deserializer = AvroDeserializer(schema_str)
+        >>> avro_data = b'...'  # binary Avro data
+        >>> try:
+        ...     result = deserializer.deserialize(avro_data)
+        ...     # Process the deserialized data
+        ... except KafkaConsumerDeserializationError as e:
+        ...     print(f"Failed to deserialize: {e}")
+        """
+        try:
+            value = self._decode_input(data)
+            bytes_reader = io.BytesIO(value)
+            decoder = BinaryDecoder(bytes_reader)
+            return self.reader.read(decoder)
+        except Exception as e:
+            raise KafkaConsumerDeserializationError(
+                f"Error trying to deserialize avro data - {type(e).__name__}: {str(e)}",
+            ) from e
aws_lambda_powertools/utilities/kafka/deserializer/base.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import base64
+from abc import ABC, abstractmethod
+from typing import Any
+
+
+class DeserializerBase(ABC):
+    """
+    Abstract base class for deserializers.
+
+    This class defines the interface for all deserializers in the Kafka consumer utility
+    and provides a common method for decoding input data.
+
+    Methods
+    -------
+    deserialize(data)
+        Abstract method that must be implemented by subclasses to deserialize data.
+    _decode_input(data)
+        Helper method to decode input data to bytes.
+
+    Examples
+    --------
+    >>> class MyDeserializer(DeserializerBase):
+    ...     def deserialize(self, data: bytes | str) -> dict[str, Any]:
+    ...         value = self._decode_input(data)
+    ...         # Custom deserialization logic here
+    ...         return {"key": "value"}
+    """
+
+    @abstractmethod
+    def deserialize(self, data: str) -> dict[str, Any] | str | object:
+        """
+        Deserialize input data to a Python dictionary.
+
+        This abstract method must be implemented by subclasses to provide
+        specific deserialization logic.
+
+        Parameters
+        ----------
+        data : str
+            The data to deserialize, it's always a base64 encoded string
+
+        Returns
+        -------
+        dict[str, Any]
+            The deserialized data as a dictionary.
+        """
+        raise NotImplementedError("Subclasses must implement the deserialize method")
+
+    def _decode_input(self, data: bytes | str) -> bytes:
+        return base64.b64decode(data)
aws_lambda_powertools/utilities/kafka/deserializer/default.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+import base64
+
+from aws_lambda_powertools.utilities.kafka.deserializer.base import DeserializerBase
+
+
+class DefaultDeserializer(DeserializerBase):
+    """
+    A default deserializer that performs base64 decode + binary decode on the input data.
+
+    This deserializer simply returns the input data with base64 decode, which is useful when
+    no customized deserialization is needed or when handling raw data formats.
+    """
+
+    def deserialize(self, data: bytes | str) -> str:
+        """
+        Return the input data base64 decoded.
+
+        This method implements the deserialize interface and performs base64 decode.
+
+        Parameters
+        ----------
+        data : bytes or str
+            The input data to "deserialize".
+
+        Returns
+        -------
+        dict[str, Any]
+            The input data base64 decoded.
+
+        Example
+        --------
+        >>> deserializer = NoOpDeserializer()
+        >>>
+        >>> # With string input
+        >>> string_data = "Hello, world!"
+        >>> result = deserializer.deserialize(string_data)
+        >>> print(result == string_data)  # Output: True
+        >>>
+        >>> # With bytes input
+        >>> bytes_data = b"Binary data"
+        >>> result = deserializer.deserialize(bytes_data)
+        >>> print(result == bytes_data)  # Output: True
+        """
+        return base64.b64decode(data).decode("utf-8")
aws_lambda_powertools/utilities/kafka/deserializer/deserializer.py
@@ -0,0 +1,107 @@
+from __future__ import annotations
+
+import hashlib
+from typing import TYPE_CHECKING, Any
+
+from aws_lambda_powertools.utilities.kafka.deserializer.default import DefaultDeserializer
+from aws_lambda_powertools.utilities.kafka.deserializer.json import JsonDeserializer
+
+if TYPE_CHECKING:
+    from aws_lambda_powertools.utilities.kafka.deserializer.base import DeserializerBase
+
+# Cache for deserializers
+_deserializer_cache: dict[str, DeserializerBase] = {}
+
+
+def _get_cache_key(schema_type: str | object, schema_value: Any) -> str:
+    if schema_value is None:
+        return str(schema_type)
+
+    if isinstance(schema_value, str):
+        # For string schemas like Avro, hash the content
+        schema_hash = hashlib.md5(schema_value.encode("utf-8"), usedforsecurity=False).hexdigest()
+    else:
+        # For objects like Protobuf, use the object id
+        schema_hash = str(id(schema_value))
+
+    return f"{schema_type}_{schema_hash}"
+
+
+def get_deserializer(schema_type: str | object, schema_value: Any) -> DeserializerBase:
+    """
+    Factory function to get the appropriate deserializer based on schema type.
+
+    This function creates and returns a deserializer instance that corresponds to the
+    specified schema type. It handles lazy imports for optional dependencies.
+
+    Parameters
+    ----------
+    schema_type : str
+        The type of schema to use for deserialization.
+        Supported values are: "AVRO", "PROTOBUF", "JSON", or any other value for no-op.
+    schema_value : Any
+        The schema definition to use for deserialization. The format depends on the
+        schema_type:
+        - For "AVRO": A string containing the Avro schema definition
+        - For "PROTOBUF": A object containing the Protobuf schema definition
+        - For "JSON": Not used (can be None)
+        - For other types: Not used (can be None)
+
+    Returns
+    -------
+    DeserializerBase
+        An instance of a deserializer that implements the DeserializerBase interface.
+
+    Examples
+    --------
+    >>> # Get an Avro deserializer
+    >>> avro_schema = '''
+    ...     {
+    ...       "type": "record",
+    ...       "name": "User",
+    ...       "fields": [
+    ...         {"name": "name", "type": "string"},
+    ...         {"name": "age", "type": "int"}
+    ...       ]
+    ...     }
+    ... '''
+    >>> deserializer = get_deserializer("AVRO", avro_schema)
+    >>>
+    >>> # Get a JSON deserializer
+    >>> json_deserializer = get_deserializer("JSON", None)
+    >>>
+    >>> # Get a no-op deserializer for raw data
+    >>> no_op_deserializer = get_deserializer("RAW", None)
+    """
+
+    # Generate a cache key based on schema type and value
+    cache_key = _get_cache_key(schema_type, schema_value)
+
+    # Check if we already have this deserializer in cache
+    if cache_key in _deserializer_cache:
+        return _deserializer_cache[cache_key]
+
+    deserializer: DeserializerBase
+
+    if schema_type == "AVRO":
+        # Import here to avoid dependency if not used
+        from aws_lambda_powertools.utilities.kafka.deserializer.avro import AvroDeserializer
+
+        deserializer = AvroDeserializer(schema_value)
+    elif schema_type == "PROTOBUF":
+        # Import here to avoid dependency if not used
+        from aws_lambda_powertools.utilities.kafka.deserializer.protobuf import ProtobufDeserializer
+
+        deserializer = ProtobufDeserializer(schema_value)
+    elif schema_type == "JSON":
+        deserializer = JsonDeserializer()
+
+    else:
+        # Default to no-op deserializer
+        deserializer = DefaultDeserializer()
+
+    # Store in cache for future use
+    _deserializer_cache[cache_key] = deserializer
+
+    # Default to default deserializer that is base64 decode + bytes decoded
+    return deserializer
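A minimal illustration of the factory's caching behaviour, based on the code above (no schema definition is needed for JSON, and unrecognised types fall back to the default base64 deserializer):

from aws_lambda_powertools.utilities.kafka.deserializer.deserializer import get_deserializer

first = get_deserializer("JSON", None)
second = get_deserializer("JSON", None)
assert first is second  # same cached JsonDeserializer instance

raw = get_deserializer("RAW", None)  # falls back to DefaultDeserializer (base64 decode only)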
aws_lambda_powertools/utilities/kafka/deserializer/json.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+import base64
+import json
+
+from aws_lambda_powertools.utilities.kafka.deserializer.base import DeserializerBase
+from aws_lambda_powertools.utilities.kafka.exceptions import KafkaConsumerDeserializationError
+
+
+class JsonDeserializer(DeserializerBase):
+    """
+    Deserializer for JSON formatted data.
+
+    This class provides functionality to deserialize JSON data from bytes or string
+    into Python dictionaries.
+    """
+
+    def deserialize(self, data: bytes | str) -> dict:
+        """
+        Deserialize JSON data to a Python dictionary.
+
+        Parameters
+        ----------
+        data : bytes or str
+            The JSON data to deserialize. If provided as bytes, it will be decoded as UTF-8.
+            If provided as a string, it's assumed to be base64-encoded and will be decoded first.
+
+        Returns
+        -------
+        dict
+            Deserialized data as a dictionary.
+
+        Raises
+        ------
+        KafkaConsumerDeserializationError
+            When the data cannot be deserialized as valid JSON.
+
+        Examples
+        --------
+        >>> deserializer = JsonDeserializer()
+        >>> json_data = '{"key": "value", "number": 123}'
+        >>> try:
+        ...     result = deserializer.deserialize(json_data)
+        ...     print(result["key"])  # Output: value
+        ... except KafkaConsumerDeserializationError as e:
+        ...     print(f"Failed to deserialize: {e}")
+        """
+        try:
+            return json.loads(base64.b64decode(data).decode("utf-8"))
+        except Exception as e:
+            raise KafkaConsumerDeserializationError(
+                f"Error trying to deserialize json data - {type(e).__name__}: {str(e)}",
+            ) from e
aws_lambda_powertools/utilities/kafka/deserializer/protobuf.py
@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+from typing import Any
+
+from google.protobuf.internal.decoder import _DecodeVarint  # type: ignore[attr-defined]
+from google.protobuf.json_format import MessageToDict
+
+from aws_lambda_powertools.utilities.kafka.deserializer.base import DeserializerBase
+from aws_lambda_powertools.utilities.kafka.exceptions import (
+    KafkaConsumerDeserializationError,
+)
+
+
+class ProtobufDeserializer(DeserializerBase):
+    """
+    Deserializer for Protocol Buffer formatted data.
+
+    This class provides functionality to deserialize Protocol Buffer binary data
+    into Python dictionaries using the provided Protocol Buffer message class.
+    """
+
+    def __init__(self, message_class: Any):
+        self.message_class = message_class
+
+    def deserialize(self, data: bytes | str) -> dict:
+        """
+        Deserialize Protocol Buffer binary data to a Python dictionary.
+
+        Parameters
+        ----------
+        data : bytes or str
+            The Protocol Buffer binary data to deserialize. If provided as a string,
+            it's assumed to be base64-encoded and will be decoded first.
+
+        Returns
+        -------
+        dict
+            Deserialized data as a dictionary with field names preserved from the
+            Protocol Buffer definition.
+
+        Raises
+        ------
+        KafkaConsumerDeserializationError
+            When the data cannot be deserialized according to the message class,
+            typically due to data format incompatibility or incorrect message class.
+
+        Notes
+        -----
+        This deserializer handles both standard Protocol Buffer format and the Confluent
+        Schema Registry format which includes message index information. It will first try
+        standard deserialization and fall back to message index handling if needed.
+
+        Example
+        --------
+        >>> # Assuming proper protobuf setup
+        >>> deserializer = ProtobufDeserializer(my_proto_module.MyMessage)
+        >>> proto_data = b'...'  # binary protobuf data
+        >>> try:
+        ...     result = deserializer.deserialize(proto_data)
+        ...     # Process the deserialized dictionary
+        ... except KafkaConsumerDeserializationError as e:
+        ...     print(f"Failed to deserialize: {e}")
+        """
+        value = self._decode_input(data)
+        try:
+            message = self.message_class()
+            message.ParseFromString(value)
+            return MessageToDict(message, preserving_proto_field_name=True)
+        except Exception:
+            return self._deserialize_with_message_index(value, self.message_class())
+
+    def _deserialize_with_message_index(self, data: bytes, parser: Any) -> dict:
+        """
+        Deserialize protobuf message with Confluent message index handling.
+
+        Parameters
+        ----------
+        data : bytes
+            data
+        parser : google.protobuf.message.Message
+            Protobuf message instance to parse the data into
+
+        Returns
+        -------
+        dict
+            Dictionary representation of the parsed protobuf message with original field names
+
+        Raises
+        ------
+        KafkaConsumerDeserializationError
+            If deserialization fails
+
+        Notes
+        -----
+        This method handles the special case of Confluent Schema Registry's message index
+        format, where the message is prefixed with either a single 0 (for the first schema)
+        or a list of schema indexes. The actual protobuf message follows these indexes.
+        """
+
+        buffer = memoryview(data)
+        pos = 0
+
+        try:
+            first_value, new_pos = _DecodeVarint(buffer, pos)
+            pos = new_pos
+
+            if first_value != 0:
+                for _ in range(first_value):
+                    _, new_pos = _DecodeVarint(buffer, pos)
+                    pos = new_pos
+
+            parser.ParseFromString(data[pos:])
+            return MessageToDict(parser, preserving_proto_field_name=True)
+        except Exception as e:
+            raise KafkaConsumerDeserializationError(
+                f"Error trying to deserialize protobuf data - {type(e).__name__}: {str(e)}",
+            ) from e
aws_lambda_powertools/utilities/kafka/exceptions.py
@@ -0,0 +1,22 @@
+class KafkaConsumerAvroSchemaParserError(Exception):
+    """
+    Error raised when parsing Avro schema definition fails.
+    """
+
+
+class KafkaConsumerDeserializationError(Exception):
+    """
+    Error raised when message deserialization fails.
+    """
+
+
+class KafkaConsumerMissingSchemaError(Exception):
+    """
+    Error raised when a required schema is not provided.
+    """
+
+
+class KafkaConsumerOutputSerializerError(Exception):
+    """
+    Error raised when output serializer fails.
+    """
aws_lambda_powertools/utilities/kafka/kafka_consumer.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from aws_lambda_powertools.middleware_factory import lambda_handler_decorator
+from aws_lambda_powertools.utilities.kafka.consumer_records import ConsumerRecords
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from aws_lambda_powertools.utilities.kafka.schema_config import SchemaConfig
+    from aws_lambda_powertools.utilities.typing import LambdaContext
+
+
+@lambda_handler_decorator
+def kafka_consumer(
+    handler: Callable[[Any, LambdaContext], Any],
+    event: dict[str, Any],
+    context: LambdaContext,
+    schema_config: SchemaConfig | None = None,
+):
+    """
+    Decorator for processing Kafka consumer records in AWS Lambda functions.
+
+    This decorator transforms the raw Lambda event into a ConsumerRecords object,
+    making it easier to process Kafka messages with optional schema validation
+    and deserialization.
+
+    Parameters
+    ----------
+    handler : Callable[[Any, LambdaContext], Any]
+        The Lambda handler function being decorated.
+    event : dict[str, Any]
+        The Lambda event containing Kafka records.
+    context : LambdaContext
+        The Lambda context object.
+    schema_config : SchemaConfig, optional
+        Schema configuration for deserializing Kafka records.
+        Must be an instance of SchemaConfig.
+
+    Returns
+    -------
+    Any
+        The return value from the handler function.
+
+    Examples
+    --------
+    >>> from aws_lambda_powertools.utilities.kafka import kafka_consumer, SchemaConfig
+    >>>
+    >>> # With schema validation using SchemaConfig
+    >>> schema_config = SchemaConfig(value_schema_type="JSON")
+    >>>
+    >>> @kafka_consumer(schema_config=schema_config)
+    >>> def handler_with_schema(records, context):
+    >>>     for record in records:
+    >>>         # record.value will be automatically deserialized according to schema_config
+    >>>         process_message(record.value)
+    >>>     return {"statusCode": 200}
+    """
+    return handler(ConsumerRecords(event, schema_config), context)
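Putting the pieces together, a hedged end-to-end sketch of a decorated handler; the handler name and the print call are illustrative, while kafka_consumer, SchemaConfig and ConsumerRecords are the APIs introduced above:

from aws_lambda_powertools.utilities.kafka import ConsumerRecords, SchemaConfig, kafka_consumer
from aws_lambda_powertools.utilities.typing import LambdaContext

schema_config = SchemaConfig(value_schema_type="JSON")


@kafka_consumer(schema_config=schema_config)
def lambda_handler(event: ConsumerRecords, context: LambdaContext):
    for record in event.records:
        payload = record.value  # deserialized according to schema_config
        print(record.topic, payload)
    return {"statusCode": 200}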
aws_lambda_powertools/utilities/kafka/schema_config.py
@@ -0,0 +1,83 @@
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from aws_lambda_powertools.utilities.kafka.exceptions import KafkaConsumerMissingSchemaError
+
+
+class SchemaConfig:
+    """
+    Configuration for schema management in Kafka consumers.
+
+    This class handles schema configuration for both keys and values in Kafka records,
+    supporting AVRO, PROTOBUF, and JSON schema types.
+
+    Parameters
+    ----------
+    value_schema_type : {'AVRO', 'PROTOBUF', 'JSON', None}, default=None
+        Schema type for message values.
+    value_schema : str, optional
+        Schema definition for message values. Required when value_schema_type is 'AVRO' or 'PROTOBUF'.
+    value_output_serializer : Any, optional
+        Custom output serializer for message values. Supports Pydantic classes, Dataclasses and Custom Class
+    key_schema_type : {'AVRO', 'PROTOBUF', 'JSON', None}, default=None
+        Schema type for message keys.
+    key_schema : str, optional
+        Schema definition for message keys. Required when key_schema_type is 'AVRO' or 'PROTOBUF'.
+    key_output_serializer : Any, optional
+        Custom serializer for message keys. Supports Pydantic classes, Dataclasses and Custom Class
+
+    Raises
+    ------
+    KafkaConsumerMissingSchemaError
+        When schema_type is set to 'AVRO' or 'PROTOBUF' but the corresponding schema
+        definition is not provided.
+
+    Examples
+    --------
+    >>> # Configure with AVRO schema for values
+    >>> avro_schema = '''
+    ...     {
+    ...       "type": "record",
+    ...       "name": "User",
+    ...       "fields": [
+    ...         {"name": "name", "type": "string"},
+    ...         {"name": "age", "type": "int"}
+    ...       ]
+    ...     }
+    ... '''
+    >>> config = SchemaConfig(value_schema_type="AVRO", value_schema=avro_schema)
+
+    >>> # Configure with JSON schema for both keys and values
+    >>> config = SchemaConfig(
+    ...     value_schema_type="JSON",
+    ...     key_schema_type="JSON"
+    ... )
+    """
+
+    def __init__(
+        self,
+        value_schema_type: Literal["AVRO", "PROTOBUF", "JSON"] | None = None,
+        value_schema: str | None = None,
+        value_output_serializer: Any | None = None,
+        key_schema_type: Literal["AVRO", "PROTOBUF", "JSON", None] | None = None,
+        key_schema: str | None = None,
+        key_output_serializer: Any | None = None,
+    ):
+        # Validate schema requirements
+        self._validate_schema_requirements(value_schema_type, value_schema, "value")
+        self._validate_schema_requirements(key_schema_type, key_schema, "key")
+
+        self.value_schema_type = value_schema_type
+        self.value_schema = value_schema
+        self.value_output_serializer = value_output_serializer
+        self.key_schema_type = key_schema_type
+        self.key_schema = key_schema
+        self.key_output_serializer = key_output_serializer
+
+    def _validate_schema_requirements(self, schema_type: str | None, schema: str | None, prefix: str) -> None:
+        """Validate that schema is provided when required by schema_type."""
+        if schema_type in ["AVRO", "PROTOBUF"] and schema is None:
+            raise KafkaConsumerMissingSchemaError(
+                f"{prefix}_schema must be provided when {prefix}_schema_type is {schema_type}",
+            )
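As a worked example of the output-serializer option (the User dataclass and the Avro schema are assumptions for illustration; the error behaviour follows _validate_schema_requirements above):

from dataclasses import dataclass

from aws_lambda_powertools.utilities.kafka import SchemaConfig


@dataclass
class User:
    name: str
    age: int


# Hypothetical Avro schema matching the dataclass above
AVRO_SCHEMA = """
{
  "type": "record",
  "name": "User",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "age", "type": "int"}
  ]
}
"""

config = SchemaConfig(
    value_schema_type="AVRO",
    value_schema=AVRO_SCHEMA,
    value_output_serializer=User,  # each deserialized value is converted into a User instance
)

# SchemaConfig(value_schema_type="AVRO") without value_schema raises KafkaConsumerMissingSchemaError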
aws_lambda_powertools/utilities/kafka/serialization/__init__.py
File without changes
aws_lambda_powertools/utilities/kafka/serialization/base.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from aws_lambda_powertools.utilities.kafka.serialization.types import T
+
+
+class OutputSerializerBase(ABC):
+    """
+    Abstract base class for output serializers.
+
+    This class defines the interface for serializers that transform dictionary data
+    into specific output formats or class instances.
+
+    Methods
+    -------
+    serialize(data, output)
+        Abstract method that must be implemented by subclasses to serialize data.
+
+    Examples
+    --------
+    >>> class MyOutputSerializer(OutputSerializerBase):
+    ...     def serialize(self, data: dict[str, Any], output=None):
+    ...         if output:
+    ...             # Convert dictionary to class instance
+    ...             return output(**data)
+    ...         return data  # Return as is if no output class provided
+    """
+
+    @abstractmethod
+    def serialize(self, data: dict[str, Any], output: type[T] | Callable | None = None) -> T | dict[str, Any]:
+        """
+        Serialize dictionary data into a specific output format or class instance.
+
+        This abstract method must be implemented by subclasses to provide
+        specific serialization logic.
+
+        Parameters
+        ----------
+        data : dict[str, Any]
+            The dictionary data to serialize.
+        output : type[T] or None, optional
+            Optional class type to convert the dictionary into. If provided,
+            the method should return an instance of this class.
+
+        Returns
+        -------
+        T or dict[str, Any]
+            An instance of output if provided, otherwise a processed dictionary.
+            The generic type T represents the type of the output.
+        """
+        raise NotImplementedError("Subclasses must implement this method")
aws_lambda_powertools/utilities/kafka/serialization/custom_dict.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from aws_lambda_powertools.utilities.kafka.serialization.base import OutputSerializerBase
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from aws_lambda_powertools.utilities.kafka.serialization.types import T
+
+
+class CustomDictOutputSerializer(OutputSerializerBase):
+    """
+    Serializer that allows custom dict transformations.
+
+    This serializer takes dictionary data and either returns it as-is or passes it
+    through a custom transformation function provided as the output parameter.
+    """
+
+    def serialize(self, data: dict[str, Any], output: type[T] | Callable | None = None) -> T | dict[str, Any]:
+        return data if output is None else output(data)  # type: ignore[call-arg]
aws_lambda_powertools/utilities/kafka/serialization/dataclass.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from dataclasses import is_dataclass
+from typing import TYPE_CHECKING, Any, cast
+
+from aws_lambda_powertools.utilities.kafka.serialization.base import OutputSerializerBase
+from aws_lambda_powertools.utilities.kafka.serialization.types import T
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+
+class DataclassOutputSerializer(OutputSerializerBase):
+    """
+    Serializer that converts dictionary data into dataclass instances.
+
+    This serializer takes dictionary data and converts it into an instance of the specified
+    dataclass type.
+    """
+
+    def serialize(self, data: dict[str, Any], output: type[T] | Callable | None = None) -> T | dict[str, Any]:
+        if not is_dataclass(output):  # pragma: no cover
+            raise ValueError("Output class must be a dataclass")
+
+        return cast(T, output(**data))
aws_lambda_powertools/utilities/kafka/serialization/pydantic.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from pydantic import TypeAdapter
+
+from aws_lambda_powertools.utilities.kafka.serialization.base import OutputSerializerBase
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from aws_lambda_powertools.utilities.kafka.serialization.types import T
+
+
+class PydanticOutputSerializer(OutputSerializerBase):
+    """
+    Serializer that converts dictionary data into Pydantic model instances.
+
+    This serializer takes dictionary data and validates/converts it into an instance
+    of the specified Pydantic model type using Pydantic's TypeAdapter.
+    """
+
+    def serialize(self, data: dict[str, Any], output: type[T] | Callable | None = None) -> T | dict[str, Any]:
+        # Use TypeAdapter for better support of Union types and other complex types
+        adapter: TypeAdapter = TypeAdapter(output)
+        return adapter.validate_python(data)
aws_lambda_powertools/utilities/kafka/serialization/serialization.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import sys
+from dataclasses import is_dataclass
+from typing import TYPE_CHECKING, Annotated, Any, Optional, Union, get_args, get_origin
+
+# Conditionally import or define UnionType based on Python version
+if sys.version_info >= (3, 10):
+    from types import UnionType  # Available in Python 3.10+
+else:
+    UnionType = Union  # Fallback for Python 3.9
+
+from aws_lambda_powertools.utilities.kafka.serialization.custom_dict import CustomDictOutputSerializer
+from aws_lambda_powertools.utilities.kafka.serialization.dataclass import DataclassOutputSerializer
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from aws_lambda_powertools.utilities.kafka.serialization.types import T
+
+
+def _get_output_serializer(output: type[T] | Callable | None = None) -> Any:
+    """
+    Returns the appropriate serializer for the given output class.
+    Uses lazy imports to avoid unnecessary dependencies.
+    """
+    # Check if it's a dataclass
+    if is_dataclass(output):
+        return DataclassOutputSerializer()
+
+    if _is_pydantic_model(output):
+        from aws_lambda_powertools.utilities.kafka.serialization.pydantic import PydanticOutputSerializer
+
+        return PydanticOutputSerializer()
+
+    # Default to custom serializer
+    return CustomDictOutputSerializer()
+
+
+def _is_pydantic_model(obj: Any) -> bool:
+    if isinstance(obj, type):
+        # Check for Pydantic model attributes without direct import
+        has_model_fields = getattr(obj, "model_fields", None) is not None
+        has_model_validate = callable(getattr(obj, "model_validate", None))
+        return has_model_fields and has_model_validate
+
+    origin = get_origin(obj)
+    if origin in (Union, Optional, Annotated) or (sys.version_info >= (3, 10) and origin in (Union, UnionType)):
+        # Check if any element in the Union is a Pydantic model
+        for arg in get_args(obj):
+            if _is_pydantic_model(arg):
+                return True
+
+    return False
+
+
+def serialize_to_output_type(
+    data: object | dict[str, Any],
+    output: type[T] | Callable | None = None,
+) -> T | dict[str, Any]:
+    """
+    Helper function to directly serialize data to the specified output class
+    """
+    serializer = _get_output_serializer(output)
+    return serializer.serialize(data, output)
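A small sketch of how serialize_to_output_type dispatches, based on the helpers above (the User types and the payload are illustrative assumptions):

from dataclasses import dataclass

from pydantic import BaseModel

from aws_lambda_powertools.utilities.kafka.serialization.serialization import serialize_to_output_type


@dataclass
class UserDataclass:
    name: str
    age: int


class UserModel(BaseModel):
    name: str
    age: int


payload = {"name": "Ada", "age": 36}

user_dc = serialize_to_output_type(payload, UserDataclass)  # DataclassOutputSerializer
user_pd = serialize_to_output_type(payload, UserModel)      # PydanticOutputSerializer via TypeAdapter
as_is = serialize_to_output_type(payload)                    # CustomDictOutputSerializer returns the dict unchanged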
aws_lambda_powertools/utilities/parser/models/kafka.py
@@ -8,6 +8,11 @@ from aws_lambda_powertools.shared.functions import base64_decode, bytes_to_strin
 SERVERS_DELIMITER = ","
 
 
+class KafkaRecordSchemaMetadata(BaseModel):
+    dataFormat: str
+    schemaId: str
+
+
 class KafkaRecordModel(BaseModel):
     topic: str
     partition: int
@@ -17,6 +22,8 @@ class KafkaRecordModel(BaseModel):
     key: Optional[bytes] = None
     value: Union[str, Type[BaseModel]]
     headers: List[Dict[str, bytes]]
+    keySchemaMetadata: Optional[KafkaRecordSchemaMetadata] = None
+    valueSchemaMetadata: Optional[KafkaRecordSchemaMetadata] = None
 
     # key is optional; only decode if not None
     @field_validator("key", mode="before")
{aws_lambda_powertools-3.14.1a5.dist-info → aws_lambda_powertools-3.15.0.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: aws_lambda_powertools
-Version: 3.14.1a5
+Version: 3.15.0
 Summary: Powertools for AWS Lambda (Python) is a developer toolkit to implement Serverless best practices and increase developer velocity.
 License: MIT
 Keywords: aws_lambda_powertools,aws,tracing,logging,lambda,powertools,feature_flags,idempotency,middleware
@@ -21,11 +21,14 @@ Provides-Extra: all
 Provides-Extra: aws-sdk
 Provides-Extra: datadog
 Provides-Extra: datamasking
+Provides-Extra: kafka-consumer-avro
+Provides-Extra: kafka-consumer-protobuf
 Provides-Extra: parser
 Provides-Extra: redis
 Provides-Extra: tracer
 Provides-Extra: validation
 Provides-Extra: valkey
+Requires-Dist: avro (>=1.12.0,<2.0.0) ; extra == "kafka-consumer-avro"
 Requires-Dist: aws-encryption-sdk (>=3.1.1,<5.0.0) ; extra == "all" or extra == "datamasking"
 Requires-Dist: aws-xray-sdk (>=2.8.0,<3.0.0) ; extra == "tracer" or extra == "all"
 Requires-Dist: boto3 (>=1.34.32,<2.0.0) ; extra == "aws-sdk"
@@ -33,6 +36,7 @@ Requires-Dist: datadog-lambda (>=6.106.0,<7.0.0) ; extra == "datadog"
 Requires-Dist: fastjsonschema (>=2.14.5,<3.0.0) ; extra == "validation" or extra == "all"
 Requires-Dist: jmespath (>=1.0.1,<2.0.0)
 Requires-Dist: jsonpath-ng (>=1.6.0,<2.0.0) ; extra == "all" or extra == "datamasking"
+Requires-Dist: protobuf (>=6.30.2,<7.0.0) ; extra == "kafka-consumer-protobuf"
 Requires-Dist: pydantic (>=2.4.0,<3.0.0) ; extra == "parser" or extra == "all"
 Requires-Dist: pydantic-settings (>=2.6.1,<3.0.0) ; extra == "all"
 Requires-Dist: redis (>=4.4,<7.0) ; extra == "redis"
{aws_lambda_powertools-3.14.1a5.dist-info → aws_lambda_powertools-3.15.0.dist-info}/RECORD
RENAMED
@@ -1,6 +1,6 @@
 aws_lambda_powertools/__init__.py,sha256=o4iEHU0MfWC0_TfVmisxi0VOAUw5uQfqLQWr0t29ZaE,676
 aws_lambda_powertools/event_handler/__init__.py,sha256=HWTBEIrd2Znes31qT9h8k9PEfmWwepWyh9bxed2ES-0,1275
-aws_lambda_powertools/event_handler/api_gateway.py,sha256=
+aws_lambda_powertools/event_handler/api_gateway.py,sha256=LQfrqwXCze2sY8kaG2v4x67AE-c70ASJ5bxK19U6peU,122023
 aws_lambda_powertools/event_handler/appsync.py,sha256=MNUlaM-4Ioaejei4L5hoW_DuDgOkQWAtMmKZU_Jwce4,18530
 aws_lambda_powertools/event_handler/bedrock_agent.py,sha256=j3pKSDUmfb5MNcanqm78s9THh7bvh40LKKgnov95JcU,15146
 aws_lambda_powertools/event_handler/bedrock_agent_function.py,sha256=7EXeF-mOVAaFiw5Lu872NQVDexzPkgAFNj5nvE9KU4I,9441
@@ -98,7 +98,7 @@ aws_lambda_powertools/shared/json_encoder.py,sha256=JQeWNu-4M7_xI_hqYExrxsb3OcEH
 aws_lambda_powertools/shared/lazy_import.py,sha256=TbXQm2bcwXdZrYdBaJJXIswyLlumM85RJ_A_0w-h-GU,2019
 aws_lambda_powertools/shared/types.py,sha256=EZ_tbX3F98LA4Zcra1hTEjzRacpZAtggK957Zcv1oKg,135
 aws_lambda_powertools/shared/user_agent.py,sha256=DrCMFQuT4a4iIrpcWpAIjY37EFqR9-QxlxDGD-Nn9Gg,7081
-aws_lambda_powertools/shared/version.py,sha256=
+aws_lambda_powertools/shared/version.py,sha256=and7WBc_Ox1Z0FcIGlQjGhuFeAT1WVEaKytTKGK0tuk,83
 aws_lambda_powertools/tracing/__init__.py,sha256=f4bMThOPBPWTPVcYqcAIErAJPerMsf3H_Z4gCXCsK9I,141
 aws_lambda_powertools/tracing/base.py,sha256=WSO986XGBOe9K0F2SnG6ustJokIrtO0m0mcL8N7mfno,4544
 aws_lambda_powertools/tracing/extensions.py,sha256=APOfXOq-hRBKaK5WyfIyrd_6M1_9SWJZ3zxLA9jDZzU,492
@@ -137,7 +137,7 @@ aws_lambda_powertools/utilities/data_classes/dynamo_db_stream_event.py,sha256=tK
 aws_lambda_powertools/utilities/data_classes/event_bridge_event.py,sha256=TeTtx2jOKyZSVdSPFrtxZUgv0Yt6HW_H7J5NNxy_mHo,2434
 aws_lambda_powertools/utilities/data_classes/event_source.py,sha256=CKo1_TY0UcUVsh3PUk39z2WFVC2Bf4QgXWj93eEHbcY,1211
 aws_lambda_powertools/utilities/data_classes/iot_registry_event.py,sha256=wUa0Gr64maNGkd9qKNxtwFHawIJN-nj9X8uLFdINE7c,10842
-aws_lambda_powertools/utilities/data_classes/kafka_event.py,sha256=
+aws_lambda_powertools/utilities/data_classes/kafka_event.py,sha256=kjTfsvOpiAtY1le4sb55VxsPqmjLedxqk7ruMc3SxfI,5171
 aws_lambda_powertools/utilities/data_classes/kinesis_firehose_event.py,sha256=5H-Rt6LG80_jVkTlWxhutZH0cv4Xrhzm-fqwkpjPfQM,10508
 aws_lambda_powertools/utilities/data_classes/kinesis_stream_event.py,sha256=QVDKkcXQRp6T0KfjoZXkr_2U__5w9IT096jZqaoa4hI,5022
 aws_lambda_powertools/utilities/data_classes/lambda_function_url_event.py,sha256=dFhL3IYebW4WeJc1XJJYmF94gI_qYeay6EpT9b2mDZU,633
@@ -190,6 +190,25 @@ aws_lambda_powertools/utilities/idempotency/serialization/no_op.py,sha256=9m5-EG
 aws_lambda_powertools/utilities/idempotency/serialization/pydantic.py,sha256=NVKiKk87d_8BgcYpyxHJVRnRaDVCA6PP28SG3J0GGKg,1471
 aws_lambda_powertools/utilities/jmespath_utils/__init__.py,sha256=Br89UButW4sLv2Dkjz_MiPS0TpMEPOO-W4wW0n9quPc,3597
 aws_lambda_powertools/utilities/jmespath_utils/envelopes.py,sha256=jZJYbUldrZgCWl-PL8oRmC9p6G6D-3812kJmJfLkM6Q,817
+aws_lambda_powertools/utilities/kafka/__init__.py,sha256=PUx4xgcL7td8fSfumD6INCdmGGIy7E11eqHk0w4XiEU,320
+aws_lambda_powertools/utilities/kafka/consumer_records.py,sha256=1S6mRcKeqPQtiRyDJIL308TQ9dEMdv13JdBFCwRVVmo,4825
+aws_lambda_powertools/utilities/kafka/deserializer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+aws_lambda_powertools/utilities/kafka/deserializer/avro.py,sha256=G2pr5GLOsurhRCcMNCpd1VFcIh79LSYWDqg4_qVN0-g,2401
+aws_lambda_powertools/utilities/kafka/deserializer/base.py,sha256=-bOPbZADsKK-HuQupZy54csY74rvIXVIQiFxGHxjTww,1549
+aws_lambda_powertools/utilities/kafka/deserializer/default.py,sha256=GhAUfZO3rcasy_ZiKEv3IOFX_VNDTA61zb2eo0HAIcQ,1416
+aws_lambda_powertools/utilities/kafka/deserializer/deserializer.py,sha256=Wh4-srDUFGBUZ0kXiiU-R8DznumOKdLPk9Va88LT7zE,3733
+aws_lambda_powertools/utilities/kafka/deserializer/json.py,sha256=2_helv8DyUoK60z3wflLd1BwkuRINaVjucyYie10qUM,1742
+aws_lambda_powertools/utilities/kafka/deserializer/protobuf.py,sha256=WEaJysV__sZy32HNpqZZ6KrZyxyvsEnxf24On36zeXY,4183
+aws_lambda_powertools/utilities/kafka/exceptions.py,sha256=kwNfAcYmiTgk19QEdN6oHtMc2dDIZh64uWr_d5X3FFc,495
+aws_lambda_powertools/utilities/kafka/kafka_consumer.py,sha256=x1RcOWJcG74JvzrahO4cSSrAuSZFUlMtg14QjAjtJjY,2069
+aws_lambda_powertools/utilities/kafka/schema_config.py,sha256=GaSiwKWiUEQqrdBZWMaGl_DQcqpyplohoCMgMrCOyGI,3268
+aws_lambda_powertools/utilities/kafka/serialization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+aws_lambda_powertools/utilities/kafka/serialization/base.py,sha256=z3Z_CbmBbrKDfzZjyvWkhdhnmgpR4CznefweATh-0_8,1904
+aws_lambda_powertools/utilities/kafka/serialization/custom_dict.py,sha256=ZEIiPy9HxdImeCh0ljzWPFw93SD1ktsJpIKqD2_uzfg,789
+aws_lambda_powertools/utilities/kafka/serialization/dataclass.py,sha256=uWqi8K0Hcn-ET-RcJwwn4HyOCtu_Oq6oTZxXtIW8xBE,871
+aws_lambda_powertools/utilities/kafka/serialization/pydantic.py,sha256=2Uuv6b20bsYVOY5gl8tcVHVVi3krCquEzPVuzsO5LeE,933
+aws_lambda_powertools/utilities/kafka/serialization/serialization.py,sha256=XqLLZcHFea0jhtznNrrZeK5vW1arEBu8a0jwziJC-jY,2312
+aws_lambda_powertools/utilities/kafka/serialization/types.py,sha256=zClRo5ve8RGia7wQnby41W-Zprj-slOA5da1LfYnuhw,45
 aws_lambda_powertools/utilities/parameters/__init__.py,sha256=KVJWu7pyunw9to8VkTZ0fy6MCR9iW0tUCjAJVqgdwdw,771
 aws_lambda_powertools/utilities/parameters/appconfig.py,sha256=CqFVXUKMjKsGX0eUClPxQNlLdPTJbbYOTU_7Vz8b8TY,8363
 aws_lambda_powertools/utilities/parameters/base.py,sha256=kTzLaVcp64mM6CwfP3Tz4kkMFvUwLexg9DVJ1zkFUtw,12743
@@ -231,7 +250,7 @@ aws_lambda_powertools/utilities/parser/models/cloudwatch.py,sha256=1y2WmgBbIo6Gk
 aws_lambda_powertools/utilities/parser/models/dynamodb.py,sha256=ktjvd5L7LLvitgi1yPFbtc_TK8J_3yzqIHmaZ4UfTt8,2197
 aws_lambda_powertools/utilities/parser/models/event_bridge.py,sha256=Fanggwc7Ug_YTzFGl1utSRn7fjxILNR_GnkrNG9IQ88,941
 aws_lambda_powertools/utilities/parser/models/iot_registry_events.py,sha256=hnuCxt9jYri_MRI9Z-n7aknA2CcBIHkPD__G9jSQEZ4,5448
-aws_lambda_powertools/utilities/parser/models/kafka.py,sha256=
+aws_lambda_powertools/utilities/parser/models/kafka.py,sha256=vd214aPiGW434yFdpZCBpTnPCrJ4_IyXHK8kifhxEI0,2137
 aws_lambda_powertools/utilities/parser/models/kinesis.py,sha256=-8zJh1GJd69QCo0tGQkg7SEqbcWJCbwmeSb1Rx8Jt40,1790
 aws_lambda_powertools/utilities/parser/models/kinesis_firehose.py,sha256=K1MbkoX9yZjMcZOJ7H7nek3OgVoBAjtqorKlcoWRlQ8,939
 aws_lambda_powertools/utilities/parser/models/kinesis_firehose_sqs.py,sha256=VTXGKlo0aBpi8njpOOiTaNFncl0_vRRWmiI7Z6ngKPI,875
@@ -271,7 +290,7 @@ aws_lambda_powertools/utilities/validation/envelopes.py,sha256=YD5HOFx6IClQgii0n
 aws_lambda_powertools/utilities/validation/exceptions.py,sha256=PKy_19zQMBJGCMMFl-sMkcm-cc0v3zZBn_bhGE4wKNo,2084
 aws_lambda_powertools/utilities/validation/validator.py,sha256=khCqFhACSdn0nKyYRRPiC5Exht956hTfSfhlV3IRmpg,10099
 aws_lambda_powertools/warnings/__init__.py,sha256=vqDVeZz8wGtD8WGYNSkQE7AHwqtIrPGRxuoJR_BBnSs,1193
-aws_lambda_powertools-3.
-aws_lambda_powertools-3.
-aws_lambda_powertools-3.
-aws_lambda_powertools-3.
+aws_lambda_powertools-3.15.0.dist-info/LICENSE,sha256=vMHS2eBgmwPUIMPb7LQ4p7ib_FPVQXarVjAasflrTwo,951
+aws_lambda_powertools-3.15.0.dist-info/METADATA,sha256=10eOohXPQlEeOM8QpLP-2ZyvAuukPgXLD5JJgtY-PI0,11528
+aws_lambda_powertools-3.15.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+aws_lambda_powertools-3.15.0.dist-info/RECORD,,
{aws_lambda_powertools-3.14.1a5.dist-info → aws_lambda_powertools-3.15.0.dist-info}/LICENSE
File without changes
{aws_lambda_powertools-3.14.1a5.dist-info → aws_lambda_powertools-3.15.0.dist-info}/WHEEL
File without changes