kafka-python 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kafka/__init__.py +34 -0
- kafka/__main__.py +5 -0
- kafka/admin/__init__.py +29 -0
- kafka/admin/__main__.py +5 -0
- kafka/admin/_acls.py +355 -0
- kafka/admin/_cluster.py +359 -0
- kafka/admin/_configs.py +479 -0
- kafka/admin/_groups.py +754 -0
- kafka/admin/_partitions.py +595 -0
- kafka/admin/_topics.py +281 -0
- kafka/admin/_transactions.py +450 -0
- kafka/admin/_users.py +194 -0
- kafka/admin/client.py +373 -0
- kafka/benchmarks/__init__.py +0 -0
- kafka/benchmarks/consumer_performance.py +138 -0
- kafka/benchmarks/load_example.py +109 -0
- kafka/benchmarks/producer_encode_path.py +201 -0
- kafka/benchmarks/producer_performance.py +161 -0
- kafka/benchmarks/profile_protocol.py +138 -0
- kafka/benchmarks/protocol_old_vs_new.py +447 -0
- kafka/benchmarks/record_batch_compose.py +77 -0
- kafka/benchmarks/record_batch_read.py +82 -0
- kafka/benchmarks/varint_speed.py +426 -0
- kafka/cli/__init__.py +36 -0
- kafka/cli/admin/__init__.py +117 -0
- kafka/cli/admin/acls/__init__.py +9 -0
- kafka/cli/admin/acls/common.py +76 -0
- kafka/cli/admin/acls/create.py +19 -0
- kafka/cli/admin/acls/delete.py +23 -0
- kafka/cli/admin/acls/describe.py +16 -0
- kafka/cli/admin/cluster/__init__.py +14 -0
- kafka/cli/admin/cluster/describe.py +11 -0
- kafka/cli/admin/cluster/describe_quorum.py +11 -0
- kafka/cli/admin/cluster/features.py +52 -0
- kafka/cli/admin/cluster/log_dirs.py +43 -0
- kafka/cli/admin/cluster/versions.py +33 -0
- kafka/cli/admin/configs/__init__.py +10 -0
- kafka/cli/admin/configs/alter.py +43 -0
- kafka/cli/admin/configs/common.py +17 -0
- kafka/cli/admin/configs/describe.py +30 -0
- kafka/cli/admin/configs/list.py +16 -0
- kafka/cli/admin/configs/reset.py +20 -0
- kafka/cli/admin/groups/__init__.py +16 -0
- kafka/cli/admin/groups/alter_offsets.py +30 -0
- kafka/cli/admin/groups/delete.py +11 -0
- kafka/cli/admin/groups/delete_offsets.py +29 -0
- kafka/cli/admin/groups/describe.py +11 -0
- kafka/cli/admin/groups/list.py +28 -0
- kafka/cli/admin/groups/list_offsets.py +29 -0
- kafka/cli/admin/groups/remove_members.py +40 -0
- kafka/cli/admin/groups/reset_offsets.py +139 -0
- kafka/cli/admin/partitions/__init__.py +21 -0
- kafka/cli/admin/partitions/alter_reassignments.py +37 -0
- kafka/cli/admin/partitions/create.py +27 -0
- kafka/cli/admin/partitions/delete_records.py +31 -0
- kafka/cli/admin/partitions/describe.py +36 -0
- kafka/cli/admin/partitions/elect_leaders.py +53 -0
- kafka/cli/admin/partitions/list_offsets.py +88 -0
- kafka/cli/admin/partitions/list_reassignments.py +35 -0
- kafka/cli/admin/topics/__init__.py +10 -0
- kafka/cli/admin/topics/create.py +13 -0
- kafka/cli/admin/topics/delete.py +19 -0
- kafka/cli/admin/topics/describe.py +18 -0
- kafka/cli/admin/topics/list.py +11 -0
- kafka/cli/admin/transactions/__init__.py +17 -0
- kafka/cli/admin/transactions/abort.py +38 -0
- kafka/cli/admin/transactions/describe.py +24 -0
- kafka/cli/admin/transactions/describe_producers.py +29 -0
- kafka/cli/admin/transactions/find_hanging.py +26 -0
- kafka/cli/admin/transactions/list.py +37 -0
- kafka/cli/admin/users/__init__.py +8 -0
- kafka/cli/admin/users/alter_user_scram_credentials.py +34 -0
- kafka/cli/admin/users/describe_user_scram_credentials.py +15 -0
- kafka/cli/common.py +95 -0
- kafka/cli/consumer/__init__.py +63 -0
- kafka/cli/producer/__init__.py +57 -0
- kafka/cluster.py +824 -0
- kafka/codec.py +325 -0
- kafka/consumer/__init__.py +5 -0
- kafka/consumer/__main__.py +5 -0
- kafka/consumer/fetcher.py +2012 -0
- kafka/consumer/group.py +1347 -0
- kafka/consumer/subscription_state.py +897 -0
- kafka/coordinator/__init__.py +0 -0
- kafka/coordinator/assignors/__init__.py +0 -0
- kafka/coordinator/assignors/abstract.py +90 -0
- kafka/coordinator/assignors/cooperative_sticky.py +167 -0
- kafka/coordinator/assignors/range.py +81 -0
- kafka/coordinator/assignors/roundrobin.py +101 -0
- kafka/coordinator/assignors/sticky/StickyAssignorUserData.json +37 -0
- kafka/coordinator/assignors/sticky/__init__.py +0 -0
- kafka/coordinator/assignors/sticky/partition_movements.py +149 -0
- kafka/coordinator/assignors/sticky/sorted_set.py +63 -0
- kafka/coordinator/assignors/sticky/sticky_assignor.py +665 -0
- kafka/coordinator/assignors/sticky/user_data.py +8 -0
- kafka/coordinator/base.py +1215 -0
- kafka/coordinator/consumer.py +1224 -0
- kafka/coordinator/heartbeat.py +82 -0
- kafka/coordinator/subscription.py +34 -0
- kafka/errors.py +1004 -0
- kafka/future.py +166 -0
- kafka/metrics/__init__.py +13 -0
- kafka/metrics/compound_stat.py +33 -0
- kafka/metrics/dict_reporter.py +81 -0
- kafka/metrics/kafka_metric.py +36 -0
- kafka/metrics/measurable.py +27 -0
- kafka/metrics/measurable_stat.py +13 -0
- kafka/metrics/metric_config.py +33 -0
- kafka/metrics/metric_name.py +105 -0
- kafka/metrics/metrics.py +261 -0
- kafka/metrics/metrics_reporter.py +53 -0
- kafka/metrics/quota.py +41 -0
- kafka/metrics/stat.py +19 -0
- kafka/metrics/stats/__init__.py +15 -0
- kafka/metrics/stats/avg.py +24 -0
- kafka/metrics/stats/count.py +17 -0
- kafka/metrics/stats/histogram.py +99 -0
- kafka/metrics/stats/max_stat.py +17 -0
- kafka/metrics/stats/min_stat.py +19 -0
- kafka/metrics/stats/percentile.py +14 -0
- kafka/metrics/stats/percentiles.py +75 -0
- kafka/metrics/stats/rate.py +118 -0
- kafka/metrics/stats/sampled_stat.py +99 -0
- kafka/metrics/stats/sensor.py +136 -0
- kafka/metrics/stats/total.py +15 -0
- kafka/net/__init__.py +19 -0
- kafka/net/compat.py +165 -0
- kafka/net/connection.py +593 -0
- kafka/net/http_connect.py +144 -0
- kafka/net/inet.py +122 -0
- kafka/net/manager.py +451 -0
- kafka/net/metrics.py +149 -0
- kafka/net/sasl/__init__.py +32 -0
- kafka/net/sasl/abc.py +28 -0
- kafka/net/sasl/gssapi.py +95 -0
- kafka/net/sasl/msk.py +245 -0
- kafka/net/sasl/oauth.py +98 -0
- kafka/net/sasl/plain.py +42 -0
- kafka/net/sasl/scram.py +135 -0
- kafka/net/sasl/sspi.py +111 -0
- kafka/net/selector.py +644 -0
- kafka/net/socks5.py +262 -0
- kafka/net/transport.py +415 -0
- kafka/net/wakeup_notifier.py +72 -0
- kafka/partitioner/__init__.py +8 -0
- kafka/partitioner/abc.py +8 -0
- kafka/partitioner/default.py +89 -0
- kafka/partitioner/sticky.py +109 -0
- kafka/producer/__init__.py +5 -0
- kafka/producer/__main__.py +5 -0
- kafka/producer/future.py +101 -0
- kafka/producer/kafka.py +1123 -0
- kafka/producer/producer_batch.py +192 -0
- kafka/producer/record_accumulator.py +647 -0
- kafka/producer/sender.py +884 -0
- kafka/producer/transaction_manager.py +1326 -0
- kafka/protocol/__init__.py +0 -0
- kafka/protocol/admin/__init__.py +29 -0
- kafka/protocol/admin/acl.py +83 -0
- kafka/protocol/admin/acl.pyi +375 -0
- kafka/protocol/admin/client_quotas.py +14 -0
- kafka/protocol/admin/client_quotas.pyi +265 -0
- kafka/protocol/admin/cluster.py +31 -0
- kafka/protocol/admin/cluster.pyi +620 -0
- kafka/protocol/admin/configs.py +22 -0
- kafka/protocol/admin/configs.pyi +437 -0
- kafka/protocol/admin/groups.py +24 -0
- kafka/protocol/admin/groups.pyi +261 -0
- kafka/protocol/admin/topics.py +53 -0
- kafka/protocol/admin/topics.pyi +982 -0
- kafka/protocol/admin/transactions.py +18 -0
- kafka/protocol/admin/transactions.pyi +311 -0
- kafka/protocol/admin/users.py +14 -0
- kafka/protocol/admin/users.pyi +223 -0
- kafka/protocol/api_data.py +125 -0
- kafka/protocol/api_header.py +55 -0
- kafka/protocol/api_key.py +97 -0
- kafka/protocol/api_message.py +277 -0
- kafka/protocol/broker_version_data.py +246 -0
- kafka/protocol/consumer/__init__.py +13 -0
- kafka/protocol/consumer/fetch.py +16 -0
- kafka/protocol/consumer/fetch.pyi +298 -0
- kafka/protocol/consumer/group.py +38 -0
- kafka/protocol/consumer/group.pyi +824 -0
- kafka/protocol/consumer/metadata.py +30 -0
- kafka/protocol/consumer/metadata.pyi +89 -0
- kafka/protocol/consumer/offsets.py +75 -0
- kafka/protocol/consumer/offsets.pyi +288 -0
- kafka/protocol/data_container.py +166 -0
- kafka/protocol/frame.py +30 -0
- kafka/protocol/generate_stubs.py +468 -0
- kafka/protocol/metadata/__init__.py +10 -0
- kafka/protocol/metadata/api_versions.py +41 -0
- kafka/protocol/metadata/api_versions.pyi +128 -0
- kafka/protocol/metadata/find_coordinator.py +19 -0
- kafka/protocol/metadata/find_coordinator.pyi +105 -0
- kafka/protocol/metadata/metadata.py +34 -0
- kafka/protocol/metadata/metadata.pyi +160 -0
- kafka/protocol/old/__init__.py +0 -0
- kafka/protocol/old/abstract.py +17 -0
- kafka/protocol/old/add_offsets_to_txn.py +54 -0
- kafka/protocol/old/add_partitions_to_txn.py +71 -0
- kafka/protocol/old/admin.py +1086 -0
- kafka/protocol/old/api.py +205 -0
- kafka/protocol/old/api_versions.py +133 -0
- kafka/protocol/old/commit.py +355 -0
- kafka/protocol/old/consumer_protocol.py +36 -0
- kafka/protocol/old/end_txn.py +53 -0
- kafka/protocol/old/fetch.py +408 -0
- kafka/protocol/old/find_coordinator.py +72 -0
- kafka/protocol/old/group.py +451 -0
- kafka/protocol/old/init_producer_id.py +42 -0
- kafka/protocol/old/list_offsets.py +186 -0
- kafka/protocol/old/metadata.py +290 -0
- kafka/protocol/old/offset_for_leader_epoch.py +133 -0
- kafka/protocol/old/produce.py +247 -0
- kafka/protocol/old/sasl_authenticate.py +38 -0
- kafka/protocol/old/sasl_handshake.py +39 -0
- kafka/protocol/old/struct.py +87 -0
- kafka/protocol/old/txn_offset_commit.py +73 -0
- kafka/protocol/old/types.py +440 -0
- kafka/protocol/parser.py +191 -0
- kafka/protocol/producer/__init__.py +7 -0
- kafka/protocol/producer/produce.py +17 -0
- kafka/protocol/producer/produce.pyi +197 -0
- kafka/protocol/producer/transaction.py +30 -0
- kafka/protocol/producer/transaction.pyi +663 -0
- kafka/protocol/sasl.py +52 -0
- kafka/protocol/sasl.pyi +126 -0
- kafka/protocol/schemas/__init__.py +7 -0
- kafka/protocol/schemas/fields/__init__.py +7 -0
- kafka/protocol/schemas/fields/array.py +127 -0
- kafka/protocol/schemas/fields/base.py +156 -0
- kafka/protocol/schemas/fields/codecs/__init__.py +12 -0
- kafka/protocol/schemas/fields/codecs/encode_buffer.py +82 -0
- kafka/protocol/schemas/fields/codecs/tagged_fields.py +109 -0
- kafka/protocol/schemas/fields/codecs/types.py +505 -0
- kafka/protocol/schemas/fields/codegen.py +40 -0
- kafka/protocol/schemas/fields/simple.py +127 -0
- kafka/protocol/schemas/fields/struct.py +357 -0
- kafka/protocol/schemas/fields/struct_array.py +142 -0
- kafka/protocol/schemas/load_json.py +42 -0
- kafka/protocol/schemas/resources/AddOffsetsToTxnRequest.json +40 -0
- kafka/protocol/schemas/resources/AddOffsetsToTxnResponse.json +35 -0
- kafka/protocol/schemas/resources/AddPartitionsToTxnRequest.json +65 -0
- kafka/protocol/schemas/resources/AddPartitionsToTxnResponse.json +60 -0
- kafka/protocol/schemas/resources/AlterClientQuotasRequest.json +47 -0
- kafka/protocol/schemas/resources/AlterClientQuotasResponse.json +41 -0
- kafka/protocol/schemas/resources/AlterConfigsRequest.json +43 -0
- kafka/protocol/schemas/resources/AlterConfigsResponse.json +39 -0
- kafka/protocol/schemas/resources/AlterPartitionReassignmentsRequest.json +42 -0
- kafka/protocol/schemas/resources/AlterPartitionReassignmentsResponse.json +47 -0
- kafka/protocol/schemas/resources/AlterReplicaLogDirsRequest.json +41 -0
- kafka/protocol/schemas/resources/AlterReplicaLogDirsResponse.json +41 -0
- kafka/protocol/schemas/resources/AlterUserScramCredentialsRequest.json +45 -0
- kafka/protocol/schemas/resources/AlterUserScramCredentialsResponse.json +35 -0
- kafka/protocol/schemas/resources/ApiVersionsRequest.json +34 -0
- kafka/protocol/schemas/resources/ApiVersionsResponse.json +79 -0
- kafka/protocol/schemas/resources/ConsumerProtocolAssignment.json +42 -0
- kafka/protocol/schemas/resources/ConsumerProtocolSubscription.json +49 -0
- kafka/protocol/schemas/resources/CreateAclsRequest.json +46 -0
- kafka/protocol/schemas/resources/CreateAclsResponse.json +37 -0
- kafka/protocol/schemas/resources/CreatePartitionsRequest.json +47 -0
- kafka/protocol/schemas/resources/CreatePartitionsResponse.json +41 -0
- kafka/protocol/schemas/resources/CreateTopicsRequest.json +65 -0
- kafka/protocol/schemas/resources/CreateTopicsResponse.json +72 -0
- kafka/protocol/schemas/resources/DeleteAclsRequest.json +46 -0
- kafka/protocol/schemas/resources/DeleteAclsResponse.json +59 -0
- kafka/protocol/schemas/resources/DeleteGroupsRequest.json +30 -0
- kafka/protocol/schemas/resources/DeleteGroupsResponse.json +36 -0
- kafka/protocol/schemas/resources/DeleteRecordsRequest.json +42 -0
- kafka/protocol/schemas/resources/DeleteRecordsResponse.json +43 -0
- kafka/protocol/schemas/resources/DeleteTopicsRequest.json +43 -0
- kafka/protocol/schemas/resources/DeleteTopicsResponse.json +52 -0
- kafka/protocol/schemas/resources/DescribeAclsRequest.json +43 -0
- kafka/protocol/schemas/resources/DescribeAclsResponse.json +55 -0
- kafka/protocol/schemas/resources/DescribeClientQuotasRequest.json +37 -0
- kafka/protocol/schemas/resources/DescribeClientQuotasResponse.json +47 -0
- kafka/protocol/schemas/resources/DescribeClusterRequest.json +35 -0
- kafka/protocol/schemas/resources/DescribeClusterResponse.json +56 -0
- kafka/protocol/schemas/resources/DescribeConfigsRequest.json +42 -0
- kafka/protocol/schemas/resources/DescribeConfigsResponse.json +69 -0
- kafka/protocol/schemas/resources/DescribeGroupsRequest.json +38 -0
- kafka/protocol/schemas/resources/DescribeGroupsResponse.json +74 -0
- kafka/protocol/schemas/resources/DescribeLogDirsRequest.json +38 -0
- kafka/protocol/schemas/resources/DescribeLogDirsResponse.json +65 -0
- kafka/protocol/schemas/resources/DescribeProducersRequest.json +32 -0
- kafka/protocol/schemas/resources/DescribeProducersResponse.json +55 -0
- kafka/protocol/schemas/resources/DescribeQuorumRequest.json +39 -0
- kafka/protocol/schemas/resources/DescribeQuorumResponse.json +82 -0
- kafka/protocol/schemas/resources/DescribeTopicPartitionsRequest.json +40 -0
- kafka/protocol/schemas/resources/DescribeTopicPartitionsResponse.json +66 -0
- kafka/protocol/schemas/resources/DescribeTransactionsRequest.json +27 -0
- kafka/protocol/schemas/resources/DescribeTransactionsResponse.json +52 -0
- kafka/protocol/schemas/resources/DescribeUserScramCredentialsRequest.json +30 -0
- kafka/protocol/schemas/resources/DescribeUserScramCredentialsResponse.json +45 -0
- kafka/protocol/schemas/resources/ElectLeadersRequest.json +41 -0
- kafka/protocol/schemas/resources/ElectLeadersResponse.json +45 -0
- kafka/protocol/schemas/resources/EndTxnRequest.json +43 -0
- kafka/protocol/schemas/resources/EndTxnResponse.json +41 -0
- kafka/protocol/schemas/resources/FetchRequest.json +125 -0
- kafka/protocol/schemas/resources/FetchResponse.json +124 -0
- kafka/protocol/schemas/resources/FindCoordinatorRequest.json +43 -0
- kafka/protocol/schemas/resources/FindCoordinatorResponse.json +58 -0
- kafka/protocol/schemas/resources/HeartbeatRequest.json +39 -0
- kafka/protocol/schemas/resources/HeartbeatResponse.json +35 -0
- kafka/protocol/schemas/resources/IncrementalAlterConfigsRequest.json +44 -0
- kafka/protocol/schemas/resources/IncrementalAlterConfigsResponse.json +38 -0
- kafka/protocol/schemas/resources/InitProducerIdRequest.json +50 -0
- kafka/protocol/schemas/resources/InitProducerIdResponse.json +47 -0
- kafka/protocol/schemas/resources/JoinGroupRequest.json +63 -0
- kafka/protocol/schemas/resources/JoinGroupResponse.json +69 -0
- kafka/protocol/schemas/resources/LeaveGroupRequest.json +47 -0
- kafka/protocol/schemas/resources/LeaveGroupResponse.json +47 -0
- kafka/protocol/schemas/resources/ListConfigResourcesRequest.json +31 -0
- kafka/protocol/schemas/resources/ListConfigResourcesResponse.json +37 -0
- kafka/protocol/schemas/resources/ListGroupsRequest.json +36 -0
- kafka/protocol/schemas/resources/ListGroupsResponse.json +49 -0
- kafka/protocol/schemas/resources/ListOffsetsRequest.json +72 -0
- kafka/protocol/schemas/resources/ListOffsetsResponse.json +71 -0
- kafka/protocol/schemas/resources/ListPartitionReassignmentsRequest.json +34 -0
- kafka/protocol/schemas/resources/ListPartitionReassignmentsResponse.json +46 -0
- kafka/protocol/schemas/resources/ListTransactionsRequest.json +40 -0
- kafka/protocol/schemas/resources/ListTransactionsResponse.json +42 -0
- kafka/protocol/schemas/resources/MetadataRequest.json +56 -0
- kafka/protocol/schemas/resources/MetadataResponse.json +101 -0
- kafka/protocol/schemas/resources/OffsetCommitRequest.json +76 -0
- kafka/protocol/schemas/resources/OffsetCommitResponse.json +71 -0
- kafka/protocol/schemas/resources/OffsetDeleteRequest.json +39 -0
- kafka/protocol/schemas/resources/OffsetDeleteResponse.json +42 -0
- kafka/protocol/schemas/resources/OffsetFetchRequest.json +76 -0
- kafka/protocol/schemas/resources/OffsetFetchResponse.json +107 -0
- kafka/protocol/schemas/resources/OffsetForLeaderEpochRequest.json +52 -0
- kafka/protocol/schemas/resources/OffsetForLeaderEpochResponse.json +51 -0
- kafka/protocol/schemas/resources/ProduceRequest.json +73 -0
- kafka/protocol/schemas/resources/ProduceResponse.json +96 -0
- kafka/protocol/schemas/resources/RequestHeader.json +44 -0
- kafka/protocol/schemas/resources/ResponseHeader.json +26 -0
- kafka/protocol/schemas/resources/SaslAuthenticateRequest.json +29 -0
- kafka/protocol/schemas/resources/SaslAuthenticateResponse.json +34 -0
- kafka/protocol/schemas/resources/SaslHandshakeRequest.json +31 -0
- kafka/protocol/schemas/resources/SaslHandshakeResponse.json +32 -0
- kafka/protocol/schemas/resources/SyncGroupRequest.json +56 -0
- kafka/protocol/schemas/resources/SyncGroupResponse.json +46 -0
- kafka/protocol/schemas/resources/TxnOffsetCommitRequest.json +68 -0
- kafka/protocol/schemas/resources/TxnOffsetCommitResponse.json +47 -0
- kafka/protocol/schemas/resources/UpdateFeaturesRequest.json +43 -0
- kafka/protocol/schemas/resources/UpdateFeaturesResponse.json +39 -0
- kafka/protocol/schemas/resources/WriteTxnMarkersRequest.json +49 -0
- kafka/protocol/schemas/resources/WriteTxnMarkersResponse.json +45 -0
- kafka/protocol/schemas/resources/__init__.py +0 -0
- kafka/record/__init__.py +3 -0
- kafka/record/_crc32c.py +161 -0
- kafka/record/abc.py +144 -0
- kafka/record/default_records.py +782 -0
- kafka/record/legacy_records.py +587 -0
- kafka/record/memory_records.py +255 -0
- kafka/record/util.py +135 -0
- kafka/serializer/__init__.py +4 -0
- kafka/serializer/abstract.py +20 -0
- kafka/serializer/default.py +16 -0
- kafka/serializer/json.py +17 -0
- kafka/serializer/wrapper.py +21 -0
- kafka/structs.py +69 -0
- kafka/util.py +159 -0
- kafka/vendor/__init__.py +0 -0
- kafka/version.py +1 -0
- kafka_python-3.0.0.dist-info/METADATA +319 -0
- kafka_python-3.0.0.dist-info/RECORD +373 -0
- kafka_python-3.0.0.dist-info/WHEEL +5 -0
- kafka_python-3.0.0.dist-info/entry_points.txt +2 -0
- kafka_python-3.0.0.dist-info/licenses/LICENSE +202 -0
- kafka_python-3.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,782 @@
|
|
|
1
|
+
# See:
|
|
2
|
+
# https://github.com/apache/kafka/blob/trunk/clients/src/main/java/org/\
|
|
3
|
+
# apache/kafka/common/record/DefaultRecordBatch.java
|
|
4
|
+
# https://github.com/apache/kafka/blob/trunk/clients/src/main/java/org/\
|
|
5
|
+
# apache/kafka/common/record/DefaultRecord.java
|
|
6
|
+
|
|
7
|
+
# RecordBatch and Record implementation for magic 2 and above.
|
|
8
|
+
# The schema is given below:
|
|
9
|
+
|
|
10
|
+
# RecordBatch =>
|
|
11
|
+
# BaseOffset => Int64
|
|
12
|
+
# Length => Int32
|
|
13
|
+
# PartitionLeaderEpoch => Int32
|
|
14
|
+
# Magic => Int8
|
|
15
|
+
# CRC => Uint32
|
|
16
|
+
# Attributes => Int16
|
|
17
|
+
# LastOffsetDelta => Int32 // also serves as LastSequenceDelta
|
|
18
|
+
# FirstTimestamp => Int64
|
|
19
|
+
# MaxTimestamp => Int64
|
|
20
|
+
# ProducerId => Int64
|
|
21
|
+
# ProducerEpoch => Int16
|
|
22
|
+
# BaseSequence => Int32
|
|
23
|
+
# Records => [Record]
|
|
24
|
+
|
|
25
|
+
# Record =>
|
|
26
|
+
# Length => Varint
|
|
27
|
+
# Attributes => Int8
|
|
28
|
+
# TimestampDelta => Varlong
|
|
29
|
+
# OffsetDelta => Varint
|
|
30
|
+
# Key => Bytes
|
|
31
|
+
# Value => Bytes
|
|
32
|
+
# Headers => [HeaderKey HeaderValue]
|
|
33
|
+
# HeaderKey => String
|
|
34
|
+
# HeaderValue => Bytes
|
|
35
|
+
|
|
36
|
+
# Note that when compression is enabled (see attributes below), the compressed
|
|
37
|
+
# record data is serialized directly following the count of the number of
|
|
38
|
+
# records. (i.e. Records => [Record], but without length bytes)
|
|
39
|
+
|
|
40
|
+
# The CRC covers the data from the attributes to the end of the batch (i.e. all
|
|
41
|
+
# the bytes that follow the CRC). It is located after the magic byte, which
|
|
42
|
+
# means that clients must parse the magic byte before deciding how to interpret
|
|
43
|
+
# the bytes between the batch length and the magic byte. The partition leader
|
|
44
|
+
# epoch field is not included in the CRC computation to avoid the need to
|
|
45
|
+
# recompute the CRC when this field is assigned for every batch that is
|
|
46
|
+
# received by the broker. The CRC-32C (Castagnoli) polynomial is used for the
|
|
47
|
+
# computation.
|
|
48
|
+
|
|
49
|
+
# The current RecordBatch attributes are given below:
|
|
50
|
+
#
|
|
51
|
+
# * Unused (6-15)
|
|
52
|
+
# * Control (5)
|
|
53
|
+
# * Transactional (4)
|
|
54
|
+
# * Timestamp Type (3)
|
|
55
|
+
# * Compression Type (0-2)
|
|
56
|
+
|
|
57
|
+
import struct
|
|
58
|
+
import time
|
|
59
|
+
from kafka.record.abc import ABCRecord, ABCRecordBatch, ABCRecordBatchBuilder
|
|
60
|
+
from kafka.record.util import (
|
|
61
|
+
decode_varint, encode_varint, calc_crc32c, size_of_varint
|
|
62
|
+
)
|
|
63
|
+
from kafka.errors import CorruptRecordError, UnsupportedCodecError
|
|
64
|
+
from kafka.codec import (
|
|
65
|
+
gzip_encode, snappy_encode, lz4_encode, zstd_encode,
|
|
66
|
+
gzip_decode, snappy_decode, lz4_decode, zstd_decode
|
|
67
|
+
)
|
|
68
|
+
import kafka.codec as codecs
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class DefaultRecordBase:
    """Shared constants and codec-availability check for magic v2 batches."""

    __slots__ = ()

    # Big-endian, fixed-size batch header; one format code per field.
    HEADER_STRUCT = struct.Struct(
        ">"
        "q"  # BaseOffset => Int64
        "i"  # Length => Int32
        "i"  # PartitionLeaderEpoch => Int32
        "b"  # Magic => Int8
        "I"  # CRC => Uint32
        "h"  # Attributes => Int16
        "i"  # LastOffsetDelta => Int32 // also serves as LastSequenceDelta
        "q"  # FirstTimestamp => Int64
        "q"  # MaxTimestamp => Int64
        "q"  # ProducerId => Int64
        "h"  # ProducerEpoch => Int16
        "i"  # BaseSequence => Int32
        "i"  # Records count => Int32
    )

    # Byte offsets into the header, derived from the leading field formats.
    AFTER_LEN_OFFSET = struct.calcsize(">qi")
    CRC_OFFSET = struct.calcsize(">qiib")
    # The CRC is computed from the attributes field onwards.
    ATTRIBUTES_OFFSET = struct.calcsize(">qiibI")

    # Batch attribute bits 0-2: compression codec.
    CODEC_MASK = 0x07
    CODEC_NONE = 0x00
    CODEC_GZIP = 0x01
    CODEC_SNAPPY = 0x02
    CODEC_LZ4 = 0x03
    CODEC_ZSTD = 0x04
    # Batch attribute bits 3, 4 and 5.
    TIMESTAMP_TYPE_MASK = 1 << 3
    TRANSACTIONAL_MASK = 1 << 4
    CONTROL_MASK = 1 << 5

    CREATE_TIME = 0
    LOG_APPEND_TIME = 1
    NO_PRODUCER_ID = -1
    NO_SEQUENCE = -1
    MAX_INT = (1 << 31) - 1

    def _assert_has_codec(self, compression_type):
        """Raise UnsupportedCodecError unless ``compression_type`` is usable.

        The type must be one of the four compressing codecs (CODEC_NONE is
        not accepted here) and the matching ``codecs.has_<name>()`` check
        must report that the library is available.
        """
        known_codecs = (
            (self.CODEC_GZIP, "gzip"),
            (self.CODEC_SNAPPY, "snappy"),
            (self.CODEC_LZ4, "lz4"),
            (self.CODEC_ZSTD, "zstd"),
        )
        for codec_id, name in known_codecs:
            if compression_type == codec_id:
                break
        else:
            raise UnsupportedCodecError("Unrecognized compression type: %s" % (compression_type,))

        # Only the checker for the matched codec is looked up and called.
        is_available = getattr(codecs, "has_" + name)
        if not is_available():
            raise UnsupportedCodecError(
                "Libraries for {} compression codec not found".format(name))
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class DefaultRecordBatch(DefaultRecordBase, ABCRecordBatch):
|
|
128
|
+
|
|
129
|
+
__slots__ = ("_buffer", "_header_data", "_pos", "_num_records",
|
|
130
|
+
"_next_record_index", "_decompressed")
|
|
131
|
+
|
|
132
|
+
def __init__(self, buffer):
|
|
133
|
+
self._buffer = bytearray(buffer)
|
|
134
|
+
self._header_data = self.HEADER_STRUCT.unpack_from(self._buffer)
|
|
135
|
+
self._pos = self.HEADER_STRUCT.size
|
|
136
|
+
self._num_records = self._header_data[12]
|
|
137
|
+
self._next_record_index = 0
|
|
138
|
+
self._decompressed = False
|
|
139
|
+
|
|
140
|
+
@property
|
|
141
|
+
def base_offset(self):
|
|
142
|
+
return self._header_data[0]
|
|
143
|
+
|
|
144
|
+
@property
|
|
145
|
+
def size_in_bytes(self):
|
|
146
|
+
return self._header_data[1] + self.AFTER_LEN_OFFSET
|
|
147
|
+
|
|
148
|
+
@property
|
|
149
|
+
def leader_epoch(self):
|
|
150
|
+
return self._header_data[2]
|
|
151
|
+
|
|
152
|
+
@property
|
|
153
|
+
def magic(self):
|
|
154
|
+
return self._header_data[3]
|
|
155
|
+
|
|
156
|
+
@property
|
|
157
|
+
def crc(self):
|
|
158
|
+
return self._header_data[4]
|
|
159
|
+
|
|
160
|
+
@property
|
|
161
|
+
def attributes(self):
|
|
162
|
+
return self._header_data[5]
|
|
163
|
+
|
|
164
|
+
@property
|
|
165
|
+
def last_offset_delta(self):
|
|
166
|
+
return self._header_data[6]
|
|
167
|
+
|
|
168
|
+
@property
|
|
169
|
+
def last_offset(self):
|
|
170
|
+
return self.base_offset + self.last_offset_delta
|
|
171
|
+
|
|
172
|
+
@property
|
|
173
|
+
def next_offset(self):
|
|
174
|
+
return self.last_offset + 1
|
|
175
|
+
|
|
176
|
+
@property
|
|
177
|
+
def compression_type(self):
|
|
178
|
+
return self.attributes & self.CODEC_MASK
|
|
179
|
+
|
|
180
|
+
@property
|
|
181
|
+
def timestamp_type(self):
|
|
182
|
+
return int(bool(self.attributes & self.TIMESTAMP_TYPE_MASK))
|
|
183
|
+
|
|
184
|
+
@property
|
|
185
|
+
def is_transactional(self):
|
|
186
|
+
return bool(self.attributes & self.TRANSACTIONAL_MASK)
|
|
187
|
+
|
|
188
|
+
@property
|
|
189
|
+
def is_control_batch(self):
|
|
190
|
+
return bool(self.attributes & self.CONTROL_MASK)
|
|
191
|
+
|
|
192
|
+
@property
|
|
193
|
+
def first_timestamp(self):
|
|
194
|
+
return self._header_data[7]
|
|
195
|
+
|
|
196
|
+
@property
|
|
197
|
+
def max_timestamp(self):
|
|
198
|
+
return self._header_data[8]
|
|
199
|
+
|
|
200
|
+
@property
|
|
201
|
+
def producer_id(self):
|
|
202
|
+
return self._header_data[9]
|
|
203
|
+
|
|
204
|
+
def has_producer_id(self):
|
|
205
|
+
return self.producer_id > self.NO_PRODUCER_ID
|
|
206
|
+
|
|
207
|
+
@property
|
|
208
|
+
def producer_epoch(self):
|
|
209
|
+
return self._header_data[10]
|
|
210
|
+
|
|
211
|
+
@property
|
|
212
|
+
def base_sequence(self):
|
|
213
|
+
return self._header_data[11]
|
|
214
|
+
|
|
215
|
+
@property
|
|
216
|
+
def has_sequence(self):
|
|
217
|
+
return self._header_data[11] != -1 # NO_SEQUENCE
|
|
218
|
+
|
|
219
|
+
@property
|
|
220
|
+
def last_sequence(self):
|
|
221
|
+
if self.base_sequence == self.NO_SEQUENCE:
|
|
222
|
+
return self.NO_SEQUENCE
|
|
223
|
+
return self._increment_sequence(self.base_sequence, self.last_offset_delta)
|
|
224
|
+
|
|
225
|
+
def _increment_sequence(self, base, increment):
|
|
226
|
+
if base > (self.MAX_INT - increment):
|
|
227
|
+
return increment - (self.MAX_INT - base) - 1
|
|
228
|
+
return base + increment
|
|
229
|
+
|
|
230
|
+
@property
|
|
231
|
+
def records_count(self):
|
|
232
|
+
return self._header_data[12]
|
|
233
|
+
|
|
234
|
+
def _maybe_uncompress(self):
|
|
235
|
+
if not self._decompressed:
|
|
236
|
+
compression_type = self.compression_type
|
|
237
|
+
if compression_type != self.CODEC_NONE:
|
|
238
|
+
self._assert_has_codec(compression_type)
|
|
239
|
+
data = memoryview(self._buffer)[self._pos:]
|
|
240
|
+
if compression_type == self.CODEC_GZIP:
|
|
241
|
+
uncompressed = gzip_decode(data)
|
|
242
|
+
if compression_type == self.CODEC_SNAPPY:
|
|
243
|
+
uncompressed = snappy_decode(data.tobytes())
|
|
244
|
+
if compression_type == self.CODEC_LZ4:
|
|
245
|
+
uncompressed = lz4_decode(data.tobytes())
|
|
246
|
+
if compression_type == self.CODEC_ZSTD:
|
|
247
|
+
uncompressed = zstd_decode(data.tobytes())
|
|
248
|
+
self._buffer = bytearray(uncompressed) # pylint: disable=E0606
|
|
249
|
+
self._pos = 0
|
|
250
|
+
self._decompressed = True
|
|
251
|
+
|
|
252
|
+
def _read_msg(
|
|
253
|
+
self,
|
|
254
|
+
decode_varint=decode_varint):
|
|
255
|
+
# Record =>
|
|
256
|
+
# Length => Varint
|
|
257
|
+
# Attributes => Int8
|
|
258
|
+
# TimestampDelta => Varlong
|
|
259
|
+
# OffsetDelta => Varint
|
|
260
|
+
# Key => Bytes
|
|
261
|
+
# Value => Bytes
|
|
262
|
+
# Headers => [HeaderKey HeaderValue]
|
|
263
|
+
# HeaderKey => String
|
|
264
|
+
# HeaderValue => Bytes
|
|
265
|
+
|
|
266
|
+
buffer = self._buffer
|
|
267
|
+
pos = self._pos
|
|
268
|
+
length, pos = decode_varint(buffer, pos)
|
|
269
|
+
start_pos = pos
|
|
270
|
+
_, pos = decode_varint(buffer, pos) # attrs can be skipped for now
|
|
271
|
+
|
|
272
|
+
ts_delta, pos = decode_varint(buffer, pos)
|
|
273
|
+
if self.timestamp_type == self.LOG_APPEND_TIME:
|
|
274
|
+
timestamp = self.max_timestamp
|
|
275
|
+
else:
|
|
276
|
+
timestamp = self.first_timestamp + ts_delta
|
|
277
|
+
|
|
278
|
+
offset_delta, pos = decode_varint(buffer, pos)
|
|
279
|
+
offset = self.base_offset + offset_delta
|
|
280
|
+
|
|
281
|
+
key_len, pos = decode_varint(buffer, pos)
|
|
282
|
+
if key_len >= 0:
|
|
283
|
+
key = bytes(buffer[pos: pos + key_len])
|
|
284
|
+
pos += key_len
|
|
285
|
+
else:
|
|
286
|
+
key = None
|
|
287
|
+
|
|
288
|
+
value_len, pos = decode_varint(buffer, pos)
|
|
289
|
+
if value_len >= 0:
|
|
290
|
+
value = bytes(buffer[pos: pos + value_len])
|
|
291
|
+
pos += value_len
|
|
292
|
+
else:
|
|
293
|
+
value = None
|
|
294
|
+
|
|
295
|
+
header_count, pos = decode_varint(buffer, pos)
|
|
296
|
+
if header_count < 0:
|
|
297
|
+
raise CorruptRecordError("Found invalid number of record "
|
|
298
|
+
"headers {}".format(header_count))
|
|
299
|
+
headers = []
|
|
300
|
+
while header_count:
|
|
301
|
+
# Header key is of type String, that can't be None
|
|
302
|
+
h_key_len, pos = decode_varint(buffer, pos)
|
|
303
|
+
if h_key_len < 0:
|
|
304
|
+
raise CorruptRecordError(
|
|
305
|
+
"Invalid negative header key size {}".format(h_key_len))
|
|
306
|
+
h_key = buffer[pos: pos + h_key_len].decode("utf-8")
|
|
307
|
+
pos += h_key_len
|
|
308
|
+
|
|
309
|
+
# Value is of type NULLABLE_BYTES, so it can be None
|
|
310
|
+
h_value_len, pos = decode_varint(buffer, pos)
|
|
311
|
+
if h_value_len >= 0:
|
|
312
|
+
h_value = bytes(buffer[pos: pos + h_value_len])
|
|
313
|
+
pos += h_value_len
|
|
314
|
+
else:
|
|
315
|
+
h_value = None
|
|
316
|
+
|
|
317
|
+
headers.append((h_key, h_value))
|
|
318
|
+
header_count -= 1
|
|
319
|
+
|
|
320
|
+
# validate whether we have read all header bytes in the current record
|
|
321
|
+
if pos - start_pos != length:
|
|
322
|
+
raise CorruptRecordError(
|
|
323
|
+
"Invalid record size: expected to read {} bytes in record "
|
|
324
|
+
"payload, but instead read {}".format(length, pos - start_pos))
|
|
325
|
+
self._pos = pos
|
|
326
|
+
|
|
327
|
+
if self.is_control_batch:
|
|
328
|
+
return ControlRecord(
|
|
329
|
+
length, offset, timestamp, self.timestamp_type, key, value, headers)
|
|
330
|
+
else:
|
|
331
|
+
return DefaultRecord(
|
|
332
|
+
length, offset, timestamp, self.timestamp_type, key, value, headers)
|
|
333
|
+
|
|
334
|
+
def __iter__(self):
|
|
335
|
+
self._maybe_uncompress()
|
|
336
|
+
return self
|
|
337
|
+
|
|
338
|
+
def __next__(self):
    """Return the next record of the batch.

    Returns:
        Whatever ``_read_msg()`` produces for the record at the current
        position.

    Raises:
        StopIteration: when ``_num_records`` records have been returned and
            the read position is exactly at the end of the buffer.
        CorruptRecordError: when bytes are left over after the last record,
            or when decoding a record fails.
    """
    if self._next_record_index >= self._num_records:
        # All announced records were returned; leftover bytes mean the
        # record count and the payload disagree.
        if self._pos != len(self._buffer):
            raise CorruptRecordError(
                "{} unconsumed bytes after all records consumed".format(
                    len(self._buffer) - self._pos))
        raise StopIteration
    try:
        msg = self._read_msg()
    except (ValueError, IndexError, struct.error) as err:
        # struct.error does not derive from ValueError; it is raised when a
        # control record key cannot be unpacked, which is a corrupt record
        # just like a truncated varint or slice.
        raise CorruptRecordError(
            "Found invalid record structure: {!r}".format(err)) from err
    else:
        # Only count the record once it has been decoded successfully.
        self._next_record_index += 1
    return msg

next = __next__
|
|
355
|
+
|
|
356
|
+
def validate_crc(self):
    """Check the stored batch CRC against one computed over the buffer.

    The checksum is computed over everything from ``ATTRIBUTES_OFFSET`` to
    the end of ``_buffer`` and compared with ``self.crc``.

    Returns:
        True when both values are equal, False otherwise.

    Raises:
        AssertionError: if the batch has already been decompressed.
    """
    assert self._decompressed is False, \
        "Validate should be called before iteration"

    expected = self.crc
    # A memoryview slice avoids copying the batch body just to hash it.
    body = memoryview(self._buffer)[self.ATTRIBUTES_OFFSET:]
    return expected == calc_crc32c(body)
|
|
364
|
+
|
|
365
|
+
def __str__(self):
|
|
366
|
+
return (
|
|
367
|
+
"DefaultRecordBatch(magic={}, base_offset={}, last_offset_delta={},"
|
|
368
|
+
" first_timestamp={}, max_timestamp={},"
|
|
369
|
+
" is_transactional={}, producer_id={}, producer_epoch={}, base_sequence={},"
|
|
370
|
+
" records_count={})".format(
|
|
371
|
+
self.magic, self.base_offset, self.last_offset_delta,
|
|
372
|
+
self.first_timestamp, self.max_timestamp,
|
|
373
|
+
self.is_transactional, self.producer_id, self.producer_epoch, self.base_sequence,
|
|
374
|
+
self.records_count))
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
class DefaultRecord(ABCRecord):
    """Read-only holder for the fields of one decoded record."""

    __slots__ = ("_size_in_bytes", "_offset", "_timestamp", "_timestamp_type", "_key", "_value",
                 "_headers")

    def __init__(self, size_in_bytes, offset, timestamp, timestamp_type, key, value, headers):
        # Positional metadata of the record.
        self._size_in_bytes = size_in_bytes
        self._offset = offset
        # Timing information.
        self._timestamp = timestamp
        self._timestamp_type = timestamp_type
        # Payload.
        self._key = key
        self._value = value
        self._headers = headers

    @property
    def size_in_bytes(self):
        """Size value passed in at construction."""
        return self._size_in_bytes

    @property
    def offset(self):
        """Offset value passed in at construction."""
        return self._offset

    @property
    def timestamp(self):
        """Epoch milliseconds."""
        return self._timestamp

    @property
    def timestamp_type(self):
        """CREATE_TIME(0) or APPEND_TIME(1)."""
        return self._timestamp_type

    @property
    def key(self):
        """Bytes key or None."""
        return self._key

    @property
    def value(self):
        """Bytes value or None."""
        return self._value

    @property
    def headers(self):
        """Headers object passed in at construction."""
        return self._headers

    @property
    def checksum(self):
        """Always None for this record type."""
        return None

    def validate_crc(self):
        """Always report success; no per-record check is performed here."""
        return True

    def __repr__(self):
        shown = (
            self._offset, self._timestamp, self._timestamp_type,
            self._key, self._value, self._headers,
        )
        return (
            "DefaultRecord(offset=%r, timestamp=%r, timestamp_type=%r,"
            " key=%r, value=%r, headers=%r)" % shown
        )
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
class ControlRecord(DefaultRecord):
    """Record whose key encodes a control marker (version and type).

    The key is unpacked with ``KEY_STRUCT`` at construction time, so the
    constructor raises ``struct.error`` when the key does not have the
    expected size.
    """

    # Only the attributes introduced by this subclass. The remaining slots
    # are inherited from DefaultRecord; re-declaring them here would create
    # shadowing descriptors and reserve duplicate storage per instance.
    __slots__ = ("_version", "_type")

    KEY_STRUCT = struct.Struct(
        ">h"  # Current Version => Int16
        "h"  # Type => Int16 (0 indicates an abort marker, 1 indicates a commit)
    )

    def __init__(self, size_in_bytes, offset, timestamp, timestamp_type, key, value, headers):
        super().__init__(size_in_bytes, offset, timestamp, timestamp_type, key, value, headers)
        (self._version, self._type) = self.KEY_STRUCT.unpack(self._key)

    # see https://kafka.apache.org/documentation/#controlbatch
    @property
    def version(self):
        """Version field unpacked from the record key."""
        return self._version

    @property
    def type(self):
        """Type field unpacked from the record key."""
        return self._type

    @property
    def abort(self):
        """True when the type field is 0 (abort marker)."""
        return self._type == 0

    @property
    def commit(self):
        """True when the type field is 1 (commit marker)."""
        return self._type == 1

    def __repr__(self):
        # NOTE: any type other than 0 is labelled "commit" in the output.
        return (
            "ControlRecord(offset={!r}, timestamp={!r}, timestamp_type={!r},"
            " version={!r}, type={!r} <{!s}>)".format(
                self._offset, self._timestamp, self._timestamp_type,
                self._version, self._type, "abort" if self.abort else "commit")
        )
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
class DefaultRecordBatchBuilder(DefaultRecordBase, ABCRecordBatchBuilder):
    """Accumulate records in one buffer and finalize it as a record batch.

    The buffer starts with ``HEADER_STRUCT.size`` zero bytes reserved for the
    batch header. ``append`` adds length-prefixed records after it, and
    ``build`` optionally compresses the records and fills in the header.
    """

    # excluding key, value and headers:
    # 5 bytes length + 10 bytes timestamp + 5 bytes offset + 1 byte attributes
    MAX_RECORD_OVERHEAD = 21

    __slots__ = ("_magic", "_compression_type", "_batch_size", "_is_transactional",
                 "_producer_id", "_producer_epoch", "_base_sequence",
                 "_first_timestamp", "_max_timestamp", "_last_offset", "_num_records",
                 "_buffer")

    def __init__(
            self, magic, compression_type, is_transactional,
            producer_id, producer_epoch, base_sequence, batch_size):
        """Create an empty builder.

        ``compression_type`` is masked with ``CODEC_MASK`` and
        ``is_transactional`` is coerced to bool. ``batch_size`` is the limit
        ``append`` checks before accepting any record but the first.
        """
        assert magic >= 2
        self._magic = magic
        self._compression_type = compression_type & self.CODEC_MASK
        self._batch_size = batch_size
        self._is_transactional = bool(is_transactional)
        # KIP-98 fields for EOS
        self._producer_id = producer_id
        self._producer_epoch = producer_epoch
        self._base_sequence = base_sequence

        self._first_timestamp = None
        self._max_timestamp = None
        self._last_offset = 0
        self._num_records = 0

        # Reserve room for the batch header; write_header() fills it in.
        self._buffer = bytearray(self.HEADER_STRUCT.size)

    def set_producer_state(self, producer_id, producer_epoch, base_sequence, is_transactional):
        """Replace the producer fields that write_header() puts in the header.

        Raises:
            AssertionError: if ``is_transactional`` is set without a producer
                id, or a producer id is given with epoch or sequence of -1.
        """
        assert not is_transactional or producer_id != -1, "Cannot write transactional messages without a valid producer ID"
        assert producer_id == -1 or producer_epoch != -1, "Invalid negative producer epoch"
        assert producer_id == -1 or base_sequence != -1, "Invalid negative sequence number"
        self._producer_id = producer_id
        self._producer_epoch = producer_epoch
        self._base_sequence = base_sequence
        # Coerce like __init__ does so the stored flag is always a bool.
        self._is_transactional = bool(is_transactional)

    @property
    def producer_id(self):
        return self._producer_id

    @property
    def producer_epoch(self):
        return self._producer_epoch

    @property
    def base_sequence(self):
        return self._base_sequence

    def _get_attributes(self, include_compression_type=True):
        """Return the batch attributes bit field.

        The codec bits are included only when ``include_compression_type``
        is true; the transactional bit is set when the builder is
        transactional.
        """
        attrs = 0
        if include_compression_type:
            attrs |= self._compression_type
        # Timestamp Type is set by Broker
        if self._is_transactional:
            attrs |= self.TRANSACTIONAL_MASK
        # Control batches are only created by Broker
        return attrs

    def append(self, offset, timestamp, key, value, headers,
               # Cache for LOAD_FAST opcodes
               encode_varint=encode_varint, size_of_varint=size_of_varint,
               get_type=type, type_int=int, time_time=time.time,
               byte_like=(bytes, bytearray, memoryview),
               bytearray_type=bytearray, len_func=len, zero_len_varint=1
               ):
        """ Write message to messageset buffer with MsgVersion 2

        Arguments:
            offset: exact ``int`` written as the record's offset field.
            timestamp: exact ``int`` or None; None is replaced by the
                current time in milliseconds.
            key, value: None or exactly ``bytes``/``bytearray``/``memoryview``.
            headers: sized iterable of ``(str key, bytes-like-or-None value)``.

        The remaining keyword arguments only cache globals as locals and are
        not meant to be passed by callers.

        Returns:
            ``DefaultRecordMetadata(offset, size, timestamp)`` when the record
            was written, or None when it would exceed ``batch_size`` and is
            not the first record of the batch.

        Raises:
            TypeError: when offset, timestamp, key or value has another type.
        """
        # Check types
        if get_type(offset) != type_int:
            raise TypeError(offset)
        if timestamp is None:
            timestamp = type_int(time_time() * 1000)
        elif get_type(timestamp) != type_int:
            raise TypeError(timestamp)
        if not (key is None or get_type(key) in byte_like):
            raise TypeError(
                "Not supported type for key: {}".format(type(key)))
        if not (value is None or get_type(value) in byte_like):
            raise TypeError(
                "Not supported type for value: {}".format(type(value)))

        # We will always add the first message, so those will be set
        if self._first_timestamp is None:
            self._first_timestamp = timestamp
            self._max_timestamp = timestamp
            timestamp_delta = 0
            first_message = 1
        else:
            timestamp_delta = timestamp - self._first_timestamp
            first_message = 0

        # We can't write record right away to our buffer, we need to
        # precompute the length as first value...
        message_buffer = bytearray_type(b"\x00")  # Attributes
        write_byte = message_buffer.append
        write = message_buffer.extend

        encode_varint(timestamp_delta, write_byte)
        # Base offset is always 0 on Produce
        encode_varint(offset, write_byte)

        # A None key/value/header value is written as the single byte
        # zero_len_varint instead of a length followed by data
        # (presumably the varint encoding of -1 -- confirm in encode_varint).
        if key is not None:
            encode_varint(len_func(key), write_byte)
            write(key)
        else:
            write_byte(zero_len_varint)

        if value is not None:
            encode_varint(len_func(value), write_byte)
            write(value)
        else:
            write_byte(zero_len_varint)

        encode_varint(len_func(headers), write_byte)

        for h_key, h_value in headers:
            h_key = h_key.encode("utf-8")
            encode_varint(len_func(h_key), write_byte)
            write(h_key)
            if h_value is not None:
                encode_varint(len_func(h_value), write_byte)
                write(h_value)
            else:
                write_byte(zero_len_varint)

        message_len = len_func(message_buffer)
        main_buffer = self._buffer

        required_size = message_len + size_of_varint(message_len)
        # Check if we can write this message
        if (required_size + len_func(main_buffer) > self._batch_size and
                not first_message):
            return None

        # Those should be updated after the length check
        if self._max_timestamp < timestamp:
            self._max_timestamp = timestamp
        self._num_records += 1
        self._last_offset = offset

        encode_varint(message_len, main_buffer.append)
        main_buffer.extend(message_buffer)

        return DefaultRecordMetadata(offset, required_size, timestamp)

    def write_header(self, use_compression_type=True):
        """Pack the batch header into the start of the buffer and set the CRC.

        ``use_compression_type`` controls whether the codec bits are included
        in the attributes field.
        """
        batch_len = len(self._buffer)
        self.HEADER_STRUCT.pack_into(
            self._buffer, 0,
            0,  # BaseOffset, set by broker
            batch_len - self.AFTER_LEN_OFFSET,  # Size from here to end
            0,  # PartitionLeaderEpoch, set by broker
            self._magic,
            0,  # CRC will be set below, as we need a filled buffer for it
            self._get_attributes(use_compression_type),
            self._last_offset,
            self._first_timestamp or 0,
            self._max_timestamp or 0,
            self._producer_id,
            self._producer_epoch,
            self._base_sequence,
            self._num_records
        )
        # Use memoryview to avoid a full-body copy of ~batch_size bytes.
        # DefaultRecordBatch.validate_crc does the same on the decode path.
        crc = calc_crc32c(memoryview(self._buffer)[self.ATTRIBUTES_OFFSET:])
        struct.pack_into(">I", self._buffer, self.CRC_OFFSET, crc)

    def _maybe_compress(self):
        """Compress the records in place when that makes the batch smaller.

        Returns:
            True when the buffer now holds compressed records, False when no
            codec is configured or compression did not reduce the size.
        """
        if self._compression_type != self.CODEC_NONE:
            self._assert_has_codec(self._compression_type)
            header_size = self.HEADER_STRUCT.size
            data = bytes(self._buffer[header_size:])
            if self._compression_type == self.CODEC_GZIP:
                compressed = gzip_encode(data)
            elif self._compression_type == self.CODEC_SNAPPY:
                compressed = snappy_encode(data)
            elif self._compression_type == self.CODEC_LZ4:
                compressed = lz4_encode(data)
            elif self._compression_type == self.CODEC_ZSTD:
                compressed = zstd_encode(data)
            compressed_size = len(compressed)  # pylint: disable=E0606
            if len(data) <= compressed_size:
                # We did not get any benefit from compression, lets send
                # uncompressed
                return False
            else:
                # Trim bytearray to the required size
                needed_size = header_size + compressed_size
                del self._buffer[needed_size:]
                self._buffer[header_size:needed_size] = compressed
                return True
        return False

    def build(self):
        """Finalize the batch and return the underlying buffer.

        Compresses the records if worthwhile, then writes the header; the
        codec bits are set only when compression was actually applied.
        """
        send_compressed = self._maybe_compress()
        self.write_header(send_compressed)
        return self._buffer

    def size(self):
        """ Return current size of data written to buffer
        """
        return len(self._buffer)

    @classmethod
    def header_size_in_bytes(cls):
        """Return the size in bytes of the packed batch header."""
        return cls.HEADER_STRUCT.size

    @classmethod
    def size_in_bytes(cls, offset_delta, timestamp_delta, key, value, headers):
        """Return the encoded size of one record including its length prefix."""
        size_of_body = (
            1 +  # Attrs
            size_of_varint(offset_delta) +
            size_of_varint(timestamp_delta) +
            cls.size_of(key, value, headers)
        )
        return size_of_body + size_of_varint(size_of_body)

    @classmethod
    def size_of(cls, key, value, headers):
        """Return the encoded size of key, value and headers.

        A None key, value or header value counts as one byte.
        """
        size = 0
        # Key size
        if key is None:
            size += 1
        else:
            key_len = len(key)
            size += size_of_varint(key_len) + key_len
        # Value size
        if value is None:
            size += 1
        else:
            value_len = len(value)
            size += size_of_varint(value_len) + value_len
        # Header size
        size += size_of_varint(len(headers))
        for h_key, h_value in headers:
            h_key_len = len(h_key.encode("utf-8"))
            size += size_of_varint(h_key_len) + h_key_len

            if h_value is None:
                size += 1
            else:
                h_value_len = len(h_value)
                size += size_of_varint(h_value_len) + h_value_len
        return size

    @classmethod
    def estimate_size_in_bytes(cls, key, value, headers):
        """ Get the upper bound estimate on the size of record
        """
        return (
            cls.HEADER_STRUCT.size + cls.MAX_RECORD_OVERHEAD +
            cls.size_of(key, value, headers)
        )

    def __str__(self):
        return (
            "DefaultRecordBatchBuilder(magic={}, base_offset={}, last_offset_delta={},"
            " first_timestamp={}, max_timestamp={},"
            " is_transactional={}, producer_id={}, producer_epoch={}, base_sequence={},"
            " records_count={})".format(
                self._magic, 0, self._last_offset,
                self._first_timestamp or 0, self._max_timestamp or 0,
                self._is_transactional, self._producer_id, self._producer_epoch, self._base_sequence,
                self._num_records))
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
class DefaultRecordMetadata:
    """Immutable-style summary of one record: offset, size and timestamp."""

    __slots__ = ("_size", "_timestamp", "_offset")

    def __init__(self, offset, size, timestamp):
        self._offset, self._size, self._timestamp = offset, size, timestamp

    @property
    def offset(self):
        """Offset given at construction."""
        return self._offset

    @property
    def crc(self):
        """Always None for this metadata type."""
        return None

    @property
    def size(self):
        """Size given at construction."""
        return self._size

    @property
    def timestamp(self):
        """Timestamp given at construction."""
        return self._timestamp

    def __repr__(self):
        shown = (self._offset, self._size, self._timestamp)
        return "DefaultRecordMetadata(offset=%r, size=%r, timestamp=%r)" % shown
|