kafka-python 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kafka/__init__.py +34 -0
- kafka/__main__.py +5 -0
- kafka/admin/__init__.py +29 -0
- kafka/admin/__main__.py +5 -0
- kafka/admin/_acls.py +355 -0
- kafka/admin/_cluster.py +359 -0
- kafka/admin/_configs.py +479 -0
- kafka/admin/_groups.py +754 -0
- kafka/admin/_partitions.py +595 -0
- kafka/admin/_topics.py +281 -0
- kafka/admin/_transactions.py +450 -0
- kafka/admin/_users.py +194 -0
- kafka/admin/client.py +373 -0
- kafka/benchmarks/__init__.py +0 -0
- kafka/benchmarks/consumer_performance.py +138 -0
- kafka/benchmarks/load_example.py +109 -0
- kafka/benchmarks/producer_encode_path.py +201 -0
- kafka/benchmarks/producer_performance.py +161 -0
- kafka/benchmarks/profile_protocol.py +138 -0
- kafka/benchmarks/protocol_old_vs_new.py +447 -0
- kafka/benchmarks/record_batch_compose.py +77 -0
- kafka/benchmarks/record_batch_read.py +82 -0
- kafka/benchmarks/varint_speed.py +426 -0
- kafka/cli/__init__.py +36 -0
- kafka/cli/admin/__init__.py +117 -0
- kafka/cli/admin/acls/__init__.py +9 -0
- kafka/cli/admin/acls/common.py +76 -0
- kafka/cli/admin/acls/create.py +19 -0
- kafka/cli/admin/acls/delete.py +23 -0
- kafka/cli/admin/acls/describe.py +16 -0
- kafka/cli/admin/cluster/__init__.py +14 -0
- kafka/cli/admin/cluster/describe.py +11 -0
- kafka/cli/admin/cluster/describe_quorum.py +11 -0
- kafka/cli/admin/cluster/features.py +52 -0
- kafka/cli/admin/cluster/log_dirs.py +43 -0
- kafka/cli/admin/cluster/versions.py +33 -0
- kafka/cli/admin/configs/__init__.py +10 -0
- kafka/cli/admin/configs/alter.py +43 -0
- kafka/cli/admin/configs/common.py +17 -0
- kafka/cli/admin/configs/describe.py +30 -0
- kafka/cli/admin/configs/list.py +16 -0
- kafka/cli/admin/configs/reset.py +20 -0
- kafka/cli/admin/groups/__init__.py +16 -0
- kafka/cli/admin/groups/alter_offsets.py +30 -0
- kafka/cli/admin/groups/delete.py +11 -0
- kafka/cli/admin/groups/delete_offsets.py +29 -0
- kafka/cli/admin/groups/describe.py +11 -0
- kafka/cli/admin/groups/list.py +28 -0
- kafka/cli/admin/groups/list_offsets.py +29 -0
- kafka/cli/admin/groups/remove_members.py +40 -0
- kafka/cli/admin/groups/reset_offsets.py +139 -0
- kafka/cli/admin/partitions/__init__.py +21 -0
- kafka/cli/admin/partitions/alter_reassignments.py +37 -0
- kafka/cli/admin/partitions/create.py +27 -0
- kafka/cli/admin/partitions/delete_records.py +31 -0
- kafka/cli/admin/partitions/describe.py +36 -0
- kafka/cli/admin/partitions/elect_leaders.py +53 -0
- kafka/cli/admin/partitions/list_offsets.py +88 -0
- kafka/cli/admin/partitions/list_reassignments.py +35 -0
- kafka/cli/admin/topics/__init__.py +10 -0
- kafka/cli/admin/topics/create.py +13 -0
- kafka/cli/admin/topics/delete.py +19 -0
- kafka/cli/admin/topics/describe.py +18 -0
- kafka/cli/admin/topics/list.py +11 -0
- kafka/cli/admin/transactions/__init__.py +17 -0
- kafka/cli/admin/transactions/abort.py +38 -0
- kafka/cli/admin/transactions/describe.py +24 -0
- kafka/cli/admin/transactions/describe_producers.py +29 -0
- kafka/cli/admin/transactions/find_hanging.py +26 -0
- kafka/cli/admin/transactions/list.py +37 -0
- kafka/cli/admin/users/__init__.py +8 -0
- kafka/cli/admin/users/alter_user_scram_credentials.py +34 -0
- kafka/cli/admin/users/describe_user_scram_credentials.py +15 -0
- kafka/cli/common.py +95 -0
- kafka/cli/consumer/__init__.py +63 -0
- kafka/cli/producer/__init__.py +57 -0
- kafka/cluster.py +824 -0
- kafka/codec.py +325 -0
- kafka/consumer/__init__.py +5 -0
- kafka/consumer/__main__.py +5 -0
- kafka/consumer/fetcher.py +2012 -0
- kafka/consumer/group.py +1347 -0
- kafka/consumer/subscription_state.py +897 -0
- kafka/coordinator/__init__.py +0 -0
- kafka/coordinator/assignors/__init__.py +0 -0
- kafka/coordinator/assignors/abstract.py +90 -0
- kafka/coordinator/assignors/cooperative_sticky.py +167 -0
- kafka/coordinator/assignors/range.py +81 -0
- kafka/coordinator/assignors/roundrobin.py +101 -0
- kafka/coordinator/assignors/sticky/StickyAssignorUserData.json +37 -0
- kafka/coordinator/assignors/sticky/__init__.py +0 -0
- kafka/coordinator/assignors/sticky/partition_movements.py +149 -0
- kafka/coordinator/assignors/sticky/sorted_set.py +63 -0
- kafka/coordinator/assignors/sticky/sticky_assignor.py +665 -0
- kafka/coordinator/assignors/sticky/user_data.py +8 -0
- kafka/coordinator/base.py +1215 -0
- kafka/coordinator/consumer.py +1224 -0
- kafka/coordinator/heartbeat.py +82 -0
- kafka/coordinator/subscription.py +34 -0
- kafka/errors.py +1004 -0
- kafka/future.py +166 -0
- kafka/metrics/__init__.py +13 -0
- kafka/metrics/compound_stat.py +33 -0
- kafka/metrics/dict_reporter.py +81 -0
- kafka/metrics/kafka_metric.py +36 -0
- kafka/metrics/measurable.py +27 -0
- kafka/metrics/measurable_stat.py +13 -0
- kafka/metrics/metric_config.py +33 -0
- kafka/metrics/metric_name.py +105 -0
- kafka/metrics/metrics.py +261 -0
- kafka/metrics/metrics_reporter.py +53 -0
- kafka/metrics/quota.py +41 -0
- kafka/metrics/stat.py +19 -0
- kafka/metrics/stats/__init__.py +15 -0
- kafka/metrics/stats/avg.py +24 -0
- kafka/metrics/stats/count.py +17 -0
- kafka/metrics/stats/histogram.py +99 -0
- kafka/metrics/stats/max_stat.py +17 -0
- kafka/metrics/stats/min_stat.py +19 -0
- kafka/metrics/stats/percentile.py +14 -0
- kafka/metrics/stats/percentiles.py +75 -0
- kafka/metrics/stats/rate.py +118 -0
- kafka/metrics/stats/sampled_stat.py +99 -0
- kafka/metrics/stats/sensor.py +136 -0
- kafka/metrics/stats/total.py +15 -0
- kafka/net/__init__.py +19 -0
- kafka/net/compat.py +165 -0
- kafka/net/connection.py +593 -0
- kafka/net/http_connect.py +144 -0
- kafka/net/inet.py +122 -0
- kafka/net/manager.py +451 -0
- kafka/net/metrics.py +149 -0
- kafka/net/sasl/__init__.py +32 -0
- kafka/net/sasl/abc.py +28 -0
- kafka/net/sasl/gssapi.py +95 -0
- kafka/net/sasl/msk.py +245 -0
- kafka/net/sasl/oauth.py +98 -0
- kafka/net/sasl/plain.py +42 -0
- kafka/net/sasl/scram.py +135 -0
- kafka/net/sasl/sspi.py +111 -0
- kafka/net/selector.py +644 -0
- kafka/net/socks5.py +262 -0
- kafka/net/transport.py +415 -0
- kafka/net/wakeup_notifier.py +72 -0
- kafka/partitioner/__init__.py +8 -0
- kafka/partitioner/abc.py +8 -0
- kafka/partitioner/default.py +89 -0
- kafka/partitioner/sticky.py +109 -0
- kafka/producer/__init__.py +5 -0
- kafka/producer/__main__.py +5 -0
- kafka/producer/future.py +101 -0
- kafka/producer/kafka.py +1123 -0
- kafka/producer/producer_batch.py +192 -0
- kafka/producer/record_accumulator.py +647 -0
- kafka/producer/sender.py +884 -0
- kafka/producer/transaction_manager.py +1326 -0
- kafka/protocol/__init__.py +0 -0
- kafka/protocol/admin/__init__.py +29 -0
- kafka/protocol/admin/acl.py +83 -0
- kafka/protocol/admin/acl.pyi +375 -0
- kafka/protocol/admin/client_quotas.py +14 -0
- kafka/protocol/admin/client_quotas.pyi +265 -0
- kafka/protocol/admin/cluster.py +31 -0
- kafka/protocol/admin/cluster.pyi +620 -0
- kafka/protocol/admin/configs.py +22 -0
- kafka/protocol/admin/configs.pyi +437 -0
- kafka/protocol/admin/groups.py +24 -0
- kafka/protocol/admin/groups.pyi +261 -0
- kafka/protocol/admin/topics.py +53 -0
- kafka/protocol/admin/topics.pyi +982 -0
- kafka/protocol/admin/transactions.py +18 -0
- kafka/protocol/admin/transactions.pyi +311 -0
- kafka/protocol/admin/users.py +14 -0
- kafka/protocol/admin/users.pyi +223 -0
- kafka/protocol/api_data.py +125 -0
- kafka/protocol/api_header.py +55 -0
- kafka/protocol/api_key.py +97 -0
- kafka/protocol/api_message.py +277 -0
- kafka/protocol/broker_version_data.py +246 -0
- kafka/protocol/consumer/__init__.py +13 -0
- kafka/protocol/consumer/fetch.py +16 -0
- kafka/protocol/consumer/fetch.pyi +298 -0
- kafka/protocol/consumer/group.py +38 -0
- kafka/protocol/consumer/group.pyi +824 -0
- kafka/protocol/consumer/metadata.py +30 -0
- kafka/protocol/consumer/metadata.pyi +89 -0
- kafka/protocol/consumer/offsets.py +75 -0
- kafka/protocol/consumer/offsets.pyi +288 -0
- kafka/protocol/data_container.py +166 -0
- kafka/protocol/frame.py +30 -0
- kafka/protocol/generate_stubs.py +468 -0
- kafka/protocol/metadata/__init__.py +10 -0
- kafka/protocol/metadata/api_versions.py +41 -0
- kafka/protocol/metadata/api_versions.pyi +128 -0
- kafka/protocol/metadata/find_coordinator.py +19 -0
- kafka/protocol/metadata/find_coordinator.pyi +105 -0
- kafka/protocol/metadata/metadata.py +34 -0
- kafka/protocol/metadata/metadata.pyi +160 -0
- kafka/protocol/old/__init__.py +0 -0
- kafka/protocol/old/abstract.py +17 -0
- kafka/protocol/old/add_offsets_to_txn.py +54 -0
- kafka/protocol/old/add_partitions_to_txn.py +71 -0
- kafka/protocol/old/admin.py +1086 -0
- kafka/protocol/old/api.py +205 -0
- kafka/protocol/old/api_versions.py +133 -0
- kafka/protocol/old/commit.py +355 -0
- kafka/protocol/old/consumer_protocol.py +36 -0
- kafka/protocol/old/end_txn.py +53 -0
- kafka/protocol/old/fetch.py +408 -0
- kafka/protocol/old/find_coordinator.py +72 -0
- kafka/protocol/old/group.py +451 -0
- kafka/protocol/old/init_producer_id.py +42 -0
- kafka/protocol/old/list_offsets.py +186 -0
- kafka/protocol/old/metadata.py +290 -0
- kafka/protocol/old/offset_for_leader_epoch.py +133 -0
- kafka/protocol/old/produce.py +247 -0
- kafka/protocol/old/sasl_authenticate.py +38 -0
- kafka/protocol/old/sasl_handshake.py +39 -0
- kafka/protocol/old/struct.py +87 -0
- kafka/protocol/old/txn_offset_commit.py +73 -0
- kafka/protocol/old/types.py +440 -0
- kafka/protocol/parser.py +191 -0
- kafka/protocol/producer/__init__.py +7 -0
- kafka/protocol/producer/produce.py +17 -0
- kafka/protocol/producer/produce.pyi +197 -0
- kafka/protocol/producer/transaction.py +30 -0
- kafka/protocol/producer/transaction.pyi +663 -0
- kafka/protocol/sasl.py +52 -0
- kafka/protocol/sasl.pyi +126 -0
- kafka/protocol/schemas/__init__.py +7 -0
- kafka/protocol/schemas/fields/__init__.py +7 -0
- kafka/protocol/schemas/fields/array.py +127 -0
- kafka/protocol/schemas/fields/base.py +156 -0
- kafka/protocol/schemas/fields/codecs/__init__.py +12 -0
- kafka/protocol/schemas/fields/codecs/encode_buffer.py +82 -0
- kafka/protocol/schemas/fields/codecs/tagged_fields.py +109 -0
- kafka/protocol/schemas/fields/codecs/types.py +505 -0
- kafka/protocol/schemas/fields/codegen.py +40 -0
- kafka/protocol/schemas/fields/simple.py +127 -0
- kafka/protocol/schemas/fields/struct.py +357 -0
- kafka/protocol/schemas/fields/struct_array.py +142 -0
- kafka/protocol/schemas/load_json.py +42 -0
- kafka/protocol/schemas/resources/AddOffsetsToTxnRequest.json +40 -0
- kafka/protocol/schemas/resources/AddOffsetsToTxnResponse.json +35 -0
- kafka/protocol/schemas/resources/AddPartitionsToTxnRequest.json +65 -0
- kafka/protocol/schemas/resources/AddPartitionsToTxnResponse.json +60 -0
- kafka/protocol/schemas/resources/AlterClientQuotasRequest.json +47 -0
- kafka/protocol/schemas/resources/AlterClientQuotasResponse.json +41 -0
- kafka/protocol/schemas/resources/AlterConfigsRequest.json +43 -0
- kafka/protocol/schemas/resources/AlterConfigsResponse.json +39 -0
- kafka/protocol/schemas/resources/AlterPartitionReassignmentsRequest.json +42 -0
- kafka/protocol/schemas/resources/AlterPartitionReassignmentsResponse.json +47 -0
- kafka/protocol/schemas/resources/AlterReplicaLogDirsRequest.json +41 -0
- kafka/protocol/schemas/resources/AlterReplicaLogDirsResponse.json +41 -0
- kafka/protocol/schemas/resources/AlterUserScramCredentialsRequest.json +45 -0
- kafka/protocol/schemas/resources/AlterUserScramCredentialsResponse.json +35 -0
- kafka/protocol/schemas/resources/ApiVersionsRequest.json +34 -0
- kafka/protocol/schemas/resources/ApiVersionsResponse.json +79 -0
- kafka/protocol/schemas/resources/ConsumerProtocolAssignment.json +42 -0
- kafka/protocol/schemas/resources/ConsumerProtocolSubscription.json +49 -0
- kafka/protocol/schemas/resources/CreateAclsRequest.json +46 -0
- kafka/protocol/schemas/resources/CreateAclsResponse.json +37 -0
- kafka/protocol/schemas/resources/CreatePartitionsRequest.json +47 -0
- kafka/protocol/schemas/resources/CreatePartitionsResponse.json +41 -0
- kafka/protocol/schemas/resources/CreateTopicsRequest.json +65 -0
- kafka/protocol/schemas/resources/CreateTopicsResponse.json +72 -0
- kafka/protocol/schemas/resources/DeleteAclsRequest.json +46 -0
- kafka/protocol/schemas/resources/DeleteAclsResponse.json +59 -0
- kafka/protocol/schemas/resources/DeleteGroupsRequest.json +30 -0
- kafka/protocol/schemas/resources/DeleteGroupsResponse.json +36 -0
- kafka/protocol/schemas/resources/DeleteRecordsRequest.json +42 -0
- kafka/protocol/schemas/resources/DeleteRecordsResponse.json +43 -0
- kafka/protocol/schemas/resources/DeleteTopicsRequest.json +43 -0
- kafka/protocol/schemas/resources/DeleteTopicsResponse.json +52 -0
- kafka/protocol/schemas/resources/DescribeAclsRequest.json +43 -0
- kafka/protocol/schemas/resources/DescribeAclsResponse.json +55 -0
- kafka/protocol/schemas/resources/DescribeClientQuotasRequest.json +37 -0
- kafka/protocol/schemas/resources/DescribeClientQuotasResponse.json +47 -0
- kafka/protocol/schemas/resources/DescribeClusterRequest.json +35 -0
- kafka/protocol/schemas/resources/DescribeClusterResponse.json +56 -0
- kafka/protocol/schemas/resources/DescribeConfigsRequest.json +42 -0
- kafka/protocol/schemas/resources/DescribeConfigsResponse.json +69 -0
- kafka/protocol/schemas/resources/DescribeGroupsRequest.json +38 -0
- kafka/protocol/schemas/resources/DescribeGroupsResponse.json +74 -0
- kafka/protocol/schemas/resources/DescribeLogDirsRequest.json +38 -0
- kafka/protocol/schemas/resources/DescribeLogDirsResponse.json +65 -0
- kafka/protocol/schemas/resources/DescribeProducersRequest.json +32 -0
- kafka/protocol/schemas/resources/DescribeProducersResponse.json +55 -0
- kafka/protocol/schemas/resources/DescribeQuorumRequest.json +39 -0
- kafka/protocol/schemas/resources/DescribeQuorumResponse.json +82 -0
- kafka/protocol/schemas/resources/DescribeTopicPartitionsRequest.json +40 -0
- kafka/protocol/schemas/resources/DescribeTopicPartitionsResponse.json +66 -0
- kafka/protocol/schemas/resources/DescribeTransactionsRequest.json +27 -0
- kafka/protocol/schemas/resources/DescribeTransactionsResponse.json +52 -0
- kafka/protocol/schemas/resources/DescribeUserScramCredentialsRequest.json +30 -0
- kafka/protocol/schemas/resources/DescribeUserScramCredentialsResponse.json +45 -0
- kafka/protocol/schemas/resources/ElectLeadersRequest.json +41 -0
- kafka/protocol/schemas/resources/ElectLeadersResponse.json +45 -0
- kafka/protocol/schemas/resources/EndTxnRequest.json +43 -0
- kafka/protocol/schemas/resources/EndTxnResponse.json +41 -0
- kafka/protocol/schemas/resources/FetchRequest.json +125 -0
- kafka/protocol/schemas/resources/FetchResponse.json +124 -0
- kafka/protocol/schemas/resources/FindCoordinatorRequest.json +43 -0
- kafka/protocol/schemas/resources/FindCoordinatorResponse.json +58 -0
- kafka/protocol/schemas/resources/HeartbeatRequest.json +39 -0
- kafka/protocol/schemas/resources/HeartbeatResponse.json +35 -0
- kafka/protocol/schemas/resources/IncrementalAlterConfigsRequest.json +44 -0
- kafka/protocol/schemas/resources/IncrementalAlterConfigsResponse.json +38 -0
- kafka/protocol/schemas/resources/InitProducerIdRequest.json +50 -0
- kafka/protocol/schemas/resources/InitProducerIdResponse.json +47 -0
- kafka/protocol/schemas/resources/JoinGroupRequest.json +63 -0
- kafka/protocol/schemas/resources/JoinGroupResponse.json +69 -0
- kafka/protocol/schemas/resources/LeaveGroupRequest.json +47 -0
- kafka/protocol/schemas/resources/LeaveGroupResponse.json +47 -0
- kafka/protocol/schemas/resources/ListConfigResourcesRequest.json +31 -0
- kafka/protocol/schemas/resources/ListConfigResourcesResponse.json +37 -0
- kafka/protocol/schemas/resources/ListGroupsRequest.json +36 -0
- kafka/protocol/schemas/resources/ListGroupsResponse.json +49 -0
- kafka/protocol/schemas/resources/ListOffsetsRequest.json +72 -0
- kafka/protocol/schemas/resources/ListOffsetsResponse.json +71 -0
- kafka/protocol/schemas/resources/ListPartitionReassignmentsRequest.json +34 -0
- kafka/protocol/schemas/resources/ListPartitionReassignmentsResponse.json +46 -0
- kafka/protocol/schemas/resources/ListTransactionsRequest.json +40 -0
- kafka/protocol/schemas/resources/ListTransactionsResponse.json +42 -0
- kafka/protocol/schemas/resources/MetadataRequest.json +56 -0
- kafka/protocol/schemas/resources/MetadataResponse.json +101 -0
- kafka/protocol/schemas/resources/OffsetCommitRequest.json +76 -0
- kafka/protocol/schemas/resources/OffsetCommitResponse.json +71 -0
- kafka/protocol/schemas/resources/OffsetDeleteRequest.json +39 -0
- kafka/protocol/schemas/resources/OffsetDeleteResponse.json +42 -0
- kafka/protocol/schemas/resources/OffsetFetchRequest.json +76 -0
- kafka/protocol/schemas/resources/OffsetFetchResponse.json +107 -0
- kafka/protocol/schemas/resources/OffsetForLeaderEpochRequest.json +52 -0
- kafka/protocol/schemas/resources/OffsetForLeaderEpochResponse.json +51 -0
- kafka/protocol/schemas/resources/ProduceRequest.json +73 -0
- kafka/protocol/schemas/resources/ProduceResponse.json +96 -0
- kafka/protocol/schemas/resources/RequestHeader.json +44 -0
- kafka/protocol/schemas/resources/ResponseHeader.json +26 -0
- kafka/protocol/schemas/resources/SaslAuthenticateRequest.json +29 -0
- kafka/protocol/schemas/resources/SaslAuthenticateResponse.json +34 -0
- kafka/protocol/schemas/resources/SaslHandshakeRequest.json +31 -0
- kafka/protocol/schemas/resources/SaslHandshakeResponse.json +32 -0
- kafka/protocol/schemas/resources/SyncGroupRequest.json +56 -0
- kafka/protocol/schemas/resources/SyncGroupResponse.json +46 -0
- kafka/protocol/schemas/resources/TxnOffsetCommitRequest.json +68 -0
- kafka/protocol/schemas/resources/TxnOffsetCommitResponse.json +47 -0
- kafka/protocol/schemas/resources/UpdateFeaturesRequest.json +43 -0
- kafka/protocol/schemas/resources/UpdateFeaturesResponse.json +39 -0
- kafka/protocol/schemas/resources/WriteTxnMarkersRequest.json +49 -0
- kafka/protocol/schemas/resources/WriteTxnMarkersResponse.json +45 -0
- kafka/protocol/schemas/resources/__init__.py +0 -0
- kafka/record/__init__.py +3 -0
- kafka/record/_crc32c.py +161 -0
- kafka/record/abc.py +144 -0
- kafka/record/default_records.py +782 -0
- kafka/record/legacy_records.py +587 -0
- kafka/record/memory_records.py +255 -0
- kafka/record/util.py +135 -0
- kafka/serializer/__init__.py +4 -0
- kafka/serializer/abstract.py +20 -0
- kafka/serializer/default.py +16 -0
- kafka/serializer/json.py +17 -0
- kafka/serializer/wrapper.py +21 -0
- kafka/structs.py +69 -0
- kafka/util.py +159 -0
- kafka/vendor/__init__.py +0 -0
- kafka/version.py +1 -0
- kafka_python-3.0.0.dist-info/METADATA +319 -0
- kafka_python-3.0.0.dist-info/RECORD +373 -0
- kafka_python-3.0.0.dist-info/WHEEL +5 -0
- kafka_python-3.0.0.dist-info/entry_points.txt +2 -0
- kafka_python-3.0.0.dist-info/licenses/LICENSE +202 -0
- kafka_python-3.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,665 @@
|
|
|
1
|
+
from collections import defaultdict, namedtuple
|
|
2
|
+
from copy import deepcopy
|
|
3
|
+
import logging
|
|
4
|
+
import io
|
|
5
|
+
|
|
6
|
+
from ..abstract import AbstractPartitionAssignor
|
|
7
|
+
from .partition_movements import PartitionMovements
|
|
8
|
+
from .sorted_set import SortedSet
|
|
9
|
+
from .user_data import StickyAssignorUserData
|
|
10
|
+
from kafka.protocol.consumer.metadata import (
|
|
11
|
+
ConsumerProtocolSubscription,
|
|
12
|
+
ConsumerProtocolAssignment,
|
|
13
|
+
)
|
|
14
|
+
from kafka.structs import TopicPartition
|
|
15
|
+
|
|
16
|
+
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
# Pairs a consumer with the generation it reported.
ConsumerGenerationPair = namedtuple(
    "ConsumerGenerationPair",
    ("consumer", "generation"),
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def has_identical_list_elements(list_):
    """Tell whether every entry of an indexable collection equals its neighbour.

    Each entry is compared with the one before it using ``!=``, so when the
    entries are lists the comparison is order-sensitive.

    Arguments:
        list_: collection of lists

    Returns:
        True when the collection is empty (or otherwise falsy) or no adjacent
        pair differs; False as soon as one adjacent pair differs
    """
    if not list_:
        return True
    return not any(list_[idx] != list_[idx - 1] for idx in range(1, len(list_)))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def subscriptions_comparator_key(element):
    """Sort key for an indexable pair: ``(len(element[1]), element[0])``.

    The size of the second item comes first, so pairs are ordered by that
    size and ties are broken by the first item.
    """
    size = len(element[1])
    return size, element[0]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def partitions_comparator_key(element):
    """Sort key for an indexable pair whose first item has ``topic``/``partition``.

    Returns ``(len(element[1]), element[0].topic, element[0].partition)``:
    the size of the second item first, then topic and partition as
    tie-breakers.
    """
    size = len(element[1])
    topic_partition = element[0]
    return size, topic_partition.topic, topic_partition.partition
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def remove_if_present(collection, element):
    """Call ``collection.remove(element)``, ignoring a missing element.

    ``ValueError`` and ``KeyError`` raised by ``remove`` are swallowed, so the
    call is a no-op when the element is absent. Always returns None.
    """
    try:
        collection.remove(element)
    except (KeyError, ValueError):
        # element was not there: nothing to remove
        return
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Per-member metadata record with three fields:
# subscription, partitions and generation.
StickyAssignorMemberMetadataV1 = namedtuple(
    "StickyAssignorMemberMetadataV1",
    ("subscription", "partitions", "generation"),
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class StickyAssignmentExecutor:
|
|
58
|
+
def __init__(self, cluster, members):
    """Create the bookkeeping for one assignment run and seed it.

    Arguments:
        cluster: cluster metadata object, passed unchanged to ``_initialize``
        members: mapping of member_id => StickyAssignorMemberMetadataV1
    """
    self.members = members

    # -- assignment state ---------------------------------------------------
    # consumer => partitions assigned to it; updated as assignment proceeds
    self.current_assignment = defaultdict(list)
    # assignment taken from a previous generation
    self.previous_assignment = {}
    # partition => consumer it is assigned to
    self.current_partition_consumer = {}
    # flag: no assignment has ever been performed before this one
    self.is_fresh_assignment = False

    # -- who could take what ------------------------------------------------
    # topic partition => every consumer that can be assigned to it
    self.partition_to_all_potential_consumers = {}
    # consumer => every topic partition that can be assigned to it
    self.consumer_to_all_potential_partitions = {}

    # -- work lists ---------------------------------------------------------
    # consumers in ascending order of how many partitions they already hold
    self.sorted_current_subscriptions = SortedSet()
    # topic partitions in ascending order of how many consumers could use them
    self.sorted_partitions = []
    # every partition that still needs to be assigned
    self.unassigned_partitions = []
    # flag: some partition cannot stay with its current consumer because
    # that consumer is no longer subscribed to the partition's topic
    self.revocation_required = False

    self.partition_movements = PartitionMovements()

    # must run last: it fills in the structures created above
    self._initialize(cluster)
|
|
85
|
+
|
|
86
|
+
def perform_initial_assignment(self):
    """Run the two preparation steps, in order.

    First ``_populate_sorted_partitions``, then
    ``_populate_partitions_to_reassign``. Returns None.
    """
    self._populate_sorted_partitions()
    self._populate_partitions_to_reassign()
|
|
89
|
+
|
|
90
|
+
def balance(self):
    """Balance ``self.current_assignment`` in place.

    Steps, in this order:
      1. initialise the sorted current subscriptions;
      2. assign every unassigned partition that has a potential consumer;
      3. take partitions and consumers that cannot participate in
         reassignment out of the working structures;
      4. snapshot the assignment, run the reassignments, and roll back to
         the snapshot when the balance score did not get lower;
      5. put the consumers removed in step 3 back with their assignments.

    Returns None; all results are left on the instance.
    """
    self._initialize_current_subscriptions()
    # True when the consumer reported as having the most subscriptions holds
    # no partitions (presumably meaning nobody holds any yet; confirm
    # against _get_consumer_with_most_subscriptions)
    initializing = len(self.current_assignment[self._get_consumer_with_most_subscriptions()]) == 0

    # assign all unassigned partitions
    for partition in self.unassigned_partitions:
        # skip if there is no potential consumer for the partition
        if not self.partition_to_all_potential_consumers[partition]:
            continue
        self._assign_partition(partition)

    # narrow down the reassignment scope to only those partitions that can actually be reassigned
    fixed_partitions = set()
    for partition in self.partition_to_all_potential_consumers:
        if not self._can_partition_participate_in_reassignment(partition):
            fixed_partitions.add(partition)
    # collected first, removed afterwards: the two lists are only touched
    # once the scan above has finished
    for fixed_partition in fixed_partitions:
        remove_if_present(self.sorted_partitions, fixed_partition)
        remove_if_present(self.unassigned_partitions, fixed_partition)

    # narrow down the reassignment scope to only those consumers that are subject to reassignment
    fixed_assignments = {}
    for consumer in self.consumer_to_all_potential_partitions:
        if not self._can_consumer_participate_in_reassignment(consumer):
            self._remove_consumer_from_current_subscriptions_and_maintain_order(consumer)
            # park the consumer's partitions; they are restored at the end
            fixed_assignments[consumer] = self.current_assignment[consumer]
            del self.current_assignment[consumer]

    # create a deep copy of the current assignment so we can revert to it
    # if we do not get a more balanced assignment later
    prebalance_assignment = deepcopy(self.current_assignment)
    prebalance_partition_consumers = deepcopy(self.current_partition_consumer)

    # if we don't already need to revoke something due to subscription changes,
    # first try to balance by only moving newly added partitions
    if not self.revocation_required:
        self._perform_reassignments(self.unassigned_partitions)
    reassignment_performed = self._perform_reassignments(self.sorted_partitions)

    # if we are not preserving existing assignments and we have made changes to the current assignment
    # make sure we are getting a more balanced assignment; otherwise, revert to previous assignment
    # (the revert happens when the new score is greater than or equal to the old one)
    if (
        not initializing
        and reassignment_performed
        and self._get_balance_score(self.current_assignment) >= self._get_balance_score(prebalance_assignment)
    ):
        self.current_assignment = prebalance_assignment
        # current_partition_consumer is restored in place (clear + update)
        # rather than rebound to the snapshot
        self.current_partition_consumer.clear()
        self.current_partition_consumer.update(prebalance_partition_consumers)

    # add the fixed assignments (those that could not change) back
    for consumer, partitions in fixed_assignments.items():
        self.current_assignment[consumer] = partitions
        self._add_consumer_to_current_subscriptions_and_maintain_order(consumer)
|
|
144
|
+
|
|
145
|
+
def get_final_assignment(self, member_id):
    """Group one member's assigned partitions by topic.

    Arguments:
        member_id: key into ``self.current_assignment``

    Returns:
        the ``items()`` view of a dict mapping topic => ascending list of
        partition numbers; topics appear in the order they are first seen
        in the member's assignment
    """
    by_topic = {}
    for tp in self.current_assignment[member_id]:
        by_topic.setdefault(tp.topic, []).append(tp.partition)
    for partition_numbers in by_topic.values():
        partition_numbers.sort()
    return by_topic.items()
|
|
151
|
+
|
|
152
|
+
def _initialize(self, cluster):
    """Seed the lookup tables from cluster metadata and member subscriptions.

    After loading current assignments via ``_init_current_assignments`` this
    fills:
      * ``partition_to_all_potential_consumers``: one entry per partition of
        every topic returned by ``cluster.topics()``, listing the members
        subscribed to that topic;
      * ``consumer_to_all_potential_partitions``: one entry per member,
        listing every partition of every topic it subscribes to;
      * ``current_assignment``: an empty list for each member that has no
        entry yet.

    Topics for which ``cluster.partitions_for_topic`` returns None are
    logged and skipped.

    Arguments:
        cluster: object providing ``topics()`` and ``partitions_for_topic(topic)``
    """
    self._init_current_assignments(self.members)

    potential_consumers = self.partition_to_all_potential_consumers
    for topic in cluster.topics():
        partitions = cluster.partitions_for_topic(topic)
        if partitions is None:
            log.warning("No partition metadata for topic %s", topic)
            continue
        for p in partitions:
            potential_consumers[TopicPartition(topic=topic, partition=p)] = []

    for consumer_id, member_metadata in self.members.items():
        potential_partitions = []
        self.consumer_to_all_potential_partitions[consumer_id] = potential_partitions
        for topic in member_metadata.subscription:
            # look the partitions up once so the None check and the loop
            # below see the same value
            partitions = cluster.partitions_for_topic(topic)
            if partitions is None:
                log.warning("No partition metadata for topic %s", topic)
                continue
            for p in partitions:
                partition = TopicPartition(topic=topic, partition=p)
                potential_partitions.append(partition)
                # setdefault instead of plain indexing: a subscribed topic
                # may have partition metadata without having been listed by
                # cluster.topics(), which previously raised KeyError here
                potential_consumers.setdefault(partition, []).append(consumer_id)
        if consumer_id not in self.current_assignment:
            self.current_assignment[consumer_id] = []
|
|
175
|
+
|
|
176
|
+
def _init_current_assignments(self, members):
    """Rebuild current/previous assignment state from members' reported data.

    Every member reports the partitions it owns together with a generation.
    For each partition, the owner with the highest generation becomes the
    current owner and the owner with the second-highest generation (if any)
    is recorded in ``previous_assignment``. Also sets
    ``is_fresh_assignment`` and fills ``current_partition_consumer``.

    Arguments:
        members: mapping of member_id => metadata exposing ``partitions``
            and ``generation``
    """
    # Higher generations win over lower ones when two members claim the same
    # partition; a clash is only expected when the claims come from
    # different generations.

    # partition => {generation: consumer that claimed it in that generation}
    owners_by_partition = {}
    for member_id, metadata in members.items():
        generation = metadata.generation
        for tp in metadata.partitions:
            owners = owners_by_partition.get(tp)
            if owners is None:
                owners_by_partition[tp] = {generation: member_id}
                continue
            if generation and generation in owners:
                # two consumers claim the partition for the same
                # generation: warn and ignore this later claim
                log.warning(
                    "Partition {} is assigned to multiple consumers "
                    "following sticky assignment generation {}.".format(tp, generation)
                )
                continue
            owners[generation] = member_id

    # newest claim => current owner; second newest => previous owner
    for tp, owners in owners_by_partition.items():
        newest_first = sorted(owners, reverse=True)
        latest = newest_first[0]
        self.current_assignment[owners[latest]].append(tp)
        if len(newest_first) > 1:
            runner_up = newest_first[1]
            self.previous_assignment[tp] = ConsumerGenerationPair(
                consumer=owners[runner_up], generation=runner_up
            )

    self.is_fresh_assignment = not self.current_assignment

    # reverse index: partition => consumer currently holding it
    self.current_partition_consumer.update(
        (tp, member_id)
        for member_id, owned in self.current_assignment.items()
        for tp in owned
    )
|
|
216
|
+
|
|
217
|
+
def _are_subscriptions_identical(self):
    """Check whether all partitions and all consumers share the same options.

    Returns:
        True only if every partition lists the same potential consumers and
        every consumer lists the same potential partitions (both checked
        with ``has_identical_list_elements``); False otherwise
    """
    consumers_per_partition = list(self.partition_to_all_potential_consumers.values())
    if has_identical_list_elements(consumers_per_partition):
        partitions_per_consumer = list(self.consumer_to_all_potential_partitions.values())
        return has_identical_list_elements(partitions_per_consumer)
    return False
|
|
226
|
+
|
|
227
|
+
def _populate_sorted_partitions(self):
    """Fill ``self.sorted_partitions`` with the order in which partitions will be processed.

    Every key of ``self.partition_to_all_potential_consumers`` ends up in the list exactly once.

    On a fresh assignment, or when ``self._are_subscriptions_identical()`` is false, the list is
    simply the partitions ordered by ``partitions_comparator_key``.

    Otherwise (a reassignment with identical subscriptions) the currently assigned, still valid
    partitions are listed first, taken one at a time from whichever consumer currently holds the
    most of them, preferring partitions recorded in ``self.previous_assignment``. Any partition
    not listed that way is appended afterwards in ``partitions_comparator_key`` order.
    """
    # set of topic partitions with their respective potential consumers
    all_partitions = set((tp, tuple(consumers))
                         for tp, consumers in self.partition_to_all_potential_consumers.items())
    partitions_sorted_by_num_of_potential_consumers = sorted(all_partitions, key=partitions_comparator_key)

    self.sorted_partitions = []
    if self.is_fresh_assignment or not self._are_subscriptions_identical():
        # a plain walk replaces draining the list with pop(0), which is O(n) per element
        self.sorted_partitions.extend(
            partition for partition, _ in partitions_sorted_by_num_of_potential_consumers
        )
        return

    # this is a reassignment and the subscriptions are identical (all consumers can consume
    # from all topics), so we just need to list partitions in a round robin fashion (from
    # consumers with most assigned partitions to those with least)
    assignments = deepcopy(self.current_assignment)
    for partitions in assignments.values():
        # drop partitions that no longer exist, keeping the relative order of the rest
        partitions[:] = [
            partition for partition in partitions
            if partition in self.partition_to_all_potential_consumers
        ]

    sorted_consumers = SortedSet(
        iterable=[(consumer, tuple(partitions)) for consumer, partitions in assignments.items()],
        key=subscriptions_comparator_key,
    )
    # previous_assignment is not modified below, so its key set is built once
    previously_owned = set(self.previous_assignment.keys())

    # at this point, sorted_consumers contains an ascending-sorted list of consumers based on
    # how many valid partitions are currently assigned to them
    while sorted_consumers:
        # take the consumer with the most partitions
        consumer, _ = sorted_consumers.pop_last()
        # currently assigned partitions to this consumer
        remaining_partitions = assignments[consumer]
        # from partitions that had a different consumer before,
        # keep only those that are assigned to this consumer now
        previous_partitions = previously_owned.intersection(set(remaining_partitions))
        if previous_partitions:
            # if there is a partition of this consumer that was assigned to another consumer before
            # mark it as a good option for reassignment
            partition = previous_partitions.pop()
            remaining_partitions.remove(partition)
            self.sorted_partitions.append(partition)
            sorted_consumers.add((consumer, tuple(assignments[consumer])))
        elif remaining_partitions:
            # otherwise, mark any other one of the current partitions as a reassignment candidate
            self.sorted_partitions.append(remaining_partitions.pop())
            sorted_consumers.add((consumer, tuple(assignments[consumer])))
        # a consumer with nothing left is simply not put back into sorted_consumers

    # append whatever is not listed yet; a set keeps the membership test O(1)
    already_listed = set(self.sorted_partitions)
    for partition, _ in partitions_sorted_by_num_of_potential_consumers:
        if partition not in already_listed:
            already_listed.add(partition)
            self.sorted_partitions.append(partition)
|
|
280
|
+
|
|
281
|
+
def _populate_partitions_to_reassign(self):
|
|
282
|
+
self.unassigned_partitions = deepcopy(self.sorted_partitions)
|
|
283
|
+
|
|
284
|
+
assignments_to_remove = []
|
|
285
|
+
for consumer_id, partitions in self.current_assignment.items():
|
|
286
|
+
if consumer_id not in self.members:
|
|
287
|
+
# if a consumer that existed before (and had some partition assignments) is now removed,
|
|
288
|
+
# remove it from current_assignment
|
|
289
|
+
for partition in partitions:
|
|
290
|
+
del self.current_partition_consumer[partition]
|
|
291
|
+
assignments_to_remove.append(consumer_id)
|
|
292
|
+
else:
|
|
293
|
+
# otherwise (the consumer still exists)
|
|
294
|
+
partitions_to_remove = []
|
|
295
|
+
for partition in partitions:
|
|
296
|
+
if partition not in self.partition_to_all_potential_consumers:
|
|
297
|
+
# if this topic partition of this consumer no longer exists
|
|
298
|
+
# remove it from current_assignment of the consumer
|
|
299
|
+
partitions_to_remove.append(partition)
|
|
300
|
+
elif partition.topic not in self.members[consumer_id].subscription:
|
|
301
|
+
# if this partition cannot remain assigned to its current consumer because the consumer
|
|
302
|
+
# is no longer subscribed to its topic remove it from current_assignment of the consumer
|
|
303
|
+
partitions_to_remove.append(partition)
|
|
304
|
+
self.revocation_required = True
|
|
305
|
+
else:
|
|
306
|
+
# otherwise, remove the topic partition from those that need to be assigned only if
|
|
307
|
+
# its current consumer is still subscribed to its topic (because it is already assigned
|
|
308
|
+
# and we would want to preserve that assignment as much as possible)
|
|
309
|
+
self.unassigned_partitions.remove(partition)
|
|
310
|
+
for partition in partitions_to_remove:
|
|
311
|
+
self.current_assignment[consumer_id].remove(partition)
|
|
312
|
+
del self.current_partition_consumer[partition]
|
|
313
|
+
for consumer_id in assignments_to_remove:
|
|
314
|
+
del self.current_assignment[consumer_id]
|
|
315
|
+
|
|
316
|
+
def _initialize_current_subscriptions(self):
    """Build ``self.sorted_current_subscriptions`` from ``self.current_assignment``.

    Each entry is a ``(consumer, tuple_of_partitions)`` pair and the set is ordered by
    ``subscriptions_comparator_key``.
    """
    entries = []
    for consumer, partitions in self.current_assignment.items():
        entries.append((consumer, tuple(partitions)))
    self.sorted_current_subscriptions = SortedSet(iterable=entries, key=subscriptions_comparator_key)
|
|
321
|
+
|
|
322
|
+
def _get_consumer_with_least_subscriptions(self):
|
|
323
|
+
return self.sorted_current_subscriptions.first()[0]
|
|
324
|
+
|
|
325
|
+
def _get_consumer_with_most_subscriptions(self):
|
|
326
|
+
return self.sorted_current_subscriptions.last()[0]
|
|
327
|
+
|
|
328
|
+
def _remove_consumer_from_current_subscriptions_and_maintain_order(self, consumer):
|
|
329
|
+
self.sorted_current_subscriptions.remove((consumer, tuple(self.current_assignment[consumer])))
|
|
330
|
+
|
|
331
|
+
def _add_consumer_to_current_subscriptions_and_maintain_order(self, consumer):
|
|
332
|
+
self.sorted_current_subscriptions.add((consumer, tuple(self.current_assignment[consumer])))
|
|
333
|
+
|
|
334
|
+
def _is_balanced(self):
|
|
335
|
+
"""Determines if the current assignment is a balanced one"""
|
|
336
|
+
if (
|
|
337
|
+
len(self.current_assignment[self._get_consumer_with_least_subscriptions()])
|
|
338
|
+
>= len(self.current_assignment[self._get_consumer_with_most_subscriptions()]) - 1
|
|
339
|
+
):
|
|
340
|
+
# if minimum and maximum numbers of partitions assigned to consumers differ by at most one return true
|
|
341
|
+
return True
|
|
342
|
+
|
|
343
|
+
# create a mapping from partitions to the consumer assigned to them
|
|
344
|
+
all_assigned_partitions = {}
|
|
345
|
+
for consumer_id, consumer_partitions in self.current_assignment.items():
|
|
346
|
+
for partition in consumer_partitions:
|
|
347
|
+
if partition in all_assigned_partitions:
|
|
348
|
+
log.error("{} is assigned to more than one consumer.".format(partition))
|
|
349
|
+
all_assigned_partitions[partition] = consumer_id
|
|
350
|
+
|
|
351
|
+
# for each consumer that does not have all the topic partitions it can get
|
|
352
|
+
# make sure none of the topic partitions it could but did not get cannot be moved to it
|
|
353
|
+
# (because that would break the balance)
|
|
354
|
+
for consumer, _ in self.sorted_current_subscriptions:
|
|
355
|
+
consumer_partition_count = len(self.current_assignment[consumer])
|
|
356
|
+
# skip if this consumer already has all the topic partitions it can get
|
|
357
|
+
if consumer_partition_count == len(self.consumer_to_all_potential_partitions[consumer]):
|
|
358
|
+
continue
|
|
359
|
+
|
|
360
|
+
# otherwise make sure it cannot get any more
|
|
361
|
+
for partition in self.consumer_to_all_potential_partitions[consumer]:
|
|
362
|
+
if partition not in self.current_assignment[consumer]:
|
|
363
|
+
other_consumer = all_assigned_partitions[partition]
|
|
364
|
+
other_consumer_partition_count = len(self.current_assignment[other_consumer])
|
|
365
|
+
if consumer_partition_count < other_consumer_partition_count:
|
|
366
|
+
return False
|
|
367
|
+
return True
|
|
368
|
+
|
|
369
|
+
def _assign_partition(self, partition):
|
|
370
|
+
for consumer, _ in self.sorted_current_subscriptions:
|
|
371
|
+
if partition in self.consumer_to_all_potential_partitions[consumer]:
|
|
372
|
+
self._remove_consumer_from_current_subscriptions_and_maintain_order(consumer)
|
|
373
|
+
self.current_assignment[consumer].append(partition)
|
|
374
|
+
self.current_partition_consumer[partition] = consumer
|
|
375
|
+
self._add_consumer_to_current_subscriptions_and_maintain_order(consumer)
|
|
376
|
+
break
|
|
377
|
+
|
|
378
|
+
def _can_partition_participate_in_reassignment(self, partition):
|
|
379
|
+
return len(self.partition_to_all_potential_consumers[partition]) >= 2
|
|
380
|
+
|
|
381
|
+
def _can_consumer_participate_in_reassignment(self, consumer):
|
|
382
|
+
current_partitions = self.current_assignment[consumer]
|
|
383
|
+
current_assignment_size = len(current_partitions)
|
|
384
|
+
max_assignment_size = len(self.consumer_to_all_potential_partitions[consumer])
|
|
385
|
+
if current_assignment_size > max_assignment_size:
|
|
386
|
+
log.error("The consumer {} is assigned more partitions than the maximum possible.".format(consumer))
|
|
387
|
+
if current_assignment_size < max_assignment_size:
|
|
388
|
+
# if a consumer is not assigned all its potential partitions it is subject to reassignment
|
|
389
|
+
return True
|
|
390
|
+
for partition in current_partitions:
|
|
391
|
+
# if any of the partitions assigned to a consumer is subject to reassignment the consumer itself
|
|
392
|
+
# is subject to reassignment
|
|
393
|
+
if self._can_partition_participate_in_reassignment(partition):
|
|
394
|
+
return True
|
|
395
|
+
return False
|
|
396
|
+
|
|
397
|
+
def _perform_reassignments(self, reassignable_partitions):
|
|
398
|
+
reassignment_performed = False
|
|
399
|
+
|
|
400
|
+
# repeat reassignment until no partition can be moved to improve the balance
|
|
401
|
+
while True:
|
|
402
|
+
modified = False
|
|
403
|
+
# reassign all reassignable partitions until the full list is processed or a balance is achieved
|
|
404
|
+
# (starting from the partition with least potential consumers and if needed)
|
|
405
|
+
for partition in reassignable_partitions:
|
|
406
|
+
if self._is_balanced():
|
|
407
|
+
break
|
|
408
|
+
# the partition must have at least two potential consumers
|
|
409
|
+
if len(self.partition_to_all_potential_consumers[partition]) <= 1:
|
|
410
|
+
log.error("Expected more than one potential consumer for partition {}".format(partition))
|
|
411
|
+
# the partition must have a current consumer
|
|
412
|
+
consumer = self.current_partition_consumer.get(partition)
|
|
413
|
+
if consumer is None:
|
|
414
|
+
log.error("Expected partition {} to be assigned to a consumer".format(partition))
|
|
415
|
+
|
|
416
|
+
if (
|
|
417
|
+
partition in self.previous_assignment
|
|
418
|
+
and len(self.current_assignment[consumer])
|
|
419
|
+
> len(self.current_assignment[self.previous_assignment[partition].consumer]) + 1
|
|
420
|
+
):
|
|
421
|
+
self._reassign_partition_to_consumer(
|
|
422
|
+
partition, self.previous_assignment[partition].consumer,
|
|
423
|
+
)
|
|
424
|
+
reassignment_performed = True
|
|
425
|
+
modified = True
|
|
426
|
+
continue
|
|
427
|
+
|
|
428
|
+
# check if a better-suited consumer exist for the partition; if so, reassign it
|
|
429
|
+
for other_consumer in self.partition_to_all_potential_consumers[partition]:
|
|
430
|
+
if len(self.current_assignment[consumer]) > len(self.current_assignment[other_consumer]) + 1:
|
|
431
|
+
self._reassign_partition(partition)
|
|
432
|
+
reassignment_performed = True
|
|
433
|
+
modified = True
|
|
434
|
+
break
|
|
435
|
+
|
|
436
|
+
if not modified:
|
|
437
|
+
break
|
|
438
|
+
return reassignment_performed
|
|
439
|
+
|
|
440
|
+
def _reassign_partition(self, partition):
|
|
441
|
+
new_consumer = None
|
|
442
|
+
for another_consumer, _ in self.sorted_current_subscriptions:
|
|
443
|
+
if partition in self.consumer_to_all_potential_partitions[another_consumer]:
|
|
444
|
+
new_consumer = another_consumer
|
|
445
|
+
break
|
|
446
|
+
assert new_consumer is not None
|
|
447
|
+
self._reassign_partition_to_consumer(partition, new_consumer)
|
|
448
|
+
|
|
449
|
+
def _reassign_partition_to_consumer(self, partition, new_consumer):
|
|
450
|
+
consumer = self.current_partition_consumer[partition]
|
|
451
|
+
# find the correct partition movement considering the stickiness requirement
|
|
452
|
+
partition_to_be_moved = self.partition_movements.get_partition_to_be_moved(partition, consumer, new_consumer)
|
|
453
|
+
self._move_partition(partition_to_be_moved, new_consumer)
|
|
454
|
+
|
|
455
|
+
def _move_partition(self, partition, new_consumer):
|
|
456
|
+
old_consumer = self.current_partition_consumer[partition]
|
|
457
|
+
self._remove_consumer_from_current_subscriptions_and_maintain_order(old_consumer)
|
|
458
|
+
self._remove_consumer_from_current_subscriptions_and_maintain_order(new_consumer)
|
|
459
|
+
|
|
460
|
+
self.partition_movements.move_partition(partition, old_consumer, new_consumer)
|
|
461
|
+
|
|
462
|
+
self.current_assignment[old_consumer].remove(partition)
|
|
463
|
+
self.current_assignment[new_consumer].append(partition)
|
|
464
|
+
self.current_partition_consumer[partition] = new_consumer
|
|
465
|
+
|
|
466
|
+
self._add_consumer_to_current_subscriptions_and_maintain_order(new_consumer)
|
|
467
|
+
self._add_consumer_to_current_subscriptions_and_maintain_order(old_consumer)
|
|
468
|
+
|
|
469
|
+
@staticmethod
|
|
470
|
+
def _get_balance_score(assignment):
|
|
471
|
+
"""Calculates a balance score of a give assignment
|
|
472
|
+
as the sum of assigned partitions size difference of all consumer pairs.
|
|
473
|
+
A perfectly balanced assignment (with all consumers getting the same number of partitions)
|
|
474
|
+
has a balance score of 0. Lower balance score indicates a more balanced assignment.
|
|
475
|
+
|
|
476
|
+
Arguments:
|
|
477
|
+
assignment (dict): {consumer: list of assigned topic partitions}
|
|
478
|
+
|
|
479
|
+
Returns:
|
|
480
|
+
the balance score of the assignment
|
|
481
|
+
"""
|
|
482
|
+
score = 0
|
|
483
|
+
consumer_to_assignment = {}
|
|
484
|
+
for consumer_id, partitions in assignment.items():
|
|
485
|
+
consumer_to_assignment[consumer_id] = len(partitions)
|
|
486
|
+
|
|
487
|
+
consumers_to_explore = set(consumer_to_assignment.keys())
|
|
488
|
+
for consumer_id in consumer_to_assignment:
|
|
489
|
+
if consumer_id in consumers_to_explore:
|
|
490
|
+
consumers_to_explore.remove(consumer_id)
|
|
491
|
+
for other_consumer_id in consumers_to_explore:
|
|
492
|
+
score += abs(consumer_to_assignment[consumer_id] - consumer_to_assignment[other_consumer_id])
|
|
493
|
+
return score
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
class StickyPartitionAssignor(AbstractPartitionAssignor):
|
|
497
|
+
"""
|
|
498
|
+
https://cwiki.apache.org/confluence/display/KAFKA/KIP-54+-+Sticky+Partition+Assignment+Strategy
|
|
499
|
+
|
|
500
|
+
The sticky assignor serves two purposes. First, it guarantees an assignment that is as balanced as possible, meaning either:
|
|
501
|
+
- the numbers of topic partitions assigned to consumers differ by at most one; or
|
|
502
|
+
- each consumer that has 2+ fewer topic partitions than some other consumer cannot get any of those topic partitions transferred to it.
|
|
503
|
+
|
|
504
|
+
Second, it preserves as many existing assignments as possible when a reassignment occurs.
|
|
505
|
+
This helps in saving some of the overhead processing when topic partitions move from one consumer to another.
|
|
506
|
+
|
|
507
|
+
Starting fresh it would work by distributing the partitions over consumers as evenly as possible.
|
|
508
|
+
Even though this may sound similar to how round robin assignor works, the second example below shows that it is not.
|
|
509
|
+
During a reassignment it would perform the reassignment in such a way that in the new assignment
|
|
510
|
+
- topic partitions are still distributed as evenly as possible, and
|
|
511
|
+
- topic partitions stay with their previously assigned consumers as much as possible.
|
|
512
|
+
|
|
513
|
+
The first goal above takes precedence over the second one.
|
|
514
|
+
|
|
515
|
+
Example 1.
|
|
516
|
+
Suppose there are three consumers C0, C1, C2,
|
|
517
|
+
four topics t0, t1, t2, t3, and each topic has 2 partitions,
|
|
518
|
+
resulting in partitions t0p0, t0p1, t1p0, t1p1, t2p0, t2p1, t3p0, t3p1.
|
|
519
|
+
Each consumer is subscribed to all four topics.
|
|
520
|
+
|
|
521
|
+
The assignment with both sticky and round robin assignors will be:
|
|
522
|
+
- C0: [t0p0, t1p1, t3p0]
|
|
523
|
+
- C1: [t0p1, t2p0, t3p1]
|
|
524
|
+
- C2: [t1p0, t2p1]
|
|
525
|
+
|
|
526
|
+
Now, let's assume C1 is removed and a reassignment is about to happen. The round robin assignor would produce:
|
|
527
|
+
- C0: [t0p0, t1p0, t2p0, t3p0]
|
|
528
|
+
- C2: [t0p1, t1p1, t2p1, t3p1]
|
|
529
|
+
|
|
530
|
+
while the sticky assignor would result in:
|
|
531
|
+
- C0 [t0p0, t1p1, t3p0, t2p0]
|
|
532
|
+
- C2 [t1p0, t2p1, t0p1, t3p1]
|
|
533
|
+
preserving all the previous assignments (unlike the round robin assignor).
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
Example 2.
|
|
537
|
+
There are three consumers C0, C1, C2,
|
|
538
|
+
and three topics t0, t1, t2, with 1, 2, and 3 partitions respectively.
|
|
539
|
+
Therefore, the partitions are t0p0, t1p0, t1p1, t2p0, t2p1, t2p2.
|
|
540
|
+
C0 is subscribed to t0;
|
|
541
|
+
C1 is subscribed to t0, t1;
|
|
542
|
+
and C2 is subscribed to t0, t1, t2.
|
|
543
|
+
|
|
544
|
+
The round robin assignor would come up with the following assignment:
|
|
545
|
+
- C0 [t0p0]
|
|
546
|
+
- C1 [t1p0]
|
|
547
|
+
- C2 [t1p1, t2p0, t2p1, t2p2]
|
|
548
|
+
|
|
549
|
+
which is not as balanced as the assignment suggested by sticky assignor:
|
|
550
|
+
- C0 [t0p0]
|
|
551
|
+
- C1 [t1p0, t1p1]
|
|
552
|
+
- C2 [t2p0, t2p1, t2p2]
|
|
553
|
+
|
|
554
|
+
Now, if consumer C0 is removed, these two assignors would produce the following assignments.
|
|
555
|
+
Round Robin (preserves 3 partition assignments):
|
|
556
|
+
- C1 [t0p0, t1p1]
|
|
557
|
+
- C2 [t1p0, t2p0, t2p1, t2p2]
|
|
558
|
+
|
|
559
|
+
Sticky (preserves 5 partition assignments):
|
|
560
|
+
- C1 [t1p0, t1p1, t0p0]
|
|
561
|
+
- C2 [t2p0, t2p1, t2p2]
|
|
562
|
+
"""
|
|
563
|
+
|
|
564
|
+
DEFAULT_GENERATION_ID = -1
|
|
565
|
+
|
|
566
|
+
name = "sticky"
|
|
567
|
+
version = 0
|
|
568
|
+
|
|
569
|
+
def __init__(self):
    """Start with no remembered assignment and the default generation id."""
    # nothing has been assigned to this member yet
    self.generation = self.DEFAULT_GENERATION_ID
    self.member_assignment = None
    # set by assign(); kept so tests can inspect the movements of the latest run
    self._latest_partition_movements = None
|
|
573
|
+
|
|
574
|
+
def assign(self, cluster, members):
    """Performs group assignment given cluster metadata and member subscriptions

    Arguments:
        cluster (ClusterMetadata): cluster metadata
        members ([JoinGroupResponseMember]): decoded metadata for each member in the group.
            Iterated twice, so it must not be a one-shot iterator.

    Returns:
        dict: {member_id: ConsumerProtocolAssignment}
    """
    parsed_by_member = {}
    for member in members:
        parsed_by_member[member.member_id] = self.parse_member_metadata(member.metadata)

    executor = StickyAssignmentExecutor(cluster, parsed_by_member)
    executor.perform_initial_assignment()
    executor.balance()

    # store for tests
    self._latest_partition_movements = executor.partition_movements

    result = {}
    for member in members:
        member_id = member.member_id
        partitions = sorted(executor.get_final_assignment(member_id))
        result[member_id] = ConsumerProtocolAssignment(self.version, partitions, b'')
    return result
|
|
601
|
+
|
|
602
|
+
@classmethod
def parse_member_metadata(cls, metadata):
    """
    Parses member metadata into a python object.
    This implementation only serializes and deserializes the StickyAssignorMemberMetadataV1 user data,
    since no StickyAssignor written in Python was deployed ever in the wild with version V0, meaning that
    there is no need to support backward compatibility with V0.

    Arguments:
        metadata (ConsumerProtocolSubscription): decoded subscription for a member of the group.

    Returns:
        parsed metadata (StickyAssignorMemberMetadataV1). When the user data is empty or cannot
        be decoded, the result carries no partitions and the default generation id.
    """
    def without_history():
        # metadata for a member whose previous assignment is unknown or unusable
        return StickyAssignorMemberMetadataV1(
            partitions=[], generation=cls.DEFAULT_GENERATION_ID, subscription=metadata.topics
        )

    raw_user_data = metadata.user_data
    if not raw_user_data:
        return without_history()

    if isinstance(raw_user_data, StickyAssignorUserData):
        # already decoded, use as is
        decoded = raw_user_data
    else:
        try:
            decoded = StickyAssignorUserData.decode(raw_user_data)
        except Exception:
            # ignore the consumer's previous assignment if it cannot be parsed
            log.exception("Could not parse member data")
            return without_history()

    # pylint: disable=no-member
    owned_partitions = [
        TopicPartition(topic, partition)
        for topic, partitions in decoded.previous_assignment
        for partition in partitions
    ]
    return StickyAssignorMemberMetadataV1(
        partitions=owned_partitions, generation=decoded.generation, subscription=metadata.topics
    )
|
|
640
|
+
|
|
641
|
+
def metadata(self, topics):
    """Build this member's subscription metadata from its remembered assignment and generation.

    Arguments:
        topics: topics the member subscribes to

    Returns:
        the result of ``_metadata`` for ``topics``, ``self.member_assignment`` and
        ``self.generation``
    """
    remembered_partitions = self.member_assignment
    remembered_generation = self.generation
    return self._metadata(topics, remembered_partitions, remembered_generation)
|
|
643
|
+
|
|
644
|
+
@classmethod
def _metadata(cls, topics, member_assignment_partitions, generation=-1):
    """Create the subscription object, embedding the previous assignment when one is known.

    Arguments:
        topics: iterable of subscribed topics
        member_assignment_partitions: previously assigned partitions (objects with ``topic`` and
            ``partition`` attributes), or None when there is no previous assignment
        generation: generation id stored alongside the previous assignment

    Returns:
        ConsumerProtocolSubscription whose user data is ``b''`` when
        ``member_assignment_partitions`` is None and a StickyAssignorUserData otherwise
    """
    if member_assignment_partitions is None:
        log.debug("No member assignment available")
        return ConsumerProtocolSubscription(cls.version, list(topics), b'')

    log.debug("Member assignment is available, generating the metadata: generation {}".format(generation))
    # group partition numbers under their topic, keeping first-seen topic order
    grouped = {}
    for tp in member_assignment_partitions:
        grouped.setdefault(tp.topic, []).append(tp.partition)
    user_data = StickyAssignorUserData(list(grouped.items()), generation)
    return ConsumerProtocolSubscription(cls.version, list(topics), user_data)
|
|
656
|
+
|
|
657
|
+
def on_assignment(self, assignment, generation):
    """Callback that runs on each assignment. Updates assignor's state.

    Arguments:
        assignment: MemberAssignment; its ``partitions()`` result is remembered
        generation: generation id remembered together with the assignment
    """
    log.debug(f"On assignment: assignment={assignment}, generation={generation}")
    assigned_partitions = assignment.partitions()
    self.member_assignment = assigned_partitions
    self.generation = generation
|