kafka-python 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kafka/__init__.py +34 -0
- kafka/__main__.py +5 -0
- kafka/admin/__init__.py +29 -0
- kafka/admin/__main__.py +5 -0
- kafka/admin/_acls.py +355 -0
- kafka/admin/_cluster.py +359 -0
- kafka/admin/_configs.py +479 -0
- kafka/admin/_groups.py +754 -0
- kafka/admin/_partitions.py +595 -0
- kafka/admin/_topics.py +281 -0
- kafka/admin/_transactions.py +450 -0
- kafka/admin/_users.py +194 -0
- kafka/admin/client.py +373 -0
- kafka/benchmarks/__init__.py +0 -0
- kafka/benchmarks/consumer_performance.py +138 -0
- kafka/benchmarks/load_example.py +109 -0
- kafka/benchmarks/producer_encode_path.py +201 -0
- kafka/benchmarks/producer_performance.py +161 -0
- kafka/benchmarks/profile_protocol.py +138 -0
- kafka/benchmarks/protocol_old_vs_new.py +447 -0
- kafka/benchmarks/record_batch_compose.py +77 -0
- kafka/benchmarks/record_batch_read.py +82 -0
- kafka/benchmarks/varint_speed.py +426 -0
- kafka/cli/__init__.py +36 -0
- kafka/cli/admin/__init__.py +117 -0
- kafka/cli/admin/acls/__init__.py +9 -0
- kafka/cli/admin/acls/common.py +76 -0
- kafka/cli/admin/acls/create.py +19 -0
- kafka/cli/admin/acls/delete.py +23 -0
- kafka/cli/admin/acls/describe.py +16 -0
- kafka/cli/admin/cluster/__init__.py +14 -0
- kafka/cli/admin/cluster/describe.py +11 -0
- kafka/cli/admin/cluster/describe_quorum.py +11 -0
- kafka/cli/admin/cluster/features.py +52 -0
- kafka/cli/admin/cluster/log_dirs.py +43 -0
- kafka/cli/admin/cluster/versions.py +33 -0
- kafka/cli/admin/configs/__init__.py +10 -0
- kafka/cli/admin/configs/alter.py +43 -0
- kafka/cli/admin/configs/common.py +17 -0
- kafka/cli/admin/configs/describe.py +30 -0
- kafka/cli/admin/configs/list.py +16 -0
- kafka/cli/admin/configs/reset.py +20 -0
- kafka/cli/admin/groups/__init__.py +16 -0
- kafka/cli/admin/groups/alter_offsets.py +30 -0
- kafka/cli/admin/groups/delete.py +11 -0
- kafka/cli/admin/groups/delete_offsets.py +29 -0
- kafka/cli/admin/groups/describe.py +11 -0
- kafka/cli/admin/groups/list.py +28 -0
- kafka/cli/admin/groups/list_offsets.py +29 -0
- kafka/cli/admin/groups/remove_members.py +40 -0
- kafka/cli/admin/groups/reset_offsets.py +139 -0
- kafka/cli/admin/partitions/__init__.py +21 -0
- kafka/cli/admin/partitions/alter_reassignments.py +37 -0
- kafka/cli/admin/partitions/create.py +27 -0
- kafka/cli/admin/partitions/delete_records.py +31 -0
- kafka/cli/admin/partitions/describe.py +36 -0
- kafka/cli/admin/partitions/elect_leaders.py +53 -0
- kafka/cli/admin/partitions/list_offsets.py +88 -0
- kafka/cli/admin/partitions/list_reassignments.py +35 -0
- kafka/cli/admin/topics/__init__.py +10 -0
- kafka/cli/admin/topics/create.py +13 -0
- kafka/cli/admin/topics/delete.py +19 -0
- kafka/cli/admin/topics/describe.py +18 -0
- kafka/cli/admin/topics/list.py +11 -0
- kafka/cli/admin/transactions/__init__.py +17 -0
- kafka/cli/admin/transactions/abort.py +38 -0
- kafka/cli/admin/transactions/describe.py +24 -0
- kafka/cli/admin/transactions/describe_producers.py +29 -0
- kafka/cli/admin/transactions/find_hanging.py +26 -0
- kafka/cli/admin/transactions/list.py +37 -0
- kafka/cli/admin/users/__init__.py +8 -0
- kafka/cli/admin/users/alter_user_scram_credentials.py +34 -0
- kafka/cli/admin/users/describe_user_scram_credentials.py +15 -0
- kafka/cli/common.py +95 -0
- kafka/cli/consumer/__init__.py +63 -0
- kafka/cli/producer/__init__.py +57 -0
- kafka/cluster.py +824 -0
- kafka/codec.py +325 -0
- kafka/consumer/__init__.py +5 -0
- kafka/consumer/__main__.py +5 -0
- kafka/consumer/fetcher.py +2012 -0
- kafka/consumer/group.py +1347 -0
- kafka/consumer/subscription_state.py +897 -0
- kafka/coordinator/__init__.py +0 -0
- kafka/coordinator/assignors/__init__.py +0 -0
- kafka/coordinator/assignors/abstract.py +90 -0
- kafka/coordinator/assignors/cooperative_sticky.py +167 -0
- kafka/coordinator/assignors/range.py +81 -0
- kafka/coordinator/assignors/roundrobin.py +101 -0
- kafka/coordinator/assignors/sticky/StickyAssignorUserData.json +37 -0
- kafka/coordinator/assignors/sticky/__init__.py +0 -0
- kafka/coordinator/assignors/sticky/partition_movements.py +149 -0
- kafka/coordinator/assignors/sticky/sorted_set.py +63 -0
- kafka/coordinator/assignors/sticky/sticky_assignor.py +665 -0
- kafka/coordinator/assignors/sticky/user_data.py +8 -0
- kafka/coordinator/base.py +1215 -0
- kafka/coordinator/consumer.py +1224 -0
- kafka/coordinator/heartbeat.py +82 -0
- kafka/coordinator/subscription.py +34 -0
- kafka/errors.py +1004 -0
- kafka/future.py +166 -0
- kafka/metrics/__init__.py +13 -0
- kafka/metrics/compound_stat.py +33 -0
- kafka/metrics/dict_reporter.py +81 -0
- kafka/metrics/kafka_metric.py +36 -0
- kafka/metrics/measurable.py +27 -0
- kafka/metrics/measurable_stat.py +13 -0
- kafka/metrics/metric_config.py +33 -0
- kafka/metrics/metric_name.py +105 -0
- kafka/metrics/metrics.py +261 -0
- kafka/metrics/metrics_reporter.py +53 -0
- kafka/metrics/quota.py +41 -0
- kafka/metrics/stat.py +19 -0
- kafka/metrics/stats/__init__.py +15 -0
- kafka/metrics/stats/avg.py +24 -0
- kafka/metrics/stats/count.py +17 -0
- kafka/metrics/stats/histogram.py +99 -0
- kafka/metrics/stats/max_stat.py +17 -0
- kafka/metrics/stats/min_stat.py +19 -0
- kafka/metrics/stats/percentile.py +14 -0
- kafka/metrics/stats/percentiles.py +75 -0
- kafka/metrics/stats/rate.py +118 -0
- kafka/metrics/stats/sampled_stat.py +99 -0
- kafka/metrics/stats/sensor.py +136 -0
- kafka/metrics/stats/total.py +15 -0
- kafka/net/__init__.py +19 -0
- kafka/net/compat.py +165 -0
- kafka/net/connection.py +593 -0
- kafka/net/http_connect.py +144 -0
- kafka/net/inet.py +122 -0
- kafka/net/manager.py +451 -0
- kafka/net/metrics.py +149 -0
- kafka/net/sasl/__init__.py +32 -0
- kafka/net/sasl/abc.py +28 -0
- kafka/net/sasl/gssapi.py +95 -0
- kafka/net/sasl/msk.py +245 -0
- kafka/net/sasl/oauth.py +98 -0
- kafka/net/sasl/plain.py +42 -0
- kafka/net/sasl/scram.py +135 -0
- kafka/net/sasl/sspi.py +111 -0
- kafka/net/selector.py +644 -0
- kafka/net/socks5.py +262 -0
- kafka/net/transport.py +415 -0
- kafka/net/wakeup_notifier.py +72 -0
- kafka/partitioner/__init__.py +8 -0
- kafka/partitioner/abc.py +8 -0
- kafka/partitioner/default.py +89 -0
- kafka/partitioner/sticky.py +109 -0
- kafka/producer/__init__.py +5 -0
- kafka/producer/__main__.py +5 -0
- kafka/producer/future.py +101 -0
- kafka/producer/kafka.py +1123 -0
- kafka/producer/producer_batch.py +192 -0
- kafka/producer/record_accumulator.py +647 -0
- kafka/producer/sender.py +884 -0
- kafka/producer/transaction_manager.py +1326 -0
- kafka/protocol/__init__.py +0 -0
- kafka/protocol/admin/__init__.py +29 -0
- kafka/protocol/admin/acl.py +83 -0
- kafka/protocol/admin/acl.pyi +375 -0
- kafka/protocol/admin/client_quotas.py +14 -0
- kafka/protocol/admin/client_quotas.pyi +265 -0
- kafka/protocol/admin/cluster.py +31 -0
- kafka/protocol/admin/cluster.pyi +620 -0
- kafka/protocol/admin/configs.py +22 -0
- kafka/protocol/admin/configs.pyi +437 -0
- kafka/protocol/admin/groups.py +24 -0
- kafka/protocol/admin/groups.pyi +261 -0
- kafka/protocol/admin/topics.py +53 -0
- kafka/protocol/admin/topics.pyi +982 -0
- kafka/protocol/admin/transactions.py +18 -0
- kafka/protocol/admin/transactions.pyi +311 -0
- kafka/protocol/admin/users.py +14 -0
- kafka/protocol/admin/users.pyi +223 -0
- kafka/protocol/api_data.py +125 -0
- kafka/protocol/api_header.py +55 -0
- kafka/protocol/api_key.py +97 -0
- kafka/protocol/api_message.py +277 -0
- kafka/protocol/broker_version_data.py +246 -0
- kafka/protocol/consumer/__init__.py +13 -0
- kafka/protocol/consumer/fetch.py +16 -0
- kafka/protocol/consumer/fetch.pyi +298 -0
- kafka/protocol/consumer/group.py +38 -0
- kafka/protocol/consumer/group.pyi +824 -0
- kafka/protocol/consumer/metadata.py +30 -0
- kafka/protocol/consumer/metadata.pyi +89 -0
- kafka/protocol/consumer/offsets.py +75 -0
- kafka/protocol/consumer/offsets.pyi +288 -0
- kafka/protocol/data_container.py +166 -0
- kafka/protocol/frame.py +30 -0
- kafka/protocol/generate_stubs.py +468 -0
- kafka/protocol/metadata/__init__.py +10 -0
- kafka/protocol/metadata/api_versions.py +41 -0
- kafka/protocol/metadata/api_versions.pyi +128 -0
- kafka/protocol/metadata/find_coordinator.py +19 -0
- kafka/protocol/metadata/find_coordinator.pyi +105 -0
- kafka/protocol/metadata/metadata.py +34 -0
- kafka/protocol/metadata/metadata.pyi +160 -0
- kafka/protocol/old/__init__.py +0 -0
- kafka/protocol/old/abstract.py +17 -0
- kafka/protocol/old/add_offsets_to_txn.py +54 -0
- kafka/protocol/old/add_partitions_to_txn.py +71 -0
- kafka/protocol/old/admin.py +1086 -0
- kafka/protocol/old/api.py +205 -0
- kafka/protocol/old/api_versions.py +133 -0
- kafka/protocol/old/commit.py +355 -0
- kafka/protocol/old/consumer_protocol.py +36 -0
- kafka/protocol/old/end_txn.py +53 -0
- kafka/protocol/old/fetch.py +408 -0
- kafka/protocol/old/find_coordinator.py +72 -0
- kafka/protocol/old/group.py +451 -0
- kafka/protocol/old/init_producer_id.py +42 -0
- kafka/protocol/old/list_offsets.py +186 -0
- kafka/protocol/old/metadata.py +290 -0
- kafka/protocol/old/offset_for_leader_epoch.py +133 -0
- kafka/protocol/old/produce.py +247 -0
- kafka/protocol/old/sasl_authenticate.py +38 -0
- kafka/protocol/old/sasl_handshake.py +39 -0
- kafka/protocol/old/struct.py +87 -0
- kafka/protocol/old/txn_offset_commit.py +73 -0
- kafka/protocol/old/types.py +440 -0
- kafka/protocol/parser.py +191 -0
- kafka/protocol/producer/__init__.py +7 -0
- kafka/protocol/producer/produce.py +17 -0
- kafka/protocol/producer/produce.pyi +197 -0
- kafka/protocol/producer/transaction.py +30 -0
- kafka/protocol/producer/transaction.pyi +663 -0
- kafka/protocol/sasl.py +52 -0
- kafka/protocol/sasl.pyi +126 -0
- kafka/protocol/schemas/__init__.py +7 -0
- kafka/protocol/schemas/fields/__init__.py +7 -0
- kafka/protocol/schemas/fields/array.py +127 -0
- kafka/protocol/schemas/fields/base.py +156 -0
- kafka/protocol/schemas/fields/codecs/__init__.py +12 -0
- kafka/protocol/schemas/fields/codecs/encode_buffer.py +82 -0
- kafka/protocol/schemas/fields/codecs/tagged_fields.py +109 -0
- kafka/protocol/schemas/fields/codecs/types.py +505 -0
- kafka/protocol/schemas/fields/codegen.py +40 -0
- kafka/protocol/schemas/fields/simple.py +127 -0
- kafka/protocol/schemas/fields/struct.py +357 -0
- kafka/protocol/schemas/fields/struct_array.py +142 -0
- kafka/protocol/schemas/load_json.py +42 -0
- kafka/protocol/schemas/resources/AddOffsetsToTxnRequest.json +40 -0
- kafka/protocol/schemas/resources/AddOffsetsToTxnResponse.json +35 -0
- kafka/protocol/schemas/resources/AddPartitionsToTxnRequest.json +65 -0
- kafka/protocol/schemas/resources/AddPartitionsToTxnResponse.json +60 -0
- kafka/protocol/schemas/resources/AlterClientQuotasRequest.json +47 -0
- kafka/protocol/schemas/resources/AlterClientQuotasResponse.json +41 -0
- kafka/protocol/schemas/resources/AlterConfigsRequest.json +43 -0
- kafka/protocol/schemas/resources/AlterConfigsResponse.json +39 -0
- kafka/protocol/schemas/resources/AlterPartitionReassignmentsRequest.json +42 -0
- kafka/protocol/schemas/resources/AlterPartitionReassignmentsResponse.json +47 -0
- kafka/protocol/schemas/resources/AlterReplicaLogDirsRequest.json +41 -0
- kafka/protocol/schemas/resources/AlterReplicaLogDirsResponse.json +41 -0
- kafka/protocol/schemas/resources/AlterUserScramCredentialsRequest.json +45 -0
- kafka/protocol/schemas/resources/AlterUserScramCredentialsResponse.json +35 -0
- kafka/protocol/schemas/resources/ApiVersionsRequest.json +34 -0
- kafka/protocol/schemas/resources/ApiVersionsResponse.json +79 -0
- kafka/protocol/schemas/resources/ConsumerProtocolAssignment.json +42 -0
- kafka/protocol/schemas/resources/ConsumerProtocolSubscription.json +49 -0
- kafka/protocol/schemas/resources/CreateAclsRequest.json +46 -0
- kafka/protocol/schemas/resources/CreateAclsResponse.json +37 -0
- kafka/protocol/schemas/resources/CreatePartitionsRequest.json +47 -0
- kafka/protocol/schemas/resources/CreatePartitionsResponse.json +41 -0
- kafka/protocol/schemas/resources/CreateTopicsRequest.json +65 -0
- kafka/protocol/schemas/resources/CreateTopicsResponse.json +72 -0
- kafka/protocol/schemas/resources/DeleteAclsRequest.json +46 -0
- kafka/protocol/schemas/resources/DeleteAclsResponse.json +59 -0
- kafka/protocol/schemas/resources/DeleteGroupsRequest.json +30 -0
- kafka/protocol/schemas/resources/DeleteGroupsResponse.json +36 -0
- kafka/protocol/schemas/resources/DeleteRecordsRequest.json +42 -0
- kafka/protocol/schemas/resources/DeleteRecordsResponse.json +43 -0
- kafka/protocol/schemas/resources/DeleteTopicsRequest.json +43 -0
- kafka/protocol/schemas/resources/DeleteTopicsResponse.json +52 -0
- kafka/protocol/schemas/resources/DescribeAclsRequest.json +43 -0
- kafka/protocol/schemas/resources/DescribeAclsResponse.json +55 -0
- kafka/protocol/schemas/resources/DescribeClientQuotasRequest.json +37 -0
- kafka/protocol/schemas/resources/DescribeClientQuotasResponse.json +47 -0
- kafka/protocol/schemas/resources/DescribeClusterRequest.json +35 -0
- kafka/protocol/schemas/resources/DescribeClusterResponse.json +56 -0
- kafka/protocol/schemas/resources/DescribeConfigsRequest.json +42 -0
- kafka/protocol/schemas/resources/DescribeConfigsResponse.json +69 -0
- kafka/protocol/schemas/resources/DescribeGroupsRequest.json +38 -0
- kafka/protocol/schemas/resources/DescribeGroupsResponse.json +74 -0
- kafka/protocol/schemas/resources/DescribeLogDirsRequest.json +38 -0
- kafka/protocol/schemas/resources/DescribeLogDirsResponse.json +65 -0
- kafka/protocol/schemas/resources/DescribeProducersRequest.json +32 -0
- kafka/protocol/schemas/resources/DescribeProducersResponse.json +55 -0
- kafka/protocol/schemas/resources/DescribeQuorumRequest.json +39 -0
- kafka/protocol/schemas/resources/DescribeQuorumResponse.json +82 -0
- kafka/protocol/schemas/resources/DescribeTopicPartitionsRequest.json +40 -0
- kafka/protocol/schemas/resources/DescribeTopicPartitionsResponse.json +66 -0
- kafka/protocol/schemas/resources/DescribeTransactionsRequest.json +27 -0
- kafka/protocol/schemas/resources/DescribeTransactionsResponse.json +52 -0
- kafka/protocol/schemas/resources/DescribeUserScramCredentialsRequest.json +30 -0
- kafka/protocol/schemas/resources/DescribeUserScramCredentialsResponse.json +45 -0
- kafka/protocol/schemas/resources/ElectLeadersRequest.json +41 -0
- kafka/protocol/schemas/resources/ElectLeadersResponse.json +45 -0
- kafka/protocol/schemas/resources/EndTxnRequest.json +43 -0
- kafka/protocol/schemas/resources/EndTxnResponse.json +41 -0
- kafka/protocol/schemas/resources/FetchRequest.json +125 -0
- kafka/protocol/schemas/resources/FetchResponse.json +124 -0
- kafka/protocol/schemas/resources/FindCoordinatorRequest.json +43 -0
- kafka/protocol/schemas/resources/FindCoordinatorResponse.json +58 -0
- kafka/protocol/schemas/resources/HeartbeatRequest.json +39 -0
- kafka/protocol/schemas/resources/HeartbeatResponse.json +35 -0
- kafka/protocol/schemas/resources/IncrementalAlterConfigsRequest.json +44 -0
- kafka/protocol/schemas/resources/IncrementalAlterConfigsResponse.json +38 -0
- kafka/protocol/schemas/resources/InitProducerIdRequest.json +50 -0
- kafka/protocol/schemas/resources/InitProducerIdResponse.json +47 -0
- kafka/protocol/schemas/resources/JoinGroupRequest.json +63 -0
- kafka/protocol/schemas/resources/JoinGroupResponse.json +69 -0
- kafka/protocol/schemas/resources/LeaveGroupRequest.json +47 -0
- kafka/protocol/schemas/resources/LeaveGroupResponse.json +47 -0
- kafka/protocol/schemas/resources/ListConfigResourcesRequest.json +31 -0
- kafka/protocol/schemas/resources/ListConfigResourcesResponse.json +37 -0
- kafka/protocol/schemas/resources/ListGroupsRequest.json +36 -0
- kafka/protocol/schemas/resources/ListGroupsResponse.json +49 -0
- kafka/protocol/schemas/resources/ListOffsetsRequest.json +72 -0
- kafka/protocol/schemas/resources/ListOffsetsResponse.json +71 -0
- kafka/protocol/schemas/resources/ListPartitionReassignmentsRequest.json +34 -0
- kafka/protocol/schemas/resources/ListPartitionReassignmentsResponse.json +46 -0
- kafka/protocol/schemas/resources/ListTransactionsRequest.json +40 -0
- kafka/protocol/schemas/resources/ListTransactionsResponse.json +42 -0
- kafka/protocol/schemas/resources/MetadataRequest.json +56 -0
- kafka/protocol/schemas/resources/MetadataResponse.json +101 -0
- kafka/protocol/schemas/resources/OffsetCommitRequest.json +76 -0
- kafka/protocol/schemas/resources/OffsetCommitResponse.json +71 -0
- kafka/protocol/schemas/resources/OffsetDeleteRequest.json +39 -0
- kafka/protocol/schemas/resources/OffsetDeleteResponse.json +42 -0
- kafka/protocol/schemas/resources/OffsetFetchRequest.json +76 -0
- kafka/protocol/schemas/resources/OffsetFetchResponse.json +107 -0
- kafka/protocol/schemas/resources/OffsetForLeaderEpochRequest.json +52 -0
- kafka/protocol/schemas/resources/OffsetForLeaderEpochResponse.json +51 -0
- kafka/protocol/schemas/resources/ProduceRequest.json +73 -0
- kafka/protocol/schemas/resources/ProduceResponse.json +96 -0
- kafka/protocol/schemas/resources/RequestHeader.json +44 -0
- kafka/protocol/schemas/resources/ResponseHeader.json +26 -0
- kafka/protocol/schemas/resources/SaslAuthenticateRequest.json +29 -0
- kafka/protocol/schemas/resources/SaslAuthenticateResponse.json +34 -0
- kafka/protocol/schemas/resources/SaslHandshakeRequest.json +31 -0
- kafka/protocol/schemas/resources/SaslHandshakeResponse.json +32 -0
- kafka/protocol/schemas/resources/SyncGroupRequest.json +56 -0
- kafka/protocol/schemas/resources/SyncGroupResponse.json +46 -0
- kafka/protocol/schemas/resources/TxnOffsetCommitRequest.json +68 -0
- kafka/protocol/schemas/resources/TxnOffsetCommitResponse.json +47 -0
- kafka/protocol/schemas/resources/UpdateFeaturesRequest.json +43 -0
- kafka/protocol/schemas/resources/UpdateFeaturesResponse.json +39 -0
- kafka/protocol/schemas/resources/WriteTxnMarkersRequest.json +49 -0
- kafka/protocol/schemas/resources/WriteTxnMarkersResponse.json +45 -0
- kafka/protocol/schemas/resources/__init__.py +0 -0
- kafka/record/__init__.py +3 -0
- kafka/record/_crc32c.py +161 -0
- kafka/record/abc.py +144 -0
- kafka/record/default_records.py +782 -0
- kafka/record/legacy_records.py +587 -0
- kafka/record/memory_records.py +255 -0
- kafka/record/util.py +135 -0
- kafka/serializer/__init__.py +4 -0
- kafka/serializer/abstract.py +20 -0
- kafka/serializer/default.py +16 -0
- kafka/serializer/json.py +17 -0
- kafka/serializer/wrapper.py +21 -0
- kafka/structs.py +69 -0
- kafka/util.py +159 -0
- kafka/vendor/__init__.py +0 -0
- kafka/version.py +1 -0
- kafka_python-3.0.0.dist-info/METADATA +319 -0
- kafka_python-3.0.0.dist-info/RECORD +373 -0
- kafka_python-3.0.0.dist-info/WHEEL +5 -0
- kafka_python-3.0.0.dist-info/entry_points.txt +2 -0
- kafka_python-3.0.0.dist-info/licenses/LICENSE +202 -0
- kafka_python-3.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1215 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
import copy
|
|
3
|
+
import logging
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
import warnings
|
|
7
|
+
|
|
8
|
+
from kafka.coordinator.heartbeat import Heartbeat
|
|
9
|
+
from kafka import errors as Errors
|
|
10
|
+
from kafka.future import Future
|
|
11
|
+
from kafka.metrics import AnonMeasurable
|
|
12
|
+
from kafka.metrics.stats import Avg, Count, Max, Rate
|
|
13
|
+
from kafka.net.wakeup_notifier import WakeupNotifier
|
|
14
|
+
from kafka.protocol.metadata import FindCoordinatorRequest, CoordinatorType
|
|
15
|
+
from kafka.protocol.consumer import (
|
|
16
|
+
HeartbeatRequest, JoinGroupRequest, LeaveGroupRequest, SyncGroupRequest,
|
|
17
|
+
DEFAULT_GENERATION_ID, UNKNOWN_MEMBER_ID,
|
|
18
|
+
)
|
|
19
|
+
from kafka.structs import ConsumerGroupMetadata
|
|
20
|
+
from kafka.util import Timer
|
|
21
|
+
|
|
22
|
+
log = logging.getLogger('kafka.coordinator')
|
|
23
|
+
heartbeat_log = logging.getLogger('kafka.coordinator.heartbeat')
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MemberState:
    """String constants naming the group-membership states of a client."""

    # The client is not part of a group.
    UNJOINED = '<unjoined>'

    # The client has begun rebalancing.
    REBALANCING = '<rebalancing>'

    # The client has joined and is sending heartbeats.
    STABLE = '<stable>'
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Generation:
    """Value object describing this member's place in one group generation.

    Holds three plain, mutable attributes: ``generation_id``, ``member_id``
    and ``protocol``. Instances compare by value (see ``__eq__``).
    """

    # Instances are mutable and compare by value, so they stay unhashable.
    # Defining __eq__ alone already implies this; it is spelled out here so
    # the intent is visible.
    __hash__ = None

    def __init__(self, generation_id, member_id, protocol):
        self.generation_id = generation_id
        self.member_id = member_id
        self.protocol = protocol

    def has_member_id(self):
        """
        True if this generation has a valid member id, False otherwise.
        A member might have an id before it becomes part of a group generation.
        """
        return self.member_id != UNKNOWN_MEMBER_ID

    def is_lost(self):
        """True if this generation is effectively the no-generation
        sentinel - either the generation_id has been cleared
        (DEFAULT_GENERATION_ID) or the member_id has been cleared
        (UNKNOWN_MEMBER_ID). Mirrors Java's NO_GENERATION-or-empty-memberId
        check in ConsumerCoordinator.onJoinPrepare; used to fire
        on_partitions_lost (KIP-429) instead of on_partitions_revoked
        when the broker has forcibly removed us from the group.
        """
        return (self.generation_id == DEFAULT_GENERATION_ID
                or not self.has_member_id())

    def __eq__(self, other):
        """Compare generation_id, member_id and protocol field by field.

        Returns NotImplemented for objects that are not Generation instances,
        so ``generation == None`` or ``generation == "x"`` evaluates to False
        instead of raising AttributeError.
        """
        if not isinstance(other, Generation):
            return NotImplemented
        return (self.generation_id == other.generation_id and
                self.member_id == other.member_id and
                self.protocol == other.protocol)

    def __str__(self):
        return "<Generation %s (member_id: %s, protocol: %s)>" % (self.generation_id, self.member_id, self.protocol)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Shared "no generation" sentinel: default generation id, unknown member id, no protocol.
Generation.NO_GENERATION = Generation(DEFAULT_GENERATION_ID, UNKNOWN_MEMBER_ID, None)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class UnjoinedGroupException(Errors.RetriableError):
    """Retriable error type declared by the coordinator module; adds nothing to RetriableError."""
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class BaseCoordinator(ABC):
|
|
74
|
+
"""
|
|
75
|
+
BaseCoordinator implements group management for a single group member
|
|
76
|
+
by interacting with a designated Kafka broker (the coordinator). Group
|
|
77
|
+
semantics are provided by extending this class. See ConsumerCoordinator
|
|
78
|
+
for example usage.
|
|
79
|
+
|
|
80
|
+
From a high level, Kafka's group management protocol consists of the
|
|
81
|
+
following sequence of actions:
|
|
82
|
+
|
|
83
|
+
1. Group Registration: Group members register with the coordinator providing
|
|
84
|
+
their own metadata (such as the set of topics they are interested in).
|
|
85
|
+
|
|
86
|
+
2. Group/Leader Selection: The coordinator selects the members of the group
|
|
87
|
+
and chooses one member as the leader.
|
|
88
|
+
|
|
89
|
+
3. State Assignment: The leader collects the metadata from all the members
|
|
90
|
+
of the group and assigns state.
|
|
91
|
+
|
|
92
|
+
4. Group Stabilization: Each member receives the state assigned by the
|
|
93
|
+
leader and begins processing.
|
|
94
|
+
|
|
95
|
+
To leverage this protocol, an implementation must define the format of
|
|
96
|
+
metadata provided by each member for group registration in
|
|
97
|
+
:meth:`.group_protocols` and the format of the state assignment provided by
|
|
98
|
+
the leader in :meth:`._perform_assignment` and which becomes available to
|
|
99
|
+
members in :meth:`._on_join_complete`.
|
|
100
|
+
|
|
101
|
+
Note on locking: this class shares state between the caller and a background
|
|
102
|
+
thread which is used for sending heartbeats after the client has joined the
|
|
103
|
+
group. All mutable state as well as state transitions are protected with the
|
|
104
|
+
class's monitor. Generally this means acquiring the lock before reading or
|
|
105
|
+
writing the state of the group (e.g. generation, member_id) and holding the
|
|
106
|
+
lock when sending a request that affects the state of the group
|
|
107
|
+
(e.g. JoinGroup, LeaveGroup).
|
|
108
|
+
"""
|
|
109
|
+
DEFAULT_CONFIG = {
|
|
110
|
+
'group_id': 'kafka-python-default-group',
|
|
111
|
+
'group_instance_id': None,
|
|
112
|
+
'session_timeout_ms': 45000,
|
|
113
|
+
'heartbeat_interval_ms': 3000,
|
|
114
|
+
'max_poll_interval_ms': 300000,
|
|
115
|
+
'request_timeout_ms': 30000,
|
|
116
|
+
'retry_backoff_ms': 100,
|
|
117
|
+
'api_version': (0, 10, 1),
|
|
118
|
+
'metrics': None,
|
|
119
|
+
'metric_group_prefix': '',
|
|
120
|
+
}
|
|
121
|
+
DEFAULT_SESSION_TIMEOUT_MS_PRE_KIP_735 = 30000
|
|
122
|
+
|
|
123
|
+
def __init__(self, client, **configs):
    """
    Arguments:
        client: object exposing a ``_manager`` attribute; the manager's
            ``cluster`` and ``_net`` attributes are stored on the coordinator.

    Keyword Arguments:
        group_id (str): name of the consumer group to join for dynamic
            partition assignment (if enabled), and to use for fetching and
            committing offsets. Default: 'kafka-python-default-group'
        group_instance_id (str): A unique identifier of the consumer instance
            provided by end user. Only non-empty strings are permitted. If set,
            the consumer is treated as a static member, which means that only
            one instance with this ID is allowed in the consumer group at any
            time. This can be used in combination with a larger session timeout
            to avoid group rebalances caused by transient unavailability (e.g.
            process restarts). If not set, the consumer will join the group as
            a dynamic member, which is the traditional behavior. Default: None
        session_timeout_ms (int): The timeout used to detect failures when
            using Kafka's group management facilities. The consumer sends
            periodic heartbeats to indicate its liveness to the broker. If
            no heartbeats are received by the broker before the expiration of
            this session timeout, then the broker will remove this consumer
            from the group and initiate a rebalance. Note that the value must
            be in the allowable range as configured in the broker configuration
            by group.min.session.timeout.ms and group.max.session.timeout.ms.
            Default: 45000 for brokers 3.0+, otherwise 30000.
        heartbeat_interval_ms (int): The expected time in milliseconds
            between heartbeats to the consumer coordinator when using
            Kafka's group management feature. Heartbeats are used to ensure
            that the consumer's session stays active and to facilitate
            rebalancing when new consumers join or leave the group. The
            value must be set lower than session_timeout_ms, but typically
            should be set no higher than 1/3 of that value. It can be
            adjusted even lower to control the expected time for normal
            rebalances. Default: 3000
        max_poll_interval_ms (int): Default: 300000. For api_version below
            (0, 10, 1) it must equal session_timeout_ms; if only one of the
            two is supplied, that value is used for both.
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
        api_version (tuple): broker version used to pick timeout defaults
            and to validate the timeout settings. Default: (0, 10, 1)
        metrics: when truthy, passed to GroupCoordinatorMetrics to register
            coordinator sensors; otherwise no sensors are created.
            Default: None
        metric_group_prefix (str): passed to GroupCoordinatorMetrics.
            Default: ''

    Raises:
        KafkaConfigurationError: if group_instance_id is set while group_id
            is None, or if api_version is below (0, 10, 1) and
            max_poll_interval_ms differs from session_timeout_ms.
    """
    self.config = copy.copy(self.DEFAULT_CONFIG)
    for key in self.config:
        if key in configs:
            self.config[key] = configs[key]

    # Coordinator configurations are different for older brokers
    # max_poll_interval_ms is not supported directly -- it must be
    # the same as session_timeout_ms. If the user provides one of them,
    # use it for both.
    user_supplied_session_timeout = 'session_timeout_ms' in configs
    user_supplied_max_poll_interval = 'max_poll_interval_ms' in configs

    if not user_supplied_session_timeout:
        if self.config['api_version'] < (0, 10, 1) and user_supplied_max_poll_interval:
            self.config['session_timeout_ms'] = self.config['max_poll_interval_ms']
        elif self.config['api_version'] < (3, 0):
            # Prior to 3.0 the broker-side default max session timeout was 30000
            self.config['session_timeout_ms'] = self.DEFAULT_SESSION_TIMEOUT_MS_PRE_KIP_735

    if not user_supplied_max_poll_interval:
        if self.config['api_version'] < (0, 10, 1):
            self.config['max_poll_interval_ms'] = self.config['session_timeout_ms']

    if self.config['group_instance_id'] is not None:
        if self.config['group_id'] is None:
            raise Errors.KafkaConfigurationError("group_instance_id requires group_id")

    if self.config['api_version'] < (0, 10, 1):
        if self.config['max_poll_interval_ms'] != self.config['session_timeout_ms']:
            # api_version is a tuple, so wrap it in a 1-tuple to fill the
            # single %s placeholder rather than being unpacked as arguments.
            raise Errors.KafkaConfigurationError("Broker version %s does not support "
                                                 "different values for max_poll_interval_ms "
                                                 "and session_timeout_ms"
                                                 % (self.config['api_version'],))

    self._client = client
    self._manager = client._manager
    self._cluster = self._manager.cluster
    self._net = self._manager._net
    self.heartbeat = Heartbeat(**self.config)
    self._heartbeat_wakeup = WakeupNotifier(self._net)
    self._heartbeat_loop_future = None
    self._heartbeat_enabled = False
    self._heartbeat_closed = False
    self._lock = threading.RLock()
    self.rejoin_needed = True
    self.rejoining = False # renamed / complement of java needsJoinPrepare
    self.state = MemberState.UNJOINED
    self.coordinator_id = None
    self._find_coordinator_future = None
    # In-flight JoinGroup -> SyncGroup task cached across poll re-entries.
    # consumer.poll(timeout_ms=N) may give up while a JoinGroup is still
    # pending on the broker (e.g. broker waiting for other members to
    # rejoin); the next poll re-awaits this task instead of sending a
    # duplicate JoinGroup. Cleared on success or non-retriable failure.
    self._join_task = None
    self._generation = Generation.NO_GENERATION
    if self.config['metrics']:
        self._sensors = GroupCoordinatorMetrics(self.heartbeat, self.config['metrics'],
                                                self.config['metric_group_prefix'])
    else:
        self._sensors = None
|
|
219
|
+
|
|
220
|
+
@property
def group_id(self):
    """The configured ``group_id`` value, read from ``self.config``."""
    settings = self.config
    return settings['group_id']
|
|
223
|
+
|
|
224
|
+
@property
def group_instance_id(self):
    """The configured ``group_instance_id`` value, read from ``self.config``."""
    settings = self.config
    return settings['group_instance_id']
|
|
227
|
+
|
|
228
|
+
@abstractmethod
def protocol_type(self):
    """Name the class of protocols this coordinator supports.

    Abstract hook; the base implementation does nothing. Typical values
    are "consumer" or "connect".

    Returns:
        str: protocol type name
    """
    return None
|
|
238
|
+
|
|
239
|
+
@abstractmethod
def group_protocols(self):
    """List the group protocols this member supports, with their metadata.

    Abstract hook; the base implementation does nothing.

    Each group member submits this list in its JoinGroupRequest. Entries
    are ordered by preference, most preferred first. The coordinator
    weighs that preference when choosing the generation protocol: a more
    preferred protocol is generally selected provided every member
    supports it and the members do not disagree on the ordering.

    Note: metadata must be type bytes or support an encode() method

    Returns:
        list: [(protocol, metadata), ...]
    """
    return None
|
|
256
|
+
|
|
257
|
+
async def _on_join_prepare_async(self, generation, member_id, timeout_ms=None):
    """Hook invoked before each group join or rejoin.

    The base implementation is a no-op. Subclasses (e.g.
    :class:`ConsumerCoordinator`) override it with auto-commit and
    rebalance-listener invocation. It is awaited from the join coroutine
    on the event loop, so blocking work in an override blocks the loop,
    heartbeats included. Async rebalance listeners are awaited; sync
    listeners run inline.

    Arguments:
        generation (int): The previous generation or -1 if there was none
        member_id (str): The identifier of this member in the previous
            group or '' if there was none
        timeout_ms (numeric, optional): time budget handed to overrides;
            unused by the base implementation.
    """
    return None
|
|
272
|
+
|
|
273
|
+
@abstractmethod
def _perform_assignment(self, leader_id, protocol, members):
    """Compute the assignment for the whole group.

    Used by the group leader to push state to every member (e.g.
    partition assignments in the case of the new consumer). Abstract
    hook; the base implementation does nothing.

    Arguments:
        leader_id (str): The id of the leader (which is this member)
        protocol (str): the chosen group protocol (assignment strategy)
        members (list): [JoinGroupResponseMember] from JoinGroupResponse.
            The metadata is associated with the chosen group protocol and
            the Coordinator subclass is responsible for decoding it
            based on that protocol.

    Returns:
        dict: {member_id: assignment}; assignment must either be bytes
            or have an encode() method to convert to bytes
    """
    return None
|
|
293
|
+
|
|
294
|
+
async def _on_join_complete_async(self, generation, member_id, protocol,
                                  member_assignment_bytes):
    """Hook invoked once this member has successfully joined a group.

    The base implementation is a no-op; subclasses override it to apply
    the assignment and run user listeners.

    Arguments:
        generation (int): the generation that was joined
        member_id (str): the identifier for the local member in the group
        protocol (str): the protocol selected by the coordinator
        member_assignment_bytes (bytes): the protocol-encoded assignment
            propagated from the group leader. The Coordinator instance is
            responsible for decoding based on the chosen protocol.
    """
    return None
|
|
309
|
+
|
|
310
|
+
def coordinator_unknown(self):
    """Report whether the group coordinator is currently unknown.

    Delegates to :meth:`coordinator`, which may reset ``coordinator_id``
    to None as a side effect when the connection has failed.

    Returns:
        bool: True if the coordinator is unknown
    """
    current = self.coordinator()
    return current is None
|
|
319
|
+
|
|
320
|
+
def coordinator(self):
    """Return the current coordinator id, or None when it is unknown.

    If a coordinator id is recorded but the client reports that node as
    disconnected with a positive connection delay, the coordinator is
    marked dead via :meth:`coordinator_dead` and None is returned.

    Returns: the current coordinator id or None if it is unknown
    """
    if self.coordinator_id is None:
        return None

    client = self._client
    unreachable = (client.is_disconnected(self.coordinator_id)
                   and client.connection_delay(self.coordinator_id) > 0)
    if unreachable:
        self.coordinator_dead('Node Disconnected')
        return None

    return self.coordinator_id
|
|
332
|
+
|
|
333
|
+
def stable(self):
    """Return True when the member state is ``MemberState.STABLE``."""
    is_stable = self.state is MemberState.STABLE
    return is_stable
|
|
335
|
+
|
|
336
|
+
def ensure_coordinator_ready(self, timeout_ms=None):
    """Block until the coordinator for this group is known.

    Runs :meth:`ensure_coordinator_ready_async` through ``self._net.run``.

    Keyword Arguments:
        timeout_ms (numeric, optional): Maximum number of milliseconds to
            block waiting to find coordinator. Default: None.

    Returns: True if coordinator found before timeout_ms, else False
    """
    runner = self._net
    return runner.run(self.ensure_coordinator_ready_async, timeout_ms)
|
|
346
|
+
|
|
347
|
+
async def ensure_coordinator_ready_async(self, timeout_ms=None):
    """Async variant of :meth:`ensure_coordinator_ready`.

    Loops until :meth:`coordinator_unknown` reports a known coordinator,
    or until the timer built from ``timeout_ms`` (if any) expires.

    Keyword Arguments:
        timeout_ms (numeric, optional): Maximum number of milliseconds to
            wait for the coordinator. Default: None.

    Returns:
        bool: True if the coordinator is known before the timeout,
            else False.

    Raises:
        Any lookup failure that is not a KafkaTimeoutError,
        InvalidMetadataError or RetriableError propagates to the caller.
    """
    timer = Timer(timeout_ms)
    while self.coordinator_unknown():
        # Prior to 0.8.2 there was no group coordinator
        # so we will just pick a node at random and treat
        # it as the "coordinator"
        if self.config['api_version'] < (0, 8, 2):
            maybe_coordinator_id = self._client.least_loaded_node()
            if maybe_coordinator_id is None:
                future = Future().failure(Errors.NodeNotReadyError('coordinator'))
            else:
                self.coordinator_id = maybe_coordinator_id
                return not timer.expired
        else:
            future = self.lookup_coordinator()

        try:
            await self._manager.wait_for(future, timer.timeout_ms)
        except Errors.KafkaTimeoutError:
            return False
        except Errors.InvalidMetadataError as exc:
            log.debug('Requesting metadata for group coordinator request: %s', exc)
            metadata_update = self._cluster.request_update()
            try:
                await self._manager.wait_for(metadata_update, timer.timeout_ms)
            except Errors.KafkaTimeoutError:
                return False
        except Errors.RetriableError:
            delay_ms = self.config['retry_backoff_ms']
            if timer.timeout_ms is not None:
                # Never back off longer than the time the caller has left;
                # the clamped value must be the one that is slept on.
                delay_ms = min(delay_ms, timer.timeout_ms)
            await self._manager._net.sleep(delay_ms / 1000)
        if timer.expired:
            return False
    return True
|
|
387
|
+
|
|
388
|
+
def _reset_find_coordinator_future(self, result):
    """Forget the cached coordinator-lookup future.

    Registered by :meth:`lookup_coordinator` as the future's completion
    callback; ``result`` is ignored.
    """
    setattr(self, '_find_coordinator_future', None)
|
|
390
|
+
|
|
391
|
+
def lookup_coordinator(self):
    """Return the in-flight coordinator lookup, starting one if needed.

    While a lookup future is cached it is returned as-is. Otherwise
    ``_send_group_coordinator_request`` is scheduled through
    ``self._manager.call_soon``, the resulting future is cached, and
    ``_reset_find_coordinator_future`` is attached so the cache clears
    when the future completes.

    Returns:
        the future tracking the coordinator lookup.
    """
    with self._lock:
        pending = self._find_coordinator_future
        if pending is not None:
            return pending

        # The completion callback may fire immediately (e.g. the request
        # could not be sent) and null out the cached attribute, so hand
        # back the local reference rather than re-reading the attribute.
        lookup = self._manager.call_soon(self._send_group_coordinator_request)
        self._find_coordinator_future = lookup
        lookup.add_both(self._reset_find_coordinator_future)
        return lookup
|
|
404
|
+
|
|
405
|
+
def need_rejoin(self):
    """Tell whether the group should be rejoined (e.g. if metadata changes).

    Returns:
        bool: True if it should, False otherwise
    """
    must_rejoin = self.rejoin_needed
    return must_rejoin
|
|
412
|
+
|
|
413
|
+
def poll_heartbeat(self):
    """Check the heartbeat coroutine and record the client as alive.

    This must be called periodically after joining with
    :meth:`.ensure_active_group` so the member stays in the group. If
    longer than the rebalance timeout (max_poll_interval_ms) passes
    without a call, the client will proactively leave the group.

    Raises: the underlying exception if the heartbeat coroutine has
        terminated with an error. The dead future is forgotten first, so
        the next call to ensure_active_group will respawn the loop.
    """
    with self._lock:
        loop_future = self._heartbeat_loop_future
        loop_crashed = (loop_future is not None
                        and loop_future.is_done
                        and loop_future.failed())
        if loop_crashed:
            failure = loop_future.exception
            # Drop the dead future so the next ensure_active_group()
            # respawns the heartbeat loop.
            self._heartbeat_loop_future = None
            raise failure  # pylint: disable-msg=raising-bad-type
        self.heartbeat.poll()
|
|
435
|
+
|
|
436
|
+
def time_to_next_heartbeat(self):
    """Returns seconds (float) remaining before next heartbeat should be sent

    Note: Returns infinite if group is not joined
    """
    with self._lock:
        # An unjoined member has no heartbeats to send.
        joined = self.state is not MemberState.UNJOINED
        if joined:
            return self.heartbeat.time_to_next_heartbeat()
        return float('inf')
|
|
446
|
+
|
|
447
|
+
@property
def _use_group_apis(self):
    """True when the configured ``api_version`` is at least (0, 9)."""
    minimum_version = (0, 9)
    return self.config['api_version'] >= minimum_version
|
|
450
|
+
|
|
451
|
+
def ensure_active_group(self, timeout_ms=None):
    """Ensure that the group is active (i.e. joined and synced).

    Sync facade over :meth:`ensure_active_group_async`, executed through
    ``self._net.run``.

    Keyword Arguments:
        timeout_ms (numeric, optional): Maximum number of milliseconds to
            block waiting to join group. Default: None.

    Returns: True if group initialized before timeout_ms, else False
    """
    runner = self._net
    return runner.run(self.ensure_active_group_async, timeout_ms)
|
|
463
|
+
|
|
464
|
+
async def ensure_active_group_async(self, timeout_ms=None):
    """Async variant of :meth:`ensure_active_group`.

    Waits for the coordinator, makes sure the heartbeat loop has been
    started, then drives :meth:`join_group_async`. One timer built from
    ``timeout_ms`` is shared by both waits.

    Returns:
        bool: False if the coordinator was not found in time, otherwise
            the result of :meth:`join_group_async`.

    Raises:
        Errors.UnsupportedVersionError: when ``_use_group_apis`` is False.
    """
    if not self._use_group_apis:
        raise Errors.UnsupportedVersionError('Group Coordinator APIs require 0.9+ broker')

    timer = Timer(timeout_ms)
    coordinator_ready = await self.ensure_coordinator_ready_async(timeout_ms=timer.timeout_ms)
    if not coordinator_ready:
        return False

    self._maybe_start_heartbeat_loop()
    joined = await self.join_group_async(timeout_ms=timer.timeout_ms)
    return joined
|
|
473
|
+
|
|
474
|
+
async def join_group_async(self, timeout_ms=None):
    """Drive JoinGroup -> SyncGroup attempts until joined or aborted.

    Internal: the only entry point is :meth:`ensure_active_group_async`
    (and its sync facade :meth:`ensure_active_group`).

    Returns True when the member has been (re-)joined (or no rejoin was
    needed), False on timer expiry, or raises on a non-retriable error.
    """
    if not self._use_group_apis:
        raise Errors.UnsupportedVersionError('Group Coordinator APIs require 0.9+ broker')

    timer = Timer(timeout_ms)
    while self.need_rejoin():
        coordinator_ready = await self.ensure_coordinator_ready_async(timeout_ms=timer.timeout_ms)
        if not coordinator_ready:
            return False

        # The join attempt lives in a cached Task. A poll that timed out
        # while the broker was still processing JoinGroup re-awaits the
        # same Task next time instead of sending a second JoinGroup,
        # which would confuse the broker.
        if self._join_task is None or self._join_task.is_done:
            # _on_join_prepare runs once per rebalance attempt: the
            # rejoining flag persists across loop iterations so retries
            # do not re-run user listeners or auto-commit.
            if not self.rejoining:
                await self._on_join_prepare_async(
                    self._generation.generation_id,
                    self._generation.member_id,
                    timeout_ms=timer.timeout_ms)
                self.rejoining = True

            # Heartbeats are switched off only for the wire round-trip,
            # and only AFTER _on_join_prepare_async, so they keep flowing
            # while a potentially slow rebalance listener runs.
            log.debug("Disabling heartbeat during join-group")
            self._disable_heartbeat()

            self._join_task = self._manager.call_soon(self._do_join_and_sync_async)

        try:
            assignment_bytes = await self._manager.wait_for(
                self._join_task, timer.timeout_ms)
        except Errors.KafkaTimeoutError:
            # Out of time: keep self._join_task in flight so the next
            # poll re-awaits it rather than duplicating the JoinGroup.
            return False
        except (Errors.UnknownMemberIdError,
                Errors.RebalanceInProgressError,
                Errors.IllegalGenerationError,
                Errors.MemberIdRequiredError):
            # The response processors already applied the side effects
            # (reset_generation / coordinator_dead / request_rejoin);
            # retry right away.
            self._join_task = None
            continue
        except Errors.RetriableError:
            self._join_task = None
            if timer.expired:
                return False
            pause_ms = self.config['retry_backoff_ms']
            remaining_ms = timer.timeout_ms
            if remaining_ms is not None:
                pause_ms = min(pause_ms, remaining_ms)
            if pause_ms > 0:
                await self._manager._net.sleep(pause_ms / 1000)
            continue
        except Errors.KafkaError:
            # Non-retriable error
            self._join_task = None
            raise

        self._join_task = None

        with self._lock:
            self.rejoining = False
            self.rejoin_needed = False
            self.state = MemberState.STABLE
            self._enable_heartbeat()
        current = self._generation
        await self._on_join_complete_async(
            current.generation_id,
            current.member_id,
            current.protocol,
            assignment_bytes)
        return True
    return True
|
|
560
|
+
|
|
561
|
+
def _failed_request(self, node_id, request, error):
    """Log a failed send and, unless it is client-side, drop the coordinator.

    NodeNotReadyError, ThrottlingQuotaExceededError and
    TooManyInFlightRequests come from internal client pipelining or
    throttling, so they are only logged at debug level. Any other error
    is logged at error level and marks the coordinator dead.

    Arguments:
        node_id: node the request was sent to.
        request: the request object that failed (its class name is logged).
        error: the exception describing the failure.
    """
    request_name = request.__class__.__name__
    client_side = isinstance(error, (Errors.NodeNotReadyError,
                                     Errors.ThrottlingQuotaExceededError,
                                     Errors.TooManyInFlightRequests))
    if client_side:
        log.debug('Error sending %s to node %s [%s]',
                  request_name, node_id, error)
        return

    log.error('Error sending %s to node %s [%s]',
              request_name, node_id, error)
    self.coordinator_dead(error)
|
|
573
|
+
|
|
574
|
+
def _process_join_group_response(self, response, send_time):
    """Classify a JoinGroupResponse: mutate state on success, raise on error.

    Used by :meth:`_do_join_and_sync_async`. Callers route to leader or
    follower based on the returned response.

    Arguments:
        response: the JoinGroupResponse to inspect.
        send_time (float): ``time.monotonic()`` value taken when the
            request was sent; used to record join latency.

    Returns:
        JoinGroupResponse: the response (caller does leader/follower routing).
    Raises:
        Errors.KafkaError: subclass matching the response error code.
        UnjoinedGroupException: state is no longer REBALANCING.
    """
    log.debug("Received JoinGroup response: %s", response)
    error_type = Errors.for_code(response.error_code)

    if error_type is Errors.NoError:
        # KIP-559: starting with v7 the response carries the protocol_type;
        # it must match what this member sent (None on older versions).
        if response.protocol_type is not None and response.protocol_type != self.protocol_type():
            log.error("JoinGroup for group %s returned inconsistent protocol_type %s (expected %s)",
                      self.group_id, response.protocol_type, self.protocol_type())
            raise Errors.InconsistentGroupProtocolError(
                "JoinGroupResponse protocol_type %r does not match group protocol_type %r"
                % (response.protocol_type, self.protocol_type()))
        if self._sensors:
            elapsed_ms = (time.monotonic() - send_time) * 1000
            self._sensors.join_latency.record(elapsed_ms)
        with self._lock:
            if self.state is not MemberState.REBALANCING:
                raise UnjoinedGroupException()
            self._generation = Generation(response.generation_id,
                                          response.member_id,
                                          response.protocol_name)
        log.info("Successfully joined group %s %s", self.group_id, self._generation)
        return response

    elif error_type is Errors.CoordinatorLoadInProgressError:
        log.info("Attempt to join group %s rejected since coordinator %s"
                 " is loading the group.", self.group_id, self.coordinator_id)
        raise error_type(response)

    elif error_type is Errors.UnknownMemberIdError:
        # Build the error before the reset so it carries the old member id.
        error = error_type(self._generation.member_id)
        self.reset_generation()
        log.info("Attempt to join group %s failed due to unknown member id",
                 self.group_id)
        raise error

    elif error_type in (Errors.CoordinatorNotAvailableError,
                        Errors.NotCoordinatorError):
        self.coordinator_dead(error_type())
        log.info("Attempt to join group %s failed due to obsolete "
                 "coordinator information: %s", self.group_id,
                 error_type.__name__)
        raise error_type()

    elif error_type in (Errors.InconsistentGroupProtocolError,
                        Errors.InvalidSessionTimeoutError,
                        Errors.InvalidGroupIdError,
                        Errors.GroupAuthorizationFailedError,
                        Errors.GroupMaxSizeReachedError,
                        Errors.FencedInstanceIdError):
        log.error("Attempt to join group %s failed due to fatal error: %s",
                  self.group_id, error_type.__name__)
        carries_group_id = error_type in (Errors.GroupAuthorizationFailedError,
                                          Errors.GroupMaxSizeReachedError)
        if carries_group_id:
            raise error_type(self.group_id)
        raise error_type()

    elif error_type is Errors.MemberIdRequiredError:
        log.info("Received member id %s for group %s; will retry join-group",
                 response.member_id, self.group_id)
        # Remember the broker-assigned member id for the retry.
        self.reset_generation(response.member_id)
        raise error_type()

    elif error_type is Errors.RebalanceInProgressError:
        log.info("Attempt to join group %s failed due to RebalanceInProgressError,"
                 " which could indicate a replication timeout on the broker. Will retry.",
                 self.group_id)
        raise error_type()

    else:
        error = error_type()
        log.error("Unexpected error in join group response: %s", error)
        raise error
|
|
656
|
+
|
|
657
|
+
def _process_sync_group_response(self, response, send_time):
    """Classify a SyncGroupResponse: return assignment bytes or raise.

    Used by :meth:`_do_join_and_sync_async`. Every error response first
    triggers ``request_rejoin()``; specific error codes additionally
    apply ``coordinator_dead()`` or ``reset_generation()``.

    Arguments:
        response: the SyncGroupResponse to inspect.
        send_time (float): ``time.monotonic()`` value taken when the
            request was sent; used to record sync latency.

    Returns:
        bytes: encoded member assignment.
    Raises:
        Errors.KafkaError: subclass matching the response error code.
    """
    log.debug("Received SyncGroup response: %s", response)
    error_type = Errors.for_code(response.error_code)

    if error_type is Errors.NoError:
        # KIP-559: starting with v5 the response carries the protocol_type
        # and protocol_name; both must match what this member is using
        # (both None on older versions).
        if response.protocol_type is not None and response.protocol_type != self.protocol_type():
            log.error("SyncGroup for group %s returned inconsistent protocol_type %s (expected %s)",
                      self.group_id, response.protocol_type, self.protocol_type())
            raise Errors.InconsistentGroupProtocolError(
                "SyncGroupResponse protocol_type %r does not match group protocol_type %r"
                % (response.protocol_type, self.protocol_type()))
        name_mismatch = (response.protocol_name is not None
                         and self._generation is not Generation.NO_GENERATION
                         and response.protocol_name != self._generation.protocol)
        if name_mismatch:
            log.error("SyncGroup for group %s returned inconsistent protocol_name %s (expected %s)",
                      self.group_id, response.protocol_name, self._generation.protocol)
            raise Errors.InconsistentGroupProtocolError(
                "SyncGroupResponse protocol_name %r does not match group protocol_name %r"
                % (response.protocol_name, self._generation.protocol))
        if self._sensors:
            elapsed_ms = (time.monotonic() - send_time) * 1000
            self._sensors.sync_latency.record(elapsed_ms)
        return response.assignment

    # Always rejoin on error
    self.request_rejoin()

    if error_type is Errors.GroupAuthorizationFailedError:
        raise error_type(self.group_id)

    elif error_type is Errors.RebalanceInProgressError:
        log.info("SyncGroup for group %s failed due to coordinator rebalance",
                 self.group_id)
        raise error_type(self.group_id)

    elif error_type is Errors.FencedInstanceIdError:
        log.error("SyncGroup for group %s failed due to fenced id error: %s",
                  self.group_id, self.group_instance_id)
        raise error_type((self.group_id, self.group_instance_id))

    elif error_type in (Errors.UnknownMemberIdError, Errors.IllegalGenerationError):
        error = error_type()
        log.info("SyncGroup for group %s failed due to %s; reseting generation.", self.group_id, error)
        if error_type is Errors.IllegalGenerationError:
            # The member id is carried over; only the generation resets.
            self.reset_generation(member_id=self._generation.member_id)
        else:
            self.reset_generation()
        raise error

    elif error_type in (Errors.CoordinatorNotAvailableError,
                        Errors.NotCoordinatorError):
        error = error_type()
        log.info("SyncGroup for group %s failed due to %s; marking coordinator dead.", self.group_id, error)
        self.coordinator_dead(error)
        raise error

    else:
        error = error_type()
        log.error("Unexpected error from SyncGroup: %s", error)
        raise error
|
|
722
|
+
|
|
723
|
+
async def _do_join_and_sync_async(self):
    """Run a single JoinGroup -> SyncGroup attempt against the coordinator.

    Sends a JoinGroupRequest and processes the response (which mutates
    self._generation on success). The member then acts as group leader
    (running ``_perform_assignment``) or follower (empty assignment),
    sends the matching SyncGroupRequest, and returns the assignment
    bytes from the response.

    The outer retry loop in :meth:`join_group_async` handles backoff
    and retriable errors; this method attempts exactly one round trip.

    Returns:
        bytes: the encoded member assignment from SyncGroupResponse.

    Raises:
        Errors.CoordinatorNotAvailableError: if the coordinator is unknown.
        Errors.KafkaError: on any error response from JoinGroup or
            SyncGroup. Side effects (coordinator_dead, reset_generation,
            request_rejoin) are applied by the response processors.
        Exception: anything raised by ``_perform_assignment``
            (e.g. assignor crash); leader-only path.
    """
    if self.coordinator_unknown():
        raise Errors.CoordinatorNotAvailableError(self.coordinator_id)

    with self._lock:
        self.state = MemberState.REBALANCING

    log.info("(Re-)joining group %s", self.group_id)
    join_request = JoinGroupRequest(
        group_id=self.group_id,
        session_timeout_ms=self.config['session_timeout_ms'],
        rebalance_timeout_ms=self.config['max_poll_interval_ms'],
        member_id=self._generation.member_id,
        group_instance_id=self.group_instance_id,
        protocol_type=self.protocol_type(),
        protocols=self.group_protocols(),
        max_version=7)
    log.debug("Sending JoinGroup (%s) to coordinator %s",
              join_request, self.coordinator_id)
    join_send_time = time.monotonic()
    # The broker may hold JoinGroup open for up to rebalance_timeout_ms
    # (== max_poll_interval_ms) while waiting for every member to join.
    # The default request_timeout_ms (30s) would cut off a healthy
    # rebalance, so the timeout is overridden per request. Mirrors Java's
    # joinGroupTimeoutMs = max(request_timeout_ms, rebalance_timeout_ms + 5s).
    rebalance_budget_ms = self.config['max_poll_interval_ms'] + 5000
    join_timeout_ms = max(self.config['request_timeout_ms'], rebalance_budget_ms)
    join_response = await self._manager.send(
        join_request, node_id=self.coordinator_id,
        request_timeout_ms=join_timeout_ms)
    # raises on error; mutates self._generation on success
    self._process_join_group_response(join_response, join_send_time)

    if join_response.leader == join_response.member_id:
        log.info("Elected group leader -- performing partition assignments"
                 " using %s", self._generation.protocol)
        group_assignment = self._perform_assignment(
            join_response.leader,
            join_response.protocol_name,
            join_response.members)
        assignments = group_assignment.items()
        sync_log_format = "Sending leader SyncGroup for group %s to coordinator %s: %s"
    else:
        # Followers send an empty assignment list.
        assignments = []
        sync_log_format = "Sending follower SyncGroup for group %s to coordinator %s: %s"

    sync_request = SyncGroupRequest(
        group_id=self.group_id,
        generation_id=self._generation.generation_id,
        member_id=self._generation.member_id,
        group_instance_id=self.group_instance_id,
        protocol_type=self.protocol_type(),
        protocol_name=self._generation.protocol,
        assignments=assignments,
        max_version=5)
    log.debug(sync_log_format,
              self.group_id, self.coordinator_id, sync_request)

    sync_send_time = time.monotonic()
    sync_response = await self._manager.send(
        sync_request, node_id=self.coordinator_id)
    return self._process_sync_group_response(sync_response, sync_send_time)
|
|
815
|
+
|
|
816
|
+
async def _send_group_coordinator_request(self):
    """Discover the current coordinator for the group.

    Sends a FindCoordinatorRequest to the least-loaded node and hands
    the response to :meth:`_handle_find_coordinator_response`. A failed
    send is reported to :meth:`_failed_request` and then re-raised.

    Returns:
        node_id of the coordinator on success.
    Raises:
        NodeNotReadyError if no broker is currently connectable.
        Coordinator-related errors (see _handle_find_coordinator_response).
    """
    target_node = self._client.least_loaded_node()
    if target_node is None:
        raise Errors.NodeNotReadyError('coordinator')

    # key, key_type and coordinator_keys are all populated so the
    # connection layer can negotiate any version: v0-v3 emit
    # `key`/`key_type`, v4+ (KIP-699) emit `key_type`/`coordinator_keys`.
    request = FindCoordinatorRequest(
        key=self.group_id,
        key_type=0,
        coordinator_keys=[self.group_id])
    log.debug("Sending group coordinator request for group %s to broker %s: %s",
              self.group_id, target_node, request)

    try:
        response = await self._manager.send(request, node_id=target_node)
    except Exception as exc:
        self._failed_request(target_node, request, exc)
        raise
    else:
        return self._handle_find_coordinator_response(response)
|
|
845
|
+
|
|
846
|
+
def _handle_find_coordinator_response(self, response):
    """Record the coordinator from a FindCoordinator response, or raise.

    On success the coordinator is registered with the cluster metadata,
    a connection attempt is started, heartbeat timeouts are reset and
    the coordinator id is returned.

    Returns:
        the coordinator id produced by ``self._cluster.add_coordinator``.
    Raises:
        the error class matching the response error code;
        GroupAuthorizationFailedError is constructed with the group id.
    """
    log.debug("Received find coordinator response %s", response)

    # v4+ returns results in a Coordinators array; only one key is ever
    # sent, so the first entry is ours. v0-v3 returns top-level fields.
    if response.coordinators:
        result = response.coordinators[0]
    else:
        result = response
    error_type = Errors.for_code(result.error_code)

    if error_type is Errors.CoordinatorNotAvailableError:
        log.debug("Group Coordinator Not Available; retry")
        raise error_type()

    if error_type is Errors.GroupAuthorizationFailedError:
        error = error_type(self.group_id)
        log.error("Group Coordinator Request failed: %s", error)
        raise error

    if error_type is not Errors.NoError:
        error = error_type()
        log.error("Group Coordinator lookup for group %s failed: %s",
                  self.group_id, error)
        raise error

    with self._lock:
        self.coordinator_id = self._cluster.add_coordinator(
            result, CoordinatorType.GROUP, self.group_id)
        log.info("Discovered coordinator %s for group %s",
                 self.coordinator_id, self.group_id)
        self._client.maybe_connect(self.coordinator_id)
        self.heartbeat.reset_timeouts()
    return self.coordinator_id
|
|
875
|
+
|
|
876
|
+
def coordinator_dead(self, error):
    """Mark the current coordinator as dead.

    Does nothing when no coordinator is recorded; otherwise logs a
    warning that includes ``error`` and clears ``coordinator_id``.
    """
    if self.coordinator_id is None:
        return
    log.warning("Marking the coordinator dead (node %s) for group %s: %s.",
                self.coordinator_id, self.group_id, error)
    self.coordinator_id = None
|
|
882
|
+
|
|
883
|
+
def generation_if_stable(self):
    """Get the current generation state if the group is stable.

    Returns: the current generation or None if the group is unjoined/rebalancing
    """
    with self._lock:
        is_stable = self.state is MemberState.STABLE
        return self._generation if is_stable else None
|
|
892
|
+
|
|
893
|
+
def group_metadata(self):
    """Return a snapshot of this member's group identity (KIP-447).

    The snapshot carries the current generation_id / member_id /
    group_instance_id even when the group is not stable: the caller
    (typically KafkaProducer.send_offsets_to_transaction) needs whatever
    is current so the broker can fence stale instances. If the consumer
    has never joined, the snapshot has the no-generation defaults.

    Returns:
        ConsumerGroupMetadata: built while holding ``self._lock``.
    """
    with self._lock:
        current = self._generation
        snapshot = ConsumerGroupMetadata(
            group_id=self.group_id,
            generation_id=current.generation_id,
            member_id=current.member_id,
            group_instance_id=self.group_instance_id,
        )
        return snapshot
|
|
909
|
+
|
|
910
|
+
# deprecated
|
|
911
|
+
def generation(self):
    """Deprecated alias of :meth:`generation_if_stable`.

    Emits a DeprecationWarning attributed to the caller, then delegates.
    """
    message = "Function coordinator.generation() has been renamed to generation_if_stable()"
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return self.generation_if_stable()
|
|
915
|
+
|
|
916
|
+
def rebalance_in_progress(self):
    """Return True when the member state is ``MemberState.REBALANCING``."""
    rebalancing = self.state is MemberState.REBALANCING
    return rebalancing
|
|
918
|
+
|
|
919
|
+
def reset_generation(self, member_id=UNKNOWN_MEMBER_ID):
    """Discard the generation because we have fallen out of the group.

    While holding the coordinator lock, the generation is replaced by one
    built from ``DEFAULT_GENERATION_ID`` and the given member id, a rejoin
    is flagged and the member state becomes ``UNJOINED``.

    Arguments:
        member_id (str): new local member id to record. Defaults to
            ``UNKNOWN_MEMBER_ID``. The broker hands back a real member id
            on a ``MemberIdRequiredError`` retry; that path passes the
            broker-returned id through here.
    """
    with self._lock:
        fresh_generation = Generation(DEFAULT_GENERATION_ID, member_id, None)
        self._generation = fresh_generation
        self.rejoin_needed = True
        self.state = MemberState.UNJOINED
|
|
932
|
+
|
|
933
|
+
def request_rejoin(self):
    """Flag that this member needs to rejoin the group."""
    self.rejoin_needed = True
|
|
935
|
+
|
|
936
|
+
def _maybe_start_heartbeat_loop(self):
    """Schedule the heartbeat loop unless it is closed or already running.

    A new loop is scheduled through ``self._manager.call_soon`` only when
    there is no previous loop future or the previous one is done.
    """
    if self._heartbeat_closed:
        return
    running = self._heartbeat_loop_future
    if running is not None and not running.is_done:
        # A loop is still in flight; do not schedule a second one.
        return
    heartbeat_log.debug('Starting heartbeat loop')
    self._heartbeat_loop_future = self._manager.call_soon(self._heartbeat_loop)
|
|
942
|
+
|
|
943
|
+
def _enable_heartbeat(self):
    """Turn heartbeating on and wake whoever waits on the heartbeat wakeup.

    The enabled flag is set and the heartbeat timeouts are reset before
    the wakeup is notified, so a woken waiter observes the new state.
    """
    heartbeat_log.debug('Enabling heartbeat')
    self._heartbeat_enabled = True
    self.heartbeat.reset_timeouts()
    # Notify last: the flag and timers must already reflect "enabled".
    self._heartbeat_wakeup.notify()
|
|
948
|
+
|
|
949
|
+
def _disable_heartbeat(self):
    """Turn heartbeating off and wake whoever waits on the heartbeat wakeup."""
    heartbeat_log.debug('Disabling heartbeat')
    self._heartbeat_enabled = False
    # Notify after clearing the flag so a woken waiter sees "disabled".
    self._heartbeat_wakeup.notify()
|
|
953
|
+
|
|
954
|
+
def _close_heartbeat(self):
    """Mark the heartbeat as closed and wake whoever waits on the wakeup.

    The heartbeat loop checks ``_heartbeat_closed`` at the top of every
    iteration, so setting it lets the loop finish.
    """
    self._heartbeat_closed = True
    # Notify after setting the flag so a woken waiter sees "closed".
    self._heartbeat_wakeup.notify()
|
|
957
|
+
|
|
958
|
+
async def _heartbeat_loop(self):
    """Run the heartbeat state machine until the heartbeat is closed.

    Each iteration performs exactly one action, chosen by the first
    condition that matches, in this priority order:

    1. heartbeats disabled: wait on the heartbeat wakeup;
    2. group not stable: disable heartbeats;
    3. coordinator unknown: look it up, backing off for
       ``retry_backoff_ms`` when the lookup raises a KafkaError;
    4. session timeout expired: mark the coordinator dead;
    5. poll timeout expired: leave the group;
    6. not yet time to heartbeat: wait until the next heartbeat is due
       (or until woken);
    7. otherwise: send a heartbeat.

    Any exception escaping an iteration is logged and re-raised, which
    ends the loop.
    """
    heartbeat_log.debug('Heartbeat loop started.')
    while not self._heartbeat_closed:
        try:
            if not self._heartbeat_enabled:
                heartbeat_log.debug('Heartbeat disabled. Waiting')
                # No timeout: wait until something notifies the wakeup.
                await self._heartbeat_wakeup()
                if self._heartbeat_enabled:
                    heartbeat_log.debug('Heartbeat re-enabled.')

            elif not self.stable():
                # the group is not stable (perhaps because we left the
                # group or because the coordinator kicked us out), so
                # disable heartbeats and wait for the main thread to rejoin.
                heartbeat_log.debug('Group state is not stable, disabling heartbeats')
                self._disable_heartbeat()

            elif self.coordinator_unknown():
                heartbeat_log.debug('Looking up coordinator')
                try:
                    await self.lookup_coordinator()
                except Errors.KafkaError:
                    # Lookup failed: wait for the retry backoff
                    # (milliseconds converted to seconds) before retrying.
                    await self._heartbeat_wakeup(self.config['retry_backoff_ms'] / 1000)

            elif self.heartbeat.session_timeout_expired():
                # the session timeout has expired without seeing a
                # successful heartbeat, so we should probably make sure
                # the coordinator is still healthy.
                heartbeat_log.warning('Heartbeat session expired, marking coordinator dead')
                self.coordinator_dead('Heartbeat session expired')

            elif self.heartbeat.poll_timeout_expired():
                # the poll timeout has expired, which means that the
                # foreground thread has stalled in between calls to
                # poll(), so we explicitly leave the group.
                heartbeat_log.warning(
                    "Consumer poll timeout has expired. This means the time between subsequent calls to poll()"
                    " was longer than the configured max_poll_interval_ms, which typically implies that"
                    " the poll loop is spending too much time processing messages. You can address this"
                    " either by increasing max_poll_interval_ms or by reducing the maximum size of batches"
                    " returned in poll() with max_poll_records."
                )
                # Leaving the group resets coordinator.state => UNJOINED,
                # which will cause this loop to disable heartbeats on its
                # next iteration.
                # TODO: handle static member case
                await self.maybe_leave_group_async()

            elif not self.heartbeat.should_heartbeat():
                next_hb = self.heartbeat.time_to_next_heartbeat()
                heartbeat_log.debug('Waiting %0.1f secs to send next heartbeat', next_hb)
                # Wait until the next heartbeat is due, or until woken early.
                await self._heartbeat_wakeup(next_hb)
            else:
                await self._do_heartbeat()
        except BaseException as exc:
            # Log anything that escapes (BaseException subclasses
            # included) and propagate it; this terminates the loop.
            heartbeat_log.error('Unhandled Heartbeat loop error: %s', exc)
            raise
    heartbeat_log.debug('_heartbeat_loop: closed')
|
|
1015
|
+
|
|
1016
|
+
async def _do_heartbeat(self):
    """Send one heartbeat and update the heartbeat bookkeeping.

    The send is recorded before the request goes out. A KafkaError raised
    while sending or handling the response is absorbed here:

    * RebalanceInProgressError is counted as a successful heartbeat;
    * FencedInstanceIdError disables heartbeats;
    * any other KafkaError is recorded as a failed heartbeat.
    """
    heartbeat_log.debug('Sending heartbeat for group %s %s', self.group_id, self._generation)
    self.heartbeat.sent_heartbeat()
    try:
        await self._send_heartbeat_request()
        heartbeat_log.debug('Heartbeat success')
        self.heartbeat.received_heartbeat()
    except Errors.KafkaError as exc:
        if isinstance(exc, Errors.RebalanceInProgressError):
            # it is valid to continue heartbeating while the group is
            # rebalancing. This ensures that the coordinator keeps the
            # member in the group for as long as the duration of the
            # rebalance timeout. If we stop sending heartbeats, however,
            # then the session timeout may expire before we can rejoin.
            heartbeat_log.debug('Treating RebalanceInProgressError as successful heartbeat')
            self.heartbeat.received_heartbeat()
            return
        if isinstance(exc, Errors.FencedInstanceIdError):
            heartbeat_log.error("Heartbeat thread caught fenced group_instance_id %s error",
                                self.group_instance_id)
            self._disable_heartbeat()
            return
        heartbeat_log.debug('Heartbeat failure: %s', exc)
        self.heartbeat.fail_heartbeat()
|
|
1039
|
+
|
|
1040
|
+
def close(self, timeout_ms=None):
    """Close the coordinator, leave the current group,
    and reset local generation / member_id

    Nothing is done when group APIs are not in use.

    Arguments:
        timeout_ms: passed through to maybe_leave_group().
    """
    if not self._use_group_apis:
        return
    self._close_heartbeat()
    self.maybe_leave_group(timeout_ms=timeout_ms)
|
|
1046
|
+
|
|
1047
|
+
def is_dynamic_member(self):
    """Return True when this member is treated as a dynamic group member.

    That is the case when no group_instance_id is configured, or when the
    configured api_version is below (2, 3).
    """
    if self.group_instance_id is None:
        return True
    return self.config['api_version'] < (2, 3)
|
|
1049
|
+
|
|
1050
|
+
def maybe_leave_group(self, reason=None, timeout_ms=None):
    """Leave the current group and reset local generation/member_id.

    Synchronous entry point: hands maybe_leave_group_async, together with
    the given arguments, to ``self._net.run`` and returns its result.
    """
    leave_coro_fn = self.maybe_leave_group_async
    return self._net.run(leave_coro_fn, reason, timeout_ms)
|
|
1053
|
+
|
|
1054
|
+
async def maybe_leave_group_async(self, reason=None, timeout_ms=None):
    """Make a best-effort attempt to leave the group, then reset the generation.

    A LeaveGroupRequest is only sent when this is a dynamic member, the
    coordinator is known, the member state is not UNJOINED and the current
    generation has a member id. The request is never retried; a KafkaError
    while waiting for the response is logged and otherwise ignored.

    Arguments:
        reason (str, optional): reason reported to the broker; truncated to
            255 characters.
        timeout_ms: how long to wait for the response; passed to
            ``self._manager.wait_for``.

    Raises:
        UnsupportedVersionError: if group APIs are not in use.
    """
    if not self._use_group_apis:
        raise Errors.UnsupportedVersionError('Group Coordinator APIs require 0.9+ broker')
    # Starting from 2.3, only dynamic members will send LeaveGroupRequest to the broker,
    # consumer with valid group.instance.id is viewed as static member that never sends LeaveGroup,
    # and the membership expiration is only controlled by session timeout.
    should_send = (
        self.is_dynamic_member()
        and not self.coordinator_unknown()
        and self.state is not MemberState.UNJOINED
        and self._generation.has_member_id()
    )
    if should_send:
        # this is a minimal effort attempt to leave the group. we do not
        # attempt any resending if the request fails or times out.
        member_id = self._generation.member_id
        log.info('Leaving consumer group %s (member %s).', self.group_id, member_id)
        # client side length restriction mirrors java client
        if reason is not None:
            reason = reason[:255]
        identity = LeaveGroupRequest.MemberIdentity(
            member_id=member_id,
            group_instance_id=self.group_instance_id,
            reason=reason,
        )
        request = LeaveGroupRequest(
            group_id=self.group_id,
            member_id=member_id,
            members=[identity]
        )
        log.debug('Sending LeaveGroupRequest to %s: %s', self.coordinator_id, request)
        future = self._manager.send(request, node_id=self.coordinator_id)
        try:
            response = await self._manager.wait_for(future, timeout_ms)
            self._handle_leave_group_response(response)
        except Errors.KafkaError as exc:
            log.error("LeaveGroup request failed: %s", exc)
    # The local generation is reset whether or not a request was sent.
    self.reset_generation()
|
|
1088
|
+
|
|
1089
|
+
def _handle_leave_group_response(self, response):
    """Log the outcome of a LeaveGroupResponse.

    The group-level error code is reported first, then the error code of
    every entry in ``response.members``. Nothing is raised for error codes;
    they are only logged.
    """
    log.debug("Received LeaveGroupResponse: %s", response)
    group_error = Errors.for_code(response.error_code)
    if group_error is not Errors.NoError:
        log.error("LeaveGroup request for group %s failed with error: %s",
                  self.group_id, group_error())
    else:
        log.info("LeaveGroup request for group %s returned successfully",
                 self.group_id)
    for member in response.members:
        member_error = Errors.for_code(member.error_code)
        if member_error is not Errors.NoError:
            log.error("LeaveGroup request for member %s / group instance %s failed with error: %s",
                      member.member_id, member.group_instance_id, member_error())
            continue
        log.debug("LeaveGroup request for member %s / group instance %s returned successfully",
                  member.member_id, member.group_instance_id)
|
|
1106
|
+
|
|
1107
|
+
async def _send_heartbeat_request(self):
    """Send a heartbeat request to the coordinator and handle the response.

    Returns:
        Whatever _handle_heartbeat_response() returns for the response.

    Raises:
        CoordinatorNotAvailableError: if the coordinator is unknown.
        KafkaError: any error raised while sending or while handling the
            response; it is reported to _failed_request() and re-raised.
    """
    if self.coordinator_unknown():
        raise Errors.CoordinatorNotAvailableError(self.coordinator_id)

    current = self._generation
    request = HeartbeatRequest(
        group_id=self.group_id,
        generation_id=current.generation_id,
        member_id=current.member_id,
        group_instance_id=self.group_instance_id,
    )
    target_node = self.coordinator_id
    heartbeat_log.debug("Sending HeartbeatRequest to %s: %s", target_node, request)
    try:
        send_time = time.monotonic()
        response = await self._manager.send(request, node_id=target_node)
        return self._handle_heartbeat_response(response, send_time)
    except Errors.KafkaError as exc:
        # Re-read coordinator_id: it may have changed while awaiting.
        self._failed_request(self.coordinator_id, request, exc)
        raise
|
|
1126
|
+
|
|
1127
|
+
def _handle_heartbeat_response(self, response, send_time):
    """Record heartbeat latency and act on the response's error code.

    Arguments:
        response: the heartbeat response; only ``error_code`` is inspected.
        send_time (float): ``time.monotonic()`` reading taken when the
            request was sent, used to compute the latency in milliseconds.

    Returns:
        None when the response carries no error.

    Raises:
        An instance of the error class matching ``response.error_code``
        for every non-success code. Before raising, the matching local
        reaction is applied: the coordinator is marked dead, a rejoin is
        requested, or the generation is reset, depending on the error.
    """
    if self._sensors:
        self._sensors.heartbeat_latency.record((time.monotonic() - send_time) * 1000)
    heartbeat_log.debug("Received heartbeat response for group %s: %s",
                        self.group_id, response)
    error_type = Errors.for_code(response.error_code)
    if error_type is Errors.NoError:
        return

    # Only build an exception instance once we know there is an error.
    error = error_type()
    if error_type in (Errors.CoordinatorNotAvailableError,
                      Errors.NotCoordinatorError):
        heartbeat_log.warning("Heartbeat failed for group %s: coordinator (node %s)"
                              " is either not started or not valid", self.group_id,
                              self.coordinator_id)
        self.coordinator_dead(error)
    elif error_type is Errors.RebalanceInProgressError:
        heartbeat_log.warning("Heartbeat failed for group %s because it is"
                              " rebalancing", self.group_id)
        self.request_rejoin()
    elif error_type is Errors.IllegalGenerationError:
        # The two literals are concatenated; only one of them may carry
        # the separating space.
        heartbeat_log.warning("Heartbeat failed for group %s: generation id is not"
                              " current.", self.group_id)
        # Keep the member id; only the generation is stale.
        self.reset_generation(member_id=self._generation.member_id)
    elif error_type is Errors.FencedInstanceIdError:
        heartbeat_log.error("Heartbeat failed for group %s due to fenced id error: %s",
                            self.group_id, self.group_instance_id)
        error = error_type((self.group_id, self.group_instance_id))
    elif error_type is Errors.UnknownMemberIdError:
        heartbeat_log.warning("Heartbeat: local member_id was not recognized;"
                              " this consumer needs to re-join")
        self.reset_generation()
    elif error_type is Errors.GroupAuthorizationFailedError:
        error = error_type(self.group_id)
        heartbeat_log.error("Heartbeat failed: authorization error: %s", error)
    else:
        heartbeat_log.error("Heartbeat failed: Unhandled error: %s", error)

    raise error
|
|
1165
|
+
|
|
1166
|
+
|
|
1167
|
+
class GroupCoordinatorMetrics:
    """Registers the group coordinator's sensors and metrics.

    Three sensors are created (heartbeat, join and sync latency) and one
    standalone measurable metric reporting the time since the last
    heartbeat was sent. All metric names share the group name
    ``<prefix>-coordinator-metrics``.
    """

    def __init__(self, heartbeat, metrics, prefix, tags=None):
        """Create the sensors and register every metric.

        Arguments:
            heartbeat: heartbeat state object; its ``last_send`` attribute
                is read by the 'last-heartbeat-seconds-ago' metric.
            metrics: metrics registry providing ``sensor()``,
                ``metric_name()`` and ``add_metric()``.
            prefix (str): prefix of the metric group name.
            tags: optional tags passed to every ``metric_name()`` call.
        """
        self.heartbeat = heartbeat
        self.metrics = metrics
        self.metric_group_name = prefix + "-coordinator-metrics"

        # Heartbeat latency: maximum response time and heartbeat rate.
        self.heartbeat_latency = metrics.sensor('heartbeat-latency')
        self.heartbeat_latency.add(metrics.metric_name(
            'heartbeat-response-time-max', self.metric_group_name,
            'The max time taken to receive a response to a heartbeat request',
            tags), Max())
        self.heartbeat_latency.add(metrics.metric_name(
            'heartbeat-rate', self.metric_group_name,
            'The average number of heartbeats per second',
            tags), Rate(sampled_stat=Count()))

        # Group join latency: average, maximum and join rate.
        self.join_latency = metrics.sensor('join-latency')
        self.join_latency.add(metrics.metric_name(
            'join-time-avg', self.metric_group_name,
            'The average time taken for a group rejoin',
            tags), Avg())
        self.join_latency.add(metrics.metric_name(
            'join-time-max', self.metric_group_name,
            'The max time taken for a group rejoin',
            tags), Max())
        self.join_latency.add(metrics.metric_name(
            'join-rate', self.metric_group_name,
            'The number of group joins per second',
            tags), Rate(sampled_stat=Count()))

        # Group sync latency: average, maximum and sync rate.
        self.sync_latency = metrics.sensor('sync-latency')
        self.sync_latency.add(metrics.metric_name(
            'sync-time-avg', self.metric_group_name,
            'The average time taken for a group sync',
            tags), Avg())
        self.sync_latency.add(metrics.metric_name(
            'sync-time-max', self.metric_group_name,
            'The max time taken for a group sync',
            tags), Max())
        self.sync_latency.add(metrics.metric_name(
            'sync-rate', self.metric_group_name,
            'The number of group syncs per second',
            tags), Rate(sampled_stat=Count()))

        # Computed on demand rather than recorded through a sensor.
        # NOTE(review): the description says "controller" although this
        # class tracks group-coordinator heartbeats - confirm the wording.
        # NOTE(review): the lambda presumably receives `now` in
        # milliseconds and assumes heartbeat.last_send is in seconds on the
        # same clock - confirm against the Heartbeat implementation.
        metrics.add_metric(metrics.metric_name(
            'last-heartbeat-seconds-ago', self.metric_group_name,
            'The number of seconds since the last controller heartbeat was sent',
            tags), AnonMeasurable(
                lambda _, now: (now / 1000) - self.heartbeat.last_send))
|