PyPI - kafka-python - Versions diffs - 2.2.7__py2.py3-none-any.whl → 2.2.9__py2.py3-none-any.whl - Mend

kafka-python 2.2.7-py2.py3-none-any.whl → 2.2.9-py2.py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

kafka/consumer/fetcher.py +6 -1
kafka/consumer/group.py +14 -14
kafka/coordinator/base.py +96 -47
kafka/coordinator/consumer.py +28 -6
kafka/errors.py +1 -8
kafka/producer/kafka.py +2 -2
kafka/version.py +1 -1
{kafka_python-2.2.7.dist-info → kafka_python-2.2.9.dist-info}/METADATA +1 -1
{kafka_python-2.2.7.dist-info → kafka_python-2.2.9.dist-info}/RECORD +11 -11
{kafka_python-2.2.7.dist-info → kafka_python-2.2.9.dist-info}/WHEEL +1 -1
{kafka_python-2.2.7.dist-info → kafka_python-2.2.9.dist-info}/top_level.txt +0 -0

kafka/consumer/fetcher.py CHANGED Viewed

@@ -178,6 +178,9 @@ class Fetcher(six.Iterator):
         Arguments:
             partitions ([TopicPartition]): the partitions that need offsets reset
+        Returns:
+            bool: True if any partitions need reset; otherwise False (no reset pending)
         Raises:
             NoOffsetForPartitionError: if no offset reset strategy is defined
             KafkaTimeoutError if timeout_ms provided
@@ -189,7 +192,8 @@ class Fetcher(six.Iterator):
         partitions = self._subscriptions.partitions_needing_reset()
         if not partitions:
-            return
+            return False
+        log.debug('Resetting offsets for %s', partitions)
         offset_resets = dict()
         for tp in partitions:
@@ -198,6 +202,7 @@ class Fetcher(six.Iterator):
                 offset_resets[tp] = ts
         self._reset_offsets_async(offset_resets)
+        return True
     def offsets_by_times(self, timestamps, timeout_ms=None):
         """Fetch offset for each partition passed in ``timestamps`` map.

kafka/consumer/group.py CHANGED Viewed

@@ -699,6 +699,7 @@ class KafkaConsumer(six.Iterator):
             dict: Map of topic to list of records (may be empty).
         """
         if not self._coordinator.poll(timeout_ms=timer.timeout_ms):
+            log.debug('poll: timeout during coordinator.poll(); returning early')
             return {}
         has_all_fetch_positions = self._update_fetch_positions(timeout_ms=timer.timeout_ms)
@@ -706,13 +707,13 @@ class KafkaConsumer(six.Iterator):
         # If data is available already, e.g. from a previous network client
         # poll() call to commit, then just return it immediately
         records, partial = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
-        log.debug('Fetched records: %s, %s', records, partial)
+        log.debug('poll: fetched records: %s, %s', records, partial)
         # Before returning the fetched records, we can send off the
         # next round of fetches and avoid block waiting for their
         # responses to enable pipelining while the user is handling the
         # fetched records.
         if not partial:
-            log.debug("Sending fetches")
+            log.debug("poll: Sending fetches")
             futures = self._fetcher.send_fetches()
             if len(futures):
                 self._client.poll(timeout_ms=0)
@@ -724,12 +725,14 @@ class KafkaConsumer(six.Iterator):
         # since the offset lookup may be backing off after a failure
         poll_timeout_ms = min(timer.timeout_ms, self._coordinator.time_to_next_poll() * 1000)
         if not has_all_fetch_positions:
+            log.debug('poll: do not have all fetch positions...')
             poll_timeout_ms = min(poll_timeout_ms, self.config['retry_backoff_ms'])
         self._client.poll(timeout_ms=poll_timeout_ms)
         # after the long poll, we should check whether the group needs to rebalance
         # prior to returning data so that the group can stabilize faster
         if self._coordinator.need_rejoin():
+            log.debug('poll: coordinator needs rejoin; returning early')
             return {}
         records, _ = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
@@ -1124,7 +1127,7 @@ class KafkaConsumer(six.Iterator):
             partitions (List[TopicPartition]): The partitions that need
                 updating fetch positions.
-        Returns True if fetch positions updated, False if timeout
+        Returns True if fetch positions updated, False if timeout or async reset is pending
         Raises:
             NoOffsetForPartitionError: If no offset is stored for a given
@@ -1135,15 +1138,13 @@ class KafkaConsumer(six.Iterator):
         if (self.config['api_version'] >= (0, 8, 1) and
             self.config['group_id'] is not None):
-            try:
-                # If there are any partitions which do not have a valid position and are not
-                # awaiting reset, then we need to fetch committed offsets. We will only do a
-                # coordinator lookup if there are partitions which have missing positions, so
-                # a consumer with manually assigned partitions can avoid a coordinator dependence
-                # by always ensuring that assigned partitions have an initial position.
-                self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=timeout_ms)
-            except KafkaTimeoutError:
-                pass
+            # If there are any partitions which do not have a valid position and are not
+            # awaiting reset, then we need to fetch committed offsets. We will only do a
+            # coordinator lookup if there are partitions which have missing positions, so
+            # a consumer with manually assigned partitions can avoid a coordinator dependence
+            # by always ensuring that assigned partitions have an initial position.
+            if not self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=timeout_ms):
+                return False
         # If there are partitions still needing a position and a reset policy is defined,
         # request reset using the default policy. If no reset strategy is defined and there
@@ -1152,8 +1153,7 @@ class KafkaConsumer(six.Iterator):
         # Finally send an asynchronous request to lookup and update the positions of any
         # partitions which are awaiting reset.
-        self._fetcher.reset_offsets_if_needed()
-        return False
+        return not self._fetcher.reset_offsets_if_needed()
     def _message_generator_v2(self):
         timeout_ms = 1000 * max(0, self._consumer_timeout - time.time())

kafka/coordinator/base.py CHANGED Viewed

@@ -5,6 +5,7 @@ import copy
 import logging
 import threading
 import time
+import warnings
 import weakref
 from kafka.vendor import six
@@ -43,6 +44,9 @@ class Generation(object):
                 self.member_id == other.member_id and
                 self.protocol == other.protocol)
+    def __str__(self):
+        return "<Generation %s (member_id: %s, protocol: %s)>" % (self.generation_id, self.member_id, self.protocol)
 Generation.NO_GENERATION = Generation(DEFAULT_GENERATION_ID, UNKNOWN_MEMBER_ID, None)
@@ -250,6 +254,11 @@ class BaseCoordinator(object):
         else:
             return self.coordinator_id
+    def connected(self):
+        """Return True iff the coordinator node is connected"""
+        with self._lock:
+            return self.coordinator_id is not None and self._client.connected(self.coordinator_id)
     def ensure_coordinator_ready(self, timeout_ms=None):
         """Block until the coordinator for this group is known.
@@ -309,7 +318,7 @@ class BaseCoordinator(object):
         self._find_coordinator_future = None
     def lookup_coordinator(self):
-        with self._lock:
+        with self._client._lock, self._lock:
             if self._find_coordinator_future is not None:
                 return self._find_coordinator_future
@@ -398,17 +407,16 @@ class BaseCoordinator(object):
         # will be invoked even if the consumer is woken up before
         # finishing the rebalance
         with self._lock:
-            log.info("Successfully joined group %s with generation %s",
-                     self.group_id, self._generation.generation_id)
             self.state = MemberState.STABLE
             if self._heartbeat_thread:
                 self._heartbeat_thread.enable()
-    def _handle_join_failure(self, _):
+    def _handle_join_failure(self, exception):
         # we handle failures below after the request finishes.
         # if the join completes after having been woken up,
         # the exception is ignored and we will rejoin
         with self._lock:
+            log.info("Failed to join group %s: %s", self.group_id, exception)
             self.state = MemberState.UNJOINED
     def ensure_active_group(self, timeout_ms=None):
@@ -554,8 +562,9 @@ class BaseCoordinator(object):
     def _failed_request(self, node_id, request, future, error):
         # Marking coordinator dead
-        # unless the error is caused by internal client pipelining
+        # unless the error is caused by internal client pipelining or throttling
         if not isinstance(error, (Errors.NodeNotReadyError,
+                                  Errors.ThrottlingQuotaExceededError,
                                   Errors.TooManyInFlightRequests)):
             log.error('Error sending %s to node %s [%s]',
                       request.__class__.__name__, node_id, error)
@@ -566,10 +575,9 @@ class BaseCoordinator(object):
         future.failure(error)
     def _handle_join_group_response(self, future, send_time, response):
+        log.debug("Received JoinGroup response: %s", response)
         error_type = Errors.for_code(response.error_code)
         if error_type is Errors.NoError:
-            log.debug("Received successful JoinGroup response for group %s: %s",
-                      self.group_id, response)
             if self._sensors:
                 self._sensors.join_latency.record((time.time() - send_time) * 1000)
             with self._lock:
@@ -583,6 +591,7 @@ class BaseCoordinator(object):
                                                   response.member_id,
                                                   response.group_protocol)
+                log.info("Successfully joined group %s %s", self.group_id, self._generation)
                 if response.leader_id == response.member_id:
                     log.info("Elected group leader -- performing partition"
                              " assignments using %s", self._generation.protocol)
@@ -591,24 +600,24 @@ class BaseCoordinator(object):
                     self._on_join_follower().chain(future)
         elif error_type is Errors.CoordinatorLoadInProgressError:
-            log.debug("Attempt to join group %s rejected since coordinator %s"
-                      " is loading the group.", self.group_id, self.coordinator_id)
+            log.info("Attempt to join group %s rejected since coordinator %s"
+                     " is loading the group.", self.group_id, self.coordinator_id)
             # backoff and retry
             future.failure(error_type(response))
         elif error_type is Errors.UnknownMemberIdError:
             # reset the member id and retry immediately
             error = error_type(self._generation.member_id)
             self.reset_generation()
-            log.debug("Attempt to join group %s failed due to unknown member id",
-                      self.group_id)
+            log.info("Attempt to join group %s failed due to unknown member id",
+                     self.group_id)
             future.failure(error)
         elif error_type in (Errors.CoordinatorNotAvailableError,
                             Errors.NotCoordinatorError):
             # re-discover the coordinator and retry with backoff
             self.coordinator_dead(error_type())
-            log.debug("Attempt to join group %s failed due to obsolete "
-                      "coordinator information: %s", self.group_id,
-                      error_type.__name__)
+            log.info("Attempt to join group %s failed due to obsolete "
+                     "coordinator information: %s", self.group_id,
+                     error_type.__name__)
             future.failure(error_type())
         elif error_type in (Errors.InconsistentGroupProtocolError,
                             Errors.InvalidSessionTimeoutError,
@@ -619,12 +628,21 @@ class BaseCoordinator(object):
                       self.group_id, error)
             future.failure(error)
         elif error_type is Errors.GroupAuthorizationFailedError:
+            log.error("Attempt to join group %s failed due to group authorization error",
+                      self.group_id)
             future.failure(error_type(self.group_id))
         elif error_type is Errors.MemberIdRequiredError:
             # Broker requires a concrete member id to be allowed to join the group. Update member id
             # and send another join group request in next cycle.
+            log.info("Received member id %s for group %s; will retry join-group",
+                     response.member_id, self.group_id)
             self.reset_generation(response.member_id)
             future.failure(error_type())
+        elif error_type is Errors.RebalanceInProgressError:
+            log.info("Attempt to join group %s failed due to RebalanceInProgressError,"
+                     " which could indicate a replication timeout on the broker. Will retry.",
+                     self.group_id)
+            future.failure(error_type())
         else:
             # unexpected error, throw the exception
             error = error_type()
@@ -693,6 +711,7 @@ class BaseCoordinator(object):
         return future
     def _handle_sync_group_response(self, future, send_time, response):
+        log.debug("Received SyncGroup response: %s", response)
         error_type = Errors.for_code(response.error_code)
         if error_type is Errors.NoError:
             if self._sensors:
@@ -705,19 +724,19 @@ class BaseCoordinator(object):
         if error_type is Errors.GroupAuthorizationFailedError:
             future.failure(error_type(self.group_id))
         elif error_type is Errors.RebalanceInProgressError:
-            log.debug("SyncGroup for group %s failed due to coordinator"
-                      " rebalance", self.group_id)
+            log.info("SyncGroup for group %s failed due to coordinator"
+                     " rebalance", self.group_id)
             future.failure(error_type(self.group_id))
         elif error_type in (Errors.UnknownMemberIdError,
                             Errors.IllegalGenerationError):
             error = error_type()
-            log.debug("SyncGroup for group %s failed due to %s", self.group_id, error)
+            log.info("SyncGroup for group %s failed due to %s", self.group_id, error)
             self.reset_generation()
             future.failure(error)
         elif error_type in (Errors.CoordinatorNotAvailableError,
                             Errors.NotCoordinatorError):
             error = error_type()
-            log.debug("SyncGroup for group %s failed due to %s", self.group_id, error)
+            log.info("SyncGroup for group %s failed due to %s", self.group_id, error)
             self.coordinator_dead(error)
             future.failure(error)
         else:
@@ -739,13 +758,13 @@ class BaseCoordinator(object):
             e = Errors.NodeNotReadyError(node_id)
             return Future().failure(e)
-        log.debug("Sending group coordinator request for group %s to broker %s",
-                  self.group_id, node_id)
         version = self._client.api_version(FindCoordinatorRequest, max_version=2)
         if version == 0:
             request = FindCoordinatorRequest[version](self.group_id)
         else:
             request = FindCoordinatorRequest[version](self.group_id, 0)
+        log.debug("Sending group coordinator request for group %s to broker %s: %s",
+                  self.group_id, node_id, request)
         future = Future()
         _f = self._client.send(node_id, request)
         _f.add_callback(self._handle_group_coordinator_response, future)
@@ -792,7 +811,7 @@ class BaseCoordinator(object):
                         self.coordinator_id, self.group_id, error)
             self.coordinator_id = None
-    def generation(self):
+    def generation_if_stable(self):
         """Get the current generation state if the group is stable.
         Returns: the current generation or None if the group is unjoined/rebalancing
@@ -802,6 +821,15 @@ class BaseCoordinator(object):
                 return None
             return self._generation
+    # deprecated
+    def generation(self):
+        warnings.warn("Function coordinator.generation() has been renamed to generation_if_stable()",
+                      DeprecationWarning, stacklevel=2)
+        return self.generation_if_stable()
+    def rebalance_in_progress(self):
+        return self.state is MemberState.REBALANCING
     def reset_generation(self, member_id=UNKNOWN_MEMBER_ID):
         """Reset the generation and member_id because we have fallen out of the group."""
         with self._lock:
@@ -865,6 +893,7 @@ class BaseCoordinator(object):
                 log.info('Leaving consumer group (%s).', self.group_id)
                 version = self._client.api_version(LeaveGroupRequest, max_version=2)
                 request = LeaveGroupRequest[version](self.group_id, self._generation.member_id)
+                log.debug('Sending LeaveGroupRequest to %s: %s', self.coordinator_id, request)
                 future = self._client.send(self.coordinator_id, request)
                 future.add_callback(self._handle_leave_group_response)
                 future.add_errback(log.error, "LeaveGroup request failed: %s")
@@ -873,16 +902,18 @@ class BaseCoordinator(object):
             self.reset_generation()
     def _handle_leave_group_response(self, response):
+        log.debug("Received LeaveGroupResponse: %s", response)
         error_type = Errors.for_code(response.error_code)
         if error_type is Errors.NoError:
-            log.debug("LeaveGroup request for group %s returned successfully",
-                      self.group_id)
+            log.info("LeaveGroup request for group %s returned successfully",
+                     self.group_id)
         else:
             log.error("LeaveGroup request for group %s failed with error: %s",
                       self.group_id, error_type())
     def _send_heartbeat_request(self):
         """Send a heartbeat request"""
+        # Note: acquire both client + coordinator lock before calling
         if self.coordinator_unknown():
             e = Errors.CoordinatorNotAvailableError(self.coordinator_id)
             return Future().failure(e)
@@ -895,7 +926,7 @@ class BaseCoordinator(object):
         request = HeartbeatRequest[version](self.group_id,
                                             self._generation.generation_id,
                                             self._generation.member_id)
-        heartbeat_log.debug("Heartbeat: %s[%s] %s", request.group, request.generation_id, request.member_id)  # pylint: disable-msg=no-member
+        heartbeat_log.debug("Sending HeartbeatRequest to %s: %s", self.coordinator_id, request)
         future = Future()
         _f = self._client.send(self.coordinator_id, request)
         _f.add_callback(self._handle_heartbeat_response, future, time.time())
@@ -906,10 +937,10 @@ class BaseCoordinator(object):
     def _handle_heartbeat_response(self, future, send_time, response):
         if self._sensors:
             self._sensors.heartbeat_latency.record((time.time() - send_time) * 1000)
+        heartbeat_log.debug("Received heartbeat response for group %s: %s",
+                            self.group_id, response)
         error_type = Errors.for_code(response.error_code)
         if error_type is Errors.NoError:
-            heartbeat_log.debug("Received successful heartbeat response for group %s",
-                                self.group_id)
             future.success(None)
         elif error_type in (Errors.CoordinatorNotAvailableError,
                             Errors.NotCoordinatorError):
@@ -1054,20 +1085,15 @@ class HeartbeatThread(threading.Thread):
             heartbeat_log.debug('Heartbeat thread closed')
     def _run_once(self):
-        with self.coordinator._client._lock, self.coordinator._lock:
-            if self.enabled and self.coordinator.state is MemberState.STABLE:
-                # TODO: When consumer.wakeup() is implemented, we need to
-                # disable here to prevent propagating an exception to this
-                # heartbeat thread
-                # must get client._lock, or maybe deadlock at heartbeat
-                # failure callback in consumer poll
-                self.coordinator._client.poll(timeout_ms=0)
-        with self.coordinator._lock:
+        self.coordinator._client._lock.acquire()
+        self.coordinator._lock.acquire()
+        try:
             if not self.enabled:
                 heartbeat_log.debug('Heartbeat disabled. Waiting')
+                self.coordinator._client._lock.release()
                 self.coordinator._lock.wait()
-                heartbeat_log.debug('Heartbeat re-enabled.')
+                if self.enabled:
+                    heartbeat_log.debug('Heartbeat re-enabled.')
                 return
             if self.coordinator.state is not MemberState.STABLE:
@@ -1078,14 +1104,24 @@ class HeartbeatThread(threading.Thread):
                 self.disable()
                 return
+            # TODO: When consumer.wakeup() is implemented, we need to
+            # disable here to prevent propagating an exception to this
+            # heartbeat thread
+            self.coordinator._client.poll(timeout_ms=0)
             if self.coordinator.coordinator_unknown():
                 future = self.coordinator.lookup_coordinator()
                 if not future.is_done or future.failed():
                     # the immediate future check ensures that we backoff
                     # properly in the case that no brokers are available
                     # to connect to (and the future is automatically failed).
+                    self.coordinator._client._lock.release()
                     self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000)
+            elif not self.coordinator.connected():
+                self.coordinator._client._lock.release()
+                self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000)
             elif self.coordinator.heartbeat.session_timeout_expired():
                 # the session timeout has expired without seeing a
                 # successful heartbeat, so we should probably make sure
@@ -1097,28 +1133,39 @@ class HeartbeatThread(threading.Thread):
                 # the poll timeout has expired, which means that the
                 # foreground thread has stalled in between calls to
                 # poll(), so we explicitly leave the group.
-                heartbeat_log.warning('Heartbeat poll expired, leaving group')
-                ### XXX
-                # maybe_leave_group acquires client + coordinator lock;
-                # if we hold coordinator lock before calling, we risk deadlock
-                # release() is safe here because this is the last code in the current context
-                self.coordinator._lock.release()
+                heartbeat_log.warning(
+                    "Consumer poll timeout has expired. This means the time between subsequent calls to poll()"
+                    " was longer than the configured max_poll_interval_ms, which typically implies that"
+                    " the poll loop is spending too much time processing messages. You can address this"
+                    " either by increasing max_poll_interval_ms or by reducing the maximum size of batches"
+                    " returned in poll() with max_poll_records."
+                )
                 self.coordinator.maybe_leave_group()
             elif not self.coordinator.heartbeat.should_heartbeat():
-                # poll again after waiting for the retry backoff in case
-                # the heartbeat failed or the coordinator disconnected
-                heartbeat_log.log(0, 'Not ready to heartbeat, waiting')
-                self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000)
+                next_hb = self.coordinator.heartbeat.time_to_next_heartbeat()
+                heartbeat_log.debug('Waiting %0.1f secs to send next heartbeat', next_hb)
+                self.coordinator._client._lock.release()
+                self.coordinator._lock.wait(next_hb)
             else:
+                heartbeat_log.debug('Sending heartbeat for group %s %s', self.coordinator.group_id, self.coordinator._generation)
                 self.coordinator.heartbeat.sent_heartbeat()
                 future = self.coordinator._send_heartbeat_request()
                 future.add_callback(self._handle_heartbeat_success)
                 future.add_errback(self._handle_heartbeat_failure)
+        finally:
+            self.coordinator._lock.release()
+            try:
+                # Possibly released in block above to allow coordinator lock wait()
+                self.coordinator._client._lock.release()
+            except RuntimeError:
+                pass
     def _handle_heartbeat_success(self, result):
         with self.coordinator._lock:
+            heartbeat_log.debug('Heartbeat success')
             self.coordinator.heartbeat.received_heartbeat()
     def _handle_heartbeat_failure(self, exception):
@@ -1129,8 +1176,10 @@ class HeartbeatThread(threading.Thread):
                 # member in the group for as long as the duration of the
                 # rebalance timeout. If we stop sending heartbeats, however,
                 # then the session timeout may expire before we can rejoin.
+                heartbeat_log.debug('Treating RebalanceInProgressError as successful heartbeat')
                 self.coordinator.heartbeat.received_heartbeat()
             else:
+                heartbeat_log.debug('Heartbeat failure: %s', exception)
                 self.coordinator.heartbeat.fail_heartbeat()
                 # wake up the thread if it's sleeping to reschedule the heartbeat
                 self.coordinator._lock.notify()

kafka/coordinator/consumer.py CHANGED Viewed

@@ -274,6 +274,7 @@ class ConsumerCoordinator(BaseCoordinator):
         try:
             self._invoke_completed_offset_commit_callbacks()
             if not self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms):
+                log.debug('coordinator.poll: timeout in ensure_coordinator_ready; returning early')
                 return False
             if self.config['api_version'] >= (0, 9) and self._subscription.partitions_auto_assigned():
@@ -293,9 +294,11 @@ class ConsumerCoordinator(BaseCoordinator):
                         metadata_update = self._client.cluster.request_update()
                         self._client.poll(future=metadata_update, timeout_ms=timer.timeout_ms)
                         if not metadata_update.is_done:
+                            log.debug('coordinator.poll: timeout updating metadata; returning early')
                             return False
                     if not self.ensure_active_group(timeout_ms=timer.timeout_ms):
+                        log.debug('coordinator.poll: timeout in ensure_active_group; returning early')
                         return False
                 self.poll_heartbeat()
@@ -427,7 +430,8 @@ class ConsumerCoordinator(BaseCoordinator):
         future_key = frozenset(partitions)
         timer = Timer(timeout_ms)
         while True:
-            self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms)
+            if not self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms):
+                timer.maybe_raise()
             # contact coordinator to fetch committed offsets
             if future_key in self._offset_fetch_futures:
@@ -608,6 +612,11 @@ class ConsumerCoordinator(BaseCoordinator):
         if node_id is None:
             return Future().failure(Errors.CoordinatorNotAvailableError)
+        # Verify node is ready
+        if not self._client.ready(node_id, metadata_priority=False):
+            log.debug("Node %s not ready -- failing offset commit request",
+                      node_id)
+            return Future().failure(Errors.NodeNotReadyError)
         # create the offset commit request
         offset_data = collections.defaultdict(dict)
@@ -616,7 +625,7 @@ class ConsumerCoordinator(BaseCoordinator):
         version = self._client.api_version(OffsetCommitRequest, max_version=6)
         if version > 1 and self._subscription.partitions_auto_assigned():
-            generation = self.generation()
+            generation = self.generation_if_stable()
         else:
             generation = Generation.NO_GENERATION
@@ -625,7 +634,18 @@ class ConsumerCoordinator(BaseCoordinator):
         # and let the user rejoin the group in poll()
         if generation is None:
             log.info("Failing OffsetCommit request since the consumer is not part of an active group")
-            return Future().failure(Errors.CommitFailedError('Group rebalance in progress'))
+            if self.rebalance_in_progress():
+                # if the client knows it is already rebalancing, we can use RebalanceInProgressError instead of
+                # CommitFailedError to indicate this is not a fatal error
+                return Future().failure(Errors.RebalanceInProgressError(
+                    "Offset commit cannot be completed since the"
+                    " consumer is undergoing a rebalance for auto partition assignment. You can try completing the rebalance"
+                    " by calling poll() and then retry the operation."))
+            else:
+                return Future().failure(Errors.CommitFailedError(
+                    "Offset commit cannot be completed since the"
+                    " consumer is not part of an active group for auto partition assignment; it is likely that the consumer"
+                    " was kicked out of the group."))
         if version == 0:
             request = OffsetCommitRequest[version](
@@ -706,6 +726,7 @@ class ConsumerCoordinator(BaseCoordinator):
         return future
     def _handle_offset_commit_response(self, offsets, future, send_time, response):
+        log.debug("Received OffsetCommitResponse: %s", response)
         # TODO look at adding request_latency_ms to response (like java kafka)
         if self._consumer_sensors:
             self._consumer_sensors.commit_latency.record((time.time() - send_time) * 1000)
@@ -756,7 +777,7 @@ class ConsumerCoordinator(BaseCoordinator):
                     # However, we do not need to reset generations and just request re-join, such that
                     # if the caller decides to proceed and poll, it would still try to proceed and re-join normally.
                     self.request_rejoin()
-                    future.failure(Errors.CommitFailedError('Group rebalance in progress'))
+                    future.failure(Errors.CommitFailedError(error_type()))
                     return
                 elif error_type in (Errors.UnknownMemberIdError,
                                     Errors.IllegalGenerationError):
@@ -765,7 +786,7 @@ class ConsumerCoordinator(BaseCoordinator):
                     log.warning("OffsetCommit for group %s failed: %s",
                                 self.group_id, error)
                     self.reset_generation()
-                    future.failure(Errors.CommitFailedError())
+                    future.failure(Errors.CommitFailedError(error_type()))
                     return
                 else:
                     log.error("Group %s failed to commit partition %s at offset"
@@ -804,7 +825,7 @@ class ConsumerCoordinator(BaseCoordinator):
             return Future().failure(Errors.CoordinatorNotAvailableError)
         # Verify node is ready
-        if not self._client.ready(node_id):
+        if not self._client.ready(node_id, metadata_priority=False):
             log.debug("Node %s not ready -- failing offset fetch request",
                       node_id)
             return Future().failure(Errors.NodeNotReadyError)
@@ -832,6 +853,7 @@ class ConsumerCoordinator(BaseCoordinator):
         return future
     def _handle_offset_fetch_response(self, future, response):
+        log.debug("Received OffsetFetchResponse: %s", response)
         if response.API_VERSION >= 2 and response.error_code != Errors.NoError.errno:
             error_type = Errors.for_code(response.error_code)
             log.debug("Offset fetch failed: %s", error_type.__name__)

kafka/errors.py CHANGED Viewed

@@ -24,14 +24,7 @@ class CommitFailedError(KafkaError):
     def __init__(self, *args):
         if not args:
             args = ("Commit cannot be completed since the group has already"
-                    " rebalanced and assigned the partitions to another member."
-                    " This means that the time between subsequent calls to poll()"
-                    " was longer than the configured max_poll_interval_ms, which"
-                    " typically implies that the poll loop is spending too much"
-                    " time message processing. You can address this either by"
-                    " increasing the rebalance timeout with max_poll_interval_ms,"
-                    " or by reducing the maximum size of batches returned in poll()"
-                    " with max_poll_records.",)
+                    " rebalanced and assigned the partitions to another member.",)
         super(CommitFailedError, self).__init__(*args)

kafka/producer/kafka.py CHANGED Viewed

@@ -944,7 +944,7 @@ class KafkaProducer(object):
         """
         # add topic to metadata topic list if it is not there already.
         self._sender.add_topic(topic)
-        timer = Timer(max_wait_ms, "Failed to update metadata after %.1f secs." % (max_wait_ms * 1000,))
+        timer = Timer(max_wait_ms, "Failed to update metadata after %.1f secs." % (max_wait_ms / 1000,))
         metadata_event = None
         while True:
             partitions = self._metadata.partitions_for_topic(topic)
@@ -962,7 +962,7 @@ class KafkaProducer(object):
             metadata_event.wait(timer.timeout_ms / 1000)
             if not metadata_event.is_set():
                 raise Errors.KafkaTimeoutError(
-                    "Failed to update metadata after %.1f secs." % (max_wait_ms * 1000,))
+                    "Failed to update metadata after %.1f secs." % (max_wait_ms / 1000,))
             elif topic in self._metadata.unauthorized_topics:
                 raise Errors.TopicAuthorizationFailedError(set([topic]))
             else:

kafka/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = '2.2.7'
+__version__ = '2.2.9'

{kafka_python-2.2.7.dist-info → kafka_python-2.2.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kafka-python
-Version: 2.2.7
+Version: 2.2.9
 Summary: Pure Python client for Apache Kafka
 Author-email: Dana Powers <dana.powers@gmail.com>
 Project-URL: Homepage, https://github.com/dpkp/kafka-python

{kafka_python-2.2.7.dist-info → kafka_python-2.2.9.dist-info}/RECORD RENAMED Viewed

@@ -3,12 +3,12 @@ kafka/client_async.py,sha256=R8q_rRpG3RrYrRmcZo7XgO2oSdpLJATNcq8w-1vIJ_8,56878
 kafka/cluster.py,sha256=N3_Al4We4ZhWzz6lVHy6SfqwDZfQy73iV7Qg4g4nxRs,16745
 kafka/codec.py,sha256=8NZpnehzNrhSBIjzbPVSvyFbSeLAqEntE7BfVHu-_9I,10036
 kafka/conn.py,sha256=pDmzcn-m8oiFdvYh-97qbRLEBXh0sSl9nT74VIIRuEE,69472
-kafka/errors.py,sha256=VygO7AYZvbb52wVgjxuXz-6S2W3vNzzDstF5FNP8Bvk,33829
+kafka/errors.py,sha256=qX2Fp0qawU_HBNcZCwB7EDCmx3C2PehrETi6qSEJHmk,33290
 kafka/future.py,sha256=ZQStbfUYIPJRrgMfAWxxjrIRVxsw4WCtSR0J0bkyGno,2847
 kafka/socks5_wrapper.py,sha256=6woOaCTJXJ5e89_zdyW5BjOpyE4rCbYFH-kd-FeuPuk,9827
 kafka/structs.py,sha256=SJGzmLdV21jZyQ7247k0WFy16UiusgTHK3I-e4qzI-E,3058
 kafka/util.py,sha256=EnzCJuRkQ6Kh2lIdNwFKvT4PddkZ5bzop4ooGGIhe5g,4366
-kafka/version.py,sha256=_a_I7cjpwo5jHaXCQYkNQgSKbe2WWcxvqM6NIPlZ27w,22
+kafka/version.py,sha256=PLfhtFzYo-Q28-0b7ctZlZxf03WI4mwwRfMHSMxNmd8,22
 kafka/admin/__init__.py,sha256=S_XxqyyV480_yXhttK79XZqNAmZyXRjspd3SoqYykE8,720
 kafka/admin/acl_resource.py,sha256=ak_dUsSni4SyP0ORbSKenZpwTy0Ykxq3FSt_9XgLR8k,8265
 kafka/admin/client.py,sha256=RabA8l8Im3iBEXgPVkiofNW6QyeatQHaymBWFZ8Sxkw,78929
@@ -23,12 +23,12 @@ kafka/benchmarks/record_batch_compose.py,sha256=CnUreNg1lUT0Qx9enmSr-THmBl9PjVMf
 kafka/benchmarks/record_batch_read.py,sha256=vlFaWU2YWI379n_2M8qieb_S2uHUWKV0NquEYy5b-Ho,2184
 kafka/benchmarks/varint_speed.py,sha256=s4CuvKgDZL-_zna5E3vM8RgHjhXuW6pcaO1z1WYZ_0Y,12585
 kafka/consumer/__init__.py,sha256=NDdvtyuJgFyQZahqL9i5sYXGP6rOMIXWwHQEaZ1fCcs,122
-kafka/consumer/fetcher.py,sha256=iwYhWotaEQ55oXTzGKPUOYxvC_6FcoIks_ZqL-gu3DE,68855
-kafka/consumer/group.py,sha256=xmEpVMPJbCAk9__pdAOMswh8I-Ujj5hBax_hPZHZb_s,58758
+kafka/consumer/fetcher.py,sha256=5b-_4VsmQXrRd2Ul8LMZ93TZJHVEoYpmTPB6QcOMizw,69045
+kafka/consumer/group.py,sha256=oieWNHM1NWiOZT8pasOLfFJAbmJEXJ4h7PgUtklxo_Q,58944
 kafka/consumer/subscription_state.py,sha256=f_qJQMhTWQnUd_7lPj43gsagWSKGEmP4jpnEwA6s1Ec,23661
 kafka/coordinator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kafka/coordinator/base.py,sha256=fT6lOD7whmZ1jY7M-D6L5aR7daedxwJmhL7mZGfX4GE,51858
-kafka/coordinator/consumer.py,sha256=IJWWt4E6E7JZZGKtGgPtud9V3eqs0js6EaosS3bxffE,44766
+kafka/coordinator/base.py,sha256=NmHXyqoJZVXL2KhahXLCOH1zVx9gyTdhrt-_unxIAaE,54365
+kafka/coordinator/consumer.py,sha256=le4bGbHfrDK4pperYXekPKzuZW576uXL324IOwS4Kmw,46348
 kafka/coordinator/heartbeat.py,sha256=LeJJlwz1oUEOfEMIFT-R7ZOHBQ-b-luVKwmKyWxLfDo,3242
 kafka/coordinator/protocol.py,sha256=wTaIOnUVbj0CKXZ82FktZo-zMRvOCk3hdQAoHJ62e3I,1041
 kafka/coordinator/assignors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -67,7 +67,7 @@ kafka/partitioner/__init__.py,sha256=Fks3C5_kokVWYw1Ad5wv0sVVzaaBtOejL-2bIL1yRII
 kafka/partitioner/default.py,sha256=tW-RC1PWIPRDEbeEAaPTLn-00oiZnXoVouEk9AnYE4w,2879
 kafka/producer/__init__.py,sha256=i3Wxih0NHjmqCkRNE54ial8fBp9siqabUE6ZGyL6oX8,122
 kafka/producer/future.py,sha256=UC3-g9QlgVFmbitrtMXVpeP0Pbvr7xl2kcw6bAehKG8,2983
-kafka/producer/kafka.py,sha256=rzsAoB4ser889nRCtILqGqzWI7jREGV9HPngimCWJPE,53211
+kafka/producer/kafka.py,sha256=-xWSiy4V8kNTpqNZVZiEtEdZG2H27n54MTw8sPZx9Cc,53211
 kafka/producer/record_accumulator.py,sha256=dhJW2vxiEDxsws0xRQ5REIrt3lLNu1g0R7HIMs6pZOY,28172
 kafka/producer/sender.py,sha256=8-TLTw6vQO7AheWSDPI33cQdWMyTDxi1k-pkXuUb9k0,37789
 kafka/producer/transaction_manager.py,sha256=HNfJNZwNfJtYdftn9SeaDfi7I5MKk0LD3sK64inuPt0,41537
@@ -120,7 +120,7 @@ kafka/vendor/enum34.py,sha256=-u-lxAiJMt6ru4Do7NUDY9OpeWkYJMksb2xengJawFE,31204
 kafka/vendor/selectors34.py,sha256=gxejLO4eXf8mRSGXaQiknPig3GdX1rtsZiYOQJVuAy8,20594
 kafka/vendor/six.py,sha256=lLBa9_HrANP5BMZ7twEzg1M3wofwPmXyptuWmHX0brY,34826
 kafka/vendor/socketpair.py,sha256=Fi3PoY1Okkppab720wFk1BhHXyjcw7hi5DwhqrYZH2Y,2737
-kafka_python-2.2.7.dist-info/METADATA,sha256=DaRVJ4MFgXCCQvTdyKm39Be5iorMY5YN-3w6zHMvVnA,9951
-kafka_python-2.2.7.dist-info/WHEEL,sha256=joeZ_q2kZqPjVkNy_YbjGrynLS6bxmBj74YkvIORXVI,109
-kafka_python-2.2.7.dist-info/top_level.txt,sha256=IivJz7l5WHdLNDT6RIiVAlhjQzYRwGqBBmKHZ7WjPeM,6
-kafka_python-2.2.7.dist-info/RECORD,,
+kafka_python-2.2.9.dist-info/METADATA,sha256=HsbiFii51H1LeFT67mMZFvpspHlK360D-PnMjXlYw5A,9951
+kafka_python-2.2.9.dist-info/WHEEL,sha256=egKm5cKfE6OqlHwodY8Jjp4yqZDBXgsj09UsV5ojd_U,109
+kafka_python-2.2.9.dist-info/top_level.txt,sha256=IivJz7l5WHdLNDT6RIiVAlhjQzYRwGqBBmKHZ7WjPeM,6
+kafka_python-2.2.9.dist-info/RECORD,,

{kafka_python-2.2.7.dist-info → kafka_python-2.2.9.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.4.0)
+Generator: setuptools (80.8.0)
 Root-Is-Purelib: true
 Tag: py2-none-any
 Tag: py3-none-any

{kafka_python-2.2.7.dist-info → kafka_python-2.2.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

kafka-python 2.2.7__py2.py3-none-any.whl → 2.2.9__py2.py3-none-any.whl

kafka-python 2.2.7__py2.py3-none-any.whl → 2.2.9__py2.py3-none-any.whl