kafka-python 2.2.7__py2.py3-none-any.whl → 2.2.9__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
kafka/consumer/fetcher.py CHANGED
@@ -178,6 +178,9 @@ class Fetcher(six.Iterator):
178
178
  Arguments:
179
179
  partitions ([TopicPartition]): the partitions that need offsets reset
180
180
 
181
+ Returns:
182
+ bool: True if any partitions need reset; otherwise False (no reset pending)
183
+
181
184
  Raises:
182
185
  NoOffsetForPartitionError: if no offset reset strategy is defined
183
186
  KafkaTimeoutError: if timeout_ms is provided
@@ -189,7 +192,8 @@ class Fetcher(six.Iterator):
189
192
 
190
193
  partitions = self._subscriptions.partitions_needing_reset()
191
194
  if not partitions:
192
- return
195
+ return False
196
+ log.debug('Resetting offsets for %s', partitions)
193
197
 
194
198
  offset_resets = dict()
195
199
  for tp in partitions:
@@ -198,6 +202,7 @@ class Fetcher(six.Iterator):
198
202
  offset_resets[tp] = ts
199
203
 
200
204
  self._reset_offsets_async(offset_resets)
205
+ return True
201
206
 
202
207
  def offsets_by_times(self, timestamps, timeout_ms=None):
203
208
  """Fetch offset for each partition passed in ``timestamps`` map.
kafka/consumer/group.py CHANGED
@@ -699,6 +699,7 @@ class KafkaConsumer(six.Iterator):
699
699
  dict: Map of topic to list of records (may be empty).
700
700
  """
701
701
  if not self._coordinator.poll(timeout_ms=timer.timeout_ms):
702
+ log.debug('poll: timeout during coordinator.poll(); returning early')
702
703
  return {}
703
704
 
704
705
  has_all_fetch_positions = self._update_fetch_positions(timeout_ms=timer.timeout_ms)
@@ -706,13 +707,13 @@ class KafkaConsumer(six.Iterator):
706
707
  # If data is available already, e.g. from a previous network client
707
708
  # poll() call to commit, then just return it immediately
708
709
  records, partial = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
709
- log.debug('Fetched records: %s, %s', records, partial)
710
+ log.debug('poll: fetched records: %s, %s', records, partial)
710
711
  # Before returning the fetched records, we can send off the
711
712
  # next round of fetches and avoid blocking while waiting for their
712
713
  # responses to enable pipelining while the user is handling the
713
714
  # fetched records.
714
715
  if not partial:
715
- log.debug("Sending fetches")
716
+ log.debug("poll: Sending fetches")
716
717
  futures = self._fetcher.send_fetches()
717
718
  if len(futures):
718
719
  self._client.poll(timeout_ms=0)
@@ -724,12 +725,14 @@ class KafkaConsumer(six.Iterator):
724
725
  # since the offset lookup may be backing off after a failure
725
726
  poll_timeout_ms = min(timer.timeout_ms, self._coordinator.time_to_next_poll() * 1000)
726
727
  if not has_all_fetch_positions:
728
+ log.debug('poll: do not have all fetch positions...')
727
729
  poll_timeout_ms = min(poll_timeout_ms, self.config['retry_backoff_ms'])
728
730
 
729
731
  self._client.poll(timeout_ms=poll_timeout_ms)
730
732
  # after the long poll, we should check whether the group needs to rebalance
731
733
  # prior to returning data so that the group can stabilize faster
732
734
  if self._coordinator.need_rejoin():
735
+ log.debug('poll: coordinator needs rejoin; returning early')
733
736
  return {}
734
737
 
735
738
  records, _ = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
@@ -1124,7 +1127,7 @@ class KafkaConsumer(six.Iterator):
1124
1127
  partitions (List[TopicPartition]): The partitions that need
1125
1128
  updating fetch positions.
1126
1129
 
1127
- Returns True if fetch positions updated, False if timeout
1130
+ Returns True if fetch positions updated, False if timeout or async reset is pending
1128
1131
 
1129
1132
  Raises:
1130
1133
  NoOffsetForPartitionError: If no offset is stored for a given
@@ -1135,15 +1138,13 @@ class KafkaConsumer(six.Iterator):
1135
1138
 
1136
1139
  if (self.config['api_version'] >= (0, 8, 1) and
1137
1140
  self.config['group_id'] is not None):
1138
- try:
1139
- # If there are any partitions which do not have a valid position and are not
1140
- # awaiting reset, then we need to fetch committed offsets. We will only do a
1141
- # coordinator lookup if there are partitions which have missing positions, so
1142
- # a consumer with manually assigned partitions can avoid a coordinator dependence
1143
- # by always ensuring that assigned partitions have an initial position.
1144
- self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=timeout_ms)
1145
- except KafkaTimeoutError:
1146
- pass
1141
+ # If there are any partitions which do not have a valid position and are not
1142
+ # awaiting reset, then we need to fetch committed offsets. We will only do a
1143
+ # coordinator lookup if there are partitions which have missing positions, so
1144
+ # a consumer with manually assigned partitions can avoid a coordinator dependence
1145
+ # by always ensuring that assigned partitions have an initial position.
1146
+ if not self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=timeout_ms):
1147
+ return False
1147
1148
 
1148
1149
  # If there are partitions still needing a position and a reset policy is defined,
1149
1150
  # request reset using the default policy. If no reset strategy is defined and there
@@ -1152,8 +1153,7 @@ class KafkaConsumer(six.Iterator):
1152
1153
 
1153
1154
  # Finally send an asynchronous request to lookup and update the positions of any
1154
1155
  # partitions which are awaiting reset.
1155
- self._fetcher.reset_offsets_if_needed()
1156
- return False
1156
+ return not self._fetcher.reset_offsets_if_needed()
1157
1157
 
1158
1158
  def _message_generator_v2(self):
1159
1159
  timeout_ms = 1000 * max(0, self._consumer_timeout - time.time())
kafka/coordinator/base.py CHANGED
@@ -5,6 +5,7 @@ import copy
5
5
  import logging
6
6
  import threading
7
7
  import time
8
+ import warnings
8
9
  import weakref
9
10
 
10
11
  from kafka.vendor import six
@@ -43,6 +44,9 @@ class Generation(object):
43
44
  self.member_id == other.member_id and
44
45
  self.protocol == other.protocol)
45
46
 
47
+ def __str__(self):
48
+ return "<Generation %s (member_id: %s, protocol: %s)>" % (self.generation_id, self.member_id, self.protocol)
49
+
46
50
 
47
51
  Generation.NO_GENERATION = Generation(DEFAULT_GENERATION_ID, UNKNOWN_MEMBER_ID, None)
48
52
 
@@ -250,6 +254,11 @@ class BaseCoordinator(object):
250
254
  else:
251
255
  return self.coordinator_id
252
256
 
257
+ def connected(self):
258
+ """Return True iff the coordinator node is connected"""
259
+ with self._lock:
260
+ return self.coordinator_id is not None and self._client.connected(self.coordinator_id)
261
+
253
262
  def ensure_coordinator_ready(self, timeout_ms=None):
254
263
  """Block until the coordinator for this group is known.
255
264
 
@@ -309,7 +318,7 @@ class BaseCoordinator(object):
309
318
  self._find_coordinator_future = None
310
319
 
311
320
  def lookup_coordinator(self):
312
- with self._lock:
321
+ with self._client._lock, self._lock:
313
322
  if self._find_coordinator_future is not None:
314
323
  return self._find_coordinator_future
315
324
 
@@ -398,17 +407,16 @@ class BaseCoordinator(object):
398
407
  # will be invoked even if the consumer is woken up before
399
408
  # finishing the rebalance
400
409
  with self._lock:
401
- log.info("Successfully joined group %s with generation %s",
402
- self.group_id, self._generation.generation_id)
403
410
  self.state = MemberState.STABLE
404
411
  if self._heartbeat_thread:
405
412
  self._heartbeat_thread.enable()
406
413
 
407
- def _handle_join_failure(self, _):
414
+ def _handle_join_failure(self, exception):
408
415
  # we handle failures below after the request finishes.
409
416
  # if the join completes after having been woken up,
410
417
  # the exception is ignored and we will rejoin
411
418
  with self._lock:
419
+ log.info("Failed to join group %s: %s", self.group_id, exception)
412
420
  self.state = MemberState.UNJOINED
413
421
 
414
422
  def ensure_active_group(self, timeout_ms=None):
@@ -554,8 +562,9 @@ class BaseCoordinator(object):
554
562
 
555
563
  def _failed_request(self, node_id, request, future, error):
556
564
  # Marking coordinator dead
557
- # unless the error is caused by internal client pipelining
565
+ # unless the error is caused by internal client pipelining or throttling
558
566
  if not isinstance(error, (Errors.NodeNotReadyError,
567
+ Errors.ThrottlingQuotaExceededError,
559
568
  Errors.TooManyInFlightRequests)):
560
569
  log.error('Error sending %s to node %s [%s]',
561
570
  request.__class__.__name__, node_id, error)
@@ -566,10 +575,9 @@ class BaseCoordinator(object):
566
575
  future.failure(error)
567
576
 
568
577
  def _handle_join_group_response(self, future, send_time, response):
578
+ log.debug("Received JoinGroup response: %s", response)
569
579
  error_type = Errors.for_code(response.error_code)
570
580
  if error_type is Errors.NoError:
571
- log.debug("Received successful JoinGroup response for group %s: %s",
572
- self.group_id, response)
573
581
  if self._sensors:
574
582
  self._sensors.join_latency.record((time.time() - send_time) * 1000)
575
583
  with self._lock:
@@ -583,6 +591,7 @@ class BaseCoordinator(object):
583
591
  response.member_id,
584
592
  response.group_protocol)
585
593
 
594
+ log.info("Successfully joined group %s %s", self.group_id, self._generation)
586
595
  if response.leader_id == response.member_id:
587
596
  log.info("Elected group leader -- performing partition"
588
597
  " assignments using %s", self._generation.protocol)
@@ -591,24 +600,24 @@ class BaseCoordinator(object):
591
600
  self._on_join_follower().chain(future)
592
601
 
593
602
  elif error_type is Errors.CoordinatorLoadInProgressError:
594
- log.debug("Attempt to join group %s rejected since coordinator %s"
595
- " is loading the group.", self.group_id, self.coordinator_id)
603
+ log.info("Attempt to join group %s rejected since coordinator %s"
604
+ " is loading the group.", self.group_id, self.coordinator_id)
596
605
  # backoff and retry
597
606
  future.failure(error_type(response))
598
607
  elif error_type is Errors.UnknownMemberIdError:
599
608
  # reset the member id and retry immediately
600
609
  error = error_type(self._generation.member_id)
601
610
  self.reset_generation()
602
- log.debug("Attempt to join group %s failed due to unknown member id",
603
- self.group_id)
611
+ log.info("Attempt to join group %s failed due to unknown member id",
612
+ self.group_id)
604
613
  future.failure(error)
605
614
  elif error_type in (Errors.CoordinatorNotAvailableError,
606
615
  Errors.NotCoordinatorError):
607
616
  # re-discover the coordinator and retry with backoff
608
617
  self.coordinator_dead(error_type())
609
- log.debug("Attempt to join group %s failed due to obsolete "
610
- "coordinator information: %s", self.group_id,
611
- error_type.__name__)
618
+ log.info("Attempt to join group %s failed due to obsolete "
619
+ "coordinator information: %s", self.group_id,
620
+ error_type.__name__)
612
621
  future.failure(error_type())
613
622
  elif error_type in (Errors.InconsistentGroupProtocolError,
614
623
  Errors.InvalidSessionTimeoutError,
@@ -619,12 +628,21 @@ class BaseCoordinator(object):
619
628
  self.group_id, error)
620
629
  future.failure(error)
621
630
  elif error_type is Errors.GroupAuthorizationFailedError:
631
+ log.error("Attempt to join group %s failed due to group authorization error",
632
+ self.group_id)
622
633
  future.failure(error_type(self.group_id))
623
634
  elif error_type is Errors.MemberIdRequiredError:
624
635
  # Broker requires a concrete member id to be allowed to join the group. Update member id
625
636
  # and send another join group request in next cycle.
637
+ log.info("Received member id %s for group %s; will retry join-group",
638
+ response.member_id, self.group_id)
626
639
  self.reset_generation(response.member_id)
627
640
  future.failure(error_type())
641
+ elif error_type is Errors.RebalanceInProgressError:
642
+ log.info("Attempt to join group %s failed due to RebalanceInProgressError,"
643
+ " which could indicate a replication timeout on the broker. Will retry.",
644
+ self.group_id)
645
+ future.failure(error_type())
628
646
  else:
629
647
  # unexpected error, throw the exception
630
648
  error = error_type()
@@ -693,6 +711,7 @@ class BaseCoordinator(object):
693
711
  return future
694
712
 
695
713
  def _handle_sync_group_response(self, future, send_time, response):
714
+ log.debug("Received SyncGroup response: %s", response)
696
715
  error_type = Errors.for_code(response.error_code)
697
716
  if error_type is Errors.NoError:
698
717
  if self._sensors:
@@ -705,19 +724,19 @@ class BaseCoordinator(object):
705
724
  if error_type is Errors.GroupAuthorizationFailedError:
706
725
  future.failure(error_type(self.group_id))
707
726
  elif error_type is Errors.RebalanceInProgressError:
708
- log.debug("SyncGroup for group %s failed due to coordinator"
709
- " rebalance", self.group_id)
727
+ log.info("SyncGroup for group %s failed due to coordinator"
728
+ " rebalance", self.group_id)
710
729
  future.failure(error_type(self.group_id))
711
730
  elif error_type in (Errors.UnknownMemberIdError,
712
731
  Errors.IllegalGenerationError):
713
732
  error = error_type()
714
- log.debug("SyncGroup for group %s failed due to %s", self.group_id, error)
733
+ log.info("SyncGroup for group %s failed due to %s", self.group_id, error)
715
734
  self.reset_generation()
716
735
  future.failure(error)
717
736
  elif error_type in (Errors.CoordinatorNotAvailableError,
718
737
  Errors.NotCoordinatorError):
719
738
  error = error_type()
720
- log.debug("SyncGroup for group %s failed due to %s", self.group_id, error)
739
+ log.info("SyncGroup for group %s failed due to %s", self.group_id, error)
721
740
  self.coordinator_dead(error)
722
741
  future.failure(error)
723
742
  else:
@@ -739,13 +758,13 @@ class BaseCoordinator(object):
739
758
  e = Errors.NodeNotReadyError(node_id)
740
759
  return Future().failure(e)
741
760
 
742
- log.debug("Sending group coordinator request for group %s to broker %s",
743
- self.group_id, node_id)
744
761
  version = self._client.api_version(FindCoordinatorRequest, max_version=2)
745
762
  if version == 0:
746
763
  request = FindCoordinatorRequest[version](self.group_id)
747
764
  else:
748
765
  request = FindCoordinatorRequest[version](self.group_id, 0)
766
+ log.debug("Sending group coordinator request for group %s to broker %s: %s",
767
+ self.group_id, node_id, request)
749
768
  future = Future()
750
769
  _f = self._client.send(node_id, request)
751
770
  _f.add_callback(self._handle_group_coordinator_response, future)
@@ -792,7 +811,7 @@ class BaseCoordinator(object):
792
811
  self.coordinator_id, self.group_id, error)
793
812
  self.coordinator_id = None
794
813
 
795
- def generation(self):
814
+ def generation_if_stable(self):
796
815
  """Get the current generation state if the group is stable.
797
816
 
798
817
  Returns: the current generation or None if the group is unjoined/rebalancing
@@ -802,6 +821,15 @@ class BaseCoordinator(object):
802
821
  return None
803
822
  return self._generation
804
823
 
824
+ # deprecated
825
+ def generation(self):
826
+ warnings.warn("Function coordinator.generation() has been renamed to generation_if_stable()",
827
+ DeprecationWarning, stacklevel=2)
828
+ return self.generation_if_stable()
829
+
830
+ def rebalance_in_progress(self):
831
+ return self.state is MemberState.REBALANCING
832
+
805
833
  def reset_generation(self, member_id=UNKNOWN_MEMBER_ID):
806
834
  """Reset the generation and member_id because we have fallen out of the group."""
807
835
  with self._lock:
@@ -865,6 +893,7 @@ class BaseCoordinator(object):
865
893
  log.info('Leaving consumer group (%s).', self.group_id)
866
894
  version = self._client.api_version(LeaveGroupRequest, max_version=2)
867
895
  request = LeaveGroupRequest[version](self.group_id, self._generation.member_id)
896
+ log.debug('Sending LeaveGroupRequest to %s: %s', self.coordinator_id, request)
868
897
  future = self._client.send(self.coordinator_id, request)
869
898
  future.add_callback(self._handle_leave_group_response)
870
899
  future.add_errback(log.error, "LeaveGroup request failed: %s")
@@ -873,16 +902,18 @@ class BaseCoordinator(object):
873
902
  self.reset_generation()
874
903
 
875
904
  def _handle_leave_group_response(self, response):
905
+ log.debug("Received LeaveGroupResponse: %s", response)
876
906
  error_type = Errors.for_code(response.error_code)
877
907
  if error_type is Errors.NoError:
878
- log.debug("LeaveGroup request for group %s returned successfully",
879
- self.group_id)
908
+ log.info("LeaveGroup request for group %s returned successfully",
909
+ self.group_id)
880
910
  else:
881
911
  log.error("LeaveGroup request for group %s failed with error: %s",
882
912
  self.group_id, error_type())
883
913
 
884
914
  def _send_heartbeat_request(self):
885
915
  """Send a heartbeat request"""
916
+ # Note: acquire both client + coordinator lock before calling
886
917
  if self.coordinator_unknown():
887
918
  e = Errors.CoordinatorNotAvailableError(self.coordinator_id)
888
919
  return Future().failure(e)
@@ -895,7 +926,7 @@ class BaseCoordinator(object):
895
926
  request = HeartbeatRequest[version](self.group_id,
896
927
  self._generation.generation_id,
897
928
  self._generation.member_id)
898
- heartbeat_log.debug("Heartbeat: %s[%s] %s", request.group, request.generation_id, request.member_id) # pylint: disable-msg=no-member
929
+ heartbeat_log.debug("Sending HeartbeatRequest to %s: %s", self.coordinator_id, request)
899
930
  future = Future()
900
931
  _f = self._client.send(self.coordinator_id, request)
901
932
  _f.add_callback(self._handle_heartbeat_response, future, time.time())
@@ -906,10 +937,10 @@ class BaseCoordinator(object):
906
937
  def _handle_heartbeat_response(self, future, send_time, response):
907
938
  if self._sensors:
908
939
  self._sensors.heartbeat_latency.record((time.time() - send_time) * 1000)
940
+ heartbeat_log.debug("Received heartbeat response for group %s: %s",
941
+ self.group_id, response)
909
942
  error_type = Errors.for_code(response.error_code)
910
943
  if error_type is Errors.NoError:
911
- heartbeat_log.debug("Received successful heartbeat response for group %s",
912
- self.group_id)
913
944
  future.success(None)
914
945
  elif error_type in (Errors.CoordinatorNotAvailableError,
915
946
  Errors.NotCoordinatorError):
@@ -1054,20 +1085,15 @@ class HeartbeatThread(threading.Thread):
1054
1085
  heartbeat_log.debug('Heartbeat thread closed')
1055
1086
 
1056
1087
  def _run_once(self):
1057
- with self.coordinator._client._lock, self.coordinator._lock:
1058
- if self.enabled and self.coordinator.state is MemberState.STABLE:
1059
- # TODO: When consumer.wakeup() is implemented, we need to
1060
- # disable here to prevent propagating an exception to this
1061
- # heartbeat thread
1062
- # must get client._lock, or maybe deadlock at heartbeat
1063
- # failure callback in consumer poll
1064
- self.coordinator._client.poll(timeout_ms=0)
1065
-
1066
- with self.coordinator._lock:
1088
+ self.coordinator._client._lock.acquire()
1089
+ self.coordinator._lock.acquire()
1090
+ try:
1067
1091
  if not self.enabled:
1068
1092
  heartbeat_log.debug('Heartbeat disabled. Waiting')
1093
+ self.coordinator._client._lock.release()
1069
1094
  self.coordinator._lock.wait()
1070
- heartbeat_log.debug('Heartbeat re-enabled.')
1095
+ if self.enabled:
1096
+ heartbeat_log.debug('Heartbeat re-enabled.')
1071
1097
  return
1072
1098
 
1073
1099
  if self.coordinator.state is not MemberState.STABLE:
@@ -1078,14 +1104,24 @@ class HeartbeatThread(threading.Thread):
1078
1104
  self.disable()
1079
1105
  return
1080
1106
 
1107
+ # TODO: When consumer.wakeup() is implemented, we need to
1108
+ # disable here to prevent propagating an exception to this
1109
+ # heartbeat thread
1110
+ self.coordinator._client.poll(timeout_ms=0)
1111
+
1081
1112
  if self.coordinator.coordinator_unknown():
1082
1113
  future = self.coordinator.lookup_coordinator()
1083
1114
  if not future.is_done or future.failed():
1084
1115
  # the immediate future check ensures that we backoff
1085
1116
  # properly in the case that no brokers are available
1086
1117
  # to connect to (and the future is automatically failed).
1118
+ self.coordinator._client._lock.release()
1087
1119
  self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000)
1088
1120
 
1121
+ elif not self.coordinator.connected():
1122
+ self.coordinator._client._lock.release()
1123
+ self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000)
1124
+
1089
1125
  elif self.coordinator.heartbeat.session_timeout_expired():
1090
1126
  # the session timeout has expired without seeing a
1091
1127
  # successful heartbeat, so we should probably make sure
@@ -1097,28 +1133,39 @@ class HeartbeatThread(threading.Thread):
1097
1133
  # the poll timeout has expired, which means that the
1098
1134
  # foreground thread has stalled in between calls to
1099
1135
  # poll(), so we explicitly leave the group.
1100
- heartbeat_log.warning('Heartbeat poll expired, leaving group')
1101
- ### XXX
1102
- # maybe_leave_group acquires client + coordinator lock;
1103
- # if we hold coordinator lock before calling, we risk deadlock
1104
- # release() is safe here because this is the last code in the current context
1105
- self.coordinator._lock.release()
1136
+ heartbeat_log.warning(
1137
+ "Consumer poll timeout has expired. This means the time between subsequent calls to poll()"
1138
+ " was longer than the configured max_poll_interval_ms, which typically implies that"
1139
+ " the poll loop is spending too much time processing messages. You can address this"
1140
+ " either by increasing max_poll_interval_ms or by reducing the maximum size of batches"
1141
+ " returned in poll() with max_poll_records."
1142
+ )
1106
1143
  self.coordinator.maybe_leave_group()
1107
1144
 
1108
1145
  elif not self.coordinator.heartbeat.should_heartbeat():
1109
- # poll again after waiting for the retry backoff in case
1110
- # the heartbeat failed or the coordinator disconnected
1111
- heartbeat_log.log(0, 'Not ready to heartbeat, waiting')
1112
- self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000)
1146
+ next_hb = self.coordinator.heartbeat.time_to_next_heartbeat()
1147
+ heartbeat_log.debug('Waiting %0.1f secs to send next heartbeat', next_hb)
1148
+ self.coordinator._client._lock.release()
1149
+ self.coordinator._lock.wait(next_hb)
1113
1150
 
1114
1151
  else:
1152
+ heartbeat_log.debug('Sending heartbeat for group %s %s', self.coordinator.group_id, self.coordinator._generation)
1115
1153
  self.coordinator.heartbeat.sent_heartbeat()
1116
1154
  future = self.coordinator._send_heartbeat_request()
1117
1155
  future.add_callback(self._handle_heartbeat_success)
1118
1156
  future.add_errback(self._handle_heartbeat_failure)
1119
1157
 
1158
+ finally:
1159
+ self.coordinator._lock.release()
1160
+ try:
1161
+ # Possibly released in block above to allow coordinator lock wait()
1162
+ self.coordinator._client._lock.release()
1163
+ except RuntimeError:
1164
+ pass
1165
+
1120
1166
  def _handle_heartbeat_success(self, result):
1121
1167
  with self.coordinator._lock:
1168
+ heartbeat_log.debug('Heartbeat success')
1122
1169
  self.coordinator.heartbeat.received_heartbeat()
1123
1170
 
1124
1171
  def _handle_heartbeat_failure(self, exception):
@@ -1129,8 +1176,10 @@ class HeartbeatThread(threading.Thread):
1129
1176
  # member in the group for as long as the duration of the
1130
1177
  # rebalance timeout. If we stop sending heartbeats, however,
1131
1178
  # then the session timeout may expire before we can rejoin.
1179
+ heartbeat_log.debug('Treating RebalanceInProgressError as successful heartbeat')
1132
1180
  self.coordinator.heartbeat.received_heartbeat()
1133
1181
  else:
1182
+ heartbeat_log.debug('Heartbeat failure: %s', exception)
1134
1183
  self.coordinator.heartbeat.fail_heartbeat()
1135
1184
  # wake up the thread if it's sleeping to reschedule the heartbeat
1136
1185
  self.coordinator._lock.notify()
@@ -274,6 +274,7 @@ class ConsumerCoordinator(BaseCoordinator):
274
274
  try:
275
275
  self._invoke_completed_offset_commit_callbacks()
276
276
  if not self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms):
277
+ log.debug('coordinator.poll: timeout in ensure_coordinator_ready; returning early')
277
278
  return False
278
279
 
279
280
  if self.config['api_version'] >= (0, 9) and self._subscription.partitions_auto_assigned():
@@ -293,9 +294,11 @@ class ConsumerCoordinator(BaseCoordinator):
293
294
  metadata_update = self._client.cluster.request_update()
294
295
  self._client.poll(future=metadata_update, timeout_ms=timer.timeout_ms)
295
296
  if not metadata_update.is_done:
297
+ log.debug('coordinator.poll: timeout updating metadata; returning early')
296
298
  return False
297
299
 
298
300
  if not self.ensure_active_group(timeout_ms=timer.timeout_ms):
301
+ log.debug('coordinator.poll: timeout in ensure_active_group; returning early')
299
302
  return False
300
303
 
301
304
  self.poll_heartbeat()
@@ -427,7 +430,8 @@ class ConsumerCoordinator(BaseCoordinator):
427
430
  future_key = frozenset(partitions)
428
431
  timer = Timer(timeout_ms)
429
432
  while True:
430
- self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms)
433
+ if not self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms):
434
+ timer.maybe_raise()
431
435
 
432
436
  # contact coordinator to fetch committed offsets
433
437
  if future_key in self._offset_fetch_futures:
@@ -608,6 +612,11 @@ class ConsumerCoordinator(BaseCoordinator):
608
612
  if node_id is None:
609
613
  return Future().failure(Errors.CoordinatorNotAvailableError)
610
614
 
615
+ # Verify node is ready
616
+ if not self._client.ready(node_id, metadata_priority=False):
617
+ log.debug("Node %s not ready -- failing offset commit request",
618
+ node_id)
619
+ return Future().failure(Errors.NodeNotReadyError)
611
620
 
612
621
  # create the offset commit request
613
622
  offset_data = collections.defaultdict(dict)
@@ -616,7 +625,7 @@ class ConsumerCoordinator(BaseCoordinator):
616
625
 
617
626
  version = self._client.api_version(OffsetCommitRequest, max_version=6)
618
627
  if version > 1 and self._subscription.partitions_auto_assigned():
619
- generation = self.generation()
628
+ generation = self.generation_if_stable()
620
629
  else:
621
630
  generation = Generation.NO_GENERATION
622
631
 
@@ -625,7 +634,18 @@ class ConsumerCoordinator(BaseCoordinator):
625
634
  # and let the user rejoin the group in poll()
626
635
  if generation is None:
627
636
  log.info("Failing OffsetCommit request since the consumer is not part of an active group")
628
- return Future().failure(Errors.CommitFailedError('Group rebalance in progress'))
637
+ if self.rebalance_in_progress():
638
+ # if the client knows it is already rebalancing, we can use RebalanceInProgressError instead of
639
+ # CommitFailedError to indicate this is not a fatal error
640
+ return Future().failure(Errors.RebalanceInProgressError(
641
+ "Offset commit cannot be completed since the"
642
+ " consumer is undergoing a rebalance for auto partition assignment. You can try completing the rebalance"
643
+ " by calling poll() and then retry the operation."))
644
+ else:
645
+ return Future().failure(Errors.CommitFailedError(
646
+ "Offset commit cannot be completed since the"
647
+ " consumer is not part of an active group for auto partition assignment; it is likely that the consumer"
648
+ " was kicked out of the group."))
629
649
 
630
650
  if version == 0:
631
651
  request = OffsetCommitRequest[version](
@@ -706,6 +726,7 @@ class ConsumerCoordinator(BaseCoordinator):
706
726
  return future
707
727
 
708
728
  def _handle_offset_commit_response(self, offsets, future, send_time, response):
729
+ log.debug("Received OffsetCommitResponse: %s", response)
709
730
  # TODO look at adding request_latency_ms to response (like java kafka)
710
731
  if self._consumer_sensors:
711
732
  self._consumer_sensors.commit_latency.record((time.time() - send_time) * 1000)
@@ -756,7 +777,7 @@ class ConsumerCoordinator(BaseCoordinator):
756
777
  # However, we do not need to reset generations and just request re-join, such that
757
778
  # if the caller decides to proceed and poll, it would still try to proceed and re-join normally.
758
779
  self.request_rejoin()
759
- future.failure(Errors.CommitFailedError('Group rebalance in progress'))
780
+ future.failure(Errors.CommitFailedError(error_type()))
760
781
  return
761
782
  elif error_type in (Errors.UnknownMemberIdError,
762
783
  Errors.IllegalGenerationError):
@@ -765,7 +786,7 @@ class ConsumerCoordinator(BaseCoordinator):
765
786
  log.warning("OffsetCommit for group %s failed: %s",
766
787
  self.group_id, error)
767
788
  self.reset_generation()
768
- future.failure(Errors.CommitFailedError())
789
+ future.failure(Errors.CommitFailedError(error_type()))
769
790
  return
770
791
  else:
771
792
  log.error("Group %s failed to commit partition %s at offset"
@@ -804,7 +825,7 @@ class ConsumerCoordinator(BaseCoordinator):
804
825
  return Future().failure(Errors.CoordinatorNotAvailableError)
805
826
 
806
827
  # Verify node is ready
807
- if not self._client.ready(node_id):
828
+ if not self._client.ready(node_id, metadata_priority=False):
808
829
  log.debug("Node %s not ready -- failing offset fetch request",
809
830
  node_id)
810
831
  return Future().failure(Errors.NodeNotReadyError)
@@ -832,6 +853,7 @@ class ConsumerCoordinator(BaseCoordinator):
832
853
  return future
833
854
 
834
855
  def _handle_offset_fetch_response(self, future, response):
856
+ log.debug("Received OffsetFetchResponse: %s", response)
835
857
  if response.API_VERSION >= 2 and response.error_code != Errors.NoError.errno:
836
858
  error_type = Errors.for_code(response.error_code)
837
859
  log.debug("Offset fetch failed: %s", error_type.__name__)
kafka/errors.py CHANGED
@@ -24,14 +24,7 @@ class CommitFailedError(KafkaError):
24
24
  def __init__(self, *args):
25
25
  if not args:
26
26
  args = ("Commit cannot be completed since the group has already"
27
- " rebalanced and assigned the partitions to another member."
28
- " This means that the time between subsequent calls to poll()"
29
- " was longer than the configured max_poll_interval_ms, which"
30
- " typically implies that the poll loop is spending too much"
31
- " time message processing. You can address this either by"
32
- " increasing the rebalance timeout with max_poll_interval_ms,"
33
- " or by reducing the maximum size of batches returned in poll()"
34
- " with max_poll_records.",)
27
+ " rebalanced and assigned the partitions to another member.",)
35
28
  super(CommitFailedError, self).__init__(*args)
36
29
 
37
30
 
kafka/producer/kafka.py CHANGED
@@ -944,7 +944,7 @@ class KafkaProducer(object):
944
944
  """
945
945
  # add topic to metadata topic list if it is not there already.
946
946
  self._sender.add_topic(topic)
947
- timer = Timer(max_wait_ms, "Failed to update metadata after %.1f secs." % (max_wait_ms * 1000,))
947
+ timer = Timer(max_wait_ms, "Failed to update metadata after %.1f secs." % (max_wait_ms / 1000,))
948
948
  metadata_event = None
949
949
  while True:
950
950
  partitions = self._metadata.partitions_for_topic(topic)
@@ -962,7 +962,7 @@ class KafkaProducer(object):
962
962
  metadata_event.wait(timer.timeout_ms / 1000)
963
963
  if not metadata_event.is_set():
964
964
  raise Errors.KafkaTimeoutError(
965
- "Failed to update metadata after %.1f secs." % (max_wait_ms * 1000,))
965
+ "Failed to update metadata after %.1f secs." % (max_wait_ms / 1000,))
966
966
  elif topic in self._metadata.unauthorized_topics:
967
967
  raise Errors.TopicAuthorizationFailedError(set([topic]))
968
968
  else:
kafka/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = '2.2.7'
1
+ __version__ = '2.2.9'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kafka-python
3
- Version: 2.2.7
3
+ Version: 2.2.9
4
4
  Summary: Pure Python client for Apache Kafka
5
5
  Author-email: Dana Powers <dana.powers@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/dpkp/kafka-python
@@ -3,12 +3,12 @@ kafka/client_async.py,sha256=R8q_rRpG3RrYrRmcZo7XgO2oSdpLJATNcq8w-1vIJ_8,56878
3
3
  kafka/cluster.py,sha256=N3_Al4We4ZhWzz6lVHy6SfqwDZfQy73iV7Qg4g4nxRs,16745
4
4
  kafka/codec.py,sha256=8NZpnehzNrhSBIjzbPVSvyFbSeLAqEntE7BfVHu-_9I,10036
5
5
  kafka/conn.py,sha256=pDmzcn-m8oiFdvYh-97qbRLEBXh0sSl9nT74VIIRuEE,69472
6
- kafka/errors.py,sha256=VygO7AYZvbb52wVgjxuXz-6S2W3vNzzDstF5FNP8Bvk,33829
6
+ kafka/errors.py,sha256=qX2Fp0qawU_HBNcZCwB7EDCmx3C2PehrETi6qSEJHmk,33290
7
7
  kafka/future.py,sha256=ZQStbfUYIPJRrgMfAWxxjrIRVxsw4WCtSR0J0bkyGno,2847
8
8
  kafka/socks5_wrapper.py,sha256=6woOaCTJXJ5e89_zdyW5BjOpyE4rCbYFH-kd-FeuPuk,9827
9
9
  kafka/structs.py,sha256=SJGzmLdV21jZyQ7247k0WFy16UiusgTHK3I-e4qzI-E,3058
10
10
  kafka/util.py,sha256=EnzCJuRkQ6Kh2lIdNwFKvT4PddkZ5bzop4ooGGIhe5g,4366
11
- kafka/version.py,sha256=_a_I7cjpwo5jHaXCQYkNQgSKbe2WWcxvqM6NIPlZ27w,22
11
+ kafka/version.py,sha256=PLfhtFzYo-Q28-0b7ctZlZxf03WI4mwwRfMHSMxNmd8,22
12
12
  kafka/admin/__init__.py,sha256=S_XxqyyV480_yXhttK79XZqNAmZyXRjspd3SoqYykE8,720
13
13
  kafka/admin/acl_resource.py,sha256=ak_dUsSni4SyP0ORbSKenZpwTy0Ykxq3FSt_9XgLR8k,8265
14
14
  kafka/admin/client.py,sha256=RabA8l8Im3iBEXgPVkiofNW6QyeatQHaymBWFZ8Sxkw,78929
@@ -23,12 +23,12 @@ kafka/benchmarks/record_batch_compose.py,sha256=CnUreNg1lUT0Qx9enmSr-THmBl9PjVMf
23
23
  kafka/benchmarks/record_batch_read.py,sha256=vlFaWU2YWI379n_2M8qieb_S2uHUWKV0NquEYy5b-Ho,2184
24
24
  kafka/benchmarks/varint_speed.py,sha256=s4CuvKgDZL-_zna5E3vM8RgHjhXuW6pcaO1z1WYZ_0Y,12585
25
25
  kafka/consumer/__init__.py,sha256=NDdvtyuJgFyQZahqL9i5sYXGP6rOMIXWwHQEaZ1fCcs,122
26
- kafka/consumer/fetcher.py,sha256=iwYhWotaEQ55oXTzGKPUOYxvC_6FcoIks_ZqL-gu3DE,68855
27
- kafka/consumer/group.py,sha256=xmEpVMPJbCAk9__pdAOMswh8I-Ujj5hBax_hPZHZb_s,58758
26
+ kafka/consumer/fetcher.py,sha256=5b-_4VsmQXrRd2Ul8LMZ93TZJHVEoYpmTPB6QcOMizw,69045
27
+ kafka/consumer/group.py,sha256=oieWNHM1NWiOZT8pasOLfFJAbmJEXJ4h7PgUtklxo_Q,58944
28
28
  kafka/consumer/subscription_state.py,sha256=f_qJQMhTWQnUd_7lPj43gsagWSKGEmP4jpnEwA6s1Ec,23661
29
29
  kafka/coordinator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
- kafka/coordinator/base.py,sha256=fT6lOD7whmZ1jY7M-D6L5aR7daedxwJmhL7mZGfX4GE,51858
31
- kafka/coordinator/consumer.py,sha256=IJWWt4E6E7JZZGKtGgPtud9V3eqs0js6EaosS3bxffE,44766
30
+ kafka/coordinator/base.py,sha256=NmHXyqoJZVXL2KhahXLCOH1zVx9gyTdhrt-_unxIAaE,54365
31
+ kafka/coordinator/consumer.py,sha256=le4bGbHfrDK4pperYXekPKzuZW576uXL324IOwS4Kmw,46348
32
32
  kafka/coordinator/heartbeat.py,sha256=LeJJlwz1oUEOfEMIFT-R7ZOHBQ-b-luVKwmKyWxLfDo,3242
33
33
  kafka/coordinator/protocol.py,sha256=wTaIOnUVbj0CKXZ82FktZo-zMRvOCk3hdQAoHJ62e3I,1041
34
34
  kafka/coordinator/assignors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -67,7 +67,7 @@ kafka/partitioner/__init__.py,sha256=Fks3C5_kokVWYw1Ad5wv0sVVzaaBtOejL-2bIL1yRII
67
67
  kafka/partitioner/default.py,sha256=tW-RC1PWIPRDEbeEAaPTLn-00oiZnXoVouEk9AnYE4w,2879
68
68
  kafka/producer/__init__.py,sha256=i3Wxih0NHjmqCkRNE54ial8fBp9siqabUE6ZGyL6oX8,122
69
69
  kafka/producer/future.py,sha256=UC3-g9QlgVFmbitrtMXVpeP0Pbvr7xl2kcw6bAehKG8,2983
70
- kafka/producer/kafka.py,sha256=rzsAoB4ser889nRCtILqGqzWI7jREGV9HPngimCWJPE,53211
70
+ kafka/producer/kafka.py,sha256=-xWSiy4V8kNTpqNZVZiEtEdZG2H27n54MTw8sPZx9Cc,53211
71
71
  kafka/producer/record_accumulator.py,sha256=dhJW2vxiEDxsws0xRQ5REIrt3lLNu1g0R7HIMs6pZOY,28172
72
72
  kafka/producer/sender.py,sha256=8-TLTw6vQO7AheWSDPI33cQdWMyTDxi1k-pkXuUb9k0,37789
73
73
  kafka/producer/transaction_manager.py,sha256=HNfJNZwNfJtYdftn9SeaDfi7I5MKk0LD3sK64inuPt0,41537
@@ -120,7 +120,7 @@ kafka/vendor/enum34.py,sha256=-u-lxAiJMt6ru4Do7NUDY9OpeWkYJMksb2xengJawFE,31204
120
120
  kafka/vendor/selectors34.py,sha256=gxejLO4eXf8mRSGXaQiknPig3GdX1rtsZiYOQJVuAy8,20594
121
121
  kafka/vendor/six.py,sha256=lLBa9_HrANP5BMZ7twEzg1M3wofwPmXyptuWmHX0brY,34826
122
122
  kafka/vendor/socketpair.py,sha256=Fi3PoY1Okkppab720wFk1BhHXyjcw7hi5DwhqrYZH2Y,2737
123
- kafka_python-2.2.7.dist-info/METADATA,sha256=DaRVJ4MFgXCCQvTdyKm39Be5iorMY5YN-3w6zHMvVnA,9951
124
- kafka_python-2.2.7.dist-info/WHEEL,sha256=joeZ_q2kZqPjVkNy_YbjGrynLS6bxmBj74YkvIORXVI,109
125
- kafka_python-2.2.7.dist-info/top_level.txt,sha256=IivJz7l5WHdLNDT6RIiVAlhjQzYRwGqBBmKHZ7WjPeM,6
126
- kafka_python-2.2.7.dist-info/RECORD,,
123
+ kafka_python-2.2.9.dist-info/METADATA,sha256=HsbiFii51H1LeFT67mMZFvpspHlK360D-PnMjXlYw5A,9951
124
+ kafka_python-2.2.9.dist-info/WHEEL,sha256=egKm5cKfE6OqlHwodY8Jjp4yqZDBXgsj09UsV5ojd_U,109
125
+ kafka_python-2.2.9.dist-info/top_level.txt,sha256=IivJz7l5WHdLNDT6RIiVAlhjQzYRwGqBBmKHZ7WjPeM,6
126
+ kafka_python-2.2.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.4.0)
2
+ Generator: setuptools (80.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py2-none-any
5
5
  Tag: py3-none-any