kafka-python 2.1.2__py2.py3-none-any.whl → 2.1.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kafka/admin/client.py CHANGED
@@ -1496,7 +1496,7 @@ class KafkaAdminClient(object):
1496
1496
  A message future
1497
1497
  """
1498
1498
  version = self._client.api_version(OffsetFetchRequest, max_version=5)
1499
- if version <= 3:
1499
+ if version <= 5:
1500
1500
  if partitions is None:
1501
1501
  if version <= 1:
1502
1502
  raise ValueError(
kafka/client_async.py CHANGED
@@ -27,7 +27,7 @@ from kafka.metrics.stats import Avg, Count, Rate
27
27
  from kafka.metrics.stats.rate import TimeUnit
28
28
  from kafka.protocol.broker_api_versions import BROKER_API_VERSIONS
29
29
  from kafka.protocol.metadata import MetadataRequest
30
- from kafka.util import Dict, WeakMethod
30
+ from kafka.util import Dict, WeakMethod, ensure_valid_topic_name
31
31
  # Although this looks unused, it actually monkey-patches socket.socketpair()
32
32
  # and should be left in as long as we're using socket.socketpair() in this file
33
33
  from kafka.vendor import socketpair # noqa: F401
@@ -276,6 +276,7 @@ class KafkaClient(object):
276
276
  if compatible_version:
277
277
  log.warning('Configured api_version %s not supported; using %s',
278
278
  self.config['api_version'], compatible_version)
279
+ self.config['api_version'] = compatible_version
279
280
  self._api_versions = BROKER_API_VERSIONS[compatible_version]
280
281
  else:
281
282
  raise Errors.UnrecognizedBrokerVersion(self.config['api_version'])
@@ -909,7 +910,13 @@ class KafkaClient(object):
909
910
 
910
911
  Returns:
911
912
  Future: resolves after metadata request/response
913
+
914
+ Raises:
915
+ TypeError: if topic is not a string
916
+ ValueError: if topic is invalid: must be chars (a-zA-Z0-9._-), and less than 250 length
912
917
  """
918
+ ensure_valid_topic_name(topic)
919
+
913
920
  if topic in self._topics:
914
921
  return Future().success(set(self._topics))
915
922
 
kafka/cluster.py CHANGED
@@ -112,6 +112,7 @@ class ClusterMetadata(object):
112
112
 
113
113
  Returns:
114
114
  set: {partition (int), ...}
115
+ None if topic not found.
115
116
  """
116
117
  if topic not in self._partitions:
117
118
  return None
kafka/conn.py CHANGED
@@ -101,6 +101,10 @@ class BrokerConnection(object):
101
101
  server-side log entries that correspond to this client. Also
102
102
  submitted to GroupCoordinator for logging with respect to
103
103
  consumer group administration. Default: 'kafka-python-{version}'
104
+ client_software_name (str): Sent to kafka broker for KIP-511.
105
+ Default: 'kafka-python'
106
+ client_software_version (str): Sent to kafka broker for KIP-511.
107
+ Default: The kafka-python version (via kafka.version).
104
108
  reconnect_backoff_ms (int): The amount of time in milliseconds to
105
109
  wait before attempting to reconnect to a given host.
106
110
  Default: 50.
@@ -191,6 +195,8 @@ class BrokerConnection(object):
191
195
 
192
196
  DEFAULT_CONFIG = {
193
197
  'client_id': 'kafka-python-' + __version__,
198
+ 'client_software_name': 'kafka-python',
199
+ 'client_software_version': __version__,
194
200
  'node_id': 0,
195
201
  'request_timeout_ms': 30000,
196
202
  'reconnect_backoff_ms': 50,
@@ -242,7 +248,7 @@ class BrokerConnection(object):
242
248
  self._api_versions = None
243
249
  self._api_version = None
244
250
  self._check_version_idx = None
245
- self._api_versions_idx = 2
251
+ self._api_versions_idx = 4 # version of ApiVersionsRequest to try on first connect
246
252
  self._throttle_time = None
247
253
  self._socks5_proxy = None
248
254
 
@@ -538,7 +544,14 @@ class BrokerConnection(object):
538
544
  log.debug('%s: Using pre-configured api_version %s for ApiVersions', self, self._api_version)
539
545
  return True
540
546
  elif self._check_version_idx is None:
541
- request = ApiVersionsRequest[self._api_versions_idx]()
547
+ version = self._api_versions_idx
548
+ if version >= 3:
549
+ request = ApiVersionsRequest[version](
550
+ client_software_name=self.config['client_software_name'],
551
+ client_software_version=self.config['client_software_version'],
552
+ _tagged_fields={})
553
+ else:
554
+ request = ApiVersionsRequest[version]()
542
555
  future = Future()
543
556
  response = self._send(request, blocking=True, request_timeout_ms=(self.config['api_version_auto_timeout_ms'] * 0.8))
544
557
  response.add_callback(self._handle_api_versions_response, future)
@@ -573,11 +586,15 @@ class BrokerConnection(object):
573
586
 
574
587
  def _handle_api_versions_response(self, future, response):
575
588
  error_type = Errors.for_code(response.error_code)
576
- # if error_type i UNSUPPORTED_VERSION: retry w/ latest version from response
577
589
  if error_type is not Errors.NoError:
578
590
  future.failure(error_type())
579
591
  if error_type is Errors.UnsupportedVersionError:
580
592
  self._api_versions_idx -= 1
593
+ for api_key, min_version, max_version, *rest in response.api_versions:
594
+ # If broker provides a lower max_version, skip to that
595
+ if api_key == response.API_KEY:
596
+ self._api_versions_idx = min(self._api_versions_idx, max_version)
597
+ break
581
598
  if self._api_versions_idx >= 0:
582
599
  self._api_versions_future = None
583
600
  self.state = ConnectionStates.API_VERSIONS_SEND
@@ -587,7 +604,7 @@ class BrokerConnection(object):
587
604
  return
588
605
  self._api_versions = dict([
589
606
  (api_key, (min_version, max_version))
590
- for api_key, min_version, max_version in response.api_versions
607
+ for api_key, min_version, max_version, *rest in response.api_versions
591
608
  ])
592
609
  self._api_version = self._infer_broker_version_from_api_versions(self._api_versions)
593
610
  log.info('Broker version identified as %s', '.'.join(map(str, self._api_version)))
kafka/consumer/fetcher.py CHANGED
@@ -4,7 +4,6 @@ import collections
4
4
  import copy
5
5
  import itertools
6
6
  import logging
7
- import random
8
7
  import sys
9
8
  import time
10
9
 
@@ -57,7 +56,6 @@ class Fetcher(six.Iterator):
57
56
  'max_partition_fetch_bytes': 1048576,
58
57
  'max_poll_records': sys.maxsize,
59
58
  'check_crcs': True,
60
- 'iterator_refetch_records': 1, # undocumented -- interface may change
61
59
  'metric_group_prefix': 'consumer',
62
60
  'retry_backoff_ms': 100,
63
61
  'enable_incremental_fetch_sessions': True,
@@ -380,10 +378,13 @@ class Fetcher(six.Iterator):
380
378
  # as long as the partition is still assigned
381
379
  position = self._subscriptions.assignment[tp].position
382
380
  if part.next_fetch_offset == position.offset:
383
- part_records = part.take(max_records)
384
381
  log.debug("Returning fetched records at offset %d for assigned"
385
382
  " partition %s", position.offset, tp)
386
- drained[tp].extend(part_records)
383
+ part_records = part.take(max_records)
384
+ # list.extend([]) is a noop, but because drained is a defaultdict
385
+ # we should avoid initializing the default list unless there are records
386
+ if part_records:
387
+ drained[tp].extend(part_records)
387
388
  # We want to increment subscription position if (1) we're using consumer.poll(),
388
389
  # or (2) we didn't return any records (consumer iterator will update position
389
390
  # when each message is yielded). There may be edge cases where we re-fetch records
@@ -562,13 +563,11 @@ class Fetcher(six.Iterator):
562
563
  def _fetchable_partitions(self):
563
564
  fetchable = self._subscriptions.fetchable_partitions()
564
565
  # do not fetch a partition if we have a pending fetch response to process
566
+ discard = {fetch.topic_partition for fetch in self._completed_fetches}
565
567
  current = self._next_partition_records
566
- pending = copy.copy(self._completed_fetches)
567
568
  if current:
568
- fetchable.discard(current.topic_partition)
569
- for fetch in pending:
570
- fetchable.discard(fetch.topic_partition)
571
- return fetchable
569
+ discard.add(current.topic_partition)
570
+ return [tp for tp in fetchable if tp not in discard]
572
571
 
573
572
  def _create_fetch_requests(self):
574
573
  """Create fetch requests for all assigned partitions, grouped by node.
@@ -581,7 +580,7 @@ class Fetcher(six.Iterator):
581
580
  # create the fetch info as a dict of lists of partition info tuples
582
581
  # which can be passed to FetchRequest() via .items()
583
582
  version = self._client.api_version(FetchRequest, max_version=10)
584
- fetchable = collections.defaultdict(dict)
583
+ fetchable = collections.defaultdict(collections.OrderedDict)
585
584
 
586
585
  for partition in self._fetchable_partitions():
587
586
  node_id = self._client.cluster.leader_for_partition(partition)
@@ -695,10 +694,7 @@ class Fetcher(six.Iterator):
695
694
  for partition_data in partitions])
696
695
  metric_aggregator = FetchResponseMetricAggregator(self._sensors, partitions)
697
696
 
698
- # randomized ordering should improve balance for short-lived consumers
699
- random.shuffle(response.topics)
700
697
  for topic, partitions in response.topics:
701
- random.shuffle(partitions)
702
698
  for partition_data in partitions:
703
699
  tp = TopicPartition(topic, partition_data[0])
704
700
  fetch_offset = fetch_offsets[tp]
@@ -733,8 +729,6 @@ class Fetcher(six.Iterator):
733
729
  " since it is no longer fetchable", tp)
734
730
 
735
731
  elif error_type is Errors.NoError:
736
- self._subscriptions.assignment[tp].highwater = highwater
737
-
738
732
  # we are interested in this fetch only if the beginning
739
733
  # offset (of the *request*) matches the current consumed position
740
734
  # Note that the *response* may return a messageset that starts
@@ -748,30 +742,35 @@ class Fetcher(six.Iterator):
748
742
  return None
749
743
 
750
744
  records = MemoryRecords(completed_fetch.partition_data[-1])
751
- if records.has_next():
752
- log.debug("Adding fetched record for partition %s with"
753
- " offset %d to buffered record list", tp,
754
- position.offset)
755
- parsed_records = self.PartitionRecords(fetch_offset, tp, records,
756
- self.config['key_deserializer'],
757
- self.config['value_deserializer'],
758
- self.config['check_crcs'],
759
- completed_fetch.metric_aggregator)
760
- return parsed_records
761
- elif records.size_in_bytes() > 0:
762
- # we did not read a single message from a non-empty
763
- # buffer because that message's size is larger than
764
- # fetch size, in this case record this exception
765
- record_too_large_partitions = {tp: fetch_offset}
766
- raise RecordTooLargeError(
767
- "There are some messages at [Partition=Offset]: %s "
768
- " whose size is larger than the fetch size %s"
769
- " and hence cannot be ever returned."
770
- " Increase the fetch size, or decrease the maximum message"
771
- " size the broker will allow." % (
772
- record_too_large_partitions,
773
- self.config['max_partition_fetch_bytes']),
774
- record_too_large_partitions)
745
+ log.debug("Preparing to read %s bytes of data for partition %s with offset %d",
746
+ records.size_in_bytes(), tp, fetch_offset)
747
+ parsed_records = self.PartitionRecords(fetch_offset, tp, records,
748
+ self.config['key_deserializer'],
749
+ self.config['value_deserializer'],
750
+ self.config['check_crcs'],
751
+ completed_fetch.metric_aggregator,
752
+ self._on_partition_records_drain)
753
+ if not records.has_next() and records.size_in_bytes() > 0:
754
+ if completed_fetch.response_version < 3:
755
+ # Implement the pre KIP-74 behavior of throwing a RecordTooLargeException.
756
+ record_too_large_partitions = {tp: fetch_offset}
757
+ raise RecordTooLargeError(
758
+ "There are some messages at [Partition=Offset]: %s "
759
+ " whose size is larger than the fetch size %s"
760
+ " and hence cannot be ever returned. Please condier upgrading your broker to 0.10.1.0 or"
761
+ " newer to avoid this issue. Alternatively, increase the fetch size on the client (using"
762
+ " max_partition_fetch_bytes)" % (
763
+ record_too_large_partitions,
764
+ self.config['max_partition_fetch_bytes']),
765
+ record_too_large_partitions)
766
+ else:
767
+ # This should not happen with brokers that support FetchRequest/Response V3 or higher (i.e. KIP-74)
768
+ raise Errors.KafkaError("Failed to make progress reading messages at %s=%s."
769
+ " Received a non-empty fetch response from the server, but no"
770
+ " complete records were found." % (tp, fetch_offset))
771
+
772
+ if highwater >= 0:
773
+ self._subscriptions.assignment[tp].highwater = highwater
775
774
 
776
775
  elif error_type in (Errors.NotLeaderForPartitionError,
777
776
  Errors.ReplicaNotAvailableError,
@@ -805,14 +804,25 @@ class Fetcher(six.Iterator):
805
804
  if parsed_records is None:
806
805
  completed_fetch.metric_aggregator.record(tp, 0, 0)
807
806
 
808
- return None
807
+ if error_type is not Errors.NoError:
808
+ # we move the partition to the end if there was an error. This way, it's more likely that partitions for
809
+ # the same topic can remain together (allowing for more efficient serialization).
810
+ self._subscriptions.move_partition_to_end(tp)
811
+
812
+ return parsed_records
813
+
814
+ def _on_partition_records_drain(self, partition_records):
815
+ # we move the partition to the end if we received some bytes. This way, it's more likely that partitions
816
+ # for the same topic can remain together (allowing for more efficient serialization).
817
+ if partition_records.bytes_read > 0:
818
+ self._subscriptions.move_partition_to_end(partition_records.topic_partition)
809
819
 
810
820
  def close(self):
811
821
  if self._next_partition_records is not None:
812
822
  self._next_partition_records.drain()
813
823
 
814
824
  class PartitionRecords(object):
815
- def __init__(self, fetch_offset, tp, records, key_deserializer, value_deserializer, check_crcs, metric_aggregator):
825
+ def __init__(self, fetch_offset, tp, records, key_deserializer, value_deserializer, check_crcs, metric_aggregator, on_drain):
816
826
  self.fetch_offset = fetch_offset
817
827
  self.topic_partition = tp
818
828
  self.leader_epoch = -1
@@ -824,6 +834,7 @@ class Fetcher(six.Iterator):
824
834
  self.record_iterator = itertools.dropwhile(
825
835
  self._maybe_skip_record,
826
836
  self._unpack_records(tp, records, key_deserializer, value_deserializer))
837
+ self.on_drain = on_drain
827
838
 
828
839
  def _maybe_skip_record(self, record):
829
840
  # When fetching an offset that is in the middle of a
@@ -845,6 +856,7 @@ class Fetcher(six.Iterator):
845
856
  if self.record_iterator is not None:
846
857
  self.record_iterator = None
847
858
  self.metric_aggregator.record(self.topic_partition, self.bytes_read, self.records_read)
859
+ self.on_drain(self)
848
860
 
849
861
  def take(self, n=None):
850
862
  return list(itertools.islice(self.record_iterator, 0, n))
@@ -943,6 +955,13 @@ class FetchSessionHandler(object):
943
955
  self.session_partitions = {}
944
956
 
945
957
  def build_next(self, next_partitions):
958
+ """
959
+ Arguments:
960
+ next_partitions (dict): TopicPartition -> TopicPartitionState
961
+
962
+ Returns:
963
+ FetchRequestData
964
+ """
946
965
  if self.next_metadata.is_full:
947
966
  log.debug("Built full fetch %s for node %s with %s partition(s).",
948
967
  self.next_metadata, self.node_id, len(next_partitions))
@@ -965,8 +984,8 @@ class FetchSessionHandler(object):
965
984
  altered.add(tp)
966
985
 
967
986
  log.debug("Built incremental fetch %s for node %s. Added %s, altered %s, removed %s out of %s",
968
- self.next_metadata, self.node_id, added, altered, removed, self.session_partitions.keys())
969
- to_send = {tp: next_partitions[tp] for tp in (added | altered)}
987
+ self.next_metadata, self.node_id, added, altered, removed, self.session_partitions.keys())
988
+ to_send = collections.OrderedDict({tp: next_partitions[tp] for tp in next_partitions if tp in (added | altered)})
970
989
  return FetchRequestData(to_send, removed, self.next_metadata)
971
990
 
972
991
  def handle_response(self, response):
@@ -1106,18 +1125,11 @@ class FetchRequestData(object):
1106
1125
  @property
1107
1126
  def to_send(self):
1108
1127
  # Return as list of [(topic, [(partition, ...), ...]), ...]
1109
- # so it an be passed directly to encoder
1128
+ # so it can be passed directly to encoder
1110
1129
  partition_data = collections.defaultdict(list)
1111
1130
  for tp, partition_info in six.iteritems(self._to_send):
1112
1131
  partition_data[tp.topic].append(partition_info)
1113
- # As of version == 3 partitions will be returned in order as
1114
- # they are requested, so to avoid starvation with
1115
- # `fetch_max_bytes` option we need this shuffle
1116
- # NOTE: we do have partition_data in random order due to usage
1117
- # of unordered structures like dicts, but that does not
1118
- # guarantee equal distribution, and starting in Python3.6
1119
- # dicts retain insert order.
1120
- return random.sample(list(partition_data.items()), k=len(partition_data))
1132
+ return list(partition_data.items())
1121
1133
 
1122
1134
  @property
1123
1135
  def to_forget(self):
kafka/consumer/group.py CHANGED
@@ -444,8 +444,15 @@ class KafkaConsumer(six.Iterator):
444
444
  no rebalance operation triggered when group membership or cluster
445
445
  and topic metadata change.
446
446
  """
447
- self._subscription.assign_from_user(partitions)
448
- self._client.set_topics([tp.topic for tp in partitions])
447
+ if not partitions:
448
+ self.unsubscribe()
449
+ else:
450
+ # make sure the offsets of topic partitions the consumer is unsubscribing from
451
+ # are committed since there will be no following rebalance
452
+ self._coordinator.maybe_auto_commit_offsets_now()
453
+ self._subscription.assign_from_user(partitions)
454
+ self._client.set_topics([tp.topic for tp in partitions])
455
+ log.debug("Subscribed to partition(s): %s", partitions)
449
456
 
450
457
  def assignment(self):
451
458
  """Get the TopicPartitions currently assigned to this consumer.
@@ -463,19 +470,21 @@ class KafkaConsumer(six.Iterator):
463
470
  """
464
471
  return self._subscription.assigned_partitions()
465
472
 
466
- def close(self, autocommit=True):
473
+ def close(self, autocommit=True, timeout_ms=None):
467
474
  """Close the consumer, waiting indefinitely for any needed cleanup.
468
475
 
469
476
  Keyword Arguments:
470
477
  autocommit (bool): If auto-commit is configured for this consumer,
471
478
  this optional flag causes the consumer to attempt to commit any
472
479
  pending consumed offsets prior to close. Default: True
480
+ timeout_ms (num, optional): Milliseconds to wait for auto-commit.
481
+ Default: None
473
482
  """
474
483
  if self._closed:
475
484
  return
476
485
  log.debug("Closing the KafkaConsumer.")
477
486
  self._closed = True
478
- self._coordinator.close(autocommit=autocommit)
487
+ self._coordinator.close(autocommit=autocommit, timeout_ms=timeout_ms)
479
488
  self._metrics.close()
480
489
  self._client.close()
481
490
  try:
@@ -634,7 +643,7 @@ class KafkaConsumer(six.Iterator):
634
643
  if partitions is None:
635
644
  self._fetch_all_topic_metadata()
636
645
  partitions = cluster.partitions_for_topic(topic)
637
- return partitions
646
+ return partitions or set()
638
647
 
639
648
  def poll(self, timeout_ms=0, max_records=None, update_offsets=True):
640
649
  """Fetch data from assigned topics / partitions.
@@ -959,8 +968,11 @@ class KafkaConsumer(six.Iterator):
959
968
 
960
969
  def unsubscribe(self):
961
970
  """Unsubscribe from all topics and clear all assigned partitions."""
971
+ # make sure the offsets of topic partitions the consumer is unsubscribing from
972
+ # are committed since there will be no following rebalance
973
+ self._coordinator.maybe_auto_commit_offsets_now()
962
974
  self._subscription.unsubscribe()
963
- self._coordinator.close()
975
+ self._coordinator.maybe_leave_group()
964
976
  self._client.cluster.need_all_topic_metadata = False
965
977
  self._client.set_topics([])
966
978
  log.debug("Unsubscribed all topics or patterns and assigned partitions")
@@ -1,7 +1,19 @@
1
1
  from __future__ import absolute_import
2
2
 
3
3
  import abc
4
+ from collections import defaultdict, OrderedDict
5
+ try:
6
+ from collections import Sequence
7
+ except ImportError:
8
+ from collections.abc import Sequence
9
+ try:
10
+ # enum in stdlib as of py3.4
11
+ from enum import IntEnum # pylint: disable=import-error
12
+ except ImportError:
13
+ # vendored backport module
14
+ from kafka.vendor.enum34 import IntEnum
4
15
  import logging
16
+ import random
5
17
  import re
6
18
 
7
19
  from kafka.vendor import six
@@ -9,10 +21,18 @@ from kafka.vendor import six
9
21
  from kafka.errors import IllegalStateError
10
22
  from kafka.protocol.list_offsets import OffsetResetStrategy
11
23
  from kafka.structs import OffsetAndMetadata
24
+ from kafka.util import ensure_valid_topic_name
12
25
 
13
26
  log = logging.getLogger(__name__)
14
27
 
15
28
 
29
+ class SubscriptionType(IntEnum):
30
+ NONE = 0
31
+ AUTO_TOPICS = 1
32
+ AUTO_PATTERN = 2
33
+ USER_ASSIGNED = 3
34
+
35
+
16
36
  class SubscriptionState(object):
17
37
  """
18
38
  A class for tracking the topics, partitions, and offsets for the consumer.
@@ -43,10 +63,6 @@ class SubscriptionState(object):
43
63
  " (2) subscribe to topics matching a regex pattern,"
44
64
  " (3) assign itself specific topic-partitions.")
45
65
 
46
- # Taken from: https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java#L29
47
- _MAX_NAME_LENGTH = 249
48
- _TOPIC_LEGAL_CHARS = re.compile('^[a-zA-Z0-9._-]+$')
49
-
50
66
  def __init__(self, offset_reset_strategy='earliest'):
51
67
  """Initialize a SubscriptionState instance
52
68
 
@@ -64,15 +80,24 @@ class SubscriptionState(object):
64
80
  self._default_offset_reset_strategy = offset_reset_strategy
65
81
 
66
82
  self.subscription = None # set() or None
83
+ self.subscription_type = SubscriptionType.NONE
67
84
  self.subscribed_pattern = None # regex str or None
68
85
  self._group_subscription = set()
69
86
  self._user_assignment = set()
70
- self.assignment = dict()
87
+ self.assignment = OrderedDict()
71
88
  self.listener = None
72
89
 
73
90
  # initialize to true for the consumers to fetch offset upon starting up
74
91
  self.needs_fetch_committed_offsets = True
75
92
 
93
+ def _set_subscription_type(self, subscription_type):
94
+ if not isinstance(subscription_type, SubscriptionType):
95
+ raise ValueError('SubscriptionType enum required')
96
+ if self.subscription_type == SubscriptionType.NONE:
97
+ self.subscription_type = subscription_type
98
+ elif self.subscription_type != subscription_type:
99
+ raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
100
+
76
101
  def subscribe(self, topics=(), pattern=None, listener=None):
77
102
  """Subscribe to a list of topics, or a topic regex pattern.
78
103
 
@@ -108,39 +133,25 @@ class SubscriptionState(object):
108
133
  guaranteed, however, that the partitions revoked/assigned
109
134
  through this interface are from topics subscribed in this call.
110
135
  """
111
- if self._user_assignment or (topics and pattern):
112
- raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
113
136
  assert topics or pattern, 'Must provide topics or pattern'
137
+ if (topics and pattern):
138
+ raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
114
139
 
115
- if pattern:
140
+ elif pattern:
141
+ self._set_subscription_type(SubscriptionType.AUTO_PATTERN)
116
142
  log.info('Subscribing to pattern: /%s/', pattern)
117
143
  self.subscription = set()
118
144
  self.subscribed_pattern = re.compile(pattern)
119
145
  else:
146
+ if isinstance(topics, str) or not isinstance(topics, Sequence):
147
+ raise TypeError('Topics must be a list (or non-str sequence)')
148
+ self._set_subscription_type(SubscriptionType.AUTO_TOPICS)
120
149
  self.change_subscription(topics)
121
150
 
122
151
  if listener and not isinstance(listener, ConsumerRebalanceListener):
123
152
  raise TypeError('listener must be a ConsumerRebalanceListener')
124
153
  self.listener = listener
125
154
 
126
- def _ensure_valid_topic_name(self, topic):
127
- """ Ensures that the topic name is valid according to the kafka source. """
128
-
129
- # See Kafka Source:
130
- # https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java
131
- if topic is None:
132
- raise TypeError('All topics must not be None')
133
- if not isinstance(topic, six.string_types):
134
- raise TypeError('All topics must be strings')
135
- if len(topic) == 0:
136
- raise ValueError('All topics must be non-empty strings')
137
- if topic == '.' or topic == '..':
138
- raise ValueError('Topic name cannot be "." or ".."')
139
- if len(topic) > self._MAX_NAME_LENGTH:
140
- raise ValueError('Topic name is illegal, it can\'t be longer than {0} characters, topic: "{1}"'.format(self._MAX_NAME_LENGTH, topic))
141
- if not self._TOPIC_LEGAL_CHARS.match(topic):
142
- raise ValueError('Topic name "{0}" is illegal, it contains a character other than ASCII alphanumerics, ".", "_" and "-"'.format(topic))
143
-
144
155
  def change_subscription(self, topics):
145
156
  """Change the topic subscription.
146
157
 
@@ -154,7 +165,7 @@ class SubscriptionState(object):
154
165
  - a topic name is '.' or '..' or
155
166
  - a topic name does not consist of ASCII-characters/'-'/'_'/'.'
156
167
  """
157
- if self._user_assignment:
168
+ if not self.partitions_auto_assigned():
158
169
  raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
159
170
 
160
171
  if isinstance(topics, six.string_types):
@@ -166,17 +177,12 @@ class SubscriptionState(object):
166
177
  return
167
178
 
168
179
  for t in topics:
169
- self._ensure_valid_topic_name(t)
180
+ ensure_valid_topic_name(t)
170
181
 
171
182
  log.info('Updating subscribed topics to: %s', topics)
172
183
  self.subscription = set(topics)
173
184
  self._group_subscription.update(topics)
174
185
 
175
- # Remove any assigned partitions which are no longer subscribed to
176
- for tp in set(self.assignment.keys()):
177
- if tp.topic not in self.subscription:
178
- del self.assignment[tp]
179
-
180
186
  def group_subscribe(self, topics):
181
187
  """Add topics to the current group subscription.
182
188
 
@@ -186,13 +192,13 @@ class SubscriptionState(object):
186
192
  Arguments:
187
193
  topics (list of str): topics to add to the group subscription
188
194
  """
189
- if self._user_assignment:
195
+ if not self.partitions_auto_assigned():
190
196
  raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
191
197
  self._group_subscription.update(topics)
192
198
 
193
199
  def reset_group_subscription(self):
194
200
  """Reset the group's subscription to only contain topics subscribed by this consumer."""
195
- if self._user_assignment:
201
+ if not self.partitions_auto_assigned():
196
202
  raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
197
203
  assert self.subscription is not None, 'Subscription required'
198
204
  self._group_subscription.intersection_update(self.subscription)
@@ -215,19 +221,11 @@ class SubscriptionState(object):
215
221
  Raises:
216
222
  IllegalStateError: if consumer has already called subscribe()
217
223
  """
218
- if self.subscription is not None:
219
- raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
220
-
224
+ self._set_subscription_type(SubscriptionType.USER_ASSIGNED)
221
225
  if self._user_assignment != set(partitions):
222
226
  self._user_assignment = set(partitions)
223
-
224
- for partition in partitions:
225
- if partition not in self.assignment:
226
- self._add_assigned_partition(partition)
227
-
228
- for tp in set(self.assignment.keys()) - self._user_assignment:
229
- del self.assignment[tp]
230
-
227
+ self._set_assignment({partition: self.assignment.get(partition, TopicPartitionState())
228
+ for partition in partitions})
231
229
  self.needs_fetch_committed_offsets = True
232
230
 
233
231
  def assign_from_subscribed(self, assignments):
@@ -249,19 +247,32 @@ class SubscriptionState(object):
249
247
  if tp.topic not in self.subscription:
250
248
  raise ValueError("Assigned partition %s for non-subscribed topic." % (tp,))
251
249
 
252
- # after rebalancing, we always reinitialize the assignment state
253
- self.assignment.clear()
254
- for tp in assignments:
255
- self._add_assigned_partition(tp)
250
+ # after rebalancing, we always reinitialize the assignment value
251
+ # randomized ordering should improve balance for short-lived consumers
252
+ self._set_assignment({partition: TopicPartitionState() for partition in assignments}, randomize=True)
256
253
  self.needs_fetch_committed_offsets = True
257
254
  log.info("Updated partition assignment: %s", assignments)
258
255
 
256
+ def _set_assignment(self, partition_states, randomize=False):
257
+ """Batch partition assignment by topic (self.assignment is OrderedDict)"""
258
+ self.assignment.clear()
259
+ topics = [tp.topic for tp in six.iterkeys(partition_states)]
260
+ if randomize:
261
+ random.shuffle(topics)
262
+ topic_partitions = OrderedDict({topic: [] for topic in topics})
263
+ for tp in six.iterkeys(partition_states):
264
+ topic_partitions[tp.topic].append(tp)
265
+ for topic in six.iterkeys(topic_partitions):
266
+ for tp in topic_partitions[topic]:
267
+ self.assignment[tp] = partition_states[tp]
268
+
259
269
  def unsubscribe(self):
260
270
  """Clear all topic subscriptions and partition assignments"""
261
271
  self.subscription = None
262
272
  self._user_assignment.clear()
263
273
  self.assignment.clear()
264
274
  self.subscribed_pattern = None
275
+ self.subscription_type = SubscriptionType.NONE
265
276
 
266
277
  def group_subscription(self):
267
278
  """Get the topic subscription for the group.
@@ -303,16 +314,16 @@ class SubscriptionState(object):
303
314
  if self.is_paused(partition))
304
315
 
305
316
  def fetchable_partitions(self):
306
- """Return set of TopicPartitions that should be Fetched."""
307
- fetchable = set()
317
+ """Return ordered list of TopicPartitions that should be Fetched."""
318
+ fetchable = list()
308
319
  for partition, state in six.iteritems(self.assignment):
309
320
  if state.is_fetchable():
310
- fetchable.add(partition)
321
+ fetchable.append(partition)
311
322
  return fetchable
312
323
 
313
324
  def partitions_auto_assigned(self):
314
325
  """Return True unless user supplied partitions manually."""
315
- return self.subscription is not None
326
+ return self.subscription_type in (SubscriptionType.AUTO_TOPICS, SubscriptionType.AUTO_PATTERN)
316
327
 
317
328
  def all_consumed_offsets(self):
318
329
  """Returns consumed offsets as {TopicPartition: OffsetAndMetadata}"""
@@ -368,8 +379,9 @@ class SubscriptionState(object):
368
379
  def resume(self, partition):
369
380
  self.assignment[partition].resume()
370
381
 
371
- def _add_assigned_partition(self, partition):
372
- self.assignment[partition] = TopicPartitionState()
382
+ def move_partition_to_end(self, partition):
383
+ if partition in self.assignment:
384
+ self.assignment.move_to_end(partition)
373
385
 
374
386
 
375
387
  class TopicPartitionState(object):
kafka/coordinator/base.py CHANGED
@@ -776,12 +776,12 @@ class BaseCoordinator(object):
776
776
  if self._heartbeat_thread is not None:
777
777
  self._heartbeat_thread.disable()
778
778
 
779
- def _close_heartbeat_thread(self):
779
+ def _close_heartbeat_thread(self, timeout_ms=None):
780
780
  with self._lock:
781
781
  if self._heartbeat_thread is not None:
782
782
  log.info('Stopping heartbeat thread')
783
783
  try:
784
- self._heartbeat_thread.close()
784
+ self._heartbeat_thread.close(timeout_ms=timeout_ms)
785
785
  except ReferenceError:
786
786
  pass
787
787
  self._heartbeat_thread = None
@@ -790,13 +790,13 @@ class BaseCoordinator(object):
790
790
  if hasattr(self, '_heartbeat_thread'):
791
791
  self._close_heartbeat_thread()
792
792
 
793
- def close(self):
793
+ def close(self, timeout_ms=None):
794
794
  """Close the coordinator, leave the current group,
795
795
  and reset local generation / member_id"""
796
- self._close_heartbeat_thread()
797
- self.maybe_leave_group()
796
+ self._close_heartbeat_thread(timeout_ms=timeout_ms)
797
+ self.maybe_leave_group(timeout_ms=timeout_ms)
798
798
 
799
- def maybe_leave_group(self):
799
+ def maybe_leave_group(self, timeout_ms=None):
800
800
  """Leave the current group and reset local generation/memberId."""
801
801
  with self._client._lock, self._lock:
802
802
  if (not self.coordinator_unknown()
@@ -811,7 +811,7 @@ class BaseCoordinator(object):
811
811
  future = self._client.send(self.coordinator_id, request)
812
812
  future.add_callback(self._handle_leave_group_response)
813
813
  future.add_errback(log.error, "LeaveGroup request failed: %s")
814
- self._client.poll(future=future)
814
+ self._client.poll(future=future, timeout_ms=timeout_ms)
815
815
 
816
816
  self.reset_generation()
817
817
 
@@ -957,7 +957,7 @@ class HeartbeatThread(threading.Thread):
957
957
  log.debug('Disabling heartbeat thread')
958
958
  self.enabled = False
959
959
 
960
- def close(self):
960
+ def close(self, timeout_ms=None):
961
961
  if self.closed:
962
962
  return
963
963
  self.closed = True
@@ -972,7 +972,9 @@ class HeartbeatThread(threading.Thread):
972
972
  self.coordinator._lock.notify()
973
973
 
974
974
  if self.is_alive():
975
- self.join(self.coordinator.config['heartbeat_interval_ms'] / 1000)
975
+ if timeout_ms is None:
976
+ timeout_ms = self.coordinator.config['heartbeat_interval_ms']
977
+ self.join(timeout_ms / 1000)
976
978
  if self.is_alive():
977
979
  log.warning("Heartbeat thread did not fully terminate during close")
978
980
 
@@ -203,8 +203,8 @@ class ConsumerCoordinator(BaseCoordinator):
203
203
  def _build_metadata_snapshot(self, subscription, cluster):
204
204
  metadata_snapshot = {}
205
205
  for topic in subscription.group_subscription():
206
- partitions = cluster.partitions_for_topic(topic) or []
207
- metadata_snapshot[topic] = set(partitions)
206
+ partitions = cluster.partitions_for_topic(topic)
207
+ metadata_snapshot[topic] = partitions or set()
208
208
  return metadata_snapshot
209
209
 
210
210
  def _lookup_assignor(self, name):
@@ -449,7 +449,7 @@ class ConsumerCoordinator(BaseCoordinator):
449
449
  if autocommit:
450
450
  self._maybe_auto_commit_offsets_sync(timeout_ms=timeout_ms)
451
451
  finally:
452
- super(ConsumerCoordinator, self).close()
452
+ super(ConsumerCoordinator, self).close(timeout_ms=timeout_ms)
453
453
 
454
454
  def _invoke_completed_offset_commit_callbacks(self):
455
455
  while self.completed_offset_commits:
@@ -878,8 +878,15 @@ class ConsumerCoordinator(BaseCoordinator):
878
878
  self.next_auto_commit_deadline = time.time() + self.config['retry_backoff_ms'] / 1000
879
879
  elif time.time() > self.next_auto_commit_deadline:
880
880
  self.next_auto_commit_deadline = time.time() + self.auto_commit_interval
881
- self.commit_offsets_async(self._subscription.all_consumed_offsets(),
882
- self._commit_offsets_async_on_complete)
881
+ self._do_auto_commit_offsets_async()
882
+
883
+ def maybe_auto_commit_offsets_now(self):
884
+ if self.config['enable_auto_commit'] and not self.coordinator_unknown():
885
+ self._do_auto_commit_offsets_async()
886
+
887
+ def _do_auto_commit_offsets_async(self):
888
+ self.commit_offsets_async(self._subscription.all_consumed_offsets(),
889
+ self._commit_offsets_async_on_complete)
883
890
 
884
891
 
885
892
  class ConsumerCoordinatorMetrics(object):
kafka/producer/kafka.py CHANGED
@@ -22,6 +22,7 @@ from kafka.record.default_records import DefaultRecordBatchBuilder
22
22
  from kafka.record.legacy_records import LegacyRecordBatchBuilder
23
23
  from kafka.serializer import Serializer
24
24
  from kafka.structs import TopicPartition
25
+ from kafka.util import ensure_valid_topic_name
25
26
 
26
27
 
27
28
  log = logging.getLogger(__name__)
@@ -593,11 +594,15 @@ class KafkaProducer(object):
593
594
  Raises:
594
595
  KafkaTimeoutError: if unable to fetch topic metadata, or unable
595
596
  to obtain memory buffer prior to configured max_block_ms
597
+ TypeError: if topic is not a string
598
+ ValueError: if topic is invalid: must be chars (a-zA-Z0-9._-), and less than 250 length
599
+ AssertionError: if KafkaProducer is closed, or key and value are both None
596
600
  """
597
601
  assert not self._closed, 'KafkaProducer already closed!'
598
602
  assert value is not None or self.config['api_version'] >= (0, 8, 1), (
599
603
  'Null messages require kafka >= 0.8.1')
600
604
  assert not (value is None and key is None), 'Need at least one: key or value'
605
+ ensure_valid_topic_name(topic)
601
606
  key_bytes = value_bytes = None
602
607
  try:
603
608
  assigned_partition = None
kafka/protocol/admin.py CHANGED
@@ -737,7 +737,6 @@ DescribeConfigsResponse = [
737
737
  class DescribeLogDirsResponse_v0(Response):
738
738
  API_KEY = 35
739
739
  API_VERSION = 0
740
- FLEXIBLE_VERSION = True
741
740
  SCHEMA = Schema(
742
741
  ('throttle_time_ms', Int32),
743
742
  ('log_dirs', Array(
@@ -970,6 +969,7 @@ class AlterPartitionReassignmentsResponse_v0(Response):
970
969
  )),
971
970
  ("tags", TaggedFields)
972
971
  )
972
+ FLEXIBLE_VERSION = True
973
973
 
974
974
 
975
975
  class AlterPartitionReassignmentsRequest_v0(Request):
@@ -1017,6 +1017,7 @@ class ListPartitionReassignmentsResponse_v0(Response):
1017
1017
  )),
1018
1018
  ("tags", TaggedFields)
1019
1019
  )
1020
+ FLEXIBLE_VERSION = True
1020
1021
 
1021
1022
 
1022
1023
  class ListPartitionReassignmentsRequest_v0(Request):
kafka/protocol/api.py CHANGED
@@ -82,19 +82,15 @@ class Request(Struct):
82
82
  def to_object(self):
83
83
  return _to_object(self.SCHEMA, self)
84
84
 
85
- def build_request_header(self, correlation_id, client_id):
85
+ def build_header(self, correlation_id, client_id):
86
86
  if self.FLEXIBLE_VERSION:
87
87
  return RequestHeaderV2(self, correlation_id=correlation_id, client_id=client_id)
88
88
  return RequestHeader(self, correlation_id=correlation_id, client_id=client_id)
89
89
 
90
- def parse_response_header(self, read_buffer):
91
- if self.FLEXIBLE_VERSION:
92
- return ResponseHeaderV2.decode(read_buffer)
93
- return ResponseHeader.decode(read_buffer)
94
-
95
90
 
96
91
  @add_metaclass(abc.ABCMeta)
97
92
  class Response(Struct):
93
+ FLEXIBLE_VERSION = False
98
94
 
99
95
  @abc.abstractproperty
100
96
  def API_KEY(self):
@@ -114,6 +110,12 @@ class Response(Struct):
114
110
  def to_object(self):
115
111
  return _to_object(self.SCHEMA, self)
116
112
 
113
+ @classmethod
114
+ def parse_header(cls, read_buffer):
115
+ if cls.FLEXIBLE_VERSION:
116
+ return ResponseHeaderV2.decode(read_buffer)
117
+ return ResponseHeader.decode(read_buffer)
118
+
117
119
 
118
120
  def _to_object(schema, data):
119
121
  obj = {}
@@ -3,7 +3,7 @@ from __future__ import absolute_import
3
3
  from io import BytesIO
4
4
 
5
5
  from kafka.protocol.api import Request, Response
6
- from kafka.protocol.types import Array, Int16, Int32, Schema
6
+ from kafka.protocol.types import Array, CompactArray, CompactString, Int16, Int32, Schema, TaggedFields
7
7
 
8
8
 
9
9
  class BaseApiVersionsResponse(Response):
@@ -61,6 +61,28 @@ class ApiVersionsResponse_v2(BaseApiVersionsResponse):
61
61
  SCHEMA = ApiVersionsResponse_v1.SCHEMA
62
62
 
63
63
 
64
+ class ApiVersionsResponse_v3(BaseApiVersionsResponse):
65
+ API_KEY = 18
66
+ API_VERSION = 3
67
+ SCHEMA = Schema(
68
+ ('error_code', Int16),
69
+ ('api_versions', CompactArray(
70
+ ('api_key', Int16),
71
+ ('min_version', Int16),
72
+ ('max_version', Int16),
73
+ ('_tagged_fields', TaggedFields))),
74
+ ('throttle_time_ms', Int32),
75
+ ('_tagged_fields', TaggedFields)
76
+ )
77
+ # Note: ApiVersions Response does not send FLEXIBLE_VERSION header!
78
+
79
+
80
+ class ApiVersionsResponse_v4(BaseApiVersionsResponse):
81
+ API_KEY = 18
82
+ API_VERSION = 4
83
+ SCHEMA = ApiVersionsResponse_v3.SCHEMA
84
+
85
+
64
86
  class ApiVersionsRequest_v0(Request):
65
87
  API_KEY = 18
66
88
  API_VERSION = 0
@@ -82,9 +104,31 @@ class ApiVersionsRequest_v2(Request):
82
104
  SCHEMA = ApiVersionsRequest_v1.SCHEMA
83
105
 
84
106
 
107
+ class ApiVersionsRequest_v3(Request):
108
+ API_KEY = 18
109
+ API_VERSION = 3
110
+ RESPONSE_TYPE = ApiVersionsResponse_v3
111
+ SCHEMA = Schema(
112
+ ('client_software_name', CompactString('utf-8')),
113
+ ('client_software_version', CompactString('utf-8')),
114
+ ('_tagged_fields', TaggedFields)
115
+ )
116
+ FLEXIBLE_VERSION = True
117
+
118
+
119
+ class ApiVersionsRequest_v4(Request):
120
+ API_KEY = 18
121
+ API_VERSION = 4
122
+ RESPONSE_TYPE = ApiVersionsResponse_v4
123
+ SCHEMA = ApiVersionsRequest_v3.SCHEMA
124
+ FLEXIBLE_VERSION = True
125
+
126
+
85
127
  ApiVersionsRequest = [
86
128
  ApiVersionsRequest_v0, ApiVersionsRequest_v1, ApiVersionsRequest_v2,
129
+ ApiVersionsRequest_v3, ApiVersionsRequest_v4,
87
130
  ]
88
131
  ApiVersionsResponse = [
89
132
  ApiVersionsResponse_v0, ApiVersionsResponse_v1, ApiVersionsResponse_v2,
133
+ ApiVersionsResponse_v3, ApiVersionsResponse_v4,
90
134
  ]
@@ -63,4 +63,6 @@ BROKER_API_VERSIONS = {
63
63
 
64
64
  (3, 9): {0: (0, 11), 1: (0, 17), 2: (0, 9), 3: (0, 12), 4: (0, 7), 5: (0, 4), 6: (0, 8), 7: (0, 3), 8: (0, 9), 9: (0, 9), 10: (0, 6), 11: (0, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 5), 16: (0, 5), 17: (0, 1), 18: (0, 4), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 5), 23: (0, 4), 24: (0, 5), 25: (0, 4), 26: (0, 4), 27: (0, 1), 28: (0, 4), 29: (0, 3), 30: (0, 3), 31: (0, 3), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 4), 36: (0, 2), 37: (0, 3), 38: (0, 3), 39: (0, 2), 40: (0, 2), 41: (0, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 3), 57: (0, 1), 58: (0, 0), 60: (0, 1), 61: (0, 0), 65: (0, 0), 66: (0, 1), 67: (0, 0), 68: (0, 0), 69: (0, 0)},
65
65
 
66
+ (4, 0): {0: (0, 12), 1: (4, 17), 2: (1, 10), 3: (0, 13), 8: (2, 9), 9: (1, 9), 10: (0, 6), 11: (2, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 6), 16: (0, 5), 17: (0, 1), 18: (0, 4), 19: (2, 7), 20: (1, 6), 21: (0, 2), 22: (0, 5), 23: (2, 4), 24: (0, 5), 25: (0, 4), 26: (0, 5), 27: (1, 1), 28: (0, 5), 29: (1, 3), 30: (1, 3), 31: (1, 3), 32: (1, 4), 33: (0, 2), 34: (1, 2), 35: (1, 4), 36: (0, 2), 37: (0, 3), 38: (1, 3), 39: (1, 2), 40: (1, 2), 41: (1, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 55: (0, 2), 57: (0, 2), 60: (0, 2), 61: (0, 0), 64: (0, 0), 65: (0, 0), 66: (0, 1), 68: (0, 1), 69: (0, 1), 74: (0, 0), 75: (0, 0), 80: (0, 0), 81: (0, 0)},
67
+
66
68
  }
kafka/protocol/parser.py CHANGED
@@ -59,7 +59,7 @@ class KafkaProtocol(object):
59
59
  if correlation_id is None:
60
60
  correlation_id = self._next_correlation_id()
61
61
 
62
- header = request.build_request_header(correlation_id=correlation_id, client_id=self._client_id)
62
+ header = request.build_header(correlation_id=correlation_id, client_id=self._client_id)
63
63
  message = b''.join([header.encode(), request.encode()])
64
64
  size = Int32.encode(len(message))
65
65
  data = size + message
@@ -136,13 +136,14 @@ class KafkaProtocol(object):
136
136
  if not self.in_flight_requests:
137
137
  raise Errors.CorrelationIdError('No in-flight-request found for server response')
138
138
  (correlation_id, request) = self.in_flight_requests.popleft()
139
- response_header = request.parse_response_header(read_buffer)
139
+ response_type = request.RESPONSE_TYPE
140
+ response_header = response_type.parse_header(read_buffer)
140
141
  recv_correlation_id = response_header.correlation_id
141
142
  log.debug('Received correlation id: %d', recv_correlation_id)
142
143
  # 0.8.2 quirk
143
144
  if (recv_correlation_id == 0 and
144
145
  correlation_id != 0 and
145
- request.RESPONSE_TYPE is FindCoordinatorResponse[0] and
146
+ response_type is FindCoordinatorResponse[0] and
146
147
  (self._api_version == (0, 8, 2) or self._api_version is None)):
147
148
  log.warning('Kafka 0.8.2 quirk -- GroupCoordinatorResponse'
148
149
  ' Correlation ID does not match request. This'
@@ -156,15 +157,15 @@ class KafkaProtocol(object):
156
157
  % (correlation_id, recv_correlation_id))
157
158
 
158
159
  # decode response
159
- log.debug('Processing response %s', request.RESPONSE_TYPE.__name__)
160
+ log.debug('Processing response %s', response_type.__name__)
160
161
  try:
161
- response = request.RESPONSE_TYPE.decode(read_buffer)
162
+ response = response_type.decode(read_buffer)
162
163
  except ValueError:
163
164
  read_buffer.seek(0)
164
165
  buf = read_buffer.read()
165
166
  log.error('Response %d [ResponseType: %s Request: %s]:'
166
167
  ' Unable to decode %d-byte buffer: %r',
167
- correlation_id, request.RESPONSE_TYPE,
168
+ correlation_id, response_type,
168
169
  request, len(buf), buf)
169
170
  raise Errors.KafkaProtocolError('Unable to decode response')
170
171
 
kafka/util.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from __future__ import absolute_import
2
2
 
3
3
  import binascii
4
+ import re
4
5
  import time
5
6
  import weakref
6
7
 
@@ -43,6 +44,29 @@ def timeout_ms_fn(timeout_ms, error_message):
43
44
  return inner_timeout_ms
44
45
 
45
46
 
47
+ # Taken from: https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java#L29
48
+ TOPIC_MAX_LENGTH = 249
49
+ TOPIC_LEGAL_CHARS = re.compile('^[a-zA-Z0-9._-]+$')
50
+
51
+ def ensure_valid_topic_name(topic):
52
+ """ Ensures that the topic name is valid according to the kafka source. """
53
+
54
+ # See Kafka Source:
55
+ # https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java
56
+ if topic is None:
57
+ raise TypeError('All topics must not be None')
58
+ if not isinstance(topic, six.string_types):
59
+ raise TypeError('All topics must be strings')
60
+ if len(topic) == 0:
61
+ raise ValueError('All topics must be non-empty strings')
62
+ if topic == '.' or topic == '..':
63
+ raise ValueError('Topic name cannot be "." or ".."')
64
+ if len(topic) > TOPIC_MAX_LENGTH:
65
+ raise ValueError('Topic name is illegal, it can\'t be longer than {0} characters, topic: "{1}"'.format(TOPIC_MAX_LENGTH, topic))
66
+ if not TOPIC_LEGAL_CHARS.match(topic):
67
+ raise ValueError('Topic name "{0}" is illegal, it contains a character other than ASCII alphanumerics, ".", "_" and "-"'.format(topic))
68
+
69
+
46
70
  class WeakMethod(object):
47
71
  """
48
72
  Callable that weakly references a method and the object it is bound to. It
kafka/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = '2.1.2'
1
+ __version__ = '2.1.3'
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: kafka-python
3
- Version: 2.1.2
3
+ Version: 2.1.3
4
4
  Summary: Pure Python client for Apache Kafka
5
5
  Author-email: Dana Powers <dana.powers@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/dpkp/kafka-python
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3.9
21
21
  Classifier: Programming Language :: Python :: 3.10
22
22
  Classifier: Programming Language :: Python :: 3.11
23
23
  Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
24
25
  Classifier: Programming Language :: Python :: Implementation :: CPython
25
26
  Classifier: Programming Language :: Python :: Implementation :: PyPy
26
27
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
@@ -1,27 +1,27 @@
1
1
  kafka/__init__.py,sha256=4dvHKZAxmD_4tfJ5wGcRV2X78vPcm8vsUoqceULevjA,1077
2
- kafka/client_async.py,sha256=e9RsJXXPRajxODz5KtBAndiEqJytdP5xHWeb157l4xM,54921
3
- kafka/cluster.py,sha256=tFv8JQfloV6tJ4Yghp5gTXpvcJjL-kJNREVijCxal44,15828
2
+ kafka/client_async.py,sha256=RMoraXTtsHUox3G1iaJImZDFrXdTt3Y1Sd_TFSQEVMM,55215
3
+ kafka/cluster.py,sha256=X67LdVTmkyJalYZfagb_Ah1cpwK4xywd2gTUhHGIQRA,15865
4
4
  kafka/codec.py,sha256=8NZpnehzNrhSBIjzbPVSvyFbSeLAqEntE7BfVHu-_9I,10036
5
- kafka/conn.py,sha256=3DZ-Rv_OFvlQa6iaFkbiyrZkI9QCAp9k1HRRjPjM-jE,68080
5
+ kafka/conn.py,sha256=dG4qiocVGIjb8QZ5G_2RyUKFTK-v64ajyLsDQQH-rWU,69099
6
6
  kafka/errors.py,sha256=LBi6SMBV-4bkJsNJhEDuClfe0pJLUvc__bqGkRyVqX0,34550
7
7
  kafka/future.py,sha256=ZQStbfUYIPJRrgMfAWxxjrIRVxsw4WCtSR0J0bkyGno,2847
8
8
  kafka/socks5_wrapper.py,sha256=6woOaCTJXJ5e89_zdyW5BjOpyE4rCbYFH-kd-FeuPuk,9827
9
9
  kafka/structs.py,sha256=SJGzmLdV21jZyQ7247k0WFy16UiusgTHK3I-e4qzI-E,3058
10
- kafka/util.py,sha256=YvnY5HeXcg2k1sWSuH9xIC19D6OTeDWZPYBphjtICzA,2509
11
- kafka/version.py,sha256=m5qImnzcnIhayvILFVqEnXPYsN-vE0vxokygykKhRfw,22
10
+ kafka/util.py,sha256=LV6BlELC8-889FpWM1RECX25sccoVrY2U0r5dRZjLNo,3781
11
+ kafka/version.py,sha256=hgD1miBO_f3fboq1GKyV4DdK_igCLGJFnZRD7l9oNRs,22
12
12
  kafka/admin/__init__.py,sha256=S_XxqyyV480_yXhttK79XZqNAmZyXRjspd3SoqYykE8,720
13
13
  kafka/admin/acl_resource.py,sha256=ak_dUsSni4SyP0ORbSKenZpwTy0Ykxq3FSt_9XgLR8k,8265
14
- kafka/admin/client.py,sha256=BBHHoOEwswCeEVOQ7zj1JFybeDF7x2pQCpYh-gdhcVA,78933
14
+ kafka/admin/client.py,sha256=FlyXa0YR7VQoqO0dC9RxU4Gx56mlAE2homES-HSNmGs,78933
15
15
  kafka/admin/config_resource.py,sha256=_JZWN_Q7jbuTtq2kdfHxWyTt_jI1LI-xnVGsf6oYGyY,1039
16
16
  kafka/admin/new_partitions.py,sha256=rYSb7S6VL706ZauSmiN5J9GDsep0HYRmkkAZUgT2JIg,757
17
17
  kafka/admin/new_topic.py,sha256=fvezLP9JXumqX-nU27Fgo0tj4d85ybcJgKluQImm3-0,1306
18
18
  kafka/consumer/__init__.py,sha256=NDdvtyuJgFyQZahqL9i5sYXGP6rOMIXWwHQEaZ1fCcs,122
19
- kafka/consumer/fetcher.py,sha256=I_vm-qRqOo1X18H_J2uxjsg21ko6vlBUYS9_wDyxgVQ,59664
20
- kafka/consumer/group.py,sha256=z9GoDQZ90cQlH5dtnuI3BsMsREdtpY2ybpkYCxpCVK8,57718
21
- kafka/consumer/subscription_state.py,sha256=I_4SZR4mUBLqoxHj0DFnMu6Idj44eECb6_j6gPM-Dn0,21452
19
+ kafka/consumer/fetcher.py,sha256=FkpZ7xD8W8aYZWa1HwWMn50VUCMYstFJL50uVr6Qkyg,60735
20
+ kafka/consumer/group.py,sha256=zsI3q_jhoCUxf3TvyFztyxpFyyvQum1YwE0knF3akD0,58471
21
+ kafka/consumer/subscription_state.py,sha256=oRigrCJNVVk8fydSnOpyBgydCkok6nNgu7H9nTVok6k,21918
22
22
  kafka/coordinator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- kafka/coordinator/base.py,sha256=grtpvkeR_03GRxLUo71PEZCvIA4ZH1N92ggYMw_LXB4,48877
24
- kafka/coordinator/consumer.py,sha256=LJKZ7ZxjWBaG_wiWNMLZ2G59_baLS3sCB-HElEEPgH4,42708
23
+ kafka/coordinator/base.py,sha256=v0qsbclips65TXDBJWKPHWXsencmCLeXjoJdw_udkYo,49106
24
+ kafka/coordinator/consumer.py,sha256=1B-39ExFQkW3Lm6Dt_p-lKGk0JeVhLTQNAUkmI4KtQ0,42986
25
25
  kafka/coordinator/heartbeat.py,sha256=WJqZGnXHG7TTq1Is3D0mKDis-bBwWVZlSgQiUoZv1jU,2304
26
26
  kafka/coordinator/protocol.py,sha256=wTaIOnUVbj0CKXZ82FktZo-zMRvOCk3hdQAoHJ62e3I,1041
27
27
  kafka/coordinator/assignors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -61,15 +61,15 @@ kafka/partitioner/default.py,sha256=tW-RC1PWIPRDEbeEAaPTLn-00oiZnXoVouEk9AnYE4w,
61
61
  kafka/producer/__init__.py,sha256=i3Wxih0NHjmqCkRNE54ial8fBp9siqabUE6ZGyL6oX8,122
62
62
  kafka/producer/buffer.py,sha256=1ucTlZOQKBa37c_cKUNgFmHpO0P1WEQ9XDqTxmsOrG0,4370
63
63
  kafka/producer/future.py,sha256=CEUWEmYKeTMMPjP-SjSJY1RZ2QFn7ebcK0G0sSWx4xo,3039
64
- kafka/producer/kafka.py,sha256=Y6BexiE5G0-3AhvWQaDbeIB2a3PeYiElu67ymgOMllk,39947
64
+ kafka/producer/kafka.py,sha256=WLtcGXAj27Z-ts26RffPkoJuHSoPtwHugZ8SSbQudXQ,40268
65
65
  kafka/producer/record_accumulator.py,sha256=PtLmcTtmZWikeStmV5tFNH0ABIkUQT3SO2n8zVcZFcc,25155
66
66
  kafka/producer/sender.py,sha256=6vKfmPgzcjksFlT53n59so5KCewlqk_wov0i8rL-JWQ,21521
67
67
  kafka/protocol/__init__.py,sha256=T1RBBlTH3zze0Cr1RqemPD4Z1b3IUDRmLOBfZTsPgLs,1184
68
68
  kafka/protocol/abstract.py,sha256=uOnuf6D8OTkL31Tp2QXG3VlzDPHVELGzM_bpSVa-_iw,424
69
- kafka/protocol/admin.py,sha256=pXcxYADdI8aa9emwl6OYAAoAf6_PFVYvM8neHruo880,30692
70
- kafka/protocol/api.py,sha256=dPtYU1VPUd5nCzc5AfVgtRSEqZw2ejoTCxcjgjv2TNc,3786
71
- kafka/protocol/api_versions.py,sha256=guLhFqRbdAcJ4hIjA5o0UtjlaG9dN_9BeOq3uA9xg0I,2246
72
- kafka/protocol/broker_api_versions.py,sha256=lWOcGgPEVyhxAeVgs5koLQLGCAPo8w5M1xTCsqUzYBA,15778
69
+ kafka/protocol/admin.py,sha256=11zE9sVrb34QY6AwYVvvWiwg4iycnq9aDSONCiuE9bo,30720
70
+ kafka/protocol/api.py,sha256=ZI7DYb85UTL4BuhpwKGAyAKEv4Dl_y69AEW78M233lg,3813
71
+ kafka/protocol/api_versions.py,sha256=VC9pvorLM--BE2uw0SvpeeMQPfWmcOvTgDFigLuGuVM,3546
72
+ kafka/protocol/broker_api_versions.py,sha256=LA_pdbfsJClBxQPi01u5yVRLUIpZRUz6LiqhSsj8cgU,16523
73
73
  kafka/protocol/commit.py,sha256=-COlx8lTVCI6Zg4ZebDnsX4Wy_V69Kjw8V85FRd3Ics,8627
74
74
  kafka/protocol/fetch.py,sha256=dYk4GigzimbtsW3_AS1mai3Q9fgkdpjr4fyRvB5jZ_c,11089
75
75
  kafka/protocol/find_coordinator.py,sha256=sROaXxqAje2BSaNunh6QMTdVcR7uil5kz-woZqdg2BY,1697
@@ -79,7 +79,7 @@ kafka/protocol/list_offsets.py,sha256=3kvif8X-B2LBSpR3qwbkGYyJ0GLKbQdENDGpxWV0sc
79
79
  kafka/protocol/message.py,sha256=9wNwJvfl9bsrdk_YcxbmAFjgvwZ5R1EBLSif2KILg9s,7657
80
80
  kafka/protocol/metadata.py,sha256=FyelkBHhUbC2IFTS9uAv0iLLtmAb1N6EXT3vn-nlt1s,7313
81
81
  kafka/protocol/offset_for_leader_epoch.py,sha256=aunp-LMIuwcCsKwvgBZ8OcUhcgb0blaq5d3PAh22JOo,4304
82
- kafka/protocol/parser.py,sha256=UZtOtoHMaM98MG1-oSSJsxu-ZOwXMq6Z85qRDGZ19V8,6856
82
+ kafka/protocol/parser.py,sha256=OB3yebOp6JSQpl-5fEpV1_0SdAtYkiqIk6ffDIkHzu0,6859
83
83
  kafka/protocol/pickle.py,sha256=FGEv-1l1aXY3TogqzCwOS1gCNpEg6-xNLbrysqNdHcs,920
84
84
  kafka/protocol/produce.py,sha256=JDWCRY5B7eSL3vp0N977MIgYMrR2qxgrbUZrqQMlGWk,6540
85
85
  kafka/protocol/sasl_authenticate.py,sha256=HaFAHPRhCKgyGEoJ5LwGffcpMUBNCphgBgXCsITLho8,1150
@@ -108,7 +108,7 @@ kafka/vendor/enum34.py,sha256=-u-lxAiJMt6ru4Do7NUDY9OpeWkYJMksb2xengJawFE,31204
108
108
  kafka/vendor/selectors34.py,sha256=gxejLO4eXf8mRSGXaQiknPig3GdX1rtsZiYOQJVuAy8,20594
109
109
  kafka/vendor/six.py,sha256=lLBa9_HrANP5BMZ7twEzg1M3wofwPmXyptuWmHX0brY,34826
110
110
  kafka/vendor/socketpair.py,sha256=Fi3PoY1Okkppab720wFk1BhHXyjcw7hi5DwhqrYZH2Y,2737
111
- kafka_python-2.1.2.dist-info/METADATA,sha256=Fz6e2KUqnM3a4CGyL7r6WXW9YCxLkrF8QVfENC3nygk,9075
112
- kafka_python-2.1.2.dist-info/WHEEL,sha256=Kol19cahXavY536r5Aj6aAgK_6CmctrOu3bgNJMSNJA,109
113
- kafka_python-2.1.2.dist-info/top_level.txt,sha256=IivJz7l5WHdLNDT6RIiVAlhjQzYRwGqBBmKHZ7WjPeM,6
114
- kafka_python-2.1.2.dist-info/RECORD,,
111
+ kafka_python-2.1.3.dist-info/METADATA,sha256=Ol7WhdSgEXbYk_3kqyxi-oaMFAmQ3dyDhzp86zG2IJ8,9126
112
+ kafka_python-2.1.3.dist-info/WHEEL,sha256=o0rarkcOb9LpX8ZZLivH8WexZTjr4ngUzRfLSntlBA8,109
113
+ kafka_python-2.1.3.dist-info/top_level.txt,sha256=IivJz7l5WHdLNDT6RIiVAlhjQzYRwGqBBmKHZ7WjPeM,6
114
+ kafka_python-2.1.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.1.0)
2
+ Generator: setuptools (78.0.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py2-none-any
5
5
  Tag: py3-none-any