PyPI - kafka-python - Versions diffs - 2.1.2__tar.gz → 2.1.3__tar.gz - Mend

kafka-python 2.1.2__tar.gz → 2.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (152) hide show

{kafka_python-2.1.2 → kafka_python-2.1.3}/CHANGES.md RENAMED Viewed

@@ -1,3 +1,25 @@
+# 2.1.3 (Mar 25, 2025)
+Fixes
+* Fix crash when switching to closest compatible api_version in KafkaClient (#2567)
+* Fix maximum version to send an OffsetFetchRequest in KafkaAdminClient (#2563)
+* Return empty set from consumer.partitions_for_topic when topic not found (#2556)
+Improvements
+* KIP-511: Use ApiVersions v4 on initial connect w/ client_software_name + version (#2558)
+* KIP-74: Manage assigned partition order in consumer (#2562)
+* KIP-70: Auto-commit offsets on consumer.unsubscribe(), defer assignment changes to rejoin  (#2560)
+* Use SubscriptionType to track topics/pattern/user assignment (#2565)
+* Add optional timeout_ms kwarg to consumer.close() (#2564)
+* Move ensure_valid_topic_name to kafka.util; use in client and producer (#2561)
+Testing
+* Support KRaft / 4.0 brokers in tests (#2559)
+* Test older pythons against 4.0 broker
+Compatibility
+* Add python 3.13 to compatibility list
 # 2.1.2 (Mar 17, 2025)
 Fixes

{kafka_python-2.1.2 → kafka_python-2.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: kafka-python
-Version: 2.1.2
+Version: 2.1.3
 Summary: Pure Python client for Apache Kafka
 Author-email: Dana Powers <dana.powers@gmail.com>
 Project-URL: Homepage, https://github.com/dpkp/kafka-python
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Classifier: Topic :: Software Development :: Libraries :: Python Modules

{kafka_python-2.1.2 → kafka_python-2.1.3}/kafka/admin/client.py RENAMED Viewed

@@ -1496,7 +1496,7 @@ class KafkaAdminClient(object):
             A message future
         """
         version = self._client.api_version(OffsetFetchRequest, max_version=5)
-        if version <= 3:
+        if version <= 5:
             if partitions is None:
                 if version <= 1:
                     raise ValueError(

{kafka_python-2.1.2 → kafka_python-2.1.3}/kafka/client_async.py RENAMED Viewed

@@ -27,7 +27,7 @@ from kafka.metrics.stats import Avg, Count, Rate
 from kafka.metrics.stats.rate import TimeUnit
 from kafka.protocol.broker_api_versions import BROKER_API_VERSIONS
 from kafka.protocol.metadata import MetadataRequest
-from kafka.util import Dict, WeakMethod
+from kafka.util import Dict, WeakMethod, ensure_valid_topic_name
 # Although this looks unused, it actually monkey-patches socket.socketpair()
 # and should be left in as long as we're using socket.socketpair() in this file
 from kafka.vendor import socketpair # noqa: F401
@@ -276,6 +276,7 @@ class KafkaClient(object):
             if compatible_version:
                 log.warning('Configured api_version %s not supported; using %s',
                             self.config['api_version'], compatible_version)
+                self.config['api_version'] = compatible_version
                 self._api_versions = BROKER_API_VERSIONS[compatible_version]
             else:
                 raise Errors.UnrecognizedBrokerVersion(self.config['api_version'])
@@ -909,7 +910,13 @@ class KafkaClient(object):
         Returns:
             Future: resolves after metadata request/response
+        Raises:
+            TypeError: if topic is not a string
+            ValueError: if topic is invalid: must be chars (a-zA-Z0-9._-), and less than 250 length
         """
+        ensure_valid_topic_name(topic)
         if topic in self._topics:
             return Future().success(set(self._topics))

{kafka_python-2.1.2 → kafka_python-2.1.3}/kafka/cluster.py RENAMED Viewed

@@ -112,6 +112,7 @@ class ClusterMetadata(object):
         Returns:
             set: {partition (int), ...}
+            None if topic not found.
         """
         if topic not in self._partitions:
             return None

{kafka_python-2.1.2 → kafka_python-2.1.3}/kafka/conn.py RENAMED Viewed

@@ -101,6 +101,10 @@ class BrokerConnection(object):
             server-side log entries that correspond to this client. Also
             submitted to GroupCoordinator for logging with respect to
             consumer group administration. Default: 'kafka-python-{version}'
+        client_software_name (str): Sent to kafka broker for KIP-511.
+            Default: 'kafka-python'
+        client_software_version (str): Sent to kafka broker for KIP-511.
+            Default: The kafka-python version (via kafka.version).
         reconnect_backoff_ms (int): The amount of time in milliseconds to
             wait before attempting to reconnect to a given host.
             Default: 50.
@@ -191,6 +195,8 @@ class BrokerConnection(object):
     DEFAULT_CONFIG = {
         'client_id': 'kafka-python-' + __version__,
+        'client_software_name': 'kafka-python',
+        'client_software_version': __version__,
         'node_id': 0,
         'request_timeout_ms': 30000,
         'reconnect_backoff_ms': 50,
@@ -242,7 +248,7 @@ class BrokerConnection(object):
         self._api_versions = None
         self._api_version = None
         self._check_version_idx = None
-        self._api_versions_idx = 2
+        self._api_versions_idx = 4 # version of ApiVersionsRequest to try on first connect
         self._throttle_time = None
         self._socks5_proxy = None
@@ -538,7 +544,14 @@ class BrokerConnection(object):
                 log.debug('%s: Using pre-configured api_version %s for ApiVersions', self, self._api_version)
                 return True
             elif self._check_version_idx is None:
-                request = ApiVersionsRequest[self._api_versions_idx]()
+                version = self._api_versions_idx
+                if version >= 3:
+                    request = ApiVersionsRequest[version](
+                        client_software_name=self.config['client_software_name'],
+                        client_software_version=self.config['client_software_version'],
+                        _tagged_fields={})
+                else:
+                    request = ApiVersionsRequest[version]()
                 future = Future()
                 response = self._send(request, blocking=True, request_timeout_ms=(self.config['api_version_auto_timeout_ms'] * 0.8))
                 response.add_callback(self._handle_api_versions_response, future)
@@ -573,11 +586,15 @@ class BrokerConnection(object):
     def _handle_api_versions_response(self, future, response):
         error_type = Errors.for_code(response.error_code)
-        # if error_type i UNSUPPORTED_VERSION: retry w/ latest version from response
         if error_type is not Errors.NoError:
             future.failure(error_type())
             if error_type is Errors.UnsupportedVersionError:
                 self._api_versions_idx -= 1
+                for api_key, min_version, max_version, *rest in response.api_versions:
+                    # If broker provides a lower max_version, skip to that
+                    if api_key == response.API_KEY:
+                        self._api_versions_idx = min(self._api_versions_idx, max_version)
+                        break
                 if self._api_versions_idx >= 0:
                     self._api_versions_future = None
                     self.state = ConnectionStates.API_VERSIONS_SEND
@@ -587,7 +604,7 @@ class BrokerConnection(object):
             return
         self._api_versions = dict([
             (api_key, (min_version, max_version))
-            for api_key, min_version, max_version in response.api_versions
+            for api_key, min_version, max_version, *rest in response.api_versions
         ])
         self._api_version = self._infer_broker_version_from_api_versions(self._api_versions)
         log.info('Broker version identified as %s', '.'.join(map(str, self._api_version)))

{kafka_python-2.1.2 → kafka_python-2.1.3}/kafka/consumer/fetcher.py RENAMED Viewed

@@ -4,7 +4,6 @@ import collections
 import copy
 import itertools
 import logging
-import random
 import sys
 import time
@@ -57,7 +56,6 @@ class Fetcher(six.Iterator):
         'max_partition_fetch_bytes': 1048576,
         'max_poll_records': sys.maxsize,
         'check_crcs': True,
-        'iterator_refetch_records': 1,  # undocumented -- interface may change
         'metric_group_prefix': 'consumer',
         'retry_backoff_ms': 100,
         'enable_incremental_fetch_sessions': True,
@@ -380,10 +378,13 @@ class Fetcher(six.Iterator):
             # as long as the partition is still assigned
             position = self._subscriptions.assignment[tp].position
             if part.next_fetch_offset == position.offset:
-                part_records = part.take(max_records)
                 log.debug("Returning fetched records at offset %d for assigned"
                           " partition %s", position.offset, tp)
-                drained[tp].extend(part_records)
+                part_records = part.take(max_records)
+                # list.extend([]) is a noop, but because drained is a defaultdict
+                # we should avoid initializing the default list unless there are records
+                if part_records:
+                    drained[tp].extend(part_records)
                 # We want to increment subscription position if (1) we're using consumer.poll(),
                 # or (2) we didn't return any records (consumer iterator will update position
                 # when each message is yielded). There may be edge cases where we re-fetch records
@@ -562,13 +563,11 @@ class Fetcher(six.Iterator):
     def _fetchable_partitions(self):
         fetchable = self._subscriptions.fetchable_partitions()
         # do not fetch a partition if we have a pending fetch response to process
+        discard = {fetch.topic_partition for fetch in self._completed_fetches}
         current = self._next_partition_records
-        pending = copy.copy(self._completed_fetches)
         if current:
-            fetchable.discard(current.topic_partition)
-        for fetch in pending:
-            fetchable.discard(fetch.topic_partition)
-        return fetchable
+            discard.add(current.topic_partition)
+        return [tp for tp in fetchable if tp not in discard]
     def _create_fetch_requests(self):
         """Create fetch requests for all assigned partitions, grouped by node.
@@ -581,7 +580,7 @@ class Fetcher(six.Iterator):
         # create the fetch info as a dict of lists of partition info tuples
         # which can be passed to FetchRequest() via .items()
         version = self._client.api_version(FetchRequest, max_version=10)
-        fetchable = collections.defaultdict(dict)
+        fetchable = collections.defaultdict(collections.OrderedDict)
         for partition in self._fetchable_partitions():
             node_id = self._client.cluster.leader_for_partition(partition)
@@ -695,10 +694,7 @@ class Fetcher(six.Iterator):
                           for partition_data in partitions])
         metric_aggregator = FetchResponseMetricAggregator(self._sensors, partitions)
-        # randomized ordering should improve balance for short-lived consumers
-        random.shuffle(response.topics)
         for topic, partitions in response.topics:
-            random.shuffle(partitions)
             for partition_data in partitions:
                 tp = TopicPartition(topic, partition_data[0])
                 fetch_offset = fetch_offsets[tp]
@@ -733,8 +729,6 @@ class Fetcher(six.Iterator):
                           " since it is no longer fetchable", tp)
             elif error_type is Errors.NoError:
-                self._subscriptions.assignment[tp].highwater = highwater
                 # we are interested in this fetch only if the beginning
                 # offset (of the *request*) matches the current consumed position
                 # Note that the *response* may return a messageset that starts
@@ -748,30 +742,35 @@ class Fetcher(six.Iterator):
                     return None
                 records = MemoryRecords(completed_fetch.partition_data[-1])
-                if records.has_next():
-                    log.debug("Adding fetched record for partition %s with"
-                              " offset %d to buffered record list", tp,
-                              position.offset)
-                    parsed_records = self.PartitionRecords(fetch_offset, tp, records,
-                                                           self.config['key_deserializer'],
-                                                           self.config['value_deserializer'],
-                                                           self.config['check_crcs'],
-                                                           completed_fetch.metric_aggregator)
-                    return parsed_records
-                elif records.size_in_bytes() > 0:
-                    # we did not read a single message from a non-empty
-                    # buffer because that message's size is larger than
-                    # fetch size, in this case record this exception
-                    record_too_large_partitions = {tp: fetch_offset}
-                    raise RecordTooLargeError(
-                        "There are some messages at [Partition=Offset]: %s "
-                        " whose size is larger than the fetch size %s"
-                        " and hence cannot be ever returned."
-                        " Increase the fetch size, or decrease the maximum message"
-                        " size the broker will allow." % (
-                            record_too_large_partitions,
-                            self.config['max_partition_fetch_bytes']),
-                        record_too_large_partitions)
+                log.debug("Preparing to read %s bytes of data for partition %s with offset %d",
+                          records.size_in_bytes(), tp, fetch_offset)
+                parsed_records = self.PartitionRecords(fetch_offset, tp, records,
+                                                       self.config['key_deserializer'],
+                                                       self.config['value_deserializer'],
+                                                       self.config['check_crcs'],
+                                                       completed_fetch.metric_aggregator,
+                                                       self._on_partition_records_drain)
+                if not records.has_next() and records.size_in_bytes() > 0:
+                    if completed_fetch.response_version < 3:
+                        # Implement the pre KIP-74 behavior of throwing a RecordTooLargeException.
+                        record_too_large_partitions = {tp: fetch_offset}
+                        raise RecordTooLargeError(
+                            "There are some messages at [Partition=Offset]: %s "
+                            " whose size is larger than the fetch size %s"
+                            " and hence cannot be ever returned. Please condier upgrading your broker to 0.10.1.0 or"
+                            " newer to avoid this issue. Alternatively, increase the fetch size on the client (using"
+                            " max_partition_fetch_bytes)" % (
+                                record_too_large_partitions,
+                                self.config['max_partition_fetch_bytes']),
+                            record_too_large_partitions)
+                    else:
+                        # This should not happen with brokers that support FetchRequest/Response V3 or higher (i.e. KIP-74)
+                        raise Errors.KafkaError("Failed to make progress reading messages at %s=%s."
+                                                " Received a non-empty fetch response from the server, but no"
+                                                " complete records were found." % (tp, fetch_offset))
+                if highwater >= 0:
+                    self._subscriptions.assignment[tp].highwater = highwater
             elif error_type in (Errors.NotLeaderForPartitionError,
                                 Errors.ReplicaNotAvailableError,
@@ -805,14 +804,25 @@ class Fetcher(six.Iterator):
             if parsed_records is None:
                 completed_fetch.metric_aggregator.record(tp, 0, 0)
-        return None
+            if error_type is not Errors.NoError:
+                # we move the partition to the end if there was an error. This way, it's more likely that partitions for
+                # the same topic can remain together (allowing for more efficient serialization).
+                self._subscriptions.move_partition_to_end(tp)
+        return parsed_records
+    def _on_partition_records_drain(self, partition_records):
+        # we move the partition to the end if we received some bytes. This way, it's more likely that partitions
+        # for the same topic can remain together (allowing for more efficient serialization).
+        if partition_records.bytes_read > 0:
+            self._subscriptions.move_partition_to_end(partition_records.topic_partition)
     def close(self):
         if self._next_partition_records is not None:
             self._next_partition_records.drain()
     class PartitionRecords(object):
-        def __init__(self, fetch_offset, tp, records, key_deserializer, value_deserializer, check_crcs, metric_aggregator):
+        def __init__(self, fetch_offset, tp, records, key_deserializer, value_deserializer, check_crcs, metric_aggregator, on_drain):
             self.fetch_offset = fetch_offset
             self.topic_partition = tp
             self.leader_epoch = -1
@@ -824,6 +834,7 @@ class Fetcher(six.Iterator):
             self.record_iterator = itertools.dropwhile(
                 self._maybe_skip_record,
                 self._unpack_records(tp, records, key_deserializer, value_deserializer))
+            self.on_drain = on_drain
         def _maybe_skip_record(self, record):
             # When fetching an offset that is in the middle of a
@@ -845,6 +856,7 @@ class Fetcher(six.Iterator):
             if self.record_iterator is not None:
                 self.record_iterator = None
                 self.metric_aggregator.record(self.topic_partition, self.bytes_read, self.records_read)
+                self.on_drain(self)
         def take(self, n=None):
             return list(itertools.islice(self.record_iterator, 0, n))
@@ -943,6 +955,13 @@ class FetchSessionHandler(object):
         self.session_partitions = {}
     def build_next(self, next_partitions):
+        """
+        Arguments:
+            next_partitions (dict): TopicPartition -> TopicPartitionState
+        Returns:
+            FetchRequestData
+        """
         if self.next_metadata.is_full:
             log.debug("Built full fetch %s for node %s with %s partition(s).",
                 self.next_metadata, self.node_id, len(next_partitions))
@@ -965,8 +984,8 @@ class FetchSessionHandler(object):
                 altered.add(tp)
         log.debug("Built incremental fetch %s for node %s. Added %s, altered %s, removed %s out of %s",
-                self.next_metadata, self.node_id, added, altered, removed, self.session_partitions.keys())
-        to_send = {tp: next_partitions[tp] for tp in (added | altered)}
+                  self.next_metadata, self.node_id, added, altered, removed, self.session_partitions.keys())
+        to_send = collections.OrderedDict({tp: next_partitions[tp] for tp in next_partitions if tp in (added | altered)})
         return FetchRequestData(to_send, removed, self.next_metadata)
     def handle_response(self, response):
@@ -1106,18 +1125,11 @@ class FetchRequestData(object):
     @property
     def to_send(self):
         # Return as list of [(topic, [(partition, ...), ...]), ...]
-        # so it an be passed directly to encoder
+        # so it can be passed directly to encoder
         partition_data = collections.defaultdict(list)
         for tp, partition_info in six.iteritems(self._to_send):
             partition_data[tp.topic].append(partition_info)
-        # As of version == 3 partitions will be returned in order as
-        # they are requested, so to avoid starvation with
-        # `fetch_max_bytes` option we need this shuffle
-        # NOTE: we do have partition_data in random order due to usage
-        #       of unordered structures like dicts, but that does not
-        #       guarantee equal distribution, and starting in Python3.6
-        #       dicts retain insert order.
-        return random.sample(list(partition_data.items()), k=len(partition_data))
+        return list(partition_data.items())
     @property
     def to_forget(self):

{kafka_python-2.1.2 → kafka_python-2.1.3}/kafka/consumer/group.py RENAMED Viewed

@@ -444,8 +444,15 @@ class KafkaConsumer(six.Iterator):
             no rebalance operation triggered when group membership or cluster
             and topic metadata change.
         """
-        self._subscription.assign_from_user(partitions)
-        self._client.set_topics([tp.topic for tp in partitions])
+        if not partitions:
+            self.unsubscribe()
+        else:
+            # make sure the offsets of topic partitions the consumer is unsubscribing from
+            # are committed since there will be no following rebalance
+            self._coordinator.maybe_auto_commit_offsets_now()
+            self._subscription.assign_from_user(partitions)
+            self._client.set_topics([tp.topic for tp in partitions])
+            log.debug("Subscribed to partition(s): %s", partitions)
     def assignment(self):
         """Get the TopicPartitions currently assigned to this consumer.
@@ -463,19 +470,21 @@ class KafkaConsumer(six.Iterator):
         """
         return self._subscription.assigned_partitions()
-    def close(self, autocommit=True):
+    def close(self, autocommit=True, timeout_ms=None):
         """Close the consumer, waiting indefinitely for any needed cleanup.
         Keyword Arguments:
             autocommit (bool): If auto-commit is configured for this consumer,
                 this optional flag causes the consumer to attempt to commit any
                 pending consumed offsets prior to close. Default: True
+            timeout_ms (num, optional): Milliseconds to wait for auto-commit.
+                Default: None
         """
         if self._closed:
             return
         log.debug("Closing the KafkaConsumer.")
         self._closed = True
-        self._coordinator.close(autocommit=autocommit)
+        self._coordinator.close(autocommit=autocommit, timeout_ms=timeout_ms)
         self._metrics.close()
         self._client.close()
         try:
@@ -634,7 +643,7 @@ class KafkaConsumer(six.Iterator):
         if partitions is None:
             self._fetch_all_topic_metadata()
             partitions = cluster.partitions_for_topic(topic)
-        return partitions
+        return partitions or set()
     def poll(self, timeout_ms=0, max_records=None, update_offsets=True):
         """Fetch data from assigned topics / partitions.
@@ -959,8 +968,11 @@ class KafkaConsumer(six.Iterator):
     def unsubscribe(self):
         """Unsubscribe from all topics and clear all assigned partitions."""
+        # make sure the offsets of topic partitions the consumer is unsubscribing from
+        # are committed since there will be no following rebalance
+        self._coordinator.maybe_auto_commit_offsets_now()
         self._subscription.unsubscribe()
-        self._coordinator.close()
+        self._coordinator.maybe_leave_group()
         self._client.cluster.need_all_topic_metadata = False
         self._client.set_topics([])
         log.debug("Unsubscribed all topics or patterns and assigned partitions")

kafka-python 2.1.2__tar.gz → 2.1.3__tar.gz

kafka-python 2.1.2__tar.gz → 2.1.3__tar.gz