kafka-python 2.1.1__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. {kafka_python-2.1.1 → kafka_python-2.1.2}/CHANGES.md +19 -0
  2. {kafka_python-2.1.1 → kafka_python-2.1.2}/PKG-INFO +1 -1
  3. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/consumer/fetcher.py +156 -211
  4. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/consumer/group.py +9 -13
  5. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/consumer/subscription_state.py +1 -7
  6. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/compound_stat.py +2 -2
  7. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/measurable_stat.py +2 -1
  8. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/metrics_reporter.py +3 -2
  9. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stat.py +3 -2
  10. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/sampled_stat.py +2 -2
  11. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/abstract.py +3 -2
  12. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/api.py +4 -3
  13. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/record/abc.py +22 -4
  14. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/record/default_records.py +17 -9
  15. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/record/legacy_records.py +33 -10
  16. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/record/memory_records.py +6 -2
  17. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/sasl/abc.py +3 -2
  18. kafka_python-2.1.2/kafka/version.py +1 -0
  19. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka_python.egg-info/PKG-INFO +1 -1
  20. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_fetcher.py +46 -50
  21. kafka_python-2.1.1/kafka/version.py +0 -1
  22. {kafka_python-2.1.1 → kafka_python-2.1.2}/AUTHORS.md +0 -0
  23. {kafka_python-2.1.1 → kafka_python-2.1.2}/LICENSE +0 -0
  24. {kafka_python-2.1.1 → kafka_python-2.1.2}/MANIFEST.in +0 -0
  25. {kafka_python-2.1.1 → kafka_python-2.1.2}/README.rst +0 -0
  26. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/__init__.py +0 -0
  27. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/admin/__init__.py +0 -0
  28. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/admin/acl_resource.py +0 -0
  29. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/admin/client.py +0 -0
  30. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/admin/config_resource.py +0 -0
  31. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/admin/new_partitions.py +0 -0
  32. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/admin/new_topic.py +0 -0
  33. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/client_async.py +0 -0
  34. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/cluster.py +0 -0
  35. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/codec.py +0 -0
  36. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/conn.py +0 -0
  37. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/consumer/__init__.py +0 -0
  38. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/__init__.py +0 -0
  39. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/assignors/__init__.py +0 -0
  40. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/assignors/abstract.py +0 -0
  41. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/assignors/range.py +0 -0
  42. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/assignors/roundrobin.py +0 -0
  43. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/assignors/sticky/__init__.py +0 -0
  44. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/assignors/sticky/partition_movements.py +0 -0
  45. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/assignors/sticky/sorted_set.py +0 -0
  46. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/assignors/sticky/sticky_assignor.py +0 -0
  47. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/base.py +0 -0
  48. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/consumer.py +0 -0
  49. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/heartbeat.py +0 -0
  50. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/coordinator/protocol.py +0 -0
  51. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/errors.py +0 -0
  52. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/future.py +0 -0
  53. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/__init__.py +0 -0
  54. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/dict_reporter.py +0 -0
  55. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/kafka_metric.py +0 -0
  56. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/measurable.py +0 -0
  57. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/metric_config.py +0 -0
  58. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/metric_name.py +0 -0
  59. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/metrics.py +0 -0
  60. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/quota.py +0 -0
  61. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/__init__.py +0 -0
  62. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/avg.py +0 -0
  63. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/count.py +0 -0
  64. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/histogram.py +0 -0
  65. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/max_stat.py +0 -0
  66. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/min_stat.py +0 -0
  67. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/percentile.py +0 -0
  68. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/percentiles.py +0 -0
  69. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/rate.py +0 -0
  70. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/sensor.py +0 -0
  71. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/metrics/stats/total.py +0 -0
  72. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/partitioner/__init__.py +0 -0
  73. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/partitioner/default.py +0 -0
  74. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/producer/__init__.py +0 -0
  75. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/producer/buffer.py +0 -0
  76. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/producer/future.py +0 -0
  77. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/producer/kafka.py +0 -0
  78. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/producer/record_accumulator.py +0 -0
  79. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/producer/sender.py +0 -0
  80. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/__init__.py +0 -0
  81. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/admin.py +0 -0
  82. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/api_versions.py +0 -0
  83. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/broker_api_versions.py +0 -0
  84. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/commit.py +0 -0
  85. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/fetch.py +0 -0
  86. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/find_coordinator.py +0 -0
  87. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/frame.py +0 -0
  88. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/group.py +0 -0
  89. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/list_offsets.py +0 -0
  90. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/message.py +0 -0
  91. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/metadata.py +0 -0
  92. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/offset_for_leader_epoch.py +0 -0
  93. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/parser.py +0 -0
  94. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/pickle.py +0 -0
  95. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/produce.py +0 -0
  96. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/sasl_authenticate.py +0 -0
  97. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/sasl_handshake.py +0 -0
  98. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/struct.py +0 -0
  99. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/protocol/types.py +0 -0
  100. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/record/__init__.py +0 -0
  101. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/record/_crc32c.py +0 -0
  102. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/record/util.py +0 -0
  103. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/sasl/__init__.py +0 -0
  104. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/sasl/gssapi.py +0 -0
  105. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/sasl/msk.py +0 -0
  106. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/sasl/oauth.py +0 -0
  107. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/sasl/plain.py +0 -0
  108. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/sasl/scram.py +0 -0
  109. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/sasl/sspi.py +0 -0
  110. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/serializer/__init__.py +0 -0
  111. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/serializer/abstract.py +0 -0
  112. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/socks5_wrapper.py +0 -0
  113. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/structs.py +0 -0
  114. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/util.py +0 -0
  115. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/vendor/__init__.py +0 -0
  116. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/vendor/enum34.py +0 -0
  117. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/vendor/selectors34.py +0 -0
  118. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/vendor/six.py +0 -0
  119. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka/vendor/socketpair.py +0 -0
  120. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka_python.egg-info/SOURCES.txt +0 -0
  121. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka_python.egg-info/dependency_links.txt +0 -0
  122. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka_python.egg-info/requires.txt +0 -0
  123. {kafka_python-2.1.1 → kafka_python-2.1.2}/kafka_python.egg-info/top_level.txt +0 -0
  124. {kafka_python-2.1.1 → kafka_python-2.1.2}/pyproject.toml +0 -0
  125. {kafka_python-2.1.1 → kafka_python-2.1.2}/setup.cfg +0 -0
  126. {kafka_python-2.1.1 → kafka_python-2.1.2}/setup.py +0 -0
  127. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_acl_comparisons.py +0 -0
  128. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_admin.py +0 -0
  129. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_admin_integration.py +0 -0
  130. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_api_object_implementation.py +0 -0
  131. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_assignors.py +0 -0
  132. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_client_async.py +0 -0
  133. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_cluster.py +0 -0
  134. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_codec.py +0 -0
  135. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_conn.py +0 -0
  136. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_consumer.py +0 -0
  137. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_consumer_group.py +0 -0
  138. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_consumer_integration.py +0 -0
  139. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_coordinator.py +0 -0
  140. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_metrics.py +0 -0
  141. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_object_conversion.py +0 -0
  142. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_package.py +0 -0
  143. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_partition_movements.py +0 -0
  144. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_partitioner.py +0 -0
  145. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_producer.py +0 -0
  146. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_protocol.py +0 -0
  147. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_sasl_integration.py +0 -0
  148. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_sender.py +0 -0
  149. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/test_subscription_state.py +0 -0
  150. {kafka_python-2.1.1 → kafka_python-2.1.2}/test/testutil.py +0 -0
@@ -1,3 +1,22 @@
1
+ # 2.1.2 (Mar 17, 2025)
2
+
3
+ Fixes
4
+ * Simplify consumer.poll send fetches logic
5
+ * Fix crc validation in consumer / fetcher
6
+ * Lazy `_unpack_records` in PartitionRecords to fix premature fetch offset advance in consumer.poll() (#2555)
7
+ * Debug log fetch records return; separate offsets update log
8
+ * Fix Fetcher retriable error handling (#2554)
9
+ * Use six.add_metaclass for py2/py3 compatible abc (#2551)
10
+
11
+ Improvements
12
+ * Add FetchMetrics class; move topic_fetch_metrics inside aggregator
13
+ * DefaultRecordsBatchBuilder: support empty batch
14
+ * MemoryRecordsBuilder: support arbitrary offset, skipping offsets
15
+ * Add record.validate_crc() for v0/v1 crc checks
16
+ * Remove fetcher message_generator / iterator interface
17
+ * Add size_in_bytes to ABCRecordBatch and implement for Legacy and Default
18
+ * Add magic property to ABCRecord and implement for LegacyRecord
19
+
1
20
  # 2.1.1 (Mar 16, 2025)
2
21
 
3
22
  Fixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: kafka-python
3
- Version: 2.1.1
3
+ Version: 2.1.2
4
4
  Summary: Pure Python client for Apache Kafka
5
5
  Author-email: Dana Powers <dana.powers@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/dpkp/kafka-python
@@ -2,6 +2,7 @@ from __future__ import absolute_import, division
2
2
 
3
3
  import collections
4
4
  import copy
5
+ import itertools
5
6
  import logging
6
7
  import random
7
8
  import sys
@@ -363,176 +364,50 @@ class Fetcher(six.Iterator):
363
364
  return 0
364
365
 
365
366
  tp = part.topic_partition
366
- fetch_offset = part.fetch_offset
367
367
  if not self._subscriptions.is_assigned(tp):
368
368
  # this can happen when a rebalance happened before
369
369
  # fetched records are returned to the consumer's poll call
370
370
  log.debug("Not returning fetched records for partition %s"
371
371
  " since it is no longer assigned", tp)
372
+ elif not self._subscriptions.is_fetchable(tp):
373
+ # this can happen when a partition is paused before
374
+ # fetched records are returned to the consumer's poll call
375
+ log.debug("Not returning fetched records for assigned partition"
376
+ " %s since it is no longer fetchable", tp)
377
+
372
378
  else:
373
379
  # note that the position should always be available
374
380
  # as long as the partition is still assigned
375
381
  position = self._subscriptions.assignment[tp].position
376
- if not self._subscriptions.is_fetchable(tp):
377
- # this can happen when a partition is paused before
378
- # fetched records are returned to the consumer's poll call
379
- log.debug("Not returning fetched records for assigned partition"
380
- " %s since it is no longer fetchable", tp)
381
-
382
- elif fetch_offset == position.offset:
383
- # we are ensured to have at least one record since we already checked for emptiness
382
+ if part.next_fetch_offset == position.offset:
384
383
  part_records = part.take(max_records)
385
- next_offset = part_records[-1].offset + 1
386
- leader_epoch = part_records[-1].leader_epoch
387
-
388
- log.log(0, "Returning fetched records at offset %d for assigned"
389
- " partition %s and update position to %s (leader epoch %s)", position.offset,
390
- tp, next_offset, leader_epoch)
391
-
392
- for record in part_records:
393
- drained[tp].append(record)
394
-
395
- if update_offsets:
384
+ log.debug("Returning fetched records at offset %d for assigned"
385
+ " partition %s", position.offset, tp)
386
+ drained[tp].extend(part_records)
387
+ # We want to increment subscription position if (1) we're using consumer.poll(),
388
+ # or (2) we didn't return any records (consumer iterator will update position
389
+ # when each message is yielded). There may be edge cases where we re-fetch records
390
+ # that we'll end up skipping, but for now we'll live with that.
391
+ highwater = self._subscriptions.assignment[tp].highwater
392
+ if highwater is not None:
393
+ self._sensors.records_fetch_lag.record(highwater - part.next_fetch_offset)
394
+ if update_offsets or not part_records:
396
395
  # TODO: save leader_epoch
397
- self._subscriptions.assignment[tp].position = OffsetAndMetadata(next_offset, '', -1)
396
+ log.debug("Updating fetch position for assigned partition %s to %s (leader epoch %s)",
397
+ tp, part.next_fetch_offset, part.leader_epoch)
398
+ self._subscriptions.assignment[tp].position = OffsetAndMetadata(part.next_fetch_offset, '', -1)
398
399
  return len(part_records)
399
400
 
400
401
  else:
401
402
  # these records aren't next in line based on the last consumed
402
403
  # position, ignore them they must be from an obsolete request
403
404
  log.debug("Ignoring fetched records for %s at offset %s since"
404
- " the current position is %d", tp, part.fetch_offset,
405
+ " the current position is %d", tp, part.next_fetch_offset,
405
406
  position.offset)
406
407
 
407
- part.discard()
408
+ part.drain()
408
409
  return 0
409
410
 
410
- def _message_generator(self):
411
- """Iterate over fetched_records"""
412
- while self._next_partition_records or self._completed_fetches:
413
-
414
- if not self._next_partition_records:
415
- completion = self._completed_fetches.popleft()
416
- self._next_partition_records = self._parse_fetched_data(completion)
417
- continue
418
-
419
- # Send additional FetchRequests when the internal queue is low
420
- # this should enable moderate pipelining
421
- if len(self._completed_fetches) <= self.config['iterator_refetch_records']:
422
- self.send_fetches()
423
-
424
- tp = self._next_partition_records.topic_partition
425
-
426
- # We can ignore any prior signal to drop pending record batches
427
- # because we are starting from a fresh one where fetch_offset == position
428
- # i.e., the user seek()'d to this position
429
- self._subscriptions.assignment[tp].drop_pending_record_batch = False
430
-
431
- for msg in self._next_partition_records.take():
432
-
433
- # Because we are in a generator, it is possible for
434
- # subscription state to change between yield calls
435
- # so we need to re-check on each loop
436
- # this should catch assignment changes, pauses
437
- # and resets via seek_to_beginning / seek_to_end
438
- if not self._subscriptions.is_fetchable(tp):
439
- log.debug("Not returning fetched records for partition %s"
440
- " since it is no longer fetchable", tp)
441
- self._next_partition_records = None
442
- break
443
-
444
- # If there is a seek during message iteration,
445
- # we should stop unpacking this record batch and
446
- # wait for a new fetch response that aligns with the
447
- # new seek position
448
- elif self._subscriptions.assignment[tp].drop_pending_record_batch:
449
- log.debug("Skipping remainder of record batch for partition %s", tp)
450
- self._subscriptions.assignment[tp].drop_pending_record_batch = False
451
- self._next_partition_records = None
452
- break
453
-
454
- # Compressed messagesets may include earlier messages
455
- elif msg.offset < self._subscriptions.assignment[tp].position.offset:
456
- log.debug("Skipping message offset: %s (expecting %s)",
457
- msg.offset,
458
- self._subscriptions.assignment[tp].position.offset)
459
- continue
460
-
461
- self._subscriptions.assignment[tp].position = OffsetAndMetadata(msg.offset + 1, '', -1)
462
- yield msg
463
-
464
- self._next_partition_records = None
465
-
466
- def _unpack_records(self, tp, records):
467
- try:
468
- batch = records.next_batch()
469
- while batch is not None:
470
-
471
- # Try DefaultsRecordBatch / message log format v2
472
- # base_offset, last_offset_delta, and control batches
473
- try:
474
- batch_offset = batch.base_offset + batch.last_offset_delta
475
- leader_epoch = batch.leader_epoch
476
- self._subscriptions.assignment[tp].last_offset_from_record_batch = batch_offset
477
- # Control batches have a single record indicating whether a transaction
478
- # was aborted or committed.
479
- # When isolation_level is READ_COMMITTED (currently unsupported)
480
- # we should also skip all messages from aborted transactions
481
- # For now we only support READ_UNCOMMITTED and so we ignore the
482
- # abort/commit signal.
483
- if batch.is_control_batch:
484
- batch = records.next_batch()
485
- continue
486
- except AttributeError:
487
- leader_epoch = -1
488
- pass
489
-
490
- for record in batch:
491
- key_size = len(record.key) if record.key is not None else -1
492
- value_size = len(record.value) if record.value is not None else -1
493
- key = self._deserialize(
494
- self.config['key_deserializer'],
495
- tp.topic, record.key)
496
- value = self._deserialize(
497
- self.config['value_deserializer'],
498
- tp.topic, record.value)
499
- headers = record.headers
500
- header_size = sum(
501
- len(h_key.encode("utf-8")) + (len(h_val) if h_val is not None else 0) for h_key, h_val in
502
- headers) if headers else -1
503
- yield ConsumerRecord(
504
- tp.topic, tp.partition, leader_epoch, record.offset, record.timestamp,
505
- record.timestamp_type, key, value, headers, record.checksum,
506
- key_size, value_size, header_size)
507
-
508
- batch = records.next_batch()
509
-
510
- # If unpacking raises StopIteration, it is erroneously
511
- # caught by the generator. We want all exceptions to be raised
512
- # back to the user. See Issue 545
513
- except StopIteration:
514
- log.exception('StopIteration raised unpacking messageset')
515
- raise RuntimeError('StopIteration raised unpacking messageset')
516
-
517
- def __iter__(self): # pylint: disable=non-iterator-returned
518
- return self
519
-
520
- def __next__(self):
521
- if not self._iterator:
522
- self._iterator = self._message_generator()
523
- try:
524
- return next(self._iterator)
525
- except StopIteration:
526
- self._iterator = None
527
- raise
528
-
529
- def _deserialize(self, f, topic, bytes_):
530
- if not f:
531
- return bytes_
532
- if isinstance(f, Deserializer):
533
- return f.deserialize(topic, bytes_)
534
- return f(bytes_)
535
-
536
411
  def _send_list_offsets_requests(self, timestamps):
537
412
  """Fetch offsets for each partition in timestamps dict. This may send
538
413
  request to multiple nodes, based on who is Leader for partition.
@@ -711,16 +586,6 @@ class Fetcher(six.Iterator):
711
586
  for partition in self._fetchable_partitions():
712
587
  node_id = self._client.cluster.leader_for_partition(partition)
713
588
 
714
- # advance position for any deleted compacted messages if required
715
- if self._subscriptions.assignment[partition].last_offset_from_record_batch:
716
- next_offset_from_batch_header = self._subscriptions.assignment[partition].last_offset_from_record_batch + 1
717
- if next_offset_from_batch_header > self._subscriptions.assignment[partition].position.offset:
718
- log.debug(
719
- "Advance position for partition %s from %s to %s (last record batch location plus one)"
720
- " to correct for deleted compacted messages and/or transactional control records",
721
- partition, self._subscriptions.assignment[partition].position.offset, next_offset_from_batch_header)
722
- self._subscriptions.assignment[partition].position = OffsetAndMetadata(next_offset_from_batch_header, '', -1)
723
-
724
589
  position = self._subscriptions.assignment[partition].position
725
590
 
726
591
  # fetch if there is a leader and no in-flight requests
@@ -856,12 +721,9 @@ class Fetcher(six.Iterator):
856
721
  def _parse_fetched_data(self, completed_fetch):
857
722
  tp = completed_fetch.topic_partition
858
723
  fetch_offset = completed_fetch.fetched_offset
859
- num_bytes = 0
860
- records_count = 0
861
- parsed_records = None
862
-
863
724
  error_code, highwater = completed_fetch.partition_data[:2]
864
725
  error_type = Errors.for_code(error_code)
726
+ parsed_records = None
865
727
 
866
728
  try:
867
729
  if not self._subscriptions.is_fetchable(tp):
@@ -890,13 +752,12 @@ class Fetcher(six.Iterator):
890
752
  log.debug("Adding fetched record for partition %s with"
891
753
  " offset %d to buffered record list", tp,
892
754
  position.offset)
893
- unpacked = list(self._unpack_records(tp, records))
894
- parsed_records = self.PartitionRecords(fetch_offset, tp, unpacked)
895
- if unpacked:
896
- last_offset = unpacked[-1].offset
897
- self._sensors.records_fetch_lag.record(highwater - last_offset)
898
- num_bytes = records.valid_bytes()
899
- records_count = len(unpacked)
755
+ parsed_records = self.PartitionRecords(fetch_offset, tp, records,
756
+ self.config['key_deserializer'],
757
+ self.config['value_deserializer'],
758
+ self.config['check_crcs'],
759
+ completed_fetch.metric_aggregator)
760
+ return parsed_records
900
761
  elif records.size_in_bytes() > 0:
901
762
  # we did not read a single message from a non-empty
902
763
  # buffer because that message's size is larger than
@@ -911,7 +772,6 @@ class Fetcher(six.Iterator):
911
772
  record_too_large_partitions,
912
773
  self.config['max_partition_fetch_bytes']),
913
774
  record_too_large_partitions)
914
- self._sensors.record_topic_fetch_metrics(tp.topic, num_bytes, records_count)
915
775
 
916
776
  elif error_type in (Errors.NotLeaderForPartitionError,
917
777
  Errors.ReplicaNotAvailableError,
@@ -934,60 +794,133 @@ class Fetcher(six.Iterator):
934
794
  elif error_type is Errors.TopicAuthorizationFailedError:
935
795
  log.warning("Not authorized to read from topic %s.", tp.topic)
936
796
  raise Errors.TopicAuthorizationFailedError(set([tp.topic]))
937
- elif error_type.is_retriable:
797
+ elif getattr(error_type, 'retriable', False):
938
798
  log.debug("Retriable error fetching partition %s: %s", tp, error_type())
939
- if error_type.invalid_metadata:
799
+ if getattr(error_type, 'invalid_metadata', False):
940
800
  self._client.cluster.request_update()
941
801
  else:
942
802
  raise error_type('Unexpected error while fetching data')
943
803
 
944
804
  finally:
945
- completed_fetch.metric_aggregator.record(tp, num_bytes, records_count)
805
+ if parsed_records is None:
806
+ completed_fetch.metric_aggregator.record(tp, 0, 0)
807
+
808
+ return None
946
809
 
947
- return parsed_records
810
+ def close(self):
811
+ if self._next_partition_records is not None:
812
+ self._next_partition_records.drain()
948
813
 
949
814
  class PartitionRecords(object):
950
- def __init__(self, fetch_offset, tp, messages):
815
+ def __init__(self, fetch_offset, tp, records, key_deserializer, value_deserializer, check_crcs, metric_aggregator):
951
816
  self.fetch_offset = fetch_offset
952
817
  self.topic_partition = tp
953
- self.messages = messages
818
+ self.leader_epoch = -1
819
+ self.next_fetch_offset = fetch_offset
820
+ self.bytes_read = 0
821
+ self.records_read = 0
822
+ self.metric_aggregator = metric_aggregator
823
+ self.check_crcs = check_crcs
824
+ self.record_iterator = itertools.dropwhile(
825
+ self._maybe_skip_record,
826
+ self._unpack_records(tp, records, key_deserializer, value_deserializer))
827
+
828
+ def _maybe_skip_record(self, record):
954
829
  # When fetching an offset that is in the middle of a
955
830
  # compressed batch, we will get all messages in the batch.
956
831
  # But we want to start 'take' at the fetch_offset
957
832
  # (or the next highest offset in case the message was compacted)
958
- for i, msg in enumerate(messages):
959
- if msg.offset < fetch_offset:
960
- log.debug("Skipping message offset: %s (expecting %s)",
961
- msg.offset, fetch_offset)
962
- else:
963
- self.message_idx = i
964
- break
965
-
833
+ if record.offset < self.fetch_offset:
834
+ log.debug("Skipping message offset: %s (expecting %s)",
835
+ record.offset, self.fetch_offset)
836
+ return True
966
837
  else:
967
- self.message_idx = 0
968
- self.messages = None
838
+ return False
969
839
 
970
- # For truthiness evaluation we need to define __len__ or __nonzero__
971
- def __len__(self):
972
- if self.messages is None or self.message_idx >= len(self.messages):
973
- return 0
974
- return len(self.messages) - self.message_idx
840
+ # For truthiness evaluation
841
+ def __bool__(self):
842
+ return self.record_iterator is not None
975
843
 
976
- def discard(self):
977
- self.messages = None
844
+ def drain(self):
845
+ if self.record_iterator is not None:
846
+ self.record_iterator = None
847
+ self.metric_aggregator.record(self.topic_partition, self.bytes_read, self.records_read)
978
848
 
979
849
  def take(self, n=None):
980
- if not len(self):
981
- return []
982
- if n is None or n > len(self):
983
- n = len(self)
984
- next_idx = self.message_idx + n
985
- res = self.messages[self.message_idx:next_idx]
986
- self.message_idx = next_idx
987
- # fetch_offset should be incremented by 1 to parallel the
988
- # subscription position (also incremented by 1)
989
- self.fetch_offset = max(self.fetch_offset, res[-1].offset + 1)
990
- return res
850
+ return list(itertools.islice(self.record_iterator, 0, n))
851
+
852
+ def _unpack_records(self, tp, records, key_deserializer, value_deserializer):
853
+ try:
854
+ batch = records.next_batch()
855
+ last_batch = None
856
+ while batch is not None:
857
+ last_batch = batch
858
+
859
+ if self.check_crcs and not batch.validate_crc():
860
+ raise Errors.CorruptRecordException(
861
+ "Record batch for partition %s at offset %s failed crc check" % (
862
+ self.topic_partition, batch.base_offset))
863
+
864
+ # Try DefaultsRecordBatch / message log format v2
865
+ # base_offset, last_offset_delta, and control batches
866
+ if batch.magic == 2:
867
+ self.leader_epoch = batch.leader_epoch
868
+ # Control batches have a single record indicating whether a transaction
869
+ # was aborted or committed.
870
+ # When isolation_level is READ_COMMITTED (currently unsupported)
871
+ # we should also skip all messages from aborted transactions
872
+ # For now we only support READ_UNCOMMITTED and so we ignore the
873
+ # abort/commit signal.
874
+ if batch.is_control_batch:
875
+ self.next_fetch_offset = next(batch).offset + 1
876
+ batch = records.next_batch()
877
+ continue
878
+
879
+ for record in batch:
880
+ if self.check_crcs and not record.validate_crc():
881
+ raise Errors.CorruptRecordException(
882
+ "Record for partition %s at offset %s failed crc check" % (
883
+ self.topic_partition, record.offset))
884
+ key_size = len(record.key) if record.key is not None else -1
885
+ value_size = len(record.value) if record.value is not None else -1
886
+ key = self._deserialize(key_deserializer, tp.topic, record.key)
887
+ value = self._deserialize(value_deserializer, tp.topic, record.value)
888
+ headers = record.headers
889
+ header_size = sum(
890
+ len(h_key.encode("utf-8")) + (len(h_val) if h_val is not None else 0) for h_key, h_val in
891
+ headers) if headers else -1
892
+ self.records_read += 1
893
+ self.bytes_read += record.size_in_bytes
894
+ self.next_fetch_offset = record.offset + 1
895
+ yield ConsumerRecord(
896
+ tp.topic, tp.partition, self.leader_epoch, record.offset, record.timestamp,
897
+ record.timestamp_type, key, value, headers, record.checksum,
898
+ key_size, value_size, header_size)
899
+
900
+ batch = records.next_batch()
901
+ else:
902
+ # Message format v2 preserves the last offset in a batch even if the last record is removed
903
+ # through compaction. By using the next offset computed from the last offset in the batch,
904
+ # we ensure that the offset of the next fetch will point to the next batch, which avoids
905
+ # unnecessary re-fetching of the same batch (in the worst case, the consumer could get stuck
906
+ # fetching the same batch repeatedly).
907
+ if last_batch and last_batch.magic == 2:
908
+ self.next_fetch_offset = last_batch.base_offset + last_batch.last_offset_delta + 1
909
+ self.drain()
910
+
911
+ # If unpacking raises StopIteration, it is erroneously
912
+ # caught by the generator. We want all exceptions to be raised
913
+ # back to the user. See Issue 545
914
+ except StopIteration:
915
+ log.exception('StopIteration raised unpacking messageset')
916
+ raise RuntimeError('StopIteration raised unpacking messageset')
917
+
918
+ def _deserialize(self, f, topic, bytes_):
919
+ if not f:
920
+ return bytes_
921
+ if isinstance(f, Deserializer):
922
+ return f.deserialize(topic, bytes_)
923
+ return f(bytes_)
991
924
 
992
925
 
993
926
  class FetchSessionHandler(object):
@@ -1196,6 +1129,14 @@ class FetchRequestData(object):
1196
1129
  return list(partition_data.items())
1197
1130
 
1198
1131
 
1132
class FetchMetrics(object):
    """Pair of running counters: total bytes and total records.

    Both counters start at zero and are meant to be incremented in
    place by whoever owns the instance. __slots__ restricts instances
    to exactly these two attributes.
    """
    __slots__ = ('total_bytes', 'total_records')

    def __init__(self):
        # Start both counters from zero.
        self.total_bytes = self.total_records = 0
1138
+
1139
+
1199
1140
  class FetchResponseMetricAggregator(object):
1200
1141
  """
1201
1142
  Since we parse the message data for each partition from each fetch
@@ -1206,8 +1147,8 @@ class FetchResponseMetricAggregator(object):
1206
1147
  def __init__(self, sensors, partitions):
1207
1148
  self.sensors = sensors
1208
1149
  self.unrecorded_partitions = partitions
1209
- self.total_bytes = 0
1210
- self.total_records = 0
1150
+ self.fetch_metrics = FetchMetrics()
1151
+ self.topic_fetch_metrics = collections.defaultdict(FetchMetrics)
1211
1152
 
1212
1153
  def record(self, partition, num_bytes, num_records):
1213
1154
  """
@@ -1216,13 +1157,17 @@ class FetchResponseMetricAggregator(object):
1216
1157
  have reported, we write the metric.
1217
1158
  """
1218
1159
  self.unrecorded_partitions.remove(partition)
1219
- self.total_bytes += num_bytes
1220
- self.total_records += num_records
1160
+ self.fetch_metrics.total_bytes += num_bytes
1161
+ self.fetch_metrics.total_records += num_records
1162
+ self.topic_fetch_metrics[partition.topic].total_bytes += num_bytes
1163
+ self.topic_fetch_metrics[partition.topic].total_records += num_records
1221
1164
 
1222
1165
  # once all expected partitions from the fetch have reported in, record the metrics
1223
1166
  if not self.unrecorded_partitions:
1224
- self.sensors.bytes_fetched.record(self.total_bytes)
1225
- self.sensors.records_fetched.record(self.total_records)
1167
+ self.sensors.bytes_fetched.record(self.fetch_metrics.total_bytes)
1168
+ self.sensors.records_fetched.record(self.fetch_metrics.total_records)
1169
+ for topic, metrics in six.iteritems(self.topic_fetch_metrics):
1170
+ self.sensors.record_topic_fetch_metrics(topic, metrics.total_bytes, metrics.total_records)
1226
1171
 
1227
1172
 
1228
1173
  class FetchManagerMetrics(object):
@@ -707,22 +707,18 @@ class KafkaConsumer(six.Iterator):
707
707
  # If data is available already, e.g. from a previous network client
708
708
  # poll() call to commit, then just return it immediately
709
709
  records, partial = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
710
+ # Before returning the fetched records, we can send off the
711
+ # next round of fetches and avoid block waiting for their
712
+ # responses to enable pipelining while the user is handling the
713
+ # fetched records.
714
+ if not partial:
715
+ futures = self._fetcher.send_fetches()
716
+ if len(futures):
717
+ self._client.poll(timeout_ms=0)
718
+
710
719
  if records:
711
- # Before returning the fetched records, we can send off the
712
- # next round of fetches and avoid block waiting for their
713
- # responses to enable pipelining while the user is handling the
714
- # fetched records.
715
- if not partial:
716
- futures = self._fetcher.send_fetches()
717
- if len(futures):
718
- self._client.poll(timeout_ms=0)
719
720
  return records
720
721
 
721
- # Send any new fetches (won't resend pending fetches)
722
- futures = self._fetcher.send_fetches()
723
- if len(futures):
724
- self._client.poll(timeout_ms=0)
725
-
726
722
  self._client.poll(timeout_ms=inner_timeout_ms(self._coordinator.time_to_next_poll() * 1000))
727
723
  # after the long poll, we should check whether the group needs to rebalance
728
724
  # prior to returning data so that the group can stabilize faster
@@ -382,9 +382,6 @@ class TopicPartitionState(object):
382
382
  self._position = None # OffsetAndMetadata exposed to the user
383
383
  self.highwater = None
384
384
  self.drop_pending_record_batch = False
385
- # The last message offset hint available from a record batch with
386
- # magic=2 which includes deleted compacted messages
387
- self.last_offset_from_record_batch = None
388
385
 
389
386
  def _set_position(self, offset):
390
387
  assert self.has_valid_position, 'Valid position required'
@@ -400,7 +397,6 @@ class TopicPartitionState(object):
400
397
  self.awaiting_reset = True
401
398
  self.reset_strategy = strategy
402
399
  self._position = None
403
- self.last_offset_from_record_batch = None
404
400
  self.has_valid_position = False
405
401
 
406
402
  def seek(self, offset):
@@ -409,7 +405,6 @@ class TopicPartitionState(object):
409
405
  self.reset_strategy = None
410
406
  self.has_valid_position = True
411
407
  self.drop_pending_record_batch = True
412
- self.last_offset_from_record_batch = None
413
408
 
414
409
  def pause(self):
415
410
  self.paused = True
@@ -421,6 +416,7 @@ class TopicPartitionState(object):
421
416
  return not self.paused and self.has_valid_position
422
417
 
423
418
 
419
+ @six.add_metaclass(abc.ABCMeta)
424
420
  class ConsumerRebalanceListener(object):
425
421
  """
426
422
  A callback interface that the user can implement to trigger custom actions
@@ -462,8 +458,6 @@ class ConsumerRebalanceListener(object):
462
458
  taking over that partition has their on_partitions_assigned() callback
463
459
  called to load the state.
464
460
  """
465
- __metaclass__ = abc.ABCMeta
466
-
467
461
  @abc.abstractmethod
468
462
  def on_partitions_revoked(self, revoked):
469
463
  """
@@ -3,16 +3,16 @@ from __future__ import absolute_import
3
3
  import abc
4
4
 
5
5
  from kafka.metrics.stat import AbstractStat
6
+ from kafka.vendor.six import add_metaclass
6
7
 
7
8
 
9
+ @add_metaclass(abc.ABCMeta)
8
10
  class AbstractCompoundStat(AbstractStat):
9
11
  """
10
12
  A compound stat is a stat where a single measurement and associated
11
13
  data structure feeds many metrics. This is the example for a
12
14
  histogram which has many associated percentiles.
13
15
  """
14
- __metaclass__ = abc.ABCMeta
15
-
16
16
  def stats(self):
17
17
  """
18
18
  Return list of NamedMeasurable
@@ -4,8 +4,10 @@ import abc
4
4
 
5
5
  from kafka.metrics.measurable import AbstractMeasurable
6
6
  from kafka.metrics.stat import AbstractStat
7
+ from kafka.vendor.six import add_metaclass
7
8
 
8
9
 
10
+ @add_metaclass(abc.ABCMeta)
9
11
  class AbstractMeasurableStat(AbstractStat, AbstractMeasurable):
10
12
  """
11
13
  An AbstractMeasurableStat is an AbstractStat that is also
@@ -13,4 +15,3 @@ class AbstractMeasurableStat(AbstractStat, AbstractMeasurable):
13
15
  This is the interface used for most of the simple statistics such
14
16
  as Avg, Max, Count, etc.
15
17
  """
16
- __metaclass__ = abc.ABCMeta
@@ -2,14 +2,15 @@ from __future__ import absolute_import
2
2
 
3
3
  import abc
4
4
 
5
+ from kafka.vendor.six import add_metaclass
5
6
 
7
+
8
+ @add_metaclass(abc.ABCMeta)
6
9
  class AbstractMetricsReporter(object):
7
10
  """
8
11
  An abstract class to allow things to listen as new metrics
9
12
  are created so they can be reported.
10
13
  """
11
- __metaclass__ = abc.ABCMeta
12
-
13
14
  @abc.abstractmethod
14
15
  def init(self, metrics):
15
16
  """
@@ -2,14 +2,15 @@ from __future__ import absolute_import
2
2
 
3
3
  import abc
4
4
 
5
+ from kafka.vendor.six import add_metaclass
5
6
 
7
+
8
+ @add_metaclass(abc.ABCMeta)
6
9
  class AbstractStat(object):
7
10
  """
8
11
  An AbstractStat is a quantity such as average, max, etc that is computed
9
12
  off the stream of updates to a sensor
10
13
  """
11
- __metaclass__ = abc.ABCMeta
12
-
13
14
  @abc.abstractmethod
14
15
  def record(self, config, value, time_ms):
15
16
  """