PyPI - kafka-python - Versions diffs - 2.1.4__py2.py3-none-any.whl → 2.2.0__py2.py3-none-any.whl - Mend

kafka-python 2.1.4__py2.py3-none-any.whl → 2.2.0__py2.py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (55)

kafka/admin/client.py +6 -6
kafka/benchmarks/__init__.py +0 -0
kafka/benchmarks/consumer_performance.py +142 -0
kafka/benchmarks/load_example.py +110 -0
kafka/benchmarks/producer_performance.py +153 -0
kafka/benchmarks/record_batch_compose.py +78 -0
kafka/benchmarks/record_batch_read.py +83 -0
kafka/benchmarks/varint_speed.py +434 -0
kafka/client_async.py +40 -2
kafka/cluster.py +18 -13
kafka/conn.py +7 -5
kafka/consumer/fetcher.py +309 -194
kafka/consumer/group.py +73 -63
kafka/consumer/subscription_state.py +84 -36
kafka/coordinator/base.py +60 -26
kafka/coordinator/consumer.py +40 -40
kafka/errors.py +68 -93
kafka/metrics/compound_stat.py +2 -0
kafka/metrics/kafka_metric.py +3 -1
kafka/metrics/metric_config.py +2 -0
kafka/metrics/metric_name.py +1 -0
kafka/metrics/quota.py +2 -0
kafka/metrics/stats/avg.py +2 -0
kafka/metrics/stats/count.py +2 -0
kafka/metrics/stats/histogram.py +6 -0
kafka/metrics/stats/max_stat.py +2 -0
kafka/metrics/stats/min_stat.py +2 -0
kafka/metrics/stats/percentile.py +2 -0
kafka/metrics/stats/percentiles.py +3 -0
kafka/metrics/stats/rate.py +3 -0
kafka/metrics/stats/sampled_stat.py +2 -0
kafka/metrics/stats/sensor.py +4 -0
kafka/metrics/stats/total.py +2 -0
kafka/producer/future.py +3 -3
kafka/producer/kafka.py +291 -58
kafka/producer/record_accumulator.py +293 -214
kafka/producer/sender.py +355 -75
kafka/producer/transaction_manager.py +981 -0
kafka/protocol/add_offsets_to_txn.py +59 -0
kafka/protocol/add_partitions_to_txn.py +63 -0
kafka/protocol/end_txn.py +58 -0
kafka/protocol/fetch.py +6 -0
kafka/protocol/group.py +17 -3
kafka/protocol/init_producer_id.py +46 -0
kafka/protocol/txn_offset_commit.py +78 -0
kafka/record/abc.py +10 -0
kafka/record/default_records.py +101 -12
kafka/record/legacy_records.py +12 -3
kafka/record/memory_records.py +54 -6
kafka/version.py +1 -1
{kafka_python-2.1.4.dist-info → kafka_python-2.2.0.dist-info}/METADATA +3 -1
{kafka_python-2.1.4.dist-info → kafka_python-2.2.0.dist-info}/RECORD +54 -42
{kafka_python-2.1.4.dist-info → kafka_python-2.2.0.dist-info}/WHEEL +1 -1
kafka/producer/buffer.py +0 -115
{kafka_python-2.1.4.dist-info → kafka_python-2.2.0.dist-info}/top_level.txt +0 -0

kafka/admin/client.py CHANGED Viewed

@@ -15,7 +15,7 @@ from kafka.client_async import KafkaClient, selectors
 from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment, ConsumerProtocol
 import kafka.errors as Errors
 from kafka.errors import (
-    IncompatibleBrokerVersion, KafkaConfigurationError, NotControllerError, UnknownTopicOrPartitionError,
+    IncompatibleBrokerVersion, KafkaConfigurationError, UnknownTopicOrPartitionError,
     UnrecognizedBrokerVersion, IllegalArgumentError)
 from kafka.metrics import MetricConfig, Metrics
 from kafka.protocol.admin import (
@@ -411,7 +411,7 @@ class KafkaAdminClient(object):
         # extra values (usually the error_message)
         for topic, error_code in map(lambda e: e[:2], topic_error_tuples):
             error_type = Errors.for_code(error_code)
-            if tries and error_type is NotControllerError:
+            if tries and error_type is Errors.NotControllerError:
                 # No need to inspect the rest of the errors for
                 # non-retriable errors because NotControllerError should
                 # either be thrown for all errors or no errors.
@@ -431,13 +431,13 @@ class KafkaAdminClient(object):
         for topic, partition_results in response.replication_election_results:
             for partition_id, error_code in map(lambda e: e[:2], partition_results):
                 error_type = Errors.for_code(error_code)
-                if tries and error_type is NotControllerError:
+                if tries and error_type is Errors.NotControllerError:
                     # No need to inspect the rest of the errors for
                     # non-retriable errors because NotControllerError should
                     # either be thrown for all errors or no errors.
                     self._refresh_controller_id()
                     return False
-                elif error_type not in [Errors.NoError, Errors.ElectionNotNeeded]:
+                elif error_type not in (Errors.NoError, Errors.ElectionNotNeededError):
                     raise error_type(
                         "Request '{}' failed with response '{}'."
                         .format(request, response))
@@ -1460,9 +1460,9 @@ class KafkaAdminClient(object):
             list: List of tuples of Consumer Groups.
         Raises:
-            GroupCoordinatorNotAvailableError: The coordinator is not
+            CoordinatorNotAvailableError: The coordinator is not
                 available, so cannot process requests.
-            GroupLoadInProgressError: The coordinator is loading and
+            CoordinatorLoadInProgressError: The coordinator is loading and
                 hence can't process requests.
         """
         # While we return a list, internally use a set to prevent duplicates

kafka/benchmarks/__init__.py ADDED Viewed

File without changes

kafka/benchmarks/consumer_performance.py ADDED Viewed

@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+# Adapted from https://github.com/mrafayaleem/kafka-jython
+from __future__ import absolute_import, print_function
+import argparse
+import pprint
+import sys
+import threading
+import time
+import traceback
+from kafka import KafkaConsumer
+class ConsumerPerformance(object):
+    @staticmethod
+    def run(args):
+        try:
+            props = {}
+            for prop in args.consumer_config:
+                k, v = prop.split('=')
+                try:
+                    v = int(v)
+                except ValueError:
+                    pass
+                if v == 'None':
+                    v = None
+                elif v == 'False':
+                    v = False
+                elif v == 'True':
+                    v = True
+                props[k] = v
+            print('Initializing Consumer...')
+            props['bootstrap_servers'] = args.bootstrap_servers
+            props['auto_offset_reset'] = 'earliest'
+            if 'group_id' not in props:
+                props['group_id'] = 'kafka-consumer-benchmark'
+            if 'consumer_timeout_ms' not in props:
+                props['consumer_timeout_ms'] = 10000
+            props['metrics_sample_window_ms'] = args.stats_interval * 1000
+            for k, v in props.items():
+                print('---> {0}={1}'.format(k, v))
+            consumer = KafkaConsumer(args.topic, **props)
+            print('---> group_id={0}'.format(consumer.config['group_id']))
+            print('---> report stats every {0} secs'.format(args.stats_interval))
+            print('---> raw metrics? {0}'.format(args.raw_metrics))
+            timer_stop = threading.Event()
+            timer = StatsReporter(args.stats_interval, consumer,
+                                  event=timer_stop,
+                                  raw_metrics=args.raw_metrics)
+            timer.start()
+            print('-> OK!')
+            print()
+            start_time = time.time()
+            records = 0
+            for msg in consumer:
+                records += 1
+                if records >= args.num_records:
+                    break
+            end_time = time.time()
+            timer_stop.set()
+            timer.join()
+            print('Consumed {0} records'.format(records))
+            print('Execution time:', end_time - start_time, 'secs')
+        except Exception:
+            exc_info = sys.exc_info()
+            traceback.print_exception(*exc_info)
+            sys.exit(1)
+class StatsReporter(threading.Thread):
+    def __init__(self, interval, consumer, event=None, raw_metrics=False):
+        super(StatsReporter, self).__init__()
+        self.interval = interval
+        self.consumer = consumer
+        self.event = event
+        self.raw_metrics = raw_metrics
+    def print_stats(self):
+        metrics = self.consumer.metrics()
+        if self.raw_metrics:
+            pprint.pprint(metrics)
+        else:
+            print('{records-consumed-rate} records/sec ({bytes-consumed-rate} B/sec),'
+                  ' {fetch-latency-avg} latency,'
+                  ' {fetch-rate} fetch/s,'
+                  ' {fetch-size-avg} fetch size,'
+                  ' {records-lag-max} max record lag,'
+                  ' {records-per-request-avg} records/req'
+                  .format(**metrics['consumer-fetch-manager-metrics']))
+    def print_final(self):
+        self.print_stats()
+    def run(self):
+        while self.event and not self.event.wait(self.interval):
+            self.print_stats()
+        else:
+            self.print_final()
+def get_args_parser():
+    parser = argparse.ArgumentParser(
+        description='This tool is used to verify the consumer performance.')
+    parser.add_argument(
+        '--bootstrap-servers', type=str, nargs='+', default=(),
+        help='host:port for cluster bootstrap servers')
+    parser.add_argument(
+        '--topic', type=str,
+        help='Topic for consumer test (default: kafka-python-benchmark-test)',
+        default='kafka-python-benchmark-test')
+    parser.add_argument(
+        '--num-records', type=int,
+        help='number of messages to consume (default: 1000000)',
+        default=1000000)
+    parser.add_argument(
+        '--consumer-config', type=str, nargs='+', default=(),
+        help='kafka consumer related configuration properties like '
+             'bootstrap_servers,client_id etc..')
+    parser.add_argument(
+        '--fixture-compression', type=str,
+        help='specify a compression type for use with broker fixtures / producer')
+    parser.add_argument(
+        '--stats-interval', type=int,
+        help='Interval in seconds for stats reporting to console (default: 5)',
+        default=5)
+    parser.add_argument(
+        '--raw-metrics', action='store_true',
+        help='Enable this flag to print full metrics dict on each interval')
+    return parser
+if __name__ == '__main__':
+    args = get_args_parser().parse_args()
+    ConsumerPerformance.run(args)

kafka/benchmarks/load_example.py ADDED Viewed

@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import argparse
+import logging
+import threading
+import time
+from kafka import KafkaConsumer, KafkaProducer
+class Producer(threading.Thread):
+    def __init__(self, bootstrap_servers, topic, stop_event, msg_size):
+        super(Producer, self).__init__()
+        self.bootstrap_servers = bootstrap_servers
+        self.topic = topic
+        self.stop_event = stop_event
+        self.big_msg = b'1' * msg_size
+    def run(self):
+        producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers)
+        self.sent = 0
+        while not self.stop_event.is_set():
+            producer.send(self.topic, self.big_msg)
+            self.sent += 1
+        producer.flush()
+        producer.close()
+class Consumer(threading.Thread):
+    def __init__(self, bootstrap_servers, topic, stop_event, msg_size):
+        super(Consumer, self).__init__()
+        self.bootstrap_servers = bootstrap_servers
+        self.topic = topic
+        self.stop_event = stop_event
+        self.msg_size = msg_size
+    def run(self):
+        consumer = KafkaConsumer(bootstrap_servers=self.bootstrap_servers,
+                                 auto_offset_reset='earliest')
+        consumer.subscribe([self.topic])
+        self.valid = 0
+        self.invalid = 0
+        for message in consumer:
+            if len(message.value) == self.msg_size:
+                self.valid += 1
+            else:
+                print('Invalid message:', len(message.value), self.msg_size)
+                self.invalid += 1
+            if self.stop_event.is_set():
+                break
+        consumer.close()
+def get_args_parser():
+    parser = argparse.ArgumentParser(
+        description='This tool is used to demonstrate consumer and producer load.')
+    parser.add_argument(
+        '--bootstrap-servers', type=str, nargs='+', default=('localhost:9092'),
+        help='host:port for cluster bootstrap servers (default: localhost:9092)')
+    parser.add_argument(
+        '--topic', type=str,
+        help='Topic for load test (default: kafka-python-benchmark-load-example)',
+        default='kafka-python-benchmark-load-example')
+    parser.add_argument(
+        '--msg-size', type=int,
+        help='Message size, in bytes, for load test (default: 524288)',
+        default=524288)
+    parser.add_argument(
+        '--load-time', type=int,
+        help='number of seconds to run load test (default: 10)',
+        default=10)
+    parser.add_argument(
+        '--log-level', type=str,
+        help='Optional logging level for load test: ERROR|INFO|DEBUG etc',
+        default=None)
+    return parser
+def main(args):
+    if args.log_level:
+        logging.basicConfig(
+            format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
+            level=getattr(logging, args.log_level))
+    producer_stop = threading.Event()
+    consumer_stop = threading.Event()
+    threads = [
+        Producer(args.bootstrap_servers, args.topic, producer_stop, args.msg_size),
+        Consumer(args.bootstrap_servers, args.topic, consumer_stop, args.msg_size)
+    ]
+    for t in threads:
+        t.start()
+    time.sleep(args.load_time)
+    producer_stop.set()
+    consumer_stop.set()
+    print('Messages sent: %d' % threads[0].sent)
+    print('Messages recvd: %d' % threads[1].valid)
+    print('Messages invalid: %d' % threads[1].invalid)
+if __name__ == "__main__":
+    args = get_args_parser().parse_args()
+    main(args)

kafka/benchmarks/producer_performance.py ADDED Viewed

@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+# Adapted from https://github.com/mrafayaleem/kafka-jython
+from __future__ import absolute_import, print_function
+import argparse
+import pprint
+import sys
+import threading
+import time
+import traceback
+from kafka.vendor.six.moves import range
+from kafka import KafkaProducer
+class ProducerPerformance(object):
+    @staticmethod
+    def run(args):
+        try:
+            props = {}
+            for prop in args.producer_config:
+                k, v = prop.split('=')
+                try:
+                    v = int(v)
+                except ValueError:
+                    pass
+                if v == 'None':
+                    v = None
+                elif v == 'False':
+                    v = False
+                elif v == 'True':
+                    v = True
+                props[k] = v
+            print('Initializing producer...')
+            props['bootstrap_servers'] = args.bootstrap_servers
+            record = bytes(bytearray(args.record_size))
+            props['metrics_sample_window_ms'] = args.stats_interval * 1000
+            producer = KafkaProducer(**props)
+            for k, v in props.items():
+                print('---> {0}={1}'.format(k, v))
+            print('---> send {0} byte records'.format(args.record_size))
+            print('---> report stats every {0} secs'.format(args.stats_interval))
+            print('---> raw metrics? {0}'.format(args.raw_metrics))
+            timer_stop = threading.Event()
+            timer = StatsReporter(args.stats_interval, producer,
+                                  event=timer_stop,
+                                  raw_metrics=args.raw_metrics)
+            timer.start()
+            print('-> OK!')
+            print()
+            def _benchmark():
+                results = []
+                for i in range(args.num_records):
+                    results.append(producer.send(topic=args.topic, value=record))
+                print("Send complete...")
+                producer.flush()
+                producer.close()
+                count_success, count_failure = 0, 0
+                for r in results:
+                    if r.succeeded():
+                        count_success += 1
+                    elif r.failed():
+                        count_failure += 1
+                    else:
+                        raise ValueError(r)
+                print("%d suceeded, %d failed" % (count_success, count_failure))
+            start_time = time.time()
+            _benchmark()
+            end_time = time.time()
+            timer_stop.set()
+            timer.join()
+            print('Execution time:', end_time - start_time, 'secs')
+        except Exception:
+            exc_info = sys.exc_info()
+            traceback.print_exception(*exc_info)
+            sys.exit(1)
+class StatsReporter(threading.Thread):
+    def __init__(self, interval, producer, event=None, raw_metrics=False):
+        super(StatsReporter, self).__init__()
+        self.interval = interval
+        self.producer = producer
+        self.event = event
+        self.raw_metrics = raw_metrics
+    def print_stats(self):
+        metrics = self.producer.metrics()
+        if not metrics:
+            return
+        if self.raw_metrics:
+            pprint.pprint(metrics)
+        else:
+            print('{record-send-rate} records/sec ({byte-rate} B/sec),'
+                  ' {request-latency-avg} latency,'
+                  ' {record-size-avg} record size,'
+                  ' {batch-size-avg} batch size,'
+                  ' {records-per-request-avg} records/req'
+                  .format(**metrics['producer-metrics']))
+    def print_final(self):
+        self.print_stats()
+    def run(self):
+        while self.event and not self.event.wait(self.interval):
+            self.print_stats()
+        else:
+            self.print_final()
+def get_args_parser():
+    parser = argparse.ArgumentParser(
+        description='This tool is used to verify the producer performance.')
+    parser.add_argument(
+        '--bootstrap-servers', type=str, nargs='+', default=(),
+        help='host:port for cluster bootstrap server')
+    parser.add_argument(
+        '--topic', type=str,
+        help='Topic name for test (default: kafka-python-benchmark-test)',
+        default='kafka-python-benchmark-test')
+    parser.add_argument(
+        '--num-records', type=int,
+        help='number of messages to produce (default: 1000000)',
+        default=1000000)
+    parser.add_argument(
+        '--record-size', type=int,
+        help='message size in bytes (default: 100)',
+        default=100)
+    parser.add_argument(
+        '--producer-config', type=str, nargs='+', default=(),
+        help='kafka producer related configuaration properties like '
+             'bootstrap_servers,client_id etc..')
+    parser.add_argument(
+        '--stats-interval', type=int,
+        help='Interval in seconds for stats reporting to console (default: 5)',
+        default=5)
+    parser.add_argument(
+        '--raw-metrics', action='store_true',
+        help='Enable this flag to print full metrics dict on each interval')
+    return parser
+if __name__ == '__main__':
+    args = get_args_parser().parse_args()
+    ProducerPerformance.run(args)

kafka/benchmarks/record_batch_compose.py ADDED Viewed

@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+from __future__ import print_function
+import hashlib
+import itertools
+import os
+import random
+import pyperf
+from kafka.record.memory_records import MemoryRecordsBuilder
+DEFAULT_BATCH_SIZE = 1600 * 1024
+KEY_SIZE = 6
+VALUE_SIZE = 60
+TIMESTAMP_RANGE = [1505824130000, 1505824140000]
+# With values above v1 record is 100 bytes, so 10 000 bytes for 100 messages
+MESSAGES_PER_BATCH = 100
+def random_bytes(length):
+    buffer = bytearray(length)
+    for i in range(length):
+        buffer[i] = random.randint(0, 255)
+    return bytes(buffer)
+def prepare():
+    return iter(itertools.cycle([
+        (random_bytes(KEY_SIZE),
+         random_bytes(VALUE_SIZE),
+         random.randint(*TIMESTAMP_RANGE)
+         )
+        for _ in range(int(MESSAGES_PER_BATCH * 1.94))
+    ]))
+def finalize(results):
+    # Just some strange code to make sure PyPy does execute the main code
+    # properly, without optimizing it away
+    hash_val = hashlib.md5()
+    for buf in results:
+        hash_val.update(buf)
+    print(hash_val, file=open(os.devnull, "w"))
+def func(loops, magic):
+    # Jit can optimize out the whole function if the result is the same each
+    # time, so we need some randomized input data )
+    precomputed_samples = prepare()
+    results = []
+    # Main benchmark code.
+    t0 = pyperf.perf_counter()
+    for _ in range(loops):
+        batch = MemoryRecordsBuilder(
+            magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0)
+        for _ in range(MESSAGES_PER_BATCH):
+            key, value, timestamp = next(precomputed_samples)
+            size = batch.append(
+                timestamp=timestamp, key=key, value=value)
+            assert size
+        batch.close()
+        results.append(batch.buffer())
+    res = pyperf.perf_counter() - t0
+    finalize(results)
+    return res
+if __name__ == '__main__':
+    runner = pyperf.Runner()
+    runner.bench_time_func('batch_append_v0', func, 0)
+    runner.bench_time_func('batch_append_v1', func, 1)
+    runner.bench_time_func('batch_append_v2', func, 2)

kafka/benchmarks/record_batch_read.py ADDED Viewed

@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import hashlib
+import itertools
+import os
+import random
+import pyperf
+from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder
+DEFAULT_BATCH_SIZE = 1600 * 1024
+KEY_SIZE = 6
+VALUE_SIZE = 60
+TIMESTAMP_RANGE = [1505824130000, 1505824140000]
+BATCH_SAMPLES = 5
+MESSAGES_PER_BATCH = 100
+def random_bytes(length):
+    buffer = bytearray(length)
+    for i in range(length):
+        buffer[i] = random.randint(0, 255)
+    return bytes(buffer)
+def prepare(magic):
+    samples = []
+    for _ in range(BATCH_SAMPLES):
+        batch = MemoryRecordsBuilder(
+            magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0)
+        for _ in range(MESSAGES_PER_BATCH):
+            size = batch.append(
+                random.randint(*TIMESTAMP_RANGE),
+                random_bytes(KEY_SIZE),
+                random_bytes(VALUE_SIZE),
+                headers=[])
+            assert size
+        batch.close()
+        samples.append(bytes(batch.buffer()))
+    return iter(itertools.cycle(samples))
+def finalize(results):
+    # Just some strange code to make sure PyPy does execute the code above
+    # properly
+    hash_val = hashlib.md5()
+    for buf in results:
+        hash_val.update(buf)
+    print(hash_val, file=open(os.devnull, "w"))
+def func(loops, magic):
+    # Jit can optimize out the whole function if the result is the same each
+    # time, so we need some randomized input data )
+    precomputed_samples = prepare(magic)
+    results = []
+    # Main benchmark code.
+    batch_data = next(precomputed_samples)
+    t0 = pyperf.perf_counter()
+    for _ in range(loops):
+        records = MemoryRecords(batch_data)
+        while records.has_next():
+            batch = records.next_batch()
+            batch.validate_crc()
+            for record in batch:
+                results.append(record.value)
+    res = pyperf.perf_counter() - t0
+    finalize(results)
+    return res
+if __name__ == '__main__':
+    runner = pyperf.Runner()
+    runner.bench_time_func('batch_read_v0', func, 0)
+    runner.bench_time_func('batch_read_v1', func, 1)
+    runner.bench_time_func('batch_read_v2', func, 2)

kafka-python 2.1.4__py2.py3-none-any.whl → 2.2.0__py2.py3-none-any.whl

kafka-python 2.1.4__py2.py3-none-any.whl → 2.2.0__py2.py3-none-any.whl