kafka-python 2.1.4__py2.py3-none-any.whl → 2.2.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kafka/admin/client.py +6 -6
- kafka/benchmarks/__init__.py +0 -0
- kafka/benchmarks/consumer_performance.py +142 -0
- kafka/benchmarks/load_example.py +110 -0
- kafka/benchmarks/producer_performance.py +153 -0
- kafka/benchmarks/record_batch_compose.py +78 -0
- kafka/benchmarks/record_batch_read.py +83 -0
- kafka/benchmarks/varint_speed.py +434 -0
- kafka/client_async.py +40 -2
- kafka/cluster.py +18 -13
- kafka/conn.py +7 -5
- kafka/consumer/fetcher.py +309 -194
- kafka/consumer/group.py +73 -63
- kafka/consumer/subscription_state.py +84 -36
- kafka/coordinator/base.py +60 -26
- kafka/coordinator/consumer.py +40 -40
- kafka/errors.py +68 -93
- kafka/metrics/compound_stat.py +2 -0
- kafka/metrics/kafka_metric.py +3 -1
- kafka/metrics/metric_config.py +2 -0
- kafka/metrics/metric_name.py +1 -0
- kafka/metrics/quota.py +2 -0
- kafka/metrics/stats/avg.py +2 -0
- kafka/metrics/stats/count.py +2 -0
- kafka/metrics/stats/histogram.py +6 -0
- kafka/metrics/stats/max_stat.py +2 -0
- kafka/metrics/stats/min_stat.py +2 -0
- kafka/metrics/stats/percentile.py +2 -0
- kafka/metrics/stats/percentiles.py +3 -0
- kafka/metrics/stats/rate.py +3 -0
- kafka/metrics/stats/sampled_stat.py +2 -0
- kafka/metrics/stats/sensor.py +4 -0
- kafka/metrics/stats/total.py +2 -0
- kafka/producer/future.py +3 -3
- kafka/producer/kafka.py +291 -58
- kafka/producer/record_accumulator.py +293 -214
- kafka/producer/sender.py +355 -75
- kafka/producer/transaction_manager.py +981 -0
- kafka/protocol/add_offsets_to_txn.py +59 -0
- kafka/protocol/add_partitions_to_txn.py +63 -0
- kafka/protocol/end_txn.py +58 -0
- kafka/protocol/fetch.py +6 -0
- kafka/protocol/group.py +17 -3
- kafka/protocol/init_producer_id.py +46 -0
- kafka/protocol/txn_offset_commit.py +78 -0
- kafka/record/abc.py +10 -0
- kafka/record/default_records.py +101 -12
- kafka/record/legacy_records.py +12 -3
- kafka/record/memory_records.py +54 -6
- kafka/version.py +1 -1
- {kafka_python-2.1.4.dist-info → kafka_python-2.2.0.dist-info}/METADATA +3 -1
- {kafka_python-2.1.4.dist-info → kafka_python-2.2.0.dist-info}/RECORD +54 -42
- {kafka_python-2.1.4.dist-info → kafka_python-2.2.0.dist-info}/WHEEL +1 -1
- kafka/producer/buffer.py +0 -115
- {kafka_python-2.1.4.dist-info → kafka_python-2.2.0.dist-info}/top_level.txt +0 -0
kafka/admin/client.py
CHANGED
|
@@ -15,7 +15,7 @@ from kafka.client_async import KafkaClient, selectors
|
|
|
15
15
|
from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment, ConsumerProtocol
|
|
16
16
|
import kafka.errors as Errors
|
|
17
17
|
from kafka.errors import (
|
|
18
|
-
IncompatibleBrokerVersion, KafkaConfigurationError,
|
|
18
|
+
IncompatibleBrokerVersion, KafkaConfigurationError, UnknownTopicOrPartitionError,
|
|
19
19
|
UnrecognizedBrokerVersion, IllegalArgumentError)
|
|
20
20
|
from kafka.metrics import MetricConfig, Metrics
|
|
21
21
|
from kafka.protocol.admin import (
|
|
@@ -411,7 +411,7 @@ class KafkaAdminClient(object):
|
|
|
411
411
|
# extra values (usually the error_message)
|
|
412
412
|
for topic, error_code in map(lambda e: e[:2], topic_error_tuples):
|
|
413
413
|
error_type = Errors.for_code(error_code)
|
|
414
|
-
if tries and error_type is NotControllerError:
|
|
414
|
+
if tries and error_type is Errors.NotControllerError:
|
|
415
415
|
# No need to inspect the rest of the errors for
|
|
416
416
|
# non-retriable errors because NotControllerError should
|
|
417
417
|
# either be thrown for all errors or no errors.
|
|
@@ -431,13 +431,13 @@ class KafkaAdminClient(object):
|
|
|
431
431
|
for topic, partition_results in response.replication_election_results:
|
|
432
432
|
for partition_id, error_code in map(lambda e: e[:2], partition_results):
|
|
433
433
|
error_type = Errors.for_code(error_code)
|
|
434
|
-
if tries and error_type is NotControllerError:
|
|
434
|
+
if tries and error_type is Errors.NotControllerError:
|
|
435
435
|
# No need to inspect the rest of the errors for
|
|
436
436
|
# non-retriable errors because NotControllerError should
|
|
437
437
|
# either be thrown for all errors or no errors.
|
|
438
438
|
self._refresh_controller_id()
|
|
439
439
|
return False
|
|
440
|
-
elif error_type not in
|
|
440
|
+
elif error_type not in (Errors.NoError, Errors.ElectionNotNeededError):
|
|
441
441
|
raise error_type(
|
|
442
442
|
"Request '{}' failed with response '{}'."
|
|
443
443
|
.format(request, response))
|
|
@@ -1460,9 +1460,9 @@ class KafkaAdminClient(object):
|
|
|
1460
1460
|
list: List of tuples of Consumer Groups.
|
|
1461
1461
|
|
|
1462
1462
|
Raises:
|
|
1463
|
-
|
|
1463
|
+
CoordinatorNotAvailableError: The coordinator is not
|
|
1464
1464
|
available, so cannot process requests.
|
|
1465
|
-
|
|
1465
|
+
CoordinatorLoadInProgressError: The coordinator is loading and
|
|
1466
1466
|
hence can't process requests.
|
|
1467
1467
|
"""
|
|
1468
1468
|
# While we return a list, internally use a set to prevent duplicates
|
|
File without changes
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# Adapted from https://github.com/mrafayaleem/kafka-jython
|
|
3
|
+
|
|
4
|
+
from __future__ import absolute_import, print_function
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import pprint
|
|
8
|
+
import sys
|
|
9
|
+
import threading
|
|
10
|
+
import time
|
|
11
|
+
import traceback
|
|
12
|
+
|
|
13
|
+
from kafka import KafkaConsumer
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ConsumerPerformance(object):
|
|
17
|
+
@staticmethod
|
|
18
|
+
def run(args):
|
|
19
|
+
try:
|
|
20
|
+
props = {}
|
|
21
|
+
for prop in args.consumer_config:
|
|
22
|
+
k, v = prop.split('=')
|
|
23
|
+
try:
|
|
24
|
+
v = int(v)
|
|
25
|
+
except ValueError:
|
|
26
|
+
pass
|
|
27
|
+
if v == 'None':
|
|
28
|
+
v = None
|
|
29
|
+
elif v == 'False':
|
|
30
|
+
v = False
|
|
31
|
+
elif v == 'True':
|
|
32
|
+
v = True
|
|
33
|
+
props[k] = v
|
|
34
|
+
|
|
35
|
+
print('Initializing Consumer...')
|
|
36
|
+
props['bootstrap_servers'] = args.bootstrap_servers
|
|
37
|
+
props['auto_offset_reset'] = 'earliest'
|
|
38
|
+
if 'group_id' not in props:
|
|
39
|
+
props['group_id'] = 'kafka-consumer-benchmark'
|
|
40
|
+
if 'consumer_timeout_ms' not in props:
|
|
41
|
+
props['consumer_timeout_ms'] = 10000
|
|
42
|
+
props['metrics_sample_window_ms'] = args.stats_interval * 1000
|
|
43
|
+
for k, v in props.items():
|
|
44
|
+
print('---> {0}={1}'.format(k, v))
|
|
45
|
+
consumer = KafkaConsumer(args.topic, **props)
|
|
46
|
+
print('---> group_id={0}'.format(consumer.config['group_id']))
|
|
47
|
+
print('---> report stats every {0} secs'.format(args.stats_interval))
|
|
48
|
+
print('---> raw metrics? {0}'.format(args.raw_metrics))
|
|
49
|
+
timer_stop = threading.Event()
|
|
50
|
+
timer = StatsReporter(args.stats_interval, consumer,
|
|
51
|
+
event=timer_stop,
|
|
52
|
+
raw_metrics=args.raw_metrics)
|
|
53
|
+
timer.start()
|
|
54
|
+
print('-> OK!')
|
|
55
|
+
print()
|
|
56
|
+
|
|
57
|
+
start_time = time.time()
|
|
58
|
+
records = 0
|
|
59
|
+
for msg in consumer:
|
|
60
|
+
records += 1
|
|
61
|
+
if records >= args.num_records:
|
|
62
|
+
break
|
|
63
|
+
|
|
64
|
+
end_time = time.time()
|
|
65
|
+
timer_stop.set()
|
|
66
|
+
timer.join()
|
|
67
|
+
print('Consumed {0} records'.format(records))
|
|
68
|
+
print('Execution time:', end_time - start_time, 'secs')
|
|
69
|
+
|
|
70
|
+
except Exception:
|
|
71
|
+
exc_info = sys.exc_info()
|
|
72
|
+
traceback.print_exception(*exc_info)
|
|
73
|
+
sys.exit(1)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class StatsReporter(threading.Thread):
|
|
77
|
+
def __init__(self, interval, consumer, event=None, raw_metrics=False):
|
|
78
|
+
super(StatsReporter, self).__init__()
|
|
79
|
+
self.interval = interval
|
|
80
|
+
self.consumer = consumer
|
|
81
|
+
self.event = event
|
|
82
|
+
self.raw_metrics = raw_metrics
|
|
83
|
+
|
|
84
|
+
def print_stats(self):
|
|
85
|
+
metrics = self.consumer.metrics()
|
|
86
|
+
if self.raw_metrics:
|
|
87
|
+
pprint.pprint(metrics)
|
|
88
|
+
else:
|
|
89
|
+
print('{records-consumed-rate} records/sec ({bytes-consumed-rate} B/sec),'
|
|
90
|
+
' {fetch-latency-avg} latency,'
|
|
91
|
+
' {fetch-rate} fetch/s,'
|
|
92
|
+
' {fetch-size-avg} fetch size,'
|
|
93
|
+
' {records-lag-max} max record lag,'
|
|
94
|
+
' {records-per-request-avg} records/req'
|
|
95
|
+
.format(**metrics['consumer-fetch-manager-metrics']))
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def print_final(self):
|
|
99
|
+
self.print_stats()
|
|
100
|
+
|
|
101
|
+
def run(self):
|
|
102
|
+
while self.event and not self.event.wait(self.interval):
|
|
103
|
+
self.print_stats()
|
|
104
|
+
else:
|
|
105
|
+
self.print_final()
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_args_parser():
|
|
109
|
+
parser = argparse.ArgumentParser(
|
|
110
|
+
description='This tool is used to verify the consumer performance.')
|
|
111
|
+
|
|
112
|
+
parser.add_argument(
|
|
113
|
+
'--bootstrap-servers', type=str, nargs='+', default=(),
|
|
114
|
+
help='host:port for cluster bootstrap servers')
|
|
115
|
+
parser.add_argument(
|
|
116
|
+
'--topic', type=str,
|
|
117
|
+
help='Topic for consumer test (default: kafka-python-benchmark-test)',
|
|
118
|
+
default='kafka-python-benchmark-test')
|
|
119
|
+
parser.add_argument(
|
|
120
|
+
'--num-records', type=int,
|
|
121
|
+
help='number of messages to consume (default: 1000000)',
|
|
122
|
+
default=1000000)
|
|
123
|
+
parser.add_argument(
|
|
124
|
+
'--consumer-config', type=str, nargs='+', default=(),
|
|
125
|
+
help='kafka consumer related configuration properties like '
|
|
126
|
+
'bootstrap_servers,client_id etc..')
|
|
127
|
+
parser.add_argument(
|
|
128
|
+
'--fixture-compression', type=str,
|
|
129
|
+
help='specify a compression type for use with broker fixtures / producer')
|
|
130
|
+
parser.add_argument(
|
|
131
|
+
'--stats-interval', type=int,
|
|
132
|
+
help='Interval in seconds for stats reporting to console (default: 5)',
|
|
133
|
+
default=5)
|
|
134
|
+
parser.add_argument(
|
|
135
|
+
'--raw-metrics', action='store_true',
|
|
136
|
+
help='Enable this flag to print full metrics dict on each interval')
|
|
137
|
+
return parser
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
if __name__ == '__main__':
|
|
141
|
+
args = get_args_parser().parse_args()
|
|
142
|
+
ConsumerPerformance.run(args)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
from __future__ import print_function
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import logging
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
from kafka import KafkaConsumer, KafkaProducer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Producer(threading.Thread):
|
|
13
|
+
|
|
14
|
+
def __init__(self, bootstrap_servers, topic, stop_event, msg_size):
|
|
15
|
+
super(Producer, self).__init__()
|
|
16
|
+
self.bootstrap_servers = bootstrap_servers
|
|
17
|
+
self.topic = topic
|
|
18
|
+
self.stop_event = stop_event
|
|
19
|
+
self.big_msg = b'1' * msg_size
|
|
20
|
+
|
|
21
|
+
def run(self):
|
|
22
|
+
producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers)
|
|
23
|
+
self.sent = 0
|
|
24
|
+
|
|
25
|
+
while not self.stop_event.is_set():
|
|
26
|
+
producer.send(self.topic, self.big_msg)
|
|
27
|
+
self.sent += 1
|
|
28
|
+
producer.flush()
|
|
29
|
+
producer.close()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Consumer(threading.Thread):
|
|
33
|
+
def __init__(self, bootstrap_servers, topic, stop_event, msg_size):
|
|
34
|
+
super(Consumer, self).__init__()
|
|
35
|
+
self.bootstrap_servers = bootstrap_servers
|
|
36
|
+
self.topic = topic
|
|
37
|
+
self.stop_event = stop_event
|
|
38
|
+
self.msg_size = msg_size
|
|
39
|
+
|
|
40
|
+
def run(self):
|
|
41
|
+
consumer = KafkaConsumer(bootstrap_servers=self.bootstrap_servers,
|
|
42
|
+
auto_offset_reset='earliest')
|
|
43
|
+
consumer.subscribe([self.topic])
|
|
44
|
+
self.valid = 0
|
|
45
|
+
self.invalid = 0
|
|
46
|
+
|
|
47
|
+
for message in consumer:
|
|
48
|
+
if len(message.value) == self.msg_size:
|
|
49
|
+
self.valid += 1
|
|
50
|
+
else:
|
|
51
|
+
print('Invalid message:', len(message.value), self.msg_size)
|
|
52
|
+
self.invalid += 1
|
|
53
|
+
|
|
54
|
+
if self.stop_event.is_set():
|
|
55
|
+
break
|
|
56
|
+
consumer.close()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_args_parser():
|
|
60
|
+
parser = argparse.ArgumentParser(
|
|
61
|
+
description='This tool is used to demonstrate consumer and producer load.')
|
|
62
|
+
|
|
63
|
+
parser.add_argument(
|
|
64
|
+
'--bootstrap-servers', type=str, nargs='+', default=('localhost:9092'),
|
|
65
|
+
help='host:port for cluster bootstrap servers (default: localhost:9092)')
|
|
66
|
+
parser.add_argument(
|
|
67
|
+
'--topic', type=str,
|
|
68
|
+
help='Topic for load test (default: kafka-python-benchmark-load-example)',
|
|
69
|
+
default='kafka-python-benchmark-load-example')
|
|
70
|
+
parser.add_argument(
|
|
71
|
+
'--msg-size', type=int,
|
|
72
|
+
help='Message size, in bytes, for load test (default: 524288)',
|
|
73
|
+
default=524288)
|
|
74
|
+
parser.add_argument(
|
|
75
|
+
'--load-time', type=int,
|
|
76
|
+
help='number of seconds to run load test (default: 10)',
|
|
77
|
+
default=10)
|
|
78
|
+
parser.add_argument(
|
|
79
|
+
'--log-level', type=str,
|
|
80
|
+
help='Optional logging level for load test: ERROR|INFO|DEBUG etc',
|
|
81
|
+
default=None)
|
|
82
|
+
return parser
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def main(args):
|
|
86
|
+
if args.log_level:
|
|
87
|
+
logging.basicConfig(
|
|
88
|
+
format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
|
|
89
|
+
level=getattr(logging, args.log_level))
|
|
90
|
+
producer_stop = threading.Event()
|
|
91
|
+
consumer_stop = threading.Event()
|
|
92
|
+
threads = [
|
|
93
|
+
Producer(args.bootstrap_servers, args.topic, producer_stop, args.msg_size),
|
|
94
|
+
Consumer(args.bootstrap_servers, args.topic, consumer_stop, args.msg_size)
|
|
95
|
+
]
|
|
96
|
+
|
|
97
|
+
for t in threads:
|
|
98
|
+
t.start()
|
|
99
|
+
|
|
100
|
+
time.sleep(args.load_time)
|
|
101
|
+
producer_stop.set()
|
|
102
|
+
consumer_stop.set()
|
|
103
|
+
print('Messages sent: %d' % threads[0].sent)
|
|
104
|
+
print('Messages recvd: %d' % threads[1].valid)
|
|
105
|
+
print('Messages invalid: %d' % threads[1].invalid)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
if __name__ == "__main__":
|
|
109
|
+
args = get_args_parser().parse_args()
|
|
110
|
+
main(args)
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# Adapted from https://github.com/mrafayaleem/kafka-jython
|
|
3
|
+
|
|
4
|
+
from __future__ import absolute_import, print_function
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import pprint
|
|
8
|
+
import sys
|
|
9
|
+
import threading
|
|
10
|
+
import time
|
|
11
|
+
import traceback
|
|
12
|
+
|
|
13
|
+
from kafka.vendor.six.moves import range
|
|
14
|
+
|
|
15
|
+
from kafka import KafkaProducer
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ProducerPerformance(object):
|
|
19
|
+
@staticmethod
|
|
20
|
+
def run(args):
|
|
21
|
+
try:
|
|
22
|
+
props = {}
|
|
23
|
+
for prop in args.producer_config:
|
|
24
|
+
k, v = prop.split('=')
|
|
25
|
+
try:
|
|
26
|
+
v = int(v)
|
|
27
|
+
except ValueError:
|
|
28
|
+
pass
|
|
29
|
+
if v == 'None':
|
|
30
|
+
v = None
|
|
31
|
+
elif v == 'False':
|
|
32
|
+
v = False
|
|
33
|
+
elif v == 'True':
|
|
34
|
+
v = True
|
|
35
|
+
props[k] = v
|
|
36
|
+
|
|
37
|
+
print('Initializing producer...')
|
|
38
|
+
props['bootstrap_servers'] = args.bootstrap_servers
|
|
39
|
+
record = bytes(bytearray(args.record_size))
|
|
40
|
+
props['metrics_sample_window_ms'] = args.stats_interval * 1000
|
|
41
|
+
|
|
42
|
+
producer = KafkaProducer(**props)
|
|
43
|
+
for k, v in props.items():
|
|
44
|
+
print('---> {0}={1}'.format(k, v))
|
|
45
|
+
print('---> send {0} byte records'.format(args.record_size))
|
|
46
|
+
print('---> report stats every {0} secs'.format(args.stats_interval))
|
|
47
|
+
print('---> raw metrics? {0}'.format(args.raw_metrics))
|
|
48
|
+
timer_stop = threading.Event()
|
|
49
|
+
timer = StatsReporter(args.stats_interval, producer,
|
|
50
|
+
event=timer_stop,
|
|
51
|
+
raw_metrics=args.raw_metrics)
|
|
52
|
+
timer.start()
|
|
53
|
+
print('-> OK!')
|
|
54
|
+
print()
|
|
55
|
+
|
|
56
|
+
def _benchmark():
|
|
57
|
+
results = []
|
|
58
|
+
for i in range(args.num_records):
|
|
59
|
+
results.append(producer.send(topic=args.topic, value=record))
|
|
60
|
+
print("Send complete...")
|
|
61
|
+
producer.flush()
|
|
62
|
+
producer.close()
|
|
63
|
+
count_success, count_failure = 0, 0
|
|
64
|
+
for r in results:
|
|
65
|
+
if r.succeeded():
|
|
66
|
+
count_success += 1
|
|
67
|
+
elif r.failed():
|
|
68
|
+
count_failure += 1
|
|
69
|
+
else:
|
|
70
|
+
raise ValueError(r)
|
|
71
|
+
print("%d suceeded, %d failed" % (count_success, count_failure))
|
|
72
|
+
|
|
73
|
+
start_time = time.time()
|
|
74
|
+
_benchmark()
|
|
75
|
+
end_time = time.time()
|
|
76
|
+
timer_stop.set()
|
|
77
|
+
timer.join()
|
|
78
|
+
print('Execution time:', end_time - start_time, 'secs')
|
|
79
|
+
|
|
80
|
+
except Exception:
|
|
81
|
+
exc_info = sys.exc_info()
|
|
82
|
+
traceback.print_exception(*exc_info)
|
|
83
|
+
sys.exit(1)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class StatsReporter(threading.Thread):
|
|
87
|
+
def __init__(self, interval, producer, event=None, raw_metrics=False):
|
|
88
|
+
super(StatsReporter, self).__init__()
|
|
89
|
+
self.interval = interval
|
|
90
|
+
self.producer = producer
|
|
91
|
+
self.event = event
|
|
92
|
+
self.raw_metrics = raw_metrics
|
|
93
|
+
|
|
94
|
+
def print_stats(self):
|
|
95
|
+
metrics = self.producer.metrics()
|
|
96
|
+
if not metrics:
|
|
97
|
+
return
|
|
98
|
+
if self.raw_metrics:
|
|
99
|
+
pprint.pprint(metrics)
|
|
100
|
+
else:
|
|
101
|
+
print('{record-send-rate} records/sec ({byte-rate} B/sec),'
|
|
102
|
+
' {request-latency-avg} latency,'
|
|
103
|
+
' {record-size-avg} record size,'
|
|
104
|
+
' {batch-size-avg} batch size,'
|
|
105
|
+
' {records-per-request-avg} records/req'
|
|
106
|
+
.format(**metrics['producer-metrics']))
|
|
107
|
+
|
|
108
|
+
def print_final(self):
|
|
109
|
+
self.print_stats()
|
|
110
|
+
|
|
111
|
+
def run(self):
|
|
112
|
+
while self.event and not self.event.wait(self.interval):
|
|
113
|
+
self.print_stats()
|
|
114
|
+
else:
|
|
115
|
+
self.print_final()
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def get_args_parser():
|
|
119
|
+
parser = argparse.ArgumentParser(
|
|
120
|
+
description='This tool is used to verify the producer performance.')
|
|
121
|
+
|
|
122
|
+
parser.add_argument(
|
|
123
|
+
'--bootstrap-servers', type=str, nargs='+', default=(),
|
|
124
|
+
help='host:port for cluster bootstrap server')
|
|
125
|
+
parser.add_argument(
|
|
126
|
+
'--topic', type=str,
|
|
127
|
+
help='Topic name for test (default: kafka-python-benchmark-test)',
|
|
128
|
+
default='kafka-python-benchmark-test')
|
|
129
|
+
parser.add_argument(
|
|
130
|
+
'--num-records', type=int,
|
|
131
|
+
help='number of messages to produce (default: 1000000)',
|
|
132
|
+
default=1000000)
|
|
133
|
+
parser.add_argument(
|
|
134
|
+
'--record-size', type=int,
|
|
135
|
+
help='message size in bytes (default: 100)',
|
|
136
|
+
default=100)
|
|
137
|
+
parser.add_argument(
|
|
138
|
+
'--producer-config', type=str, nargs='+', default=(),
|
|
139
|
+
help='kafka producer related configuaration properties like '
|
|
140
|
+
'bootstrap_servers,client_id etc..')
|
|
141
|
+
parser.add_argument(
|
|
142
|
+
'--stats-interval', type=int,
|
|
143
|
+
help='Interval in seconds for stats reporting to console (default: 5)',
|
|
144
|
+
default=5)
|
|
145
|
+
parser.add_argument(
|
|
146
|
+
'--raw-metrics', action='store_true',
|
|
147
|
+
help='Enable this flag to print full metrics dict on each interval')
|
|
148
|
+
return parser
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
if __name__ == '__main__':
|
|
152
|
+
args = get_args_parser().parse_args()
|
|
153
|
+
ProducerPerformance.run(args)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import print_function
|
|
3
|
+
import hashlib
|
|
4
|
+
import itertools
|
|
5
|
+
import os
|
|
6
|
+
import random
|
|
7
|
+
|
|
8
|
+
import pyperf
|
|
9
|
+
|
|
10
|
+
from kafka.record.memory_records import MemoryRecordsBuilder
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
DEFAULT_BATCH_SIZE = 1600 * 1024
|
|
14
|
+
KEY_SIZE = 6
|
|
15
|
+
VALUE_SIZE = 60
|
|
16
|
+
TIMESTAMP_RANGE = [1505824130000, 1505824140000]
|
|
17
|
+
|
|
18
|
+
# With values above v1 record is 100 bytes, so 10 000 bytes for 100 messages
|
|
19
|
+
MESSAGES_PER_BATCH = 100
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def random_bytes(length):
|
|
23
|
+
buffer = bytearray(length)
|
|
24
|
+
for i in range(length):
|
|
25
|
+
buffer[i] = random.randint(0, 255)
|
|
26
|
+
return bytes(buffer)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def prepare():
|
|
30
|
+
return iter(itertools.cycle([
|
|
31
|
+
(random_bytes(KEY_SIZE),
|
|
32
|
+
random_bytes(VALUE_SIZE),
|
|
33
|
+
random.randint(*TIMESTAMP_RANGE)
|
|
34
|
+
)
|
|
35
|
+
for _ in range(int(MESSAGES_PER_BATCH * 1.94))
|
|
36
|
+
]))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def finalize(results):
|
|
40
|
+
# Just some strange code to make sure PyPy does execute the main code
|
|
41
|
+
# properly, without optimizing it away
|
|
42
|
+
hash_val = hashlib.md5()
|
|
43
|
+
for buf in results:
|
|
44
|
+
hash_val.update(buf)
|
|
45
|
+
print(hash_val, file=open(os.devnull, "w"))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def func(loops, magic):
|
|
49
|
+
# Jit can optimize out the whole function if the result is the same each
|
|
50
|
+
# time, so we need some randomized input data )
|
|
51
|
+
precomputed_samples = prepare()
|
|
52
|
+
results = []
|
|
53
|
+
|
|
54
|
+
# Main benchmark code.
|
|
55
|
+
t0 = pyperf.perf_counter()
|
|
56
|
+
for _ in range(loops):
|
|
57
|
+
batch = MemoryRecordsBuilder(
|
|
58
|
+
magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0)
|
|
59
|
+
for _ in range(MESSAGES_PER_BATCH):
|
|
60
|
+
key, value, timestamp = next(precomputed_samples)
|
|
61
|
+
size = batch.append(
|
|
62
|
+
timestamp=timestamp, key=key, value=value)
|
|
63
|
+
assert size
|
|
64
|
+
batch.close()
|
|
65
|
+
results.append(batch.buffer())
|
|
66
|
+
|
|
67
|
+
res = pyperf.perf_counter() - t0
|
|
68
|
+
|
|
69
|
+
finalize(results)
|
|
70
|
+
|
|
71
|
+
return res
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
if __name__ == '__main__':
|
|
75
|
+
runner = pyperf.Runner()
|
|
76
|
+
runner.bench_time_func('batch_append_v0', func, 0)
|
|
77
|
+
runner.bench_time_func('batch_append_v1', func, 1)
|
|
78
|
+
runner.bench_time_func('batch_append_v2', func, 2)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
from __future__ import print_function
|
|
3
|
+
import hashlib
|
|
4
|
+
import itertools
|
|
5
|
+
import os
|
|
6
|
+
import random
|
|
7
|
+
|
|
8
|
+
import pyperf
|
|
9
|
+
|
|
10
|
+
from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
DEFAULT_BATCH_SIZE = 1600 * 1024
|
|
14
|
+
KEY_SIZE = 6
|
|
15
|
+
VALUE_SIZE = 60
|
|
16
|
+
TIMESTAMP_RANGE = [1505824130000, 1505824140000]
|
|
17
|
+
|
|
18
|
+
BATCH_SAMPLES = 5
|
|
19
|
+
MESSAGES_PER_BATCH = 100
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def random_bytes(length):
|
|
23
|
+
buffer = bytearray(length)
|
|
24
|
+
for i in range(length):
|
|
25
|
+
buffer[i] = random.randint(0, 255)
|
|
26
|
+
return bytes(buffer)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def prepare(magic):
|
|
30
|
+
samples = []
|
|
31
|
+
for _ in range(BATCH_SAMPLES):
|
|
32
|
+
batch = MemoryRecordsBuilder(
|
|
33
|
+
magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0)
|
|
34
|
+
for _ in range(MESSAGES_PER_BATCH):
|
|
35
|
+
size = batch.append(
|
|
36
|
+
random.randint(*TIMESTAMP_RANGE),
|
|
37
|
+
random_bytes(KEY_SIZE),
|
|
38
|
+
random_bytes(VALUE_SIZE),
|
|
39
|
+
headers=[])
|
|
40
|
+
assert size
|
|
41
|
+
batch.close()
|
|
42
|
+
samples.append(bytes(batch.buffer()))
|
|
43
|
+
|
|
44
|
+
return iter(itertools.cycle(samples))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def finalize(results):
|
|
48
|
+
# Just some strange code to make sure PyPy does execute the code above
|
|
49
|
+
# properly
|
|
50
|
+
hash_val = hashlib.md5()
|
|
51
|
+
for buf in results:
|
|
52
|
+
hash_val.update(buf)
|
|
53
|
+
print(hash_val, file=open(os.devnull, "w"))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def func(loops, magic):
|
|
57
|
+
# Jit can optimize out the whole function if the result is the same each
|
|
58
|
+
# time, so we need some randomized input data )
|
|
59
|
+
precomputed_samples = prepare(magic)
|
|
60
|
+
results = []
|
|
61
|
+
|
|
62
|
+
# Main benchmark code.
|
|
63
|
+
batch_data = next(precomputed_samples)
|
|
64
|
+
t0 = pyperf.perf_counter()
|
|
65
|
+
for _ in range(loops):
|
|
66
|
+
records = MemoryRecords(batch_data)
|
|
67
|
+
while records.has_next():
|
|
68
|
+
batch = records.next_batch()
|
|
69
|
+
batch.validate_crc()
|
|
70
|
+
for record in batch:
|
|
71
|
+
results.append(record.value)
|
|
72
|
+
|
|
73
|
+
res = pyperf.perf_counter() - t0
|
|
74
|
+
finalize(results)
|
|
75
|
+
|
|
76
|
+
return res
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
if __name__ == '__main__':
|
|
80
|
+
runner = pyperf.Runner()
|
|
81
|
+
runner.bench_time_func('batch_read_v0', func, 0)
|
|
82
|
+
runner.bench_time_func('batch_read_v1', func, 1)
|
|
83
|
+
runner.bench_time_func('batch_read_v2', func, 2)
|