kafka-python 3.0.2__py3-none-any.whl → 3.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,4 +30,4 @@ class GetBrokerVersion:
30
30
  def command(cls, client, args):
31
31
  broker_id = int(args.broker)
32
32
  bvd = client.get_broker_version_data(broker_id)
33
- return {broker_id: '.'.join(map(str, bvd.broker_version))}
33
+ return {broker_id: bvd.broker_version_str}
kafka/cli/common.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import logging.config
2
3
 
3
4
 
4
5
  def add_connect_cli_args(parser, bootstrap_required=True):
@@ -37,9 +38,13 @@ def build_kwargs(props):
37
38
  def build_connect_kwargs(config):
38
39
  if not config.bootstrap_servers:
39
40
  raise ValueError('python -m kafka: error: the following arguments are required: -b/--bootstrap-servers')
41
+ # Accept both repeated -b flags and comma-separated lists within a single flag
42
+ bootstrap_servers = []
43
+ for entry in config.bootstrap_servers:
44
+ bootstrap_servers.extend(s.strip() for s in entry.split(',') if s.strip())
40
45
  kwargs = build_kwargs(config.extra_config)
41
46
  kwargs.update({
42
- 'bootstrap_servers': config.bootstrap_servers,
47
+ 'bootstrap_servers': bootstrap_servers,
43
48
  'security_protocol': config.security_protocol,
44
49
  'sasl_mechanism': config.sasl_mechanism,
45
50
  'sasl_plain_username': config.sasl_user,
@@ -59,12 +64,60 @@ def add_logging_cli_args(parser):
59
64
  logging_group.add_argument(
60
65
  '-D', '--disable-logger', type=str, action='append',
61
66
  help='disable a specific logger. Can be provided multiple times.')
67
+ logging_group.add_argument(
68
+ '--log-format', type=str, default=None,
69
+ help='log message format string, passed to logging.Formatter')
70
+ logging_group.add_argument(
71
+ '--log-date-format', type=str, default=None,
72
+ help='log date format string, passed to logging.Formatter')
73
+ logging_group.add_argument(
74
+ '--log-file', type=str, default=None,
75
+ help='write logs to this file instead of stderr')
76
+ logging_group.add_argument(
77
+ '--log-config', type=str, default=None,
78
+ help='path to a logging configuration file for full control over handlers, '
79
+ 'formatters, etc. A .json (or .yaml/.yml, if PyYAML is installed) file is '
80
+ 'loaded as a logging.config.dictConfig; any other extension is loaded as a '
81
+ 'logging.config.fileConfig. The file owns handlers/formatters, so --log-format, '
82
+ '--log-date-format and --log-file are ignored, but --enable-logger and '
83
+ '--disable-logger still apply as logger level adjustments.')
84
+
85
+
86
+ def add_extended_cli_args(parser):
62
87
  extended_group = parser.add_argument_group('extended')
63
88
  extended_group.add_argument(
64
89
  '-C', '--extra-config', type=str, action='append',
65
90
  help='additional configuration properties for client in "key=val" format. Can be provided multiple times.')
66
91
 
67
92
 
93
+ def _load_log_config(path):
94
+ """Configure logging from a dictConfig (.json/.yaml) or fileConfig (.ini) file."""
95
+ if path.endswith(('.yaml', '.yml')):
96
+ try:
97
+ import yaml
98
+ except ImportError:
99
+ raise ValueError('PyYAML is required to load a YAML logging config: %s' % (path,))
100
+ with open(path) as f:
101
+ _dict_config(yaml.safe_load(f))
102
+ elif path.endswith('.json'):
103
+ import json
104
+ with open(path) as f:
105
+ _dict_config(json.load(f))
106
+ else:
107
+ # disable_existing_loggers defaults to True, which would silence loggers
108
+ # configured before this call (e.g. the loggers we are about to enable);
109
+ # keep them around so --enable-logger still works.
110
+ logging.config.fileConfig(path, disable_existing_loggers=False)
111
+
112
+
113
+ def _dict_config(cfg):
114
+ # Default disable_existing_loggers to False (dictConfig defaults to True), so a
115
+ # config that does not mention an already-created logger does not silence it.
116
+ # A config may still set it explicitly to opt into the stdlib default.
117
+ cfg.setdefault('disable_existing_loggers', False)
118
+ logging.config.dictConfig(cfg)
119
+
120
+
68
121
  def configure_logging(config):
69
122
  _LOGGING_LEVELS = {
70
123
  'NOTSET': 0,
@@ -74,22 +127,43 @@ def configure_logging(config):
74
127
  'ERROR': 40,
75
128
  'CRITICAL': 50,
76
129
  }
77
- if config.enable_logger is not None:
78
- log_level = _LOGGING_LEVELS[config.log_level.upper()]
79
- handler = logging.StreamHandler()
80
- handler.setLevel(log_level)
81
- handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
82
- for name in config.enable_logger:
83
- logger = logging.getLogger(name)
84
- logger.setLevel(log_level)
85
- logger.addHandler(handler)
130
+ log_level = _LOGGING_LEVELS[config.log_level.upper()]
131
+ if getattr(config, 'log_config', None):
132
+ _load_log_config(config.log_config)
133
+ if config.enable_logger is not None:
134
+ # Preserve the no-config behavior of --enable-logger: ONLY the named
135
+ # loggers emit. The config file owns the handlers/formatters, so rather
136
+ # than attach our own we reuse them: silence everything else by raising
137
+ # the root level, then let each enabled logger opt back in and propagate
138
+ # its records up to the config's handlers.
139
+ logging.getLogger().setLevel(logging.CRITICAL + 1)
140
+ for name in config.enable_logger:
141
+ logger = logging.getLogger(name)
142
+ logger.disabled = False
143
+ logger.setLevel(log_level)
86
144
  else:
87
- logging.basicConfig(level=_LOGGING_LEVELS[config.log_level.upper()])
88
- if config.disable_logger is not None:
89
- for name in config.disable_logger:
90
- logging.getLogger(name).setLevel(logging.CRITICAL + 1)
145
+ log_format = config.log_format or logging.BASIC_FORMAT
146
+ if config.enable_logger is not None:
147
+ if config.log_file:
148
+ handler = logging.FileHandler(config.log_file)
149
+ else:
150
+ handler = logging.StreamHandler()
151
+ handler.setLevel(log_level)
152
+ handler.setFormatter(logging.Formatter(log_format, datefmt=config.log_date_format))
153
+ for name in config.enable_logger:
154
+ logger = logging.getLogger(name)
155
+ logger.setLevel(log_level)
156
+ logger.addHandler(handler)
157
+ else:
158
+ logging.basicConfig(
159
+ level=log_level, format=log_format,
160
+ datefmt=config.log_date_format, filename=config.log_file)
161
+ # --disable-logger silences a named logger in either mode by raising its level.
162
+ for name in config.disable_logger or []:
163
+ logging.getLogger(name).setLevel(logging.CRITICAL + 1)
91
164
 
92
165
 
93
166
  def add_common_cli_args(parser, bootstrap_required=True):
94
167
  add_connect_cli_args(parser, bootstrap_required)
95
168
  add_logging_cli_args(parser)
169
+ add_extended_cli_args(parser)
kafka/cluster.py CHANGED
@@ -125,9 +125,8 @@ class ClusterMetadata:
125
125
  if ttl_ms == 0:
126
126
  try:
127
127
  await self.refresh_metadata()
128
- except Errors.KafkaError as exc:
129
- log.debug('Metadata refresh failed: %s', exc)
130
- log.exception(exc)
128
+ except Errors.KafkaError:
129
+ log.debug('Metadata refresh failed', exc_info=True)
131
130
  continue
132
131
  try:
133
132
  log.debug('Sleeping %s for next Metadata refresh', ttl_ms / 1000)
@@ -168,7 +167,6 @@ class ClusterMetadata:
168
167
  raise Errors.NodeNotReadyError('metadata')
169
168
  else:
170
169
  self._manager.reset_backoff('metadata')
171
- log.info(f'Metadata refresh (node_id={node_id})')
172
170
  try:
173
171
  request = self.metadata_request()
174
172
  log.debug("Sending metadata request %s to node %s", request, node_id)
@@ -177,7 +175,7 @@ class ClusterMetadata:
177
175
  log.error('Metadata refresh: failed %s', exc)
178
176
  self.failed_update(exc)
179
177
  raise
180
- log.debug('Metadata refresh: success')
178
+ log.debug(f'Metadata refresh: success (node_id={node_id})')
181
179
  self.update_metadata(response)
182
180
 
183
181
  def _generate_bootstrap_brokers(self):
kafka/consumer/fetcher.py CHANGED
@@ -232,9 +232,18 @@ class Fetcher:
232
232
  # No records yet. Block until either an in-flight fetch
233
233
  # completes (records may have arrived) or a pending offset-reset
234
234
  # task completes (positions become available, enabling a fetch
235
- # on the next caller iteration). add_both fires synchronously on
236
- # already-done futures, closing the race where a future resolves
237
- # between scheduling and the wait setup.
235
+ # on the next caller iteration).
236
+ #
237
+ # add_both fires synchronously on an already-done future: if a fetch
238
+ # response lands between the drain above and this wait setup, _wake
239
+ # fires immediately so we re-drain instead of stalling for the full
240
+ # timeout.
241
+ #
242
+ # This relies on _fetch_futures holding only *recent* completions.
243
+ # otherwise a fetch that completed and was already drained iterations
244
+ # ago lingers behind a slow broker's in-flight fetch and re-fires
245
+ # _wake on every call, busy-looping the poll loop until that slow
246
+ # fetch finally returns.
238
247
  waited_on = list(self._fetch_futures)
239
248
  if self._reset_task is not None and not self._reset_task.is_done:
240
249
  waited_on.append(self._reset_task)
@@ -277,21 +286,66 @@ class Fetcher:
277
286
  future.add_both(self._clear_pending_fetch_request, node_id)
278
287
  futures.append(future)
279
288
  self._fetch_futures.extend(futures)
280
- self._clean_done_fetch_futures()
289
+ await self._clean_done_fetch_futures()
281
290
  return futures
282
291
 
283
- def _clean_done_fetch_futures(self):
284
- while True:
285
- if not self._fetch_futures:
286
- break
287
- if not self._fetch_futures[0].is_done:
288
- break
289
- self._fetch_futures.popleft()
292
+ async def _clean_done_fetch_futures(self):
293
+ # Drop every completed fetch future. With multiple brokers, fetches
294
+ # may complete out of order. fetch_records() relies on _fetch_futures
295
+ # holding only recent completions (it fires _wake synchronously on any
296
+ # done future to avoid stalling -- see the wait setup there); a
297
+ # lingering stale completion re-fires that wake on every call and busy-
298
+ # loops the poll loop until the slow broker's in-flight fetch returns.
299
+ #
300
+ # Threading: this REBINDS self._fetch_futures, which must happen on the
301
+ # IO thread so it never races the foreground's list(self._fetch_futures)
302
+ # read in fetch_records(). Defined async to enforce that -- the body
303
+ # can only run by being driven on the IO loop (awaited from another
304
+ # coroutine, or scheduled via manager.run/call_soon), so the rebind
305
+ # always executes on the IO thread regardless of who initiates it.
306
+ # The rebind is a single atomic attribute store, so a foreground reader
307
+ # always sees either the old or the new deque, never a half-cleaned one.
308
+ #
309
+ # Two alternate designs we considered (either would remove the need for
310
+ # this "evict every done future + rebind" dance):
311
+ #
312
+ # 1. Wakeup flag (Apache Kafka Java client, FetchBuffer). Instead of
313
+ # waiting on the fetch-future objects, wait on a single consumable
314
+ # signal: the IO thread sets a flag (wokenup) when it buffers a
315
+ # completed fetch; the foreground's wait loops `while not woken:
316
+ # await` and consumes the flag (compareAndSet true->false) on each
317
+ # pass. Because the signal is cleared on consumption and is not
318
+ # re-derived from lingering future objects, a stale/drained
319
+ # completion cannot re-trigger it -- so no busy-loop and no
320
+ # per-call cleanup of a future list at all. This is the most
321
+ # faithful port of the threaded Java consumer's design.
322
+ #
323
+ # 2. Per-node fetch tracking. Key fetches by broker: dict[node_id,
324
+ # deque] (or just dict[node_id, Future], since _create_fetch_-
325
+ # requests keeps at most one in-flight fetch per node). Within a
326
+ # single connection responses return in request order, so each
327
+ # per-node deque completes in order and the simple head-only
328
+ # popleft cleanup is correct again -- no out-of-order stranding,
329
+ # and cleanup is an in-place popleft (atomic, no rebind, so the
330
+ # threading note above goes away). This structure could also
331
+ # subsume _nodes_with_pending_fetch_requests entirely ("pending"
332
+ # == the node's last future is not done), collapsing two
333
+ # structures into one source of truth.
334
+ if not self._fetch_futures:
335
+ return
336
+ self._fetch_futures = collections.deque(
337
+ fut for fut in self._fetch_futures if not fut.is_done)
290
338
 
291
339
  def in_flight_fetches(self):
292
- """Return True if there are any unprocessed FetchRequests in flight."""
293
- self._clean_done_fetch_futures()
294
- return bool(self._fetch_futures)
340
+ """Return True if there are any unprocessed (incomplete) FetchRequests
341
+ in flight."""
342
+
343
+ # Read-only on purpose: this may be called from the foreground thread,
344
+ # which must not mutate _fetch_futures (see _clean_done_fetch_futures --
345
+ # cleanup is IO-thread-only). Snapshot first so we never iterate the
346
+ # deque while the IO thread extends it, and check is_done directly
347
+ # rather than relying on a prior cleanup pass.
348
+ return any(not fut.is_done for fut in list(self._fetch_futures))
295
349
 
296
350
  def reset_offsets_if_needed(self, timeout_ms=None):
297
351
  """Schedule pending offset resets and return the in-flight Task.
kafka/coordinator/base.py CHANGED
@@ -1117,10 +1117,11 @@ class BaseCoordinator(ABC):
1117
1117
  try:
1118
1118
  send_time = time.monotonic()
1119
1119
  response = await self._manager.send(request, node_id=self.coordinator_id)
1120
- return self._handle_heartbeat_response(response, send_time)
1121
1120
  except Errors.KafkaError as exc:
1122
1121
  self._failed_request(self.coordinator_id, request, exc)
1123
1122
  raise
1123
+ else:
1124
+ return self._handle_heartbeat_response(response, send_time)
1124
1125
 
1125
1126
  def _handle_heartbeat_response(self, response, send_time):
1126
1127
  if self._sensors:
@@ -1138,8 +1139,7 @@ class BaseCoordinator(ABC):
1138
1139
  self.coordinator_id)
1139
1140
  self.coordinator_dead(error)
1140
1141
  elif error_type is Errors.RebalanceInProgressError:
1141
- heartbeat_log.warning("Heartbeat failed for group %s because it is"
1142
- " rebalancing", self.group_id)
1142
+ heartbeat_log.info("Group %s is rebalancing; rejoining.", self.group_id)
1143
1143
  self.request_rejoin()
1144
1144
  elif error_type is Errors.IllegalGenerationError:
1145
1145
  heartbeat_log.warning("Heartbeat failed for group %s: generation id is not "
@@ -1158,7 +1158,6 @@ class BaseCoordinator(ABC):
1158
1158
  heartbeat_log.error("Heartbeat failed: authorization error: %s", error)
1159
1159
  else:
1160
1160
  heartbeat_log.error("Heartbeat failed: Unhandled error: %s", error)
1161
-
1162
1161
  raise error
1163
1162
 
1164
1163
 
kafka/net/connection.py CHANGED
@@ -96,7 +96,7 @@ class KafkaConnection:
96
96
  return self._init_future
97
97
 
98
98
  def __await__(self):
99
- yield self.init_future
99
+ yield from self.init_future.__await__() # == await self.init_future; raises on failure
100
100
  return self
101
101
 
102
102
  @property
@@ -203,7 +203,7 @@ class KafkaConnection:
203
203
  if req_correlation_id != resp_correlation_id:
204
204
  return self.close(Errors.KafkaConnectionError('Received unrecognized correlation id'))
205
205
 
206
- self.net.unschedule(timeout_task)
206
+ self.net.cancel(timeout_task)
207
207
  latency_ms = (time.monotonic() - sent_time) * 1000
208
208
  if self._sensors:
209
209
  self._sensors.request_time.record(latency_ms)
@@ -239,8 +239,10 @@ class KafkaConnection:
239
239
  self._init_future.failure(error)
240
240
  if not self._close_future.is_done:
241
241
  if exc is None:
242
+ log.info('%s: Connection closed', self)
242
243
  self._close_future.success(None)
243
244
  else:
245
+ log.error('%s: Connection lost: %s', self, exc)
244
246
  self._close_future.failure(exc)
245
247
 
246
248
  def fail_in_flight_requests(self, error):
@@ -252,7 +254,7 @@ class KafkaConnection:
252
254
  future.failure(error)
253
255
  while self.in_flight_requests:
254
256
  _, future, _, _, timeout_task = self.in_flight_requests.popleft()
255
- self.net.unschedule(timeout_task)
257
+ self.net.cancel(timeout_task)
256
258
  future.failure(error)
257
259
 
258
260
  def connection_made(self, transport):
@@ -262,6 +264,13 @@ class KafkaConnection:
262
264
  To receive data, wait for data_received() calls.
263
265
  When the connection is closed, connection_lost() is called.
264
266
  """
267
+ if self.closed:
268
+ # A concurrent close() may have torn the connection down while the
269
+ # transport was still being built. Setting initializing=True below
270
+ # would resurrect an already-closed connection mid-teardown and
271
+ # break the fail_in_flight_requests invariant; refuse instead. The
272
+ # caller (manager._connect) closes the orphaned transport.
273
+ raise Errors.KafkaConnectionError('Connection closed during connect')
265
274
  self.transport = transport
266
275
  if self.transport.get_protocol() != self:
267
276
  self.transport.set_protocol(self)
@@ -276,6 +285,7 @@ class KafkaConnection:
276
285
  client_id=self.config['client_id'],
277
286
  receive_message_max_bytes=self.config['receive_message_max_bytes'],
278
287
  ident=log_prefix)
288
+ log.debug('%s: Connection made', self)
279
289
 
280
290
  def pause(self, v):
281
291
  self.paused.add(v)
@@ -362,6 +372,7 @@ class KafkaConnection:
362
372
  self.close(error)
363
373
  else:
364
374
  self._init_complete()
375
+ log.info('%s: Connected', self)
365
376
 
366
377
  async def _get_api_versions(self, timeout_at=None):
367
378
  if timeout_at is None:
@@ -400,11 +411,15 @@ class KafkaConnection:
400
411
  api_versions = {api_version.api_key: (api_version.min_version, api_version.max_version)
401
412
  for api_version in response.api_keys}
402
413
  bvd = BrokerVersionData(api_versions=api_versions)
403
- log.info('%s: Broker version identified as %s', self, '.'.join(map(str, bvd.broker_version)))
404
- if self.broker_version_data is None or self.broker_version_data > bvd:
414
+ if self.broker_version_data is None:
415
+ log.info('%s: Broker version identified as %s', self, bvd.broker_version_str)
405
416
  self.broker_version_data = bvd
406
- else:
407
- log.info('%s: Clamping client to user-supplied broker version %s', self, '.'.join(map(str, self.broker_version)))
417
+ elif self.broker_version_data > bvd:
418
+ log.info('%s: Broker version identified as %s (lower than user-supplied %s)', self, bvd.broker_version_str, self.broker_version_data.broker_version_str)
419
+ self.broker_version_data = bvd
420
+ elif self.broker_version_data is not None and self.broker_version_data < bvd:
421
+ log.info('%s: Broker version identified as %s; clamping to user-supplied %s', self, bvd.broker_version_str, self.broker_version_data.broker_version_str)
422
+ # No log if user-supplied api_version is the same as broker-identified version
408
423
 
409
424
  @property
410
425
  def sasl_enabled(self):
@@ -534,10 +549,7 @@ class SaslReauthenticator:
534
549
  """Cancel any pending re-auth and fail the drain awaiter if present.
535
550
  Called from KafkaConnection.connection_lost."""
536
551
  if self._task is not None:
537
- try:
538
- self._conn.net.unschedule(self._task)
539
- except (ValueError, KeyError):
540
- pass
552
+ self._conn.net.cancel(self._task)
541
553
  self._task = None
542
554
  if self._drain_future is not None and not self._drain_future.is_done:
543
555
  self._drain_future.failure(Errors.KafkaConnectionError())
kafka/net/manager.py CHANGED
@@ -73,7 +73,7 @@ class KafkaConnectionManager:
73
73
  "client_dns_lookup must be one of %s; got %r"
74
74
  % (self._VALID_DNS_LOOKUP_MODES, self.config['client_dns_lookup']))
75
75
 
76
- if 'socks5_proxy' in configs:
76
+ if configs.get('socks5_proxy') is not None:
77
77
  if self.config['proxy_url'] is None:
78
78
  log.warning('socks5_proxy is deprecated, use proxy_url instead')
79
79
  self.config['proxy_url'] = configs['socks5_proxy']
@@ -117,7 +117,9 @@ class KafkaConnectionManager:
117
117
  async def _do_bootstrap(self, deadline):
118
118
  while not self.closed and (deadline is None or time.monotonic() < deadline):
119
119
  bootstrap_broker = random.choice(self.cluster.bootstrap_brokers())
120
- log.debug('Attempting bootstrap with %s', bootstrap_broker)
120
+ log.info('Attempting bootstrap to %s at %s:%s (rack %s)',
121
+ bootstrap_broker.node_id, bootstrap_broker.host,
122
+ bootstrap_broker.port, bootstrap_broker.rack)
121
123
  try:
122
124
  timeout_ms = (deadline - time.monotonic()) * 1000 if deadline is not None else None
123
125
  conn = self.get_connection(bootstrap_broker.node_id,
@@ -129,7 +131,7 @@ class KafkaConnectionManager:
129
131
  delay = self.connection_delay(bootstrap_broker.node_id)
130
132
  if deadline is not None:
131
133
  delay = min(delay, max(0, deadline - time.monotonic()))
132
- log.debug('Bootstrap %s NodeNotReadyError: backoff %s', bootstrap_broker, delay)
134
+ log.warning('Bootstrap %s not ready; waiting %.2f secs', bootstrap_broker.node_id, delay)
133
135
  await self._bootstrap_wakeup(delay)
134
136
  continue
135
137
 
@@ -140,17 +142,20 @@ class KafkaConnectionManager:
140
142
  raise
141
143
  except Exception as exc:
142
144
  self._conns.pop(bootstrap_broker.node_id, conn).close(exc)
145
+ backoff_ms = self.update_backoff(bootstrap_broker.node_id)
146
+ log.warning('Bootstrap connection to %s failed: %s (backoff %.2f secs)',
147
+ bootstrap_broker.node_id, exc, backoff_ms / 1000)
143
148
  continue
144
149
 
145
150
  try:
146
151
  await self.cluster.refresh_metadata(bootstrap_broker.node_id)
147
152
  if not self.cluster.brokers():
148
- log.warning('Bootstrap metadata response has no brokers. Retrying.')
149
- self.update_backoff(bootstrap_broker.node_id)
153
+ backoff_ms = self.update_backoff(bootstrap_broker.node_id)
154
+ log.warning('Bootstrap metadata response has no brokers. Retrying in %.2f secs.', backoff_ms / 1000)
150
155
  continue
151
156
  except Exception as exc:
152
- log.error(f'Bootstrap attempt to {bootstrap_broker.node_id} failed: {exc}')
153
- self.update_backoff(bootstrap_broker.node_id)
157
+ backoff_ms = self.update_backoff(bootstrap_broker.node_id)
158
+ log.error(f'Bootstrap attempt to {bootstrap_broker.node_id} failed: {exc} (backoff {(backoff_ms / 1000):.2f} secs)')
154
159
  continue
155
160
  else:
156
161
  self.reset_backoff(bootstrap_broker.node_id)
@@ -158,6 +163,7 @@ class KafkaConnectionManager:
158
163
  log.info('Bootstrap complete: %s', self.cluster)
159
164
  return True
160
165
  finally:
166
+ log.info('Closing bootstrap connection %s', bootstrap_broker.node_id)
161
167
  self._conns.pop(bootstrap_broker.node_id, conn).close()
162
168
  else:
163
169
  raise Errors.KafkaTimeoutError(
@@ -239,9 +245,24 @@ class KafkaConnectionManager:
239
245
  return transport
240
246
 
241
247
  async def _connect(self, node, conn, reset_backoff_on_connect=True, timeout_at=None):
248
+ # Tracks ownership of the freshly built transport: while non-None it is
249
+ # ours to clean up (the connection hasn't taken it over yet), so the
250
+ # finally clause closes it. Cleared once connection_made() succeeds.
251
+ transport = None
242
252
  try:
243
253
  transport = await self._build_transport(node, timeout_at=timeout_at)
254
+ # The connection (or the whole manager) may have been closed while
255
+ # we were building the transport. Handing it to connection_made()
256
+ # would flip the conn back to `initializing` and resurrect a
257
+ # connection that is already being torn down. Discard
258
+ # the new transport instead of reviving a dead connection.
259
+ if conn.closed or self.closed:
260
+ log.debug('%s: closed during connect; discarding new transport', conn)
261
+ return
244
262
  conn.connection_made(transport)
263
+ transport = None # conn owns cleanup now; skip finally: transport.close()
264
+ # Note: conn.initialize does not currently raise on error;
265
+ # errors are pushed to conn.init_future and raised on await conn
245
266
  await conn.initialize(timeout_at=timeout_at)
246
267
  except Exception as exc:
247
268
  log.error('Connection failed: %s', exc)
@@ -251,6 +272,9 @@ class KafkaConnectionManager:
251
272
  Errors.AuthorizationError)):
252
273
  self._auth_failures[node.node_id] = exc
253
274
  return
275
+ finally:
276
+ if transport is not None:
277
+ transport.close()
254
278
 
255
279
  if self._sensors:
256
280
  self._sensors.connection_created.record()
@@ -275,6 +299,7 @@ class KafkaConnectionManager:
275
299
  node = self.cluster.broker_metadata(node_id)
276
300
  if node is None:
277
301
  raise Errors.UnknownBrokerIdError(node_id)
302
+ log.info('Initializing connection for node_id %s at %s:%s (rack=%s)', node_id, node.host, node.port, node.rack)
278
303
  conn = KafkaConnection(self._net, node_id=node_id, broker_version_data=self.broker_version_data, **self.config)
279
304
  if pop_on_close:
280
305
  conn.close_future.add_both(lambda _: self._conns.pop(node.node_id, None))
@@ -359,6 +384,7 @@ class KafkaConnectionManager:
359
384
  node_id, backoff_ms, connect_ms, failures)
360
385
  backoff_until_time = time.monotonic() + (backoff_ms / 1000)
361
386
  self._backoff[node_id] = (failures, backoff_until_time, connect_ms)
387
+ return backoff_ms
362
388
 
363
389
  def connection_delay(self, node_id):
364
390
  """Connection delay in seconds.
@@ -423,11 +449,7 @@ class KafkaConnectionManager:
423
449
  try:
424
450
  return await wrapper
425
451
  finally:
426
- if not timer.is_done:
427
- try:
428
- self._net.unschedule(timer)
429
- except ValueError:
430
- pass
452
+ self._net.cancel(timer)
431
453
 
432
454
  def call_soon(self, coro, *args):
433
455
  """Accepts a coroutine / awaitable / function and schedules it on the event loop.