kafka-python 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373)
  1. kafka/__init__.py +34 -0
  2. kafka/__main__.py +5 -0
  3. kafka/admin/__init__.py +29 -0
  4. kafka/admin/__main__.py +5 -0
  5. kafka/admin/_acls.py +355 -0
  6. kafka/admin/_cluster.py +359 -0
  7. kafka/admin/_configs.py +479 -0
  8. kafka/admin/_groups.py +754 -0
  9. kafka/admin/_partitions.py +595 -0
  10. kafka/admin/_topics.py +281 -0
  11. kafka/admin/_transactions.py +450 -0
  12. kafka/admin/_users.py +194 -0
  13. kafka/admin/client.py +373 -0
  14. kafka/benchmarks/__init__.py +0 -0
  15. kafka/benchmarks/consumer_performance.py +138 -0
  16. kafka/benchmarks/load_example.py +109 -0
  17. kafka/benchmarks/producer_encode_path.py +201 -0
  18. kafka/benchmarks/producer_performance.py +161 -0
  19. kafka/benchmarks/profile_protocol.py +138 -0
  20. kafka/benchmarks/protocol_old_vs_new.py +447 -0
  21. kafka/benchmarks/record_batch_compose.py +77 -0
  22. kafka/benchmarks/record_batch_read.py +82 -0
  23. kafka/benchmarks/varint_speed.py +426 -0
  24. kafka/cli/__init__.py +36 -0
  25. kafka/cli/admin/__init__.py +117 -0
  26. kafka/cli/admin/acls/__init__.py +9 -0
  27. kafka/cli/admin/acls/common.py +76 -0
  28. kafka/cli/admin/acls/create.py +19 -0
  29. kafka/cli/admin/acls/delete.py +23 -0
  30. kafka/cli/admin/acls/describe.py +16 -0
  31. kafka/cli/admin/cluster/__init__.py +14 -0
  32. kafka/cli/admin/cluster/describe.py +11 -0
  33. kafka/cli/admin/cluster/describe_quorum.py +11 -0
  34. kafka/cli/admin/cluster/features.py +52 -0
  35. kafka/cli/admin/cluster/log_dirs.py +43 -0
  36. kafka/cli/admin/cluster/versions.py +33 -0
  37. kafka/cli/admin/configs/__init__.py +10 -0
  38. kafka/cli/admin/configs/alter.py +43 -0
  39. kafka/cli/admin/configs/common.py +17 -0
  40. kafka/cli/admin/configs/describe.py +30 -0
  41. kafka/cli/admin/configs/list.py +16 -0
  42. kafka/cli/admin/configs/reset.py +20 -0
  43. kafka/cli/admin/groups/__init__.py +16 -0
  44. kafka/cli/admin/groups/alter_offsets.py +30 -0
  45. kafka/cli/admin/groups/delete.py +11 -0
  46. kafka/cli/admin/groups/delete_offsets.py +29 -0
  47. kafka/cli/admin/groups/describe.py +11 -0
  48. kafka/cli/admin/groups/list.py +28 -0
  49. kafka/cli/admin/groups/list_offsets.py +29 -0
  50. kafka/cli/admin/groups/remove_members.py +40 -0
  51. kafka/cli/admin/groups/reset_offsets.py +139 -0
  52. kafka/cli/admin/partitions/__init__.py +21 -0
  53. kafka/cli/admin/partitions/alter_reassignments.py +37 -0
  54. kafka/cli/admin/partitions/create.py +27 -0
  55. kafka/cli/admin/partitions/delete_records.py +31 -0
  56. kafka/cli/admin/partitions/describe.py +36 -0
  57. kafka/cli/admin/partitions/elect_leaders.py +53 -0
  58. kafka/cli/admin/partitions/list_offsets.py +88 -0
  59. kafka/cli/admin/partitions/list_reassignments.py +35 -0
  60. kafka/cli/admin/topics/__init__.py +10 -0
  61. kafka/cli/admin/topics/create.py +13 -0
  62. kafka/cli/admin/topics/delete.py +19 -0
  63. kafka/cli/admin/topics/describe.py +18 -0
  64. kafka/cli/admin/topics/list.py +11 -0
  65. kafka/cli/admin/transactions/__init__.py +17 -0
  66. kafka/cli/admin/transactions/abort.py +38 -0
  67. kafka/cli/admin/transactions/describe.py +24 -0
  68. kafka/cli/admin/transactions/describe_producers.py +29 -0
  69. kafka/cli/admin/transactions/find_hanging.py +26 -0
  70. kafka/cli/admin/transactions/list.py +37 -0
  71. kafka/cli/admin/users/__init__.py +8 -0
  72. kafka/cli/admin/users/alter_user_scram_credentials.py +34 -0
  73. kafka/cli/admin/users/describe_user_scram_credentials.py +15 -0
  74. kafka/cli/common.py +95 -0
  75. kafka/cli/consumer/__init__.py +63 -0
  76. kafka/cli/producer/__init__.py +57 -0
  77. kafka/cluster.py +824 -0
  78. kafka/codec.py +325 -0
  79. kafka/consumer/__init__.py +5 -0
  80. kafka/consumer/__main__.py +5 -0
  81. kafka/consumer/fetcher.py +2012 -0
  82. kafka/consumer/group.py +1347 -0
  83. kafka/consumer/subscription_state.py +897 -0
  84. kafka/coordinator/__init__.py +0 -0
  85. kafka/coordinator/assignors/__init__.py +0 -0
  86. kafka/coordinator/assignors/abstract.py +90 -0
  87. kafka/coordinator/assignors/cooperative_sticky.py +167 -0
  88. kafka/coordinator/assignors/range.py +81 -0
  89. kafka/coordinator/assignors/roundrobin.py +101 -0
  90. kafka/coordinator/assignors/sticky/StickyAssignorUserData.json +37 -0
  91. kafka/coordinator/assignors/sticky/__init__.py +0 -0
  92. kafka/coordinator/assignors/sticky/partition_movements.py +149 -0
  93. kafka/coordinator/assignors/sticky/sorted_set.py +63 -0
  94. kafka/coordinator/assignors/sticky/sticky_assignor.py +665 -0
  95. kafka/coordinator/assignors/sticky/user_data.py +8 -0
  96. kafka/coordinator/base.py +1215 -0
  97. kafka/coordinator/consumer.py +1224 -0
  98. kafka/coordinator/heartbeat.py +82 -0
  99. kafka/coordinator/subscription.py +34 -0
  100. kafka/errors.py +1004 -0
  101. kafka/future.py +166 -0
  102. kafka/metrics/__init__.py +13 -0
  103. kafka/metrics/compound_stat.py +33 -0
  104. kafka/metrics/dict_reporter.py +81 -0
  105. kafka/metrics/kafka_metric.py +36 -0
  106. kafka/metrics/measurable.py +27 -0
  107. kafka/metrics/measurable_stat.py +13 -0
  108. kafka/metrics/metric_config.py +33 -0
  109. kafka/metrics/metric_name.py +105 -0
  110. kafka/metrics/metrics.py +261 -0
  111. kafka/metrics/metrics_reporter.py +53 -0
  112. kafka/metrics/quota.py +41 -0
  113. kafka/metrics/stat.py +19 -0
  114. kafka/metrics/stats/__init__.py +15 -0
  115. kafka/metrics/stats/avg.py +24 -0
  116. kafka/metrics/stats/count.py +17 -0
  117. kafka/metrics/stats/histogram.py +99 -0
  118. kafka/metrics/stats/max_stat.py +17 -0
  119. kafka/metrics/stats/min_stat.py +19 -0
  120. kafka/metrics/stats/percentile.py +14 -0
  121. kafka/metrics/stats/percentiles.py +75 -0
  122. kafka/metrics/stats/rate.py +118 -0
  123. kafka/metrics/stats/sampled_stat.py +99 -0
  124. kafka/metrics/stats/sensor.py +136 -0
  125. kafka/metrics/stats/total.py +15 -0
  126. kafka/net/__init__.py +19 -0
  127. kafka/net/compat.py +165 -0
  128. kafka/net/connection.py +593 -0
  129. kafka/net/http_connect.py +144 -0
  130. kafka/net/inet.py +122 -0
  131. kafka/net/manager.py +451 -0
  132. kafka/net/metrics.py +149 -0
  133. kafka/net/sasl/__init__.py +32 -0
  134. kafka/net/sasl/abc.py +28 -0
  135. kafka/net/sasl/gssapi.py +95 -0
  136. kafka/net/sasl/msk.py +245 -0
  137. kafka/net/sasl/oauth.py +98 -0
  138. kafka/net/sasl/plain.py +42 -0
  139. kafka/net/sasl/scram.py +135 -0
  140. kafka/net/sasl/sspi.py +111 -0
  141. kafka/net/selector.py +644 -0
  142. kafka/net/socks5.py +262 -0
  143. kafka/net/transport.py +415 -0
  144. kafka/net/wakeup_notifier.py +72 -0
  145. kafka/partitioner/__init__.py +8 -0
  146. kafka/partitioner/abc.py +8 -0
  147. kafka/partitioner/default.py +89 -0
  148. kafka/partitioner/sticky.py +109 -0
  149. kafka/producer/__init__.py +5 -0
  150. kafka/producer/__main__.py +5 -0
  151. kafka/producer/future.py +101 -0
  152. kafka/producer/kafka.py +1123 -0
  153. kafka/producer/producer_batch.py +192 -0
  154. kafka/producer/record_accumulator.py +647 -0
  155. kafka/producer/sender.py +884 -0
  156. kafka/producer/transaction_manager.py +1326 -0
  157. kafka/protocol/__init__.py +0 -0
  158. kafka/protocol/admin/__init__.py +29 -0
  159. kafka/protocol/admin/acl.py +83 -0
  160. kafka/protocol/admin/acl.pyi +375 -0
  161. kafka/protocol/admin/client_quotas.py +14 -0
  162. kafka/protocol/admin/client_quotas.pyi +265 -0
  163. kafka/protocol/admin/cluster.py +31 -0
  164. kafka/protocol/admin/cluster.pyi +620 -0
  165. kafka/protocol/admin/configs.py +22 -0
  166. kafka/protocol/admin/configs.pyi +437 -0
  167. kafka/protocol/admin/groups.py +24 -0
  168. kafka/protocol/admin/groups.pyi +261 -0
  169. kafka/protocol/admin/topics.py +53 -0
  170. kafka/protocol/admin/topics.pyi +982 -0
  171. kafka/protocol/admin/transactions.py +18 -0
  172. kafka/protocol/admin/transactions.pyi +311 -0
  173. kafka/protocol/admin/users.py +14 -0
  174. kafka/protocol/admin/users.pyi +223 -0
  175. kafka/protocol/api_data.py +125 -0
  176. kafka/protocol/api_header.py +55 -0
  177. kafka/protocol/api_key.py +97 -0
  178. kafka/protocol/api_message.py +277 -0
  179. kafka/protocol/broker_version_data.py +246 -0
  180. kafka/protocol/consumer/__init__.py +13 -0
  181. kafka/protocol/consumer/fetch.py +16 -0
  182. kafka/protocol/consumer/fetch.pyi +298 -0
  183. kafka/protocol/consumer/group.py +38 -0
  184. kafka/protocol/consumer/group.pyi +824 -0
  185. kafka/protocol/consumer/metadata.py +30 -0
  186. kafka/protocol/consumer/metadata.pyi +89 -0
  187. kafka/protocol/consumer/offsets.py +75 -0
  188. kafka/protocol/consumer/offsets.pyi +288 -0
  189. kafka/protocol/data_container.py +166 -0
  190. kafka/protocol/frame.py +30 -0
  191. kafka/protocol/generate_stubs.py +468 -0
  192. kafka/protocol/metadata/__init__.py +10 -0
  193. kafka/protocol/metadata/api_versions.py +41 -0
  194. kafka/protocol/metadata/api_versions.pyi +128 -0
  195. kafka/protocol/metadata/find_coordinator.py +19 -0
  196. kafka/protocol/metadata/find_coordinator.pyi +105 -0
  197. kafka/protocol/metadata/metadata.py +34 -0
  198. kafka/protocol/metadata/metadata.pyi +160 -0
  199. kafka/protocol/old/__init__.py +0 -0
  200. kafka/protocol/old/abstract.py +17 -0
  201. kafka/protocol/old/add_offsets_to_txn.py +54 -0
  202. kafka/protocol/old/add_partitions_to_txn.py +71 -0
  203. kafka/protocol/old/admin.py +1086 -0
  204. kafka/protocol/old/api.py +205 -0
  205. kafka/protocol/old/api_versions.py +133 -0
  206. kafka/protocol/old/commit.py +355 -0
  207. kafka/protocol/old/consumer_protocol.py +36 -0
  208. kafka/protocol/old/end_txn.py +53 -0
  209. kafka/protocol/old/fetch.py +408 -0
  210. kafka/protocol/old/find_coordinator.py +72 -0
  211. kafka/protocol/old/group.py +451 -0
  212. kafka/protocol/old/init_producer_id.py +42 -0
  213. kafka/protocol/old/list_offsets.py +186 -0
  214. kafka/protocol/old/metadata.py +290 -0
  215. kafka/protocol/old/offset_for_leader_epoch.py +133 -0
  216. kafka/protocol/old/produce.py +247 -0
  217. kafka/protocol/old/sasl_authenticate.py +38 -0
  218. kafka/protocol/old/sasl_handshake.py +39 -0
  219. kafka/protocol/old/struct.py +87 -0
  220. kafka/protocol/old/txn_offset_commit.py +73 -0
  221. kafka/protocol/old/types.py +440 -0
  222. kafka/protocol/parser.py +191 -0
  223. kafka/protocol/producer/__init__.py +7 -0
  224. kafka/protocol/producer/produce.py +17 -0
  225. kafka/protocol/producer/produce.pyi +197 -0
  226. kafka/protocol/producer/transaction.py +30 -0
  227. kafka/protocol/producer/transaction.pyi +663 -0
  228. kafka/protocol/sasl.py +52 -0
  229. kafka/protocol/sasl.pyi +126 -0
  230. kafka/protocol/schemas/__init__.py +7 -0
  231. kafka/protocol/schemas/fields/__init__.py +7 -0
  232. kafka/protocol/schemas/fields/array.py +127 -0
  233. kafka/protocol/schemas/fields/base.py +156 -0
  234. kafka/protocol/schemas/fields/codecs/__init__.py +12 -0
  235. kafka/protocol/schemas/fields/codecs/encode_buffer.py +82 -0
  236. kafka/protocol/schemas/fields/codecs/tagged_fields.py +109 -0
  237. kafka/protocol/schemas/fields/codecs/types.py +505 -0
  238. kafka/protocol/schemas/fields/codegen.py +40 -0
  239. kafka/protocol/schemas/fields/simple.py +127 -0
  240. kafka/protocol/schemas/fields/struct.py +357 -0
  241. kafka/protocol/schemas/fields/struct_array.py +142 -0
  242. kafka/protocol/schemas/load_json.py +42 -0
  243. kafka/protocol/schemas/resources/AddOffsetsToTxnRequest.json +40 -0
  244. kafka/protocol/schemas/resources/AddOffsetsToTxnResponse.json +35 -0
  245. kafka/protocol/schemas/resources/AddPartitionsToTxnRequest.json +65 -0
  246. kafka/protocol/schemas/resources/AddPartitionsToTxnResponse.json +60 -0
  247. kafka/protocol/schemas/resources/AlterClientQuotasRequest.json +47 -0
  248. kafka/protocol/schemas/resources/AlterClientQuotasResponse.json +41 -0
  249. kafka/protocol/schemas/resources/AlterConfigsRequest.json +43 -0
  250. kafka/protocol/schemas/resources/AlterConfigsResponse.json +39 -0
  251. kafka/protocol/schemas/resources/AlterPartitionReassignmentsRequest.json +42 -0
  252. kafka/protocol/schemas/resources/AlterPartitionReassignmentsResponse.json +47 -0
  253. kafka/protocol/schemas/resources/AlterReplicaLogDirsRequest.json +41 -0
  254. kafka/protocol/schemas/resources/AlterReplicaLogDirsResponse.json +41 -0
  255. kafka/protocol/schemas/resources/AlterUserScramCredentialsRequest.json +45 -0
  256. kafka/protocol/schemas/resources/AlterUserScramCredentialsResponse.json +35 -0
  257. kafka/protocol/schemas/resources/ApiVersionsRequest.json +34 -0
  258. kafka/protocol/schemas/resources/ApiVersionsResponse.json +79 -0
  259. kafka/protocol/schemas/resources/ConsumerProtocolAssignment.json +42 -0
  260. kafka/protocol/schemas/resources/ConsumerProtocolSubscription.json +49 -0
  261. kafka/protocol/schemas/resources/CreateAclsRequest.json +46 -0
  262. kafka/protocol/schemas/resources/CreateAclsResponse.json +37 -0
  263. kafka/protocol/schemas/resources/CreatePartitionsRequest.json +47 -0
  264. kafka/protocol/schemas/resources/CreatePartitionsResponse.json +41 -0
  265. kafka/protocol/schemas/resources/CreateTopicsRequest.json +65 -0
  266. kafka/protocol/schemas/resources/CreateTopicsResponse.json +72 -0
  267. kafka/protocol/schemas/resources/DeleteAclsRequest.json +46 -0
  268. kafka/protocol/schemas/resources/DeleteAclsResponse.json +59 -0
  269. kafka/protocol/schemas/resources/DeleteGroupsRequest.json +30 -0
  270. kafka/protocol/schemas/resources/DeleteGroupsResponse.json +36 -0
  271. kafka/protocol/schemas/resources/DeleteRecordsRequest.json +42 -0
  272. kafka/protocol/schemas/resources/DeleteRecordsResponse.json +43 -0
  273. kafka/protocol/schemas/resources/DeleteTopicsRequest.json +43 -0
  274. kafka/protocol/schemas/resources/DeleteTopicsResponse.json +52 -0
  275. kafka/protocol/schemas/resources/DescribeAclsRequest.json +43 -0
  276. kafka/protocol/schemas/resources/DescribeAclsResponse.json +55 -0
  277. kafka/protocol/schemas/resources/DescribeClientQuotasRequest.json +37 -0
  278. kafka/protocol/schemas/resources/DescribeClientQuotasResponse.json +47 -0
  279. kafka/protocol/schemas/resources/DescribeClusterRequest.json +35 -0
  280. kafka/protocol/schemas/resources/DescribeClusterResponse.json +56 -0
  281. kafka/protocol/schemas/resources/DescribeConfigsRequest.json +42 -0
  282. kafka/protocol/schemas/resources/DescribeConfigsResponse.json +69 -0
  283. kafka/protocol/schemas/resources/DescribeGroupsRequest.json +38 -0
  284. kafka/protocol/schemas/resources/DescribeGroupsResponse.json +74 -0
  285. kafka/protocol/schemas/resources/DescribeLogDirsRequest.json +38 -0
  286. kafka/protocol/schemas/resources/DescribeLogDirsResponse.json +65 -0
  287. kafka/protocol/schemas/resources/DescribeProducersRequest.json +32 -0
  288. kafka/protocol/schemas/resources/DescribeProducersResponse.json +55 -0
  289. kafka/protocol/schemas/resources/DescribeQuorumRequest.json +39 -0
  290. kafka/protocol/schemas/resources/DescribeQuorumResponse.json +82 -0
  291. kafka/protocol/schemas/resources/DescribeTopicPartitionsRequest.json +40 -0
  292. kafka/protocol/schemas/resources/DescribeTopicPartitionsResponse.json +66 -0
  293. kafka/protocol/schemas/resources/DescribeTransactionsRequest.json +27 -0
  294. kafka/protocol/schemas/resources/DescribeTransactionsResponse.json +52 -0
  295. kafka/protocol/schemas/resources/DescribeUserScramCredentialsRequest.json +30 -0
  296. kafka/protocol/schemas/resources/DescribeUserScramCredentialsResponse.json +45 -0
  297. kafka/protocol/schemas/resources/ElectLeadersRequest.json +41 -0
  298. kafka/protocol/schemas/resources/ElectLeadersResponse.json +45 -0
  299. kafka/protocol/schemas/resources/EndTxnRequest.json +43 -0
  300. kafka/protocol/schemas/resources/EndTxnResponse.json +41 -0
  301. kafka/protocol/schemas/resources/FetchRequest.json +125 -0
  302. kafka/protocol/schemas/resources/FetchResponse.json +124 -0
  303. kafka/protocol/schemas/resources/FindCoordinatorRequest.json +43 -0
  304. kafka/protocol/schemas/resources/FindCoordinatorResponse.json +58 -0
  305. kafka/protocol/schemas/resources/HeartbeatRequest.json +39 -0
  306. kafka/protocol/schemas/resources/HeartbeatResponse.json +35 -0
  307. kafka/protocol/schemas/resources/IncrementalAlterConfigsRequest.json +44 -0
  308. kafka/protocol/schemas/resources/IncrementalAlterConfigsResponse.json +38 -0
  309. kafka/protocol/schemas/resources/InitProducerIdRequest.json +50 -0
  310. kafka/protocol/schemas/resources/InitProducerIdResponse.json +47 -0
  311. kafka/protocol/schemas/resources/JoinGroupRequest.json +63 -0
  312. kafka/protocol/schemas/resources/JoinGroupResponse.json +69 -0
  313. kafka/protocol/schemas/resources/LeaveGroupRequest.json +47 -0
  314. kafka/protocol/schemas/resources/LeaveGroupResponse.json +47 -0
  315. kafka/protocol/schemas/resources/ListConfigResourcesRequest.json +31 -0
  316. kafka/protocol/schemas/resources/ListConfigResourcesResponse.json +37 -0
  317. kafka/protocol/schemas/resources/ListGroupsRequest.json +36 -0
  318. kafka/protocol/schemas/resources/ListGroupsResponse.json +49 -0
  319. kafka/protocol/schemas/resources/ListOffsetsRequest.json +72 -0
  320. kafka/protocol/schemas/resources/ListOffsetsResponse.json +71 -0
  321. kafka/protocol/schemas/resources/ListPartitionReassignmentsRequest.json +34 -0
  322. kafka/protocol/schemas/resources/ListPartitionReassignmentsResponse.json +46 -0
  323. kafka/protocol/schemas/resources/ListTransactionsRequest.json +40 -0
  324. kafka/protocol/schemas/resources/ListTransactionsResponse.json +42 -0
  325. kafka/protocol/schemas/resources/MetadataRequest.json +56 -0
  326. kafka/protocol/schemas/resources/MetadataResponse.json +101 -0
  327. kafka/protocol/schemas/resources/OffsetCommitRequest.json +76 -0
  328. kafka/protocol/schemas/resources/OffsetCommitResponse.json +71 -0
  329. kafka/protocol/schemas/resources/OffsetDeleteRequest.json +39 -0
  330. kafka/protocol/schemas/resources/OffsetDeleteResponse.json +42 -0
  331. kafka/protocol/schemas/resources/OffsetFetchRequest.json +76 -0
  332. kafka/protocol/schemas/resources/OffsetFetchResponse.json +107 -0
  333. kafka/protocol/schemas/resources/OffsetForLeaderEpochRequest.json +52 -0
  334. kafka/protocol/schemas/resources/OffsetForLeaderEpochResponse.json +51 -0
  335. kafka/protocol/schemas/resources/ProduceRequest.json +73 -0
  336. kafka/protocol/schemas/resources/ProduceResponse.json +96 -0
  337. kafka/protocol/schemas/resources/RequestHeader.json +44 -0
  338. kafka/protocol/schemas/resources/ResponseHeader.json +26 -0
  339. kafka/protocol/schemas/resources/SaslAuthenticateRequest.json +29 -0
  340. kafka/protocol/schemas/resources/SaslAuthenticateResponse.json +34 -0
  341. kafka/protocol/schemas/resources/SaslHandshakeRequest.json +31 -0
  342. kafka/protocol/schemas/resources/SaslHandshakeResponse.json +32 -0
  343. kafka/protocol/schemas/resources/SyncGroupRequest.json +56 -0
  344. kafka/protocol/schemas/resources/SyncGroupResponse.json +46 -0
  345. kafka/protocol/schemas/resources/TxnOffsetCommitRequest.json +68 -0
  346. kafka/protocol/schemas/resources/TxnOffsetCommitResponse.json +47 -0
  347. kafka/protocol/schemas/resources/UpdateFeaturesRequest.json +43 -0
  348. kafka/protocol/schemas/resources/UpdateFeaturesResponse.json +39 -0
  349. kafka/protocol/schemas/resources/WriteTxnMarkersRequest.json +49 -0
  350. kafka/protocol/schemas/resources/WriteTxnMarkersResponse.json +45 -0
  351. kafka/protocol/schemas/resources/__init__.py +0 -0
  352. kafka/record/__init__.py +3 -0
  353. kafka/record/_crc32c.py +161 -0
  354. kafka/record/abc.py +144 -0
  355. kafka/record/default_records.py +782 -0
  356. kafka/record/legacy_records.py +587 -0
  357. kafka/record/memory_records.py +255 -0
  358. kafka/record/util.py +135 -0
  359. kafka/serializer/__init__.py +4 -0
  360. kafka/serializer/abstract.py +20 -0
  361. kafka/serializer/default.py +16 -0
  362. kafka/serializer/json.py +17 -0
  363. kafka/serializer/wrapper.py +21 -0
  364. kafka/structs.py +69 -0
  365. kafka/util.py +159 -0
  366. kafka/vendor/__init__.py +0 -0
  367. kafka/version.py +1 -0
  368. kafka_python-3.0.0.dist-info/METADATA +319 -0
  369. kafka_python-3.0.0.dist-info/RECORD +373 -0
  370. kafka_python-3.0.0.dist-info/WHEEL +5 -0
  371. kafka_python-3.0.0.dist-info/entry_points.txt +2 -0
  372. kafka_python-3.0.0.dist-info/licenses/LICENSE +202 -0
  373. kafka_python-3.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,884 @@
1
+ import collections
2
+ import copy
3
+ import heapq
4
+ import logging
5
+ import threading
6
+ import time
7
+
8
+ from kafka import errors as Errors
9
+ from kafka.metrics.measurable import AnonMeasurable
10
+ from kafka.metrics.stats import Avg, Max, Rate
11
+ from kafka.producer.transaction_manager import TransactionManager
12
+ from kafka.protocol.producer import ProduceRequest, ProduceResponse
13
+ from kafka.structs import TopicPartition
14
+ from kafka.util import ensure_valid_topic_name
15
+ from kafka.version import __version__
16
+
17
log = logging.getLogger(__name__)


# Shorthand for the per-partition produce response type that the sender's
# batch-completion paths refer to. Synthetic instances (acks=0, batches that
# expired locally) are built with only a few fields set; any field left
# unset falls back to its schema default through DataContainer.__getattr__.
_PartitionProduceResponse = (
    ProduceResponse.TopicProduceResponse.PartitionProduceResponse
)
25
+
26
+
27
+ class Sender(threading.Thread):
28
+ """
29
+ The background thread that handles the sending of produce requests to the
30
+ Kafka cluster. This thread makes metadata requests to renew its view of the
31
+ cluster and then sends produce requests to the appropriate nodes.
32
+ """
33
# Baseline sender settings. __init__ copies these and replaces any key that
# the caller passed as a keyword argument.
DEFAULT_CONFIG = {
    'max_request_size': 1024 * 1024,  # presumably bytes -- confirm
    'acks': 1,
    'retries': float('inf'),
    'request_timeout_ms': 30 * 1000,
    'retry_backoff_ms': 100,
    'metrics': None,  # falsy value means no SenderMetrics is created
    'guarantee_message_order': False,
    'transaction_manager': None,
    'transactional_id': None,
    'transaction_timeout_ms': 60 * 1000,
    'client_id': 'kafka-python-' + __version__,
}
46
+
47
def __init__(self, client, metadata, accumulator, **configs):
    """Prepare the sender thread's state; the thread is not started here.

    Arguments:
        client: network client used for all I/O. Its ``cluster`` attribute
            is stored as the sender's metadata view.
        metadata: accepted but not read by this constructor (the metadata
            view is taken from ``client.cluster``).
        accumulator: the record accumulator this sender works from.
        **configs: overrides for keys that exist in DEFAULT_CONFIG.
            Recognised keys are popped from ``configs``; other keys are
            left untouched and ignored.
    """
    super().__init__()
    # Start from the defaults, taking the caller's value where one was given.
    self.config = {
        key: configs.pop(key) if key in configs else default
        for key, default in self.DEFAULT_CONFIG.items()
    }

    self.name = self.config['client_id'] + '-network-thread'
    self._client = client
    self._accumulator = accumulator
    self._metadata = client.cluster
    self._running = True
    self._force_close = False
    self._topics_to_add = set()
    metrics = self.config['metrics']
    self._sensors = (
        SenderMetrics(metrics, self._client, self._metadata) if metrics else None
    )
    self._transaction_manager = self.config['transaction_manager']
    # Per-partition heaps of (created, batch) entries, ordered by creation
    # time, used to track the batches currently in flight.
    self._in_flight_batches = collections.defaultdict(list)
68
+
69
+ def _maybe_remove_from_inflight_batches(self, batch):
70
+ try:
71
+ queue = self._in_flight_batches[batch.topic_partition]
72
+ except KeyError:
73
+ return
74
+ try:
75
+ idx = queue.index((batch.created, batch))
76
+ except ValueError:
77
+ return
78
+ # https://stackoverflow.com/questions/10162679/python-delete-element-from-heap
79
+ queue[idx] = queue[-1]
80
+ queue.pop()
81
+ heapq.heapify(queue)
82
+
83
+ def _get_expired_inflight_batches(self, now=None):
84
+ """Get the in-flight batches that has reached delivery timeout."""
85
+ expired_batches = []
86
+ to_remove = []
87
+ for tp, queue in self._in_flight_batches.items():
88
+ while queue:
89
+ _created_at, batch = queue[0]
90
+ if batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms):
91
+ heapq.heappop(queue)
92
+ if batch.final_state is None:
93
+ expired_batches.append(batch)
94
+ else:
95
+ raise Errors.IllegalStateError("%s batch created at %s gets unexpected final state %s" % (batch.topic_partition, batch.created, batch.final_state))
96
+ else:
97
+ self._accumulator.maybe_update_next_batch_expiry_time(batch)
98
+ break
99
+ else:
100
+ # Avoid mutating in_flight_batches during iteration
101
+ to_remove.append(tp)
102
+ for tp in to_remove:
103
+ del self._in_flight_batches[tp]
104
+ return expired_batches
105
+
106
def run(self):
    """Thread body: send until stopped, drain what is left, then close.

    Phases:
      1. Call ``run_once`` repeatedly while ``_running`` is true.
      2. Unless ``_force_close`` is set, keep calling ``run_once`` while the
         accumulator still has undrained data or the client still has
         requests in flight.
      3. If ``_force_close`` is set, abort the incomplete batches.
      4. Close the network client.

    Exceptions raised by ``run_once`` or by closing the client are logged
    and do not end the thread.
    """
    log.debug("%s: Starting Kafka producer I/O thread.", str(self))

    # Phase 1: normal operation until the running flag is cleared.
    while self._running:
        try:
            self.run_once()
        except Exception:
            log.exception("%s: Uncaught error in kafka producer I/O thread", str(self))

    log.debug("%s: Beginning shutdown of Kafka producer I/O thread, sending"
              " remaining records.", str(self))

    # Phase 2: new requests are no longer accepted, but records may still be
    # waiting in the accumulator or awaiting acknowledgment. Keep going
    # until they are done, unless a forced close was requested.
    while not self._force_close:
        if not (self._accumulator.has_undrained()
                or self._client.in_flight_request_count() > 0):
            break
        try:
            self.run_once()
        except Exception:
            log.exception("%s: Uncaught error in kafka producer I/O thread", str(self))

    # Phase 3: on forced close, fail every incomplete batch so that threads
    # waiting on their futures are woken up.
    if self._force_close:
        self._accumulator.abort_incomplete_batches()

    # Phase 4: release the network client.
    try:
        self._client.close()
    except Exception:
        log.exception("%s: Failed to close network client", str(self))

    log.debug("%s: Shutdown of Kafka producer I/O thread has completed.", str(self))
142
+
143
def run_once(self):
    """Run a single iteration of sending.

    Registers queued topics with the metadata, lets the transaction manager
    (if any) make progress, and otherwise sends producer data and polls the
    client. When transactional work is outstanding, or produce requests
    must be held back, the client is polled for ``retry_backoff_ms`` and
    the iteration ends without sending data.
    """
    while self._topics_to_add:
        self._metadata.add_topic(self._topics_to_add.pop())

    txn = self._transaction_manager
    if txn:
        try:
            if not (txn.is_transactional() or txn.has_producer_id()):
                # Idempotent producer: ensure an InitProducerIdHandler is
                # enqueued. It is dispatched below through the same
                # handler-queue path as transactional requests; the produce
                # gate further down blocks new sends until the response
                # arrives.
                txn.init_producer_id()

            if txn.has_in_flight_transactional_request() or self._maybe_send_pending_request():
                # While transactional requests are outstanding, just wait
                # for them to return.
                self._client.poll(timeout_ms=self.config['retry_backoff_ms'])
                return

            # Hold back produce requests if the transaction manager is in a
            # failed state, if there is no producer id yet (idempotent
            # case), or if the producer epoch is being bumped (KIP-360):
            # the InitProducerIdRequest must complete before anything can
            # safely be sent under the new epoch.
            must_hold_back = (txn.has_fatal_error()
                              or not txn.has_producer_id()
                              or txn.is_bumping_epoch())
            if must_hold_back:
                pending_error = txn.last_error
                if pending_error is not None:
                    self._maybe_abort_batches(pending_error)
                self._client.poll(timeout_ms=self.config['retry_backoff_ms'])
                return

            if txn.has_abortable_error():
                # Use the error that caused the abort when there is one.
                # If we are aborting without a recorded error, fall back to
                # a plain TransactionAbortedError.
                abort_cause = txn.last_error or Errors.TransactionAbortedError()
                self._accumulator.abort_undrained_batches(abort_cause)

        except Errors.SaslAuthenticationFailedError as e:
            # Already logged as an error elsewhere; caught here only so the
            # transaction manager can clean up.
            log.debug("%s: Authentication exception while processing transactional request: %s", str(self), e)
            txn.authentication_failed(e)

    next_poll_ms = self._send_producer_data()
    self._client.poll(timeout_ms=next_poll_ms)
189
+
190
+ def _send_producer_data(self, now=None):
191
+ now = time.monotonic() if now is None else now
192
+ # get the list of partitions with data ready to send
193
+ result = self._accumulator.ready(self._metadata, now=now)
194
+ ready_nodes, next_ready_check_delay, unknown_leaders_exist = result
195
+
196
+ # if there are any partitions whose leaders are not known yet, force
197
+ # metadata update
198
+ if unknown_leaders_exist:
199
+ log.debug('%s: Unknown leaders exist, requesting metadata update', str(self))
200
+ self._metadata.request_update()
201
+
202
+ # remove any nodes we aren't ready to send to
203
+ not_ready_timeout_ms = float('inf')
204
+ for node in list(ready_nodes):
205
+ if not self._client.is_ready(node):
206
+ node_delay_ms = self._client.connection_delay(node)
207
+ log.debug('%s: Node %s not ready; delaying produce of accumulated batch (%f ms)', str(self), node, node_delay_ms)
208
+ self._client.maybe_connect(node, wakeup=False)
209
+ ready_nodes.remove(node)
210
+ not_ready_timeout_ms = min(not_ready_timeout_ms, node_delay_ms)
211
+
212
+ # create produce requests
213
+ batches_by_node = self._accumulator.drain(
214
+ self._metadata, ready_nodes, self.config['max_request_size'], now=now)
215
+
216
+ for batch_list in batches_by_node.values():
217
+ for batch in batch_list:
218
+ item = (batch.created, batch)
219
+ queue = self._in_flight_batches[batch.topic_partition]
220
+ heapq.heappush(queue, item)
221
+
222
+ if self.config['guarantee_message_order']:
223
+ # Mute all the partitions drained
224
+ for batch_list in batches_by_node.values():
225
+ for batch in batch_list:
226
+ self._accumulator.muted.add(batch.topic_partition)
227
+
228
+ self._accumulator.reset_next_batch_expiry_time()
229
+ expired_batches = self._accumulator.expired_batches(now=now)
230
+ expired_batches.extend(self._get_expired_inflight_batches(now=now))
231
+
232
+ if expired_batches:
233
+ log.debug("%s: Expired %s batches in accumulator", str(self), len(expired_batches))
234
+
235
+ # Reset the producer_id if an expired batch has previously been sent to the broker.
236
+ # See the documentation of `TransactionState.reset_producer_id` to understand why
237
+ # we need to reset the producer id here.
238
+ if self._transaction_manager and any([batch.in_retry() for batch in expired_batches]):
239
+ needs_transaction_state_reset = True
240
+ else:
241
+ needs_transaction_state_reset = False
242
+
243
+ for expired_batch in expired_batches:
244
+ error_message = "Expiring %d record(s) for %s: %s ms has passed since batch creation" % (
245
+ expired_batch.record_count, expired_batch.topic_partition,
246
+ int((time.monotonic() - expired_batch.created) * 1000))
247
+ self._complete_batch_with_exception(expired_batch, Errors.KafkaTimeoutError(error_message))
248
+
249
+ if self._sensors:
250
+ self._sensors.update_produce_request_metrics(batches_by_node)
251
+
252
+ if needs_transaction_state_reset:
253
+ self._transaction_manager.reset_producer_id()
254
+ return 0
255
+
256
+ requests = self._create_produce_requests(batches_by_node)
257
+ # If we have any nodes that are ready to send + have sendable data,
258
+ # poll with 0 timeout so this can immediately loop and try sending more
259
+ # data. Otherwise, the timeout will be the smaller value between next
260
+ # batch expiry time, and the delay time for checking data availability.
261
+ # Note that the nodes may have data that isn't yet sendable due to
262
+ # lingering, backing off, etc. This specifically does not include nodes with
263
+ # sendable data that aren't ready to send since they would cause busy
264
+ # looping.
265
+ poll_timeout_ms = min(next_ready_check_delay * 1000,
266
+ not_ready_timeout_ms,
267
+ self._accumulator.next_expiry_time_ms - now * 1000)
268
+ if poll_timeout_ms < 0:
269
+ poll_timeout_ms = 0
270
+
271
+ if ready_nodes:
272
+ log.debug("%s: Nodes with data ready to send: %s", str(self), ready_nodes) # trace
273
+ log.debug("%s: Created %d produce requests: %s", str(self), len(requests), requests) # trace
274
+ # if some partitions are already ready to be sent, the select time
275
+ # would be 0; otherwise if some partition already has some data
276
+ # accumulated but not ready yet, the select time will be the time
277
+ # difference between now and its linger expiry time; otherwise the
278
+ # select time will be the time difference between now and the
279
+ # metadata expiry time
280
+ poll_timeout_ms = 0
281
+
282
+ for node_id, request in requests.items():
283
+ batches = batches_by_node[node_id]
284
+ log.debug('%s: Sending Produce Request: %r', str(self), request)
285
+ (self._client.send(node_id, request, wakeup=False)
286
+ .add_callback(
287
+ self._handle_produce_response, node_id, time.monotonic(), batches)
288
+ .add_errback(
289
+ self._failed_produce, batches, node_id))
290
+ return poll_timeout_ms
291
+
292
def _maybe_send_pending_request(self):
    """Send the next pending transactional request, if any.

    If the transaction is completing while the accumulator still has
    incomplete batches, undrained batches are aborted first (abort case
    only) and a flush is started when none is in progress.

    Returns:
        bool: False when the transaction manager has no request handler
            to run, True in every other case.
    """
    txn = self._transaction_manager
    accumulator = self._accumulator

    if txn.is_completing() and accumulator.has_incomplete:
        if txn.is_aborting():
            # KIP-654: prefer the last error that triggered the abort;
            # otherwise the user chose to abort with no underlying cause --
            # surface a non-fatal TransactionAbortedError on the
            # in-accumulator batches.
            exception = txn.last_error or Errors.TransactionAbortedError()
            accumulator.abort_undrained_batches(exception)
        # There may still be requests left which are being retried. Since we do not know whether they had
        # been successfully appended to the broker log, we must resend them until their final status is clear.
        # If they had been appended and we did not receive the error, then our sequence number would no longer
        # be correct which would lead to an OutOfSequenceNumberError.
        if not accumulator.flush_in_progress():
            accumulator.begin_flush()

    next_request_handler = txn.next_request_handler(accumulator.has_incomplete)
    if next_request_handler is None:
        return False

    log.debug("%s: Sending transactional request %s", str(self), next_request_handler.request)

    # Bind before the loop: if force-close is already set the loop body never
    # runs, and the check after the loop would otherwise hit an unbound local.
    target_node = None
    while not self._force_close:
        target_node = None
        try:
            if next_request_handler.needs_coordinator():
                target_node = txn.coordinator(next_request_handler.coordinator_type)
                if target_node is None:
                    txn.lookup_coordinator_for_request(next_request_handler)
                    break
                elif not self._client.await_ready(target_node, timeout_ms=self.config['request_timeout_ms']):
                    txn.lookup_coordinator_for_request(next_request_handler)
                    target_node = None
                    break
            else:
                target_node = self._client.least_loaded_node()
                if target_node is None:
                    self._client.poll(future=self._metadata.request_update())
                elif not self._client.await_ready(target_node, timeout_ms=self.config['request_timeout_ms']):
                    continue

            if target_node is not None:
                if next_request_handler.is_retry:
                    time.sleep(self.config['retry_backoff_ms'] / 1000)
                txn_correlation_id = txn.next_in_flight_request_correlation_id()
                future = self._client.send(target_node, next_request_handler.request)
                future.add_both(next_request_handler.on_complete, txn_correlation_id)
                return True

        except Exception as e:
            log.warning("%s: Got an exception when trying to find a node to send a transactional request to. Going to back off and retry: %s", str(self), e)
            if next_request_handler.needs_coordinator():
                txn.lookup_coordinator_for_request(next_request_handler)
                break

    # No usable node was found: hand the handler back so it is tried again.
    if target_node is None:
        txn.retry(next_request_handler)

    return True
350
+
351
def _maybe_abort_batches(self, exc):
    """Abort the accumulator's batches after a fatal error.

    Does nothing when the accumulator reports no incomplete batches.

    Arguments:
        exc (Exception): The fatal error passed on to the accumulator.
    """
    if not self._accumulator.has_incomplete:
        return
    log.error("%s: Aborting producer batches due to fatal error: %s", str(self), exc)
    # A fatal error cannot be recovered by later broker responses, so
    # in-flight batches are failed as well; the user's pending futures
    # have to resolve for close() to return.
    self._accumulator.abort_batches(exc)
358
+
359
def initiate_close(self):
    """Begin shutting the sender down.

    Clears the running flag, closes the accumulator and wakes the sender.
    Closing won't complete until all data is sent.
    """
    accumulator = self._accumulator
    self._running = False
    accumulator.close()
    self.wakeup()
364
+
365
def force_close(self):
    """Close the sender without sending out any pending messages.

    Sets the force-close flag and then runs the regular initiate_close().
    """
    setattr(self, '_force_close', True)
    self.initiate_close()
369
+
370
def add_topic(self, topic):
    """Register a topic the metadata does not list yet and wake the sender.

    The topic name is validated with ensure_valid_topic_name() before it
    is added to the pending set. Topics already present in the metadata
    are ignored.

    Arguments:
        topic (str): Name of the topic to add.
    """
    # Usually invoked from a thread other than the sender's, so this has to
    # be thread-safe. The plain membership test is assumed to be safe across
    # threads because self._metadata._topics should only ever gain topics
    # for a producer instance, never lose them.
    if topic in self._metadata._topics:
        return
    ensure_valid_topic_name(topic)
    self._topics_to_add.add(topic)
    self.wakeup()
380
+
381
def _failed_produce(self, batches, node_id, error):
    """Errback for a produce request that could not be sent.

    Arguments:
        batches (list of ProducerBatch): Batches carried by the request.
        node_id (int): Node the request was addressed to.
        error (Exception): The failure; applied to every batch.
    """
    fail_batch = self._complete_batch_with_exception
    log.error("%s: Error sending produce request to node %d: %s", str(self), node_id, error)  # trace
    for failed in batches:
        fail_batch(failed, error)
385
+
386
def _handle_produce_response(self, node_id, send_time, batches, response):
    """Route a produce response to the batches it answers.

    Arguments:
        node_id: Bound by the send callback; not read here.
        send_time: Bound by the send callback; not read here.
        batches (list of ProducerBatch): Batches sent in the request.
        response: The produce response; a falsy value means there is no
            response data (acks=0).
    """
    log.debug('%s: Parsing produce response: %r', str(self), response)

    if not response:
        # acks=0: no response data, synthesize a success response
        success = _PartitionProduceResponse(error_code=0)
        for batch in batches:
            self._complete_batch(batch, success)
        return

    # Index the request's batches by partition so each partition response
    # can be matched to its batch.
    by_partition = {}
    for batch in batches:
        by_partition[batch.topic_partition] = batch

    for topic_response in response.responses:
        topic = topic_response.name
        for partition_response in topic_response.partition_responses:
            key = TopicPartition(topic, partition_response.index)
            self._complete_batch(by_partition[key], partition_response)
403
+
404
def _record_exceptions_fn(self, top_level_exception, record_errors, error_message):
    """Build a function mapping a record's batch_index to its exception.

    Arguments:
        top_level_exception (Exception): The batch-level exception.
        record_errors: Sized iterable of (batch_index, message) pairs, or
            an empty/None value when there are no per-record errors.
        error_message (str or None): Batch-level error message, used when
            a record error carries no message of its own.

    Returns:
        callable: fn(batch_index) -> Exception
    """
    # When no record_errors, all batches resolve to top-level exception
    if not record_errors:
        return lambda _: top_level_exception

    record_errors_dict = dict(record_errors)
    # A single record error keeps the top-level exception type; with
    # several, each listed record is reported as an InvalidRecordError.
    if len(record_errors) == 1:
        exc_cls = top_level_exception.__class__
    else:
        exc_cls = Errors.InvalidRecordError

    def record_exceptions_fn(batch_index):
        if batch_index not in record_errors_dict:
            # This record was not listed itself; it failed only because
            # it shared a batch with records that were.
            return Errors.KafkaError(
                "Failed to append record because it was part of a batch which had one or more invalid records")
        record_error = record_errors_dict[batch_index]
        err_msg = record_error or error_message or top_level_exception.description
        return exc_cls(err_msg)

    return record_exceptions_fn
420
+
421
def _complete_batch(self, batch, partition_response):
    """Complete or retry the given batch of records based on a broker response.

    Handles both the success path (including treating
    DuplicateSequenceNumberError as success, for max_in_flight > 1
    retry arrivals) and the error path, which delegates to
    _dispatch_error with a context-aware exception instance.

    Arguments:
        batch (ProducerBatch): The record batch
        partition_response (PartitionProduceResponse): Protocol-layer
            partition response from the broker (or a synthetic instance
            for the acks=0 case).
    """
    error_code = partition_response.error_code
    if error_code != 0:
        error_cls = Errors.for_code(error_code)
        if error_cls is Errors.DuplicateSequenceNumberError:
            # With max_in_flight > 1 and retries, a retried batch may
            # arrive after the broker already committed the original.
            # DUPLICATE_SEQUENCE_NUMBER means the records were already
            # written successfully; treat as success.
            log.debug("%s: Received DUPLICATE_SEQUENCE_NUMBER for %s - records already committed, treating as success",
                      str(self), batch.topic_partition)
            error_code = 0

    if error_code == 0:
        # Success path
        base_offset = partition_response.base_offset
        log_append_time = partition_response.log_append_time_ms
        if batch.complete(base_offset, log_append_time):
            self._maybe_remove_from_inflight_batches(batch)
            self._accumulator.deallocate(batch)
        # NOTE(review): the nesting of the two statements below was
        # reconstructed from a flattened listing -- confirm both are meant
        # to run even when batch.complete() returns False.
        # Track last ack'd offset for KAFKA-5793 retention detection.
        if self._transaction_manager and self._transaction_manager.producer_id_and_epoch.match(batch):
            self._transaction_manager.update_last_acked_offset(
                batch.topic_partition, base_offset, batch.record_count)
        if self.config['guarantee_message_order']:
            # discard(), not remove(): the partition may already have been
            # unmuted (e.g. the batch was failed locally before this
            # response arrived), and that must not raise KeyError here.
            self._accumulator.muted.discard(batch.topic_partition)
        return

    # Error path: construct the exception with context-specific wrappers
    # for auth errors that carry a topic or producer-specific message.
    if error_cls is Errors.TopicAuthorizationFailedError:
        exception = Errors.TopicAuthorizationFailedError(batch.topic_partition.topic)
    elif error_cls is Errors.ClusterAuthorizationFailedError:
        exception = Errors.ClusterAuthorizationFailedError("The producer is not authorized to do idempotent sends")
    else:
        exception = error_cls(partition_response.error_message)
    self._dispatch_error(batch, exception, partition_response)
471
+
472
def _complete_batch_with_exception(self, batch, exception):
    """Fail a batch because of a client-side error.

    Used by _failed_produce (network errors) and by _send_producer_data
    (locally-expired batches). An exception instance is forwarded
    untouched, so a dynamic message survives; a bare exception class is
    instantiated with None as its only argument.

    Arguments:
        batch (ProducerBatch): The record batch
        exception (Exception or type): The client-side exception, or its
            class
    """
    error = exception(None) if isinstance(exception, type) else exception
    self._dispatch_error(batch, error, partition_response=None)
488
+
489
def _dispatch_error(self, batch, exception, partition_response):
    """Apply the appropriate outcome for a failed batch.

    Single decision point for both broker-reported errors (with a
    partition_response) and client-side exceptions (partition_response
    is None). The first applicable outcome is taken, in this order:
    split, retention-based sequence reset, retry, fail. The fail branch
    also drives the transaction-state transition. Post-error
    housekeeping (metadata refresh, partition unmuting) runs afterwards
    regardless of the branch.

    Arguments:
        batch (ProducerBatch): The failed record batch
        exception (Exception): The error instance to apply
        partition_response (PartitionProduceResponse or None): The
            broker's partition response, if the error came from one
    """
    error_cls = type(exception)
    log_start_offset = partition_response.log_start_offset if partition_response is not None else -1

    if self._can_split(batch, error_cls):
        log.warning("%s: Got %s on topic-partition %s with %d records, splitting batch and retrying",
                    str(self), error_cls.__name__, batch.topic_partition, batch.record_count)
        self._accumulator.split_and_reenqueue(batch)
        self._maybe_remove_from_inflight_batches(batch)
        self._accumulator.deallocate(batch)
        self._record_retries(batch)
    elif self._is_retention_based_unknown_producer_id(batch, error_cls, log_start_offset):
        # KAFKA-5793: the broker's producer state aged out due to
        # retention (log_start_offset > last_acked_offset), not
        # actual data loss. Reset the partition sequence and retry.
        log.warning("%s: UnknownProducerIdError for %s appears to be retention-based"
                    " (log_start_offset=%s, last_acked_offset=%s); resetting sequence and retrying",
                    str(self), batch.topic_partition, log_start_offset,
                    self._transaction_manager.last_acked_offset(batch.topic_partition))
        self._transaction_manager.reset_sequence_for_partition(batch.topic_partition)
        self._accumulator.reenqueue(batch)
        self._maybe_remove_from_inflight_batches(batch)
        self._record_retries(batch)
    elif self._can_retry(batch, error_cls):
        error_message = exception.args[0] if exception.args else None
        log.warning("%s: Got error produce response on topic-partition %s, retrying (%s attempts left): %s%s",
                    str(self), batch.topic_partition,
                    self.config['retries'] - batch.attempts - 1,
                    error_cls.__name__,
                    (". Error Message: %s" % error_message) if error_message else "")
        log.debug("%s: Retrying batch to topic-partition %s. Sequence number: %s",
                  str(self), batch.topic_partition,
                  self._transaction_manager.sequence_number(batch.topic_partition) if self._transaction_manager else None)
        self._accumulator.reenqueue(batch)
        self._maybe_remove_from_inflight_batches(batch)
        self._record_retries(batch)
    else:
        # FAIL: dispatch transaction state transition via the
        # classifier (KIP-360), then finalize the batch.
        if self._transaction_manager:
            classification = self._transaction_manager.classify_batch_error(
                exception, batch, log_start_offset=log_start_offset)

            if classification == TransactionManager.ERROR_CLASS_NEEDS_EPOCH_BUMP:
                # KIP-360 (Kafka 2.5+): bump the producer epoch and
                # continue. The accumulator's unsent records will be
                # drained under the new epoch. In-flight batches at
                # this moment are lost; their futures (including this
                # one) fail.
                self._transaction_manager.bump_producer_id_and_epoch()
            elif classification == TransactionManager.ERROR_CLASS_NEEDS_PRODUCER_ID_RESET:
                # Pre-KIP-360 fallback (non-transactional idempotent
                # producer on < 2.5 broker).
                if isinstance(exception, Errors.OutOfOrderSequenceNumberError) and \
                        self._transaction_manager.has_producer_id(batch.producer_id):
                    base_offset = partition_response.base_offset if partition_response is not None else -1
                    log.error("%s: The broker received an out of order sequence number for topic-partition %s"
                              " at offset %s. This indicates data loss on the broker, and should be investigated.",
                              str(self), batch.topic_partition, base_offset)
                # NOTE(review): nesting reconstructed from a flattened
                # listing -- confirm the reset is meant to run for every
                # error in this class, not only the logged case above.
                self._transaction_manager.reset_producer_id()
            elif classification == TransactionManager.ERROR_CLASS_FATAL:
                self._transaction_manager.transition_to_fatal_error(exception)
            elif classification == TransactionManager.ERROR_CLASS_ABORTABLE:
                self._transaction_manager.transition_to_abortable_error(exception)
            # ERROR_CLASS_RETRIABLE at this point means we couldn't
            # retry (e.g. delivery-timeout hit or retries exhausted);
            # just fail the batch without any state transition.

        if self._sensors:
            self._sensors.record_errors(batch.topic_partition.topic, batch.record_count)

        if partition_response is not None:
            record_errors = partition_response.record_errors
            error_message = partition_response.error_message
        else:
            record_errors = ()
            error_message = exception.args[0] if exception.args else None
        record_exceptions_fn = self._record_exceptions_fn(exception, record_errors, error_message)
        if batch.complete_exceptionally(exception, record_exceptions_fn):
            self._maybe_remove_from_inflight_batches(batch)
            self._accumulator.deallocate(batch)

    # Post-error housekeeping (runs for all branches above)
    if error_cls is Errors.UnknownTopicOrPartitionError:
        log.warning("%s: Received unknown topic or partition error in produce request on partition %s."
                    " The topic/partition may not exist or the user may not have Describe access to it",
                    str(self), batch.topic_partition)
    if issubclass(error_cls, Errors.InvalidMetadataError):
        self._metadata.request_update()
    if self.config['guarantee_message_order']:
        # discard(), not remove(): this runs for every outcome, including a
        # batch that was already finalized and unmuted earlier, and that
        # case must not raise KeyError.
        self._accumulator.muted.discard(batch.topic_partition)
588
+
589
def _record_retries(self, batch):
    """Report the batch's records as retried, if sensors are configured.

    Arguments:
        batch (ProducerBatch): The batch being retried.
    """
    sensors = self._sensors
    if not sensors:
        return
    sensors.record_retries(batch.topic_partition.topic, batch.record_count)
592
+
593
def _can_retry(self, batch, error_cls):
    """Decide whether a failed batch may be sent again.

    A retry is allowed only when the delivery timeout has not been
    reached, fewer than config['retries'] attempts were made, the batch
    has no final state yet and the error is a RetriableError. With a
    transaction manager present, the batch's producer id/epoch must also
    still match ours: after a reset or epoch bump, retrying would violate
    idempotence.

    Arguments:
        batch (ProducerBatch): The failed batch.
        error_cls (type): Class of the error the batch failed with.

    Returns:
        bool: True when the batch may be retried.
    """
    not_retriable = (
        batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms)
        or batch.attempts >= self.config['retries']
        or batch.final_state is not None
        or not issubclass(error_cls, Errors.RetriableError)
    )
    if not_retriable:
        return False

    txn = self._transaction_manager
    if not txn or txn.producer_id_and_epoch.match(batch):
        return True

    current = txn.producer_id_and_epoch
    log.warning("%s: Attempted to retry sending a batch but the producer id/epoch changed from %s/%s to %s/%s."
                " This batch will be dropped",
                str(self), batch.producer_id, batch.producer_epoch,
                current.producer_id,
                current.epoch)
    return False
617
+
618
def _is_retention_based_unknown_producer_id(self, batch, error_cls, log_start_offset):
    """Tell a retention-caused UnknownProducerIdError from real data loss (KAFKA-5793).

    A broker answers with UnknownProducerIdError both when retention
    legitimately removed the producer state and when data was lost. When
    the broker's log_start_offset is strictly greater than the last
    offset acknowledged to us for the partition, our earlier records
    have aged out, so the producer can safely restart its sequence at 0.

    Arguments:
        batch (ProducerBatch): The failed batch.
        error_cls (type): Class of the error the batch failed with.
        log_start_offset (int or None): Log start offset reported by the
            broker; None or a negative value means it is not available.

    Returns:
        bool: True when the error looks retention-based and the batch is
            still eligible to be re-sent.
    """
    eligible = (
        error_cls is Errors.UnknownProducerIdError
        and self._transaction_manager
        and self._transaction_manager.producer_id_and_epoch.match(batch)
        and not batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms)
        and batch.final_state is None
        and log_start_offset is not None
        and log_start_offset >= 0
    )
    if not eligible:
        return False
    acked = self._transaction_manager.last_acked_offset(batch.topic_partition)
    return log_start_offset > acked
642
+
643
def _can_split(self, batch, error):
    """Decide whether a failed batch should be split and retried.

    Splitting applies when the error says the batch is too large for the
    broker, the batch holds more than one record (so there is something
    to split), it has no final state yet, and the delivery timeout has
    not been reached.

    Arguments:
        batch (ProducerBatch): The failed batch.
        error (type): Class of the error the batch failed with.

    Returns:
        bool: True when the batch can be split and retried.
    """
    too_large_errors = (Errors.MessageSizeTooLargeError, Errors.RecordListTooLargeError)
    if error not in too_large_errors:
        return False
    if not batch.record_count > 1:
        return False
    if batch.final_state is not None:
        return False
    return not batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms)
653
+
654
def _create_produce_requests(self, collated):
    """Build one produce request per node that has batches to send.

    Nodes mapped to an empty batch list are left out of the result.

    Arguments:
        collated: {node_id: [ProducerBatch]}

    Returns:
        dict: {node_id: ProduceRequest} (version depends on client api_versions)
    """
    return {
        node_id: self._produce_request(
            node_id, self.config['acks'],
            self.config['request_timeout_ms'], node_batches)
        for node_id, node_batches in collated.items()
        if node_batches
    }
672
+
673
def _produce_request(self, node_id, acks, timeout, batches):
    """Assemble a produce request out of the given record batches.

    Batches are grouped by topic, keeping the order in which topics first
    appear. The request allows versions 0-9; the minimum is raised to 3
    when a transactional id is set.

    Arguments:
        node_id: Destination node; not read here.
        acks: Value for the request's acks field.
        timeout: Value for the request's timeout_ms field.
        batches (list of ProducerBatch): Batches to include.

    Returns:
        ProduceRequest (version depends on client api_versions)
    """
    Topic = ProduceRequest.TopicProduceData
    Partition = Topic.PartitionProduceData

    partitions_by_topic = {}
    for batch in batches:
        tp = batch.topic_partition
        entry = Partition(
            index=tp.partition,
            records=batch.records.buffer(),
        )
        partitions_by_topic.setdefault(tp.topic, []).append(entry)

    txn = self._transaction_manager
    transactional_id = txn.transactional_id if txn else None

    return ProduceRequest(
        transactional_id=transactional_id,
        acks=acks,
        timeout_ms=timeout,
        topic_data=[Topic(name=name, partition_data=entries)
                    for name, entries in partitions_by_topic.items()],
        min_version=0 if transactional_id is None else 3,
        max_version=9,
    )
705
+
706
def wakeup(self):
    """Wake up the selector that this send thread's client is using."""
    client = self._client
    client.wakeup()
709
+
710
def bootstrap_connected(self):
    """Return whatever the underlying client's bootstrap_connected() reports."""
    connected = self._client.bootstrap_connected()
    return connected
712
+
713
def __str__(self):
    """Describe the sender by its configured client_id and transactional_id."""
    client_id = self.config['client_id']
    transactional_id = self.config['transactional_id']
    return "<Sender client_id=%s transactional_id=%s>" % (client_id, transactional_id)
715
+
716
+
717
class SenderMetrics:
    """Registers the producer sender's sensors and metrics and records into them."""

    def __init__(self, metrics, client, metadata):
        """Create the global sensors and register their metrics.

        Arguments:
            metrics: Metrics registry; must provide sensor(), get_sensor(),
                metric_name() and add_metric().
            client: Network client, queried for the in-flight request count.
            metadata: Cluster metadata, read for the age of the last
                successful refresh.
        """
        self.metrics = metrics
        self._client = client
        self._metadata = metadata

        # (sensor name, attribute holding the sensor,
        #  ((metric name, stat class, description), ...))
        sensor_specs = (
            ('batch-size', 'batch_size_sensor', (
                ('batch-size-avg', Avg,
                 'The average number of bytes sent per partition per-request.'),
                ('batch-size-max', Max,
                 'The max number of bytes sent per partition per-request.'),
            )),
            ('compression-rate', 'compression_rate_sensor', (
                ('compression-rate-avg', Avg,
                 'The average compression rate of record batches.'),
            )),
            ('queue-time', 'queue_time_sensor', (
                ('record-queue-time-avg', Avg,
                 'The average time in ms record batches spent in the record accumulator.'),
                ('record-queue-time-max', Max,
                 'The maximum time in ms record batches spent in the record accumulator.'),
            )),
            ('records-per-request', 'records_per_request_sensor', (
                ('record-send-rate', Rate,
                 'The average number of records sent per second.'),
                ('records-per-request-avg', Avg,
                 'The average number of records per request.'),
            )),
            ('bytes', 'byte_rate_sensor', (
                ('byte-rate', Rate,
                 'The average number of bytes sent per second.'),
            )),
            ('record-retries', 'retry_sensor', (
                ('record-retry-rate', Rate,
                 'The average per-second number of retried record sends'),
            )),
            ('errors', 'error_sensor', (
                ('record-error-rate', Rate,
                 'The average per-second number of record sends that resulted in errors'),
            )),
            ('record-size-max', 'max_record_size_sensor', (
                ('record-size-max', Max,
                 'The maximum record size across all batches'),
                ('record-size-avg', Avg,
                 'The average maximum record size per batch'),
            )),
        )
        for sensor_name, attr, metric_specs in sensor_specs:
            setattr(self, attr, self.metrics.sensor(sensor_name))
            for metric_name, stat_cls, description in metric_specs:
                self.add_metric(metric_name, stat_cls(),
                                sensor_name=sensor_name,
                                description=description)

        # Gauges computed on demand rather than recorded through a sensor.
        self.add_metric('requests-in-flight',
                        AnonMeasurable(lambda *_: self._client.in_flight_request_count()),
                        description='The current number of in-flight requests awaiting a response.')

        self.add_metric('metadata-age',
                        AnonMeasurable(lambda _, now: (now - self._metadata._last_successful_refresh_ms) / 1000),
                        description='The age in seconds of the current producer metadata being used.')

    def add_metric(self, metric_name, measurable, group_name='producer-metrics',
                   description=None, tags=None,
                   sensor_name=None):
        """Register one metric, on a sensor when sensor_name is given.

        Arguments:
            metric_name (str): Name of the metric.
            measurable: The stat or measurable backing the metric.
            group_name (str): Metric group; defaults to 'producer-metrics'.
            description (str, optional): Metric description.
            tags (optional): Tags passed through to metric_name().
            sensor_name (str, optional): Sensor to attach the metric to;
                without it the metric is added to the registry directly.
        """
        registry = self.metrics
        metric = registry.metric_name(metric_name, group_name, description, tags)
        if not sensor_name:
            registry.add_metric(metric, measurable)
            return
        registry.sensor(sensor_name).add(metric, measurable)

    def maybe_register_topic_metrics(self, topic):
        """Register the per-topic metrics for ``topic`` unless already present."""

        def sensor_name(name):
            return 'topic.{0}.{1}'.format(topic, name)

        # if one sensor of the metrics has been registered for the topic,
        # then all other sensors should have been registered; and vice versa
        if self.metrics.get_sensor(sensor_name('records-per-batch')):
            return

        group = 'producer-topic-metrics.' + topic
        # (metric name, stat class, sensor suffix, description prefix)
        topic_specs = (
            ('record-send-rate', Rate, 'records-per-batch', 'Records sent per second for topic '),
            ('byte-rate', Rate, 'bytes', 'Bytes per second for topic '),
            ('compression-rate', Avg, 'compression-rate', 'Average Compression ratio for topic '),
            ('record-retry-rate', Rate, 'record-retries', 'Record retries per second for topic '),
            ('record-error-rate', Rate, 'record-errors', 'Record errors per second for topic '),
        )
        for metric_name, stat_cls, suffix, blurb in topic_specs:
            self.add_metric(metric_name, stat_cls(),
                            sensor_name=sensor_name(suffix),
                            group_name=group,
                            description=blurb + topic)

    def update_produce_request_metrics(self, batches_map):
        """Record per-topic and global metrics for the batches being sent.

        Arguments:
            batches_map: {node_id: [ProducerBatch]}
        """
        get_sensor = self.metrics.get_sensor
        for node_batches in batches_map.values():
            record_total = 0
            byte_total = 0
            for batch in node_batches:
                # register all per-topic metrics at once
                topic = batch.topic_partition.topic
                self.maybe_register_topic_metrics(topic)

                # per-topic record send rate
                get_sensor('topic.' + topic + '.records-per-batch').record(batch.record_count)

                # per-topic bytes send rate
                get_sensor('topic.' + topic + '.bytes').record(batch.records.size_in_bytes())

                # per-topic compression rate
                get_sensor('topic.' + topic + '.compression-rate').record(batch.records.compression_rate())

                # global metrics
                self.batch_size_sensor.record(batch.records.size_in_bytes())
                if batch.drained:
                    # NOTE(review): the queue-time metrics are described as
                    # "in ms", but this records the raw drained - created
                    # difference -- confirm the unit of those timestamps.
                    self.queue_time_sensor.record(batch.drained - batch.created)
                self.compression_rate_sensor.record(batch.records.compression_rate())
                self.max_record_size_sensor.record(batch.max_record_size)
                record_total += batch.record_count
                byte_total += batch.records.size_in_bytes()

            # Per-request totals are only recorded for nodes that had batches.
            if node_batches:
                self.records_per_request_sensor.record(record_total)
                self.byte_rate_sensor.record(byte_total)

    def record_retries(self, topic, count):
        """Record ``count`` retried records globally and, if registered, for the topic."""
        self.retry_sensor.record(count)
        topic_sensor = self.metrics.get_sensor('topic.' + topic + '.record-retries')
        if topic_sensor:
            topic_sensor.record(count)

    def record_errors(self, topic, count):
        """Record ``count`` failed records globally and, if registered, for the topic."""
        self.error_sensor.record(count)
        topic_sensor = self.metrics.get_sensor('topic.' + topic + '.record-errors')
        if topic_sensor:
            topic_sensor.record(count)