polyphony 0.45.5 → 0.46.0

Files changed (53)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +2 -0
  3. data/.gitmodules +0 -0
  4. data/CHANGELOG.md +4 -0
  5. data/Gemfile.lock +1 -1
  6. data/README.md +3 -3
  7. data/Rakefile +1 -1
  8. data/TODO.md +4 -4
  9. data/examples/performance/thread-vs-fiber/polyphony_server.rb +1 -2
  10. data/ext/liburing/liburing.h +585 -0
  11. data/ext/liburing/liburing/README.md +4 -0
  12. data/ext/liburing/liburing/barrier.h +73 -0
  13. data/ext/liburing/liburing/compat.h +15 -0
  14. data/ext/liburing/liburing/io_uring.h +343 -0
  15. data/ext/liburing/queue.c +333 -0
  16. data/ext/liburing/register.c +187 -0
  17. data/ext/liburing/setup.c +210 -0
  18. data/ext/liburing/syscall.c +54 -0
  19. data/ext/liburing/syscall.h +18 -0
  20. data/ext/polyphony/backend.h +0 -14
  21. data/ext/polyphony/backend_common.h +109 -0
  22. data/ext/polyphony/backend_io_uring.c +884 -0
  23. data/ext/polyphony/backend_io_uring_context.c +73 -0
  24. data/ext/polyphony/backend_io_uring_context.h +52 -0
  25. data/ext/polyphony/{libev_backend.c → backend_libev.c} +202 -294
  26. data/ext/polyphony/event.c +1 -1
  27. data/ext/polyphony/extconf.rb +31 -13
  28. data/ext/polyphony/fiber.c +29 -22
  29. data/ext/polyphony/libev.c +4 -0
  30. data/ext/polyphony/libev.h +8 -2
  31. data/ext/polyphony/liburing.c +8 -0
  32. data/ext/polyphony/playground.c +51 -0
  33. data/ext/polyphony/polyphony.c +5 -5
  34. data/ext/polyphony/polyphony.h +16 -12
  35. data/ext/polyphony/polyphony_ext.c +10 -4
  36. data/ext/polyphony/queue.c +1 -1
  37. data/ext/polyphony/thread.c +11 -9
  38. data/lib/polyphony/adapters/trace.rb +2 -2
  39. data/lib/polyphony/core/global_api.rb +1 -4
  40. data/lib/polyphony/extensions/debug.rb +13 -0
  41. data/lib/polyphony/extensions/fiber.rb +2 -2
  42. data/lib/polyphony/extensions/socket.rb +59 -10
  43. data/lib/polyphony/version.rb +1 -1
  44. data/test/helper.rb +36 -4
  45. data/test/io_uring_test.rb +55 -0
  46. data/test/stress.rb +5 -2
  47. data/test/test_backend.rb +4 -6
  48. data/test/test_ext.rb +1 -2
  49. data/test/test_fiber.rb +22 -16
  50. data/test/test_global_api.rb +33 -35
  51. data/test/test_throttler.rb +3 -6
  52. data/test/test_trace.rb +7 -5
  53. metadata +22 -3
data/ext/liburing/liburing/README.md
@@ -0,0 +1,4 @@
+ ## Updating the liburing source code
+
+ - copy liburing/src/**/* into ext/liburing
+ - move ext/liburing/include/**/* to ext/liburing/
data/ext/liburing/liburing/barrier.h
@@ -0,0 +1,73 @@
+ /* SPDX-License-Identifier: MIT */
+ #ifndef LIBURING_BARRIER_H
+ #define LIBURING_BARRIER_H
+
+ /*
+ From the kernel documentation file refcount-vs-atomic.rst:
+
+ A RELEASE memory ordering guarantees that all prior loads and
+ stores (all po-earlier instructions) on the same CPU are completed
+ before the operation. It also guarantees that all po-earlier
+ stores on the same CPU and all propagated stores from other CPUs
+ must propagate to all other CPUs before the release operation
+ (A-cumulative property). This is implemented using
+ :c:func:`smp_store_release`.
+
+ An ACQUIRE memory ordering guarantees that all post loads and
+ stores (all po-later instructions) on the same CPU are
+ completed after the acquire operation. It also guarantees that all
+ po-later stores on the same CPU must propagate to all other CPUs
+ after the acquire operation executes. This is implemented using
+ :c:func:`smp_acquire__after_ctrl_dep`.
+ */
+
+ #ifdef __cplusplus
+ #include <atomic>
+
+ template <typename T>
+ static inline void IO_URING_WRITE_ONCE(T &var, T val)
+ {
+ std::atomic_store_explicit(reinterpret_cast<std::atomic<T> *>(&var),
+ val, std::memory_order_relaxed);
+ }
+ template <typename T>
+ static inline T IO_URING_READ_ONCE(const T &var)
+ {
+ return std::atomic_load_explicit(
+ reinterpret_cast<const std::atomic<T> *>(&var),
+ std::memory_order_relaxed);
+ }
+
+ template <typename T>
+ static inline void io_uring_smp_store_release(T *p, T v)
+ {
+ std::atomic_store_explicit(reinterpret_cast<std::atomic<T> *>(p), v,
+ std::memory_order_release);
+ }
+
+ template <typename T>
+ static inline T io_uring_smp_load_acquire(const T *p)
+ {
+ return std::atomic_load_explicit(
+ reinterpret_cast<const std::atomic<T> *>(p),
+ std::memory_order_acquire);
+ }
+ #else
+ #include <stdatomic.h>
+
+ #define IO_URING_WRITE_ONCE(var, val) \
+ atomic_store_explicit((_Atomic typeof(var) *)&(var), \
+ (val), memory_order_relaxed)
+ #define IO_URING_READ_ONCE(var) \
+ atomic_load_explicit((_Atomic typeof(var) *)&(var), \
+ memory_order_relaxed)
+
+ #define io_uring_smp_store_release(p, v) \
+ atomic_store_explicit((_Atomic typeof(*(p)) *)(p), (v), \
+ memory_order_release)
+ #define io_uring_smp_load_acquire(p) \
+ atomic_load_explicit((_Atomic typeof(*(p)) *)(p), \
+ memory_order_acquire)
+ #endif
+
+ #endif /* defined(LIBURING_BARRIER_H) */
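The barrier.h shim above boils down to a release store on the producer side paired with an acquire load on the consumer side. Here is a minimal sketch of that pairing using plain C11 atomics instead of the macros above; the ring/produce/consume names are illustrative only and are not part of liburing or polyphony:

```c
/* Illustrative only: mirrors the release/acquire pairing that the barrier.h
 * macros provide, using C11 atomics directly. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8                     /* power of two, like the io_uring rings */
#define RING_MASK (RING_SIZE - 1)

struct ring {
    _Atomic uint32_t tail;              /* written by producer, read by consumer */
    uint32_t head;                      /* consumer-private in this sketch */
    int slots[RING_SIZE];
};

/* Producer: fill the slot first, then publish it with a release store, so a
 * consumer that acquires the new tail also sees the slot contents. */
static void produce(struct ring *r, int value)
{
    uint32_t t = atomic_load_explicit(&r->tail, memory_order_relaxed);
    r->slots[t & RING_MASK] = value;
    atomic_store_explicit(&r->tail, t + 1, memory_order_release);
}

/* Consumer: an acquire load of tail guarantees that slot data written before
 * the matching release store is visible. */
static int consume(struct ring *r, int *value)
{
    uint32_t t = atomic_load_explicit(&r->tail, memory_order_acquire);
    if (r->head == t)
        return 0;                       /* nothing to consume */
    *value = r->slots[r->head++ & RING_MASK];
    return 1;
}

int main(void)
{
    struct ring r = {0};
    int v;

    produce(&r, 42);
    if (consume(&r, &v))
        printf("consumed %d\n", v);
    return 0;
}
```

queue.c below uses the same pattern: the SQ tail is published with io_uring_smp_store_release() only after the SQE slots have been filled, and the head is read with io_uring_smp_load_acquire().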
data/ext/liburing/liburing/compat.h
@@ -0,0 +1,15 @@
+ /* SPDX-License-Identifier: MIT */
+ #ifndef LIBURING_COMPAT_H
+ #define LIBURING_COMPAT_H
+
+ #include <linux/time_types.h>
+
+ #include <inttypes.h>
+
+ struct open_how {
+ uint64_t flags;
+ uint64_t mode;
+ uint64_t resolve;
+ };
+
+ #endif
data/ext/liburing/liburing/io_uring.h
@@ -0,0 +1,343 @@
+ /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+ /*
+ * Header file for the io_uring interface.
+ *
+ * Copyright (C) 2019 Jens Axboe
+ * Copyright (C) 2019 Christoph Hellwig
+ */
+ #ifndef LINUX_IO_URING_H
+ #define LINUX_IO_URING_H
+
+ #include <linux/fs.h>
+ #include <linux/types.h>
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ /*
+ * IO submission data structure (Submission Queue Entry)
+ */
+ struct io_uring_sqe {
+ __u8 opcode; /* type of operation for this sqe */
+ __u8 flags; /* IOSQE_ flags */
+ __u16 ioprio; /* ioprio for the request */
+ __s32 fd; /* file descriptor to do IO on */
+ union {
+ __u64 off; /* offset into file */
+ __u64 addr2;
+ };
+ union {
+ __u64 addr; /* pointer to buffer or iovecs */
+ __u64 splice_off_in;
+ };
+ __u32 len; /* buffer size or number of iovecs */
+ union {
+ __kernel_rwf_t rw_flags;
+ __u32 fsync_flags;
+ __u16 poll_events; /* compatibility */
+ __u32 poll32_events; /* word-reversed for BE */
+ __u32 sync_range_flags;
+ __u32 msg_flags;
+ __u32 timeout_flags;
+ __u32 accept_flags;
+ __u32 cancel_flags;
+ __u32 open_flags;
+ __u32 statx_flags;
+ __u32 fadvise_advice;
+ __u32 splice_flags;
+ };
+ __u64 user_data; /* data to be passed back at completion time */
+ union {
+ struct {
+ /* pack this to avoid bogus arm OABI complaints */
+ union {
+ /* index into fixed buffers, if used */
+ __u16 buf_index;
+ /* for grouped buffer selection */
+ __u16 buf_group;
+ } __attribute__((packed));
+ /* personality to use, if used */
+ __u16 personality;
+ __s32 splice_fd_in;
+ };
+ __u64 __pad2[3];
+ };
+ };
+
+ enum {
+ IOSQE_FIXED_FILE_BIT,
+ IOSQE_IO_DRAIN_BIT,
+ IOSQE_IO_LINK_BIT,
+ IOSQE_IO_HARDLINK_BIT,
+ IOSQE_ASYNC_BIT,
+ IOSQE_BUFFER_SELECT_BIT,
+ };
+
+ /*
+ * sqe->flags
+ */
+ /* use fixed fileset */
+ #define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT)
+ /* issue after inflight IO */
+ #define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT)
+ /* links next sqe */
+ #define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT)
+ /* like LINK, but stronger */
+ #define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
+ /* always go async */
+ #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
+ /* select buffer from sqe->buf_group */
+ #define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
+
+ /*
+ * io_uring_setup() flags
+ */
+ #define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */
+ #define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */
+ #define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
+ #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
+ #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */
+ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
+ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
+
+ enum {
+ IORING_OP_NOP,
+ IORING_OP_READV,
+ IORING_OP_WRITEV,
+ IORING_OP_FSYNC,
+ IORING_OP_READ_FIXED,
+ IORING_OP_WRITE_FIXED,
+ IORING_OP_POLL_ADD,
+ IORING_OP_POLL_REMOVE,
+ IORING_OP_SYNC_FILE_RANGE,
+ IORING_OP_SENDMSG,
+ IORING_OP_RECVMSG,
+ IORING_OP_TIMEOUT,
+ IORING_OP_TIMEOUT_REMOVE,
+ IORING_OP_ACCEPT,
+ IORING_OP_ASYNC_CANCEL,
+ IORING_OP_LINK_TIMEOUT,
+ IORING_OP_CONNECT,
+ IORING_OP_FALLOCATE,
+ IORING_OP_OPENAT,
+ IORING_OP_CLOSE,
+ IORING_OP_FILES_UPDATE,
+ IORING_OP_STATX,
+ IORING_OP_READ,
+ IORING_OP_WRITE,
+ IORING_OP_FADVISE,
+ IORING_OP_MADVISE,
+ IORING_OP_SEND,
+ IORING_OP_RECV,
+ IORING_OP_OPENAT2,
+ IORING_OP_EPOLL_CTL,
+ IORING_OP_SPLICE,
+ IORING_OP_PROVIDE_BUFFERS,
+ IORING_OP_REMOVE_BUFFERS,
+ IORING_OP_TEE,
+ IORING_OP_SHUTDOWN,
+
+ /* this goes last, obviously */
+ IORING_OP_LAST,
+ };
+
+ /*
+ * sqe->fsync_flags
+ */
+ #define IORING_FSYNC_DATASYNC (1U << 0)
+
+ /*
+ * sqe->timeout_flags
+ */
+ #define IORING_TIMEOUT_ABS (1U << 0)
+
+ /*
+ * sqe->splice_flags
+ * extends splice(2) flags
+ */
+ #define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
+
+ /*
+ * IO completion data structure (Completion Queue Entry)
+ */
+ struct io_uring_cqe {
+ __u64 user_data; /* sqe->data submission passed back */
+ __s32 res; /* result code for this event */
+ __u32 flags;
+ };
+
+ /*
+ * cqe->flags
+ *
+ * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
+ */
+ #define IORING_CQE_F_BUFFER (1U << 0)
+
+ enum {
+ IORING_CQE_BUFFER_SHIFT = 16,
+ };
+
+ /*
+ * Magic offsets for the application to mmap the data it needs
+ */
+ #define IORING_OFF_SQ_RING 0ULL
+ #define IORING_OFF_CQ_RING 0x8000000ULL
+ #define IORING_OFF_SQES 0x10000000ULL
+
+ /*
+ * Filled with the offset for mmap(2)
+ */
+ struct io_sqring_offsets {
+ __u32 head;
+ __u32 tail;
+ __u32 ring_mask;
+ __u32 ring_entries;
+ __u32 flags;
+ __u32 dropped;
+ __u32 array;
+ __u32 resv1;
+ __u64 resv2;
+ };
+
+ /*
+ * sq_ring->flags
+ */
+ #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
+ #define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */
+
+ struct io_cqring_offsets {
+ __u32 head;
+ __u32 tail;
+ __u32 ring_mask;
+ __u32 ring_entries;
+ __u32 overflow;
+ __u32 cqes;
+ __u32 flags;
+ __u32 resv1;
+ __u64 resv2;
+ };
+
+ /*
+ * cq_ring->flags
+ */
+
+ /* disable eventfd notifications */
+ #define IORING_CQ_EVENTFD_DISABLED (1U << 0)
+
+ /*
+ * io_uring_enter(2) flags
+ */
+ #define IORING_ENTER_GETEVENTS (1U << 0)
+ #define IORING_ENTER_SQ_WAKEUP (1U << 1)
+ #define IORING_ENTER_SQ_WAIT (1U << 2)
+
+ /*
+ * Passed in for io_uring_setup(2). Copied back with updated info on success
+ */
+ struct io_uring_params {
+ __u32 sq_entries;
+ __u32 cq_entries;
+ __u32 flags;
+ __u32 sq_thread_cpu;
+ __u32 sq_thread_idle;
+ __u32 features;
+ __u32 wq_fd;
+ __u32 resv[3];
+ struct io_sqring_offsets sq_off;
+ struct io_cqring_offsets cq_off;
+ };
+
+ /*
+ * io_uring_params->features flags
+ */
+ #define IORING_FEAT_SINGLE_MMAP (1U << 0)
+ #define IORING_FEAT_NODROP (1U << 1)
+ #define IORING_FEAT_SUBMIT_STABLE (1U << 2)
+ #define IORING_FEAT_RW_CUR_POS (1U << 3)
+ #define IORING_FEAT_CUR_PERSONALITY (1U << 4)
+ #define IORING_FEAT_FAST_POLL (1U << 5)
+ #define IORING_FEAT_POLL_32BITS (1U << 6)
+ #define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
+
+ /*
+ * io_uring_register(2) opcodes and arguments
+ */
+ enum {
+ IORING_REGISTER_BUFFERS = 0,
+ IORING_UNREGISTER_BUFFERS = 1,
+ IORING_REGISTER_FILES = 2,
+ IORING_UNREGISTER_FILES = 3,
+ IORING_REGISTER_EVENTFD = 4,
+ IORING_UNREGISTER_EVENTFD = 5,
+ IORING_REGISTER_FILES_UPDATE = 6,
+ IORING_REGISTER_EVENTFD_ASYNC = 7,
+ IORING_REGISTER_PROBE = 8,
+ IORING_REGISTER_PERSONALITY = 9,
+ IORING_UNREGISTER_PERSONALITY = 10,
+ IORING_REGISTER_RESTRICTIONS = 11,
+ IORING_REGISTER_ENABLE_RINGS = 12,
+
+ /* this goes last */
+ IORING_REGISTER_LAST
+ };
+
+ struct io_uring_files_update {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 /* __s32 * */ fds;
+ };
+
+ #define IO_URING_OP_SUPPORTED (1U << 0)
+
+ struct io_uring_probe_op {
+ __u8 op;
+ __u8 resv;
+ __u16 flags; /* IO_URING_OP_* flags */
+ __u32 resv2;
+ };
+
+ struct io_uring_probe {
+ __u8 last_op; /* last opcode supported */
+ __u8 ops_len; /* length of ops[] array below */
+ __u16 resv;
+ __u32 resv2[3];
+ struct io_uring_probe_op ops[0];
+ };
+
+ struct io_uring_restriction {
+ __u16 opcode;
+ union {
+ __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */
+ __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */
+ __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */
+ };
+ __u8 resv;
+ __u32 resv2[3];
+ };
+
+ /*
+ * io_uring_restriction->opcode values
+ */
+ enum {
+ /* Allow an io_uring_register(2) opcode */
+ IORING_RESTRICTION_REGISTER_OP = 0,
+
+ /* Allow an sqe opcode */
+ IORING_RESTRICTION_SQE_OP = 1,
+
+ /* Allow sqe flags */
+ IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2,
+
+ /* Require sqe flags (these flags must be set on each submission) */
+ IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3,
+
+ IORING_RESTRICTION_LAST
+ };
+
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif
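io_uring.h above only declares the kernel ABI: the SQE/CQE layouts, setup and enter flags, opcodes, and the io_uring_setup(2) result structure. As a hedged illustration of what that ABI looks like from userspace without any liburing helpers, the sketch below calls the raw io_uring_setup(2) syscall and inspects io_uring_params. It is not code from this gem (which goes through the bundled liburing), and it assumes kernel headers new enough to provide <linux/io_uring.h> and __NR_io_uring_setup:

```c
/* Minimal sketch, not from the gem: create a ring via the raw syscall and
 * read back the parameters the kernel fills in. */
#include <linux/io_uring.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
    struct io_uring_params p;
    memset(&p, 0, sizeof(p));

    /* Ask for a ring with 8 SQ entries; on success the kernel returns a ring
     * fd and copies back the mmap offsets (p.sq_off / p.cq_off) and features. */
    int fd = (int)syscall(__NR_io_uring_setup, 8, &p);
    if (fd < 0) {
        perror("io_uring_setup");
        return 1;
    }

    printf("sq_entries=%u cq_entries=%u\n", p.sq_entries, p.cq_entries);
    if (p.features & IORING_FEAT_SINGLE_MMAP)
        printf("kernel supports IORING_FEAT_SINGLE_MMAP\n");

    close(fd);
    return 0;
}
```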
data/ext/liburing/queue.c
@@ -0,0 +1,333 @@
+ /* SPDX-License-Identifier: MIT */
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <sys/mman.h>
+ #include <unistd.h>
+ #include <errno.h>
+ #include <string.h>
+ #include <stdbool.h>
+
+ #include "liburing/compat.h"
+ #include "liburing/io_uring.h"
+ #include "liburing.h"
+ #include "liburing/barrier.h"
+
+ #include "syscall.h"
+
+ /*
+ * Returns true if we're not using SQ thread (thus nobody submits but us)
+ * or if IORING_SQ_NEED_WAKEUP is set, so submit thread must be explicitly
+ * awakened. For the latter case, we set the thread wakeup flag.
+ */
+ static inline bool sq_ring_needs_enter(struct io_uring *ring, unsigned *flags)
+ {
+ if (!(ring->flags & IORING_SETUP_SQPOLL))
+ return true;
+ if (IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_NEED_WAKEUP) {
+ *flags |= IORING_ENTER_SQ_WAKEUP;
+ return true;
+ }
+
+ return false;
+ }
+
+ static inline bool cq_ring_needs_flush(struct io_uring *ring)
+ {
+ return IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW;
+ }
+
+ static int __io_uring_peek_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr)
+ {
+ struct io_uring_cqe *cqe;
+ unsigned head;
+ int err = 0;
+
+ do {
+ io_uring_for_each_cqe(ring, head, cqe)
+ break;
+ if (cqe) {
+ if (cqe->user_data == LIBURING_UDATA_TIMEOUT) {
+ if (cqe->res < 0)
+ err = cqe->res;
+ io_uring_cq_advance(ring, 1);
+ if (!err)
+ continue;
+ cqe = NULL;
+ }
+ }
+ break;
+ } while (1);
+
+ *cqe_ptr = cqe;
+ return err;
+ }
+
+ int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
+ unsigned submit, unsigned wait_nr, sigset_t *sigmask)
+ {
+ struct io_uring_cqe *cqe = NULL;
+ const int to_wait = wait_nr;
+ int ret = 0, err;
+
+ do {
+ bool cq_overflow_flush = false;
+ unsigned flags = 0;
+
+ err = __io_uring_peek_cqe(ring, &cqe);
+ if (err)
+ break;
+ if (!cqe && !to_wait && !submit) {
+ if (!cq_ring_needs_flush(ring)) {
+ err = -EAGAIN;
+ break;
+ }
+ cq_overflow_flush = true;
+ }
+ if (wait_nr && cqe)
+ wait_nr--;
+ if (wait_nr || cq_overflow_flush)
+ flags = IORING_ENTER_GETEVENTS;
+ if (submit)
+ sq_ring_needs_enter(ring, &flags);
+ if (wait_nr || submit || cq_overflow_flush)
+ ret = __sys_io_uring_enter(ring->ring_fd, submit,
+ wait_nr, flags, sigmask);
+ if (ret < 0) {
+ err = -errno;
+ } else if (ret == (int)submit) {
+ submit = 0;
+ /*
+ * When SETUP_IOPOLL is set, __sys_io_uring enter()
+ * must be called to reap new completions but the call
+ * won't be made if both wait_nr and submit are zero
+ * so preserve wait_nr.
+ */
+ if (!(ring->flags & IORING_SETUP_IOPOLL))
+ wait_nr = 0;
+ } else {
+ submit -= ret;
+ }
+ if (cqe)
+ break;
+ } while (!err);
+
+ *cqe_ptr = cqe;
+ return err;
+ }
+
+ /*
+ * Fill in an array of IO completions up to count, if any are available.
+ * Returns the amount of IO completions filled.
+ */
+ unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqes, unsigned count)
+ {
+ unsigned ready;
+ bool overflow_checked = false;
+
+ again:
+ ready = io_uring_cq_ready(ring);
+ if (ready) {
+ unsigned head = *ring->cq.khead;
+ unsigned mask = *ring->cq.kring_mask;
+ unsigned last;
+ int i = 0;
+
+ count = count > ready ? ready : count;
+ last = head + count;
+ for (;head != last; head++, i++)
+ cqes[i] = &ring->cq.cqes[head & mask];
+
+ return count;
+ }
+
+ if (overflow_checked)
+ goto done;
+
+ if (cq_ring_needs_flush(ring)) {
+ __sys_io_uring_enter(ring->ring_fd, 0, 0,
+ IORING_ENTER_GETEVENTS, NULL);
+ overflow_checked = true;
+ goto again;
+ }
+
+ done:
+ return 0;
+ }
+
+ /*
+ * Sync internal state with kernel ring state on the SQ side. Returns the
+ * number of pending items in the SQ ring, for the shared ring.
+ */
+ int __io_uring_flush_sq(struct io_uring *ring)
+ {
+ struct io_uring_sq *sq = &ring->sq;
+ const unsigned mask = *sq->kring_mask;
+ unsigned ktail, to_submit;
+
+ if (sq->sqe_head == sq->sqe_tail) {
+ ktail = *sq->ktail;
+ goto out;
+ }
+
+ /*
+ * Fill in sqes that we have queued up, adding them to the kernel ring
+ */
+ ktail = *sq->ktail;
+ to_submit = sq->sqe_tail - sq->sqe_head;
+ while (to_submit--) {
+ sq->array[ktail & mask] = sq->sqe_head & mask;
+ ktail++;
+ sq->sqe_head++;
+ }
+
+ /*
+ * Ensure that the kernel sees the SQE updates before it sees the tail
+ * update.
+ */
+ io_uring_smp_store_release(sq->ktail, ktail);
+ out:
+ return ktail - *sq->khead;
+ }
+
+ /*
+ * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note
+ * that an sqe is used internally to handle the timeout. Applications using
+ * this function must never set sqe->user_data to LIBURING_UDATA_TIMEOUT!
+ *
+ * If 'ts' is specified, the application need not call io_uring_submit() before
+ * calling this function, as we will do that on its behalf. From this it also
+ * follows that this function isn't safe to use for applications that split SQ
+ * and CQ handling between two threads and expect that to work without
+ * synchronization, as this function manipulates both the SQ and CQ side.
+ */
+ int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
+ unsigned wait_nr, struct __kernel_timespec *ts,
+ sigset_t *sigmask)
+ {
+ unsigned to_submit = 0;
+
+ if (ts) {
+ struct io_uring_sqe *sqe;
+ int ret;
+
+ /*
+ * If the SQ ring is full, we may need to submit IO first
+ */
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ ret = io_uring_submit(ring);
+ if (ret < 0)
+ return ret;
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe)
+ return -EAGAIN;
+ }
+ io_uring_prep_timeout(sqe, ts, wait_nr, 0);
+ sqe->user_data = LIBURING_UDATA_TIMEOUT;
+ to_submit = __io_uring_flush_sq(ring);
+ }
+
+ return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask);
+ }
+
+ /*
+ * See io_uring_wait_cqes() - this function is the same, it just always uses
+ * '1' as the wait_nr.
+ */
+ int io_uring_wait_cqe_timeout(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr,
+ struct __kernel_timespec *ts)
+ {
+ return io_uring_wait_cqes(ring, cqe_ptr, 1, ts, NULL);
+ }
+
+ /*
+ * Submit sqes acquired from io_uring_get_sqe() to the kernel.
+ *
+ * Returns number of sqes submitted
+ */
+ static int __io_uring_submit(struct io_uring *ring, unsigned submitted,
+ unsigned wait_nr)
+ {
+ unsigned flags;
+ int ret;
+
+ flags = 0;
+ if (sq_ring_needs_enter(ring, &flags) || wait_nr) {
+ if (wait_nr || (ring->flags & IORING_SETUP_IOPOLL))
+ flags |= IORING_ENTER_GETEVENTS;
+
+ ret = __sys_io_uring_enter(ring->ring_fd, submitted, wait_nr,
+ flags, NULL);
+ if (ret < 0)
+ return -errno;
+ } else
+ ret = submitted;
+
+ return ret;
+ }
+
+ static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
+ {
+ return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr);
+ }
+
+ /*
+ * Submit sqes acquired from io_uring_get_sqe() to the kernel.
+ *
+ * Returns number of sqes submitted
+ */
+ int io_uring_submit(struct io_uring *ring)
+ {
+ return __io_uring_submit_and_wait(ring, 0);
+ }
+
+ /*
+ * Like io_uring_submit(), but allows waiting for events as well.
+ *
+ * Returns number of sqes submitted
+ */
+ int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
+ {
+ return __io_uring_submit_and_wait(ring, wait_nr);
+ }
+
+ static inline struct io_uring_sqe *
+ __io_uring_get_sqe(struct io_uring_sq *sq, unsigned int __head)
+ {
+ unsigned int __next = (sq)->sqe_tail + 1;
+ struct io_uring_sqe *__sqe = NULL;
+
+ if (__next - __head <= *(sq)->kring_entries) {
+ __sqe = &(sq)->sqes[(sq)->sqe_tail & *(sq)->kring_mask];
+ (sq)->sqe_tail = __next;
+ }
+ return __sqe;
+ }
+
+ /*
+ * Return an sqe to fill. Application must later call io_uring_submit()
+ * when it's ready to tell the kernel about it. The caller may call this
+ * function multiple times before calling io_uring_submit().
+ *
+ * Returns a vacant sqe, or NULL if we're full.
+ */
+ struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
+ {
+ struct io_uring_sq *sq = &ring->sq;
+
+ return __io_uring_get_sqe(sq, io_uring_smp_load_acquire(sq->khead));
+ }
+
+ int __io_uring_sqring_wait(struct io_uring *ring)
+ {
+ int ret;
+
+ ret = __sys_io_uring_enter(ring->ring_fd, 0, 0, IORING_ENTER_SQ_WAIT,
+ NULL);
+ if (ret < 0)
+ ret = -errno;
+ return ret;
+ }
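Taken together, the functions above implement the submit/complete cycle: io_uring_get_sqe() hands out vacant SQEs, __io_uring_flush_sq() publishes them to the kernel ring, __io_uring_submit() calls io_uring_enter(2), and __io_uring_get_cqe() reaps completions. The sketch below shows how that cycle is typically driven from application code against the bundled liburing API. It is not code from the gem itself, and it assumes the usual helpers from the accompanying liburing.h and setup.c in this same diff (io_uring_queue_init(), io_uring_prep_nop(), io_uring_wait_cqe(), and friends):

```c
/* Usage sketch, not part of the gem: submit a single NOP and reap its CQE. */
#include <stdio.h>
#include "liburing.h"

int main(void)
{
    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;
    int ret;

    /* Create a ring with 8 SQ entries (io_uring_queue_init lives in setup.c). */
    ret = io_uring_queue_init(8, &ring, 0);
    if (ret < 0) {
        fprintf(stderr, "io_uring_queue_init failed: %d\n", ret);
        return 1;
    }

    /* Grab a vacant sqe, fill it, then flush it to the kernel;
     * io_uring_submit() ends up in __io_uring_flush_sq()/__io_uring_submit(). */
    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_nop(sqe);
    io_uring_sqe_set_data(sqe, (void *)0x1234);
    io_uring_submit(&ring);

    /* Wait for the completion; io_uring_wait_cqe() funnels into
     * __io_uring_get_cqe() above. */
    ret = io_uring_wait_cqe(&ring, &cqe);
    if (ret == 0) {
        printf("res=%d user_data=0x%llx\n", cqe->res,
               (unsigned long long)cqe->user_data);
        io_uring_cqe_seen(&ring, cqe);
    }

    io_uring_queue_exit(&ring);
    return 0;
}
```

This is the same cycle the gem's new backend_io_uring.c drives internally for Polyphony's fiber-based I/O.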