polyphony 1.0.1 → 1.0.2

Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/TODO.md +4 -0
  4. data/examples/core/debug.rb +12 -0
  5. data/examples/core/rpc_benchmark.rb +136 -0
  6. data/lib/polyphony/extensions/fiber.rb +1 -0
  7. data/lib/polyphony/extensions/socket.rb +42 -42
  8. data/lib/polyphony/version.rb +1 -1
  9. data/polyphony.gemspec +3 -1
  10. data/test/test_socket.rb +1 -1
  11. metadata +32 -149
  12. data/vendor/liburing/man/IO_URING_CHECK_VERSION.3 +0 -1
  13. data/vendor/liburing/man/IO_URING_VERSION_MAJOR.3 +0 -1
  14. data/vendor/liburing/man/IO_URING_VERSION_MINOR.3 +0 -1
  15. data/vendor/liburing/man/io_uring.7 +0 -781
  16. data/vendor/liburing/man/io_uring_buf_ring_add.3 +0 -53
  17. data/vendor/liburing/man/io_uring_buf_ring_advance.3 +0 -31
  18. data/vendor/liburing/man/io_uring_buf_ring_cq_advance.3 +0 -41
  19. data/vendor/liburing/man/io_uring_buf_ring_init.3 +0 -30
  20. data/vendor/liburing/man/io_uring_buf_ring_mask.3 +0 -27
  21. data/vendor/liburing/man/io_uring_check_version.3 +0 -72
  22. data/vendor/liburing/man/io_uring_close_ring_fd.3 +0 -43
  23. data/vendor/liburing/man/io_uring_cq_advance.3 +0 -49
  24. data/vendor/liburing/man/io_uring_cq_has_overflow.3 +0 -25
  25. data/vendor/liburing/man/io_uring_cq_ready.3 +0 -26
  26. data/vendor/liburing/man/io_uring_cqe_get_data.3 +0 -53
  27. data/vendor/liburing/man/io_uring_cqe_get_data64.3 +0 -1
  28. data/vendor/liburing/man/io_uring_cqe_seen.3 +0 -42
  29. data/vendor/liburing/man/io_uring_enter.2 +0 -1700
  30. data/vendor/liburing/man/io_uring_enter2.2 +0 -1
  31. data/vendor/liburing/man/io_uring_free_probe.3 +0 -27
  32. data/vendor/liburing/man/io_uring_get_events.3 +0 -33
  33. data/vendor/liburing/man/io_uring_get_probe.3 +0 -30
  34. data/vendor/liburing/man/io_uring_get_sqe.3 +0 -57
  35. data/vendor/liburing/man/io_uring_major_version.3 +0 -1
  36. data/vendor/liburing/man/io_uring_minor_version.3 +0 -1
  37. data/vendor/liburing/man/io_uring_opcode_supported.3 +0 -30
  38. data/vendor/liburing/man/io_uring_peek_cqe.3 +0 -38
  39. data/vendor/liburing/man/io_uring_prep_accept.3 +0 -197
  40. data/vendor/liburing/man/io_uring_prep_accept_direct.3 +0 -1
  41. data/vendor/liburing/man/io_uring_prep_cancel.3 +0 -118
  42. data/vendor/liburing/man/io_uring_prep_cancel64.3 +0 -1
  43. data/vendor/liburing/man/io_uring_prep_close.3 +0 -59
  44. data/vendor/liburing/man/io_uring_prep_close_direct.3 +0 -1
  45. data/vendor/liburing/man/io_uring_prep_connect.3 +0 -66
  46. data/vendor/liburing/man/io_uring_prep_fadvise.3 +0 -59
  47. data/vendor/liburing/man/io_uring_prep_fallocate.3 +0 -59
  48. data/vendor/liburing/man/io_uring_prep_fgetxattr.3 +0 -1
  49. data/vendor/liburing/man/io_uring_prep_files_update.3 +0 -92
  50. data/vendor/liburing/man/io_uring_prep_fsetxattr.3 +0 -1
  51. data/vendor/liburing/man/io_uring_prep_fsync.3 +0 -70
  52. data/vendor/liburing/man/io_uring_prep_getxattr.3 +0 -61
  53. data/vendor/liburing/man/io_uring_prep_link.3 +0 -1
  54. data/vendor/liburing/man/io_uring_prep_link_timeout.3 +0 -94
  55. data/vendor/liburing/man/io_uring_prep_linkat.3 +0 -91
  56. data/vendor/liburing/man/io_uring_prep_madvise.3 +0 -56
  57. data/vendor/liburing/man/io_uring_prep_mkdir.3 +0 -1
  58. data/vendor/liburing/man/io_uring_prep_mkdirat.3 +0 -83
  59. data/vendor/liburing/man/io_uring_prep_msg_ring.3 +0 -92
  60. data/vendor/liburing/man/io_uring_prep_msg_ring_cqe_flags.3 +0 -1
  61. data/vendor/liburing/man/io_uring_prep_multishot_accept.3 +0 -1
  62. data/vendor/liburing/man/io_uring_prep_multishot_accept_direct.3 +0 -1
  63. data/vendor/liburing/man/io_uring_prep_nop.3 +0 -28
  64. data/vendor/liburing/man/io_uring_prep_openat.3 +0 -117
  65. data/vendor/liburing/man/io_uring_prep_openat2.3 +0 -117
  66. data/vendor/liburing/man/io_uring_prep_openat2_direct.3 +0 -1
  67. data/vendor/liburing/man/io_uring_prep_openat_direct.3 +0 -1
  68. data/vendor/liburing/man/io_uring_prep_poll_add.3 +0 -72
  69. data/vendor/liburing/man/io_uring_prep_poll_multishot.3 +0 -1
  70. data/vendor/liburing/man/io_uring_prep_poll_remove.3 +0 -55
  71. data/vendor/liburing/man/io_uring_prep_poll_update.3 +0 -89
  72. data/vendor/liburing/man/io_uring_prep_provide_buffers.3 +0 -140
  73. data/vendor/liburing/man/io_uring_prep_read.3 +0 -69
  74. data/vendor/liburing/man/io_uring_prep_read_fixed.3 +0 -72
  75. data/vendor/liburing/man/io_uring_prep_readv.3 +0 -85
  76. data/vendor/liburing/man/io_uring_prep_readv2.3 +0 -111
  77. data/vendor/liburing/man/io_uring_prep_recv.3 +0 -105
  78. data/vendor/liburing/man/io_uring_prep_recv_multishot.3 +0 -1
  79. data/vendor/liburing/man/io_uring_prep_recvmsg.3 +0 -124
  80. data/vendor/liburing/man/io_uring_prep_recvmsg_multishot.3 +0 -1
  81. data/vendor/liburing/man/io_uring_prep_remove_buffers.3 +0 -52
  82. data/vendor/liburing/man/io_uring_prep_rename.3 +0 -1
  83. data/vendor/liburing/man/io_uring_prep_renameat.3 +0 -96
  84. data/vendor/liburing/man/io_uring_prep_send.3 +0 -66
  85. data/vendor/liburing/man/io_uring_prep_send_set_addr.3 +0 -38
  86. data/vendor/liburing/man/io_uring_prep_send_zc.3 +0 -96
  87. data/vendor/liburing/man/io_uring_prep_send_zc_fixed.3 +0 -1
  88. data/vendor/liburing/man/io_uring_prep_sendmsg.3 +0 -89
  89. data/vendor/liburing/man/io_uring_prep_sendmsg_zc.3 +0 -1
  90. data/vendor/liburing/man/io_uring_prep_setxattr.3 +0 -64
  91. data/vendor/liburing/man/io_uring_prep_shutdown.3 +0 -53
  92. data/vendor/liburing/man/io_uring_prep_socket.3 +0 -118
  93. data/vendor/liburing/man/io_uring_prep_socket_direct.3 +0 -1
  94. data/vendor/liburing/man/io_uring_prep_socket_direct_alloc.3 +0 -1
  95. data/vendor/liburing/man/io_uring_prep_splice.3 +0 -120
  96. data/vendor/liburing/man/io_uring_prep_statx.3 +0 -74
  97. data/vendor/liburing/man/io_uring_prep_symlink.3 +0 -1
  98. data/vendor/liburing/man/io_uring_prep_symlinkat.3 +0 -85
  99. data/vendor/liburing/man/io_uring_prep_sync_file_range.3 +0 -59
  100. data/vendor/liburing/man/io_uring_prep_tee.3 +0 -74
  101. data/vendor/liburing/man/io_uring_prep_timeout.3 +0 -95
  102. data/vendor/liburing/man/io_uring_prep_timeout_remove.3 +0 -1
  103. data/vendor/liburing/man/io_uring_prep_timeout_update.3 +0 -98
  104. data/vendor/liburing/man/io_uring_prep_unlink.3 +0 -1
  105. data/vendor/liburing/man/io_uring_prep_unlinkat.3 +0 -82
  106. data/vendor/liburing/man/io_uring_prep_write.3 +0 -67
  107. data/vendor/liburing/man/io_uring_prep_write_fixed.3 +0 -72
  108. data/vendor/liburing/man/io_uring_prep_writev.3 +0 -85
  109. data/vendor/liburing/man/io_uring_prep_writev2.3 +0 -111
  110. data/vendor/liburing/man/io_uring_queue_exit.3 +0 -26
  111. data/vendor/liburing/man/io_uring_queue_init.3 +0 -89
  112. data/vendor/liburing/man/io_uring_queue_init_params.3 +0 -1
  113. data/vendor/liburing/man/io_uring_recvmsg_cmsg_firsthdr.3 +0 -1
  114. data/vendor/liburing/man/io_uring_recvmsg_cmsg_nexthdr.3 +0 -1
  115. data/vendor/liburing/man/io_uring_recvmsg_name.3 +0 -1
  116. data/vendor/liburing/man/io_uring_recvmsg_out.3 +0 -82
  117. data/vendor/liburing/man/io_uring_recvmsg_payload.3 +0 -1
  118. data/vendor/liburing/man/io_uring_recvmsg_payload_length.3 +0 -1
  119. data/vendor/liburing/man/io_uring_recvmsg_validate.3 +0 -1
  120. data/vendor/liburing/man/io_uring_register.2 +0 -834
  121. data/vendor/liburing/man/io_uring_register_buf_ring.3 +0 -140
  122. data/vendor/liburing/man/io_uring_register_buffers.3 +0 -104
  123. data/vendor/liburing/man/io_uring_register_buffers_sparse.3 +0 -1
  124. data/vendor/liburing/man/io_uring_register_buffers_tags.3 +0 -1
  125. data/vendor/liburing/man/io_uring_register_buffers_update_tag.3 +0 -1
  126. data/vendor/liburing/man/io_uring_register_eventfd.3 +0 -51
  127. data/vendor/liburing/man/io_uring_register_eventfd_async.3 +0 -1
  128. data/vendor/liburing/man/io_uring_register_file_alloc_range.3 +0 -52
  129. data/vendor/liburing/man/io_uring_register_files.3 +0 -112
  130. data/vendor/liburing/man/io_uring_register_files_sparse.3 +0 -1
  131. data/vendor/liburing/man/io_uring_register_files_tags.3 +0 -1
  132. data/vendor/liburing/man/io_uring_register_files_update.3 +0 -1
  133. data/vendor/liburing/man/io_uring_register_files_update_tag.3 +0 -1
  134. data/vendor/liburing/man/io_uring_register_iowq_aff.3 +0 -61
  135. data/vendor/liburing/man/io_uring_register_iowq_max_workers.3 +0 -71
  136. data/vendor/liburing/man/io_uring_register_ring_fd.3 +0 -49
  137. data/vendor/liburing/man/io_uring_register_sync_cancel.3 +0 -71
  138. data/vendor/liburing/man/io_uring_setup.2 +0 -669
  139. data/vendor/liburing/man/io_uring_sq_ready.3 +0 -31
  140. data/vendor/liburing/man/io_uring_sq_space_left.3 +0 -25
  141. data/vendor/liburing/man/io_uring_sqe_set_data.3 +0 -48
  142. data/vendor/liburing/man/io_uring_sqe_set_data64.3 +0 -1
  143. data/vendor/liburing/man/io_uring_sqe_set_flags.3 +0 -87
  144. data/vendor/liburing/man/io_uring_sqring_wait.3 +0 -34
  145. data/vendor/liburing/man/io_uring_submit.3 +0 -46
  146. data/vendor/liburing/man/io_uring_submit_and_get_events.3 +0 -31
  147. data/vendor/liburing/man/io_uring_submit_and_wait.3 +0 -38
  148. data/vendor/liburing/man/io_uring_submit_and_wait_timeout.3 +0 -56
  149. data/vendor/liburing/man/io_uring_unregister_buf_ring.3 +0 -30
  150. data/vendor/liburing/man/io_uring_unregister_buffers.3 +0 -27
  151. data/vendor/liburing/man/io_uring_unregister_eventfd.3 +0 -1
  152. data/vendor/liburing/man/io_uring_unregister_files.3 +0 -27
  153. data/vendor/liburing/man/io_uring_unregister_iowq_aff.3 +0 -1
  154. data/vendor/liburing/man/io_uring_unregister_ring_fd.3 +0 -32
  155. data/vendor/liburing/man/io_uring_wait_cqe.3 +0 -40
  156. data/vendor/liburing/man/io_uring_wait_cqe_nr.3 +0 -43
  157. data/vendor/liburing/man/io_uring_wait_cqe_timeout.3 +0 -53
  158. data/vendor/liburing/man/io_uring_wait_cqes.3 +0 -56
data/vendor/liburing/man/io_uring.7 (deleted)
@@ -1,781 +0,0 @@
- .\" Copyright (C) 2020 Shuveb Hussain <shuveb@gmail.com>
- .\" SPDX-License-Identifier: LGPL-2.0-or-later
- .\"
-
- .TH io_uring 7 2020-07-26 "Linux" "Linux Programmer's Manual"
- .SH NAME
- io_uring \- Asynchronous I/O facility
- .SH SYNOPSIS
- .nf
- .B "#include <linux/io_uring.h>"
- .fi
- .PP
- .SH DESCRIPTION
- .PP
- .B io_uring
- is a Linux-specific API for asynchronous I/O.
- It allows the user to submit one or more I/O requests,
- which are processed asynchronously without blocking the calling process.
- .B io_uring
- gets its name from ring buffers which are shared between user space and
- kernel space. This arrangement allows for efficient I/O,
- while avoiding the overhead of copying buffers between them,
- where possible.
- This interface makes
- .B io_uring
- different from other UNIX I/O APIs,
- in that,
- rather than just communicating between kernel and user space with system calls,
- ring buffers are used as the main mode of communication.
- This arrangement has various performance benefits which are discussed in a
- separate section below.
- This man page uses the terms shared buffers, shared ring buffers and
- queues interchangeably.
- .PP
- The general programming model you need to follow for
- .B io_uring
- is outlined below.
- .IP \(bu
- Set up shared buffers with
- .BR io_uring_setup (2)
- and
- .BR mmap (2),
- mapping into user space shared buffers for the submission queue (SQ) and the
- completion queue (CQ).
- You place I/O requests you want to make on the SQ,
- while the kernel places the results of those operations on the CQ.
- .IP \(bu
- For every I/O request you need to make (like to read a file, write a file,
- accept a socket connection, etc), you create a submission queue entry,
- or SQE,
- describe the I/O operation you need to get done and add it to the tail of
- the submission queue (SQ).
- Each I/O operation is,
- in essence,
- the equivalent of a system call you would have made otherwise,
- if you were not using
- .BR io_uring .
- You can add more than one SQE to the queue depending on the number of
- operations you want to request.
- .IP \(bu
- After you add one or more SQEs,
- you need to call
- .BR io_uring_enter (2)
- to tell the kernel to dequeue your I/O requests off the SQ and begin
- processing them.
- .IP \(bu
- For each SQE you submit,
- once it is done processing the request,
- the kernel places a completion queue event or CQE at the tail of the
- completion queue or CQ.
- The kernel places exactly one matching CQE in the CQ for every SQE you
- submit on the SQ.
- After you retrieve a CQE,
- minimally,
- you might be interested in checking the
- .I res
- field of the CQE structure,
- which corresponds to the return value of the system
- call's equivalent,
- had you used it directly without using
- .BR io_uring .
- For instance,
- a read operation under
- .BR io_uring ,
- started with the
- .BR IORING_OP_READ
- operation, issues the equivalent of the
- .BR read (2)
- system call. In practice, it mixes the semantics of
- .BR pread (2)
- and
- .BR preadv2 (2)
- in that it takes an explicit offset, and supports using -1 for the offset to
- indicate that the current file position should be used instead of passing in
- an explicit offset. See the opcode documentation for more details. Given that
- io_uring is an async interface,
- .I errno
- is never used for passing back error information. Instead,
- .I res
- will contain what the equivalent system call would have returned in case
- of success, and in case of error
- .I res
- will contain
- .I -errno .
- For example, if the normal read system call would have returned -1 and set
- .I errno
- to
- .B EINVAL ,
- then
- .I res
- would contain
- .B -EINVAL .
- If the normal system call would have returned a read size of 1024, then
- .I res
- would contain 1024.
- .IP \(bu
- Optionally,
- .BR io_uring_enter (2)
- can also wait for a specified number of requests to be processed by the kernel
- before it returns.
- If you specified a certain number of completions to wait for,
- the kernel would have placed at least that many CQEs on the CQ,
- which you can then readily read,
- right after the return from
- .BR io_uring_enter (2).
- .IP \(bu
- It is important to remember that I/O requests submitted to the kernel can
- complete in any order.
- It is not necessary for the kernel to process one request after another,
- in the order you placed them.
- Given that the interface is a ring,
- the requests are attempted in order,
- however that doesn't imply any sort of ordering on their completion.
- When more than one request is in flight,
- it is not possible to determine which one will complete first.
- When you dequeue CQEs off the CQ,
- you should always check which submitted request each one corresponds to.
- The most common method for doing so is utilizing the
- .I user_data
- field in the request, which is passed back on the completion side.
- .PP
- Adding to and reading from the queues:
- .IP \(bu
- You add SQEs to the tail of the SQ.
- The kernel reads SQEs off the head of the queue.
- .IP \(bu
- The kernel adds CQEs to the tail of the CQ.
- You read CQEs off the head of the queue.
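
The cycle described above (get an SQE, describe the operation, submit, then reap and correlate the CQE) is what liburing, vendored under data/vendor/liburing in this gem, wraps. A minimal sketch, assuming liburing is installed and linked with -luring; the file path and buffer size here are arbitrary illustration:

    #include <liburing.h>
    #include <fcntl.h>
    #include <stdio.h>

    int main(void) {
        struct io_uring ring;
        struct io_uring_cqe *cqe;
        char buf[4096];

        if (io_uring_queue_init(8, &ring, 0) < 0)           /* set up the SQ/CQ rings */
            return 1;

        int fd = open("/etc/hostname", O_RDONLY);           /* any readable file */
        struct io_uring_sqe *sqe = io_uring_get_sqe(&ring); /* SQE off the SQ tail */
        io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);   /* describe the operation */
        io_uring_sqe_set_data64(sqe, 1);                    /* user_data, echoed in the CQE */
        io_uring_submit(&ring);                             /* io_uring_enter(2) under the hood */

        io_uring_wait_cqe(&ring, &cqe);                     /* reap the CQE off the CQ head */
        printf("res=%d user_data=%llu\n", cqe->res,
               (unsigned long long) io_uring_cqe_get_data64(cqe));
        io_uring_cqe_seen(&ring, cqe);                      /* advance the CQ head */
        io_uring_queue_exit(&ring);
        return 0;
    }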
- .SS Submission queue polling
- One of the goals of
- .B io_uring
- is to provide a means for efficient I/O.
- To this end,
- .B io_uring
- supports a polling mode that lets you avoid the call to
- .BR io_uring_enter (2),
- which you use to inform the kernel that you have queued SQEs on to the SQ.
- With SQ Polling,
- .B io_uring
- starts a kernel thread that polls the submission queue for any I/O
- requests you submit by adding SQEs.
- With SQ Polling enabled,
- there is no need for you to call
- .BR io_uring_enter (2),
- letting you avoid the overhead of system calls.
- A designated kernel thread dequeues SQEs off the SQ as you add them and
- dispatches them for asynchronous processing.
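
SQ polling is requested at ring setup time via the IORING_SETUP_SQPOLL flag. A hedged sketch in terms of liburing (the raw io_uring_setup(2) interface takes the same io_uring_params; the idle timeout value here is an arbitrary example):

    #include <liburing.h>

    /* Sketch: ask the kernel for an SQ polling thread at setup time.
     * May require elevated privileges on older kernels; illustration only. */
    int setup_sqpoll_ring(struct io_uring *ring)
    {
        struct io_uring_params p = { 0 };
        p.flags = IORING_SETUP_SQPOLL;  /* kernel thread polls the SQ for new SQEs */
        p.sq_thread_idle = 2000;        /* ms of inactivity before the thread sleeps */
        /* Once the thread has gone idle, the next io_uring_submit() issues an
         * io_uring_enter(2) with IORING_ENTER_SQ_WAKEUP to revive it. */
        return io_uring_queue_init_params(8, ring, &p);
    }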
- .SS Setting up io_uring
- .PP
- The main steps in setting up
- .B io_uring
- consist of mapping in the shared buffers with
- .BR mmap (2)
- calls.
- In the example program included in this man page,
- the function
- .BR app_setup_uring ()
- sets up
- .B io_uring
- with a QUEUE_DEPTH deep submission queue.
- Pay attention to the two
- .BR mmap (2)
- calls that set up the shared submission and completion queues.
- If your kernel is older than version 5.4,
- three
- .BR mmap (2)
- calls are required.
- .PP
- .SS Submitting I/O requests
- The process of submitting a request consists of describing the I/O
- operation you need to get done using an
- .B io_uring_sqe
- structure instance.
- These details describe the equivalent system call and its parameters.
- Because the range of I/O operations Linux supports is very varied and the
- .B io_uring_sqe
- structure needs to be able to describe them,
- it has several fields,
- some packed into unions for space efficiency.
- Here is a simplified version of struct
- .B io_uring_sqe
- with some of the most often used fields:
- .PP
- .in +4n
- .EX
- struct io_uring_sqe {
-     __u8    opcode;     /* type of operation for this sqe */
-     __s32   fd;         /* file descriptor to do IO on */
-     __u64   off;        /* offset into file */
-     __u64   addr;       /* pointer to buffer or iovecs */
-     __u32   len;        /* buffer size or number of iovecs */
-     __u64   user_data;  /* data to be passed back at completion time */
-     __u8    flags;      /* IOSQE_ flags */
-     ...
- };
- .EE
- .in
-
- Here is struct
- .B io_uring_sqe
- in full:
-
- .in +4n
- .EX
- struct io_uring_sqe {
-     __u8    opcode;     /* type of operation for this sqe */
-     __u8    flags;      /* IOSQE_ flags */
-     __u16   ioprio;     /* ioprio for the request */
-     __s32   fd;         /* file descriptor to do IO on */
-     union {
-         __u64   off;    /* offset into file */
-         __u64   addr2;
-     };
-     union {
-         __u64   addr;   /* pointer to buffer or iovecs */
-         __u64   splice_off_in;
-     };
-     __u32   len;        /* buffer size or number of iovecs */
-     union {
-         __kernel_rwf_t  rw_flags;
-         __u32   fsync_flags;
-         __u16   poll_events;    /* compatibility */
-         __u32   poll32_events;  /* word-reversed for BE */
-         __u32   sync_range_flags;
-         __u32   msg_flags;
-         __u32   timeout_flags;
-         __u32   accept_flags;
-         __u32   cancel_flags;
-         __u32   open_flags;
-         __u32   statx_flags;
-         __u32   fadvise_advice;
-         __u32   splice_flags;
-     };
-     __u64   user_data;  /* data to be passed back at completion time */
-     union {
-         struct {
-             /* pack this to avoid bogus arm OABI complaints */
-             union {
-                 /* index into fixed buffers, if used */
-                 __u16   buf_index;
-                 /* for grouped buffer selection */
-                 __u16   buf_group;
-             } __attribute__((packed));
-             /* personality to use, if used */
-             __u16   personality;
-             __s32   splice_fd_in;
-         };
-         __u64   __pad2[3];
-     };
- };
- .EE
- .in
- .PP
- To submit an I/O request to
- .BR io_uring ,
- you need to acquire a submission queue entry (SQE) from the submission
- queue (SQ),
- fill it up with details of the operation you want to submit and call
- .BR io_uring_enter (2).
- There are helper functions of the form io_uring_prep_X to enable proper
- setup of the SQE. If you want to avoid calling
- .BR io_uring_enter (2),
- you have the option of setting up Submission Queue Polling.
- .PP
- SQEs are added to the tail of the submission queue.
- The kernel picks up SQEs off the head of the SQ.
- The general algorithm to get the next available SQE and update the tail is
- as follows.
- .PP
- .in +4n
- .EX
- struct io_uring_sqe *sqe;
- unsigned tail, index;
- tail = *sqring->tail;
- index = tail & (*sqring->ring_mask);
- sqe = &sqring->sqes[index];
- /* fill up details about this I/O request */
- describe_io(sqe);
- /* fill the sqe index into the SQ ring array */
- sqring->array[index] = index;
- tail++;
- atomic_store_release(sqring->tail, tail);
- .EE
- .in
- .PP
- To get the index of an entry,
- the application must mask the current tail index with the size mask of the
- ring.
- This holds true for both SQs and CQs.
- Once the SQE is acquired,
- the necessary fields are filled in,
- describing the request.
- While the CQ ring directly indexes the shared array of CQEs,
- the submission side has an indirection array between them.
- The submission side ring buffer is an index into this array,
- which in turn contains the index into the SQEs.
- .PP
- The following code snippet demonstrates how a read operation,
- an equivalent of a
- .BR preadv2 (2)
- system call,
- is described by filling up an SQE with the necessary
- parameters.
- .PP
- .in +4n
- .EX
- struct iovec iovecs[16];
- ...
- sqe->opcode = IORING_OP_READV;
- sqe->fd = fd;
- sqe->addr = (unsigned long) iovecs;
- sqe->len = 16;
- sqe->off = offset;
- sqe->flags = 0;
- .EE
- .in
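
For comparison, the io_uring_prep_X helpers mentioned earlier fill in exactly these fields. A sketch with liburing's io_uring_prep_readv(3), assuming the same ring, fd, iovecs and offset as in the snippet above:

    /* Sketch: the same IORING_OP_READV submission via liburing's prep helper,
     * which sets opcode, fd, addr, len and off behind the scenes. */
    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring); /* next free SQE, or NULL if full */
    io_uring_prep_readv(sqe, fd, iovecs, 16, offset);   /* describe the vectored read */
    io_uring_sqe_set_data64(sqe, 42);                   /* tag echoed back in the CQE */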
- .TP
- .B Memory ordering
- Modern compilers and CPUs freely reorder reads and writes without
- affecting the program's outcome to optimize performance.
- Some aspects of this need to be kept in mind on SMP systems since
- .B io_uring
- involves buffers shared between kernel and user space.
- These buffers are both visible and modifiable from kernel and user space.
- As heads and tails belonging to these shared buffers are updated by kernel
- and user space,
- changes need to be coherently visible on either side,
- irrespective of whether a CPU switch took place after the kernel-user mode
- switch happened.
- We use memory barriers to enforce this coherency.
- Memory barriers are a large subject in their own right,
- and are out of scope for further discussion on this man page.
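
Concretely, the barriers boil down to release/acquire pairs on the ring indices. A minimal C11 sketch of the same idea as the io_uring_smp_* macros defined in the EXAMPLES program further down (the cast through _Atomic mirrors those macros):

    #include <stdatomic.h>

    /* Publish an SQE: write the entry first, then release-store the new tail,
     * so the kernel can never observe the index before the entry it points to. */
    static void publish_sq_tail(unsigned *sq_tail, unsigned new_tail)
    {
        atomic_store_explicit((_Atomic unsigned *)sq_tail, new_tail,
                              memory_order_release);
    }

    /* Consume CQEs: acquire-load the tail first, so any CQE read afterwards is
     * at least as new as the index that made it visible. */
    static unsigned load_cq_tail(unsigned *cq_tail)
    {
        return atomic_load_explicit((_Atomic unsigned *)cq_tail,
                                    memory_order_acquire);
    }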
- .TP
- .B Letting the kernel know about I/O submissions
- Once you place one or more SQEs onto the SQ,
- you need to let the kernel know that you've done so.
- You can do this by calling the
- .BR io_uring_enter (2)
- system call.
- This system call is also capable of waiting for a specified count of
- events to complete.
- This way,
- you can be sure to find completion events in the completion queue without
- having to poll it for events later.
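
Since glibc provides no wrapper, io_uring_enter(2) is reached through syscall(2). A sketch that both submits and waits in one call, mirroring the wrapper defined in the EXAMPLES program below:

    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/io_uring.h>

    /* Submit to_submit SQEs and block until at least min_complete CQEs are
     * available, all in a single system call. */
    int enter_and_wait(int ring_fd, unsigned to_submit, unsigned min_complete)
    {
        return (int) syscall(__NR_io_uring_enter, ring_fd, to_submit,
                             min_complete, IORING_ENTER_GETEVENTS, NULL, 0);
    }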
- .SS Reading completion events
- Similar to the submission queue (SQ),
- the completion queue (CQ) is a shared buffer between the kernel and user
- space.
- Whereas you placed submission queue entries on the tail of the SQ and the
- kernel read off the head,
- when it comes to the CQ,
- the kernel places completion queue events or CQEs on the tail of the CQ and
- you read off its head.
- .PP
- Submission is flexible (and thus a bit more complicated) since it needs to
- be able to encode different types of system calls that take various
- parameters.
- Completion,
- on the other hand,
- is simpler since we're looking only for a return value
- back from the kernel.
- This is easily understood by looking at the completion queue event
- structure,
- struct
- .BR io_uring_cqe :
- .PP
- .in +4n
- .EX
- struct io_uring_cqe {
-     __u64   user_data;  /* sqe->data submission passed back */
-     __s32   res;        /* result code for this event */
-     __u32   flags;
- };
- .EE
- .in
- .PP
- Here,
- .I user_data
- is custom data that is passed unchanged from submission to completion.
- That is,
- from SQEs to CQEs.
- This field can be used to set context,
- uniquely identifying submissions that got completed.
- Given that I/O requests can complete in any order,
- this field can be used to correlate a submission with a completion.
- .I res
- is the result from the system call that was performed as part of the
- submission;
- its return value.
-
- The
- .I flags
- field carries request-specific information. As of the 6.0 kernel, the following
- flags are defined:
-
- .TP
- .B IORING_CQE_F_BUFFER
- If set, the upper 16 bits of the flags field carry the buffer ID that was
- chosen for this request. The request must have been issued with
- .B IOSQE_BUFFER_SELECT
- set, and used with a request type that supports buffer selection. Additionally,
- buffers must have been provided upfront either via the
- .B IORING_OP_PROVIDE_BUFFERS
- or the
- .B IORING_REGISTER_PBUF_RING
- methods.
- .TP
- .B IORING_CQE_F_MORE
- If set, the application should expect more completions from the request. This
- is used for requests that can generate multiple completions, such as multi-shot
- requests, receive, or accept.
- .TP
- .B IORING_CQE_F_SOCK_NONEMPTY
- If set, the socket still had data left to read when this receive request
- completed.
- .TP
- .B IORING_CQE_F_NOTIF
- Set for notification CQEs, as seen with the zero-copy networking send and
- receive support.
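
As a hedged sketch of how these flags are consumed, the buffer ID announced by IORING_CQE_F_BUFFER sits in the upper 16 bits of flags, exposed as IORING_CQE_BUFFER_SHIFT in <linux/io_uring.h>:

    #include <linux/io_uring.h>
    #include <stdio.h>

    /* Sketch: decode the per-request information carried in cqe->flags. */
    static void inspect_cqe(const struct io_uring_cqe *cqe)
    {
        if (cqe->flags & IORING_CQE_F_BUFFER) {
            unsigned buf_id = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
            printf("data landed in provided buffer %u\n", buf_id);
        }
        if (cqe->flags & IORING_CQE_F_MORE)
            printf("more CQEs will follow for this multi-shot request\n");
    }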
- .PP
- The general sequence to read completion events off the completion queue is
- as follows:
- .PP
- .in +4n
- .EX
- unsigned head;
- head = *cqring->head;
- if (head != atomic_load_acquire(cqring->tail)) {
-     struct io_uring_cqe *cqe;
-     unsigned index;
-     index = head & (cqring->mask);
-     cqe = &cqring->cqes[index];
-     /* process completed CQE */
-     process_cqe(cqe);
-     /* CQE consumption complete */
-     head++;
- }
- atomic_store_release(cqring->head, head);
- .EE
- .in
- .PP
- It helps to be reminded that the kernel adds CQEs to the tail of the CQ,
- while you need to dequeue them off the head.
- To get the index of an entry at the head,
- the application must mask the current head index with the size mask of the
- ring.
- Once the CQE has been consumed or processed,
- the head needs to be updated to reflect the consumption of the CQE.
- Attention should be paid to the read and write barriers to ensure
- successful read and update of the head.
- .SS io_uring performance
- Because of the shared ring buffers between kernel and user space,
- .B io_uring
- can be a zero-copy system.
- Copying buffers back and forth becomes necessary when system calls that
- transfer data between kernel and user space are involved.
- But since the bulk of the communication in
- .B io_uring
- is via buffers shared between the kernel and user space,
- this huge performance overhead is completely avoided.
- .PP
- While system calls may not seem like a significant overhead,
- in high performance applications,
- making a lot of them will begin to matter.
- Ideally, the workarounds the operating system has in place to deal with
- Spectre and Meltdown would be done away with,
- but unfortunately,
- some of these workarounds sit around the system call interface,
- making system calls not as cheap as before on affected hardware.
- While newer hardware should not need these workarounds,
- hardware with these vulnerabilities can be expected to be in the wild for a
- long time.
- While using synchronous programming interfaces or even when using
- asynchronous programming interfaces under Linux,
- there is at least one system call involved in the submission of each
- request.
- In
- .BR io_uring ,
- on the other hand,
- you can batch several requests in one go,
- simply by queueing up multiple SQEs,
- each describing an I/O operation you want,
- and making a single call to
- .BR io_uring_enter (2).
- This is possible due to
- .BR io_uring 's
- shared-buffer-based design.
- .PP
- While this batching in itself can avoid the overhead associated with
- potentially multiple and frequent system calls,
- you can reduce even this overhead further with Submission Queue Polling,
- by having the kernel poll and pick up your SQEs for processing as you add
- them to the submission queue. This avoids the
- .BR io_uring_enter (2)
- call you need to make to tell the kernel to pick SQEs up.
- For high-performance applications,
- this means even less system call overhead.
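
A sketch of that batching, again in liburing terms (an assumption; the raw interface batches the same way by bumping the SQ tail several times before one io_uring_enter(2)). fd and bufs are assumed to be set up by the caller:

    #include <liburing.h>

    /* Sketch: queue four independent reads, then submit them all with a single
     * system call. */
    static void submit_batch(struct io_uring *ring, int fd, char bufs[4][4096])
    {
        for (int i = 0; i < 4; i++) {
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
            io_uring_prep_read(sqe, fd, bufs[i], 4096, (__u64) i * 4096);
            io_uring_sqe_set_data64(sqe, i);    /* identify each read in its CQE */
        }
        io_uring_submit(ring);                  /* one io_uring_enter(2) for all four */
    }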
- .SH CONFORMING TO
- .B io_uring
- is Linux-specific.
- .SH EXAMPLES
- The following example uses
- .B io_uring
- to copy stdin to stdout.
- Using shell redirection,
- you should be able to copy files with this example.
- Because it uses a queue depth of only one,
- this example processes I/O requests one after the other.
- It is purposefully kept this way to aid understanding.
- In real-world scenarios, however,
- you'll want to have a larger queue depth to parallelize I/O request
- processing so as to gain the kind of performance benefits
- .B io_uring
- provides with its asynchronous processing of requests.
- .PP
- .EX
- #include <stdio.h>
- #include <stdlib.h>
- #include <sys/stat.h>
- #include <sys/ioctl.h>
- #include <sys/syscall.h>
- #include <sys/mman.h>
- #include <sys/uio.h>
- #include <linux/fs.h>
- #include <fcntl.h>
- #include <unistd.h>
- #include <string.h>
- #include <stdatomic.h>
-
- #include <linux/io_uring.h>
-
- #define QUEUE_DEPTH 1
- #define BLOCK_SZ    1024
-
- /* Macros for barriers needed by io_uring */
- #define io_uring_smp_store_release(p, v)                     \\
-     atomic_store_explicit((_Atomic typeof(*(p)) *)(p), (v), \\
-                           memory_order_release)
- #define io_uring_smp_load_acquire(p)                     \\
-     atomic_load_explicit((_Atomic typeof(*(p)) *)(p),    \\
-                          memory_order_acquire)
-
- int ring_fd;
- unsigned *sring_tail, *sring_mask, *sring_array,
-          *cring_head, *cring_tail, *cring_mask;
- struct io_uring_sqe *sqes;
- struct io_uring_cqe *cqes;
- char buff[BLOCK_SZ];
- off_t offset;
-
- /*
-  * System call wrappers provided since glibc does not yet
-  * provide wrappers for io_uring system calls.
-  */
-
- int io_uring_setup(unsigned entries, struct io_uring_params *p)
- {
-     return (int) syscall(__NR_io_uring_setup, entries, p);
- }
-
- int io_uring_enter(int ring_fd, unsigned int to_submit,
-                    unsigned int min_complete, unsigned int flags)
- {
-     return (int) syscall(__NR_io_uring_enter, ring_fd, to_submit,
-                          min_complete, flags, NULL, 0);
- }
-
- int app_setup_uring(void) {
-     struct io_uring_params p;
-     void *sq_ptr, *cq_ptr;
-
-     /* See io_uring_setup(2) for io_uring_params.flags you can set */
-     memset(&p, 0, sizeof(p));
-     ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
-     if (ring_fd < 0) {
-         perror("io_uring_setup");
-         return 1;
-     }
-
-     /*
-      * io_uring communication happens via 2 shared kernel-user space ring
-      * buffers, which can be jointly mapped with a single mmap() call in
-      * kernels >= 5.4.
-      */
-
-     int sring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
-     int cring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
-
-     /* Rather than check for kernel version, the recommended way is to
-      * check the features field of the io_uring_params structure, which is a
-      * bitmask. If IORING_FEAT_SINGLE_MMAP is set, we can do away with the
-      * second mmap() call to map in the completion ring separately.
-      */
-     if (p.features & IORING_FEAT_SINGLE_MMAP) {
-         if (cring_sz > sring_sz)
-             sring_sz = cring_sz;
-         cring_sz = sring_sz;
-     }
-
-     /* Map in the submission and completion queue ring buffers.
-      * Kernels < 5.4 only map in the submission queue, though.
-      */
-     sq_ptr = mmap(0, sring_sz, PROT_READ | PROT_WRITE,
-                   MAP_SHARED | MAP_POPULATE,
-                   ring_fd, IORING_OFF_SQ_RING);
-     if (sq_ptr == MAP_FAILED) {
-         perror("mmap");
-         return 1;
-     }
-
-     if (p.features & IORING_FEAT_SINGLE_MMAP) {
-         cq_ptr = sq_ptr;
-     } else {
-         /* Map in the completion queue ring buffer in older kernels separately */
-         cq_ptr = mmap(0, cring_sz, PROT_READ | PROT_WRITE,
-                       MAP_SHARED | MAP_POPULATE,
-                       ring_fd, IORING_OFF_CQ_RING);
-         if (cq_ptr == MAP_FAILED) {
-             perror("mmap");
-             return 1;
-         }
-     }
-     /* Save useful fields for later easy reference */
-     sring_tail = sq_ptr + p.sq_off.tail;
-     sring_mask = sq_ptr + p.sq_off.ring_mask;
-     sring_array = sq_ptr + p.sq_off.array;
-
-     /* Map in the submission queue entries array */
-     sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
-                 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
-                 ring_fd, IORING_OFF_SQES);
-     if (sqes == MAP_FAILED) {
-         perror("mmap");
-         return 1;
-     }
-
-     /* Save useful fields for later easy reference */
-     cring_head = cq_ptr + p.cq_off.head;
-     cring_tail = cq_ptr + p.cq_off.tail;
-     cring_mask = cq_ptr + p.cq_off.ring_mask;
-     cqes = cq_ptr + p.cq_off.cqes;
-
-     return 0;
- }
-
- /*
-  * Read from completion queue.
-  * In this function, we read completion events from the completion queue.
-  * We dequeue the CQE, update the head and return the result of the operation.
-  */
-
- int read_from_cq() {
-     struct io_uring_cqe *cqe;
-     unsigned head;
-
-     /* Read barrier */
-     head = io_uring_smp_load_acquire(cring_head);
-     /*
-      * Remember, this is a ring buffer. If head == tail, it means that the
-      * buffer is empty.
-      */
-     if (head == *cring_tail)
-         return -1;
-
-     /* Get the entry */
-     cqe = &cqes[head & (*cring_mask)];
-     if (cqe->res < 0)
-         fprintf(stderr, "Error: %s\\n", strerror(abs(cqe->res)));
-
-     head++;
-
-     /* Write barrier so that updates to the head are made visible */
-     io_uring_smp_store_release(cring_head, head);
-
-     return cqe->res;
- }
-
- /*
-  * Submit a read or a write request to the submission queue.
-  */
-
- int submit_to_sq(int fd, int op) {
-     unsigned index, tail;
-
-     /* Add our submission queue entry to the tail of the SQE ring buffer */
-     tail = *sring_tail;
-     index = tail & *sring_mask;
-     struct io_uring_sqe *sqe = &sqes[index];
-     /* Fill in the parameters required for the read or write operation */
-     sqe->opcode = op;
-     sqe->fd = fd;
-     sqe->addr = (unsigned long) buff;
-     if (op == IORING_OP_READ) {
-         memset(buff, 0, sizeof(buff));
-         sqe->len = BLOCK_SZ;
-     } else {
-         sqe->len = strlen(buff);
-     }
-     sqe->off = offset;
-
-     sring_array[index] = index;
-     tail++;
-
-     /* Update the tail */
-     io_uring_smp_store_release(sring_tail, tail);
-
-     /*
-      * Tell the kernel we have submitted events with the io_uring_enter()
-      * system call. We also pass in the IORING_ENTER_GETEVENTS flag which
-      * causes the io_uring_enter() call to wait until min_complete
-      * (the 3rd param) events complete.
-      */
-     int ret = io_uring_enter(ring_fd, 1, 1,
-                              IORING_ENTER_GETEVENTS);
-     if (ret < 0) {
-         perror("io_uring_enter");
-         return -1;
-     }
-
-     return ret;
- }
-
- int main(int argc, char *argv[]) {
-     int res;
-
-     /* Setup io_uring for use */
-     if (app_setup_uring()) {
-         fprintf(stderr, "Unable to setup uring!\\n");
-         return 1;
-     }
-
-     /*
-      * A while loop that reads from stdin and writes to stdout.
-      * Breaks on EOF.
-      */
-     while (1) {
-         /* Initiate read from stdin and wait for it to complete */
-         submit_to_sq(STDIN_FILENO, IORING_OP_READ);
-         /* Read completion queue entry */
-         res = read_from_cq();
-         if (res > 0) {
-             /* Read successful. Write to stdout. */
-             submit_to_sq(STDOUT_FILENO, IORING_OP_WRITE);
-             read_from_cq();
-         } else if (res == 0) {
-             /* reached EOF */
-             break;
-         } else if (res < 0) {
-             /* Error reading file */
-             fprintf(stderr, "Error: %s\\n", strerror(abs(res)));
-             break;
-         }
-         offset += res;
-     }
-
-     return 0;
- }
- .EE
- .SH SEE ALSO
- .BR io_uring_enter (2)
- .BR io_uring_register (2)
- .BR io_uring_setup (2)