uringmachine 0.22.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -2
- data/TODO.md +24 -138
- data/benchmark/README.md +69 -103
- data/benchmark/bm_io_pipe.rb +14 -0
- data/benchmark/chart_all.png +0 -0
- data/benchmark/common.rb +9 -0
- data/benchmark/read_each.rb +83 -0
- data/benchmark/send.rb +31 -36
- data/ext/um/extconf.rb +7 -1
- data/ext/um/um.c +131 -8
- data/ext/um/um.h +8 -0
- data/ext/um/um_class.c +34 -0
- data/ext/um/um_const.c +0 -2
- data/ext/um/um_op.c +20 -2
- data/ext/um/um_utils.c +27 -0
- data/grant-2025/journal.md +2 -2
- data/grant-2025/tasks.md +8 -13
- data/lib/uringmachine/version.rb +1 -1
- data/test/helper.rb +5 -4
- data/test/test_fiber_scheduler.rb +1 -17
- data/test/test_um.rb +299 -62
- data/vendor/liburing/configure +4 -2
- data/vendor/liburing/src/Makefile +1 -0
- data/vendor/liburing/test/min-timeout-wait.c +57 -2
- data/vendor/liburing/test/min-timeout.c +22 -0
- metadata +4 -3
- data/benchmark/chart.png +0 -0
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b185b9cafdee3930061ed7101a12ccb500a8f131a9715a6a1268b22507ec2d85
+  data.tar.gz: c2d0fe4aced8f2340b2cdd29cf9540b7075198be56a0e695f58b9fbab5fae65c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 75b0fe0a71242d728cbe1901457e41a11255c1922eca6501e3fa4286a7bd89a01ad7e5625baa24721ad63efc16da24a4b4d8ffeb85114be8fbe910066a0b033e
+  data.tar.gz: 64afeb65cc42c5b5c3af30dc361f2917a6341fd38e645114c48972bb6aa6617f496be6de040dfec6eefa102ae1d933cd726549890a9d57369f465be3b6df613f
data/CHANGELOG.md CHANGED

@@ -1,3 +1,16 @@
+
+# 0.23.0 2025-12-16
+
+- Add `UM#accept_into_queue`, fix `#accept_each` to throw on error
+- Use Set instead of Hash for holding pending fibers
+- Add `UM#writev`, `UM#sendv` methods
+- Allocate um_op and um_op_result in batches of 256
+- Remove `SIGCLD` const
+
+# 0.22.1 2025-12-11
+
+- Comment out SIGCLD constant
+
 # 0.22.0 2025-12-10
 
 - Fix use of `um_yield` in statx, multishot ops
@@ -8,6 +21,7 @@
 - More tests and benchmarks
 - Add `UM#await_fibers` for awaiting fibers
 - Add `UM.socketpair` for creating a socket pair
+- Fix segfault caused by waiting fibers not being marked
 - Fiber scheduler:
   - Use fiber's mailbox for processing blocking operations
   - Add `#io_close`, `#yield` hooks, remove `#process_fork` hook
@@ -22,8 +36,8 @@
 - Add debug logging for key io_uring interactions
 - Add UM#mark and DEBUG_MARK for debugging specific UM instances
 - Short-circuit zero-length writes
-- Add optional file_offset argument to #read, #write. Add optional len and
-  file_offset arguments to #write_async
+- Add optional file_offset argument to #read, #write. Add optional len and
+  file_offset arguments to #write_async
 - Add support for specifying SQPOLL mode and SQ idle timeout in `UM#initialize`
 - Add support for specifying number of SQ entries in `UM#initialize`
 - Implement global worker pool for blocking operations in fiber scheduler
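Of the new methods, `UM#writev` and `UM#sendv` are the most broadly useful. A minimal sketch of what a vectored write might look like, based on the call shapes given in the TODO notes further down this diff; the exact signatures are not confirmed by this diff, and `UM#read`'s argument order follows its use in the benchmark code below:

```ruby
require 'uringmachine'

machine = UM.new
r, w = IO.pipe

# writev submits all three buffers in a single vectored write
# operation, instead of one write per buffer.
machine.writev(w.fileno, "foo", "bar", "baz")

# Per the TODO notes, an optional file offset may be passed last:
#   machine.writev(fd, buf1, buf2, buf3, 0)
# and for sockets there is UM#sendv, which for now takes no flags:
#   machine.sendv(fd, buf1, buf2, buf3)

buf = +''
machine.read(r.fileno, buf, 9)
p buf #=> "foobarbaz"
```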
data/TODO.md CHANGED

@@ -1,148 +1,28 @@
 ## immediate
 
-##
-
-- use CPU time (CLOCK_THREAD_CPUTIME_ID)
-- measure:
-  - time each fiber is waiting
-  - time each fiber is running
-  - time machine is waiting (for CQEs)
-  - time machine is running fibers from the runqueue
-- can be turned on/off at any time
-- no performance impact when off
-
-How can this be implemented:
-
-- `um_get_time_cpu()` function for reading CPU time (CLOCK_THREAD_CPUTIME_ID) as
-  double.
-- add to `struct um`:
-
-```c
-struct um {
-  ...
-  int profiling_mode;
-  double total_time_run;
-  double total_time_wait;
-  double last_cpu_time;
-}
-```
+## buffer rings - automatic management
 
-
-
+```ruby
+# completely hands off
+machine.read_each(fd) { |str| ... }
 
-
-
-
-machine->last_cpu_time = um_get_time_cpu();
-```
+# what if we want to get IO::Buffer?
+machine.read_each(fd, io_buffer: true) { |iobuff, len| ... }
+```
 
-
-- before processing CQEs:
-
-```c
-// before
-double cpu_time0;
-VALUE fiber;
-int profiling_mode = machine->profiling_mode;
-if (profiling_mode) {
-  fiber = rb_fiber_current();
-  cpu_time0 = um_get_time_cpu();
-  double elapsed = cpu_time0 - machine->last_cpu_time;
-  um_update_fiber_time_run(fiber, cpu_time0, elapsed);
-  machine->total_time_run += elapsed;
-}
-process_cqes(...)
-// after
-if (profiling_mode) {
-  double cpu_time1 = um_get_time_cpu();
-  double elapsed = cpu_time1 - cpu_time0;
-  um_update_fiber_last_time(fiber, cpu_time1);
-  machine->total_time_wait += elapsed;
-  machine->last_cpu_time = cpu_time1;
-}
-```
-
-- when doing switching, in `um_process_runqueue_op`:
-
-```c
-// before
-double cpu_time;
-VALUE cur_fiber;
-VALUE next_fiber = get_next_fiber(...);
-int profiling_mode = machine->profiling_mode;
-if (profiling_mode) {
-  cur_fiber = rb_fiber_current();
-  cpu_time = um_get_time_cpu();
-  double elapsed = cpu_time - machine->last_cpu_time;
-  um_update_fiber_time_run(cur_fiber, cpu_time, elapsed);
-  machine->total_time_run += elapsed;
-  um_update_fiber_time_wait(next_fiber, cpu_time);
-  machine->last_cpu_time = cpu_time;
-}
-do_fiber_transfer(...)
-```
-
-- updating fiber time instance vars:
-
-```c
-inline void um_update_fiber_time_run(VALUE fiber, double stamp, double elapsed) {
-  // VALUE fiber_stamp = rb_ivar_get(fiber, ID_time_last_cpu);
-  VALUE fiber_total_run = rb_ivar_get(fiber, ID_time_total_run);
-  double total = NIL_P(fiber_total_run) ?
-    elapsed : NUM2DBL(fiber_total_run) + elapsed;
-  rb_ivar_set(fiber, ID_time_total_run, DBL2NUM(total));
-  rb_ivar_set(fiber, ID_time_last_cpu, DBL2NUM(stamp));
-}
-
-inline void um_update_fiber_time_wait(VALUE fiber, double stamp) {
-  VALUE fiber_last_stamp = rb_ivar_get(fiber, ID_time_last_cpu);
-  if (likely(!NIL_P(fiber_last_stamp))) {
-    double last_stamp = NUM2DBL(fiber_last_stamp);
-    double elapsed = stamp - last_stamp;
-    VALUE fiber_total_wait = rb_ivar_get(fiber, ID_time_total_wait);
-    double total = NIL_P(fiber_total_wait) ?
-      elapsed : NUM2DBL(fiber_total_wait) + elapsed;
-    rb_ivar_set(fiber, ID_time_total_wait, DBL2NUM(total));
-  }
-  else
-    rb_ivar_set(fiber, ID_time_total_wait, DBL2NUM(0.0));
-  rb_ivar_set(fiber, ID_time_last_cpu, DBL2NUM(stamp));
-}
-```
-
-## Metrics API
-
-- machine metrics: `UM#metrics` - returns a hash containing metrics:
+## write/send multiple buffers at once
 
-```
-{
-  size:,                  # SQ size (entries)
-  total_ops:,             # total ops submitted
-  total_fiber_switches:,  # total fiber switches
-  total_cqe_waits:,       # total number of CQE waits
-  ops_pending:,           # number of pending ops
-  ops_unsubmitted:,       # number of unsubmitted
-  ops_runqueue:,          # number of ops in runqueue
-  ops_free:,              # number of ops in freelist
-  ops_transient:,         # number of ops in transient list
-  hwm_pending:,           # high water mark - pending ops
-  hwm_unsubmitted:,       # high water mark - unsubmitted ops
-  hwm_runqueue:,          # high water mark - runqueue depth
-  hwm_free:,              # high water mark - ops in free list
-  hwm_transient:,         # high water mark - ops in transient list
-  # when profiling is active
-  time_total_run:,        # total CPU time running
-  time_total_wait:,       # total CPU time waiting for CQEs
-}
-```
+This is done as vectored IO:
 
-
-
-- transient list size
-- free list size
-- Those will be done in um_op.c (in linked list management code)
+```ruby
+machine.writev(fd, buf1, buf2, buf3)
 
-
+# with optional file offset:
+machine.writev(fd, buf1, buf2, buf3, 0)
+
+# for the moment it won't take flags
+machine.sendv(fd, buf1, buf2, buf3)
+```
 
 ## useful concurrency tools
 
@@ -152,13 +32,19 @@ How can this be implemented:
 debouncer = UM.debounce { }
 ```
 
+
+
+## polyvalent select
+
+- select on multiple queues (ala Go)
+- select on mixture of queues and fds
+
 ## ops
 
 - [ ] multishot timeout
 - [v] machine.periodically(interval) { ... }
 - [ ] machine.prep_timeout_multishot(interval)
 
-- writev
 - splice / - tee
 - sendto
 - recvfrom
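The "polyvalent select" entry in the TODO above is only a design note; no such API exists in this release. Purely as a hypothetical illustration of the Go-style semantics it describes (the `machine.select` method, its arguments, and its return convention are all invented here):

```ruby
# Hypothetical API - not part of uringmachine 0.23.0.
# Block until one of the given queues has an item, then return the
# queue that fired together with the item pulled from it.
queue, item = machine.select(jobs_queue, control_queue)
case queue
when jobs_queue    then process_job(item)
when control_queue then process_control(item)
end
```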
data/benchmark/README.md CHANGED

@@ -4,25 +4,26 @@ The following benchmarks measure the performance of UringMachine against stock
 Ruby in a variety of scenarios. For each scenario, we compare three different
 implementations:
 
--
+- `Threads`: thread-based concurrency using the stock Ruby I/O and
   synchronization classes.
 
--
-
-  Ruby I/O and synchronization classes.
+- `ThreadPool`: thread pool consisting of 10 worker threads, receiving jobs
+  through a common queue.
 
--
-
+- `Async epoll`: fiber-based concurrency with
+  [Async](https://github.com/socketry/async) fiber scheduler, using an epoll
+  selector.
 
--
-
+- `Async uring`: fiber-based concurrency with Async fiber scheduler, using a
+  uring selector.
 
--
-  polling](https://unixism.net/loti/tutorial/sq_poll.html).
+- `UM FS`: fiber-based concurrency with UringMachine fiber scheduler.
 
-
+- `UM`: fiber-based concurrency using the UringMachine low-level API.
 
-
+<img src="./chart_all.png">
+
+## Observations
 
 - We see the stark difference between thread-based and fiber-based concurrency.
   For I/O-bound workloads, there's really no contest - and that's exactly why
@@ -34,28 +35,37 @@ implementations:
   C-extension.
 
 - The UringMachine low-level API is faster to use in most cases, and its
-  performance advantage grows with the level of concurrency.
-
-
-  depending on the context. It remains to be seen how it affects performance in
-  real-world situations.
+  performance advantage grows with the level of concurrency. Interestingly, when
+  performing CPU-bound work, it seems slightly slower. This should be
+  investigated.
 
 - The [pg](https://github.com/ged/ruby-pg) gem supports the use of fiber
   schedulers, and there too we see a marked performance advantage to using
   fibers instead of threads.
 
+According to these benchmarks, for I/O-bound scenarios the different fiber-based
+implementations present an average speedup as follows:
+
+| implementation | average factor |
+|----------------|----------------|
+| Async epoll    | x2.36          |
+| Async uring    | x2.42          |
+| UM FS          | x2.85          |
+| UM             | x6.20          |
+
 ## 1. I/O - Pipe
 
 50 groups, where in each group we create a pipe with a pair of threads/fibers
 writing/reading 1KB of data to the pipe.
 
 ```
-C=50x2
-Threads
-
-
-
-UM
+C=50x2        user       system     total      real
+Threads       2.105002   2.671980   4.776982  ( 4.272842)
+ThreadPool    4.818014  10.740555  15.558569  ( 7.070236)
+Async epoll   1.118937   0.254803   1.373740  ( 1.374298)
+Async uring   1.363248   0.270063   1.633311  ( 1.633696)
+UM FS         0.746332   0.183006   0.929338  ( 0.929619)
+UM            0.237816   0.328352   0.566168  ( 0.566265)
 ```
 
 ## 2. I/O - Socketpair
 
@@ -64,12 +74,13 @@ UM sqpoll 0.217577 0.634414 0.851991 ( 0.593531)
 pair of threads/fibers writing/reading 1KB of data to the sockets.
 
 ```
-
-Threads
-
-
-
-UM
+C=50x2        user       system     total      real
+Threads       2.068122   3.247781   5.315903  ( 4.295488)
+ThreadPool    2.283882   3.461607   5.745489  ( 4.650422)
+Async epoll   0.381400   0.846445   1.227845  ( 1.227983)
+Async uring   0.472526   0.821467   1.293993  ( 1.294166)
+UM FS         0.443023   0.734334   1.177357  ( 1.177576)
+UM            0.116995   0.675997   0.792992  ( 0.793183)
 ```
 
 ## 3. Mutex - CPU-bound
 
@@ -78,12 +89,12 @@ UM sqpoll 0.220933 1.021997 1.242930 ( 0.976198)
 threads/fibers locking the mutex and performing a Regexp match.
 
 ```
-
-Threads
-Async
-
-UM
-UM
+C=20x10       user       system     total      real
+Threads       5.174998   0.024885   5.199883  ( 5.193211)
+Async epoll   5.309793   0.000949   5.310742  ( 5.311217)
+Async uring   5.341404   0.004860   5.346264  ( 5.346963)
+UM FS         5.363719   0.001976   5.365695  ( 5.366254)
+UM            5.351073   0.005986   5.357059  ( 5.357602)
 ```
 
 ## 4. Mutex - I/O-bound
 
@@ -93,81 +104,36 @@ start 10 worker threads/fibers locking the mutex and writing 1KB chunks to the
 file.
 
 ```
-
-Threads
-Async
-
-UM
-UM
-
-N=5        user       system     total      real
-Threads    0.214296   0.384078   0.598374  ( 0.467425)
-Async FS   0.085820   0.158782   0.244602  ( 0.139766)
-UM FS      0.064279   0.147278   0.211557  ( 0.117488)
-UM pure    0.036478   0.182950   0.219428  ( 0.119745)
-UM sqpoll  0.036929   0.347573   0.384502  ( 0.160814)
-
-N=10       user       system     total      real
-Threads    0.435688   0.752219   1.187907  ( 0.924561)
-Async FS   0.126573   0.303704   0.430277  ( 0.234900)
-UM FS      0.128427   0.215204   0.343631  ( 0.184074)
-UM pure    0.065522   0.359659   0.425181  ( 0.192385)
-UM sqpoll  0.076810   0.477429   0.554239  ( 0.210087)
-
-N=20       user       system     total      real
-Threads    0.830763   1.585299   2.416062  ( 1.868194)
-Async FS   0.291823   0.644043   0.935866  ( 0.507887)
-UM FS      0.226202   0.460401   0.686603  ( 0.362879)
-UM pure    0.120524   0.616274   0.736798  ( 0.332182)
-UM sqpoll  0.177150   0.849890   1.027040  ( 0.284069)
-
-N=50       user       system     total      real
-Threads    2.124048   4.182537   6.306585  ( 4.878387)
-Async FS   0.897134   1.268629   2.165763  ( 1.254624)
-UM FS      0.733193   0.971821   1.705014  ( 0.933749)
-UM pure    0.226431   1.504441   1.730872  ( 0.760731)
-UM sqpoll  0.557310   2.107389   2.664699  ( 0.783992)
-
-N=100      user       system     total      real
-Threads    4.420832   8.628756  13.049588  ( 10.264590)
-Async FS   2.557661   2.532998   5.090659  ( 3.179336)
-UM FS      2.262136   1.912055   4.174191  ( 2.523789)
-UM pure    0.633897   2.793998   3.427895  ( 1.612989)
-UM sqpoll  1.119460   4.193703   5.313163  ( 1.525968)
+C=50x10       user       system     total      real
+Threads       2.042649   3.441547   5.484196  ( 4.328783)
+Async epoll   0.810375   0.744084   1.554459  ( 1.554726)
+Async uring   0.854985   1.129260   1.984245  ( 1.140749)
+UM FS         0.686329   0.872376   1.558705  ( 0.845214)
+UM            0.250370   1.323227   1.573597  ( 0.720928)
 ```
 
-## 5.
+## 5. Postgres client
 
-
-threads/fibers that push items to the queue, and 10 consumer threads/fibers that
-pull items from the queue.
+C concurrent threads/fibers, each thread issuing a SELECT query to a PG database.
 
 ```
-
-Threads
-Async
-
-UM
-UM sqpoll  2.044662   2.460344   4.505006  ( 2.261502)
+C=50          user       system     total      real
+Threads       4.304292   1.358116   5.662408  ( 4.795725)
+Async epoll   2.890160   0.432836   3.322996  ( 3.334350)
+Async uring   2.818439   0.433896   3.252335  ( 3.252799)
+UM FS         2.819371   0.443182   3.262553  ( 3.264606)
 ```
+## 6. Queue
 
-
-
-
+20 concurrent groups, where in each group we create a queue, start 5 producer
+threads/fibers that push items to the queue, and 10 consumer threads/fibers that
+pull items from the queue.
 
 ```
-C=10
-Threads
-Async
-
-
-
-Threads    1.652901   0.714299   2.367200  ( 2.014781)
-Async FS   1.136826   0.212991   1.349817  ( 1.350544)
-UM FS      1.084873   0.205865   1.290738  ( 1.291865)
-
-C=50       user       system     total      real
-Threads    4.410604   1.804900   6.215504  ( 5.253016)
-Async FS   2.918522   0.507981   3.426503  ( 3.427966)
-UM FS      2.789549   0.537269   3.326818  ( 3.329802)
+C=20x(5+10)   user       system     total      real
+Threads       4.880983   0.207451   5.088434  ( 5.071019)
+Async epoll   4.107208   0.006519   4.113727  ( 4.114227)
+Async uring   4.206283   0.028974   4.235257  ( 4.235705)
+UM FS         4.082394   0.001719   4.084113  ( 4.084522)
+UM            4.099893   0.323569   4.423462  ( 4.424089)
 ```
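To give a feel for what the `UM` rows measure, here is a minimal sketch of the low-level pipe pattern, pieced together from calls that appear in the benchmark sources below (`spin`, `read`, `write`, `await_fibers`); the iteration count is illustrative, and the exact method signatures should be checked against the gem:

```ruby
require 'uringmachine'

machine = UM.new
r, w = IO.pipe
data = '*' * 1024 # 1KB chunks, as in the benchmark
fibers = []

# writer fiber: pushes 1KB chunks into the pipe
fibers << machine.spin do
  10.times { machine.write(w.fileno, data, data.bytesize) }
end

# reader fiber: pulls the chunks back out
fibers << machine.spin do
  buf = +''
  10.times { machine.read(r.fileno, buf, data.bytesize) }
end

machine.await_fibers(fibers)
```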
data/benchmark/bm_io_pipe.rb CHANGED

@@ -38,6 +38,20 @@ class UMBenchmark
     end
   end
 
+  def do_baseline
+    GROUPS.times do
+      r, w = IO.pipe
+      r.sync = true
+      w.sync = true
+      ITERATIONS.times {
+        w.write(DATA)
+        r.read(SIZE)
+      }
+      r.close
+      w.close
+    end
+  end
+
   def do_scheduler(scheduler, ios)
     GROUPS.times do
       r, w = IO.pipe
data/benchmark/chart_all.png CHANGED

Binary file (no diff shown)
data/benchmark/common.rb CHANGED

@@ -54,6 +54,7 @@ class UMBenchmark
   end
 
   @@benchmarks = {
+    baseline: [:baseline, "No Concurrency"],
     threads: [:threads, "Threads"],
     thread_pool: [:thread_pool, "ThreadPool"],
     async_uring: [:scheduler, "Async uring"],
@@ -69,6 +70,10 @@ class UMBenchmark
     end
   end
 
+  def run_baseline
+    do_baseline
+  end
+
   def run_threads
     threads = []
     ios = []
@@ -118,6 +123,8 @@ class UMBenchmark
     fds = []
     do_um(machine, fibers, fds)
     machine.await_fibers(fibers)
+    puts "UM:"
+    p machine.metrics
     fds.each { machine.close(it) }
   end
 
@@ -128,6 +135,8 @@ class UMBenchmark
     do_um(machine, fibers, fds)
     machine.await_fibers(fibers)
     fds.each { machine.close_async(it) }
+    puts "UM sqpoll:"
+    p machine.metrics
     machine.snooze
   end
 end
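The `machine.metrics` calls added above print a hash of counters. The 0.23.0 key set is not shown in this diff; the removed TODO notes (see the data/TODO.md section earlier) drafted keys such as `total_ops`, `total_fiber_switches` and the `hwm_*` high-water marks, so inspecting it might look roughly like this (key names follow that draft and should be verified against the actual output):

```ruby
machine = UM.new
metrics = machine.metrics

# Key names below follow the draft in the removed TODO notes -
# verify against what 0.23.0 actually returns.
puts "ops submitted:  #{metrics[:total_ops]}"
puts "fiber switches: #{metrics[:total_fiber_switches]}"
puts "runqueue HWM:   #{metrics[:hwm_runqueue]}"
```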
data/benchmark/read_each.rb ADDED

@@ -0,0 +1,83 @@
+# frozen_string_literal: true
+
+require 'bundler/inline'
+
+gemfile do
+  source 'https://rubygems.org'
+  gem 'uringmachine', path: '..'
+  gem 'benchmark-ips'
+end
+
+require 'benchmark/ips'
+require 'uringmachine'
+
+@machine = UM.new
+
+make_socket_pair = -> do
+  port = 10000 + rand(30000)
+  server_fd = @machine.socket(UM::AF_INET, UM::SOCK_STREAM, 0, 0)
+  @machine.setsockopt(server_fd, UM::SOL_SOCKET, UM::SO_REUSEADDR, true)
+  @machine.bind(server_fd, '127.0.0.1', port)
+  @machine.listen(server_fd, UM::SOMAXCONN)
+
+  client_conn_fd = @machine.socket(UM::AF_INET, UM::SOCK_STREAM, 0, 0)
+  @machine.connect(client_conn_fd, '127.0.0.1', port)
+
+  server_conn_fd = @machine.accept(server_fd)
+
+  @machine.close(server_fd)
+  [client_conn_fd, server_conn_fd]
+end
+
+@client_fd, @server_fd = make_socket_pair.()
+
+@read_buf = +''
+@read_fiber = @machine.spin do
+  while true
+    @machine.read(@client_fd, @read_buf, 65536, 0)
+  end
+end
+
+STR_COUNT = ARGV[0]&.to_i || 3
+STR_SIZE = ARGV[1]&.to_i || 100
+
+@parts = ['*' * STR_SIZE] * STR_COUNT
+
+@server_io = IO.new(@server_fd)
+@server_io.sync = true
+def io_write
+  @server_io.write(*@parts)
+  @machine.snooze
+end
+
+def um_write
+  str = @parts.join
+  len = str.bytesize
+
+  while len > 0
+    ret = @machine.write(@server_fd, str, len)
+    len -= ret
+    str = str[ret..-1] if len > 0
+  end
+end
+
+def um_send
+  str = @parts.join
+  @machine.send(@server_fd, str, str.bytesize, UM::MSG_WAITALL)
+end
+
+@bgid = @machine.setup_buffer_ring(0, 8)
+def um_send_bundle
+  @machine.send_bundle(@server_fd, @bgid, @parts)
+end
+
+p(STR_COUNT:, STR_SIZE:)
+
+Benchmark.ips do |x|
+  x.report('IO#write') { io_write }
+  x.report('UM#write') { um_write }
+  x.report('UM#send') { um_send }
+  x.report('UM#send_bundle') { um_send_bundle }
+
+  x.compare!(order: :baseline)
+end
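Note on running the new benchmark: the script takes the string count and string size from ARGV (defaulting to 3 and 100), so an invocation like `ruby read_each.rb 8 1024`, run from the benchmark directory, would exercise 8 buffers of 1KB each.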