uringmachine 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 74e4816d1191d862df3ba04d46cc038d04b999c22c5604d9a4eec0d1d3fd047c
4
- data.tar.gz: d857ba559f6c48dfc8d65a1812eb3996c7a65d70d263e016bbb96dbf99e6273c
3
+ metadata.gz: b185b9cafdee3930061ed7101a12ccb500a8f131a9715a6a1268b22507ec2d85
4
+ data.tar.gz: c2d0fe4aced8f2340b2cdd29cf9540b7075198be56a0e695f58b9fbab5fae65c
5
5
  SHA512:
6
- metadata.gz: 662c0f7e07df7f87c759eb3e8001aa91c0682d55c63fc46e0429c5ac577de3e0f89476f93b0deb3e05fb3fba4daa4eaae767141615c9e1af1b35df8966f7d988
7
- data.tar.gz: d22cc49d99ef5772411ebdb8019b6d83eb9e944a83b0a327c638bb5eeaef5661ed3cc34dcb6673a26049e9f7f17cebe76306ca7b847cd4bc0e244c99dfafb210
6
+ metadata.gz: 75b0fe0a71242d728cbe1901457e41a11255c1922eca6501e3fa4286a7bd89a01ad7e5625baa24721ad63efc16da24a4b4d8ffeb85114be8fbe910066a0b033e
7
+ data.tar.gz: 64afeb65cc42c5b5c3af30dc361f2917a6341fd38e645114c48972bb6aa6617f496be6de040dfec6eefa102ae1d933cd726549890a9d57369f465be3b6df613f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+
2
+ # 0.23.0 2025-12-16
3
+
4
+ - Add `UM#accept_into_queue`, fix `#accept_each` to throw on error
5
+ - Use Set instead of Hash for holding pending fibers
6
+ - Add `UM#writev`, `UM#sendv` methods
7
+ - Allocate um_op and um_op_result in batches of 256
8
+ - Remove `SIGCLD` const
9
+
10
+ # 0.22.1 2025-12-11
11
+
12
+ - Comment out SIGCLD constant
13
+
1
14
  # 0.22.0 2025-12-10
2
15
 
3
16
  - Fix use of `um_yield` in statx, multishot ops
@@ -8,6 +21,7 @@
8
21
  - More tests and benchmarks
9
22
  - Add `UM#await_fibers` for awaiting fibers
10
23
  - Add `UM.socketpair` for creating a socket pair
24
+ - Fix segfault caused by waiting fibers not being marked
11
25
  - Fiber scheduler:
12
26
  - Use fiber's mailbox for processing blocking operations
13
27
  - Add `#io_close`, `#yield` hooks, remove `#process_fork` hook
@@ -22,8 +36,8 @@
22
36
  - Add debug logging for key io_uring interactions
23
37
  - Add UM#mark and DEBUG_MARK for debugging specific UM instances
24
38
  - Short-circuit zero-length writes
25
- - Add optional file_offset argument to #read, #write. Add optional len and file_off
26
- set arguments to #write_async
39
+ - Add optional file_offset argument to #read, #write. Add optional len and
40
+ file_offset arguments to #write_async
27
41
  - Add support for specifying SQPOLL mode and SQ idle timeout in `UM#initialize`
28
42
  - Add support for specifying number of SQ entries in `UM#initialize`
29
43
  - Implement global worker pool for blocking operations in fiber scheduler
data/TODO.md CHANGED
@@ -1,148 +1,28 @@
1
1
  ## immediate
2
2
 
3
- ## Measuring CPU time for fibers
4
-
5
- - use CPU time (CLOCK_THREAD_CPUTIME_ID)
6
- - measure:
7
- - time each fiber is waiting
8
- - time each fiber is running
9
- - time machine is waiting (for CQEs)
10
- - time machine is running fibers from the runqueue
11
- - can be turned on/off at any time
12
- - no performance impact when off
13
-
14
- How can this be implemented:
15
-
16
- - `um_get_time_cpu()` function for reading CPU time (CLOCK_THREAD_CPUTIME_ID) as
17
- double.
18
- - add to `struct um`:
19
-
20
- ```c
21
- struct um {
22
- ...
23
- int profiling_mode;
24
- double total_time_run;
25
- double total_time_wait;
26
- double last_cpu_time;
27
- }
28
- ```
3
+ ## buffer rings - automatic management
29
4
 
30
- - `UM#profile=` to turn it on/off.
31
- - On `machine.profile = true`, reset `total_time_xxx` and `last_cpu_time`
5
+ ```ruby
6
+ # completely hands off
7
+ machine.read_each(fd) { |str| ... }
32
8
 
33
- ```c
34
- machine->total_time_run = 0;
35
- machine->total_time_wait = 0;
36
- machine->last_cpu_time = um_get_time_cpu();
37
- ```
9
+ # what if we want to get IO::Buffer?
10
+ machine.read_each(fd, io_buffer: true) { |iobuff, len| ... }
11
+ ```
38
12
 
39
- - when profiling is active:
40
- - before processing CQEs:
41
-
42
- ```c
43
- // before
44
- double cpu_time0;
45
- VALUE fiber;
46
- int profiling_mode = machine->profiling_mode;
47
- if (profiling_mode) {
48
- fiber = rb_fiber_current();
49
- cpu_time0 = um_get_time_cpu();
50
- double elapsed = cpu_time0 - machine->last_cpu_time;
51
- um_update_fiber_time_run(fiber, cpu_time0, elapsed);
52
- machine->total_time_run += elapsed;
53
- }
54
- process_cqes(...)
55
- // after
56
- if (profiling_mode) {
57
- double cpu_time1 = um_get_time_cpu();
58
- double elapsed = cpu_time1 - cpu_time0;
59
- um_update_fiber_last_time(fiber, cpu_time1);
60
- machine->total_time_wait += elapsed;
61
- machine->last_cpu_time = cpu_time1;
62
- }
63
- ```
64
-
65
- - when doing switching, in `um_process_runqueue_op`:
66
-
67
- ```c
68
- // before
69
- double cpu_time;
70
- VALUE cur_fiber;
71
- VALUE next_fiber = get_next_fiber(...);
72
- int profiling_mode = machine->profiling_mode;
73
- if (profiling_mode) {
74
- cur_fiber = rb_fiber_current();
75
- cpu_time = um_get_time_cpu();
76
- double elapsed = cpu_time - machine->last_cpu_time;
77
- um_update_fiber_time_run(cur_fiber, cpu_time, elapsed);
78
- machine->total_time_run += elapsed;
79
- um_update_fiber_time_wait(next_fiber, cpu_time);
80
- machine->last_cpu_time = cpu_time;
81
- }
82
- do_fiber_transfer(...)
83
- ```
84
-
85
- - updating fiber time instance vars:
86
-
87
- ```c
88
- inline void um_update_fiber_time_run(VALUE fiber, double stamp, double elapsed) {
89
- // VALUE fiber_stamp = rb_ivar_get(fiber, ID_time_last_cpu);
90
- VALUE fiber_total_run = rb_ivar_get(fiber, ID_time_total_run);
91
- double total = NIL_P(fiber_total_run) ?
92
- elapsed : NUM2DBL(fiber_total_run) + elapsed;
93
- rb_ivar_set(fiber, ID_time_total_run, DBL2NUM(total));
94
- rb_ivar_set(fiber, ID_time_last_cpu, DBL2NUM(stamp));
95
- }
96
-
97
- inline void um_update_fiber_time_wait(VALUE fiber, double stamp) {
98
- VALUE fiber_last_stamp = rb_ivar_get(fiber, ID_time_last_cpu);
99
- if (likely(!NIL_P(fiber_last_stamp))) {
100
- double last_stamp = NUM2DBL(fiber_last_stamp);
101
- double elapsed = stamp - last_stamp;
102
- VALUE fiber_total_wait = rb_ivar_get(fiber, ID_time_total_wait);
103
- double total = NIL_P(fiber_total_wait) ?
104
- elapsed : NUM2DBL(fiber_total_wait) + elapsed;
105
- rb_ivar_set(fiber, ID_time_total_wait, DBL2NUM(total));
106
- }
107
- else
108
- rb_ivar_set(fiber, ID_time_total_wait, DBL2NUM(0.0));
109
- rb_ivar_set(fiber, ID_time_last_cpu, DBL2NUM(stamp));
110
- }
111
- ```
112
-
113
- ## Metrics API
114
-
115
- - machine metrics: `UM#metrics` - returns a hash containing metrics:
13
+ ## write/send multiple buffers at once
116
14
 
117
- ```ruby
118
- {
119
- size:, # SQ size (entries)
120
- total_ops:, # total ops submitted
121
- total_fiber_switches:, # total fiber switches
122
- total_cqe_waits:, # total number of CQE waits
123
- ops_pending:, # number of pending ops
124
- ops_unsubmitted:, # number of unsubmitted
125
- ops_runqueue:, # number of ops in runqueue
126
- ops_free:, # number of ops in freelist
127
- ops_transient:, # number of ops in transient list
128
- hwm_pending:, # high water mark - pending ops
129
- hwm_unsubmitted:, # high water mark - unsubmitted ops
130
- hwm_runqueue:, # high water mark - runqueue depth
131
- hwm_free:, # high water mark - ops in free list
132
- hwm_transient:, # high water mark - ops in transient list
133
- # when profiling is active
134
- time_total_run:, # total CPU time running
135
- time_total_wait:, # total CPU time waiting for CQEs
136
- }
137
- ```
15
+ This is done as vectored IO:
138
16
 
139
- - For this we need to add tracking for:
140
- - runqueue list size
141
- - transient list size
142
- - free list size
143
- - Those will be done in um_op.c (in linked list management code)
17
+ ```ruby
18
+ machine.writev(fd, buf1, buf2, buf3)
144
19
 
145
- - All metrics info in kept in
20
+ # with optional file offset:
21
+ machine.writev(fd, buf1, buf2, buf3, 0)
22
+
23
+ # for the moment it won't take flags
24
+ machine.sendv(fd, buf1, buf2, buf3)
25
+ ```
146
26
 
147
27
  ## useful concurrency tools
148
28
 
@@ -152,13 +32,19 @@ How can this be implemented:
152
32
  debouncer = UM.debounce { }
153
33
  ```
154
34
 
35
+
36
+
37
+ ## polyvalent select
38
+
39
+ - select on multiple queues (ala Go)
40
+ - select on mixture of queues and fds
41
+
155
42
  ## ops
156
43
 
157
44
  - [ ] multishot timeout
158
45
  - [v] machine.periodically(interval) { ... }
159
46
  - [ ] machine.prep_timeout_multishot(interval)
160
47
 
161
- - writev
162
48
  - splice / - tee
163
49
  - sendto
164
50
  - recvfrom
data/benchmark/README.md CHANGED
@@ -4,25 +4,26 @@ The following benchmarks measure the performance of UringMachine against stock
4
4
  Ruby in a variety of scenarios. For each scenario, we compare three different
5
5
  implementations:
6
6
 
7
- - **Threads**: thread-based concurrency using the stock Ruby I/O and
7
+ - `Threads`: thread-based concurrency using the stock Ruby I/O and
8
8
  synchronization classes.
9
9
 
10
- - **Async FS**: fiber-based concurrency with the
11
- [Async](https://github.com/socketry/async) fiber scheduler, using the stock
12
- Ruby I/O and synchronization classes.
10
+ - `ThreadPool`: thread pool consisting of 10 worker threads, receiving jobs
11
+ through a common queue.
13
12
 
14
- - **UM FS**: fiber-based concurrency with the UringMachine fiber scheduler,
15
- using the stock Ruby I/O and synchronization classes.
13
+ - `Async epoll`: fiber-based concurrency with
14
+ [Async](https://github.com/socketry/async) fiber scheduler, using an epoll
15
+ selector.
16
16
 
17
- - **UM pure**: fiber-based concurrency using the UringMachine low-level (pure)
18
- API.
17
+ - `Async uring`: fiber-based concurrency with Async fiber scheduler, using a
18
+ uring selector.
19
19
 
20
- - **UM sqpoll**: the same as **UM pure** with [submission queue
21
- polling](https://unixism.net/loti/tutorial/sq_poll.html).
20
+ - `UM FS`: fiber-based concurrency with UringMachine fiber scheduler.
22
21
 
23
- <img src="./chart.png">
22
+ - `UM`: fiber-based concurrency using the UringMachine low-level API.
24
23
 
25
- ## Observations:
24
+ <img src="./chart_all.png">
25
+
26
+ ## Observations
26
27
 
27
28
  - We see the stark difference between thread-based and fiber-based concurrency.
28
29
  For I/O-bound workloads, there's really no contest - and that's exactly why
@@ -34,28 +35,37 @@ implementations:
34
35
  C-extension.
35
36
 
36
37
  - The UringMachine low-level API is faster to use in most cases, and its
37
- performance advantage grows with the level of concurrency.
38
-
39
- - SQ polling provides a performance advantage in high-concurrency scenarios,
40
- depending on the context. It remains to be seen how it affects performance in
41
- real-world situations.
38
+ performance advantage grows with the level of concurrency. Interestingly, when
39
 + performing CPU-bound work, it seems slightly slower. This should be
40
+ investigated.
42
41
 
43
42
  - The [pg](https://github.com/ged/ruby-pg) gem supports the use of fiber
44
43
  schedulers, and there too we see a marked performance advantage to using
45
44
  fibers instead of threads.
46
45
 
46
+ According to these benchmarks, for I/O-bound scenarios the different fiber-based
47
 + implementations present an average speedup as follows:
48
+
49
+ |implementation|average factor|
50
+ |--------------|--------------|
51
+ |Async epoll |x2.36 |
52
+ |Async uring |x2.42 |
53
+ |UM FS |x2.85 |
54
+ |UM |x6.20 |
55
+
47
56
  ## 1. I/O - Pipe
48
57
 
49
58
  50 groups, where in each group we create a pipe with a pair of threads/fibers
50
59
  writing/reading 1KB of data to the pipe.
51
60
 
52
61
  ```
53
- C=50x2 user system total real
54
- Threads 2.501885 3.111840 5.613725 ( 5.017991)
55
- Async FS 1.189332 0.526275 1.715607 ( 1.715726)
56
- UM FS 0.715688 0.318851 1.034539 ( 1.034723)
57
- UM pure 0.241029 0.365079 0.606108 ( 0.606308)
58
- UM sqpoll 0.217577 0.634414 0.851991 ( 0.593531)
62
+ C=50x2 user system total real
63
+ Threads 2.105002 2.671980 4.776982 ( 4.272842)
64
+ ThreadPool 4.818014 10.740555 15.558569 ( 7.070236)
65
+ Async epoll 1.118937 0.254803 1.373740 ( 1.374298)
66
+ Async uring 1.363248 0.270063 1.633311 ( 1.633696)
67
+ UM FS 0.746332 0.183006 0.929338 ( 0.929619)
68
+ UM 0.237816 0.328352 0.566168 ( 0.566265)
59
69
  ```
60
70
 
61
71
  ## 2. I/O - Socketpair
@@ -64,12 +74,13 @@ UM sqpoll 0.217577 0.634414 0.851991 ( 0.593531)
64
74
  pair of threads/fibers writing/reading 1KB of data to the sockets.
65
75
 
66
76
  ```
67
- N=50 user system total real
68
- Threads 2.372753 3.612468 5.985221 ( 4.798625)
69
- Async FS 0.516226 0.877822 1.394048 ( 1.394266)
70
- UM FS 0.521360 0.875674 1.397034 ( 1.397327)
71
- UM pure 0.239353 0.642498 0.881851 ( 0.881962)
72
- UM sqpoll 0.220933 1.021997 1.242930 ( 0.976198)
77
+ C=50x2 user system total real
78
+ Threads 2.068122 3.247781 5.315903 ( 4.295488)
79
+ ThreadPool 2.283882 3.461607 5.745489 ( 4.650422)
80
+ Async epoll 0.381400 0.846445 1.227845 ( 1.227983)
81
+ Async uring 0.472526 0.821467 1.293993 ( 1.294166)
82
+ UM FS 0.443023 0.734334 1.177357 ( 1.177576)
83
+ UM 0.116995 0.675997 0.792992 ( 0.793183)
73
84
  ```
74
85
 
75
86
  ## 3. Mutex - CPU-bound
@@ -78,12 +89,12 @@ UM sqpoll 0.220933 1.021997 1.242930 ( 0.976198)
78
89
  threads/fibers locking the mutex and performing a Regexp match.
79
90
 
80
91
  ```
81
- N=20 user system total real
82
- Threads 5.348378 0.021847 5.370225 ( 5.362117)
83
- Async FS 5.519970 0.003964 5.523934 ( 5.524536)
84
- UM FS 5.505282 0.003983 5.509265 ( 5.509840)
85
- UM pure 5.607048 0.002991 5.610039 ( 5.610749)
86
- UM sqpoll 5.437836 5.418316 10.856152 ( 5.443331)
92
+ C=20x10 user system total real
93
+ Threads 5.174998 0.024885 5.199883 ( 5.193211)
94
+ Async epoll 5.309793 0.000949 5.310742 ( 5.311217)
95
+ Async uring 5.341404 0.004860 5.346264 ( 5.346963)
96
+ UM FS 5.363719 0.001976 5.365695 ( 5.366254)
97
+ UM 5.351073 0.005986 5.357059 ( 5.357602)
87
98
  ```
88
99
 
89
100
  ## 4. Mutex - I/O-bound
@@ -93,81 +104,36 @@ start 10 worker threads/fibers locking the mutex and writing 1KB chunks to the
93
104
  file.
94
105
 
95
106
  ```
96
- N=1 user system total real
97
- Threads 0.044103 0.057831 0.101934 ( 0.087204)
98
- Async FS 0.050608 0.084449 0.135057 ( 0.121300)
99
- UM FS 0.030355 0.077069 0.107424 ( 0.108146)
100
- UM pure 0.024489 0.086201 0.110690 ( 0.108023)
101
- UM sqpoll 0.022752 0.225133 0.247885 ( 0.136251)
102
-
103
- N=5 user system total real
104
- Threads 0.214296 0.384078 0.598374 ( 0.467425)
105
- Async FS 0.085820 0.158782 0.244602 ( 0.139766)
106
- UM FS 0.064279 0.147278 0.211557 ( 0.117488)
107
- UM pure 0.036478 0.182950 0.219428 ( 0.119745)
108
- UM sqpoll 0.036929 0.347573 0.384502 ( 0.160814)
109
-
110
- N=10 user system total real
111
- Threads 0.435688 0.752219 1.187907 ( 0.924561)
112
- Async FS 0.126573 0.303704 0.430277 ( 0.234900)
113
- UM FS 0.128427 0.215204 0.343631 ( 0.184074)
114
- UM pure 0.065522 0.359659 0.425181 ( 0.192385)
115
- UM sqpoll 0.076810 0.477429 0.554239 ( 0.210087)
116
-
117
- N=20 user system total real
118
- Threads 0.830763 1.585299 2.416062 ( 1.868194)
119
- Async FS 0.291823 0.644043 0.935866 ( 0.507887)
120
- UM FS 0.226202 0.460401 0.686603 ( 0.362879)
121
- UM pure 0.120524 0.616274 0.736798 ( 0.332182)
122
- UM sqpoll 0.177150 0.849890 1.027040 ( 0.284069)
123
-
124
- N=50 user system total real
125
- Threads 2.124048 4.182537 6.306585 ( 4.878387)
126
- Async FS 0.897134 1.268629 2.165763 ( 1.254624)
127
- UM FS 0.733193 0.971821 1.705014 ( 0.933749)
128
- UM pure 0.226431 1.504441 1.730872 ( 0.760731)
129
- UM sqpoll 0.557310 2.107389 2.664699 ( 0.783992)
130
-
131
- N=100 user system total real
132
- Threads 4.420832 8.628756 13.049588 ( 10.264590)
133
- Async FS 2.557661 2.532998 5.090659 ( 3.179336)
134
- UM FS 2.262136 1.912055 4.174191 ( 2.523789)
135
- UM pure 0.633897 2.793998 3.427895 ( 1.612989)
136
- UM sqpoll 1.119460 4.193703 5.313163 ( 1.525968)
107
+ C=50x10 user system total real
108
+ Threads 2.042649 3.441547 5.484196 ( 4.328783)
109
+ Async epoll 0.810375 0.744084 1.554459 ( 1.554726)
110
+ Async uring 0.854985 1.129260 1.984245 ( 1.140749)
111
+ UM FS 0.686329 0.872376 1.558705 ( 0.845214)
112
+ UM 0.250370 1.323227 1.573597 ( 0.720928)
137
113
  ```
138
114
 
139
- ## 5. Queue
115
+ ## 5. Postgres client
140
116
 
141
- 20 concurrent groups, where in each group we create a queue, start 5 producer
142
- threads/fibers that push items to the queue, and 10 consumer threads/fibers that
143
- pull items from the queue.
117
 + C concurrent threads/fibers, each thread issuing a SELECT query to a PG database.
144
118
 
145
119
  ```
146
- N=20 user system total real
147
- Threads 2.522270 0.125569 2.647839 ( 2.638276)
148
- Async FS 2.245917 0.044860 2.290777 ( 2.291068)
149
- UM FS 2.235130 0.000958 2.236088 ( 2.236392)
150
- UM pure 2.125827 0.225050 2.350877 ( 2.351347)
151
- UM sqpoll 2.044662 2.460344 4.505006 ( 2.261502)
120
+ C=50 user system total real
121
+ Threads 4.304292 1.358116 5.662408 ( 4.795725)
122
+ Async epoll 2.890160 0.432836 3.322996 ( 3.334350)
123
+ Async uring 2.818439 0.433896 3.252335 ( 3.252799)
124
+ UM FS 2.819371 0.443182 3.262553 ( 3.264606)
152
125
  ```
126
+ ## 6. Queue
153
127
 
154
- ## 6. Postgres client
155
-
156
- C concurrent threads/fiber, each thread issuing SELECT query to a PG database.
128
+ 20 concurrent groups, where in each group we create a queue, start 5 producer
129
+ threads/fibers that push items to the queue, and 10 consumer threads/fibers that
130
+ pull items from the queue.
157
131
 
158
132
  ```
159
- C=10 user system total real
160
- Threads 0.813844 0.358261 1.172105 ( 0.987320)
161
- Async FS 0.545493 0.098608 0.644101 ( 0.644636)
162
- UM FS 0.523503 0.094336 0.617839 ( 0.619250)
163
-
164
- C=20 user system total real
165
- Threads 1.652901 0.714299 2.367200 ( 2.014781)
166
- Async FS 1.136826 0.212991 1.349817 ( 1.350544)
167
- UM FS 1.084873 0.205865 1.290738 ( 1.291865)
168
-
169
- C=50 user system total real
170
- Threads 4.410604 1.804900 6.215504 ( 5.253016)
171
- Async FS 2.918522 0.507981 3.426503 ( 3.427966)
172
- UM FS 2.789549 0.537269 3.326818 ( 3.329802)
133
+ C=20x(5+10) user system total real
134
+ Threads 4.880983 0.207451 5.088434 ( 5.071019)
135
+ Async epoll 4.107208 0.006519 4.113727 ( 4.114227)
136
+ Async uring 4.206283 0.028974 4.235257 ( 4.235705)
137
+ UM FS 4.082394 0.001719 4.084113 ( 4.084522)
138
+ UM 4.099893 0.323569 4.423462 ( 4.424089)
173
139
  ```
@@ -38,6 +38,20 @@ class UMBenchmark
38
38
  end
39
39
  end
40
40
 
41
+ def do_baseline
42
+ GROUPS.times do
43
+ r, w = IO.pipe
44
+ r.sync = true
45
+ w.sync = true
46
+ ITERATIONS.times {
47
+ w.write(DATA)
48
+ r.read(SIZE)
49
+ }
50
+ r.close
51
+ w.close
52
+ end
53
+ end
54
+
41
55
  def do_scheduler(scheduler, ios)
42
56
  GROUPS.times do
43
57
  r, w = IO.pipe
Binary file
data/benchmark/common.rb CHANGED
@@ -54,6 +54,7 @@ class UMBenchmark
54
54
  end
55
55
 
56
56
  @@benchmarks = {
57
+ baseline: [:baseline, "No Concurrency"],
57
58
  threads: [:threads, "Threads"],
58
59
  thread_pool: [:thread_pool, "ThreadPool"],
59
60
  async_uring: [:scheduler, "Async uring"],
@@ -69,6 +70,10 @@ class UMBenchmark
69
70
  end
70
71
  end
71
72
 
73
+ def run_baseline
74
+ do_baseline
75
+ end
76
+
72
77
  def run_threads
73
78
  threads = []
74
79
  ios = []
@@ -118,6 +123,8 @@ class UMBenchmark
118
123
  fds = []
119
124
  do_um(machine, fibers, fds)
120
125
  machine.await_fibers(fibers)
126
+ puts "UM:"
127
+ p machine.metrics
121
128
  fds.each { machine.close(it) }
122
129
  end
123
130
 
@@ -128,6 +135,8 @@ class UMBenchmark
128
135
  do_um(machine, fibers, fds)
129
136
  machine.await_fibers(fibers)
130
137
  fds.each { machine.close_async(it) }
138
+ puts "UM sqpoll:"
139
+ p machine.metrics
131
140
  machine.snooze
132
141
  end
133
142
  end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/inline'
4
+
5
+ gemfile do
6
+ source 'https://rubygems.org'
7
+ gem 'uringmachine', path: '..'
8
+ gem 'benchmark-ips'
9
+ end
10
+
11
+ require 'benchmark/ips'
12
+ require 'uringmachine'
13
+
14
+ @machine = UM.new
15
+
16
+ make_socket_pair = -> do
17
+ port = 10000 + rand(30000)
18
+ server_fd = @machine.socket(UM::AF_INET, UM::SOCK_STREAM, 0, 0)
19
+ @machine.setsockopt(server_fd, UM::SOL_SOCKET, UM::SO_REUSEADDR, true)
20
+ @machine.bind(server_fd, '127.0.0.1', port)
21
+ @machine.listen(server_fd, UM::SOMAXCONN)
22
+
23
+ client_conn_fd = @machine.socket(UM::AF_INET, UM::SOCK_STREAM, 0, 0)
24
+ @machine.connect(client_conn_fd, '127.0.0.1', port)
25
+
26
+ server_conn_fd = @machine.accept(server_fd)
27
+
28
+ @machine.close(server_fd)
29
+ [client_conn_fd, server_conn_fd]
30
+ end
31
+
32
+ @client_fd, @server_fd = make_socket_pair.()
33
+
34
+ @read_buf = +''
35
+ @read_fiber = @machine.spin do
36
+ while true
37
+ @machine.read(@client_fd, @read_buf, 65536, 0)
38
+ end
39
+ end
40
+
41
+ STR_COUNT = ARGV[0]&.to_i || 3
42
+ STR_SIZE = ARGV[1]&.to_i || 100
43
+
44
+ @parts = ['*' * STR_SIZE] * STR_COUNT
45
+
46
+ @server_io = IO.new(@server_fd)
47
+ @server_io.sync = true
48
+ def io_write
49
+ @server_io.write(*@parts)
50
+ @machine.snooze
51
+ end
52
+
53
+ def um_write
54
+ str = @parts.join
55
+ len = str.bytesize
56
+
57
+ while len > 0
58
+ ret = @machine.write(@server_fd, str, len)
59
+ len -= ret
60
+ str = str[ret..-1] if len > 0
61
+ end
62
+ end
63
+
64
+ def um_send
65
+ str = @parts.join
66
+ @machine.send(@server_fd, str, str.bytesize, UM::MSG_WAITALL)
67
+ end
68
+
69
+ @bgid = @machine.setup_buffer_ring(0, 8)
70
+ def um_send_bundle
71
+ @machine.send_bundle(@server_fd, @bgid, @parts)
72
+ end
73
+
74
+ p(STR_COUNT:, STR_SIZE:)
75
+
76
+ Benchmark.ips do |x|
77
+ x.report('IO#write') { io_write }
78
+ x.report('UM#write') { um_write }
79
+ x.report('UM#send') { um_send }
80
+ x.report('UM#send_bundle') { um_send_bundle }
81
+
82
+ x.compare!(order: :baseline)
83
+ end