libev_scheduler 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -103,7 +103,7 @@ kqueue_poll (EV_P_ ev_tstamp timeout)
103
103
  EV_ACQUIRE_CB;
104
104
  kqueue_changecnt = 0;
105
105
 
106
- if (expect_false (res < 0))
106
+ if (ecb_expect_false (res < 0))
107
107
  {
108
108
  if (errno != EINTR)
109
109
  ev_syserr ("(libev) kqueue kevent");
@@ -115,7 +115,7 @@ kqueue_poll (EV_P_ ev_tstamp timeout)
115
115
  {
116
116
  int fd = kqueue_events [i].ident;
117
117
 
118
- if (expect_false (kqueue_events [i].flags & EV_ERROR))
118
+ if (ecb_expect_false (kqueue_events [i].flags & EV_ERROR))
119
119
  {
120
120
  int err = kqueue_events [i].data;
121
121
 
@@ -151,7 +151,7 @@ kqueue_poll (EV_P_ ev_tstamp timeout)
151
151
  );
152
152
  }
153
153
 
154
- if (expect_false (res == kqueue_eventmax))
154
+ if (ecb_expect_false (res == kqueue_eventmax))
155
155
  {
156
156
  ev_free (kqueue_events);
157
157
  kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_eventmax + 1);
@@ -170,7 +170,7 @@ kqueue_init (EV_P_ int flags)
170
170
 
171
171
  fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* not sure if necessary, hopefully doesn't hurt */
172
172
 
173
- backend_mintime = 1e-9; /* apparently, they did the right thing in freebsd */
173
+ backend_mintime = EV_TS_CONST (1e-9); /* apparently, they did the right thing in freebsd */
174
174
  backend_modify = kqueue_modify;
175
175
  backend_poll = kqueue_poll;
176
176
 
@@ -118,57 +118,6 @@ struct aio_ring
118
118
  struct io_event io_events[0];
119
119
  };
120
120
 
121
- /*
122
- * define some syscall wrappers for common architectures
123
- * this is mostly for nice looks during debugging, not performance.
124
- * our syscalls return < 0, not == -1, on error. which is good
125
- * enough for linux aio.
126
- * TODO: arm is also common nowadays, maybe even mips and x86
127
- * TODO: after implementing this, it suddenly looks like overkill, but its hard to remove...
128
- */
129
- #if __GNUC__ && __linux && ECB_AMD64 && !defined __OPTIMIZE_SIZE__
130
- /* the costly errno access probably kills this for size optimisation */
131
-
132
- #define ev_syscall(nr,narg,arg1,arg2,arg3,arg4,arg5) \
133
- ({ \
134
- long res; \
135
- register unsigned long r5 __asm__ ("r8" ); \
136
- register unsigned long r4 __asm__ ("r10"); \
137
- register unsigned long r3 __asm__ ("rdx"); \
138
- register unsigned long r2 __asm__ ("rsi"); \
139
- register unsigned long r1 __asm__ ("rdi"); \
140
- if (narg >= 5) r5 = (unsigned long)(arg5); \
141
- if (narg >= 4) r4 = (unsigned long)(arg4); \
142
- if (narg >= 3) r3 = (unsigned long)(arg3); \
143
- if (narg >= 2) r2 = (unsigned long)(arg2); \
144
- if (narg >= 1) r1 = (unsigned long)(arg1); \
145
- __asm__ __volatile__ ( \
146
- "syscall\n\t" \
147
- : "=a" (res) \
148
- : "0" (nr), "r" (r1), "r" (r2), "r" (r3), "r" (r4), "r" (r5) \
149
- : "cc", "r11", "cx", "memory"); \
150
- errno = -res; \
151
- res; \
152
- })
153
-
154
- #endif
155
-
156
- #ifdef ev_syscall
157
- #define ev_syscall0(nr) ev_syscall (nr, 0, 0, 0, 0, 0, 0
158
- #define ev_syscall1(nr,arg1) ev_syscall (nr, 1, arg1, 0, 0, 0, 0)
159
- #define ev_syscall2(nr,arg1,arg2) ev_syscall (nr, 2, arg1, arg2, 0, 0, 0)
160
- #define ev_syscall3(nr,arg1,arg2,arg3) ev_syscall (nr, 3, arg1, arg2, arg3, 0, 0)
161
- #define ev_syscall4(nr,arg1,arg2,arg3,arg4) ev_syscall (nr, 3, arg1, arg2, arg3, arg4, 0)
162
- #define ev_syscall5(nr,arg1,arg2,arg3,arg4,arg5) ev_syscall (nr, 5, arg1, arg2, arg3, arg4, arg5)
163
- #else
164
- #define ev_syscall0(nr) syscall (nr)
165
- #define ev_syscall1(nr,arg1) syscall (nr, arg1)
166
- #define ev_syscall2(nr,arg1,arg2) syscall (nr, arg1, arg2)
167
- #define ev_syscall3(nr,arg1,arg2,arg3) syscall (nr, arg1, arg2, arg3)
168
- #define ev_syscall4(nr,arg1,arg2,arg3,arg4) syscall (nr, arg1, arg2, arg3, arg4)
169
- #define ev_syscall5(nr,arg1,arg2,arg3,arg4,arg5) syscall (nr, arg1, arg2, arg3, arg4, arg5)
170
- #endif
171
-
172
121
  inline_size
173
122
  int
174
123
  evsys_io_setup (unsigned nr_events, aio_context_t *ctx_idp)
@@ -265,7 +214,6 @@ linuxaio_array_needsize_iocbp (ANIOCBP *base, int offset, int count)
265
214
  memset (iocb, 0, sizeof (*iocb));
266
215
 
267
216
  iocb->io.aio_lio_opcode = IOCB_CMD_POLL;
268
- iocb->io.aio_data = offset;
269
217
  iocb->io.aio_fildes = offset;
270
218
 
271
219
  base [offset++] = iocb;
@@ -287,28 +235,47 @@ linuxaio_modify (EV_P_ int fd, int oev, int nev)
287
235
  {
288
236
  array_needsize (ANIOCBP, linuxaio_iocbps, linuxaio_iocbpmax, fd + 1, linuxaio_array_needsize_iocbp);
289
237
  ANIOCBP iocb = linuxaio_iocbps [fd];
238
+ ANFD *anfd = &anfds [fd];
290
239
 
291
- if (iocb->io.aio_reqprio < 0)
240
+ if (ecb_expect_false (iocb->io.aio_reqprio < 0))
292
241
  {
293
242
  /* we handed this fd over to epoll, so undo this first */
294
243
  /* we do it manually because the optimisations on epoll_modify won't do us any good */
295
244
  epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0);
296
- anfds [fd].emask = 0;
245
+ anfd->emask = 0;
297
246
  iocb->io.aio_reqprio = 0;
298
247
  }
299
-
300
- if (iocb->io.aio_buf)
248
+ else if (ecb_expect_false (iocb->io.aio_buf))
301
249
  {
302
- evsys_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0);
303
- /* on relevant kernels, io_cancel fails with EINPROGRES if everything is fine */
304
- assert (("libev: linuxaio unexpected io_cancel failed", errno == EINPROGRESS));
250
+ /* iocb active, so cancel it first before resubmit */
251
+ /* this assumes we only ever get one call per fd per loop iteration */
252
+ for (;;)
253
+ {
254
+ /* on all relevant kernels, io_cancel fails with EINPROGRESS on "success" */
255
+ if (ecb_expect_false (evsys_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0) == 0))
256
+ break;
257
+
258
+ if (ecb_expect_true (errno == EINPROGRESS))
259
+ break;
260
+
261
+ /* the EINPROGRESS test is for nicer error message. clumsy. */
262
+ if (errno != EINTR)
263
+ {
264
+ assert (("libev: linuxaio unexpected io_cancel failed", errno != EINTR && errno != EINPROGRESS));
265
+ break;
266
+ }
267
+ }
268
+
269
+ /* increment generation counter to avoid handling old events */
270
+ ++anfd->egen;
305
271
  }
306
272
 
273
+ iocb->io.aio_buf = (nev & EV_READ ? POLLIN : 0)
274
+ | (nev & EV_WRITE ? POLLOUT : 0);
275
+
307
276
  if (nev)
308
277
  {
309
- iocb->io.aio_buf =
310
- (nev & EV_READ ? POLLIN : 0)
311
- | (nev & EV_WRITE ? POLLOUT : 0);
278
+ iocb->io.aio_data = (uint32_t)fd | ((__u64)(uint32_t)anfd->egen << 32);
312
279
 
313
280
  /* queue iocb up for io_submit */
314
281
  /* this assumes we only ever get one call per fd per loop iteration */
@@ -338,21 +305,26 @@ linuxaio_parse_events (EV_P_ struct io_event *ev, int nr)
338
305
  {
339
306
  while (nr)
340
307
  {
341
- int fd = ev->data;
342
- int res = ev->res;
308
+ int fd = ev->data & 0xffffffff;
309
+ uint32_t gen = ev->data >> 32;
310
+ int res = ev->res;
343
311
 
344
312
  assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax));
345
313
 
346
- /* feed events, we do not expect or handle POLLNVAL */
347
- fd_event (
348
- EV_A_
349
- fd,
350
- (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0)
351
- | (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0)
352
- );
353
-
354
- /* linux aio is oneshot: rearm fd. TODO: this does more work than strictly needed */
355
- linuxaio_fd_rearm (EV_A_ fd);
314
+ /* only accept events if generation counter matches */
315
+ if (ecb_expect_true (gen == (uint32_t)anfds [fd].egen))
316
+ {
317
+ /* feed events, we do not expect or handle POLLNVAL */
318
+ fd_event (
319
+ EV_A_
320
+ fd,
321
+ (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0)
322
+ | (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0)
323
+ );
324
+
325
+ /* linux aio is oneshot: rearm fd. TODO: this does more work than strictly needed */
326
+ linuxaio_fd_rearm (EV_A_ fd);
327
+ }
356
328
 
357
329
  --nr;
358
330
  ++ev;
@@ -364,21 +336,20 @@ static int
364
336
  linuxaio_get_events_from_ring (EV_P)
365
337
  {
366
338
  struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;
339
+ unsigned head, tail;
367
340
 
368
341
  /* the kernel reads and writes both of these variables, */
369
342
  /* as a C extension, we assume that volatile use here */
370
343
  /* both makes reads atomic and once-only */
371
- unsigned head = *(volatile unsigned *)&ring->head;
372
- unsigned tail = *(volatile unsigned *)&ring->tail;
344
+ head = *(volatile unsigned *)&ring->head;
345
+ ECB_MEMORY_FENCE_ACQUIRE;
346
+ tail = *(volatile unsigned *)&ring->tail;
373
347
 
374
348
  if (head == tail)
375
349
  return 0;
376
350
 
377
- /* make sure the events up to tail are visible */
378
- ECB_MEMORY_FENCE_ACQUIRE;
379
-
380
351
  /* parse all available events, but only once, to avoid starvation */
381
- if (tail > head) /* normal case around */
352
+ if (ecb_expect_true (tail > head)) /* normal case around */
382
353
  linuxaio_parse_events (EV_A_ ring->io_events + head, tail - head);
383
354
  else /* wrapped around */
384
355
  {
@@ -399,7 +370,7 @@ linuxaio_ringbuf_valid (EV_P)
399
370
  {
400
371
  struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;
401
372
 
402
- return expect_true (ring->magic == AIO_RING_MAGIC)
373
+ return ecb_expect_true (ring->magic == AIO_RING_MAGIC)
403
374
  && ring->incompat_features == EV_AIO_RING_INCOMPAT_FEATURES
404
375
  && ring->header_length == sizeof (struct aio_ring); /* TODO: or use it to find io_event[0]? */
405
376
  }
@@ -414,7 +385,7 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout)
414
385
  int want = 1; /* how many events to request */
415
386
  int ringbuf_valid = linuxaio_ringbuf_valid (EV_A);
416
387
 
417
- if (expect_true (ringbuf_valid))
388
+ if (ecb_expect_true (ringbuf_valid))
418
389
  {
419
390
  /* if the ring buffer has any events, we don't wait or call the kernel at all */
420
391
  if (linuxaio_get_events_from_ring (EV_A))
@@ -437,9 +408,7 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout)
437
408
 
438
409
  EV_RELEASE_CB;
439
410
 
440
- ts.tv_sec = (long)timeout;
441
- ts.tv_nsec = (long)((timeout - ts.tv_sec) * 1e9);
442
-
411
+ EV_TS_SET (ts, timeout);
443
412
  res = evsys_io_getevents (linuxaio_ctx, 1, want, ioev, &ts);
444
413
 
445
414
  EV_ACQUIRE_CB;
@@ -454,7 +423,7 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout)
454
423
  /* at least one event available, handle them */
455
424
  linuxaio_parse_events (EV_A_ ioev, res);
456
425
 
457
- if (expect_true (ringbuf_valid))
426
+ if (ecb_expect_true (ringbuf_valid))
458
427
  {
459
428
  /* if we have a ring buffer, handle any remaining events in it */
460
429
  linuxaio_get_events_from_ring (EV_A);
@@ -469,7 +438,7 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout)
469
438
  else
470
439
  break; /* no events from the kernel, we are done */
471
440
 
472
- timeout = 0; /* only wait in the first iteration */
441
+ timeout = EV_TS_CONST (0.); /* only wait in the first iteration */
473
442
  }
474
443
  }
475
444
 
@@ -495,7 +464,7 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
495
464
  {
496
465
  int res = evsys_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted);
497
466
 
498
- if (expect_false (res < 0))
467
+ if (ecb_expect_false (res < 0))
499
468
  if (errno == EINVAL)
500
469
  {
501
470
  /* This happens for unsupported fds, officially, but in my testing,
@@ -535,16 +504,21 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
535
504
  ++linuxaio_iteration;
536
505
  if (linuxaio_io_setup (EV_A) < 0)
537
506
  {
507
+ /* TODO: rearm all and recreate epoll backend from scratch */
508
+ /* TODO: might be more prudent? */
509
+
538
510
  /* too bad, we can't get a new aio context, go 100% epoll */
539
511
  linuxaio_free_iocbp (EV_A);
540
512
  ev_io_stop (EV_A_ &linuxaio_epoll_w);
541
513
  ev_ref (EV_A);
542
514
  linuxaio_ctx = 0;
515
+
516
+ backend = EVBACKEND_EPOLL;
543
517
  backend_modify = epoll_modify;
544
518
  backend_poll = epoll_poll;
545
519
  }
546
520
 
547
- timeout = 0;
521
+ timeout = EV_TS_CONST (0.);
548
522
  /* it's easiest to handle this mess in another iteration */
549
523
  return;
550
524
  }
@@ -555,8 +529,13 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
555
529
 
556
530
  res = 1; /* skip this iocb */
557
531
  }
532
+ else if (errno == EINTR) /* not seen in reality, not documented */
533
+ res = 0; /* silently ignore and retry */
558
534
  else
559
- ev_syserr ("(libev) linuxaio io_submit");
535
+ {
536
+ ev_syserr ("(libev) linuxaio io_submit");
537
+ res = 0;
538
+ }
560
539
 
561
540
  submitted += res;
562
541
  }
@@ -589,13 +568,13 @@ linuxaio_init (EV_P_ int flags)
589
568
  return 0;
590
569
  }
591
570
 
592
- ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
571
+ ev_io_init (&linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
593
572
  ev_set_priority (&linuxaio_epoll_w, EV_MAXPRI);
594
573
  ev_io_start (EV_A_ &linuxaio_epoll_w);
595
574
  ev_unref (EV_A); /* watcher should not keep loop alive */
596
575
 
597
- backend_modify = linuxaio_modify;
598
- backend_poll = linuxaio_poll;
576
+ backend_modify = linuxaio_modify;
577
+ backend_poll = linuxaio_poll;
599
578
 
600
579
  linuxaio_iocbpmax = 0;
601
580
  linuxaio_iocbps = 0;
@@ -616,13 +595,13 @@ linuxaio_destroy (EV_P)
616
595
  evsys_io_destroy (linuxaio_ctx); /* fails in child, aio context is destroyed */
617
596
  }
618
597
 
619
- inline_size
620
- void
598
+ ecb_cold
599
+ static void
621
600
  linuxaio_fork (EV_P)
622
601
  {
623
- /* this frees all iocbs, which is very heavy-handed */
624
- linuxaio_destroy (EV_A);
625
602
  linuxaio_submitcnt = 0; /* all pointers were invalidated */
603
+ linuxaio_free_iocbp (EV_A); /* this frees all iocbs, which is very heavy-handed */
604
+ evsys_io_destroy (linuxaio_ctx); /* fails in child, aio context is destroyed */
626
605
 
627
606
  linuxaio_iteration = 0; /* we start over in the child */
628
607
 
@@ -631,12 +610,11 @@ linuxaio_fork (EV_P)
631
610
 
632
611
  /* forking epoll should also effectively unregister all fds from the backend */
633
612
  epoll_fork (EV_A);
613
+ /* epoll_fork already did this. hopefully */
614
+ /*fd_rearm_all (EV_A);*/
634
615
 
635
616
  ev_io_stop (EV_A_ &linuxaio_epoll_w);
636
617
  ev_io_set (EV_A_ &linuxaio_epoll_w, backend_fd, EV_READ);
637
618
  ev_io_start (EV_A_ &linuxaio_epoll_w);
638
-
639
- /* epoll_fork already did this. hopefully */
640
- /*fd_rearm_all (EV_A);*/
641
619
  }
642
620
 
@@ -80,7 +80,7 @@ poll_modify (EV_P_ int fd, int oev, int nev)
80
80
  {
81
81
  pollidxs [fd] = -1;
82
82
 
83
- if (expect_true (idx < --pollcnt))
83
+ if (ecb_expect_true (idx < --pollcnt))
84
84
  {
85
85
  polls [idx] = polls [pollcnt];
86
86
  pollidxs [polls [idx].fd] = idx;
@@ -95,10 +95,10 @@ poll_poll (EV_P_ ev_tstamp timeout)
95
95
  int res;
96
96
 
97
97
  EV_RELEASE_CB;
98
- res = poll (polls, pollcnt, timeout * 1e3);
98
+ res = poll (polls, pollcnt, EV_TS_TO_MSEC (timeout));
99
99
  EV_ACQUIRE_CB;
100
100
 
101
- if (expect_false (res < 0))
101
+ if (ecb_expect_false (res < 0))
102
102
  {
103
103
  if (errno == EBADF)
104
104
  fd_ebadf (EV_A);
@@ -112,11 +112,11 @@ poll_poll (EV_P_ ev_tstamp timeout)
112
112
  {
113
113
  assert (("libev: poll returned illegal result, broken BSD kernel?", p < polls + pollcnt));
114
114
 
115
- if (expect_false (p->revents)) /* this expect is debatable */
115
+ if (ecb_expect_false (p->revents)) /* this expect is debatable */
116
116
  {
117
117
  --res;
118
118
 
119
- if (expect_false (p->revents & POLLNVAL))
119
+ if (ecb_expect_false (p->revents & POLLNVAL))
120
120
  {
121
121
  assert (("libev: poll found invalid fd in poll set", 0));
122
122
  fd_kill (EV_A_ p->fd);
@@ -136,7 +136,7 @@ inline_size
136
136
  int
137
137
  poll_init (EV_P_ int flags)
138
138
  {
139
- backend_mintime = 1e-3;
139
+ backend_mintime = EV_TS_CONST (1e-3);
140
140
  backend_modify = poll_modify;
141
141
  backend_poll = poll_poll;
142
142
 
@@ -132,7 +132,7 @@ port_poll (EV_P_ ev_tstamp timeout)
132
132
  }
133
133
  }
134
134
 
135
- if (expect_false (nget == port_eventmax))
135
+ if (ecb_expect_false (nget == port_eventmax))
136
136
  {
137
137
  ev_free (port_events);
138
138
  port_eventmax = array_nextsize (sizeof (port_event_t), port_eventmax, port_eventmax + 1);
@@ -154,11 +154,11 @@ port_init (EV_P_ int flags)
154
154
 
155
155
  /* if my reading of the opensolaris kernel sources is correct, then
156
156
  * opensolaris does something very stupid: it checks if the time has already
157
- * elapsed and doesn't round up if that is the case,m otherwise it DOES round
157
+ * elapsed and doesn't round up if that is the case, otherwise it DOES round
158
158
  * up. Since we can't know what the case is, we need to guess by using a
159
159
  * "large enough" timeout. Normally, 1e-9 would be correct.
160
160
  */
161
- backend_mintime = 1e-3; /* needed to compensate for port_getn returning early */
161
+ backend_mintime = EV_TS_CONST (1e-3); /* needed to compensate for port_getn returning early */
162
162
  backend_modify = port_modify;
163
163
  backend_poll = port_poll;
164
164
 
@@ -108,7 +108,7 @@ select_modify (EV_P_ int fd, int oev, int nev)
108
108
  int word = fd / NFDBITS;
109
109
  fd_mask mask = 1UL << (fd % NFDBITS);
110
110
 
111
- if (expect_false (vec_max <= word))
111
+ if (ecb_expect_false (vec_max <= word))
112
112
  {
113
113
  int new_max = word + 1;
114
114
 
@@ -171,7 +171,7 @@ select_poll (EV_P_ ev_tstamp timeout)
171
171
  #endif
172
172
  EV_ACQUIRE_CB;
173
173
 
174
- if (expect_false (res < 0))
174
+ if (ecb_expect_false (res < 0))
175
175
  {
176
176
  #if EV_SELECT_IS_WINSOCKET
177
177
  errno = WSAGetLastError ();
@@ -197,7 +197,7 @@ select_poll (EV_P_ ev_tstamp timeout)
197
197
  {
198
198
  if (timeout)
199
199
  {
200
- unsigned long ms = timeout * 1e3;
200
+ unsigned long ms = EV_TS_TO_MSEC (timeout);
201
201
  Sleep (ms ? ms : 1);
202
202
  }
203
203
 
@@ -236,7 +236,7 @@ select_poll (EV_P_ ev_tstamp timeout)
236
236
  if (FD_ISSET (handle, (fd_set *)vec_eo)) events |= EV_WRITE;
237
237
  #endif
238
238
 
239
- if (expect_true (events))
239
+ if (ecb_expect_true (events))
240
240
  fd_event (EV_A_ fd, events);
241
241
  }
242
242
  }
@@ -262,7 +262,7 @@ select_poll (EV_P_ ev_tstamp timeout)
262
262
  events |= word_r & mask ? EV_READ : 0;
263
263
  events |= word_w & mask ? EV_WRITE : 0;
264
264
 
265
- if (expect_true (events))
265
+ if (ecb_expect_true (events))
266
266
  fd_event (EV_A_ word * NFDBITS + bit, events);
267
267
  }
268
268
  }
@@ -275,7 +275,7 @@ inline_size
275
275
  int
276
276
  select_init (EV_P_ int flags)
277
277
  {
278
- backend_mintime = 1e-6;
278
+ backend_mintime = EV_TS_CONST (1e-6);
279
279
  backend_modify = select_modify;
280
280
  backend_poll = select_poll;
281
281