nutcracker 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. data/README.md +22 -0
  2. data/Rakefile +55 -0
  3. data/bin/nutcracker +2 -0
  4. data/ext/nutcracker/ChangeLog +66 -0
  5. data/ext/nutcracker/LICENSE +177 -0
  6. data/ext/nutcracker/Makefile.am +7 -0
  7. data/ext/nutcracker/Makefile.in +726 -0
  8. data/ext/nutcracker/NOTICE +124 -0
  9. data/ext/nutcracker/README.md +240 -0
  10. data/ext/nutcracker/aclocal.m4 +956 -0
  11. data/ext/nutcracker/conf/nutcracker.leaf.yml +10 -0
  12. data/ext/nutcracker/conf/nutcracker.root.yml +8 -0
  13. data/ext/nutcracker/conf/nutcracker.yml +67 -0
  14. data/ext/nutcracker/config.h.in +316 -0
  15. data/ext/nutcracker/config/config.guess +1561 -0
  16. data/ext/nutcracker/config/config.sub +1686 -0
  17. data/ext/nutcracker/config/depcomp +630 -0
  18. data/ext/nutcracker/config/install-sh +520 -0
  19. data/ext/nutcracker/config/ltmain.sh +8413 -0
  20. data/ext/nutcracker/config/missing +376 -0
  21. data/ext/nutcracker/configure +18862 -0
  22. data/ext/nutcracker/configure.ac +155 -0
  23. data/ext/nutcracker/contrib/Makefile.am +3 -0
  24. data/ext/nutcracker/contrib/Makefile.in +560 -0
  25. data/ext/nutcracker/contrib/yaml-0.1.4.tar.gz +0 -0
  26. data/ext/nutcracker/contrib/yaml-0.1.4/LICENSE +19 -0
  27. data/ext/nutcracker/contrib/yaml-0.1.4/Makefile.am +20 -0
  28. data/ext/nutcracker/contrib/yaml-0.1.4/Makefile.in +736 -0
  29. data/ext/nutcracker/contrib/yaml-0.1.4/README +27 -0
  30. data/ext/nutcracker/contrib/yaml-0.1.4/aclocal.m4 +956 -0
  31. data/ext/nutcracker/contrib/yaml-0.1.4/config.h.in +80 -0
  32. data/ext/nutcracker/contrib/yaml-0.1.4/config/config.guess +1561 -0
  33. data/ext/nutcracker/contrib/yaml-0.1.4/config/config.sub +1686 -0
  34. data/ext/nutcracker/contrib/yaml-0.1.4/config/depcomp +630 -0
  35. data/ext/nutcracker/contrib/yaml-0.1.4/config/install-sh +520 -0
  36. data/ext/nutcracker/contrib/yaml-0.1.4/config/ltmain.sh +8406 -0
  37. data/ext/nutcracker/contrib/yaml-0.1.4/config/missing +376 -0
  38. data/ext/nutcracker/contrib/yaml-0.1.4/configure +13085 -0
  39. data/ext/nutcracker/contrib/yaml-0.1.4/configure.ac +75 -0
  40. data/ext/nutcracker/contrib/yaml-0.1.4/doc/doxygen.cfg +222 -0
  41. data/ext/nutcracker/contrib/yaml-0.1.4/include/yaml.h +1971 -0
  42. data/ext/nutcracker/contrib/yaml-0.1.4/m4/libtool.m4 +7357 -0
  43. data/ext/nutcracker/contrib/yaml-0.1.4/m4/ltoptions.m4 +368 -0
  44. data/ext/nutcracker/contrib/yaml-0.1.4/m4/ltsugar.m4 +123 -0
  45. data/ext/nutcracker/contrib/yaml-0.1.4/m4/ltversion.m4 +23 -0
  46. data/ext/nutcracker/contrib/yaml-0.1.4/m4/lt~obsolete.m4 +92 -0
  47. data/ext/nutcracker/contrib/yaml-0.1.4/src/Makefile.am +4 -0
  48. data/ext/nutcracker/contrib/yaml-0.1.4/src/Makefile.in +484 -0
  49. data/ext/nutcracker/contrib/yaml-0.1.4/src/api.c +1392 -0
  50. data/ext/nutcracker/contrib/yaml-0.1.4/src/dumper.c +394 -0
  51. data/ext/nutcracker/contrib/yaml-0.1.4/src/emitter.c +2329 -0
  52. data/ext/nutcracker/contrib/yaml-0.1.4/src/loader.c +432 -0
  53. data/ext/nutcracker/contrib/yaml-0.1.4/src/parser.c +1374 -0
  54. data/ext/nutcracker/contrib/yaml-0.1.4/src/reader.c +465 -0
  55. data/ext/nutcracker/contrib/yaml-0.1.4/src/scanner.c +3570 -0
  56. data/ext/nutcracker/contrib/yaml-0.1.4/src/writer.c +141 -0
  57. data/ext/nutcracker/contrib/yaml-0.1.4/src/yaml_private.h +640 -0
  58. data/ext/nutcracker/contrib/yaml-0.1.4/tests/Makefile.am +8 -0
  59. data/ext/nutcracker/contrib/yaml-0.1.4/tests/Makefile.in +675 -0
  60. data/ext/nutcracker/contrib/yaml-0.1.4/tests/example-deconstructor-alt.c +800 -0
  61. data/ext/nutcracker/contrib/yaml-0.1.4/tests/example-deconstructor.c +1130 -0
  62. data/ext/nutcracker/contrib/yaml-0.1.4/tests/example-reformatter-alt.c +217 -0
  63. data/ext/nutcracker/contrib/yaml-0.1.4/tests/example-reformatter.c +202 -0
  64. data/ext/nutcracker/contrib/yaml-0.1.4/tests/run-dumper.c +311 -0
  65. data/ext/nutcracker/contrib/yaml-0.1.4/tests/run-emitter.c +327 -0
  66. data/ext/nutcracker/contrib/yaml-0.1.4/tests/run-loader.c +63 -0
  67. data/ext/nutcracker/contrib/yaml-0.1.4/tests/run-parser.c +63 -0
  68. data/ext/nutcracker/contrib/yaml-0.1.4/tests/run-scanner.c +63 -0
  69. data/ext/nutcracker/contrib/yaml-0.1.4/tests/test-reader.c +354 -0
  70. data/ext/nutcracker/contrib/yaml-0.1.4/tests/test-version.c +29 -0
  71. data/ext/nutcracker/extconf.rb +5 -0
  72. data/ext/nutcracker/m4/libtool.m4 +7376 -0
  73. data/ext/nutcracker/m4/ltoptions.m4 +368 -0
  74. data/ext/nutcracker/m4/ltsugar.m4 +123 -0
  75. data/ext/nutcracker/m4/ltversion.m4 +23 -0
  76. data/ext/nutcracker/m4/lt~obsolete.m4 +92 -0
  77. data/ext/nutcracker/notes/c-styleguide.txt +425 -0
  78. data/ext/nutcracker/notes/debug.txt +96 -0
  79. data/ext/nutcracker/notes/memcache.txt +123 -0
  80. data/ext/nutcracker/notes/recommendation.md +118 -0
  81. data/ext/nutcracker/notes/redis.md +415 -0
  82. data/ext/nutcracker/notes/socket.txt +131 -0
  83. data/ext/nutcracker/scripts/multi_get.sh +26 -0
  84. data/ext/nutcracker/scripts/nutcracker.init +73 -0
  85. data/ext/nutcracker/scripts/nutcracker.spec +52 -0
  86. data/ext/nutcracker/scripts/pipelined_read.sh +23 -0
  87. data/ext/nutcracker/scripts/pipelined_write.sh +29 -0
  88. data/ext/nutcracker/scripts/populate_memcached.sh +24 -0
  89. data/ext/nutcracker/scripts/redis-check.py +23 -0
  90. data/ext/nutcracker/scripts/redis-check.sh +564 -0
  91. data/ext/nutcracker/src/Makefile.am +46 -0
  92. data/ext/nutcracker/src/Makefile.in +726 -0
  93. data/ext/nutcracker/src/hashkit/Makefile.am +22 -0
  94. data/ext/nutcracker/src/hashkit/Makefile.in +501 -0
  95. data/ext/nutcracker/src/hashkit/nc_crc32.c +105 -0
  96. data/ext/nutcracker/src/hashkit/nc_fnv.c +82 -0
  97. data/ext/nutcracker/src/hashkit/nc_hashkit.h +74 -0
  98. data/ext/nutcracker/src/hashkit/nc_hsieh.c +93 -0
  99. data/ext/nutcracker/src/hashkit/nc_jenkins.c +230 -0
  100. data/ext/nutcracker/src/hashkit/nc_ketama.c +240 -0
  101. data/ext/nutcracker/src/hashkit/nc_md5.c +379 -0
  102. data/ext/nutcracker/src/hashkit/nc_modula.c +144 -0
  103. data/ext/nutcracker/src/hashkit/nc_murmur.c +99 -0
  104. data/ext/nutcracker/src/hashkit/nc_one_at_a_time.c +51 -0
  105. data/ext/nutcracker/src/hashkit/nc_random.c +146 -0
  106. data/ext/nutcracker/src/nc.c +573 -0
  107. data/ext/nutcracker/src/nc_array.c +204 -0
  108. data/ext/nutcracker/src/nc_array.h +73 -0
  109. data/ext/nutcracker/src/nc_client.c +189 -0
  110. data/ext/nutcracker/src/nc_client.h +28 -0
  111. data/ext/nutcracker/src/nc_conf.c +1766 -0
  112. data/ext/nutcracker/src/nc_conf.h +134 -0
  113. data/ext/nutcracker/src/nc_connection.c +392 -0
  114. data/ext/nutcracker/src/nc_connection.h +99 -0
  115. data/ext/nutcracker/src/nc_core.c +334 -0
  116. data/ext/nutcracker/src/nc_core.h +131 -0
  117. data/ext/nutcracker/src/nc_event.c +214 -0
  118. data/ext/nutcracker/src/nc_event.h +39 -0
  119. data/ext/nutcracker/src/nc_log.c +254 -0
  120. data/ext/nutcracker/src/nc_log.h +120 -0
  121. data/ext/nutcracker/src/nc_mbuf.c +285 -0
  122. data/ext/nutcracker/src/nc_mbuf.h +67 -0
  123. data/ext/nutcracker/src/nc_message.c +828 -0
  124. data/ext/nutcracker/src/nc_message.h +253 -0
  125. data/ext/nutcracker/src/nc_proxy.c +359 -0
  126. data/ext/nutcracker/src/nc_proxy.h +34 -0
  127. data/ext/nutcracker/src/nc_queue.h +788 -0
  128. data/ext/nutcracker/src/nc_rbtree.c +348 -0
  129. data/ext/nutcracker/src/nc_rbtree.h +47 -0
  130. data/ext/nutcracker/src/nc_request.c +588 -0
  131. data/ext/nutcracker/src/nc_response.c +332 -0
  132. data/ext/nutcracker/src/nc_server.c +841 -0
  133. data/ext/nutcracker/src/nc_server.h +143 -0
  134. data/ext/nutcracker/src/nc_signal.c +131 -0
  135. data/ext/nutcracker/src/nc_signal.h +34 -0
  136. data/ext/nutcracker/src/nc_stats.c +1188 -0
  137. data/ext/nutcracker/src/nc_stats.h +206 -0
  138. data/ext/nutcracker/src/nc_string.c +109 -0
  139. data/ext/nutcracker/src/nc_string.h +112 -0
  140. data/ext/nutcracker/src/nc_util.c +619 -0
  141. data/ext/nutcracker/src/nc_util.h +214 -0
  142. data/ext/nutcracker/src/proto/Makefile.am +14 -0
  143. data/ext/nutcracker/src/proto/Makefile.in +482 -0
  144. data/ext/nutcracker/src/proto/nc_memcache.c +1306 -0
  145. data/ext/nutcracker/src/proto/nc_proto.h +155 -0
  146. data/ext/nutcracker/src/proto/nc_redis.c +2102 -0
  147. data/lib/nutcracker.rb +7 -0
  148. data/lib/nutcracker/version.rb +3 -0
  149. metadata +194 -0
@@ -0,0 +1,828 @@
1
+ /*
2
+ * twemproxy - A fast and lightweight proxy for memcached protocol.
3
+ * Copyright (C) 2011 Twitter, Inc.
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ #include <stdio.h>
19
+ #include <stdlib.h>
20
+
21
+ #include <sys/uio.h>
22
+
23
+ #include <nc_core.h>
24
+ #include <nc_server.h>
25
+ #include <proto/nc_proto.h>
26
+
27
/*
 * Upper bound on the number of iovec entries used for one vectored send.
 * The value is capped at 128 and lowered to IOV_MAX when the platform limit
 * is smaller than that. When IOV_MAX is not defined by the included headers,
 * fall back to the cap rather than expanding to an undefined identifier.
 */
#if defined(IOV_MAX) && (IOV_MAX < 128)
#define NC_IOV_MAX IOV_MAX
#else
#define NC_IOV_MAX 128
#endif
32
+
33
+ /*
34
+ * nc_message.[ch]
35
+ * message (struct msg)
36
+ * + + .
37
+ * | | .
38
+ * / \ .
39
+ * Request Response .../ nc_mbuf.[ch] (message buffers)
40
+ * nc_request.c nc_response.c .../ nc_memcache.c; nc_redis.c (message parser)
41
+ *
42
+ * Messages in nutcracker are manipulated by a chain of processing handlers,
43
+ * where each handler is responsible for taking the input and producing an
44
+ * output for the next handler in the chain. This mechanism of processing
45
+ * loosely conforms to the standard chain-of-responsibility design pattern
46
+ *
47
+ * At the high level, each handler takes in a message: request or response
48
+ * and produces the message for the next handler in the chain. The input
49
+ * for a handler is either a request or response, but never both and
50
+ * similarly the output of a handler is either a request or response or
51
+ * nothing.
52
+ *
53
+ * Each handler itself is composed of two processing units:
54
+ *
55
+ * 1). filter: manipulates output produced by the handler, usually based
56
+ * on a policy. If needed, multiple filters can be hooked into each
57
+ * location.
58
+ * 2). forwarder: chooses one of the backend servers to send the request
59
+ * to, usually based on the configured distribution and key hasher.
60
+ *
61
+ * Handlers are registered either with Client or Server or Proxy
62
+ * connections. A Proxy connection only has a read handler as it is only
63
+ * responsible for accepting new connections from client. Read handler
64
+ * (conn_recv_t) registered with client is responsible for reading requests,
65
+ * while that registered with server is responsible for reading responses.
66
+ * Write handler (conn_send_t) registered with client is responsible for
67
+ * writing response, while that registered with server is responsible for
68
+ * writing requests.
69
+ *
70
+ * Note that in the above discussion, the terminology send is used
71
+ * synonymously with write or OUT event. Similarly recv is used synonymously
72
+ * with read or IN event
73
+ *
74
+ * Client+ Proxy Server+
75
+ * (nutcracker)
76
+ * .
77
+ * msg_recv {read event} . msg_recv {read event}
78
+ * + . +
79
+ * | . |
80
+ * \ . /
81
+ * req_recv_next . rsp_recv_next
82
+ * + . +
83
+ * | . | Rsp
84
+ * req_recv_done . rsp_recv_done <===
85
+ * + . +
86
+ * | . |
87
+ * Req \ . /
88
+ * ===> req_filter* . *rsp_filter
89
+ * + . +
90
+ * | . |
91
+ * \ . /
92
+ * req_forward-// (a) . (c) \\-rsp_forward
93
+ * .
94
+ * .
95
+ * msg_send {write event} . msg_send {write event}
96
+ * + . +
97
+ * | . |
98
+ * Rsp' \ . / Req'
99
+ * <=== rsp_send_next . req_send_next ===>
100
+ * + . +
101
+ * | . |
102
+ * \ . /
103
+ * rsp_send_done-// (d) . (b) //-req_send_done
104
+ *
105
+ *
106
+ * (a) -> (b) -> (c) -> (d) is the normal flow of transaction consisting
107
+ * of a single request response, where (a) and (b) handle request from
108
+ * client, while (c) and (d) handle the corresponding response from the
109
+ * server.
110
+ */
111
+
112
+ static uint64_t msg_id; /* message id counter */
113
+ static uint64_t frag_id; /* fragment id counter */
114
+ static uint32_t nfree_msgq; /* # free msg q */
115
+ static struct msg_tqh free_msgq; /* free msg q */
116
+ static struct rbtree tmo_rbt; /* timeout rbtree */
117
+ static struct rbnode tmo_rbs; /* timeout rbtree sentinel */
118
+
119
+ static struct msg *
120
+ msg_from_rbe(struct rbnode *node)
121
+ {
122
+ struct msg *msg;
123
+ int offset;
124
+
125
+ offset = offsetof(struct msg, tmo_rbe);
126
+ msg = (struct msg *)((char *)node - offset);
127
+
128
+ return msg;
129
+ }
130
+
131
+ struct msg *
132
+ msg_tmo_min(void)
133
+ {
134
+ struct rbnode *node;
135
+
136
+ node = rbtree_min(&tmo_rbt);
137
+ if (node == NULL) {
138
+ return NULL;
139
+ }
140
+
141
+ return msg_from_rbe(node);
142
+ }
143
+
144
+ void
145
+ msg_tmo_insert(struct msg *msg, struct conn *conn)
146
+ {
147
+ struct rbnode *node;
148
+ int timeout;
149
+
150
+ ASSERT(msg->request);
151
+ ASSERT(!msg->quit && !msg->noreply);
152
+
153
+ timeout = server_timeout(conn);
154
+ if (timeout <= 0) {
155
+ return;
156
+ }
157
+
158
+ node = &msg->tmo_rbe;
159
+ node->key = nc_msec_now() + timeout;
160
+ node->data = conn;
161
+
162
+ rbtree_insert(&tmo_rbt, node);
163
+
164
+ log_debug(LOG_VERB, "insert msg %"PRIu64" into tmo rbt with expiry of "
165
+ "%d msec", msg->id, timeout);
166
+ }
167
+
168
+ void
169
+ msg_tmo_delete(struct msg *msg)
170
+ {
171
+ struct rbnode *node;
172
+
173
+ node = &msg->tmo_rbe;
174
+
175
+ /* already deleted */
176
+
177
+ if (node->data == NULL) {
178
+ return;
179
+ }
180
+
181
+ rbtree_delete(&tmo_rbt, node);
182
+
183
+ log_debug(LOG_VERB, "delete msg %"PRIu64" from tmo rbt", msg->id);
184
+ }
185
+
186
+ static struct msg *
187
+ _msg_get(void)
188
+ {
189
+ struct msg *msg;
190
+
191
+ if (!TAILQ_EMPTY(&free_msgq)) {
192
+ ASSERT(nfree_msgq > 0);
193
+
194
+ msg = TAILQ_FIRST(&free_msgq);
195
+ nfree_msgq--;
196
+ TAILQ_REMOVE(&free_msgq, msg, m_tqe);
197
+ goto done;
198
+ }
199
+
200
+ msg = nc_alloc(sizeof(*msg));
201
+ if (msg == NULL) {
202
+ return NULL;
203
+ }
204
+
205
+ done:
206
+ /* c_tqe, s_tqe, and m_tqe are left uninitialized */
207
+ msg->id = ++msg_id;
208
+ msg->peer = NULL;
209
+ msg->owner = NULL;
210
+
211
+ rbtree_node_init(&msg->tmo_rbe);
212
+
213
+ STAILQ_INIT(&msg->mhdr);
214
+ msg->mlen = 0;
215
+
216
+ msg->state = 0;
217
+ msg->pos = NULL;
218
+ msg->token = NULL;
219
+
220
+ msg->parser = NULL;
221
+ msg->result = MSG_PARSE_OK;
222
+
223
+ msg->pre_splitcopy = NULL;
224
+ msg->post_splitcopy = NULL;
225
+ msg->pre_coalesce = NULL;
226
+ msg->post_coalesce = NULL;
227
+
228
+ msg->type = MSG_UNKNOWN;
229
+
230
+ msg->key_start = NULL;
231
+ msg->key_end = NULL;
232
+
233
+ msg->vlen = 0;
234
+ msg->end = NULL;
235
+
236
+ msg->frag_owner = NULL;
237
+ msg->nfrag = 0;
238
+ msg->frag_id = 0;
239
+
240
+ msg->narg_start = NULL;
241
+ msg->narg_end = NULL;
242
+ msg->narg = 0;
243
+ msg->rnarg = 0;
244
+ msg->rlen = 0;
245
+ msg->integer = 0;
246
+
247
+ msg->err = 0;
248
+ msg->error = 0;
249
+ msg->ferror = 0;
250
+ msg->request = 0;
251
+ msg->quit = 0;
252
+ msg->noreply = 0;
253
+ msg->done = 0;
254
+ msg->fdone = 0;
255
+ msg->first_fragment = 0;
256
+ msg->last_fragment = 0;
257
+ msg->swallow = 0;
258
+ msg->redis = 0;
259
+
260
+ return msg;
261
+ }
262
+
263
+ struct msg *
264
+ msg_get(struct conn *conn, bool request, bool redis)
265
+ {
266
+ struct msg *msg;
267
+
268
+ msg = _msg_get();
269
+ if (msg == NULL) {
270
+ return NULL;
271
+ }
272
+
273
+ msg->owner = conn;
274
+ msg->request = request ? 1 : 0;
275
+ msg->redis = redis ? 1 : 0;
276
+
277
+ if (redis) {
278
+ if (request) {
279
+ msg->parser = redis_parse_req;
280
+ } else {
281
+ msg->parser = redis_parse_rsp;
282
+ }
283
+ msg->pre_splitcopy = redis_pre_splitcopy;
284
+ msg->post_splitcopy = redis_post_splitcopy;
285
+ msg->pre_coalesce = redis_pre_coalesce;
286
+ msg->post_coalesce = redis_post_coalesce;
287
+ } else {
288
+ if (request) {
289
+ msg->parser = memcache_parse_req;
290
+ } else {
291
+ msg->parser = memcache_parse_rsp;
292
+ }
293
+ msg->pre_splitcopy = memcache_pre_splitcopy;
294
+ msg->post_splitcopy = memcache_post_splitcopy;
295
+ msg->pre_coalesce = memcache_pre_coalesce;
296
+ msg->post_coalesce = memcache_post_coalesce;
297
+ }
298
+
299
+ log_debug(LOG_VVERB, "get msg %p id %"PRIu64" request %d owner sd %d",
300
+ msg, msg->id, msg->request, conn->sd);
301
+
302
+ return msg;
303
+ }
304
+
305
+ struct msg *
306
+ msg_get_error(bool redis, err_t err)
307
+ {
308
+ struct msg *msg;
309
+ struct mbuf *mbuf;
310
+ int n;
311
+ char *errstr = err ? strerror(err) : "unknown";
312
+ char *protstr = redis ? "-ERR" : "SERVER_ERROR";
313
+
314
+ msg = _msg_get();
315
+ if (msg == NULL) {
316
+ return NULL;
317
+ }
318
+
319
+ msg->state = 0;
320
+ msg->type = MSG_RSP_MC_SERVER_ERROR;
321
+
322
+ mbuf = mbuf_get();
323
+ if (mbuf == NULL) {
324
+ msg_put(msg);
325
+ return NULL;
326
+ }
327
+ mbuf_insert(&msg->mhdr, mbuf);
328
+
329
+ n = nc_scnprintf(mbuf->last, mbuf_size(mbuf), "%s %s"CRLF, protstr, errstr);
330
+ mbuf->last += n;
331
+ msg->mlen = (uint32_t)n;
332
+
333
+ log_debug(LOG_VVERB, "get msg %p id %"PRIu64" len %"PRIu32" error '%s'",
334
+ msg, msg->id, msg->mlen, errstr);
335
+
336
+ return msg;
337
+ }
338
+
339
+ static void
340
+ msg_free(struct msg *msg)
341
+ {
342
+ ASSERT(STAILQ_EMPTY(&msg->mhdr));
343
+
344
+ log_debug(LOG_VVERB, "free msg %p id %"PRIu64"", msg, msg->id);
345
+ nc_free(msg);
346
+ }
347
+
348
+ void
349
+ msg_put(struct msg *msg)
350
+ {
351
+ log_debug(LOG_VVERB, "put msg %p id %"PRIu64"", msg, msg->id);
352
+
353
+ while (!STAILQ_EMPTY(&msg->mhdr)) {
354
+ struct mbuf *mbuf = STAILQ_FIRST(&msg->mhdr);
355
+ mbuf_remove(&msg->mhdr, mbuf);
356
+ mbuf_put(mbuf);
357
+ }
358
+
359
+ nfree_msgq++;
360
+ TAILQ_INSERT_HEAD(&free_msgq, msg, m_tqe);
361
+ }
362
+
363
+ void
364
+ msg_dump(struct msg *msg)
365
+ {
366
+ struct mbuf *mbuf;
367
+
368
+ loga("msg dump id %"PRIu64" request %d len %"PRIu32" type %d done %d "
369
+ "error %d (err %d)", msg->id, msg->request, msg->mlen, msg->type,
370
+ msg->done, msg->error, msg->err);
371
+
372
+ STAILQ_FOREACH(mbuf, &msg->mhdr, next) {
373
+ uint8_t *p, *q;
374
+ long int len;
375
+
376
+ p = mbuf->start;
377
+ q = mbuf->last;
378
+ len = q - p;
379
+
380
+ loga_hexdump(p, len, "mbuf with %ld bytes of data", len);
381
+ }
382
+ }
383
+
384
+ void
385
+ msg_init(void)
386
+ {
387
+ log_debug(LOG_DEBUG, "msg size %d", sizeof(struct msg));
388
+ msg_id = 0;
389
+ frag_id = 0;
390
+ nfree_msgq = 0;
391
+ TAILQ_INIT(&free_msgq);
392
+ rbtree_init(&tmo_rbt, &tmo_rbs);
393
+ }
394
+
395
+ void
396
+ msg_deinit(void)
397
+ {
398
+ struct msg *msg, *nmsg;
399
+
400
+ for (msg = TAILQ_FIRST(&free_msgq); msg != NULL;
401
+ msg = nmsg, nfree_msgq--) {
402
+ ASSERT(nfree_msgq > 0);
403
+ nmsg = TAILQ_NEXT(msg, m_tqe);
404
+ msg_free(msg);
405
+ }
406
+ ASSERT(nfree_msgq == 0);
407
+ }
408
+
409
+ bool
410
+ msg_empty(struct msg *msg)
411
+ {
412
+ return msg->mlen == 0 ? true : false;
413
+ }
414
+
415
+ static rstatus_t
416
+ msg_parsed(struct context *ctx, struct conn *conn, struct msg *msg)
417
+ {
418
+ struct msg *nmsg;
419
+ struct mbuf *mbuf, *nbuf;
420
+
421
+ mbuf = STAILQ_LAST(&msg->mhdr, mbuf, next);
422
+ if (msg->pos == mbuf->last) {
423
+ /* no more data to parse */
424
+ conn->recv_done(ctx, conn, msg, NULL);
425
+ return NC_OK;
426
+ }
427
+
428
+ /*
429
+ * Input mbuf has un-parsed data. Split mbuf of the current message msg
430
+ * into (mbuf, nbuf), where mbuf is the portion of the message that has
431
+ * been parsed and nbuf is the portion of the message that is un-parsed.
432
+ * Parse nbuf as a new message nmsg in the next iteration.
433
+ */
434
+ nbuf = mbuf_split(&msg->mhdr, msg->pos, NULL, NULL);
435
+ if (nbuf == NULL) {
436
+ return NC_ENOMEM;
437
+ }
438
+
439
+ nmsg = msg_get(msg->owner, msg->request, conn->redis);
440
+ if (nmsg == NULL) {
441
+ mbuf_put(nbuf);
442
+ return NC_ENOMEM;
443
+ }
444
+ mbuf_insert(&nmsg->mhdr, nbuf);
445
+ nmsg->pos = nbuf->pos;
446
+
447
+ /* update length of current (msg) and new message (nmsg) */
448
+ nmsg->mlen = mbuf_length(nbuf);
449
+ msg->mlen -= nmsg->mlen;
450
+
451
+ conn->recv_done(ctx, conn, msg, nmsg);
452
+
453
+ return NC_OK;
454
+ }
455
+
456
+ static rstatus_t
457
+ msg_fragment(struct context *ctx, struct conn *conn, struct msg *msg)
458
+ {
459
+ rstatus_t status; /* return status */
460
+ struct msg *nmsg; /* new message */
461
+ struct mbuf *nbuf; /* new mbuf */
462
+
463
+ ASSERT(conn->client && !conn->proxy);
464
+ ASSERT(msg->request);
465
+
466
+ nbuf = mbuf_split(&msg->mhdr, msg->pos, msg->pre_splitcopy, msg);
467
+ if (nbuf == NULL) {
468
+ return NC_ENOMEM;
469
+ }
470
+
471
+ status = msg->post_splitcopy(msg);
472
+ if (status != NC_OK) {
473
+ mbuf_put(nbuf);
474
+ return status;
475
+ }
476
+
477
+ nmsg = msg_get(msg->owner, msg->request, msg->redis);
478
+ if (nmsg == NULL) {
479
+ mbuf_put(nbuf);
480
+ return NC_ENOMEM;
481
+ }
482
+ mbuf_insert(&nmsg->mhdr, nbuf);
483
+ nmsg->pos = nbuf->pos;
484
+
485
+ /* update length of current (msg) and new message (nmsg) */
486
+ nmsg->mlen = mbuf_length(nbuf);
487
+ msg->mlen -= nmsg->mlen;
488
+
489
+ /*
490
+ * Attach unique fragment id to all fragments of the message vector. All
491
+ * fragments of the message, including the first fragment point to the
492
+ * first fragment through the frag_owner pointer. The first_fragment and
493
+ * last_fragment identify first and last fragment respectively.
494
+ *
495
+ * For example, a message vector given below is split into 3 fragments:
496
+ * 'get key1 key2 key3\r\n'
497
+ * Or,
498
+ * '*4\r\n$4\r\nmget\r\n$4\r\nkey1\r\n$4\r\nkey2\r\n$4\r\nkey3\r\n'
499
+ *
500
+ * +--------------+
501
+ * | msg vector |
502
+ * |(original msg)|
503
+ * +--------------+
504
+ *
505
+ * frag_owner frag_owner
506
+ * /-----------+ /------------+
507
+ * | | | |
508
+ * | v v |
509
+ * +--------------------+ +---------------------+
510
+ * | frag_id = 10 | | frag_id = 10 |
511
+ * | first_fragment = 1 | | first_fragment = 0 |
512
+ * | last_fragment = 0 | | last_fragment = 0 |
513
+ * | nfrag = 3 | | nfrag = 0 |
514
+ * +--------------------+ +---------------------+
515
+ * ^
516
+ * | frag_owner
517
+ * \-------------+
518
+ * |
519
+ * |
520
+ * +---------------------+
521
+ * | frag_id = 10 |
522
+ * | first_fragment = 0 |
523
+ * | last_fragment = 1 |
524
+ * | nfrag = 0 |
525
+ * +---------------------+
526
+ *
527
+ *
528
+ */
529
+ if (msg->frag_id == 0) {
530
+ msg->frag_id = ++frag_id;
531
+ msg->first_fragment = 1;
532
+ msg->nfrag = 1;
533
+ msg->frag_owner = msg;
534
+ }
535
+ nmsg->frag_id = msg->frag_id;
536
+ msg->last_fragment = 0;
537
+ nmsg->last_fragment = 1;
538
+ nmsg->frag_owner = msg->frag_owner;
539
+ msg->frag_owner->nfrag++;
540
+
541
+ stats_pool_incr(ctx, conn->owner, fragments);
542
+
543
+ log_debug(LOG_VERB, "fragment msg into %"PRIu64" and %"PRIu64" frag id "
544
+ "%"PRIu64"", msg->id, nmsg->id, msg->frag_id);
545
+
546
+ conn->recv_done(ctx, conn, msg, nmsg);
547
+
548
+ return NC_OK;
549
+ }
550
+
551
+ static rstatus_t
552
+ msg_repair(struct context *ctx, struct conn *conn, struct msg *msg)
553
+ {
554
+ struct mbuf *nbuf;
555
+
556
+ nbuf = mbuf_split(&msg->mhdr, msg->pos, NULL, NULL);
557
+ if (nbuf == NULL) {
558
+ return NC_ENOMEM;
559
+ }
560
+ mbuf_insert(&msg->mhdr, nbuf);
561
+ msg->pos = nbuf->pos;
562
+
563
+ return NC_OK;
564
+ }
565
+
566
+ static rstatus_t
567
+ msg_parse(struct context *ctx, struct conn *conn, struct msg *msg)
568
+ {
569
+ rstatus_t status;
570
+
571
+ if (msg_empty(msg)) {
572
+ /* no data to parse */
573
+ conn->recv_done(ctx, conn, msg, NULL);
574
+ return NC_OK;
575
+ }
576
+
577
+ msg->parser(msg);
578
+
579
+ switch (msg->result) {
580
+ case MSG_PARSE_OK:
581
+ status = msg_parsed(ctx, conn, msg);
582
+ break;
583
+
584
+ case MSG_PARSE_FRAGMENT:
585
+ status = msg_fragment(ctx, conn, msg);
586
+ break;
587
+
588
+ case MSG_PARSE_REPAIR:
589
+ status = msg_repair(ctx, conn, msg);
590
+ break;
591
+
592
+ case MSG_PARSE_AGAIN:
593
+ status = NC_OK;
594
+ break;
595
+
596
+ default:
597
+ status = NC_ERROR;
598
+ conn->err = errno;
599
+ break;
600
+ }
601
+
602
+ return conn->err != 0 ? NC_ERROR : status;
603
+ }
604
+
605
+ static rstatus_t
606
+ msg_recv_chain(struct context *ctx, struct conn *conn, struct msg *msg)
607
+ {
608
+ rstatus_t status;
609
+ struct msg *nmsg;
610
+ struct mbuf *mbuf;
611
+ size_t msize;
612
+ ssize_t n;
613
+
614
+ mbuf = STAILQ_LAST(&msg->mhdr, mbuf, next);
615
+ if (mbuf == NULL || mbuf_full(mbuf)) {
616
+ mbuf = mbuf_get();
617
+ if (mbuf == NULL) {
618
+ return NC_ENOMEM;
619
+ }
620
+ mbuf_insert(&msg->mhdr, mbuf);
621
+ msg->pos = mbuf->pos;
622
+ }
623
+ ASSERT(mbuf->end - mbuf->last > 0);
624
+
625
+ msize = mbuf_size(mbuf);
626
+
627
+ n = conn_recv(conn, mbuf->last, msize);
628
+ if (n < 0) {
629
+ if (n == NC_EAGAIN) {
630
+ return NC_OK;
631
+ }
632
+ return NC_ERROR;
633
+ }
634
+
635
+ ASSERT((mbuf->last + n) <= mbuf->end);
636
+ mbuf->last += n;
637
+ msg->mlen += (uint32_t)n;
638
+
639
+ for (;;) {
640
+ status = msg_parse(ctx, conn, msg);
641
+ if (status != NC_OK) {
642
+ return status;
643
+ }
644
+
645
+ /* get next message to parse */
646
+ nmsg = conn->recv_next(ctx, conn, false);
647
+ if (nmsg == NULL || nmsg == msg) {
648
+ /* no more data to parse */
649
+ break;
650
+ }
651
+
652
+ msg = nmsg;
653
+ }
654
+
655
+ return NC_OK;
656
+ }
657
+
658
+ rstatus_t
659
+ msg_recv(struct context *ctx, struct conn *conn)
660
+ {
661
+ rstatus_t status;
662
+ struct msg *msg;
663
+
664
+ ASSERT(conn->recv_active);
665
+
666
+ conn->recv_ready = 1;
667
+ do {
668
+ msg = conn->recv_next(ctx, conn, true);
669
+ if (msg == NULL) {
670
+ return NC_OK;
671
+ }
672
+
673
+ status = msg_recv_chain(ctx, conn, msg);
674
+ if (status != NC_OK) {
675
+ return status;
676
+ }
677
+ } while (conn->recv_ready);
678
+
679
+ return NC_OK;
680
+ }
681
+
682
+ static rstatus_t
683
+ msg_send_chain(struct context *ctx, struct conn *conn, struct msg *msg)
684
+ {
685
+ struct msg_tqh send_msgq; /* send msg q */
686
+ struct msg *nmsg; /* next msg */
687
+ struct mbuf *mbuf, *nbuf; /* current and next mbuf */
688
+ size_t mlen; /* current mbuf data length */
689
+ struct iovec *ciov, iov[NC_IOV_MAX]; /* current iovec */
690
+ struct array sendv; /* send iovec */
691
+ size_t nsend, nsent; /* bytes to send; bytes sent */
692
+ size_t limit; /* bytes to send limit */
693
+ ssize_t n; /* bytes sent by sendv */
694
+
695
+ TAILQ_INIT(&send_msgq);
696
+
697
+ array_set(&sendv, iov, sizeof(iov[0]), NC_IOV_MAX);
698
+
699
+ /* preprocess - build iovec */
700
+
701
+ nsend = 0;
702
+ /*
703
+ * readv() and writev() returns EINVAL if the sum of the iov_len values
704
+ * overflows an ssize_t value Or, the vector count iovcnt is less than
705
+ * zero or greater than the permitted maximum.
706
+ */
707
+ limit = SSIZE_MAX;
708
+
709
+ for (;;) {
710
+ ASSERT(conn->smsg == msg);
711
+
712
+ TAILQ_INSERT_TAIL(&send_msgq, msg, m_tqe);
713
+
714
+ for (mbuf = STAILQ_FIRST(&msg->mhdr);
715
+ mbuf != NULL && array_n(&sendv) < NC_IOV_MAX && nsend < limit;
716
+ mbuf = nbuf) {
717
+ nbuf = STAILQ_NEXT(mbuf, next);
718
+
719
+ if (mbuf_empty(mbuf)) {
720
+ continue;
721
+ }
722
+
723
+ mlen = mbuf_length(mbuf);
724
+ if ((nsend + mlen) > limit) {
725
+ mlen = limit - nsend;
726
+ }
727
+
728
+ ciov = array_push(&sendv);
729
+ ciov->iov_base = mbuf->pos;
730
+ ciov->iov_len = mlen;
731
+
732
+ nsend += mlen;
733
+ }
734
+
735
+ if (array_n(&sendv) >= NC_IOV_MAX || nsend >= limit) {
736
+ break;
737
+ }
738
+
739
+ msg = conn->send_next(ctx, conn);
740
+ if (msg == NULL) {
741
+ break;
742
+ }
743
+ }
744
+
745
+ ASSERT(!TAILQ_EMPTY(&send_msgq) && nsend != 0);
746
+
747
+ conn->smsg = NULL;
748
+
749
+ n = conn_sendv(conn, &sendv, nsend);
750
+
751
+ nsent = n > 0 ? (size_t)n : 0;
752
+
753
+ /* postprocess - process sent messages in send_msgq */
754
+
755
+ for (msg = TAILQ_FIRST(&send_msgq); msg != NULL; msg = nmsg) {
756
+ nmsg = TAILQ_NEXT(msg, m_tqe);
757
+
758
+ TAILQ_REMOVE(&send_msgq, msg, m_tqe);
759
+
760
+ if (nsent == 0) {
761
+ if (msg->mlen == 0) {
762
+ conn->send_done(ctx, conn, msg);
763
+ }
764
+ continue;
765
+ }
766
+
767
+ /* adjust mbufs of the sent message */
768
+ for (mbuf = STAILQ_FIRST(&msg->mhdr); mbuf != NULL; mbuf = nbuf) {
769
+ nbuf = STAILQ_NEXT(mbuf, next);
770
+
771
+ if (mbuf_empty(mbuf)) {
772
+ continue;
773
+ }
774
+
775
+ mlen = mbuf_length(mbuf);
776
+ if (nsent < mlen) {
777
+ /* mbuf was sent partially; process remaining bytes later */
778
+ mbuf->pos += nsent;
779
+ ASSERT(mbuf->pos < mbuf->last);
780
+ nsent = 0;
781
+ break;
782
+ }
783
+
784
+ /* mbuf was sent completely; mark it empty */
785
+ mbuf->pos = mbuf->last;
786
+ nsent -= mlen;
787
+ }
788
+
789
+ /* message has been sent completely, finalize it */
790
+ if (mbuf == NULL) {
791
+ conn->send_done(ctx, conn, msg);
792
+ }
793
+ }
794
+
795
+ ASSERT(TAILQ_EMPTY(&send_msgq));
796
+
797
+ if (n > 0) {
798
+ return NC_OK;
799
+ }
800
+
801
+ return (n == NC_EAGAIN) ? NC_OK : NC_ERROR;
802
+ }
803
+
804
+ rstatus_t
805
+ msg_send(struct context *ctx, struct conn *conn)
806
+ {
807
+ rstatus_t status;
808
+ struct msg *msg;
809
+
810
+ ASSERT(conn->send_active);
811
+
812
+ conn->send_ready = 1;
813
+ do {
814
+ msg = conn->send_next(ctx, conn);
815
+ if (msg == NULL) {
816
+ /* nothing to send */
817
+ return NC_OK;
818
+ }
819
+
820
+ status = msg_send_chain(ctx, conn, msg);
821
+ if (status != NC_OK) {
822
+ return status;
823
+ }
824
+
825
+ } while (conn->send_ready);
826
+
827
+ return NC_OK;
828
+ }