nutcracker 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149) hide show
  1. data/README.md +22 -0
  2. data/Rakefile +55 -0
  3. data/bin/nutcracker +2 -0
  4. data/ext/nutcracker/ChangeLog +66 -0
  5. data/ext/nutcracker/LICENSE +177 -0
  6. data/ext/nutcracker/Makefile.am +7 -0
  7. data/ext/nutcracker/Makefile.in +726 -0
  8. data/ext/nutcracker/NOTICE +124 -0
  9. data/ext/nutcracker/README.md +240 -0
  10. data/ext/nutcracker/aclocal.m4 +956 -0
  11. data/ext/nutcracker/conf/nutcracker.leaf.yml +10 -0
  12. data/ext/nutcracker/conf/nutcracker.root.yml +8 -0
  13. data/ext/nutcracker/conf/nutcracker.yml +67 -0
  14. data/ext/nutcracker/config.h.in +316 -0
  15. data/ext/nutcracker/config/config.guess +1561 -0
  16. data/ext/nutcracker/config/config.sub +1686 -0
  17. data/ext/nutcracker/config/depcomp +630 -0
  18. data/ext/nutcracker/config/install-sh +520 -0
  19. data/ext/nutcracker/config/ltmain.sh +8413 -0
  20. data/ext/nutcracker/config/missing +376 -0
  21. data/ext/nutcracker/configure +18862 -0
  22. data/ext/nutcracker/configure.ac +155 -0
  23. data/ext/nutcracker/contrib/Makefile.am +3 -0
  24. data/ext/nutcracker/contrib/Makefile.in +560 -0
  25. data/ext/nutcracker/contrib/yaml-0.1.4.tar.gz +0 -0
  26. data/ext/nutcracker/contrib/yaml-0.1.4/LICENSE +19 -0
  27. data/ext/nutcracker/contrib/yaml-0.1.4/Makefile.am +20 -0
  28. data/ext/nutcracker/contrib/yaml-0.1.4/Makefile.in +736 -0
  29. data/ext/nutcracker/contrib/yaml-0.1.4/README +27 -0
  30. data/ext/nutcracker/contrib/yaml-0.1.4/aclocal.m4 +956 -0
  31. data/ext/nutcracker/contrib/yaml-0.1.4/config.h.in +80 -0
  32. data/ext/nutcracker/contrib/yaml-0.1.4/config/config.guess +1561 -0
  33. data/ext/nutcracker/contrib/yaml-0.1.4/config/config.sub +1686 -0
  34. data/ext/nutcracker/contrib/yaml-0.1.4/config/depcomp +630 -0
  35. data/ext/nutcracker/contrib/yaml-0.1.4/config/install-sh +520 -0
  36. data/ext/nutcracker/contrib/yaml-0.1.4/config/ltmain.sh +8406 -0
  37. data/ext/nutcracker/contrib/yaml-0.1.4/config/missing +376 -0
  38. data/ext/nutcracker/contrib/yaml-0.1.4/configure +13085 -0
  39. data/ext/nutcracker/contrib/yaml-0.1.4/configure.ac +75 -0
  40. data/ext/nutcracker/contrib/yaml-0.1.4/doc/doxygen.cfg +222 -0
  41. data/ext/nutcracker/contrib/yaml-0.1.4/include/yaml.h +1971 -0
  42. data/ext/nutcracker/contrib/yaml-0.1.4/m4/libtool.m4 +7357 -0
  43. data/ext/nutcracker/contrib/yaml-0.1.4/m4/ltoptions.m4 +368 -0
  44. data/ext/nutcracker/contrib/yaml-0.1.4/m4/ltsugar.m4 +123 -0
  45. data/ext/nutcracker/contrib/yaml-0.1.4/m4/ltversion.m4 +23 -0
  46. data/ext/nutcracker/contrib/yaml-0.1.4/m4/lt~obsolete.m4 +92 -0
  47. data/ext/nutcracker/contrib/yaml-0.1.4/src/Makefile.am +4 -0
  48. data/ext/nutcracker/contrib/yaml-0.1.4/src/Makefile.in +484 -0
  49. data/ext/nutcracker/contrib/yaml-0.1.4/src/api.c +1392 -0
  50. data/ext/nutcracker/contrib/yaml-0.1.4/src/dumper.c +394 -0
  51. data/ext/nutcracker/contrib/yaml-0.1.4/src/emitter.c +2329 -0
  52. data/ext/nutcracker/contrib/yaml-0.1.4/src/loader.c +432 -0
  53. data/ext/nutcracker/contrib/yaml-0.1.4/src/parser.c +1374 -0
  54. data/ext/nutcracker/contrib/yaml-0.1.4/src/reader.c +465 -0
  55. data/ext/nutcracker/contrib/yaml-0.1.4/src/scanner.c +3570 -0
  56. data/ext/nutcracker/contrib/yaml-0.1.4/src/writer.c +141 -0
  57. data/ext/nutcracker/contrib/yaml-0.1.4/src/yaml_private.h +640 -0
  58. data/ext/nutcracker/contrib/yaml-0.1.4/tests/Makefile.am +8 -0
  59. data/ext/nutcracker/contrib/yaml-0.1.4/tests/Makefile.in +675 -0
  60. data/ext/nutcracker/contrib/yaml-0.1.4/tests/example-deconstructor-alt.c +800 -0
  61. data/ext/nutcracker/contrib/yaml-0.1.4/tests/example-deconstructor.c +1130 -0
  62. data/ext/nutcracker/contrib/yaml-0.1.4/tests/example-reformatter-alt.c +217 -0
  63. data/ext/nutcracker/contrib/yaml-0.1.4/tests/example-reformatter.c +202 -0
  64. data/ext/nutcracker/contrib/yaml-0.1.4/tests/run-dumper.c +311 -0
  65. data/ext/nutcracker/contrib/yaml-0.1.4/tests/run-emitter.c +327 -0
  66. data/ext/nutcracker/contrib/yaml-0.1.4/tests/run-loader.c +63 -0
  67. data/ext/nutcracker/contrib/yaml-0.1.4/tests/run-parser.c +63 -0
  68. data/ext/nutcracker/contrib/yaml-0.1.4/tests/run-scanner.c +63 -0
  69. data/ext/nutcracker/contrib/yaml-0.1.4/tests/test-reader.c +354 -0
  70. data/ext/nutcracker/contrib/yaml-0.1.4/tests/test-version.c +29 -0
  71. data/ext/nutcracker/extconf.rb +5 -0
  72. data/ext/nutcracker/m4/libtool.m4 +7376 -0
  73. data/ext/nutcracker/m4/ltoptions.m4 +368 -0
  74. data/ext/nutcracker/m4/ltsugar.m4 +123 -0
  75. data/ext/nutcracker/m4/ltversion.m4 +23 -0
  76. data/ext/nutcracker/m4/lt~obsolete.m4 +92 -0
  77. data/ext/nutcracker/notes/c-styleguide.txt +425 -0
  78. data/ext/nutcracker/notes/debug.txt +96 -0
  79. data/ext/nutcracker/notes/memcache.txt +123 -0
  80. data/ext/nutcracker/notes/recommendation.md +118 -0
  81. data/ext/nutcracker/notes/redis.md +415 -0
  82. data/ext/nutcracker/notes/socket.txt +131 -0
  83. data/ext/nutcracker/scripts/multi_get.sh +26 -0
  84. data/ext/nutcracker/scripts/nutcracker.init +73 -0
  85. data/ext/nutcracker/scripts/nutcracker.spec +52 -0
  86. data/ext/nutcracker/scripts/pipelined_read.sh +23 -0
  87. data/ext/nutcracker/scripts/pipelined_write.sh +29 -0
  88. data/ext/nutcracker/scripts/populate_memcached.sh +24 -0
  89. data/ext/nutcracker/scripts/redis-check.py +23 -0
  90. data/ext/nutcracker/scripts/redis-check.sh +564 -0
  91. data/ext/nutcracker/src/Makefile.am +46 -0
  92. data/ext/nutcracker/src/Makefile.in +726 -0
  93. data/ext/nutcracker/src/hashkit/Makefile.am +22 -0
  94. data/ext/nutcracker/src/hashkit/Makefile.in +501 -0
  95. data/ext/nutcracker/src/hashkit/nc_crc32.c +105 -0
  96. data/ext/nutcracker/src/hashkit/nc_fnv.c +82 -0
  97. data/ext/nutcracker/src/hashkit/nc_hashkit.h +74 -0
  98. data/ext/nutcracker/src/hashkit/nc_hsieh.c +93 -0
  99. data/ext/nutcracker/src/hashkit/nc_jenkins.c +230 -0
  100. data/ext/nutcracker/src/hashkit/nc_ketama.c +240 -0
  101. data/ext/nutcracker/src/hashkit/nc_md5.c +379 -0
  102. data/ext/nutcracker/src/hashkit/nc_modula.c +144 -0
  103. data/ext/nutcracker/src/hashkit/nc_murmur.c +99 -0
  104. data/ext/nutcracker/src/hashkit/nc_one_at_a_time.c +51 -0
  105. data/ext/nutcracker/src/hashkit/nc_random.c +146 -0
  106. data/ext/nutcracker/src/nc.c +573 -0
  107. data/ext/nutcracker/src/nc_array.c +204 -0
  108. data/ext/nutcracker/src/nc_array.h +73 -0
  109. data/ext/nutcracker/src/nc_client.c +189 -0
  110. data/ext/nutcracker/src/nc_client.h +28 -0
  111. data/ext/nutcracker/src/nc_conf.c +1766 -0
  112. data/ext/nutcracker/src/nc_conf.h +134 -0
  113. data/ext/nutcracker/src/nc_connection.c +392 -0
  114. data/ext/nutcracker/src/nc_connection.h +99 -0
  115. data/ext/nutcracker/src/nc_core.c +334 -0
  116. data/ext/nutcracker/src/nc_core.h +131 -0
  117. data/ext/nutcracker/src/nc_event.c +214 -0
  118. data/ext/nutcracker/src/nc_event.h +39 -0
  119. data/ext/nutcracker/src/nc_log.c +254 -0
  120. data/ext/nutcracker/src/nc_log.h +120 -0
  121. data/ext/nutcracker/src/nc_mbuf.c +285 -0
  122. data/ext/nutcracker/src/nc_mbuf.h +67 -0
  123. data/ext/nutcracker/src/nc_message.c +828 -0
  124. data/ext/nutcracker/src/nc_message.h +253 -0
  125. data/ext/nutcracker/src/nc_proxy.c +359 -0
  126. data/ext/nutcracker/src/nc_proxy.h +34 -0
  127. data/ext/nutcracker/src/nc_queue.h +788 -0
  128. data/ext/nutcracker/src/nc_rbtree.c +348 -0
  129. data/ext/nutcracker/src/nc_rbtree.h +47 -0
  130. data/ext/nutcracker/src/nc_request.c +588 -0
  131. data/ext/nutcracker/src/nc_response.c +332 -0
  132. data/ext/nutcracker/src/nc_server.c +841 -0
  133. data/ext/nutcracker/src/nc_server.h +143 -0
  134. data/ext/nutcracker/src/nc_signal.c +131 -0
  135. data/ext/nutcracker/src/nc_signal.h +34 -0
  136. data/ext/nutcracker/src/nc_stats.c +1188 -0
  137. data/ext/nutcracker/src/nc_stats.h +206 -0
  138. data/ext/nutcracker/src/nc_string.c +109 -0
  139. data/ext/nutcracker/src/nc_string.h +112 -0
  140. data/ext/nutcracker/src/nc_util.c +619 -0
  141. data/ext/nutcracker/src/nc_util.h +214 -0
  142. data/ext/nutcracker/src/proto/Makefile.am +14 -0
  143. data/ext/nutcracker/src/proto/Makefile.in +482 -0
  144. data/ext/nutcracker/src/proto/nc_memcache.c +1306 -0
  145. data/ext/nutcracker/src/proto/nc_proto.h +155 -0
  146. data/ext/nutcracker/src/proto/nc_redis.c +2102 -0
  147. data/lib/nutcracker.rb +7 -0
  148. data/lib/nutcracker/version.rb +3 -0
  149. metadata +194 -0
@@ -0,0 +1,828 @@
1
+ /*
2
+ * twemproxy - A fast and lightweight proxy for memcached protocol.
3
+ * Copyright (C) 2011 Twitter, Inc.
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ #include <stdio.h>
19
+ #include <stdlib.h>
20
+
21
+ #include <sys/uio.h>
22
+
23
+ #include <nc_core.h>
24
+ #include <nc_server.h>
25
+ #include <proto/nc_proto.h>
26
+
27
/*
 * Upper bound on the number of iovec entries used for one vectored send:
 * IOV_MAX when it does not exceed 128, otherwise 128.
 */
#if (IOV_MAX <= 128)
#define NC_IOV_MAX IOV_MAX
#else
#define NC_IOV_MAX 128
#endif
32
+
33
+ /*
34
+ * nc_message.[ch]
35
+ * message (struct msg)
36
+ * + + .
37
+ * | | .
38
+ * / \ .
39
+ * Request Response .../ nc_mbuf.[ch] (message buffers)
40
+ * nc_request.c nc_response.c .../ nc_memcache.c; nc_redis.c (message parser)
41
+ *
42
+ * Messages in nutcracker are manipulated by a chain of processing handlers,
43
+ * where each handler is responsible for taking the input and producing an
44
+ * output for the next handler in the chain. This mechanism of processing
45
+ * loosely conforms to the standard chain-of-responsibility design pattern
46
+ *
47
+ * At the high level, each handler takes in a message: request or response
48
+ * and produces the message for the next handler in the chain. The input
49
+ * for a handler is either a request or response, but never both and
50
+ * similarly the output of a handler is either a request or response or
51
+ * nothing.
52
+ *
53
+ * Each handler itself is composed of two processing units:
54
+ *
55
+ * 1). filter: manipulates output produced by the handler, usually based
56
+ * on a policy. If needed, multiple filters can be hooked into each
57
+ * location.
58
+ * 2). forwarder: chooses one of the backend servers to send the request
59
+ * to, usually based on the configured distribution and key hasher.
60
+ *
61
+ * Handlers are registered either with Client or Server or Proxy
62
+ * connections. A Proxy connection only has a read handler as it is only
63
+ * responsible for accepting new connections from client. Read handler
64
+ * (conn_recv_t) registered with client is responsible for reading requests,
65
+ * while that registered with server is responsible for reading responses.
66
+ * Write handler (conn_send_t) registered with client is responsible for
67
+ * writing response, while that registered with server is responsible for
68
+ * writing requests.
69
+ *
70
+ * Note that in the above discussion, the terminology send is used
71
+ * synonymously with write or OUT event. Similarly recv is used synonymously
72
+ * with read or IN event
73
+ *
74
+ * Client+ Proxy Server+
75
+ * (nutcracker)
76
+ * .
77
+ * msg_recv {read event} . msg_recv {read event}
78
+ * + . +
79
+ * | . |
80
+ * \ . /
81
+ * req_recv_next . rsp_recv_next
82
+ * + . +
83
+ * | . | Rsp
84
+ * req_recv_done . rsp_recv_done <===
85
+ * + . +
86
+ * | . |
87
+ * Req \ . /
88
+ * ===> req_filter* . *rsp_filter
89
+ * + . +
90
+ * | . |
91
+ * \ . /
92
+ * req_forward-// (a) . (c) \\-rsp_forward
93
+ * .
94
+ * .
95
+ * msg_send {write event} . msg_send {write event}
96
+ * + . +
97
+ * | . |
98
+ * Rsp' \ . / Req'
99
+ * <=== rsp_send_next . req_send_next ===>
100
+ * + . +
101
+ * | . |
102
+ * \ . /
103
+ * rsp_send_done-// (d) . (b) //-req_send_done
104
+ *
105
+ *
106
+ * (a) -> (b) -> (c) -> (d) is the normal flow of transaction consisting
107
+ * of a single request response, where (a) and (b) handle request from
108
+ * client, while (c) and (d) handle the corresponding response from the
109
+ * server.
110
+ */
111
+
112
+ static uint64_t msg_id; /* message id counter */
113
+ static uint64_t frag_id; /* fragment id counter */
114
+ static uint32_t nfree_msgq; /* # free msg q */
115
+ static struct msg_tqh free_msgq; /* free msg q */
116
+ static struct rbtree tmo_rbt; /* timeout rbtree */
117
+ static struct rbnode tmo_rbs; /* timeout rbtree sentinel */
118
+
119
+ static struct msg *
120
+ msg_from_rbe(struct rbnode *node)
121
+ {
122
+ struct msg *msg;
123
+ int offset;
124
+
125
+ offset = offsetof(struct msg, tmo_rbe);
126
+ msg = (struct msg *)((char *)node - offset);
127
+
128
+ return msg;
129
+ }
130
+
131
+ struct msg *
132
+ msg_tmo_min(void)
133
+ {
134
+ struct rbnode *node;
135
+
136
+ node = rbtree_min(&tmo_rbt);
137
+ if (node == NULL) {
138
+ return NULL;
139
+ }
140
+
141
+ return msg_from_rbe(node);
142
+ }
143
+
144
+ void
145
+ msg_tmo_insert(struct msg *msg, struct conn *conn)
146
+ {
147
+ struct rbnode *node;
148
+ int timeout;
149
+
150
+ ASSERT(msg->request);
151
+ ASSERT(!msg->quit && !msg->noreply);
152
+
153
+ timeout = server_timeout(conn);
154
+ if (timeout <= 0) {
155
+ return;
156
+ }
157
+
158
+ node = &msg->tmo_rbe;
159
+ node->key = nc_msec_now() + timeout;
160
+ node->data = conn;
161
+
162
+ rbtree_insert(&tmo_rbt, node);
163
+
164
+ log_debug(LOG_VERB, "insert msg %"PRIu64" into tmo rbt with expiry of "
165
+ "%d msec", msg->id, timeout);
166
+ }
167
+
168
+ void
169
+ msg_tmo_delete(struct msg *msg)
170
+ {
171
+ struct rbnode *node;
172
+
173
+ node = &msg->tmo_rbe;
174
+
175
+ /* already deleted */
176
+
177
+ if (node->data == NULL) {
178
+ return;
179
+ }
180
+
181
+ rbtree_delete(&tmo_rbt, node);
182
+
183
+ log_debug(LOG_VERB, "delete msg %"PRIu64" from tmo rbt", msg->id);
184
+ }
185
+
186
+ static struct msg *
187
+ _msg_get(void)
188
+ {
189
+ struct msg *msg;
190
+
191
+ if (!TAILQ_EMPTY(&free_msgq)) {
192
+ ASSERT(nfree_msgq > 0);
193
+
194
+ msg = TAILQ_FIRST(&free_msgq);
195
+ nfree_msgq--;
196
+ TAILQ_REMOVE(&free_msgq, msg, m_tqe);
197
+ goto done;
198
+ }
199
+
200
+ msg = nc_alloc(sizeof(*msg));
201
+ if (msg == NULL) {
202
+ return NULL;
203
+ }
204
+
205
+ done:
206
+ /* c_tqe, s_tqe, and m_tqe are left uninitialized */
207
+ msg->id = ++msg_id;
208
+ msg->peer = NULL;
209
+ msg->owner = NULL;
210
+
211
+ rbtree_node_init(&msg->tmo_rbe);
212
+
213
+ STAILQ_INIT(&msg->mhdr);
214
+ msg->mlen = 0;
215
+
216
+ msg->state = 0;
217
+ msg->pos = NULL;
218
+ msg->token = NULL;
219
+
220
+ msg->parser = NULL;
221
+ msg->result = MSG_PARSE_OK;
222
+
223
+ msg->pre_splitcopy = NULL;
224
+ msg->post_splitcopy = NULL;
225
+ msg->pre_coalesce = NULL;
226
+ msg->post_coalesce = NULL;
227
+
228
+ msg->type = MSG_UNKNOWN;
229
+
230
+ msg->key_start = NULL;
231
+ msg->key_end = NULL;
232
+
233
+ msg->vlen = 0;
234
+ msg->end = NULL;
235
+
236
+ msg->frag_owner = NULL;
237
+ msg->nfrag = 0;
238
+ msg->frag_id = 0;
239
+
240
+ msg->narg_start = NULL;
241
+ msg->narg_end = NULL;
242
+ msg->narg = 0;
243
+ msg->rnarg = 0;
244
+ msg->rlen = 0;
245
+ msg->integer = 0;
246
+
247
+ msg->err = 0;
248
+ msg->error = 0;
249
+ msg->ferror = 0;
250
+ msg->request = 0;
251
+ msg->quit = 0;
252
+ msg->noreply = 0;
253
+ msg->done = 0;
254
+ msg->fdone = 0;
255
+ msg->first_fragment = 0;
256
+ msg->last_fragment = 0;
257
+ msg->swallow = 0;
258
+ msg->redis = 0;
259
+
260
+ return msg;
261
+ }
262
+
263
+ struct msg *
264
+ msg_get(struct conn *conn, bool request, bool redis)
265
+ {
266
+ struct msg *msg;
267
+
268
+ msg = _msg_get();
269
+ if (msg == NULL) {
270
+ return NULL;
271
+ }
272
+
273
+ msg->owner = conn;
274
+ msg->request = request ? 1 : 0;
275
+ msg->redis = redis ? 1 : 0;
276
+
277
+ if (redis) {
278
+ if (request) {
279
+ msg->parser = redis_parse_req;
280
+ } else {
281
+ msg->parser = redis_parse_rsp;
282
+ }
283
+ msg->pre_splitcopy = redis_pre_splitcopy;
284
+ msg->post_splitcopy = redis_post_splitcopy;
285
+ msg->pre_coalesce = redis_pre_coalesce;
286
+ msg->post_coalesce = redis_post_coalesce;
287
+ } else {
288
+ if (request) {
289
+ msg->parser = memcache_parse_req;
290
+ } else {
291
+ msg->parser = memcache_parse_rsp;
292
+ }
293
+ msg->pre_splitcopy = memcache_pre_splitcopy;
294
+ msg->post_splitcopy = memcache_post_splitcopy;
295
+ msg->pre_coalesce = memcache_pre_coalesce;
296
+ msg->post_coalesce = memcache_post_coalesce;
297
+ }
298
+
299
+ log_debug(LOG_VVERB, "get msg %p id %"PRIu64" request %d owner sd %d",
300
+ msg, msg->id, msg->request, conn->sd);
301
+
302
+ return msg;
303
+ }
304
+
305
+ struct msg *
306
+ msg_get_error(bool redis, err_t err)
307
+ {
308
+ struct msg *msg;
309
+ struct mbuf *mbuf;
310
+ int n;
311
+ char *errstr = err ? strerror(err) : "unknown";
312
+ char *protstr = redis ? "-ERR" : "SERVER_ERROR";
313
+
314
+ msg = _msg_get();
315
+ if (msg == NULL) {
316
+ return NULL;
317
+ }
318
+
319
+ msg->state = 0;
320
+ msg->type = MSG_RSP_MC_SERVER_ERROR;
321
+
322
+ mbuf = mbuf_get();
323
+ if (mbuf == NULL) {
324
+ msg_put(msg);
325
+ return NULL;
326
+ }
327
+ mbuf_insert(&msg->mhdr, mbuf);
328
+
329
+ n = nc_scnprintf(mbuf->last, mbuf_size(mbuf), "%s %s"CRLF, protstr, errstr);
330
+ mbuf->last += n;
331
+ msg->mlen = (uint32_t)n;
332
+
333
+ log_debug(LOG_VVERB, "get msg %p id %"PRIu64" len %"PRIu32" error '%s'",
334
+ msg, msg->id, msg->mlen, errstr);
335
+
336
+ return msg;
337
+ }
338
+
339
+ static void
340
+ msg_free(struct msg *msg)
341
+ {
342
+ ASSERT(STAILQ_EMPTY(&msg->mhdr));
343
+
344
+ log_debug(LOG_VVERB, "free msg %p id %"PRIu64"", msg, msg->id);
345
+ nc_free(msg);
346
+ }
347
+
348
+ void
349
+ msg_put(struct msg *msg)
350
+ {
351
+ log_debug(LOG_VVERB, "put msg %p id %"PRIu64"", msg, msg->id);
352
+
353
+ while (!STAILQ_EMPTY(&msg->mhdr)) {
354
+ struct mbuf *mbuf = STAILQ_FIRST(&msg->mhdr);
355
+ mbuf_remove(&msg->mhdr, mbuf);
356
+ mbuf_put(mbuf);
357
+ }
358
+
359
+ nfree_msgq++;
360
+ TAILQ_INSERT_HEAD(&free_msgq, msg, m_tqe);
361
+ }
362
+
363
+ void
364
+ msg_dump(struct msg *msg)
365
+ {
366
+ struct mbuf *mbuf;
367
+
368
+ loga("msg dump id %"PRIu64" request %d len %"PRIu32" type %d done %d "
369
+ "error %d (err %d)", msg->id, msg->request, msg->mlen, msg->type,
370
+ msg->done, msg->error, msg->err);
371
+
372
+ STAILQ_FOREACH(mbuf, &msg->mhdr, next) {
373
+ uint8_t *p, *q;
374
+ long int len;
375
+
376
+ p = mbuf->start;
377
+ q = mbuf->last;
378
+ len = q - p;
379
+
380
+ loga_hexdump(p, len, "mbuf with %ld bytes of data", len);
381
+ }
382
+ }
383
+
384
+ void
385
+ msg_init(void)
386
+ {
387
+ log_debug(LOG_DEBUG, "msg size %d", sizeof(struct msg));
388
+ msg_id = 0;
389
+ frag_id = 0;
390
+ nfree_msgq = 0;
391
+ TAILQ_INIT(&free_msgq);
392
+ rbtree_init(&tmo_rbt, &tmo_rbs);
393
+ }
394
+
395
+ void
396
+ msg_deinit(void)
397
+ {
398
+ struct msg *msg, *nmsg;
399
+
400
+ for (msg = TAILQ_FIRST(&free_msgq); msg != NULL;
401
+ msg = nmsg, nfree_msgq--) {
402
+ ASSERT(nfree_msgq > 0);
403
+ nmsg = TAILQ_NEXT(msg, m_tqe);
404
+ msg_free(msg);
405
+ }
406
+ ASSERT(nfree_msgq == 0);
407
+ }
408
+
409
+ bool
410
+ msg_empty(struct msg *msg)
411
+ {
412
+ return msg->mlen == 0 ? true : false;
413
+ }
414
+
415
+ static rstatus_t
416
+ msg_parsed(struct context *ctx, struct conn *conn, struct msg *msg)
417
+ {
418
+ struct msg *nmsg;
419
+ struct mbuf *mbuf, *nbuf;
420
+
421
+ mbuf = STAILQ_LAST(&msg->mhdr, mbuf, next);
422
+ if (msg->pos == mbuf->last) {
423
+ /* no more data to parse */
424
+ conn->recv_done(ctx, conn, msg, NULL);
425
+ return NC_OK;
426
+ }
427
+
428
+ /*
429
+ * Input mbuf has un-parsed data. Split mbuf of the current message msg
430
+ * into (mbuf, nbuf), where mbuf is the portion of the message that has
431
+ * been parsed and nbuf is the portion of the message that is un-parsed.
432
+ * Parse nbuf as a new message nmsg in the next iteration.
433
+ */
434
+ nbuf = mbuf_split(&msg->mhdr, msg->pos, NULL, NULL);
435
+ if (nbuf == NULL) {
436
+ return NC_ENOMEM;
437
+ }
438
+
439
+ nmsg = msg_get(msg->owner, msg->request, conn->redis);
440
+ if (nmsg == NULL) {
441
+ mbuf_put(nbuf);
442
+ return NC_ENOMEM;
443
+ }
444
+ mbuf_insert(&nmsg->mhdr, nbuf);
445
+ nmsg->pos = nbuf->pos;
446
+
447
+ /* update length of current (msg) and new message (nmsg) */
448
+ nmsg->mlen = mbuf_length(nbuf);
449
+ msg->mlen -= nmsg->mlen;
450
+
451
+ conn->recv_done(ctx, conn, msg, nmsg);
452
+
453
+ return NC_OK;
454
+ }
455
+
456
+ static rstatus_t
457
+ msg_fragment(struct context *ctx, struct conn *conn, struct msg *msg)
458
+ {
459
+ rstatus_t status; /* return status */
460
+ struct msg *nmsg; /* new message */
461
+ struct mbuf *nbuf; /* new mbuf */
462
+
463
+ ASSERT(conn->client && !conn->proxy);
464
+ ASSERT(msg->request);
465
+
466
+ nbuf = mbuf_split(&msg->mhdr, msg->pos, msg->pre_splitcopy, msg);
467
+ if (nbuf == NULL) {
468
+ return NC_ENOMEM;
469
+ }
470
+
471
+ status = msg->post_splitcopy(msg);
472
+ if (status != NC_OK) {
473
+ mbuf_put(nbuf);
474
+ return status;
475
+ }
476
+
477
+ nmsg = msg_get(msg->owner, msg->request, msg->redis);
478
+ if (nmsg == NULL) {
479
+ mbuf_put(nbuf);
480
+ return NC_ENOMEM;
481
+ }
482
+ mbuf_insert(&nmsg->mhdr, nbuf);
483
+ nmsg->pos = nbuf->pos;
484
+
485
+ /* update length of current (msg) and new message (nmsg) */
486
+ nmsg->mlen = mbuf_length(nbuf);
487
+ msg->mlen -= nmsg->mlen;
488
+
489
+ /*
490
+ * Attach unique fragment id to all fragments of the message vector. All
491
+ * fragments of the message, including the first fragment point to the
492
+ * first fragment through the frag_owner pointer. The first_fragment and
493
+ * last_fragment identify first and last fragment respectively.
494
+ *
495
+ * For example, a message vector given below is split into 3 fragments:
496
+ * 'get key1 key2 key3\r\n'
497
+ * Or,
498
+ * '*4\r\n$4\r\nmget\r\n$4\r\nkey1\r\n$4\r\nkey2\r\n$4\r\nkey3\r\n'
499
+ *
500
+ * +--------------+
501
+ * | msg vector |
502
+ * |(original msg)|
503
+ * +--------------+
504
+ *
505
+ * frag_owner frag_owner
506
+ * /-----------+ /------------+
507
+ * | | | |
508
+ * | v v |
509
+ * +--------------------+ +---------------------+
510
+ * | frag_id = 10 | | frag_id = 10 |
511
+ * | first_fragment = 1 | | first_fragment = 0 |
512
+ * | last_fragment = 0 | | last_fragment = 0 |
513
+ * | nfrag = 3 | | nfrag = 0 |
514
+ * +--------------------+ +---------------------+
515
+ * ^
516
+ * | frag_owner
517
+ * \-------------+
518
+ * |
519
+ * |
520
+ * +---------------------+
521
+ * | frag_id = 10 |
522
+ * | first_fragment = 0 |
523
+ * | last_fragment = 1 |
524
+ * | nfrag = 0 |
525
+ * +---------------------+
526
+ *
527
+ *
528
+ */
529
+ if (msg->frag_id == 0) {
530
+ msg->frag_id = ++frag_id;
531
+ msg->first_fragment = 1;
532
+ msg->nfrag = 1;
533
+ msg->frag_owner = msg;
534
+ }
535
+ nmsg->frag_id = msg->frag_id;
536
+ msg->last_fragment = 0;
537
+ nmsg->last_fragment = 1;
538
+ nmsg->frag_owner = msg->frag_owner;
539
+ msg->frag_owner->nfrag++;
540
+
541
+ stats_pool_incr(ctx, conn->owner, fragments);
542
+
543
+ log_debug(LOG_VERB, "fragment msg into %"PRIu64" and %"PRIu64" frag id "
544
+ "%"PRIu64"", msg->id, nmsg->id, msg->frag_id);
545
+
546
+ conn->recv_done(ctx, conn, msg, nmsg);
547
+
548
+ return NC_OK;
549
+ }
550
+
551
+ static rstatus_t
552
+ msg_repair(struct context *ctx, struct conn *conn, struct msg *msg)
553
+ {
554
+ struct mbuf *nbuf;
555
+
556
+ nbuf = mbuf_split(&msg->mhdr, msg->pos, NULL, NULL);
557
+ if (nbuf == NULL) {
558
+ return NC_ENOMEM;
559
+ }
560
+ mbuf_insert(&msg->mhdr, nbuf);
561
+ msg->pos = nbuf->pos;
562
+
563
+ return NC_OK;
564
+ }
565
+
566
+ static rstatus_t
567
+ msg_parse(struct context *ctx, struct conn *conn, struct msg *msg)
568
+ {
569
+ rstatus_t status;
570
+
571
+ if (msg_empty(msg)) {
572
+ /* no data to parse */
573
+ conn->recv_done(ctx, conn, msg, NULL);
574
+ return NC_OK;
575
+ }
576
+
577
+ msg->parser(msg);
578
+
579
+ switch (msg->result) {
580
+ case MSG_PARSE_OK:
581
+ status = msg_parsed(ctx, conn, msg);
582
+ break;
583
+
584
+ case MSG_PARSE_FRAGMENT:
585
+ status = msg_fragment(ctx, conn, msg);
586
+ break;
587
+
588
+ case MSG_PARSE_REPAIR:
589
+ status = msg_repair(ctx, conn, msg);
590
+ break;
591
+
592
+ case MSG_PARSE_AGAIN:
593
+ status = NC_OK;
594
+ break;
595
+
596
+ default:
597
+ status = NC_ERROR;
598
+ conn->err = errno;
599
+ break;
600
+ }
601
+
602
+ return conn->err != 0 ? NC_ERROR : status;
603
+ }
604
+
605
+ static rstatus_t
606
+ msg_recv_chain(struct context *ctx, struct conn *conn, struct msg *msg)
607
+ {
608
+ rstatus_t status;
609
+ struct msg *nmsg;
610
+ struct mbuf *mbuf;
611
+ size_t msize;
612
+ ssize_t n;
613
+
614
+ mbuf = STAILQ_LAST(&msg->mhdr, mbuf, next);
615
+ if (mbuf == NULL || mbuf_full(mbuf)) {
616
+ mbuf = mbuf_get();
617
+ if (mbuf == NULL) {
618
+ return NC_ENOMEM;
619
+ }
620
+ mbuf_insert(&msg->mhdr, mbuf);
621
+ msg->pos = mbuf->pos;
622
+ }
623
+ ASSERT(mbuf->end - mbuf->last > 0);
624
+
625
+ msize = mbuf_size(mbuf);
626
+
627
+ n = conn_recv(conn, mbuf->last, msize);
628
+ if (n < 0) {
629
+ if (n == NC_EAGAIN) {
630
+ return NC_OK;
631
+ }
632
+ return NC_ERROR;
633
+ }
634
+
635
+ ASSERT((mbuf->last + n) <= mbuf->end);
636
+ mbuf->last += n;
637
+ msg->mlen += (uint32_t)n;
638
+
639
+ for (;;) {
640
+ status = msg_parse(ctx, conn, msg);
641
+ if (status != NC_OK) {
642
+ return status;
643
+ }
644
+
645
+ /* get next message to parse */
646
+ nmsg = conn->recv_next(ctx, conn, false);
647
+ if (nmsg == NULL || nmsg == msg) {
648
+ /* no more data to parse */
649
+ break;
650
+ }
651
+
652
+ msg = nmsg;
653
+ }
654
+
655
+ return NC_OK;
656
+ }
657
+
658
+ rstatus_t
659
+ msg_recv(struct context *ctx, struct conn *conn)
660
+ {
661
+ rstatus_t status;
662
+ struct msg *msg;
663
+
664
+ ASSERT(conn->recv_active);
665
+
666
+ conn->recv_ready = 1;
667
+ do {
668
+ msg = conn->recv_next(ctx, conn, true);
669
+ if (msg == NULL) {
670
+ return NC_OK;
671
+ }
672
+
673
+ status = msg_recv_chain(ctx, conn, msg);
674
+ if (status != NC_OK) {
675
+ return status;
676
+ }
677
+ } while (conn->recv_ready);
678
+
679
+ return NC_OK;
680
+ }
681
+
682
+ static rstatus_t
683
+ msg_send_chain(struct context *ctx, struct conn *conn, struct msg *msg)
684
+ {
685
+ struct msg_tqh send_msgq; /* send msg q */
686
+ struct msg *nmsg; /* next msg */
687
+ struct mbuf *mbuf, *nbuf; /* current and next mbuf */
688
+ size_t mlen; /* current mbuf data length */
689
+ struct iovec *ciov, iov[NC_IOV_MAX]; /* current iovec */
690
+ struct array sendv; /* send iovec */
691
+ size_t nsend, nsent; /* bytes to send; bytes sent */
692
+ size_t limit; /* bytes to send limit */
693
+ ssize_t n; /* bytes sent by sendv */
694
+
695
+ TAILQ_INIT(&send_msgq);
696
+
697
+ array_set(&sendv, iov, sizeof(iov[0]), NC_IOV_MAX);
698
+
699
+ /* preprocess - build iovec */
700
+
701
+ nsend = 0;
702
+ /*
703
+ * readv() and writev() returns EINVAL if the sum of the iov_len values
704
+ * overflows an ssize_t value Or, the vector count iovcnt is less than
705
+ * zero or greater than the permitted maximum.
706
+ */
707
+ limit = SSIZE_MAX;
708
+
709
+ for (;;) {
710
+ ASSERT(conn->smsg == msg);
711
+
712
+ TAILQ_INSERT_TAIL(&send_msgq, msg, m_tqe);
713
+
714
+ for (mbuf = STAILQ_FIRST(&msg->mhdr);
715
+ mbuf != NULL && array_n(&sendv) < NC_IOV_MAX && nsend < limit;
716
+ mbuf = nbuf) {
717
+ nbuf = STAILQ_NEXT(mbuf, next);
718
+
719
+ if (mbuf_empty(mbuf)) {
720
+ continue;
721
+ }
722
+
723
+ mlen = mbuf_length(mbuf);
724
+ if ((nsend + mlen) > limit) {
725
+ mlen = limit - nsend;
726
+ }
727
+
728
+ ciov = array_push(&sendv);
729
+ ciov->iov_base = mbuf->pos;
730
+ ciov->iov_len = mlen;
731
+
732
+ nsend += mlen;
733
+ }
734
+
735
+ if (array_n(&sendv) >= NC_IOV_MAX || nsend >= limit) {
736
+ break;
737
+ }
738
+
739
+ msg = conn->send_next(ctx, conn);
740
+ if (msg == NULL) {
741
+ break;
742
+ }
743
+ }
744
+
745
+ ASSERT(!TAILQ_EMPTY(&send_msgq) && nsend != 0);
746
+
747
+ conn->smsg = NULL;
748
+
749
+ n = conn_sendv(conn, &sendv, nsend);
750
+
751
+ nsent = n > 0 ? (size_t)n : 0;
752
+
753
+ /* postprocess - process sent messages in send_msgq */
754
+
755
+ for (msg = TAILQ_FIRST(&send_msgq); msg != NULL; msg = nmsg) {
756
+ nmsg = TAILQ_NEXT(msg, m_tqe);
757
+
758
+ TAILQ_REMOVE(&send_msgq, msg, m_tqe);
759
+
760
+ if (nsent == 0) {
761
+ if (msg->mlen == 0) {
762
+ conn->send_done(ctx, conn, msg);
763
+ }
764
+ continue;
765
+ }
766
+
767
+ /* adjust mbufs of the sent message */
768
+ for (mbuf = STAILQ_FIRST(&msg->mhdr); mbuf != NULL; mbuf = nbuf) {
769
+ nbuf = STAILQ_NEXT(mbuf, next);
770
+
771
+ if (mbuf_empty(mbuf)) {
772
+ continue;
773
+ }
774
+
775
+ mlen = mbuf_length(mbuf);
776
+ if (nsent < mlen) {
777
+ /* mbuf was sent partially; process remaining bytes later */
778
+ mbuf->pos += nsent;
779
+ ASSERT(mbuf->pos < mbuf->last);
780
+ nsent = 0;
781
+ break;
782
+ }
783
+
784
+ /* mbuf was sent completely; mark it empty */
785
+ mbuf->pos = mbuf->last;
786
+ nsent -= mlen;
787
+ }
788
+
789
+ /* message has been sent completely, finalize it */
790
+ if (mbuf == NULL) {
791
+ conn->send_done(ctx, conn, msg);
792
+ }
793
+ }
794
+
795
+ ASSERT(TAILQ_EMPTY(&send_msgq));
796
+
797
+ if (n > 0) {
798
+ return NC_OK;
799
+ }
800
+
801
+ return (n == NC_EAGAIN) ? NC_OK : NC_ERROR;
802
+ }
803
+
804
+ rstatus_t
805
+ msg_send(struct context *ctx, struct conn *conn)
806
+ {
807
+ rstatus_t status;
808
+ struct msg *msg;
809
+
810
+ ASSERT(conn->send_active);
811
+
812
+ conn->send_ready = 1;
813
+ do {
814
+ msg = conn->send_next(ctx, conn);
815
+ if (msg == NULL) {
816
+ /* nothing to send */
817
+ return NC_OK;
818
+ }
819
+
820
+ status = msg_send_chain(ctx, conn, msg);
821
+ if (status != NC_OK) {
822
+ return status;
823
+ }
824
+
825
+ } while (conn->send_ready);
826
+
827
+ return NC_OK;
828
+ }