fast_curl 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -12
- data/ext/fast_curl/extconf.rb +1 -0
- data/ext/fast_curl/fast_curl.c +494 -242
- data/lib/fast_curl/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 00f4a847ee360180fa617212693206970a8e7af85ef02e6370d37cb62d2b384e
|
|
4
|
+
data.tar.gz: f5633dc59f41b4b31efa6e07eb32bc6a85b146e85126fa68b24b742294df5e8b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b7f2e299c89da40d0a546e8545310430a46f0cca8abafff53e46953871c4c205d8fc53c16e33e65b9ff3a22c951d212f41f562476977200de84136ce0342cacc
|
|
7
|
+
data.tar.gz: a5fa06d7e44c0846de3cccd3c955a136fd7ee05e01611f05b940bf174cbd356315d23418ebc40ea77d94667c928ae3f2c7087c227df0ea7a39c0b3a27f52da77
|
data/README.md
CHANGED
|
@@ -12,7 +12,9 @@ Ultra-fast parallel HTTP client for Ruby. C extension built on libcurl `curl_mul
|
|
|
12
12
|
|
|
13
13
|
## Installation
|
|
14
14
|
|
|
15
|
-
**Requirements**: Ruby >= 3.
|
|
15
|
+
**Requirements**: Ruby >= 3.1, libcurl
|
|
16
|
+
|
|
17
|
+
> **Why Ruby 3.1?** The C extension uses `rb_fiber_scheduler_current`, `rb_fiber_scheduler_block` and `rb_fiber_scheduler_unblock` to properly yield control to the Fiber Scheduler during I/O. These APIs are stable starting from Ruby 3.1. Without them, there is no correct way for a C extension to cooperate with the scheduler — earlier approaches (`rb_thread_schedule`) hold the GVL and block other fibers.
|
|
16
18
|
|
|
17
19
|
```ruby
|
|
18
20
|
gem 'fast_curl'
|
|
@@ -76,6 +78,23 @@ FastCurl.stream_get(urls, connections: 50) do |index, response|
|
|
|
76
78
|
end
|
|
77
79
|
```
|
|
78
80
|
|
|
81
|
+
### Retry functionality (v0.2.0+)
|
|
82
|
+
|
|
83
|
+
```ruby
|
|
84
|
+
# Automatic retry on network errors (timeout, connection issues)
|
|
85
|
+
results = FastCurl.get([
|
|
86
|
+
{ url: "https://unreliable-api.com/data" }
|
|
87
|
+
], retries: 3, retry_delay: 1000) # 3 retries with 1s delay
|
|
88
|
+
|
|
89
|
+
# Retry on specific HTTP status codes
|
|
90
|
+
results = FastCurl.get([
|
|
91
|
+
{ url: "https://api.example.com/data" }
|
|
92
|
+
], retries: 2, retry_codes: [500, 502, 503], retry_delay: 500)
|
|
93
|
+
|
|
94
|
+
# Disable retries (default is 1 retry)
|
|
95
|
+
results = FastCurl.get(urls, retries: 0)
|
|
96
|
+
```
|
|
97
|
+
|
|
79
98
|
### Inside Async
|
|
80
99
|
|
|
81
100
|
```ruby
|
|
@@ -119,22 +138,60 @@ end
|
|
|
119
138
|
|---|---|---|
|
|
120
139
|
| `connections` | 20 | Max parallel connections |
|
|
121
140
|
| `timeout` | 30 | Per-request timeout in seconds |
|
|
141
|
+
| `retries` | 1 | Number of retry attempts (0-10) |
|
|
142
|
+
| `retry_delay` | 0 | Delay between retries in milliseconds (0-30000) |
|
|
143
|
+
| `retry_codes` | [] | HTTP status codes to retry on |
|
|
122
144
|
|
|
123
145
|
## Performance
|
|
124
146
|
|
|
125
|
-
|
|
147
|
+
Benchmarks against `httpbin.org`, 5 iterations with 1 warmup, median times.
|
|
148
|
+
Run them yourself: `bundle exec ruby benchmark/local_bench.rb`.
|
|
149
|
+
|
|
150
|
+
Each request hits `/delay/1` (server-side 1-second delay), so the sequential baseline
|
|
151
|
+
grows linearly while parallel clients stay near ~1s plus network overhead.
|
|
152
|
+
|
|
153
|
+
### Time to completion (lower is better)
|
|
154
|
+
|
|
155
|
+
| Scenario | Net::HTTP sequential | fast_curl (thread) | fast_curl (fiber/Async) | Async::HTTP::Internet |
|
|
156
|
+
|---------------------------------|---------------------:|-------------------:|------------------------:|----------------------:|
|
|
157
|
+
| 4 requests × 1s, conn=4 | 8.27s | 2.36s | 2.13s | 2.56s |
|
|
158
|
+
| 10 requests × 1s, conn=10 | 20.92s | 3.49s | 5.23s | 3.83s |
|
|
159
|
+
| 20 requests × 1s, conn=5 | 42.56s | 2.94s | 2.90s | 12.14s |
|
|
160
|
+
| 200 requests × 1s, conn=20 | — | 22.19s | 21.77s | 23.59s |
|
|
161
|
+
|
|
162
|
+
### Speedup vs Net::HTTP (median)
|
|
163
|
+
|
|
164
|
+
| Scenario | fast_curl (thread) | fast_curl (fiber) | Async::HTTP |
|
|
165
|
+
|-----------------------------------|-------------------:|------------------:|------------:|
|
|
166
|
+
| 4 requests × 1s | **3.5x** | 3.9x | 3.2x |
|
|
167
|
+
| 10 requests × 1s | **6.0x** | 4.0x | 5.5x |
|
|
168
|
+
| 20 requests × 1s (queued, conn=5) | **14.5x** | 14.7x | 3.5x |
|
|
169
|
+
|
|
170
|
+
### Memory & allocations per request batch (lower is better)
|
|
171
|
+
|
|
172
|
+
| Scenario | fast_curl (thread) allocated | fast_curl (fiber) allocated | Async::HTTP allocated |
|
|
173
|
+
|---------------------------------|-----------------------------:|----------------------------:|----------------------:|
|
|
174
|
+
| 4 requests × 1s | **278 obj** | 350 obj | 2,433 obj |
|
|
175
|
+
| 10 requests × 1s | **490 obj** | 756 obj | 4,763 obj |
|
|
176
|
+
| 20 requests × 1s, conn=5 | **621 obj** | 750 obj | 8,536 obj |
|
|
177
|
+
| 200 requests × 1s, conn=20 | **5,188 obj** | 5,642 obj | 78,203 obj |
|
|
178
|
+
|
|
179
|
+
Ruby heap delta stays near zero across all scenarios for fast_curl — most allocation
|
|
180
|
+
happens in C, not on the Ruby heap.
|
|
181
|
+
|
|
182
|
+
### Error handling
|
|
183
|
+
|
|
184
|
+
| Scenario | Time |
|
|
185
|
+
|--------------------------------------------------------------|------:|
|
|
186
|
+
| 4 mixed requests (404, 500, DNS fail, 30s delay), timeout=2s | 4.02s |
|
|
187
|
+
|
|
188
|
+
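The total is bounded by `timeout=2s` rather than by the 30s slow request; the ~4s figure is about two timeout periods, which is consistent with the timed-out request being retried once under the default `retries: 1`.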
|
|
126
189
|
|
|
127
|
-
|
|
128
|
-
|--------|------------|-------------|--------------|---------------|
|
|
129
|
-
| Net::HTTP sequential | 7.93s (+2.1 MB) | 24.20s (+0.3 MB) | 48.58s (+1.2 MB) | - |
|
|
130
|
-
| fast_curl (thread) | 2.09s (+0.7 MB) | 3.73s (+0.9 MB) | 3.76s (+0.0 MB) | 5.88s (+2.3 MB) |
|
|
131
|
-
| fast_curl (fiber) | 1.96s (+0.4 MB) | 4.86s (+0.0 MB) | 3.71s (+0.2 MB) | 9.60s (+1.6 MB) |
|
|
132
|
-
| Async::HTTP | 2.54s (+0.3 MB) | 4.27s (+0.4 MB) | 9.16s (+0.5 MB) | 22.44s (+10.7 MB) |
|
|
190
|
+
### Notes on the numbers
|
|
133
191
|
|
|
134
|
-
|
|
135
|
-
-
|
|
136
|
-
-
|
|
137
|
-
- Error handling (timeout=2s): 2.01s (+0.0 MB)
|
|
192
|
+
- **Net::HTTP sequential** is the proof-of-parallelism baseline — it confirms fast_curl and Async are actually running concurrently, not that they "beat" a different library. 4×1s sequentially = 4s, parallel = ~1s + overhead.
|
|
193
|
+
- **Variance is high against remote endpoints** (httpbin.org). For stable numbers, use `--local`, which spawns a WEBrick server on 127.0.0.1.
|
|
194
|
+
- **fast_curl (thread) vs (fiber)**: same underlying C code, different scheduling. "thread" is the default; "fiber" kicks in automatically when called inside `Async do ... end`.
|
|
138
195
|
|
|
139
196
|
## License
|
|
140
197
|
|
data/ext/fast_curl/extconf.rb
CHANGED
|
@@ -6,6 +6,7 @@ abort "curl/curl.h is required" unless have_header("curl/curl.h")
|
|
|
6
6
|
have_header("ruby/thread.h")
|
|
7
7
|
have_header("ruby/fiber/scheduler.h")
|
|
8
8
|
|
|
9
|
+
have_func("curl_multi_wakeup", "curl/curl.h")
|
|
9
10
|
have_func("rb_fiber_scheduler_current", "ruby.h")
|
|
10
11
|
have_func("rb_io_wait", "ruby.h")
|
|
11
12
|
|
data/ext/fast_curl/fast_curl.c
CHANGED
|
@@ -11,31 +11,28 @@
|
|
|
11
11
|
#define MAX_RESPONSE_SIZE (100 * 1024 * 1024)
|
|
12
12
|
#define MAX_REDIRECTS 5
|
|
13
13
|
#define MAX_TIMEOUT 300
|
|
14
|
+
#define MAX_RETRIES 10
|
|
15
|
+
#define MAX_REQUESTS 10000
|
|
16
|
+
#define DEFAULT_RETRIES 1
|
|
17
|
+
#define DEFAULT_RETRY_DELAY 0
|
|
14
18
|
#define INITIAL_BUF_CAP 8192
|
|
15
19
|
#define INITIAL_HEADER_CAP 16
|
|
16
20
|
#define POLL_TIMEOUT_MS 50
|
|
17
21
|
#define FIBER_POLL_TIMEOUT_MS 10
|
|
18
22
|
#define HEADER_LINE_BUF_SIZE 512
|
|
19
23
|
|
|
20
|
-
static
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
static ID
|
|
27
|
-
static ID id_connections;
|
|
28
|
-
static ID
|
|
29
|
-
static
|
|
30
|
-
static VALUE
|
|
31
|
-
static VALUE
|
|
32
|
-
static VALUE sym_body;
|
|
33
|
-
static VALUE sym_error_code;
|
|
34
|
-
static VALUE sym_url;
|
|
35
|
-
static VALUE sym_method;
|
|
36
|
-
static VALUE sym_timeout;
|
|
37
|
-
static VALUE sym_connections;
|
|
38
|
-
static VALUE sym_count;
|
|
24
|
+
static const CURLcode DEFAULT_RETRYABLE_CURLE[] = {
|
|
25
|
+
CURLE_COULDNT_CONNECT, CURLE_OPERATION_TIMEDOUT, CURLE_SEND_ERROR, CURLE_RECV_ERROR,
|
|
26
|
+
CURLE_GOT_NOTHING, CURLE_PARTIAL_FILE, CURLE_SSL_CONNECT_ERROR};
|
|
27
|
+
#define DEFAULT_RETRYABLE_CURLE_COUNT \
|
|
28
|
+
(int)(sizeof(DEFAULT_RETRYABLE_CURLE) / sizeof(DEFAULT_RETRYABLE_CURLE[0]))
|
|
29
|
+
|
|
30
|
+
static ID id_status, id_headers, id_body, id_error_code, id_url, id_method;
|
|
31
|
+
static ID id_timeout, id_connections, id_count, id_keys;
|
|
32
|
+
static ID id_retries, id_retry_delay, id_retry_codes;
|
|
33
|
+
static VALUE sym_status, sym_headers, sym_body, sym_error_code, sym_url, sym_method;
|
|
34
|
+
static VALUE sym_timeout, sym_connections, sym_count;
|
|
35
|
+
static VALUE sym_retries, sym_retry_delay, sym_retry_codes;
|
|
39
36
|
|
|
40
37
|
typedef struct {
|
|
41
38
|
char *data;
|
|
@@ -60,29 +57,27 @@ static inline void buffer_free(buffer_t *buf) {
|
|
|
60
57
|
buf->cap = 0;
|
|
61
58
|
}
|
|
62
59
|
|
|
60
|
+
static inline void buffer_reset(buffer_t *buf) {
|
|
61
|
+
buf->len = 0;
|
|
62
|
+
}
|
|
63
|
+
|
|
63
64
|
static size_t write_callback(char *ptr, size_t size, size_t nmemb, void *userdata) {
|
|
64
65
|
buffer_t *buf = (buffer_t *)userdata;
|
|
65
66
|
size_t total = size * nmemb;
|
|
66
|
-
|
|
67
|
-
if (buf->len + total > buf->max_size) {
|
|
67
|
+
if (buf->len + total > buf->max_size)
|
|
68
68
|
return 0;
|
|
69
|
-
}
|
|
70
|
-
|
|
71
69
|
if (buf->len + total >= buf->cap) {
|
|
72
70
|
size_t new_cap = (buf->cap == 0) ? INITIAL_BUF_CAP : buf->cap;
|
|
73
71
|
while (new_cap <= buf->len + total)
|
|
74
72
|
new_cap *= 2;
|
|
75
|
-
|
|
76
73
|
if (new_cap > buf->max_size)
|
|
77
74
|
new_cap = buf->max_size;
|
|
78
|
-
|
|
79
75
|
char *new_data = realloc(buf->data, new_cap);
|
|
80
76
|
if (!new_data)
|
|
81
77
|
return 0;
|
|
82
78
|
buf->data = new_data;
|
|
83
79
|
buf->cap = new_cap;
|
|
84
80
|
}
|
|
85
|
-
|
|
86
81
|
memcpy(buf->data + buf->len, ptr, total);
|
|
87
82
|
buf->len += total;
|
|
88
83
|
return total;
|
|
@@ -114,36 +109,36 @@ static void header_list_free(header_list_t *h) {
|
|
|
114
109
|
h->cap = 0;
|
|
115
110
|
}
|
|
116
111
|
|
|
112
|
+
static void header_list_reset(header_list_t *h) {
|
|
113
|
+
for (int i = 0; i < h->count; i++)
|
|
114
|
+
free(h->entries[i].str);
|
|
115
|
+
h->count = 0;
|
|
116
|
+
}
|
|
117
|
+
|
|
117
118
|
static size_t header_callback(char *ptr, size_t size, size_t nmemb, void *userdata) {
|
|
118
119
|
header_list_t *h = (header_list_t *)userdata;
|
|
119
120
|
size_t total = size * nmemb;
|
|
120
|
-
|
|
121
121
|
if (total <= 2)
|
|
122
122
|
return total;
|
|
123
|
-
|
|
124
123
|
if (h->count >= h->cap) {
|
|
125
124
|
int new_cap = (h->cap == 0) ? INITIAL_HEADER_CAP : h->cap * 2;
|
|
126
|
-
header_entry_t *
|
|
127
|
-
if (!
|
|
125
|
+
header_entry_t *ne = realloc(h->entries, sizeof(header_entry_t) * new_cap);
|
|
126
|
+
if (!ne)
|
|
128
127
|
return 0;
|
|
129
|
-
h->entries =
|
|
128
|
+
h->entries = ne;
|
|
130
129
|
h->cap = new_cap;
|
|
131
130
|
}
|
|
132
|
-
|
|
133
131
|
size_t stripped = total;
|
|
134
132
|
while (stripped > 0 && (ptr[stripped - 1] == '\r' || ptr[stripped - 1] == '\n'))
|
|
135
133
|
stripped--;
|
|
136
|
-
|
|
137
134
|
char *entry = malloc(stripped + 1);
|
|
138
135
|
if (!entry)
|
|
139
136
|
return 0;
|
|
140
137
|
memcpy(entry, ptr, stripped);
|
|
141
138
|
entry[stripped] = '\0';
|
|
142
|
-
|
|
143
139
|
h->entries[h->count].str = entry;
|
|
144
140
|
h->entries[h->count].len = stripped;
|
|
145
141
|
h->count++;
|
|
146
|
-
|
|
147
142
|
return size * nmemb;
|
|
148
143
|
}
|
|
149
144
|
|
|
@@ -154,6 +149,8 @@ typedef struct {
|
|
|
154
149
|
header_list_t headers;
|
|
155
150
|
struct curl_slist *req_headers;
|
|
156
151
|
int done;
|
|
152
|
+
CURLcode curl_result;
|
|
153
|
+
long http_status;
|
|
157
154
|
} request_ctx_t;
|
|
158
155
|
|
|
159
156
|
static inline void request_ctx_init(request_ctx_t *ctx, int index) {
|
|
@@ -163,6 +160,8 @@ static inline void request_ctx_init(request_ctx_t *ctx, int index) {
|
|
|
163
160
|
header_list_init(&ctx->headers);
|
|
164
161
|
ctx->req_headers = NULL;
|
|
165
162
|
ctx->done = 0;
|
|
163
|
+
ctx->curl_result = CURLE_OK;
|
|
164
|
+
ctx->http_status = 0;
|
|
166
165
|
}
|
|
167
166
|
|
|
168
167
|
static void request_ctx_free(request_ctx_t *ctx) {
|
|
@@ -178,6 +177,26 @@ static void request_ctx_free(request_ctx_t *ctx) {
|
|
|
178
177
|
}
|
|
179
178
|
}
|
|
180
179
|
|
|
180
|
+
static int request_ctx_reset_for_retry(request_ctx_t *ctx) {
|
|
181
|
+
if (ctx->easy) {
|
|
182
|
+
curl_easy_cleanup(ctx->easy);
|
|
183
|
+
ctx->easy = NULL;
|
|
184
|
+
}
|
|
185
|
+
buffer_reset(&ctx->body);
|
|
186
|
+
header_list_reset(&ctx->headers);
|
|
187
|
+
if (ctx->req_headers) {
|
|
188
|
+
curl_slist_free_all(ctx->req_headers);
|
|
189
|
+
ctx->req_headers = NULL;
|
|
190
|
+
}
|
|
191
|
+
ctx->easy = curl_easy_init();
|
|
192
|
+
if (!ctx->easy)
|
|
193
|
+
return 0;
|
|
194
|
+
ctx->done = 0;
|
|
195
|
+
ctx->curl_result = CURLE_OK;
|
|
196
|
+
ctx->http_status = 0;
|
|
197
|
+
return 1;
|
|
198
|
+
}
|
|
199
|
+
|
|
181
200
|
typedef struct {
|
|
182
201
|
CURLM *multi;
|
|
183
202
|
request_ctx_t *requests;
|
|
@@ -185,87 +204,134 @@ typedef struct {
|
|
|
185
204
|
int still_running;
|
|
186
205
|
long timeout_ms;
|
|
187
206
|
int max_connections;
|
|
207
|
+
volatile int cancelled;
|
|
188
208
|
} multi_session_t;
|
|
189
209
|
|
|
210
|
+
typedef struct {
|
|
211
|
+
int max_retries;
|
|
212
|
+
long retry_delay_ms;
|
|
213
|
+
int *retry_http_codes;
|
|
214
|
+
int retry_http_count;
|
|
215
|
+
} retry_config_t;
|
|
216
|
+
|
|
217
|
+
static int contains_header_injection(const char *str, long len) {
|
|
218
|
+
for (long i = 0; i < len; i++) {
|
|
219
|
+
if (str[i] == '\r' || str[i] == '\n' || str[i] == '\0')
|
|
220
|
+
return 1;
|
|
221
|
+
}
|
|
222
|
+
return 0;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
#ifdef HAVE_RB_FIBER_SCHEDULER_CURRENT
|
|
226
|
+
static VALUE current_fiber_scheduler(void) {
|
|
227
|
+
VALUE sched = rb_fiber_scheduler_current();
|
|
228
|
+
if (sched == Qnil || sched == Qfalse)
|
|
229
|
+
return Qnil;
|
|
230
|
+
return sched;
|
|
231
|
+
}
|
|
232
|
+
#else
|
|
233
|
+
static VALUE current_fiber_scheduler(void) {
|
|
234
|
+
return Qnil;
|
|
235
|
+
}
|
|
236
|
+
#endif
|
|
237
|
+
|
|
238
|
+
typedef struct {
|
|
239
|
+
void *(*func)(void *);
|
|
240
|
+
void *arg;
|
|
241
|
+
VALUE scheduler;
|
|
242
|
+
VALUE blocker;
|
|
243
|
+
VALUE fiber;
|
|
244
|
+
} fiber_worker_ctx_t;
|
|
245
|
+
|
|
246
|
+
static void *fiber_worker_nogvl(void *arg) {
|
|
247
|
+
fiber_worker_ctx_t *c = (fiber_worker_ctx_t *)arg;
|
|
248
|
+
c->func(c->arg);
|
|
249
|
+
return NULL;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
static VALUE fiber_worker_thread(void *arg) {
|
|
253
|
+
fiber_worker_ctx_t *c = (fiber_worker_ctx_t *)arg;
|
|
254
|
+
rb_thread_call_without_gvl(fiber_worker_nogvl, c, RUBY_UBF_PROCESS, NULL);
|
|
255
|
+
rb_fiber_scheduler_unblock(c->scheduler, c->blocker, c->fiber);
|
|
256
|
+
return Qnil;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
static void run_via_fiber_worker(VALUE scheduler, void *(*func)(void *), void *arg) {
|
|
260
|
+
fiber_worker_ctx_t ctx = {
|
|
261
|
+
.func = func,
|
|
262
|
+
.arg = arg,
|
|
263
|
+
.scheduler = scheduler,
|
|
264
|
+
.blocker = rb_obj_alloc(rb_cObject),
|
|
265
|
+
.fiber = rb_fiber_current(),
|
|
266
|
+
};
|
|
267
|
+
VALUE th = rb_thread_create(fiber_worker_thread, &ctx);
|
|
268
|
+
rb_fiber_scheduler_block(scheduler, ctx.blocker, Qnil);
|
|
269
|
+
rb_funcall(th, rb_intern("join"), 0);
|
|
270
|
+
}
|
|
271
|
+
|
|
190
272
|
static VALUE build_response(request_ctx_t *ctx) {
|
|
191
273
|
long status = 0;
|
|
192
274
|
curl_easy_getinfo(ctx->easy, CURLINFO_RESPONSE_CODE, &status);
|
|
193
|
-
|
|
194
275
|
VALUE headers_hash = rb_hash_new();
|
|
195
276
|
for (int i = 0; i < ctx->headers.count; i++) {
|
|
196
277
|
const char *hdr = ctx->headers.entries[i].str;
|
|
197
278
|
size_t hdr_len = ctx->headers.entries[i].len;
|
|
198
|
-
|
|
199
279
|
const char *colon = memchr(hdr, ':', hdr_len);
|
|
200
280
|
if (!colon)
|
|
201
281
|
continue;
|
|
202
|
-
|
|
203
282
|
VALUE key = rb_str_new(hdr, colon - hdr);
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
while (val_end > val_start && (*(val_end - 1) == ' ' || *(val_end - 1) == '\t'))
|
|
212
|
-
val_end--;
|
|
213
|
-
|
|
214
|
-
VALUE val = rb_str_new(val_start, val_end - val_start);
|
|
283
|
+
const char *vs = colon + 1, *ve = hdr + hdr_len;
|
|
284
|
+
while (vs < ve && (*vs == ' ' || *vs == '\t'))
|
|
285
|
+
vs++;
|
|
286
|
+
while (ve > vs && (*(ve - 1) == ' ' || *(ve - 1) == '\t'))
|
|
287
|
+
ve--;
|
|
288
|
+
VALUE val = rb_str_new(vs, ve - vs);
|
|
215
289
|
rb_hash_aset(headers_hash, key, val);
|
|
216
290
|
}
|
|
217
|
-
|
|
218
291
|
VALUE body_str =
|
|
219
292
|
ctx->body.data ? rb_str_new(ctx->body.data, ctx->body.len) : rb_str_new_cstr("");
|
|
220
|
-
|
|
221
293
|
VALUE result = rb_hash_new();
|
|
222
294
|
rb_hash_aset(result, sym_status, LONG2NUM(status));
|
|
223
295
|
rb_hash_aset(result, sym_headers, headers_hash);
|
|
224
296
|
rb_hash_aset(result, sym_body, body_str);
|
|
225
|
-
|
|
226
297
|
return result;
|
|
227
298
|
}
|
|
228
299
|
|
|
229
300
|
static VALUE build_error_response(const char *message) {
|
|
230
|
-
VALUE
|
|
231
|
-
rb_hash_aset(
|
|
232
|
-
rb_hash_aset(
|
|
233
|
-
rb_hash_aset(
|
|
234
|
-
return
|
|
301
|
+
VALUE r = rb_hash_new();
|
|
302
|
+
rb_hash_aset(r, sym_status, INT2NUM(0));
|
|
303
|
+
rb_hash_aset(r, sym_headers, Qnil);
|
|
304
|
+
rb_hash_aset(r, sym_body, rb_str_new_cstr(message));
|
|
305
|
+
return r;
|
|
235
306
|
}
|
|
236
307
|
|
|
237
308
|
static VALUE build_error_response_with_code(const char *message, int error_code) {
|
|
238
|
-
VALUE
|
|
239
|
-
rb_hash_aset(
|
|
240
|
-
rb_hash_aset(
|
|
241
|
-
rb_hash_aset(
|
|
242
|
-
rb_hash_aset(
|
|
243
|
-
return
|
|
309
|
+
VALUE r = rb_hash_new();
|
|
310
|
+
rb_hash_aset(r, sym_status, INT2NUM(0));
|
|
311
|
+
rb_hash_aset(r, sym_headers, Qnil);
|
|
312
|
+
rb_hash_aset(r, sym_body, rb_str_new_cstr(message));
|
|
313
|
+
rb_hash_aset(r, sym_error_code, INT2NUM(error_code));
|
|
314
|
+
return r;
|
|
244
315
|
}
|
|
245
316
|
|
|
246
317
|
static int is_valid_url(const char *url) {
|
|
247
318
|
if (!url)
|
|
248
319
|
return 0;
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
if (url_len < 8 || url_len > 2048)
|
|
320
|
+
size_t len = strlen(url);
|
|
321
|
+
if (len < 8 || len > 2048)
|
|
253
322
|
return 0;
|
|
254
|
-
|
|
255
323
|
if (strncmp(url, "https://", 8) == 0)
|
|
256
324
|
return 1;
|
|
257
|
-
if (
|
|
325
|
+
if (len >= 7 && strncmp(url, "http://", 7) == 0)
|
|
258
326
|
return 1;
|
|
259
|
-
|
|
260
327
|
return 0;
|
|
261
328
|
}
|
|
262
329
|
|
|
263
|
-
#define CURL_SETOPT_CHECK(handle, option, value)
|
|
264
|
-
do {
|
|
265
|
-
CURLcode
|
|
266
|
-
if (
|
|
267
|
-
return
|
|
268
|
-
} \
|
|
330
|
+
#define CURL_SETOPT_CHECK(handle, option, value) \
|
|
331
|
+
do { \
|
|
332
|
+
CURLcode _r = curl_easy_setopt(handle, option, value); \
|
|
333
|
+
if (_r != CURLE_OK) \
|
|
334
|
+
return _r; \
|
|
269
335
|
} while (0)
|
|
270
336
|
|
|
271
337
|
static CURLcode setup_basic_options(CURL *easy, const char *url_str, long timeout_sec,
|
|
@@ -282,14 +348,12 @@ static CURLcode setup_basic_options(CURL *easy, const char *url_str, long timeou
|
|
|
282
348
|
CURL_SETOPT_CHECK(easy, CURLOPT_ACCEPT_ENCODING, "");
|
|
283
349
|
CURL_SETOPT_CHECK(easy, CURLOPT_PRIVATE, (char *)ctx);
|
|
284
350
|
CURL_SETOPT_CHECK(easy, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2TLS);
|
|
285
|
-
|
|
286
351
|
return CURLE_OK;
|
|
287
352
|
}
|
|
288
353
|
|
|
289
354
|
static CURLcode setup_security_options(CURL *easy) {
|
|
290
355
|
CURL_SETOPT_CHECK(easy, CURLOPT_SSL_VERIFYPEER, 1L);
|
|
291
356
|
CURL_SETOPT_CHECK(easy, CURLOPT_SSL_VERIFYHOST, 2L);
|
|
292
|
-
|
|
293
357
|
#ifdef CURLOPT_PROTOCOLS_STR
|
|
294
358
|
CURL_SETOPT_CHECK(easy, CURLOPT_PROTOCOLS_STR, "http,https");
|
|
295
359
|
CURL_SETOPT_CHECK(easy, CURLOPT_REDIR_PROTOCOLS_STR, "http,https");
|
|
@@ -297,7 +361,6 @@ static CURLcode setup_security_options(CURL *easy) {
|
|
|
297
361
|
CURL_SETOPT_CHECK(easy, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
|
298
362
|
CURL_SETOPT_CHECK(easy, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
|
299
363
|
#endif
|
|
300
|
-
|
|
301
364
|
return CURLE_OK;
|
|
302
365
|
}
|
|
303
366
|
|
|
@@ -318,46 +381,45 @@ static CURLcode setup_method_and_body(CURL *easy, VALUE method, VALUE body) {
|
|
|
318
381
|
}
|
|
319
382
|
|
|
320
383
|
if (!NIL_P(body)) {
|
|
321
|
-
CURL_SETOPT_CHECK(easy, CURLOPT_POSTFIELDS, StringValuePtr(body));
|
|
322
384
|
CURL_SETOPT_CHECK(easy, CURLOPT_POSTFIELDSIZE, RSTRING_LEN(body));
|
|
385
|
+
CURL_SETOPT_CHECK(easy, CURLOPT_COPYPOSTFIELDS, StringValuePtr(body));
|
|
323
386
|
}
|
|
324
|
-
|
|
325
387
|
return CURLE_OK;
|
|
326
388
|
}
|
|
327
389
|
|
|
328
390
|
static int header_iter_cb(VALUE key, VALUE val, VALUE arg) {
|
|
329
391
|
request_ctx_t *ctx = (request_ctx_t *)arg;
|
|
330
|
-
|
|
331
392
|
VALUE key_str = rb_String(key);
|
|
332
393
|
const char *k = RSTRING_PTR(key_str);
|
|
333
394
|
long klen = RSTRING_LEN(key_str);
|
|
334
395
|
|
|
396
|
+
if (contains_header_injection(k, klen))
|
|
397
|
+
return ST_CONTINUE;
|
|
398
|
+
|
|
335
399
|
if (NIL_P(val) || RSTRING_LEN(rb_String(val)) == 0) {
|
|
336
|
-
char
|
|
337
|
-
char *buf =
|
|
400
|
+
char sbuf[HEADER_LINE_BUF_SIZE];
|
|
401
|
+
char *buf = sbuf;
|
|
338
402
|
long need = klen + 2;
|
|
339
|
-
|
|
340
403
|
if (need > HEADER_LINE_BUF_SIZE)
|
|
341
404
|
buf = malloc(need);
|
|
342
405
|
if (!buf)
|
|
343
406
|
return ST_CONTINUE;
|
|
344
|
-
|
|
345
407
|
memcpy(buf, k, klen);
|
|
346
408
|
buf[klen] = ';';
|
|
347
409
|
buf[klen + 1] = '\0';
|
|
348
|
-
|
|
349
410
|
ctx->req_headers = curl_slist_append(ctx->req_headers, buf);
|
|
350
|
-
|
|
351
|
-
if (buf != stack_buf)
|
|
411
|
+
if (buf != sbuf)
|
|
352
412
|
free(buf);
|
|
353
413
|
} else {
|
|
354
414
|
VALUE val_str = rb_String(val);
|
|
355
415
|
const char *v = RSTRING_PTR(val_str);
|
|
356
416
|
long vlen = RSTRING_LEN(val_str);
|
|
357
|
-
char stack_buf[HEADER_LINE_BUF_SIZE];
|
|
358
|
-
char *buf = stack_buf;
|
|
359
|
-
long need = klen + 2 + vlen + 1;
|
|
360
417
|
|
|
418
|
+
if (contains_header_injection(v, vlen))
|
|
419
|
+
return ST_CONTINUE;
|
|
420
|
+
char sbuf[HEADER_LINE_BUF_SIZE];
|
|
421
|
+
char *buf = sbuf;
|
|
422
|
+
long need = klen + 2 + vlen + 1;
|
|
361
423
|
if (need > HEADER_LINE_BUF_SIZE)
|
|
362
424
|
buf = malloc(need);
|
|
363
425
|
if (!buf)
|
|
@@ -368,13 +430,10 @@ static int header_iter_cb(VALUE key, VALUE val, VALUE arg) {
|
|
|
368
430
|
buf[klen + 1] = ' ';
|
|
369
431
|
memcpy(buf + klen + 2, v, vlen);
|
|
370
432
|
buf[klen + 2 + vlen] = '\0';
|
|
371
|
-
|
|
372
433
|
ctx->req_headers = curl_slist_append(ctx->req_headers, buf);
|
|
373
|
-
|
|
374
|
-
if (buf != stack_buf)
|
|
434
|
+
if (buf != sbuf)
|
|
375
435
|
free(buf);
|
|
376
436
|
}
|
|
377
|
-
|
|
378
437
|
return ST_CONTINUE;
|
|
379
438
|
}
|
|
380
439
|
|
|
@@ -383,33 +442,25 @@ static int setup_easy_handle(request_ctx_t *ctx, VALUE request, long timeout_sec
|
|
|
383
442
|
VALUE method = rb_hash_aref(request, sym_method);
|
|
384
443
|
VALUE headers = rb_hash_aref(request, sym_headers);
|
|
385
444
|
VALUE body = rb_hash_aref(request, sym_body);
|
|
386
|
-
|
|
387
445
|
if (NIL_P(url))
|
|
388
446
|
return 0;
|
|
389
|
-
|
|
390
447
|
const char *url_str = StringValueCStr(url);
|
|
391
|
-
|
|
392
|
-
if (!is_valid_url(url_str)) {
|
|
448
|
+
if (!is_valid_url(url_str))
|
|
393
449
|
rb_raise(rb_eArgError, "Invalid URL: %s", url_str);
|
|
394
|
-
}
|
|
395
450
|
|
|
396
451
|
CURLcode res;
|
|
397
|
-
|
|
398
452
|
res = setup_basic_options(ctx->easy, url_str, timeout_sec, ctx);
|
|
399
453
|
if (res != CURLE_OK)
|
|
400
454
|
return 0;
|
|
401
|
-
|
|
402
455
|
res = setup_security_options(ctx->easy);
|
|
403
456
|
if (res != CURLE_OK)
|
|
404
457
|
return 0;
|
|
405
|
-
|
|
406
458
|
res = setup_method_and_body(ctx->easy, method, body);
|
|
407
459
|
if (res != CURLE_OK)
|
|
408
460
|
return 0;
|
|
409
461
|
|
|
410
462
|
if (!NIL_P(headers) && rb_obj_is_kind_of(headers, rb_cHash)) {
|
|
411
463
|
rb_hash_foreach(headers, header_iter_cb, (VALUE)ctx);
|
|
412
|
-
|
|
413
464
|
if (ctx->req_headers) {
|
|
414
465
|
res = curl_easy_setopt(ctx->easy, CURLOPT_HTTPHEADER, ctx->req_headers);
|
|
415
466
|
if (res != CURLE_OK)
|
|
@@ -417,46 +468,29 @@ static int setup_easy_handle(request_ctx_t *ctx, VALUE request, long timeout_sec
|
|
|
417
468
|
}
|
|
418
469
|
}
|
|
419
470
|
|
|
471
|
+
RB_GC_GUARD(url);
|
|
472
|
+
RB_GC_GUARD(method);
|
|
473
|
+
RB_GC_GUARD(headers);
|
|
474
|
+
RB_GC_GUARD(body);
|
|
475
|
+
RB_GC_GUARD(request);
|
|
420
476
|
return 1;
|
|
421
477
|
}
|
|
422
478
|
|
|
423
|
-
static void *perform_without_gvl(void *arg) {
|
|
424
|
-
multi_session_t *session = (multi_session_t *)arg;
|
|
425
|
-
|
|
426
|
-
while (session->still_running > 0) {
|
|
427
|
-
CURLMcode mc = curl_multi_perform(session->multi, &session->still_running);
|
|
428
|
-
if (mc != CURLM_OK)
|
|
429
|
-
break;
|
|
430
|
-
|
|
431
|
-
if (session->still_running > 0) {
|
|
432
|
-
int numfds = 0;
|
|
433
|
-
mc = curl_multi_poll(session->multi, NULL, 0, POLL_TIMEOUT_MS, &numfds);
|
|
434
|
-
if (mc != CURLM_OK)
|
|
435
|
-
break;
|
|
436
|
-
}
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
return NULL;
|
|
440
|
-
}
|
|
441
|
-
|
|
442
479
|
static void *poll_without_gvl(void *arg) {
|
|
443
|
-
multi_session_t *
|
|
480
|
+
multi_session_t *s = (multi_session_t *)arg;
|
|
481
|
+
if (s->cancelled)
|
|
482
|
+
return NULL;
|
|
444
483
|
int numfds = 0;
|
|
445
|
-
curl_multi_poll(
|
|
446
|
-
curl_multi_perform(
|
|
484
|
+
curl_multi_poll(s->multi, NULL, 0, POLL_TIMEOUT_MS, &numfds);
|
|
485
|
+
curl_multi_perform(s->multi, &s->still_running);
|
|
447
486
|
return NULL;
|
|
448
487
|
}
|
|
449
488
|
|
|
450
489
|
static void unblock_perform(void *arg) {
|
|
451
|
-
(
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
#ifdef HAVE_RB_FIBER_SCHEDULER_CURRENT
|
|
456
|
-
VALUE scheduler = rb_fiber_scheduler_current();
|
|
457
|
-
return scheduler != Qnil && scheduler != Qfalse;
|
|
458
|
-
#else
|
|
459
|
-
return 0;
|
|
490
|
+
multi_session_t *s = (multi_session_t *)arg;
|
|
491
|
+
s->cancelled = 1;
|
|
492
|
+
#ifdef HAVE_CURL_MULTI_WAKEUP
|
|
493
|
+
curl_multi_wakeup(s->multi);
|
|
460
494
|
#endif
|
|
461
495
|
}
|
|
462
496
|
|
|
@@ -470,181 +504,394 @@ typedef struct {
|
|
|
470
504
|
static int process_completed(multi_session_t *session, completion_ctx_t *cctx) {
|
|
471
505
|
CURLMsg *msg;
|
|
472
506
|
int msgs_left;
|
|
473
|
-
|
|
474
507
|
while ((msg = curl_multi_info_read(session->multi, &msgs_left))) {
|
|
475
508
|
if (msg->msg != CURLMSG_DONE)
|
|
476
509
|
continue;
|
|
477
|
-
|
|
478
510
|
request_ctx_t *ctx = NULL;
|
|
479
511
|
curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, (char **)&ctx);
|
|
480
512
|
if (!ctx || ctx->done)
|
|
481
513
|
continue;
|
|
482
514
|
ctx->done = 1;
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
515
|
+
ctx->curl_result = msg->data.result;
|
|
516
|
+
if (msg->data.result == CURLE_OK)
|
|
517
|
+
curl_easy_getinfo(ctx->easy, CURLINFO_RESPONSE_CODE, &ctx->http_status);
|
|
518
|
+
if (cctx->stream) {
|
|
519
|
+
VALUE response = (msg->data.result == CURLE_OK)
|
|
520
|
+
? build_response(ctx)
|
|
521
|
+
: build_error_response_with_code(
|
|
522
|
+
curl_easy_strerror(msg->data.result), (int)msg->data.result);
|
|
523
|
+
VALUE pair = rb_ary_new_from_args(2, INT2NUM(ctx->index), response);
|
|
524
|
+
rb_yield(pair);
|
|
525
|
+
cctx->completed++;
|
|
487
526
|
} else {
|
|
488
|
-
|
|
489
|
-
(int)msg->data.result);
|
|
527
|
+
cctx->completed++;
|
|
490
528
|
}
|
|
529
|
+
if (cctx->target > 0 && cctx->completed >= cctx->target)
|
|
530
|
+
return 1;
|
|
531
|
+
}
|
|
532
|
+
return 0;
|
|
533
|
+
}
|
|
491
534
|
|
|
492
|
-
|
|
535
|
+
static void run_multi_loop(multi_session_t *session, completion_ctx_t *cctx) {
|
|
536
|
+
VALUE scheduler = current_fiber_scheduler();
|
|
493
537
|
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
538
|
+
if (scheduler != Qnil) {
|
|
539
|
+
curl_multi_perform(session->multi, &session->still_running);
|
|
540
|
+
while (session->still_running > 0) {
|
|
541
|
+
if (session->cancelled)
|
|
542
|
+
break;
|
|
543
|
+
run_via_fiber_worker(scheduler, poll_without_gvl, session);
|
|
544
|
+
if (process_completed(session, cctx))
|
|
545
|
+
break;
|
|
498
546
|
}
|
|
547
|
+
process_completed(session, cctx);
|
|
548
|
+
} else {
|
|
549
|
+
curl_multi_perform(session->multi, &session->still_running);
|
|
550
|
+
while (session->still_running > 0) {
|
|
551
|
+
if (session->cancelled)
|
|
552
|
+
break;
|
|
553
|
+
rb_thread_call_without_gvl(poll_without_gvl, session, unblock_perform, session);
|
|
554
|
+
if (process_completed(session, cctx))
|
|
555
|
+
break;
|
|
556
|
+
}
|
|
557
|
+
process_completed(session, cctx);
|
|
558
|
+
}
|
|
559
|
+
}
|
|
499
560
|
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
if (
|
|
561
|
+
static int is_default_retryable_curle(CURLcode code) {
|
|
562
|
+
for (int i = 0; i < DEFAULT_RETRYABLE_CURLE_COUNT; i++)
|
|
563
|
+
if (DEFAULT_RETRYABLE_CURLE[i] == code)
|
|
503
564
|
return 1;
|
|
504
|
-
|
|
565
|
+
return 0;
|
|
566
|
+
}
|
|
505
567
|
|
|
568
|
+
static int should_retry(request_ctx_t *ctx, retry_config_t *rc) {
|
|
569
|
+
if (ctx->curl_result != CURLE_OK)
|
|
570
|
+
return is_default_retryable_curle(ctx->curl_result);
|
|
571
|
+
for (int i = 0; i < rc->retry_http_count; i++)
|
|
572
|
+
if (rc->retry_http_codes[i] == (int)ctx->http_status)
|
|
573
|
+
return 1;
|
|
506
574
|
return 0;
|
|
507
575
|
}
|
|
508
576
|
|
|
509
|
-
|
|
577
|
+
typedef struct {
|
|
578
|
+
long delay_ms;
|
|
579
|
+
} sleep_arg_t;
|
|
580
|
+
|
|
581
|
+
static void *sleep_without_gvl(void *arg) {
|
|
582
|
+
sleep_arg_t *sa = (sleep_arg_t *)arg;
|
|
583
|
+
struct timespec ts = {.tv_sec = sa->delay_ms / 1000,
|
|
584
|
+
.tv_nsec = (sa->delay_ms % 1000) * 1000000L};
|
|
585
|
+
nanosleep(&ts, NULL);
|
|
586
|
+
return NULL;
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
/* FIX #2: Fiber path releases GVL via run_via_fiber_worker */
|
|
590
|
+
static void retry_delay_sleep(long delay_ms) {
|
|
591
|
+
if (delay_ms <= 0)
|
|
592
|
+
return;
|
|
593
|
+
VALUE scheduler = current_fiber_scheduler();
|
|
594
|
+
if (scheduler != Qnil) {
|
|
595
|
+
long remaining = delay_ms;
|
|
596
|
+
while (remaining > 0) {
|
|
597
|
+
long chunk = remaining > FIBER_POLL_TIMEOUT_MS ? FIBER_POLL_TIMEOUT_MS : remaining;
|
|
598
|
+
sleep_arg_t sa = {.delay_ms = chunk};
|
|
599
|
+
run_via_fiber_worker(scheduler, sleep_without_gvl, &sa);
|
|
600
|
+
remaining -= chunk;
|
|
601
|
+
}
|
|
602
|
+
} else {
|
|
603
|
+
sleep_arg_t sa = {.delay_ms = delay_ms};
|
|
604
|
+
rb_thread_call_without_gvl(sleep_without_gvl, &sa, (rb_unblock_function_t *)0, NULL);
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
/*
 * Read the user-supplied options hash into plain C settings.
 *
 * options:   Ruby Hash (anything else, including nil, keeps the defaults).
 * timeout:   out, seconds; default 30, capped at MAX_TIMEOUT, <= 0 -> 30.
 * max_conn:  out, connection limit; default 20, capped at 100, <= 0 -> 20.
 * retry_cfg: out; max_retries clamped to 0..MAX_RETRIES, retry_delay_ms
 *            clamped to 0..30000, and retry_http_codes set to a malloc'd copy
 *            of the :retry_codes array (NULL / count 0 when absent or empty).
 *            The caller owns retry_http_codes and must free() it.
 *
 * Raises TypeError/RangeError from the NUM2* conversions, ArgumentError when
 * :retry_codes has more than INT_MAX entries, and NoMemoryError when the code
 * list cannot be allocated.
 */
static void parse_options(VALUE options, long *timeout, int *max_conn, retry_config_t *retry_cfg) {
  *timeout = 30;
  *max_conn = 20;
  retry_cfg->max_retries = DEFAULT_RETRIES;
  retry_cfg->retry_delay_ms = DEFAULT_RETRY_DELAY;
  retry_cfg->retry_http_codes = NULL;
  retry_cfg->retry_http_count = 0;
  if (NIL_P(options) || !rb_obj_is_kind_of(options, rb_cHash))
    return;

  VALUE t = rb_hash_aref(options, sym_timeout);
  if (!NIL_P(t)) {
    long v = NUM2LONG(t);
    if (v > MAX_TIMEOUT)
      v = MAX_TIMEOUT;
    else if (v <= 0)
      v = 30;
    *timeout = v;
  }

  VALUE c = rb_hash_aref(options, sym_connections);
  if (!NIL_P(c)) {
    int v = NUM2INT(c);
    if (v > 100)
      v = 100;
    else if (v <= 0)
      v = 20;
    *max_conn = v;
  }

  VALUE r = rb_hash_aref(options, sym_retries);
  if (!NIL_P(r)) {
    int v = NUM2INT(r);
    if (v < 0)
      v = 0;
    if (v > MAX_RETRIES)
      v = MAX_RETRIES;
    retry_cfg->max_retries = v;
  }

  VALUE rd = rb_hash_aref(options, sym_retry_delay);
  if (!NIL_P(rd)) {
    long v = NUM2LONG(rd);
    if (v < 0)
      v = 0;
    if (v > 30000)
      v = 30000;
    retry_cfg->retry_delay_ms = v;
  }

  VALUE rc = rb_hash_aref(options, sym_retry_codes);
  if (!NIL_P(rc) && rb_obj_is_kind_of(rc, rb_cArray)) {
    long len = RARRAY_LEN(rc);
    /* retry_http_count is an int; refuse instead of silently truncating. */
    if (len > INT_MAX)
      rb_raise(rb_eArgError, "too many retry codes (%ld)", len);
    if (len > 0) {
      /*
       * NUM2INT can raise (non-numeric or out-of-range entry). Convert into a
       * Ruby-managed temporary buffer first so that nothing obtained from
       * malloc() is live while an exception can still unwind this frame --
       * the caller has not installed its rb_ensure cleanup yet.
       */
      volatile VALUE staging = Qnil;
      int *staged = ALLOCV_N(int, staging, len);
      for (long i = 0; i < len; i++)
        staged[i] = NUM2INT(rb_ary_entry(rc, i));

      int *codes = malloc(sizeof(int) * (size_t)len);
      if (!codes) {
        ALLOCV_END(staging);
        rb_raise(rb_eNoMemError, "failed to allocate retry codes");
      }
      for (long i = 0; i < len; i++)
        codes[i] = staged[i];
      ALLOCV_END(staging);

      retry_cfg->retry_http_codes = codes;
      retry_cfg->retry_http_count = (int)len;
    }
  }
}
|
|
536
667
|
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
int
|
|
540
|
-
|
|
541
|
-
|
|
668
|
+
typedef struct {
|
|
669
|
+
multi_session_t *session;
|
|
670
|
+
int *invalid;
|
|
671
|
+
retry_config_t *retry_cfg;
|
|
672
|
+
} cleanup_ctx_t;
|
|
673
|
+
|
|
674
|
+
static VALUE cleanup_session(VALUE arg) {
|
|
675
|
+
cleanup_ctx_t *ctx = (cleanup_ctx_t *)arg;
|
|
676
|
+
if (ctx->session->requests) {
|
|
677
|
+
for (int i = 0; i < ctx->session->count; i++) {
|
|
678
|
+
if (ctx->session->requests[i].easy)
|
|
679
|
+
curl_multi_remove_handle(ctx->session->multi, ctx->session->requests[i].easy);
|
|
680
|
+
request_ctx_free(&ctx->session->requests[i]);
|
|
681
|
+
}
|
|
682
|
+
free(ctx->session->requests);
|
|
683
|
+
ctx->session->requests = NULL;
|
|
684
|
+
}
|
|
685
|
+
if (ctx->invalid) {
|
|
686
|
+
free(ctx->invalid);
|
|
687
|
+
ctx->invalid = NULL;
|
|
688
|
+
}
|
|
689
|
+
if (ctx->session->multi) {
|
|
690
|
+
curl_multi_cleanup(ctx->session->multi);
|
|
691
|
+
ctx->session->multi = NULL;
|
|
692
|
+
}
|
|
693
|
+
if (ctx->retry_cfg && ctx->retry_cfg->retry_http_codes) {
|
|
694
|
+
free(ctx->retry_cfg->retry_http_codes);
|
|
695
|
+
ctx->retry_cfg->retry_http_codes = NULL;
|
|
696
|
+
}
|
|
697
|
+
return Qnil;
|
|
698
|
+
}
|
|
542
699
|
|
|
700
|
+
typedef struct {
|
|
701
|
+
VALUE requests;
|
|
702
|
+
VALUE options;
|
|
703
|
+
int target;
|
|
704
|
+
int stream;
|
|
705
|
+
multi_session_t *session;
|
|
706
|
+
int *invalid;
|
|
707
|
+
retry_config_t *retry_cfg;
|
|
543
708
|
long timeout_sec;
|
|
544
|
-
|
|
545
|
-
parse_options(options, &timeout_sec, &max_conn);
|
|
709
|
+
} execute_args_t;
|
|
546
710
|
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
session
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
#ifdef CURLPIPE_MULTIPLEX
|
|
556
|
-
curl_multi_setopt(session.multi, CURLMOPT_PIPELINING, CURLPIPE_MULTIPLEX);
|
|
557
|
-
#endif
|
|
558
|
-
|
|
559
|
-
session.requests = calloc(count, sizeof(request_ctx_t));
|
|
560
|
-
if (!session.requests) {
|
|
561
|
-
curl_multi_cleanup(session.multi);
|
|
562
|
-
rb_raise(rb_eNoMemError, "failed to allocate request contexts");
|
|
563
|
-
}
|
|
711
|
+
static VALUE internal_execute_body(VALUE arg) {
|
|
712
|
+
execute_args_t *ea = (execute_args_t *)arg;
|
|
713
|
+
VALUE requests = ea->requests;
|
|
714
|
+
multi_session_t *session = ea->session;
|
|
715
|
+
int *invalid = ea->invalid;
|
|
716
|
+
retry_config_t *retry_cfg = ea->retry_cfg;
|
|
717
|
+
long timeout_sec = ea->timeout_sec;
|
|
718
|
+
int count = session->count, target = ea->target, stream = ea->stream;
|
|
564
719
|
|
|
565
720
|
int valid_requests = 0;
|
|
566
721
|
for (int i = 0; i < count; i++) {
|
|
567
722
|
VALUE req = rb_ary_entry(requests, i);
|
|
568
|
-
request_ctx_init(&session
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
723
|
+
request_ctx_init(&session->requests[i], i);
|
|
724
|
+
if (!setup_easy_handle(&session->requests[i], req, timeout_sec)) {
|
|
725
|
+
session->requests[i].done = 1;
|
|
726
|
+
invalid[i] = 1;
|
|
572
727
|
continue;
|
|
573
728
|
}
|
|
574
|
-
|
|
575
|
-
CURLMcode mc = curl_multi_add_handle(session.multi, session.requests[i].easy);
|
|
729
|
+
CURLMcode mc = curl_multi_add_handle(session->multi, session->requests[i].easy);
|
|
576
730
|
if (mc != CURLM_OK) {
|
|
577
|
-
session
|
|
731
|
+
session->requests[i].done = 1;
|
|
732
|
+
invalid[i] = 1;
|
|
578
733
|
continue;
|
|
579
734
|
}
|
|
580
|
-
|
|
581
735
|
valid_requests++;
|
|
582
736
|
}
|
|
583
|
-
|
|
584
737
|
if (valid_requests == 0)
|
|
585
|
-
session
|
|
738
|
+
session->still_running = 0;
|
|
586
739
|
|
|
587
740
|
completion_ctx_t cctx;
|
|
588
741
|
cctx.results = stream ? Qnil : rb_ary_new2(count);
|
|
589
742
|
cctx.completed = 0;
|
|
590
743
|
cctx.target = target;
|
|
591
744
|
cctx.stream = stream;
|
|
592
|
-
|
|
593
745
|
if (!stream) {
|
|
594
746
|
for (int i = 0; i < count; i++)
|
|
595
747
|
rb_ary_store(cctx.results, i, Qnil);
|
|
596
748
|
}
|
|
597
749
|
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
750
|
+
run_multi_loop(session, &cctx);
|
|
751
|
+
|
|
752
|
+
if (!stream && retry_cfg->max_retries > 0) {
|
|
753
|
+
int prev_all_failed = 0;
|
|
754
|
+
for (int attempt = 0; attempt < retry_cfg->max_retries; attempt++) {
|
|
755
|
+
int retry_count = 0;
|
|
756
|
+
int *ri = malloc(sizeof(int) * count);
|
|
757
|
+
if (!ri)
|
|
602
758
|
break;
|
|
603
|
-
|
|
759
|
+
for (int i = 0; i < count; i++) {
|
|
760
|
+
if (invalid[i] || !session->requests[i].done)
|
|
761
|
+
continue;
|
|
762
|
+
if (should_retry(&session->requests[i], retry_cfg))
|
|
763
|
+
ri[retry_count++] = i;
|
|
764
|
+
}
|
|
765
|
+
if (retry_count == 0) {
|
|
766
|
+
free(ri);
|
|
604
767
|
break;
|
|
605
|
-
|
|
768
|
+
}
|
|
769
|
+
int done_count = 0;
|
|
770
|
+
for (int i = 0; i < count; i++)
|
|
771
|
+
if (!invalid[i] && session->requests[i].done)
|
|
772
|
+
done_count++;
|
|
773
|
+
int all_failed = (retry_count == done_count);
|
|
774
|
+
if (all_failed && prev_all_failed) {
|
|
775
|
+
free(ri);
|
|
606
776
|
break;
|
|
607
|
-
|
|
608
|
-
int numfds = 0;
|
|
609
|
-
curl_multi_poll(session.multi, NULL, 0, FIBER_POLL_TIMEOUT_MS, &numfds);
|
|
610
|
-
rb_thread_schedule();
|
|
611
|
-
}
|
|
612
|
-
process_completed(&session, &cctx);
|
|
613
|
-
} else {
|
|
614
|
-
if (stream || target > 0) {
|
|
615
|
-
curl_multi_perform(session.multi, &session.still_running);
|
|
616
|
-
while (session.still_running > 0) {
|
|
617
|
-
rb_thread_call_without_gvl(poll_without_gvl, &session, unblock_perform, &session);
|
|
618
|
-
if (process_completed(&session, &cctx))
|
|
619
|
-
break;
|
|
620
777
|
}
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
778
|
+
prev_all_failed = all_failed;
|
|
779
|
+
retry_delay_sleep(retry_cfg->retry_delay_ms);
|
|
780
|
+
for (int r = 0; r < retry_count; r++) {
|
|
781
|
+
int idx = ri[r];
|
|
782
|
+
request_ctx_t *rc = &session->requests[idx];
|
|
783
|
+
curl_multi_remove_handle(session->multi, rc->easy);
|
|
784
|
+
if (!request_ctx_reset_for_retry(rc)) {
|
|
785
|
+
rc->done = 1;
|
|
786
|
+
invalid[idx] = 1;
|
|
787
|
+
continue;
|
|
788
|
+
}
|
|
789
|
+
VALUE req = rb_ary_entry(requests, idx);
|
|
790
|
+
if (!setup_easy_handle(rc, req, timeout_sec)) {
|
|
791
|
+
rc->done = 1;
|
|
792
|
+
invalid[idx] = 1;
|
|
793
|
+
continue;
|
|
794
|
+
}
|
|
795
|
+
CURLMcode mc = curl_multi_add_handle(session->multi, rc->easy);
|
|
796
|
+
if (mc != CURLM_OK) {
|
|
797
|
+
rc->done = 1;
|
|
798
|
+
invalid[idx] = 1;
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
free(ri);
|
|
802
|
+
cctx.completed = 0;
|
|
803
|
+
run_multi_loop(session, &cctx);
|
|
627
804
|
}
|
|
628
805
|
}
|
|
629
806
|
|
|
630
807
|
if (!stream) {
|
|
631
808
|
for (int i = 0; i < count; i++) {
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
809
|
+
request_ctx_t *rc = &session->requests[i];
|
|
810
|
+
VALUE response;
|
|
811
|
+
if (invalid[i]) {
|
|
812
|
+
response = build_error_response("Invalid request configuration");
|
|
813
|
+
} else if (rc->curl_result == CURLE_OK) {
|
|
814
|
+
response = build_response(rc);
|
|
815
|
+
} else {
|
|
816
|
+
response = build_error_response_with_code(curl_easy_strerror(rc->curl_result),
|
|
817
|
+
(int)rc->curl_result);
|
|
636
818
|
}
|
|
819
|
+
rb_ary_store(cctx.results, i, rb_ary_new_from_args(2, INT2NUM(i), response));
|
|
637
820
|
}
|
|
638
821
|
}
|
|
822
|
+
return stream ? Qnil : cctx.results;
|
|
823
|
+
}
|
|
639
824
|
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
825
|
+
/*
 * Shared entry point behind execute / first_execute / stream_execute.
 *
 * requests: Ruby Array of request descriptions (TypeError otherwise).
 * options:  options Hash or nil, interpreted by parse_options.
 * target:   forwarded to the completion context; a value > 0 disables retries.
 * stream:   non-zero selects streaming mode; also disables retries.
 *
 * Returns an empty Array for an empty request list, otherwise whatever
 * internal_execute_body returns. Raises ArgumentError for oversized request
 * lists, NoMemoryError when bookkeeping arrays cannot be allocated, and
 * RuntimeError when libcurl cannot create a multi handle.
 *
 * All native resources are released by cleanup_session through rb_ensure,
 * including when the body raises.
 */
static VALUE internal_execute(VALUE requests, VALUE options, int target, int stream) {
  Check_Type(requests, T_ARRAY);

  long count_long = RARRAY_LEN(requests);
  if (count_long == 0)
    return rb_ary_new();
  if (count_long > MAX_REQUESTS)
    rb_raise(rb_eArgError, "too many requests (%ld), maximum is %d", count_long, MAX_REQUESTS);
  if (count_long > INT_MAX)
    rb_raise(rb_eArgError, "request count overflows int");
  int count = (int)count_long;

  long timeout_sec;
  int max_conn;
  retry_config_t retry_cfg;
  parse_options(options, &timeout_sec, &max_conn, &retry_cfg);

  if (stream || target > 0) {
    if (retry_cfg.max_retries > 0 && stream)
      rb_warn("FastCurl: retries are not supported in stream_execute, ignoring "
              "retries option");
    if (retry_cfg.max_retries > 0 && target > 0)
      rb_warn("FastCurl: retries are not supported in first_execute, ignoring "
              "retries option");
    retry_cfg.max_retries = 0;
  }

  /* Zero the whole struct so no field is read while indeterminate. */
  multi_session_t session = {0};
  session.multi = curl_multi_init();
  if (!session.multi) {
    /* curl_multi_init() returns NULL on failure; no libcurl multi call may
     * be made with it, so bail out before touching it. */
    free(retry_cfg.retry_http_codes);
    rb_raise(rb_eRuntimeError, "failed to initialize curl multi handle");
  }
  session.count = count;
  session.timeout_ms = timeout_sec * 1000;
  session.max_connections = max_conn;
  session.cancelled = 0;
  session.requests = NULL;

  curl_multi_setopt(session.multi, CURLMOPT_MAXCONNECTS, (long)max_conn);
  curl_multi_setopt(session.multi, CURLMOPT_MAX_TOTAL_CONNECTIONS, (long)max_conn);
#ifdef CURLPIPE_MULTIPLEX
  curl_multi_setopt(session.multi, CURLMOPT_PIPELINING, CURLPIPE_MULTIPLEX);
#endif

  session.requests = calloc(count, sizeof(request_ctx_t));
  if (!session.requests) {
    curl_multi_cleanup(session.multi);
    free(retry_cfg.retry_http_codes);
    rb_raise(rb_eNoMemError, "failed to allocate request contexts");
  }

  int *invalid = calloc(count, sizeof(int));
  if (!invalid) {
    free(session.requests);
    curl_multi_cleanup(session.multi);
    free(retry_cfg.retry_http_codes);
    rb_raise(rb_eNoMemError, "failed to allocate tracking array");
  }

  /* From here on cleanup_session owns every resource allocated above. */
  cleanup_ctx_t cleanup = {.session = &session, .invalid = invalid, .retry_cfg = &retry_cfg};
  execute_args_t ea = {
      .requests = requests,
      .options = options,
      .target = target,
      .stream = stream,
      .session = &session,
      .invalid = invalid,
      .retry_cfg = &retry_cfg,
      .timeout_sec = timeout_sec,
  };
  return rb_ensure(internal_execute_body, (VALUE)&ea, cleanup_session, (VALUE)&cleanup);
}
|
|
649
896
|
|
|
650
897
|
static VALUE rb_fast_curl_execute(int argc, VALUE *argv, VALUE self) {
|
|
@@ -656,24 +903,20 @@ static VALUE rb_fast_curl_execute(int argc, VALUE *argv, VALUE self) {
|
|
|
656
903
|
/*
 * Ruby-facing first_execute(requests, **options).
 *
 * Reads the optional :count keyword (default 1) and forwards it as the
 * completion target to internal_execute in non-streaming mode.
 */
static VALUE rb_fast_curl_first_execute(int argc, VALUE *argv, VALUE self) {
  VALUE requests, options;
  rb_scan_args(argc, argv, "1:", &requests, &options);

  /* No keywords given -> options is nil and the default target applies. */
  VALUE wanted = NIL_P(options) ? Qnil : rb_hash_aref(options, sym_count);
  int count = NIL_P(wanted) ? 1 : NUM2INT(wanted);

  return internal_execute(requests, options, count, 0);
}
|
|
669
914
|
|
|
670
915
|
/*
 * Ruby-facing stream_execute(requests, **options) { ... }.
 *
 * Requires a block (ArgumentError otherwise) and runs internal_execute in
 * streaming mode with a target of -1.
 */
static VALUE rb_fast_curl_stream_execute(int argc, VALUE *argv, VALUE self) {
  VALUE requests, options;
  rb_scan_args(argc, argv, "1:", &requests, &options);

  int has_block = rb_block_given_p();
  if (!has_block) {
    rb_raise(rb_eArgError, "stream_execute requires a block");
  }

  return internal_execute(requests, options, -1, 1);
}
|
|
679
922
|
|
|
@@ -690,6 +933,9 @@ void Init_fast_curl(void) {
|
|
|
690
933
|
id_connections = rb_intern("connections");
|
|
691
934
|
id_count = rb_intern("count");
|
|
692
935
|
id_keys = rb_intern("keys");
|
|
936
|
+
id_retries = rb_intern("retries");
|
|
937
|
+
id_retry_delay = rb_intern("retry_delay");
|
|
938
|
+
id_retry_codes = rb_intern("retry_codes");
|
|
693
939
|
|
|
694
940
|
sym_status = ID2SYM(id_status);
|
|
695
941
|
rb_gc_register_address(&sym_status);
|
|
@@ -709,6 +955,12 @@ void Init_fast_curl(void) {
|
|
|
709
955
|
rb_gc_register_address(&sym_connections);
|
|
710
956
|
sym_count = ID2SYM(id_count);
|
|
711
957
|
rb_gc_register_address(&sym_count);
|
|
958
|
+
sym_retries = ID2SYM(id_retries);
|
|
959
|
+
rb_gc_register_address(&sym_retries);
|
|
960
|
+
sym_retry_delay = ID2SYM(id_retry_delay);
|
|
961
|
+
rb_gc_register_address(&sym_retry_delay);
|
|
962
|
+
sym_retry_codes = ID2SYM(id_retry_codes);
|
|
963
|
+
rb_gc_register_address(&sym_retry_codes);
|
|
712
964
|
|
|
713
965
|
VALUE mFastCurl = rb_define_module("FastCurl");
|
|
714
966
|
|
data/lib/fast_curl/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fast_curl
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- roman-haidarov
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-04-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: json
|
|
@@ -96,7 +96,8 @@ dependencies:
|
|
|
96
96
|
version: '1.8'
|
|
97
97
|
description: Parallel HTTP requests via libcurl curl_multi API. Releases GVL during
|
|
98
98
|
I/O, compatible with Async gem and Fiber scheduler. Supports execute (all), first_execute
|
|
99
|
-
(first N), stream_execute (yield as ready).
|
|
99
|
+
(first N), stream_execute (yield as ready). Built-in retry functionality for network
|
|
100
|
+
errors and custom HTTP status codes.
|
|
100
101
|
email:
|
|
101
102
|
- roman.haidarov@gmail.com
|
|
102
103
|
executables: []
|
|
@@ -122,7 +123,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
122
123
|
requirements:
|
|
123
124
|
- - ">="
|
|
124
125
|
- !ruby/object:Gem::Version
|
|
125
|
-
version: 3.
|
|
126
|
+
version: 3.1.0
|
|
126
127
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
127
128
|
requirements:
|
|
128
129
|
- - ">="
|