sqlite_web_vfs 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,435 @@
1
+ /** Wrappers for libcurl HTTP operations.
2
+ */
3
+ #pragma once
4
+
5
#include <unistd.h>

#include <algorithm>
#include <cctype>
#include <cerrno>
#include <chrono>
#include <climits>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <list>
#include <locale>
#include <map>
#include <memory>
#include <mutex>
#include <new>
#include <queue>
#include <sstream>
#include <string>
#include <thread>

#include <curl/curl.h>
18
+
19
+ // lazycurl: because libcurl.{so,dylib} has a large dependency tree, here's a mechanism to avoid
20
+ // linking it at build time, instead using dlopen() and dlsym() so that the program can defer
21
+ // loading it until necessary -- by invoking HTTP::global_init().
22
+ // Similar overrides may be needed for libcurl API functions we need to use.
23
#ifdef HTTP_LAZYCURL
#include <dlfcn.h>

// Table of function pointers, one per libcurl entry point used in this header. The pointers are
// filled in at run time by HTTP::global_init() via dlsym(); until then they are null (static
// storage is zero-initialized).
extern "C" {
struct lazycurl_api {
    CURLcode (*global_init)(long);
    CURL *(*easy_init)();
    void (*easy_cleanup)(CURL *);
    CURLcode (*easy_getinfo)(CURL *, CURLINFO, ...);
    CURLcode (*easy_setopt)(CURL *, CURLoption, ...);
    CURLcode (*easy_perform)(CURL *);
    char *(*easy_escape)(CURL *, const char *, int);
    char *(*easy_unescape)(CURL *, const char *, int, int *);
    const char *(*easy_strerror)(int);
    curl_slist *(*slist_append)(curl_slist *, const char *);
    void (*slist_free_all)(curl_slist *);
    void (*free)(void *);
};
}
static lazycurl_api __lazycurl;

// curl_easy_getinfo / curl_easy_setopt may already be defined as macros at this point; discard
// any such definition before redirecting the name. (#undef of a name that is not a macro is a
// no-op, so no #ifdef guard is required.)
#undef curl_easy_getinfo
#undef curl_easy_setopt

// Redirect the libcurl API names used by the rest of this header to the table above.
#define curl_global_init __lazycurl.global_init
#define curl_easy_init __lazycurl.easy_init
#define curl_easy_cleanup __lazycurl.easy_cleanup
#define curl_easy_getinfo __lazycurl.easy_getinfo
#define curl_easy_setopt __lazycurl.easy_setopt
#define curl_easy_perform __lazycurl.easy_perform
#define curl_easy_escape __lazycurl.easy_escape
#define curl_easy_unescape __lazycurl.easy_unescape
#define curl_easy_strerror __lazycurl.easy_strerror
#define curl_slist_append __lazycurl.slist_append
#define curl_slist_free_all __lazycurl.slist_free_all
#define curl_free __lazycurl.free
#endif
61
+
62
+ namespace HTTP {
63
+
64
+ #ifndef HTTP_LAZYCURL
65
+ CURLcode global_init() { return curl_global_init(CURL_GLOBAL_ALL); }
66
+ #else
67
+ CURLcode global_init() {
68
+ #if defined(__APPLE__)
69
+ static const char *libname[] = {"libcurl.4.dylib", "libcurl.dylib"};
70
+ #else
71
+ static const char *libname[] = {"libcurl.so.4", "libcurl.so"};
72
+ #endif
73
+ static void *hlib = nullptr;
74
+ if (hlib) {
75
+ return CURLE_OK;
76
+ }
77
+ for (int i = 0; !hlib && i < sizeof(libname) / sizeof(const char *); ++i) {
78
+ hlib = dlopen(libname[i], RTLD_NOW | RTLD_GLOBAL);
79
+ }
80
+ if (!hlib) {
81
+ return CURLE_FAILED_INIT;
82
+ }
83
+ if ((__lazycurl.global_init = (CURLcode(*)(long))dlsym(hlib, "curl_global_init")) &&
84
+ (__lazycurl.easy_init = (CURL(*(*)()))dlsym(hlib, "curl_easy_init")) &&
85
+ (__lazycurl.easy_cleanup = (void (*)(CURL *))dlsym(hlib, "curl_easy_cleanup")) &&
86
+ (__lazycurl.easy_getinfo =
87
+ (CURLcode(*)(CURL *, CURLINFO, ...))dlsym(hlib, "curl_easy_getinfo")) &&
88
+ (__lazycurl.easy_setopt =
89
+ (CURLcode(*)(CURL *, CURLoption, ...))dlsym(hlib, "curl_easy_setopt")) &&
90
+ (__lazycurl.easy_perform = (CURLcode(*)(CURL *))dlsym(hlib, "curl_easy_perform")) &&
91
+ (__lazycurl.easy_escape =
92
+ (char(*(*)(CURL *, const char *, int)))dlsym(hlib, "curl_easy_escape")) &&
93
+ (__lazycurl.easy_unescape =
94
+ (char(*(*)(CURL *, const char *, int, int *)))dlsym(hlib, "curl_easy_unescape")) &&
95
+ (__lazycurl.easy_strerror = (const char(*(*)(int)))dlsym(hlib, "curl_easy_strerror")) &&
96
+ (__lazycurl.slist_append =
97
+ (curl_slist(*(*)(curl_slist *, const char *)))dlsym(hlib, "curl_slist_append")) &&
98
+ (__lazycurl.slist_free_all = (void (*)(curl_slist *))dlsym(hlib, "curl_slist_free_all")) &&
99
+ (__lazycurl.free = (void (*)(void *))dlsym(hlib, "curl_free"))) {
100
+ return __lazycurl.global_init(CURL_GLOBAL_ALL);
101
+ }
102
+ return CURLE_NOT_BUILT_IN;
103
+ }
104
+ #endif
105
+
106
+ // Helper class to scope a CURL handle
107
+ class CURLconn {
108
+ CURL *h_;
109
+
110
+ public:
111
+ CURLconn(bool insecure = false) : h_(nullptr) {
112
+ h_ = curl_easy_init();
113
+ if (!h_) {
114
+ throw std::bad_alloc();
115
+ }
116
+ if (insecure) {
117
+ curl_easy_setopt(h_, CURLOPT_SSL_VERIFYPEER, 0);
118
+ curl_easy_setopt(h_, CURLOPT_SSL_VERIFYHOST, 0);
119
+ }
120
+ }
121
+ virtual ~CURLconn() {
122
+ if (h_) {
123
+ curl_easy_cleanup(h_);
124
+ }
125
+ }
126
+ operator CURL *() const { return h_; }
127
+
128
+ bool escape(const std::string &in, std::string &out) {
129
+ char *pOut = curl_easy_escape(h_, in.c_str(), in.size());
130
+ if (!pOut) {
131
+ return false;
132
+ }
133
+ out = pOut;
134
+ curl_free(pOut);
135
+ return true;
136
+ }
137
+
138
+ bool unescape(const std::string &in, std::string &out) {
139
+ int outlength = -1;
140
+ char *pOut = curl_easy_unescape(h_, in.c_str(), in.size(), &outlength);
141
+ if (!pOut || outlength < 0) {
142
+ curl_free(pOut);
143
+ return false;
144
+ }
145
+ out.assign(pOut, (size_t)outlength);
146
+ curl_free(pOut);
147
+ return true;
148
+ }
149
+ };
150
+
151
+ // A pool of CURL handles, which can persist server connections in between requests. Any number of
152
+ // handles can be checked out; at most `size` handles will be kept one checked back in. (Since we
153
+ // use blocking operations, `size` should usually be set to the number of threads that can make
154
+ // concurrent requests.)
155
+ class CURLpool {
156
+ unsigned int size_;
157
+ bool insecure_;
158
+ std::queue<std::unique_ptr<CURLconn>> pool_;
159
+ std::mutex mu_;
160
+ unsigned int cumulative_connections_ = 0;
161
+
162
+ public:
163
+ CURLpool(const unsigned int size, bool insecure = false) : size_(size), insecure_(insecure) {}
164
+
165
+ std::unique_ptr<CURLconn> checkout() {
166
+ std::lock_guard<std::mutex> lock(mu_);
167
+ std::unique_ptr<CURLconn> ans;
168
+ if (pool_.empty()) {
169
+ ans.reset(new CURLconn(insecure_));
170
+ ++cumulative_connections_;
171
+ } else {
172
+ ans.reset(pool_.front().release());
173
+ pool_.pop();
174
+ }
175
+ return ans;
176
+ }
177
+
178
+ void checkin(std::unique_ptr<CURLconn> &p) {
179
+ std::lock_guard<std::mutex> lock(mu_);
180
+ if (pool_.size() < size_) {
181
+ pool_.push(std::move(p));
182
+ } else {
183
+ p.reset();
184
+ }
185
+ }
186
+
187
+ unsigned int cumulative_connections() const { return cumulative_connections_; }
188
+ };
189
+
190
+ using headers = std::map<std::string, std::string>; // header name -> value; used for both request and response headers
191
+
192
+ // Helper class for providing HTTP request headers to libcurl
193
+ class RequestHeadersHelper {
194
+ std::list<std::string> bufs;
195
+ curl_slist *slist_;
196
+
197
+ public:
198
+ RequestHeadersHelper(const headers &headers) : slist_(NULL) {
199
+ for (auto it = headers.cbegin(); it != headers.cend(); it++) {
200
+ std::ostringstream stm;
201
+ stm << it->first << ": " << it->second;
202
+ bufs.push_back(stm.str());
203
+ slist_ = curl_slist_append(slist_, bufs.back().c_str());
204
+ }
205
+ }
206
+ virtual ~RequestHeadersHelper() { curl_slist_free_all(slist_); }
207
+ operator curl_slist *() const { return slist_; }
208
+ };
209
+
210
+ // functions for receiving responses from libcurl
211
// libcurl write callback: appends the received chunk (size * nmemb bytes at ptr) to the
// std::ostream that userdata points to.
// return: the number of bytes consumed, or 0 if the stream is in a failed state after the write
size_t writefunction(char *ptr, size_t size, size_t nmemb, void *userdata) {
    const size_t total = size * nmemb;
    std::ostream &sink = *static_cast<std::ostream *>(userdata);
    sink.write(ptr, total);
    return sink.fail() ? 0 : total;
}
219
+
220
// Remove leading whitespace (as classified by std::isspace) from s in place; returns s.
std::string &ltrim(std::string &s) {
    // std::isspace requires a value representable as unsigned char (or EOF), so each char is
    // converted before the call; a plain char may be negative for bytes >= 0x80.
    const auto first_kept = std::find_if(s.begin(), s.end(), [](char ch) {
        return !std::isspace(static_cast<unsigned char>(ch));
    });
    s.erase(s.begin(), first_kept);
    return s;
}
225
+
226
// Remove trailing whitespace (as classified by std::isspace) from s in place; returns s.
std::string &rtrim(std::string &s) {
    // std::isspace requires a value representable as unsigned char (or EOF), so each char is
    // converted before the call; a plain char may be negative for bytes >= 0x80.
    const auto last_kept = std::find_if(s.rbegin(), s.rend(), [](char ch) {
        return !std::isspace(static_cast<unsigned char>(ch));
    });
    // base() of the reverse iterator points one past the last character to keep.
    s.erase(last_kept.base(), s.end());
    return s;
}
232
+
233
+ std::string &trim(std::string &s) { return ltrim(rtrim(s)); }
234
+
235
+ size_t headerfunction(char *ptr, size_t size, size_t nmemb, void *userdata) {
236
+ size *= nmemb;
237
+ headers &h = *reinterpret_cast<headers *>(userdata);
238
+
239
+ size_t sep;
240
+ for (sep = 0; sep < size; sep++) {
241
+ if (ptr[sep] == ':')
242
+ break;
243
+ }
244
+
245
+ std::string k, v;
246
+ k.assign(ptr, sep);
247
+ k = trim(k);
248
+
249
+ if (k.size()) {
250
+ if (sep < size - 1) {
251
+ v.assign(ptr + sep + 1, size - sep - 1);
252
+ v = trim(v);
253
+ if (v.size()) {
254
+ std::transform(k.begin(), k.end(), k.begin(), ::tolower); // lowercase key
255
+ h[k] = v;
256
+ }
257
+ }
258
+ }
259
+
260
+ return size;
261
+ }
262
+
263
+ // Read content-length response header
264
+ // return: >= 0 the value
265
+ // -1 header absent
266
+ // -2 header present, but unreadable
267
+ long long ReadContentLengthHeader(const headers &response_headers) {
268
+ auto size_it = response_headers.find("content-length");
269
+ if (size_it == response_headers.end()) {
270
+ return 1;
271
+ }
272
+ std::string size = size_it->second;
273
+ size = trim(size);
274
+ const char *size_str = size.c_str();
275
+ char *endptr = nullptr;
276
+ errno = 0;
277
+ unsigned long long file_size = strtoull(size_str, &endptr, 10);
278
+ if (errno || endptr != size_str + size.size() || file_size > LLONG_MAX) {
279
+ return -2;
280
+ }
281
+ return (long long)file_size;
282
+ }
283
+
284
+ enum class Method { GET, HEAD }; // HTTP request methods accepted by Request()
285
// helper macros
// CURLcall(call): evaluate `call`, store the result in a CURLcode variable named `c` that must be
// in scope, and return it from the enclosing function unless it is CURLE_OK. The do/while(0)
// wrapper makes the expansion a single statement, so it is safe in an unbraced if/else.
#define CURLcall(call)                                                                             \
    do {                                                                                           \
        if ((c = (call)) != CURLE_OK)                                                              \
            return c;                                                                              \
    } while (0)
// CURLsetopt(x, y, z): curl_easy_setopt(x, y, z) with the same return-on-error behavior.
#define CURLsetopt(x, y, z) CURLcall(curl_easy_setopt(x, y, z))
290
+
291
+ CURLcode Request(Method method, const std::string &url, const headers &request_headers,
292
+ long &response_code, headers &response_headers, std::ostream &response_body,
293
+ CURLpool *pool = nullptr) {
294
+ CURLcode c;
295
+
296
+ std::unique_ptr<CURLconn> conn;
297
+
298
+ if (pool) {
299
+ conn = pool->checkout();
300
+ } else {
301
+ conn.reset(new CURLconn());
302
+ }
303
+
304
+ CURLsetopt(*conn, CURLOPT_URL, url.c_str());
305
+
306
+ switch (method) {
307
+ case Method::GET:
308
+ CURLsetopt(*conn, CURLOPT_HTTPGET, 1);
309
+ break;
310
+ case Method::HEAD:
311
+ CURLsetopt(*conn, CURLOPT_NOBODY, 1);
312
+ break;
313
+ }
314
+
315
+ RequestHeadersHelper headers4curl(request_headers);
316
+ CURLsetopt(*conn, CURLOPT_HTTPHEADER, ((curl_slist *)headers4curl));
317
+
318
+ CURLsetopt(*conn, CURLOPT_WRITEDATA, &response_body);
319
+ CURLsetopt(*conn, CURLOPT_WRITEFUNCTION, writefunction);
320
+
321
+ response_headers.clear();
322
+ CURLsetopt(*conn, CURLOPT_WRITEHEADER, &response_headers);
323
+ CURLsetopt(*conn, CURLOPT_HEADERFUNCTION, headerfunction);
324
+
325
+ CURLsetopt(*conn, CURLOPT_FOLLOWLOCATION, 1);
326
+ CURLsetopt(*conn, CURLOPT_MAXREDIRS, 4);
327
+ CURLsetopt(*conn, CURLOPT_CONNECTTIMEOUT, 10);
328
+
329
+ CURLcall(curl_easy_perform(*conn));
330
+
331
+ CURLcall(curl_easy_getinfo(*conn, CURLINFO_RESPONSE_CODE, &response_code));
332
+
333
+ if (pool) {
334
+ pool->checkin(conn);
335
+ }
336
+
337
+ return CURLE_OK;
338
+ }
339
+
340
+ CURLcode Get(const std::string &url, const headers &request_headers, long &response_code,
341
+ headers &response_headers, std::ostream &response_body, CURLpool *pool = nullptr) {
342
+ return Request(Method::GET, url, request_headers, response_code, response_headers,
343
+ response_body, pool);
344
+ }
345
+
346
+ CURLcode Head(const std::string &url, const headers &request_headers, long &response_code,
347
+ headers &response_headers, CURLpool *pool = nullptr) {
348
+ std::ostringstream dummy;
349
+ CURLcode ans =
350
+ Request(Method::HEAD, url, request_headers, response_code, response_headers, dummy, pool);
351
+ return ans;
352
+ }
353
+
354
+ // Parameters controlling request retry logic. Retryable errors:
355
+ // - Connection errors
356
+ // - 5xx response codes
357
+ // - Mismatched content-length header & actual body size
358
+ struct RetryOptions {
359
+ // Maximum number of attempts (including the first one)
360
+ unsigned int max_tries = 5;
361
+ // Microseconds to wait before the first retry attempt
362
+ useconds_t initial_delay = 100000;
363
+ // On each subsequent retry, the delay is multiplied by this factor
364
+ unsigned int backoff_factor = 4;
365
+
366
+ // Retry if response body has fewer bytes
367
+ size_t min_response_body = 0;
368
+
369
+ HTTP::CURLpool *connpool = nullptr;
370
+
371
+ // callback to invoke on retryable error (e.g. for logging)
372
+ std::function<void(Method method, const std::string &url, const headers &request_headers,
373
+ CURLcode rc, long response_code, const headers &response_headers,
374
+ const std::string &response_body, unsigned int attempt)>
375
+ on_retry = nullptr;
376
+ };
377
+
378
+ CURLcode RetryRequest(Method method, const std::string &url, const headers &request_headers,
379
+ long &response_code, headers &response_headers, std::string &response_body,
380
+ const RetryOptions &options) {
381
+ useconds_t delay = options.initial_delay;
382
+ std::ostringstream response_body_stream;
383
+ CURLcode rc;
384
+ response_body.clear();
385
+
386
+ for (unsigned int i = 0; i < options.max_tries; ++i) {
387
+ if (i) {
388
+ if (options.on_retry) {
389
+ options.on_retry(method, url, request_headers, rc, response_code, response_headers,
390
+ response_body, i + 1);
391
+ }
392
+ usleep(delay);
393
+ delay *= options.backoff_factor;
394
+ }
395
+
396
+ response_code = -1;
397
+ response_headers.clear();
398
+ response_body_stream.str(""); // ostringstream::clear() doesn't do this!
399
+ rc = HTTP::Request(method, url, request_headers, response_code, response_headers,
400
+ response_body_stream, options.connpool);
401
+ if (rc == CURLE_OK) {
402
+ if (response_code >= 200 && response_code < 300) {
403
+ response_body = std::move(response_body_stream.str());
404
+ long long content_length = ReadContentLengthHeader(response_headers);
405
+ if (response_body.size() >= options.min_response_body &&
406
+ (method == Method::HEAD || content_length < 0 ||
407
+ response_body.size() == content_length)) {
408
+ return CURLE_OK;
409
+ }
410
+ } else if (response_code == 429) {
411
+ // TODO: honor Retry-After (within reason)
412
+ } else if (response_code < 500 || response_code >= 600) {
413
+ return CURLE_OK;
414
+ }
415
+ }
416
+ }
417
+
418
+ return rc;
419
+ }
420
+
421
+ CURLcode RetryGet(const std::string &url, const headers &request_headers, long &response_code,
422
+ headers &response_headers, std::string &response_body,
423
+ const RetryOptions &options) {
424
+ return RetryRequest(Method::GET, url, request_headers, response_code, response_headers,
425
+ response_body, options);
426
+ }
427
+
428
+ CURLcode RetryHead(const std::string &url, const headers &request_headers, long &response_code,
429
+ headers &response_headers, const RetryOptions &options) {
430
+ std::string dummy;
431
+ return RetryRequest(Method::HEAD, url, request_headers, response_code, response_headers, dummy,
432
+ options);
433
+ }
434
+
435
+ } // namespace HTTP