iodine 0.7.16 → 0.7.17

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of iodine might be problematic. Click here for more details.

Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +5 -4
  3. data/.yardopts +8 -0
  4. data/CHANGELOG.md +26 -0
  5. data/LICENSE.txt +1 -1
  6. data/LIMITS.md +6 -0
  7. data/README.md +93 -13
  8. data/{SPEC-Websocket-Draft.md → SPEC-WebSocket-Draft.md} +0 -0
  9. data/examples/tcp_client.rb +66 -0
  10. data/examples/x-sendfile.ru +14 -0
  11. data/exe/iodine +3 -3
  12. data/ext/iodine/extconf.rb +21 -0
  13. data/ext/iodine/fio.c +659 -69
  14. data/ext/iodine/fio.h +350 -95
  15. data/ext/iodine/fio_cli.c +4 -3
  16. data/ext/iodine/fio_json_parser.h +1 -1
  17. data/ext/iodine/fio_siphash.c +13 -11
  18. data/ext/iodine/fio_siphash.h +6 -3
  19. data/ext/iodine/fio_tls.h +129 -0
  20. data/ext/iodine/fio_tls_missing.c +634 -0
  21. data/ext/iodine/fio_tls_openssl.c +1011 -0
  22. data/ext/iodine/fio_tmpfile.h +1 -1
  23. data/ext/iodine/fiobj.h +1 -1
  24. data/ext/iodine/fiobj_ary.c +1 -1
  25. data/ext/iodine/fiobj_ary.h +1 -1
  26. data/ext/iodine/fiobj_data.c +1 -1
  27. data/ext/iodine/fiobj_data.h +1 -1
  28. data/ext/iodine/fiobj_hash.c +1 -1
  29. data/ext/iodine/fiobj_hash.h +1 -1
  30. data/ext/iodine/fiobj_json.c +18 -16
  31. data/ext/iodine/fiobj_json.h +1 -1
  32. data/ext/iodine/fiobj_mustache.c +4 -0
  33. data/ext/iodine/fiobj_mustache.h +4 -0
  34. data/ext/iodine/fiobj_numbers.c +1 -1
  35. data/ext/iodine/fiobj_numbers.h +1 -1
  36. data/ext/iodine/fiobj_str.c +3 -3
  37. data/ext/iodine/fiobj_str.h +1 -1
  38. data/ext/iodine/fiobject.c +1 -1
  39. data/ext/iodine/fiobject.h +8 -2
  40. data/ext/iodine/http.c +128 -337
  41. data/ext/iodine/http.h +11 -18
  42. data/ext/iodine/http1.c +6 -6
  43. data/ext/iodine/http1.h +1 -1
  44. data/ext/iodine/http1_parser.c +1 -1
  45. data/ext/iodine/http1_parser.h +1 -1
  46. data/ext/iodine/http_internal.c +10 -8
  47. data/ext/iodine/http_internal.h +13 -3
  48. data/ext/iodine/http_mime_parser.h +1 -1
  49. data/ext/iodine/iodine.c +806 -22
  50. data/ext/iodine/iodine.h +33 -0
  51. data/ext/iodine/iodine_connection.c +23 -18
  52. data/ext/iodine/iodine_http.c +239 -225
  53. data/ext/iodine/iodine_http.h +4 -1
  54. data/ext/iodine/iodine_mustache.c +59 -54
  55. data/ext/iodine/iodine_pubsub.c +1 -1
  56. data/ext/iodine/iodine_tcp.c +34 -100
  57. data/ext/iodine/iodine_tcp.h +4 -0
  58. data/ext/iodine/iodine_tls.c +267 -0
  59. data/ext/iodine/iodine_tls.h +13 -0
  60. data/ext/iodine/mustache_parser.h +1 -1
  61. data/ext/iodine/redis_engine.c +14 -6
  62. data/ext/iodine/redis_engine.h +1 -1
  63. data/ext/iodine/resp_parser.h +1 -1
  64. data/ext/iodine/websocket_parser.h +1 -1
  65. data/ext/iodine/websockets.c +1 -1
  66. data/ext/iodine/websockets.h +1 -1
  67. data/iodine.gemspec +2 -1
  68. data/lib/iodine.rb +19 -5
  69. data/lib/iodine/connection.rb +13 -0
  70. data/lib/iodine/mustache.rb +7 -24
  71. data/lib/iodine/tls.rb +16 -0
  72. data/lib/iodine/version.rb +1 -1
  73. data/lib/rack/handler/iodine.rb +1 -1
  74. metadata +15 -5
@@ -1,5 +1,5 @@
1
1
  /*
2
- Copyright: Boaz Segev, 2018
2
+ Copyright: Boaz Segev, 2018-2019
3
3
  License: MIT
4
4
 
5
5
  Feel free to copy, use and enjoy according to the license provided.
@@ -110,7 +110,7 @@ Version and helper macros
110
110
  #define FIO_VERSION_MAJOR 0
111
111
  #define FIO_VERSION_MINOR 7
112
112
  #define FIO_VERSION_PATCH 0
113
- #define FIO_VERSION_BETA 6
113
+ #define FIO_VERSION_BETA 7
114
114
 
115
115
  /* Automatically convert version data to a string constant - ignore these two */
116
116
  #define FIO_MACRO2STR_STEP2(macro) #macro
@@ -415,7 +415,7 @@ Logging and testing helpers
415
415
  #define FIO_LOG_LEVEL_DEBUG 5
416
416
 
417
417
  /** The logging level */
418
- extern int FIO_LOG_LEVEL;
418
+ int __attribute__((weak)) FIO_LOG_LEVEL;
419
419
 
420
420
  #ifndef FIO_LOG_PRINT
421
421
  #define FIO_LOG_PRINT(level, ...) \
@@ -658,6 +658,8 @@ struct fio_listen_args {
658
658
  const char *port;
659
659
  /** The socket binding address. Defaults to the recommended NULL. */
660
660
  const char *address;
661
+ /** a pointer to a `fio_tls_s` object, for SSL/TLS support (fio_tls.h). */
662
+ void *tls;
661
663
  /** Opaque user data. */
662
664
  void *udata;
663
665
  /**
@@ -788,10 +790,7 @@ struct fio_connect_args {
788
790
  /** The port on the server we are connecting to. */
789
791
  const char *port;
790
792
  /**
791
- * The `on_connect` callback should return a pointer to a protocol object
792
- * that will handle any connection related events.
793
- *
794
- * Should either call `fio_attach` or close the connection.
793
+ * The `on_connect` callback should either call `fio_attach` or close the connection.
795
794
  */
796
795
  void (*on_connect)(intptr_t uuid, void *udata);
797
796
  /**
@@ -799,6 +798,8 @@ struct fio_connect_args {
799
798
  * is passed along.
800
799
  */
801
800
  void (*on_fail)(intptr_t uuid, void *udata);
801
+ /** a pointer to a `fio_tls_s` object, for SSL/TLS support (fio_tls.h). */
802
+ void *tls;
802
803
  /** Opaque user data. */
803
804
  void *udata;
804
805
  /** A non-system timeout after which connection is assumed to have failed. */
@@ -828,6 +829,60 @@ See the `struct fio_connect_args` details for any possible named arguments.
828
829
  intptr_t fio_connect(struct fio_connect_args);
829
830
  #define fio_connect(...) fio_connect((struct fio_connect_args){__VA_ARGS__})
830
831
 
832
+ /* *****************************************************************************
833
+ URL address parsing
834
+ ***************************************************************************** */
835
+
836
+ /** the result returned by `fio_url_parse` */
837
+ typedef struct {
838
+ fio_str_info_s scheme;
839
+ fio_str_info_s user;
840
+ fio_str_info_s password;
841
+ fio_str_info_s host;
842
+ fio_str_info_s port;
843
+ fio_str_info_s path;
844
+ fio_str_info_s query;
845
+ fio_str_info_s target;
846
+ } fio_url_s;
847
+
848
+ /**
849
+ * Parses the URI returning its components and their lengths (no decoding
850
+ * performed, doesn't accept decoded URIs).
851
+ *
852
+ * The returned strings are NOT NUL terminated, they are merely locations within
853
+ * the original string.
854
+ *
855
+ * This function attempts to accept many different formats, including any of the
856
+ * following:
857
+ *
858
+ * * `/complete_path?query#target`
859
+ *
860
+ * i.e.: /index.html?page=1#list
861
+ *
862
+ * * `host:port/complete_path?query#target`
863
+ *
864
+ * i.e.:
865
+ * example.com
866
+ * example.com:8080
867
+ * example.com/index.html
868
+ * example.com:8080/index.html
869
+ * example.com:8080/index.html?key=val#target
870
+ *
871
+ * * `user:password@host:port/path?query#target`
872
+ *
873
+ * i.e.: user:1234@example.com:8080/index.html
874
+ *
875
+ * * `username[:password]@host[:port][...]`
876
+ *
877
+ * i.e.: john:1234@example.com
878
+ *
879
+ * * `schema://user:password@host:port/path?query#target`
880
+ *
881
+ * i.e.: http://example.com/index.html?page=1#list
882
+ *
883
+ * Invalid formats might produce unexpected results. No error testing performed.
884
+ */
885
+ fio_url_s fio_url_parse(const char *url, size_t length);
831
886
  /* *****************************************************************************
832
887
  Starting the IO reactor and reviewing its state
833
888
  ***************************************************************************** */
@@ -1005,6 +1060,20 @@ void fio_force_close(intptr_t uuid);
1005
1060
  */
1006
1061
  fio_str_info_s fio_peer_addr(intptr_t uuid);
1007
1062
 
1063
+ /**
1064
+ * Writes the local machine address (qualified host name) to the buffer.
1065
+ *
1066
+ * Returns the amount of data written (excluding the NUL byte).
1067
+ *
1068
+ * `limit` is the maximum number of bytes in the buffer, including the NUL byte.
1069
+ *
1070
+ * If the returned value == limit - 1, the result might have been truncated.
1071
+ *
1072
+ * If 0 is returned, an error might have occurred (see `errno`) and the contents
1073
+ * of `dest` is undefined.
1074
+ */
1075
+ size_t fio_local_addr(char *dest, size_t limit);
1076
+
1008
1077
  /**
1009
1078
  * `fio_read` attempts to read up to count bytes from the socket into the
1010
1079
  * buffer starting at `buffer`.
@@ -2058,7 +2127,7 @@ FIO_FUNC inline uintptr_t fio_ct_if2(uintptr_t cond, uintptr_t a, uintptr_t b) {
2058
2127
  #endif
2059
2128
  /** inplace byte swap 32 bit integer */
2060
2129
  #if __has_builtin(__builtin_bswap32)
2061
- #define fio_bswap32(i) __builtin_bswap32((uint32_t)(i));
2130
+ #define fio_bswap32(i) __builtin_bswap32((uint32_t)(i))
2062
2131
  #else
2063
2132
  #define fio_bswap32(i) \
2064
2133
  ((((i)&0xFFUL) << 24) | (((i)&0xFF00UL) << 8) | (((i)&0xFF0000UL) >> 8) | \
@@ -2066,7 +2135,7 @@ FIO_FUNC inline uintptr_t fio_ct_if2(uintptr_t cond, uintptr_t a, uintptr_t b) {
2066
2135
  #endif
2067
2136
  /** inplace byte swap 64 bit integer */
2068
2137
  #if __has_builtin(__builtin_bswap64)
2069
- #define fio_bswap64(i) __builtin_bswap64((uint64_t)(i));
2138
+ #define fio_bswap64(i) __builtin_bswap64((uint64_t)(i))
2070
2139
  #else
2071
2140
  #define fio_bswap64(i) \
2072
2141
  ((((i)&0xFFULL) << 56) | (((i)&0xFF00ULL) << 40) | \
@@ -2101,27 +2170,6 @@ FIO_FUNC inline uintptr_t fio_ct_if2(uintptr_t cond, uintptr_t a, uintptr_t b) {
2101
2170
  /** Network byte order to Local byte order, 64 bit integer */
2102
2171
  #define fio_ntol64(i) (i)
2103
2172
 
2104
- /** Converts an unaligned network ordered byte stream to a 16 bit number. */
2105
- #define fio_str2u16(c) \
2106
- ((uint16_t)((((uint16_t)0 + ((uint8_t *)(c))[1]) << 8) | \
2107
- ((uint16_t)0 + ((uint8_t *)(c))[0])))
2108
- /** Converts an unaligned network ordered byte stream to a 32 bit number. */
2109
- #define fio_str2u32(c) \
2110
- ((uint32_t)((((uint32_t)0 + ((uint8_t *)(c))[3]) << 24) | \
2111
- (((uint32_t)0 + ((uint8_t *)(c))[2]) << 16) | \
2112
- (((uint32_t)0 + ((uint8_t *)(c))[1]) << 8) | \
2113
- ((uint32_t)0 + ((uint8_t *)(c))[0])))
2114
- /** Converts an unaligned network ordered byte stream to a 64 bit number. */
2115
- #define fio_str2u64(c) \
2116
- ((uint64_t)((((uint64_t)0 + ((uint8_t *)(c))[7]) << 56) | \
2117
- (((uint64_t)0 + ((uint8_t *)(c))[6]) << 48) | \
2118
- (((uint64_t)0 + ((uint8_t *)(c))[5]) << 40) | \
2119
- (((uint64_t)0 + ((uint8_t *)(c))[4]) << 32) | \
2120
- (((uint64_t)0 + ((uint8_t *)(c))[3]) << 24) | \
2121
- (((uint64_t)0 + ((uint8_t *)(c))[2]) << 16) | \
2122
- (((uint64_t)0 + ((uint8_t *)(c))[1]) << 8) | \
2123
- ((uint64_t)0 + ((uint8_t *)(c))[0])))
2124
-
2125
2173
  #else /* Little Endian */
2126
2174
 
2127
2175
  /** Local byte order to Network byte order, 16 bit integer */
@@ -2138,27 +2186,48 @@ FIO_FUNC inline uintptr_t fio_ct_if2(uintptr_t cond, uintptr_t a, uintptr_t b) {
2138
2186
  /** Network byte order to Local byte order, 64 bit integer */
2139
2187
  #define fio_ntol64(i) fio_bswap64((i))
2140
2188
 
2189
+ #endif
2190
+
2191
+ /** 32Bit left rotation, inlined. */
2192
+ #define fio_lrot32(i, bits) \
2193
+ (((uint32_t)(i) << ((bits)&31UL)) | ((uint32_t)(i) >> ((-(bits)) & 31UL)))
2194
+ /** 32Bit right rotation, inlined. */
2195
+ #define fio_rrot32(i, bits) \
2196
+ (((uint32_t)(i) >> ((bits)&31UL)) | ((uint32_t)(i) << ((-(bits)) & 31UL)))
2197
+ /** 64Bit left rotation, inlined. */
2198
+ #define fio_lrot64(i, bits) \
2199
+ (((uint64_t)(i) << ((bits)&63UL)) | ((uint64_t)(i) >> ((-(bits)) & 63UL)))
2200
+ /** 64Bit right rotation, inlined. */
2201
+ #define fio_rrot64(i, bits) \
2202
+ (((uint64_t)(i) >> ((bits)&63UL)) | ((uint64_t)(i) << ((-(bits)) & 63UL)))
2203
+ /** unknown size element - left rotation, inlined. */
2204
+ #define fio_lrot(i, bits) \
2205
+ (((i) << ((bits) & ((sizeof((i)) << 3) - 1))) | \
2206
+ ((i) >> ((-(bits)) & ((sizeof((i)) << 3) - 1))))
2207
+ /** unknown size element - right rotation, inlined. */
2208
+ #define fio_rrot(i, bits) \
2209
+ (((i) >> (bits)) | ((i) << ((-(bits)) & ((sizeof((i)) << 3) - 1))))
2210
+
2141
2211
  /** Converts an unaligned network ordered byte stream to a 16 bit number. */
2142
2212
  #define fio_str2u16(c) \
2143
- ((uint16_t)((((uint16_t)0 + ((uint8_t *)(c))[0]) << 8) | \
2144
- ((uint16_t)0 + ((uint8_t *)(c))[1])))
2213
+ ((uint16_t)(((uint16_t)(((uint8_t *)(c))[0]) << 8) | \
2214
+ (uint16_t)(((uint8_t *)(c))[1])))
2145
2215
  /** Converts an unaligned network ordered byte stream to a 32 bit number. */
2146
2216
  #define fio_str2u32(c) \
2147
- ((uint32_t)((((uint32_t)0 + ((uint8_t *)(c))[0]) << 24) | \
2148
- (((uint32_t)0 + ((uint8_t *)(c))[1]) << 16) | \
2149
- (((uint32_t)0 + ((uint8_t *)(c))[2]) << 8) | \
2150
- ((uint32_t)0 + ((uint8_t *)(c))[3])))
2217
+ ((uint32_t)(((uint32_t)(((uint8_t *)(c))[0]) << 24) | \
2218
+ ((uint32_t)(((uint8_t *)(c))[1]) << 16) | \
2219
+ ((uint32_t)(((uint8_t *)(c))[2]) << 8) | \
2220
+ (uint32_t)(((uint8_t *)(c))[3])))
2221
+
2151
2222
  /** Converts an unaligned network ordered byte stream to a 64 bit number. */
2152
2223
  #define fio_str2u64(c) \
2153
- ((uint64_t)((((uint64_t)0 + ((uint8_t *)(c))[0]) << 56) | \
2154
- (((uint64_t)0 + ((uint8_t *)(c))[1]) << 48) | \
2155
- (((uint64_t)0 + ((uint8_t *)(c))[2]) << 40) | \
2156
- (((uint64_t)0 + ((uint8_t *)(c))[3]) << 32) | \
2157
- (((uint64_t)0 + ((uint8_t *)(c))[4]) << 24) | \
2158
- (((uint64_t)0 + ((uint8_t *)(c))[5]) << 16) | \
2159
- (((uint64_t)0 + ((uint8_t *)(c))[6]) << 8) | \
2160
- ((uint64_t)0 + ((uint8_t *)(c))[7])))
2161
- #endif
2224
+ ((uint64_t)((((uint64_t)((uint8_t *)(c))[0]) << 56) | \
2225
+ (((uint64_t)((uint8_t *)(c))[1]) << 48) | \
2226
+ (((uint64_t)((uint8_t *)(c))[2]) << 40) | \
2227
+ (((uint64_t)((uint8_t *)(c))[3]) << 32) | \
2228
+ (((uint64_t)((uint8_t *)(c))[4]) << 24) | \
2229
+ (((uint64_t)((uint8_t *)(c))[5]) << 16) | \
2230
+ (((uint64_t)((uint8_t *)(c))[6]) << 8) | (((uint8_t *)(c))[7])))
2162
2231
 
2163
2232
  /** Writes a local 16 bit number to an unaligned buffer in network order. */
2164
2233
  #define fio_u2str16(buffer, i) \
@@ -2311,6 +2380,126 @@ void fio_rand_bytes(void *target, size_t length);
2311
2380
 
2312
2381
  ***************************************************************************** */
2313
2382
 
2383
+ /* defines the secret seed to be used by keyed hashing functions */
2384
+ #ifndef FIO_HASH_SECRET_SEED64_1
2385
+ uint8_t __attribute__((weak)) fio_hash_secret_marker1;
2386
+ uint8_t __attribute__((weak)) fio_hash_secret_marker2;
2387
+ #define FIO_HASH_SECRET_SEED64_1 ((uintptr_t)&fio_hash_secret_marker1)
2388
+ #define FIO_HASH_SECRET_SEED64_2 ((uintptr_t)&fio_hash_secret_marker2)
2389
+ #endif
2390
+
2391
+ #if FIO_USE_RISKY_HASH
2392
+ #define FIO_HASH_FN(data, length, key1, key2) \
2393
+ fio_risky_hash((data), (length), \
2394
+ ((uint64_t)(key1) >> 19) | ((uint64_t)(key2) << 27))
2395
+ #else
2396
+ #define FIO_HASH_FN(data, length, key1, key2) \
2397
+ fio_siphash13((data), (length), (uint64_t)(key1), (uint64_t)(key2))
2398
+ #endif
2399
+
2400
+ /* *****************************************************************************
2401
+ Risky Hash (always available, even if using only the fio.h header)
2402
+ ***************************************************************************** */
2403
+
2404
+ /**
2405
+ * Computes a facil.io Risky Hash, modeled after the amazing
2406
+ * [xxHash](https://github.com/Cyan4973/xxHash) (which has a BSD license)
2407
+ * and named "Risky Hash" because writing your own hashing function is a risky
2408
+ * business, full of pitfalls, hours of testing and security risks...
2409
+ *
2410
+ * Risky Hash isn't as battle tested as SipHash, but it did pass the
2411
+ * [SMHasher](https://github.com/rurban/smhasher) tests with wonderful results,
2412
+ * can be used for processing safe data and is easy (and short) to implement.
2413
+ */
2414
+ inline FIO_FUNC uintptr_t fio_risky_hash(const void *data_, size_t len,
2415
+ uint64_t seed) {
2416
+ /* The primes used by Risky Hash */
2417
+ const uint64_t primes[] = {
2418
+ 0xFBBA3FA15B22113B, // 1111101110111010001111111010000101011011001000100001000100111011
2419
+ 0xAB137439982B86C9, // 1010101100010011011101000011100110011000001010111000011011001001
2420
+ };
2421
+ /* The consumption vectors initialized state */
2422
+ uint64_t v[4] = {
2423
+ seed ^ primes[1],
2424
+ ~seed + primes[1],
2425
+ fio_lrot64(seed, 17) ^ primes[1],
2426
+ fio_lrot64(seed, 33) + primes[1],
2427
+ };
2428
+
2429
+ /* Risky Hash consumption round */
2430
+ #define fio_risky_consume(w, i) \
2431
+ v[i] ^= (w); \
2432
+ v[i] = fio_lrot64(v[i], 33) + (w); \
2433
+ v[i] *= primes[0];
2434
+
2435
+ /* compilers could, hopefully, optimize this code for SIMD */
2436
+ #define fio_risky_consume256(w0, w1, w2, w3) \
2437
+ fio_risky_consume(w0, 0); \
2438
+ fio_risky_consume(w1, 1); \
2439
+ fio_risky_consume(w2, 2); \
2440
+ fio_risky_consume(w3, 3);
2441
+
2442
+ /* reading position */
2443
+ const uint8_t *data = (uint8_t *)data_;
2444
+
2445
+ /* consume 256bit blocks */
2446
+ for (size_t i = len >> 5; i; --i) {
2447
+ fio_risky_consume256(fio_str2u64(data), fio_str2u64(data + 8),
2448
+ fio_str2u64(data + 16), fio_str2u64(data + 24));
2449
+ data += 32;
2450
+ }
2451
+ /* Consume any remaining 64 bit words. */
2452
+ switch (len & 24) {
2453
+ case 24:
2454
+ fio_risky_consume(fio_str2u64(data + 16), 2);
2455
+ case 16: /* overflow */
2456
+ fio_risky_consume(fio_str2u64(data + 8), 1);
2457
+ case 8: /* overflow */
2458
+ fio_risky_consume(fio_str2u64(data), 0);
2459
+ data += len & 24;
2460
+ }
2461
+
2462
+ uintptr_t tmp = 0;
2463
+ /* consume leftover bytes, if any */
2464
+ switch ((len & 7)) {
2465
+ case 7: /* overflow */
2466
+ tmp |= ((uint64_t)data[6]) << 56;
2467
+ case 6: /* overflow */
2468
+ tmp |= ((uint64_t)data[5]) << 48;
2469
+ case 5: /* overflow */
2470
+ tmp |= ((uint64_t)data[4]) << 40;
2471
+ case 4: /* overflow */
2472
+ tmp |= ((uint64_t)data[3]) << 32;
2473
+ case 3: /* overflow */
2474
+ tmp |= ((uint64_t)data[2]) << 24;
2475
+ case 2: /* overflow */
2476
+ tmp |= ((uint64_t)data[1]) << 16;
2477
+ case 1: /* overflow */
2478
+ tmp |= ((uint64_t)data[0]) << 8;
2479
+ fio_risky_consume(tmp, 3);
2480
+ }
2481
+
2482
+ /* merge and mix */
2483
+ uint64_t result = fio_lrot64(v[0], 17) + fio_lrot64(v[1], 13) +
2484
+ fio_lrot64(v[2], 47) + fio_lrot64(v[3], 57);
2485
+ result += len;
2486
+ result += v[0] * primes[1];
2487
+ result ^= fio_lrot64(result, 13);
2488
+ result += v[1] * primes[1];
2489
+ result ^= fio_lrot64(result, 29);
2490
+ result += v[2] * primes[1];
2491
+ result ^= fio_lrot64(result, 33);
2492
+ result += v[3] * primes[1];
2493
+ result ^= fio_lrot64(result, 51);
2494
+
2495
+ /* irreversible avalanche... I think */
2496
+ result ^= (result >> 29) * primes[0];
2497
+ return result;
2498
+
2499
+ #undef fio_risky_consume256
2500
+ #undef fio_risky_consume
2501
+ }
2502
+
2314
2503
  /* *****************************************************************************
2315
2504
  SipHash
2316
2505
  ***************************************************************************** */
@@ -2318,19 +2507,22 @@ SipHash
2318
2507
  /**
2319
2508
  * A SipHash variation (2-4).
2320
2509
  */
2321
- uint64_t fio_siphash24(const void *data, size_t len);
2510
+ uint64_t fio_siphash24(const void *data, size_t len, uint64_t key1,
2511
+ uint64_t key2);
2322
2512
 
2323
2513
  /**
2324
2514
  * A SipHash 1-3 variation.
2325
2515
  */
2326
- uint64_t fio_siphash13(const void *data, size_t len);
2516
+ uint64_t fio_siphash13(const void *data, size_t len, uint64_t key1,
2517
+ uint64_t key2);
2327
2518
 
2328
2519
  /**
2329
2520
  * The Hashing function used by dynamic facil.io objects.
2330
2521
  *
2331
2522
  * Currently implemented using SipHash 1-3.
2332
2523
  */
2333
- #define fio_siphash(data, length) fio_siphash13((data), (length))
2524
+ #define fio_siphash(data, length, k1, k2) \
2525
+ fio_siphash13((data), (length), (k1), (k2))
2334
2526
 
2335
2527
  /* *****************************************************************************
2336
2528
  SHA-1
@@ -3323,9 +3515,11 @@ inline FIO_FUNC fio_str_info_s fio_str_resize(fio_str_s *s, size_t size);
3323
3515
  #define fio_str_clear(s) fio_str_resize((s), 0)
3324
3516
 
3325
3517
  /**
3326
- * Returns the string's siphash value (Uses SipHash 1-3).
3518
+ * Returns the string's Risky Hash value.
3519
+ *
3520
+ * Note: Hash algorithm might change without notice.
3327
3521
  */
3328
- inline FIO_FUNC uint64_t fio_str_hash(const fio_str_s *s);
3522
+ FIO_FUNC uint64_t fio_str_hash(const fio_str_s *s);
3329
3523
 
3330
3524
  /* *****************************************************************************
3331
3525
  String API - Memory management
@@ -3681,13 +3875,23 @@ inline FIO_FUNC fio_str_info_s fio_str_resize(fio_str_s *s, size_t size) {
3681
3875
  return (fio_str_info_s){.capa = s->capa, .len = size, .data = s->data};
3682
3876
  }
3683
3877
 
3878
+ /* *****************************************************************************
3879
+ String Implementation - Hashing
3880
+ ***************************************************************************** */
3881
+
3684
3882
  /**
3685
- * Returns the string's siphash value (Uses SipHash 1-3).
3883
+ * Returns the String's Risky Hash (see fio_risky_hash).
3884
+ *
3885
+ * This value is machine/instance specific (hash seed is a memory address).
3886
+ *
3887
+ * NOTE: the hashing function might be changed at any time without notice. It
3888
+ * wasn't cryptographically analyzed and safety against malicious data can't be
3889
+ * guaranteed. Use fio_siphash13 or fio_siphash24 when hashing data from
3890
+ * external sources.
3686
3891
  */
3687
- /** Returns the String's complete state (capacity, length and pointer). */
3688
- inline FIO_FUNC uint64_t fio_str_hash(const fio_str_s *s) {
3892
+ FIO_FUNC uint64_t fio_str_hash(const fio_str_s *s) {
3689
3893
  fio_str_info_s state = fio_str_info(s);
3690
- return fio_siphash(state.data, state.len);
3894
+ return fio_risky_hash(state.data, state.len, FIO_HASH_SECRET_SEED64_1);
3691
3895
  }
3692
3896
 
3693
3897
  /* *****************************************************************************
@@ -5152,18 +5356,19 @@ Done
5152
5356
  *
5153
5357
  * To create a Set or a Hash Map, the macro FIO_SET_NAME must be defined. i.e.:
5154
5358
  *
5155
- * #define FIO_SET_NAME fio_cstr_set
5359
+ * #define FIO_SET_NAME cstr_set
5156
5360
  * #define FIO_SET_OBJ_TYPE char *
5157
5361
  * #define FIO_SET_OBJ_COMPARE(k1, k2) (!strcmp((k1), (k2)))
5158
5362
  * #include <fio.h>
5159
5363
  *
5160
- * To create a Hash Map, rather than a pure Set, the macro FIO_SET_KET_TYPE must
5364
+ * To create a Hash Map, rather than a pure Set, the macro FIO_SET_KEY_TYPE must
5161
5365
  * be defined. i.e.:
5162
5366
  *
5163
5367
  * #define FIO_SET_KEY_TYPE char *
5164
5368
  *
5165
5369
  * This allows the FIO_SET_KEY_* macros to be defined as well. For example:
5166
5370
  *
5371
+ * #define FIO_SET_NAME cstr_hashmap
5167
5372
  * #define FIO_SET_KEY_TYPE char *
5168
5373
  * #define FIO_SET_KEY_COMPARE(k1, k2) (!strcmp((k1), (k2)))
5169
5374
  * #define FIO_SET_OBJ_TYPE char *
@@ -5177,7 +5382,10 @@ Done
5177
5382
  * #include <fio.h> // adds the fio_str_s types and functions
5178
5383
  *
5179
5384
  * #define FIO_SET_NAME fio_str_set
5180
- * #define FIO_SET_KEY_TYPE fio_str_s *
5385
+ * #define FIO_SET_OBJ_TYPE fio_str_s *
5386
+ * #define FIO_SET_OBJ_COMPARE(k1, k2) (fio_str_iseq((k1), (k2)))
5387
+ * #define FIO_SET_OBJ_COPY(key) fio_str_dup((key))
5388
+ * #define FIO_SET_OBJ_DESTROY(key) fio_str_free2((key))
5181
5389
  * #include <fio.h> // creates the fio_str_set_s Set and functions
5182
5390
  *
5183
5391
  * #define FIO_SET_NAME fio_str_hash
@@ -5194,9 +5402,10 @@ Done
5194
5402
  * The default integer Hash used is a pointer length type (uintptr_t). This can
5195
5403
  * be changed by defining ALL of the following macros:
5196
5404
  * * FIO_SET_HASH_TYPE - the type of the hash value.
5197
- * * FIO_SET_HASH2UINTPTR(hash) - converts the hash value to a uintptr_t.
5405
+ * * FIO_SET_HASH2UINTPTR(hash, i) - converts the hash value to a uintptr_t.
5198
5406
  * * FIO_SET_HASH_COMPARE(h1, h2) - compares two hash values (1 == equal).
5199
5407
  * * FIO_SET_HASH_INVALID - an invalid Hash value, all bytes are 0.
5408
+ * * FIO_SET_HASH_FORCE - an always valid Hash value, all bytes 0xFF
5200
5409
  *
5201
5410
  *
5202
5411
  * Note: FIO_SET_HASH_TYPE should normally be left alone (uintptr_t is
@@ -5244,14 +5453,20 @@ Done
5244
5453
  #define FIO_SET_OBJ_DESTROY(obj) ((void)0)
5245
5454
  #endif
5246
5455
 
5247
- /** test for a pre-defined hash value type */
5456
+ /** test for a pre-defined hash type, must be numerical (i.e. __int128_t)*/
5248
5457
  #ifndef FIO_SET_HASH_TYPE
5249
5458
  #define FIO_SET_HASH_TYPE uintptr_t
5250
5459
  #endif
5251
5460
 
5252
5461
  /** test for a pre-defined hash to integer conversion */
5253
5462
  #ifndef FIO_SET_HASH2UINTPTR
5254
- #define FIO_SET_HASH2UINTPTR(hash) ((uintptr_t)(hash))
5463
+ #define FIO_SET_HASH2UINTPTR(hash, bits_used) \
5464
+ (fio_rrot(hash, bits_used) ^ fio_ct_if2(bits_used, hash, 0))
5465
+ #endif
5466
+
5467
+ /** test for a pre-defined forced hash value (substituted for an invalid, all-zero hash) */
5468
+ #ifndef FIO_SET_HASH_FORCE
5469
+ #define FIO_SET_HASH_FORCE (~(uintptr_t)0)
5255
5470
  #endif
5256
5471
 
5257
5472
  /** test for a pre-defined invalid hash value (all bytes are 0) */
@@ -5278,11 +5493,16 @@ Done
5278
5493
  #define FIO_SET_FREE(ptr, size) FIO_FREE((ptr))
5279
5494
  #endif
5280
5495
 
5281
- /* The maximum number of bins to rotate when partial collisions occure */
5496
+ /* The maximum number of bins to rotate when (partial/full) collisions occur */
5282
5497
  #ifndef FIO_SET_MAX_MAP_SEEK
5283
5498
  #define FIO_SET_MAX_MAP_SEEK (96)
5284
5499
  #endif
5285
5500
 
5501
+ /* The maximum number of full hash collisions that can be consumed */
5502
+ #ifndef FIO_SET_MAX_MAP_FULL_COLLISIONS
5503
+ #define FIO_SET_MAX_MAP_FULL_COLLISIONS (96)
5504
+ #endif
5505
+
5286
5506
  /* Prime numbers are better */
5287
5507
  #ifndef FIO_SET_CUCKOO_STEPS
5288
5508
  #define FIO_SET_CUCKOO_STEPS 11
@@ -5524,10 +5744,11 @@ struct FIO_NAME(s) {
5524
5744
  uintptr_t count;
5525
5745
  uintptr_t capa;
5526
5746
  uintptr_t pos;
5527
- uintptr_t mask;
5528
5747
  FIO_NAME(_ordered_s_) * ordered;
5529
5748
  FIO_NAME(_map_s_) * map;
5530
5749
  uint8_t has_collisions;
5750
+ uint8_t used_bits;
5751
+ uint8_t under_attack;
5531
5752
  };
5532
5753
 
5533
5754
  #undef FIO_SET_FOR_LOOP
@@ -5541,42 +5762,65 @@ Set / Hash Map Internal Helpers
5541
5762
 
5542
5763
  /** Locates an object's map position in the Set, if it exists. */
5543
5764
  FIO_FUNC inline FIO_NAME(_map_s_) *
5544
- FIO_NAME(_find_map_pos_)(FIO_NAME(s) * set,
5545
- const FIO_SET_HASH_TYPE hash_value,
5765
+ FIO_NAME(_find_map_pos_)(FIO_NAME(s) * set, FIO_SET_HASH_TYPE hash_value,
5546
5766
  FIO_SET_TYPE obj) {
5767
+ if (FIO_SET_HASH_COMPARE(hash_value, FIO_SET_HASH_INVALID))
5768
+ hash_value = FIO_SET_HASH_FORCE;
5547
5769
  if (set->map) {
5548
5770
  /* make sure collisions don't affect seeking */
5549
5771
  if (set->has_collisions && set->pos != set->count) {
5550
5772
  FIO_NAME(rehash)(set);
5551
5773
  }
5774
+ size_t full_collisions_counter = 0;
5775
+ FIO_NAME(_map_s_) * pos;
5776
+ /*
5777
+ * Commonly, the hash is rotated, depending on its state.
5778
+ * Different bits are used for each mapping, instead of a single new bit.
5779
+ */
5780
+ const uintptr_t mask = (1ULL << set->used_bits) - 1;
5781
+
5782
+ uintptr_t i;
5783
+ const uintptr_t hash_value_i = FIO_SET_HASH2UINTPTR(hash_value, 0);
5784
+ uintptr_t hash_alt = FIO_SET_HASH2UINTPTR(hash_value, set->used_bits);
5552
5785
 
5553
5786
  /* O(1) access to object */
5554
- FIO_NAME(_map_s_) *pos =
5555
- set->map + (FIO_SET_HASH2UINTPTR(hash_value) & set->mask);
5787
+ pos = set->map + (hash_alt & mask);
5556
5788
  if (FIO_SET_HASH_COMPARE(FIO_SET_HASH_INVALID, pos->hash))
5557
5789
  return pos;
5558
- if (FIO_SET_HASH_COMPARE(pos->hash, hash_value)) {
5790
+ if (FIO_SET_HASH_COMPARE(pos->hash, hash_value_i)) {
5559
5791
  if (!pos->pos || FIO_SET_COMPARE(pos->pos->obj, obj))
5560
5792
  return pos;
5793
+ /* full hash value collision detected */
5561
5794
  set->has_collisions = 1;
5795
+ ++full_collisions_counter;
5562
5796
  }
5563
5797
 
5564
5798
  /* Handle partial / full collisions with cuckoo steps O(x) access time */
5565
- uintptr_t i = FIO_SET_CUCKOO_STEPS;
5799
+ i = 0;
5566
5800
  const uintptr_t limit =
5567
5801
  FIO_SET_CUCKOO_STEPS * (set->capa > (FIO_SET_MAX_MAP_SEEK << 2)
5568
5802
  ? FIO_SET_MAX_MAP_SEEK
5569
5803
  : (set->capa >> 2));
5570
5804
  while (i < limit) {
5571
- pos = set->map + ((FIO_SET_HASH2UINTPTR(hash_value) + i) & set->mask);
5805
+ i += FIO_SET_CUCKOO_STEPS;
5806
+ pos = set->map + ((hash_alt + i) & mask);
5572
5807
  if (FIO_SET_HASH_COMPARE(FIO_SET_HASH_INVALID, pos->hash))
5573
5808
  return pos;
5574
- if (FIO_SET_HASH_COMPARE(pos->hash, hash_value)) {
5809
+ if (FIO_SET_HASH_COMPARE(pos->hash, hash_value_i)) {
5575
5810
  if (!pos->pos || FIO_SET_COMPARE(pos->pos->obj, obj))
5576
5811
  return pos;
5812
+ /* full hash value collision detected */
5577
5813
  set->has_collisions = 1;
5814
+ if (++full_collisions_counter >= FIO_SET_MAX_MAP_FULL_COLLISIONS) {
5815
+ /* is the hash under attack? */
5816
+ FIO_LOG_WARNING(
5817
+ "(fio hash map) too many full collisions - under attack?");
5818
+ set->under_attack = 1;
5819
+ }
5820
+ if (set->under_attack) {
5821
+ return pos;
5822
+ }
5578
5823
  }
5579
- i += FIO_SET_CUCKOO_STEPS;
5580
5824
  }
5581
5825
  }
5582
5826
  return NULL;
@@ -5605,18 +5849,17 @@ FIO_FUNC inline void FIO_NAME(_compact_ordered_array_)(FIO_NAME(s) * set) {
5605
5849
 
5606
5850
  /** (Re)allocates the set's internal memory, invalidating the mapping (must rehash) */
5607
5851
  FIO_FUNC inline void FIO_NAME(_reallocate_set_mem_)(FIO_NAME(s) * set) {
5852
+ const uintptr_t new_capa = 1ULL << set->used_bits;
5608
5853
  FIO_SET_FREE(set->map, set->capa * sizeof(*set->map));
5609
- set->map =
5610
- (FIO_NAME(_map_s_) *)FIO_SET_CALLOC(sizeof(*set->map), (set->mask + 1));
5854
+ set->map = (FIO_NAME(_map_s_) *)FIO_SET_CALLOC(sizeof(*set->map), new_capa);
5611
5855
  set->ordered = (FIO_NAME(_ordered_s_) *)FIO_SET_REALLOC(
5612
5856
  set->ordered, (set->capa * sizeof(*set->ordered)),
5613
- ((set->mask + 1) * sizeof(*set->ordered)),
5614
- (set->pos * sizeof(*set->ordered)));
5857
+ (new_capa * sizeof(*set->ordered)), (set->pos * sizeof(*set->ordered)));
5615
5858
  if (!set->map || !set->ordered) {
5616
5859
  perror("FATAL ERROR: couldn't allocate memory for Set data");
5617
5860
  exit(errno);
5618
5861
  }
5619
- set->capa = set->mask + 1;
5862
+ set->capa = new_capa;
5620
5863
  }
5621
5864
 
5622
5865
  /**
@@ -5626,21 +5869,19 @@ FIO_FUNC inline void FIO_NAME(_reallocate_set_mem_)(FIO_NAME(s) * set) {
5626
5869
  * If the object already exists in the set, it will be destroyed and
5627
5870
  * overwritten.
5628
5871
  */
5629
- FIO_FUNC inline FIO_SET_TYPE FIO_NAME(_insert_or_overwrite_)(
5630
- FIO_NAME(s) * set, const FIO_SET_HASH_TYPE hash_value, FIO_SET_TYPE obj,
5631
- int overwrite, FIO_SET_OBJ_TYPE *old) {
5632
- if (FIO_SET_HASH_COMPARE(hash_value, FIO_SET_HASH_INVALID)) {
5633
- FIO_SET_TYPE empty;
5634
- memset(&empty, 0, sizeof(empty));
5635
- return empty;
5636
- }
5872
+ FIO_FUNC inline FIO_SET_TYPE
5873
+ FIO_NAME(_insert_or_overwrite_)(FIO_NAME(s) * set, FIO_SET_HASH_TYPE hash_value,
5874
+ FIO_SET_TYPE obj, int overwrite,
5875
+ FIO_SET_OBJ_TYPE *old) {
5876
+ if (FIO_SET_HASH_COMPARE(hash_value, FIO_SET_HASH_INVALID))
5877
+ hash_value = FIO_SET_HASH_FORCE;
5637
5878
 
5638
5879
  /* automatic fragmentation protection */
5639
5880
  if (FIO_NAME(is_fragmented)(set))
5640
5881
  FIO_NAME(rehash)(set);
5641
5882
  /* automatic capacity validation (we can never be at 100% capacity) */
5642
5883
  else if (set->pos >= set->capa) {
5643
- set->mask = (set->mask << 1) | 3;
5884
+ ++set->used_bits;
5644
5885
  FIO_NAME(rehash)(set);
5645
5886
  }
5646
5887
 
@@ -5715,6 +5956,8 @@ FIO_FUNC void FIO_NAME_FREE()(FIO_NAME(s) * s) {
5715
5956
 
5716
5957
  #ifdef FIO_SET_KEY_TYPE
5717
5958
 
5959
+ /* Hash Map unique implementation */
5960
+
5718
5961
  /**
5719
5962
  * Locates an object in the Set, if it exists.
5720
5963
  *
@@ -5766,8 +6009,6 @@ FIO_FUNC inline int FIO_NAME(remove)(FIO_NAME(s) * set,
5766
6009
  const FIO_SET_HASH_TYPE hash_value,
5767
6010
  FIO_SET_KEY_TYPE key,
5768
6011
  FIO_SET_OBJ_TYPE *old) {
5769
- if (FIO_SET_HASH_COMPARE(hash_value, FIO_SET_HASH_INVALID))
5770
- return -1;
5771
6012
  FIO_NAME(_map_s_) *pos =
5772
6013
  FIO_NAME(_find_map_pos_)(set, hash_value, (FIO_SET_TYPE){.key = key});
5773
6014
  if (!pos || !pos->pos)
@@ -5789,7 +6030,9 @@ FIO_FUNC inline int FIO_NAME(remove)(FIO_NAME(s) * set,
5789
6030
  return 0;
5790
6031
  }
5791
6032
 
5792
- #else
6033
+ #else /* FIO_SET_KEY_TYPE */
6034
+
6035
+ /* Set unique implementation */
5793
6036
 
5794
6037
  /** Locates an object in the Set, if it exists. */
5795
6038
  FIO_FUNC FIO_SET_OBJ_TYPE FIO_NAME(find)(FIO_NAME(s) * set,
@@ -5915,9 +6158,9 @@ FIO_FUNC inline size_t FIO_NAME(capa_require)(FIO_NAME(s) * set,
5915
6158
  size_t min_capa) {
5916
6159
  if (min_capa <= FIO_NAME(capa)(set))
5917
6160
  return FIO_NAME(capa)(set);
5918
- set->mask = 1;
5919
- while (min_capa > set->mask) {
5920
- set->mask = (set->mask << 1) | 3;
6161
+ set->used_bits = 2;
6162
+ while (min_capa > (1ULL << set->used_bits)) {
6163
+ ++set->used_bits;
5921
6164
  }
5922
6165
  FIO_NAME(rehash)(set);
5923
6166
  return FIO_NAME(capa)(set);
@@ -5938,9 +6181,9 @@ FIO_FUNC inline size_t FIO_NAME(is_fragmented)(const FIO_NAME(s) * set) {
5938
6181
  */
5939
6182
  FIO_FUNC inline size_t FIO_NAME(compact)(FIO_NAME(s) * set) {
5940
6183
  FIO_NAME(_compact_ordered_array_)(set);
5941
- set->mask = 3;
5942
- while (set->count >= set->mask) {
5943
- set->mask = (set->mask << 1) | 1;
6184
+ set->used_bits = 2;
6185
+ while (set->count >= (1ULL << set->used_bits)) {
6186
+ ++set->used_bits;
5944
6187
  }
5945
6188
  FIO_NAME(rehash)(set);
5946
6189
  return FIO_NAME(capa)(set);
@@ -5950,7 +6193,18 @@ FIO_FUNC inline size_t FIO_NAME(compact)(FIO_NAME(s) * set) {
5950
6193
  FIO_FUNC void FIO_NAME(rehash)(FIO_NAME(s) * set) {
5951
6194
  FIO_NAME(_compact_ordered_array_)(set);
5952
6195
  set->has_collisions = 0;
6196
+ uint8_t attempts = 0;
5953
6197
  restart:
6198
+ if (set->used_bits >= 16 && ++attempts >= 3 && set->has_collisions) {
6199
+ FIO_LOG_FATAL(
6200
+ "facil.io Set / Hash Map has too many collisions (%zu/%zu)."
6201
+ "\n\t\tthis is a fatal implementation error,"
6202
+ "please report this issue at facio.io's open source project"
6203
+ "\n\t\tNote: hash maps and sets should never reach this point."
6204
+ "\n\t\tThey should be guarded against collision attacks.",
6205
+ set->pos, set->capa);
6206
+ exit(-1);
6207
+ }
5954
6208
  FIO_NAME(_reallocate_set_mem_)(set);
5955
6209
  {
5956
6210
  FIO_NAME(_ordered_s_) const *const end = set->ordered + set->pos;
@@ -5958,7 +6212,7 @@ restart:
5958
6212
  FIO_NAME(_map_s_) *mp =
5959
6213
  FIO_NAME(_find_map_pos_)(set, pos->hash, pos->obj);
5960
6214
  if (!mp) {
5961
- set->mask = (set->mask << 1) | 3;
6215
+ ++set->used_bits;
5962
6216
  goto restart;
5963
6217
  }
5964
6218
  mp->pos = pos;
@@ -5984,6 +6238,7 @@ restart:
5984
6238
  #undef FIO_SET_COPY
5985
6239
  #undef FIO_SET_DESTROY
5986
6240
  #undef FIO_SET_MAX_MAP_SEEK
6241
+ #undef FIO_SET_MAX_MAP_FULL_COLLISIONS
5987
6242
  #undef FIO_SET_REALLOC
5988
6243
  #undef FIO_SET_CALLOC
5989
6244
  #undef FIO_SET_FREE