rgss 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +6 -0
  3. data/.gitignore +167 -0
  4. data/.yardopts +6 -0
  5. data/CHANGELOG.md +4 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +21 -0
  8. data/Rakefile +9 -0
  9. data/ext/rgss/cglm-v0.7.9.tar.gz +0 -0
  10. data/ext/rgss/color.c +599 -0
  11. data/ext/rgss/entity.c +373 -0
  12. data/ext/rgss/extconf.rb +53 -0
  13. data/ext/rgss/font.c +135 -0
  14. data/ext/rgss/game.c +469 -0
  15. data/ext/rgss/game.h +99 -0
  16. data/ext/rgss/gl.c +3217 -0
  17. data/ext/rgss/glad.c +1140 -0
  18. data/ext/rgss/glad.h +2129 -0
  19. data/ext/rgss/glfw.c +1453 -0
  20. data/ext/rgss/graphics.c +324 -0
  21. data/ext/rgss/image.c +274 -0
  22. data/ext/rgss/input.c +745 -0
  23. data/ext/rgss/khrplatform.h +290 -0
  24. data/ext/rgss/mat4.c +279 -0
  25. data/ext/rgss/pax_global_header +1 -0
  26. data/ext/rgss/point.c +253 -0
  27. data/ext/rgss/rect.c +449 -0
  28. data/ext/rgss/rgss.c +56 -0
  29. data/ext/rgss/rgss.h +241 -0
  30. data/ext/rgss/stb_image.h +7762 -0
  31. data/ext/rgss/stb_image_write.h +1690 -0
  32. data/ext/rgss/stb_rect_pack.h +628 -0
  33. data/ext/rgss/stb_truetype.h +5011 -0
  34. data/ext/rgss/utf8.h +1652 -0
  35. data/ext/rgss/uthash.h +1133 -0
  36. data/ext/rgss/vec.c +114 -0
  37. data/ext/rgss/vec.h +192 -0
  38. data/ext/rgss/vec2.c +489 -0
  39. data/ext/rgss/vec3.c +751 -0
  40. data/ext/rgss/vec4.c +681 -0
  41. data/lib/rgss.rb +140 -0
  42. data/lib/rgss/batch.rb +57 -0
  43. data/lib/rgss/blend.rb +47 -0
  44. data/lib/rgss/game_object.rb +28 -0
  45. data/lib/rgss/plane.rb +95 -0
  46. data/lib/rgss/renderable.rb +158 -0
  47. data/lib/rgss/rgss.so +0 -0
  48. data/lib/rgss/shader.rb +94 -0
  49. data/lib/rgss/shaders/sprite-frag.glsl +40 -0
  50. data/lib/rgss/shaders/sprite-vert.glsl +17 -0
  51. data/lib/rgss/sprite.rb +139 -0
  52. data/lib/rgss/stubs/color.rb +318 -0
  53. data/lib/rgss/stubs/gl.rb +1999 -0
  54. data/lib/rgss/stubs/glfw.rb +626 -0
  55. data/lib/rgss/stubs/rect.rb +324 -0
  56. data/lib/rgss/stubs/rpg.rb +267 -0
  57. data/lib/rgss/stubs/tone.rb +65 -0
  58. data/lib/rgss/texture.rb +132 -0
  59. data/lib/rgss/tilemap.rb +116 -0
  60. data/lib/rgss/version.rb +3 -0
  61. data/lib/rgss/viewport.rb +67 -0
  62. data/rgss.gemspec +44 -0
  63. data/test.png +0 -0
  64. metadata +178 -0
@@ -0,0 +1,1652 @@
1
+ // The latest version of this library is available on GitHub;
2
+ // https://github.com/sheredom/utf8.h
3
+
4
+ // This is free and unencumbered software released into the public domain.
5
+ //
6
+ // Anyone is free to copy, modify, publish, use, compile, sell, or
7
+ // distribute this software, either in source code form or as a compiled
8
+ // binary, for any purpose, commercial or non-commercial, and by any
9
+ // means.
10
+ //
11
+ // In jurisdictions that recognize copyright laws, the author or authors
12
+ // of this software dedicate any and all copyright interest in the
13
+ // software to the public domain. We make this dedication for the benefit
14
+ // of the public at large and to the detriment of our heirs and
15
+ // successors. We intend this dedication to be an overt act of
16
+ // relinquishment in perpetuity of all present and future rights to this
17
+ // software under copyright law.
18
+ //
19
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
+ // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
+ // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
+ // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23
+ // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24
+ // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
+ // OTHER DEALINGS IN THE SOFTWARE.
26
+ //
27
+ // For more information, please refer to <http://unlicense.org/>
28
+
29
+ #ifndef SHEREDOM_UTF8_H_INCLUDED
30
+ #define SHEREDOM_UTF8_H_INCLUDED
31
+
32
+ #if defined(_MSC_VER)
33
+ #pragma warning(push)
34
+
35
+ // disable 'bytes padding added after construct' warning
36
+ #pragma warning(disable : 4820)
37
+ #endif
38
+
39
+ #include <stddef.h>
40
+ #include <stdlib.h>
41
+
42
+ #if defined(_MSC_VER)
43
+ #pragma warning(pop)
44
+ #endif
45
+
46
+ #if defined(_MSC_VER)
47
+ typedef __int32 utf8_int32_t;
48
+ #else
49
+ #include <stdint.h>
50
+ typedef int32_t utf8_int32_t;
51
+ #endif
52
+
53
+ #if defined(__clang__)
54
+ #pragma clang diagnostic push
55
+ #pragma clang diagnostic ignored "-Wold-style-cast"
56
+ #pragma clang diagnostic ignored "-Wcast-qual"
57
+ #endif
58
+
59
+ #ifdef __cplusplus
60
+ extern "C"
61
+ {
62
+ #endif
63
+
64
+ #if defined(__clang__) || defined(__GNUC__)
65
+ #define utf8_nonnull __attribute__((nonnull))
66
+ #define utf8_pure __attribute__((pure))
67
+ #define utf8_restrict __restrict__
68
+ #define utf8_weak __attribute__((weak))
69
+ #elif defined(_MSC_VER)
70
+ #define utf8_nonnull
71
+ #define utf8_pure
72
+ #define utf8_restrict __restrict
73
+ #define utf8_weak __inline
74
+ #else
75
+ #error Non clang, non gcc, non MSVC compiler found!
76
+ #endif
77
+
78
+ #ifdef __cplusplus
79
+ #define utf8_null NULL
80
+ #else
81
+ #define utf8_null 0
82
+ #endif
83
+
84
+ // Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
85
+ // src2 respectively, case insensitive.
86
+ utf8_nonnull utf8_pure utf8_weak int utf8casecmp(const void *src1, const void *src2);
87
+
88
+ // Append the utf8 string src onto the utf8 string dst.
89
+ utf8_nonnull utf8_weak void *utf8cat(void *utf8_restrict dst, const void *utf8_restrict src);
90
+
91
+ // Find the first match of the utf8 codepoint chr in the utf8 string src.
92
+ utf8_nonnull utf8_pure utf8_weak void *utf8chr(const void *src, utf8_int32_t chr);
93
+
94
+ // Return less than 0, 0, greater than 0 if src1 < src2,
95
+ // src1 == src2, src1 > src2 respectively.
96
+ utf8_nonnull utf8_pure utf8_weak int utf8cmp(const void *src1, const void *src2);
97
+
98
+ // Copy the utf8 string src onto the memory allocated in dst.
99
+ utf8_nonnull utf8_weak void *utf8cpy(void *utf8_restrict dst, const void *utf8_restrict src);
100
+
101
+ // Number of utf8 codepoints in the utf8 string src that consists entirely
102
+ // of utf8 codepoints not from the utf8 string reject.
103
+ utf8_nonnull utf8_pure utf8_weak size_t utf8cspn(const void *src, const void *reject);
104
+
105
+ // Duplicate the utf8 string src by getting its size, malloc'ing a new buffer
106
+ // copying over the data, and returning that. Or 0 if malloc failed.
107
+ utf8_nonnull utf8_weak void *utf8dup(const void *src);
108
+
109
+ // Number of utf8 codepoints in the utf8 string str,
110
+ // excluding the null terminating byte.
111
+ utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str);
112
+
113
+ // Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
114
+ // src2 respectively, case insensitive. Checking at most n bytes of each utf8
115
+ // string.
116
+ utf8_nonnull utf8_pure utf8_weak int utf8ncasecmp(const void *src1, const void *src2, size_t n);
117
+
118
+ // Append the utf8 string src onto the utf8 string dst,
119
+ // writing at most n+1 bytes. Can produce an invalid utf8
120
+ // string if n falls partway through a utf8 codepoint.
121
+ utf8_nonnull utf8_weak void *utf8ncat(void *utf8_restrict dst, const void *utf8_restrict src, size_t n);
122
+
123
+ // Return less than 0, 0, greater than 0 if src1 < src2,
124
+ // src1 == src2, src1 > src2 respectively. Checking at most n
125
+ // bytes of each utf8 string.
126
+ utf8_nonnull utf8_pure utf8_weak int utf8ncmp(const void *src1, const void *src2, size_t n);
127
+
128
+ // Copy the utf8 string src onto the memory allocated in dst.
129
+ // Copies at most n bytes. If there is no terminating null byte in
130
+ // the first n bytes of src, the string placed into dst will not be
131
+ // null-terminated. If the size (in bytes) of src is less than n,
132
+ // extra null terminating bytes are appended to dst such that a
133
+ // total of n bytes are written. Can produce an invalid utf8
134
+ // string if n falls partway through a utf8 codepoint.
135
+ utf8_nonnull utf8_weak void *utf8ncpy(void *utf8_restrict dst, const void *utf8_restrict src, size_t n);
136
+
137
+ // Similar to utf8dup, except that at most n bytes of src are copied. If src is
138
+ // longer than n, only n bytes are copied and a null byte is added.
139
+ //
140
+ // Returns a new string if successful, 0 otherwise
141
+ utf8_nonnull utf8_weak void *utf8ndup(const void *src, size_t n);
142
+
143
+ // Locates the first occurrence in the utf8 string str of any codepoint in the
144
+ // utf8 string accept, or 0 if no match was found.
145
+ utf8_nonnull utf8_pure utf8_weak void *utf8pbrk(const void *str, const void *accept);
146
+
147
+ // Find the last match of the utf8 codepoint chr in the utf8 string src.
148
+ utf8_nonnull utf8_pure utf8_weak void *utf8rchr(const void *src, int chr);
149
+
150
+ // Number of bytes in the utf8 string str,
151
+ // including the null terminating byte.
152
+ utf8_nonnull utf8_pure utf8_weak size_t utf8size(const void *str);
153
+
154
+ // Number of utf8 codepoints in the utf8 string src that consists entirely
155
+ // of utf8 codepoints from the utf8 string accept.
156
+ utf8_nonnull utf8_pure utf8_weak size_t utf8spn(const void *src, const void *accept);
157
+
158
+ // The position of the utf8 string needle in the utf8 string haystack.
159
+ utf8_nonnull utf8_pure utf8_weak void *utf8str(const void *haystack, const void *needle);
160
+
161
+ // The position of the utf8 string needle in the utf8 string haystack, case
162
+ // insensitive.
163
+ utf8_nonnull utf8_pure utf8_weak void *utf8casestr(const void *haystack, const void *needle);
164
+
165
+ // Return 0 on success, or the position of the invalid
166
+ // utf8 codepoint on failure.
167
+ utf8_nonnull utf8_pure utf8_weak void *utf8valid(const void *str);
168
+
169
+ // Sets out_codepoint to the next utf8 codepoint in str, and returns the address
170
+ // of the utf8 codepoint after the current one in str.
171
+ utf8_nonnull utf8_weak void *utf8codepoint(const void *utf8_restrict str,
172
+ utf8_int32_t *utf8_restrict out_codepoint);
173
+
174
+ // Calculates the size of the next utf8 codepoint in str.
175
+ utf8_nonnull utf8_weak size_t utf8codepointcalcsize(const void *utf8_restrict str);
176
+
177
+ // Returns the size of the given codepoint in bytes.
178
+ utf8_weak size_t utf8codepointsize(utf8_int32_t chr);
179
+
180
+ // Write a codepoint to the given string, and return the address to the next
181
+ // place after the written codepoint. Pass how many bytes left in the buffer to
182
+ // n. If there is not enough space for the codepoint, this function returns
183
+ // null.
184
+ utf8_nonnull utf8_weak void *utf8catcodepoint(void *utf8_restrict str, utf8_int32_t chr, size_t n);
185
+
186
+ // Returns 1 if the given character is lowercase, or 0 if it is not.
187
+ utf8_weak int utf8islower(utf8_int32_t chr);
188
+
189
+ // Returns 1 if the given character is uppercase, or 0 if it is not.
190
+ utf8_weak int utf8isupper(utf8_int32_t chr);
191
+
192
+ // Transform the given string into all lowercase codepoints.
193
+ utf8_nonnull utf8_weak void utf8lwr(void *utf8_restrict str);
194
+
195
+ // Transform the given string into all uppercase codepoints.
196
+ utf8_nonnull utf8_weak void utf8upr(void *utf8_restrict str);
197
+
198
+ // Make a codepoint lower case if possible.
199
+ utf8_weak utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp);
200
+
201
+ // Make a codepoint upper case if possible.
202
+ utf8_weak utf8_int32_t utf8uprcodepoint(utf8_int32_t cp);
203
+
204
+ #undef utf8_weak
205
+ #undef utf8_pure
206
+ #undef utf8_nonnull
207
+
208
+ int utf8casecmp(const void *src1, const void *src2)
209
+ {
210
+ utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
211
+
212
+ for (;;)
213
+ {
214
+ src1 = utf8codepoint(src1, &src1_cp);
215
+ src2 = utf8codepoint(src2, &src2_cp);
216
+
217
+ // take a copy of src1 & src2
218
+ src1_orig_cp = src1_cp;
219
+ src2_orig_cp = src2_cp;
220
+
221
+ // lower the srcs if required
222
+ src1_cp = utf8lwrcodepoint(src1_cp);
223
+ src2_cp = utf8lwrcodepoint(src2_cp);
224
+
225
+ // check if the lowered codepoints match
226
+ if ((0 == src1_orig_cp) && (0 == src2_orig_cp))
227
+ {
228
+ return 0;
229
+ }
230
+ else if (src1_cp == src2_cp)
231
+ {
232
+ continue;
233
+ }
234
+
235
+ // if they don't match, then we return the difference between the characters
236
+ return src1_cp - src2_cp;
237
+ }
238
+ }
239
+
240
+ void *utf8cat(void *utf8_restrict dst, const void *utf8_restrict src)
241
+ {
242
+ char *d = (char *)dst;
243
+ const char *s = (const char *)src;
244
+
245
+ // find the null terminating byte in dst
246
+ while ('\0' != *d)
247
+ {
248
+ d++;
249
+ }
250
+
251
+ // overwriting the null terminating byte in dst, append src byte-by-byte
252
+ while ('\0' != *s)
253
+ {
254
+ *d++ = *s++;
255
+ }
256
+
257
+ // write out a new null terminating byte into dst
258
+ *d = '\0';
259
+
260
+ return dst;
261
+ }
262
+
263
+ void *utf8chr(const void *src, utf8_int32_t chr)
264
+ {
265
+ char c[5] = {'\0', '\0', '\0', '\0', '\0'};
266
+
267
+ if (0 == chr)
268
+ {
269
+ // being asked to return position of null terminating byte, so
270
+ // just run s to the end, and return!
271
+ const char *s = (const char *)src;
272
+ while ('\0' != *s)
273
+ {
274
+ s++;
275
+ }
276
+ return (void *)s;
277
+ }
278
+ else if (0 == ((utf8_int32_t)0xffffff80 & chr))
279
+ {
280
+ // 1-byte/7-bit ascii
281
+ // (0b0xxxxxxx)
282
+ c[0] = (char)chr;
283
+ }
284
+ else if (0 == ((utf8_int32_t)0xfffff800 & chr))
285
+ {
286
+ // 2-byte/11-bit utf8 code point
287
+ // (0b110xxxxx 0b10xxxxxx)
288
+ c[0] = 0xc0 | (char)(chr >> 6);
289
+ c[1] = 0x80 | (char)(chr & 0x3f);
290
+ }
291
+ else if (0 == ((utf8_int32_t)0xffff0000 & chr))
292
+ {
293
+ // 3-byte/16-bit utf8 code point
294
+ // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
295
+ c[0] = 0xe0 | (char)(chr >> 12);
296
+ c[1] = 0x80 | (char)((chr >> 6) & 0x3f);
297
+ c[2] = 0x80 | (char)(chr & 0x3f);
298
+ }
299
+ else
300
+ { // if (0 == ((int)0xffe00000 & chr)) {
301
+ // 4-byte/21-bit utf8 code point
302
+ // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
303
+ c[0] = 0xf0 | (char)(chr >> 18);
304
+ c[1] = 0x80 | (char)((chr >> 12) & 0x3f);
305
+ c[2] = 0x80 | (char)((chr >> 6) & 0x3f);
306
+ c[3] = 0x80 | (char)(chr & 0x3f);
307
+ }
308
+
309
+ // we've made c into a 2 utf8 codepoint string, one for the chr we are
310
+ // seeking, another for the null terminating byte. Now use utf8str to
311
+ // search
312
+ return utf8str(src, c);
313
+ }
314
+
315
// Byte-wise comparison of two null terminated utf8 strings.
//
// Returns -1 if src1 sorts before src2, 1 if it sorts after, and 0 when the
// strings are identical. Bytes are compared as unsigned values.
int utf8cmp(const void *src1, const void *src2)
{
    const unsigned char *lhs = (const unsigned char *)src1;
    const unsigned char *rhs = (const unsigned char *)src2;

    for (;; lhs++, rhs++)
    {
        if (*lhs != *rhs)
        {
            // first differing byte decides the ordering
            return (*lhs < *rhs) ? -1 : 1;
        }

        if ('\0' == *lhs)
        {
            // bytes are equal and both are the terminator
            return 0;
        }
    }
}
338
+
339
+ int utf8coll(const void *src1, const void *src2);
340
+
341
+ void *utf8cpy(void *utf8_restrict dst, const void *utf8_restrict src)
342
+ {
343
+ char *d = (char *)dst;
344
+ const char *s = (const char *)src;
345
+
346
+ // overwriting anything previously in dst, write byte-by-byte
347
+ // from src
348
+ while ('\0' != *s)
349
+ {
350
+ *d++ = *s++;
351
+ }
352
+
353
+ // append null terminating byte
354
+ *d = '\0';
355
+
356
+ return dst;
357
+ }
358
+
359
// Count the codepoints at the start of src that do not appear in reject.
//
// For each codepoint of src, every codepoint of reject is compared byte by
// byte. The count of src codepoints passed so far is returned as soon as one
// of them matches, or the total count if src ends first.
size_t utf8cspn(const void *src, const void *reject)
{
    const char *cursor = (const char *)src;
    size_t count = 0;

    for (; '\0' != *cursor; count++)
    {
        const char *candidate = (const char *)reject;
        size_t matched = 0;

        while ('\0' != *candidate)
        {
            // candidate has reached the start of the next reject codepoint
            // (not a 0b10xxxxxx byte) with bytes already matched, so the
            // previous reject codepoint matched in full
            if ((0 < matched) && (0x80 != (0xc0 & *candidate)))
            {
                return count;
            }

            if (*candidate == cursor[matched])
            {
                // one more byte of this codepoint agrees
                matched++;
                candidate++;
                continue;
            }

            // mismatch: skip the rest of this reject codepoint and start
            // matching again from the first byte of the src codepoint
            do
            {
                candidate++;
            } while (0x80 == (0xc0 & *candidate));

            matched = 0;
        }

        // the last codepoint of reject matched right at its terminator
        if (0 < matched)
        {
            return count;
        }

        // no reject codepoint matched: step to the next codepoint start
        do
        {
            cursor++;
        } while (0x80 == (0xc0 & *cursor));
    }

    return count;
}
421
+
422
+ void *utf8dup(const void *src)
423
+ {
424
+ const char *s = (const char *)src;
425
+ char *n = utf8_null;
426
+
427
+ // figure out how many bytes (including the terminator) we need to copy first
428
+ size_t bytes = utf8size(src);
429
+
430
+ n = (char *)malloc(bytes);
431
+
432
+ if (utf8_null == n)
433
+ {
434
+ // out of memory so we bail
435
+ return utf8_null;
436
+ }
437
+ else
438
+ {
439
+ bytes = 0;
440
+
441
+ // copy src byte-by-byte into our new utf8 string
442
+ while ('\0' != s[bytes])
443
+ {
444
+ n[bytes] = s[bytes];
445
+ bytes++;
446
+ }
447
+
448
+ // append null terminating byte
449
+ n[bytes] = '\0';
450
+ return n;
451
+ }
452
+ }
453
+
454
+ void *utf8fry(const void *str);
455
+
456
// Number of utf8 codepoints in the null terminated string str, excluding the
// terminator.
//
// The width of each codepoint is taken from its lead byte (0b11110xxx = 4,
// 0b1110xxxx = 3, 0b110xxxxx = 2, anything else = 1). Trailing bytes are
// skipped without being validated, but the scan never steps over the null
// terminator: a multi-byte sequence that is cut short by the end of the
// string is counted as one codepoint and the scan stops there, instead of
// reading memory beyond the string.
size_t utf8len(const void *str)
{
    const unsigned char *s = (const unsigned char *)str;
    size_t length = 0;

    while ('\0' != *s)
    {
        size_t width;

        if (0xf0 == (0xf8 & *s))
        {
            // 4-byte utf8 code point (began with 0b11110xxx)
            width = 4;
        }
        else if (0xe0 == (0xf0 & *s))
        {
            // 3-byte utf8 code point (began with 0b1110xxxx)
            width = 3;
        }
        else if (0xc0 == (0xe0 & *s))
        {
            // 2-byte utf8 code point (began with 0b110xxxxx)
            width = 2;
        }
        else
        {
            // 1-byte ascii, or any other byte, advances by one
            width = 1;
        }

        // consume the lead byte, then at most width - 1 trailing bytes,
        // stopping early if the terminator shows up inside the sequence
        s++;
        while ((0 != --width) && ('\0' != *s))
        {
            s++;
        }

        // however many bytes were consumed, it was one utf8 codepoint
        length++;
    }

    return length;
}
491
+
492
+ int utf8ncasecmp(const void *src1, const void *src2, size_t n)
493
+ {
494
+ utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
495
+
496
+ do
497
+ {
498
+ const unsigned char *const s1 = (const unsigned char *)src1;
499
+ const unsigned char *const s2 = (const unsigned char *)src2;
500
+
501
+ // first check that we have enough bytes left in n to contain an entire
502
+ // codepoint
503
+ if (0 == n)
504
+ {
505
+ return 0;
506
+ }
507
+
508
+ if ((1 == n) && ((0xc0 == (0xe0 & *s1)) || (0xc0 == (0xe0 & *s2))))
509
+ {
510
+ const utf8_int32_t c1 = (0xe0 & *s1);
511
+ const utf8_int32_t c2 = (0xe0 & *s2);
512
+
513
+ if (c1 < c2)
514
+ {
515
+ return c1 - c2;
516
+ }
517
+ else
518
+ {
519
+ return 0;
520
+ }
521
+ }
522
+
523
+ if ((2 >= n) && ((0xe0 == (0xf0 & *s1)) || (0xe0 == (0xf0 & *s2))))
524
+ {
525
+ const utf8_int32_t c1 = (0xf0 & *s1);
526
+ const utf8_int32_t c2 = (0xf0 & *s2);
527
+
528
+ if (c1 < c2)
529
+ {
530
+ return c1 - c2;
531
+ }
532
+ else
533
+ {
534
+ return 0;
535
+ }
536
+ }
537
+
538
+ if ((3 >= n) && ((0xf0 == (0xf8 & *s1)) || (0xf0 == (0xf8 & *s2))))
539
+ {
540
+ const utf8_int32_t c1 = (0xf8 & *s1);
541
+ const utf8_int32_t c2 = (0xf8 & *s2);
542
+
543
+ if (c1 < c2)
544
+ {
545
+ return c1 - c2;
546
+ }
547
+ else
548
+ {
549
+ return 0;
550
+ }
551
+ }
552
+
553
+ src1 = utf8codepoint(src1, &src1_cp);
554
+ src2 = utf8codepoint(src2, &src2_cp);
555
+ n -= utf8codepointsize(src1_cp);
556
+
557
+ // Take a copy of src1 & src2
558
+ src1_orig_cp = src1_cp;
559
+ src2_orig_cp = src2_cp;
560
+
561
+ // Lower srcs if required
562
+ src1_cp = utf8lwrcodepoint(src1_cp);
563
+ src2_cp = utf8lwrcodepoint(src2_cp);
564
+
565
+ // Check if the lowered codepoints match
566
+ if ((0 == src1_orig_cp) && (0 == src2_orig_cp))
567
+ {
568
+ return 0;
569
+ }
570
+ else if (src1_cp == src2_cp)
571
+ {
572
+ continue;
573
+ }
574
+
575
+ // if they don't match, then we return the difference between the characters
576
+ if (src1_orig_cp != src2_orig_cp)
577
+ {
578
+ return src1_cp - src2_cp;
579
+ }
580
+ } while (0 < n);
581
+
582
+ // both utf8 strings matched
583
+ return 0;
584
+ }
585
+
586
+ void *utf8ncat(void *utf8_restrict dst, const void *utf8_restrict src, size_t n)
587
+ {
588
+ char *d = (char *)dst;
589
+ const char *s = (const char *)src;
590
+
591
+ // find the null terminating byte in dst
592
+ while ('\0' != *d)
593
+ {
594
+ d++;
595
+ }
596
+
597
+ // overwriting the null terminating byte in dst, append src byte-by-byte
598
+ // stopping if we run out of space
599
+ do
600
+ {
601
+ *d++ = *s++;
602
+ } while (('\0' != *s) && (0 != --n));
603
+
604
+ // write out a new null terminating byte into dst
605
+ *d = '\0';
606
+
607
+ return dst;
608
+ }
609
+
610
// Byte-wise comparison of two utf8 strings, looking at no more than n bytes.
//
// Returns -1 if src1 sorts before src2, 1 if it sorts after, and 0 when the
// first n bytes (or the whole strings, if shorter) are identical. Bytes are
// compared as unsigned values.
int utf8ncmp(const void *src1, const void *src2, size_t n)
{
    const unsigned char *lhs = (const unsigned char *)src1;
    const unsigned char *rhs = (const unsigned char *)src2;
    size_t i;

    for (i = 0; i < n; i++)
    {
        const unsigned char a = lhs[i];
        const unsigned char b = rhs[i];

        if (a != b)
        {
            // first differing byte decides the ordering
            return (a < b) ? -1 : 1;
        }

        if ('\0' == a)
        {
            // equal bytes and both are the terminator: nothing left to check
            break;
        }
    }

    // everything examined was equal
    return 0;
}
633
+
634
+ void *utf8ncpy(void *utf8_restrict dst, const void *utf8_restrict src, size_t n)
635
+ {
636
+ char *d = (char *)dst;
637
+ const char *s = (const char *)src;
638
+ size_t index;
639
+
640
+ // overwriting anything previously in dst, write byte-by-byte
641
+ // from src
642
+ for (index = 0; index < n; index++)
643
+ {
644
+ d[index] = s[index];
645
+ if ('\0' == s[index])
646
+ {
647
+ break;
648
+ }
649
+ }
650
+
651
+ // append null terminating byte
652
+ for (; index < n; index++)
653
+ {
654
+ d[index] = 0;
655
+ }
656
+
657
+ return dst;
658
+ }
659
+
660
+ void *utf8ndup(const void *src, size_t n)
661
+ {
662
+ const char *s = (const char *)src;
663
+ char *c = utf8_null;
664
+ size_t bytes = 0;
665
+
666
+ // Find the end of the string or stop when n is reached
667
+ while ('\0' != s[bytes] && bytes < n)
668
+ {
669
+ bytes++;
670
+ }
671
+
672
+ // In case bytes is actually less than n, we need to set it
673
+ // to be used later in the copy byte by byte.
674
+ n = bytes;
675
+
676
+ c = (char *)malloc(bytes + 1);
677
+ if (utf8_null == c)
678
+ {
679
+ // out of memory so we bail
680
+ return utf8_null;
681
+ }
682
+
683
+ bytes = 0;
684
+
685
+ // copy src byte-by-byte into our new utf8 string
686
+ while ('\0' != s[bytes] && bytes < n)
687
+ {
688
+ c[bytes] = s[bytes];
689
+ bytes++;
690
+ }
691
+
692
+ // append null terminating byte
693
+ c[bytes] = '\0';
694
+ return c;
695
+ }
696
+
697
+ void *utf8rchr(const void *src, int chr)
698
+ {
699
+ const char *s = (const char *)src;
700
+ const char *match = utf8_null;
701
+ char c[5] = {'\0', '\0', '\0', '\0', '\0'};
702
+
703
+ if (0 == chr)
704
+ {
705
+ // being asked to return position of null terminating byte, so
706
+ // just run s to the end, and return!
707
+ while ('\0' != *s)
708
+ {
709
+ s++;
710
+ }
711
+ return (void *)s;
712
+ }
713
+ else if (0 == ((int)0xffffff80 & chr))
714
+ {
715
+ // 1-byte/7-bit ascii
716
+ // (0b0xxxxxxx)
717
+ c[0] = (char)chr;
718
+ }
719
+ else if (0 == ((int)0xfffff800 & chr))
720
+ {
721
+ // 2-byte/11-bit utf8 code point
722
+ // (0b110xxxxx 0b10xxxxxx)
723
+ c[0] = 0xc0 | (char)(chr >> 6);
724
+ c[1] = 0x80 | (char)(chr & 0x3f);
725
+ }
726
+ else if (0 == ((int)0xffff0000 & chr))
727
+ {
728
+ // 3-byte/16-bit utf8 code point
729
+ // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
730
+ c[0] = 0xe0 | (char)(chr >> 12);
731
+ c[1] = 0x80 | (char)((chr >> 6) & 0x3f);
732
+ c[2] = 0x80 | (char)(chr & 0x3f);
733
+ }
734
+ else
735
+ { // if (0 == ((int)0xffe00000 & chr)) {
736
+ // 4-byte/21-bit utf8 code point
737
+ // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
738
+ c[0] = 0xf0 | (char)(chr >> 18);
739
+ c[1] = 0x80 | (char)((chr >> 12) & 0x3f);
740
+ c[2] = 0x80 | (char)((chr >> 6) & 0x3f);
741
+ c[3] = 0x80 | (char)(chr & 0x3f);
742
+ }
743
+
744
+ // we've created a 2 utf8 codepoint string in c that is
745
+ // the utf8 character asked for by chr, and a null
746
+ // terminating byte
747
+
748
+ while ('\0' != *s)
749
+ {
750
+ size_t offset = 0;
751
+
752
+ while (s[offset] == c[offset])
753
+ {
754
+ offset++;
755
+ }
756
+
757
+ if ('\0' == c[offset])
758
+ {
759
+ // we found a matching utf8 code point
760
+ match = s;
761
+ s += offset;
762
+ }
763
+ else
764
+ {
765
+ s += offset;
766
+
767
+ // need to march s along to next utf8 codepoint start
768
+ // (the next byte that doesn't match 0b10xxxxxx)
769
+ if ('\0' != *s)
770
+ {
771
+ do
772
+ {
773
+ s++;
774
+ } while (0x80 == (0xc0 & *s));
775
+ }
776
+ }
777
+ }
778
+
779
+ // return the last match we found (or 0 if no match was found)
780
+ return (void *)match;
781
+ }
782
+
783
+ void *utf8pbrk(const void *str, const void *accept)
784
+ {
785
+ const char *s = (const char *)str;
786
+
787
+ while ('\0' != *s)
788
+ {
789
+ const char *a = (const char *)accept;
790
+ size_t offset = 0;
791
+
792
+ while ('\0' != *a)
793
+ {
794
+ // checking that if *a is the start of a utf8 codepoint
795
+ // (it is not 0b10xxxxxx) and we have successfully matched
796
+ // a previous character (0 < offset) - we found a match
797
+ if ((0x80 != (0xc0 & *a)) && (0 < offset))
798
+ {
799
+ return (void *)s;
800
+ }
801
+ else
802
+ {
803
+ if (*a == s[offset])
804
+ {
805
+ // part of a utf8 codepoint matched, so move our checking
806
+ // onwards to the next byte
807
+ offset++;
808
+ a++;
809
+ }
810
+ else
811
+ {
812
+ // r could be in the middle of an unmatching utf8 code point,
813
+ // so we need to march it on to the next character beginning,
814
+
815
+ do
816
+ {
817
+ a++;
818
+ } while (0x80 == (0xc0 & *a));
819
+
820
+ // reset offset too as we found a mismatch
821
+ offset = 0;
822
+ }
823
+ }
824
+ }
825
+
826
+ // we found a match on the last utf8 codepoint
827
+ if (0 < offset)
828
+ {
829
+ return (void *)s;
830
+ }
831
+
832
+ // the current utf8 codepoint in src did not match accept, but src
833
+ // could have been partway through a utf8 codepoint, so we need to
834
+ // march it onto the next utf8 codepoint starting byte
835
+ do
836
+ {
837
+ s++;
838
+ } while ((0x80 == (0xc0 & *s)));
839
+ }
840
+
841
+ return utf8_null;
842
+ }
843
+
844
// Number of bytes occupied by the utf8 string str, counting the null
// terminating byte.
size_t utf8size(const void *str)
{
    const char *const begin = (const char *)str;
    const char *end = begin;

    // walk to the terminator
    while ('\0' != *end)
    {
        end++;
    }

    // distance to the terminator, plus one for the terminator itself
    return (size_t)(end - begin) + 1;
}
857
+
858
// Count the codepoints at the start of src that all appear in accept.
//
// For each codepoint of src, the codepoints of accept are compared byte by
// byte. Counting stops at the first src codepoint that matches none of them,
// or at the end of src.
size_t utf8spn(const void *src, const void *accept)
{
    const char *cursor = (const char *)src;
    size_t count = 0;

    while ('\0' != *cursor)
    {
        const char *candidate = (const char *)accept;
        size_t matched = 0;

        while ('\0' != *candidate)
        {
            // candidate has reached the start of the next accept codepoint
            // (not a 0b10xxxxxx byte) with bytes already matched, so the
            // previous accept codepoint matched in full - stop searching
            if ((0 < matched) && (0x80 != (0xc0 & *candidate)))
            {
                break;
            }

            if (*candidate == cursor[matched])
            {
                // one more byte of this codepoint agrees
                matched++;
                candidate++;
                continue;
            }

            // mismatch: skip the rest of this accept codepoint and start
            // matching again from the first byte of the src codepoint
            do
            {
                candidate++;
            } while (0x80 == (0xc0 & *candidate));

            matched = 0;
        }

        // with nothing matched the search can only have ended on accept's
        // terminator, meaning this src codepoint is not in accept
        if (0 == matched)
        {
            return count;
        }

        // the src codepoint was found in accept: count it and move past it
        count++;
        cursor += matched;
    }

    return count;
}
923
+
924
+ void *utf8str(const void *haystack, const void *needle)
925
+ {
926
+ const char *h = (const char *)haystack;
927
+ utf8_int32_t throwaway_codepoint;
928
+
929
+ // if needle has no utf8 codepoints before the null terminating
930
+ // byte then return haystack
931
+ if ('\0' == *((const char *)needle))
932
+ {
933
+ return (void *)haystack;
934
+ }
935
+
936
+ while ('\0' != *h)
937
+ {
938
+ const char *maybeMatch = h;
939
+ const char *n = (const char *)needle;
940
+
941
+ while (*h == *n && (*h != '\0' && *n != '\0'))
942
+ {
943
+ n++;
944
+ h++;
945
+ }
946
+
947
+ if ('\0' == *n)
948
+ {
949
+ // we found the whole utf8 string for needle in haystack at
950
+ // maybeMatch, so return it
951
+ return (void *)maybeMatch;
952
+ }
953
+ else
954
+ {
955
+ // h could be in the middle of an unmatching utf8 codepoint,
956
+ // so we need to march it on to the next character beginning
957
+ // starting from the current character
958
+ h = (const char *)utf8codepoint(maybeMatch, &throwaway_codepoint);
959
+ }
960
+ }
961
+
962
+ // no match
963
+ return utf8_null;
964
+ }
965
+
966
+ void *utf8casestr(const void *haystack, const void *needle)
967
+ {
968
+ const void *h = haystack;
969
+
970
+ // if needle has no utf8 codepoints before the null terminating
971
+ // byte then return haystack
972
+ if ('\0' == *((const char *)needle))
973
+ {
974
+ return (void *)haystack;
975
+ }
976
+
977
+ for (;;)
978
+ {
979
+ const void *maybeMatch = h;
980
+ const void *n = needle;
981
+ utf8_int32_t h_cp, n_cp;
982
+
983
+ // Get the next code point and track it
984
+ const void *nextH = h = utf8codepoint(h, &h_cp);
985
+ n = utf8codepoint(n, &n_cp);
986
+
987
+ while ((0 != h_cp) && (0 != n_cp))
988
+ {
989
+ h_cp = utf8lwrcodepoint(h_cp);
990
+ n_cp = utf8lwrcodepoint(n_cp);
991
+
992
+ // if we find a mismatch, bail out!
993
+ if (h_cp != n_cp)
994
+ {
995
+ break;
996
+ }
997
+
998
+ h = utf8codepoint(h, &h_cp);
999
+ n = utf8codepoint(n, &n_cp);
1000
+ }
1001
+
1002
+ if (0 == n_cp)
1003
+ {
1004
+ // we found the whole utf8 string for needle in haystack at
1005
+ // maybeMatch, so return it
1006
+ return (void *)maybeMatch;
1007
+ }
1008
+
1009
+ if (0 == h_cp)
1010
+ {
1011
+ // no match
1012
+ return utf8_null;
1013
+ }
1014
+
1015
+ // Roll back to the next code point in the haystack to test
1016
+ h = nextH;
1017
+ }
1018
+ }
1019
+
1020
+ void *utf8valid(const void *str)
1021
+ {
1022
+ const char *s = (const char *)str;
1023
+
1024
+ while ('\0' != *s)
1025
+ {
1026
+ if (0xf0 == (0xf8 & *s))
1027
+ {
1028
+ // ensure each of the 3 following bytes in this 4-byte
1029
+ // utf8 codepoint began with 0b10xxxxxx
1030
+ if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) || (0x80 != (0xc0 & s[3])))
1031
+ {
1032
+ return (void *)s;
1033
+ }
1034
+
1035
+ // ensure that our utf8 codepoint ended after 4 bytes
1036
+ if (0x80 == (0xc0 & s[4]))
1037
+ {
1038
+ return (void *)s;
1039
+ }
1040
+
1041
+ // ensure that the top 5 bits of this 4-byte utf8
1042
+ // codepoint were not 0, as then we could have used
1043
+ // one of the smaller encodings
1044
+ if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1])))
1045
+ {
1046
+ return (void *)s;
1047
+ }
1048
+
1049
+ // 4-byte utf8 code point (began with 0b11110xxx)
1050
+ s += 4;
1051
+ }
1052
+ else if (0xe0 == (0xf0 & *s))
1053
+ {
1054
+ // ensure each of the 2 following bytes in this 3-byte
1055
+ // utf8 codepoint began with 0b10xxxxxx
1056
+ if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])))
1057
+ {
1058
+ return (void *)s;
1059
+ }
1060
+
1061
+ // ensure that our utf8 codepoint ended after 3 bytes
1062
+ if (0x80 == (0xc0 & s[3]))
1063
+ {
1064
+ return (void *)s;
1065
+ }
1066
+
1067
+ // ensure that the top 5 bits of this 3-byte utf8
1068
+ // codepoint were not 0, as then we could have used
1069
+ // one of the smaller encodings
1070
+ if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1])))
1071
+ {
1072
+ return (void *)s;
1073
+ }
1074
+
1075
+ // 3-byte utf8 code point (began with 0b1110xxxx)
1076
+ s += 3;
1077
+ }
1078
+ else if (0xc0 == (0xe0 & *s))
1079
+ {
1080
+ // ensure the 1 following byte in this 2-byte
1081
+ // utf8 codepoint began with 0b10xxxxxx
1082
+ if (0x80 != (0xc0 & s[1]))
1083
+ {
1084
+ return (void *)s;
1085
+ }
1086
+
1087
+ // ensure that our utf8 codepoint ended after 2 bytes
1088
+ if (0x80 == (0xc0 & s[2]))
1089
+ {
1090
+ return (void *)s;
1091
+ }
1092
+
1093
+ // ensure that the top 4 bits of this 2-byte utf8
1094
+ // codepoint were not 0, as then we could have used
1095
+ // one of the smaller encodings
1096
+ if (0 == (0x1e & s[0]))
1097
+ {
1098
+ return (void *)s;
1099
+ }
1100
+
1101
+ // 2-byte utf8 code point (began with 0b110xxxxx)
1102
+ s += 2;
1103
+ }
1104
+ else if (0x00 == (0x80 & *s))
1105
+ {
1106
+ // 1-byte ascii (began with 0b0xxxxxxx)
1107
+ s += 1;
1108
+ }
1109
+ else
1110
+ {
1111
+ // we have an invalid 0b1xxxxxxx utf8 code point entry
1112
+ return (void *)s;
1113
+ }
1114
+ }
1115
+
1116
+ return utf8_null;
1117
+ }
1118
+
1119
+ void *utf8codepoint(const void *utf8_restrict str, utf8_int32_t *utf8_restrict out_codepoint)
1120
+ {
1121
+ const char *s = (const char *)str;
1122
+
1123
+ if (0xf0 == (0xf8 & s[0]))
1124
+ {
1125
+ // 4 byte utf8 codepoint
1126
+ *out_codepoint = ((0x07 & s[0]) << 18) | ((0x3f & s[1]) << 12) | ((0x3f & s[2]) << 6) | (0x3f & s[3]);
1127
+ s += 4;
1128
+ }
1129
+ else if (0xe0 == (0xf0 & s[0]))
1130
+ {
1131
+ // 3 byte utf8 codepoint
1132
+ *out_codepoint = ((0x0f & s[0]) << 12) | ((0x3f & s[1]) << 6) | (0x3f & s[2]);
1133
+ s += 3;
1134
+ }
1135
+ else if (0xc0 == (0xe0 & s[0]))
1136
+ {
1137
+ // 2 byte utf8 codepoint
1138
+ *out_codepoint = ((0x1f & s[0]) << 6) | (0x3f & s[1]);
1139
+ s += 2;
1140
+ }
1141
+ else
1142
+ {
1143
+ // 1 byte utf8 codepoint otherwise
1144
+ *out_codepoint = s[0];
1145
+ s += 1;
1146
+ }
1147
+
1148
+ return (void *)s;
1149
+ }
1150
+
1151
+ size_t utf8codepointcalcsize(const void *utf8_restrict str)
1152
+ {
1153
+ const char *s = (const char *)str;
1154
+
1155
+ if (0xf0 == (0xf8 & s[0]))
1156
+ {
1157
+ // 4 byte utf8 codepoint
1158
+ return 4;
1159
+ }
1160
+ else if (0xe0 == (0xf0 & s[0]))
1161
+ {
1162
+ // 3 byte utf8 codepoint
1163
+ return 3;
1164
+ }
1165
+ else if (0xc0 == (0xe0 & s[0]))
1166
+ {
1167
+ // 2 byte utf8 codepoint
1168
+ return 2;
1169
+ }
1170
+
1171
+ // 1 byte utf8 codepoint otherwise
1172
+ return 1;
1173
+ }
1174
+
1175
+ size_t utf8codepointsize(utf8_int32_t chr)
1176
+ {
1177
+ if (0 == ((utf8_int32_t)0xffffff80 & chr))
1178
+ {
1179
+ return 1;
1180
+ }
1181
+ else if (0 == ((utf8_int32_t)0xfffff800 & chr))
1182
+ {
1183
+ return 2;
1184
+ }
1185
+ else if (0 == ((utf8_int32_t)0xffff0000 & chr))
1186
+ {
1187
+ return 3;
1188
+ }
1189
+ else
1190
+ { // if (0 == ((int)0xffe00000 & chr)) {
1191
+ return 4;
1192
+ }
1193
+ }
1194
+
1195
+ void *utf8catcodepoint(void *utf8_restrict str, utf8_int32_t chr, size_t n)
1196
+ {
1197
+ char *s = (char *)str;
1198
+
1199
+ if (0 == ((utf8_int32_t)0xffffff80 & chr))
1200
+ {
1201
+ // 1-byte/7-bit ascii
1202
+ // (0b0xxxxxxx)
1203
+ if (n < 1)
1204
+ {
1205
+ return utf8_null;
1206
+ }
1207
+ s[0] = (char)chr;
1208
+ s += 1;
1209
+ }
1210
+ else if (0 == ((utf8_int32_t)0xfffff800 & chr))
1211
+ {
1212
+ // 2-byte/11-bit utf8 code point
1213
+ // (0b110xxxxx 0b10xxxxxx)
1214
+ if (n < 2)
1215
+ {
1216
+ return utf8_null;
1217
+ }
1218
+ s[0] = 0xc0 | (char)(chr >> 6);
1219
+ s[1] = 0x80 | (char)(chr & 0x3f);
1220
+ s += 2;
1221
+ }
1222
+ else if (0 == ((utf8_int32_t)0xffff0000 & chr))
1223
+ {
1224
+ // 3-byte/16-bit utf8 code point
1225
+ // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
1226
+ if (n < 3)
1227
+ {
1228
+ return utf8_null;
1229
+ }
1230
+ s[0] = 0xe0 | (char)(chr >> 12);
1231
+ s[1] = 0x80 | (char)((chr >> 6) & 0x3f);
1232
+ s[2] = 0x80 | (char)(chr & 0x3f);
1233
+ s += 3;
1234
+ }
1235
+ else
1236
+ { // if (0 == ((int)0xffe00000 & chr)) {
1237
+ // 4-byte/21-bit utf8 code point
1238
+ // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
1239
+ if (n < 4)
1240
+ {
1241
+ return utf8_null;
1242
+ }
1243
+ s[0] = 0xf0 | (char)(chr >> 18);
1244
+ s[1] = 0x80 | (char)((chr >> 12) & 0x3f);
1245
+ s[2] = 0x80 | (char)((chr >> 6) & 0x3f);
1246
+ s[3] = 0x80 | (char)(chr & 0x3f);
1247
+ s += 4;
1248
+ }
1249
+
1250
+ return s;
1251
+ }
1252
+
1253
+ int utf8islower(utf8_int32_t chr)
1254
+ {
1255
+ return chr != utf8uprcodepoint(chr);
1256
+ }
1257
+
1258
+ int utf8isupper(utf8_int32_t chr)
1259
+ {
1260
+ return chr != utf8lwrcodepoint(chr);
1261
+ }
1262
+
1263
+ void utf8lwr(void *utf8_restrict str)
1264
+ {
1265
+ void *p, *pn;
1266
+ utf8_int32_t cp;
1267
+
1268
+ p = (char *)str;
1269
+ pn = utf8codepoint(p, &cp);
1270
+
1271
+ while (cp != 0)
1272
+ {
1273
+ const utf8_int32_t lwr_cp = utf8lwrcodepoint(cp);
1274
+ const size_t size = utf8codepointsize(lwr_cp);
1275
+
1276
+ if (lwr_cp != cp)
1277
+ {
1278
+ utf8catcodepoint(p, lwr_cp, size);
1279
+ }
1280
+
1281
+ p = pn;
1282
+ pn = utf8codepoint(p, &cp);
1283
+ }
1284
+ }
1285
+
1286
+ void utf8upr(void *utf8_restrict str)
1287
+ {
1288
+ void *p, *pn;
1289
+ utf8_int32_t cp;
1290
+
1291
+ p = (char *)str;
1292
+ pn = utf8codepoint(p, &cp);
1293
+
1294
+ while (cp != 0)
1295
+ {
1296
+ const utf8_int32_t lwr_cp = utf8uprcodepoint(cp);
1297
+ const size_t size = utf8codepointsize(lwr_cp);
1298
+
1299
+ if (lwr_cp != cp)
1300
+ {
1301
+ utf8catcodepoint(p, lwr_cp, size);
1302
+ }
1303
+
1304
+ p = pn;
1305
+ pn = utf8codepoint(p, &cp);
1306
+ }
1307
+ }
1308
+
1309
+ utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp)
1310
+ {
1311
+ if (((0x0041 <= cp) && (0x005a >= cp)) || ((0x00c0 <= cp) && (0x00d6 >= cp)) ||
1312
+ ((0x00d8 <= cp) && (0x00de >= cp)) || ((0x0391 <= cp) && (0x03a1 >= cp)) ||
1313
+ ((0x03a3 <= cp) && (0x03ab >= cp)) || ((0x0410 <= cp) && (0x042f >= cp)))
1314
+ {
1315
+ cp += 32;
1316
+ }
1317
+ else if ((0x0400 <= cp) && (0x040f >= cp))
1318
+ {
1319
+ cp += 80;
1320
+ }
1321
+ else if (((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp)) ||
1322
+ ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp)) ||
1323
+ ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp)) ||
1324
+ ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp)) ||
1325
+ ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp)) ||
1326
+ ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04ff >= cp)))
1327
+ {
1328
+ cp |= 0x1;
1329
+ }
1330
+ else if (((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp)) ||
1331
+ ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp)) ||
1332
+ ((0x01cd <= cp) && (0x01dc >= cp)))
1333
+ {
1334
+ cp += 1;
1335
+ cp &= ~0x1;
1336
+ }
1337
+ else
1338
+ {
1339
+ switch (cp)
1340
+ {
1341
+ default:
1342
+ break;
1343
+ case 0x0178:
1344
+ cp = 0x00ff;
1345
+ break;
1346
+ case 0x0243:
1347
+ cp = 0x0180;
1348
+ break;
1349
+ case 0x018e:
1350
+ cp = 0x01dd;
1351
+ break;
1352
+ case 0x023d:
1353
+ cp = 0x019a;
1354
+ break;
1355
+ case 0x0220:
1356
+ cp = 0x019e;
1357
+ break;
1358
+ case 0x01b7:
1359
+ cp = 0x0292;
1360
+ break;
1361
+ case 0x01c4:
1362
+ cp = 0x01c6;
1363
+ break;
1364
+ case 0x01c7:
1365
+ cp = 0x01c9;
1366
+ break;
1367
+ case 0x01ca:
1368
+ cp = 0x01cc;
1369
+ break;
1370
+ case 0x01f1:
1371
+ cp = 0x01f3;
1372
+ break;
1373
+ case 0x01f7:
1374
+ cp = 0x01bf;
1375
+ break;
1376
+ case 0x0187:
1377
+ cp = 0x0188;
1378
+ break;
1379
+ case 0x018b:
1380
+ cp = 0x018c;
1381
+ break;
1382
+ case 0x0191:
1383
+ cp = 0x0192;
1384
+ break;
1385
+ case 0x0198:
1386
+ cp = 0x0199;
1387
+ break;
1388
+ case 0x01a7:
1389
+ cp = 0x01a8;
1390
+ break;
1391
+ case 0x01ac:
1392
+ cp = 0x01ad;
1393
+ break;
1394
+ case 0x01af:
1395
+ cp = 0x01b0;
1396
+ break;
1397
+ case 0x01b8:
1398
+ cp = 0x01b9;
1399
+ break;
1400
+ case 0x01bc:
1401
+ cp = 0x01bd;
1402
+ break;
1403
+ case 0x01f4:
1404
+ cp = 0x01f5;
1405
+ break;
1406
+ case 0x023b:
1407
+ cp = 0x023c;
1408
+ break;
1409
+ case 0x0241:
1410
+ cp = 0x0242;
1411
+ break;
1412
+ case 0x03fd:
1413
+ cp = 0x037b;
1414
+ break;
1415
+ case 0x03fe:
1416
+ cp = 0x037c;
1417
+ break;
1418
+ case 0x03ff:
1419
+ cp = 0x037d;
1420
+ break;
1421
+ case 0x037f:
1422
+ cp = 0x03f3;
1423
+ break;
1424
+ case 0x0386:
1425
+ cp = 0x03ac;
1426
+ break;
1427
+ case 0x0388:
1428
+ cp = 0x03ad;
1429
+ break;
1430
+ case 0x0389:
1431
+ cp = 0x03ae;
1432
+ break;
1433
+ case 0x038a:
1434
+ cp = 0x03af;
1435
+ break;
1436
+ case 0x038c:
1437
+ cp = 0x03cc;
1438
+ break;
1439
+ case 0x038e:
1440
+ cp = 0x03cd;
1441
+ break;
1442
+ case 0x038f:
1443
+ cp = 0x03ce;
1444
+ break;
1445
+ case 0x0370:
1446
+ cp = 0x0371;
1447
+ break;
1448
+ case 0x0372:
1449
+ cp = 0x0373;
1450
+ break;
1451
+ case 0x0376:
1452
+ cp = 0x0377;
1453
+ break;
1454
+ case 0x03f4:
1455
+ cp = 0x03d1;
1456
+ break;
1457
+ case 0x03cf:
1458
+ cp = 0x03d7;
1459
+ break;
1460
+ case 0x03f9:
1461
+ cp = 0x03f2;
1462
+ break;
1463
+ case 0x03f7:
1464
+ cp = 0x03f8;
1465
+ break;
1466
+ case 0x03fa:
1467
+ cp = 0x03fb;
1468
+ break;
1469
+ };
1470
+ }
1471
+
1472
+ return cp;
1473
+ }
1474
+
1475
+ utf8_int32_t utf8uprcodepoint(utf8_int32_t cp)
1476
+ {
1477
+ if (((0x0061 <= cp) && (0x007a >= cp)) || ((0x00e0 <= cp) && (0x00f6 >= cp)) ||
1478
+ ((0x00f8 <= cp) && (0x00fe >= cp)) || ((0x03b1 <= cp) && (0x03c1 >= cp)) ||
1479
+ ((0x03c3 <= cp) && (0x03cb >= cp)) || ((0x0430 <= cp) && (0x044f >= cp)))
1480
+ {
1481
+ cp -= 32;
1482
+ }
1483
+ else if ((0x0450 <= cp) && (0x045f >= cp))
1484
+ {
1485
+ cp -= 80;
1486
+ }
1487
+ else if (((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp)) ||
1488
+ ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp)) ||
1489
+ ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp)) ||
1490
+ ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp)) ||
1491
+ ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp)) ||
1492
+ ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04ff >= cp)))
1493
+ {
1494
+ cp &= ~0x1;
1495
+ }
1496
+ else if (((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp)) ||
1497
+ ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp)) ||
1498
+ ((0x01cd <= cp) && (0x01dc >= cp)))
1499
+ {
1500
+ cp -= 1;
1501
+ cp |= 0x1;
1502
+ }
1503
+ else
1504
+ {
1505
+ switch (cp)
1506
+ {
1507
+ default:
1508
+ break;
1509
+ case 0x00ff:
1510
+ cp = 0x0178;
1511
+ break;
1512
+ case 0x0180:
1513
+ cp = 0x0243;
1514
+ break;
1515
+ case 0x01dd:
1516
+ cp = 0x018e;
1517
+ break;
1518
+ case 0x019a:
1519
+ cp = 0x023d;
1520
+ break;
1521
+ case 0x019e:
1522
+ cp = 0x0220;
1523
+ break;
1524
+ case 0x0292:
1525
+ cp = 0x01b7;
1526
+ break;
1527
+ case 0x01c6:
1528
+ cp = 0x01c4;
1529
+ break;
1530
+ case 0x01c9:
1531
+ cp = 0x01c7;
1532
+ break;
1533
+ case 0x01cc:
1534
+ cp = 0x01ca;
1535
+ break;
1536
+ case 0x01f3:
1537
+ cp = 0x01f1;
1538
+ break;
1539
+ case 0x01bf:
1540
+ cp = 0x01f7;
1541
+ break;
1542
+ case 0x0188:
1543
+ cp = 0x0187;
1544
+ break;
1545
+ case 0x018c:
1546
+ cp = 0x018b;
1547
+ break;
1548
+ case 0x0192:
1549
+ cp = 0x0191;
1550
+ break;
1551
+ case 0x0199:
1552
+ cp = 0x0198;
1553
+ break;
1554
+ case 0x01a8:
1555
+ cp = 0x01a7;
1556
+ break;
1557
+ case 0x01ad:
1558
+ cp = 0x01ac;
1559
+ break;
1560
+ case 0x01b0:
1561
+ cp = 0x01af;
1562
+ break;
1563
+ case 0x01b9:
1564
+ cp = 0x01b8;
1565
+ break;
1566
+ case 0x01bd:
1567
+ cp = 0x01bc;
1568
+ break;
1569
+ case 0x01f5:
1570
+ cp = 0x01f4;
1571
+ break;
1572
+ case 0x023c:
1573
+ cp = 0x023b;
1574
+ break;
1575
+ case 0x0242:
1576
+ cp = 0x0241;
1577
+ break;
1578
+ case 0x037b:
1579
+ cp = 0x03fd;
1580
+ break;
1581
+ case 0x037c:
1582
+ cp = 0x03fe;
1583
+ break;
1584
+ case 0x037d:
1585
+ cp = 0x03ff;
1586
+ break;
1587
+ case 0x03f3:
1588
+ cp = 0x037f;
1589
+ break;
1590
+ case 0x03ac:
1591
+ cp = 0x0386;
1592
+ break;
1593
+ case 0x03ad:
1594
+ cp = 0x0388;
1595
+ break;
1596
+ case 0x03ae:
1597
+ cp = 0x0389;
1598
+ break;
1599
+ case 0x03af:
1600
+ cp = 0x038a;
1601
+ break;
1602
+ case 0x03cc:
1603
+ cp = 0x038c;
1604
+ break;
1605
+ case 0x03cd:
1606
+ cp = 0x038e;
1607
+ break;
1608
+ case 0x03ce:
1609
+ cp = 0x038f;
1610
+ break;
1611
+ case 0x0371:
1612
+ cp = 0x0370;
1613
+ break;
1614
+ case 0x0373:
1615
+ cp = 0x0372;
1616
+ break;
1617
+ case 0x0377:
1618
+ cp = 0x0376;
1619
+ break;
1620
+ case 0x03d1:
1621
+ cp = 0x03f4;
1622
+ break;
1623
+ case 0x03d7:
1624
+ cp = 0x03cf;
1625
+ break;
1626
+ case 0x03f2:
1627
+ cp = 0x03f9;
1628
+ break;
1629
+ case 0x03f8:
1630
+ cp = 0x03f7;
1631
+ break;
1632
+ case 0x03fb:
1633
+ cp = 0x03fa;
1634
+ break;
1635
+ };
1636
+ }
1637
+
1638
+ return cp;
1639
+ }
1640
+
1641
+ #undef utf8_restrict
1642
+ #undef utf8_null
1643
+
1644
+ #ifdef __cplusplus
1645
+ } // extern "C"
1646
+ #endif
1647
+
1648
+ #if defined(__clang__)
1649
+ #pragma clang diagnostic pop
1650
+ #endif
1651
+
1652
+ #endif // SHEREDOM_UTF8_H_INCLUDED