rgss 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.clang-format +6 -0
- data/.gitignore +167 -0
- data/.yardopts +6 -0
- data/CHANGELOG.md +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Rakefile +9 -0
- data/ext/rgss/cglm-v0.7.9.tar.gz +0 -0
- data/ext/rgss/color.c +599 -0
- data/ext/rgss/entity.c +373 -0
- data/ext/rgss/extconf.rb +53 -0
- data/ext/rgss/font.c +135 -0
- data/ext/rgss/game.c +469 -0
- data/ext/rgss/game.h +99 -0
- data/ext/rgss/gl.c +3217 -0
- data/ext/rgss/glad.c +1140 -0
- data/ext/rgss/glad.h +2129 -0
- data/ext/rgss/glfw.c +1453 -0
- data/ext/rgss/graphics.c +324 -0
- data/ext/rgss/image.c +274 -0
- data/ext/rgss/input.c +745 -0
- data/ext/rgss/khrplatform.h +290 -0
- data/ext/rgss/mat4.c +279 -0
- data/ext/rgss/pax_global_header +1 -0
- data/ext/rgss/point.c +253 -0
- data/ext/rgss/rect.c +449 -0
- data/ext/rgss/rgss.c +56 -0
- data/ext/rgss/rgss.h +241 -0
- data/ext/rgss/stb_image.h +7762 -0
- data/ext/rgss/stb_image_write.h +1690 -0
- data/ext/rgss/stb_rect_pack.h +628 -0
- data/ext/rgss/stb_truetype.h +5011 -0
- data/ext/rgss/utf8.h +1652 -0
- data/ext/rgss/uthash.h +1133 -0
- data/ext/rgss/vec.c +114 -0
- data/ext/rgss/vec.h +192 -0
- data/ext/rgss/vec2.c +489 -0
- data/ext/rgss/vec3.c +751 -0
- data/ext/rgss/vec4.c +681 -0
- data/lib/rgss.rb +140 -0
- data/lib/rgss/batch.rb +57 -0
- data/lib/rgss/blend.rb +47 -0
- data/lib/rgss/game_object.rb +28 -0
- data/lib/rgss/plane.rb +95 -0
- data/lib/rgss/renderable.rb +158 -0
- data/lib/rgss/rgss.so +0 -0
- data/lib/rgss/shader.rb +94 -0
- data/lib/rgss/shaders/sprite-frag.glsl +40 -0
- data/lib/rgss/shaders/sprite-vert.glsl +17 -0
- data/lib/rgss/sprite.rb +139 -0
- data/lib/rgss/stubs/color.rb +318 -0
- data/lib/rgss/stubs/gl.rb +1999 -0
- data/lib/rgss/stubs/glfw.rb +626 -0
- data/lib/rgss/stubs/rect.rb +324 -0
- data/lib/rgss/stubs/rpg.rb +267 -0
- data/lib/rgss/stubs/tone.rb +65 -0
- data/lib/rgss/texture.rb +132 -0
- data/lib/rgss/tilemap.rb +116 -0
- data/lib/rgss/version.rb +3 -0
- data/lib/rgss/viewport.rb +67 -0
- data/rgss.gemspec +44 -0
- data/test.png +0 -0
- metadata +178 -0
data/ext/rgss/utf8.h
ADDED
@@ -0,0 +1,1652 @@
|
|
1
|
+
// The latest version of this library is available on GitHub;
|
2
|
+
// https://github.com/sheredom/utf8.h
|
3
|
+
|
4
|
+
// This is free and unencumbered software released into the public domain.
|
5
|
+
//
|
6
|
+
// Anyone is free to copy, modify, publish, use, compile, sell, or
|
7
|
+
// distribute this software, either in source code form or as a compiled
|
8
|
+
// binary, for any purpose, commercial or non-commercial, and by any
|
9
|
+
// means.
|
10
|
+
//
|
11
|
+
// In jurisdictions that recognize copyright laws, the author or authors
|
12
|
+
// of this software dedicate any and all copyright interest in the
|
13
|
+
// software to the public domain. We make this dedication for the benefit
|
14
|
+
// of the public at large and to the detriment of our heirs and
|
15
|
+
// successors. We intend this dedication to be an overt act of
|
16
|
+
// relinquishment in perpetuity of all present and future rights to this
|
17
|
+
// software under copyright law.
|
18
|
+
//
|
19
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
20
|
+
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
21
|
+
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
22
|
+
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
23
|
+
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
24
|
+
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
25
|
+
// OTHER DEALINGS IN THE SOFTWARE.
|
26
|
+
//
|
27
|
+
// For more information, please refer to <http://unlicense.org/>
|
28
|
+
|
29
|
+
#ifndef SHEREDOM_UTF8_H_INCLUDED
|
30
|
+
#define SHEREDOM_UTF8_H_INCLUDED
|
31
|
+
|
32
|
+
#if defined(_MSC_VER)
|
33
|
+
#pragma warning(push)
|
34
|
+
|
35
|
+
// disable 'bytes padding added after construct' warning
|
36
|
+
#pragma warning(disable : 4820)
|
37
|
+
#endif
|
38
|
+
|
39
|
+
#include <stddef.h>
|
40
|
+
#include <stdlib.h>
|
41
|
+
|
42
|
+
#if defined(_MSC_VER)
|
43
|
+
#pragma warning(pop)
|
44
|
+
#endif
|
45
|
+
|
46
|
+
#if defined(_MSC_VER)
|
47
|
+
typedef __int32 utf8_int32_t;
|
48
|
+
#else
|
49
|
+
#include <stdint.h>
|
50
|
+
typedef int32_t utf8_int32_t;
|
51
|
+
#endif
|
52
|
+
|
53
|
+
#if defined(__clang__)
|
54
|
+
#pragma clang diagnostic push
|
55
|
+
#pragma clang diagnostic ignored "-Wold-style-cast"
|
56
|
+
#pragma clang diagnostic ignored "-Wcast-qual"
|
57
|
+
#endif
|
58
|
+
|
59
|
+
#ifdef __cplusplus
|
60
|
+
extern "C"
|
61
|
+
{
|
62
|
+
#endif
|
63
|
+
|
64
|
+
#if defined(__clang__) || defined(__GNUC__)
|
65
|
+
#define utf8_nonnull __attribute__((nonnull))
|
66
|
+
#define utf8_pure __attribute__((pure))
|
67
|
+
#define utf8_restrict __restrict__
|
68
|
+
#define utf8_weak __attribute__((weak))
|
69
|
+
#elif defined(_MSC_VER)
|
70
|
+
#define utf8_nonnull
|
71
|
+
#define utf8_pure
|
72
|
+
#define utf8_restrict __restrict
|
73
|
+
#define utf8_weak __inline
|
74
|
+
#else
|
75
|
+
#error Non clang, non gcc, non MSVC compiler found!
|
76
|
+
#endif
|
77
|
+
|
78
|
+
#ifdef __cplusplus
|
79
|
+
#define utf8_null NULL
|
80
|
+
#else
|
81
|
+
#define utf8_null 0
|
82
|
+
#endif
|
83
|
+
|
84
|
+
// Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
85
|
+
// src2 respectively, case insensitive.
|
86
|
+
utf8_nonnull utf8_pure utf8_weak int utf8casecmp(const void *src1, const void *src2);
|
87
|
+
|
88
|
+
// Append the utf8 string src onto the utf8 string dst.
|
89
|
+
utf8_nonnull utf8_weak void *utf8cat(void *utf8_restrict dst, const void *utf8_restrict src);
|
90
|
+
|
91
|
+
// Find the first match of the utf8 codepoint chr in the utf8 string src.
|
92
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8chr(const void *src, utf8_int32_t chr);
|
93
|
+
|
94
|
+
// Return less than 0, 0, greater than 0 if src1 < src2,
|
95
|
+
// src1 == src2, src1 > src2 respectively.
|
96
|
+
utf8_nonnull utf8_pure utf8_weak int utf8cmp(const void *src1, const void *src2);
|
97
|
+
|
98
|
+
// Copy the utf8 string src onto the memory allocated in dst.
|
99
|
+
utf8_nonnull utf8_weak void *utf8cpy(void *utf8_restrict dst, const void *utf8_restrict src);
|
100
|
+
|
101
|
+
// Number of utf8 codepoints in the utf8 string src that consists entirely
|
102
|
+
// of utf8 codepoints not from the utf8 string reject.
|
103
|
+
utf8_nonnull utf8_pure utf8_weak size_t utf8cspn(const void *src, const void *reject);
|
104
|
+
|
105
|
+
// Duplicate the utf8 string src by getting its size, malloc'ing a new buffer
|
106
|
+
// copying over the data, and returning that. Or 0 if malloc failed.
|
107
|
+
utf8_nonnull utf8_weak void *utf8dup(const void *src);
|
108
|
+
|
109
|
+
// Number of utf8 codepoints in the utf8 string str,
|
110
|
+
// excluding the null terminating byte.
|
111
|
+
utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str);
|
112
|
+
|
113
|
+
// Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
114
|
+
// src2 respectively, case insensitive. Checking at most n bytes of each utf8
|
115
|
+
// string.
|
116
|
+
utf8_nonnull utf8_pure utf8_weak int utf8ncasecmp(const void *src1, const void *src2, size_t n);
|
117
|
+
|
118
|
+
// Append the utf8 string src onto the utf8 string dst,
|
119
|
+
// writing at most n+1 bytes. Can produce an invalid utf8
|
120
|
+
// string if n falls partway through a utf8 codepoint.
|
121
|
+
utf8_nonnull utf8_weak void *utf8ncat(void *utf8_restrict dst, const void *utf8_restrict src, size_t n);
|
122
|
+
|
123
|
+
// Return less than 0, 0, greater than 0 if src1 < src2,
|
124
|
+
// src1 == src2, src1 > src2 respectively. Checking at most n
|
125
|
+
// bytes of each utf8 string.
|
126
|
+
utf8_nonnull utf8_pure utf8_weak int utf8ncmp(const void *src1, const void *src2, size_t n);
|
127
|
+
|
128
|
+
// Copy the utf8 string src onto the memory allocated in dst.
|
129
|
+
// Copies at most n bytes. If there is no terminating null byte in
|
130
|
+
// the first n bytes of src, the string placed into dst will not be
|
131
|
+
// null-terminated. If the size (in bytes) of src is less than n,
|
132
|
+
// extra null terminating bytes are appended to dst such that a
|
133
|
+
// total of n bytes are written. Can produce an invalid utf8
|
134
|
+
// string if n falls partway through a utf8 codepoint.
|
135
|
+
utf8_nonnull utf8_weak void *utf8ncpy(void *utf8_restrict dst, const void *utf8_restrict src, size_t n);
|
136
|
+
|
137
|
+
// Similar to utf8dup, except that at most n bytes of src are copied. If src is
|
138
|
+
// longer than n, only n bytes are copied and a null byte is added.
|
139
|
+
//
|
140
|
+
// Returns a new string if successful, 0 otherwise
|
141
|
+
utf8_nonnull utf8_weak void *utf8ndup(const void *src, size_t n);
|
142
|
+
|
143
|
+
// Locates the first occurrence in the utf8 string str of any codepoint in the
|
144
|
+
// utf8 string accept, or 0 if no match was found.
|
145
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8pbrk(const void *str, const void *accept);
|
146
|
+
|
147
|
+
// Find the last match of the utf8 codepoint chr in the utf8 string src.
|
148
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8rchr(const void *src, int chr);
|
149
|
+
|
150
|
+
// Number of bytes in the utf8 string str,
|
151
|
+
// including the null terminating byte.
|
152
|
+
utf8_nonnull utf8_pure utf8_weak size_t utf8size(const void *str);
|
153
|
+
|
154
|
+
// Number of utf8 codepoints in the utf8 string src that consists entirely
|
155
|
+
// of utf8 codepoints from the utf8 string accept.
|
156
|
+
utf8_nonnull utf8_pure utf8_weak size_t utf8spn(const void *src, const void *accept);
|
157
|
+
|
158
|
+
// The position of the utf8 string needle in the utf8 string haystack.
|
159
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8str(const void *haystack, const void *needle);
|
160
|
+
|
161
|
+
// The position of the utf8 string needle in the utf8 string haystack, case
|
162
|
+
// insensitive.
|
163
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8casestr(const void *haystack, const void *needle);
|
164
|
+
|
165
|
+
// Return 0 on success, or the position of the invalid
|
166
|
+
// utf8 codepoint on failure.
|
167
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8valid(const void *str);
|
168
|
+
|
169
|
+
// Sets out_codepoint to the next utf8 codepoint in str, and returns the address
|
170
|
+
// of the utf8 codepoint after the current one in str.
|
171
|
+
utf8_nonnull utf8_weak void *utf8codepoint(const void *utf8_restrict str,
|
172
|
+
utf8_int32_t *utf8_restrict out_codepoint);
|
173
|
+
|
174
|
+
// Calculates the size of the next utf8 codepoint in str.
|
175
|
+
utf8_nonnull utf8_weak size_t utf8codepointcalcsize(const void *utf8_restrict str);
|
176
|
+
|
177
|
+
// Returns the size of the given codepoint in bytes.
|
178
|
+
utf8_weak size_t utf8codepointsize(utf8_int32_t chr);
|
179
|
+
|
180
|
+
// Write a codepoint to the given string, and return the address to the next
|
181
|
+
// place after the written codepoint. Pass how many bytes left in the buffer to
|
182
|
+
// n. If there is not enough space for the codepoint, this function returns
|
183
|
+
// null.
|
184
|
+
utf8_nonnull utf8_weak void *utf8catcodepoint(void *utf8_restrict str, utf8_int32_t chr, size_t n);
|
185
|
+
|
186
|
+
// Returns 1 if the given character is lowercase, or 0 if it is not.
|
187
|
+
utf8_weak int utf8islower(utf8_int32_t chr);
|
188
|
+
|
189
|
+
// Returns 1 if the given character is uppercase, or 0 if it is not.
|
190
|
+
utf8_weak int utf8isupper(utf8_int32_t chr);
|
191
|
+
|
192
|
+
// Transform the given string into all lowercase codepoints.
|
193
|
+
utf8_nonnull utf8_weak void utf8lwr(void *utf8_restrict str);
|
194
|
+
|
195
|
+
// Transform the given string into all uppercase codepoints.
|
196
|
+
utf8_nonnull utf8_weak void utf8upr(void *utf8_restrict str);
|
197
|
+
|
198
|
+
// Make a codepoint lower case if possible.
|
199
|
+
utf8_weak utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp);
|
200
|
+
|
201
|
+
// Make a codepoint upper case if possible.
|
202
|
+
utf8_weak utf8_int32_t utf8uprcodepoint(utf8_int32_t cp);
|
203
|
+
|
204
|
+
#undef utf8_weak
|
205
|
+
#undef utf8_pure
|
206
|
+
#undef utf8_nonnull
|
207
|
+
|
208
|
+
int utf8casecmp(const void *src1, const void *src2)
|
209
|
+
{
|
210
|
+
utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
|
211
|
+
|
212
|
+
for (;;)
|
213
|
+
{
|
214
|
+
src1 = utf8codepoint(src1, &src1_cp);
|
215
|
+
src2 = utf8codepoint(src2, &src2_cp);
|
216
|
+
|
217
|
+
// take a copy of src1 & src2
|
218
|
+
src1_orig_cp = src1_cp;
|
219
|
+
src2_orig_cp = src2_cp;
|
220
|
+
|
221
|
+
// lower the srcs if required
|
222
|
+
src1_cp = utf8lwrcodepoint(src1_cp);
|
223
|
+
src2_cp = utf8lwrcodepoint(src2_cp);
|
224
|
+
|
225
|
+
// check if the lowered codepoints match
|
226
|
+
if ((0 == src1_orig_cp) && (0 == src2_orig_cp))
|
227
|
+
{
|
228
|
+
return 0;
|
229
|
+
}
|
230
|
+
else if (src1_cp == src2_cp)
|
231
|
+
{
|
232
|
+
continue;
|
233
|
+
}
|
234
|
+
|
235
|
+
// if they don't match, then we return the difference between the characters
|
236
|
+
return src1_cp - src2_cp;
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
void *utf8cat(void *utf8_restrict dst, const void *utf8_restrict src)
{
    // Appends the bytes of src (up to, but not including, its terminator)
    // after the current contents of dst and re-terminates dst. No bounds
    // checking is performed on dst. Returns dst.
    const char *from = (const char *)src;
    char *to = (char *)dst;

    // skip over what is already stored in dst
    for (; '\0' != *to; to++) {
    }

    // copy src starting on top of dst's old terminator
    for (; '\0' != *from; from++, to++) {
        *to = *from;
    }

    *to = '\0';

    return dst;
}
|
262
|
+
|
263
|
+
void *utf8chr(const void *src, utf8_int32_t chr)
{
    // Finds the first occurrence of the codepoint chr in src by encoding
    // chr as a short utf8 string and handing the search to utf8str.
    // Asking for codepoint 0 returns the address of src's terminator.
    char encoded[5] = {'\0', '\0', '\0', '\0', '\0'};

    if (0 == chr) {
        const char *end = (const char *)src;

        for (; '\0' != *end; end++) {
        }

        return (void *)end;
    }

    if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
        // fits in 7 bits: 0b0xxxxxxx
        encoded[0] = (char)chr;
    } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
        // fits in 11 bits: 0b110xxxxx 0b10xxxxxx
        encoded[0] = 0xc0 | (char)(chr >> 6);
        encoded[1] = 0x80 | (char)(chr & 0x3f);
    } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
        // fits in 16 bits: 0b1110xxxx 0b10xxxxxx 0b10xxxxxx
        encoded[0] = 0xe0 | (char)(chr >> 12);
        encoded[1] = 0x80 | (char)((chr >> 6) & 0x3f);
        encoded[2] = 0x80 | (char)(chr & 0x3f);
    } else {
        // everything else is written as four bytes:
        // 0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx
        encoded[0] = 0xf0 | (char)(chr >> 18);
        encoded[1] = 0x80 | (char)((chr >> 12) & 0x3f);
        encoded[2] = 0x80 | (char)((chr >> 6) & 0x3f);
        encoded[3] = 0x80 | (char)(chr & 0x3f);
    }

    // encoded now holds chr followed by at least one null byte, so it can
    // be used directly as a needle
    return utf8str(src, encoded);
}
|
314
|
+
|
315
|
+
int utf8cmp(const void *src1, const void *src2)
{
    // Byte-wise comparison of two null-terminated utf8 strings, with bytes
    // compared as unsigned values. Returns -1 if src1 sorts first, 1 if
    // src2 sorts first, and 0 if the strings are identical.
    const unsigned char *lhs = (const unsigned char *)src1;
    const unsigned char *rhs = (const unsigned char *)src2;

    for (;; lhs++, rhs++) {
        if (*lhs != *rhs) {
            return (*lhs < *rhs) ? -1 : 1;
        }

        // the bytes are equal here, so one terminator means both ended
        if ('\0' == *lhs) {
            return 0;
        }
    }
}
|
338
|
+
|
339
|
+
int utf8coll(const void *src1, const void *src2);
|
340
|
+
|
341
|
+
void *utf8cpy(void *utf8_restrict dst, const void *utf8_restrict src)
{
    // Copies src, including a terminating null byte, over whatever dst
    // previously held. No bounds checking is performed on dst. Returns dst.
    const char *from = (const char *)src;
    char *to = (char *)dst;

    for (; '\0' != *from; from++, to++) {
        *to = *from;
    }

    *to = '\0';

    return dst;
}
|
358
|
+
|
359
|
+
size_t utf8cspn(const void *src, const void *reject)
{
    // Counts the codepoints at the start of src that do not appear in
    // reject. For every codepoint of src, reject is scanned byte by byte;
    // the count is returned as soon as one of reject's codepoints matches.
    const char *cursor = (const char *)src;
    size_t count = 0;

    for (; '\0' != *cursor; count++) {
        const char *candidate = (const char *)reject;
        size_t matched = 0;

        while ('\0' != *candidate) {
            // reaching the start of the next codepoint in reject with
            // bytes already matched means the previous one matched fully
            if ((0 != matched) && (0x80 != (0xc0 & *candidate))) {
                return count;
            }

            if (*candidate == cursor[matched]) {
                // another byte of the codepoint agrees, keep comparing
                matched++;
                candidate++;
                continue;
            }

            // mismatch: move past this byte and any continuation bytes
            // (0b10xxxxxx) that follow it, then start matching afresh
            for (candidate++; 0x80 == (0xc0 & *candidate); candidate++) {
            }

            matched = 0;
        }

        // the last codepoint of reject matched right at its terminator
        if (0 != matched) {
            return count;
        }

        // no match: step src on to the first byte of its next codepoint
        for (cursor++; 0x80 == (0xc0 & *cursor); cursor++) {
        }
    }

    return count;
}
|
421
|
+
|
422
|
+
void *utf8dup(const void *src)
|
423
|
+
{
|
424
|
+
const char *s = (const char *)src;
|
425
|
+
char *n = utf8_null;
|
426
|
+
|
427
|
+
// figure out how many bytes (including the terminator) we need to copy first
|
428
|
+
size_t bytes = utf8size(src);
|
429
|
+
|
430
|
+
n = (char *)malloc(bytes);
|
431
|
+
|
432
|
+
if (utf8_null == n)
|
433
|
+
{
|
434
|
+
// out of memory so we bail
|
435
|
+
return utf8_null;
|
436
|
+
}
|
437
|
+
else
|
438
|
+
{
|
439
|
+
bytes = 0;
|
440
|
+
|
441
|
+
// copy src byte-by-byte into our new utf8 string
|
442
|
+
while ('\0' != s[bytes])
|
443
|
+
{
|
444
|
+
n[bytes] = s[bytes];
|
445
|
+
bytes++;
|
446
|
+
}
|
447
|
+
|
448
|
+
// append null terminating byte
|
449
|
+
n[bytes] = '\0';
|
450
|
+
return n;
|
451
|
+
}
|
452
|
+
}
|
453
|
+
|
454
|
+
void *utf8fry(const void *str);
|
455
|
+
|
456
|
+
size_t utf8len(const void *str)
{
    // Returns the number of utf8 codepoints in str, not counting the null
    // terminating byte.
    //
    // The lead byte of each codepoint announces how many continuation bytes
    // (0b10xxxxxx) should follow. Only continuation bytes that are really
    // present are consumed, so a sequence cut short by the terminator (or by
    // the start of another codepoint) can never make the scan jump past the
    // end of the string. Well-formed input is counted exactly as before.
    const unsigned char *s = (const unsigned char *)str;
    size_t length = 0;

    while ('\0' != *s) {
        size_t expected;

        if (0xf0 == (0xf8 & *s)) {
            // 4-byte utf8 code point (began with 0b11110xxx)
            expected = 3;
        } else if (0xe0 == (0xf0 & *s)) {
            // 3-byte utf8 code point (began with 0b1110xxxx)
            expected = 2;
        } else if (0xc0 == (0xe0 & *s)) {
            // 2-byte utf8 code point (began with 0b110xxxxx)
            expected = 1;
        } else {
            // 1-byte ascii, or a stray byte counted on its own
            expected = 0;
        }

        // step over the lead byte, then over at most `expected` continuation
        // bytes; the terminator is not a continuation byte so this stops there
        s++;
        while ((0 != expected) && (0x80 == (0xc0 & *s))) {
            s++;
            expected--;
        }

        // however many bytes were consumed, it was one codepoint
        length++;
    }

    return length;
}
|
491
|
+
|
492
|
+
int utf8ncasecmp(const void *src1, const void *src2, size_t n)
|
493
|
+
{
|
494
|
+
utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
|
495
|
+
|
496
|
+
do
|
497
|
+
{
|
498
|
+
const unsigned char *const s1 = (const unsigned char *)src1;
|
499
|
+
const unsigned char *const s2 = (const unsigned char *)src2;
|
500
|
+
|
501
|
+
// first check that we have enough bytes left in n to contain an entire
|
502
|
+
// codepoint
|
503
|
+
if (0 == n)
|
504
|
+
{
|
505
|
+
return 0;
|
506
|
+
}
|
507
|
+
|
508
|
+
if ((1 == n) && ((0xc0 == (0xe0 & *s1)) || (0xc0 == (0xe0 & *s2))))
|
509
|
+
{
|
510
|
+
const utf8_int32_t c1 = (0xe0 & *s1);
|
511
|
+
const utf8_int32_t c2 = (0xe0 & *s2);
|
512
|
+
|
513
|
+
if (c1 < c2)
|
514
|
+
{
|
515
|
+
return c1 - c2;
|
516
|
+
}
|
517
|
+
else
|
518
|
+
{
|
519
|
+
return 0;
|
520
|
+
}
|
521
|
+
}
|
522
|
+
|
523
|
+
if ((2 >= n) && ((0xe0 == (0xf0 & *s1)) || (0xe0 == (0xf0 & *s2))))
|
524
|
+
{
|
525
|
+
const utf8_int32_t c1 = (0xf0 & *s1);
|
526
|
+
const utf8_int32_t c2 = (0xf0 & *s2);
|
527
|
+
|
528
|
+
if (c1 < c2)
|
529
|
+
{
|
530
|
+
return c1 - c2;
|
531
|
+
}
|
532
|
+
else
|
533
|
+
{
|
534
|
+
return 0;
|
535
|
+
}
|
536
|
+
}
|
537
|
+
|
538
|
+
if ((3 >= n) && ((0xf0 == (0xf8 & *s1)) || (0xf0 == (0xf8 & *s2))))
|
539
|
+
{
|
540
|
+
const utf8_int32_t c1 = (0xf8 & *s1);
|
541
|
+
const utf8_int32_t c2 = (0xf8 & *s2);
|
542
|
+
|
543
|
+
if (c1 < c2)
|
544
|
+
{
|
545
|
+
return c1 - c2;
|
546
|
+
}
|
547
|
+
else
|
548
|
+
{
|
549
|
+
return 0;
|
550
|
+
}
|
551
|
+
}
|
552
|
+
|
553
|
+
src1 = utf8codepoint(src1, &src1_cp);
|
554
|
+
src2 = utf8codepoint(src2, &src2_cp);
|
555
|
+
n -= utf8codepointsize(src1_cp);
|
556
|
+
|
557
|
+
// Take a copy of src1 & src2
|
558
|
+
src1_orig_cp = src1_cp;
|
559
|
+
src2_orig_cp = src2_cp;
|
560
|
+
|
561
|
+
// Lower srcs if required
|
562
|
+
src1_cp = utf8lwrcodepoint(src1_cp);
|
563
|
+
src2_cp = utf8lwrcodepoint(src2_cp);
|
564
|
+
|
565
|
+
// Check if the lowered codepoints match
|
566
|
+
if ((0 == src1_orig_cp) && (0 == src2_orig_cp))
|
567
|
+
{
|
568
|
+
return 0;
|
569
|
+
}
|
570
|
+
else if (src1_cp == src2_cp)
|
571
|
+
{
|
572
|
+
continue;
|
573
|
+
}
|
574
|
+
|
575
|
+
// if they don't match, then we return the difference between the characters
|
576
|
+
if (src1_orig_cp != src2_orig_cp)
|
577
|
+
{
|
578
|
+
return src1_cp - src2_cp;
|
579
|
+
}
|
580
|
+
} while (0 < n);
|
581
|
+
|
582
|
+
// both utf8 strings matched
|
583
|
+
return 0;
|
584
|
+
}
|
585
|
+
|
586
|
+
void *utf8ncat(void *utf8_restrict dst, const void *utf8_restrict src, size_t n)
{
    // Appends at most n bytes of src to the end of dst and then writes a
    // null terminating byte, so up to n + 1 bytes are written. Returns dst.
    // The result can be invalid utf8 if n falls partway through a codepoint.
    // No bounds checking is performed on dst.
    //
    // Both limits are tested before every byte is copied: with n == 0 or an
    // empty src nothing is appended and src is never read past its
    // terminator.
    char *d = (char *)dst;
    const char *s = (const char *)src;

    // find the null terminating byte in dst
    while ('\0' != *d) {
        d++;
    }

    // overwrite dst's terminator and keep appending until src ends or the
    // byte budget is spent
    while (('\0' != *s) && (0 != n)) {
        *d++ = *s++;
        n--;
    }

    // write out a new null terminating byte into dst
    *d = '\0';

    return dst;
}
|
609
|
+
|
610
|
+
int utf8ncmp(const void *src1, const void *src2, size_t n)
{
    // Byte-wise comparison of at most the first n bytes of two utf8
    // strings, with bytes compared as unsigned values. Returns -1 if src1
    // sorts first, 1 if src2 sorts first, and 0 if no difference was found
    // before n bytes were examined or both strings ended.
    const unsigned char *lhs = (const unsigned char *)src1;
    const unsigned char *rhs = (const unsigned char *)src2;
    size_t i;

    for (i = 0; i < n; i++) {
        if (lhs[i] != rhs[i]) {
            return (lhs[i] < rhs[i]) ? -1 : 1;
        }

        // equal bytes, so a terminator here ends both strings
        if ('\0' == lhs[i]) {
            break;
        }
    }

    return 0;
}
|
633
|
+
|
634
|
+
void *utf8ncpy(void *utf8_restrict dst, const void *utf8_restrict src, size_t n)
{
    // Writes exactly n bytes to dst: the bytes of src up to its terminator
    // (or the first n of them if src is longer), followed by null bytes for
    // whatever remains. If src has n or more bytes before its terminator,
    // dst is left without a terminator. Returns dst.
    const char *from = (const char *)src;
    char *to = (char *)dst;
    size_t i = 0;

    // copy the string bytes
    while ((i < n) && ('\0' != from[i])) {
        to[i] = from[i];
        i++;
    }

    // zero-fill the rest of the n bytes
    while (i < n) {
        to[i] = 0;
        i++;
    }

    return dst;
}
|
659
|
+
|
660
|
+
void *utf8ndup(const void *src, size_t n)
{
    // Returns a malloc'ed, null-terminated copy of at most the first n
    // bytes of src, or a null pointer if the allocation fails.
    //
    // The byte limit is tested before each byte of src is read, so src only
    // needs to be readable for n bytes and does not have to be
    // null-terminated within them.
    const char *s = (const char *)src;
    char *c;
    size_t bytes = 0;
    size_t i;

    // measure: stop at the terminator or once n bytes have been seen
    while ((bytes < n) && ('\0' != s[bytes])) {
        bytes++;
    }

    c = (char *)malloc(bytes + 1);

    if (c) {
        // copy exactly the bytes measured above, then terminate
        for (i = 0; i < bytes; i++) {
            c[i] = s[i];
        }

        c[bytes] = '\0';
    }

    // on allocation failure c is the null pointer malloc returned
    return c;
}
|
696
|
+
|
697
|
+
void *utf8rchr(const void *src, int chr)
|
698
|
+
{
|
699
|
+
const char *s = (const char *)src;
|
700
|
+
const char *match = utf8_null;
|
701
|
+
char c[5] = {'\0', '\0', '\0', '\0', '\0'};
|
702
|
+
|
703
|
+
if (0 == chr)
|
704
|
+
{
|
705
|
+
// being asked to return position of null terminating byte, so
|
706
|
+
// just run s to the end, and return!
|
707
|
+
while ('\0' != *s)
|
708
|
+
{
|
709
|
+
s++;
|
710
|
+
}
|
711
|
+
return (void *)s;
|
712
|
+
}
|
713
|
+
else if (0 == ((int)0xffffff80 & chr))
|
714
|
+
{
|
715
|
+
// 1-byte/7-bit ascii
|
716
|
+
// (0b0xxxxxxx)
|
717
|
+
c[0] = (char)chr;
|
718
|
+
}
|
719
|
+
else if (0 == ((int)0xfffff800 & chr))
|
720
|
+
{
|
721
|
+
// 2-byte/11-bit utf8 code point
|
722
|
+
// (0b110xxxxx 0b10xxxxxx)
|
723
|
+
c[0] = 0xc0 | (char)(chr >> 6);
|
724
|
+
c[1] = 0x80 | (char)(chr & 0x3f);
|
725
|
+
}
|
726
|
+
else if (0 == ((int)0xffff0000 & chr))
|
727
|
+
{
|
728
|
+
// 3-byte/16-bit utf8 code point
|
729
|
+
// (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
|
730
|
+
c[0] = 0xe0 | (char)(chr >> 12);
|
731
|
+
c[1] = 0x80 | (char)((chr >> 6) & 0x3f);
|
732
|
+
c[2] = 0x80 | (char)(chr & 0x3f);
|
733
|
+
}
|
734
|
+
else
|
735
|
+
{ // if (0 == ((int)0xffe00000 & chr)) {
|
736
|
+
// 4-byte/21-bit utf8 code point
|
737
|
+
// (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
|
738
|
+
c[0] = 0xf0 | (char)(chr >> 18);
|
739
|
+
c[1] = 0x80 | (char)((chr >> 12) & 0x3f);
|
740
|
+
c[2] = 0x80 | (char)((chr >> 6) & 0x3f);
|
741
|
+
c[3] = 0x80 | (char)(chr & 0x3f);
|
742
|
+
}
|
743
|
+
|
744
|
+
// we've created a 2 utf8 codepoint string in c that is
|
745
|
+
// the utf8 character asked for by chr, and a null
|
746
|
+
// terminating byte
|
747
|
+
|
748
|
+
while ('\0' != *s)
|
749
|
+
{
|
750
|
+
size_t offset = 0;
|
751
|
+
|
752
|
+
while (s[offset] == c[offset])
|
753
|
+
{
|
754
|
+
offset++;
|
755
|
+
}
|
756
|
+
|
757
|
+
if ('\0' == c[offset])
|
758
|
+
{
|
759
|
+
// we found a matching utf8 code point
|
760
|
+
match = s;
|
761
|
+
s += offset;
|
762
|
+
}
|
763
|
+
else
|
764
|
+
{
|
765
|
+
s += offset;
|
766
|
+
|
767
|
+
// need to march s along to next utf8 codepoint start
|
768
|
+
// (the next byte that doesn't match 0b10xxxxxx)
|
769
|
+
if ('\0' != *s)
|
770
|
+
{
|
771
|
+
do
|
772
|
+
{
|
773
|
+
s++;
|
774
|
+
} while (0x80 == (0xc0 & *s));
|
775
|
+
}
|
776
|
+
}
|
777
|
+
}
|
778
|
+
|
779
|
+
// return the last match we found (or 0 if no match was found)
|
780
|
+
return (void *)match;
|
781
|
+
}
|
782
|
+
|
783
|
+
void *utf8pbrk(const void *str, const void *accept)
|
784
|
+
{
|
785
|
+
const char *s = (const char *)str;
|
786
|
+
|
787
|
+
while ('\0' != *s)
|
788
|
+
{
|
789
|
+
const char *a = (const char *)accept;
|
790
|
+
size_t offset = 0;
|
791
|
+
|
792
|
+
while ('\0' != *a)
|
793
|
+
{
|
794
|
+
// checking that if *a is the start of a utf8 codepoint
|
795
|
+
// (it is not 0b10xxxxxx) and we have successfully matched
|
796
|
+
// a previous character (0 < offset) - we found a match
|
797
|
+
if ((0x80 != (0xc0 & *a)) && (0 < offset))
|
798
|
+
{
|
799
|
+
return (void *)s;
|
800
|
+
}
|
801
|
+
else
|
802
|
+
{
|
803
|
+
if (*a == s[offset])
|
804
|
+
{
|
805
|
+
// part of a utf8 codepoint matched, so move our checking
|
806
|
+
// onwards to the next byte
|
807
|
+
offset++;
|
808
|
+
a++;
|
809
|
+
}
|
810
|
+
else
|
811
|
+
{
|
812
|
+
// r could be in the middle of an unmatching utf8 code point,
|
813
|
+
// so we need to march it on to the next character beginning,
|
814
|
+
|
815
|
+
do
|
816
|
+
{
|
817
|
+
a++;
|
818
|
+
} while (0x80 == (0xc0 & *a));
|
819
|
+
|
820
|
+
// reset offset too as we found a mismatch
|
821
|
+
offset = 0;
|
822
|
+
}
|
823
|
+
}
|
824
|
+
}
|
825
|
+
|
826
|
+
// we found a match on the last utf8 codepoint
|
827
|
+
if (0 < offset)
|
828
|
+
{
|
829
|
+
return (void *)s;
|
830
|
+
}
|
831
|
+
|
832
|
+
// the current utf8 codepoint in src did not match accept, but src
|
833
|
+
// could have been partway through a utf8 codepoint, so we need to
|
834
|
+
// march it onto the next utf8 codepoint starting byte
|
835
|
+
do
|
836
|
+
{
|
837
|
+
s++;
|
838
|
+
} while ((0x80 == (0xc0 & *s)));
|
839
|
+
}
|
840
|
+
|
841
|
+
return utf8_null;
|
842
|
+
}
|
843
|
+
|
844
|
+
size_t utf8size(const void *str)
{
    // Returns the number of bytes occupied by str, counting the null
    // terminating byte.
    const char *const begin = (const char *)str;
    const char *end = begin;

    for (; '\0' != *end; end++) {
    }

    // the extra 1 accounts for the terminator itself
    return (size_t)(end - begin) + 1;
}
|
857
|
+
|
858
|
+
size_t utf8spn(const void *src, const void *accept)
{
    // Counts the codepoints at the start of src that all appear in accept.
    // For every codepoint of src, accept is scanned byte by byte; counting
    // stops at the first codepoint of src that accept does not contain.
    const char *cursor = (const char *)src;
    size_t count = 0;

    while ('\0' != *cursor) {
        const char *candidate = (const char *)accept;
        size_t matched = 0;

        while ('\0' != *candidate) {
            // reaching the start of the next codepoint in accept with
            // bytes already matched means the previous one matched fully;
            // matched is left non-zero to record that
            if ((0 != matched) && (0x80 != (0xc0 & *candidate))) {
                break;
            }

            if (*candidate == cursor[matched]) {
                // another byte of the codepoint agrees, keep comparing
                matched++;
                candidate++;
                continue;
            }

            // mismatch: move past this byte and any continuation bytes
            // (0b10xxxxxx) that follow it, then start matching afresh
            for (candidate++; 0x80 == (0xc0 & *candidate); candidate++) {
            }

            matched = 0;
        }

        // accept was exhausted without a match for this codepoint
        if (0 == matched) {
            return count;
        }

        // matched, either mid-way through accept or on its last codepoint:
        // count it and step over the bytes that were compared
        count++;
        cursor += matched;
    }

    return count;
}
|
923
|
+
|
924
|
+
void *utf8str(const void *haystack, const void *needle)
|
925
|
+
{
|
926
|
+
const char *h = (const char *)haystack;
|
927
|
+
utf8_int32_t throwaway_codepoint;
|
928
|
+
|
929
|
+
// if needle has no utf8 codepoints before the null terminating
|
930
|
+
// byte then return haystack
|
931
|
+
if ('\0' == *((const char *)needle))
|
932
|
+
{
|
933
|
+
return (void *)haystack;
|
934
|
+
}
|
935
|
+
|
936
|
+
while ('\0' != *h)
|
937
|
+
{
|
938
|
+
const char *maybeMatch = h;
|
939
|
+
const char *n = (const char *)needle;
|
940
|
+
|
941
|
+
while (*h == *n && (*h != '\0' && *n != '\0'))
|
942
|
+
{
|
943
|
+
n++;
|
944
|
+
h++;
|
945
|
+
}
|
946
|
+
|
947
|
+
if ('\0' == *n)
|
948
|
+
{
|
949
|
+
// we found the whole utf8 string for needle in haystack at
|
950
|
+
// maybeMatch, so return it
|
951
|
+
return (void *)maybeMatch;
|
952
|
+
}
|
953
|
+
else
|
954
|
+
{
|
955
|
+
// h could be in the middle of an unmatching utf8 codepoint,
|
956
|
+
// so we need to march it on to the next character beginning
|
957
|
+
// starting from the current character
|
958
|
+
h = (const char *)utf8codepoint(maybeMatch, &throwaway_codepoint);
|
959
|
+
}
|
960
|
+
}
|
961
|
+
|
962
|
+
// no match
|
963
|
+
return utf8_null;
|
964
|
+
}
|
965
|
+
|
966
|
+
void *utf8casestr(const void *haystack, const void *needle)
|
967
|
+
{
|
968
|
+
const void *h = haystack;
|
969
|
+
|
970
|
+
// if needle has no utf8 codepoints before the null terminating
|
971
|
+
// byte then return haystack
|
972
|
+
if ('\0' == *((const char *)needle))
|
973
|
+
{
|
974
|
+
return (void *)haystack;
|
975
|
+
}
|
976
|
+
|
977
|
+
for (;;)
|
978
|
+
{
|
979
|
+
const void *maybeMatch = h;
|
980
|
+
const void *n = needle;
|
981
|
+
utf8_int32_t h_cp, n_cp;
|
982
|
+
|
983
|
+
// Get the next code point and track it
|
984
|
+
const void *nextH = h = utf8codepoint(h, &h_cp);
|
985
|
+
n = utf8codepoint(n, &n_cp);
|
986
|
+
|
987
|
+
while ((0 != h_cp) && (0 != n_cp))
|
988
|
+
{
|
989
|
+
h_cp = utf8lwrcodepoint(h_cp);
|
990
|
+
n_cp = utf8lwrcodepoint(n_cp);
|
991
|
+
|
992
|
+
// if we find a mismatch, bail out!
|
993
|
+
if (h_cp != n_cp)
|
994
|
+
{
|
995
|
+
break;
|
996
|
+
}
|
997
|
+
|
998
|
+
h = utf8codepoint(h, &h_cp);
|
999
|
+
n = utf8codepoint(n, &n_cp);
|
1000
|
+
}
|
1001
|
+
|
1002
|
+
if (0 == n_cp)
|
1003
|
+
{
|
1004
|
+
// we found the whole utf8 string for needle in haystack at
|
1005
|
+
// maybeMatch, so return it
|
1006
|
+
return (void *)maybeMatch;
|
1007
|
+
}
|
1008
|
+
|
1009
|
+
if (0 == h_cp)
|
1010
|
+
{
|
1011
|
+
// no match
|
1012
|
+
return utf8_null;
|
1013
|
+
}
|
1014
|
+
|
1015
|
+
// Roll back to the next code point in the haystack to test
|
1016
|
+
h = nextH;
|
1017
|
+
}
|
1018
|
+
}
|
1019
|
+
|
1020
|
+
void *utf8valid(const void *str)
|
1021
|
+
{
|
1022
|
+
const char *s = (const char *)str;
|
1023
|
+
|
1024
|
+
while ('\0' != *s)
|
1025
|
+
{
|
1026
|
+
if (0xf0 == (0xf8 & *s))
|
1027
|
+
{
|
1028
|
+
// ensure each of the 3 following bytes in this 4-byte
|
1029
|
+
// utf8 codepoint began with 0b10xxxxxx
|
1030
|
+
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) || (0x80 != (0xc0 & s[3])))
|
1031
|
+
{
|
1032
|
+
return (void *)s;
|
1033
|
+
}
|
1034
|
+
|
1035
|
+
// ensure that our utf8 codepoint ended after 4 bytes
|
1036
|
+
if (0x80 == (0xc0 & s[4]))
|
1037
|
+
{
|
1038
|
+
return (void *)s;
|
1039
|
+
}
|
1040
|
+
|
1041
|
+
// ensure that the top 5 bits of this 4-byte utf8
|
1042
|
+
// codepoint were not 0, as then we could have used
|
1043
|
+
// one of the smaller encodings
|
1044
|
+
if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1])))
|
1045
|
+
{
|
1046
|
+
return (void *)s;
|
1047
|
+
}
|
1048
|
+
|
1049
|
+
// 4-byte utf8 code point (began with 0b11110xxx)
|
1050
|
+
s += 4;
|
1051
|
+
}
|
1052
|
+
else if (0xe0 == (0xf0 & *s))
|
1053
|
+
{
|
1054
|
+
// ensure each of the 2 following bytes in this 3-byte
|
1055
|
+
// utf8 codepoint began with 0b10xxxxxx
|
1056
|
+
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])))
|
1057
|
+
{
|
1058
|
+
return (void *)s;
|
1059
|
+
}
|
1060
|
+
|
1061
|
+
// ensure that our utf8 codepoint ended after 3 bytes
|
1062
|
+
if (0x80 == (0xc0 & s[3]))
|
1063
|
+
{
|
1064
|
+
return (void *)s;
|
1065
|
+
}
|
1066
|
+
|
1067
|
+
// ensure that the top 5 bits of this 3-byte utf8
|
1068
|
+
// codepoint were not 0, as then we could have used
|
1069
|
+
// one of the smaller encodings
|
1070
|
+
if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1])))
|
1071
|
+
{
|
1072
|
+
return (void *)s;
|
1073
|
+
}
|
1074
|
+
|
1075
|
+
// 3-byte utf8 code point (began with 0b1110xxxx)
|
1076
|
+
s += 3;
|
1077
|
+
}
|
1078
|
+
else if (0xc0 == (0xe0 & *s))
|
1079
|
+
{
|
1080
|
+
// ensure the 1 following byte in this 2-byte
|
1081
|
+
// utf8 codepoint began with 0b10xxxxxx
|
1082
|
+
if (0x80 != (0xc0 & s[1]))
|
1083
|
+
{
|
1084
|
+
return (void *)s;
|
1085
|
+
}
|
1086
|
+
|
1087
|
+
// ensure that our utf8 codepoint ended after 2 bytes
|
1088
|
+
if (0x80 == (0xc0 & s[2]))
|
1089
|
+
{
|
1090
|
+
return (void *)s;
|
1091
|
+
}
|
1092
|
+
|
1093
|
+
// ensure that the top 4 bits of this 2-byte utf8
|
1094
|
+
// codepoint were not 0, as then we could have used
|
1095
|
+
// one of the smaller encodings
|
1096
|
+
if (0 == (0x1e & s[0]))
|
1097
|
+
{
|
1098
|
+
return (void *)s;
|
1099
|
+
}
|
1100
|
+
|
1101
|
+
// 2-byte utf8 code point (began with 0b110xxxxx)
|
1102
|
+
s += 2;
|
1103
|
+
}
|
1104
|
+
else if (0x00 == (0x80 & *s))
|
1105
|
+
{
|
1106
|
+
// 1-byte ascii (began with 0b0xxxxxxx)
|
1107
|
+
s += 1;
|
1108
|
+
}
|
1109
|
+
else
|
1110
|
+
{
|
1111
|
+
// we have an invalid 0b1xxxxxxx utf8 code point entry
|
1112
|
+
return (void *)s;
|
1113
|
+
}
|
1114
|
+
}
|
1115
|
+
|
1116
|
+
return utf8_null;
|
1117
|
+
}
|
1118
|
+
|
1119
|
+
// Decodes the utf8 codepoint that starts at str into *out_codepoint
// and returns a pointer to the byte immediately after it. The length
// is taken from the lead byte alone; continuation bytes are not
// checked, so str is expected to hold valid utf8.
void *utf8codepoint(const void *utf8_restrict str, utf8_int32_t *utf8_restrict out_codepoint)
{
    const char *const s = (const char *)str;
    utf8_int32_t cp;
    int width;
    int i;

    // the lead byte gives both the sequence length and the top bits
    if (0xf0 == (0xf8 & s[0]))
    {
        // 4 byte utf8 codepoint
        width = 4;
        cp = 0x07 & s[0];
    }
    else if (0xe0 == (0xf0 & s[0]))
    {
        // 3 byte utf8 codepoint
        width = 3;
        cp = 0x0f & s[0];
    }
    else if (0xc0 == (0xe0 & s[0]))
    {
        // 2 byte utf8 codepoint
        width = 2;
        cp = 0x1f & s[0];
    }
    else
    {
        // 1 byte utf8 codepoint otherwise
        width = 1;
        cp = s[0];
    }

    // each continuation byte contributes its low 6 bits
    for (i = 1; i < width; i++)
    {
        cp = (cp << 6) | (0x3f & s[i]);
    }

    *out_codepoint = cp;
    return (void *)(s + width);
}
|
1150
|
+
|
1151
|
+
// Returns how many bytes (1 to 4) the utf8 codepoint starting at str
// occupies, judged from its lead byte only. Any byte that is not a
// 2-, 3- or 4-byte lead is reported as size 1.
size_t utf8codepointcalcsize(const void *utf8_restrict str)
{
    const int lead = ((const char *)str)[0];

    return (0xf0 == (0xf8 & lead))   ? 4  // 0b11110xxx
           : (0xe0 == (0xf0 & lead)) ? 3  // 0b1110xxxx
           : (0xc0 == (0xe0 & lead)) ? 2  // 0b110xxxxx
                                     : 1; // everything else
}
|
1174
|
+
|
1175
|
+
// Returns the number of bytes (1 to 4) needed to encode the codepoint
// chr as utf8. Any value with bits set above the low 16 is reported
// as 4.
size_t utf8codepointsize(utf8_int32_t chr)
{
    // test the widest case first; each mask is a subset of the next
    if (0 != ((utf8_int32_t)0xffff0000 & chr))
    {
        return 4;
    }

    if (0 != ((utf8_int32_t)0xfffff800 & chr))
    {
        return 3;
    }

    if (0 != ((utf8_int32_t)0xffffff80 & chr))
    {
        return 2;
    }

    // fits in 7 bits
    return 1;
}
|
1194
|
+
|
1195
|
+
// Writes the utf8 encoding of chr at str, where n is the number of
// bytes available there. Returns a pointer to the byte after the last
// one written, or utf8_null (writing nothing) when n is too small for
// the encoding. No null terminator is written.
void *utf8catcodepoint(void *utf8_restrict str, utf8_int32_t chr, size_t n)
{
    char *out = (char *)str;
    size_t need;

    // work out how many bytes the encoding requires
    if (0 == ((utf8_int32_t)0xffffff80 & chr))
    {
        need = 1;
    }
    else if (0 == ((utf8_int32_t)0xfffff800 & chr))
    {
        need = 2;
    }
    else if (0 == ((utf8_int32_t)0xffff0000 & chr))
    {
        need = 3;
    }
    else
    {
        need = 4;
    }

    if (n < need)
    {
        return utf8_null;
    }

    switch (need)
    {
    case 1:
        // 1-byte/7-bit ascii
        // (0b0xxxxxxx)
        out[0] = (char)chr;
        break;
    case 2:
        // 2-byte/11-bit utf8 code point
        // (0b110xxxxx 0b10xxxxxx)
        out[0] = 0xc0 | (char)(chr >> 6);
        out[1] = 0x80 | (char)(chr & 0x3f);
        break;
    case 3:
        // 3-byte/16-bit utf8 code point
        // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
        out[0] = 0xe0 | (char)(chr >> 12);
        out[1] = 0x80 | (char)((chr >> 6) & 0x3f);
        out[2] = 0x80 | (char)(chr & 0x3f);
        break;
    default:
        // 4-byte/21-bit utf8 code point
        // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
        out[0] = 0xf0 | (char)(chr >> 18);
        out[1] = 0x80 | (char)((chr >> 12) & 0x3f);
        out[2] = 0x80 | (char)((chr >> 6) & 0x3f);
        out[3] = 0x80 | (char)(chr & 0x3f);
        break;
    }

    return out + need;
}
|
1252
|
+
|
1253
|
+
// Returns non-zero when chr is lowercase, which is taken to mean that
// utf8uprcodepoint maps it to a different codepoint.
int utf8islower(utf8_int32_t chr)
{
    const utf8_int32_t upper = utf8uprcodepoint(chr);

    return upper != chr;
}
|
1257
|
+
|
1258
|
+
// Returns non-zero when chr is uppercase, which is taken to mean that
// utf8lwrcodepoint maps it to a different codepoint.
int utf8isupper(utf8_int32_t chr)
{
    const utf8_int32_t lower = utf8lwrcodepoint(chr);

    return lower != chr;
}
|
1262
|
+
|
1263
|
+
// Converts the null-terminated utf8 string str to lowercase in place.
// Each changed codepoint is written over the bytes of the original,
// so this relies on the lowercase form encoding to the same number of
// bytes as the codepoint it replaces.
void utf8lwr(void *utf8_restrict str)
{
    void *cur = (char *)str;

    for (;;)
    {
        utf8_int32_t cp;
        utf8_int32_t lowered;
        // decode first, so the position of the next codepoint is
        // known before the current one is overwritten
        void *const next = utf8codepoint(cur, &cp);

        if (0 == cp)
        {
            // reached the null terminator
            break;
        }

        lowered = utf8lwrcodepoint(cp);

        if (lowered != cp)
        {
            utf8catcodepoint(cur, lowered, utf8codepointsize(lowered));
        }

        cur = next;
    }
}
|
1285
|
+
|
1286
|
+
// Converts the null-terminated utf8 string str to uppercase in place.
// Each changed codepoint is written over the bytes of the original,
// so this relies on the uppercase form encoding to the same number of
// bytes as the codepoint it replaces.
void utf8upr(void *utf8_restrict str)
{
    void *cur = (char *)str;

    for (;;)
    {
        utf8_int32_t cp;
        utf8_int32_t raised;
        // decode first, so the position of the next codepoint is
        // known before the current one is overwritten
        void *const next = utf8codepoint(cur, &cp);

        if (0 == cp)
        {
            // reached the null terminator
            break;
        }

        raised = utf8uprcodepoint(cp);

        if (raised != cp)
        {
            utf8catcodepoint(cur, raised, utf8codepointsize(raised));
        }

        cur = next;
    }
}
|
1308
|
+
|
1309
|
+
// Returns the lowercase form of the codepoint cp, or cp unchanged when
// it has no lowercase mapping in the tables below. The tables cover
// parts of the Latin, Greek and Cyrillic ranges up to U+04FF.
utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp)
{
    if (((0x0041 <= cp) && (0x005a >= cp)) || ((0x00c0 <= cp) && (0x00d6 >= cp)) ||
        ((0x00d8 <= cp) && (0x00de >= cp)) || ((0x0391 <= cp) && (0x03a1 >= cp)) ||
        ((0x03a3 <= cp) && (0x03ab >= cp)) || ((0x0410 <= cp) && (0x042f >= cp)))
    {
        // lowercase sits 32 above uppercase
        cp += 32;
    }
    else if ((0x0400 <= cp) && (0x040f >= cp))
    {
        // lowercase sits 80 above uppercase
        cp += 80;
    }
    else if (((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp)) ||
             ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp)) ||
             ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp)) ||
             ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp)) ||
             ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp)) ||
             ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04bf >= cp)) ||
             ((0x04d0 <= cp) && (0x04ff >= cp)))
    {
        // pairs laid out as (even uppercase, odd lowercase).
        // 0x04c0..0x04cf is deliberately excluded: it does not follow
        // this layout and is handled below
        cp |= 0x1;
    }
    else if (((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp)) ||
             ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp)) ||
             ((0x01cd <= cp) && (0x01dc >= cp)) || ((0x04c1 <= cp) && (0x04ce >= cp)))
    {
        // pairs laid out as (odd uppercase, even lowercase): round up
        // to the even member of the pair
        cp += 1;
        cp &= ~0x1;
    }
    else
    {
        // irregular one-off mappings
        switch (cp)
        {
        default:
            break;
        case 0x0178:
            cp = 0x00ff;
            break;
        case 0x0243:
            cp = 0x0180;
            break;
        case 0x018e:
            cp = 0x01dd;
            break;
        case 0x023d:
            cp = 0x019a;
            break;
        case 0x0220:
            cp = 0x019e;
            break;
        case 0x01b7:
            cp = 0x0292;
            break;
        case 0x01c4:
            cp = 0x01c6;
            break;
        case 0x01c7:
            cp = 0x01c9;
            break;
        case 0x01ca:
            cp = 0x01cc;
            break;
        case 0x01f1:
            cp = 0x01f3;
            break;
        case 0x01f7:
            cp = 0x01bf;
            break;
        case 0x0187:
            cp = 0x0188;
            break;
        case 0x018b:
            cp = 0x018c;
            break;
        case 0x0191:
            cp = 0x0192;
            break;
        case 0x0198:
            cp = 0x0199;
            break;
        case 0x01a7:
            cp = 0x01a8;
            break;
        case 0x01ac:
            cp = 0x01ad;
            break;
        case 0x01b8:
            cp = 0x01b9;
            break;
        case 0x01bc:
            cp = 0x01bd;
            break;
        case 0x01f4:
            cp = 0x01f5;
            break;
        case 0x023b:
            cp = 0x023c;
            break;
        case 0x0241:
            cp = 0x0242;
            break;
        case 0x03fd:
            cp = 0x037b;
            break;
        case 0x03fe:
            cp = 0x037c;
            break;
        case 0x03ff:
            cp = 0x037d;
            break;
        case 0x037f:
            cp = 0x03f3;
            break;
        case 0x0386:
            cp = 0x03ac;
            break;
        case 0x0388:
            cp = 0x03ad;
            break;
        case 0x0389:
            cp = 0x03ae;
            break;
        case 0x038a:
            cp = 0x03af;
            break;
        case 0x038c:
            cp = 0x03cc;
            break;
        case 0x038e:
            cp = 0x03cd;
            break;
        case 0x038f:
            cp = 0x03ce;
            break;
        case 0x0370:
            cp = 0x0371;
            break;
        case 0x0372:
            cp = 0x0373;
            break;
        case 0x0376:
            cp = 0x0377;
            break;
        case 0x03f4:
            cp = 0x03d1;
            break;
        case 0x03cf:
            cp = 0x03d7;
            break;
        case 0x03f9:
            cp = 0x03f2;
            break;
        case 0x03f7:
            cp = 0x03f8;
            break;
        case 0x03fa:
            cp = 0x03fb;
            break;
        case 0x04c0:
            // Cyrillic palochka: its lowercase is at the far end of
            // the 0x04c0..0x04cf run
            cp = 0x04cf;
            break;
        }
    }

    return cp;
}
|
1474
|
+
|
1475
|
+
// Returns the uppercase form of the codepoint cp, or cp unchanged when
// it has no uppercase mapping in the tables below. The tables cover
// parts of the Latin, Greek and Cyrillic ranges up to U+04FF.
utf8_int32_t utf8uprcodepoint(utf8_int32_t cp)
{
    if (((0x0061 <= cp) && (0x007a >= cp)) || ((0x00e0 <= cp) && (0x00f6 >= cp)) ||
        ((0x00f8 <= cp) && (0x00fe >= cp)) || ((0x03b1 <= cp) && (0x03c1 >= cp)) ||
        ((0x03c3 <= cp) && (0x03cb >= cp)) || ((0x0430 <= cp) && (0x044f >= cp)))
    {
        // uppercase sits 32 below lowercase
        cp -= 32;
    }
    else if ((0x0450 <= cp) && (0x045f >= cp))
    {
        // uppercase sits 80 below lowercase
        cp -= 80;
    }
    else if (((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp)) ||
             ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp)) ||
             ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp)) ||
             ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp)) ||
             ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp)) ||
             ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04bf >= cp)) ||
             ((0x04d0 <= cp) && (0x04ff >= cp)))
    {
        // pairs laid out as (even uppercase, odd lowercase).
        // 0x04c0..0x04cf is deliberately excluded: it does not follow
        // this layout and is handled below
        cp &= ~0x1;
    }
    else if (((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp)) ||
             ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp)) ||
             ((0x01cd <= cp) && (0x01dc >= cp)) || ((0x04c1 <= cp) && (0x04ce >= cp)))
    {
        // pairs laid out as (odd uppercase, even lowercase): round
        // down to the odd member of the pair
        cp -= 1;
        cp |= 0x1;
    }
    else
    {
        // irregular one-off mappings
        switch (cp)
        {
        default:
            break;
        case 0x00ff:
            cp = 0x0178;
            break;
        case 0x0180:
            cp = 0x0243;
            break;
        case 0x01dd:
            cp = 0x018e;
            break;
        case 0x019a:
            cp = 0x023d;
            break;
        case 0x019e:
            cp = 0x0220;
            break;
        case 0x0292:
            cp = 0x01b7;
            break;
        case 0x01c6:
            cp = 0x01c4;
            break;
        case 0x01c9:
            cp = 0x01c7;
            break;
        case 0x01cc:
            cp = 0x01ca;
            break;
        case 0x01f3:
            cp = 0x01f1;
            break;
        case 0x01bf:
            cp = 0x01f7;
            break;
        case 0x0188:
            cp = 0x0187;
            break;
        case 0x018c:
            cp = 0x018b;
            break;
        case 0x0192:
            cp = 0x0191;
            break;
        case 0x0199:
            cp = 0x0198;
            break;
        case 0x01a8:
            cp = 0x01a7;
            break;
        case 0x01ad:
            cp = 0x01ac;
            break;
        case 0x01b9:
            cp = 0x01b8;
            break;
        case 0x01bd:
            cp = 0x01bc;
            break;
        case 0x01f5:
            cp = 0x01f4;
            break;
        case 0x023c:
            cp = 0x023b;
            break;
        case 0x0242:
            cp = 0x0241;
            break;
        case 0x037b:
            cp = 0x03fd;
            break;
        case 0x037c:
            cp = 0x03fe;
            break;
        case 0x037d:
            cp = 0x03ff;
            break;
        case 0x03f3:
            cp = 0x037f;
            break;
        case 0x03ac:
            cp = 0x0386;
            break;
        case 0x03ad:
            cp = 0x0388;
            break;
        case 0x03ae:
            cp = 0x0389;
            break;
        case 0x03af:
            cp = 0x038a;
            break;
        case 0x03cc:
            cp = 0x038c;
            break;
        case 0x03cd:
            cp = 0x038e;
            break;
        case 0x03ce:
            cp = 0x038f;
            break;
        case 0x0371:
            cp = 0x0370;
            break;
        case 0x0373:
            cp = 0x0372;
            break;
        case 0x0377:
            cp = 0x0376;
            break;
        case 0x03d1:
            cp = 0x03f4;
            break;
        case 0x03d7:
            cp = 0x03cf;
            break;
        case 0x03f2:
            cp = 0x03f9;
            break;
        case 0x03f8:
            cp = 0x03f7;
            break;
        case 0x03fb:
            cp = 0x03fa;
            break;
        case 0x04cf:
            // Cyrillic small palochka: its uppercase is at the start
            // of the 0x04c0..0x04cf run
            cp = 0x04c0;
            break;
        }
    }

    return cp;
}
|
1640
|
+
|
1641
|
+
#undef utf8_restrict
|
1642
|
+
#undef utf8_null
|
1643
|
+
|
1644
|
+
#ifdef __cplusplus
|
1645
|
+
} // extern "C"
|
1646
|
+
#endif
|
1647
|
+
|
1648
|
+
#if defined(__clang__)
|
1649
|
+
#pragma clang diagnostic pop
|
1650
|
+
#endif
|
1651
|
+
|
1652
|
+
#endif // SHEREDOM_UTF8_H_INCLUDED
|