rgss 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.clang-format +6 -0
 - data/.gitignore +167 -0
 - data/.yardopts +6 -0
 - data/CHANGELOG.md +4 -0
 - data/Gemfile +4 -0
 - data/LICENSE.txt +21 -0
 - data/Rakefile +9 -0
 - data/ext/rgss/cglm-v0.7.9.tar.gz +0 -0
 - data/ext/rgss/color.c +599 -0
 - data/ext/rgss/entity.c +373 -0
 - data/ext/rgss/extconf.rb +53 -0
 - data/ext/rgss/font.c +135 -0
 - data/ext/rgss/game.c +469 -0
 - data/ext/rgss/game.h +99 -0
 - data/ext/rgss/gl.c +3217 -0
 - data/ext/rgss/glad.c +1140 -0
 - data/ext/rgss/glad.h +2129 -0
 - data/ext/rgss/glfw.c +1453 -0
 - data/ext/rgss/graphics.c +324 -0
 - data/ext/rgss/image.c +274 -0
 - data/ext/rgss/input.c +745 -0
 - data/ext/rgss/khrplatform.h +290 -0
 - data/ext/rgss/mat4.c +279 -0
 - data/ext/rgss/pax_global_header +1 -0
 - data/ext/rgss/point.c +253 -0
 - data/ext/rgss/rect.c +449 -0
 - data/ext/rgss/rgss.c +56 -0
 - data/ext/rgss/rgss.h +241 -0
 - data/ext/rgss/stb_image.h +7762 -0
 - data/ext/rgss/stb_image_write.h +1690 -0
 - data/ext/rgss/stb_rect_pack.h +628 -0
 - data/ext/rgss/stb_truetype.h +5011 -0
 - data/ext/rgss/utf8.h +1652 -0
 - data/ext/rgss/uthash.h +1133 -0
 - data/ext/rgss/vec.c +114 -0
 - data/ext/rgss/vec.h +192 -0
 - data/ext/rgss/vec2.c +489 -0
 - data/ext/rgss/vec3.c +751 -0
 - data/ext/rgss/vec4.c +681 -0
 - data/lib/rgss.rb +140 -0
 - data/lib/rgss/batch.rb +57 -0
 - data/lib/rgss/blend.rb +47 -0
 - data/lib/rgss/game_object.rb +28 -0
 - data/lib/rgss/plane.rb +95 -0
 - data/lib/rgss/renderable.rb +158 -0
 - data/lib/rgss/rgss.so +0 -0
 - data/lib/rgss/shader.rb +94 -0
 - data/lib/rgss/shaders/sprite-frag.glsl +40 -0
 - data/lib/rgss/shaders/sprite-vert.glsl +17 -0
 - data/lib/rgss/sprite.rb +139 -0
 - data/lib/rgss/stubs/color.rb +318 -0
 - data/lib/rgss/stubs/gl.rb +1999 -0
 - data/lib/rgss/stubs/glfw.rb +626 -0
 - data/lib/rgss/stubs/rect.rb +324 -0
 - data/lib/rgss/stubs/rpg.rb +267 -0
 - data/lib/rgss/stubs/tone.rb +65 -0
 - data/lib/rgss/texture.rb +132 -0
 - data/lib/rgss/tilemap.rb +116 -0
 - data/lib/rgss/version.rb +3 -0
 - data/lib/rgss/viewport.rb +67 -0
 - data/rgss.gemspec +44 -0
 - data/test.png +0 -0
 - metadata +178 -0
 
    
        data/ext/rgss/utf8.h
    ADDED
    
    | 
         @@ -0,0 +1,1652 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            // The latest version of this library is available on GitHub;
         
     | 
| 
      
 2 
     | 
    
         
            +
            // https://github.com/sheredom/utf8.h
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            // This is free and unencumbered software released into the public domain.
         
     | 
| 
      
 5 
     | 
    
         
            +
            //
         
     | 
| 
      
 6 
     | 
    
         
            +
            // Anyone is free to copy, modify, publish, use, compile, sell, or
         
     | 
| 
      
 7 
     | 
    
         
            +
            // distribute this software, either in source code form or as a compiled
         
     | 
| 
      
 8 
     | 
    
         
            +
            // binary, for any purpose, commercial or non-commercial, and by any
         
     | 
| 
      
 9 
     | 
    
         
            +
            // means.
         
     | 
| 
      
 10 
     | 
    
         
            +
            //
         
     | 
| 
      
 11 
     | 
    
         
            +
            // In jurisdictions that recognize copyright laws, the author or authors
         
     | 
| 
      
 12 
     | 
    
         
            +
            // of this software dedicate any and all copyright interest in the
         
     | 
| 
      
 13 
     | 
    
         
            +
            // software to the public domain. We make this dedication for the benefit
         
     | 
| 
      
 14 
     | 
    
         
            +
            // of the public at large and to the detriment of our heirs and
         
     | 
| 
      
 15 
     | 
    
         
            +
            // successors. We intend this dedication to be an overt act of
         
     | 
| 
      
 16 
     | 
    
         
            +
            // relinquishment in perpetuity of all present and future rights to this
         
     | 
| 
      
 17 
     | 
    
         
            +
            // software under copyright law.
         
     | 
| 
      
 18 
     | 
    
         
            +
            //
         
     | 
| 
      
 19 
     | 
    
         
            +
            // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
         
     | 
| 
      
 20 
     | 
    
         
            +
            // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         
     | 
| 
      
 21 
     | 
    
         
            +
            // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
         
     | 
| 
      
 22 
     | 
    
         
            +
            // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
         
     | 
| 
      
 23 
     | 
    
         
            +
            // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
         
     | 
| 
      
 24 
     | 
    
         
            +
            // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
         
     | 
| 
      
 25 
     | 
    
         
            +
            // OTHER DEALINGS IN THE SOFTWARE.
         
     | 
| 
      
 26 
     | 
    
         
            +
            //
         
     | 
| 
      
 27 
     | 
    
         
            +
            // For more information, please refer to <http://unlicense.org/>
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            #ifndef SHEREDOM_UTF8_H_INCLUDED
         
     | 
| 
      
 30 
     | 
    
         
            +
            #define SHEREDOM_UTF8_H_INCLUDED
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            #if defined(_MSC_VER)
         
     | 
| 
      
 33 
     | 
    
         
            +
            #pragma warning(push)
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
            // disable 'bytes padding added after construct' warning
         
     | 
| 
      
 36 
     | 
    
         
            +
            #pragma warning(disable : 4820)
         
     | 
| 
      
 37 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
            #include <stddef.h>
         
     | 
| 
      
 40 
     | 
    
         
            +
            #include <stdlib.h>
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
            #if defined(_MSC_VER)
         
     | 
| 
      
 43 
     | 
    
         
            +
            #pragma warning(pop)
         
     | 
| 
      
 44 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            #if defined(_MSC_VER)
         
     | 
| 
      
 47 
     | 
    
         
            +
            typedef __int32 utf8_int32_t;
         
     | 
| 
      
 48 
     | 
    
         
            +
            #else
         
     | 
| 
      
 49 
     | 
    
         
            +
            #include <stdint.h>
         
     | 
| 
      
 50 
     | 
    
         
            +
            typedef int32_t utf8_int32_t;
         
     | 
| 
      
 51 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
            #if defined(__clang__)
         
     | 
| 
      
 54 
     | 
    
         
            +
            #pragma clang diagnostic push
         
     | 
| 
      
 55 
     | 
    
         
            +
            #pragma clang diagnostic ignored "-Wold-style-cast"
         
     | 
| 
      
 56 
     | 
    
         
            +
            #pragma clang diagnostic ignored "-Wcast-qual"
         
     | 
| 
      
 57 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
            #ifdef __cplusplus
         
     | 
| 
      
 60 
     | 
    
         
            +
            extern "C"
         
     | 
| 
      
 61 
     | 
    
         
            +
            {
         
     | 
| 
      
 62 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
            #if defined(__clang__) || defined(__GNUC__)
         
     | 
| 
      
 65 
     | 
    
         
            +
            #define utf8_nonnull  __attribute__((nonnull))
         
     | 
| 
      
 66 
     | 
    
         
            +
            #define utf8_pure     __attribute__((pure))
         
     | 
| 
      
 67 
     | 
    
         
            +
            #define utf8_restrict __restrict__
         
     | 
| 
      
 68 
     | 
    
         
            +
            #define utf8_weak     __attribute__((weak))
         
     | 
| 
      
 69 
     | 
    
         
            +
            #elif defined(_MSC_VER)
         
     | 
| 
      
 70 
     | 
    
         
            +
            #define utf8_nonnull
         
     | 
| 
      
 71 
     | 
    
         
            +
            #define utf8_pure
         
     | 
| 
      
 72 
     | 
    
         
            +
            #define utf8_restrict __restrict
         
     | 
| 
      
 73 
     | 
    
         
            +
            #define utf8_weak     __inline
         
     | 
| 
      
 74 
     | 
    
         
            +
            #else
         
     | 
| 
      
 75 
     | 
    
         
            +
            #error Non clang, non gcc, non MSVC compiler found!
         
     | 
| 
      
 76 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
      
 78 
     | 
    
         
            +
            #ifdef __cplusplus
         
     | 
| 
      
 79 
     | 
    
         
            +
            #define utf8_null NULL
         
     | 
| 
      
 80 
     | 
    
         
            +
            #else
         
     | 
| 
      
 81 
     | 
    
         
            +
            #define utf8_null 0
         
     | 
| 
      
 82 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 83 
     | 
    
         
            +
             
     | 
| 
      
 84 
     | 
    
         
            +
                // Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
         
     | 
| 
      
 85 
     | 
    
         
            +
                // src2 respectively, case insensitive.
         
     | 
| 
      
 86 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak int utf8casecmp(const void *src1, const void *src2);
         
     | 
| 
      
 87 
     | 
    
         
            +
             
     | 
| 
      
 88 
     | 
    
         
            +
                // Append the utf8 string src onto the utf8 string dst.
         
     | 
| 
      
 89 
     | 
    
         
            +
                utf8_nonnull utf8_weak void *utf8cat(void *utf8_restrict dst, const void *utf8_restrict src);
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
      
 91 
     | 
    
         
            +
                // Find the first match of the utf8 codepoint chr in the utf8 string src.
         
     | 
| 
      
 92 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak void *utf8chr(const void *src, utf8_int32_t chr);
         
     | 
| 
      
 93 
     | 
    
         
            +
             
     | 
| 
      
 94 
     | 
    
         
            +
                // Return less than 0, 0, greater than 0 if src1 < src2,
         
     | 
| 
      
 95 
     | 
    
         
            +
                // src1 == src2, src1 > src2 respectively.
         
     | 
| 
      
 96 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak int utf8cmp(const void *src1, const void *src2);
         
     | 
| 
      
 97 
     | 
    
         
            +
             
     | 
| 
      
 98 
     | 
    
         
            +
                // Copy the utf8 string src onto the memory allocated in dst.
         
     | 
| 
      
 99 
     | 
    
         
            +
                utf8_nonnull utf8_weak void *utf8cpy(void *utf8_restrict dst, const void *utf8_restrict src);
         
     | 
| 
      
 100 
     | 
    
         
            +
             
     | 
| 
      
 101 
     | 
    
         
            +
                // Number of utf8 codepoints in the utf8 string src that consists entirely
         
     | 
| 
      
 102 
     | 
    
         
            +
                // of utf8 codepoints not from the utf8 string reject.
         
     | 
| 
      
 103 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak size_t utf8cspn(const void *src, const void *reject);
         
     | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
      
 105 
     | 
    
         
            +
                // Duplicate the utf8 string src by getting its size, malloc'ing a new buffer,
         
     | 
| 
      
 106 
     | 
    
         
            +
                // copying over the data, and returning that. Or 0 if malloc failed.
         
     | 
| 
      
 107 
     | 
    
         
            +
                utf8_nonnull utf8_weak void *utf8dup(const void *src);
         
     | 
| 
      
 108 
     | 
    
         
            +
             
     | 
| 
      
 109 
     | 
    
         
            +
                // Number of utf8 codepoints in the utf8 string str,
         
     | 
| 
      
 110 
     | 
    
         
            +
                // excluding the null terminating byte.
         
     | 
| 
      
 111 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str);
         
     | 
| 
      
 112 
     | 
    
         
            +
             
     | 
| 
      
 113 
     | 
    
         
            +
                // Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
         
     | 
| 
      
 114 
     | 
    
         
            +
                // src2 respectively, case insensitive. Checking at most n bytes of each utf8
         
     | 
| 
      
 115 
     | 
    
         
            +
                // string.
         
     | 
| 
      
 116 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak int utf8ncasecmp(const void *src1, const void *src2, size_t n);
         
     | 
| 
      
 117 
     | 
    
         
            +
             
     | 
| 
      
 118 
     | 
    
         
            +
                // Append the utf8 string src onto the utf8 string dst,
         
     | 
| 
      
 119 
     | 
    
         
            +
                // writing at most n+1 bytes. Can produce an invalid utf8
         
     | 
| 
      
 120 
     | 
    
         
            +
                // string if n falls partway through a utf8 codepoint.
         
     | 
| 
      
 121 
     | 
    
         
            +
                utf8_nonnull utf8_weak void *utf8ncat(void *utf8_restrict dst, const void *utf8_restrict src, size_t n);
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
                // Return less than 0, 0, greater than 0 if src1 < src2,
         
     | 
| 
      
 124 
     | 
    
         
            +
                // src1 == src2, src1 > src2 respectively. Checking at most n
         
     | 
| 
      
 125 
     | 
    
         
            +
                // bytes of each utf8 string.
         
     | 
| 
      
 126 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak int utf8ncmp(const void *src1, const void *src2, size_t n);
         
     | 
| 
      
 127 
     | 
    
         
            +
             
     | 
| 
      
 128 
     | 
    
         
            +
                // Copy the utf8 string src onto the memory allocated in dst.
         
     | 
| 
      
 129 
     | 
    
         
            +
                // Copies at most n bytes. If there is no terminating null byte in
         
     | 
| 
      
 130 
     | 
    
         
            +
                // the first n bytes of src, the string placed into dst will not be
         
     | 
| 
      
 131 
     | 
    
         
            +
                // null-terminated. If the size (in bytes) of src is less than n,
         
     | 
| 
      
 132 
     | 
    
         
            +
                // extra null terminating bytes are appended to dst such that a
         
     | 
| 
      
 133 
     | 
    
         
            +
                // total of n bytes are written. Can produce an invalid utf8
         
     | 
| 
      
 134 
     | 
    
         
            +
                // string if n falls partway through a utf8 codepoint.
         
     | 
| 
      
 135 
     | 
    
         
            +
                utf8_nonnull utf8_weak void *utf8ncpy(void *utf8_restrict dst, const void *utf8_restrict src, size_t n);
         
     | 
| 
      
 136 
     | 
    
         
            +
             
     | 
| 
      
 137 
     | 
    
         
            +
                // Similar to utf8dup, except that at most n bytes of src are copied. If src is
         
     | 
| 
      
 138 
     | 
    
         
            +
                // longer than n, only n bytes are copied and a null byte is added.
         
     | 
| 
      
 139 
     | 
    
         
            +
                //
         
     | 
| 
      
 140 
     | 
    
         
            +
                // Returns a new string if successful, 0 otherwise
         
     | 
| 
      
 141 
     | 
    
         
            +
                utf8_nonnull utf8_weak void *utf8ndup(const void *src, size_t n);
         
     | 
| 
      
 142 
     | 
    
         
            +
             
     | 
| 
      
 143 
     | 
    
         
            +
                // Locates the first occurrence in the utf8 string str of any byte in the
         
     | 
| 
      
 144 
     | 
    
         
            +
                // utf8 string accept, or 0 if no match was found.
         
     | 
| 
      
 145 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak void *utf8pbrk(const void *str, const void *accept);
         
     | 
| 
      
 146 
     | 
    
         
            +
             
     | 
| 
      
 147 
     | 
    
         
            +
                // Find the last match of the utf8 codepoint chr in the utf8 string src.
         
     | 
| 
      
 148 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak void *utf8rchr(const void *src, int chr);
         
     | 
| 
      
 149 
     | 
    
         
            +
             
     | 
| 
      
 150 
     | 
    
         
            +
                // Number of bytes in the utf8 string str,
         
     | 
| 
      
 151 
     | 
    
         
            +
                // including the null terminating byte.
         
     | 
| 
      
 152 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak size_t utf8size(const void *str);
         
     | 
| 
      
 153 
     | 
    
         
            +
             
     | 
| 
      
 154 
     | 
    
         
            +
                // Number of utf8 codepoints in the utf8 string src that consists entirely
         
     | 
| 
      
 155 
     | 
    
         
            +
                // of utf8 codepoints from the utf8 string accept.
         
     | 
| 
      
 156 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak size_t utf8spn(const void *src, const void *accept);
         
     | 
| 
      
 157 
     | 
    
         
            +
             
     | 
| 
      
 158 
     | 
    
         
            +
                // The position of the utf8 string needle in the utf8 string haystack.
         
     | 
| 
      
 159 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak void *utf8str(const void *haystack, const void *needle);
         
     | 
| 
      
 160 
     | 
    
         
            +
             
     | 
| 
      
 161 
     | 
    
         
            +
                // The position of the utf8 string needle in the utf8 string haystack, case
         
     | 
| 
      
 162 
     | 
    
         
            +
                // insensitive.
         
     | 
| 
      
 163 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak void *utf8casestr(const void *haystack, const void *needle);
         
     | 
| 
      
 164 
     | 
    
         
            +
             
     | 
| 
      
 165 
     | 
    
         
            +
                // Return 0 on success, or the position of the invalid
         
     | 
| 
      
 166 
     | 
    
         
            +
                // utf8 codepoint on failure.
         
     | 
| 
      
 167 
     | 
    
         
            +
                utf8_nonnull utf8_pure utf8_weak void *utf8valid(const void *str);
         
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
      
 169 
     | 
    
         
            +
                // Sets out_codepoint to the next utf8 codepoint in str, and returns the address
         
     | 
| 
      
 170 
     | 
    
         
            +
                // of the utf8 codepoint after the current one in str.
         
     | 
| 
      
 171 
     | 
    
         
            +
                utf8_nonnull utf8_weak void *utf8codepoint(const void *utf8_restrict str,
         
     | 
| 
      
 172 
     | 
    
         
            +
                                                           utf8_int32_t *utf8_restrict out_codepoint);
         
     | 
| 
      
 173 
     | 
    
         
            +
             
     | 
| 
      
 174 
     | 
    
         
            +
                // Calculates the size of the next utf8 codepoint in str.
         
     | 
| 
      
 175 
     | 
    
         
            +
                utf8_nonnull utf8_weak size_t utf8codepointcalcsize(const void *utf8_restrict str);
         
     | 
| 
      
 176 
     | 
    
         
            +
             
     | 
| 
      
 177 
     | 
    
         
            +
                // Returns the size of the given codepoint in bytes.
         
     | 
| 
      
 178 
     | 
    
         
            +
                utf8_weak size_t utf8codepointsize(utf8_int32_t chr);
         
     | 
| 
      
 179 
     | 
    
         
            +
             
     | 
| 
      
 180 
     | 
    
         
            +
                // Write a codepoint to the given string, and return the address to the next
         
     | 
| 
      
 181 
     | 
    
         
            +
                // place after the written codepoint. Pass how many bytes are left in the buffer to
         
     | 
| 
      
 182 
     | 
    
         
            +
                // n. If there is not enough space for the codepoint, this function returns
         
     | 
| 
      
 183 
     | 
    
         
            +
                // null.
         
     | 
| 
      
 184 
     | 
    
         
            +
                utf8_nonnull utf8_weak void *utf8catcodepoint(void *utf8_restrict str, utf8_int32_t chr, size_t n);
         
     | 
| 
      
 185 
     | 
    
         
            +
             
     | 
| 
      
 186 
     | 
    
         
            +
                // Returns 1 if the given character is lowercase, or 0 if it is not.
         
     | 
| 
      
 187 
     | 
    
         
            +
                utf8_weak int utf8islower(utf8_int32_t chr);
         
     | 
| 
      
 188 
     | 
    
         
            +
             
     | 
| 
      
 189 
     | 
    
         
            +
                // Returns 1 if the given character is uppercase, or 0 if it is not.
         
     | 
| 
      
 190 
     | 
    
         
            +
                utf8_weak int utf8isupper(utf8_int32_t chr);
         
     | 
| 
      
 191 
     | 
    
         
            +
             
     | 
| 
      
 192 
     | 
    
         
            +
                // Transform the given string into all lowercase codepoints.
         
     | 
| 
      
 193 
     | 
    
         
            +
                utf8_nonnull utf8_weak void utf8lwr(void *utf8_restrict str);
         
     | 
| 
      
 194 
     | 
    
         
            +
             
     | 
| 
      
 195 
     | 
    
         
            +
                // Transform the given string into all uppercase codepoints.
         
     | 
| 
      
 196 
     | 
    
         
            +
                utf8_nonnull utf8_weak void utf8upr(void *utf8_restrict str);
         
     | 
| 
      
 197 
     | 
    
         
            +
             
     | 
| 
      
 198 
     | 
    
         
            +
                // Make a codepoint lower case if possible.
         
     | 
| 
      
 199 
     | 
    
         
            +
                utf8_weak utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp);
         
     | 
| 
      
 200 
     | 
    
         
            +
             
     | 
| 
      
 201 
     | 
    
         
            +
                // Make a codepoint upper case if possible.
         
     | 
| 
      
 202 
     | 
    
         
            +
                utf8_weak utf8_int32_t utf8uprcodepoint(utf8_int32_t cp);
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
      
 204 
     | 
    
         
            +
            #undef utf8_weak
         
     | 
| 
      
 205 
     | 
    
         
            +
            #undef utf8_pure
         
     | 
| 
      
 206 
     | 
    
         
            +
            #undef utf8_nonnull
         
     | 
| 
      
 207 
     | 
    
         
            +
             
     | 
| 
      
 208 
     | 
    
         
            +
                int utf8casecmp(const void *src1, const void *src2)
         
     | 
| 
      
 209 
     | 
    
         
            +
                {
         
     | 
| 
      
 210 
     | 
    
         
            +
                    utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
         
     | 
| 
      
 211 
     | 
    
         
            +
             
     | 
| 
      
 212 
     | 
    
         
            +
                    for (;;)
         
     | 
| 
      
 213 
     | 
    
         
            +
                    {
         
     | 
| 
      
 214 
     | 
    
         
            +
                        src1 = utf8codepoint(src1, &src1_cp);
         
     | 
| 
      
 215 
     | 
    
         
            +
                        src2 = utf8codepoint(src2, &src2_cp);
         
     | 
| 
      
 216 
     | 
    
         
            +
             
     | 
| 
      
 217 
     | 
    
         
            +
                        // take a copy of src1 & src2
         
     | 
| 
      
 218 
     | 
    
         
            +
                        src1_orig_cp = src1_cp;
         
     | 
| 
      
 219 
     | 
    
         
            +
                        src2_orig_cp = src2_cp;
         
     | 
| 
      
 220 
     | 
    
         
            +
             
     | 
| 
      
 221 
     | 
    
         
            +
                        // lower the srcs if required
         
     | 
| 
      
 222 
     | 
    
         
            +
                        src1_cp = utf8lwrcodepoint(src1_cp);
         
     | 
| 
      
 223 
     | 
    
         
            +
                        src2_cp = utf8lwrcodepoint(src2_cp);
         
     | 
| 
      
 224 
     | 
    
         
            +
             
     | 
| 
      
 225 
     | 
    
         
            +
                        // check if the lowered codepoints match
         
     | 
| 
      
 226 
     | 
    
         
            +
                        if ((0 == src1_orig_cp) && (0 == src2_orig_cp))
         
     | 
| 
      
 227 
     | 
    
         
            +
                        {
         
     | 
| 
      
 228 
     | 
    
         
            +
                            return 0;
         
     | 
| 
      
 229 
     | 
    
         
            +
                        }
         
     | 
| 
      
 230 
     | 
    
         
            +
                        else if (src1_cp == src2_cp)
         
     | 
| 
      
 231 
     | 
    
         
            +
                        {
         
     | 
| 
      
 232 
     | 
    
         
            +
                            continue;
         
     | 
| 
      
 233 
     | 
    
         
            +
                        }
         
     | 
| 
      
 234 
     | 
    
         
            +
             
     | 
| 
      
 235 
     | 
    
         
            +
                        // if they don't match, then we return the difference between the characters
         
     | 
| 
      
 236 
     | 
    
         
            +
                        return src1_cp - src2_cp;
         
     | 
| 
      
 237 
     | 
    
         
            +
                    }
         
     | 
| 
      
 238 
     | 
    
         
            +
                }
         
     | 
| 
      
 239 
     | 
    
         
            +
             
     | 
| 
      
 240 
     | 
    
         
            +
                // Append the utf8 string src to the end of the utf8 string dst.
                //
                // The bytes of src, up to but not including its null terminator, are
                // copied over dst starting at dst's existing null terminator, and a new
                // null terminator is written after them. No bounds checking is done
                // here, so dst must already have room for the combined string.
                //
                // Returns dst.
                void *utf8cat(void *utf8_restrict dst, const void *utf8_restrict src)
                {
                    const char *in = (const char *)src;
                    char *out = (char *)dst;

                    // skip over the current contents of dst
                    for (; '\0' != *out; ++out)
                    {
                    }

                    // copy src byte-by-byte, starting on top of dst's old terminator
                    for (; '\0' != *in; ++in, ++out)
                    {
                        *out = *in;
                    }

                    // terminate the combined string
                    *out = '\0';

                    return dst;
                }
         
     | 
| 
      
 262 
     | 
    
         
            +
             
     | 
| 
      
 263 
     | 
    
         
            +
                // Locate the first occurrence of the codepoint chr in the utf8 string src.
                //
                // chr is encoded into a small null terminated utf8 string which is then
                // handed to utf8str to do the actual search; the return value is whatever
                // utf8str returns. A chr of 0 is special-cased: the address of the null
                // terminating byte of src is returned.
                void *utf8chr(const void *src, utf8_int32_t chr)
                {
                    // up to four encoded bytes plus the terminator, all zero to begin with
                    char needle[5] = {0};

                    if (0 == chr)
                    {
                        // asked for the terminator itself: walk to the end of src
                        const char *end = (const char *)src;

                        for (; '\0' != *end; end++)
                        {
                        }

                        return (void *)end;
                    }

                    if (0 == ((utf8_int32_t)0xffffff80 & chr))
                    {
                        // fits in 7 bits: single byte 0b0xxxxxxx
                        needle[0] = (char)chr;
                    }
                    else if (0 == ((utf8_int32_t)0xfffff800 & chr))
                    {
                        // fits in 11 bits: 0b110xxxxx 0b10xxxxxx
                        needle[0] = (char)(0xc0 | (char)(chr >> 6));
                        needle[1] = (char)(0x80 | (char)(chr & 0x3f));
                    }
                    else if (0 == ((utf8_int32_t)0xffff0000 & chr))
                    {
                        // fits in 16 bits: 0b1110xxxx 0b10xxxxxx 0b10xxxxxx
                        needle[0] = (char)(0xe0 | (char)(chr >> 12));
                        needle[1] = (char)(0x80 | (char)((chr >> 6) & 0x3f));
                        needle[2] = (char)(0x80 | (char)(chr & 0x3f));
                    }
                    else
                    {
                        // everything else is written as four bytes:
                        // 0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx
                        needle[0] = (char)(0xf0 | (char)(chr >> 18));
                        needle[1] = (char)(0x80 | (char)((chr >> 12) & 0x3f));
                        needle[2] = (char)(0x80 | (char)((chr >> 6) & 0x3f));
                        needle[3] = (char)(0x80 | (char)(chr & 0x3f));
                    }

                    // needle now holds the encoded codepoint followed by a terminator,
                    // so a substring search finds the codepoint
                    return utf8str(src, needle);
                }
         
     | 
| 
      
 314 
     | 
    
         
            +
             
     | 
| 
      
 315 
     | 
    
         
            +
                // Compare two null terminated utf8 strings byte by byte.
                //
                // Bytes are compared as unsigned values. Returns -1 if the first differing
                // byte of src1 is smaller than that of src2, 1 if it is larger, and 0 when
                // both strings end without any difference.
                int utf8cmp(const void *src1, const void *src2)
                {
                    const unsigned char *lhs = (const unsigned char *)src1;
                    const unsigned char *rhs = (const unsigned char *)src2;

                    for (;; lhs++, rhs++)
                    {
                        if (*lhs != *rhs)
                        {
                            // a shorter string differs here too, because its terminator
                            // (0) is smaller than any other byte
                            return (*lhs < *rhs) ? -1 : 1;
                        }

                        if ('\0' == *lhs)
                        {
                            // equal bytes and both are terminators: the strings matched
                            return 0;
                        }
                    }
                }
         
     | 
| 
      
 338 
     | 
    
         
            +
             
     | 
| 
      
 339 
     | 
    
         
            +
                // Declaration only: no definition of utf8coll is visible in this part of
                // the file. NOTE(review): confirm whether it is implemented anywhere
                // before calling it.
                int utf8coll(const void *src1, const void *src2);
         
     | 
| 
      
 340 
     | 
    
         
            +
             
     | 
| 
      
 341 
     | 
    
         
            +
                // Copy the utf8 string src, including its null terminator, into dst.
                //
                // Whatever dst held before is overwritten. No bounds checking is performed
                // here, so dst has to be large enough for src plus the terminator.
                // Returns dst.
                void *utf8cpy(void *utf8_restrict dst, const void *utf8_restrict src)
                {
                    char *out = (char *)dst;
                    const char *in = (const char *)src;

                    // transfer every byte up to (not including) the terminator of src
                    for (; '\0' != *in; in++, out++)
                    {
                        *out = *in;
                    }

                    // then terminate the copy
                    *out = '\0';

                    return dst;
                }
         
     | 
| 
      
 358 
     | 
    
         
            +
             
     | 
| 
      
 359 
     | 
    
         
            +
                // Count the utf8 codepoints at the start of src that do not appear in
                // reject.
                //
                // Returns the number of codepoints preceding the first codepoint of src
                // that also occurs in reject, or the total number of codepoints in src if
                // none of them occur in reject.
                size_t utf8cspn(const void *src, const void *reject)
                {
                    const char *cursor = (const char *)src;
                    size_t count = 0;

                    // each pass examines one codepoint of src; count advances only when
                    // that codepoint was not found in reject
                    for (; '\0' != *cursor; count++)
                    {
                        const char *candidate = (const char *)reject;
                        size_t matched = 0; // bytes of the current candidate matched so far

                        while ('\0' != *candidate)
                        {
                            // a byte that is not 0b10xxxxxx starts a new codepoint
                            const int at_boundary = (0x80 != (0xc0 & *candidate));

                            if (at_boundary && (0 < matched))
                            {
                                // the previous candidate codepoint matched completely
                                return count;
                            }

                            if (cursor[matched] == *candidate)
                            {
                                // one more byte agrees, keep comparing this codepoint
                                matched++;
                                candidate++;
                                continue;
                            }

                            // mismatch: skip the remainder of this candidate codepoint
                            // and start matching afresh on the next one
                            candidate++;
                            while (0x80 == (0xc0 & *candidate))
                            {
                                candidate++;
                            }
                            matched = 0;
                        }

                        // the final codepoint of reject matched right at its end
                        if (0 < matched)
                        {
                            return count;
                        }

                        // not rejected: step src forward to the next codepoint start
                        cursor++;
                        while (0x80 == (0xc0 & *cursor))
                        {
                            cursor++;
                        }
                    }

                    return count;
                }
         
     | 
| 
      
 421 
     | 
    
         
            +
             
     | 
| 
      
 422 
     | 
    
         
            +
                void *utf8dup(const void *src)
         
     | 
| 
      
 423 
     | 
    
         
            +
                {
         
     | 
| 
      
 424 
     | 
    
         
            +
                    const char *s = (const char *)src;
         
     | 
| 
      
 425 
     | 
    
         
            +
                    char *n = utf8_null;
         
     | 
| 
      
 426 
     | 
    
         
            +
             
     | 
| 
      
 427 
     | 
    
         
            +
                    // figure out how many bytes (including the terminator) we need to copy first
         
     | 
| 
      
 428 
     | 
    
         
            +
                    size_t bytes = utf8size(src);
         
     | 
| 
      
 429 
     | 
    
         
            +
             
     | 
| 
      
 430 
     | 
    
         
            +
                    n = (char *)malloc(bytes);
         
     | 
| 
      
 431 
     | 
    
         
            +
             
     | 
| 
      
 432 
     | 
    
         
            +
                    if (utf8_null == n)
         
     | 
| 
      
 433 
     | 
    
         
            +
                    {
         
     | 
| 
      
 434 
     | 
    
         
            +
                        // out of memory so we bail
         
     | 
| 
      
 435 
     | 
    
         
            +
                        return utf8_null;
         
     | 
| 
      
 436 
     | 
    
         
            +
                    }
         
     | 
| 
      
 437 
     | 
    
         
            +
                    else
         
     | 
| 
      
 438 
     | 
    
         
            +
                    {
         
     | 
| 
      
 439 
     | 
    
         
            +
                        bytes = 0;
         
     | 
| 
      
 440 
     | 
    
         
            +
             
     | 
| 
      
 441 
     | 
    
         
            +
                        // copy src byte-by-byte into our new utf8 string
         
     | 
| 
      
 442 
     | 
    
         
            +
                        while ('\0' != s[bytes])
         
     | 
| 
      
 443 
     | 
    
         
            +
                        {
         
     | 
| 
      
 444 
     | 
    
         
            +
                            n[bytes] = s[bytes];
         
     | 
| 
      
 445 
     | 
    
         
            +
                            bytes++;
         
     | 
| 
      
 446 
     | 
    
         
            +
                        }
         
     | 
| 
      
 447 
     | 
    
         
            +
             
     | 
| 
      
 448 
     | 
    
         
            +
                        // append null terminating byte
         
     | 
| 
      
 449 
     | 
    
         
            +
                        n[bytes] = '\0';
         
     | 
| 
      
 450 
     | 
    
         
            +
                        return n;
         
     | 
| 
      
 451 
     | 
    
         
            +
                    }
         
     | 
| 
      
 452 
     | 
    
         
            +
                }
         
     | 
| 
      
 453 
     | 
    
         
            +
             
     | 
| 
      
 454 
     | 
    
         
            +
                // Declaration only: no definition of utf8fry is visible in this part of
                // the file. NOTE(review): confirm whether it is implemented anywhere
                // before calling it.
                void *utf8fry(const void *str);
         
     | 
| 
      
 455 
     | 
    
         
            +
             
     | 
| 
      
 456 
     | 
    
         
            +
                // Count the utf8 codepoints in the null terminated string str.
                //
                // The lead byte of each codepoint announces how many bytes it occupies
                // (1 to 4); the cursor steps over that many bytes and the count goes up
                // by one. The null terminator is not counted.
                //
                // A multi-byte sequence that is cut short by the terminator is counted
                // as one codepoint and the scan stops at the terminator. Blindly adding
                // the announced width would move the cursor beyond the end of the string
                // and keep reading memory that does not belong to it.
                size_t utf8len(const void *str)
                {
                    const unsigned char *s = (const unsigned char *)str;
                    size_t length = 0;

                    while ('\0' != *s)
                    {
                        size_t trailing; // bytes announced after the lead byte

                        if (0xf0 == (0xf8 & *s))
                        {
                            // 4-byte utf8 code point (began with 0b11110xxx)
                            trailing = 3;
                        }
                        else if (0xe0 == (0xf0 & *s))
                        {
                            // 3-byte utf8 code point (began with 0b1110xxxx)
                            trailing = 2;
                        }
                        else if (0xc0 == (0xe0 & *s))
                        {
                            // 2-byte utf8 code point (began with 0b110xxxxx)
                            trailing = 1;
                        }
                        else
                        {
                            // 1-byte ascii (began with 0b0xxxxxxx), or a byte that is
                            // not a valid lead byte; either way step over just this byte
                            trailing = 0;
                        }

                        // consume the lead byte, then the announced trailing bytes, but
                        // never walk over the null terminator
                        s++;
                        while ((0 < trailing) && ('\0' != *s))
                        {
                            s++;
                            trailing--;
                        }

                        // however many bytes were consumed, it was only 1 utf8 codepoint
                        length++;
                    }

                    return length;
                }
         
     | 
| 
      
 491 
     | 
    
         
            +
             
     | 
| 
      
 492 
     | 
    
         
            +
                int utf8ncasecmp(const void *src1, const void *src2, size_t n)
         
     | 
| 
      
 493 
     | 
    
         
            +
                {
         
     | 
| 
      
 494 
     | 
    
         
            +
                    utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
         
     | 
| 
      
 495 
     | 
    
         
            +
             
     | 
| 
      
 496 
     | 
    
         
            +
                    do
         
     | 
| 
      
 497 
     | 
    
         
            +
                    {
         
     | 
| 
      
 498 
     | 
    
         
            +
                        const unsigned char *const s1 = (const unsigned char *)src1;
         
     | 
| 
      
 499 
     | 
    
         
            +
                        const unsigned char *const s2 = (const unsigned char *)src2;
         
     | 
| 
      
 500 
     | 
    
         
            +
             
     | 
| 
      
 501 
     | 
    
         
            +
                        // first check that we have enough bytes left in n to contain an entire
         
     | 
| 
      
 502 
     | 
    
         
            +
                        // codepoint
         
     | 
| 
      
 503 
     | 
    
         
            +
                        if (0 == n)
         
     | 
| 
      
 504 
     | 
    
         
            +
                        {
         
     | 
| 
      
 505 
     | 
    
         
            +
                            return 0;
         
     | 
| 
      
 506 
     | 
    
         
            +
                        }
         
     | 
| 
      
 507 
     | 
    
         
            +
             
     | 
| 
      
 508 
     | 
    
         
            +
                        if ((1 == n) && ((0xc0 == (0xe0 & *s1)) || (0xc0 == (0xe0 & *s2))))
         
     | 
| 
      
 509 
     | 
    
         
            +
                        {
         
     | 
| 
      
 510 
     | 
    
         
            +
                            const utf8_int32_t c1 = (0xe0 & *s1);
         
     | 
| 
      
 511 
     | 
    
         
            +
                            const utf8_int32_t c2 = (0xe0 & *s2);
         
     | 
| 
      
 512 
     | 
    
         
            +
             
     | 
| 
      
 513 
     | 
    
         
            +
                            if (c1 < c2)
         
     | 
| 
      
 514 
     | 
    
         
            +
                            {
         
     | 
| 
      
 515 
     | 
    
         
            +
                                return c1 - c2;
         
     | 
| 
      
 516 
     | 
    
         
            +
                            }
         
     | 
| 
      
 517 
     | 
    
         
            +
                            else
         
     | 
| 
      
 518 
     | 
    
         
            +
                            {
         
     | 
| 
      
 519 
     | 
    
         
            +
                                return 0;
         
     | 
| 
      
 520 
     | 
    
         
            +
                            }
         
     | 
| 
      
 521 
     | 
    
         
            +
                        }
         
     | 
| 
      
 522 
     | 
    
         
            +
             
     | 
| 
      
 523 
     | 
    
         
            +
                        if ((2 >= n) && ((0xe0 == (0xf0 & *s1)) || (0xe0 == (0xf0 & *s2))))
         
     | 
| 
      
 524 
     | 
    
         
            +
                        {
         
     | 
| 
      
 525 
     | 
    
         
            +
                            const utf8_int32_t c1 = (0xf0 & *s1);
         
     | 
| 
      
 526 
     | 
    
         
            +
                            const utf8_int32_t c2 = (0xf0 & *s2);
         
     | 
| 
      
 527 
     | 
    
         
            +
             
     | 
| 
      
 528 
     | 
    
         
            +
                            if (c1 < c2)
         
     | 
| 
      
 529 
     | 
    
         
            +
                            {
         
     | 
| 
      
 530 
     | 
    
         
            +
                                return c1 - c2;
         
     | 
| 
      
 531 
     | 
    
         
            +
                            }
         
     | 
| 
      
 532 
     | 
    
         
            +
                            else
         
     | 
| 
      
 533 
     | 
    
         
            +
                            {
         
     | 
| 
      
 534 
     | 
    
         
            +
                                return 0;
         
     | 
| 
      
 535 
     | 
    
         
            +
                            }
         
     | 
| 
      
 536 
     | 
    
         
            +
                        }
         
     | 
| 
      
 537 
     | 
    
         
            +
             
     | 
| 
      
 538 
     | 
    
         
            +
                        if ((3 >= n) && ((0xf0 == (0xf8 & *s1)) || (0xf0 == (0xf8 & *s2))))
         
     | 
| 
      
 539 
     | 
    
         
            +
                        {
         
     | 
| 
      
 540 
     | 
    
         
            +
                            const utf8_int32_t c1 = (0xf8 & *s1);
         
     | 
| 
      
 541 
     | 
    
         
            +
                            const utf8_int32_t c2 = (0xf8 & *s2);
         
     | 
| 
      
 542 
     | 
    
         
            +
             
     | 
| 
      
 543 
     | 
    
         
            +
                            if (c1 < c2)
         
     | 
| 
      
 544 
     | 
    
         
            +
                            {
         
     | 
| 
      
 545 
     | 
    
         
            +
                                return c1 - c2;
         
     | 
| 
      
 546 
     | 
    
         
            +
                            }
         
     | 
| 
      
 547 
     | 
    
         
            +
                            else
         
     | 
| 
      
 548 
     | 
    
         
            +
                            {
         
     | 
| 
      
 549 
     | 
    
         
            +
                                return 0;
         
     | 
| 
      
 550 
     | 
    
         
            +
                            }
         
     | 
| 
      
 551 
     | 
    
         
            +
                        }
         
     | 
| 
      
 552 
     | 
    
         
            +
             
     | 
| 
      
 553 
     | 
    
         
            +
                        src1 = utf8codepoint(src1, &src1_cp);
         
     | 
| 
      
 554 
     | 
    
         
            +
                        src2 = utf8codepoint(src2, &src2_cp);
         
     | 
| 
      
 555 
     | 
    
         
            +
                        n -= utf8codepointsize(src1_cp);
         
     | 
| 
      
 556 
     | 
    
         
            +
             
     | 
| 
      
 557 
     | 
    
         
            +
                        // Take a copy of src1 & src2
         
     | 
| 
      
 558 
     | 
    
         
            +
                        src1_orig_cp = src1_cp;
         
     | 
| 
      
 559 
     | 
    
         
            +
                        src2_orig_cp = src2_cp;
         
     | 
| 
      
 560 
     | 
    
         
            +
             
     | 
| 
      
 561 
     | 
    
         
            +
                        // Lower srcs if required
         
     | 
| 
      
 562 
     | 
    
         
            +
                        src1_cp = utf8lwrcodepoint(src1_cp);
         
     | 
| 
      
 563 
     | 
    
         
            +
                        src2_cp = utf8lwrcodepoint(src2_cp);
         
     | 
| 
      
 564 
     | 
    
         
            +
             
     | 
| 
      
 565 
     | 
    
         
            +
                        // Check if the lowered codepoints match
         
     | 
| 
      
 566 
     | 
    
         
            +
                        if ((0 == src1_orig_cp) && (0 == src2_orig_cp))
         
     | 
| 
      
 567 
     | 
    
         
            +
                        {
         
     | 
| 
      
 568 
     | 
    
         
            +
                            return 0;
         
     | 
| 
      
 569 
     | 
    
         
            +
                        }
         
     | 
| 
      
 570 
     | 
    
         
            +
                        else if (src1_cp == src2_cp)
         
     | 
| 
      
 571 
     | 
    
         
            +
                        {
         
     | 
| 
      
 572 
     | 
    
         
            +
                            continue;
         
     | 
| 
      
 573 
     | 
    
         
            +
                        }
         
     | 
| 
      
 574 
     | 
    
         
            +
             
     | 
| 
      
 575 
     | 
    
         
            +
                        // if they don't match, then we return the difference between the characters
         
     | 
| 
      
 576 
     | 
    
         
            +
                        if (src1_orig_cp != src2_orig_cp)
         
     | 
| 
      
 577 
     | 
    
         
            +
                        {
         
     | 
| 
      
 578 
     | 
    
         
            +
                            return src1_cp - src2_cp;
         
     | 
| 
      
 579 
     | 
    
         
            +
                        }
         
     | 
| 
      
 580 
     | 
    
         
            +
                    } while (0 < n);
         
     | 
| 
      
 581 
     | 
    
         
            +
             
     | 
| 
      
 582 
     | 
    
         
            +
                    // both utf8 strings matched
         
     | 
| 
      
 583 
     | 
    
         
            +
                    return 0;
         
     | 
| 
      
 584 
     | 
    
         
            +
                }
         
     | 
| 
      
 585 
     | 
    
         
            +
             
     | 
| 
      
 586 
     | 
    
         
            +
                // Append at most n bytes of the utf8 string src onto the end of the utf8
                // string dst, then write a null terminating byte. dst must have room for
                // its current contents plus up to n more bytes plus the terminator.
                // Returns dst. When n is 0 or src is empty, dst is left unchanged.
                void *utf8ncat(void *utf8_restrict dst, const void *utf8_restrict src, size_t n)
                {
                    char *d = (char *)dst;
                    const char *s = (const char *)src;

                    // find the null terminating byte in dst
                    while ('\0' != *d)
                    {
                        d++;
                    }

                    // overwriting the null terminating byte in dst, append src byte-by-byte.
                    // Both conditions are tested BEFORE each copy: a copy-first loop would
                    // write one byte even when n is 0 (and then wrap n around), and would
                    // step past the terminator of an empty src.
                    while (('\0' != *s) && (0 != n))
                    {
                        *d++ = *s++;
                        n--;
                    }

                    // write out a new null terminating byte into dst
                    *d = '\0';

                    return dst;
                }
         
     | 
| 
      
 609 
     | 
    
         
            +
             
     | 
| 
      
 610 
     | 
    
         
            +
                // Compare at most n bytes of two utf8 strings as unsigned bytes.
                // Returns -1 if src1 orders before src2, 1 if it orders after, and 0 if
                // the first n bytes (or both whole strings, if shorter) are identical.
                int utf8ncmp(const void *src1, const void *src2, size_t n)
                {
                    const unsigned char *lhs = (const unsigned char *)src1;
                    const unsigned char *rhs = (const unsigned char *)src2;
                    size_t i;

                    for (i = 0; i < n; i++)
                    {
                        const unsigned char a = lhs[i];
                        const unsigned char b = rhs[i];

                        if (a != b)
                        {
                            // first differing byte decides the ordering
                            return (a < b) ? -1 : 1;
                        }

                        if ('\0' == a)
                        {
                            // both strings ended at the same position
                            break;
                        }
                    }

                    // no difference found within the compared range
                    return 0;
                }
         
     | 
| 
      
 633 
     | 
    
         
            +
             
     | 
| 
      
 634 
     | 
    
         
            +
                // Copy the utf8 string src into dst, writing exactly n bytes: the bytes
                // of src up to its terminator (or the first n bytes if src is longer),
                // followed by zero bytes to fill the remainder. If src holds n or more
                // bytes before its terminator, dst is NOT null terminated. Returns dst.
                void *utf8ncpy(void *utf8_restrict dst, const void *utf8_restrict src, size_t n)
                {
                    char *out = (char *)dst;
                    const char *in = (const char *)src;
                    size_t pos = 0;

                    // copy payload bytes until the terminator or the limit is reached
                    while ((pos < n) && ('\0' != in[pos]))
                    {
                        out[pos] = in[pos];
                        pos++;
                    }

                    // zero-fill the rest of the n byte window (this also writes the
                    // terminator when src was shorter than n)
                    while (pos < n)
                    {
                        out[pos] = '\0';
                        pos++;
                    }

                    return dst;
                }
         
     | 
| 
      
 659 
     | 
    
         
            +
             
     | 
| 
      
 660 
     | 
    
         
            +
                void *utf8ndup(const void *src, size_t n)
         
     | 
| 
      
 661 
     | 
    
         
            +
                {
         
     | 
| 
      
 662 
     | 
    
         
            +
                    const char *s = (const char *)src;
         
     | 
| 
      
 663 
     | 
    
         
            +
                    char *c = utf8_null;
         
     | 
| 
      
 664 
     | 
    
         
            +
                    size_t bytes = 0;
         
     | 
| 
      
 665 
     | 
    
         
            +
             
     | 
| 
      
 666 
     | 
    
         
            +
                    // Find the end of the string or stop when n is reached
         
     | 
| 
      
 667 
     | 
    
         
            +
                    while ('\0' != s[bytes] && bytes < n)
         
     | 
| 
      
 668 
     | 
    
         
            +
                    {
         
     | 
| 
      
 669 
     | 
    
         
            +
                        bytes++;
         
     | 
| 
      
 670 
     | 
    
         
            +
                    }
         
     | 
| 
      
 671 
     | 
    
         
            +
             
     | 
| 
      
 672 
     | 
    
         
            +
                    // In case bytes is actually less than n, we need to set it
         
     | 
| 
      
 673 
     | 
    
         
            +
                    // to be used later in the copy byte by byte.
         
     | 
| 
      
 674 
     | 
    
         
            +
                    n = bytes;
         
     | 
| 
      
 675 
     | 
    
         
            +
             
     | 
| 
      
 676 
     | 
    
         
            +
                    c = (char *)malloc(bytes + 1);
         
     | 
| 
      
 677 
     | 
    
         
            +
                    if (utf8_null == c)
         
     | 
| 
      
 678 
     | 
    
         
            +
                    {
         
     | 
| 
      
 679 
     | 
    
         
            +
                        // out of memory so we bail
         
     | 
| 
      
 680 
     | 
    
         
            +
                        return utf8_null;
         
     | 
| 
      
 681 
     | 
    
         
            +
                    }
         
     | 
| 
      
 682 
     | 
    
         
            +
             
     | 
| 
      
 683 
     | 
    
         
            +
                    bytes = 0;
         
     | 
| 
      
 684 
     | 
    
         
            +
             
     | 
| 
      
 685 
     | 
    
         
            +
                    // copy src byte-by-byte into our new utf8 string
         
     | 
| 
      
 686 
     | 
    
         
            +
                    while ('\0' != s[bytes] && bytes < n)
         
     | 
| 
      
 687 
     | 
    
         
            +
                    {
         
     | 
| 
      
 688 
     | 
    
         
            +
                        c[bytes] = s[bytes];
         
     | 
| 
      
 689 
     | 
    
         
            +
                        bytes++;
         
     | 
| 
      
 690 
     | 
    
         
            +
                    }
         
     | 
| 
      
 691 
     | 
    
         
            +
             
     | 
| 
      
 692 
     | 
    
         
            +
                    // append null terminating byte
         
     | 
| 
      
 693 
     | 
    
         
            +
                    c[bytes] = '\0';
         
     | 
| 
      
 694 
     | 
    
         
            +
                    return c;
         
     | 
| 
      
 695 
     | 
    
         
            +
                }
         
     | 
| 
      
 696 
     | 
    
         
            +
             
     | 
| 
      
 697 
     | 
    
         
            +
                void *utf8rchr(const void *src, int chr)
         
     | 
| 
      
 698 
     | 
    
         
            +
                {
         
     | 
| 
      
 699 
     | 
    
         
            +
                    const char *s = (const char *)src;
         
     | 
| 
      
 700 
     | 
    
         
            +
                    const char *match = utf8_null;
         
     | 
| 
      
 701 
     | 
    
         
            +
                    char c[5] = {'\0', '\0', '\0', '\0', '\0'};
         
     | 
| 
      
 702 
     | 
    
         
            +
             
     | 
| 
      
 703 
     | 
    
         
            +
                    if (0 == chr)
         
     | 
| 
      
 704 
     | 
    
         
            +
                    {
         
     | 
| 
      
 705 
     | 
    
         
            +
                        // being asked to return position of null terminating byte, so
         
     | 
| 
      
 706 
     | 
    
         
            +
                        // just run s to the end, and return!
         
     | 
| 
      
 707 
     | 
    
         
            +
                        while ('\0' != *s)
         
     | 
| 
      
 708 
     | 
    
         
            +
                        {
         
     | 
| 
      
 709 
     | 
    
         
            +
                            s++;
         
     | 
| 
      
 710 
     | 
    
         
            +
                        }
         
     | 
| 
      
 711 
     | 
    
         
            +
                        return (void *)s;
         
     | 
| 
      
 712 
     | 
    
         
            +
                    }
         
     | 
| 
      
 713 
     | 
    
         
            +
                    else if (0 == ((int)0xffffff80 & chr))
         
     | 
| 
      
 714 
     | 
    
         
            +
                    {
         
     | 
| 
      
 715 
     | 
    
         
            +
                        // 1-byte/7-bit ascii
         
     | 
| 
      
 716 
     | 
    
         
            +
                        // (0b0xxxxxxx)
         
     | 
| 
      
 717 
     | 
    
         
            +
                        c[0] = (char)chr;
         
     | 
| 
      
 718 
     | 
    
         
            +
                    }
         
     | 
| 
      
 719 
     | 
    
         
            +
                    else if (0 == ((int)0xfffff800 & chr))
         
     | 
| 
      
 720 
     | 
    
         
            +
                    {
         
     | 
| 
      
 721 
     | 
    
         
            +
                        // 2-byte/11-bit utf8 code point
         
     | 
| 
      
 722 
     | 
    
         
            +
                        // (0b110xxxxx 0b10xxxxxx)
         
     | 
| 
      
 723 
     | 
    
         
            +
                        c[0] = 0xc0 | (char)(chr >> 6);
         
     | 
| 
      
 724 
     | 
    
         
            +
                        c[1] = 0x80 | (char)(chr & 0x3f);
         
     | 
| 
      
 725 
     | 
    
         
            +
                    }
         
     | 
| 
      
 726 
     | 
    
         
            +
                    else if (0 == ((int)0xffff0000 & chr))
         
     | 
| 
      
 727 
     | 
    
         
            +
                    {
         
     | 
| 
      
 728 
     | 
    
         
            +
                        // 3-byte/16-bit utf8 code point
         
     | 
| 
      
 729 
     | 
    
         
            +
                        // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
         
     | 
| 
      
 730 
     | 
    
         
            +
                        c[0] = 0xe0 | (char)(chr >> 12);
         
     | 
| 
      
 731 
     | 
    
         
            +
                        c[1] = 0x80 | (char)((chr >> 6) & 0x3f);
         
     | 
| 
      
 732 
     | 
    
         
            +
                        c[2] = 0x80 | (char)(chr & 0x3f);
         
     | 
| 
      
 733 
     | 
    
         
            +
                    }
         
     | 
| 
      
 734 
     | 
    
         
            +
                    else
         
     | 
| 
      
 735 
     | 
    
         
            +
                    { // if (0 == ((int)0xffe00000 & chr)) {
         
     | 
| 
      
 736 
     | 
    
         
            +
                        // 4-byte/21-bit utf8 code point
         
     | 
| 
      
 737 
     | 
    
         
            +
                        // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
         
     | 
| 
      
 738 
     | 
    
         
            +
                        c[0] = 0xf0 | (char)(chr >> 18);
         
     | 
| 
      
 739 
     | 
    
         
            +
                        c[1] = 0x80 | (char)((chr >> 12) & 0x3f);
         
     | 
| 
      
 740 
     | 
    
         
            +
                        c[2] = 0x80 | (char)((chr >> 6) & 0x3f);
         
     | 
| 
      
 741 
     | 
    
         
            +
                        c[3] = 0x80 | (char)(chr & 0x3f);
         
     | 
| 
      
 742 
     | 
    
         
            +
                    }
         
     | 
| 
      
 743 
     | 
    
         
            +
             
     | 
| 
      
 744 
     | 
    
         
            +
                    // we've created a 2 utf8 codepoint string in c that is
         
     | 
| 
      
 745 
     | 
    
         
            +
                    // the utf8 character asked for by chr, and a null
         
     | 
| 
      
 746 
     | 
    
         
            +
                    // terminating byte
         
     | 
| 
      
 747 
     | 
    
         
            +
             
     | 
| 
      
 748 
     | 
    
         
            +
                    while ('\0' != *s)
         
     | 
| 
      
 749 
     | 
    
         
            +
                    {
         
     | 
| 
      
 750 
     | 
    
         
            +
                        size_t offset = 0;
         
     | 
| 
      
 751 
     | 
    
         
            +
             
     | 
| 
      
 752 
     | 
    
         
            +
                        while (s[offset] == c[offset])
         
     | 
| 
      
 753 
     | 
    
         
            +
                        {
         
     | 
| 
      
 754 
     | 
    
         
            +
                            offset++;
         
     | 
| 
      
 755 
     | 
    
         
            +
                        }
         
     | 
| 
      
 756 
     | 
    
         
            +
             
     | 
| 
      
 757 
     | 
    
         
            +
                        if ('\0' == c[offset])
         
     | 
| 
      
 758 
     | 
    
         
            +
                        {
         
     | 
| 
      
 759 
     | 
    
         
            +
                            // we found a matching utf8 code point
         
     | 
| 
      
 760 
     | 
    
         
            +
                            match = s;
         
     | 
| 
      
 761 
     | 
    
         
            +
                            s += offset;
         
     | 
| 
      
 762 
     | 
    
         
            +
                        }
         
     | 
| 
      
 763 
     | 
    
         
            +
                        else
         
     | 
| 
      
 764 
     | 
    
         
            +
                        {
         
     | 
| 
      
 765 
     | 
    
         
            +
                            s += offset;
         
     | 
| 
      
 766 
     | 
    
         
            +
             
     | 
| 
      
 767 
     | 
    
         
            +
                            // need to march s along to next utf8 codepoint start
         
     | 
| 
      
 768 
     | 
    
         
            +
                            // (the next byte that doesn't match 0b10xxxxxx)
         
     | 
| 
      
 769 
     | 
    
         
            +
                            if ('\0' != *s)
         
     | 
| 
      
 770 
     | 
    
         
            +
                            {
         
     | 
| 
      
 771 
     | 
    
         
            +
                                do
         
     | 
| 
      
 772 
     | 
    
         
            +
                                {
         
     | 
| 
      
 773 
     | 
    
         
            +
                                    s++;
         
     | 
| 
      
 774 
     | 
    
         
            +
                                } while (0x80 == (0xc0 & *s));
         
     | 
| 
      
 775 
     | 
    
         
            +
                            }
         
     | 
| 
      
 776 
     | 
    
         
            +
                        }
         
     | 
| 
      
 777 
     | 
    
         
            +
                    }
         
     | 
| 
      
 778 
     | 
    
         
            +
             
     | 
| 
      
 779 
     | 
    
         
            +
                    // return the last match we found (or 0 if no match was found)
         
     | 
| 
      
 780 
     | 
    
         
            +
                    return (void *)match;
         
     | 
| 
      
 781 
     | 
    
         
            +
                }
         
     | 
| 
      
 782 
     | 
    
         
            +
             
     | 
| 
      
 783 
     | 
    
         
            +
                void *utf8pbrk(const void *str, const void *accept)
         
     | 
| 
      
 784 
     | 
    
         
            +
                {
         
     | 
| 
      
 785 
     | 
    
         
            +
                    const char *s = (const char *)str;
         
     | 
| 
      
 786 
     | 
    
         
            +
             
     | 
| 
      
 787 
     | 
    
         
            +
                    while ('\0' != *s)
         
     | 
| 
      
 788 
     | 
    
         
            +
                    {
         
     | 
| 
      
 789 
     | 
    
         
            +
                        const char *a = (const char *)accept;
         
     | 
| 
      
 790 
     | 
    
         
            +
                        size_t offset = 0;
         
     | 
| 
      
 791 
     | 
    
         
            +
             
     | 
| 
      
 792 
     | 
    
         
            +
                        while ('\0' != *a)
         
     | 
| 
      
 793 
     | 
    
         
            +
                        {
         
     | 
| 
      
 794 
     | 
    
         
            +
                            // checking that if *a is the start of a utf8 codepoint
         
     | 
| 
      
 795 
     | 
    
         
            +
                            // (it is not 0b10xxxxxx) and we have successfully matched
         
     | 
| 
      
 796 
     | 
    
         
            +
                            // a previous character (0 < offset) - we found a match
         
     | 
| 
      
 797 
     | 
    
         
            +
                            if ((0x80 != (0xc0 & *a)) && (0 < offset))
         
     | 
| 
      
 798 
     | 
    
         
            +
                            {
         
     | 
| 
      
 799 
     | 
    
         
            +
                                return (void *)s;
         
     | 
| 
      
 800 
     | 
    
         
            +
                            }
         
     | 
| 
      
 801 
     | 
    
         
            +
                            else
         
     | 
| 
      
 802 
     | 
    
         
            +
                            {
         
     | 
| 
      
 803 
     | 
    
         
            +
                                if (*a == s[offset])
         
     | 
| 
      
 804 
     | 
    
         
            +
                                {
         
     | 
| 
      
 805 
     | 
    
         
            +
                                    // part of a utf8 codepoint matched, so move our checking
         
     | 
| 
      
 806 
     | 
    
         
            +
                                    // onwards to the next byte
         
     | 
| 
      
 807 
     | 
    
         
            +
                                    offset++;
         
     | 
| 
      
 808 
     | 
    
         
            +
                                    a++;
         
     | 
| 
      
 809 
     | 
    
         
            +
                                }
         
     | 
| 
      
 810 
     | 
    
         
            +
                                else
         
     | 
| 
      
 811 
     | 
    
         
            +
                                {
         
     | 
| 
      
 812 
     | 
    
         
            +
                                    // r could be in the middle of an unmatching utf8 code point,
         
     | 
| 
      
 813 
     | 
    
         
            +
                                    // so we need to march it on to the next character beginning,
         
     | 
| 
      
 814 
     | 
    
         
            +
             
     | 
| 
      
 815 
     | 
    
         
            +
                                    do
         
     | 
| 
      
 816 
     | 
    
         
            +
                                    {
         
     | 
| 
      
 817 
     | 
    
         
            +
                                        a++;
         
     | 
| 
      
 818 
     | 
    
         
            +
                                    } while (0x80 == (0xc0 & *a));
         
     | 
| 
      
 819 
     | 
    
         
            +
             
     | 
| 
      
 820 
     | 
    
         
            +
                                    // reset offset too as we found a mismatch
         
     | 
| 
      
 821 
     | 
    
         
            +
                                    offset = 0;
         
     | 
| 
      
 822 
     | 
    
         
            +
                                }
         
     | 
| 
      
 823 
     | 
    
         
            +
                            }
         
     | 
| 
      
 824 
     | 
    
         
            +
                        }
         
     | 
| 
      
 825 
     | 
    
         
            +
             
     | 
| 
      
 826 
     | 
    
         
            +
                        // we found a match on the last utf8 codepoint
         
     | 
| 
      
 827 
     | 
    
         
            +
                        if (0 < offset)
         
     | 
| 
      
 828 
     | 
    
         
            +
                        {
         
     | 
| 
      
 829 
     | 
    
         
            +
                            return (void *)s;
         
     | 
| 
      
 830 
     | 
    
         
            +
                        }
         
     | 
| 
      
 831 
     | 
    
         
            +
             
     | 
| 
      
 832 
     | 
    
         
            +
                        // the current utf8 codepoint in src did not match accept, but src
         
     | 
| 
      
 833 
     | 
    
         
            +
                        // could have been partway through a utf8 codepoint, so we need to
         
     | 
| 
      
 834 
     | 
    
         
            +
                        // march it onto the next utf8 codepoint starting byte
         
     | 
| 
      
 835 
     | 
    
         
            +
                        do
         
     | 
| 
      
 836 
     | 
    
         
            +
                        {
         
     | 
| 
      
 837 
     | 
    
         
            +
                            s++;
         
     | 
| 
      
 838 
     | 
    
         
            +
                        } while ((0x80 == (0xc0 & *s)));
         
     | 
| 
      
 839 
     | 
    
         
            +
                    }
         
     | 
| 
      
 840 
     | 
    
         
            +
             
     | 
| 
      
 841 
     | 
    
         
            +
                    return utf8_null;
         
     | 
| 
      
 842 
     | 
    
         
            +
                }
         
     | 
| 
      
 843 
     | 
    
         
            +
             
     | 
| 
      
 844 
     | 
    
         
            +
                // Returns the number of bytes occupied by the utf8 string str,
                // counting the null terminating byte as part of the size.
                size_t utf8size(const void *str)
                {
                    const char *const first = (const char *)str;
                    const char *cursor = first;

                    // walk forward until the cursor rests on the terminator
                    while ('\0' != *cursor)
                    {
                        cursor++;
                    }

                    // bytes walked, plus one for the null terminating byte
                    return (size_t)(cursor - first) + 1;
                }
         
     | 
| 
      
 857 
     | 
    
         
            +
             
     | 
| 
      
 858 
     | 
    
         
            +
                // Counts how many utf8 codepoints at the start of src are each
                // equal to some utf8 codepoint in accept. Counting stops at the
                // first codepoint of src that is not in accept, or at the end
                // of src.
                size_t utf8spn(const void *src, const void *accept)
                {
                    const char *cursor = (const char *)src;
                    size_t count = 0;

                    while ('\0' != *cursor)
                    {
                        const char *candidate = (const char *)accept;
                        size_t matched = 0; // bytes of the current candidate equal to cursor
                        int found = 0;

                        while (!found && ('\0' != *candidate))
                        {
                            if ((0 < matched) && (0x80 != (0xc0 & *candidate)))
                            {
                                // candidate reached the first byte of the next codepoint
                                // (not 0b10xxxxxx) after a run of equal bytes, so the
                                // previous codepoint of accept matched in full
                                found = 1;
                            }
                            else if (*candidate == cursor[matched])
                            {
                                // bytes agree so far, keep comparing this codepoint
                                matched++;
                                candidate++;
                            }
                            else
                            {
                                // mismatch: candidate may be partway through a codepoint,
                                // so skip to the first byte of the next one and start over
                                candidate++;
                                while (0x80 == (0xc0 & *candidate))
                                {
                                    candidate++;
                                }

                                matched = 0;
                            }
                        }

                        // nothing matched means accept was exhausted without finding
                        // the codepoint at cursor, so the span ends here
                        if (0 == matched)
                        {
                            return count;
                        }

                        // either the inner scan flagged a match, or the match ran up
                        // to the terminator of accept; both consume one codepoint
                        count++;
                        cursor += matched;
                    }

                    return count;
                }
         
     | 
| 
      
 923 
     | 
    
         
            +
             
     | 
| 
      
 924 
     | 
    
         
            +
                void *utf8str(const void *haystack, const void *needle)
         
     | 
| 
      
 925 
     | 
    
         
            +
                {
         
     | 
| 
      
 926 
     | 
    
         
            +
                    const char *h = (const char *)haystack;
         
     | 
| 
      
 927 
     | 
    
         
            +
                    utf8_int32_t throwaway_codepoint;
         
     | 
| 
      
 928 
     | 
    
         
            +
             
     | 
| 
      
 929 
     | 
    
         
            +
                    // if needle has no utf8 codepoints before the null terminating
         
     | 
| 
      
 930 
     | 
    
         
            +
                    // byte then return haystack
         
     | 
| 
      
 931 
     | 
    
         
            +
                    if ('\0' == *((const char *)needle))
         
     | 
| 
      
 932 
     | 
    
         
            +
                    {
         
     | 
| 
      
 933 
     | 
    
         
            +
                        return (void *)haystack;
         
     | 
| 
      
 934 
     | 
    
         
            +
                    }
         
     | 
| 
      
 935 
     | 
    
         
            +
             
     | 
| 
      
 936 
     | 
    
         
            +
                    while ('\0' != *h)
         
     | 
| 
      
 937 
     | 
    
         
            +
                    {
         
     | 
| 
      
 938 
     | 
    
         
            +
                        const char *maybeMatch = h;
         
     | 
| 
      
 939 
     | 
    
         
            +
                        const char *n = (const char *)needle;
         
     | 
| 
      
 940 
     | 
    
         
            +
             
     | 
| 
      
 941 
     | 
    
         
            +
                        while (*h == *n && (*h != '\0' && *n != '\0'))
         
     | 
| 
      
 942 
     | 
    
         
            +
                        {
         
     | 
| 
      
 943 
     | 
    
         
            +
                            n++;
         
     | 
| 
      
 944 
     | 
    
         
            +
                            h++;
         
     | 
| 
      
 945 
     | 
    
         
            +
                        }
         
     | 
| 
      
 946 
     | 
    
         
            +
             
     | 
| 
      
 947 
     | 
    
         
            +
                        if ('\0' == *n)
         
     | 
| 
      
 948 
     | 
    
         
            +
                        {
         
     | 
| 
      
 949 
     | 
    
         
            +
                            // we found the whole utf8 string for needle in haystack at
         
     | 
| 
      
 950 
     | 
    
         
            +
                            // maybeMatch, so return it
         
     | 
| 
      
 951 
     | 
    
         
            +
                            return (void *)maybeMatch;
         
     | 
| 
      
 952 
     | 
    
         
            +
                        }
         
     | 
| 
      
 953 
     | 
    
         
            +
                        else
         
     | 
| 
      
 954 
     | 
    
         
            +
                        {
         
     | 
| 
      
 955 
     | 
    
         
            +
                            // h could be in the middle of an unmatching utf8 codepoint,
         
     | 
| 
      
 956 
     | 
    
         
            +
                            // so we need to march it on to the next character beginning
         
     | 
| 
      
 957 
     | 
    
         
            +
                            // starting from the current character
         
     | 
| 
      
 958 
     | 
    
         
            +
                            h = (const char *)utf8codepoint(maybeMatch, &throwaway_codepoint);
         
     | 
| 
      
 959 
     | 
    
         
            +
                        }
         
     | 
| 
      
 960 
     | 
    
         
            +
                    }
         
     | 
| 
      
 961 
     | 
    
         
            +
             
     | 
| 
      
 962 
     | 
    
         
            +
                    // no match
         
     | 
| 
      
 963 
     | 
    
         
            +
                    return utf8_null;
         
     | 
| 
      
 964 
     | 
    
         
            +
                }
         
     | 
| 
      
 965 
     | 
    
         
            +
             
     | 
| 
      
 966 
     | 
    
         
            +
                void *utf8casestr(const void *haystack, const void *needle)
         
     | 
| 
      
 967 
     | 
    
         
            +
                {
         
     | 
| 
      
 968 
     | 
    
         
            +
                    const void *h = haystack;
         
     | 
| 
      
 969 
     | 
    
         
            +
             
     | 
| 
      
 970 
     | 
    
         
            +
                    // if needle has no utf8 codepoints before the null terminating
         
     | 
| 
      
 971 
     | 
    
         
            +
                    // byte then return haystack
         
     | 
| 
      
 972 
     | 
    
         
            +
                    if ('\0' == *((const char *)needle))
         
     | 
| 
      
 973 
     | 
    
         
            +
                    {
         
     | 
| 
      
 974 
     | 
    
         
            +
                        return (void *)haystack;
         
     | 
| 
      
 975 
     | 
    
         
            +
                    }
         
     | 
| 
      
 976 
     | 
    
         
            +
             
     | 
| 
      
 977 
     | 
    
         
            +
                    for (;;)
         
     | 
| 
      
 978 
     | 
    
         
            +
                    {
         
     | 
| 
      
 979 
     | 
    
         
            +
                        const void *maybeMatch = h;
         
     | 
| 
      
 980 
     | 
    
         
            +
                        const void *n = needle;
         
     | 
| 
      
 981 
     | 
    
         
            +
                        utf8_int32_t h_cp, n_cp;
         
     | 
| 
      
 982 
     | 
    
         
            +
             
     | 
| 
      
 983 
     | 
    
         
            +
                        // Get the next code point and track it
         
     | 
| 
      
 984 
     | 
    
         
            +
                        const void *nextH = h = utf8codepoint(h, &h_cp);
         
     | 
| 
      
 985 
     | 
    
         
            +
                        n = utf8codepoint(n, &n_cp);
         
     | 
| 
      
 986 
     | 
    
         
            +
             
     | 
| 
      
 987 
     | 
    
         
            +
                        while ((0 != h_cp) && (0 != n_cp))
         
     | 
| 
      
 988 
     | 
    
         
            +
                        {
         
     | 
| 
      
 989 
     | 
    
         
            +
                            h_cp = utf8lwrcodepoint(h_cp);
         
     | 
| 
      
 990 
     | 
    
         
            +
                            n_cp = utf8lwrcodepoint(n_cp);
         
     | 
| 
      
 991 
     | 
    
         
            +
             
     | 
| 
      
 992 
     | 
    
         
            +
                            // if we find a mismatch, bail out!
         
     | 
| 
      
 993 
     | 
    
         
            +
                            if (h_cp != n_cp)
         
     | 
| 
      
 994 
     | 
    
         
            +
                            {
         
     | 
| 
      
 995 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 996 
     | 
    
         
            +
                            }
         
     | 
| 
      
 997 
     | 
    
         
            +
             
     | 
| 
      
 998 
     | 
    
         
            +
                            h = utf8codepoint(h, &h_cp);
         
     | 
| 
      
 999 
     | 
    
         
            +
                            n = utf8codepoint(n, &n_cp);
         
     | 
| 
      
 1000 
     | 
    
         
            +
                        }
         
     | 
| 
      
 1001 
     | 
    
         
            +
             
     | 
| 
      
 1002 
     | 
    
         
            +
                        if (0 == n_cp)
         
     | 
| 
      
 1003 
     | 
    
         
            +
                        {
         
     | 
| 
      
 1004 
     | 
    
         
            +
                            // we found the whole utf8 string for needle in haystack at
         
     | 
| 
      
 1005 
     | 
    
         
            +
                            // maybeMatch, so return it
         
     | 
| 
      
 1006 
     | 
    
         
            +
                            return (void *)maybeMatch;
         
     | 
| 
      
 1007 
     | 
    
         
            +
                        }
         
     | 
| 
      
 1008 
     | 
    
         
            +
             
     | 
| 
      
 1009 
     | 
    
         
            +
                        if (0 == h_cp)
         
     | 
| 
      
 1010 
     | 
    
         
            +
                        {
         
     | 
| 
      
 1011 
     | 
    
         
            +
                            // no match
         
     | 
| 
      
 1012 
     | 
    
         
            +
                            return utf8_null;
         
     | 
| 
      
 1013 
     | 
    
         
            +
                        }
         
     | 
| 
      
 1014 
     | 
    
         
            +
             
     | 
| 
      
 1015 
     | 
    
         
            +
                        // Roll back to the next code point in the haystack to test
         
     | 
| 
      
 1016 
     | 
    
         
            +
                        h = nextH;
         
     | 
| 
      
 1017 
     | 
    
         
            +
                    }
         
     | 
| 
      
 1018 
     | 
    
         
            +
                }
         
     | 
| 
      
 1019 
     | 
    
         
            +
             
     | 
| 
      
 1020 
     | 
    
         
            +
                void *utf8valid(const void *str)
         
     | 
| 
      
 1021 
     | 
    
         
            +
                {
         
     | 
| 
      
 1022 
     | 
    
         
            +
                    const char *s = (const char *)str;
         
     | 
| 
      
 1023 
     | 
    
         
            +
             
     | 
| 
      
 1024 
     | 
    
         
            +
                    while ('\0' != *s)
         
     | 
| 
      
 1025 
     | 
    
         
            +
                    {
         
     | 
| 
      
 1026 
     | 
    
         
            +
                        if (0xf0 == (0xf8 & *s))
         
     | 
| 
      
 1027 
     | 
    
         
            +
                        {
         
     | 
| 
      
 1028 
     | 
    
         
            +
                            // ensure each of the 3 following bytes in this 4-byte
         
     | 
| 
      
 1029 
     | 
    
         
            +
                            // utf8 codepoint began with 0b10xxxxxx
         
     | 
| 
      
 1030 
     | 
    
         
            +
                            if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) || (0x80 != (0xc0 & s[3])))
         
     | 
| 
      
 1031 
     | 
    
         
            +
                            {
         
     | 
| 
      
 1032 
     | 
    
         
            +
                                return (void *)s;
         
     | 
| 
      
 1033 
     | 
    
         
            +
                            }
         
     | 
| 
      
 1034 
     | 
    
         
            +
             
     | 
| 
      
 1035 
     | 
    
         
            +
                            // ensure that our utf8 codepoint ended after 4 bytes
         
     | 
| 
      
 1036 
     | 
    
         
            +
                            if (0x80 == (0xc0 & s[4]))
         
     | 
| 
      
 1037 
     | 
    
         
            +
                            {
         
     | 
| 
      
 1038 
     | 
    
         
            +
                                return (void *)s;
         
     | 
| 
      
 1039 
     | 
    
         
            +
                            }
         
     | 
| 
      
 1040 
     | 
    
         
            +
             
     | 
| 
      
 1041 
     | 
    
         
            +
                            // ensure that the top 5 bits of this 4-byte utf8
         
     | 
| 
      
 1042 
     | 
    
         
            +
                            // codepoint were not 0, as then we could have used
         
     | 
| 
      
 1043 
     | 
    
         
            +
                            // one of the smaller encodings
         
     | 
| 
      
 1044 
     | 
    
         
            +
                            if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1])))
         
     | 
| 
      
 1045 
     | 
    
         
            +
                            {
         
     | 
| 
      
 1046 
     | 
    
         
            +
                                return (void *)s;
         
     | 
| 
      
 1047 
     | 
    
         
            +
                            }
         
     | 
| 
      
 1048 
     | 
    
         
            +
             
     | 
| 
      
 1049 
     | 
    
         
            +
                            // 4-byte utf8 code point (began with 0b11110xxx)
         
     | 
| 
      
 1050 
     | 
    
         
            +
                            s += 4;
         
     | 
| 
      
 1051 
     | 
    
         
            +
                        }
         
     | 
| 
      
 1052 
     | 
    
         
            +
                        else if (0xe0 == (0xf0 & *s))
         
     | 
| 
      
 1053 
     | 
    
         
            +
                        {
         
     | 
| 
      
 1054 
     | 
    
         
            +
                            // ensure each of the 2 following bytes in this 3-byte
         
     | 
| 
      
 1055 
     | 
    
         
            +
                            // utf8 codepoint began with 0b10xxxxxx
         
     | 
| 
      
 1056 
     | 
    
         
            +
                            if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])))
         
     | 
| 
      
 1057 
     | 
    
         
            +
                            {
         
     | 
| 
      
 1058 
     | 
    
         
            +
                                return (void *)s;
         
     | 
| 
      
 1059 
     | 
    
         
            +
                            }
         
     | 
| 
      
 1060 
     | 
    
         
            +
             
     | 
| 
      
 1061 
     | 
    
         
            +
                            // ensure that our utf8 codepoint ended after 3 bytes
         
     | 
| 
      
 1062 
     | 
    
         
            +
                            if (0x80 == (0xc0 & s[3]))
         
     | 
| 
      
 1063 
     | 
    
         
            +
                            {
         
     | 
| 
      
 1064 
     | 
    
         
            +
                                return (void *)s;
         
     | 
| 
      
 1065 
     | 
    
         
            +
                            }
         
     | 
| 
      
 1066 
     | 
    
         
            +
             
     | 
| 
      
 1067 
     | 
    
         
            +
                            // ensure that the top 5 bits of this 3-byte utf8
         
     | 
| 
      
 1068 
     | 
    
         
            +
                            // codepoint were not 0, as then we could have used
         
     | 
| 
      
 1069 
     | 
    
         
            +
                            // one of the smaller encodings
         
     | 
| 
      
 1070 
     | 
    
         
            +
                            if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1])))
         
     | 
| 
      
 1071 
     | 
    
         
            +
                            {
         
     | 
| 
      
 1072 
     | 
    
         
            +
                                return (void *)s;
         
     | 
| 
      
 1073 
     | 
    
         
            +
                            }
         
     | 
| 
      
 1074 
     | 
    
         
            +
             
     | 
| 
      
 1075 
     | 
    
         
            +
                            // 3-byte utf8 code point (began with 0b1110xxxx)
         
     | 
| 
      
 1076 
     | 
    
         
            +
                            s += 3;
         
     | 
| 
      
 1077 
     | 
    
         
            +
                        }
         
     | 
| 
      
 1078 
     | 
    
         
            +
                        else if (0xc0 == (0xe0 & *s))
         
     | 
| 
      
 1079 
     | 
    
         
            +
                        {
         
     | 
| 
      
 1080 
     | 
    
         
            +
                            // ensure the 1 following byte in this 2-byte
         
     | 
| 
      
 1081 
     | 
    
         
            +
                            // utf8 codepoint began with 0b10xxxxxx
         
     | 
| 
      
 1082 
     | 
    
         
            +
                            if (0x80 != (0xc0 & s[1]))
         
     | 
| 
      
 1083 
     | 
    
         
            +
                            {
         
     | 
| 
      
 1084 
     | 
    
         
            +
                                return (void *)s;
         
     | 
| 
      
 1085 
     | 
    
         
            +
                            }
         
     | 
| 
      
 1086 
     | 
    
         
            +
             
     | 
| 
      
 1087 
     | 
    
         
            +
                            // ensure that our utf8 codepoint ended after 2 bytes
         
     | 
| 
      
 1088 
     | 
    
         
            +
                            if (0x80 == (0xc0 & s[2]))
         
     | 
| 
      
 1089 
     | 
    
         
            +
                            {
         
     | 
| 
      
 1090 
     | 
    
         
            +
                                return (void *)s;
         
     | 
| 
      
 1091 
     | 
    
         
            +
                            }
         
     | 
| 
      
 1092 
     | 
    
         
            +
             
     | 
| 
      
 1093 
     | 
    
         
            +
                            // ensure that the top 4 bits of this 2-byte utf8
         
     | 
| 
      
 1094 
     | 
    
         
            +
                            // codepoint were not 0, as then we could have used
         
     | 
| 
      
 1095 
     | 
    
         
            +
                            // one of the smaller encodings
         
     | 
| 
      
 1096 
     | 
    
         
            +
                            if (0 == (0x1e & s[0]))
         
     | 
| 
      
 1097 
     | 
    
         
            +
                            {
         
     | 
| 
      
 1098 
     | 
    
         
            +
                                return (void *)s;
         
     | 
| 
      
 1099 
     | 
    
         
            +
                            }
         
     | 
| 
      
 1100 
     | 
    
         
            +
             
     | 
| 
      
 1101 
     | 
    
         
            +
                            // 2-byte utf8 code point (began with 0b110xxxxx)
         
     | 
| 
      
 1102 
     | 
    
         
            +
                            s += 2;
         
     | 
| 
      
 1103 
     | 
    
         
            +
                        }
         
     | 
| 
      
 1104 
     | 
    
         
            +
                        else if (0x00 == (0x80 & *s))
         
     | 
| 
      
 1105 
     | 
    
         
            +
                        {
         
     | 
| 
      
 1106 
     | 
    
         
            +
                            // 1-byte ascii (began with 0b0xxxxxxx)
         
     | 
| 
      
 1107 
     | 
    
         
            +
                            s += 1;
         
     | 
| 
      
 1108 
     | 
    
         
            +
                        }
         
     | 
| 
      
 1109 
     | 
    
         
            +
                        else
         
     | 
| 
      
 1110 
     | 
    
         
            +
                        {
         
     | 
| 
      
 1111 
     | 
    
         
            +
                            // we have an invalid 0b1xxxxxxx utf8 code point entry
         
     | 
| 
      
 1112 
     | 
    
         
            +
                            return (void *)s;
         
     | 
| 
      
 1113 
     | 
    
         
            +
                        }
         
     | 
| 
      
 1114 
     | 
    
         
            +
                    }
         
     | 
| 
      
 1115 
     | 
    
         
            +
             
     | 
| 
      
 1116 
     | 
    
         
            +
                    return utf8_null;
         
     | 
| 
      
 1117 
     | 
    
         
            +
                }
         
     | 
| 
      
 1118 
     | 
    
         
            +
             
     | 
| 
      
 1119 
     | 
    
         
            +
                // Decodes the utf8 sequence starting at str into *out_codepoint and returns
                // a pointer to the byte immediately following that sequence.
                //
                // The sequence length is chosen from the lead byte alone (0b11110xxx -> 4,
                // 0b1110xxxx -> 3, 0b110xxxxx -> 2, anything else -> 1). Continuation bytes
                // are masked with 0x3f but not otherwise checked.
                void *utf8codepoint(const void *utf8_restrict str, utf8_int32_t *utf8_restrict out_codepoint)
                {
                    const char *bytes = (const char *)str;
                    const char lead = bytes[0];

                    if (0xf0 == (0xf8 & lead))
                    {
                        // 4 bytes: 3 payload bits in the lead byte, 6 in each of the next three
                        *out_codepoint =
                            ((0x07 & lead) << 18) | ((0x3f & bytes[1]) << 12) | ((0x3f & bytes[2]) << 6) | (0x3f & bytes[3]);
                        return (void *)(bytes + 4);
                    }

                    if (0xe0 == (0xf0 & lead))
                    {
                        // 3 bytes: 4 payload bits in the lead byte, 6 in each of the next two
                        *out_codepoint = ((0x0f & lead) << 12) | ((0x3f & bytes[1]) << 6) | (0x3f & bytes[2]);
                        return (void *)(bytes + 3);
                    }

                    if (0xc0 == (0xe0 & lead))
                    {
                        // 2 bytes: 5 payload bits in the lead byte, 6 in the next one
                        *out_codepoint = ((0x1f & lead) << 6) | (0x3f & bytes[1]);
                        return (void *)(bytes + 2);
                    }

                    // every other lead byte is treated as a 1 byte codepoint and copied as-is
                    *out_codepoint = lead;
                    return (void *)(bytes + 1);
                }
         
     | 
| 
      
 1150 
     | 
    
         
            +
             
     | 
| 
      
 1151 
     | 
    
         
            +
                // Returns how many bytes the utf8 sequence starting at str occupies, judged
                // only by the bit pattern of its first byte.
                size_t utf8codepointcalcsize(const void *utf8_restrict str)
                {
                    const char lead = *(const char *)str;
                    size_t width = 1; // any byte that is not a multi-byte lead counts as 1

                    if (0xf0 == (0xf8 & lead))
                    {
                        // 0b11110xxx
                        width = 4;
                    }
                    else if (0xe0 == (0xf0 & lead))
                    {
                        // 0b1110xxxx
                        width = 3;
                    }
                    else if (0xc0 == (0xe0 & lead))
                    {
                        // 0b110xxxxx
                        width = 2;
                    }

                    return width;
                }
         
     | 
| 
      
 1174 
     | 
    
         
            +
             
     | 
| 
      
 1175 
     | 
    
         
            +
                // Returns the number of bytes (1 to 4) used to encode chr as utf8.
                //
                // Each mask selects the bits that must all be clear for chr to fit in the
                // next smaller encoding; the widest case is tested first so every check can
                // return immediately.
                size_t utf8codepointsize(utf8_int32_t chr)
                {
                    if (0 != ((utf8_int32_t)0xffff0000 & chr))
                    {
                        // needs more than 16 bits
                        return 4;
                    }

                    if (0 != ((utf8_int32_t)0xfffff800 & chr))
                    {
                        // needs more than 11 bits
                        return 3;
                    }

                    if (0 != ((utf8_int32_t)0xffffff80 & chr))
                    {
                        // needs more than 7 bits
                        return 2;
                    }

                    return 1;
                }
         
     | 
| 
      
 1194 
     | 
    
         
            +
             
     | 
| 
      
 1195 
     | 
    
         
            +
                // Encodes chr as utf8 into the buffer at str, which has room for n bytes.
                //
                // Returns a pointer just past the bytes written, or utf8_null (writing
                // nothing) when n is smaller than the encoding requires.
                void *utf8catcodepoint(void *utf8_restrict str, utf8_int32_t chr, size_t n)
                {
                    char *dst = (char *)str;
                    size_t need;

                    // pick the encoding length from the highest bits set in chr
                    if (0 == ((utf8_int32_t)0xffffff80 & chr))
                    {
                        need = 1;
                    }
                    else if (0 == ((utf8_int32_t)0xfffff800 & chr))
                    {
                        need = 2;
                    }
                    else if (0 == ((utf8_int32_t)0xffff0000 & chr))
                    {
                        need = 3;
                    }
                    else
                    {
                        need = 4;
                    }

                    if (n < need)
                    {
                        return utf8_null;
                    }

                    switch (need)
                    {
                        case 1:
                            // 7-bit ascii (0b0xxxxxxx)
                            dst[0] = (char)chr;
                            break;
                        case 2:
                            // 11-bit code point (0b110xxxxx 0b10xxxxxx)
                            dst[0] = 0xc0 | (char)(chr >> 6);
                            dst[1] = 0x80 | (char)(chr & 0x3f);
                            break;
                        case 3:
                            // 16-bit code point (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
                            dst[0] = 0xe0 | (char)(chr >> 12);
                            dst[1] = 0x80 | (char)((chr >> 6) & 0x3f);
                            dst[2] = 0x80 | (char)(chr & 0x3f);
                            break;
                        default:
                            // 21-bit code point (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
                            dst[0] = 0xf0 | (char)(chr >> 18);
                            dst[1] = 0x80 | (char)((chr >> 12) & 0x3f);
                            dst[2] = 0x80 | (char)((chr >> 6) & 0x3f);
                            dst[3] = 0x80 | (char)(chr & 0x3f);
                            break;
                    }

                    return dst + need;
                }
         
     | 
| 
      
 1252 
     | 
    
         
            +
             
     | 
| 
      
 1253 
     | 
    
         
            +
                // Returns non-zero when chr is changed by utf8uprcodepoint, i.e. when an
                // upper-case mapping exists for it; returns 0 otherwise.
                int utf8islower(utf8_int32_t chr)
                {
                    const utf8_int32_t upper = utf8uprcodepoint(chr);

                    return upper != chr;
                }
         
     | 
| 
      
 1257 
     | 
    
         
            +
             
     | 
| 
      
 1258 
     | 
    
         
            +
                // Returns non-zero when chr is changed by utf8lwrcodepoint, i.e. when a
                // lower-case mapping exists for it; returns 0 otherwise.
                int utf8isupper(utf8_int32_t chr)
                {
                    const utf8_int32_t lower = utf8lwrcodepoint(chr);

                    return lower != chr;
                }
         
     | 
| 
      
 1262 
     | 
    
         
            +
             
     | 
| 
      
 1263 
     | 
    
         
            +
                // Lower-cases the utf8 string at str in place, one codepoint at a time,
                // stopping at the first codepoint that decodes to 0.
                //
                // A codepoint is rewritten only when utf8lwrcodepoint maps it to a different
                // value; the replacement is written at the position of the original.
                void utf8lwr(void *utf8_restrict str)
                {
                    void *cursor = str;
                    utf8_int32_t cp;
                    void *next = utf8codepoint(cursor, &cp);

                    for (; 0 != cp; next = utf8codepoint(cursor, &cp))
                    {
                        const utf8_int32_t lowered = utf8lwrcodepoint(cp);

                        if (lowered != cp)
                        {
                            // next was computed before this write, so stepping is unaffected
                            utf8catcodepoint(cursor, lowered, utf8codepointsize(lowered));
                        }

                        cursor = next;
                    }
                }
         
     | 
| 
      
 1285 
     | 
    
         
            +
             
     | 
| 
      
 1286 
     | 
    
         
            +
                // Upper-cases the utf8 string at str in place, one codepoint at a time,
                // stopping at the first codepoint that decodes to 0.
                //
                // A codepoint is rewritten only when utf8uprcodepoint maps it to a different
                // value; the replacement is written at the position of the original.
                void utf8upr(void *utf8_restrict str)
                {
                    void *cursor = str;
                    utf8_int32_t cp;
                    void *next = utf8codepoint(cursor, &cp);

                    for (; 0 != cp; next = utf8codepoint(cursor, &cp))
                    {
                        const utf8_int32_t raised = utf8uprcodepoint(cp);

                        if (raised != cp)
                        {
                            // next was computed before this write, so stepping is unaffected
                            utf8catcodepoint(cursor, raised, utf8codepointsize(raised));
                        }

                        cursor = next;
                    }
                }
         
     | 
| 
      
 1308 
     | 
    
         
            +
             
     | 
| 
      
 1309 
     | 
    
         
            +
                utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp)
         
     | 
| 
      
 1310 
     | 
    
         
            +
                {
         
     | 
| 
      
 1311 
     | 
    
         
            +
                    if (((0x0041 <= cp) && (0x005a >= cp)) || ((0x00c0 <= cp) && (0x00d6 >= cp)) ||
         
     | 
| 
      
 1312 
     | 
    
         
            +
                        ((0x00d8 <= cp) && (0x00de >= cp)) || ((0x0391 <= cp) && (0x03a1 >= cp)) ||
         
     | 
| 
      
 1313 
     | 
    
         
            +
                        ((0x03a3 <= cp) && (0x03ab >= cp)) || ((0x0410 <= cp) && (0x042f >= cp)))
         
     | 
| 
      
 1314 
     | 
    
         
            +
                    {
         
     | 
| 
      
 1315 
     | 
    
         
            +
                        cp += 32;
         
     | 
| 
      
 1316 
     | 
    
         
            +
                    }
         
     | 
| 
      
 1317 
     | 
    
         
            +
                    else if ((0x0400 <= cp) && (0x040f >= cp))
         
     | 
| 
      
 1318 
     | 
    
         
            +
                    {
         
     | 
| 
      
 1319 
     | 
    
         
            +
                        cp += 80;
         
     | 
| 
      
 1320 
     | 
    
         
            +
                    }
         
     | 
| 
      
 1321 
     | 
    
         
            +
                    else if (((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp)) ||
         
     | 
| 
      
 1322 
     | 
    
         
            +
                             ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp)) ||
         
     | 
| 
      
 1323 
     | 
    
         
            +
                             ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp)) ||
         
     | 
| 
      
 1324 
     | 
    
         
            +
                             ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp)) ||
         
     | 
| 
      
 1325 
     | 
    
         
            +
                             ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp)) ||
         
     | 
| 
      
 1326 
     | 
    
         
            +
                             ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04ff >= cp)))
         
     | 
| 
      
 1327 
     | 
    
         
            +
                    {
         
     | 
| 
      
 1328 
     | 
    
         
            +
                        cp |= 0x1;
         
     | 
| 
      
 1329 
     | 
    
         
            +
                    }
         
     | 
| 
      
 1330 
     | 
    
         
            +
                    else if (((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp)) ||
         
     | 
| 
      
 1331 
     | 
    
         
            +
                             ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp)) ||
         
     | 
| 
      
 1332 
     | 
    
         
            +
                             ((0x01cd <= cp) && (0x01dc >= cp)))
         
     | 
| 
      
 1333 
     | 
    
         
            +
                    {
         
     | 
| 
      
 1334 
     | 
    
         
            +
                        cp += 1;
         
     | 
| 
      
 1335 
     | 
    
         
            +
                        cp &= ~0x1;
         
     | 
| 
      
 1336 
     | 
    
         
            +
                    }
         
     | 
| 
      
 1337 
     | 
    
         
            +
                    else
         
     | 
| 
      
 1338 
     | 
    
         
            +
                    {
         
     | 
| 
      
 1339 
     | 
    
         
            +
                        switch (cp)
         
     | 
| 
      
 1340 
     | 
    
         
            +
                        {
         
     | 
| 
      
 1341 
     | 
    
         
            +
                            default:
         
     | 
| 
      
 1342 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1343 
     | 
    
         
            +
                            case 0x0178:
         
     | 
| 
      
 1344 
     | 
    
         
            +
                                cp = 0x00ff;
         
     | 
| 
      
 1345 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1346 
     | 
    
         
            +
                            case 0x0243:
         
     | 
| 
      
 1347 
     | 
    
         
            +
                                cp = 0x0180;
         
     | 
| 
      
 1348 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1349 
     | 
    
         
            +
                            case 0x018e:
         
     | 
| 
      
 1350 
     | 
    
         
            +
                                cp = 0x01dd;
         
     | 
| 
      
 1351 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1352 
     | 
    
         
            +
                            case 0x023d:
         
     | 
| 
      
 1353 
     | 
    
         
            +
                                cp = 0x019a;
         
     | 
| 
      
 1354 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1355 
     | 
    
         
            +
                            case 0x0220:
         
     | 
| 
      
 1356 
     | 
    
         
            +
                                cp = 0x019e;
         
     | 
| 
      
 1357 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1358 
     | 
    
         
            +
                            case 0x01b7:
         
     | 
| 
      
 1359 
     | 
    
         
            +
                                cp = 0x0292;
         
     | 
| 
      
 1360 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1361 
     | 
    
         
            +
                            case 0x01c4:
         
     | 
| 
      
 1362 
     | 
    
         
            +
                                cp = 0x01c6;
         
     | 
| 
      
 1363 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1364 
     | 
    
         
            +
                            case 0x01c7:
         
     | 
| 
      
 1365 
     | 
    
         
            +
                                cp = 0x01c9;
         
     | 
| 
      
 1366 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1367 
     | 
    
         
            +
                            case 0x01ca:
         
     | 
| 
      
 1368 
     | 
    
         
            +
                                cp = 0x01cc;
         
     | 
| 
      
 1369 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1370 
     | 
    
         
            +
                            case 0x01f1:
         
     | 
| 
      
 1371 
     | 
    
         
            +
                                cp = 0x01f3;
         
     | 
| 
      
 1372 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1373 
     | 
    
         
            +
                            case 0x01f7:
         
     | 
| 
      
 1374 
     | 
    
         
            +
                                cp = 0x01bf;
         
     | 
| 
      
 1375 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1376 
     | 
    
         
            +
                            case 0x0187:
         
     | 
| 
      
 1377 
     | 
    
         
            +
                                cp = 0x0188;
         
     | 
| 
      
 1378 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1379 
     | 
    
         
            +
                            case 0x018b:
         
     | 
| 
      
 1380 
     | 
    
         
            +
                                cp = 0x018c;
         
     | 
| 
      
 1381 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1382 
     | 
    
         
            +
                            case 0x0191:
         
     | 
| 
      
 1383 
     | 
    
         
            +
                                cp = 0x0192;
         
     | 
| 
      
 1384 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1385 
     | 
    
         
            +
                            case 0x0198:
         
     | 
| 
      
 1386 
     | 
    
         
            +
                                cp = 0x0199;
         
     | 
| 
      
 1387 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1388 
     | 
    
         
            +
                            case 0x01a7:
         
     | 
| 
      
 1389 
     | 
    
         
            +
                                cp = 0x01a8;
         
     | 
| 
      
 1390 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1391 
     | 
    
         
            +
                            case 0x01ac:
         
     | 
| 
      
 1392 
     | 
    
         
            +
                                cp = 0x01ad;
         
     | 
| 
      
 1393 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1394 
     | 
    
         
            +
                            case 0x01af:
         
     | 
| 
      
 1395 
     | 
    
         
            +
                                cp = 0x01b0;
         
     | 
| 
      
 1396 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1397 
     | 
    
         
            +
                            case 0x01b8:
         
     | 
| 
      
 1398 
     | 
    
         
            +
                                cp = 0x01b9;
         
     | 
| 
      
 1399 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1400 
     | 
    
         
            +
                            case 0x01bc:
         
     | 
| 
      
 1401 
     | 
    
         
            +
                                cp = 0x01bd;
         
     | 
| 
      
 1402 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1403 
     | 
    
         
            +
                            case 0x01f4:
         
     | 
| 
      
 1404 
     | 
    
         
            +
                                cp = 0x01f5;
         
     | 
| 
      
 1405 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1406 
     | 
    
         
            +
                            case 0x023b:
         
     | 
| 
      
 1407 
     | 
    
         
            +
                                cp = 0x023c;
         
     | 
| 
      
 1408 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1409 
     | 
    
         
            +
                            case 0x0241:
         
     | 
| 
      
 1410 
     | 
    
         
            +
                                cp = 0x0242;
         
     | 
| 
      
 1411 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1412 
     | 
    
         
            +
                            case 0x03fd:
         
     | 
| 
      
 1413 
     | 
    
         
            +
                                cp = 0x037b;
         
     | 
| 
      
 1414 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1415 
     | 
    
         
            +
                            case 0x03fe:
         
     | 
| 
      
 1416 
     | 
    
         
            +
                                cp = 0x037c;
         
     | 
| 
      
 1417 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1418 
     | 
    
         
            +
                            case 0x03ff:
         
     | 
| 
      
 1419 
     | 
    
         
            +
                                cp = 0x037d;
         
     | 
| 
      
 1420 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1421 
     | 
    
         
            +
                            case 0x037f:
         
     | 
| 
      
 1422 
     | 
    
         
            +
                                cp = 0x03f3;
         
     | 
| 
      
 1423 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1424 
     | 
    
         
            +
                            case 0x0386:
         
     | 
| 
      
 1425 
     | 
    
         
            +
                                cp = 0x03ac;
         
     | 
| 
      
 1426 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1427 
     | 
    
         
            +
                            case 0x0388:
         
     | 
| 
      
 1428 
     | 
    
         
            +
                                cp = 0x03ad;
         
     | 
| 
      
 1429 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1430 
     | 
    
         
            +
                            case 0x0389:
         
     | 
| 
      
 1431 
     | 
    
         
            +
                                cp = 0x03ae;
         
     | 
| 
      
 1432 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1433 
     | 
    
         
            +
                            case 0x038a:
         
     | 
| 
      
 1434 
     | 
    
         
            +
                                cp = 0x03af;
         
     | 
| 
      
 1435 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1436 
     | 
    
         
            +
                            case 0x038c:
         
     | 
| 
      
 1437 
     | 
    
         
            +
                                cp = 0x03cc;
         
     | 
| 
      
 1438 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1439 
     | 
    
         
            +
                            case 0x038e:
         
     | 
| 
      
 1440 
     | 
    
         
            +
                                cp = 0x03cd;
         
     | 
| 
      
 1441 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1442 
     | 
    
         
            +
                            case 0x038f:
         
     | 
| 
      
 1443 
     | 
    
         
            +
                                cp = 0x03ce;
         
     | 
| 
      
 1444 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1445 
     | 
    
         
            +
                            case 0x0370:
         
     | 
| 
      
 1446 
     | 
    
         
            +
                                cp = 0x0371;
         
     | 
| 
      
 1447 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1448 
     | 
    
         
            +
                            case 0x0372:
         
     | 
| 
      
 1449 
     | 
    
         
            +
                                cp = 0x0373;
         
     | 
| 
      
 1450 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1451 
     | 
    
         
            +
                            case 0x0376:
         
     | 
| 
      
 1452 
     | 
    
         
            +
                                cp = 0x0377;
         
     | 
| 
      
 1453 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1454 
     | 
    
         
            +
                            case 0x03f4:
         
     | 
| 
      
 1455 
     | 
    
         
            +
                                cp = 0x03d1;
         
     | 
| 
      
 1456 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1457 
     | 
    
         
            +
                            case 0x03cf:
         
     | 
| 
      
 1458 
     | 
    
         
            +
                                cp = 0x03d7;
         
     | 
| 
      
 1459 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1460 
     | 
    
         
            +
                            case 0x03f9:
         
     | 
| 
      
 1461 
     | 
    
         
            +
                                cp = 0x03f2;
         
     | 
| 
      
 1462 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1463 
     | 
    
         
            +
                            case 0x03f7:
         
     | 
| 
      
 1464 
     | 
    
         
            +
                                cp = 0x03f8;
         
     | 
| 
      
 1465 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1466 
     | 
    
         
            +
                            case 0x03fa:
         
     | 
| 
      
 1467 
     | 
    
         
            +
                                cp = 0x03fb;
         
     | 
| 
      
 1468 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 1469 
     | 
    
         
            +
                        };
         
     | 
| 
      
 1470 
     | 
    
         
            +
                    }
         
     | 
| 
      
 1471 
     | 
    
         
            +
             
     | 
| 
      
 1472 
     | 
    
         
            +
                    return cp;
         
     | 
| 
      
 1473 
     | 
    
         
            +
                }
         
     | 
| 
      
 1474 
     | 
    
         
            +
             
     | 
| 
      
 1475 
     | 
    
         
            +
                // Return the upper-case form of the codepoint `cp`.
                //
                // Only codepoints that have an explicit rule below are changed (ranges
                // within U+0061..U+04FF plus a handful of singletons); every other value,
                // including codepoints that are already upper case, is returned as-is.
                utf8_int32_t utf8uprcodepoint(utf8_int32_t cp)
                {
                    if (((0x0061 <= cp) && (0x007a >= cp)) || ((0x00e0 <= cp) && (0x00f6 >= cp)) ||
                        ((0x00f8 <= cp) && (0x00fe >= cp)) || ((0x03b1 <= cp) && (0x03c1 >= cp)) ||
                        ((0x03c3 <= cp) && (0x03cb >= cp)) || ((0x0430 <= cp) && (0x044f >= cp)))
                    {
                        // Blocks where the upper-case letter sits exactly 32 below.
                        cp -= 32;
                    }
                    else if ((0x0450 <= cp) && (0x045f >= cp))
                    {
                        // Cyrillic extensions: upper-case letters sit 80 below.
                        cp -= 80;
                    }
                    else if (((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp)) ||
                             ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp)) ||
                             ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp)) ||
                             ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp)) ||
                             ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp)) ||
                             ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04bf >= cp)) ||
                             ((0x04d0 <= cp) && (0x04ff >= cp)))
                    {
                        // Interleaved pairs with the upper-case letter on the even
                        // codepoint: clearing bit 0 maps lower -> upper and leaves an
                        // upper-case input untouched. U+04C0..U+04CF is deliberately
                        // excluded because its pairs are laid out differently.
                        cp &= ~0x1;
                    }
                    else if (((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp)) ||
                             ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp)) ||
                             ((0x01cd <= cp) && (0x01dc >= cp)) || ((0x04c1 <= cp) && (0x04ce >= cp)))
                    {
                        // Interleaved pairs with the upper-case letter on the odd
                        // codepoint: an even (lower) input moves down by one, an odd
                        // (upper) input round-trips to itself.
                        cp -= 1;
                        cp |= 0x1;
                    }
                    else
                    {
                        // Irregular one-off mappings.
                        switch (cp)
                        {
                            default:
                                break;
                            case 0x00ff:
                                cp = 0x0178;
                                break;
                            case 0x0180:
                                cp = 0x0243;
                                break;
                            case 0x01dd:
                                cp = 0x018e;
                                break;
                            case 0x019a:
                                cp = 0x023d;
                                break;
                            case 0x019e:
                                cp = 0x0220;
                                break;
                            case 0x0292:
                                cp = 0x01b7;
                                break;
                            case 0x01c6:
                                cp = 0x01c4;
                                break;
                            case 0x01c9:
                                cp = 0x01c7;
                                break;
                            case 0x01cc:
                                cp = 0x01ca;
                                break;
                            case 0x01f3:
                                cp = 0x01f1;
                                break;
                            case 0x01bf:
                                cp = 0x01f7;
                                break;
                            case 0x0188:
                                cp = 0x0187;
                                break;
                            case 0x018c:
                                cp = 0x018b;
                                break;
                            case 0x0192:
                                cp = 0x0191;
                                break;
                            case 0x0199:
                                cp = 0x0198;
                                break;
                            case 0x01a8:
                                cp = 0x01a7;
                                break;
                            case 0x01ad:
                                cp = 0x01ac;
                                break;
                            case 0x01b9:
                                cp = 0x01b8;
                                break;
                            case 0x01bd:
                                cp = 0x01bc;
                                break;
                            case 0x01f5:
                                cp = 0x01f4;
                                break;
                            case 0x023c:
                                cp = 0x023b;
                                break;
                            case 0x0242:
                                cp = 0x0241;
                                break;
                            case 0x037b:
                                cp = 0x03fd;
                                break;
                            case 0x037c:
                                cp = 0x03fe;
                                break;
                            case 0x037d:
                                cp = 0x03ff;
                                break;
                            case 0x03f3:
                                cp = 0x037f;
                                break;
                            case 0x03ac:
                                cp = 0x0386;
                                break;
                            case 0x03ad:
                                cp = 0x0388;
                                break;
                            case 0x03ae:
                                cp = 0x0389;
                                break;
                            case 0x03af:
                                cp = 0x038a;
                                break;
                            case 0x03cc:
                                cp = 0x038c;
                                break;
                            case 0x03cd:
                                cp = 0x038e;
                                break;
                            case 0x03ce:
                                cp = 0x038f;
                                break;
                            case 0x0371:
                                cp = 0x0370;
                                break;
                            case 0x0373:
                                cp = 0x0372;
                                break;
                            case 0x0377:
                                cp = 0x0376;
                                break;
                            case 0x03d1:
                                cp = 0x03f4;
                                break;
                            case 0x03d7:
                                cp = 0x03cf;
                                break;
                            case 0x03f2:
                                cp = 0x03f9;
                                break;
                            case 0x03f8:
                                cp = 0x03f7;
                                break;
                            case 0x03fb:
                                cp = 0x03fa;
                                break;
                            case 0x04cf:
                                // Small palochka pairs with U+04C0, not with its
                                // immediate neighbour.
                                cp = 0x04c0;
                                break;
                        }
                    }

                    return cp;
                }
         
     | 
| 
      
 1640 
     | 
    
         
            +
             
     | 
| 
      
 1641 
     | 
    
         
            +
            #undef utf8_restrict
         
     | 
| 
      
 1642 
     | 
    
         
            +
            #undef utf8_null
         
     | 
| 
      
 1643 
     | 
    
         
            +
             
     | 
| 
      
 1644 
     | 
    
         
            +
            #ifdef __cplusplus
         
     | 
| 
      
 1645 
     | 
    
         
            +
            } // extern "C"
         
     | 
| 
      
 1646 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 1647 
     | 
    
         
            +
             
     | 
| 
      
 1648 
     | 
    
         
            +
            #if defined(__clang__)
         
     | 
| 
      
 1649 
     | 
    
         
            +
            #pragma clang diagnostic pop
         
     | 
| 
      
 1650 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 1651 
     | 
    
         
            +
             
     | 
| 
      
 1652 
     | 
    
         
            +
            #endif // SHEREDOM_UTF8_H_INCLUDED
         
     |