bare-buffer 2.5.9 → 2.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +4 -11
- package/package.json +3 -8
- package/prebuilds/android-arm/bare-buffer.bare +0 -0
- package/prebuilds/android-arm64/bare-buffer.bare +0 -0
- package/prebuilds/android-ia32/bare-buffer.bare +0 -0
- package/prebuilds/android-x64/bare-buffer.bare +0 -0
- package/prebuilds/darwin-arm64/bare-buffer.bare +0 -0
- package/prebuilds/darwin-x64/bare-buffer.bare +0 -0
- package/prebuilds/ios-arm64/bare-buffer.bare +0 -0
- package/prebuilds/ios-arm64-simulator/bare-buffer.bare +0 -0
- package/prebuilds/ios-x64-simulator/bare-buffer.bare +0 -0
- package/prebuilds/linux-arm64/bare-buffer.bare +0 -0
- package/prebuilds/linux-x64/bare-buffer.bare +0 -0
- package/prebuilds/win32-arm64/bare-buffer.bare +0 -0
- package/prebuilds/win32-x64/bare-buffer.bare +0 -0
- package/vendor/libbase64/CMakeLists.txt +0 -70
- package/vendor/libbase64/include/base64.h +0 -22
- package/vendor/libbase64/src/base64.c +0 -107
- package/vendor/libhex/CMakeLists.txt +0 -71
- package/vendor/libhex/include/hex.h +0 -22
- package/vendor/libhex/src/hex.c +0 -67
- package/vendor/libutf/CMakeLists.txt +0 -93
- package/vendor/libutf/include/utf/string.h +0 -786
- package/vendor/libutf/include/utf.h +0 -132
- package/vendor/libutf/src/ascii/validate.c +0 -47
- package/vendor/libutf/src/endianness.c +0 -19
- package/vendor/libutf/src/endianness.h +0 -54
- package/vendor/libutf/src/latin1/convert-to-utf16.c +0 -37
- package/vendor/libutf/src/latin1/convert-to-utf32.c +0 -34
- package/vendor/libutf/src/latin1/convert-to-utf8.c +0 -58
- package/vendor/libutf/src/latin1/length-from-utf16.c +0 -26
- package/vendor/libutf/src/latin1/length-from-utf32.c +0 -26
- package/vendor/libutf/src/latin1/length-from-utf8.c +0 -34
- package/vendor/libutf/src/utf16/convert-to-latin1.c +0 -41
- package/vendor/libutf/src/utf16/convert-to-utf32.c +0 -56
- package/vendor/libutf/src/utf16/convert-to-utf8.c +0 -75
- package/vendor/libutf/src/utf16/length-from-latin1.c +0 -26
- package/vendor/libutf/src/utf16/length-from-utf32.c +0 -33
- package/vendor/libutf/src/utf16/length-from-utf8.c +0 -38
- package/vendor/libutf/src/utf16/validate.c +0 -53
- package/vendor/libutf/src/utf32/convert-to-latin1.c +0 -40
- package/vendor/libutf/src/utf32/convert-to-utf16.c +0 -56
- package/vendor/libutf/src/utf32/convert-to-utf8.c +0 -71
- package/vendor/libutf/src/utf32/length-from-latin1.c +0 -26
- package/vendor/libutf/src/utf32/length-from-utf16.c +0 -35
- package/vendor/libutf/src/utf32/length-from-utf8.c +0 -35
- package/vendor/libutf/src/utf32/validate.c +0 -39
- package/vendor/libutf/src/utf8/convert-to-latin1.c +0 -70
- package/vendor/libutf/src/utf8/convert-to-utf16.c +0 -95
- package/vendor/libutf/src/utf8/convert-to-utf32.c +0 -110
- package/vendor/libutf/src/utf8/length-from-latin1.c +0 -32
- package/vendor/libutf/src/utf8/length-from-utf16.c +0 -44
- package/vendor/libutf/src/utf8/length-from-utf32.c +0 -35
- package/vendor/libutf/src/utf8/string.c +0 -149
- package/vendor/libutf/src/utf8/validate.c +0 -107
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
#include <stddef.h>
|
|
2
|
-
#include <stdint.h>
|
|
3
|
-
#include <string.h>
|
|
4
|
-
|
|
5
|
-
#include "../../include/utf.h"
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Modified from https://github.com/simdutf/simdutf
|
|
9
|
-
*
|
|
10
|
-
* Copyright 2020 The simdutf authors
|
|
11
|
-
*
|
|
12
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
-
* you may not use this file except in compliance with the License.
|
|
14
|
-
* You may obtain a copy of the License at
|
|
15
|
-
*
|
|
16
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
17
|
-
*
|
|
18
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
19
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
-
* See the License for the specific language governing permissions and
|
|
22
|
-
* limitations under the License.
|
|
23
|
-
*/
|
|
24
|
-
|
|
25
|
-
size_t
|
|
26
|
-
utf8_convert_to_utf32 (const utf8_t *data, size_t len, utf32_t *result) {
|
|
27
|
-
size_t pos = 0;
|
|
28
|
-
utf32_t *start = result;
|
|
29
|
-
|
|
30
|
-
while (pos < len) {
|
|
31
|
-
if (pos + 16 <= len) {
|
|
32
|
-
uint64_t v1;
|
|
33
|
-
memcpy(&v1, data + pos, sizeof(uint64_t));
|
|
34
|
-
uint64_t v2;
|
|
35
|
-
memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
|
|
36
|
-
uint64_t v = v1 | v2;
|
|
37
|
-
if ((v & 0x8080808080808080) == 0) {
|
|
38
|
-
size_t final_pos = pos + 16;
|
|
39
|
-
while (pos < final_pos) {
|
|
40
|
-
*result++ = data[pos];
|
|
41
|
-
pos++;
|
|
42
|
-
}
|
|
43
|
-
continue;
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
uint8_t leading_byte = data[pos];
|
|
47
|
-
if (leading_byte < 0b10000000) {
|
|
48
|
-
*result++ = leading_byte;
|
|
49
|
-
pos++;
|
|
50
|
-
} else if ((leading_byte & 0b11100000) == 0b11000000) {
|
|
51
|
-
if (pos + 1 >= len) {
|
|
52
|
-
return 0;
|
|
53
|
-
}
|
|
54
|
-
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
55
|
-
return 0;
|
|
56
|
-
}
|
|
57
|
-
uint32_t code_point = ((leading_byte & 0b00011111) << 6) |
|
|
58
|
-
(data[pos + 1] & 0b00111111);
|
|
59
|
-
if (code_point < 0x80 || 0x7ff < code_point) {
|
|
60
|
-
return 0;
|
|
61
|
-
}
|
|
62
|
-
*result++ = code_point;
|
|
63
|
-
pos += 2;
|
|
64
|
-
} else if ((leading_byte & 0b11110000) == 0b11100000) {
|
|
65
|
-
if (pos + 2 >= len) {
|
|
66
|
-
return 0;
|
|
67
|
-
}
|
|
68
|
-
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
69
|
-
return 0;
|
|
70
|
-
}
|
|
71
|
-
if ((data[pos + 2] & 0b11000000) != 0b10000000) {
|
|
72
|
-
return 0;
|
|
73
|
-
}
|
|
74
|
-
uint32_t code_point = ((leading_byte & 0b00001111) << 12) |
|
|
75
|
-
((data[pos + 1] & 0b00111111) << 6) |
|
|
76
|
-
(data[pos + 2] & 0b00111111);
|
|
77
|
-
if (code_point < 0x800 || 0xffff < code_point || (0xd7ff < code_point && code_point < 0xe000)) {
|
|
78
|
-
return 0;
|
|
79
|
-
}
|
|
80
|
-
*result++ = code_point;
|
|
81
|
-
pos += 3;
|
|
82
|
-
} else if ((leading_byte & 0b11111000) == 0b11110000) {
|
|
83
|
-
if (pos + 3 >= len) {
|
|
84
|
-
return 0;
|
|
85
|
-
}
|
|
86
|
-
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
87
|
-
return 0;
|
|
88
|
-
}
|
|
89
|
-
if ((data[pos + 2] & 0b11000000) != 0b10000000) {
|
|
90
|
-
return 0;
|
|
91
|
-
}
|
|
92
|
-
if ((data[pos + 3] & 0b11000000) != 0b10000000) {
|
|
93
|
-
return 0;
|
|
94
|
-
}
|
|
95
|
-
uint32_t code_point = ((leading_byte & 0b00000111) << 18) |
|
|
96
|
-
((data[pos + 1] & 0b00111111) << 12) |
|
|
97
|
-
((data[pos + 2] & 0b00111111) << 6) |
|
|
98
|
-
(data[pos + 3] & 0b00111111);
|
|
99
|
-
if (code_point <= 0xffff || 0x10ffff < code_point) {
|
|
100
|
-
return 0;
|
|
101
|
-
}
|
|
102
|
-
*result++ = code_point;
|
|
103
|
-
pos += 4;
|
|
104
|
-
} else {
|
|
105
|
-
return 0;
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
return result - start;
|
|
110
|
-
}
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
#include <stddef.h>
|
|
2
|
-
|
|
3
|
-
#include "../../include/utf.h"
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Modified from https://github.com/simdutf/simdutf
|
|
7
|
-
*
|
|
8
|
-
* Copyright 2020 The simdutf authors
|
|
9
|
-
*
|
|
10
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
11
|
-
* you may not use this file except in compliance with the License.
|
|
12
|
-
* You may obtain a copy of the License at
|
|
13
|
-
*
|
|
14
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
15
|
-
*
|
|
16
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
17
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
18
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
19
|
-
* See the License for the specific language governing permissions and
|
|
20
|
-
* limitations under the License.
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
size_t
|
|
24
|
-
utf8_length_from_latin1 (const latin1_t *data, size_t len) {
|
|
25
|
-
size_t counter = len;
|
|
26
|
-
|
|
27
|
-
for (size_t i = 0; i < len; i++) {
|
|
28
|
-
counter += data[i] >> 7;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
return counter;
|
|
32
|
-
}
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
#include <stddef.h>
|
|
2
|
-
#include <stdint.h>
|
|
3
|
-
|
|
4
|
-
#include "../../include/utf.h"
|
|
5
|
-
#include "../endianness.h"
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Modified from https://github.com/simdutf/simdutf
|
|
9
|
-
*
|
|
10
|
-
* Copyright 2020 The simdutf authors
|
|
11
|
-
*
|
|
12
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
-
* you may not use this file except in compliance with the License.
|
|
14
|
-
* You may obtain a copy of the License at
|
|
15
|
-
*
|
|
16
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
17
|
-
*
|
|
18
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
19
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
-
* See the License for the specific language governing permissions and
|
|
22
|
-
* limitations under the License.
|
|
23
|
-
*/
|
|
24
|
-
|
|
25
|
-
size_t
|
|
26
|
-
utf8_length_from_utf16le (const utf16_t *data, size_t len) {
|
|
27
|
-
size_t counter = 0;
|
|
28
|
-
uint16_t word;
|
|
29
|
-
|
|
30
|
-
for (size_t i = 0; i < len; i++) {
|
|
31
|
-
word = utf_is_be() ? utf_swap_uint16(data[i]) : data[i];
|
|
32
|
-
if (word <= 0x7f) {
|
|
33
|
-
counter++;
|
|
34
|
-
} else if (word <= 0x7ff) {
|
|
35
|
-
counter += 2;
|
|
36
|
-
} else if ((word <= 0xd7ff) || (word >= 0xe000)) {
|
|
37
|
-
counter += 3;
|
|
38
|
-
} else {
|
|
39
|
-
counter += 2;
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
return counter;
|
|
44
|
-
}
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
#include <stddef.h>
|
|
2
|
-
|
|
3
|
-
#include "../../include/utf.h"
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Modified from https://github.com/simdutf/simdutf
|
|
7
|
-
*
|
|
8
|
-
* Copyright 2020 The simdutf authors
|
|
9
|
-
*
|
|
10
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
11
|
-
* you may not use this file except in compliance with the License.
|
|
12
|
-
* You may obtain a copy of the License at
|
|
13
|
-
*
|
|
14
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
15
|
-
*
|
|
16
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
17
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
18
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
19
|
-
* See the License for the specific language governing permissions and
|
|
20
|
-
* limitations under the License.
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
size_t
|
|
24
|
-
utf8_length_from_utf32 (const utf32_t *data, size_t len) {
|
|
25
|
-
size_t counter = 0;
|
|
26
|
-
|
|
27
|
-
for (size_t i = 0; i < len; i++) {
|
|
28
|
-
counter++;
|
|
29
|
-
counter += data[i] > 0x7f;
|
|
30
|
-
counter += data[i] > 0x7ff;
|
|
31
|
-
counter += data[i] > 0xffff;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
return counter;
|
|
35
|
-
}
|
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
#include <stdbool.h>
|
|
2
|
-
#include <stddef.h>
|
|
3
|
-
|
|
4
|
-
#include "../../include/utf.h"
|
|
5
|
-
#include "../../include/utf/string.h"
|
|
6
|
-
|
|
7
|
-
extern void
|
|
8
|
-
utf8_string_init (utf8_string_t *string);
|
|
9
|
-
|
|
10
|
-
extern utf8_string_view_t
|
|
11
|
-
utf8_string_view_init (const utf8_t *data, size_t len);
|
|
12
|
-
|
|
13
|
-
extern void
|
|
14
|
-
utf8_string_destroy (utf8_string_t *string);
|
|
15
|
-
|
|
16
|
-
extern int
|
|
17
|
-
utf8_string_reserve (utf8_string_t *string, size_t len);
|
|
18
|
-
|
|
19
|
-
extern int
|
|
20
|
-
utf8_string_shrink_to_fit (utf8_string_t *string);
|
|
21
|
-
|
|
22
|
-
extern utf8_string_view_t
|
|
23
|
-
utf8_string_view (const utf8_string_t *string);
|
|
24
|
-
|
|
25
|
-
extern void
|
|
26
|
-
utf8_string_clear (utf8_string_t *string);
|
|
27
|
-
|
|
28
|
-
extern bool
|
|
29
|
-
utf8_string_empty (const utf8_string_t *string);
|
|
30
|
-
|
|
31
|
-
extern bool
|
|
32
|
-
utf8_string_view_empty (const utf8_string_view_t view);
|
|
33
|
-
|
|
34
|
-
extern int
|
|
35
|
-
utf8_string_copy (const utf8_string_t *string, utf8_string_t *result);
|
|
36
|
-
|
|
37
|
-
extern int
|
|
38
|
-
utf8_string_view_copy (const utf8_string_view_t view, utf8_string_t *result);
|
|
39
|
-
|
|
40
|
-
extern int
|
|
41
|
-
utf8_string_append (utf8_string_t *string, const utf8_string_t *other);
|
|
42
|
-
|
|
43
|
-
extern int
|
|
44
|
-
utf8_string_append_view (utf8_string_t *string, const utf8_string_view_t view);
|
|
45
|
-
|
|
46
|
-
extern int
|
|
47
|
-
utf8_string_append_character (utf8_string_t *string, utf8_t c);
|
|
48
|
-
|
|
49
|
-
extern int
|
|
50
|
-
utf8_string_append_literal (utf8_string_t *string, const utf8_t *literal, size_t n);
|
|
51
|
-
|
|
52
|
-
extern int
|
|
53
|
-
utf8_string_prepend (utf8_string_t *string, const utf8_string_t *other);
|
|
54
|
-
|
|
55
|
-
extern int
|
|
56
|
-
utf8_string_prepend_view (utf8_string_t *string, const utf8_string_view_t view);
|
|
57
|
-
|
|
58
|
-
extern int
|
|
59
|
-
utf8_string_prepend_character (utf8_string_t *string, utf8_t c);
|
|
60
|
-
|
|
61
|
-
extern int
|
|
62
|
-
utf8_string_prepend_literal (utf8_string_t *string, const utf8_t *literal, size_t n);
|
|
63
|
-
|
|
64
|
-
extern int
|
|
65
|
-
utf8_string_insert (utf8_string_t *string, size_t pos, const utf8_string_t *other);
|
|
66
|
-
|
|
67
|
-
extern int
|
|
68
|
-
utf8_string_insert_view (utf8_string_t *string, size_t pos, const utf8_string_view_t other);
|
|
69
|
-
|
|
70
|
-
extern int
|
|
71
|
-
utf8_string_insert_character (utf8_string_t *string, size_t pos, utf8_t c);
|
|
72
|
-
|
|
73
|
-
extern int
|
|
74
|
-
utf8_string_insert_literal (utf8_string_t *string, size_t pos, const utf8_t *literal, size_t n);
|
|
75
|
-
|
|
76
|
-
extern int
|
|
77
|
-
utf8_string_replace (utf8_string_t *string, size_t pos, size_t len, const utf8_string_t *replacement);
|
|
78
|
-
|
|
79
|
-
extern int
|
|
80
|
-
utf8_string_replace_view (utf8_string_t *string, size_t pos, size_t len, const utf8_string_view_t replacement);
|
|
81
|
-
|
|
82
|
-
extern int
|
|
83
|
-
utf8_string_replace_character (utf8_string_t *string, size_t pos, size_t len, utf8_t c);
|
|
84
|
-
|
|
85
|
-
extern int
|
|
86
|
-
utf8_string_replace_literal (utf8_string_t *string, size_t pos, size_t len, const utf8_t *literal, size_t n);
|
|
87
|
-
|
|
88
|
-
extern int
|
|
89
|
-
utf8_string_erase (utf8_string_t *string, size_t pos, size_t len);
|
|
90
|
-
|
|
91
|
-
extern int
|
|
92
|
-
utf8_string_concat (const utf8_string_t *string, const utf8_string_t *other, utf8_string_t *result);
|
|
93
|
-
|
|
94
|
-
extern int
|
|
95
|
-
utf8_string_view_concat (const utf8_string_view_t view, const utf8_string_t *other, utf8_string_t *result);
|
|
96
|
-
|
|
97
|
-
extern int
|
|
98
|
-
utf8_string_concat_view (const utf8_string_t *string, const utf8_string_view_t other, utf8_string_t *result);
|
|
99
|
-
|
|
100
|
-
extern int
|
|
101
|
-
utf8_string_view_concat_view (const utf8_string_view_t view, const utf8_string_view_t other, utf8_string_t *result);
|
|
102
|
-
|
|
103
|
-
extern int
|
|
104
|
-
utf8_string_concat_character (const utf8_string_t *string, utf8_t c, utf8_string_t *result);
|
|
105
|
-
|
|
106
|
-
extern int
|
|
107
|
-
utf8_string_view_concat_character (const utf8_string_view_t view, utf8_t c, utf8_string_t *result);
|
|
108
|
-
|
|
109
|
-
extern int
|
|
110
|
-
utf8_string_concat_literal (const utf8_string_t *string, const utf8_t *literal, size_t n, utf8_string_t *result);
|
|
111
|
-
|
|
112
|
-
extern int
|
|
113
|
-
utf8_string_view_concat_literal (const utf8_string_view_t view, const utf8_t *literal, size_t n, utf8_string_t *result);
|
|
114
|
-
|
|
115
|
-
extern int
|
|
116
|
-
utf8_string_compare (const utf8_string_t *string, const utf8_string_t *other);
|
|
117
|
-
|
|
118
|
-
extern int
|
|
119
|
-
utf8_string_view_compare (const utf8_string_view_t view, const utf8_string_view_t other);
|
|
120
|
-
|
|
121
|
-
extern int
|
|
122
|
-
utf8_string_compare_literal (const utf8_string_t *string, const utf8_t *literal, size_t n);
|
|
123
|
-
|
|
124
|
-
extern int
|
|
125
|
-
utf8_string_view_compare_literal (const utf8_string_view_t view, const utf8_t *literal, size_t n);
|
|
126
|
-
|
|
127
|
-
extern utf8_string_view_t
|
|
128
|
-
utf8_string_substring (const utf8_string_t *string, size_t start, size_t end);
|
|
129
|
-
|
|
130
|
-
extern utf8_string_view_t
|
|
131
|
-
utf8_string_view_substring (const utf8_string_view_t view, size_t start, size_t end);
|
|
132
|
-
|
|
133
|
-
extern int
|
|
134
|
-
utf8_string_substring_copy (const utf8_string_t *string, size_t start, size_t end, utf8_string_t *result);
|
|
135
|
-
|
|
136
|
-
extern int
|
|
137
|
-
utf8_string_view_substring_copy (const utf8_string_view_t view, size_t start, size_t end, utf8_string_t *result);
|
|
138
|
-
|
|
139
|
-
extern size_t
|
|
140
|
-
utf8_string_index_of_character (const utf8_string_t *string, size_t pos, utf8_t c);
|
|
141
|
-
|
|
142
|
-
extern size_t
|
|
143
|
-
utf8_string_view_index_of_character (const utf8_string_view_t view, size_t pos, utf8_t c);
|
|
144
|
-
|
|
145
|
-
extern size_t
|
|
146
|
-
utf8_string_last_index_of_character (const utf8_string_t *string, size_t pos, utf8_t c);
|
|
147
|
-
|
|
148
|
-
extern size_t
|
|
149
|
-
utf8_string_view_last_index_of_character (const utf8_string_view_t view, size_t pos, utf8_t c);
|
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
#include <stdbool.h>
|
|
2
|
-
#include <stddef.h>
|
|
3
|
-
#include <stdint.h>
|
|
4
|
-
#include <string.h>
|
|
5
|
-
|
|
6
|
-
#include "../../include/utf.h"
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Modified from https://github.com/simdutf/simdutf
|
|
10
|
-
*
|
|
11
|
-
* Copyright 2020 The simdutf authors
|
|
12
|
-
*
|
|
13
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
14
|
-
* you may not use this file except in compliance with the License.
|
|
15
|
-
* You may obtain a copy of the License at
|
|
16
|
-
*
|
|
17
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
18
|
-
*
|
|
19
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
20
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
21
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
22
|
-
* See the License for the specific language governing permissions and
|
|
23
|
-
* limitations under the License.
|
|
24
|
-
*/
|
|
25
|
-
|
|
26
|
-
bool
|
|
27
|
-
utf8_validate (const utf8_t *data, size_t len) {
|
|
28
|
-
uint64_t pos = 0;
|
|
29
|
-
uint8_t word;
|
|
30
|
-
|
|
31
|
-
while (pos < len) {
|
|
32
|
-
uint64_t next_pos = pos + 16;
|
|
33
|
-
if (next_pos <= len) {
|
|
34
|
-
uint64_t v1;
|
|
35
|
-
memcpy(&v1, data + pos, sizeof(uint64_t));
|
|
36
|
-
uint64_t v2;
|
|
37
|
-
memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
|
|
38
|
-
uint64_t v = v1 | v2;
|
|
39
|
-
if ((v & 0x8080808080808080) == 0) {
|
|
40
|
-
pos = next_pos;
|
|
41
|
-
continue;
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
word = data[pos];
|
|
45
|
-
while (word < 0b10000000) {
|
|
46
|
-
if (++pos == len) return true;
|
|
47
|
-
word = data[pos];
|
|
48
|
-
}
|
|
49
|
-
if ((word & 0b11100000) == 0b11000000) {
|
|
50
|
-
next_pos = pos + 2;
|
|
51
|
-
if (next_pos > len) {
|
|
52
|
-
return false;
|
|
53
|
-
}
|
|
54
|
-
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
uint32_t code_point = ((word & 0b00011111) << 6) |
|
|
58
|
-
(data[pos + 1] & 0b00111111);
|
|
59
|
-
if ((code_point < 0x80) || (0x7ff < code_point)) {
|
|
60
|
-
return false;
|
|
61
|
-
}
|
|
62
|
-
} else if ((word & 0b11110000) == 0b11100000) {
|
|
63
|
-
next_pos = pos + 3;
|
|
64
|
-
if (next_pos > len) {
|
|
65
|
-
return false;
|
|
66
|
-
}
|
|
67
|
-
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
68
|
-
return false;
|
|
69
|
-
}
|
|
70
|
-
if ((data[pos + 2] & 0b11000000) != 0b10000000) {
|
|
71
|
-
return false;
|
|
72
|
-
}
|
|
73
|
-
uint32_t code_point = ((word & 0b00001111) << 12) |
|
|
74
|
-
((data[pos + 1] & 0b00111111) << 6) |
|
|
75
|
-
(data[pos + 2] & 0b00111111);
|
|
76
|
-
if ((code_point < 0x800) || (0xffff < code_point) || (0xd7ff < code_point && code_point < 0xe000)) {
|
|
77
|
-
return false;
|
|
78
|
-
}
|
|
79
|
-
} else if ((word & 0b11111000) == 0b11110000) {
|
|
80
|
-
next_pos = pos + 4;
|
|
81
|
-
if (next_pos > len) {
|
|
82
|
-
return false;
|
|
83
|
-
}
|
|
84
|
-
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
85
|
-
return false;
|
|
86
|
-
}
|
|
87
|
-
if ((data[pos + 2] & 0b11000000) != 0b10000000) {
|
|
88
|
-
return false;
|
|
89
|
-
}
|
|
90
|
-
if ((data[pos + 3] & 0b11000000) != 0b10000000) {
|
|
91
|
-
return false;
|
|
92
|
-
}
|
|
93
|
-
uint32_t code_point = ((word & 0b00000111) << 18) |
|
|
94
|
-
((data[pos + 1] & 0b00111111) << 12) |
|
|
95
|
-
((data[pos + 2] & 0b00111111) << 6) |
|
|
96
|
-
(data[pos + 3] & 0b00111111);
|
|
97
|
-
if (code_point <= 0xffff || 0x10ffff < code_point) {
|
|
98
|
-
return false;
|
|
99
|
-
}
|
|
100
|
-
} else {
|
|
101
|
-
return false;
|
|
102
|
-
}
|
|
103
|
-
pos = next_pos;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
return true;
|
|
107
|
-
}
|