ffi-hydrogen 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.rubocop.yml +30 -0
- data/.travis.yml +10 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +72 -0
- data/Rakefile +46 -0
- data/bench/both.rb +86 -0
- data/bench/encode.rb +57 -0
- data/bench/encrypt.rb +80 -0
- data/bench/init.rb +5 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ffi-hydrogen.gemspec +31 -0
- data/lib/ffi/hydrogen.rb +216 -0
- data/vendor/.clang-format +2 -0
- data/vendor/.gitignore +3 -0
- data/vendor/README.md +2 -0
- data/vendor/libhydrogen/.clang-format +95 -0
- data/vendor/libhydrogen/.gitignore +32 -0
- data/vendor/libhydrogen/.travis.yml +22 -0
- data/vendor/libhydrogen/LICENSE +18 -0
- data/vendor/libhydrogen/Makefile +61 -0
- data/vendor/libhydrogen/Makefile.arduino +51 -0
- data/vendor/libhydrogen/README.md +29 -0
- data/vendor/libhydrogen/hydrogen.c +18 -0
- data/vendor/libhydrogen/hydrogen.h +317 -0
- data/vendor/libhydrogen/impl/common.h +316 -0
- data/vendor/libhydrogen/impl/core.h +220 -0
- data/vendor/libhydrogen/impl/gimli-core/portable.h +39 -0
- data/vendor/libhydrogen/impl/gimli-core/sse2.h +97 -0
- data/vendor/libhydrogen/impl/gimli-core.h +25 -0
- data/vendor/libhydrogen/impl/hash.h +138 -0
- data/vendor/libhydrogen/impl/hydrogen_p.h +83 -0
- data/vendor/libhydrogen/impl/kdf.h +20 -0
- data/vendor/libhydrogen/impl/kx.h +441 -0
- data/vendor/libhydrogen/impl/pwhash.h +281 -0
- data/vendor/libhydrogen/impl/random.h +376 -0
- data/vendor/libhydrogen/impl/secretbox.h +236 -0
- data/vendor/libhydrogen/impl/sign.h +207 -0
- data/vendor/libhydrogen/impl/x25519.h +383 -0
- data/vendor/libhydrogen/library.properties +10 -0
- data/vendor/libhydrogen/logo.png +0 -0
- data/vendor/libhydrogen/tests/tests.c +431 -0
- data/vendor/main.c +140 -0
- data/vendor/stringencoders/.gitignore +25 -0
- data/vendor/stringencoders/.travis.yml +13 -0
- data/vendor/stringencoders/AUTHORS +1 -0
- data/vendor/stringencoders/COPYING +2 -0
- data/vendor/stringencoders/ChangeLog +170 -0
- data/vendor/stringencoders/Doxyfile +276 -0
- data/vendor/stringencoders/INSTALL +119 -0
- data/vendor/stringencoders/LICENSE +22 -0
- data/vendor/stringencoders/Makefile.am +3 -0
- data/vendor/stringencoders/NEWS +3 -0
- data/vendor/stringencoders/README +2 -0
- data/vendor/stringencoders/README.md +32 -0
- data/vendor/stringencoders/bootstrap.sh +3 -0
- data/vendor/stringencoders/configure-gcc-hardened.sh +16 -0
- data/vendor/stringencoders/configure.ac +44 -0
- data/vendor/stringencoders/doxy/footer.html +34 -0
- data/vendor/stringencoders/doxy/header.html +85 -0
- data/vendor/stringencoders/indent.sh +9 -0
- data/vendor/stringencoders/javascript/base64-speed.html +43 -0
- data/vendor/stringencoders/javascript/base64-test.html +209 -0
- data/vendor/stringencoders/javascript/base64.html +18 -0
- data/vendor/stringencoders/javascript/base64.js +176 -0
- data/vendor/stringencoders/javascript/qunit.css +119 -0
- data/vendor/stringencoders/javascript/qunit.js +1062 -0
- data/vendor/stringencoders/javascript/urlparse-test.html +367 -0
- data/vendor/stringencoders/javascript/urlparse.js +328 -0
- data/vendor/stringencoders/make-ci.sh +13 -0
- data/vendor/stringencoders/makerelease.sh +16 -0
- data/vendor/stringencoders/python/b85.py +176 -0
- data/vendor/stringencoders/src/Makefile.am +134 -0
- data/vendor/stringencoders/src/arraytoc.c +85 -0
- data/vendor/stringencoders/src/arraytoc.h +43 -0
- data/vendor/stringencoders/src/extern_c_begin.h +3 -0
- data/vendor/stringencoders/src/extern_c_end.h +3 -0
- data/vendor/stringencoders/src/html_named_entities_generator.py +203 -0
- data/vendor/stringencoders/src/modp_ascii.c +159 -0
- data/vendor/stringencoders/src/modp_ascii.h +162 -0
- data/vendor/stringencoders/src/modp_ascii_data.h +84 -0
- data/vendor/stringencoders/src/modp_ascii_gen.c +55 -0
- data/vendor/stringencoders/src/modp_b16.c +125 -0
- data/vendor/stringencoders/src/modp_b16.h +148 -0
- data/vendor/stringencoders/src/modp_b16_data.h +104 -0
- data/vendor/stringencoders/src/modp_b16_gen.c +65 -0
- data/vendor/stringencoders/src/modp_b2.c +69 -0
- data/vendor/stringencoders/src/modp_b2.h +130 -0
- data/vendor/stringencoders/src/modp_b2_data.h +44 -0
- data/vendor/stringencoders/src/modp_b2_gen.c +36 -0
- data/vendor/stringencoders/src/modp_b36.c +108 -0
- data/vendor/stringencoders/src/modp_b36.h +170 -0
- data/vendor/stringencoders/src/modp_b64.c +254 -0
- data/vendor/stringencoders/src/modp_b64.h +236 -0
- data/vendor/stringencoders/src/modp_b64_data.h +477 -0
- data/vendor/stringencoders/src/modp_b64_gen.c +168 -0
- data/vendor/stringencoders/src/modp_b64r.c +254 -0
- data/vendor/stringencoders/src/modp_b64r.h +242 -0
- data/vendor/stringencoders/src/modp_b64r_data.h +477 -0
- data/vendor/stringencoders/src/modp_b64w.c +254 -0
- data/vendor/stringencoders/src/modp_b64w.h +231 -0
- data/vendor/stringencoders/src/modp_b64w_data.h +477 -0
- data/vendor/stringencoders/src/modp_b85.c +109 -0
- data/vendor/stringencoders/src/modp_b85.h +171 -0
- data/vendor/stringencoders/src/modp_b85_data.h +36 -0
- data/vendor/stringencoders/src/modp_b85_gen.c +65 -0
- data/vendor/stringencoders/src/modp_bjavascript.c +65 -0
- data/vendor/stringencoders/src/modp_bjavascript.h +105 -0
- data/vendor/stringencoders/src/modp_bjavascript_data.h +84 -0
- data/vendor/stringencoders/src/modp_bjavascript_gen.c +58 -0
- data/vendor/stringencoders/src/modp_burl.c +228 -0
- data/vendor/stringencoders/src/modp_burl.h +259 -0
- data/vendor/stringencoders/src/modp_burl_data.h +136 -0
- data/vendor/stringencoders/src/modp_burl_gen.c +121 -0
- data/vendor/stringencoders/src/modp_html.c +128 -0
- data/vendor/stringencoders/src/modp_html.h +53 -0
- data/vendor/stringencoders/src/modp_html_named_entities.h +9910 -0
- data/vendor/stringencoders/src/modp_json.c +315 -0
- data/vendor/stringencoders/src/modp_json.h +103 -0
- data/vendor/stringencoders/src/modp_json_data.h +57 -0
- data/vendor/stringencoders/src/modp_json_gen.py +60 -0
- data/vendor/stringencoders/src/modp_mainpage.h +120 -0
- data/vendor/stringencoders/src/modp_numtoa.c +350 -0
- data/vendor/stringencoders/src/modp_numtoa.h +100 -0
- data/vendor/stringencoders/src/modp_qsiter.c +76 -0
- data/vendor/stringencoders/src/modp_qsiter.h +71 -0
- data/vendor/stringencoders/src/modp_stdint.h +43 -0
- data/vendor/stringencoders/src/modp_utf8.c +88 -0
- data/vendor/stringencoders/src/modp_utf8.h +38 -0
- data/vendor/stringencoders/src/modp_xml.c +311 -0
- data/vendor/stringencoders/src/modp_xml.h +166 -0
- data/vendor/stringencoders/src/stringencoders.pc +10 -0
- data/vendor/stringencoders/src/stringencoders.pc.in +10 -0
- data/vendor/stringencoders/test/Makefile.am +113 -0
- data/vendor/stringencoders/test/apr_base64.c +262 -0
- data/vendor/stringencoders/test/apr_base64.h +120 -0
- data/vendor/stringencoders/test/cxx_test.cc +482 -0
- data/vendor/stringencoders/test/minunit.h +82 -0
- data/vendor/stringencoders/test/modp_ascii_test.c +281 -0
- data/vendor/stringencoders/test/modp_b16_test.c +288 -0
- data/vendor/stringencoders/test/modp_b2_test.c +250 -0
- data/vendor/stringencoders/test/modp_b64_test.c +266 -0
- data/vendor/stringencoders/test/modp_b85_test.c +130 -0
- data/vendor/stringencoders/test/modp_bjavascript_test.c +137 -0
- data/vendor/stringencoders/test/modp_burl_test.c +423 -0
- data/vendor/stringencoders/test/modp_html_test.c +296 -0
- data/vendor/stringencoders/test/modp_json_test.c +336 -0
- data/vendor/stringencoders/test/modp_numtoa_test.c +545 -0
- data/vendor/stringencoders/test/modp_qsiter_test.c +280 -0
- data/vendor/stringencoders/test/modp_utf8_test.c +188 -0
- data/vendor/stringencoders/test/modp_xml_test.c +339 -0
- data/vendor/stringencoders/test/speedtest.c +241 -0
- data/vendor/stringencoders/test/speedtest_ascii.c +345 -0
- data/vendor/stringencoders/test/speedtest_msg.c +78 -0
- data/vendor/stringencoders/test/speedtest_numtoa.c +276 -0
- metadata +314 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
/**
|
3
|
+
* \file modp_utf8.c
|
4
|
+
* <PRE>
|
5
|
+
* MODP_UTF8 - UTF8 string utilities
|
6
|
+
* https://github.com/client9/stringencoders/
|
7
|
+
*
|
8
|
+
* Copyright 2013-2016 Nick Galbreath -- nickg [at] client9 [dot] com
|
9
|
+
* All rights reserved.
|
10
|
+
* Released under MIT license. See LICENSE for details.
|
11
|
+
* </PRE>
|
12
|
+
*/
|
13
|
+
|
14
|
+
#include "config.h"
|
15
|
+
#include "modp_utf8.h"
|
16
|
+
#include <string.h>
|
17
|
+
|
18
|
+
/**
 * Validate that a byte buffer holds well-formed UTF-8.
 *
 * \param[in] src_orig  buffer to check; it does not need to be NUL terminated
 * \param[in] len       number of bytes in src_orig
 * \return MODP_UTF8_OK when every sequence is well formed, otherwise the code
 *         of the first problem found:
 *         - MODP_UTF8_SHORT      the input ends in the middle of a sequence
 *         - MODP_UTF8_INVALID    a continuation byte is missing, or appears
 *                                where a lead byte is required
 *         - MODP_UTF8_OVERLONG   a value is encoded with more bytes than needed
 *         - MODP_UTF8_CODEPOINT  surrogate (U+D800..U+DFFF), value above
 *                                U+10FFFF, or a lead byte of 0xF8 and above
 */
int modp_utf8_validate(const char* src_orig, size_t len)
{
    const uint8_t* src = (const uint8_t*)src_orig;
    const uint8_t* srcend = src + len;
    uint8_t c, c1, c2, c3;
    int d;
    while (src < srcend) {
        c = *src;
        if (c < 0x80) {
            /* plain ASCII */
            src += 1;
        } else if (c < 0xC0) {
            /* 10xxxxxx is a continuation byte; it can never start a sequence */
            return MODP_UTF8_INVALID;
        } else if (c < 0xE0) {
            /* c starts with 110: two byte sequence */
            if (srcend - src < 2) {
                return MODP_UTF8_SHORT;
            }
            c1 = *(src + 1);
            if ((c1 & 0xC0) != 0x80) {
                return MODP_UTF8_INVALID;
            }
            d = ((c & 0x1F) << 6) | (c1 & 0x3F);
            if (d < 0x80) {
                return MODP_UTF8_OVERLONG;
            }
            src += 2;
        } else if (c < 0xF0) {
            /* c starts with 1110: three byte sequence */
            if (srcend - src < 3) {
                return MODP_UTF8_SHORT;
            }
            c1 = *(src + 1);
            c2 = *(src + 2);
            if ((c1 & 0xC0) != 0x80) {
                return MODP_UTF8_INVALID;
            }
            if ((c2 & 0xC0) != 0x80) {
                return MODP_UTF8_INVALID;
            }
            d = ((c & 0x0F) << 12) | ((c1 & 0x3F) << 6) | (c2 & 0x3F);
            if (d < 0x0800) {
                return MODP_UTF8_OVERLONG;
            }
            /* UTF-16 surrogates are not valid scalar values */
            if (d >= 0xD800 && d <= 0xDFFF) {
                return MODP_UTF8_CODEPOINT;
            }
            src += 3;
        } else if (c < 0xF8) {
            /* c starts with 11110: four byte sequence */
            if (srcend - src < 4) {
                return MODP_UTF8_SHORT;
            }
            c1 = *(src + 1);
            c2 = *(src + 2);
            c3 = *(src + 3);
            if ((c1 & 0xC0) != 0x80) {
                return MODP_UTF8_INVALID;
            }
            if ((c2 & 0xC0) != 0x80) {
                return MODP_UTF8_INVALID;
            }
            if ((c3 & 0xC0) != 0x80) {
                return MODP_UTF8_INVALID;
            }
            /* note the shift (<<) on c2: a comparison (<) here would drop
             * its six payload bits from the decoded value */
            d = ((c & 0x07) << 18) | ((c1 & 0x3F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
            if (d < 0x010000) {
                return MODP_UTF8_OVERLONG;
            }
            /* Unicode ends at U+10FFFF even though four bytes can hold more */
            if (d > 0x10FFFF) {
                return MODP_UTF8_CODEPOINT;
            }
            src += 4;
        } else {
            /* 0xF8 and above never appear in UTF-8 */
            return MODP_UTF8_CODEPOINT;
        }
    }
    return MODP_UTF8_OK;
}
|
@@ -0,0 +1,38 @@
|
|
1
|
+
/**
|
2
|
+
 * \file modp_utf8.h
|
3
|
+
* \brief Various UTF8 utilities
|
4
|
+
*
|
5
|
+
*/
|
6
|
+
|
7
|
+
/*
|
8
|
+
* <PRE>
|
9
|
+
* MODP_UTF8 -- UTF-8 utilities
|
10
|
+
* https://github.com/client9/stringencoders
|
11
|
+
*
|
12
|
+
* Copyright © 2013-2016, Nick Galbreath
|
13
|
+
* All rights reserved.
|
14
|
+
*
|
15
|
+
* Released under MIT license. See LICENSE for details.
|
16
|
+
* </PRE>
|
17
|
+
*
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef COM_MODP_UTF8
|
21
|
+
#define COM_MODP_UTF8
|
22
|
+
|
23
|
+
#include "extern_c_begin.h"
|
24
|
+
#include "modp_stdint.h"
|
25
|
+
|
26
|
+
#define MODP_UTF8_OK 0
|
27
|
+
#define MODP_UTF8_SHORT 1
|
28
|
+
#define MODP_UTF8_INVALID 2
|
29
|
+
#define MODP_UTF8_OVERLONG 3
|
30
|
+
#define MODP_UTF8_CODEPOINT 4
|
31
|
+
/**
|
32
|
+
* Validate a UTF-8 string.
|
33
|
+
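 * Checks for truncated sequences, bad continuation bytes, overlong
 * encodings and invalid code points. Returns MODP_UTF8_OK when the input
 * is valid, otherwise one of the other MODP_UTF8_* codes defined above.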
|
34
|
+
*/
|
35
|
+
int modp_utf8_validate(const char* src, size_t len);
|
36
|
+
|
37
|
+
#include "extern_c_end.h"
|
38
|
+
#endif /* ifndef modp_utf8 */
|
@@ -0,0 +1,311 @@
|
|
1
|
+
/*
|
2
|
+
* <pre>
|
3
|
+
* modp_xml xml decoders
|
4
|
+
* https://github.com/client9/stringencoders
|
5
|
+
*
|
6
|
+
* Copyright © 2012-2016 Nick Galbreath
|
7
|
+
* All rights reserved.
|
8
|
+
* Released under MIT license. See LICENSE for details.
|
9
|
+
* </pre>
|
10
|
+
*/
|
11
|
+
#include "modp_xml.h"
|
12
|
+
|
13
|
+
/*
 * Byte -> hexadecimal digit value lookup table.
 * '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15; every other byte
 * maps to the sentinel 256 (larger than any digit value).
 */
static const int gsHexDecodeMap[256] = {
    /* 0x00 - 0x0F */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0x10 - 0x1F */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0x20 - 0x2F */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0x30 - 0x3F: '0'..'9' */
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 256, 256, 256, 256, 256, 256,
    /* 0x40 - 0x4F: 'A'..'F' */
    256, 10, 11, 12, 13, 14, 15, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0x50 - 0x5F */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0x60 - 0x6F: 'a'..'f' */
    256, 10, 11, 12, 13, 14, 15, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0x70 - 0x7F */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0x80 - 0x8F */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0x90 - 0x9F */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0xA0 - 0xAF */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0xB0 - 0xBF */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0xC0 - 0xCF */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0xD0 - 0xDF */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0xE0 - 0xEF */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    /* 0xF0 - 0xFF */
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256
};
|
37
|
+
|
38
|
+
/**
 * Encode a single code point as UTF-8.
 *
 * \param[out] dest  receives 1 to 4 bytes; no NUL terminator is added
 * \param[in]  uval  code point value
 * \return number of bytes written, or 0 (nothing written) when uval is
 *         negative or larger than 0x1FFFFF, the most four bytes can hold.
 *
 * No check is made for surrogates or other values that are not assigned
 * characters; the bits are simply packed.
 */
size_t modp_xml_unicode_char_to_utf8(char* dest, int uval)
{
    if (uval < 0) {
        /* a negative value is not a code point; without this guard it
         * would fall into the one-byte case and emit a garbage byte */
        return 0;
    }
    if (uval <= 0x7F) {
        /* 0xxxxxxx */
        dest[0] = (char)uval;
        return 1;
    }
    if (uval <= 0x7FF) {
        /* 110xxxxx 10xxxxxx */
        dest[0] = (char)((uval >> 6) + 0xC0);
        dest[1] = (char)((uval & 0x3F) + 0x80);
        return 2;
    }
    if (uval <= 0xFFFF) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        dest[0] = (char)((uval >> 12) + 0xE0);
        dest[1] = (char)(((uval >> 6) & 0x3F) + 0x80);
        dest[2] = (char)((uval & 0x3F) + 0x80);
        return 3;
    }
    if (uval <= 0x1FFFFF) {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        dest[0] = (char)((uval >> 18) + 0xF0);
        dest[1] = (char)(((uval >> 12) & 0x3F) + 0x80);
        dest[2] = (char)(((uval >> 6) & 0x3F) + 0x80);
        dest[3] = (char)((uval & 0x3F) + 0x80);
        return 4;
    }
    return 0;
}
|
64
|
+
|
65
|
+
/**
 * Check a code point against a table of values rejected for numeric
 * character references.
 *
 * \param[in] val  candidate code point
 * \return -1 if val lies inside one of the rejected ranges below, or above
 *         the end of the last range (0xFFFFF); otherwise val itself.
 *         A value below the start of the first range (that is, a negative
 *         value) is also handed back unchanged.
 *
 * Exposed for testing.
 *
 * http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#character-references
 * Retrieved 20120811:
 *
 *   The numeric character reference forms described above are allowed
 *   to reference any Unicode code point other than U+0000, U+000D,
 *   permanently undefined Unicode characters (noncharacters), and
 *   control characters other than space characters.
 *
 * NOTE(review): the quoted text excludes U+000D, but the table below does
 * not list it, so 0x0D is returned as valid -- confirm which is intended.
 */
int modp_xml_validate_unicode(int val)
{
    /* inclusive [first, last] pairs, sorted ascending */
    static const int ranges[] = {
        0x0000, 0x0008, /* control characters */
        0x000B, 0x000B, /* Vertical Tab is forbidden, ?? */
        0x000E, 0x001F, /* control characters */
        0x0080, 0x009F, /* control characters */
        0x0750, 0x077D, /* the rest are undefined */
        0x07C0, 0x08FF,
        0x1380, 0x139F,
        0x18B0, 0x18FF,
        0x1980, 0x19DF,
        0x1A00, 0x1CFF,
        0x1D80, 0x1DFF,
        0x2C00, 0x2E7F,
        0x2FE0, 0x2FEF,
        0x31C0, 0x31EF,
        0x9FB0, 0x9FFF,
        0xA4D0, 0xABFF,
        0xD7B0, 0xD7FF,
        0xFE10, 0xFE1F,
        0x10140, 0x102FF,
        0x104B0, 0x107FF,
        0x1D200, 0x1D2FF,
        0x1D360, 0x1D3FF,
        0x1D800, 0x1FFFF,
        0x2A6E0, 0x2F7FF,
        0x2FAB0, 0xDFFFF,
        0xE0080, 0xE00FF,
        0xE01F0, 0xEFFFF,
        0xFFFFE, 0xFFFFF
    };

    /* count elements by the array's own element type, not an unrelated one */
    static const int imax = (int)(sizeof(ranges) / sizeof(ranges[0]));

    int i;
    for (i = 0; i < imax; i += 2) {
        if (val < ranges[i]) {
            /* sorted table: val sits in the gap before this range */
            return val;
        }
        if (val <= ranges[i + 1]) {
            return -1;
        }
    }
    /* beyond the last range */
    return -1;
}
|
126
|
+
|
127
|
+
/**
 * Parse the decimal digits of a numeric character reference.
 *
 * \param[in] s    first character after "&#"
 * \param[in] len  number of characters before the closing ';'
 * \return -1 if any character is not a decimal digit or the running value
 *         exceeds 0x1000FF; otherwise whatever modp_xml_validate_unicode()
 *         returns for the parsed value.
 *
 * Exposed for testing
 */
int modp_xml_parse_dec_entity(const char* s, size_t len)
{
    int val = 0;
    size_t i;
    for (i = 0; i < len; ++i) {
        /* Index through an unsigned byte: where plain char is signed, a
         * byte >= 0x80 converted straight to uint32_t becomes a huge index
         * and reads far outside the 256-entry table. */
        int d = gsHexDecodeMap[(uint8_t)s[i]];
        if (d > 9) {
            /* hex letters (10-15) and the 256 sentinel are both rejected */
            return -1;
        }
        val = (val * 10) + d;
        /* checked every step, so val stays small and cannot overflow */
        if (val > 0x1000FF) {
            return -1;
        }
    }
    return modp_xml_validate_unicode(val);
}
|
147
|
+
|
148
|
+
/**
 * Parse the hexadecimal digits of a numeric character reference.
 *
 * \param[in] s    first character after "&#x"
 * \param[in] len  number of characters before the closing ';'
 * \return -1 if any character is not a hex digit (either case) or the
 *         running value exceeds 0x1000FF; otherwise whatever
 *         modp_xml_validate_unicode() returns for the parsed value.
 *
 * Exposed for testing
 */
int modp_xml_parse_hex_entity(const char* s, size_t len)
{
    int val = 0;
    size_t i;
    for (i = 0; i < len; ++i) {
        /* Index through an unsigned byte: where plain char is signed, a
         * byte >= 0x80 converted straight to uint32_t becomes a huge index
         * and reads far outside the 256-entry table. */
        int d = gsHexDecodeMap[(uint8_t)s[i]];
        if (d == 256) {
            /* sentinel: not a hex digit */
            return -1;
        }
        val = (val * 16) + d;
        /* checked every step, so val stays small and cannot overflow */
        if (val > 0x1000FF) {
            return -1;
        }
    }
    return modp_xml_validate_unicode(val);
}
|
168
|
+
|
169
|
+
/**
 * Decode XML character references in a buffer.
 *
 * Handles decimal ("&#NN;") and hexadecimal ("&#xHH;") references and the
 * five named entities quot, apos, amp, lt and gt.  Anything that does not
 * decode -- an unknown name, an invalid number, or an '&' with no ';'
 * after it -- is copied through unchanged.
 *
 * \param[out] dest  output buffer; a NUL terminator is always written
 * \param[in]  s     input bytes
 * \param[in]  len   number of input bytes
 * \return number of bytes written to dest, not counting the terminator
 */
size_t modp_xml_decode(char* dest, const char* s, size_t len)
{
    const uint8_t* src = (const uint8_t*)s;
    const char* deststart = dest;
    const uint8_t* srcend = src + len;
    int unichar;

    while (src < srcend) {
        if (*src != '&') {
            *dest++ = (char)*src++;
            continue;
        }

        const uint8_t* pos = (const uint8_t*)memchr(src + 1, ';',
            (size_t)(srcend - src - 1));
        if (pos == NULL) {
            /* No ';' anywhere ahead, so nothing further can decode: copy
             * the remainder verbatim instead of rescanning the tail for
             * every later '&'. */
            while (src < srcend) {
                *dest++ = (char)*src++;
            }
            break;
        }
        /* elen counts the bytes from '&' up to, but excluding, ';' */
        size_t elen = (size_t)(pos - src);
        if (*(src + 1) == '#') {
            /* src[2] is in range: ';' lies at src + 2 or later */
            if (*(src + 2) == 'x' || *(src + 2) == 'X') {
                unichar = modp_xml_parse_hex_entity((const char*)(src + 3), elen - 3);
            } else {
                unichar = modp_xml_parse_dec_entity((const char*)(src + 2), elen - 2);
            }
            if (unichar <= 0) {
                /* The parsers report failure as -1.  Pass the '&' through
                 * and carry on with the following byte rather than
                 * encoding the error code as a character. */
                *dest++ = (char)*src++;
            } else {
                dest += modp_xml_unicode_char_to_utf8(dest, unichar);
                src = pos + 1;
            }
        } else if (elen == 5 && src[1] == 'q' && src[2] == 'u' && src[3] == 'o' && src[4] == 't') {
            *dest++ = '"';
            src = pos + 1;
        } else if (elen == 5 && src[1] == 'a' && src[2] == 'p' && src[3] == 'o' && src[4] == 's') {
            *dest++ = '\'';
            src = pos + 1;
        } else if (elen == 4 && src[1] == 'a' && src[2] == 'm' && src[3] == 'p') {
            *dest++ = '&';
            src = pos + 1;
        } else if (elen == 3 && src[1] == 'l' && src[2] == 't') {
            *dest++ = '<';
            src = pos + 1;
        } else if (elen == 3 && src[1] == 'g' && src[2] == 't') {
            *dest++ = '>';
            src = pos + 1;
        } else {
            /* if not found, just copy */
            *dest++ = (char)*src++;
        }
    }

    *dest = '\0';
    return (size_t)(dest - deststart); /* compute "strlen" of dest. */
}
|
227
|
+
|
228
|
+
/**
 * XML-escape a buffer.
 *
 * Replaces & < > " ' with &amp; &lt; &gt; &quot; &apos; respectively and
 * copies every other byte unchanged.  The quote mappings match what
 * modp_xml_decode() expects, so encode followed by decode round-trips.
 *
 * \param[out] dest  output buffer; must not overlap src.  It needs room for
 *                   the encoded text plus a NUL terminator
 *                   (modp_xml_min_encode_strlen() gives the text size).
 * \param[in]  src   input bytes
 * \param[in]  len   number of input bytes
 * \return number of bytes written to dest, not counting the terminator
 */
size_t modp_xml_encode(char* dest, const char* src, size_t len)
{
    const char* const deststart = dest;
    const char* srcend = src + len;

    while (src < srcend) {
        const char ch = *src++;
        const char* entity;

        switch (ch) {
        case '&':
            entity = "&amp;";
            break;
        case '<':
            entity = "&lt;";
            break;
        case '>':
            entity = "&gt;";
            break;
        case '\"':
            entity = "&quot;"; /* double quote */
            break;
        case '\'':
            entity = "&apos;"; /* apostrophe */
            break;
        default:
            entity = NULL;
            break;
        }

        if (entity == NULL) {
            *dest++ = ch;
        } else {
            while (*entity != '\0') {
                *dest++ = *entity++;
            }
        }
    }
    *dest = '\0';
    return (size_t)(dest - deststart);
}
|
284
|
+
|
285
|
+
/**
 * Compute how many bytes modp_xml_encode() produces for an input, not
 * counting the NUL terminator.
 *
 * \param[in] src  input bytes
 * \param[in] len  number of input bytes
 * \return encoded length: 5 for '&', 4 for '<' or '>', 6 for either quote
 *         character, 1 for any other byte
 */
size_t modp_xml_min_encode_strlen(const char* src, const size_t len)
{
    size_t total = 0;
    size_t i;
    for (i = 0; i < len; ++i) {
        const char ch = src[i];
        if (ch == '&') {
            total += 5; /* "&amp;" */
        } else if (ch == '<' || ch == '>') {
            total += 4; /* "&lt;" or "&gt;" */
        } else if (ch == '\'' || ch == '\"') {
            total += 6; /* "&apos;" or "&quot;" */
        } else {
            total += 1;
        }
    }
    return total;
}
|
@@ -0,0 +1,166 @@
|
|
1
|
+
/**
|
2
|
+
* \file modp_xml.h
|
3
|
+
* \brief Experimental XML/HTML decoder
|
4
|
+
*
|
5
|
+
* This is mostly experimental.
|
6
|
+
*/
|
7
|
+
|
8
|
+
/*
|
9
|
+
* <PRE>
|
10
|
+
* High Performance XML Decoder (for now)
|
11
|
+
*
|
12
|
+
* Copyright © 2012-2016 Nick Galbreath
|
13
|
+
* All rights reserved.
|
14
|
+
*
|
15
|
+
 * Released under MIT license. See LICENSE for details.
|
16
|
+
*
|
17
|
+
* https://github.com/client9/stringencoders
|
18
|
+
*
|
19
|
+
* </PRE>
|
20
|
+
*/
|
21
|
+
|
22
|
+
#ifndef COM_MODP_STRINGENCODERS_XML
|
23
|
+
#define COM_MODP_STRINGENCODERS_XML
|
24
|
+
|
25
|
+
#include "modp_stdint.h"
|
26
|
+
|
27
|
+
#ifdef __cplusplus
|
28
|
+
#define BEGIN_C extern "C" {
|
29
|
+
#define END_C }
|
30
|
+
#else
|
31
|
+
#define BEGIN_C
|
32
|
+
#define END_C
|
33
|
+
#endif
|
34
|
+
|
35
|
+
BEGIN_C
|
36
|
+
|
37
|
+
/**
|
38
|
+
* \brief Validates a unicode code point is valid for HTML (undefined
|
39
|
+
* or non-white-space control char)
|
40
|
+
*
|
41
|
+
* \param[in] val a unicode char expressed as a uint32_t
|
42
|
+
 * \return -1 if invalid, else passes back the input value.
|
43
|
+
*
|
44
|
+
* See http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#character-references for more details
|
45
|
+
*
|
46
|
+
* This is only exposed for testing. It is not designed for public use.
|
47
|
+
*/
|
48
|
+
int modp_xml_validate_unicode(int val);
|
49
|
+
|
50
|
+
/**
|
51
|
+
* \brief converts a unicode char expressed as uint32_t into a UTF-8 byte sequence.
|
52
|
+
* \param[out] dest assumed to have at least 4 chars available in buffer.
|
53
|
+
* \param[in] uval A unicode character expressed as a uint32_t type
|
54
|
+
* \return 0 if input value is invalid or not a unicode character, else
|
55
|
+
* returns number of bytes written to dest.
|
56
|
+
*
|
57
|
+
* This is only exposed for testing. It is not designed for public use.
|
58
|
+
*/
|
59
|
+
size_t modp_xml_unicode_char_to_utf8(char* dest, int uval);
|
60
|
+
|
61
|
+
/**
|
62
|
+
* \brief parse a hex encoded entity between "&#x" and ";"
|
63
|
+
 * \param[in] s a buffer pointing at the first char after "&#x"
|
64
|
+
* \param[in] len the length of string between "&#x" and ";"
|
65
|
+
* \return -1 if invalid, otherwise the unicode character value
|
66
|
+
*
|
67
|
+
* This is only exposed for testing. It is not designed for public use.
|
68
|
+
*/
|
69
|
+
int modp_xml_parse_hex_entity(const char* s, size_t len);
|
70
|
+
|
71
|
+
/**
|
72
|
+
 * \brief parse a numerical decimal XML entity, e.g. &#39;
|
73
|
+
*
|
74
|
+
* \param[in] s the buffer pointing to first char after '&#'.
|
75
|
+
* \param[in] len the length between '&#' and ';'. It is expected
|
76
|
+
* that all chars between are to be decimal digits.
|
77
|
+
* \return -1 if invalid, else the unicode numeric value
|
78
|
+
*
|
79
|
+
* Exposed for testing. Not designed to be useful for public consumption.
|
80
|
+
*/
|
81
|
+
int modp_xml_parse_dec_entity(const char* s, size_t len);
|
82
|
+
|
83
|
+
/**
|
84
|
+
* \brief XML decode a string
|
85
|
+
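 * \param[out] dest output string. Must have room for at least len + 1
 *             bytes: a terminating NUL is always written, and the decoded
 *             text is expected never to be longer than the input.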
|
86
|
+
* \param[in] str The input string
|
87
|
+
* \param[in] len The length of the input string, excluding any
|
88
|
+
* final null byte.
|
89
|
+
* \return the final size of the output, excluding any ending null byte.
|
90
|
+
*
|
91
|
+
* Decode numerical entities (decimal or hexadecimal), and following named
|
92
|
+
* entities:
|
93
|
+
 * * &apos; (')
|
94
|
+
 * * &quot; (")
|
95
|
+
 * * &amp; (&)
|
96
|
+
 * * &lt; (<)
|
97
|
+
 * * &gt; (>)
|
98
|
+
*
|
99
|
+
*/
|
100
|
+
size_t modp_xml_decode(char* dest, const char* str, size_t len);
|
101
|
+
|
102
|
+
/**
|
103
|
+
* \brief XML encode a UTF-8 string
|
104
|
+
* \param[out] dest output string.
|
105
|
+
* \param[in] str The input string
|
106
|
+
* \param[in] len The length of the input string, excluding any
|
107
|
+
* final null byte.
|
108
|
+
* \return the final size of the output, excluding any ending null byte.
|
109
|
+
* Encodes an assumed valid UTF-8 input and escapes
|
110
|
+
* * '
|
111
|
+
* * "
|
112
|
+
* * &
|
113
|
+
* * <
|
114
|
+
* * >
|
115
|
+
*/
|
116
|
+
size_t modp_xml_encode(char* dest, const char* str, size_t len);
|
117
|
+
|
118
|
+
/**
 * \brief Compute the length of the XML-encoded form of a string
 * \param[in] str The input string
 * \param[in] len The length of the input string, excluding any
 *   final null byte.
 * \return the number of bytes the encoded text occupies, excluding any
 *   ending null byte.
 */
size_t modp_xml_min_encode_strlen(const char* str, size_t len);
|
119
|
+
|
120
|
+
END_C
|
121
|
+
|
122
|
+
#ifdef __cplusplus
|
123
|
+
#include <cstring>
|
124
|
+
#include <string>
|
125
|
+
|
126
|
+
namespace modp {
|
127
|
+
|
128
|
+
/**
|
129
|
+
* Url decode a string.
|
130
|
+
* This function does not allocate memory.
|
131
|
+
*
|
132
|
+
* \param[in,out] s the string to be decoded
|
133
|
+
* \return a reference to the input string.
|
134
|
+
* There is no error case, bad characters are passed through
|
135
|
+
*/
|
136
|
+
inline std::string& xml_decode(std::string& s)
|
137
|
+
{
|
138
|
+
size_t d = modp_xml_decode(const_cast<char*>(s.data()), s.data(), s.size());
|
139
|
+
s.erase(d, std::string::npos);
|
140
|
+
return s;
|
141
|
+
}
|
142
|
+
|
143
|
+
inline std::string xml_decode(const char* str)
|
144
|
+
{
|
145
|
+
std::string s(str);
|
146
|
+
xml_decode(s);
|
147
|
+
return s;
|
148
|
+
}
|
149
|
+
|
150
|
+
inline std::string xml_decode(const char* str, size_t len)
|
151
|
+
{
|
152
|
+
std::string s(str, len);
|
153
|
+
xml_decode(s);
|
154
|
+
return s;
|
155
|
+
}
|
156
|
+
|
157
|
+
inline std::string xml_decode(const std::string& s)
|
158
|
+
{
|
159
|
+
std::string x(s);
|
160
|
+
xml_decode(x);
|
161
|
+
return x;
|
162
|
+
}
|
163
|
+
}
|
164
|
+
#endif
|
165
|
+
|
166
|
+
#endif
|
@@ -0,0 +1,10 @@
|
|
1
|
+
prefix=/usr/local
|
2
|
+
exec_prefix=${prefix}
|
3
|
+
libdir=${exec_prefix}/lib
|
4
|
+
includedir=${prefix}/include
|
5
|
+
|
6
|
+
Name: stringencoders
|
7
|
+
Description: collection of high performance c-string transformations
|
8
|
+
Version: v3.10.3
|
9
|
+
Libs: -L${libdir} -lmodpbase64
|
10
|
+
Cflags: -I${includedir}
|
@@ -0,0 +1,10 @@
|
|
1
|
+
prefix=@prefix@
|
2
|
+
exec_prefix=@exec_prefix@
|
3
|
+
libdir=@libdir@
|
4
|
+
includedir=@includedir@
|
5
|
+
|
6
|
+
Name: stringencoders
|
7
|
+
Description: collection of high performance c-string transformations
|
8
|
+
Version: @PACKAGE_VERSION@
|
9
|
+
Libs: -L${libdir} -lmodpbase64
|
10
|
+
Cflags: -I${includedir}
|