escape_utils 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +43 -0
  3. data/.gitignore +0 -1
  4. data/CHANGELOG.md +23 -0
  5. data/Gemfile +15 -0
  6. data/README.md +48 -91
  7. data/Rakefile +4 -2
  8. data/benchmark/html_escape_once.rb +25 -0
  9. data/benchmark/javascript_escape.rb +1 -1
  10. data/benchmark/javascript_unescape.rb +1 -1
  11. data/benchmark/url_decode.rb +28 -0
  12. data/benchmark/url_encode.rb +37 -0
  13. data/benchmark/xml_escape.rb +7 -11
  14. data/bin/console +8 -0
  15. data/escape_utils.gemspec +1 -12
  16. data/ext/escape_utils/escape_utils.c +8 -115
  17. data/ext/escape_utils/houdini.h +3 -5
  18. data/ext/escape_utils/houdini_html_e.c +52 -24
  19. data/ext/escape_utils/houdini_js_e.c +15 -3
  20. data/ext/escape_utils/houdini_uri_e.c +7 -18
  21. data/ext/escape_utils/houdini_uri_u.c +5 -15
  22. data/ext/escape_utils/houdini_xml_e.c +15 -1
  23. data/lib/escape_utils/html/cgi.rb +10 -8
  24. data/lib/escape_utils/html/erb.rb +1 -10
  25. data/lib/escape_utils/html/haml.rb +1 -7
  26. data/lib/escape_utils/html/rack.rb +3 -3
  27. data/lib/escape_utils/html_safety.rb +13 -0
  28. data/lib/escape_utils/url/cgi.rb +0 -8
  29. data/lib/escape_utils/url/erb.rb +1 -1
  30. data/lib/escape_utils/url/rack.rb +0 -12
  31. data/lib/escape_utils/url/uri.rb +11 -7
  32. data/lib/escape_utils/version.rb +1 -1
  33. data/lib/escape_utils/xml/builder.rb +2 -2
  34. data/lib/escape_utils.rb +61 -9
  35. data/test/helper.rb +16 -3
  36. data/test/html/escape_test.rb +66 -42
  37. data/test/html/unescape_test.rb +3 -21
  38. data/test/html_safety_test.rb +1 -27
  39. data/test/javascript/escape_test.rb +53 -20
  40. data/test/javascript/unescape_test.rb +16 -18
  41. data/test/query/escape_test.rb +3 -21
  42. data/test/query/unescape_test.rb +5 -23
  43. data/test/uri/escape_test.rb +16 -18
  44. data/test/uri/unescape_test.rb +17 -19
  45. data/test/uri_component/escape_test.rb +15 -17
  46. data/test/uri_component/unescape_test.rb +17 -19
  47. data/test/url/escape_test.rb +3 -21
  48. data/test/url/unescape_test.rb +5 -23
  49. data/test/xml/escape_test.rb +15 -17
  50. metadata +14 -127
  51. data/.travis.yml +0 -7
  52. data/benchmark/html_escape.rb +0 -68
  53. data/benchmark/html_unescape.rb +0 -35
  54. data/benchmark/url_escape.rb +0 -56
  55. data/benchmark/url_unescape.rb +0 -50
  56. data/ext/escape_utils/houdini_html_u.c +0 -122
@@ -1,122 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
- #include "html_unescape.h"
7
-
8
- static inline void
9
- gh_buf_put_utf8(gh_buf *ob, int c)
10
- {
11
- unsigned char unichar[4];
12
-
13
- if (c < 0x80) {
14
- gh_buf_putc(ob, c);
15
- }
16
- else if (c < 0x800) {
17
- unichar[0] = 192 + (c / 64);
18
- unichar[1] = 128 + (c % 64);
19
- gh_buf_put(ob, unichar, 2);
20
- }
21
- else if (c - 0xd800u < 0x800) {
22
- gh_buf_putc(ob, '?');
23
- }
24
- else if (c < 0x10000) {
25
- unichar[0] = 224 + (c / 4096);
26
- unichar[1] = 128 + (c / 64) % 64;
27
- unichar[2] = 128 + (c % 64);
28
- gh_buf_put(ob, unichar, 3);
29
- }
30
- else if (c < 0x110000) {
31
- unichar[0] = 240 + (c / 262144);
32
- unichar[1] = 128 + (c / 4096) % 64;
33
- unichar[2] = 128 + (c / 64) % 64;
34
- unichar[3] = 128 + (c % 64);
35
- gh_buf_put(ob, unichar, 4);
36
- }
37
- else {
38
- gh_buf_putc(ob, '?');
39
- }
40
- }
41
-
42
- static size_t
43
- unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
44
- {
45
- size_t i = 0;
46
-
47
- if (size > 3 && src[0] == '#') {
48
- int codepoint = 0;
49
-
50
- if (_isdigit(src[1])) {
51
- for (i = 1; i < size && _isdigit(src[i]); ++i)
52
- codepoint = (codepoint * 10) + (src[i] - '0');
53
- }
54
-
55
- else if (src[1] == 'x' || src[1] == 'X') {
56
- for (i = 2; i < size && _isxdigit(src[i]); ++i)
57
- codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
58
- }
59
-
60
- if (i < size && src[i] == ';' && codepoint) {
61
- gh_buf_put_utf8(ob, codepoint);
62
- return i + 1;
63
- }
64
- }
65
-
66
- else {
67
- if (size > MAX_WORD_LENGTH)
68
- size = MAX_WORD_LENGTH;
69
-
70
- for (i = MIN_WORD_LENGTH; i < size; ++i) {
71
- if (src[i] == ' ')
72
- break;
73
-
74
- if (src[i] == ';') {
75
- const struct html_ent *entity = find_entity((char *)src, i);
76
-
77
- if (entity != NULL) {
78
- gh_buf_put(ob, entity->utf8, entity->utf8_len);
79
- return i + 1;
80
- }
81
-
82
- break;
83
- }
84
- }
85
- }
86
-
87
- gh_buf_putc(ob, '&');
88
- return 0;
89
- }
90
-
91
- int
92
- houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
93
- {
94
- size_t i = 0, org;
95
-
96
- while (i < size) {
97
- org = i;
98
- while (i < size && src[i] != '&')
99
- i++;
100
-
101
- if (likely(i > org)) {
102
- if (unlikely(org == 0)) {
103
- if (i >= size)
104
- return 0;
105
-
106
- gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
107
- }
108
-
109
- gh_buf_put(ob, src + org, i - org);
110
- }
111
-
112
- /* escaping */
113
- if (i >= size)
114
- break;
115
-
116
- i++;
117
- i += unescape_ent(ob, src + i, size - i);
118
- }
119
-
120
- return 1;
121
- }
122
-