escape_utils 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +43 -0
  3. data/.gitignore +0 -1
  4. data/CHANGELOG.md +23 -0
  5. data/Gemfile +15 -0
  6. data/README.md +48 -91
  7. data/Rakefile +4 -2
  8. data/benchmark/html_escape_once.rb +25 -0
  9. data/benchmark/javascript_escape.rb +1 -1
  10. data/benchmark/javascript_unescape.rb +1 -1
  11. data/benchmark/url_decode.rb +28 -0
  12. data/benchmark/url_encode.rb +37 -0
  13. data/benchmark/xml_escape.rb +7 -11
  14. data/bin/console +8 -0
  15. data/escape_utils.gemspec +1 -12
  16. data/ext/escape_utils/escape_utils.c +8 -115
  17. data/ext/escape_utils/houdini.h +3 -5
  18. data/ext/escape_utils/houdini_html_e.c +52 -24
  19. data/ext/escape_utils/houdini_js_e.c +15 -3
  20. data/ext/escape_utils/houdini_uri_e.c +7 -18
  21. data/ext/escape_utils/houdini_uri_u.c +5 -15
  22. data/ext/escape_utils/houdini_xml_e.c +15 -1
  23. data/lib/escape_utils/html/cgi.rb +10 -8
  24. data/lib/escape_utils/html/erb.rb +1 -10
  25. data/lib/escape_utils/html/haml.rb +1 -7
  26. data/lib/escape_utils/html/rack.rb +3 -3
  27. data/lib/escape_utils/html_safety.rb +13 -0
  28. data/lib/escape_utils/url/cgi.rb +0 -8
  29. data/lib/escape_utils/url/erb.rb +1 -1
  30. data/lib/escape_utils/url/rack.rb +0 -12
  31. data/lib/escape_utils/url/uri.rb +11 -7
  32. data/lib/escape_utils/version.rb +1 -1
  33. data/lib/escape_utils/xml/builder.rb +2 -2
  34. data/lib/escape_utils.rb +61 -9
  35. data/test/helper.rb +16 -3
  36. data/test/html/escape_test.rb +66 -42
  37. data/test/html/unescape_test.rb +3 -21
  38. data/test/html_safety_test.rb +1 -27
  39. data/test/javascript/escape_test.rb +53 -20
  40. data/test/javascript/unescape_test.rb +16 -18
  41. data/test/query/escape_test.rb +3 -21
  42. data/test/query/unescape_test.rb +5 -23
  43. data/test/uri/escape_test.rb +16 -18
  44. data/test/uri/unescape_test.rb +17 -19
  45. data/test/uri_component/escape_test.rb +15 -17
  46. data/test/uri_component/unescape_test.rb +17 -19
  47. data/test/url/escape_test.rb +3 -21
  48. data/test/url/unescape_test.rb +5 -23
  49. data/test/xml/escape_test.rb +15 -17
  50. metadata +14 -127
  51. data/.travis.yml +0 -7
  52. data/benchmark/html_escape.rb +0 -68
  53. data/benchmark/html_unescape.rb +0 -35
  54. data/benchmark/url_escape.rb +0 -56
  55. data/benchmark/url_unescape.rb +0 -50
  56. data/ext/escape_utils/houdini_html_u.c +0 -122
@@ -1,122 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
- #include "html_unescape.h"
7
-
8
- static inline void
9
- gh_buf_put_utf8(gh_buf *ob, int c)
10
- {
11
- unsigned char unichar[4];
12
-
13
- if (c < 0x80) {
14
- gh_buf_putc(ob, c);
15
- }
16
- else if (c < 0x800) {
17
- unichar[0] = 192 + (c / 64);
18
- unichar[1] = 128 + (c % 64);
19
- gh_buf_put(ob, unichar, 2);
20
- }
21
- else if (c - 0xd800u < 0x800) {
22
- gh_buf_putc(ob, '?');
23
- }
24
- else if (c < 0x10000) {
25
- unichar[0] = 224 + (c / 4096);
26
- unichar[1] = 128 + (c / 64) % 64;
27
- unichar[2] = 128 + (c % 64);
28
- gh_buf_put(ob, unichar, 3);
29
- }
30
- else if (c < 0x110000) {
31
- unichar[0] = 240 + (c / 262144);
32
- unichar[1] = 128 + (c / 4096) % 64;
33
- unichar[2] = 128 + (c / 64) % 64;
34
- unichar[3] = 128 + (c % 64);
35
- gh_buf_put(ob, unichar, 4);
36
- }
37
- else {
38
- gh_buf_putc(ob, '?');
39
- }
40
- }
41
-
42
- static size_t
43
- unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
44
- {
45
- size_t i = 0;
46
-
47
- if (size > 3 && src[0] == '#') {
48
- int codepoint = 0;
49
-
50
- if (_isdigit(src[1])) {
51
- for (i = 1; i < size && _isdigit(src[i]); ++i)
52
- codepoint = (codepoint * 10) + (src[i] - '0');
53
- }
54
-
55
- else if (src[1] == 'x' || src[1] == 'X') {
56
- for (i = 2; i < size && _isxdigit(src[i]); ++i)
57
- codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
58
- }
59
-
60
- if (i < size && src[i] == ';' && codepoint) {
61
- gh_buf_put_utf8(ob, codepoint);
62
- return i + 1;
63
- }
64
- }
65
-
66
- else {
67
- if (size > MAX_WORD_LENGTH)
68
- size = MAX_WORD_LENGTH;
69
-
70
- for (i = MIN_WORD_LENGTH; i < size; ++i) {
71
- if (src[i] == ' ')
72
- break;
73
-
74
- if (src[i] == ';') {
75
- const struct html_ent *entity = find_entity((char *)src, i);
76
-
77
- if (entity != NULL) {
78
- gh_buf_put(ob, entity->utf8, entity->utf8_len);
79
- return i + 1;
80
- }
81
-
82
- break;
83
- }
84
- }
85
- }
86
-
87
- gh_buf_putc(ob, '&');
88
- return 0;
89
- }
90
-
91
- int
92
- houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
93
- {
94
- size_t i = 0, org;
95
-
96
- while (i < size) {
97
- org = i;
98
- while (i < size && src[i] != '&')
99
- i++;
100
-
101
- if (likely(i > org)) {
102
- if (unlikely(org == 0)) {
103
- if (i >= size)
104
- return 0;
105
-
106
- gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
107
- }
108
-
109
- gh_buf_put(ob, src + org, i - org);
110
- }
111
-
112
- /* escaping */
113
- if (i >= size)
114
- break;
115
-
116
- i++;
117
- i += unescape_ent(ob, src + i, size - i);
118
- }
119
-
120
- return 1;
121
- }
122
-