berns 3.0.6 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/berns/extconf.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  require 'mkmf'
3
3
 
4
# Extend the flags mkmf already computed instead of replacing them, and only
# request SSE4 when the compiler accepts the option, so non-x86 targets still
# build (hescape.c uses its scalar loop when __SSE4_2__ is not defined).
$CFLAGS += ' -O3' # rubocop:disable Style/GlobalVars
$CFLAGS += ' -msse4' if try_cflags('-msse4') # rubocop:disable Style/GlobalVars
5
+
6
+ dir_config('berns')
4
7
  create_header
5
8
  create_makefile 'berns/berns'
@@ -0,0 +1,167 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include <stdlib.h>
4
+ #include "hescape.h"
5
+
6
+ #ifdef __SSE4_2__
7
+ # ifdef _MSC_VER
8
+ # include <nmmintrin.h>
9
+ # else
10
+ # include <x86intrin.h>
11
+ # endif
12
+ #endif
13
+
14
/*
 * Branch-prediction hints.  With GCC 3 or newer (and compilers that define
 * __GNUC__ compatibly) they expand to __builtin_expect on the truthiness of
 * the condition; elsewhere they are transparent wrappers around it.
 */
#if defined(__GNUC__) && (__GNUC__ >= 3)
# define likely(x)   __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
#else
# define likely(x)   (x)
# define unlikely(x) (x)
#endif
21
+
22
/*
 * Replacement entities, indexed by the codes stored in HTML_ESCAPE_TABLE.
 * Slot 0 is the placeholder for "no escape needed".
 *
 * The elements are plain `char` strings (that is what string literals are),
 * and both the characters and the pointer slots are read-only.  The entries
 * are only ever handed to memcpy, so the element type needs no cast there.
 */
static const char *const ESCAPED_STRING[] = {
  "",
  "&quot;",
  "&amp;",
  "&#39;",
  "&lt;",
  "&gt;",
};
30
+
31
// Length of ESCAPED_STRING[x] for x in 1..5, computed arithmetically instead
// of calling strlen.
// Mapping: 1 => 6, 2 => 5, 3 => 5, 4 => 4, 5 => 4
// The argument is parenthesized so that expression arguments such as
// ESC_LEN(a + b) expand correctly.
#define ESC_LEN(x) ((13 - (x)) / 2)
34
+
35
/*
 * Lookup table from a byte value to its index in ESCAPED_STRING.
 * Every byte that needs no escaping maps to 0; only these five are non-zero:
 *
 *   "  (34) => 1  (&quot;)
 *   &  (38) => 2  (&amp;)
 *   '  (39) => 3  (&#39;)
 *   <  (60) => 4  (&lt;)
 *   >  (62) => 5  (&gt;)
 *
 * The table has one entry for each of the 256 possible byte values, so it can
 * be indexed directly with a uint8_t.
 */
static const char HTML_ESCAPE_TABLE[256] = {
  ['"']  = 1,
  ['&']  = 2,
  ['\''] = 3,
  ['<']  = 4,
  ['>']  = 5,
};
62
+
63
/*
 * Make sure `buf` can hold at least `size` bytes.
 *
 * buf    Current buffer, or NULL when nothing has been allocated yet.
 * size   Number of bytes the caller needs.
 * asize  In/out: current capacity of `buf`; updated only when the buffer
 *        was successfully resized.
 *
 * Returns `buf` unchanged when `size` is already below the capacity.
 * Otherwise returns the result of realloc: the (possibly moved) buffer on
 * success, or NULL on failure.  On failure the old buffer is still valid and
 * still owned by the caller, and *asize still describes it.
 */
static uint8_t *
ensure_allocated(uint8_t *buf, size_t size, size_t *asize)
{
  size_t new_size;
  uint8_t *grown;

  if (size < *asize)
    return buf;

  // The first allocation takes exactly what was asked for.
  new_size = (*asize == 0) ? size : *asize;

  // Later allocations grow the capacity by 1.5x until the request fits.
  while (new_size < size) {
    if (new_size > SIZE_MAX / 2) {
      // Doubling would wrap around size_t (and could loop forever);
      // fall back to the exact request.
      new_size = size;
      break;
    }
    new_size = (new_size << 1) - (new_size >> 1);
  }

  grown = realloc(buf, new_size);
  if (grown != NULL)
    *asize = new_size; // Record the new capacity only once it really exists.
  return grown;
}
83
+
84
#ifdef __SSE4_2__
/*
 * Scan `buf` from offset `i` in 16-byte steps for any of the first
 * `range_size` bytes held in `range`.
 *
 * buf         Input bytes (same element type the caller uses).
 * i           Offset to start scanning from.
 * size        Total length of `buf`.
 * range       Vector whose leading bytes are the characters to look for.
 * range_size  How many leading bytes of `range` are meaningful.
 * found       Set to 1 when a match is found; left untouched otherwise.
 *
 * Returns the offset of the first match, or the offset just past the last
 * full 16-byte chunk scanned when nothing matched.
 *
 * Precondition: at least 16 bytes remain (size - i >= 16).  The loop body
 * runs before the remaining-length check, so a shorter tail would be read
 * past its end.
 */
static size_t
find_char_fast(const uint8_t *buf, size_t i, size_t size, __m128i range, size_t range_size, int *found)
{
  // Number of bytes covered by whole 16-byte chunks.
  size_t left = (size - i) & ~(size_t)15;
  do {
    const __m128i b16 = _mm_loadu_si128((const __m128i *)(buf + i));
    // Index of the first matching byte in the chunk, or 16 if none matched.
    const int index = _mm_cmpestri(range, (int)range_size, b16, 16, _SIDD_CMP_EQUAL_ANY);
    if (unlikely(index != 16)) {
      i += (size_t)index;
      *found = 1;
      break;
    }
    i += 16;
    left -= 16;
  } while (likely(left != 0));

  return i;
}
#endif
104
+
105
/*
 * Copy the not-yet-copied, unescaped input bytes into the output buffer.
 *
 * rbuf    Output buffer.
 * rbuf_i  Current write offset in `rbuf`.
 * buf     Input buffer.
 * buf_i   Input offset at which the pending run ends (exclusive).
 * esize   Extra bytes added to the output so far by escaping, so that
 *         rbuf_i - esize is the input offset where the pending run starts.
 *
 * Returns the new write offset in `rbuf`, i.e. buf_i + esize.
 */
static inline size_t
append_pending_buf(uint8_t *rbuf, size_t rbuf_i, const uint8_t *buf, size_t buf_i, size_t esize)
{
  const size_t src_start = rbuf_i - esize;
  const size_t run_len = buf_i - src_start;

  memcpy(rbuf + rbuf_i, buf + src_start, run_len);
  return buf_i + esize;
}
112
+ static inline size_t
113
+ append_escaped_buf(uint8_t *rbuf, size_t rbuf_i, size_t esc_i, size_t *esize)
114
+ {
115
+ memcpy(rbuf + rbuf_i, ESCAPED_STRING[esc_i], ESC_LEN(esc_i));
116
+ *esize += ESC_LEN(esc_i) - 1;
117
+ return rbuf_i + ESC_LEN(esc_i);
118
+ }
119
+
120
+ size_t
121
+ hesc_escape_html(uint8_t **dest, const uint8_t *buf, size_t size)
122
+ {
123
+ size_t asize = 0, esc_i, esize = 0, i = 0, rbuf_i = 0;
124
+ const uint8_t *esc;
125
+ uint8_t *rbuf = NULL;
126
+
127
+ # ifdef __SSE4_2__
128
+ __m128i escapes5 = _mm_loadu_si128((const __m128i *)"\"&'<>");
129
+ while (likely(size - i >= 16)) {
130
+ int found = 0;
131
+ if (unlikely((esc_i = HTML_ESCAPE_TABLE[buf[i]]) == 0)) {
132
+ i = find_char_fast(buf, i, size, escapes5, 5, &found);
133
+ if (!found) break;
134
+ esc_i = HTML_ESCAPE_TABLE[buf[i]];
135
+ }
136
+ rbuf = ensure_allocated(rbuf, sizeof(uint8_t) * (size + esize + ESC_LEN(esc_i) + 1), &asize);
137
+ rbuf_i = append_pending_buf(rbuf, rbuf_i, buf, i, esize);
138
+ rbuf_i = append_escaped_buf(rbuf, rbuf_i, esc_i, &esize);
139
+ i++;
140
+ }
141
+ # endif
142
+
143
+ while (i < size) {
144
+ // Loop here to skip non-escaped characters fast.
145
+ while (i < size && (esc_i = HTML_ESCAPE_TABLE[buf[i]]) == 0)
146
+ i++;
147
+
148
+ if (esc_i) {
149
+ rbuf = ensure_allocated(rbuf, sizeof(uint8_t) * (size + esize + ESC_LEN(esc_i) + 1), &asize);
150
+ rbuf_i = append_pending_buf(rbuf, rbuf_i, buf, i, esize);
151
+ rbuf_i = append_escaped_buf(rbuf, rbuf_i, esc_i, &esize);
152
+ }
153
+ i++;
154
+ }
155
+
156
+ if (rbuf_i == 0) {
157
+ // Return given buf and size if there are no escaped characters.
158
+ *dest = (uint8_t *)buf;
159
+ return size;
160
+ } else {
161
+ append_pending_buf(rbuf, rbuf_i, buf, size, esize);
162
+ rbuf[size + esize] = '\0';
163
+
164
+ *dest = rbuf;
165
+ return size + esize;
166
+ }
167
+ }
@@ -0,0 +1,21 @@
1
+ #ifndef HESCAPE_H
2
+ #define HESCAPE_H
3
+
4
+ #include <sys/types.h>
5
+ #include <stdint.h>
6
+
7
+ /*
8
+ * Replace characters according to the following rules.
9
+ * Note that this function can handle only ASCII-compatible string.
10
+ *
11
+ * " => &quot;
12
+ * & => &amp;
13
+ * ' => &#39;
14
+ * < => &lt;
15
+ * > => &gt;
16
+ *
17
+ * @return size of dest. If it is larger than size, dest was newly allocated and must be freed by the caller.
18
+ */
19
+ extern size_t hesc_escape_html(uint8_t **dest, const uint8_t *src, size_t size);
20
+
21
+ #endif
data/lib/berns.rb CHANGED
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'cgi/escape'
3
-
4
2
  require 'berns/berns'
5
3
  require 'berns/version'
6
4
 
data/lib/berns/berns.so CHANGED
Binary file
data/lib/berns/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Berns
3
- VERSION = '3.0.6'
3
+ VERSION = '3.1.0'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: berns
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.6
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Taylor Beck
@@ -9,22 +9,8 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-04-22 00:00:00.000000000 Z
12
+ date: 2021-05-10 00:00:00.000000000 Z
13
13
  dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: cgi
16
- requirement: !ruby/object:Gem::Requirement
17
- requirements:
18
- - - ">="
19
- - !ruby/object:Gem::Version
20
- version: '0'
21
- type: :runtime
22
- prerelease: false
23
- version_requirements: !ruby/object:Gem::Requirement
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- version: '0'
28
14
  - !ruby/object:Gem::Dependency
29
15
  name: bundler
30
16
  requirement: !ruby/object:Gem::Requirement
@@ -164,9 +150,9 @@ files:
164
150
  - README.org
165
151
  - ext/berns/berns.c
166
152
  - ext/berns/extconf.rb
167
- - ext/mkmf.log
153
+ - ext/berns/hescape.c
154
+ - ext/berns/hescape.h
168
155
  - lib/berns.rb
169
- - lib/berns/berns.bundle
170
156
  - lib/berns/berns.so
171
157
  - lib/berns/version.rb
172
158
  homepage: https://github.com/evanleck/berns
data/ext/mkmf.log DELETED
@@ -1,7 +0,0 @@
1
- extconf.h is:
2
- /* begin */
3
- 1: #ifndef EXTCONF_H
4
- 2: #define EXTCONF_H
5
- 3: #endif
6
- /* end */
7
-
Binary file