berns 3.0.4 → 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/berns/extconf.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  require 'mkmf'
3
3
 
4
# Append to the flags mkmf already derived from Ruby's build configuration
# instead of replacing them, and only request SSE4 on x86 hosts: compilers
# targeting other architectures (e.g. ARM) may reject -msse4 outright.
$CFLAGS << ' -O3' # rubocop:disable Style/GlobalVars
$CFLAGS << ' -msse4' if RbConfig::CONFIG['host_cpu'].match?(/x86_64|amd64|i[3-6]86/i) # rubocop:disable Style/GlobalVars

dir_config('berns')
4
7
  create_header
5
8
  create_makefile 'berns/berns'
@@ -0,0 +1,167 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include <stdlib.h>
4
+ #include "hescape.h"
5
+
6
+ #ifdef __SSE4_2__
7
+ # ifdef _MSC_VER
8
+ # include <nmmintrin.h>
9
+ # else
10
+ # include <x86intrin.h>
11
+ # endif
12
+ #endif
13
+
14
+ #if __GNUC__ >= 3
15
+ # define likely(x) __builtin_expect(!!(x), 1)
16
+ # define unlikely(x) __builtin_expect(!!(x), 0)
17
+ #else
18
+ # define likely(x) (x)
19
+ # define unlikely(x) (x)
20
+ #endif
21
+
22
/*
 * Replacement text for each escape index produced by HTML_ESCAPE_TABLE.
 * Index 0 means "no escape" and is an empty placeholder.
 *
 * Declared as const char * because string literals are arrays of char;
 * the entries are only read as raw bytes when copied into the output.
 */
static const char *const ESCAPED_STRING[] = {
  "",
  "&quot;",
  "&amp;",
  "&#39;",
  "&lt;",
  "&gt;",
};
30
+
31
// Length of ESCAPED_STRING[x], computed arithmetically instead of via strlen.
// Mapping: 1 => 6, 2 => 5, 3 => 5, 4 => 4, 5 => 4
// The argument is parenthesized so that compound expressions expand correctly.
#define ESC_LEN(x) ((13 - (x)) / 2)
34
+
35
/*
 * Given an ASCII-compatible byte, return its index into ESCAPED_STRING
 * (0 means the byte is copied through unchanged).
 *
 * " (34) => 1 (&quot;)
 * & (38) => 2 (&amp;)
 * ' (39) => 3 (&#39;)
 * < (60) => 4 (&lt;)
 * > (62) => 5 (&gt;)
 *
 * Only the five escaped bytes are listed; every other entry of the
 * 256-element table is implicitly zero-initialized.
 */
static const char HTML_ESCAPE_TABLE[256] = {
  ['"']  = 1,
  ['&']  = 2,
  ['\''] = 3,
  ['<']  = 4,
  ['>']  = 5,
};
62
+
63
/*
 * Make sure buf can hold at least size bytes, growing it if necessary.
 *
 * buf   - current heap buffer, or NULL if nothing has been allocated yet.
 * size  - number of bytes the caller needs (callers in this file always
 *         pass a non-zero value).
 * asize - in/out: capacity of buf in bytes. Updated only after a
 *         successful grow.
 *
 * Returns buf unchanged when it is already large enough, otherwise the
 * (possibly moved) reallocated buffer. Returns NULL if realloc fails; in
 * that case buf is still valid and *asize is left untouched.
 */
static uint8_t *
ensure_allocated(uint8_t *buf, size_t size, size_t *asize)
{
  // An exact fit needs no reallocation.
  if (size <= *asize)
    return buf;

  // The first allocation is exact; later ones start from the current capacity.
  size_t new_size = (*asize == 0) ? size : *asize;

  // Grow by 1.5x (rounded up) per step until the request fits.
  while (new_size < size) {
    size_t step = new_size - (new_size >> 1);
    if (new_size > SIZE_MAX - step) {
      // Another 1.5x step would wrap around size_t; ask for exactly size.
      new_size = size;
      break;
    }
    new_size += step;
  }

  uint8_t *grown = realloc(buf, new_size);
  if (grown != NULL)
    *asize = new_size;
  return grown;
}
83
+
84
#ifdef __SSE4_2__
/*
 * Scan buf forward from offset i, 16 bytes at a time, for any of the first
 * range_size bytes stored in range.
 *
 * Only whole 16-byte chunks are examined: the scan covers (size - i) rounded
 * down to a multiple of 16 bytes. One chunk is always loaded before the
 * remaining length is checked, so the caller must guarantee size - i >= 16.
 *
 * Returns the offset of the first matching byte with *found set to 1, or the
 * offset just past the last chunk examined with *found left untouched.
 */
static size_t
find_char_fast(const char *buf, size_t i, size_t size, __m128i range, size_t range_size, int *found)
{
  size_t left = (size - i) & ~(size_t)15;

  do {
    // Unaligned load; the pointer stays const-qualified because buf is read-only.
    __m128i chunk = _mm_loadu_si128((const __m128i *)(buf + i));
    // An index of 16 means none of the 16 bytes in this chunk matched.
    int index = _mm_cmpestri(range, (int)range_size, chunk, 16, _SIDD_CMP_EQUAL_ANY);
    if (unlikely(index != 16)) {
      *found = 1;
      return i + (size_t)index;
    }
    i += 16;
    left -= 16;
  } while (likely(left != 0));

  return i;
}
#endif
104
+
105
/*
 * Copy the run of input bytes that has not been written to the output yet.
 *
 * rbuf   - output buffer.
 * rbuf_i - number of bytes already written to rbuf.
 * buf    - input buffer.
 * buf_i  - input offset at which the pending run ends (exclusive).
 * esize  - extra bytes that escapes have added to the output so far.
 *
 * The output is ahead of the input by esize bytes, so the pending run
 * starts at input offset rbuf_i - esize.
 *
 * Returns the new output length, buf_i + esize.
 */
static inline size_t
append_pending_buf(uint8_t *rbuf, size_t rbuf_i, const uint8_t *buf, size_t buf_i, size_t esize)
{
  const size_t pending_start = rbuf_i - esize;
  const size_t pending_len = buf_i - pending_start;

  memcpy(rbuf + rbuf_i, buf + pending_start, pending_len);
  return esize + buf_i;
}
111
+
112
+ static inline size_t
113
+ append_escaped_buf(uint8_t *rbuf, size_t rbuf_i, size_t esc_i, size_t *esize)
114
+ {
115
+ memcpy(rbuf + rbuf_i, ESCAPED_STRING[esc_i], ESC_LEN(esc_i));
116
+ *esize += ESC_LEN(esc_i) - 1;
117
+ return rbuf_i + ESC_LEN(esc_i);
118
+ }
119
+
120
+ size_t
121
+ hesc_escape_html(uint8_t **dest, const uint8_t *buf, size_t size)
122
+ {
123
+ size_t asize = 0, esc_i, esize = 0, i = 0, rbuf_i = 0;
124
+ const uint8_t *esc;
125
+ uint8_t *rbuf = NULL;
126
+
127
+ # ifdef __SSE4_2__
128
+ __m128i escapes5 = _mm_loadu_si128((const __m128i *)"\"&'<>");
129
+ while (likely(size - i >= 16)) {
130
+ int found = 0;
131
+ if (unlikely((esc_i = HTML_ESCAPE_TABLE[buf[i]]) == 0)) {
132
+ i = find_char_fast(buf, i, size, escapes5, 5, &found);
133
+ if (!found) break;
134
+ esc_i = HTML_ESCAPE_TABLE[buf[i]];
135
+ }
136
+ rbuf = ensure_allocated(rbuf, sizeof(uint8_t) * (size + esize + ESC_LEN(esc_i) + 1), &asize);
137
+ rbuf_i = append_pending_buf(rbuf, rbuf_i, buf, i, esize);
138
+ rbuf_i = append_escaped_buf(rbuf, rbuf_i, esc_i, &esize);
139
+ i++;
140
+ }
141
+ # endif
142
+
143
+ while (i < size) {
144
+ // Loop here to skip non-escaped characters fast.
145
+ while (i < size && (esc_i = HTML_ESCAPE_TABLE[buf[i]]) == 0)
146
+ i++;
147
+
148
+ if (esc_i) {
149
+ rbuf = ensure_allocated(rbuf, sizeof(uint8_t) * (size + esize + ESC_LEN(esc_i) + 1), &asize);
150
+ rbuf_i = append_pending_buf(rbuf, rbuf_i, buf, i, esize);
151
+ rbuf_i = append_escaped_buf(rbuf, rbuf_i, esc_i, &esize);
152
+ }
153
+ i++;
154
+ }
155
+
156
+ if (rbuf_i == 0) {
157
+ // Return given buf and size if there are no escaped characters.
158
+ *dest = (uint8_t *)buf;
159
+ return size;
160
+ } else {
161
+ append_pending_buf(rbuf, rbuf_i, buf, size, esize);
162
+ rbuf[size + esize] = '\0';
163
+
164
+ *dest = rbuf;
165
+ return size + esize;
166
+ }
167
+ }
@@ -0,0 +1,21 @@
1
+ #ifndef HESCAPE_H
2
+ #define HESCAPE_H
3
+
4
+ #include <sys/types.h>
5
+ #include <stdint.h>
6
+
7
+ /*
8
+ * Replace characters according to the following rules.
9
+ * Note that this function can handle only ASCII-compatible string.
10
+ *
11
+ * " => &quot;
12
+ * & => &amp;
13
+ * ' => &#39;
14
+ * < => &lt;
15
+ * > => &gt;
16
+ *
17
+ * @return size of dest. If it's larger than size, dest is a newly allocated buffer that the caller must free.
18
+ */
19
+ extern size_t hesc_escape_html(uint8_t **dest, const uint8_t *src, size_t size);
20
+
21
+ #endif
data/lib/berns.rb CHANGED
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'cgi/escape'
3
-
4
2
  require 'berns/berns'
5
3
  require 'berns/version'
6
4
 
Binary file
data/lib/berns/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Berns
3
- VERSION = '3.0.4'
3
+ VERSION = '3.1.2'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: berns
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.4
4
+ version: 3.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Taylor Beck
@@ -9,16 +9,16 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-04-15 00:00:00.000000000 Z
12
+ date: 2021-05-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: cgi
15
+ name: benchmark-ips
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
18
  - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '0'
21
- type: :runtime
21
+ type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
@@ -109,6 +109,20 @@ dependencies:
109
109
  - - ">="
110
110
  - !ruby/object:Gem::Version
111
111
  version: '0'
112
+ - !ruby/object:Gem::Dependency
113
+ name: rubocop-packaging
114
+ requirement: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ type: :development
120
+ prerelease: false
121
+ version_requirements: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - ">="
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
112
126
  - !ruby/object:Gem::Dependency
113
127
  name: rubocop-performance
114
128
  requirement: !ruby/object:Gem::Requirement
@@ -146,19 +160,14 @@ extensions:
146
160
  - ext/berns/extconf.rb
147
161
  extra_rdoc_files: []
148
162
  files:
149
- - ".editorconfig"
150
- - ".github/workflows/main.yml"
151
- - ".gitignore"
152
- - ".rubocop.yml"
153
- - CHANGELOG.org
154
- - Gemfile
155
163
  - LICENSE.txt
156
164
  - README.org
157
- - Rakefile
158
- - berns.gemspec
159
165
  - ext/berns/berns.c
160
166
  - ext/berns/extconf.rb
167
+ - ext/berns/hescape.c
168
+ - ext/berns/hescape.h
161
169
  - lib/berns.rb
170
+ - lib/berns/berns.so
162
171
  - lib/berns/version.rb
163
172
  homepage: https://github.com/evanleck/berns
164
173
  licenses:
data/.editorconfig DELETED
@@ -1,20 +0,0 @@
1
- # http://EditorConfig.org
2
- # This is the top most config file.
3
- root = true
4
-
5
- # All files
6
- [*]
7
-
8
- # Unix-style newlines with a newline ending every file
9
- end_of_line = lf
10
- insert_final_newline = true
11
-
12
- # Character set
13
- charset = utf-8
14
-
15
- # Trim extra whitespace.
16
- trim_trailing_whitespace = true
17
-
18
- # Soft tabs and 2 spaces.
19
- indent_style = space
20
- indent_size = 2
@@ -1,24 +0,0 @@
1
- name: Ruby
2
-
3
- on: [push,pull_request]
4
-
5
- jobs:
6
- test:
7
- strategy:
8
- fail-fast: false
9
- matrix:
10
- os: [ubuntu-latest, macos-latest]
11
- ruby-version: ['3.0', 2.7, 2.6, 2.5]
12
-
13
- runs-on: ${{ matrix.os }}
14
- steps:
15
- - uses: actions/checkout@v2
16
-
17
- - name: Set up Ruby ${{ matrix.ruby-version }}
18
- uses: ruby/setup-ruby@v1
19
- with:
20
- bundler-cache: true
21
- ruby-version: ${{ matrix.ruby-version }}
22
-
23
- - name: Run tests & lint
24
- run: bundle exec rake
data/.gitignore DELETED
@@ -1,9 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /Gemfile.lock
4
- /_yardoc/
5
- /coverage/
6
- /doc/
7
- /pkg/
8
- /spec/reports/
9
- /tmp/
data/.rubocop.yml DELETED
@@ -1,58 +0,0 @@
1
- # http://rubocop.readthedocs.io
2
- # https://github.com/bbatsov/rubocop/blob/master/config/enabled.yml
3
- require:
4
- - rubocop-minitest
5
- - rubocop-performance
6
- - rubocop-rake
7
-
8
- AllCops:
9
- DisplayCopNames: true
10
- DisplayStyleGuide: true
11
- ExtraDetails: true
12
- NewCops: enable
13
- TargetRubyVersion: 2.5
14
-
15
- Layout/ParameterAlignment:
16
- EnforcedStyle: with_fixed_indentation
17
-
18
- Layout/EmptyLineAfterMagicComment:
19
- Enabled: false
20
-
21
- Layout/FirstHashElementIndentation:
22
- EnforcedStyle: consistent
23
-
24
- Layout/MultilineOperationIndentation:
25
- EnforcedStyle: indented
26
-
27
- Layout/SpaceInsideStringInterpolation:
28
- EnforcedStyle: space
29
-
30
- Layout/LineLength:
31
- Enabled: false
32
-
33
- Metrics/AbcSize:
34
- Enabled: false
35
-
36
- Metrics/BlockLength:
37
- Enabled: false
38
-
39
- Metrics/ClassLength:
40
- Enabled: false
41
-
42
- Metrics/CyclomaticComplexity:
43
- Enabled: false
44
-
45
- Metrics/MethodLength:
46
- Enabled: false
47
-
48
- Metrics/ModuleLength:
49
- Enabled: false
50
-
51
- Metrics/PerceivedComplexity:
52
- Enabled: false
53
-
54
- Style/IfUnlessModifier:
55
- Enabled: false
56
-
57
- Style/Next:
58
- MinBodyLength: 8
data/CHANGELOG.org DELETED
@@ -1,146 +0,0 @@
1
- * Berns Changelog
2
-
3
- ** 3.0.4
4
-
5
- Fix an =ArgumentError= when passing a nested empty hash to =to_attribute=.
6
-
7
- ** 3.0.3
8
-
9
- Fix a buffer overflow error.
10
-
11
- ** 3.0.2
12
-
13
- Ensure all returned strings are UTF-8 encoded.
14
-
15
- ** 3.0.1
16
-
17
- Fix a regression when content blocks are nil. They should be treated the same as
18
- if they are not there instead of throwing an error.
19
-
20
- ** 3.0.0
21
-
22
- Version 3.0 is another mostly API-compatible refactor of Berns, this time in
23
- blazing fast C! I debated simply calling this version 2.1.0 but because it's a
24
- complete rewrite it didn't seem right to do a simple point release and there may
25
- be corner cases that I've not accounted for in this new C-backed version.
26
-
27
- Running the same benchmarks as from 2.0 but pitting 2.0 against 3.0 yields some
28
- great speed improvements, particularly for the =empty= and =simple= cases.
29
-
30
- /These benchmarks were performed on a desktop with an AMD Ryzen 5 3600X 6-Core
31
- Processor running Linux Mint 20.1 and kernel 5.4./
32
-
33
- Before:
34
-
35
- #+begin_example
36
- empty 1.668M (± 0.6%) i/s - 8.356M in 5.011099s
37
- simple 442.102k (± 1.3%) i/s - 2.214M in 5.008068s
38
- nested 267.716k (± 0.4%) i/s - 1.357M in 5.068747s
39
- #+end_example
40
-
41
- After:
42
-
43
- #+begin_example
44
- empty 3.573M (± 1.2%) i/s - 17.881M in 5.005001s
45
- simple 840.631k (± 0.6%) i/s - 4.253M in 5.059771s
46
- nested 267.281k (± 0.5%) i/s - 1.347M in 5.037887s
47
- #+end_example
48
-
49
- With both empty and simple attributes we see performance effectively double, and
50
- with nested attributes performance remains more or less the same.
51
-
52
- This is another set of fairly contrived benchmarks, testing a singleton method,
53
- =void= call, and =element= call against each other.
54
-
55
- Before:
56
-
57
- #+begin_example
58
- br 3.061M (± 0.8%) i/s - 15.613M in 5.100154s
59
- void("br") 6.141M (± 1.4%) i/s - 30.990M in 5.047338s
60
- element("div") 2.789M (± 0.6%) i/s - 14.171M in 5.080626s
61
- #+end_example
62
-
63
- After:
64
-
65
- #+begin_example
66
- br 8.155M (± 1.0%) i/s - 41.339M in 5.069681s
67
- void("br") 9.782M (± 1.5%) i/s - 49.096M in 5.020114s
68
- element("div") 6.769M (± 1.1%) i/s - 33.983M in 5.021362s
69
- #+end_example
70
-
71
- Lastly, benchmarking =to_attributes= with the following hash as the only
72
- argument shows about double the performance with 3.0.
73
-
74
- #+begin_src ruby
75
- ATTRS = { this: 'tag', should: 'work', data: { foo: 'bar', bar: { baz: 'foo' } } }.freeze
76
- #+end_src
77
-
78
- Before:
79
-
80
- #+begin_example
81
- to_attributes 228.829k (± 1.3%) i/s - 1.159M in 5.065714s
82
- #+end_example
83
-
84
- After:
85
-
86
- #+begin_example
87
- to_attributes 457.387k (± 1.2%) i/s - 2.305M in 5.041036s
88
- #+end_example
89
-
90
- ** 2.0.0
91
-
92
- Version 2.0 is a mostly API-compatible refactor of all of the core
93
- methods that make up Berns. The goal is to improve performance, mostly
94
- using mutable strings and inlining variables that were otherwise short
95
- lived.
96
-
97
- In addition, the target Ruby version has been raised to 2.5 or later.
98
- 2.4 has reached its end of life.
99
-
100
- Running this benchmarking code:
101
-
102
- #+begin_src ruby
103
- Benchmark.ips do |x|
104
- x.report('empty') { Berns.element(:a) { 'Link to something' } }
105
- x.report('simple') { Berns.element(:a, { href: 'Something', class: 'my-class' }) { 'Link to something' } }
106
- x.report('nested') { Berns.element(:a, { href: 'Something', class: 'my-class', data: { something: 'Else' } }) { 'Link to something' } }
107
-
108
- x.compare!
109
- end
110
- #+end_src
111
-
112
- Before:
113
-
114
- #+begin_example
115
- empty 993.521k (± 1.7%) i/s - 5.062M in 5.096368s
116
- simple 340.795k (± 0.4%) i/s - 1.729M in 5.074101s
117
- nested 215.160k (± 1.0%) i/s - 1.081M in 5.025324s
118
- #+end_example
119
-
120
- After:
121
-
122
- #+begin_example
123
- empty 1.769M (± 1.9%) i/s - 9.012M in 5.094973s
124
- simple 441.020k (± 1.0%) i/s - 2.233M in 5.063326s
125
- nested 280.255k (± 3.0%) i/s - 1.400M in 5.001009s
126
- #+end_example
127
-
128
- With empty attributes we see ~ 100% increase in iterations per second,
129
- with simple attributes we see ~ 30% increase in the same, and with
130
- nested attributes we see ~ 30% increase as well.
131
-
132
- ** 1.3.0
133
-
134
- With version 1.3, nested HTML attributes can be created with nil keys
135
- and boolean values to produce e.g. "data-foo data-foo-bar='whatever'"
136
- from =data: { foo: { nil => true, bar: 'whatever' } }=
137
-
138
- ** 1.2.0 - 1.2.2
139
-
140
- Starting with version 1.2, Berns will now HTML-escape all attribute
141
- values using =CGI.escapeHTML=. This should prevent attribute values from
142
- escaping themselves and injecting HTML into the DOM.
143
-
144
- ** 1.1.0
145
-
146
- - Add =#sanitize= method.