berns 3.0.2 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/berns/extconf.rb CHANGED
@@ -1,5 +1,8 @@
# frozen_string_literal: true
require 'mkmf'

# Add the optimisation flags on top of the flags mkmf inherited from the Ruby
# build instead of replacing them. append_cflags keeps each flag only when the
# compiler accepts it, so a toolchain that does not understand -msse4 still
# produces a working build (hescape.c falls back to its scalar loop).
append_cflags(%w[-O3 -msse4])

dir_config('berns')
create_header
create_makefile 'berns/berns'
@@ -0,0 +1,167 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include <stdlib.h>
4
+ #include "hescape.h"
5
+
6
+ #ifdef __SSE4_2__
7
+ # ifdef _MSC_VER
8
+ # include <nmmintrin.h>
9
+ # else
10
+ # include <x86intrin.h>
11
+ # endif
12
+ #endif
13
+
14
+ #if __GNUC__ >= 3
15
+ # define likely(x) __builtin_expect(!!(x), 1)
16
+ # define unlikely(x) __builtin_expect(!!(x), 0)
17
+ #else
18
+ # define likely(x) (x)
19
+ # define unlikely(x) (x)
20
+ #endif
21
+
/*
 * Replacement entities, indexed by the codes stored in HTML_ESCAPE_TABLE.
 * Index 0 means "no escaping needed" and is an empty placeholder.
 *
 * The entries are plain char strings (the natural type of a string literal)
 * so the array is initialised without a signedness-mismatched pointer
 * conversion; the bytes are only ever consumed through memcpy. Both the
 * strings and the pointers are const because the table is never modified.
 */
static const char *const ESCAPED_STRING[] = {
  "",
  "&quot;",
  "&amp;",
  "&#39;",
  "&lt;",
  "&gt;",
};
30
+
// This is strlen(ESCAPED_STRING[x]) for x in 1..5, computed arithmetically.
// Mapping: 1 => 6, 2 => 5, 3 => 5, 4 => 4, 5 => 4
// The argument is parenthesized so that an expression argument such as
// ESC_LEN(i + 1) expands to the intended value.
#define ESC_LEN(x) ((13 - (x)) / 2)
34
+
/*
 * Given ASCII-compatible character, return index of ESCAPED_STRING.
 * Every byte value that is not listed maps to 0 (no escaping needed).
 *
 * " (34) => 1 (&quot;)
 * & (38) => 2 (&amp;)
 * ' (39) => 3 (&#39;)
 * < (60) => 4 (&lt;)
 * > (62) => 5 (&gt;)
 *
 * The size is spelled out because the table is indexed with a raw byte and
 * therefore must have exactly 256 entries; designated initializers leave all
 * remaining entries zero.
 */
static const char HTML_ESCAPE_TABLE[256] = {
  ['"']  = 1,
  ['&']  = 2,
  ['\''] = 3,
  ['<']  = 4,
  ['>']  = 5,
};
62
+
/*
 * Make sure the output buffer can hold at least `size` bytes.
 *
 * buf   - current heap buffer, or NULL when nothing has been allocated yet.
 * size  - number of bytes the caller needs.
 * asize - in/out: number of bytes currently allocated; updated only when the
 *         (re)allocation succeeds.
 *
 * Returns buf unchanged when it is already large enough, otherwise the result
 * of realloc(). On allocation failure NULL is returned, *asize is left
 * untouched, and buf is still valid and still owned by the caller.
 */
static uint8_t*
ensure_allocated(uint8_t *buf, size_t size, size_t *asize)
{
  // A buffer of exactly `size` bytes is already sufficient.
  if (buf != NULL && size <= *asize)
    return buf;

  // The very first allocation takes exactly what was asked for.
  size_t new_size = (*asize == 0) ? size : *asize;

  // Increase buffer size by 1.5x per step until the request fits.
  while (new_size < size) {
    if (new_size > SIZE_MAX / 2) {
      // The next step would overflow size_t; settle for the exact request.
      new_size = size;
      break;
    }
    new_size = (new_size << 1) - (new_size >> 1);
  }

  uint8_t *grown = realloc(buf, new_size);
  if (grown != NULL)
    *asize = new_size;
  return grown;
}
83
+
#ifdef __SSE4_2__
/*
 * Scan buf in 16-byte chunks, starting at offset i, for the first byte equal
 * to any of the first range_size bytes held in `range`.
 *
 * Only whole 16-byte chunks of the remaining (size - i) bytes are examined;
 * any shorter tail is left for the caller. When a match is found, *found is
 * set to 1 and the offset of the matching byte is returned. Otherwise *found
 * is not modified and the offset just past the last scanned chunk is returned.
 */
static size_t
find_char_fast(const char *buf, size_t i, size_t size, __m128i range, size_t range_size, int *found)
{
  size_t left = (size - i) & ~(size_t)15;

  // Checked before the first load so that fewer than 16 remaining bytes are
  // never read.
  while (likely(left != 0)) {
    __m128i b16 = _mm_loadu_si128((const __m128i *)(buf + i));
    int index = _mm_cmpestri(range, (int)range_size, b16, 16, _SIDD_CMP_EQUAL_ANY);
    if (unlikely(index != 16)) {
      // index is the position of the first matching byte within this chunk.
      i += index;
      *found = 1;
      break;
    }
    i += 16;
    left -= 16;
  }

  return i;
}
#endif
104
+
/*
 * Copy the input bytes that have not been written to the output yet.
 *
 * rbuf   - output buffer.
 * rbuf_i - current write offset in rbuf.
 * buf    - input buffer.
 * buf_i  - input offset up to which (exclusive) bytes are copied.
 * esize  - number of extra bytes escaping has added to the output so far.
 *
 * Returns the write offset in rbuf after the copy.
 */
static inline size_t
append_pending_buf(uint8_t *rbuf, size_t rbuf_i, const uint8_t *buf, size_t buf_i, size_t esize)
{
  // The output runs ahead of the input by esize bytes, so this is the input
  // offset at which copying previously stopped.
  const size_t src_start = rbuf_i - esize;
  const size_t pending = buf_i - src_start;

  memcpy(rbuf + rbuf_i, buf + src_start, pending);
  return buf_i + esize;
}
111
+
112
+ static inline size_t
113
+ append_escaped_buf(uint8_t *rbuf, size_t rbuf_i, size_t esc_i, size_t *esize)
114
+ {
115
+ memcpy(rbuf + rbuf_i, ESCAPED_STRING[esc_i], ESC_LEN(esc_i));
116
+ *esize += ESC_LEN(esc_i) - 1;
117
+ return rbuf_i + ESC_LEN(esc_i);
118
+ }
119
+
120
+ size_t
121
+ hesc_escape_html(uint8_t **dest, const uint8_t *buf, size_t size)
122
+ {
123
+ size_t asize = 0, esc_i, esize = 0, i = 0, rbuf_i = 0;
124
+ const uint8_t *esc;
125
+ uint8_t *rbuf = NULL;
126
+
127
+ # ifdef __SSE4_2__
128
+ __m128i escapes5 = _mm_loadu_si128((const __m128i *)"\"&'<>");
129
+ while (likely(size - i >= 16)) {
130
+ int found = 0;
131
+ if (unlikely((esc_i = HTML_ESCAPE_TABLE[buf[i]]) == 0)) {
132
+ i = find_char_fast(buf, i, size, escapes5, 5, &found);
133
+ if (!found) break;
134
+ esc_i = HTML_ESCAPE_TABLE[buf[i]];
135
+ }
136
+ rbuf = ensure_allocated(rbuf, sizeof(uint8_t) * (size + esize + ESC_LEN(esc_i) + 1), &asize);
137
+ rbuf_i = append_pending_buf(rbuf, rbuf_i, buf, i, esize);
138
+ rbuf_i = append_escaped_buf(rbuf, rbuf_i, esc_i, &esize);
139
+ i++;
140
+ }
141
+ # endif
142
+
143
+ while (i < size) {
144
+ // Loop here to skip non-escaped characters fast.
145
+ while (i < size && (esc_i = HTML_ESCAPE_TABLE[buf[i]]) == 0)
146
+ i++;
147
+
148
+ if (esc_i) {
149
+ rbuf = ensure_allocated(rbuf, sizeof(uint8_t) * (size + esize + ESC_LEN(esc_i) + 1), &asize);
150
+ rbuf_i = append_pending_buf(rbuf, rbuf_i, buf, i, esize);
151
+ rbuf_i = append_escaped_buf(rbuf, rbuf_i, esc_i, &esize);
152
+ }
153
+ i++;
154
+ }
155
+
156
+ if (rbuf_i == 0) {
157
+ // Return given buf and size if there are no escaped characters.
158
+ *dest = (uint8_t *)buf;
159
+ return size;
160
+ } else {
161
+ append_pending_buf(rbuf, rbuf_i, buf, size, esize);
162
+ rbuf[size + esize] = '\0';
163
+
164
+ *dest = rbuf;
165
+ return size + esize;
166
+ }
167
+ }
@@ -0,0 +1,21 @@
1
+ #ifndef HESCAPE_H
2
+ #define HESCAPE_H
3
+
4
+ #include <sys/types.h>
5
+ #include <stdint.h>
6
+
7
+ /*
8
+ * Replace characters according to the following rules.
9
+ * Note that this function can handle only ASCII-compatible string.
10
+ *
11
+ * " => &quot;
12
+ * & => &amp;
13
+ * ' => &#39;
14
+ * < => &lt;
15
+ * > => &gt;
16
+ *
17
+ * @return size of dest. If it's larger than size, dest is required to be freed.
18
+ */
19
+ extern size_t hesc_escape_html(uint8_t **dest, const uint8_t *src, size_t size);
20
+
21
+ #endif
data/lib/berns.rb CHANGED
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'cgi/escape'
3
-
4
2
  require 'berns/berns'
5
3
  require 'berns/version'
6
4
 
Binary file
data/lib/berns/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Berns
3
- VERSION = '3.0.2'
3
+ VERSION = '3.1.0'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: berns
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Taylor Beck
@@ -9,16 +9,16 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-04-15 00:00:00.000000000 Z
12
+ date: 2021-05-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: cgi
15
+ name: bundler
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
18
  - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '0'
21
- type: :runtime
21
+ type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
@@ -26,7 +26,7 @@ dependencies:
26
26
  - !ruby/object:Gem::Version
27
27
  version: '0'
28
28
  - !ruby/object:Gem::Dependency
29
- name: bundler
29
+ name: minitest
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
32
  - - ">="
@@ -40,7 +40,7 @@ dependencies:
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  - !ruby/object:Gem::Dependency
43
- name: minitest
43
+ name: rake
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
46
  - - ">="
@@ -54,7 +54,7 @@ dependencies:
54
54
  - !ruby/object:Gem::Version
55
55
  version: '0'
56
56
  - !ruby/object:Gem::Dependency
57
- name: rake
57
+ name: rake-compiler
58
58
  requirement: !ruby/object:Gem::Requirement
59
59
  requirements:
60
60
  - - ">="
@@ -68,7 +68,7 @@ dependencies:
68
68
  - !ruby/object:Gem::Version
69
69
  version: '0'
70
70
  - !ruby/object:Gem::Dependency
71
- name: rake-compiler
71
+ name: rubocop
72
72
  requirement: !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - ">="
@@ -82,7 +82,7 @@ dependencies:
82
82
  - !ruby/object:Gem::Version
83
83
  version: '0'
84
84
  - !ruby/object:Gem::Dependency
85
- name: rubocop
85
+ name: rubocop-minitest
86
86
  requirement: !ruby/object:Gem::Requirement
87
87
  requirements:
88
88
  - - ">="
@@ -96,7 +96,7 @@ dependencies:
96
96
  - !ruby/object:Gem::Version
97
97
  version: '0'
98
98
  - !ruby/object:Gem::Dependency
99
- name: rubocop-minitest
99
+ name: rubocop-packaging
100
100
  requirement: !ruby/object:Gem::Requirement
101
101
  requirements:
102
102
  - - ">="
@@ -146,19 +146,14 @@ extensions:
146
146
  - ext/berns/extconf.rb
147
147
  extra_rdoc_files: []
148
148
  files:
149
- - ".editorconfig"
150
- - ".github/workflows/main.yml"
151
- - ".gitignore"
152
- - ".rubocop.yml"
153
- - CHANGELOG.org
154
- - Gemfile
155
149
  - LICENSE.txt
156
150
  - README.org
157
- - Rakefile
158
- - berns.gemspec
159
151
  - ext/berns/berns.c
160
152
  - ext/berns/extconf.rb
153
+ - ext/berns/hescape.c
154
+ - ext/berns/hescape.h
161
155
  - lib/berns.rb
156
+ - lib/berns/berns.so
162
157
  - lib/berns/version.rb
163
158
  homepage: https://github.com/evanleck/berns
164
159
  licenses:
data/.editorconfig DELETED
@@ -1,20 +0,0 @@
1
- # http://EditorConfig.org
2
- # This is the top most config file.
3
- root = true
4
-
5
- # All files
6
- [*]
7
-
8
- # Unix-style newlines with a newline ending every file
9
- end_of_line = lf
10
- insert_final_newline = true
11
-
12
- # Character set
13
- charset = utf-8
14
-
15
- # Trim extra whitespace.
16
- trim_trailing_whitespace = true
17
-
18
- # Soft tabs and 2 spaces.
19
- indent_style = space
20
- indent_size = 2
@@ -1,24 +0,0 @@
1
- name: Ruby
2
-
3
- on: [push,pull_request]
4
-
5
- jobs:
6
- test:
7
- strategy:
8
- fail-fast: false
9
- matrix:
10
- os: [ubuntu-latest, macos-latest]
11
- ruby-version: ['3.0', 2.7, 2.6, 2.5]
12
-
13
- runs-on: ${{ matrix.os }}
14
- steps:
15
- - uses: actions/checkout@v2
16
-
17
- - name: Set up Ruby ${{ matrix.ruby-version }}
18
- uses: ruby/setup-ruby@v1
19
- with:
20
- bundler-cache: true
21
- ruby-version: ${{ matrix.ruby-version }}
22
-
23
- - name: Run tests & lint
24
- run: bundle exec rake
data/.gitignore DELETED
@@ -1,9 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /Gemfile.lock
4
- /_yardoc/
5
- /coverage/
6
- /doc/
7
- /pkg/
8
- /spec/reports/
9
- /tmp/
data/.rubocop.yml DELETED
@@ -1,58 +0,0 @@
1
- # http://rubocop.readthedocs.io
2
- # https://github.com/bbatsov/rubocop/blob/master/config/enabled.yml
3
- require:
4
- - rubocop-minitest
5
- - rubocop-performance
6
- - rubocop-rake
7
-
8
- AllCops:
9
- DisplayCopNames: true
10
- DisplayStyleGuide: true
11
- ExtraDetails: true
12
- NewCops: enable
13
- TargetRubyVersion: 2.5
14
-
15
- Layout/ParameterAlignment:
16
- EnforcedStyle: with_fixed_indentation
17
-
18
- Layout/EmptyLineAfterMagicComment:
19
- Enabled: false
20
-
21
- Layout/FirstHashElementIndentation:
22
- EnforcedStyle: consistent
23
-
24
- Layout/MultilineOperationIndentation:
25
- EnforcedStyle: indented
26
-
27
- Layout/SpaceInsideStringInterpolation:
28
- EnforcedStyle: space
29
-
30
- Layout/LineLength:
31
- Enabled: false
32
-
33
- Metrics/AbcSize:
34
- Enabled: false
35
-
36
- Metrics/BlockLength:
37
- Enabled: false
38
-
39
- Metrics/ClassLength:
40
- Enabled: false
41
-
42
- Metrics/CyclomaticComplexity:
43
- Enabled: false
44
-
45
- Metrics/MethodLength:
46
- Enabled: false
47
-
48
- Metrics/ModuleLength:
49
- Enabled: false
50
-
51
- Metrics/PerceivedComplexity:
52
- Enabled: false
53
-
54
- Style/IfUnlessModifier:
55
- Enabled: false
56
-
57
- Style/Next:
58
- MinBodyLength: 8
data/CHANGELOG.org DELETED
@@ -1,138 +0,0 @@
1
- * Berns Changelog
2
-
3
- ** 3.0.2
4
-
5
- Ensure all returned strings are UTF-8 encoded.
6
-
7
- ** 3.0.1
8
-
9
- Fix a regression when content blocks are nil. They should be treated the same as
10
- if they are not there instead of throwing an error.
11
-
12
- ** 3.0.0
13
-
14
- Version 3.0 is another mostly API-compatible refactor of Berns, this time in
15
- blazing fast C! I debated simply calling this version 2.1.0 but because it's a
16
- complete rewrite it didn't seem right to do a simple point release and there may
17
- be corner cases that I've not accounted for in this new C-backed version.
18
-
19
- Running the same benchmarks as from 2.0 but pitting 2.0 against 3.0 yields some
20
- great speed improvements, particularly for the =empty= and =simple= cases.
21
-
22
- /These benchmarks were performed on a desktop with an AMD Ryzen 5 3600X 6-Core
23
- Processor running Linux Mint 20.1 and kernel 5.4./
24
-
25
- Before:
26
-
27
- #+begin_example
28
- empty 1.668M (± 0.6%) i/s - 8.356M in 5.011099s
29
- simple 442.102k (± 1.3%) i/s - 2.214M in 5.008068s
30
- nested 267.716k (± 0.4%) i/s - 1.357M in 5.068747s
31
- #+end_example
32
-
33
- After:
34
-
35
- #+begin_example
36
- empty 3.573M (± 1.2%) i/s - 17.881M in 5.005001s
37
- simple 840.631k (± 0.6%) i/s - 4.253M in 5.059771s
38
- nested 267.281k (± 0.5%) i/s - 1.347M in 5.037887s
39
- #+end_example
40
-
41
- With both empty and simple attributes we see performance effectively double, and
42
- with nested attributes performance remains more or less the same.
43
-
44
- This is another set of fairly contrived benchmarks, testing a singleton method,
45
- =void= call, and =element= call against each other.
46
-
47
- Before:
48
-
49
- #+begin_example
50
- br 3.061M (± 0.8%) i/s - 15.613M in 5.100154s
51
- void("br") 6.141M (± 1.4%) i/s - 30.990M in 5.047338s
52
- element("div") 2.789M (± 0.6%) i/s - 14.171M in 5.080626s
53
- #+end_example
54
-
55
- After:
56
-
57
- #+begin_example
58
- br 8.155M (± 1.0%) i/s - 41.339M in 5.069681s
59
- void("br") 9.782M (± 1.5%) i/s - 49.096M in 5.020114s
60
- element("div") 6.769M (± 1.1%) i/s - 33.983M in 5.021362s
61
- #+end_example
62
-
63
- Lastly, benchmarking =to_attributes= with the following hash as the only
64
- argument shows about double the performance with 3.0.
65
-
66
- #+begin_src ruby
67
- ATTRS = { this: 'tag', should: 'work', data: { foo: 'bar', bar: { baz: 'foo' } } }.freeze
68
- #+end_src
69
-
70
- Before:
71
-
72
- #+begin_example
73
- to_attributes 228.829k (± 1.3%) i/s - 1.159M in 5.065714s
74
- #+end_example
75
-
76
- After:
77
-
78
- #+begin_example
79
- to_attributes 457.387k (± 1.2%) i/s - 2.305M in 5.041036s
80
- #+end_example
81
-
82
- ** 2.0.0
83
-
84
- Version 2.0 is a mostly API-compatible refactor of all of the core
85
- methods that make up Berns. The goal is to improve performance, mostly
86
- using mutable strings and inlining variables that were otherwise short
87
- lived.
88
-
89
- In addition, the target Ruby version has been raised to 2.5 or later.
90
- 2.4 has reached its end of life.
91
-
92
- Running this benchmarking code:
93
-
94
- #+begin_src ruby
95
- Benchmark.ips do |x|
96
- x.report('empty') { Berns.element(:a) { 'Link to something' } }
97
- x.report('simple') { Berns.element(:a, { href: 'Something', class: 'my-class' }) { 'Link to something' } }
98
- x.report('nested') { Berns.element(:a, { href: 'Something', class: 'my-class', data: { something: 'Else' } }) { 'Link to something' } }
99
-
100
- x.compare!
101
- end
102
- #+end_src
103
-
104
- Before:
105
-
106
- #+begin_example
107
- empty 993.521k (± 1.7%) i/s - 5.062M in 5.096368s
108
- simple 340.795k (± 0.4%) i/s - 1.729M in 5.074101s
109
- nested 215.160k (± 1.0%) i/s - 1.081M in 5.025324s
110
- #+end_example
111
-
112
- After:
113
-
114
- #+begin_example
115
- empty 1.769M (± 1.9%) i/s - 9.012M in 5.094973s
116
- simple 441.020k (± 1.0%) i/s - 2.233M in 5.063326s
117
- nested 280.255k (± 3.0%) i/s - 1.400M in 5.001009s
118
- #+end_example
119
-
120
- With empty attributes we see ~ 100% increase in iterations per second,
121
- with simple attributes we see ~ 30% increase in the same, and with
122
- nested attributes we see ~ 30% increase as well.
123
-
124
- ** 1.3.0
125
-
126
- With version 1.3, nested HTML attributes can be created with nil keys
127
- and boolean values to produce e.g. "data-foo data-foo-bar='whatever'"
128
- from =data: { foo: { nil => true, bar: 'whatever' } }=
129
-
130
- ** 1.2.0 - 1.2.2
131
-
132
- Starting with version 1.2, Berns will now HTML-escape all attribute
133
- values using =CGI.escapeHTML=. This should prevent attribute values from
134
- escaping themselves and injecting HTML into the DOM.
135
-
136
- ** 1.1.0
137
-
138
- - Add =#sanitize= method.