berns 3.0.2 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/berns/extconf.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  require 'mkmf'
3
3
 
4
# Add the optimisation flags to the flags mkmf already derived from the Ruby
# build instead of replacing them. append_cflags only keeps a flag the
# compiler accepts, so the x86-only -msse4 is left out on other targets and
# the C sources fall back to their non-SSE code path.
append_cflags(['-O3', '-msse4'])

dir_config('berns')
4
7
  create_header
5
8
  create_makefile 'berns/berns'
@@ -0,0 +1,167 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include <stdlib.h>
4
+ #include "hescape.h"
5
+
6
+ #ifdef __SSE4_2__
7
+ # ifdef _MSC_VER
8
+ # include <nmmintrin.h>
9
+ # else
10
+ # include <x86intrin.h>
11
+ # endif
12
+ #endif
13
+
14
/*
 * Branch-prediction hints. Both variants normalise the condition to 0 or 1
 * so the macros yield the same value whichever branch is compiled.
 * `defined(__GNUC__)` is checked first so compilers that do not define the
 * macro never evaluate an undefined identifier in #if.
 */
#if defined(__GNUC__) && __GNUC__ >= 3
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
#else
# define likely(x) (!!(x))
# define unlikely(x) (!!(x))
#endif
21
+
22
/*
 * Replacement entity for each escape index. Index 0 means "no escaping"
 * and maps to the empty string.
 *
 * The entries are plain `char` strings: string literals have type char[],
 * so pointing `uint8_t *` at them is a pointer-signedness mismatch. The
 * bytes are consumed through memcpy(), which accepts either pointer type.
 */
static const char *const ESCAPED_STRING[] = {
  "",
  "&quot;",
  "&amp;",
  "&#39;",
  "&lt;",
  "&gt;",
};
30
+
31
// Length of ESCAPED_STRING[x] for x in 1..5, computed arithmetically instead
// of calling strlen().
// Mapping: 1 => 6, 2 => 5, 3 => 5, 4 => 4, 5 => 4
// The argument is parenthesized so that an expression argument such as
// ESC_LEN(a + 1) expands to the intended arithmetic.
#define ESC_LEN(x) ((13 - (x)) / 2)
34
+
35
/*
 * Maps a byte value to its index in ESCAPED_STRING; 0 means the byte is
 * copied through unchanged.
 *
 *   " (34) => 1 (&quot;)
 *   & (38) => 2 (&amp;)
 *   ' (39) => 3 (&#39;)
 *   < (60) => 4 (&lt;)
 *   > (62) => 5 (&gt;)
 *
 * The table has one entry per possible byte value; entries that are not
 * listed are zero-initialised.
 */
static const char HTML_ESCAPE_TABLE[256] = {
  ['"']  = 1,
  ['&']  = 2,
  ['\''] = 3,
  ['<']  = 4,
  ['>']  = 5,
};
62
+
63
/*
 * Make sure `buf` can hold at least `size` bytes, growing it when necessary.
 *
 * buf   - current heap buffer, or NULL if nothing has been allocated yet.
 * size  - number of bytes the caller is about to need.
 * asize - in/out: current capacity of `buf` in bytes; updated on growth.
 *
 * Returns the (possibly moved) buffer. Never returns NULL: the result is
 * written through unconditionally and there is no error channel, so running
 * out of memory terminates the process with a diagnostic instead of writing
 * through a NULL pointer.
 */
static uint8_t *
ensure_allocated(uint8_t *buf, size_t size, size_t *asize)
{
  // The current capacity already covers the request (exact fit included).
  if (size <= *asize) {
    return buf;
  }

  // The first allocation is sized exactly; later ones grow geometrically.
  size_t new_size = (*asize == 0) ? size : *asize;

  // Grow by 1.5x per step (2n - n/2) until the request fits.
  while (new_size < size) {
    const size_t grown = (new_size << 1) - (new_size >> 1);
    if (grown <= new_size) {
      // The growth step wrapped around size_t; use the exact size instead
      // of looping forever.
      new_size = size;
      break;
    }
    new_size = grown;
  }

  // Keep the old pointer until realloc has succeeded.
  uint8_t *resized = realloc(buf, new_size);
  if (resized == NULL) {
    fputs("hescape: out of memory\n", stderr);
    abort();
  }

  *asize = new_size;
  return resized;
}
83
+
84
#ifdef __SSE4_2__
/*
 * Scan `buf` in 16-byte steps, starting at offset `i`, for the first byte
 * that equals any of the first `range_size` bytes of `range`.
 *
 * Only whole 16-byte chunks of buf[i..size) are examined; a trailing partial
 * chunk is left for the caller.
 *
 * buf        - input bytes (same pointer type the caller holds).
 * i          - offset at which to start scanning.
 * size       - total number of bytes in `buf`.
 * range      - set of bytes to search for, packed into a 128-bit register.
 * range_size - number of meaningful bytes in `range`.
 * found      - out: set to 1 when a match is found, otherwise left untouched.
 *
 * Returns the offset of the match, or the offset just past the last whole
 * chunk scanned when nothing matched.
 */
static size_t
find_char_fast(const uint8_t *buf, size_t i, size_t size, __m128i range, size_t range_size, int *found)
{
  // Number of bytes covered by whole 16-byte chunks.
  size_t left = (size - i) & ~(size_t)15;

  // Test before the first load so that an input shorter than one chunk
  // cannot underflow `left` and run past the buffer.
  while (likely(left != 0)) {
    const __m128i b16 = _mm_loadu_si128((const __m128i *)(buf + i));
    // The result is the index of the first matching byte, or 16 when none match.
    const int index = _mm_cmpestri(range, (int)range_size, b16, 16, _SIDD_CMP_EQUAL_ANY);
    if (unlikely(index != 16)) {
      i += (size_t)index;
      *found = 1;
      break;
    }
    i += 16;
    left -= 16;
  }

  return i;
}
#endif
104
+
105
/*
 * Copy the run of input bytes that has not been written to the output yet.
 *
 * rbuf   - output buffer.
 * rbuf_i - current write offset in `rbuf`.
 * buf    - input buffer.
 * buf_i  - input offset one past the last byte to copy.
 * esize  - how far the output offset is ahead of the input offset.
 *
 * Returns the output offset after the copy (buf_i + esize).
 */
static inline size_t
append_pending_buf(uint8_t *rbuf, size_t rbuf_i, const uint8_t *buf, size_t buf_i, size_t esize)
{
  // The output offset minus the offset difference is the input position
  // where the pending run begins.
  const size_t src_start = rbuf_i - esize;
  const size_t pending = buf_i - src_start;

  memcpy(rbuf + rbuf_i, buf + src_start, pending);
  return buf_i + esize;
}
111
+
112
+ static inline size_t
113
+ append_escaped_buf(uint8_t *rbuf, size_t rbuf_i, size_t esc_i, size_t *esize)
114
+ {
115
+ memcpy(rbuf + rbuf_i, ESCAPED_STRING[esc_i], ESC_LEN(esc_i));
116
+ *esize += ESC_LEN(esc_i) - 1;
117
+ return rbuf_i + ESC_LEN(esc_i);
118
+ }
119
+
120
+ size_t
121
+ hesc_escape_html(uint8_t **dest, const uint8_t *buf, size_t size)
122
+ {
123
+ size_t asize = 0, esc_i, esize = 0, i = 0, rbuf_i = 0;
124
+ const uint8_t *esc;
125
+ uint8_t *rbuf = NULL;
126
+
127
+ # ifdef __SSE4_2__
128
+ __m128i escapes5 = _mm_loadu_si128((const __m128i *)"\"&'<>");
129
+ while (likely(size - i >= 16)) {
130
+ int found = 0;
131
+ if (unlikely((esc_i = HTML_ESCAPE_TABLE[buf[i]]) == 0)) {
132
+ i = find_char_fast(buf, i, size, escapes5, 5, &found);
133
+ if (!found) break;
134
+ esc_i = HTML_ESCAPE_TABLE[buf[i]];
135
+ }
136
+ rbuf = ensure_allocated(rbuf, sizeof(uint8_t) * (size + esize + ESC_LEN(esc_i) + 1), &asize);
137
+ rbuf_i = append_pending_buf(rbuf, rbuf_i, buf, i, esize);
138
+ rbuf_i = append_escaped_buf(rbuf, rbuf_i, esc_i, &esize);
139
+ i++;
140
+ }
141
+ # endif
142
+
143
+ while (i < size) {
144
+ // Loop here to skip non-escaped characters fast.
145
+ while (i < size && (esc_i = HTML_ESCAPE_TABLE[buf[i]]) == 0)
146
+ i++;
147
+
148
+ if (esc_i) {
149
+ rbuf = ensure_allocated(rbuf, sizeof(uint8_t) * (size + esize + ESC_LEN(esc_i) + 1), &asize);
150
+ rbuf_i = append_pending_buf(rbuf, rbuf_i, buf, i, esize);
151
+ rbuf_i = append_escaped_buf(rbuf, rbuf_i, esc_i, &esize);
152
+ }
153
+ i++;
154
+ }
155
+
156
+ if (rbuf_i == 0) {
157
+ // Return given buf and size if there are no escaped characters.
158
+ *dest = (uint8_t *)buf;
159
+ return size;
160
+ } else {
161
+ append_pending_buf(rbuf, rbuf_i, buf, size, esize);
162
+ rbuf[size + esize] = '\0';
163
+
164
+ *dest = rbuf;
165
+ return size + esize;
166
+ }
167
+ }
@@ -0,0 +1,21 @@
1
+ #ifndef HESCAPE_H
2
+ #define HESCAPE_H
3
+
4
+ #include <sys/types.h>
5
+ #include <stdint.h>
6
+
7
+ /*
8
+ * Replace characters according to the following rules.
9
+ * Note that this function can handle only ASCII-compatible string.
10
+ *
11
+ * " => &quot;
12
+ * & => &amp;
13
+ * ' => &#39;
14
+ * < => &lt;
15
+ * > => &gt;
16
+ *
17
+ * @return size of dest. If it's larger than size, dest was newly allocated and must be freed by the caller.
18
+ */
19
+ extern size_t hesc_escape_html(uint8_t **dest, const uint8_t *src, size_t size);
20
+
21
+ #endif
data/lib/berns.rb CHANGED
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'cgi/escape'
3
-
4
2
  require 'berns/berns'
5
3
  require 'berns/version'
6
4
 
Binary file
data/lib/berns/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Berns
3
- VERSION = '3.0.2'
3
+ VERSION = '3.1.0'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: berns
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Taylor Beck
@@ -9,16 +9,16 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-04-15 00:00:00.000000000 Z
12
+ date: 2021-05-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: cgi
15
+ name: bundler
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
18
  - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '0'
21
- type: :runtime
21
+ type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
@@ -26,7 +26,7 @@ dependencies:
26
26
  - !ruby/object:Gem::Version
27
27
  version: '0'
28
28
  - !ruby/object:Gem::Dependency
29
- name: bundler
29
+ name: minitest
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
32
  - - ">="
@@ -40,7 +40,7 @@ dependencies:
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  - !ruby/object:Gem::Dependency
43
- name: minitest
43
+ name: rake
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
46
  - - ">="
@@ -54,7 +54,7 @@ dependencies:
54
54
  - !ruby/object:Gem::Version
55
55
  version: '0'
56
56
  - !ruby/object:Gem::Dependency
57
- name: rake
57
+ name: rake-compiler
58
58
  requirement: !ruby/object:Gem::Requirement
59
59
  requirements:
60
60
  - - ">="
@@ -68,7 +68,7 @@ dependencies:
68
68
  - !ruby/object:Gem::Version
69
69
  version: '0'
70
70
  - !ruby/object:Gem::Dependency
71
- name: rake-compiler
71
+ name: rubocop
72
72
  requirement: !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - ">="
@@ -82,7 +82,7 @@ dependencies:
82
82
  - !ruby/object:Gem::Version
83
83
  version: '0'
84
84
  - !ruby/object:Gem::Dependency
85
- name: rubocop
85
+ name: rubocop-minitest
86
86
  requirement: !ruby/object:Gem::Requirement
87
87
  requirements:
88
88
  - - ">="
@@ -96,7 +96,7 @@ dependencies:
96
96
  - !ruby/object:Gem::Version
97
97
  version: '0'
98
98
  - !ruby/object:Gem::Dependency
99
- name: rubocop-minitest
99
+ name: rubocop-packaging
100
100
  requirement: !ruby/object:Gem::Requirement
101
101
  requirements:
102
102
  - - ">="
@@ -146,19 +146,14 @@ extensions:
146
146
  - ext/berns/extconf.rb
147
147
  extra_rdoc_files: []
148
148
  files:
149
- - ".editorconfig"
150
- - ".github/workflows/main.yml"
151
- - ".gitignore"
152
- - ".rubocop.yml"
153
- - CHANGELOG.org
154
- - Gemfile
155
149
  - LICENSE.txt
156
150
  - README.org
157
- - Rakefile
158
- - berns.gemspec
159
151
  - ext/berns/berns.c
160
152
  - ext/berns/extconf.rb
153
+ - ext/berns/hescape.c
154
+ - ext/berns/hescape.h
161
155
  - lib/berns.rb
156
+ - lib/berns/berns.so
162
157
  - lib/berns/version.rb
163
158
  homepage: https://github.com/evanleck/berns
164
159
  licenses:
data/.editorconfig DELETED
@@ -1,20 +0,0 @@
1
- # http://EditorConfig.org
2
- # This is the top most config file.
3
- root = true
4
-
5
- # All files
6
- [*]
7
-
8
- # Unix-style newlines with a newline ending every file
9
- end_of_line = lf
10
- insert_final_newline = true
11
-
12
- # Character set
13
- charset = utf-8
14
-
15
- # Trim extra whitespace.
16
- trim_trailing_whitespace = true
17
-
18
- # Soft tabs and 2 spaces.
19
- indent_style = space
20
- indent_size = 2
@@ -1,24 +0,0 @@
1
- name: Ruby
2
-
3
- on: [push,pull_request]
4
-
5
- jobs:
6
- test:
7
- strategy:
8
- fail-fast: false
9
- matrix:
10
- os: [ubuntu-latest, macos-latest]
11
- ruby-version: ['3.0', 2.7, 2.6, 2.5]
12
-
13
- runs-on: ${{ matrix.os }}
14
- steps:
15
- - uses: actions/checkout@v2
16
-
17
- - name: Set up Ruby ${{ matrix.ruby-version }}
18
- uses: ruby/setup-ruby@v1
19
- with:
20
- bundler-cache: true
21
- ruby-version: ${{ matrix.ruby-version }}
22
-
23
- - name: Run tests & lint
24
- run: bundle exec rake
data/.gitignore DELETED
@@ -1,9 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /Gemfile.lock
4
- /_yardoc/
5
- /coverage/
6
- /doc/
7
- /pkg/
8
- /spec/reports/
9
- /tmp/
data/.rubocop.yml DELETED
@@ -1,58 +0,0 @@
1
- # http://rubocop.readthedocs.io
2
- # https://github.com/bbatsov/rubocop/blob/master/config/enabled.yml
3
- require:
4
- - rubocop-minitest
5
- - rubocop-performance
6
- - rubocop-rake
7
-
8
- AllCops:
9
- DisplayCopNames: true
10
- DisplayStyleGuide: true
11
- ExtraDetails: true
12
- NewCops: enable
13
- TargetRubyVersion: 2.5
14
-
15
- Layout/ParameterAlignment:
16
- EnforcedStyle: with_fixed_indentation
17
-
18
- Layout/EmptyLineAfterMagicComment:
19
- Enabled: false
20
-
21
- Layout/FirstHashElementIndentation:
22
- EnforcedStyle: consistent
23
-
24
- Layout/MultilineOperationIndentation:
25
- EnforcedStyle: indented
26
-
27
- Layout/SpaceInsideStringInterpolation:
28
- EnforcedStyle: space
29
-
30
- Layout/LineLength:
31
- Enabled: false
32
-
33
- Metrics/AbcSize:
34
- Enabled: false
35
-
36
- Metrics/BlockLength:
37
- Enabled: false
38
-
39
- Metrics/ClassLength:
40
- Enabled: false
41
-
42
- Metrics/CyclomaticComplexity:
43
- Enabled: false
44
-
45
- Metrics/MethodLength:
46
- Enabled: false
47
-
48
- Metrics/ModuleLength:
49
- Enabled: false
50
-
51
- Metrics/PerceivedComplexity:
52
- Enabled: false
53
-
54
- Style/IfUnlessModifier:
55
- Enabled: false
56
-
57
- Style/Next:
58
- MinBodyLength: 8
data/CHANGELOG.org DELETED
@@ -1,138 +0,0 @@
1
- * Berns Changelog
2
-
3
- ** 3.0.2
4
-
5
- Ensure all returned strings are UTF-8 encoded.
6
-
7
- ** 3.0.1
8
-
9
- Fix a regression when content blocks are nil. They should be treated the same as
10
- if they are not there instead of throwing an error.
11
-
12
- ** 3.0.0
13
-
14
- Version 3.0 is another mostly API-compatible refactor of Berns, this time in
15
- blazing fast C! I debated simply calling this version 2.1.0 but because it's a
16
- complete rewrite it didn't seem right to do a simple point release and there may
17
- be corner cases that I've not accounted for in this new C-backed version.
18
-
19
- Running the same benchmarks as from 2.0 but pitting 2.0 against 3.0 yields some
20
- great speed improvements, particularly for the =empty= and =simple= cases.
21
-
22
- /These benchmarks were performed on a desktop with an AMD Ryzen 5 3600X 6-Core
23
- Processor running Linux Mint 20.1 and kernel 5.4./
24
-
25
- Before:
26
-
27
- #+begin_example
28
- empty 1.668M (± 0.6%) i/s - 8.356M in 5.011099s
29
- simple 442.102k (± 1.3%) i/s - 2.214M in 5.008068s
30
- nested 267.716k (± 0.4%) i/s - 1.357M in 5.068747s
31
- #+end_example
32
-
33
- After:
34
-
35
- #+begin_example
36
- empty 3.573M (± 1.2%) i/s - 17.881M in 5.005001s
37
- simple 840.631k (± 0.6%) i/s - 4.253M in 5.059771s
38
- nested 267.281k (± 0.5%) i/s - 1.347M in 5.037887s
39
- #+end_example
40
-
41
- With both empty and simple attributes we see performance effectively double, and
42
- with nested attributes performance remains more or less the same.
43
-
44
- This is another set of fairly contrived benchmarks, testing a singleton method,
45
- =void= call, and =element= call against each other.
46
-
47
- Before:
48
-
49
- #+begin_example
50
- br 3.061M (± 0.8%) i/s - 15.613M in 5.100154s
51
- void("br") 6.141M (± 1.4%) i/s - 30.990M in 5.047338s
52
- element("div") 2.789M (± 0.6%) i/s - 14.171M in 5.080626s
53
- #+end_example
54
-
55
- After:
56
-
57
- #+begin_example
58
- br 8.155M (± 1.0%) i/s - 41.339M in 5.069681s
59
- void("br") 9.782M (± 1.5%) i/s - 49.096M in 5.020114s
60
- element("div") 6.769M (± 1.1%) i/s - 33.983M in 5.021362s
61
- #+end_example
62
-
63
- Lastly, benchmarking =to_attributes= with the following hash as the only
64
- argument shows about double the performance with 3.0.
65
-
66
- #+begin_src ruby
67
- ATTRS = { this: 'tag', should: 'work', data: { foo: 'bar', bar: { baz: 'foo' } } }.freeze
68
- #+end_src
69
-
70
- Before:
71
-
72
- #+begin_example
73
- to_attributes 228.829k (± 1.3%) i/s - 1.159M in 5.065714s
74
- #+end_example
75
-
76
- After:
77
-
78
- #+begin_example
79
- to_attributes 457.387k (± 1.2%) i/s - 2.305M in 5.041036s
80
- #+end_example
81
-
82
- ** 2.0.0
83
-
84
- Version 2.0 is a mostly API-compatible refactor of all of the core
85
- methods that make up Berns. The goal is to improve performance, mostly
86
- using mutable strings and inlining variables that were otherwise short
87
- lived.
88
-
89
- In addition, the target Ruby version has been raised to 2.5 or later.
90
- 2.4 has reached its end of life.
91
-
92
- Running this benchmarking code:
93
-
94
- #+begin_src ruby
95
- Benchmark.ips do |x|
96
- x.report('empty') { Berns.element(:a) { 'Link to something' } }
97
- x.report('simple') { Berns.element(:a, { href: 'Something', class: 'my-class' }) { 'Link to something' } }
98
- x.report('nested') { Berns.element(:a, { href: 'Something', class: 'my-class', data: { something: 'Else' } }) { 'Link to something' } }
99
-
100
- x.compare!
101
- end
102
- #+end_src
103
-
104
- Before:
105
-
106
- #+begin_example
107
- empty 993.521k (± 1.7%) i/s - 5.062M in 5.096368s
108
- simple 340.795k (± 0.4%) i/s - 1.729M in 5.074101s
109
- nested 215.160k (± 1.0%) i/s - 1.081M in 5.025324s
110
- #+end_example
111
-
112
- After:
113
-
114
- #+begin_example
115
- empty 1.769M (± 1.9%) i/s - 9.012M in 5.094973s
116
- simple 441.020k (± 1.0%) i/s - 2.233M in 5.063326s
117
- nested 280.255k (± 3.0%) i/s - 1.400M in 5.001009s
118
- #+end_example
119
-
120
- With empty attributes we see ~ 100% increase in iterations per second,
121
- with simple attributes we see ~ 30% increase in the same, and with
122
- nested attributes we see ~ 30% increase as well.
123
-
124
- ** 1.3.0
125
-
126
- With version 1.3, nested HTML attributes can be created with nil keys
127
- and boolean values to produce e.g. "data-foo data-foo-bar='whatever'"
128
- from =data: { foo: { nil => true, bar: 'whatever' } }=
129
-
130
- ** 1.2.0 - 1.2.2
131
-
132
- Starting with version 1.2, Berns will now HTML-escape all attribute
133
- values using =CGI.escapeHTML=. This should prevent attribute values from
134
- escaping themselves and injecting HTML into the DOM.
135
-
136
- ** 1.1.0
137
-
138
- - Add =#sanitize= method.