hescape 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8582fcf14d0eddac5df90d5bf8463c85e32e277b
4
+ data.tar.gz: 6ace6ba41e38ec0b154caca8f7d05f82833bfbf6
5
+ SHA512:
6
+ metadata.gz: 048d1fbe3fb8559035577e9ccdb8bb60c9d08ba907494a53f455dde6bf79dc98a4e5e699f4118c2ea5e49690908f0aacf2949bda39ca45e0fc8d3ecc0312154e
7
+ data.tar.gz: b3c16e9a04736b0b3f023e7029684485789ccb9722e383e31edf6899709f4604e179a30e152179494aade4aebcf0e60789be9071690eb3b07d0ad975cf5712eb
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in hescape.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Takashi Kokubun
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,21 @@
1
+ # hescape-ruby [![wercker status](https://app.wercker.com/status/c75c229e77ca1a27a7e70189b6d7aed8/s/master "wercker status")](https://app.wercker.com/project/byKey/c75c229e77ca1a27a7e70189b6d7aed8)
2
+
3
+ A gem to use [hescape](https://github.com/k0kubun/hescape) from Ruby, which is a fast HTML escape implementation using SSE instructions.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'hescape'
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```ruby
16
+ Hescape.escape_html(%[<>"'&]) #=> "&lt;&gt;&quot;&#39;&amp;"
17
+ ```
18
+
19
+ ## License
20
+
21
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/extensiontask"
3
+
4
# Packaging the gem must always use a freshly compiled extension.
task :build => :compile

# Compile ext/hescape and place the resulting shared object under lib/hescape,
# where `require "hescape/hescape"` looks for it.
Rake::ExtensionTask.new("hescape") do |ext|
  ext.lib_dir = "lib/hescape"
end

# The default task used to depend unconditionally on :spec, but nothing in this
# Rakefile defines such a task, so a plain `rake` aborted with
# "Don't know how to build task 'spec'". Chain it only when it really exists.
default_tasks = [:clobber, :compile]
default_tasks << :spec if Rake::Task.task_defined?(:spec)

task :default => default_tasks
@@ -0,0 +1,23 @@
1
+ require 'benchmark/ips'
2
+ require 'escape_utils'
3
+ require 'hescape'
4
+ require 'cgi/escape' # Require Ruby 2.3+
5
+
6
# Compare several HTML-escaping implementations on one input string.
# Set ESCAPE_STRING in the environment to benchmark a different input.
Benchmark.ips do |bench|
  input = ENV.fetch('ESCAPE_STRING', "https://github.com/k0kubun/hescape-ruby")
  puts "escaping: #{input}"

  # Replacement table used by the pure-Ruby gsub baseline.
  replacements = {
    "'" => '&#39;',
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;',
  }

  bench.report('gsub')         { input.gsub(/['&\"<>]/, replacements) }
  bench.report('escape_utils') { EscapeUtils.escape_html(input, false) }
  bench.report('cgi/escape')   { CGI.escapeHTML(input) }
  bench.report('hescape')      { Hescape.escape_html(input) }

  # Print the relative speed of every report.
  bench.compare!
end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Opens an IRB session with the gem already loaded, for interactive
# experimentation. Add fixtures or other initialization code before IRB.start.
#
# To use Pry instead, add pry to the Gemfile and replace the IRB lines with:
#   require "pry"
#   Pry.start

require "bundler/setup"
require "hescape"
require "irb"

IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Development bootstrap script: installs the gem's dependencies.

# Abort on errors, on unset variables and on failures inside pipelines.
set -o errexit -o nounset -o pipefail
IFS=$'\n\t'
# Echo each line as it is read and each command as it is executed.
set -o verbose -o xtrace

bundle install

# Add any further automated setup steps below.
@@ -0,0 +1,5 @@
1
+ require "mkmf"
2
+
3
# Enable SSE4 code generation only when the compiler accepts the flag.
# The C source guards its SIMD path with `#ifdef __SSE4_2__` and otherwise
# uses a scalar loop, so on toolchains that reject -msse4 (for example
# non-x86 targets) the extension still builds instead of failing outright.
$CFLAGS << ' -msse4' if try_compile("int main(void) { return 0; }", "-msse4")

# Generates the Makefile that builds the shared object loaded by
# `require "hescape/hescape"`.
create_makefile("hescape/hescape")
@@ -0,0 +1,190 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include <sys/types.h>
4
+ #include "hescape.h"
5
+ #include <ruby/encoding.h>
6
+
7
+ VALUE rb_mHescape;
8
+
9
+ #ifdef __SSE4_2__
10
+ # ifdef _MSC_VER
11
+ # include <nmmintrin.h>
12
+ # else
13
+ # include <x86intrin.h>
14
+ # endif
15
+ #endif
16
+
17
+ #if __GNUC__ >= 3
18
+ # define likely(x) __builtin_expect(!!(x), 1)
19
+ # define unlikely(x) __builtin_expect(!!(x), 0)
20
+ #else
21
+ # define likely(x) (x)
22
+ # define unlikely(x) (x)
23
+ #endif
24
+
25
+ static const char *ESCAPED_STRING[] = {
26
+ "",
27
+ "&quot;",
28
+ "&amp;",
29
+ "&#39;",
30
+ "&lt;",
31
+ "&gt;",
32
+ };
33
+
34
// Length of ESCAPED_STRING[x] computed arithmetically instead of via strlen().
// Mapping: 1 => 6, 2 => 5, 3 => 5, 4 => 4, 5 => 4
// The argument is parenthesized so that expression arguments such as
// ESC_LEN(a + b) expand with the intended precedence.
#define ESC_LEN(x) ((13 - (x)) / 2)
37
+
38
+ /*
39
+ * Given ASCII-compatible character, return index of ESCAPED_STRING.
40
+ *
41
+ * " (34) => 1 (&quot;)
42
+ * & (38) => 2 (&amp;)
43
+ * ' (39) => 3 (&#39;)
44
+ * < (60) => 4 (&lt;)
45
+ * > (62) => 5 (&gt;)
46
+ */
47
+ static const char HTML_ESCAPE_TABLE[] = {
48
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50
+ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0,
51
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
52
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
61
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64
+ };
65
+
66
/*
 * Grow a heap buffer so that it can hold more than `size` bytes.
 *
 *   buf    current buffer (may be NULL when *asize is 0)
 *   size   number of bytes the caller needs
 *   asize  in/out: currently allocated size of buf
 *
 * Returns buf unchanged when size < *asize. Otherwise the buffer is
 * reallocated: the first allocation uses `size` itself, later ones grow the
 * current capacity by 1.5x until it reaches `size`; the result is rounded up
 * to a multiple of 8.
 *
 * On failure NULL is returned and *asize is left untouched, so it keeps
 * describing the old buffer, which realloc() leaves valid.
 */
static char*
ensure_allocated(char *buf, size_t size, size_t *asize)
{
  size_t new_size;
  char *new_buf;

  if (size < *asize)
    return buf;

  if (*asize == 0) {
    new_size = size;
  } else {
    new_size = *asize;
  }

  // Increase buffer size by 1.5x if realloced multiple times.
  while (new_size < size) {
    // Doubling would wrap around; fall back to the exact request.
    if (new_size > ((size_t)-1) / 2) {
      new_size = size;
      break;
    }
    new_size = (new_size << 1) - (new_size >> 1);
  }

  // Rounding up must not wrap around to a tiny allocation.
  if (new_size > ((size_t)-1) - 7)
    return NULL;

  // Round allocation up to multiple of 8.
  new_size = (new_size + 7) & ~(size_t)7;

  // Commit the new capacity only once the allocation has succeeded.
  new_buf = realloc(buf, new_size);
  if (new_buf != NULL)
    *asize = new_size;
  return new_buf;
}
90
+
91
#ifdef __SSE4_2__
/*
 * Scan buf[i..size) sixteen bytes at a time for the first byte that must be
 * escaped (one of " & ' < >).
 *
 *   buf    input bytes
 *   i      offset to start scanning from (must be <= size)
 *   size   total length of buf
 *   found  out: set to 1 when a special byte was located, 0 otherwise
 *
 * Returns the offset of the located byte when *found is 1. When *found is 0
 * the return value is the offset where the scan stopped; fewer than 16 bytes
 * remain from there and have NOT been inspected, so the caller must check
 * them itself.
 */
static size_t
find_escape_fast(const char *buf, size_t i, size_t size, int *found)
{
  // Sized to 16 bytes (zero padded) so the 128-bit load below never reads
  // past the end of the array; only the first 5 bytes take part in matching.
  static const char escapes[16] = "\"&'<>";

  // Report hits for this call only; never leave a flag from an earlier call.
  *found = 0;

  if (likely(size - i >= 16)) {
    __m128i escapes5 = _mm_loadu_si128((const __m128i *)escapes);
    size_t left = (size - i) & ~(size_t)15;
    do {
      // Load the chunk at the current offset. Loading from `buf` alone would
      // re-scan the first 16 bytes forever and miss everything after them.
      __m128i b16 = _mm_loadu_si128((const __m128i *)(buf + i));
      int index = _mm_cmpestri(escapes5, 5, b16, 16, _SIDD_CMP_EQUAL_ANY);
      if (unlikely(index != 16)) {
        i += index;
        *found = 1;
        break;
      }
      i += 16;
      left -= 16;
    } while(likely(left != 0));
  }
  return i;
}
#endif
115
+
116
+ size_t
117
+ hesc_escape_html(char **dest, const char *buf, size_t size)
118
+ {
119
+ size_t asize = 0, esc_i, esize = 0, i = 0, rbuf_end = 0;
120
+ const char *esc;
121
+ char *rbuf = NULL;
122
+
123
+ # define DO_ESCAPE() { \
124
+ esc = ESCAPED_STRING[esc_i]; \
125
+ rbuf = ensure_allocated(rbuf, sizeof(char) * (size + esize + ESC_LEN(esc_i) + 1), &asize); \
126
+ memmove(rbuf + rbuf_end, buf + (rbuf_end - esize), i - (rbuf_end - esize)); \
127
+ memmove(rbuf + i + esize, esc, ESC_LEN(esc_i)); \
128
+ rbuf_end = i + esize + ESC_LEN(esc_i); \
129
+ esize += ESC_LEN(esc_i) - 1; \
130
+ }
131
+
132
+ # ifdef __SSE4_2__
133
+ int found = 0;
134
+ while (i < size) {
135
+ i = find_escape_fast(buf, i, size, &found);
136
+ if (!found) break;
137
+
138
+ esc_i = HTML_ESCAPE_TABLE[(unsigned char)buf[i]];
139
+ if (i < size && esc_i) DO_ESCAPE();
140
+ i++;
141
+ }
142
+ # endif
143
+
144
+ while (i < size) {
145
+ // Loop here to skip non-escaped characters fast.
146
+ while (i < size && (esc_i = HTML_ESCAPE_TABLE[(unsigned char)buf[i]]) == 0)
147
+ i++;
148
+
149
+ if (i < size && esc_i) DO_ESCAPE();
150
+ i++;
151
+ }
152
+
153
+ if (rbuf_end == 0) {
154
+ // Return given buf and size if there are no escaped characters.
155
+ *dest = (char *)buf;
156
+ return size;
157
+ } else {
158
+ // Copy pending characters including NULL character.
159
+ memmove(rbuf + rbuf_end, buf + (rbuf_end - esize), (size + 1) - (rbuf_end - esize));
160
+
161
+ *dest = rbuf;
162
+ return size + esize;
163
+ }
164
+ }
165
+
166
+ static VALUE
167
+ rb_escape_html(RB_UNUSED_VAR(VALUE self), VALUE value)
168
+ {
169
+ char *buf;
170
+ unsigned int size;
171
+ VALUE str;
172
+
173
+ Check_Type(value, T_STRING);
174
+ str = rb_convert_type(value, T_STRING, "String", "to_s");
175
+
176
+ size = hesc_escape_html(&buf, RSTRING_PTR(str), RSTRING_LEN(str));
177
+ if (size > RSTRING_LEN(str)) {
178
+ str = rb_enc_str_new(buf, size, rb_utf8_encoding());
179
+ free((void *)buf);
180
+ }
181
+
182
+ return str;
183
+ }
184
+ /* Extension initializer: defines the Hescape module, stores it in
     rb_mHescape, and registers rb_escape_html as its singleton method
     "escape_html" taking exactly one argument. */
185
+ void
186
+ Init_hescape(void)
187
+ {
188
+ rb_mHescape = rb_define_module("Hescape");
189
+ rb_define_singleton_method(rb_mHescape, "escape_html", rb_escape_html, 1);
190
+ }
@@ -0,0 +1,6 @@
1
+ #ifndef HESCAPE_H
2
+ #define HESCAPE_H 1
3
+
4
+ #include "ruby.h"
5
+
6
+ #endif /* HESCAPE_H */
data/hescape.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
# Make lib/ loadable so the version constant can be read straight from the
# source tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'hescape/version'

Gem::Specification.new do |spec|
  spec.name    = "hescape"
  spec.version = Hescape::VERSION
  spec.authors = ["Takashi Kokubun"]
  spec.email   = ["takashikkbn@gmail.com"]

  spec.summary     = %q{Fast HTML escape utility}
  spec.description = %q{Fast HTML escape utility}
  spec.homepage    = "https://github.com/k0kubun/hescape-ruby"
  spec.license     = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # The native extension is built at install time from this extconf.
  spec.extensions = ["ext/hescape/extconf.rb"]

  # Development-only dependencies: name => version constraints (if any).
  {
    "benchmark-ips" => [],
    "bundler"       => ["~> 1.12"],
    "escape_utils"  => [],
    "rake"          => ["~> 10.0"],
    "rake-compiler" => [],
  }.each do |gem_name, constraints|
    spec.add_development_dependency(gem_name, *constraints)
  end
end
data/lib/hescape.rb ADDED
@@ -0,0 +1,6 @@
1
+ require "hescape/version"
2
+ require "hescape/hescape"
3
+
4
+ module Hescape
5
+ # Namespace of the gem. Hescape.escape_html is defined by the native extension loaded through `require "hescape/hescape"`.
6
+ end
@@ -0,0 +1,3 @@
1
module Hescape
  # Version of the gem, read by the gemspec. Frozen so the shared constant
  # cannot be mutated in place by accident.
  VERSION = "0.1.0".freeze
end
data/wercker.yml ADDED
@@ -0,0 +1,16 @@
1
+ box: ruby
2
+ build:
3
+ steps:
4
+ - bundle-install
5
+ - script:
6
+ name: compile
7
+ code: bundle exec rake compile
8
+ - script:
9
+ name: "[bench] no escape"
10
+ code: ESCAPE_STRING="https://github.com/k0kubun/hescape-ruby" bundle exec ruby benchmark/benchmark.rb
11
+ - script:
12
+ name: "[bench] short escape"
13
+ code: ESCAPE_STRING="<script />" bundle exec ruby benchmark/benchmark.rb
14
+ - script:
15
+ name: "[bench] long escape"
16
+ code: ESCAPE_STRING="<<<<<<<<<<>>>>>>>>>&&&&&&&&&&\"\"\"\"\"\"'''''" bundle exec ruby benchmark/benchmark.rb
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hescape
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Takashi Kokubun
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-08-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: benchmark-ips
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.12'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.12'
41
+ - !ruby/object:Gem::Dependency
42
+ name: escape_utils
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake-compiler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Fast HTML escape utility
84
+ email:
85
+ - takashikkbn@gmail.com
86
+ executables: []
87
+ extensions:
88
+ - ext/hescape/extconf.rb
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - Gemfile
93
+ - LICENSE.txt
94
+ - README.md
95
+ - Rakefile
96
+ - benchmark/benchmark.rb
97
+ - bin/console
98
+ - bin/setup
99
+ - ext/hescape/extconf.rb
100
+ - ext/hescape/hescape.c
101
+ - ext/hescape/hescape.h
102
+ - hescape.gemspec
103
+ - lib/hescape.rb
104
+ - lib/hescape/version.rb
105
+ - wercker.yml
106
+ homepage: https://github.com/k0kubun/hescape-ruby
107
+ licenses:
108
+ - MIT
109
+ metadata: {}
110
+ post_install_message:
111
+ rdoc_options: []
112
+ require_paths:
113
+ - lib
114
+ required_ruby_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ requirements: []
125
+ rubyforge_project:
126
+ rubygems_version: 2.5.1
127
+ signing_key:
128
+ specification_version: 4
129
+ summary: Fast HTML escape utility
130
+ test_files: []