faster_html_escape 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/extconf.rb +2 -0
- data/faster_html_escape.c +142 -0
- data/faster_html_escape.gemspec +17 -0
- data/speed_up_html_escape.rb +19 -0
- data/test/test_faster_html_escape.rb +18 -0
- data/test/test_faster_html_escape.sh +31 -0
- metadata +50 -0
data/extconf.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
|
3
|
+
/* String flag combinations consulted by RESIZE_CAPA. */
#define STR_ASSOC FL_USER3
#define STR_NOCAPA (ELTS_SHARED|STR_ASSOC)

/*
 * Reallocate the character buffer of ruby string `s` so that it holds
 * `cap` bytes plus one extra byte, then record `cap` as the string's
 * capacity unless one of the STR_NOCAPA flags is set on the string.
 * Both arguments are evaluated more than once.
 */
#define RESIZE_CAPA(s, cap) do {\
    REALLOC_N(RSTRING(s)->ptr, char, (cap)+1);\
    if (!FL_TEST(s, STR_NOCAPA)) {\
        RSTRING(s)->aux.capa = (cap);\
    }\
} while (0)
|
10
|
+
|
11
|
+
/*
|
12
|
+
* call-seq:
|
13
|
+
* FasterHTMLEscape.html_escape(obj) => str
|
14
|
+
*
|
15
|
+
* HTML Escape-A faster version of ERB::Util.html_escape. Coverts obj to a
|
16
|
+
* string and replaces <, >, ", and & with their HTML counterparts.
|
17
|
+
*
|
18
|
+
* - FasterHTMLEscape.html_escape('X') => 'X'
|
19
|
+
* - FasterHTMLEscape.html_escape('>') => '>'
|
20
|
+
* - FasterHTMLEscape.html_escape('<') => '<'
|
21
|
+
* - FasterHTMLEscape.html_escape('"') => '"'
|
22
|
+
* - FasterHTMLEscape.html_escape('&') => '&'
|
23
|
+
* - FasterHTMLEscape.html_escape(1) => '1'
|
24
|
+
*
|
25
|
+
*/
|
26
|
+
static VALUE faster_html_escape(VALUE self, VALUE str)
|
27
|
+
{
|
28
|
+
VALUE dest; /* New ruby string */
|
29
|
+
/*
|
30
|
+
blen: length of new string buffer
|
31
|
+
len: temporary holder of lengths
|
32
|
+
extra: amount of extra space to allocate
|
33
|
+
*/
|
34
|
+
long blen, len, extra = 30;
|
35
|
+
/*
|
36
|
+
buf: start of new string buffer
|
37
|
+
bp: current position in new string buffer
|
38
|
+
sp: start of old string buffer
|
39
|
+
cp: start of copying position in old string buffer
|
40
|
+
lp: current position in old string buffer
|
41
|
+
send: end of old string buffer
|
42
|
+
*/
|
43
|
+
char *buf, *bp, *sp, *cp, *lp, *send;
|
44
|
+
|
45
|
+
ID to_s_id;
|
46
|
+
to_s_id = rb_intern("to_s");
|
47
|
+
str = rb_funcall(str, to_s_id, 0);
|
48
|
+
str = StringValue(str);
|
49
|
+
if (RSTRING(str)->len == 0) {
|
50
|
+
return rb_str_buf_new(0);
|
51
|
+
}
|
52
|
+
|
53
|
+
if (RSTRING(str)->len < 6)
|
54
|
+
extra = RSTRING(str)->len * 5; /* Don't allocate more space than escaped string */
|
55
|
+
/* could possibly take up */
|
56
|
+
blen = RSTRING(str)->len + extra; /* add some extra space to account for escaped HTML */
|
57
|
+
dest = rb_str_buf_new(blen); /* create new ruby string */
|
58
|
+
sp = cp = lp = StringValuePtr(str); /* Initialize old string pointers */
|
59
|
+
bp = buf = StringValuePtr(dest); /* Initialize new string pointers */
|
60
|
+
send = (char *)((long)sp + RSTRING(str)->len); /* Get end of ruby string */
|
61
|
+
|
62
|
+
rb_str_locktmp(dest);
|
63
|
+
while (lp < send) {
|
64
|
+
/* Scan characters until HTML character is found */
|
65
|
+
if(!(*lp=='&'||*lp=='"'||*lp=='>'||*lp=='<')) {
|
66
|
+
lp++; /* skip to next character in old string */
|
67
|
+
continue;
|
68
|
+
}
|
69
|
+
|
70
|
+
/* Reallocate new string memory if new string won't be large enough*/
|
71
|
+
len = (bp - buf) /* length of new string */
|
72
|
+
+ (lp - cp) /* length of added text */
|
73
|
+
+ 6; /* Maximum amount of space that can be taken up with html replacement */
|
74
|
+
if (blen < len) {
|
75
|
+
blen = len + (extra = extra << 1); /* Add double the amount of extra space */
|
76
|
+
/* previously allocated to new required length */
|
77
|
+
len = bp - buf; /* Record length of new string buffer currently used */
|
78
|
+
RESIZE_CAPA(dest, blen); /* Give ruby string additional capacity */
|
79
|
+
RSTRING(dest)->len = blen; /* Set new length of ruby string */
|
80
|
+
buf = RSTRING(dest)->ptr; /* Set new start of new string buffer */
|
81
|
+
bp = buf + len; /* Set new current position of new string buffer */
|
82
|
+
}
|
83
|
+
|
84
|
+
/* Copy previous non-HTML text from old string to new string */
|
85
|
+
len = lp - cp; /* length of previous non-HTML text */
|
86
|
+
memcpy(bp, cp, len); /* copy non-HTML from old buffer to new buffer */
|
87
|
+
bp += len; /* Update new string pointer by length copied */
|
88
|
+
|
89
|
+
/* Copy HTML replacement text to new string if not currently at end of source */
|
90
|
+
switch(*lp) {
|
91
|
+
case '&': memcpy(bp, "&", 5); bp+=5; break;
|
92
|
+
case '"': memcpy(bp, """, 6); bp+=6; break;
|
93
|
+
case '>': memcpy(bp, ">", 4); bp+=4; break;
|
94
|
+
case '<': memcpy(bp, "<", 4); bp+=4; break;
|
95
|
+
}
|
96
|
+
cp = ++lp; /* Set new current and copying start point for old string */
|
97
|
+
}
|
98
|
+
if(cp != lp) {
|
99
|
+
len = (bp - buf) + (lp - cp);
|
100
|
+
if (blen < len) {
|
101
|
+
blen = len;
|
102
|
+
len = bp - buf;
|
103
|
+
RESIZE_CAPA(dest, blen);
|
104
|
+
RSTRING(dest)->len = blen;
|
105
|
+
buf = RSTRING(dest)->ptr;
|
106
|
+
bp = buf + len;
|
107
|
+
}
|
108
|
+
len = lp - cp;
|
109
|
+
memcpy(bp, cp, len);
|
110
|
+
bp += len;
|
111
|
+
}
|
112
|
+
*bp = '\0';
|
113
|
+
rb_str_unlocktmp(dest);
|
114
|
+
RBASIC(dest)->klass = rb_obj_class(str);
|
115
|
+
OBJ_INFECT(dest, str);
|
116
|
+
RSTRING(dest)->len = bp - buf; /* Set correct ruby string length */
|
117
|
+
|
118
|
+
/* Taint new string if old string tainted */
|
119
|
+
if (OBJ_TAINTED(str))
|
120
|
+
OBJ_TAINT(dest);
|
121
|
+
/* Return new ruby string */
|
122
|
+
return dest;
|
123
|
+
}
|
124
|
+
|
125
|
+
/*
|
126
|
+
* FasterHTMLEscape gives a single method, html_escape, which is a faster version
|
127
|
+
* of the ERB::Util.html_escape method. There is an alias, h, for ease of use.
|
128
|
+
* Both are available as module functions, so they can be called with:
|
129
|
+
*
|
130
|
+
* - FasterHTMLEscape.h(string)
|
131
|
+
*
|
132
|
+
*/
|
133
|
+
void
|
134
|
+
Init_faster_html_escape()
|
135
|
+
{
|
136
|
+
VALUE mod;
|
137
|
+
mod = rb_define_module("FasterHTMLEscape");
|
138
|
+
rb_define_method(mod, "html_escape", faster_html_escape, 1);
|
139
|
+
rb_define_alias(mod, "h", "html_escape");
|
140
|
+
rb_define_module_function(mod, "html_escape", faster_html_escape, 1);
|
141
|
+
rb_define_module_function(mod, "h", faster_html_escape, 1);
|
142
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Gem specification for faster_html_escape: a source gem whose C extension is
# configured through extconf.rb.
spec = Gem::Specification.new do |gem|
  # Identity
  gem.name = "faster_html_escape"
  gem.version = "1.0.0"
  gem.summary = "Faster version of ERB::Util.html_escape"
  gem.platform = Gem::Platform::RUBY

  # Author and project details
  gem.author = "Jeremy Evans"
  gem.email = "jeremyevans0@gmail.com"
  gem.homepage = "http://rubyforge.org/projects/fasterh/"
  gem.rubyforge_project = 'fasterh'

  # Packaged contents: everything in the top-level directory, plus the tests
  gem.files = Dir["*"]
  gem.test_files = Dir["test/*"]
  gem.has_rdoc = true

  # Build and load configuration
  gem.extensions << 'extconf.rb'
  gem.require_paths = ["."]
  gem.autorequire = "faster_html_escape"
end
|
17
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Requiring this file makes ERB::Util.html_escape, ERB::Util.h and
# CGI.escapeHTML use the C implementation from FasterHTMLEscape.

# Swaps ERB::Util.h, ERB::Util.html_escape and CGI.escapeHTML for versions
# that end up calling FasterHTMLEscape.h.
def replace_erb_cgi_html_escape
  require 'faster_html_escape'
  require 'erb'
  require 'cgi'

  erb_util = ERB::Util
  erb_util_singleton = class << erb_util; self; end

  # Remove the existing definitions, instance-level first, then module-level.
  erb_util.send(:remove_method, :h, :html_escape)
  erb_util_singleton.send(:remove_method, :h, :html_escape)

  # Mix in the C versions and expose them again as module functions.
  erb_util.send(:include, FasterHTMLEscape)
  erb_util.send(:module_function, :h)
  erb_util.send(:module_function, :html_escape)

  # CGI.escapeHTML becomes a thin wrapper around FasterHTMLEscape.h.
  cgi_singleton = class << CGI; self; end
  cgi_singleton.send(:remove_method, :escapeHTML)
  cgi_singleton.send(:define_method, :escapeHTML) { |string| FasterHTMLEscape.h(string) }
end

replace_erb_cgi_html_escape
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/local/bin/ruby
# Compares or benchmarks FasterHTMLEscape.h against ERB::Util.h.
#
# Usage: test_faster_html_escape.rb MODE [TIMES [MULTIPLIER]] < input
#   MODE        'check' compares both implementations, 'fhe' times
#               FasterHTMLEscape.h, anything else times ERB::Util.h
#   TIMES       number of calls to time (default 1)
#   MULTIPLIER  how many times the text read from STDIN is repeated (default 1)
require 'faster_html_escape'
require 'erb'
require 'time'

input = STDIN.read
mode = ARGV[0]
escaper = (mode == 'fhe' ? FasterHTMLEscape : ERB::Util).method(:h)
repetitions = (ARGV[1] || 1).to_i
factor = (ARGV[2] || 1).to_i
input *= factor

if mode == 'check'
  relation = ERB::Util.h(input) == FasterHTMLEscape.h(input) ? '==' : '!='
  puts "ERB::Util.h(text) #{relation} FasterHTMLEscape.h(text), size #{input.length}"
else
  print "Using #{escaper.inspect} #{repetitions} time(s) on text of size #{input.length}..."
  started_at = Time.now
  repetitions.times { escaper.call(input) }
  elapsed = Time.now - started_at
  puts "took %0.6f seconds" % elapsed
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/bin/sh
# Checks FasterHTMLEscape.h against ERB::Util.h on a set of small inputs and
# then benchmarks both on larger ones.  Must be run from the directory that
# contains test_faster_html_escape.rb.
#
# printf '%s' is used instead of `echo -n`, whose handling of -n is not
# portable across /bin/sh implementations.

# Each special character on its own, repeated 1-7 times, then mixed with a
# plain character in every position.
for CHAR in \< \> \" \&; do
    for NUM in 1 2 3 4 5 6 7; do
        printf '%s' "Checking '${CHAR}'*$NUM: "
        printf '%s' "$CHAR" | ./test_faster_html_escape.rb check 1 "$NUM"
    done
    for PATTERN in "X${CHAR}" "${CHAR}X" "X${CHAR}X" "${CHAR}X${CHAR}"; do
        printf '%s' "Checking '${PATTERN}': "
        printf '%s' "$PATTERN" | ./test_faster_html_escape.rb check
    done
done

# Larger inputs: verify once, then time both implementations.
for INPUT in '<blah>' '<>&"' blah X ''; do
    printf '%s' "Checking '$INPUT'*1000000: "
    printf '%s' "$INPUT" | ./test_faster_html_escape.rb check 1 1000000
    echo "Benchmarking '$INPUT'*1000000"
    for METHOD in fhe erb; do
        printf '%s' "$INPUT" | ./test_faster_html_escape.rb "$METHOD" 1 1000000
    done
    echo "Benchmarking '$INPUT' multiple times"
    for METHOD in fhe erb; do
        for TIMES in 1 2 3 1000 100000; do
            printf '%s' "$INPUT" | ./test_faster_html_escape.rb "$METHOD" "$TIMES"
        done
    done
done
|
metadata
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.11
|
3
|
+
specification_version: 1
|
4
|
+
name: faster_html_escape
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2006-12-13 00:00:00 -08:00
|
8
|
+
summary: Faster version of ERB::Util.html_escape
|
9
|
+
require_paths:
|
10
|
+
- .
|
11
|
+
email: jeremyevans0@gmail.com
|
12
|
+
homepage: http://rubyforge.org/projects/fasterh/
|
13
|
+
rubyforge_project: fasterh
|
14
|
+
description:
|
15
|
+
autorequire: faster_html_escape
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
authors:
|
29
|
+
- Jeremy Evans
|
30
|
+
files:
|
31
|
+
- test
|
32
|
+
- speed_up_html_escape.rb
|
33
|
+
- faster_html_escape.c
|
34
|
+
- extconf.rb
|
35
|
+
- faster_html_escape.gemspec
|
36
|
+
test_files:
|
37
|
+
- test/test_faster_html_escape.sh
|
38
|
+
- test/test_faster_html_escape.rb
|
39
|
+
rdoc_options: []
|
40
|
+
|
41
|
+
extra_rdoc_files: []
|
42
|
+
|
43
|
+
executables: []
|
44
|
+
|
45
|
+
extensions:
|
46
|
+
- extconf.rb
|
47
|
+
requirements: []
|
48
|
+
|
49
|
+
dependencies: []
|
50
|
+
|