hamlit 1.7.2 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -3
- data/.gitmodules +3 -0
- data/.travis.yml +25 -37
- data/CHANGELOG.md +18 -0
- data/Gemfile +16 -0
- data/LICENSE.txt +23 -2
- data/README.md +106 -48
- data/REFERENCE.md +222 -0
- data/Rakefile +77 -19
- data/benchmark/boolean_attribute.haml +6 -0
- data/benchmark/class_attribute.haml +5 -0
- data/benchmark/common_attribute.haml +3 -0
- data/benchmark/data_attribute.haml +4 -0
- data/benchmark/dynamic_attributes/boolean_attribute.haml +4 -0
- data/benchmark/dynamic_attributes/class_attribute.haml +4 -0
- data/benchmark/dynamic_attributes/common_attribute.haml +2 -0
- data/benchmark/dynamic_attributes/data_attribute.haml +2 -0
- data/benchmark/dynamic_attributes/id_attribute.haml +2 -0
- data/benchmark/etc/attribute_builder.haml +5 -0
- data/benchmark/etc/real_sample.haml +888 -0
- data/benchmark/etc/real_sample.rb +11 -0
- data/benchmark/etc/static_analyzer.haml +1 -0
- data/benchmark/etc/tags.haml +3 -0
- data/benchmark/ext/build_data.rb +15 -0
- data/benchmark/ext/build_id.rb +13 -0
- data/benchmark/id_attribute.haml +3 -0
- data/benchmark/plain.haml +4 -0
- data/benchmark/script.haml +4 -0
- data/benchmark/slim/LICENSE +21 -0
- data/{benchmarks → benchmark/slim}/context.rb +2 -4
- data/benchmark/slim/run-benchmarks.rb +94 -0
- data/{benchmarks → benchmark/slim}/view.erb +3 -3
- data/{benchmarks → benchmark/slim}/view.haml +0 -0
- data/{benchmarks/view.escaped.slim → benchmark/slim/view.slim} +1 -1
- data/benchmark/string_interpolation.haml +2 -0
- data/benchmark/utils/benchmark_ips_extension.rb +43 -0
- data/bin/bench +85 -0
- data/bin/clone +14 -0
- data/bin/console +11 -0
- data/bin/lineprof +48 -0
- data/bin/ruby +3 -0
- data/bin/setup +7 -0
- data/bin/stackprof +27 -0
- data/{test → bin/test} +6 -10
- data/{bin → exe}/hamlit +0 -0
- data/ext/hamlit/extconf.rb +14 -0
- data/ext/hamlit/hamlit.c +512 -0
- data/ext/hamlit/houdini/.gitignore +3 -0
- data/ext/hamlit/houdini/COPYING +7 -0
- data/ext/hamlit/houdini/Makefile +79 -0
- data/ext/hamlit/houdini/README.md +59 -0
- data/ext/hamlit/houdini/buffer.c +249 -0
- data/ext/hamlit/houdini/buffer.h +113 -0
- data/ext/hamlit/houdini/houdini.h +46 -0
- data/ext/hamlit/houdini/houdini_href_e.c +115 -0
- data/ext/hamlit/houdini/houdini_html_e.c +90 -0
- data/ext/hamlit/houdini/houdini_html_u.c +122 -0
- data/ext/hamlit/houdini/houdini_js_e.c +90 -0
- data/ext/hamlit/houdini/houdini_js_u.c +60 -0
- data/ext/hamlit/houdini/houdini_uri_e.c +107 -0
- data/ext/hamlit/houdini/houdini_uri_u.c +68 -0
- data/ext/hamlit/houdini/houdini_xml_e.c +136 -0
- data/ext/hamlit/houdini/html_unescape.gperf +258 -0
- data/ext/hamlit/houdini/html_unescape.h +754 -0
- data/ext/hamlit/houdini/tools/build_table.py +13 -0
- data/ext/hamlit/houdini/tools/build_tables.c +51 -0
- data/ext/hamlit/houdini/tools/wikipedia_table.txt +2025 -0
- data/hamlit.gemspec +30 -31
- data/lib/hamlit.rb +3 -1
- data/lib/hamlit/attribute_builder.rb +12 -0
- data/lib/hamlit/cli.rb +44 -43
- data/lib/hamlit/compiler.rb +92 -16
- data/lib/hamlit/compiler/attribute_compiler.rb +148 -0
- data/lib/hamlit/compiler/children_compiler.rb +111 -0
- data/lib/hamlit/compiler/comment_compiler.rb +36 -0
- data/lib/hamlit/compiler/doctype_compiler.rb +45 -0
- data/lib/hamlit/compiler/script_compiler.rb +97 -0
- data/lib/hamlit/compiler/silent_script_compiler.rb +24 -0
- data/lib/hamlit/compiler/tag_compiler.rb +69 -0
- data/lib/hamlit/engine.rb +12 -7
- data/lib/hamlit/error.rb +14 -0
- data/lib/hamlit/escapable.rb +12 -0
- data/lib/hamlit/filters.rb +65 -0
- data/lib/hamlit/filters/base.rb +4 -62
- data/lib/hamlit/filters/coffee.rb +9 -7
- data/lib/hamlit/filters/css.rb +25 -8
- data/lib/hamlit/filters/erb.rb +4 -6
- data/lib/hamlit/filters/escaped.rb +11 -9
- data/lib/hamlit/filters/javascript.rb +25 -8
- data/lib/hamlit/filters/less.rb +9 -7
- data/lib/hamlit/filters/markdown.rb +5 -6
- data/lib/hamlit/filters/plain.rb +11 -15
- data/lib/hamlit/filters/preserve.rb +15 -5
- data/lib/hamlit/filters/ruby.rb +3 -5
- data/lib/hamlit/filters/sass.rb +9 -7
- data/lib/hamlit/filters/scss.rb +9 -7
- data/lib/hamlit/filters/text_base.rb +24 -0
- data/lib/hamlit/filters/tilt_base.rb +47 -0
- data/lib/hamlit/hash_parser.rb +107 -0
- data/lib/hamlit/html.rb +9 -6
- data/lib/hamlit/identity.rb +12 -0
- data/lib/hamlit/object_ref.rb +29 -0
- data/lib/hamlit/parser.rb +25 -142
- data/lib/hamlit/parser/MIT-LICENSE +20 -0
- data/lib/hamlit/parser/README.md +28 -0
- data/lib/hamlit/parser/haml_buffer.rb +348 -0
- data/lib/hamlit/parser/haml_compiler.rb +553 -0
- data/lib/hamlit/parser/haml_error.rb +61 -0
- data/lib/hamlit/parser/haml_helpers.rb +727 -0
- data/lib/hamlit/parser/haml_options.rb +286 -0
- data/lib/hamlit/parser/haml_parser.rb +801 -0
- data/lib/hamlit/parser/haml_util.rb +283 -0
- data/lib/hamlit/parser/haml_xss_mods.rb +109 -0
- data/lib/hamlit/{helpers.rb → rails_helpers.rb} +2 -7
- data/lib/hamlit/rails_template.rb +30 -0
- data/lib/hamlit/railtie.rb +1 -12
- data/lib/hamlit/ruby_expression.rb +31 -0
- data/lib/hamlit/static_analyzer.rb +49 -0
- data/lib/hamlit/string_interpolation.rb +69 -0
- data/lib/hamlit/template.rb +8 -0
- data/lib/hamlit/utils.rb +9 -0
- data/lib/hamlit/version.rb +1 -1
- metadata +116 -324
- data/.rspec +0 -2
- data/benchmarks/benchmark.rb +0 -110
- data/benchmarks/view.slim +0 -17
- data/doc/README.md +0 -19
- data/doc/engine/indent.md +0 -48
- data/doc/engine/new_attribute.md +0 -77
- data/doc/engine/old_attributes.md +0 -198
- data/doc/engine/silent_script.md +0 -97
- data/doc/engine/tag.md +0 -48
- data/doc/engine/text.md +0 -64
- data/doc/faml/README.md +0 -16
- data/doc/faml/engine/indent.md +0 -48
- data/doc/faml/engine/old_attributes.md +0 -111
- data/doc/faml/engine/silent_script.md +0 -97
- data/doc/faml/engine/text.md +0 -59
- data/doc/faml/filters/erb.md +0 -24
- data/doc/faml/filters/javascript.md +0 -27
- data/doc/faml/filters/less.md +0 -57
- data/doc/faml/filters/plain.md +0 -25
- data/doc/filters/erb.md +0 -31
- data/doc/filters/javascript.md +0 -83
- data/doc/filters/less.md +0 -57
- data/doc/filters/markdown.md +0 -31
- data/doc/filters/plain.md +0 -25
- data/doc/haml/README.md +0 -15
- data/doc/haml/engine/new_attribute.md +0 -77
- data/doc/haml/engine/old_attributes.md +0 -142
- data/doc/haml/engine/tag.md +0 -48
- data/doc/haml/engine/text.md +0 -29
- data/doc/haml/filters/erb.md +0 -26
- data/doc/haml/filters/javascript.md +0 -76
- data/doc/haml/filters/markdown.md +0 -31
- data/lib/hamlit/attribute.rb +0 -78
- data/lib/hamlit/compilers/attributes.rb +0 -108
- data/lib/hamlit/compilers/comment.rb +0 -13
- data/lib/hamlit/compilers/doctype.rb +0 -39
- data/lib/hamlit/compilers/filter.rb +0 -53
- data/lib/hamlit/compilers/new_attribute.rb +0 -115
- data/lib/hamlit/compilers/old_attribute.rb +0 -241
- data/lib/hamlit/compilers/runtime_attribute.rb +0 -58
- data/lib/hamlit/compilers/script.rb +0 -31
- data/lib/hamlit/compilers/strip.rb +0 -19
- data/lib/hamlit/compilers/text.rb +0 -111
- data/lib/hamlit/concerns/attribute_builder.rb +0 -22
- data/lib/hamlit/concerns/balanceable.rb +0 -68
- data/lib/hamlit/concerns/deprecation.rb +0 -20
- data/lib/hamlit/concerns/error.rb +0 -31
- data/lib/hamlit/concerns/escapable.rb +0 -17
- data/lib/hamlit/concerns/included.rb +0 -28
- data/lib/hamlit/concerns/indentable.rb +0 -117
- data/lib/hamlit/concerns/lexable.rb +0 -32
- data/lib/hamlit/concerns/line_reader.rb +0 -62
- data/lib/hamlit/concerns/registerable.rb +0 -24
- data/lib/hamlit/concerns/string_interpolation.rb +0 -48
- data/lib/hamlit/concerns/whitespace.rb +0 -91
- data/lib/hamlit/filters/tilt.rb +0 -41
- data/lib/hamlit/parsers/attribute.rb +0 -71
- data/lib/hamlit/parsers/comment.rb +0 -30
- data/lib/hamlit/parsers/doctype.rb +0 -18
- data/lib/hamlit/parsers/filter.rb +0 -18
- data/lib/hamlit/parsers/multiline.rb +0 -58
- data/lib/hamlit/parsers/script.rb +0 -126
- data/lib/hamlit/parsers/tag.rb +0 -83
- data/lib/hamlit/parsers/text.rb +0 -28
- data/lib/hamlit/temple.rb +0 -9
- data/release +0 -6
- data/spec/Rakefile +0 -72
- data/spec/hamlit/engine/comment_spec.rb +0 -56
- data/spec/hamlit/engine/doctype_spec.rb +0 -19
- data/spec/hamlit/engine/error_spec.rb +0 -135
- data/spec/hamlit/engine/indent_spec.rb +0 -42
- data/spec/hamlit/engine/multiline_spec.rb +0 -44
- data/spec/hamlit/engine/new_attribute_spec.rb +0 -110
- data/spec/hamlit/engine/old_attributes_spec.rb +0 -404
- data/spec/hamlit/engine/script_spec.rb +0 -116
- data/spec/hamlit/engine/silent_script_spec.rb +0 -213
- data/spec/hamlit/engine/tag_spec.rb +0 -295
- data/spec/hamlit/engine/text_spec.rb +0 -239
- data/spec/hamlit/engine_spec.rb +0 -58
- data/spec/hamlit/filters/coffee_spec.rb +0 -60
- data/spec/hamlit/filters/css_spec.rb +0 -33
- data/spec/hamlit/filters/erb_spec.rb +0 -16
- data/spec/hamlit/filters/javascript_spec.rb +0 -82
- data/spec/hamlit/filters/less_spec.rb +0 -37
- data/spec/hamlit/filters/markdown_spec.rb +0 -30
- data/spec/hamlit/filters/plain_spec.rb +0 -15
- data/spec/hamlit/filters/ruby_spec.rb +0 -24
- data/spec/hamlit/filters/sass_spec.rb +0 -33
- data/spec/hamlit/filters/scss_spec.rb +0 -37
- data/spec/hamlit/haml_spec.rb +0 -910
- data/spec/rails/.gitignore +0 -18
- data/spec/rails/.rspec +0 -2
- data/spec/rails/Gemfile +0 -19
- data/spec/rails/README.rdoc +0 -28
- data/spec/rails/Rakefile +0 -6
- data/spec/rails/app/assets/images/.keep +0 -0
- data/spec/rails/app/assets/javascripts/application.js +0 -15
- data/spec/rails/app/assets/stylesheets/application.css +0 -15
- data/spec/rails/app/controllers/application_controller.rb +0 -8
- data/spec/rails/app/controllers/concerns/.keep +0 -0
- data/spec/rails/app/controllers/users_controller.rb +0 -23
- data/spec/rails/app/helpers/application_helper.rb +0 -2
- data/spec/rails/app/mailers/.keep +0 -0
- data/spec/rails/app/models/.keep +0 -0
- data/spec/rails/app/models/concerns/.keep +0 -0
- data/spec/rails/app/views/application/index.html.haml +0 -18
- data/spec/rails/app/views/layouts/application.html.haml +0 -12
- data/spec/rails/app/views/users/capture.html.haml +0 -5
- data/spec/rails/app/views/users/capture_haml.html.haml +0 -5
- data/spec/rails/app/views/users/form.html.haml +0 -2
- data/spec/rails/app/views/users/helpers.html.haml +0 -10
- data/spec/rails/app/views/users/index.html.haml +0 -9
- data/spec/rails/app/views/users/inline.html.haml +0 -6
- data/spec/rails/app/views/users/old_attributes.html.haml +0 -5
- data/spec/rails/app/views/users/safe_buffer.html.haml +0 -4
- data/spec/rails/app/views/users/whitespace.html.haml +0 -4
- data/spec/rails/bin/bundle +0 -3
- data/spec/rails/bin/rails +0 -8
- data/spec/rails/bin/rake +0 -8
- data/spec/rails/bin/setup +0 -29
- data/spec/rails/bin/spring +0 -15
- data/spec/rails/config.ru +0 -4
- data/spec/rails/config/application.rb +0 -34
- data/spec/rails/config/boot.rb +0 -3
- data/spec/rails/config/database.yml +0 -25
- data/spec/rails/config/environment.rb +0 -5
- data/spec/rails/config/environments/development.rb +0 -41
- data/spec/rails/config/environments/production.rb +0 -79
- data/spec/rails/config/environments/test.rb +0 -42
- data/spec/rails/config/initializers/assets.rb +0 -11
- data/spec/rails/config/initializers/backtrace_silencers.rb +0 -7
- data/spec/rails/config/initializers/cookies_serializer.rb +0 -3
- data/spec/rails/config/initializers/filter_parameter_logging.rb +0 -4
- data/spec/rails/config/initializers/inflections.rb +0 -16
- data/spec/rails/config/initializers/mime_types.rb +0 -4
- data/spec/rails/config/initializers/session_store.rb +0 -3
- data/spec/rails/config/initializers/wrap_parameters.rb +0 -14
- data/spec/rails/config/locales/en.yml +0 -24
- data/spec/rails/config/routes.rb +0 -16
- data/spec/rails/config/secrets.yml +0 -22
- data/spec/rails/db/schema.rb +0 -16
- data/spec/rails/db/seeds.rb +0 -7
- data/spec/rails/lib/assets/.keep +0 -0
- data/spec/rails/lib/tasks/.keep +0 -0
- data/spec/rails/log/.keep +0 -0
- data/spec/rails/public/404.html +0 -67
- data/spec/rails/public/422.html +0 -67
- data/spec/rails/public/500.html +0 -66
- data/spec/rails/public/favicon.ico +0 -0
- data/spec/rails/public/robots.txt +0 -5
- data/spec/rails/spec/hamlit_spec.rb +0 -123
- data/spec/rails/spec/rails_helper.rb +0 -56
- data/spec/rails/spec/spec_helper.rb +0 -91
- data/spec/rails/vendor/assets/javascripts/.keep +0 -0
- data/spec/rails/vendor/assets/stylesheets/.keep +0 -0
- data/spec/spec_helper.rb +0 -36
- data/spec/spec_helper/document_generator.rb +0 -93
- data/spec/spec_helper/render_helper.rb +0 -120
- data/spec/spec_helper/test_case.rb +0 -55
@@ -0,0 +1,46 @@
|
|
1
|
+
#ifndef __HOUDINI_H__
|
2
|
+
#define __HOUDINI_H__
|
3
|
+
|
4
|
+
#ifdef __cplusplus
|
5
|
+
extern "C" {
|
6
|
+
#endif
|
7
|
+
|
8
|
+
#include <stdint.h>
|
9
|
+
#include "buffer.h"
|
10
|
+
|
11
|
+
/*
 * Branch-prediction hints. __builtin_expect is a GCC/Clang builtin, so other
 * compilers fall back to the bare expression.
 */
#if defined(__GNUC__) || defined(__clang__)
# define likely(x) __builtin_expect((x),1)
# define unlikely(x) __builtin_expect((x),0)
#else
# define likely(x) (x)
# define unlikely(x) (x)
#endif

#ifdef HOUDINI_USE_LOCALE
# include <ctype.h> /* isxdigit, isdigit */
# define _isxdigit(c) isxdigit(c)
# define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale.
 *
 * _isxdigit is spelled as range checks rather than a strchr() lookup:
 * strchr() also matches the terminating NUL of its haystack, so a '\0'
 * byte would be reported as a hexadecimal digit. OR-ing with 32 folds
 * ASCII 'A'..'F' onto 'a'..'f'.
 *
 * Both macros evaluate their argument more than once.
 */
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
# define _isxdigit(c) (_isdigit(c) || (((c) | 32) >= 'a' && ((c) | 32) <= 'f'))
#endif

/*
 * Output-size estimates passed to gh_buf_grow() by the escapers.
 * x + x/5 equals (x * 12) / 10 for non-negative integers, but avoids the
 * intermediate x * 12 product, which can wrap for very large sizes.
 */
#define HOUDINI_ESCAPED_SIZE(x) ((x) + (x) / 5)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
|
27
|
+
|
28
|
+
extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
|
29
|
+
extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
|
30
|
+
extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
|
31
|
+
extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
|
32
|
+
extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
|
33
|
+
extern int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
|
34
|
+
extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
|
35
|
+
extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
|
36
|
+
extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
|
37
|
+
extern int houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
|
38
|
+
extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
|
39
|
+
extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
|
40
|
+
extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
|
41
|
+
|
42
|
+
#ifdef __cplusplus
|
43
|
+
}
|
44
|
+
#endif
|
45
|
+
|
46
|
+
#endif
|
@@ -0,0 +1,115 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/*
|
8
|
+
* The following characters will not be escaped:
|
9
|
+
*
|
10
|
+
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
11
|
+
*
|
12
|
+
* Note that this character set is the addition of:
|
13
|
+
*
|
14
|
+
* - The characters which are safe to be in an URL
|
15
|
+
* - The characters which are *not* safe to be in
|
16
|
+
* an URL because they are RESERVED characters.
|
17
|
+
*
|
18
|
+
* We assume (lazily) that any RESERVED char that
|
19
|
+
* appears inside an URL is actually meant to
|
20
|
+
* have its native function (i.e. as an URL
|
21
|
+
* component/separator) and hence needs no escaping.
|
22
|
+
*
|
23
|
+
* There are two exceptions: the characters & (amp)
|
24
|
+
* and ' (single quote) do not appear in the table.
|
25
|
+
* They are meant to appear in the URL as components,
|
26
|
+
* yet they require special HTML-entity escaping
|
27
|
+
* to generate valid HTML markup.
|
28
|
+
*
|
29
|
+
* All other characters will be escaped to %XX.
|
30
|
+
*
|
31
|
+
*/
|
32
|
+
/*
 * Per-byte lookup used by houdini_escape_href: a non-zero entry means the
 * byte is copied to the output unchanged, zero means it must be escaped.
 * One row per 16 byte values.
 */
static const char HREF_SAFE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,	/* SP ! " # $ % & ' ( ) * + , - . / */
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,	/* 0-9 : ; < = > ? */
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* @ A-O */
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,	/* P-Z [ \ ] ^ _ */
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* ` a-o */
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,	/* p-z { | } ~ DEL */
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
50
|
+
|
51
|
+
int
|
52
|
+
houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
|
53
|
+
{
|
54
|
+
static const uint8_t hex_chars[] = "0123456789ABCDEF";
|
55
|
+
size_t i = 0, org;
|
56
|
+
uint8_t hex_str[3];
|
57
|
+
|
58
|
+
hex_str[0] = '%';
|
59
|
+
|
60
|
+
while (i < size) {
|
61
|
+
org = i;
|
62
|
+
while (i < size && HREF_SAFE[src[i]] != 0)
|
63
|
+
i++;
|
64
|
+
|
65
|
+
if (likely(i > org)) {
|
66
|
+
if (unlikely(org == 0)) {
|
67
|
+
if (i >= size)
|
68
|
+
return 0;
|
69
|
+
|
70
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
71
|
+
}
|
72
|
+
|
73
|
+
gh_buf_put(ob, src + org, i - org);
|
74
|
+
}
|
75
|
+
|
76
|
+
/* escaping */
|
77
|
+
if (i >= size)
|
78
|
+
break;
|
79
|
+
|
80
|
+
switch (src[i]) {
|
81
|
+
/* amp appears all the time in URLs, but needs
|
82
|
+
* HTML-entity escaping to be inside an href */
|
83
|
+
case '&':
|
84
|
+
gh_buf_PUTS(ob, "&");
|
85
|
+
break;
|
86
|
+
|
87
|
+
/* the single quote is a valid URL character
|
88
|
+
* according to the standard; it needs HTML
|
89
|
+
* entity escaping too */
|
90
|
+
case '\'':
|
91
|
+
gh_buf_PUTS(ob, "'");
|
92
|
+
break;
|
93
|
+
|
94
|
+
/* the space can be escaped to %20 or a plus
|
95
|
+
* sign. we're going with the generic escape
|
96
|
+
* for now. the plus thing is more commonly seen
|
97
|
+
* when building GET strings */
|
98
|
+
#if 0
|
99
|
+
case ' ':
|
100
|
+
gh_buf_putc(ob, '+');
|
101
|
+
break;
|
102
|
+
#endif
|
103
|
+
|
104
|
+
/* every other character goes with a %XX escaping */
|
105
|
+
default:
|
106
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
107
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
108
|
+
gh_buf_put(ob, hex_str, 3);
|
109
|
+
}
|
110
|
+
|
111
|
+
i++;
|
112
|
+
}
|
113
|
+
|
114
|
+
return 1;
|
115
|
+
}
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/**
|
8
|
+
* According to the OWASP rules:
|
9
|
+
*
|
10
|
+
* & --> &amp;
|
11
|
+
* < --> &lt;
|
12
|
+
* > --> &gt;
|
13
|
+
* " --> &quot;
|
14
|
+
* ' --> &#x27; &apos; is not recommended
|
15
|
+
* / --> &#x2F; forward slash is included as it helps end an HTML entity
|
16
|
+
*
|
17
|
+
*/
|
18
|
+
/*
 * Per-byte escape code for houdini_escape_html0. Zero means the byte is
 * copied as-is; a non-zero value is the index of its replacement string
 * in HTML_ESCAPES. Entries not listed here are zero.
 */
static const char HTML_ESCAPE_TABLE[256] = {
	['"']  = 1,
	['&']  = 2,
	['\''] = 3,
	['/']  = 4,
	['<']  = 5,
	['>']  = 6,
};
|
36
|
+
|
37
|
+
/*
 * Replacement text for each escape code stored in HTML_ESCAPE_TABLE.
 * Index 0 is the "no escaping" code and is never written out.
 */
static const char *HTML_ESCAPES[] = {
	"",		/* 0: not escaped */
	"&quot;",	/* 1: double quote */
	"&amp;",	/* 2: ampersand */
	"&#39;",	/* 3: single quote */
	"&#47;",	/* 4: forward slash */
	"&lt;",		/* 5: less-than */
	"&gt;"		/* 6: greater-than */
};
|
46
|
+
|
47
|
+
int
|
48
|
+
houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
|
49
|
+
{
|
50
|
+
size_t i = 0, org, esc = 0;
|
51
|
+
|
52
|
+
while (i < size) {
|
53
|
+
org = i;
|
54
|
+
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
55
|
+
i++;
|
56
|
+
|
57
|
+
if (i > org) {
|
58
|
+
if (unlikely(org == 0)) {
|
59
|
+
if (i >= size)
|
60
|
+
return 0;
|
61
|
+
|
62
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
63
|
+
}
|
64
|
+
|
65
|
+
gh_buf_put(ob, src + org, i - org);
|
66
|
+
}
|
67
|
+
|
68
|
+
/* escaping */
|
69
|
+
if (unlikely(i >= size))
|
70
|
+
break;
|
71
|
+
|
72
|
+
/* The forward slash is only escaped in secure mode */
|
73
|
+
if (src[i] == '/' && !secure) {
|
74
|
+
gh_buf_putc(ob, '/');
|
75
|
+
} else {
|
76
|
+
gh_buf_puts(ob, HTML_ESCAPES[esc]);
|
77
|
+
}
|
78
|
+
|
79
|
+
i++;
|
80
|
+
}
|
81
|
+
|
82
|
+
return 1;
|
83
|
+
}
|
84
|
+
|
85
|
+
int
|
86
|
+
houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
|
87
|
+
{
|
88
|
+
return houdini_escape_html0(ob, src, size, 1);
|
89
|
+
}
|
90
|
+
|
@@ -0,0 +1,122 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
#include "html_unescape.h"
|
7
|
+
|
8
|
+
static inline void
|
9
|
+
gh_buf_put_utf8(gh_buf *ob, int c)
|
10
|
+
{
|
11
|
+
unsigned char unichar[4];
|
12
|
+
|
13
|
+
if (c < 0x80) {
|
14
|
+
gh_buf_putc(ob, c);
|
15
|
+
}
|
16
|
+
else if (c < 0x800) {
|
17
|
+
unichar[0] = 192 + (c / 64);
|
18
|
+
unichar[1] = 128 + (c % 64);
|
19
|
+
gh_buf_put(ob, unichar, 2);
|
20
|
+
}
|
21
|
+
else if (c - 0xd800u < 0x800) {
|
22
|
+
gh_buf_putc(ob, '?');
|
23
|
+
}
|
24
|
+
else if (c < 0x10000) {
|
25
|
+
unichar[0] = 224 + (c / 4096);
|
26
|
+
unichar[1] = 128 + (c / 64) % 64;
|
27
|
+
unichar[2] = 128 + (c % 64);
|
28
|
+
gh_buf_put(ob, unichar, 3);
|
29
|
+
}
|
30
|
+
else if (c < 0x110000) {
|
31
|
+
unichar[0] = 240 + (c / 262144);
|
32
|
+
unichar[1] = 128 + (c / 4096) % 64;
|
33
|
+
unichar[2] = 128 + (c / 64) % 64;
|
34
|
+
unichar[3] = 128 + (c % 64);
|
35
|
+
gh_buf_put(ob, unichar, 4);
|
36
|
+
}
|
37
|
+
else {
|
38
|
+
gh_buf_putc(ob, '?');
|
39
|
+
}
|
40
|
+
}
|
41
|
+
|
42
|
+
static size_t
|
43
|
+
unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
|
44
|
+
{
|
45
|
+
size_t i = 0;
|
46
|
+
|
47
|
+
if (size > 3 && src[0] == '#') {
|
48
|
+
int codepoint = 0;
|
49
|
+
|
50
|
+
if (_isdigit(src[1])) {
|
51
|
+
for (i = 1; i < size && _isdigit(src[i]); ++i)
|
52
|
+
codepoint = (codepoint * 10) + (src[i] - '0');
|
53
|
+
}
|
54
|
+
|
55
|
+
else if (src[1] == 'x' || src[1] == 'X') {
|
56
|
+
for (i = 2; i < size && _isxdigit(src[i]); ++i)
|
57
|
+
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
58
|
+
}
|
59
|
+
|
60
|
+
if (i < size && src[i] == ';' && codepoint) {
|
61
|
+
gh_buf_put_utf8(ob, codepoint);
|
62
|
+
return i + 1;
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
else {
|
67
|
+
if (size > MAX_WORD_LENGTH)
|
68
|
+
size = MAX_WORD_LENGTH;
|
69
|
+
|
70
|
+
for (i = MIN_WORD_LENGTH; i < size; ++i) {
|
71
|
+
if (src[i] == ' ')
|
72
|
+
break;
|
73
|
+
|
74
|
+
if (src[i] == ';') {
|
75
|
+
const struct html_ent *entity = find_entity((char *)src, i);
|
76
|
+
|
77
|
+
if (entity != NULL) {
|
78
|
+
gh_buf_put(ob, entity->utf8, entity->utf8_len);
|
79
|
+
return i + 1;
|
80
|
+
}
|
81
|
+
|
82
|
+
break;
|
83
|
+
}
|
84
|
+
}
|
85
|
+
}
|
86
|
+
|
87
|
+
gh_buf_putc(ob, '&');
|
88
|
+
return 0;
|
89
|
+
}
|
90
|
+
|
91
|
+
int
|
92
|
+
houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
|
93
|
+
{
|
94
|
+
size_t i = 0, org;
|
95
|
+
|
96
|
+
while (i < size) {
|
97
|
+
org = i;
|
98
|
+
while (i < size && src[i] != '&')
|
99
|
+
i++;
|
100
|
+
|
101
|
+
if (likely(i > org)) {
|
102
|
+
if (unlikely(org == 0)) {
|
103
|
+
if (i >= size)
|
104
|
+
return 0;
|
105
|
+
|
106
|
+
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
107
|
+
}
|
108
|
+
|
109
|
+
gh_buf_put(ob, src + org, i - org);
|
110
|
+
}
|
111
|
+
|
112
|
+
/* escaping */
|
113
|
+
if (i >= size)
|
114
|
+
break;
|
115
|
+
|
116
|
+
i++;
|
117
|
+
i += unescape_ent(ob, src + i, size - i);
|
118
|
+
}
|
119
|
+
|
120
|
+
return 1;
|
121
|
+
}
|
122
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/*
 * Per-byte flag for houdini_escape_js: non-zero marks a byte that gets
 * special handling there. Entries not listed here are zero.
 */
static const char JS_ESCAPE[256] = {
	['\n'] = 1,
	['\r'] = 1,
	['"']  = 1,
	['\''] = 1,
	['/']  = 1,
	['\\'] = 1,
};
|
25
|
+
|
26
|
+
int
|
27
|
+
houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size)
|
28
|
+
{
|
29
|
+
size_t i = 0, org, ch;
|
30
|
+
|
31
|
+
while (i < size) {
|
32
|
+
org = i;
|
33
|
+
while (i < size && JS_ESCAPE[src[i]] == 0)
|
34
|
+
i++;
|
35
|
+
|
36
|
+
if (likely(i > org)) {
|
37
|
+
if (unlikely(org == 0)) {
|
38
|
+
if (i >= size)
|
39
|
+
return 0;
|
40
|
+
|
41
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
42
|
+
}
|
43
|
+
|
44
|
+
gh_buf_put(ob, src + org, i - org);
|
45
|
+
}
|
46
|
+
|
47
|
+
/* escaping */
|
48
|
+
if (i >= size)
|
49
|
+
break;
|
50
|
+
|
51
|
+
ch = src[i];
|
52
|
+
|
53
|
+
switch (ch) {
|
54
|
+
case '/':
|
55
|
+
/*
|
56
|
+
* Escape only if preceded by a lt
|
57
|
+
*/
|
58
|
+
if (i && src[i - 1] == '<')
|
59
|
+
gh_buf_putc(ob, '\\');
|
60
|
+
|
61
|
+
gh_buf_putc(ob, ch);
|
62
|
+
break;
|
63
|
+
|
64
|
+
case '\r':
|
65
|
+
/*
|
66
|
+
* Escape as \n, and skip the next \n if it's there
|
67
|
+
*/
|
68
|
+
if (i + 1 < size && src[i + 1] == '\n') i++;
|
69
|
+
|
70
|
+
case '\n':
|
71
|
+
/*
|
72
|
+
* Escape actually as '\','n', not as '\', '\n'
|
73
|
+
*/
|
74
|
+
ch = 'n';
|
75
|
+
|
76
|
+
default:
|
77
|
+
/*
|
78
|
+
* Normal escaping
|
79
|
+
*/
|
80
|
+
gh_buf_putc(ob, '\\');
|
81
|
+
gh_buf_putc(ob, ch);
|
82
|
+
break;
|
83
|
+
}
|
84
|
+
|
85
|
+
i++;
|
86
|
+
}
|
87
|
+
|
88
|
+
return 1;
|
89
|
+
}
|
90
|
+
|