hamlit 1.7.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -3
- data/.gitmodules +3 -0
- data/.travis.yml +25 -37
- data/CHANGELOG.md +18 -0
- data/Gemfile +16 -0
- data/LICENSE.txt +23 -2
- data/README.md +106 -48
- data/REFERENCE.md +222 -0
- data/Rakefile +77 -19
- data/benchmark/boolean_attribute.haml +6 -0
- data/benchmark/class_attribute.haml +5 -0
- data/benchmark/common_attribute.haml +3 -0
- data/benchmark/data_attribute.haml +4 -0
- data/benchmark/dynamic_attributes/boolean_attribute.haml +4 -0
- data/benchmark/dynamic_attributes/class_attribute.haml +4 -0
- data/benchmark/dynamic_attributes/common_attribute.haml +2 -0
- data/benchmark/dynamic_attributes/data_attribute.haml +2 -0
- data/benchmark/dynamic_attributes/id_attribute.haml +2 -0
- data/benchmark/etc/attribute_builder.haml +5 -0
- data/benchmark/etc/real_sample.haml +888 -0
- data/benchmark/etc/real_sample.rb +11 -0
- data/benchmark/etc/static_analyzer.haml +1 -0
- data/benchmark/etc/tags.haml +3 -0
- data/benchmark/ext/build_data.rb +15 -0
- data/benchmark/ext/build_id.rb +13 -0
- data/benchmark/id_attribute.haml +3 -0
- data/benchmark/plain.haml +4 -0
- data/benchmark/script.haml +4 -0
- data/benchmark/slim/LICENSE +21 -0
- data/{benchmarks → benchmark/slim}/context.rb +2 -4
- data/benchmark/slim/run-benchmarks.rb +94 -0
- data/{benchmarks → benchmark/slim}/view.erb +3 -3
- data/{benchmarks → benchmark/slim}/view.haml +0 -0
- data/{benchmarks/view.escaped.slim → benchmark/slim/view.slim} +1 -1
- data/benchmark/string_interpolation.haml +2 -0
- data/benchmark/utils/benchmark_ips_extension.rb +43 -0
- data/bin/bench +85 -0
- data/bin/clone +14 -0
- data/bin/console +11 -0
- data/bin/lineprof +48 -0
- data/bin/ruby +3 -0
- data/bin/setup +7 -0
- data/bin/stackprof +27 -0
- data/{test → bin/test} +6 -10
- data/{bin → exe}/hamlit +0 -0
- data/ext/hamlit/extconf.rb +14 -0
- data/ext/hamlit/hamlit.c +512 -0
- data/ext/hamlit/houdini/.gitignore +3 -0
- data/ext/hamlit/houdini/COPYING +7 -0
- data/ext/hamlit/houdini/Makefile +79 -0
- data/ext/hamlit/houdini/README.md +59 -0
- data/ext/hamlit/houdini/buffer.c +249 -0
- data/ext/hamlit/houdini/buffer.h +113 -0
- data/ext/hamlit/houdini/houdini.h +46 -0
- data/ext/hamlit/houdini/houdini_href_e.c +115 -0
- data/ext/hamlit/houdini/houdini_html_e.c +90 -0
- data/ext/hamlit/houdini/houdini_html_u.c +122 -0
- data/ext/hamlit/houdini/houdini_js_e.c +90 -0
- data/ext/hamlit/houdini/houdini_js_u.c +60 -0
- data/ext/hamlit/houdini/houdini_uri_e.c +107 -0
- data/ext/hamlit/houdini/houdini_uri_u.c +68 -0
- data/ext/hamlit/houdini/houdini_xml_e.c +136 -0
- data/ext/hamlit/houdini/html_unescape.gperf +258 -0
- data/ext/hamlit/houdini/html_unescape.h +754 -0
- data/ext/hamlit/houdini/tools/build_table.py +13 -0
- data/ext/hamlit/houdini/tools/build_tables.c +51 -0
- data/ext/hamlit/houdini/tools/wikipedia_table.txt +2025 -0
- data/hamlit.gemspec +30 -31
- data/lib/hamlit.rb +3 -1
- data/lib/hamlit/attribute_builder.rb +12 -0
- data/lib/hamlit/cli.rb +44 -43
- data/lib/hamlit/compiler.rb +92 -16
- data/lib/hamlit/compiler/attribute_compiler.rb +148 -0
- data/lib/hamlit/compiler/children_compiler.rb +111 -0
- data/lib/hamlit/compiler/comment_compiler.rb +36 -0
- data/lib/hamlit/compiler/doctype_compiler.rb +45 -0
- data/lib/hamlit/compiler/script_compiler.rb +97 -0
- data/lib/hamlit/compiler/silent_script_compiler.rb +24 -0
- data/lib/hamlit/compiler/tag_compiler.rb +69 -0
- data/lib/hamlit/engine.rb +12 -7
- data/lib/hamlit/error.rb +14 -0
- data/lib/hamlit/escapable.rb +12 -0
- data/lib/hamlit/filters.rb +65 -0
- data/lib/hamlit/filters/base.rb +4 -62
- data/lib/hamlit/filters/coffee.rb +9 -7
- data/lib/hamlit/filters/css.rb +25 -8
- data/lib/hamlit/filters/erb.rb +4 -6
- data/lib/hamlit/filters/escaped.rb +11 -9
- data/lib/hamlit/filters/javascript.rb +25 -8
- data/lib/hamlit/filters/less.rb +9 -7
- data/lib/hamlit/filters/markdown.rb +5 -6
- data/lib/hamlit/filters/plain.rb +11 -15
- data/lib/hamlit/filters/preserve.rb +15 -5
- data/lib/hamlit/filters/ruby.rb +3 -5
- data/lib/hamlit/filters/sass.rb +9 -7
- data/lib/hamlit/filters/scss.rb +9 -7
- data/lib/hamlit/filters/text_base.rb +24 -0
- data/lib/hamlit/filters/tilt_base.rb +47 -0
- data/lib/hamlit/hash_parser.rb +107 -0
- data/lib/hamlit/html.rb +9 -6
- data/lib/hamlit/identity.rb +12 -0
- data/lib/hamlit/object_ref.rb +29 -0
- data/lib/hamlit/parser.rb +25 -142
- data/lib/hamlit/parser/MIT-LICENSE +20 -0
- data/lib/hamlit/parser/README.md +28 -0
- data/lib/hamlit/parser/haml_buffer.rb +348 -0
- data/lib/hamlit/parser/haml_compiler.rb +553 -0
- data/lib/hamlit/parser/haml_error.rb +61 -0
- data/lib/hamlit/parser/haml_helpers.rb +727 -0
- data/lib/hamlit/parser/haml_options.rb +286 -0
- data/lib/hamlit/parser/haml_parser.rb +801 -0
- data/lib/hamlit/parser/haml_util.rb +283 -0
- data/lib/hamlit/parser/haml_xss_mods.rb +109 -0
- data/lib/hamlit/{helpers.rb → rails_helpers.rb} +2 -7
- data/lib/hamlit/rails_template.rb +30 -0
- data/lib/hamlit/railtie.rb +1 -12
- data/lib/hamlit/ruby_expression.rb +31 -0
- data/lib/hamlit/static_analyzer.rb +49 -0
- data/lib/hamlit/string_interpolation.rb +69 -0
- data/lib/hamlit/template.rb +8 -0
- data/lib/hamlit/utils.rb +9 -0
- data/lib/hamlit/version.rb +1 -1
- metadata +116 -324
- data/.rspec +0 -2
- data/benchmarks/benchmark.rb +0 -110
- data/benchmarks/view.slim +0 -17
- data/doc/README.md +0 -19
- data/doc/engine/indent.md +0 -48
- data/doc/engine/new_attribute.md +0 -77
- data/doc/engine/old_attributes.md +0 -198
- data/doc/engine/silent_script.md +0 -97
- data/doc/engine/tag.md +0 -48
- data/doc/engine/text.md +0 -64
- data/doc/faml/README.md +0 -16
- data/doc/faml/engine/indent.md +0 -48
- data/doc/faml/engine/old_attributes.md +0 -111
- data/doc/faml/engine/silent_script.md +0 -97
- data/doc/faml/engine/text.md +0 -59
- data/doc/faml/filters/erb.md +0 -24
- data/doc/faml/filters/javascript.md +0 -27
- data/doc/faml/filters/less.md +0 -57
- data/doc/faml/filters/plain.md +0 -25
- data/doc/filters/erb.md +0 -31
- data/doc/filters/javascript.md +0 -83
- data/doc/filters/less.md +0 -57
- data/doc/filters/markdown.md +0 -31
- data/doc/filters/plain.md +0 -25
- data/doc/haml/README.md +0 -15
- data/doc/haml/engine/new_attribute.md +0 -77
- data/doc/haml/engine/old_attributes.md +0 -142
- data/doc/haml/engine/tag.md +0 -48
- data/doc/haml/engine/text.md +0 -29
- data/doc/haml/filters/erb.md +0 -26
- data/doc/haml/filters/javascript.md +0 -76
- data/doc/haml/filters/markdown.md +0 -31
- data/lib/hamlit/attribute.rb +0 -78
- data/lib/hamlit/compilers/attributes.rb +0 -108
- data/lib/hamlit/compilers/comment.rb +0 -13
- data/lib/hamlit/compilers/doctype.rb +0 -39
- data/lib/hamlit/compilers/filter.rb +0 -53
- data/lib/hamlit/compilers/new_attribute.rb +0 -115
- data/lib/hamlit/compilers/old_attribute.rb +0 -241
- data/lib/hamlit/compilers/runtime_attribute.rb +0 -58
- data/lib/hamlit/compilers/script.rb +0 -31
- data/lib/hamlit/compilers/strip.rb +0 -19
- data/lib/hamlit/compilers/text.rb +0 -111
- data/lib/hamlit/concerns/attribute_builder.rb +0 -22
- data/lib/hamlit/concerns/balanceable.rb +0 -68
- data/lib/hamlit/concerns/deprecation.rb +0 -20
- data/lib/hamlit/concerns/error.rb +0 -31
- data/lib/hamlit/concerns/escapable.rb +0 -17
- data/lib/hamlit/concerns/included.rb +0 -28
- data/lib/hamlit/concerns/indentable.rb +0 -117
- data/lib/hamlit/concerns/lexable.rb +0 -32
- data/lib/hamlit/concerns/line_reader.rb +0 -62
- data/lib/hamlit/concerns/registerable.rb +0 -24
- data/lib/hamlit/concerns/string_interpolation.rb +0 -48
- data/lib/hamlit/concerns/whitespace.rb +0 -91
- data/lib/hamlit/filters/tilt.rb +0 -41
- data/lib/hamlit/parsers/attribute.rb +0 -71
- data/lib/hamlit/parsers/comment.rb +0 -30
- data/lib/hamlit/parsers/doctype.rb +0 -18
- data/lib/hamlit/parsers/filter.rb +0 -18
- data/lib/hamlit/parsers/multiline.rb +0 -58
- data/lib/hamlit/parsers/script.rb +0 -126
- data/lib/hamlit/parsers/tag.rb +0 -83
- data/lib/hamlit/parsers/text.rb +0 -28
- data/lib/hamlit/temple.rb +0 -9
- data/release +0 -6
- data/spec/Rakefile +0 -72
- data/spec/hamlit/engine/comment_spec.rb +0 -56
- data/spec/hamlit/engine/doctype_spec.rb +0 -19
- data/spec/hamlit/engine/error_spec.rb +0 -135
- data/spec/hamlit/engine/indent_spec.rb +0 -42
- data/spec/hamlit/engine/multiline_spec.rb +0 -44
- data/spec/hamlit/engine/new_attribute_spec.rb +0 -110
- data/spec/hamlit/engine/old_attributes_spec.rb +0 -404
- data/spec/hamlit/engine/script_spec.rb +0 -116
- data/spec/hamlit/engine/silent_script_spec.rb +0 -213
- data/spec/hamlit/engine/tag_spec.rb +0 -295
- data/spec/hamlit/engine/text_spec.rb +0 -239
- data/spec/hamlit/engine_spec.rb +0 -58
- data/spec/hamlit/filters/coffee_spec.rb +0 -60
- data/spec/hamlit/filters/css_spec.rb +0 -33
- data/spec/hamlit/filters/erb_spec.rb +0 -16
- data/spec/hamlit/filters/javascript_spec.rb +0 -82
- data/spec/hamlit/filters/less_spec.rb +0 -37
- data/spec/hamlit/filters/markdown_spec.rb +0 -30
- data/spec/hamlit/filters/plain_spec.rb +0 -15
- data/spec/hamlit/filters/ruby_spec.rb +0 -24
- data/spec/hamlit/filters/sass_spec.rb +0 -33
- data/spec/hamlit/filters/scss_spec.rb +0 -37
- data/spec/hamlit/haml_spec.rb +0 -910
- data/spec/rails/.gitignore +0 -18
- data/spec/rails/.rspec +0 -2
- data/spec/rails/Gemfile +0 -19
- data/spec/rails/README.rdoc +0 -28
- data/spec/rails/Rakefile +0 -6
- data/spec/rails/app/assets/images/.keep +0 -0
- data/spec/rails/app/assets/javascripts/application.js +0 -15
- data/spec/rails/app/assets/stylesheets/application.css +0 -15
- data/spec/rails/app/controllers/application_controller.rb +0 -8
- data/spec/rails/app/controllers/concerns/.keep +0 -0
- data/spec/rails/app/controllers/users_controller.rb +0 -23
- data/spec/rails/app/helpers/application_helper.rb +0 -2
- data/spec/rails/app/mailers/.keep +0 -0
- data/spec/rails/app/models/.keep +0 -0
- data/spec/rails/app/models/concerns/.keep +0 -0
- data/spec/rails/app/views/application/index.html.haml +0 -18
- data/spec/rails/app/views/layouts/application.html.haml +0 -12
- data/spec/rails/app/views/users/capture.html.haml +0 -5
- data/spec/rails/app/views/users/capture_haml.html.haml +0 -5
- data/spec/rails/app/views/users/form.html.haml +0 -2
- data/spec/rails/app/views/users/helpers.html.haml +0 -10
- data/spec/rails/app/views/users/index.html.haml +0 -9
- data/spec/rails/app/views/users/inline.html.haml +0 -6
- data/spec/rails/app/views/users/old_attributes.html.haml +0 -5
- data/spec/rails/app/views/users/safe_buffer.html.haml +0 -4
- data/spec/rails/app/views/users/whitespace.html.haml +0 -4
- data/spec/rails/bin/bundle +0 -3
- data/spec/rails/bin/rails +0 -8
- data/spec/rails/bin/rake +0 -8
- data/spec/rails/bin/setup +0 -29
- data/spec/rails/bin/spring +0 -15
- data/spec/rails/config.ru +0 -4
- data/spec/rails/config/application.rb +0 -34
- data/spec/rails/config/boot.rb +0 -3
- data/spec/rails/config/database.yml +0 -25
- data/spec/rails/config/environment.rb +0 -5
- data/spec/rails/config/environments/development.rb +0 -41
- data/spec/rails/config/environments/production.rb +0 -79
- data/spec/rails/config/environments/test.rb +0 -42
- data/spec/rails/config/initializers/assets.rb +0 -11
- data/spec/rails/config/initializers/backtrace_silencers.rb +0 -7
- data/spec/rails/config/initializers/cookies_serializer.rb +0 -3
- data/spec/rails/config/initializers/filter_parameter_logging.rb +0 -4
- data/spec/rails/config/initializers/inflections.rb +0 -16
- data/spec/rails/config/initializers/mime_types.rb +0 -4
- data/spec/rails/config/initializers/session_store.rb +0 -3
- data/spec/rails/config/initializers/wrap_parameters.rb +0 -14
- data/spec/rails/config/locales/en.yml +0 -24
- data/spec/rails/config/routes.rb +0 -16
- data/spec/rails/config/secrets.yml +0 -22
- data/spec/rails/db/schema.rb +0 -16
- data/spec/rails/db/seeds.rb +0 -7
- data/spec/rails/lib/assets/.keep +0 -0
- data/spec/rails/lib/tasks/.keep +0 -0
- data/spec/rails/log/.keep +0 -0
- data/spec/rails/public/404.html +0 -67
- data/spec/rails/public/422.html +0 -67
- data/spec/rails/public/500.html +0 -66
- data/spec/rails/public/favicon.ico +0 -0
- data/spec/rails/public/robots.txt +0 -5
- data/spec/rails/spec/hamlit_spec.rb +0 -123
- data/spec/rails/spec/rails_helper.rb +0 -56
- data/spec/rails/spec/spec_helper.rb +0 -91
- data/spec/rails/vendor/assets/javascripts/.keep +0 -0
- data/spec/rails/vendor/assets/stylesheets/.keep +0 -0
- data/spec/spec_helper.rb +0 -36
- data/spec/spec_helper/document_generator.rb +0 -93
- data/spec/spec_helper/render_helper.rb +0 -120
- data/spec/spec_helper/test_case.rb +0 -55
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#ifndef __HOUDINI_H__
|
|
2
|
+
#define __HOUDINI_H__
|
|
3
|
+
|
|
4
|
+
#ifdef __cplusplus
|
|
5
|
+
extern "C" {
|
|
6
|
+
#endif
|
|
7
|
+
|
|
8
|
+
#include <stdint.h>
|
|
9
|
+
#include "buffer.h"
|
|
10
|
+
|
|
11
|
+
/*
 * Branch-prediction hints.
 *
 * The condition is normalised with !! so the macros always yield 0 or 1
 * and accept any scalar expression. Compilers without __builtin_expect
 * fall back to the plain truth value.
 */
#if defined(__GNUC__) || defined(__clang__)
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
#else
# define likely(x) (!!(x))
# define unlikely(x) (!!(x))
#endif
|
|
13
|
+
|
|
14
|
+
#ifdef HOUDINI_USE_LOCALE
/*
 * Locale-aware classification.
 * NOTE(review): this branch relies on <ctype.h> having been included by
 * the translation unit; this header does not include it itself.
 */
# define _isxdigit(c) isxdigit(c)
# define _isdigit(c) isdigit(c)
#else
/*
 * Locale-independent digit tests -- do not trust the current locale.
 *
 * _isxdigit is written as explicit range checks instead of a strchr()
 * lookup: strchr() also matches the terminating NUL of its search
 * string, which made a 0 byte count as a hex digit.
 * Both macros evaluate their argument more than once.
 */
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
# define _isxdigit(c) (_isdigit(c) || \
	((c) >= 'a' && (c) <= 'f') || \
	((c) >= 'A' && (c) <= 'F'))
#endif
|
|
24
|
+
|
|
25
|
+
/*
 * Capacity estimates passed to gh_buf_grow() before output is written.
 *
 * HOUDINI_ESCAPED_SIZE(x) is 120% of x, rounded down. It is spelled
 * x + x/5 (the same value as x*12/10 for non-negative integers) so that
 * the multiplication cannot wrap around for large sizes.
 * The argument is evaluated twice.
 */
#define HOUDINI_ESCAPED_SIZE(x) ((x) + (x) / 5)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
|
|
27
|
+
|
|
28
|
+
/*
 * Public escaping / unescaping entry points.
 *
 * Every function reads `size` bytes starting at `src` and appends its
 * output to `ob`; all of them return an int status.
 */

/* HTML and XML */
int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);

/* URI, URL and href */
int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
int houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);

/* JavaScript */
int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
|
|
41
|
+
|
|
42
|
+
#ifdef __cplusplus
|
|
43
|
+
}
|
|
44
|
+
#endif
|
|
45
|
+
|
|
46
|
+
#endif
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#include <assert.h>
|
|
2
|
+
#include <stdio.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
|
|
5
|
+
#include "houdini.h"
|
|
6
|
+
|
|
7
|
+
/*
|
|
8
|
+
* The following characters will not be escaped:
|
|
9
|
+
*
|
|
10
|
+
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
|
11
|
+
*
|
|
12
|
+
* Note that this character set is the addition of:
|
|
13
|
+
*
|
|
14
|
+
* - The characters which are safe to be in an URL
|
|
15
|
+
* - The characters which are *not* safe to be in
|
|
16
|
+
* an URL because they are RESERVED characters.
|
|
17
|
+
*
|
|
18
|
+
* We assume (lazily) that any RESERVED char that
|
|
19
|
+
* appears inside an URL is actually meant to
|
|
20
|
+
* have its native function (i.e. as an URL
|
|
21
|
+
* component/separator) and hence needs no escaping.
|
|
22
|
+
*
|
|
23
|
+
* There are two exceptions: the characters & (amp)
|
|
24
|
+
* and ' (single quote) do not appear in the table.
|
|
25
|
+
* They are meant to appear in the URL as components,
|
|
26
|
+
* yet they require special HTML-entity escaping
|
|
27
|
+
* to generate valid HTML markup.
|
|
28
|
+
*
|
|
29
|
+
* All other characters will be escaped to %XX.
|
|
30
|
+
*
|
|
31
|
+
*/
|
|
32
|
+
/*
 * Lookup table indexed by byte value: non-zero means the byte is copied
 * into an href unchanged, zero means it needs some form of escaping.
 * Each row covers 16 consecutive byte values.
 */
static const char HREF_SAFE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
|
50
|
+
|
|
51
|
+
int
|
|
52
|
+
houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
|
|
53
|
+
{
|
|
54
|
+
static const uint8_t hex_chars[] = "0123456789ABCDEF";
|
|
55
|
+
size_t i = 0, org;
|
|
56
|
+
uint8_t hex_str[3];
|
|
57
|
+
|
|
58
|
+
hex_str[0] = '%';
|
|
59
|
+
|
|
60
|
+
while (i < size) {
|
|
61
|
+
org = i;
|
|
62
|
+
while (i < size && HREF_SAFE[src[i]] != 0)
|
|
63
|
+
i++;
|
|
64
|
+
|
|
65
|
+
if (likely(i > org)) {
|
|
66
|
+
if (unlikely(org == 0)) {
|
|
67
|
+
if (i >= size)
|
|
68
|
+
return 0;
|
|
69
|
+
|
|
70
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
gh_buf_put(ob, src + org, i - org);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/* escaping */
|
|
77
|
+
if (i >= size)
|
|
78
|
+
break;
|
|
79
|
+
|
|
80
|
+
switch (src[i]) {
|
|
81
|
+
/* amp appears all the time in URLs, but needs
|
|
82
|
+
* HTML-entity escaping to be inside an href */
|
|
83
|
+
case '&':
|
|
84
|
+
gh_buf_PUTS(ob, "&");
|
|
85
|
+
break;
|
|
86
|
+
|
|
87
|
+
/* the single quote is a valid URL character
|
|
88
|
+
* according to the standard; it needs HTML
|
|
89
|
+
* entity escaping too */
|
|
90
|
+
case '\'':
|
|
91
|
+
gh_buf_PUTS(ob, "'");
|
|
92
|
+
break;
|
|
93
|
+
|
|
94
|
+
/* the space can be escaped to %20 or a plus
|
|
95
|
+
* sign. we're going with the generic escape
|
|
96
|
+
* for now. the plus thing is more commonly seen
|
|
97
|
+
* when building GET strings */
|
|
98
|
+
#if 0
|
|
99
|
+
case ' ':
|
|
100
|
+
gh_buf_putc(ob, '+');
|
|
101
|
+
break;
|
|
102
|
+
#endif
|
|
103
|
+
|
|
104
|
+
/* every other character goes with a %XX escaping */
|
|
105
|
+
default:
|
|
106
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
|
107
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
|
108
|
+
gh_buf_put(ob, hex_str, 3);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
i++;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
return 1;
|
|
115
|
+
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
#include <assert.h>
|
|
2
|
+
#include <stdio.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
|
|
5
|
+
#include "houdini.h"
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* According to the OWASP rules:
|
|
9
|
+
*
|
|
10
|
+
* & --> &amp;
|
|
11
|
+
* < --> &lt;
|
|
12
|
+
* > --> &gt;
|
|
13
|
+
* " --> &quot;
|
|
14
|
+
* ' --> &#x27;     &apos; is not recommended
|
|
15
|
+
* / --> &#x2F;     forward slash is included as it helps end an HTML entity
|
|
16
|
+
*
|
|
17
|
+
*/
|
|
18
|
+
/*
 * Lookup table indexed by byte value. Zero means the byte is emitted
 * as-is; any other value is the index of its replacement string in
 * HTML_ESCAPES. Each row covers 16 consecutive byte values.
 */
static const char HTML_ESCAPE_TABLE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
|
36
|
+
|
|
37
|
+
/*
 * Replacement strings, indexed by the non-zero values stored in
 * HTML_ESCAPE_TABLE. Slot 0 is an unused placeholder because a table
 * value of 0 means "no escaping".
 */
static const char *const HTML_ESCAPES[] = {
	"",        /* 0: unused */
	"&quot;",  /* 1: double quote */
	"&amp;",   /* 2: ampersand */
	"&#39;",   /* 3: single quote */
	"&#47;",   /* 4: forward slash */
	"&lt;",    /* 5: less-than */
	"&gt;"     /* 6: greater-than */
};
|
|
46
|
+
|
|
47
|
+
int
|
|
48
|
+
houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
|
|
49
|
+
{
|
|
50
|
+
size_t i = 0, org, esc = 0;
|
|
51
|
+
|
|
52
|
+
while (i < size) {
|
|
53
|
+
org = i;
|
|
54
|
+
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
|
55
|
+
i++;
|
|
56
|
+
|
|
57
|
+
if (i > org) {
|
|
58
|
+
if (unlikely(org == 0)) {
|
|
59
|
+
if (i >= size)
|
|
60
|
+
return 0;
|
|
61
|
+
|
|
62
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
gh_buf_put(ob, src + org, i - org);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/* escaping */
|
|
69
|
+
if (unlikely(i >= size))
|
|
70
|
+
break;
|
|
71
|
+
|
|
72
|
+
/* The forward slash is only escaped in secure mode */
|
|
73
|
+
if (src[i] == '/' && !secure) {
|
|
74
|
+
gh_buf_putc(ob, '/');
|
|
75
|
+
} else {
|
|
76
|
+
gh_buf_puts(ob, HTML_ESCAPES[esc]);
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
i++;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return 1;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
int
|
|
86
|
+
houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
|
|
87
|
+
{
|
|
88
|
+
return houdini_escape_html0(ob, src, size, 1);
|
|
89
|
+
}
|
|
90
|
+
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
#include <assert.h>
|
|
2
|
+
#include <stdio.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
|
|
5
|
+
#include "houdini.h"
|
|
6
|
+
#include "html_unescape.h"
|
|
7
|
+
|
|
8
|
+
static inline void
|
|
9
|
+
gh_buf_put_utf8(gh_buf *ob, int c)
|
|
10
|
+
{
|
|
11
|
+
unsigned char unichar[4];
|
|
12
|
+
|
|
13
|
+
if (c < 0x80) {
|
|
14
|
+
gh_buf_putc(ob, c);
|
|
15
|
+
}
|
|
16
|
+
else if (c < 0x800) {
|
|
17
|
+
unichar[0] = 192 + (c / 64);
|
|
18
|
+
unichar[1] = 128 + (c % 64);
|
|
19
|
+
gh_buf_put(ob, unichar, 2);
|
|
20
|
+
}
|
|
21
|
+
else if (c - 0xd800u < 0x800) {
|
|
22
|
+
gh_buf_putc(ob, '?');
|
|
23
|
+
}
|
|
24
|
+
else if (c < 0x10000) {
|
|
25
|
+
unichar[0] = 224 + (c / 4096);
|
|
26
|
+
unichar[1] = 128 + (c / 64) % 64;
|
|
27
|
+
unichar[2] = 128 + (c % 64);
|
|
28
|
+
gh_buf_put(ob, unichar, 3);
|
|
29
|
+
}
|
|
30
|
+
else if (c < 0x110000) {
|
|
31
|
+
unichar[0] = 240 + (c / 262144);
|
|
32
|
+
unichar[1] = 128 + (c / 4096) % 64;
|
|
33
|
+
unichar[2] = 128 + (c / 64) % 64;
|
|
34
|
+
unichar[3] = 128 + (c % 64);
|
|
35
|
+
gh_buf_put(ob, unichar, 4);
|
|
36
|
+
}
|
|
37
|
+
else {
|
|
38
|
+
gh_buf_putc(ob, '?');
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
static size_t
|
|
43
|
+
unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
|
|
44
|
+
{
|
|
45
|
+
size_t i = 0;
|
|
46
|
+
|
|
47
|
+
if (size > 3 && src[0] == '#') {
|
|
48
|
+
int codepoint = 0;
|
|
49
|
+
|
|
50
|
+
if (_isdigit(src[1])) {
|
|
51
|
+
for (i = 1; i < size && _isdigit(src[i]); ++i)
|
|
52
|
+
codepoint = (codepoint * 10) + (src[i] - '0');
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
else if (src[1] == 'x' || src[1] == 'X') {
|
|
56
|
+
for (i = 2; i < size && _isxdigit(src[i]); ++i)
|
|
57
|
+
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
if (i < size && src[i] == ';' && codepoint) {
|
|
61
|
+
gh_buf_put_utf8(ob, codepoint);
|
|
62
|
+
return i + 1;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
else {
|
|
67
|
+
if (size > MAX_WORD_LENGTH)
|
|
68
|
+
size = MAX_WORD_LENGTH;
|
|
69
|
+
|
|
70
|
+
for (i = MIN_WORD_LENGTH; i < size; ++i) {
|
|
71
|
+
if (src[i] == ' ')
|
|
72
|
+
break;
|
|
73
|
+
|
|
74
|
+
if (src[i] == ';') {
|
|
75
|
+
const struct html_ent *entity = find_entity((char *)src, i);
|
|
76
|
+
|
|
77
|
+
if (entity != NULL) {
|
|
78
|
+
gh_buf_put(ob, entity->utf8, entity->utf8_len);
|
|
79
|
+
return i + 1;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
break;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
gh_buf_putc(ob, '&');
|
|
88
|
+
return 0;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
int
|
|
92
|
+
houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
|
|
93
|
+
{
|
|
94
|
+
size_t i = 0, org;
|
|
95
|
+
|
|
96
|
+
while (i < size) {
|
|
97
|
+
org = i;
|
|
98
|
+
while (i < size && src[i] != '&')
|
|
99
|
+
i++;
|
|
100
|
+
|
|
101
|
+
if (likely(i > org)) {
|
|
102
|
+
if (unlikely(org == 0)) {
|
|
103
|
+
if (i >= size)
|
|
104
|
+
return 0;
|
|
105
|
+
|
|
106
|
+
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
gh_buf_put(ob, src + org, i - org);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
/* escaping */
|
|
113
|
+
if (i >= size)
|
|
114
|
+
break;
|
|
115
|
+
|
|
116
|
+
i++;
|
|
117
|
+
i += unescape_ent(ob, src + i, size - i);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
return 1;
|
|
121
|
+
}
|
|
122
|
+
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
#include <assert.h>
|
|
2
|
+
#include <stdio.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
|
|
5
|
+
#include "houdini.h"
|
|
6
|
+
|
|
7
|
+
/*
 * Lookup table indexed by byte value: non-zero marks a byte that
 * houdini_escape_js() handles specially, zero marks a byte copied
 * unchanged. Each row covers 16 consecutive byte values.
 */
static const char JS_ESCAPE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
|
25
|
+
|
|
26
|
+
int
|
|
27
|
+
houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size)
|
|
28
|
+
{
|
|
29
|
+
size_t i = 0, org, ch;
|
|
30
|
+
|
|
31
|
+
while (i < size) {
|
|
32
|
+
org = i;
|
|
33
|
+
while (i < size && JS_ESCAPE[src[i]] == 0)
|
|
34
|
+
i++;
|
|
35
|
+
|
|
36
|
+
if (likely(i > org)) {
|
|
37
|
+
if (unlikely(org == 0)) {
|
|
38
|
+
if (i >= size)
|
|
39
|
+
return 0;
|
|
40
|
+
|
|
41
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
gh_buf_put(ob, src + org, i - org);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/* escaping */
|
|
48
|
+
if (i >= size)
|
|
49
|
+
break;
|
|
50
|
+
|
|
51
|
+
ch = src[i];
|
|
52
|
+
|
|
53
|
+
switch (ch) {
|
|
54
|
+
case '/':
|
|
55
|
+
/*
|
|
56
|
+
* Escape only if preceded by a lt
|
|
57
|
+
*/
|
|
58
|
+
if (i && src[i - 1] == '<')
|
|
59
|
+
gh_buf_putc(ob, '\\');
|
|
60
|
+
|
|
61
|
+
gh_buf_putc(ob, ch);
|
|
62
|
+
break;
|
|
63
|
+
|
|
64
|
+
case '\r':
|
|
65
|
+
/*
|
|
66
|
+
* Escape as \n, and skip the next \n if it's there
|
|
67
|
+
*/
|
|
68
|
+
if (i + 1 < size && src[i + 1] == '\n') i++;
|
|
69
|
+
|
|
70
|
+
case '\n':
|
|
71
|
+
/*
|
|
72
|
+
* Escape actually as '\','n', not as '\', '\n'
|
|
73
|
+
*/
|
|
74
|
+
ch = 'n';
|
|
75
|
+
|
|
76
|
+
default:
|
|
77
|
+
/*
|
|
78
|
+
* Normal escaping
|
|
79
|
+
*/
|
|
80
|
+
gh_buf_putc(ob, '\\');
|
|
81
|
+
gh_buf_putc(ob, ch);
|
|
82
|
+
break;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
i++;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return 1;
|
|
89
|
+
}
|
|
90
|
+
|