hamlit 1.7.2 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (283) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -3
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +25 -37
  5. data/CHANGELOG.md +18 -0
  6. data/Gemfile +16 -0
  7. data/LICENSE.txt +23 -2
  8. data/README.md +106 -48
  9. data/REFERENCE.md +222 -0
  10. data/Rakefile +77 -19
  11. data/benchmark/boolean_attribute.haml +6 -0
  12. data/benchmark/class_attribute.haml +5 -0
  13. data/benchmark/common_attribute.haml +3 -0
  14. data/benchmark/data_attribute.haml +4 -0
  15. data/benchmark/dynamic_attributes/boolean_attribute.haml +4 -0
  16. data/benchmark/dynamic_attributes/class_attribute.haml +4 -0
  17. data/benchmark/dynamic_attributes/common_attribute.haml +2 -0
  18. data/benchmark/dynamic_attributes/data_attribute.haml +2 -0
  19. data/benchmark/dynamic_attributes/id_attribute.haml +2 -0
  20. data/benchmark/etc/attribute_builder.haml +5 -0
  21. data/benchmark/etc/real_sample.haml +888 -0
  22. data/benchmark/etc/real_sample.rb +11 -0
  23. data/benchmark/etc/static_analyzer.haml +1 -0
  24. data/benchmark/etc/tags.haml +3 -0
  25. data/benchmark/ext/build_data.rb +15 -0
  26. data/benchmark/ext/build_id.rb +13 -0
  27. data/benchmark/id_attribute.haml +3 -0
  28. data/benchmark/plain.haml +4 -0
  29. data/benchmark/script.haml +4 -0
  30. data/benchmark/slim/LICENSE +21 -0
  31. data/{benchmarks → benchmark/slim}/context.rb +2 -4
  32. data/benchmark/slim/run-benchmarks.rb +94 -0
  33. data/{benchmarks → benchmark/slim}/view.erb +3 -3
  34. data/{benchmarks → benchmark/slim}/view.haml +0 -0
  35. data/{benchmarks/view.escaped.slim → benchmark/slim/view.slim} +1 -1
  36. data/benchmark/string_interpolation.haml +2 -0
  37. data/benchmark/utils/benchmark_ips_extension.rb +43 -0
  38. data/bin/bench +85 -0
  39. data/bin/clone +14 -0
  40. data/bin/console +11 -0
  41. data/bin/lineprof +48 -0
  42. data/bin/ruby +3 -0
  43. data/bin/setup +7 -0
  44. data/bin/stackprof +27 -0
  45. data/{test → bin/test} +6 -10
  46. data/{bin → exe}/hamlit +0 -0
  47. data/ext/hamlit/extconf.rb +14 -0
  48. data/ext/hamlit/hamlit.c +512 -0
  49. data/ext/hamlit/houdini/.gitignore +3 -0
  50. data/ext/hamlit/houdini/COPYING +7 -0
  51. data/ext/hamlit/houdini/Makefile +79 -0
  52. data/ext/hamlit/houdini/README.md +59 -0
  53. data/ext/hamlit/houdini/buffer.c +249 -0
  54. data/ext/hamlit/houdini/buffer.h +113 -0
  55. data/ext/hamlit/houdini/houdini.h +46 -0
  56. data/ext/hamlit/houdini/houdini_href_e.c +115 -0
  57. data/ext/hamlit/houdini/houdini_html_e.c +90 -0
  58. data/ext/hamlit/houdini/houdini_html_u.c +122 -0
  59. data/ext/hamlit/houdini/houdini_js_e.c +90 -0
  60. data/ext/hamlit/houdini/houdini_js_u.c +60 -0
  61. data/ext/hamlit/houdini/houdini_uri_e.c +107 -0
  62. data/ext/hamlit/houdini/houdini_uri_u.c +68 -0
  63. data/ext/hamlit/houdini/houdini_xml_e.c +136 -0
  64. data/ext/hamlit/houdini/html_unescape.gperf +258 -0
  65. data/ext/hamlit/houdini/html_unescape.h +754 -0
  66. data/ext/hamlit/houdini/tools/build_table.py +13 -0
  67. data/ext/hamlit/houdini/tools/build_tables.c +51 -0
  68. data/ext/hamlit/houdini/tools/wikipedia_table.txt +2025 -0
  69. data/hamlit.gemspec +30 -31
  70. data/lib/hamlit.rb +3 -1
  71. data/lib/hamlit/attribute_builder.rb +12 -0
  72. data/lib/hamlit/cli.rb +44 -43
  73. data/lib/hamlit/compiler.rb +92 -16
  74. data/lib/hamlit/compiler/attribute_compiler.rb +148 -0
  75. data/lib/hamlit/compiler/children_compiler.rb +111 -0
  76. data/lib/hamlit/compiler/comment_compiler.rb +36 -0
  77. data/lib/hamlit/compiler/doctype_compiler.rb +45 -0
  78. data/lib/hamlit/compiler/script_compiler.rb +97 -0
  79. data/lib/hamlit/compiler/silent_script_compiler.rb +24 -0
  80. data/lib/hamlit/compiler/tag_compiler.rb +69 -0
  81. data/lib/hamlit/engine.rb +12 -7
  82. data/lib/hamlit/error.rb +14 -0
  83. data/lib/hamlit/escapable.rb +12 -0
  84. data/lib/hamlit/filters.rb +65 -0
  85. data/lib/hamlit/filters/base.rb +4 -62
  86. data/lib/hamlit/filters/coffee.rb +9 -7
  87. data/lib/hamlit/filters/css.rb +25 -8
  88. data/lib/hamlit/filters/erb.rb +4 -6
  89. data/lib/hamlit/filters/escaped.rb +11 -9
  90. data/lib/hamlit/filters/javascript.rb +25 -8
  91. data/lib/hamlit/filters/less.rb +9 -7
  92. data/lib/hamlit/filters/markdown.rb +5 -6
  93. data/lib/hamlit/filters/plain.rb +11 -15
  94. data/lib/hamlit/filters/preserve.rb +15 -5
  95. data/lib/hamlit/filters/ruby.rb +3 -5
  96. data/lib/hamlit/filters/sass.rb +9 -7
  97. data/lib/hamlit/filters/scss.rb +9 -7
  98. data/lib/hamlit/filters/text_base.rb +24 -0
  99. data/lib/hamlit/filters/tilt_base.rb +47 -0
  100. data/lib/hamlit/hash_parser.rb +107 -0
  101. data/lib/hamlit/html.rb +9 -6
  102. data/lib/hamlit/identity.rb +12 -0
  103. data/lib/hamlit/object_ref.rb +29 -0
  104. data/lib/hamlit/parser.rb +25 -142
  105. data/lib/hamlit/parser/MIT-LICENSE +20 -0
  106. data/lib/hamlit/parser/README.md +28 -0
  107. data/lib/hamlit/parser/haml_buffer.rb +348 -0
  108. data/lib/hamlit/parser/haml_compiler.rb +553 -0
  109. data/lib/hamlit/parser/haml_error.rb +61 -0
  110. data/lib/hamlit/parser/haml_helpers.rb +727 -0
  111. data/lib/hamlit/parser/haml_options.rb +286 -0
  112. data/lib/hamlit/parser/haml_parser.rb +801 -0
  113. data/lib/hamlit/parser/haml_util.rb +283 -0
  114. data/lib/hamlit/parser/haml_xss_mods.rb +109 -0
  115. data/lib/hamlit/{helpers.rb → rails_helpers.rb} +2 -7
  116. data/lib/hamlit/rails_template.rb +30 -0
  117. data/lib/hamlit/railtie.rb +1 -12
  118. data/lib/hamlit/ruby_expression.rb +31 -0
  119. data/lib/hamlit/static_analyzer.rb +49 -0
  120. data/lib/hamlit/string_interpolation.rb +69 -0
  121. data/lib/hamlit/template.rb +8 -0
  122. data/lib/hamlit/utils.rb +9 -0
  123. data/lib/hamlit/version.rb +1 -1
  124. metadata +116 -324
  125. data/.rspec +0 -2
  126. data/benchmarks/benchmark.rb +0 -110
  127. data/benchmarks/view.slim +0 -17
  128. data/doc/README.md +0 -19
  129. data/doc/engine/indent.md +0 -48
  130. data/doc/engine/new_attribute.md +0 -77
  131. data/doc/engine/old_attributes.md +0 -198
  132. data/doc/engine/silent_script.md +0 -97
  133. data/doc/engine/tag.md +0 -48
  134. data/doc/engine/text.md +0 -64
  135. data/doc/faml/README.md +0 -16
  136. data/doc/faml/engine/indent.md +0 -48
  137. data/doc/faml/engine/old_attributes.md +0 -111
  138. data/doc/faml/engine/silent_script.md +0 -97
  139. data/doc/faml/engine/text.md +0 -59
  140. data/doc/faml/filters/erb.md +0 -24
  141. data/doc/faml/filters/javascript.md +0 -27
  142. data/doc/faml/filters/less.md +0 -57
  143. data/doc/faml/filters/plain.md +0 -25
  144. data/doc/filters/erb.md +0 -31
  145. data/doc/filters/javascript.md +0 -83
  146. data/doc/filters/less.md +0 -57
  147. data/doc/filters/markdown.md +0 -31
  148. data/doc/filters/plain.md +0 -25
  149. data/doc/haml/README.md +0 -15
  150. data/doc/haml/engine/new_attribute.md +0 -77
  151. data/doc/haml/engine/old_attributes.md +0 -142
  152. data/doc/haml/engine/tag.md +0 -48
  153. data/doc/haml/engine/text.md +0 -29
  154. data/doc/haml/filters/erb.md +0 -26
  155. data/doc/haml/filters/javascript.md +0 -76
  156. data/doc/haml/filters/markdown.md +0 -31
  157. data/lib/hamlit/attribute.rb +0 -78
  158. data/lib/hamlit/compilers/attributes.rb +0 -108
  159. data/lib/hamlit/compilers/comment.rb +0 -13
  160. data/lib/hamlit/compilers/doctype.rb +0 -39
  161. data/lib/hamlit/compilers/filter.rb +0 -53
  162. data/lib/hamlit/compilers/new_attribute.rb +0 -115
  163. data/lib/hamlit/compilers/old_attribute.rb +0 -241
  164. data/lib/hamlit/compilers/runtime_attribute.rb +0 -58
  165. data/lib/hamlit/compilers/script.rb +0 -31
  166. data/lib/hamlit/compilers/strip.rb +0 -19
  167. data/lib/hamlit/compilers/text.rb +0 -111
  168. data/lib/hamlit/concerns/attribute_builder.rb +0 -22
  169. data/lib/hamlit/concerns/balanceable.rb +0 -68
  170. data/lib/hamlit/concerns/deprecation.rb +0 -20
  171. data/lib/hamlit/concerns/error.rb +0 -31
  172. data/lib/hamlit/concerns/escapable.rb +0 -17
  173. data/lib/hamlit/concerns/included.rb +0 -28
  174. data/lib/hamlit/concerns/indentable.rb +0 -117
  175. data/lib/hamlit/concerns/lexable.rb +0 -32
  176. data/lib/hamlit/concerns/line_reader.rb +0 -62
  177. data/lib/hamlit/concerns/registerable.rb +0 -24
  178. data/lib/hamlit/concerns/string_interpolation.rb +0 -48
  179. data/lib/hamlit/concerns/whitespace.rb +0 -91
  180. data/lib/hamlit/filters/tilt.rb +0 -41
  181. data/lib/hamlit/parsers/attribute.rb +0 -71
  182. data/lib/hamlit/parsers/comment.rb +0 -30
  183. data/lib/hamlit/parsers/doctype.rb +0 -18
  184. data/lib/hamlit/parsers/filter.rb +0 -18
  185. data/lib/hamlit/parsers/multiline.rb +0 -58
  186. data/lib/hamlit/parsers/script.rb +0 -126
  187. data/lib/hamlit/parsers/tag.rb +0 -83
  188. data/lib/hamlit/parsers/text.rb +0 -28
  189. data/lib/hamlit/temple.rb +0 -9
  190. data/release +0 -6
  191. data/spec/Rakefile +0 -72
  192. data/spec/hamlit/engine/comment_spec.rb +0 -56
  193. data/spec/hamlit/engine/doctype_spec.rb +0 -19
  194. data/spec/hamlit/engine/error_spec.rb +0 -135
  195. data/spec/hamlit/engine/indent_spec.rb +0 -42
  196. data/spec/hamlit/engine/multiline_spec.rb +0 -44
  197. data/spec/hamlit/engine/new_attribute_spec.rb +0 -110
  198. data/spec/hamlit/engine/old_attributes_spec.rb +0 -404
  199. data/spec/hamlit/engine/script_spec.rb +0 -116
  200. data/spec/hamlit/engine/silent_script_spec.rb +0 -213
  201. data/spec/hamlit/engine/tag_spec.rb +0 -295
  202. data/spec/hamlit/engine/text_spec.rb +0 -239
  203. data/spec/hamlit/engine_spec.rb +0 -58
  204. data/spec/hamlit/filters/coffee_spec.rb +0 -60
  205. data/spec/hamlit/filters/css_spec.rb +0 -33
  206. data/spec/hamlit/filters/erb_spec.rb +0 -16
  207. data/spec/hamlit/filters/javascript_spec.rb +0 -82
  208. data/spec/hamlit/filters/less_spec.rb +0 -37
  209. data/spec/hamlit/filters/markdown_spec.rb +0 -30
  210. data/spec/hamlit/filters/plain_spec.rb +0 -15
  211. data/spec/hamlit/filters/ruby_spec.rb +0 -24
  212. data/spec/hamlit/filters/sass_spec.rb +0 -33
  213. data/spec/hamlit/filters/scss_spec.rb +0 -37
  214. data/spec/hamlit/haml_spec.rb +0 -910
  215. data/spec/rails/.gitignore +0 -18
  216. data/spec/rails/.rspec +0 -2
  217. data/spec/rails/Gemfile +0 -19
  218. data/spec/rails/README.rdoc +0 -28
  219. data/spec/rails/Rakefile +0 -6
  220. data/spec/rails/app/assets/images/.keep +0 -0
  221. data/spec/rails/app/assets/javascripts/application.js +0 -15
  222. data/spec/rails/app/assets/stylesheets/application.css +0 -15
  223. data/spec/rails/app/controllers/application_controller.rb +0 -8
  224. data/spec/rails/app/controllers/concerns/.keep +0 -0
  225. data/spec/rails/app/controllers/users_controller.rb +0 -23
  226. data/spec/rails/app/helpers/application_helper.rb +0 -2
  227. data/spec/rails/app/mailers/.keep +0 -0
  228. data/spec/rails/app/models/.keep +0 -0
  229. data/spec/rails/app/models/concerns/.keep +0 -0
  230. data/spec/rails/app/views/application/index.html.haml +0 -18
  231. data/spec/rails/app/views/layouts/application.html.haml +0 -12
  232. data/spec/rails/app/views/users/capture.html.haml +0 -5
  233. data/spec/rails/app/views/users/capture_haml.html.haml +0 -5
  234. data/spec/rails/app/views/users/form.html.haml +0 -2
  235. data/spec/rails/app/views/users/helpers.html.haml +0 -10
  236. data/spec/rails/app/views/users/index.html.haml +0 -9
  237. data/spec/rails/app/views/users/inline.html.haml +0 -6
  238. data/spec/rails/app/views/users/old_attributes.html.haml +0 -5
  239. data/spec/rails/app/views/users/safe_buffer.html.haml +0 -4
  240. data/spec/rails/app/views/users/whitespace.html.haml +0 -4
  241. data/spec/rails/bin/bundle +0 -3
  242. data/spec/rails/bin/rails +0 -8
  243. data/spec/rails/bin/rake +0 -8
  244. data/spec/rails/bin/setup +0 -29
  245. data/spec/rails/bin/spring +0 -15
  246. data/spec/rails/config.ru +0 -4
  247. data/spec/rails/config/application.rb +0 -34
  248. data/spec/rails/config/boot.rb +0 -3
  249. data/spec/rails/config/database.yml +0 -25
  250. data/spec/rails/config/environment.rb +0 -5
  251. data/spec/rails/config/environments/development.rb +0 -41
  252. data/spec/rails/config/environments/production.rb +0 -79
  253. data/spec/rails/config/environments/test.rb +0 -42
  254. data/spec/rails/config/initializers/assets.rb +0 -11
  255. data/spec/rails/config/initializers/backtrace_silencers.rb +0 -7
  256. data/spec/rails/config/initializers/cookies_serializer.rb +0 -3
  257. data/spec/rails/config/initializers/filter_parameter_logging.rb +0 -4
  258. data/spec/rails/config/initializers/inflections.rb +0 -16
  259. data/spec/rails/config/initializers/mime_types.rb +0 -4
  260. data/spec/rails/config/initializers/session_store.rb +0 -3
  261. data/spec/rails/config/initializers/wrap_parameters.rb +0 -14
  262. data/spec/rails/config/locales/en.yml +0 -24
  263. data/spec/rails/config/routes.rb +0 -16
  264. data/spec/rails/config/secrets.yml +0 -22
  265. data/spec/rails/db/schema.rb +0 -16
  266. data/spec/rails/db/seeds.rb +0 -7
  267. data/spec/rails/lib/assets/.keep +0 -0
  268. data/spec/rails/lib/tasks/.keep +0 -0
  269. data/spec/rails/log/.keep +0 -0
  270. data/spec/rails/public/404.html +0 -67
  271. data/spec/rails/public/422.html +0 -67
  272. data/spec/rails/public/500.html +0 -66
  273. data/spec/rails/public/favicon.ico +0 -0
  274. data/spec/rails/public/robots.txt +0 -5
  275. data/spec/rails/spec/hamlit_spec.rb +0 -123
  276. data/spec/rails/spec/rails_helper.rb +0 -56
  277. data/spec/rails/spec/spec_helper.rb +0 -91
  278. data/spec/rails/vendor/assets/javascripts/.keep +0 -0
  279. data/spec/rails/vendor/assets/stylesheets/.keep +0 -0
  280. data/spec/spec_helper.rb +0 -36
  281. data/spec/spec_helper/document_generator.rb +0 -93
  282. data/spec/spec_helper/render_helper.rb +0 -120
  283. data/spec/spec_helper/test_case.rb +0 -55
@@ -0,0 +1,46 @@
1
+ #ifndef __HOUDINI_H__
2
+ #define __HOUDINI_H__
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include <stdint.h>
9
+ #include "buffer.h"
10
+
11
+ #define likely(x) __builtin_expect((x),1)
12
+ #define unlikely(x) __builtin_expect((x),0)
13
+
14
+ #ifdef HOUDINI_USE_LOCALE
15
+ # define _isxdigit(c) isxdigit(c)
16
+ # define _isdigit(c) isdigit(c)
17
+ #else
18
+ /*
19
+ * Helper _isdigit methods -- do not trust the current locale
20
+ * */
21
+ # define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
22
+ # define _isdigit(c) ((c) >= '0' && (c) <= '9')
23
+ #endif
24
+
25
+ #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
26
+ #define HOUDINI_UNESCAPED_SIZE(x) (x)
27
+
28
+ extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
29
+ extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
30
+ extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
31
+ extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
32
+ extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
33
+ extern int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
34
+ extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
35
+ extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
36
+ extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
37
+ extern int houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
38
+ extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
39
+ extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
40
+ extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
41
+
42
+ #ifdef __cplusplus
43
+ }
44
+ #endif
45
+
46
+ #endif
@@ -0,0 +1,115 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ /*
8
+ * The following characters will not be escaped:
9
+ *
10
+ * -_.+!*'(),%#@?=;:/,+&$ alphanum
11
+ *
12
+ * Note that this character set is the addition of:
13
+ *
14
+ * - The characters which are safe to be in an URL
15
+ * - The characters which are *not* safe to be in
16
+ * an URL because they are RESERVED characters.
17
+ *
18
+ * We assume (lazily) that any RESERVED char that
19
+ * appears inside an URL is actually meant to
20
+ * have its native function (i.e. as an URL
21
+ * component/separator) and hence needs no escaping.
22
+ *
23
+ * There are two exceptions: the characters & (amp)
24
+ * and ' (single quote) do not appear in the table.
25
+ * They are meant to appear in the URL as components,
26
+ * yet they require special HTML-entity escaping
27
+ * to generate valid HTML markup.
28
+ *
29
+ * All other characters will be escaped to %XX.
30
+ *
31
+ */
32
/*
 * Per-byte lookup table: a non-zero entry means the byte is copied into
 * the href unchanged, a zero entry means it has to be escaped. The
 * leading comment on each row is the value of its first index.
 */
static const char HREF_SAFE[256] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
50
+
51
+ int
52
+ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
53
+ {
54
+ static const uint8_t hex_chars[] = "0123456789ABCDEF";
55
+ size_t i = 0, org;
56
+ uint8_t hex_str[3];
57
+
58
+ hex_str[0] = '%';
59
+
60
+ while (i < size) {
61
+ org = i;
62
+ while (i < size && HREF_SAFE[src[i]] != 0)
63
+ i++;
64
+
65
+ if (likely(i > org)) {
66
+ if (unlikely(org == 0)) {
67
+ if (i >= size)
68
+ return 0;
69
+
70
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
71
+ }
72
+
73
+ gh_buf_put(ob, src + org, i - org);
74
+ }
75
+
76
+ /* escaping */
77
+ if (i >= size)
78
+ break;
79
+
80
+ switch (src[i]) {
81
+ /* amp appears all the time in URLs, but needs
82
+ * HTML-entity escaping to be inside an href */
83
+ case '&':
84
+ gh_buf_PUTS(ob, "&amp;");
85
+ break;
86
+
87
+ /* the single quote is a valid URL character
88
+ * according to the standard; it needs HTML
89
+ * entity escaping too */
90
+ case '\'':
91
+ gh_buf_PUTS(ob, "&#x27;");
92
+ break;
93
+
94
+ /* the space can be escaped to %20 or a plus
95
+ * sign. we're going with the generic escape
96
+ * for now. the plus thing is more commonly seen
97
+ * when building GET strings */
98
+ #if 0
99
+ case ' ':
100
+ gh_buf_putc(ob, '+');
101
+ break;
102
+ #endif
103
+
104
+ /* every other character goes with a %XX escaping */
105
+ default:
106
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
107
+ hex_str[2] = hex_chars[src[i] & 0xF];
108
+ gh_buf_put(ob, hex_str, 3);
109
+ }
110
+
111
+ i++;
112
+ }
113
+
114
+ return 1;
115
+ }
@@ -0,0 +1,90 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ /**
8
+ * According to the OWASP rules:
9
+ *
10
+ * & --> &amp;
11
+ * < --> &lt;
12
+ * > --> &gt;
13
+ * " --> &quot;
14
+ * ' --> &#x27; &apos; is not recommended
15
+ * / --> &#x2F; forward slash is included as it helps end an HTML entity
16
+ *
17
+ */
18
/*
 * Per-byte lookup table: 0 means the byte is emitted unchanged; any
 * other value is the index of its replacement in HTML_ESCAPES. The
 * leading comment on each row is the value of its first index.
 */
static const char HTML_ESCAPE_TABLE[256] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
36
+
37
/*
 * Replacement strings, indexed by the value stored in HTML_ESCAPE_TABLE
 * (slot 0 is the unused "no escape" entry). The quote and slash use
 * decimal character references, which denote the same characters as the
 * hexadecimal &#x27; and &#x2F; forms.
 *
 * Both the strings and the pointer array are const, so the table cannot
 * be modified at run time.
 */
static const char *const HTML_ESCAPES[] = {
	"",
	"&quot;",
	"&amp;",
	"&#39;",
	"&#47;",
	"&lt;",
	"&gt;"
};
46
+
47
+ int
48
+ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
49
+ {
50
+ size_t i = 0, org, esc = 0;
51
+
52
+ while (i < size) {
53
+ org = i;
54
+ while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
55
+ i++;
56
+
57
+ if (i > org) {
58
+ if (unlikely(org == 0)) {
59
+ if (i >= size)
60
+ return 0;
61
+
62
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
63
+ }
64
+
65
+ gh_buf_put(ob, src + org, i - org);
66
+ }
67
+
68
+ /* escaping */
69
+ if (unlikely(i >= size))
70
+ break;
71
+
72
+ /* The forward slash is only escaped in secure mode */
73
+ if (src[i] == '/' && !secure) {
74
+ gh_buf_putc(ob, '/');
75
+ } else {
76
+ gh_buf_puts(ob, HTML_ESCAPES[esc]);
77
+ }
78
+
79
+ i++;
80
+ }
81
+
82
+ return 1;
83
+ }
84
+
85
+ int
86
+ houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
87
+ {
88
+ return houdini_escape_html0(ob, src, size, 1);
89
+ }
90
+
@@ -0,0 +1,122 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+ #include "html_unescape.h"
7
+
8
+ static inline void
9
+ gh_buf_put_utf8(gh_buf *ob, int c)
10
+ {
11
+ unsigned char unichar[4];
12
+
13
+ if (c < 0x80) {
14
+ gh_buf_putc(ob, c);
15
+ }
16
+ else if (c < 0x800) {
17
+ unichar[0] = 192 + (c / 64);
18
+ unichar[1] = 128 + (c % 64);
19
+ gh_buf_put(ob, unichar, 2);
20
+ }
21
+ else if (c - 0xd800u < 0x800) {
22
+ gh_buf_putc(ob, '?');
23
+ }
24
+ else if (c < 0x10000) {
25
+ unichar[0] = 224 + (c / 4096);
26
+ unichar[1] = 128 + (c / 64) % 64;
27
+ unichar[2] = 128 + (c % 64);
28
+ gh_buf_put(ob, unichar, 3);
29
+ }
30
+ else if (c < 0x110000) {
31
+ unichar[0] = 240 + (c / 262144);
32
+ unichar[1] = 128 + (c / 4096) % 64;
33
+ unichar[2] = 128 + (c / 64) % 64;
34
+ unichar[3] = 128 + (c % 64);
35
+ gh_buf_put(ob, unichar, 4);
36
+ }
37
+ else {
38
+ gh_buf_putc(ob, '?');
39
+ }
40
+ }
41
+
42
+ static size_t
43
+ unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
44
+ {
45
+ size_t i = 0;
46
+
47
+ if (size > 3 && src[0] == '#') {
48
+ int codepoint = 0;
49
+
50
+ if (_isdigit(src[1])) {
51
+ for (i = 1; i < size && _isdigit(src[i]); ++i)
52
+ codepoint = (codepoint * 10) + (src[i] - '0');
53
+ }
54
+
55
+ else if (src[1] == 'x' || src[1] == 'X') {
56
+ for (i = 2; i < size && _isxdigit(src[i]); ++i)
57
+ codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
58
+ }
59
+
60
+ if (i < size && src[i] == ';' && codepoint) {
61
+ gh_buf_put_utf8(ob, codepoint);
62
+ return i + 1;
63
+ }
64
+ }
65
+
66
+ else {
67
+ if (size > MAX_WORD_LENGTH)
68
+ size = MAX_WORD_LENGTH;
69
+
70
+ for (i = MIN_WORD_LENGTH; i < size; ++i) {
71
+ if (src[i] == ' ')
72
+ break;
73
+
74
+ if (src[i] == ';') {
75
+ const struct html_ent *entity = find_entity((char *)src, i);
76
+
77
+ if (entity != NULL) {
78
+ gh_buf_put(ob, entity->utf8, entity->utf8_len);
79
+ return i + 1;
80
+ }
81
+
82
+ break;
83
+ }
84
+ }
85
+ }
86
+
87
+ gh_buf_putc(ob, '&');
88
+ return 0;
89
+ }
90
+
91
+ int
92
+ houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
93
+ {
94
+ size_t i = 0, org;
95
+
96
+ while (i < size) {
97
+ org = i;
98
+ while (i < size && src[i] != '&')
99
+ i++;
100
+
101
+ if (likely(i > org)) {
102
+ if (unlikely(org == 0)) {
103
+ if (i >= size)
104
+ return 0;
105
+
106
+ gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
107
+ }
108
+
109
+ gh_buf_put(ob, src + org, i - org);
110
+ }
111
+
112
+ /* escaping */
113
+ if (i >= size)
114
+ break;
115
+
116
+ i++;
117
+ i += unescape_ent(ob, src + i, size - i);
118
+ }
119
+
120
+ return 1;
121
+ }
122
+
@@ -0,0 +1,90 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
/*
 * Per-byte lookup table: a non-zero entry marks a byte that
 * houdini_escape_js() treats specially (LF, CR, double quote, single
 * quote, forward slash and backslash). The leading comment on each row
 * is the value of its first index.
 */
static const char JS_ESCAPE[256] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
25
+
26
+ int
27
+ houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size)
28
+ {
29
+ size_t i = 0, org, ch;
30
+
31
+ while (i < size) {
32
+ org = i;
33
+ while (i < size && JS_ESCAPE[src[i]] == 0)
34
+ i++;
35
+
36
+ if (likely(i > org)) {
37
+ if (unlikely(org == 0)) {
38
+ if (i >= size)
39
+ return 0;
40
+
41
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
42
+ }
43
+
44
+ gh_buf_put(ob, src + org, i - org);
45
+ }
46
+
47
+ /* escaping */
48
+ if (i >= size)
49
+ break;
50
+
51
+ ch = src[i];
52
+
53
+ switch (ch) {
54
+ case '/':
55
+ /*
56
+ * Escape only if preceded by a lt
57
+ */
58
+ if (i && src[i - 1] == '<')
59
+ gh_buf_putc(ob, '\\');
60
+
61
+ gh_buf_putc(ob, ch);
62
+ break;
63
+
64
+ case '\r':
65
+ /*
66
+ * Escape as \n, and skip the next \n if it's there
67
+ */
68
+ if (i + 1 < size && src[i + 1] == '\n') i++;
69
+
70
+ case '\n':
71
+ /*
72
+ * Escape actually as '\','n', not as '\', '\n'
73
+ */
74
+ ch = 'n';
75
+
76
+ default:
77
+ /*
78
+ * Normal escaping
79
+ */
80
+ gh_buf_putc(ob, '\\');
81
+ gh_buf_putc(ob, ch);
82
+ break;
83
+ }
84
+
85
+ i++;
86
+ }
87
+
88
+ return 1;
89
+ }
90
+