hamlit 1.7.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -3
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +25 -37
  5. data/CHANGELOG.md +18 -0
  6. data/Gemfile +16 -0
  7. data/LICENSE.txt +23 -2
  8. data/README.md +106 -48
  9. data/REFERENCE.md +222 -0
  10. data/Rakefile +77 -19
  11. data/benchmark/boolean_attribute.haml +6 -0
  12. data/benchmark/class_attribute.haml +5 -0
  13. data/benchmark/common_attribute.haml +3 -0
  14. data/benchmark/data_attribute.haml +4 -0
  15. data/benchmark/dynamic_attributes/boolean_attribute.haml +4 -0
  16. data/benchmark/dynamic_attributes/class_attribute.haml +4 -0
  17. data/benchmark/dynamic_attributes/common_attribute.haml +2 -0
  18. data/benchmark/dynamic_attributes/data_attribute.haml +2 -0
  19. data/benchmark/dynamic_attributes/id_attribute.haml +2 -0
  20. data/benchmark/etc/attribute_builder.haml +5 -0
  21. data/benchmark/etc/real_sample.haml +888 -0
  22. data/benchmark/etc/real_sample.rb +11 -0
  23. data/benchmark/etc/static_analyzer.haml +1 -0
  24. data/benchmark/etc/tags.haml +3 -0
  25. data/benchmark/ext/build_data.rb +15 -0
  26. data/benchmark/ext/build_id.rb +13 -0
  27. data/benchmark/id_attribute.haml +3 -0
  28. data/benchmark/plain.haml +4 -0
  29. data/benchmark/script.haml +4 -0
  30. data/benchmark/slim/LICENSE +21 -0
  31. data/{benchmarks → benchmark/slim}/context.rb +2 -4
  32. data/benchmark/slim/run-benchmarks.rb +94 -0
  33. data/{benchmarks → benchmark/slim}/view.erb +3 -3
  34. data/{benchmarks → benchmark/slim}/view.haml +0 -0
  35. data/{benchmarks/view.escaped.slim → benchmark/slim/view.slim} +1 -1
  36. data/benchmark/string_interpolation.haml +2 -0
  37. data/benchmark/utils/benchmark_ips_extension.rb +43 -0
  38. data/bin/bench +85 -0
  39. data/bin/clone +14 -0
  40. data/bin/console +11 -0
  41. data/bin/lineprof +48 -0
  42. data/bin/ruby +3 -0
  43. data/bin/setup +7 -0
  44. data/bin/stackprof +27 -0
  45. data/{test → bin/test} +6 -10
  46. data/{bin → exe}/hamlit +0 -0
  47. data/ext/hamlit/extconf.rb +14 -0
  48. data/ext/hamlit/hamlit.c +512 -0
  49. data/ext/hamlit/houdini/.gitignore +3 -0
  50. data/ext/hamlit/houdini/COPYING +7 -0
  51. data/ext/hamlit/houdini/Makefile +79 -0
  52. data/ext/hamlit/houdini/README.md +59 -0
  53. data/ext/hamlit/houdini/buffer.c +249 -0
  54. data/ext/hamlit/houdini/buffer.h +113 -0
  55. data/ext/hamlit/houdini/houdini.h +46 -0
  56. data/ext/hamlit/houdini/houdini_href_e.c +115 -0
  57. data/ext/hamlit/houdini/houdini_html_e.c +90 -0
  58. data/ext/hamlit/houdini/houdini_html_u.c +122 -0
  59. data/ext/hamlit/houdini/houdini_js_e.c +90 -0
  60. data/ext/hamlit/houdini/houdini_js_u.c +60 -0
  61. data/ext/hamlit/houdini/houdini_uri_e.c +107 -0
  62. data/ext/hamlit/houdini/houdini_uri_u.c +68 -0
  63. data/ext/hamlit/houdini/houdini_xml_e.c +136 -0
  64. data/ext/hamlit/houdini/html_unescape.gperf +258 -0
  65. data/ext/hamlit/houdini/html_unescape.h +754 -0
  66. data/ext/hamlit/houdini/tools/build_table.py +13 -0
  67. data/ext/hamlit/houdini/tools/build_tables.c +51 -0
  68. data/ext/hamlit/houdini/tools/wikipedia_table.txt +2025 -0
  69. data/hamlit.gemspec +30 -31
  70. data/lib/hamlit.rb +3 -1
  71. data/lib/hamlit/attribute_builder.rb +12 -0
  72. data/lib/hamlit/cli.rb +44 -43
  73. data/lib/hamlit/compiler.rb +92 -16
  74. data/lib/hamlit/compiler/attribute_compiler.rb +148 -0
  75. data/lib/hamlit/compiler/children_compiler.rb +111 -0
  76. data/lib/hamlit/compiler/comment_compiler.rb +36 -0
  77. data/lib/hamlit/compiler/doctype_compiler.rb +45 -0
  78. data/lib/hamlit/compiler/script_compiler.rb +97 -0
  79. data/lib/hamlit/compiler/silent_script_compiler.rb +24 -0
  80. data/lib/hamlit/compiler/tag_compiler.rb +69 -0
  81. data/lib/hamlit/engine.rb +12 -7
  82. data/lib/hamlit/error.rb +14 -0
  83. data/lib/hamlit/escapable.rb +12 -0
  84. data/lib/hamlit/filters.rb +65 -0
  85. data/lib/hamlit/filters/base.rb +4 -62
  86. data/lib/hamlit/filters/coffee.rb +9 -7
  87. data/lib/hamlit/filters/css.rb +25 -8
  88. data/lib/hamlit/filters/erb.rb +4 -6
  89. data/lib/hamlit/filters/escaped.rb +11 -9
  90. data/lib/hamlit/filters/javascript.rb +25 -8
  91. data/lib/hamlit/filters/less.rb +9 -7
  92. data/lib/hamlit/filters/markdown.rb +5 -6
  93. data/lib/hamlit/filters/plain.rb +11 -15
  94. data/lib/hamlit/filters/preserve.rb +15 -5
  95. data/lib/hamlit/filters/ruby.rb +3 -5
  96. data/lib/hamlit/filters/sass.rb +9 -7
  97. data/lib/hamlit/filters/scss.rb +9 -7
  98. data/lib/hamlit/filters/text_base.rb +24 -0
  99. data/lib/hamlit/filters/tilt_base.rb +47 -0
  100. data/lib/hamlit/hash_parser.rb +107 -0
  101. data/lib/hamlit/html.rb +9 -6
  102. data/lib/hamlit/identity.rb +12 -0
  103. data/lib/hamlit/object_ref.rb +29 -0
  104. data/lib/hamlit/parser.rb +25 -142
  105. data/lib/hamlit/parser/MIT-LICENSE +20 -0
  106. data/lib/hamlit/parser/README.md +28 -0
  107. data/lib/hamlit/parser/haml_buffer.rb +348 -0
  108. data/lib/hamlit/parser/haml_compiler.rb +553 -0
  109. data/lib/hamlit/parser/haml_error.rb +61 -0
  110. data/lib/hamlit/parser/haml_helpers.rb +727 -0
  111. data/lib/hamlit/parser/haml_options.rb +286 -0
  112. data/lib/hamlit/parser/haml_parser.rb +801 -0
  113. data/lib/hamlit/parser/haml_util.rb +283 -0
  114. data/lib/hamlit/parser/haml_xss_mods.rb +109 -0
  115. data/lib/hamlit/{helpers.rb → rails_helpers.rb} +2 -7
  116. data/lib/hamlit/rails_template.rb +30 -0
  117. data/lib/hamlit/railtie.rb +1 -12
  118. data/lib/hamlit/ruby_expression.rb +31 -0
  119. data/lib/hamlit/static_analyzer.rb +49 -0
  120. data/lib/hamlit/string_interpolation.rb +69 -0
  121. data/lib/hamlit/template.rb +8 -0
  122. data/lib/hamlit/utils.rb +9 -0
  123. data/lib/hamlit/version.rb +1 -1
  124. metadata +116 -324
  125. data/.rspec +0 -2
  126. data/benchmarks/benchmark.rb +0 -110
  127. data/benchmarks/view.slim +0 -17
  128. data/doc/README.md +0 -19
  129. data/doc/engine/indent.md +0 -48
  130. data/doc/engine/new_attribute.md +0 -77
  131. data/doc/engine/old_attributes.md +0 -198
  132. data/doc/engine/silent_script.md +0 -97
  133. data/doc/engine/tag.md +0 -48
  134. data/doc/engine/text.md +0 -64
  135. data/doc/faml/README.md +0 -16
  136. data/doc/faml/engine/indent.md +0 -48
  137. data/doc/faml/engine/old_attributes.md +0 -111
  138. data/doc/faml/engine/silent_script.md +0 -97
  139. data/doc/faml/engine/text.md +0 -59
  140. data/doc/faml/filters/erb.md +0 -24
  141. data/doc/faml/filters/javascript.md +0 -27
  142. data/doc/faml/filters/less.md +0 -57
  143. data/doc/faml/filters/plain.md +0 -25
  144. data/doc/filters/erb.md +0 -31
  145. data/doc/filters/javascript.md +0 -83
  146. data/doc/filters/less.md +0 -57
  147. data/doc/filters/markdown.md +0 -31
  148. data/doc/filters/plain.md +0 -25
  149. data/doc/haml/README.md +0 -15
  150. data/doc/haml/engine/new_attribute.md +0 -77
  151. data/doc/haml/engine/old_attributes.md +0 -142
  152. data/doc/haml/engine/tag.md +0 -48
  153. data/doc/haml/engine/text.md +0 -29
  154. data/doc/haml/filters/erb.md +0 -26
  155. data/doc/haml/filters/javascript.md +0 -76
  156. data/doc/haml/filters/markdown.md +0 -31
  157. data/lib/hamlit/attribute.rb +0 -78
  158. data/lib/hamlit/compilers/attributes.rb +0 -108
  159. data/lib/hamlit/compilers/comment.rb +0 -13
  160. data/lib/hamlit/compilers/doctype.rb +0 -39
  161. data/lib/hamlit/compilers/filter.rb +0 -53
  162. data/lib/hamlit/compilers/new_attribute.rb +0 -115
  163. data/lib/hamlit/compilers/old_attribute.rb +0 -241
  164. data/lib/hamlit/compilers/runtime_attribute.rb +0 -58
  165. data/lib/hamlit/compilers/script.rb +0 -31
  166. data/lib/hamlit/compilers/strip.rb +0 -19
  167. data/lib/hamlit/compilers/text.rb +0 -111
  168. data/lib/hamlit/concerns/attribute_builder.rb +0 -22
  169. data/lib/hamlit/concerns/balanceable.rb +0 -68
  170. data/lib/hamlit/concerns/deprecation.rb +0 -20
  171. data/lib/hamlit/concerns/error.rb +0 -31
  172. data/lib/hamlit/concerns/escapable.rb +0 -17
  173. data/lib/hamlit/concerns/included.rb +0 -28
  174. data/lib/hamlit/concerns/indentable.rb +0 -117
  175. data/lib/hamlit/concerns/lexable.rb +0 -32
  176. data/lib/hamlit/concerns/line_reader.rb +0 -62
  177. data/lib/hamlit/concerns/registerable.rb +0 -24
  178. data/lib/hamlit/concerns/string_interpolation.rb +0 -48
  179. data/lib/hamlit/concerns/whitespace.rb +0 -91
  180. data/lib/hamlit/filters/tilt.rb +0 -41
  181. data/lib/hamlit/parsers/attribute.rb +0 -71
  182. data/lib/hamlit/parsers/comment.rb +0 -30
  183. data/lib/hamlit/parsers/doctype.rb +0 -18
  184. data/lib/hamlit/parsers/filter.rb +0 -18
  185. data/lib/hamlit/parsers/multiline.rb +0 -58
  186. data/lib/hamlit/parsers/script.rb +0 -126
  187. data/lib/hamlit/parsers/tag.rb +0 -83
  188. data/lib/hamlit/parsers/text.rb +0 -28
  189. data/lib/hamlit/temple.rb +0 -9
  190. data/release +0 -6
  191. data/spec/Rakefile +0 -72
  192. data/spec/hamlit/engine/comment_spec.rb +0 -56
  193. data/spec/hamlit/engine/doctype_spec.rb +0 -19
  194. data/spec/hamlit/engine/error_spec.rb +0 -135
  195. data/spec/hamlit/engine/indent_spec.rb +0 -42
  196. data/spec/hamlit/engine/multiline_spec.rb +0 -44
  197. data/spec/hamlit/engine/new_attribute_spec.rb +0 -110
  198. data/spec/hamlit/engine/old_attributes_spec.rb +0 -404
  199. data/spec/hamlit/engine/script_spec.rb +0 -116
  200. data/spec/hamlit/engine/silent_script_spec.rb +0 -213
  201. data/spec/hamlit/engine/tag_spec.rb +0 -295
  202. data/spec/hamlit/engine/text_spec.rb +0 -239
  203. data/spec/hamlit/engine_spec.rb +0 -58
  204. data/spec/hamlit/filters/coffee_spec.rb +0 -60
  205. data/spec/hamlit/filters/css_spec.rb +0 -33
  206. data/spec/hamlit/filters/erb_spec.rb +0 -16
  207. data/spec/hamlit/filters/javascript_spec.rb +0 -82
  208. data/spec/hamlit/filters/less_spec.rb +0 -37
  209. data/spec/hamlit/filters/markdown_spec.rb +0 -30
  210. data/spec/hamlit/filters/plain_spec.rb +0 -15
  211. data/spec/hamlit/filters/ruby_spec.rb +0 -24
  212. data/spec/hamlit/filters/sass_spec.rb +0 -33
  213. data/spec/hamlit/filters/scss_spec.rb +0 -37
  214. data/spec/hamlit/haml_spec.rb +0 -910
  215. data/spec/rails/.gitignore +0 -18
  216. data/spec/rails/.rspec +0 -2
  217. data/spec/rails/Gemfile +0 -19
  218. data/spec/rails/README.rdoc +0 -28
  219. data/spec/rails/Rakefile +0 -6
  220. data/spec/rails/app/assets/images/.keep +0 -0
  221. data/spec/rails/app/assets/javascripts/application.js +0 -15
  222. data/spec/rails/app/assets/stylesheets/application.css +0 -15
  223. data/spec/rails/app/controllers/application_controller.rb +0 -8
  224. data/spec/rails/app/controllers/concerns/.keep +0 -0
  225. data/spec/rails/app/controllers/users_controller.rb +0 -23
  226. data/spec/rails/app/helpers/application_helper.rb +0 -2
  227. data/spec/rails/app/mailers/.keep +0 -0
  228. data/spec/rails/app/models/.keep +0 -0
  229. data/spec/rails/app/models/concerns/.keep +0 -0
  230. data/spec/rails/app/views/application/index.html.haml +0 -18
  231. data/spec/rails/app/views/layouts/application.html.haml +0 -12
  232. data/spec/rails/app/views/users/capture.html.haml +0 -5
  233. data/spec/rails/app/views/users/capture_haml.html.haml +0 -5
  234. data/spec/rails/app/views/users/form.html.haml +0 -2
  235. data/spec/rails/app/views/users/helpers.html.haml +0 -10
  236. data/spec/rails/app/views/users/index.html.haml +0 -9
  237. data/spec/rails/app/views/users/inline.html.haml +0 -6
  238. data/spec/rails/app/views/users/old_attributes.html.haml +0 -5
  239. data/spec/rails/app/views/users/safe_buffer.html.haml +0 -4
  240. data/spec/rails/app/views/users/whitespace.html.haml +0 -4
  241. data/spec/rails/bin/bundle +0 -3
  242. data/spec/rails/bin/rails +0 -8
  243. data/spec/rails/bin/rake +0 -8
  244. data/spec/rails/bin/setup +0 -29
  245. data/spec/rails/bin/spring +0 -15
  246. data/spec/rails/config.ru +0 -4
  247. data/spec/rails/config/application.rb +0 -34
  248. data/spec/rails/config/boot.rb +0 -3
  249. data/spec/rails/config/database.yml +0 -25
  250. data/spec/rails/config/environment.rb +0 -5
  251. data/spec/rails/config/environments/development.rb +0 -41
  252. data/spec/rails/config/environments/production.rb +0 -79
  253. data/spec/rails/config/environments/test.rb +0 -42
  254. data/spec/rails/config/initializers/assets.rb +0 -11
  255. data/spec/rails/config/initializers/backtrace_silencers.rb +0 -7
  256. data/spec/rails/config/initializers/cookies_serializer.rb +0 -3
  257. data/spec/rails/config/initializers/filter_parameter_logging.rb +0 -4
  258. data/spec/rails/config/initializers/inflections.rb +0 -16
  259. data/spec/rails/config/initializers/mime_types.rb +0 -4
  260. data/spec/rails/config/initializers/session_store.rb +0 -3
  261. data/spec/rails/config/initializers/wrap_parameters.rb +0 -14
  262. data/spec/rails/config/locales/en.yml +0 -24
  263. data/spec/rails/config/routes.rb +0 -16
  264. data/spec/rails/config/secrets.yml +0 -22
  265. data/spec/rails/db/schema.rb +0 -16
  266. data/spec/rails/db/seeds.rb +0 -7
  267. data/spec/rails/lib/assets/.keep +0 -0
  268. data/spec/rails/lib/tasks/.keep +0 -0
  269. data/spec/rails/log/.keep +0 -0
  270. data/spec/rails/public/404.html +0 -67
  271. data/spec/rails/public/422.html +0 -67
  272. data/spec/rails/public/500.html +0 -66
  273. data/spec/rails/public/favicon.ico +0 -0
  274. data/spec/rails/public/robots.txt +0 -5
  275. data/spec/rails/spec/hamlit_spec.rb +0 -123
  276. data/spec/rails/spec/rails_helper.rb +0 -56
  277. data/spec/rails/spec/spec_helper.rb +0 -91
  278. data/spec/rails/vendor/assets/javascripts/.keep +0 -0
  279. data/spec/rails/vendor/assets/stylesheets/.keep +0 -0
  280. data/spec/spec_helper.rb +0 -36
  281. data/spec/spec_helper/document_generator.rb +0 -93
  282. data/spec/spec_helper/render_helper.rb +0 -120
  283. data/spec/spec_helper/test_case.rb +0 -55
@@ -0,0 +1,46 @@
1
+ #ifndef __HOUDINI_H__
2
+ #define __HOUDINI_H__
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include <stdint.h>
9
+ #include "buffer.h"
10
+
11
+ #define likely(x) __builtin_expect((x),1)
12
+ #define unlikely(x) __builtin_expect((x),0)
13
+
14
#ifdef HOUDINI_USE_LOCALE
# define _isxdigit(c) isxdigit(c)
# define _isdigit(c) isdigit(c)
#else
/*
 * Locale-independent digit classification helpers -- do not trust the
 * current locale.
 *
 * strchr() treats the terminating NUL as part of the searched string, so
 * a plain strchr(set, 0) would report a match. The explicit NUL guard
 * keeps '\0' from being classified as a hexadecimal digit.
 *
 * Both macros evaluate their argument more than once; pass only
 * side-effect-free expressions.
 */
# define _isxdigit(c) ((c) != '\0' && strchr("0123456789ABCDEFabcdef", (c)) != NULL)
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
24
+
25
+ #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
26
+ #define HOUDINI_UNESCAPED_SIZE(x) (x)
27
+
28
+ extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
29
+ extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
30
+ extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
31
+ extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
32
+ extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
33
+ extern int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
34
+ extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
35
+ extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
36
+ extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
37
+ extern int houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
38
+ extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
39
+ extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
40
+ extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
41
+
42
+ #ifdef __cplusplus
43
+ }
44
+ #endif
45
+
46
+ #endif
@@ -0,0 +1,115 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ /*
8
+ * The following characters will not be escaped:
9
+ *
10
+ * -_.+!*'(),%#@?=;:/,+&$ alphanum
11
+ *
12
+ * Note that this character set is the addition of:
13
+ *
14
+ * - The characters which are safe to be in an URL
15
+ * - The characters which are *not* safe to be in
16
+ * an URL because they are RESERVED characters.
17
+ *
18
+ * We assume (lazily) that any RESERVED char that
19
+ * appears inside an URL is actually meant to
20
+ * have its native function (i.e. as an URL
21
+ * component/separator) and hence needs no escaping.
22
+ *
23
+ * There are two exceptions: the characters & (amp)
24
+ * and ' (single quote) do not appear in the table.
25
+ * They are meant to appear in the URL as components,
26
+ * yet they require special HTML-entity escaping
27
+ * to generate valid HTML markup.
28
+ *
29
+ * All other characters will be escaped to %XX.
30
+ *
31
+ */
32
/*
 * Byte-indexed lookup table (ASCII code points): a non-zero entry marks a
 * byte that is passed through unescaped. Every byte not listed below is
 * implicitly zero.
 */
static const char HREF_SAFE[256] = {
	/* punctuation */
	['!'] = 1, ['#'] = 1, ['$'] = 1, ['%'] = 1, ['('] = 1, [')'] = 1,
	['*'] = 1, ['+'] = 1, [','] = 1, ['-'] = 1, ['.'] = 1, ['/'] = 1,
	[':'] = 1, [';'] = 1, ['='] = 1, ['?'] = 1, ['@'] = 1, ['_'] = 1,

	/* digits */
	['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1,
	['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1,

	/* upper-case letters */
	['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1, ['F'] = 1,
	['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1, ['K'] = 1, ['L'] = 1,
	['M'] = 1, ['N'] = 1, ['O'] = 1, ['P'] = 1, ['Q'] = 1, ['R'] = 1,
	['S'] = 1, ['T'] = 1, ['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1,
	['Y'] = 1, ['Z'] = 1,

	/* lower-case letters */
	['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, ['e'] = 1, ['f'] = 1,
	['g'] = 1, ['h'] = 1, ['i'] = 1, ['j'] = 1, ['k'] = 1, ['l'] = 1,
	['m'] = 1, ['n'] = 1, ['o'] = 1, ['p'] = 1, ['q'] = 1, ['r'] = 1,
	['s'] = 1, ['t'] = 1, ['u'] = 1, ['v'] = 1, ['w'] = 1, ['x'] = 1,
	['y'] = 1, ['z'] = 1,
};
50
+
51
+ int
52
+ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
53
+ {
54
+ static const uint8_t hex_chars[] = "0123456789ABCDEF";
55
+ size_t i = 0, org;
56
+ uint8_t hex_str[3];
57
+
58
+ hex_str[0] = '%';
59
+
60
+ while (i < size) {
61
+ org = i;
62
+ while (i < size && HREF_SAFE[src[i]] != 0)
63
+ i++;
64
+
65
+ if (likely(i > org)) {
66
+ if (unlikely(org == 0)) {
67
+ if (i >= size)
68
+ return 0;
69
+
70
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
71
+ }
72
+
73
+ gh_buf_put(ob, src + org, i - org);
74
+ }
75
+
76
+ /* escaping */
77
+ if (i >= size)
78
+ break;
79
+
80
+ switch (src[i]) {
81
+ /* amp appears all the time in URLs, but needs
82
+ * HTML-entity escaping to be inside an href */
83
+ case '&':
84
+ gh_buf_PUTS(ob, "&amp;");
85
+ break;
86
+
87
+ /* the single quote is a valid URL character
88
+ * according to the standard; it needs HTML
89
+ * entity escaping too */
90
+ case '\'':
91
+ gh_buf_PUTS(ob, "&#x27;");
92
+ break;
93
+
94
+ /* the space can be escaped to %20 or a plus
95
+ * sign. we're going with the generic escape
96
+ * for now. the plus thing is more commonly seen
97
+ * when building GET strings */
98
+ #if 0
99
+ case ' ':
100
+ gh_buf_putc(ob, '+');
101
+ break;
102
+ #endif
103
+
104
+ /* every other character goes with a %XX escaping */
105
+ default:
106
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
107
+ hex_str[2] = hex_chars[src[i] & 0xF];
108
+ gh_buf_put(ob, hex_str, 3);
109
+ }
110
+
111
+ i++;
112
+ }
113
+
114
+ return 1;
115
+ }
@@ -0,0 +1,90 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ /**
8
+ * According to the OWASP rules:
9
+ *
10
+ * & --> &amp;
11
+ * < --> &lt;
12
+ * > --> &gt;
13
+ * " --> &quot;
14
+ * ' --> &#39; &apos; is not recommended
15
+ * / --> &#47; forward slash is included as it helps end an HTML entity
16
+ *
17
+ */
18
/*
 * Byte-indexed lookup table (ASCII code points). A non-zero entry is the
 * index into HTML_ESCAPES of the replacement text for that byte; bytes
 * not listed are implicitly zero and need no escaping.
 */
static const char HTML_ESCAPE_TABLE[256] = {
	['"']  = 1,
	['&']  = 2,
	['\''] = 3,
	['/']  = 4,
	['<']  = 5,
	['>']  = 6,
};

/* Replacement strings, indexed by the values stored in HTML_ESCAPE_TABLE. */
static const char *HTML_ESCAPES[] = {
	[0] = "",
	[1] = "&quot;",
	[2] = "&amp;",
	[3] = "&#39;",
	[4] = "&#47;",
	[5] = "&lt;",
	[6] = "&gt;"
};
46
+
47
+ int
48
+ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
49
+ {
50
+ size_t i = 0, org, esc = 0;
51
+
52
+ while (i < size) {
53
+ org = i;
54
+ while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
55
+ i++;
56
+
57
+ if (i > org) {
58
+ if (unlikely(org == 0)) {
59
+ if (i >= size)
60
+ return 0;
61
+
62
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
63
+ }
64
+
65
+ gh_buf_put(ob, src + org, i - org);
66
+ }
67
+
68
+ /* escaping */
69
+ if (unlikely(i >= size))
70
+ break;
71
+
72
+ /* The forward slash is only escaped in secure mode */
73
+ if (src[i] == '/' && !secure) {
74
+ gh_buf_putc(ob, '/');
75
+ } else {
76
+ gh_buf_puts(ob, HTML_ESCAPES[esc]);
77
+ }
78
+
79
+ i++;
80
+ }
81
+
82
+ return 1;
83
+ }
84
+
85
+ int
86
+ houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
87
+ {
88
+ return houdini_escape_html0(ob, src, size, 1);
89
+ }
90
+
@@ -0,0 +1,122 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+ #include "html_unescape.h"
7
+
8
+ static inline void
9
+ gh_buf_put_utf8(gh_buf *ob, int c)
10
+ {
11
+ unsigned char unichar[4];
12
+
13
+ if (c < 0x80) {
14
+ gh_buf_putc(ob, c);
15
+ }
16
+ else if (c < 0x800) {
17
+ unichar[0] = 192 + (c / 64);
18
+ unichar[1] = 128 + (c % 64);
19
+ gh_buf_put(ob, unichar, 2);
20
+ }
21
+ else if (c - 0xd800u < 0x800) {
22
+ gh_buf_putc(ob, '?');
23
+ }
24
+ else if (c < 0x10000) {
25
+ unichar[0] = 224 + (c / 4096);
26
+ unichar[1] = 128 + (c / 64) % 64;
27
+ unichar[2] = 128 + (c % 64);
28
+ gh_buf_put(ob, unichar, 3);
29
+ }
30
+ else if (c < 0x110000) {
31
+ unichar[0] = 240 + (c / 262144);
32
+ unichar[1] = 128 + (c / 4096) % 64;
33
+ unichar[2] = 128 + (c / 64) % 64;
34
+ unichar[3] = 128 + (c % 64);
35
+ gh_buf_put(ob, unichar, 4);
36
+ }
37
+ else {
38
+ gh_buf_putc(ob, '?');
39
+ }
40
+ }
41
+
42
+ static size_t
43
+ unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
44
+ {
45
+ size_t i = 0;
46
+
47
+ if (size > 3 && src[0] == '#') {
48
+ int codepoint = 0;
49
+
50
+ if (_isdigit(src[1])) {
51
+ for (i = 1; i < size && _isdigit(src[i]); ++i)
52
+ codepoint = (codepoint * 10) + (src[i] - '0');
53
+ }
54
+
55
+ else if (src[1] == 'x' || src[1] == 'X') {
56
+ for (i = 2; i < size && _isxdigit(src[i]); ++i)
57
+ codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
58
+ }
59
+
60
+ if (i < size && src[i] == ';' && codepoint) {
61
+ gh_buf_put_utf8(ob, codepoint);
62
+ return i + 1;
63
+ }
64
+ }
65
+
66
+ else {
67
+ if (size > MAX_WORD_LENGTH)
68
+ size = MAX_WORD_LENGTH;
69
+
70
+ for (i = MIN_WORD_LENGTH; i < size; ++i) {
71
+ if (src[i] == ' ')
72
+ break;
73
+
74
+ if (src[i] == ';') {
75
+ const struct html_ent *entity = find_entity((char *)src, i);
76
+
77
+ if (entity != NULL) {
78
+ gh_buf_put(ob, entity->utf8, entity->utf8_len);
79
+ return i + 1;
80
+ }
81
+
82
+ break;
83
+ }
84
+ }
85
+ }
86
+
87
+ gh_buf_putc(ob, '&');
88
+ return 0;
89
+ }
90
+
91
+ int
92
+ houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
93
+ {
94
+ size_t i = 0, org;
95
+
96
+ while (i < size) {
97
+ org = i;
98
+ while (i < size && src[i] != '&')
99
+ i++;
100
+
101
+ if (likely(i > org)) {
102
+ if (unlikely(org == 0)) {
103
+ if (i >= size)
104
+ return 0;
105
+
106
+ gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
107
+ }
108
+
109
+ gh_buf_put(ob, src + org, i - org);
110
+ }
111
+
112
+ /* escaping */
113
+ if (i >= size)
114
+ break;
115
+
116
+ i++;
117
+ i += unescape_ent(ob, src + i, size - i);
118
+ }
119
+
120
+ return 1;
121
+ }
122
+
@@ -0,0 +1,90 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
/*
 * Byte-indexed lookup table (ASCII code points): a non-zero entry marks
 * a byte that needs special treatment when escaped for JavaScript.
 * Bytes not listed are implicitly zero.
 */
static const char JS_ESCAPE[256] = {
	['\n'] = 1,
	['\r'] = 1,
	['"']  = 1,
	['\''] = 1,
	['/']  = 1,
	['\\'] = 1,
};
25
+
26
+ int
27
+ houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size)
28
+ {
29
+ size_t i = 0, org, ch;
30
+
31
+ while (i < size) {
32
+ org = i;
33
+ while (i < size && JS_ESCAPE[src[i]] == 0)
34
+ i++;
35
+
36
+ if (likely(i > org)) {
37
+ if (unlikely(org == 0)) {
38
+ if (i >= size)
39
+ return 0;
40
+
41
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
42
+ }
43
+
44
+ gh_buf_put(ob, src + org, i - org);
45
+ }
46
+
47
+ /* escaping */
48
+ if (i >= size)
49
+ break;
50
+
51
+ ch = src[i];
52
+
53
+ switch (ch) {
54
+ case '/':
55
+ /*
56
+ * Escape only if preceded by a lt
57
+ */
58
+ if (i && src[i - 1] == '<')
59
+ gh_buf_putc(ob, '\\');
60
+
61
+ gh_buf_putc(ob, ch);
62
+ break;
63
+
64
+ case '\r':
65
+ /*
66
+ * Escape as \n, and skip the next \n if it's there
67
+ */
68
+ if (i + 1 < size && src[i + 1] == '\n') i++;
69
+
70
+ case '\n':
71
+ /*
72
+ * Escape actually as '\','n', not as '\', '\n'
73
+ */
74
+ ch = 'n';
75
+
76
+ default:
77
+ /*
78
+ * Normal escaping
79
+ */
80
+ gh_buf_putc(ob, '\\');
81
+ gh_buf_putc(ob, ch);
82
+ break;
83
+ }
84
+
85
+ i++;
86
+ }
87
+
88
+ return 1;
89
+ }
90
+