herb 0.8.9-arm-linux-gnu → 0.9.0-arm-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. checksums.yaml +4 -4
  2. data/Makefile +33 -11
  3. data/README.md +64 -34
  4. data/Rakefile +48 -40
  5. data/config.yml +323 -33
  6. data/ext/herb/error_helpers.c +384 -132
  7. data/ext/herb/error_helpers.h +1 -0
  8. data/ext/herb/extconf.rb +67 -28
  9. data/ext/herb/extension.c +317 -51
  10. data/ext/herb/extension.h +1 -0
  11. data/ext/herb/extension_helpers.c +23 -14
  12. data/ext/herb/extension_helpers.h +2 -2
  13. data/ext/herb/nodes.c +537 -270
  14. data/ext/herb/nodes.h +1 -0
  15. data/herb.gemspec +3 -2
  16. data/lib/herb/3.0/herb.so +0 -0
  17. data/lib/herb/3.1/herb.so +0 -0
  18. data/lib/herb/3.2/herb.so +0 -0
  19. data/lib/herb/3.3/herb.so +0 -0
  20. data/lib/herb/3.4/herb.so +0 -0
  21. data/lib/herb/4.0/herb.so +0 -0
  22. data/lib/herb/ast/helpers.rb +3 -3
  23. data/lib/herb/ast/node.rb +15 -2
  24. data/lib/herb/ast/nodes.rb +1132 -157
  25. data/lib/herb/bootstrap.rb +87 -0
  26. data/lib/herb/cli.rb +341 -31
  27. data/lib/herb/configuration.rb +248 -0
  28. data/lib/herb/defaults.yml +32 -0
  29. data/lib/herb/engine/compiler.rb +83 -14
  30. data/lib/herb/engine/debug_visitor.rb +51 -6
  31. data/lib/herb/engine/error_formatter.rb +13 -9
  32. data/lib/herb/engine/parser_error_overlay.rb +10 -6
  33. data/lib/herb/engine/validator.rb +8 -3
  34. data/lib/herb/engine/validators/nesting_validator.rb +2 -2
  35. data/lib/herb/engine.rb +92 -33
  36. data/lib/herb/errors.rb +582 -87
  37. data/lib/herb/lex_result.rb +1 -0
  38. data/lib/herb/location.rb +7 -3
  39. data/lib/herb/parse_result.rb +12 -2
  40. data/lib/herb/parser_options.rb +57 -0
  41. data/lib/herb/position.rb +1 -0
  42. data/lib/herb/prism_inspect.rb +116 -0
  43. data/lib/herb/project.rb +923 -331
  44. data/lib/herb/range.rb +1 -0
  45. data/lib/herb/token.rb +7 -1
  46. data/lib/herb/version.rb +1 -1
  47. data/lib/herb/visitor.rb +37 -2
  48. data/lib/herb/warnings.rb +6 -1
  49. data/lib/herb.rb +35 -3
  50. data/sig/herb/ast/helpers.rbs +2 -2
  51. data/sig/herb/ast/node.rbs +12 -2
  52. data/sig/herb/ast/nodes.rbs +641 -128
  53. data/sig/herb/bootstrap.rbs +31 -0
  54. data/sig/herb/configuration.rbs +89 -0
  55. data/sig/herb/engine/compiler.rbs +9 -1
  56. data/sig/herb/engine/debug_visitor.rbs +8 -0
  57. data/sig/herb/engine/validator.rbs +5 -1
  58. data/sig/herb/engine.rbs +18 -2
  59. data/sig/herb/errors.rbs +268 -63
  60. data/sig/herb/location.rbs +4 -0
  61. data/sig/herb/parse_result.rbs +4 -2
  62. data/sig/herb/parser_options.rbs +42 -0
  63. data/sig/herb/position.rbs +1 -0
  64. data/sig/herb/prism_inspect.rbs +28 -0
  65. data/sig/herb/range.rbs +1 -0
  66. data/sig/herb/token.rbs +6 -0
  67. data/sig/herb/visitor.rbs +25 -4
  68. data/sig/herb/warnings.rbs +6 -1
  69. data/sig/herb.rbs +14 -0
  70. data/sig/herb_c_extension.rbs +5 -2
  71. data/sig/serialized_ast_errors.rbs +57 -6
  72. data/sig/serialized_ast_nodes.rbs +60 -6
  73. data/src/analyze/action_view/attribute_extraction_helpers.c +290 -0
  74. data/src/analyze/action_view/content_tag.c +70 -0
  75. data/src/analyze/action_view/link_to.c +143 -0
  76. data/src/analyze/action_view/registry.c +60 -0
  77. data/src/analyze/action_view/tag.c +64 -0
  78. data/src/analyze/action_view/tag_helper_node_builders.c +305 -0
  79. data/src/analyze/action_view/tag_helpers.c +748 -0
  80. data/src/analyze/action_view/turbo_frame_tag.c +88 -0
  81. data/src/analyze/analyze.c +882 -0
  82. data/src/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
  83. data/src/analyze/builders.c +343 -0
  84. data/src/analyze/conditional_elements.c +594 -0
  85. data/src/analyze/conditional_open_tags.c +640 -0
  86. data/src/analyze/control_type.c +250 -0
  87. data/src/{analyze_helpers.c → analyze/helpers.c} +79 -31
  88. data/src/analyze/invalid_structures.c +193 -0
  89. data/src/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
  90. data/src/analyze/parse_errors.c +84 -0
  91. data/src/analyze/prism_annotate.c +397 -0
  92. data/src/{analyze_transform.c → analyze/transform.c} +17 -3
  93. data/src/ast_node.c +17 -7
  94. data/src/ast_nodes.c +662 -387
  95. data/src/ast_pretty_print.c +190 -6
  96. data/src/errors.c +1099 -506
  97. data/src/extract.c +148 -49
  98. data/src/herb.c +52 -34
  99. data/src/html_util.c +241 -12
  100. data/src/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
  101. data/src/include/analyze/action_view/tag_helper_handler.h +41 -0
  102. data/src/include/analyze/action_view/tag_helper_node_builders.h +70 -0
  103. data/src/include/analyze/action_view/tag_helpers.h +38 -0
  104. data/src/include/{analyze.h → analyze/analyze.h} +14 -4
  105. data/src/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
  106. data/src/include/analyze/builders.h +27 -0
  107. data/src/include/analyze/conditional_elements.h +9 -0
  108. data/src/include/analyze/conditional_open_tags.h +9 -0
  109. data/src/include/analyze/control_type.h +14 -0
  110. data/src/include/{analyze_helpers.h → analyze/helpers.h} +22 -17
  111. data/src/include/analyze/invalid_structures.h +11 -0
  112. data/src/include/analyze/prism_annotate.h +16 -0
  113. data/src/include/ast_node.h +11 -5
  114. data/src/include/ast_nodes.h +117 -38
  115. data/src/include/ast_pretty_print.h +5 -0
  116. data/src/include/element_source.h +3 -8
  117. data/src/include/errors.h +154 -53
  118. data/src/include/extract.h +21 -5
  119. data/src/include/herb.h +18 -6
  120. data/src/include/herb_prism_node.h +13 -0
  121. data/src/include/html_util.h +7 -2
  122. data/src/include/io.h +3 -1
  123. data/src/include/lex_helpers.h +29 -0
  124. data/src/include/lexer.h +1 -1
  125. data/src/include/lexer_peek_helpers.h +87 -13
  126. data/src/include/lexer_struct.h +2 -0
  127. data/src/include/location.h +2 -1
  128. data/src/include/parser.h +27 -2
  129. data/src/include/parser_helpers.h +19 -3
  130. data/src/include/pretty_print.h +10 -5
  131. data/src/include/prism_context.h +45 -0
  132. data/src/include/prism_helpers.h +10 -7
  133. data/src/include/prism_serialized.h +12 -0
  134. data/src/include/token.h +16 -4
  135. data/src/include/token_struct.h +10 -3
  136. data/src/include/utf8.h +2 -1
  137. data/src/include/util/hb_allocator.h +78 -0
  138. data/src/include/util/hb_arena.h +6 -1
  139. data/src/include/util/hb_arena_debug.h +12 -1
  140. data/src/include/util/hb_array.h +7 -3
  141. data/src/include/util/hb_buffer.h +6 -4
  142. data/src/include/util/hb_foreach.h +79 -0
  143. data/src/include/util/hb_narray.h +8 -4
  144. data/src/include/util/hb_string.h +56 -9
  145. data/src/include/util/string.h +11 -0
  146. data/src/include/util.h +6 -3
  147. data/src/include/version.h +1 -1
  148. data/src/io.c +3 -2
  149. data/src/lexer.c +42 -30
  150. data/src/lexer_peek_helpers.c +12 -74
  151. data/src/location.c +2 -2
  152. data/src/main.c +79 -66
  153. data/src/parser.c +784 -247
  154. data/src/parser_helpers.c +110 -23
  155. data/src/parser_match_tags.c +109 -48
  156. data/src/pretty_print.c +29 -24
  157. data/src/prism_helpers.c +30 -27
  158. data/src/ruby_parser.c +2 -0
  159. data/src/token.c +151 -66
  160. data/src/token_matchers.c +0 -1
  161. data/src/utf8.c +7 -6
  162. data/src/util/hb_allocator.c +341 -0
  163. data/src/util/hb_arena.c +81 -56
  164. data/src/util/hb_arena_debug.c +32 -17
  165. data/src/util/hb_array.c +30 -15
  166. data/src/util/hb_buffer.c +17 -21
  167. data/src/util/hb_narray.c +22 -7
  168. data/src/util/hb_string.c +49 -35
  169. data/src/util.c +21 -11
  170. data/src/visitor.c +47 -0
  171. data/templates/ext/herb/error_helpers.c.erb +24 -11
  172. data/templates/ext/herb/error_helpers.h.erb +1 -0
  173. data/templates/ext/herb/nodes.c.erb +50 -16
  174. data/templates/ext/herb/nodes.h.erb +1 -0
  175. data/templates/java/error_helpers.c.erb +1 -1
  176. data/templates/java/nodes.c.erb +30 -8
  177. data/templates/java/org/herb/ast/Errors.java.erb +24 -1
  178. data/templates/java/org/herb/ast/Nodes.java.erb +80 -21
  179. data/templates/javascript/packages/core/src/errors.ts.erb +16 -3
  180. data/templates/javascript/packages/core/src/node-type-guards.ts.erb +3 -1
  181. data/templates/javascript/packages/core/src/nodes.ts.erb +109 -32
  182. data/templates/javascript/packages/node/extension/error_helpers.cpp.erb +13 -4
  183. data/templates/javascript/packages/node/extension/nodes.cpp.erb +43 -4
  184. data/templates/lib/herb/ast/nodes.rb.erb +88 -31
  185. data/templates/lib/herb/errors.rb.erb +15 -3
  186. data/templates/lib/herb/visitor.rb.erb +2 -2
  187. data/templates/rust/src/ast/nodes.rs.erb +97 -44
  188. data/templates/rust/src/errors.rs.erb +2 -1
  189. data/templates/rust/src/nodes.rs.erb +167 -15
  190. data/templates/rust/src/union_types.rs.erb +60 -0
  191. data/templates/rust/src/visitor.rs.erb +81 -0
  192. data/templates/src/{analyze_missing_end.c.erb → analyze/missing_end.c.erb} +9 -6
  193. data/templates/src/{analyze_transform.c.erb → analyze/transform.c.erb} +2 -2
  194. data/templates/src/ast_nodes.c.erb +34 -26
  195. data/templates/src/ast_pretty_print.c.erb +24 -5
  196. data/templates/src/errors.c.erb +60 -54
  197. data/templates/src/include/ast_nodes.h.erb +6 -2
  198. data/templates/src/include/ast_pretty_print.h.erb +5 -0
  199. data/templates/src/include/errors.h.erb +15 -11
  200. data/templates/src/include/util/hb_foreach.h.erb +20 -0
  201. data/templates/src/parser_match_tags.c.erb +10 -4
  202. data/templates/src/visitor.c.erb +2 -2
  203. data/templates/template.rb +204 -29
  204. data/templates/wasm/error_helpers.cpp.erb +9 -5
  205. data/templates/wasm/nodes.cpp.erb +41 -4
  206. data/vendor/prism/config.yml +4 -4
  207. data/vendor/prism/include/prism/ast.h +4 -4
  208. data/vendor/prism/include/prism/version.h +2 -2
  209. data/vendor/prism/src/prism.c +1 -1
  210. data/vendor/prism/templates/java/org/prism/Loader.java.erb +1 -1
  211. data/vendor/prism/templates/javascript/src/deserialize.js.erb +1 -1
  212. data/vendor/prism/templates/lib/prism/node.rb.erb +23 -15
  213. data/vendor/prism/templates/lib/prism/serialize.rb.erb +1 -1
  214. data/vendor/prism/templates/rbi/prism/node.rbi.erb +3 -0
  215. data/vendor/prism/templates/sig/prism/node.rbs.erb +3 -0
  216. data/vendor/prism/templates/sig/prism.rbs.erb +9 -10
  217. metadata +58 -16
  218. data/src/analyze.c +0 -1594
  219. data/src/element_source.c +0 -12
  220. data/src/include/util/hb_system.h +0 -9
  221. data/src/util/hb_system.c +0 -30
@@ -4,26 +4,73 @@
4
4
  #include <stdbool.h>
5
5
  #include <stddef.h>
6
6
  #include <stdint.h>
7
+ #include <string.h>
8
+ #include <strings.h>
7
9
 
8
- #include "hb_arena.h"
10
+ #include "hb_allocator.h"
11
+ #include "hb_foreach.h"
9
12
 
10
13
  typedef struct HB_STRING_STRUCT {
11
14
  char* data;
12
15
  uint32_t length;
13
16
  } hb_string_T;
14
17
 
15
- hb_string_T hb_string(const char* null_terminated_c_string);
16
- hb_string_T hb_string_slice(hb_string_T string, uint32_t offset);
17
- bool hb_string_equals(hb_string_T a, hb_string_T b);
18
- bool hb_string_equals_case_insensitive(hb_string_T a, hb_string_T b);
19
- bool hb_string_starts_with(hb_string_T string, hb_string_T expected_prefix);
20
- bool hb_string_is_empty(hb_string_T string);
21
- hb_string_T hb_string_truncate(hb_string_T string, uint32_t max_length);
18
+ #define HB_STRING_EMPTY ((hb_string_T) { .data = "", .length = 0 })
19
+ #define HB_STRING_NULL ((hb_string_T) { .data = NULL, .length = 0 })
20
+
21
+ #define HB_STRING_LITERAL(string) { .data = (char*) (string), .length = (uint32_t) (sizeof(string) - 1) }
22
+
23
+ #define HB_STRING_LIST(...) { HB_FOR_EACH(HB_STRING_LITERAL, __VA_ARGS__) }
24
+
25
+ #define hb_string(string) \
26
+ (__builtin_constant_p(string) \
27
+ ? ((hb_string_T) { .data = (char*) (string), .length = (uint32_t) __builtin_strlen(string) }) \
28
+ : hb_string_from_c_string(string))
29
+
30
+ hb_string_T hb_string_from_c_string(const char* null_terminated_c_string);
31
+
32
+ static inline bool hb_string_is_null(hb_string_T string) {
33
+ return string.data == NULL;
34
+ }
35
+
36
+ static inline bool hb_string_is_empty(hb_string_T string) {
37
+ return string.data == NULL || string.length == 0;
38
+ }
39
+
40
+ static inline hb_string_T hb_string_slice(hb_string_T string, uint32_t offset) {
41
+ if (string.length < offset) { return HB_STRING_NULL; }
42
+
43
+ return (hb_string_T) { .data = string.data + offset, .length = string.length - offset };
44
+ }
22
45
 
46
+ static inline bool hb_string_equals(hb_string_T a, hb_string_T b) {
47
+ if (a.length != b.length) { return false; }
48
+
49
+ return strncmp(a.data, b.data, a.length) == 0;
50
+ }
51
+
52
+ static inline bool hb_string_equals_case_insensitive(hb_string_T a, hb_string_T b) {
53
+ if (a.length != b.length) { return false; }
54
+
55
+ return strncasecmp(a.data, b.data, a.length) == 0;
56
+ }
57
+
58
+ static inline bool hb_string_starts_with(hb_string_T string, hb_string_T expected_prefix) {
59
+ if (hb_string_is_empty(string) || hb_string_is_empty(expected_prefix)) { return false; }
60
+ if (string.length < expected_prefix.length) { return false; }
61
+
62
+ return strncmp(string.data, expected_prefix.data, expected_prefix.length) == 0;
63
+ }
64
+
65
+ hb_string_T hb_string_truncate(hb_string_T string, uint32_t max_length);
23
66
  hb_string_T hb_string_range(hb_string_T string, uint32_t from, uint32_t to);
67
+ hb_string_T hb_string_trim_start(hb_string_T string);
68
+ hb_string_T hb_string_trim_end(hb_string_T string);
69
+ hb_string_T hb_string_trim(hb_string_T string);
70
+ bool hb_string_is_blank(hb_string_T string);
71
+ hb_string_T hb_string_copy(hb_string_T string, hb_allocator_T* allocator);
24
72
 
25
73
  char* hb_string_to_c_string_using_malloc(hb_string_T string);
26
-
27
74
  char* hb_string_to_c_string(hb_arena_T* allocator, hb_string_T string);
28
75
 
29
76
  #endif
@@ -0,0 +1,11 @@
1
+ #ifndef HERB_UTILS_STRING_H
2
+ #define HERB_UTILS_STRING_H
3
+
4
+ #include <stdbool.h>
5
+ #include <string.h>
6
+
7
+ static inline bool string_equals(const char* a, const char* b) {
8
+ return strcmp(a, b) == 0;
9
+ }
10
+
11
+ #endif
data/src/include/util.h CHANGED
@@ -5,10 +5,13 @@
5
5
  #include <stdbool.h>
6
6
  #include <stdlib.h>
7
7
 
8
+ struct hb_allocator;
9
+
8
10
  int is_newline(int character);
11
+ int is_whitespace(int character);
12
+ hb_string_T escape_newlines(struct hb_allocator* allocator, hb_string_T input);
13
+ hb_string_T quoted_string(struct hb_allocator* allocator, hb_string_T input);
9
14
 
10
- hb_string_T escape_newlines(hb_string_T input);
11
- hb_string_T quoted_string(hb_string_T input);
12
- char* herb_strdup(const char* s);
15
+ char* convert_underscores_to_dashes(const char* input);
13
16
 
14
17
  #endif
@@ -1,6 +1,6 @@
1
1
  #ifndef HERB_VERSION_H
2
2
  #define HERB_VERSION_H
3
3
 
4
- #define HERB_VERSION "0.8.9"
4
+ #define HERB_VERSION "0.9.0"
5
5
 
6
6
  #endif
data/src/io.c CHANGED
@@ -1,4 +1,5 @@
1
1
  #include "include/io.h"
2
+ #include "include/util/hb_allocator.h"
2
3
  #include "include/util/hb_buffer.h"
3
4
 
4
5
  #include <errno.h>
@@ -7,7 +8,7 @@
7
8
 
8
9
  #define FILE_READ_CHUNK 4096
9
10
 
10
- char* herb_read_file(const char* filename) {
11
+ char* herb_read_file(const char* filename, struct hb_allocator* allocator) {
11
12
  if (!filename) { return NULL; }
12
13
 
13
14
  FILE* fp = fopen(filename, "rb");
@@ -18,7 +19,7 @@ char* herb_read_file(const char* filename) {
18
19
  }
19
20
 
20
21
  hb_buffer_T buffer;
21
- hb_buffer_init(&buffer, 4096);
22
+ hb_buffer_init(&buffer, 4096, allocator);
22
23
 
23
24
  char chunk[FILE_READ_CHUNK];
24
25
  size_t bytes_read;
data/src/lexer.c CHANGED
@@ -1,15 +1,18 @@
1
1
  #include "include/lexer_peek_helpers.h"
2
+ #include "include/macros.h"
2
3
  #include "include/token.h"
3
4
  #include "include/utf8.h"
4
5
  #include "include/util.h"
5
- #include "include/util/hb_buffer.h"
6
6
  #include "include/util/hb_string.h"
7
7
 
8
8
  #include <ctype.h>
9
+ #include <stdint.h>
9
10
  #include <string.h>
10
11
 
11
12
  #define LEXER_STALL_LIMIT 5
12
13
 
14
+ static hb_string_T erb_open_patterns[] = HB_STRING_LIST("<%==", "<%%=", "<%graphql", "<%=", "<%#", "<%-", "<%%", "<%");
15
+
13
16
  static bool lexer_eof(const lexer_T* lexer) {
14
17
  return lexer->current_character == '\0' || lexer->stalled;
15
18
  }
@@ -31,11 +34,13 @@ static bool lexer_stalled(lexer_T* lexer) {
31
34
  return lexer->stalled;
32
35
  }
33
36
 
34
- void lexer_init(lexer_T* lexer, const char* source) {
37
+ void lexer_init(lexer_T* lexer, const char* source, hb_allocator_T* allocator) {
38
+ lexer->allocator = allocator;
39
+
35
40
  if (source != NULL) {
36
41
  lexer->source = hb_string(source);
37
42
  } else {
38
- lexer->source = hb_string("");
43
+ lexer->source = HB_STRING_EMPTY;
39
44
  }
40
45
 
41
46
  lexer->current_character = lexer->source.data[0];
@@ -55,11 +60,11 @@ void lexer_init(lexer_T* lexer, const char* source) {
55
60
  }
56
61
 
57
62
  token_T* lexer_error(lexer_T* lexer, const char* message) {
58
- char error_message[128];
63
+ char buffer[128];
59
64
 
60
65
  snprintf(
61
- error_message,
62
- sizeof(error_message),
66
+ buffer,
67
+ sizeof(buffer),
63
68
  "[Lexer] Error: %s (character '%c', line %u, col %u)\n",
64
69
  message,
65
70
  lexer->current_character,
@@ -67,7 +72,10 @@ token_T* lexer_error(lexer_T* lexer, const char* message) {
67
72
  lexer->current_column
68
73
  );
69
74
 
70
- return token_init(hb_string(error_message), TOKEN_ERROR, lexer);
75
+ size_t length = strlen(buffer);
76
+ char* error_message = hb_allocator_strndup(lexer->allocator, buffer, length);
77
+
78
+ return token_init((hb_string_T) { .data = error_message, .length = (uint32_t) length }, TOKEN_ERROR, lexer);
71
79
  }
72
80
 
73
81
  static void lexer_advance(lexer_T* lexer) {
@@ -79,8 +87,8 @@ static void lexer_advance(lexer_T* lexer) {
79
87
  }
80
88
  }
81
89
 
82
- static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
83
- if (byte_count <= 0) { return; }
90
+ static void lexer_advance_utf8_bytes(lexer_T* lexer, uint32_t byte_count) {
91
+ if (byte_count == 0) { return; }
84
92
 
85
93
  if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
86
94
  if (!is_newline(lexer->current_character)) { lexer->current_column++; }
@@ -120,19 +128,17 @@ static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type
120
128
  }
121
129
 
122
130
  static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
123
- char buffer[2];
124
- buffer[0] = lexer->current_character;
125
- buffer[1] = '\0';
126
-
127
- return lexer_advance_with(lexer, hb_string(buffer), type);
131
+ return lexer_advance_with_next(lexer, 1, type);
128
132
  }
129
133
 
130
134
  static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
131
- int char_byte_length = utf8_sequence_length(lexer->source.data, lexer->current_position, lexer->source.length);
135
+ uint32_t char_byte_length = utf8_sequence_length(hb_string_slice(lexer->source, lexer->current_position));
136
+
132
137
  if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
138
+
133
139
  uint32_t start_position = lexer->current_position;
134
140
 
135
- for (int i = 0; i < char_byte_length; i++) {
141
+ for (uint32_t i = 0; i < char_byte_length; i++) {
136
142
  if (lexer->current_position + i >= lexer->source.length) { return lexer_advance_current(lexer, type); }
137
143
  }
138
144
 
@@ -171,7 +177,8 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
171
177
 
172
178
  while ((isalnum(lexer->current_character) || lexer->current_character == '-' || lexer->current_character == '_'
173
179
  || lexer->current_character == ':')
174
- && !lexer_peek_for_html_comment_end(lexer, 0) && !lexer_eof(lexer)) {
180
+ && !lexer_peek_for_html_comment_end(lexer, 0) && !lexer_peek_for_html_comment_invalid_end(lexer, 0)
181
+ && !lexer_eof(lexer)) {
175
182
 
176
183
  lexer_advance(lexer);
177
184
  }
@@ -185,13 +192,9 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
185
192
  // ===== ERB Parsing
186
193
 
187
194
  static token_T* lexer_parse_erb_open(lexer_T* lexer) {
188
- hb_string_T erb_patterns[] = { hb_string("<%=="), hb_string("<%%="), hb_string("<%="), hb_string("<%#"),
189
- hb_string("<%-"), hb_string("<%%"), hb_string("<%graphql"), hb_string("<%") };
190
-
191
195
  lexer->state = STATE_ERB_CONTENT;
192
-
193
- for (size_t i = 0; i < sizeof(erb_patterns) / sizeof(erb_patterns[0]); i++) {
194
- token_T* match = lexer_match_and_advance(lexer, erb_patterns[i], TOKEN_ERB_START);
196
+ for (size_t i = 0; i < sizeof(erb_open_patterns) / sizeof(erb_open_patterns[0]); i++) {
197
+ token_T* match = lexer_match_and_advance(lexer, erb_open_patterns[i], TOKEN_ERB_START);
195
198
  if (match) { return match; }
196
199
  }
197
200
 
@@ -203,11 +206,17 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
203
206
 
204
207
  while (!lexer_peek_erb_end(lexer, 0)) {
205
208
  if (lexer_eof(lexer)) {
206
- token_T* token = token_init(
207
- hb_string_range(lexer->source, start_position, lexer->current_position),
208
- TOKEN_ERROR,
209
- lexer
210
- ); // Handle unexpected EOF
209
+ token_T* token =
210
+ token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_ERB_CONTENT, lexer);
211
+
212
+ return token;
213
+ }
214
+
215
+ if (lexer_peek_erb_start(lexer, 0)) {
216
+ lexer->state = STATE_DATA;
217
+
218
+ token_T* token =
219
+ token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_ERB_CONTENT, lexer);
211
220
 
212
221
  return token;
213
222
  }
@@ -244,7 +253,7 @@ static token_T* lexer_parse_erb_close(lexer_T* lexer) {
244
253
  // ===== Tokenizing Function
245
254
 
246
255
  token_T* lexer_next_token(lexer_T* lexer) {
247
- if (lexer_eof(lexer)) { return token_init(hb_string(""), TOKEN_EOF, lexer); }
256
+ if (lexer_eof(lexer)) { return token_init(HB_STRING_EMPTY, TOKEN_EOF, lexer); }
248
257
  if (lexer_stalled(lexer)) { return lexer_error(lexer, "Lexer stalled after 5 iterations"); }
249
258
 
250
259
  if (lexer->state == STATE_ERB_CONTENT) { return lexer_parse_erb_content(lexer); }
@@ -302,7 +311,10 @@ token_T* lexer_next_token(lexer_T* lexer) {
302
311
  }
303
312
 
304
313
  case '-': {
305
- token_T* token = lexer_match_and_advance(lexer, hb_string("-->"), TOKEN_HTML_COMMENT_END);
314
+ token_T* token = lexer_match_and_advance(lexer, hb_string("--!>"), TOKEN_HTML_COMMENT_INVALID_END);
315
+ if (token) { return token; }
316
+
317
+ token = lexer_match_and_advance(lexer, hb_string("-->"), TOKEN_HTML_COMMENT_END);
306
318
  return token ? token : lexer_advance_current(lexer, TOKEN_DASH);
307
319
  }
308
320
 
@@ -1,22 +1,10 @@
1
1
  #include "include/lexer_peek_helpers.h"
2
2
  #include "include/lexer.h"
3
- #include "include/lexer_struct.h"
4
- #include "include/macros.h"
5
3
  #include "include/token.h"
6
- #include "include/util/hb_string.h"
7
4
 
8
5
  #include <ctype.h>
9
- #include <stdbool.h>
10
6
 
11
- char lexer_backtrack(const lexer_T* lexer, uint32_t offset) {
12
- return lexer->source.data[MAX(lexer->current_position - offset, 0)];
13
- }
14
-
15
- char lexer_peek(const lexer_T* lexer, uint32_t offset) {
16
- return lexer->source.data[MIN(lexer->current_position + offset, lexer->source.length)];
17
- }
18
-
19
- bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, const bool case_insensitive) {
7
+ static bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, bool case_insensitive) {
20
8
  hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position + offset);
21
9
  remaining_source.length = MIN(pattern.length, remaining_source.length);
22
10
 
@@ -47,31 +35,19 @@ bool lexer_peek_for_html_comment_start(const lexer_T* lexer, uint32_t offset) {
47
35
  return lexer_peek_for(lexer, offset, hb_string("<!--"), false);
48
36
  }
49
37
 
50
- bool lexer_peek_for_html_comment_end(const lexer_T* lexer, uint32_t offset) {
51
- return lexer_peek_for(lexer, offset, hb_string("-->"), false);
52
- }
53
-
54
- bool lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset) {
55
- return lexer_peek_for(lexer, offset, hb_string("%>"), false);
56
- }
38
+ bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) {
39
+ if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; }
57
40
 
58
- bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, uint32_t offset) {
59
- return lexer_peek_for(lexer, offset, hb_string("-%>"), false);
60
- }
41
+ uint32_t position = offset + 2;
61
42
 
62
- bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, uint32_t offset) {
63
- return lexer_peek_for(lexer, offset, hb_string("%%>"), false);
64
- }
43
+ while (lexer_peek(lexer, position) == ' ' || lexer_peek(lexer, position) == '\t'
44
+ || lexer_peek(lexer, position) == '\n' || lexer_peek(lexer, position) == '\r') {
45
+ position++;
46
+ }
65
47
 
66
- bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, uint32_t offset) {
67
- return lexer_peek_for(lexer, offset, hb_string("=%>"), false);
68
- }
48
+ char character = lexer_peek(lexer, position);
69
49
 
70
- bool lexer_peek_erb_end(const lexer_T* lexer, uint32_t offset) {
71
- return (
72
- lexer_peek_erb_close_tag(lexer, offset) || lexer_peek_erb_dash_close_tag(lexer, offset)
73
- || lexer_peek_erb_percent_close_tag(lexer, offset) || lexer_peek_erb_equals_close_tag(lexer, offset)
74
- );
50
+ return isalpha(character) || character == '_';
75
51
  }
76
52
 
77
53
  bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type) {
@@ -84,13 +60,13 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok
84
60
  token_T* token = lexer_next_token(lexer);
85
61
 
86
62
  while (token && (token->type == TOKEN_WHITESPACE || token->type == TOKEN_NEWLINE)) {
87
- token_free(token);
63
+ token_free(token, lexer->allocator);
88
64
  token = lexer_next_token(lexer);
89
65
  }
90
66
 
91
67
  bool result = (token && token->type == token_type);
92
68
 
93
- if (token) { token_free(token); }
69
+ if (token) { token_free(token, lexer->allocator); }
94
70
 
95
71
  lexer->current_position = saved_position;
96
72
  lexer->current_line = saved_line;
@@ -100,41 +76,3 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok
100
76
 
101
77
  return result;
102
78
  }
103
-
104
- bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) {
105
- if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; }
106
-
107
- uint32_t pos = offset + 2;
108
-
109
- while (lexer_peek(lexer, pos) == ' ' || lexer_peek(lexer, pos) == '\t' || lexer_peek(lexer, pos) == '\n'
110
- || lexer_peek(lexer, pos) == '\r') {
111
- pos++;
112
- }
113
-
114
- char c = lexer_peek(lexer, pos);
115
-
116
- return isalpha(c) || c == '_';
117
- }
118
-
119
- lexer_state_snapshot_T lexer_save_state(lexer_T* lexer) {
120
- lexer_state_snapshot_T snapshot = { .position = lexer->current_position,
121
- .line = lexer->current_line,
122
- .column = lexer->current_column,
123
- .previous_position = lexer->previous_position,
124
- .previous_line = lexer->previous_line,
125
- .previous_column = lexer->previous_column,
126
- .current_character = lexer->current_character,
127
- .state = lexer->state };
128
- return snapshot;
129
- }
130
-
131
- void lexer_restore_state(lexer_T* lexer, lexer_state_snapshot_T snapshot) {
132
- lexer->current_position = snapshot.position;
133
- lexer->current_line = snapshot.line;
134
- lexer->current_column = snapshot.column;
135
- lexer->previous_position = snapshot.previous_position;
136
- lexer->previous_line = snapshot.previous_line;
137
- lexer->previous_column = snapshot.previous_column;
138
- lexer->current_character = snapshot.current_character;
139
- lexer->state = snapshot.state;
140
- }
data/src/location.c CHANGED
@@ -17,8 +17,8 @@ void location_from_positions(location_T* location, position_T start, position_T
17
17
  location->end = end;
18
18
  }
19
19
 
20
- location_T* location_create(position_T start, position_T end) {
21
- location_T* location = malloc(sizeof(location_T));
20
+ location_T* location_create(position_T start, position_T end, hb_allocator_T* allocator) {
21
+ location_T* location = hb_allocator_alloc(allocator, sizeof(location_T));
22
22
 
23
23
  if (location != NULL) {
24
24
  location->start = start;
data/src/main.c CHANGED
@@ -1,16 +1,26 @@
1
1
  #define _POSIX_C_SOURCE 199309L // Enables `clock_gettime()`
2
2
 
3
- #include "include/analyze.h"
4
3
  #include "include/ast_node.h"
5
4
  #include "include/ast_nodes.h"
6
- #include "include/ast_pretty_print.h"
5
+
6
+ #ifndef HERB_EXCLUDE_PRETTYPRINT
7
+ # include "include/ast_pretty_print.h"
8
+ #endif
9
+
7
10
  #include "include/extract.h"
8
11
  #include "include/herb.h"
9
12
  #include "include/io.h"
13
+ #include "include/lex_helpers.h"
14
+ #include "include/macros.h"
10
15
  #include "include/ruby_parser.h"
16
+ #include "include/util/hb_allocator.h"
17
+ #include "include/util/hb_arena.h"
18
+ #include "include/util/hb_arena_debug.h"
11
19
  #include "include/util/hb_buffer.h"
20
+ #include "include/util/string.h"
12
21
 
13
22
  #include <stdio.h>
23
+ #include <stdlib.h>
14
24
  #include <string.h>
15
25
  #include <time.h>
16
26
 
@@ -34,125 +44,128 @@ void print_time_diff(const struct timespec start, const struct timespec end, con
34
44
 
35
45
  int main(const int argc, char* argv[]) {
36
46
  if (argc < 2) {
37
- printf("./herb [command] [options]\n\n");
47
+ puts("./herb [command] [options]\n");
38
48
 
39
- printf("Herb 🌿 Powerful and seamless HTML-aware ERB parsing and tooling.\n\n");
49
+ puts("Herb 🌿 Powerful and seamless HTML-aware ERB toolchain.\n");
40
50
 
41
- printf("./herb lex [file] - Lex a file\n");
42
- printf("./herb parse [file] - Parse a file\n");
43
- printf("./herb ruby [file] - Extract Ruby from a file\n");
44
- printf("./herb html [file] - Extract HTML from a file\n");
45
- printf("./herb prism [file] - Extract Ruby from a file and parse the Ruby source with Prism\n");
51
+ puts("./herb lex [file] - Lex a file");
52
+ puts("./herb parse [file] - Parse a file");
53
+ puts("./herb ruby [file] - Extract Ruby from a file");
54
+ puts("./herb html [file] - Extract HTML from a file");
55
+ puts("./herb prism [file] - Extract Ruby from a file and parse the Ruby source with Prism");
46
56
 
47
- return 1;
57
+ return EXIT_FAILURE;
48
58
  }
49
59
 
50
60
  if (argc < 3) {
51
- printf("Please specify input file.\n");
52
- return 1;
61
+ puts("Please specify input file.");
62
+ return EXIT_FAILURE;
53
63
  }
54
64
 
55
- hb_buffer_T output;
65
+ hb_allocator_T malloc_allocator = hb_allocator_with_malloc();
66
+ char* source = herb_read_file(argv[2], &malloc_allocator);
56
67
 
57
- if (!hb_buffer_init(&output, 4096)) { return 1; }
68
+ hb_allocator_T allocator;
69
+ if (!hb_allocator_init(&allocator, HB_ALLOCATOR_ARENA)) {
70
+ fprintf(stderr, "Failed to initialize allocator\n");
71
+ hb_allocator_dealloc(&malloc_allocator, source);
72
+ return EXIT_FAILURE;
73
+ }
58
74
 
59
- char* source = herb_read_file(argv[2]);
75
+ hb_buffer_T output;
76
+ if (!hb_buffer_init(&output, 4096, &allocator)) { return 1; }
60
77
 
61
78
  struct timespec start, end;
62
79
  clock_gettime(CLOCK_MONOTONIC, &start);
63
80
 
64
- if (strcmp(argv[1], "visit") == 0) {
65
- AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL);
66
- clock_gettime(CLOCK_MONOTONIC, &end);
67
-
68
- herb_analyze_parse_tree(root, source);
69
-
70
- ast_pretty_print_node((AST_NODE_T*) root, 0, 0, &output);
71
- printf("%s\n", output.value);
72
-
73
- print_time_diff(start, end, "visiting");
81
+ int silent = 0;
82
+ if (argc > 3 && string_equals(argv[3], "--silent")) { silent = 1; }
74
83
 
75
- ast_node_free((AST_NODE_T*) root);
76
- free(output.value);
77
- free(source);
78
-
79
- return 0;
80
- }
81
-
82
- if (strcmp(argv[1], "lex") == 0) {
83
- herb_lex_to_buffer(source, &output);
84
+ if (string_equals(argv[1], "lex")) {
85
+ herb_lex_to_buffer(source, &output, &allocator);
84
86
  clock_gettime(CLOCK_MONOTONIC, &end);
85
87
 
86
- printf("%s\n", output.value);
88
+ if (!silent) { hb_arena_print_stats((hb_arena_T*) allocator.context); }
89
+
90
+ puts(output.value);
87
91
  print_time_diff(start, end, "lexing");
88
92
 
89
- free(output.value);
90
- free(source);
93
+ hb_buffer_free(&output);
94
+ hb_allocator_destroy(&allocator);
95
+ hb_allocator_dealloc(&malloc_allocator, source);
91
96
 
92
- return 0;
97
+ return EXIT_SUCCESS;
93
98
  }
94
99
 
95
- if (strcmp(argv[1], "parse") == 0) {
96
- AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL);
97
-
98
- herb_analyze_parse_tree(root, source);
100
+ if (string_equals(argv[1], "parse")) {
101
+ AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL, &allocator);
99
102
 
100
103
  clock_gettime(CLOCK_MONOTONIC, &end);
101
104
 
102
- int silent = 0;
103
- if (argc > 3 && strcmp(argv[3], "--silent") == 0) { silent = 1; }
104
-
105
105
  if (!silent) {
106
+ hb_arena_print_stats((hb_arena_T*) allocator.context);
107
+
108
+ #ifndef HERB_EXCLUDE_PRETTYPRINT
106
109
  ast_pretty_print_node((AST_NODE_T*) root, 0, 0, &output);
107
- printf("%s\n", output.value);
110
+ puts(output.value);
111
+ #endif
108
112
 
109
113
  print_time_diff(start, end, "parsing");
110
114
  }
111
115
 
112
- ast_node_free((AST_NODE_T*) root);
113
- free(output.value);
114
- free(source);
116
+ ast_node_free((AST_NODE_T*) root, &allocator);
115
117
 
116
- return 0;
118
+ hb_buffer_free(&output);
119
+ hb_allocator_destroy(&allocator);
120
+ hb_allocator_dealloc(&malloc_allocator, source);
121
+
122
+ return EXIT_SUCCESS;
117
123
  }
118
124
 
119
- if (strcmp(argv[1], "ruby") == 0) {
120
- herb_extract_ruby_to_buffer(source, &output);
125
+ if (string_equals(argv[1], "ruby")) {
126
+ herb_extract_ruby_to_buffer(source, &output, &allocator);
121
127
  clock_gettime(CLOCK_MONOTONIC, &end);
122
128
 
123
- printf("%s\n", output.value);
129
+ puts(output.value);
124
130
  print_time_diff(start, end, "extracting Ruby");
125
131
 
126
- free(output.value);
127
- free(source);
132
+ hb_buffer_free(&output);
133
+ hb_allocator_destroy(&allocator);
134
+ hb_allocator_dealloc(&malloc_allocator, source);
128
135
 
129
- return 0;
136
+ return EXIT_SUCCESS;
130
137
  }
131
138
 
132
- if (strcmp(argv[1], "html") == 0) {
133
- herb_extract_html_to_buffer(source, &output);
139
+ if (string_equals(argv[1], "html")) {
140
+ herb_extract_html_to_buffer(source, &output, &allocator);
134
141
  clock_gettime(CLOCK_MONOTONIC, &end);
135
142
 
136
- printf("%s\n", output.value);
143
+ puts(output.value);
137
144
  print_time_diff(start, end, "extracting HTML");
138
145
 
139
- free(output.value);
140
- free(source);
146
+ hb_buffer_free(&output);
147
+ hb_allocator_destroy(&allocator);
148
+ hb_allocator_dealloc(&malloc_allocator, source);
141
149
 
142
- return 0;
150
+ return EXIT_SUCCESS;
143
151
  }
144
152
 
145
- if (strcmp(argv[1], "prism") == 0) {
153
+ if (string_equals(argv[1], "prism")) {
146
154
  printf("HTML+ERB File: \n%s\n", source);
147
155
 
148
- char* ruby_source = herb_extract(source, HERB_EXTRACT_LANGUAGE_RUBY);
156
+ char* ruby_source = herb_extract(source, HERB_EXTRACT_LANGUAGE_RUBY, &allocator);
149
157
  printf("Extracted Ruby: \n%s\n", ruby_source);
150
158
 
151
159
  herb_parse_ruby_to_stdout(ruby_source);
152
160
 
153
- return 0;
161
+ hb_allocator_dealloc(&allocator, ruby_source);
162
+ hb_buffer_free(&output);
163
+ hb_allocator_destroy(&allocator);
164
+ hb_allocator_dealloc(&malloc_allocator, source);
165
+
166
+ return EXIT_SUCCESS;
154
167
  }
155
168
 
156
169
  printf("Unknown Command: %s\n", argv[1]);
157
- return 1;
170
+ return EXIT_FAILURE;
158
171
  }