@herb-tools/node 0.8.9 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/binding.gyp +26 -8
  3. package/dist/herb-node.cjs +41 -12
  4. package/dist/herb-node.cjs.map +1 -1
  5. package/dist/herb-node.esm.js +8 -1
  6. package/dist/herb-node.esm.js.map +1 -1
  7. package/dist/types/node-backend.d.ts +3 -1
  8. package/extension/error_helpers.cpp +419 -71
  9. package/extension/error_helpers.h +14 -3
  10. package/extension/extension_helpers.cpp +38 -35
  11. package/extension/extension_helpers.h +2 -2
  12. package/extension/herb.cpp +183 -64
  13. package/extension/libherb/analyze/action_view/attribute_extraction_helpers.c +290 -0
  14. package/extension/libherb/analyze/action_view/attribute_extraction_helpers.h +36 -0
  15. package/extension/libherb/analyze/action_view/content_tag.c +70 -0
  16. package/extension/libherb/analyze/action_view/link_to.c +143 -0
  17. package/extension/libherb/analyze/action_view/registry.c +60 -0
  18. package/extension/libherb/analyze/action_view/tag.c +64 -0
  19. package/extension/libherb/analyze/action_view/tag_helper_handler.h +41 -0
  20. package/extension/libherb/analyze/action_view/tag_helper_node_builders.c +305 -0
  21. package/extension/libherb/analyze/action_view/tag_helper_node_builders.h +70 -0
  22. package/extension/libherb/analyze/action_view/tag_helpers.c +748 -0
  23. package/extension/libherb/analyze/action_view/tag_helpers.h +38 -0
  24. package/extension/libherb/analyze/action_view/turbo_frame_tag.c +88 -0
  25. package/extension/libherb/analyze/analyze.c +882 -0
  26. package/extension/libherb/{include → analyze}/analyze.h +14 -4
  27. package/extension/libherb/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
  28. package/extension/libherb/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
  29. package/extension/libherb/analyze/builders.c +343 -0
  30. package/extension/libherb/analyze/builders.h +27 -0
  31. package/extension/libherb/analyze/conditional_elements.c +594 -0
  32. package/extension/libherb/analyze/conditional_elements.h +9 -0
  33. package/extension/libherb/analyze/conditional_open_tags.c +640 -0
  34. package/extension/libherb/analyze/conditional_open_tags.h +9 -0
  35. package/extension/libherb/analyze/control_type.c +250 -0
  36. package/extension/libherb/analyze/control_type.h +14 -0
  37. package/extension/libherb/{analyze_helpers.c → analyze/helpers.c} +79 -31
  38. package/extension/libherb/{analyze_helpers.h → analyze/helpers.h} +22 -17
  39. package/extension/libherb/analyze/invalid_structures.c +193 -0
  40. package/extension/libherb/analyze/invalid_structures.h +11 -0
  41. package/extension/libherb/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
  42. package/extension/libherb/analyze/parse_errors.c +84 -0
  43. package/extension/libherb/analyze/prism_annotate.c +397 -0
  44. package/extension/libherb/analyze/prism_annotate.h +16 -0
  45. package/extension/libherb/{analyze_transform.c → analyze/transform.c} +17 -3
  46. package/extension/libherb/ast_node.c +17 -7
  47. package/extension/libherb/ast_node.h +11 -5
  48. package/extension/libherb/ast_nodes.c +663 -388
  49. package/extension/libherb/ast_nodes.h +118 -39
  50. package/extension/libherb/ast_pretty_print.c +191 -7
  51. package/extension/libherb/ast_pretty_print.h +6 -1
  52. package/extension/libherb/element_source.h +3 -8
  53. package/extension/libherb/errors.c +1100 -507
  54. package/extension/libherb/errors.h +155 -54
  55. package/extension/libherb/extract.c +148 -49
  56. package/extension/libherb/extract.h +21 -5
  57. package/extension/libherb/herb.c +52 -34
  58. package/extension/libherb/herb.h +18 -6
  59. package/extension/libherb/herb_prism_node.h +13 -0
  60. package/extension/libherb/html_util.c +241 -12
  61. package/extension/libherb/html_util.h +7 -2
  62. package/extension/libherb/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
  63. package/extension/libherb/include/analyze/action_view/tag_helper_handler.h +41 -0
  64. package/extension/libherb/include/analyze/action_view/tag_helper_node_builders.h +70 -0
  65. package/extension/libherb/include/analyze/action_view/tag_helpers.h +38 -0
  66. package/extension/libherb/{analyze.h → include/analyze/analyze.h} +14 -4
  67. package/extension/libherb/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
  68. package/extension/libherb/include/analyze/builders.h +27 -0
  69. package/extension/libherb/include/analyze/conditional_elements.h +9 -0
  70. package/extension/libherb/include/analyze/conditional_open_tags.h +9 -0
  71. package/extension/libherb/include/analyze/control_type.h +14 -0
  72. package/extension/libherb/include/{analyze_helpers.h → analyze/helpers.h} +22 -17
  73. package/extension/libherb/include/analyze/invalid_structures.h +11 -0
  74. package/extension/libherb/include/analyze/prism_annotate.h +16 -0
  75. package/extension/libherb/include/ast_node.h +11 -5
  76. package/extension/libherb/include/ast_nodes.h +118 -39
  77. package/extension/libherb/include/ast_pretty_print.h +6 -1
  78. package/extension/libherb/include/element_source.h +3 -8
  79. package/extension/libherb/include/errors.h +155 -54
  80. package/extension/libherb/include/extract.h +21 -5
  81. package/extension/libherb/include/herb.h +18 -6
  82. package/extension/libherb/include/herb_prism_node.h +13 -0
  83. package/extension/libherb/include/html_util.h +7 -2
  84. package/extension/libherb/include/io.h +3 -1
  85. package/extension/libherb/include/lex_helpers.h +29 -0
  86. package/extension/libherb/include/lexer.h +1 -1
  87. package/extension/libherb/include/lexer_peek_helpers.h +87 -13
  88. package/extension/libherb/include/lexer_struct.h +2 -0
  89. package/extension/libherb/include/location.h +2 -1
  90. package/extension/libherb/include/parser.h +27 -2
  91. package/extension/libherb/include/parser_helpers.h +19 -3
  92. package/extension/libherb/include/pretty_print.h +10 -5
  93. package/extension/libherb/include/prism_context.h +45 -0
  94. package/extension/libherb/include/prism_helpers.h +10 -7
  95. package/extension/libherb/include/prism_serialized.h +12 -0
  96. package/extension/libherb/include/token.h +16 -4
  97. package/extension/libherb/include/token_struct.h +10 -3
  98. package/extension/libherb/include/utf8.h +2 -1
  99. package/extension/libherb/include/util/hb_allocator.h +78 -0
  100. package/extension/libherb/include/util/hb_arena.h +6 -1
  101. package/extension/libherb/include/util/hb_arena_debug.h +12 -1
  102. package/extension/libherb/include/util/hb_array.h +7 -3
  103. package/extension/libherb/include/util/hb_buffer.h +6 -4
  104. package/extension/libherb/include/util/hb_foreach.h +79 -0
  105. package/extension/libherb/include/util/hb_narray.h +8 -4
  106. package/extension/libherb/include/util/hb_string.h +56 -9
  107. package/extension/libherb/include/util/string.h +11 -0
  108. package/extension/libherb/include/util.h +6 -3
  109. package/extension/libherb/include/version.h +1 -1
  110. package/extension/libherb/io.c +3 -2
  111. package/extension/libherb/io.h +3 -1
  112. package/extension/libherb/lex_helpers.h +29 -0
  113. package/extension/libherb/lexer.c +42 -30
  114. package/extension/libherb/lexer.h +1 -1
  115. package/extension/libherb/lexer_peek_helpers.c +12 -74
  116. package/extension/libherb/lexer_peek_helpers.h +87 -13
  117. package/extension/libherb/lexer_struct.h +2 -0
  118. package/extension/libherb/location.c +2 -2
  119. package/extension/libherb/location.h +2 -1
  120. package/extension/libherb/main.c +79 -66
  121. package/extension/libherb/parser.c +784 -247
  122. package/extension/libherb/parser.h +27 -2
  123. package/extension/libherb/parser_helpers.c +110 -23
  124. package/extension/libherb/parser_helpers.h +19 -3
  125. package/extension/libherb/parser_match_tags.c +110 -49
  126. package/extension/libherb/pretty_print.c +29 -24
  127. package/extension/libherb/pretty_print.h +10 -5
  128. package/extension/libherb/prism_context.h +45 -0
  129. package/extension/libherb/prism_helpers.c +30 -27
  130. package/extension/libherb/prism_helpers.h +10 -7
  131. package/extension/libherb/prism_serialized.h +12 -0
  132. package/extension/libherb/ruby_parser.c +2 -0
  133. package/extension/libherb/token.c +151 -66
  134. package/extension/libherb/token.h +16 -4
  135. package/extension/libherb/token_matchers.c +0 -1
  136. package/extension/libherb/token_struct.h +10 -3
  137. package/extension/libherb/utf8.c +7 -6
  138. package/extension/libherb/utf8.h +2 -1
  139. package/extension/libherb/util/hb_allocator.c +341 -0
  140. package/extension/libherb/util/hb_allocator.h +78 -0
  141. package/extension/libherb/util/hb_arena.c +81 -56
  142. package/extension/libherb/util/hb_arena.h +6 -1
  143. package/extension/libherb/util/hb_arena_debug.c +32 -17
  144. package/extension/libherb/util/hb_arena_debug.h +12 -1
  145. package/extension/libherb/util/hb_array.c +30 -15
  146. package/extension/libherb/util/hb_array.h +7 -3
  147. package/extension/libherb/util/hb_buffer.c +17 -21
  148. package/extension/libherb/util/hb_buffer.h +6 -4
  149. package/extension/libherb/util/hb_foreach.h +79 -0
  150. package/extension/libherb/util/hb_narray.c +22 -7
  151. package/extension/libherb/util/hb_narray.h +8 -4
  152. package/extension/libherb/util/hb_string.c +49 -35
  153. package/extension/libherb/util/hb_string.h +56 -9
  154. package/extension/libherb/util/string.h +11 -0
  155. package/extension/libherb/util.c +21 -11
  156. package/extension/libherb/util.h +6 -3
  157. package/extension/libherb/version.h +1 -1
  158. package/extension/libherb/visitor.c +48 -1
  159. package/extension/nodes.cpp +451 -6
  160. package/extension/nodes.h +8 -1
  161. package/extension/prism/include/prism/ast.h +4 -4
  162. package/extension/prism/include/prism/version.h +2 -2
  163. package/extension/prism/src/prism.c +1 -1
  164. package/package.json +12 -8
  165. package/src/node-backend.ts +11 -1
  166. package/dist/types/index-cjs.d.cts +0 -1
  167. package/extension/libherb/analyze.c +0 -1594
  168. package/extension/libherb/element_source.c +0 -12
  169. package/extension/libherb/include/util/hb_system.h +0 -9
  170. package/extension/libherb/util/hb_system.c +0 -30
  171. package/extension/libherb/util/hb_system.h +0 -9
  172. package/src/index-cjs.cts +0 -22
  173. /package/dist/types/{index-esm.d.mts → index.d.ts} +0 -0
  174. /package/src/{index-esm.mts → index.ts} +0 -0
@@ -3,20 +3,18 @@
3
3
  #include "include/errors.h"
4
4
  #include "include/location.h"
5
5
  #include "include/position.h"
6
- #include "include/util.h"
7
6
  #include "include/util/hb_buffer.h"
7
+ #include "include/util/hb_string.h"
8
8
 
9
9
  #include <prism.h>
10
10
  #include <stdlib.h>
11
11
  #include <string.h>
12
12
 
13
- const char* pm_error_level_to_string(pm_error_level_t level) {
13
+ hb_string_T pm_error_level_to_string(pm_error_level_t level) {
14
14
  switch (level) {
15
- case PM_ERROR_LEVEL_SYNTAX: return "syntax";
16
- case PM_ERROR_LEVEL_ARGUMENT: return "argument";
17
- case PM_ERROR_LEVEL_LOAD: return "load";
18
-
19
- default: return "Unknown pm_error_level_t";
15
+ case PM_ERROR_LEVEL_SYNTAX: return hb_string("syntax");
16
+ case PM_ERROR_LEVEL_ARGUMENT: return hb_string("argument");
17
+ case PM_ERROR_LEVEL_LOAD: return hb_string("load");
20
18
  }
21
19
  }
22
20
 
@@ -24,7 +22,8 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error(
24
22
  const pm_diagnostic_t* error,
25
23
  const AST_NODE_T* node,
26
24
  const char* source,
27
- pm_parser_t* parser
25
+ pm_parser_t* parser,
26
+ hb_allocator_T* allocator
28
27
  ) {
29
28
  size_t start_offset = (size_t) (error->location.start - parser->start);
30
29
  size_t end_offset = (size_t) (error->location.end - parser->start);
@@ -33,25 +32,28 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error(
33
32
  position_T end = position_from_source_with_offset(source, end_offset);
34
33
 
35
34
  return ruby_parse_error_init(
36
- error->message,
37
- pm_diagnostic_id_human(error->diag_id),
35
+ hb_string(error->message),
36
+ hb_string(pm_diagnostic_id_human(error->diag_id)),
38
37
  pm_error_level_to_string(error->level),
39
38
  start,
40
- end
39
+ end,
40
+ allocator
41
41
  );
42
42
  }
43
43
 
44
44
  RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error_with_positions(
45
45
  const pm_diagnostic_t* error,
46
46
  position_T start,
47
- position_T end
47
+ position_T end,
48
+ hb_allocator_T* allocator
48
49
  ) {
49
50
  return ruby_parse_error_init(
50
- error->message,
51
- pm_diagnostic_id_human(error->diag_id),
51
+ hb_string(error->message),
52
+ hb_string(pm_diagnostic_id_human(error->diag_id)),
52
53
  pm_error_level_to_string(error->level),
53
54
  start,
54
- end
55
+ end,
56
+ allocator
55
57
  );
56
58
  }
57
59
 
@@ -118,7 +120,7 @@ static bool search_then_keyword_location(const pm_node_t* node, void* data) {
118
120
  return false;
119
121
  }
120
122
 
121
- location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source) {
123
+ location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source, hb_allocator_T* allocator) {
122
124
  if (analyzed == NULL || analyzed->root == NULL || source == NULL) { return NULL; }
123
125
 
124
126
  then_keyword_search_context_T context = { .then_keyword_loc = { .start = NULL, .end = NULL }, .found = false };
@@ -133,7 +135,7 @@ location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* sou
133
135
  position_T start_position = position_from_source_with_offset(source, start_offset);
134
136
  position_T end_position = position_from_source_with_offset(source, end_offset);
135
137
 
136
- return location_create(start_position, end_position);
138
+ return location_create(start_position, end_position, allocator);
137
139
  }
138
140
 
139
141
  static location_T* parse_wrapped_and_find_then_keyword(
@@ -142,7 +144,8 @@ static location_T* parse_wrapped_and_find_then_keyword(
142
144
  size_t source_length,
143
145
  size_t prefix_length,
144
146
  size_t adjustment_threshold,
145
- size_t adjustment_amount
147
+ size_t adjustment_amount,
148
+ hb_allocator_T* allocator
146
149
  ) {
147
150
  pm_parser_t parser;
148
151
  pm_parser_init(&parser, (const uint8_t*) hb_buffer_value(buffer), hb_buffer_length(buffer), NULL);
@@ -177,7 +180,7 @@ static location_T* parse_wrapped_and_find_then_keyword(
177
180
  position_T start_position = position_from_source_with_offset(source, start_offset);
178
181
  position_T end_position = position_from_source_with_offset(source, end_offset);
179
182
 
180
- location = location_create(start_position, end_position);
183
+ location = location_create(start_position, end_position, allocator);
181
184
  }
182
185
  }
183
186
  }
@@ -188,14 +191,14 @@ static location_T* parse_wrapped_and_find_then_keyword(
188
191
  return location;
189
192
  }
190
193
 
191
- location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause) {
194
+ location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause, hb_allocator_T* allocator) {
192
195
  if (source == NULL) { return NULL; }
193
196
 
194
197
  size_t source_length = strlen(source);
195
198
 
196
199
  hb_buffer_T buffer;
197
200
 
198
- if (!hb_buffer_init(&buffer, source_length + 16)) { return NULL; }
201
+ if (!hb_buffer_init(&buffer, source_length + 16, allocator)) { return NULL; }
199
202
 
200
203
  hb_buffer_append(&buffer, "case x\n");
201
204
  size_t prefix_length = hb_buffer_length(&buffer);
@@ -203,14 +206,14 @@ location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_cla
203
206
  hb_buffer_append(&buffer, "\nend");
204
207
 
205
208
  location_T* location =
206
- parse_wrapped_and_find_then_keyword(&buffer, source, source_length, prefix_length, SIZE_MAX, 0);
209
+ parse_wrapped_and_find_then_keyword(&buffer, source, source_length, prefix_length, SIZE_MAX, 0, allocator);
207
210
 
208
- free(buffer.value);
211
+ hb_buffer_free(&buffer);
209
212
 
210
213
  return location;
211
214
  }
212
215
 
213
- location_T* get_then_keyword_location_elsif_wrapped(const char* source) {
216
+ location_T* get_then_keyword_location_elsif_wrapped(const char* source, hb_allocator_T* allocator) {
214
217
  if (source == NULL) { return NULL; }
215
218
 
216
219
  const char* elsif_position = strstr(source, "elsif");
@@ -223,7 +226,7 @@ location_T* get_then_keyword_location_elsif_wrapped(const char* source) {
223
226
 
224
227
  hb_buffer_T buffer;
225
228
 
226
- if (!hb_buffer_init(&buffer, source_length + 8)) { return NULL; }
229
+ if (!hb_buffer_init(&buffer, source_length + 8, allocator)) { return NULL; }
227
230
 
228
231
  hb_buffer_append_with_length(&buffer, source, elsif_offset);
229
232
  hb_buffer_append(&buffer, "if");
@@ -232,9 +235,9 @@ location_T* get_then_keyword_location_elsif_wrapped(const char* source) {
232
235
  hb_buffer_append(&buffer, "\nend");
233
236
 
234
237
  location_T* location =
235
- parse_wrapped_and_find_then_keyword(&buffer, source, source_length, 0, if_end_offset, replacement_diff);
238
+ parse_wrapped_and_find_then_keyword(&buffer, source, source_length, 0, if_end_offset, replacement_diff, allocator);
236
239
 
237
- free(buffer.value);
240
+ hb_buffer_free(&buffer);
238
241
 
239
242
  return location;
240
243
  }
@@ -1,31 +1,34 @@
1
1
  #ifndef HERB_PRISM_HELPERS_H
2
2
  #define HERB_PRISM_HELPERS_H
3
3
 
4
- #include "analyzed_ruby.h"
4
+ #include "analyze/analyzed_ruby.h"
5
5
  #include "ast_nodes.h"
6
6
  #include "errors.h"
7
7
  #include "location.h"
8
8
  #include "position.h"
9
+ #include "util/hb_allocator.h"
9
10
 
10
11
  #include <prism.h>
11
12
 
12
- const char* pm_error_level_to_string(pm_error_level_t level);
13
+ hb_string_T pm_error_level_to_string(pm_error_level_t level);
13
14
 
14
15
  RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error(
15
16
  const pm_diagnostic_t* error,
16
17
  const AST_NODE_T* node,
17
18
  const char* source,
18
- pm_parser_t* parser
19
+ pm_parser_t* parser,
20
+ hb_allocator_T* allocator
19
21
  );
20
22
 
21
23
  RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error_with_positions(
22
24
  const pm_diagnostic_t* error,
23
25
  position_T start,
24
- position_T end
26
+ position_T end,
27
+ hb_allocator_T* allocator
25
28
  );
26
29
 
27
- location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source);
28
- location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause);
29
- location_T* get_then_keyword_location_elsif_wrapped(const char* source);
30
+ location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source, hb_allocator_T* allocator);
31
+ location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause, hb_allocator_T* allocator);
32
+ location_T* get_then_keyword_location_elsif_wrapped(const char* source, hb_allocator_T* allocator);
30
33
 
31
34
  #endif
@@ -0,0 +1,12 @@
1
+ #ifndef HERB_PRISM_SERIALIZED_H
2
+ #define HERB_PRISM_SERIALIZED_H
3
+
4
+ #include <stddef.h>
5
+ #include <stdint.h>
6
+
7
+ typedef struct {
8
+ uint8_t* data;
9
+ size_t length;
10
+ } prism_serialized_T;
11
+
12
+ #endif
@@ -38,8 +38,10 @@ void herb_parse_ruby_to_stdout(char* source) {
38
38
 
39
39
  pm_visit_node(root, herb_prism_visit, data);
40
40
 
41
+ #ifndef PRISM_EXCLUDE_PRETTYPRINT
41
42
  pm_prettyprint(&buffer, &parser, root);
42
43
  printf("%s\n", buffer.value);
44
+ #endif
43
45
 
44
46
  pm_buffer_free(&buffer);
45
47
  pm_node_destroy(&parser, root);
@@ -1,24 +1,30 @@
1
1
  #include "include/token.h"
2
- #include "include/lexer.h"
3
2
  #include "include/position.h"
4
3
  #include "include/range.h"
5
4
  #include "include/token_struct.h"
6
5
  #include "include/util.h"
6
+ #include "include/util/hb_allocator.h"
7
+ #include "include/util/hb_buffer.h"
8
+ #include "include/util/hb_string.h"
7
9
 
10
+ #include <stdarg.h>
8
11
  #include <stdbool.h>
9
12
  #include <stdio.h>
10
13
  #include <stdlib.h>
11
14
  #include <string.h>
12
15
 
13
16
  token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer) {
14
- token_T* token = calloc(1, sizeof(token_T));
17
+ hb_allocator_T* allocator = lexer->allocator;
18
+ token_T* token = hb_allocator_alloc(allocator, sizeof(token_T));
19
+
20
+ if (!token) { return NULL; }
15
21
 
16
22
  if (type == TOKEN_NEWLINE) {
17
23
  lexer->current_line++;
18
24
  lexer->current_column = 0;
19
25
  }
20
26
 
21
- token->value = hb_string_to_c_string_using_malloc(value);
27
+ token->value = value;
22
28
 
23
29
  token->type = type;
24
30
  token->range = (range_T) { .from = lexer->previous_position, .to = lexer->current_position };
@@ -38,65 +44,147 @@ token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer)
38
44
  return token;
39
45
  }
40
46
 
41
- const char* token_type_to_string(const token_type_T type) {
47
+ hb_string_T token_type_to_string(const token_type_T type) {
48
+ switch (type) {
49
+ case TOKEN_WHITESPACE: return hb_string("TOKEN_WHITESPACE");
50
+ case TOKEN_NBSP: return hb_string("TOKEN_NBSP");
51
+ case TOKEN_NEWLINE: return hb_string("TOKEN_NEWLINE");
52
+ case TOKEN_IDENTIFIER: return hb_string("TOKEN_IDENTIFIER");
53
+ case TOKEN_HTML_DOCTYPE: return hb_string("TOKEN_HTML_DOCTYPE");
54
+ case TOKEN_XML_DECLARATION: return hb_string("TOKEN_XML_DECLARATION");
55
+ case TOKEN_XML_DECLARATION_END: return hb_string("TOKEN_XML_DECLARATION_END");
56
+ case TOKEN_CDATA_START: return hb_string("TOKEN_CDATA_START");
57
+ case TOKEN_CDATA_END: return hb_string("TOKEN_CDATA_END");
58
+ case TOKEN_HTML_TAG_START: return hb_string("TOKEN_HTML_TAG_START");
59
+ case TOKEN_HTML_TAG_END: return hb_string("TOKEN_HTML_TAG_END");
60
+ case TOKEN_HTML_TAG_START_CLOSE: return hb_string("TOKEN_HTML_TAG_START_CLOSE");
61
+ case TOKEN_HTML_TAG_SELF_CLOSE: return hb_string("TOKEN_HTML_TAG_SELF_CLOSE");
62
+ case TOKEN_HTML_COMMENT_START: return hb_string("TOKEN_HTML_COMMENT_START");
63
+ case TOKEN_HTML_COMMENT_END: return hb_string("TOKEN_HTML_COMMENT_END");
64
+ case TOKEN_HTML_COMMENT_INVALID_END: return hb_string("TOKEN_HTML_COMMENT_INVALID_END");
65
+ case TOKEN_EQUALS: return hb_string("TOKEN_EQUALS");
66
+ case TOKEN_QUOTE: return hb_string("TOKEN_QUOTE");
67
+ case TOKEN_BACKTICK: return hb_string("TOKEN_BACKTICK");
68
+ case TOKEN_BACKSLASH: return hb_string("TOKEN_BACKSLASH");
69
+ case TOKEN_DASH: return hb_string("TOKEN_DASH");
70
+ case TOKEN_UNDERSCORE: return hb_string("TOKEN_UNDERSCORE");
71
+ case TOKEN_EXCLAMATION: return hb_string("TOKEN_EXCLAMATION");
72
+ case TOKEN_SLASH: return hb_string("TOKEN_SLASH");
73
+ case TOKEN_SEMICOLON: return hb_string("TOKEN_SEMICOLON");
74
+ case TOKEN_COLON: return hb_string("TOKEN_COLON");
75
+ case TOKEN_AT: return hb_string("TOKEN_AT");
76
+ case TOKEN_LT: return hb_string("TOKEN_LT");
77
+ case TOKEN_PERCENT: return hb_string("TOKEN_PERCENT");
78
+ case TOKEN_AMPERSAND: return hb_string("TOKEN_AMPERSAND");
79
+ case TOKEN_ERB_START: return hb_string("TOKEN_ERB_START");
80
+ case TOKEN_ERB_CONTENT: return hb_string("TOKEN_ERB_CONTENT");
81
+ case TOKEN_ERB_END: return hb_string("TOKEN_ERB_END");
82
+ case TOKEN_CHARACTER: return hb_string("TOKEN_CHARACTER");
83
+ case TOKEN_ERROR: return hb_string("TOKEN_ERROR");
84
+ case TOKEN_EOF: return hb_string("TOKEN_EOF");
85
+ }
86
+ }
87
+
88
+ hb_string_T token_type_to_friendly_string(const token_type_T type) {
42
89
  switch (type) {
43
- case TOKEN_WHITESPACE: return "TOKEN_WHITESPACE";
44
- case TOKEN_NBSP: return "TOKEN_NBSP";
45
- case TOKEN_NEWLINE: return "TOKEN_NEWLINE";
46
- case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER";
47
- case TOKEN_HTML_DOCTYPE: return "TOKEN_HTML_DOCTYPE";
48
- case TOKEN_XML_DECLARATION: return "TOKEN_XML_DECLARATION";
49
- case TOKEN_XML_DECLARATION_END: return "TOKEN_XML_DECLARATION_END";
50
- case TOKEN_CDATA_START: return "TOKEN_CDATA_START";
51
- case TOKEN_CDATA_END: return "TOKEN_CDATA_END";
52
- case TOKEN_HTML_TAG_START: return "TOKEN_HTML_TAG_START";
53
- case TOKEN_HTML_TAG_END: return "TOKEN_HTML_TAG_END";
54
- case TOKEN_HTML_TAG_START_CLOSE: return "TOKEN_HTML_TAG_START_CLOSE";
55
- case TOKEN_HTML_TAG_SELF_CLOSE: return "TOKEN_HTML_TAG_SELF_CLOSE";
56
- case TOKEN_HTML_COMMENT_START: return "TOKEN_HTML_COMMENT_START";
57
- case TOKEN_HTML_COMMENT_END: return "TOKEN_HTML_COMMENT_END";
58
- case TOKEN_EQUALS: return "TOKEN_EQUALS";
59
- case TOKEN_QUOTE: return "TOKEN_QUOTE";
60
- case TOKEN_BACKTICK: return "TOKEN_BACKTICK";
61
- case TOKEN_BACKSLASH: return "TOKEN_BACKSLASH";
62
- case TOKEN_DASH: return "TOKEN_DASH";
63
- case TOKEN_UNDERSCORE: return "TOKEN_UNDERSCORE";
64
- case TOKEN_EXCLAMATION: return "TOKEN_EXCLAMATION";
65
- case TOKEN_SLASH: return "TOKEN_SLASH";
66
- case TOKEN_SEMICOLON: return "TOKEN_SEMICOLON";
67
- case TOKEN_COLON: return "TOKEN_COLON";
68
- case TOKEN_AT: return "TOKEN_AT";
69
- case TOKEN_LT: return "TOKEN_LT";
70
- case TOKEN_PERCENT: return "TOKEN_PERCENT";
71
- case TOKEN_AMPERSAND: return "TOKEN_AMPERSAND";
72
- case TOKEN_ERB_START: return "TOKEN_ERB_START";
73
- case TOKEN_ERB_CONTENT: return "TOKEN_ERB_CONTENT";
74
- case TOKEN_ERB_END: return "TOKEN_ERB_END";
75
- case TOKEN_CHARACTER: return "TOKEN_CHARACTER";
76
- case TOKEN_ERROR: return "TOKEN_ERROR";
77
- case TOKEN_EOF: return "TOKEN_EOF";
90
+ case TOKEN_WHITESPACE: return hb_string("whitespace");
91
+ case TOKEN_NBSP: return hb_string("non-breaking space");
92
+ case TOKEN_NEWLINE: return hb_string("a newline");
93
+ case TOKEN_IDENTIFIER: return hb_string("an identifier");
94
+ case TOKEN_HTML_DOCTYPE: return hb_string("`<!DOCTYPE`");
95
+ case TOKEN_XML_DECLARATION: return hb_string("`<?xml`");
96
+ case TOKEN_XML_DECLARATION_END: return hb_string("`?>`");
97
+ case TOKEN_CDATA_START: return hb_string("`<![CDATA[`");
98
+ case TOKEN_CDATA_END: return hb_string("`]]>`");
99
+ case TOKEN_HTML_TAG_START: return hb_string("`<`");
100
+ case TOKEN_HTML_TAG_END: return hb_string("`>`");
101
+ case TOKEN_HTML_TAG_START_CLOSE: return hb_string("`</`");
102
+ case TOKEN_HTML_TAG_SELF_CLOSE: return hb_string("`/>`");
103
+ case TOKEN_HTML_COMMENT_START: return hb_string("`<!--`");
104
+ case TOKEN_HTML_COMMENT_END: return hb_string("`-->`");
105
+ case TOKEN_HTML_COMMENT_INVALID_END: return hb_string("`--!>`");
106
+ case TOKEN_EQUALS: return hb_string("`=`");
107
+ case TOKEN_QUOTE: return hb_string("a quote");
108
+ case TOKEN_BACKTICK: return hb_string("a backtick");
109
+ case TOKEN_BACKSLASH: return hb_string("`\\`");
110
+ case TOKEN_DASH: return hb_string("`-`");
111
+ case TOKEN_UNDERSCORE: return hb_string("`_`");
112
+ case TOKEN_EXCLAMATION: return hb_string("`!`");
113
+ case TOKEN_SLASH: return hb_string("`/`");
114
+ case TOKEN_SEMICOLON: return hb_string("`;`");
115
+ case TOKEN_COLON: return hb_string("`:`");
116
+ case TOKEN_AT: return hb_string("`@`");
117
+ case TOKEN_LT: return hb_string("`<`");
118
+ case TOKEN_PERCENT: return hb_string("`%`");
119
+ case TOKEN_AMPERSAND: return hb_string("`&`");
120
+ case TOKEN_ERB_START: return hb_string("`<%`");
121
+ case TOKEN_ERB_CONTENT: return hb_string("ERB content");
122
+ case TOKEN_ERB_END: return hb_string("`%>`");
123
+ case TOKEN_CHARACTER: return hb_string("a character");
124
+ case TOKEN_ERROR: return hb_string("an error token");
125
+ case TOKEN_EOF: return hb_string("end of file");
126
+ }
127
+ }
128
+
129
+ char* token_types_to_friendly_string_valist(hb_allocator_T* allocator, token_type_T first_token, va_list args) {
130
+ if ((int) first_token == TOKEN_SENTINEL) { return hb_allocator_strdup(allocator, ""); }
131
+
132
+ size_t count = 0;
133
+ hb_string_T names[32];
134
+ token_type_T current = first_token;
135
+
136
+ while ((int) current != TOKEN_SENTINEL && count < 32) {
137
+ names[count++] = token_type_to_friendly_string(current);
138
+ current = va_arg(args, token_type_T);
139
+ }
140
+
141
+ hb_buffer_T buffer;
142
+ hb_buffer_init(&buffer, 128, allocator);
143
+
144
+ for (size_t i = 0; i < count; i++) {
145
+ hb_buffer_append_string(&buffer, names[i]);
146
+
147
+ if (i < count - 1) {
148
+ if (count > 2) { hb_buffer_append(&buffer, ", "); }
149
+ if (i == count - 2) { hb_buffer_append(&buffer, count == 2 ? " or " : "or "); }
150
+ }
78
151
  }
79
152
 
80
- return "Unknown token_type_T";
153
+ return hb_buffer_value(&buffer);
154
+ }
155
+
156
+ char* token_types_to_friendly_string_va(hb_allocator_T* allocator, token_type_T first_token, ...) {
157
+ va_list args;
158
+ va_start(args, first_token);
159
+ char* result = token_types_to_friendly_string_valist(allocator, first_token, args);
160
+ va_end(args);
161
+ return result;
81
162
  }
82
163
 
83
- hb_string_T token_to_string(const token_T* token) {
84
- const char* type_string = token_type_to_string(token->type);
85
- const char* template = "#<Herb::Token type=\"%s\" value=\"%.*s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>";
164
+ hb_string_T token_to_string(hb_allocator_T* allocator, const token_T* token) {
165
+ hb_string_T type_string = token_type_to_string(token->type);
166
+ hb_string_T template =
167
+ hb_string("#<Herb::Token type=\"%.*s\" value=\"%.*s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>");
168
+
169
+ char* string = hb_allocator_alloc(allocator, template.length + type_string.length + token->value.length + 16);
170
+
171
+ if (!string) { return HB_STRING_EMPTY; }
172
+
173
+ memset(string, 0, template.length + type_string.length + token->value.length + 16);
86
174
 
87
- char* string = calloc(strlen(type_string) + strlen(template) + strlen(token->value) + 16, sizeof(char));
88
175
  hb_string_T escaped;
89
176
 
90
177
  if (token->type == TOKEN_EOF) {
91
- escaped = hb_string(herb_strdup("<EOF>"));
178
+ escaped = hb_string(hb_allocator_strdup(allocator, "<EOF>"));
92
179
  } else {
93
- escaped = escape_newlines(hb_string(token->value));
180
+ escaped = escape_newlines(allocator, token_value(token));
94
181
  }
95
182
 
96
183
  sprintf(
97
184
  string,
98
- template,
99
- type_string,
185
+ template.data,
186
+ type_string.length,
187
+ type_string.data,
100
188
  escaped.length,
101
189
  escaped.data,
102
190
  token->range.from,
@@ -107,28 +195,27 @@ hb_string_T token_to_string(const token_T* token) {
107
195
  token->location.end.column
108
196
  );
109
197
 
110
- free(escaped.data);
198
+ hb_allocator_dealloc(allocator, escaped.data);
111
199
 
112
200
  return hb_string(string);
113
201
  }
114
202
 
115
- token_T* token_copy(token_T* token) {
203
+ hb_string_T token_value(const token_T* token) {
204
+ return token->value;
205
+ }
206
+
207
+ int token_type(const token_T* token) {
208
+ return token->type;
209
+ }
210
+
211
+ token_T* token_copy(token_T* token, hb_allocator_T* allocator) {
116
212
  if (!token) { return NULL; }
117
213
 
118
- token_T* new_token = calloc(1, sizeof(token_T));
214
+ token_T* new_token = hb_allocator_alloc(allocator, sizeof(token_T));
119
215
 
120
216
  if (!new_token) { return NULL; }
121
217
 
122
- if (token->value) {
123
- new_token->value = herb_strdup(token->value);
124
-
125
- if (!new_token->value) {
126
- free(new_token);
127
- return NULL;
128
- }
129
- } else {
130
- new_token->value = NULL;
131
- }
218
+ new_token->value = token->value;
132
219
 
133
220
  new_token->type = token->type;
134
221
  new_token->range = token->range;
@@ -138,13 +225,11 @@ token_T* token_copy(token_T* token) {
138
225
  }
139
226
 
140
227
  bool token_value_empty(const token_T* token) {
141
- return token == NULL || token->value == NULL || token->value[0] == '\0';
228
+ return token == NULL || hb_string_is_empty(token->value);
142
229
  }
143
230
 
144
- void token_free(token_T* token) {
231
+ void token_free(token_T* token, hb_allocator_T* allocator) {
145
232
  if (!token) { return; }
146
233
 
147
- if (token->value != NULL) { free(token->value); }
148
-
149
- free(token);
234
+ hb_allocator_dealloc(allocator, token);
150
235
  }
@@ -4,15 +4,27 @@
4
4
  #include "lexer_struct.h"
5
5
  #include "position.h"
6
6
  #include "token_struct.h"
7
+ #include "util/hb_allocator.h"
7
8
  #include "util/hb_string.h"
8
9
 
10
+ #include <stdarg.h>
11
+
9
12
  token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer);
10
- hb_string_T token_to_string(const token_T* token);
11
- const char* token_type_to_string(token_type_T type);
13
+ hb_string_T token_to_string(hb_allocator_T* allocator, const token_T* token);
14
+ hb_string_T token_type_to_string(token_type_T type);
15
+ hb_string_T token_type_to_friendly_string(token_type_T type);
16
+ char* token_types_to_friendly_string_va(hb_allocator_T* allocator, token_type_T first_token, ...);
17
+ char* token_types_to_friendly_string_valist(hb_allocator_T* allocator, token_type_T first_token, va_list args);
18
+
19
+ #define token_types_to_friendly_string(allocator, ...) \
20
+ token_types_to_friendly_string_va(allocator, __VA_ARGS__, TOKEN_SENTINEL)
21
+
22
+ hb_string_T token_value(const token_T* token);
23
+ int token_type(const token_T* token);
12
24
 
13
- token_T* token_copy(token_T* token);
25
+ token_T* token_copy(token_T* token, hb_allocator_T* allocator);
14
26
 
15
- void token_free(token_T* token);
27
+ void token_free(token_T* token, hb_allocator_T* allocator);
16
28
 
17
29
  bool token_value_empty(const token_T* token);
18
30
 
@@ -1,6 +1,5 @@
1
1
  #include "include/token_matchers.h"
2
2
  #include "include/parser.h"
3
- #include "include/token.h"
4
3
 
5
4
  #include <stdarg.h>
6
5
  #include <stdbool.h>
@@ -1,8 +1,11 @@
1
1
  #ifndef HERB_TOKEN_STRUCT_H
2
2
  #define HERB_TOKEN_STRUCT_H
3
3
 
4
+ #include <stdbool.h>
5
+
4
6
  #include "location.h"
5
7
  #include "range.h"
8
+ #include "util/hb_string.h"
6
9
 
7
10
  typedef enum {
8
11
  TOKEN_WHITESPACE, // ' '
@@ -21,8 +24,9 @@ typedef enum {
21
24
  TOKEN_HTML_TAG_END, // >
22
25
  TOKEN_HTML_TAG_SELF_CLOSE, // />
23
26
 
24
- TOKEN_HTML_COMMENT_START, // <!--
25
- TOKEN_HTML_COMMENT_END, // -->
27
+ TOKEN_HTML_COMMENT_START, // <!--
28
+ TOKEN_HTML_COMMENT_END, // -->
29
+ TOKEN_HTML_COMMENT_INVALID_END, // --!>
26
30
 
27
31
  TOKEN_ERB_START, // <%, <%=, <%%=, <%#, <%-, <%==, <%%
28
32
  TOKEN_ERB_CONTENT, // Ruby Code
@@ -48,8 +52,11 @@ typedef enum {
48
52
  TOKEN_EOF,
49
53
  } token_type_T;
50
54
 
55
+ // Sentinel value for variadic functions
56
+ #define TOKEN_SENTINEL 99999999
57
+
51
58
  typedef struct TOKEN_STRUCT {
52
- char* value;
59
+ hb_string_T value;
53
60
  range_T range;
54
61
  location_T location;
55
62
  token_type_T type;
@@ -1,4 +1,6 @@
1
1
  #include "include/utf8.h"
2
+ #include "include/util/hb_string.h"
3
+ #include <stdint.h>
2
4
 
3
5
  // UTF-8 byte patterns:
4
6
  // 0xxxxxxx = 1 byte (ASCII)
@@ -24,19 +26,18 @@ bool utf8_is_valid_continuation_byte(unsigned char byte) {
24
26
  return (byte & 0xC0) == 0x80;
25
27
  }
26
28
 
27
- uint32_t utf8_sequence_length(const char* str, size_t position, size_t max_length) {
28
- if (position >= max_length) { return 0; }
29
+ uint32_t utf8_sequence_length(hb_string_T value) {
30
+ if (hb_string_is_empty(value)) { return 0; }
29
31
 
30
- unsigned char first_byte = (unsigned char) str[position];
31
- uint32_t expected_length = utf8_char_byte_length(first_byte);
32
+ uint32_t expected_length = utf8_char_byte_length((unsigned char) value.data[0]);
32
33
 
33
- if (position + expected_length > max_length) {
34
+ if (value.length < expected_length) {
34
35
  return 1; // Not enough bytes, treat as single byte
35
36
  }
36
37
 
37
38
  if (expected_length > 1) {
38
39
  for (uint32_t i = 1; i < expected_length; i++) {
39
- if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
40
+ if (!utf8_is_valid_continuation_byte((unsigned char) value.data[i])) {
40
41
  return 1; // Invalid continuation byte, treat first byte as single byte
41
42
  }
42
43
  }
@@ -1,12 +1,13 @@
1
1
  #ifndef HERB_UTF8_H
2
2
  #define HERB_UTF8_H
3
3
 
4
+ #include "util/hb_string.h"
4
5
  #include <stdbool.h>
5
6
  #include <stdint.h>
6
7
  #include <stdlib.h>
7
8
 
8
9
  uint32_t utf8_char_byte_length(unsigned char first_byte);
9
- uint32_t utf8_sequence_length(const char* str, size_t position, size_t max_length);
10
+ uint32_t utf8_sequence_length(hb_string_T value);
10
11
  bool utf8_is_valid_continuation_byte(unsigned char byte);
11
12
 
12
13
  #endif