npm - @herb-tools/node - Versions diffs - 0.8.10 → 0.9.0 - Mend

@herb-tools/node 0.8.10 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169)

package/CHANGELOG.md +19 -0
package/binding.gyp +26 -8
package/dist/herb-node.cjs +41 -12
package/dist/herb-node.cjs.map +1 -1
package/dist/herb-node.esm.js +8 -1
package/dist/herb-node.esm.js.map +1 -1
package/dist/types/node-backend.d.ts +3 -1
package/extension/error_helpers.cpp +395 -73
package/extension/error_helpers.h +13 -3
package/extension/extension_helpers.cpp +38 -35
package/extension/extension_helpers.h +2 -2
package/extension/herb.cpp +183 -64
package/extension/libherb/analyze/action_view/attribute_extraction_helpers.c +290 -0
package/extension/libherb/analyze/action_view/attribute_extraction_helpers.h +36 -0
package/extension/libherb/analyze/action_view/content_tag.c +70 -0
package/extension/libherb/analyze/action_view/link_to.c +143 -0
package/extension/libherb/analyze/action_view/registry.c +60 -0
package/extension/libherb/analyze/action_view/tag.c +64 -0
package/extension/libherb/analyze/action_view/tag_helper_handler.h +41 -0
package/extension/libherb/analyze/action_view/tag_helper_node_builders.c +305 -0
package/extension/libherb/analyze/action_view/tag_helper_node_builders.h +70 -0
package/extension/libherb/analyze/action_view/tag_helpers.c +748 -0
package/extension/libherb/analyze/action_view/tag_helpers.h +38 -0
package/extension/libherb/analyze/action_view/turbo_frame_tag.c +88 -0
package/extension/libherb/analyze/analyze.c +882 -0
package/extension/libherb/{include → analyze}/analyze.h +14 -4
package/extension/libherb/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
package/extension/libherb/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
package/extension/libherb/analyze/builders.c +343 -0
package/extension/libherb/analyze/builders.h +27 -0
package/extension/libherb/analyze/conditional_elements.c +594 -0
package/extension/libherb/analyze/conditional_elements.h +9 -0
package/extension/libherb/analyze/conditional_open_tags.c +640 -0
package/extension/libherb/analyze/conditional_open_tags.h +9 -0
package/extension/libherb/analyze/control_type.c +250 -0
package/extension/libherb/analyze/control_type.h +14 -0
package/extension/libherb/{analyze_helpers.c → analyze/helpers.c} +48 -23
package/extension/libherb/{analyze_helpers.h → analyze/helpers.h} +4 -2
package/extension/libherb/analyze/invalid_structures.c +193 -0
package/extension/libherb/analyze/invalid_structures.h +11 -0
package/extension/libherb/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
package/extension/libherb/analyze/parse_errors.c +84 -0
package/extension/libherb/analyze/prism_annotate.c +397 -0
package/extension/libherb/analyze/prism_annotate.h +16 -0
package/extension/libherb/{analyze_transform.c → analyze/transform.c} +17 -3
package/extension/libherb/ast_node.c +17 -7
package/extension/libherb/ast_node.h +11 -5
package/extension/libherb/ast_nodes.c +663 -388
package/extension/libherb/ast_nodes.h +118 -39
package/extension/libherb/ast_pretty_print.c +191 -7
package/extension/libherb/ast_pretty_print.h +6 -1
package/extension/libherb/element_source.h +3 -8
package/extension/libherb/errors.c +1077 -521
package/extension/libherb/errors.h +149 -56
package/extension/libherb/extract.c +145 -49
package/extension/libherb/extract.h +21 -5
package/extension/libherb/herb.c +52 -34
package/extension/libherb/herb.h +18 -6
package/extension/libherb/herb_prism_node.h +13 -0
package/extension/libherb/html_util.c +241 -12
package/extension/libherb/html_util.h +7 -2
package/extension/libherb/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
package/extension/libherb/include/analyze/action_view/tag_helper_handler.h +41 -0
package/extension/libherb/include/analyze/action_view/tag_helper_node_builders.h +70 -0
package/extension/libherb/include/analyze/action_view/tag_helpers.h +38 -0
package/extension/libherb/{analyze.h → include/analyze/analyze.h} +14 -4
package/extension/libherb/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
package/extension/libherb/include/analyze/builders.h +27 -0
package/extension/libherb/include/analyze/conditional_elements.h +9 -0
package/extension/libherb/include/analyze/conditional_open_tags.h +9 -0
package/extension/libherb/include/analyze/control_type.h +14 -0
package/extension/libherb/include/{analyze_helpers.h → analyze/helpers.h} +4 -2
package/extension/libherb/include/analyze/invalid_structures.h +11 -0
package/extension/libherb/include/analyze/prism_annotate.h +16 -0
package/extension/libherb/include/ast_node.h +11 -5
package/extension/libherb/include/ast_nodes.h +118 -39
package/extension/libherb/include/ast_pretty_print.h +6 -1
package/extension/libherb/include/element_source.h +3 -8
package/extension/libherb/include/errors.h +149 -56
package/extension/libherb/include/extract.h +21 -5
package/extension/libherb/include/herb.h +18 -6
package/extension/libherb/include/herb_prism_node.h +13 -0
package/extension/libherb/include/html_util.h +7 -2
package/extension/libherb/include/io.h +3 -1
package/extension/libherb/include/lex_helpers.h +29 -0
package/extension/libherb/include/lexer.h +1 -1
package/extension/libherb/include/lexer_peek_helpers.h +87 -13
package/extension/libherb/include/lexer_struct.h +2 -0
package/extension/libherb/include/location.h +2 -1
package/extension/libherb/include/parser.h +27 -2
package/extension/libherb/include/parser_helpers.h +19 -3
package/extension/libherb/include/pretty_print.h +10 -5
package/extension/libherb/include/prism_context.h +45 -0
package/extension/libherb/include/prism_helpers.h +10 -7
package/extension/libherb/include/prism_serialized.h +12 -0
package/extension/libherb/include/token.h +16 -4
package/extension/libherb/include/token_struct.h +10 -3
package/extension/libherb/include/utf8.h +2 -1
package/extension/libherb/include/util/hb_allocator.h +78 -0
package/extension/libherb/include/util/hb_arena.h +6 -1
package/extension/libherb/include/util/hb_arena_debug.h +12 -1
package/extension/libherb/include/util/hb_array.h +7 -3
package/extension/libherb/include/util/hb_buffer.h +6 -4
package/extension/libherb/include/util/hb_foreach.h +79 -0
package/extension/libherb/include/util/hb_narray.h +8 -4
package/extension/libherb/include/util/hb_string.h +56 -9
package/extension/libherb/include/util.h +6 -3
package/extension/libherb/include/version.h +1 -1
package/extension/libherb/io.c +3 -2
package/extension/libherb/io.h +3 -1
package/extension/libherb/lex_helpers.h +29 -0
package/extension/libherb/lexer.c +42 -30
package/extension/libherb/lexer.h +1 -1
package/extension/libherb/lexer_peek_helpers.c +12 -74
package/extension/libherb/lexer_peek_helpers.h +87 -13
package/extension/libherb/lexer_struct.h +2 -0
package/extension/libherb/location.c +2 -2
package/extension/libherb/location.h +2 -1
package/extension/libherb/main.c +53 -28
package/extension/libherb/parser.c +783 -247
package/extension/libherb/parser.h +27 -2
package/extension/libherb/parser_helpers.c +110 -23
package/extension/libherb/parser_helpers.h +19 -3
package/extension/libherb/parser_match_tags.c +110 -49
package/extension/libherb/pretty_print.c +29 -24
package/extension/libherb/pretty_print.h +10 -5
package/extension/libherb/prism_context.h +45 -0
package/extension/libherb/prism_helpers.c +30 -27
package/extension/libherb/prism_helpers.h +10 -7
package/extension/libherb/prism_serialized.h +12 -0
package/extension/libherb/ruby_parser.c +2 -0
package/extension/libherb/token.c +151 -66
package/extension/libherb/token.h +16 -4
package/extension/libherb/token_matchers.c +0 -1
package/extension/libherb/token_struct.h +10 -3
package/extension/libherb/utf8.c +7 -6
package/extension/libherb/utf8.h +2 -1
package/extension/libherb/util/hb_allocator.c +341 -0
package/extension/libherb/util/hb_allocator.h +78 -0
package/extension/libherb/util/hb_arena.c +81 -56
package/extension/libherb/util/hb_arena.h +6 -1
package/extension/libherb/util/hb_arena_debug.c +32 -17
package/extension/libherb/util/hb_arena_debug.h +12 -1
package/extension/libherb/util/hb_array.c +30 -15
package/extension/libherb/util/hb_array.h +7 -3
package/extension/libherb/util/hb_buffer.c +17 -21
package/extension/libherb/util/hb_buffer.h +6 -4
package/extension/libherb/util/hb_foreach.h +79 -0
package/extension/libherb/util/hb_narray.c +22 -7
package/extension/libherb/util/hb_narray.h +8 -4
package/extension/libherb/util/hb_string.c +49 -35
package/extension/libherb/util/hb_string.h +56 -9
package/extension/libherb/util.c +21 -11
package/extension/libherb/util.h +6 -3
package/extension/libherb/version.h +1 -1
package/extension/libherb/visitor.c +48 -1
package/extension/nodes.cpp +451 -6
package/extension/nodes.h +8 -1
package/package.json +12 -8
package/src/node-backend.ts +11 -1
package/dist/types/index-cjs.d.cts +0 -1
package/extension/libherb/analyze.c +0 -1608
package/extension/libherb/element_source.c +0 -12
package/extension/libherb/include/util/hb_system.h +0 -9
package/extension/libherb/util/hb_system.c +0 -30
package/extension/libherb/util/hb_system.h +0 -9
package/src/index-cjs.cts +0 -22
/package/dist/types/{index-esm.d.mts → index.d.ts} +0 -0
/package/src/{index-esm.mts → index.ts} +0 -0

package/extension/libherb/prism_helpers.c CHANGED Viewed

@@ -3,20 +3,18 @@
 #include "include/errors.h"
 #include "include/location.h"
 #include "include/position.h"
-#include "include/util.h"
 #include "include/util/hb_buffer.h"
+#include "include/util/hb_string.h"
 #include <prism.h>
 #include <stdlib.h>
 #include <string.h>
-const char* pm_error_level_to_string(pm_error_level_t level) {
+hb_string_T pm_error_level_to_string(pm_error_level_t level) {
   switch (level) {
-    case PM_ERROR_LEVEL_SYNTAX: return "syntax";
-    case PM_ERROR_LEVEL_ARGUMENT: return "argument";
-    case PM_ERROR_LEVEL_LOAD: return "load";
-    default: return "Unknown pm_error_level_t";
+    case PM_ERROR_LEVEL_SYNTAX: return hb_string("syntax");
+    case PM_ERROR_LEVEL_ARGUMENT: return hb_string("argument");
+    case PM_ERROR_LEVEL_LOAD: return hb_string("load");
   }
 }
@@ -24,7 +22,8 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error(
   const pm_diagnostic_t* error,
   const AST_NODE_T* node,
   const char* source,
-  pm_parser_t* parser
+  pm_parser_t* parser,
+  hb_allocator_T* allocator
 ) {
   size_t start_offset = (size_t) (error->location.start - parser->start);
   size_t end_offset = (size_t) (error->location.end - parser->start);
@@ -33,25 +32,28 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error(
   position_T end = position_from_source_with_offset(source, end_offset);
   return ruby_parse_error_init(
-    error->message,
-    pm_diagnostic_id_human(error->diag_id),
+    hb_string(error->message),
+    hb_string(pm_diagnostic_id_human(error->diag_id)),
     pm_error_level_to_string(error->level),
     start,
-    end
+    end,
+    allocator
   );
 }
 RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error_with_positions(
   const pm_diagnostic_t* error,
   position_T start,
-  position_T end
+  position_T end,
+  hb_allocator_T* allocator
 ) {
   return ruby_parse_error_init(
-    error->message,
-    pm_diagnostic_id_human(error->diag_id),
+    hb_string(error->message),
+    hb_string(pm_diagnostic_id_human(error->diag_id)),
     pm_error_level_to_string(error->level),
     start,
-    end
+    end,
+    allocator
   );
 }
@@ -118,7 +120,7 @@ static bool search_then_keyword_location(const pm_node_t* node, void* data) {
   return false;
 }
-location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source) {
+location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source, hb_allocator_T* allocator) {
   if (analyzed == NULL || analyzed->root == NULL || source == NULL) { return NULL; }
   then_keyword_search_context_T context = { .then_keyword_loc = { .start = NULL, .end = NULL }, .found = false };
@@ -133,7 +135,7 @@ location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* sou
   position_T start_position = position_from_source_with_offset(source, start_offset);
   position_T end_position = position_from_source_with_offset(source, end_offset);
-  return location_create(start_position, end_position);
+  return location_create(start_position, end_position, allocator);
 }
 static location_T* parse_wrapped_and_find_then_keyword(
@@ -142,7 +144,8 @@ static location_T* parse_wrapped_and_find_then_keyword(
   size_t source_length,
   size_t prefix_length,
   size_t adjustment_threshold,
-  size_t adjustment_amount
+  size_t adjustment_amount,
+  hb_allocator_T* allocator
 ) {
   pm_parser_t parser;
   pm_parser_init(&parser, (const uint8_t*) hb_buffer_value(buffer), hb_buffer_length(buffer), NULL);
@@ -177,7 +180,7 @@ static location_T* parse_wrapped_and_find_then_keyword(
         position_T start_position = position_from_source_with_offset(source, start_offset);
         position_T end_position = position_from_source_with_offset(source, end_offset);
-        location = location_create(start_position, end_position);
+        location = location_create(start_position, end_position, allocator);
       }
     }
   }
@@ -188,14 +191,14 @@ static location_T* parse_wrapped_and_find_then_keyword(
   return location;
 }
-location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause) {
+location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause, hb_allocator_T* allocator) {
   if (source == NULL) { return NULL; }
   size_t source_length = strlen(source);
   hb_buffer_T buffer;
-  if (!hb_buffer_init(&buffer, source_length + 16)) { return NULL; }
+  if (!hb_buffer_init(&buffer, source_length + 16, allocator)) { return NULL; }
   hb_buffer_append(&buffer, "case x\n");
   size_t prefix_length = hb_buffer_length(&buffer);
@@ -203,14 +206,14 @@ location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_cla
   hb_buffer_append(&buffer, "\nend");
   location_T* location =
-    parse_wrapped_and_find_then_keyword(&buffer, source, source_length, prefix_length, SIZE_MAX, 0);
+    parse_wrapped_and_find_then_keyword(&buffer, source, source_length, prefix_length, SIZE_MAX, 0, allocator);
-  free(buffer.value);
+  hb_buffer_free(&buffer);
   return location;
 }
-location_T* get_then_keyword_location_elsif_wrapped(const char* source) {
+location_T* get_then_keyword_location_elsif_wrapped(const char* source, hb_allocator_T* allocator) {
   if (source == NULL) { return NULL; }
   const char* elsif_position = strstr(source, "elsif");
@@ -223,7 +226,7 @@ location_T* get_then_keyword_location_elsif_wrapped(const char* source) {
   hb_buffer_T buffer;
-  if (!hb_buffer_init(&buffer, source_length + 8)) { return NULL; }
+  if (!hb_buffer_init(&buffer, source_length + 8, allocator)) { return NULL; }
   hb_buffer_append_with_length(&buffer, source, elsif_offset);
   hb_buffer_append(&buffer, "if");
@@ -232,9 +235,9 @@ location_T* get_then_keyword_location_elsif_wrapped(const char* source) {
   hb_buffer_append(&buffer, "\nend");
   location_T* location =
-    parse_wrapped_and_find_then_keyword(&buffer, source, source_length, 0, if_end_offset, replacement_diff);
+    parse_wrapped_and_find_then_keyword(&buffer, source, source_length, 0, if_end_offset, replacement_diff, allocator);
-  free(buffer.value);
+  hb_buffer_free(&buffer);
   return location;
 }

package/extension/libherb/prism_helpers.h CHANGED Viewed

@@ -1,31 +1,34 @@
 #ifndef HERB_PRISM_HELPERS_H
 #define HERB_PRISM_HELPERS_H
-#include "analyzed_ruby.h"
+#include "analyze/analyzed_ruby.h"
 #include "ast_nodes.h"
 #include "errors.h"
 #include "location.h"
 #include "position.h"
+#include "util/hb_allocator.h"
 #include <prism.h>
-const char* pm_error_level_to_string(pm_error_level_t level);
+hb_string_T pm_error_level_to_string(pm_error_level_t level);
 RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error(
   const pm_diagnostic_t* error,
   const AST_NODE_T* node,
   const char* source,
-  pm_parser_t* parser
+  pm_parser_t* parser,
+  hb_allocator_T* allocator
 );
 RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error_with_positions(
   const pm_diagnostic_t* error,
   position_T start,
-  position_T end
+  position_T end,
+  hb_allocator_T* allocator
 );
-location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source);
-location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause);
-location_T* get_then_keyword_location_elsif_wrapped(const char* source);
+location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source, hb_allocator_T* allocator);
+location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause, hb_allocator_T* allocator);
+location_T* get_then_keyword_location_elsif_wrapped(const char* source, hb_allocator_T* allocator);
 #endif

package/extension/libherb/prism_serialized.h ADDED Viewed

@@ -0,0 +1,12 @@
+#ifndef HERB_PRISM_SERIALIZED_H
+#define HERB_PRISM_SERIALIZED_H
+#include <stddef.h>
+#include <stdint.h>
+typedef struct {
+  uint8_t* data;
+  size_t length;
+} prism_serialized_T;
+#endif

package/extension/libherb/ruby_parser.c CHANGED Viewed

@@ -38,8 +38,10 @@ void herb_parse_ruby_to_stdout(char* source) {
   pm_visit_node(root, herb_prism_visit, data);
+#ifndef PRISM_EXCLUDE_PRETTYPRINT
   pm_prettyprint(&buffer, &parser, root);
   printf("%s\n", buffer.value);
+#endif
   pm_buffer_free(&buffer);
   pm_node_destroy(&parser, root);

package/extension/libherb/token.c CHANGED Viewed

@@ -1,24 +1,30 @@
 #include "include/token.h"
-#include "include/lexer.h"
 #include "include/position.h"
 #include "include/range.h"
 #include "include/token_struct.h"
 #include "include/util.h"
+#include "include/util/hb_allocator.h"
+#include "include/util/hb_buffer.h"
+#include "include/util/hb_string.h"
+#include <stdarg.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer) {
-  token_T* token = calloc(1, sizeof(token_T));
+  hb_allocator_T* allocator = lexer->allocator;
+  token_T* token = hb_allocator_alloc(allocator, sizeof(token_T));
+  if (!token) { return NULL; }
   if (type == TOKEN_NEWLINE) {
     lexer->current_line++;
     lexer->current_column = 0;
   }
-  token->value = hb_string_to_c_string_using_malloc(value);
+  token->value = value;
   token->type = type;
   token->range = (range_T) { .from = lexer->previous_position, .to = lexer->current_position };
@@ -38,65 +44,147 @@ token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer)
   return token;
 }
-const char* token_type_to_string(const token_type_T type) {
+hb_string_T token_type_to_string(const token_type_T type) {
+  switch (type) {
+    case TOKEN_WHITESPACE: return hb_string("TOKEN_WHITESPACE");
+    case TOKEN_NBSP: return hb_string("TOKEN_NBSP");
+    case TOKEN_NEWLINE: return hb_string("TOKEN_NEWLINE");
+    case TOKEN_IDENTIFIER: return hb_string("TOKEN_IDENTIFIER");
+    case TOKEN_HTML_DOCTYPE: return hb_string("TOKEN_HTML_DOCTYPE");
+    case TOKEN_XML_DECLARATION: return hb_string("TOKEN_XML_DECLARATION");
+    case TOKEN_XML_DECLARATION_END: return hb_string("TOKEN_XML_DECLARATION_END");
+    case TOKEN_CDATA_START: return hb_string("TOKEN_CDATA_START");
+    case TOKEN_CDATA_END: return hb_string("TOKEN_CDATA_END");
+    case TOKEN_HTML_TAG_START: return hb_string("TOKEN_HTML_TAG_START");
+    case TOKEN_HTML_TAG_END: return hb_string("TOKEN_HTML_TAG_END");
+    case TOKEN_HTML_TAG_START_CLOSE: return hb_string("TOKEN_HTML_TAG_START_CLOSE");
+    case TOKEN_HTML_TAG_SELF_CLOSE: return hb_string("TOKEN_HTML_TAG_SELF_CLOSE");
+    case TOKEN_HTML_COMMENT_START: return hb_string("TOKEN_HTML_COMMENT_START");
+    case TOKEN_HTML_COMMENT_END: return hb_string("TOKEN_HTML_COMMENT_END");
+    case TOKEN_HTML_COMMENT_INVALID_END: return hb_string("TOKEN_HTML_COMMENT_INVALID_END");
+    case TOKEN_EQUALS: return hb_string("TOKEN_EQUALS");
+    case TOKEN_QUOTE: return hb_string("TOKEN_QUOTE");
+    case TOKEN_BACKTICK: return hb_string("TOKEN_BACKTICK");
+    case TOKEN_BACKSLASH: return hb_string("TOKEN_BACKSLASH");
+    case TOKEN_DASH: return hb_string("TOKEN_DASH");
+    case TOKEN_UNDERSCORE: return hb_string("TOKEN_UNDERSCORE");
+    case TOKEN_EXCLAMATION: return hb_string("TOKEN_EXCLAMATION");
+    case TOKEN_SLASH: return hb_string("TOKEN_SLASH");
+    case TOKEN_SEMICOLON: return hb_string("TOKEN_SEMICOLON");
+    case TOKEN_COLON: return hb_string("TOKEN_COLON");
+    case TOKEN_AT: return hb_string("TOKEN_AT");
+    case TOKEN_LT: return hb_string("TOKEN_LT");
+    case TOKEN_PERCENT: return hb_string("TOKEN_PERCENT");
+    case TOKEN_AMPERSAND: return hb_string("TOKEN_AMPERSAND");
+    case TOKEN_ERB_START: return hb_string("TOKEN_ERB_START");
+    case TOKEN_ERB_CONTENT: return hb_string("TOKEN_ERB_CONTENT");
+    case TOKEN_ERB_END: return hb_string("TOKEN_ERB_END");
+    case TOKEN_CHARACTER: return hb_string("TOKEN_CHARACTER");
+    case TOKEN_ERROR: return hb_string("TOKEN_ERROR");
+    case TOKEN_EOF: return hb_string("TOKEN_EOF");
+  }
+}
+hb_string_T token_type_to_friendly_string(const token_type_T type) {
   switch (type) {
-    case TOKEN_WHITESPACE: return "TOKEN_WHITESPACE";
-    case TOKEN_NBSP: return "TOKEN_NBSP";
-    case TOKEN_NEWLINE: return "TOKEN_NEWLINE";
-    case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER";
-    case TOKEN_HTML_DOCTYPE: return "TOKEN_HTML_DOCTYPE";
-    case TOKEN_XML_DECLARATION: return "TOKEN_XML_DECLARATION";
-    case TOKEN_XML_DECLARATION_END: return "TOKEN_XML_DECLARATION_END";
-    case TOKEN_CDATA_START: return "TOKEN_CDATA_START";
-    case TOKEN_CDATA_END: return "TOKEN_CDATA_END";
-    case TOKEN_HTML_TAG_START: return "TOKEN_HTML_TAG_START";
-    case TOKEN_HTML_TAG_END: return "TOKEN_HTML_TAG_END";
-    case TOKEN_HTML_TAG_START_CLOSE: return "TOKEN_HTML_TAG_START_CLOSE";
-    case TOKEN_HTML_TAG_SELF_CLOSE: return "TOKEN_HTML_TAG_SELF_CLOSE";
-    case TOKEN_HTML_COMMENT_START: return "TOKEN_HTML_COMMENT_START";
-    case TOKEN_HTML_COMMENT_END: return "TOKEN_HTML_COMMENT_END";
-    case TOKEN_EQUALS: return "TOKEN_EQUALS";
-    case TOKEN_QUOTE: return "TOKEN_QUOTE";
-    case TOKEN_BACKTICK: return "TOKEN_BACKTICK";
-    case TOKEN_BACKSLASH: return "TOKEN_BACKSLASH";
-    case TOKEN_DASH: return "TOKEN_DASH";
-    case TOKEN_UNDERSCORE: return "TOKEN_UNDERSCORE";
-    case TOKEN_EXCLAMATION: return "TOKEN_EXCLAMATION";
-    case TOKEN_SLASH: return "TOKEN_SLASH";
-    case TOKEN_SEMICOLON: return "TOKEN_SEMICOLON";
-    case TOKEN_COLON: return "TOKEN_COLON";
-    case TOKEN_AT: return "TOKEN_AT";
-    case TOKEN_LT: return "TOKEN_LT";
-    case TOKEN_PERCENT: return "TOKEN_PERCENT";
-    case TOKEN_AMPERSAND: return "TOKEN_AMPERSAND";
-    case TOKEN_ERB_START: return "TOKEN_ERB_START";
-    case TOKEN_ERB_CONTENT: return "TOKEN_ERB_CONTENT";
-    case TOKEN_ERB_END: return "TOKEN_ERB_END";
-    case TOKEN_CHARACTER: return "TOKEN_CHARACTER";
-    case TOKEN_ERROR: return "TOKEN_ERROR";
-    case TOKEN_EOF: return "TOKEN_EOF";
+    case TOKEN_WHITESPACE: return hb_string("whitespace");
+    case TOKEN_NBSP: return hb_string("non-breaking space");
+    case TOKEN_NEWLINE: return hb_string("a newline");
+    case TOKEN_IDENTIFIER: return hb_string("an identifier");
+    case TOKEN_HTML_DOCTYPE: return hb_string("`<!DOCTYPE`");
+    case TOKEN_XML_DECLARATION: return hb_string("`<?xml`");
+    case TOKEN_XML_DECLARATION_END: return hb_string("`?>`");
+    case TOKEN_CDATA_START: return hb_string("`<![CDATA[`");
+    case TOKEN_CDATA_END: return hb_string("`]]>`");
+    case TOKEN_HTML_TAG_START: return hb_string("`<`");
+    case TOKEN_HTML_TAG_END: return hb_string("`>`");
+    case TOKEN_HTML_TAG_START_CLOSE: return hb_string("`</`");
+    case TOKEN_HTML_TAG_SELF_CLOSE: return hb_string("`/>`");
+    case TOKEN_HTML_COMMENT_START: return hb_string("`<!--`");
+    case TOKEN_HTML_COMMENT_END: return hb_string("`-->`");
+    case TOKEN_HTML_COMMENT_INVALID_END: return hb_string("`--!>`");
+    case TOKEN_EQUALS: return hb_string("`=`");
+    case TOKEN_QUOTE: return hb_string("a quote");
+    case TOKEN_BACKTICK: return hb_string("a backtick");
+    case TOKEN_BACKSLASH: return hb_string("`\\`");
+    case TOKEN_DASH: return hb_string("`-`");
+    case TOKEN_UNDERSCORE: return hb_string("`_`");
+    case TOKEN_EXCLAMATION: return hb_string("`!`");
+    case TOKEN_SLASH: return hb_string("`/`");
+    case TOKEN_SEMICOLON: return hb_string("`;`");
+    case TOKEN_COLON: return hb_string("`:`");
+    case TOKEN_AT: return hb_string("`@`");
+    case TOKEN_LT: return hb_string("`<`");
+    case TOKEN_PERCENT: return hb_string("`%`");
+    case TOKEN_AMPERSAND: return hb_string("`&`");
+    case TOKEN_ERB_START: return hb_string("`<%`");
+    case TOKEN_ERB_CONTENT: return hb_string("ERB content");
+    case TOKEN_ERB_END: return hb_string("`%>`");
+    case TOKEN_CHARACTER: return hb_string("a character");
+    case TOKEN_ERROR: return hb_string("an error token");
+    case TOKEN_EOF: return hb_string("end of file");
+  }
+}
+char* token_types_to_friendly_string_valist(hb_allocator_T* allocator, token_type_T first_token, va_list args) {
+  if ((int) first_token == TOKEN_SENTINEL) { return hb_allocator_strdup(allocator, ""); }
+  size_t count = 0;
+  hb_string_T names[32];
+  token_type_T current = first_token;
+  while ((int) current != TOKEN_SENTINEL && count < 32) {
+    names[count++] = token_type_to_friendly_string(current);
+    current = va_arg(args, token_type_T);
+  }
+  hb_buffer_T buffer;
+  hb_buffer_init(&buffer, 128, allocator);
+  for (size_t i = 0; i < count; i++) {
+    hb_buffer_append_string(&buffer, names[i]);
+    if (i < count - 1) {
+      if (count > 2) { hb_buffer_append(&buffer, ", "); }
+      if (i == count - 2) { hb_buffer_append(&buffer, count == 2 ? " or " : "or "); }
+    }
   }
-  return "Unknown token_type_T";
+  return hb_buffer_value(&buffer);
+}
+char* token_types_to_friendly_string_va(hb_allocator_T* allocator, token_type_T first_token, ...) {
+  va_list args;
+  va_start(args, first_token);
+  char* result = token_types_to_friendly_string_valist(allocator, first_token, args);
+  va_end(args);
+  return result;
 }
-hb_string_T token_to_string(const token_T* token) {
-  const char* type_string = token_type_to_string(token->type);
-  const char* template = "#<Herb::Token type=\"%s\" value=\"%.*s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>";
+hb_string_T token_to_string(hb_allocator_T* allocator, const token_T* token) {
+  hb_string_T type_string = token_type_to_string(token->type);
+  hb_string_T template =
+    hb_string("#<Herb::Token type=\"%.*s\" value=\"%.*s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>");
+  char* string = hb_allocator_alloc(allocator, template.length + type_string.length + token->value.length + 16);
+  if (!string) { return HB_STRING_EMPTY; }
+  memset(string, 0, template.length + type_string.length + token->value.length + 16);
-  char* string = calloc(strlen(type_string) + strlen(template) + strlen(token->value) + 16, sizeof(char));
   hb_string_T escaped;
   if (token->type == TOKEN_EOF) {
-    escaped = hb_string(herb_strdup("<EOF>"));
+    escaped = hb_string(hb_allocator_strdup(allocator, "<EOF>"));
   } else {
-    escaped = escape_newlines(hb_string(token->value));
+    escaped = escape_newlines(allocator, token_value(token));
   }
   sprintf(
     string,
-    template,
-    type_string,
+    template.data,
+    type_string.length,
+    type_string.data,
     escaped.length,
     escaped.data,
     token->range.from,
@@ -107,28 +195,27 @@ hb_string_T token_to_string(const token_T* token) {
     token->location.end.column
   );
-  free(escaped.data);
+  hb_allocator_dealloc(allocator, escaped.data);
   return hb_string(string);
 }
-token_T* token_copy(token_T* token) {
+hb_string_T token_value(const token_T* token) {
+  return token->value;
+}
+int token_type(const token_T* token) {
+  return token->type;
+}
+token_T* token_copy(token_T* token, hb_allocator_T* allocator) {
   if (!token) { return NULL; }
-  token_T* new_token = calloc(1, sizeof(token_T));
+  token_T* new_token = hb_allocator_alloc(allocator, sizeof(token_T));
   if (!new_token) { return NULL; }
-  if (token->value) {
-    new_token->value = herb_strdup(token->value);
-    if (!new_token->value) {
-      free(new_token);
-      return NULL;
-    }
-  } else {
-    new_token->value = NULL;
-  }
+  new_token->value = token->value;
   new_token->type = token->type;
   new_token->range = token->range;
@@ -138,13 +225,11 @@ token_T* token_copy(token_T* token) {
 }
 bool token_value_empty(const token_T* token) {
-  return token == NULL || token->value == NULL || token->value[0] == '\0';
+  return token == NULL || hb_string_is_empty(token->value);
 }
-void token_free(token_T* token) {
+void token_free(token_T* token, hb_allocator_T* allocator) {
   if (!token) { return; }
-  if (token->value != NULL) { free(token->value); }
-  free(token);
+  hb_allocator_dealloc(allocator, token);
 }

package/extension/libherb/token.h CHANGED Viewed

@@ -4,15 +4,27 @@
 #include "lexer_struct.h"
 #include "position.h"
 #include "token_struct.h"
+#include "util/hb_allocator.h"
 #include "util/hb_string.h"
+#include <stdarg.h>
 token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer);
-hb_string_T token_to_string(const token_T* token);
-const char* token_type_to_string(token_type_T type);
+hb_string_T token_to_string(hb_allocator_T* allocator, const token_T* token);
+hb_string_T token_type_to_string(token_type_T type);
+hb_string_T token_type_to_friendly_string(token_type_T type);
+char* token_types_to_friendly_string_va(hb_allocator_T* allocator, token_type_T first_token, ...);
+char* token_types_to_friendly_string_valist(hb_allocator_T* allocator, token_type_T first_token, va_list args);
+#define token_types_to_friendly_string(allocator, ...)                                                                 \
+  token_types_to_friendly_string_va(allocator, __VA_ARGS__, TOKEN_SENTINEL)
+hb_string_T token_value(const token_T* token);
+int token_type(const token_T* token);
-token_T* token_copy(token_T* token);
+token_T* token_copy(token_T* token, hb_allocator_T* allocator);
-void token_free(token_T* token);
+void token_free(token_T* token, hb_allocator_T* allocator);
 bool token_value_empty(const token_T* token);

package/extension/libherb/token_matchers.c CHANGED Viewed

@@ -1,6 +1,5 @@
 #include "include/token_matchers.h"
 #include "include/parser.h"
-#include "include/token.h"
 #include <stdarg.h>
 #include <stdbool.h>

package/extension/libherb/token_struct.h CHANGED Viewed

@@ -1,8 +1,11 @@
 #ifndef HERB_TOKEN_STRUCT_H
 #define HERB_TOKEN_STRUCT_H
+#include <stdbool.h>
 #include "location.h"
 #include "range.h"
+#include "util/hb_string.h"
 typedef enum {
   TOKEN_WHITESPACE, // ' '
@@ -21,8 +24,9 @@ typedef enum {
   TOKEN_HTML_TAG_END,         // >
   TOKEN_HTML_TAG_SELF_CLOSE,  // />
-  TOKEN_HTML_COMMENT_START, // <!--
-  TOKEN_HTML_COMMENT_END,   // -->
+  TOKEN_HTML_COMMENT_START,       // <!--
+  TOKEN_HTML_COMMENT_END,         // -->
+  TOKEN_HTML_COMMENT_INVALID_END, // --!>
   TOKEN_ERB_START,   // <%, <%=, <%%=, <%#, <%-, <%==, <%%
   TOKEN_ERB_CONTENT, // Ruby Code
@@ -48,8 +52,11 @@ typedef enum {
   TOKEN_EOF,
 } token_type_T;
+// Sentinel value for variadic functions
+#define TOKEN_SENTINEL 99999999
 typedef struct TOKEN_STRUCT {
-  char* value;
+  hb_string_T value;
   range_T range;
   location_T location;
   token_type_T type;

package/extension/libherb/utf8.c CHANGED Viewed

@@ -1,4 +1,6 @@
 #include "include/utf8.h"
+#include "include/util/hb_string.h"
+#include <stdint.h>
 // UTF-8 byte patterns:
 //   0xxxxxxx = 1 byte (ASCII)
@@ -24,19 +26,18 @@ bool utf8_is_valid_continuation_byte(unsigned char byte) {
   return (byte & 0xC0) == 0x80;
 }
-uint32_t utf8_sequence_length(const char* str, size_t position, size_t max_length) {
-  if (position >= max_length) { return 0; }
+uint32_t utf8_sequence_length(hb_string_T value) {
+  if (hb_string_is_empty(value)) { return 0; }
-  unsigned char first_byte = (unsigned char) str[position];
-  uint32_t expected_length = utf8_char_byte_length(first_byte);
+  uint32_t expected_length = utf8_char_byte_length((unsigned char) value.data[0]);
-  if (position + expected_length > max_length) {
+  if (value.length < expected_length) {
     return 1; // Not enough bytes, treat as single byte
   }
   if (expected_length > 1) {
     for (uint32_t i = 1; i < expected_length; i++) {
-      if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
+      if (!utf8_is_valid_continuation_byte((unsigned char) value.data[i])) {
         return 1; // Invalid continuation byte, treat first byte as single byte
       }
     }

package/extension/libherb/utf8.h CHANGED Viewed

@@ -1,12 +1,13 @@
 #ifndef HERB_UTF8_H
 #define HERB_UTF8_H
+#include "util/hb_string.h"
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
 uint32_t utf8_char_byte_length(unsigned char first_byte);
-uint32_t utf8_sequence_length(const char* str, size_t position, size_t max_length);
+uint32_t utf8_sequence_length(hb_string_T value);
 bool utf8_is_valid_continuation_byte(unsigned char byte);
 #endif