npm - @herb-tools/node - Versions diffs - 0.4.2 → 0.5.0 - Mend

@herb-tools/node 0.4.2 → 0.5.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (33)

package/binding.gyp +1 -0
package/dist/herb-node.esm.js +1 -1
package/extension/error_helpers.cpp +1 -1
package/extension/error_helpers.h +1 -1
package/extension/libherb/analyze.c +2 -1
package/extension/libherb/ast_nodes.c +1 -1
package/extension/libherb/ast_nodes.h +1 -1
package/extension/libherb/ast_pretty_print.c +1 -1
package/extension/libherb/ast_pretty_print.h +1 -1
package/extension/libherb/errors.c +1 -1
package/extension/libherb/errors.h +1 -1
package/extension/libherb/include/ast_nodes.h +1 -1
package/extension/libherb/include/ast_pretty_print.h +1 -1
package/extension/libherb/include/errors.h +1 -1
package/extension/libherb/include/parser.h +12 -0
package/extension/libherb/include/parser_helpers.h +9 -0
package/extension/libherb/include/token_struct.h +1 -0
package/extension/libherb/include/utf8.h +11 -0
package/extension/libherb/include/version.h +1 -1
package/extension/libherb/lexer.c +50 -2
package/extension/libherb/parser.c +224 -8
package/extension/libherb/parser.h +12 -0
package/extension/libherb/parser_helpers.c +46 -0
package/extension/libherb/parser_helpers.h +9 -0
package/extension/libherb/token.c +1 -0
package/extension/libherb/token_struct.h +1 -0
package/extension/libherb/utf8.c +46 -0
package/extension/libherb/utf8.h +11 -0
package/extension/libherb/version.h +1 -1
package/extension/libherb/visitor.c +1 -1
package/extension/nodes.cpp +1 -1
package/extension/nodes.h +1 -1
package/package.json +2 -2

package/binding.gyp CHANGED Viewed

@@ -36,6 +36,7 @@
         "./extension/libherb/range.c",
         "./extension/libherb/token_matchers.c",
         "./extension/libherb/token.c",
+        "./extension/libherb/utf8.c",
         "./extension/libherb/util.c",
         "./extension/libherb/visitor.c",

package/dist/herb-node.esm.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { createRequire } from 'module';
 import { fileURLToPath } from 'url';
 var name = "@herb-tools/node";
-var version = "0.4.2";
+var version = "0.5.0";
 var packageJSON = {
 	name: name,
 	version: version};

package/extension/error_helpers.cpp CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/error_helpers.cpp.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/javascript/packages/node/extension/error_helpers.cpp.erb
 #include <node_api.h>
 #include "error_helpers.h"

package/extension/error_helpers.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/error_helpers.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/javascript/packages/node/extension/error_helpers.h.erb
 #ifndef HERB_EXTENSION_ERRORS_H
 #define HERB_EXTENSION_ERRORS_H

package/extension/libherb/analyze.c CHANGED Viewed

@@ -50,7 +50,8 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) {
     AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node;
     const char* opening = erb_content_node->tag_opening->value;
-    if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0) {
+    if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0 && strcmp(opening, "<%#") != 0) {
       analyzed_ruby_T* analyzed = herb_analyze_ruby(erb_content_node->content->value);
       if (false) { pretty_print_analyed_ruby(analyzed, erb_content_node->content->value); }

package/extension/libherb/ast_nodes.c CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/ast_nodes.c.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/ast_nodes.c.erb
 #include <stdio.h>
 #include <stdbool.h>

package/extension/libherb/ast_nodes.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_nodes.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_nodes.h.erb
 #ifndef HERB_AST_NODES_H
 #define HERB_AST_NODES_H

package/extension/libherb/ast_pretty_print.c CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/ast_pretty_print.c.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/ast_pretty_print.c.erb
 #include "include/ast_node.h"
 #include "include/ast_nodes.h"

package/extension/libherb/ast_pretty_print.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_pretty_print.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_pretty_print.h.erb
 #ifndef HERB_AST_PRETTY_PRINT_H
 #define HERB_AST_PRETTY_PRINT_H

package/extension/libherb/errors.c CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/errors.c.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/errors.c.erb
 #include "include/array.h"
 #include "include/errors.h"

package/extension/libherb/errors.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/errors.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/errors.h.erb
 #ifndef HERB_ERRORS_H
 #define HERB_ERRORS_H

package/extension/libherb/include/ast_nodes.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_nodes.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_nodes.h.erb
 #ifndef HERB_AST_NODES_H
 #define HERB_AST_NODES_H

package/extension/libherb/include/ast_pretty_print.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_pretty_print.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_pretty_print.h.erb
 #ifndef HERB_AST_PRETTY_PRINT_H
 #define HERB_AST_PRETTY_PRINT_H

package/extension/libherb/include/errors.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/errors.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/errors.h.erb
 #ifndef HERB_ERRORS_H
 #define HERB_ERRORS_H

package/extension/libherb/include/parser.h CHANGED Viewed

@@ -5,10 +5,22 @@
 #include "ast_node.h"
 #include "lexer.h"
+typedef enum {
+  FOREIGN_CONTENT_UNKNOWN = 0,
+  FOREIGN_CONTENT_SCRIPT,
+  FOREIGN_CONTENT_STYLE,
+  // FOREIGN_CONTENT_RUBY,
+  // FOREIGN_CONTENT_TEMPLATE
+} foreign_content_type_T;
+typedef enum { PARSER_STATE_DATA, PARSER_STATE_FOREIGN_CONTENT } parser_state_T;
 typedef struct PARSER_STRUCT {
   lexer_T* lexer;
   token_T* current_token;
   array_T* open_tags_stack;
+  parser_state_T state;
+  foreign_content_type_T foreign_content_type;
 } parser_T;
 parser_T* parser_init(lexer_T* lexer);

package/extension/libherb/include/parser_helpers.h CHANGED Viewed

@@ -24,6 +24,15 @@ void parser_append_literal_node_from_buffer(
 bool parser_in_svg_context(const parser_T* parser);
+foreign_content_type_T parser_get_foreign_content_type(const char* tag_name);
+bool parser_is_foreign_content_tag(const char* tag_name);
+const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type);
+void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type);
+void parser_exit_foreign_content(parser_T* parser);
+bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type);
 token_T* parser_advance(parser_T* parser);
 token_T* parser_consume_if_present(parser_T* parser, token_type_T type);
 token_T* parser_consume_expected(parser_T* parser, token_type_T type, array_T* array);

package/extension/libherb/include/token_struct.h CHANGED Viewed

@@ -28,6 +28,7 @@ typedef enum {
   TOKEN_SLASH,       // /
   TOKEN_EQUALS,      // =
   TOKEN_QUOTE,       // ", '
+  TOKEN_BACKTICK,    // `
   TOKEN_DASH,        // -
   TOKEN_UNDERSCORE,  // _
   TOKEN_EXCLAMATION, // !

package/extension/libherb/include/utf8.h ADDED Viewed

@@ -0,0 +1,11 @@
+#ifndef HERB_UTF8_H
+#define HERB_UTF8_H
+#include <stdbool.h>
+#include <stdlib.h>
+int utf8_char_byte_length(unsigned char first_byte);
+int utf8_sequence_length(const char* str, size_t position, size_t max_length);
+bool utf8_is_valid_continuation_byte(unsigned char byte);
+#endif

package/extension/libherb/include/version.h CHANGED Viewed

@@ -1,6 +1,6 @@
 #ifndef HERB_VERSION_H
 #define HERB_VERSION_H
-#define HERB_VERSION "0.4.2"
+#define HERB_VERSION "0.5.0"
 #endif

package/extension/libherb/lexer.c CHANGED Viewed

@@ -1,6 +1,7 @@
 #include "include/buffer.h"
 #include "include/lexer_peek_helpers.h"
 #include "include/token.h"
+#include "include/utf8.h"
 #include "include/util.h"
 #include <ctype.h>
@@ -84,6 +85,23 @@ static void lexer_advance(lexer_T* lexer) {
   }
 }
+static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
+  if (byte_count <= 0) { return; }
+  if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
+    if (!is_newline(lexer->current_character)) { lexer->current_column++; }
+    lexer->current_position += byte_count;
+    if (lexer->current_position >= lexer->source_length) {
+      lexer->current_position = lexer->source_length;
+      lexer->current_character = '\0';
+    } else {
+      lexer->current_character = lexer->source[lexer->current_position];
+    }
+  }
+}
 static void lexer_advance_by(lexer_T* lexer, const size_t count) {
   for (size_t i = 0; i < count; i++) {
     lexer_advance(lexer);
@@ -116,6 +134,35 @@ static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
   return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
 }
+static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
+  int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);
+  if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
+  char* utf8_char = malloc(char_byte_length + 1);
+  if (!utf8_char) { return lexer_advance_current(lexer, type); }
+  for (int i = 0; i < char_byte_length; i++) {
+    if (lexer->current_position + i >= lexer->source_length) {
+      free(utf8_char);
+      return lexer_advance_current(lexer, type);
+    }
+    utf8_char[i] = lexer->source[lexer->current_position + i];
+  }
+  utf8_char[char_byte_length] = '\0';
+  lexer_advance_utf8_bytes(lexer, char_byte_length);
+  token_T* token = token_init(utf8_char, type, lexer);
+  free(utf8_char);
+  return token;
+}
 static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
   if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
     return lexer_advance_with(lexer, value, type);
@@ -232,7 +279,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
   if (isspace(lexer->current_character)) { return lexer_parse_whitespace(lexer); }
   if (lexer->current_character == '\xC2' && lexer_peek(lexer, 1) == '\xA0') {
-    return lexer_advance_with(lexer, "\xC2\xA0", TOKEN_NBSP);
+    return lexer_advance_utf8_character(lexer, TOKEN_NBSP);
   }
   switch (lexer->current_character) {
@@ -278,11 +325,12 @@ token_T* lexer_next_token(lexer_T* lexer) {
     case '"':
     case '\'': return lexer_advance_current(lexer, TOKEN_QUOTE);
+    case '`': return lexer_advance_current(lexer, TOKEN_BACKTICK);
     default: {
       if (isalnum(lexer->current_character)) { return lexer_parse_identifier(lexer); }
-      return lexer_advance_current(lexer, TOKEN_CHARACTER);
+      return lexer_advance_utf8_character(lexer, TOKEN_CHARACTER);
     }
   }
 }

package/extension/libherb/parser.c CHANGED Viewed

@@ -9,6 +9,7 @@
 #include "include/parser_helpers.h"
 #include "include/token.h"
 #include "include/token_matchers.h"
+#include "include/util.h"
 #include <stdio.h>
 #include <stdlib.h>
@@ -16,6 +17,7 @@
 #include <strings.h>
 static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors);
+static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors);
 static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
 size_t parser_sizeof(void) {
@@ -28,6 +30,8 @@ parser_T* parser_init(lexer_T* lexer) {
   parser->lexer = lexer;
   parser->current_token = lexer_next_token(lexer);
   parser->open_tags_stack = array_init(16);
+  parser->state = PARSER_STATE_DATA;
+  parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
   return parser;
 }
@@ -184,14 +188,96 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
 static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
   array_T* errors = array_init(8);
-  token_T* identifier = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
-  if (identifier == NULL) { parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors); }
+  token_T* at_token = parser_consume_if_present(parser, TOKEN_AT);
+  token_T* first_token = NULL;
+  if (at_token != NULL) {
+    first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
+    if (first_token == NULL) {
+      parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
+      AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
+        ast_html_attribute_name_node_init(at_token, at_token->location->start, at_token->location->end, errors);
+      token_free(at_token);
+      return attribute_name;
+    }
+  } else {
+    first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
+    if (first_token == NULL) {
+      parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
+      AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = ast_html_attribute_name_node_init(NULL, NULL, NULL, errors);
+      return attribute_name;
+    }
+  }
+  buffer_T name_buffer = buffer_new();
+  position_T* start_position;
+  if (at_token != NULL) {
+    buffer_append(&name_buffer, at_token->value);
+    start_position = position_copy(at_token->location->start);
+  } else {
+    start_position = position_copy(first_token->location->start);
+  }
+  buffer_append(&name_buffer, first_token->value);
+  position_T* end_position = position_copy(first_token->location->end);
+  size_t range_end = first_token->range->to;
+  while (parser->current_token->type == TOKEN_CHARACTER && parser->current_token->value
+         && strcmp(parser->current_token->value, ".") == 0) {
+    token_T* dot_token = parser_advance(parser);
+    buffer_append(&name_buffer, dot_token->value);
+    position_free(end_position);
+    end_position = position_copy(dot_token->location->end);
+    range_end = dot_token->range->to;
+    token_free(dot_token);
+    if (parser->current_token->type == TOKEN_IDENTIFIER) {
+      token_T* next_identifier = parser_advance(parser);
+      buffer_append(&name_buffer, next_identifier->value);
+      position_free(end_position);
+      end_position = position_copy(next_identifier->location->end);
+      range_end = next_identifier->range->to;
+      token_free(next_identifier);
+    } else {
+      break;
+    }
+  }
+  token_T* combined_token = calloc(1, sizeof(token_T));
+  combined_token->value = herb_strdup(name_buffer.value);
+  combined_token->type = TOKEN_IDENTIFIER;
+  combined_token->location =
+    location_from(start_position->line, start_position->column, end_position->line, end_position->column);
+  size_t range_start = at_token != NULL ? at_token->range->from : first_token->range->from;
+  combined_token->range = range_init(range_start, range_end);
   AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
-    ast_html_attribute_name_node_init(identifier, identifier->location->start, identifier->location->end, errors);
+    ast_html_attribute_name_node_init(combined_token, start_position, end_position, errors);
+  buffer_free(&name_buffer);
+  position_free(start_position);
+  position_free(end_position);
+  token_free(first_token);
-  token_free(identifier);
+  if (at_token != NULL) { token_free(at_token); }
+  token_free(combined_token);
   return attribute_name;
 }
@@ -300,6 +386,30 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
   // <div id="home">
   if (token_is(parser, TOKEN_QUOTE)) { return parser_parse_quoted_html_attribute_value(parser, children, errors); }
+  if (token_is(parser, TOKEN_BACKTICK)) {
+    token_T* token = parser_advance(parser);
+    position_T* start = position_copy(token->location->start);
+    position_T* end = position_copy(token->location->end);
+    append_unexpected_error(
+      "Invalid quote character for HTML attribute",
+      "single quote (') or double quote (\")",
+      "backtick (`)",
+      start,
+      end,
+      errors
+    );
+    AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
+      ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
+    position_free(start);
+    position_free(end);
+    token_free(token);
+    return value;
+  }
   token_T* token = parser_advance(parser);
   append_unexpected_error(
@@ -329,9 +439,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
 static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) {
   AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = parser_parse_html_attribute_name(parser);
+  while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
+    token_T* whitespace = parser_advance(parser);
+    token_free(whitespace);
+  }
   token_T* equals = parser_consume_if_present(parser, TOKEN_EQUALS);
   if (equals != NULL) {
+    while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
+      token_T* whitespace = parser_advance(parser);
+      token_free(whitespace);
+    }
     AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
     AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
@@ -390,10 +510,15 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
       continue;
     }
+    if (parser->current_token->type == TOKEN_AT) {
+      array_append(children, parser_parse_html_attribute(parser));
+      continue;
+    }
     parser_append_unexpected_error(
       parser,
       "Unexpected Token",
-      "TOKEN_IDENTIFIER, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
+      "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
       errors
     );
   }
@@ -441,6 +566,12 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
   token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
   token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
+  while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
+    token_T* whitespace = parser_advance(parser);
+    token_free(whitespace);
+  }
   token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
   if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
@@ -502,7 +633,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
   parser_push_open_tag(parser, open_tag->tag_name);
-  parser_parse_in_data_state(parser, body, errors);
+  if (open_tag->tag_name->value && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
+    foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
+    parser_enter_foreign_content(parser, content_type);
+    parser_parse_foreign_content(parser, body, errors);
+  } else {
+    parser_parse_in_data_state(parser, body, errors);
+  }
   if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return parser_handle_missing_close_tag(open_tag, body, errors); }
@@ -593,6 +730,83 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
   return erb_node;
 }
+static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors) {
+  buffer_T content = buffer_new();
+  position_T* start = position_copy(parser->current_token->location->start);
+  const char* expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
+  if (expected_closing_tag == NULL) {
+    parser_exit_foreign_content(parser);
+    position_free(start);
+    buffer_free(&content);
+    return;
+  }
+  while (!token_is(parser, TOKEN_EOF)) {
+    if (token_is(parser, TOKEN_ERB_START)) {
+      parser_append_literal_node_from_buffer(parser, &content, children, start);
+      AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
+      array_append(children, erb_node);
+      position_free(start);
+      start = position_copy(parser->current_token->location->start);
+      continue;
+    }
+    if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
+      size_t saved_position = parser->lexer->current_position;
+      size_t saved_line = parser->lexer->current_line;
+      size_t saved_column = parser->lexer->current_column;
+      size_t saved_previous_position = parser->lexer->previous_position;
+      size_t saved_previous_line = parser->lexer->previous_line;
+      size_t saved_previous_column = parser->lexer->previous_column;
+      char saved_char = parser->lexer->current_character;
+      lexer_state_T saved_state = parser->lexer->state;
+      token_T* next_token = lexer_next_token(parser->lexer);
+      bool is_potential_match = false;
+      if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
+        is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
+      }
+      parser->lexer->current_position = saved_position;
+      parser->lexer->current_line = saved_line;
+      parser->lexer->current_column = saved_column;
+      parser->lexer->previous_position = saved_previous_position;
+      parser->lexer->previous_line = saved_previous_line;
+      parser->lexer->previous_column = saved_previous_column;
+      parser->lexer->current_character = saved_char;
+      parser->lexer->state = saved_state;
+      if (next_token) { token_free(next_token); }
+      if (is_potential_match) {
+        parser_append_literal_node_from_buffer(parser, &content, children, start);
+        parser_exit_foreign_content(parser);
+        position_free(start);
+        buffer_free(&content);
+        return;
+      }
+    }
+    token_T* token = parser_advance(parser);
+    buffer_append(&content, token->value);
+    token_free(token);
+  }
+  parser_append_literal_node_from_buffer(parser, &content, children, start);
+  parser_exit_foreign_content(parser);
+  position_free(start);
+  buffer_free(&content);
+}
 static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors) {
   while (token_is_none_of(parser, TOKEN_HTML_TAG_START_CLOSE, TOKEN_EOF)) {
     if (token_is(parser, TOKEN_ERB_START)) {
@@ -618,12 +832,14 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
     if (token_is_any_of(
           parser,
           TOKEN_AMPERSAND,
+          TOKEN_AT,
           TOKEN_CHARACTER,
           TOKEN_COLON,
           TOKEN_DASH,
           TOKEN_EQUALS,
           TOKEN_EXCLAMATION,
           TOKEN_IDENTIFIER,
+          TOKEN_NBSP,
           TOKEN_NEWLINE,
           TOKEN_PERCENT,
           TOKEN_QUOTE,
@@ -639,8 +855,8 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
     parser_append_unexpected_error(
       parser,
       "Unexpected token",
-      "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, or "
-      "TOKEN_NEWLINE",
+      "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
+      "TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
       errors
     );
   }

package/extension/libherb/parser.h CHANGED Viewed

@@ -5,10 +5,22 @@
 #include "ast_node.h"
 #include "lexer.h"
+typedef enum {
+  FOREIGN_CONTENT_UNKNOWN = 0,
+  FOREIGN_CONTENT_SCRIPT,
+  FOREIGN_CONTENT_STYLE,
+  // FOREIGN_CONTENT_RUBY,
+  // FOREIGN_CONTENT_TEMPLATE
+} foreign_content_type_T;
+typedef enum { PARSER_STATE_DATA, PARSER_STATE_FOREIGN_CONTENT } parser_state_T;
 typedef struct PARSER_STRUCT {
   lexer_T* lexer;
   token_T* current_token;
   array_T* open_tags_stack;
+  parser_state_T state;
+  foreign_content_type_T foreign_content_type;
 } parser_T;
 parser_T* parser_init(lexer_T* lexer);

package/extension/libherb/parser_helpers.c CHANGED Viewed

@@ -8,6 +8,7 @@
 #include "include/lexer.h"
 #include "include/parser.h"
 #include "include/token.h"
+#include "include/token_matchers.h"
 #include <stdio.h>
 #include <strings.h>
@@ -54,6 +55,43 @@ bool parser_in_svg_context(const parser_T* parser) {
   return false;
 }
+// ===== Foreign Content Handling =====
+foreign_content_type_T parser_get_foreign_content_type(const char* tag_name) {
+  if (tag_name == NULL) { return FOREIGN_CONTENT_UNKNOWN; }
+  if (strcasecmp(tag_name, "script") == 0) { return FOREIGN_CONTENT_SCRIPT; }
+  if (strcasecmp(tag_name, "style") == 0) { return FOREIGN_CONTENT_STYLE; }
+  return FOREIGN_CONTENT_UNKNOWN;
+}
+bool parser_is_foreign_content_tag(const char* tag_name) {
+  return parser_get_foreign_content_type(tag_name) != FOREIGN_CONTENT_UNKNOWN;
+}
+const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type) {
+  switch (type) {
+    case FOREIGN_CONTENT_SCRIPT: return "script";
+    case FOREIGN_CONTENT_STYLE: return "style";
+    default: return NULL;
+  }
+}
+void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type) {
+  if (parser == NULL) { return; }
+  parser->state = PARSER_STATE_FOREIGN_CONTENT;
+  parser->foreign_content_type = type;
+}
+void parser_exit_foreign_content(parser_T* parser) {
+  if (parser == NULL) { return; }
+  parser->state = PARSER_STATE_DATA;
+  parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
+}
 void parser_append_unexpected_error(parser_T* parser, const char* description, const char* expected, array_T* errors) {
   token_T* token = parser_advance(parser);
@@ -166,3 +204,11 @@ void parser_handle_mismatched_tags(
     );
   }
 }
+bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type) {
+  const char* expected_tag_name = parser_get_foreign_content_closing_tag(expected_type);
+  if (expected_tag_name == NULL || tag_name == NULL) { return false; }
+  return strcmp(tag_name, expected_tag_name) == 0;
+}

package/extension/libherb/parser_helpers.h CHANGED Viewed

@@ -24,6 +24,15 @@ void parser_append_literal_node_from_buffer(
 bool parser_in_svg_context(const parser_T* parser);
+foreign_content_type_T parser_get_foreign_content_type(const char* tag_name);
+bool parser_is_foreign_content_tag(const char* tag_name);
+const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type);
+void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type);
+void parser_exit_foreign_content(parser_T* parser);
+bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type);
 token_T* parser_advance(parser_T* parser);
 token_T* parser_consume_if_present(parser_T* parser, token_type_T type);
 token_T* parser_consume_expected(parser_T* parser, token_type_T type, array_T* array);

package/extension/libherb/token.c CHANGED Viewed

@@ -55,6 +55,7 @@ const char* token_type_to_string(const token_type_T type) {
     case TOKEN_HTML_COMMENT_END: return "TOKEN_HTML_COMMENT_END";
     case TOKEN_EQUALS: return "TOKEN_EQUALS";
     case TOKEN_QUOTE: return "TOKEN_QUOTE";
+    case TOKEN_BACKTICK: return "TOKEN_BACKTICK";
     case TOKEN_DASH: return "TOKEN_DASH";
     case TOKEN_UNDERSCORE: return "TOKEN_UNDERSCORE";
     case TOKEN_EXCLAMATION: return "TOKEN_EXCLAMATION";

package/extension/libherb/token_struct.h CHANGED Viewed

@@ -28,6 +28,7 @@ typedef enum {
   TOKEN_SLASH,       // /
   TOKEN_EQUALS,      // =
   TOKEN_QUOTE,       // ", '
+  TOKEN_BACKTICK,    // `
   TOKEN_DASH,        // -
   TOKEN_UNDERSCORE,  // _
   TOKEN_EXCLAMATION, // !

package/extension/libherb/utf8.c ADDED Viewed

@@ -0,0 +1,46 @@
+#include "include/utf8.h"
+// UTF-8 byte patterns:
+//   0xxxxxxx = 1 byte (ASCII)
+//   110xxxxx = 2 bytes
+//   1110xxxx = 3 bytes
+//   11110xxx = 4 bytes
+int utf8_char_byte_length(unsigned char first_byte) {
+  if ((first_byte & 0x80) == 0) {
+    return 1;
+  } else if ((first_byte & 0xE0) == 0xC0) {
+    return 2;
+  } else if ((first_byte & 0xF0) == 0xE0) {
+    return 3;
+  } else if ((first_byte & 0xF8) == 0xF0) {
+    return 4;
+  }
+  return 1;
+}
+// Continuation bytes have pattern 10xxxxxx
+bool utf8_is_valid_continuation_byte(unsigned char byte) {
+  return (byte & 0xC0) == 0x80;
+}
+int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
+  if (position >= max_length) { return 0; }
+  unsigned char first_byte = (unsigned char) str[position];
+  int expected_length = utf8_char_byte_length(first_byte);
+  if (position + expected_length > max_length) {
+    return 1; // Not enough bytes, treat as single byte
+  }
+  if (expected_length > 1) {
+    for (int i = 1; i < expected_length; i++) {
+      if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
+        return 1; // Invalid continuation byte, treat first byte as single byte
+      }
+    }
+  }
+  return expected_length;
+}

package/extension/libherb/utf8.h ADDED Viewed

@@ -0,0 +1,11 @@
+#ifndef HERB_UTF8_H
+#define HERB_UTF8_H
+#include <stdbool.h>
+#include <stdlib.h>
+int utf8_char_byte_length(unsigned char first_byte);
+int utf8_sequence_length(const char* str, size_t position, size_t max_length);
+bool utf8_is_valid_continuation_byte(unsigned char byte);
+#endif

package/extension/libherb/version.h CHANGED Viewed

@@ -1,6 +1,6 @@
 #ifndef HERB_VERSION_H
 #define HERB_VERSION_H
-#define HERB_VERSION "0.4.2"
+#define HERB_VERSION "0.5.0"
 #endif

package/extension/libherb/visitor.c CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/visitor.c.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/visitor.c.erb
 #include <stdio.h>

package/extension/nodes.cpp CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/nodes.cpp.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/javascript/packages/node/extension/nodes.cpp.erb
 #include <node_api.h>
 #include "error_helpers.h"

package/extension/nodes.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/nodes.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/javascript/packages/node/extension/nodes.h.erb
 #ifndef HERB_EXTENSION_NODES_H
 #define HERB_EXTENSION_NODES_H

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@herb-tools/node",
-  "version": "0.4.2",
+  "version": "0.5.0",
   "description": "Native Node.js addon for HTML-aware ERB parsing using Herb.",
   "type": "module",
   "license": "MIT",
@@ -48,7 +48,7 @@
     "host": "https://github.com/marcoroth/herb/releases/download/"
   },
   "dependencies": {
-    "@herb-tools/core": "0.4.2",
+    "@herb-tools/core": "0.5.0",
     "@mapbox/node-pre-gyp": "^2.0.0",
     "node-addon-api": "^5.1.0",
     "node-pre-gyp-github": "^2.0.0"