herb 0.4.2-x86_64-linux-musl → 0.5.0-x86_64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/herb/error_helpers.c +1 -1
- data/ext/herb/error_helpers.h +1 -1
- data/ext/herb/nodes.c +2 -2
- data/ext/herb/nodes.h +1 -1
- data/lib/herb/3.0/herb.so +0 -0
- data/lib/herb/3.1/herb.so +0 -0
- data/lib/herb/3.2/herb.so +0 -0
- data/lib/herb/3.3/herb.so +0 -0
- data/lib/herb/3.4/herb.so +0 -0
- data/lib/herb/ast/nodes.rb +1 -1
- data/lib/herb/cli.rb +2 -2
- data/lib/herb/errors.rb +1 -1
- data/lib/herb/project.rb +2 -0
- data/lib/herb/version.rb +1 -1
- data/lib/herb/visitor.rb +1 -1
- data/sig/serialized_ast_errors.rbs +1 -1
- data/sig/serialized_ast_nodes.rbs +1 -1
- data/src/analyze.c +2 -1
- data/src/ast_nodes.c +1 -1
- data/src/ast_pretty_print.c +1 -1
- data/src/errors.c +1 -1
- data/src/include/ast_nodes.h +1 -1
- data/src/include/ast_pretty_print.h +1 -1
- data/src/include/errors.h +1 -1
- data/src/include/parser.h +12 -0
- data/src/include/parser_helpers.h +9 -0
- data/src/include/token_struct.h +1 -0
- data/src/include/utf8.h +11 -0
- data/src/include/version.h +1 -1
- data/src/lexer.c +50 -2
- data/src/parser.c +224 -8
- data/src/parser_helpers.c +46 -0
- data/src/token.c +1 -0
- data/src/utf8.c +46 -0
- data/src/visitor.c +1 -1
- metadata +4 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 664c9b2f6e666c2628a3e639478bcd69c1309991af6feae13b3e7612c45c7620
         | 
| 4 | 
            +
              data.tar.gz: 93a3c442cf431c5b3eb6c497156a50756f475b129e811e0948b70cb87e87d74c
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 8afc11ca461b7f1fd92e48cc0b2353726d1dc6f44cb028d364f31af80547aac720df059b8fed270776d174ec5c5c3d533784800d50cee55632682a9806feb4b2
         | 
| 7 | 
            +
              data.tar.gz: 52618ed5ff7571e94118ac3c3d4a7cb34a124b75f5e435158ce7e77a1d51134d21cd384e56a374cdd24cff605ed26081d81cc21fdc98bd0fe01a3116e0d61cfc
         | 
    
        data/README.md
    CHANGED
    
    | @@ -37,7 +37,7 @@ Herb provides a complete ecosystem of HTML+ERB tooling, designed to simplify and | |
| 37 37 | 
             
              Automatic, consistent formatting for HTML+ERB files, reducing manual styling and enforcing a standard across projects. Currently in experimental preview - use with caution on version-controlled files.
         | 
| 38 38 |  | 
| 39 39 | 
             
            - **Herb Linter** ([available now](https://herb-tools.dev/projects/linter)):  
         | 
| 40 | 
            -
              Static analysis for your HTML+ERB templates to enforce best practices and quickly identify common mistakes with  | 
| 40 | 
            +
              Static analysis for your HTML+ERB templates to enforce best practices and quickly identify common mistakes with plenty of rules.
         | 
| 41 41 |  | 
| 42 42 | 
             
            You can use Herb programmatically in **Ruby**, as well as in **JavaScript** via Node.js, WebAssembly, or directly in browsers.
         | 
| 43 43 |  | 
    
        data/ext/herb/error_helpers.c
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/error_helpers.c.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #include <ruby.h>
         | 
| 5 5 |  | 
    
        data/ext/herb/error_helpers.h
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/error_helpers.h.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #ifndef HERB_EXTENSION_ERROR_HELPERS_H
         | 
| 5 5 | 
             
            #define HERB_EXTENSION_ERROR_HELPERS_H
         | 
    
        data/ext/herb/nodes.c
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/nodes.c.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #include <ruby.h>
         | 
| 5 5 |  | 
| @@ -415,7 +415,7 @@ static VALUE rb_erb_content_node_from_c_struct(AST_ERB_CONTENT_NODE_T* erb_conte | |
| 415 415 | 
             
              VALUE erb_content_node_tag_opening = rb_token_from_c_struct(erb_content_node->tag_opening);
         | 
| 416 416 | 
             
              VALUE erb_content_node_content = rb_token_from_c_struct(erb_content_node->content);
         | 
| 417 417 | 
             
              VALUE erb_content_node_tag_closing = rb_token_from_c_struct(erb_content_node->tag_closing);
         | 
| 418 | 
            -
              /* #<Herb::Template::AnalyzedRubyField: | 
| 418 | 
            +
              /* #<Herb::Template::AnalyzedRubyField:0x00007fffe32d1398 @name="analyzed_ruby", @options={kind: nil}> */
         | 
| 419 419 | 
             
              VALUE erb_content_node_analyzed_ruby = Qnil;
         | 
| 420 420 | 
             
              VALUE erb_content_node_parsed = (erb_content_node->parsed) ? Qtrue : Qfalse;
         | 
| 421 421 | 
             
              VALUE erb_content_node_valid = (erb_content_node->valid) ? Qtrue : Qfalse;
         | 
    
        data/ext/herb/nodes.h
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/nodes.h.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #ifndef HERB_EXTENSION_NODES_H
         | 
| 5 5 | 
             
            #define HERB_EXTENSION_NODES_H
         | 
    
        data/lib/herb/3.0/herb.so
    CHANGED
    
    | Binary file | 
    
        data/lib/herb/3.1/herb.so
    CHANGED
    
    | Binary file | 
    
        data/lib/herb/3.2/herb.so
    CHANGED
    
    | Binary file | 
    
        data/lib/herb/3.3/herb.so
    CHANGED
    
    | Binary file | 
    
        data/lib/herb/3.4/herb.so
    CHANGED
    
    | Binary file | 
    
        data/lib/herb/ast/nodes.rb
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 2 | 
             
            # typed: true
         | 
| 3 3 |  | 
| 4 4 | 
             
            # NOTE: This file is generated by the templates/template.rb script and should not be
         | 
| 5 | 
            -
            # modified manually. See /Users/marcoroth/Development/herb-release- | 
| 5 | 
            +
            # modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/lib/herb/ast/nodes.rb.erb
         | 
| 6 6 |  | 
| 7 7 | 
             
            module Herb
         | 
| 8 8 | 
             
              module AST
         | 
    
        data/lib/herb/cli.rb
    CHANGED
    
    | @@ -110,8 +110,8 @@ class Herb::CLI | |
| 110 110 | 
             
                              project.no_interactive = no_interactive
         | 
| 111 111 | 
             
                              project.no_log_file = no_log_file
         | 
| 112 112 | 
             
                              project.no_timing = no_timing
         | 
| 113 | 
            -
                              project.parse!
         | 
| 114 | 
            -
                              exit(0)
         | 
| 113 | 
            +
                              has_issues = project.parse!
         | 
| 114 | 
            +
                              exit(has_issues ? 1 : 0)
         | 
| 115 115 | 
             
                            when "parse"
         | 
| 116 116 | 
             
                              Herb.parse(file_content)
         | 
| 117 117 | 
             
                            when "lex"
         | 
    
        data/lib/herb/errors.rb
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 2 | 
             
            # typed: true
         | 
| 3 3 |  | 
| 4 4 | 
             
            # NOTE: This file is generated by the templates/template.rb script and should not be
         | 
| 5 | 
            -
            # modified manually. See /Users/marcoroth/Development/herb-release- | 
| 5 | 
            +
            # modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/lib/herb/errors.rb.erb
         | 
| 6 6 |  | 
| 7 7 | 
             
            module Herb
         | 
| 8 8 | 
             
              module Errors
         | 
    
        data/lib/herb/project.rb
    CHANGED
    
    
    
        data/lib/herb/version.rb
    CHANGED
    
    
    
        data/lib/herb/visitor.rb
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 2 | 
             
            # typed: true
         | 
| 3 3 |  | 
| 4 4 | 
             
            # NOTE: This file is generated by the templates/template.rb script and should not be
         | 
| 5 | 
            -
            # modified manually. See /Users/marcoroth/Development/herb-release- | 
| 5 | 
            +
            # modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/lib/herb/visitor.rb.erb
         | 
| 6 6 |  | 
| 7 7 | 
             
            module Herb
         | 
| 8 8 | 
             
              class Visitor
         | 
    
        data/sig/serialized_ast_errors.rbs
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 2 | 
             
            # typed: true
         | 
| 3 3 |  | 
| 4 4 | 
             
            # NOTE: This file is generated by the templates/template.rb script and should not be
         | 
| 5 | 
            -
            # modified manually. See /Users/marcoroth/Development/herb-release- | 
| 5 | 
            +
            # modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/sig/serialized_ast_errors.rbs.erb
         | 
| 6 6 |  | 
| 7 7 | 
             
            module Herb
         | 
| 8 8 | 
             
              type serialized_unexpected_error = serialized_error & {
         | 
    
        data/sig/serialized_ast_nodes.rbs
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 2 | 
             
            # typed: true
         | 
| 3 3 |  | 
| 4 4 | 
             
            # NOTE: This file is generated by the templates/template.rb script and should not be
         | 
| 5 | 
            -
            # modified manually. See /Users/marcoroth/Development/herb-release- | 
| 5 | 
            +
            # modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/sig/serialized_ast_nodes.rbs.erb
         | 
| 6 6 |  | 
| 7 7 | 
             
            module Herb
         | 
| 8 8 | 
             
              type serialized_document_node = serialized_node & {
         | 
    
        data/src/analyze.c
    CHANGED
    
    | @@ -50,7 +50,8 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) { | |
| 50 50 | 
             
                AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node;
         | 
| 51 51 |  | 
| 52 52 | 
             
                const char* opening = erb_content_node->tag_opening->value;
         | 
| 53 | 
            -
             | 
| 53 | 
            +
             | 
| 54 | 
            +
                if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0 && strcmp(opening, "<%#") != 0) {
         | 
| 54 55 | 
             
                  analyzed_ruby_T* analyzed = herb_analyze_ruby(erb_content_node->content->value);
         | 
| 55 56 |  | 
| 56 57 | 
             
                  if (false) { pretty_print_analyed_ruby(analyzed, erb_content_node->content->value); }
         | 
    
        data/src/ast_nodes.c
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/ast_nodes.c.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #include <stdio.h>
         | 
| 5 5 | 
             
            #include <stdbool.h>
         | 
    
        data/src/ast_pretty_print.c
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/ast_pretty_print.c.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #include "include/ast_node.h"
         | 
| 5 5 | 
             
            #include "include/ast_nodes.h"
         | 
    
        data/src/errors.c
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/errors.c.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #include "include/array.h"
         | 
| 5 5 | 
             
            #include "include/errors.h"
         | 
    
        data/src/include/ast_nodes.h
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_nodes.h.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #ifndef HERB_AST_NODES_H
         | 
| 5 5 | 
             
            #define HERB_AST_NODES_H
         | 
    
        data/src/include/ast_pretty_print.h
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_pretty_print.h.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #ifndef HERB_AST_PRETTY_PRINT_H
         | 
| 5 5 | 
             
            #define HERB_AST_PRETTY_PRINT_H
         | 
    
        data/src/include/errors.h
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/errors.h.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #ifndef HERB_ERRORS_H
         | 
| 5 5 | 
             
            #define HERB_ERRORS_H
         | 
    
        data/src/include/parser.h
    CHANGED
    
    | @@ -5,10 +5,22 @@ | |
| 5 5 | 
             
            #include "ast_node.h"
         | 
| 6 6 | 
             
            #include "lexer.h"
         | 
| 7 7 |  | 
| 8 | 
            +
            typedef enum {
         | 
| 9 | 
            +
              FOREIGN_CONTENT_UNKNOWN = 0,
         | 
| 10 | 
            +
              FOREIGN_CONTENT_SCRIPT,
         | 
| 11 | 
            +
              FOREIGN_CONTENT_STYLE,
         | 
| 12 | 
            +
              // FOREIGN_CONTENT_RUBY,
         | 
| 13 | 
            +
              // FOREIGN_CONTENT_TEMPLATE
         | 
| 14 | 
            +
            } foreign_content_type_T;
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            typedef enum { PARSER_STATE_DATA, PARSER_STATE_FOREIGN_CONTENT } parser_state_T;
         | 
| 17 | 
            +
             | 
| 8 18 | 
             
            typedef struct PARSER_STRUCT {
         | 
| 9 19 | 
             
              lexer_T* lexer;
         | 
| 10 20 | 
             
              token_T* current_token;
         | 
| 11 21 | 
             
              array_T* open_tags_stack;
         | 
| 22 | 
            +
              parser_state_T state;
         | 
| 23 | 
            +
              foreign_content_type_T foreign_content_type;
         | 
| 12 24 | 
             
            } parser_T;
         | 
| 13 25 |  | 
| 14 26 | 
             
            parser_T* parser_init(lexer_T* lexer);
         | 
    
        data/src/include/parser_helpers.h
    CHANGED
    
    | @@ -24,6 +24,15 @@ void parser_append_literal_node_from_buffer( | |
| 24 24 |  | 
| 25 25 | 
             
            bool parser_in_svg_context(const parser_T* parser);
         | 
| 26 26 |  | 
| 27 | 
            +
            foreign_content_type_T parser_get_foreign_content_type(const char* tag_name);
         | 
| 28 | 
            +
            bool parser_is_foreign_content_tag(const char* tag_name);
         | 
| 29 | 
            +
            const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type);
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type);
         | 
| 32 | 
            +
            void parser_exit_foreign_content(parser_T* parser);
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type);
         | 
| 35 | 
            +
             | 
| 27 36 | 
             
            token_T* parser_advance(parser_T* parser);
         | 
| 28 37 | 
             
            token_T* parser_consume_if_present(parser_T* parser, token_type_T type);
         | 
| 29 38 | 
             
            token_T* parser_consume_expected(parser_T* parser, token_type_T type, array_T* array);
         | 
    
        data/src/include/token_struct.h
    CHANGED
    
    
    
        data/src/include/utf8.h
    ADDED
    
    | @@ -0,0 +1,11 @@ | |
| 1 | 
            +
            #ifndef HERB_UTF8_H
         | 
| 2 | 
            +
            #define HERB_UTF8_H
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            #include <stdbool.h>
         | 
| 5 | 
            +
            #include <stdlib.h>
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            int utf8_char_byte_length(unsigned char first_byte);
         | 
| 8 | 
            +
            int utf8_sequence_length(const char* str, size_t position, size_t max_length);
         | 
| 9 | 
            +
            bool utf8_is_valid_continuation_byte(unsigned char byte);
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            #endif
         | 
    
        data/src/include/version.h
    CHANGED
    
    
    
        data/src/lexer.c
    CHANGED
    
    | @@ -1,6 +1,7 @@ | |
| 1 1 | 
             
            #include "include/buffer.h"
         | 
| 2 2 | 
             
            #include "include/lexer_peek_helpers.h"
         | 
| 3 3 | 
             
            #include "include/token.h"
         | 
| 4 | 
            +
            #include "include/utf8.h"
         | 
| 4 5 | 
             
            #include "include/util.h"
         | 
| 5 6 |  | 
| 6 7 | 
             
            #include <ctype.h>
         | 
| @@ -84,6 +85,23 @@ static void lexer_advance(lexer_T* lexer) { | |
| 84 85 | 
             
              }
         | 
| 85 86 | 
             
            }
         | 
| 86 87 |  | 
| 88 | 
            +
            static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
         | 
| 89 | 
            +
              if (byte_count <= 0) { return; }
         | 
| 90 | 
            +
             | 
| 91 | 
            +
              if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
         | 
| 92 | 
            +
                if (!is_newline(lexer->current_character)) { lexer->current_column++; }
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                lexer->current_position += byte_count;
         | 
| 95 | 
            +
             | 
| 96 | 
            +
                if (lexer->current_position >= lexer->source_length) {
         | 
| 97 | 
            +
                  lexer->current_position = lexer->source_length;
         | 
| 98 | 
            +
                  lexer->current_character = '\0';
         | 
| 99 | 
            +
                } else {
         | 
| 100 | 
            +
                  lexer->current_character = lexer->source[lexer->current_position];
         | 
| 101 | 
            +
                }
         | 
| 102 | 
            +
              }
         | 
| 103 | 
            +
            }
         | 
| 104 | 
            +
             | 
| 87 105 | 
             
            static void lexer_advance_by(lexer_T* lexer, const size_t count) {
         | 
| 88 106 | 
             
              for (size_t i = 0; i < count; i++) {
         | 
| 89 107 | 
             
                lexer_advance(lexer);
         | 
| @@ -116,6 +134,35 @@ static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) { | |
| 116 134 | 
             
              return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
         | 
| 117 135 | 
             
            }
         | 
| 118 136 |  | 
| 137 | 
            +
            static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
         | 
| 138 | 
            +
              int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);
         | 
| 139 | 
            +
             | 
| 140 | 
            +
              if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
         | 
| 141 | 
            +
             | 
| 142 | 
            +
              char* utf8_char = malloc(char_byte_length + 1);
         | 
| 143 | 
            +
             | 
| 144 | 
            +
              if (!utf8_char) { return lexer_advance_current(lexer, type); }
         | 
| 145 | 
            +
             | 
| 146 | 
            +
              for (int i = 0; i < char_byte_length; i++) {
         | 
| 147 | 
            +
                if (lexer->current_position + i >= lexer->source_length) {
         | 
| 148 | 
            +
                  free(utf8_char);
         | 
| 149 | 
            +
                  return lexer_advance_current(lexer, type);
         | 
| 150 | 
            +
                }
         | 
| 151 | 
            +
             | 
| 152 | 
            +
                utf8_char[i] = lexer->source[lexer->current_position + i];
         | 
| 153 | 
            +
              }
         | 
| 154 | 
            +
             | 
| 155 | 
            +
              utf8_char[char_byte_length] = '\0';
         | 
| 156 | 
            +
             | 
| 157 | 
            +
              lexer_advance_utf8_bytes(lexer, char_byte_length);
         | 
| 158 | 
            +
             | 
| 159 | 
            +
              token_T* token = token_init(utf8_char, type, lexer);
         | 
| 160 | 
            +
             | 
| 161 | 
            +
              free(utf8_char);
         | 
| 162 | 
            +
             | 
| 163 | 
            +
              return token;
         | 
| 164 | 
            +
            }
         | 
| 165 | 
            +
             | 
| 119 166 | 
             
            static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
         | 
| 120 167 | 
             
              if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
         | 
| 121 168 | 
             
                return lexer_advance_with(lexer, value, type);
         | 
| @@ -232,7 +279,7 @@ token_T* lexer_next_token(lexer_T* lexer) { | |
| 232 279 | 
             
              if (isspace(lexer->current_character)) { return lexer_parse_whitespace(lexer); }
         | 
| 233 280 |  | 
| 234 281 | 
             
              if (lexer->current_character == '\xC2' && lexer_peek(lexer, 1) == '\xA0') {
         | 
| 235 | 
            -
                return  | 
| 282 | 
            +
                return lexer_advance_utf8_character(lexer, TOKEN_NBSP);
         | 
| 236 283 | 
             
              }
         | 
| 237 284 |  | 
| 238 285 | 
             
              switch (lexer->current_character) {
         | 
| @@ -278,11 +325,12 @@ token_T* lexer_next_token(lexer_T* lexer) { | |
| 278 325 |  | 
| 279 326 | 
             
                case '"':
         | 
| 280 327 | 
             
                case '\'': return lexer_advance_current(lexer, TOKEN_QUOTE);
         | 
| 328 | 
            +
                case '`': return lexer_advance_current(lexer, TOKEN_BACKTICK);
         | 
| 281 329 |  | 
| 282 330 | 
             
                default: {
         | 
| 283 331 | 
             
                  if (isalnum(lexer->current_character)) { return lexer_parse_identifier(lexer); }
         | 
| 284 332 |  | 
| 285 | 
            -
                  return  | 
| 333 | 
            +
                  return lexer_advance_utf8_character(lexer, TOKEN_CHARACTER);
         | 
| 286 334 | 
             
                }
         | 
| 287 335 | 
             
              }
         | 
| 288 336 | 
             
            }
         | 
    
        data/src/parser.c
    CHANGED
    
    | @@ -9,6 +9,7 @@ | |
| 9 9 | 
             
            #include "include/parser_helpers.h"
         | 
| 10 10 | 
             
            #include "include/token.h"
         | 
| 11 11 | 
             
            #include "include/token_matchers.h"
         | 
| 12 | 
            +
            #include "include/util.h"
         | 
| 12 13 |  | 
| 13 14 | 
             
            #include <stdio.h>
         | 
| 14 15 | 
             
            #include <stdlib.h>
         | 
| @@ -16,6 +17,7 @@ | |
| 16 17 | 
             
            #include <strings.h>
         | 
| 17 18 |  | 
| 18 19 | 
             
            static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors);
         | 
| 20 | 
            +
            static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors);
         | 
| 19 21 | 
             
            static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
         | 
| 20 22 |  | 
| 21 23 | 
             
            size_t parser_sizeof(void) {
         | 
| @@ -28,6 +30,8 @@ parser_T* parser_init(lexer_T* lexer) { | |
| 28 30 | 
             
              parser->lexer = lexer;
         | 
| 29 31 | 
             
              parser->current_token = lexer_next_token(lexer);
         | 
| 30 32 | 
             
              parser->open_tags_stack = array_init(16);
         | 
| 33 | 
            +
              parser->state = PARSER_STATE_DATA;
         | 
| 34 | 
            +
              parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
         | 
| 31 35 |  | 
| 32 36 | 
             
              return parser;
         | 
| 33 37 | 
             
            }
         | 
| @@ -184,14 +188,96 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T | |
| 184 188 |  | 
| 185 189 | 
             
            static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
         | 
| 186 190 | 
             
              array_T* errors = array_init(8);
         | 
| 187 | 
            -
              token_T* identifier = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
         | 
| 188 191 |  | 
| 189 | 
            -
               | 
| 192 | 
            +
              token_T* at_token = parser_consume_if_present(parser, TOKEN_AT);
         | 
| 193 | 
            +
              token_T* first_token = NULL;
         | 
| 194 | 
            +
             | 
| 195 | 
            +
              if (at_token != NULL) {
         | 
| 196 | 
            +
                first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
         | 
| 197 | 
            +
             | 
| 198 | 
            +
                if (first_token == NULL) {
         | 
| 199 | 
            +
                  parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
         | 
| 200 | 
            +
             | 
| 201 | 
            +
                  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
         | 
| 202 | 
            +
                    ast_html_attribute_name_node_init(at_token, at_token->location->start, at_token->location->end, errors);
         | 
| 203 | 
            +
             | 
| 204 | 
            +
                  token_free(at_token);
         | 
| 205 | 
            +
             | 
| 206 | 
            +
                  return attribute_name;
         | 
| 207 | 
            +
                }
         | 
| 208 | 
            +
              } else {
         | 
| 209 | 
            +
                first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
         | 
| 210 | 
            +
             | 
| 211 | 
            +
                if (first_token == NULL) {
         | 
| 212 | 
            +
                  parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
         | 
| 213 | 
            +
                  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = ast_html_attribute_name_node_init(NULL, NULL, NULL, errors);
         | 
| 214 | 
            +
                  return attribute_name;
         | 
| 215 | 
            +
                }
         | 
| 216 | 
            +
              }
         | 
| 217 | 
            +
             | 
| 218 | 
            +
              buffer_T name_buffer = buffer_new();
         | 
| 219 | 
            +
             | 
| 220 | 
            +
              position_T* start_position;
         | 
| 221 | 
            +
             | 
| 222 | 
            +
              if (at_token != NULL) {
         | 
| 223 | 
            +
                buffer_append(&name_buffer, at_token->value);
         | 
| 224 | 
            +
                start_position = position_copy(at_token->location->start);
         | 
| 225 | 
            +
              } else {
         | 
| 226 | 
            +
                start_position = position_copy(first_token->location->start);
         | 
| 227 | 
            +
              }
         | 
| 228 | 
            +
             | 
| 229 | 
            +
              buffer_append(&name_buffer, first_token->value);
         | 
| 230 | 
            +
             | 
| 231 | 
            +
              position_T* end_position = position_copy(first_token->location->end);
         | 
| 232 | 
            +
              size_t range_end = first_token->range->to;
         | 
| 233 | 
            +
             | 
| 234 | 
            +
              while (parser->current_token->type == TOKEN_CHARACTER && parser->current_token->value
         | 
| 235 | 
            +
                     && strcmp(parser->current_token->value, ".") == 0) {
         | 
| 236 | 
            +
             | 
| 237 | 
            +
                token_T* dot_token = parser_advance(parser);
         | 
| 238 | 
            +
             | 
| 239 | 
            +
                buffer_append(&name_buffer, dot_token->value);
         | 
| 240 | 
            +
                position_free(end_position);
         | 
| 241 | 
            +
             | 
| 242 | 
            +
                end_position = position_copy(dot_token->location->end);
         | 
| 243 | 
            +
                range_end = dot_token->range->to;
         | 
| 244 | 
            +
             | 
| 245 | 
            +
                token_free(dot_token);
         | 
| 246 | 
            +
             | 
| 247 | 
            +
                if (parser->current_token->type == TOKEN_IDENTIFIER) {
         | 
| 248 | 
            +
                  token_T* next_identifier = parser_advance(parser);
         | 
| 249 | 
            +
             | 
| 250 | 
            +
                  buffer_append(&name_buffer, next_identifier->value);
         | 
| 251 | 
            +
                  position_free(end_position);
         | 
| 252 | 
            +
             | 
| 253 | 
            +
                  end_position = position_copy(next_identifier->location->end);
         | 
| 254 | 
            +
                  range_end = next_identifier->range->to;
         | 
| 255 | 
            +
                  token_free(next_identifier);
         | 
| 256 | 
            +
                } else {
         | 
| 257 | 
            +
                  break;
         | 
| 258 | 
            +
                }
         | 
| 259 | 
            +
              }
         | 
| 260 | 
            +
             | 
| 261 | 
            +
              token_T* combined_token = calloc(1, sizeof(token_T));
         | 
| 262 | 
            +
              combined_token->value = herb_strdup(name_buffer.value);
         | 
| 263 | 
            +
              combined_token->type = TOKEN_IDENTIFIER;
         | 
| 264 | 
            +
              combined_token->location =
         | 
| 265 | 
            +
                location_from(start_position->line, start_position->column, end_position->line, end_position->column);
         | 
| 266 | 
            +
             | 
| 267 | 
            +
              size_t range_start = at_token != NULL ? at_token->range->from : first_token->range->from;
         | 
| 268 | 
            +
              combined_token->range = range_init(range_start, range_end);
         | 
| 190 269 |  | 
| 191 270 | 
             
              AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
         | 
| 192 | 
            -
                ast_html_attribute_name_node_init( | 
| 271 | 
            +
                ast_html_attribute_name_node_init(combined_token, start_position, end_position, errors);
         | 
| 272 | 
            +
             | 
| 273 | 
            +
              buffer_free(&name_buffer);
         | 
| 274 | 
            +
              position_free(start_position);
         | 
| 275 | 
            +
              position_free(end_position);
         | 
| 276 | 
            +
              token_free(first_token);
         | 
| 193 277 |  | 
| 194 | 
            -
              token_free( | 
| 278 | 
            +
              if (at_token != NULL) { token_free(at_token); }
         | 
| 279 | 
            +
             | 
| 280 | 
            +
              token_free(combined_token);
         | 
| 195 281 |  | 
| 196 282 | 
             
              return attribute_name;
         | 
| 197 283 | 
             
            }
         | 
| @@ -300,6 +386,30 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser | |
| 300 386 | 
             
              // <div id="home">
         | 
| 301 387 | 
             
              if (token_is(parser, TOKEN_QUOTE)) { return parser_parse_quoted_html_attribute_value(parser, children, errors); }
         | 
| 302 388 |  | 
| 389 | 
            +
              if (token_is(parser, TOKEN_BACKTICK)) {
         | 
| 390 | 
            +
                token_T* token = parser_advance(parser);
         | 
| 391 | 
            +
                position_T* start = position_copy(token->location->start);
         | 
| 392 | 
            +
                position_T* end = position_copy(token->location->end);
         | 
| 393 | 
            +
             | 
| 394 | 
            +
                append_unexpected_error(
         | 
| 395 | 
            +
                  "Invalid quote character for HTML attribute",
         | 
| 396 | 
            +
                  "single quote (') or double quote (\")",
         | 
| 397 | 
            +
                  "backtick (`)",
         | 
| 398 | 
            +
                  start,
         | 
| 399 | 
            +
                  end,
         | 
| 400 | 
            +
                  errors
         | 
| 401 | 
            +
                );
         | 
| 402 | 
            +
             | 
| 403 | 
            +
                AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
         | 
| 404 | 
            +
                  ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
         | 
| 405 | 
            +
             | 
| 406 | 
            +
                position_free(start);
         | 
| 407 | 
            +
                position_free(end);
         | 
| 408 | 
            +
                token_free(token);
         | 
| 409 | 
            +
             | 
| 410 | 
            +
                return value;
         | 
| 411 | 
            +
              }
         | 
| 412 | 
            +
             | 
| 303 413 | 
             
              token_T* token = parser_advance(parser);
         | 
| 304 414 |  | 
| 305 415 | 
             
              append_unexpected_error(
         | 
| @@ -329,9 +439,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser | |
| 329 439 | 
             
            static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) {
         | 
| 330 440 | 
             
              AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = parser_parse_html_attribute_name(parser);
         | 
| 331 441 |  | 
| 442 | 
            +
              while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
         | 
| 443 | 
            +
                token_T* whitespace = parser_advance(parser);
         | 
| 444 | 
            +
                token_free(whitespace);
         | 
| 445 | 
            +
              }
         | 
| 446 | 
            +
             | 
| 332 447 | 
             
              token_T* equals = parser_consume_if_present(parser, TOKEN_EQUALS);
         | 
| 333 448 |  | 
| 334 449 | 
             
              if (equals != NULL) {
         | 
| 450 | 
            +
                while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
         | 
| 451 | 
            +
                  token_T* whitespace = parser_advance(parser);
         | 
| 452 | 
            +
                  token_free(whitespace);
         | 
| 453 | 
            +
                }
         | 
| 454 | 
            +
             | 
| 335 455 | 
             
                AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
         | 
| 336 456 |  | 
| 337 457 | 
             
                AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
         | 
| @@ -390,10 +510,15 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { | |
| 390 510 | 
             
                  continue;
         | 
| 391 511 | 
             
                }
         | 
| 392 512 |  | 
| 513 | 
            +
                if (parser->current_token->type == TOKEN_AT) {
         | 
| 514 | 
            +
                  array_append(children, parser_parse_html_attribute(parser));
         | 
| 515 | 
            +
                  continue;
         | 
| 516 | 
            +
                }
         | 
| 517 | 
            +
             | 
| 393 518 | 
             
                parser_append_unexpected_error(
         | 
| 394 519 | 
             
                  parser,
         | 
| 395 520 | 
             
                  "Unexpected Token",
         | 
| 396 | 
            -
                  "TOKEN_IDENTIFIER, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
         | 
| 521 | 
            +
                  "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
         | 
| 397 522 | 
             
                  errors
         | 
| 398 523 | 
             
                );
         | 
| 399 524 | 
             
              }
         | 
| @@ -441,6 +566,12 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) | |
| 441 566 |  | 
| 442 567 | 
             
              token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
         | 
| 443 568 | 
             
              token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
         | 
| 569 | 
            +
             | 
| 570 | 
            +
              while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
         | 
| 571 | 
            +
                token_T* whitespace = parser_advance(parser);
         | 
| 572 | 
            +
                token_free(whitespace);
         | 
| 573 | 
            +
              }
         | 
| 574 | 
            +
             | 
| 444 575 | 
             
              token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
         | 
| 445 576 |  | 
| 446 577 | 
             
              if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
         | 
| @@ -502,7 +633,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( | |
| 502 633 |  | 
| 503 634 | 
             
              parser_push_open_tag(parser, open_tag->tag_name);
         | 
| 504 635 |  | 
| 505 | 
            -
               | 
| 636 | 
            +
              if (open_tag->tag_name->value && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
         | 
| 637 | 
            +
                foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
         | 
| 638 | 
            +
                parser_enter_foreign_content(parser, content_type);
         | 
| 639 | 
            +
                parser_parse_foreign_content(parser, body, errors);
         | 
| 640 | 
            +
              } else {
         | 
| 641 | 
            +
                parser_parse_in_data_state(parser, body, errors);
         | 
| 642 | 
            +
              }
         | 
| 506 643 |  | 
| 507 644 | 
             
              if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return parser_handle_missing_close_tag(open_tag, body, errors); }
         | 
| 508 645 |  | 
| @@ -593,6 +730,83 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) { | |
| 593 730 | 
             
              return erb_node;
         | 
| 594 731 | 
             
            }
         | 
| 595 732 |  | 
| 733 | 
            +
            // Peeks one token ahead in the lexer and reports whether it is an identifier
            // that names the closing tag of the foreign content currently being parsed.
            //
            // The lexer's position, line/column bookkeeping, current character and state
            // are captured before the extra token is lexed and written back afterwards.
            // The peeked token is freed here and never handed to the caller.
            static bool parser_next_token_closes_foreign_content(parser_T* parser) {
              const size_t position = parser->lexer->current_position;
              const size_t line = parser->lexer->current_line;
              const size_t column = parser->lexer->current_column;
              const size_t previous_position = parser->lexer->previous_position;
              const size_t previous_line = parser->lexer->previous_line;
              const size_t previous_column = parser->lexer->previous_column;

              const char character = parser->lexer->current_character;
              const lexer_state_T state = parser->lexer->state;

              token_T* peeked = lexer_next_token(parser->lexer);
              bool closes = false;

              if (peeked != NULL && peeked->type == TOKEN_IDENTIFIER && peeked->value != NULL) {
                closes = parser_is_expected_closing_tag_name(peeked->value, parser->foreign_content_type);
              }

              // Rewind the lexer to where it was before the look-ahead.
              parser->lexer->current_position = position;
              parser->lexer->current_line = line;
              parser->lexer->current_column = column;
              parser->lexer->previous_position = previous_position;
              parser->lexer->previous_line = previous_line;
              parser->lexer->previous_column = previous_column;
              parser->lexer->current_character = character;
              parser->lexer->state = state;

              if (peeked != NULL) { token_free(peeked); }

              return closes;
            }

            // Consumes the body of a foreign-content element (the type is read from
            // parser->foreign_content_type) and appends what it finds to `children`.
            //
            // Token values are accumulated into a text buffer. ERB tags are parsed with
            // parser_parse_erb_tag() and appended as their own nodes; the buffered text
            // is passed to parser_append_literal_node_from_buffer() before each ERB tag
            // and once more at the end (presumably emitting a literal node - confirm
            // against that helper).
            //
            // Parsing stops at EOF, or at a TOKEN_HTML_TAG_START_CLOSE that is directly
            // followed by the expected closing tag name; that closing-tag token is left
            // unconsumed. Foreign-content mode is exited on every return path.
            //
            // `errors` is accepted but not used by this function.
            static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors) {
              buffer_T literal = buffer_new();
              position_T* literal_start = position_copy(parser->current_token->location->start);

              // No known closing tag for the current type: leave foreign-content mode
              // without consuming anything.
              if (parser_get_foreign_content_closing_tag(parser->foreign_content_type) == NULL) {
                parser_exit_foreign_content(parser);
                position_free(literal_start);
                buffer_free(&literal);

                return;
              }

              while (!token_is(parser, TOKEN_EOF)) {
                if (token_is(parser, TOKEN_ERB_START)) {
                  parser_append_literal_node_from_buffer(parser, &literal, children, literal_start);
                  array_append(children, parser_parse_erb_tag(parser));

                  // Text following the ERB tag starts at the new current token.
                  position_free(literal_start);
                  literal_start = position_copy(parser->current_token->location->start);

                  continue;
                }

                if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE) && parser_next_token_closes_foreign_content(parser)) {
                  break;
                }

                // Anything else is raw content of the foreign element.
                token_T* consumed = parser_advance(parser);
                buffer_append(&literal, consumed->value);
                token_free(consumed);
              }

              parser_append_literal_node_from_buffer(parser, &literal, children, literal_start);
              parser_exit_foreign_content(parser);
              position_free(literal_start);
              buffer_free(&literal);
            }
         | 
| 809 | 
            +
             | 
| 596 810 | 
             
            static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors) {
         | 
| 597 811 | 
             
              while (token_is_none_of(parser, TOKEN_HTML_TAG_START_CLOSE, TOKEN_EOF)) {
         | 
| 598 812 | 
             
                if (token_is(parser, TOKEN_ERB_START)) {
         | 
| @@ -618,12 +832,14 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra | |
| 618 832 | 
             
                if (token_is_any_of(
         | 
| 619 833 | 
             
                      parser,
         | 
| 620 834 | 
             
                      TOKEN_AMPERSAND,
         | 
| 835 | 
            +
                      TOKEN_AT,
         | 
| 621 836 | 
             
                      TOKEN_CHARACTER,
         | 
| 622 837 | 
             
                      TOKEN_COLON,
         | 
| 623 838 | 
             
                      TOKEN_DASH,
         | 
| 624 839 | 
             
                      TOKEN_EQUALS,
         | 
| 625 840 | 
             
                      TOKEN_EXCLAMATION,
         | 
| 626 841 | 
             
                      TOKEN_IDENTIFIER,
         | 
| 842 | 
            +
                      TOKEN_NBSP,
         | 
| 627 843 | 
             
                      TOKEN_NEWLINE,
         | 
| 628 844 | 
             
                      TOKEN_PERCENT,
         | 
| 629 845 | 
             
                      TOKEN_QUOTE,
         | 
| @@ -639,8 +855,8 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra | |
| 639 855 | 
             
                parser_append_unexpected_error(
         | 
| 640 856 | 
             
                  parser,
         | 
| 641 857 | 
             
                  "Unexpected token",
         | 
| 642 | 
            -
                  "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE,  | 
| 643 | 
            -
                  "TOKEN_NEWLINE",
         | 
| 858 | 
            +
                  "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
         | 
| 859 | 
            +
                  "TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
         | 
| 644 860 | 
             
                  errors
         | 
| 645 861 | 
             
                );
         | 
| 646 862 | 
             
              }
         | 
    
        data/src/parser_helpers.c
    CHANGED
    
    | @@ -8,6 +8,7 @@ | |
| 8 8 | 
             
            #include "include/lexer.h"
         | 
| 9 9 | 
             
            #include "include/parser.h"
         | 
| 10 10 | 
             
            #include "include/token.h"
         | 
| 11 | 
            +
            #include "include/token_matchers.h"
         | 
| 11 12 |  | 
| 12 13 | 
             
            #include <stdio.h>
         | 
| 13 14 | 
             
            #include <strings.h>
         | 
| @@ -54,6 +55,43 @@ bool parser_in_svg_context(const parser_T* parser) { | |
| 54 55 | 
             
              return false;
         | 
| 55 56 | 
             
            }
         | 
| 56 57 |  | 
| 58 | 
            +
            // ===== Foreign Content Handling =====
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            foreign_content_type_T parser_get_foreign_content_type(const char* tag_name) {
         | 
| 61 | 
            +
              if (tag_name == NULL) { return FOREIGN_CONTENT_UNKNOWN; }
         | 
| 62 | 
            +
             | 
| 63 | 
            +
              if (strcasecmp(tag_name, "script") == 0) { return FOREIGN_CONTENT_SCRIPT; }
         | 
| 64 | 
            +
              if (strcasecmp(tag_name, "style") == 0) { return FOREIGN_CONTENT_STYLE; }
         | 
| 65 | 
            +
             | 
| 66 | 
            +
              return FOREIGN_CONTENT_UNKNOWN;
         | 
| 67 | 
            +
            }
         | 
| 68 | 
            +
             | 
| 69 | 
            +
            bool parser_is_foreign_content_tag(const char* tag_name) {
         | 
| 70 | 
            +
              return parser_get_foreign_content_type(tag_name) != FOREIGN_CONTENT_UNKNOWN;
         | 
| 71 | 
            +
            }
         | 
| 72 | 
            +
             | 
| 73 | 
            +
            // Returns the closing tag name for a foreign-content type: "script" for
            // FOREIGN_CONTENT_SCRIPT, "style" for FOREIGN_CONTENT_STYLE, and NULL for
            // every other value. The returned strings are literals; do not free them.
            const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type) {
              if (type == FOREIGN_CONTENT_SCRIPT) { return "script"; }
              if (type == FOREIGN_CONTENT_STYLE) { return "style"; }

              return NULL;
            }
         | 
| 80 | 
            +
             | 
| 81 | 
            +
            // Switches the parser into PARSER_STATE_FOREIGN_CONTENT and records which
            // foreign-content type is being parsed. Does nothing when `parser` is NULL.
            void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type) {
              if (parser != NULL) {
                parser->state = PARSER_STATE_FOREIGN_CONTENT;
                parser->foreign_content_type = type;
              }
            }
         | 
| 87 | 
            +
             | 
| 88 | 
            +
            // Puts the parser back into PARSER_STATE_DATA and resets the recorded
            // foreign-content type to FOREIGN_CONTENT_UNKNOWN. Does nothing when `parser`
            // is NULL.
            void parser_exit_foreign_content(parser_T* parser) {
              if (parser != NULL) {
                parser->state = PARSER_STATE_DATA;
                parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
              }
            }
         | 
| 94 | 
            +
             | 
| 57 95 | 
             
            void parser_append_unexpected_error(parser_T* parser, const char* description, const char* expected, array_T* errors) {
         | 
| 58 96 | 
             
              token_T* token = parser_advance(parser);
         | 
| 59 97 |  | 
| @@ -166,3 +204,11 @@ void parser_handle_mismatched_tags( | |
| 166 204 | 
             
                );
         | 
| 167 205 | 
             
              }
         | 
| 168 206 | 
             
            }
         | 
| 207 | 
            +
             | 
| 208 | 
            +
            // Reports whether `tag_name` is the closing tag name that ends foreign content
            // of `expected_type`.
            //
            // Returns false when `tag_name` is NULL or when `expected_type` has no closing
            // tag (parser_get_foreign_content_closing_tag() returns NULL).
            //
            // The comparison is case-insensitive so it agrees with
            // parser_get_foreign_content_type(), which recognises the opening tag with
            // strcasecmp(). With a case-sensitive comparison, an element opened as
            // <SCRIPT> would enter foreign content but its </SCRIPT> would never be
            // recognised as the end of it.
            bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type) {
              if (tag_name == NULL) { return false; }

              const char* expected_tag_name = parser_get_foreign_content_closing_tag(expected_type);

              if (expected_tag_name == NULL) { return false; }

              return strcasecmp(tag_name, expected_tag_name) == 0;
            }
         | 
    
        data/src/token.c
    CHANGED
    
    | @@ -55,6 +55,7 @@ const char* token_type_to_string(const token_type_T type) { | |
| 55 55 | 
             
                case TOKEN_HTML_COMMENT_END: return "TOKEN_HTML_COMMENT_END";
         | 
| 56 56 | 
             
                case TOKEN_EQUALS: return "TOKEN_EQUALS";
         | 
| 57 57 | 
             
                case TOKEN_QUOTE: return "TOKEN_QUOTE";
         | 
| 58 | 
            +
                case TOKEN_BACKTICK: return "TOKEN_BACKTICK";
         | 
| 58 59 | 
             
                case TOKEN_DASH: return "TOKEN_DASH";
         | 
| 59 60 | 
             
                case TOKEN_UNDERSCORE: return "TOKEN_UNDERSCORE";
         | 
| 60 61 | 
             
                case TOKEN_EXCLAMATION: return "TOKEN_EXCLAMATION";
         | 
    
        data/src/utf8.c
    ADDED
    
    | @@ -0,0 +1,46 @@ | |
| 1 | 
            +
            #include "include/utf8.h"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            // Returns how many bytes a UTF-8 sequence occupies, judged only by its first
            // byte:
            //   0xxxxxxx -> 1 (ASCII)
            //   110xxxxx -> 2
            //   1110xxxx -> 3
            //   11110xxx -> 4
            // Any other byte (for example a continuation byte) is reported as length 1.
            int utf8_char_byte_length(unsigned char first_byte) {
              if (first_byte < 0x80) { return 1; }
              if ((first_byte >> 5) == 0x06) { return 2; }
              if ((first_byte >> 4) == 0x0E) { return 3; }
              if ((first_byte >> 3) == 0x1E) { return 4; }

              return 1;
            }
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            // True when `byte` has the UTF-8 continuation pattern 10xxxxxx, i.e. its top
            // two bits are 1 and 0.
            bool utf8_is_valid_continuation_byte(unsigned char byte) {
              return (byte >> 6) == 0x02;
            }
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            // Returns the byte length of the UTF-8 sequence starting at str[position],
            // where `max_length` is the number of readable bytes in `str`.
            //
            // Returns:
            //   0 - `str` is NULL or `position` is at or past `max_length`
            //   1 - the sequence announced by the first byte does not fit in the remaining
            //       bytes, or one of its continuation bytes is not of the form 10xxxxxx;
            //       the first byte is then treated as a single byte
            //   n - otherwise, the length (1-4) reported by utf8_char_byte_length()
            int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
              if (str == NULL || position >= max_length) { return 0; }

              const unsigned char first_byte = (unsigned char) str[position];
              const int expected_length = utf8_char_byte_length(first_byte);

              // Compare against the bytes that remain rather than computing
              // position + expected_length, which could wrap around for a huge max_length.
              if ((size_t) expected_length > max_length - position) {
                return 1; // Not enough bytes, treat as single byte
              }

              for (int offset = 1; offset < expected_length; offset++) {
                if (!utf8_is_valid_continuation_byte((unsigned char) str[position + offset])) {
                  return 1; // Invalid continuation byte, treat first byte as single byte
                }
              }

              return expected_length;
            }
         | 
    
        data/src/visitor.c
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            // NOTE: This file is generated by the templates/template.rb script and should not
         | 
| 2 | 
            -
            // be modified manually. See /Users/marcoroth/Development/herb-release- | 
| 2 | 
            +
            // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/visitor.c.erb
         | 
| 3 3 |  | 
| 4 4 | 
             
            #include <stdio.h>
         | 
| 5 5 |  | 
    
        metadata
    CHANGED
    
    | @@ -1,13 +1,13 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: herb
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.5.0
         | 
| 5 5 | 
             
            platform: x86_64-linux-musl
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Marco Roth
         | 
| 8 8 | 
             
            bindir: exe
         | 
| 9 9 | 
             
            cert_chain: []
         | 
| 10 | 
            -
            date: 2025- | 
| 10 | 
            +
            date: 2025-08-18 00:00:00.000000000 Z
         | 
| 11 11 | 
             
            dependencies: []
         | 
| 12 12 | 
             
            description: Powerful and seamless HTML-aware ERB parsing and tooling.
         | 
| 13 13 | 
             
            email:
         | 
| @@ -125,6 +125,7 @@ files: | |
| 125 125 | 
             
            - src/include/token.h
         | 
| 126 126 | 
             
            - src/include/token_matchers.h
         | 
| 127 127 | 
             
            - src/include/token_struct.h
         | 
| 128 | 
            +
            - src/include/utf8.h
         | 
| 128 129 | 
             
            - src/include/util.h
         | 
| 129 130 | 
             
            - src/include/version.h
         | 
| 130 131 | 
             
            - src/include/visitor.h
         | 
| @@ -144,6 +145,7 @@ files: | |
| 144 145 | 
             
            - src/ruby_parser.c
         | 
| 145 146 | 
             
            - src/token.c
         | 
| 146 147 | 
             
            - src/token_matchers.c
         | 
| 148 | 
            +
            - src/utf8.c
         | 
| 147 149 | 
             
            - src/util.c
         | 
| 148 150 | 
             
            - src/visitor.c
         | 
| 149 151 | 
             
            homepage: https://herb-tools.dev
         |