RubyGems - herb - Versions diffs - 0.7.5 → 0.8.0 - Mend

herb 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (161) hide show

checksums.yaml +4 -4
data/Makefile +8 -5
data/config.yml +26 -6
data/ext/herb/error_helpers.c +57 -3
data/ext/herb/error_helpers.h +1 -1
data/ext/herb/extconf.rb +1 -0
data/ext/herb/extension.c +10 -24
data/ext/herb/extension_helpers.c +3 -3
data/ext/herb/extension_helpers.h +1 -1
data/ext/herb/nodes.c +72 -37
data/herb.gemspec +0 -2
data/lib/herb/ast/helpers.rb +11 -0
data/lib/herb/ast/node.rb +15 -6
data/lib/herb/ast/nodes.rb +609 -392
data/lib/herb/cli.rb +31 -0
data/lib/herb/colors.rb +82 -0
data/lib/herb/engine/compiler.rb +140 -14
data/lib/herb/engine/debug_visitor.rb +1 -5
data/lib/herb/engine/parser_error_overlay.rb +1 -1
data/lib/herb/engine.rb +8 -14
data/lib/herb/errors.rb +166 -56
data/lib/herb/location.rb +2 -2
data/lib/herb/project.rb +86 -21
data/lib/herb/token.rb +14 -2
data/lib/herb/version.rb +1 -1
data/lib/herb.rb +1 -0
data/sig/herb/ast/helpers.rbs +3 -0
data/sig/herb/ast/node.rbs +12 -5
data/sig/herb/ast/nodes.rbs +124 -62
data/sig/herb/colors.rbs +35 -0
data/sig/herb/engine/compiler.rbs +23 -1
data/sig/herb/errors.rbs +74 -20
data/sig/herb/token.rbs +8 -0
data/sig/herb_c_extension.rbs +1 -1
data/sig/serialized_ast_errors.rbs +8 -0
data/src/analyze.c +420 -171
data/src/analyze_helpers.c +5 -0
data/src/analyze_missing_end.c +147 -0
data/src/analyze_transform.c +196 -0
data/src/analyzed_ruby.c +23 -2
data/src/ast_node.c +5 -5
data/src/ast_nodes.c +179 -179
data/src/ast_pretty_print.c +232 -232
data/src/element_source.c +7 -6
data/src/errors.c +246 -126
data/src/extract.c +92 -34
data/src/herb.c +37 -49
data/src/html_util.c +34 -96
data/src/include/analyze.h +10 -2
data/src/include/analyze_helpers.h +3 -0
data/src/include/analyzed_ruby.h +4 -2
data/src/include/ast_node.h +2 -2
data/src/include/ast_nodes.h +67 -66
data/src/include/ast_pretty_print.h +2 -2
data/src/include/element_source.h +3 -1
data/src/include/errors.h +30 -14
data/src/include/extract.h +4 -4
data/src/include/herb.h +6 -7
data/src/include/html_util.h +4 -5
data/src/include/lexer.h +1 -3
data/src/include/lexer_peek_helpers.h +14 -14
data/src/include/lexer_struct.h +3 -2
data/src/include/macros.h +4 -0
data/src/include/parser.h +12 -6
data/src/include/parser_helpers.h +25 -15
data/src/include/pretty_print.h +38 -28
data/src/include/token.h +5 -8
data/src/include/utf8.h +3 -2
data/src/include/util/hb_arena.h +31 -0
data/src/include/util/hb_arena_debug.h +8 -0
data/src/include/util/hb_array.h +33 -0
data/src/include/util/hb_buffer.h +34 -0
data/src/include/util/hb_string.h +29 -0
data/src/include/util/hb_system.h +9 -0
data/src/include/util.h +3 -14
data/src/include/version.h +1 -1
data/src/include/visitor.h +1 -1
data/src/io.c +7 -4
data/src/lexer.c +61 -88
data/src/lexer_peek_helpers.c +35 -37
data/src/main.c +19 -23
data/src/parser.c +282 -201
data/src/parser_helpers.c +46 -40
data/src/parser_match_tags.c +316 -0
data/src/pretty_print.c +82 -106
data/src/token.c +18 -65
data/src/utf8.c +4 -4
data/src/util/hb_arena.c +179 -0
data/src/util/hb_arena_debug.c +237 -0
data/src/{array.c → util/hb_array.c} +26 -27
data/src/util/hb_buffer.c +203 -0
data/src/util/hb_string.c +85 -0
data/src/util/hb_system.c +30 -0
data/src/util.c +29 -99
data/src/visitor.c +54 -54
data/templates/ext/herb/error_helpers.c.erb +3 -3
data/templates/ext/herb/error_helpers.h.erb +1 -1
data/templates/ext/herb/nodes.c.erb +11 -6
data/templates/java/error_helpers.c.erb +75 -0
data/templates/java/error_helpers.h.erb +20 -0
data/templates/java/nodes.c.erb +97 -0
data/templates/java/nodes.h.erb +23 -0
data/templates/java/org/herb/ast/Errors.java.erb +121 -0
data/templates/java/org/herb/ast/NodeVisitor.java.erb +14 -0
data/templates/java/org/herb/ast/Nodes.java.erb +220 -0
data/templates/java/org/herb/ast/Visitor.java.erb +56 -0
data/templates/javascript/packages/node/extension/error_helpers.cpp.erb +8 -8
data/templates/javascript/packages/node/extension/error_helpers.h.erb +1 -1
data/templates/javascript/packages/node/extension/nodes.cpp.erb +9 -9
data/templates/javascript/packages/node/extension/nodes.h.erb +1 -1
data/templates/lib/herb/ast/nodes.rb.erb +28 -16
data/templates/lib/herb/errors.rb.erb +17 -12
data/templates/rust/src/ast/nodes.rs.erb +220 -0
data/templates/rust/src/errors.rs.erb +216 -0
data/templates/rust/src/nodes.rs.erb +374 -0
data/templates/src/analyze_missing_end.c.erb +36 -0
data/templates/src/analyze_transform.c.erb +24 -0
data/templates/src/ast_nodes.c.erb +14 -14
data/templates/src/ast_pretty_print.c.erb +36 -36
data/templates/src/errors.c.erb +31 -31
data/templates/src/include/ast_nodes.h.erb +10 -9
data/templates/src/include/ast_pretty_print.h.erb +2 -2
data/templates/src/include/errors.h.erb +6 -6
data/templates/src/parser_match_tags.c.erb +38 -0
data/templates/src/visitor.c.erb +4 -4
data/templates/template.rb +22 -3
data/templates/wasm/error_helpers.cpp.erb +9 -9
data/templates/wasm/error_helpers.h.erb +1 -1
data/templates/wasm/nodes.cpp.erb +9 -9
data/templates/wasm/nodes.h.erb +1 -1
data/vendor/prism/Rakefile +4 -1
data/vendor/prism/config.yml +2 -1
data/vendor/prism/include/prism/ast.h +31 -1
data/vendor/prism/include/prism/diagnostic.h +1 -0
data/vendor/prism/include/prism/version.h +3 -3
data/vendor/prism/src/diagnostic.c +3 -1
data/vendor/prism/src/prism.c +130 -71
data/vendor/prism/src/util/pm_string.c +6 -8
data/vendor/prism/templates/include/prism/ast.h.erb +2 -0
data/vendor/prism/templates/java/org/prism/Loader.java.erb +2 -2
data/vendor/prism/templates/javascript/src/deserialize.js.erb +2 -2
data/vendor/prism/templates/lib/prism/serialize.rb.erb +2 -2
data/vendor/prism/templates/sig/prism.rbs.erb +4 -0
data/vendor/prism/templates/src/diagnostic.c.erb +1 -0
metadata +34 -20
data/lib/herb/libherb/array.rb +0 -51
data/lib/herb/libherb/ast_node.rb +0 -50
data/lib/herb/libherb/buffer.rb +0 -56
data/lib/herb/libherb/extract_result.rb +0 -20
data/lib/herb/libherb/lex_result.rb +0 -32
data/lib/herb/libherb/libherb.rb +0 -52
data/lib/herb/libherb/parse_result.rb +0 -20
data/lib/herb/libherb/token.rb +0 -46
data/lib/herb/libherb.rb +0 -35
data/src/buffer.c +0 -241
data/src/include/array.h +0 -33
data/src/include/buffer.h +0 -39
data/src/include/json.h +0 -28
data/src/include/memory.h +0 -12
data/src/json.c +0 -205
data/src/memory.c +0 -53

data/src/pretty_print.c CHANGED Viewed

@@ -3,146 +3,149 @@
 #include "include/ast_node.h"
 #include "include/ast_nodes.h"
 #include "include/ast_pretty_print.h"
-#include "include/buffer.h"
 #include "include/errors.h"
 #include "include/token_struct.h"
 #include "include/util.h"
+#include "include/util/hb_buffer.h"
+#include "include/util/hb_string.h"
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
-void pretty_print_indent(buffer_T* buffer, const size_t indent) {
+void pretty_print_indent(hb_buffer_T* buffer, const size_t indent) {
   for (size_t i = 0; i < indent; i++) {
-    buffer_append(buffer, "    ");
+    hb_buffer_append(buffer, "    ");
   }
 }
-void pretty_print_newline(const size_t indent, const size_t relative_indent, buffer_T* buffer) {
+void pretty_print_newline(const size_t indent, const size_t relative_indent, hb_buffer_T* buffer) {
   pretty_print_indent(buffer, indent);
   pretty_print_indent(buffer, relative_indent);
-  buffer_append(buffer, "\n");
+  hb_buffer_append(buffer, "\n");
 }
 void pretty_print_label(
-  const char* name,
+  hb_string_T name,
   const size_t indent,
   const size_t relative_indent,
   const bool last_property,
-  buffer_T* buffer
+  hb_buffer_T* buffer
 ) {
   pretty_print_indent(buffer, indent);
   pretty_print_indent(buffer, relative_indent);
   if (last_property) {
-    buffer_append(buffer, "└── ");
+    hb_buffer_append(buffer, "└── ");
   } else {
-    buffer_append(buffer, "├── ");
+    hb_buffer_append(buffer, "├── ");
   }
-  buffer_append(buffer, name);
-  buffer_append(buffer, ": ");
+  hb_buffer_append_string(buffer, name);
+  hb_buffer_append(buffer, ": ");
 }
 void pretty_print_quoted_property(
-  const char* name,
-  const char* value,
+  hb_string_T name,
+  hb_string_T value,
   const size_t indent,
   const size_t relative_indent,
   const bool last_property,
-  buffer_T* buffer
+  hb_buffer_T* buffer
 ) {
-  char* quoted = quoted_string(value);
+  hb_string_T quoted = quoted_string(value);
   pretty_print_property(name, quoted, indent, relative_indent, last_property, buffer);
-  free(quoted);
+  free(quoted.data);
 }
 void pretty_print_boolean_property(
-  const char* name,
+  hb_string_T name,
   bool value,
   const size_t indent,
   const size_t relative_indent,
   const bool last_property,
-  buffer_T* buffer
+  hb_buffer_T* buffer
 ) {
-  pretty_print_property(name, value ? "true" : "false", indent, relative_indent, last_property, buffer);
+  pretty_print_property(name, hb_string(value ? "true" : "false"), indent, relative_indent, last_property, buffer);
 }
 void pretty_print_property(
-  const char* name,
-  const char* value,
+  hb_string_T name,
+  hb_string_T value,
   const size_t indent,
   const size_t relative_indent,
   const bool last_property,
-  buffer_T* buffer
+  hb_buffer_T* buffer
 ) {
   pretty_print_label(name, indent, relative_indent, last_property, buffer);
-  buffer_append(buffer, value);
-  buffer_append(buffer, "\n");
+  hb_buffer_append_string(buffer, value);
+  hb_buffer_append(buffer, "\n");
 }
 void pretty_print_size_t_property(
   size_t value,
-  const char* name,
+  hb_string_T name,
   const size_t indent,
   const size_t relative_indent,
   const bool last_property,
-  buffer_T* buffer
+  hb_buffer_T* buffer
 ) {
   pretty_print_label(name, indent, relative_indent, last_property, buffer);
-  char* string = size_t_to_string(value);
-  buffer_append(buffer, string);
-  buffer_append(buffer, "\n");
-  free(string);
+  char size_string[21];
+  snprintf(size_string, 21, "%zu", value);
+  hb_buffer_append(buffer, size_string);
+  hb_buffer_append(buffer, "\n");
 }
 void pretty_print_array(
-  const char* name,
-  array_T* array,
+  hb_string_T name,
+  hb_array_T* array,
   const size_t indent,
   const size_t relative_indent,
   const bool last_property,
-  buffer_T* buffer
+  hb_buffer_T* buffer
 ) {
   if (array == NULL) {
-    pretty_print_property(name, "∅", indent, relative_indent, last_property, buffer);
+    pretty_print_property(name, hb_string("∅"), indent, relative_indent, last_property, buffer);
     return;
   }
-  if (array_size(array) == 0) {
-    pretty_print_property(name, "[]", indent, relative_indent, last_property, buffer);
+  if (hb_array_size(array) == 0) {
+    pretty_print_property(name, hb_string("[]"), indent, relative_indent, last_property, buffer);
     return;
   }
   pretty_print_label(name, indent, relative_indent, last_property, buffer);
-  buffer_append(buffer, "(");
+  hb_buffer_append(buffer, "(");
   char count[16];
-  sprintf(count, "%zu", array_size(array));
-  buffer_append(buffer, count);
-  buffer_append(buffer, ")\n");
+  sprintf(count, "%zu", hb_array_size(array));
+  hb_buffer_append(buffer, count);
+  hb_buffer_append(buffer, ")\n");
   if (indent < 20) {
-    for (size_t i = 0; i < array_size(array); i++) {
-      AST_NODE_T* child = array_get(array, i);
+    for (size_t i = 0; i < hb_array_size(array); i++) {
+      AST_NODE_T* child = hb_array_get(array, i);
       pretty_print_indent(buffer, indent);
       pretty_print_indent(buffer, relative_indent + 1);
-      if (i == array_size(array) - 1) {
-        buffer_append(buffer, "└── ");
+      if (i == hb_array_size(array) - 1) {
+        hb_buffer_append(buffer, "└── ");
       } else {
-        buffer_append(buffer, "├── ");
+        hb_buffer_append(buffer, "├── ");
       }
       ast_pretty_print_node(child, indent + 1, relative_indent + 1, buffer);
-      if (i != array_size(array) - 1) { pretty_print_newline(indent + 1, relative_indent, buffer); }
+      if (i != hb_array_size(array) - 1) { pretty_print_newline(indent + 1, relative_indent, buffer); }
     }
   }
-  buffer_append(buffer, "\n");
+  hb_buffer_append(buffer, "\n");
 }
 void pretty_print_errors(
@@ -150,16 +153,16 @@ void pretty_print_errors(
   const size_t indent,
   const size_t relative_indent,
   const bool last_property,
-  buffer_T* buffer
+  hb_buffer_T* buffer
 ) {
-  if (node->errors != NULL && array_size(node->errors) > 0) {
+  if (node->errors != NULL && hb_array_size(node->errors) > 0) {
     error_pretty_print_array("errors", node->errors, indent, relative_indent, last_property, buffer);
-    buffer_append(buffer, "\n");
+    hb_buffer_append(buffer, "\n");
   }
 }
-void pretty_print_location(location_T location, buffer_T* buffer) {
-  buffer_append(buffer, "(location: (");
+void pretty_print_location(location_T location, hb_buffer_T* buffer) {
+  hb_buffer_append(buffer, "(location: (");
   char location_string[128];
   sprintf(
     location_string,
@@ -169,73 +172,73 @@ void pretty_print_location(location_T location, buffer_T* buffer) {
     location.end.line,
     location.end.column
   );
-  buffer_append(buffer, location_string);
-  buffer_append(buffer, "))");
+  hb_buffer_append(buffer, location_string);
+  hb_buffer_append(buffer, "))");
 }
 void pretty_print_position_property(
   position_T* position,
-  const char* name,
+  hb_string_T name,
   const size_t indent,
   const size_t relative_indent,
   const bool last_property,
-  buffer_T* buffer
+  hb_buffer_T* buffer
 ) {
   pretty_print_label(name, indent, relative_indent, last_property, buffer);
   if (position != NULL) {
-    buffer_append(buffer, "(");
+    hb_buffer_append(buffer, "(");
     char position_string[128];
     sprintf(position_string, "%u:%u", (position->line) ? position->line : 0, (position->column) ? position->column : 0);
-    buffer_append(buffer, position_string);
-    buffer_append(buffer, ")");
+    hb_buffer_append(buffer, position_string);
+    hb_buffer_append(buffer, ")");
   } else {
-    buffer_append(buffer, "∅");
+    hb_buffer_append(buffer, "∅");
   }
-  buffer_append(buffer, "\n");
+  hb_buffer_append(buffer, "\n");
 }
 void pretty_print_token_property(
   token_T* token,
-  const char* name,
+  hb_string_T name,
   const size_t indent,
   const size_t relative_indent,
   const bool last_property,
-  buffer_T* buffer
+  hb_buffer_T* buffer
 ) {
   pretty_print_label(name, indent, relative_indent, last_property, buffer);
   if (token != NULL && token->value != NULL) {
-    char* quoted = quoted_string(token->value);
-    buffer_append(buffer, quoted);
-    free(quoted);
+    hb_string_T quoted = quoted_string(hb_string(token->value));
+    hb_buffer_append_string(buffer, quoted);
+    free(quoted.data);
-    buffer_append(buffer, " ");
+    hb_buffer_append(buffer, " ");
     pretty_print_location(token->location, buffer);
   } else {
-    buffer_append(buffer, "∅");
+    hb_buffer_append(buffer, "∅");
   }
-  buffer_append(buffer, "\n");
+  hb_buffer_append(buffer, "\n");
 }
 void pretty_print_string_property(
-  const char* string,
-  const char* name,
+  hb_string_T string,
+  hb_string_T name,
   const size_t indent,
   const size_t relative_indent,
   const bool last_property,
-  buffer_T* buffer
+  hb_buffer_T* buffer
 ) {
-  const char* value = "∅";
-  char* escaped = NULL;
-  char* quoted = NULL;
+  hb_string_T value = hb_string("∅");
+  hb_string_T escaped = { .data = NULL, .length = 0 };
+  hb_string_T quoted;
-  if (string != NULL) {
+  if (!hb_string_is_empty(string)) {
     escaped = escape_newlines(string);
     quoted = quoted_string(escaped);
     value = quoted;
@@ -243,35 +246,8 @@ void pretty_print_string_property(
   pretty_print_property(name, value, indent, relative_indent, last_property, buffer);
-  if (string != NULL) {
-    if (escaped != NULL) { free(escaped); }
-    if (quoted != NULL) { free(quoted); }
+  if (!hb_string_is_empty(string)) {
+    if (!hb_string_is_empty(escaped)) { free(escaped.data); }
+    if (!hb_string_is_empty(quoted)) { free(quoted.data); }
   }
 }
-void pretty_print_analyzed_ruby(analyzed_ruby_T* analyzed, const char* source) {
-  printf(
-    "------------------------\nanalyzed (%p)\n------------------------\n%s\n------------------------\n  if:     %i\n "
-    " elsif:  %i\n  else:   %i\n  end:    %i\n  block:  %i\n  block_closing: %i\n  case:   %i\n  when:   %i\n  for:    "
-    "%i\n  while:  %i\n "
-    " until:  %i\n  begin:  %i\n  "
-    "rescue: %i\n  ensure: %i\n  unless: %i\n==================\n\n",
-    (void*) analyzed,
-    source,
-    analyzed->has_if_node,
-    analyzed->has_elsif_node,
-    analyzed->has_else_node,
-    analyzed->has_end,
-    analyzed->has_block_node,
-    analyzed->has_block_closing,
-    analyzed->has_case_node,
-    analyzed->has_when_node,
-    analyzed->has_for_node,
-    analyzed->has_while_node,
-    analyzed->has_until_node,
-    analyzed->has_begin_node,
-    analyzed->has_rescue_node,
-    analyzed->has_ensure_node,
-    analyzed->has_unless_node
-  );
-}

data/src/token.c CHANGED Viewed

@@ -1,32 +1,24 @@
 #include "include/token.h"
-#include "include/json.h"
 #include "include/lexer.h"
 #include "include/position.h"
 #include "include/range.h"
 #include "include/token_struct.h"
 #include "include/util.h"
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-size_t token_sizeof(void) {
-  return sizeof(struct TOKEN_STRUCT);
-}
-token_T* token_init(const char* value, const token_type_T type, lexer_T* lexer) {
-  token_T* token = calloc(1, token_sizeof());
+token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer) {
+  token_T* token = calloc(1, sizeof(token_T));
   if (type == TOKEN_NEWLINE) {
     lexer->current_line++;
     lexer->current_column = 0;
   }
-  if (value) {
-    token->value = herb_strdup(value);
-  } else {
-    token->value = NULL;
-  }
+  token->value = hb_string_to_c_string_using_malloc(value);
   token->type = type;
   token->range = (range_T) { .from = lexer->previous_position, .to = lexer->current_position };
@@ -88,24 +80,25 @@ const char* token_type_to_string(const token_type_T type) {
   return "Unknown token_type_T";
 }
-char* token_to_string(const token_T* token) {
+hb_string_T token_to_string(const token_T* token) {
   const char* type_string = token_type_to_string(token->type);
-  const char* template = "#<Herb::Token type=\"%s\" value=\"%s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>";
+  const char* template = "#<Herb::Token type=\"%s\" value=\"%.*s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>";
   char* string = calloc(strlen(type_string) + strlen(template) + strlen(token->value) + 16, sizeof(char));
-  char* escaped;
+  hb_string_T escaped;
   if (token->type == TOKEN_EOF) {
-    escaped = herb_strdup("<EOF>");
+    escaped = hb_string(herb_strdup("<EOF>"));
   } else {
-    escaped = escape_newlines(token->value);
+    escaped = escape_newlines(hb_string(token->value));
   }
   sprintf(
     string,
     template,
     type_string,
-    escaped,
+    escaped.length,
+    escaped.data,
     token->range.from,
     token->range.to,
     token->location.start.line,
@@ -114,59 +107,15 @@ char* token_to_string(const token_T* token) {
     token->location.end.column
   );
-  free(escaped);
-  return string;
-}
-char* token_to_json(const token_T* token) {
-  buffer_T json = buffer_new();
-  json_start_root_object(&json);
-  json_add_string(&json, "type", token_type_to_string(token->type));
-  json_add_string(&json, "value", token->value);
-  buffer_T range = buffer_new();
-  json_start_array(&json, "range");
-  json_add_size_t(&range, NULL, token->range.from);
-  json_add_size_t(&range, NULL, token->range.to);
-  buffer_concat(&json, &range);
-  buffer_free(&range);
-  json_end_array(&json);
-  buffer_T start = buffer_new();
-  json_start_object(&json, "start");
-  json_add_size_t(&start, "line", token->location.start.line);
-  json_add_size_t(&start, "column", token->location.start.column);
-  buffer_concat(&json, &start);
-  buffer_free(&start);
-  json_end_object(&json);
-  buffer_T end = buffer_new();
-  json_start_object(&json, "end");
-  json_add_size_t(&end, "line", token->location.end.line);
-  json_add_size_t(&end, "column", token->location.end.column);
-  buffer_concat(&json, &end);
-  buffer_free(&end);
-  json_end_object(&json);
-  json_end_object(&json);
-  return buffer_value(&json);
-}
-char* token_value(const token_T* token) {
-  return token->value;
-}
+  free(escaped.data);
-int token_type(const token_T* token) {
-  return token->type;
+  return hb_string(string);
 }
 token_T* token_copy(token_T* token) {
   if (!token) { return NULL; }
-  token_T* new_token = calloc(1, token_sizeof());
+  token_T* new_token = calloc(1, sizeof(token_T));
   if (!new_token) { return NULL; }
@@ -188,6 +137,10 @@ token_T* token_copy(token_T* token) {
   return new_token;
 }
+bool token_value_empty(const token_T* token) {
+  return token == NULL || token->value == NULL || token->value[0] == '\0';
+}
 void token_free(token_T* token) {
   if (!token) { return; }

data/src/utf8.c CHANGED Viewed

@@ -5,7 +5,7 @@
 //   110xxxxx = 2 bytes
 //   1110xxxx = 3 bytes
 //   11110xxx = 4 bytes
-int utf8_char_byte_length(unsigned char first_byte) {
+uint32_t utf8_char_byte_length(unsigned char first_byte) {
   if ((first_byte & 0x80) == 0) {
     return 1;
   } else if ((first_byte & 0xE0) == 0xC0) {
@@ -24,18 +24,18 @@ bool utf8_is_valid_continuation_byte(unsigned char byte) {
   return (byte & 0xC0) == 0x80;
 }
-int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
+uint32_t utf8_sequence_length(const char* str, size_t position, size_t max_length) {
   if (position >= max_length) { return 0; }
   unsigned char first_byte = (unsigned char) str[position];
-  int expected_length = utf8_char_byte_length(first_byte);
+  uint32_t expected_length = utf8_char_byte_length(first_byte);
   if (position + expected_length > max_length) {
     return 1; // Not enough bytes, treat as single byte
   }
   if (expected_length > 1) {
-    for (int i = 1; i < expected_length; i++) {
+    for (uint32_t i = 1; i < expected_length; i++) {
       if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
         return 1; // Invalid continuation byte, treat first byte as single byte
       }