@herb-tools/node 0.7.4 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/binding.gyp +8 -5
  2. package/dist/herb-node.esm.js +6 -6
  3. package/dist/herb-node.esm.js.map +1 -1
  4. package/extension/error_helpers.cpp +67 -9
  5. package/extension/error_helpers.h +4 -2
  6. package/extension/extension_helpers.cpp +20 -31
  7. package/extension/extension_helpers.h +7 -5
  8. package/extension/herb.cpp +10 -42
  9. package/extension/libherb/analyze.c +461 -249
  10. package/extension/libherb/analyze.h +10 -2
  11. package/extension/libherb/analyze_helpers.c +5 -0
  12. package/extension/libherb/analyze_helpers.h +3 -0
  13. package/extension/libherb/analyze_missing_end.c +147 -0
  14. package/extension/libherb/analyze_transform.c +196 -0
  15. package/extension/libherb/analyzed_ruby.c +23 -2
  16. package/extension/libherb/analyzed_ruby.h +4 -2
  17. package/extension/libherb/ast_node.c +14 -17
  18. package/extension/libherb/ast_node.h +4 -4
  19. package/extension/libherb/ast_nodes.c +180 -182
  20. package/extension/libherb/ast_nodes.h +69 -68
  21. package/extension/libherb/ast_pretty_print.c +233 -233
  22. package/extension/libherb/ast_pretty_print.h +3 -3
  23. package/extension/libherb/element_source.c +7 -6
  24. package/extension/libherb/element_source.h +3 -1
  25. package/extension/libherb/errors.c +273 -153
  26. package/extension/libherb/errors.h +43 -27
  27. package/extension/libherb/extract.c +92 -34
  28. package/extension/libherb/extract.h +4 -4
  29. package/extension/libherb/herb.c +37 -49
  30. package/extension/libherb/herb.h +6 -7
  31. package/extension/libherb/html_util.c +34 -96
  32. package/extension/libherb/html_util.h +4 -5
  33. package/extension/libherb/include/analyze.h +10 -2
  34. package/extension/libherb/include/analyze_helpers.h +3 -0
  35. package/extension/libherb/include/analyzed_ruby.h +4 -2
  36. package/extension/libherb/include/ast_node.h +4 -4
  37. package/extension/libherb/include/ast_nodes.h +69 -68
  38. package/extension/libherb/include/ast_pretty_print.h +3 -3
  39. package/extension/libherb/include/element_source.h +3 -1
  40. package/extension/libherb/include/errors.h +43 -27
  41. package/extension/libherb/include/extract.h +4 -4
  42. package/extension/libherb/include/herb.h +6 -7
  43. package/extension/libherb/include/html_util.h +4 -5
  44. package/extension/libherb/include/lexer.h +1 -3
  45. package/extension/libherb/include/lexer_peek_helpers.h +21 -19
  46. package/extension/libherb/include/lexer_struct.h +12 -10
  47. package/extension/libherb/include/location.h +10 -13
  48. package/extension/libherb/include/macros.h +4 -0
  49. package/extension/libherb/include/parser.h +12 -6
  50. package/extension/libherb/include/parser_helpers.h +26 -16
  51. package/extension/libherb/include/position.h +3 -14
  52. package/extension/libherb/include/pretty_print.h +38 -28
  53. package/extension/libherb/include/prism_helpers.h +1 -1
  54. package/extension/libherb/include/range.h +4 -13
  55. package/extension/libherb/include/token.h +5 -11
  56. package/extension/libherb/include/token_struct.h +2 -2
  57. package/extension/libherb/include/utf8.h +3 -2
  58. package/extension/libherb/include/util/hb_arena.h +31 -0
  59. package/extension/libherb/include/util/hb_arena_debug.h +8 -0
  60. package/extension/libherb/include/util/hb_array.h +33 -0
  61. package/extension/libherb/include/util/hb_buffer.h +34 -0
  62. package/extension/libherb/include/util/hb_string.h +29 -0
  63. package/extension/libherb/include/util/hb_system.h +9 -0
  64. package/extension/libherb/include/util.h +3 -14
  65. package/extension/libherb/include/version.h +1 -1
  66. package/extension/libherb/include/visitor.h +1 -1
  67. package/extension/libherb/io.c +7 -4
  68. package/extension/libherb/lexer.c +62 -88
  69. package/extension/libherb/lexer.h +1 -3
  70. package/extension/libherb/lexer_peek_helpers.c +42 -38
  71. package/extension/libherb/lexer_peek_helpers.h +21 -19
  72. package/extension/libherb/lexer_struct.h +12 -10
  73. package/extension/libherb/location.c +9 -37
  74. package/extension/libherb/location.h +10 -13
  75. package/extension/libherb/macros.h +4 -0
  76. package/extension/libherb/main.c +19 -23
  77. package/extension/libherb/parser.c +373 -313
  78. package/extension/libherb/parser.h +12 -6
  79. package/extension/libherb/parser_helpers.c +60 -54
  80. package/extension/libherb/parser_helpers.h +26 -16
  81. package/extension/libherb/parser_match_tags.c +316 -0
  82. package/extension/libherb/position.h +3 -14
  83. package/extension/libherb/pretty_print.c +88 -117
  84. package/extension/libherb/pretty_print.h +38 -28
  85. package/extension/libherb/prism_helpers.c +7 -7
  86. package/extension/libherb/prism_helpers.h +1 -1
  87. package/extension/libherb/range.c +2 -35
  88. package/extension/libherb/range.h +4 -13
  89. package/extension/libherb/token.c +36 -87
  90. package/extension/libherb/token.h +5 -11
  91. package/extension/libherb/token_struct.h +2 -2
  92. package/extension/libherb/utf8.c +4 -4
  93. package/extension/libherb/utf8.h +3 -2
  94. package/extension/libherb/util/hb_arena.c +179 -0
  95. package/extension/libherb/util/hb_arena.h +31 -0
  96. package/extension/libherb/util/hb_arena_debug.c +237 -0
  97. package/extension/libherb/util/hb_arena_debug.h +8 -0
  98. package/extension/libherb/{array.c → util/hb_array.c} +26 -27
  99. package/extension/libherb/util/hb_array.h +33 -0
  100. package/extension/libherb/util/hb_buffer.c +203 -0
  101. package/extension/libherb/util/hb_buffer.h +34 -0
  102. package/extension/libherb/util/hb_string.c +85 -0
  103. package/extension/libherb/util/hb_string.h +29 -0
  104. package/extension/libherb/util/hb_system.c +30 -0
  105. package/extension/libherb/util/hb_system.h +9 -0
  106. package/extension/libherb/util.c +29 -99
  107. package/extension/libherb/util.h +3 -14
  108. package/extension/libherb/version.h +1 -1
  109. package/extension/libherb/visitor.c +55 -55
  110. package/extension/libherb/visitor.h +1 -1
  111. package/extension/nodes.cpp +40 -40
  112. package/extension/nodes.h +2 -2
  113. package/extension/prism/include/prism/ast.h +31 -1
  114. package/extension/prism/include/prism/diagnostic.h +1 -0
  115. package/extension/prism/include/prism/version.h +3 -3
  116. package/extension/prism/src/diagnostic.c +3 -1
  117. package/extension/prism/src/prism.c +130 -71
  118. package/extension/prism/src/util/pm_string.c +6 -8
  119. package/package.json +3 -3
  120. package/extension/libherb/array.h +0 -33
  121. package/extension/libherb/buffer.c +0 -232
  122. package/extension/libherb/buffer.h +0 -39
  123. package/extension/libherb/include/array.h +0 -33
  124. package/extension/libherb/include/buffer.h +0 -39
  125. package/extension/libherb/include/json.h +0 -28
  126. package/extension/libherb/include/memory.h +0 -12
  127. package/extension/libherb/json.c +0 -205
  128. package/extension/libherb/json.h +0 -28
  129. package/extension/libherb/memory.c +0 -53
  130. package/extension/libherb/memory.h +0 -12
  131. package/extension/libherb/position.c +0 -33
@@ -1,23 +1,14 @@
1
1
  #ifndef HERB_RANGE_H
2
2
  #define HERB_RANGE_H
3
3
 
4
+ #include <stdint.h>
4
5
  #include <stdlib.h>
5
6
 
6
7
  typedef struct RANGE_STRUCT {
7
- size_t from;
8
- size_t to;
8
+ uint32_t from;
9
+ uint32_t to;
9
10
  } range_T;
10
11
 
11
- range_T* range_init(size_t from, size_t to);
12
-
13
- size_t range_from(const range_T* range);
14
- size_t range_to(const range_T* range);
15
- size_t range_length(range_T* range);
16
-
17
- range_T* range_copy(range_T* range);
18
-
19
- size_t range_sizeof(void);
20
-
21
- void range_free(range_T* range);
12
+ uint32_t range_length(range_T range);
22
13
 
23
14
  #endif
@@ -4,22 +4,16 @@
4
4
  #include "lexer_struct.h"
5
5
  #include "position.h"
6
6
  #include "token_struct.h"
7
+ #include "util/hb_string.h"
7
8
 
8
- token_T* token_init(const char* value, token_type_T type, lexer_T* lexer);
9
- char* token_to_string(const token_T* token);
10
- char* token_to_json(const token_T* token);
9
+ token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer);
10
+ hb_string_T token_to_string(const token_T* token);
11
11
  const char* token_type_to_string(token_type_T type);
12
12
 
13
- char* token_value(const token_T* token);
14
- int token_type(const token_T* token);
15
-
16
- position_T* token_start_position(token_T* token);
17
- position_T* token_end_position(token_T* token);
18
-
19
- size_t token_sizeof(void);
20
-
21
13
  token_T* token_copy(token_T* token);
22
14
 
23
15
  void token_free(token_T* token);
24
16
 
17
+ bool token_value_empty(const token_T* token);
18
+
25
19
  #endif
@@ -50,8 +50,8 @@ typedef enum {
50
50
 
51
51
  typedef struct TOKEN_STRUCT {
52
52
  char* value;
53
- range_T* range;
54
- location_T* location;
53
+ range_T range;
54
+ location_T location;
55
55
  token_type_T type;
56
56
  } token_T;
57
57
 
@@ -2,10 +2,11 @@
2
2
  #define HERB_UTF8_H
3
3
 
4
4
  #include <stdbool.h>
5
+ #include <stdint.h>
5
6
  #include <stdlib.h>
6
7
 
7
- int utf8_char_byte_length(unsigned char first_byte);
8
- int utf8_sequence_length(const char* str, size_t position, size_t max_length);
8
+ uint32_t utf8_char_byte_length(unsigned char first_byte);
9
+ uint32_t utf8_sequence_length(const char* str, size_t position, size_t max_length);
9
10
  bool utf8_is_valid_continuation_byte(unsigned char byte);
10
11
 
11
12
  #endif
@@ -0,0 +1,31 @@
1
+ #ifndef HERB_ARENA_H
2
+ #define HERB_ARENA_H
3
+
4
+ #include <stdbool.h>
5
+ #include <stddef.h>
6
+
7
+ typedef struct HB_ARENA_PAGE_STRUCT hb_arena_page_T;
8
+
9
+ struct HB_ARENA_PAGE_STRUCT {
10
+ hb_arena_page_T* next;
11
+ size_t capacity;
12
+ size_t position;
13
+ char memory[];
14
+ };
15
+
16
+ typedef struct HB_ARENA_STRUCT {
17
+ hb_arena_page_T* head;
18
+ hb_arena_page_T* tail;
19
+ size_t default_page_size;
20
+ size_t allocation_count;
21
+ } hb_arena_T;
22
+
23
+ bool hb_arena_init(hb_arena_T* allocator, size_t initial_size);
24
+ void* hb_arena_alloc(hb_arena_T* allocator, size_t size);
25
+ size_t hb_arena_position(hb_arena_T* allocator);
26
+ size_t hb_arena_capacity(hb_arena_T* allocator);
27
+ void hb_arena_reset(hb_arena_T* allocator);
28
+ void hb_arena_reset_to(hb_arena_T* allocator, size_t new_position);
29
+ void hb_arena_free(hb_arena_T* allocator);
30
+
31
+ #endif
@@ -0,0 +1,8 @@
1
+ #ifndef HERB_ARENA_DEBUG_H
2
+ #define HERB_ARENA_DEBUG_H
3
+
4
+ #include "hb_arena.h"
5
+
6
+ void hb_arena_print_stats(const hb_arena_T* allocator);
7
+
8
+ #endif
@@ -0,0 +1,33 @@
1
+ #ifndef HERB_ARRAY_H
2
+ #define HERB_ARRAY_H
3
+
4
+ #include <stdlib.h>
5
+
6
+ typedef struct HB_ARRAY_STRUCT {
7
+ void** items;
8
+ size_t size;
9
+ size_t capacity;
10
+ } hb_array_T;
11
+
12
+ hb_array_T* hb_array_init(size_t capacity);
13
+
14
+ void* hb_array_get(const hb_array_T* array, size_t index);
15
+ void* hb_array_first(hb_array_T* array);
16
+ void* hb_array_last(hb_array_T* array);
17
+
18
+ void hb_array_append(hb_array_T* array, void* item);
19
+ void hb_array_set(const hb_array_T* array, size_t index, void* item);
20
+ void hb_array_free(hb_array_T** array);
21
+ void hb_array_remove(hb_array_T* array, size_t index);
22
+
23
+ size_t hb_array_index_of(hb_array_T* array, void* item);
24
+ void hb_array_remove_item(hb_array_T* array, void* item);
25
+
26
+ void hb_array_push(hb_array_T* array, void* item);
27
+ void* hb_array_pop(hb_array_T* array);
28
+
29
+ size_t hb_array_capacity(const hb_array_T* array);
30
+ size_t hb_array_size(const hb_array_T* array);
31
+ size_t hb_array_sizeof(void);
32
+
33
+ #endif
@@ -0,0 +1,34 @@
1
+ #ifndef HERB_BUFFER_H
2
+ #define HERB_BUFFER_H
3
+
4
+ #include "hb_string.h"
5
+
6
+ #include <stdbool.h>
7
+ #include <stdlib.h>
8
+
9
+ typedef struct HB_BUFFER_STRUCT {
10
+ char* value;
11
+ size_t length;
12
+ size_t capacity;
13
+ } hb_buffer_T;
14
+
15
+ bool hb_buffer_init(hb_buffer_T* buffer, size_t capacity);
16
+
17
+ void hb_buffer_append(hb_buffer_T* buffer, const char* text);
18
+ void hb_buffer_append_with_length(hb_buffer_T* buffer, const char* text, size_t length);
19
+ void hb_buffer_append_string(hb_buffer_T* buffer, hb_string_T string);
20
+ void hb_buffer_append_char(hb_buffer_T* buffer, char character);
21
+ void hb_buffer_append_whitespace(hb_buffer_T* buffer, size_t length);
22
+ void hb_buffer_prepend(hb_buffer_T* buffer, const char* text);
23
+ void hb_buffer_concat(hb_buffer_T* destination, hb_buffer_T* source);
24
+
25
+ char* hb_buffer_value(const hb_buffer_T* buffer);
26
+
27
+ size_t hb_buffer_length(const hb_buffer_T* buffer);
28
+ size_t hb_buffer_capacity(const hb_buffer_T* buffer);
29
+ size_t hb_buffer_sizeof(void);
30
+
31
+ void hb_buffer_clear(hb_buffer_T* buffer);
32
+ void hb_buffer_free(hb_buffer_T** buffer);
33
+
34
+ #endif
@@ -0,0 +1,29 @@
1
+ #ifndef HERB_STRING_H
2
+ #define HERB_STRING_H
3
+
4
+ #include <stdbool.h>
5
+ #include <stddef.h>
6
+ #include <stdint.h>
7
+
8
+ #include "hb_arena.h"
9
+
10
+ typedef struct HB_STRING_STRUCT {
11
+ char* data;
12
+ uint32_t length;
13
+ } hb_string_T;
14
+
15
+ hb_string_T hb_string(const char* null_terminated_c_string);
16
+ hb_string_T hb_string_slice(hb_string_T string, uint32_t offset);
17
+ bool hb_string_equals(hb_string_T a, hb_string_T b);
18
+ bool hb_string_equals_case_insensitive(hb_string_T a, hb_string_T b);
19
+ bool hb_string_starts_with(hb_string_T string, hb_string_T expected_prefix);
20
+ bool hb_string_is_empty(hb_string_T string);
21
+ hb_string_T hb_string_truncate(hb_string_T string, uint32_t max_length);
22
+
23
+ hb_string_T hb_string_range(hb_string_T string, uint32_t from, uint32_t to);
24
+
25
+ char* hb_string_to_c_string_using_malloc(hb_string_T string);
26
+
27
+ char* hb_string_to_c_string(hb_arena_T* allocator, hb_string_T string);
28
+
29
+ #endif
@@ -0,0 +1,9 @@
1
+ #ifndef HERB_SYSTEM_H
2
+ #define HERB_SYSTEM_H
3
+
4
+ #include <stddef.h>
5
+
6
+ void* hb_system_allocate_memory(size_t size);
7
+ void hb_system_free_memory(void* ptr, size_t size);
8
+
9
+ #endif
@@ -1,25 +1,14 @@
1
1
  #ifndef HERB_UTIL_H
2
2
  #define HERB_UTIL_H
3
3
 
4
+ #include "util/hb_string.h"
4
5
  #include <stdbool.h>
5
6
  #include <stdlib.h>
6
7
 
7
- int is_whitespace(int character);
8
8
  int is_newline(int character);
9
9
 
10
- int count_in_string(const char* string, char character);
11
- int count_newlines(const char* string);
12
-
13
- char* replace_char(char* string, char find, char replace);
14
- char* escape_newlines(const char* input);
15
- char* quoted_string(const char* input);
16
- char* wrap_string(const char* input, char character);
17
-
18
- bool string_blank(const char* input);
19
- bool string_present(const char* input);
20
-
10
+ hb_string_T escape_newlines(hb_string_T input);
11
+ hb_string_T quoted_string(hb_string_T input);
21
12
  char* herb_strdup(const char* s);
22
13
 
23
- char* size_t_to_string(size_t value);
24
-
25
14
  #endif
@@ -1,6 +1,6 @@
1
1
  #ifndef HERB_VERSION_H
2
2
  #define HERB_VERSION_H
3
3
 
4
- #define HERB_VERSION "0.7.4"
4
+ #define HERB_VERSION "0.8.0"
5
5
 
6
6
  #endif
@@ -1,9 +1,9 @@
1
1
  #ifndef HERB_VISITOR_H
2
2
  #define HERB_VISITOR_H
3
3
 
4
- #include "array.h"
5
4
  #include "ast_node.h"
6
5
  #include "ast_nodes.h"
6
+ #include "util/hb_array.h"
7
7
 
8
8
  void herb_visit_node(const AST_NODE_T* node, bool (*visitor)(const AST_NODE_T*, void*), void* data);
9
9
  void herb_visit_child_nodes(const AST_NODE_T* node, bool (*visitor)(const AST_NODE_T* node, void* data), void* data);
@@ -1,5 +1,5 @@
1
1
  #include "include/io.h"
2
- #include "include/buffer.h"
2
+ #include "include/util/hb_buffer.h"
3
3
 
4
4
  #include <errno.h>
5
5
  #include <stdio.h>
@@ -8,6 +8,8 @@
8
8
  #define FILE_READ_CHUNK 4096
9
9
 
10
10
  char* herb_read_file(const char* filename) {
11
+ if (!filename) { return NULL; }
12
+
11
13
  FILE* fp = fopen(filename, "rb");
12
14
 
13
15
  if (fp == NULL) {
@@ -15,16 +17,17 @@ char* herb_read_file(const char* filename) {
15
17
  exit(1);
16
18
  }
17
19
 
18
- buffer_T buffer = buffer_new();
20
+ hb_buffer_T buffer;
21
+ hb_buffer_init(&buffer, 4096);
19
22
 
20
23
  char chunk[FILE_READ_CHUNK];
21
24
  size_t bytes_read;
22
25
 
23
26
  while ((bytes_read = fread(chunk, 1, FILE_READ_CHUNK, fp)) > 0) {
24
- buffer_append_with_length(&buffer, chunk, bytes_read);
27
+ hb_buffer_append_with_length(&buffer, chunk, bytes_read);
25
28
  }
26
29
 
27
30
  fclose(fp);
28
31
 
29
- return buffer_value(&buffer);
32
+ return hb_buffer_value(&buffer);
30
33
  }
@@ -1,24 +1,21 @@
1
- #include "include/buffer.h"
2
1
  #include "include/lexer_peek_helpers.h"
3
2
  #include "include/token.h"
4
3
  #include "include/utf8.h"
5
4
  #include "include/util.h"
5
+ #include "include/util/hb_buffer.h"
6
+ #include "include/util/hb_string.h"
6
7
 
7
8
  #include <ctype.h>
8
9
  #include <string.h>
9
10
 
10
11
  #define LEXER_STALL_LIMIT 5
11
12
 
12
- static size_t lexer_sizeof(void) {
13
- return sizeof(struct LEXER_STRUCT);
14
- }
15
-
16
13
  static bool lexer_eof(const lexer_T* lexer) {
17
14
  return lexer->current_character == '\0' || lexer->stalled;
18
15
  }
19
16
 
20
17
  static bool lexer_has_more_characters(const lexer_T* lexer) {
21
- return lexer->current_position < lexer->source_length;
18
+ return lexer->current_position < lexer->source.length;
22
19
  }
23
20
 
24
21
  static bool lexer_stalled(lexer_T* lexer) {
@@ -34,17 +31,16 @@ static bool lexer_stalled(lexer_T* lexer) {
34
31
  return lexer->stalled;
35
32
  }
36
33
 
37
- lexer_T* lexer_init(const char* source) {
38
- if (source == NULL) { source = ""; }
39
-
40
- lexer_T* lexer = calloc(1, lexer_sizeof());
34
+ void lexer_init(lexer_T* lexer, const char* source) {
35
+ if (source != NULL) {
36
+ lexer->source = hb_string(source);
37
+ } else {
38
+ lexer->source = hb_string("");
39
+ }
41
40
 
41
+ lexer->current_character = lexer->source.data[0];
42
42
  lexer->state = STATE_DATA;
43
43
 
44
- lexer->source = source;
45
- lexer->source_length = strlen(source);
46
- lexer->current_character = source[0];
47
-
48
44
  lexer->current_line = 1;
49
45
  lexer->current_column = 0;
50
46
  lexer->current_position = 0;
@@ -56,8 +52,6 @@ lexer_T* lexer_init(const char* source) {
56
52
  lexer->stall_counter = 0;
57
53
  lexer->last_position = 0;
58
54
  lexer->stalled = false;
59
-
60
- return lexer;
61
55
  }
62
56
 
63
57
  token_T* lexer_error(lexer_T* lexer, const char* message) {
@@ -66,14 +60,14 @@ token_T* lexer_error(lexer_T* lexer, const char* message) {
66
60
  snprintf(
67
61
  error_message,
68
62
  sizeof(error_message),
69
- "[Lexer] Error: %s (character '%c', line %zu, col %zu)\n",
63
+ "[Lexer] Error: %s (character '%c', line %u, col %u)\n",
70
64
  message,
71
65
  lexer->current_character,
72
66
  lexer->current_line,
73
67
  lexer->current_column
74
68
  );
75
69
 
76
- return token_init(error_message, TOKEN_ERROR, lexer);
70
+ return token_init(hb_string(error_message), TOKEN_ERROR, lexer);
77
71
  }
78
72
 
79
73
  static void lexer_advance(lexer_T* lexer) {
@@ -81,7 +75,7 @@ static void lexer_advance(lexer_T* lexer) {
81
75
  if (!is_newline(lexer->current_character)) { lexer->current_column++; }
82
76
 
83
77
  lexer->current_position++;
84
- lexer->current_character = lexer->source[lexer->current_position];
78
+ lexer->current_character = lexer->source.data[lexer->current_position];
85
79
  }
86
80
  }
87
81
 
@@ -93,11 +87,11 @@ static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
93
87
 
94
88
  lexer->current_position += byte_count;
95
89
 
96
- if (lexer->current_position >= lexer->source_length) {
97
- lexer->current_position = lexer->source_length;
90
+ if (lexer->current_position >= lexer->source.length) {
91
+ lexer->current_position = lexer->source.length;
98
92
  lexer->current_character = '\0';
99
93
  } else {
100
- lexer->current_character = lexer->source[lexer->current_position];
94
+ lexer->current_character = lexer->source.data[lexer->current_position];
101
95
  }
102
96
  }
103
97
  }
@@ -108,65 +102,50 @@ static void lexer_advance_by(lexer_T* lexer, const size_t count) {
108
102
  }
109
103
  }
110
104
 
111
- static token_T* lexer_advance_with(lexer_T* lexer, const char* value, const token_type_T type) {
112
- lexer_advance_by(lexer, strlen(value));
105
+ static token_T* lexer_advance_with(lexer_T* lexer, hb_string_T value, const token_type_T type) {
106
+ lexer_advance_by(lexer, value.length);
113
107
  return token_init(value, type, lexer);
114
108
  }
115
109
 
116
110
  static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type_T type) {
117
- char* collected = malloc(count + 1);
118
- if (!collected) { return NULL; }
111
+ uint32_t start_position = lexer->current_position;
119
112
 
120
113
  for (size_t i = 0; i < count; i++) {
121
- collected[i] = lexer->current_character;
122
114
  lexer_advance(lexer);
123
115
  }
124
116
 
125
- collected[count] = '\0';
126
-
127
- token_T* token = token_init(collected, type, lexer);
128
- free(collected);
117
+ token_T* token = token_init(hb_string_range(lexer->source, start_position, lexer->current_position), type, lexer);
129
118
 
130
119
  return token;
131
120
  }
132
121
 
133
122
  static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
134
- return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
123
+ char buffer[2];
124
+ buffer[0] = lexer->current_character;
125
+ buffer[1] = '\0';
126
+
127
+ return lexer_advance_with(lexer, hb_string(buffer), type);
135
128
  }
136
129
 
137
130
  static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
138
- int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);
139
-
131
+ int char_byte_length = utf8_sequence_length(lexer->source.data, lexer->current_position, lexer->source.length);
140
132
  if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
141
-
142
- char* utf8_char = malloc(char_byte_length + 1);
143
-
144
- if (!utf8_char) { return lexer_advance_current(lexer, type); }
133
+ uint32_t start_position = lexer->current_position;
145
134
 
146
135
  for (int i = 0; i < char_byte_length; i++) {
147
- if (lexer->current_position + i >= lexer->source_length) {
148
- free(utf8_char);
149
- return lexer_advance_current(lexer, type);
150
- }
151
-
152
- utf8_char[i] = lexer->source[lexer->current_position + i];
136
+ if (lexer->current_position + i >= lexer->source.length) { return lexer_advance_current(lexer, type); }
153
137
  }
154
138
 
155
- utf8_char[char_byte_length] = '\0';
156
-
157
139
  lexer_advance_utf8_bytes(lexer, char_byte_length);
158
140
 
159
- token_T* token = token_init(utf8_char, type, lexer);
160
-
161
- free(utf8_char);
141
+ token_T* token = token_init(hb_string_range(lexer->source, start_position, lexer->current_position), type, lexer);
162
142
 
163
143
  return token;
164
144
  }
165
145
 
166
- static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
167
- if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
168
- return lexer_advance_with(lexer, value, type);
169
- }
146
+ static token_T* lexer_match_and_advance(lexer_T* lexer, hb_string_T value, const token_type_T type) {
147
+ hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position);
148
+ if (hb_string_starts_with(remaining_source, value)) { return lexer_advance_with(lexer, value, type); }
170
149
 
171
150
  return NULL;
172
151
  }
@@ -174,35 +153,31 @@ static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const
174
153
  // ===== Specialized Parsers
175
154
 
176
155
  static token_T* lexer_parse_whitespace(lexer_T* lexer) {
177
- buffer_T buffer = buffer_new();
156
+ uint32_t start_position = lexer->current_position;
178
157
 
179
158
  while (isspace(lexer->current_character) && lexer->current_character != '\n' && lexer->current_character != '\r'
180
159
  && !lexer_eof(lexer)) {
181
- buffer_append_char(&buffer, lexer->current_character);
182
160
  lexer_advance(lexer);
183
161
  }
184
162
 
185
- token_T* token = token_init(buffer.value, TOKEN_WHITESPACE, lexer);
186
-
187
- buffer_free(&buffer);
163
+ token_T* token =
164
+ token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_WHITESPACE, lexer);
188
165
 
189
166
  return token;
190
167
  }
191
168
 
192
169
  static token_T* lexer_parse_identifier(lexer_T* lexer) {
193
- buffer_T buffer = buffer_new();
170
+ uint32_t start_position = lexer->current_position;
194
171
 
195
172
  while ((isalnum(lexer->current_character) || lexer->current_character == '-' || lexer->current_character == '_'
196
173
  || lexer->current_character == ':')
197
174
  && !lexer_peek_for_html_comment_end(lexer, 0) && !lexer_eof(lexer)) {
198
175
 
199
- buffer_append_char(&buffer, lexer->current_character);
200
176
  lexer_advance(lexer);
201
177
  }
202
178
 
203
- token_T* token = token_init(buffer.value, TOKEN_IDENTIFIER, lexer);
204
-
205
- buffer_free(&buffer);
179
+ token_T* token =
180
+ token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_IDENTIFIER, lexer);
206
181
 
207
182
  return token;
208
183
  }
@@ -210,7 +185,8 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
210
185
  // ===== ERB Parsing
211
186
 
212
187
  static token_T* lexer_parse_erb_open(lexer_T* lexer) {
213
- const char* erb_patterns[] = { "<%==", "<%%=", "<%=", "<%#", "<%-", "<%%", "<%" };
188
+ hb_string_T erb_patterns[] = { hb_string("<%=="), hb_string("<%%="), hb_string("<%="), hb_string("<%#"),
189
+ hb_string("<%-"), hb_string("<%%"), hb_string("<%") };
214
190
 
215
191
  lexer->state = STATE_ERB_CONTENT;
216
192
 
@@ -223,14 +199,18 @@ static token_T* lexer_parse_erb_open(lexer_T* lexer) {
223
199
  }
224
200
 
225
201
  static token_T* lexer_parse_erb_content(lexer_T* lexer) {
226
- buffer_T buffer = buffer_new();
202
+ uint32_t start_position = lexer->current_position;
227
203
 
228
204
  while (!lexer_peek_erb_end(lexer, 0)) {
229
205
  if (lexer_eof(lexer)) {
230
- return token_init(buffer.value, TOKEN_ERROR, lexer); // Handle unexpected EOF
231
- }
206
+ token_T* token = token_init(
207
+ hb_string_range(lexer->source, start_position, lexer->current_position),
208
+ TOKEN_ERROR,
209
+ lexer
210
+ ); // Handle unexpected EOF
232
211
 
233
- buffer_append_char(&buffer, lexer->current_character);
212
+ return token;
213
+ }
234
214
 
235
215
  if (is_newline(lexer->current_character)) {
236
216
  lexer->current_line++;
@@ -240,14 +220,13 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
240
220
  }
241
221
 
242
222
  lexer->current_position++;
243
- lexer->current_character = lexer->source[lexer->current_position];
223
+ lexer->current_character = lexer->source.data[lexer->current_position];
244
224
  }
245
225
 
246
226
  lexer->state = STATE_ERB_CLOSE;
247
227
 
248
- token_T* token = token_init(buffer.value, TOKEN_ERB_CONTENT, lexer);
249
-
250
- buffer_free(&buffer);
228
+ token_T* token =
229
+ token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_ERB_CONTENT, lexer);
251
230
 
252
231
  return token;
253
232
  }
@@ -255,16 +234,17 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
255
234
  static token_T* lexer_parse_erb_close(lexer_T* lexer) {
256
235
  lexer->state = STATE_DATA;
257
236
 
258
- if (lexer_peek_erb_percent_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "%%>", TOKEN_ERB_END); }
259
- if (lexer_peek_erb_dash_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "-%>", TOKEN_ERB_END); }
237
+ if (lexer_peek_erb_percent_close_tag(lexer, 0)) { return lexer_advance_with(lexer, hb_string("%%>"), TOKEN_ERB_END); }
238
+ if (lexer_peek_erb_equals_close_tag(lexer, 0)) { return lexer_advance_with(lexer, hb_string("=%>"), TOKEN_ERB_END); }
239
+ if (lexer_peek_erb_dash_close_tag(lexer, 0)) { return lexer_advance_with(lexer, hb_string("-%>"), TOKEN_ERB_END); }
260
240
 
261
- return lexer_advance_with(lexer, "%>", TOKEN_ERB_END);
241
+ return lexer_advance_with(lexer, hb_string("%>"), TOKEN_ERB_END);
262
242
  }
263
243
 
264
244
  // ===== Tokenizing Function
265
245
 
266
246
  token_T* lexer_next_token(lexer_T* lexer) {
267
- if (lexer_eof(lexer)) { return token_init("", TOKEN_EOF, lexer); }
247
+ if (lexer_eof(lexer)) { return token_init(hb_string(""), TOKEN_EOF, lexer); }
268
248
  if (lexer_stalled(lexer)) { return lexer_error(lexer, "Lexer stalled after 5 iterations"); }
269
249
 
270
250
  if (lexer->state == STATE_ERB_CONTENT) { return lexer_parse_erb_content(lexer); }
@@ -301,33 +281,33 @@ token_T* lexer_next_token(lexer_T* lexer) {
301
281
  if (isalnum(lexer_peek(lexer, 1))) { return lexer_advance_current(lexer, TOKEN_HTML_TAG_START); }
302
282
 
303
283
  if (lexer_peek_for_html_comment_start(lexer, 0)) {
304
- return lexer_advance_with(lexer, "<!--", TOKEN_HTML_COMMENT_START);
284
+ return lexer_advance_with(lexer, hb_string("<!--"), TOKEN_HTML_COMMENT_START);
305
285
  }
306
286
 
307
287
  if (lexer_peek_for_close_tag_start(lexer, 0)) {
308
- return lexer_advance_with(lexer, "</", TOKEN_HTML_TAG_START_CLOSE);
288
+ return lexer_advance_with(lexer, hb_string("</"), TOKEN_HTML_TAG_START_CLOSE);
309
289
  }
310
290
 
311
291
  return lexer_advance_current(lexer, TOKEN_LT);
312
292
  }
313
293
 
314
294
  case '/': {
315
- token_T* token = lexer_match_and_advance(lexer, "/>", TOKEN_HTML_TAG_SELF_CLOSE);
295
+ token_T* token = lexer_match_and_advance(lexer, hb_string("/>"), TOKEN_HTML_TAG_SELF_CLOSE);
316
296
  return token ? token : lexer_advance_current(lexer, TOKEN_SLASH);
317
297
  }
318
298
 
319
299
  case '?': {
320
- token_T* token = lexer_match_and_advance(lexer, "?>", TOKEN_XML_DECLARATION_END);
300
+ token_T* token = lexer_match_and_advance(lexer, hb_string("?>"), TOKEN_XML_DECLARATION_END);
321
301
  return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER);
322
302
  }
323
303
 
324
304
  case '-': {
325
- token_T* token = lexer_match_and_advance(lexer, "-->", TOKEN_HTML_COMMENT_END);
305
+ token_T* token = lexer_match_and_advance(lexer, hb_string("-->"), TOKEN_HTML_COMMENT_END);
326
306
  return token ? token : lexer_advance_current(lexer, TOKEN_DASH);
327
307
  }
328
308
 
329
309
  case ']': {
330
- token_T* token = lexer_match_and_advance(lexer, "]]>", TOKEN_CDATA_END);
310
+ token_T* token = lexer_match_and_advance(lexer, hb_string("]]>"), TOKEN_CDATA_END);
331
311
  return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER);
332
312
  }
333
313
 
@@ -353,9 +333,3 @@ token_T* lexer_next_token(lexer_T* lexer) {
353
333
  }
354
334
  }
355
335
  }
356
-
357
- void lexer_free(lexer_T* lexer) {
358
- if (lexer == NULL) { return; }
359
-
360
- free(lexer);
361
- }