herb 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. checksums.yaml +4 -4
  2. data/Makefile +8 -5
  3. data/config.yml +26 -6
  4. data/ext/herb/error_helpers.c +57 -3
  5. data/ext/herb/error_helpers.h +1 -1
  6. data/ext/herb/extconf.rb +1 -0
  7. data/ext/herb/extension.c +10 -24
  8. data/ext/herb/extension_helpers.c +3 -3
  9. data/ext/herb/extension_helpers.h +1 -1
  10. data/ext/herb/nodes.c +72 -37
  11. data/herb.gemspec +0 -2
  12. data/lib/herb/ast/helpers.rb +11 -0
  13. data/lib/herb/ast/node.rb +15 -6
  14. data/lib/herb/ast/nodes.rb +609 -392
  15. data/lib/herb/cli.rb +31 -0
  16. data/lib/herb/colors.rb +82 -0
  17. data/lib/herb/engine/compiler.rb +140 -14
  18. data/lib/herb/engine/debug_visitor.rb +1 -5
  19. data/lib/herb/engine/parser_error_overlay.rb +1 -1
  20. data/lib/herb/engine.rb +8 -14
  21. data/lib/herb/errors.rb +166 -56
  22. data/lib/herb/location.rb +2 -2
  23. data/lib/herb/project.rb +86 -21
  24. data/lib/herb/token.rb +14 -2
  25. data/lib/herb/version.rb +1 -1
  26. data/lib/herb.rb +1 -0
  27. data/sig/herb/ast/helpers.rbs +3 -0
  28. data/sig/herb/ast/node.rbs +12 -5
  29. data/sig/herb/ast/nodes.rbs +124 -62
  30. data/sig/herb/colors.rbs +35 -0
  31. data/sig/herb/engine/compiler.rbs +23 -1
  32. data/sig/herb/errors.rbs +74 -20
  33. data/sig/herb/token.rbs +8 -0
  34. data/sig/herb_c_extension.rbs +1 -1
  35. data/sig/serialized_ast_errors.rbs +8 -0
  36. data/src/analyze.c +420 -171
  37. data/src/analyze_helpers.c +5 -0
  38. data/src/analyze_missing_end.c +147 -0
  39. data/src/analyze_transform.c +196 -0
  40. data/src/analyzed_ruby.c +23 -2
  41. data/src/ast_node.c +5 -5
  42. data/src/ast_nodes.c +179 -179
  43. data/src/ast_pretty_print.c +232 -232
  44. data/src/element_source.c +7 -6
  45. data/src/errors.c +246 -126
  46. data/src/extract.c +92 -34
  47. data/src/herb.c +37 -49
  48. data/src/html_util.c +34 -96
  49. data/src/include/analyze.h +10 -2
  50. data/src/include/analyze_helpers.h +3 -0
  51. data/src/include/analyzed_ruby.h +4 -2
  52. data/src/include/ast_node.h +2 -2
  53. data/src/include/ast_nodes.h +67 -66
  54. data/src/include/ast_pretty_print.h +2 -2
  55. data/src/include/element_source.h +3 -1
  56. data/src/include/errors.h +30 -14
  57. data/src/include/extract.h +4 -4
  58. data/src/include/herb.h +6 -7
  59. data/src/include/html_util.h +4 -5
  60. data/src/include/lexer.h +1 -3
  61. data/src/include/lexer_peek_helpers.h +14 -14
  62. data/src/include/lexer_struct.h +3 -2
  63. data/src/include/macros.h +4 -0
  64. data/src/include/parser.h +12 -6
  65. data/src/include/parser_helpers.h +25 -15
  66. data/src/include/pretty_print.h +38 -28
  67. data/src/include/token.h +5 -8
  68. data/src/include/utf8.h +3 -2
  69. data/src/include/util/hb_arena.h +31 -0
  70. data/src/include/util/hb_arena_debug.h +8 -0
  71. data/src/include/util/hb_array.h +33 -0
  72. data/src/include/util/hb_buffer.h +34 -0
  73. data/src/include/util/hb_string.h +29 -0
  74. data/src/include/util/hb_system.h +9 -0
  75. data/src/include/util.h +3 -14
  76. data/src/include/version.h +1 -1
  77. data/src/include/visitor.h +1 -1
  78. data/src/io.c +7 -4
  79. data/src/lexer.c +61 -88
  80. data/src/lexer_peek_helpers.c +35 -37
  81. data/src/main.c +19 -23
  82. data/src/parser.c +282 -201
  83. data/src/parser_helpers.c +46 -40
  84. data/src/parser_match_tags.c +316 -0
  85. data/src/pretty_print.c +82 -106
  86. data/src/token.c +18 -65
  87. data/src/utf8.c +4 -4
  88. data/src/util/hb_arena.c +179 -0
  89. data/src/util/hb_arena_debug.c +237 -0
  90. data/src/{array.c → util/hb_array.c} +26 -27
  91. data/src/util/hb_buffer.c +203 -0
  92. data/src/util/hb_string.c +85 -0
  93. data/src/util/hb_system.c +30 -0
  94. data/src/util.c +29 -99
  95. data/src/visitor.c +54 -54
  96. data/templates/ext/herb/error_helpers.c.erb +3 -3
  97. data/templates/ext/herb/error_helpers.h.erb +1 -1
  98. data/templates/ext/herb/nodes.c.erb +11 -6
  99. data/templates/java/error_helpers.c.erb +75 -0
  100. data/templates/java/error_helpers.h.erb +20 -0
  101. data/templates/java/nodes.c.erb +97 -0
  102. data/templates/java/nodes.h.erb +23 -0
  103. data/templates/java/org/herb/ast/Errors.java.erb +121 -0
  104. data/templates/java/org/herb/ast/NodeVisitor.java.erb +14 -0
  105. data/templates/java/org/herb/ast/Nodes.java.erb +220 -0
  106. data/templates/java/org/herb/ast/Visitor.java.erb +56 -0
  107. data/templates/javascript/packages/node/extension/error_helpers.cpp.erb +8 -8
  108. data/templates/javascript/packages/node/extension/error_helpers.h.erb +1 -1
  109. data/templates/javascript/packages/node/extension/nodes.cpp.erb +9 -9
  110. data/templates/javascript/packages/node/extension/nodes.h.erb +1 -1
  111. data/templates/lib/herb/ast/nodes.rb.erb +28 -16
  112. data/templates/lib/herb/errors.rb.erb +17 -12
  113. data/templates/rust/src/ast/nodes.rs.erb +220 -0
  114. data/templates/rust/src/errors.rs.erb +216 -0
  115. data/templates/rust/src/nodes.rs.erb +374 -0
  116. data/templates/src/analyze_missing_end.c.erb +36 -0
  117. data/templates/src/analyze_transform.c.erb +24 -0
  118. data/templates/src/ast_nodes.c.erb +14 -14
  119. data/templates/src/ast_pretty_print.c.erb +36 -36
  120. data/templates/src/errors.c.erb +31 -31
  121. data/templates/src/include/ast_nodes.h.erb +10 -9
  122. data/templates/src/include/ast_pretty_print.h.erb +2 -2
  123. data/templates/src/include/errors.h.erb +6 -6
  124. data/templates/src/parser_match_tags.c.erb +38 -0
  125. data/templates/src/visitor.c.erb +4 -4
  126. data/templates/template.rb +22 -3
  127. data/templates/wasm/error_helpers.cpp.erb +9 -9
  128. data/templates/wasm/error_helpers.h.erb +1 -1
  129. data/templates/wasm/nodes.cpp.erb +9 -9
  130. data/templates/wasm/nodes.h.erb +1 -1
  131. data/vendor/prism/Rakefile +4 -1
  132. data/vendor/prism/config.yml +2 -1
  133. data/vendor/prism/include/prism/ast.h +31 -1
  134. data/vendor/prism/include/prism/diagnostic.h +1 -0
  135. data/vendor/prism/include/prism/version.h +3 -3
  136. data/vendor/prism/src/diagnostic.c +3 -1
  137. data/vendor/prism/src/prism.c +130 -71
  138. data/vendor/prism/src/util/pm_string.c +6 -8
  139. data/vendor/prism/templates/include/prism/ast.h.erb +2 -0
  140. data/vendor/prism/templates/java/org/prism/Loader.java.erb +2 -2
  141. data/vendor/prism/templates/javascript/src/deserialize.js.erb +2 -2
  142. data/vendor/prism/templates/lib/prism/serialize.rb.erb +2 -2
  143. data/vendor/prism/templates/sig/prism.rbs.erb +4 -0
  144. data/vendor/prism/templates/src/diagnostic.c.erb +1 -0
  145. metadata +34 -20
  146. data/lib/herb/libherb/array.rb +0 -51
  147. data/lib/herb/libherb/ast_node.rb +0 -50
  148. data/lib/herb/libherb/buffer.rb +0 -56
  149. data/lib/herb/libherb/extract_result.rb +0 -20
  150. data/lib/herb/libherb/lex_result.rb +0 -32
  151. data/lib/herb/libherb/libherb.rb +0 -52
  152. data/lib/herb/libherb/parse_result.rb +0 -20
  153. data/lib/herb/libherb/token.rb +0 -46
  154. data/lib/herb/libherb.rb +0 -35
  155. data/src/buffer.c +0 -241
  156. data/src/include/array.h +0 -33
  157. data/src/include/buffer.h +0 -39
  158. data/src/include/json.h +0 -28
  159. data/src/include/memory.h +0 -12
  160. data/src/json.c +0 -205
  161. data/src/memory.c +0 -53
data/src/pretty_print.c CHANGED
@@ -3,146 +3,149 @@
3
3
  #include "include/ast_node.h"
4
4
  #include "include/ast_nodes.h"
5
5
  #include "include/ast_pretty_print.h"
6
- #include "include/buffer.h"
7
6
  #include "include/errors.h"
8
7
  #include "include/token_struct.h"
9
8
  #include "include/util.h"
9
+ #include "include/util/hb_buffer.h"
10
+ #include "include/util/hb_string.h"
10
11
 
11
12
  #include <stdbool.h>
12
13
  #include <stdio.h>
13
14
  #include <stdlib.h>
14
15
 
15
- void pretty_print_indent(buffer_T* buffer, const size_t indent) {
16
+ void pretty_print_indent(hb_buffer_T* buffer, const size_t indent) {
16
17
  for (size_t i = 0; i < indent; i++) {
17
- buffer_append(buffer, " ");
18
+ hb_buffer_append(buffer, " ");
18
19
  }
19
20
  }
20
21
 
21
- void pretty_print_newline(const size_t indent, const size_t relative_indent, buffer_T* buffer) {
22
+ void pretty_print_newline(const size_t indent, const size_t relative_indent, hb_buffer_T* buffer) {
22
23
  pretty_print_indent(buffer, indent);
23
24
  pretty_print_indent(buffer, relative_indent);
24
- buffer_append(buffer, "\n");
25
+ hb_buffer_append(buffer, "\n");
25
26
  }
26
27
 
27
28
  void pretty_print_label(
28
- const char* name,
29
+ hb_string_T name,
29
30
  const size_t indent,
30
31
  const size_t relative_indent,
31
32
  const bool last_property,
32
- buffer_T* buffer
33
+ hb_buffer_T* buffer
33
34
  ) {
34
35
  pretty_print_indent(buffer, indent);
35
36
  pretty_print_indent(buffer, relative_indent);
36
37
 
37
38
  if (last_property) {
38
- buffer_append(buffer, "└── ");
39
+ hb_buffer_append(buffer, "└── ");
39
40
  } else {
40
- buffer_append(buffer, "├── ");
41
+ hb_buffer_append(buffer, "├── ");
41
42
  }
42
43
 
43
- buffer_append(buffer, name);
44
- buffer_append(buffer, ": ");
44
+ hb_buffer_append_string(buffer, name);
45
+ hb_buffer_append(buffer, ": ");
45
46
  }
46
47
 
47
48
  void pretty_print_quoted_property(
48
- const char* name,
49
- const char* value,
49
+ hb_string_T name,
50
+ hb_string_T value,
50
51
  const size_t indent,
51
52
  const size_t relative_indent,
52
53
  const bool last_property,
53
- buffer_T* buffer
54
+ hb_buffer_T* buffer
54
55
  ) {
55
- char* quoted = quoted_string(value);
56
+ hb_string_T quoted = quoted_string(value);
56
57
  pretty_print_property(name, quoted, indent, relative_indent, last_property, buffer);
57
- free(quoted);
58
+ free(quoted.data);
58
59
  }
59
60
 
60
61
  void pretty_print_boolean_property(
61
- const char* name,
62
+ hb_string_T name,
62
63
  bool value,
63
64
  const size_t indent,
64
65
  const size_t relative_indent,
65
66
  const bool last_property,
66
- buffer_T* buffer
67
+ hb_buffer_T* buffer
67
68
  ) {
68
- pretty_print_property(name, value ? "true" : "false", indent, relative_indent, last_property, buffer);
69
+ pretty_print_property(name, hb_string(value ? "true" : "false"), indent, relative_indent, last_property, buffer);
69
70
  }
70
71
 
71
72
  void pretty_print_property(
72
- const char* name,
73
- const char* value,
73
+ hb_string_T name,
74
+ hb_string_T value,
74
75
  const size_t indent,
75
76
  const size_t relative_indent,
76
77
  const bool last_property,
77
- buffer_T* buffer
78
+ hb_buffer_T* buffer
78
79
  ) {
79
80
  pretty_print_label(name, indent, relative_indent, last_property, buffer);
80
- buffer_append(buffer, value);
81
- buffer_append(buffer, "\n");
81
+ hb_buffer_append_string(buffer, value);
82
+ hb_buffer_append(buffer, "\n");
82
83
  }
83
84
 
84
85
  void pretty_print_size_t_property(
85
86
  size_t value,
86
- const char* name,
87
+ hb_string_T name,
87
88
  const size_t indent,
88
89
  const size_t relative_indent,
89
90
  const bool last_property,
90
- buffer_T* buffer
91
+ hb_buffer_T* buffer
91
92
  ) {
92
93
  pretty_print_label(name, indent, relative_indent, last_property, buffer);
93
- char* string = size_t_to_string(value);
94
- buffer_append(buffer, string);
95
- buffer_append(buffer, "\n");
96
- free(string);
94
+
95
+ char size_string[21];
96
+ snprintf(size_string, 21, "%zu", value);
97
+
98
+ hb_buffer_append(buffer, size_string);
99
+ hb_buffer_append(buffer, "\n");
97
100
  }
98
101
 
99
102
  void pretty_print_array(
100
- const char* name,
101
- array_T* array,
103
+ hb_string_T name,
104
+ hb_array_T* array,
102
105
  const size_t indent,
103
106
  const size_t relative_indent,
104
107
  const bool last_property,
105
- buffer_T* buffer
108
+ hb_buffer_T* buffer
106
109
  ) {
107
110
  if (array == NULL) {
108
- pretty_print_property(name, "∅", indent, relative_indent, last_property, buffer);
111
+ pretty_print_property(name, hb_string("∅"), indent, relative_indent, last_property, buffer);
109
112
 
110
113
  return;
111
114
  }
112
115
 
113
- if (array_size(array) == 0) {
114
- pretty_print_property(name, "[]", indent, relative_indent, last_property, buffer);
116
+ if (hb_array_size(array) == 0) {
117
+ pretty_print_property(name, hb_string("[]"), indent, relative_indent, last_property, buffer);
115
118
 
116
119
  return;
117
120
  }
118
121
 
119
122
  pretty_print_label(name, indent, relative_indent, last_property, buffer);
120
123
 
121
- buffer_append(buffer, "(");
124
+ hb_buffer_append(buffer, "(");
122
125
 
123
126
  char count[16];
124
- sprintf(count, "%zu", array_size(array));
125
- buffer_append(buffer, count);
126
- buffer_append(buffer, ")\n");
127
+ sprintf(count, "%zu", hb_array_size(array));
128
+ hb_buffer_append(buffer, count);
129
+ hb_buffer_append(buffer, ")\n");
127
130
 
128
131
  if (indent < 20) {
129
- for (size_t i = 0; i < array_size(array); i++) {
130
- AST_NODE_T* child = array_get(array, i);
132
+ for (size_t i = 0; i < hb_array_size(array); i++) {
133
+ AST_NODE_T* child = hb_array_get(array, i);
131
134
  pretty_print_indent(buffer, indent);
132
135
  pretty_print_indent(buffer, relative_indent + 1);
133
136
 
134
- if (i == array_size(array) - 1) {
135
- buffer_append(buffer, "└── ");
137
+ if (i == hb_array_size(array) - 1) {
138
+ hb_buffer_append(buffer, "└── ");
136
139
  } else {
137
- buffer_append(buffer, "├── ");
140
+ hb_buffer_append(buffer, "├── ");
138
141
  }
139
142
 
140
143
  ast_pretty_print_node(child, indent + 1, relative_indent + 1, buffer);
141
144
 
142
- if (i != array_size(array) - 1) { pretty_print_newline(indent + 1, relative_indent, buffer); }
145
+ if (i != hb_array_size(array) - 1) { pretty_print_newline(indent + 1, relative_indent, buffer); }
143
146
  }
144
147
  }
145
- buffer_append(buffer, "\n");
148
+ hb_buffer_append(buffer, "\n");
146
149
  }
147
150
 
148
151
  void pretty_print_errors(
@@ -150,16 +153,16 @@ void pretty_print_errors(
150
153
  const size_t indent,
151
154
  const size_t relative_indent,
152
155
  const bool last_property,
153
- buffer_T* buffer
156
+ hb_buffer_T* buffer
154
157
  ) {
155
- if (node->errors != NULL && array_size(node->errors) > 0) {
158
+ if (node->errors != NULL && hb_array_size(node->errors) > 0) {
156
159
  error_pretty_print_array("errors", node->errors, indent, relative_indent, last_property, buffer);
157
- buffer_append(buffer, "\n");
160
+ hb_buffer_append(buffer, "\n");
158
161
  }
159
162
  }
160
163
 
161
- void pretty_print_location(location_T location, buffer_T* buffer) {
162
- buffer_append(buffer, "(location: (");
164
+ void pretty_print_location(location_T location, hb_buffer_T* buffer) {
165
+ hb_buffer_append(buffer, "(location: (");
163
166
  char location_string[128];
164
167
  sprintf(
165
168
  location_string,
@@ -169,73 +172,73 @@ void pretty_print_location(location_T location, buffer_T* buffer) {
169
172
  location.end.line,
170
173
  location.end.column
171
174
  );
172
- buffer_append(buffer, location_string);
173
- buffer_append(buffer, "))");
175
+ hb_buffer_append(buffer, location_string);
176
+ hb_buffer_append(buffer, "))");
174
177
  }
175
178
 
176
179
  void pretty_print_position_property(
177
180
  position_T* position,
178
- const char* name,
181
+ hb_string_T name,
179
182
  const size_t indent,
180
183
  const size_t relative_indent,
181
184
  const bool last_property,
182
- buffer_T* buffer
185
+ hb_buffer_T* buffer
183
186
  ) {
184
187
  pretty_print_label(name, indent, relative_indent, last_property, buffer);
185
188
 
186
189
  if (position != NULL) {
187
- buffer_append(buffer, "(");
190
+ hb_buffer_append(buffer, "(");
188
191
 
189
192
  char position_string[128];
190
193
 
191
194
  sprintf(position_string, "%u:%u", (position->line) ? position->line : 0, (position->column) ? position->column : 0);
192
195
 
193
- buffer_append(buffer, position_string);
194
- buffer_append(buffer, ")");
196
+ hb_buffer_append(buffer, position_string);
197
+ hb_buffer_append(buffer, ")");
195
198
  } else {
196
- buffer_append(buffer, "∅");
199
+ hb_buffer_append(buffer, "∅");
197
200
  }
198
201
 
199
- buffer_append(buffer, "\n");
202
+ hb_buffer_append(buffer, "\n");
200
203
  }
201
204
 
202
205
  void pretty_print_token_property(
203
206
  token_T* token,
204
- const char* name,
207
+ hb_string_T name,
205
208
  const size_t indent,
206
209
  const size_t relative_indent,
207
210
  const bool last_property,
208
- buffer_T* buffer
211
+ hb_buffer_T* buffer
209
212
  ) {
210
213
  pretty_print_label(name, indent, relative_indent, last_property, buffer);
211
214
 
212
215
  if (token != NULL && token->value != NULL) {
213
- char* quoted = quoted_string(token->value);
214
- buffer_append(buffer, quoted);
215
- free(quoted);
216
+ hb_string_T quoted = quoted_string(hb_string(token->value));
217
+ hb_buffer_append_string(buffer, quoted);
218
+ free(quoted.data);
216
219
 
217
- buffer_append(buffer, " ");
220
+ hb_buffer_append(buffer, " ");
218
221
  pretty_print_location(token->location, buffer);
219
222
  } else {
220
- buffer_append(buffer, "∅");
223
+ hb_buffer_append(buffer, "∅");
221
224
  }
222
225
 
223
- buffer_append(buffer, "\n");
226
+ hb_buffer_append(buffer, "\n");
224
227
  }
225
228
 
226
229
  void pretty_print_string_property(
227
- const char* string,
228
- const char* name,
230
+ hb_string_T string,
231
+ hb_string_T name,
229
232
  const size_t indent,
230
233
  const size_t relative_indent,
231
234
  const bool last_property,
232
- buffer_T* buffer
235
+ hb_buffer_T* buffer
233
236
  ) {
234
- const char* value = "∅";
235
- char* escaped = NULL;
236
- char* quoted = NULL;
237
+ hb_string_T value = hb_string("∅");
238
+ hb_string_T escaped = { .data = NULL, .length = 0 };
239
+ hb_string_T quoted;
237
240
 
238
- if (string != NULL) {
241
+ if (!hb_string_is_empty(string)) {
239
242
  escaped = escape_newlines(string);
240
243
  quoted = quoted_string(escaped);
241
244
  value = quoted;
@@ -243,35 +246,8 @@ void pretty_print_string_property(
243
246
 
244
247
  pretty_print_property(name, value, indent, relative_indent, last_property, buffer);
245
248
 
246
- if (string != NULL) {
247
- if (escaped != NULL) { free(escaped); }
248
- if (quoted != NULL) { free(quoted); }
249
+ if (!hb_string_is_empty(string)) {
250
+ if (!hb_string_is_empty(escaped)) { free(escaped.data); }
251
+ if (!hb_string_is_empty(quoted)) { free(quoted.data); }
249
252
  }
250
253
  }
251
-
252
- void pretty_print_analyzed_ruby(analyzed_ruby_T* analyzed, const char* source) {
253
- printf(
254
- "------------------------\nanalyzed (%p)\n------------------------\n%s\n------------------------\n if: %i\n "
255
- " elsif: %i\n else: %i\n end: %i\n block: %i\n block_closing: %i\n case: %i\n when: %i\n for: "
256
- "%i\n while: %i\n "
257
- " until: %i\n begin: %i\n "
258
- "rescue: %i\n ensure: %i\n unless: %i\n==================\n\n",
259
- (void*) analyzed,
260
- source,
261
- analyzed->has_if_node,
262
- analyzed->has_elsif_node,
263
- analyzed->has_else_node,
264
- analyzed->has_end,
265
- analyzed->has_block_node,
266
- analyzed->has_block_closing,
267
- analyzed->has_case_node,
268
- analyzed->has_when_node,
269
- analyzed->has_for_node,
270
- analyzed->has_while_node,
271
- analyzed->has_until_node,
272
- analyzed->has_begin_node,
273
- analyzed->has_rescue_node,
274
- analyzed->has_ensure_node,
275
- analyzed->has_unless_node
276
- );
277
- }
data/src/token.c CHANGED
@@ -1,32 +1,24 @@
1
1
  #include "include/token.h"
2
- #include "include/json.h"
3
2
  #include "include/lexer.h"
4
3
  #include "include/position.h"
5
4
  #include "include/range.h"
6
5
  #include "include/token_struct.h"
7
6
  #include "include/util.h"
8
7
 
8
+ #include <stdbool.h>
9
9
  #include <stdio.h>
10
10
  #include <stdlib.h>
11
11
  #include <string.h>
12
12
 
13
- size_t token_sizeof(void) {
14
- return sizeof(struct TOKEN_STRUCT);
15
- }
16
-
17
- token_T* token_init(const char* value, const token_type_T type, lexer_T* lexer) {
18
- token_T* token = calloc(1, token_sizeof());
13
+ token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer) {
14
+ token_T* token = calloc(1, sizeof(token_T));
19
15
 
20
16
  if (type == TOKEN_NEWLINE) {
21
17
  lexer->current_line++;
22
18
  lexer->current_column = 0;
23
19
  }
24
20
 
25
- if (value) {
26
- token->value = herb_strdup(value);
27
- } else {
28
- token->value = NULL;
29
- }
21
+ token->value = hb_string_to_c_string_using_malloc(value);
30
22
 
31
23
  token->type = type;
32
24
  token->range = (range_T) { .from = lexer->previous_position, .to = lexer->current_position };
@@ -88,24 +80,25 @@ const char* token_type_to_string(const token_type_T type) {
88
80
  return "Unknown token_type_T";
89
81
  }
90
82
 
91
- char* token_to_string(const token_T* token) {
83
+ hb_string_T token_to_string(const token_T* token) {
92
84
  const char* type_string = token_type_to_string(token->type);
93
- const char* template = "#<Herb::Token type=\"%s\" value=\"%s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>";
85
+ const char* template = "#<Herb::Token type=\"%s\" value=\"%.*s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>";
94
86
 
95
87
  char* string = calloc(strlen(type_string) + strlen(template) + strlen(token->value) + 16, sizeof(char));
96
- char* escaped;
88
+ hb_string_T escaped;
97
89
 
98
90
  if (token->type == TOKEN_EOF) {
99
- escaped = herb_strdup("<EOF>");
91
+ escaped = hb_string(herb_strdup("<EOF>"));
100
92
  } else {
101
- escaped = escape_newlines(token->value);
93
+ escaped = escape_newlines(hb_string(token->value));
102
94
  }
103
95
 
104
96
  sprintf(
105
97
  string,
106
98
  template,
107
99
  type_string,
108
- escaped,
100
+ escaped.length,
101
+ escaped.data,
109
102
  token->range.from,
110
103
  token->range.to,
111
104
  token->location.start.line,
@@ -114,59 +107,15 @@ char* token_to_string(const token_T* token) {
114
107
  token->location.end.column
115
108
  );
116
109
 
117
- free(escaped);
118
-
119
- return string;
120
- }
121
-
122
- char* token_to_json(const token_T* token) {
123
- buffer_T json = buffer_new();
124
-
125
- json_start_root_object(&json);
126
- json_add_string(&json, "type", token_type_to_string(token->type));
127
- json_add_string(&json, "value", token->value);
128
-
129
- buffer_T range = buffer_new();
130
- json_start_array(&json, "range");
131
- json_add_size_t(&range, NULL, token->range.from);
132
- json_add_size_t(&range, NULL, token->range.to);
133
- buffer_concat(&json, &range);
134
- buffer_free(&range);
135
- json_end_array(&json);
136
-
137
- buffer_T start = buffer_new();
138
- json_start_object(&json, "start");
139
- json_add_size_t(&start, "line", token->location.start.line);
140
- json_add_size_t(&start, "column", token->location.start.column);
141
- buffer_concat(&json, &start);
142
- buffer_free(&start);
143
- json_end_object(&json);
144
-
145
- buffer_T end = buffer_new();
146
- json_start_object(&json, "end");
147
- json_add_size_t(&end, "line", token->location.end.line);
148
- json_add_size_t(&end, "column", token->location.end.column);
149
- buffer_concat(&json, &end);
150
- buffer_free(&end);
151
- json_end_object(&json);
152
-
153
- json_end_object(&json);
154
-
155
- return buffer_value(&json);
156
- }
157
-
158
- char* token_value(const token_T* token) {
159
- return token->value;
160
- }
110
+ free(escaped.data);
161
111
 
162
- int token_type(const token_T* token) {
163
- return token->type;
112
+ return hb_string(string);
164
113
  }
165
114
 
166
115
  token_T* token_copy(token_T* token) {
167
116
  if (!token) { return NULL; }
168
117
 
169
- token_T* new_token = calloc(1, token_sizeof());
118
+ token_T* new_token = calloc(1, sizeof(token_T));
170
119
 
171
120
  if (!new_token) { return NULL; }
172
121
 
@@ -188,6 +137,10 @@ token_T* token_copy(token_T* token) {
188
137
  return new_token;
189
138
  }
190
139
 
140
+ bool token_value_empty(const token_T* token) {
141
+ return token == NULL || token->value == NULL || token->value[0] == '\0';
142
+ }
143
+
191
144
  void token_free(token_T* token) {
192
145
  if (!token) { return; }
193
146
 
data/src/utf8.c CHANGED
@@ -5,7 +5,7 @@
5
5
  // 110xxxxx = 2 bytes
6
6
  // 1110xxxx = 3 bytes
7
7
  // 11110xxx = 4 bytes
8
- int utf8_char_byte_length(unsigned char first_byte) {
8
+ uint32_t utf8_char_byte_length(unsigned char first_byte) {
9
9
  if ((first_byte & 0x80) == 0) {
10
10
  return 1;
11
11
  } else if ((first_byte & 0xE0) == 0xC0) {
@@ -24,18 +24,18 @@ bool utf8_is_valid_continuation_byte(unsigned char byte) {
24
24
  return (byte & 0xC0) == 0x80;
25
25
  }
26
26
 
27
- int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
27
+ uint32_t utf8_sequence_length(const char* str, size_t position, size_t max_length) {
28
28
  if (position >= max_length) { return 0; }
29
29
 
30
30
  unsigned char first_byte = (unsigned char) str[position];
31
- int expected_length = utf8_char_byte_length(first_byte);
31
+ uint32_t expected_length = utf8_char_byte_length(first_byte);
32
32
 
33
33
  if (position + expected_length > max_length) {
34
34
  return 1; // Not enough bytes, treat as single byte
35
35
  }
36
36
 
37
37
  if (expected_length > 1) {
38
- for (int i = 1; i < expected_length; i++) {
38
+ for (uint32_t i = 1; i < expected_length; i++) {
39
39
  if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
40
40
  return 1; // Invalid continuation byte, treat first byte as single byte
41
41
  }