herb 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. checksums.yaml +4 -4
  2. data/Makefile +8 -5
  3. data/config.yml +26 -6
  4. data/ext/herb/error_helpers.c +57 -3
  5. data/ext/herb/error_helpers.h +1 -1
  6. data/ext/herb/extconf.rb +1 -0
  7. data/ext/herb/extension.c +10 -24
  8. data/ext/herb/extension_helpers.c +3 -3
  9. data/ext/herb/extension_helpers.h +1 -1
  10. data/ext/herb/nodes.c +72 -37
  11. data/herb.gemspec +0 -2
  12. data/lib/herb/ast/helpers.rb +11 -0
  13. data/lib/herb/ast/node.rb +15 -6
  14. data/lib/herb/ast/nodes.rb +609 -392
  15. data/lib/herb/cli.rb +31 -0
  16. data/lib/herb/colors.rb +82 -0
  17. data/lib/herb/engine/compiler.rb +140 -14
  18. data/lib/herb/engine/debug_visitor.rb +1 -5
  19. data/lib/herb/engine/parser_error_overlay.rb +1 -1
  20. data/lib/herb/engine.rb +8 -14
  21. data/lib/herb/errors.rb +166 -56
  22. data/lib/herb/location.rb +2 -2
  23. data/lib/herb/project.rb +86 -21
  24. data/lib/herb/token.rb +14 -2
  25. data/lib/herb/version.rb +1 -1
  26. data/lib/herb.rb +1 -0
  27. data/sig/herb/ast/helpers.rbs +3 -0
  28. data/sig/herb/ast/node.rbs +12 -5
  29. data/sig/herb/ast/nodes.rbs +124 -62
  30. data/sig/herb/colors.rbs +35 -0
  31. data/sig/herb/engine/compiler.rbs +23 -1
  32. data/sig/herb/errors.rbs +74 -20
  33. data/sig/herb/token.rbs +8 -0
  34. data/sig/herb_c_extension.rbs +1 -1
  35. data/sig/serialized_ast_errors.rbs +8 -0
  36. data/src/analyze.c +420 -171
  37. data/src/analyze_helpers.c +5 -0
  38. data/src/analyze_missing_end.c +147 -0
  39. data/src/analyze_transform.c +196 -0
  40. data/src/analyzed_ruby.c +23 -2
  41. data/src/ast_node.c +5 -5
  42. data/src/ast_nodes.c +179 -179
  43. data/src/ast_pretty_print.c +232 -232
  44. data/src/element_source.c +7 -6
  45. data/src/errors.c +246 -126
  46. data/src/extract.c +92 -34
  47. data/src/herb.c +37 -49
  48. data/src/html_util.c +34 -96
  49. data/src/include/analyze.h +10 -2
  50. data/src/include/analyze_helpers.h +3 -0
  51. data/src/include/analyzed_ruby.h +4 -2
  52. data/src/include/ast_node.h +2 -2
  53. data/src/include/ast_nodes.h +67 -66
  54. data/src/include/ast_pretty_print.h +2 -2
  55. data/src/include/element_source.h +3 -1
  56. data/src/include/errors.h +30 -14
  57. data/src/include/extract.h +4 -4
  58. data/src/include/herb.h +6 -7
  59. data/src/include/html_util.h +4 -5
  60. data/src/include/lexer.h +1 -3
  61. data/src/include/lexer_peek_helpers.h +14 -14
  62. data/src/include/lexer_struct.h +3 -2
  63. data/src/include/macros.h +4 -0
  64. data/src/include/parser.h +12 -6
  65. data/src/include/parser_helpers.h +25 -15
  66. data/src/include/pretty_print.h +38 -28
  67. data/src/include/token.h +5 -8
  68. data/src/include/utf8.h +3 -2
  69. data/src/include/util/hb_arena.h +31 -0
  70. data/src/include/util/hb_arena_debug.h +8 -0
  71. data/src/include/util/hb_array.h +33 -0
  72. data/src/include/util/hb_buffer.h +34 -0
  73. data/src/include/util/hb_string.h +29 -0
  74. data/src/include/util/hb_system.h +9 -0
  75. data/src/include/util.h +3 -14
  76. data/src/include/version.h +1 -1
  77. data/src/include/visitor.h +1 -1
  78. data/src/io.c +7 -4
  79. data/src/lexer.c +61 -88
  80. data/src/lexer_peek_helpers.c +35 -37
  81. data/src/main.c +19 -23
  82. data/src/parser.c +282 -201
  83. data/src/parser_helpers.c +46 -40
  84. data/src/parser_match_tags.c +316 -0
  85. data/src/pretty_print.c +82 -106
  86. data/src/token.c +18 -65
  87. data/src/utf8.c +4 -4
  88. data/src/util/hb_arena.c +179 -0
  89. data/src/util/hb_arena_debug.c +237 -0
  90. data/src/{array.c → util/hb_array.c} +26 -27
  91. data/src/util/hb_buffer.c +203 -0
  92. data/src/util/hb_string.c +85 -0
  93. data/src/util/hb_system.c +30 -0
  94. data/src/util.c +29 -99
  95. data/src/visitor.c +54 -54
  96. data/templates/ext/herb/error_helpers.c.erb +3 -3
  97. data/templates/ext/herb/error_helpers.h.erb +1 -1
  98. data/templates/ext/herb/nodes.c.erb +11 -6
  99. data/templates/java/error_helpers.c.erb +75 -0
  100. data/templates/java/error_helpers.h.erb +20 -0
  101. data/templates/java/nodes.c.erb +97 -0
  102. data/templates/java/nodes.h.erb +23 -0
  103. data/templates/java/org/herb/ast/Errors.java.erb +121 -0
  104. data/templates/java/org/herb/ast/NodeVisitor.java.erb +14 -0
  105. data/templates/java/org/herb/ast/Nodes.java.erb +220 -0
  106. data/templates/java/org/herb/ast/Visitor.java.erb +56 -0
  107. data/templates/javascript/packages/node/extension/error_helpers.cpp.erb +8 -8
  108. data/templates/javascript/packages/node/extension/error_helpers.h.erb +1 -1
  109. data/templates/javascript/packages/node/extension/nodes.cpp.erb +9 -9
  110. data/templates/javascript/packages/node/extension/nodes.h.erb +1 -1
  111. data/templates/lib/herb/ast/nodes.rb.erb +28 -16
  112. data/templates/lib/herb/errors.rb.erb +17 -12
  113. data/templates/rust/src/ast/nodes.rs.erb +220 -0
  114. data/templates/rust/src/errors.rs.erb +216 -0
  115. data/templates/rust/src/nodes.rs.erb +374 -0
  116. data/templates/src/analyze_missing_end.c.erb +36 -0
  117. data/templates/src/analyze_transform.c.erb +24 -0
  118. data/templates/src/ast_nodes.c.erb +14 -14
  119. data/templates/src/ast_pretty_print.c.erb +36 -36
  120. data/templates/src/errors.c.erb +31 -31
  121. data/templates/src/include/ast_nodes.h.erb +10 -9
  122. data/templates/src/include/ast_pretty_print.h.erb +2 -2
  123. data/templates/src/include/errors.h.erb +6 -6
  124. data/templates/src/parser_match_tags.c.erb +38 -0
  125. data/templates/src/visitor.c.erb +4 -4
  126. data/templates/template.rb +22 -3
  127. data/templates/wasm/error_helpers.cpp.erb +9 -9
  128. data/templates/wasm/error_helpers.h.erb +1 -1
  129. data/templates/wasm/nodes.cpp.erb +9 -9
  130. data/templates/wasm/nodes.h.erb +1 -1
  131. data/vendor/prism/Rakefile +4 -1
  132. data/vendor/prism/config.yml +2 -1
  133. data/vendor/prism/include/prism/ast.h +31 -1
  134. data/vendor/prism/include/prism/diagnostic.h +1 -0
  135. data/vendor/prism/include/prism/version.h +3 -3
  136. data/vendor/prism/src/diagnostic.c +3 -1
  137. data/vendor/prism/src/prism.c +130 -71
  138. data/vendor/prism/src/util/pm_string.c +6 -8
  139. data/vendor/prism/templates/include/prism/ast.h.erb +2 -0
  140. data/vendor/prism/templates/java/org/prism/Loader.java.erb +2 -2
  141. data/vendor/prism/templates/javascript/src/deserialize.js.erb +2 -2
  142. data/vendor/prism/templates/lib/prism/serialize.rb.erb +2 -2
  143. data/vendor/prism/templates/sig/prism.rbs.erb +4 -0
  144. data/vendor/prism/templates/src/diagnostic.c.erb +1 -0
  145. metadata +34 -20
  146. data/lib/herb/libherb/array.rb +0 -51
  147. data/lib/herb/libherb/ast_node.rb +0 -50
  148. data/lib/herb/libherb/buffer.rb +0 -56
  149. data/lib/herb/libherb/extract_result.rb +0 -20
  150. data/lib/herb/libherb/lex_result.rb +0 -32
  151. data/lib/herb/libherb/libherb.rb +0 -52
  152. data/lib/herb/libherb/parse_result.rb +0 -20
  153. data/lib/herb/libherb/token.rb +0 -46
  154. data/lib/herb/libherb.rb +0 -35
  155. data/src/buffer.c +0 -241
  156. data/src/include/array.h +0 -33
  157. data/src/include/buffer.h +0 -39
  158. data/src/include/json.h +0 -28
  159. data/src/include/memory.h +0 -12
  160. data/src/json.c +0 -205
  161. data/src/memory.c +0 -53
data/src/lexer.c CHANGED
@@ -1,24 +1,21 @@
-#include "include/buffer.h"
 #include "include/lexer_peek_helpers.h"
 #include "include/token.h"
 #include "include/utf8.h"
 #include "include/util.h"
+#include "include/util/hb_buffer.h"
+#include "include/util/hb_string.h"
 
 #include <ctype.h>
 #include <string.h>
 
 #define LEXER_STALL_LIMIT 5
 
-static size_t lexer_sizeof(void) {
-  return sizeof(struct LEXER_STRUCT);
-}
-
 static bool lexer_eof(const lexer_T* lexer) {
   return lexer->current_character == '\0' || lexer->stalled;
 }
 
 static bool lexer_has_more_characters(const lexer_T* lexer) {
-  return lexer->current_position < lexer->source_length;
+  return lexer->current_position < lexer->source.length;
 }
 
 static bool lexer_stalled(lexer_T* lexer) {
@@ -34,17 +31,16 @@ static bool lexer_stalled(lexer_T* lexer) {
   return lexer->stalled;
 }
 
-lexer_T* lexer_init(const char* source) {
-  if (source == NULL) { source = ""; }
-
-  lexer_T* lexer = calloc(1, lexer_sizeof());
+void lexer_init(lexer_T* lexer, const char* source) {
+  if (source != NULL) {
+    lexer->source = hb_string(source);
+  } else {
+    lexer->source = hb_string("");
+  }
 
+  lexer->current_character = lexer->source.data[0];
   lexer->state = STATE_DATA;
 
-  lexer->source = source;
-  lexer->source_length = (uint32_t) strlen(source);
-  lexer->current_character = source[0];
-
   lexer->current_line = 1;
   lexer->current_column = 0;
   lexer->current_position = 0;
@@ -56,8 +52,6 @@ lexer_T* lexer_init(const char* source) {
   lexer->stall_counter = 0;
   lexer->last_position = 0;
   lexer->stalled = false;
-
-  return lexer;
 }
 
 token_T* lexer_error(lexer_T* lexer, const char* message) {
@@ -73,7 +67,7 @@ token_T* lexer_error(lexer_T* lexer, const char* message) {
     lexer->current_column
   );
 
-  return token_init(error_message, TOKEN_ERROR, lexer);
+  return token_init(hb_string(error_message), TOKEN_ERROR, lexer);
 }
 
 static void lexer_advance(lexer_T* lexer) {
@@ -81,7 +75,7 @@ static void lexer_advance(lexer_T* lexer) {
     if (!is_newline(lexer->current_character)) { lexer->current_column++; }
 
     lexer->current_position++;
-    lexer->current_character = lexer->source[lexer->current_position];
+    lexer->current_character = lexer->source.data[lexer->current_position];
  }
 }
 
@@ -93,11 +87,11 @@ static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
 
    lexer->current_position += byte_count;
 
-    if (lexer->current_position >= lexer->source_length) {
-      lexer->current_position = lexer->source_length;
+    if (lexer->current_position >= lexer->source.length) {
+      lexer->current_position = lexer->source.length;
      lexer->current_character = '\0';
    } else {
-      lexer->current_character = lexer->source[lexer->current_position];
+      lexer->current_character = lexer->source.data[lexer->current_position];
    }
  }
 }
@@ -108,65 +102,50 @@ static void lexer_advance_by(lexer_T* lexer, const size_t count) {
  }
 }
 
-static token_T* lexer_advance_with(lexer_T* lexer, const char* value, const token_type_T type) {
-  lexer_advance_by(lexer, strlen(value));
+static token_T* lexer_advance_with(lexer_T* lexer, hb_string_T value, const token_type_T type) {
+  lexer_advance_by(lexer, value.length);
   return token_init(value, type, lexer);
 }
 
 static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type_T type) {
-  char* collected = malloc(count + 1);
-  if (!collected) { return NULL; }
+  uint32_t start_position = lexer->current_position;
 
   for (size_t i = 0; i < count; i++) {
-    collected[i] = lexer->current_character;
     lexer_advance(lexer);
   }
 
-  collected[count] = '\0';
-
-  token_T* token = token_init(collected, type, lexer);
-  free(collected);
+  token_T* token = token_init(hb_string_range(lexer->source, start_position, lexer->current_position), type, lexer);
 
   return token;
 }
 
 static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
-  return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
+  char buffer[2];
+  buffer[0] = lexer->current_character;
+  buffer[1] = '\0';
+
+  return lexer_advance_with(lexer, hb_string(buffer), type);
 }
 
 static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
-  int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);
-
+  int char_byte_length = utf8_sequence_length(lexer->source.data, lexer->current_position, lexer->source.length);
   if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
-
-  char* utf8_char = malloc(char_byte_length + 1);
-
-  if (!utf8_char) { return lexer_advance_current(lexer, type); }
+  uint32_t start_position = lexer->current_position;
 
   for (int i = 0; i < char_byte_length; i++) {
-    if (lexer->current_position + i >= lexer->source_length) {
-      free(utf8_char);
-      return lexer_advance_current(lexer, type);
-    }
-
-    utf8_char[i] = lexer->source[lexer->current_position + i];
+    if (lexer->current_position + i >= lexer->source.length) { return lexer_advance_current(lexer, type); }
   }
 
-  utf8_char[char_byte_length] = '\0';
-
   lexer_advance_utf8_bytes(lexer, char_byte_length);
 
-  token_T* token = token_init(utf8_char, type, lexer);
-
-  free(utf8_char);
+  token_T* token = token_init(hb_string_range(lexer->source, start_position, lexer->current_position), type, lexer);
 
   return token;
 }
 
-static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
-  if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
-    return lexer_advance_with(lexer, value, type);
-  }
+static token_T* lexer_match_and_advance(lexer_T* lexer, hb_string_T value, const token_type_T type) {
+  hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position);
+  if (hb_string_starts_with(remaining_source, value)) { return lexer_advance_with(lexer, value, type); }
 
   return NULL;
 }
@@ -174,35 +153,31 @@ static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const
 // ===== Specialized Parsers
 
 static token_T* lexer_parse_whitespace(lexer_T* lexer) {
-  buffer_T buffer = buffer_new();
+  uint32_t start_position = lexer->current_position;
 
   while (isspace(lexer->current_character) && lexer->current_character != '\n' && lexer->current_character != '\r'
          && !lexer_eof(lexer)) {
-    buffer_append_char(&buffer, lexer->current_character);
     lexer_advance(lexer);
   }
 
-  token_T* token = token_init(buffer.value, TOKEN_WHITESPACE, lexer);
-
-  buffer_free(&buffer);
+  token_T* token =
+    token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_WHITESPACE, lexer);
 
   return token;
 }
 
 static token_T* lexer_parse_identifier(lexer_T* lexer) {
-  buffer_T buffer = buffer_new();
+  uint32_t start_position = lexer->current_position;
 
   while ((isalnum(lexer->current_character) || lexer->current_character == '-' || lexer->current_character == '_'
           || lexer->current_character == ':')
          && !lexer_peek_for_html_comment_end(lexer, 0) && !lexer_eof(lexer)) {
 
-    buffer_append_char(&buffer, lexer->current_character);
    lexer_advance(lexer);
  }
 
-  token_T* token = token_init(buffer.value, TOKEN_IDENTIFIER, lexer);
-
-  buffer_free(&buffer);
+  token_T* token =
+    token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_IDENTIFIER, lexer);
 
   return token;
 }
@@ -210,7 +185,8 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
 // ===== ERB Parsing
 
 static token_T* lexer_parse_erb_open(lexer_T* lexer) {
-  const char* erb_patterns[] = { "<%==", "<%%=", "<%=", "<%#", "<%-", "<%%", "<%" };
+  hb_string_T erb_patterns[] = { hb_string("<%=="), hb_string("<%%="), hb_string("<%="), hb_string("<%#"),
+                                 hb_string("<%-"), hb_string("<%%"), hb_string("<%") };
 
   lexer->state = STATE_ERB_CONTENT;
 
@@ -223,14 +199,18 @@ static token_T* lexer_parse_erb_open(lexer_T* lexer) {
 }
 
 static token_T* lexer_parse_erb_content(lexer_T* lexer) {
-  buffer_T buffer = buffer_new();
+  uint32_t start_position = lexer->current_position;
 
   while (!lexer_peek_erb_end(lexer, 0)) {
     if (lexer_eof(lexer)) {
-      return token_init(buffer.value, TOKEN_ERROR, lexer); // Handle unexpected EOF
-    }
+      token_T* token = token_init(
+        hb_string_range(lexer->source, start_position, lexer->current_position),
+        TOKEN_ERROR,
+        lexer
+      ); // Handle unexpected EOF
 
-    buffer_append_char(&buffer, lexer->current_character);
+      return token;
+    }
 
     if (is_newline(lexer->current_character)) {
       lexer->current_line++;
@@ -240,14 +220,13 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
    }
 
    lexer->current_position++;
-    lexer->current_character = lexer->source[lexer->current_position];
+    lexer->current_character = lexer->source.data[lexer->current_position];
  }
 
  lexer->state = STATE_ERB_CLOSE;
 
-  token_T* token = token_init(buffer.value, TOKEN_ERB_CONTENT, lexer);
-
-  buffer_free(&buffer);
+  token_T* token =
+    token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_ERB_CONTENT, lexer);
 
  return token;
 }
@@ -255,17 +234,17 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
 static token_T* lexer_parse_erb_close(lexer_T* lexer) {
   lexer->state = STATE_DATA;
 
-  if (lexer_peek_erb_percent_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "%%>", TOKEN_ERB_END); }
-  if (lexer_peek_erb_equals_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "=%>", TOKEN_ERB_END); }
-  if (lexer_peek_erb_dash_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "-%>", TOKEN_ERB_END); }
+  if (lexer_peek_erb_percent_close_tag(lexer, 0)) { return lexer_advance_with(lexer, hb_string("%%>"), TOKEN_ERB_END); }
+  if (lexer_peek_erb_equals_close_tag(lexer, 0)) { return lexer_advance_with(lexer, hb_string("=%>"), TOKEN_ERB_END); }
+  if (lexer_peek_erb_dash_close_tag(lexer, 0)) { return lexer_advance_with(lexer, hb_string("-%>"), TOKEN_ERB_END); }
 
-  return lexer_advance_with(lexer, "%>", TOKEN_ERB_END);
+  return lexer_advance_with(lexer, hb_string("%>"), TOKEN_ERB_END);
 }
 
 // ===== Tokenizing Function
 
 token_T* lexer_next_token(lexer_T* lexer) {
-  if (lexer_eof(lexer)) { return token_init("", TOKEN_EOF, lexer); }
+  if (lexer_eof(lexer)) { return token_init(hb_string(""), TOKEN_EOF, lexer); }
   if (lexer_stalled(lexer)) { return lexer_error(lexer, "Lexer stalled after 5 iterations"); }
 
   if (lexer->state == STATE_ERB_CONTENT) { return lexer_parse_erb_content(lexer); }
@@ -302,33 +281,33 @@ token_T* lexer_next_token(lexer_T* lexer) {
       if (isalnum(lexer_peek(lexer, 1))) { return lexer_advance_current(lexer, TOKEN_HTML_TAG_START); }
 
       if (lexer_peek_for_html_comment_start(lexer, 0)) {
-        return lexer_advance_with(lexer, "<!--", TOKEN_HTML_COMMENT_START);
+        return lexer_advance_with(lexer, hb_string("<!--"), TOKEN_HTML_COMMENT_START);
      }
 
      if (lexer_peek_for_close_tag_start(lexer, 0)) {
-        return lexer_advance_with(lexer, "</", TOKEN_HTML_TAG_START_CLOSE);
+        return lexer_advance_with(lexer, hb_string("</"), TOKEN_HTML_TAG_START_CLOSE);
      }
 
      return lexer_advance_current(lexer, TOKEN_LT);
    }
 
    case '/': {
-      token_T* token = lexer_match_and_advance(lexer, "/>", TOKEN_HTML_TAG_SELF_CLOSE);
+      token_T* token = lexer_match_and_advance(lexer, hb_string("/>"), TOKEN_HTML_TAG_SELF_CLOSE);
      return token ? token : lexer_advance_current(lexer, TOKEN_SLASH);
    }
 
    case '?': {
-      token_T* token = lexer_match_and_advance(lexer, "?>", TOKEN_XML_DECLARATION_END);
+      token_T* token = lexer_match_and_advance(lexer, hb_string("?>"), TOKEN_XML_DECLARATION_END);
      return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER);
    }
 
    case '-': {
-      token_T* token = lexer_match_and_advance(lexer, "-->", TOKEN_HTML_COMMENT_END);
+      token_T* token = lexer_match_and_advance(lexer, hb_string("-->"), TOKEN_HTML_COMMENT_END);
      return token ? token : lexer_advance_current(lexer, TOKEN_DASH);
    }
 
    case ']': {
-      token_T* token = lexer_match_and_advance(lexer, "]]>", TOKEN_CDATA_END);
+      token_T* token = lexer_match_and_advance(lexer, hb_string("]]>"), TOKEN_CDATA_END);
      return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER);
    }
 
@@ -354,9 +333,3 @@ token_T* lexer_next_token(lexer_T* lexer) {
    }
  }
 }
-
-void lexer_free(lexer_T* lexer) {
-  if (lexer == NULL) { return; }
-
-  free(lexer);
-}
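Note on the recurring pattern above: throughout lexer.c, heap-allocated token values (buffer_new / malloc plus a matching free) are replaced by borrowed slices of the original source passed around as hb_string_T values. The stand-alone sketch below illustrates that idea with a hypothetical view type of its own; it is not herb's hb_string implementation, and only the .data/.length shape and the hb_string / hb_string_range helper names are taken from the diff.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical string view, modeled on the hb_string_T usage above:
 * a { data, length } pair that borrows from the lexed source. */
typedef struct {
  const char* data;
  uint32_t length;
} string_view_T;

/* Wrap a NUL-terminated C string in a view (no copy, no allocation). */
static string_view_T view_from_cstr(const char* cstr) {
  string_view_T view = { cstr, (uint32_t) strlen(cstr) };
  return view;
}

/* Half-open range [start, end) of an existing view, mirroring how the new
 * lexer records start_position and slices the source after advancing. */
static string_view_T view_range(string_view_T source, uint32_t start, uint32_t end) {
  string_view_T view = { source.data + start, end - start };
  return view;
}

int main(void) {
  string_view_T source = view_from_cstr("<div class=\"box\">");
  string_view_T tag_name = view_range(source, 1, 4); /* the "div" slice */

  /* Views are not NUL-terminated, so print with an explicit length. */
  printf("%.*s\n", (int) tag_name.length, tag_name.data);
  return 0;
}

Because a view only records a pointer and a length, functions such as lexer_parse_identifier and lexer_parse_erb_content can tokenize without allocating or freeing per token.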
data/src/lexer_peek_helpers.c CHANGED
@@ -3,73 +3,71 @@
 #include "include/lexer_struct.h"
 #include "include/macros.h"
 #include "include/token.h"
+#include "include/util/hb_string.h"
 
 #include <ctype.h>
 #include <stdbool.h>
 
-char lexer_backtrack(const lexer_T* lexer, const int offset) {
-  return lexer->source[MAX(lexer->current_position - offset, 0)];
+char lexer_backtrack(const lexer_T* lexer, uint32_t offset) {
+  return lexer->source.data[MAX(lexer->current_position - offset, 0)];
 }
 
-char lexer_peek(const lexer_T* lexer, const int offset) {
-  return lexer->source[MIN(lexer->current_position + offset, lexer->source_length)];
+char lexer_peek(const lexer_T* lexer, uint32_t offset) {
+  return lexer->source.data[MIN(lexer->current_position + offset, lexer->source.length)];
 }
 
-bool lexer_peek_for(const lexer_T* lexer, const int offset, const char* pattern, const bool case_insensitive) {
-  for (int index = 0; pattern[index]; index++) {
-    const char character = lexer_peek(lexer, offset + index);
+bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, const bool case_insensitive) {
+  hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position + offset);
+  remaining_source.length = MIN(pattern.length, remaining_source.length);
 
-    if (case_insensitive) {
-      if (tolower(character) != tolower(pattern[index])) { return false; }
-    } else {
-      if (character != pattern[index]) { return false; }
-    }
+  if (case_insensitive) {
+    return hb_string_equals_case_insensitive(remaining_source, pattern);
+  } else {
+    return hb_string_equals(remaining_source, pattern);
  }
-
-  return true;
 }
 
-bool lexer_peek_for_doctype(const lexer_T* lexer, const int offset) {
-  return lexer_peek_for(lexer, offset, "<!DOCTYPE", true);
+bool lexer_peek_for_doctype(const lexer_T* lexer, uint32_t offset) {
+  return lexer_peek_for(lexer, offset, hb_string("<!DOCTYPE"), true);
 }
 
-bool lexer_peek_for_xml_declaration(const lexer_T* lexer, const int offset) {
-  return lexer_peek_for(lexer, offset, "<?xml", true);
+bool lexer_peek_for_xml_declaration(const lexer_T* lexer, uint32_t offset) {
+  return lexer_peek_for(lexer, offset, hb_string("<?xml"), true);
 }
 
-bool lexer_peek_for_cdata_start(const lexer_T* lexer, const int offset) {
-  return lexer_peek_for(lexer, offset, "<![CDATA[", false);
+bool lexer_peek_for_cdata_start(const lexer_T* lexer, uint32_t offset) {
+  return lexer_peek_for(lexer, offset, hb_string("<![CDATA["), false);
 }
 
-bool lexer_peek_for_cdata_end(const lexer_T* lexer, const int offset) {
-  return lexer_peek_for(lexer, offset, "]]>", false);
+bool lexer_peek_for_cdata_end(const lexer_T* lexer, uint32_t offset) {
+  return lexer_peek_for(lexer, offset, hb_string("]]>"), false);
 }
 
-bool lexer_peek_for_html_comment_start(const lexer_T* lexer, const int offset) {
-  return lexer_peek_for(lexer, offset, "<!--", false);
+bool lexer_peek_for_html_comment_start(const lexer_T* lexer, uint32_t offset) {
+  return lexer_peek_for(lexer, offset, hb_string("<!--"), false);
 }
 
-bool lexer_peek_for_html_comment_end(const lexer_T* lexer, const int offset) {
-  return lexer_peek_for(lexer, offset, "-->", false);
+bool lexer_peek_for_html_comment_end(const lexer_T* lexer, uint32_t offset) {
+  return lexer_peek_for(lexer, offset, hb_string("-->"), false);
 }
 
-bool lexer_peek_erb_close_tag(const lexer_T* lexer, const int offset) {
-  return lexer_peek_for(lexer, offset, "%>", false);
+bool lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset) {
+  return lexer_peek_for(lexer, offset, hb_string("%>"), false);
 }
 
-bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, const int offset) {
-  return lexer_peek_for(lexer, offset, "-%>", false);
+bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, uint32_t offset) {
+  return lexer_peek_for(lexer, offset, hb_string("-%>"), false);
 }
 
-bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, const int offset) {
-  return lexer_peek_for(lexer, offset, "%%>", false);
+bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, uint32_t offset) {
+  return lexer_peek_for(lexer, offset, hb_string("%%>"), false);
 }
 
-bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, const int offset) {
-  return lexer_peek_for(lexer, offset, "=%>", false);
+bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, uint32_t offset) {
+  return lexer_peek_for(lexer, offset, hb_string("=%>"), false);
 }
 
-bool lexer_peek_erb_end(const lexer_T* lexer, const int offset) {
+bool lexer_peek_erb_end(const lexer_T* lexer, uint32_t offset) {
   return (
     lexer_peek_erb_close_tag(lexer, offset) || lexer_peek_erb_dash_close_tag(lexer, offset)
     || lexer_peek_erb_percent_close_tag(lexer, offset) || lexer_peek_erb_equals_close_tag(lexer, offset)
@@ -103,10 +101,10 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok
   return result;
 }
 
-bool lexer_peek_for_close_tag_start(const lexer_T* lexer, const int offset) {
+bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) {
   if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; }
 
-  int pos = offset + 2;
+  uint32_t pos = offset + 2;
 
   while (lexer_peek(lexer, pos) == ' ' || lexer_peek(lexer, pos) == '\t' || lexer_peek(lexer, pos) == '\n'
          || lexer_peek(lexer, pos) == '\r') {
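The rewritten lexer_peek_for above replaces the per-character loop with a slice-and-compare: take the remaining source at the peek offset, clamp the compared length to the pattern's length, and run a single case-sensitive or case-insensitive comparison. A rough, self-contained equivalent in plain C (inlining what the hb_string_slice / hb_string_equals / hb_string_equals_case_insensitive calls shown above appear to do) could look like this:

#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* Does source + offset start with pattern? Bounding the comparison by what is
 * left in the source means a pattern that would run past the end simply fails
 * to match instead of reading out of bounds. */
static bool peek_for(const char* source, size_t source_length, size_t offset,
                     const char* pattern, bool case_insensitive) {
  size_t pattern_length = strlen(pattern);
  size_t remaining = (offset < source_length) ? source_length - offset : 0;

  if (remaining < pattern_length) { return false; }

  if (!case_insensitive) { return strncmp(source + offset, pattern, pattern_length) == 0; }

  for (size_t i = 0; i < pattern_length; i++) {
    if (tolower((unsigned char) source[offset + i]) != tolower((unsigned char) pattern[i])) { return false; }
  }

  return true;
}

int main(void) {
  const char* source = "<!DOCTYPE html>";
  /* Case-insensitive prefix check, analogous to lexer_peek_for_doctype. */
  return peek_for(source, strlen(source), 0, "<!doctype", true) ? 0 : 1;
}

The per-pattern helpers such as lexer_peek_for_doctype and lexer_peek_erb_close_tag then stay one-liners that pass a literal pattern and a case-insensitivity flag, which is the shape visible in the diff above.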
data/src/main.c CHANGED
@@ -4,11 +4,11 @@
 #include "include/ast_node.h"
 #include "include/ast_nodes.h"
 #include "include/ast_pretty_print.h"
-#include "include/buffer.h"
 #include "include/extract.h"
 #include "include/herb.h"
 #include "include/io.h"
 #include "include/ruby_parser.h"
+#include "include/util/hb_buffer.h"
 
 #include <stdio.h>
 #include <string.h>
@@ -39,7 +39,6 @@ int main(const int argc, char* argv[]) {
    printf("Herb 🌿 Powerful and seamless HTML-aware ERB parsing and tooling.\n\n");
 
    printf("./herb lex [file] - Lex a file\n");
-    printf("./herb lex_json [file] - Lex a file and return the result as json.\n");
    printf("./herb parse [file] - Parse a file\n");
    printf("./herb ruby [file] - Extract Ruby from a file\n");
    printf("./herb html [file] - Extract HTML from a file\n");
@@ -53,9 +52,9 @@ int main(const int argc, char* argv[]) {
    return 1;
  }
 
-  buffer_T output;
+  hb_buffer_T output;
 
-  if (!buffer_init(&output)) { return 1; }
+  if (!hb_buffer_init(&output, 4096)) { return 1; }
 
  char* source = herb_read_file(argv[2]);
 
@@ -74,7 +73,7 @@ int main(const int argc, char* argv[]) {
    print_time_diff(start, end, "visiting");
 
    ast_node_free((AST_NODE_T*) root);
-    buffer_free(&output);
+    free(output.value);
    free(source);
 
    return 0;
@@ -87,18 +86,7 @@ int main(const int argc, char* argv[]) {
    printf("%s\n", output.value);
    print_time_diff(start, end, "lexing");
 
-    buffer_free(&output);
-    free(source);
-
-    return 0;
-  }
-
-  if (strcmp(argv[1], "lex_json") == 0) {
-    herb_lex_json_to_buffer(source, &output);
-
-    printf("%s\n", output.value);
-
-    buffer_free(&output);
+    free(output.value);
    free(source);
 
    return 0;
@@ -106,15 +94,23 @@ int main(const int argc, char* argv[]) {
  if (strcmp(argv[1], "parse") == 0) {
    AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL);
+
+    herb_analyze_parse_tree(root, source);
+
 
    clock_gettime(CLOCK_MONOTONIC, &end);
 
-    ast_pretty_print_node((AST_NODE_T*) root, 0, 0, &output);
-    printf("%s\n", output.value);
+    int silent = 0;
+    if (argc > 3 && strcmp(argv[3], "--silent") == 0) { silent = 1; }
+
+    if (!silent) {
+      ast_pretty_print_node((AST_NODE_T*) root, 0, 0, &output);
+      printf("%s\n", output.value);
 
-    print_time_diff(start, end, "parsing");
+      print_time_diff(start, end, "parsing");
+    }
 
    ast_node_free((AST_NODE_T*) root);
-    buffer_free(&output);
+    free(output.value);
    free(source);
 
    return 0;
@@ -127,7 +123,7 @@ int main(const int argc, char* argv[]) {
    printf("%s\n", output.value);
    print_time_diff(start, end, "extracting Ruby");
 
-    buffer_free(&output);
+    free(output.value);
    free(source);
 
    return 0;
@@ -140,7 +136,7 @@ int main(const int argc, char* argv[]) {
    printf("%s\n", output.value);
    print_time_diff(start, end, "extracting HTML");
 
-    buffer_free(&output);
+    free(output.value);
    free(source);
 
    return 0;
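main.c also shows the new output-buffer convention: the caller declares an hb_buffer_T on the stack, initializes it with an explicit starting capacity (hb_buffer_init(&output, 4096)), and afterwards releases only the backing allocation with free(output.value) rather than a dedicated buffer_free call. The sketch below is a hypothetical, minimal growable buffer written to match that calling pattern; it is not herb's hb_buffer implementation, and every field name other than value is an assumption.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Minimal growable character buffer matching the calling convention in main.c:
 * init with a capacity, append text, print value, then free(value) to release. */
typedef struct {
  char* value;
  size_t length;
  size_t capacity;
} demo_buffer_T;

static bool demo_buffer_init(demo_buffer_T* buffer, size_t capacity) {
  buffer->value = calloc(capacity, sizeof(char));
  buffer->length = 0;
  buffer->capacity = capacity;
  return buffer->value != NULL;
}

static bool demo_buffer_append(demo_buffer_T* buffer, const char* text) {
  size_t text_length = strlen(text);

  /* Grow geometrically, keeping room for the trailing NUL. */
  while (buffer->length + text_length + 1 > buffer->capacity) {
    size_t new_capacity = buffer->capacity * 2;
    char* resized = realloc(buffer->value, new_capacity);
    if (resized == NULL) { return false; }
    buffer->value = resized;
    buffer->capacity = new_capacity;
  }

  memcpy(buffer->value + buffer->length, text, text_length + 1);
  buffer->length += text_length;
  return true;
}

int main(void) {
  demo_buffer_T output;

  if (!demo_buffer_init(&output, 4096)) { return 1; }

  demo_buffer_append(&output, "herb ");
  demo_buffer_append(&output, "0.8.0");
  printf("%s\n", output.value);

  /* As in main.c, only the backing allocation needs to be released. */
  free(output.value);
  return 0;
}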