@herb-tools/node 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/binding.gyp CHANGED
@@ -36,6 +36,7 @@
36
36
  "./extension/libherb/range.c",
37
37
  "./extension/libherb/token_matchers.c",
38
38
  "./extension/libherb/token.c",
39
+ "./extension/libherb/utf8.c",
39
40
  "./extension/libherb/util.c",
40
41
  "./extension/libherb/visitor.c",
41
42
 
@@ -6,7 +6,7 @@ import { createRequire } from 'module';
6
6
  import { fileURLToPath } from 'url';
7
7
 
8
8
  var name = "@herb-tools/node";
9
- var version = "0.4.2";
9
+ var version = "0.5.0";
10
10
  var packageJSON = {
11
11
  name: name,
12
12
  version: version};
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/error_helpers.cpp.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/javascript/packages/node/extension/error_helpers.cpp.erb
3
3
 
4
4
  #include <node_api.h>
5
5
  #include "error_helpers.h"
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/error_helpers.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/javascript/packages/node/extension/error_helpers.h.erb
3
3
 
4
4
  #ifndef HERB_EXTENSION_ERRORS_H
5
5
  #define HERB_EXTENSION_ERRORS_H
@@ -50,7 +50,8 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) {
50
50
  AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node;
51
51
 
52
52
  const char* opening = erb_content_node->tag_opening->value;
53
- if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0) {
53
+
54
+ if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0 && strcmp(opening, "<%#") != 0) {
54
55
  analyzed_ruby_T* analyzed = herb_analyze_ruby(erb_content_node->content->value);
55
56
 
56
57
  if (false) { pretty_print_analyed_ruby(analyzed, erb_content_node->content->value); }
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/ast_nodes.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/ast_nodes.c.erb
3
3
 
4
4
  #include <stdio.h>
5
5
  #include <stdbool.h>
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_nodes.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_nodes.h.erb
3
3
 
4
4
  #ifndef HERB_AST_NODES_H
5
5
  #define HERB_AST_NODES_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/ast_pretty_print.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/ast_pretty_print.c.erb
3
3
 
4
4
  #include "include/ast_node.h"
5
5
  #include "include/ast_nodes.h"
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_pretty_print.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_pretty_print.h.erb
3
3
 
4
4
  #ifndef HERB_AST_PRETTY_PRINT_H
5
5
  #define HERB_AST_PRETTY_PRINT_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/errors.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/errors.c.erb
3
3
 
4
4
  #include "include/array.h"
5
5
  #include "include/errors.h"
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/errors.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/errors.h.erb
3
3
 
4
4
  #ifndef HERB_ERRORS_H
5
5
  #define HERB_ERRORS_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_nodes.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_nodes.h.erb
3
3
 
4
4
  #ifndef HERB_AST_NODES_H
5
5
  #define HERB_AST_NODES_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_pretty_print.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_pretty_print.h.erb
3
3
 
4
4
  #ifndef HERB_AST_PRETTY_PRINT_H
5
5
  #define HERB_AST_PRETTY_PRINT_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/errors.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/errors.h.erb
3
3
 
4
4
  #ifndef HERB_ERRORS_H
5
5
  #define HERB_ERRORS_H
@@ -5,10 +5,22 @@
5
5
  #include "ast_node.h"
6
6
  #include "lexer.h"
7
7
 
/**
 * Kind of foreign content an element body holds.
 *
 * FOREIGN_CONTENT_UNKNOWN is deliberately zero, so a zero-initialised value
 * means "not a foreign-content element".
 */
typedef enum {
  FOREIGN_CONTENT_UNKNOWN = 0,
  FOREIGN_CONTENT_SCRIPT = 1,
  FOREIGN_CONTENT_STYLE = 2
  /* FOREIGN_CONTENT_RUBY and FOREIGN_CONTENT_TEMPLATE are placeholders and not enabled. */
} foreign_content_type_T;
15
+
/** Parsing mode: regular markup (DATA) or the body of a foreign-content element. */
typedef enum {
  PARSER_STATE_DATA = 0,
  PARSER_STATE_FOREIGN_CONTENT = 1
} parser_state_T;
17
+
8
18
  typedef struct PARSER_STRUCT {
9
19
  lexer_T* lexer;
10
20
  token_T* current_token;
11
21
  array_T* open_tags_stack;
22
+ parser_state_T state;
23
+ foreign_content_type_T foreign_content_type;
12
24
  } parser_T;
13
25
 
14
26
  parser_T* parser_init(lexer_T* lexer);
@@ -24,6 +24,15 @@ void parser_append_literal_node_from_buffer(
24
24
 
25
25
  bool parser_in_svg_context(const parser_T* parser);
26
26
 
27
+ foreign_content_type_T parser_get_foreign_content_type(const char* tag_name);
28
+ bool parser_is_foreign_content_tag(const char* tag_name);
29
+ const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type);
30
+
31
+ void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type);
32
+ void parser_exit_foreign_content(parser_T* parser);
33
+
34
+ bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type);
35
+
27
36
  token_T* parser_advance(parser_T* parser);
28
37
  token_T* parser_consume_if_present(parser_T* parser, token_type_T type);
29
38
  token_T* parser_consume_expected(parser_T* parser, token_type_T type, array_T* array);
@@ -28,6 +28,7 @@ typedef enum {
28
28
  TOKEN_SLASH, // /
29
29
  TOKEN_EQUALS, // =
30
30
  TOKEN_QUOTE, // ", '
31
+ TOKEN_BACKTICK, // `
31
32
  TOKEN_DASH, // -
32
33
  TOKEN_UNDERSCORE, // _
33
34
  TOKEN_EXCLAMATION, // !
@@ -0,0 +1,11 @@
1
+ #ifndef HERB_UTF8_H
2
+ #define HERB_UTF8_H
3
+
4
+ #include <stdbool.h>
5
+ #include <stdlib.h>
6
+
7
+ int utf8_char_byte_length(unsigned char first_byte);
8
+ int utf8_sequence_length(const char* str, size_t position, size_t max_length);
9
+ bool utf8_is_valid_continuation_byte(unsigned char byte);
10
+
11
+ #endif
@@ -1,6 +1,6 @@
1
1
  #ifndef HERB_VERSION_H
2
2
  #define HERB_VERSION_H
3
3
 
4
- #define HERB_VERSION "0.4.2"
4
+ #define HERB_VERSION "0.5.0"
5
5
 
6
6
  #endif
@@ -1,6 +1,7 @@
1
1
  #include "include/buffer.h"
2
2
  #include "include/lexer_peek_helpers.h"
3
3
  #include "include/token.h"
4
+ #include "include/utf8.h"
4
5
  #include "include/util.h"
5
6
 
6
7
  #include <ctype.h>
@@ -84,6 +85,23 @@ static void lexer_advance(lexer_T* lexer) {
84
85
  }
85
86
  }
86
87
 
88
+ static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
89
+ if (byte_count <= 0) { return; }
90
+
91
+ if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
92
+ if (!is_newline(lexer->current_character)) { lexer->current_column++; }
93
+
94
+ lexer->current_position += byte_count;
95
+
96
+ if (lexer->current_position >= lexer->source_length) {
97
+ lexer->current_position = lexer->source_length;
98
+ lexer->current_character = '\0';
99
+ } else {
100
+ lexer->current_character = lexer->source[lexer->current_position];
101
+ }
102
+ }
103
+ }
104
+
87
105
  static void lexer_advance_by(lexer_T* lexer, const size_t count) {
88
106
  for (size_t i = 0; i < count; i++) {
89
107
  lexer_advance(lexer);
@@ -116,6 +134,35 @@ static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
116
134
  return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
117
135
  }
118
136
 
137
+ static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
138
+ int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);
139
+
140
+ if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
141
+
142
+ char* utf8_char = malloc(char_byte_length + 1);
143
+
144
+ if (!utf8_char) { return lexer_advance_current(lexer, type); }
145
+
146
+ for (int i = 0; i < char_byte_length; i++) {
147
+ if (lexer->current_position + i >= lexer->source_length) {
148
+ free(utf8_char);
149
+ return lexer_advance_current(lexer, type);
150
+ }
151
+
152
+ utf8_char[i] = lexer->source[lexer->current_position + i];
153
+ }
154
+
155
+ utf8_char[char_byte_length] = '\0';
156
+
157
+ lexer_advance_utf8_bytes(lexer, char_byte_length);
158
+
159
+ token_T* token = token_init(utf8_char, type, lexer);
160
+
161
+ free(utf8_char);
162
+
163
+ return token;
164
+ }
165
+
119
166
  static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
120
167
  if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
121
168
  return lexer_advance_with(lexer, value, type);
@@ -232,7 +279,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
232
279
  if (isspace(lexer->current_character)) { return lexer_parse_whitespace(lexer); }
233
280
 
234
281
  if (lexer->current_character == '\xC2' && lexer_peek(lexer, 1) == '\xA0') {
235
- return lexer_advance_with(lexer, "\xC2\xA0", TOKEN_NBSP);
282
+ return lexer_advance_utf8_character(lexer, TOKEN_NBSP);
236
283
  }
237
284
 
238
285
  switch (lexer->current_character) {
@@ -278,11 +325,12 @@ token_T* lexer_next_token(lexer_T* lexer) {
278
325
 
279
326
  case '"':
280
327
  case '\'': return lexer_advance_current(lexer, TOKEN_QUOTE);
328
+ case '`': return lexer_advance_current(lexer, TOKEN_BACKTICK);
281
329
 
282
330
  default: {
283
331
  if (isalnum(lexer->current_character)) { return lexer_parse_identifier(lexer); }
284
332
 
285
- return lexer_advance_current(lexer, TOKEN_CHARACTER);
333
+ return lexer_advance_utf8_character(lexer, TOKEN_CHARACTER);
286
334
  }
287
335
  }
288
336
  }
@@ -9,6 +9,7 @@
9
9
  #include "include/parser_helpers.h"
10
10
  #include "include/token.h"
11
11
  #include "include/token_matchers.h"
12
+ #include "include/util.h"
12
13
 
13
14
  #include <stdio.h>
14
15
  #include <stdlib.h>
@@ -16,6 +17,7 @@
16
17
  #include <strings.h>
17
18
 
18
19
  static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors);
20
+ static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors);
19
21
  static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
20
22
 
21
23
  size_t parser_sizeof(void) {
@@ -28,6 +30,8 @@ parser_T* parser_init(lexer_T* lexer) {
28
30
  parser->lexer = lexer;
29
31
  parser->current_token = lexer_next_token(lexer);
30
32
  parser->open_tags_stack = array_init(16);
33
+ parser->state = PARSER_STATE_DATA;
34
+ parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
31
35
 
32
36
  return parser;
33
37
  }
@@ -184,14 +188,96 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
184
188
 
185
189
  static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
186
190
  array_T* errors = array_init(8);
187
- token_T* identifier = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
188
191
 
189
- if (identifier == NULL) { parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors); }
192
+ token_T* at_token = parser_consume_if_present(parser, TOKEN_AT);
193
+ token_T* first_token = NULL;
194
+
195
+ if (at_token != NULL) {
196
+ first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
197
+
198
+ if (first_token == NULL) {
199
+ parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
200
+
201
+ AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
202
+ ast_html_attribute_name_node_init(at_token, at_token->location->start, at_token->location->end, errors);
203
+
204
+ token_free(at_token);
205
+
206
+ return attribute_name;
207
+ }
208
+ } else {
209
+ first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
210
+
211
+ if (first_token == NULL) {
212
+ parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
213
+ AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = ast_html_attribute_name_node_init(NULL, NULL, NULL, errors);
214
+ return attribute_name;
215
+ }
216
+ }
217
+
218
+ buffer_T name_buffer = buffer_new();
219
+
220
+ position_T* start_position;
221
+
222
+ if (at_token != NULL) {
223
+ buffer_append(&name_buffer, at_token->value);
224
+ start_position = position_copy(at_token->location->start);
225
+ } else {
226
+ start_position = position_copy(first_token->location->start);
227
+ }
228
+
229
+ buffer_append(&name_buffer, first_token->value);
230
+
231
+ position_T* end_position = position_copy(first_token->location->end);
232
+ size_t range_end = first_token->range->to;
233
+
234
+ while (parser->current_token->type == TOKEN_CHARACTER && parser->current_token->value
235
+ && strcmp(parser->current_token->value, ".") == 0) {
236
+
237
+ token_T* dot_token = parser_advance(parser);
238
+
239
+ buffer_append(&name_buffer, dot_token->value);
240
+ position_free(end_position);
241
+
242
+ end_position = position_copy(dot_token->location->end);
243
+ range_end = dot_token->range->to;
244
+
245
+ token_free(dot_token);
246
+
247
+ if (parser->current_token->type == TOKEN_IDENTIFIER) {
248
+ token_T* next_identifier = parser_advance(parser);
249
+
250
+ buffer_append(&name_buffer, next_identifier->value);
251
+ position_free(end_position);
252
+
253
+ end_position = position_copy(next_identifier->location->end);
254
+ range_end = next_identifier->range->to;
255
+ token_free(next_identifier);
256
+ } else {
257
+ break;
258
+ }
259
+ }
260
+
261
+ token_T* combined_token = calloc(1, sizeof(token_T));
262
+ combined_token->value = herb_strdup(name_buffer.value);
263
+ combined_token->type = TOKEN_IDENTIFIER;
264
+ combined_token->location =
265
+ location_from(start_position->line, start_position->column, end_position->line, end_position->column);
266
+
267
+ size_t range_start = at_token != NULL ? at_token->range->from : first_token->range->from;
268
+ combined_token->range = range_init(range_start, range_end);
190
269
 
191
270
  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
192
- ast_html_attribute_name_node_init(identifier, identifier->location->start, identifier->location->end, errors);
271
+ ast_html_attribute_name_node_init(combined_token, start_position, end_position, errors);
272
+
273
+ buffer_free(&name_buffer);
274
+ position_free(start_position);
275
+ position_free(end_position);
276
+ token_free(first_token);
193
277
 
194
- token_free(identifier);
278
+ if (at_token != NULL) { token_free(at_token); }
279
+
280
+ token_free(combined_token);
195
281
 
196
282
  return attribute_name;
197
283
  }
@@ -300,6 +386,30 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
300
386
  // <div id="home">
301
387
  if (token_is(parser, TOKEN_QUOTE)) { return parser_parse_quoted_html_attribute_value(parser, children, errors); }
302
388
 
389
+ if (token_is(parser, TOKEN_BACKTICK)) {
390
+ token_T* token = parser_advance(parser);
391
+ position_T* start = position_copy(token->location->start);
392
+ position_T* end = position_copy(token->location->end);
393
+
394
+ append_unexpected_error(
395
+ "Invalid quote character for HTML attribute",
396
+ "single quote (') or double quote (\")",
397
+ "backtick (`)",
398
+ start,
399
+ end,
400
+ errors
401
+ );
402
+
403
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
404
+ ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
405
+
406
+ position_free(start);
407
+ position_free(end);
408
+ token_free(token);
409
+
410
+ return value;
411
+ }
412
+
303
413
  token_T* token = parser_advance(parser);
304
414
 
305
415
  append_unexpected_error(
@@ -329,9 +439,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
329
439
  static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) {
330
440
  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = parser_parse_html_attribute_name(parser);
331
441
 
442
+ while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
443
+ token_T* whitespace = parser_advance(parser);
444
+ token_free(whitespace);
445
+ }
446
+
332
447
  token_T* equals = parser_consume_if_present(parser, TOKEN_EQUALS);
333
448
 
334
449
  if (equals != NULL) {
450
+ while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
451
+ token_T* whitespace = parser_advance(parser);
452
+ token_free(whitespace);
453
+ }
454
+
335
455
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
336
456
 
337
457
  AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
@@ -390,10 +510,15 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
390
510
  continue;
391
511
  }
392
512
 
513
+ if (parser->current_token->type == TOKEN_AT) {
514
+ array_append(children, parser_parse_html_attribute(parser));
515
+ continue;
516
+ }
517
+
393
518
  parser_append_unexpected_error(
394
519
  parser,
395
520
  "Unexpected Token",
396
- "TOKEN_IDENTIFIER, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
521
+ "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
397
522
  errors
398
523
  );
399
524
  }
@@ -441,6 +566,12 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
441
566
 
442
567
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
443
568
  token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
569
+
570
+ while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
571
+ token_T* whitespace = parser_advance(parser);
572
+ token_free(whitespace);
573
+ }
574
+
444
575
  token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
445
576
 
446
577
  if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
@@ -502,7 +633,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
502
633
 
503
634
  parser_push_open_tag(parser, open_tag->tag_name);
504
635
 
505
- parser_parse_in_data_state(parser, body, errors);
636
+ if (open_tag->tag_name->value && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
637
+ foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
638
+ parser_enter_foreign_content(parser, content_type);
639
+ parser_parse_foreign_content(parser, body, errors);
640
+ } else {
641
+ parser_parse_in_data_state(parser, body, errors);
642
+ }
506
643
 
507
644
  if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return parser_handle_missing_close_tag(open_tag, body, errors); }
508
645
 
@@ -593,6 +730,83 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
593
730
  return erb_node;
594
731
  }
595
732
 
733
+ static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors) {
734
+ buffer_T content = buffer_new();
735
+ position_T* start = position_copy(parser->current_token->location->start);
736
+ const char* expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
737
+
738
+ if (expected_closing_tag == NULL) {
739
+ parser_exit_foreign_content(parser);
740
+ position_free(start);
741
+ buffer_free(&content);
742
+
743
+ return;
744
+ }
745
+
746
+ while (!token_is(parser, TOKEN_EOF)) {
747
+ if (token_is(parser, TOKEN_ERB_START)) {
748
+ parser_append_literal_node_from_buffer(parser, &content, children, start);
749
+
750
+ AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
751
+ array_append(children, erb_node);
752
+
753
+ position_free(start);
754
+ start = position_copy(parser->current_token->location->start);
755
+
756
+ continue;
757
+ }
758
+
759
+ if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
760
+ size_t saved_position = parser->lexer->current_position;
761
+ size_t saved_line = parser->lexer->current_line;
762
+ size_t saved_column = parser->lexer->current_column;
763
+ size_t saved_previous_position = parser->lexer->previous_position;
764
+ size_t saved_previous_line = parser->lexer->previous_line;
765
+ size_t saved_previous_column = parser->lexer->previous_column;
766
+
767
+ char saved_char = parser->lexer->current_character;
768
+ lexer_state_T saved_state = parser->lexer->state;
769
+
770
+ token_T* next_token = lexer_next_token(parser->lexer);
771
+ bool is_potential_match = false;
772
+
773
+ if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
774
+ is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
775
+ }
776
+
777
+ parser->lexer->current_position = saved_position;
778
+ parser->lexer->current_line = saved_line;
779
+ parser->lexer->current_column = saved_column;
780
+ parser->lexer->previous_position = saved_previous_position;
781
+ parser->lexer->previous_line = saved_previous_line;
782
+ parser->lexer->previous_column = saved_previous_column;
783
+ parser->lexer->current_character = saved_char;
784
+ parser->lexer->state = saved_state;
785
+
786
+ if (next_token) { token_free(next_token); }
787
+
788
+ if (is_potential_match) {
789
+ parser_append_literal_node_from_buffer(parser, &content, children, start);
790
+ parser_exit_foreign_content(parser);
791
+
792
+ position_free(start);
793
+ buffer_free(&content);
794
+
795
+ return;
796
+ }
797
+ }
798
+
799
+ token_T* token = parser_advance(parser);
800
+ buffer_append(&content, token->value);
801
+ token_free(token);
802
+ }
803
+
804
+ parser_append_literal_node_from_buffer(parser, &content, children, start);
805
+ parser_exit_foreign_content(parser);
806
+ position_free(start);
807
+ buffer_free(&content);
808
+ }
809
+
596
810
  static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors) {
597
811
  while (token_is_none_of(parser, TOKEN_HTML_TAG_START_CLOSE, TOKEN_EOF)) {
598
812
  if (token_is(parser, TOKEN_ERB_START)) {
@@ -618,12 +832,14 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
618
832
  if (token_is_any_of(
619
833
  parser,
620
834
  TOKEN_AMPERSAND,
835
+ TOKEN_AT,
621
836
  TOKEN_CHARACTER,
622
837
  TOKEN_COLON,
623
838
  TOKEN_DASH,
624
839
  TOKEN_EQUALS,
625
840
  TOKEN_EXCLAMATION,
626
841
  TOKEN_IDENTIFIER,
842
+ TOKEN_NBSP,
627
843
  TOKEN_NEWLINE,
628
844
  TOKEN_PERCENT,
629
845
  TOKEN_QUOTE,
@@ -639,8 +855,8 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
639
855
  parser_append_unexpected_error(
640
856
  parser,
641
857
  "Unexpected token",
642
- "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, or "
643
- "TOKEN_NEWLINE",
858
+ "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
859
+ "TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
644
860
  errors
645
861
  );
646
862
  }
@@ -5,10 +5,22 @@
5
5
  #include "ast_node.h"
6
6
  #include "lexer.h"
7
7
 
/**
 * Kind of foreign content an element body holds.
 *
 * FOREIGN_CONTENT_UNKNOWN is deliberately zero, so a zero-initialised value
 * means "not a foreign-content element".
 */
typedef enum {
  FOREIGN_CONTENT_UNKNOWN = 0,
  FOREIGN_CONTENT_SCRIPT = 1,
  FOREIGN_CONTENT_STYLE = 2
  /* FOREIGN_CONTENT_RUBY and FOREIGN_CONTENT_TEMPLATE are placeholders and not enabled. */
} foreign_content_type_T;
15
+
/** Parsing mode: regular markup (DATA) or the body of a foreign-content element. */
typedef enum {
  PARSER_STATE_DATA = 0,
  PARSER_STATE_FOREIGN_CONTENT = 1
} parser_state_T;
17
+
8
18
  typedef struct PARSER_STRUCT {
9
19
  lexer_T* lexer;
10
20
  token_T* current_token;
11
21
  array_T* open_tags_stack;
22
+ parser_state_T state;
23
+ foreign_content_type_T foreign_content_type;
12
24
  } parser_T;
13
25
 
14
26
  parser_T* parser_init(lexer_T* lexer);
@@ -8,6 +8,7 @@
8
8
  #include "include/lexer.h"
9
9
  #include "include/parser.h"
10
10
  #include "include/token.h"
11
+ #include "include/token_matchers.h"
11
12
 
12
13
  #include <stdio.h>
13
14
  #include <strings.h>
@@ -54,6 +55,43 @@ bool parser_in_svg_context(const parser_T* parser) {
54
55
  return false;
55
56
  }
56
57
 
58
+ // ===== Foreign Content Handling =====
59
+
60
+ foreign_content_type_T parser_get_foreign_content_type(const char* tag_name) {
61
+ if (tag_name == NULL) { return FOREIGN_CONTENT_UNKNOWN; }
62
+
63
+ if (strcasecmp(tag_name, "script") == 0) { return FOREIGN_CONTENT_SCRIPT; }
64
+ if (strcasecmp(tag_name, "style") == 0) { return FOREIGN_CONTENT_STYLE; }
65
+
66
+ return FOREIGN_CONTENT_UNKNOWN;
67
+ }
68
+
69
+ bool parser_is_foreign_content_tag(const char* tag_name) {
70
+ return parser_get_foreign_content_type(tag_name) != FOREIGN_CONTENT_UNKNOWN;
71
+ }
72
+
73
+ const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type) {
74
+ switch (type) {
75
+ case FOREIGN_CONTENT_SCRIPT: return "script";
76
+ case FOREIGN_CONTENT_STYLE: return "style";
77
+ default: return NULL;
78
+ }
79
+ }
80
+
81
+ void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type) {
82
+ if (parser == NULL) { return; }
83
+
84
+ parser->state = PARSER_STATE_FOREIGN_CONTENT;
85
+ parser->foreign_content_type = type;
86
+ }
87
+
88
+ void parser_exit_foreign_content(parser_T* parser) {
89
+ if (parser == NULL) { return; }
90
+
91
+ parser->state = PARSER_STATE_DATA;
92
+ parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
93
+ }
94
+
57
95
  void parser_append_unexpected_error(parser_T* parser, const char* description, const char* expected, array_T* errors) {
58
96
  token_T* token = parser_advance(parser);
59
97
 
@@ -166,3 +204,11 @@ void parser_handle_mismatched_tags(
166
204
  );
167
205
  }
168
206
  }
207
+
208
+ bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type) {
209
+ const char* expected_tag_name = parser_get_foreign_content_closing_tag(expected_type);
210
+
211
+ if (expected_tag_name == NULL || tag_name == NULL) { return false; }
212
+
213
+ return strcmp(tag_name, expected_tag_name) == 0;
214
+ }
@@ -24,6 +24,15 @@ void parser_append_literal_node_from_buffer(
24
24
 
25
25
  bool parser_in_svg_context(const parser_T* parser);
26
26
 
27
+ foreign_content_type_T parser_get_foreign_content_type(const char* tag_name);
28
+ bool parser_is_foreign_content_tag(const char* tag_name);
29
+ const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type);
30
+
31
+ void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type);
32
+ void parser_exit_foreign_content(parser_T* parser);
33
+
34
+ bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type);
35
+
27
36
  token_T* parser_advance(parser_T* parser);
28
37
  token_T* parser_consume_if_present(parser_T* parser, token_type_T type);
29
38
  token_T* parser_consume_expected(parser_T* parser, token_type_T type, array_T* array);
@@ -55,6 +55,7 @@ const char* token_type_to_string(const token_type_T type) {
55
55
  case TOKEN_HTML_COMMENT_END: return "TOKEN_HTML_COMMENT_END";
56
56
  case TOKEN_EQUALS: return "TOKEN_EQUALS";
57
57
  case TOKEN_QUOTE: return "TOKEN_QUOTE";
58
+ case TOKEN_BACKTICK: return "TOKEN_BACKTICK";
58
59
  case TOKEN_DASH: return "TOKEN_DASH";
59
60
  case TOKEN_UNDERSCORE: return "TOKEN_UNDERSCORE";
60
61
  case TOKEN_EXCLAMATION: return "TOKEN_EXCLAMATION";
@@ -28,6 +28,7 @@ typedef enum {
28
28
  TOKEN_SLASH, // /
29
29
  TOKEN_EQUALS, // =
30
30
  TOKEN_QUOTE, // ", '
31
+ TOKEN_BACKTICK, // `
31
32
  TOKEN_DASH, // -
32
33
  TOKEN_UNDERSCORE, // _
33
34
  TOKEN_EXCLAMATION, // !
@@ -0,0 +1,46 @@
1
+ #include "include/utf8.h"
2
+
/**
 * Returns the length in bytes of the UTF-8 sequence introduced by `first_byte`.
 *
 *   0xxxxxxx (0x00-0x7F) -> 1 byte (ASCII)
 *   110xxxxx (0xC2-0xDF) -> 2 bytes
 *   1110xxxx (0xE0-0xEF) -> 3 bytes
 *   11110xxx (0xF0-0xF4) -> 4 bytes
 *
 * Every other byte yields 1 so callers can step over it as a single byte:
 * continuation bytes (0x80-0xBF), 0xC0/0xC1 (which could only begin an
 * overlong encoding) and 0xF5-0xFF (beyond U+10FFFF). None of these can start
 * a well-formed sequence.
 */
int utf8_char_byte_length(unsigned char first_byte) {
  if (first_byte < 0x80) { return 1; }
  if (first_byte >= 0xC2 && first_byte <= 0xDF) { return 2; }
  if (first_byte >= 0xE0 && first_byte <= 0xEF) { return 3; }
  if (first_byte >= 0xF0 && first_byte <= 0xF4) { return 4; }

  return 1;
}
21
+
/** Reports whether `byte` has the 10xxxxxx bit pattern of a UTF-8 continuation byte. */
bool utf8_is_valid_continuation_byte(unsigned char byte) {
  // The two top bits must be exactly "10".
  return (byte >> 6) == 0x02;
}
26
+
/**
 * Determines how many bytes the UTF-8 sequence starting at `str[position]`
 * occupies, reading no byte at or beyond `max_length`.
 *
 * @param str        byte buffer to inspect; may be NULL
 * @param position   index of the first byte of the sequence
 * @param max_length number of readable bytes in `str`
 * @return 0 when `str` is NULL or `position` is out of range; 1 when the
 *         sequence is truncated by `max_length` or one of its trailing bytes is
 *         not a continuation byte (the first byte is then treated as a single
 *         byte); otherwise the length reported by utf8_char_byte_length()
 */
int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
  if (str == NULL || position >= max_length) { return 0; }

  const int expected_length = utf8_char_byte_length((unsigned char) str[position]);

  // Compare against the bytes that remain instead of computing
  // `position + expected_length`, which could wrap around for huge positions.
  const size_t available = max_length - position;

  if ((size_t) expected_length > available) {
    return 1; // Not enough bytes, treat as single byte
  }

  for (int offset = 1; offset < expected_length; offset++) {
    if (!utf8_is_valid_continuation_byte((unsigned char) str[position + offset])) {
      return 1; // Invalid continuation byte, treat first byte as single byte
    }
  }

  return expected_length;
}
@@ -0,0 +1,11 @@
1
+ #ifndef HERB_UTF8_H
2
+ #define HERB_UTF8_H
3
+
4
+ #include <stdbool.h>
5
+ #include <stdlib.h>
6
+
7
+ int utf8_char_byte_length(unsigned char first_byte);
8
+ int utf8_sequence_length(const char* str, size_t position, size_t max_length);
9
+ bool utf8_is_valid_continuation_byte(unsigned char byte);
10
+
11
+ #endif
@@ -1,6 +1,6 @@
1
1
  #ifndef HERB_VERSION_H
2
2
  #define HERB_VERSION_H
3
3
 
4
- #define HERB_VERSION "0.4.2"
4
+ #define HERB_VERSION "0.5.0"
5
5
 
6
6
  #endif
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/visitor.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/visitor.c.erb
3
3
 
4
4
  #include <stdio.h>
5
5
 
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/nodes.cpp.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/javascript/packages/node/extension/nodes.cpp.erb
3
3
 
4
4
  #include <node_api.h>
5
5
  #include "error_helpers.h"
package/extension/nodes.h CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/nodes.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/javascript/packages/node/extension/nodes.h.erb
3
3
 
4
4
  #ifndef HERB_EXTENSION_NODES_H
5
5
  #define HERB_EXTENSION_NODES_H
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@herb-tools/node",
3
- "version": "0.4.2",
3
+ "version": "0.5.0",
4
4
  "description": "Native Node.js addon for HTML-aware ERB parsing using Herb.",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -48,7 +48,7 @@
48
48
  "host": "https://github.com/marcoroth/herb/releases/download/"
49
49
  },
50
50
  "dependencies": {
51
- "@herb-tools/core": "0.4.2",
51
+ "@herb-tools/core": "0.5.0",
52
52
  "@mapbox/node-pre-gyp": "^2.0.0",
53
53
  "node-addon-api": "^5.1.0",
54
54
  "node-pre-gyp-github": "^2.0.0"