@herb-tools/node 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/binding.gyp CHANGED
@@ -36,6 +36,7 @@
36
36
  "./extension/libherb/range.c",
37
37
  "./extension/libherb/token_matchers.c",
38
38
  "./extension/libherb/token.c",
39
+ "./extension/libherb/utf8.c",
39
40
  "./extension/libherb/util.c",
40
41
  "./extension/libherb/visitor.c",
41
42
 
@@ -6,7 +6,7 @@ import { createRequire } from 'module';
6
6
  import { fileURLToPath } from 'url';
7
7
 
8
8
  var name = "@herb-tools/node";
9
- var version = "0.4.2";
9
+ var version = "0.4.3";
10
10
  var packageJSON = {
11
11
  name: name,
12
12
  version: version};
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/error_helpers.cpp.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/javascript/packages/node/extension/error_helpers.cpp.erb
3
3
 
4
4
  #include <node_api.h>
5
5
  #include "error_helpers.h"
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/error_helpers.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/javascript/packages/node/extension/error_helpers.h.erb
3
3
 
4
4
  #ifndef HERB_EXTENSION_ERRORS_H
5
5
  #define HERB_EXTENSION_ERRORS_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/ast_nodes.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/ast_nodes.c.erb
3
3
 
4
4
  #include <stdio.h>
5
5
  #include <stdbool.h>
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_nodes.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/ast_nodes.h.erb
3
3
 
4
4
  #ifndef HERB_AST_NODES_H
5
5
  #define HERB_AST_NODES_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/ast_pretty_print.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/ast_pretty_print.c.erb
3
3
 
4
4
  #include "include/ast_node.h"
5
5
  #include "include/ast_nodes.h"
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_pretty_print.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/ast_pretty_print.h.erb
3
3
 
4
4
  #ifndef HERB_AST_PRETTY_PRINT_H
5
5
  #define HERB_AST_PRETTY_PRINT_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/errors.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/errors.c.erb
3
3
 
4
4
  #include "include/array.h"
5
5
  #include "include/errors.h"
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/errors.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/errors.h.erb
3
3
 
4
4
  #ifndef HERB_ERRORS_H
5
5
  #define HERB_ERRORS_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_nodes.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/ast_nodes.h.erb
3
3
 
4
4
  #ifndef HERB_AST_NODES_H
5
5
  #define HERB_AST_NODES_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_pretty_print.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/ast_pretty_print.h.erb
3
3
 
4
4
  #ifndef HERB_AST_PRETTY_PRINT_H
5
5
  #define HERB_AST_PRETTY_PRINT_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/errors.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/errors.h.erb
3
3
 
4
4
  #ifndef HERB_ERRORS_H
5
5
  #define HERB_ERRORS_H
@@ -0,0 +1,11 @@
1
+ #ifndef HERB_UTF8_H
2
+ #define HERB_UTF8_H
3
+
4
+ #include <stdbool.h>
5
+ #include <stdlib.h>
6
+
7
+ int utf8_char_byte_length(unsigned char first_byte);
8
+ int utf8_sequence_length(const char* str, size_t position, size_t max_length);
9
+ bool utf8_is_valid_continuation_byte(unsigned char byte);
10
+
11
+ #endif
@@ -1,6 +1,6 @@
1
1
  #ifndef HERB_VERSION_H
2
2
  #define HERB_VERSION_H
3
3
 
4
- #define HERB_VERSION "0.4.2"
4
+ #define HERB_VERSION "0.4.3"
5
5
 
6
6
  #endif
@@ -1,6 +1,7 @@
1
1
  #include "include/buffer.h"
2
2
  #include "include/lexer_peek_helpers.h"
3
3
  #include "include/token.h"
4
+ #include "include/utf8.h"
4
5
  #include "include/util.h"
5
6
 
6
7
  #include <ctype.h>
@@ -84,6 +85,23 @@ static void lexer_advance(lexer_T* lexer) {
84
85
  }
85
86
  }
86
87
 
88
+ static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
89
+ if (byte_count <= 0) { return; }
90
+
91
+ if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
92
+ if (!is_newline(lexer->current_character)) { lexer->current_column++; }
93
+
94
+ lexer->current_position += byte_count;
95
+
96
+ if (lexer->current_position >= lexer->source_length) {
97
+ lexer->current_position = lexer->source_length;
98
+ lexer->current_character = '\0';
99
+ } else {
100
+ lexer->current_character = lexer->source[lexer->current_position];
101
+ }
102
+ }
103
+ }
104
+
87
105
  static void lexer_advance_by(lexer_T* lexer, const size_t count) {
88
106
  for (size_t i = 0; i < count; i++) {
89
107
  lexer_advance(lexer);
@@ -116,6 +134,35 @@ static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
116
134
  return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
117
135
  }
118
136
 
137
+ static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
138
+ int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);
139
+
140
+ if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
141
+
142
+ char* utf8_char = malloc(char_byte_length + 1);
143
+
144
+ if (!utf8_char) { return lexer_advance_current(lexer, type); }
145
+
146
+ for (int i = 0; i < char_byte_length; i++) {
147
+ if (lexer->current_position + i >= lexer->source_length) {
148
+ free(utf8_char);
149
+ return lexer_advance_current(lexer, type);
150
+ }
151
+
152
+ utf8_char[i] = lexer->source[lexer->current_position + i];
153
+ }
154
+
155
+ utf8_char[char_byte_length] = '\0';
156
+
157
+ lexer_advance_utf8_bytes(lexer, char_byte_length);
158
+
159
+ token_T* token = token_init(utf8_char, type, lexer);
160
+
161
+ free(utf8_char);
162
+
163
+ return token;
164
+ }
165
+
119
166
  static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
120
167
  if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
121
168
  return lexer_advance_with(lexer, value, type);
@@ -232,7 +279,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
232
279
  if (isspace(lexer->current_character)) { return lexer_parse_whitespace(lexer); }
233
280
 
234
281
  if (lexer->current_character == '\xC2' && lexer_peek(lexer, 1) == '\xA0') {
235
- return lexer_advance_with(lexer, "\xC2\xA0", TOKEN_NBSP);
282
+ return lexer_advance_utf8_character(lexer, TOKEN_NBSP);
236
283
  }
237
284
 
238
285
  switch (lexer->current_character) {
@@ -282,7 +329,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
282
329
  default: {
283
330
  if (isalnum(lexer->current_character)) { return lexer_parse_identifier(lexer); }
284
331
 
285
- return lexer_advance_current(lexer, TOKEN_CHARACTER);
332
+ return lexer_advance_utf8_character(lexer, TOKEN_CHARACTER);
286
333
  }
287
334
  }
288
335
  }
@@ -9,6 +9,7 @@
9
9
  #include "include/parser_helpers.h"
10
10
  #include "include/token.h"
11
11
  #include "include/token_matchers.h"
12
+ #include "include/util.h"
12
13
 
13
14
  #include <stdio.h>
14
15
  #include <stdlib.h>
@@ -184,14 +185,96 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
184
185
 
185
186
  static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
186
187
  array_T* errors = array_init(8);
187
- token_T* identifier = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
188
188
 
189
- if (identifier == NULL) { parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors); }
189
+ token_T* at_token = parser_consume_if_present(parser, TOKEN_AT);
190
+ token_T* first_token = NULL;
191
+
192
+ if (at_token != NULL) {
193
+ first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
194
+
195
+ if (first_token == NULL) {
196
+ parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
197
+
198
+ AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
199
+ ast_html_attribute_name_node_init(at_token, at_token->location->start, at_token->location->end, errors);
200
+
201
+ token_free(at_token);
202
+
203
+ return attribute_name;
204
+ }
205
+ } else {
206
+ first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
207
+
208
+ if (first_token == NULL) {
209
+ parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
210
+ AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = ast_html_attribute_name_node_init(NULL, NULL, NULL, errors);
211
+ return attribute_name;
212
+ }
213
+ }
214
+
215
+ buffer_T name_buffer = buffer_new();
216
+
217
+ position_T* start_position;
218
+
219
+ if (at_token != NULL) {
220
+ buffer_append(&name_buffer, at_token->value);
221
+ start_position = position_copy(at_token->location->start);
222
+ } else {
223
+ start_position = position_copy(first_token->location->start);
224
+ }
225
+
226
+ buffer_append(&name_buffer, first_token->value);
227
+
228
+ position_T* end_position = position_copy(first_token->location->end);
229
+ size_t range_end = first_token->range->to;
230
+
231
+ while (parser->current_token->type == TOKEN_CHARACTER && parser->current_token->value
232
+ && strcmp(parser->current_token->value, ".") == 0) {
233
+
234
+ token_T* dot_token = parser_advance(parser);
235
+
236
+ buffer_append(&name_buffer, dot_token->value);
237
+ position_free(end_position);
238
+
239
+ end_position = position_copy(dot_token->location->end);
240
+ range_end = dot_token->range->to;
241
+
242
+ token_free(dot_token);
243
+
244
+ if (parser->current_token->type == TOKEN_IDENTIFIER) {
245
+ token_T* next_identifier = parser_advance(parser);
246
+
247
+ buffer_append(&name_buffer, next_identifier->value);
248
+ position_free(end_position);
249
+
250
+ end_position = position_copy(next_identifier->location->end);
251
+ range_end = next_identifier->range->to;
252
+ token_free(next_identifier);
253
+ } else {
254
+ break;
255
+ }
256
+ }
257
+
258
+ token_T* combined_token = calloc(1, sizeof(token_T));
259
+ combined_token->value = herb_strdup(name_buffer.value);
260
+ combined_token->type = TOKEN_IDENTIFIER;
261
+ combined_token->location =
262
+ location_from(start_position->line, start_position->column, end_position->line, end_position->column);
263
+
264
+ size_t range_start = at_token != NULL ? at_token->range->from : first_token->range->from;
265
+ combined_token->range = range_init(range_start, range_end);
190
266
 
191
267
  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
192
- ast_html_attribute_name_node_init(identifier, identifier->location->start, identifier->location->end, errors);
268
+ ast_html_attribute_name_node_init(combined_token, start_position, end_position, errors);
193
269
 
194
- token_free(identifier);
270
+ buffer_free(&name_buffer);
271
+ position_free(start_position);
272
+ position_free(end_position);
273
+ token_free(first_token);
274
+
275
+ if (at_token != NULL) { token_free(at_token); }
276
+
277
+ token_free(combined_token);
195
278
 
196
279
  return attribute_name;
197
280
  }
@@ -390,10 +473,15 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
390
473
  continue;
391
474
  }
392
475
 
476
+ if (parser->current_token->type == TOKEN_AT) {
477
+ array_append(children, parser_parse_html_attribute(parser));
478
+ continue;
479
+ }
480
+
393
481
  parser_append_unexpected_error(
394
482
  parser,
395
483
  "Unexpected Token",
396
- "TOKEN_IDENTIFIER, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
484
+ "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
397
485
  errors
398
486
  );
399
487
  }
@@ -441,6 +529,12 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
441
529
 
442
530
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
443
531
  token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
532
+
533
+ while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
534
+ token_T* whitespace = parser_advance(parser);
535
+ token_free(whitespace);
536
+ }
537
+
444
538
  token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
445
539
 
446
540
  if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
@@ -618,12 +712,14 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
618
712
  if (token_is_any_of(
619
713
  parser,
620
714
  TOKEN_AMPERSAND,
715
+ TOKEN_AT,
621
716
  TOKEN_CHARACTER,
622
717
  TOKEN_COLON,
623
718
  TOKEN_DASH,
624
719
  TOKEN_EQUALS,
625
720
  TOKEN_EXCLAMATION,
626
721
  TOKEN_IDENTIFIER,
722
+ TOKEN_NBSP,
627
723
  TOKEN_NEWLINE,
628
724
  TOKEN_PERCENT,
629
725
  TOKEN_QUOTE,
@@ -639,8 +735,8 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
639
735
  parser_append_unexpected_error(
640
736
  parser,
641
737
  "Unexpected token",
642
- "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, or "
643
- "TOKEN_NEWLINE",
738
+ "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
739
+ "TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
644
740
  errors
645
741
  );
646
742
  }
@@ -0,0 +1,46 @@
1
+ #include "include/utf8.h"
2
+
3
+ // UTF-8 byte patterns:
4
+ // 0xxxxxxx = 1 byte (ASCII)
5
+ // 110xxxxx = 2 bytes
6
+ // 1110xxxx = 3 bytes
7
+ // 11110xxx = 4 bytes
8
+ int utf8_char_byte_length(unsigned char first_byte) {
9
+ if ((first_byte & 0x80) == 0) {
10
+ return 1;
11
+ } else if ((first_byte & 0xE0) == 0xC0) {
12
+ return 2;
13
+ } else if ((first_byte & 0xF0) == 0xE0) {
14
+ return 3;
15
+ } else if ((first_byte & 0xF8) == 0xF0) {
16
+ return 4;
17
+ }
18
+
19
+ return 1;
20
+ }
21
+
22
+ // Continuation bytes have pattern 10xxxxxx
23
+ bool utf8_is_valid_continuation_byte(unsigned char byte) {
24
+ return (byte & 0xC0) == 0x80;
25
+ }
26
+
27
+ int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
28
+ if (position >= max_length) { return 0; }
29
+
30
+ unsigned char first_byte = (unsigned char) str[position];
31
+ int expected_length = utf8_char_byte_length(first_byte);
32
+
33
+ if (position + expected_length > max_length) {
34
+ return 1; // Not enough bytes, treat as single byte
35
+ }
36
+
37
+ if (expected_length > 1) {
38
+ for (int i = 1; i < expected_length; i++) {
39
+ if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
40
+ return 1; // Invalid continuation byte, treat first byte as single byte
41
+ }
42
+ }
43
+ }
44
+
45
+ return expected_length;
46
+ }
@@ -0,0 +1,11 @@
1
+ #ifndef HERB_UTF8_H
2
+ #define HERB_UTF8_H
3
+
4
+ #include <stdbool.h>
5
+ #include <stdlib.h>
6
+
7
+ int utf8_char_byte_length(unsigned char first_byte);
8
+ int utf8_sequence_length(const char* str, size_t position, size_t max_length);
9
+ bool utf8_is_valid_continuation_byte(unsigned char byte);
10
+
11
+ #endif
@@ -1,6 +1,6 @@
1
1
  #ifndef HERB_VERSION_H
2
2
  #define HERB_VERSION_H
3
3
 
4
- #define HERB_VERSION "0.4.2"
4
+ #define HERB_VERSION "0.4.3"
5
5
 
6
6
  #endif
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/visitor.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/visitor.c.erb
3
3
 
4
4
  #include <stdio.h>
5
5
 
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/nodes.cpp.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/javascript/packages/node/extension/nodes.cpp.erb
3
3
 
4
4
  #include <node_api.h>
5
5
  #include "error_helpers.h"
package/extension/nodes.h CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/javascript/packages/node/extension/nodes.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/javascript/packages/node/extension/nodes.h.erb
3
3
 
4
4
  #ifndef HERB_EXTENSION_NODES_H
5
5
  #define HERB_EXTENSION_NODES_H
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@herb-tools/node",
3
- "version": "0.4.2",
3
+ "version": "0.4.3",
4
4
  "description": "Native Node.js addon for HTML-aware ERB parsing using Herb.",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -48,7 +48,7 @@
48
48
  "host": "https://github.com/marcoroth/herb/releases/download/"
49
49
  },
50
50
  "dependencies": {
51
- "@herb-tools/core": "0.4.2",
51
+ "@herb-tools/core": "0.4.3",
52
52
  "@mapbox/node-pre-gyp": "^2.0.0",
53
53
  "node-addon-api": "^5.1.0",
54
54
  "node-pre-gyp-github": "^2.0.0"