@projectwallace/css-parser 0.13.2 → 0.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ export declare let CHAR_DIGIT: number;
3
3
  export declare let CHAR_HEX: number;
4
4
  export declare let CHAR_WHITESPACE: number;
5
5
  export declare let CHAR_NEWLINE: number;
6
+ export declare let CHAR_IDENT: number;
6
7
  export declare let char_types: Uint8Array<ArrayBuffer>;
7
8
  export declare function is_digit(ch: number): boolean;
8
9
  export declare function is_hex_digit(ch: number): boolean;
@@ -3,6 +3,7 @@ let CHAR_DIGIT = 1 << 1;
3
3
  let CHAR_HEX = 1 << 2;
4
4
  let CHAR_WHITESPACE = 1 << 3;
5
5
  let CHAR_NEWLINE = 1 << 4;
6
+ let CHAR_IDENT = 1 << 5;
6
7
  let char_types = new Uint8Array(128);
7
8
  for (let i = 48; i <= 57; i++) {
8
9
  char_types[i] = CHAR_DIGIT;
@@ -27,9 +28,13 @@ char_types[9] = CHAR_WHITESPACE;
27
28
  char_types[10] = CHAR_NEWLINE;
28
29
  char_types[13] = CHAR_NEWLINE;
29
30
  char_types[12] = CHAR_NEWLINE;
30
- function is_digit(ch) {
31
- return ch < 128 && (char_types[ch] & CHAR_DIGIT) !== 0;
31
+ for (let i = 0; i < 128; i++) {
32
+ if (char_types[i] & (CHAR_ALPHA | CHAR_DIGIT)) {
33
+ char_types[i] |= CHAR_IDENT;
34
+ }
32
35
  }
36
+ char_types[45] |= CHAR_IDENT;
37
+ char_types[95] |= CHAR_IDENT;
33
38
  function is_hex_digit(ch) {
34
39
  return ch < 128 && (char_types[ch] & CHAR_HEX) !== 0;
35
40
  }
@@ -39,17 +44,10 @@ function is_alpha(ch) {
39
44
  function is_whitespace(ch) {
40
45
  return ch < 128 && (char_types[ch] & CHAR_WHITESPACE) !== 0;
41
46
  }
42
- function is_newline(ch) {
43
- return ch < 128 && (char_types[ch] & CHAR_NEWLINE) !== 0;
44
- }
45
47
  function is_ident_start(ch) {
46
48
  if (ch >= 128) return true;
47
49
  if (ch === 95) return true;
48
50
  return is_alpha(ch);
49
51
  }
50
- function is_ident_char(ch) {
51
- if (ch === 45) return true;
52
- return is_ident_start(ch) || is_digit(ch);
53
- }
54
52
 
55
- export { CHAR_ALPHA, CHAR_DIGIT, CHAR_HEX, CHAR_NEWLINE, CHAR_WHITESPACE, char_types, is_alpha, is_digit, is_hex_digit, is_ident_char, is_ident_start, is_newline, is_whitespace };
53
+ export { CHAR_ALPHA, CHAR_DIGIT, CHAR_HEX, CHAR_IDENT, CHAR_NEWLINE, CHAR_WHITESPACE, char_types, is_alpha, is_hex_digit, is_ident_start, is_whitespace };
@@ -2,7 +2,6 @@ import { Lexer } from './tokenize.js';
2
2
  import { NTH_SELECTOR, CSSDataArena } from './arena.js';
3
3
  import { TOKEN_IDENT, TOKEN_DELIM, TOKEN_DIMENSION, TOKEN_NUMBER } from './token-types.js';
4
4
  import { str_equals, CHAR_MINUS_HYPHEN, CHAR_PLUS, str_index_of } from './string-utils.js';
5
- import { skip_whitespace_and_comments_forward } from './parse-utils.js';
6
5
  import { CSSNode } from './css-node.js';
7
6
 
8
7
  class ANplusBParser {
@@ -22,8 +21,7 @@ class ANplusBParser {
22
21
  */
23
22
  parse_anplusb(start, end, line = 1) {
24
23
  this.expr_end = end;
25
- this.lexer.pos = start;
26
- this.lexer.line = line;
24
+ this.lexer.seek(start, line);
27
25
  let b = null;
28
26
  let a_start = start;
29
27
  let a_end = start;
@@ -186,7 +184,7 @@ class ANplusBParser {
186
184
  return null;
187
185
  }
188
186
  skip_whitespace() {
189
- this.lexer.pos = skip_whitespace_and_comments_forward(this.source, this.lexer.pos, this.expr_end);
187
+ this.lexer.skip_whitespace_in_range(this.expr_end);
190
188
  }
191
189
  create_anplusb_node(start, a_start, a_end, b_start, b_end) {
192
190
  const node = this.arena.create_node(NTH_SELECTOR, start, this.lexer.pos - start, this.lexer.line, 1);
@@ -19,9 +19,7 @@ class AtRulePreludeParser {
19
19
  // Parse an at-rule prelude into nodes (standalone use)
20
20
  parse_prelude(at_rule_name, start, end, line = 1, column = 1) {
21
21
  this.prelude_end = end;
22
- this.lexer.pos = start;
23
- this.lexer.line = line;
24
- this.lexer.column = column;
22
+ this.lexer.seek(start, line, column);
25
23
  return this.parse_prelude_dispatch(at_rule_name);
26
24
  }
27
25
  // Dispatch to appropriate parser based on at-rule type
@@ -76,15 +74,15 @@ class AtRulePreludeParser {
76
74
  let query_start = this.lexer.pos;
77
75
  this.skip_whitespace();
78
76
  if (this.lexer.pos >= this.prelude_end) return null;
79
- let token_start = this.lexer.pos;
77
+ const saved_token_start = this.lexer.save_position();
80
78
  this.next_token();
81
79
  if (this.lexer.token_type === TOKEN_IDENT) {
82
80
  let text = this.source.substring(this.lexer.token_start, this.lexer.token_end);
83
81
  if (!str_equals("only", text) && !str_equals("not", text)) {
84
- this.lexer.pos = token_start;
82
+ this.lexer.restore_position(saved_token_start);
85
83
  }
86
84
  } else {
87
- this.lexer.pos = token_start;
85
+ this.lexer.restore_position(saved_token_start);
88
86
  }
89
87
  let components = [];
90
88
  while (this.lexer.pos < this.prelude_end) {
@@ -468,7 +466,7 @@ class AtRulePreludeParser {
468
466
  }
469
467
  // Helper: Skip whitespace and comments
470
468
  skip_whitespace() {
471
- this.lexer.pos = skip_whitespace_and_comments_forward(this.source, this.lexer.pos, this.prelude_end);
469
+ this.lexer.skip_whitespace_in_range(this.prelude_end);
472
470
  }
473
471
  // Helper: Peek at next token type without consuming
474
472
  peek_token_type() {
@@ -504,9 +502,11 @@ class AtRulePreludeParser {
504
502
  }
505
503
  // Helper: Parse feature value portion into typed nodes
506
504
  parse_feature_value(start, end) {
507
- let saved_pos = this.lexer.save_position();
508
- this.lexer.pos = start;
505
+ let temp_lexer = new Lexer(this.source);
506
+ temp_lexer.seek(start, this.lexer.line, this.lexer.column);
509
507
  let nodes = [];
508
+ let saved_lexer = this.lexer;
509
+ this.lexer = temp_lexer;
510
510
  while (this.lexer.pos < end) {
511
511
  this.lexer.next_token_fast(false);
512
512
  if (this.lexer.token_start >= end) break;
@@ -521,7 +521,7 @@ class AtRulePreludeParser {
521
521
  let node = this.parse_value_token();
522
522
  if (node !== null) nodes.push(node);
523
523
  }
524
- this.lexer.restore_position(saved_pos);
524
+ this.lexer = saved_lexer;
525
525
  return nodes;
526
526
  }
527
527
  // Parse media feature range syntax: (50px <= width <= 100px)
@@ -18,9 +18,7 @@ class DeclarationParser {
18
18
  // Parse a declaration range into a declaration node (standalone use)
19
19
  parse_declaration(start, end, line = 1, column = 1) {
20
20
  const lexer = new Lexer(this.source);
21
- lexer.pos = start;
22
- lexer.line = line;
23
- lexer.column = column;
21
+ lexer.seek(start, line, column);
24
22
  lexer.next_token_fast(true);
25
23
  return this.parse_declaration_with_lexer(lexer, end);
26
24
  }
@@ -21,9 +21,7 @@ class SelectorParser {
21
21
  // Always returns a NODE_SELECTOR_LIST with selector components as children
22
22
  parse_selector(start, end, line = 1, column = 1, allow_relative = true) {
23
23
  this.selector_end = end;
24
- this.lexer.pos = start;
25
- this.lexer.line = line;
26
- this.lexer.column = column;
24
+ this.lexer.seek(start, line, column);
27
25
  return this.parse_selector_list(allow_relative);
28
26
  }
29
27
  // Parse comma-separated selectors
@@ -230,7 +228,7 @@ class SelectorParser {
230
228
  this.lexer.pos++;
231
229
  let node = this.parse_namespace_local_part(start, start, end - start);
232
230
  if (node !== null) return node;
233
- this.lexer.pos = end;
231
+ this.lexer.restore_position(saved);
234
232
  } else {
235
233
  this.lexer.restore_position(saved);
236
234
  }
@@ -245,7 +243,7 @@ class SelectorParser {
245
243
  this.lexer.pos++;
246
244
  let node = this.parse_namespace_local_part(start, start, end - start);
247
245
  if (node !== null) return node;
248
- this.lexer.pos = end;
246
+ this.lexer.restore_position(saved);
249
247
  } else {
250
248
  this.lexer.restore_position(saved);
251
249
  }
@@ -258,16 +256,12 @@ class SelectorParser {
258
256
  }
259
257
  // Parse combinator (>, +, ~, or descendant space)
260
258
  try_parse_combinator() {
261
- let whitespace_start = this.lexer.pos;
262
- let whitespace_start_line = this.lexer.line;
263
- let whitespace_start_column = this.lexer.column;
259
+ const saved_whitespace_start = this.lexer.save_position();
264
260
  let has_whitespace = this.lexer.pos < this.selector_end;
265
261
  this.skip_whitespace();
266
- has_whitespace = has_whitespace && this.lexer.pos > whitespace_start;
262
+ has_whitespace = has_whitespace && this.lexer.pos > saved_whitespace_start.pos;
267
263
  if (this.lexer.pos >= this.selector_end) {
268
- this.lexer.pos = whitespace_start;
269
- this.lexer.line = whitespace_start_line;
270
- this.lexer.column = whitespace_start_column;
264
+ this.lexer.restore_position(saved_whitespace_start);
271
265
  return null;
272
266
  }
273
267
  this.lexer.next_token_fast(false);
@@ -278,15 +272,11 @@ class SelectorParser {
278
272
  }
279
273
  }
280
274
  if (has_whitespace) {
281
- this.lexer.pos = whitespace_start;
282
- this.lexer.line = whitespace_start_line;
283
- this.lexer.column = whitespace_start_column;
275
+ this.lexer.restore_position(saved_whitespace_start);
284
276
  this.skip_whitespace();
285
- return this.create_node_at(COMBINATOR, whitespace_start, this.lexer.pos, whitespace_start_line, whitespace_start_column);
277
+ return this.create_node_at(COMBINATOR, saved_whitespace_start.pos, this.lexer.pos, saved_whitespace_start.line, saved_whitespace_start.column);
286
278
  }
287
- this.lexer.pos = whitespace_start;
288
- this.lexer.line = whitespace_start_line;
289
- this.lexer.column = whitespace_start_column;
279
+ this.lexer.restore_position(saved_whitespace_start);
290
280
  return null;
291
281
  }
292
282
  // Parse class selector (.classname)
@@ -512,9 +502,11 @@ class SelectorParser {
512
502
  // Parse :lang() content - comma-separated language identifiers
513
503
  // Accepts both quoted strings: :lang("en", "fr") and unquoted: :lang(en, fr)
514
504
  parse_lang_identifiers(start, end, parent_node) {
505
+ let temp_lexer = new Lexer(this.source);
506
+ temp_lexer.seek(start, this.lexer.line, this.lexer.column);
515
507
  let saved_selector_end = this.selector_end;
516
- const saved = this.lexer.save_position();
517
- this.lexer.pos = start;
508
+ let saved_lexer = this.lexer;
509
+ this.lexer = temp_lexer;
518
510
  this.selector_end = end;
519
511
  let first_child = null;
520
512
  let last_child = null;
@@ -547,7 +539,7 @@ class SelectorParser {
547
539
  this.arena.set_first_child(parent_node, first_child);
548
540
  }
549
541
  this.selector_end = saved_selector_end;
550
- this.lexer.restore_position(saved);
542
+ this.lexer = saved_lexer;
551
543
  }
552
544
  // Parse An+B expression for nth-* pseudo-classes
553
545
  // Handles both simple An+B and "An+B of S" syntax
@@ -19,9 +19,7 @@ class ValueParser {
19
19
  // Returns single VALUE node index
20
20
  parse_value(start, end, start_line, start_column) {
21
21
  this.value_end = end;
22
- this.lexer.pos = start;
23
- this.lexer.line = start_line;
24
- this.lexer.column = start_column;
22
+ this.lexer.seek(start, start_line, start_column);
25
23
  let value_nodes = this.parse_value_tokens();
26
24
  if (value_nodes.length === 0) {
27
25
  let value_node2 = this.arena.create_node(VALUE, start, 0, start_line, start_column);
@@ -3,6 +3,7 @@ export interface LexerPosition {
3
3
  pos: number;
4
4
  line: number;
5
5
  column: number;
6
+ _line_offset: number;
6
7
  token_type: TokenType;
7
8
  token_start: number;
8
9
  token_end: number;
package/dist/tokenize.js CHANGED
@@ -1,6 +1,9 @@
1
- import { char_types, CHAR_WHITESPACE, CHAR_NEWLINE, CHAR_DIGIT, is_ident_start, is_newline, is_hex_digit, is_whitespace, is_ident_char } from './char-types.js';
1
+ import { char_types, CHAR_WHITESPACE, CHAR_NEWLINE, CHAR_DIGIT, is_ident_start, is_hex_digit, is_whitespace, CHAR_IDENT } from './char-types.js';
2
2
  import { TOKEN_EOF, TOKEN_RIGHT_PAREN, TOKEN_LEFT_PAREN, TOKEN_RIGHT_BRACKET, TOKEN_LEFT_BRACKET, TOKEN_COMMA, TOKEN_SEMICOLON, TOKEN_COLON, TOKEN_RIGHT_BRACE, TOKEN_LEFT_BRACE, TOKEN_CDO, TOKEN_CDC, TOKEN_DELIM, TOKEN_WHITESPACE, TOKEN_STRING, TOKEN_BAD_STRING, TOKEN_PERCENTAGE, TOKEN_DIMENSION, TOKEN_NUMBER, TOKEN_FUNCTION, TOKEN_IDENT, TOKEN_UNICODE_RANGE, TOKEN_AT_KEYWORD, TOKEN_HASH } from './token-types.js';
3
3
 
4
+ function is_newline(ch) {
5
+ return ch < 128 && (char_types[ch] & CHAR_NEWLINE) !== 0;
6
+ }
4
7
  const CHAR_LEFT_BRACE = 123;
5
8
  const CHAR_RIGHT_BRACE = 125;
6
9
  const CHAR_COLON = 58;
@@ -34,8 +37,8 @@ const CHAR_LINE_FEED = 10;
34
37
  class Lexer {
35
38
  source;
36
39
  pos;
37
- line;
38
- column;
40
+ _line;
41
+ _line_offset;
39
42
  on_comment;
40
43
  // Current token properties (avoiding object allocation)
41
44
  token_type;
@@ -46,8 +49,8 @@ class Lexer {
46
49
  constructor(source, on_comment) {
47
50
  this.source = source;
48
51
  this.pos = 0;
49
- this.line = 1;
50
- this.column = 1;
52
+ this._line = 1;
53
+ this._line_offset = 0;
51
54
  this.on_comment = on_comment;
52
55
  this.token_type = TOKEN_EOF;
53
56
  this.token_start = 0;
@@ -55,6 +58,17 @@ class Lexer {
55
58
  this.token_line = 1;
56
59
  this.token_column = 1;
57
60
  }
61
+ get line() {
62
+ return this._line;
63
+ }
64
+ get column() {
65
+ return this.pos - this._line_offset + 1;
66
+ }
67
+ seek(pos, line, column = 1) {
68
+ this.pos = pos;
69
+ this._line = line;
70
+ this._line_offset = pos - column + 1;
71
+ }
58
72
  // Fast token advancing without object allocation (for internal parser use)
59
73
  next_token_fast(skip_whitespace = false) {
60
74
  if (skip_whitespace) {
@@ -295,7 +309,9 @@ class Lexer {
295
309
  return this.make_token(TOKEN_PERCENTAGE, start, this.pos, start_line, start_column);
296
310
  }
297
311
  if (is_ident_start(ch2) || ch2 === CHAR_HYPHEN && is_ident_start(this.peek())) {
298
- while (this.pos < this.source.length && is_ident_char(this.source.charCodeAt(this.pos))) {
312
+ while (this.pos < this.source.length) {
313
+ let ch3 = this.source.charCodeAt(this.pos);
314
+ if (ch3 < 128 && (char_types[ch3] & CHAR_IDENT) === 0) break;
299
315
  this.advance();
300
316
  }
301
317
  return this.make_token(TOKEN_DIMENSION, start, this.pos, start_line, start_column);
@@ -327,7 +343,7 @@ class Lexer {
327
343
  } else {
328
344
  this.advance();
329
345
  }
330
- } else if (is_ident_char(ch)) {
346
+ } else if (ch >= 128 || (char_types[ch] & CHAR_IDENT) !== 0) {
331
347
  this.advance();
332
348
  } else {
333
349
  break;
@@ -388,7 +404,9 @@ class Lexer {
388
404
  consume_at_keyword(start_line, start_column) {
389
405
  let start = this.pos;
390
406
  this.advance();
391
- while (this.pos < this.source.length && is_ident_char(this.source.charCodeAt(this.pos))) {
407
+ while (this.pos < this.source.length) {
408
+ let ch = this.source.charCodeAt(this.pos);
409
+ if (ch < 128 && (char_types[ch] & CHAR_IDENT) === 0) break;
392
410
  this.advance();
393
411
  }
394
412
  return this.make_token(TOKEN_AT_KEYWORD, start, this.pos, start_line, start_column);
@@ -396,7 +414,9 @@ class Lexer {
396
414
  consume_hash(start_line, start_column) {
397
415
  let start = this.pos;
398
416
  this.advance();
399
- while (this.pos < this.source.length && is_ident_char(this.source.charCodeAt(this.pos))) {
417
+ while (this.pos < this.source.length) {
418
+ let ch = this.source.charCodeAt(this.pos);
419
+ if (ch < 128 && (char_types[ch] & CHAR_IDENT) === 0) break;
400
420
  this.advance();
401
421
  }
402
422
  return this.make_token(TOKEN_HASH, start, this.pos, start_line, start_column);
@@ -406,14 +426,12 @@ class Lexer {
406
426
  if (this.pos >= this.source.length) return;
407
427
  let ch = this.source.charCodeAt(this.pos);
408
428
  this.pos++;
409
- if (is_newline(ch)) {
429
+ if (ch < 128 && (char_types[ch] & CHAR_NEWLINE) !== 0) {
410
430
  if (ch === CHAR_CARRIAGE_RETURN && this.pos < this.source.length && this.source.charCodeAt(this.pos) === CHAR_LINE_FEED) {
411
431
  this.pos++;
412
432
  }
413
- this.line++;
414
- this.column = 1;
415
- } else {
416
- this.column++;
433
+ this._line++;
434
+ this._line_offset = this.pos;
417
435
  }
418
436
  return;
419
437
  }
@@ -421,15 +439,13 @@ class Lexer {
421
439
  if (this.pos >= this.source.length) break;
422
440
  let ch = this.source.charCodeAt(this.pos);
423
441
  this.pos++;
424
- if (is_newline(ch)) {
442
+ if (ch < 128 && (char_types[ch] & CHAR_NEWLINE) !== 0) {
425
443
  if (ch === CHAR_CARRIAGE_RETURN && this.pos < this.source.length && this.source.charCodeAt(this.pos) === CHAR_LINE_FEED) {
426
444
  this.pos++;
427
445
  i++;
428
446
  }
429
- this.line++;
430
- this.column = 1;
431
- } else {
432
- this.column++;
447
+ this._line++;
448
+ this._line_offset = this.pos;
433
449
  }
434
450
  }
435
451
  }
@@ -464,8 +480,9 @@ class Lexer {
464
480
  save_position() {
465
481
  return {
466
482
  pos: this.pos,
467
- line: this.line,
483
+ line: this._line,
468
484
  column: this.column,
485
+ _line_offset: this._line_offset,
469
486
  token_type: this.token_type,
470
487
  token_start: this.token_start,
471
488
  token_end: this.token_end,
@@ -479,14 +496,41 @@ class Lexer {
479
496
  */
480
497
  restore_position(saved) {
481
498
  this.pos = saved.pos;
482
- this.line = saved.line;
483
- this.column = saved.column;
499
+ this._line = saved.line;
500
+ this._line_offset = saved._line_offset;
484
501
  this.token_type = saved.token_type;
485
502
  this.token_start = saved.token_start;
486
503
  this.token_end = saved.token_end;
487
504
  this.token_line = saved.token_line;
488
505
  this.token_column = saved.token_column;
489
506
  }
507
+ /**
508
+ * Skip whitespace and comments within a range, maintaining line/column tracking
509
+ * @param end The end boundary (exclusive)
510
+ */
511
+ skip_whitespace_in_range(end) {
512
+ while (this.pos < end) {
513
+ let ch = this.source.charCodeAt(this.pos);
514
+ if (is_whitespace(ch)) {
515
+ this.advance();
516
+ continue;
517
+ }
518
+ if (ch === CHAR_FORWARD_SLASH && this.pos + 1 < end && this.source.charCodeAt(this.pos + 1) === CHAR_ASTERISK) {
519
+ this.advance();
520
+ this.advance();
521
+ while (this.pos < end) {
522
+ if (this.source.charCodeAt(this.pos) === CHAR_ASTERISK && this.pos + 1 < end && this.source.charCodeAt(this.pos + 1) === CHAR_FORWARD_SLASH) {
523
+ this.advance();
524
+ this.advance();
525
+ break;
526
+ }
527
+ this.advance();
528
+ }
529
+ continue;
530
+ }
531
+ break;
532
+ }
533
+ }
490
534
  }
491
535
  function* tokenize(source, on_comment) {
492
536
  const lexer = new Lexer(source, on_comment);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@projectwallace/css-parser",
3
- "version": "0.13.2",
3
+ "version": "0.13.3",
4
4
  "description": "High-performance CSS lexer and parser, optimized for CSS inspection and analysis",
5
5
  "author": "Bart Veneman <bart@projectwallace.com>",
6
6
  "license": "MIT",