@projectwallace/css-parser 0.11.4 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
 
8
8
  Built for speed and efficiency, this parser handles large CSS files with minimal memory overhead and blazing-fast parse times. Designed with a data-oriented architecture using a single contiguous memory arena for zero allocations during parsing.
9
9
 
10
- This parser was heavily influenced by [CSSTree](https://github.com/csstree/csstree), one of the most robust CSS parsers available. Some of the parsing mechanics are taken from CSSTree, as well as some of the performance mechanics, but a lot of things are very different which is why this isn't a direct fork.
10
+ This parser was heavily influenced by [CSSTree](https://github.com/csstree/csstree), one of the most robust CSS parsers available. Some of the parsing mechanics are taken from CSSTree, as well as some of the performance mechanics, but a lot of things are very different which is why this isn't a direct fork and there is very little overlap in APIs.
11
11
 
12
12
  ## Features
13
13
 
@@ -15,6 +15,7 @@ This parser was heavily influenced by [CSSTree](https://github.com/csstree/csstr
15
15
  - **Error recovery** - Continues parsing on malformed CSS
16
16
  - **Location tracking** - Line, column, offset, and length for all nodes
17
17
  - **Performance** - Low memory usage and excellent parsing speed
18
+ - **Small bundle size** - Fast download and installation in any environment
18
19
 
19
20
  ## Installation
20
21
 
package/dist/index.d.ts CHANGED
@@ -9,7 +9,7 @@ export { walk, traverse, SKIP, BREAK } from './walk';
9
9
  export { is_custom, is_vendor_prefixed, str_equals, str_starts_with, str_index_of } from './string-utils';
10
10
  export { type ParserOptions } from './parse';
11
11
  export { CSSNode, type CSSNodeType, TYPE_NAMES, type CloneOptions, type PlainCSSNode } from './css-node';
12
- export type { LexerPosition } from './tokenize';
12
+ export type { LexerPosition, CommentInfo } from './tokenize';
13
13
  export { ATTR_OPERATOR_NONE, ATTR_OPERATOR_EQUAL, ATTR_OPERATOR_TILDE_EQUAL, ATTR_OPERATOR_PIPE_EQUAL, ATTR_OPERATOR_CARET_EQUAL, ATTR_OPERATOR_DOLLAR_EQUAL, ATTR_OPERATOR_STAR_EQUAL, ATTR_FLAG_NONE, ATTR_FLAG_CASE_INSENSITIVE, ATTR_FLAG_CASE_SENSITIVE, } from './arena';
14
14
  export * from './constants';
15
15
  export * from './token-types';
@@ -13,7 +13,7 @@ class ANplusBParser {
13
13
  constructor(arena, source) {
14
14
  this.arena = arena;
15
15
  this.source = source;
16
- this.lexer = new Lexer(source, true);
16
+ this.lexer = new Lexer(source);
17
17
  this.expr_end = 0;
18
18
  }
19
19
  /**
@@ -13,7 +13,7 @@ class AtRulePreludeParser {
13
13
  constructor(arena, source) {
14
14
  this.arena = arena;
15
15
  this.source = source;
16
- this.lexer = new Lexer(source, false);
16
+ this.lexer = new Lexer(source);
17
17
  this.prelude_end = 0;
18
18
  }
19
19
  // Parse an at-rule prelude into nodes (standalone use)
@@ -17,7 +17,7 @@ class DeclarationParser {
17
17
  }
18
18
  // Parse a declaration range into a declaration node (standalone use)
19
19
  parse_declaration(start, end, line = 1, column = 1) {
20
- const lexer = new Lexer(this.source, false);
20
+ const lexer = new Lexer(this.source);
21
21
  lexer.pos = start;
22
22
  lexer.line = line;
23
23
  lexer.column = column;
@@ -1,8 +1,8 @@
1
1
  import { Lexer } from './tokenize.js';
2
2
  import { CSSDataArena, SELECTOR_LIST, SELECTOR, COMBINATOR, NESTING_SELECTOR, ID_SELECTOR, TYPE_SELECTOR, UNIVERSAL_SELECTOR, CLASS_SELECTOR, ATTRIBUTE_SELECTOR, ATTR_OPERATOR_NONE, ATTR_FLAG_NONE, ATTR_OPERATOR_EQUAL, ATTR_OPERATOR_TILDE_EQUAL, ATTR_OPERATOR_PIPE_EQUAL, ATTR_OPERATOR_CARET_EQUAL, ATTR_OPERATOR_DOLLAR_EQUAL, ATTR_OPERATOR_STAR_EQUAL, ATTR_FLAG_CASE_INSENSITIVE, ATTR_FLAG_CASE_SENSITIVE, PSEUDO_ELEMENT_SELECTOR, PSEUDO_CLASS_SELECTOR, FLAG_HAS_PARENS, LANG_SELECTOR, NTH_OF_SELECTOR } from './arena.js';
3
- import { TOKEN_COMMENT, TOKEN_COMMA, TOKEN_DELIM, TOKEN_EOF, TOKEN_WHITESPACE, TOKEN_FUNCTION, TOKEN_COLON, TOKEN_LEFT_BRACKET, TOKEN_HASH, TOKEN_IDENT, TOKEN_RIGHT_BRACKET, TOKEN_LEFT_PAREN, TOKEN_RIGHT_PAREN, TOKEN_STRING } from './token-types.js';
3
+ import { TOKEN_COMMA, TOKEN_DELIM, TOKEN_EOF, TOKEN_WHITESPACE, TOKEN_FUNCTION, TOKEN_COLON, TOKEN_LEFT_BRACKET, TOKEN_HASH, TOKEN_IDENT, TOKEN_RIGHT_BRACKET, TOKEN_LEFT_PAREN, TOKEN_RIGHT_PAREN, TOKEN_STRING } from './token-types.js';
4
4
  import { skip_whitespace_and_comments_forward, skip_whitespace_and_comments_backward, skip_whitespace_forward } from './parse-utils.js';
5
- import { CHAR_GREATER_THAN, CHAR_PLUS, CHAR_TILDE, CHAR_PERIOD, CHAR_ASTERISK, CHAR_AMPERSAND, CHAR_PIPE, CHAR_SPACE, CHAR_TAB, CHAR_NEWLINE, CHAR_CARRIAGE_RETURN, CHAR_FORM_FEED, is_combinator, is_whitespace, CHAR_EQUALS, CHAR_CARET, CHAR_DOLLAR, CHAR_SINGLE_QUOTE, CHAR_DOUBLE_QUOTE, CHAR_COLON, str_equals } from './string-utils.js';
5
+ import { CHAR_GREATER_THAN, CHAR_PLUS, CHAR_TILDE, CHAR_PERIOD, CHAR_ASTERISK, CHAR_AMPERSAND, CHAR_PIPE, is_combinator, is_whitespace, CHAR_EQUALS, CHAR_CARET, CHAR_DOLLAR, CHAR_SINGLE_QUOTE, CHAR_DOUBLE_QUOTE, CHAR_COLON, str_equals, CHAR_FORWARD_SLASH } from './string-utils.js';
6
6
  import { ANplusBParser } from './parse-anplusb.js';
7
7
  import { CSSNode } from './css-node.js';
8
8
 
@@ -14,7 +14,7 @@ class SelectorParser {
14
14
  constructor(arena, source) {
15
15
  this.arena = arena;
16
16
  this.source = source;
17
- this.lexer = new Lexer(source, false);
17
+ this.lexer = new Lexer(source);
18
18
  this.selector_end = 0;
19
19
  }
20
20
  // Parse a selector range into selector nodes (standalone use)
@@ -38,7 +38,13 @@ class SelectorParser {
38
38
  let selector_column = this.lexer.column;
39
39
  let complex_selector = this.parse_complex_selector(allow_relative);
40
40
  if (complex_selector !== null) {
41
- let selector_wrapper = this.arena.create_node(SELECTOR, selector_start, this.lexer.pos - selector_start, selector_line, selector_column);
41
+ let selector_wrapper = this.arena.create_node(
42
+ SELECTOR,
43
+ selector_start,
44
+ this.lexer.pos - selector_start,
45
+ selector_line,
46
+ selector_column
47
+ );
42
48
  this.arena.set_content_start_delta(selector_wrapper, 0);
43
49
  this.arena.set_content_length(selector_wrapper, this.lexer.pos - selector_start);
44
50
  let last_component = complex_selector;
@@ -52,29 +58,10 @@ class SelectorParser {
52
58
  }
53
59
  this.skip_whitespace();
54
60
  if (this.lexer.pos >= this.selector_end) break;
55
- let token_type = TOKEN_EOF;
56
- while (this.lexer.pos < this.selector_end) {
57
- this.lexer.next_token_fast(false);
58
- token_type = this.lexer.token_type;
59
- if (token_type === TOKEN_COMMENT) {
60
- this.skip_whitespace();
61
- } else {
62
- break;
63
- }
64
- }
61
+ this.lexer.next_token_fast(false);
62
+ let token_type = this.lexer.token_type;
65
63
  if (token_type === TOKEN_COMMA) {
66
64
  this.skip_whitespace();
67
- while (this.lexer.pos < this.selector_end) {
68
- const saved = this.lexer.save_position();
69
- this.lexer.next_token_fast(false);
70
- token_type = this.lexer.token_type;
71
- if (token_type === TOKEN_COMMENT) {
72
- this.skip_whitespace();
73
- } else {
74
- this.lexer.restore_position(saved);
75
- break;
76
- }
77
- }
78
65
  continue;
79
66
  } else {
80
67
  break;
@@ -100,7 +87,13 @@ class SelectorParser {
100
87
  if (token_type === TOKEN_DELIM) {
101
88
  let ch = this.source.charCodeAt(this.lexer.token_start);
102
89
  if (ch === CHAR_GREATER_THAN || ch === CHAR_PLUS || ch === CHAR_TILDE) {
103
- let combinator = this.create_node_at(COMBINATOR, this.lexer.token_start, this.lexer.token_end, this.lexer.token_line, this.lexer.token_column);
90
+ let combinator = this.create_node_at(
91
+ COMBINATOR,
92
+ this.lexer.token_start,
93
+ this.lexer.token_end,
94
+ this.lexer.token_line,
95
+ this.lexer.token_column
96
+ );
104
97
  components.push(combinator);
105
98
  this.skip_whitespace();
106
99
  } else {
@@ -258,16 +251,9 @@ class SelectorParser {
258
251
  let whitespace_start = this.lexer.pos;
259
252
  let whitespace_start_line = this.lexer.line;
260
253
  let whitespace_start_column = this.lexer.column;
261
- let has_whitespace = false;
262
- while (this.lexer.pos < this.selector_end) {
263
- let ch = this.source.charCodeAt(this.lexer.pos);
264
- if (ch === CHAR_SPACE || ch === CHAR_TAB || ch === CHAR_NEWLINE || ch === CHAR_CARRIAGE_RETURN || ch === CHAR_FORM_FEED) {
265
- has_whitespace = true;
266
- this.lexer.advance();
267
- } else {
268
- break;
269
- }
270
- }
254
+ let has_whitespace = this.lexer.pos < this.selector_end;
255
+ this.skip_whitespace();
256
+ has_whitespace = has_whitespace && this.lexer.pos > whitespace_start;
271
257
  if (this.lexer.pos >= this.selector_end) {
272
258
  this.lexer.pos = whitespace_start;
273
259
  this.lexer.line = whitespace_start_line;
@@ -285,14 +271,7 @@ class SelectorParser {
285
271
  this.lexer.pos = whitespace_start;
286
272
  this.lexer.line = whitespace_start_line;
287
273
  this.lexer.column = whitespace_start_column;
288
- while (this.lexer.pos < this.selector_end) {
289
- let ch = this.source.charCodeAt(this.lexer.pos);
290
- if (ch === CHAR_SPACE || ch === CHAR_TAB || ch === CHAR_NEWLINE || ch === CHAR_CARRIAGE_RETURN || ch === CHAR_FORM_FEED) {
291
- this.lexer.advance();
292
- } else {
293
- break;
294
- }
295
- }
274
+ this.skip_whitespace();
296
275
  return this.create_node_at(COMBINATOR, whitespace_start, this.lexer.pos, whitespace_start_line, whitespace_start_column);
297
276
  }
298
277
  this.lexer.pos = whitespace_start;
@@ -610,10 +589,26 @@ class SelectorParser {
610
589
  this.arena.set_content_length(node, end - start);
611
590
  return node;
612
591
  }
613
- // Helper to skip whitespace and update line/column
592
+ // Helper to skip whitespace and comments, updating line/column
614
593
  skip_whitespace() {
615
- while (this.lexer.pos < this.selector_end && is_whitespace(this.source.charCodeAt(this.lexer.pos))) {
616
- this.lexer.advance();
594
+ while (this.lexer.pos < this.selector_end) {
595
+ let ch = this.source.charCodeAt(this.lexer.pos);
596
+ if (is_whitespace(ch)) {
597
+ this.lexer.advance();
598
+ continue;
599
+ }
600
+ if (ch === CHAR_FORWARD_SLASH && this.lexer.pos + 1 < this.selector_end && this.source.charCodeAt(this.lexer.pos + 1) === CHAR_ASTERISK) {
601
+ this.lexer.advance(2);
602
+ while (this.lexer.pos < this.selector_end) {
603
+ if (this.source.charCodeAt(this.lexer.pos) === CHAR_ASTERISK && this.lexer.pos + 1 < this.selector_end && this.source.charCodeAt(this.lexer.pos + 1) === CHAR_FORWARD_SLASH) {
604
+ this.lexer.advance(2);
605
+ break;
606
+ }
607
+ this.lexer.advance();
608
+ }
609
+ continue;
610
+ }
611
+ break;
617
612
  }
618
613
  }
619
614
  }
@@ -12,7 +12,7 @@ class ValueParser {
12
12
  constructor(arena, source) {
13
13
  this.arena = arena;
14
14
  this.source = source;
15
- this.lexer = new Lexer(source, false);
15
+ this.lexer = new Lexer(source);
16
16
  this.value_end = 0;
17
17
  }
18
18
  // Parse a declaration value range into a VALUE wrapper node
package/dist/parse.d.ts CHANGED
@@ -1,9 +1,10 @@
1
+ import { type CommentInfo } from './tokenize';
1
2
  import { CSSNode } from './css-node';
2
3
  export interface ParserOptions {
3
- skip_comments?: boolean;
4
4
  parse_values?: boolean;
5
5
  parse_selectors?: boolean;
6
6
  parse_atrule_preludes?: boolean;
7
+ on_comment?: (info: CommentInfo) => void;
7
8
  }
8
9
  /**
9
10
  * Parse CSS and return an AST
package/dist/parse.js CHANGED
@@ -23,11 +23,10 @@ class Parser {
23
23
  constructor(source, options) {
24
24
  this.source = source;
25
25
  let opts = options || {};
26
- let skip_comments = opts.skip_comments ?? true;
27
26
  this.parse_values_enabled = opts.parse_values ?? true;
28
27
  this.parse_selectors_enabled = opts.parse_selectors ?? true;
29
28
  this.parse_atrule_preludes_enabled = opts.parse_atrule_preludes ?? true;
30
- this.lexer = new Lexer(source, skip_comments);
29
+ this.lexer = new Lexer(source, opts.on_comment);
31
30
  let capacity = CSSDataArena.capacity_for_source(source.length);
32
31
  this.arena = new CSSDataArena(capacity);
33
32
  this.selector_parser = this.parse_selectors_enabled ? new SelectorParser(this.arena, source) : null;
@@ -9,10 +9,17 @@ export interface LexerPosition {
9
9
  token_line: number;
10
10
  token_column: number;
11
11
  }
12
+ export interface CommentInfo {
13
+ start: number;
14
+ end: number;
15
+ length: number;
16
+ line: number;
17
+ column: number;
18
+ }
12
19
  /**
13
20
  * Tokenize CSS source code
14
21
  * @param source - The CSS source code to tokenize
15
- * @param skip_comments - Whether to skip comment tokens (default: true)
22
+ * @param on_comment - Optional callback for comment tokens
16
23
  * @yields CSS tokens
17
24
  */
18
- export declare function tokenize(source: string, skip_comments?: boolean): Generator<Token, void, undefined>;
25
+ export declare function tokenize(source: string, on_comment?: (info: CommentInfo) => void): Generator<Token, void, undefined>;
package/dist/tokenize.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { char_types, CHAR_WHITESPACE, CHAR_NEWLINE, CHAR_DIGIT, is_ident_start, is_newline, is_hex_digit, is_whitespace, is_ident_char } from './char-types.js';
2
- import { TOKEN_EOF, TOKEN_RIGHT_PAREN, TOKEN_LEFT_PAREN, TOKEN_RIGHT_BRACKET, TOKEN_LEFT_BRACKET, TOKEN_COMMA, TOKEN_SEMICOLON, TOKEN_COLON, TOKEN_RIGHT_BRACE, TOKEN_LEFT_BRACE, TOKEN_CDO, TOKEN_CDC, TOKEN_DELIM, TOKEN_WHITESPACE, TOKEN_COMMENT, TOKEN_STRING, TOKEN_BAD_STRING, TOKEN_PERCENTAGE, TOKEN_DIMENSION, TOKEN_NUMBER, TOKEN_FUNCTION, TOKEN_IDENT, TOKEN_AT_KEYWORD, TOKEN_HASH } from './token-types.js';
2
+ import { TOKEN_EOF, TOKEN_RIGHT_PAREN, TOKEN_LEFT_PAREN, TOKEN_RIGHT_BRACKET, TOKEN_LEFT_BRACKET, TOKEN_COMMA, TOKEN_SEMICOLON, TOKEN_COLON, TOKEN_RIGHT_BRACE, TOKEN_LEFT_BRACE, TOKEN_CDO, TOKEN_CDC, TOKEN_DELIM, TOKEN_WHITESPACE, TOKEN_STRING, TOKEN_BAD_STRING, TOKEN_PERCENTAGE, TOKEN_DIMENSION, TOKEN_NUMBER, TOKEN_FUNCTION, TOKEN_IDENT, TOKEN_AT_KEYWORD, TOKEN_HASH } from './token-types.js';
3
3
 
4
4
  const CHAR_LEFT_BRACE = 123;
5
5
  const CHAR_RIGHT_BRACE = 125;
@@ -33,19 +33,19 @@ class Lexer {
33
33
  pos;
34
34
  line;
35
35
  column;
36
- skip_comments;
36
+ on_comment;
37
37
  // Current token properties (avoiding object allocation)
38
38
  token_type;
39
39
  token_start;
40
40
  token_end;
41
41
  token_line;
42
42
  token_column;
43
- constructor(source, skip_comments = false) {
43
+ constructor(source, on_comment) {
44
44
  this.source = source;
45
45
  this.pos = 0;
46
46
  this.line = 1;
47
47
  this.column = 1;
48
- this.skip_comments = skip_comments;
48
+ this.on_comment = on_comment;
49
49
  this.token_type = TOKEN_EOF;
50
50
  this.token_start = 0;
51
51
  this.token_end = 0;
@@ -101,19 +101,29 @@ class Lexer {
101
101
  return this.consume_whitespace(start_line, start_column);
102
102
  }
103
103
  if (ch === CHAR_FORWARD_SLASH && this.peek() === CHAR_ASTERISK) {
104
- if (this.skip_comments) {
105
- this.advance(2);
106
- while (this.pos < this.source.length - 1) {
107
- let ch2 = this.source.charCodeAt(this.pos);
108
- if (ch2 === CHAR_ASTERISK && this.peek() === CHAR_FORWARD_SLASH) {
109
- this.advance(2);
110
- break;
111
- }
112
- this.advance();
104
+ let comment_start = start;
105
+ let comment_line = start_line;
106
+ let comment_column = start_column;
107
+ this.advance(2);
108
+ while (this.pos < this.source.length - 1) {
109
+ let ch2 = this.source.charCodeAt(this.pos);
110
+ if (ch2 === CHAR_ASTERISK && this.peek() === CHAR_FORWARD_SLASH) {
111
+ this.advance(2);
112
+ break;
113
113
  }
114
- return this.next_token_fast(skip_whitespace);
114
+ this.advance();
115
+ }
116
+ let comment_end = this.pos;
117
+ if (this.on_comment) {
118
+ this.on_comment({
119
+ start: comment_start,
120
+ end: comment_end,
121
+ length: comment_end - comment_start,
122
+ line: comment_line,
123
+ column: comment_column
124
+ });
115
125
  }
116
- return this.consume_comment(start_line, start_column);
126
+ return this.next_token_fast(skip_whitespace);
117
127
  }
118
128
  if (ch === CHAR_DOUBLE_QUOTE || ch === CHAR_SINGLE_QUOTE) {
119
129
  return this.consume_string(ch, start_line, start_column);
@@ -185,19 +195,6 @@ class Lexer {
185
195
  }
186
196
  return this.make_token(TOKEN_WHITESPACE, start, this.pos, start_line, start_column);
187
197
  }
188
- consume_comment(start_line, start_column) {
189
- let start = this.pos;
190
- this.advance(2);
191
- while (this.pos < this.source.length - 1) {
192
- let ch = this.source.charCodeAt(this.pos);
193
- if (ch === CHAR_ASTERISK && this.peek() === CHAR_FORWARD_SLASH) {
194
- this.advance(2);
195
- break;
196
- }
197
- this.advance();
198
- }
199
- return this.make_token(TOKEN_COMMENT, start, this.pos, start_line, start_column);
200
- }
201
198
  consume_string(quote, start_line, start_column) {
202
199
  let start = this.pos;
203
200
  this.advance();
@@ -442,8 +439,8 @@ class Lexer {
442
439
  this.token_column = saved.token_column;
443
440
  }
444
441
  }
445
- function* tokenize(source, skip_comments = true) {
446
- const lexer = new Lexer(source, skip_comments);
442
+ function* tokenize(source, on_comment) {
443
+ const lexer = new Lexer(source, on_comment);
447
444
  while (true) {
448
445
  const token = lexer.next_token();
449
446
  if (!token || token.type === TOKEN_EOF) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@projectwallace/css-parser",
3
- "version": "0.11.4",
3
+ "version": "0.12.1",
4
4
  "description": "High-performance CSS lexer and parser, optimized for CSS inspection and analysis",
5
5
  "author": "Bart Veneman <bart@projectwallace.com>",
6
6
  "license": "MIT",