p_css 0.2.0.beta1-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +282 -0
  3. data/Cargo.toml +3 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +357 -0
  6. data/ext/css_native/Cargo.toml +12 -0
  7. data/ext/css_native/extconf.rb +4 -0
  8. data/ext/css_native/src/lib.rs +117 -0
  9. data/ext/css_native/src/matcher.rs +356 -0
  10. data/ext/css_native/src/selectors.rs +411 -0
  11. data/ext/css_native/src/snapshot.rs +370 -0
  12. data/ext/css_native/src/state.rs +174 -0
  13. data/ext/css_native/src/tokenizer.rs +596 -0
  14. data/lib/css/3.3/css_native.so +0 -0
  15. data/lib/css/3.4/css_native.so +0 -0
  16. data/lib/css/4.0/css_native.so +0 -0
  17. data/lib/css/cascade.rb +277 -0
  18. data/lib/css/code_points.rb +59 -0
  19. data/lib/css/escape.rb +82 -0
  20. data/lib/css/media_queries/context.rb +60 -0
  21. data/lib/css/media_queries/evaluator.rb +157 -0
  22. data/lib/css/media_queries/nodes.rb +41 -0
  23. data/lib/css/media_queries/parser.rb +374 -0
  24. data/lib/css/media_queries.rb +9 -0
  25. data/lib/css/native.rb +179 -0
  26. data/lib/css/nesting.rb +229 -0
  27. data/lib/css/nodes.rb +42 -0
  28. data/lib/css/parser.rb +429 -0
  29. data/lib/css/selectors/anb_parser.rb +174 -0
  30. data/lib/css/selectors/matcher.rb +545 -0
  31. data/lib/css/selectors/nodes.rb +61 -0
  32. data/lib/css/selectors/parser.rb +395 -0
  33. data/lib/css/selectors/serializer.rb +102 -0
  34. data/lib/css/selectors/specificity.rb +81 -0
  35. data/lib/css/selectors.rb +11 -0
  36. data/lib/css/serializer.rb +167 -0
  37. data/lib/css/token.rb +107 -0
  38. data/lib/css/token_cursor.rb +49 -0
  39. data/lib/css/tokenizer.rb +447 -0
  40. data/lib/css/urange.rb +45 -0
  41. data/lib/css/version.rb +3 -0
  42. data/lib/css.rb +73 -0
  43. data/lib/p_css.rb +1 -0
  44. data/sig/css/cascade.rbs +22 -0
  45. data/sig/css/media_queries.rbs +107 -0
  46. data/sig/css/nodes.rbs +76 -0
  47. data/sig/css/selectors.rbs +164 -0
  48. data/sig/css/token.rbs +33 -0
  49. data/sig/css.rbs +99 -0
  50. metadata +113 -0
@@ -0,0 +1,596 @@
1
+ // Port of lib/css/tokenizer.rb (CSS Syntax Module Level 3/4 §4).
2
+ // Position tracking is intentionally omitted in this first cut — only
3
+ // type/value/flag/unit parity with the pure-Ruby Token is targeted.
4
+
5
/// U+FFFD REPLACEMENT CHARACTER, substituted for NUL input and for
/// escapes that do not name a usable code point.
const REPLACEMENT: char = char::REPLACEMENT_CHARACTER;
6
+
7
/// Token types emitted by the tokenizer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Kind {
    /// Identifier.
    Ident,
    /// Identifier immediately followed by `(`.
    Function,
    /// `@` followed by an identifier sequence.
    AtKeyword,
    /// `#` followed by an identifier sequence.
    Hash,
    /// Quoted string.
    String_,
    /// String cut short by an unescaped newline.
    BadString,
    /// Unquoted `url(...)`.
    Url,
    /// Malformed unquoted `url(...)`.
    BadUrl,
    /// Any single code point with no dedicated token type.
    Delim,
    /// Plain number.
    Number,
    /// Number followed by `%`.
    Percentage,
    /// Number followed by a unit identifier.
    Dimension,
    /// Run of whitespace.
    Whitespace,
    /// `<!--`
    Cdo,
    /// `-->`
    Cdc,
    /// `/* ... */`, only emitted when comments are preserved.
    Comment,
    /// `:`
    Colon,
    /// `;`
    Semicolon,
    /// `,`
    Comma,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
}
14
+
15
/// Type flag attached to hash tokens.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashFlag {
    /// The text after `#` would start an identifier.
    Id,
    /// Any other hash token.
    Unrestricted,
}
17
+
18
/// Type flag attached to numeric tokens.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumberFlag {
    /// The literal had neither a fractional part nor an exponent.
    Integer,
    /// The literal had a fractional part and/or an exponent.
    Number,
}
20
+
21
/// Payload carried by a token; which variant is used depends on the
/// token's kind.
#[derive(Debug, Clone)]
pub enum TokenValue {
    /// The token has no payload (punctuation, whitespace, ...).
    None,
    /// Textual payload: names, string contents, URLs, comment bodies.
    Str(String),
    /// The code point of a delim token.
    Delim(char),
    /// Value of a numeric token flagged as an integer.
    Int(i64),
    /// Value of a numeric token flagged as a (non-integer) number.
    Float(f64),
}
29
+
30
+ #[derive(Clone, Debug)]
31
+ pub struct Token {
32
+ pub kind: Kind,
33
+ pub value: TokenValue,
34
+ pub number_flag: Option<NumberFlag>,
35
+ pub hash_flag: Option<HashFlag>,
36
+ pub unit: Option<String>,
37
+ }
38
+
39
+ impl Token {
40
+ fn bare(kind: Kind) -> Self {
41
+ Self { kind, value: TokenValue::None, number_flag: None, hash_flag: None, unit: None }
42
+ }
43
+
44
+ fn delim(c: char) -> Self {
45
+ Self { value: TokenValue::Delim(c), ..Self::bare(Kind::Delim) }
46
+ }
47
+
48
+ fn with_str(kind: Kind, s: String) -> Self {
49
+ Self { value: TokenValue::Str(s), ..Self::bare(kind) }
50
+ }
51
+ }
52
+
53
/// Tokenizer state: the preprocessed input plus a cursor into it.
pub struct Tokenizer {
    /// Preprocessed input, one entry per code point.
    chars: Vec<char>,
    /// Index into `chars` of the next code point to consume.
    pos: usize,
    /// Emit comments as tokens instead of skipping them.
    preserve_comments: bool,
}
58
+
59
+ impl Tokenizer {
60
+ pub fn new(input: &str, preserve_comments: bool) -> Self {
61
+ Self {
62
+ chars: preprocess(input),
63
+ pos: 0,
64
+ preserve_comments,
65
+ }
66
+ }
67
+
68
+ pub fn tokenize(&mut self) -> Vec<Token> {
69
+ let mut out = Vec::new();
70
+
71
+ loop {
72
+ if !self.preserve_comments {
73
+ self.consume_comments();
74
+ }
75
+
76
+ if self.eof() {
77
+ break;
78
+ }
79
+
80
+ out.push(self.consume_one_token());
81
+ }
82
+
83
+ out
84
+ }
85
+
86
+ // --- cursor primitives -----------------------------------------
87
+
88
+ fn peek(&self, offset: usize) -> Option<char> {
89
+ self.chars.get(self.pos + offset).copied()
90
+ }
91
+
92
+ fn consume(&mut self) -> Option<char> {
93
+ let c = self.chars.get(self.pos).copied();
94
+
95
+ if c.is_some() {
96
+ self.pos += 1;
97
+ }
98
+
99
+ c
100
+ }
101
+
102
+ fn reconsume(&mut self) {
103
+ self.pos -= 1;
104
+ }
105
+
106
+ fn eof(&self) -> bool {
107
+ self.pos >= self.chars.len()
108
+ }
109
+
110
+ // --- main dispatch ---------------------------------------------
111
+
112
+ fn consume_one_token(&mut self) -> Token {
113
+ if self.peek(0) == Some('/') && self.peek(1) == Some('*') {
114
+ return self.consume_comment_token();
115
+ }
116
+
117
+ let c = self.consume().expect("eof handled by caller");
118
+
119
+ if is_whitespace(c) {
120
+ return self.consume_whitespace();
121
+ }
122
+
123
+ if c == '"' || c == '\'' {
124
+ return self.consume_string_token(c);
125
+ }
126
+
127
+ if (c == '+' || c == '-' || c == '.') && number_starts(Some(c), self.peek(0), self.peek(1)) {
128
+ self.reconsume();
129
+ return self.consume_numeric_token();
130
+ }
131
+
132
+ if let Some(kind) = punctuation_kind(c) {
133
+ return Token::bare(kind);
134
+ }
135
+
136
+ match c {
137
+ '#' => {
138
+ if is_ident_code_point(self.peek(0)) || valid_escape(self.peek(0), self.peek(1)) {
139
+ let flag = if ident_sequence_starts(self.peek(0), self.peek(1), self.peek(2)) {
140
+ HashFlag::Id
141
+ } else {
142
+ HashFlag::Unrestricted
143
+ };
144
+
145
+ let name = self.consume_ident_sequence();
146
+
147
+ Token {
148
+ hash_flag: Some(flag),
149
+ ..Token::with_str(Kind::Hash, name)
150
+ }
151
+ } else {
152
+ Token::delim(c)
153
+ }
154
+ }
155
+ '+' | '.' => Token::delim(c),
156
+ '-' => {
157
+ if self.peek(0) == Some('-') && self.peek(1) == Some('>') {
158
+ self.consume();
159
+ self.consume();
160
+ Token::bare(Kind::Cdc)
161
+ } else if ident_sequence_starts(Some(c), self.peek(0), self.peek(1)) {
162
+ self.reconsume();
163
+ self.consume_ident_like_token()
164
+ } else {
165
+ Token::delim(c)
166
+ }
167
+ }
168
+ '<' => {
169
+ if self.peek(0) == Some('!') && self.peek(1) == Some('-') && self.peek(2) == Some('-') {
170
+ self.consume();
171
+ self.consume();
172
+ self.consume();
173
+ Token::bare(Kind::Cdo)
174
+ } else {
175
+ Token::delim(c)
176
+ }
177
+ }
178
+ '@' => {
179
+ if ident_sequence_starts(self.peek(0), self.peek(1), self.peek(2)) {
180
+ Token::with_str(Kind::AtKeyword, self.consume_ident_sequence())
181
+ } else {
182
+ Token::delim(c)
183
+ }
184
+ }
185
+ '\\' => {
186
+ if valid_escape(Some(c), self.peek(0)) {
187
+ self.reconsume();
188
+ self.consume_ident_like_token()
189
+ } else {
190
+ Token::delim(c)
191
+ }
192
+ }
193
+ '0'..='9' => {
194
+ self.reconsume();
195
+ self.consume_numeric_token()
196
+ }
197
+ _ => {
198
+ if is_ident_start_code_point(Some(c)) {
199
+ self.reconsume();
200
+ self.consume_ident_like_token()
201
+ } else {
202
+ Token::delim(c)
203
+ }
204
+ }
205
+ }
206
+ }
207
+
208
+ // --- comments --------------------------------------------------
209
+
210
+ fn consume_comments(&mut self) {
211
+ while self.peek(0) == Some('/') && self.peek(1) == Some('*') {
212
+ self.consume();
213
+ self.consume();
214
+
215
+ while !self.eof() {
216
+ if self.consume() == Some('*') && self.peek(0) == Some('/') {
217
+ self.consume();
218
+ break;
219
+ }
220
+ }
221
+ }
222
+ }
223
+
224
+ fn consume_comment_token(&mut self) -> Token {
225
+ self.consume();
226
+ self.consume();
227
+ let mut buf = String::new();
228
+
229
+ while !self.eof() {
230
+ let c = self.consume().unwrap();
231
+
232
+ if c == '*' && self.peek(0) == Some('/') {
233
+ self.consume();
234
+ break;
235
+ }
236
+
237
+ buf.push(c);
238
+ }
239
+
240
+ Token::with_str(Kind::Comment, buf)
241
+ }
242
+
243
+ fn consume_whitespace(&mut self) -> Token {
244
+ while is_whitespace_opt(self.peek(0)) {
245
+ self.consume();
246
+ }
247
+
248
+ Token::bare(Kind::Whitespace)
249
+ }
250
+
251
+ // --- strings ---------------------------------------------------
252
+
253
+ fn consume_string_token(&mut self, ending: char) -> Token {
254
+ let mut buf = String::new();
255
+
256
+ loop {
257
+ match self.consume() {
258
+ None => return Token::with_str(Kind::String_, buf),
259
+ Some(c) if c == ending => return Token::with_str(Kind::String_, buf),
260
+ Some('\n') => {
261
+ self.reconsume();
262
+ return Token::bare(Kind::BadString);
263
+ }
264
+ Some('\\') => {
265
+ let n = self.peek(0);
266
+
267
+ if n.is_none() {
268
+ continue;
269
+ } else if n == Some('\n') {
270
+ self.consume();
271
+ } else {
272
+ buf.push(self.consume_escaped_code_point());
273
+ }
274
+ }
275
+ Some(c) => buf.push(c),
276
+ }
277
+ }
278
+ }
279
+
280
+ // --- escape ----------------------------------------------------
281
+
282
+ fn consume_escaped_code_point(&mut self) -> char {
283
+ let c = match self.consume() {
284
+ None => return REPLACEMENT,
285
+ Some(c) => c,
286
+ };
287
+
288
+ if !is_hex_digit(Some(c)) {
289
+ return c;
290
+ }
291
+
292
+ let mut hex = String::with_capacity(6);
293
+ hex.push(c);
294
+
295
+ while hex.len() < 6 && is_hex_digit(self.peek(0)) {
296
+ hex.push(self.consume().unwrap());
297
+ }
298
+
299
+ if is_whitespace_opt(self.peek(0)) {
300
+ self.consume();
301
+ }
302
+
303
+ let n = u32::from_str_radix(&hex, 16).unwrap_or(0);
304
+
305
+ if n == 0 || (0xD800..=0xDFFF).contains(&n) || n > 0x10FFFF {
306
+ REPLACEMENT
307
+ } else {
308
+ char::from_u32(n).unwrap_or(REPLACEMENT)
309
+ }
310
+ }
311
+
312
+ // --- ident-like ------------------------------------------------
313
+
314
+ fn consume_ident_sequence(&mut self) -> String {
315
+ let mut buf = String::new();
316
+
317
+ loop {
318
+ let c = self.consume();
319
+
320
+ if is_ident_code_point(c) {
321
+ buf.push(c.unwrap());
322
+ } else if valid_escape(c, self.peek(0)) {
323
+ buf.push(self.consume_escaped_code_point());
324
+ } else {
325
+ if c.is_some() {
326
+ self.reconsume();
327
+ }
328
+ return buf;
329
+ }
330
+ }
331
+ }
332
+
333
+ fn consume_ident_like_token(&mut self) -> Token {
334
+ let name = self.consume_ident_sequence();
335
+
336
+ if name.eq_ignore_ascii_case("url") && self.peek(0) == Some('(') {
337
+ self.consume();
338
+
339
+ while is_whitespace_opt(self.peek(0)) && is_whitespace_opt(self.peek(1)) {
340
+ self.consume();
341
+ }
342
+
343
+ let n1 = self.peek(0);
344
+ let n2 = if is_whitespace_opt(n1) { self.peek(1) } else { n1 };
345
+
346
+ let is_quote = |c: Option<char>| c == Some('"') || c == Some('\'');
347
+
348
+ if is_quote(n1) || (is_whitespace_opt(n1) && is_quote(n2)) {
349
+ Token::with_str(Kind::Function, name)
350
+ } else {
351
+ self.consume_url_token()
352
+ }
353
+ } else if self.peek(0) == Some('(') {
354
+ self.consume();
355
+ Token::with_str(Kind::Function, name)
356
+ } else {
357
+ Token::with_str(Kind::Ident, name)
358
+ }
359
+ }
360
+
361
+ fn consume_url_token(&mut self) -> Token {
362
+ let mut buf = String::new();
363
+
364
+ while is_whitespace_opt(self.peek(0)) {
365
+ self.consume();
366
+ }
367
+
368
+ loop {
369
+ let c = self.consume();
370
+
371
+ match c {
372
+ None | Some(')') => return Token::with_str(Kind::Url, buf),
373
+ Some('"') | Some('\'') | Some('(') => {
374
+ self.consume_bad_url_remnants();
375
+ return Token::bare(Kind::BadUrl);
376
+ }
377
+ Some(' ') | Some('\t') | Some('\n') => {
378
+ while is_whitespace_opt(self.peek(0)) {
379
+ self.consume();
380
+ }
381
+
382
+ let n = self.peek(0);
383
+
384
+ if n.is_none() || n == Some(')') {
385
+ if n.is_some() {
386
+ self.consume();
387
+ }
388
+ return Token::with_str(Kind::Url, buf);
389
+ } else {
390
+ self.consume_bad_url_remnants();
391
+ return Token::bare(Kind::BadUrl);
392
+ }
393
+ }
394
+ Some('\\') => {
395
+ if valid_escape(c, self.peek(0)) {
396
+ buf.push(self.consume_escaped_code_point());
397
+ } else {
398
+ self.consume_bad_url_remnants();
399
+ return Token::bare(Kind::BadUrl);
400
+ }
401
+ }
402
+ Some(c) => {
403
+ if is_non_printable(c) {
404
+ self.consume_bad_url_remnants();
405
+ return Token::bare(Kind::BadUrl);
406
+ }
407
+ buf.push(c);
408
+ }
409
+ }
410
+ }
411
+ }
412
+
413
+ fn consume_bad_url_remnants(&mut self) {
414
+ loop {
415
+ let c = self.consume();
416
+
417
+ if c.is_none() || c == Some(')') {
418
+ return;
419
+ }
420
+
421
+ if valid_escape(c, self.peek(0)) {
422
+ self.consume_escaped_code_point();
423
+ }
424
+ }
425
+ }
426
+
427
+ // --- numbers ---------------------------------------------------
428
+
429
+ fn consume_numeric_token(&mut self) -> Token {
430
+ let (value, flag) = self.consume_number();
431
+
432
+ if ident_sequence_starts(self.peek(0), self.peek(1), self.peek(2)) {
433
+ let unit = self.consume_ident_sequence();
434
+
435
+ Token {
436
+ number_flag: Some(flag),
437
+ unit: Some(unit),
438
+ ..Self::with_number_value(Kind::Dimension, value, flag)
439
+ }
440
+ } else if self.peek(0) == Some('%') {
441
+ self.consume();
442
+ Self::with_number_value(Kind::Percentage, value, flag)
443
+ } else {
444
+ Token {
445
+ number_flag: Some(flag),
446
+ ..Self::with_number_value(Kind::Number, value, flag)
447
+ }
448
+ }
449
+ }
450
+
451
+ fn with_number_value(kind: Kind, value: TokenValue, _flag: NumberFlag) -> Token {
452
+ Token { value, ..Token::bare(kind) }
453
+ }
454
+
455
+ fn consume_number(&mut self) -> (TokenValue, NumberFlag) {
456
+ let mut repr = String::new();
457
+ let mut flag = NumberFlag::Integer;
458
+
459
+ if self.peek(0) == Some('+') || self.peek(0) == Some('-') {
460
+ repr.push(self.consume().unwrap());
461
+ }
462
+
463
+ while is_digit(self.peek(0)) {
464
+ repr.push(self.consume().unwrap());
465
+ }
466
+
467
+ if self.peek(0) == Some('.') && is_digit(self.peek(1)) {
468
+ repr.push(self.consume().unwrap());
469
+ while is_digit(self.peek(0)) {
470
+ repr.push(self.consume().unwrap());
471
+ }
472
+ flag = NumberFlag::Number;
473
+ }
474
+
475
+ let exp = self.peek(0);
476
+ let after_exp = self.peek(1);
477
+
478
+ if (exp == Some('E') || exp == Some('e'))
479
+ && (is_digit(after_exp)
480
+ || ((after_exp == Some('+') || after_exp == Some('-')) && is_digit(self.peek(2))))
481
+ {
482
+ repr.push(self.consume().unwrap());
483
+ if self.peek(0) == Some('+') || self.peek(0) == Some('-') {
484
+ repr.push(self.consume().unwrap());
485
+ }
486
+ while is_digit(self.peek(0)) {
487
+ repr.push(self.consume().unwrap());
488
+ }
489
+ flag = NumberFlag::Number;
490
+ }
491
+
492
+ let value = match flag {
493
+ NumberFlag::Integer => TokenValue::Int(repr.parse().unwrap_or(0)),
494
+ NumberFlag::Number => TokenValue::Float(repr.parse().unwrap_or(0.0)),
495
+ };
496
+
497
+ (value, flag)
498
+ }
499
+ }
500
+
501
+ // --- preprocessing ----------------------------------------------
502
+
503
+ fn preprocess(input: &str) -> Vec<char> {
504
+ let mut out = Vec::with_capacity(input.len());
505
+ let mut iter = input.chars().peekable();
506
+
507
+ while let Some(c) = iter.next() {
508
+ match c {
509
+ '\r' => {
510
+ out.push('\n');
511
+ if iter.peek() == Some(&'\n') {
512
+ iter.next();
513
+ }
514
+ }
515
+ '\x0C' => out.push('\n'),
516
+ '\0' => out.push(REPLACEMENT),
517
+ _ => out.push(c),
518
+ }
519
+ }
520
+
521
+ out
522
+ }
523
+
524
+ // --- code point classifiers -------------------------------------
525
+
526
/// True for space, newline, and tab.
fn is_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\n' | '\t')
}
529
+
530
/// `Option` variant of the whitespace test: true for space, newline,
/// and tab; false for anything else, including `None`.
fn is_whitespace_opt(c: Option<char>) -> bool {
    match c {
        Some(ch) => ch == ' ' || ch == '\n' || ch == '\t',
        None => false,
    }
}
533
+
534
/// True for ASCII digits `0`-`9`; false for `None`.
fn is_digit(c: Option<char>) -> bool {
    c.map_or(false, |ch| ch.is_ascii_digit())
}
537
+
538
/// True for ASCII hex digits (`0`-`9`, `A`-`F`, `a`-`f`); false for `None`.
fn is_hex_digit(c: Option<char>) -> bool {
    c.map_or(false, |ch| ch.is_ascii_hexdigit())
}
541
+
542
/// True for code points that may begin an identifier: ASCII letters,
/// underscore, and anything at or above U+0080. False for `None`.
fn is_ident_start_code_point(c: Option<char>) -> bool {
    match c {
        Some(ch) => ch.is_ascii_alphabetic() || ch == '_' || !ch.is_ascii(),
        None => false,
    }
}
550
+
551
+ fn is_ident_code_point(c: Option<char>) -> bool {
552
+ is_ident_start_code_point(c) || is_digit(c) || c == Some('-')
553
+ }
554
+
555
/// True for the control code points U+0000-U+0008, U+000B,
/// U+000E-U+001F, and U+007F.
fn is_non_printable(c: char) -> bool {
    matches!(c as u32, 0x00..=0x08 | 0x0B | 0x0E..=0x1F | 0x7F)
}
559
+
560
// §4.3.8
/// True when `c1`, `c2` form a valid escape: a backslash followed by
/// any code point other than a newline (and not end of input).
fn valid_escape(c1: Option<char>, c2: Option<char>) -> bool {
    matches!((c1, c2), (Some('\\'), Some(next)) if next != '\n')
}
564
+
565
+ // §4.3.9
566
+ fn ident_sequence_starts(c1: Option<char>, c2: Option<char>, c3: Option<char>) -> bool {
567
+ match c1 {
568
+ Some('-') => is_ident_start_code_point(c2) || c2 == Some('-') || valid_escape(c2, c3),
569
+ Some('\\') => valid_escape(c1, c2),
570
+ _ => is_ident_start_code_point(c1),
571
+ }
572
+ }
573
+
574
+ // §4.3.10
575
+ fn number_starts(c1: Option<char>, c2: Option<char>, c3: Option<char>) -> bool {
576
+ match c1 {
577
+ Some('+') | Some('-') => is_digit(c2) || (c2 == Some('.') && is_digit(c3)),
578
+ Some('.') => is_digit(c2),
579
+ _ => is_digit(c1),
580
+ }
581
+ }
582
+
583
+ fn punctuation_kind(c: char) -> Option<Kind> {
584
+ Some(match c {
585
+ '(' => Kind::LParen,
586
+ ')' => Kind::RParen,
587
+ ',' => Kind::Comma,
588
+ ':' => Kind::Colon,
589
+ ';' => Kind::Semicolon,
590
+ '[' => Kind::LBracket,
591
+ ']' => Kind::RBracket,
592
+ '{' => Kind::LBrace,
593
+ '}' => Kind::RBrace,
594
+ _ => return None,
595
+ })
596
+ }
Binary file
Binary file
Binary file