p_css 0.1.3 → 0.1.4
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/css/code_points.rb +32 -9
- data/lib/css/token.rb +35 -6
- data/lib/css/tokenizer.rb +6 -25
- data/lib/css/version.rb +1 -1
- data/sig/css/token.rbs +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 20afa2206ed855fdd796f19179d06a8dc8b76231c223560d59e664cb43ddf897
|
|
4
|
+
data.tar.gz: e951f89d04ff6db6f68151ad05a414e4f0f7d05f2dfc48580b22ef31b6f949de
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dc533dd2a146654d7a622b3206568168bea1e404b163ef6b24ae1c841ef4cd5ff3a4621bc6794f64178ffd5c87a0c4e6a46f70fd6c7999a52539091849ae2941
|
|
7
|
+
data.tar.gz: 67d08837559466bc5713aa6a40d8e67030ff9389228527b4ed8b4bb5e1121965fa90a31e6cf5aae40167151e535ae83b9f6073068e72d495a5098559b331698d
|
data/lib/css/code_points.rb
CHANGED
|
@@ -1,36 +1,59 @@
|
|
|
1
1
|
module CSS
|
|
2
2
|
# Character class predicates from CSS Syntax §4.2 Definitions, plus the
|
|
3
3
|
# U+FFFD replacement character used both during tokenization and
|
|
4
|
-
# serialization.
|
|
5
|
-
#
|
|
4
|
+
# serialization.
|
|
5
|
+
#
|
|
6
|
+
# ASCII bytes are looked up in a precomputed boolean table (one Array
|
|
7
|
+
# access + one branch); non-ASCII code points (>= 0x80) are always
|
|
8
|
+
# ident-cp / ident-start per spec, so the helpers fall back to a single
|
|
9
|
+
# `c.ord >= 0x80` check. Avoids the chain of `String#<=>` calls a
|
|
10
|
+
# range-style predicate would dispatch.
|
|
6
11
|
module CodePoints
|
|
7
12
|
REPLACEMENT = "�".freeze
|
|
8
13
|
|
|
14
|
+
def self.build_table(*ranges_or_ints)
|
|
15
|
+
Array.new(128, false).tap {|a|
|
|
16
|
+
ranges_or_ints.each {|r|
|
|
17
|
+
if r.is_a?(Range) then r.each { a[it] = true }
|
|
18
|
+
else a[r] = true
|
|
19
|
+
end
|
|
20
|
+
}
|
|
21
|
+
}.freeze
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
DIGIT_TABLE = build_table(0x30..0x39)
|
|
25
|
+
HEX_DIGIT_TABLE = build_table(0x30..0x39, 0x41..0x46, 0x61..0x66)
|
|
26
|
+
IDENT_START_TABLE = build_table(0x41..0x5A, 0x61..0x7A, 0x5F)
|
|
27
|
+
IDENT_CP_TABLE = build_table(0x30..0x39, 0x41..0x5A, 0x61..0x7A, 0x5F, 0x2D)
|
|
28
|
+
|
|
9
29
|
module_function
|
|
10
30
|
|
|
11
31
|
def digit?(c)
|
|
12
|
-
|
|
32
|
+
return false if c.nil?
|
|
33
|
+
|
|
34
|
+
o = c.ord
|
|
35
|
+
o < 128 && DIGIT_TABLE[o]
|
|
13
36
|
end
|
|
14
37
|
|
|
15
38
|
def hex_digit?(c)
|
|
16
39
|
return false if c.nil?
|
|
17
40
|
|
|
18
|
-
|
|
41
|
+
o = c.ord
|
|
42
|
+
o < 128 && HEX_DIGIT_TABLE[o]
|
|
19
43
|
end
|
|
20
44
|
|
|
21
45
|
def ident_start_code_point?(c)
|
|
22
46
|
return false if c.nil?
|
|
23
|
-
return true if c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
|
|
24
47
|
|
|
25
|
-
c.ord
|
|
48
|
+
o = c.ord
|
|
49
|
+
o >= 128 || IDENT_START_TABLE[o]
|
|
26
50
|
end
|
|
27
51
|
|
|
28
52
|
def ident_code_point?(c)
|
|
29
53
|
return false if c.nil?
|
|
30
|
-
return true if c == '_' || c == '-' || (c >= '0' && c <= '9')
|
|
31
|
-
return true if (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
|
|
32
54
|
|
|
33
|
-
c.ord
|
|
55
|
+
o = c.ord
|
|
56
|
+
o >= 128 || IDENT_CP_TABLE[o]
|
|
34
57
|
end
|
|
35
58
|
end
|
|
36
59
|
end
|
data/lib/css/token.rb
CHANGED
|
@@ -17,7 +17,7 @@ module CSS
|
|
|
17
17
|
eof
|
|
18
18
|
].freeze
|
|
19
19
|
|
|
20
|
-
attr_reader :type, :value, :flag, :unit
|
|
20
|
+
attr_reader :type, :value, :flag, :unit
|
|
21
21
|
|
|
22
22
|
def initialize(type, value = nil, flag: nil, unit: nil, position: nil)
|
|
23
23
|
raise ArgumentError, "unknown token type: #{type.inspect}" unless TYPES.include?(type)
|
|
@@ -58,21 +58,50 @@ module CSS
|
|
|
58
58
|
type == :whitespace || type == :comment
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
-
#
|
|
62
|
-
#
|
|
63
|
-
|
|
64
|
-
|
|
61
|
+
# Most tokens never have their `position` read after parsing, so the
|
|
62
|
+
# tokenizer plants raw offsets + a shared `@newlines` reference here
|
|
63
|
+
# via this method, and `Token#position` materializes the `Position`
|
|
64
|
+
# Data on first read.
|
|
65
|
+
def assign_source!(start_offset, end_offset, newlines)
|
|
66
|
+
@start_offset = start_offset
|
|
67
|
+
@end_offset = end_offset
|
|
68
|
+
@newlines = newlines
|
|
65
69
|
self
|
|
66
70
|
end
|
|
67
71
|
|
|
72
|
+
# Returns nil for tokens built without source info (i.e. tokens
|
|
73
|
+
# constructed by hand or via `Token.new(:eof)`).
|
|
74
|
+
def position
|
|
75
|
+
return @position if @position
|
|
76
|
+
return nil unless instance_variable_defined?(:@start_offset)
|
|
77
|
+
|
|
78
|
+
@position = compute_position
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Reads `@position` directly so debug-style introspection doesn't
|
|
82
|
+
# materialize a `Position` as a side effect.
|
|
68
83
|
def inspect
|
|
69
84
|
parts = ["type=#{type.inspect}"]
|
|
70
85
|
parts << "value=#{value.inspect}" unless value.nil?
|
|
71
86
|
parts << "flag=#{flag.inspect}" unless flag.nil?
|
|
72
87
|
parts << "unit=#{unit.inspect}" unless unit.nil?
|
|
73
|
-
parts << "@#{position}"
|
|
88
|
+
parts << "@#{@position}" if @position
|
|
74
89
|
|
|
75
90
|
"#<CSS::Token #{parts.join(' ')}>"
|
|
76
91
|
end
|
|
92
|
+
|
|
93
|
+
private
|
|
94
|
+
|
|
95
|
+
def compute_position
|
|
96
|
+
idx = @newlines.bsearch_index { it >= @start_offset } || @newlines.size
|
|
97
|
+
prev_nl = idx.zero? ? -1 : @newlines[idx - 1]
|
|
98
|
+
|
|
99
|
+
Position.new(
|
|
100
|
+
line: idx + 1,
|
|
101
|
+
column: @start_offset - prev_nl,
|
|
102
|
+
offset: @start_offset,
|
|
103
|
+
end_offset: @end_offset
|
|
104
|
+
)
|
|
105
|
+
end
|
|
77
106
|
end
|
|
78
107
|
end
|
data/lib/css/tokenizer.rb
CHANGED
|
@@ -2,9 +2,8 @@ module CSS
|
|
|
2
2
|
# Tokenizer based on CSS Syntax Module Level 3/4 §4.
|
|
3
3
|
# https://www.w3.org/TR/css-syntax-3/#tokenization
|
|
4
4
|
#
|
|
5
|
-
# Not thread-safe: an instance carries mutable
|
|
6
|
-
#
|
|
7
|
-
# tokenizer per thread.
|
|
5
|
+
# Not thread-safe: an instance carries a mutable cursor (`@pos`) that
|
|
6
|
+
# advances over the input. Allocate one tokenizer per thread.
|
|
8
7
|
class Tokenizer
|
|
9
8
|
include CodePoints
|
|
10
9
|
|
|
@@ -26,9 +25,9 @@ module CSS
|
|
|
26
25
|
|
|
27
26
|
def initialize(input, preserve_comments: false)
|
|
28
27
|
@chars = preprocess(input)
|
|
28
|
+
@length = @chars.length
|
|
29
29
|
@pos = 0
|
|
30
30
|
@newlines = collect_newline_offsets(@chars)
|
|
31
|
-
@newline_cursor = 0
|
|
32
31
|
@preserve_comments = preserve_comments
|
|
33
32
|
end
|
|
34
33
|
|
|
@@ -48,13 +47,12 @@ module CSS
|
|
|
48
47
|
def next_token
|
|
49
48
|
consume_comments unless @preserve_comments
|
|
50
49
|
|
|
51
|
-
return Token.new(:eof) if @pos >= @
|
|
50
|
+
return Token.new(:eof) if @pos >= @length
|
|
52
51
|
|
|
53
52
|
start_offset = @pos
|
|
54
53
|
tok = consume_one_token
|
|
55
|
-
line, column = line_column_at(start_offset)
|
|
56
54
|
|
|
57
|
-
tok.
|
|
55
|
+
tok.assign_source!(start_offset, @pos, @newlines)
|
|
58
56
|
end
|
|
59
57
|
|
|
60
58
|
private
|
|
@@ -174,23 +172,6 @@ module CSS
|
|
|
174
172
|
offsets
|
|
175
173
|
end
|
|
176
174
|
|
|
177
|
-
# Newline characters themselves are reported as belonging to the
|
|
178
|
-
# line they terminate (col = offset + 1 on line 1, etc).
|
|
179
|
-
#
|
|
180
|
-
# Tokens are emitted in order, so the offsets passed in are
|
|
181
|
-
# monotonically non-decreasing. We keep a running cursor into
|
|
182
|
-
# `@newlines` and advance linearly — amortized O(1) per call,
|
|
183
|
-
# vs. O(log n) per call with a fresh `bsearch`.
|
|
184
|
-
def line_column_at(offset)
|
|
185
|
-
while @newline_cursor < @newlines.size && @newlines[@newline_cursor] < offset
|
|
186
|
-
@newline_cursor += 1
|
|
187
|
-
end
|
|
188
|
-
|
|
189
|
-
prev_nl = @newline_cursor.zero? ? -1 : @newlines[@newline_cursor - 1]
|
|
190
|
-
|
|
191
|
-
[@newline_cursor + 1, offset - prev_nl]
|
|
192
|
-
end
|
|
193
|
-
|
|
194
175
|
def whitespace?(c)
|
|
195
176
|
c == ' ' || c == "\n" || c == "\t"
|
|
196
177
|
end
|
|
@@ -267,7 +248,7 @@ module CSS
|
|
|
267
248
|
end
|
|
268
249
|
|
|
269
250
|
def eof?
|
|
270
|
-
@pos >= @
|
|
251
|
+
@pos >= @length
|
|
271
252
|
end
|
|
272
253
|
|
|
273
254
|
def consume_whitespace
|
data/lib/css/version.rb
CHANGED
data/sig/css/token.rbs
CHANGED
|
@@ -22,7 +22,7 @@ module CSS
|
|
|
22
22
|
def comment?: () -> bool
|
|
23
23
|
def trivia?: () -> bool
|
|
24
24
|
|
|
25
|
-
def
|
|
25
|
+
def assign_source!: (Integer start_offset, Integer end_offset, Array[Integer] newlines) -> self
|
|
26
26
|
|
|
27
27
|
def ==: (untyped other) -> bool
|
|
28
28
|
def eql?: (untyped other) -> bool
|