p_css 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df1cd693075fe04da6a0c9ce4c65c9c4ef5f85c5b84bb82bd0136cf62fe52552
4
- data.tar.gz: 9a9fc875c3872c49396c5b6c753a099ba3cc344efa7aa97745958d277630c7cb
3
+ metadata.gz: 20afa2206ed855fdd796f19179d06a8dc8b76231c223560d59e664cb43ddf897
4
+ data.tar.gz: e951f89d04ff6db6f68151ad05a414e4f0f7d05f2dfc48580b22ef31b6f949de
5
5
  SHA512:
6
- metadata.gz: 28c76784dac592aa39cfaa5100b69e4765861ec2ab1ed536fb95dfb963481de2bd64d6b6b954f71b14b17ea4a328cc65af6f7a63df662f2083c2ef5ec623e892
7
- data.tar.gz: 291d12b999205c032e7cebaa5da4b69aff717cb9f7b9743c3c54ce99539b59ec49612b87e9bcf105d374cc79871d8955fc57f18200b8ba76c396894db14f349f
6
+ metadata.gz: dc533dd2a146654d7a622b3206568168bea1e404b163ef6b24ae1c841ef4cd5ff3a4621bc6794f64178ffd5c87a0c4e6a46f70fd6c7999a52539091849ae2941
7
+ data.tar.gz: 67d08837559466bc5713aa6a40d8e67030ff9389228527b4ed8b4bb5e1121965fa90a31e6cf5aae40167151e535ae83b9f6073068e72d495a5098559b331698d
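[file header missing from this listing: the hunk below is the Ruby source defining module CSS::CodePoints, not part of checksums.yaml] CHANGED
@@ -1,36 +1,59 @@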
1
1
  module CSS
2
2
  # Character class predicates from CSS Syntax §4.2 Definitions, plus the
3
3
  # U+FFFD replacement character used both during tokenization and
4
- # serialization. Implemented with char comparisons rather than regex to
5
- # avoid pattern-match overhead in the tokenizer's inner loop.
4
+ # serialization.
5
+ #
6
+ # ASCII bytes are looked up in a precomputed boolean table (one Array
7
+ # access + one branch); non-ASCII code points (>= 0x80) are always
8
+ # ident-cp / ident-start per spec, so the helpers fall back to a single
9
+ # `c.ord >= 0x80` check. Avoids the chain of `String#<=>` calls a
10
+ # range-style predicate would dispatch.
6
11
  module CodePoints
7
12
  REPLACEMENT = "�".freeze
8
13
 
14
+ def self.build_table(*ranges_or_ints)
15
+ Array.new(128, false).tap {|a|
16
+ ranges_or_ints.each {|r|
17
+ if r.is_a?(Range) then r.each { a[it] = true }
18
+ else a[r] = true
19
+ end
20
+ }
21
+ }.freeze
22
+ end
23
+
24
+ DIGIT_TABLE = build_table(0x30..0x39)
25
+ HEX_DIGIT_TABLE = build_table(0x30..0x39, 0x41..0x46, 0x61..0x66)
26
+ IDENT_START_TABLE = build_table(0x41..0x5A, 0x61..0x7A, 0x5F)
27
+ IDENT_CP_TABLE = build_table(0x30..0x39, 0x41..0x5A, 0x61..0x7A, 0x5F, 0x2D)
28
+
9
29
  module_function
10
30
 
11
31
  def digit?(c)
12
- !c.nil? && c >= '0' && c <= '9'
32
+ return false if c.nil?
33
+
34
+ o = c.ord
35
+ o < 128 && DIGIT_TABLE[o]
13
36
  end
14
37
 
15
38
  def hex_digit?(c)
16
39
  return false if c.nil?
17
40
 
18
- (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')
41
+ o = c.ord
42
+ o < 128 && HEX_DIGIT_TABLE[o]
19
43
  end
20
44
 
21
45
  def ident_start_code_point?(c)
22
46
  return false if c.nil?
23
- return true if c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
24
47
 
25
- c.ord >= 0x80
48
+ o = c.ord
49
+ o >= 128 || IDENT_START_TABLE[o]
26
50
  end
27
51
 
28
52
  def ident_code_point?(c)
29
53
  return false if c.nil?
30
- return true if c == '_' || c == '-' || (c >= '0' && c <= '9')
31
- return true if (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
32
54
 
33
- c.ord >= 0x80
55
+ o = c.ord
56
+ o >= 128 || IDENT_CP_TABLE[o]
34
57
  end
35
58
  end
36
59
  end
data/lib/css/token.rb CHANGED
@@ -17,7 +17,7 @@ module CSS
17
17
  eof
18
18
  ].freeze
19
19
 
20
- attr_reader :type, :value, :flag, :unit, :position
20
+ attr_reader :type, :value, :flag, :unit
21
21
 
22
22
  def initialize(type, value = nil, flag: nil, unit: nil, position: nil)
23
23
  raise ArgumentError, "unknown token type: #{type.inspect}" unless TYPES.include?(type)
@@ -58,21 +58,50 @@ module CSS
58
58
  type == :whitespace || type == :comment
59
59
  end
60
60
 
61
- # Mutating: assigns the token's source position and returns self. Used
62
- # by the tokenizer so each token requires only a single allocation.
63
- def assign_position!(pos)
64
- @position = pos
61
+ # Most tokens never have their `position` read after parsing, so the
62
+ # tokenizer plants raw offsets + a shared `@newlines` reference here
63
+ # via this method, and `Token#position` materializes the `Position`
64
+ # Data on first read.
65
+ def assign_source!(start_offset, end_offset, newlines)
66
+ @start_offset = start_offset
67
+ @end_offset = end_offset
68
+ @newlines = newlines
65
69
  self
66
70
  end
67
71
 
72
+ # Returns nil for tokens built without source info (i.e. tokens
73
+ # constructed by hand or via `Token.new(:eof)`).
74
+ def position
75
+ return @position if @position
76
+ return nil unless instance_variable_defined?(:@start_offset)
77
+
78
+ @position = compute_position
79
+ end
80
+
81
+ # Reads `@position` directly so debug-style introspection doesn't
82
+ # materialize a `Position` as a side effect.
68
83
  def inspect
69
84
  parts = ["type=#{type.inspect}"]
70
85
  parts << "value=#{value.inspect}" unless value.nil?
71
86
  parts << "flag=#{flag.inspect}" unless flag.nil?
72
87
  parts << "unit=#{unit.inspect}" unless unit.nil?
73
- parts << "@#{position}" unless position.nil?
88
+ parts << "@#{@position}" if @position
74
89
 
75
90
  "#<CSS::Token #{parts.join(' ')}>"
76
91
  end
92
+
93
+ private
94
+
95
+ def compute_position
96
+ idx = @newlines.bsearch_index { it >= @start_offset } || @newlines.size
97
+ prev_nl = idx.zero? ? -1 : @newlines[idx - 1]
98
+
99
+ Position.new(
100
+ line: idx + 1,
101
+ column: @start_offset - prev_nl,
102
+ offset: @start_offset,
103
+ end_offset: @end_offset
104
+ )
105
+ end
77
106
  end
78
107
  end
data/lib/css/tokenizer.rb CHANGED
@@ -2,9 +2,8 @@ module CSS
2
2
  # Tokenizer based on CSS Syntax Module Level 3/4 §4.
3
3
  # https://www.w3.org/TR/css-syntax-3/#tokenization
4
4
  #
5
- # Not thread-safe: an instance carries mutable cursors (`@pos`,
6
- # `@newline_cursor`) that advance over the input. Allocate one
7
- # tokenizer per thread.
5
+ # Not thread-safe: an instance carries a mutable cursor (`@pos`) that
6
+ # advances over the input. Allocate one tokenizer per thread.
8
7
  class Tokenizer
9
8
  include CodePoints
10
9
 
@@ -26,9 +25,9 @@ module CSS
26
25
 
27
26
  def initialize(input, preserve_comments: false)
28
27
  @chars = preprocess(input)
28
+ @length = @chars.length
29
29
  @pos = 0
30
30
  @newlines = collect_newline_offsets(@chars)
31
- @newline_cursor = 0
32
31
  @preserve_comments = preserve_comments
33
32
  end
34
33
 
@@ -48,13 +47,12 @@ module CSS
48
47
  def next_token
49
48
  consume_comments unless @preserve_comments
50
49
 
51
- return Token.new(:eof) if @pos >= @chars.length
50
+ return Token.new(:eof) if @pos >= @length
52
51
 
53
52
  start_offset = @pos
54
53
  tok = consume_one_token
55
- line, column = line_column_at(start_offset)
56
54
 
57
- tok.assign_position!(Position.new(line:, column:, offset: start_offset, end_offset: @pos))
55
+ tok.assign_source!(start_offset, @pos, @newlines)
58
56
  end
59
57
 
60
58
  private
@@ -174,23 +172,6 @@ module CSS
174
172
  offsets
175
173
  end
176
174
 
177
- # Newline characters themselves are reported as belonging to the
178
- # line they terminate (col = offset + 1 on line 1, etc).
179
- #
180
- # Tokens are emitted in order, so the offsets passed in are
181
- # monotonically non-decreasing. We keep a running cursor into
182
- # `@newlines` and advance linearly — amortized O(1) per call,
183
- # vs. O(log n) per call with a fresh `bsearch`.
184
- def line_column_at(offset)
185
- while @newline_cursor < @newlines.size && @newlines[@newline_cursor] < offset
186
- @newline_cursor += 1
187
- end
188
-
189
- prev_nl = @newline_cursor.zero? ? -1 : @newlines[@newline_cursor - 1]
190
-
191
- [@newline_cursor + 1, offset - prev_nl]
192
- end
193
-
194
175
  def whitespace?(c)
195
176
  c == ' ' || c == "\n" || c == "\t"
196
177
  end
@@ -267,7 +248,7 @@ module CSS
267
248
  end
268
249
 
269
250
  def eof?
270
- @pos >= @chars.length
251
+ @pos >= @length
271
252
  end
272
253
 
273
254
  def consume_whitespace
data/lib/css/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CSS
2
- VERSION = '0.1.3'
2
+ VERSION = '0.1.4'
3
3
  end
data/sig/css/token.rbs CHANGED
@@ -22,7 +22,7 @@ module CSS
22
22
  def comment?: () -> bool
23
23
  def trivia?: () -> bool
24
24
 
25
- def assign_position!: (Position pos) -> self
25
+ def assign_source!: (Integer start_offset, Integer end_offset, Array[Integer] newlines) -> self
26
26
 
27
27
  def ==: (untyped other) -> bool
28
28
  def eql?: (untyped other) -> bool
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: p_css
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Keita Urashima