p_css 0.1.2 → 0.1.3

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 772060eec5d726253913cd8be2daa6180429680d014c1752c9b26b15618e4ba8
4
- data.tar.gz: edd5e5afc5871362dc21cca89d8eb6a5b085350022f79f2b8c6f07a032f07aaa
3
+ metadata.gz: df1cd693075fe04da6a0c9ce4c65c9c4ef5f85c5b84bb82bd0136cf62fe52552
4
+ data.tar.gz: 9a9fc875c3872c49396c5b6c753a099ba3cc344efa7aa97745958d277630c7cb
5
5
  SHA512:
6
- metadata.gz: 7b7e331023830f09938bbd03a29f50c49b19b40169226a9c13866c1ce2436b0dcd71987fb183f403b7b98784c68c8e8426e6043dd3757ebe5343e37862e2c228
7
- data.tar.gz: c06594e2e861c77e56cc32dcb1980b3fba8bfe0175fa1f108e4b0d02bef929efd59274e5c53f04ecea8245b47c583f41eb8a4396a5689052a569af54e8026f5a
6
+ metadata.gz: 28c76784dac592aa39cfaa5100b69e4765861ec2ab1ed536fb95dfb963481de2bd64d6b6b954f71b14b17ea4a328cc65af6f7a63df662f2083c2ef5ec623e892
7
+ data.tar.gz: 291d12b999205c032e7cebaa5da4b69aff717cb9f7b9743c3c54ce99539b59ec49612b87e9bcf105d374cc79871d8955fc57f18200b8ba76c396894db14f349f
data/lib/css/tokenizer.rb CHANGED
@@ -1,6 +1,10 @@
1
1
  module CSS
2
2
  # Tokenizer based on CSS Syntax Module Level 3/4 §4.
3
3
  # https://www.w3.org/TR/css-syntax-3/#tokenization
4
+ #
5
+ # Not thread-safe: an instance carries mutable cursors (`@pos`,
6
+ # `@newline_cursor`) that advance over the input. Allocate one
7
+ # tokenizer per thread.
4
8
  class Tokenizer
5
9
  include CodePoints
6
10
 
@@ -21,9 +25,10 @@ module CSS
21
25
  PREPROCESS_RE = /\r\n?|\f|\0/.freeze
22
26
 
23
27
  def initialize(input, preserve_comments: false)
24
- @input = preprocess(input)
28
+ @chars = preprocess(input)
25
29
  @pos = 0
26
- @newlines = collect_newline_offsets(@input)
30
+ @newlines = collect_newline_offsets(@chars)
31
+ @newline_cursor = 0
27
32
  @preserve_comments = preserve_comments
28
33
  end
29
34
 
@@ -43,7 +48,7 @@ module CSS
43
48
  def next_token
44
49
  consume_comments unless @preserve_comments
45
50
 
46
- return Token.new(:eof) if @pos >= @input.length
51
+ return Token.new(:eof) if @pos >= @chars.length
47
52
 
48
53
  start_offset = @pos
49
54
  tok = consume_one_token
@@ -127,18 +132,25 @@ module CSS
127
132
  end
128
133
  end
129
134
 
135
+ # Random access on a non-ascii-only UTF-8 String is O(distance from
136
+ # the cached character index), and the peek-ahead pattern (`peek`,
137
+ # `peek(1)`, `peek(2)`) defeats the cache — empirically ~200× slower
138
+ # than indexing a flat Array. Splitting into `chars` once amortizes
139
+ # the UTF-8 walk and gives us O(1) random access for the rest of
140
+ # tokenization.
130
141
  def preprocess(input)
131
- input.encode('UTF-8').gsub(PREPROCESS_RE) {
132
- $~[0] == "\0" ? CodePoints::REPLACEMENT : "\n"
133
- }
142
+ input
143
+ .encode('UTF-8')
144
+ .gsub(PREPROCESS_RE) { $~[0] == "\0" ? CodePoints::REPLACEMENT : "\n" }
145
+ .chars
134
146
  end
135
147
 
136
148
  def peek(offset = 0)
137
- @input[@pos + offset]
149
+ @chars[@pos + offset]
138
150
  end
139
151
 
140
152
  def consume
141
- c = @input[@pos]
153
+ c = @chars[@pos]
142
154
  return nil if c.nil?
143
155
 
144
156
  @pos += 1
@@ -149,21 +161,34 @@ module CSS
149
161
  @pos -= 1
150
162
  end
151
163
 
152
- def collect_newline_offsets(input)
164
+ def collect_newline_offsets(chars)
153
165
  offsets = []
154
- i = -1
166
+ i = 0
167
+ n = chars.length
168
+
169
+ while i < n
170
+ offsets << i if chars[i] == "\n"
171
+ i += 1
172
+ end
155
173
 
156
- offsets << i while (i = input.index("\n", i + 1))
157
174
  offsets
158
175
  end
159
176
 
160
- # Newline characters themselves are reported as belonging to the line
161
- # they terminate (col = offset + 1 on line 1, etc).
177
+ # Newline characters themselves are reported as belonging to the
178
+ # line they terminate (col = offset + 1 on line 1, etc).
179
+ #
180
+ # Tokens are emitted in order, so the offsets passed in are
181
+ # monotonically non-decreasing. We keep a running cursor into
182
+ # `@newlines` and advance linearly — amortized O(1) per call,
183
+ # vs. O(log n) per call with a fresh `bsearch`.
162
184
  def line_column_at(offset)
163
- idx = @newlines.bsearch_index { it >= offset } || @newlines.size
164
- prev_nl = idx.zero? ? -1 : @newlines[idx - 1]
185
+ while @newline_cursor < @newlines.size && @newlines[@newline_cursor] < offset
186
+ @newline_cursor += 1
187
+ end
188
+
189
+ prev_nl = @newline_cursor.zero? ? -1 : @newlines[@newline_cursor - 1]
165
190
 
166
- [idx + 1, offset - prev_nl]
191
+ [@newline_cursor + 1, offset - prev_nl]
167
192
  end
168
193
 
169
194
  def whitespace?(c)
@@ -242,7 +267,7 @@ module CSS
242
267
  end
243
268
 
244
269
  def eof?
245
- @pos >= @input.length
270
+ @pos >= @chars.length
246
271
  end
247
272
 
248
273
  def consume_whitespace
data/lib/css/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CSS
2
- VERSION = '0.1.2'
2
+ VERSION = '0.1.3'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: p_css
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Keita Urashima