RubyGems - p_css - Versions diffs - 0.1.2 → 0.1.3 - Mend

p_css 0.1.2 → 0.1.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 772060eec5d726253913cd8be2daa6180429680d014c1752c9b26b15618e4ba8
-  data.tar.gz: edd5e5afc5871362dc21cca89d8eb6a5b085350022f79f2b8c6f07a032f07aaa
+  metadata.gz: df1cd693075fe04da6a0c9ce4c65c9c4ef5f85c5b84bb82bd0136cf62fe52552
+  data.tar.gz: 9a9fc875c3872c49396c5b6c753a099ba3cc344efa7aa97745958d277630c7cb
 SHA512:
-  metadata.gz: 7b7e331023830f09938bbd03a29f50c49b19b40169226a9c13866c1ce2436b0dcd71987fb183f403b7b98784c68c8e8426e6043dd3757ebe5343e37862e2c228
-  data.tar.gz: c06594e2e861c77e56cc32dcb1980b3fba8bfe0175fa1f108e4b0d02bef929efd59274e5c53f04ecea8245b47c583f41eb8a4396a5689052a569af54e8026f5a
+  metadata.gz: 28c76784dac592aa39cfaa5100b69e4765861ec2ab1ed536fb95dfb963481de2bd64d6b6b954f71b14b17ea4a328cc65af6f7a63df662f2083c2ef5ec623e892
+  data.tar.gz: 291d12b999205c032e7cebaa5da4b69aff717cb9f7b9743c3c54ce99539b59ec49612b87e9bcf105d374cc79871d8955fc57f18200b8ba76c396894db14f349f

data/lib/css/tokenizer.rb CHANGED Viewed

@@ -1,6 +1,10 @@
 module CSS
   # Tokenizer based on CSS Syntax Module Level 3/4 §4.
   # https://www.w3.org/TR/css-syntax-3/#tokenization
+  #
+  # Not thread-safe: an instance carries mutable cursors (`@pos`,
+  # `@newline_cursor`) that advance over the input. Allocate one
+  # tokenizer per thread.
   class Tokenizer
     include CodePoints
@@ -21,9 +25,10 @@ module CSS
     PREPROCESS_RE = /\r\n?|\f|\0/.freeze
     def initialize(input, preserve_comments: false)
-      @input             = preprocess(input)
+      @chars             = preprocess(input)
       @pos               = 0
-      @newlines          = collect_newline_offsets(@input)
+      @newlines          = collect_newline_offsets(@chars)
+      @newline_cursor    = 0
       @preserve_comments = preserve_comments
     end
@@ -43,7 +48,7 @@ module CSS
     def next_token
       consume_comments unless @preserve_comments
-      return Token.new(:eof) if @pos >= @input.length
+      return Token.new(:eof) if @pos >= @chars.length
       start_offset = @pos
       tok          = consume_one_token
@@ -127,18 +132,25 @@ module CSS
       end
     end
+    # Random access on a non-ascii-only UTF-8 String is O(distance from
+    # the cached character index), and the peek-ahead pattern (`peek`,
+    # `peek(1)`, `peek(2)`) defeats the cache — empirically ~200× slower
+    # than indexing a flat Array. Splitting into `chars` once amortizes
+    # the UTF-8 walk and gives us O(1) random access for the rest of
+    # tokenization.
     def preprocess(input)
-      input.encode('UTF-8').gsub(PREPROCESS_RE) {
-        $~[0] == "\0" ? CodePoints::REPLACEMENT : "\n"
-      }
+      input
+        .encode('UTF-8')
+        .gsub(PREPROCESS_RE) { $~[0] == "\0" ? CodePoints::REPLACEMENT : "\n" }
+        .chars
     end
     def peek(offset = 0)
-      @input[@pos + offset]
+      @chars[@pos + offset]
     end
     def consume
-      c = @input[@pos]
+      c = @chars[@pos]
       return nil if c.nil?
       @pos += 1
@@ -149,21 +161,34 @@ module CSS
       @pos -= 1
     end
-    def collect_newline_offsets(input)
+    def collect_newline_offsets(chars)
       offsets = []
-      i       = -1
+      i       = 0
+      n       = chars.length
+      while i < n
+        offsets << i if chars[i] == "\n"
+        i += 1
+      end
-      offsets << i while (i = input.index("\n", i + 1))
       offsets
     end
-    # Newline characters themselves are reported as belonging to the line
-    # they terminate (col = offset + 1 on line 1, etc).
+    # Newline characters themselves are reported as belonging to the
+    # line they terminate (col = offset + 1 on line 1, etc).
+    #
+    # Tokens are emitted in order, so the offsets passed in are
+    # monotonically non-decreasing. We keep a running cursor into
+    # `@newlines` and advance linearly — amortized O(1) per call,
+    # vs. O(log n) per call with a fresh `bsearch`.
     def line_column_at(offset)
-      idx     = @newlines.bsearch_index { it >= offset } || @newlines.size
-      prev_nl = idx.zero? ? -1 : @newlines[idx - 1]
+      while @newline_cursor < @newlines.size && @newlines[@newline_cursor] < offset
+        @newline_cursor += 1
+      end
+      prev_nl = @newline_cursor.zero? ? -1 : @newlines[@newline_cursor - 1]
-      [idx + 1, offset - prev_nl]
+      [@newline_cursor + 1, offset - prev_nl]
     end
     def whitespace?(c)
@@ -242,7 +267,7 @@ module CSS
     end
     def eof?
-      @pos >= @input.length
+      @pos >= @chars.length
     end
     def consume_whitespace

data/lib/css/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module CSS
-  VERSION = '0.1.2'
+  VERSION = '0.1.3'
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: p_css
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - Keita Urashima