RubyGems - tans-parser - Versions diffs - 0.1.3 → 0.1.4 - Mend

tans-parser 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/CHANGELOG.md +18 -4
data/README.md +3 -0
data/lib/tans_parser/ansi_parser.rb +25 -8
data/lib/tans_parser/state.rb +1 -1
data/lib/tans_parser/version.rb +1 -1
metadata +15 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: fc3b07fbdd59e595af3d07d3091cdb75b9faa0c4daeaaaf3232f7787d35406e4
-  data.tar.gz: 75f354d01a6881cc8bb702bb900b41b2f654ecae8b7d5033bc439ad876eeb97a
+  metadata.gz: 907b90ef203876bd99cc0dbca8eb6b184ed6472b580c6d2918e31469d0f7cc12
+  data.tar.gz: b828065265563752bb2acf5ef380c2ba805f4b4e785fc602337c2b5048267824
 SHA512:
-  metadata.gz: 7d79356c65433991718c0c1854faa58f5a077a906ec653f13202d241b37fc01bb3fe253de8ba4fa9deb3b7bdee17965a1b2d2476a3b782c0a849c8b0696c4a65
-  data.tar.gz: 05045d158716d7e4ef9f87ecd2d4936735d1de681fb17cb97835a078b31654c9d0ed47022b610db7824d2a7408e82b66c34df738f1bc3b0ad7e67e40bfb4e111
+  metadata.gz: ae5ea0f42c3663d0edfc35e86bb11064396099f73beebcdcf2fe5d43cfc2b65d810ab2dc055cf5d186a3571aa11703abc006962407a30b979c1a7d4d6e46f3f7
+  data.tar.gz: b3379404c4cee09f3e49ceeaaa0e3ec90322a665fbb671a7d5e19506759b72e338fce4e782cc2017779d7812eadfddc098df82ba6bd0cb4ecec50a283d4ce044

data/CHANGELOG.md CHANGED Viewed

@@ -1,14 +1,27 @@
 # CHANGELOG
+## 0.1.4
+- **Unicode width support** — correct display width for CJK, emoji, and combining characters:
+  - `unicode-display_width` gem as runtime dependency (~> 2.5)
+  - `:width` key in cell hash (1 or 2) and `default_cell`
+  - Cursor advances by display width instead of always +1
+  - Wide chars (CJK/emoji) clear continuation cells
+  - Combining characters (zero-width) appended to previous cell
+  - Bugfix: parse loop uses `bytesize` to handle multi-byte chars at start of string
+- 4 new tests, 329 total, 100% line and branch coverage maintained
 ## 0.1.3
-- **Dialog recognition** — added support for rounded corners (`╭╮╰╯`) and double-line (`╔╗╚╝`) box-drawing characters
+- **Dialog recognition** — extended box-drawing character support and titled borders:
+  - Added rounded corners (`╭╮╰╯`) and double-line (`╔╗╚╝`)
   - `TOP_LEFT_CORNERS` extended with `╭╔╓╒`
   - `dialog_top_width` extended with `╮╗╖╕` (top-right) and `═` (double horizontal)
+  - Supports titled borders: finds first top-right corner anywhere on line (e.g. `╭─ Commands ─╮`)
 - **Statusbar recognition** — more flexible detection:
-  - Now checks last 2 rows instead of only the last row
+  - Checks last 2 rows instead of only the last row
   - Fallback: detects last row as statusbar if it has ≥30 characters of content, even without background color info
-  - Handles Karat-style footers (`? for shortcuts | mock ctx ░░░░░░░░░░ 0%`)
+  - Separator-preceded footers: scans all rows for footer after `───` separator line (Karat-style)
 - **Custom role registration** — `State#annotate_role(role, row:, col:, width:, height:, text:, **extra)`:
   - Manually annotate grid regions with semantic roles
   - `Selector#detect_annotations` picks them up during `scan` alongside auto-detected elements
@@ -16,9 +29,10 @@
 - **State#diff** — cell-level comparison between two State instances:
   - `diff(other_state)` — compares all 7 cell keys (`char`, `fg`, `bg`, `bold`, `italic`, `underline`, `blink`)
   - `diff(other_state, chars_only: true)` — compares only `:char`, ignores style/color changes
+  - `diff(other_state, ignore_rows: [2, 5])` — skips specified rows (e.g. cursor/prompt lines)
   - Handles different grid sizes (fills missing cells with `DEFAULT_CELL`)
   - Accepts raw hash or State object
-- 19 new tests, 319 total, 100% line and branch coverage maintained
+- 25 new tests, 325 total, 100% line and branch coverage maintained
 ## 0.1.2

data/README.md CHANGED Viewed

@@ -173,6 +173,9 @@ diff = before.diff(after)
 diff = before.diff(after, chars_only: true)
 # Only reports actual character differences
+# Ignore specific rows — useful for cursor/prompt lines
+diff = before.diff(after, chars_only: true, ignore_rows: [prompt_row])
 # Accepts raw hash as argument
 diff = before.diff({size: {rows: 5, cols: 10}, cursor: {...}, rows: [...]})
 ```

data/lib/tans_parser/ansi_parser.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require "unicode/display_width"
 module TansParser
   # Parses raw terminal output (ANSI escape sequences + text) into a
   # structured state representation.
@@ -154,11 +156,11 @@ module TansParser
       processed = raw
       i = 0
-      while i < processed.length
+      while i < processed.bytesize
         if processed[i] == "\e" && processed[i + 1] == "["
           # Find end of CSI sequence
           j = i + 2
-          j += 1 while j < processed.length && !processed[j].match?(/[A-HJ-KP-SX@`fhlmnRrsuq]/)
+          j += 1 while j < processed.bytesize && !processed[j].match?(/[A-HJ-KP-SX@`fhlmnRrsuq]/)
           seq = processed[i..j]
           dsr, new_saved, action = _apply_csi(seq, cursor, attrs, grid, rows, cols, saved_cursor, scroll_region)
@@ -279,14 +281,29 @@ module TansParser
         elsif (char, char_len = _utf8_char_at(processed, i))
           # Printable character (including multi-byte UTF-8)
           # cursor row/col are always clamped within bounds
-          cell = grid[cursor[:row]][cursor[:col]]
           current_charset = (active_charset == :g1 ? g1_charset : g0_charset)
           mapped_char = char
           mapped_char = DEC_MAP[char] if current_charset == :dec && DEC_MAP.key?(char)
-          cell[:char] = mapped_char
-          cell.merge!(attrs)
-          cursor[:col] += 1
-          cursor[:col] = cols - 1 if cursor[:col] >= cols
+          char_width = Unicode::DisplayWidth.of(mapped_char)
+          if char_width.zero? && cursor[:col].positive?
+            # Combining character — append to previous cell
+            prev_cell = grid[cursor[:row]][cursor[:col] - 1]
+            prev_cell[:char] = prev_cell[:char] + mapped_char
+          else
+            cell = grid[cursor[:row]][cursor[:col]]
+            cell[:char] = mapped_char
+            cell[:width] = char_width
+            cell.merge!(attrs)
+            # Clear continuation cells for wide characters
+            (1...char_width).each do |off|
+              cont = grid[cursor[:row]][cursor[:col] + off]
+              cont[:char] = ""
+              cont[:width] = 0
+            end
+            cursor[:col] += char_width
+            cursor[:col] = cols - 1 if cursor[:col] >= cols
+          end
           i += char_len
         else # rubocop:disable Lint/DuplicateBranch
           i += 1
@@ -691,7 +708,7 @@ module TansParser
     # rubocop:enable Metrics/CyclomaticComplexity
     def self.default_cell
-      { char: " ", fg: "default", bg: "default", bold: false, italic: false, underline: false, blink: false }
+      { char: " ", fg: "default", bg: "default", bold: false, italic: false, underline: false, blink: false, width: 1 }
     end
     # Extract a single UTF-8 character at position i in a binary string.

data/lib/tans_parser/state.rb CHANGED Viewed

@@ -124,7 +124,7 @@ module TansParser
     end
     DEFAULT_CELL = { char: " ", fg: "default", bg: "default",
-                     bold: false, italic: false, underline: false, blink: false, }.freeze
+                     bold: false, italic: false, underline: false, blink: false, width: 1, }.freeze
     # Compare this state with another State and return cell-level differences.
     # With chars_only: true, only differences in the :char key are reported.

data/lib/tans_parser/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module TansParser
-  VERSION = "0.1.3"
+  VERSION = "0.1.4"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: tans-parser
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - Haluk Durmus
@@ -121,6 +121,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.22'
+- !ruby/object:Gem::Dependency
+  name: unicode-display_width
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.5'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.5'
 description: tans-parser parses raw terminal output with ANSI escape sequences into
   a structured grid representation with per-cell attributes (char, fg, bg, bold, italic,
   underline, blink). Includes a query API (State) for text search, color inspection,