tans-parser 0.1.3 → 0.1.4

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fc3b07fbdd59e595af3d07d3091cdb75b9faa0c4daeaaaf3232f7787d35406e4
4
- data.tar.gz: 75f354d01a6881cc8bb702bb900b41b2f654ecae8b7d5033bc439ad876eeb97a
3
+ metadata.gz: 907b90ef203876bd99cc0dbca8eb6b184ed6472b580c6d2918e31469d0f7cc12
4
+ data.tar.gz: b828065265563752bb2acf5ef380c2ba805f4b4e785fc602337c2b5048267824
5
5
  SHA512:
6
- metadata.gz: 7d79356c65433991718c0c1854faa58f5a077a906ec653f13202d241b37fc01bb3fe253de8ba4fa9deb3b7bdee17965a1b2d2476a3b782c0a849c8b0696c4a65
7
- data.tar.gz: 05045d158716d7e4ef9f87ecd2d4936735d1de681fb17cb97835a078b31654c9d0ed47022b610db7824d2a7408e82b66c34df738f1bc3b0ad7e67e40bfb4e111
6
+ metadata.gz: ae5ea0f42c3663d0edfc35e86bb11064396099f73beebcdcf2fe5d43cfc2b65d810ab2dc055cf5d186a3571aa11703abc006962407a30b979c1a7d4d6e46f3f7
7
+ data.tar.gz: b3379404c4cee09f3e49ceeaaa0e3ec90322a665fbb671a7d5e19506759b72e338fce4e782cc2017779d7812eadfddc098df82ba6bd0cb4ecec50a283d4ce044
data/CHANGELOG.md CHANGED
@@ -1,14 +1,27 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 0.1.4
4
+
5
+ - **Unicode width support** — correct display width for CJK, emoji, and combining characters:
6
+ - `unicode-display_width` gem as runtime dependency (~> 2.5)
7
+ - `:width` key in cell hash (1 or 2) and `default_cell`
8
+ - Cursor advances by display width instead of always +1
9
+ - Wide chars (CJK/emoji) clear continuation cells
10
+ - Combining characters (zero-width) appended to previous cell
11
+ - Bugfix: parse loop uses `bytesize` to handle multi-byte chars at start of string
12
+ - 4 new tests, 329 total, 100% line and branch coverage maintained
13
+
3
14
  ## 0.1.3
4
15
 
5
- - **Dialog recognition** — added support for rounded corners (`╭╮╰╯`) and double-line (`╔╗╚╝`) box-drawing characters
16
+ - **Dialog recognition** — extended box-drawing character support and titled borders:
17
+ - Added rounded corners (`╭╮╰╯`) and double-line (`╔╗╚╝`)
6
18
  - `TOP_LEFT_CORNERS` extended with `╭╔╓╒`
7
19
  - `dialog_top_width` extended with `╮╗╖╕` (top-right) and `═` (double horizontal)
20
+ - Supports titled borders: finds first top-right corner anywhere on line (e.g. `╭─ Commands ─╮`)
8
21
  - **Statusbar recognition** — more flexible detection:
9
- - Now checks last 2 rows instead of only the last row
22
+ - Checks last 2 rows instead of only the last row
10
23
  - Fallback: detects last row as statusbar if it has ≥30 characters of content, even without background color info
11
- - Handles Karat-style footers (`? for shortcuts | mock ctx ░░░░░░░░░░ 0%`)
24
+ - Separator-preceded footers: scans all rows for a footer that follows a `───` separator line (Karat-style)
12
25
  - **Custom role registration** — `State#annotate_role(role, row:, col:, width:, height:, text:, **extra)`:
13
26
  - Manually annotate grid regions with semantic roles
14
27
  - `Selector#detect_annotations` picks them up during `scan` alongside auto-detected elements
@@ -16,9 +29,10 @@
16
29
  - **State#diff** — cell-level comparison between two State instances:
17
30
  - `diff(other_state)` — compares all 7 cell keys (`char`, `fg`, `bg`, `bold`, `italic`, `underline`, `blink`)
18
31
  - `diff(other_state, chars_only: true)` — compares only `:char`, ignores style/color changes
32
+ - `diff(other_state, ignore_rows: [2, 5])` — skips specified rows (e.g. cursor/prompt lines)
19
33
  - Handles different grid sizes (fills missing cells with `DEFAULT_CELL`)
20
34
  - Accepts raw hash or State object
21
- - 19 new tests, 319 total, 100% line and branch coverage maintained
35
+ - 25 new tests, 325 total, 100% line and branch coverage maintained
22
36
 
23
37
  ## 0.1.2
24
38
 
data/README.md CHANGED
@@ -173,6 +173,9 @@ diff = before.diff(after)
173
173
  diff = before.diff(after, chars_only: true)
174
174
  # Only reports actual character differences
175
175
 
176
+ # Ignore specific rows — useful for cursor/prompt lines
177
+ diff = before.diff(after, chars_only: true, ignore_rows: [prompt_row])
178
+
176
179
  # Accepts raw hash as argument
177
180
  diff = before.diff({size: {rows: 5, cols: 10}, cursor: {...}, rows: [...]})
178
181
  ```
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "unicode/display_width"
4
+
3
5
  module TansParser
4
6
  # Parses raw terminal output (ANSI escape sequences + text) into a
5
7
  # structured state representation.
@@ -154,11 +156,11 @@ module TansParser
154
156
  processed = raw
155
157
 
156
158
  i = 0
157
- while i < processed.length
159
+ while i < processed.bytesize
158
160
  if processed[i] == "\e" && processed[i + 1] == "["
159
161
  # Find end of CSI sequence
160
162
  j = i + 2
161
- j += 1 while j < processed.length && !processed[j].match?(/[A-HJ-KP-SX@`fhlmnRrsuq]/)
163
+ j += 1 while j < processed.bytesize && !processed[j].match?(/[A-HJ-KP-SX@`fhlmnRrsuq]/)
162
164
  seq = processed[i..j]
163
165
 
164
166
  dsr, new_saved, action = _apply_csi(seq, cursor, attrs, grid, rows, cols, saved_cursor, scroll_region)
@@ -279,14 +281,29 @@ module TansParser
279
281
  elsif (char, char_len = _utf8_char_at(processed, i))
280
282
  # Printable character (including multi-byte UTF-8)
281
283
  # cursor row/col are always clamped within bounds
282
- cell = grid[cursor[:row]][cursor[:col]]
283
284
  current_charset = (active_charset == :g1 ? g1_charset : g0_charset)
284
285
  mapped_char = char
285
286
  mapped_char = DEC_MAP[char] if current_charset == :dec && DEC_MAP.key?(char)
286
- cell[:char] = mapped_char
287
- cell.merge!(attrs)
288
- cursor[:col] += 1
289
- cursor[:col] = cols - 1 if cursor[:col] >= cols
287
+
288
+ char_width = Unicode::DisplayWidth.of(mapped_char)
289
+ if char_width.zero? && cursor[:col].positive?
290
+ # Combining character (zero display width): append it to the previous cell
291
+ prev_cell = grid[cursor[:row]][cursor[:col] - 1]
292
+ prev_cell[:char] = prev_cell[:char] + mapped_char
293
+ else
294
+ cell = grid[cursor[:row]][cursor[:col]]
295
+ cell[:char] = mapped_char
296
+ cell[:width] = char_width
297
+ cell.merge!(attrs)
298
+ # Clear continuation cells for wide characters
299
+ (1...char_width).each do |off|
300
+ cont = grid[cursor[:row]][cursor[:col] + off]
301
+ cont[:char] = ""
302
+ cont[:width] = 0
303
+ end
304
+ cursor[:col] += char_width
305
+ cursor[:col] = cols - 1 if cursor[:col] >= cols
306
+ end
290
307
  i += char_len
291
308
  else # rubocop:disable Lint/DuplicateBranch
292
309
  i += 1
@@ -691,7 +708,7 @@ module TansParser
691
708
  # rubocop:enable Metrics/CyclomaticComplexity
692
709
 
693
710
  def self.default_cell
694
- { char: " ", fg: "default", bg: "default", bold: false, italic: false, underline: false, blink: false }
711
+ { char: " ", fg: "default", bg: "default", bold: false, italic: false, underline: false, blink: false, width: 1 }
695
712
  end
696
713
 
697
714
  # Extract a single UTF-8 character at position i in a binary string.
@@ -124,7 +124,7 @@ module TansParser
124
124
  end
125
125
 
126
126
  DEFAULT_CELL = { char: " ", fg: "default", bg: "default",
127
- bold: false, italic: false, underline: false, blink: false, }.freeze
127
+ bold: false, italic: false, underline: false, blink: false, width: 1, }.freeze
128
128
 
129
129
  # Compare this state with another State and return cell-level differences.
130
130
  # With chars_only: true, only differences in the :char key are reported.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module TansParser
4
- VERSION = "0.1.3"
4
+ VERSION = "0.1.4"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tans-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Haluk Durmus
@@ -121,6 +121,20 @@ dependencies:
121
121
  - - "~>"
122
122
  - !ruby/object:Gem::Version
123
123
  version: '0.22'
124
+ - !ruby/object:Gem::Dependency
125
+ name: unicode-display_width
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '2.5'
131
+ type: :runtime
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '2.5'
124
138
  description: tans-parser parses raw terminal output with ANSI escape sequences into
125
139
  a structured grid representation with per-cell attributes (char, fg, bg, bold, italic,
126
140
  underline, blink). Includes a query API (State) for text search, color inspection,