tans-parser 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -4
- data/README.md +3 -0
- data/lib/tans_parser/ansi_parser.rb +25 -8
- data/lib/tans_parser/state.rb +1 -1
- data/lib/tans_parser/version.rb +1 -1
- metadata +15 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 907b90ef203876bd99cc0dbca8eb6b184ed6472b580c6d2918e31469d0f7cc12
|
|
4
|
+
data.tar.gz: b828065265563752bb2acf5ef380c2ba805f4b4e785fc602337c2b5048267824
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ae5ea0f42c3663d0edfc35e86bb11064396099f73beebcdcf2fe5d43cfc2b65d810ab2dc055cf5d186a3571aa11703abc006962407a30b979c1a7d4d6e46f3f7
|
|
7
|
+
data.tar.gz: b3379404c4cee09f3e49ceeaaa0e3ec90322a665fbb671a7d5e19506759b72e338fce4e782cc2017779d7812eadfddc098df82ba6bd0cb4ecec50a283d4ce044
|
data/CHANGELOG.md
CHANGED
|
@@ -1,14 +1,27 @@
|
|
|
1
1
|
# CHANGELOG
|
|
2
2
|
|
|
3
|
+
## 0.1.4
|
|
4
|
+
|
|
5
|
+
- **Unicode width support** — correct display width for CJK, emoji, and combining characters:
|
|
6
|
+
- `unicode-display_width` gem as runtime dependency (~> 2.5)
|
|
7
|
+
- `:width` key in cell hash (1 or 2) and `default_cell`
|
|
8
|
+
- Cursor advances by display width instead of always +1
|
|
9
|
+
- Wide chars (CJK/emoji) clear continuation cells
|
|
10
|
+
- Combining characters (zero-width) appended to previous cell
|
|
11
|
+
- Bugfix: parse loop uses `bytesize` to handle multi-byte chars at start of string
|
|
12
|
+
- 4 new tests, 329 total, 100% line and branch coverage maintained
|
|
13
|
+
|
|
3
14
|
## 0.1.3
|
|
4
15
|
|
|
5
|
-
- **Dialog recognition** —
|
|
16
|
+
- **Dialog recognition** — extended box-drawing character support and titled borders:
|
|
17
|
+
- Added rounded corners (`╭╮╰╯`) and double-line (`╔╗╚╝`)
|
|
6
18
|
- `TOP_LEFT_CORNERS` extended with `╭╔╓╒`
|
|
7
19
|
- `dialog_top_width` extended with `╮╗╖╕` (top-right) and `═` (double horizontal)
|
|
20
|
+
- Supports titled borders: finds first top-right corner anywhere on line (e.g. `╭─ Commands ─╮`)
|
|
8
21
|
- **Statusbar recognition** — more flexible detection:
|
|
9
|
-
-
|
|
22
|
+
- Checks last 2 rows instead of only the last row
|
|
10
23
|
- Fallback: detects last row as statusbar if it has ≥30 characters of content, even without background color info
|
|
11
|
-
-
|
|
24
|
+
- Separator-preceded footers: scans all rows for footer after `───` separator line (Karat-style)
|
|
12
25
|
- **Custom role registration** — `State#annotate_role(role, row:, col:, width:, height:, text:, **extra)`:
|
|
13
26
|
- Manually annotate grid regions with semantic roles
|
|
14
27
|
- `Selector#detect_annotations` picks them up during `scan` alongside auto-detected elements
|
|
@@ -16,9 +29,10 @@
|
|
|
16
29
|
- **State#diff** — cell-level comparison between two State instances:
|
|
17
30
|
- `diff(other_state)` — compares all 7 cell keys (`char`, `fg`, `bg`, `bold`, `italic`, `underline`, `blink`)
|
|
18
31
|
- `diff(other_state, chars_only: true)` — compares only `:char`, ignores style/color changes
|
|
32
|
+
- `diff(other_state, ignore_rows: [2, 5])` — skips specified rows (e.g. cursor/prompt lines)
|
|
19
33
|
- Handles different grid sizes (fills missing cells with `DEFAULT_CELL`)
|
|
20
34
|
- Accepts raw hash or State object
|
|
21
|
-
-
|
|
35
|
+
- 25 new tests, 325 total, 100% line and branch coverage maintained
|
|
22
36
|
|
|
23
37
|
## 0.1.2
|
|
24
38
|
|
data/README.md
CHANGED
|
@@ -173,6 +173,9 @@ diff = before.diff(after)
|
|
|
173
173
|
diff = before.diff(after, chars_only: true)
|
|
174
174
|
# Only reports actual character differences
|
|
175
175
|
|
|
176
|
+
# Ignore specific rows — useful for cursor/prompt lines
|
|
177
|
+
diff = before.diff(after, chars_only: true, ignore_rows: [prompt_row])
|
|
178
|
+
|
|
176
179
|
# Accepts raw hash as argument
|
|
177
180
|
diff = before.diff({size: {rows: 5, cols: 10}, cursor: {...}, rows: [...]})
|
|
178
181
|
```
|
data/lib/tans_parser/ansi_parser.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "unicode/display_width"
|
|
4
|
+
|
|
3
5
|
module TansParser
|
|
4
6
|
# Parses raw terminal output (ANSI escape sequences + text) into a
|
|
5
7
|
# structured state representation.
|
|
@@ -154,11 +156,11 @@ module TansParser
|
|
|
154
156
|
processed = raw
|
|
155
157
|
|
|
156
158
|
i = 0
|
|
157
|
-
while i < processed.
|
|
159
|
+
while i < processed.bytesize
|
|
158
160
|
if processed[i] == "\e" && processed[i + 1] == "["
|
|
159
161
|
# Find end of CSI sequence
|
|
160
162
|
j = i + 2
|
|
161
|
-
j += 1 while j < processed.
|
|
163
|
+
j += 1 while j < processed.bytesize && !processed[j].match?(/[A-HJ-KP-SX@`fhlmnRrsuq]/)
|
|
162
164
|
seq = processed[i..j]
|
|
163
165
|
|
|
164
166
|
dsr, new_saved, action = _apply_csi(seq, cursor, attrs, grid, rows, cols, saved_cursor, scroll_region)
|
|
@@ -279,14 +281,29 @@ module TansParser
|
|
|
279
281
|
elsif (char, char_len = _utf8_char_at(processed, i))
|
|
280
282
|
# Printable character (including multi-byte UTF-8)
|
|
281
283
|
# cursor row/col are always clamped within bounds
|
|
282
|
-
cell = grid[cursor[:row]][cursor[:col]]
|
|
283
284
|
current_charset = (active_charset == :g1 ? g1_charset : g0_charset)
|
|
284
285
|
mapped_char = char
|
|
285
286
|
mapped_char = DEC_MAP[char] if current_charset == :dec && DEC_MAP.key?(char)
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
cursor[:col]
|
|
289
|
-
|
|
287
|
+
|
|
288
|
+
char_width = Unicode::DisplayWidth.of(mapped_char)
|
|
289
|
+
if char_width.zero? && cursor[:col].positive?
|
|
290
|
+
# Combining character — append to previous cell
|
|
291
|
+
prev_cell = grid[cursor[:row]][cursor[:col] - 1]
|
|
292
|
+
prev_cell[:char] = prev_cell[:char] + mapped_char
|
|
293
|
+
else
|
|
294
|
+
cell = grid[cursor[:row]][cursor[:col]]
|
|
295
|
+
cell[:char] = mapped_char
|
|
296
|
+
cell[:width] = char_width
|
|
297
|
+
cell.merge!(attrs)
|
|
298
|
+
# Clear continuation cells for wide characters
|
|
299
|
+
(1...char_width).each do |off|
|
|
300
|
+
cont = grid[cursor[:row]][cursor[:col] + off]
|
|
301
|
+
cont[:char] = ""
|
|
302
|
+
cont[:width] = 0
|
|
303
|
+
end
|
|
304
|
+
cursor[:col] += char_width
|
|
305
|
+
cursor[:col] = cols - 1 if cursor[:col] >= cols
|
|
306
|
+
end
|
|
290
307
|
i += char_len
|
|
291
308
|
else # rubocop:disable Lint/DuplicateBranch
|
|
292
309
|
i += 1
|
|
@@ -691,7 +708,7 @@ module TansParser
|
|
|
691
708
|
# rubocop:enable Metrics/CyclomaticComplexity
|
|
692
709
|
|
|
693
710
|
def self.default_cell
|
|
694
|
-
{ char: " ", fg: "default", bg: "default", bold: false, italic: false, underline: false, blink: false }
|
|
711
|
+
{ char: " ", fg: "default", bg: "default", bold: false, italic: false, underline: false, blink: false, width: 1 }
|
|
695
712
|
end
|
|
696
713
|
|
|
697
714
|
# Extract a single UTF-8 character at position i in a binary string.
|
data/lib/tans_parser/state.rb
CHANGED
|
@@ -124,7 +124,7 @@ module TansParser
|
|
|
124
124
|
end
|
|
125
125
|
|
|
126
126
|
DEFAULT_CELL = { char: " ", fg: "default", bg: "default",
|
|
127
|
-
bold: false, italic: false, underline: false, blink: false, }.freeze
|
|
127
|
+
bold: false, italic: false, underline: false, blink: false, width: 1, }.freeze
|
|
128
128
|
|
|
129
129
|
# Compare this state with another State and return cell-level differences.
|
|
130
130
|
# With chars_only: true, only differences in the :char key are reported.
|
data/lib/tans_parser/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tans-parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.3
|
|
4
|
+
version: 0.1.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Haluk Durmus
|
|
@@ -121,6 +121,20 @@ dependencies:
|
|
|
121
121
|
- - "~>"
|
|
122
122
|
- !ruby/object:Gem::Version
|
|
123
123
|
version: '0.22'
|
|
124
|
+
- !ruby/object:Gem::Dependency
|
|
125
|
+
name: unicode-display_width
|
|
126
|
+
requirement: !ruby/object:Gem::Requirement
|
|
127
|
+
requirements:
|
|
128
|
+
- - "~>"
|
|
129
|
+
- !ruby/object:Gem::Version
|
|
130
|
+
version: '2.5'
|
|
131
|
+
type: :runtime
|
|
132
|
+
prerelease: false
|
|
133
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
134
|
+
requirements:
|
|
135
|
+
- - "~>"
|
|
136
|
+
- !ruby/object:Gem::Version
|
|
137
|
+
version: '2.5'
|
|
124
138
|
description: tans-parser parses raw terminal output with ANSI escape sequences into
|
|
125
139
|
a structured grid representation with per-cell attributes (char, fg, bg, bold, italic,
|
|
126
140
|
underline, blink). Includes a query API (State) for text search, color inspection,
|