tans-parser 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +29 -0
- data/README.md +51 -4
- data/lib/tans_parser/element.rb +6 -0
- data/lib/tans_parser/selector.rb +36 -1
- data/lib/tans_parser/version.rb +1 -1
- metadata +15 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0ea2a71fb0db321851e51510e4250d9c2afcd2830495771ebb3d0adf36e707b0
|
|
4
|
+
data.tar.gz: 7198b01f46232a42fc80b8d09668b07dc20834c47d16e0a5502d1bb18b7a5d8f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 165b2cd6b7cc5c0edb4bf4dd510fca304bd48fdcd0bbc7757d9edaf9ef4ef9fd76e3bfc39708301eb16ad263df2a3ec7f2b450ccca9a9b9ed9c24aca64f68f0b
|
|
7
|
+
data.tar.gz: 8eaede4178263a217f715d9ee79e263aff0169801bb7f9eecc6b82a81608e32f8c442788e17afff67e3a6bd68165de6612a5ca0a80d661b3711fc092b4871561
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,34 @@
|
|
|
1
1
|
# CHANGELOG
|
|
2
2
|
|
|
3
|
+
## 0.1.5
|
|
4
|
+
|
|
5
|
+
- **Confidence scoring** — each detected element now carries a `confidence` value (0.0–1.0):
|
|
6
|
+
- `Element#confident?` — returns `true` when confidence is ≥ 0.5, and also when confidence is nil (not set), for backward compatibility
|
|
7
|
+
- Scoring heuristics per role:
|
|
8
|
+
- **Buttons**: 0.9 (square), 0.85 (round), 0.75 (angle); −0.2 penalty for single-character text
|
|
9
|
+
- **Checkboxes**: 0.9 (checked), 0.85 (unchecked)
|
|
10
|
+
- **Dialogs**: 0.9 base; +0.05 bonus for titled borders (text in top border)
|
|
11
|
+
- **Statusbars**: 0.9 (inverse colors), 0.85 (separator-preceded footer), 0.5 (≥30-char fallback)
|
|
12
|
+
- **Progress bars**: 0.9 (incomplete), 0.95 (100% complete)
|
|
13
|
+
- **Inputs**: 0.9
|
|
14
|
+
- **Labels**: 0.8 (single-word), 0.85 (multi-word)
|
|
15
|
+
- **Menus**: 0.9 (3+ items), 0.85 (2 items), 0.8 (dropdown `> Item`)
|
|
16
|
+
- **Tabs**: 0.85 (3+ tabs), 0.7 (2 tabs); +0.05 for focused tab
|
|
17
|
+
- **Annotations**: 1.0 (manually annotated); can be overridden via `confidence:` keyword
|
|
18
|
+
- `confidence` included in `Element#to_h` when set; excluded when nil (backward compatible)
|
|
19
|
+
- **Reduced false positives** — tighter heuristics to avoid misdetection:
|
|
20
|
+
- **Buttons**: skip numeric-only brackets (e.g. `[12]`, `[3]`)
|
|
21
|
+
- **Labels**: skip URL schemes (`https://example.com`) and time patterns (`Meeting at 3:00`)
|
|
22
|
+
- **Progress bars**: minimum width of 6 characters (`[##]` is no longer detected)
|
|
23
|
+
- **Negative tests** — 15 new tests covering edge cases (numeric brackets, URLs, time patterns, short progress bars, tabs across rows, incomplete boxes, prompt-like menus, etc.)
|
|
24
|
+
- **Confidence tests** — 15 new tests verifying scoring for every element type and edge case (titled dialogs, focused tabs, complete progress bars, annotation override, etc.)
|
|
25
|
+
- **Benchmarks** — `benchmark-ips` suite for parser, diff, and selector:
|
|
26
|
+
- `benchmarks/parser_benchmark.rb` — plain, ANSI, cursor, complex, and dialog-like workloads
|
|
27
|
+
- `benchmarks/diff_benchmark.rb` — full, chars_only, and ignore_rows modes
|
|
28
|
+
- `benchmarks/selector_benchmark.rb` — full scan with mixed UI elements
|
|
29
|
+
- `benchmark-ips` added as development dependency
|
|
30
|
+
- 30 new tests, 374 total, 100% line and branch coverage maintained
|
|
31
|
+
|
|
3
32
|
## 0.1.4
|
|
4
33
|
|
|
5
34
|
- **Unicode width support** — correct display width for CJK, emoji, and combining characters:
|
data/README.md
CHANGED
|
@@ -152,7 +152,7 @@ selector.statusbars # => includes annotated statusbar
|
|
|
152
152
|
|
|
153
153
|
# Annotations accept extra attributes
|
|
154
154
|
state.annotate_role(:button, row: 0, col: 0, width: 6, height: 1,
|
|
155
|
-
text: "Submit", fg: "green", disabled: false)
|
|
155
|
+
text: "Submit", fg: "green", disabled: false, confidence: 0.8)
|
|
156
156
|
```
|
|
157
157
|
|
|
158
158
|
### State comparison (diff)
|
|
@@ -197,13 +197,15 @@ el.height # => 1
|
|
|
197
197
|
el.checked # => true/false/nil
|
|
198
198
|
el.focused # => true/false/nil
|
|
199
199
|
el.disabled # => true/false/nil
|
|
200
|
+
el.confidence # => 0.9 (Float 0.0-1.0) or nil when not set
|
|
200
201
|
el.fg # => "default"
|
|
201
202
|
el.bg # => "default"
|
|
202
|
-
el.to_h # => {role: :button, text: "OK", row: 1, col: 2, ...}
|
|
203
|
+
el.to_h # => {role: :button, text: "OK", row: 1, col: 2, confidence: 0.9, ...}
|
|
203
204
|
|
|
204
205
|
# Predicates
|
|
205
206
|
el.checked? # => false (always boolean)
|
|
206
207
|
el.disabled? # => false (always boolean)
|
|
208
|
+
el.confident? # => true when confidence >= 0.5, or when confidence is nil (not set)
|
|
207
209
|
|
|
208
210
|
# Geometry
|
|
209
211
|
el.bounds # => {row: 1, col: 2, width: 4, height: 1}
|
|
@@ -214,6 +216,49 @@ el.type("hello") # => {action: :type, target: el, row: 1, col: 4, text: "hell
|
|
|
214
216
|
el.press_key(:tab) # => {action: :press_key, target: el, key: :tab}
|
|
215
217
|
```
|
|
216
218
|
|
|
219
|
+
### Confidence scoring
|
|
220
|
+
|
|
221
|
+
Each detected element carries a `confidence` value (0.0–1.0) reflecting how sure the heuristics are:
|
|
222
|
+
|
|
223
|
+
```ruby
|
|
224
|
+
btn = selector.button
|
|
225
|
+
btn.confidence # => 0.9 (square-bracket buttons are high confidence)
|
|
226
|
+
btn.confident? # => true
|
|
227
|
+
|
|
228
|
+
# Low-confidence detections can be filtered out
|
|
229
|
+
reliable = selector.buttons.select(&:confident?) # keeps confidence >= 0.5 (and elements with no confidence set)
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
Confidence values per role and context:
|
|
233
|
+
|
|
234
|
+
| Role | Scenario | Confidence |
|
|
235
|
+
|------|----------|------------|
|
|
236
|
+
| `:button` | `[ OK ]` square brackets | 0.9 |
|
|
237
|
+
| `:button` | `(Cancel)` round brackets | 0.85 |
|
|
238
|
+
| `:button` | `<Submit>` angle brackets | 0.75 |
|
|
239
|
+
| `:button` | Single-character text | −0.2 penalty |
|
|
240
|
+
| `:checkbox` | `[x]` checked | 0.9 |
|
|
241
|
+
| `:checkbox` | `[ ]` unchecked | 0.85 |
|
|
242
|
+
| `:input` | `[________]` underscore brackets | 0.9 |
|
|
243
|
+
| `:label` | `Project Name:` (multi-word) | 0.85 |
|
|
244
|
+
| `:label` | `Username:` (single-word) | 0.8 |
|
|
245
|
+
| `:menu` | 3+ items on menu bar | 0.9 |
|
|
246
|
+
| `:menu` | 2 items on menu bar | 0.85 |
|
|
247
|
+
| `:menu` | `> Item` dropdown | 0.8 |
|
|
248
|
+
| `:tab` | 3+ tabs | 0.85 |
|
|
249
|
+
| `:tab` | 2 tabs | 0.7 |
|
|
250
|
+
| `:tab` | Focused tab (underline/bg) | +0.05 bonus |
|
|
251
|
+
| `:dialog` | Complete box with all 4 corners | 0.9 |
|
|
252
|
+
| `:dialog` | Titled border (text in top border) | 0.95 |
|
|
253
|
+
| `:statusbar` | Inverse colors + ≥3 colored cells | 0.9 |
|
|
254
|
+
| `:statusbar` | Separator-preceded footer | 0.85 |
|
|
255
|
+
| `:statusbar` | Fallback (≥30 chars, no bg info) | 0.5 |
|
|
256
|
+
| `:progress` | `[##### ]` incomplete | 0.9 |
|
|
257
|
+
| `:progress` | `[##########]` 100% complete | 0.95 |
|
|
258
|
+
| Annotation | Manually annotated via `annotate_role` | 1.0 |
|
|
259
|
+
|
|
260
|
+
`confidence` is excluded from `to_h` when nil (backward compatible).
|
|
261
|
+
|
|
217
262
|
### Recognized element patterns
|
|
218
263
|
|
|
219
264
|
| Role | Pattern | Example |
|
|
@@ -241,8 +286,9 @@ Each cell is a Hash with these keys:
|
|
|
241
286
|
| `italic` | Boolean | Italic style |
|
|
242
287
|
| `underline` | Boolean | Underline style |
|
|
243
288
|
| `blink` | Boolean | Blink style |
|
|
289
|
+
| `width` | Integer | Display width (1 for normal, 2 for CJK/emoji, 0 for continuation) |
|
|
244
290
|
|
|
245
|
-
Default cell: `{char: " ", fg: "default", bg: "default", bold: false, italic: false, underline: false, blink: false}`
|
|
291
|
+
Default cell: `{char: " ", fg: "default", bg: "default", bold: false, italic: false, underline: false, blink: false, width: 1}`
|
|
246
292
|
|
|
247
293
|
## Supported ANSI sequences
|
|
248
294
|
|
|
@@ -255,7 +301,8 @@ Default cell: `{char: " ", fg: "default", bg: "default", bold: false, italic: fa
|
|
|
255
301
|
- **Cursor style** — DECSCUSR
|
|
256
302
|
- **Mouse tracking** — DEC private modes 1000, 1002, 1003, 1006
|
|
257
303
|
- **ISO 2022** — G0/G1 charset switching, DEC Special Graphics
|
|
258
|
-
- **UTF-8** — Multi-byte characters including emoji
|
|
304
|
+
- **UTF-8** — Multi-byte characters including CJK, emoji (correct display width via `unicode-display_width`)
|
|
305
|
+
- **Combining characters** — Zero-width combining marks appended to previous cell
|
|
259
306
|
|
|
260
307
|
## License
|
|
261
308
|
|
data/lib/tans_parser/element.rb
CHANGED
|
@@ -11,6 +11,7 @@ module TansParser
|
|
|
11
11
|
:focused,
|
|
12
12
|
:fg, :bg,
|
|
13
13
|
:disabled,
|
|
14
|
+
:confidence,
|
|
14
15
|
keyword_init: true,
|
|
15
16
|
) do
|
|
16
17
|
def checked?
|
|
@@ -37,6 +38,10 @@ module TansParser
|
|
|
37
38
|
{ action: :press_key, target: self, key: key }
|
|
38
39
|
end
|
|
39
40
|
|
|
41
|
+
def confident?
|
|
42
|
+
confidence.nil? || confidence >= 0.5
|
|
43
|
+
end
|
|
44
|
+
|
|
40
45
|
def to_h
|
|
41
46
|
{
|
|
42
47
|
role: role,
|
|
@@ -47,6 +52,7 @@ module TansParser
|
|
|
47
52
|
focused: focused,
|
|
48
53
|
fg: fg, bg: bg,
|
|
49
54
|
disabled: disabled,
|
|
55
|
+
confidence: confidence,
|
|
50
56
|
}.compact
|
|
51
57
|
end
|
|
52
58
|
end
|
data/lib/tans_parser/selector.rb
CHANGED
|
@@ -147,7 +147,7 @@ module TansParser
|
|
|
147
147
|
|
|
148
148
|
# Detects annotations: manually annotated roles from State#annotate_role
|
|
149
149
|
def detect_annotations
|
|
150
|
-
@state.annotations.map { |a| Element.new(a) }
|
|
150
|
+
@state.annotations.map { |a| Element.new(**a, confidence: a[:confidence] || 1.0) }
|
|
151
151
|
end
|
|
152
152
|
|
|
153
153
|
# Detects buttons: [ OK ], (Cancel), <Submit>
|
|
@@ -163,8 +163,18 @@ module TansParser
|
|
|
163
163
|
next if text.empty?
|
|
164
164
|
next if text.match?(/^_+$/)
|
|
165
165
|
next if text.match?(/^[ xX*]$/) # skip checkbox markers
|
|
166
|
+
next if text.match?(/^\d+$/) # skip numeric-only brackets (e.g. [12])
|
|
166
167
|
|
|
167
168
|
col = ::Regexp.last_match.begin(0)
|
|
169
|
+
confidence = if ::Regexp.last_match[1]
|
|
170
|
+
0.9
|
|
171
|
+
elsif ::Regexp.last_match[2]
|
|
172
|
+
0.85
|
|
173
|
+
else
|
|
174
|
+
0.75
|
|
175
|
+
end
|
|
176
|
+
confidence -= 0.2 if text.length == 1 # penalize single-character buttons
|
|
177
|
+
|
|
168
178
|
buttons << Element.new(
|
|
169
179
|
role: :button,
|
|
170
180
|
text: text,
|
|
@@ -172,6 +182,7 @@ module TansParser
|
|
|
172
182
|
width: ::Regexp.last_match[0].length, height: 1,
|
|
173
183
|
fg: row[col][:fg],
|
|
174
184
|
bg: row[col][:bg],
|
|
185
|
+
confidence: confidence,
|
|
175
186
|
)
|
|
176
187
|
end
|
|
177
188
|
end
|
|
@@ -190,12 +201,14 @@ module TansParser
|
|
|
190
201
|
checked = match[2] != " "
|
|
191
202
|
label_text = match[3].strip
|
|
192
203
|
col = match.begin(3)
|
|
204
|
+
confidence = checked ? 0.9 : 0.85
|
|
193
205
|
checkboxes << Element.new(
|
|
194
206
|
role: :checkbox,
|
|
195
207
|
text: label_text,
|
|
196
208
|
row: r, col: col,
|
|
197
209
|
width: label_text.length, height: 1,
|
|
198
210
|
checked: checked,
|
|
211
|
+
confidence: confidence,
|
|
199
212
|
)
|
|
200
213
|
end
|
|
201
214
|
checkboxes
|
|
@@ -218,11 +231,16 @@ module TansParser
|
|
|
218
231
|
if bottom_r
|
|
219
232
|
height = bottom_r - r + 1
|
|
220
233
|
text = extract_dialog_text(r + 1, tl_idx + 1, width - 2, height - 2)
|
|
234
|
+
confidence = 0.9
|
|
235
|
+
# Bonus for titled borders (text in top border)
|
|
236
|
+
top_border = line[tl_idx..(tl_idx + width - 1)]
|
|
237
|
+
confidence = (confidence + 0.05).round(2) if top_border.match?(/[A-Za-z]/)
|
|
221
238
|
dialogs << Element.new(
|
|
222
239
|
role: :dialog,
|
|
223
240
|
text: text,
|
|
224
241
|
row: r, col: tl_idx,
|
|
225
242
|
width: width, height: height,
|
|
243
|
+
confidence: confidence,
|
|
226
244
|
)
|
|
227
245
|
end
|
|
228
246
|
tl_idx += 1
|
|
@@ -291,6 +309,7 @@ module TansParser
|
|
|
291
309
|
row: row_idx, col: 0,
|
|
292
310
|
width: row.length, height: 1,
|
|
293
311
|
bg: non_default.first[:bg],
|
|
312
|
+
confidence: 0.9,
|
|
294
313
|
)
|
|
295
314
|
return bars
|
|
296
315
|
end
|
|
@@ -303,6 +322,7 @@ module TansParser
|
|
|
303
322
|
role: :statusbar, text: text,
|
|
304
323
|
row: grid.length - 1, col: 0,
|
|
305
324
|
width: last_row.length, height: 1,
|
|
325
|
+
confidence: 0.5,
|
|
306
326
|
)
|
|
307
327
|
return bars
|
|
308
328
|
end
|
|
@@ -326,6 +346,7 @@ module TansParser
|
|
|
326
346
|
role: :statusbar, text: text,
|
|
327
347
|
row: r, col: 0,
|
|
328
348
|
width: row.length, height: 1,
|
|
349
|
+
confidence: 0.85,
|
|
329
350
|
)
|
|
330
351
|
return bars
|
|
331
352
|
end
|
|
@@ -341,16 +362,19 @@ module TansParser
|
|
|
341
362
|
line = row.map { |c| c[:char] }.join
|
|
342
363
|
match = line.match(/\[([#>=-]+)\s*\]/)
|
|
343
364
|
next unless match
|
|
365
|
+
next if match[0].length < 6 # skip too-short brackets (e.g. [##])
|
|
344
366
|
|
|
345
367
|
filled = match[1]
|
|
346
368
|
total = match[0].length - 2
|
|
347
369
|
percent = (filled.length.to_f / total * 100).round
|
|
370
|
+
confidence = percent == 100 ? 0.95 : 0.9
|
|
348
371
|
bars << Element.new(
|
|
349
372
|
role: :progress,
|
|
350
373
|
text: "#{percent}%",
|
|
351
374
|
row: r, col: ::Regexp.last_match.begin(0),
|
|
352
375
|
width: match[0].length, height: 1,
|
|
353
376
|
checked: percent == 100,
|
|
377
|
+
confidence: confidence,
|
|
354
378
|
)
|
|
355
379
|
end
|
|
356
380
|
bars
|
|
@@ -369,6 +393,7 @@ module TansParser
|
|
|
369
393
|
text: "",
|
|
370
394
|
row: r, col: col,
|
|
371
395
|
width: m[0].length, height: 1,
|
|
396
|
+
confidence: 0.9,
|
|
372
397
|
)
|
|
373
398
|
end
|
|
374
399
|
end
|
|
@@ -385,13 +410,17 @@ module TansParser
|
|
|
385
410
|
|
|
386
411
|
label_text = match[1].strip.sub(/:$/, "").strip
|
|
387
412
|
next if label_text.empty? || label_text.length < 2
|
|
413
|
+
next if match[1].match?(/\d:/) # skip patterns ending with digit before colon (e.g. "Meeting at 3:")
|
|
414
|
+
next if line[match.end(1), 2] == "//" # skip URL schemes (e.g. "https://example.com")
|
|
388
415
|
|
|
389
416
|
col = match.begin(1)
|
|
417
|
+
confidence = label_text.include?(" ") ? 0.85 : 0.8 # multi-word labels are stronger signals
|
|
390
418
|
labels << Element.new(
|
|
391
419
|
role: :label,
|
|
392
420
|
text: label_text,
|
|
393
421
|
row: r, col: col,
|
|
394
422
|
width: match[1].length, height: 1,
|
|
423
|
+
confidence: confidence,
|
|
395
424
|
)
|
|
396
425
|
end
|
|
397
426
|
labels
|
|
@@ -411,11 +440,13 @@ module TansParser
|
|
|
411
440
|
items = stripped.split(/\s{2,}/)
|
|
412
441
|
if items.length >= 2 && items.all? { |i| i.match?(/^[A-Za-z]/) }
|
|
413
442
|
col = line.index(stripped)
|
|
443
|
+
confidence = items.length >= 3 ? 0.9 : 0.85
|
|
414
444
|
menus << Element.new(
|
|
415
445
|
role: :menu,
|
|
416
446
|
text: items.join(" | "),
|
|
417
447
|
row: r, col: col || 0,
|
|
418
448
|
width: line.length, height: 1,
|
|
449
|
+
confidence: confidence,
|
|
419
450
|
)
|
|
420
451
|
end
|
|
421
452
|
end
|
|
@@ -428,6 +459,7 @@ module TansParser
|
|
|
428
459
|
text: m[0].sub(/^>\s*/, "").strip,
|
|
429
460
|
row: r, col: m.begin(0),
|
|
430
461
|
width: m[0].length, height: 1,
|
|
462
|
+
confidence: 0.8,
|
|
431
463
|
)
|
|
432
464
|
end
|
|
433
465
|
end
|
|
@@ -453,12 +485,15 @@ module TansParser
|
|
|
453
485
|
|
|
454
486
|
cell = row[m.begin(0)]
|
|
455
487
|
focused = cell[:underline] || cell[:bg] != "default"
|
|
488
|
+
base_confidence = matches.length >= 3 ? 0.85 : 0.7
|
|
489
|
+
confidence = focused ? [base_confidence + 0.05, 0.9].min.round(2) : base_confidence
|
|
456
490
|
tabs << Element.new(
|
|
457
491
|
role: :tab,
|
|
458
492
|
text: tab_text,
|
|
459
493
|
row: r, col: m.begin(0),
|
|
460
494
|
width: m[0].length, height: 1,
|
|
461
495
|
focused: focused,
|
|
496
|
+
confidence: confidence,
|
|
462
497
|
)
|
|
463
498
|
end
|
|
464
499
|
end
|
data/lib/tans_parser/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tans-parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.4
|
|
4
|
+
version: 0.1.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Haluk Durmus
|
|
@@ -121,6 +121,20 @@ dependencies:
|
|
|
121
121
|
- - "~>"
|
|
122
122
|
- !ruby/object:Gem::Version
|
|
123
123
|
version: '0.22'
|
|
124
|
+
- !ruby/object:Gem::Dependency
|
|
125
|
+
name: benchmark-ips
|
|
126
|
+
requirement: !ruby/object:Gem::Requirement
|
|
127
|
+
requirements:
|
|
128
|
+
- - "~>"
|
|
129
|
+
- !ruby/object:Gem::Version
|
|
130
|
+
version: '2.13'
|
|
131
|
+
type: :development
|
|
132
|
+
prerelease: false
|
|
133
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
134
|
+
requirements:
|
|
135
|
+
- - "~>"
|
|
136
|
+
- !ruby/object:Gem::Version
|
|
137
|
+
version: '2.13'
|
|
124
138
|
- !ruby/object:Gem::Dependency
|
|
125
139
|
name: unicode-display_width
|
|
126
140
|
requirement: !ruby/object:Gem::Requirement
|