tans-parser 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 907b90ef203876bd99cc0dbca8eb6b184ed6472b580c6d2918e31469d0f7cc12
4
- data.tar.gz: b828065265563752bb2acf5ef380c2ba805f4b4e785fc602337c2b5048267824
3
+ metadata.gz: 0ea2a71fb0db321851e51510e4250d9c2afcd2830495771ebb3d0adf36e707b0
4
+ data.tar.gz: 7198b01f46232a42fc80b8d09668b07dc20834c47d16e0a5502d1bb18b7a5d8f
5
5
  SHA512:
6
- metadata.gz: ae5ea0f42c3663d0edfc35e86bb11064396099f73beebcdcf2fe5d43cfc2b65d810ab2dc055cf5d186a3571aa11703abc006962407a30b979c1a7d4d6e46f3f7
7
- data.tar.gz: b3379404c4cee09f3e49ceeaaa0e3ec90322a665fbb671a7d5e19506759b72e338fce4e782cc2017779d7812eadfddc098df82ba6bd0cb4ecec50a283d4ce044
6
+ metadata.gz: 165b2cd6b7cc5c0edb4bf4dd510fca304bd48fdcd0bbc7757d9edaf9ef4ef9fd76e3bfc39708301eb16ad263df2a3ec7f2b450ccca9a9b9ed9c24aca64f68f0b
7
+ data.tar.gz: 8eaede4178263a217f715d9ee79e263aff0169801bb7f9eecc6b82a81608e32f8c442788e17afff67e3a6bd68165de6612a5ca0a80d661b3711fc092b4871561
data/CHANGELOG.md CHANGED
@@ -1,5 +1,34 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 0.1.5
4
+
5
+ - **Confidence scoring** — each detected element now carries a `confidence` value (0.0–1.0):
6
+ - `Element#confident?` — returns `true` when confidence is ≥ 0.5, and also when confidence is nil (not set), for backward compatibility
7
+ - Scoring heuristics per role:
8
+ - **Buttons**: 0.9 (square), 0.85 (round), 0.75 (angle); −0.2 penalty for single-character text
9
+ - **Checkboxes**: 0.9 (checked), 0.85 (unchecked)
10
+ - **Dialogs**: 0.9 base; +0.05 bonus for titled borders (text in top border)
11
+ - **Statusbars**: 0.9 (inverse colors), 0.85 (separator-preceded footer), 0.5 (≥30-char fallback)
12
+ - **Progress bars**: 0.9 (incomplete), 0.95 (100% complete)
13
+ - **Inputs**: 0.9
14
+ - **Labels**: 0.8 (single-word), 0.85 (multi-word)
15
+ - **Menus**: 0.9 (3+ items), 0.85 (2 items), 0.8 (dropdown `> Item`)
16
+ - **Tabs**: 0.85 (3+ tabs), 0.7 (2 tabs); +0.05 for focused tab
17
+ - **Annotations**: 1.0 (manually annotated); can be overridden via `confidence:` keyword
18
+ - `confidence` included in `Element#to_h` when set; excluded when nil (backward compatible)
19
+ - **Reduced false positives** — tighter heuristics to avoid misdetection:
20
+ - **Buttons**: skip numeric-only brackets (e.g. `[12]`, `[3]`)
21
+ - **Labels**: skip URL schemes (`https://example.com`) and time patterns (`Meeting at 3:00`)
22
+ - **Progress bars**: minimum width of 6 characters (`[##]` is no longer detected)
23
+ - **Negative tests** — 15 new tests covering edge cases (numeric brackets, URLs, time patterns, short progress bars, tabs across rows, incomplete boxes, prompt-like menus, etc.)
24
+ - **Confidence tests** — 15 new tests verifying scoring for every element type and edge case (titled dialogs, focused tabs, complete progress bars, annotation override, etc.)
25
+ - **Benchmarks** — `benchmark-ips` suite for parser, diff, and selector:
26
+ - `benchmarks/parser_benchmark.rb` — plain, ANSI, cursor, complex, and dialog-like workloads
27
+ - `benchmarks/diff_benchmark.rb` — full, chars_only, and ignore_rows modes
28
+ - `benchmarks/selector_benchmark.rb` — full scan with mixed UI elements
29
+ - `benchmark-ips` added as development dependency
30
+ - 30 new tests, 374 total, 100% line and branch coverage maintained
31
+
3
32
  ## 0.1.4
4
33
 
5
34
  - **Unicode width support** — correct display width for CJK, emoji, and combining characters:
data/README.md CHANGED
@@ -152,7 +152,7 @@ selector.statusbars # => includes annotated statusbar
152
152
 
153
153
  # Annotations accept extra attributes
154
154
  state.annotate_role(:button, row: 0, col: 0, width: 6, height: 1,
155
- text: "Submit", fg: "green", disabled: false)
155
+ text: "Submit", fg: "green", disabled: false, confidence: 0.8)
156
156
  ```
157
157
 
158
158
  ### State comparison (diff)
@@ -197,13 +197,15 @@ el.height # => 1
197
197
  el.checked # => true/false/nil
198
198
  el.focused # => true/false/nil
199
199
  el.disabled # => true/false/nil
200
+ el.confidence # => 0.9 (Float 0.0-1.0) or nil when not set
200
201
  el.fg # => "default"
201
202
  el.bg # => "default"
202
- el.to_h # => {role: :button, text: "OK", row: 1, col: 2, ...}
203
+ el.to_h # => {role: :button, text: "OK", row: 1, col: 2, confidence: 0.9, ...}
203
204
 
204
205
  # Predicates
205
206
  el.checked? # => false (always boolean)
206
207
  el.disabled? # => false (always boolean)
208
+ el.confident? # => true when confidence is >= 0.5, or when confidence is nil (not set)
207
209
 
208
210
  # Geometry
209
211
  el.bounds # => {row: 1, col: 2, width: 4, height: 1}
@@ -214,6 +216,49 @@ el.type("hello") # => {action: :type, target: el, row: 1, col: 4, text: "hell
214
216
  el.press_key(:tab) # => {action: :press_key, target: el, key: :tab}
215
217
  ```
216
218
 
219
+ ### Confidence scoring
220
+
221
+ Each detected element carries a `confidence` value (0.0–1.0) reflecting how sure the heuristics are:
222
+
223
+ ```ruby
224
+ btn = selector.button
225
+ btn.confidence # => 0.9 (square-bracket buttons are high confidence)
226
+ btn.confident? # => true
227
+
228
+ # Low-confidence detections can be filtered out
229
+ reliable = selector.buttons.select(&:confident?) # confidence >= 0.5
230
+ ```
231
+
232
+ Confidence values per role and context:
233
+
234
+ | Role | Scenario | Confidence |
235
+ |------|----------|------------|
236
+ | `:button` | `[ OK ]` square brackets | 0.9 |
237
+ | `:button` | `(Cancel)` round brackets | 0.85 |
238
+ | `:button` | `<Submit>` angle brackets | 0.75 |
239
+ | `:button` | Single-character text | −0.2 penalty |
240
+ | `:checkbox` | `[x]` checked | 0.9 |
241
+ | `:checkbox` | `[ ]` unchecked | 0.85 |
242
+ | `:input` | `[________]` underscore brackets | 0.9 |
243
+ | `:label` | `Project Name:` (multi-word) | 0.85 |
244
+ | `:label` | `Username:` (single-word) | 0.8 |
245
+ | `:menu` | 3+ items on menu bar | 0.9 |
246
+ | `:menu` | 2 items on menu bar | 0.85 |
247
+ | `:menu` | `> Item` dropdown | 0.8 |
248
+ | `:tab` | 3+ tabs | 0.85 |
249
+ | `:tab` | 2 tabs | 0.7 |
250
+ | `:tab` | Focused tab (underline/bg) | +0.05 bonus |
251
+ | `:dialog` | Complete box with all 4 corners | 0.9 |
252
+ | `:dialog` | Titled border (text in top border) | 0.95 |
253
+ | `:statusbar` | Inverse colors + ≥3 colored cells | 0.9 |
254
+ | `:statusbar` | Separator-preceded footer | 0.85 |
255
+ | `:statusbar` | Fallback (≥30 chars, no bg info) | 0.5 |
256
+ | `:progress` | `[##### ]` incomplete | 0.9 |
257
+ | `:progress` | `[##########]` 100% complete | 0.95 |
258
+ | Annotation | Manually annotated via `annotate_role` | 1.0 |
259
+
260
+ `confidence` is excluded from `to_h` when nil (backward compatible).
261
+
217
262
  ### Recognized element patterns
218
263
 
219
264
  | Role | Pattern | Example |
@@ -241,8 +286,9 @@ Each cell is a Hash with these keys:
241
286
  | `italic` | Boolean | Italic style |
242
287
  | `underline` | Boolean | Underline style |
243
288
  | `blink` | Boolean | Blink style |
289
+ | `width` | Integer | Display width (1 for normal, 2 for CJK/emoji, 0 for continuation) |
244
290
 
245
- Default cell: `{char: " ", fg: "default", bg: "default", bold: false, italic: false, underline: false, blink: false}`
291
+ Default cell: `{char: " ", fg: "default", bg: "default", bold: false, italic: false, underline: false, blink: false, width: 1}`
246
292
 
247
293
  ## Supported ANSI sequences
248
294
 
@@ -255,7 +301,8 @@ Default cell: `{char: " ", fg: "default", bg: "default", bold: false, italic: fa
255
301
  - **Cursor style** — DECSCUSR
256
302
  - **Mouse tracking** — DEC private modes 1000, 1002, 1003, 1006
257
303
  - **ISO 2022** — G0/G1 charset switching, DEC Special Graphics
258
- - **UTF-8** — Multi-byte characters including emoji
304
+ - **UTF-8** — Multi-byte characters including CJK, emoji (correct display width via `unicode-display_width`)
305
+ - **Combining characters** — Zero-width combining marks appended to previous cell
259
306
 
260
307
  ## License
261
308
 
@@ -11,6 +11,7 @@ module TansParser
11
11
  :focused,
12
12
  :fg, :bg,
13
13
  :disabled,
14
+ :confidence,
14
15
  keyword_init: true,
15
16
  ) do
16
17
  def checked?
@@ -37,6 +38,10 @@ module TansParser
37
38
  { action: :press_key, target: self, key: key }
38
39
  end
39
40
 
41
+ def confident?
42
+ confidence.nil? || confidence >= 0.5
43
+ end
44
+
40
45
  def to_h
41
46
  {
42
47
  role: role,
@@ -47,6 +52,7 @@ module TansParser
47
52
  focused: focused,
48
53
  fg: fg, bg: bg,
49
54
  disabled: disabled,
55
+ confidence: confidence,
50
56
  }.compact
51
57
  end
52
58
  end
@@ -147,7 +147,7 @@ module TansParser
147
147
 
148
148
  # Detects annotations: manually annotated roles from State#annotate_role
149
149
  def detect_annotations
150
- @state.annotations.map { |a| Element.new(a) }
150
+ @state.annotations.map { |a| Element.new(**a, confidence: a[:confidence] || 1.0) }
151
151
  end
152
152
 
153
153
  # Detects buttons: [ OK ], (Cancel), <Submit>
@@ -163,8 +163,18 @@ module TansParser
163
163
  next if text.empty?
164
164
  next if text.match?(/^_+$/)
165
165
  next if text.match?(/^[ xX*]$/) # skip checkbox markers
166
+ next if text.match?(/^\d+$/) # skip numeric-only brackets (e.g. [12])
166
167
 
167
168
  col = ::Regexp.last_match.begin(0)
169
+ confidence = if ::Regexp.last_match[1]
170
+ 0.9
171
+ elsif ::Regexp.last_match[2]
172
+ 0.85
173
+ else
174
+ 0.75
175
+ end
176
+ confidence -= 0.2 if text.length == 1 # penalize single-character buttons
177
+
168
178
  buttons << Element.new(
169
179
  role: :button,
170
180
  text: text,
@@ -172,6 +182,7 @@ module TansParser
172
182
  width: ::Regexp.last_match[0].length, height: 1,
173
183
  fg: row[col][:fg],
174
184
  bg: row[col][:bg],
185
+ confidence: confidence,
175
186
  )
176
187
  end
177
188
  end
@@ -190,12 +201,14 @@ module TansParser
190
201
  checked = match[2] != " "
191
202
  label_text = match[3].strip
192
203
  col = match.begin(3)
204
+ confidence = checked ? 0.9 : 0.85
193
205
  checkboxes << Element.new(
194
206
  role: :checkbox,
195
207
  text: label_text,
196
208
  row: r, col: col,
197
209
  width: label_text.length, height: 1,
198
210
  checked: checked,
211
+ confidence: confidence,
199
212
  )
200
213
  end
201
214
  checkboxes
@@ -218,11 +231,16 @@ module TansParser
218
231
  if bottom_r
219
232
  height = bottom_r - r + 1
220
233
  text = extract_dialog_text(r + 1, tl_idx + 1, width - 2, height - 2)
234
+ confidence = 0.9
235
+ # Bonus for titled borders (text in top border)
236
+ top_border = line[tl_idx..(tl_idx + width - 1)]
237
+ confidence = (confidence + 0.05).round(2) if top_border.match?(/[A-Za-z]/)
221
238
  dialogs << Element.new(
222
239
  role: :dialog,
223
240
  text: text,
224
241
  row: r, col: tl_idx,
225
242
  width: width, height: height,
243
+ confidence: confidence,
226
244
  )
227
245
  end
228
246
  tl_idx += 1
@@ -291,6 +309,7 @@ module TansParser
291
309
  row: row_idx, col: 0,
292
310
  width: row.length, height: 1,
293
311
  bg: non_default.first[:bg],
312
+ confidence: 0.9,
294
313
  )
295
314
  return bars
296
315
  end
@@ -303,6 +322,7 @@ module TansParser
303
322
  role: :statusbar, text: text,
304
323
  row: grid.length - 1, col: 0,
305
324
  width: last_row.length, height: 1,
325
+ confidence: 0.5,
306
326
  )
307
327
  return bars
308
328
  end
@@ -326,6 +346,7 @@ module TansParser
326
346
  role: :statusbar, text: text,
327
347
  row: r, col: 0,
328
348
  width: row.length, height: 1,
349
+ confidence: 0.85,
329
350
  )
330
351
  return bars
331
352
  end
@@ -341,16 +362,19 @@ module TansParser
341
362
  line = row.map { |c| c[:char] }.join
342
363
  match = line.match(/\[([#>=-]+)\s*\]/)
343
364
  next unless match
365
+ next if match[0].length < 6 # skip too-short brackets (e.g. [##])
344
366
 
345
367
  filled = match[1]
346
368
  total = match[0].length - 2
347
369
  percent = (filled.length.to_f / total * 100).round
370
+ confidence = percent == 100 ? 0.95 : 0.9
348
371
  bars << Element.new(
349
372
  role: :progress,
350
373
  text: "#{percent}%",
351
374
  row: r, col: ::Regexp.last_match.begin(0),
352
375
  width: match[0].length, height: 1,
353
376
  checked: percent == 100,
377
+ confidence: confidence,
354
378
  )
355
379
  end
356
380
  bars
@@ -369,6 +393,7 @@ module TansParser
369
393
  text: "",
370
394
  row: r, col: col,
371
395
  width: m[0].length, height: 1,
396
+ confidence: 0.9,
372
397
  )
373
398
  end
374
399
  end
@@ -385,13 +410,17 @@ module TansParser
385
410
 
386
411
  label_text = match[1].strip.sub(/:$/, "").strip
387
412
  next if label_text.empty? || label_text.length < 2
413
+ next if match[1].match?(/\d:/) # skip patterns ending with digit before colon (e.g. "Meeting at 3:")
414
+ next if line[match.end(1), 2] == "//" # skip URL schemes (e.g. "https://example.com")
388
415
 
389
416
  col = match.begin(1)
417
+ confidence = label_text.include?(" ") ? 0.85 : 0.8 # multi-word labels are stronger signals
390
418
  labels << Element.new(
391
419
  role: :label,
392
420
  text: label_text,
393
421
  row: r, col: col,
394
422
  width: match[1].length, height: 1,
423
+ confidence: confidence,
395
424
  )
396
425
  end
397
426
  labels
@@ -411,11 +440,13 @@ module TansParser
411
440
  items = stripped.split(/\s{2,}/)
412
441
  if items.length >= 2 && items.all? { |i| i.match?(/^[A-Za-z]/) }
413
442
  col = line.index(stripped)
443
+ confidence = items.length >= 3 ? 0.9 : 0.85
414
444
  menus << Element.new(
415
445
  role: :menu,
416
446
  text: items.join(" | "),
417
447
  row: r, col: col || 0,
418
448
  width: line.length, height: 1,
449
+ confidence: confidence,
419
450
  )
420
451
  end
421
452
  end
@@ -428,6 +459,7 @@ module TansParser
428
459
  text: m[0].sub(/^>\s*/, "").strip,
429
460
  row: r, col: m.begin(0),
430
461
  width: m[0].length, height: 1,
462
+ confidence: 0.8,
431
463
  )
432
464
  end
433
465
  end
@@ -453,12 +485,15 @@ module TansParser
453
485
 
454
486
  cell = row[m.begin(0)]
455
487
  focused = cell[:underline] || cell[:bg] != "default"
488
+ base_confidence = matches.length >= 3 ? 0.85 : 0.7
489
+ confidence = focused ? [base_confidence + 0.05, 0.9].min.round(2) : base_confidence
456
490
  tabs << Element.new(
457
491
  role: :tab,
458
492
  text: tab_text,
459
493
  row: r, col: m.begin(0),
460
494
  width: m[0].length, height: 1,
461
495
  focused: focused,
496
+ confidence: confidence,
462
497
  )
463
498
  end
464
499
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module TansParser
4
- VERSION = "0.1.4"
4
+ VERSION = "0.1.5"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tans-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Haluk Durmus
@@ -121,6 +121,20 @@ dependencies:
121
121
  - - "~>"
122
122
  - !ruby/object:Gem::Version
123
123
  version: '0.22'
124
+ - !ruby/object:Gem::Dependency
125
+ name: benchmark-ips
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '2.13'
131
+ type: :development
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '2.13'
124
138
  - !ruby/object:Gem::Dependency
125
139
  name: unicode-display_width
126
140
  requirement: !ruby/object:Gem::Requirement