docalign 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docalign/__init__.py +2 -0
- docalign/checks/__init__.py +0 -0
- docalign/checks/arrows.py +100 -0
- docalign/checks/box_padding.py +117 -0
- docalign/checks/box_spacing.py +213 -0
- docalign/checks/box_walls.py +249 -0
- docalign/checks/box_widths.py +140 -0
- docalign/checks/def_lists.py +99 -0
- docalign/checks/horiz_arrows.py +122 -0
- docalign/checks/list_descs.py +60 -0
- docalign/checks/pipes.py +90 -0
- docalign/checks/rails.py +459 -0
- docalign/checks/tables.py +91 -0
- docalign/checks/wide_chars.py +31 -0
- docalign/cli.py +273 -0
- docalign/constants.py +37 -0
- docalign/hints.py +82 -0
- docalign/parser.py +46 -0
- docalign/py.typed +0 -0
- docalign/utils.py +200 -0
- docalign-0.1.0.dist-info/METADATA +188 -0
- docalign-0.1.0.dist-info/RECORD +25 -0
- docalign-0.1.0.dist-info/WHEEL +4 -0
- docalign-0.1.0.dist-info/entry_points.txt +2 -0
- docalign-0.1.0.dist-info/licenses/LICENSE +21 -0
docalign/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from docalign.constants import ARROW_CHARS, ARROW_SEARCH_RANGE, BOX_CHARS, HORIZ_ARROW_CHARS
|
|
2
|
+
from docalign.parser import iter_code_blocks
|
|
3
|
+
from docalign.utils import _is_standalone_arrow
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def check(lines):
    """Return the arrow-alignment messages for every code block in *lines*.

    Each block yielded by ``iter_code_blocks`` is passed to ``_check_arrows``
    and the resulting messages are concatenated in block order.
    """
    return [
        message
        for _, block_lines in iter_code_blocks(lines)
        for message in _check_arrows(block_lines)
    ]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def fix(lines):
    """Return a new list built from *lines* with arrow positions corrected.

    The input is copied first; only the copy is handed to
    ``_fix_arrows_in_block`` for in-place edits.
    """
    fixed_lines = [*lines]
    for block_indices, _unused in iter_code_blocks(lines):
        _fix_arrows_in_block(block_indices, fixed_lines)
    return fixed_lines
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _check_arrows(code_lines):
    """Collect arrow problems found in a single code block.

    ``code_lines`` is iterated as ``(line_index, text)`` pairs; the index is
    reported 1-based in the messages (``i + 1``).

    Three kinds of findings are produced:

    * a standalone arrow whose column differs from the column returned by
      ``_find_arrow_target``;
    * a character in ``ARROW_CHARS`` that is not standalone and has ``─``
      immediately to its left or right;
    * a character in ``HORIZ_ARROW_CHARS`` with ``│`` directly above or below.

    Returns the list of message strings (empty when nothing was found).
    """
    errors = []
    for idx, (i, raw) in enumerate(code_lines):
        for j, c in enumerate(raw):
            if c in ARROW_CHARS:
                if _is_standalone_arrow(raw, j):
                    # None means there is nothing to align to, or the arrow
                    # already sits on a box character.
                    expected = _find_arrow_target(code_lines, idx, j, c)
                    if expected is not None and expected != j:
                        errors.append(f"L{i + 1} arrow '{c}' at col {j}, expected col {expected}")
                elif _is_embedded_in_horiz_border(raw, j):
                    errors.append(f"L{i + 1} arrow '{c}' embedded in border at col {j}")
            # NOTE(review): indentation was lost in the extracted source; this
            # branch is assumed to pair with the outer ``if c in ARROW_CHARS``
            # (presumably ARROW_CHARS holds only vertical arrows) - confirm.
            elif c in HORIZ_ARROW_CHARS and _is_embedded_in_vert_border(code_lines, idx, j):
                errors.append(f"L{i + 1} arrow '{c}' embedded in border at col {j}")
    return errors
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _is_embedded_in_horiz_border(raw, j):
|
|
37
|
+
left = raw[j - 1] if j > 0 else " "
|
|
38
|
+
right = raw[j + 1] if j < len(raw) - 1 else " "
|
|
39
|
+
return left == "─" or right == "─"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _is_embedded_in_vert_border(code_lines, line_idx, col):
|
|
43
|
+
above = line_idx - 1
|
|
44
|
+
below = line_idx + 1
|
|
45
|
+
has_above = above >= 0 and col < len(code_lines[above][1]) and code_lines[above][1][col] == "│"
|
|
46
|
+
has_below = below < len(code_lines) and col < len(code_lines[below][1]) and code_lines[below][1][col] == "│"
|
|
47
|
+
return has_above or has_below
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _find_arrow_target(code_lines, arrow_idx, arrow_col, arrow_char):
    """Return the column an arrow should move to, or None.

    Only the directly neighbouring line is inspected: the line above for
    ``"v"``, the line below for any other arrow character. Columns are
    probed outwards from *arrow_col* (0, -1, +1, -2, +2, ...) up to
    ``ARROW_SEARCH_RANGE``; the first one holding a ``BOX_CHARS`` character
    wins.

    None is returned when there is no neighbouring line, when no box
    character is within range, or when the arrow already sits on one.
    """
    if arrow_char == "v":
        neighbour = arrow_idx - 1
        if neighbour < 0:
            return None
    else:
        neighbour = arrow_idx + 1
        if neighbour >= len(code_lines):
            return None

    _, text = code_lines[neighbour]
    for reach in range(ARROW_SEARCH_RANGE + 1):
        for offset in ((0,) if reach == 0 else (-reach, reach)):
            candidate = arrow_col + offset
            if 0 <= candidate < len(text) and text[candidate] in BOX_CHARS:
                # offset 0 means the arrow is already aligned.
                return None if offset == 0 else candidate
    return None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _fix_arrows_in_block(code_indices, all_lines):
    """Shift misaligned standalone arrows of one code block, editing *all_lines*.

    For each line the arrows needing a move are collected and then processed
    from right to left. An arrow is only moved when the move can be paid for
    with adjacent spaces:

    * rightwards - enough spaces follow the arrow, or only spaces follow it
      up to the end of the line (the line is then rebuilt ending at the arrow);
    * leftwards - enough spaces precede the arrow.

    Rewritten lines are stored back with a trailing ``"\n"``.
    """
    code_lines = [(i, all_lines[i].rstrip("\n")) for i in code_indices]
    for pos, (line_no, text) in enumerate(code_lines):
        standalone = [
            (col, ch)
            for col, ch in enumerate(text)
            if ch in ARROW_CHARS and _is_standalone_arrow(text, col)
        ]
        if not standalone:
            continue

        moves = []
        for col, ch in standalone:
            target = _find_arrow_target(code_lines, pos, col, ch)
            if target is not None and target != col:
                moves.append((col, target))
        if not moves:
            continue

        updated = text
        # Columns were gathered left to right, so reversing processes the
        # right-most arrow first.
        for col, target in reversed(moves):
            shift = target - col
            if shift > 0:
                tail = updated[col + 1 :]
                gap = len(tail) - len(tail.lstrip(" "))
                if gap >= shift:
                    updated = updated[:col] + " " * shift + updated[col] + updated[col + 1 + shift :]
                elif col + 1 + gap >= len(updated):
                    updated = updated[:col] + " " * shift + updated[col]
            elif shift < 0:
                need = -shift
                head = updated[:col]
                gap = len(head) - len(head.rstrip(" "))
                if gap >= need:
                    updated = updated[: col - need] + updated[col] + " " * need + updated[col + 1 :]

        if updated != text:
            all_lines[line_no] = updated + "\n"
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from collections import Counter
|
|
2
|
+
|
|
3
|
+
from docalign.constants import BOX_CHARS, MAX_PAD_DRIFT
|
|
4
|
+
from docalign.parser import iter_code_blocks
|
|
5
|
+
from docalign.utils import _find_boxes, _is_tree_block
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def check(lines):
    """Return the box-padding messages for every code block in *lines*."""
    return [
        message
        for _, block_lines in iter_code_blocks(lines)
        for message in _check_padding(block_lines)
    ]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def fix(lines):
    """Return a new list built from *lines* with box left-padding normalised.

    The input is copied first; ``_fix_padding_in_block`` edits the copy.
    """
    fixed_lines = [*lines]
    for block_indices, _unused in iter_code_blocks(lines):
        _fix_padding_in_block(block_indices, fixed_lines)
    return fixed_lines
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _get_left_padding(raw, col_left, col_right):
    """Count the spaces between the left wall and the content of a box row.

    Returns None when the row cannot be measured: no ``│`` at *col_left*,
    no ``BOX_CHARS`` character at *col_right*, an interior that is empty or
    whitespace-only, or an interior that itself contains box characters.
    """
    line_len = len(raw)
    if not (col_left < line_len and raw[col_left] == "│"):
        return None
    if not (col_right < line_len and raw[col_right] in BOX_CHARS):
        return None

    interior = raw[col_left + 1 : col_right]
    if not interior.strip() or any(ch in BOX_CHARS for ch in interior):
        return None

    # Only literal spaces count as padding.
    return len(interior) - len(interior.lstrip(" "))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _expected_padding(paddings):
|
|
42
|
+
counts = Counter(paddings)
|
|
43
|
+
max_count = max(counts.values())
|
|
44
|
+
candidates = [p for p, c in counts.items() if c == max_count]
|
|
45
|
+
if len(candidates) == 1:
|
|
46
|
+
return candidates[0]
|
|
47
|
+
if 0 in candidates:
|
|
48
|
+
non_zero = [c for c in candidates if c > 0]
|
|
49
|
+
return min(non_zero)
|
|
50
|
+
return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _has_layout_intent(pad_values):
    """Return True when the padding spread reaches ``MAX_PAD_DRIFT``.

    The spread is the difference between the largest and smallest value.
    """
    spread = max(pad_values) - min(pad_values)
    return spread >= MAX_PAD_DRIFT
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _check_padding(code_lines):
    """Report box rows whose left padding differs from the box's expected one.

    Tree blocks are skipped entirely. A box is also skipped when fewer than
    two rows can be measured, when ``_has_layout_intent`` accepts the spread,
    or when ``_expected_padding`` cannot decide on a width.
    """
    if _is_tree_block(code_lines):
        return []

    problems = []
    for col_left, col_right, _, _, content_indices in _find_boxes(code_lines):
        rows = (code_lines[ci] for ci in content_indices)
        measured = [
            (line_no, _get_left_padding(text, col_left, col_right))
            for line_no, text in rows
        ]
        measured = [(line_no, pad) for line_no, pad in measured if pad is not None]
        widths = [pad for _, pad in measured]

        if len(measured) < 2 or _has_layout_intent(widths):
            continue

        expected = _expected_padding(widths)
        if expected is None:
            continue

        problems.extend(
            f"L{line_idx + 1} box padding={pad}, expected={expected}"
            for line_idx, pad in measured
            if pad != expected
        )

    return problems
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _fix_padding_in_block(code_indices, all_lines):
    """Rewrite box rows of one code block so they share the expected left padding.

    Boxes are skipped under the same conditions as in the check. A row is
    left alone when re-padding its content would not leave at least one
    space before the right wall. Rewritten rows are stored back into
    *all_lines* with a trailing ``"\n"``.
    """
    code_lines = [(i, all_lines[i].rstrip("\n")) for i in code_indices]
    if _is_tree_block(code_lines):
        return

    for col_left, col_right, _, _, content_indices in _find_boxes(code_lines):
        measured = []
        for ci in content_indices:
            line_no, text = code_lines[ci]
            pad = _get_left_padding(text, col_left, col_right)
            if pad is not None:
                measured.append((line_no, pad))
        widths = [pad for _, pad in measured]

        if len(measured) < 2 or _has_layout_intent(widths):
            continue

        expected = _expected_padding(widths)
        if expected is None:
            continue

        inner_width = col_right - col_left - 1
        for line_no, pad in measured:
            if pad == expected:
                continue
            # Work on the current text, not the snapshot taken above.
            current = all_lines[line_no].rstrip("\n")
            body = current[col_left + 1 : col_right].strip()
            repadded = " " * expected + body
            if inner_width - len(repadded) < 1:
                continue
            repadded = repadded.ljust(inner_width)
            all_lines[line_no] = current[: col_left + 1] + repadded + current[col_right:] + "\n"
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
from docalign.constants import (
|
|
2
|
+
BORDER_CHARS,
|
|
3
|
+
BOX_CHARS,
|
|
4
|
+
LARGE_SPACE_GAP,
|
|
5
|
+
MAX_FIX_ITERATIONS,
|
|
6
|
+
MIN_PAD,
|
|
7
|
+
)
|
|
8
|
+
from docalign.parser import iter_code_blocks
|
|
9
|
+
from docalign.utils import _find_boxes, _is_tree_block
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def check(lines):
    """Return the box-spacing messages for every code block in *lines*."""
    return [
        message
        for _, block_lines in iter_code_blocks(lines)
        for message in _check_spacing(block_lines)
    ]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def fix(lines):
    """Return a new list built from *lines* with minimum box spacing enforced.

    The input is copied first; code blocks are then located in, and fixed
    on, that copy.
    """
    fixed_lines = [*lines]
    for block_indices, _unused in iter_code_blocks(fixed_lines):
        _fix_spacing_in_block(block_indices, fixed_lines)
    return fixed_lines
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _get_right_padding(raw, col_left, col_right):
    """Count the trailing whitespace between a box row's content and its right wall.

    Returns None when the row cannot be measured: no ``│`` at *col_left*,
    no ``BOX_CHARS`` character at *col_right*, an interior that is empty or
    whitespace-only, or an interior that itself contains box characters.
    """
    line_len = len(raw)
    if not (col_left < line_len and raw[col_left] == "│"):
        return None
    if not (col_right < line_len and raw[col_right] in BOX_CHARS):
        return None

    interior = raw[col_left + 1 : col_right]
    if not interior.strip():
        return None
    for ch in interior:
        if ch in BOX_CHARS:
            return None

    return len(interior) - len(interior.rstrip())
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _get_left_padding(raw, col_left, col_right):
    """Count the leading whitespace between a box row's left wall and its content.

    Returns None when the row cannot be measured: no ``│`` at *col_left*,
    no ``BOX_CHARS`` character at *col_right*, an interior that is empty or
    whitespace-only, or an interior that itself contains box characters.
    """
    line_len = len(raw)
    if not (col_left < line_len and raw[col_left] == "│"):
        return None
    if not (col_right < line_len and raw[col_right] in BOX_CHARS):
        return None

    interior = raw[col_left + 1 : col_right]
    if not interior.strip():
        return None
    for ch in interior:
        if ch in BOX_CHARS:
            return None

    return len(interior) - len(interior.lstrip())
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _check_spacing(code_lines):
    """Report box rows whose content sits closer than ``MIN_PAD`` to a wall.

    Tree blocks are skipped. For each measurable row the right side is
    reported before the left side.
    """
    if _is_tree_block(code_lines):
        return []

    findings = []
    for col_left, col_right, _, _, content_indices in _find_boxes(code_lines):
        for line_idx, text in (code_lines[ci] for ci in content_indices):
            rpad = _get_right_padding(text, col_left, col_right)
            too_tight_right = rpad is not None and rpad < MIN_PAD
            if too_tight_right:
                findings.append(f"L{line_idx + 1} box right spacing={rpad}, minimum={MIN_PAD}")

            lpad = _get_left_padding(text, col_left, col_right)
            too_tight_left = lpad is not None and lpad < MIN_PAD
            if too_tight_left:
                findings.append(f"L{line_idx + 1} box left spacing={lpad}, minimum={MIN_PAD}")

    return findings
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _find_connectors_in_range(raw, col_left, col_right):
|
|
71
|
+
connectors = []
|
|
72
|
+
for col in range(col_left, col_right + 1):
|
|
73
|
+
if col < len(raw) and raw[col] in ("┬", "┴"):
|
|
74
|
+
connectors.append(col)
|
|
75
|
+
return connectors
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _collect_box_insertions(code_lines):
    """Work out where characters must be inserted to give boxes ``MIN_PAD`` spacing.

    Each result is a tuple ``(column, deficit, side, line_indices, connectors)``:

    * ``column`` - insertion column (``col_right`` for the right side,
      ``col_left + 1`` for the left side);
    * ``deficit`` - number of characters missing to reach ``MIN_PAD``;
    * ``side`` - ``"right"`` or ``"left"``;
    * ``line_indices`` - line indices of the opener, content rows and closer;
    * ``connectors`` - ``┬``/``┴`` columns on the opener and closer (only
      filled in for left insertions, empty for right ones).

    A box is skipped when a box whose columns strictly enclose it has
    another ``┌`` later on its opening line separated by ``LARGE_SPACE_GAP``.
    """
    boxes = list(_find_boxes(code_lines))

    def enclosing_box(left, right):
        # First box whose column span strictly contains (left, right).
        return next(
            ((box[0], box[1], box[2]) for box in boxes if box[0] < left and right < box[1]),
            None,
        )

    def followed_by_sibling(right, opener_text):
        before_corner, corner, _ = opener_text[right + 1 :].partition("┌")
        return bool(corner) and LARGE_SPACE_GAP in before_corner

    insertions = []
    for col_left, col_right, opening_ci, closing_ci, content_indices in boxes:
        parent = enclosing_box(col_left, col_right)
        if parent is not None:
            parent_opener_text = code_lines[parent[2]][1]
            if followed_by_sibling(parent[1], parent_opener_text):
                continue

        row_texts = [code_lines[ci][1] for ci in content_indices]
        right_pads = [
            pad
            for pad in (_get_right_padding(text, col_left, col_right) for text in row_texts)
            if pad is not None
        ]
        left_pads = [
            pad
            for pad in (_get_left_padding(text, col_left, col_right) for text in row_texts)
            if pad is not None
        ]
        tightest_right = min(right_pads, default=None)
        tightest_left = min(left_pads, default=None)

        line_indices = [code_lines[ci][0] for ci in [opening_ci, *content_indices, closing_ci]]

        connectors = [
            *_find_connectors_in_range(code_lines[opening_ci][1], col_left, col_right),
            *_find_connectors_in_range(code_lines[closing_ci][1], col_left, col_right),
        ]

        if tightest_right is not None and tightest_right < MIN_PAD:
            insertions.append((col_right, MIN_PAD - tightest_right, "right", line_indices, []))

        if tightest_left is not None and tightest_left < MIN_PAD:
            insertions.append((col_left + 1, MIN_PAD - tightest_left, "left", line_indices, connectors))

    return insertions
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _trace_connected_pipes(code_indices, all_lines, box_lines, connector_col):
|
|
137
|
+
connected = set()
|
|
138
|
+
max_box = max(box_lines)
|
|
139
|
+
for line_idx in code_indices:
|
|
140
|
+
if line_idx <= max_box:
|
|
141
|
+
continue
|
|
142
|
+
raw = all_lines[line_idx].rstrip("\n")
|
|
143
|
+
if connector_col >= len(raw):
|
|
144
|
+
break
|
|
145
|
+
char = raw[connector_col]
|
|
146
|
+
if char == "│":
|
|
147
|
+
connected.add(line_idx)
|
|
148
|
+
elif char in ("┬", "┴"):
|
|
149
|
+
if "┌" in raw or "└" in raw:
|
|
150
|
+
break
|
|
151
|
+
connected.add(line_idx)
|
|
152
|
+
break
|
|
153
|
+
else:
|
|
154
|
+
break
|
|
155
|
+
return connected
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _is_complex_multi_column(box_insertions):
|
|
159
|
+
left_insertions = [ins for ins in box_insertions if ins[2] == "left"]
|
|
160
|
+
if len(left_insertions) <= 1:
|
|
161
|
+
return False
|
|
162
|
+
cols = set(ins[0] for ins in left_insertions)
|
|
163
|
+
if len(cols) <= 1:
|
|
164
|
+
return False
|
|
165
|
+
line_sets = [set(ins[3]) for ins in left_insertions]
|
|
166
|
+
for i, lines_a in enumerate(line_sets):
|
|
167
|
+
for lines_b in line_sets[i + 1 :]:
|
|
168
|
+
if not lines_a & lines_b:
|
|
169
|
+
return True
|
|
170
|
+
return False
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _apply_box_insertions(all_lines, box_insertions, code_indices):
    """Apply collected insertions to *all_lines* in place.

    Returns False without touching anything when there are no insertions or
    when ``_is_complex_multi_column`` rejects them; True otherwise.

    Insertions are applied from the highest column to the lowest. For left
    insertions with connectors, the lines reached by
    ``_trace_connected_pipes`` are widened as well. The inserted filler is
    ``─`` when the character just before the insertion column is in
    ``BORDER_CHARS`` and a space otherwise. Lines shorter than the
    insertion column are skipped.
    """
    if not box_insertions or _is_complex_multi_column(box_insertions):
        return False

    ordered = sorted(box_insertions, key=lambda entry: entry[0], reverse=True)
    for col, deficit, side, line_indices, connectors in ordered:
        targets = set(line_indices)
        if side == "left" and connectors:
            for connector_col in connectors:
                targets |= _trace_connected_pipes(code_indices, all_lines, set(line_indices), connector_col)

        for line_no in targets:
            text = all_lines[line_no].rstrip("\n")
            if col > len(text):
                continue
            on_border = col != 0 and text[col - 1] in BORDER_CHARS
            filler = ("─" if on_border else " ") * deficit
            all_lines[line_no] = text[:col] + filler + text[col:] + "\n"

    return True
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _fix_spacing_in_block(code_indices, all_lines):
    """Repeatedly widen the boxes of one code block until nothing more is applied.

    Tree blocks are left untouched. At most ``MAX_FIX_ITERATIONS`` passes
    are made; each pass re-reads the block from *all_lines* so that earlier
    insertions are taken into account.
    """

    def snapshot():
        return [(i, all_lines[i].rstrip("\n")) for i in code_indices]

    if _is_tree_block(snapshot()):
        return

    for _ in range(MAX_FIX_ITERATIONS):
        pending = _collect_box_insertions(snapshot())
        applied = _apply_box_insertions(all_lines, pending, code_indices)
        if not applied:
            return
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
from docalign.constants import (
|
|
2
|
+
BOX_CHARS,
|
|
3
|
+
BOX_WALL_DRIFT,
|
|
4
|
+
LARGE_SPACE_GAP,
|
|
5
|
+
MIN_BOX_WIDTH,
|
|
6
|
+
MIN_PIPES_FOR_ADJACENT,
|
|
7
|
+
)
|
|
8
|
+
from docalign.parser import iter_code_blocks
|
|
9
|
+
from docalign.utils import (
|
|
10
|
+
_find_box_closer,
|
|
11
|
+
_find_nearby_closer_start,
|
|
12
|
+
_find_nearby_pipe,
|
|
13
|
+
_fix_closer,
|
|
14
|
+
_is_tree_block,
|
|
15
|
+
_shift_pipe,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def check(lines):
    """Return the box-wall messages for every code block in *lines*."""
    return [
        message
        for _, block_lines in iter_code_blocks(lines)
        for message in _check_box_walls(block_lines)
    ]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def fix(lines):
    """Return a new list built from *lines* with box walls realigned.

    The input is copied first; ``_fix_box_walls_in_block`` edits the copy.
    """
    fixed_lines = [*lines]
    for block_indices, _unused in iter_code_blocks(lines):
        _fix_box_walls_in_block(block_indices, fixed_lines)
    return fixed_lines
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _has_independent_box_after(raw, col):
    """Return True when the text right of *col* looks like a separate box.

    That requires a ``┌`` or ``└`` somewhere after *col*, at least
    ``MIN_PIPES_FOR_ADJACENT`` ``│`` characters after *col*, and
    ``LARGE_SPACE_GAP`` occurring between the first two of those pipes.
    """
    tail = raw[col + 1 :]
    if "┌" not in tail and "└" not in tail:
        return False

    pipes = [pos for pos, ch in enumerate(tail) if ch == "│"]
    if len(pipes) < MIN_PIPES_FOR_ADJACENT:
        return False

    first, second = pipes[0], pipes[1]
    return LARGE_SPACE_GAP in tail[first + 1 : second]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _check_box_walls(code_lines):
    """Report misaligned corners and walls of the boxes in one code block.

    ``code_lines`` is a list of ``(line_index, text)`` pairs; messages use
    1-based line numbers. Tree blocks produce no messages.

    Every ``┌`` is treated as a potential box opener. For each opener the
    closing line is searched for below it, and the following are reported:

    * a ``└`` found near, but not at, the opener's column;
    * ``┐`` / ``┘`` that are not at the expected right column (the larger of
      the two corner columns);
    * ``│`` walls on rows in between that are near, but not at, the
      expected left or right column.
    """
    errors = []
    if _is_tree_block(code_lines):
        return errors

    for idx, (line_idx, raw) in enumerate(code_lines):
        j = 0
        while j < len(raw):
            if raw[j] != "┌":
                j += 1
                continue

            col_left = j
            # Presumably the column of the matching "┐" on this line, or None.
            col_right_open = _find_box_closer(raw, "┌", "┐", j)
            if col_right_open is None or col_right_open - col_left < MIN_BOX_WIDTH:
                j += 1
                continue

            # Look downwards for the closing line: an exact "└" at col_left
            # wins, otherwise whatever _find_nearby_closer_start reports.
            closing_idx = None
            fuzzy_col_left = None
            for si in range(idx + 1, len(code_lines)):
                _, sraw = code_lines[si]
                if col_left < len(sraw) and sraw[col_left] == "└":
                    closing_idx = si
                    break
                nc = _find_nearby_closer_start(sraw, col_left, col_right_open)
                if nc is not None:
                    closing_idx = si
                    fuzzy_col_left = nc
                    break

            # Boxes with fewer than two rows between opener and closer are
            # not checked.
            if closing_idx is None or closing_idx - idx < 3:
                j = col_right_open + 1
                continue

            closing_line_idx, closing_raw = code_lines[closing_idx]
            actual_col_left = fuzzy_col_left if fuzzy_col_left is not None else col_left
            col_right_close = _find_box_closer(closing_raw, "└", "┘", actual_col_left)

            # NOTE(review): this message is emitted before the drift check
            # below, so it can be reported even when the rest of the box is
            # then skipped - confirm that is intended.
            if fuzzy_col_left is not None:
                errors.append(
                    f"L{closing_line_idx + 1} box └ at col {fuzzy_col_left}, "
                    f"expected col {col_left} "
                    f"(box ┌ at L{line_idx + 1} col {col_left})"
                )

            if col_right_close is not None:
                # Corners too far apart: skip the rest of the checks for
                # this opener.
                if abs(col_right_close - col_right_open) > BOX_WALL_DRIFT:
                    j = col_right_open + 1
                    continue
                expected_right = max(col_right_open, col_right_close)
            else:
                expected_right = col_right_open

            if col_right_open != expected_right:
                errors.append(f"L{line_idx + 1} box ┐ at col {col_right_open}, expected col {expected_right}")

            if col_right_close is not None and col_right_close != expected_right:
                errors.append(f"L{closing_line_idx + 1} box ┘ at col {col_right_close}, expected col {expected_right}")

            # Rows strictly between the opener and the closer.
            for mi in range(idx + 1, closing_idx):
                m_line_idx, m_raw = code_lines[mi]
                right_ok = expected_right < len(m_raw) and m_raw[expected_right] in BOX_CHARS
                if not right_ok:
                    # Presumably the column of a "│" within BOX_WALL_DRIFT of
                    # the expected column, or None.
                    found = _find_nearby_pipe(m_raw, expected_right, BOX_WALL_DRIFT)
                    if found is not None:
                        errors.append(
                            f"L{m_line_idx + 1} box wall │ at col {found}, "
                            f"expected col {expected_right} "
                            f"(box ┌ at L{line_idx + 1} col {col_left})"
                        )
                if col_left < len(m_raw):
                    if m_raw[col_left] not in BOX_CHARS:
                        found = _find_nearby_pipe(m_raw, col_left, BOX_WALL_DRIFT)
                        if found is not None:
                            errors.append(
                                f"L{m_line_idx + 1} box wall │ at col {found}, "
                                f"expected col {col_left} "
                                f"(box ┌ at L{line_idx + 1} col {col_left})"
                            )

            # Resume scanning after this box's opener.
            j = col_right_open + 1

    return errors
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _fix_box_walls_in_block(code_indices, all_lines):
    """Realign box corners and walls of one code block, editing *all_lines*.

    The box detection mirrors ``_check_box_walls``: each ``┌`` is matched
    with a ``┐`` on the same line and a closing line below. Unlike the
    check, neighbouring lines are always re-read from *all_lines* so that
    earlier edits are seen. Tree blocks are left untouched.

    Rewritten lines are stored back with a trailing ``"\n"``.
    """
    code_lines = [(i, all_lines[i].rstrip("\n")) for i in code_indices]
    if _is_tree_block(code_lines):
        return

    # NOTE(review): ``enumerate`` keeps iterating the list built above even
    # after ``code_lines`` is rebound further down, so ``raw`` for later
    # lines is the text from function entry. A line edited while an earlier
    # box was processed is therefore scanned in its old form - confirm.
    for idx, (line_idx, raw) in enumerate(code_lines):
        j = 0
        while j < len(raw):
            if raw[j] != "┌":
                j += 1
                continue

            col_left = j
            # Presumably the column of the matching "┐" on this line, or None.
            col_right_open = _find_box_closer(raw, "┌", "┐", j)
            if col_right_open is None or col_right_open - col_left < MIN_BOX_WIDTH:
                j += 1
                continue

            # Look downwards for the closing line: an exact "└" at col_left
            # wins, otherwise whatever _find_nearby_closer_start reports.
            closing_idx = None
            fuzzy_col_left = None
            for si in range(idx + 1, len(code_lines)):
                si_idx = code_lines[si][0]
                sraw = all_lines[si_idx].rstrip("\n")
                if col_left < len(sraw) and sraw[col_left] == "└":
                    closing_idx = si
                    break
                nc = _find_nearby_closer_start(sraw, col_left, col_right_open)
                if nc is not None:
                    closing_idx = si
                    fuzzy_col_left = nc
                    break

            # Boxes with fewer than two rows between opener and closer are
            # not touched.
            if closing_idx is None or closing_idx - idx < 3:
                j = col_right_open + 1
                continue

            closing_line_idx = code_lines[closing_idx][0]
            closing_raw = all_lines[closing_line_idx].rstrip("\n")
            actual_col_left = fuzzy_col_left if fuzzy_col_left is not None else col_left
            col_right_close = _find_box_closer(closing_raw, "└", "┘", actual_col_left)

            if col_right_close is not None:
                # Corners too far apart: leave this box alone.
                if abs(col_right_close - col_right_open) > BOX_WALL_DRIFT:
                    j = col_right_open + 1
                    continue
                expected_right = max(col_right_open, col_right_close)
            else:
                expected_right = col_right_open

            changed = False

            # The closing line starts at the wrong column: move its "└" to
            # col_left and its "┘" to the opener's right column, leaving
            # every other box character where it is.
            if fuzzy_col_left is not None:
                from docalign.utils import _realign_box_chars

                cur = all_lines[closing_line_idx].rstrip("\n")
                actual_positions = [k for k, c in enumerate(cur) if c in BOX_CHARS]
                expected_positions = []
                for ap in actual_positions:
                    if ap == fuzzy_col_left:
                        expected_positions.append(col_left)
                    elif ap == col_right_close and col_right_close is not None:
                        expected_positions.append(col_right_open)
                    else:
                        expected_positions.append(ap)
                fixed = _realign_box_chars(cur, actual_positions, expected_positions).rstrip(" ")
                # NOTE(review): indentation was lost in the extracted source;
                # the four assignments after the write-back are assumed to
                # belong to this ``if`` - confirm.
                if fixed != cur:
                    all_lines[closing_line_idx] = fixed + "\n"
                    closing_raw = fixed
                    col_right_close = col_right_open
                    expected_right = col_right_open
                    changed = True

            # Move the top-right corner out to the expected column.
            if col_right_open != expected_right:
                fixed = _fix_closer(raw, col_right_open, expected_right, "┐")
                if fixed != raw:
                    all_lines[line_idx] = fixed + "\n"
                    changed = True

            # Move the bottom-right corner out to the expected column.
            if col_right_close is not None and col_right_close != expected_right:
                cur = all_lines[closing_line_idx].rstrip("\n")
                fixed = _fix_closer(cur, col_right_close, expected_right, "┘")
                if fixed != cur:
                    all_lines[closing_line_idx] = fixed + "\n"
                    changed = True

            # Another opener later on the same line: walls of the rows in
            # between are then never shifted.
            has_adjacent_box_on_line = "┌" in raw[col_right_open + 1 :]

            # Rows strictly between the opener and the closer.
            for mi in range(idx + 1, closing_idx):
                m_line_idx = code_lines[mi][0]
                m_raw = all_lines[m_line_idx].rstrip("\n")
                has_box_after_right = _has_independent_box_after(m_raw, expected_right)
                has_box_after_left = _has_independent_box_after(m_raw, col_left)
                right_ok = expected_right < len(m_raw) and m_raw[expected_right] in BOX_CHARS
                if not right_ok:
                    found = _find_nearby_pipe(m_raw, expected_right, BOX_WALL_DRIFT)
                    if found is not None and not has_box_after_right and not has_adjacent_box_on_line:
                        fixed = _shift_pipe(m_raw, found, expected_right)
                        if fixed != m_raw:
                            all_lines[m_line_idx] = fixed + "\n"
                            # Keep the local copy current for the left-wall
                            # check below.
                            m_raw = fixed
                            changed = True
                if col_left < len(m_raw):
                    if m_raw[col_left] not in BOX_CHARS:
                        found = _find_nearby_pipe(m_raw, col_left, BOX_WALL_DRIFT)
                        if found is not None and not has_box_after_left and not has_adjacent_box_on_line:
                            fixed = _shift_pipe(m_raw, found, col_left)
                            if fixed != m_raw:
                                all_lines[m_line_idx] = fixed + "\n"
                                changed = True

            # Refresh the snapshot and the current line so the rest of this
            # line is scanned against the edited text.
            if changed:
                code_lines = [(i, all_lines[i].rstrip("\n")) for i in code_indices]
                raw = all_lines[line_idx].rstrip("\n")

            # Resume scanning after this box's opener.
            j = col_right_open + 1
|