slack-markdown-parser 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- slack_markdown_parser/__init__.py +40 -0
- slack_markdown_parser/converter.py +572 -0
- slack_markdown_parser-2.0.2.dist-info/METADATA +189 -0
- slack_markdown_parser-2.0.2.dist-info/RECORD +7 -0
- slack_markdown_parser-2.0.2.dist-info/WHEEL +5 -0
- slack_markdown_parser-2.0.2.dist-info/licenses/LICENSE +21 -0
- slack_markdown_parser-2.0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""slack-markdown-parser public package API."""
|
|
2
|
+
|
|
3
|
+
__version__ = "2.0.2"
|
|
4
|
+
__license__ = "MIT"
|
|
5
|
+
|
|
6
|
+
from .converter import (
|
|
7
|
+
add_zero_width_spaces,
|
|
8
|
+
add_zero_width_spaces_to_markdown,
|
|
9
|
+
blocks_to_plain_text,
|
|
10
|
+
build_fallback_text_from_blocks,
|
|
11
|
+
convert_markdown_text_to_blocks,
|
|
12
|
+
convert_markdown_to_slack_blocks,
|
|
13
|
+
convert_markdown_to_slack_messages,
|
|
14
|
+
decode_html_entities,
|
|
15
|
+
extract_plain_text_from_table_cell,
|
|
16
|
+
markdown_table_to_slack_table,
|
|
17
|
+
normalize_markdown_tables,
|
|
18
|
+
parse_markdown_table,
|
|
19
|
+
split_blocks_by_table,
|
|
20
|
+
split_markdown_into_segments,
|
|
21
|
+
strip_zero_width_spaces,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"add_zero_width_spaces",
|
|
26
|
+
"add_zero_width_spaces_to_markdown",
|
|
27
|
+
"blocks_to_plain_text",
|
|
28
|
+
"build_fallback_text_from_blocks",
|
|
29
|
+
"convert_markdown_text_to_blocks",
|
|
30
|
+
"convert_markdown_to_slack_blocks",
|
|
31
|
+
"convert_markdown_to_slack_messages",
|
|
32
|
+
"decode_html_entities",
|
|
33
|
+
"extract_plain_text_from_table_cell",
|
|
34
|
+
"markdown_table_to_slack_table",
|
|
35
|
+
"normalize_markdown_tables",
|
|
36
|
+
"parse_markdown_table",
|
|
37
|
+
"split_blocks_by_table",
|
|
38
|
+
"split_markdown_into_segments",
|
|
39
|
+
"strip_zero_width_spaces",
|
|
40
|
+
]
|
|
@@ -0,0 +1,572 @@
|
|
|
1
|
+
"""Core conversion utilities for Slack Block Kit output.
|
|
2
|
+
|
|
3
|
+
This module converts LLM-generated Markdown text into Slack Block Kit blocks,
|
|
4
|
+
with support for Slack table blocks and robust fallback text generation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import html
|
|
10
|
+
import re
|
|
11
|
+
from typing import Any, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
# Zero-width space (U+200B) inserted around inline style markers.
ZWSP = "\u200b"

# A line that starts and ends with a pipe and contains nothing but whitespace,
# dashes, colons and pipes in between.
TABLE_SEPARATOR_PATTERN = re.compile(r"^\s*\|[\s\-:|]+\|\s*$")

# Separator row with optional outer pipes and at least two columns, each made
# of three or more dashes. A column may carry a leading and/or trailing
# alignment colon (":---", "---:", ":---:"), so right- and center-aligned
# separators are recognised as well.
LOOSE_TABLE_SEPARATOR_PATTERN = re.compile(
    r"^\s*\|?\s*:?-{3,}:?\s*(\|\s*:?-{3,}:?\s*)+\|?\s*$"
)

# Inline spans recognised inside a table cell:
# `code`, ~~strike~~, **bold** and *italic*.
INLINE_CELL_PATTERN = re.compile(
    r"(`[^`]+`|~~[^~]+~~|\*\*[^*]+\*\*|(?<!\*)\*[^*]+\*(?!\*))"
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def decode_html_entities(text: str) -> str:
    """Return *text* with HTML entities (e.g. ``&amp;``) unescaped.

    Falsy input such as an empty string or ``None`` is handed back unchanged.
    """
    return html.unescape(text) if text else text
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def strip_zero_width_spaces(text: str) -> str:
    """Remove every U+200B (zero-width space) and U+FEFF (BOM) from *text*.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    source = text if text else ""
    return re.sub(r"[\u200B\uFEFF]", "", source)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def add_zero_width_spaces_to_markdown(text: str) -> str:
    """Surround inline style tokens with ZWSP where they touch other text.

    Inline code, ``**bold**``, ``*italic*`` and ``~~strike~~`` tokens are
    wrapped in a zero-width space on both sides unless they are already
    bounded by whitespace or ZWSP on both sides. Runs of consecutive ZWSP are
    collapsed to a single one. Text between a pair of triple backticks is
    passed through unchanged.

    Args:
        text: Markdown source. Falsy values are returned as-is.

    Returns:
        The padded Markdown text.
    """
    if not text:
        return text

    separators = {" ", "\t", "\n", "\r", ZWSP}
    # Applied in this order: inline code, bold, italic, strike-through.
    token_patterns = (
        r"(?<!`)`[^`\n]+`(?!`)",
        r"(?<!\*)\*\*(.+?)\*\*(?!\*)",
        r"(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)",
        r"~~(.+?)~~",
    )
    fence_regex = r"(```.*?```)"

    def pad_token(found: re.Match[str], haystack: str) -> str:
        # A token at the very start or end of the text has no neighbour on
        # that side and therefore counts as "not isolated".
        token = found.group(0)
        left, right = found.start(), found.end()
        isolated_left = left > 0 and haystack[left - 1] in separators
        isolated_right = right < len(haystack) and haystack[right] in separators
        if isolated_left and isolated_right:
            return token
        return f"{ZWSP}{token}{ZWSP}"

    def pad_tokens(piece: str) -> str:
        if not piece:
            return piece
        for token_pattern in token_patterns:
            # Match offsets refer to the string handed to re.sub, so each
            # pass inspects the text as it was before that pass started.
            snapshot = piece
            piece = re.sub(
                token_pattern,
                lambda found, haystack=snapshot: pad_token(found, haystack),
                snapshot,
                flags=re.DOTALL,
            )
        return re.sub(f"{ZWSP}+", ZWSP, piece)

    chunks = re.split(fence_regex, text, flags=re.DOTALL)
    return "".join(
        chunk
        if re.fullmatch(fence_regex, chunk or "", flags=re.DOTALL)
        else pad_tokens(chunk)
        for chunk in chunks
    )


# Backward-compatible alias
add_zero_width_spaces = add_zero_width_spaces_to_markdown
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _is_strict_markdown_table_line(line: str) -> bool:
    """Return True when the stripped line is non-empty and both starts and ends with ``|``."""
    trimmed = line.strip()
    if not trimmed:
        return False
    return trimmed[0] == "|" and trimmed[-1] == "|"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _split_markdown_table_cells(line: str) -> List[str]:
    """Split one Markdown table row into stripped cell strings.

    One leading and one trailing pipe are dropped first. A pipe does not
    separate cells when it is backslash-escaped, sits between backticks, or
    sits inside an angle-bracket span such as ``<https://example.com|label>``.

    A ``<`` only opens an angle-bracket span when it is immediately followed
    by a non-whitespace character and a ``>`` occurs later in the row. This
    keeps a stray less-than sign (``a < b``) from swallowing every following
    column separator.

    Args:
        line: A single table row, with or without outer pipes.

    Returns:
        The cell texts in order; an empty list for a blank line.
    """
    working = line.strip()
    if not working:
        return []

    if working.startswith("|"):
        working = working[1:]
    if working.endswith("|"):
        working = working[:-1]

    cells: List[str] = []
    current: List[str] = []
    in_angle = False
    in_inline_code = False
    escaped = False
    # Position of the last ">"; a "<" at or after it can never be closed.
    last_close = working.rfind(">")

    for index, ch in enumerate(working):
        if escaped:
            # Character following a backslash is taken literally.
            current.append(ch)
            escaped = False
            continue

        if ch == "\\":
            current.append(ch)
            escaped = True
            continue

        if ch == "`":
            in_inline_code = not in_inline_code
            current.append(ch)
            continue

        if not in_inline_code:
            if ch == "<":
                following = working[index + 1 : index + 2]
                if following and not following.isspace() and index < last_close:
                    in_angle = True
            elif ch == ">" and in_angle:
                in_angle = False

        if ch == "|" and not in_angle and not in_inline_code:
            cells.append("".join(current).strip())
            current = []
            continue

        current.append(ch)

    cells.append("".join(current).strip())
    return cells
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _split_heading_and_table_row(line: str) -> Optional[tuple[str, str]]:
    """Separate a ``#`` heading from table cells that share its line.

    The line is cut at the first pipe that is not between backticks. The text
    before the cut must start with ``#`` (ignoring leading whitespace). If
    that text contains a space, its last space-separated word is moved into
    the table row as the first cell; the rest is the heading. The row is
    given outer pipes when they are missing.

    Returns:
        ``(heading_text, table_row)``, or ``None`` when the line has no
        usable pipe or does not start with ``#``.
    """
    pipe_at = -1
    inside_code = False
    for position, char in enumerate(line):
        if char == "`":
            inside_code = not inside_code
            continue
        if char == "|" and not inside_code:
            pipe_at = position
            break
    if pipe_at < 0:
        return None

    before = line[:pipe_at].rstrip()
    if not before.lstrip().startswith("#"):
        return None

    row_tail = line[pipe_at:].strip()
    if " " in before.strip():
        heading, first_cell = before.rsplit(" ", 1)
    else:
        heading, first_cell = before, ""

    # row_tail always begins with the pipe found above, so the row is never
    # pipe-free at this point.
    row = f"{first_cell} {row_tail}".strip()
    if not row.startswith("|"):
        row = "|" + row
    if not row.endswith("|"):
        row += "|"
    return heading, row
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def normalize_markdown_tables(markdown_text: str) -> str:
    """Rewrite loosely formatted Markdown tables into a canonical pipe form.

    Consecutive lines containing ``|`` are collected into a buffer. When the
    buffer is judged to be a table, its first line becomes the header, a
    ``|---|...|`` separator row is emitted directly after it, existing
    separator rows are dropped, every row gets outer pipes, and each row is
    padded or truncated to the header's column count. Buffers that are not
    tables are emitted unchanged.

    Two safeguards keep non-table content intact:

    * Lines inside fenced code blocks (``` or ~~~) are copied through
      verbatim, so shell pipelines and similar code are never rewritten.
    * A heading line that was split into "heading + first table row" is put
      back together when the lines that follow do not form a table.

    Args:
        markdown_text: Markdown source. Falsy values are returned as-is.

    Returns:
        The normalized text, joined with ``\\n``.
    """
    if not markdown_text:
        return markdown_text

    normalized: List[str] = []
    buffer: List[str] = []
    # Original text of the heading line whose tail currently sits in
    # buffer[0]; None when the buffer did not start from a heading split.
    split_heading_source: Optional[str] = None

    def add_outer_pipes(raw_line: str) -> str:
        working = raw_line.strip()
        if not working.startswith("|"):
            working = "|" + working
        if not working.endswith("|"):
            working = working + "|"
        return working

    def is_table_block(candidates: List[str]) -> bool:
        if len(candidates) < 2:
            return False
        # An explicit separator row settles the question.
        if any(
            LOOSE_TABLE_SEPARATOR_PATTERN.match(line.strip()) for line in candidates
        ):
            return True

        # Otherwise require at least two rows with two or more columns whose
        # column counts differ by at most one.
        column_counts: List[int] = []
        for line in candidates:
            if "|" not in line:
                continue
            working = add_outer_pipes(line)
            if TABLE_SEPARATOR_PATTERN.match(working):
                continue
            column_counts.append(len(_split_markdown_table_cells(working)))

        if len(column_counts) < 2:
            return False

        min_cols, max_cols = min(column_counts), max(column_counts)
        if max_cols < 2:
            return False

        return max_cols - min_cols <= 1

    def normalize_row(raw_line: str, column_count: int) -> str:
        # Separator rows and pipe-free lines produce an empty string.
        if LOOSE_TABLE_SEPARATOR_PATTERN.match(raw_line.strip()):
            return ""
        if "|" not in raw_line:
            return ""
        cells = _split_markdown_table_cells(add_outer_pipes(raw_line))
        cells = (cells + [""] * column_count)[:column_count]
        return "|" + "|".join(cells) + "|"

    def flush_buffer() -> None:
        nonlocal buffer, split_heading_source
        if not buffer:
            return

        if is_table_block(buffer):
            header_cells = _split_markdown_table_cells(add_outer_pipes(buffer[0]))
            column_count = max(1, len(header_cells))
            normalized.append(normalize_row(buffer[0], column_count))
            normalized.append("|" + "|".join(["---"] * column_count) + "|")
            for line in buffer[1:]:
                row = normalize_row(line, column_count)
                if row:
                    normalized.append(row)
        elif split_heading_source is not None:
            # The heading text was appended immediately before buffering
            # began and nothing else has been emitted since, so it is the
            # last entry. Replace it with the untouched original line.
            normalized.pop()
            normalized.append(split_heading_source)
            normalized.extend(buffer[1:])
        else:
            normalized.extend(buffer)

        buffer = []
        split_heading_source = None

    # Fence character and length of the currently open fenced code block;
    # fence_len == 0 means "not inside a fence".
    fence_char = ""
    fence_len = 0

    for line in markdown_text.splitlines():
        stripped = line.strip()

        if fence_len:
            normalized.append(line)
            # A closing fence consists solely of the fence character and is
            # at least as long as the opening fence.
            if len(stripped) >= fence_len and not stripped.strip(fence_char):
                fence_char, fence_len = "", 0
            continue

        lead = stripped[:1]
        run = len(stripped) - len(stripped.lstrip(lead)) if lead in ("`", "~") else 0
        # A backtick fence may not contain further backticks on its opening
        # line; that form is an inline code span instead.
        if run >= 3 and not (lead == "`" and "`" in stripped[run:]):
            flush_buffer()
            normalized.append(line)
            fence_char, fence_len = lead, run
            continue

        heading_and_table = _split_heading_and_table_row(line)
        if heading_and_table:
            flush_buffer()
            heading_text, table_line = heading_and_table
            normalized.append(heading_text)
            buffer.append(table_line)
            split_heading_source = line
        elif "|" in stripped:
            buffer.append(line)
        else:
            flush_buffer()
            normalized.append(line)
    flush_buffer()

    return "\n".join(normalized)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def looks_like_markdown_table(text: str) -> bool:
    """Report whether *text* has at least two lines that start and end with ``|``.

    Surrounding whitespace on each line is ignored; blank lines never count.
    """
    trimmed_lines = [raw.strip() for raw in text.splitlines()]
    piped = [row for row in trimmed_lines if row.startswith("|") and row.endswith("|")]
    return len(piped) >= 2
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _create_table_cell(text: str) -> Dict[str, Any]:
    """Turn a Markdown cell fragment into a Slack ``rich_text`` cell dict.

    Zero-width spaces are removed and ``\\|`` is unescaped to ``|``. A cell
    that is blank afterwards becomes ``"-"``. Spans matched by
    ``INLINE_CELL_PATTERN`` become text elements carrying a ``code``,
    ``strike``, ``bold`` or ``italic`` style; everything else becomes plain
    text elements.
    """
    cell_text = strip_zero_width_spaces(text or "").replace("\\|", "|")
    if not cell_text.strip():
        cell_text = "-"

    # (delimiter, style key). "**" must be tried before "*" so that bold is
    # not mistaken for italic.
    delimiters = (("`", "code"), ("~~", "strike"), ("**", "bold"), ("*", "italic"))

    runs: List[Dict[str, Any]] = []
    cursor = 0
    for found in INLINE_CELL_PATTERN.finditer(cell_text):
        begin, finish = found.span()
        if begin > cursor:
            runs.append({"type": "text", "text": cell_text[cursor:begin]})

        token = found.group(0)
        run: Dict[str, Any] = {"type": "text", "text": token}
        for delimiter, style_key in delimiters:
            if token.startswith(delimiter) and token.endswith(delimiter):
                width = len(delimiter)
                run["text"] = token[width:-width]
                run["style"] = {style_key: True}
                break
        runs.append(run)
        cursor = finish

    if cursor < len(cell_text):
        runs.append({"type": "text", "text": cell_text[cursor:]})

    if not runs:
        runs.append({"type": "text", "text": cell_text})

    section = {"type": "rich_text_section", "elements": runs}
    return {"type": "rich_text", "elements": [section]}
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def extract_plain_text_from_table_cell(cell: Dict[str, Any]) -> str:
    """Concatenate the text carried by a Slack table cell.

    For a ``rich_text`` cell, the ``text`` of every dict child of each
    ``rich_text_section`` is joined; other dict elements contribute their
    ``text`` value (stringified) when they have one. Any other dict yields
    ``str(cell.get("text", ""))``. Non-dict input yields an empty string.
    """
    if not isinstance(cell, dict):
        return ""
    if cell.get("type") != "rich_text":
        return str(cell.get("text", ""))

    pieces: List[str] = []
    for node in cell.get("elements", []):
        if not isinstance(node, dict):
            continue
        if node.get("type") == "rich_text_section":
            pieces.extend(
                leaf.get("text", "")
                for leaf in node.get("elements", [])
                if isinstance(leaf, dict)
            )
        elif "text" in node:
            pieces.append(str(node.get("text", "")))
    return "".join(pieces)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def markdown_table_to_slack_table(table_markdown: str) -> Optional[Dict[str, Any]]:
    """Build a Slack ``table`` block from Markdown table text.

    Blank lines, lines without a pipe and lines matching
    ``TABLE_SEPARATOR_PATTERN`` are skipped. The first remaining row fixes
    the column count; later rows are padded with empty cells or truncated to
    that width. Blank cells are rendered as ``"-"``.

    Returns:
        ``{"type": "table", "rows": [...]}``, or ``None`` when no row was found.
    """
    rows: List[List[Dict[str, Any]]] = []
    width: Optional[int] = None

    for raw in table_markdown.strip().splitlines():
        if not raw.strip():
            continue
        line = raw.rstrip()
        if TABLE_SEPARATOR_PATTERN.match(line) or "|" not in line:
            continue

        cells = _split_markdown_table_cells(line)
        if not cells:
            continue

        if width is None:
            width = max(1, len(cells))
        else:
            # A negative repeat count yields no padding, so over-long rows
            # are simply cut to the header width.
            cells = cells[:width] + [""] * (width - len(cells))

        rows.append(
            [_create_table_cell(value if value.strip() else "-") for value in cells]
        )

    return {"type": "table", "rows": rows} if rows else None


# Backward-compatible alias
markdown_table_to_table_block = markdown_table_to_slack_table
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def split_markdown_into_segments(markdown_text: str) -> List[Dict[str, str]]:
    """Group consecutive lines into alternating ``text`` and ``table`` segments.

    A line is a table line when, after stripping, it starts and ends with
    ``|``. Lines inside a fenced code block (``` or ~~~), including the fence
    lines themselves, are always treated as text so that a table shown inside
    a code sample does not break the fence apart.

    Args:
        markdown_text: Markdown source; falsy input yields an empty list.

    Returns:
        A list of ``{"type": "text" | "table", "content": str}`` dicts in
        document order, with each segment's lines joined by ``\\n``.
    """
    segments: List[Dict[str, str]] = []
    if not markdown_text:
        return segments

    def emit(collected: List[str], as_table: Optional[bool]) -> None:
        segments.append(
            {
                "type": "table" if as_table else "text",
                "content": "\n".join(collected),
            }
        )

    current: List[str] = []
    current_is_table: Optional[bool] = None
    # Fence character and length of the open fenced code block;
    # fence_len == 0 means "not inside a fence".
    fence_char = ""
    fence_len = 0

    for line in markdown_text.splitlines():
        stripped = line.strip()

        if fence_len:
            is_table_line = False
            # A closing fence consists solely of the fence character and is
            # at least as long as the opening fence.
            if len(stripped) >= fence_len and not stripped.strip(fence_char):
                fence_char, fence_len = "", 0
        else:
            lead = stripped[:1]
            run = (
                len(stripped) - len(stripped.lstrip(lead))
                if lead in ("`", "~")
                else 0
            )
            # A backtick fence may not contain further backticks on its
            # opening line; that form is an inline code span instead.
            if run >= 3 and not (lead == "`" and "`" in stripped[run:]):
                fence_char, fence_len = lead, run
                is_table_line = False
            else:
                is_table_line = stripped.startswith("|") and stripped.endswith("|")

        # Close the running segment whenever the line kind changes.
        if current and is_table_line != current_is_table:
            emit(current, current_is_table)
            current = []
        current_is_table = is_table_line
        current.append(line)

    if current:
        emit(current, current_is_table)
    return segments
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def convert_markdown_to_slack_blocks(markdown_text: str) -> List[Dict[str, Any]]:
    """Convert Markdown into a flat list of Slack ``markdown`` and ``table`` blocks.

    HTML entities are decoded and tables are normalized first. Each resulting
    segment that is tagged as a table and passes ``looks_like_markdown_table``
    is converted to a ``table`` block. Everything else, including table
    segments that fail conversion, is ZWSP-padded and emitted as a
    ``markdown`` block. Whitespace-only segments are dropped.
    """
    blocks: List[Dict[str, Any]] = []
    if not markdown_text:
        return blocks

    prepared = normalize_markdown_tables(decode_html_entities(markdown_text))

    for segment in split_markdown_into_segments(prepared):
        body = segment.get("content", "")
        if not body.strip():
            continue

        wants_table = segment.get("type") == "table" and looks_like_markdown_table(body)
        table_block = markdown_table_to_slack_table(body) if wants_table else None
        if table_block:
            blocks.append(table_block)
            continue

        padded = add_zero_width_spaces_to_markdown(body)
        if padded.strip():
            blocks.append({"type": "markdown", "text": padded})

    return blocks


# Backward-compatible alias
convert_markdown_text_to_blocks = convert_markdown_to_slack_blocks
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def split_blocks_by_table(blocks: List[Dict[str, Any]]) -> List[List[Dict[str, Any]]]:
    """Partition blocks into message groups with every table in its own group.

    Each dict block whose ``type`` is ``"table"`` becomes a single-element
    group; runs of other blocks between tables are kept together in order.
    ``None`` or an empty list yields an empty list.
    """
    groups: List[List[Dict[str, Any]]] = []
    pending: List[Dict[str, Any]] = []

    for item in blocks or []:
        is_table = isinstance(item, dict) and item.get("type") == "table"
        if not is_table:
            pending.append(item)
            continue
        # Close the run collected so far before isolating the table.
        if pending:
            groups.append(pending)
            pending = []
        groups.append([item])

    if pending:
        groups.append(pending)
    return groups
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def convert_markdown_to_slack_messages(
    markdown_text: str,
) -> List[List[Dict[str, Any]]]:
    """Convert Markdown into per-message block lists.

    The text is converted with ``convert_markdown_to_slack_blocks`` and the
    result is partitioned by ``split_blocks_by_table``. An empty list is
    returned when the conversion produces no blocks.
    """
    converted = convert_markdown_to_slack_blocks(markdown_text)
    return split_blocks_by_table(converted) if converted else []
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def blocks_to_plain_text(blocks: List[Dict[str, Any]]) -> str:
    """Render Slack blocks as newline-separated plain text.

    * ``markdown`` blocks contribute their text with zero-width spaces removed.
    * ``table`` blocks contribute one line per row, with non-empty cell texts
      joined by ``" | "``.
    * Any other dict block contributes its ``text`` field. When that field is
      a Slack text object (a dict with its own ``text`` key, as used by
      ``section`` and ``header`` blocks), the inner string is used instead of
      the dict's Python repr.

    Non-dict entries are ignored.

    Args:
        blocks: Block list; ``None`` is treated as empty.

    Returns:
        The collected lines joined with ``\\n`` and stripped.
    """
    parts: List[str] = []

    for block in blocks or []:
        if not isinstance(block, dict):
            continue
        block_type = block.get("type")

        if block_type == "markdown":
            text = block.get("text", "")
            if text:
                parts.append(strip_zero_width_spaces(text))
        elif block_type == "table":
            for row in block.get("rows") or []:
                if not isinstance(row, list):
                    continue
                cell_texts = [
                    strip_zero_width_spaces(cell_text)
                    for cell_text in map(extract_plain_text_from_table_cell, row)
                    if cell_text
                ]
                if cell_texts:
                    parts.append(" | ".join(cell_texts))
        else:
            text = block.get("text", "")
            if isinstance(text, dict):
                # Slack text object: {"type": "mrkdwn" | "plain_text", "text": "..."}
                text = text.get("text", "")
            if text:
                parts.append(str(text))

    return "\n".join(part for part in parts if part).strip()
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def build_fallback_text_from_blocks(blocks: List[Dict[str, Any]]) -> str:
    """Build fallback text from ``markdown`` and ``table`` blocks.

    ``markdown`` blocks contribute their text with zero-width spaces removed.
    ``table`` blocks contribute one line per row with all cell texts joined
    by ``" | "``. Other block types and non-dict entries are ignored. A table
    whose ``rows`` is missing or ``None`` is treated as empty, matching
    ``blocks_to_plain_text``.

    Args:
        blocks: Block list; ``None`` is treated as empty.

    Returns:
        The non-blank parts joined by a blank line (``\\n\\n``).
    """
    plain_parts: List[str] = []

    for block in blocks or []:
        if not isinstance(block, dict):
            continue

        block_type = block.get("type")
        if block_type == "markdown":
            text = strip_zero_width_spaces(block.get("text", ""))
            if text.strip():
                plain_parts.append(text)
        elif block_type == "table":
            table_lines: List[str] = []
            # "or []" guards against an explicit ``"rows": None``.
            for row in block.get("rows") or []:
                if not isinstance(row, list):
                    continue
                cells = [extract_plain_text_from_table_cell(cell) for cell in row]
                if cells:
                    table_lines.append(" | ".join(cells))
            if table_lines:
                plain_parts.append("\n".join(table_lines))

    return "\n\n".join(part for part in plain_parts if part.strip())
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
# Backward-compatible helper retained for existing imports.
|
|
563
|
+
def parse_markdown_table(table_text: str) -> List[List[str]]:
    """Return the table's rows as lists of cell strings.

    Blank lines, lines matching ``TABLE_SEPARATOR_PATTERN`` and lines without
    a pipe are skipped; rows are neither padded nor truncated.
    """
    matrix: List[List[str]] = []
    for candidate in table_text.strip().splitlines():
        trimmed = candidate.strip()
        if not trimmed:
            continue
        if TABLE_SEPARATOR_PATTERN.match(trimmed) or "|" not in candidate:
            continue
        matrix.append(_split_markdown_table_cells(candidate))
    return matrix
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: slack-markdown-parser
|
|
3
|
+
Version: 2.0.2
|
|
4
|
+
Summary: Convert LLM Markdown into Slack Block Kit markdown/table messages
|
|
5
|
+
Author: darkgaldragon
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/darkgaldragon/slack-markdown-parser
|
|
8
|
+
Project-URL: Source, https://github.com/darkgaldragon/slack-markdown-parser
|
|
9
|
+
Project-URL: Issues, https://github.com/darkgaldragon/slack-markdown-parser/issues
|
|
10
|
+
Keywords: slack,markdown,block-kit,table,llm
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: black>=24.0.0; extra == "dev"
|
|
24
|
+
Requires-Dist: build>=1.2.0; extra == "dev"
|
|
25
|
+
Requires-Dist: pip-audit>=2.7.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
27
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
|
|
28
|
+
Requires-Dist: ruff>=0.6.0; extra == "dev"
|
|
29
|
+
Requires-Dist: twine>=5.1.0; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# slack-markdown-parser
|
|
33
|
+
|
|
34
|
+
`slack-markdown-parser` is a Python library that converts standard Markdown generated by LLMs into Slack Block Kit messages built from `markdown` and `table` blocks.
|
|
35
|
+
|
|
36
|
+
## Why this library exists
|
|
37
|
+
|
|
38
|
+
Many Slack AI bots have traditionally converted model output into Slack-specific `mrkdwn`, but that approach creates a few recurring problems:
|
|
39
|
+
|
|
40
|
+
- Conversion overhead: LLMs naturally generate standard Markdown, so `mrkdwn` usually requires extra transformation logic or prompt constraints.
|
|
41
|
+
- Unstable formatting in languages without word spacing: in Japanese and similar scripts, Slack can fail to interpret `*`, `~`, and related markers correctly, exposing the raw punctuation.
|
|
42
|
+
- No table syntax in `mrkdwn`: Markdown tables need custom fallback rendering if you stay in the old format.
|
|
43
|
+
|
|
44
|
+
## Design approach
|
|
45
|
+
|
|
46
|
+
This library leans on Slack Block Kit's `markdown` block for standard Markdown and `table` block for tables.
|
|
47
|
+
|
|
48
|
+
| Problem | Approach |
|
|
49
|
+
|---|---|
|
|
50
|
+
| Conversion overhead | Send standard Markdown through Slack `markdown` blocks without rewriting it into `mrkdwn`. |
|
|
51
|
+
| Formatting instability | Insert zero-width spaces (ZWSP, U+200B) around formatting tokens when needed so Slack parses inline styling more reliably without visible extra spaces. |
|
|
52
|
+
| No table syntax in `mrkdwn` | Detect Markdown tables and convert them into Slack `table` blocks, including repair of common LLM-generated table inconsistencies. |
|
|
53
|
+
|
|
54
|
+
## Features
|
|
55
|
+
|
|
56
|
+
- Convert standard Markdown into Slack `markdown` blocks
|
|
57
|
+
- Convert Markdown tables into Slack `table` blocks
|
|
58
|
+
- Repair common LLM table issues such as missing outer pipes, missing separator rows, mismatched column counts, and empty cells
|
|
59
|
+
- Split output into multiple Slack messages when needed to satisfy Slack's "one table per message" constraint
|
|
60
|
+
- Add ZWSP around inline formatting tokens to reduce rendering issues outside fenced code blocks
|
|
61
|
+
- Build fallback text for `chat.postMessage.text` from generated blocks
|
|
62
|
+
|
|
63
|
+
## Requirements
|
|
64
|
+
|
|
65
|
+
- Your Slack integration must support Block Kit payloads with `markdown` and `table` blocks.
|
|
66
|
+
- This library does not help when your delivery path only accepts plain `text` or `mrkdwn` strings.
|
|
67
|
+
|
|
68
|
+
## Installation
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install slack-markdown-parser
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quick start
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from slack_markdown_parser import (
|
|
78
|
+
build_fallback_text_from_blocks,
|
|
79
|
+
convert_markdown_to_slack_messages,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
markdown = """
|
|
83
|
+
# Weekly Report
|
|
84
|
+
|
|
85
|
+
| Team | Status |
|
|
86
|
+
|---|---|
|
|
87
|
+
| API | **On track** |
|
|
88
|
+
| UI | *In progress* |
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
for blocks in convert_markdown_to_slack_messages(markdown):
|
|
92
|
+
payload = {
|
|
93
|
+
"blocks": blocks,
|
|
94
|
+
"text": build_fallback_text_from_blocks(blocks) or "report",
|
|
95
|
+
}
|
|
96
|
+
print(payload)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
`convert_markdown_to_slack_messages` automatically splits output into multiple messages when the input contains multiple tables.
|
|
100
|
+
|
|
101
|
+
## Rendering example
|
|
102
|
+
|
|
103
|
+
Example input:
|
|
104
|
+
|
|
105
|
+
````markdown
|
|
106
|
+
# Weekly Product Update
|
|
107
|
+
|
|
108
|
+
This week we worked on **search performance** and *UI polish*. The old flow is ~~scheduled for removal~~.
|
|
109
|
+
The detailed log ID is `run-20260305-02`.
|
|
110
|
+
Reference: https://example.com/changelog
|
|
111
|
+
|
|
112
|
+
- Improved **API response time**
|
|
113
|
+
- Increased *cache hit rate*
|
|
114
|
+
- Adjusted timeout settings
|
|
115
|
+
- Stabilized batch processing
|
|
116
|
+
- Unified retry counts
|
|
117
|
+
- Updated documentation
|
|
118
|
+
|
|
119
|
+
Category | Status | Owner
|
|
120
|
+
API | **In progress** | Team A
|
|
121
|
+
UI | *Under review* | Team B
|
|
122
|
+
QA | ~~On hold~~ | Team C
|
|
123
|
+
|
|
124
|
+
> Note: production rollout is scheduled for 2026-03-08 10:00 JST
|
|
125
|
+
|
|
126
|
+
1. Finalize release notes
|
|
127
|
+
1. Unify change labels
|
|
128
|
+
2. Add impact notes
|
|
129
|
+
2. Tune monitoring alert thresholds
|
|
130
|
+
1. Update the `warning` threshold
|
|
131
|
+
3. Re-check QA
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
./deploy.sh production
|
|
135
|
+
```
|
|
136
|
+
````
|
|
137
|
+
|
|
138
|
+
Example Slack bot rendering (`markdown` + `table` blocks):
|
|
139
|
+
|
|
140
|
+

|
|
141
|
+
|
|
142
|
+
## Public API
|
|
143
|
+
|
|
144
|
+
### Main functions
|
|
145
|
+
|
|
146
|
+
| Function | Description |
|
|
147
|
+
|---|---|
|
|
148
|
+
| `convert_markdown_to_slack_messages(markdown_text) -> list[list[dict]]` | Convert Markdown into Slack messages already split around table blocks. |
|
|
149
|
+
| `convert_markdown_to_slack_blocks(markdown_text) -> list[dict]` | Convert Markdown into a flat Block Kit block list. |
|
|
150
|
+
| `build_fallback_text_from_blocks(blocks) -> str` | Build fallback text suitable for `chat.postMessage.text`. |
|
|
151
|
+
| `blocks_to_plain_text(blocks) -> str` | Convert blocks into plain text. |
|
|
152
|
+
|
|
153
|
+
### Utility functions
|
|
154
|
+
|
|
155
|
+
| Function | Description |
|
|
156
|
+
|---|---|
|
|
157
|
+
| `normalize_markdown_tables(markdown_text) -> str` | Normalize Markdown table syntax before conversion. |
|
|
158
|
+
| `add_zero_width_spaces_to_markdown(text) -> str` | Insert ZWSP around formatting tokens where Slack needs stronger boundaries. |
|
|
159
|
+
| `decode_html_entities(text) -> str` | Decode HTML entities before parsing. |
|
|
160
|
+
| `strip_zero_width_spaces(text) -> str` | Remove ZWSP (`U+200B`) and BOM (`U+FEFF`) while preserving join-control characters such as ZWJ. |
|
|
161
|
+
|
|
162
|
+
### Lower-level exported helpers
|
|
163
|
+
|
|
164
|
+
These are also part of the public package surface:
|
|
165
|
+
|
|
166
|
+
- `add_zero_width_spaces`
|
|
167
|
+
- `convert_markdown_text_to_blocks`
|
|
168
|
+
- `extract_plain_text_from_table_cell`
|
|
169
|
+
- `markdown_table_to_slack_table`
|
|
170
|
+
- `parse_markdown_table`
|
|
171
|
+
- `split_blocks_by_table`
|
|
172
|
+
- `split_markdown_into_segments`
|
|
173
|
+
|
|
174
|
+
## Specification and scope
|
|
175
|
+
|
|
176
|
+
- Behavior spec: [docs/spec.md](docs/spec.md)
|
|
177
|
+
- Japanese behavior spec: [docs/spec-ja.md](docs/spec-ja.md)
|
|
178
|
+
- Non-goals:
|
|
179
|
+
- Generating Slack `mrkdwn` strings
|
|
180
|
+
- Supporting clients or MCP tools that can only send `mrkdwn`
|
|
181
|
+
|
|
182
|
+
## Contact
|
|
183
|
+
|
|
184
|
+
- GitHub Issues / Pull Requests
|
|
185
|
+
- X: [@darkgaldragon](https://x.com/darkgaldragon)
|
|
186
|
+
|
|
187
|
+
## License
|
|
188
|
+
|
|
189
|
+
MIT
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
slack_markdown_parser/__init__.py,sha256=e4w8M0cLAUX631-wZemBODbjgD-TTyjBVvx09dwYgwI,1148
|
|
2
|
+
slack_markdown_parser/converter.py,sha256=-mMQ8500gOViFv4tcSWtLE1ar-wRvmu5LFkVdXPrRtA,18188
|
|
3
|
+
slack_markdown_parser-2.0.2.dist-info/licenses/LICENSE,sha256=lCnCtIVKUITKdB6nFakqRH3b8MspPHHNoPhnuqKSbmE,1091
|
|
4
|
+
slack_markdown_parser-2.0.2.dist-info/METADATA,sha256=E_N_3Vxn10vCeeAJuEcrMNGUUTFmRahqCQi8ZdsXmgs,6804
|
|
5
|
+
slack_markdown_parser-2.0.2.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
|
|
6
|
+
slack_markdown_parser-2.0.2.dist-info/top_level.txt,sha256=DTvPPtEhPEi0fPsF5-EXpITua9BCBIY9sll4QBFTYhs,22
|
|
7
|
+
slack_markdown_parser-2.0.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 slack-markdown-parser contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
slack_markdown_parser
|