slack-markdown-parser 2.4.2__tar.gz → 2.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/CHANGELOG.md +6 -0
- {slack_markdown_parser-2.4.2/slack_markdown_parser.egg-info → slack_markdown_parser-2.4.3}/PKG-INFO +1 -1
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/docs/spec-ja.md +1 -1
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/docs/spec.md +1 -1
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/pyproject.toml +1 -1
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/slack_markdown_parser/__init__.py +1 -1
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/slack_markdown_parser/converter.py +75 -3
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3/slack_markdown_parser.egg-info}/PKG-INFO +1 -1
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/LICENSE +0 -0
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/MANIFEST.in +0 -0
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/README-ja.md +0 -0
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/README.md +0 -0
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/setup.cfg +0 -0
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/slack_markdown_parser/py.typed +0 -0
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/slack_markdown_parser.egg-info/SOURCES.txt +0 -0
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/slack_markdown_parser.egg-info/dependency_links.txt +0 -0
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/slack_markdown_parser.egg-info/requires.txt +0 -0
- {slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/slack_markdown_parser.egg-info/top_level.txt +0 -0
|
@@ -6,6 +6,12 @@ The format is based on Keep a Changelog, and the project follows Semantic Versio
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [2.4.3] - 2026-05-29
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
|
|
13
|
+
- Stopped bare-URL autolinking from greedily swallowing trailing text. `normalize_bare_urls_for_slack_markdown` matched `https?://[^\s<]+`, so a scheme URL glued directly to following CJK text (e.g. `(https://example.com)**。に句点を直結。`) — common in Japanese, which puts no space after a URL — captured the closing paren, the `**` markers, the CJK punctuation, and the rest of the sentence into one `<…>` autolink, over-extending the link and exposing the literal `**`. The matched URL is now trimmed GFM-style: it stops at a doubled emphasis run (`**`/`~~`), at code/angle/pipe markers (`` ` ``, `<`, `>`, `|`), and at CJK punctuation (`、`/`。`/`」` …), and trailing punctuation (GFM's autolink set `! ? . , : * _ ~`, and an unbalanced `)`) is dropped while balanced parentheses are kept. `;` and quotes are kept (URL-legal), and a lone `*` (URL wildcards/queries) and CJK letters (IRIs / Unicode IDN hosts) are preserved.
|
|
14
|
+
|
|
9
15
|
## [2.4.2] - 2026-05-29
|
|
10
16
|
|
|
11
17
|
### Fixed
|
|
@@ -72,7 +72,7 @@ Slack は 2026-03-06 に `markdown` ブロックの公式ドキュメントを
|
|
|
72
72
|
### このパーサーが補正・安定化するもの
|
|
73
73
|
|
|
74
74
|
- `_..._` / `__...__` を Slack 互換の `*...*` / `**...**` に正規化する
|
|
75
|
-
- bare URL を Slack で安定しやすい `<https://...>`
|
|
75
|
+
- bare URL を Slack で安定しやすい `<https://...>` 形式にそろえる。まず URL を実際の範囲にトリミングする(GFM 風): 二重の強調記号(`**`/`~~`)・コード/山かっこ/パイプ記号(`` ` ``・`<`・`>`・`|`)・CJK / 全角の句読点(`、` `。` `」` `）` `！` …)で停止し、末尾の句読点(GFM の autolink 集合 `! ? . , : * _ ~` と不均衡な `)`)を除外する(`;` と引用符は URL で正当なため保持)。単独の `*`(URL のワイルドカード/クエリ)と CJK の**文字**(反復記号 `々` を含む。IRI / Unicode IDN ホスト。例 `https://ja.wikipedia.org/wiki/人々`)は保持する。これにより、日本語のように URL の直後へ空白なしで CJK 本文が続く場合でも、URL が行末まで(閉じの `**` ごと)貪欲に飲み込んでリンク化する事故を防ぐ。
|
|
76
76
|
- 崩れた Markdown テーブルを補って `table` ブロックへ変換する
|
|
77
77
|
- 意味が明確な単独 Markdown 構文を Slack ネイティブのブロックへ変換する
|
|
78
78
|
- 単独行の画像構文 `![alt](url)` → `image`
|
|
@@ -72,7 +72,7 @@ Slack still controls when those newer features appear and how they look, so trea
|
|
|
72
72
|
### Things this parser corrects or stabilizes
|
|
73
73
|
|
|
74
74
|
- `_..._` and `__...__` are normalized into Slack-friendly `*...*` and `**...**`
|
|
75
|
-
- Bare URLs are wrapped into Slack-friendly `<https://...>` form before `markdown` block delivery
|
|
75
|
+
- Bare URLs are wrapped into Slack-friendly `<https://...>` form before `markdown` block delivery. The URL is trimmed to its real extent first (GFM-style): it stops at a doubled emphasis run (`**`/`~~`), at code/angle/pipe markers (`` ` ``, `<`, `>`, `|`), and at CJK / full-width punctuation (`、` `。` `」` `）` `！` …); trailing punctuation (GFM's autolink set `! ? . , : * _ ~`, and an unbalanced `)`) is excluded — `;` and quotes are kept because they are URL-legal. A lone `*` (URL wildcards/queries) and CJK *letters* — including iteration marks like `々` (IRIs / Unicode IDN hosts such as `https://ja.wikipedia.org/wiki/人々`) — are preserved. This keeps a scheme URL glued directly to following CJK text — common in Japanese, where no space separates them — from greedily swallowing the rest of the line (including a closing `**`) into the autolink.
|
|
76
76
|
- Malformed Markdown tables are repaired before `table` block generation
|
|
77
77
|
- Unambiguous standalone Markdown constructs are promoted into native Slack blocks:
|
|
78
78
|
- standalone image syntax `![alt](url)` to `image`
|
{slack_markdown_parser-2.4.2 → slack_markdown_parser-2.4.3}/slack_markdown_parser/converter.py
RENAMED
|
@@ -143,6 +143,72 @@ def _is_han_or_kana_char(char: str) -> bool:
|
|
|
143
143
|
)
|
|
144
144
|
|
|
145
145
|
|
|
146
|
+
# Code/angle/pipe markers that never appear inside a bare URL in this library's
|
|
147
|
+
# prose context. (A single ``*`` and CJK letters are intentionally NOT here: a
|
|
148
|
+
# URL may legally contain a wildcard/query ``*`` and an IRI/IDN may contain CJK
|
|
149
|
+
# letters, so those must be preserved.)
|
|
150
|
+
_URL_STOP_CHARS = frozenset("`<>|")
|
|
151
|
+
# Trailing punctuation stripped from the end of a bare URL. This is exactly
|
|
152
|
+
# GFM's autolink-extension set (``! ? . , : * _ ~``); a closing paren is handled
|
|
153
|
+
# separately, with balancing. ``;`` and quotes are intentionally NOT included —
|
|
154
|
+
# ``;`` is URL-legal in matrix/path parameters and quotes are sub-delimiters, so
|
|
155
|
+
# trimming them could change the link target rather than just shedding prose.
|
|
156
|
+
_URL_TRAILING_PUNCTUATION = frozenset("!?.,:*_~")
|
|
157
|
+
# CJK and full/half-width punctuation/brackets that terminate prose, so a bare
|
|
158
|
+
# URL is cut here. This is an explicit set rather than the whole U+3000–U+303F
|
|
159
|
+
# block on purpose: letter-like CJK iteration marks (々 U+3005, 〻 U+303B),
|
|
160
|
+
# ditto/closure marks (〆 U+3006) and the ideographic number zero (〇 U+3007)
|
|
161
|
+
# are *excluded* so IRIs such as ``https://ja.wikipedia.org/wiki/人々`` survive.
|
|
162
|
+
_URL_CJK_BOUNDARY_CHARS = frozenset(
|
|
163
|
+
"、。〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞・…" # CJK punctuation & brackets
|
|
164
|
+
"!?,.:;()[]{}<>|" # full-width punctuation & brackets
|
|
165
|
+
"。「」、" # half-width CJK punctuation & brackets
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _is_url_boundary_char(char: str) -> bool:
    """Tell whether ``char`` is a hard stop for a bare URL.

    A character is a hard stop when it belongs to either boundary table: the
    code/angle/pipe markers in ``_URL_STOP_CHARS`` or the CJK/full-width
    punctuation in ``_URL_CJK_BOUNDARY_CHARS``. CJK *letters* (iteration marks
    such as ``々`` included) are in neither table, so an IRI like
    ``https://ja.wikipedia.org/wiki/人々`` is left intact.
    """
    boundary_tables = (_URL_STOP_CHARS, _URL_CJK_BOUNDARY_CHARS)
    return any(char in table for table in boundary_tables)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _trim_bare_url(url: str) -> str:
    """Cut a greedily matched bare URL back to the part that is really a URL.

    CJK prose usually puts no whitespace after a URL, so a greedy
    "everything up to whitespace or ``<``" match also captures the closing
    ``)``, emphasis markers, CJK punctuation and the rest of the sentence.
    Trimming happens in two passes:

    1. Truncate at the first hard boundary character, or at the start of a
       doubled emphasis run (``**`` / ``~~``). A single ``*`` or ``~`` and CJK
       letters do not truncate.
    2. Peel GFM-style trailing punctuation and unbalanced closing parens off
       the end; a ``)`` that has a matching ``(`` in the URL is kept.

    For example ``https://example.com)**。`` becomes ``https://example.com``.
    The result may be empty or a bare scheme; callers decide whether enough
    survives to autolink.
    """
    # Pass 1: truncate at the first hard boundary or doubled emphasis marker.
    for position, current in enumerate(url):
        doubled_marker = current in ("*", "~") and url.startswith(
            current * 2, position
        )
        if doubled_marker or _is_url_boundary_char(current):
            url = url[:position]
            break

    # Pass 2: walk back from the end. Only ")" is ever removed among the
    # parens (the trailing-punctuation set holds none), so the surplus of
    # closers can be counted once and decremented as they are dropped.
    surplus_closers = url.count(")") - url.count("(")
    end = len(url)
    while end > 0:
        tail = url[end - 1]
        if tail == ")":
            if surplus_closers <= 0:
                # Balanced paren: it belongs to the URL, stop trimming.
                break
            surplus_closers -= 1
        elif tail not in _URL_TRAILING_PUNCTUATION:
            break
        end -= 1

    return url[:end]
|
|
210
|
+
|
|
211
|
+
|
|
146
212
|
def _nested_code_space_strategy(
|
|
147
213
|
source: str,
|
|
148
214
|
start: int,
|
|
@@ -596,9 +662,15 @@ def normalize_bare_urls_for_slack_markdown(text: str) -> str:
|
|
|
596
662
|
|
|
597
663
|
url_match = BARE_URL_PATTERN.match(chunk, cursor)
|
|
598
664
|
if url_match:
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
665
|
+
url = _trim_bare_url(url_match.group(0))
|
|
666
|
+
scheme = re.match(r"https?://", url, re.IGNORECASE)
|
|
667
|
+
# Only autolink when something host-like survives the trim;
|
|
668
|
+
# a bare ``https://`` followed straight by CJK would otherwise
|
|
669
|
+
# produce an empty ``<https://>`` autolink.
|
|
670
|
+
if scheme and len(url) > scheme.end():
|
|
671
|
+
parts.append(f"<{url}>")
|
|
672
|
+
cursor += len(url)
|
|
673
|
+
continue
|
|
602
674
|
|
|
603
675
|
parts.append(char)
|
|
604
676
|
cursor += 1
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|