md2linkedin 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- md2linkedin-0.1.0/LICENSE.md +21 -0
- md2linkedin-0.1.0/PKG-INFO +154 -0
- md2linkedin-0.1.0/README.md +122 -0
- md2linkedin-0.1.0/pyproject.toml +142 -0
- md2linkedin-0.1.0/src/md2linkedin/__init__.py +31 -0
- md2linkedin-0.1.0/src/md2linkedin/_cli.py +91 -0
- md2linkedin-0.1.0/src/md2linkedin/_converter.py +466 -0
- md2linkedin-0.1.0/src/md2linkedin/_unicode.py +169 -0
- md2linkedin-0.1.0/src/md2linkedin/py.typed +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 md2linkedin authors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: md2linkedin
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Convert Markdown to LinkedIn-friendly Unicode text
|
|
5
|
+
Keywords: markdown,linkedin,unicode,formatting,conversion
|
|
6
|
+
Author: Indrajeet Patil
|
|
7
|
+
Author-email: Indrajeet Patil <patilindrajeet.science@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE.md
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Topic :: Text Processing :: Markup
|
|
23
|
+
Classifier: Topic :: Utilities
|
|
24
|
+
Requires-Dist: click>=8.0
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Project-URL: Homepage, https://github.com/IndrajeetPatil/md2linkedin
|
|
27
|
+
Project-URL: Documentation, https://www.indrapatil.com/md2linkedin/
|
|
28
|
+
Project-URL: Repository, https://github.com/IndrajeetPatil/md2linkedin
|
|
29
|
+
Project-URL: Issues, https://github.com/IndrajeetPatil/md2linkedin/issues
|
|
30
|
+
Project-URL: Changelog, https://github.com/IndrajeetPatil/md2linkedin/blob/main/CHANGELOG.md
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# md2linkedin <img src="docs/assets/logo.png" align="right" width="240" />
|
|
37
|
+
|
|
38
|
+
[](https://pypi.org/project/md2linkedin/)
|
|
40
|
+

|
|
41
|
+
[](https://pypistats.org/packages/md2linkedin)
|
|
43
|
+
|
|
44
|
+
`md2linkedin` converts Markdown text to LinkedIn-compatible plain text by
|
|
45
|
+
replacing bold, italic, and bold-italic markers with Unicode Mathematical
|
|
46
|
+
Sans-Serif characters. This preserves visual formatting when pasting into
|
|
47
|
+
platforms like LinkedIn that do not support Markdown natively.
|
|
48
|
+
|
|
49
|
+
## Installation
|
|
50
|
+
|
|
51
|
+
| Package Manager | Installation Command |
|
|
52
|
+
|-----------------|---------------------------|
|
|
53
|
+
| pip | `pip install md2linkedin` |
|
|
54
|
+
| uv | `uv add md2linkedin` |
|
|
55
|
+
|
|
56
|
+
## Usage
|
|
57
|
+
|
|
58
|
+
### Python API
|
|
59
|
+
|
|
60
|
+
``` python
|
|
61
|
+
from md2linkedin import convert
|
|
62
|
+
|
|
63
|
+
md = """
|
|
64
|
+
# Exciting News
|
|
65
|
+
|
|
66
|
+
I'm thrilled to share that **we just launched** a new product!
|
|
67
|
+
|
|
68
|
+
Key highlights:
|
|
69
|
+
|
|
70
|
+
- **Performance**: *3x faster* than the previous version
|
|
71
|
+
- **Reliability**: ***zero downtime*** deployments
|
|
72
|
+
- **Developer UX**: clean, intuitive API
|
|
73
|
+
|
|
74
|
+
Check it out and let me know what you think.
|
|
75
|
+
"""
|
|
76
|
+
|
|
77
|
+
print(convert(md))
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
82
|
+
𝗘𝗫𝗖𝗜𝗧𝗜𝗡𝗚 𝗡𝗘𝗪𝗦
|
|
83
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
84
|
+
|
|
85
|
+
I'm thrilled to share that 𝘄𝗲 𝗷𝘂𝘀𝘁 𝗹𝗮𝘂𝗻𝗰𝗵𝗲𝗱 a new product!
|
|
86
|
+
|
|
87
|
+
Key highlights:
|
|
88
|
+
|
|
89
|
+
• 𝗣𝗲𝗿𝗳𝗼𝗿𝗺𝗮𝗻𝗰𝗲: 3𝘹 𝘧𝘢𝘴𝘵𝘦𝘳 than the previous version
|
|
90
|
+
• 𝗥𝗲𝗹𝗶𝗮𝗯𝗶𝗹𝗶𝘁𝘆: 𝙯𝙚𝙧𝙤 𝙙𝙤𝙬𝙣𝙩𝙞𝙢𝙚 deployments
|
|
91
|
+
• 𝗗𝗲𝘃𝗲𝗹𝗼𝗽𝗲𝗿 𝗨𝗫: clean, intuitive API
|
|
92
|
+
|
|
93
|
+
Check it out and let me know what you think.
|
|
94
|
+
|
|
95
|
+
### CLI
|
|
96
|
+
|
|
97
|
+
``` bash
|
|
98
|
+
# Convert a Markdown file (output: post.linkedin.txt)
|
|
99
|
+
md2linkedin post.md
|
|
100
|
+
|
|
101
|
+
# Specify output path
|
|
102
|
+
md2linkedin post.md -o linkedin_post.txt
|
|
103
|
+
|
|
104
|
+
# Pipe from stdin
|
|
105
|
+
echo "**Hello**, *world*!" | md2linkedin
|
|
106
|
+
|
|
107
|
+
# Keep link URLs in the output
|
|
108
|
+
md2linkedin post.md --preserve-links
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Key Features
|
|
112
|
+
|
|
113
|
+
- **Bold**: `**text**` or `__text__` → Unicode Sans-Serif Bold (𝗯𝗼𝗹𝗱)
|
|
114
|
+
- **Italic**: `*text*` or `_text_` → Unicode Sans-Serif Italic (𝘪𝘵𝘢𝘭𝘪𝘤)
|
|
115
|
+
- **Bold-italic**: `***text***` or `___text___` → Unicode Sans-Serif Bold
|
|
116
|
+
Italic (𝙗𝙤𝙡𝙙-𝙞𝙩𝙖𝙡𝙞𝙘)
|
|
117
|
+
- **Headers**: `#`/`##`/etc. styled with bold Unicode; H1 gets a `━` border
|
|
118
|
+
- **Code spans**: backticks stripped, content kept as plain text — *never*
|
|
119
|
+
Unicode-transformed
|
|
120
|
+
- **Fenced code blocks**: preserved verbatim
|
|
121
|
+
- **Links**: stripped to display text by default; `--preserve-links` retains
|
|
122
|
+
URLs
|
|
123
|
+
- **Images**: replaced by alt text
|
|
124
|
+
- **Bullet lists**: `-`/`*`/`+` → `•`; nested items → `‣`
|
|
125
|
+
- **Blockquotes**: leading `>` stripped
|
|
126
|
+
- **HTML spans**: unwrapped, inner text preserved
|
|
127
|
+
- **HTML entities**: decoded (`&amp;` → `&`, etc.)
|
|
128
|
+
- **Backslash escapes**: resolved (`\*` → `*`)
|
|
129
|
+
- **Windows line endings**: normalised automatically
|
|
130
|
+
- **Emojis & non-ASCII**: pass through unchanged — no accidental corruption
|
|
131
|
+
|
|
132
|
+
For more examples, check out the package documentation at:
|
|
133
|
+
<https://www.indrapatil.com/md2linkedin/>
|
|
134
|
+
|
|
135
|
+
## License
|
|
136
|
+
|
|
137
|
+
This project is licensed under the MIT License.
|
|
138
|
+
|
|
139
|
+
## Code of Conduct
|
|
140
|
+
|
|
141
|
+
Please note that the md2linkedin project is released with a [Contributor
|
|
142
|
+
Code of
|
|
143
|
+
Conduct](https://www.contributor-covenant.org/version/3/0/code_of_conduct/).
|
|
144
|
+
By contributing to this project, you agree to abide by its terms.
|
|
145
|
+
|
|
146
|
+
## Acknowledgements
|
|
147
|
+
|
|
148
|
+
Hex sticker font is `Rubik`. Icons are sourced from
|
|
149
|
+
[Flaticon](https://www.flaticon.com/):
|
|
150
|
+
|
|
151
|
+
- Markdown icon by [Freepik](https://www.flaticon.com/authors/freepik)
|
|
152
|
+
- LinkedIn icon by [Freepik](https://www.flaticon.com/authors/freepik)
|
|
153
|
+
- Arrow/conversion icon by
|
|
154
|
+
[Freepik](https://www.flaticon.com/authors/freepik)
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# md2linkedin <img src="docs/assets/logo.png" align="right" width="240" />
|
|
5
|
+
|
|
6
|
+
[](https://pypi.org/project/md2linkedin/)
|
|
8
|
+

|
|
9
|
+
[](https://pypistats.org/packages/md2linkedin)
|
|
11
|
+
|
|
12
|
+
`md2linkedin` converts Markdown text to LinkedIn-compatible plain text by
|
|
13
|
+
replacing bold, italic, and bold-italic markers with Unicode Mathematical
|
|
14
|
+
Sans-Serif characters. This preserves visual formatting when pasting into
|
|
15
|
+
platforms like LinkedIn that do not support Markdown natively.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
| Package Manager | Installation Command |
|
|
20
|
+
|-----------------|---------------------------|
|
|
21
|
+
| pip | `pip install md2linkedin` |
|
|
22
|
+
| uv | `uv add md2linkedin` |
|
|
23
|
+
|
|
24
|
+
## Usage
|
|
25
|
+
|
|
26
|
+
### Python API
|
|
27
|
+
|
|
28
|
+
``` python
|
|
29
|
+
from md2linkedin import convert
|
|
30
|
+
|
|
31
|
+
md = """
|
|
32
|
+
# Exciting News
|
|
33
|
+
|
|
34
|
+
I'm thrilled to share that **we just launched** a new product!
|
|
35
|
+
|
|
36
|
+
Key highlights:
|
|
37
|
+
|
|
38
|
+
- **Performance**: *3x faster* than the previous version
|
|
39
|
+
- **Reliability**: ***zero downtime*** deployments
|
|
40
|
+
- **Developer UX**: clean, intuitive API
|
|
41
|
+
|
|
42
|
+
Check it out and let me know what you think.
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
print(convert(md))
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
50
|
+
𝗘𝗫𝗖𝗜𝗧𝗜𝗡𝗚 𝗡𝗘𝗪𝗦
|
|
51
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
52
|
+
|
|
53
|
+
I'm thrilled to share that 𝘄𝗲 𝗷𝘂𝘀𝘁 𝗹𝗮𝘂𝗻𝗰𝗵𝗲𝗱 a new product!
|
|
54
|
+
|
|
55
|
+
Key highlights:
|
|
56
|
+
|
|
57
|
+
• 𝗣𝗲𝗿𝗳𝗼𝗿𝗺𝗮𝗻𝗰𝗲: 3𝘹 𝘧𝘢𝘴𝘵𝘦𝘳 than the previous version
|
|
58
|
+
• 𝗥𝗲𝗹𝗶𝗮𝗯𝗶𝗹𝗶𝘁𝘆: 𝙯𝙚𝙧𝙤 𝙙𝙤𝙬𝙣𝙩𝙞𝙢𝙚 deployments
|
|
59
|
+
• 𝗗𝗲𝘃𝗲𝗹𝗼𝗽𝗲𝗿 𝗨𝗫: clean, intuitive API
|
|
60
|
+
|
|
61
|
+
Check it out and let me know what you think.
|
|
62
|
+
|
|
63
|
+
### CLI
|
|
64
|
+
|
|
65
|
+
``` bash
|
|
66
|
+
# Convert a Markdown file (output: post.linkedin.txt)
|
|
67
|
+
md2linkedin post.md
|
|
68
|
+
|
|
69
|
+
# Specify output path
|
|
70
|
+
md2linkedin post.md -o linkedin_post.txt
|
|
71
|
+
|
|
72
|
+
# Pipe from stdin
|
|
73
|
+
echo "**Hello**, *world*!" | md2linkedin
|
|
74
|
+
|
|
75
|
+
# Keep link URLs in the output
|
|
76
|
+
md2linkedin post.md --preserve-links
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Key Features
|
|
80
|
+
|
|
81
|
+
- **Bold**: `**text**` or `__text__` → Unicode Sans-Serif Bold (𝗯𝗼𝗹𝗱)
|
|
82
|
+
- **Italic**: `*text*` or `_text_` → Unicode Sans-Serif Italic (𝘪𝘵𝘢𝘭𝘪𝘤)
|
|
83
|
+
- **Bold-italic**: `***text***` or `___text___` → Unicode Sans-Serif Bold
|
|
84
|
+
Italic (𝙗𝙤𝙡𝙙-𝙞𝙩𝙖𝙡𝙞𝙘)
|
|
85
|
+
- **Headers**: `#`/`##`/etc. styled with bold Unicode; H1 gets a `━` border
|
|
86
|
+
- **Code spans**: backticks stripped, content kept as plain text — *never*
|
|
87
|
+
Unicode-transformed
|
|
88
|
+
- **Fenced code blocks**: preserved verbatim
|
|
89
|
+
- **Links**: stripped to display text by default; `--preserve-links` retains
|
|
90
|
+
URLs
|
|
91
|
+
- **Images**: replaced by alt text
|
|
92
|
+
- **Bullet lists**: `-`/`*`/`+` → `•`; nested items → `‣`
|
|
93
|
+
- **Blockquotes**: leading `>` stripped
|
|
94
|
+
- **HTML spans**: unwrapped, inner text preserved
|
|
95
|
+
- **HTML entities**: decoded (`&amp;` → `&`, etc.)
|
|
96
|
+
- **Backslash escapes**: resolved (`\*` → `*`)
|
|
97
|
+
- **Windows line endings**: normalised automatically
|
|
98
|
+
- **Emojis & non-ASCII**: pass through unchanged — no accidental corruption
|
|
99
|
+
|
|
100
|
+
For more examples, check out the package documentation at:
|
|
101
|
+
<https://www.indrapatil.com/md2linkedin/>
|
|
102
|
+
|
|
103
|
+
## License
|
|
104
|
+
|
|
105
|
+
This project is licensed under the MIT License.
|
|
106
|
+
|
|
107
|
+
## Code of Conduct
|
|
108
|
+
|
|
109
|
+
Please note that the md2linkedin project is released with a [Contributor
|
|
110
|
+
Code of
|
|
111
|
+
Conduct](https://www.contributor-covenant.org/version/3/0/code_of_conduct/).
|
|
112
|
+
By contributing to this project, you agree to abide by its terms.
|
|
113
|
+
|
|
114
|
+
## Acknowledgements
|
|
115
|
+
|
|
116
|
+
Hex sticker font is `Rubik`. Icons are sourced from
|
|
117
|
+
[Flaticon](https://www.flaticon.com/):
|
|
118
|
+
|
|
119
|
+
- Markdown icon by [Freepik](https://www.flaticon.com/authors/freepik)
|
|
120
|
+
- LinkedIn icon by [Freepik](https://www.flaticon.com/authors/freepik)
|
|
121
|
+
- Arrow/conversion icon by
|
|
122
|
+
[Freepik](https://www.flaticon.com/authors/freepik)
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "md2linkedin"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Convert Markdown to LinkedIn-friendly Unicode text"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Indrajeet Patil", email = "patilindrajeet.science@gmail.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
license-files = ["LICENSE.md"]
|
|
12
|
+
keywords = ["markdown", "linkedin", "unicode", "formatting", "conversion"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Intended Audience :: End Users/Desktop",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Programming Language :: Python",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Programming Language :: Python :: 3.14",
|
|
24
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
25
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
26
|
+
"Topic :: Text Processing :: Markup",
|
|
27
|
+
"Topic :: Utilities",
|
|
28
|
+
]
|
|
29
|
+
dependencies = ["click>=8.0"]
|
|
30
|
+
|
|
31
|
+
[project.scripts]
|
|
32
|
+
md2linkedin = "md2linkedin._cli:main"
|
|
33
|
+
|
|
34
|
+
[project.urls]
|
|
35
|
+
Homepage = "https://github.com/IndrajeetPatil/md2linkedin"
|
|
36
|
+
Documentation = "https://www.indrapatil.com/md2linkedin/"
|
|
37
|
+
Repository = "https://github.com/IndrajeetPatil/md2linkedin"
|
|
38
|
+
Issues = "https://github.com/IndrajeetPatil/md2linkedin/issues"
|
|
39
|
+
Changelog = "https://github.com/IndrajeetPatil/md2linkedin/blob/main/CHANGELOG.md"
|
|
40
|
+
|
|
41
|
+
[dependency-groups]
|
|
42
|
+
dev = [
|
|
43
|
+
"coverage>=7.13.5",
|
|
44
|
+
"jupyter>=1.1.1",
|
|
45
|
+
"zensical>=0.0.31",
|
|
46
|
+
"mkdocstrings-python>=2.0.3",
|
|
47
|
+
"ty>=0.0.28",
|
|
48
|
+
"prek>=0.3.8",
|
|
49
|
+
"pytest>=9.0.2",
|
|
50
|
+
"pytest-cov>=7.1.0",
|
|
51
|
+
"pytest-random-order>=1.2.0",
|
|
52
|
+
"ruff>=0.15.9",
|
|
53
|
+
]
|
|
54
|
+
|
|
55
|
+
[build-system]
|
|
56
|
+
requires = ["uv_build>=0.11.2,<0.12"]
|
|
57
|
+
build-backend = "uv_build"
|
|
58
|
+
|
|
59
|
+
[tool.ruff]
|
|
60
|
+
fix = true
|
|
61
|
+
preview = true
|
|
62
|
+
unsafe-fixes = true
|
|
63
|
+
|
|
64
|
+
[tool.ruff.lint]
|
|
65
|
+
select = ["ALL"]
|
|
66
|
+
ignore = [
|
|
67
|
+
"COM812",
|
|
68
|
+
"CPY",
|
|
69
|
+
# Docstring content rules — we intentionally omit Returns/Raises sections
|
|
70
|
+
# in internal helpers, consistent with ignoring D (pydocstyle) below.
|
|
71
|
+
"D",
|
|
72
|
+
"DOC",
|
|
73
|
+
# This package's output IS Unicode Mathematical characters; having them in
|
|
74
|
+
# docstring examples and block-comment separators is intentional.
|
|
75
|
+
"RUF002",
|
|
76
|
+
"RUF003",
|
|
77
|
+
]
|
|
78
|
+
|
|
79
|
+
[tool.ruff.lint.per-file-ignores]
|
|
80
|
+
"tests/*" = [
|
|
81
|
+
# docstring requirements
|
|
82
|
+
"D",
|
|
83
|
+
"DOC",
|
|
84
|
+
# tests can use asserts
|
|
85
|
+
"S101",
|
|
86
|
+
# pytest fixtures (e.g. tmp_path) don't need type annotations
|
|
87
|
+
"ANN001",
|
|
88
|
+
# tests legitimately import private internals to test them directly
|
|
89
|
+
"PLC2701",
|
|
90
|
+
# test methods inside classes don't need @staticmethod — class grouping is intentional
|
|
91
|
+
"PLR6301",
|
|
92
|
+
# test assertions for Unicode output use the literal Unicode chars (intentional)
|
|
93
|
+
"RUF001",
|
|
94
|
+
# `== ""` vs `not x` — both forms are acceptable in assertions
|
|
95
|
+
"PLC1901",
|
|
96
|
+
# magic numbers in assertions are fine (e.g. expected count == 3)
|
|
97
|
+
"PLR2004",
|
|
98
|
+
# large test classes are fine when grouping related cases
|
|
99
|
+
"PLR0904",
|
|
100
|
+
]
|
|
101
|
+
|
|
102
|
+
[tool.ruff.format]
|
|
103
|
+
preview = true
|
|
104
|
+
docstring-code-format = true
|
|
105
|
+
|
|
106
|
+
[tool.pytest.ini_options]
|
|
107
|
+
addopts = [
|
|
108
|
+
# error on problems parsing pytest configuration
|
|
109
|
+
"--strict-config",
|
|
110
|
+
# error on using unregistered marker
|
|
111
|
+
"--strict-markers",
|
|
112
|
+
# show extra test summary info for everything
|
|
113
|
+
"-ra",
|
|
114
|
+
# include more verbose output
|
|
115
|
+
"--verbose",
|
|
116
|
+
# using pytest-random-order plugin option
|
|
117
|
+
"--random-order",
|
|
118
|
+
]
|
|
119
|
+
testpaths = ["tests"]
|
|
120
|
+
filterwarnings = ["error"]
|
|
121
|
+
xfail_strict = true
|
|
122
|
+
python_files = ["test_*.py", "test-*.py", "tests.py", "test.py"]
|
|
123
|
+
|
|
124
|
+
[tool.coverage.run]
|
|
125
|
+
branch = true
|
|
126
|
+
source = ["md2linkedin"]
|
|
127
|
+
|
|
128
|
+
[tool.coverage.report]
|
|
129
|
+
fail_under = 100
|
|
130
|
+
format = "markdown"
|
|
131
|
+
sort = "-Cover"
|
|
132
|
+
show_missing = true
|
|
133
|
+
skip_empty = true
|
|
134
|
+
|
|
135
|
+
[tool.ty.src]
|
|
136
|
+
exclude = [".venv", "build", "dist", "docs"]
|
|
137
|
+
|
|
138
|
+
[tool.ty.environment]
|
|
139
|
+
python-version = "3.10"
|
|
140
|
+
|
|
141
|
+
[tool.uv]
|
|
142
|
+
required-version = ">=0.11.2"
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""md2linkedin — Convert Markdown to LinkedIn-friendly Unicode text.
|
|
2
|
+
|
|
3
|
+
The package exposes two main entry points:
|
|
4
|
+
|
|
5
|
+
* :func:`convert` — convert a Markdown *string* in memory.
|
|
6
|
+
* :func:`convert_file` — read a ``.md`` file, convert it, and write a
|
|
7
|
+
``.linkedin.txt`` output file.
|
|
8
|
+
|
|
9
|
+
Lower-level Unicode mapping utilities (:func:`to_sans_bold`,
|
|
10
|
+
:func:`to_sans_italic`, :func:`to_sans_bold_italic`, :func:`apply_style`)
|
|
11
|
+
are also public for programmatic use.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from importlib.metadata import version
|
|
17
|
+
|
|
18
|
+
from ._converter import convert, convert_file
|
|
19
|
+
from ._unicode import apply_style, to_sans_bold, to_sans_bold_italic, to_sans_italic
|
|
20
|
+
|
|
21
|
+
__version__ = version("md2linkedin")
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"__version__",
|
|
25
|
+
"apply_style",
|
|
26
|
+
"convert",
|
|
27
|
+
"convert_file",
|
|
28
|
+
"to_sans_bold",
|
|
29
|
+
"to_sans_bold_italic",
|
|
30
|
+
"to_sans_italic",
|
|
31
|
+
]
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Command-line interface for md2linkedin."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from . import __version__
|
|
10
|
+
from ._converter import convert, convert_file
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Both -h and --help show the help text; -V/--version print the package version.
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.version_option(__version__, "-V", "--version")
# INPUT_FILE is optional; when given it must name an existing file (not a directory).
@click.argument(
    "input_file",
    required=False,
    type=click.Path(exists=True, dir_okay=False),
)
@click.option(
    "-o",
    "--output",
    "output_file",
    default=None,
    type=click.Path(dir_okay=False),
    help=(
        "Output file path. Defaults to INPUT_FILE with a '.linkedin.txt' extension. "
        "Ignored when reading from stdin."
    ),
)
@click.option(
    "--preserve-links",
    is_flag=True,
    default=False,
    help=(
        "Keep Markdown link syntax ([text](url)) in the output"
        " instead of stripping URLs."
    ),
)
def main(
    input_file: str | None,
    output_file: str | None,
    *,
    preserve_links: bool,
) -> None:
    """Convert Markdown to LinkedIn-friendly Unicode text.

    Reads from INPUT_FILE (or stdin when INPUT_FILE is omitted) and writes
    LinkedIn-compatible plain text in which bold and italic formatting is
    preserved using Unicode Mathematical Sans-Serif characters.

    \b
    Examples:

    # Convert a file (output written to README.linkedin.txt)
    md2linkedin README.md

    # Specify the output path explicitly
    md2linkedin README.md -o post.txt

    # Pipe from stdin
    echo "**Hello**, *world*!" | md2linkedin

    # Keep link URLs in the output
    md2linkedin README.md --preserve-links
    """
    # NOTE(review): the docstring above is the help text click displays, so it is
    # runtime text; its wording must not be changed for cosmetic reasons.
    if input_file is not None:
        # File mode: delegate to convert_file and report the path it returns.
        out_path = convert_file(input_file, output_file, preserve_links=preserve_links)
        click.echo(f"LinkedIn-formatted text written to: {out_path}")
    else:
        # Read from stdin
        # Stdin mode: output_file is not consulted on this path (see the -o help text).
        if _stdin_is_tty():
            # Nothing was piped in: raise a usage error rather than wait on the
            # terminal for input.
            msg = (
                "No input file provided and stdin is a terminal. "
                "Provide a file path or pipe content via stdin."
            )
            raise click.UsageError(
                msg
            )
        md_text = sys.stdin.read()
        result = convert(md_text, preserve_links=preserve_links)
        # nl=False: emit the converted text without an extra trailing newline.
        click.echo(result, nl=False)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _stdin_is_tty() -> bool:
    """Report whether standard input is attached to an interactive terminal.

    This is deliberately a tiny standalone function: tests can patch it
    directly instead of working around the stdin replacement that Click's
    ``CliRunner.invoke`` performs.
    """
    # Look up sys.stdin at call time so a swapped-in stream is honoured.
    stdin = sys.stdin
    return stdin.isatty()
|
|
@@ -0,0 +1,466 @@
|
|
|
1
|
+
"""Markdown-to-LinkedIn conversion pipeline.
|
|
2
|
+
|
|
3
|
+
Each step is a small, independently testable function. The top-level
|
|
4
|
+
:func:`convert` function wires them together in the correct order to avoid
|
|
5
|
+
regex conflicts (e.g. bold-italic must be processed before bold or italic).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
import uuid
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from ._unicode import to_sans_bold, to_sans_bold_italic, to_sans_italic
|
|
15
|
+
|
|
16
|
+
__all__ = ["convert", "convert_file"]
|
|
17
|
+
|
|
18
|
+
_NESTED_BULLET_MIN_INDENT = 2 # spaces of indentation that triggers a nested bullet (‣)
|
|
19
|
+
|
|
20
|
+
# ── Low-level pipeline steps ───────────────────────────────────────────────────
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _normalize_line_endings(text: str) -> str:
    """Return *text* with every ``\\r\\n`` and lone ``\\r`` rewritten as ``\\n``."""
    # Collapse CRLF pairs first; converting the bare "\r" first would turn each
    # Windows line break into two newlines.
    without_crlf = text.replace("\r\n", "\n")
    return without_crlf.replace("\r", "\n")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _protect_code(text: str) -> tuple[str, dict[str, str]]:
    """Swap fenced code blocks and inline code spans for opaque placeholders.

    Hiding the code behind placeholders keeps the later formatting steps from
    touching it. Each placeholder is ``CODE`` plus a random UUID hex string,
    wrapped in NUL characters.

    Args:
        text: Markdown text.

    Returns:
        A ``(modified_text, placeholder_map)`` tuple; *placeholder_map* maps
        every placeholder to the exact code text (delimiters included) that it
        replaced.
    """
    stash: dict[str, str] = {}

    def _stash(found: re.Match[str]) -> str:
        token = f"\x00CODE{uuid.uuid4().hex}\x00"
        stash[token] = found.group(0)
        return token

    # Order matters: fenced blocks (``` or ~~~) are removed first, so the
    # single-backtick pattern never sees their delimiters.
    patterns = (
        r"```[\s\S]*?```|~~~[\s\S]*?~~~",
        r"`[^`\n]+`",
    )
    for pattern in patterns:
        text = re.sub(pattern, _stash, text)
    return text, stash
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _restore_code(text: str, placeholders: dict[str, str]) -> str:
    """Put protected code back in place of its placeholders.

    Fenced blocks (starting with three backticks or three tildes) are restored
    verbatim. Anything else is treated as an inline span: its first and last
    character (the backticks) are dropped and only the inner text is inserted.

    Args:
        text: Text containing placeholders.
        placeholders: Map of placeholder → original code string.

    Returns:
        Text with all placeholders replaced by their original code content.
    """
    fence_openers = ("```", "~~~")
    for token in placeholders:
        snippet = placeholders[token]
        is_fenced = snippet.startswith(fence_openers)
        replacement = snippet if is_fenced else snippet[1:-1]
        text = text.replace(token, replacement)
    return text
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _strip_html_spans(text: str) -> str:
    """Unwrap ``<span ...>...</span>`` elements, leaving their inner text.

    The substitution is repeated until a pass changes nothing, so spans
    nested inside other spans are unwrapped as well.

    Args:
        text: Input text that may contain HTML span elements.

    Returns:
        Text with all span elements removed and their inner content preserved.
    """
    span = re.compile(r"<span[^>]*>(.*?)</span>", flags=re.DOTALL)
    while True:
        unwrapped = span.sub(r"\1", text)
        if unwrapped == text:
            # Fixed point reached: no span pair is left to unwrap.
            return unwrapped
        text = unwrapped
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _convert_bold_italic(text: str) -> str:
    """Render ``***text***`` and ``___text___`` as bold-italic Unicode.

    This step has to run ahead of :func:`_convert_bold` and
    :func:`_convert_italic`; otherwise those steps would consume the triple
    markers a piece at a time.

    A marker directly preceded by a backslash (``\\***``) is left alone.

    Args:
        text: Input text.

    Returns:
        Text with bold-italic markers replaced.
    """

    def _style(found: re.Match[str]) -> str:
        return to_sans_bold_italic(found.group(1))

    # Asterisk form first, then underscore form.
    patterns = (
        r"(?<!\\)\*{3}(.+?)(?<!\\)\*{3}",
        r"(?<!\\)_{3}(.+?)(?<!\\)_{3}",
    )
    for pattern in patterns:
        text = re.sub(pattern, _style, text)
    return text
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _convert_bold(text: str) -> str:
    """Render ``**text**`` and ``__text__`` as bold Unicode.

    A marker directly preceded by a backslash (``\\**``) is left alone.

    Args:
        text: Input text.

    Returns:
        Text with bold markers replaced.
    """

    def _style(found: re.Match[str]) -> str:
        return to_sans_bold(found.group(1))

    # Asterisk form first, then underscore form.
    patterns = (
        r"(?<!\\)\*{2}(.+?)(?<!\\)\*{2}",
        r"(?<!\\)__(.+?)(?<!\\)__",
    )
    for pattern in patterns:
        text = re.sub(pattern, _style, text)
    return text
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _convert_italic(text: str) -> str:
    """Render ``*text*`` and ``_text_`` as italic Unicode.

    The asterisk pattern uses look-arounds so that it only matches a lone
    ``*``, never one adjacent to another asterisk (i.e. part of a ``**`` or
    ``***`` marker). The underscore pattern additionally requires that the
    opening ``_`` is not preceded, and the closing ``_`` not followed, by a
    word character, so underscores inside identifiers are ignored. Markers
    directly preceded by a backslash (``\\*``, ``\\_``) are not matched.

    Args:
        text: Input text.

    Returns:
        Text with italic markers replaced.
    """

    def _style(found: re.Match[str]) -> str:
        return to_sans_italic(found.group(1))

    patterns = (
        # *text*
        r"(?<!\\)(?<!\*)\*(?!\*)(.+?)(?<!\\)(?<!\*)\*(?!\*)",
        # _text_
        r"(?<!\w)(?<!\\)_(?!_)(.+?)(?<!\\)(?<!_)_(?!\w)",
    )
    for pattern in patterns:
        text = re.sub(pattern, _style, text)
    return text
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _convert_headers(text: str) -> str:
    """Convert ATX headers (``# Heading``) and setext headers to styled text.

    * H1 (``#`` or setext ``===``): upper-cased bold Unicode framed by a
      ``━`` border line above and below.
    * H2–H6 (``##``–``######`` or setext ``---``): bold Unicode, no border.
    * Standalone horizontal rules (``---``, ``___``, ``***``) are dropped.

    A setext underline is only honoured when the line above it contains
    visible text and is not itself a horizontal rule. A blank line followed
    by ``===`` therefore no longer yields an empty bordered heading, and two
    consecutive rules are both dropped instead of the first one being styled
    as a heading.

    Args:
        text: Input text with Markdown headers.

    Returns:
        Text with headers replaced by styled plain text.
    """
    separator = "━" * 40

    def _fmt_h1(title: str) -> str:
        clean = title.strip()
        return f"\n{separator}\n{to_sans_bold(clean.upper())}\n{separator}\n"

    def _fmt_h2(title: str) -> str:
        return to_sans_bold(title.strip())

    lines = text.split("\n")
    out: list[str] = []
    i = 0
    while i < len(lines):
        line = lines[i]
        # ATX headers
        atx = re.match(r"^(#{1,6})\s+(.*)", line)
        if atx:
            level = len(atx.group(1))
            title = atx.group(2).rstrip()
            out.append(_fmt_h1(title) if level == 1 else _fmt_h2(title))
            i += 1
            continue
        # Standalone horizontal rules (---, ___, ***). Checked before the setext
        # test so a rule line is never mistaken for the title of a following
        # underline.
        if re.match(r"^(-{3,}|_{3,}|\*{3,})\s*$", line):
            i += 1
            continue
        # Setext headers: a non-blank line whose next line is === or ---
        if line.strip() and i + 1 < len(lines):
            next_line = lines[i + 1]
            if re.match(r"^={3,}\s*$", next_line):
                out.append(_fmt_h1(line))
                i += 2
                continue
            if re.match(r"^-{3,}\s*$", next_line):
                out.append(_fmt_h2(line))
                i += 2
                continue
        out.append(line)
        i += 1
    return "\n".join(out)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _strip_links(text: str, *, preserve: bool = False) -> str:
|
|
233
|
+
"""Handle Markdown links.
|
|
234
|
+
|
|
235
|
+
By default, links are stripped to their display text only (URLs are
|
|
236
|
+
discarded). Empty links ``[](url)`` are removed entirely. Reference-style
|
|
237
|
+
links ``[text][ref]`` are reduced to their display text.
|
|
238
|
+
|
|
239
|
+
When *preserve* is ``True`` the full link syntax is retained as-is.
|
|
240
|
+
|
|
241
|
+
Args:
|
|
242
|
+
text: Input text.
|
|
243
|
+
preserve: When ``True``, leave link syntax unchanged.
|
|
244
|
+
|
|
245
|
+
Returns:
|
|
246
|
+
Text with links handled according to *preserve*.
|
|
247
|
+
"""
|
|
248
|
+
if preserve:
|
|
249
|
+
return text
|
|
250
|
+
# Remove empty links [](url)
|
|
251
|
+
text = re.sub(r"\[\]\([^)]*\)", "", text)
|
|
252
|
+
# Inline links [text](url "optional title") → text
|
|
253
|
+
text = re.sub(r"\[([^\]]+)\]\([^)]*\)", r"\1", text)
|
|
254
|
+
# Reference-style links [text][ref] → text
|
|
255
|
+
text = re.sub(r"\[([^\]]+)\]\[[^\]]*\]", r"\1", text)
|
|
256
|
+
# Autolinks <https://example.com> → https://example.com
|
|
257
|
+
return re.sub(r"<(https?://[^>]+)>", r"\1", text)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _strip_images(text: str) -> str:
|
|
261
|
+
"""Replace Markdown images with their alt text (or nothing if empty).
|
|
262
|
+
|
|
263
|
+
Args:
|
|
264
|
+
text: Input text.
|
|
265
|
+
|
|
266
|
+
Returns:
|
|
267
|
+
Text with image syntax replaced by alt text.
|
|
268
|
+
"""
|
|
269
|
+
#  → alt (empty alt → removed)
|
|
270
|
+
return re.sub(
|
|
271
|
+
r"!\[([^\]]*)\]\([^)]*\)",
|
|
272
|
+
lambda m: m.group(1) or "",
|
|
273
|
+
text,
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _convert_bullets(text: str) -> str:
    """Replace Markdown list markers with Unicode bullet characters.

    * A ``-``, ``*`` or ``+`` marker with little or no indentation → ``•``.
    * A marker indented by at least ``_NESTED_BULLET_MIN_INDENT`` characters
      → ``‣`` (nested item).
    * Ordered list markers (``1. ``) are left as-is (numbers already convey order).

    Only spaces and tabs on the marker's own line count as indentation, so
    blank lines in front of a list are preserved and never mistaken for
    nesting.

    Args:
        text: Input text.

    Returns:
        Text with list markers replaced.
    """

    def _bullet(m: re.Match[str]) -> str:
        indent = m.group(1)
        # NOTE(review): the nested-bullet literal is reproduced as it appears
        # in the reviewed source; confirm its leading spaces were not
        # collapsed by the diff rendering.
        return " ‣ " if len(indent) >= _NESTED_BULLET_MIN_INDENT else "• "

    # [ \t]* instead of \s*: \s also matches "\n", which let the pattern start
    # on a preceding blank line and swallow it (or count it as indentation).
    return re.sub(r"^([ \t]*)[-*+] ", _bullet, text, flags=re.MULTILINE)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _strip_blockquotes(text: str) -> str:
|
|
299
|
+
"""Remove leading ``>`` blockquote markers.
|
|
300
|
+
|
|
301
|
+
Args:
|
|
302
|
+
text: Input text.
|
|
303
|
+
|
|
304
|
+
Returns:
|
|
305
|
+
Text with blockquote markers stripped from line beginnings.
|
|
306
|
+
"""
|
|
307
|
+
return re.sub(r"^> ?", "", text, flags=re.MULTILINE)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _clean_entities(text: str) -> str:
|
|
311
|
+
"""Decode common HTML entities to their literal characters.
|
|
312
|
+
|
|
313
|
+
Args:
|
|
314
|
+
text: Input text.
|
|
315
|
+
|
|
316
|
+
Returns:
|
|
317
|
+
Text with ``>``, ``<``, ``&``, `` ``, ``"``
|
|
318
|
+
replaced by their literal equivalents.
|
|
319
|
+
"""
|
|
320
|
+
replacements = {
|
|
321
|
+
">": ">",
|
|
322
|
+
"<": "<",
|
|
323
|
+
"&": "&",
|
|
324
|
+
" ": " ",
|
|
325
|
+
""": '"',
|
|
326
|
+
"'": "'",
|
|
327
|
+
}
|
|
328
|
+
for entity, char in replacements.items():
|
|
329
|
+
text = text.replace(entity, char)
|
|
330
|
+
return text
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def _clean_escaped_chars(text: str) -> str:
|
|
334
|
+
"""Remove Markdown backslash escapes (e.g. ``\\*`` → ``*``).
|
|
335
|
+
|
|
336
|
+
Args:
|
|
337
|
+
text: Input text.
|
|
338
|
+
|
|
339
|
+
Returns:
|
|
340
|
+
Text with backslash escapes resolved.
|
|
341
|
+
"""
|
|
342
|
+
return re.sub(r"\\([\\`*_{}\[\]()#+\-.!])", r"\1", text)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _normalize_whitespace(text: str) -> str:
|
|
346
|
+
"""Collapse excessive blank lines and strip leading/trailing whitespace.
|
|
347
|
+
|
|
348
|
+
LinkedIn renders at most two consecutive blank lines meaningfully, so
|
|
349
|
+
three or more consecutive newlines are collapsed to two.
|
|
350
|
+
|
|
351
|
+
Args:
|
|
352
|
+
text: Input text.
|
|
353
|
+
|
|
354
|
+
Returns:
|
|
355
|
+
Normalized text with a single trailing newline.
|
|
356
|
+
"""
|
|
357
|
+
text = re.sub(r"\n{3,}", "\n\n", text)
|
|
358
|
+
return text.strip() + "\n"
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
# ── Public API ─────────────────────────────────────────────────────────────────
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def convert(text: str, *, preserve_links: bool = False) -> str:
    """Convert Markdown text to LinkedIn-compatible Unicode plain text.

    Bold (``**text**`` / ``__text__``), italic (``*text*`` / ``_text_``), and
    bold-italic (``***text***`` / ``___text___``) markers are replaced with
    their Unicode Mathematical Sans-Serif equivalents so that the styling
    survives pasting into LinkedIn or other plain-text rich editors.

    The conversion is a fixed pipeline of helper passes:

    * **Windows line endings** — normalised to ``\\n``.
    * **Code spans / fenced code blocks** — set aside behind placeholders
      before the styling passes and put back afterwards.
    * **HTML spans** — unwrapped, inner text kept.
    * **Images** — replaced by alt text.
    * **Headers** — ATX (``#``) and setext styles; H1 gets a ``━`` border.
    * **Links** — stripped to display text by default (see *preserve_links*).
    * **Bullet lists** — ``-`` / ``*`` / ``+`` → ``•`` / ``‣`` (nested).
    * **Blockquotes** — leading ``>`` stripped.
    * **HTML entities** — decoded to literal characters.
    * **Backslash escapes** — resolved (``\\*`` → ``*``).

    Args:
        text: The Markdown source string.
        preserve_links: When ``True``, link syntax (``[text](url)``) is left
            unchanged in the output instead of being reduced to display text.

    Returns:
        A plain-text string suitable for pasting into LinkedIn; the empty
        string when *text* is empty or whitespace-only.

    Examples:
        >>> convert("**Hello**, *world*!")
        '𝗛𝗲𝗹𝗹𝗼, 𝘸𝘰𝘳𝘭𝘥!\\n'

        >>> convert("")
        ''
    """
    if not (text and text.strip()):
        return ""

    body, placeholders = _protect_code(_normalize_line_endings(text))

    # Inline/structural passes that take only the text, in pipeline order.
    for stage in (
        _strip_html_spans,
        _strip_images,
        _convert_bold_italic,
        _convert_bold,
        _convert_italic,
        _convert_headers,
    ):
        body = stage(body)

    body = _strip_links(body, preserve=preserve_links)
    body = _strip_blockquotes(_convert_bullets(body))

    # NOTE(review): code is restored *before* entity/escape cleanup, so those
    # two passes (and whitespace normalisation) also run over code content —
    # confirm this is intended.
    body = _restore_code(body, placeholders)
    return _normalize_whitespace(_clean_escaped_chars(_clean_entities(body)))
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def convert_file(
    input_path: str | Path,
    output_path: str | Path | None = None,
    *,
    preserve_links: bool = False,
) -> Path:
    """Convert a Markdown file and write the result to a ``.txt`` file.

    Both files are read and written as UTF-8.

    Args:
        input_path: Path to the Markdown source file (``.md`` or any text
            file).
        output_path: Destination path for the converted output.  Defaults to
            the input path with its final extension replaced by
            ``.linkedin.txt``; earlier dotted parts of the name are kept
            (``my.post.md`` → ``my.post.linkedin.txt``).
        preserve_links: Passed through to :func:`convert`.

    Returns:
        The path of the written output file (as given or derived; it is not
        made absolute).

    Raises:
        FileNotFoundError: If *input_path* does not exist.

    Examples:
        >>> from pathlib import Path
        >>> import tempfile, os
        >>> with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
        ...     _ = f.write("**bold** and *italic*")
        ...     tmp = f.name
        >>> out = convert_file(tmp)
        >>> out.read_text(encoding="utf-8")
        '𝗯𝗼𝗹𝗱 and 𝘪𝘵𝘢𝘭𝘪𝘤\\n'
        >>> os.unlink(tmp); os.unlink(str(out))
    """
    input_path = Path(input_path)
    if not input_path.exists():
        msg = f"Input file not found: {input_path}"
        raise FileNotFoundError(msg)

    if output_path is None:
        # Build the name from the stem: chaining with_suffix("") and
        # with_suffix(".linkedin.txt") would replace a second dotted part of
        # the name as well ("my.post.md" → "my.linkedin.txt").
        output_path = input_path.with_name(f"{input_path.stem}.linkedin.txt")
    output_path = Path(output_path)

    md_text = input_path.read_text(encoding="utf-8")
    result = convert(md_text, preserve_links=preserve_links)
    output_path.write_text(result, encoding="utf-8")
    return output_path
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""Unicode Mathematical Sans-Serif character mapping functions.
|
|
2
|
+
|
|
3
|
+
Converts ASCII letters and digits to their Unicode Mathematical
|
|
4
|
+
Sans-Serif equivalents, enabling bold, italic, and bold-italic
|
|
5
|
+
styling in plain-text environments like LinkedIn.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Literal
|
|
11
|
+
|
|
12
|
+
__all__ = ["apply_style", "to_sans_bold", "to_sans_bold_italic", "to_sans_italic"]
|
|
13
|
+
|
|
14
|
+
# ── Unicode block offsets ──────────────────────────────────────────────────────
|
|
15
|
+
# Reference: Unicode Mathematical Alphanumeric Symbols (U+1D400–U+1D7FF)
|
|
16
|
+
|
|
17
|
+
_SANS_BOLD_UPPER = 0x1D5D4 # code point of bold "A" (𝗔); the offset from "A" is added to it
|
|
18
|
+
_SANS_BOLD_LOWER = 0x1D5EE # code point of bold "a" (𝗮); the offset from "a" is added to it
|
|
19
|
+
_SANS_BOLD_DIGIT = 0x1D7EC # code point of bold "0" (𝟬); the offset from "0" is added to it
|
|
20
|
+
|
|
21
|
+
_SANS_ITALIC_UPPER = 0x1D608 # code point of italic "A" (𝘈); no italic digit base is defined
|
|
22
|
+
_SANS_ITALIC_LOWER = 0x1D622 # code point of italic "a" (𝘢)
|
|
23
|
+
|
|
24
|
+
_SANS_BOLD_ITALIC_UPPER = 0x1D63C # code point of bold-italic "A" (𝘼); no digit base is defined
|
|
25
|
+
_SANS_BOLD_ITALIC_LOWER = 0x1D656 # code point of bold-italic "a" (𝙖)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ── Public mapping functions ───────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def to_sans_bold(text: str) -> str:
    """Render *text* in Unicode Mathematical Sans-Serif Bold.

    ASCII ``A``–``Z``, ``a``–``z`` and ``0``–``9`` are replaced by their bold
    sans-serif counterparts.  Every other character (spaces, punctuation,
    non-ASCII) is copied through untouched.

    Args:
        text: The input string to convert.

    Returns:
        A new string with ASCII alphanumerics replaced by bold sans-serif
        Unicode equivalents.

    Examples:
        >>> to_sans_bold("Hello, World! 123")
        '𝗛𝗲𝗹𝗹𝗼, 𝗪𝗼𝗿𝗹𝗱! 𝟭𝟮𝟯'

        >>> to_sans_bold("café")
        '𝗰𝗮𝗳é'

        >>> to_sans_bold("")
        ''
    """
    # (first ASCII char, last ASCII char, code point the first char maps to)
    spans = (
        ("A", "Z", _SANS_BOLD_UPPER),
        ("a", "z", _SANS_BOLD_LOWER),
        ("0", "9", _SANS_BOLD_DIGIT),
    )

    def _styled(ch: str) -> str:
        for first, last, base in spans:
            if first <= ch <= last:
                return chr(base + ord(ch) - ord(first))
        return ch

    return "".join(map(_styled, text))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def to_sans_italic(text: str) -> str:
    """Render *text* in Unicode Mathematical Sans-Serif Italic.

    ASCII ``A``–``Z`` and ``a``–``z`` are replaced by their italic sans-serif
    counterparts.  Digits and all other characters are copied through
    untouched (this function defines no mapping for digits).

    Args:
        text: The input string to convert.

    Returns:
        A new string with ASCII letters replaced by italic sans-serif
        Unicode equivalents.

    Examples:
        >>> to_sans_italic("Hello, World!")
        '𝘏𝘦𝘭𝘭𝘰, 𝘞𝘰𝘳𝘭𝘥!'

        >>> to_sans_italic("price: $42")
        '𝘱𝘳𝘪𝘤𝘦: $42'

        >>> to_sans_italic("")
        ''
    """
    # (first ASCII char, last ASCII char, code point the first char maps to)
    spans = (
        ("A", "Z", _SANS_ITALIC_UPPER),
        ("a", "z", _SANS_ITALIC_LOWER),
    )

    def _styled(ch: str) -> str:
        for first, last, base in spans:
            if first <= ch <= last:
                return chr(base + ord(ch) - ord(first))
        return ch

    return "".join(map(_styled, text))
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def to_sans_bold_italic(text: str) -> str:
    """Convert text to Unicode Mathematical Sans-Serif Bold Italic.

    ASCII uppercase and lowercase letters are mapped to their bold-italic
    sans-serif counterparts.  Digits and all other characters pass through
    unchanged.

    Args:
        text: The input string to convert.

    Returns:
        A new string with ASCII letters replaced by bold-italic sans-serif
        Unicode equivalents.

    Examples:
        >>> to_sans_bold_italic("Hello, World!")
        '𝙃𝙚𝙡𝙡𝙤, 𝙒𝙤𝙧𝙡𝙙!'

        >>> to_sans_bold_italic("")
        ''
    """
    out: list[str] = []
    for c in text:
        if "A" <= c <= "Z":
            # Offset of the letter within A–Z, applied to the styled "A".
            out.append(chr(_SANS_BOLD_ITALIC_UPPER + ord(c) - ord("A")))
        elif "a" <= c <= "z":
            out.append(chr(_SANS_BOLD_ITALIC_LOWER + ord(c) - ord("a")))
        else:
            # Digits, punctuation, whitespace and non-ASCII are kept as-is.
            out.append(c)
    return "".join(out)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def apply_style(text: str, style: Literal["bold", "italic", "bold_italic"]) -> str:
    """Apply one of the Unicode sans-serif styles to *text*.

    Dispatches to :func:`to_sans_bold`, :func:`to_sans_italic` or
    :func:`to_sans_bold_italic` according to *style*.  Handy when the style
    is only known at runtime.

    Args:
        text: The input string to convert.
        style: One of ``"bold"``, ``"italic"``, or ``"bold_italic"``.

    Returns:
        A new string with the requested Unicode style applied.

    Raises:
        ValueError: If *style* is not one of the three accepted values.

    Examples:
        >>> apply_style("hello", "bold")
        '𝗵𝗲𝗹𝗹𝗼'

        >>> apply_style("hello", "italic")
        '𝘩𝘦𝘭𝘭𝘰'

        >>> apply_style("hello", "bold_italic")
        '𝙝𝙚𝙡𝙡𝙤'
    """
    stylers = (
        ("bold", to_sans_bold),
        ("italic", to_sans_italic),
        ("bold_italic", to_sans_bold_italic),
    )
    # Compared with == (not hashed) so any value of *style* is handled.
    for name, styler in stylers:
        if style == name:
            return styler(text)

    msg = f"Unknown style {style!r}. Expected 'bold', 'italic', or 'bold_italic'."
    raise ValueError(msg)
|
|
File without changes
|