mdformat-sembr 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,320 @@
1
+ // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2
+ // README at: https://github.com/devcontainers/templates/tree/main/src/typescript-node
3
+ {
4
+ // "name": "Node.js & TypeScript",
5
+ "name": "custom build - mdformat-sembr",
6
+ // list of images: https://github.com/devcontainers/images/tree/main/src
7
+
8
+ // https://github.com/devcontainers/images/tree/main/src/python
9
+ // "image": "mcr.microsoft.com/devcontainers/python:3.13",
10
+
11
+ // https://github.com/devcontainers/images/tree/main/src/typescript-node
12
+ // "image": "mcr.microsoft.com/devcontainers/typescript-node:1-22-bookworm",
13
+
14
+ // https://github.com/devcontainers/images/tree/main/src/base-debian
15
+ // Debian: 9-stretch (2017), 10-buster (2019), 11-bullseye (2021), 12-bookworm (2023), 13-trixie (2025), 14-forky (tbd)
16
+ "image": "mcr.microsoft.com/devcontainers/base:trixie", // bookworm, trixie
17
+ // "image": "mcr.microsoft.com/devcontainers/base:debian",
18
+
19
+ // https://github.com/devcontainers/images/tree/main/src/base-ubuntu
20
+ // "image": "mcr.microsoft.com/devcontainers/base:ubuntu", // comes with node:v18, python:v3.12
21
+ // "image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04", // only LTS releases are available, e.g. 24.04, 22.04
22
+
23
+ // **IMPORTANT**: not from microsoft, as such no 'vscode' user added
24
+ // "image": "quay.io/toolbx/arch-toolbox:latest",
25
+
26
+ // either image or self-build docker image
27
+ // "build": {
28
+ // "dockerfile": "Dockerfile.trixie"
29
+ // },
30
+
31
+
32
+ // Features to add to the dev container. More info: https://containers.dev/features.
33
+ // "features": {},
34
+ "features": {
35
+ // https://github.com/devcontainers/features/blob/main/src/common-utils/
36
+ "ghcr.io/devcontainers/features/common-utils:2": {
37
+ // "installZsh": false, // already part of devcontainer 'base' image
38
+ "username": "vscode"
39
+ },
40
+ // https://github.com/devcontainers/features/tree/main/src/desktop-lite
41
+ // "ghcr.io/devcontainers/features/desktop-lite:1": {
42
+ // // default password 'vscode'
43
+ // // "password": "vscode",
44
+ // // Enter a port for the desktop VNC server (TigerVNC)
45
+ // // "vncPort": 5901,
46
+ // // Enter a port for the VNC web client (noVNC)
47
+ // // "webPort": 6080,
48
+ // },
49
+ // https://github.com/devcontainers/features/blob/main/src/node/
50
+ // "ghcr.io/devcontainers/features/node:1": {
51
+ // // min required version for github-copilot as of Nov'25
52
+ // "version": "22"
53
+ // },
54
+ // https://github.com/devcontainers/features/blob/main/src/python/
55
+ "ghcr.io/devcontainers/features/python:1": {
56
+ // // debian:trixie comes with 3.13, however due to this bug
57
+ // // we still need to provide the version if we want to install jupyterlab
58
+ // // https://github.com/devcontainers/features/issues/1403
59
+ "version": "os-provided",
60
+ // "version": "3.13",
61
+ // "installJupyterlab": "true"
62
+ },
63
+ // https://github.com/devcontainers/features/blob/main/src/github-cli/
64
+ "ghcr.io/devcontainers/features/github-cli:1": {},
65
+ // https://github.com/devcontainers/features/blob/main/src/copilot-cli/
66
+ "ghcr.io/devcontainers/features/copilot-cli:1": {},
67
+ // https://github.com/devcontainers/features/tree/main/src/azure-cli
68
+ // **IMPORTANT**: disable telemetry with `az config set core.collect_telemetry=false`
69
+ // "ghcr.io/devcontainers/features/azure-cli:1": {},
70
+ // https://github.com/devcontainers/features/tree/main/src/aws-cli
71
+ // "ghcr.io/devcontainers/features/aws-cli:1": {},
72
+ },
73
+
74
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
75
+ // "forwardPorts": [],
76
+ // **VNC for web**
77
+ // "forwardPorts": [6080],
78
+ // "portsAttributes": {
79
+ // "6080": {
80
+ // "label": "desktop"
81
+ // }
82
+ // }
83
+ // **VNC for TigerVNC
84
+ // "forwardPorts": [5901],
85
+ // "appPort": 5901,
86
+ // "portsAttributes": {
87
+ // "5901": {
88
+ // "label": "VNC desktop",
89
+ // }
90
+ // },
91
+
92
+ // Runs on the HOST before the container starts — ensures the .worktrees sibling folder exists.
93
+ // initializeCommand runs on the HOST before the container starts.
94
+ // Object form: the entries run in parallel; each entry is a string (not an array) so it runs through a shell.
95
+ "initializeCommand": {
96
+ // Git worktrees: VS Code creates <project>.worktrees/ next to the workspace folder.
97
+ // Inside the container /workspaces/ is not writable by the vscode user, so we bind-mount
98
+ // the folder from the host to avoid permission errors.
99
+ "worktrees": "mkdir -p \"${localWorkspaceFolder}.worktrees\"",
100
+ // **IMPORTANT** deletes the named docker volume; usually mounted as ~/
101
+ // uncomment for a single rebuild then comment it out again
102
+ //"wipeVolume": "docker volume rm -f DevContVol-${devcontainerId} 2>/dev/null || true",
103
+ // Timezone - align to local timezone
104
+ // "TZ": "TZ_VAL=$(readlink /etc/localtime | sed 's|.*/zoneinfo/||') && (grep -q '^TZ=' .devcontainer/.env && sed -i '' \"s|^TZ=.*|TZ=$TZ_VAL|\" .devcontainer/.env || echo \"TZ=$TZ_VAL\" >> .devcontainer/.env)",
105
+ },
106
+
107
+ "mounts": [
108
+ // FIXME
109
+ // !!! There is some permission issue:
110
+ // cannot create directory '/home/vscode/.vscode-server/bin': Permission denied
111
+ // see also for workaround: https://github.com/microsoft/vscode-remote-release/issues/7690#issuecomment-2761197753
112
+ // https://code.visualstudio.com/docs/devcontainers/tips-and-tricks
113
+ // It creates named volume called profile mounted to /root, which will survive a rebuild.
114
+ // It next creates an anonymous volume mounted to /root/.vscode-server that gets destroyed on rebuild,
115
+ // which allows VS Code to reinstall extensions and dotfiles.
116
+ // !!! Make sure to adjust the target path if you are not using the 'vscode' user
117
+ // !!! see also "remoteUser" setting below
118
+ // "source=profile,target=/home/vscode,type=volume",
119
+ // "target=/home/vscode/.vscode-server,type=volume",
120
+
121
+ // ${localEnv:HOME} on Linux/macOS
122
+ // ${localEnv:USERPROFILE} on Windows
123
+ // "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig,type=bind,readonly,consistency=cached",
124
+ // "source=${localEnv:HOME}/.gitignore,target=/home/vscode/.gitignore,type=bind,readonly,consistency=cached",
125
+
126
+ // Git worktrees: VS Code creates <project>.worktrees/ next to the workspace folder.
127
+ "source=${localWorkspaceFolder}.worktrees,target=${containerWorkspaceFolder}.worktrees,type=bind,consistency=cached",
128
+
129
+ // **IMPORTANT**:
130
+ // to enable named volumes that survive container recreation, we moved to Dockerfile;
131
+ // to use copy permission on first mount of the named volumes, since they are always created with root permissions
132
+ // "source=DevContVol-${devcontainerId},target=/home/vscode,type=volume",
133
+
134
+ // **IMPORTANT**: keepassxc creates a new file on write and replaces the old one - this is a global setting for keepassxc
135
+ // this can be adjusted with writing directly to the existing file, but it is flagged as "dangerous"
136
+ // alternative to share a folder instead of a single file
137
+ // TODO: create feature request to make this database specific option
138
+ // "source=${localEnv:HOME}/code/devcont.kdbx,target=/home/vscode/devcont.kdbx,type=bind,readonly,consistency=cached",
139
+
140
+ // !!! SSH keys are sensitive, so they should be read-only and cached for better performance
141
+ // "source=${localEnv:HOME}/.ssh/id_ed25519,target=/home/vscode/.ssh/id_ed25519,type=bind,readonly,consistency=cached",
142
+ // "source=${localEnv:HOME}/.ssh/id_ed25519.pub,target=/home/vscode/.ssh/id_ed25519.pub,type=bind,readonly,consistency=cached",
143
+
144
+ // Claude Code CLI needs to write to these files, so they cannot be read-only
145
+ // "source=${localEnv:HOME}/.claude,target=/home/vscode/.claude,type=bind,consistency=cached",
146
+ // "source=${localEnv:HOME}/.claude.json,target=/home/vscode/.claude.json,type=bind,consistency=cached",
147
+ ],
148
+
149
+
150
+ // set env vars for inside the container
151
+ "containerEnv": {
152
+ // FIXME: use initializeCommand to write to .devcontainer/.env and use runArgs;
153
+ // but it is way too complicated, for now this is good enough
154
+ // timezone fix of colima docker on macos
155
+ "TZ": "Europe/Zurich",
156
+ // setting the default visual text editor
157
+ "EDITOR": "vim",
158
+ "VISUAL": "code",
159
+
160
+ // github-cli
161
+ // https://cli.github.com/telemetry
162
+ "GH_TELEMETRY": "false",
163
+ "DO_NOT_TRACK": "true",
164
+
165
+ // claude-code disable telemetry
166
+ // https://code.claude.com/docs/en/data-usage
167
+ // "DISABLE_TELEMETRY": "1",
168
+ // "DISABLE_ERROR_REPORTING": "1",
169
+ // "DISABLE_BUG_COMMAND": "1",
170
+ // "CLAUDE_CONFIG_DIR": "/home/vscode/.claude",
171
+ // "CLAUDE_CONFIG_FILE": "/home/vscode/.claude.json",
172
+ // Authentication token => or use .env file and "remoteEnv" setting below
173
+ // Token can be generated with `claude setup-token`
174
+ // "CLAUDE_CODE_OAUTH_TOKEN": "",
175
+ // "ANTHROPIC_AUTH_TOKEN": "",
176
+
177
+ // codex: no opt-out telemetry
178
+ // gh-copilot: no opt-out telemetry
179
+ },
180
+ // copy from host env when container starts
181
+ "remoteEnv": {
182
+ // "GITHUB_TOKEN": "${localEnv:GITHUB_TOKEN}",
183
+ // or use VS Code settings
184
+ // "OPENAI_API_KEY": "${env:OPENAI_API_KEY}"
185
+
186
+ // revoke unwanted ssh-forwarding: see https://github.com/microsoft/vscode-remote-release/issues/11014
187
+ "SSH_AUTH_SOCK": "",
188
+ },
189
+
190
+
191
+ // Use 'postCreateCommand' to run commands after the container is created.
192
+ // **NOTE**: these commands are executed in parallel; make sure they
192
+ // do not depend on each other
194
+ "postCreateCommand": {
195
+ // Remember!! they are executed in parallel
196
+ // Debian
197
+ "basic-dpkgs": "sudo apt update && sudo apt install -y --no-install-recommends vim vim-scripts vim-doc shellcheck eza bat fd-find ripgrep fzf git-delta xh lazygit just jq gnupg2 pass keepassxc ffmpeg fastfetch && sudo apt clean && echo '01-basic-dpkg' >> ~/devcontainer-post-create.txt",
198
+ // Ubuntu
199
+ //"basic-dpkgs": "sudo apt update && sudo apt install -y --no-install-recommends vim vim-scripts vim-doc shellcheck eza batcat fd-find ripgrep fzf jq gnupg2 pass keepassxc && sudo apt clean && echo '01-basic-dpkg' >> ~/devcontainer-post-create.txt",
200
+
201
+ // editorconfig is required by the vscode extension
202
+ // mermaid to save as picture
203
+ // "npm-lint": "npm install -g editorconfig eslint typescript && echo '02-npm-lint' >> ~/devcontainer-post-create.txt",
204
+
205
+ // anthropic claude code cli
206
+ // https://code.claude.com/docs/en/overview
207
+ // bash is the recommended way as of Nov'25
208
+ // "claude-code-npm": "npm install -g @anthropic-ai/claude-code && echo '04-claude-code-npm' >> ~/devcontainer-post-create.txt",
209
+ //"claude-code-binary": "curl -fsSL https://claude.ai/install.sh | bash && echo '04-claude-code-binary' >> ~/devcontainer-post-create.txt",
210
+
211
+ // openai codex cli
212
+ // https://github.com/openai/codex
213
+ //"codex": "npm install -g @openai/codex && echo '05-codex' >> ~/devcontainer-post-create.txt",
214
+
215
+ // BMAD - Breakthrough Method of Agile AI-Driven Development
216
+ // Requires interactive shell
217
+ //"bmad": "npx bmad-method@alpha install && echo 'doneX' >> ~/devcontainer-post-create.txt",
218
+
219
+ // "python-notebook": "pip install notebook",
220
+ //"python-anthropic": "pip install anthropic && echo '06-python-anthropic' >> ~/devcontainer-post-create.txt",
221
+ //"python-openai": "pip install openai && echo '07-python-openai' >> ~/devcontainer-post-create.txt",
222
+
223
+ //"langchain": "pip install langchain langchain-openai langchain-anthropic langgraph-cli[inmem] && echo '08-langchain' >> ~/devcontainer-post-create.txt",
224
+
225
+ "uv": "curl -LsSf https://astral.sh/uv/install.sh | sh && echo '09-uv' >> ~/devcontainer-post-create.txt",
226
+
227
+ // git based - codeberg can be replaced w/ gitlab.com or github.com
228
+ //"chezmoi": "sh -c \"$(curl -fsLS get.chezmoi.io/lb)\" -- init --apply git@codeberg.org:bugrasan/dotfiles.git && echo '10-chezmoi' >> ~/devcontainer-post-create.txt",
229
+ // http based - codeberg can be replaced w/ gitlab.com or github.com
230
+ "chezmoi": "sh -c \"$(curl -fsLS get.chezmoi.io/lb)\" -- init --one-shot --apply codeberg.org/bugrasan/dotfiles && echo '10-chezmoi' >> ~/devcontainer-post-create.txt",
231
+
232
+ // see https://github.com/microsoft/vscode-remote-release/issues/11014
233
+ "revoke-ssh-forwarding": "find /tmp -maxdepth 1 -name 'vscode-ssh-auth-*.sock' -delete 2>/dev/null || true && echo '11-revoke-ssh-forwarding' >> ~/devcontainer-post-create.txt",
234
+
235
+ // simplifies claude; debian and ubuntu don't have duckdb in their repos, so we need to install it manually
236
+ // "duckdb": "curl https://install.duckdb.org | sh && echo '12-duckdb' >> ~/devcontainer-post-create.txt",
237
+
238
+ // disable github telemetry; like the other entries, the marker is logged only on success.
239
+ "github-cli-telemetry": "gh config set telemetry disabled && echo '13-github-cli-telemetry' >> ~/devcontainer-post-create.txt || true",
240
+ },
241
+ // "yarn install",
242
+
243
+ // Configure tool-specific properties.
244
+ "customizations": {
245
+ "vscode": {
246
+ "settings": {
247
+ // inline instructions for github copilot chat
248
+ // https://code.visualstudio.com/docs/devcontainers/tips-and-tricks
249
+
250
+ // "editor.formatOnSave": true,
251
+ "terminal.integrated.defaultProfile.linux": "bash", // replaces the deprecated "terminal.integrated.shell.linux"
252
+ "telemetry.telemetryLevel": "off",
253
+
254
+ // Keep speech extensions on the local UI side; avoid loading them in remote container host.
255
+ "remote.extensionKind": {
256
+ "ms-vscode.vscode-speech": "ui",
257
+ },
258
+
259
+ // https://code.visualstudio.com/docs/copilot/customization/custom-instructions#_use-a-githubcopilotinstructionsmd-file
260
+ "github.copilot.chat.codeGeneration.useInstructionFiles": true,
261
+ // https://code.visualstudio.com/docs/copilot/customization/custom-instructions#_use-an-agentsmd-file
262
+ "chat.useAgentsMdFile": true,
263
+ "chat.useAgentSkills": true,
264
+ "chat.useNestedAgentsMdFiles": true,
265
+ // https://code.visualstudio.com/docs/copilot/customization/mcp-servers#_add-an-mcp-server-from-the-github-mcp-server-registry
266
+ "chat.mcp.gallery.enabled": true,
267
+
268
+ // terminal tool auto approve settings
269
+ // https://code.visualstudio.com/docs/copilot/chat/chat-tools#_autoapprove-all-tools
270
+ // "chat.tools.terminal.enableAutoApprove": true,
271
+ // "chat.tools.global.autoApprove": false,
272
+ },
273
+ "extensions": [
274
+ // basic extensions
275
+ "EditorConfig.EditorConfig",
276
+ "albert.TabOut",
277
+ // git extensions
278
+ "mhutchie.git-graph",
279
+ "eamodio.gitlens",
280
+
281
+ // installed by the node feature above
282
+ // "dbaeumer.vscode-eslint",
283
+ // installed by the python feature above
284
+ // "ms-python.python",
285
+ // "ms-python.vscode-pylance",
286
+ // "ms-python.autopep8",
287
+
288
+ // jupyter support - installed by python feature above
289
+ //"ms-toolsai.jupyter",
290
+ // ms-toolsai.jupyter-keymap will be auto installed as dependency
291
+ // but will run on local host only, not in container
292
+
293
+ // AI extensions
294
+ // "anthropic.claude-code",
295
+
296
+ // vscode recommended extensions for devcontainers
297
+ "mutantdino.resourcemonitor",
298
+ ],
299
+ },
300
+ },
301
+
302
+ // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
303
+ // !!! if the user is not 'vscode', adjust the mount targets above accordingly
304
+ // !!! see also "mounts" setting above
305
+ "containerUser": "vscode",
306
+ "remoteUser": "vscode",
307
+
308
+ // Extra arguments passed to 'docker run' (the .env file option below is currently commented out)
309
+ "runArgs": [
310
+ // .env file
311
+ // "--env-file", ".env",
312
+ // privileged access e.g. USB devices
313
+ // "--privileged",
314
+ // let the container auto-destroy after it's closed
315
+ // https://github.com/microsoft/vscode-remote-release/issues/3512#issuecomment-1018286415
316
+ "--rm",
317
+ // vnc for desktop-lite feature
318
+ // "--shm-size=1g",
319
+ ]
320
+ }
@@ -0,0 +1,22 @@
1
+ name: Publish to PyPI
2
+ on:
3
+ push:
4
+ tags: ["v*"]
5
+
6
+ jobs:
7
+ publish:
8
+ runs-on: ubuntu-latest
9
+ environment: pypi
10
+ permissions:
11
+ id-token: write # required for Trusted Publishing + attestations
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.13"
17
+ - name: Build
18
+ run: |
19
+ python -m pip install --upgrade build
20
+ python -m build
21
+ - name: Publish
22
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,58 @@
1
+ # ---------------------------------------------------------------------------
2
+ # Secrets / local env
3
+ # ---------------------------------------------------------------------------
4
+ .env
5
+ .env.*
6
+ !.env.example
7
+
8
+ # ---------------------------------------------------------------------------
9
+ # Python byte-code / caches
10
+ # ---------------------------------------------------------------------------
11
+ __pycache__/
12
+ *.py[cod]
13
+ *$py.class
14
+ .pytest_cache/
15
+ .mypy_cache/
16
+ .ruff_cache/
17
+ .cache/
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Virtual environments
21
+ # ---------------------------------------------------------------------------
22
+ .venv/
23
+ venv/
24
+ env/
25
+ ENV/
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Packaging / build artifacts
29
+ # ---------------------------------------------------------------------------
30
+ build/
31
+ dist/
32
+ *.egg-info/
33
+ *.egg
34
+ .eggs/
35
+ wheels/
36
+ pip-wheel-metadata/
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Test / coverage artifacts
40
+ # ---------------------------------------------------------------------------
41
+ .coverage
42
+ .coverage.*
43
+ htmlcov/
44
+ coverage.xml
45
+ .tox/
46
+ .nox/
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Editor / OS cruft
50
+ # ---------------------------------------------------------------------------
51
+ .DS_Store
52
+ Thumbs.db
53
+ *.swp
54
+
55
+ # ---------------------------------------------------------------------------
56
+ # Misc local tooling
57
+ # ---------------------------------------------------------------------------
58
+ .local/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 mdformat-sembr contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,114 @@
1
+ Metadata-Version: 2.4
2
+ Name: mdformat-sembr
3
+ Version: 0.1.0
4
+ Summary: mdformat plugin that inserts Semantic Line Breaks (sembr.org) as CommonMark soft breaks
5
+ Project-URL: Homepage, https://codeberg.org/bugrasan/mdformat-sembr
6
+ Project-URL: GitHub Mirror, https://github.com/bugrasan/mdformat-sembr
7
+ Author: bugrasan
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: formatter,markdown,mdformat,semantic line breaks,sembr
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
19
+ Requires-Python: >=3.10
20
+ Requires-Dist: mdformat>=1.0
21
+ Provides-Extra: test
22
+ Requires-Dist: pytest>=7; extra == 'test'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # mdformat-sembr
26
+
27
+ An [mdformat](https://mdformat.readthedocs.io) parser-extension plugin that inserts
28
+ [Semantic Line Breaks](https://sembr.org) (SemBr) as CommonMark **soft breaks**.
29
+
30
+ SemBr is a convention for adding line breaks in Markdown source at sentence and
31
+ clause boundaries. Because the breaks are CommonMark *soft* breaks (a bare `\n`
32
+ inside a paragraph), they render to a single space — the rendered HTML output is
33
+ unchanged, only the source becomes more diff-friendly.
34
+
35
+ The plugin is fully deterministic: no ML, no network, no LLM calls. The same input
36
+ always produces the same output.
37
+
38
+ ## Why
39
+
40
+ Moving SemBr logic out of an LLM/agent loop into a token-free, reproducible
41
+ formatter pass makes authored Markdown consistent and cheap to maintain.
42
+
43
+ ## Install
44
+
45
+ ```bash
46
+ # uv (recommended) — --with is repeatable (or comma-separate the plugins)
47
+ uv tool install mdformat --with mdformat-sembr --with mdformat-frontmatter
48
+
49
+ # pipx — install the app, then inject the plugins into its environment
50
+ pipx install mdformat
51
+ pipx inject mdformat mdformat-sembr mdformat-frontmatter
52
+
53
+ # local development
54
+ pip install -e .
55
+ ```
56
+
57
+ `mdformat-frontmatter` is optional: install it only if your Markdown uses
58
+ YAML/TOML frontmatter and you want mdformat to preserve/format it. It composes
59
+ with `mdformat-sembr` (frontmatter is a separate node type and is never broken).
60
+
61
+ ## Usage
62
+
63
+ ```bash
64
+ mdformat --version # should list "mdformat_sembr"
65
+ echo "First sentence. Second sentence." | mdformat -
66
+ ```
67
+
68
+ From Python:
69
+
70
+ ```python
71
+ import mdformat
72
+
73
+ mdformat.text("First sentence. Second sentence.\n", extensions={"sembr"})
74
+ # 'First sentence.\nSecond sentence.\n'
75
+ ```
76
+
77
+ ## How it works
78
+
79
+ The plugin registers a **postprocessor** on the `paragraph` node type. At that point
80
+ inline formatting (emphasis, links, inline code) is already resolved into the string,
81
+ so it operates on the final rendered text and only protects a few inline constructs by
82
+ regex. Block-level elements (headings, code blocks, tables, frontmatter, HTML blocks)
83
+ are separate node types and are never touched.
84
+
85
+ `CHANGES_AST = False`: soft breaks are AST-safe by design, so mdformat's built-in
86
+ `is_md_equal` validator gates correctness. If validation ever fails, the break logic is
87
+ wrong — it is never worked around with `--no-validate` or hard breaks.
88
+
89
+ ## Configuration
90
+
91
+ Configure via `[plugin.sembr]` in `.mdformat.toml`, or via CLI flags. CLI values merge
92
+ over TOML.
93
+
94
+ | Option | Type | Default | Meaning |
95
+ | --------------- | ----------- | --------- | -------------------------------------------------------------- |
96
+ | `min_chars` | int | `15` | Minimum length of the segment before a break is allowed. |
97
+ | `abbreviations` | list[str] | see below | Tokens after which no sentence break is inserted. |
98
+ | `break_clauses` | bool | `false` | Enable clause-level breaks (SemBr "SHOULD"). Off by default. |
99
+ | `clause_chars` | str | `",;:—"` | Clause punctuation set (only used when `break_clauses` true). |
100
+
101
+ CLI flags: `--sembr-min-chars`, `--sembr-abbreviations`, `--sembr-break-clauses`,
102
+ `--sembr-clause-chars`.
103
+
104
+ `.mdformat.toml` example:
105
+
106
+ ```toml
107
+ [plugin.sembr]
108
+ min_chars = 20
109
+ break_clauses = true
110
+ ```
111
+
112
+ ## License
113
+
114
+ MIT
@@ -0,0 +1,90 @@
1
+ # mdformat-sembr
2
+
3
+ An [mdformat](https://mdformat.readthedocs.io) parser-extension plugin that inserts
4
+ [Semantic Line Breaks](https://sembr.org) (SemBr) as CommonMark **soft breaks**.
5
+
6
+ SemBr is a convention for adding line breaks in Markdown source at sentence and
7
+ clause boundaries. Because the breaks are CommonMark *soft* breaks (a bare `\n`
8
+ inside a paragraph), they render to a single space — the rendered HTML output is
9
+ unchanged, only the source becomes more diff-friendly.
10
+
11
+ The plugin is fully deterministic: no ML, no network, no LLM calls. The same input
12
+ always produces the same output.
13
+
14
+ ## Why
15
+
16
+ Moving SemBr logic out of an LLM/agent loop into a token-free, reproducible
17
+ formatter pass makes authored Markdown consistent and cheap to maintain.
18
+
19
+ ## Install
20
+
21
+ ```bash
22
+ # uv (recommended) — --with is repeatable (or comma-separate the plugins)
23
+ uv tool install mdformat --with mdformat-sembr --with mdformat-frontmatter
24
+
25
+ # pipx — install the app, then inject the plugins into its environment
26
+ pipx install mdformat
27
+ pipx inject mdformat mdformat-sembr mdformat-frontmatter
28
+
29
+ # local development
30
+ pip install -e .
31
+ ```
32
+
33
+ `mdformat-frontmatter` is optional: install it only if your Markdown uses
34
+ YAML/TOML frontmatter and you want mdformat to preserve/format it. It composes
35
+ with `mdformat-sembr` (frontmatter is a separate node type and is never broken).
36
+
37
+ ## Usage
38
+
39
+ ```bash
40
+ mdformat --version # should list "mdformat_sembr"
41
+ echo "First sentence. Second sentence." | mdformat -
42
+ ```
43
+
44
+ From Python:
45
+
46
+ ```python
47
+ import mdformat
48
+
49
+ mdformat.text("First sentence. Second sentence.\n", extensions={"sembr"})
50
+ # 'First sentence.\nSecond sentence.\n'
51
+ ```
52
+
53
+ ## How it works
54
+
55
+ The plugin registers a **postprocessor** on the `paragraph` node type. At that point
56
+ inline formatting (emphasis, links, inline code) is already resolved into the string,
57
+ so it operates on the final rendered text and only protects a few inline constructs by
58
+ regex. Block-level elements (headings, code blocks, tables, frontmatter, HTML blocks)
59
+ are separate node types and are never touched.
60
+
61
+ `CHANGES_AST = False`: soft breaks are AST-safe by design, so mdformat's built-in
62
+ `is_md_equal` validator gates correctness. If validation ever fails, the break logic is
63
+ wrong — it is never worked around with `--no-validate` or hard breaks.
64
+
65
+ ## Configuration
66
+
67
+ Configure via `[plugin.sembr]` in `.mdformat.toml`, or via CLI flags. CLI values merge
68
+ over TOML.
69
+
70
+ | Option | Type | Default | Meaning |
71
+ | --------------- | ----------- | --------- | -------------------------------------------------------------- |
72
+ | `min_chars` | int | `15` | Minimum length of the segment before a break is allowed. |
73
+ | `abbreviations` | list[str] | see below | Tokens after which no sentence break is inserted. |
74
+ | `break_clauses` | bool | `false` | Enable clause-level breaks (SemBr "SHOULD"). Off by default. |
75
+ | `clause_chars` | str | `",;:—"` | Clause punctuation set (only used when `break_clauses` true). |
76
+
77
+ CLI flags: `--sembr-min-chars`, `--sembr-abbreviations`, `--sembr-break-clauses`,
78
+ `--sembr-clause-chars`.
79
+
80
+ `.mdformat.toml` example:
81
+
82
+ ```toml
83
+ [plugin.sembr]
84
+ min_chars = 20
85
+ break_clauses = true
86
+ ```
87
+
88
+ ## License
89
+
90
+ MIT
@@ -0,0 +1,38 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.27"]  # PEP 639 `license` expression and `license-files` array need a recent backend
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "mdformat-sembr"
7
+ version = "0.1.0"
8
+ description = "mdformat plugin that inserts Semantic Line Breaks (sembr.org) as CommonMark soft breaks"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ license-files = ["LICENSE"]
12
+ requires-python = ">=3.10"
13
+ keywords = ["mdformat", "markdown", "sembr", "semantic line breaks", "formatter"]
14
+ authors = [ { name = "bugrasan" } ]
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Topic :: Software Development :: Libraries :: Python Modules",
23
+ "Topic :: Text Processing :: Markup :: Markdown",
24
+ ]
25
+ dependencies = ["mdformat>=1.0"]
26
+
27
+ [project.optional-dependencies]
28
+ test = ["pytest>=7"]
29
+
30
+ [project.urls]
31
+ Homepage = "https://codeberg.org/bugrasan/mdformat-sembr"
32
+ "GitHub Mirror" = "https://github.com/bugrasan/mdformat-sembr"
33
+
34
+ [project.entry-points."mdformat.parser_extension"]
35
+ sembr = "mdformat_sembr:_plugin"
36
+
37
+ [tool.hatch.build.targets.wheel]
38
+ packages = ["src/mdformat_sembr"]