mdformat-sembr 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdformat_sembr-0.1.0/.devcontainer/devcontainer.json +320 -0
- mdformat_sembr-0.1.0/.github/workflows/publish.yml +22 -0
- mdformat_sembr-0.1.0/.gitignore +58 -0
- mdformat_sembr-0.1.0/LICENSE +21 -0
- mdformat_sembr-0.1.0/PKG-INFO +114 -0
- mdformat_sembr-0.1.0/README.md +90 -0
- mdformat_sembr-0.1.0/pyproject.toml +38 -0
- mdformat_sembr-0.1.0/src/mdformat_sembr/__init__.py +14 -0
- mdformat_sembr-0.1.0/src/mdformat_sembr/_plugin.py +133 -0
- mdformat_sembr-0.1.0/src/mdformat_sembr/_sembr.py +228 -0
- mdformat_sembr-0.1.0/src/mdformat_sembr/py.typed +0 -0
- mdformat_sembr-0.1.0/tests/conftest.py +20 -0
- mdformat_sembr-0.1.0/tests/fixtures/abbreviations.md +1 -0
- mdformat_sembr-0.1.0/tests/fixtures/block_elements.md +13 -0
- mdformat_sembr-0.1.0/tests/fixtures/min_chars.md +3 -0
- mdformat_sembr-0.1.0/tests/fixtures/protected_inline.md +3 -0
- mdformat_sembr-0.1.0/tests/fixtures/sentences.md +1 -0
- mdformat_sembr-0.1.0/tests/test_ast_safety.py +36 -0
- mdformat_sembr-0.1.0/tests/test_break_sentence.py +76 -0
- mdformat_sembr-0.1.0/tests/test_clause_mode.py +50 -0
- mdformat_sembr-0.1.0/tests/test_idempotency.py +22 -0
- mdformat_sembr-0.1.0/tests/test_plugin_discovery.py +69 -0
- mdformat_sembr-0.1.0/tests/test_protected_regions.py +58 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
|
2
|
+
// README at: https://github.com/devcontainers/templates/tree/main/src/typescript-node
|
|
3
|
+
{
|
|
4
|
+
// "name": "Node.js & TypeScript",
|
|
5
|
+
"name": "custom build - mdformat-sembr",
|
|
6
|
+
// list of images: https://github.com/devcontainers/images/tree/main/src
|
|
7
|
+
|
|
8
|
+
// https://github.com/devcontainers/images/tree/main/src/python
|
|
9
|
+
// "image": "mcr.microsoft.com/devcontainers/python:3.13",
|
|
10
|
+
|
|
11
|
+
// https://github.com/devcontainers/images/tree/main/src/typescript-node
|
|
12
|
+
// "image": "mcr.microsoft.com/devcontainers/typescript-node:1-22-bookworm",
|
|
13
|
+
|
|
14
|
+
// https://github.com/devcontainers/images/tree/main/src/base-debian
|
|
15
|
+
// Debian: 9-stretch (2017), 10-buster (2019), 11-bullseye (2021), 12-bookworm (2023), 13-trixie (2025), 14-forky (tbd)
|
|
16
|
+
"image": "mcr.microsoft.com/devcontainers/base:trixie", // bookworm, trixie
|
|
17
|
+
// "image": "mcr.microsoft.com/devcontainers/base:debian",
|
|
18
|
+
|
|
19
|
+
// https://github.com/devcontainers/images/tree/main/src/base-ubuntu
|
|
20
|
+
// "image": "mcr.microsoft.com/devcontainers/base:ubuntu", // comes with node:v18, python:v3.12
|
|
21
|
+
// "image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04", // only LTS releases are available, e.g. 24.04, 22.04
|
|
22
|
+
|
|
23
|
+
// **IMPORTANT**: not from microsoft, as such no 'vscode' user added
|
|
24
|
+
// "image": "quay.io/toolbx/arch-toolbox:latest",
|
|
25
|
+
|
|
26
|
+
// either image or self-build docker image
|
|
27
|
+
// "build": {
|
|
28
|
+
// "dockerfile": "Dockerfile.trixie"
|
|
29
|
+
// },
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
// Features to add to the dev container. More info: https://containers.dev/features.
|
|
33
|
+
// "features": {},
|
|
34
|
+
"features": {
|
|
35
|
+
// https://github.com/devcontainers/features/blob/main/src/common-utils/
|
|
36
|
+
"ghcr.io/devcontainers/features/common-utils:2": {
|
|
37
|
+
// "installZsh": false, // already part of devcontainer 'base' image
|
|
38
|
+
"username": "vscode"
|
|
39
|
+
},
|
|
40
|
+
// https://github.com/devcontainers/features/tree/main/src/desktop-lite
|
|
41
|
+
// "ghcr.io/devcontainers/features/desktop-lite:1": {
|
|
42
|
+
// // default password 'vscode'
|
|
43
|
+
// // "password": "vscode",
|
|
44
|
+
// // Enter a port for the desktop VNC server (TigerVNC)
|
|
45
|
+
// // "vncPort": 5901,
|
|
46
|
+
// // Enter a port for the VNC web client (noVNC)
|
|
47
|
+
// // "webPort": 6080,
|
|
48
|
+
// },
|
|
49
|
+
// https://github.com/devcontainers/features/blob/main/src/node/
|
|
50
|
+
// "ghcr.io/devcontainers/features/node:1": {
|
|
51
|
+
// // min required version for github-copilot as of Nov'25
|
|
52
|
+
// "version": "22"
|
|
53
|
+
// },
|
|
54
|
+
// https://github.com/devcontainers/features/blob/main/src/python/
|
|
55
|
+
"ghcr.io/devcontainers/features/python:1": {
|
|
56
|
+
// // debian:trixie comes with 3.13, however due to this bug
|
|
57
|
+
// // we still need to provide the version if we want to install jupyterlab
|
|
58
|
+
// // https://github.com/devcontainers/features/issues/1403
|
|
59
|
+
"version": "os-provided",
|
|
60
|
+
// "version": "3.13",
|
|
61
|
+
// "installJupyterlab": "true"
|
|
62
|
+
},
|
|
63
|
+
// https://github.com/devcontainers/features/blob/main/src/github-cli/
|
|
64
|
+
"ghcr.io/devcontainers/features/github-cli:1": {},
|
|
65
|
+
// https://github.com/devcontainers/features/blob/main/src/copilot-cli/
|
|
66
|
+
"ghcr.io/devcontainers/features/copilot-cli:1": {},
|
|
67
|
+
// https://github.com/devcontainers/features/tree/main/src/azure-cli
|
|
68
|
+
// **IMPORTANT**: disable telemetry with `az config set core.collect_telemetry=false`
|
|
69
|
+
// "ghcr.io/devcontainers/features/azure-cli:1": {},
|
|
70
|
+
// https://github.com/devcontainers/features/tree/main/src/aws-cli
|
|
71
|
+
// "ghcr.io/devcontainers/features/aws-cli:1": {},
|
|
72
|
+
},
|
|
73
|
+
|
|
74
|
+
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
|
75
|
+
// "forwardPorts": [],
|
|
76
|
+
// **VNC for web**
|
|
77
|
+
// "forwardPorts": [6080],
|
|
78
|
+
// "portsAttributes": {
|
|
79
|
+
// "6080": {
|
|
80
|
+
// "label": "desktop"
|
|
81
|
+
// }
|
|
82
|
+
// }
|
|
83
|
+
// **VNC for TigerVNC**
|
|
84
|
+
// "forwardPorts": [5901],
|
|
85
|
+
// "appPort": 5901,
|
|
86
|
+
// "portsAttributes": {
|
|
87
|
+
// "5901": {
|
|
88
|
+
// "label": "VNC desktop",
|
|
89
|
+
// }
|
|
90
|
+
// },
|
|
91
|
+
|
|
92
|
+
// Runs on the HOST before the container starts — ensures the .worktrees sibling folder exists.
|
|
93
|
+
// initializeCommand runs on the HOST before the container starts.
|
|
94
|
+
// Each command value must be a string (not an array) so it runs through /bin/sh -c.
|
|
95
|
+
"initializeCommand": {
|
|
96
|
+
// Git worktrees: VS Code creates <project>.worktrees/ next to the workspace folder.
|
|
97
|
+
// Inside the container /workspaces/ is not writable by the vscode user, so we bind-mount
|
|
98
|
+
// the folder from the host to avoid permission errors.
|
|
99
|
+
"worktrees": "mkdir -p \"${localWorkspaceFolder}.worktrees\"",
|
|
100
|
+
// **IMPORTANT** deletes the named docker volume; usually mounted as ~/
|
|
101
|
+
// uncomment for a single rebuild then comment it out again
|
|
102
|
+
//"wipeVolume": "docker volume rm -f DevContVol-${devcontainerId} 2>/dev/null || true",
|
|
103
|
+
// Timezone - align to local timezone
|
|
104
|
+
// "TZ": "TZ_VAL=$(readlink /etc/localtime | sed 's|.*/zoneinfo/||') && (grep -q '^TZ=' .devcontainer/.env && sed -i '' \"s|^TZ=.*|TZ=$TZ_VAL|\" .devcontainer/.env || echo \"TZ=$TZ_VAL\" >> .devcontainer/.env)",
|
|
105
|
+
},
|
|
106
|
+
|
|
107
|
+
"mounts": [
|
|
108
|
+
// FIXME
|
|
109
|
+
// !!! There is some permission issue:
|
|
110
|
+
// cannot create directory '/home/vscode/.vscode-server/bin': Permission denied
|
|
111
|
+
// see also for workaround: https://github.com/microsoft/vscode-remote-release/issues/7690#issuecomment-2761197753
|
|
112
|
+
// https://code.visualstudio.com/docs/devcontainers/tips-and-tricks
|
|
113
|
+
// It creates a named volume called profile mounted to /root, which will survive a rebuild.
|
|
114
|
+
// It next creates an anonymous volume mounted to /root/.vscode-server that gets destroyed on rebuild,
|
|
115
|
+
// which allows VS Code to reinstall extensions and dotfiles.
|
|
116
|
+
// !!! Make sure to adjust the target path if you are not using the 'vscode' user
|
|
117
|
+
// !!! see also "remoteUser" setting below
|
|
118
|
+
// "source=profile,target=/home/vscode,type=volume",
|
|
119
|
+
// "target=/home/vscode/.vscode-server,type=volume",
|
|
120
|
+
|
|
121
|
+
// ${localEnv:HOME} on Linux/macOS
|
|
122
|
+
// ${localEnv:USERPROFILE} on Windows
|
|
123
|
+
// "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig,type=bind,readonly,consistency=cached",
|
|
124
|
+
// "source=${localEnv:HOME}/.gitignore,target=/home/vscode/.gitignore,type=bind,readonly,consistency=cached",
|
|
125
|
+
|
|
126
|
+
// Git worktrees: VS Code creates <project>.worktrees/ next to the workspace folder.
|
|
127
|
+
"source=${localWorkspaceFolder}.worktrees,target=${containerWorkspaceFolder}.worktrees,type=bind,consistency=cached",
|
|
128
|
+
|
|
129
|
+
// **IMPORTANT**:
|
|
130
|
+
// to enable named volumes that survive container recreation, we moved to Dockerfile;
|
|
131
|
+
// to use copy permission on first mount of the named volumes, since they are always created with root permissions
|
|
132
|
+
// "source=DevContVol-${devcontainerId},target=/home/vscode,type=volume",
|
|
133
|
+
|
|
134
|
+
// **IMPORTANT**: keepassxc creates a new file on write and replaces the old one - this is a global setting for keepassxc
|
|
135
|
+
// this can be adjusted with writing directly to the existing file, but it is flagged as "dangerous"
|
|
136
|
+
// alternative to share a folder instead of a single file
|
|
137
|
+
// TODO: create feature request to make this database specific option
|
|
138
|
+
// "source=${localEnv:HOME}/code/devcont.kdbx,target=/home/vscode/devcont.kdbx,type=bind,readonly,consistency=cached",
|
|
139
|
+
|
|
140
|
+
// !!! SSH keys are sensitive, so they should be read-only and cached for better performance
|
|
141
|
+
// "source=${localEnv:HOME}/.ssh/id_ed25519,target=/home/vscode/.ssh/id_ed25519,type=bind,readonly,consistency=cached",
|
|
142
|
+
// "source=${localEnv:HOME}/.ssh/id_ed25519.pub,target=/home/vscode/.ssh/id_ed25519.pub,type=bind,readonly,consistency=cached",
|
|
143
|
+
|
|
144
|
+
// Claude Code CLI needs to write to these files, so they cannot be read-only
|
|
145
|
+
// "source=${localEnv:HOME}/.claude,target=/home/vscode/.claude,type=bind,consistency=cached",
|
|
146
|
+
// "source=${localEnv:HOME}/.claude.json,target=/home/vscode/.claude.json,type=bind,consistency=cached",
|
|
147
|
+
],
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
// set env vars for inside the container
|
|
151
|
+
"containerEnv": {
|
|
152
|
+
// FIXME: use initializeCommand to write to .devcontainer/.env and use runArgs;
|
|
153
|
+
// but it is way too complicated, for now this is good enough
|
|
154
|
+
// timezone fix of colima docker on macos
|
|
155
|
+
"TZ": "Europe/Zurich",
|
|
156
|
+
// setting the default visual text editor
|
|
157
|
+
"EDITOR": "vim",
|
|
158
|
+
"VISUAL": "code",
|
|
159
|
+
|
|
160
|
+
// github-cli
|
|
161
|
+
// https://cli.github.com/telemetry
|
|
162
|
+
"GH_TELEMETRY": "false",
|
|
163
|
+
"DO_NOT_TRACK": "true",
|
|
164
|
+
|
|
165
|
+
// claude-code disable telemetry
|
|
166
|
+
// https://code.claude.com/docs/en/data-usage
|
|
167
|
+
// "DISABLE_TELEMETRY": "1",
|
|
168
|
+
// "DISABLE_ERROR_REPORTING": "1",
|
|
169
|
+
// "DISABLE_BUG_COMMAND": "1",
|
|
170
|
+
// "CLAUDE_CONFIG_DIR": "/home/vscode/.claude",
|
|
171
|
+
// "CLAUDE_CONFIG_FILE": "/home/vscode/.claude.json",
|
|
172
|
+
// Authentication token => or use .env file and "remoteEnv" setting below
|
|
173
|
+
// Token can be generated with `claude setup-token`
|
|
174
|
+
// "CLAUDE_CODE_OAUTH_TOKEN": "",
|
|
175
|
+
// "ANTHROPIC_AUTH_TOKEN": "",
|
|
176
|
+
|
|
177
|
+
// codex: no opt-out telemetry
|
|
178
|
+
// gh-copilot: no opt-out telemetry
|
|
179
|
+
},
|
|
180
|
+
// copy from host env when container starts
|
|
181
|
+
"remoteEnv": {
|
|
182
|
+
// "GITHUB_TOKEN": "${localEnv:GITHUB_TOKEN}",
|
|
183
|
+
// or use VS Code settings
|
|
184
|
+
// "OPENAI_API_KEY": "${env:OPENAI_API_KEY}"
|
|
185
|
+
|
|
186
|
+
// revoke unwanted ssh-forwarding: see https://github.com/microsoft/vscode-remote-release/issues/11014
|
|
187
|
+
"SSH_AUTH_SOCK": "",
|
|
188
|
+
},
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
// Use 'postCreateCommand' to run commands after the container is created.
|
|
192
|
+
// **NOTE**: these commands are executed in parallel; ensure they aren't
|
|
193
|
+
// dependent on each other
|
|
194
|
+
"postCreateCommand": {
|
|
195
|
+
// Remember!! they are executed in parallel
|
|
196
|
+
// Debian
|
|
197
|
+
"basic-dpkgs": "sudo apt update && sudo apt install -y --no-install-recommends vim vim-scripts vim-doc shellcheck eza bat fd-find ripgrep fzf git-delta xh lazygit just jq gnupg2 pass keepassxc ffmpeg fastfetch && sudo apt clean && echo '01-basic-dpkg' >> ~/devcontainer-post-create.txt",
|
|
198
|
+
// Ubuntu
|
|
199
|
+
//"basic-dpkgs": "sudo apt update && sudo apt install -y --no-install-recommends vim vim-scripts vim-doc shellcheck eza batcat fd-find ripgrep fzf jq gnupg2 pass keepassxc && sudo apt clean && echo '01-basic-dpkg' >> ~/devcontainer-post-create.txt",
|
|
200
|
+
|
|
201
|
+
// editorconfig is required by the vscode extension
|
|
202
|
+
// mermaid to save as picture
|
|
203
|
+
// "npm-lint": "npm install -g editorconfig eslint typescript && echo '02-npm-lint' >> ~/devcontainer-post-create.txt",
|
|
204
|
+
|
|
205
|
+
// anthropic claude code cli
|
|
206
|
+
// https://code.claude.com/docs/en/overview
|
|
207
|
+
// bash is the recommended way as of Nov'25
|
|
208
|
+
// "claude-code-npm": "npm install -g @anthropic-ai/claude-code && echo '04-claude-code-npm' >> ~/devcontainer-post-create.txt",
|
|
209
|
+
//"claude-code-binary": "curl -fsSL https://claude.ai/install.sh | bash && echo '04-claude-code-binary' >> ~/devcontainer-post-create.txt",
|
|
210
|
+
|
|
211
|
+
// openai codex cli
|
|
212
|
+
// https://github.com/openai/codex
|
|
213
|
+
//"codex": "npm install -g @openai/codex && echo '05-codex' >> ~/devcontainer-post-create.txt",
|
|
214
|
+
|
|
215
|
+
// BMAD - Breakthrough Method for Agile AI-Driven Development
|
|
216
|
+
// Requires interactive shell
|
|
217
|
+
//"bmad": "npx bmad-method@alpha install && echo 'doneX' >> ~/devcontainer-post-create.txt",
|
|
218
|
+
|
|
219
|
+
// "python-notebook": "pip install notebook",
|
|
220
|
+
//"python-anthropic": "pip install anthropic && echo '06-python-anthropic' >> ~/devcontainer-post-create.txt",
|
|
221
|
+
//"python-openai": "pip install openai && echo '07-python-openai' >> ~/devcontainer-post-create.txt",
|
|
222
|
+
|
|
223
|
+
//"langchain": "pip install langchain langchain-openai langchain-anthropic langgraph-cli[inmem] && echo '08-langchain' >> ~/devcontainer-post-create.txt",
|
|
224
|
+
|
|
225
|
+
"uv": "curl -LsSf https://astral.sh/uv/install.sh | sh && echo '09-uv' >> ~/devcontainer-post-create.txt",
|
|
226
|
+
|
|
227
|
+
// git based - codeberg can be replaced w/ gitlab.com or github.com
|
|
228
|
+
//"chezmoi": "sh -c \"$(curl -fsLS get.chezmoi.io/lb)\" -- init --apply git@codeberg.org:bugrasan/dotfiles.git && echo '10-chezmoi' >> ~/devcontainer-post-create.txt",
|
|
229
|
+
// http based - codeberg can be replaced w/ gitlab.com or github.com
|
|
230
|
+
"chezmoi": "sh -c \"$(curl -fsLS get.chezmoi.io/lb)\" -- init --one-shot --apply codeberg.org/bugrasan/dotfiles && echo '10-chezmoi' >> ~/devcontainer-post-create.txt",
|
|
231
|
+
|
|
232
|
+
// see https://github.com/microsoft/vscode-remote-release/issues/11014
|
|
233
|
+
"revoke-ssh-forwarding": "find /tmp -maxdepth 1 -name 'vscode-ssh-auth-*.sock' -delete 2>/dev/null || true && echo '11-revoke-ssh-forwarding' >> ~/devcontainer-post-create.txt",
|
|
234
|
+
|
|
235
|
+
// simplifies claude; debian and ubuntu don't have duckdb in their repos, so we need to install it manually
|
|
236
|
+
// "duckdb": "curl https://install.duckdb.org | sh && echo '12-duckdb' >> ~/devcontainer-post-create.txt",
|
|
237
|
+
|
|
238
|
+
// disable github telemetry
|
|
239
|
+
"github-cli-telemetry": "gh config set telemetry disabled || echo '13-github-cli-telemetry' >> ~/devcontainer-post-create.txt",
|
|
240
|
+
},
|
|
241
|
+
// "yarn install",
|
|
242
|
+
|
|
243
|
+
// Configure tool-specific properties.
|
|
244
|
+
"customizations": {
|
|
245
|
+
"vscode": {
|
|
246
|
+
"settings": {
|
|
247
|
+
// inline instructions for github copilot chat
|
|
248
|
+
// https://code.visualstudio.com/docs/devcontainers/tips-and-tricks
|
|
249
|
+
|
|
250
|
+
// "editor.formatOnSave": true,
|
|
251
|
+
"terminal.integrated.shell.linux": "/bin/bash",
|
|
252
|
+
"telemetry.telemetryLevel": "off",
|
|
253
|
+
|
|
254
|
+
// Keep speech extensions on the local UI side; avoid loading them in remote container host.
|
|
255
|
+
"remote.extensionKind": {
|
|
256
|
+
"ms-vscode.vscode-speech": "ui",
|
|
257
|
+
},
|
|
258
|
+
|
|
259
|
+
// https://code.visualstudio.com/docs/copilot/customization/custom-instructions#_use-a-githubcopilotinstructionsmd-file
|
|
260
|
+
"github.copilot.chat.codeGeneration.useInstructionFiles": true,
|
|
261
|
+
// https://code.visualstudio.com/docs/copilot/customization/custom-instructions#_use-an-agentsmd-file
|
|
262
|
+
"chat.useAgentsMdFile": true,
|
|
263
|
+
"chat.useAgentSkills": true,
|
|
264
|
+
"chat.useNestedAgentsMdFiles": true,
|
|
265
|
+
// https://code.visualstudio.com/docs/copilot/customization/mcp-servers#_add-an-mcp-server-from-the-github-mcp-server-registry
|
|
266
|
+
"chat.mcp.gallery.enabled": true,
|
|
267
|
+
|
|
268
|
+
// terminal tool auto approve settings
|
|
269
|
+
// https://code.visualstudio.com/docs/copilot/chat/chat-tools#_autoapprove-all-tools
|
|
270
|
+
// "chat.tools.terminal.enableAutoApprove": true,
|
|
271
|
+
// "chat.tools.global.autoApprove": false,
|
|
272
|
+
},
|
|
273
|
+
"extensions": [
|
|
274
|
+
// basic extensions
|
|
275
|
+
"EditorConfig.EditorConfig",
|
|
276
|
+
"albert.TabOut",
|
|
277
|
+
// git extensions
|
|
278
|
+
"mhutchie.git-graph",
|
|
279
|
+
"eamodio.gitlens",
|
|
280
|
+
|
|
281
|
+
// installed by the node feature above
|
|
282
|
+
// "dbaeumer.vscode-eslint",
|
|
283
|
+
// installed by the python feature above
|
|
284
|
+
// "ms-python.python",
|
|
285
|
+
// "ms-python.vscode-pylance",
|
|
286
|
+
// "ms-python.autopep8",
|
|
287
|
+
|
|
288
|
+
// jupyter support - installed by python feature above
|
|
289
|
+
//"ms-toolsai.jupyter",
|
|
290
|
+
// ms-toolsai.jupyter-keymap will be auto installed as dependency
|
|
291
|
+
// but will run on local host only, not in container
|
|
292
|
+
|
|
293
|
+
// AI extensions
|
|
294
|
+
// "anthropic.claude-code",
|
|
295
|
+
|
|
296
|
+
// vscode recommended extensions for devcontainers
|
|
297
|
+
"mutantdino.resourcemonitor",
|
|
298
|
+
],
|
|
299
|
+
},
|
|
300
|
+
},
|
|
301
|
+
|
|
302
|
+
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
|
303
|
+
// !!! if not set to 'vscode', then adjust mounts above accordingly
|
|
304
|
+
// !!! see also "mounts" setting above
|
|
305
|
+
"containerUser": "vscode",
|
|
306
|
+
"remoteUser": "vscode",
|
|
307
|
+
|
|
308
|
+
// Mount local .env file into the container
|
|
309
|
+
"runArgs": [
|
|
310
|
+
// .env file
|
|
311
|
+
// "--env-file", ".env",
|
|
312
|
+
// privileged access e.g. USB devices
|
|
313
|
+
// "--privileged",
|
|
314
|
+
// let the container auto-destroy after it's closed
|
|
315
|
+
// https://github.com/microsoft/vscode-remote-release/issues/3512#issuecomment-1018286415
|
|
316
|
+
"--rm",
|
|
317
|
+
// vnc for desktop-lite feature
|
|
318
|
+
// "--shm-size=1g",
|
|
319
|
+
]
|
|
320
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
tags: ["v*"]
|
|
5
|
+
|
|
6
|
+
jobs:
|
|
7
|
+
publish:
|
|
8
|
+
runs-on: ubuntu-latest
|
|
9
|
+
environment: pypi
|
|
10
|
+
permissions:
|
|
11
|
+
id-token: write # required for Trusted Publishing + attestations
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: "3.13"
|
|
17
|
+
- name: Build
|
|
18
|
+
run: |
|
|
19
|
+
python -m pip install --upgrade build
|
|
20
|
+
python -m build
|
|
21
|
+
- name: Publish
|
|
22
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# ---------------------------------------------------------------------------
|
|
2
|
+
# Secrets / local env
|
|
3
|
+
# ---------------------------------------------------------------------------
|
|
4
|
+
.env
|
|
5
|
+
.env.*
|
|
6
|
+
!.env.example
|
|
7
|
+
|
|
8
|
+
# ---------------------------------------------------------------------------
|
|
9
|
+
# Python byte-code / caches
|
|
10
|
+
# ---------------------------------------------------------------------------
|
|
11
|
+
__pycache__/
|
|
12
|
+
*.py[cod]
|
|
13
|
+
*$py.class
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.mypy_cache/
|
|
16
|
+
.ruff_cache/
|
|
17
|
+
.cache/
|
|
18
|
+
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
# Virtual environments
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
.venv/
|
|
23
|
+
venv/
|
|
24
|
+
env/
|
|
25
|
+
ENV/
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Packaging / build artifacts
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
build/
|
|
31
|
+
dist/
|
|
32
|
+
*.egg-info/
|
|
33
|
+
*.egg
|
|
34
|
+
.eggs/
|
|
35
|
+
wheels/
|
|
36
|
+
pip-wheel-metadata/
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Test / coverage artifacts
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
.coverage
|
|
42
|
+
.coverage.*
|
|
43
|
+
htmlcov/
|
|
44
|
+
coverage.xml
|
|
45
|
+
.tox/
|
|
46
|
+
.nox/
|
|
47
|
+
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
# Editor / OS cruft
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
.DS_Store
|
|
52
|
+
Thumbs.db
|
|
53
|
+
*.swp
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# Misc local tooling
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
.local/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 mdformat-sembr contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mdformat-sembr
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: mdformat plugin that inserts Semantic Line Breaks (sembr.org) as CommonMark soft breaks
|
|
5
|
+
Project-URL: Homepage, https://codeberg.org/bugrasan/mdformat-sembr
|
|
6
|
+
Project-URL: GitHub Mirror, https://github.com/bugrasan/mdformat-sembr
|
|
7
|
+
Author: bugrasan
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: formatter,markdown,mdformat,semantic line breaks,sembr
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: mdformat>=1.0
|
|
21
|
+
Provides-Extra: test
|
|
22
|
+
Requires-Dist: pytest>=7; extra == 'test'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# mdformat-sembr
|
|
26
|
+
|
|
27
|
+
An [mdformat](https://mdformat.readthedocs.io) parser-extension plugin that inserts
|
|
28
|
+
[Semantic Line Breaks](https://sembr.org) (SemBr) as CommonMark **soft breaks**.
|
|
29
|
+
|
|
30
|
+
SemBr is a convention for adding line breaks in Markdown source at sentence and
|
|
31
|
+
clause boundaries. Because the breaks are CommonMark *soft* breaks (a bare `\n`
|
|
32
|
+
inside a paragraph), they render to a single space — the rendered HTML output is
|
|
33
|
+
unchanged, only the source becomes more diff-friendly.
|
|
34
|
+
|
|
35
|
+
The plugin is fully deterministic: no ML, no network, no LLM calls. The same input
|
|
36
|
+
always produces the same output.
|
|
37
|
+
|
|
38
|
+
## Why
|
|
39
|
+
|
|
40
|
+
Moving SemBr logic out of an LLM/agent loop into a token-free, reproducible
|
|
41
|
+
formatter pass makes authored Markdown consistent and cheap to maintain.
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# uv (recommended) — --with is repeatable (or comma-separate the plugins)
|
|
47
|
+
uv tool install mdformat --with mdformat-sembr --with mdformat-frontmatter
|
|
48
|
+
|
|
49
|
+
# pipx — install the app, then inject the plugins into its environment
|
|
50
|
+
pipx install mdformat
|
|
51
|
+
pipx inject mdformat mdformat-sembr mdformat-frontmatter
|
|
52
|
+
|
|
53
|
+
# local development
|
|
54
|
+
pip install -e .
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
`mdformat-frontmatter` is optional: install it only if your Markdown uses
|
|
58
|
+
YAML/TOML frontmatter and you want mdformat to preserve/format it. It composes
|
|
59
|
+
with `mdformat-sembr` (frontmatter is a separate node type and is never broken).
|
|
60
|
+
|
|
61
|
+
## Usage
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
mdformat --version # should list "mdformat_sembr"
|
|
65
|
+
echo "First sentence. Second sentence." | mdformat -
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
From Python:
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import mdformat
|
|
72
|
+
|
|
73
|
+
mdformat.text("First sentence. Second sentence.\n", extensions={"sembr"})
|
|
74
|
+
# 'First sentence.\nSecond sentence.\n'
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## How it works
|
|
78
|
+
|
|
79
|
+
The plugin registers a **postprocessor** on the `paragraph` node type. At that point
|
|
80
|
+
inline formatting (emphasis, links, inline code) is already resolved into the string,
|
|
81
|
+
so it operates on the final rendered text and only protects a few inline constructs by
|
|
82
|
+
regex. Block-level elements (headings, code blocks, tables, frontmatter, HTML blocks)
|
|
83
|
+
are separate node types and are never touched.
|
|
84
|
+
|
|
85
|
+
`CHANGES_AST = False`: soft breaks are AST-safe by design, so mdformat's built-in
|
|
86
|
+
`is_md_equal` validator gates correctness. If validation ever fails, the break logic is
|
|
87
|
+
wrong — it is never worked around with `--no-validate` or hard breaks.
|
|
88
|
+
|
|
89
|
+
## Configuration
|
|
90
|
+
|
|
91
|
+
Configure via `[plugin.sembr]` in `.mdformat.toml`, or via CLI flags. CLI values merge
|
|
92
|
+
over TOML.
|
|
93
|
+
|
|
94
|
+
| Option | Type | Default | Meaning |
|
|
95
|
+
| --------------- | ----------- | --------- | -------------------------------------------------------------- |
|
|
96
|
+
| `min_chars` | int | `15` | Minimum length of the segment before a break is allowed. |
|
|
97
|
+
| `abbreviations` | list[str] | see below | Tokens after which no sentence break is inserted. |
|
|
98
|
+
| `break_clauses` | bool | `false` | Enable clause-level breaks (SemBr "SHOULD"). Off by default. |
|
|
99
|
+
| `clause_chars` | str | `",;:—"` | Clause punctuation set (only used when `break_clauses` true). |
|
|
100
|
+
|
|
101
|
+
CLI flags: `--sembr-min-chars`, `--sembr-abbreviations`, `--sembr-break-clauses`,
|
|
102
|
+
`--sembr-clause-chars`.
|
|
103
|
+
|
|
104
|
+
`.mdformat.toml` example:
|
|
105
|
+
|
|
106
|
+
```toml
|
|
107
|
+
[plugin.sembr]
|
|
108
|
+
min_chars = 20
|
|
109
|
+
break_clauses = true
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## License
|
|
113
|
+
|
|
114
|
+
MIT
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# mdformat-sembr
|
|
2
|
+
|
|
3
|
+
An [mdformat](https://mdformat.readthedocs.io) parser-extension plugin that inserts
|
|
4
|
+
[Semantic Line Breaks](https://sembr.org) (SemBr) as CommonMark **soft breaks**.
|
|
5
|
+
|
|
6
|
+
SemBr is a convention for adding line breaks in Markdown source at sentence and
|
|
7
|
+
clause boundaries. Because the breaks are CommonMark *soft* breaks (a bare `\n`
|
|
8
|
+
inside a paragraph), they render to a single space — the rendered HTML output is
|
|
9
|
+
unchanged, only the source becomes more diff-friendly.
|
|
10
|
+
|
|
11
|
+
The plugin is fully deterministic: no ML, no network, no LLM calls. The same input
|
|
12
|
+
always produces the same output.
|
|
13
|
+
|
|
14
|
+
## Why
|
|
15
|
+
|
|
16
|
+
Moving SemBr logic out of an LLM/agent loop into a token-free, reproducible
|
|
17
|
+
formatter pass makes authored Markdown consistent and cheap to maintain.
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# uv (recommended) — --with is repeatable (or comma-separate the plugins)
|
|
23
|
+
uv tool install mdformat --with mdformat-sembr --with mdformat-frontmatter
|
|
24
|
+
|
|
25
|
+
# pipx — install the app, then inject the plugins into its environment
|
|
26
|
+
pipx install mdformat
|
|
27
|
+
pipx inject mdformat mdformat-sembr mdformat-frontmatter
|
|
28
|
+
|
|
29
|
+
# local development
|
|
30
|
+
pip install -e .
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
`mdformat-frontmatter` is optional: install it only if your Markdown uses
|
|
34
|
+
YAML/TOML frontmatter and you want mdformat to preserve/format it. It composes
|
|
35
|
+
with `mdformat-sembr` (frontmatter is a separate node type and is never broken).
|
|
36
|
+
|
|
37
|
+
## Usage
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
mdformat --version # should list "mdformat_sembr"
|
|
41
|
+
echo "First sentence. Second sentence." | mdformat -
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
From Python:
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import mdformat
|
|
48
|
+
|
|
49
|
+
mdformat.text("First sentence. Second sentence.\n", extensions={"sembr"})
|
|
50
|
+
# 'First sentence.\nSecond sentence.\n'
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## How it works
|
|
54
|
+
|
|
55
|
+
The plugin registers a **postprocessor** on the `paragraph` node type. At that point
|
|
56
|
+
inline formatting (emphasis, links, inline code) is already resolved into the string,
|
|
57
|
+
so it operates on the final rendered text and only protects a few inline constructs by
|
|
58
|
+
regex. Block-level elements (headings, code blocks, tables, frontmatter, HTML blocks)
|
|
59
|
+
are separate node types and are never touched.
|
|
60
|
+
|
|
61
|
+
`CHANGES_AST = False`: soft breaks are AST-safe by design, so mdformat's built-in
|
|
62
|
+
`is_md_equal` validator gates correctness. If validation ever fails, the break logic is
|
|
63
|
+
wrong — it is never worked around with `--no-validate` or hard breaks.
|
|
64
|
+
|
|
65
|
+
## Configuration
|
|
66
|
+
|
|
67
|
+
Configure via `[plugin.sembr]` in `.mdformat.toml`, or via CLI flags. CLI values merge
|
|
68
|
+
over TOML.
|
|
69
|
+
|
|
70
|
+
| Option | Type | Default | Meaning |
|
|
71
|
+
| --------------- | ----------- | --------- | -------------------------------------------------------------- |
|
|
72
|
+
| `min_chars` | int | `15` | Minimum length of the segment before a break is allowed. |
|
|
73
|
+
| `abbreviations` | list[str] | see below | Tokens after which no sentence break is inserted. |
|
|
74
|
+
| `break_clauses` | bool | `false` | Enable clause-level breaks (SemBr "SHOULD"). Off by default. |
|
|
75
|
+
| `clause_chars` | str | `",;:—"` | Clause punctuation set (only used when `break_clauses` true). |
|
|
76
|
+
|
|
77
|
+
CLI flags: `--sembr-min-chars`, `--sembr-abbreviations`, `--sembr-break-clauses`,
|
|
78
|
+
`--sembr-clause-chars`.
|
|
79
|
+
|
|
80
|
+
`.mdformat.toml` example:
|
|
81
|
+
|
|
82
|
+
```toml
|
|
83
|
+
[plugin.sembr]
|
|
84
|
+
min_chars = 20
|
|
85
|
+
break_clauses = true
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## License
|
|
89
|
+
|
|
90
|
+
MIT
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "mdformat-sembr"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "mdformat plugin that inserts Semantic Line Breaks (sembr.org) as CommonMark soft breaks"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
license-files = ["LICENSE"]
|
|
12
|
+
requires-python = ">=3.10"
|
|
13
|
+
keywords = ["mdformat", "markdown", "sembr", "semantic line breaks", "formatter"]
|
|
14
|
+
authors = [ { name = "bugrasan" } ]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
23
|
+
"Topic :: Text Processing :: Markup :: Markdown",
|
|
24
|
+
]
|
|
25
|
+
dependencies = ["mdformat>=1.0"]
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
test = ["pytest>=7"]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://codeberg.org/bugrasan/mdformat-sembr"
|
|
32
|
+
"GitHub Mirror" = "https://github.com/bugrasan/mdformat-sembr"
|
|
33
|
+
|
|
34
|
+
[project.entry-points."mdformat.parser_extension"]
|
|
35
|
+
sembr = "mdformat_sembr:_plugin"
|
|
36
|
+
|
|
37
|
+
[tool.hatch.build.targets.wheel]
|
|
38
|
+
packages = ["src/mdformat_sembr"]
|