puku-markdown 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. puku_markdown-0.1.0/.github/workflows/publish.yaml +99 -0
  2. puku_markdown-0.1.0/.github/workflows/pytest.yaml +41 -0
  3. puku_markdown-0.1.0/.gitignore +5 -0
  4. puku_markdown-0.1.0/PKG-INFO +5 -0
  5. puku_markdown-0.1.0/README.md +0 -0
  6. puku_markdown-0.1.0/puku_markdown/__init__.py +0 -0
  7. puku_markdown-0.1.0/puku_markdown/_utils/constants.py +315 -0
  8. puku_markdown-0.1.0/puku_markdown/_utils/metrics.py +26 -0
  9. puku_markdown-0.1.0/puku_markdown/_utils/predicates.py +38 -0
  10. puku_markdown-0.1.0/puku_markdown/_utils/re_patterns.py +180 -0
  11. puku_markdown-0.1.0/puku_markdown/_utils/scanners/__init__.py +29 -0
  12. puku_markdown-0.1.0/puku_markdown/_utils/scanners/link_destination.py +95 -0
  13. puku_markdown-0.1.0/puku_markdown/_utils/scanners/link_title.py +113 -0
  14. puku_markdown-0.1.0/puku_markdown/column_resolution.py +130 -0
  15. puku_markdown-0.1.0/puku_markdown/elements/__init__.py +36 -0
  16. puku_markdown-0.1.0/puku_markdown/elements/block/__init__.py +34 -0
  17. puku_markdown-0.1.0/puku_markdown/elements/block/base.py +11 -0
  18. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/__init__.py +31 -0
  19. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/atx_heading.py +9 -0
  20. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/blockquote.py +10 -0
  21. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/fenced_code_block.py +10 -0
  22. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/html_block.py +20 -0
  23. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/indented_code_block.py +8 -0
  24. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/link_reference_definition.py +10 -0
  25. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/list.py +23 -0
  26. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/paragraph.py +8 -0
  27. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/setext_heading.py +9 -0
  28. puku_markdown-0.1.0/puku_markdown/elements/block/commonmark/thematic_break.py +8 -0
  29. puku_markdown-0.1.0/puku_markdown/elements/document.py +17 -0
  30. puku_markdown-0.1.0/puku_markdown/line_span.py +31 -0
  31. puku_markdown-0.1.0/puku_markdown/parser/block/block_stream.py +29 -0
  32. puku_markdown-0.1.0/puku_markdown/parser/block/command.py +273 -0
  33. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/__init__.py +55 -0
  34. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/atx_heading.py +134 -0
  35. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/blockquote.py +287 -0
  36. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/fenced_code_block.py +158 -0
  37. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/html_blocks.py +314 -0
  38. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/indented_code_block.py +82 -0
  39. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/link_reference_definition.py +443 -0
  40. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/list.py +553 -0
  41. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/locals/__init__.py +19 -0
  42. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/locals/blockquote.py +54 -0
  43. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/locals/link_reference_definition.py +101 -0
  44. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/locals/list.py +59 -0
  45. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/locals/paragraph.py +8 -0
  46. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/locals/setext_heading.py +9 -0
  47. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/paragraph.py +105 -0
  48. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/setext_heading.py +190 -0
  49. puku_markdown-0.1.0/puku_markdown/parser/block/commonmark/rules/thematic_break.py +79 -0
  50. puku_markdown-0.1.0/puku_markdown/parser/block/frame.py +331 -0
  51. puku_markdown-0.1.0/puku_markdown/parser/block/frame_actuals.py +83 -0
  52. puku_markdown-0.1.0/puku_markdown/parser/block/frame_spec.py +46 -0
  53. puku_markdown-0.1.0/puku_markdown/parser/block/line_descriptor.py +107 -0
  54. puku_markdown-0.1.0/puku_markdown/parser/block/logger.py +4 -0
  55. puku_markdown-0.1.0/puku_markdown/parser/block/parse.py +308 -0
  56. puku_markdown-0.1.0/puku_markdown/parser/block/rule.py +22 -0
  57. puku_markdown-0.1.0/puku_markdown/parser/block/rule_chain.py +28 -0
  58. puku_markdown-0.1.0/puku_markdown/parser/block/rule_chains_registry.py +87 -0
  59. puku_markdown-0.1.0/puku_markdown/parser/block/rule_context.py +223 -0
  60. puku_markdown-0.1.0/puku_markdown/parser/block/rule_locals.py +38 -0
  61. puku_markdown-0.1.0/puku_markdown/parser/block/state.py +746 -0
  62. puku_markdown-0.1.0/puku_markdown/parser/block/type_aliases.py +14 -0
  63. puku_markdown-0.1.0/puku_markdown/parser/block/upcall.py +35 -0
  64. puku_markdown-0.1.0/puku_markdown/parser/parse.py +37 -0
  65. puku_markdown-0.1.0/puku_markdown/persistent_list/__init__.py +5 -0
  66. puku_markdown-0.1.0/puku_markdown/persistent_list/change_set.py +41 -0
  67. puku_markdown-0.1.0/puku_markdown/persistent_list/core.py +76 -0
  68. puku_markdown-0.1.0/puku_markdown/persistent_list/modification.py +36 -0
  69. puku_markdown-0.1.0/puku_markdown/persistent_list/transactional_editor.py +173 -0
  70. puku_markdown-0.1.0/puku_markdown/persistent_list/transient.py +131 -0
  71. puku_markdown-0.1.0/puku_markdown/persistent_list/type_vars.py +4 -0
  72. puku_markdown-0.1.0/pyproject.toml +19 -0
  73. puku_markdown-0.1.0/tests/__init__.py +0 -0
  74. puku_markdown-0.1.0/tests/commonmark/__init__.py +0 -0
  75. puku_markdown-0.1.0/tests/commonmark/spec.json +5218 -0
  76. puku_markdown-0.1.0/tests/commonmark/spec.md +9756 -0
  77. puku_markdown-0.1.0/tests/commonmark/spec.py +96 -0
  78. puku_markdown-0.1.0/tests/commonmark/test_block_parser.py +19 -0
  79. puku_markdown-0.1.0/tests/commonmark/update_spec.py +76 -0
  80. puku_markdown-0.1.0/tests/markdown_it_py/__init__.py +4 -0
  81. puku_markdown-0.1.0/tests/markdown_it_py/block_parse.py +64 -0
  82. puku_markdown-0.1.0/tests/markdown_it_py/block_token.py +366 -0
  83. puku_markdown-0.1.0/uv.lock +321 -0
@@ -0,0 +1,99 @@
1
# Release workflow: validates the trigger, runs the test suite, builds the
# distribution, and uploads it to PyPI (or TestPyPI when requested manually).
name: Publish

on:
  release:
    types: [published]
  workflow_dispatch:
    inputs:
      use_testpypi:
        description: 'Publish to TestPyPI instead of PyPI'
        required: false
        default: false
        type: boolean

permissions:
  contents: read

jobs:
  validate:
    name: Validate source branch and destination
    runs-on: ubuntu-latest
    steps:
      - name: Check branch and destination compatibility
        # Context values are handed to the script through environment
        # variables rather than being expanded into the script text. A ref
        # name may contain shell metacharacters, and inline `${{ }}` expansion
        # would let such a name inject commands into this step.
        env:
          BRANCH: ${{ github.ref_name }}
          USE_TEST: ${{ inputs.use_testpypi }}
          EVENT: ${{ github.event_name }}
        run: |
          if [[ "$USE_TEST" == "true" ]]; then
            echo "✅ Publishing to TestPyPI - branch $BRANCH is allowed."
            exit 0
          fi

          if [[ "$BRANCH" != "main" && "$EVENT" != "release" ]]; then
            echo "❌ Error: Publishing to PyPI is only allowed from the 'main' branch or a release tag."
            echo "Current branch: $BRANCH, event: $EVENT"
            exit 1
          fi

          echo "✅ Validation passed - publishing to PyPI from $BRANCH (event: $EVENT)."

  test:
    name: Run pytest suite
    needs: validate
    uses: ./.github/workflows/pytest.yaml
    secrets: inherit

  build:
    name: Build distribution
    needs: test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'

      - name: Install uv
        uses: astral-sh/setup-uv@v5

      - name: Build wheel and sdist
        run: uv build

      # The artifact only needs to live long enough for the publish job.
      - name: Upload dist as artifact
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist
          retention-days: 1

  publish:
    name: Publish to ${{ inputs.use_testpypi && 'TestPyPI' || 'PyPI' }}
    needs: build
    runs-on: ubuntu-latest
    environment:
      name: ${{ inputs.use_testpypi && 'testpypi' || 'pypi' }}
      url: ${{ inputs.use_testpypi && 'https://test.pypi.org/project/puku-markdown' || 'https://pypi.org/project/puku-markdown' }}
    steps:
      - name: Download dist
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist

      # Exactly one of the two publish steps below runs, selected by the
      # `use_testpypi` input (unset on release events, so PyPI is used).
      - name: Publish to TestPyPI (with API token)
        if: inputs.use_testpypi
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist
          repository-url: https://test.pypi.org/legacy/
          password: ${{ secrets.TEST_PYPI_API_TOKEN }}

      - name: Publish to PyPI (with API token)
        if: ${{ !inputs.use_testpypi }}
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist
          password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,41 @@
1
# Test workflow: runs the pytest suite with coverage on each Python version
# listed in the matrix.
name: Pytest

on:
  # Lets other workflows invoke this one as a reusable workflow.
  workflow_call:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

permissions:
  contents: read

jobs:
  test:
    name: Python ${{ matrix.python-version }}
    runs-on: ubuntu-latest
    strategy:
      # Keep the other matrix jobs running when one Python version fails.
      fail-fast: false
      matrix:
        python-version:
          - "3.12"
          - "3.13"

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6.2.0
        with:
          python-version: ${{ matrix.python-version }}

      # The uv cache is keyed on the lock file.
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true
          cache-dependency-glob: "uv.lock"

      - name: Install dependencies (including test group)
        run: uv sync --group test

      - name: Run pytest with coverage monitoring
        run: uv run pytest --cov --cov-report=term-missing
@@ -0,0 +1,5 @@
1
+ __local__/
2
+ __pycache__
3
+ .pytest_cache/
4
+ .venv/
5
+ .coverage
@@ -0,0 +1,5 @@
1
+ Metadata-Version: 2.4
2
+ Name: puku-markdown
3
+ Version: 0.1.0
4
+ Summary: Add your description here
5
+ Requires-Python: >=3.12
File without changes
File without changes
@@ -0,0 +1,315 @@
1
+ """
2
+ CommonMark constants for Markdown parsing.
3
+
4
+ Coding style note:
5
+ Typically, names are limited to at most 4 words (e.g., `INDENTED_CODE_BLOCK_MIN_INDENT`).
6
+ However, for constants that directly represent a CommonMark element (e.g., fenced code
7
+ blocks, thematic breaks), the element name in the prefix (e.g., `FENCED_CODE_BLOCK_`,
8
+ `THEMATIC_BREAK_`) is **excluded** from the word count. This allows longer, more
9
+ precise names without violating the spirit of the rule.
10
+
11
+ This exception is *temporary* and subject to revision.
12
+
13
+ TODO: Re-evaluate the word-limit exception for CommonMark element prefixes.
14
+ Consider adopting a fixed maximum total length or a different scheme.
15
+ """
16
+
17
+ from typing import Final
18
+
19
+
20
+ COMMONMARK_TAB_STOP: Final[int] = 4
21
+ """Number of spaces to which a tab character expands in CommonMark parsing.
22
+
23
+ See Also:
24
+ CommonMark Spec 0.31.2, Section 2.2 (Tabs):
25
+ https://spec.commonmark.org/0.31.2/#tabs
26
+ """
27
+
28
+ NULL_CHARACTER: Final[str] = "\0"
29
+
30
+ HASH_CHARACTER: Final[str] = "#"
31
+ """
32
+ Hash/number sign character used as the ATX heading marker in CommonMark.
33
+ """
34
+
35
+ LESS_THAN_CHARACTER: Final[str] = "<"
36
+ """
37
+ The less-than sign character '<' (Unicode U+003C).
38
+ """
39
+
40
+ GREATER_THAN_CHARACTER: Final[str] = ">"
41
+ """The greater-than sign character '>' (Unicode U+003E).
42
+ """
43
+
44
+ SPACE_CHARACTER: Final[str] = " "
45
+ """
46
+ The space character ' ' (Unicode U+0020).
47
+ """
48
+
49
+ TAB_CHARACTER: Final[str] = "\t"
50
+ """
51
+ The tab character '\\t' (Unicode U+0009, CHARACTER TABULATION).
52
+ """
53
+
54
+ LEFT_SQUARE_BRACKET_CHARACTER: Final[str] = "["
55
+ """
56
+ The left square bracket character '[' (Unicode U+005B, LEFT SQUARE BRACKET).
57
+ """
58
+
59
+ RIGHT_SQUARE_BRACKET_CHARACTER: Final[str] = "]"
60
+ """
61
+ The right square bracket character ']' (Unicode U+005D, RIGHT SQUARE BRACKET).
62
+ """
63
+
64
+ LINE_FEED_CHARACTER: Final[str] = "\n"
65
+ """
66
+ Line feed (LF), the '\\n' character (Unicode U+000A).
67
+ """
68
+
69
+ BACKSLASH_CHARACTER: Final[str] = "\\"
70
+ """
71
+ Backslash, the escape character.
72
+ """
73
+
74
+ COLON_CHARACTER: Final[str] = ":"
75
+ """
76
+ Colon (:), used in reference definitions after the label.
77
+ """
78
+
79
+ LEFT_PARENTHESIS_CHARACTER: Final[str] = "("
80
+ """
81
+ Left parenthesis '(' (U+0028).
82
+ """
83
+
84
+ RIGHT_PARENTHESIS_CHARACTER: Final[str] = ")"
85
+ """
86
+ Right parenthesis ')' (U+0029).
87
+ """
88
+
89
+ UNICODE_REPLACEMENT_CHARACTER: Final[str] = "\ufffd"
90
+
91
+ BACKTICK_CHARACTER: Final[str] = "`"
92
+
93
+ INDENTED_CODE_BLOCK_MIN_INDENT: Final[int] = 4
94
+ """Minimum indentation required for a line to be part of an indented code block.
95
+
96
+ According to the CommonMark specification (Section 4.4), an indented chunk is a
97
+ sequence of non-blank lines, each preceded by *four or more spaces* of indentation.
98
+ Lines meeting this threshold form an indented code block.
99
+
100
+ Note:
101
+ An indented code block cannot interrupt a paragraph. Therefore, within a
102
+ paragraph, a line with indentation >= this value is treated as a lazy
103
+ continuation of the paragraph, not as a new code block.
104
+
105
+ See Also:
106
+ CommonMark Spec 0.31.2, Section 4.4:
107
+ https://spec.commonmark.org/0.31.2/#indented-code-blocks
108
+ """
109
+
110
+ THEMATIC_BREAK_MARKERS: Final[frozenset[str]] = frozenset({"*", "-", "_"})
111
+ """
112
+ Immutable set of characters that can initiate a CommonMark thematic break.
113
+ """
114
+
115
+ THEMATIC_BREAK_MIN_MARKER_COUNT: Final[int] = 3
116
+ """
117
+ Minimum number of identical markers required for a CommonMark thematic break.
118
+
119
+ See CommonMark Spec 0.31.2, Section 4.1:
120
+ https://spec.commonmark.org/0.31.2/#thematic-breaks
121
+ """
122
+
123
+ FENCED_CODE_BLOCK_MIN_MARKER_COUNT: Final[int] = 3
124
+ """
125
+ Minimum number of consecutive backticks or tildes required for a fenced code block.
126
+
127
+ Per CommonMark Spec 0.31.2, Section 4.5:
128
+ https://spec.commonmark.org/0.31.2/#fenced-code-blocks
129
+ """
130
+
131
+ FENCED_CODE_BLOCK_MARKERS: Final[frozenset[str]] = frozenset({"~", "`"})
132
+ """
133
+ Immutable set of marker characters that can open a fenced code block.
134
+
135
+ See Also:
136
+ CommonMark Spec 0.31.2, Section 4.5:
137
+ https://spec.commonmark.org/0.31.2/#fenced-code-blocks
138
+ """
139
+
140
+ ATX_HEADING_MAX_LEVEL: Final[int] = 6
141
+ """
142
+ Maximum heading level for an ATX heading, corresponding to six '#' characters.
143
+
144
+ See Also:
145
+ CommonMark Spec 0.31.2, Section 4.2 (ATX headings):
146
+ https://spec.commonmark.org/0.31.2/#atx-headings
147
+ """
148
+
149
+ HTML_BLOCK_NAMES: Final[tuple[str, ...]] = (
150
+ "address",
151
+ "article",
152
+ "aside",
153
+ "base",
154
+ "basefont",
155
+ "blockquote",
156
+ "body",
157
+ "caption",
158
+ "center",
159
+ "col",
160
+ "colgroup",
161
+ "dd",
162
+ "details",
163
+ "dialog",
164
+ "dir",
165
+ "div",
166
+ "dl",
167
+ "dt",
168
+ "fieldset",
169
+ "figcaption",
170
+ "figure",
171
+ "footer",
172
+ "form",
173
+ "frame",
174
+ "frameset",
175
+ "h1",
176
+ "h2",
177
+ "h3",
178
+ "h4",
179
+ "h5",
180
+ "h6",
181
+ "head",
182
+ "header",
183
+ "hr",
184
+ "html",
185
+ "iframe",
186
+ "legend",
187
+ "li",
188
+ "link",
189
+ "main",
190
+ "menu",
191
+ "menuitem",
192
+ "nav",
193
+ "noframes",
194
+ "ol",
195
+ "optgroup",
196
+ "option",
197
+ "p",
198
+ "param",
199
+ "search",
200
+ "section",
201
+ "summary",
202
+ "table",
203
+ "tbody",
204
+ "td",
205
+ "tfoot",
206
+ "th",
207
+ "thead",
208
+ "title",
209
+ "tr",
210
+ "track",
211
+ "ul",
212
+ )
213
+ """
214
+ List of HTML block-level tag names as defined by the CommonMark specification.
215
+
216
+ This list is used to construct the regex for matching block-level HTML tags
217
+ (CommonMark HTML block type 6). The tags are matched case-insensitively
218
+ (using `re.IGNORECASE` flag) because HTML tag names are case-insensitive.
219
+
220
+ Reference: https://spec.commonmark.org/0.31.2/#html-blocks
221
+ """
222
+
223
+ SETEXT_HEADING_MARKERS: Final[frozenset[str]] = frozenset({"-", "="})
224
+ """Markers that denote Setext headings (CommonMark section 4.3, version 0.31.2).
225
+
226
+ A line consisting entirely of `=` characters (optionally with trailing spaces)
227
+ indicates a level-1 heading. A line consisting entirely of `-` characters
228
+ indicates a level-2 heading. The marker line must appear immediately after the
229
+ heading text (with no blank line in between).
230
+
231
+ Reference: https://spec.commonmark.org/0.31.2/#setext-headings
232
+ """
233
+
234
+ HYPHEN_MINUS_CHARACTER: Final[str] = "-"
235
+ """
236
+ The hyphen-minus character '-' (Unicode U+002D).
237
+
238
+ This character is used as a hyphen, minus sign, or dash. In Markdown,
239
+ it appears in Setext headings (level-2), unordered lists, and horizontal rules.
240
+ """
241
+
242
+ EQUALS_SIGN_CHARACTER: Final[str] = "="
243
+ """
244
+ The equals sign '=' (Unicode U+003D).
245
+
246
+ In Markdown, this character is used as the underline of level-1 Setext
247
+ headings. It is not a code-fence character (fences use backticks or tildes).
248
+ It also appears in HTML attributes and link definitions.
249
+ """
250
+
251
+ MAX_LINK_DESTINATION_PARENTHESIS_DEPTH: Final[int] = 32
252
+ """
253
+ Maximum allowed nesting depth of parentheses inside a bare link destination
254
+ (i.e., when not enclosed in `<` `>`). Exceeding this limit invalidates the
255
+ link destination and stops scanning.
256
+
257
+ This limit is a safety measure against pathological input; it is not mandated
258
+ by the CommonMark specification, which only requires balanced parentheses.
259
+ The value 32 is high enough for all practical URLs yet low enough to prevent
260
+ excessive CPU consumption.
261
+ """
262
+
263
+ DOUBLE_QUOTE_CHARACTER: Final[str] = '"'
264
+ """
265
+ Double quotation mark " (U+0022).
266
+ """
267
+
268
+ SINGLE_QUOTE_CHARACTER: Final[str] = "'"
269
+ """
270
+ Apostrophe / single quote ' (U+0027).
271
+ """
272
+
273
+ BULLET_LIST_MARKERS: Final[frozenset[str]] = frozenset({"*", "-", "+"})
274
+ """Bullet list markers as defined in CommonMark section 5.2 (version 0.31.2).
275
+
276
+ A line beginning with one of these characters, followed by a space or tab,
277
+ starts a bullet list item. The marker may be preceded by up to three spaces
278
+ of indentation.
279
+
280
+ References:
281
+ - https://spec.commonmark.org/0.31.2/#list-items
282
+ - https://spec.commonmark.org/0.31.2/#bullet-list-marker
283
+ """
284
+
285
+ ORDERED_LIST_MARKER_DELIMITERS: Final[frozenset[str]] = frozenset({".", ")"})
286
+ """Ordered list marker delimiters as defined in CommonMark section 5.2 (version 0.31.2).
287
+
288
+ An ordered list marker consists of a positive integer followed by a delimiter
289
+ character: either a period (`.`) or a right parenthesis (`)`). This constant
290
+ holds the two allowed delimiter characters.
291
+
292
+ Reference: https://spec.commonmark.org/0.31.2/#list-items
293
+ """
294
+
295
EMPTY_STRING: Final[str] = ""
"""A constant for the empty string.

Use this constant instead of the literal `""` when the empty string serves as
a default value, a placeholder, or a well-known marker in public APIs or
repeated logic. This improves readability and centralises the concept.
Declared `Final` so type checkers reject reassignment.
"""

MAX_ORDERED_LIST_MARKER_DIGITS: Final[int] = 9
"""
The maximum number of digits permitted in an ordered list marker.

CommonMark limits ordered list markers (e.g., '1.', '999999999.') to at most
9 digits. The limit exists because longer numbers start to overflow integers
in some browsers.

Any marker exceeding this length (e.g., '1000000000.') is invalid and must not
be recognized as a list marker by a conforming implementation.

See Also:
    CommonMark Spec 0.31.2, Section 5.2 (List items):
    https://spec.commonmark.org/0.31.2/#list-items
"""
@@ -0,0 +1,26 @@
1
+ from puku_markdown._utils.constants import COMMONMARK_TAB_STOP, TAB_CHARACTER
2
+
3
+
4
def commonmark_char_width(start_colno: int, character: str) -> int:
    """
    Measure how many visual columns a character occupies under CommonMark rules.

    A tab advances the visual column to the next tab stop (tab stops occur
    every `COMMONMARK_TAB_STOP` columns), so its width depends on the column
    it starts at. Every other character occupies exactly one column.

    Args:
        start_colno: The 0-based visual column at which the character starts.
        character: The character being measured.

    Returns:
        The number of columns the character adds to the visual position.

    Reference:
        https://spec.commonmark.org/0.31.2/#tabs
    """
    if character != TAB_CHARACTER:
        return 1

    # Distance already travelled past the previous tab stop.
    columns_past_stop = start_colno % COMMONMARK_TAB_STOP
    return COMMONMARK_TAB_STOP - columns_past_stop
@@ -0,0 +1,38 @@
1
+ from puku_markdown._utils.constants import TAB_CHARACTER, SPACE_CHARACTER
2
+
3
+
4
def is_space_or_tab(character: str) -> bool:
    """
    Tell whether `character` is an ASCII space (U+0020) or a tab (U+0009).

    CommonMark counts only these two characters as indentation when deciding
    block structure. Other Unicode whitespace (for example a non-breaking
    space) is ordinary content and does not contribute to leading indentation.

    Reference: https://spec.commonmark.org/0.31.2/#tabs
    """
    return character == TAB_CHARACTER or character == SPACE_CHARACTER
15
+
16
+
17
def is_ascii_control(character: str) -> bool:
    """
    Return True if `character` is a single ASCII control character.

    This covers the C0 control codes (U+0000-U+001F) and DEL (U+007F).
    CommonMark does not allow these characters inside a link destination
    that is not enclosed in `<` `>`.

    A string that is not exactly one character long (including the empty
    string) is never a control character, so False is returned for it
    instead of letting `ord()` raise `TypeError`.

    Args:
        character: The character to classify.

    Returns:
        True for U+0000-U+001F and U+007F, False for anything else.
    """
    if len(character) != 1:
        return False
    code = ord(character)
    return code < 0x20 or code == 0x7F
27
+
28
+
29
def is_ascii_digit(character: str) -> bool:
    """
    Return True if `character` is a single ASCII digit (U+0030-U+0039).

    Non-ASCII digits (for example Arabic-Indic digits) are rejected.

    The length check is required because string comparison is lexicographic:
    without it, multi-character strings such as "10" or "5a" would fall
    between "0" and "9" and be reported as digits.

    Args:
        character: The character to classify.

    Returns:
        True only when `character` is exactly one of '0' through '9'.

    Reference: https://spec.commonmark.org/0.31.2/#list-items
    """
    return len(character) == 1 and "0" <= character <= "9"